def addRoleSet(name):
    global doneCount
    roleSetMainName = name.split('.')[0]
    roleSetMainKatum = roleset.get(roleSetMainName)
    roleSetNewKatum = roleSetMainKatum.get(roleSetMainKatum.countI)
    rolesetsensenum = senseNumber.get(name.split('.')[1])
    roleSetNewKatum._is(rolesetsensenum, False)
    if (pb.roleset(name) != None):
        verbnetCls = pb.roleset(name).get('vncls')
        if (verbnetCls != None):
            verbnetID = verbClassID.find(verbnetCls)
            if (verbnetID != None):
                for vnClass in verbnetID.I:
                    roleSetNewKatum._is(vnClass, False)
                    print doneCount, vnClass.a0.O, vnClass.O, roleSetMainKatum.O, roleSetNewKatum.O
                    doneCount += 1
        roleSetMeaning = pb.roleset(name).get('name')
        if (roleSetMeaning != None):
            roleSetMeaningKatum = meaning.get(roleSetMeaning)
            roleSetNewKatum._is(roleSetMeaningKatum, False)
        for role in pb.roleset(name).findall("roles/role"):
            argCount = role.attrib['n']
            argName = role.attrib['descr']
            if argCount != None and argName != None:
                argumentKatum = argument.get(argName)
                argumentNameNumKatum = argumentKatum.get(argumentKatum.countI)
                argNumKatum = argumentNumber.get(argCount)
                argumentNameNumKatum._is(argNumKatum, False)
                roleSetNewKatum._is(argumentNameNumKatum, False)
def fileGenerator(rolesetString):
    """Append the first ``<example>`` of a roleset to propbank-examples.xml.

    The roleset is looked up with ``pb.roleset(rolesetString)``.  Its first
    ``example`` child is serialised with ``ElementTree.tostring``, decoded as
    UTF-8, stripped, and appended to ``propbank-examples.xml`` followed by a
    newline.  Rolesets without an ``<example>`` child write nothing.
    """
    example = pb.roleset(rolesetString).find('example')
    if example is None:
        # find() returns None when there is no <example>.  The previous
        # check compared the tostring() result to None, which can never be
        # true, so this case crashed inside tostring() with the file open.
        return

    text = ElementTree.tostring(example).decode('utf8').strip()
    # The context manager closes the file even if a write fails.
    with open("propbank-examples.xml", "a") as out:
        out.write(text)
        out.write("\n")
Exemple #3
0
    def __init__(self, verb_string, sent_string, pb_inst, label, mismatch):
        """Extract head nouns for ARG0-ARG2 and roleset ARG0/ARG1 descriptions.

        Args:
            verb_string: stored unchanged as ``self.verb_string``.
            sent_string: stored unchanged as ``self.sent_string``.
            pb_inst: PropBank instance; its ``tree``, ``roleset`` and
                ``arguments`` attributes are read.
            label: stored unchanged as ``self.label``.
            mismatch: stored unchanged as ``self.mismatch``.

        After construction ``arg0``/``arg1``/``arg2`` hold the first noun or
        pronoun terminal found under the matching argument (or ``None``).
        ``rs_arg0``/``rs_arg1`` hold the first word of the roleset's role
        descriptions for roles 0 and 1 (or ``None``).  ``rs`` is the roleset
        element, or ``None`` when the roleset id does not end in a digit.
        """
        self.label = label
        self.mismatch = mismatch
        self.sent_string = sent_string
        self.verb_string = verb_string
        self.tree = pb_inst.tree

        self.arg0 = None
        self.arg1 = None
        self.arg2 = None
        self.rs_arg0 = None
        self.rs_arg1 = None

        # Always define self.rs: it used to be left unset for roleset ids
        # that do not end in a digit, and the role loop below then raised
        # AttributeError.
        self.rs = None
        if pb_inst.roleset[-1:].isnumeric():
            self.rs = propbank.roleset(pb_inst.roleset)

        unwrap_order = (PropbankSplitTreePointer,
                        PropbankChainTreePointer,
                        PropbankSplitTreePointer)
        for arg in pb_inst.arguments:
            # Peel split/chain pointers (split -> chain -> split) down to
            # their pieces so that arg[0] exposes treepos().
            # NOTE(review): once unwrapped, ``arg`` is the list of pieces, so
            # arg[1] below is no longer the argument label and none of the
            # ARG0/ARG1/ARG2 branches can match -- confirm this is intended.
            for pointer_type in unwrap_order:
                if isinstance(arg[0], pointer_type):
                    arg = arg[0].pieces

            subtree = self.tree[arg[0].treepos(self.tree)]
            li = str(subtree.productions()).strip('[]').split()

            # Collect the terminal after every "NN* ->" / "PRP* ->" rule.
            # The bounds check keeps a trailing NN/PRP token from indexing
            # past the end of the token list.
            noun_list = []
            for index, elem in enumerate(li):
                if (index + 2 < len(li)
                        and elem.startswith(('NN', 'PRP'))
                        and li[index + 1] == '->'):
                    noun_list.append(li[index + 2])

            # NOTE(review): requires at least two nouns before recording the
            # first one; kept as in the original -- confirm "> 1" vs "> 0".
            if len(noun_list) > 1:
                head = noun_list[0].strip(',"')
                if arg[1] == 'ARG0':
                    self.arg0 = head
                elif arg[1] == 'ARG1':
                    self.arg1 = head
                elif arg[1] == 'ARG2':
                    self.arg2 = head

        if self.rs is not None:
            for role in self.rs.findall('roles/role'):
                number = role.attrib['n']
                if number.startswith('0'):
                    self.rs_arg0 = role.attrib['descr'].split()[0]
                if number.startswith('1'):
                    self.rs_arg1 = role.attrib['descr'].split()[0]
Exemple #4
0
def rs_args(id, cache={}):
    """Return ``{'ARG<n>': description}`` for the PropBank roleset ``id``.

    Results are memoised in ``cache``.  The mutable default is deliberate:
    it is the shared cache across calls, and callers may pass their own
    dict instead.  An unknown roleset (``ValueError`` from
    ``propbank.roleset``) is printed and cached as an empty dict.

    Args:
        id: roleset id passed to ``propbank.roleset``.
        cache: dict mapping roleset id to the previously built result.

    Returns:
        dict mapping ``'ARG' + n`` to the role's ``descr`` attribute.
    """
    if id in cache:
        return cache[id]

    # Parenthesised single-argument print behaves the same on Python 2 and 3;
    # the 1-tuple keeps %-formatting correct even if ``id`` is a tuple.
    print('roleset %s (cache miss)' % (id,))

    args = {}
    try:
        rs = propbank.roleset(id)
    except ValueError as e:
        print(e)
    else:
        # Address the roles by path instead of assuming <roles> is the
        # first child of the roleset element.
        for role in rs.findall('roles/role'):
            args['ARG' + role.attrib['n']] = role.attrib['descr']

    cache[id] = args

    return args
Exemple #5
0
def rs_args(id, cache={}):
    """Look up the numbered-argument descriptions of a PropBank roleset.

    The result for each ``id`` is stored in ``cache`` and returned directly
    on later calls.  The default ``cache`` dict is intentionally shared
    between calls so it acts as a process-wide memo; pass a dict to use a
    private one.  When ``propbank.roleset`` raises ``ValueError`` the error
    is printed and an empty dict is cached and returned.

    Args:
        id: roleset id passed to ``propbank.roleset``.
        cache: dict used as the memo table (mutated in place).

    Returns:
        dict whose keys are ``'ARG' + role n`` and whose values are the
        role ``descr`` attributes.
    """
    if id in cache:
        return cache[id]

    # Single-argument print in parentheses is valid on Python 2 and 3; the
    # 1-tuple guards the format against a tuple-valued ``id``.
    print('roleset %s (cache miss)' % (id,))

    args = {}
    try:
        rs = propbank.roleset(id)
    except ValueError as e:
        print(e)
    else:
        # Use the 'roles/role' path rather than rs[0], which silently
        # depends on <roles> being the roleset's first child.
        for role in rs.findall('roles/role'):
            args['ARG' + role.attrib['n']] = role.attrib['descr']

    cache[id] = args

    return args
Exemple #6
0
 def findexamples(self):
     self.allexamples=[]
     processed=0
     for instance in pb.instances():
         if self.testing>5:
             print instance.roleset
             print instance.arguments
         try:
             roleset=pb.roleset(instance.roleset)
             examples=roleset.findall('example')
             for e in examples:
                 #print ElementTree.tostring(e).decode('utf8').strip()
                 self.allexamples.append(e)
         except:
             pass
         processed+=1
         if self.testing > 2 and processed>10:
             break
     print "Number of examples: ",len(self.allexamples)
def roleFinder(verb):
	#--converting verb into its present tense
	print "verb**",verb
	
	if verb=="find" or verb=="found" : tverb="find"
	else: tverb=en.verb.present(verb)
	print "targetverb **** ",tverb	
	if tverb=="emerge": propVerb=tverb+".02"
	else: propVerb=tverb+".01"
	print propVerb
	if propVerb=="re-cover.01": propVerb="recover.01"
	allroles={}
	if propVerb=="vaccinate.01": 
		allroles={'A0':'Vaccinator','A1':'Vaccinated','A2':'Against_what/disease'}
	
	else:
		roles=propbank.roleset(propVerb)
		for role in roles.findall('roles/role'):
			role.attrib['descr']=role.attrib['descr'].replace(" ","-")
			allroles["A"+str(role.attrib['n'])]=role.attrib['descr']

	return allroles
Exemple #8
0
 def get_role_descriptions(self: Proposition) -> Dict[str, str]:
     """Build the role-label -> description table for this proposition.

     The roleset string (``self.pred_roleset``, or the result of
     ``self.generate_roleset()`` when that is ``None``) is split at its
     last "-" into a roleset id and a part-of-speech tag.  Tag "v" reads
     the roleset from ``pb`` and tag "n" from ``nb``; any other tag skips
     the lookup.  The result starts as a copy of ``DICT_MODIFIERS``, gains
     one ``A<n>`` entry per role found, and is finally updated with a copy
     of ``DICT_CORE_ROLES``.
     """
     roleset_str = self.pred_roleset
     if roleset_str is None:
         roleset_str = self.generate_roleset()
     roleset_id, pos = roleset_str.rsplit("-", 1)

     descriptions = DICT_MODIFIERS.copy()
     core_roles = DICT_CORE_ROLES.copy()

     if pos in ("v", "n"):
         # Only the corpus that is actually needed is touched.
         corpus = pb if pos == "v" else nb
         for role in corpus.roleset(roleset_id).findall("roles/role"):
             attrs = role.attrib
             number, description = attrs["n"], attrs["descr"]
             descriptions[f"A{number}"] = description

     descriptions.update(core_roles)
     return descriptions
Exemple #9
0
def readFile():
    input_file = open(
        "C:\\Users\\Sergio\\Dropbox\\QMUL\\Data\\choicesNHS\\nhsChoices.txt",
        "r")
    #input_file = open("C:\\Users\\Sergio\\Dropbox\\QMUL\\Data\\choicesNHS\\nhsChoicesDiagnosis.txt", "r")
    #input_file = open("C:\\Users\\Sergio\\Dropbox\\QMUL\\Data\\choicesNHS\\nhsChoicesDiabetesWhole.txt", "r")
    lines = input_file.readlines()
    input_file.close()

    annotationsX = []
    annotationsSLR = []
    annotationsNER = []

    for x in lines:

        annotationX = x
        annotationSLR = annotator.getAnnotations(x, dep_parse=True)['srl']
        #annotationNER = annotator.getAnnotations(x,dep_parse=True)['ner']
        annotationsX.append(annotationX)
        annotationsSLR.append(annotationSLR)
        #annotationsNER.append(annotationNER)

    size = len(annotationsSLR)
    print size

    A0 = 0
    A1 = 0
    pbroles = []
    annotationsA0 = []
    annotationsA1 = []

    for an in range(5):
        print annotationsX[an]
        print annotationsSLR[an]
        sizeIn = len(annotationsSLR[an])
        #print sizeIn
        for an2 in range(sizeIn):

            print "--------------------------------------------------------------------------------------------------------"

            print annotationsSLR[an][an2]["V"]
            w = Word(annotationsSLR[an][an2]["V"]).lemmatize("v")
            #print w
            #print wn.synset(w+'.v.01')

            try:
                for role in propbank.roleset(w + '.01').findall("roles/role"):
                    print(role.attrib['f'], role.attrib['n'],
                          role.attrib['descr'])
                    pbroles.append(role.attrib['descr'])
                #for role in propbank.roleset(w+'.01').findall("aliases/alias"):
                #print(role.attrib['framenet'], role.attrib['pos'], role.attrib['verbnet'])
            except:
                pass

            try:
                print(
                    wn.lemma(w + '.v.01.' + w).derivationally_related_forms())
            except:
                pass

            if "A0" in annotationsSLR[an][an2]:
                print annotationsSLR[an][an2]["A0"]
                A0 = annotationsSLR[an][an2]["A0"]
                #try:
                #A0 = TextBlob(A0, np_extractor=extractor)
                #A0 = A0.noun_phrases[0]
                #print A0
                #except:
                #pass
                try:
                    annotationsA0 = WordNet.spotlightSearch(A0)
                    annotationsA0 = annotationsA0[0].get('URI')
                except:
                    annotationsA0 = "unknown"
                    pass

            if "A1" in annotationsSLR[an][an2]:
                print annotationsSLR[an][an2]["A1"]
                A1 = annotationsSLR[an][an2]["A1"]
                #try:
                #A1 = TextBlob(A1, np_extractor=extractor)
                #A1 = A1.noun_phrases[0]
                #print A1
                #except:
                #pass
                try:
                    annotationsA1 = WordNet.spotlightSearch(A1)
                    annotationsA1 = annotationsA1[0].get('URI')
                except:
                    annotationsA1 = "unknown"
                    pass

            print pbroles

            print "--------------------------------------------------------------------------------------------------------"

            CreateGraphNeo4J.createGraph(w, A0, A1, pbroles, annotationsA0,
                                         annotationsA1)
            del pbroles[:]
            annotationsA0 = []
            annotationsA1 = []
            A0 = 0
            A1 = 0
Exemple #10
0
def get_srl_dict(sense, srl_dict, objects, coref_chain_list):
    """Map one predicate's SRL output onto PropBank role names and objects.

    Args:
        sense: PropBank roleset id; the text before the first '.' becomes
            the ``'predicate'`` entry of the result.
        srl_dict: mapping from SRL label (``'A0'``, ``'AM-LOC'``, ...) to
            the labelled text; the ``'predicate'`` and ``'V'`` entries are
            ignored.
        objects: iterable of dicts; ``obj['names'][0]`` is used as the
            annotated object's name.
        coref_chain_list: iterable of coreference chains, each a container
            of entity strings.

    Returns:
        ``(mapped_srl_dict, region_tag_tuples)`` where ``mapped_srl_dict``
        is an ``OrderedDict`` keyed by role description and
        ``region_tag_tuples`` is the tag list threaded through
        ``get_tag_tuples``.  ``(None, None)`` is returned when the roleset
        is unknown, when an SRL label has no matching role, when
        ``get_tag_tuples`` yields ``None``, or when no role could be mapped.
    """
    region_tag_tuples = []
    try:
        roleset = propbank.roleset(sense)
    except ValueError:
        print('no matching frames for predicate ', sense)
        return None, None

    # Role label -> first comma-separated part of the role description.
    role_dict = {}
    for role in roleset.findall("roles/role"):
        role_dict['A' + role.attrib['n']] = role.attrib['descr'].split(',')[0]

    print('role dict from srl tool :', srl_dict)

    # Modifier roles.  The values become keys of the returned dict, so the
    # strings are kept exactly as they were (including 'doscourse marker').
    # NOTE(review): some glosses look off compared with the usual PropBank
    # modifier meanings (e.g. AM-PNC, AM-MOD) -- confirm before relying on them.
    role_dict.update({
        u'AM-LOC': 'place',
        u'AM-TMP': 'time',
        u'AM-MNR': 'manner',
        u'AM-DIR': 'direction',
        u'AM-ADV': 'adverbial modification',
        u'AM-DIS': 'doscourse marker',
        u'AM-EXT': 'extent',
        u'AM-MOD': 'general modification',
        u'AM-NEG': 'negation',
        u'AM-PNC': 'proper noun component',
        u'AM-PRD': 'secondary predicate',
        u'AM-PRP': 'purpose',
        u'AM-REC': 'reciprocal',
    })

    mapped_srl_dict = OrderedDict()
    mapped_srl_dict['predicate'] = sense.split('.')[0]

    # .items() works on Python 2 and 3 (iteritems() is Python 2 only).
    for label, comp_value in srl_dict.items():
        if label in ('predicate', 'V'):
            continue
        # The SRL tool may emit roles that the PropBank frame does not have.
        if label not in role_dict:
            return None, None
        role_name = role_dict[label]

        # Tags are needed to map values onto objects later.
        print('tags needs for ', comp_value, region_tag_tuples)
        tag_tuples, region_tag_tuples = get_tag_tuples(comp_value,
                                                       region_tag_tuples)
        if tag_tuples is None:
            return None, None
        print(tag_tuples)

        # Keep only the tokens before the first verb in the value.
        kept = []
        for word, tag in tag_tuples:
            if tag in verb_tags:
                break
            kept.append((word, tag))
        if not kept:
            # Nothing precedes the verb: drop this role.
            continue

        phrase = ' '.join(word for word, _ in kept).strip()

        # Multi-token values are reduced to their first noun, if any.
        nouns = []
        if len(kept) > 1:
            nouns = [word for word, tag in kept if tag in noun_tags]
        if not nouns:
            mapped_srl_dict[role_name] = phrase
            continue

        value = nouns[0]
        object_names = [obj['names'][0] for obj in objects]
        if value in object_names:
            mapped_srl_dict[role_name] = value
            continue

        # No exact match: fall back to entities that corefer with the value.
        similar_entities = []
        for chain in coref_chain_list:
            if value in chain and len(chain) > 1:
                # Compare by equality: ``is not`` tested object identity and
                # let equal-but-distinct strings through.
                similar_entities.extend(
                    ent for ent in chain if ent != value)
        if not similar_entities:
            continue

        matching_ent = set(object_names).intersection(similar_entities)
        replacement = ",".join(matching_ent)
        mapped_srl_dict[role_name] = replacement

        # The token changed, so mirror the change in the PoS tag entries.
        region_tag_tuples = [
            (replacement if entry[0] == value else entry[0], entry[1])
            for entry in region_tag_tuples
        ]

    # Only the 'predicate' entry is present: nothing was mapped.
    if len(mapped_srl_dict) == 1:
        return None, None

    print('each srl dict ::::', mapped_srl_dict)
    return mapped_srl_dict, region_tag_tuples
def getPropbankInfo(wordWithSense):
    """Return the ``roles/role`` elements of a PropBank roleset.

    ``wordWithSense`` is passed to ``propbank.roleset``.  When that lookup
    raises ``ValueError`` the function returns ``None``; otherwise it
    returns the list produced by ``findall("roles/role")``.
    """
    try:
        roleset_elem = propbank.roleset(wordWithSense)
    except ValueError:
        roles = None
    else:
        roles = roleset_elem.findall("roles/role")
    return roles
Exemple #12
0
__author__ = 'juliewe'

from nltk.corpus import propbank
from xml.etree import ElementTree

n=103

if __name__=='__main__':
    pb_instances=propbank.instances()
    #print(pb_instances)
    instance=pb_instances[n]
    print instance.roleset,instance.predicate,instance.arguments

    send_01=propbank.roleset('send.01')
    for role in send_01.findall("roles/role"):
        print role.attrib['n'],role.attrib['descr']

    #print (ElementTree.tostring(send_01.find('example')).decode('utf8').strip())
    examples=send_01.findall('example')
    print len(examples)
    for e in examples:
        print ElementTree.tostring(e).decode('utf8').strip()
Exemple #13
0
from nltk.corpus import propbank
# Print the number and description of every role in the turn.01 roleset.
turn_01 = propbank.roleset('turn.01')
for role in turn_01.findall("roles/role"):
    attrs = role.attrib
    print(attrs['n'], attrs['descr'])
Exemple #14
0
from nltk.corpus import treebank
from nltk.corpus import propbank

# Detailing the Propbank
print("Creating Propbank Objects...")
pb_instances = propbank.instances()
pb_verbs = propbank.verbs()
print()
print("Number of items defined in the propbank: ", len(pb_instances))
print("Number of verbs defined in the propbank: ", len(pb_verbs))
print()

# Argument structure of 'agree' and 'fall'
# 'agree'
agreeCount = 0
print("Argument Structure of 'agree': ")
# The section is about 'agree', so read agree.01; the previous 'join.01'
# printed another verb's roles under this title.
for role in propbank.roleset('agree.01').findall('roles/role'):
    print(role.attrib['n'], role.attrib['descr'])
print()
print("Treebank Sentences with 'agree': ")
# NOTE(review): 9353 presumably limits the scan to instances whose parse
# trees are available -- confirm against the installed corpus.
for (n, i) in enumerate(pb_instances[:9353]):
    if i.roleset != 'agree.01':
        continue
    leaves = i.tree.leaves()
    if len(leaves) > 30:
        # Skip sentences longer than 30 tokens.
        continue
    print("Sentence ", n, ":", ' '.join(leaves))
    print("Arguments of Sentence #", (agreeCount + 1))
    for (argloc, argid) in i.arguments:
        print(argid)
        print(argloc.select(i.tree))
    agreeCount += 1
print("There were ", agreeCount, " normally-sized sentences with 'agree'.")
# 'fall'
fallCount = 0
def getPropbankInfo(wordWithSense):
    """Fetch the role elements for the roleset id ``wordWithSense``.

    Returns the result of ``findall("roles/role")`` on the element from
    ``propbank.roleset(wordWithSense)``, or ``None`` when that lookup
    raises ``ValueError``.
    """
    try:
        found = propbank.roleset(wordWithSense)
    except ValueError:
        found = None
    if found is None:
        return None
    return found.findall("roles/role")
Exemple #16
0
	# print "P :",p
	# print "X train : ",x_train[0]
	# print "x_test : ",x_test[0]
	# print "x_test : ",x_test[1]
	# print "x_test : ",x_test[2]
	a=[]
	main_role="";
	max_count=0;
	for x in xrange(0,len(y_test)):
		pred=svc.predict(x_test[x])
		# print x,pred
		a.append(pred[0])
		print pred[0],y_test[x]
		# p = svc.predict_proba( x_test[x] )	
		# print max(p[0])
		if(a.count(pred[0])>max_count):
			main_role=pred[0]
			max_count+=1
		



# Report the roleset id held in main_role (assigned by the preceding loop).
print "Role of verb : ",main_role	
	
# Load the PropBank roleset for that id.
role_01=propbank.roleset(main_role)
	
	# auc = AUC( y_test, p[:,1] )
	# print "SVM AUC", auc
# Print the number and description of every role in the roleset.
for role in role_01.findall("roles/role"):
	print(role.attrib['n'], role.attrib['descr'])