# Top-level preprocessing: write the (already prepared) sentence to the input
# file, re-read it, then detect possessive ("'s") constructions with the
# stanford parser and collapse them into merged tokens.
# NOTE(review): `text` and the open `sentence` file object are defined earlier
# in the file (not visible in this chunk) -- confirm before refactoring.
sentence.write(text)
sentence.close()
#------------------------------------------------------------------------------------
sentence=open(path+"SemanticRoleMiner/code/input/test_input.txt","r")
sent=sentence.readline()
sentence.close()
OrgSent=sent  # untouched copy of the original sentence
# handling 's : run the stanford parser over the input directory and load its
# dependency output into Stan.
inputFile="srl/SemanticRoleMiner/code/input"
myTestFile=SenSta(inputFile)
myTestFile.makeStanf()
stanfile=path+"SemanticRoleMiner/code/input"+"/stanoutput.txt"
Stan=modifyStanf(stanfile)
# Fixing 's using the "poss" dependency; this only works when stanford
# actually recognizes the poss dependency.
Poss=[]
for key,val in Stan.items():
    for i, prep in val.items():
        pred=prep.keys()[0]
        rel=prep.values()[0]
        token1=rel[0]
        token2=rel[1]
        if pred=="poss":
            # record (mergedToken, owner, owned) -- e.g. for "john's brother":
            # ("johnbrother", "john", "brother"); consumed later by translateSent
            Poss.append((str(token2.split("-")[0])+str(token1.split("-")[0]),str(token2.split("-")[0]),str(token1.split("-")[0])))
            #print "***********Posessins: ",Poss
            # collapse "john's brother" into "johnbrother" inside the sentence
            sent=sent.replace(str(token2.split("-")[0])+"'s "+str(token1.split("-")[0]),str(token2.split("-")[0])+str(token1.split("-")[0]))
def translateSent(vlist,result,Poss,indices,PrSent): STs=[] #--- Stanford Translations Dictionary #print "to check nmubers: ",vlist #vlist has all tokens involved with their location attached to them #--- SenSta class accepts a sentence written in a file format, so for every sentence being sent to translateSent() in "result" argument , the sentence is first saved into a file with base directory mentioned in "inputFile" #-- 1- writing the sentence in file. stanfile=open(path+"SemanticRoleMiner/code/test_input.txt","w") stanfile.write(result) stanfile.close() sbj={} obj=[] i=0 #-- 2- the first verb in vlist is the targetverb which is splited from its location of the main text. verb is set to default value of "not found". targetverb=vlist[0].split("-")[0:-1][0] verb="not found" #-- 3- processing stanford-parser and senna on the textfile, the results are store into 2 dictionaries , Stan and Senn inputFile="srl/SemanticRoleMiner/code" myTestFile=SenSta(inputFile) myTestFile.makeSenna() myTestFile.makeStanf() Stan= myTestFile.stanfDict['sen0'].values() Senn= myTestFile.sennaDict['sen0'].values() #thissent=[] #thissent=result.split(" ") #-- 3.5 finding matches for date-pattern to merge all numbers into just 1 object connected to the target verb # sentence is in result string, so i have to find the domain of tokens. 
I can add the location of any tokens involved into this pattern in 1 list months={'January':01,'Jan':"01",'February':"02","Feb":"02","March":"03","Mar":"03","April":"04","Apr":"04","May":"05","June":06,"Jun":"06","July":"07","Jul":07,"August":"08","Aug":"08","September":"09","Sep":"09","October":"10","Oct":"10","November":"11","Nov":11,"December":"12","Dec":"12"} #datePatternDomain=[] dateObjects={} datepattern1=r"(\d{1,2}\s\w+\s\d{1,4})" match=re.findall(datepattern1,result) for item in match: # generalizing dates, number tags fixed tempSent=PrSent toks=str(item).split(" ") rep=item.replace(" ","") tempSent=tempSent.replace(item,rep) templist=tempSent.split(" ") index=templist.index(rep)+1 tok0Ind=index tok1Ind=index+1 tok2Ind=index+2 #print templist,tok0Ind,tok1Ind,tok2Ind if toks[1] in months.keys(): #print"+++++++", item #toks[0],toks[1],toks[2] newTok=str(toks[0])+"/"+months[toks[1]]+"/"+str(toks[2]) tok0=toks[0] tok0=tok0+"-"+str(tok0Ind) tok1=toks[1] tok1=tok1+"-"+str(tok1Ind) tok2=toks[2] tok2=tok2+"-"+str(tok2Ind) #print "\n((dates))\n", tok0,tok1,tok2 #print indices dateObjects[newTok]=(tok0,tok1,tok2) print "dates detected in sentence: ", dateObjects #--4 Senna Translation----------------------------- #-- 4-1 Find roles , calling roleFinder with targetverb, example : targetverb="conducted-2" , allRoles={"A0":"conductor", "A1":".."..} allRoles={} allRoles=roleFinder(targetverb) #-- 4-2 Finding relatives of a verb, all directly connected tokens are gathered in verbRel. verbRel=[] #print "**vlist ",vlist verbRel=verbRelatives(vlist,Stan,indices) #print "*verbRel ",verbRel #fixed #-- 4-3 Discovering the arg labels senna has assigned to the tokens, all Args are gathered in roleDep example:[('testing-2', 'A1'), ('laboratories-8', 'A0'),...] 
roleDep=[] for item in Senn:#NER values=item.values()[0].values() #NER #print "values of senna:",values if values[2]!="O" and (values[2].split("-")[0]=="B" or values[2].split("-")[0]=="S"): tok1=item.keys()[0] tok1=indices[int(tok1.split("-")[-1])-1] tok2=values[2].split("-")[1:][0] #print "**2222" print tok1," is ",tok2 #fixed STs.append((str(tok1),"is",str(tok2))) if item.keys()[0] in verbRel: if values[4]!='O': # ARG val1=item.keys()[0] #val2=values[4].split("-")[1:] val2=values[4] damnval2=values[4].split("-")[0] val2=val2.replace(damnval2+"-","") roleDep.append((val1,val2)) #print "###########NER#############################" #number tags checked #-- 4-4 Translating Args into rolesets, if the Arg is not in allRole list , it's printed itself. #print "\n\nSEnn",Senn,"\n\n" for item in roleDep: token=item[0] role=item[1] #number=token.split("-")[-1] token=indices[int(token.split("-")[-1])-1] #fixed if role in allRoles.keys(): #print "*role_added1*" if len(allRoles[role].split(","))>1: #sometimes we have more than 1 role , I take the first one temp=allRoles[role].split(",")[0] print token," has-role ",temp STs.append((str(token),"has-role",temp)) else: print token," has-role ",allRoles[role] STs.append((str(token),"has-role",allRoles[role])) else: # #print "*role_added2*" print token," has-role ",role STs.append((str(token),"has-role",role)) #print "###########-ROLE-NAMEs-#############################" #-- 5- Stan Translation----------------------------------- #-- 5-1 Stan list has all dependencies with the single-verb sentence. in this loop I'm finding the root of these depndenies as "verb" for further loops. 
#print Stan for item in Stan: #print item if item.keys()[0]=='root': verb=item.values()[0][1] print "verb as root:",verb break for item in Stan: #removing root dependency #print item if item.keys()[0]=='root': Stan.remove(item) break #print "000000000000000000000000000000000000000000" #print Stan #print "**********",targetverb,verb #-- 5-2 in some cases there are no verb in sentence , this loop prints statements containing verb and the verb is the root. #print "000000000000000000000000000000000000000000" stanRoot=verb.split("-")[0] sennaPred=targetverb # print "verbs: ",targetverb,verb.split("-")[0] if (verb !="notfound" and targetverb==verb.split("-")[0]) or (verb !="notfound" and targetverb==verb.split("-")[0]): i=0 for triple in Stan: pred=triple.keys()[0] tok1=triple.values()[0][0] tok1=indices[int(tok1.split("-")[-1])-1] #fixed numbertag tok2=triple.values()[0][1] tok2=indices[int(tok2.split("-")[-1])-1] #fixed numbertag #print "\n here*****",tok1,tok2 #print "verb-tok1-tok2",vlist[0],tok1,tok2 #print "##\n\n ",tok1,tok2,"\n\n\n" #-- 5-2-1 finding the quivalent predicate from dictionary of stanford dependecnies. #-- also nsbj,nsujpass are gathered in sbj[] to make verb-dependencies with # other tokens directly connected to the verb in obj[]. if pred in dictionary.keys(): if dictionary[pred]=="subject": sbj[i]=(tok2,tok1) i+=1 #print "subject",sbj elif tok1==vlist[0] or tok2==vlist[0]:#collecting objects #print "tok1,tok2 : ",tok1,"---",tok2 if tok1==vlist[0]: obj.append([pred,tok2]) elif tok2==vlist[0]: obj.append([pred,tok1]) #print "object:",obj else: #print "**2222" print tok1," ",dictionary[pred]," ",tok2 STs.append((str(tok1),dictionary[pred],str(tok2))) #-- 5-2-2 printing statments withe predicate --verb-- among subjects and other objects directly connected to verb. #-- the location of each token is mentioned but for the verb it's omitted. 
print ":::::::::::::::::::::::::::::::::::::::::::::::::;" print "subjects All:",sbj # created in previous loop , no need for number fixation print "objects", obj neg=0 for ob in obj: if 'neg' in ob: neg=1 break print "stanford",Stan for subject in sbj.values(): for objects in obj: pred=dictionary[objects[0]] damnVerb=subject[1].split("-")[0] if neg==1: damnVerb="not-"+damnVerb print "!!!!!!!!!!!!!!!!!damnVerb, neg",damnVerb,neg if str(pred)!="is-Arg1" : tok1=subject[0] tok2=objects[1] # print "**" if tok2.split("-")[0]!="not": print tok1," ",damnVerb+"-"+str(pred)," ",tok2 STs.append((tok1,damnVerb+"-"+str(pred),tok2)) else: tok1=subject[0] tok2=objects[1] # print "*nbnbn5*" if tok2.split("-")[0]!="not": print tok1," ",damnVerb," ",tok2 STs.append((tok1,damnVerb,tok2)) #print STs #-- 5-2-3 some sentences in stanford doesn't detect a verb as root. # this is the loop for including relations in sentences without a verb-root else: # tok1 tok2 are coming from stan2nd , so their tag numbers must be fixed #print "###########--verb not root" for triple in Stan: pred=triple.keys()[0] tok1=triple.values()[0][0] #print tok1 tok1=indices[int(tok1.split("-")[-1])-1] #fixed #print indices,tok1 tok2=triple.values()[0][1] tok2=indices[int(tok2.split("-")[-1])-1] #fixed #print pred,tok1,tok2 if pred in dictionary.keys(): print tok1," ",dictionary[pred]," ",tok2 STs.append((str(tok1),dictionary[pred],str(tok2))) #print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" #print "to numerate: ",result sentlist=result.split(" ") #print sentlist #i=0 newSTs=[] #print "888888888888888888888888888888888888888888888888888888" #print Poss #print sentlist #print indices if len(Poss)>0: for item in STs: #print"&*&", item sw=0 i=0 #print len(Poss) for poss in Poss: #filling newSTs i+=1 #print "--Pss",poss possID=poss[0] temp1=poss[1] #john temp2=poss[2] #brother item0=item[0].replace("-"+item[0].split("-")[-1],"") # item[0] is the token with tagnumber, item0 is the token without tagnumber 
item2=item[2].replace("-"+item[2].split("-")[-1],"") # item[2] is the token with tagnumber, item2 is the token without tagnumber #print "--",item0,possID,item2 if item0==possID and sw==0: # brother -- shopping #print "rmoved ",item," appended ",(temp2,item[1],item[2]) #STs.remove(item) #print "if-1" num3=sentlist.index(possID) #print temp2, indices[num3] tup=(temp2+"-"+str(num3+1),"of",temp1+"-"+str(num3)) #print "tup",tup token=item[2] #with n #print "tupp",(temp2+"-"+str(num3+1),item[1],token) newSTs.append((temp2+"-"+str(num3+1),item[1],token)) sw=1 if tup not in newSTs: newSTs.append(tup) elif item2==possID and sw==0: #print "removed",item," appended ",(item[0],item[1],temp2) #STs.remove(item) #print "elif-2" token=item[0] num3=sentlist.index(possID) newSTs.append((token,item[1],temp2+"-"+str(num3+1))) sw=1 tup=(temp2+"-"+str(num3+1),"of",temp1) #print "tup**",tup if tup not in newSTs: newSTs.append(tup) elif item0!=possID and item2!=possID and sw==0 and i==len(Poss): #print "elif-3" if item not in newSTs: #print "not removed ",item t1=item[0] t2=item[2] #print "tup",(t1,item[1],t2) newSTs.append((t1,item[1],t2)) if len(newSTs)==0: #newSTs is empty because no Poss has been detected , newSTs was created in the loop above . so if there are no poss-dependencies , newSTs would be the same as STs (tag-numbers are checked) newSTs=STs print "all statements", newSTs #!!!!!!!!!!!!!!!!!!!!!!!1checked !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
#fixing Dates: dateObjects print "------------------------------------------------" remlist=[] print "dates: ",dateObjects if len(dateObjects)>0: length=len(newSTs) for i in range(0,length): # for not counting newly added tuples item=newSTs[i] tok1=item[0] pred=item[1] tok2=item[2] #i=newSTs.index(item) #print item sw=0 for date,vals in dateObjects.items(): if (tok1==vals[0] or tok1==vals[1] or tok1==vals[2]) and ( tok2==vals[0] or tok2==vals[1] or tok2==vals[2]): # I don't want to include this tuple remlist.append(item) else: if tok1==vals[0] or tok1==vals[1] or tok1==vals[2] : #print "to remove:",item, " to append: ",(date,pred,tok2) if (date,pred,tok2) not in newSTs: newSTs.append((date,pred,tok2)) remlist.append(item) elif tok2==vals[0] or tok2==vals[1] or tok2==vals[2]: #print "to remove:",item, " to append: ",(tok1,pred,date) if (tok1,pred,date) not in newSTs : newSTs.append((tok1,pred,date)) remlist.append(item) remlist=list(set(remlist)) #print "remlist: ",remlist #print "before: ",newSTs for item in remlist: #print item newSTs.remove(item) newSTs=list(set(newSTs)) #print "After: ",newSTs print newSTs print "********///" newSTs+=typeOfs(Stan,indices) return newSTs,obj,sbj