def findiobjextract(patt, node, root):
    """Find the indirect-object phrase that goes with the phrase holding *node*.

    The siblings after the parent phrase of ``node`` are searched first and,
    if that produces no head, the siblings before it.  In each direction a
    sibling is stepped over when its label contains ``advp``, ``qual``,
    ``timex``, ``pp`` or ``vp``, or when the label is itself a substring of
    ``obj``, ``np``, ``nps``, ``ap`` or ``aps``.  The first sibling whose label
    contains ``iobj`` is passed to ``treeops.getextractphrase``; any other
    label ends the search in that direction.

    Returns ``(whole, head)`` when a head was found and its case and the case
    of ``patt`` contain one another, otherwise ``("", "")``.
    """
    contained_marks = ("advp", "qual", "timex", "pp", "vp")
    containing_labels = ("obj", "np", "nps", "ap", "aps")

    whole, head = "", ""
    anchor = treeops.getparentphrase(node, root)

    for step in ("getnext", "getprevious"):
        sibling = getattr(anchor, step)()
        while sibling is not None:
            label = treeops.text(sibling)
            transparent = (
                any(mark in label for mark in contained_marks)
                or any(label in plain for plain in containing_labels)
            )
            if not transparent:
                if "iobj" in label:
                    whole, head, excase = treeops.getextractphrase(
                        sibling, root)
                # Either an extract was taken or something blocks the way.
                break
            sibling = getattr(sibling, step)()
        if head:
            break

    if not head:
        return "", ""

    pattcase = helpers.getpatterncase(patt)
    if pattcase in excase or excase in pattcase:
        return whole, head
    return "", ""
def getobjtrigger(node, root):
    """Find the verb that triggers the object phrase *node*.

    The direction flag in the label of ``node`` decides where to look:
    ``<`` scans the preceding siblings, ``>`` the following ones.  Without a
    flag nothing is searched.

    Returns ``(verb, verbtype)`` with ``verbtype`` being ``"active"`` or
    ``"passive"``, or ``("", "")`` when no verb was found.
    """
    verbtype, myverb = "active", ""
    current = node
    # Labels are read through treeops.text in both branches so the two
    # directions classify phrases the same way.
    nodetext = treeops.text(node)

    if "<" in nodetext:
        while current.getprevious() is not None:
            current = current.getprevious()
            text = treeops.text(current)
            if ("iobj" in text or "advp" in text or "np-qual" in text
                    or "timex" in text or text in "np" or text in "nps"
                    or text in "ap" or text in "aps"):
                continue
            elif "vp" in text:
                myverb = treeops.gettriggerverb(current)
                if "vpp-comp" in text:
                    # Participle: remember the passive and keep looking left.
                    verbtype = "passive"
                    continue
                break
            elif "pp" in text:  # after "vp" because "pp" is part of "vpp"
                continue
            else:
                break
    elif ">" in nodetext:
        while current.getnext() is not None:
            current = current.getnext()
            text = treeops.text(current)
            if ("iobj" in text or "advp" in text or "np" in text
                    or text in "ap" or text in "aps"):
                continue
            elif "vp" in text:
                myverb = treeops.gettriggerverb(current)
                loopcurrent = current
                while loopcurrent.getnext() is not None:
                    loopcurrent = loopcurrent.getnext()
                    # Re-read the label for every sibling; judging them all
                    # by the first verb phrase's label made this loop either
                    # stop at once or accept every remaining sibling.
                    ltext = treeops.text(loopcurrent)
                    if ("advp" in ltext or "np" in ltext or ltext in "ap"
                            or ltext in "aps"):
                        continue
                    elif ("vpi" in ltext or "vps" in ltext or "vpp" in ltext
                          or "vpg" in ltext):
                        # Might be many in a row, only want the last one.
                        myverb = treeops.gettriggerverb(loopcurrent)
                        continue
                    elif "pp" in ltext:
                        continue
                    else:
                        break
            elif "pp" in text:
                continue
            else:
                break

    if not myverb:
        return "", ""
    return myverb, verbtype
def patternfinder(self, sentence, lemmas):
    """Register the extraction patterns found in *sentence*.

    The parse tree is obtained with ``treeops.getroot`` and enriched with
    ``treeops.addlemmas``.  Every node whose label contains ``np``,
    ``ap-obj`` or ``ap-comp``, or which ``treeops.yester`` accepts, is a
    candidate.  For each candidate a trigger is looked up according to the
    role found in its label and the key
    ``extract|case|trigger|triggertype`` is added to ``self.allpatterns``
    with the value ``[0, []]`` unless the key is already present.

    Returns ``None``.
    """
    tree = treeops.addlemmas(treeops.getroot(sentence), lemmas)
    trigger, triggertype = "", ""

    # Checked in this order; "obj" also covers objnom and AP-OBJ labels.
    role_lookups = (
        ("subj", "getsubjtrigger"),
        ("comp", "getcomptrigger"),
        ("iobj", "getiobjtrigger"),
        ("obj", "getobjtrigger"),
    )

    for node in tree.iter():
        label = treeops.text(node)
        cleaned = helpers.cleantag(node.tag)
        if "done" in cleaned or "sentence" in cleaned:
            continue

        yesterfound = bool(treeops.yester(node))
        if not (yesterfound or "np" in label or "ap-obj" in label
                or "ap-comp" in label):
            continue

        if yesterfound:
            trigger, triggertype = triggerops.getyestertrigger(node, tree)
        _phrase, _head, case = treeops.getnounphrase(node, tree)

        if not case:
            # Don't want a pattern with no case, unless timex.
            if not yesterfound:
                continue
        elif "qual" in label or "timex" in label:
            continue
        else:
            for role, lookup in role_lookups:
                if role in label:
                    trigger, triggertype = getattr(triggerops, lookup)(
                        node, tree)
                    extract = role
                    break
            else:
                # NPs inside PPs.
                trigger, triggertype = triggerops.getpreptrigger(node, tree)
                extract = "noun"

        if yesterfound:
            extract = "noun"
            case = "nom"

        if not trigger or not triggertype or not case:
            continue

        idkey = "|".join((extract, case, trigger, triggertype))
        if idkey not in self.allpatterns:
            # NOTE that the freq and extract list will be filled out in
            # extractfinder().
            self.allpatterns[idkey] = [0, []]
def _subjextract_scan(anchor, root, step, plain_labels, is_wanted, cross_scp):
    """Walk the siblings of *anchor* and extract the first wanted phrase.

    ``step`` is ``"getprevious"`` or ``"getnext"``.  A sibling is stepped over
    when its label contains ``advp``, ``pp``, ``vp``, ``qual`` or ``timex`` or
    is a substring of one of ``plain_labels``.  With ``cross_scp`` a label
    containing ``scp`` is stepped over too.  Any other label ends the walk.

    Returns what ``treeops.getextractphrase`` returns for the wanted phrase,
    or three empty strings when there is none.
    """
    sibling = getattr(anchor, step)()
    while sibling is not None:
        label = treeops.text(sibling)
        if (any(mark in label
                for mark in ("advp", "pp", "vp", "qual", "timex"))
                or any(label in plain for plain in plain_labels)):
            pass
        elif is_wanted(label):
            return treeops.getextractphrase(sibling, root)
        elif cross_scp and "scp" in label:
            # e.g. "=Maðurinn= sem *beit* hestinn": the phrase in front of
            # the marker is classified by the normal rules on the next turn.
            pass
        else:
            break
        sibling = getattr(sibling, step)()
    return "", "", ""


def findsubjextract(patt, node, root):
    """Find the subject phrase that goes with the phrase holding *node*.

    Three searches start from the parent phrase of ``node``; each runs only
    if the previous one produced no head:

    1. backwards for a label containing ``subj``, stepping over ``scp``;
    2. forwards for a label containing ``subj``;
    3. backwards for an AP-COMP acting as subject, stepping over ``scp``.

    Returns ``(whole, head)`` when a head was found and its case and the case
    of ``patt`` contain one another, otherwise ``("", "")``.
    """
    parent = treeops.getparentphrase(node, root)
    plain = ("np", "nps", "ap", "aps")

    def is_subject(label):
        return "subj" in label

    def is_ap_comp(label):
        # NOTE(review): this tests whether the label is a substring of
        # "ap-comp"/"aps-comp", not the other way round -- confirm intent.
        return label in "ap-comp" or label in "aps-comp"

    whole, head, excase = _subjextract_scan(
        parent, root, "getprevious", plain, is_subject, True)
    if not head:
        whole, head, excase = _subjextract_scan(
            parent, root, "getnext", plain, is_subject, False)
    if not head:
        # Looking for AP-COMP acting as subj; plain APs are not skipped here.
        whole, head, excase = _subjextract_scan(
            parent, root, "getprevious", ("np", "nps"), is_ap_comp, True)
        if head:
            # Call form so the module parses under Python 2 and Python 3.
            print("yay, ap-comp acting as np-subj! {}".format(whole))

    if not head:
        return "", ""

    pattcase = helpers.getpatterncase(patt)
    if pattcase not in excase and excase not in pattcase:
        return "", ""
    return whole, head
def _ppx_fits_pattern(patt, pp, case):
    """Tell whether preposition *pp* and *case* are what pattern *patt* asks for."""
    pattcase = helpers.getpatterncase(patt)
    if pattcase not in case and case not in pattcase:
        return False
    pattprep = helpers.getpatternprep(patt)
    # Each string has to contain the other, i.e. they have to be equal.
    return pp in pattprep and pattprep in pp


def _ppx_is_filler(label):
    """Tell whether a phrase with this label is stepped over by the search."""
    return ("qual" in label or label in "np" or label in "nps"
            or label in "ap" or label in "aps" or "timex" in label)


def _ppx_sort_words(phrase, parts, prep):
    """Split the words of *phrase* into prepositions and content lemmas.

    Words whose tag starts with an element containing ``a`` have their text
    appended to ``prep``; the lemma of every other word goes to ``parts``.
    Returns ``(head, case)`` of the first content word that
    ``treeops.isnoun`` accepts, or two empty strings.
    """
    head, case = "", ""
    for word in phrase.findall(".//WORD"):
        tag = treeops.tag(word)
        if "a" in tag[0]:
            prep.append(treeops.text(word))
            continue
        parts.append(treeops.lemma(word))
        if not head and treeops.isnoun(word):
            head = treeops.lemma(word)
            case = treeops.headcase(treeops.tag(word))
    return head, case


def _ppx_absorb_np(phrase, parts, head, case, first_noun_only):
    """Append every lemma of *phrase* to ``parts`` and update head and case.

    With ``first_noun_only`` an already known head is kept; without it the
    last noun of the phrase wins.  Returns the updated ``(head, case)``.
    """
    for word in phrase.findall(".//WORD"):
        parts.append(treeops.lemma(word))
        if first_noun_only and head:
            continue
        if treeops.isnoun(word):
            head = treeops.lemma(word)
            case = treeops.headcase(treeops.tag(word))
    return head, case


def _ppx_read_advp(advp, patt):
    """Read an adverb phrase plus the noun phrases after it as one PP.

    Returns ``(whole, head)`` (``head`` may be empty) when the phrase fits
    ``patt``, otherwise ``None``.
    """
    prep = [treeops.text(word) for word in advp.findall(".//WORD")
            if "a" in treeops.tag(word)[0]]
    parts, head, case = [], "", ""
    follower = advp.getnext()
    while follower is not None:
        label = treeops.text(follower).replace("[", "")
        if not (label in "np" or label in "nps" or "timex" in label):
            break
        # Want to treat as a pp phrase.
        head, case = _ppx_absorb_np(follower, parts, head, case, True)
        follower = follower.getnext()
    if not parts or not prep:  # Shouldn't be treated as pp phrase
        return None
    if not _ppx_fits_pattern(patt, " ".join(prep), case):
        return None
    return " ".join(parts), head


def _ppx_read_pp(phrase, patt):
    """Read a prepositional phrase.

    Returns ``(parts, head)`` (``head`` may be empty) when the phrase has
    both a preposition and content words and fits ``patt``, else ``None``.
    """
    parts, prep = [], []
    head, case = _ppx_sort_words(phrase, parts, prep)
    if not parts or not prep:
        return None
    if not _ppx_fits_pattern(patt, " ".join(prep), case):
        return None
    return parts, head


def _ppx_before_scp(parent, patt):
    """Try the PP that sits directly in front of an ``scp`` before *parent*.

    Returns ``(whole, head)`` when that PP yields a head, ``("", "")`` when
    the PP exists but is rejected, and ``None`` when this shape is not
    present or gave no head, so the caller may keep searching.
    """
    marker = parent.getprevious()
    if marker is None or "scp" not in treeops.text(marker):
        return None
    phrase = marker.getprevious()
    if phrase is None or "pp" not in treeops.text(phrase):
        return None
    found = _ppx_read_pp(phrase, patt)
    if found is None:
        return "", ""
    parts, head = found
    if not head:
        # If I haven't found the head yet, look at the phrase after the PP.
        follower = phrase.getnext()
        if follower is not None and "np" in treeops.text(follower):
            head = _ppx_absorb_np(follower, parts, head, "", False)[0]
    if not head:
        return None
    return " ".join(parts), head


def findppextract(patt, node, root):
    """Find the prepositional phrase matching *patt* around the phrase of *node*.

    The siblings after the parent phrase of ``node`` are searched first, then
    (if no head was found) the siblings before it.  Each candidate phrase is
    read on its own, so words of a rejected phrase never end up in a later
    result.  A phrase accepted by ``treeops.yester`` is treated as the
    preposition "í" with the nominative head "gær".

    Returns ``(whole, head)``, or ``("", "")`` when nothing fits.
    """
    parent = treeops.getparentphrase(node, root)
    whole, head = "", ""

    current = parent.getnext()
    while current is not None:
        text = treeops.text(current)
        if treeops.yester(current):
            # Deals with "í gær" being categorized as ADVP, not PP.
            if _ppx_fits_pattern(patt, "í", "nom"):
                whole, head = "gær", "gær"
                break
        elif "advp" in text:
            found = _ppx_read_advp(current, patt)
            if found is not None:
                whole, head = found
                break
        elif _ppx_is_filler(text):
            pass
        elif "pp" in text:
            found = _ppx_read_pp(current, patt)
            if found is not None:
                parts, head = found
                if not head:
                    # If I haven't found the head yet, try the next phrase.
                    follower = current.getnext()
                    if follower is None:
                        return "", ""
                    if "np" in treeops.text(follower):
                        head = _ppx_absorb_np(
                            follower, parts, head, "", False)[0]
                whole = " ".join(parts)
                break
        else:
            # Something else follows; the PP may sit in front of a relative
            # clause marker that precedes the parent phrase.
            outcome = _ppx_before_scp(parent, patt)
            if outcome is not None:
                return outcome
            break
        current = current.getnext()

    if not head:
        current = parent.getprevious()
        while current is not None:
            text = treeops.text(current).replace("[", "")
            if treeops.yester(current):
                if _ppx_fits_pattern(patt, "í", "nom"):
                    whole, head = "gær", "gær"
                    break
            elif "advp" in text:
                found = _ppx_read_advp(current, patt)
                if found is not None:
                    whole, head = found
                    break
            elif _ppx_is_filler(text):
                pass
            elif "pp" in text:
                found = _ppx_read_pp(current, patt)
                if found is not None:
                    parts, head = found
                    whole = " ".join(parts)
                    break
            # Unlike the forward search, other labels are simply passed.
            current = current.getprevious()

    if not head:
        return "", ""
    return whole, head
def getyestertrigger(node, root):
    """Find the trigger for a phrase that ``treeops.yester`` recognised.

    The phrase is handled like a prepositional phrase with the preposition
    "í".  The siblings before the parent phrase of ``node`` are searched
    first, then the ones after it.  A noun phrase gives a ``noun pp|í``
    trigger, a verb phrase an ``active pp|í`` or ``passive pp|í`` trigger.

    Returns ``(trigger, triggertype)`` or ``("", "")``.
    """
    prep = "í"
    anchor = treeops.getparentphrase(node, root)
    plain_labels = ("np", "nps", "ap", "aps")

    def is_plain(label):
        return any(label in plain for plain in plain_labels)

    def scan(step):
        """Search one direction; return the (trigger, triggertype) found."""
        phrase = getattr(anchor, step)()
        while phrase is not None:
            label = treeops.text(phrase)
            if "np-qual" in label or "timex" in label or is_plain(label):
                pass
            elif "np" in label:
                return treeops.gettriggernoun(phrase), "noun pp|" + prep
            elif "vp" in label:
                if "vpp-comp" in label:
                    return treeops.gettriggerverb(phrase), "passive pp|" + prep
                if "vpi" in label or "vps" in label or "vpg" in label:
                    return treeops.gettriggerverb(phrase), "active pp|" + prep
                # This verb might be the only one, maybe there's something
                # else to check further along.
                found = treeops.gettriggerverb(phrase)
                kind = "active pp|" + prep
                other = getattr(phrase, step)()
                while other is not None:
                    olabel = treeops.text(other)
                    if ("advp" in olabel or "np-qual" in olabel
                            or "timex" in olabel or is_plain(olabel)):
                        pass
                    elif "vpp-comp" in olabel:
                        return (treeops.gettriggerverb(other),
                                "passive pp|" + prep)
                    elif ("vpi" in olabel or "vps" in olabel
                          or "vpg" in olabel):
                        return (treeops.gettriggerverb(other),
                                "active pp|" + prep)
                    elif "pp" in olabel:
                        pass
                    else:
                        break  # this verb is the only one we have
                    other = getattr(other, step)()
                return found, kind
            elif "pp" in label:
                pass
            else:
                break  # nothing found here
            phrase = getattr(phrase, step)()
        return "", ""

    trigger, triggertype = scan("getprevious")
    if not trigger:
        trigger, triggertype = scan("getnext")
    if not trigger:
        return "", ""
    return trigger, triggertype
def _preptrigger_is_plain(label):
    """Tell whether *label* is a substring of one of the unmarked phrase names."""
    return (label in "np" or label in "nps" or label in "ap"
            or label in "aps")


def _preptrigger_from_verb(phrase, label, step, prep):
    """Resolve the trigger for the verb phrase *phrase* labelled *label*.

    A ``vpp-comp`` phrase gives a passive trigger and a ``vpi``/``vps``/
    ``vpg`` phrase an active one.  For any other verb phrase the siblings in
    direction ``step`` are checked for such a phrase; if none turns up the
    verb of ``phrase`` itself is used as an active trigger.

    Returns ``(trigger, triggertype)``.
    """
    if "vpp-comp" in label:
        return treeops.gettriggerverb(phrase), "passive pp|" + prep
    if "vpi" in label or "vps" in label or "vpg" in label:
        return treeops.gettriggerverb(phrase), "active pp|" + prep

    # This verb might be the only one, maybe there's something else to check.
    trigger = treeops.gettriggerverb(phrase)
    triggertype = "active pp|" + prep
    other = getattr(phrase, step)()
    while other is not None:
        olabel = treeops.text(other)
        if ("advp" in olabel or "np-qual" in olabel or "timex" in olabel
                or _preptrigger_is_plain(olabel)):
            pass
        elif "vpp-comp" in olabel:
            return treeops.gettriggerverb(other), "passive pp|" + prep
        elif "vpi" in olabel or "vps" in olabel or "vpg" in olabel:
            return treeops.gettriggerverb(other), "active pp|" + prep
        elif "pp" in olabel:
            pass
        else:
            break  # this verb is the only one we have
        other = getattr(other, step)()
    return trigger, triggertype


def getpreptrigger(node, root):
    """Find the trigger for a noun phrase inside a prepositional phrase.

    The preposition is built from the words of the parent phrase of ``node``
    whose tag starts with an element containing ``a``.  If there are none
    the phrase is an unmarked NP and the phrase in front of it decides: a
    PP/ADVP without a noun lends its preposition, a marked NP (or an NP in
    front of an AP) hands over to the trigger lookup for its role, and
    anything else gives ``("", "")``.

    With the preposition known, the siblings before the parent phrase are
    searched for a noun or verb phrase, then the siblings after it.

    Returns ``(trigger, triggertype)`` with ``triggertype`` of the form
    ``"noun pp|<prep>"``, ``"active pp|<prep>"`` or ``"passive pp|<prep>"``,
    or ``("", "")``.
    """
    parts = []  # To account for multi word prepositions
    trigger, triggertype = "", ""
    parent = treeops.getparentphrase(node, root)
    for word in parent.findall(".//WORD"):
        newword = treeops.text(word)
        mytag = treeops.tag(word)
        if "a" in mytag[0]:  # Found preposition
            parts.append(newword)

    if not parts:
        # No preposition found, have unmarked np to find correct trigger for.
        previousnode = parent.getprevious()
        if previousnode is not None:
            ptext = treeops.text(previousnode)
            if "pp" in ptext or "advp" in ptext:
                found = False  # Checking if I find a noun in the phrase
                for aword in previousnode.findall(".//WORD"):
                    newword = treeops.text(aword)
                    mytag = treeops.tag(aword)
                    if "a" in mytag[0]:  # Found preposition
                        parts.append(newword)
                    elif "n" in mytag[0]:  # Only applies to PP phrases
                        found = True
                if found:
                    return "", ""
            elif "iobj" in ptext:
                trigger, triggertype = getiobjtrigger(previousnode, root)
                return trigger, triggertype
            elif "obj" in ptext:
                trigger, triggertype = getobjtrigger(previousnode, root)
                return trigger, triggertype
            elif "subj" in ptext:
                trigger, triggertype = getsubjtrigger(previousnode, root)
                return trigger, triggertype
            elif "ap" in ptext:
                moreprevious = previousnode.getprevious()
                if moreprevious is None:
                    return "", ""
                mtext = treeops.text(moreprevious)
                if "np" not in mtext:
                    return "", ""
                found = False
                for every in moreprevious.findall(".//WORD"):
                    etag = treeops.tag(every)
                    # Only nouns matter here; nothing is done for other words.
                    if "n" in etag[0] and "a" not in etag[0]:
                        found = True
                if found:
                    return "", ""
                # No noun in NP, can add ap and unmarked np to it.
                if "subj" in mtext:
                    trigger, triggertype = getsubjtrigger(moreprevious, root)
                    return trigger, triggertype
                elif "iobj" in mtext:
                    trigger, triggertype = getiobjtrigger(moreprevious, root)
                    return trigger, triggertype
                elif "obj" in mtext:
                    trigger, triggertype = getobjtrigger(moreprevious, root)
                    return trigger, triggertype
                return "", ""
            else:
                return "", ""

    prep = " ".join(parts)

    # Getting the trigger: first in front of the PP.
    current = parent.getprevious()
    while current is not None:
        text = treeops.text(current)
        if ("np-qual" in text or "timex" in text or "advp" in text
                or _preptrigger_is_plain(text)):
            pass
        elif "np" in text:
            triggertype = "noun pp|" + prep
            trigger = treeops.gettriggernoun(current)
            break
        elif "vp" in text:
            trigger, triggertype = _preptrigger_from_verb(
                current, text, "getprevious", prep)
            break
        elif "pp" in text:
            pass
        else:
            break  # nothing found here
        current = current.getprevious()

    if not trigger:
        # Looking after the PP.
        current = parent.getnext()
        while current is not None:
            text = treeops.text(current)
            if ("np-qual" in text or "nps-qual" in text or "timex" in text
                    or "advp" in text or _preptrigger_is_plain(text)):
                pass
            elif "np" in text:
                triggertype = "noun pp|" + prep
                trigger = treeops.gettriggernoun(current)
                break
            elif "vp" in text:
                trigger, triggertype = _preptrigger_from_verb(
                    current, text, "getnext", prep)
                break
            elif "scp" in text:
                # The verb sits after the marker, optionally behind a
                # subject.  The verb phrase itself is read, not the marker.
                verbphrase = current.getnext()
                if (verbphrase is not None
                        and "subj" in treeops.text(verbphrase)):
                    verbphrase = verbphrase.getnext()
                if verbphrase is not None:
                    vtext = treeops.text(verbphrase)
                    if "vp" in vtext:
                        trigger, triggertype = _preptrigger_from_verb(
                            verbphrase, vtext, "getnext", prep)
                        break
            elif "pp" in text:
                pass
            else:
                break
            current = current.getnext()

    if not trigger:
        return "", ""
    return trigger, triggertype
def getcomptrigger(node, root):
    """Find the trigger for the complement phrase *node*.

    The siblings in front of ``node`` are searched for a label containing
    ``np-subj`` (trigger from ``treeops.gettriggernoun``) or ``ap-comp``
    (trigger from ``treeops.gettriggeradj``); such a trigger is returned
    with the type ``"compnoun"``.

    If that gives nothing, the siblings after the point where the backward
    search stopped are searched for a verb, for the case of an AP-COMP
    acting as subject.  The verb found there is returned with the type
    ``"active"``, or ``"passive"`` when a ``vpb`` phrase is followed by a
    ``vpp-comp`` phrase.

    Returns ``(trigger, triggertype)``; the trigger is empty when nothing
    was found.
    """
    # The flag points to the verb, so it is of no use here.  The -COMP flag
    # comes on the second NP, so the NP-SUBJ should always be in front.
    current = node
    trigger = ""
    while current.getprevious() is not None:
        current = current.getprevious()
        text = treeops.text(current)
        if "np-subj" in text:
            trigger = treeops.gettriggernoun(current)
            break
        elif "ap-comp" in text:
            trigger = treeops.gettriggeradj(current)
            break
    if trigger:
        return trigger, "compnoun"

    # Dealing with AP-COMP acting as subj.
    # NOTE(review): the forward search continues from where the backward
    # search ended, not from ``node`` -- confirm this is intended.
    myverb, verbtype = "", "active"
    while current.getnext() is not None:
        current = current.getnext()
        text = treeops.text(current)
        if ("advp" in text or "np-qual" in text or "timex" in text
                or text in "np" or text in "nps" or text in "ap"
                or text in "aps"):
            continue
        elif "scp" in text:
            # Peek behind the marker: go on only if the next phrase is one
            # the loop knows how to handle; it is classified on the next
            # iteration.
            follower = current.getnext()
            if follower is None:
                break
            newtext = treeops.text(follower)
            if not ("advp" in newtext or newtext in "np" or newtext in "nps"
                    or newtext in "ap" or newtext in "aps"
                    or "np-qual" in newtext or "timex" in newtext
                    or "vp" in newtext or "pp" in newtext):
                break
        elif "vp" in text:
            myverb = treeops.gettriggerverb(current)  # lemma of the main verb
            # I assume vpp-comp always comes with a vpb in the order
            # vpb, vpp-comp.
            if "vpb" in text:
                loopcurrent = current
                while loopcurrent.getnext() is not None:
                    loopcurrent = loopcurrent.getnext()
                    ltext = treeops.text(loopcurrent)
                    if ("advp" in ltext or ltext in "np" or ltext in "nps"
                            or ltext in "ap" or ltext in "aps"
                            or "np-qual" in ltext or "timex" in ltext):
                        continue
                    elif "vpp-comp" in ltext:
                        return treeops.gettriggerverb(loopcurrent), "passive"
                    elif "pp" in ltext:
                        # Checked last because "pp" is in "vpp-comp"!
                        continue
                    else:
                        break
            continue
        elif "pp" in text:
            # Checked last because "pp" is in "vpp-comp"!
            continue
        else:
            break

    # Hand back the verb found by the fallback instead of dropping it.
    return myverb, verbtype
def getsubjtrigger(node, root):
    """Find the verb that triggers the subject phrase *node*.

    The direction flag in the label of ``node`` decides where to look:

    * ``>``: the following siblings are searched; the last verb phrase in
      the run wins, and a ``vpb`` followed by ``vpp-comp`` gives a passive.
    * ``<``: the preceding siblings are searched for a verb phrase, then the
      siblings after ``node`` are checked for further parts of the verb.
    * no flag: the phrase directly before and (if needed) directly after
      ``node`` is consulted; a flagged ``subj``/``comp`` neighbour lends its
      trigger through a recursive call.

    Returns ``(verb, verbtype)`` with ``verbtype`` being ``"active"`` or
    ``"passive"``, or ``("", "")`` when no verb was found.
    """
    verbtype, myverb = "active", ""
    current = node
    flag = treeops.text(node)

    if ">" in flag:
        while current.getnext() is not None:
            current = current.getnext()
            text = treeops.text(current)
            if ("advp" in text or "np-qual" in text or "timex" in text
                    or "subj" in text or text in "np" or text in "nps"
                    or text in "ap" or text in "aps"):
                continue
            elif "scp" in text:
                # Peek behind the marker: go on only if the next phrase is
                # one the loop knows how to handle; it is classified on the
                # next iteration.
                follower = current.getnext()
                if follower is None:
                    break
                newtext = treeops.text(follower)
                if not ("advp" in newtext or newtext in "np"
                        or newtext in "nps" or newtext in "ap"
                        or newtext in "aps" or "np-qual" in newtext
                        or "timex" in newtext or "vp" in newtext
                        or "pp" in newtext):
                    break
            elif "vp" in text:
                myverb = treeops.gettriggerverb(current)  # the main verb
                # I assume vpp-comp always comes with a vpb in the order
                # vpb, vpp-comp.
                if "vpb" in text:
                    loopcurrent = current
                    while loopcurrent.getnext() is not None:
                        loopcurrent = loopcurrent.getnext()
                        ltext = treeops.text(loopcurrent)
                        if ("advp" in ltext or ltext in "np"
                                or ltext in "nps" or ltext in "ap"
                                or ltext in "aps" or "np-qual" in ltext
                                or "timex" in ltext):
                            continue
                        elif "vpp-comp" in ltext:
                            return (treeops.gettriggerverb(loopcurrent),
                                    "passive")
                        elif "pp" in ltext:
                            # Checked last because "pp" is in "vpp-comp"!
                            continue
                        else:
                            break
                continue
            elif "pp" in text:
                # Checked last because "pp" is in "vpp-comp"!
                continue
            else:
                break
    elif "<" in flag:
        while current.getprevious() is not None:
            current = current.getprevious()
            text = treeops.text(current)
            if ("advp" in text or text in "np" or text in "nps"
                    or text in "ap" or text in "aps" or "np-qual" in text
                    or "timex" in text):
                continue
            elif "vp" in text:
                myverb = treeops.gettriggerverb(current)
                if "vpp-comp" in text:
                    verbtype = "passive"
                # Checking if there is more to the verb phrase after node.
                loopcurrent = node
                while loopcurrent.getnext() is not None:
                    loopcurrent = loopcurrent.getnext()
                    ltext = treeops.text(loopcurrent)
                    if ("advp" in ltext or "np-qual" in ltext
                            or ltext in "np" or ltext in "nps"
                            or ltext in "ap" or ltext in "aps"
                            or "timex" in ltext):
                        continue
                    elif "vpp" in ltext:
                        myverb = treeops.gettriggerverb(loopcurrent)
                        verbtype = "passive"
                    elif ("vpi" in ltext or "vps" in ltext
                          or "vpg" in ltext):
                        # Might be many in a row, want the last one.
                        myverb = treeops.gettriggerverb(loopcurrent)
                        continue
                    elif "pp" in ltext:
                        continue
                    else:
                        # Nothing to be found, verb has non-auxiliary meaning.
                        break
            elif "pp" in text:
                continue
            else:
                break
    else:
        # No flag to point to the trigger.
        # First case: the phrase isn't really subj, it's the obj from active
        # when the iobj is made subject of the passive.  It is still called
        # a subj here; obj and iobj cannot be told apart in the passive.
        # Second case: the object in an expletive sentence; look directly
        # before.
        # Third case: two or more NP-SUBJ in a row, only one of them flagged.
        before = current.getprevious()
        if before is not None:
            text = treeops.text(before)
            if "vpp-comp" in text:
                verbtype = "passive"
                myverb = treeops.gettriggerverb(before)
            elif "vpi" in text:
                verbtype = "active"
                myverb = treeops.gettriggerverb(before)
            elif ("subj<" in text or "subj>" in text or "comp<" in text
                  or "comp>" in text):
                myverb, verbtype = getsubjtrigger(before, root)
        after = node.getnext()
        if after is not None and not myverb:
            text = treeops.text(after)
            if ("subj<" in text or "subj>" in text or "comp<" in text
                    or "comp>" in text):
                myverb, verbtype = getsubjtrigger(after, root)

    if not myverb:
        return "", ""
    return myverb, verbtype