def do_rule(sentdict, aux, tree, word_positions_in_tree):
    """Apply the hand-written rule set for the auxiliary 'do'.

    Args:
        sentdict: Sentence object exposing parallel ``lemmas``, ``pos`` and
            ``words`` sequences, indexed by word number.
        aux: Auxiliary object; only ``aux.wordnum`` is read here.
        tree: Parse tree of the sentence, indexable by a tree position.
        word_positions_in_tree: Sequence whose entry ``wordnum - 1`` is the
            tree position of the word with that word number.

    Returns:
        bool: True when one of the positive rules fires, False otherwise
        (presumably True marks the auxiliary as a VPE trigger -- confirm
        against the caller).
    """
    auxidx = aux.wordnum

    # Rule 1: "do that" is always accepted.
    try:
        if sentdict.lemmas[auxidx + 1] == 'that':
            return True
    except IndexError:
        pass

    def is_clause_punctuation(tag):
        return is_period(tag) or is_comma(tag) or is_dash_or_colon(tag)

    if not aux_locally_ccommanded_by_verb(sentdict, aux, tree, word_positions_in_tree):
        # "to do" is rejected outright.
        if to_precedes_aux(sentdict, aux):
            return False

        aux_node = tree[word_positions_in_tree[auxidx - 1]]
        localt = nt.generate_local_structure_from_subtree(tree, aux_node)
        local_word_subtrees = nt.getsmallestsubtrees(localt)

        # "do" directly followed by punctuation: accept unless some other
        # verb in the local structure c-commands the auxiliary.
        try:
            if is_clause_punctuation(sentdict.pos[auxidx + 1]):
                blocked = any(
                    nt.ccommands(localt, leaf, aux_node)
                    for leaf in local_word_subtrees
                    if is_verb(leaf.label()) and leaf != aux_node
                )
                if not blocked:
                    return True
        except IndexError:
            pass

        # "do not" followed by punctuation: same idea, but here the auxiliary
        # itself is not excluded from the candidate verbs.
        try:
            tag_after_not = sentdict.pos[auxidx + 2]
            if sentdict.lemmas[auxidx + 1] == 'not' and is_clause_punctuation(tag_after_not):
                blocked = any(
                    nt.ccommands(localt, leaf, aux_node)
                    for leaf in local_word_subtrees
                    if is_verb(leaf.label())
                )
                if not blocked:
                    return True
        except IndexError:
            pass

    if is_ccommanded_by_continuation_word(sentdict, aux, tree, word_positions_in_tree):
        return True

    if verb_follows_aux(sentdict, aux):
        return False

    # "do" + preposition is accepted, except for the surface form "done".
    try:
        if is_preposition(sentdict.pos[auxidx + 1]) and sentdict.words[auxidx] != 'done':
            return True
    except IndexError:
        pass

    return False
def auxlocallyccommandsverb(sentdict, auxidx, t, word_positions_in_tree):
    """Check whether the auxiliary one-way c-commands a verb in its local structure.

    Args:
        sentdict: Unused; kept for a uniform rule signature.
        auxidx: Word number of the auxiliary.
        t: Parse tree of the sentence, indexable by a tree position.
        word_positions_in_tree: Sequence whose entry ``auxidx - 1`` is the
            auxiliary's tree position.

    Returns:
        bool: True if some verb-labelled smallest subtree of the local
        structure is c-commanded by the auxiliary without c-commanding it
        back; False otherwise, including when an IndexError occurs.
    """
    try:
        aux_node = t[word_positions_in_tree[auxidx - 1]]
        local_tree = nltktree.generate_local_structure_from_subtree(t, aux_node)
        for leaf in nltktree.getsmallestsubtrees(local_tree):
            if not isverb(leaf.label()):
                continue
            if not nltktree.ccommands(local_tree, aux_node, leaf):
                continue
            # Only an asymmetric relation counts.
            if not nltktree.ccommands(local_tree, leaf, aux_node):
                return True
    except IndexError:
        return False
    return False
def aux_locally_ccommands_verb(sentdict, aux, tree, word_positions_in_tree):
    """Check whether the auxiliary one-way c-commands a verb in its local structure.

    Args:
        sentdict: Unused; kept for a uniform rule signature.
        aux: Auxiliary object; only ``aux.wordnum`` is read.
        tree: Parse tree of the sentence, indexable by a tree position.
        word_positions_in_tree: Sequence whose entry ``aux.wordnum - 1`` is
            the auxiliary's tree position.

    Returns:
        bool: True if some verb-labelled smallest subtree of the local
        structure is c-commanded by the auxiliary without c-commanding it
        back; False otherwise, including when an IndexError occurs.
    """
    try:
        aux_node = tree[word_positions_in_tree[aux.wordnum - 1]]
        local_tree = nt.generate_local_structure_from_subtree(tree, aux_node)
        for leaf in nt.getsmallestsubtrees(local_tree):
            if not is_verb(leaf.label()):
                continue
            if not nt.ccommands(local_tree, aux_node, leaf):
                continue
            # Only an asymmetric relation counts.
            if not nt.ccommands(local_tree, leaf, aux_node):
                return True
    except IndexError:
        return False
    return False
def is_ccommanded_by_continuation_word(sentdict, aux, tree, word_positions_in_tree):
    """Check whether a nearby 'than' / 'as' / 'so' c-commands the auxiliary.

    Only the words in the window of CONTINUATION_WORD_DISTANCE_SEARCH
    positions immediately before the auxiliary are considered.

    Args:
        sentdict: Sentence object; ``sentdict.words`` is read.
        aux: Auxiliary object; only ``aux.wordnum`` is read.
        tree: Parse tree of the sentence, indexable by a tree position.
        word_positions_in_tree: Sequence whose entry ``wordnum - 1`` is the
            tree position of the word with that word number.

    Returns:
        bool: True as soon as one such word c-commands the auxiliary.
    """
    aux_idx = aux.wordnum
    window_start = max(0, aux_idx - CONTINUATION_WORD_DISTANCE_SEARCH)

    for idx in range(window_start, aux_idx):
        if sentdict.words[idx].lower() not in ('than', 'as', 'so'):
            continue
        # NOTE(review): for idx == 0 the lookup below uses index -1 (the last
        # entry) -- presumably words[0] is never a continuation word; confirm.
        candidate = tree[word_positions_in_tree[idx - 1]]
        aux_node = tree[word_positions_in_tree[aux_idx - 1]]
        if nt.ccommands(tree, candidate, aux_node):
            return True
    return False
def auxccommandsverb(sentdict, auxidx, t, word_positions_in_tree):
    """Check whether the auxiliary c-commands any verb in the whole tree.

    Args:
        sentdict: Unused; kept for a uniform rule signature.
        auxidx: Word number of the auxiliary.
        t: Parse tree of the sentence, indexable by a tree position.
        word_positions_in_tree: Sequence whose entry ``auxidx - 1`` is the
            auxiliary's tree position.

    Returns:
        bool: True if the auxiliary c-commands at least one verb-labelled
        smallest subtree of ``t``; False otherwise.
    """
    for leaf in nltktree.getsmallestsubtrees(t):
        if not isverb(leaf.label()):
            continue
        try:
            found = nltktree.ccommands(t, t[word_positions_in_tree[auxidx - 1]], leaf)
        except IndexError:
            # A failed lookup for this candidate is skipped, not fatal.
            continue
        if found:
            return True
    return False
def aux_ccommanded_by_verb(sentdict, aux, tree, word_positions_in_tree):
    """Check whether any verb in the whole tree c-commands the auxiliary.

    Args:
        sentdict: Unused; kept for a uniform rule signature.
        aux: Auxiliary object; only ``aux.wordnum`` is read.
        tree: Parse tree of the sentence, indexable by a tree position.
        word_positions_in_tree: Sequence whose entry ``aux.wordnum - 1`` is
            the auxiliary's tree position.

    Returns:
        bool: True if at least one verb-labelled smallest subtree of
        ``tree`` c-commands the auxiliary; False otherwise.
    """
    for leaf in nt.getsmallestsubtrees(tree):
        if not is_verb(leaf.label()):
            continue
        try:
            found = nt.ccommands(tree, leaf, tree[word_positions_in_tree[aux.wordnum - 1]])
        except IndexError:
            # A failed lookup for this candidate is skipped, not fatal.
            continue
        if found:
            return True
    return False
def isccommandedbycontinuationword(sentdict, auxidx, t, word_positions_in_tree):
    """Check whether a nearby 'than' / 'as' / 'so' c-commands the auxiliary.

    Only the (up to) five words immediately before the auxiliary are
    examined; the loop now walks that window directly instead of scanning
    the whole sentence and filtering by index.

    Args:
        sentdict: Mapping with a ``'words'`` list of tokens.
        auxidx: Word number of the auxiliary.
        t: Parse tree of the sentence, indexable by a tree position.
        word_positions_in_tree: Sequence whose entry ``wordnum - 1`` is the
            tree position of the word with that word number.

    Returns:
        bool: True as soon as one continuation word in the window
        c-commands the auxiliary; False otherwise.
    """
    continuation_words = ('than', 'as', 'so')
    window = 5  # how many words before the auxiliary are searched

    words = sentdict['words']
    first = max(0, auxidx - window)
    last = min(auxidx, len(words))  # exclusive; never past the sentence end

    for i in range(first, last):
        if words[i].lower() not in continuation_words:
            continue
        # NOTE(review): for i == 0 the lookup below uses index -1 (the last
        # entry) -- presumably words[0] is never a continuation word; confirm.
        if nltktree.ccommands(t, t[word_positions_in_tree[i - 1]], t[word_positions_in_tree[auxidx - 1]]):
            return True

    # A local c-command check against 'like' existed only as disabled code in
    # the original and is intentionally not restored here.
    return False
def docheck(sentdict, auxidx, t, word_positions_in_tree, verbose=False):
    """Apply the hand-written VPE rules for the auxiliary 'do' (dict-based sentence).

    Unlike the original, a missing token after the auxiliary (auxiliary is the
    last word) no longer raises IndexError; the affected rule is skipped.

    Args:
        sentdict: Mapping with parallel ``'lemmas'``, ``'pos'`` and
            ``'words'`` lists, indexed by word number.
        auxidx: Word number of the auxiliary.
        t: Parse tree of the sentence, indexable by a tree position.
        word_positions_in_tree: Sequence whose entry ``auxidx - 1`` is the
            auxiliary's tree position.
        verbose: Unused; kept for interface compatibility.

    Returns:
        bool: True when one of the positive rules fires, False otherwise.
    """
    # NOTE: an earlier filter for 'do so' / 'do the same' / 'do not do'
    # patterns is disabled in the original and is not restored here.

    def is_clause_punctuation(tag):
        return isperiod(tag) or iscomma(tag) or isdashorcolon(tag)

    def tag_at(idx):
        """Return the POS tag at idx, or None when idx is past the sentence end."""
        try:
            return sentdict['pos'][idx]
        except IndexError:
            return None

    # Rule 1: "do that" is always accepted.
    try:
        if sentdict['lemmas'][auxidx + 1] == 'that':
            return True
    except IndexError:
        pass

    if not auxislocallyccommandedbyverb(sentdict, auxidx, t, word_positions_in_tree):
        # "to do" is rejected outright.
        if toprecedesaux(sentdict, auxidx):
            return False

        aux_node = t[word_positions_in_tree[auxidx - 1]]
        localt = nltktree.generate_local_structure_from_subtree(t, aux_node)
        local_word_subtrees = nltktree.getsmallestsubtrees(localt)

        # "Do" at the end of a clause: accept unless another verb in the local
        # structure c-commands the auxiliary. Only the tag lookup is guarded,
        # so errors raised inside the c-command check still propagate.
        following_tag = tag_at(auxidx + 1)
        if following_tag is not None and is_clause_punctuation(following_tag):
            blocked = any(
                nltktree.ccommands(localt, subtree, aux_node)
                for subtree in local_word_subtrees
                if isverb(subtree.label()) and subtree != aux_node
            )
            if not blocked:
                return True

        # "Don't" at the end of a clause: same idea, but here the auxiliary
        # itself is not excluded from the candidate verbs.
        try:
            tag_after_not = sentdict['pos'][auxidx + 2]
            if sentdict['lemmas'][auxidx + 1] == 'not' and is_clause_punctuation(tag_after_not):
                blocked = any(
                    nltktree.ccommands(localt, subtree, aux_node)
                    for subtree in local_word_subtrees
                    if isverb(subtree.label())
                )
                if not blocked:
                    return True
        except IndexError:
            pass

        # Disabled experiment (original note): accepting when the local
        # structure holds exactly one verb/noun/preposition gave a small
        # increase in recall and a decrease in precision.

    # Disabled experiment (original note): additionally requiring that the
    # auxiliary does not locally c-command a verb traded 8% recall for 4%
    # precision.
    if isccommandedbycontinuationword(sentdict, auxidx, t, word_positions_in_tree):
        return True

    if verbfollowsaux(sentdict, auxidx):
        return False

    # "do" + preposition is accepted, except for the surface form "done".
    following_tag = tag_at(auxidx + 1)
    if following_tag is not None and isprep(following_tag) and sentdict['words'][auxidx] != 'done':
        return True

    return False
def auxccommandsverbthatcomesafter(sentdict, auxidx, tree, word_positions_in_tree):
    """Check whether the auxiliary c-commands a verb that appears after it.

    Args:
        sentdict: Mapping with a ``'pos'`` list of POS tags.
        auxidx: Word number of the auxiliary.
        tree: Parse tree of the sentence, indexable by a tree position.
        word_positions_in_tree: Sequence whose entry ``wordnum - 1`` is the
            tree position of the word with that word number.

    Returns:
        bool: True if the auxiliary c-commands at least one later
        verb-tagged word; False otherwise.
    """
    tags = sentdict['pos']
    # The generator is lazy, so tree lookups happen only for verb-tagged words.
    return any(
        nltktree.ccommands(
            tree,
            tree[word_positions_in_tree[auxidx - 1]],
            tree[word_positions_in_tree[later - 1]],
        )
        for later in range(auxidx + 1, len(tags))
        if isverb(tags[later])
    )
def aux_ccommands_verb_that_comes_after(sentdict, aux, tree, word_positions_in_tree):
    """Check whether the auxiliary c-commands a verb that appears after it.

    Args:
        sentdict: Sentence object supporting ``len()`` and exposing ``pos``.
        aux: Auxiliary object; only ``aux.wordnum`` is read.
        tree: Parse tree of the sentence, indexable by a tree position.
        word_positions_in_tree: Sequence whose entry ``wordnum - 1`` is the
            tree position of the word with that word number.

    Returns:
        bool: True if the auxiliary c-commands at least one later
        verb-tagged word; False otherwise.
    """
    # NOTE(review): the upper bound is len(sentdict), not len(sentdict.pos) --
    # presumably the sentence object's length equals its token count; confirm.
    later_indices = range(aux.wordnum + 1, len(sentdict))
    # The generator is lazy, so tree lookups happen only for verb-tagged words.
    return any(
        nt.ccommands(
            tree,
            tree[word_positions_in_tree[aux.wordnum - 1]],
            tree[word_positions_in_tree[later - 1]],
        )
        for later in later_indices
        if is_verb(sentdict.pos[later])
    )
def ccommands(w1_idx, w2_idx, tree, word_positions):
    """Wrap nt.ccommands for two words given by word number.

    Args:
        w1_idx: Word number of the first word.
        w2_idx: Word number of the second word.
        tree: Parse tree of the sentence.
        word_positions: Sequence whose entry ``wordnum - 1`` belongs to the
            word with that word number.

    Returns:
        The result of ``nt.ccommands``, or False if an IndexError is raised
        during the lookups or the call.
    """
    # NOTE(review): the entries of word_positions are passed straight through,
    # whereas sibling helpers pass tree[position] subtrees -- confirm which
    # form nt.ccommands expects.
    try:
        first = word_positions[w1_idx - 1]
        second = word_positions[w2_idx - 1]
        outcome = nt.ccommands(tree, first, second)
    except IndexError:
        outcome = False
    return outcome