def nexttopunct(sentdict, auxidx, t, word_positions_in_tree):
    """Report whether the auxiliary is directly followed by punctuation.

    True is returned when either

    * the POS tag at ``auxidx + 1`` is a period, a comma, or a dash/colon, or
    * the lemma at ``auxidx + 1`` is 'not' and the POS tag at ``auxidx + 2``
      is one of those punctuation tags.

    Any ``IndexError`` raised while looking at those tokens yields False.

    Args:
        sentdict: mapping with 'pos' and 'lemmas' sequences indexed by auxidx.
        auxidx: index of the auxiliary in the ``sentdict`` sequences.
        t: tree, indexed with entries of ``word_positions_in_tree``.
        word_positions_in_tree: tree positions, looked up at ``auxidx - 1``.

    Returns:
        bool: see above.
    """
    # NOTE(review): the local structure is no longer consulted here -- the
    # loops that let a c-commanding verb veto the result were commented out.
    # The two calls are kept (outside the try, as before) so that whatever
    # they raise is unchanged; confirm they are side-effect free before
    # removing them.
    local_tree = nltktree.generate_local_structure_from_subtree(
        t, t[word_positions_in_tree[auxidx - 1]])
    nltktree.getsmallestsubtrees(local_tree)

    try:
        # Case 1: "<aux> ." / "<aux> ," / "<aux> --" ...
        tag = sentdict['pos'][auxidx + 1]
        if isperiod(tag) or iscomma(tag) or isdashorcolon(tag):
            return True

        # Case 2: "<aux> not ." ...  The tag two tokens ahead is fetched
        # first, so a sentence that is too short bails out before the lemma
        # comparison.
        tag = sentdict['pos'][auxidx + 2]
        if sentdict['lemmas'][auxidx + 1] == 'not':
            if isperiod(tag) or iscomma(tag) or isdashorcolon(tag):
                return True
    except IndexError:
        # Ran off the end of the sentence.
        pass
    return False
def do_rule(sentdict, aux, tree, word_positions_in_tree):
    """Apply the hand-written decision rule to the auxiliary ``aux``.

    NOTE(review): presumably a verb-phrase-ellipsis heuristic for "do" --
    confirm against the callers.

    The checks run in this order; the first one that fires decides:

    1. the lemma right after the auxiliary is 'that'             -> True
    2. ``aux_locally_ccommanded_by_verb`` holds                   -> False
    3. ``to_precedes_aux`` holds                                  -> False
    4. the next POS tag is a period, comma, dash or colon and no *other*
       verb-labelled subtree of the local structure c-commands the
       auxiliary                                                  -> True
    5. the next lemma is 'not', the tag after it is such punctuation, and
       no verb-labelled subtree c-commands the auxiliary          -> True
    6. ``is_ccommanded_by_continuation_word`` holds               -> True
    7. ``verb_follows_aux`` holds                                 -> False
    8. the next POS tag is a preposition and the auxiliary's word is not
       'done'                                                     -> True

    If nothing fires the result is False.  An ``IndexError`` raised inside
    checks 1, 4, 5 or 8 simply skips that check.

    Args:
        sentdict: object exposing ``lemmas``, ``pos`` and ``words`` sequences.
        aux: object whose ``wordnum`` indexes those sequences.
        tree: tree, indexed with entries of ``word_positions_in_tree``.
        word_positions_in_tree: tree positions, looked up at ``wordnum - 1``.

    Returns:
        bool: the rule's verdict.
    """
    idx = aux.wordnum

    # 1. "<aux> that" is accepted outright.
    try:
        followed_by_that = sentdict.lemmas[idx + 1] == 'that'
    except IndexError:
        followed_by_that = False
    if followed_by_that:
        return True

    # 2./3. Hard rejections.
    if aux_locally_ccommanded_by_verb(sentdict, aux, tree, word_positions_in_tree):
        return False
    if to_precedes_aux(sentdict, aux):
        return False

    aux_pos = word_positions_in_tree[idx - 1]
    local_tree = nt.generate_local_structure_from_subtree(tree, tree[aux_pos])
    candidates = nt.getsmallestsubtrees(local_tree)

    # 4. Auxiliary directly before punctuation.
    try:
        next_tag = sentdict.pos[idx + 1]
        if is_period(next_tag) or is_comma(next_tag) or is_dash_or_colon(next_tag):
            # A different verb that c-commands the auxiliary vetoes the match.
            vetoed = any(
                is_verb(node.label())
                and node != tree[aux_pos]
                and nt.ccommands(local_tree, node, tree[aux_pos])
                for node in candidates
            )
            if not vetoed:
                return True
    except IndexError:
        pass

    # 5. "<aux> not" directly before punctuation.
    try:
        tag_after_not = sentdict.pos[idx + 2]
        if sentdict.lemmas[idx + 1] == 'not' and (
                is_period(tag_after_not)
                or is_comma(tag_after_not)
                or is_dash_or_colon(tag_after_not)):
            # Unlike check 4, the auxiliary's own node is not excluded here.
            vetoed = any(
                is_verb(node.label())
                and nt.ccommands(local_tree, node, tree[aux_pos])
                for node in candidates
            )
            if not vetoed:
                return True
    except IndexError:
        pass

    # 6./7. Context-word acceptance, then rejection on a following verb.
    if is_ccommanded_by_continuation_word(sentdict, aux, tree, word_positions_in_tree):
        return True
    if verb_follows_aux(sentdict, aux):
        return False

    # 8. Auxiliary (other than the word 'done') followed by a preposition.
    try:
        prep_follows = (is_preposition(sentdict.pos[idx + 1])
                        and sentdict.words[idx] != 'done')
    except IndexError:
        prep_follows = False
    if prep_follows:
        return True
    return False
def auxlocallyccommandsverb(sentdict, auxidx, t, word_positions_in_tree):
    """Tell whether the auxiliary one-sidedly c-commands a verb locally.

    The local structure around the auxiliary's node is built with
    ``nltktree.generate_local_structure_from_subtree`` and every subtree
    returned by ``nltktree.getsmallestsubtrees`` is inspected.  The answer is
    True as soon as a verb-labelled subtree is found that the auxiliary
    c-commands while that subtree does not c-command the auxiliary back.

    Args:
        sentdict: unused; kept for a uniform signature.
        auxidx: auxiliary index; the tree position is read at ``auxidx - 1``.
        t: tree, indexed with entries of ``word_positions_in_tree``.
        word_positions_in_tree: tree positions of the words.

    Returns:
        bool: True if such a verb exists; False otherwise, including when an
        ``IndexError`` is raised along the way.
    """
    try:
        aux_pos = word_positions_in_tree[auxidx - 1]
        local_tree = nltktree.generate_local_structure_from_subtree(t, t[aux_pos])
        for node in nltktree.getsmallestsubtrees(local_tree):
            if not isverb(node.label()):
                continue
            if not nltktree.ccommands(local_tree, t[aux_pos], node):
                continue
            # Mutual c-command does not count.
            if not nltktree.ccommands(local_tree, node, t[aux_pos]):
                return True
    except IndexError:
        pass
    return False
def aux_locally_ccommands_verb(sentdict, aux, tree, word_positions_in_tree):
    """Tell whether ``aux`` one-sidedly c-commands a verb in its local tree.

    Every subtree returned by ``nt.getsmallestsubtrees`` for the local
    structure around the auxiliary's node is tested.  A subtree qualifies
    when it carries a verb label, the auxiliary c-commands it, and it does
    not c-command the auxiliary in return.

    Args:
        sentdict: unused; kept for a uniform signature.
        aux: object whose ``wordnum`` (minus one) selects the tree position.
        tree: tree, indexed with entries of ``word_positions_in_tree``.
        word_positions_in_tree: tree positions of the words.

    Returns:
        bool: True if a qualifying verb exists; False otherwise, including
        when an ``IndexError`` is raised along the way.
    """
    try:
        aux_pos = word_positions_in_tree[aux.wordnum - 1]
        local_tree = nt.generate_local_structure_from_subtree(tree, tree[aux_pos])
        # any() stops at the first qualifying subtree, like an early return.
        return any(
            is_verb(node.label())
            and nt.ccommands(local_tree, tree[aux_pos], node)
            and not nt.ccommands(local_tree, node, tree[aux_pos])
            for node in nt.getsmallestsubtrees(local_tree)
        )
    except IndexError:
        return False
def docheck(sentdict, auxidx, t, word_positions_in_tree, verbose=False):
    """Apply the hand-written VPE decision rule to the "do" at ``auxidx``.

    The checks run in this order; the first one that fires decides:

    1. the lemma right after the auxiliary is 'that'             -> True
    2. ``auxislocallyccommandedbyverb`` holds                     -> False
    3. ``toprecedesaux`` holds                                    -> False
    4. the next POS tag is a period, comma, dash or colon and no *other*
       verb-labelled subtree of the local structure c-commands the
       auxiliary                                                  -> True
    5. the next lemma is 'not', the tag after it is such punctuation, and
       no verb-labelled subtree c-commands the auxiliary          -> True
    6. ``isccommandedbycontinuationword`` holds                   -> True
    7. ``verbfollowsaux`` holds                                   -> False
    8. the next POS tag is a preposition and the auxiliary's word is not
       'done'                                                     -> True

    If nothing fires the result is False.

    A sentence-final auxiliary (no token at ``auxidx + 1``) no longer raises
    ``IndexError``: checks 4 and 8 are skipped in that case, matching how the
    sibling rule functions guard the same lookup.

    NOTE: an earlier filter for 'do so' / 'do the same' and for stacked
    "do" forms ("don't do ...", "does do ...") was already disabled in this
    function and is not applied.

    Args:
        sentdict: mapping with 'lemmas', 'pos' and 'words' sequences.
        auxidx: index of the auxiliary in those sequences.
        t: tree, indexed with entries of ``word_positions_in_tree``.
        word_positions_in_tree: tree positions, looked up at ``auxidx - 1``.
        verbose: unused; kept for backward compatibility.

    Returns:
        bool: the rule's verdict.
    """
    # 1. "do that" is accepted outright.
    try:
        if sentdict['lemmas'][auxidx + 1] == 'that':
            return True
    except IndexError:
        pass

    # 2./3. Hard rejections.
    if auxislocallyccommandedbyverb(sentdict, auxidx, t, word_positions_in_tree):
        return False
    if toprecedesaux(sentdict, auxidx):
        return False

    aux_node = t[word_positions_in_tree[auxidx - 1]]
    localt = nltktree.generate_local_structure_from_subtree(t, aux_node)
    local_word_subtrees = nltktree.getsmallestsubtrees(localt)

    # Fetch the following tag once; it is absent when the auxiliary is the
    # last token, which previously crashed checks 4 and 8.
    try:
        next_tag = sentdict['pos'][auxidx + 1]
        has_next = True
    except IndexError:
        next_tag = None
        has_next = False

    # 4. "Do" at the end of a sentence/clause.
    if has_next and (isperiod(next_tag) or iscomma(next_tag) or isdashorcolon(next_tag)):
        # A different verb that c-commands the auxiliary vetoes the match.
        vetoed = any(
            isverb(subtree.label())
            and subtree != aux_node
            and nltktree.ccommands(localt, subtree, aux_node)
            for subtree in local_word_subtrees
        )
        if not vetoed:
            return True

    # 5. "Don't" at the end of a sentence/clause.
    try:
        checkpuncttag = sentdict['pos'][auxidx + 2]
        if sentdict['lemmas'][auxidx + 1] == 'not' and (
                isperiod(checkpuncttag)
                or iscomma(checkpuncttag)
                or isdashorcolon(checkpuncttag)):
            # Unlike check 4, the auxiliary's own node is not excluded here.
            vetoed = any(
                isverb(subtree.label())
                and nltktree.ccommands(localt, subtree, aux_node)
                for subtree in local_word_subtrees
            )
            if not vetoed:
                return True
    except IndexError:
        pass

    # NOTE: accepting when exactly one verb/noun/preposition subtree is found
    # in the local structure was tried at this point and left disabled: small
    # increase in recall, decrease in precision.

    # 6. NOTE: additionally requiring that the auxiliary does not locally
    # c-command a verb was tried here and left disabled ("8% recall traded
    # for 4% precision").
    if isccommandedbycontinuationword(sentdict, auxidx, t, word_positions_in_tree):
        return True

    # 7.
    if verbfollowsaux(sentdict, auxidx):
        return False

    # 8. Auxiliary (other than the word 'done') followed by a preposition.
    if has_next and isprep(next_tag) and sentdict['words'][auxidx] != 'done':
        return True

    return False