Ejemplo n.º 1
0
def do_rule(sentdict, aux, tree, word_positions_in_tree):
    """Decide whether the rule for the auxiliary 'do' fires for ``aux``.

    The cues are tried in a fixed order and the first one that matches
    decides the result. (Presumably True marks a likely verb-phrase
    ellipsis site -- confirm against the caller.)

    Args:
        sentdict: sentence object exposing the sequences ``lemmas``, ``pos``
            and ``words``.
        aux: auxiliary object; only ``aux.wordnum`` is read directly here.
        tree: parse tree, indexed with entries of ``word_positions_in_tree``.
        word_positions_in_tree: sequence giving, at ``wordnum - 1``, the
            tree position of a word.

    Returns:
        True if a positive cue matched, otherwise False.
    """
    auxidx = aux.wordnum

    def aux_node():
        # Looked up afresh at every use site rather than cached.
        return tree[word_positions_in_tree[auxidx-1]]

    def ends_clause(tag):
        return is_period(tag) or is_comma(tag) or is_dash_or_colon(tag)

    # "do that": accepted without consulting the tree.
    try:
        followed_by_that = sentdict.lemmas[auxidx+1] == 'that'
    except IndexError:
        followed_by_that = False
    if followed_by_that:
        return True

    # None of the remaining cues apply when a verb locally c-commands the
    # auxiliary, or when 'to' precedes it.
    if aux_locally_ccommanded_by_verb(sentdict, aux, tree, word_positions_in_tree):
        return False
    if to_precedes_aux(sentdict, aux):
        return False

    localt = nt.generate_local_structure_from_subtree(tree, aux_node())
    local_word_subtrees = nt.getsmallestsubtrees(localt)

    # Auxiliary directly followed by punctuation: fires unless some other
    # local verb c-commands it. An IndexError raised anywhere in this
    # section simply skips the cue.
    try:
        if ends_clause(sentdict.pos[auxidx+1]):
            blocked = any(
                is_verb(candidate.label())
                and candidate != aux_node()
                and nt.ccommands(localt, candidate, aux_node())
                for candidate in local_word_subtrees
            )
            if not blocked:
                return True
    except IndexError:
        pass

    # Auxiliary + 'not' followed by punctuation ("don't" at the end).
    # Here the auxiliary's own subtree is not excluded from the candidates.
    try:
        tag_after_not = sentdict.pos[auxidx+2]
        if sentdict.lemmas[auxidx+1] == 'not' and ends_clause(tag_after_not):
            blocked = any(
                is_verb(candidate.label())
                and nt.ccommands(localt, candidate, aux_node())
                for candidate in local_word_subtrees
            )
            if not blocked:
                return True
    except IndexError:
        pass

    if is_ccommanded_by_continuation_word(sentdict, aux, tree, word_positions_in_tree):
        return True

    if verb_follows_aux(sentdict, aux):
        return False

    # A following preposition counts as a cue, except after the form 'done'.
    try:
        next_is_preposition = is_preposition(sentdict.pos[auxidx+1])
        if next_is_preposition and sentdict.words[auxidx] != 'done':
            return True
    except IndexError:
        pass

    return False
Ejemplo n.º 2
0
def auxlocallyccommandsverb(sentdict, auxidx, t, word_positions_in_tree):
    """Tell whether the auxiliary asymmetrically c-commands a local verb.

    A local structure is generated around the auxiliary's subtree and each
    of its smallest subtrees with a verb label is tested: the auxiliary must
    c-command it (per ``nltktree.ccommands``) while it does not c-command
    the auxiliary.

    Args:
        sentdict: unused here; kept for a uniform signature.
        auxidx: word number of the auxiliary; ``auxidx - 1`` indexes
            ``word_positions_in_tree``.
        t: parse tree.
        word_positions_in_tree: sequence of tree positions, one per word.

    Returns:
        True if such a verb exists. False otherwise, including when an
        IndexError is raised at any point of the search.
    """
    def aux_node():
        # Re-resolved at each use, not cached.
        return t[word_positions_in_tree[auxidx-1]]

    try:
        localt = nltktree.generate_local_structure_from_subtree(t, aux_node())
        return any(
            isverb(word_subtree.label())
            and nltktree.ccommands(localt, aux_node(), word_subtree)
            and not nltktree.ccommands(localt, word_subtree, aux_node())
            for word_subtree in nltktree.getsmallestsubtrees(localt)
        )
    except IndexError:
        return False
Ejemplo n.º 3
0
def aux_locally_ccommands_verb(sentdict, aux, tree, word_positions_in_tree):
    """Tell whether ``aux`` asymmetrically c-commands a verb in its local structure.

    Args:
        sentdict: unused here; kept for a uniform signature.
        aux: auxiliary object; ``aux.wordnum - 1`` indexes
            ``word_positions_in_tree``.
        tree: parse tree.
        word_positions_in_tree: sequence of tree positions, one per word.

    Returns:
        True when some verb-labelled smallest subtree of the local structure
        is c-commanded by the auxiliary (per ``nt.ccommands``) and does not
        c-command it back. False otherwise, including when an IndexError
        occurs anywhere during the search.
    """
    found = False
    try:
        localt = nt.generate_local_structure_from_subtree(
            tree, tree[word_positions_in_tree[aux.wordnum-1]])

        for candidate in nt.getsmallestsubtrees(localt):
            if not is_verb(candidate.label()):
                continue
            # The auxiliary has to c-command the verb ...
            if not nt.ccommands(localt, tree[word_positions_in_tree[aux.wordnum-1]], candidate):
                continue
            # ... and the verb must not c-command the auxiliary in return.
            if nt.ccommands(localt, candidate, tree[word_positions_in_tree[aux.wordnum-1]]):
                continue
            found = True
            break
    except IndexError:
        pass
    return found
Ejemplo n.º 4
0
def is_ccommanded_by_continuation_word(sentdict, aux, tree, word_positions_in_tree):
    """Tell whether a nearby preceding 'than', 'as' or 'so' c-commands ``aux``.

    Only the ``CONTINUATION_WORD_DISTANCE_SEARCH`` word indices immediately
    before ``aux.wordnum`` (clipped at index 0) are inspected, and words are
    compared case-insensitively.

    Args:
        sentdict: sentence object exposing a ``words`` sequence.
        aux: auxiliary object; only ``aux.wordnum`` is read.
        tree: parse tree.
        word_positions_in_tree: sequence of tree positions; the entry at
            ``i - 1`` is used for word index ``i``.

    Returns:
        True if ``nt.ccommands`` holds for one such word, else False.
    """
    auxidx = aux.wordnum
    window_start = max(0, auxidx - CONTINUATION_WORD_DISTANCE_SEARCH)

    return any(
        sentdict.words[i].lower() in ('than', 'as', 'so')
        and nt.ccommands(
            tree,
            tree[word_positions_in_tree[i-1]],
            tree[word_positions_in_tree[auxidx-1]],
        )
        for i in range(window_start, auxidx)
    )
Ejemplo n.º 5
0
def auxccommandsverb(sentdict, auxidx, t, word_positions_in_tree):
    """Tell whether the auxiliary c-commands any verb in the whole tree.

    Args:
        sentdict: unused here; kept for a uniform signature.
        auxidx: word number of the auxiliary; ``auxidx - 1`` indexes
            ``word_positions_in_tree``.
        t: parse tree.
        word_positions_in_tree: sequence of tree positions, one per word.

    Returns:
        True if ``nltktree.ccommands`` holds from the auxiliary to at least
        one verb-labelled smallest subtree of ``t``, else False.
    """
    def commanded_by_aux(verb_subtree):
        # An IndexError during one test only discards that candidate.
        try:
            return bool(nltktree.ccommands(t, t[word_positions_in_tree[auxidx-1]], verb_subtree))
        except IndexError:
            return False

    return any(
        isverb(word_subtree.label()) and commanded_by_aux(word_subtree)
        for word_subtree in nltktree.getsmallestsubtrees(t)
    )
Ejemplo n.º 6
0
def aux_ccommanded_by_verb(sentdict, aux, tree, word_positions_in_tree):
    """Tell whether any verb in the whole tree c-commands ``aux``.

    Args:
        sentdict: unused here; kept for a uniform signature.
        aux: auxiliary object; ``aux.wordnum - 1`` indexes
            ``word_positions_in_tree``.
        tree: parse tree.
        word_positions_in_tree: sequence of tree positions, one per word.

    Returns:
        True if ``nt.ccommands`` holds from at least one verb-labelled
        smallest subtree of ``tree`` to the auxiliary, else False.
    """
    # Lazy filter: each label is checked only when the loop reaches it.
    verb_subtrees = (
        word_subtree
        for word_subtree in nt.getsmallestsubtrees(tree)
        if is_verb(word_subtree.label())
    )

    for verb_subtree in verb_subtrees:
        try:
            commands_aux = nt.ccommands(
                tree, verb_subtree, tree[word_positions_in_tree[aux.wordnum-1]])
        except IndexError:
            # A failed lookup only rules out this candidate.
            continue
        if commands_aux:
            return True
    return False
Ejemplo n.º 7
0
def isccommandedbycontinuationword(sentdict, auxidx, t, word_positions_in_tree, max_distance=5):
    """Tell whether a nearby preceding 'than', 'as' or 'so' c-commands the auxiliary.

    Only word indices ``i`` with ``auxidx - max_distance <= i < auxidx`` are
    inspected, and words are compared case-insensitively. An earlier
    experiment that also tested local c-command by 'like' was disabled and
    has been removed.

    Args:
        sentdict: mapping with a ``'words'`` sequence.
        auxidx: word number of the auxiliary; ``auxidx - 1`` indexes
            ``word_positions_in_tree``.
        t: parse tree.
        word_positions_in_tree: sequence of tree positions; the entry at
            ``i - 1`` is used for word index ``i``.
        max_distance: how many word indices before ``auxidx`` are searched.
            Defaults to 5, the window previously hard-coded here.

    Returns:
        True if ``nltktree.ccommands`` holds for one such word, else False.
    """
    continuation_words = ('than', 'as', 'so')
    words = sentdict['words']

    # Visit only the indices that can match, not the whole sentence.
    first = max(0, auxidx - max_distance)
    last = min(auxidx, len(words))

    for i in range(first, last):
        if words[i].lower() not in continuation_words:
            continue
        # NOTE(review): for i == 0 the index i-1 wraps around to the last
        # entry of word_positions_in_tree -- confirm index 0 is never a word.
        if nltktree.ccommands(t, t[word_positions_in_tree[i-1]], t[word_positions_in_tree[auxidx-1]]):
            return True

    return False
Ejemplo n.º 8
0
def docheck(sentdict, auxidx, t, word_positions_in_tree, verbose=False):
    """Heuristic check for the auxiliary 'do' at word number ``auxidx``.

    The cues are tried in a fixed order and the first one that matches
    decides the result. (The surrounding notes speak of VPE, so True
    presumably marks a likely verb-phrase ellipsis site -- confirm against
    the caller.)

    Earlier pre-checks that rejected "do so" / "do the same" and doubled
    'do' ("don't do ...", "x does do ...") are disabled and no longer part
    of this function.

    Args:
        sentdict: mapping with the sequences ``'lemmas'``, ``'pos'`` and
            ``'words'``.
        auxidx: word number of the auxiliary; ``auxidx - 1`` indexes
            ``word_positions_in_tree``.
        t: parse tree.
        word_positions_in_tree: sequence of tree positions, one per word.
        verbose: accepted for compatibility; not used.

    Returns:
        True if a positive cue matched, otherwise False. An auxiliary that
        is the last token no longer raises on the look-ahead; the cues that
        need a following tag are skipped instead.
    """
    # "do that": accepted without consulting the tree.
    try:
        if sentdict['lemmas'][auxidx+1] == 'that':
            return True
    except IndexError:
        pass

    if auxislocallyccommandedbyverb(sentdict, auxidx, t, word_positions_in_tree):
        return False

    # NOTE: a stricter variant (reject only when the auxiliary is locally
    # c-commanded by a verb AND locally c-commands one) was tried and is
    # disabled.

    if toprecedesaux(sentdict, auxidx):
        return False

    localt = nltktree.generate_local_structure_from_subtree(t, t[word_positions_in_tree[auxidx-1]])
    local_word_subtrees = nltktree.getsmallestsubtrees(localt)

    # Tag of the token right after the auxiliary, or None when the auxiliary
    # is the last token. Guarded so that the look-ahead cannot raise
    # IndexError, matching the sibling do_rule.
    try:
        next_tag = sentdict['pos'][auxidx+1]
    except IndexError:
        next_tag = None

    # Do at the end of sentence.
    if next_tag is not None and (isperiod(next_tag) or iscomma(next_tag) or isdashorcolon(next_tag)):
        endbool = True

        for subtree in local_word_subtrees:
            if isverb(subtree.label()) and subtree != t[word_positions_in_tree[auxidx-1]]:
                if nltktree.ccommands(localt, subtree, t[word_positions_in_tree[auxidx-1]]):
                    endbool = False
                    break

        if endbool:
            return True

    # Don't at the end of sentence.
    try:
        checkpuncttag = sentdict['pos'][auxidx+2]
        if sentdict['lemmas'][auxidx+1] == 'not' and (isperiod(checkpuncttag) or iscomma(checkpuncttag) or isdashorcolon(checkpuncttag)):
            endbool = True

            for subtree in local_word_subtrees:
                if isverb(subtree.label()):
                    if nltktree.ccommands(localt, subtree, t[word_positions_in_tree[auxidx-1]]):
                        endbool = False
                        break

            if endbool:
                return True
    except IndexError:
        pass

    # NOTE: accepting when exactly one local verb/noun/preposition subtree
    # exists gave a small increase in recall but a decrease in precision, so
    # it is disabled.

    # NOTE: additionally requiring that the auxiliary does not locally
    # c-command a verb traded 8% recall for 4% precision, so the continuation
    # cue is accepted unconditionally.
    if isccommandedbycontinuationword(sentdict, auxidx, t, word_positions_in_tree):
        return True

    if verbfollowsaux(sentdict, auxidx):
        return False

    # A following preposition counts as a cue, except after the form 'done'.
    if next_tag is not None and isprep(next_tag) and sentdict['words'][auxidx] != 'done':
        return True

    return False
Ejemplo n.º 9
0
def auxccommandsverbthatcomesafter(sentdict, auxidx, tree, word_positions_in_tree):
    """Tell whether the auxiliary c-commands a verb that follows it.

    Args:
        sentdict: mapping with a ``'pos'`` sequence of tags.
        auxidx: word number of the auxiliary; ``auxidx - 1`` indexes
            ``word_positions_in_tree``.
        tree: parse tree.
        word_positions_in_tree: sequence of tree positions; the entry at
            ``i - 1`` is used for word index ``i``.

    Returns:
        True if some index after ``auxidx`` carries a verb tag and
        ``nltktree.ccommands`` holds from the auxiliary to that word,
        else False.
    """
    later_indices = range(auxidx+1, len(sentdict['pos']))

    return any(
        isverb(sentdict['pos'][i])
        and nltktree.ccommands(
            tree,
            tree[word_positions_in_tree[auxidx-1]],
            tree[word_positions_in_tree[i-1]],
        )
        for i in later_indices
    )
Ejemplo n.º 10
0
def aux_ccommands_verb_that_comes_after(sentdict, aux, tree, word_positions_in_tree):
    """Tell whether ``aux`` c-commands a verb that follows it.

    Args:
        sentdict: sentence object supporting ``len()`` and exposing a
            ``pos`` sequence of tags.
        aux: auxiliary object; only ``aux.wordnum`` is read.
        tree: parse tree.
        word_positions_in_tree: sequence of tree positions; the entry at
            ``i - 1`` is used for word index ``i``.

    Returns:
        True if some index after ``aux.wordnum`` carries a verb tag and
        ``nt.ccommands`` holds from the auxiliary to that word, else False.
    """
    # NOTE(review): the upper bound is len(sentdict), not len(sentdict.pos)
    # -- confirm that sentdict's length is its token count.
    i = aux.wordnum + 1
    end = len(sentdict)

    while i < end:
        if is_verb(sentdict.pos[i]):
            aux_subtree = tree[word_positions_in_tree[aux.wordnum-1]]
            later_subtree = tree[word_positions_in_tree[i-1]]
            if nt.ccommands(tree, aux_subtree, later_subtree):
                return True
        i += 1
    return False
Ejemplo n.º 11
0
def ccommands(w1_idx, w2_idx, tree, word_positions):
    """Report whether word ``w1_idx`` c-commands word ``w2_idx``.

    Thin wrapper around ``nt.ccommands`` that treats an IndexError as
    "no c-command".

    Args:
        w1_idx: word number of the first word; ``w1_idx - 1`` indexes
            ``word_positions``.
        w2_idx: word number of the second word, indexed the same way.
        tree: parse tree handed through to ``nt.ccommands``.
        word_positions: sequence with one entry per word.

    Returns:
        The result of ``nt.ccommands``, or False if an IndexError is raised.
    """
    # NOTE(review): the entries of word_positions are passed on as-is, not
    # used to index into tree first -- confirm that is what nt.ccommands
    # expects here.
    try:
        outcome = nt.ccommands(
            tree,
            word_positions[w1_idx-1],
            word_positions[w2_idx-1],
        )
    except IndexError:
        outcome = False
    return outcome