예제 #1
0
파일: detectVPE.py 프로젝트: kiankd/vpe
def nexttopunct(sentdict, auxidx, t, word_positions_in_tree):
    """Report whether the auxiliary at ``auxidx`` sits right before punctuation.

    Two patterns yield True:
      * the POS tag at ``auxidx+1`` is a period, comma, dash or colon;
      * the lemma at ``auxidx+1`` is 'not' and the POS tag at ``auxidx+2``
        is a period, comma, dash or colon.

    Any IndexError raised while inspecting either pattern yields False.

    Args:
        sentdict: mapping with 'pos' and 'lemmas' sequences indexed by token.
        auxidx: index of the auxiliary in those sequences.
        t: parse tree, indexed with entries of ``word_positions_in_tree``.
        word_positions_in_tree: sequence indexed with ``auxidx-1`` to obtain
            the auxiliary's position in ``t``.

    Returns:
        True if one of the two patterns matches, otherwise False.
    """
    # NOTE(review): the local structure and its subtrees are not used by any
    # live code below. The calls are kept so that anything they raise (or any
    # side effect they may have) is unchanged -- confirm before removing.
    local_tree = nltktree.generate_local_structure_from_subtree(t, t[word_positions_in_tree[auxidx-1]])
    nltktree.getsmallestsubtrees(local_tree)

    # Pattern 1: auxiliary immediately followed by punctuation.
    try:
        tag_after = sentdict['pos'][auxidx+1]
        if isperiod(tag_after) or iscomma(tag_after) or isdashorcolon(tag_after):
            return True
    except IndexError:
        return False

    # Pattern 2: auxiliary + 'not' immediately followed by punctuation.
    try:
        tag_after_next = sentdict['pos'][auxidx+2]
        if sentdict['lemmas'][auxidx+1] == 'not':
            if isperiod(tag_after_next) or iscomma(tag_after_next) or isdashorcolon(tag_after_next):
                return True
    except IndexError:
        pass

    return False
예제 #2
0
def do_rule(sentdict, aux, tree, word_positions_in_tree):
    """Apply the rule set for a 'do'-type auxiliary and return a boolean verdict.

    Decision order (first match wins):
      1. lemma after the auxiliary is 'that'                      -> True
      2. auxiliary is locally c-commanded by a verb               -> False
      3. 'to' precedes the auxiliary                              -> False
      4. auxiliary is followed by punctuation and no other verb in the
         local structure c-commands it                            -> True
      5. auxiliary + 'not' is followed by punctuation and no verb in the
         local structure c-commands the auxiliary                 -> True
      6. auxiliary is c-commanded by a continuation word          -> True
      7. a verb follows the auxiliary                             -> False
      8. next tag is a preposition and the word is not 'done'     -> True
      otherwise                                                   -> False

    Args:
        sentdict: object exposing ``lemmas``, ``pos`` and ``words`` sequences.
        aux: auxiliary object; its ``wordnum`` is used as the token index.
        tree: parse tree, indexed with entries of ``word_positions_in_tree``.
        word_positions_in_tree: sequence indexed with ``aux.wordnum - 1`` to
            obtain the auxiliary's position in ``tree``.

    Returns:
        True or False according to the rules above.
    """
    idx = aux.wordnum

    # Rule 1: "do that".
    try:
        if sentdict.lemmas[idx+1] == 'that':
            return True
    except IndexError:
        pass

    # Rules 2 and 3: early rejections.
    if aux_locally_ccommanded_by_verb(sentdict, aux, tree, word_positions_in_tree):
        return False
    if to_precedes_aux(sentdict, aux):
        return False

    aux_pos = word_positions_in_tree[idx-1]
    local_tree = nt.generate_local_structure_from_subtree(tree, tree[aux_pos])
    leaves = nt.getsmallestsubtrees(local_tree)

    # Rule 4: auxiliary directly before punctuation. An IndexError anywhere in
    # this check (e.g. no following token) just skips the rule.
    try:
        tag = sentdict.pos[idx+1]
        if is_period(tag) or is_comma(tag) or is_dash_or_colon(tag):
            commanded_by_other_verb = any(
                is_verb(node.label())
                and node != tree[aux_pos]
                and nt.ccommands(local_tree, node, tree[aux_pos])
                for node in leaves
            )
            if not commanded_by_other_verb:
                return True
    except IndexError:
        pass

    # Rule 5: auxiliary + 'not' directly before punctuation.
    try:
        tag = sentdict.pos[idx+2]
        if sentdict.lemmas[idx+1] == 'not' and (is_period(tag) or is_comma(tag) or is_dash_or_colon(tag)):
            commanded_by_verb = any(
                is_verb(node.label())
                and nt.ccommands(local_tree, node, tree[aux_pos])
                for node in leaves
            )
            if not commanded_by_verb:
                return True
    except IndexError:
        pass

    # Rule 6.
    if is_ccommanded_by_continuation_word(sentdict, aux, tree, word_positions_in_tree):
        return True

    # Rule 7.
    if verb_follows_aux(sentdict, aux):
        return False

    # Rule 8: followed by a preposition, unless the word itself is 'done'.
    try:
        if is_preposition(sentdict.pos[idx+1]) and sentdict.words[idx] != 'done':
            return True
    except IndexError:
        pass

    return False
예제 #3
0
파일: detectVPE.py 프로젝트: kiankd/vpe
def auxlocallyccommandsverb(sentdict, auxidx, t, word_positions_in_tree):
    """Tell whether the auxiliary asymmetrically c-commands a verb locally.

    Builds the local structure around the auxiliary's subtree and looks for a
    verb-labelled smallest subtree that the auxiliary c-commands while that
    subtree does not c-command the auxiliary.

    Args:
        sentdict: unused here; kept for a signature parallel to sibling checks.
        auxidx: index of the auxiliary; ``auxidx-1`` indexes
            ``word_positions_in_tree``.
        t: parse tree, indexed with entries of ``word_positions_in_tree``.
        word_positions_in_tree: sequence of positions into ``t``.

    Returns:
        True if such a verb exists; False otherwise, including when an
        IndexError is raised anywhere during the search.
    """
    try:
        aux_pos = word_positions_in_tree[auxidx-1]
        local_tree = nltktree.generate_local_structure_from_subtree(t, t[aux_pos])
        return any(
            isverb(node.label())
            and nltktree.ccommands(local_tree, t[aux_pos], node)
            and not nltktree.ccommands(local_tree, node, t[aux_pos])
            for node in nltktree.getsmallestsubtrees(local_tree)
        )
    except IndexError:
        return False
예제 #4
0
def aux_locally_ccommands_verb(sentdict, aux, tree, word_positions_in_tree):
    """Tell whether the auxiliary asymmetrically c-commands a verb locally.

    Builds the local structure around the auxiliary's subtree and looks for a
    verb-labelled smallest subtree that the auxiliary c-commands while that
    subtree does not c-command the auxiliary.

    Args:
        sentdict: unused here; kept for a signature parallel to sibling rules.
        aux: auxiliary object; ``aux.wordnum - 1`` indexes
            ``word_positions_in_tree``.
        tree: parse tree, indexed with entries of ``word_positions_in_tree``.
        word_positions_in_tree: sequence of positions into ``tree``.

    Returns:
        True if such a verb exists; False otherwise, including when an
        IndexError is raised anywhere during the search.
    """
    try:
        aux_pos = word_positions_in_tree[aux.wordnum-1]
        local_tree = nt.generate_local_structure_from_subtree(tree, tree[aux_pos])
        return any(
            is_verb(node.label())
            and nt.ccommands(local_tree, tree[aux_pos], node)
            and not nt.ccommands(local_tree, node, tree[aux_pos])
            for node in nt.getsmallestsubtrees(local_tree)
        )
    except IndexError:
        return False
예제 #5
0
파일: detectVPE.py 프로젝트: kiankd/vpe
def docheck(sentdict, auxidx, t, word_positions_in_tree, verbose=False):
    """Heuristically decide whether the 'do' auxiliary at ``auxidx`` signals VPE.

    Decision order (first match wins):
      1. lemma after the auxiliary is 'that'                      -> True
      2. auxiliary is locally c-commanded by a verb               -> False
      3. 'to' precedes the auxiliary                              -> False
      4. auxiliary is followed by punctuation and no other verb in the
         local structure c-commands it                            -> True
      5. auxiliary + 'not' is followed by punctuation and no verb in the
         local structure c-commands the auxiliary                 -> True
      6. auxiliary is c-commanded by a continuation word          -> True
      7. a verb follows the auxiliary                             -> False
      8. next tag is a preposition and the word is not 'done'     -> True
      otherwise                                                   -> False

    When the auxiliary is the last token, the rules that look at the
    following token are skipped instead of raising IndexError.

    Args:
        sentdict: mapping with 'lemmas', 'pos' and 'words' sequences.
        auxidx: index of the auxiliary in those sequences.
        t: parse tree, indexed with entries of ``word_positions_in_tree``.
        word_positions_in_tree: sequence indexed with ``auxidx-1`` to obtain
            the auxiliary's position in ``t``.
        verbose: currently unused; kept for backward compatibility.

    Returns:
        True or False according to the rules above.
    """
    # Disabled experiments, kept as notes only:
    #  - excluding 'do so' / 'do the same' and 'do' adjacent to another 'do';
    #  - returning True when the local structure holds exactly one
    #    verb/noun/preposition (small recall gain, precision loss);
    #  - additionally requiring, for rule 6, that the auxiliary does not
    #    locally c-command a verb (8% recall traded for 4% precision).

    # Rule 1: "do that".
    try:
        if sentdict['lemmas'][auxidx+1] == 'that':
            return True
    except IndexError:
        pass

    # Rules 2 and 3: early rejections.
    if auxislocallyccommandedbyverb(sentdict, auxidx, t, word_positions_in_tree):
        return False
    if toprecedesaux(sentdict, auxidx):
        return False

    aux_pos = word_positions_in_tree[auxidx-1]
    localt = nltktree.generate_local_structure_from_subtree(t, t[aux_pos])
    local_word_subtrees = nltktree.getsmallestsubtrees(localt)

    # Rule 4: 'do' directly before punctuation. Only the token lookup is
    # guarded: a sentence-final auxiliary has no following tag to inspect.
    try:
        next_tag = sentdict['pos'][auxidx+1]
    except IndexError:
        pass
    else:
        if isperiod(next_tag) or iscomma(next_tag) or isdashorcolon(next_tag):
            commanded_by_other_verb = any(
                isverb(subtree.label())
                and subtree != t[aux_pos]
                and nltktree.ccommands(localt, subtree, t[aux_pos])
                for subtree in local_word_subtrees
            )
            if not commanded_by_other_verb:
                return True

    # Rule 5: "don't" directly before punctuation. The whole check stays
    # inside the try, as before, so any IndexError here skips the rule.
    try:
        tag_after_not = sentdict['pos'][auxidx+2]
        if sentdict['lemmas'][auxidx+1] == 'not' and (
                isperiod(tag_after_not) or iscomma(tag_after_not) or isdashorcolon(tag_after_not)):
            commanded_by_verb = any(
                isverb(subtree.label())
                and nltktree.ccommands(localt, subtree, t[aux_pos])
                for subtree in local_word_subtrees
            )
            if not commanded_by_verb:
                return True
    except IndexError:
        pass

    # Rule 6.
    if isccommandedbycontinuationword(sentdict, auxidx, t, word_positions_in_tree):
        return True

    # Rule 7.
    if verbfollowsaux(sentdict, auxidx):
        return False

    # Rule 8: followed by a preposition, unless the word itself is 'done'.
    # Guarded like rule 4 so a sentence-final auxiliary yields False.
    try:
        next_tag = sentdict['pos'][auxidx+1]
    except IndexError:
        return False
    if isprep(next_tag) and sentdict['words'][auxidx] != 'done':
        return True

    return False