Ejemplo n.º 1
0
def nearest_trig_np(trig, sentences, all_tags, get_words=False):
    """
    Describe the noun phrase that ends closest to the trigger in its sentence.

    Every 'NP' / 'NP-PRD' subtree of the trigger's sentence tree is scored by
    the absolute token distance between its last (pos, word) pair and the
    trigger's (pos, word) pair. The subtree with the smallest distance is
    chosen (the first one found wins ties). If no subtree is closer than the
    cap of 99 tokens, the whole sentence tree is used instead.

    @type trig: vpe_objects.Auxiliary
    @type sentences: vpe_objects.AllSentences
    @param all_tags: passed through unchanged to encode_pos_tag_frequencies.
    @param get_words: when true, return the leaves of the chosen subtree
        instead of the encoded tag frequencies.
    @return: closest_np.leaves() (or [] if the chosen object has no leaves
        attribute) when get_words is true; otherwise the result of
        encode_pos_tag_frequencies applied to the labels of the chosen
        subtree's smallest subtrees.
    @raise ValueError: if the trigger's (pos, word) pair, or the last
        (pos, word) pair of a candidate NP, is not present in the sentence.
    """
    t = sentences.get_sentence_tree(trig.sentnum)
    tree_tuples = nt.pos_word_tuples(t)
    all_nps = nt.find_subtree_phrases(t, ['NP', 'NP-PRD'])

    # NOTE(review): index() returns the first matching (pos, word) pair, so an
    # identical pair earlier in the sentence would be matched instead of the
    # trigger itself -- confirm whether that matters to callers.
    trig_tup = (trig.pos, trig.word)
    trig_tup_idx = tree_tuples.index(trig_tup)

    # A candidate must be strictly closer than this cap to be selected.
    closest_np_value = 99
    closest_np = None
    for NP in all_nps:
        last_np_word_idx = tree_tuples.index(nt.pos_word_tuples(NP)[-1])
        distance = abs(trig_tup_idx - last_np_word_idx)
        if distance < closest_np_value:
            # Store the absolute distance. Keeping the signed difference would
            # leave a negative bound after an NP that ends past the trigger,
            # and no later (possibly closer) NP could ever beat it.
            closest_np_value = distance
            closest_np = NP
    if closest_np is None:
        closest_np = t

    if get_words:
        try:
            return closest_np.leaves()
        except AttributeError:
            return []

    # Presumably the smallest subtrees are the preterminals, so these labels
    # would be POS tags -- verify against nt.getsmallestsubtrees.
    np_pos = [subtree.label() for subtree in nt.getsmallestsubtrees(closest_np)]

    return encode_pos_tag_frequencies(np_pos, all_tags)
Ejemplo n.º 2
0
def nearest_trig_np(trig, sentences, all_tags, get_words=False):
    """
    Describe the noun phrase that ends closest to the trigger in its sentence.

    Every 'NP' / 'NP-PRD' subtree of the trigger's sentence tree is scored by
    the absolute token distance between its last (pos, word) pair and the
    trigger's (pos, word) pair. The subtree with the smallest distance is
    chosen (the first one found wins ties). If no subtree is closer than the
    cap of 99 tokens, the whole sentence tree is used instead.

    @type trig: vpe_objects.Auxiliary
    @type sentences: vpe_objects.AllSentences
    @param all_tags: passed through unchanged to encode_pos_tag_frequencies.
    @param get_words: when true, return the leaves of the chosen subtree
        instead of the encoded tag frequencies.
    @return: closest_np.leaves() (or [] if the chosen object has no leaves
        attribute) when get_words is true; otherwise the result of
        encode_pos_tag_frequencies applied to the labels of the chosen
        subtree's smallest subtrees.
    @raise ValueError: if the trigger's (pos, word) pair, or the last
        (pos, word) pair of a candidate NP, is not present in the sentence.
    """
    t = sentences.get_sentence_tree(trig.sentnum)
    tree_tuples = nt.pos_word_tuples(t)
    all_nps = nt.find_subtree_phrases(t, ['NP', 'NP-PRD'])

    # NOTE(review): index() returns the first matching (pos, word) pair, so an
    # identical pair earlier in the sentence would be matched instead of the
    # trigger itself -- confirm whether that matters to callers.
    trig_tup = (trig.pos, trig.word)
    trig_tup_idx = tree_tuples.index(trig_tup)

    # A candidate must be strictly closer than this cap to be selected.
    closest_np_value = 99
    closest_np = None
    for NP in all_nps:
        last_np_word_idx = tree_tuples.index(nt.pos_word_tuples(NP)[-1])
        distance = abs(trig_tup_idx - last_np_word_idx)
        if distance < closest_np_value:
            # Store the absolute distance. Keeping the signed difference would
            # leave a negative bound after an NP that ends past the trigger,
            # and no later (possibly closer) NP could ever beat it.
            closest_np_value = distance
            closest_np = NP
    if closest_np is None:
        closest_np = t

    if get_words:
        try:
            return closest_np.leaves()
        except AttributeError:
            return []

    # Presumably the smallest subtrees are the preterminals, so these labels
    # would be POS tags -- verify against nt.getsmallestsubtrees.
    np_pos = [
        subtree.label() for subtree in nt.getsmallestsubtrees(closest_np)
    ]

    return encode_pos_tag_frequencies(np_pos, all_tags)
Ejemplo n.º 3
0
def nearest_ant_np(ant, sentences, all_tags, get_words=False):
    """
    Describe the noun phrase that ends closest to the antecedent.

    The antecedent is located by the (pos, word) pair at the middle index of
    ant.sub_sentdict. When the antecedent and its trigger are in different
    sentences, the reference position is len(tree_tuples), i.e. one past the
    last token of the antecedent's sentence. Every 'NP' / 'NP-PRD' subtree is
    then scored by the absolute token distance between its last (pos, word)
    pair and that reference position; the smallest distance wins (the first
    one found wins ties). Subtrees 99 or more tokens away are never selected.

    @type ant: vpe_objects.Antecedent
    @type sentences: vpe_objects.AllSentences
    @param all_tags: passed through unchanged to encode_pos_tag_frequencies.
    @param get_words: when true, return the leaves of the chosen subtree
        instead of the encoded tag frequencies.
    @return: closest_np.leaves() when get_words is true, or [] if that raises
        AttributeError (e.g. no subtree was selected); otherwise the result
        of encode_pos_tag_frequencies applied to the labels of the chosen
        subtree's smallest subtrees (an empty label list if collecting them
        raises AttributeError).
    @raise ValueError: if the antecedent's (pos, word) pair (same-sentence
        case), or the last (pos, word) pair of a candidate NP, is not present
        in the sentence.
    """
    t = sentences.get_sentence_tree(ant.sentnum)
    tree_tuples = nt.pos_word_tuples(t)
    all_nps = nt.find_subtree_phrases(t, ['NP', 'NP-PRD'])

    # Floor division keeps the index an int; with true division (Python 3, or
    # `from __future__ import division`) `/` yields a float, which cannot be
    # used as a sequence index.
    mid = len(ant.sub_sentdict) // 2
    ant_tup = (ant.sub_sentdict.pos[mid], ant.sub_sentdict.words[mid])
    if ant.sentnum != ant.trigger.sentnum:
        ant_tup_idx = len(tree_tuples)
    else:
        # NOTE(review): index() returns the first matching (pos, word) pair,
        # which may not be the antecedent's own token if the pair repeats.
        ant_tup_idx = tree_tuples.index(ant_tup)

    # A candidate must be strictly closer than this cap to be selected.
    closest_np_value = 99
    closest_np = None
    for NP in all_nps:
        last_np_word_idx = tree_tuples.index(nt.pos_word_tuples(NP)[-1])
        distance = abs(ant_tup_idx - last_np_word_idx)
        if distance < closest_np_value:
            # Store the absolute distance. Keeping the signed difference would
            # leave a negative bound after an NP that ends past the reference
            # position, and no later (possibly closer) NP could ever beat it.
            closest_np_value = distance
            closest_np = NP

    if get_words:
        try:
            return closest_np.leaves()
        except AttributeError:
            # closest_np is None when no NP qualified.
            return []

    try:
        np_pos = [subtree.label() for subtree in nt.getsmallestsubtrees(closest_np)]
    except AttributeError:
        # Presumably reached when closest_np is None -- verify against
        # nt.getsmallestsubtrees.
        np_pos = []

    return encode_pos_tag_frequencies(np_pos, all_tags)
Ejemplo n.º 4
0
def nearest_ant_np(ant, sentences, all_tags, get_words=False):
    """
    Describe the noun phrase that ends closest to the antecedent.

    The antecedent is located by the (pos, word) pair at the middle index of
    ant.sub_sentdict. When the antecedent and its trigger are in different
    sentences, the reference position is len(tree_tuples), i.e. one past the
    last token of the antecedent's sentence. Every 'NP' / 'NP-PRD' subtree is
    then scored by the absolute token distance between its last (pos, word)
    pair and that reference position; the smallest distance wins (the first
    one found wins ties). Subtrees 99 or more tokens away are never selected.

    @type ant: vpe_objects.Antecedent
    @type sentences: vpe_objects.AllSentences
    @param all_tags: passed through unchanged to encode_pos_tag_frequencies.
    @param get_words: when true, return the leaves of the chosen subtree
        instead of the encoded tag frequencies.
    @return: closest_np.leaves() when get_words is true, or [] if that raises
        AttributeError (e.g. no subtree was selected); otherwise the result
        of encode_pos_tag_frequencies applied to the labels of the chosen
        subtree's smallest subtrees (an empty label list if collecting them
        raises AttributeError).
    @raise ValueError: if the antecedent's (pos, word) pair (same-sentence
        case), or the last (pos, word) pair of a candidate NP, is not present
        in the sentence.
    """
    t = sentences.get_sentence_tree(ant.sentnum)
    tree_tuples = nt.pos_word_tuples(t)
    all_nps = nt.find_subtree_phrases(t, ['NP', 'NP-PRD'])

    # Floor division keeps the index an int; with true division (Python 3, or
    # `from __future__ import division`) `/` yields a float, which cannot be
    # used as a sequence index.
    mid = len(ant.sub_sentdict) // 2
    ant_tup = (ant.sub_sentdict.pos[mid],
               ant.sub_sentdict.words[mid])
    if ant.sentnum != ant.trigger.sentnum:
        ant_tup_idx = len(tree_tuples)
    else:
        # NOTE(review): index() returns the first matching (pos, word) pair,
        # which may not be the antecedent's own token if the pair repeats.
        ant_tup_idx = tree_tuples.index(ant_tup)

    # A candidate must be strictly closer than this cap to be selected.
    closest_np_value = 99
    closest_np = None
    for NP in all_nps:
        last_np_word_idx = tree_tuples.index(nt.pos_word_tuples(NP)[-1])
        distance = abs(ant_tup_idx - last_np_word_idx)
        if distance < closest_np_value:
            # Store the absolute distance. Keeping the signed difference would
            # leave a negative bound after an NP that ends past the reference
            # position, and no later (possibly closer) NP could ever beat it.
            closest_np_value = distance
            closest_np = NP

    if get_words:
        try:
            return closest_np.leaves()
        except AttributeError:
            # closest_np is None when no NP qualified.
            return []

    try:
        np_pos = [
            subtree.label() for subtree in nt.getsmallestsubtrees(closest_np)
        ]
    except AttributeError:
        # Presumably reached when closest_np is None -- verify against
        # nt.getsmallestsubtrees.
        np_pos = []

    return encode_pos_tag_frequencies(np_pos, all_tags)