コード例 #1
0
def answer_where(s):
    """Return the first prepositional phrase of *s* that mentions a place.

    The sentence is parsed with ``tree_parser`` and its "PP" phrases are
    examined in the order ``get_phrases`` yields them.  Each phrase is
    converted back to text, tokenized and run through ``tagger``; the first
    phrase containing a token tagged ``"LOCATION"`` or ``"ORGANIZATION"``
    is returned with surrounding whitespace stripped.

    NOTE(review): ``tagger`` is presumably a named-entity tagger that
    returns ``(token, tag)`` pairs -- confirm against its definition.

    Returns an empty string when no phrase qualifies.
    """
    place_tags = ("LOCATION", "ORGANIZATION")
    tree = tree_parser.sent_to_tree(s)
    for phrase in tree_parser.get_phrases(tree, "PP", False, True):
        phrase_text = tree_parser.tree_to_sent(phrase)
        tokens = nltk.tokenize.word_tokenize(phrase_text)
        # Only index 1 (the tag) of every tagged entry is inspected.
        if any(entry[1] in place_tags for entry in tagger.tag(tokens)):
            return phrase_text.strip()
    return ""
コード例 #2
0
def answer_non_definitions(s, main_nps):
    """Extract a noun-phrase answer from sentence *s*.

    Sentences for which ``is_definition(s)`` is true are delegated to
    ``answer_definitions``.  Otherwise the sentence is split around its main
    verb and a noun phrase is taken from one of the two sides.

    Args:
        s: The sentence text to extract the answer from.
        main_nps: Sequence of parse-tree nodes; only the first one is used,
            converted to text with ``tree_parser.tree_to_sent``.
            NOTE(review): presumably the main noun phrases of the question
            being answered -- confirm against the caller.

    Returns:
        The text of the first "NP" phrase found on the selected side of the
        verb, or the raw text of the other side when no such phrase exists.
        An empty string is returned when ``main_nps`` is empty, when no
        usable verb phrase is found, or when the verb does not split ``s``
        into at least two parts.
    """
    if is_definition(s):
        return answer_definitions(s, main_nps)
    if not main_nps:
        return ""

    main_np = tree_parser.tree_to_sent(main_nps[0])
    parsed_s = tree_parser.sent_to_tree(s)
    vps = tree_parser.get_phrases(parsed_s, "VP", True, True)

    # Take the first verb phrase whose label is not "VBN".  When every
    # candidate is "VBN" (or there are none) there is nothing to split on;
    # previously the all-"VBN" case left ``main_vp`` unbound and crashed.
    main_vp = next((vp for vp in vps if vp.label() != "VBN"), None)
    if main_vp is None:
        return ""

    verb = get_main_verb(main_vp)
    candidates = s.split(" " + verb)
    if len(candidates) < 2:
        return ""

    # If the text after the verb overlaps the main noun phrase, look for the
    # answer there and fall back to the text before the verb; otherwise do
    # the opposite.
    if is_overlap(main_np, candidates[1]):
        target, fallback = candidates[1], candidates[0]
    else:
        target, fallback = candidates[0], candidates[1]

    ans_tree = tree_parser.sent_to_tree(target)
    s_nps = tree_parser.get_phrases(ans_tree, "NP", True, False)
    if len(s_nps) > 0:
        return tree_parser.tree_to_sent(s_nps[0])
    return fallback
コード例 #3
0
def answer_when(s):
    """Return a date/time expression found in sentence *s*.

    Two strategies are tried in order:

    1. Each "PP" phrase of the parsed sentence is tokenized and tagged; the
       first phrase containing a token tagged ``"DATE"`` or ``"TIME"`` is
       returned, stripped of surrounding whitespace.
    2. Otherwise the whole sentence is tagged and the first contiguous run
       of ``"DATE"``/``"TIME"`` tokens is returned, joined by single spaces.

    NOTE(review): ``tagger`` is presumably a named-entity tagger that
    returns ``(token, tag)`` pairs -- confirm against its definition.

    Returns an empty string when neither strategy finds anything.
    """
    time_tags = ("DATE", "TIME")

    parsed_s = tree_parser.sent_to_tree(s)
    pps = tree_parser.get_phrases(parsed_s, "PP", False, True)
    for pp in pps:
        sent_pp = tree_parser.tree_to_sent(pp)
        tagged_pp = tagger.tag(nltk.tokenize.word_tokenize(sent_pp))
        if any(tup[1] in time_tags for tup in tagged_pp):
            return sent_pp.strip()

    # Fallback: collect the first unbroken run of DATE/TIME tokens.  A single
    # bounded pass replaces the old index-based scan, which read past the end
    # of the list when the run reached the final token, and which relied on
    # the Python 2-only ``xrange``.
    tagged_sent = tagger.tag(nltk.tokenize.word_tokenize(s))
    run = []
    for tup in tagged_sent:
        if tup[1] in time_tags:
            run.append(tup[0])
        elif run:
            # The run has ended; later DATE/TIME tokens are ignored.
            break
    return " ".join(run).strip()