Esempio n. 1
0
def main(processes=1):
    """Run ``score`` on every parsed sentence of the first 10 corpus files.

    One ``(fileid, sent_num, parse_tree)`` work item is built per sentence
    returned by ``ptb.parsed_sents(fileid)``.

    Args:
        processes: Number of worker processes. With a value greater than 1
            the work items are dispatched through a ``multiprocessing`` pool;
            otherwise they are scored one by one in the calling process.
    """
    ptb = Constants().ptb
    fileids = list(ptb.fileids())

    # Only the first 10 files are processed.
    params = [
        (fileid, sent_num, parse_tree)
        for fileid in fileids[:10]
        for sent_num, parse_tree in enumerate(ptb.parsed_sents(fileid))
    ]

    if processes > 1:
        # The context manager tears the pool down once starmap() has
        # returned, so worker processes are not left running.
        with Pool(processes) as pool:
            # Items are submitted ordered by (fileid, sent_num).
            pool.starmap(score, sorted(params, key=lambda x: (x[0], x[1])))
    else:
        # Sequential path keeps the order in which the items were collected.
        for param in params:
            score(*param)
Esempio n. 2
0
    def trees_from_propbank(
        self,
        word_pos,
        single_anchor=True,
        verb_anchor=True,
        arg_num_match=True,
        gerund_match=True,
        by_phrase_match=True,
    ):
        """Collect the grammar trees compatible with a word's PropBank instance.

        Every tree of every tree family returned by ``self.tree_families``
        for ``word_pos`` is considered; each enabled filter below may
        discard it.

        Args:
            word_pos: Object providing ``wsj_entry`` (with ``file_num``,
                ``sent_num`` and ``by_in_word_list()``), ``word`` and ``pos``.
            single_anchor: Drop trees with more than one anchor position.
            verb_anchor: Drop trees whose first anchor's ``prefix()`` is
                not ``"V"``.
            arg_num_match: Drop trees whose number of substitution nodes
                differs from the number of numbered args of the instance.
            gerund_match: Drop trees where exactly one of these holds:
                ``word_pos.pos == "VBN"``; the tree name contains ``"G"``
                or ``"D"``.
            by_phrase_match: Drop trees with ``"by"`` in their name when
                ``by_in_word_list()`` is false for the sentence.

        Returns:
            A set of ``("V", tree_name)`` tuples for the surviving trees.
        """
        tree_families = self.tree_families(word_pos)
        constants = Constants()
        instance = constants.propbank.get_instance(
            word_pos.wsj_entry.file_num, word_pos.wsj_entry.sent_num, word_pos.word
        )
        grammar = constants.grammar

        # Loop-invariant values, computed once and only when their filter
        # is enabled (a disabled filter never touches its inputs).
        expected_args = len(instance.numbered_args()) if arg_num_match else None
        skip_by_trees = by_phrase_match and not word_pos.wsj_entry.by_in_word_list()
        # NOTE(review): the filter is described as a gerund check but tests
        # the "VBN" tag — confirm this is the intended tag.
        pos_is_vbn = word_pos.pos == "VBN"

        trees = set()
        for tf in tree_families:
            for t in grammar.get_trees_from_tree_family(tf):
                if single_anchor or verb_anchor:
                    anchors = t.anchor_positions()

                    # Only want to handle single anchors
                    if single_anchor and len(anchors) > 1:
                        continue

                    # Only want to handle verbal anchors
                    if verb_anchor and anchors[0].prefix() != "V":
                        continue

                # Require same number of args
                if arg_num_match and len(t.subst_nodes()) != expected_args:
                    continue

                name = t.tree_name

                # Skip when the word and the tree disagree on the G/D marking
                if gerund_match and pos_is_vbn != ("G" in name or "D" in name):
                    continue

                # Skip by-phrases if no "by" in sentence
                if skip_by_trees and "by" in name:
                    continue

                trees.add(("V", name))
        return trees