def main(processes=1):
    """Run ``score`` on every parsed sentence of the first ten corpus files.

    One ``(fileid, sent_num, parse_tree)`` triple is collected per parsed
    sentence from the first ten file ids reported by ``Constants().ptb``.
    Each triple is then passed to ``score`` as positional arguments.

    Args:
        processes: Number of worker processes. With a value greater than 1
            the triples are sorted by ``(fileid, sent_num)`` and dispatched
            through a ``Pool``; otherwise they are scored one by one in the
            calling process, in collection order.
    """
    ptb = Constants().ptb
    fileids = list(ptb.fileids())
    params = [
        (fileid, sent_num, parse_tree)
        for fileid in fileids[:10]
        for sent_num, parse_tree in enumerate(ptb.parsed_sents(fileid))
    ]

    if processes > 1:
        # Use the pool as a context manager so the workers are always shut
        # down, even if scoring raises. starmap blocks until every task has
        # finished, so no work is pending when the pool exits.
        # NOTE(review): assumes Pool is multiprocessing.Pool - confirm import.
        with Pool(processes) as pool:
            pool.starmap(score, sorted(params, key=lambda x: (x[0], x[1])))
    else:
        for param in params:
            score(*param)
def trees_from_propbank(
    self,
    word_pos,
    single_anchor=True,
    verb_anchor=True,
    arg_num_match=True,
    gerund_match=True,
    by_phrase_match=True,
):
    """Collect the grammar trees compatible with a word's PropBank instance.

    Every tree of every family returned by ``self.tree_families(word_pos)``
    is run through a set of optional filters; the survivors are returned.

    Args:
        word_pos: Object providing ``word``, ``pos`` and ``wsj_entry``
            (with ``file_num``, ``sent_num`` and ``by_in_word_list()``).
        single_anchor: Drop trees that have more than one anchor position.
        verb_anchor: Drop trees whose first anchor's ``prefix()`` is not
            ``"V"``.
        arg_num_match: Drop trees whose number of substitution nodes differs
            from the number of numbered args of the PropBank instance.
        gerund_match: Drop trees where exactly one of these holds: the word's
            tag is ``"VBN"``; the tree name contains ``"G"`` or ``"D"``.
        by_phrase_match: Drop trees whose name contains ``"by"`` when
            ``by_in_word_list()`` is falsy for the sentence.

    Returns:
        A set of ``("V", tree_name)`` tuples.
    """
    families = self.tree_families(word_pos)
    lookup_instance = Constants().propbank.get_instance
    entry = word_pos.wsj_entry
    instance = lookup_instance(entry.file_num, entry.sent_num, word_pos.word)

    def is_filtered_out(tree):
        """Return True when any enabled filter rejects ``tree``."""
        # Multi-anchor trees are not handled.
        if single_anchor and len(tree.anchor_positions()) > 1:
            return True

        # Non-verbal anchors are not handled.
        if verb_anchor and tree.anchor_positions()[0].prefix() != "V":
            return True

        # Argument counts must agree.
        if arg_num_match and len(tree.subst_nodes()) != len(instance.numbered_args()):
            return True

        # The word and the tree must agree on being a gerund.
        # NOTE(review): the tag tested is "VBN", while the Penn Treebank
        # gerund tag is usually "VBG" - confirm this is intended.
        if gerund_match:
            word_is_vbn = word_pos.pos == "VBN"
            name = tree.tree_name
            tree_is_gerund = "G" in name or "D" in name
            if word_is_vbn != tree_is_gerund:
                return True

        # By-phrase trees need a "by" in the sentence.
        if (
            by_phrase_match
            and not word_pos.wsj_entry.by_in_word_list()
            and "by" in tree.tree_name
        ):
            return True

        return False

    return {
        ("V", tree.tree_name)
        for family in families
        for tree in Constants().grammar.get_trees_from_tree_family(family)
        if not is_filtered_out(tree)
    }