def generate_nonexp_relations(self, article):
    """Create non-explicit relations between adjacent sentences of a paragraph.

    Every paragraph in ``article.paragraphs`` is scanned for consecutive
    sentence pairs. A pair is skipped when ``article.has_exp_relation``
    returns a true value for the id of the first sentence; otherwise a new
    ``Relation`` is populated (first sentence as Arg1, second sentence as
    Arg2) and appended to ``article.nonexp_relations``.

    Nothing is returned; ``article`` is modified in place.
    """
    for paragraph in article.paragraphs:
        pairs = zip(paragraph.sentences[:-1], paragraph.sentences[1:])
        for first, second in pairs:
            if article.has_exp_relation(first.id):
                continue

            # TODO: Add detail implementation
            relation = Relation()
            relation.article = article
            relation.doc_id = article.id

            # Arg1: the earlier sentence. A null parse tree contributes no
            # parsed root.
            if first.tree.is_null():
                relation.arg1s['parsed'] = []
            else:
                relation.arg1s['parsed'] = [first.tree.root]
            relation.arg1_leaves = self.remove_leading_tailing_punc(
                first.leaves)
            arg1_leaves = relation.arg1_leaves
            relation.arg1_addr = [leaf.leaf_id for leaf in arg1_leaves]
            # The sentence id is read from the tree of the LAST Arg1 leaf,
            # or set to -1 when the leaf list is empty.
            if len(arg1_leaves) > 0:
                relation.arg1_sid = arg1_leaves[-1].goto_tree().sent_id
            else:
                relation.arg1_sid = -1
            relation.arg1_text = ' '.join(
                leaf.value for leaf in arg1_leaves)

            # Arg2: the later sentence, handled the same way except that the
            # sentence id is read from the FIRST leaf.
            if second.tree.is_null():
                relation.arg2s['parsed'] = []
            else:
                relation.arg2s['parsed'] = [second.tree.root]
            relation.arg2_leaves = self.remove_leading_tailing_punc(
                second.leaves)
            arg2_leaves = relation.arg2_leaves
            relation.arg2_addr = [leaf.leaf_id for leaf in arg2_leaves]
            if len(arg2_leaves) > 0:
                relation.arg2_sid = arg2_leaves[0].goto_tree().sent_id
            else:
                relation.arg2_sid = -1
            relation.arg2_text = ' '.join(
                leaf.value for leaf in arg2_leaves)

            article.nonexp_relations.append(relation)
def generate_nonexp_relations(self, article):
    """Create non-explicit relations across and within sentences.

    Two passes are made over ``article.sentences``:

    1. Inter-sentence: for each pair of consecutive sentences, unless
       ``article.has_exp_inter_relation`` is true for the first sentence's
       id, a ``Relation`` with the first sentence as Arg1 and the second as
       Arg2 is appended to ``article.nonexp_relations``.
    2. Intra-sentence: for each sentence with more than one clause, every
       pair of consecutive clauses yields a ``Relation`` (first clause as
       Arg1, second as Arg2), unless ``article.has_exp_intra_relation`` is
       true for the sentence's id.

    Nothing is returned; ``article`` is modified in place.
    """
    # Pass 1: relations between adjacent sentences.
    sentence_pairs = zip(article.sentences[:-1], article.sentences[1:])
    for first, second in sentence_pairs:
        if article.has_exp_inter_relation(first.id):
            continue

        # TODO: Add detail implementation
        relation = Relation()
        relation.article = article
        relation.doc_id = article.id

        # Arg1: a null parse tree contributes no parsed root.
        if first.tree.is_null():
            relation.arg1s['parsed'] = []
        else:
            relation.arg1s['parsed'] = [first.tree.root]
        relation.arg1_leaves = self.remove_leading_tailing_punc(first.leaves)
        arg1_leaves = relation.arg1_leaves
        relation.arg1_addr = [leaf.leaf_id for leaf in arg1_leaves]
        # Sentence id is read from the tree of the LAST Arg1 leaf, or is -1
        # when the leaf list is empty.
        if len(arg1_leaves) > 0:
            relation.arg1_sid = arg1_leaves[-1].goto_tree().sent_id
        else:
            relation.arg1_sid = -1
        relation.arg1_text = ' '.join(leaf.value for leaf in arg1_leaves)

        # Arg2: same steps, but the sentence id is read from the FIRST leaf.
        if second.tree.is_null():
            relation.arg2s['parsed'] = []
        else:
            relation.arg2s['parsed'] = [second.tree.root]
        relation.arg2_leaves = self.remove_leading_tailing_punc(second.leaves)
        arg2_leaves = relation.arg2_leaves
        relation.arg2_addr = [leaf.leaf_id for leaf in arg2_leaves]
        if len(arg2_leaves) > 0:
            relation.arg2_sid = arg2_leaves[0].goto_tree().sent_id
        else:
            relation.arg2_sid = -1
        relation.arg2_text = ' '.join(leaf.value for leaf in arg2_leaves)

        article.nonexp_relations.append(relation)

    # Pass 2: sentence intra nonexp relation (adjacent clauses).
    for sentence in article.sentences:
        tree = sentence.tree
        if len(sentence.clauses) > 1:
            clause_pairs = zip(sentence.clauses[:-1], sentence.clauses[1:])
            for left_clause, right_clause in clause_pairs:
                if article.has_exp_intra_relation(sentence.id):
                    continue

                relation = Relation()
                relation.article = article
                relation.doc_id = article.id

                relation.arg1s['parsed'] = tree.find_subtrees(left_clause)
                relation.arg1_leaves = self.remove_leading_tailing_punc(
                    left_clause)
                arg1_leaves = relation.arg1_leaves
                relation.arg1_addr = [leaf.leaf_id for leaf in arg1_leaves]
                # Both arguments live in the same sentence.
                relation.arg1_sid = sentence.id
                relation.arg1_text = ' '.join(
                    leaf.value for leaf in arg1_leaves)

                relation.arg2s['parsed'] = tree.find_subtrees(right_clause)
                relation.arg2_leaves = self.remove_leading_tailing_punc(
                    right_clause)
                arg2_leaves = relation.arg2_leaves
                relation.arg2_addr = [leaf.leaf_id for leaf in arg2_leaves]
                relation.arg2_sid = sentence.id
                relation.arg2_text = ' '.join(
                    leaf.value for leaf in arg2_leaves)

                article.nonexp_relations.append(relation)
def prepare_data(self, parse_path, rel_path, which, to_file):
    """Emit features for annotated non-explicit relations and NoRel pairs.

    Relations are loaded with ``Corpus.read_relations(rel_path)`` and the
    articles with ``Corpus.read_parses(parse_path, rel_dict)``. For every
    relation of every article whose ``rel_type`` is not ``'Explicit'``, the
    senses (spaces replaced by underscores) are passed as labels to
    ``self.print_features``. Afterwards, every pair of consecutive sentences
    for which ``art.has_inter_relation`` is false for the first sentence's id
    is emitted with the single label ``'NoRel'``.

    Args:
        parse_path: forwarded to ``Corpus.read_parses``.
        rel_path: forwarded to ``Corpus.read_relations``.
        which: when equal to ``'test'``, all labels of a relation are joined
            with ``'|'`` into one label; any other value emits the labels
            individually.
        to_file: forwarded unchanged to ``self.print_features``.

    Returns:
        None.
    """
    rel_dict = Corpus.read_relations(rel_path)
    articles = []
    for art in Corpus.read_parses(parse_path, rel_dict):
        articles.append(art)
        for rel in art.relations:
            rel.article = art
            rel.get_arg_leaves()
            if rel.rel_type == 'Explicit':
                continue
            # De-duplicate, then sort: iterating a bare set of strings gives
            # an order that can differ between interpreter runs, which made
            # the joined test label (and the emission order) unstable.
            labels = sorted({s.replace(' ', '_') for s in rel.sense})
            if which == 'test':
                labels = ['|'.join(labels)]
            self.print_features(rel, labels, to_file)

    # add NoRel relations
    for art in articles:
        for s1, s2 in zip(art.sentences[:-1], art.sentences[1:]):
            if art.has_inter_relation(s1.id):
                continue

            rel = Relation()
            rel.article = art
            rel.doc_id = art.id

            # Arg1: a null parse tree contributes no parsed root.
            rel.arg1s['parsed'] = [] if s1.tree.is_null() else [s1.tree.root]
            rel.arg1_leaves = self.remove_leading_tailing_punc(s1.leaves)
            rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
            # Sentence id comes from the tree of the LAST Arg1 leaf; -1 when
            # the leaf list is empty.
            if len(rel.arg1_leaves) > 0:
                rel.arg1_sid = rel.arg1_leaves[-1].goto_tree().sent_id
            else:
                rel.arg1_sid = -1
            rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)

            # Arg2: same steps, sentence id taken from the FIRST leaf.
            rel.arg2s['parsed'] = [] if s2.tree.is_null() else [s2.tree.root]
            rel.arg2_leaves = self.remove_leading_tailing_punc(s2.leaves)
            rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
            if len(rel.arg2_leaves) > 0:
                rel.arg2_sid = rel.arg2_leaves[0].goto_tree().sent_id
            else:
                rel.arg2_sid = -1
            rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)

            self.print_features(rel, ['NoRel'], to_file)
def generate_nonexp_relations(self, article):
    """Add a non-explicit relation for each uncovered adjacent sentence pair.

    Sentences are paired with their successor inside each paragraph of
    ``article.paragraphs``. When ``article.has_exp_relation`` is false for
    the id of the earlier sentence, a ``Relation`` is built with the earlier
    sentence as Arg1 and the later one as Arg2, then appended to
    ``article.nonexp_relations``.

    The method returns nothing and mutates ``article``.
    """
    for para in article.paragraphs:
        for prev_sent, next_sent in zip(para.sentences[:-1],
                                        para.sentences[1:]):
            if article.has_exp_relation(prev_sent.id):
                continue

            # TODO: Add detail implementation
            new_rel = Relation()
            new_rel.article = article
            new_rel.doc_id = article.id

            # --- Arg1 (earlier sentence) ---
            # No parsed root is recorded for a null tree.
            new_rel.arg1s['parsed'] = (
                [] if prev_sent.tree.is_null() else [prev_sent.tree.root])
            new_rel.arg1_leaves = self.remove_leading_tailing_punc(
                prev_sent.leaves)
            leaves1 = new_rel.arg1_leaves
            new_rel.arg1_addr = [node.leaf_id for node in leaves1]
            # -1 marks an empty leaf list; otherwise use the sentence id of
            # the tree owning the last leaf.
            new_rel.arg1_sid = (
                -1 if not len(leaves1) > 0
                else leaves1[-1].goto_tree().sent_id)
            new_rel.arg1_text = ' '.join(node.value for node in leaves1)

            # --- Arg2 (later sentence) ---
            new_rel.arg2s['parsed'] = (
                [] if next_sent.tree.is_null() else [next_sent.tree.root])
            new_rel.arg2_leaves = self.remove_leading_tailing_punc(
                next_sent.leaves)
            leaves2 = new_rel.arg2_leaves
            new_rel.arg2_addr = [node.leaf_id for node in leaves2]
            # Same rule as Arg1, but based on the first leaf.
            new_rel.arg2_sid = (
                -1 if not len(leaves2) > 0
                else leaves2[0].goto_tree().sent_id)
            new_rel.arg2_text = ' '.join(node.value for node in leaves2)

            article.nonexp_relations.append(new_rel)
def prepare_data(self, parse_path, rel_path, which, to_file):
    """Print features for non-explicit relations, then for NoRel pairs.

    Step 1 reads relations via ``Corpus.read_relations(rel_path)`` and walks
    the articles produced by ``Corpus.read_parses(parse_path, rel_dict)``.
    Each relation gets its ``article`` set and ``get_arg_leaves()`` called;
    relations whose ``rel_type`` is ``'Explicit'`` are then skipped. For the
    rest, the senses (with spaces turned into underscores) become the labels
    handed to ``self.print_features``.

    Step 2 walks consecutive sentence pairs of every collected article and,
    where ``art.has_inter_relation`` is false for the first sentence's id,
    prints a freshly built ``Relation`` with the label ``'NoRel'``.

    Args:
        parse_path: passed to ``Corpus.read_parses``.
        rel_path: passed to ``Corpus.read_relations``.
        which: ``'test'`` collapses a relation's labels into a single
            ``'|'``-joined label; other values leave them separate.
        to_file: passed through to ``self.print_features``.

    Returns:
        None.
    """
    rel_dict = Corpus.read_relations(rel_path)
    articles = []
    for art in Corpus.read_parses(parse_path, rel_dict):
        articles.append(art)
        for rel in art.relations:
            rel.article = art
            rel.get_arg_leaves()
            if rel.rel_type == 'Explicit':
                continue
            # Unique labels in sorted order. A plain set would be iterated
            # in an order that is not guaranteed to repeat across runs, so
            # the '|'-joined test label could change from run to run.
            unique_labels = {s.replace(' ', '_') for s in rel.sense}
            labels = sorted(unique_labels)
            if which == 'test':
                labels = ['|'.join(labels)]
            self.print_features(rel, labels, to_file)

    # add NoRel relations
    for art in articles:
        for s1, s2 in zip(art.sentences[:-1], art.sentences[1:]):
            if art.has_inter_relation(s1.id):
                continue

            rel = Relation()
            rel.article = art
            rel.doc_id = art.id

            # Arg1: no parsed root is recorded for a null tree.
            rel.arg1s['parsed'] = [s1.tree.root] if not s1.tree.is_null() else []
            rel.arg1_leaves = self.remove_leading_tailing_punc(s1.leaves)
            rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
            # Sentence id of the tree owning the LAST leaf, or -1 if there
            # are no leaves.
            rel.arg1_sid = (rel.arg1_leaves[-1].goto_tree().sent_id
                            if len(rel.arg1_leaves) > 0 else -1)
            rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)

            # Arg2: same steps, sentence id taken from the FIRST leaf.
            rel.arg2s['parsed'] = [s2.tree.root] if not s2.tree.is_null() else []
            rel.arg2_leaves = self.remove_leading_tailing_punc(s2.leaves)
            rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
            rel.arg2_sid = (rel.arg2_leaves[0].goto_tree().sent_id
                            if len(rel.arg2_leaves) > 0 else -1)
            rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)

            self.print_features(rel, ['NoRel'], to_file)