def initialization_feature(instance, use_gold=False):
    """Decode the JSON fields of ``instance`` and cache derived data on it.

    ``instance`` is mutated in place. Keys read: ``relation_json``,
    ``arg1_parse_json`` and ``arg2_parse_json``. Keys written, in order:
    ``relation``, ``arg1_parse``, ``arg2_parse``, ``arg1_dependency_tree``,
    ``arg2_dependency_tree``, ``parse_tree1``, ``parse_tree2``,
    ``arg1_token_indices``, ``arg2_token_indices``, ``sentence1_tokens``,
    ``sentence2_tokens``, ``arg1_majority_head_verb_index`` and
    ``arg2_majority_head_verb_index``.

    Args:
        instance: mutable mapping holding the JSON-encoded relation and the
            two JSON-encoded argument parses.
        use_gold: accepted for interface compatibility; not used here.

    Returns:
        Always an empty list.
    """
    relation = Relation.from_json(instance['relation_json'])
    instance['relation'] = relation

    arg1_parse = json.loads(instance['arg1_parse_json'])
    instance['arg1_parse'] = arg1_parse
    arg2_parse = json.loads(instance['arg2_parse_json'])
    instance['arg2_parse'] = arg2_parse

    # A parse may carry no dependency tree; that is mirrored as None.
    arg1_dep_dict = arg1_parse['dependency_tree']
    instance['arg1_dependency_tree'] = (
        None if arg1_dep_dict is None
        else DependencyGraph.from_dict(arg1_dep_dict))
    arg2_dep_dict = arg2_parse['dependency_tree']
    instance['arg2_dependency_tree'] = (
        None if arg2_dep_dict is None
        else DependencyGraph.from_dict(arg2_dep_dict))

    instance['parse_tree1'] = Tree(arg1_parse['parsetree'])
    instance['parse_tree2'] = Tree(arg2_parse['parsetree'])

    arg1_indices = get_indices_from_span_ranges(
        relation.arg1.text_span_ranges, arg1_parse)
    instance['arg1_token_indices'] = arg1_indices
    arg2_indices = get_indices_from_span_ranges(
        relation.arg2.text_span_ranges, arg2_parse)
    instance['arg2_token_indices'] = arg2_indices

    # The first element of each entry in 'words' is kept as the token.
    instance['sentence1_tokens'] = [word[0] for word in arg1_parse['words']]
    instance['sentence2_tokens'] = [word[0] for word in arg2_parse['words']]

    instance['arg1_majority_head_verb_index'] = _get_majority_head_verb_index(
        arg1_parse, arg1_indices)
    instance['arg2_majority_head_verb_index'] = _get_majority_head_verb_index(
        arg2_parse, arg2_indices)

    return []
def convert_dependencies_to_graph(dependencies):
    """Build a DependencyGraph from the first ``'root'`` dependency entry.

    Each entry is indexed positionally: ``dep[0]`` is compared against
    ``'root'`` and passed as the edge's third argument, while ``dep[1]`` and
    ``dep[2]`` become the source and target nodes (presumably relation
    label, head token and dependent token -- confirm against the parser).

    Args:
        dependencies: non-empty sequence of indexable dependency entries.

    Returns:
        A DependencyGraph. If no entry has ``dep[0] == 'root'`` the graph is
        returned without any nodes or edges added by this function.

    Raises:
        AssertionError: if ``dependencies`` is empty (when asserts are on).
    """
    graph = DependencyGraph()
    assert len(dependencies) > 0

    # Only the first entry tagged 'root' is used; later ones are ignored.
    root_dep = next((dep for dep in dependencies if dep[0] == 'root'), None)
    if root_dep is None:
        return graph

    top_node = Node(root_dep[1])
    first_child = Node(root_dep[2])
    top_edge = Edge(top_node, first_child, root_dep[0])
    top_node.edges.append(top_edge)

    graph.root_node = top_node
    graph.add_node(top_node)
    graph.add_node(first_child)
    graph.add_edge(top_edge)

    # Expand everything reachable below the first real node.
    _construct_subgraph(dependencies, first_child, graph)
    return graph
def convert_dependencies_to_graph(dependencies):
    """Build a DependencyGraph from the first ``'root'`` dependency entry.

    Each entry is indexed positionally: ``dep[0]`` is compared against
    ``'root'`` and passed as the edge's third argument, while ``dep[1]`` and
    ``dep[2]`` become the source and target nodes (presumably relation
    label, head token and dependent token -- confirm against the parser).

    Args:
        dependencies: non-empty sequence of indexable dependency entries.

    Returns:
        A DependencyGraph. If no entry has ``dep[0] == 'root'`` the graph is
        returned without any nodes or edges added by this function.

    Raises:
        AssertionError: if ``dependencies`` is empty (when asserts are on).
    """
    graph = DependencyGraph()
    assert len(dependencies) > 0

    # Only the first entry tagged 'root' is used; later ones are ignored.
    root_dep = next((dep for dep in dependencies if dep[0] == 'root'), None)
    if root_dep is None:
        return graph

    top_node = Node(root_dep[1])
    first_child = Node(root_dep[2])
    top_edge = Edge(top_node, first_child, root_dep[0])
    top_node.edges.append(top_edge)

    graph.root_node = top_node
    graph.add_node(top_node)
    graph.add_node(first_child)
    graph.add_edge(top_edge)

    # Expand everything reachable below the first real node.
    _construct_subgraph(dependencies, first_child, graph)
    return graph
return graph def _construct_subgraph(dependencies, root_node, graph): for dep in dependencies: if dep[1] == root_node.token: new_node = Node(dep[2]) new_edge = Edge(root_node, new_node, dep[0]) root_node.edges.append(new_edge) graph.add_node(new_node) graph.add_edge(new_edge) _construct_subgraph(dependencies, new_node, graph) import json from tpl.misc import TJsonEncoder if __name__ == '__main__': """For testing purposes""" from tpl.language.stanford_parser import Parser parser = Parser() parse_result = parser.parse( 'Barack Obama, who you talked to yesterday, play basketball.') sentence = parse_result['sentences'][0] split_clauses_from_sentence(sentence) print json.dumps(sentence, indent=2, cls=TJsonEncoder) print json.dumps(sentence['dependency_tree'].to_dict(), indent=2) dgraph = DependencyGraph.from_dict(sentence['dependency_tree'].to_dict()) print json.dumps(dgraph.to_dict(), indent=2) print dgraph.to_dict() == DependencyGraph.from_dict( dgraph.to_dict()).to_dict()
graph.add_edge(root_edge) break if root_node is not None: _construct_subgraph(dependencies, first_real_node, graph) return graph def _construct_subgraph(dependencies, root_node, graph): for dep in dependencies: if dep[1] == root_node.token: new_node = Node(dep[2]) new_edge = Edge(root_node, new_node, dep[0]) root_node.edges.append(new_edge) graph.add_node(new_node) graph.add_edge(new_edge) _construct_subgraph(dependencies, new_node, graph) import json from tpl.misc import TJsonEncoder if __name__ == '__main__': """For testing purposes""" from tpl.language.stanford_parser import Parser parser = Parser() parse_result = parser.parse('Barack Obama, who you talked to yesterday, play basketball.') sentence = parse_result['sentences'][0] split_clauses_from_sentence(sentence) print json.dumps(sentence, indent=2, cls=TJsonEncoder) print json.dumps(sentence['dependency_tree'].to_dict(), indent=2) dgraph = DependencyGraph.from_dict(sentence['dependency_tree'].to_dict()) print json.dumps(dgraph.to_dict(), indent=2) print dgraph.to_dict() == DependencyGraph.from_dict(dgraph.to_dict()).to_dict()