def merge(
        cls,
        cfg_node,
        sync_rule,
        left_sub_graph,  # type: SubGraph
        right_sub_graph,  # type: SubGraph
):
    """
    Instantiate the right-hand side of ``sync_rule`` and glue two sub-graphs
    into it.

    Every node of ``sync_rule.hrg.rhs`` first receives a fresh ``GraphNode``.
    The nodes of the two non-terminal edges (``sync_rule.rhs[0]`` and
    ``sync_rule.rhs[1]``, either of which may be ``None``) are then identified
    with the external nodes of ``left_sub_graph`` / ``right_sub_graph``.
    The remaining rule edges are transformed through ``cls.transform_edge``
    and united with the edges and nodes of the sub-graphs that were attached.

    :param cfg_node: node whose ``extra["DelphinSpan"]`` is assigned to new
        single-node edges that carry no span yet; also shown in the error
        message
    :param sync_rule: rule providing ``rhs`` and ``hrg``
    :raises Exception: if a transformed rule edge is not terminal
    :rtype: SubGraph
    """
    rule_graph = sync_rule.hrg.rhs

    # Start with a brand-new concrete node for every abstract rule node.
    nodes_mapping = {}
    for abstract in rule_graph.nodes:
        nodes_mapping[abstract] = GraphNode()

    # Abstract nodes of each non-terminal edge are unified with the external
    # nodes of the matching sub-graph (position 0 = left, 1 = right).
    children = (left_sub_graph, right_sub_graph)
    nt_edges = []
    unified = {}
    for position, child in enumerate(children):
        _, nt_edge = sync_rule.rhs[position]
        nt_edges.append(nt_edge)
        if nt_edge is None:
            continue
        assert len(child.external_nodes) == len(nt_edge.nodes)
        unified.update(zip(nt_edge.nodes, child.external_nodes))
    left_edge, right_edge = nt_edges
    nodes_mapping.update(unified)

    # Translate every rule edge except the two non-terminal placeholders.
    kept = []
    for rule_edge in rule_graph.edges:
        if rule_edge != left_edge and rule_edge != right_edge:
            kept.append(cls.transform_edge(nodes_mapping, rule_edge))
    edges = frozenset(kept)

    non_terminals = [candidate for candidate in edges
                     if not candidate.is_terminal]
    if non_terminals:
        raise Exception(
            "Non-terminals {} found by rule {} in node {}".format(
                non_terminals, sync_rule, cfg_node))

    # New single-node edges without a span take the span of cfg_node.
    for fresh in edges:
        if len(fresh.nodes) != 1 or fresh.span is not None:
            continue
        fresh.span = cfg_node.extra["DelphinSpan"]

    nodes = frozenset(nodes_mapping.values())
    for nt_edge, child in zip((left_edge, right_edge), children):
        if nt_edge is not None:
            edges = edges | child.graph.edges
            nodes = nodes | child.graph.nodes

    external_nodes = tuple(
        nodes_mapping[abstract] for abstract in sync_rule.hrg.lhs.nodes)
    merged_graph = HyperGraph(nodes, edges)
    return SubGraph(merged_graph, external_nodes)
def create_leaf_graph(cls, cfg_node, sync_rule):
    """
    Instantiate the right-hand side of ``sync_rule`` as a stand-alone
    sub-graph.

    Each node of ``sync_rule.hrg.rhs`` is mapped to a fresh ``GraphNode`` and
    each edge is converted with ``cls.transform_edge``. Every resulting
    single-node edge gets ``cfg_node.extra["DelphinSpan"]`` as its span. The
    external nodes are the images of ``sync_rule.hrg.lhs.nodes``.

    :rtype: SubGraph
    """
    rule = sync_rule.hrg

    nodes_mapping = {}
    for abstract in rule.rhs.nodes:
        nodes_mapping[abstract] = GraphNode()

    edges = frozenset(
        [cls.transform_edge(nodes_mapping, rule_edge)
         for rule_edge in rule.rhs.edges])

    # Single-node edges are stamped with the span of this CFG node.
    for fresh in edges:
        if len(fresh.nodes) != 1:
            continue
        fresh.span = cfg_node.extra["DelphinSpan"]

    concrete_nodes = frozenset(nodes_mapping.values())
    sub_graph = HyperGraph(concrete_nodes, edges)
    external_nodes = tuple(
        nodes_mapping[abstract] for abstract in sync_rule.hrg.lhs.nodes)
    return SubGraph(sub_graph, external_nodes)
def extract(
        cls,
        edges,  # type: Set[HyperEdge]
        internal_nodes,  # type: Set[GraphNode]
        external_nodes,  # type: Set[GraphNode]
        label,  # type: str
        cfg_rule=None):
    """
    Build a rule (``cls(lhs, rhs)``) from a set of edges with renamed nodes.

    Nodes are renamed to ``GraphNode("0")``, ``GraphNode("1")``, ... in the
    order of a structural hash. The hash starts from a constant, and is
    refined for 10 rounds from the labels of the incident edges, the position
    of the node inside each edge, the hashes of the nodes on those edges and
    whether the node is external.

    The node order of the left-hand-side edge is chosen as follows when there
    are exactly two external nodes: first a permutation that equals the
    ``nodes`` of some edge; otherwise, if ``cfg_rule`` has two children, the
    nodes of the single-node edges whose spans equal the children's spans
    (left first). In every other case the external nodes are sorted by their
    new integer name.

    :param cfg_rule: optional object with a ``child`` sequence whose items
        have a ``span`` attribute
    :return: ``(node_rename_map, rule)`` where ``node_rename_map`` maps each
        original node to its renamed ``GraphNode``
    """
    all_nodes = internal_nodes.union(external_nodes)

    # node -> [(edge, position of the node inside that edge), ...]
    incidences = defaultdict(list)
    for edge in edges:
        for position, node in enumerate(edge.nodes):
            incidences[node].append((edge, position))

    def edge_digest(hashes, edge, position):
        # Hash of one incidence: edge label, slot index and the current
        # hashes of all nodes on the edge.
        head = (edge.label + "#" + str(position)).encode()
        tail = b"".join(hashes[adjacent] + b"#" for adjacent in edge.nodes)
        return hashlib.md5(head + tail).digest()

    def neighbourhood_digest(hashes, node):
        # Incidence hashes are sorted so the result does not depend on the
        # order in which edges were visited.
        parts = [edge_digest(hashes, edge, position)
                 for edge, position in incidences[node]]
        parts.sort()
        return hashlib.md5(b"".join(parts)).digest()

    seed = hashlib.md5(b"13").digest()
    hashes = dict.fromkeys(all_nodes, seed)  # node -> hash
    for _ in range(10):
        # Each round only reads the hashes of the previous round.
        hashes = {
            node: hashlib.md5(
                neighbourhood_digest(hashes, node)
                + (b'\x01' if node in external_nodes else b'\x00')
            ).digest()
            for node in all_nodes
        }

    ranked = sorted(hashes, key=hashes.__getitem__)
    node_rename_map = {
        node: GraphNode(str(rank)) for rank, node in enumerate(ranked)
    }

    # get rhs
    renamed_edges = [
        HyperEdge((node_rename_map[node] for node in edge.nodes),
                  edge.label, edge.is_terminal)
        for edge in edges
    ]
    rhs = HyperGraph(frozenset(node_rename_map.values()),
                     frozenset(renamed_edges))

    # determine external nodes permutation
    def get_external_nodes_permutation():
        def by_new_name():
            return sorted((node_rename_map[i] for i in external_nodes),
                          key=lambda x: int(x.name))

        if len(external_nodes) != 2:
            return by_new_name()

        # Rule 1: follow the node order of an edge joining both externals.
        for candidate in permutations(external_nodes):
            for edge in edges:
                if edge.nodes == candidate:
                    return [node_rename_map[i] for i in candidate]

        # Rule 2: follow the left/right order of the two CFG children.
        if cfg_rule is not None and len(cfg_rule.child) == 2:
            left_span = cfg_rule.child[0].span
            right_span = cfg_rule.child[1].span
            left_node = [
                edge.nodes[0] for edge in edges
                if len(edge.nodes) == 1 and edge.span == left_span
            ]
            right_node = [
                edge.nodes[0] for edge in edges
                if len(edge.nodes) == 1 and edge.span == right_span
            ]
            if left_node and right_node and {
                    left_node[0], right_node[0]
            } == external_nodes:
                return [
                    node_rename_map[left_node[0]],
                    node_rename_map[right_node[0]]
                ]

        return by_new_name()

    # get lhs
    lhs = HyperEdge(get_external_nodes_permutation(),
                    label=label,
                    is_terminal=False)
    return node_rename_map, cls(lhs, rhs)
def predict(self, trees, return_derivations=False):
    """
    Generate one graph per tree by repeatedly substituting the chosen rules.

    For each tree, a rule is selected for every node returned by
    ``tree.root_first()`` via ``self.scorer_network.get_best_rule``. The
    right-hand side of the first node's rule becomes the initial working
    graph; afterwards non-terminal edges are replaced one at a time, in
    queue (FIFO) order, by the right-hand side of the rule selected for the
    corresponding child node.

    ``dn.renew_cg()`` is called after each yielded item, i.e. when the
    consumer asks for the next one.

    :param trees: iterable of trees
    :param return_derivations: when true, each yielded tuple additionally
        contains the list of ``(working_graph, rule)`` steps of *that* tree.
        The list is created anew for every tree, so steps of earlier trees
        are not included.
    :return: generator of ``(tree.extra["ID"], graph)`` or
        ``(tree.extra["ID"], graph, derivations)``
    """

    def transform_edge(mapping, edge, span):
        return HyperEdge((mapping[i] for i in edge.nodes), edge.label,
                         edge.is_terminal, span)

    def child_spans(cfg_node, sync_rule):
        # non-terminal edge -> "DelphinSpan" of the sub-node paired with it
        return {
            edge: cfg_subnode.extra["DelphinSpan"]
            for cfg_subnode, (_, edge) in zip(cfg_node, sync_rule.rhs)
            if edge is not None
        }

    def fill_unary_spans(fresh_edges, cfg_node):
        # single-node edges that still lack a span take the span of cfg_node
        for fresh in fresh_edges:
            if len(fresh.nodes) == 1 and fresh.span is None:
                fresh.span = cfg_node.extra["DelphinSpan"]

    for tree in trees:
        # Fresh list per tree: sharing one list across trees would make
        # every yielded derivation contain the steps of all previous trees.
        derivations = []

        sentence_interface = tree.to_sentence()
        self.populate_delphin_spans(tree)
        self.span_ebd_network.init_special()
        span_features = self.span_ebd_network.get_span_features(
            sentence_interface)

        cfg_nodes = list(tree.root_first())
        syn_rules = []
        for cfg_node in cfg_nodes:
            correspondents = set(self.rule_lookup(cfg_node, False).items())
            best_rule_getter = self.scorer_network.get_best_rule(
                span_features[cfg_node.span], correspondents, None)
            # The first yielded value is not needed here, but the generator
            # has to be advanced past it.
            next(best_rule_getter)
            best_rule, _, _ = next(best_rule_getter)
            syn_rules.append(best_rule)
        rule_mapping = dict(zip(cfg_nodes, syn_rules))

        # deal with root rule
        root_node, root_rule = cfg_nodes[0], syn_rules[0]
        # create nodes in working graph
        nodes_mapping = {i: GraphNode() for i in root_rule.hrg.rhs.nodes}
        span_mapping = child_spans(root_node, root_rule)
        # create edges in working graph
        new_edges = frozenset(
            transform_edge(nodes_mapping, edge, span_mapping.get(edge))
            for edge in root_rule.hrg.rhs.edges)
        fill_unary_spans(new_edges, root_node)

        working_graph = HyperGraph(frozenset(nodes_mapping.values()),
                                   new_edges)
        derivations.append((working_graph, root_rule))

        queue = deque()
        if isinstance(tree.children[0], ConstTree):
            # add children nodes to queue
            for child, (_, edge) in zip(tree.children, root_rule.rhs):
                if edge is not None:
                    queue.append(
                        (child, rule_mapping[child],
                         transform_edge(nodes_mapping, edge,
                                        span_mapping.get(edge))))

        while queue:
            # each step substitutes one non-terminal edge with a subgraph
            # and appends the child substitutions to the queue
            target_cfg_rule, target_sync_rule, target_edge = queue.popleft()
            assert target_edge in working_graph.edges

            # lhs nodes map onto the nodes of the edge being replaced;
            # all other rhs nodes are new
            target_nodes_mapping = dict(
                zip(target_sync_rule.hrg.lhs.nodes, target_edge.nodes))
            for node in target_sync_rule.hrg.rhs.nodes:
                if node not in target_nodes_mapping:
                    target_nodes_mapping[node] = GraphNode()

            span_mapping = child_spans(target_cfg_rule, target_sync_rule)

            new_nodes = working_graph.nodes | frozenset(
                target_nodes_mapping.values())
            new_edges_this_step = frozenset(
                transform_edge(target_nodes_mapping, edge,
                               span_mapping.get(edge))
                for edge in target_sync_rule.hrg.rhs.edges)
            new_edges = (working_graph.edges
                         - {target_edge}) | new_edges_this_step
            fill_unary_spans(new_edges_this_step, target_cfg_rule)

            working_graph = HyperGraph(new_nodes, new_edges)
            derivations.append((working_graph, target_sync_rule))

            for child, (_, edge) in zip(target_cfg_rule.children,
                                        target_sync_rule.rhs):
                if edge is not None:
                    queue.append(
                        (child, rule_mapping[child],
                         transform_edge(target_nodes_mapping, edge,
                                        span_mapping.get(edge))))

        if return_derivations:
            yield tree.extra["ID"], working_graph, derivations
        else:
            yield tree.extra["ID"], working_graph
        dn.renew_cg()
def sync_grammar_fallback_2(self, tree_node):
    """
    Produce a fallback rule (or known candidates) for ``tree_node``.

    ``tree_node.tag`` is split at its last ``#`` into a rule name and an
    integer node count.

    * Count other than 1: if ``self.terminal_mapping`` has a non-empty entry
      for the tag, a ``Counter`` holding only its most common item is
      returned. Otherwise a rule is built in which one extra node is linked
      to each of the ``count`` nodes by a terminal edge labelled ``"???"``.
    * Count 1: a label is chosen from the word (``"card"`` when
      ``self.pattern_number`` matches, ``"named"`` for ``generic_proper``
      rules, lemma-based labels for several noun rule names). For any other
      rule name the entry of ``self.lexicon_mapping`` is returned if it is
      non-empty, else the label is ``"named"``. The rule then consists of a
      single node with one terminal edge carrying that label.

    :return: the ``lexicon_mapping`` candidates, or a ``Counter`` with one
        element
    """
    rule_name, count_text = tree_node.tag.rsplit("#", 1)
    word = tree_node.children[0].string
    main_node_count = int(count_text)

    if main_node_count != 1:
        known = self.terminal_mapping.get(tree_node.tag)
        if known:
            return Counter([known.most_common(1)[0][0]])

        connected_nodes = [
            GraphNode(str(index)) for index in range(main_node_count)
        ]
        hub = GraphNode(str(main_node_count + 1))
        old_edge = HyperEdge(nodes=connected_nodes,
                             label=rule_name,
                             is_terminal=False)
        spokes = [
            HyperEdge(nodes=[hub, outer], label="???", is_terminal=True)
            for outer in connected_nodes
        ]
        cfg_rhs = ((tree_node.children[0], None),)
        hrg = HRGRule(lhs=old_edge,
                      rhs=HyperGraph(
                          nodes=frozenset(connected_nodes + [hub]),
                          edges=frozenset(spokes)))
        return Counter([CFGRule(lhs=rule_name, rhs=cfg_rhs, hrg=hrg)])

    main_node = GraphNode("0")
    if self.pattern_number.match(word):
        label = "card"
    elif "generic_proper" in rule_name:
        label = "named"
    else:
        lemma = self.lemmatizer.lemmatize(word)
        if "n_-_c-pl-unk_le" in rule_name:
            label = "_{}/nns_u_unknown".format(lemma)
        elif "n_-_mc_le" in rule_name or "n_-_c_le" in rule_name:
            label = "_{}_n_1".format(lemma)  # more number is used
        elif "generic_mass_count_noun" in rule_name:
            label = "_{}/nn_u_unknown".format(lemma)  # more number is used
        else:
            candidates = self.lexicon_mapping[HLexicon(word),
                                              main_node_count]
            if candidates:
                return candidates
            label = "named"

    old_edge = HyperEdge(nodes=[main_node],
                         label=rule_name,
                         is_terminal=False)
    main_edge = HyperEdge(nodes=[main_node], label=label, is_terminal=True)
    cfg_rhs = ((tree_node.children[0], None),)
    hrg = HRGRule(lhs=old_edge,
                  rhs=HyperGraph(nodes=frozenset([main_node]),
                                 edges=frozenset({main_edge})))
    return Counter([CFGRule(lhs=rule_name, rhs=cfg_rhs, hrg=hrg)])