예제 #1
0
    def rhs_to_hgraph(self):
        """Build an ``Hgraph`` from the right-hand side of this rule.

        Each node of ``self.rhs`` is registered under the identifier
        ``"_" + node.name`` with an empty concept label. A node that also
        occurs in ``self.lhs.nodes`` is recorded as an external node whose id
        is its position in that list; the node at position 0 is additionally
        appended to the graph's roots.

        Each edge of ``self.rhs`` is added as a triple going from the edge's
        first node to a tuple of its remaining nodes. An edge label that
        contains no underscore and starts with neither ``ARG`` nor ``BV`` is
        wrapped in a ``NonterminalLabel``; if that label has no index it gets
        a fresh one of the form ``_<n>``, counted from 0 within this call.

        Returns:
            The populated ``Hgraph``.

        Raises:
            Exception: if a node is already registered as external under a
                different id.
        """
        from common.cfg import NonterminalLabel
        from common.hgraph.hgraph import Hgraph

        graph = Hgraph()
        fresh_nt_index = 0

        for rhs_node in self.rhs.nodes:  # type: GraphNode
            # A node is external exactly when it is also listed on the LHS.
            try:
                external_id = self.lhs.nodes.index(rhs_node)
            except ValueError:
                external_id = None
            node_id = "_" + rhs_node.name

            # Insert the node into the graph. The bare lookup is kept only
            # for its effect (per the original code: it initializes the
            # dictionary entry for this node).
            graph[node_id]
            graph.node_to_concepts[node_id] = ""

            if external_id is not None:
                already_external = node_id in graph.external_nodes
                if already_external and \
                        graph.external_nodes[node_id] != external_id:
                    raise Exception(
                        "Incompatible external node IDs for node %s." % node_id)
                graph.external_nodes[node_id] = external_id
                graph.rev_external_nodes[external_id] = node_id
                if external_id == 0:
                    graph.roots.append(node_id)

        for rhs_edge in self.rhs.edges:  # type: HyperEdge
            targets = tuple("_" + tail.name for tail in rhs_edge.nodes[1:])
            source = "_" + rhs_edge.nodes[0].name

            is_terminal = ("_" in rhs_edge.label
                           or rhs_edge.label.startswith("ARG")
                           or rhs_edge.label.startswith("BV"))
            if is_terminal:
                edge_label = rhs_edge.label
            else:
                # this is a nonterminal edge
                edge_label = NonterminalLabel(rhs_edge.label)
                if not edge_label.index:
                    edge_label.index = "_%i" % fresh_nt_index
                    fresh_nt_index += 1

            graph._add_triple(source, edge_label, targets)

        return graph
예제 #2
0
파일: grammar.py 프로젝트: ChenluJi/bolinas
def parse_string(s):
    """
    Parse the RHS of a CFG rule.

    The string is stripped and split on whitespace. A token containing "$"
    is converted with ``NonterminalLabel.from_string``; if the resulting
    label has no index it is given one of the form ``_<n>``, where ``n``
    counts such labels from 0 within this call. Every other token is kept
    as a plain string.

    Returns the list of tokens in their original order.
    """
    parsed = []
    fresh_index = 0
    for word in s.strip().split():
        if "$" not in word:
            # Terminal symbol: keep the raw string.
            parsed.append(word)
            continue
        nonterminal = NonterminalLabel.from_string(word)
        if not nonterminal.index:
            nonterminal.index = "_%i" % fresh_index
            fresh_index += 1
        parsed.append(nonterminal)
    return parsed
예제 #3
0
def parse_string(s):
    """
    Parse the RHS of a CFG rule.

    Splits the stripped input on whitespace and returns one entry per token:
    tokens containing "$" become ``NonterminalLabel`` objects (built with
    ``NonterminalLabel.from_string``), all others stay plain strings. A
    nonterminal that comes back without an index receives ``_<n>``, with
    ``n`` starting at 0 and increasing for each such nonterminal.
    """
    symbols = []
    next_free = 0
    for token in s.strip().split():
        is_nonterminal = "$" in token
        symbol = NonterminalLabel.from_string(token) if is_nonterminal else token
        if is_nonterminal and not symbol.index:
            symbol.index = "_%i" % next_free
            next_free += 1
        symbols.append(symbol)
    return symbols
예제 #4
0
def make_rule(frontier_pair, amr, tree, align, next_index):
    """
    Create a new rule from a frontier pair and collapse its parts in the
    original graph, tree and alignments.

    Args:
        frontier_pair: a ``(constituent, amr_fragment)`` pair.
        amr: the graph that contains ``amr_fragment``.
        tree: the tree that contains ``constituent``.
        align: alignments, passed through to ``collapse_alignments``.
        next_index: index used for the new nonterminal label and stored on
            the rule as ``original_index``.

    Returns:
        A tuple ``(rule, new_amr, new_tree, new_alignments, next_index + 1)``.

    Note:
        ``amr_fragment.external_nodes`` is overwritten as a side effect.
    """
    constituent, amr_fragment = frontier_pair

    # NOTE(review): this list is not read anywhere below; it is still
    # computed exactly as before in case ``triples()`` has effects that are
    # not visible from here.
    outside_edges = [
        triple for triple in amr.triples()
        if triple not in amr_fragment.triples()
    ]

    # The role comes from the label of the fragment's first root edge.
    root_label = amr_fragment.root_edges()[0][1]
    if isinstance(root_label, NonterminalLabel):
        # Nonterminal symbols look like <role>_<...>_<digits>.
        role = re.match(r'(.+)_(.+)_(\d+)', root_label.label).group(1)
    elif ':' in root_label:
        role, _concept = root_label.split(':')
    else:
        role = root_label

    external_nodes = amr.find_external_nodes(amr_fragment)
    if not len(external_nodes):
        # No external nodes found: fall back to the fragment's first leaf.
        external_nodes = [amr_fragment.find_leaves()[0]]
    # WARNING: destructive. Unfortunately we can't make the change any earlier.
    # TODO why?
    amr_fragment.external_nodes = external_nodes

    symbol = '%s_%s_%d' % (role, constituent.node, len(external_nodes))
    label = NonterminalLabel(symbol, next_index)

    # The triple is built before collapsing so it can be checked against the
    # collapsed graph afterwards.
    new_triple = (amr_fragment.roots[0], label, tuple(external_nodes))
    new_amr = amr.collapse_fragment(amr_fragment, label)
    assert new_triple in new_amr.triples()
    new_tree = collapse_constituent(tree, constituent, label)
    new_alignments = collapse_alignments(align, amr_fragment, new_triple)

    rule = Rule(0, symbol, 1, amr_fragment, constituent,
                original_index=next_index)

    following_index = next_index + 1
    return rule, new_amr, new_tree, new_alignments, following_index
예제 #5
0
 def pop_and_transition():
     """Pop one group of edges off the stack and attach it to the graph.

     Pops (label, children) groups until a PNODE entry is on top of the
     stack, pops that parent node, and adds one triple per group from the
     parent to the group's children. The parent is then inserted as an
     ordinary node and pushed back as a CNODE if the stack is not empty,
     or inserted as a root otherwise.
     """
     edge_groups = []
     while stack[-1][0] != PNODE:  # Pop all edges
         tail_nodes = []
         while stack[-1][0] == CNODE:  # Pop all nodes in hyperedge
             _itemtype, child = stack.pop()
             insert_node(child)
             tail_nodes.append(child)
         assert stack[-1][0] == EDGE
         _itemtype, edge_label = stack.pop()
         edge_groups.append((edge_label, tail_nodes))

     # Construct the hyperedges
     _itemtype, parentnode = stack.pop()
     for edge_label, tail_nodes in edge_groups:
         # Children were collected in pop order; reverse them for the tail.
         hyperchild = tuple(
             ident for ident, _label, _ext_id in reversed(tail_nodes))

         if "$" in edge_label:  # this is a nonterminal edge
             new_edge = NonterminalLabel.from_string(edge_label)
             if not new_edge.index:
                 new_edge.index = "_%i" % self.nt_id_count
                 self.nt_id_count += 1
         else:
             new_edge = edge_label

         parent_ident, _label, _ext_id = parentnode
         hgraph._add_triple(parent_ident, new_edge, hyperchild)

     # NOTE(review): no nonlocal/global declaration is visible in this
     # function, so `state` is bound as a local name here -- confirm how
     # the enclosing code observes the transition.
     if stack:
         insert_node(parentnode)
         stack.append((CNODE, parentnode))
         state = 4
     else:
         insert_node(parentnode, root=True)
         state = 5
        def pop_and_transition():
            """Turn the topmost group of stacked edges into graph triples.

            Entries are popped until the PNODE entry of the parent is on top
            of the stack: each run of CNODE entries followed by an EDGE entry
            forms one (label, children) group. The parent is then popped and
            one triple per group is added from the parent to the children.
            Finally the parent is inserted as a regular node and pushed back
            as a CNODE when the stack is non-empty, or inserted as a root
            when it is empty.
            """
            collected = []
            while stack[-1][0] != PNODE:  # Pop all edges
                members = []
                while stack[-1][0] == CNODE:  # Pop all nodes in hyperedge
                    _kind, member = stack.pop()
                    insert_node(member)
                    members.append(member)
                assert stack[-1][0] == EDGE
                _kind, raw_label = stack.pop()
                collected.append((raw_label, members))

            # Construct the hyperedges
            _kind, parentnode = stack.pop()
            for raw_label, members in collected:
                # Members were gathered in pop order; the tail uses the
                # reverse of that order.
                hyperchild = tuple(
                    ident for ident, _label, _ext_id in reversed(members))

                if "$" in raw_label:  # this is a nonterminal edge
                    new_edge = NonterminalLabel.from_string(raw_label)
                    if not new_edge.index:
                        new_edge.index = "_%i" % self.nt_id_count
                        self.nt_id_count += 1
                else:
                    new_edge = raw_label

                source_ident, _label, _ext_id = parentnode
                hgraph._add_triple(source_ident, new_edge, hyperchild)

            # NOTE(review): `state` is assigned without a nonlocal/global
            # declaration in this function, so it is a local binding here --
            # confirm how the enclosing code picks up the new state.
            if stack:
                insert_node(parentnode)
                stack.append((CNODE, parentnode))
                state = 4
            else:
                insert_node(parentnode, root=True)
                state = 5
예제 #7
0
        def convert_chart(partition, external_nodes, nt, first=False):
            """Recursively convert the chart entry for ``partition`` into rules.

            For a partition that is not in ``chart`` (a leaf), a single rule
            is built from the partition's edges and the leaves it spans. For
            any other partition, one rule is built per possibility in
            ``chart[partition]``: each sub-partition that has edges is
            converted recursively and collapsed into a nonterminal edge of
            the parent graph, and its string span is replaced by that
            nonterminal in the rule's string side.

            Results are memoized in ``seen``; every visit (first or repeated)
            increments ``result.use_counts`` for the fragment.

            Args:
                partition: chart item to convert; read for ``edges``,
                    ``str_start`` and ``str_end``.
                external_nodes: assigned to ``external_nodes`` of each graph
                    built for this partition.
                nt: nonterminal label for the rule LHS; only ``nt.label`` is
                    used (the index is dropped).
                first: not used in this function body.

            Returns:
                The fragment id under which the rules for ``partition`` are
                stored in ``result``.

            Note:
                Roots and external nodes are ordered by ``node_order``; this
                assumes every such node is a key of ``node_order``.
            """
            nt = NonterminalLabel(nt.label)  # Get rid of the index

            if partition in seen:
                node = seen[partition]
                result.use_counts[node] += 1
                return node

            def by_node_order(graph_node):
                # Sort key replacing the old cmp function
                # ``lambda x, y: node_order[x] - node_order[y]``. list.sort()
                # accepts no positional cmp argument on Python 3; key= gives
                # the same ascending, stable order on Python 2 and 3.
                return node_order[graph_node]

            leaves = chart.tree.leaves()

            edges_in_partition = [
                graph_edge_list[i] for i in range(len(partition.edges))
                if partition.edges[i] == 1
            ]

            if partition not in chart:  # leaf
                graph = Hgraph.from_triples(edges_in_partition, {}, warn=False)
                graph.roots = graph.find_roots()
                graph.roots.sort(key=by_node_order)
                graph.external_nodes = external_nodes
                str_rhs = [
                    leaves[i]
                    for i in range(partition.str_start, partition.str_end + 1)
                ]
                rule = Rule(0, nt.label, graph, tuple(str_rhs), 1)
                rule_id = self.add_rule(rule)
                fragment = fragment_counter[0]
                result[fragment] = [(rule_id, [])]
                result.use_counts[fragment] += 1
                seen[partition] = fragment
                fragment_counter[0] += 1
                return fragment

            poss = []
            for possibility in chart[partition]:
                # This is the parent graph; it is rebuilt for every
                # possibility because sub-fragments are collapsed into it.
                partition_graph = Hgraph.from_triples(
                    edges_in_partition, {}, warn=False)
                partition_graph.roots = partition_graph.find_roots()
                partition_graph.roots.sort(key=by_node_order)
                partition_graph.external_nodes = external_nodes
                children = []

                # Maps the start of a collapsed string span to
                # (nonterminal, end of span).
                spans_to_nt = {}

                index = 1
                for subpartition in possibility:  # The different sub-constituents

                    edges_in_subpartition = [
                        graph_edge_list[i]
                        for i in range(len(subpartition.edges))
                        if subpartition.edges[i] == 1
                    ]
                    if edges_in_subpartition:  # Some constituents do not have any edges aligned to them
                        sub_graph = Hgraph.from_triples(edges_in_subpartition,
                                                        {},
                                                        warn=False)
                        sub_graph.roots = sub_graph.find_roots()
                        sub_graph.roots.sort(key=by_node_order)
                        external_node_list = partition_graph.find_external_nodes2(
                            sub_graph)
                        external_node_list.sort(key=by_node_order)
                        # External node -> its position in the sorted list.
                        sub_external_nodes = {
                            k: v for v, k in enumerate(external_node_list)
                        }
                        sub_graph.external_nodes = sub_external_nodes
                        sub_nt = NonterminalLabel(
                            "%s%i" %
                            (subpartition.phrase, len(sub_external_nodes)),
                            index)
                        children.append(
                            convert_chart(subpartition, sub_external_nodes,
                                          sub_nt))  # Recursive call
                        partition_graph = partition_graph.collapse_fragment2(
                            sub_graph,
                            sub_nt,
                            external=external_node_list,
                            warn=False)

                        spans_to_nt[subpartition.str_start] = (
                            sub_nt, subpartition.str_end)
                    else:
                        # NOTE(review): this label is never read afterwards
                        # in this function; construction is kept to preserve
                        # the original behavior.
                        sub_nt = NonterminalLabel(subpartition.phrase, index)

                    # The index advances for every sub-constituent, with or
                    # without edges.
                    index += 1

                # Collapsing may have changed the roots; recompute them.
                partition_graph.roots = partition_graph.find_roots()
                partition_graph.roots.sort(key=by_node_order)

                # Assemble String rule
                str_rhs = []
                i = partition.str_start
                while i <= partition.str_end:
                    if i in spans_to_nt:
                        # Emit the nonterminal and jump to the end of its span.
                        new_nt, i = spans_to_nt[i]
                        str_rhs.append(new_nt)
                    else:
                        str_rhs.append(leaves[i])
                    i = i + 1

                rule = Rule(0, nt.label, partition_graph, tuple(str_rhs), 1)
                rule_id = self.add_rule(rule)

                poss.append((rule_id, children))

            fragment = fragment_counter[0]
            result[fragment] = poss
            result.use_counts[fragment] += 1
            seen[partition] = fragment
            fragment_counter[0] += 1
            return fragment