Beispiel #1
0
    def parse_bitext(self, obj1, obj2):
        """
        Parse a single pair of objects (two strings, two graphs, or string/graph).

        Runs a breadth-first agenda search. One axiom item is created for
        every rule returned by ``grammar.reachable_rules(obj1, obj2)``; items
        are then repeatedly shifted over terminals of the inputs or completed
        with closed items until the agenda is empty.

        Args:
            obj1: first input. Indexed as a sequence when
                ``self.grammar.rhs1_type`` is "string"; otherwise it must
                provide ``triples()``.
            obj2: second input, interpreted the same way according to
                ``self.grammar.rhs2_type``.

        Returns:
            The chart: for every derived item, the set of tuples of items it
            was built from (``(item,)`` for a shift, ``(item, oitem)`` for a
            completion). Closed items accepted by ``self.successful_biparse``
            are additionally recorded under the key "START".
        """
        grammar = self.grammar
        rhs1type, rhs2type = grammar.rhs1_type, grammar.rhs2_type
        assert rhs1type in ["string", "hypergraph"] and rhs2type in ["string", "hypergraph"]

        # Remember the size of both input objects; successful_biparse needs them.
        # NOTE(review): sizes use triples() without nodelabels while the lookups
        # below pass nodelabels=self.nodelabels -- confirm this is intended.
        if rhs1type == "string":
            obj1size = len(obj1)
        else:
            obj1size = len(obj1.triples())
        if rhs2type == "string":
            obj2size = len(obj2)
        else:
            obj2size = len(obj2.triples())

        # time.clock() was removed in Python 3.8; prefer perf_counter and only
        # fall back to clock on interpreters that do not have it.
        timer = getattr(time, "perf_counter", None) or time.clock
        start_time = timer()
        log.chatter("parse...")

        # initialize data structures and lookups
        # we use various tables to provide constant-time lookup of fragments available
        # for shifting, completion, etc.
        chart = ddict(set)

        # TODO: command line filter to switch rule filter on/off
        pgrammar = [grammar[r] for r in grammar.reachable_rules(obj1, obj2)]  # grammar.values()
        queue = deque()  # the items left to be visited
        pending = set()  # a copy of queue with constant-time lookup
        attempted = set()  # a cache of previously-attempted item combinations
        visited = set()  # a cache of already-visited items
        nonterminal_lookup = ddict(set)  # closed items, indexed by item.rule.symbol
        reverse_lookup = ddict(set)  # a mapping from outside symbols to open items

        # mapping from words to string indices for each string
        word_terminal_lookup1 = ddict(set)
        word_terminal_lookup2 = ddict(set)

        if rhs1type == "string":
            for index, word in enumerate(obj1):
                word_terminal_lookup1[word].add(index)

        if rhs2type == "string":
            for index, word in enumerate(obj2):
                word_terminal_lookup2[word].add(index)

        # mapping from edge labels to graph edges for each graph
        edge_terminal_lookup1 = ddict(set)
        edge_terminal_lookup2 = ddict(set)

        if rhs1type == "hypergraph":
            for edge in obj1.triples(nodelabels=self.nodelabels):
                edge_terminal_lookup1[edge[1]].add(edge)

        if rhs2type == "hypergraph":
            for edge in obj2.triples(nodelabels=self.nodelabels):
                edge_terminal_lookup2[edge[1]].add(edge)

        # The item classes depend only on the grammar, not on the rule, so
        # they are chosen once before the axioms are created.
        item1class = CfgItem if rhs1type == "string" else HergItem
        item2class = CfgItem if rhs2type == "string" else HergItem
        for rule in pgrammar:
            axiom = SynchronousItem(rule, item1class, item2class, nodelabels=self.nodelabels)
            queue.append(axiom)
            pending.add(axiom)
            if axiom.outside_is_nonterminal:
                reverse_lookup[axiom.outside_symbol].add(axiom)

        # keep track of whether we found any complete derivation
        success = False

        # parse
        while queue:
            item = queue.popleft()
            pending.remove(item)
            visited.add(item)
            log.debug("handling", item)

            if item.closed:
                log.debug("  is closed.")
                # check if it's a complete derivation
                if self.successful_biparse(obj1, obj2, item, obj1size, obj2size):
                    chart["START"].add((item,))
                    success = True

                # add to nonterminal lookup
                nonterminal_lookup[item.rule.symbol].add(item)

                # wake up any containing rules
                # Unlike in ordinary state-space search, it's possible that we will have
                # to re-visit items which couldn't be merged with anything the first time
                # we saw them, and are waiting for the current item. The reverse_lookup
                # indexes all items by their outside symbol, so we re-append to the queue
                # all items looking for something with the current item's symbol.
                for ritem in reverse_lookup[item.rule.symbol]:
                    if ritem not in pending:
                        queue.append(ritem)
                        pending.add(ritem)

            elif item.outside_is_nonterminal:
                # complete
                reverse_lookup[item.outside_symbol].add(item)

                for oitem in nonterminal_lookup[item.outside_symbol]:
                    log.debug("  oitem:", oitem)
                    if (item, oitem) in attempted:
                        # don't repeat combinations we've tried before
                        continue
                    attempted.add((item, oitem))
                    if not item.can_complete(oitem):
                        log.debug("    fail")
                        continue
                    log.debug("    ok")
                    nitem = item.complete(oitem)
                    chart[nitem].add((item, oitem))
                    if nitem not in pending and nitem not in visited:
                        queue.append(nitem)
                        pending.add(nitem)

            else:
                # shift ; this depends on the configuration (string/graph -> string/graph)
                if not item.outside1_is_nonterminal and not item.item1.closed:
                    if rhs1type == "string":
                        new_items = [
                            item.shift_word1(item.outside_object1, index)
                            for index in word_terminal_lookup1[item.outside_object1]
                            if item.can_shift_word1(item.outside_object1, index)
                        ]
                    else:
                        # compare by value: identity of str objects is an
                        # interning detail, not a guarantee
                        assert rhs1type == "hypergraph"
                        new_items = [
                            item.shift_edge1(edge)
                            for edge in edge_terminal_lookup1[item.outside_object1]
                            if item.can_shift_edge1(edge)
                        ]
                else:
                    assert not item.outside2_is_nonterminal  # Otherwise shift would not be called
                    if rhs2type == "string":
                        new_items = [
                            item.shift_word2(item.outside_object2, index)
                            for index in word_terminal_lookup2[item.outside_object2]
                            if item.can_shift_word2(item.outside_object2, index)
                        ]
                    else:
                        assert rhs2type == "hypergraph"
                        new_items = [
                            item.shift_edge2(edge)
                            for edge in edge_terminal_lookup2[item.outside_object2]
                            if item.can_shift_edge2(edge)
                        ]

                for nitem in new_items:
                    log.debug("  shift", nitem, nitem.shifted)
                    chart[nitem].add((item,))
                    if nitem not in pending and nitem not in visited:
                        queue.append(nitem)
                        pending.add(nitem)

        if success:
            log.chatter("  success!")
        etime = timer() - start_time
        log.chatter("done in %.2fs" % etime)

        # TODO return partial chart
        return chart
Beispiel #2
0
    def parse(self, string, graph):
        """
        Parses the given string and/or graph.

        Args:
            string: the string to parse (indexed as a sequence of words), or a
                falsy value when only a graph is parsed.
            graph: the graph to parse (must provide ``triples(nodelabels=...)``),
                or a falsy value when only a string is parsed.

        Returns:
            The chart: for every derived item, the set of tuples of items it
            was built from (``(item,)`` for a shift, ``(item, oitem)`` for a
            completion). Closed items accepted by ``self.successful_parse``
            are additionally recorded under the key "START". If neither input
            is given, no axioms are created and the chart is empty.
        """

        # This is a long function, so let's start with a high-level overview. This is
        # a "deductive-proof-style" parser: We begin with one "axiomatic" chart item
        # for each rule, and combine these items with each other and with fragments of
        # the object(s) being parsed to deduce new items. We can think of these items
        # as defining a search space in which we need to find a path to the goal item.
        # The parser implemented here performs a BFS of this search space.

        grammar = self.grammar

        # remember when we started
        # time.clock() was removed in Python 3.8; prefer perf_counter and only
        # fall back to clock on interpreters that do not have it.
        timer = getattr(time, "perf_counter", None) or time.clock
        start_time = timer()
        log.chatter("parse...")

        # specify what kind of items we're working with
        if string and graph:
            axiom_class = CfgHergItem
        elif string:
            axiom_class = CfgItem
        else:
            axiom_class = HergItem

        # remember the size of the example (-1 marks an absent input)
        string_size = len(string) if string else -1
        if graph:
            graph_size = len(graph.triples(nodelabels=self.nodelabels))
        else:
            graph_size = -1

        # initialize data structures and lookups
        # we use various tables to provide constant-time lookup of fragments available
        # for shifting, completion, etc.
        chart = ddict(set)

        # TODO: Command line option to switch grammar filter on/off
        # When both inputs are present the graph-based filter is the one that
        # ends up being used, so the string-based one is only computed when
        # there is no graph. With neither input there is nothing to seed.
        if graph:
            pgrammar = [grammar[r] for r in grammar.reachable_rules(graph, None)]  # grammar.values()
        elif string:
            pgrammar = [grammar[r] for r in grammar.reachable_rules(string, None)]  # grammar.values()
        else:
            pgrammar = []

        queue = deque()  # the items left to be visited
        pending = set()  # a copy of queue with constant-time lookup
        attempted = set()  # a cache of previously-attempted item combinations
        visited = set()  # a cache of already-visited items
        nonterminal_lookup = ddict(set)  # closed items, indexed by item.rule.symbol
        reverse_lookup = ddict(set)  # a mapping from outside symbols to open items
        word_terminal_lookup = ddict(set)  # mapping from words to string indices
        edge_terminal_lookup = ddict(set)  # mapping from edge labels to graph edges
        if string:
            for index, word in enumerate(string):
                word_terminal_lookup[word].add(index)
        if graph:
            for edge in graph.triples(nodelabels=self.nodelabels):
                edge_terminal_lookup[edge[1]].add(edge)
        for rule in pgrammar:
            axiom = axiom_class(rule, nodelabels=self.nodelabels)
            queue.append(axiom)
            pending.add(axiom)
            if axiom.outside_is_nonterminal:
                reverse_lookup[axiom.outside_symbol].add(axiom)

        # keep track of whether we found any complete derivation
        success = False

        # parse
        while queue:
            item = queue.popleft()
            pending.remove(item)
            visited.add(item)
            log.debug("handling", item)

            if item.closed:
                log.debug("  is closed.")
                # check if it's a complete derivation
                if self.successful_parse(string, graph, item, string_size, graph_size):
                    chart["START"].add((item,))
                    success = True

                # add to nonterminal lookup
                nonterminal_lookup[item.rule.symbol].add(item)

                # wake up any containing rules
                # Unlike in ordinary state-space search, it's possible that we will have
                # to re-visit items which couldn't be merged with anything the first time
                # we saw them, and are waiting for the current item. The reverse_lookup
                # indexes all items by their outside symbol, so we re-append to the queue
                # all items looking for something with the current item's symbol.
                for ritem in reverse_lookup[item.rule.symbol]:
                    if ritem not in pending:
                        queue.append(ritem)
                        pending.add(ritem)

            elif item.outside_is_nonterminal:
                # complete
                reverse_lookup[item.outside_symbol].add(item)

                for oitem in nonterminal_lookup[item.outside_symbol]:
                    log.debug("  oitem:", oitem)
                    if (item, oitem) in attempted:
                        # don't repeat combinations we've tried before
                        continue
                    attempted.add((item, oitem))
                    if not item.can_complete(oitem):
                        log.debug("    fail")
                        continue
                    log.debug("    ok")
                    nitem = item.complete(oitem)
                    chart[nitem].add((item, oitem))
                    if nitem not in pending and nitem not in visited:
                        queue.append(nitem)
                        pending.add(nitem)

            else:
                # shift
                if string and graph:
                    if not item.outside_word_is_nonterminal:
                        new_items = [
                            item.shift_word(item.outside_word, index)
                            for index in word_terminal_lookup[item.outside_word]
                            if item.can_shift_word(item.outside_word, index)
                        ]
                    else:
                        assert not item.outside_edge_is_nonterminal
                        new_items = [
                            item.shift_edge(edge)
                            for edge in edge_terminal_lookup[item.outside_edge]
                            if item.can_shift_edge(edge)
                        ]
                elif string:
                    new_items = [
                        item.shift(item.outside_word, index)
                        for index in word_terminal_lookup[item.outside_word]
                        if item.can_shift(item.outside_word, index)
                    ]
                else:
                    assert graph
                    new_items = [
                        item.shift(edge)
                        for edge in edge_terminal_lookup[item.outside_edge]
                        if item.can_shift(edge)
                    ]

                for nitem in new_items:
                    log.debug("  shift", nitem, nitem.shifted)
                    chart[nitem].add((item,))
                    if nitem not in pending and nitem not in visited:
                        queue.append(nitem)
                        pending.add(nitem)

        if success:
            log.chatter("  success!")
        etime = timer() - start_time
        log.chatter("done in %.2fs" % etime)

        # TODO return partial chart
        return chart
Beispiel #3
0
    def parse_bitext(self, obj1, obj2):
        """
        Parse a single pair of objects (two strings, two graphs, or string/graph).

        Runs a breadth-first agenda search. One axiom item is created for
        every rule returned by ``grammar.reachable_rules(obj1, obj2)``; items
        are then repeatedly shifted over terminals of the inputs or completed
        with closed items until the agenda is empty.

        Args:
            obj1: first input. Indexed as a sequence when
                ``self.grammar.rhs1_type`` is "string"; otherwise it must
                provide ``triples()``.
            obj2: second input, interpreted the same way according to
                ``self.grammar.rhs2_type``.

        Returns:
            The chart: for every derived item, the set of tuples of items it
            was built from (``(item,)`` for a shift, ``(item, oitem)`` for a
            completion). Closed items accepted by ``self.successful_biparse``
            are additionally recorded under the key "START".
        """
        grammar = self.grammar
        rhs1type, rhs2type = grammar.rhs1_type, grammar.rhs2_type
        assert rhs1type in ["string", "hypergraph"
                            ] and rhs2type in ["string", "hypergraph"]

        # Remember the size of both input objects; successful_biparse needs them.
        # NOTE(review): sizes use triples() without nodelabels while the lookups
        # below pass nodelabels=self.nodelabels -- confirm this is intended.
        if rhs1type == "string":
            obj1size = len(obj1)
        else:
            obj1size = len(obj1.triples())
        if rhs2type == "string":
            obj2size = len(obj2)
        else:
            obj2size = len(obj2.triples())

        # time.clock() was removed in Python 3.8; prefer perf_counter and only
        # fall back to clock on interpreters that do not have it.
        timer = getattr(time, 'perf_counter', None) or time.clock
        start_time = timer()
        log.chatter('parse...')

        # initialize data structures and lookups
        # we use various tables to provide constant-time lookup of fragments available
        # for shifting, completion, etc.
        chart = ddict(set)

        #TODO: command line filter to switch rule filter on/off
        pgrammar = [grammar[r] for r in grammar.reachable_rules(obj1, obj2)
                    ]  #grammar.values()
        queue = deque()  # the items left to be visited
        pending = set()  # a copy of queue with constant-time lookup
        attempted = set()  # a cache of previously-attempted item combinations
        visited = set()  # a cache of already-visited items
        nonterminal_lookup = ddict(set)  # closed items, indexed by item.rule.symbol
        reverse_lookup = ddict(
            set)  # a mapping from outside symbols to open items

        # mapping from words to string indices for each string
        word_terminal_lookup1 = ddict(set)
        word_terminal_lookup2 = ddict(set)

        if rhs1type == "string":
            for index, word in enumerate(obj1):
                word_terminal_lookup1[word].add(index)

        if rhs2type == "string":
            for index, word in enumerate(obj2):
                word_terminal_lookup2[word].add(index)

        # mapping from edge labels to graph edges for each graph
        edge_terminal_lookup1 = ddict(set)
        edge_terminal_lookup2 = ddict(set)

        if rhs1type == "hypergraph":
            for edge in obj1.triples(nodelabels=self.nodelabels):
                edge_terminal_lookup1[edge[1]].add(edge)

        if rhs2type == "hypergraph":
            for edge in obj2.triples(nodelabels=self.nodelabels):
                edge_terminal_lookup2[edge[1]].add(edge)

        # The item classes depend only on the grammar, not on the rule, so
        # they are chosen once before the axioms are created.
        item1class = CfgItem if rhs1type == "string" else HergItem
        item2class = CfgItem if rhs2type == "string" else HergItem
        for rule in pgrammar:
            axiom = SynchronousItem(rule,
                                    item1class,
                                    item2class,
                                    nodelabels=self.nodelabels)
            queue.append(axiom)
            pending.add(axiom)
            if axiom.outside_is_nonterminal:
                reverse_lookup[axiom.outside_symbol].add(axiom)

        # keep track of whether we found any complete derivation
        success = False

        # parse
        while queue:
            item = queue.popleft()
            pending.remove(item)
            visited.add(item)
            log.debug('handling', item)

            if item.closed:
                log.debug('  is closed.')
                # check if it's a complete derivation
                if self.successful_biparse(obj1, obj2, item, obj1size,
                                           obj2size):
                    chart['START'].add((item, ))
                    success = True

                # add to nonterminal lookup
                nonterminal_lookup[item.rule.symbol].add(item)

                # wake up any containing rules
                # Unlike in ordinary state-space search, it's possible that we will have
                # to re-visit items which couldn't be merged with anything the first time
                # we saw them, and are waiting for the current item. The reverse_lookup
                # indexes all items by their outside symbol, so we re-append to the queue
                # all items looking for something with the current item's symbol.
                for ritem in reverse_lookup[item.rule.symbol]:
                    if ritem not in pending:
                        queue.append(ritem)
                        pending.add(ritem)

            elif item.outside_is_nonterminal:
                # complete
                reverse_lookup[item.outside_symbol].add(item)

                for oitem in nonterminal_lookup[item.outside_symbol]:
                    log.debug("  oitem:", oitem)
                    if (item, oitem) in attempted:
                        # don't repeat combinations we've tried before
                        continue
                    attempted.add((item, oitem))
                    if not item.can_complete(oitem):
                        log.debug("    fail")
                        continue
                    log.debug("    ok")
                    nitem = item.complete(oitem)
                    chart[nitem].add((item, oitem))
                    if nitem not in pending and nitem not in visited:
                        queue.append(nitem)
                        pending.add(nitem)

            else:
                # shift ; this depends on the configuration (string/graph -> string/graph)
                if not item.outside1_is_nonterminal and not item.item1.closed:
                    if rhs1type == "string":
                        new_items = [
                            item.shift_word1(item.outside_object1, index)
                            for index in word_terminal_lookup1[
                                item.outside_object1]
                            if item.can_shift_word1(
                                item.outside_object1, index)
                        ]
                    else:
                        # compare by value: identity of str objects is an
                        # interning detail, not a guarantee
                        assert rhs1type == "hypergraph"
                        new_items = [
                            item.shift_edge1(edge) for edge in
                            edge_terminal_lookup1[item.outside_object1]
                            if item.can_shift_edge1(edge)
                        ]
                else:
                    assert not item.outside2_is_nonterminal  # Otherwise shift would not be called
                    if rhs2type == "string":
                        new_items = [
                            item.shift_word2(item.outside_object2, index)
                            for index in word_terminal_lookup2[
                                item.outside_object2]
                            if item.can_shift_word2(
                                item.outside_object2, index)
                        ]
                    else:
                        assert rhs2type == "hypergraph"
                        new_items = [
                            item.shift_edge2(edge) for edge in
                            edge_terminal_lookup2[item.outside_object2]
                            if item.can_shift_edge2(edge)
                        ]

                for nitem in new_items:
                    log.debug('  shift', nitem, nitem.shifted)
                    chart[nitem].add((item, ))
                    if nitem not in pending and nitem not in visited:
                        queue.append(nitem)
                        pending.add(nitem)

        if success:
            log.chatter('  success!')
        etime = timer() - start_time
        log.chatter('done in %.2fs' % etime)

        # TODO return partial chart
        return chart
Beispiel #4
0
    def parse(self, string, graph):
        """
        Parses the given string and/or graph.

        Args:
            string: the string to parse (indexed as a sequence of words), or a
                falsy value when only a graph is parsed.
            graph: the graph to parse (must provide ``triples(nodelabels=...)``),
                or a falsy value when only a string is parsed.

        Returns:
            The chart: for every derived item, the set of tuples of items it
            was built from (``(item,)`` for a shift, ``(item, oitem)`` for a
            completion). Closed items accepted by ``self.successful_parse``
            are additionally recorded under the key "START". If neither input
            is given, no axioms are created and the chart is empty.
        """

        # This is a long function, so let's start with a high-level overview. This is
        # a "deductive-proof-style" parser: We begin with one "axiomatic" chart item
        # for each rule, and combine these items with each other and with fragments of
        # the object(s) being parsed to deduce new items. We can think of these items
        # as defining a search space in which we need to find a path to the goal item.
        # The parser implemented here performs a BFS of this search space.

        grammar = self.grammar

        # remember when we started
        # time.clock() was removed in Python 3.8; prefer perf_counter and only
        # fall back to clock on interpreters that do not have it.
        timer = getattr(time, 'perf_counter', None) or time.clock
        start_time = timer()
        log.chatter('parse...')

        # specify what kind of items we're working with
        if string and graph:
            axiom_class = CfgHergItem
        elif string:
            axiom_class = CfgItem
        else:
            axiom_class = HergItem

        # remember the size of the example (-1 marks an absent input)
        string_size = len(string) if string else -1
        if graph:
            graph_size = len(graph.triples(nodelabels=self.nodelabels))
        else:
            graph_size = -1

        # initialize data structures and lookups
        # we use various tables to provide constant-time lookup of fragments available
        # for shifting, completion, etc.
        chart = ddict(set)

        # TODO: Command line option to switch grammar filter on/off
        # When both inputs are present the graph-based filter is the one that
        # ends up being used, so the string-based one is only computed when
        # there is no graph. With neither input there is nothing to seed.
        if graph:
            pgrammar = [
                grammar[r] for r in grammar.reachable_rules(graph, None)
            ]  #grammar.values()
        elif string:
            pgrammar = [
                grammar[r] for r in grammar.reachable_rules(string, None)
            ]  #grammar.values()
        else:
            pgrammar = []

        queue = deque()  # the items left to be visited
        pending = set()  # a copy of queue with constant-time lookup
        attempted = set()  # a cache of previously-attempted item combinations
        visited = set()  # a cache of already-visited items
        nonterminal_lookup = ddict(set)  # closed items, indexed by item.rule.symbol
        reverse_lookup = ddict(
            set)  # a mapping from outside symbols to open items
        word_terminal_lookup = ddict(
            set)  # mapping from words to string indices
        edge_terminal_lookup = ddict(
            set)  # mapping from edge labels to graph edges
        if string:
            for index, word in enumerate(string):
                word_terminal_lookup[word].add(index)
        if graph:
            for edge in graph.triples(nodelabels=self.nodelabels):
                edge_terminal_lookup[edge[1]].add(edge)
        for rule in pgrammar:
            axiom = axiom_class(rule, nodelabels=self.nodelabels)
            queue.append(axiom)
            pending.add(axiom)
            if axiom.outside_is_nonterminal:
                reverse_lookup[axiom.outside_symbol].add(axiom)

        # keep track of whether we found any complete derivation
        success = False

        # parse
        while queue:
            item = queue.popleft()
            pending.remove(item)
            visited.add(item)
            log.debug('handling', item)

            if item.closed:
                log.debug('  is closed.')
                # check if it's a complete derivation
                if self.successful_parse(string, graph, item, string_size,
                                         graph_size):
                    chart['START'].add((item, ))
                    success = True

                # add to nonterminal lookup
                nonterminal_lookup[item.rule.symbol].add(item)

                # wake up any containing rules
                # Unlike in ordinary state-space search, it's possible that we will have
                # to re-visit items which couldn't be merged with anything the first time
                # we saw them, and are waiting for the current item. The reverse_lookup
                # indexes all items by their outside symbol, so we re-append to the queue
                # all items looking for something with the current item's symbol.
                for ritem in reverse_lookup[item.rule.symbol]:
                    if ritem not in pending:
                        queue.append(ritem)
                        pending.add(ritem)

            elif item.outside_is_nonterminal:
                # complete
                reverse_lookup[item.outside_symbol].add(item)

                for oitem in nonterminal_lookup[item.outside_symbol]:
                    log.debug("  oitem:", oitem)
                    if (item, oitem) in attempted:
                        # don't repeat combinations we've tried before
                        continue
                    attempted.add((item, oitem))
                    if not item.can_complete(oitem):
                        log.debug("    fail")
                        continue
                    log.debug("    ok")
                    nitem = item.complete(oitem)
                    chart[nitem].add((item, oitem))
                    if nitem not in pending and nitem not in visited:
                        queue.append(nitem)
                        pending.add(nitem)

            else:
                # shift
                if string and graph:
                    if not item.outside_word_is_nonterminal:
                        new_items = [
                            item.shift_word(item.outside_word, index)
                            for index in word_terminal_lookup[
                                item.outside_word] if item.can_shift_word(
                                    item.outside_word, index)
                        ]
                    else:
                        assert not item.outside_edge_is_nonterminal
                        new_items = [
                            item.shift_edge(edge) for edge in
                            edge_terminal_lookup[item.outside_edge]
                            if item.can_shift_edge(edge)
                        ]
                elif string:
                    new_items = [
                        item.shift(item.outside_word, index) for index in
                        word_terminal_lookup[item.outside_word]
                        if item.can_shift(item.outside_word, index)
                    ]
                else:
                    assert graph
                    new_items = [
                        item.shift(edge)
                        for edge in edge_terminal_lookup[item.outside_edge]
                        if item.can_shift(edge)
                    ]

                for nitem in new_items:
                    log.debug('  shift', nitem, nitem.shifted)
                    chart[nitem].add((item, ))
                    if nitem not in pending and nitem not in visited:
                        queue.append(nitem)
                        pending.add(nitem)

        if success:
            log.chatter('  success!')
        etime = timer() - start_time
        log.chatter('done in %.2fs' % etime)

        # TODO return partial chart
        return chart
Beispiel #5
0
    def parse(self, graph):
        """
        Parses the given graph with the provided grammar.

        Axiom items are created for every tree leaf of every rule returned by
        ``self.grammar.reachable_rules(graph, None)`` and are then processed
        from a FIFO work list until it is empty.

        Args:
            graph: the input graph. It must provide ``compute_fw_table()`` and
                ``triples(nodelabels=...)``.

        Returns:
            The chart: a ``ddict(set)`` mapping each derived item to a set of
            tuples holding the item(s) it was derived from. Goal items are
            additionally recorded under the key ``'START'``. The chart is
            returned whether or not a goal item was found.
        """

        # This function is very similar to its counterpart in the regular
        # (non-tree-decomposing) parser. Read the comments there to understand how it
        # works.

        # time.clock() was removed in Python 3.8; use perf_counter() when it
        # exists and fall back to clock() only on interpreters that lack it.
        timer = getattr(time, 'perf_counter', None) or time.clock
        start_time = timer()
        log.chatter('parse...')

        # ensure that the input graph has its shortest-path table precomputed
        graph.compute_fw_table()

        chart = ddict(set)
        # TODO command line option to switch rule filtering on/off
        pgrammar = [
            self.grammar[r] for r in self.grammar.reachable_rules(graph, None)
        ]
        queue = deque()  # items left to be handled, in FIFO order
        pending = set()  # mirror of queue for constant-time membership tests
        attempted = set()  # (item, other item) pairs already combined
        visited = set()  # items that have been taken off the queue
        terminal_lookup = ddict(set)  # edge label -> graph edges
        # Items with target ROOT / NONE, indexed by their self_key ...
        passive_item_lookup = ddict(set)
        tree_node_lookup = ddict(set)
        # ... and the items waiting for them, indexed by their next_key.
        passive_item_rev_lookup = ddict(set)
        tree_node_rev_lookup = ddict(set)

        for edge in graph.triples(nodelabels=self.nodelabels):
            terminal_lookup[edge[1]].add(edge)

        # Seed the work list with one axiom per tree leaf of each rule.
        for rule in pgrammar:
            for leaf in rule.tree_leaves:
                axiom = self.item_class(rule,
                                        leaf,
                                        graph,
                                        nodelabels=self.nodelabels)
                queue.append(axiom)
                pending.add(axiom)
                assert leaf not in rule.tree_to_edge

        success = False

        while queue:
            item = queue.popleft()
            pending.remove(item)
            visited.add(item)
            log.debug('handling', item, item.subgraph)

            if item.target == Item.NONE:
                log.debug('  none')
                # Register the item and re-enqueue every item that was
                # waiting for this key (unless it is already queued).
                tree_node_lookup[item.self_key].add(item)
                for ritem in tree_node_rev_lookup[item.self_key]:
                    if ritem not in pending:
                        queue.append(ritem)
                        pending.add(ritem)

            elif item.target == Item.ROOT:
                log.debug('  root')
                if self.is_goal(item):
                    chart['START'].add((item, ))
                    success = True
                    log.debug("success!")

                # Same wake-up scheme as above, using the passive-item tables.
                passive_item_lookup[item.self_key].add(item)
                for ritem in passive_item_rev_lookup[item.self_key]:
                    if ritem not in pending:
                        log.debug('    retrieving', ritem)
                        queue.append(ritem)
                        pending.add(ritem)

            elif item.target == Item.TERMINAL:
                log.debug('  terminal')
                # Try every graph edge carrying the label the item expects;
                # falsy results from item.terminal() are discarded.
                new_items = [
                    item.terminal(edge)
                    for edge in terminal_lookup[item.next_key]
                ]
                new_items = [i for i in new_items if i]
                for nitem in new_items:
                    chart[nitem].add((item, ))
                    if nitem not in pending and nitem not in visited:
                        log.debug('    new item!', nitem)
                        queue.append(nitem)
                        pending.add(nitem)

            else:
                if item.target == Item.BINARY:
                    log.debug('  binary')
                    rev_lookup = tree_node_rev_lookup
                    lookup = tree_node_lookup
                    action = self.item_class.binary
                elif item.target == Item.NONTERMINAL:
                    log.debug('  nonterminal')
                    rev_lookup = passive_item_rev_lookup
                    lookup = passive_item_lookup
                    action = self.item_class.nonterminal
                else:
                    # Raised explicitly (instead of `assert False`) so the
                    # check is not stripped when running with -O.
                    raise AssertionError(
                        'unexpected item target: %r' % (item.target, ))

                # Record that this item waits on next_key, then combine it
                # with every matching item that is already available.
                rev_lookup[item.next_key].add(item)
                for oitem in lookup[item.next_key]:
                    if (item, oitem) in attempted:
                        continue
                    attempted.add((item, oitem))
                    log.debug('  try', oitem, oitem.subgraph)
                    nitem = action(item, oitem)
                    if not nitem:
                        continue
                    log.debug('    new item!', nitem)
                    chart[nitem].add((item, oitem))
                    if nitem not in pending and nitem not in visited:
                        queue.append(nitem)
                        pending.add(nitem)

        if success:
            log.chatter('  success!')

        etime = timer() - start_time
        log.chatter('done in %.2fs' % etime)
        return chart
Beispiel #6
0
    def parse(self, graph):
      """
      Parses the given graph with the provided grammar.

      Axiom items are created for every tree leaf of every rule returned by
      ``self.grammar.reachable_rules(graph, None)`` and are then processed
      from a FIFO work list until it is empty.

      Args:
          graph: the input graph. It must provide ``compute_fw_table()`` and
              ``triples(nodelabels=...)``.

      Returns:
          The chart: a ``ddict(set)`` mapping each derived item to a set of
          tuples holding the item(s) it was derived from. Goal items are
          additionally recorded under the key ``'START'``. The chart is
          returned whether or not a goal item was found.
      """

      # This function is very similar to its counterpart in the regular
      # (non-tree-decomposing) parser. Read the comments there to understand how it
      # works.

      # time.clock() was removed in Python 3.8; use perf_counter() when it
      # exists and fall back to clock() only on interpreters that lack it.
      timer = getattr(time, 'perf_counter', None) or time.clock
      start_time = timer()
      log.chatter('parse...')

      # ensure that the input graph has its shortest-path table precomputed
      graph.compute_fw_table()

      chart = ddict(set)
      # TODO command line option to switch rule filtering on/off
      pgrammar = [self.grammar[r] for r in self.grammar.reachable_rules(graph, None)]
      queue = deque()  # items left to be handled, in FIFO order
      pending = set()  # mirror of queue for constant-time membership tests
      attempted = set()  # (item, other item) pairs already combined
      visited = set()  # items that have been taken off the queue
      terminal_lookup = ddict(set)  # edge label -> graph edges
      # Items with target ROOT / NONE, indexed by their self_key ...
      passive_item_lookup = ddict(set)
      tree_node_lookup = ddict(set)
      # ... and the items waiting for them, indexed by their next_key.
      passive_item_rev_lookup = ddict(set)
      tree_node_rev_lookup = ddict(set)

      for edge in graph.triples(nodelabels = self.nodelabels):
        terminal_lookup[edge[1]].add(edge)

      # Seed the work list with one axiom per tree leaf of each rule.
      for rule in pgrammar:
        for leaf in rule.tree_leaves:
          axiom = self.item_class(rule, leaf, graph, nodelabels = self.nodelabels)
          queue.append(axiom)
          pending.add(axiom)
          assert leaf not in rule.tree_to_edge

      success = False

      while queue:
        item = queue.popleft()
        pending.remove(item)
        visited.add(item)
        log.debug('handling', item, item.subgraph)

        if item.target == Item.NONE:
          log.debug('  none')
          # Register the item and re-enqueue every item that was waiting
          # for this key (unless it is already queued).
          tree_node_lookup[item.self_key].add(item)
          for ritem in tree_node_rev_lookup[item.self_key]:
            if ritem not in pending:
              queue.append(ritem)
              pending.add(ritem)

        elif item.target == Item.ROOT:
          log.debug('  root')
          if self.is_goal(item):
            chart['START'].add((item,))
            success = True
            log.debug("success!")

          # Same wake-up scheme as above, using the passive-item tables.
          passive_item_lookup[item.self_key].add(item)
          for ritem in passive_item_rev_lookup[item.self_key]:
            if ritem not in pending:
              log.debug('    retrieving', ritem)
              queue.append(ritem)
              pending.add(ritem)

        elif item.target == Item.TERMINAL:
          log.debug('  terminal')
          # Try every graph edge carrying the label the item expects;
          # falsy results from item.terminal() are discarded.
          new_items = [item.terminal(edge) for edge in terminal_lookup[item.next_key]]
          new_items = [i for i in new_items if i]
          for nitem in new_items:
            chart[nitem].add((item,))
            if nitem not in pending and nitem not in visited:
              log.debug('    new item!', nitem)
              queue.append(nitem)
              pending.add(nitem)

        else:
          if item.target == Item.BINARY:
            log.debug('  binary')
            rev_lookup = tree_node_rev_lookup
            lookup = tree_node_lookup
            action = self.item_class.binary
          elif item.target == Item.NONTERMINAL:
            log.debug('  nonterminal')
            rev_lookup = passive_item_rev_lookup
            lookup = passive_item_lookup
            action = self.item_class.nonterminal
          else:
            # Raised explicitly (instead of `assert False`) so the check is
            # not stripped when running with -O.
            raise AssertionError('unexpected item target: %r' % (item.target,))

          # Record that this item waits on next_key, then combine it with
          # every matching item that is already available.
          rev_lookup[item.next_key].add(item)
          for oitem in lookup[item.next_key]:
            if (item, oitem) in attempted:
              continue
            attempted.add((item, oitem))
            log.debug('  try', oitem, oitem.subgraph)
            nitem = action(item, oitem)
            if not nitem:
              continue
            log.debug('    new item!', nitem)
            chart[nitem].add((item, oitem))
            if nitem not in pending and nitem not in visited:
              queue.append(nitem)
              pending.add(nitem)

      if success:
        log.chatter('  success!')

      etime = timer() - start_time
      log.chatter('done in %.2fs' % etime)
      return chart