예제 #1
0
 def parse_agenda(self):
     """Run the agenda-driven glue parse and return the hypergraph.

     Items are popped from ``self.agenda`` until it is empty.  For every
     neighboring pair produced by ``neighboring_pairs`` that has not been
     combined before, a new item is built with ``make_item``; items that
     ``chart_add`` accepts are pushed onto the agenda, the neighbor index
     and ``self.glue_nodes``.  The node returned by ``final_glue`` then
     becomes the root of ``self.hg``, which is topologically sorted and
     returned.  ``stats()`` is called last and its result is discarded.
     """
     while self.agenda:
         current = self.agenda.pop()
         if logger.level >= 4:
             logger.writeln('pop: %s' % current)
         for left, right, inverted in self.neighboring_pairs(current):
             if logger.level >= 4:
                 logger.writeln('neighbors: %s %s' % (left, right))
             # avoid duplicated edges. note that in ABC grammar, if the
             # boxes of both children are given, the nt of the new item
             # is fixed, so (nt, box) of the two children is enough
             signature = (
                 (left.nt, left.fi, left.fj, left.ei, left.ej)
                 + (right.nt, right.fi, right.fj, right.ei, right.ej)
             )
             if signature in self.edge_index:
                 continue
             self.edge_index.add(signature)
             combined = self.make_item(left, right, inverted)
             if not self.chart_add(combined):
                 # an equivalent item already exists in the chart
                 continue
             self.agenda.append(combined)
             self.neighbor_index.add(combined)
             self.glue_nodes.append(combined)
             if logger.level >= 4:
                 logger.writeln('push: %s' % combined)
     self.hg = Hypergraph(self.final_glue())
     self.hg.topo_sort()
     self.stats()
     return self.hg
예제 #2
0
 def final_glue1(self):
     """Try to cover all phrases AND glue nodes with one START edge.

     Candidates (phrases plus glue nodes) are sorted so that the largest
     node is popped first.  Each popped node becomes a root, and every
     candidate reachable from it is dropped.  All roots are then attached,
     in pop order, as tails of a single START edge.

     Returns:
         The new START node spanning ``(0, self.n1, 0, self.n2)``.
     """
     # The parser stores glue items in ``self.glue_nodes``; the previous
     # ``self.glue_rules`` is not an attribute the parser ever creates.
     candidates = self.phrases + self.glue_nodes
     # topo sort. root node at the end
     candidates.sort()
     roots = []
     while candidates:
         root = candidates.pop()
         print('pop: %s' % root)
         roots.append(root)
         hg = Hypergraph(root)
         hg.find_reachable_nodes()
         # hg.found is keyed by id(node)
         candidates = [n for n in candidates if id(n) not in hg.found]
     top_rule = Rule()
     top_rule.lhs = START
     top_edge = PhraseHGEdge(top_rule)
     for root in roots:
         top_rule.f.append(root.nt)
         top_rule.e.append(root.nt)
         top_edge.add_tail(root)
     # monotone alignment: the i-th e symbol maps to the i-th f symbol
     top_rule.e2f = list(range(len(top_rule.f)))
     top_node = PhraseHGNode(START, 0, self.n1, 0, self.n2)
     top_node.add_incoming(top_edge)
     return top_node
예제 #3
0
def do_match(img1, img2, cang, crat, cdesc):
    """Match two images through their hypergraphs and display the result.

    Features are extracted from both images, a ``Hypergraph`` is built for
    each, hyperedges and points are matched, and the point matches (sorted
    by ascending ``distance``) are drawn.  The function then blocks on
    ``cv2.waitKey()`` and closes all OpenCV windows.

    Args:
        img1: target image (its features are labelled 'target.jpg').
        img2: reference image (its features are labelled 'reference.jpg').
        cang, crat, cdesc: forwarded unchanged to ``match``; presumably
            angle / ratio / descriptor weights -- TODO confirm.
    """
    M = None

    # Get features and distances between every pair of points from both images
    (kpts1, des1) = get_features(img1, M, 'target.jpg')
    (kpts2, des2) = get_features(img2, M, 'reference.jpg')

    Hgt = Hypergraph(kpts1, des1)
    Hgr = Hypergraph(kpts2, des2)

    # Single-argument print calls behave the same on Python 2 and 3.
    print('Hypergraph construction done')
    # NOTE(review): 0.7, 0.75 and True are passed positionally to match();
    # their meaning is not visible here.
    edge_matches, point_matches = match(Hgt.E, Hgr.E, kpts1, kpts2, des1, des2,
                                        cang, crat, cdesc, 0.7, 0.75, True)
    print('Hyperedges matching done')

    point_matches = sorted(point_matches, key=lambda x: x.distance)
    draw.points_match(point_matches, kpts1, kpts2, img1, img2)

    cv2.waitKey()
    cv2.destroyAllWindows()
예제 #4
0
    def __init__(self, dag1, dag2):
        """
        Wrap each input graph in a DirectedAcyclicGraphMapper and create a
        Hypergraph (constructed with no arguments) to work on.

        The first and second parameters must be DirectedAcyclicGraphs as
        specified in the file datastructures.py.
        """

        self.dag1_mapper = DirectedAcyclicGraphMapper(dag1)
        self.dag2_mapper = DirectedAcyclicGraphMapper(dag2)
        self.hypergraph = Hypergraph()
예제 #5
0
 def build_mini_hypergraph(edges):
     """Collect the non-composed edges under ``edges`` into a hypergraph.

     The hypergraph is rooted at the head of the first edge.  Edges are
     processed from a work list: an edge with no ``composed_edges`` is
     added to the hypergraph, any other edge is replaced by its composed
     edges.  ``edges`` itself is not modified; it must be non-empty.
     """
     mini_hg = Hypergraph(edges[0].head)
     pending = list(edges)
     while pending:
         current = pending.pop()
         if len(current.composed_edges) != 0:
             # expand a composed edge into its parts
             pending.extend(current.composed_edges)
             continue
         mini_hg.add(current)
     return mini_hg
예제 #6
0
 def final_glue(self):
     """Pick root nodes and hang them under a fresh START node.

     Phrases and glue nodes are sorted and popped from the end.  A popped
     node that spans the whole box ``(0, n1, 0, n2)`` is stored in
     ``self.top_roots``, any other one in ``self.other_roots``.  After
     each pop, candidates reachable from the new root are dropped, as are
     non-PHRASE candidates that compare smaller than the root.

     Every top root gets its own unary START edge.  The other roots share
     a single START edge, added only when ``glue_missing_phrases`` is set
     or there is no top root.

     Returns:
         The START node spanning ``(0, self.n1, 0, self.n2)``.
     """
     # topo sort. root node at the end
     unattached = sorted(self.phrases)
     candidates = sorted(self.phrases + self.glue_nodes)
     self.top_roots = []
     self.other_roots = []
     whole_box = (0, self.n1, 0, self.n2)
     while candidates:
         root = candidates.pop()
         if (root.fi, root.fj, root.ei, root.ej) == whole_box:
             bucket = self.top_roots
         else:
             bucket = self.other_roots
         bucket.append(root)
         reach = Hypergraph(root)
         reach.find_reachable_nodes()
         covered = reach.found
         unattached = [n for n in unattached if id(n) not in covered]
         candidates = [
             n for n in candidates
             if id(n) not in covered and (n.nt == PHRASE or not n < root)
         ]

     top_node = PhraseHGNode(START, 0, self.n1, 0, self.n2)
     # add one edge for each top root
     for top_root in self.top_roots:
         unary_rule = Rule()
         unary_rule.lhs = START
         unary_rule.f = [top_root.nt]
         unary_rule.e = [top_root.nt]
         unary_rule.e2f = [0]
         unary_edge = PhraseHGEdge(unary_rule)
         unary_edge.add_tail(top_root)
         top_node.add_incoming(unary_edge)

     # add one edge for all other roots
     want_catch_all = glue_missing_phrases or not self.top_roots
     if want_catch_all and self.other_roots:
         shared_rule = Rule()
         shared_rule.lhs = START
         shared_edge = PhraseHGEdge(shared_rule)
         for other_root in self.other_roots:
             shared_rule.f.append(other_root.nt)
             shared_rule.e.append(other_root.nt)
             shared_edge.add_tail(other_root)
         shared_rule.e2f = list(range(len(shared_rule.f)))
         top_node.add_incoming(shared_edge)
     return top_node
예제 #7
0
def read_string_file(stream):
	"""Yield one hypergraph per line read from ``stream``.

	Lines are read with ``stream.readline()`` until it returns an empty
	value.  Byte lines are decoded as UTF-8; text lines are used as they
	are, so both binary and text streams work.  Each line is stripped and
	passed to ``Hypergraph.from_surface_string``.

	If a line contains ``[`` or ``]`` a message is written to stderr and
	the process exits with status 1 (``SystemExit``).
	"""
	while True:
		line = stream.readline()
		if not line:
			break
		# Decode before inspecting the text so the bracket test below
		# compares str with str on Python 3 as well.
		if isinstance(line, bytes):
			line = line.decode('utf-8')
		if '[' in line or ']' in line:
			sys.stderr.write('Square brackets found in input. Please escape these to -LSB- and -RSB-\n')
			sys.exit(1)
		yield Hypergraph.from_surface_string(line.strip())
 def read_tree_file(self, treefile):
     """Read an indented derivation tree and return it as a hypergraph.

     Only lines containing ``|||`` or consisting of ``None`` are used.
     The number of leading spaces gives the depth, two spaces per level.
     Each such line creates a ``Node`` that is attached as a tail of the
     first incoming edge of the node on top of the stack.  Lines other
     than ``None`` also get a ``PhraseHGEdge`` whose rule is parsed from
     the line.

     Args:
         treefile: path of the tree file to read.

     Returns:
         A topologically sorted ``Hypergraph`` rooted at the first node
         pushed on the stack.

     Raises:
         IndexError: if the stack is empty at the end, e.g. when no line
             is indented deeper than its predecessor.
     """
     indent_level = 2
     current_indent = 0
     stack = []
     # The context manager closes the file even if a line fails to parse.
     with open(treefile) as f:
         for line in f:
             # TODO: why are there None node lines?
             if not ('|||' in line or line.strip() == 'None'):
                 continue
             indent = len(line) - len(line.lstrip(' '))
             # TODO: hack. None lines have wrong indent in
             # max derivation viterbi trees
             if indent == current_indent + indent_level:
                 # one level deeper: the previous node becomes the parent
                 current_indent = indent
                 stack.append(node)
             elif indent < current_indent:
                 npop = (current_indent - indent) // indent_level
                 current_indent = indent
                 for _ in range(npop):
                     stack.pop()
             node = Node()
             if len(stack) > 0:
                 stack[-1].incoming[0].add_tail(node)
             # TODO: why are there None nodes? these nodes have no incoming
             # edges. they are just a nonterminal as a leaf.
             if line.strip() != 'None':
                 rule = Rule()
                 rule.fromstr(line)
                 edge = PhraseHGEdge()
                 edge.rule = rule
                 node.add_incoming(edge)
     hg = Hypergraph(stack[0])
     hg.topo_sort()
     return hg
예제 #9
0
def combine_trees(trees_to_combine):
	"""Merge scored trees into one hypergraph.

	``trees_to_combine`` is a sequence of ``(tree, score)`` pairs.  Scores
	are divided by their sum; when the sum is zero every tree gets the
	same share ``1 / len(trees_to_combine)``.  Each tree is passed through
	``computeSpans``, converted with ``Hypergraph.from_tree`` and sanity
	checked.  All later hypergraphs are then combined into the first one,
	which is returned.  An empty input returns ``None``.
	"""
	if not trees_to_combine:
		return None

	score_mass = sum(weight for _, weight in trees_to_combine)
	converted = []
	for tree, weight in trees_to_combine:
		if score_mass != 0.0:
			share = weight / score_mass
		else:
			# no score mass at all: fall back to a uniform weighting
			share = 1.0 / len(trees_to_combine)
		computeSpans(tree)
		tree_hg = Hypergraph.from_tree(tree, share)
		tree_hg.sanity_check()
		converted.append(tree_hg)

	merged, remaining = converted[0], converted[1:]
	for extra in remaining:
		merged.combine(extra)
	return merged
예제 #10
0
    def compare_communities(self):
        """Classify community transitions between consecutive time frames.

        For every time frame except the last one, ``self.inclusions`` is
        reset and filled by ``initialize_inclusions``.  Each community of
        the frame is then paired with each community of the next frame and
        classified through ``Event(...).classify()``.  Every pair is
        recorded in ``self.results``; pairs classified as 'growing',
        'shrinking' or 'continuing' also become edges of the frame's
        directed graph.  That graph is wrapped in a ``Hypergraph`` and
        appended to ``self.hypergraphs``.
        """
        for index, interval in enumerate(self.graphs):
            if index >= len(self.graphs) - 1:
                # the last frame has no successor to compare against
                continue

            self.inclusions = {}
            window_id = 'TF%s -> TF%s' % (index, index + 1)
            transitions = nx.DiGraph(window=window_id)
            print('Initialize inclusions dict start...')
            transitions = self.initialize_inclusions(index, transitions)
            print('Initialize inclusions dict finish...')

            for community_t in interval:
                for community_t1 in self.graphs[index + 1]:
                    cid_t = community_t.graph['cid']
                    cid_t1 = community_t1.graph['cid']
                    pair_info = self.inclusions[cid_t][cid_t1]
                    inclusion = pair_info['inclusion']
                    inversed = pair_info['inversed_inclusion']

                    result = Event(community_t, community_t1, inclusion,
                                   inversed, self.inclusions).classify()
                    if result in ('growing', 'shrinking', 'continuing'):
                        transitions.add_edge(community_t, community_t1,
                                             event_type=result)
                    self.results.append({
                        'network_t': community_t.graph['cid'],
                        'network_t1': community_t1.graph['cid'],
                        'resulted_event': result,
                    })

            self.hypergraphs.append(Hypergraph(transitions))
예제 #11
0
class ABCParser(object):
    """Bilingual parser that glues hiero rules into a hypergraph with
    ABC glue grammar.

    The chart maps a box ``(fi, fj, ei, ej)`` to a dict that holds at most
    one node per nonterminal for that box.
    """

    def __init__(self, n1, n2, phrases):
        """
        n1: French length
        n2: English length
        phrases: a list of PhraseHGNodes that have been partially linked
          according to hiero rule extraction
        """
        self.n1 = n1
        self.n2 = n2
        self.chart = {}
        self.neighbor_index = NeighborIndex()
        self.edge_index = set()
        self.agenda = []
        self.phrases = phrases
        self.glue_nodes = []
        # Seed the chart, the agenda and the neighbor index with the phrases.
        for phrase in phrases:
            box = (phrase.fi, phrase.fj, phrase.ei, phrase.ej)
            self.chart.setdefault(box, {})[phrase.nt] = phrase
            self.agenda.append(phrase)
            self.neighbor_index.add(phrase)

    # not used, too slow
    def parse(self):
        """Exhaustive CYK-style glue parse over all bilingual spans.

        For every span and every split point, the items of the two
        sub-boxes are combined in straight and in inverted order and added
        to the chart.

        NOTE(review): this ends by calling ``stats()``, which reads
        ``self.hg``, ``self.top_roots`` and ``self.other_roots``; neither
        this method nor ``__init__`` sets them -- confirm before reviving
        this code path.
        """
        self.glue_nodes = []
        for i1, j1, i2, j2 in bi_cyk_spans(self.n1, self.n2):
            for k1 in range(i1 + 1, j1):
                for k2 in range(i2 + 1, j2):
                    # straight combination
                    bin1 = self.chart.get((i1, k1, i2, k2), {})
                    bin2 = self.chart.get((k1, j1, k2, j2), {})
                    for item1 in bin1.values():
                        for item2 in bin2.values():
                            if item2.nt != STRAIGHT:
                                new_item = self.make_item(item1,
                                                          item2,
                                                          False)
                                self.chart_add(new_item)
                    # inverted combination
                    bin1 = self.chart.get((i1, k1, k2, j2), {})
                    bin2 = self.chart.get((k1, j1, i2, k2), {})
                    for item1 in bin1.values():
                        for item2 in bin2.values():
                            if item2.nt != INVERTED:
                                new_item = self.make_item(item1,
                                                          item2,
                                                          True)
                                self.chart_add(new_item)
        self.stats()

    def parse_agenda(self):
        """Run the agenda-driven glue parse and return the hypergraph.

        Items are popped until the agenda is empty; every new neighboring
        pair is combined, and items accepted by ``chart_add`` are pushed
        back.  The result of ``final_glue`` becomes the root of
        ``self.hg``, which is topologically sorted and returned.
        """
        while len(self.agenda) > 0:
            item = self.agenda.pop()
            if logger.level >= 4:
                logger.writeln('pop: %s' % item)
            for item1, item2, inverted in self.neighboring_pairs(item):
                # avoid duplicated edges. note that in ABC grammar,
                # if the boxes of item1 and item2 are given, the nt of the
                # new item is fixed
                if logger.level >= 4:
                    logger.writeln('neighbors: %s %s' % (item1, item2))
                key = (item1.nt, item1.fi, item1.fj, item1.ei, item1.ej,
                       item2.nt, item2.fi, item2.fj, item2.ei, item2.ej)
                if key not in self.edge_index:
                    self.edge_index.add(key)
                    new_item = self.make_item(item1, item2, inverted)
                    if self.chart_add(new_item):
                        self.agenda.append(new_item)
                        self.neighbor_index.add(new_item)
                        self.glue_nodes.append(new_item)
                        if logger.level >= 4:
                            logger.writeln('push: %s' % new_item)
        root = self.final_glue()
        self.hg = Hypergraph(root)
        self.hg.topo_sort()
        # the string returned by stats() is discarded here
        self.stats()
        return self.hg

    def final_glue(self):
        """Pick root nodes and hang them under a fresh START node.

        Phrases and glue nodes are sorted and popped from the end.  Nodes
        spanning the whole box go to ``self.top_roots``, all others to
        ``self.other_roots``.  After each pop, candidates reachable from
        the new root are dropped, as are non-PHRASE candidates that
        compare smaller than the root.

        Each top root gets its own unary START edge.  The other roots
        share one START edge, added only when ``glue_missing_phrases`` is
        set or there is no top root.
        """
        candidates = self.phrases + self.glue_nodes
        # topo sort. root node at the end
        candidates.sort()
        self.top_roots = []
        self.other_roots = []
        while len(candidates) > 0:
            root = candidates.pop()
            if (root.fi == 0 and
                root.fj == self.n1 and
                root.ei == 0 and
                root.ej == self.n2):
                self.top_roots.append(root)
            else:
                self.other_roots.append(root)
            hg = Hypergraph(root)
            hg.find_reachable_nodes()
            candidates = [n for n in candidates if id(n) not in hg.found and
                          (n.nt == PHRASE or not n < root)]
        top_node = PhraseHGNode(START, 0, self.n1, 0, self.n2)
        # add one edge for each top root
        for root in self.top_roots:
            rule = Rule()
            rule.lhs = START
            rule.f = [root.nt]
            rule.e = [root.nt]
            rule.e2f = [0]
            edge = PhraseHGEdge(rule)
            edge.add_tail(root)
            top_node.add_incoming(edge)
        # add one edge for all other roots
        if ((glue_missing_phrases or len(self.top_roots) == 0)
            and len(self.other_roots) > 0):
            rule = Rule()
            rule.lhs = START
            edge = PhraseHGEdge(rule)
            for root in self.other_roots:
                rule.f.append(root.nt)
                rule.e.append(root.nt)
                edge.add_tail(root)
            rule.e2f = list(range(len(rule.f)))
            top_node.add_incoming(edge)
        return top_node

    # not used
    def final_glue1(self):
        """Try to cover all phrases AND glue nodes with one START edge.

        Every popped candidate becomes a root and everything reachable
        from it is dropped from the candidates; all roots are attached as
        tails of a single START edge.
        """
        # The glue items live in ``self.glue_nodes``; no ``glue_rules``
        # attribute is ever created on the parser.
        candidates = self.phrases + self.glue_nodes
        # topo sort. root node at the end
        candidates.sort()
        roots = []
        while len(candidates) > 0:
            root = candidates.pop()
            print('pop: %s' % root)
            roots.append(root)
            hg = Hypergraph(root)
            hg.find_reachable_nodes()
            candidates = [n for n in candidates if id(n) not in hg.found]
        top_rule = Rule()
        top_rule.lhs = START
        top_edge = PhraseHGEdge(top_rule)
        for root in roots:
            top_rule.f.append(root.nt)
            top_rule.e.append(root.nt)
            top_edge.add_tail(root)
        top_rule.e2f = list(range(len(top_rule.f)))
        top_node = PhraseHGNode(START, 0, self.n1, 0, self.n2)
        top_node.add_incoming(top_edge)
        return top_node

    def neighboring_pairs(self, item):
        """
        return value is items in the order they appear on f side, and whether
        they are inverted.
        The constraint of ABC grammar is also applied here.
        """
        # The second argument of neighbor_index.get selects one of four
        # lookups (0-3); presumably one per corner of the box -- confirm
        # against NeighborIndex.
        for neighbor in self.neighbor_index.get((item.fi, item.ej), 0):
            if item.nt != INVERTED:
                yield neighbor, item, True
        for neighbor in self.neighbor_index.get((item.fi, item.ei), 1):
            if item.nt != STRAIGHT:
                yield neighbor, item, False
        for neighbor in self.neighbor_index.get((item.fj, item.ei), 2):
            if neighbor.nt != INVERTED:
                yield item, neighbor, True
        for neighbor in self.neighbor_index.get((item.fj, item.ej), 3):
            if neighbor.nt != STRAIGHT:
                yield item, neighbor, False

    def make_item(self, item1, item2, inverted):
        """item1 and item2 is always given in the order they appear
        on the f side.

        Returns a new node whose single incoming edge has item1 and item2
        as tails.  The rule is INVERTED (e side swapped) when ``inverted``
        is true and STRAIGHT otherwise.
        """
        rule = Rule()
        rule.f = [item1.nt, item2.nt]
        fi = item1.fi
        fj = item2.fj
        if inverted:
            rule.lhs = INVERTED
            rule.e = [item2.nt, item1.nt]
            rule.e2f = [1, 0]
            ei = item2.ei
            ej = item1.ej
        else:
            rule.lhs = STRAIGHT
            rule.e = [item1.nt, item2.nt]
            rule.e2f = [0, 1]
            ei = item1.ei
            ej = item2.ej
        edge = PhraseHGEdge(rule)
        edge.add_tail(item1)
        edge.add_tail(item2)
        new_item = PhraseHGNode(rule.lhs, fi, fj, ei, ej)
        new_item.add_incoming(edge)
        return new_item

    def chart_add(self, item):
        """Add ``item`` to the chart cell of its box.

        If the cell already holds a node with the same nonterminal, the
        first incoming edge of ``item`` is merged into that node and
        False is returned.  Otherwise ``item`` is stored and True is
        returned.
        """
        cell = self.chart.setdefault((item.fi, item.fj, item.ei, item.ej),
                                     {})
        # the ABCParser applies only glue rules.
        old_item = cell.get(item.nt)
        # Compare against None explicitly so a stored node is recognised
        # regardless of its truth value.
        if old_item is not None:
            old_item.add_incoming(item.incoming[0])
            return False
        cell[item.nt] = item
        return True

    def stats(self):
        """Return a multi-line text summary of the last parse.

        Reads ``self.hg``, ``self.top_roots`` and ``self.other_roots``, so
        it is only valid after ``parse_agenda`` has built them.
        """
        result = '--ABCParser Stats--\n'

        top_bin = self.chart.get((0, self.n1, 0, self.n2))
        if top_bin is None:
            result += 'parse failed\n'
        else:
            result += 'parse succeeded\n'

        result += self.hg.stats()

        # One pass over the edges: count rule kinds and remember which
        # rule objects made it into the hypergraph.
        hiero_rules = 0
        glue_rules = 0
        hg_rules = set()
        for edge in self.hg.edges():
            if edge.rule.lhs == PHRASE:
                hiero_rules += 1
            else:
                glue_rules += 1
            hg_rules.add(id(edge.rule))
        result += 'hiero rules: %s\n' % hiero_rules
        result += 'glue rules: %s\n' % glue_rules

        # Rules of the input phrases that are not part of the hypergraph.
        unglued_rules = [edge.rule
                         for node in self.phrases
                         for edge in node.incoming
                         if id(edge.rule) not in hg_rules]

        roots = self.top_roots + self.other_roots
        result += 'roots: %s\n' % len(roots)
        for node in roots:
            result += '%s\n' % node

        result += 'unglued rules: %s\n' % len(unglued_rules)
        for rule in unglued_rules:
            result += '%s\n' % rule
        return result
예제 #12
0
 def induced_graph(self, v, force_copy=False):
     """Return a primal view of the hypergraph induced on vertices ``v``.

     When ``force_copy`` is false and ``v`` holds exactly the nodes of
     the current hypergraph, this view itself is returned.  Otherwise a
     new ``Hypergraph`` is created on ``v``, edges are induced from the
     current edge set, and the result is wrapped in a new
     ``HypergraphPrimalView``.
     """
     same_vertex_set = (not force_copy) and self.hg.nodes() == set(v)
     if same_vertex_set:
         return self
     induced = Hypergraph(vertices=v)
     induced.induce_edges(self.__hg.edges())
     return HypergraphPrimalView(induced)
예제 #13
0
    def run(self):
        """Decode ``self.line`` and write the log, output and k-best files.

        Steps, in order:
          1. update every grammar with ``self.id`` and open the log file
             ``<FLAGS.run_dir>/log_<self.suffix>`` as ``self.flog``;
          2. build a sentence-local grammar from OOV positions
             (``self.oov_idx``) and NER items (``self.ner_items``);
          3. run the decoder selected by ``FLAGS.decoding_method``
             ('agenda', 'cyk' or 'earley') and record the elapsed time in
             ``self.time``;
          4. if a goal item was built, extract the best paths from the
             hypergraph and store the best translation in ``self.out``;
          5. write the statistics, the output file and, if requested, the
             k-best file.

        NOTE(review): ``self.flog`` is closed only on the last line; an
        exception raised earlier leaves the log file open.
        """
        # update per-sentence grammars, if there's any
        for g in self.grammars:
            g.update(self.id)

        self.flog = open('%s/%s_%s' % (FLAGS.run_dir,
                                  'log',
                                  self.suffix),
                    'w')
        if FLAGS.show_time:
            self.flog.write('running on %s\n\n' % socket.gethostname())
            self.flog.flush()

        fwords = self.line.strip().split()


        # added by freesunshine, build the local grammar for oov words for each sentence
        rules = []
        if self.oov_idx is not None and len(self.oov_idx) > 0:
            #oov_weight = 8.0
            oov_weight = 0.0001
            for idx in self.oov_idx:
                fw = fwords[idx]
                # every OOV source word gets "." as its target side
                ew = "."
                rule_str = "[A0-0] ||| %s ||| %s ||| %lf %lf %lf" %(fw, ew, oov_weight, oov_weight, oov_weight)
                rr = Rule()
                rr.fromstr(rule_str)
                rules.append(rr)

        if self.ner_items is not None and len(self.ner_items) > 0:
            for item in self.ner_items:
                concept_weight = 10.0
                # item[0] is the (start, end) word span, item[1] the rule text
                st = item[0][0]
                ed = item[0][1]
                fw = ' '.join(fwords[st:ed])
                #concept_weight *= pow((ed-st), 2)
                ew = item[1]
                # NOTE(review): value is not used below, but int() still
                # raises if the third character of ew is not a digit.
                value = int(ew[2])

                #Here is the feature for difference of nonterminal type
                #concept_weight /= pow(1.4, value)

                #Here is the feature for the favor of longer spans
                #concept_weight *= pow(2, ed-st)

                #Here is the feature for the number of edges
                #concept_weight /= pow(2.0, get_num_edges(ew))
                #print >>sys.stder, ew, concept_weight
                #rule_str = "[A1-1] ||| %s ||| %s ||| " % (fw, ew)
                # ew is followed directly by the weights; presumably it
                # already holds the other rule fields -- TODO confirm.
                rule_str = "%s ||| " % ew
                #weight = 5
                # NOTE(review): both branches append the same weights.
                if fw == ';':
                    rule_str += "%lf %lf %lf" % (concept_weight, concept_weight, concept_weight)
                else:
                    rule_str += "%lf %lf %lf" % (concept_weight, concept_weight, concept_weight)
                rr = Rule()
                #print rule_str
                rr.fromstr(rule_str)
                rules.append(rr)

        #print '===== local_gr ====='
        #for r in rules:
        #    print r

        # The local grammar exists only when at least one rule was built;
        # it shares the feature set of the first global grammar.
        local_gr = None
        if len(rules) > 0:
          local_gr = Grammar(FLAGS.rule_bin_size)
          local_gr.build(rules, self.grammars[0].features)

        if FLAGS.preprocess:
            # map the position of each '$number' / '$date' token to the
            # second field of the j-th entry of self.special
            self.fidx2replacement = {}
            j = 0
            for i, token in enumerate(fwords):
                if token in ('$number', '$date'):
                    self.fidx2replacement[i] = self.special[j][1]
                    j += 1

        self.flog.write('[%s][%s words] %s\n' %
                   (self.id, len(fwords), self.line))

        decoder = Decoder(fwords,
                          self.grammars,
                          self.features,
                          local_gr)

        begin_time = time()
        if FLAGS.decoding_method == 'agenda':
            item = decoder.decode()
        elif FLAGS.decoding_method == 'cyk':
            item = decoder.decode_cyk()
        elif FLAGS.decoding_method == 'earley':
            item = decoder.decode_earley()
        else:
            assert False, '"%s" not valid decoding option' \
                    % FLAGS.decoding_method
        self.time = time() - begin_time

        if item is None:
            self.out = '[decoder failed to build a goal item]'
        else:
            # the decoder returns a (goal item, success flag) pair
            ttt, succ = item
            item = ttt
            hg = Hypergraph(item)
            hg.set_semiring(hypergraph.SHORTEST_PATH)
            hg.set_functions(lambda x: x.cost, None, None)
            hg.topo_sort()
            self.kbest = hg.root.best_paths()
            #output_tokens = self.kbest[0].translation[:]

            #if FLAGS.preprocess:
            #    for i in range(len(output_tokens)):
            #        if output_tokens[i] in ('$number', '$date'):
            #            fidx = self.kbest[0].composed_rule.we2f[i]
            #            if fidx is not None:
            #                output_tokens[i] = self.fidx2replacement[fidx]

            # @freesunshine target side string output
            #self.out = ' '.join(output_tokens[FLAGS.lm_order-1:
            #                                  1-FLAGS.lm_order])

            self.flog.write('Decuction Tree:\n%s\n' % self.kbest[0].tree_str())
            #self.out = str(self.kbest[0].translation)
            #if succ:
            self.out = self.kbest[0].translation.to_amr_format()[0]
            #else:
            #    self.out = self.kbest[0].translation.toAMR()
            # collapse the multi-line output onto a single line
            lines = [x.strip() for x in self.out.split('\n')]
            self.out = "".join(lines)

            self.hg = hg
            if FLAGS.output_hypergraph:
                self.write_hypergraph()

        self.flog.write('%s\n' % self.out)
        self.flog.write('\n')
        #if item is not None:
        #    self.flog.write(self.kbest[0].tree_str())
        #    self.flog.write('\n')
        #    self.flog.write(hg.stats())
        #    self.flog.write('\n')
        self.flog.write(decoder.agenda_stats())
        self.flog.write('\n')
        self.flog.write(decoder.chart.stats())
        self.flog.write('\n')
        for dotchart in decoder.dotcharts:
            self.flog.write(dotchart.stats())
            self.flog.write('\n')

        if FLAGS.show_time:
            timeline = '{:<35}{:>15.2f}\n'.format('[time]:', self.time)
            self.flog.write(timeline)
        self.write_output_file()
        if FLAGS.output_kbest:
            self.write_kbest_to_file()
        self.flog.close()
예제 #14
0
    def setUp(self):
        """Build the parse forest for "John saw a girl with a telescope".

        The forest holds both attachments of the PP, i.e. two incoming
        edges on [VP,1,7].  The [NN,1,2] reading of "saw" is created but
        never used as a tail.  The hypergraph rooted at S is stored in
        ``self.hp``.
        """
        def connect(head, prob, *tails):
            # one incoming edge: head -> tails, carrying the probability
            edge = ForestEdge()
            for tail in tails:
                edge.add_tail(tail)
            edge.prob = prob
            head.add_incoming(edge)

        # words
        w0, w1, w2, w3, w4, w5, w6 = [
            ForestNode(word)
            for word in ('John', 'saw', 'a', 'girl', 'with', 'a', 'telescope')
        ]
        # preterminals, named t<start>_<end>[_<alternative>]
        t0_1 = ForestNode('NN')
        t1_2_0 = ForestNode('VB')
        t1_2_1 = ForestNode('NN')
        t2_3 = ForestNode('DT')
        t3_4 = ForestNode('NN')
        t4_5 = ForestNode('IN')
        t5_6 = ForestNode('DT')
        t6_7 = ForestNode('NN')
        # phrases
        t2_4 = ForestNode('NP')
        t5_7 = ForestNode('NP')
        t1_4 = ForestNode('VP')
        t4_7 = ForestNode('PP')
        t2_7 = ForestNode('NP')
        t1_7 = ForestNode('VP')
        root = ForestNode('S')

        connect(t0_1, 0.02, w0)             # [NN,0,1] -> John
        connect(t1_2_0, 0.01, w1)           # [VB,1,2] -> saw
        connect(t1_2_1, 0.01, w1)           # [NN,1,2] -> saw
        connect(t2_3, 0.5, w2)              # [DT,2,3] -> a
        connect(t3_4, 0.05, w3)             # [NN,3,4] -> girl
        connect(t4_5, 0.25, w4)             # [IN,4,5] -> with
        connect(t5_6, 0.5, w5)              # [DT,5,6] -> a
        connect(t6_7, 0.001, w6)            # [NN,6,7] -> telescope
        connect(t2_4, 0.7, t2_3, t3_4)      # [NP,2,4] -> [DT,2,3] [NN,3,4]
        connect(t5_7, 0.7, t5_6, t6_7)      # [NP,5,7] -> [DT,5,6] [NN,6,7]
        connect(t1_4, 0.9, t1_2_0, t2_4)    # [VP,1,4] -> [VB,1,2] [NP,2,4]
        connect(t4_7, 1.0, t4_5, t5_7)      # [PP,4,7] -> [IN,4,5] [NP,5,7]
        connect(t2_7, 0.3, t2_4, t4_7)      # [NP,2,7] -> [NP,2,4] [PP,4,7]
        connect(t1_7, 0.5, t1_2_0, t2_7)    # [VP,1,7] -> [VB,1,2] [NP,2,7]
        connect(t1_7, 0.5, t1_4, t4_7)      # [VP,1,7] -> [VP,1,4] [PP,4,7]
        connect(root, 0.9, t0_1, t1_7)      # [S,0,7] -> [NN,0,1] [VP,1,7]

        self.hp = Hypergraph(root)
예제 #15
0
class InsideOutsideTest(TestCase):
    """Run inside/outside scoring and best-path extraction on a toy forest.

    The fixture is a small parse forest for "John saw a girl with a
    telescope" in which the prepositional phrase attaches either to the
    noun phrase ([NP,2,7]) or to the verb phrase ([VP,1,7]).  The tests run
    the algorithms and log what they produce rather than comparing the
    results against expected values.
    """

    def setUp(self):
        """Build the example forest and store its hypergraph as ``self.hp``."""

        def connect(head, prob, *tails):
            # Same call order as wiring an edge by hand: tails first, then
            # the probability, and only then attach the edge to its head.
            edge = ForestEdge()
            for tail in tails:
                edge.add_tail(tail)
            edge.prob = prob
            head.add_incoming(edge)

        # Word nodes, created in sentence order.
        john, saw, a_first, girl, with_, a_second, telescope = [
            ForestNode(word)
            for word in ('John', 'saw', 'a', 'girl', 'with', 'a', 'telescope')
        ]

        # Labelled span nodes, named <label>_<start>_<end>.
        nn_0_1 = ForestNode('NN')
        vb_1_2 = ForestNode('VB')
        nn_1_2 = ForestNode('NN')
        dt_2_3 = ForestNode('DT')
        nn_3_4 = ForestNode('NN')
        in_4_5 = ForestNode('IN')
        dt_5_6 = ForestNode('DT')
        nn_6_7 = ForestNode('NN')
        np_2_4 = ForestNode('NP')
        np_5_7 = ForestNode('NP')
        vp_1_4 = ForestNode('VP')
        pp_4_7 = ForestNode('PP')
        np_2_7 = ForestNode('NP')
        vp_1_7 = ForestNode('VP')
        s_0_7 = ForestNode('S')

        # Lexical rules.
        connect(nn_0_1, 0.02, john)              # [NN,0,1] -> John
        connect(vb_1_2, 0.01, saw)               # [VB,1,2] -> saw
        connect(nn_1_2, 0.01, saw)               # [NN,1,2] -> saw
        connect(dt_2_3, 0.5, a_first)            # [DT,2,3] -> a
        connect(nn_3_4, 0.05, girl)              # [NN,3,4] -> girl
        connect(in_4_5, 0.25, with_)             # [IN,4,5] -> with
        connect(dt_5_6, 0.5, a_second)           # [DT,5,6] -> a
        connect(nn_6_7, 0.001, telescope)        # [NN,6,7] -> telescope

        # Phrasal rules.
        connect(np_2_4, 0.7, dt_2_3, nn_3_4)     # [NP,2,4] -> [DT,2,3] [NN,3,4]
        connect(np_5_7, 0.7, dt_5_6, nn_6_7)     # [NP,5,7] -> [DT,5,6] [NN,6,7]
        connect(vp_1_4, 0.9, vb_1_2, np_2_4)     # [VP,1,4] -> [VB,1,2] [NP,2,4]
        connect(pp_4_7, 1.0, in_4_5, np_5_7)     # [PP,4,7] -> [IN,4,5] [NP,5,7]
        connect(np_2_7, 0.3, np_2_4, pp_4_7)     # [NP,2,7] -> [NP,2,4] [PP,4,7]
        connect(vp_1_7, 0.5, vb_1_2, np_2_7)     # [VP,1,7] -> [VB,1,2] [NP,2,7]
        connect(vp_1_7, 0.5, vp_1_4, pp_4_7)     # [VP,1,7] -> [VP,1,4] [PP,4,7]
        connect(s_0_7, 0.9, nn_0_1, vp_1_7)      # [S,0,7] -> [NN,0,1] [VP,1,7]

        self.hp = Hypergraph(s_0_7)

    def test_inside_outside(self):
        """Inside and outside passes under the INSIDE semiring."""
        forest = self.hp
        forest.set_semiring(INSIDE)
        forest.set_functions(lambda edge: edge.prob, lambda _: 1, None)
        forest.inside()
        forest.outside()
        logger.writeln(forest.dot())

    def test_inside_exp_outside_exp(self):
        """Inside/outside followed by their ``_exp`` variants (INSIDE)."""
        forest = self.hp
        forest.set_semiring(INSIDE)
        forest.set_functions(lambda edge: edge.prob, lambda _: 1, None)
        forest.inside()
        forest.outside()
        forest.inside_exp()
        forest.outside_exp()
        logger.writeln(forest.dot())

    def test_inside_outside_log(self):
        """Inside and outside passes under LOGPROB with log-probabilities."""
        forest = self.hp
        forest.set_semiring(LOGPROB)
        forest.set_functions(lambda edge: log(edge.prob), lambda _: 1, None)
        forest.inside()
        forest.outside()
        logger.writeln(forest.dot())

    def test_inside_exp_outside_exp_log(self):
        """Inside/outside followed by their ``_exp`` variants (LOGPROB)."""
        forest = self.hp
        forest.set_semiring(LOGPROB)
        forest.set_functions(lambda edge: log(edge.prob), lambda _: 1, None)
        forest.inside()
        forest.outside()
        forest.inside_exp()
        forest.outside_exp()
        logger.writeln(forest.dot())

    def test_best_paths(self):
        """Log the tree and weight of the two top-ranked paths from the root."""
        forest = self.hp
        forest.set_semiring(INSIDE)
        forest.set_functions(lambda edge: edge.prob, lambda _: 1, None)
        forest.assert_done('topo_sort')
        # best_paths() is deliberately re-invoked for every lookup.
        for rank in (0, 1):
            logger.writeln(forest.root.best_paths()[rank].tree_str())
            logger.writeln(forest.root.best_paths()[rank].weight)
예제 #16
0
 def setUp(self):
     """Create a ``Hypergraph`` with no arguments and store it as ``self.a``."""
     self.a = Hypergraph()
예제 #17
0
    def run(self):
        """Decode ``self.line`` and write the translation, logs and statistics.

        In order: update every grammar for this sentence, open the
        per-sentence log file, run the decoder chosen by
        ``FLAGS.decoding_method``, extract the best paths from the resulting
        hypergraph, then write the output, the optional k-best list and the
        decoder statistics.

        Sets ``self.flog``, ``self.time`` and ``self.out``.  When the decoder
        returns a goal item it also sets ``self.kbest`` and ``self.hg``; when
        ``FLAGS.preprocess`` is on it also sets ``self.fidx2replacement``.

        The log file is closed on every exit path, including when decoding
        or writing raises.

        Raises:
            AssertionError: if ``FLAGS.decoding_method`` is not ``'agenda'``,
                ``'cyk'`` or ``'earley'``.
        """
        # update per-sentence grammars, if there's any
        for g in self.grammars:
            g.update(self.id)
        self.flog = open('%s/%s_%s' % (FLAGS.run_dir, 'log', self.suffix), 'w')
        try:
            if FLAGS.show_time:
                self.flog.write('running on %s\n\n' % socket.gethostname())
                self.flog.flush()

            fwords = self.line.split()
            if FLAGS.preprocess:
                # Source position of each placeholder token -> replacement,
                # consuming the entries of self.special in order.
                self.fidx2replacement = {}
                j = 0
                for i, token in enumerate(fwords):
                    if token in ('$number', '$date'):
                        self.fidx2replacement[i] = self.special[j][1]
                        j += 1

            self.flog.write('[%s][%s words] %s\n' %
                            (self.id, len(fwords), self.line))

            decoder = Decoder(fwords, self.grammars, self.features)

            begin_time = time()
            if FLAGS.decoding_method == 'agenda':
                item = decoder.decode()
            elif FLAGS.decoding_method == 'cyk':
                item = decoder.decode_cyk()
            elif FLAGS.decoding_method == 'earley':
                item = decoder.decode_earley()
            else:
                # Raised explicitly instead of ``assert False`` so the check
                # survives ``python -O``, which strips assert statements and
                # would otherwise leave ``item`` unbound below.
                raise AssertionError('"%s" not valid decoding option'
                                     % FLAGS.decoding_method)
            self.time = time() - begin_time

            if item is None:
                self.out = '[decoder failed to build a goal item]'
            else:
                hg = Hypergraph(item)
                hg.set_semiring(hypergraph.SHORTEST_PATH)
                hg.set_functions(lambda x: x.cost, None, None)
                hg.topo_sort()
                self.kbest = hg.root.best_paths()
                # Work on a copy so the best path's own token list is intact.
                output_tokens = self.kbest[0].translation[:]

                if FLAGS.preprocess:
                    # Put the original values back in place of placeholders,
                    # using the target-to-source alignment of the best path.
                    for i, token in enumerate(output_tokens):
                        if token in ('$number', '$date'):
                            fidx = self.kbest[0].composed_rule.we2f[i]
                            if fidx is not None:
                                output_tokens[i] = self.fidx2replacement[fidx]

                # Drop ``lm_order - 1`` tokens from each end (presumably the
                # language-model boundary padding - confirm).  The end index
                # is spelled out because ``[pad:-pad]`` is ``[0:0]``, i.e.
                # empty, when lm_order is 1.
                pad = FLAGS.lm_order - 1
                self.out = ' '.join(
                    output_tokens[pad:len(output_tokens) - pad])
                self.hg = hg
                if FLAGS.output_hypergraph:
                    self.write_hypergraph()

            self.flog.write('%s\n' % self.out)
            self.flog.write('\n')
            if item is not None:
                self.flog.write(self.kbest[0].tree_str())
                self.flog.write('\n')
                self.flog.write(hg.stats())
                self.flog.write('\n')
            self.flog.write(decoder.agenda_stats())
            self.flog.write('\n')
            self.flog.write(decoder.chart.stats())
            self.flog.write('\n')
            for dotchart in decoder.dotcharts:
                self.flog.write(dotchart.stats())
                self.flog.write('\n')

            if FLAGS.show_time:
                timeline = '{:<35}{:>15.2f}\n'.format('[time]:', self.time)
                self.flog.write(timeline)
            self.write_output_file()
            if FLAGS.output_kbest:
                self.write_kbest_to_file()
        finally:
            # Release the log file even when something above raised.
            self.flog.close()
예제 #18
0
class DirectedAcyclicGraphComparator:
    """
    Performs the comparison between two different DirectedAcyclicGraphs.

    To use it call the function buildHyperGraph; it fills and returns a
    hypergraph (also available as ``self.hypergraph``) containing the
    comparison between the two dags. The contents of the hypergraph will be
    as follows:
        nodes: formed by one node of each graph storing the value of applying
               the cost function to both nodes.
        hyperedges: hyperedges are directed, the first node of the hyperedge is
                    the source node of the transformation, the rest of the
                    nodes of the hyperedges will contain the location of the
                    variables. As a value it stores the cost of transforming
                    the two graphs without the nodes being substituted. Each
                    hyperedge must be different.
    """
    def __init__(self, dag1, dag2):
        """
        The first and second parameters must be DirectedAcyclicGraphs as
        specified in the file datastructures.py.
        """

        self.dag1_mapper = DirectedAcyclicGraphMapper(dag1)
        self.dag2_mapper = DirectedAcyclicGraphMapper(dag2)
        self.hypergraph = Hypergraph()

    def costAssembler(self, functions):
        """Placeholder: currently does nothing and returns None."""
        pass

    def __sort_by_num_of_variables(self, v):
        """
        Group the mappings in v by their number of variables.

        Returns a tuple of lists in which position i holds, in their original
        order, the mappings that have i + 1 variables. An empty v yields an
        empty tuple.
        """
        # v is traversed twice below, so materialise it first: a one-shot
        # iterator would otherwise be exhausted by the max() pass and every
        # mapping silently dropped.
        mappings = list(v)
        if not mappings:
            return ()

        max_num_of_variables = max(len(x.variables) for x in mappings)
        answers = tuple([] for _ in range(max_num_of_variables))

        # NOTE(review): a mapping with zero variables indexes position -1,
        # i.e. the last bucket (or fails when no mapping has any variable);
        # confirm the mapper never produces one.
        for x in mappings:
            answers[len(x.variables) - 1].append(x)

        return answers

    def __iterate_over_sorted_maps(self, s1, s2):
        """
        Yield every (map1, map2) pair taken from buckets at the same position
        of s1 and s2. Positions present in only one of them are ignored.
        """
        for x1, x2 in zip(s1, s2):
            for map1 in x1:
                for map2 in x2:
                    yield (map1, map2)

    def buildHyperGraph(self, number_of_variables=float('inf')):
        """
        This function builds the hypergraph that will contain the comparison
        between the two dags and all their subgraphs.

        number_of_variables is forwarded to generateAllVariableMappings of
        both mappers.

        The function returns the hypergraph containing the comparison between
        the two dags (the same object as ``self.hypergraph``).
        """

        # Compute the nodes of the hypergraph and their associated cost. One
        # node is added for every pair made of a node of the first dag and a
        # node of the second dag.
        g1 = self.dag1_mapper.dag
        g2 = self.dag2_mapper.dag
        # Assumes ``links`` is dict-like, so iterating it yields its keys on
        # both Python 2 and Python 3.
        for n1 in g1.links:
            for n2 in g2.links:
                value = t_cost_edit_distance_graphs_no_vars(g1, n1, g2, n2)
                self.hypergraph.addNode((n1, n2), value)

        # In the algorithm we don't allow computing the cost function between
        # two subgraphs with a different number of variables. Here we sort
        # both sequences of subgraphs by their number of variables to ensure
        # that doesn't happen.
        map1_sorted_by_vars = self.__sort_by_num_of_variables(
            self.dag1_mapper.generateAllVariableMappings(
                number_of_variables=number_of_variables))
        map2_sorted_by_vars = self.__sort_by_num_of_variables(
            self.dag2_mapper.generateAllVariableMappings(
                number_of_variables=number_of_variables))

        # Thanks to the ordering coming from the Mapper class the hypergraph
        # will be built in a top-down fashion.
        # map1 and map2 will always contain the same number of variables.
        for map1, map2 in self.__iterate_over_sorted_maps(
                map1_sorted_by_vars, map2_sorted_by_vars):
            # The node of the hypergraph.
            hypergraph_node = (map1.subgraph.root, map2.subgraph.root)

            # The current hyperedge. In this implementation the order
            # matters: the first node is the one acting as a root and the
            # rest are the nodes that are going to be substituted by
            # variables.
            hyperedge = (hypergraph_node, ) + tuple(
                zip(map1.variables, map2.variables))

            # The cost of the two partial graphs.
            f1 = t_cost_edit_distance_graphs_with_vars(map1, map2)

            # This is for debugging purposes.
            if DEBUG_MODE:
                print(stringifyGraph(map1.graph, map1.subgraph.root,
                                     map1.variables, map1.subgraph.nodes))
                print(stringifyGraph(map2.graph, map2.subgraph.root,
                                     map2.variables, map2.subgraph.nodes))

                print('Hyperedge %s' % (hyperedge,))

            # Add the hyperedge to the graph.
            # The hyperedges are directed and, as the algorithm works, there
            # shouldn't be any duplicates, so there is no need to check
            # whether it already exists.
            # The weight is the partial-graph cost alone; an earlier variant
            # that also added the node weights of the substituted variables
            # (and then updated the node value) is disabled.
            subgraphs = (map1.subgraph, map2.subgraph)
            weight = f1
            self.hypergraph.addHyperedge(hyperedge, subgraphs, weight)

        if DEBUG_MODE:
            print("\nNodes:")
            print("==========================")
            self.hypergraph.printNodes()
            print("\nHyperedges:")
            print("==========================")
            self.hypergraph.printHyperedges()

        return self.hypergraph

    def buildHyperGraphDebug(self, number_of_variables=float('inf')):
        """
        Debugging function that uses the default cost function
        (t_cost_default) to build the hypergraph.

        Used for testing purposes. Returns the hypergraph, like
        buildHyperGraph.
        """

        # Assumes ``links`` is dict-like, so iterating it yields its keys.
        for n1 in self.dag1_mapper.dag.links:
            for n2 in self.dag2_mapper.dag.links:
                value = t_cost_default([n1], [n2])
                self.hypergraph.addNode((n1, n2), value)

        map1_sorted_by_vars = self.__sort_by_num_of_variables(
            self.dag1_mapper.generateAllVariableMappings(
                number_of_variables=number_of_variables))
        map2_sorted_by_vars = self.__sort_by_num_of_variables(
            self.dag2_mapper.generateAllVariableMappings(
                number_of_variables=number_of_variables))

        for map1, map2 in self.__iterate_over_sorted_maps(
                map1_sorted_by_vars, map2_sorted_by_vars):
            hypergraph_node = (map1.subgraph.root, map2.subgraph.root)
            hyperedge = (hypergraph_node, ) + tuple(
                zip(map1.variables, map2.variables))
            weight = t_cost_default(map1.subgraph.nodes, map2.subgraph.nodes)

            subgraphs = (map1.subgraph, map2.subgraph)
            self.hypergraph.addHyperedge(hyperedge, subgraphs, weight)

        return self.hypergraph
예제 #19
0
class TestHypergraph(unittest.TestCase):
    """Unit tests for the node and hyperedge operations of ``Hypergraph``."""

    def setUp(self):
        """Give the test a ``Hypergraph`` built with no arguments."""
        self.a = Hypergraph()

    def test_checkUnknownNode(self):
        """Updating a node that was never added raises ValueError."""
        with self.assertRaises(ValueError):
            self.a.updateNode("z", 2)

    def test_checkUnknownHyperedge(self):
        """Relabelling a hyperedge that was never added raises ValueError."""
        with self.assertRaises(ValueError):
            self.a.updateHyperedgeLabel(("z", "s"), 2, 0)

    def test_addNode1(self):
        """A single added node reports the weight it was given."""
        self.a.addNode('a', 1)

        self.assertEqual(self.a.getNodeWeight('a'), 1)

    def test_addNode2(self):
        """Two added nodes each keep their own weight."""
        expected = (('a', 1), ('b', 2))
        for name, weight in expected:
            self.a.addNode(name, weight)

        for name, weight in expected:
            self.assertEqual(self.a.getNodeWeight(name), weight)

    def test_addNode3(self):
        """Three added nodes each keep their own weight."""
        expected = (('a', 1), ('b', 2), ('c', 3))
        for name, weight in expected:
            self.a.addNode(name, weight)

        for name, weight in expected:
            self.assertEqual(self.a.getNodeWeight(name), weight)

    def test_updateNodeValue(self):
        """updateNode replaces the weight of an existing node."""
        for name, weight in (('a', 1), ('b', 2)):
            self.a.addNode(name, weight)

        self.a.updateNode('a', 3)

        self.assertEqual(self.a.getNodeWeight('a'), 3)

    def test_addHyperedge1(self):
        """The label of an added hyperedge carries the data it was given."""
        he = ('a', 'b', 'c')

        for name, weight in (('a', 1), ('b', 2), ('c', 3)):
            self.a.addNode(name, weight)

        self.a.addHyperedge(he, "abc", 0)

        label = self.a.getHyperedgeLabel(he)
        self.assertEqual(label.data, "abc")

    def test_addHyperedge2(self):
        """A second hyperedge sharing nodes with the first keeps its own data."""
        he = ('a', 'b', 'c')
        he2 = ('d', 'b', 'c')

        for name, weight in (('a', 1), ('b', 2), ('c', 3), ('d', 4)):
            self.a.addNode(name, weight)

        self.a.addHyperedge(he, "abc", 0)
        self.a.addHyperedge(he2, "dbc", 0)

        label = self.a.getHyperedgeLabel(he2)
        self.assertEqual(label.data, "dbc")

    def test_updateHyperedgeLabel(self):
        """updateHyperedgeLabel replaces the data of an existing hyperedge."""
        he = ('a', 'b', 'c')
        he2 = ('d', 'b', 'c')

        for name, weight in (('a', 1), ('b', 2), ('c', 3), ('d', 4)):
            self.a.addNode(name, weight)

        self.a.addHyperedge(he, "abc", 0)
        self.a.addHyperedge(he2, "dbc", 0)
        self.a.updateHyperedgeLabel(he, "test", 0)

        label = self.a.getHyperedgeLabel(he)
        self.assertEqual(label.data, "test")

    def test_checkHyperedgesAndNodes(self):
        """A node lists the hyperedges it takes part in, in insertion order."""
        he = ('a', 'b', 'c')
        he2 = ('d', 'b', 'c')
        solution = [he, he2]

        for name, weight in (('a', 1), ('b', 2), ('c', 3), ('d', 4)):
            self.a.addNode(name, weight)

        self.a.addHyperedge(he, "abc", 0)
        self.a.addHyperedge(he2, "dbc", 0)

        found = self.a.getHyperedgesFromNode('b')
        self.assertEqual(found, solution)
예제 #20
0
    parser.add_argument(
        "-n",
        "--nodes",
        action="store",
        default=10000,
        type=int,
        help="Select the number of nodes n (if the target dataset is 'model')",
    )
    args = parser.parse_args()
    # Dataset name accepted on the command line -> first argument handed to
    # Hypergraph for that dataset.
    datasets_info = {'contact': 'contact-high-school',
                     'email': 'email-Eu-full',
                     'substances': 'NDC-substances-full',
                     'tags': 'tags-ask-ubuntu',
                     'threads': 'threads-math-sx',
                     'coauth': 'coauth-DBLP-full'}

    # Create the output directories if they are missing.
    for out_dir in ('../results', '../plots'):
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
    if args.dataset in datasets_info:
        graph = Hypergraph(datasets_info[args.dataset], args.dataset)
    elif args.dataset == 'model':
        print("Generating hypergraph using HyperFF model...")
        graph = HyperFF(args.burning, args.expanding, args.nodes - 1)
    else:
        print("Invalid arguments.")
        parser.print_help()
        # An unrecognised dataset is an error: exit with a non-zero status so
        # calling shells and scripts can detect the failure.
        sys.exit(1)
    main(graph)