Ejemplo n.º 1
0
 def test_coverage(self):
     """Check get_coverage() against the known node sets of both fixtures."""
     # graph0 is expected to cover nodes 0..3
     self.assertEqual(Graph(self.graph0).get_coverage(), set([0, 1, 2, 3]))
     # graph1 is expected to cover only nodes 1 and 2
     self.assertEqual(Graph(self.graph1).get_coverage(), set([1, 2]))
Ejemplo n.º 2
0
 def test_undirect(self):
     """Check get_undirected() against the known results for both fixtures."""
     undirected1 = Graph(self.graph1).get_undirected()
     expected1 = Graph(((), (2, ), (1, ), ()))
     self.assertEqual(undirected1, expected1)

     undirected0 = Graph(self.graph0).get_undirected()
     expected0 = Graph(((1, 2), (0, 3), (0, ), (1, )))
     self.assertEqual(undirected0, expected0)
Ejemplo n.º 3
0
    def test_edges_from_nodes(self):
        """Check edges_from_node() on graph0 for start nodes 0 and 1."""
        graph = Graph(self.graph0)
        # (start node, edge set expected from edges_from_node)
        known_edges = (
            (0, set([(0, 1), (1, 3)])),
            (1, set([(1, 3)])),
        )
        for start, expected in known_edges:
            self.assertEqual(graph.edges_from_node(start), expected)
Ejemplo n.º 4
0
 def test_get_cycle_basis(self):
     """Check the first basis cycle of the first simulated schema."""
     schema = self.simschemas[0]
     undirected = Graph(schema.get_graph_from_schema()).get_undirected()
     found = schema.get_cycle_basis(undirected._graph)[0]

     # Translate the expected table names into their positions in the
     # schema's ordered node list.
     expected = []
     for name in ('Files', 'ProcessedDataset', 'Block'):
         expected.append(schema.ordered.index(schema.nodelist[name.lower()]))

     # Compare order-independently; both lists are sorted in place.
     expected.sort()
     found.sort()
     self.assertEqual(len(expected), len(found))
     for want, got in zip(expected, found):
         self.assertEqual(want, got)
Ejemplo n.º 5
0
    def test_breadth_search(self):
        """Check breadth_first_search() on graph0 against known spans."""
        graph = Graph(self.graph0)
        # (start node, adjacency tuples of the expected span graph)
        known_spans = (
            (0, ((1, 2), (3, ), (), ())),
            (1, ((), (3, ), (), ())),
            (3, ((), (), (), ())),
        )
        for start, adjacency in known_spans:
            span = graph.breadth_first_search(start)
            self.assertEqual(span, Graph(adjacency))
Ejemplo n.º 6
0
def write_core_graph(simschema, filename="coreschema"):
    """Write the cyclic core of a simulated schema graph as a dot file.

    The output goes to ``filename + '.dot'``. The core consists of the nodes
    that appear in any cycle of the cycle basis of the undirected schema
    graph; edges are added through a DotGraph bound to the output file.

    simschema -- object providing get_graph_from_schema(), get_cycle_basis()
                 and the ``ordered`` node list
    filename  -- output path without the '.dot' extension
    """
    fns = filename + '.dot'
    # The with-block guarantees the handle is closed even if graph
    # construction or cycle detection raises.
    with open(fns, 'w') as fls:
        dot = DotGraph(fls)
        relations = simschema.get_graph_from_schema()
        graph = Graph(relations)
        ugraph = graph.get_undirected()
        cycles = simschema.get_cycle_basis(ugraph._graph)
        # Collect every node that takes part in at least one basis cycle.
        nodes = set([])
        order = simschema.ordered
        for cycle in cycles:
            nodes = nodes.union(set(cycle))
        for node in nodes:
            start_node = relations[node]
            for end_node in start_node:
                # NOTE(review): end_node is membership-tested as if it were
                # a node, yet indexed ([0], [1]) as if it were a pair below;
                # one of the two uses looks inconsistent -- confirm the
                # shape of the entries in relations.
                if end_node in nodes:
                    dot.add_edge(order[node], order[end_node[0]], end_node[1])
        dot.finish_output()
Ejemplo n.º 7
0
    def write_cyclic_graph(self, bdot, name="C"):
        """
        Emit dot output for the cyclic core of the schema graph.

        One dot graph, obtained from a MultiDot built on `name`, is written
        for each cycle of the cycle basis of the undirected schema graph.
        Afterwards the edges among the union of all cycle nodes are added
        to `bdot`, which is then finished as well.

        Returns False (without touching `bdot`) when the cycle basis is
        empty, True otherwise.
        """
        multidot = MultiDot(name)
        relations = self.get_graph_from_schema()
        undirected = Graph(relations).get_undirected()
        cycles = self.get_cycle_basis(undirected._graph)
        if not len(cycles):
            return False
        order = self.ordered

        # One separate dot graph per basis cycle, restricted to its members.
        for cycle in cycles:
            dot = multidot.get_dot()
            for src in cycle:
                for dst in relations[src]:
                    if dst in cycle:
                        dot.add_edge(order[src], order[dst])
            dot.finish_output()

        # Combined graph over every node that belongs to any cycle.
        core = set([])
        for cycle in cycles:
            core = core | set(cycle)
        for src in core:
            for dst in relations[src]:
                if dst in core:
                    bdot.add_edge(order[src], order[dst])
        bdot.finish_output()
        return True
Ejemplo n.º 8
0
    def recognize_shortcut(self):
        """
        Detect shortcut links and rebalance link weights around them.

        A shortcut is a direct link that duplicates a longer chain of links.
        Referential links are on primary keys, e.g.
        dataset<--block<---file
             <------------/
        dataset<----file is a short cut for dataset<--block<--file

        The method works in two passes:

        1. For every node in self.v_ent it walks the outgoing links and
           records, per reached table, the link paths leading to it. When
           more than one path is recorded for a table the paths are logged
           through _LOGGER.debug. If, in addition, the first two paths
           differ in both their first and their last link, the pair is
           remembered and the weights of the first links of both paths and
           of the last link of the second path are adjusted and exchanged.
        2. For every cycle in the cycle basis of the undirected schema graph
           whose nodes are not all contained in the node set of a remembered
           shortcut, it collects link paths starting from the cycle members
           for which self.get_indegree(member, tables) is 0. When exactly
           two members yielded paths, the weight of the first link of four
           selected paths is reset relative to a base weight.

        Link weights are modified in place. Returns False when the cycle
        basis is empty; otherwise no value is returned explicitly (None).
        """
        visited = set([]) # NOTE(review): never read; its only use below is commented out
        # Depth-first walk (LIFO stack) over the outgoing links of each node
        short_cuted = []
        for node in self.v_ent:
            pathes = {} # { end_entity : [[link1], [link2, link3], ] }
            stack = []
            for link in self.nodelist[node].outlinks:
                stack.append(self.links[link])
                # link.right shouldn't point to itself, need to filter
                # out
            while stack:
                link = stack.pop()
                table = link.rtable
                if table not in self.v_attr and table != node:
                    # (disabled extra condition:
                    #  and link.rcolumn is in table.primarykey)
                    # if link.left is node then the path is just [link]
                    # if link.left is not node then extend the path that
                    # already leads to link.ltable
                    new_path = []
                    if link.ltable == node:
                        new_path = [link]
                    else:
                        # trace back along the first recorded path to
                        # link.ltable; raises KeyError if link.ltable has no
                        # entry in pathes yet -- TODO confirm this cannot
                        # happen
                        for aplink in pathes[link.ltable][0]: # shortest
                            new_path.append(aplink)
                        new_path.append(link)
                    # insert new path to correct position
                    if table not in pathes and table != node:
                        pathes[table] = [new_path]
                    elif len(new_path) > 0:
                        # range() is evaluated once, so insertions made
                        # inside the loop do not extend it. There is no
                        # break, so new_path may be inserted more than once.
                        # NOTE(review): confirm this is intended.
                        for index in range(len(pathes[table])):
                            if len(new_path) > len(pathes[table][index]):
                                continue
                            distinct = False
                            if len(new_path) == len(pathes[table][index]):
                                # same length: distinct if any link differs
                                for inde in range(len(new_path)):
                                    if new_path[inde] == \
                                        pathes[table][index][inde]:
                                        continue
                                    distinct = True
                            else:
                                distinct = True
                            if distinct:
                                pathes[table].insert(index, new_path)
                # NOTE(review): this loop sits at while-level, so outlinks
                # are pushed for every popped link, including tables that
                # were filtered out above; no visited check is visible here
                # -- confirm termination on schemas with link cycles.
                for link in self.nodelist[table].outlinks:
                    stack.append(self.links[link])
            # review pathes, log every table reached by more than one path
            for table in pathes:
                if len(pathes[table]) > 1:
                    temp = []
                    for path in pathes[table]:
                        strin = ""
                        for link in path:
                            strin += "%s->%s, " % (link.ltable, link.rtable)
                        temp.append(strin)
                    _LOGGER.debug('shortcut found for %s to %s via %s' % \
                    (node, table, str(temp)))
            # update weights for link
            for table in pathes:
                if len(pathes[table]) > 1:
                    # skip when the first two paths start with the same link
                    if pathes[table][0][0] == pathes[table][1][0]:
                        continue
                    link1 = pathes[table][0][0]
                    link2 = pathes[table][1][0]
                    link3 = pathes[table][1][-1]
                    # vis is only used by the commented-out code below
                    vis = link1.name + link2.name
                    # require both the last and the first links to differ
                    if not (pathes[table][0][-1] != pathes[table][1][-1] \
                      and pathes[table][0][0] != pathes[table][1][0]):
                        continue
                    short_cuted.append((pathes[table][0],pathes[table][1]))
                    #if vis in visited:
                    #    continue
                    #else:
                    #    visited.add(vis)
                    if link2.weight <= link1.weight and link1.weight < 1:
                        link2.weight = link1.weight + (1 - link1.weight)/2
                    if link3.weight >= link1.weight and link1.weight < 1:
                        link3.weight = link1.weight - (1 - link1.weight)/2
                    _LOGGER.debug('%s.%.5f switch to %s.%.5f' % \
                            (link1, link1.weight, link2, link2.weight))
                    # two consecutive swaps: link1<->link2, then link2<->link3
                    link1.weight, link2.weight = link2.weight, link1.weight
                    link2.weight, link3.weight = link3.weight, link2.weight
        # For each remembered shortcut, collect the table names touched by
        # its second path.
        sc_nodeset = []
        for idx in range(len(short_cuted)):
            sc_nodeset.append(set([]))
            for lk in short_cuted[idx][1]:
                sc_nodeset[idx].add(lk.ltable)
                sc_nodeset[idx].add(lk.rtable)
        #print "sc_nodeset", sc_nodeset
        # get all basic cycles on undirected graph
        # for a cycle, calculate node with outdegree == 0 on directed graph
        # then for a cycle with two such nodes,
            # if there is shortcut including,
            # reorganize the link.weight to make sure:
                # 1. path weight to dataset is higher than path weight to files
                # 2. make sure the whole path is selected when all node on this cycle is selected
        relations = self.get_graph_from_schema()
        graph = Graph(relations)
        ugraph = graph.get_undirected()
        cycles = self.get_cycle_basis(ugraph._graph)
        if len(cycles) == 0:
            return False
        nodes = None
        # NOTE(review): order is assigned but not used below
        order = self.ordered
        for cycle in cycles:
            # contains outdegree = 0, directed graph
            # contains shortcut 's'
                # get the pathes to outmod and start file, 1
                # get the pathes to outmod and end dataset, 2
                    # sum(1) < sum(2)
                    # sum(1+2) > sum('s') for BFS, min(1,2) > max('s')
            nodes = set([self.ordered[nd].name for nd in cycle])
            # Despite its name, is_shortcut becomes False when every node of
            # the cycle lies inside one remembered shortcut node set; such
            # cycles are skipped.
            is_shortcut = True
            for sc in sc_nodeset:
                if nodes.difference(sc) == set([]):
                    is_shortcut = False
            if not is_shortcut:
                continue
            #print "cycle is", nodes
            tables = set([self.ordered[nd] for nd in cycle])
            # maps a cycle member with get_indegree(...) == 0 to the link
            # paths collected from it
            degree0 = {}
            for node in cycle:
                tnode = self.nodelist[self.ordered[node].name]
                queue = []
                linkpaths = []
                if self.get_indegree(tnode, tables) == 0:
                    #print tnode
                    for link in tnode.outlinks:
                        linkpath = []
                        #print link,self.links[link].ltable,self.links[link].rtable
                        if self.links[link].rtable in nodes:
                            queue.insert(0, self.nodelist[self.links[link].rtable])
                            linkpath.append(self.links[link])
                            # FIFO walk (insert at front, pop from back)
                            # restricted to tables inside the cycle.
                            # The names node and link are rebound here; the
                            # enclosing for-loops keep their own iterators
                            # and tnode was computed before.
                            while len(queue) > 0:
                                node = queue.pop()
                                for link in node.outlinks:
                                    if self.links[link].rtable in nodes:
                                        queue.insert(0, self.nodelist[self.links[link].rtable])
                                        linkpath.append(self.links[link])
                        if linkpath != []:
                            linkpaths.append(linkpath)
                if linkpaths != []:
                    degree0[tnode]= linkpaths
            if len(degree0) == 2:
                source1, source2 = degree0.keys()
                # The [1] lookups assume each of the two nodes produced at
                # least two link paths; otherwise IndexError -- TODO confirm.
                s1l1 = degree0[source1][0]
                s1l2 = degree0[source1][1]
                s2l1 = degree0[source2][0]
                s2l2 = degree0[source2][1]
                pathc = None
                pathp = None
                pathcc = None
                pathpc = None
                # Choose pathc/pathp and pathcc/pathpc by comparing path
                # lengths and the tables at which the paths end.
                if len(s1l1) + len(s1l2) > len(s2l1) + len(s2l2):
                    if s1l1[-1].rtable == s2l1[-1].rtable:
                        if len(s1l1) > len(s2l1):
                            pathc = s1l1
                            pathp = s2l1
                            pathcc, pathpc = s1l2, s2l2
                        else:
                            pathcc, pathpc = s1l1, s2l1
                            pathc = s1l2
                            pathp = s2l2
                    else: #s1l1[-1].rtable == s2l2[-1].rtable:
                        if len(s1l1) > len(s2l2):
                            pathc = s1l1
                            pathp = s2l2
                            pathcc, pathpc = s1l2, s2l1
                        else:
                            pathcc, pathpc = s1l1, s2l2
                            pathc = s1l2
                            pathp = s2l1
                else:
                    if s2l1[-1].rtable == s1l1[-1].rtable:
                        if len(s2l1) > len(s1l1):
                            pathc = s2l1
                            pathp = s1l1
                            pathcc, pathpc = s2l2, s1l2
                        else:
                            pathcc, pathpc = s2l1, s1l1
                            pathc = s2l2
                            pathp = s1l2
                    else: #s2l1[-1].rtable == s1l2[-1].rtable:
                        if len(s2l1) > len(s1l2):
                            pathc = s2l1
                            pathp = s1l2
                            # NOTE(review): unlike the three sibling
                            # branches, pathcc/pathpc equal pathc/pathp
                            # here; by symmetry (s2l2, s1l1) may have been
                            # intended -- confirm.
                            pathcc, pathpc = s2l1, s1l2
                        else:
                            pathcc, pathpc = s2l1, s1l2
                            pathc = s2l2
                            pathp = s1l1
                # find path to hierarchical parent node
                # find path to hierarchical child node
                # find path to hierarchical parent to child link, get weight
                base = pathc[-1].weight
                # get the pathes to outmod and start file, 1
                # get the pathes to outmod and end dataset, 2
                    # sum(1) < sum(2)
                    # sum(1+2) > sum('s') for BFS, min(1,2) > max('s')
                # first links of pathc/pathcc get base + (1 - base)/4,
                # first links of pathp/pathpc get base + (1 - base)/3
                pathc[0].weight = base + (1 - base)/4
                pathp[0].weight = base + (1 - base)/3
                pathcc[0].weight = base + (1 - base)/4
                pathpc[0].weight = base + (1 - base)/3
Ejemplo n.º 9
0
    def test_reverse(self):
        """Check get_reverse() on graph0 against the known reversed graph."""
        reversed_graph = Graph(self.graph0).get_reverse()
        expected = Graph(((), (0, ), (0, ), (1, )))
        self.assertEqual(reversed_graph, expected)