Example #1
def comb_fas(graph):
    '''@param: graph, a nx.DiGraph obj
    '''
    assert isinstance(graph, nx.DiGraph)
    origin_weight = nx.get_edge_attributes(graph, 'weight')
    weight = origin_weight.copy()

    assert len(weight) == graph.number_of_edges(), "Some edge does not have a weight attr."
    fas = []
    while not nx.is_directed_acyclic_graph(graph):
        c = list(nx.simple_cycles(graph))[0]
        mini_weight = min([weight[edge] for edge in get_edges(c)])

        cycle_edges_weight = {edge: weight[edge] for edge in get_edges(c)}
        for eachEdge in cycle_edges_weight.keys():
            cycle_edges_weight[eachEdge] -= mini_weight
            weight[eachEdge] -= mini_weight
            if cycle_edges_weight[eachEdge] == 0:
                fas.append(eachEdge)
                graph.remove_edge(eachEdge[0], eachEdge[1])

    for eachEdge in copy.copy(fas):
        graph.add_edge(eachEdge[0], eachEdge[1], weight=origin_weight[eachEdge])
        if nx.is_directed_acyclic_graph(graph):
            fas.remove(eachEdge)
        else:
            graph.remove_edge(eachEdge[0], eachEdge[1])

    return fas
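
A minimal usage sketch (`get_edges` is not defined in the snippet above; here it is assumed to expand a node cycle into its edge list, with `nx` and `copy` imported as usual):

import copy
import networkx as nx

def get_edges(cycle):
    # assumed helper: node cycle [a, b, c] -> edges [(a, b), (b, c), (c, a)]
    return list(zip(cycle, cycle[1:])) + [(cycle[-1], cycle[0])]

G = nx.DiGraph()
G.add_weighted_edges_from([(1, 2, 3.0), (2, 3, 1.0), (3, 1, 2.0)])
print(comb_fas(G))  # removes the lightest edge of the 3-cycle, e.g. [(2, 3)]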
Example #2
def mean_geodesic(pg, debug=0):
    """
    mean_geodesic() calculates the mean geodesic (shortest) distance
    between two vertices in a network.
    """
    length_sum = 0
    if networkx.is_directed_acyclic_graph(pg):
        n_pairs_with_paths = 0
    else:
        n_pairs_with_paths = (pg.order() * (pg.order() - 1)) / 2  # unordered pairs of distinct nodes
    tg = networkx.subgraph(pg, pg.nodes())
    for u in pg.nodes_iter():
        tg.delete_node(u)
        for v in tg.nodes_iter():
            try:
                length = networkx.shortest_path_length(pg,u,v)
                if length > 0:
                    length_sum = length_sum + length
                    if networkx.is_directed_acyclic_graph(pg):
                        n_pairs_with_paths = n_pairs_with_paths + 1
            except networkx.exception.NetworkXError:
                pass
    try:
        geodesic = float(length_sum) / float(n_pairs_with_paths)
    except ZeroDivisionError:
        geodesic = -999.
    if debug:
        print('length_sum:\t', length_sum)
        print('n_pairs_with_paths:\t', n_pairs_with_paths)
    return geodesic
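
The snippet above targets a very old networkx API (`nodes_iter`, `delete_node`). A rough modern-networkx equivalent of the same statistic, averaging over ordered reachable pairs (a sketch, not the original author's code):

import networkx as nx

def mean_geodesic_simple(g):
    # mean shortest-path length over all ordered pairs joined by a path
    lengths = dict(nx.all_pairs_shortest_path_length(g))
    dists = [d for targets in lengths.values() for d in targets.values() if d > 0]
    return sum(dists) / len(dists) if dists else -999.0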
Example #3
def minkowski_causality(D,N,show_plot=False):
    """
        Instantiates "event_in_minkowski" to return a list of N points.
        """
    def points_in_minkowski(D,N):
        
        points_in_minkowski = []
        
        n=1
        while n<=N:
            point_n = event_in_minkowski(D)
            coords_n = point_n.coord_value_point_n(n)
            points_in_minkowski.append(coords_n)
            n+=1
        return points_in_minkowski
    
    good_points = points_in_minkowski(D,N)
    
    #List --> Dict as nx needs hashable object to add nodes/edges from.
    dict_of_points = {}
    for i in range(len(good_points)):
        good_points[i] = tuple(good_points[i])
        dict_of_points[i] = good_points[i]
    
    #Add nodes to empty nx graph object
    G=nx.DiGraph()
    for point in dict_of_points:
        G.add_node(point)
    print(nx.is_directed_acyclic_graph(G))

    #Add edge (from i to j) to empty nx graph object if node j falls within the future light cone of i
    for i in range(len(dict_of_points)):
        for j in range(len(dict_of_points)):
            if i==j:
                continue
            t_separation = dict_of_points[j][0] - dict_of_points[i][0]
            space_separation = 0
            for d in range(1, D):
                space_separation += (dict_of_points[i][d] - dict_of_points[j][d]) ** 2
            # j lies in the future light cone of i if the separation is causal
            # (timelike or null): t >= 0 and t^2 >= squared spatial distance
            if t_separation >= 0 and t_separation ** 2 >= space_separation:
                G.add_edge(i, j)

    #Check G is a DAG, print model info

    if nx.is_directed_acyclic_graph(G):
        print "This is a DAG of causal relations between randomly placed events in ",D,"D Minkowski space-time."

    #Show plot
    if show_plot:
        draw_in_minkowski(G,dict_of_points)

    return G
Example #4
def make_acyclic(G):
	G_copy = G.copy()
	F = []
	original_G = G.copy()
	while not nx.is_directed_acyclic_graph(G_copy):
		#iterate through cycles in G
		
		for cycle in nx.simple_cycles(G_copy):
			min_weight = 100000
			min_u = 0
			min_v = 0
			#Find minimum weight edge in the cycle, weight
			#here is bundle size
			#TODO: start with smallest cycle by sorting
			#print G.edges(data=True)
			for i in range(0, len(cycle)-1):
				u = cycle[i]
				v = cycle[i+1]
				if G[u][v]['bsize'] < min_weight:	
					min_weight = G[u][v]['bsize']
					min_u = u
					min_v = v
			if G[cycle[-1]][cycle[0]]['bsize'] < min_weight:
				min_weight = G[cycle[-1]][cycle[0]]['bsize']
				min_u = cycle[-1]
				min_v = cycle[0]

			#reduce the edge weights by min_weight and remove the edge if its weight is 0
			if min_weight != 100000:
				for i in range(0, len(cycle)-1):
					u = cycle[i]
					v = cycle[i+1]
					G[u][v]['bsize'] -= min_weight
				
				G[cycle[-1]][cycle[0]]['bsize'] -= min_weight
				G.remove_edge(min_u,min_v)
				F.append((min_u,min_v,original_G.get_edge_data(min_u,min_v)))
				G_copy = G.copy()
				break

	#Now try adding edges from F to G, TODO do in non-increasing order

		if len(G.edges()) == 0:
			continue
		# if len(G.nodes()) == 0:
		# 	continue
		for edge in F:
			u = edge[0]
			v = edge[1]
			G.add_edge(u, v, **edge[2])
			if not nx.is_directed_acyclic_graph(G):
				G.remove_edge(u,v)

	return G
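
A small usage sketch for make_acyclic; the graph is modified in place and edge weights are read from the 'bsize' attribute:

import networkx as nx

G = nx.DiGraph()
G.add_edge('a', 'b', bsize=2)
G.add_edge('b', 'a', bsize=1)
H = make_acyclic(G)
assert nx.is_directed_acyclic_graph(H)  # the lighter edge b -> a was dropped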
Example #5
    def test_topological_sort2(self):
        DG = nx.DiGraph({1: [2], 2: [3], 3: [4],
                         4: [5], 5: [1], 11: [12],
                         12: [13], 13: [14], 14: [15]})
        assert_raises(nx.NetworkXUnfeasible, consume, nx.topological_sort(DG))

        assert_false(nx.is_directed_acyclic_graph(DG))

        DG.remove_edge(1, 2)
        consume(nx.topological_sort(DG))
        assert_true(nx.is_directed_acyclic_graph(DG))
Example #6
    def test_topological_sort2(self):
        DG = nx.DiGraph({1: [2], 2: [3], 3: [4], 4: [5], 5: [1], 11: [12], 12: [13], 13: [14], 14: [15]})
        assert_raises(nx.NetworkXUnfeasible, nx.topological_sort, DG)
        assert_raises(nx.NetworkXUnfeasible, nx.topological_sort_recursive, DG)

        assert_false(nx.is_directed_acyclic_graph(DG))

        DG.remove_edge(1, 2)
        assert_equal(nx.topological_sort_recursive(DG), [11, 12, 13, 14, 15, 2, 3, 4, 5, 1])
        assert_equal(nx.topological_sort(DG), [11, 12, 13, 14, 15, 2, 3, 4, 5, 1])
        assert_true(nx.is_directed_acyclic_graph(DG))
Example #7
def make_dag(g):
    if nx.is_directed_acyclic_graph(g):
        return
    for comp in nx.weakly_connected_component_subgraphs(g):
        if nx.is_directed_acyclic_graph(comp):
            continue
        for cycle in nx.simple_cycles(comp):
            edges = list(zip(cycle[:-1], cycle[1:]))
            edges.append((cycle[-1], cycle[0]))
            for u, v in edges:
                # drop each cycle edge from the original graph
                if g.has_edge(u, v):
                    g.remove_edge(u, v)
Example #8
def mean_degree_centrality(pg, normalize=0):
    """
    mean_degree_centrality(pg) calculates mean in- and out-degree
    centralities for directed graphs and simple degree-centralities
    for undirected graphs. If the normalize flag is set, each node's
    centralities are weighted by the number of edges in the (di)graph.
    """
    centrality = {}
    try:
        if networkx.is_directed_acyclic_graph(pg):
            cent_sum_in, cent_sum_out = 0, 0
            for n in pg.nodes():
                n_cent_in = pg.in_degree(n)
                n_cent_out = pg.out_degree(n)
                if normalize:
                    n_cent_in = float(n_cent_in) / float(pg.size()-1)
                    n_cent_out = float(n_cent_out) / float(pg.size()-1)
                cent_sum_in = cent_sum_in + n_cent_in
                cent_sum_out = cent_sum_out + n_cent_out
            centrality['in'] = cent_sum_in / float(pg.order())
            centrality['out'] = cent_sum_out / float(pg.order())
        else:
            cent_sum = 0
            for n in pg.nodes():
                if not normalize:
                    n_cent = pg.degree(n)
                else:
                    n_cent = networkx.degree_centrality(pg,n)
                cent_sum = cent_sum + n_cent
            centrality['all'] = cent_sum / float(pg.order())
    except Exception:
        logging.error('pyp_network.mean_degree_centrality() failed!')
    return centrality
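
Note that the normalized branches above divide by pg.size() - 1 (the edge count). For the undirected case, current networkx already provides degree centralities normalized by n - 1, so a rough equivalent of the normalized branch is:

import networkx as nx

def mean_degree_centrality_simple(g):
    # mean of networkx's normalized degree centralities (degree / (n - 1))
    cent = nx.degree_centrality(g)
    return sum(cent.values()) / len(cent) if cent else 0.0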
Example #9
    def set_indices(self):
        if not nx.is_directed_acyclic_graph(self):
            raise ValueError('The graph is not DAG')
        if not nx.is_connected(self.to_undirected()):
            raise ValueError('The graph is not connected')

        self.base_digraph = nx.DiGraph(self)
        self.ordered_nodes = list(nx.topological_sort(self))
        for idx, node in enumerate(self.ordered_nodes):
            self.node[node]['index'] = idx

        self.ordered_edges = OrderedDict({})

        index = 0
        for tail in self.ordered_nodes:
            for head in sorted(self[tail]):
                self.ordered_edges[(tail, head)] = index
                self.base_digraph[tail][head]['capacity'] = len(self[tail][head])
                for idx in sorted(self[tail][head]):
                    self[tail][head][idx]['index'] = index
                    index = index + 1

        # reset data structures
        self.coding_matrix = None
        self.dst_evolution_rec = None
        self.alignment_nodes = []
Example #10
	def generate(self):
		"""Workhorse factory method for producing all valid DAGs for this schema and set
		of constraints."""
		graphs = []

		sys.stderr.write("gen:\n" + self.dumpEdgePossibleSettings())

		edgesPossible = self.getAllVarPairs()		
		edgeCombos = factorialDict(self.edgePossibleSettings)
		for edgeCombo in edgeCombos: # edgeCombo is a dict (s, t) -> 1
			graph = nx.DiGraph()
			for i, ev in enumerate(self.entVars):
				lat = ev in self.latents
				det = self.determines.get(ev, None)
				graph.add_node(ev, latent=lat, determines=det, order=i) # order they were passed in is preserved

			graphSig = ""	
			for s, t in edgesPossible:
				setting = edgeCombo.get((s, t), 0)
				if (setting == 1):
					graph.add_edge(s, t)
				elif (setting == 2):
					graph.add_edge(t, s)
				if (s in self.indexSet) and (t in self.indexSet):
					graphSig += str(setting)
			graph.graph['index'] = int(graphSig, 3)
			
			if (not self.dagsOnly) or (nx.is_directed_acyclic_graph(graph)):
				graphs.append(graph)
		if (len(graphs) < len(edgeCombos)):
			sys.stderr.write("eliminated %d cyclic graphs\n" % (len(edgeCombos) - len(graphs)))
		return sorted(graphs, key=lambda x: x.graph['index'])
Example #11
def balance(graph):
    '''param:  graph, a DAG whose __class__ == nx.DiGraph
       return: r, the set of removed edges that makes the input graph a B-structure
    '''
    # Only graphs in integer form are handled; the node corresponding to
    # each integer can be looked up afterwards.
    # The input graph should be connected; if it is not,
    # minimum_edge_cut will run into problems.
    assert nx.is_directed_acyclic_graph(graph),\
        "The target graph you want to balance is not a DAG"
    r = [] # removed set
    if check(graph):
        return r
    # Keep looping while the graph is not a B-structure.
    # BUGGY: what if cs is empty? Then no two subgraphs can come back,
    # so what do we do in that case?
    print("\nCutting Graph")
    cs, g1, g2 = cut(graph) 
    r = balance(g1) + balance(g2) + cs
    csl = []
    for eachEdge in cs:
        under_check_graph = graph.copy()
        under_check_graph.remove_edges_from(r)
        under_check_graph.add_edges_from(csl)
        under_check_graph.add_edge(eachEdge[0],eachEdge[1])
        if check(under_check_graph):
            print "Edge: %s added back" % str(eachEdge)
            csl.append(eachEdge)
            graph.add_edge(eachEdge[0],eachEdge[1])
    for eachEdge in csl:
        r.remove(eachEdge)
    print "Removed Edge Set: %s" % str(r)
    return r
Example #12
 def set_params(self, node, delta, eta, marginal):
     self.clear_memory()
     assert node in self.graph.nodes()
     self.graph.node[node]['delta'] = delta
     self.graph.node[node]['eta'] = eta
     self.graph.node[node]['marginal'] = marginal
     assert nx.is_directed_acyclic_graph(self.graph)
Example #13
    def build_graph(self):
        """Build graph of relationships between hills 

        Each hill is a list of things that can be used for that hill.

        Each of these may have inputs (names of other hills).

        A graph is build to show the input relations.

        Checks in case graph is cyclic.

        Does a topological sort on the hills to give and order in
        which they should be processed.
        """
        graph = nx.DiGraph()

        for hill, data in self.hills.items():

            for item in data:
                for link in item.inputs:
                    graph.add_edge(link, hill)

        # check if graph is acyclic
        is_dag = nx.is_directed_acyclic_graph(graph)

        if not is_dag:
            raise ValueError("hills must be acyclic")

        self.hill_order = nx.topological_sort(graph)
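
A sketch of the data shape build_graph expects, with a hypothetical Item record standing in for the real hill entries (each entry only needs an inputs attribute):

from collections import namedtuple

Item = namedtuple('Item', ['inputs'])

hills = {
    'base': [Item(inputs=[])],
    'summit': [Item(inputs=['base'])],  # summit takes base as an input
}
# build_graph adds the edge base -> summit, confirms the graph is acyclic,
# and sets hill_order so that base is processed before summit.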
Example #14
def _validate(G):
    '''
    Validates dependency graph to ensure it has no missing or cyclic dependencies
    '''
    for name in G.nodes():
        if 'value' not in G.node[name] and 'template' not in G.node[name]:
            msg = 'Dependency unsatisfied in variable "%s"' % name
            raise ParamException(msg)

    if not nx.is_directed_acyclic_graph(G):
        graph_cycles = nx.simple_cycles(G)

        variable_names = []
        for cycle in graph_cycles:
            try:
                variable_name = cycle[0]
            except IndexError:
                continue

            variable_names.append(variable_name)

        variable_names = ', '.join(sorted(variable_names))
        msg = ('Cyclic dependency found in the following variables: %s. Likely the variable is '
               'referencing itself' % (variable_names))
        raise ParamException(msg)
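
A minimal sketch of exercising this validator, assuming ParamException is the package's own exception type (stubbed here) and a networkx version where G.node is still available:

import networkx as nx

class ParamException(Exception):
    pass  # stand-in for the package's real exception

G = nx.DiGraph()
G.add_node('a', value=1)
G.add_node('b', template='{{a}}')
G.add_edge('a', 'b')  # b depends on a: acyclic and fully satisfied
_validate(G)          # passes silently

G.add_edge('b', 'a')  # introduce a cycle
# _validate(G) now raises ParamException naming the cyclic variables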
Example #15
def dyad_census(pg, debug=0, debuglog=0):
    """
    dyad_census() calculates the number of null, asymmetric, and
    mutual edges between all pairs of nodes in a directed graph.
    """
    if not networkx.is_directed_acyclic_graph(pg):
        logging.error('pyp_network.dyad_census() requires a directed graph as input!')
        return 0
    else:
        census = {}
        census['null'] = 0
        census['asymmetric'] = 0
        census['mutual'] = 0
        tg = networkx.subgraph(pg, pg.nodes())
        for u in pg.nodes_iter():
            tg.delete_node(u)
            for v in tg.nodes_iter():
                if not pg.has_neighbor(u,v):
                    census['null'] = census['null'] + 1
                elif u in pg.predecessors(v) and v in pg.successors(u):
                    census['mutual'] = census['mutual'] + 1
                    if debug:
                        print('Nodes %s and %s link to one another!' % (u, v))
                    if debuglog:
                        logging.error('Nodes %s and %s link to one another!',u, v)
                elif u in pg.predecessors(v) and v not in pg.successors(u):
                    census['asymmetric'] = census['asymmetric'] + 1
                elif u not in pg.predecessors(v) and v in pg.successors(u):
                    census['asymmetric'] = census['asymmetric'] + 1
                else:
                    pass
        del(tg)
        return census
Example #16
def graph(dataframe=None):

    G = nx.DiGraph()
    
    nrow = dataframe.shape[0]

    for i in range(nrow):
        
        source = dataframe['module_id'][i]
        G.add_node(source)
        
        if not pd.isnull(dataframe['children'][i]):
            try:
                targets = dataframe['children'][i].split()
            except Exception:
                raise ValueError('Data type is not correct:', i,
                        dataframe.loc[i,], type(dataframe['children'][i]))
            
            for key in targets:
                G.add_edge(source, key)

    # Sanity check
    selfLoop = list(G.selfloop_edges())
    assert len(selfLoop) == 0, ValueError('self loop:', selfLoop)
    assert nx.is_directed_acyclic_graph(G), ValueError('loop exists!')

    return G
Example #17
    def __init__(self, tasks_reqs):
        """Construct a PipelineFramework based on the given Tasks and their requirements.

        A PipelineFramework is the structure of the pipeline, it contains no patient data.

        :param tasks_reqs: the Tasks and their requirements
        :type tasks_reqs: iterable of tuples, each with a Task and its list of required UIDs
        :raises: ValueError
        """
        self.dag = DiGraph()
        task_dict = {}
        for task, _ in tasks_reqs:
            if task_dict.get(task._uid) is not None:
                raise ValueError("Pipeline contains duplicate Task {}".format(task._uid))
            self.dag.add_node(task, done=False)
            task_dict[task._uid] = task

        for task, reqs in tasks_reqs:
            for req_uid in reqs:
                uid = task_dict.get(req_uid)
                if uid is None:
                    raise KeyError("Unknown UID {} set as requirement for {}".format(req_uid, task._uid))
                self.dag.add_edge(uid, task)

        if not is_directed_acyclic_graph(self.dag):
            raise ValueError("Pipeline contains a cycle.")
Example #18
def count_common_subgraphs(graph1, graph2, n1, n2,
                           node_attrib='label', edge_attrib='label'):
    """
    Counts the number of common (dependency parse) subgraphs rooted at n1 and
    n2. This is an implementation of Cm(n1, n2) for dependency structures from
    Collins and Duffy (2001). Parsing with a Single Neuron.
    """
    for graph in (graph1, graph2):
        assert nx.is_directed_acyclic_graph(graph)
    
    if graph1.node[n1][node_attrib] != graph2.node[n2][node_attrib]:
        return 0

    n1_children = dependency_children(graph1, n1, edge_attrib=edge_attrib)
    n2_children = dependency_children(graph2, n2, edge_attrib=edge_attrib)

    if not n1_children or not n2_children:
        return 0
    else:
        result = 1  # neutral element of multiplication
        for n1_target, n2_target in common_dependency_targets(graph1, graph2, n1, n2,
                                                        node_attrib=node_attrib):
            result *= (count_common_subgraphs(graph1, graph2,
                                              n1_target, n2_target,
                                              node_attrib=node_attrib,
                                              edge_attrib=edge_attrib) + 2)
        return result - 1
Example #19
def read_pedigree_from_test_file(file_name, genotyped_id_file=None):
    '''Load a pedigree from a PLINK TFAM file.'''
    data = np.genfromtxt(file_name, np.dtype(int))
    p = io_pedigree.read(file_name, genotyped_id_file=genotyped_id_file)
    assert_equal(p._graph.number_of_nodes(), data.shape[0], 'Incorrect number of nodes')
    assert nx.is_directed_acyclic_graph(p._graph), 'Pedigree is not a DAG'
    return p
Example #20
def longest_subsequence_dag(a, sign):
    '''Return a longest increasing (if sign=1) or decreasing (if sign=-1) sub-sequence in the
    permutation a of the first n natural integers. Time and storage are O(n). If multiple longest
    sub-sequences exist, arbitrarily returns one of them.'''

    # Dan Cook's idea: use symmetry to solve the decreasing case in terms of the increasing case 
    if sign < 0:
        return list(reversed(longest_subsequence_dag(list(reversed(a)), 1)))
    
    G = build_dag(np.array(a))  # Construct a DAG whose edges represent all candidate pairs of consecutive elements of the longest subsequence
    assert nx.is_directed_acyclic_graph(G)
    # print 'Edges', G.edges()

    depth = longest_path_length(G)  # For each node, calculate the longest path length       
    # print 'depth', depth 
    
    # Back-track from a node of maximum depth to its ancestors to reconstruct the longest path
    x = np.argmax(depth)
    seq = [x]
    # print 'x', x, 'depth', depth[x]
    while G.in_degree(x) > 0:
        # To find the maximum path, choose a parent of maximum depth
        parents = G.predecessors(x)
        # print 'parents', parents
        x = parents[np.argmax(depth[parents])]
        # print 'x', x, 'depth', depth[x]
        seq.append(x)
        # print 'seq', seq
    # print 'final seq', list(reversed(seq))
    return list(reversed(seq))
Example #21
    def load(self):
        """
        Load dependencies for all loaded schemas.
        This method gets called before any operation that requires dependencies: delete, drop, populate, progress.
        """

        # reload from scratch to prevent duplication of renamed edges
        self.clear()

        # load primary key info
        keys = self._conn.query("""
                SELECT
                    concat('`', table_schema, '`.`', table_name, '`') as tab, column_name
                FROM information_schema.key_column_usage
                WHERE table_name not LIKE "~%%" AND table_schema in ('{schemas}') AND constraint_name="PRIMARY"
                """.format(schemas="','".join(self._conn.schemas)))
        pks = defaultdict(set)
        for key in keys:
            pks[key[0]].add(key[1])

        # add nodes to the graph
        for n, pk in pks.items():
            self.add_node(n, primary_key=pk)

        # load foreign keys
        keys = self._conn.query("""
        SELECT constraint_name,
            concat('`', table_schema, '`.`', table_name, '`') as referencing_table,
            concat('`', referenced_table_schema, '`.`',  referenced_table_name, '`') as referenced_table,
            column_name, referenced_column_name
        FROM information_schema.key_column_usage
        WHERE referenced_table_name NOT LIKE "~%%" AND (referenced_table_schema in ('{schemas}') OR
            referenced_table_schema is not NULL AND table_schema in ('{schemas}'))
        """.format(schemas="','".join(self._conn.schemas)), as_dict=True)
        fks = defaultdict(lambda: dict(attr_map=dict()))
        for key in keys:
            d = fks[(key['constraint_name'], key['referencing_table'], key['referenced_table'])]
            d['referencing_table'] = key['referencing_table']
            d['referenced_table'] = key['referenced_table']
            d['attr_map'][key['column_name']] = key['referenced_column_name']

        # add edges to the graph
        for fk in fks.values():
            props = dict(
                primary=all(attr in pks[fk['referencing_table']] for attr in fk['attr_map']),
                attr_map=fk['attr_map'],
                aliased=any(k != v for k, v in fk['attr_map'].items()),
                multi=not all(a in fk['attr_map'] for a in pks[fk['referencing_table']]))
            if not props['aliased']:
                self.add_edge(fk['referenced_table'], fk['referencing_table'], **props)
            else:
                # for aliased dependencies, add an extra node in the format '1', '2', etc
                alias_node = '%d' % next(self._node_alias_count)
                self.add_node(alias_node)
                self.add_edge(fk['referenced_table'], alias_node, **props)
                self.add_edge(alias_node, fk['referencing_table'], **props)

        if not nx.is_directed_acyclic_graph(self):  # pragma: no cover
            raise DataJointError('DataJoint can only work with acyclic dependencies')
Example #22
 def _resolve_dependencies(self):
     self.templates_deps.clear()
     for (tpl_name, tpl_data) in self.templates.items():
         self.templates_deps.add_node(tpl_name)
         for parent in tpl_data["parent"]:
             self.templates_deps.add_edge(tpl_name, parent)
     if not nx.is_directed_acyclic_graph(self.templates_deps):
         raise ParsingError(_("A cycle has been detected in templates"))
Example #23
    def is_configuration (self, s) :
        pre = set ()
        for e in s :
            for c in e.pre | e.cont :
                if not sgl (c.pre) <= s : return False

        g = self.asym_graph (True, s, True)
        return networkx.is_directed_acyclic_graph (g)
Example #24
 def is_dag(self):
     """ Check to see if we have a directed acyclic graph
     If we have an acyclic graph, it is possible to reorganize
     the nodes according to the downgraph on G """
     return nx.is_directed_acyclic_graph(Node.G)
Example #25
def remove_cycles(G):
    while not nx.is_directed_acyclic_graph(G):
        subgraphs = nx.strongly_connected_component_subgraphs(G)
        for subgraph in subgraphs:
            if subgraph.number_of_nodes() > 1:
                edge_index = random.randrange(subgraph.number_of_edges())
                edge = list(subgraph.edges())[edge_index]
                G.remove_edge(edge[0], edge[1])
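
A quick usage sketch (note that strongly_connected_component_subgraphs belongs to networkx versions before 2.4):

import random
import networkx as nx

G = nx.DiGraph([(1, 2), (2, 3), (3, 1), (3, 4)])
remove_cycles(G)
assert nx.is_directed_acyclic_graph(G)  # one random edge of the 3-cycle is gone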
Example #26
 def __init__(self, scaffold_graph):
     print "Entering PathFinder module:", str(datetime.now())
     self.G = scaffold_graph.copy()
     #Build strandless list of sequences
     sequences = set([n for n in self.G.nodes() if n > 0])
     #Define weakly connected components
     print "1... Defining weakly connected components"
     component_graphs = set([g for g in nx.weakly_connected_component_subgraphs(self.G)])
     single_node_graphs = set([g for g in component_graphs if len(g.nodes()) == 1])
     multi_node_graphs = set([g for g in component_graphs if len(g.nodes()) > 1])
     print "Number of single-node components:", len(single_node_graphs)
     print "Number of multi-node components:", len(multi_node_graphs)
     #Consolidate unscaffolded nodes, discard reverse strand
     print "2... Consolidating single-node components"
     unscaffolded = set([g.nodes()[0] for g in single_node_graphs])
     discard_nodes = set([n for n in unscaffolded if n < 0])
     for g in iter(single_node_graphs.copy()):
         if g.nodes()[0] in discard_nodes:
             single_node_graphs.discard(g)
     print "Number of unscaffolded sequences:", len(single_node_graphs)
     #Classify multi-node graphs
     print "3... Classifying multi-node components"
     DAG = set([])
     Euler = set([])
     for g in multi_node_graphs:
         if nx.is_directed_acyclic_graph(g):
             DAG.add(g)
         elif nx.is_eulerian(g):
             Euler.add(g)
         else:
             sys.exit("FATAL ERROR: Unknown multi-node graph type!")
     print "Number of directed acyclic graphs:",  len(DAG)
     print "Number of Eulerian graphs:", len(Euler)
     #Build scaffolds from DAGs
     print "4... Building scaffolds from directed acyclic graphs"
     self.scaffolds = set([])
     for g in DAG:
         self.build_dag_scaffold(g)
     #Consolidating complementary scaffolds, keep first found
     print "5... Consolidating complementary scaffolds"
     consolidated_scaff = set([])
     for seq in iter(self.scaffolds):
         comp = self.revc(seq)
         if comp in self.scaffolds:
             if comp not in consolidated_scaff:
                 consolidated_scaff.add(seq)
         else:
             print "WARNING: non-complemented scaffold"
     self.scaffolds = consolidated_scaff
     print "Number of scaffolds assembled:", len(self.scaffolds)
     #Build scaffolds from Eulerian graphs
     
     #Add unscaffolded seqs to scaffolds list
     print "6... Adding unscaffolded sequences to output"
     for g in single_node_graphs:
         seq = self.G.node[g.nodes()[0]]['seq']
         self.scaffolds.add(seq)
     print "Leaving PathFinder module:", str(datetime.now())
Example #27
    def add(self, u, uw, v, vw, sequential=False, global_dag=None):
        """
        Add nodes u and/or v to the partition
        if sequential is True, break antichains to sequential chains
        """
        # if (self.partition_id == 180):
        #     logger.debug("u = ", u, ", v = ", v, ", partition = ", self.partition_id)
        unew = u not in self._dag.node
        vnew = v not in self._dag.node
        self._dag.add_node(u, weight=uw)
        self._dag.add_node(v, weight=vw)
        self._dag.add_edge(u, v)

        if (unew and vnew): # we know this is fast
            self._max_antichains = DAGUtil.get_max_antichains(self._dag)
            self._max_dop = 1
        else:
            if (sequential and (global_dag is not None)):
                # break potential antichain to sequential chain
                if (unew):
                    v_ups = nx.ancestors(self._dag, v)
                    for vup in v_ups:
                        if (u == vup):
                            continue
                        if (len(self._dag.predecessors(vup)) == 0):
                            # link u to "root" parent of v to break antichain
                            self._dag.add_edge(u, vup)
                            # change the original global graph
                            global_dag.add_edge(u, vup, weight=0)
                            if (not nx.is_directed_acyclic_graph(global_dag)):
                                global_dag.remove_edge(u, vup)
                else:
                    u_downs = nx.descendants(self._dag, u)
                    for udo in u_downs:
                        if (udo == v):
                            continue
                        if (len(self._dag.successors(udo)) == 0):
                            # link "leaf" children of u to v to break antichain
                            self._dag.add_edge(udo, v)
                            # change the original global graph
                            global_dag.add_edge(udo, v, weight=0)
                            if (not nx.is_directed_acyclic_graph(global_dag)):
                                global_dag.remove_edge(udo, v)

            self._max_dop = self.probe_max_dop(u, v, unew, vnew, update=True)
Example #28
def reduce_paths(G):
    """
    Make graph into a directed acyclic graph (DAG).
    """
    from jcvi.algorithms.lpsolve import min_feedback_arc_set

    while not nx.is_directed_acyclic_graph(G):
        edges = []
        for a, b, w in G.edges_iter(data=True):
            w = w['weight']
            edges.append((a, b, w))
        mf, mf_score = min_feedback_arc_set(edges)
        for a, b, w in mf:
            G.remove_edge(a, b)

    assert nx.is_directed_acyclic_graph(G)
    G = transitive_reduction(G)
    return G
Example #29
def is_tree(graph):
    if not nx.is_directed_acyclic_graph(graph):
        return False
    # a node with more than one incoming edge appears more than once
    # in the list of edge targets, so the graph would not be a tree
    heads = [v for u, v in graph.edges()]
    return len(set(heads)) == len(heads)
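
Equivalently, the test above says: acyclic, and no node has more than one incoming edge (a forest of arborescences). A restatement against the current networkx API:

import networkx as nx

def is_tree_v2(graph):
    return (nx.is_directed_acyclic_graph(graph)
            and all(d <= 1 for _, d in graph.in_degree()))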
Example #30
 def _validate(self, graph=None):
     if graph is None:
         graph = self._graph
     # Ensure that there is a valid topological ordering.
     if not nx.is_directed_acyclic_graph(graph):
         raise exc.DependencyFailure("No path through the items in the"
                                     " graph produces an ordering that"
                                     " will allow for correct dependency"
                                     " resolution")
Example #31
    def new_space(self,
                  parent,
                  name=None,
                  bases=None,
                  formula=None,
                  refs=None,
                  source=None,
                  is_derived=False,
                  prefix="",
                  doc=None,
                  container=None):
        """Create a new child space.

        Args:
            name (str): Name of the space. If omitted, the name is
                generated automatically.
            bases: If specified, the new space becomes a derived space of
                the given base space(s).
            formula: Function whose parameters are used to set space parameters.
            refs: a mapping of refs to be added.
            source: A source module from which cell definitions are read.
            prefix: Prefix to the autogenerated name when name is None.
        """
        if name is None:
            while True:
                name = parent.spacenamer.get_next(parent.namespace, prefix)
                if self._can_add(parent, name, UserSpaceImpl):
                    break

        elif not self._can_add(parent, name, UserSpaceImpl):
            raise ValueError("Cannot create space '%s'" % name)

        if not prefix and not is_valid_name(name):
            raise ValueError("Invalid name '%s'." % name)

        if bases is None:
            bases = []
        elif isinstance(bases, UserSpaceImpl):
            bases = [bases]

        if parent.is_model():
            node = name
            pnode = []
        else:
            node = parent.namedid + "." + name
            pnode = [parent.namedid]

        nodes = pnode + [b.namedid for b in bases]

        oldsubg_inherit = self._inheritance.subgraph_from_nodes(nodes)
        oldsubg = oldsubg_inherit.get_derived_graph()
        newsubg_inherit = oldsubg_inherit.copy_as_spacegraph(oldsubg_inherit)

        newsubg_inherit.add_node(node, mode="defined", state="defined")

        for b in bases:
            base = b.namedid
            newsubg_inherit.add_edge(base,
                                     node,
                                     mode="defined",
                                     index=newsubg_inherit.max_index(node))

        if not nx.is_directed_acyclic_graph(newsubg_inherit):
            raise ValueError("cyclic inheritance")

        if not newsubg_inherit.check_cyclic(node, node):
            raise ValueError("cyclic inheritance through composition")

        newsubg_inherit.get_mro(node)  # Check if MRO is possible

        for pnode in newsubg_inherit.get_parent_nodes(node):
            newsubg_inherit.nodes[pnode]["mode"] = "defined"

        start = [(tail, node) for tail in newsubg_inherit.ordered_preds(node)]

        newsubg = newsubg_inherit.get_derived_graph(on_edge=self._derive_hook,
                                                    start=start)

        if not nx.is_directed_acyclic_graph(newsubg):
            raise ValueError("cyclic inheritance")

        # Check if MRO is possible for each node in sub graph
        for n in nx.descendants(newsubg, node):
            newsubg.get_mro(n)

        if not parent.is_model():
            parent.set_defined()

        if container is None:
            container = parent._named_spaces

        space = UserSpaceImpl(parent,
                              name,
                              container,
                              is_derived,
                              formula=formula,
                              refs=refs,
                              source=source,
                              doc=doc)
        newsubg.nodes[node]["space"] = space
        newsubg.nodes[node]["state"] = "created"

        self._instructions.execute()
        self._update_graphs(newsubg_inherit, newsubg, oldsubg_inherit, oldsubg)

        return space
Example #32
    def finalize(self):
        assert (not self.__finalized)
        if not nx.is_directed_acyclic_graph(self.__graph):
            raise ValueError("The generated graph is not a DAG!\n" + str(self.__graph))

        self.__finalized = True
Example #33
 def find_and_replace_pattern(graph: nx.MultiDiGraph):
     is_acyclic = nx.is_directed_acyclic_graph(graph)
     graph.graph['is_cyclic'] = not is_acyclic
Example #34
    def initialize(self):
        """
        Initialize Graph class instance.

        Initialization includes: creating the NetworkX DiGraph and
        populating it with input and step nodes and directed edges.

        Args:
            None.

        Returns:
            On failure: Raises WorkflowDAGException.

        """
        for context in self._parsed_job_work_uri:
            # set default empty values for context options
            if context not in self._context_options:
                self._context_options[context] = {}

        # references to step classes for each context
        try:
            self._load_context_classes()
        except WorkflowDAGException as err:
            msg = 'cannot load context-specific step classes'
            Log.an().error(msg)
            raise WorkflowDAGException(str(err) + '|' + msg) from err

        # flatten parameters
        self._parameters = {
            param_name: param['value']
            for param_name, param in self._workflow['parameters'].items()
        }

        # init DAG object with structure and empty nodes
        self._graph = nx.DiGraph()

        try:
            self._init_graph_structure()
        except WorkflowDAGException as err:
            msg = 'cannot initialize graph structure'
            Log.an().error(msg)
            raise WorkflowDAGException(str(err) + '|' + msg) from err

        # validate that graph is DAG
        if not nx.is_directed_acyclic_graph(self._graph):
            msg = 'graph contains cycles, check step dependencies'
            Log.an().error(msg)
            raise WorkflowDAGException(msg)

        # topological sort of graph nodes
        self._topo_sort = list(nx.topological_sort(self._graph))

        # create URIs for each input and step for all contexts
        try:
            self._init_context_uris()
        except WorkflowDAGException as err:
            msg = 'cannot initialize context uris'
            Log.an().error(msg)
            raise WorkflowDAGException(str(err) + '|' + msg) from err

        # initialize input nodes
        try:
            self._init_inputs()
        except WorkflowDAGException as err:
            msg = 'cannot initialize workflow inputs'
            Log.an().error(msg)
            raise WorkflowDAGException(str(err) + '|' + msg) from err

        # initialize step nodes
        try:
            self._init_steps()
        except WorkflowDAGException as err:
            msg = 'cannot initialize workflow steps'
            Log.an().error(msg)
            raise WorkflowDAGException(str(err) + '|' + msg) from err
Example #35
def is_ebunch_dag(ebunch):
	G = nx.DiGraph()
	G.add_edges_from(ebunch)
	return nx.is_directed_acyclic_graph(G)
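
Usage is direct, for example:

assert is_ebunch_dag([(1, 2), (2, 3)])      # a chain is acyclic
assert not is_ebunch_dag([(1, 2), (2, 1)])  # a 2-cycle is not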
Example #36
 def test_is_directed_acyclic_graph(self):
     G = nx.generators.complete_graph(2)
     assert not nx.is_directed_acyclic_graph(G)
     assert not nx.is_directed_acyclic_graph(G.to_directed())
     assert not nx.is_directed_acyclic_graph(nx.Graph([(3, 4), (4, 5)]))
     assert nx.is_directed_acyclic_graph(nx.DiGraph([(3, 4), (4, 5)]))
Example #37
    def _generate_flatgraph(self):
        """Generate a graph containing only Nodes or MapNodes
        """
        import networkx as nx

        logger.debug("expanding workflow: %s", self)
        nodes2remove = []
        if not nx.is_directed_acyclic_graph(self._graph):
            raise Exception(("Workflow: %s is not a directed acyclic graph "
                             "(DAG)") % self.name)
        nodes = list(nx.topological_sort(self._graph))
        for node in nodes:
            logger.debug("processing node: %s", node)
            if isinstance(node, Workflow):
                nodes2remove.append(node)
                # use in_edges instead of in_edges_iter to allow
                # disconnections to take place properly. otherwise, the
                # edge dict is modified.
                # dj: added list() for networkx ver.2
                for u, _, d in list(
                        self._graph.in_edges(nbunch=node, data=True)):
                    logger.debug("in: connections-> %s", str(d["connect"]))
                    for cd in deepcopy(d["connect"]):
                        logger.debug("in: %s", str(cd))
                        dstnode = node._get_parameter_node(cd[1], subtype="in")
                        srcnode = u
                        srcout = cd[0]
                        dstin = cd[1].split(".")[-1]
                        logger.debug("in edges: %s %s %s %s", srcnode, srcout,
                                     dstnode, dstin)
                        self.disconnect(u, cd[0], node, cd[1])
                        self.connect(srcnode, srcout, dstnode, dstin)
                # do not use out_edges_iter for reasons stated in in_edges
                # dj: for ver 2 use list(out_edges)
                for _, v, d in list(
                        self._graph.out_edges(nbunch=node, data=True)):
                    logger.debug("out: connections-> %s", str(d["connect"]))
                    for cd in deepcopy(d["connect"]):
                        logger.debug("out: %s", str(cd))
                        dstnode = v
                        if isinstance(cd[0], tuple):
                            parameter = cd[0][0]
                        else:
                            parameter = cd[0]
                        srcnode = node._get_parameter_node(parameter,
                                                           subtype="out")
                        if isinstance(cd[0], tuple):
                            srcout = list(cd[0])
                            srcout[0] = parameter.split(".")[-1]
                            srcout = tuple(srcout)
                        else:
                            srcout = parameter.split(".")[-1]
                        dstin = cd[1]
                        logger.debug("out edges: %s %s %s %s", srcnode, srcout,
                                     dstnode, dstin)
                        self.disconnect(node, cd[0], v, cd[1])
                        self.connect(srcnode, srcout, dstnode, dstin)
                # expand the workflow node
                # logger.debug('expanding workflow: %s', node)
                node._generate_flatgraph()
                for innernode in node._graph.nodes():
                    innernode._hierarchy = ".".join(
                        (self.name, innernode._hierarchy))
                self._graph.add_nodes_from(node._graph.nodes())
                self._graph.add_edges_from(node._graph.edges(data=True))
        if nodes2remove:
            self._graph.remove_nodes_from(nodes2remove)
        logger.debug("finished expanding workflow: %s", self)
Example #38
 def isAcyclic(self):
     return nx.is_directed_acyclic_graph(self.G)
Example #39
 def is_cyclic(self):
     """:return : True if the graph has cycle (reentrant arcs are not considerate as cycle).
     """
     if self.is_reentrant:
         return True
     return not nx.is_directed_acyclic_graph(self.nxg)
Example #40
 def isTree(self):
     assert NX.is_directed_acyclic_graph(self.nxDg)
     for node in self.nxDg.nodes():
         assert len(self.nxDg.in_edges(node)) < 2
Example #41
    def greedy(self,
               threshold=None,
               candidateSet=None,
               candidateChildFrac=2.,
               maxNumOutgroups=1):
        orderedPairs = []
        for source, sinks in self.dm.items():
            for sink, dist in sinks.items():
                if source != self.root and sink != self.root:
                    orderedPairs.append((dist, (source, sink)))
        orderedPairs.sort(key=lambda x: x[0])
        finished = set()
        self.candidateMap = dict()
        if candidateSet is not None:
            assert isinstance(candidateSet, set)
            for candidate in candidateSet:
                self.candidateMap[candidate] = True

        htable = self.heightTable()

        for candidate in orderedPairs:
            source = candidate[1][0]
            sink = candidate[1][1]
            sourceName = self.mcTree.getName(source)
            sinkName = self.mcTree.getName(sink)
            dist = candidate[0]

            # skip leaves (as sources)
            if len(self.dag.out_edges(source)) == 0:
                finished.add(source)

            # skip nodes that were already finished in a previous run
            if sourceName in self.ogMap and len(
                    self.ogMap[sourceName]) >= maxNumOutgroups:
                finished.add(source)

            # skip invalid outgroups
            if sink in self.invalidSet:
                continue

            # skip nodes that aren't in the candidate set (if specified)
            # or don't have enough candidate children
            if not self.inCandidateSet(sink, candidateChildFrac):
                continue

            # candidate pair exceeds the given threshold, so we skip
            if threshold is not None and \
            htable[sink] - htable[source] + 1 > threshold:
                continue

            # Don't use any outgroups that are a child of another node
            # already in the outgroup set
            if any([
                    self.onSamePath(x, sink)
                    for x in self.dag.successors(source)
            ]):
                continue

            if source not in finished and \
            not self.onSamePath(source, sink):
                self.dag.add_edge(source, sink, weight=dist, info='outgroup')
                if NX.is_directed_acyclic_graph(self.dag):
                    htable[source] = max(htable[source], htable[sink] + 1)
                    existingOutgroups = [i[0] for i in self.ogMap[sourceName]]
                    if sinkName in existingOutgroups:
                        # This outgroup was already assigned to this source in a previous run
                        # Sanity check that the distance is equal
                        existingOutgroupDist = dict(self.ogMap[sourceName])
                        assert existingOutgroupDist[sinkName] == dist
                        continue
                    self.ogMap[sourceName].append((sinkName, dist))
                    if len(self.ogMap[sourceName]) >= maxNumOutgroups:
                        finished.add(source)
                else:
                    self.dag.remove_edge(source, sink)

        # Since we could be adding to the ogMap instead of creating
        # it, sort the outgroups by distance again. Sorting the
        # outgroups is critical for the multiple-outgroups code to
        # work well.
        for node, outgroups in self.ogMap.items():
            self.ogMap[node] = sorted(outgroups, key=lambda x: x[1])
Example #42
 def test_is_dag_nodes_degrees(self, num_nodes, degree):
     """ Tests that generated graph is dag for different numbers of nodes and degrees
     """
     sm = generate_structure(num_nodes, degree)
     assert nx.is_directed_acyclic_graph(sm)
Example #43
def test_random(n=50, p=0.1, runs=1000, debug=None):
    for run in range(runs) if debug is None else [debug]:
        g = fast_gnp_random_graph(n, p, seed=run + 1, directed=True)

        # add source connected to all nodes
        source = 100 * (n // 100) + 200
        for v in list(g.nodes()):
            g.add_edge(source, v, weight=0, tokens=0)

        # add random weights and tokens
        wsum = 1
        for _, _, data in g.edges_iter(data=True):
            data['weight'] = randrange(1, 10)
            data['tokens'] = randrange(-1, 8)
            wsum += data['weight']

        # create shortest path formulation for initial tree
        for _, _, data in g.edges_iter(data=True):
            data['sp'] = data['tokens'] * wsum - data['weight']

        # ensure that the graph admits a feasible solution
        its = 0
        while True:
            try:
                its += 1
                tree, distances = bfct.find_shortest_paths(g, source, arg='sp')
                break
            except NegativeCycleException as ex:
                toks = sum(
                    map(lambda vw: g.get_edge_data(*vw).get('tokens'),
                        ex.cycle))
                edge_data = g.get_edge_data(*choice(ex.cycle))
                edge_data['tokens'] += (1 - toks)
                edge_data[
                    'sp'] = edge_data['tokens'] * wsum - edge_data['weight']

        if debug is not None:
            import pdb
            pdb.set_trace()

        negative_toks = False
        for _, _, data in g.edges_iter(data=True):
            if data['tokens'] < 0:
                negative_toks = True
                break

        print("Run {}: negative tokens: {}, iterations: {}".format(
            run, negative_toks, its))

        ratio, cycle = compute_mcr(g, source)
        assert ratio is not None, "Deadlocked cycle found"
        if not cycle:
            # verify that the graph is acyclic
            assert is_directed_acyclic_graph(
                g), "[run = {}] Graph is not acyclic".format(run)
        else:
            wsum, tsum = 0, 0
            for v, w in cycle:
                data = g.get_edge_data(v, w)
                wsum += data['weight']
                tsum += data['tokens']

            assert Fraction(
                wsum, tsum
            ) == ratio, "[run = {}] computed MCR {} does not match ratio of critical cycle {}".format(
                run, ratio, Fraction(wsum, tsum))

            for v, w, data in g.edges_iter(data=True):
                data['weight'] = data['tokens'] * ratio - data['weight']

            try:
                bellman_ford(g, source)
            except Exception:
                print("Exception during run {}".format(run))
Example #44
 def test_random_dag_create_one() -> None:
     dag = random_dag(number_of_nodes=5, edge_density=0.4, max_in_degree=4)
     assert nx.is_directed_acyclic_graph(dag)
Example #45
    to_one_paper = [vrt_name_one_paper] * num_child  # the newer paper

    # concatenate the lists
    from_all_papers = from_all_papers + from_one_paper
    to_all_papers = to_all_papers + to_one_paper

##### Section: Draw Graph #####

# Build a dataframe with 4 connections
df = pd.DataFrame({'from': from_all_papers, 'to': to_all_papers})

# Build your graph
G = nx.from_pandas_edgelist(df, 'from', 'to', create_using=nx.DiGraph())

# determine vertices' coordinates
if not nx.is_directed_acyclic_graph(G):
    raise TypeError('Cannot assign coordinates to a graph that is not a DAG')

vertices_sorted = list(nx.topological_sort(G))

num_vertices = len(vertices_sorted)

posi = {}
for i in range(num_vertices):
    vrt_name = vertices_sorted[i]
    posi_vert = -i / num_vertices
    posi_hori = random.random()
    posi[vrt_name] = np.array([posi_hori, posi_vert])

# make the vertices less dense
posi_new = vertices_less_dense(posi)
Example #46
    def RP_RL(self, model, model_id, parameters_file):
        start_RL = time.perf_counter()
        os.chdir(rpconfig.path)

        # 14k m10n10
        filenames_file = open(rpconfig.filename_profiles, 'r')
        filenames = [i.strip('\n') for i in filenames_file]
        train_filenames = filenames[:10000] + filenames[12000:]
        test_filenames = filenames[
            10000:11000]  # the same 1000 profiles we used in the paper
        validation_filenames = filenames[11000:11500]

        # m10n10
        # filenames = sorted(glob.glob('M10N10-*.csv'))
        # train_filenames = filenames[0:80000]
        # test_filenames = filenames[80000:100000]

        # m20n20
        # filenames_file = open(rpconfig.filename_profiles, 'r')
        # filenames = [i.strip('\n') for i in filenames_file]
        # train_filenames = filenames[:1] #+ filenames[12000:]
        # test_filenames = filenames[:1]
        # validation_filenames = filenames # all available m20n20

        # debugging
        # train_filenames = ['meh']
        # test_filenames = ['4circle.soc']

        # m50n50
        # filenames = sorted(glob.glob('M50N50-*.csv'))
        # train_filenames = filenames[0:1]
        # test_filenames = filenames[0:1000]

        # m40n40
        # filenames = sorted(glob.glob('M40N40-*.csv'))
        # train_filenames = filenames[0:1]
        # test_filenames = filenames[0:1000]

        # m50n50
        # filenames_file = open(rpconfig.filename_profiles, 'r')
        # filenames = [i.strip('\n') for i in filenames_file]
        # train_filenames = filenames[0:1]
        # test_filenames = filenames[0:1]
        # validation_filenames = filenames

        # Read true winners
        os.chdir(rpconfig.winners_path)
        true_winners = []
        winners_file = open("./winners_14k.txt", 'r')
        # winners_file = open("./winners_m20n20.txt", 'r')
        for line in winners_file:
            winners = []
            line = line.replace('[', '')
            line = line.replace(']', '')
            line = line.replace(' ', '')
            line = line.replace('\n', '')
            line = line.split(',')
            for c in line:
                winners.append(int(c))
            true_winners.append(winners)
        os.chdir(rpconfig.path)

        # Split true_winners into train and test
        true_winners_train = true_winners[:10000] + true_winners[12000:]
        true_winners_test = true_winners[10000:11000]
        true_winners_val = true_winners[11000:11100]

        # m20n20
        # true_winners_train = true_winners[:1]
        # true_winners_test = true_winners[:1]
        # true_winners_val = true_winners

        # Open files for output
        output_filename = str(model_id) + "_RL_training_results.txt"
        loss_filename = str(model_id) + "_RL_loss.txt"
        test_output_filename = str(model_id) + "_RL_test_results.txt"
        test_output_summary_filename = str(
            model_id) + "_RL_test_summary_results.txt"
        validation_output_filename = str(model_id) + "_RL_val_results.txt"
        validation_output_summary_filename = str(
            model_id) + "_RL_val_summary_results.txt"
        output_file = open(rpconfig.results_path + output_filename, "w+")
        test_output_file = open(rpconfig.results_path + test_output_filename,
                                "w+")
        test_output_summary_file = open(
            rpconfig.results_path + test_output_summary_filename, "w+")
        val_output_file = open(
            rpconfig.results_path + validation_output_filename, "w+")
        val_output_summary_file = open(
            rpconfig.results_path + validation_output_summary_filename, "w+")
        loss_file = open(rpconfig.results_path + loss_filename, "w+")

        # Create RL base
        if params.f_use_v2:
            if params.f_experience_replay:
                print("Experience replay not implemented for v2")
                sys.exit(0)
            else:
                base = RL_base_v2(len(train_filenames))
        else:
            if params.f_use_PUT_agent and params.f_experience_replay:
                base = RL_base_PUT_agent_experience_replay(
                    len(train_filenames))
            elif params.f_experience_replay:
                base = RL_base_experience_replay(len(train_filenames))
            else:
                base = RL_base(len(train_filenames))

        # Create agent
        if params.f_use_v2:
            agent = RP_RL_agent_v2(model, base.learning_rate, loss_file)
        else:
            if params.f_use_PUT_agent and params.f_experience_replay:
                agent = RP_RL_agent_PUT_experience(model, base.learning_rate,
                                                   loss_file)
            elif params.f_use_PUT_agent:
                agent = RP_RL_agent_PUT(model, base.learning_rate, loss_file)
            else:
                agent = RP_RL_agent(model, base.learning_rate, loss_file)

        total_time = 0
        num_times_tested = 0

        print("***********************************************")
        print("Starting Reinforcement Learning", model_id)

        # Print header
        header = "Inputfile\tPUT-winners\tExploration Rate\tLearning Rate\tTau\tStop Conditions\tNum Nodes\tNum Winners Found\tLoss\tAvg Loss\tIs Acyclic\tIter To Find Winner\tIters To Find All Winners\tRunning Nodes\tWinners Dist\tnum_iters_reset_skipped\tNum hashed\tRuntime"
        print(header)
        output_file.write(header + '\n')
        output_file.flush()

        loss_file.write('Num Nodes' + '\t' + 'Loss Per Node' + '\n')
        loss_file.flush()

        # Open winner distribution file
        if params.f_use_winners_distribution:
            winners_distribution_file = open(
                rpconfig.winners_distribution_filename, 'r')
            winners_distribution = {}
            for line in winners_distribution_file:
                line = line.strip('\n')
                line = line.split('\t')
                if len(line) == 1:
                    current_file = line[0]
                    continue
                if current_file not in winners_distribution:
                    winners_distribution[current_file] = {}
                winners_distribution[current_file][int(line[0])] = int(line[1])
            winners_distribution_file.close()

        # Shuffle training data
        if params.shuffle_training_data:
            combined = list(zip(train_filenames, true_winners_train))
            random.shuffle(combined)
            train_filenames, true_winners_train = zip(*combined)

        # Print test output file heading
        if params.f_test_using_PUT_RP:
            test_header = "inputfile\tPUT-winners\tnum nodes\tdiscovery states\tmax discovery state\tdiscovery times\tmax discovery times\tstop condition hits\tsum stop cond hits\tnum hashes\tnum initial bridges\tnum redundant edges\ttime for cycles\truntime"
        elif params.f_use_PUT_agent:
            test_header = 'Profile\tPUT-Winners\tNum Winners\tMissed Winners\tNum Missed Winners\tNum Nodes\tNode Discovered\t100% Nodes\tRuntime Discovered\t100% Runtime\tRuntime'
            test_summary_header = "Test\tNum PUT-Winners Found\tTotal Num Nodes\tAvg Nodes Per Profile\tAvg 100% Nodes\tTotal Time\tAvg Time Per Profile\tAvg 100% Time\n"
            test_output_summary_file.write(test_summary_header)
            val_output_summary_file.write(test_summary_header)
            test_output_summary_file.flush()
            val_output_summary_file.flush()
        else:
            test_header = 'Profile\tPUT-Winners\tNum Winners\tMissed Winners\tNum Missed Winners\tNum Iters\tIter Discovered\tMax Iter Discovery\tTime Discovered\tMax Time Discovery\tRuntime'
            test_summary_header = "Test\tNum PUT-Winners Found\tTotal Num Iterations\tAvg Iterations Per Profile\tAvg 100% Iters\tTotal Time\tAvg Time Per Profile\tAvg 100% Time\n"
            test_output_summary_file.write(test_summary_header)
            val_output_summary_file.write(test_summary_header)
            test_output_summary_file.flush()
            val_output_summary_file.flush()
        test_output_file.write(test_header + '\n')
        val_output_file.write(test_header + '\n')
        test_output_file.flush()
        val_output_file.flush()

        # Print additional parameters
        parameters_file.write("RL Data Path\t" + rpconfig.path + '\n')
        parameters_file.write("RL Num Training Data\t" +
                              str(len(train_filenames)) + '\n')
        parameters_file.write("RL Num Testing Data\t" +
                              str(len(test_filenames)) + '\n')
        parameters_file.write("RL Train From...To\t" + train_filenames[0] +
                              "\t" + train_filenames[-1] + '\n')
        parameters_file.write("RL Test From...To\t" + test_filenames[0] +
                              "\t" + test_filenames[-1] + '\n')
        parameters_file.write("RL Loss Function\t" + str(agent.loss_fn) + '\n')
        parameters_file.flush()

        val_results = []

        if params.test_10x:
            print("********** testing 10x *******************")
            assert params.f_start_from_default or params.test_with_LP
            for t in range(10):
                test_model(test_output_file, test_output_summary_file, agent,
                           test_filenames, true_winners_test, model_id,
                           "final_" + str(t), False)

        # assert not params.f_use_testing_v2
        # assert params.f_start_from_default
        # num_samples_range = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
        # runtimes
        # for num_samples in num_samples_range:
        #     params.num_test_iterations = num_samples
        #     print(params.num_test_iterations)
        #     start = time.perf_counter()
        #     test_model(val_output_file, val_output_summary_file, agent, validation_filenames, true_winners_val, model_id, num_times_tested, True)
        #     num_times_tested += 1
        # return

        for epoch in range(params.num_epochs):
            i = 0
            print('---------------Epoch ' + str(epoch) +
                  '------------------------')

            # Shuffle training data
            if params.shuffle_training_data:
                combined = list(zip(train_filenames, true_winners_train))
                random.shuffle(combined)
                train_filenames, true_winners_train = zip(*combined)

            for inputfile in train_filenames:
                # Test model on validation data
                # Not necessary since epochs added
                # if i % params.test_every == 0 and (params.test_at_start or i != 0):
                #     if params.f_test_using_PUT_RP:
                #         test_model_using_PUT_RP(test_output_file, agent, test_filenames, model_id, num_times_tested)
                #     else:
                #         num_iters = test_model(val_output_file, val_output_summary_file, agent, validation_filenames, true_winners_val, model_id, num_times_tested, True)
                #         val_results.append(num_iters)
                #
                #     num_times_tested += 1

                if i % 10 == 0:
                    RP_utils.save_model(model, "RL_" + str(i), model_id)

                profile = read_profile(inputfile)

                # Run the profile
                print(inputfile)
                start = time.perf_counter()
                if params.f_use_winners_distribution:
                    rp_results, iter_to_find_winner, iter_to_find_all_winners = base.reinforcement_loop(
                        agent,
                        profile,
                        winners_distribution=winners_distribution[inputfile])
                elif params.f_train_till_find_all_winners:
                    rp_results, iter_to_find_winner, iter_to_find_all_winners = base.reinforcement_loop(
                        agent,
                        profile,
                        true_winners=set(true_winners_train[i]))
                else:
                    rp_results, iter_to_find_winner, iter_to_find_all_winners = base.reinforcement_loop(
                        agent,
                        profile,
                        true_winners=set(true_winners_train[i]),
                        filename=inputfile)
                end = time.perf_counter()

                # Evaluate and output results
                PUT_winners = sorted(rp_results.known_winners)
                stats = agent.stats

                total_time += (end - start)

                if stats.num_nodes == 0:
                    avg_loss_per_node = 0
                else:
                    avg_loss_per_node = stats.running_loss / stats.num_nodes

                is_acyclic = str(nx.is_directed_acyclic_graph(agent.E_0))

                if params.f_use_winners_distribution:
                    output_winners_distribution = winners_distribution[
                        inputfile]
                else:
                    output_winners_distribution = {}

                result_text = "%s\t%r\t%f\t%f\t%f\t%r\t%d\t%d\t%f\t%f\t%s\t%r\t%d\t%d\t%r\t%d\t%d\t%f" % \
                              (inputfile, PUT_winners, base.exploration_rate, base.learning_rate, base.tau,
                               stats.stop_condition_hits, stats.num_nodes, len(PUT_winners), stats.running_loss,
                               avg_loss_per_node, is_acyclic, iter_to_find_winner, iter_to_find_all_winners, agent.running_nodes, output_winners_distribution, stats.num_iters_reset_skipped, agent.stats.num_hashed, end - start)
                print(i, result_text)
                output_file.write(result_text + '\n')
                output_file.flush()

                i += 1

            # Test on validation data after each epoch
            if params.f_test_using_PUT_RP:
                test_model_using_PUT_RP(test_output_file, agent,
                                        test_filenames, model_id,
                                        num_times_tested)
            else:
                num_iters = test_model(val_output_file,
                                       val_output_summary_file, agent,
                                       validation_filenames, true_winners_val,
                                       model_id, num_times_tested, True)
                val_results.append(num_iters)
            num_times_tested += 1

        print(
            '----------------------Training Done------------------------------'
        )
        print("Validation results:", val_results)
        best_model = np.argmin(val_results)
        print("Best model:", best_model)

        # Use best model from validation testing to test 10x on test set
        RP_utils.load_model(
            model, rpconfig.results_path + str(model_id) + "_RL_val_" +
            str(best_model) + "_model.pth.tar")
        # Create agent
        if params.f_use_v2:
            agent_testing = RP_RL_agent_v2(model, base.learning_rate)
        else:
            agent_testing = RP_RL_agent(model, base.learning_rate)

        for t in range(10):
            test_model(test_output_file, test_output_summary_file,
                       agent_testing, test_filenames, true_winners_test,
                       model_id, "final_" + str(t), False)

        print("Total Time to Train: %f" % total_time)
        print("Average Time Per Profile: %f" %
              (total_time / len(train_filenames)))

        print("Total RL Runtime: %f" % (time.perf_counter() - start_RL))

        # Close files
        output_file.close()
        test_output_file.close()
        test_output_summary_file.close()
        val_output_file.close()
        val_output_summary_file.close()
        loss_file.close()
Beispiel #47
0
 def is_directed_acyclic(self):
     """Returns if this graph is a DAG or not."""
     return nx.is_directed_acyclic_graph(self)
Beispiel #48
0
def parse_obo_file_and_build_dags(obo_file, forced=False):
    """
    Parse the GO OBO into a networkx MultiDiGraph using obonet.
    Then construct a DAG for each category using the 'is_a' relationships 
    *forced*: this function will store the dags as an edgelist for faster parsing
        If forced is true, it will overwrite those
    
    *returns*: a dictionary containing a DAG for each of the 3 GO categories 'C', 'F', and 'P'
    """
    global id_to_name, name_to_id, goid_to_category

    dag_edgelist_file = obo_file.replace(".obo", "-isa-edgelist.txt")
    goid_names_file = obo_file.replace(".obo", "-names.txt")
    if not forced and os.path.isfile(dag_edgelist_file) and os.path.isfile(
            goid_names_file):
        print("Reading GO dags from %s" % (dag_edgelist_file))
        go_dags = {}
        for c in ['C', 'F', 'P']:
            go_dags[c] = nx.DiGraph()
        with open(dag_edgelist_file, 'r') as f:
            for line in f:
                if line[0] == '#':
                    continue
                g1, g2, c = line.rstrip().split('\t')[:3]
                go_dags[c].add_edge(g1, g2)

        for c, dag in go_dags.items():
            print("\tDAG for %s has %d nodes, %d edges" %
                  (c, dag.number_of_nodes(), dag.number_of_edges()))
            # also set the category for each GO term
            for n in dag.nodes():
                goid_to_category[n] = c

        with open(goid_names_file, 'r') as f:
            for line in f:
                if line[0] == '#':
                    continue
                goid, name, c = line.rstrip().split('\t')[:3]
                name_to_id[name] = goid
                id_to_name[goid] = name
    else:
        print("Reading GO OBO file from %s" % (obo_file))
        # obonet returns a networkx MultiDiGraph object containing all of the relationships in the ontology
        graph = obonet.read_obo(obo_file)
        # build a mapping from the GO term IDs to the name of the GO term
        id_to_name = {
            id_: data['name']
            for id_, data in graph.nodes(data=True)
        }
        name_to_id = {
            data['name']: id_
            for id_, data in graph.nodes(data=True)
        }
        print("\t%d nodes, %d edges" %
              (graph.number_of_nodes(), graph.number_of_edges()))

        # make sure this really is a DAG
        if not nx.is_directed_acyclic_graph(graph):
            print("\tWarning: graph is not a DAG")

        # copied this section from cell 19 of https://github.com/IGACAT/DataPreprocessing/blob/master/scripts/populate_go_terms.ipynb
        # Extract all edges with "is_a" relationship.
        # I did not include "part_of" relationships because the molecular_function and biological_process DAGs are not separate from each other if I do
        is_a_edge_list = []
        for child, parent, key in graph.out_edges(keys=True):
            if key == 'is_a':
                is_a_edge_list.append((child, parent))

        # get a is_a-type edge-induced subgraph
        is_a_subG = nx.MultiDiGraph(is_a_edge_list)
        full_to_category = {
            'cellular_component': 'C',
            'biological_process': 'P',
            'molecular_function': 'F'
        }
        go_dags = {}
        # there are 3 weakly_connected_components. One for each category
        for wcc in nx.weakly_connected_components(is_a_subG):
            G = is_a_subG.subgraph(wcc)

            # store this DAG in the dictionary of GO DAGs
            # find the root node
            root_node = None  # find root_node  (no out_edge)
            for node in G.nodes():
                if G.out_degree(node) == 0:
                    root_node = node
                    #print(root_node, id_to_name[node])
                    break
            c = full_to_category[id_to_name[root_node]]
            print("\tDAG for %s has %d nodes" %
                  (id_to_name[root_node], len(wcc)))
            go_dags[c] = G

            # also set the category for each GO term
            for n in G.nodes():
                goid_to_category[n] = c
        print("\twriting dags to %s" % (dag_edgelist_file))
        with open(dag_edgelist_file, 'w') as out:
            out.write("#child\tparent\thierarchy\n")
            for c, dag in go_dags.items():
                out.write(''.join("%s\t%s\t%s\n" % (g1, g2, c)
                                  for g1, g2 in dag.edges()))

        # also write the names to a file
        print("\twriting goid names to %s" % (goid_names_file))
        with open(goid_names_file, 'w') as out:
            for goid in id_to_name:
                out.write("%s\t%s\t%s\n" %
                          (goid, id_to_name[goid], goid_to_category[goid]))
    return go_dags
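A hedged usage sketch for the function above ("go.obo" and the 'P' lookup are illustrative; obonet and networkx must be importable):

go_dags = parse_obo_file_and_build_dags("go.obo")
bp_dag = go_dags['P']                        # biological_process hierarchy
print(nx.is_directed_acyclic_graph(bp_dag))  # True if the is_a subgraph is a DAG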
Beispiel #49
0
def get_graph_properties(edges):
    # Set up graph
    connections = np.array([int(x) for x in edges.split(';')])

    nodes = sorted(list(set(connections)))
    # Calculate Properties
    properties = []
    timings = {}

    if connections[0] > 0:
        edges = connections.reshape(int(connections.size / 2), 2)
        timeS = time.time()

        # directed graph
        G = nx.DiGraph()
        G.add_edges_from(edges)

        # undirected graph
        U = nx.Graph()
        U.add_edges_from(edges)
        # graph generated

        # property 1: number of components
        num_comp = nx.number_connected_components(U)
        properties.append(num_comp)

        # property 2: number of strongly connected components
        num_strong_comp = nx.number_strongly_connected_components(G)
        properties.append(num_strong_comp)

        # property 3: average in/out degree
        indeg = []
        outdeg = []
        indeg_ls = list(G.in_degree())
        outdeg_ls = list(G.out_degree())

        for x in np.arange(len(nodes)):
            indeg.append(indeg_ls[x][1])
            outdeg.append(outdeg_ls[x][1])
        av_deg = np.mean(indeg)
        properties.append(av_deg)

        # property 4: link density
        linkden = connections.size / (len(nodes) * len(nodes))
        properties.append(linkden)

        # property 5: number of self loops
        numloop = list(G.selfloop_edges())
        numloop = len(numloop)
        properties.append(numloop)
        #       # property 6: number of simple cycles (excluding self loops)
        #       numcyc = list(nx.simple_cycles(G))
        #       numcyc = len(numcyc) - numloop
        #       properties.append(numcyc)

        #       timings.update({'p6':time.time()-timeS})
        #       print('p6')
        #       print(timings['p6'])
        #       timeS = time.time()

        # find all components
        components = list(nx.connected_components(U))

        ischain = [None] * len(components)
        istree = [None] * len(components)
        isdag = [None] * len(components)
        unicel = [None] * len(components)
        isscc = [None] * len(components)
        iscyc = [None] * len(components)
        iseul = [None] * len(components)
        indeg_by_comp = []
        outdeg_by_comp = []
        node_conn = [0] * len(components)
        av_clust = [0.] * len(components)
        assort = [0.] * len(components)
        indeg_cen_av = [0.] * len(components)
        indeg_cen_max = [0.] * len(components)
        indeg_cen_min = [0.] * len(components)
        outdeg_cen_av = [0.] * len(components)
        outdeg_cen_max = [0.] * len(components)
        outdeg_cen_min = [0.] * len(components)
        bet_cen_av = [0.] * len(components)
        bet_cen_max = [0.] * len(components)
        bet_cen_min = [0.] * len(components)
        eig_cen_av = [0.] * len(components)
        eig_cen_max = [0.] * len(components)
        eig_cen_min = [0.] * len(components)
        triangles_av = [0.] * len(components)
        triangles_max = [0.] * len(components)
        triangles_min = [0.] * len(components)
        squares_av = [0.] * len(components)
        squares_max = [0.] * len(components)
        squares_min = [0.] * len(components)
        transitivity = [0.] * len(components)
        rc = [0.] * len(components)
        loopnumber = [0] * len(components)

        for compnum in np.arange(len(components)):
            # property 6: ischain?(remove self-loops and then test this property)
            # want: how many chains does the graph contain.. look at each component, not the whole graph in one go.
            # most graphs are single components.
            G1 = G.subgraph(list(components[compnum]))
            Gnoself = G1.copy()
            Gnoself.remove_edges_from(Gnoself.selfloop_edges())
            Unoself = nx.Graph()
            Unoself.add_edges_from(Gnoself.edges)

            # if all in and out degrees are 1, graph is a chain..do not include in trees
            indeg2 = []
            outdeg2 = []
            indeg_ls2 = list(Gnoself.in_degree())
            outdeg_ls2 = list(Gnoself.out_degree())
            # nx gives indeg and outdeg as tuples (nodename, in/out deg), which is why we need the for loop below
            for x in np.arange(len(G1.nodes())):
                indeg2.append(indeg_ls2[x][1])
                outdeg2.append(outdeg_ls2[x][1])
            indeg_by_comp.append(int_arr_to_str(indeg2, delim=';'))
            outdeg_by_comp.append(int_arr_to_str(outdeg2, delim=';'))

            indeg2 = np.array(indeg2)
            outdeg2 = np.array(outdeg2)
            in_min_out = indeg2 - outdeg2
            ischain[compnum] = int((np.sum(in_min_out) == 0)
                                   & (np.sum(np.abs(in_min_out)) == 2)
                                   & (np.all(indeg2 <= 1))
                                   & (np.all(outdeg2 <= 1)))
            # property 7: istree(remove chains first)
            istree[compnum] = int((nx.is_tree(Gnoself) - ischain[compnum]) > 0)
            # property 8: isdag(only looking at DAGs other than trees and chains)
            isdag[compnum] = int((int(nx.is_directed_acyclic_graph(Gnoself)) -
                                  istree[compnum] - ischain[compnum]) > 0)
            if isdag[compnum] > 0:
                loopnumber[compnum] = len(list(
                    Gnoself.edges)) - (len(list(Gnoself.nodes)) - 1)
            # property 9: single celled
            unicel[compnum] = int(len(Gnoself.nodes) == 1)
            istree[compnum] = int(istree[compnum]) - int(
                unicel[compnum]
            )  # nx counts single node with no self-edge as a tree
            # property 10: isscc (excluding unicellular)
            num_strong_comp2 = nx.number_strongly_connected_components(Gnoself)
            isscc[compnum] = int(num_strong_comp2 == 1)
            isscc[compnum] = int((isscc[compnum] - unicel[compnum]) > 0)
            # property 11: iscyc(cyclic graphs other than those with a single scc and single celled graphs)
            iscyc[compnum] = int((isdag[compnum] + istree[compnum] +
                                  ischain[compnum] + isscc[compnum] +
                                  unicel[compnum]) == 0)
            # property 12: is eulerian
            iseul[compnum] = int(nx.is_eulerian(Gnoself))
            # property 13: node connectivity
            node_conn[compnum] = approx.node_connectivity(Gnoself)
            # property 14: clustering coefficient
            av_clust[compnum] = nx.average_clustering(Gnoself)
            # property 15: assortativity(pearson's coefficient)
            try:
                assort[compnum] = nx.degree_pearson_correlation_coefficient(
                    Gnoself)  #####################check
            except:
                assort[compnum] = 0.0
            # property 16,17,18: in degree centrality (average, maximum and minimum)
            indeg_cen = []
            dict1 = nx.in_degree_centrality(Gnoself)
            for a1 in dict1:
                indeg_cen.append(dict1[a1])
            indeg_cen_av[compnum] = np.average(indeg_cen)
            indeg_cen_max[compnum] = max(indeg_cen)
            indeg_cen_min[compnum] = min(indeg_cen)
            # property 19,20,21: out degree centrality (average, maximum, minimum)
            outdeg_cen = []
            dict1 = nx.out_degree_centrality(Gnoself)
            for a1 in dict1:
                outdeg_cen.append(dict1[a1])
            outdeg_cen_av[compnum] = np.average(outdeg_cen)
            outdeg_cen_max[compnum] = max(outdeg_cen)
            outdeg_cen_min[compnum] = min(outdeg_cen)
            # property 22,23,24: betweenness centrality (average,maximum, minimum)
            bet_cen = []
            dict1 = nx.betweenness_centrality(Gnoself)
            for a1 in dict1:
                bet_cen.append(dict1[a1])
            bet_cen_av[compnum] = np.average(bet_cen)
            bet_cen_max[compnum] = max(bet_cen)
            bet_cen_min[compnum] = min(bet_cen)
            # property 25,26,27: eigen vector centrality (average,maximum, minimum)
            eig_cen = []
            try:
                dict1 = nx.eigenvector_centrality(Gnoself)
                for a1 in dict1:
                    eig_cen.append(dict1[a1])
                eig_cen_av[compnum] = np.average(eig_cen)
                eig_cen_max[compnum] = max(eig_cen)
                eig_cen_min[compnum] = min(eig_cen)
            except nx.PowerIterationFailedConvergence:
                pass
            # property 28,29,30: number of triangles for each node (average,maximum, minimum)
            triangles = []
            dict1 = nx.triangles(Unoself)
            for a1 in dict1:
                triangles.append(dict1[a1])
            if len(triangles):
                triangles_av[compnum] = np.average(triangles)
                triangles_max[compnum] = max(triangles)
                triangles_min[compnum] = min(triangles)
            # property 31: transitivity (fraction of all possible triangles present in the graph)
            transitivity[compnum] = nx.transitivity(Gnoself)
            # property 32,33,34: square clustering for each node(fraction of all possible squares present at a node)
            squares = []
            dict1 = nx.square_clustering(Gnoself)
            for a1 in dict1:
                squares.append(dict1[a1])
            if len(squares):
                squares_av[compnum] = np.average(squares)
                squares_max[compnum] = max(squares)
                squares_min[compnum] = min(squares)
            # property 35: rich club coefficient
            if len(list(Unoself.nodes())) > 3:
                rc[compnum] = 0.0


            #   rc[compnum] = nx.rich_club_coefficient(Unoself).values()  # only works if graph has 4 or more edges
            # property 36 and 37: number of source and target nodes

        iseul = sum(iseul)
        iscyc = sum(iscyc)
        isscc = sum(isscc)
        unicel = sum(unicel)
        isdag = sum(isdag)
        istree = sum(istree)
        ischain = sum(ischain)
        indeg_by_comp = ';'.join([str(x) for x in indeg_by_comp])
        outdeg_by_comp = ';'.join([str(x) for x in outdeg_by_comp])
        node_conn = ';'.join([str(x) for x in node_conn
                              ])  # node connectivity for each component
        avav_clust = np.average(
            av_clust)  # average clustering coefficient over all components
        av_clust = ';'.join([
            str(round(x, 2)) for x in av_clust
        ])  # average clustering coefficients for each component
        av_assort = np.average(
            assort)  # average assortativity over all components
        assort = ';'.join([str(round(x, 2)) for x in assort
                           ])  # assortativity for each component
        indeg_cen_avav = np.average(
            indeg_cen_av)  # average indeg centrality over all components
        indeg_cen_av = ';'.join([
            str(round(x, 2)) for x in indeg_cen_av
        ])  # average indeg centrality for each component
        indeg_cen_maxmax = max(
            indeg_cen_max)  # maximum indeg centrality across all components
        indeg_cen_max = ';'.join([
            str(round(x, 2)) for x in indeg_cen_max
        ])  # maximum indeg centrality for each component
        indeg_cen_minmin = min(
            indeg_cen_min)  # minimum indeg centrality across all components
        indeg_cen_min = ';'.join([
            str(round(x, 2)) for x in indeg_cen_min
        ])  # minimum indeg centrality for each component

        outdeg_cen_avav = np.average(outdeg_cen_av)
        outdeg_cen_av = ';'.join([str(round(x, 2)) for x in outdeg_cen_av])
        outdeg_cen_maxmax = max(outdeg_cen_max)
        outdeg_cen_max = ';'.join([str(round(x, 2)) for x in outdeg_cen_max])
        outdeg_cen_minmin = min(outdeg_cen_min)
        outdeg_cen_min = ';'.join([str(round(x, 2)) for x in outdeg_cen_min])
        bet_cen_avav = np.average(bet_cen_av)
        bet_cen_av = ';'.join([str(round(x, 2)) for x in bet_cen_av])
        bet_cen_maxmax = max(bet_cen_max)
        bet_cen_max = ';'.join([str(round(x, 2)) for x in bet_cen_max])
        bet_cen_minmin = min(bet_cen_min)
        bet_cen_min = ';'.join([str(round(x, 2)) for x in bet_cen_min])
        eig_cen_avav = np.average(eig_cen_av)
        eig_cen_av = ';'.join([str(round(x, 2)) for x in eig_cen_av])
        eig_cen_maxmax = max(eig_cen_max)
        eig_cen_max = ';'.join([str(round(x, 2)) for x in eig_cen_max])
        eig_cen_minmin = min(eig_cen_min)
        eig_cen_min = ';'.join([str(round(x, 2)) for x in eig_cen_min])
        triangles_avav = np.average(triangles_av)
        triangles_av = ';'.join([str(x) for x in triangles_av])
        triangles_maxmax = max(triangles_max)
        triangles_max = ';'.join([str(x) for x in triangles_max])
        triangles_minmin = min(triangles_min)
        triangles_min = ';'.join([str(x) for x in triangles_min])
        transitivity_av = np.average(transitivity)
        transitivity_max = max(transitivity)
        transitivity_min = min(transitivity)
        transitivity = ';'.join([str(x) for x in transitivity])
        squares_avav = np.average(squares_av)
        squares_maxmax = max(squares_max)
        squares_minmin = min(squares_min)
        squares_av = ';'.join([str(x) for x in squares_av])
        squares_max = ';'.join([str(x) for x in squares_max])
        squares_min = ';'.join([str(x) for x in squares_min])
        rc_av = np.average(rc)
        rc_max = max(rc)
        rc_min = min(rc)
        rc = ';'.join([str(x) for x in rc])
        ln = [loopnumber[x] for x in np.nonzero(loopnumber)[0]]
        if any(ln):
            loopnumber_av = np.average(ln)
        else:
            loopnumber_av = 0.0
        loopnumber = ';'.join([str(x) for x in loopnumber])

        # check.. sum of iscyc, isscc, unicel, dag,tree, chain should be the total number of components
        if num_comp != (iscyc + isscc + unicel + isdag + istree + ischain):
            print('Number of components is wrong!!!!!!')
            print(num_comp)
            print([iscyc, isscc, unicel, isdag, istree, ischain])
            sys.exit()

        properties.append(indeg_by_comp)  # string
        properties.append(outdeg_by_comp)  #string
        properties.append(ischain)  #int
        properties.append(istree)  #int
        properties.append(isdag)  #int
        properties.append(unicel)  #int
        properties.append(isscc)  #int
        properties.append(iscyc)  #int
        properties.append(iseul)  #int
        properties.append(loopnumber_av)  #float
        properties.append(loopnumber)  #string
        properties.append(node_conn)  #string
        properties.append(avav_clust)  #float
        properties.append(av_clust)  #string
        properties.append(av_assort)  #float
        properties.append(assort)  #string
        properties.append(indeg_cen_avav)  #float
        properties.append(indeg_cen_av)  #string
        properties.append(indeg_cen_maxmax)  #float
        properties.append(indeg_cen_max)  #string
        properties.append(indeg_cen_minmin)  #float
        properties.append(indeg_cen_min)  #string
        properties.append(outdeg_cen_avav)  #float
        properties.append(outdeg_cen_av)  #string
        properties.append(outdeg_cen_maxmax)  #float
        properties.append(outdeg_cen_max)  #string
        properties.append(outdeg_cen_minmin)  #float
        properties.append(outdeg_cen_min)  #string
        properties.append(bet_cen_avav)  #float
        properties.append(bet_cen_av)  #string
        properties.append(bet_cen_maxmax)  #float
        properties.append(bet_cen_max)  #string
        properties.append(bet_cen_minmin)  #float
        properties.append(bet_cen_min)  #string
        properties.append(eig_cen_avav)  #float
        properties.append(eig_cen_av)  #string
        properties.append(eig_cen_maxmax)  #float
        properties.append(eig_cen_max)  #string
        properties.append(eig_cen_minmin)  #float
        properties.append(eig_cen_min)  #string
        properties.append(triangles_avav)  #float
        properties.append(triangles_av)  #string
        properties.append(triangles_maxmax)  #float
        properties.append(triangles_max)  #string
        properties.append(triangles_minmin)  #float
        properties.append(triangles_min)  #string
        properties.append(transitivity_av)  # float
        properties.append(transitivity_max)  #float
        properties.append(transitivity_min)  #float
        properties.append(transitivity)  #string
        properties.append(squares_avav)  #float
        properties.append(squares_av)  #string
        properties.append(squares_maxmax)  #float
        properties.append(squares_max)  #string
        properties.append(squares_minmin)  #float
        properties.append(squares_min)  #string
        properties.append(rc_av)  # float
        properties.append(rc_max)  #float
        properties.append(rc_min)  #float
        properties.append(rc)  #string

        # append more properties.....
        # property 14:

        # property x: in-degree sequence
        #indeg = # list(G.in_degree())[iterate over number of nodes][1]
        # property y: out-degree sequence
        #outdeg = # list(G.in_degree())[iterate over number of nodes][1]
        #.....
    else:
        properties = [0] * 2 + [0.] * 2 + [0] + [''] * 2 + [0] * 7 + [
            0.
        ] + [''] * 2 + [0., ''] * 17 + [0.] * 3 + [''] + [0., ''] * 3 + [
            0., 0., 0., ''
        ]

    # return list of properties
    return properties
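A hedged usage sketch: the edge string encodes flattened (source, target) pairs, so "1;2;2;3;3;1" below is the 3-cycle 1 -> 2 -> 3 -> 1 (this assumes the module's imports — numpy, networkx, its approximation module — and the helper int_arr_to_str are in scope):

props = get_graph_properties("1;2;2;3;3;1")
print(props[0], props[1])  # 1 connected component, 1 strongly connected component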
Beispiel #50
0
 def is_feasible(self):
     import networkx as nx
     G = nx.DiGraph()
     edg = tuple(set(unfold(self.A)) | set(unfold(self.E)))
     G.add_edges_from(edg)
     return nx.is_directed_acyclic_graph(G)
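unfold here is a project-specific helper that flattens the A and E edge sets; the feasibility test itself is just a DAG check over the combined precedence edges. A standalone sketch under that assumption:

import networkx as nx

def is_feasible(precedence_edges):
    # Feasible iff the combined precedence constraints admit a topological
    # order, i.e. the constraint graph is a DAG.
    G = nx.DiGraph()
    G.add_edges_from(precedence_edges)
    return nx.is_directed_acyclic_graph(G)

print(is_feasible([("a", "b"), ("b", "c")]))  # True
print(is_feasible([("a", "b"), ("b", "a")]))  # False: contradictory ordering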
Beispiel #51
0
def find_topological_order(directory, target=None):
    graph = nx.DiGraph()

    # First, walk the installers and find real providers
    for root, _, files in os.walk(directory):
        if INSTALLER in files:
            name = os.path.basename(root)
            graph.add_node(name, transitive=False)

    # Second, find all dependees and dependers
    for root, _, files in os.walk(directory):
        if INSTALLER in files:
            name = os.path.basename(root)
            dependencies, satisfies = read_dependencies(
                os.path.join(root, INSTALLER))

            for dependence in dependencies:
                # If by now the dependence does not have a node, it has no real
                # provider, so we assume it is transitive, i.e. provided by
                # something with a different name
                if not graph.has_node(dependence):
                    graph.add_node(dependence, transitive=True)

            # Set edge from dependee to its provider
            add_edge = functools.partial(lambda a, b: graph.add_edge(b, a),
                                         name)
            list(map(add_edge, dependencies))

            for sat in satisfies:
                # If there is something that tries to satisfy already satisfied
                # dependency we consider this an error
                if graph.has_node(sat) and len(list(graph.predecessors(sat))):
                    print(("{} tries to satisfy already existing installer {}".
                           format(name, sat)))
                    return False, None
                graph.add_node(sat, transitive=True)

            # Set edge from transitive provider to its real provider
            add_edge = functools.partial(lambda a, b: graph.add_edge(a, b),
                                         name)
            list(map(add_edge, satisfies))

    # print graph.edges()
    # sys.exit(0)

    # Not all dependencies are provided by installers of the same name. By
    # collapsing the graph on these 'satisfying' dependencies we point a dependee
    # to the right installer.
    nodes_to_remove = list()
    for node, transitive in graph.nodes(data='transitive'):
        if not transitive:
            continue

        dependees = list(graph.successors(node))
        providers = list(graph.predecessors(node))
        assert len(
            providers
        ) == 1, 'Must be exactly one provider, node: {}, dependees: {}, providers: {}'.format(
            node, dependees, providers)

        # Remove transitive node with all its edges
        nodes_to_remove.append(node)

        # Reconnect the graph
        add_edge = functools.partial(graph.add_edge, providers[0])
        list(map(add_edge, dependees))

    for node in nodes_to_remove:
        graph.remove_node(node)

    if not nx.is_directed_acyclic_graph(graph):
        print(("Found dependency cycle: {}".format(nx.find_cycle(graph))))
        return False, None

    if target:
        closure = set([target])
        while True:
            new = closure | set(
                sum(list(map(list, list(map(graph.predecessors, closure)))),
                    []))
            if closure == new:
                break
            closure = new
        return True, list(nx.topological_sort(graph.subgraph(closure)))

    return True, list(nx.topological_sort(graph))
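The closure loop above repeatedly unions in predecessors until a fixpoint; for a DAG that is exactly nx.ancestors of the target plus the target itself. A small sketch of the equivalence:

import networkx as nx

g = nx.DiGraph([("base", "lib"), ("lib", "app")])  # provider -> dependee edges
closure = nx.ancestors(g, "app") | {"app"}
print(list(nx.topological_sort(g.subgraph(closure))))  # ['base', 'lib', 'app']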
Beispiel #52
0
 def __init__(self, G: nx.DiGraph):
     assert (nx.is_directed_acyclic_graph(G)), f"{G.edges()} not DAG"
     self.dag = G
Beispiel #53
0
## deepgo/data/train

# work_dir = '/u/flashscratch/d/datduong/goAndGeneAnnotationDec2018/'
# work_dir = '/u/flashscratch/d/datduong/goAndGeneAnnotation/'
work_dir = '/u/flashscratch/d/datduong/deepgo/data/'

os.chdir(work_dir)

# Read the taxrank ontology
graph = obonet.read_obo('go.obo')  # https://github.com/dhimmel/obonet

len(graph)  # Number of nodes

graph.number_of_edges()  # Number of edges

networkx.is_directed_acyclic_graph(graph)  # Check if the ontology is a DAG

# Mapping from term ID to name
id_to_name = {
    id_: data.get('name')
    for id_, data in graph.nodes(data=True)
    if 'OBSOLETE' not in data.get('def', '')
}  ## by default obsolete already removed
# id_to_name['GO:0000002']

go_name_array_obo = list(id_to_name.keys())
go_name_array_obo.sort()
# go_name_array_obo = [re.sub(r"GO:","",g) for g in go_name_array_obo]

pd.DataFrame(go_name_array_obo).to_csv("go_name_in_obo.csv",
                                       header=None)
Beispiel #54
0
 def test_generate_random_dag(self):
     self.assertTrue(
         nx.is_directed_acyclic_graph(generate_random_dag(10, 0.5)))
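generate_random_dag itself is not shown; one common construction the test would accept is to sample only "forward" edges between nodes ordered by index, which makes cycles impossible. A sketch, not the original implementation:

import random
import networkx as nx

def generate_random_dag(n, p):
    g = nx.DiGraph()
    g.add_nodes_from(range(n))
    for i in range(n):
        for j in range(i + 1, n):
            if random.random() < p:
                g.add_edge(i, j)  # edges only go "forward", so no cycle can form
    return g

assert nx.is_directed_acyclic_graph(generate_random_dag(10, 0.5))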
Beispiel #55
0
def d_separated(G: nx.DiGraph, x: AbstractSet, y: AbstractSet,
                z: AbstractSet) -> bool:
    """
    Return whether node sets ``x`` and ``y`` are d-separated by ``z``.

    Parameters
    ----------
    G : graph
        A NetworkX DAG.

    x : set
        First set of nodes in ``G``.

    y : set
        Second set of nodes in ``G``.

    z : set
        Set of conditioning nodes in ``G``. Can be empty set.

    Returns
    -------
    b : bool
        A boolean that is true if ``x`` is d-separated from ``y`` given ``z`` in ``G``.

    Raises
    ------
    NetworkXError
        The *d-separation* test is commonly used with directed
        graphical models which are acyclic.  Accordingly, the algorithm
        raises a :exc:`NetworkXError` if the input graph is not a DAG.

    NodeNotFound
        If any of the input nodes are not found in the graph,
        a :exc:`NodeNotFound` exception is raised.

    """

    if not nx.is_directed_acyclic_graph(G):
        raise nx.NetworkXError("graph should be directed acyclic")

    union_xyz = x.union(y).union(z)

    if any(n not in G.nodes for n in union_xyz):
        raise nx.NodeNotFound(
            "one or more specified nodes not found in the graph")

    G_copy = G.copy()

    # transform the graph by removing leaves that are not in x | y | z
    # until no more leaves can be removed.
    leaves = deque([n for n in G_copy.nodes if G_copy.out_degree[n] == 0])
    while len(leaves) > 0:
        leaf = leaves.popleft()
        if leaf not in union_xyz:
            for p in G_copy.predecessors(leaf):
                if G_copy.out_degree[p] == 1:
                    leaves.append(p)
            G_copy.remove_node(leaf)

    # transform the graph by removing outgoing edges from the
    # conditioning set.
    edges_to_remove = list(G_copy.out_edges(z))
    G_copy.remove_edges_from(edges_to_remove)

    # use disjoint-set data structure to check if any node in `x`
    # occurs in the same weakly connected component as a node in `y`.
    disjoint_set = UnionFind(G_copy.nodes())
    for component in nx.weakly_connected_components(G_copy):
        disjoint_set.union(*component)
    disjoint_set.union(*x)
    disjoint_set.union(*y)

    if x and y and disjoint_set[next(iter(x))] == disjoint_set[next(iter(y))]:
        return False
    else:
        return True
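A worked example of the function above on the classic collider a -> c <- b (assuming the module's imports, deque and UnionFind, are in scope): the path between a and b is blocked until the collider is conditioned on.

collider = nx.DiGraph([("a", "c"), ("b", "c")])
print(d_separated(collider, {"a"}, {"b"}, set()))  # True: the collider blocks the path
print(d_separated(collider, {"a"}, {"b"}, {"c"}))  # False: conditioning on c opens it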
Beispiel #56
0
 def check_cycle(self):
     if not nx.is_directed_acyclic_graph(self.graph):
         raise Exception("attempt to add a cyclic")
Beispiel #57
0
def all_pairs_lowest_common_ancestor(G, pairs=None):
    """Compute the lowest common ancestor for pairs of nodes.

    Parameters
    ----------
    G : NetworkX directed graph

    pairs : iterable of pairs of nodes, optional (default: all pairs)
        The pairs of nodes of interest.
        If None, will find the LCA of all pairs of nodes.

    Returns
    -------
    An iterator over ((node1, node2), lca) where (node1, node2) are
    the pairs specified and lca is a lowest common ancestor of the pair.
    Note that for the default of all pairs in G, we consider
    unordered pairs, e.g. you will not get both (b, a) and (a, b).

    Notes
    -----
    Only defined on non-null directed acyclic graphs.

    Uses the $O(n^3)$ ancestor-list algorithm from:
    M. A. Bender, M. Farach-Colton, G. Pemmasani, S. Skiena, P. Sumazin.
    "Lowest common ancestors in trees and directed acyclic graphs."
    Journal of Algorithms, 57(2): 75-94, 2005.

    See Also
    --------
    tree_all_pairs_lowest_common_ancestor
    lowest_common_ancestor
    """
    if not nx.is_directed_acyclic_graph(G):
        raise nx.NetworkXError("LCA only defined on directed acyclic graphs.")
    elif len(G) == 0:
        raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.")
    elif None in G:
        raise nx.NetworkXError("None is not a valid node.")

    # The copy isn't ideal, neither is the switch-on-type, but without it users
    # passing an iterable will encounter confusing errors, and itertools.tee
    # does not appear to handle builtin types efficiently (i.e., it materializes
    # another buffer rather than just creating list iterators at the same
    # offset). The Python documentation notes use of tee is unadvised when one
    # is consumed before the other.
    #
    # This will always produce correct results and avoid unnecessary
    # copies in many common cases.
    #
    if (not isinstance(pairs, (Mapping, Set)) and pairs is not None):
        pairs = set(pairs)

    # Convert G into a dag with a single root by adding a node with edges to
    # all sources if necessary.
    sources = [n for n, deg in G.in_degree if deg == 0]
    if len(sources) == 1:
        root = sources[0]
        super_root = None
    else:
        G = G.copy()
        super_root = root = generate_unique_node()
        for source in sources:
            G.add_edge(root, source)

    # Start by computing a spanning tree, and the DAG of all edges not in it.
    # We will then use the tree lca algorithm on the spanning tree, and use
    # the DAG to figure out the set of tree queries necessary.
    spanning_tree = nx.dfs_tree(G, root)
    dag = nx.DiGraph((u, v) for u, v in G.edges
                     if u not in spanning_tree or v not in spanning_tree[u])

    # Ensure that both the dag and the spanning tree contains all nodes in G,
    # even nodes that are disconnected in the dag.
    spanning_tree.add_nodes_from(G)
    dag.add_nodes_from(G)

    counter = count()

    # Necessary to handle graphs consisting of a single node and no edges.
    root_distance = {root: next(counter)}

    for edge in nx.bfs_edges(spanning_tree, root):
        for node in edge:
            if node not in root_distance:
                root_distance[node] = next(counter)

    # Index the position of all nodes in the Euler tour so we can efficiently
    # sort lists and merge in tour order.
    euler_tour_pos = {}
    for node in nx.depth_first_search.dfs_preorder_nodes(G, root):
        if node not in euler_tour_pos:
            euler_tour_pos[node] = next(counter)

    # Generate the set of all nodes of interest in the pairs.
    pairset = set()
    if pairs is not None:
        pairset = set(chain.from_iterable(pairs))

    for n in pairset:
        if n not in G:
            msg = "The node %s is not in the digraph." % str(n)
            raise nx.NodeNotFound(msg)

    # Generate the transitive closure over the dag (not G) of all nodes, and
    # sort each node's closure set by order of first appearance in the Euler
    # tour.
    ancestors = {}
    for v in dag:
        if pairs is None or v in pairset:
            my_ancestors = nx.dag.ancestors(dag, v)
            my_ancestors.add(v)
            ancestors[v] = sorted(my_ancestors, key=euler_tour_pos.get)

    def _compute_dag_lca_from_tree_values(tree_lca, dry_run):
        """Iterate through the in-order merge for each pair of interest.

        We do this to answer the user's query, but it is also used to
        avoid generating unnecessary tree entries when the user only
        needs some pairs.
        """
        for (node1, node2) in pairs if pairs is not None else tree_lca:
            best_root_distance = None
            best = None

            indices = [0, 0]
            ancestors_by_index = [ancestors[node1], ancestors[node2]]

            def get_next_in_merged_lists(indices):
                """Returns index of the list containing the next item

                Next order refers to the merged order.
                Index can be 0 or 1 (or None if exhausted).
                """
                index1, index2 = indices
                if (index1 >= len(ancestors[node1])
                        and index2 >= len(ancestors[node2])):
                    return None
                elif index1 >= len(ancestors[node1]):
                    return 1
                elif index2 >= len(ancestors[node2]):
                    return 0
                elif (euler_tour_pos[ancestors[node1][index1]] <
                      euler_tour_pos[ancestors[node2][index2]]):
                    return 0
                else:
                    return 1

            # Find the LCA by iterating through the in-order merge of the two
            # nodes of interests' ancestor sets. In principle, we need to
            # consider all pairs in the Cartesian product of the ancestor sets,
            # but by the restricted min range query reduction we are guaranteed
            # that one of the pairs of interest is adjacent in the merged list
            # iff one came from each list.
            i = get_next_in_merged_lists(indices)
            cur = ancestors_by_index[i][indices[i]], i
            while i is not None:
                prev = cur
                indices[i] += 1
                i = get_next_in_merged_lists(indices)
                if i is not None:
                    cur = ancestors_by_index[i][indices[i]], i

                    # Two adjacent entries must not be from the same list
                    # in order for their tree LCA to be considered.
                    if cur[1] != prev[1]:
                        tree_node1, tree_node2 = prev[0], cur[0]
                        if (tree_node1, tree_node2) in tree_lca:
                            ans = tree_lca[tree_node1, tree_node2]
                        else:
                            ans = tree_lca[tree_node2, tree_node1]
                        if not dry_run and (best is None or root_distance[ans]
                                            > best_root_distance):
                            best_root_distance = root_distance[ans]
                            best = ans

            # If the LCA is super_root, there is no LCA in the user's graph.
            if not dry_run and (super_root is None or best != super_root):
                yield (node1, node2), best

    # Generate the spanning tree lca for all pairs. This doesn't make sense to
    # do incrementally since we are using a linear time offline algorithm for
    # tree lca.
    if pairs is None:
        # We want all pairs so we'll need the entire tree.
        tree_lca = dict(
            tree_all_pairs_lowest_common_ancestor(spanning_tree, root))
    else:
        # We only need the merged adjacent pairs by seeing which queries the
        # algorithm needs then generating them in a single pass.
        tree_lca = defaultdict(int)
        for _ in _compute_dag_lca_from_tree_values(tree_lca, True):
            pass

        # Replace the bogus default tree values with the real ones.
        for (pair, lca) in tree_all_pairs_lowest_common_ancestor(
                spanning_tree, root, tree_lca):
            tree_lca[pair] = lca

    # All precomputations complete. Now we just need to give the user the pairs
    # they asked for, or all pairs if they want them all.
    return _compute_dag_lca_from_tree_values(tree_lca, False)
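A quick usage example on a diamond DAG, using the version of this function shipped with networkx: the only common ancestor of a and b is the root.

import networkx as nx

G = nx.DiGraph([("root", "a"), ("root", "b"), ("a", "x"), ("b", "x")])
print(dict(nx.all_pairs_lowest_common_ancestor(G, pairs=[("a", "b")])))
# {('a', 'b'): 'root'}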
Beispiel #58
0
def raise_if_dagcircuit_invalid(dag):
    """Validates the internal consistency of a DAGCircuit._multi_graph.
    Intended for use in testing.

    Raises:
       DAGCircuitError: if DAGCircuit._multi_graph is inconsistent.
    """

    multi_graph = dag._multi_graph

    if dag._USE_RX:
        if not rx.is_directed_acyclic_graph(multi_graph):
            raise DAGCircuitError('multi_graph is not a DAG.')
    else:
        if not nx.is_directed_acyclic_graph(multi_graph):
            raise DAGCircuitError('multi_graph is not a DAG.')

    # Every node should be of type in, out, or op.
    # All input/output nodes should be present in input_map/output_map.
    for node in dag._get_multi_graph_nodes():
        if node.type == 'in':
            assert node is dag.input_map[node.wire]
        elif node.type == 'out':
            assert node is dag.output_map[node.wire]
        elif node.type == 'op':
            continue
        else:
            raise DAGCircuitError('Found node of unexpected type: {}'.format(
                node.type))

    # Shape of node.op should match shape of node.
    for node in dag.op_nodes():
        assert len(node.qargs) == node.op.num_qubits
        assert len(node.cargs) == node.op.num_clbits

    # Every edge should be labeled with a known wire.
    edges_outside_wires = [edge_data['wire']
                           for source, dest, edge_data
                           in dag._get_multi_graph_edges()
                           if edge_data['wire'] not in dag.wires]
    if edges_outside_wires:
        raise DAGCircuitError('multi_graph contains one or more edges ({}) '
                              'not found in DAGCircuit.wires ({}).'.format(edges_outside_wires,
                                                                           dag.wires))

    # Every wire should have exactly one input node and one output node.
    for wire in dag.wires:
        in_node = dag.input_map[wire]
        out_node = dag.output_map[wire]

        assert in_node.wire == wire
        assert out_node.wire == wire
        assert in_node.type == 'in'
        assert out_node.type == 'out'

    # Every wire should be propagated by exactly one edge between nodes.
    for wire in dag.wires:
        cur_node_id = dag.input_map[wire]._node_id
        out_node_id = dag.output_map[wire]._node_id

        while cur_node_id != out_node_id:
            out_edges = dag._get_multi_graph_out_edges(cur_node_id)
            edges_to_follow = [(src, dest, data) for (src, dest, data) in out_edges
                               if data['wire'] == wire]

            assert len(edges_to_follow) == 1
            cur_node_id = edges_to_follow[0][1]

    # Wires can only terminate at input/output nodes.
    for op_node in dag.op_nodes():
        assert multi_graph.in_degree(op_node._node_id) == multi_graph.out_degree(op_node._node_id)

    # Node input/output edges should match node qarg/carg/condition.
    for node in dag.op_nodes():
        in_edges = dag._get_multi_graph_in_edges(node._node_id)
        out_edges = dag._get_multi_graph_out_edges(node._node_id)

        in_wires = {data['wire'] for src, dest, data in in_edges}
        out_wires = {data['wire'] for src, dest, data in out_edges}

        node_cond_bits = set(node.condition[0][:] if node.condition is not None else [])
        node_qubits = set(node.qargs)
        node_clbits = set(node.cargs)

        all_bits = node_qubits | node_clbits | node_cond_bits

        assert in_wires == all_bits, 'In-edge wires {} != node bits {}'.format(
            in_wires, all_bits)
        assert out_wires == all_bits, 'Out-edge wires {} != node bits {}'.format(
            out_wires, all_bits)
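A hedged usage sketch for the validator above, using qiskit's standard circuit-to-DAG converter (API names as in the older Terra releases this code targets):

from qiskit import QuantumCircuit
from qiskit.converters import circuit_to_dag

qc = QuantumCircuit(2)
qc.h(0)
qc.cx(0, 1)
raise_if_dagcircuit_invalid(circuit_to_dag(qc))  # raises DAGCircuitError if inconsistent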
Beispiel #59
0
    def _legal_operations(self,
                          model,
                          tabu_list=[],
                          max_indegree=None,
                          black_list=None,
                          white_list=None):
        """Generates a list of legal (= not in tabu_list) graph modifications
        for a given model, together with their score changes. Possible graph modifications:
        (1) add, (2) remove, or (3) flip a single edge. For details on scoring
        see Koller & Friedman, Probabilistic Graphical Models, Section 18.4.3.3 (page 818).
        If a number `max_indegree` is provided, only modifications that keep the number
        of parents for each node below `max_indegree` are considered. A list of
        edges can optionally be passed as `black_list` or `white_list` to exclude those
        edges or to limit the search.
        """

        local_score = self.scoring_method.local_score
        nodes = self.state_names.keys()
        potential_new_edges = (set(permutations(nodes, 2)) -
                               set(model.edges()) -
                               set([(Y, X) for (X, Y) in model.edges()]))

        for (X, Y) in potential_new_edges:  # (1) add single edge
            if nx.is_directed_acyclic_graph(
                    nx.DiGraph(list(model.edges()) + [(X, Y)])):
                operation = ("+", (X, Y))
                if (operation not in tabu_list
                        and (black_list is None or (X, Y) not in black_list)
                        and (white_list is None or (X, Y) in white_list)):
                    old_parents = model.get_parents(Y)
                    new_parents = old_parents + [X]
                    if max_indegree is None or len(
                            new_parents) <= max_indegree:
                        score_delta = local_score(
                            Y, new_parents) - local_score(Y, old_parents)
                        yield (operation, score_delta)

        for (X, Y) in model.edges():  # (2) remove single edge
            operation = ("-", (X, Y))
            if operation not in tabu_list:
                old_parents = model.get_parents(Y)
                new_parents = old_parents[:]
                new_parents.remove(X)
                score_delta = local_score(Y, new_parents) - local_score(
                    Y, old_parents)
                yield (operation, score_delta)

        for (X, Y) in model.edges():  # (3) flip single edge
            new_edges = list(model.edges()) + [(Y, X)]
            new_edges.remove((X, Y))
            if nx.is_directed_acyclic_graph(nx.DiGraph(new_edges)):
                operation = ("flip", (X, Y))
                if (operation not in tabu_list
                        and ("flip", (Y, X)) not in tabu_list
                        and (black_list is None or (Y, X) not in black_list)
                        and (white_list is None or (Y, X) in white_list)):
                    old_X_parents = model.get_parents(X)
                    old_Y_parents = model.get_parents(Y)
                    new_X_parents = old_X_parents + [Y]
                    new_Y_parents = old_Y_parents[:]
                    new_Y_parents.remove(X)
                    if max_indegree is None or len(
                            new_X_parents) <= max_indegree:
                        score_delta = (local_score(X, new_X_parents) +
                                       local_score(Y, new_Y_parents) -
                                       local_score(X, old_X_parents) -
                                       local_score(Y, old_Y_parents))
                        yield (operation, score_delta)
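The acyclicity guard used by the "add edge" and "flip edge" branches can be isolated; the helper below (legal_additions, a hypothetical name, not the pgmpy API) yields exactly the single-edge additions that keep a model acyclic:

import networkx as nx
from itertools import permutations

def legal_additions(model):
    # Yield every missing edge whose addition keeps the model a DAG.
    existing = set(model.edges())
    for X, Y in permutations(model.nodes(), 2):
        if (X, Y) in existing or (Y, X) in existing:
            continue
        if nx.is_directed_acyclic_graph(nx.DiGraph(list(existing) + [(X, Y)])):
            yield (X, Y)

dag = nx.DiGraph([("A", "B"), ("B", "C")])
print(sorted(legal_additions(dag)))  # [('A', 'C')]; ('C', 'A') would close a cycle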
Beispiel #60
0
 def is_dag(self, adj):
     return networkx.is_directed_acyclic_graph(self.as_graph(adj))
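as_graph is project-specific; with a plain 0/1 adjacency matrix the same check can be written directly (a sketch under that assumption):

import numpy as np
import networkx as nx

def is_dag(adj):
    g = nx.from_numpy_array(np.asarray(adj), create_using=nx.DiGraph)
    return nx.is_directed_acyclic_graph(g)

print(is_dag([[0, 1], [0, 0]]))  # True: single edge 0 -> 1
print(is_dag([[0, 1], [1, 0]]))  # False: 0 -> 1 -> 0 cycle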