import copy

import networkx as nx


def comb_fas(graph):
    '''@param: graph, a nx.DiGraph obj'''
    assert isinstance(graph, nx.DiGraph)
    origin_weight = nx.get_edge_attributes(graph, 'weight')
    weight = origin_weight.copy()
    assert len(weight) == graph.number_of_edges(), \
        "Some edge does not have a weight attr."
    fas = []
    while not nx.is_directed_acyclic_graph(graph):
        # take one simple cycle and lower every edge on it by the cycle's
        # minimum weight; edges that reach zero join the feedback arc set
        c = next(nx.simple_cycles(graph))
        mini_weight = min(weight[edge] for edge in get_edges(c))
        cycle_edges_weight = {edge: weight[edge] for edge in get_edges(c)}
        for eachEdge in cycle_edges_weight:
            cycle_edges_weight[eachEdge] -= mini_weight
            weight[eachEdge] -= mini_weight
            if cycle_edges_weight[eachEdge] == 0:
                fas.append(eachEdge)
                graph.remove_edge(eachEdge[0], eachEdge[1])
    # try to add removed edges back one by one; keep only those whose
    # re-insertion leaves the graph acyclic
    for eachEdge in copy.copy(fas):
        graph.add_edge(eachEdge[0], eachEdge[1], weight=origin_weight[eachEdge])
        if nx.is_directed_acyclic_graph(graph):
            fas.remove(eachEdge)
        else:
            graph.remove_edge(eachEdge[0], eachEdge[1])
    return fas
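# --- Usage sketch for comb_fas (not from the original module). The function
# above relies on a get_edges() helper that is not shown; the version below is
# a hypothetical stand-in that turns a cycle's node list into its consecutive
# (u, v) edge pairs.
def get_edges(cycle):
    # pair each node with its successor, wrapping around to close the cycle
    return list(zip(cycle, cycle[1:] + cycle[:1]))

g = nx.DiGraph()
g.add_weighted_edges_from([('a', 'b', 1), ('b', 'c', 2), ('c', 'a', 1)])
fas = comb_fas(g)
print(fas)                              # e.g. [('c', 'a')] -- a low-weight arc set
print(nx.is_directed_acyclic_graph(g))  # True: the cycle is broken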
def mean_geodesic(pg, debug=0):
    """
    mean_geodesic() calculates the mean geodesic (shortest) distance
    between two vertices in a network.
    """
    length_sum = 0
    if networkx.is_directed_acyclic_graph(pg):
        n_pairs_with_paths = 0
    else:
        n_pairs_with_paths = (pg.order() * (pg.order() + 1)) / 2
    tg = pg.copy()
    for u in list(pg.nodes()):
        tg.remove_node(u)
        for v in tg.nodes():
            try:
                length = networkx.shortest_path_length(pg, u, v)
                if length > 0:
                    length_sum = length_sum + length
                    if networkx.is_directed_acyclic_graph(pg):
                        n_pairs_with_paths = n_pairs_with_paths + 1
            except networkx.NetworkXNoPath:
                pass
    try:
        geodesic = float(length_sum) / float(n_pairs_with_paths)
    except ZeroDivisionError:
        geodesic = -999.
    if debug:
        print('length_sum:\t', length_sum)
        print('n_pairs_with_paths:\t', n_pairs_with_paths)
    return geodesic
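# For comparison: on a connected graph, modern networkx exposes a similar
# average directly. A minimal sketch, not part of the original module:
import networkx as nx

g = nx.path_graph(4)  # 0-1-2-3
# averages shortest-path length over all ordered pairs of distinct nodes
print(nx.average_shortest_path_length(g))  # 20/12 ~ 1.67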
def minkowski_causality(D, N, show_plot=False):
    """ Instantiates "event_in_minkowski" to return a list of N points. """
    def points_in_minkowski(D, N):
        points_in_minkowski = []
        n = 1
        while n <= N:
            point_n = event_in_minkowski(D)
            coords_n = point_n.coord_value_point_n(n)
            points_in_minkowski.append(coords_n)
            n += 1
        return points_in_minkowski

    good_points = points_in_minkowski(D, N)

    # List --> Dict as nx needs hashable objects to add nodes/edges from.
    dict_of_points = {}
    for i in range(len(good_points)):
        good_points[i] = tuple(good_points[i])
        dict_of_points[i] = good_points[i]

    # Add nodes to empty nx graph object
    G = nx.DiGraph()
    for point in dict_of_points:
        G.add_node(point)
    print(nx.is_directed_acyclic_graph(G))

    # Add edge (from i to j) if node j falls within the future light cone of i
    for i in range(len(dict_of_points)):
        for j in range(len(dict_of_points)):
            if i == j:
                continue
            t_separation = dict_of_points[j][0] - dict_of_points[i][0]
            space_separation = 0
            for d in range(1, D):
                space_separation += (dict_of_points[i][d] - dict_of_points[j][d])
            if t_separation >= abs(space_separation):
                G.add_edge(i, j)

    # Check G is a DAG, print model info
    if nx.is_directed_acyclic_graph(G):
        print("This is a DAG of causal relations between randomly placed events in",
              D, "D Minkowski space-time.")

    # Show plot
    if show_plot:
        draw_in_minkowski(G, dict_of_points)
    return G
def make_acyclic(G):
    G_copy = G.copy()
    F = []
    original_G = G.copy()
    while not nx.is_directed_acyclic_graph(G_copy):
        # iterate through cycles in G
        for cycle in nx.simple_cycles(G_copy):
            min_weight = 100000
            min_u = 0
            min_v = 0
            # Find minimum weight edge in the cycle; weight here is bundle size
            # TODO: start with smallest cycle by sorting
            # print(G.edges(data=True))
            for i in range(0, len(cycle) - 1):
                u = cycle[i]
                v = cycle[i + 1]
                if G[u][v]['bsize'] < min_weight:
                    min_weight = G[u][v]['bsize']
                    min_u = u
                    min_v = v
            if G[cycle[-1]][cycle[0]]['bsize'] < min_weight:
                min_weight = G[cycle[-1]][cycle[0]]['bsize']
                min_u = cycle[-1]
                min_v = cycle[0]
            # reduce the edge weights by min_weight and remove the edge
            # whose weight hits 0
            if min_weight != 100000:
                for i in range(0, len(cycle) - 1):
                    u = cycle[i]
                    v = cycle[i + 1]
                    G[u][v]['bsize'] -= min_weight
                G[cycle[-1]][cycle[0]]['bsize'] -= min_weight
                G.remove_edge(min_u, min_v)
                F.append((min_u, min_v, original_G.get_edge_data(min_u, min_v)))
            G_copy = G.copy()
            break
        # Now try adding edges from F to G; TODO: do in non-increasing order
        if len(G.edges()) == 0:
            continue
        # if len(G.nodes()) == 0:
        #     continue
        for edge in F:
            u = edge[0]
            v = edge[1]
            G.add_edge(u, v, **edge[2])
            if not nx.is_directed_acyclic_graph(G):
                G.remove_edge(u, v)
    return G
def test_topological_sort2(self):
    DG = nx.DiGraph({1: [2], 2: [3], 3: [4],
                     4: [5], 5: [1], 11: [12],
                     12: [13], 13: [14], 14: [15]})
    assert_raises(nx.NetworkXUnfeasible, consume, nx.topological_sort(DG))
    assert_false(nx.is_directed_acyclic_graph(DG))

    DG.remove_edge(1, 2)
    consume(nx.topological_sort(DG))
    assert_true(nx.is_directed_acyclic_graph(DG))
def test_topological_sort2(self):
    DG = nx.DiGraph({1: [2], 2: [3], 3: [4],
                     4: [5], 5: [1], 11: [12],
                     12: [13], 13: [14], 14: [15]})
    assert_raises(nx.NetworkXUnfeasible, nx.topological_sort, DG)
    assert_raises(nx.NetworkXUnfeasible, nx.topological_sort_recursive, DG)
    assert_false(nx.is_directed_acyclic_graph(DG))

    DG.remove_edge(1, 2)
    assert_equal(nx.topological_sort_recursive(DG),
                 [11, 12, 13, 14, 15, 2, 3, 4, 5, 1])
    assert_equal(nx.topological_sort(DG),
                 [11, 12, 13, 14, 15, 2, 3, 4, 5, 1])
    assert_true(nx.is_directed_acyclic_graph(DG))
def make_dag(g):
    if nx.is_directed_acyclic_graph(g):
        return
    p = nx.periphery(g)
    for comp in nx.weakly_connected_component_subgraphs(g):
        if nx.is_directed_acyclic_graph(comp):
            continue
        cycles = nx.simple_cycles(comp)
        for cycle in cycles:
            edges = list(zip(cycle[:-1], cycle[1:]))
            edges.append((cycle[-1], cycle[0]))
            for e in edges:
                # NOTE: data is collected but unused; the original function
                # appears unfinished
                data = g.get_edge_data(e[0], e[1])
                comp.remove_edge(e[0], e[1])
def mean_degree_centrality(pg, normalize=0):
    """
    mean_degree_centrality(pg) calculates mean in- and out-degree
    centralities for directed graphs and simple degree-centralities
    for undirected graphs. If the normalize flag is set, each node's
    centralities are weighted by the number of edges in the (di)graph.
    """
    centrality = {}
    try:
        if networkx.is_directed_acyclic_graph(pg):
            cent_sum_in, cent_sum_out = 0, 0
            for n in pg.nodes():
                n_cent_in = pg.in_degree(n)
                n_cent_out = pg.out_degree(n)
                if normalize:
                    n_cent_in = float(n_cent_in) / float(pg.size() - 1)
                    n_cent_out = float(n_cent_out) / float(pg.size() - 1)
                cent_sum_in = cent_sum_in + n_cent_in
                cent_sum_out = cent_sum_out + n_cent_out
            centrality['in'] = cent_sum_in / float(pg.order())
            centrality['out'] = cent_sum_out / float(pg.order())
        else:
            cent_sum = 0
            for n in pg.nodes():
                if not normalize:
                    n_cent = pg.degree(n)
                else:
                    # degree_centrality() returns a dict keyed by node
                    n_cent = networkx.degree_centrality(pg)[n]
                cent_sum = cent_sum + n_cent
            centrality['all'] = cent_sum / float(pg.order())
    except Exception:
        logging.error('pyp_network.mean_degree_centrality() failed!')
    return centrality
def set_indices(self):
    if not nx.is_directed_acyclic_graph(self):
        raise ValueError('The graph is not a DAG')
    if not nx.is_connected(self.to_undirected()):
        raise ValueError('The graph is not connected')

    self.base_digraph = nx.DiGraph(self)
    self.ordered_nodes = list(nx.topological_sort(self))
    for idx, node in enumerate(self.ordered_nodes):
        self.nodes[node]['index'] = idx

    self.ordered_edges = OrderedDict()
    index = 0
    for tail in self.ordered_nodes:
        for head in sorted(self[tail]):
            self.ordered_edges[(tail, head)] = index
            self.base_digraph[tail][head]['capacity'] = len(self[tail][head])
            for idx in sorted(self[tail][head]):
                self[tail][head][idx]['index'] = index
                index = index + 1

    # reset data structures
    self.coding_matrix = None
    self.dst_evolution_rec = None
    self.alignment_nodes = []
def generate(self):
    """Workhorse factory method for producing all valid DAGs for this
    schema and set of constraints."""
    graphs = []
    sys.stderr.write("gen:\n" + self.dumpEdgePossibleSettings())
    edgesPossible = self.getAllVarPairs()
    edgeCombos = factorialDict(self.edgePossibleSettings)
    for edgeCombo in edgeCombos:  # edgeCombo is a dict (s, t) -> 1
        graph = nx.DiGraph()
        for i, ev in enumerate(self.entVars):
            lat = True if (ev in self.latents) else False
            det = self.determines.get(ev, None)
            # order they were passed in is preserved
            graph.add_node(ev, latent=lat, determines=det, order=i)
        graphSig = ""
        for s, t in edgesPossible:
            setting = edgeCombo.get((s, t), 0)
            if setting == 1:
                graph.add_edge(s, t)
            elif setting == 2:
                graph.add_edge(t, s)
            if (s in self.indexSet) and (t in self.indexSet):
                graphSig += str(setting)
        graph.graph['index'] = int(graphSig, 3)
        if (not self.dagsOnly) or (nx.is_directed_acyclic_graph(graph)):
            graphs.append(graph)
    if len(graphs) < len(edgeCombos):
        sys.stderr.write("eliminated %d cyclic graphs\n"
                         % (len(edgeCombos) - len(graphs)))
    return sorted(graphs, key=lambda x: x.graph['index'])
def balance(graph):
    '''param: graph, a DAG, its .__class__ == nx.DiGraph
       return: r, the removed edge set that makes the input graph a B-structure
    '''
    # Only graphs in integer form are handled; the node behind each integer
    # can be looked up later.
    # The input graph should be connected; minimum_edge_cut runs into trouble
    # on disconnected graphs.
    assert nx.is_directed_acyclic_graph(graph), \
        "The target graph you want to balance is not a DAG"
    r = []  # removed set
    if check(graph):
        return r
    # Keep cutting as long as the graph is not a B-structure.
    # BUGGY: what if cs is empty? Then two subgraphs cannot come back;
    # what should happen in that case?
    print("\nCutting Graph")
    cs, g1, g2 = cut(graph)
    r = balance(g1) + balance(g2) + cs
    csl = []
    for eachEdge in cs:
        under_check_graph = graph.copy()
        under_check_graph.remove_edges_from(r)
        under_check_graph.add_edges_from(csl)
        under_check_graph.add_edge(eachEdge[0], eachEdge[1])
        if check(under_check_graph):
            print("Edge: %s added back" % str(eachEdge))
            csl.append(eachEdge)
            graph.add_edge(eachEdge[0], eachEdge[1])
    for eachEdge in csl:
        r.remove(eachEdge)
    print("Removed Edge Set: %s" % str(r))
    return r
def set_params(self, node, delta, eta, marginal):
    self.clear_memory()
    assert node in self.graph.nodes()
    self.graph.nodes[node]['delta'] = delta
    self.graph.nodes[node]['eta'] = eta
    self.graph.nodes[node]['marginal'] = marginal
    assert nx.is_directed_acyclic_graph(self.graph)
def build_graph(self):
    """Build graph of relationships between hills

    Each hill is a list of things that can be used for that hill.
    Each of these may have inputs (names of other hills).

    A graph is built to show the input relations.

    Checks in case the graph is cyclic.

    Does a topological sort on the hills to give an order in which
    they should be processed.
    """
    graph = nx.DiGraph()
    for hill, data in self.hills.items():
        for item in data:
            for link in item.inputs:
                graph.add_edge(link, hill)

    # check if graph is acyclic
    is_dag = nx.is_directed_acyclic_graph(graph)
    if not is_dag:
        raise ValueError("hills must be acyclic")

    self.hill_order = list(nx.topological_sort(graph))
def _validate(G):
    '''
    Validates dependency graph to ensure it has no missing or cyclic dependencies
    '''
    for name in G.nodes():
        if 'value' not in G.nodes[name] and 'template' not in G.nodes[name]:
            msg = 'Dependency unsatisfied in variable "%s"' % name
            raise ParamException(msg)

    if not nx.is_directed_acyclic_graph(G):
        graph_cycles = nx.simple_cycles(G)

        variable_names = []
        for cycle in graph_cycles:
            try:
                variable_name = cycle[0]
            except IndexError:
                continue
            variable_names.append(variable_name)

        variable_names = ', '.join(sorted(variable_names))
        msg = ('Cyclic dependency found in the following variables: %s. '
               'Likely the variable is referencing itself' % (variable_names))
        raise ParamException(msg)
def dyad_census(pg, debug=0, debuglog=0):
    """
    dyad_census() calculates the number of null, asymmetric, and mutual
    edges between all pairs of nodes in a directed graph.
    """
    if not networkx.is_directed_acyclic_graph(pg):
        logging.error('pyp_network.dyad_census() requires a directed graph as input!')
        return 0
    else:
        census = {}
        census['null'] = 0
        census['asymmetric'] = 0
        census['mutual'] = 0
        tg = pg.copy()
        for u in list(pg.nodes()):
            tg.remove_node(u)
            for v in tg.nodes():
                # null dyad: no edge in either direction
                if not (pg.has_edge(u, v) or pg.has_edge(v, u)):
                    census['null'] = census['null'] + 1
                elif u in pg.predecessors(v) and v in pg.successors(u):
                    census['mutual'] = census['mutual'] + 1
                    if debug:
                        print('Nodes %s and %s link to one another!' % (u, v))
                    if debuglog:
                        logging.error('Nodes %s and %s link to one another!', u, v)
                elif u in pg.predecessors(v) and v not in pg.successors(u):
                    census['asymmetric'] = census['asymmetric'] + 1
                elif u not in pg.predecessors(v) and v in pg.successors(u):
                    census['asymmetric'] = census['asymmetric'] + 1
                else:
                    pass
        del tg
        return census
def graph(dataframe=None):
    G = nx.DiGraph()
    nrow = dataframe.shape[0]
    for i in range(nrow):
        source = dataframe['module_id'][i]
        G.add_node(source)
        if not pd.isnull(dataframe['children'][i]):
            try:
                targets = dataframe['children'][i].split()
            except Exception:
                raise ValueError('Data type is not correct:', i,
                                 dataframe.loc[i],
                                 type(dataframe['children'][i]))
            for key in targets:
                G.add_edge(source, key)

    # Sanity check
    selfLoop = list(nx.selfloop_edges(G))
    assert len(selfLoop) == 0, 'self loop: %s' % (selfLoop,)
    assert nx.is_directed_acyclic_graph(G), 'loop exists!'
    return G
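# --- Usage sketch for graph() above, with a hypothetical two-module frame
# matching the 'module_id'/'children' columns it expects:
df = pd.DataFrame({'module_id': ['a', 'b'], 'children': ['b', None]})
G = graph(df)
print(list(G.edges()))  # [('a', 'b')]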
def __init__(self, tasks_reqs):
    """Construct a PipelineFramework based on the given Tasks and their requirements.

    A PipelineFramework is the structure of the pipeline, it contains no patient data.

    :param tasks_reqs: the Tasks and their requirements
    :type tasks_reqs: iterable of tuples, each with a Task and its list of required UIDs
    :raises: ValueError
    """
    self.dag = DiGraph()
    task_dict = {}

    for task, _ in tasks_reqs:
        if task_dict.get(task._uid) is not None:
            raise ValueError("Pipeline contains duplicate Task {}".format(task._uid))
        self.dag.add_node(task, done=False)
        task_dict[task._uid] = task

    for task, reqs in tasks_reqs:
        for req_uid in reqs:
            req_task = task_dict.get(req_uid)
            if req_task is None:
                raise KeyError("Unknown UID {} set as requirement for {}".format(
                    req_uid, task._uid))
            self.dag.add_edge(req_task, task)

    if not is_directed_acyclic_graph(self.dag):
        raise ValueError("Pipeline contains a cycle.")
def count_common_subgraphs(graph1, graph2, n1, n2,
                           node_attrib='label', edge_attrib='label'):
    """
    Counts the number of common (dependency parse) subgraphs rooted at
    n1 and n2. This is an implementation of Cm(n1, n2) for dependency
    structures from Collins and Duffy (2001). Parsing with a Single Neuron.
    """
    for graph in (graph1, graph2):
        assert nx.is_directed_acyclic_graph(graph)

    if graph1.nodes[n1][node_attrib] != graph2.nodes[n2][node_attrib]:
        return 0

    n1_children = dependency_children(graph1, n1, edge_attrib=edge_attrib)
    n2_children = dependency_children(graph2, n2, edge_attrib=edge_attrib)

    if not n1_children or not n2_children:
        return 0
    else:
        result = 1  # neutral element of multiplication
        for n1_target, n2_target in common_dependency_targets(
                graph1, graph2, n1, n2, node_attrib=node_attrib):
            # pass the attribute names through the recursion instead of
            # hard-coding 'label'
            result *= (count_common_subgraphs(
                graph1, graph2, n1_target, n2_target,
                node_attrib=node_attrib, edge_attrib=edge_attrib) + 2)
        return result - 1
def read_pedigree_from_test_file(file_name, genotyped_id_file=None):
    '''Load a pedigree from a PLINK TFAM file.'''
    data = np.genfromtxt(file_name, np.dtype(int))
    p = io_pedigree.read(file_name, genotyped_id_file=genotyped_id_file)
    assert_equal(p._graph.number_of_nodes(), data.shape[0],
                 'Incorrect number of nodes')
    assert nx.is_directed_acyclic_graph(p._graph), 'Pedigree is not a DAG'
    return p
def longest_subsequence_dag(a, sign):
    '''Return a longest increasing (if sign=1) or decreasing (if sign=-1)
    sub-sequence in the permutation a of the first n natural integers.
    Time and storage are O(n). If multiple longest sub-sequences exist,
    arbitrarily returns one of them.'''
    # Dan Cook's idea: use symmetry to solve the decreasing case in terms
    # of the increasing case
    if sign < 0:
        return list(reversed(longest_subsequence_dag(list(reversed(a)), 1)))

    # Construct a DAG whose edges represent all candidate pairs of
    # consecutive elements of the longest subsequence
    G = build_dag(np.array(a))
    assert nx.is_directed_acyclic_graph(G)
    # For each node, calculate the longest path length
    depth = longest_path_length(G)
    # Back-track from a node of maximum depth to its ancestors to
    # reconstruct the longest path
    x = np.argmax(depth)
    seq = [x]
    while G.in_degree(x) > 0:
        # To find the maximum path, choose a parent of maximum depth
        parents = list(G.predecessors(x))
        x = parents[np.argmax(depth[parents])]
        seq.append(x)
    return list(reversed(seq))
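# --- Usage sketch for longest_subsequence_dag, assuming the build_dag() and
# longest_path_length() helpers referenced above are importable from the same
# module. Ties may be broken arbitrarily, hence the "e.g." outputs:
print(longest_subsequence_dag([2, 0, 1, 3], 1))   # e.g. [0, 1, 3]
print(longest_subsequence_dag([2, 0, 1, 3], -1))  # e.g. [2, 1], a longest decreasing run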
def load(self):
    """
    Load dependencies for all loaded schemas.
    This method gets called before any operation that requires dependencies:
    delete, drop, populate, progress.
    """
    # reload from scratch to prevent duplication of renamed edges
    self.clear()

    # load primary key info
    keys = self._conn.query("""
            SELECT
                concat('`', table_schema, '`.`', table_name, '`') as tab, column_name
            FROM information_schema.key_column_usage
            WHERE table_name not LIKE "~%%" AND table_schema in ('{schemas}')
                AND constraint_name="PRIMARY"
            """.format(schemas="','".join(self._conn.schemas)))
    pks = defaultdict(set)
    for key in keys:
        pks[key[0]].add(key[1])

    # add nodes to the graph
    for n, pk in pks.items():
        self.add_node(n, primary_key=pk)

    # load foreign keys
    keys = self._conn.query("""
        SELECT constraint_name,
            concat('`', table_schema, '`.`', table_name, '`') as referencing_table,
            concat('`', referenced_table_schema, '`.`', referenced_table_name, '`') as referenced_table,
            column_name, referenced_column_name
        FROM information_schema.key_column_usage
        WHERE referenced_table_name NOT LIKE "~%%" AND (referenced_table_schema in ('{schemas}') OR
            referenced_table_schema is not NULL AND table_schema in ('{schemas}'))
        """.format(schemas="','".join(self._conn.schemas)), as_dict=True)
    fks = defaultdict(lambda: dict(attr_map=dict()))
    for key in keys:
        d = fks[(key['constraint_name'], key['referencing_table'], key['referenced_table'])]
        d['referencing_table'] = key['referencing_table']
        d['referenced_table'] = key['referenced_table']
        d['attr_map'][key['column_name']] = key['referenced_column_name']

    # add edges to the graph
    for fk in fks.values():
        props = dict(
            primary=all(attr in pks[fk['referencing_table']] for attr in fk['attr_map']),
            attr_map=fk['attr_map'],
            aliased=any(k != v for k, v in fk['attr_map'].items()),
            multi=not all(a in fk['attr_map'] for a in pks[fk['referencing_table']]))
        if not props['aliased']:
            self.add_edge(fk['referenced_table'], fk['referencing_table'], **props)
        else:
            # for aliased dependencies, add an extra node in the format '1', '2', etc.
            alias_node = '%d' % next(self._node_alias_count)
            self.add_node(alias_node)
            self.add_edge(fk['referenced_table'], alias_node, **props)
            self.add_edge(alias_node, fk['referencing_table'], **props)

    if not nx.is_directed_acyclic_graph(self):  # pragma: no cover
        raise DataJointError('DataJoint can only work with acyclic dependencies')
def _resolve_dependencies(self):
    self.templates_deps.clear()
    for (tpl_name, tpl_data) in self.templates.items():
        self.templates_deps.add_node(tpl_name)
        for parent in tpl_data["parent"]:
            self.templates_deps.add_edge(tpl_name, parent)
    if not nx.is_directed_acyclic_graph(self.templates_deps):
        raise ParsingError(_("A cycle has been detected in templates"))
def is_configuration(self, s):
    pre = set()
    for e in s:
        for c in e.pre | e.cont:
            if not sgl(c.pre) <= s:
                return False
    g = self.asym_graph(True, s, True)
    return networkx.is_directed_acyclic_graph(g)
def is_dag(self):
    """
    Check to see if we have a directed acyclic graph.

    If we have an acyclic graph, it is possible to reorganize the
    nodes according to the downgraph on G.
    """
    return nx.is_directed_acyclic_graph(Node.G)
def remove_cycles(G):
    while not nx.is_directed_acyclic_graph(G):
        subgraphs = nx.strongly_connected_component_subgraphs(G)
        for subgraph in subgraphs:
            if subgraph.number_of_nodes() > 1:
                # drop a random edge from each non-trivial strongly
                # connected component
                edge_index = random.randrange(subgraph.number_of_edges())
                edge = list(subgraph.edges())[edge_index]
                G.remove_edge(edge[0], edge[1])
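# --- Usage sketch for remove_cycles (assumes a networkx version that still
# ships strongly_connected_component_subgraphs, i.e. < 2.4). Edge choice is
# random, so which edge of the 1-2-3 cycle gets dropped varies per run:
import random
import networkx as nx

g = nx.DiGraph([(1, 2), (2, 3), (3, 1), (3, 4)])
remove_cycles(g)
print(nx.is_directed_acyclic_graph(g))  # True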
def __init__(self, scaffold_graph):
    print("Entering PathFinder module:", str(datetime.now()))
    self.G = scaffold_graph.copy()

    # Build strandless list of sequences
    sequences = set([n for n in self.G.nodes() if n > 0])

    # Define weakly connected components
    print("1... Defining weakly connected components")
    component_graphs = set([g for g in nx.weakly_connected_component_subgraphs(self.G)])
    single_node_graphs = set([g for g in component_graphs if len(g.nodes()) == 1])
    multi_node_graphs = set([g for g in component_graphs if len(g.nodes()) > 1])
    print("Number of single-node components:", len(single_node_graphs))
    print("Number of multi-node components:", len(multi_node_graphs))

    # Consolidate unscaffolded nodes, discard reverse strand
    print("2... Consolidating single-node components")
    unscaffolded = set([list(g.nodes())[0] for g in single_node_graphs])
    discard_nodes = set([n for n in unscaffolded if n < 0])
    for g in iter(single_node_graphs.copy()):
        if list(g.nodes())[0] in discard_nodes:
            single_node_graphs.discard(g)
    print("Number of unscaffolded sequences:", len(single_node_graphs))

    # Classify multi-node graphs
    print("3... Classifying multi-node components")
    DAG = set([])
    Euler = set([])
    for g in multi_node_graphs:
        if nx.is_directed_acyclic_graph(g):
            DAG.add(g)
        elif nx.is_eulerian(g):
            Euler.add(g)
        else:
            sys.exit("FATAL ERROR: Unknown multi-node graph type!")
    print("Number of directed acyclic graphs:", len(DAG))
    print("Number of Eulerian graphs:", len(Euler))

    # Build scaffolds from DAGs
    print("4... Building scaffolds from directed acyclic graphs")
    self.scaffolds = set([])
    for g in DAG:
        self.build_dag_scaffold(g)

    # Consolidate complementary scaffolds, keep first found
    print("5... Consolidating complementary scaffolds")
    consolidated_scaff = set([])
    for seq in iter(self.scaffolds):
        comp = self.revc(seq)
        if comp in self.scaffolds:
            if comp not in consolidated_scaff:
                consolidated_scaff.add(seq)
        else:
            print("WARNING: non-complemented scaffold")
    self.scaffolds = consolidated_scaff
    print("Number of scaffolds assembled:", len(self.scaffolds))

    # Build scaffolds from Eulerian graphs

    # Add unscaffolded seqs to scaffolds list
    print("6... Adding unscaffolded sequences to output")
    for g in single_node_graphs:
        seq = self.G.nodes[list(g.nodes())[0]]['seq']
        self.scaffolds.add(seq)

    print("Leaving PathFinder module:", str(datetime.now()))
def add(self, u, uw, v, vw, sequential=False, global_dag=None):
    """
    Add nodes u and/or v to the partition.
    If sequential is True, break antichains into sequential chains.
    """
    # if (self.partition_id == 180):
    #     logger.debug("u = ", u, ", v = ", v, ", partition = ", self.partition_id)
    unew = u not in self._dag.nodes
    vnew = v not in self._dag.nodes
    self._dag.add_node(u, weight=uw)
    self._dag.add_node(v, weight=vw)
    self._dag.add_edge(u, v)

    if unew and vnew:  # we know this is fast
        self._max_antichains = DAGUtil.get_max_antichains(self._dag)
        self._max_dop = 1
    else:
        if sequential and (global_dag is not None):
            # break potential antichain to sequential chain
            if unew:
                v_ups = nx.ancestors(self._dag, v)
                for vup in v_ups:
                    if u == vup:
                        continue
                    if self._dag.in_degree(vup) == 0:
                        # link u to "root" parent of v to break antichain
                        self._dag.add_edge(u, vup)
                        # change the original global graph
                        global_dag.add_edge(u, vup, weight=0)
                        if not nx.is_directed_acyclic_graph(global_dag):
                            global_dag.remove_edge(u, vup)
            else:
                u_downs = nx.descendants(self._dag, u)
                for udo in u_downs:
                    if udo == v:
                        continue
                    if self._dag.out_degree(udo) == 0:
                        # link "leaf" children of u to v to break antichain
                        self._dag.add_edge(udo, v)
                        # change the original global graph
                        global_dag.add_edge(udo, v, weight=0)
                        if not nx.is_directed_acyclic_graph(global_dag):
                            global_dag.remove_edge(udo, v)
        self._max_dop = self.probe_max_dop(u, v, unew, vnew, update=True)
def reduce_paths(G):
    """
    Make graph into a directed acyclic graph (DAG).
    """
    from jcvi.algorithms.lpsolve import min_feedback_arc_set

    while not nx.is_directed_acyclic_graph(G):
        edges = []
        for a, b, w in G.edges(data=True):
            w = w['weight']
            edges.append((a, b, w))
        mf, mf_score = min_feedback_arc_set(edges)
        for a, b, w in mf:
            G.remove_edge(a, b)

    assert nx.is_directed_acyclic_graph(G)
    G = transitive_reduction(G)
    return G
def is_tree(graph):
    if not nx.is_directed_acyclic_graph(graph):
        return False
    # a DAG is a tree here iff no node has more than one incoming edge
    heads = [v for u, v in graph.edges()]
    return len(set(heads)) == len(heads)
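# --- Usage sketch for is_tree: a DAG counts as a tree here only when no node
# has two parents. (Modern networkx offers nx.is_arborescence for the rooted,
# connected variant of this check.)
import networkx as nx

chain = nx.DiGraph([(1, 2), (2, 3)])
diamond = nx.DiGraph([(1, 2), (1, 3), (2, 4), (3, 4)])
print(is_tree(chain))    # True
print(is_tree(diamond))  # False: node 4 has two parents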
def _validate(self, graph=None):
    if graph is None:
        graph = self._graph
    # Ensure that there is a valid topological ordering.
    if not nx.is_directed_acyclic_graph(graph):
        raise exc.DependencyFailure("No path through the items in the"
                                    " graph produces an ordering that"
                                    " will allow for correct dependency"
                                    " resolution")
def new_space(self, parent, name=None, bases=None, formula=None,
              refs=None, source=None, is_derived=False,
              prefix="", doc=None, container=None):
    """Create a new child space.

    Args:
        name (str): Name of the space. If omitted, the space is
            created automatically.
        bases: If specified, the new space becomes a derived space of
            the `base` space.
        formula: Function whose parameters used to set space parameters.
        refs: a mapping of refs to be added.
        source: A source module from which cell definitions are read.
        prefix: Prefix to the autogenerated name when name is None.
    """
    if name is None:
        while True:
            name = parent.spacenamer.get_next(parent.namespace, prefix)
            if self._can_add(parent, name, UserSpaceImpl):
                break
    elif not self._can_add(parent, name, UserSpaceImpl):
        raise ValueError("Cannot create space '%s'" % name)

    if not prefix and not is_valid_name(name):
        raise ValueError("Invalid name '%s'." % name)

    if bases is None:
        bases = []
    elif isinstance(bases, UserSpaceImpl):
        bases = [bases]

    if parent.is_model():
        node = name
        pnode = []
    else:
        node = parent.namedid + "." + name
        pnode = [parent.namedid]

    nodes = pnode + [b.namedid for b in bases]

    oldsubg_inherit = self._inheritance.subgraph_from_nodes(nodes)
    oldsubg = oldsubg_inherit.get_derived_graph()
    newsubg_inherit = oldsubg_inherit.copy_as_spacegraph(oldsubg_inherit)

    newsubg_inherit.add_node(node, mode="defined", state="defined")

    for b in bases:
        base = b.namedid
        newsubg_inherit.add_edge(base, node, mode="defined",
                                 index=newsubg_inherit.max_index(node))

    if not nx.is_directed_acyclic_graph(newsubg_inherit):
        raise ValueError("cyclic inheritance")

    if not newsubg_inherit.check_cyclic(node, node):
        raise ValueError("cyclic inheritance through composition")

    newsubg_inherit.get_mro(node)  # Check if MRO is possible

    for pnode in newsubg_inherit.get_parent_nodes(node):
        newsubg_inherit.nodes[pnode]["mode"] = "defined"

    start = [(tail, node) for tail in newsubg_inherit.ordered_preds(node)]
    newsubg = newsubg_inherit.get_derived_graph(on_edge=self._derive_hook,
                                                start=start)

    if not nx.is_directed_acyclic_graph(newsubg):
        raise ValueError("cyclic inheritance")

    # Check if MRO is possible for each node in sub graph
    for n in nx.descendants(newsubg, node):
        newsubg.get_mro(n)

    if not parent.is_model():
        parent.set_defined()

    if container is None:
        container = parent._named_spaces

    space = UserSpaceImpl(parent, name, container, is_derived,
                          formula=formula, refs=refs, source=source, doc=doc)
    newsubg.nodes[node]["space"] = space
    newsubg.nodes[node]["state"] = "created"

    self._instructions.execute()
    self._update_graphs(newsubg_inherit, newsubg, oldsubg_inherit, oldsubg)

    return space
def finalize(self):
    assert not self.__finalized
    if not nx.is_directed_acyclic_graph(self.__graph):
        raise ValueError("The generated graph is not a DAG!\n" + str(self.__graph))
    self.__finalized = True
def find_and_replace_pattern(graph: nx.MultiDiGraph):
    is_acyclic = nx.is_directed_acyclic_graph(graph)
    graph.graph['is_cyclic'] = not is_acyclic
def initialize(self):
    """
    Initialize Graph class instance.

    Initialization includes: create NetworkX DiGraph, populate it with
    input and step nodes, and directed edges.

    Args:
        None.

    Returns:
        On failure: Raises WorkflowDAGException.
    """
    for context in self._parsed_job_work_uri:
        # set default empty values for context options
        if context not in self._context_options:
            self._context_options[context] = {}

    # references to step classes for each context
    try:
        self._load_context_classes()
    except WorkflowDAGException as err:
        msg = 'cannot load context-specific step classes'
        Log.an().error(msg)
        raise WorkflowDAGException(str(err) + '|' + msg) from err

    # flatten parameters
    self._parameters = {
        param_name: param['value']
        for param_name, param in self._workflow['parameters'].items()
    }

    # init DAG object with structure and empty nodes
    self._graph = nx.DiGraph()
    try:
        self._init_graph_structure()
    except WorkflowDAGException as err:
        msg = 'cannot initialize graph structure'
        Log.an().error(msg)
        raise WorkflowDAGException(str(err) + '|' + msg) from err

    # validate that graph is a DAG
    if not nx.is_directed_acyclic_graph(self._graph):
        msg = 'graph contains cycles, check step dependencies'
        Log.an().error(msg)
        raise WorkflowDAGException(msg)

    # topological sort of graph nodes
    self._topo_sort = list(nx.topological_sort(self._graph))

    # create URIs for each input and step for all contexts
    try:
        self._init_context_uris()
    except WorkflowDAGException as err:
        msg = 'cannot initialize context uris'
        Log.an().error(msg)
        raise WorkflowDAGException(str(err) + '|' + msg) from err

    # initialize input nodes
    try:
        self._init_inputs()
    except WorkflowDAGException as err:
        msg = 'cannot initialize workflow inputs'
        Log.an().error(msg)
        raise WorkflowDAGException(str(err) + '|' + msg) from err

    # initialize step nodes
    try:
        self._init_steps()
    except WorkflowDAGException as err:
        msg = 'cannot initialize workflow steps'
        Log.an().error(msg)
        raise WorkflowDAGException(str(err) + '|' + msg) from err
def is_ebunch_dag(ebunch):
    G = nx.DiGraph()
    G.add_edges_from(ebunch)
    return nx.is_directed_acyclic_graph(G)
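# --- Usage sketch for is_ebunch_dag:
print(is_ebunch_dag([(1, 2), (2, 3)]))          # True
print(is_ebunch_dag([(1, 2), (2, 3), (3, 1)]))  # False: 1->2->3->1 closes a cycle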
def test_is_directed_acyclic_graph(self):
    G = nx.generators.complete_graph(2)
    assert not nx.is_directed_acyclic_graph(G)
    assert not nx.is_directed_acyclic_graph(G.to_directed())
    assert not nx.is_directed_acyclic_graph(nx.Graph([(3, 4), (4, 5)]))
    assert nx.is_directed_acyclic_graph(nx.DiGraph([(3, 4), (4, 5)]))
def _generate_flatgraph(self):
    """Generate a graph containing only Nodes or MapNodes
    """
    import networkx as nx

    logger.debug("expanding workflow: %s", self)
    nodes2remove = []
    if not nx.is_directed_acyclic_graph(self._graph):
        raise Exception(("Workflow: %s is not a directed acyclic graph "
                         "(DAG)") % self.name)
    nodes = list(nx.topological_sort(self._graph))
    for node in nodes:
        logger.debug("processing node: %s", node)
        if isinstance(node, Workflow):
            nodes2remove.append(node)
            # use in_edges instead of in_edges_iter to allow
            # disconnections to take place properly. otherwise, the
            # edge dict is modified.
            # dj: added list() for networkx ver.2
            for u, _, d in list(self._graph.in_edges(nbunch=node, data=True)):
                logger.debug("in: connections-> %s", str(d["connect"]))
                for cd in deepcopy(d["connect"]):
                    logger.debug("in: %s", str(cd))
                    dstnode = node._get_parameter_node(cd[1], subtype="in")
                    srcnode = u
                    srcout = cd[0]
                    dstin = cd[1].split(".")[-1]
                    logger.debug("in edges: %s %s %s %s",
                                 srcnode, srcout, dstnode, dstin)
                    self.disconnect(u, cd[0], node, cd[1])
                    self.connect(srcnode, srcout, dstnode, dstin)
            # do not use out_edges_iter for reasons stated in in_edges
            # dj: for ver 2 use list(out_edges)
            for _, v, d in list(self._graph.out_edges(nbunch=node, data=True)):
                logger.debug("out: connections-> %s", str(d["connect"]))
                for cd in deepcopy(d["connect"]):
                    logger.debug("out: %s", str(cd))
                    dstnode = v
                    if isinstance(cd[0], tuple):
                        parameter = cd[0][0]
                    else:
                        parameter = cd[0]
                    srcnode = node._get_parameter_node(parameter, subtype="out")
                    if isinstance(cd[0], tuple):
                        srcout = list(cd[0])
                        srcout[0] = parameter.split(".")[-1]
                        srcout = tuple(srcout)
                    else:
                        srcout = parameter.split(".")[-1]
                    dstin = cd[1]
                    logger.debug("out edges: %s %s %s %s",
                                 srcnode, srcout, dstnode, dstin)
                    self.disconnect(node, cd[0], v, cd[1])
                    self.connect(srcnode, srcout, dstnode, dstin)
            # expand the workflow node
            # logger.debug('expanding workflow: %s', node)
            node._generate_flatgraph()
            for innernode in node._graph.nodes():
                innernode._hierarchy = ".".join((self.name, innernode._hierarchy))
            self._graph.add_nodes_from(node._graph.nodes())
            self._graph.add_edges_from(node._graph.edges(data=True))
    if nodes2remove:
        self._graph.remove_nodes_from(nodes2remove)
    logger.debug("finished expanding workflow: %s", self)
def isAcyclic(self):
    return nx.is_directed_acyclic_graph(self.G)
def is_cyclic(self):
    """:return: True if the graph has a cycle (reentrant arcs are not
    considered cycles).
    """
    if self.is_reentrant:
        return True
    return not nx.is_directed_acyclic_graph(self.nxg)
def isTree(self):
    assert NX.is_directed_acyclic_graph(self.nxDg)
    for node in self.nxDg.nodes():
        assert len(self.nxDg.in_edges(node)) < 2
def greedy(self, threshold=None, candidateSet=None,
           candidateChildFrac=2., maxNumOutgroups=1):
    orderedPairs = []
    for source, sinks in self.dm.items():
        for sink, dist in sinks.items():
            if source != self.root and sink != self.root:
                orderedPairs.append((dist, (source, sink)))
    orderedPairs.sort(key=lambda x: x[0])
    finished = set()
    self.candidateMap = dict()
    if candidateSet is not None:
        assert isinstance(candidateSet, set)
        for candidate in candidateSet:
            self.candidateMap[candidate] = True

    htable = self.heightTable()

    for candidate in orderedPairs:
        source = candidate[1][0]
        sink = candidate[1][1]
        sourceName = self.mcTree.getName(source)
        sinkName = self.mcTree.getName(sink)
        dist = candidate[0]

        # skip leaves (as sources)
        if len(self.dag.out_edges(source)) == 0:
            finished.add(source)

        # skip nodes that were already finished in a previous run
        if sourceName in self.ogMap and len(self.ogMap[sourceName]) >= maxNumOutgroups:
            finished.add(source)

        # skip invalid outgroups
        if sink in self.invalidSet:
            continue

        # skip nodes that aren't in the candidate set (if specified)
        # or don't have enough candidate children
        if not self.inCandidateSet(sink, candidateChildFrac):
            continue

        # candidate pair exceeds given threshold, so we skip
        if threshold is not None and \
                htable[sink] - htable[source] + 1 > threshold:
            continue

        # Don't use any outgroups that are a child of another node
        # already in the outgroup set
        if any([self.onSamePath(x, sink) for x in self.dag.successors(source)]):
            continue

        if source not in finished and not self.onSamePath(source, sink):
            self.dag.add_edge(source, sink, weight=dist, info='outgroup')
            if NX.is_directed_acyclic_graph(self.dag):
                htable[source] = max(htable[source], htable[sink] + 1)
                existingOutgroups = [i[0] for i in self.ogMap[sourceName]]
                if sinkName in existingOutgroups:
                    # This outgroup was already assigned to this source in a
                    # previous run. Sanity check that the distance is equal.
                    existingOutgroupDist = dict(self.ogMap[sourceName])
                    assert existingOutgroupDist[sinkName] == dist
                    continue
                self.ogMap[sourceName].append((sinkName, dist))
                if len(self.ogMap[sourceName]) >= maxNumOutgroups:
                    finished.add(source)
            else:
                self.dag.remove_edge(source, sink)

    # Since we could be adding to the ogMap instead of creating it, sort
    # the outgroups by distance again. Sorting the outgroups is critical
    # for the multiple-outgroups code to work well.
    for node, outgroups in self.ogMap.items():
        self.ogMap[node] = sorted(outgroups, key=lambda x: x[1])
def test_is_dag_nodes_degrees(self, num_nodes, degree):
    """ Tests that the generated graph is a DAG for different numbers of nodes and degrees """
    sm = generate_structure(num_nodes, degree)
    assert nx.is_directed_acyclic_graph(sm)
def test_random(n=50, p=0.1, runs=1000, debug=None):
    for run in range(runs) if debug is None else [debug]:
        g = fast_gnp_random_graph(n, p, seed=run + 1, directed=True)

        # add source connected to all nodes
        source = 100 * (n // 100) + 200
        for v in list(g.nodes()):
            g.add_edge(source, v, weight=0, tokens=0)

        # add random weights and tokens
        wsum = 1
        for _, _, data in g.edges(data=True):
            data['weight'] = randrange(1, 10)
            data['tokens'] = randrange(-1, 8)
            wsum += data['weight']

        # create shortest path formulation for initial tree
        for _, _, data in g.edges(data=True):
            data['sp'] = data['tokens'] * wsum - data['weight']

        # ensure that the graph admits a feasible solution
        its = 0
        while True:
            try:
                its += 1
                tree, distances = bfct.find_shortest_paths(g, source, arg='sp')
                break
            except NegativeCycleException as ex:
                toks = sum(map(lambda vw: g.get_edge_data(*vw).get('tokens'), ex.cycle))
                edge_data = g.get_edge_data(*choice(ex.cycle))
                edge_data['tokens'] += (1 - toks)
                edge_data['sp'] = edge_data['tokens'] * wsum - edge_data['weight']

        if debug is not None:
            import pdb
            pdb.set_trace()

        negative_toks = False
        for _, _, data in g.edges(data=True):
            if data['tokens'] < 0:
                negative_toks = True
                break

        print("Run {}: negative tokens: {}, iterations: {}".format(
            run, negative_toks, its))

        ratio, cycle = compute_mcr(g, source)
        assert ratio is not None, "Deadlocked cycle found"

        if not cycle:
            # verify that the graph is acyclic
            assert is_directed_acyclic_graph(g), \
                "[run = {}] Graph is not acyclic".format(run)
        else:
            wsum, tsum = 0, 0
            for v, w in cycle:
                data = g.get_edge_data(v, w)
                wsum += data['weight']
                tsum += data['tokens']
            assert Fraction(wsum, tsum) == ratio, \
                "[run = {}] computed MCR {} does not match ratio of critical cycle {}".format(
                    run, ratio, Fraction(wsum, tsum))

        for v, w, data in g.edges(data=True):
            data['weight'] = data['tokens'] * ratio - data['weight']
        try:
            bellman_ford(g, source)
        except Exception:
            print("Exception during run {}".format(run))
def test_random_dag_create_one() -> None:
    dag = random_dag(number_of_nodes=5, edge_density=0.4, max_in_degree=4)
    assert nx.is_directed_acyclic_graph(dag)
to_one_paper = [vrt_name_one_paper] * num_child  # the newer paper

# concatenate the lists
from_all_papers = from_all_papers + from_one_paper
to_all_papers = to_all_papers + to_one_paper

##### Section: Draw Graph #####

# Build a dataframe with 4 connections
df = pd.DataFrame({'from': from_all_papers, 'to': to_all_papers})

# Build your graph
G = nx.from_pandas_edgelist(df, 'from', 'to', create_using=nx.DiGraph())

# determine vertices' coordinates
if not nx.is_directed_acyclic_graph(G):
    raise TypeError('Cannot draw a graph that is not a DAG')
vertices_sorted = list(nx.topological_sort(G))
num_vertices = len(vertices_sorted)
posi = {}
for i in range(num_vertices):
    vrt_name = vertices_sorted[i]
    posi_vert = -i / num_vertices
    posi_hori = random.random()
    posi[vrt_name] = np.array([posi_hori, posi_vert])

# make the vertices less dense
posi_new = vertices_less_dense(posi)
def RP_RL(self, model, model_id, parameters_file):
    start_RL = time.perf_counter()

    os.chdir(rpconfig.path)

    # 14k m10n10
    filenames_file = open(rpconfig.filename_profiles, 'r')
    filenames = [i.strip('\n') for i in filenames_file]
    train_filenames = filenames[:10000] + filenames[12000:]
    test_filenames = filenames[10000:11000]  # the same 1000 profiles we used in the paper
    validation_filenames = filenames[11000:11500]

    # m10n10
    # filenames = sorted(glob.glob('M10N10-*.csv'))
    # train_filenames = filenames[0:80000]
    # test_filenames = filenames[80000:100000]

    # m20n20
    # filenames_file = open(rpconfig.filename_profiles, 'r')
    # filenames = [i.strip('\n') for i in filenames_file]
    # train_filenames = filenames[:1]  # + filenames[12000:]
    # test_filenames = filenames[:1]
    # validation_filenames = filenames  # all available m20n20

    # debugging
    # train_filenames = ['meh']
    # test_filenames = ['4circle.soc']

    # m50n50
    # filenames = sorted(glob.glob('M50N50-*.csv'))
    # train_filenames = filenames[0:1]
    # test_filenames = filenames[0:1000]

    # m40n40
    # filenames = sorted(glob.glob('M40N40-*.csv'))
    # train_filenames = filenames[0:1]
    # test_filenames = filenames[0:1000]

    # m50n50
    # filenames_file = open(rpconfig.filename_profiles, 'r')
    # filenames = [i.strip('\n') for i in filenames_file]
    # train_filenames = filenames[0:1]
    # test_filenames = filenames[0:1]
    # validation_filenames = filenames

    # Read true winners
    os.chdir(rpconfig.winners_path)
    true_winners = []
    winners_file = open("./winners_14k.txt", 'r')
    # winners_file = open("./winners_m20n20.txt", 'r')
    for line in winners_file:
        winners = []
        line = line.replace('[', '')
        line = line.replace(']', '')
        line = line.replace(' ', '')
        line = line.replace('\n', '')
        line = line.split(',')
        for c in line:
            winners.append(int(c))
        true_winners.append(winners)
    os.chdir(rpconfig.path)

    # Split true_winners into train and test
    true_winners_train = true_winners[:10000] + true_winners[12000:]
    true_winners_test = true_winners[10000:11000]
    true_winners_val = true_winners[11000:11100]

    # m20n20
    # true_winners_train = true_winners[:1]
    # true_winners_test = true_winners[:1]
    # true_winners_val = true_winners

    # Open files for output
    output_filename = str(model_id) + "_RL_training_results.txt"
    loss_filename = str(model_id) + "_RL_loss.txt"
    test_output_filename = str(model_id) + "_RL_test_results.txt"
    test_output_summary_filename = str(model_id) + "_RL_test_summary_results.txt"
    validation_output_filename = str(model_id) + "_RL_val_results.txt"
    validation_output_summary_filename = str(model_id) + "_RL_val_summary_results.txt"
    output_file = open(rpconfig.results_path + output_filename, "w+")
    test_output_file = open(rpconfig.results_path + test_output_filename, "w+")
    test_output_summary_file = open(rpconfig.results_path + test_output_summary_filename, "w+")
    val_output_file = open(rpconfig.results_path + validation_output_filename, "w+")
    val_output_summary_file = open(rpconfig.results_path + validation_output_summary_filename, "w+")
    loss_file = open(rpconfig.results_path + loss_filename, "w+")

    # Create RL base
    if params.f_use_v2:
        if params.f_experience_replay:
            print("Experience replay not implemented for v2")
            sys.exit(0)
        else:
            base = RL_base_v2(len(train_filenames))
    else:
        if params.f_use_PUT_agent and params.f_experience_replay:
            base = RL_base_PUT_agent_experience_replay(len(train_filenames))
        elif params.f_experience_replay:
            base = RL_base_experience_replay(len(train_filenames))
        else:
            base = RL_base(len(train_filenames))

    # Create agent
    if params.f_use_v2:
        agent = RP_RL_agent_v2(model, base.learning_rate, loss_file)
    else:
        if params.f_use_PUT_agent and params.f_experience_replay:
            agent = RP_RL_agent_PUT_experience(model, base.learning_rate, loss_file)
        elif params.f_use_PUT_agent:
            agent = RP_RL_agent_PUT(model, base.learning_rate, loss_file)
        else:
            agent = RP_RL_agent(model, base.learning_rate, loss_file)

    total_time = 0
    num_times_tested = 0

    print("***********************************************")
    print("Starting Reinforcement Learning", model_id)

    # Print header
    header = "Inputfile\tPUT-winners\tExploration Rate\tLearning Rate\tTau\tStop Conditions\tNum Nodes\tNum Winners Found\tLoss\tAvg Loss\tIs Acyclic\tIter To Find Winner\tIters To Find All Winners\tRunning Nodes\tWinners Dist\tnum_iters_reset_skipped\tNum hashed\tRuntime"
    print(header)
    output_file.write(header + '\n')
    output_file.flush()

    loss_file.write('Num Nodes' + '\t' + 'Loss Per Node' + '\n')
    loss_file.flush()

    # Open winner distribution file
    if params.f_use_winners_distribution:
        winners_distribution_file = open(rpconfig.winners_distribution_filename, 'r')
        winners_distribution = {}
        for line in winners_distribution_file:
            line = line.strip('\n')
            line = line.split('\t')
            if len(line) == 1:
                current_file = line[0]
                continue
            if current_file not in winners_distribution:
                winners_distribution[current_file] = {}
            winners_distribution[current_file][int(line[0])] = int(line[1])
        winners_distribution_file.close()

    # Shuffle training data
    if params.shuffle_training_data:
        combined = list(zip(train_filenames, true_winners_train))
        random.shuffle(combined)
        train_filenames, true_winners_train = zip(*combined)

    # Print test output file heading
    if params.f_test_using_PUT_RP:
        test_header = "inputfile\tPUT-winners\tnum nodes\tdiscovery states\tmax discovery state\tdiscovery times\tmax discovery times\tstop condition hits\tsum stop cond hits\tnum hashes\tnum initial bridges\tnum redundant edges\ttime for cycles\truntime"
    elif params.f_use_PUT_agent:
        test_header = 'Profile\tPUT-Winners\tNum Winners\tMissed Winners\tNum Missed Winners\tNum Nodes\tNode Discovered\t100% Nodes\tRuntime Discovered\t100% Runtime\tRuntime'
        test_summary_header = "Test\tNum PUT-Winners Found\tTotal Num Nodes\tAvg Nodes Per Profile\tAvg 100% Nodes\tTotal Time\tAvg Time Per Profile\tAvg 100% Time\n"
        test_output_summary_file.write(test_summary_header)
        val_output_summary_file.write(test_summary_header)
        test_output_summary_file.flush()
        val_output_summary_file.flush()
    else:
        test_header = 'Profile\tPUT-Winners\tNum Winners\tMissed Winners\tNum Missed Winners\tNum Iters\tIter Discoverd\tMax Iter Discovery\tTime Discovered\tMax Time Discovery\tRuntime'
        test_summary_header = "Test\tNum PUT-Winners Found\tTotal Num Iterations\tAvg Iterations Per Profile\tAvg 100% Iters\tTotal Time\tAvg Time Per Profile\tAvg 100% Time\n"
        test_output_summary_file.write(test_summary_header)
        val_output_summary_file.write(test_summary_header)
        test_output_summary_file.flush()
        val_output_summary_file.flush()
    test_output_file.write(test_header + '\n')
    val_output_file.write(test_header + '\n')
    test_output_file.flush()
    val_output_file.flush()

    # Print additional parameters
    parameters_file.write("RL Data Path\t" + rpconfig.path + '\n')
    parameters_file.write("RL Num Training Data\t" + str(len(train_filenames)) + '\n')
    parameters_file.write("RL Num Testing Data\t" + str(len(test_filenames)) + '\n')
    parameters_file.write("RL Train From...To\t" + train_filenames[0] + "\t" + train_filenames[-1] + '\n')
    parameters_file.write("RL Test From...To\t" + test_filenames[0] + "\t" + test_filenames[-1] + '\n')
    parameters_file.write("RL Loss Function\t" + str(agent.loss_fn) + '\n')
    parameters_file.flush()

    val_results = []

    if params.test_10x:
        print("********** testing 10x *******************")
        assert params.f_start_from_default or params.test_with_LP
        for t in range(10):
            test_model(test_output_file, test_output_summary_file, agent,
                       test_filenames, true_winners_test, model_id,
                       "final_" + str(t), False)
        # assert not params.f_use_testing_v2
        # assert params.f_start_from_default
        # num_samples_range = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]  # runtimes
        # for num_samples in num_samples_range:
        #     params.num_test_iterations = num_samples
        #     print(params.num_test_iterations)
        #     start = time.perf_counter()
        #     test_model(val_output_file, val_output_summary_file, agent, validation_filenames, true_winners_val, model_id, num_times_tested, True)
        #     num_times_tested += 1
        # return

    for epoch in range(params.num_epochs):
        i = 0
        print('---------------Epoch ' + str(epoch) + '------------------------')

        # Shuffle training data
        if params.shuffle_training_data:
            combined = list(zip(train_filenames, true_winners_train))
            random.shuffle(combined)
            train_filenames, true_winners_train = zip(*combined)

        for inputfile in train_filenames:
            # Test model on validation data
            # Not necessary since epochs added
            # if i % params.test_every == 0 and (params.test_at_start or i != 0):
            #     if params.f_test_using_PUT_RP:
            #         test_model_using_PUT_RP(test_output_file, agent, test_filenames, model_id, num_times_tested)
            #     else:
            #         num_iters = test_model(val_output_file, val_output_summary_file, agent, validation_filenames, true_winners_val, model_id, num_times_tested, True)
            #         val_results.append(num_iters)
            #     num_times_tested += 1

            if i % 10 == 0:
                RP_utils.save_model(model, "RL_" + str(i), model_id)

            profile = read_profile(inputfile)

            # Run the profile
            print(inputfile)
            start = time.perf_counter()
            if params.f_use_winners_distribution:
                rp_results, iter_to_find_winner, iter_to_find_all_winners = base.reinforcement_loop(
                    agent, profile, winners_distribution=winners_distribution[inputfile])
            elif params.f_train_till_find_all_winners:
                rp_results, iter_to_find_winner, iter_to_find_all_winners = base.reinforcement_loop(
                    agent, profile, true_winners=set(true_winners_train[i]))
            else:
                rp_results, iter_to_find_winner, iter_to_find_all_winners = base.reinforcement_loop(
                    agent, profile, true_winners=set(true_winners_train[i]), filename=inputfile)
            end = time.perf_counter()

            # Evaluate and output results
            PUT_winners = sorted(rp_results.known_winners)
            stats = agent.stats
            total_time += (end - start)

            if stats.num_nodes == 0:
                avg_loss_per_node = 0
            else:
                avg_loss_per_node = stats.running_loss / stats.num_nodes

            is_acyclic = str(nx.is_directed_acyclic_graph(agent.E_0))

            if params.f_use_winners_distribution:
                output_winners_distribution = winners_distribution[inputfile]
            else:
                output_winners_distribution = {}

            result_text = "%s\t%r\t%f\t%f\t%f\t%r\t%d\t%d\t%f\t%f\t%s\t%r\t%d\t%d\t%r\t%d\t%d\t%f" % \
                (inputfile, PUT_winners, base.exploration_rate, base.learning_rate,
                 base.tau, stats.stop_condition_hits, stats.num_nodes, len(PUT_winners),
                 stats.running_loss, avg_loss_per_node, is_acyclic, iter_to_find_winner,
                 iter_to_find_all_winners, agent.running_nodes, output_winners_distribution,
                 stats.num_iters_reset_skipped, agent.stats.num_hashed, end - start)
            print(i, result_text)
            output_file.write(result_text + '\n')
            output_file.flush()

            i += 1

        # Test on validation data after each epoch
        if params.f_test_using_PUT_RP:
            test_model_using_PUT_RP(test_output_file, agent, test_filenames,
                                    model_id, num_times_tested)
        else:
            num_iters = test_model(val_output_file, val_output_summary_file, agent,
                                   validation_filenames, true_winners_val, model_id,
                                   num_times_tested, True)
            val_results.append(num_iters)
        num_times_tested += 1

    print('----------------------Training Done------------------------------')
    print("Validation results:", val_results)
    best_model = np.argmin(val_results)
    print("Best model:", best_model)

    # Use best model from validation testing to test 10x on test set
    RP_utils.load_model(model, rpconfig.results_path + str(model_id) +
                        "_RL_val_" + str(best_model) + "_model.pth.tar")

    # Create agent
    if params.f_use_v2:
        agent_testing = RP_RL_agent_v2(model, base.learning_rate)
    else:
        agent_testing = RP_RL_agent(model, base.learning_rate)

    for t in range(10):
        test_model(test_output_file, test_output_summary_file, agent_testing,
                   test_filenames, true_winners_test, model_id,
                   "final_" + str(t), False)

    print("Total Time to Train: %f" % total_time)
    print("Average Time Per Profile: %f" % (total_time / len(train_filenames)))
    print("Total RL Runtime: %f" % (time.perf_counter() - start_RL))

    # Close files
    output_file.close()
    test_output_file.close()
    test_output_summary_file.close()
    val_output_file.close()
    val_output_summary_file.close()
    loss_file.close()
def is_directed_acyclic(self):
    """Returns if this graph is a DAG or not."""
    return nx.is_directed_acyclic_graph(self)
def parse_obo_file_and_build_dags(obo_file, forced=False):
    """
    Parse the GO OBO into a networkx MultiDiGraph using obonet.
    Then construct a DAG for each category using the 'is_a' relationships.

    *forced*: this function will store the dags as an edgelist for faster parsing.
        If forced is true, it will overwrite those.

    *returns*: a dictionary containing a DAG for each of the 3 GO categories
        'C', 'F', and 'P'
    """
    global id_to_name, name_to_id, goid_to_category
    dag_edgelist_file = obo_file.replace(".obo", "-isa-edgelist.txt")
    goid_names_file = obo_file.replace(".obo", "-names.txt")
    if not forced and os.path.isfile(dag_edgelist_file) and os.path.isfile(goid_names_file):
        print("Reading GO dags from %s" % (dag_edgelist_file))
        go_dags = {}
        for c in ['C', 'F', 'P']:
            go_dags[c] = nx.DiGraph()
        with open(dag_edgelist_file, 'r') as f:
            for line in f:
                if line[0] == '#':
                    continue
                g1, g2, c = line.rstrip().split('\t')[:3]
                go_dags[c].add_edge(g1, g2)
        for c, dag in go_dags.items():
            print("\tDAG for %s has %d nodes, %d edges"
                  % (c, dag.number_of_nodes(), dag.number_of_edges()))
            # also set the category for each GO term
            for n in dag.nodes():
                goid_to_category[n] = c
        with open(goid_names_file, 'r') as f:
            for line in f:
                if line[0] == '#':
                    continue
                goid, name, c = line.rstrip().split('\t')[:3]
                name_to_id[name] = goid
                id_to_name[goid] = name
    else:
        print("Reading GO OBO file from %s" % (obo_file))
        # obonet returns a networkx MultiDiGraph object containing all of
        # the relationships in the ontology
        graph = obonet.read_obo(obo_file)
        # build a mapping from the GO term IDs to the name of the GO term
        id_to_name = {id_: data['name'] for id_, data in graph.nodes(data=True)}
        name_to_id = {data['name']: id_ for id_, data in graph.nodes(data=True)}
        print("\t%d nodes, %d edges" % (graph.number_of_nodes(), graph.number_of_edges()))
        # make sure this really is a DAG
        if not nx.is_directed_acyclic_graph(graph):
            print("\tWarning: graph is not a dag")

        # copied this section from cell 19 of https://github.com/IGACAT/DataPreprocessing/blob/master/scripts/populate_go_terms.ipynb
        # Extract all edges with "is_a" relationship.
        # I did not include "part_of" relationships because the molecular_function
        # and biological_process DAGs are not separate from each other if I do
        is_a_edge_list = []
        for child, parent, key in graph.out_edges(keys=True):
            if key == 'is_a':
                is_a_edge_list.append((child, parent))

        # get an is_a-type edge-induced subgraph
        is_a_subG = nx.MultiDiGraph(is_a_edge_list)
        full_to_category = {'cellular_component': 'C',
                            'biological_process': 'P',
                            'molecular_function': 'F'}
        go_dags = {}
        # there are 3 weakly connected components, one for each category
        for wcc in nx.weakly_connected_components(is_a_subG):
            G = is_a_subG.subgraph(wcc)
            # store this DAG in the dictionary of GO DAGs
            # find the root node (it has no out-edges)
            root_node = None
            for node in G.nodes():
                if G.out_degree(node) == 0:
                    root_node = node
                    # print(root_node, id_to_name[node])
                    break
            c = full_to_category[id_to_name[root_node]]
            print("\tDAG for %s has %d nodes" % (id_to_name[root_node], len(wcc)))
            go_dags[c] = G
            # also set the category for each GO term
            for n in G.nodes():
                goid_to_category[n] = c

        print("\twriting dags to %s" % (dag_edgelist_file))
        with open(dag_edgelist_file, 'w') as out:
            out.write("#child\tparent\thierarchy\n")
            for c, dag in go_dags.items():
                out.write(''.join("%s\t%s\t%s\n" % (g1, g2, c)
                                  for g1, g2 in dag.edges()))

        # also write the names to a file
        print("\twriting goid names to %s" % (goid_names_file))
        with open(goid_names_file, 'w') as out:
            for goid in id_to_name:
                out.write("%s\t%s\t%s\n" % (goid, id_to_name[goid], goid_to_category[goid]))
    return go_dags
def get_graph_properties(edges): # Set up graph connections = np.array([int(x) for x in edges.split(';')]) nodes = sorted(list(set(connections))) # Calculate Properties properties = [] timings = {} if connections[0] > 0: edges = connections.reshape(int(connections.size / 2), 2) timeS = time.time() # directed graph G = nx.DiGraph() G.add_edges_from(edges) # undirected graph U = nx.Graph() U.add_edges_from(edges) # graph generated # property 1: number of components num_comp = nx.number_connected_components(U) properties.append(num_comp) # property 2: number of strongly connected components num_strong_comp = nx.number_strongly_connected_components(G) properties.append(num_strong_comp) # property 3: average in/out degree indeg = [] outdeg = [] indeg_ls = list(G.in_degree()) outdeg_ls = list(G.out_degree()) for x in np.arange(len(nodes)): indeg.append(indeg_ls[x][1]) outdeg.append(outdeg_ls[x][1]) av_deg = np.mean(indeg) properties.append(av_deg) # property 4: link density linkden = connections.size / (len(nodes) * len(nodes)) properties.append(linkden) # property 5: number of self loops numloop = list(G.selfloop_edges()) numloop = len(numloop) properties.append(numloop) # # property 6: number of simple cycles (excluding self loops) # numcyc = list(nx.simple_cycles(G)) # numcyc = len(numcyc) - numloop # properties.append(numcyc) # timings.update({'p6':time.time()-timeS}) # print('p6') # print(timings['p6']) # timeS = time.time() # find all components components = list(nx.connected_components(U)) ischain = [None] * len(components) istree = [None] * len(components) isdag = [None] * len(components) unicel = [None] * len(components) isscc = [None] * len(components) iscyc = [None] * len(components) iseul = [None] * len(components) indeg_by_comp = [] outdeg_by_comp = [] node_conn = [0] * len(components) av_clust = [0.] * len(components) assort = [0.] * len(components) indeg_cen_av = [0.] * len(components) indeg_cen_max = [0.] * len(components) indeg_cen_min = [0.] * len(components) outdeg_cen_av = [0.] * len(components) outdeg_cen_max = [0.] * len(components) outdeg_cen_min = [0.] * len(components) bet_cen_av = [0.] * len(components) bet_cen_max = [0.] * len(components) bet_cen_min = [0.] * len(components) eig_cen_av = [0.] * len(components) eig_cen_max = [0.] * len(components) eig_cen_min = [0.] * len(components) triangles_av = [0.] * len(components) triangles_max = [0.] * len(components) triangles_min = [0.] * len(components) squares_av = [0.] * len(components) squares_max = [0.] * len(components) squares_min = [0.] * len(components) transitivity = [0.] * len(components) rc = [0.] * len(components) loopnumber = [0] * len(components) for compnum in np.arange(len(components)): # property 6: ischain?(remove self-loops and then test this property) # want: how many chains does the graph contain.. look at each component, not the whole graph in one go. # most graphs are single components. G1 = G.subgraph(list(components[compnum])) Gnoself = G1.copy() Gnoself.remove_edges_from(Gnoself.selfloop_edges()) Unoself = nx.Graph() Unoself.add_edges_from(Gnoself.edges) # if all in and out degrees are 1, graph is a chain..do not include in trees indeg2 = [] outdeg2 = [] indeg_ls2 = list(Gnoself.in_degree()) outdeg_ls2 = list(Gnoself.out_degree()) # nx gives indeg and outdeg as tuples (nodename, in/out deg). 
# ...which is why we need the for loop below for x in np.arange(len(G1.nodes())): indeg2.append(indeg_ls2[x][1]) outdeg2.append(outdeg_ls2[x][1]) indeg_by_comp.append(int_arr_to_str(indeg2, delim=';')) outdeg_by_comp.append(int_arr_to_str(outdeg2, delim=';')) indeg2 = np.array(indeg2) outdeg2 = np.array(outdeg2) in_min_out = indeg2 - outdeg2 ischain[compnum] = int((np.sum(in_min_out) == 0) & (np.sum(np.abs(in_min_out)) == 2) & (np.all(indeg2 <= 1)) & (np.all(outdeg2 <= 1))) # property 7: istree (remove chains first) istree[compnum] = int((nx.is_tree(Gnoself) - ischain[compnum]) > 0) # property 8: isdag (only looking at DAGs other than trees and chains) isdag[compnum] = int((int(nx.is_directed_acyclic_graph(Gnoself)) - istree[compnum] - ischain[compnum]) > 0) if isdag[compnum] > 0: loopnumber[compnum] = len(list(Gnoself.edges)) - (len(list(Gnoself.nodes)) - 1) # property 9: single celled unicel[compnum] = int(len(Gnoself.nodes) == 1) istree[compnum] = int(istree[compnum]) - int(unicel[compnum]) # nx counts a single node with no self-edge as a tree # property 10: isscc (excluding unicellular) num_strong_comp2 = nx.number_strongly_connected_components(Gnoself) isscc[compnum] = int(num_strong_comp2 == 1) isscc[compnum] = int((isscc[compnum] - unicel[compnum]) > 0) # property 11: iscyc (cyclic graphs other than those with a single scc and single celled graphs) iscyc[compnum] = int((isdag[compnum] + istree[compnum] + ischain[compnum] + isscc[compnum] + unicel[compnum]) == 0) # property 12: is eulerian iseul[compnum] = int(nx.is_eulerian(Gnoself)) # property 13: node connectivity node_conn[compnum] = approx.node_connectivity(Gnoself) # property 14: clustering coefficient av_clust[compnum] = nx.average_clustering(Gnoself) # property 15: assortativity (Pearson's coefficient) try: assort[compnum] = nx.degree_pearson_correlation_coefficient(Gnoself) # TODO: check except Exception: assort[compnum] = 0.0 # undefined e.g. for regular graphs; fall back to 0.0 # property 16,17,18: in degree centrality (average, maximum and minimum) indeg_cen = [] dict1 = nx.in_degree_centrality(Gnoself) for a1 in dict1: indeg_cen.append(dict1[a1]) indeg_cen_av[compnum] = np.average(indeg_cen) indeg_cen_max[compnum] = max(indeg_cen) indeg_cen_min[compnum] = min(indeg_cen) # property 19,20,21: out degree centrality (average, maximum, minimum) outdeg_cen = [] dict1 = nx.out_degree_centrality(Gnoself) for a1 in dict1: outdeg_cen.append(dict1[a1]) outdeg_cen_av[compnum] = np.average(outdeg_cen) outdeg_cen_max[compnum] = max(outdeg_cen) outdeg_cen_min[compnum] = min(outdeg_cen) # property 22,23,24: betweenness centrality (average, maximum, minimum) bet_cen = [] dict1 = nx.betweenness_centrality(Gnoself) for a1 in dict1: bet_cen.append(dict1[a1]) bet_cen_av[compnum] = np.average(bet_cen) bet_cen_max[compnum] = max(bet_cen) bet_cen_min[compnum] = min(bet_cen) # property 25,26,27: eigenvector centrality (average, maximum, minimum) eig_cen = [] try: dict1 = nx.eigenvector_centrality(Gnoself) for a1 in dict1: eig_cen.append(dict1[a1]) eig_cen_av[compnum] = np.average(eig_cen) eig_cen_max[compnum] = max(eig_cen) eig_cen_min[compnum] = min(eig_cen) except nx.PowerIterationFailedConvergence: pass # property 28,29,30: number of triangles for each node (average, maximum, minimum) triangles = [] dict1 = nx.triangles(Unoself) for a1 in dict1: triangles.append(dict1[a1]) if len(triangles): triangles_av[compnum] = np.average(triangles) triangles_max[compnum] = max(triangles) triangles_min[compnum] = min(triangles) # property 31: transitivity (fraction of all possible triangles present in the graph)
transitivity[compnum] = nx.transitivity(Gnoself) # property 32,33,34: square clustering for each node (fraction of all possible squares present at a node) squares = [] dict1 = nx.square_clustering(Gnoself) for a1 in dict1: squares.append(dict1[a1]) if len(squares): squares_av[compnum] = np.average(squares) squares_max[compnum] = max(squares) squares_min[compnum] = min(squares) # property 35: rich club coefficient if len(list(Unoself.nodes())) > 3: rc[compnum] = 0.0 # rc[compnum] = nx.rich_club_coefficient(Unoself).values() # only works if the graph has 4 or more nodes # property 36 and 37: number of source and target nodes (not implemented here) iseul = sum(iseul) iscyc = sum(iscyc) isscc = sum(isscc) unicel = sum(unicel) isdag = sum(isdag) istree = sum(istree) ischain = sum(ischain) indeg_by_comp = ';'.join([str(x) for x in indeg_by_comp]) outdeg_by_comp = ';'.join([str(x) for x in outdeg_by_comp]) node_conn = ';'.join([str(x) for x in node_conn]) # node connectivity for each component avav_clust = np.average(av_clust) # average clustering coefficient over all components av_clust = ';'.join([str(round(x, 2)) for x in av_clust]) # average clustering coefficients for each component av_assort = np.average(assort) # average assortativity over all components assort = ';'.join([str(round(x, 2)) for x in assort]) # assortativity for each component indeg_cen_avav = np.average(indeg_cen_av) # average indeg centrality over all components indeg_cen_av = ';'.join([str(round(x, 2)) for x in indeg_cen_av]) # average indeg centrality for each component indeg_cen_maxmax = max(indeg_cen_max) # maximum indeg centrality across all components indeg_cen_max = ';'.join([str(round(x, 2)) for x in indeg_cen_max]) # maximum indeg centrality for each component indeg_cen_minmin = min(indeg_cen_min) # minimum indeg centrality across all components indeg_cen_min = ';'.join([str(round(x, 2)) for x in indeg_cen_min]) # minimum indeg centrality for each component outdeg_cen_avav = np.average(outdeg_cen_av) outdeg_cen_av = ';'.join([str(round(x, 2)) for x in outdeg_cen_av]) outdeg_cen_maxmax = max(outdeg_cen_max) outdeg_cen_max = ';'.join([str(round(x, 2)) for x in outdeg_cen_max]) outdeg_cen_minmin = min(outdeg_cen_min) outdeg_cen_min = ';'.join([str(round(x, 2)) for x in outdeg_cen_min]) bet_cen_avav = np.average(bet_cen_av) bet_cen_av = ';'.join([str(round(x, 2)) for x in bet_cen_av]) bet_cen_maxmax = max(bet_cen_max) bet_cen_max = ';'.join([str(round(x, 2)) for x in bet_cen_max]) bet_cen_minmin = min(bet_cen_min) bet_cen_min = ';'.join([str(round(x, 2)) for x in bet_cen_min]) eig_cen_avav = np.average(eig_cen_av) eig_cen_av = ';'.join([str(round(x, 2)) for x in eig_cen_av]) eig_cen_maxmax = max(eig_cen_max) eig_cen_max = ';'.join([str(round(x, 2)) for x in eig_cen_max]) eig_cen_minmin = min(eig_cen_min) eig_cen_min = ';'.join([str(round(x, 2)) for x in eig_cen_min]) triangles_avav = np.average(triangles_av) triangles_av = ';'.join([str(x) for x in triangles_av]) triangles_maxmax = max(triangles_max) triangles_max = ';'.join([str(x) for x in triangles_max]) triangles_minmin = min(triangles_min) triangles_min = ';'.join([str(x) for x in triangles_min]) transitivity_av = np.average(transitivity) transitivity_max = max(transitivity) transitivity_min = min(transitivity) transitivity = ';'.join([str(x) for x in transitivity]) squares_avav = np.average(squares_av) squares_maxmax = max(squares_max) squares_minmin = min(squares_min) squares_av = ';'.join([str(x) for x in squares_av]) squares_max = ';'.join([str(x) for x in squares_max])
squares_min = ';'.join([str(x) for x in squares_min]) rc_av = np.average(rc) rc_max = max(rc) rc_min = min(rc) rc = ';'.join([str(x) for x in rc]) ln = [loopnumber[x] for x in np.nonzero(loopnumber)[0]] if any(ln): loopnumber_av = np.average(ln) else: loopnumber_av = 0.0 loopnumber = ';'.join([str(x) for x in loopnumber]) # check.. sum of iscyc, isscc, unicel, dag,tree, chain should be the total number of components if num_comp != (iscyc + isscc + unicel + isdag + istree + ischain): print('Number of components is wrong!!!!!!') print(num_comp) print([iscyc, isscc, unicel, isdag, istree, ischain]) sys.exit() properties.append(indeg_by_comp) # string properties.append(outdeg_by_comp) #string properties.append(ischain) #int properties.append(istree) #int properties.append(isdag) #int properties.append(unicel) #int properties.append(isscc) #int properties.append(iscyc) #int properties.append(iseul) #int properties.append(loopnumber_av) #float properties.append(loopnumber) #string properties.append(node_conn) #string properties.append(avav_clust) #float properties.append(av_clust) #string properties.append(av_assort) #float properties.append(assort) #string properties.append(indeg_cen_avav) #float properties.append(indeg_cen_av) #string properties.append(indeg_cen_maxmax) #float properties.append(indeg_cen_max) #string properties.append(indeg_cen_minmin) #float properties.append(indeg_cen_min) #string properties.append(outdeg_cen_avav) #float properties.append(outdeg_cen_av) #string properties.append(outdeg_cen_maxmax) #float properties.append(outdeg_cen_max) #string properties.append(outdeg_cen_minmin) #float properties.append(outdeg_cen_min) #string properties.append(bet_cen_avav) #float properties.append(bet_cen_av) #string properties.append(bet_cen_maxmax) #float properties.append(bet_cen_max) #string properties.append(bet_cen_minmin) #float properties.append(bet_cen_min) #string properties.append(eig_cen_avav) #float properties.append(eig_cen_av) #string properties.append(eig_cen_maxmax) #float properties.append(eig_cen_max) #string properties.append(eig_cen_minmin) #float properties.append(eig_cen_min) #string properties.append(triangles_avav) #float properties.append(triangles_av) #string properties.append(triangles_maxmax) #float properties.append(triangles_max) #string properties.append(triangles_minmin) #float properties.append(triangles_min) #string properties.append(transitivity_av) # float properties.append(transitivity_max) #float properties.append(transitivity_min) #float properties.append(transitivity) #string properties.append(squares_avav) #float properties.append(squares_av) #string properties.append(squares_maxmax) #float properties.append(squares_max) #string properties.append(squares_minmin) #float properties.append(squares_min) #string properties.append(rc_av) # float properties.append(rc_max) #float properties.append(rc_min) #float properties.append(rc) #string # append more properties..... # property 14: # property x: in-degree sequence #indeg = # list(G.in_degree())[iterate over number of nodes][1] # property y: out-degree sequence #outdeg = # list(G.in_degree())[iterate over number of nodes][1] #..... else: properties = [0] * 2 + [0.] * 2 + [0] + [''] * 2 + [0] * 7 + [ 0. ] + [''] * 2 + [0., ''] * 17 + [0.] * 3 + [''] + [0., ''] * 3 + [ 0., 0., 0., '' ] # return list of properties return properties
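get_graph_properties expects the whole edge list flattened into a single semicolon-separated string of integer node ids (source, target, source, target, ...). A usage sketch, assuming the imports the function relies on (numpy as np, networkx as nx, time, sys, the approximation module as approx, and the int_arr_to_str helper) are in scope:

# a 3-cycle with a dangling tail: 1->2, 2->3, 3->1, 3->4
props = get_graph_properties('1;2;2;3;3;1;3;4')
print(props[:5])  # components, strong components, mean in-degree, link density, self-loops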
def is_feasible(self): import networkx as nx G = nx.DiGraph() edg = tuple(set(unfold(self.A)) | set(unfold(self.E))) G.add_edges_from(edg) return nx.is_directed_acyclic_graph(G)
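is_feasible depends on an unfold helper that is not shown. One plausible reading (an assumption, not the author's code) is that self.A and self.E map each item to the items that must precede it, and unfold flattens them into edge tuples; a cycle in the combined constraints then means infeasibility:

import networkx as nx

def unfold(relation):
    # hypothetical helper: flatten {node: predecessors} into (pred, node) edges
    return [(p, n) for n, preds in relation.items() for p in preds]

A = {"b": ["a"], "c": ["b"]}  # a before b, b before c
E = {"a": ["c"]}              # c before a: closes a cycle
G = nx.DiGraph()
G.add_edges_from(set(unfold(A)) | set(unfold(E)))
print(nx.is_directed_acyclic_graph(G))  # False, so the constraints are infeasible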
def find_topological_order(directory, target=None): graph = nx.DiGraph() # First, walk the installers and find real providers for root, _, files in os.walk(directory): if INSTALLER in files: name = os.path.basename(root) graph.add_node(name, transitive=False) # Second, find all dependees and dependers for root, _, files in os.walk(directory): if INSTALLER in files: name = os.path.basename(root) dependencies, satisfies = read_dependencies(os.path.join(root, INSTALLER)) for dependence in dependencies: # If by now the dependence does not have a node, it does not have a real # provider, so we assume it is transitive, i.e. provided by something # with a different name if not graph.has_node(dependence): graph.add_node(dependence, transitive=True) # Set edge from dependee to its provider add_edge = functools.partial(lambda a, b: graph.add_edge(b, a), name) list(map(add_edge, dependencies)) for sat in satisfies: # If there is something that tries to satisfy an already satisfied # dependency, we consider this an error if graph.has_node(sat) and len(list(graph.predecessors(sat))): print("{} tries to satisfy already existing installer {}".format(name, sat)) return False, None graph.add_node(sat, transitive=True) # Set edge from transitive provider to its real provider add_edge = functools.partial(lambda a, b: graph.add_edge(a, b), name) list(map(add_edge, satisfies)) # print graph.edges() # sys.exit(0) # Not all dependencies are provided by installers of the same name. By # collapsing the graph on these 'satisfying' dependencies we point a dependee # to the right installer. nodes_to_remove = list() for node, transitive in graph.nodes(data='transitive'): if not transitive: continue dependees = list(graph.successors(node)) providers = list(graph.predecessors(node)) assert len(providers) == 1, 'Must be exactly one provider, node: {}, dependees: {}, providers: {}'.format(node, dependees, providers) # Remove transitive node with all its edges nodes_to_remove.append(node) # Reconnect the graph add_edge = functools.partial(graph.add_edge, providers[0]) list(map(add_edge, dependees)) for node in nodes_to_remove: graph.remove_node(node) if not nx.is_directed_acyclic_graph(graph): print("Found dependency cycle: {}".format(nx.find_cycle(graph))) return False, None if target: closure = set([target]) while True: new = closure | set(sum(list(map(list, list(map(graph.predecessors, closure)))), [])) if closure == new: break closure = new return True, list(nx.topological_sort(graph.subgraph(closure))) return True, list(nx.topological_sort(graph))
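The closure loop at the end is a fixed-point computation of the target's ancestors in the provider -> dependee graph; nx.ancestors expresses the same thing directly. A small illustration on a toy dependency graph (installer names hypothetical):

import networkx as nx

# edges point from provider to dependee, as in the graph built above
g = nx.DiGraph([("libA", "app"), ("libB", "app"), ("libB", "libA"), ("libC", "tool")])
closure = nx.ancestors(g, "app") | {"app"}
print(list(nx.topological_sort(g.subgraph(closure))))  # e.g. ['libB', 'libA', 'app']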
def __init__(self, G: nx.DiGraph): assert (nx.is_directed_acyclic_graph(G)), f"{G.edges()} not DAG" self.dag = G
## deepgo/data/train # work_dir = '/u/flashscratch/d/datduong/goAndGeneAnnotationDec2018/' # work_dir = '/u/flashscratch/d/datduong/goAndGeneAnnotation/' work_dir = '/u/flashscratch/d/datduong/deepgo/data/' os.chdir(work_dir) # Read the GO ontology graph = obonet.read_obo('go.obo') # https://github.com/dhimmel/obonet len(graph) # Number of nodes graph.number_of_edges() # Number of edges networkx.is_directed_acyclic_graph(graph) # Check if the ontology is a DAG # Mapping from term ID to name (default 'def' to '' so terms without a definition don't break the check) id_to_name = { id_: data.get('name') for id_, data in graph.nodes(data=True) if 'OBSOLETE' not in data.get('def', '') } ## by default obsolete already removed # id_to_name['GO:0000002'] go_name_array_obo = list(id_to_name.keys()) go_name_array_obo.sort() # go_name_array_obo = [re.sub(r"GO:","",g) for g in go_name_array_obo] pd.DataFrame(go_name_array_obo).to_csv("go_name_in_obo.csv", header=None, index=None) # index=None assumed; the source was truncated mid-call here
def test_generate_random_dag(self): self.assertTrue( nx.is_directed_acyclic_graph(generate_random_dag(10, 0.5)))
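generate_random_dag is not shown with this test. A common construction (assumed here, not necessarily the author's) fixes a node ordering and samples only forward edges with probability p, which makes acyclicity structural:

import random
import networkx as nx

def generate_random_dag(n, p, seed=None):
    # every edge goes from a lower to a higher index, so no cycle can form
    rng = random.Random(seed)
    g = nx.DiGraph()
    g.add_nodes_from(range(n))
    g.add_edges_from((i, j) for i in range(n) for j in range(i + 1, n) if rng.random() < p)
    return g

assert nx.is_directed_acyclic_graph(generate_random_dag(10, 0.5))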
def d_separated(G: nx.DiGraph, x: AbstractSet, y: AbstractSet, z: AbstractSet) -> bool: """ Return whether node sets ``x`` and ``y`` are d-separated by ``z``. Parameters ---------- G : graph A NetworkX DAG. x : set First set of nodes in ``G``. y : set Second set of nodes in ``G``. z : set Set of conditioning nodes in ``G``. Can be empty set. Returns ------- b : bool A boolean that is true if ``x`` is d-separated from ``y`` given ``z`` in ``G``. Raises ------ NetworkXError The *d-separation* test is commonly used with directed graphical models which are acyclic. Accordingly, the algorithm raises a :exc:`NetworkXError` if the input graph is not a DAG. NodeNotFound If any of the input nodes are not found in the graph, a :exc:`NodeNotFound` exception is raised. """ if not nx.is_directed_acyclic_graph(G): raise nx.NetworkXError("graph should be directed acyclic") union_xyz = x.union(y).union(z) if any(n not in G.nodes for n in union_xyz): raise nx.NodeNotFound( "one or more specified nodes not found in the graph") G_copy = G.copy() # transform the graph by removing leaves that are not in x | y | z # until no more leaves can be removed. leaves = deque([n for n in G_copy.nodes if G_copy.out_degree[n] == 0]) while len(leaves) > 0: leaf = leaves.popleft() if leaf not in union_xyz: for p in G_copy.predecessors(leaf): if G_copy.out_degree[p] == 1: leaves.append(p) G_copy.remove_node(leaf) # transform the graph by removing outgoing edges from the # conditioning set. edges_to_remove = list(G_copy.out_edges(z)) G_copy.remove_edges_from(edges_to_remove) # use disjoint-set data structure to check if any node in `x` # occurs in the same weakly connected component as a node in `y`. disjoint_set = UnionFind(G_copy.nodes()) for component in nx.weakly_connected_components(G_copy): disjoint_set.union(*component) disjoint_set.union(*x) disjoint_set.union(*y) if x and y and disjoint_set[next(iter(x))] == disjoint_set[next(iter(y))]: return False else: return True
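A quick sanity check of d_separated on the collider x -> z <- y: the path between x and y is blocked when nothing is conditioned on, and opened by conditioning on the collider z:

import networkx as nx

G = nx.DiGraph([("x", "z"), ("y", "z")])
print(d_separated(G, {"x"}, {"y"}, set()))  # True: the collider blocks the path
print(d_separated(G, {"x"}, {"y"}, {"z"}))  # False: conditioning on z opens it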
def check_cycle(self): if not nx.is_directed_acyclic_graph(self.graph): raise Exception("attempt to add a cycle to the graph")
def all_pairs_lowest_common_ancestor(G, pairs=None): """Compute the lowest common ancestor for pairs of nodes. Parameters ---------- G : NetworkX directed graph pairs : iterable of pairs of nodes, optional (default: all pairs) The pairs of nodes of interest. If None, will find the LCA of all pairs of nodes. Returns ------- An iterator over ((node1, node2), lca) where (node1, node2) are the pairs specified and lca is a lowest common ancestor of the pair. Note that for the default of all pairs in G, we consider unordered pairs, e.g. you will not get both (b, a) and (a, b). Notes ----- Only defined on non-null directed acyclic graphs. Uses the $O(n^3)$ ancestor-list algorithm from: M. A. Bender, M. Farach-Colton, G. Pemmasani, S. Skiena, P. Sumazin. "Lowest common ancestors in trees and directed acyclic graphs." Journal of Algorithms, 57(2): 75-94, 2005. See Also -------- tree_all_pairs_lowest_common_ancestor lowest_common_ancestor """ if not nx.is_directed_acyclic_graph(G): raise nx.NetworkXError("LCA only defined on directed acyclic graphs.") elif len(G) == 0: raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.") elif None in G: raise nx.NetworkXError("None is not a valid node.") # The copy isn't ideal, neither is the switch-on-type, but without it users # passing an iterable will encounter confusing errors, and itertools.tee # does not appear to handle builtin types efficiently (IE, it materializes # another buffer rather than just creating listoperators at the same # offset). The Python documentation notes use of tee is unadvised when one # is consumed before the other. # # This will always produce correct results and avoid unnecessary # copies in many common cases. # if (not isinstance(pairs, (Mapping, Set)) and pairs is not None): pairs = set(pairs) # Convert G into a dag with a single root by adding a node with edges to # all sources iff necessary. sources = [n for n, deg in G.in_degree if deg == 0] if len(sources) == 1: root = sources[0] super_root = None else: G = G.copy() super_root = root = generate_unique_node() for source in sources: G.add_edge(root, source) # Start by computing a spanning tree, and the DAG of all edges not in it. # We will then use the tree lca algorithm on the spanning tree, and use # the DAG to figure out the set of tree queries necessary. spanning_tree = nx.dfs_tree(G, root) dag = nx.DiGraph((u, v) for u, v in G.edges if u not in spanning_tree or v not in spanning_tree[u]) # Ensure that both the dag and the spanning tree contains all nodes in G, # even nodes that are disconnected in the dag. spanning_tree.add_nodes_from(G) dag.add_nodes_from(G) counter = count() # Necessary to handle graphs consisting of a single node and no edges. root_distance = {root: next(counter)} for edge in nx.bfs_edges(spanning_tree, root): for node in edge: if node not in root_distance: root_distance[node] = next(counter) # Index the position of all nodes in the Euler tour so we can efficiently # sort lists and merge in tour order. euler_tour_pos = {} for node in nx.depth_first_search.dfs_preorder_nodes(G, root): if node not in euler_tour_pos: euler_tour_pos[node] = next(counter) # Generate the set of all nodes of interest in the pairs. pairset = set() if pairs is not None: pairset = set(chain.from_iterable(pairs)) for n in pairset: if n not in G: msg = "The node %s is not in the digraph." 
raise nx.NodeNotFound(msg % str(n)) # Generate the transitive closure over the dag (not G) of all nodes, and # sort each node's closure set by order of first appearance in the Euler # tour. ancestors = {} for v in dag: if pairs is None or v in pairset: my_ancestors = nx.dag.ancestors(dag, v) my_ancestors.add(v) ancestors[v] = sorted(my_ancestors, key=euler_tour_pos.get) def _compute_dag_lca_from_tree_values(tree_lca, dry_run): """Iterate through the in-order merge for each pair of interest. We do this to answer the user's query, but it is also used to avoid generating unnecessary tree entries when the user only needs some pairs. """ for (node1, node2) in pairs if pairs is not None else tree_lca: best_root_distance = None best = None indices = [0, 0] ancestors_by_index = [ancestors[node1], ancestors[node2]] def get_next_in_merged_lists(indices): """Returns the index of the list containing the next item in merged order. Index can be 0 or 1 (or None if exhausted). """ index1, index2 = indices if (index1 >= len(ancestors[node1]) and index2 >= len(ancestors[node2])): return None elif index1 >= len(ancestors[node1]): return 1 elif index2 >= len(ancestors[node2]): return 0 elif (euler_tour_pos[ancestors[node1][index1]] < euler_tour_pos[ancestors[node2][index2]]): return 0 else: return 1 # Find the LCA by iterating through the in-order merge of the two # nodes of interests' ancestor sets. In principle, we need to # consider all pairs in the Cartesian product of the ancestor sets, # but by the restricted min range query reduction we are guaranteed # that one of the pairs of interest is adjacent in the merged list # iff one came from each list. i = get_next_in_merged_lists(indices) cur = ancestors_by_index[i][indices[i]], i while i is not None: prev = cur indices[i] += 1 i = get_next_in_merged_lists(indices) if i is not None: cur = ancestors_by_index[i][indices[i]], i # Two adjacent entries must not be from the same list # in order for their tree LCA to be considered. if cur[1] != prev[1]: tree_node1, tree_node2 = prev[0], cur[0] if (tree_node1, tree_node2) in tree_lca: ans = tree_lca[tree_node1, tree_node2] else: ans = tree_lca[tree_node2, tree_node1] if not dry_run and (best is None or root_distance[ans] > best_root_distance): best_root_distance = root_distance[ans] best = ans # If the LCA is super_root, there is no LCA in the user's graph. if not dry_run and (super_root is None or best != super_root): yield (node1, node2), best # Generate the spanning tree lca for all pairs. This doesn't make sense to # do incrementally since we are using a linear time offline algorithm for # tree lca. if pairs is None: # We want all pairs so we'll need the entire tree. tree_lca = dict(tree_all_pairs_lowest_common_ancestor(spanning_tree, root)) else: # We only need the merged adjacent pairs, found by seeing which queries the # algorithm needs and then generating them in a single pass. tree_lca = defaultdict(int) for _ in _compute_dag_lca_from_tree_values(tree_lca, True): pass # Replace the bogus default tree values with the real ones. for (pair, lca) in tree_all_pairs_lowest_common_ancestor(spanning_tree, root, tree_lca): tree_lca[pair] = lca # All precomputations complete. Now we just need to give the user the pairs # they asked for, or all pairs if they want them all. return _compute_dag_lca_from_tree_values(tree_lca, False)
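A usage sketch for all_pairs_lowest_common_ancestor on a diamond-shaped DAG, assuming the helpers it calls (tree_all_pairs_lowest_common_ancestor, generate_unique_node, and the itertools/collections imports) are in scope as in networkx:

import networkx as nx

# diamond: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3
G = nx.DiGraph([(0, 1), (0, 2), (1, 3), (2, 3)])
print(dict(all_pairs_lowest_common_ancestor(G, pairs=[(1, 2), (1, 3)])))
# expected: {(1, 2): 0, (1, 3): 1}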
def raise_if_dagcircuit_invalid(dag): """Validates the internal consistency of a DAGCircuit._multi_graph. Intended for use in testing. Raises: DAGCircuitError: if DAGCircuit._multi_graph is inconsistent. """ multi_graph = dag._multi_graph if dag._USE_RX: if not rx.is_directed_acyclic_graph(multi_graph): raise DAGCircuitError('multi_graph is not a DAG.') else: if not nx.is_directed_acyclic_graph(multi_graph): raise DAGCircuitError('multi_graph is not a DAG.') # Every node should be of type in, out, or op. # All input/output nodes should be present in input_map/output_map. for node in dag._get_multi_graph_nodes(): if node.type == 'in': assert node is dag.input_map[node.wire] elif node.type == 'out': assert node is dag.output_map[node.wire] elif node.type == 'op': continue else: raise DAGCircuitError('Found node of unexpected type: {}'.format(node.type)) # Shape of node.op should match shape of node. for node in dag.op_nodes(): assert len(node.qargs) == node.op.num_qubits assert len(node.cargs) == node.op.num_clbits # Every edge should be labeled with a known wire. edges_outside_wires = [edge_data['wire'] for source, dest, edge_data in dag._get_multi_graph_edges() if edge_data['wire'] not in dag.wires] if edges_outside_wires: raise DAGCircuitError('multi_graph contains one or more edges ({}) ' 'not found in DAGCircuit.wires ({}).'.format(edges_outside_wires, dag.wires)) # Every wire should have exactly one input node and one output node. for wire in dag.wires: in_node = dag.input_map[wire] out_node = dag.output_map[wire] assert in_node.wire == wire assert out_node.wire == wire assert in_node.type == 'in' assert out_node.type == 'out' # Every wire should be propagated by exactly one edge between nodes. for wire in dag.wires: cur_node_id = dag.input_map[wire]._node_id out_node_id = dag.output_map[wire]._node_id while cur_node_id != out_node_id: out_edges = dag._get_multi_graph_out_edges(cur_node_id) edges_to_follow = [(src, dest, data) for (src, dest, data) in out_edges if data['wire'] == wire] assert len(edges_to_follow) == 1 cur_node_id = edges_to_follow[0][1] # Wires can only terminate at input/output nodes. for op_node in dag.op_nodes(): assert multi_graph.in_degree(op_node._node_id) == multi_graph.out_degree(op_node._node_id) # Node input/output edges should match node qarg/carg/condition. for node in dag.op_nodes(): in_edges = dag._get_multi_graph_in_edges(node._node_id) out_edges = dag._get_multi_graph_out_edges(node._node_id) in_wires = {data['wire'] for src, dest, data in in_edges} out_wires = {data['wire'] for src, dest, data in out_edges} node_cond_bits = set(node.condition[0][:] if node.condition is not None else []) node_qubits = set(node.qargs) node_clbits = set(node.cargs) all_bits = node_qubits | node_clbits | node_cond_bits assert in_wires == all_bits, 'In-edge wires {} != node bits {}'.format(in_wires, all_bits) assert out_wires == all_bits, 'Out-edge wires {} != node bits {}'.format(out_wires, all_bits)
def _legal_operations(self, model, tabu_list=[], max_indegree=None, black_list=None, white_list=None): """Generates a list of legal (= not in tabu_list) graph modifications for a given model, together with their score changes. Possible graph modifications: (1) add, (2) remove, or (3) flip a single edge. For details on scoring see Koller & Friedman, Probabilistic Graphical Models, Section 18.4.3.3 (page 818). If a number `max_indegree` is provided, only modifications that keep the number of parents for each node below `max_indegree` are considered. A list of edges can optionally be passed as `black_list` or `white_list` to exclude those edges or to limit the search. """ local_score = self.scoring_method.local_score nodes = self.state_names.keys() potential_new_edges = (set(permutations(nodes, 2)) - set(model.edges()) - set([(Y, X) for (X, Y) in model.edges()])) for (X, Y) in potential_new_edges: # (1) add single edge if nx.is_directed_acyclic_graph( nx.DiGraph(list(model.edges()) + [(X, Y)])): operation = ("+", (X, Y)) if (operation not in tabu_list and (black_list is None or (X, Y) not in black_list) and (white_list is None or (X, Y) in white_list)): old_parents = model.get_parents(Y) new_parents = old_parents + [X] if max_indegree is None or len( new_parents) <= max_indegree: score_delta = local_score( Y, new_parents) - local_score(Y, old_parents) yield (operation, score_delta) for (X, Y) in model.edges(): # (2) remove single edge operation = ("-", (X, Y)) if operation not in tabu_list: old_parents = model.get_parents(Y) new_parents = old_parents[:] new_parents.remove(X) score_delta = local_score(Y, new_parents) - local_score( Y, old_parents) yield (operation, score_delta) for (X, Y) in model.edges(): # (3) flip single edge new_edges = list(model.edges()) + [(Y, X)] new_edges.remove((X, Y)) if nx.is_directed_acyclic_graph(nx.DiGraph(new_edges)): operation = ("flip", (X, Y)) if (operation not in tabu_list and ("flip", (Y, X)) not in tabu_list and (black_list is None or (Y, X) not in black_list) and (white_list is None or (Y, X) in white_list)): old_X_parents = model.get_parents(X) old_Y_parents = model.get_parents(Y) new_X_parents = old_X_parents + [Y] new_Y_parents = old_Y_parents[:] new_Y_parents.remove(X) if max_indegree is None or len( new_X_parents) <= max_indegree: score_delta = (local_score(X, new_X_parents) + local_score(Y, new_Y_parents) - local_score(X, old_X_parents) - local_score(Y, old_Y_parents)) yield (operation, score_delta)
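The add-edge branch above keeps the search space acyclic by testing each candidate edge on a throwaway DiGraph. The same guard in isolation, on a toy model:

import networkx as nx

model_edges = [("A", "B"), ("B", "C")]

def keeps_dag(edges, candidate):
    # a candidate edge is legal only if the extended graph is still a DAG
    return nx.is_directed_acyclic_graph(nx.DiGraph(edges + [candidate]))

print(keeps_dag(model_edges, ("A", "C")))  # True
print(keeps_dag(model_edges, ("C", "A")))  # False: would close the cycle A->B->C->A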
def is_dag(self, adj): return networkx.is_directed_acyclic_graph(self.as_graph(adj))
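as_graph is not shown; a common reading (an assumption, not the author's code) converts a 0/1 adjacency matrix into a DiGraph before the DAG check:

import numpy as np
import networkx as nx

def as_graph(adj):
    # hypothetical helper: nonzero adj[i][j] becomes a directed edge i -> j
    return nx.from_numpy_array(np.asarray(adj), create_using=nx.DiGraph)

adj = [[0, 1, 0],
       [0, 0, 1],
       [0, 0, 0]]  # strictly upper triangular, hence acyclic
print(nx.is_directed_acyclic_graph(as_graph(adj)))  # True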