def node_is_isolated(self, node, allow_self_loop=False):
    """Tells whether the project item with the given name is unconnected.

    Args:
        node (str): Project item name
        allow_self_loop (bool): If False (default), a self-loop counts as a
            connection, so an item with a self-loop is never isolated.
            If True, an item whose only edge is its own self-loop is
            still considered isolated.

    Returns:
        bool: True if the project item has no in-neighbors and no
        out-neighbors (optionally ignoring its own self-loop),
        False otherwise.
    """
    g = self.dag_with_node(node)
    if allow_self_loop and g.has_edge(node, node):
        # A self-loop adds 2 to the node degree, so a degree of exactly 2
        # means the self-loop is the only edge touching the node.
        return g.degree(node) == 2
    return nx.is_isolate(g, node)
def test_is_isolate():
    """The endpoints of an edge are not isolates; a lone node is."""
    G = nx.Graph()
    G.add_edge(0, 1)
    G.add_node(2)
    for endpoint in (0, 1):
        assert not nx.is_isolate(G, endpoint)
    assert nx.is_isolate(G, 2)
def test_is_isolate():
    """The endpoints of an edge are not isolates; a lone node is."""
    G = nx.Graph()
    G.add_edge(0, 1)
    G.add_node(2)
    for endpoint in (0, 1):
        assert_false(nx.is_isolate(G, endpoint))
    assert_true(nx.is_isolate(G, 2))
def build_graph(self):
    """Build the partite random graph and store it in ``self.graph``.

    An edge-less partition graph is created from ``self.nodes``; every pair
    of nodes taken from two consecutive partitions is then linked with
    probability ``self.p``. When ``self.p`` is 0 the method returns right
    after creating the edge-less graph (no pruning, relabelling or
    colouring is performed). Otherwise isolated nodes are optionally
    pruned (``self.prune``) and the nodes are relabelled and coloured.
    """
    # make a partite network
    self.graph = nx.random_partition_graph(self.nodes, 0, 0)
    if self.p == 0:
        return
    lp = math.log(self.p)
    for p1, p2 in pairwise(self.graph.graph['partition']):
        p1 = sorted(p1)
        p2 = sorted(p2)
        for i in p1:
            for j in p2:
                if int(self.p) == 1:
                    # p == 1: always connect (also avoids dividing by log(1) == 0)
                    self.graph.add_edge(i, j)
                    continue
                # lr / lp >= 1  <=>  1 - U <= p for a uniform U, i.e. the
                # edge is added with probability p
                lr = math.log(1.0 - random.random())
                if lr / lp >= 1:
                    self.graph.add_edge(i, j)
    # prune unconnected nodes
    if self.prune:
        # Snapshot the isolates first: removing nodes while iterating the
        # live node view raises "dictionary changed size during iteration".
        self.graph.remove_nodes_from(list(nx.isolates(self.graph)))
    self.node_labels_to_ints()
    self.colour()
def getImportanceGrad(G, x_good, x, nsamples, influ_obj, herd, a):
    """Estimate the gradient of the multilinear relaxation at ``x``.

    Returns the gradient vector as given in Chekuri's paper (see Theorem 1
    in the nips2012 paper), estimated by importance sampling: samples are
    drawn from the proposal ``(1 - a)*x + a*x_good`` (via ``herd_points``
    when ``herd == 1``, Bernoulli draws otherwise) and re-weighted with
    ``getImportanceWeights``. Isolated nodes of ``G`` keep a zero entry.
    """
    num_nodes = G.number_of_nodes()
    grad = Variable(torch.zeros(num_nodes))
    proposal = (1 - a)*x + a*x_good
    if herd == 1:
        samples_list = herd_points(proposal, nsamples)
    else:
        samples_list = Variable(torch.bernoulli(proposal.repeat(nsamples, 1)))
    weights = getImportanceWeights(samples_list, x, proposal)
    for t in range(nsamples):
        sample = samples_list[t]
        # indicator vector that has a single 1 at the node being probed
        mask = torch.zeros(sample.size())
        for p in np.arange(num_nodes):
            if nx.is_isolate(G, p) is not False:
                continue
            mask[p] = 1
            # marginal gain of node p: objective with p forced in minus
            # objective with p forced out
            with_p = influ_obj(np.logical_or(sample.numpy(), mask.numpy()))
            without_p = influ_obj(
                np.logical_and(sample.numpy(), np.logical_not(mask.numpy())))
            grad[p] = grad[p] + weights[t]*(with_p - without_p)
            mask[p] = 0
    return grad*1.0/nsamples
def find_valid_alternatives(go, alt_id_g, rel_g):
    """Return the alternatives of ``go`` that are connected nodes of ``rel_g``.

    Candidates come from ``find_alternatives(go, alt_id_g)``; one is kept
    only if it is a node of ``rel_g`` and is not isolated there. A notice is
    written to stderr when more than one candidate survives.
    """
    valid = []
    for candidate in find_alternatives(go, alt_id_g):
        if candidate not in rel_g:
            continue
        if nx.is_isolate(rel_g, candidate):
            continue
        valid.append(candidate)
    if len(valid) > 1:
        print(f'found {len(valid)} valid alternatives of {go}', file=sys.stderr)
    return valid
def set_warranted(self): undefeated = set([node for (node,x) in self.graph.edges()]) - \ set([node for (x,node) in self.graph.edges()]) undefeated |= set([node for node in self.graph.nodes() if nx.is_isolate(self.graph, node)]) warranted = undefeated | self.judge.grounded(undefeated, self.graph, set([]), set([])) for w in warranted: self.graph.add_node(w, style="filled", fillcolor="green") self.warranted = warranted print len(warranted), "reviews were accepted"
def compress(self):
    """Collapse consistent groups of review nodes into single compressed nodes.

    Rebuilds ``self.dotgraph``, ``self.dotnodes`` and ``self.warranted`` from
    the consistent subsets of each defeat stage, and records in
    ``self.has_compressed`` which original ids each compressed node stands
    for. ``self.cid`` is advanced once per compressed node created.
    """
    # First stage: nodes that are the first element of some edge but never
    # the second element of any edge ...
    first_stage = set([node for (node, x) in self.dotgraph.edges()]) - set(
        [node for (x, node) in self.dotgraph.edges()]
    )
    # ... plus all isolated nodes.
    first_stage |= set([node for node in self.dotgraph.nodes() if nx.is_isolate(self.dotgraph, node)])
    defeat_stages = [first_stage] + self.stages(first_stage, first_stage)
    # Collect the consistent subsets of every stage, in stage order.
    cs = []
    for stage in defeat_stages:
        cs += self.consistent_subsets(stage, self.warranted)
    compressed_dotnodes = {}
    has_compressed = {}
    compressed_warranted = set([])
    compressed_dotgraph = nx.DiGraph()
    for subset in cs:
        # The compressed review carries the union of its members' features.
        positive_feats = set([])
        negative_feats = set([])
        for i in subset:
            positive_feats |= set(self.dotnodes[i].get_positive_feats())
            negative_feats |= set(self.dotnodes[i].get_negative_feats())
        r = Review(
            "c" + str(self.cid),
            {"feats": list(positive_feats), "text": ""},
            {"feats": list(negative_feats), "text": ""},
        )
        compressed_dotnodes[r.id] = r
        has_compressed[r.id] = subset
        self.cid += 1
        # A compressed node is warranted (drawn filled green) only when every
        # member of its subset is currently warranted.
        if subset.issubset(self.warranted):
            compressed_warranted.add(r.id)
            compressed_dotgraph.add_node(
                r.id, style="filled", fillcolor="green", shape="record", label=str(r.subset_label(subset))
            )
        else:
            compressed_dotgraph.add_node(r.id, shape="record", label=str(r.subset_label(subset)))
    # Link two compressed nodes when any pair of their members is in conflict
    # and no edge exists between them yet in either direction.
    # NOTE(review): id1 == id2 is not excluded, so a subset with internally
    # conflicting members would get a self-loop -- confirm whether
    # consistent_subsets() rules that out.
    for id1, n1 in has_compressed.items():
        for id2, n2 in has_compressed.items():
            for i in n1:
                for j in n2:
                    ri = self.dotnodes[i]
                    rj = self.dotnodes[j]
                    if (
                        ri.in_conflict(rj)
                        and not (id1, id2) in compressed_dotgraph.edges()
                        and not (id2, id1) in compressed_dotgraph.edges()
                    ):
                        compressed_dotgraph.add_edge(id1, id2, dir="none")
    # Replace the uncompressed state with the compressed one.
    self.warranted = compressed_warranted
    self.dotgraph = compressed_dotgraph
    self.dotnodes = compressed_dotnodes
    self.has_compressed = has_compressed
def generate_migration_links_rates(self):
    """Compute per-source migration rates and store them in ``self.link_rates``.

    For every ``(src, {dest: dist})`` entry of ``self.path_lengths`` a rate
    ``self.coeff * population[dest]`` is assigned to each destination closer
    than the distance cutoff; only the largest rates per source are kept.

    Returns:
        dict: ``{src: {dest: rate}}``, the same object stored in
        ``self.link_rates``.
    """
    dist_cutoff = 20  # beyond 20km effective distance not reached in 1 day.
    max_migration_dests = 100  # limit of DTK local migration
    link_rates = {}
    for src, distances in self.path_lengths:
        rates = {}
        link_rates[src] = rates
        for dest, dist in distances.items():
            if not dist or src == dest:
                continue
            if dist < dist_cutoff:
                rate = self.coeff * self.graph.population[int(dest)]
                # Not used further; evaluated so that a source without a
                # population entry still fails here.
                self.graph.population[int(src)] * rate
                rates[dest] = rate
            else:
                warnings.warn(
                    'Check if dist_cutoff is too low for source node ' + str(src)
                    + " distance is " + str(dist))
        if not rates:
            warnings.warn('No paths from source ' + str(src) + ' found! Check if node is isolated.')
            print("Node " + str(src) + " is isolate " + str(nx.is_isolate(self.graph, src)))
            continue
        # keep only the destinations with the highest rates
        strongest = heapq.nlargest(max_migration_dests, rates, key=rates.get)
        link_rates[src] = {dest: rates[dest] for dest in strongest}
    self.link_rates = link_rates
    return link_rates
def __walks_multi__(nodes, net=None, network_id=None, steps=10, number_of_walks=10, degree=True, start=None, probabilistic=True, weight="weight"):
    """Perform random walks on ``net`` from every node in ``nodes``.

    Nodes that are missing from ``net`` or isolated in it get an empty list.
    When ``degree`` is True the number of walks per node is
    ``number_of_walks`` scaled by the node degree. ``network_id`` and
    ``start`` are accepted but not used here.

    Returns:
        dict: starting node -> list of performed walks.
    """
    performed_walks = {}
    for position, node in enumerate(nodes):
        if position % 100 == 0:
            print("walks for node ", position, "outof", len(nodes))
        node_walks = []
        if node in net.nodes() and not nx.is_isolate(net, node):
            if degree:
                walk_count = int(number_of_walks * net.degree[node])
                print("running walks", walk_count, "for node", node)
            else:
                walk_count = number_of_walks
            node_walks = global_distances.perform_random_walks(
                net,
                steps=steps,
                number_of_walks=walk_count,
                start=node,
                probabilistic=probabilistic,
                weight=weight)
        performed_walks[node] = node_walks
    return performed_walks
def nodes_filter(x):
    """Keep an ``(id, value)`` item whose id is in ``nodes``; SINK is kept
    only while it is not isolated in ``graph``."""
    node_id, _ = x
    if node_id not in nodes:
        return False
    if node_id is not SINK:
        return True
    return not is_isolate(graph, SINK)
def has_isolated_node(g):
    """Return True as soon as any node of ``g`` is found to be isolated."""
    return any(nx.is_isolate(g, node) for node in g.nodes())
# Calls into a range of NetworkX algorithms on G and Gcc; every return
# value is discarded.
# NOTE(review): G and Gcc are defined outside this excerpt -- Gcc is
# presumably a connected component of G; confirm.
nx.is_directed_acyclic_graph(G)
nx.is_aperiodic(G)

# distance measure (all for connected graph)
nx.center(Gcc)
nx.diameter(Gcc)
nx.eccentricity(Gcc)
nx.periphery(Gcc)
nx.radius(Gcc)

# flows (seg fault currently)
#nx.max_flow(Gcc, 1, 2)
#nx.min_cut(G, 1, 2)

# isolates (the trailing comments record the result the author expected)
nx.is_isolate(G, 1) # False
nx.is_isolate(G, 5) # True

# HITS
nx.hits(G, max_iter=1000) # cannot converge?

# maximal independent set
nx.maximal_independent_set(G)

# shortest path
nx.shortest_path(G) # need "predecessors_iter"
nx.all_pairs_shortest_path(G)
nx.all_pairs_shortest_path_length(G)

# predecessors on shortest paths from source node 1 (second call: to node 378)
nx.predecessor(G, 1)
nx.predecessor(G, 1, 378)
def Remove_Single_Node(graph):
    """Remove every isolated (degree-0) node from ``graph`` in place.

    Returns:
        The same ``graph`` object, for call chaining.
    """
    # Snapshot the isolates before removing anything: deleting nodes while
    # iterating the live node view raises
    # "RuntimeError: dictionary changed size during iteration".
    graph.remove_nodes_from(list(nx.isolates(graph)))
    return graph
def filter_node(n):
    """Node filter: True for nodes of ``view`` that have at least one edge."""
    isolated = nx.is_isolate(view, n)
    return not isolated
def plot(dsp, workflow=False, dot=None, edge_data=None, view=False,
         depth=-1, function_module=True, node_output=True, nested=False,
         **kw_dot):
    """
    Plots the Dispatcher with a graph in the DOT language with Graphviz.

    :param dsp:
        A dispatcher that identifies the model adopted.
    :type dsp: dispatcher.Dispatcher

    :param dot:
        A directed graph in the DOT language.
    :type dot: graphviz.dot.Digraph, optional

    :param workflow:
        If True the workflow graph will be plotted, otherwise the dmap.
    :type workflow: bool, (DiGraph, dict), optional

    :param edge_data:
        Edge attribute to view. The default is the edge weights.
    :type edge_data: str, optional

    :param node_output:
        If True the node outputs are displayed with the workflow.
    :type node_output: bool

    :param view:
        Open the rendered directed graph in the DOT language with the sys
        default opener.
    :type view: bool, optional

    :param depth:
        Depth of sub-dispatch plots. If negative all levels are plotted.
    :type depth: int, optional

    :param function_module:
        If True the function labels are plotted with the function module,
        otherwise only the function name will be visible.
    :type function_module: bool, optional

    :param nested:
        If False the sub-dispatcher nodes are plotted on the same graph,
        otherwise they can be viewed clicking on the node that has an URL
        link.
    :type nested: bool

    :param kw_dot:
        Dot arguments:

            - name: Graph name used in the source code.
            - comment: Comment added to the first line of the source.
            - directory: (Sub)directory for source saving and rendering.
            - filename: Filename for saving the source (defaults to name +
              '.gv' ).
            - format: Rendering output format ('pdf', 'png', ...).
            - engine: Layout command used ('dot', 'neato', ...).
            - encoding: Encoding for saving the source.
            - graph_attr: Dict of (attribute, value) pairs for the graph.
            - node_attr: Dict of (attribute, value) pairs set for all nodes.
            - edge_attr: Dict of (attribute, value) pairs set for all edges.
            - body: Dict of (attribute, value) pairs to add to the graph
              body.
    :type kw_dot: dict

    :return:
        A directed graph source code in the DOT language.
    :rtype: graphviz.dot.Digraph

    Example:

    .. dispatcher:: dsp
       :opt: graph_attr={'ratio': '1'}
       :code:

        >>> from co2mpas.dispatcher import Dispatcher
        >>> from co2mpas.dispatcher.utils import SubDispatch, SINK
        >>> ss = Dispatcher(name='Sub-sub-dispatcher')
        >>> def fun(a):
        ...     return a + 1, a - 1
        >>> ss.add_function('fun', fun, ['a'], ['b', 'c'])
        'fun'
        >>> sub_dispatch = SubDispatch(ss, ['a', 'b', 'c'], output_type='list')
        >>> s_dsp = Dispatcher(name='Sub-dispatcher')
        >>> s_dsp.add_function('sub_dispatch', sub_dispatch, ['a'], ['b', 'c'])
        'sub_dispatch'
        >>> dispatch = SubDispatch(s_dsp, ['b', 'c', 'a'], output_type='list')
        >>> dsp = Dispatcher(name='Dispatcher')
        >>> dsp.add_data('input', default_value={'a': {'a': 3}})
        'input'
        >>> dsp.add_function('dispatch', dispatch, ['input'], ['d', 'e', SINK])
        'dispatch'

        >>> dot = plot(dsp, graph_attr={'ratio': '1'})

    Dispatch in order to have a workflow:

    .. dispatcher:: dsp
       :opt: workflow=True, graph_attr={'ratio': '1'}
       :code:

        >>> o = dsp.dispatch()
        ...
        >>> wf = plot(dsp, workflow=True, graph_attr={'ratio': '1'})
    """

    # Resolve the graph to draw plus the data used to decorate it; note that
    # `dsp` and `edge_data` are rebound to the values the helper returns.
    args = _init_graph_data(dsp, workflow, edge_data)
    dsp, g, val, dist, edge_data, inputs, outputs = args

    # Reuse the caller's DOT graph when given, otherwise create a new one.
    dot = dot or _init_dot(dsp, workflow, nested, **kw_dot)

    # Lookup from dispatcher node ids to DOT node ids, used by every
    # _set_node/_set_edge call below.
    dsp2dot_id = _get_dsp2dot_id(dot, dsp.dmap)

    # An empty graph is drawn as a single EMPTY placeholder node.
    if not g.node:
        _set_node(dot, EMPTY, dsp2dot_id)

    # START is drawn when it is the only node or has at least one edge;
    # otherwise, when all `inputs` are graph nodes, a START node is added
    # with one edge per input, labelled with the input's position.
    if START in g.node and (len(g.node) == 1 or not nx.is_isolate(g, START)):
        _set_node(dot, START, dsp2dot_id)
    elif inputs and set(inputs).issubset(g.node):
        dot_u = _set_node(dot, START, dsp2dot_id)

        for i, v in enumerate(inputs):
            _set_edge(dot, dot_u, dsp2dot_id[v], xlabel=str(i))

    for k, v in g.node.items():
        # Skip nodes unknown to the dispatcher and an unconnected SINK.
        if k not in dsp.nodes or (k is SINK and nx.is_isolate(g, SINK)):
            continue

        _set_node(dot, k, dsp2dot_id,
                  node_attr=dsp.nodes.get(k, {}), values=val, dist=dist,
                  dsp=dsp, function_module=function_module,
                  edge_attr=edge_data, workflow_node=v, depth=depth,
                  node_output=node_output, nested=nested)

    # NOTE(review): `edge_data=None` is passed here rather than the
    # `edge_data` value resolved above -- confirm this is intentional.
    for u, v, a in g.edges_iter(data=True):
        _set_edge(dot, dsp2dot_id[u], dsp2dot_id[v], a, edge_data=None)

    # When all `outputs` are graph nodes, add an END node with one edge per
    # output, labelled with the output's position.
    if outputs and set(outputs).issubset(g.node):
        dot_v = _set_node(dot, END, dsp2dot_id)

        for i, u in enumerate(outputs):
            _set_edge(dot, dsp2dot_id[u], dot_v, xlabel=str(i))

    if view:
        try:
            dot.render(cleanup=True, view=True)
        except RuntimeError as ex:
            # A RuntimeError from rendering is logged, not raised: the DOT
            # object is still returned to the caller.
            log.warning('{}'.format(ex), exc_info=1)
    return dot
def stress(G, output_folder, fI, x_constraint=None, y_constraint=None, weight_threshold=0):
    """Prune weak edges, lay the graph out with ``_sgd`` and save a PNG.

    Edges whose ``'weight'`` is below ``weight_threshold`` are dropped from
    a copy of ``G``. Nodes left isolated by the pruning are removed, the
    remaining nodes are relabelled to consecutive integers, and the
    constraint dicts are re-keyed to match.

    Args:
        G: NetworkX graph. Edges carry ``'weight'``, ``'color'`` and
            ``'width'`` attributes, nodes carry ``'color'``, ``'x'`` and
            ``'y'``, and the graph carries ``G.graph['name']``. When pruning
            isolates nodes, the nodes must be labelled ``0 .. len(G) - 1``.
        output_folder: Directory the image is written to.
        fI: Suffix appended to the output file name.
        x_constraint (dict or None): Node -> x constraint passed to
            ``_sgd``. **Modified in place** when nodes are removed.
        y_constraint (dict or None): Node -> y constraint passed to
            ``_sgd``. **Modified in place** when nodes are removed.
        weight_threshold: Minimum edge weight to keep.
    """
    # remove weak edges
    G2 = G.copy()
    for ij in G.edges:
        if G.edges[ij]['weight'] < weight_threshold:
            G2.remove_edge(*ij)

    # Make another copy: G2 keeps the original labels, G3 is compacted.
    G3 = G2.copy()

    # The constraints default to None; only touch the dicts actually given
    # (the membership tests below would raise TypeError on None).
    constraints = [c for c in (x_constraint, y_constraint) if c is not None]

    # Check if any nodes have been isolated
    if nx.number_of_isolates(G2) != 0:
        # Old label -> new consecutive label for every surviving node.
        mapping = {}
        # Number of isolates seen so far, i.e. how far labels shift down.
        k = 0
        for i in range(len(G2)):
            if nx.is_isolate(G2, i):
                # Isolated nodes lose their constraints ...
                for constraint in constraints:
                    if i in constraint:
                        del constraint[i]
                k += 1
            else:
                mapping[i] = i - k

        # ... and are removed from the graph that gets laid out.
        G3.remove_nodes_from(list(nx.isolates(G2)))

        # Use to relabel nodes and constraints
        G3 = nx.relabel_nodes(G3, mapping)

        # Re-key the constraints. Ascending order is safe because a new
        # label is never larger than the old one it replaces.
        for old in mapping:
            for constraint in constraints:
                if old in constraint:
                    constraint[mapping[old]] = constraint.pop(old)

    # compute layout
    X = _sgd(G3, y_constraint=y_constraint, x_constraint=x_constraint)

    # draw with colours
    cols_node = list(nx.get_node_attributes(G3, 'color').values())
    cols_edge = list(nx.get_edge_attributes(G3, 'color').values())
    widths_edge = list(nx.get_edge_attributes(G3, 'width').values())

    # Extract x and y labels of nodes
    xvs = nx.get_node_attributes(G3, 'x')
    yvs = nx.get_node_attributes(G3, 'y')

    # Labels default to the y value of each node
    labels = yvs.copy()

    # Vector containing alphabet
    ab = list(string.ascii_lowercase)

    for i in yvs:
        # Nodes with x == 1.0 (strains) are labelled with a letter instead
        if xvs[i] == 1.0:
            labels[i] = ab[yvs[i] - 1]

    nx.draw(G3, pos=X, node_color=cols_node, edge_color=cols_edge,
            width=widths_edge, labels=labels, arrows=False)
    plt.axis('equal')
    if weight_threshold != 0.0:
        plt.savefig(f'{output_folder}/{G3.graph["name"]}_stress_prun{fI}.png')
    else:
        plt.savefig(f'{output_folder}/{G3.graph["name"]}_stress{fI}.png')
    plt.close()
def remove_edge(u, v):
    """Remove edge ``u -> v`` and drop ``v`` too if that leaves it isolated."""
    rm_edge(u, v)
    if not is_isolate(graph, v):
        return
    # v has no edges left after the removal: remove the isolated out node.
    rm_node(v)
def connect_lv_generators(network, allow_multiple_genos_per_load=True):
    """Connect LV generators to existing grids.

    This function searches for unconnected generators in all LV grids and
    connects them.

    It connects

        * generators of voltage level 6
            * to MV-LV station

        * generators of voltage level 7
            * with a nom. capacity of <=30 kW to LV loads of type residential
            * with a nom. capacity of >30 kW and <=100 kW to LV loads of type
                retail, industrial or agricultural
            * to the MV-LV station if no appropriate load is available
              (fallback)

    Parameters
    ----------
    network : :class:`~.grid.network.Network`
        The eDisGo container object
    allow_multiple_genos_per_load : :obj:`bool`
        If True, more than one generator can be connected to one load

    Notes
    -----
    For the allocation, loads are selected randomly (sector-wise) using a
    predefined seed to ensure reproducibility.

    Only generators that are isolated nodes of their LV grid graph are
    processed; already connected generators are left untouched.
    """

    # get predefined random seed and initialize random generator
    # (`seed` is read but currently unused: the generator is seeded with a
    # fixed value, see the ToDo below)
    seed = int(network.config['grid_connection']['random_seed'])
    #random.seed(a=seed)
    random.seed(a=1234)
    # ToDo: Switch back to 'seed' as soon as line ids are finished, #58

    # get standard equipment
    std_line_type = network.equipment_data['lv_cables'].loc[
        network.config['grid_expansion_standard_equipment']['lv_line']]
    std_line_kind = 'cable'

    # # TEMP: DEBUG STUFF
    # lv_grid_stats = pd.DataFrame(columns=('lv_grid',
    #                                       'load_count',
    #                                       'geno_count',
    #                                       'more_genos_than_loads')
    #                              )

    # iterate over all LV grids
    for lv_grid in network.mv_grid.lv_grids:

        lv_loads = lv_grid.graph.nodes_by_attribute('load')

        # counter for genos in v_level 7
        log_geno_count_vlevel7 = 0

        # generate random list (without replacement => unique elements)
        # of loads (residential) to connect genos (P <= 30kW) to.
        # Loads are sorted by repr() first so the sample input is stable.
        lv_loads_res = sorted([
            lv_load for lv_load in lv_loads
            if 'residential' in list(lv_load.consumption.keys())
        ], key=lambda _: repr(_))

        # NOTE(review): the sampled order is stored in a set, and set.pop()
        # does not follow the sampled order -- confirm that reproducibility
        # does not depend on it.
        if len(lv_loads_res) > 0:
            lv_loads_res_rnd = set(
                random.sample(lv_loads_res, len(lv_loads_res)))
        else:
            lv_loads_res_rnd = None

        # generate random list (without replacement => unique elements)
        # of loads (retail, industrial, agricultural) to connect genos
        # (30kW < P <= 100kW) to.
        lv_loads_ria = sorted([
            lv_load for lv_load in lv_loads
            if any([
                _ in list(lv_load.consumption.keys())
                for _ in ['retail', 'industrial', 'agricultural']
            ])
        ], key=lambda _: repr(_))

        if len(lv_loads_ria) > 0:
            lv_loads_ria_rnd = set(
                random.sample(lv_loads_ria, len(lv_loads_ria)))
        else:
            lv_loads_ria_rnd = None

        for geno in sorted(lv_grid.graph.nodes_by_attribute('generator'),
                           key=lambda x: repr(x)):
            # only generators without any edge still need to be connected
            if nx.is_isolate(lv_grid.graph, geno):

                lv_station = lv_grid.station

                # generator is of v_level 6 -> connect to LV station
                if geno.v_level == 6:

                    line_length = calc_geo_dist_vincenty(
                        network=network,
                        node_source=geno,
                        node_target=lv_station)

                    # the distance is divided by 1e3 before being stored as
                    # the line length (presumably m -> km; confirm units)
                    line = Line(id=random.randint(10**8, 10**9),
                                length=line_length / 1e3,
                                quantity=1,
                                kind=std_line_kind,
                                type=std_line_type,
                                grid=lv_grid)

                    lv_grid.graph.add_edge(geno,
                                           lv_station,
                                           line=line,
                                           type='line')

                    # add line to equipment changes to track costs
                    _add_cable_to_equipment_changes(network=network,
                                                    line=line)

                # generator is of v_level 7 -> assign geno to load
                elif geno.v_level == 7:
                    # counter for genos in v_level 7
                    log_geno_count_vlevel7 += 1

                    # connect genos with P <= 30kW to residential loads, if available
                    if (geno.nominal_capacity <= 30) and (lv_loads_res_rnd is not None):
                        if len(lv_loads_res_rnd) > 0:
                            lv_load = lv_loads_res_rnd.pop()
                        # if random load list is empty, create new one
                        else:
                            lv_loads_res_rnd = set(
                                random.sample(lv_loads_res,
                                              len(lv_loads_res)))
                            lv_load = lv_loads_res_rnd.pop()

                        # get cable distributor of building
                        lv_conn_target = list(
                            lv_grid.graph.neighbors(lv_load))[0]

                        if not allow_multiple_genos_per_load:
                            # check if there's an existing generator connected to the load
                            # if so, select next load. If no load is available, connect to station.
                            while any([
                                isinstance(_, Generator)
                                for _ in lv_grid.graph.neighbors(
                                    list(lv_grid.graph.neighbors(lv_load))
                                    [0])
                            ]):
                                if len(lv_loads_res_rnd) > 0:
                                    lv_load = lv_loads_res_rnd.pop()

                                    # get cable distributor of building
                                    lv_conn_target = list(
                                        lv_grid.graph.neighbors(lv_load))[0]
                                else:
                                    lv_conn_target = lv_grid.station

                                    logger.debug(
                                        'No valid conn. target found for {}. '
                                        'Connected to {}.'.format(
                                            repr(geno), repr(lv_conn_target)))
                                    break

                    # connect genos with P > 30kW to retail, industrial or
                    # agricultural loads, if available
                    elif (geno.nominal_capacity > 30) and (lv_loads_ria_rnd is not None):
                        if len(lv_loads_ria_rnd) > 0:
                            lv_load = lv_loads_ria_rnd.pop()
                        # if random load list is empty, create new one
                        else:
                            lv_loads_ria_rnd = set(
                                random.sample(lv_loads_ria,
                                              len(lv_loads_ria)))
                            lv_load = lv_loads_ria_rnd.pop()

                        # get cable distributor of building
                        lv_conn_target = list(
                            lv_grid.graph.neighbors(lv_load))[0]

                        if not allow_multiple_genos_per_load:
                            # check if there's an existing generator connected to the load
                            # if so, select next load. If no load is available, connect to station.
                            while any([
                                isinstance(_, Generator)
                                for _ in lv_grid.graph.neighbors(
                                    list(lv_grid.graph.neighbors(lv_load))
                                    [0])
                            ]):
                                if len(lv_loads_ria_rnd) > 0:
                                    lv_load = lv_loads_ria_rnd.pop()

                                    # get cable distributor of building
                                    lv_conn_target = list(
                                        lv_grid.graph.neighbors(lv_load))[0]
                                else:
                                    lv_conn_target = lv_grid.station

                                    logger.debug(
                                        'No valid conn. target found for {}. '
                                        'Connected to {}.'.format(
                                            repr(geno), repr(lv_conn_target)))
                                    break

                    # fallback: connect to station
                    else:
                        lv_conn_target = lv_grid.station

                        logger.debug('No valid conn. target found for {}. '
                                     'Connected to {}.'.format(
                                         repr(geno), repr(lv_conn_target)))

                    # fixed length of 1e-3 for the connecting line
                    line = Line(id=random.randint(10**8, 10**9),
                                length=1e-3,
                                quantity=1,
                                kind=std_line_kind,
                                type=std_line_type,
                                grid=lv_grid)

                    # NOTE(review): `lv_conn_target` is selected above and
                    # named in the debug messages, but the edge is added to
                    # `lv_station` -- confirm whether `lv_conn_target` was
                    # intended here.
                    lv_grid.graph.add_edge(geno,
                                           lv_station,
                                           line=line,
                                           type='line')

                    # add line to equipment changes to track costs
                    _add_cable_to_equipment_changes(network=network,
                                                    line=line)

        # warn if there're more genos than loads in LV grid
        if log_geno_count_vlevel7 > len(lv_loads):
            logger.debug(
                'The count of newly connected generators in voltage level 7 ({}) '
                'exceeds the count of loads ({}) in LV grid {}.'.format(
                    str(log_geno_count_vlevel7), str(len(lv_loads)),
                    repr(lv_grid)))
def make_graphs(G,window,index,communities,commoner_graphs):
    """
    Generate JSON for NetworkX graph. Update commoner graphs.

    This method generates all necessary information from a NetworkX
    graph representation and returns it in a JSON format. It also updates
    the 'dynamic communities' and individual commoner graphs (using
    make_dynamic_communities and build_commoner_data methods)

    Note that ``G`` itself is modified: every edge gets an empty
    'activations' list, and (when the window has a start date) every node
    gets its 'date' set and its 'binary_active' / 'times_active'
    bookkeeping updated.

    :param G: NetworkX graph of interactions in time window
    :param window: 2-tuple containing start and end dates; a start of None
        means the whole (static) network is used
    :param index: integer representing time step; step 0 is treated as the
        cumulative graph
    :param communities: list holding NetworkX dynamic communities
        (filled in by make_dynamic_communities method)
    :param commoner_graphs: dictionary mapping each commoner node to
        its interaction history (filled in by build_commoner_data method)
    :returns: tuple containing:
        1. Updated dynamic communities
        2. Updated commoner_graphs
        3. JSON representation of NetworkX graph
        4. Updated NetworkX graph
    """
    edges_to_remove = []
    # NOTE: nothing is ever appended to tag_edges in this function, so the
    # tag-edge removal/re-insertion below currently does nothing.
    tag_edges = []
    #tag_nodes = {}
    tag_counts = {} #Holds counts of each of the tags
    cumulative = (index == 0)
    # The four counters below are initialised but not used in this function.
    create_count = 0
    comment_count = 0
    convo_count = 0
    trans_count = 0
    graph_copy = copy.deepcopy(G) #To avoid screwing future iterations
    nodeiter = G.nodes(data=True)
    edgeiter = G.edges(data=True)

    #Filter edges outside time window and add count stats
    for (u,v,c) in edgeiter:
        c['activations'] = []
        if window[0] is not None:
            # An edge exists in the window if any of its spells starts in
            # [window[0], window[1])
            edge_exists = False
            for intervals in c['spells']:
                if (window[0] <= cf.to_date(intervals[0]) < window[1]):
                    edge_exists = True
                    break
        else:
            edge_exists = True #Edge always exists in static network
        if edge_exists == False:
            edges_to_remove.append((u,v,c))
        else:
            copy_edge = graph_copy.edges[u,v]
            if window[0] is not None:
                # Start dates of the first and of the last spell
                copy_edge['first_active'] = copy_edge['spells'][0][0]
                copy_edge['last_active'] = copy_edge['spells'][len(copy_edge['spells'])-1][0]
            del graph_copy.edges[u,v]['spells']

    #Remove non-existent edges
    graph_copy.remove_edges_from(edges_to_remove)

    #Also remove the tag edges so not to influence k-core calculation
    graph_copy.remove_edges_from((tag_edges))

    #Filter nodes outside the time window
    nodes_to_remove = []
    zero_nodes = []
    if window[0] is not None:
        for (n,c) in nodeiter:
            graph_copy.nodes[n]['nodemeta'] = []
            graph_copy.nodes[n]['date'] = cf.to_str(window[0])
            c['date'] = cf.to_str(window[0]) #TODO: Do both lines need to be here?
            if nx.is_isolate(graph_copy,n):
                # No edges left in this window: the node is inactive
                nodes_to_remove.append(n)
                G.nodes[n]['binary_active'] += "0"
                graph_copy.nodes[n]['binary_active'] += "0"
                # Untyped nodes and nodes of the user type are remembered
                # and later handed to build_commoner_data
                if 'type' not in c or c['type'] == cf.user_type:
                    zero_nodes.append((n,c))
            else:
                G.nodes[n]['times_active'] += 1
                graph_copy.nodes[n]['times_active'] += 1
                G.nodes[n]['binary_active'] += "1"
                graph_copy.nodes[n]['binary_active'] += "1"
    graph_copy.remove_nodes_from(nodes_to_remove)

    #Get rid of spells and actions that fall outside the window range
    graph_copy = filter_spells(graph_copy,window)

    #DO THE KCORE CALCULATIONS HERE
    (core_G,colluders) = dx.weighted_core(graph_copy.to_undirected(),window,cumulative)

    #Add the tags back in
    core_G.add_edges_from(tag_edges)

    to_remove = []
    nodeiter = core_G.nodes(data=True)
    for (n,c) in nodeiter:
        if cf.user_type != '' and 'type' not in c: #If there are meant to be types but we can't find any
            to_remove.append(n)
    core_G.remove_nodes_from(to_remove)

    #Recommender data is built from the cumulative graph
    if not cumulative:
        build_commoner_data(core_G,commoner_graphs,zero_nodes)

    #Remove isolated nodes that exist after removing Basic Income
    core_G.remove_nodes_from(list(nx.isolates(core_G)))

    #Now compare fronts to previous partitions
    if not cumulative:
        partition = make_dynamic_communities(core_G,communities,index)
    else:
        undirectedGraph = core_G.to_undirected()
        partition = community.best_partition(undirectedGraph,weight='positivemax')

    # Store the community id and a display label on every remaining node
    nodeiter = core_G.nodes(data=True)
    for n,c in nodeiter:
        c['cluster'] = partition[n]
        if cf.LABEL_KEY != "":
            c['label'] = c[cf.LABEL_KEY]
        else:
            c['label'] = str(n)

    core_graph_json = json_graph.node_link_data(core_G)
    # The window end date and the colluders are attached as top-level
    # metadata of the JSON document
    if window[1] is not None:
        meta_info = {'date':cf.to_str(window[1]),'colluders':colluders}
        core_graph_json.update(meta_info)
    return (communities,commoner_graphs,core_graph_json,core_G)
def connect_mv_generators(network):
    """Connect MV generators to existing grids.

    This function searches for unconnected generators in MV grids and
    connects them.

    It connects

        * generators of voltage level 4
            * to HV-MV station

        * generators of voltage level 5
            * to the nearest connection object of the MV grid found within
              the configured buffer radius (candidates are tried from
              nearest to farthest); if none can be used the generator stays
              unconnected and a debug message is logged

    Parameters
    ----------
    network : :class:`~.grid.network.Network`
        The eDisGo container object

    Notes
    -----
    Adapted from `Ding0 <https://github.com/openego/ding0/blob/\
        21a52048f84ec341fe54e0204ac62228a9e8a32a/\
        ding0/grid/mv_grid/mv_connect.py#L820>`_.

    Only generators that are isolated nodes of the MV grid graph are
    processed.
    """

    # get params from config
    buffer_radius = int(
        network.config['grid_connection']['conn_buffer_radius'])
    buffer_radius_inc = int(
        network.config['grid_connection']['conn_buffer_radius_inc'])

    # get standard equipment
    std_line_type = network.equipment_data['mv_cables'].loc[
        network.config['grid_expansion_standard_equipment']['mv_line']]

    # generators are processed in repr() order so the sequence of random
    # line ids is stable between runs
    for geno in sorted(network.mv_grid.graph.nodes_by_attribute('generator'),
                       key=lambda _: repr(_)):
        if nx.is_isolate(network.mv_grid.graph, geno):

            # ===== voltage level 4: generator has to be connected to MV station =====
            if geno.v_level == 4:

                line_length = calc_geo_dist_vincenty(
                    network=network,
                    node_source=geno,
                    node_target=network.mv_grid.station)

                # the distance is divided by 1e3 before being stored as the
                # line length (presumably m -> km; confirm units)
                line = Line(id=random.randint(10**8, 10**9),
                            type=std_line_type,
                            kind='cable',
                            quantity=1,
                            length=line_length / 1e3,
                            grid=network.mv_grid)

                network.mv_grid.graph.add_edge(network.mv_grid.station,
                                               geno,
                                               line=line,
                                               type='line')

                # add line to equipment changes to track costs
                _add_cable_to_equipment_changes(network=network,
                                                line=line)

            # ===== voltage level 5: generator has to be connected to MV grid (next-neighbor) =====
            elif geno.v_level == 5:

                # get branches within the predefined radius `conn_buffer_radius`
                branches = calc_geo_lines_in_buffer(
                    network=network,
                    node=geno,
                    grid=network.mv_grid,
                    radius=buffer_radius,
                    radius_inc=buffer_radius_inc)

                # calc distance between generator and grid's lines -> find nearest line
                conn_objects_min_stack = _find_nearest_conn_objects(
                    network=network,
                    node=geno,
                    branches=branches)

                # connect!
                # go through the stack (from nearest to most far connection target object)
                generator_connected = False
                for dist_min_obj in conn_objects_min_stack:
                    target_obj_result = _connect_mv_node(
                        network=network,
                        node=geno,
                        target_obj=dist_min_obj)

                    # a non-None result means the connection was made
                    if target_obj_result is not None:
                        generator_connected = True
                        break

                if not generator_connected:
                    logger.debug(
                        'Generator {0} could not be connected, try to '
                        'increase the parameter `conn_buffer_radius` in '
                        'config file `config_grid.cfg` to gain more possible '
                        'connection points.'.format(geno))
def helper_walks(networks, nodes, network_ids, steps=10, number_of_walks=10, degree=True, probabilistic=True, weight="weight"):
    """
    Estimates for networks number_of_walks walks of size steps.

    Parameters:
        networks (list): of networkX graph objects
        nodes (list): of nodes (areas) to be compared.
        network_ids (list): list of network IDs, parallel to networks.
        steps (int): is size of random walk
        number_of_walks (int): how many random walks are performed on G
        degree (boolean): if True then the number of random walks performed
            for each starting node is dependent on its degree and is
            estimated as degree*number_of_walks.
        probabilistic (boolean): if True edge weights are taken into account
            else all edges are considered equal. If true then weight needs
            to be set
        weight (str): edge attribute name as contained in G. Weight is
            evaluated as a similarity

    Returns:
        walks (dict): key is network IDs and value is dict where key is
            starting node and value is list of performed walks. Each walk is
            a sublist and contains the node IDs in order of their visit by
            the random walk. A node that is missing from a network, or
            isolated in it, maps to an empty list for that network.
    """
    performed_walks = {net_id: {} for net_id in network_ids}

    for cn, node in enumerate(nodes):
        if cn % 100 == 0:
            print("walks for node ", cn, "outof", len(nodes))

        for i, net in enumerate(networks):
            network_id = network_ids[i]

            # Start from an empty result for EVERY network. Resetting only
            # once per node would store the walks of a previous network
            # under this network's id whenever the node is missing or
            # isolated here.
            walks = []

            if node in net.nodes() and not nx.is_isolate(net, node):
                if degree:
                    nw = int(number_of_walks * net.degree[node])
                    print("running walks", nw, "for node", node)
                else:
                    nw = number_of_walks

                walks = global_distances.perform_random_walks(
                    net, steps=steps, number_of_walks=nw, start=node,
                    probabilistic=probabilistic, weight=weight)

            # save
            performed_walks[network_id][node] = walks

    return performed_walks
def __getSourceNodes(self):
    """Return the nodes of ``self.G`` that have no incoming edge but are
    not isolated, i.e. that have at least one outgoing edge."""
    sources = []
    for node, indegree in self.G.in_degree():
        if indegree != 0:
            continue
        if nx.is_isolate(self.G, node):
            continue
        sources.append(node)
    return sources
# Calls into a range of NetworkX algorithms on G and Gcc; every return
# value is discarded.
# NOTE(review): G and Gcc are defined outside this excerpt -- Gcc is
# presumably a connected component of G; confirm.
nx.is_directed_acyclic_graph(G)
nx.is_aperiodic(G)

# distance measure (all for connected graph)
nx.center(Gcc)
nx.diameter(Gcc)
nx.eccentricity(Gcc)
nx.periphery(Gcc)
nx.radius(Gcc)

# flows (seg fault currently)
#nx.max_flow(Gcc, 1, 2)
#nx.min_cut(G, 1, 2)

# isolates (the trailing comments record the result the author expected)
nx.is_isolate(G, 1) # False
nx.is_isolate(G, 5) # True

# HITS
nx.hits(G,max_iter=1000) # cannot converge?

# maximal independent set
nx.maximal_independent_set(G)

# shortest path
nx.shortest_path(G) # need "predecessors_iter"
nx.all_pairs_shortest_path(G)
nx.all_pairs_shortest_path_length(G)

# predecessors on shortest paths from source node 1 (second call: to node 378)
nx.predecessor(G, 1)
nx.predecessor(G, 1, 378)
def load_data(self,file_name="1"):
    """
    This method loads existing data into a graph object.

    Two files in the current directory are read:

    * ``<file_name>_general_data.csv``: ``;``-separated key/value lines. The
      keys handled here are ``my_ID``, ``contexts_list`` and
      ``defined_colors``.
    * ``<file_name>_friend_data.csv``: read with pandas into
      ``self.friend_db`` (first row is the header, first column the index);
      its columns are relabelled with ints.

    From these the method fills ``self.mynet`` (nodes, ego ties, mutual ties
    and a ``"strong"`` attribute on every edge), builds ``self.no_ego_net``
    (the network without ``self.my_ID`` and without isolated nodes) and
    computes the centrality attributes ``b_cent``/``c_cent``/``d_cent``/
    ``e_cent`` (without ego) and their ``*_w`` variants (with ego).

    Args:
        file_name: prefix of the two data files; converted with ``str``.
    """
    self.contexts_list = {}
    # Loading in data other than the Database
    # NOTE(review): this file handle is never closed in this method.
    other = open("./"+str(file_name)+"_general_data.csv", "r+")
    for line in other:
        data = line.strip().split(";")
        if data[0] == "my_ID":
            # The literal string 'False' stands for "no ID"; it is stored as -1.
            if data[1] == 'False':
                self.my_ID = -1
            else:
                self.my_ID = int(data[1])
            self.mynet.add_node(self.my_ID)
        if data[0] == "contexts_list":
            # Value format: comma-separated "<int id>:<label>" pairs.
            self.contexts_list = {}
            if data[1] != "":
                new_data = data[1].split(",")
                for i in new_data:
                    if i != "":
                        j = i.split(":")
                        self.contexts_list[int(j[0])] = j[1]
        if data[0] == "defined_colors":
            self.defined_colors = [i for i in data[1].split(",")]
    # loading in the database
    self.friend_db = pd.read_csv("./"+str(file_name) +"_friend_data.csv", header=0, index_col=0)
    new_names = []
    for i in list(self.friend_db.columns.values):
        new_names.append(int(i))
        # NOTE(review): `i` is the raw column label here (not int(i)); if the
        # labels are strings this comparison with my_ID is always True - confirm.
        if self.my_ID != i:
            # adding network ties
            self.mynet.add_node(int(i))
            self.mynet.add_edge(int(i), self.my_ID)
    self.friend_db.columns = new_names
    # NOTE(review): `attrs` is not used anywhere else in this method.
    attrs=list(self.friend_db.index.values)
    # adding the mutual ties connections
    # (and converting to ints from strings)
    for i in list(self.friend_db.columns.values):
        # `x == x` is False only for NaN-like values, so missing entries are
        # left untouched by the conversions below.
        q = self.friend_db[int(i)]["known from"]
        if q==q:
            self.friend_db[int(i)]["known from"] = int(q)
        r = self.friend_db[int(i)]["race"]
        if r==r:
            self.friend_db[int(i)]["race"] = int(r)
        p = self.friend_db[int(i)]["gender"]
        if p==p:
            self.friend_db[int(i)]["gender"] = str(p)
        if type(self.friend_db[int(i)]["mutuals"]) == str:
            # Drop the first and last character (presumably list brackets -
            # confirm against the writer) and split the rest on commas.
            mut_str = self.friend_db[int(i)]["mutuals"][1:-1].split(",")
            mutuals = []
            for j in mut_str:
                try:
                    mutuals.append(int(j))
                except:
                    # int() failed: retry non-empty entries with long()
                    # (Python 2), silently skip empty ones.
                    if j != "":
                        mutuals.append(long(j))
                    else:
                        pass
            self.friend_db[int(i)]["mutuals"] = mutuals
            for j in mutuals:
                if int(j) in new_names:
                    # Only add the mutual tie when the edge lookup fails,
                    # i.e. when it is not in the network yet.
                    try:
                        edge = self.mynet[int(j)][int(i)]
                    except:
                        self.mynet.add_edge(int(j), int(i))
        val = self.friend_db[int(i)]["strong tie"]
        if val != "" and (val==val):
            self.mynet[int(i)][self.my_ID]["strong"] = int(val)
    # Give every edge that has no "strong" attribute yet the default 0.
    for edge in self.mynet.edges():
        try:
            self.mynet[int(edge[0])][int(edge[1])]["strong"]
        except:
            self.mynet[int(edge[0])][int(edge[1])]["strong"]=0
    # Build the network without ego: copy nodes and edges (no attributes),
    # drop my_ID, then drop and remember every node left without edges.
    isos = []
    bu = nx.Graph()
    bu.add_nodes_from(self.mynet.nodes())
    bu.add_edges_from(self.mynet.edges())
    bu.remove_node(self.my_ID)
    # NOTE(review): nodes are removed while iterating bu.nodes(); this is
    # only safe if nodes() returns a list copy (older networkx) - confirm.
    for n in bu.nodes():
        if nx.is_isolate(bu, n):
            bu.remove_node(n)
            isos.append(n)
    self.no_ego_net = bu
    self.b_cent = self.betweenness_centrality(withme=False)
    self.c_cent = self.closeness_centrality(withme=False)
    self.d_cent = self.degree_centrality(withme=False)
    self.e_cent = self.eigenvector_centrality(iterations=100,withme=False)
    # Nodes removed as isolates get the sentinel value -1 in the
    # without-ego centralities.
    for iso in isos:
        self.b_cent[iso]=-1
        self.c_cent[iso]=-1
        self.d_cent[iso]=-1
        self.e_cent[iso]=-1
    self.b_cent_w = self.betweenness_centrality(withme=True)
    self.c_cent_w = self.closeness_centrality(withme=True)
    self.d_cent_w = self.degree_centrality(withme=True)
    self.e_cent_w = self.eigenvector_centrality(iterations=100,withme=True)
    print "Data successfully loaded"
def _not_isolated_nodes(G):
    """Return the nodes of ``G`` that have at least one incident edge.

    The result is whatever the built-in ``filter`` produces for ``G.nodes``:
    nodes for which ``nx.is_isolate`` is true are dropped.
    """
    def _has_edges(candidate):
        # Keep a node only when networkx does not report it as an isolate.
        return not nx.is_isolate(G, candidate)

    return filter(_has_edges, G.nodes)
def __init__(self, obj, workflow=False, nested=True, edge_data=(), node_data=(), node_function=(), draw_outputs=0, view=False, node_styles=None, depth=-1, function_module=False, name=None, comment=None, directory=None, filename=None, format='svg', engine=None, encoding=None, graph_attr=None, node_attr=None, edge_attr=None, body=None, parent_dot='', _saved_outputs=None):
    """
    Plots the Dispatcher with a graph in the DOT language with Graphviz.

    :param obj:
        Object to plot. After `parent_func(obj)` it must be a `Solution`,
        `SubDispatchFunction`, `SubDispatch` or `Dispatcher`.
    :type obj: Solution | SubDispatchFunction | SubDispatch | Dispatcher

    :param workflow:
        If True the latest solution will be plotted, otherwise the dmap.
    :type workflow: bool, optional

    :param view:
        Open the rendered directed graph in the DOT language with the sys
        default opener.
    :type view: bool, optional

    :param nested:
        If False the sub-dispatcher nodes are plotted on the same graph,
        otherwise they can be viewed clicking on the node that has an URL
        link.
    :type nested: bool, optional

    :param edge_data:
        Edge attributes to view.
    :type edge_data: tuple[str], optional

    :param node_data:
        Data node attributes to view.
    :type node_data: tuple[str], optional

    :param node_function:
        Function node attributes to view.
    :type node_function: tuple[str], optional

    :param draw_outputs:
        It modifies the defaults data node and edge attributes to view.
        If `draw_outputs` is:

            - 1: node attribute 'output' is drawn.
            - 2: edge attribute 'value' is drawn.
            - 3: node 'output' and edge 'value' attributes are drawn.
            - otherwise: node 'output' and edge 'value' attributes are not
              drawn.
    :type draw_outputs: int, optional

    :param node_styles:
        Default node styles according to graphviz node attributes.
    :type node_styles: dict[str|Token, dict[str, str]]

    :param depth:
        Depth of sub-dispatch plots. If negative all levels are plotted.
    :type depth: int, optional

    :param function_module:
        If True the function labels are plotted with the function module,
        otherwise only the function name will be visible.
    :type function_module: bool, optional

    :param name:
        Graph name used in the source code. Defaults to the dispatcher name
        or, if that is empty, to '<dispatcher type name> <id(dsp)>'.
    :type name: str

    :param comment:
        Comment added to the first line of the source.
    :type comment: str

    :param directory:
        (Sub)directory for source saving and rendering.
    :type directory: str, optional

    :param filename:
        File name for saving the source.
    :type filename: str, optional

    :param format:
        Rendering output format ('pdf', 'png', ...).
    :type format: str, optional

    :param engine:
        Layout command used ('dot', 'neato', ...).
    :type engine: str, optional

    :param encoding:
        Encoding for saving the source.
    :type encoding: str, optional

    :param graph_attr:
        Dict of (attribute, value) pairs for the graph.
    :type graph_attr: dict, optional

    :param node_attr:
        Dict of (attribute, value) pairs set for all nodes.
    :type node_attr: dict, optional

    :param edge_attr:
        Dict of (attribute, value) pairs set for all edges.
    :type edge_attr: dict, optional

    :param body:
        Dict of (attribute, value) pairs to add to the graph body.
    :type body: dict, optional

    :param parent_dot:
        Passed to `get_id_map`; when it is non-empty and no `filename` is
        given, the default file name is built from `name[8:]` instead of
        `name`.
    :type parent_dot: str, optional

    :param _saved_outputs:
        Stored on the instance as `self._saved_outputs` (an empty dict when
        omitted).
    :type _saved_outputs: dict, optional

    :return:
        A directed graph source code in the DOT language.
    :rtype: graphviz.dot.Digraph

    :raises ValueError:
        If `obj` is of none of the supported types.

    Example:

    .. dispatcher:: dsp
       :opt: graph_attr={'ratio': '1'}
       :code:

        >>> from co2mpas.dispatcher import Dispatcher
        >>> dsp = Dispatcher(name='Dispatcher')
        >>> def fun(a):
        ...     return a + 1, a - 1
        >>> dsp.add_function('fun', fun, ['a'], ['b', 'c'])
        'fun'
        >>> dsp.plot(view=False, graph_attr={'ratio': '1'})
        <co2mpas.dispatcher.utils.drw.DspPlot object at 0x...>
    """

    # Imported at call time rather than at module level.
    from .sol import Solution
    from .. import Dispatcher
    from networkx import is_isolate

    # Keep the raw arguments as given by the caller.
    self._edge_data = edge_data
    self._node_data = node_data
    self._node_function = node_function
    self._graph_attr = graph_attr
    self._node_attr = node_attr
    self._edge_attr = edge_attr
    self._body = body
    self.node_styles = _upt_styles(node_styles or {}, self.__node_styles)
    self.depth = depth
    self.draw_outputs = draw_outputs
    self.function_module = function_module
    self._saved_outputs = _saved_outputs or {}
    self.workflow = workflow

    # Resolve the dispatcher (dsp) and solution (sol) behind `obj`; only the
    # sub-dispatch wrappers contribute explicit inputs/outputs.
    inputs, outputs = (), ()
    obj = parent_func(obj)

    if isinstance(obj, Solution):
        dsp, sol = obj.dsp, obj
    elif isinstance(obj, SubDispatchFunction):
        dsp, sol = obj.dsp, obj.solution
        inputs, outputs = obj.inputs or (), obj.outputs or ()
    elif isinstance(obj, SubDispatch):
        dsp, sol = obj.dsp, obj.solution
        if obj.output_type != 'all':
            outputs = obj.outputs or ()
    elif isinstance(obj, Dispatcher):
        dsp, sol = obj, obj.solution
    else:
        raise ValueError('Type %s not supported.' % type(obj).__name__)

    self.dsp = dsp

    _body = self.__body.copy()

    # Pick the graph to draw and the label template (filled with the
    # HTML-encoded name further down).
    if workflow:
        _body['label'] = '<%s workflow>'
        self.g = g = sol.workflow
        self.obj = sol
    else:
        _body['label'] = '<%s>'
        self.g = g = dsp.dmap
        self.obj = obj

    # i and j are slice ends applied to the default edge / node attribute
    # tuples: None keeps every default entry, -1 drops the last one.
    draw_outputs = int(draw_outputs)
    if draw_outputs == 1:
        i, j = -1, None
    elif draw_outputs == 2:
        i, j = None, -1
    elif draw_outputs == 3:
        i = j = None
    else:
        i = j = -1

    self.node_data = node_data or self.__node_data[:j]
    self.node_function = node_function or self.__node_function
    # The generic 'weight' key is replaced by the dispatcher's own weight
    # attribute name.
    self.edge_data = tuple(k if k != 'weight' else dsp.weight
                           for k in edge_data or self.__edge_data[:i])

    name = name or dsp.name or '%s %d' % (type(dsp).__name__, id(dsp))
    self.nested = nested

    if filename:
        # Explicit file name: resolve it (relative to `directory` if given)
        # and split the absolute path back into directory and file name.
        if directory is not None:
            filename = osp.join(directory, filename)
        directory, filename = osp.split(osp.abspath(filename))
    else:
        # No file name: use a fresh temporary directory unless one was given
        # and derive the file name from the graph name.
        if directory is None:
            directory = mkdtemp('')
        # NOTE(review): name[8:] drops the first 8 characters when parent_dot
        # is set - presumably a fixed-length prefix; confirm.
        filename = _encode_file_name(name[8:] if parent_dot else name)

    # Append the default extension unless the file name already ends with it.
    if osp.splitext(filename)[1] != self._default_extension:
        filename = '%s.%s' % (filename, self._default_extension)

    name = self._html_encode(name)
    _body['label'] = _body['label'] % name

    body = combine_dicts(_body, body or {})

    super(DspPlot, self).__init__(
        name=name,
        comment=comment,
        filename=filename,
        directory=directory,
        format=format,
        engine=engine,
        encoding=encoding,
        graph_attr=combine_dicts(self.__graph_attr, graph_attr or {}),
        node_attr=combine_dicts(self.__node_attr, node_attr or {}),
        edge_attr=combine_dicts(self.__edge_attr, edge_attr or {}),
        body=['%s = %s' % (k, v) for k, v in body.items()])

    self.id_map = self.get_id_map(parent_dot, chain(g.node, inputs, outputs))

    # Special data nodes: EMPTY when there is nothing to connect, START/END
    # when they exist in the graph or are needed for the extra inputs/outputs.
    if not g.node or not (g.edge or inputs or outputs):
        self._set_data_node(EMPTY, {})

    if START in g.node or (inputs and START not in g.node):
        self._set_data_node(START, {})

    if outputs and END not in g.node:
        self._set_data_node(END, {})

    for k, v in sorted(g.node.items()):
        # Skip nodes unknown to the dispatcher and a SINK without any edge.
        if k not in dsp.nodes or (k is SINK and is_isolate(g, SINK)):
            continue

        self._set_node(k, v)

    edges = {(u, v): a for u, v, a in g.edges_iter(data=True)}

    # Add (or extend) START -> input and output -> END edges, tagged with the
    # position of the input/output.
    for i, v in enumerate(inputs):
        n = (START, v)
        edges[n] = combine_dicts(edges.get(n, {}), {'inp_id': i})

    for i, u in enumerate(outputs):
        n = (u, END)
        edges[n] = combine_dicts(edges.get(n, {}), {'out_id': i})

    for (u, v), a in sorted(edges.items()):
        self._set_edge(u, v, a)

    if view:
        self.render(cleanup=True, view=True)
def main():
    """
    Build the scaffolding graph and run the two simplification steps.

    Everything is driven by the module-level ``args`` and shared with the
    helper functions through the globals declared below. In order, the
    function:

    * configures logging from ``args.d`` / ``args.v`` / ``args.log``;
    * creates one graph node per record of the FASTA file ``args.fasta``;
    * adds "synteny" edges read from ``args.phylo`` and "rna-seq" edges
      (via ``add_stranded_edge``) for every joint pair of ``args.rna`` seen
      at least ``args.min_reads`` times;
    * removes isolated nodes, logs statistics and writes
      ``00-complete_graph/graph.graphml``;
    * step 01: chain simplification, written to ``01-simplified_graph``;
    * step 02: DAG simplification, written to ``02-after_dag_graph``.

    The process exits with status 1 on unknown node names, on an unknown
    edge type and on any exception raised during step 01.
    """
    global seqDict
    global rna_gap
    global g
    global args
    global scaffolding_type_1
    global scaffolding_type_2

    """ Parsing arguments and initiating variables """
    # Log level: DEBUG with args.d, INFO with args.v, logging default otherwise.
    if args.d:
        logging.basicConfig(format='%(asctime)s::%(levelname)s::%(message)s',filename=args.log, level=logging.DEBUG)
    elif args.v:
        logging.basicConfig(format='%(asctime)s::%(levelname)s::%(message)s',filename=args.log, level=logging.INFO)
    else:
        logging.basicConfig(format='%(asctime)s::%(levelname)s::%(message)s',filename=args.log)

    rna_gap=int(args.rna_gap)
    logging.info("Begin the analysis with Scaff2link version "+version)

    mkdir_p(args.outDir)
    # To read an argument use args.fasta / args.phylo / args.rna etc.; each
    # one gives back the value as a string.
    g=nx.MultiDiGraph()

    ## Adding all vertexes to the graph
    """ Reading fasta """
    seqDict=dict()
    for record in SeqIO.parse(args.fasta, "fasta"):
        seqDict[record.id]=record.seq
        # strand starts empty; it is set when a synteny edge touches the node.
        g.add_node(record.id, length=len(record.seq), log10Len=math.log10(len(record.seq)), strand='')
    logging.info("Number of nodes parsed: "+str(len(g.nodes)))

    """ Adding edges from Ragout : """
    logging.info("Parsing edges from synteny information using ragout: "+args.phylo)
    scaffolding_type_1="synteny"
    current_scaffold=""
    fromName=""
    gap_length=0
    with open(args.phylo) as f:
        for line in f:
            line=line.rstrip("\n")
            # NOTE(review): line[0] is evaluated before the emptiness test, so
            # a blank line raises IndexError here - confirm inputs never
            # contain one.
            if line[0] != "#" and line !="":
                # Tab-separated columns used below: 0 = scaffold name,
                # 4 = "N" for a gap line, 5 = gap length or sequence name,
                # 8 = orientation.
                elems=line.split("\t")
                if current_scaffold == elems[0]:
                    if elems[4] == "N":
                        # Gap line: remember its length for the next edge.
                        gap_length=int(elems[5])
                    else:
                        if not (fromName in g.nodes and elems[5] in g.nodes):
                            logging.critical("Unknown node name when parsing ragout edge from "+fromName+" to "+elems[5])
                            sys.exit(1)
                        # Link the previous sequence of this scaffold to the
                        # current one.
                        g.add_edge(fromName,elems[5],type=scaffolding_type_1,fromStrand=from_orientation,toStrand=elems[8],gap=gap_length, readsCount=-1)
                        g.nodes[fromName]["strand"]=from_orientation
                        g.nodes[elems[5]]["strand"]=elems[8]
                        fromName=elems[5]
                        from_orientation=elems[8]
                else:
                    # First line of a new scaffold: only remember where it starts.
                    current_scaffold=elems[0]
                    fromName=elems[5]
                    from_orientation=elems[8]

    """ Parsing edges from Agouti : """
    logging.info("Parsing joint pairs in edges from rna-seq information using agouti: "+args.rna)
    scaffolding_type_2="rna-seq"
    joint_pairs=dict()
    with open(args.rna) as f:
        for line in f:
            line=line.rstrip()
            elems=line.split("\t")
            fromName=elems[1]
            toName=elems[4]
            # args.lib_type is indexed per mate: 'f' keeps the reported
            # strand, anything else reverses it.
            if args.lib_type[0]=='f':
                FromStrand=elems[3]
            else:
                FromStrand=strand_reverse(elems[3])
            if args.lib_type[1]=='f':
                ToStrand=elems[6]
            else:
                ToStrand=strand_reverse(elems[6])
            # Count how often each (strand, name, strand, name) pair is seen.
            key="\t".join([FromStrand,fromName,ToStrand,toName])
            if key not in joint_pairs.keys():
                joint_pairs[key]=1
            else:
                joint_pairs[key]+=1

    """ Adding parsed edges from Agouti : """
    logging.info("Adding rna-seq based edges to the scaffolding graph")
    for key in joint_pairs.keys():
        # Keep pairs supported by at least args.min_reads reads.
        if joint_pairs[key]>(int(args.min_reads)-1):
            key_list=key.split("\t")
            FromStrand=key_list[0]
            fromName=key_list[1]
            ToStrand=key_list[2]
            toName=key_list[3]
            if not (fromName in g.nodes and toName in g.nodes):
                logging.critical("Unknown node name when parsing agouti edge from "+fromName+" to "+toName)
                sys.exit(1)
            # Here we add the edge, but try to simplify it if it already exist because of Ragout
            # NOTE(review): numFromTo is not used afterwards in this function.
            numFromTo=g.number_of_edges(fromName,toName)
            add_stranded_edge(fromName,toName,FromStrand,ToStrand,scaffolding_type_2,joint_pairs[key])

    """ Finalizing and reporting the initial graph (end of step 00) """
    logging.info("Scaffolding graph completed, collecting nodes statistics")
    TotalNodes=len(g.nodes)
    ConnectedNodes=TotalNodes
    ConsistantNodes=TotalNodes
    # Isolated nodes are collected first and removed after the loop so the
    # node view is not modified while it is being iterated.
    node2remove=list()
    for node in g.nodes:
        if nx.is_isolate(g,node): # WAY TO GET THE DEGREE OF NODE
            #g.remove_node(node)
            node2remove.append(node)
            ConnectedNodes-=1
            ConsistantNodes-=1
        elif g.nodes[node]["strand"]=='.':
            ConsistantNodes-=1
    for node in node2remove:
        g.remove_node(node)

    logging.info("Out of "+str(TotalNodes)+" initial nodes, "+str(ConnectedNodes)+" are connected, and among them "+str(ConsistantNodes)+" are strand consistent")
    if args.v :
        logging.info("Collecting edges statistics")
        # Assumes g.edges yields one (u, v) tuple per parallel edge, so the
        # counter holds the multiplicity of each node pair - TODO confirm.
        edgeCounter=Counter(list(g.edges))
        t1=0
        t2=0
        t12=0
        # NOTE(review): edges_list and edges_set are not used afterwards in
        # this function.
        edges_list=list(g.edges)
        edges_set=set(g.edges)
        for edge in edgeCounter.keys():
            # Assumes the parallel-edge keys are the ints 0..count-1 - TODO
            # confirm.
            for key in range(0,edgeCounter[edge]):
                if g[edge[0]][edge[1]][key]["type"]==scaffolding_type_1:
                    t1+=1
                elif g[edge[0]][edge[1]][key]["type"]==scaffolding_type_2:
                    t2+=1
                elif g[edge[0]][edge[1]][key]["type"]==scaffolding_type_1+"_AND_"+scaffolding_type_2:
                    t12+=1
                else:
                    logging.critical("unknown scaffold type")
                    sys.exit(1)
        logging.info("Edges statistics : \n"+ ";".join([scaffolding_type_1,scaffolding_type_2,scaffolding_type_1+"_AND_"+scaffolding_type_2])+ "\n"+";".join([str(t1),str(t2),str(t12)]))

    mkdir_p(args.outDir+'/00-complete_graph')
    nx.write_graphml(g, args.outDir+"/00-complete_graph/graph.graphml")

    """ Step 01 : chain simplification """
    logging.info("starting first chain simplification")
    setNodes=set(g.nodes)
    basename="scaff2links_chain_"
    count_name=1
    while len(setNodes)!=0:
        # NOTE(review): the bare except below turns any error raised in this
        # body (including KeyboardInterrupt/SystemExit) into "dump graph and
        # exit 1".
        try:
            node=setNodes.pop()
            if g.nodes[node]["strand"]!='.':
                inspectIn=True
                inspectOut=True
                chain=[node]
                nodeIn=node
                nodeOut=node
                logging.debug("Chain simplification started on node:"+node)
                # Walk backwards along in-edges while the current end has
                # exactly one in-edge and its source has exactly one out-edge.
                while inspectIn:
                    if len(g.in_edges(nodeOut))==1 :
                        possibleNodeOut=list(g.in_edges(nodeOut))[0][0]
                        # An edge in the opposite direction stops the extension.
                        if g.has_edge(nodeOut,possibleNodeOut):
                            inspectIn=False
                            logging.debug("STOP")
                        else:
                            logging.debug("IN chain extension: (in)"+nodeOut+"\t(out)"+possibleNodeOut)
                            if (len(g.out_edges(possibleNodeOut))==1) and (g.nodes[possibleNodeOut]["strand"]!='.'):
                                logging.debug("CONTINUE")
                                nodeOut=possibleNodeOut
                                chain.append(nodeOut)
                                setNodes.remove(nodeOut)
                            else :
                                inspectIn=False
                                logging.debug("STOP")
                    else:
                        inspectIn=False
                # Same walk forwards along out-edges.
                while inspectOut:
                    if len(g.out_edges(nodeIn))==1 :
                        possibleNodeIn=list(g.out_edges(nodeIn))[0][1]
                        if g.has_edge(possibleNodeIn,nodeIn):
                            inspectOut=False
                            logging.debug("STOP")
                        else:
                            logging.debug("OUT chain extension: (out)"+nodeIn+"\t(in)"+possibleNodeIn)
                            if (len(g.in_edges(possibleNodeIn))==1) and (g.nodes[possibleNodeIn]["strand"]!='.'):
                                logging.debug("CONTINUE")
                                nodeIn=possibleNodeIn
                                chain.append(nodeIn)
                                setNodes.remove(nodeIn)
                            else:
                                inspectOut=False
                                logging.debug("STOP")
                    else:
                        inspectOut=False
                name=basename+str(count_name)
                count_name+=1
                # nodeOut is the first node reached going backwards, nodeIn
                # the last one reached going forwards.
                def_chain=chain_simplification(chain=chain,start=nodeOut,end=nodeIn,name=name,chain_is_path=False)
                logging.debug(name+"\t"+def_chain)
        except:
            nx.write_graphml(g, args.outDir+"/error.graphml")
            print(traceback.format_exc())
            logging.critical("error during first chain simplification, the current graph have been written")
            sys.exit(1)

    """ end of step 01 writing... """
    mkdir_p(args.outDir+'/01-simplified_graph')
    nx.write_graphml(g, args.outDir+"/01-simplified_graph/graph.graphml")
    # The graph is written first; isolated nodes are dropped only afterwards.
    node2remove=list()
    for node in g.nodes:
        if nx.is_isolate(g,node): # WAY TO GET THE DEGREE OF NODE
            #g.remove_node(node)
            node2remove.append(node)
    for node in node2remove:
        g.remove_node(node)
    # Writes every entry of seqDict (presumably updated by
    # chain_simplification - verify in that helper).
    with open(args.outDir+"/01-simplified_graph/scaffolds.fasta", "w") as output_handle:
        for key in seqDict.keys():
            SeqIO.write(SeqRecord(seqDict[key],id=key,description=''), output_handle, "fasta")

    """ Step 02 : dag simplification """
    logging.info("starting dag simplification")
    logging.info("Find remaining bridges")
    # Undirected copy of the graph with all bridges removed; its connected
    # components are the candidate regions inspected below.
    g_broken=nx.Graph(g.copy())
    edge2remove=list()
    for e in nx.bridges(g_broken):
        edge2remove.append(e)
    for e_ in edge2remove:
        g_broken.remove_edge(e_[0],e_[1])
    basename="scaff2links_dag_"
    count_name=1
    logging.info("Connected component analysis for DAG")
    for nset in nx.connected_components(g_broken):
        subg=g.subgraph(nset)
        if nx.is_directed_acyclic_graph(subg) and len(list(subg.nodes()))>1:
            lpath=nx.dag_longest_path(subg)
            logging.debug("DAG found :"+str(nset)+"| type="+str(type(nset)))
            if len(list(subg.nodes()))==2:
                # NOTE(review): logging.warn is a deprecated alias of
                # logging.warning.
                logging.warn("A connected component resulting from the graph where all bridges have been removed is of size 2, which is mathematically unexpected")
            # Only components whose longest path visits every node are handled.
            if len(lpath)==len(nset):
                logging.debug("DAG with all nodes in the longest path: ("+")->-(".join(lpath)+')')
                # check neighbor
                # pos is the index of `node` in lpath, start the index where
                # the segment currently being collected begins.
                pos=-1
                start=0
                for node in lpath:
                    pos+=1
                    # add_set holds the outside neighbours that are tolerated
                    # for this node: predecessors at a segment start,
                    # successors at the path end.
                    add_set=set()
                    if node==lpath[0] or pos == start:
                        for e in list(g.in_edges(node)):
                            add_set.add(e[0])
                    if node==lpath[-1]:
                        for e in list(g.out_edges(node)):
                            add_set.add(e[1])
                    # Any other neighbour outside the component makes this a
                    # cutting node.
                    if not (set(nx.all_neighbors(g,node)) <= (add_set | nset)):
                        logging.debug("Found cutting node in DAG: "+lpath[pos])
                        for e in list(g.out_edges(node)):
                            add_set.add(e[1])
                        if (set(nx.all_neighbors(g,node)) <= (add_set | nset)) and (pos-start>0):
                            # Only successors leave the component: close the
                            # segment with the cutting node included.
                            logging.debug("cutted DAG simplification (including the cutting node) at "+lpath[pos]+": ("+")->-(".join([ lpath[x] for x in range(start,pos+1) ] )+')')
                            chain_simplification([ lpath[x] for x in range(start,pos+1) ],lpath[start],lpath[pos],basename+str(count_name),chain_is_path=True)
                            count_name+=1
                            start=pos+1
                            checkStart=False
                        elif pos-start>1:
                            # Close the segment just before the cutting node.
                            logging.debug("cutted DAG simplification (excluding the cutting node) at "+lpath[pos]+": ("+")->-(".join([ lpath[x] for x in range(start,pos) ] )+')')
                            chain_simplification([ lpath[x] for x in range(start,pos) ],lpath[start],lpath[pos-1],basename+str(count_name),chain_is_path=True)
                            count_name+=1
                            checkStart=True
                        else:
                            checkStart=True
                        if checkStart:
                            # The cutting node starts the next segment unless
                            # one of its successors lies outside the component.
                            start=pos
                            for e in list(g.out_edges(node)):
                                if not e[1] in nset:
                                    start=pos+1
                                    logging.debug("NB: Cutting node excluded as start")
                                    break
                            if start==pos:
                                logging.debug("NB: Cutting node included as start")
                # Whatever is left after the last cut forms the final segment.
                if pos-start>0:
                    logging.debug("Final DAG simplification : ("+")->-(".join([ lpath[x] for x in range(start,pos+1) ] )+')')
                    chain_simplification([ lpath[x] for x in range(start,pos+1) ],lpath[start],lpath[pos],basename+str(count_name),chain_is_path=True)
                    count_name+=1
                    start=pos+1

    """ end of step 02 writing... """
    logging.info("Write graph and fasta after DAG simplification")
    mkdir_p(args.outDir+'/02-after_dag_graph')
    nx.write_graphml(g, args.outDir+"/02-after_dag_graph/graph.graphml")
    # As after step 01: write the graph, then drop isolated nodes.
    node2remove=list()
    for node in g.nodes:
        if nx.is_isolate(g,node): # WAY TO GET THE DEGREE OF NODE
            #g.remove_node(node)
            node2remove.append(node)
    for node in node2remove:
        g.remove_node(node)
    with open(args.outDir+"/02-after_dag_graph/scaffolds.fasta", "w") as output_handle:
        for key in seqDict.keys():
            SeqIO.write(SeqRecord(seqDict[key],id=key,description=''), output_handle, "fasta")