def __remove_random_fermat_point(self, input_graph, fermat_points):
    """Try dropping one randomly chosen Fermat point from a copy of the graph.

    Returns a ``(spanning_tree, graph)`` pair. The pruned copy is returned
    only when more than one Fermat point is available and the copy is still
    connected after the removal; in every other case the untouched
    ``input_graph`` and its spanning tree are returned.
    """
    pruned = input_graph.copy()
    candidate_total = len(fermat_points)
    if candidate_total > 1:
        victim_index = rnd.randint(0, candidate_total - 1)
        pruned.remove_node(fermat_points[victim_index])
        if nx.is_connected(pruned):
            return nx.minimum_spanning_tree(pruned), pruned
    # Nothing was removed, or the removal disconnected the graph.
    return nx.minimum_spanning_tree(input_graph), input_graph
def test_mst_edges_specify_weight(self):
    """The ``weight`` keyword selects which edge attribute drives the MST."""
    edge_specs = [
        (1, 2, dict(weight=1, color='red', distance=7)),
        (1, 3, dict(weight=30, color='blue', distance=1)),
        (2, 3, dict(weight=1, color='green', distance=1)),
    ]
    graph = nx.Graph()
    for u, v, attrs in edge_specs:
        graph.add_edge(u, v, **attrs)
    # An isolated node and a graph attribute ride along with the edges.
    graph.add_node(13, color='purple')
    graph.graph['foo'] = 'bar'
    all_nodes = [1, 2, 3, 13]

    # Default attribute name ('weight').
    tree = nx.minimum_spanning_tree(graph)
    assert_equal(sorted(tree.nodes()), all_nodes)
    assert_equal(sorted(tree.edges()), [(1, 2), (2, 3)])

    # Explicit attribute name ('distance') selects a different tree.
    tree = nx.minimum_spanning_tree(graph, weight='distance')
    assert_equal(sorted(tree.edges()), [(1, 3), (2, 3)])
    assert_equal(sorted(tree.nodes()), all_nodes)
def test_prim_minimum_spanning_tree_edges_specify_weight(self):
    """Prim's algorithm honours the ``weight`` keyword."""
    graph = nx.Graph()
    graph.add_edges_from([
        (1, 2, {"weight": 1, "color": "red", "distance": 7}),
        (1, 3, {"weight": 30, "color": "blue", "distance": 1}),
        (2, 3, {"weight": 1, "color": "green", "distance": 1}),
    ])
    graph.add_node(13, color="purple")
    graph.graph["foo"] = "bar"
    all_nodes = [1, 2, 3, 13]

    # Default attribute name ("weight").
    by_weight = nx.minimum_spanning_tree(graph, algorithm="prim")
    assert_equal(sorted(by_weight.nodes()), all_nodes)
    assert_equal(sorted(by_weight.edges()), [(1, 2), (2, 3)])

    # Explicit attribute name ("distance").
    by_distance = nx.minimum_spanning_tree(
        graph, weight="distance", algorithm="prim")
    assert_equal(sorted(by_distance.edges()), [(1, 3), (2, 3)])
    assert_equal(sorted(by_distance.nodes()), all_nodes)
def spanning_trees():
    """Build the 'span_trees' network from two per-plant spanning trees.

    Nodes of the full grid are split by their first coordinate (``< 5`` is
    plant 1, everything else plant 2). Each half contributes the edges of its
    minimum spanning tree with difficulty 1, ``assign_difficulties`` is then
    applied, and finally the eleven ``(4, i)-(5, i)`` edges joining the two
    halves are added with difficulty 3.
    """
    G = full_grid_nocut()
    H = nx.Graph()

    plant_nodes = {1: [], 2: []}
    for u in G.nodes():
        H.add_node(u)
        plant = 1 if u[0] < 5 else 2
        plant_nodes[plant].append(u)
        H.node[u]['plant'] = plant

    first_half = G.subgraph(plant_nodes[1])
    second_half = G.subgraph(plant_nodes[2])
    tree_one = nx.minimum_spanning_tree(first_half)
    tree_two = nx.minimum_spanning_tree(second_half)

    for u, v in tree_one.edges() + tree_two.edges():
        H.add_edge(u, v)
        H[u][v]['difficulty'] = 1

    H.graph['nests'] = G.graph['nests']
    H.graph['name'] = 'span_trees'

    assign_difficulties(H)

    # Bridge the two plants along the column boundary.
    for i in xrange(11):
        left, right = (4, i), (5, i)
        H.add_edge(left, right)
        H[left][right]['difficulty'] = 3

    return H
def chords(G):
    """Split G into a spanning tree and the chords left over.

    The chords are all the edges of G that are not in a spanning tree of G.
    For a directed G the spanning tree is computed on an undirected
    (multi)graph built from G.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    Returns
    -------
    T : graph
        A copy of G (same graph type as G) with every chord removed, i.e.
        the spanning tree from which C was calculated.
    C : graph
        A copy of G with the spanning-tree edges removed, i.e. the chords.
    """
    if G.is_directed():
        undirected_type = nx.MultiGraph if G.is_multigraph() else nx.Graph
        tree = nx.minimum_spanning_tree(undirected_type(G))
    else:
        tree = nx.minimum_spanning_tree(G)

    C = G.copy()
    for u, v in tree.edges_iter():
        try:
            C.remove_edge(u, v)
        except nx.NetworkXError:
            # The tree comes from an undirected view, so for a directed G
            # the edge may be stored in the opposite orientation.
            C.remove_edge(v, u)

    # Recreate T from G so that it has the same type as G.
    T = G.copy()
    if G.is_multigraph():
        chord_edges = C.edges_iter(keys=True)
    else:
        chord_edges = C.edges_iter()
    for e in chord_edges:
        T.remove_edge(*e)

    return T, C
def test_min_edges():
    """mod_boruvka on the high-mv-max / long-edge fixture must yield an MST.

    The forest built by ``mod_boruvka`` is compared, edge set against edge
    set, with the NetworkX minimum spanning tree of the complete graph of
    pairwise Euclidean distances between the forest's coordinates.
    """
    g = graph_high_mvmax_long_edge()
    subgraphs = UnionFind()
    rtree = Rtree()

    # build min span forest via mod_boruvka
    msf_g = mod_boruvka(g, subgraphs=subgraphs, rtree=rtree)

    # use networkx to build mst and compare
    # list(...) is required on Python 3: np.array() on a dict view would
    # produce a 0-d object array instead of an (n, dims) coordinate array.
    c = np.array(list(msf_g.coords.values()))
    deltas = c[np.newaxis, :, :] - c[:, np.newaxis, :]
    all_dists = np.sqrt((deltas ** 2).sum(2))

    complete_g = nx.Graph(all_dists)
    mst_g = nx.minimum_spanning_tree(complete_g)

    # frozenset makes the comparison independent of endpoint order
    mst_edge_set = set(frozenset(e) for e in mst_g.edges())
    msf_edge_set = set(frozenset(e) for e in msf_g.edges())
    assert msf_edge_set == mst_edge_set
def order_contigs(self):
    """
    Derive an initial contig order from the linking contacts alone.

    The contig graph (edges weighted by contact weight) is split into
    subgraphs by ``decompose_graph``. For every subgraph with more than one
    node the edge weights are inverted, a minimum spanning tree is taken,
    and the weights are inverted a second time; the node list obtained from
    a weighted depth-first walk of that tree gives the subgraph's order.
    The per-subgraph orders are concatenated. Single-node subgraphs are
    appended at the end in no particular order.

    :return: order of contigs
    """
    ordered = []
    singletons = []
    for component in decompose_graph(self.create_contig_graph()):
        if component.order() <= 1:
            singletons.extend(component.nodes())
            continue
        inverse_edge_weights(component)
        tree = nx.minimum_spanning_tree(component)
        # second inversion, presumably restoring the original weights
        inverse_edge_weights(component)
        ordered.extend(edgeiter_to_nodelist(dfs_weighted(tree)))
    return ordered + singletons
def __init__(self, topology_file = "gates_topology.dot"):
    """Load the switch topology and derive its spanning tree.

    Reads ``topology_file`` with pygraphviz, fills the dpid/switch lookup
    tables, the core-switch set and the per-switch port mappings, then
    stores the minimum spanning tree of the topology in ``self.nx_topo``.
    The containers ``dpid_to_switch_dict``, ``switch_to_dpid_dict``,
    ``core_switches`` and ``port_mappings`` are used but not created here,
    so they must already exist on the instance or class.
    """
    frenetic.App.__init__(self)

    logging.info("---> Reading Topology from "+topology_file)
    self.agraph = pgv.AGraph(topology_file)
    for switch in self.agraph.nodes():
        dpid = str(switch.attr['dpid'])
        switch_name = str(switch)
        self.dpid_to_switch_dict[dpid] = switch_name
        self.switch_to_dpid_dict[switch_name] = dpid
        if switch.attr['core']:
            self.core_switches.add(switch_name)

    # It's faster to denormalize this now
    logging.info("---> Remembering internal ports")
    self.switch_internal_ports = dict(
        (switch_name, set()) for switch_name in self.switch_to_dpid_dict)
    for link in self.agraph.edges():
        source_sw = str(link[0])
        dest_sw = str(link[1])
        source_port = int(link.attr["src_port"])
        dest_port = int(link.attr["dport"])
        # Record the link once from each end.
        endpoints = (
            (source_sw, dest_sw, source_port),
            (dest_sw, source_sw, dest_port),
        )
        for near, far, port in endpoints:
            self.switch_internal_ports[near].add(port)
            self.port_mappings.setdefault(near, {})[far] = port

    logging.info("---> Calculating spanning tree")
    self.nx_topo = nx.minimum_spanning_tree(nx.from_agraph(self.agraph))
def rand_spanning_tree(N, rand_weights=False):
    '''Create a random spanning tree on N nodes.

    A graph with uniformly random edge weights is built over every node
    pair ``(i, j)`` with ``j <= i`` (the diagonal is included), and its
    minimum spanning tree is taken.

    Args:
        N (int): Number of nodes
        rand_weights (bool): If true, scale the tree's adjacency entries by
            a fresh symmetric random matrix instead of leaving them at 1.

    Returns:
        A NxN numpy array representing the adjacency matrix of the graph.
    '''
    # Random weights for the dense starting graph
    edge_weights = rand.rand(N, N)
    dense = nx.Graph()
    dense.add_nodes_from(xrange(N))
    dense.add_edges_from(
        (i, j, {'weight': edge_weights[i, j]})
        for i in xrange(N)
        for j in xrange(i + 1)
    )

    # Minimal spanning tree, flattened to a 0/1 adjacency matrix
    tree = nx.minimum_spanning_tree(dense)
    adjacency = nx.adj_matrix(tree).toarray()
    adjacency[adjacency > 0] = 1

    if not rand_weights:
        return adjacency

    # Symmetric random weights on the tree edges
    lower = np.tril(rand.rand(N, N))
    symmetric = lower + np.transpose(lower)
    return adjacency * symmetric
def from_graph(self, G):
    """Initialise the clique-tree bookkeeping from graph ``G``.

    Stores a copy of ``G``, builds the max-clique graph, weights each of its
    edges by minus the size of the separator (the nodes shared by the two
    cliques) and keeps the minimum spanning tree of that graph as
    ``self.cliquetree``. It then fills ``self.node_in_cliques`` (node to
    clique ids) and ``self.nodes_in_clique`` (clique id to nodes), resets
    ``self.uid`` and ``self.insertable`` and calls ``update_insertable`` for
    every node.

    Clique-graph nodes are paired with the output of ``find_cliques`` by
    position, so both are assumed to enumerate the cliques in the same order.
    """
    self.G = G.copy()
    cliques = nx.clique.find_cliques(G)
    cliquegraph = nx.clique.make_max_clique_graph(G)
    clique_dict = dict(zip(cliquegraph.nodes(), cliques))

    # Iterate over a snapshot: every edge is removed and re-added below, and
    # mutating the graph while walking its live edge view is unsafe.
    for u, v in list(cliquegraph.edges()):
        sep = set(clique_dict[u]).intersection(clique_dict[v])
        cliquegraph.remove_edge(u, v)
        # Negative weight: the minimum spanning tree then maximises the
        # total separator size.
        cliquegraph.add_edge(u, v, nodes=sep, weight=-len(sep))
    self.cliquetree = nx.minimum_spanning_tree(cliquegraph)

    for v in self.G:
        self.node_in_cliques[v] = set()
    for v, members in clique_dict.items():
        self.nodes_in_clique[v] = set()
        for node in members:
            self.nodes_in_clique[v].add(node)
            self.node_in_cliques[node].add(v)

    self.uid = len(G) + 1
    self.insertable = set()
    for v in self.G:
        self.update_insertable(v)
def visualize_mst(votes):
    """Draw the minimum spanning tree of ``votes`` with a spring layout.

    The tree is computed on the ``'difference'`` edge attribute and each
    node is coloured by its ``'color'`` node attribute. The NumPy random
    seed is fixed so the layout is the same on every call.
    """
    min_spanning_tree = nx.minimum_spanning_tree(votes, weight='difference')

    # this makes sure draw_spring results are the same at each call
    np.random.seed(1)

    color = [min_spanning_tree.node[senator]['color']
             for senator in min_spanning_tree.nodes()]

    # determine position of each node using a spring layout
    pos = nx.spring_layout(min_spanning_tree, iterations=200)
    plt.figure(figsize=(25, 25))

    # plot the edges
    nx.draw_networkx_edges(min_spanning_tree, pos, alpha=.5)

    # plot the nodes
    nx.draw_networkx_nodes(min_spanning_tree, pos, node_color=color)

    # draw the labels; alpha must lie in [0, 1] (the previous value of 5 is
    # rejected by current Matplotlib), so use fully opaque text
    nx.draw_networkx_labels(min_spanning_tree, pos, alpha=1, font_size=8)

    # coordinate information is meaningless here, so let's remove it
    plt.xticks([])
    plt.yticks([])
    remove_border(left=False, bottom=False)
def test_mst_disconnected(self):
    """The MST of a disconnected graph is a spanning forest."""
    G = nx.Graph()
    # Single edges instead of Graph.add_path, which newer NetworkX lacks.
    G.add_edge(1, 2)
    G.add_edge(10, 20)
    T = nx.minimum_spanning_tree(G)
    # The endpoint order of an undirected edge is arbitrary, so normalise
    # each edge before comparing instead of expecting (20, 10).
    assert_equal(sorted(map(sorted, T.edges())), [[1, 2], [10, 20]])
    assert_equal(sorted(T.nodes()), [1, 2, 10, 20])
def threshold_matrix(M, cost):
    '''
    Binarise an association matrix at a percentile, always keeping a
    spanning tree of its strongest edges.

    M is the full association matrix.
    cost is the percentile (0 to 100) at which you'd like to threshold.

    threshold_matrix first creates a copy of the input matrix and sets all
    diagonal values to -999 so they fall below any realistic threshold.
    It then sets all values below the ``cost`` percentile of the upper
    triangle to 0 and every remaining non-zero value to 1. Finally the
    edges of the spanning tree built from the strongest associations
    (a minimum spanning tree on the negated weights) are set to 1, so they
    are *always* included in the thresholded matrix.
    '''
    # Make a copy of the matrix
    thr_M = np.copy(M)

    # Set all diagonal values to -999
    thr_M[np.diag_indices_from(thr_M)] = -999

    # Calculate the spanning tree of the strongest associations.
    # ('weight' * -1 is the empty string, not a negated weight, so the
    # negation has to be stored as a separate edge attribute.)
    G = nx.from_numpy_matrix(M)
    for _, _, data in G.edges(data=True):
        data['neg_weight'] = -data['weight']
    mst = nx.minimum_spanning_tree(G, weight='neg_weight')

    # Calculate the threshold value
    thr = np.percentile(thr_M[np.triu_indices_from(thr_M, k=1)], cost)

    # Set all values that are less than the threshold to 0
    thr_M[thr_M < thr] = 0

    # Set all values that are not zero to 1
    thr_M[thr_M != 0] = 1

    # Make sure the spanning-tree edges survive the threshold
    for u, v in mst.edges():
        if u != v:
            thr_M[u, v] = 1
            thr_M[v, u] = 1

    return thr_M
def test_prim_minimum_spanning_tree_disconnected(self):
    """Prim's algorithm returns a spanning forest for a disconnected graph."""
    two_components = nx.Graph()
    for u, v in ((1, 2), (10, 20)):
        two_components.add_edge(u, v)
    forest = nx.minimum_spanning_tree(two_components, algorithm='prim')
    # Sort inside each edge first so endpoint order does not matter.
    normalised_edges = sorted(sorted(edge) for edge in forest.edges())
    assert_equal(normalised_edges, [[1, 2], [10, 20]])
    assert_equal(sorted(forest.nodes()), [1, 2, 10, 20])
def _retrieve_skycoords(V):
    """Return one ordered ``SkyCoord`` per connected component of ``V``.

    Each connected component of ``V`` is expected to be a cycle (the
    external border of the MOC component, or the border of one of its
    holes) whose nodes carry ``'ra'`` and ``'dec'`` attributes. The
    coordinates of every component are ordered by a depth-first walk of the
    component's minimum spanning tree.
    """
    coords_l = []
    # Accessing the borders one by one. connected_components + subgraph
    # replaces nx.connected_component_subgraphs, which newer NetworkX
    # releases no longer provide.
    for component in nx.connected_components(V):
        # Relabel to 0..n-1 so node labels can index the coordinate array
        v = nx.convert_node_labels_to_integers(V.subgraph(component))
        # Compute the MST for each cycle
        mst = nx.minimum_spanning_tree(v)
        # Get one end of the span tree: the first node whose degree is one
        # (None when there is no such node)
        src = next((node for node, deg in mst.degree() if deg == 1), None)

        # Get the unordered lon and lat
        ra = np.asarray(list(nx.get_node_attributes(v, 'ra').values()))
        dec = np.asarray(list(nx.get_node_attributes(v, 'dec').values()))
        coords = np.vstack((ra, dec)).T
        # Get the ordering from the MST
        ordering = np.asarray(list(nx.dfs_preorder_nodes(mst, src)))
        # Order the coords
        coords = coords[ordering]
        # Get a skycoord containing N coordinates computed from the Nx2
        # `coords` array
        coords_l.append(SkyCoord(coords, unit="deg"))

    return coords_l
def chow_liu(data, mi_estimator=discrete_mutual_information):
    """Build a tree-shaped ``DGM`` over the columns of ``data``.

    Every pair of columns is joined by an edge weighted with the negated
    value of ``mi_estimator``. The minimum spanning tree of that graph is
    then oriented by a depth-first search rooted at the first column.
    """
    columns = list(data.columns)
    dependency_graph = nx.Graph()
    dependency_graph.add_nodes_from(columns)
    for left, right in combinations(columns, 2):
        mutual_information = mi_estimator(data[[left]], data[[right]])
        dependency_graph.add_edge(left, right, weight=-mutual_information)
    spanning_tree = nx.minimum_spanning_tree(dependency_graph)
    return DGM(nx.dfs_tree(spanning_tree, columns[0]))
def verify_solution(self, sol):
    """Check ``sol`` against the edge set of NetworkX's own MST.

    The comparison is only meaningful when the MST is unique, i.e. when
    the edges have unique weights.
    """
    reference_tree = nx.minimum_spanning_tree(self.graph)
    reference_edges = set(reference_tree.edges())
    return reference_edges == sol
def minimal_couplers(subgraphs, edges):
    '''Use the fewest possible number of couplers between and within
    subgraphs.

    subgraphs is a mapping from a key to a NetworkX graph; it must be a
    mapping because its keys are used to index ``edges``.
    edges maps each key pair ``(q1, q2)`` with ``q1 < q2`` to the candidate
    edges between those two subgraphs. It is modified in place: each entry
    is replaced by the single candidate with the lowest summed end-node
    cost.

    Returns the mapping of keys to spanning trees, and ``edges``.
    '''

    # map each subgraph to its minimum spanning tree, keeping the keys
    trees = {key: nx.minimum_spanning_tree(subgraphs[key])
             for key in subgraphs}

    # for each tree, find a root node and store the shortest path to each
    # node as a cost metric.
    costs = {}
    for tree in trees.values():
        # dict(...) accepts both the dict-of-dicts and the iterator of
        # (node, lengths) pairs that different NetworkX versions return
        path_lengths = dict(nx.shortest_path_length(tree))
        # identify the root (sort_dict is assumed to return the keys
        # ordered by ascending value)
        root_weights = {k: sum(path_lengths[k].values())
                        for k in path_lengths}
        root = sort_dict(root_weights)[0]
        # assign path lengths as node costs
        costs.update(path_lengths[root])

    # for each pair of subgraphs, keep the inter-subgraph edge with the
    # minimum total cost of its end nodes
    nodes = sorted(trees)
    for i, q1 in enumerate(nodes[:-1]):
        for q2 in nodes[i + 1:]:
            edge_costs = {e: costs[e[0]] + costs[e[1]]
                          for e in edges[(q1, q2)]}
            edges[(q1, q2)] = sort_dict(edge_costs)[0]

    return trees, edges
def main():
    """Draw a small weighted graph, then draw its minimum spanning tree."""
    weighted_edges = [
        (1, 2, 13.0), (1, 3, 8.0), (1, 5, 33.0),
        (2, 3, 22.0), (2, 4, 17.0), (2, 6, 20.0),
        (3, 5, 18.0),
        (4, 5, 10.0), (4, 6, 5.0),
        (5, 6, 15.0),
    ]

    # nx.Graph is an undirected graph; it is built from its edge list
    graph = nx.Graph()
    graph.add_weighted_edges_from(weighted_edges)
    desenhaGrafo(graph, "grafo-c.png")

    tree = nx.minimum_spanning_tree(graph)
    desenhaGrafo(tree, "arv-min-c.png")
def create_spanning_tree(self, username="******", password="******"):
    """Disable, over ssh, every source port whose link is not in the MST.

    A minimum spanning tree of ``self.graph`` is computed. For each link in
    ``self.links`` whose hex dpid pair is not a tree edge (in either
    orientation) the link's source port is recorded against the source
    dpid. Each affected switch is then logged into and the recorded ports
    are disabled from its configuration prompt.
    """
    T = nx.minimum_spanning_tree(self.graph)

    disabled_ports = {}
    for link in self.links:
        src, dst = hex(link.src.dpid), hex(link.dst.dpid)
        in_tree = any((src, dst) == edge or (dst, src) == edge
                      for edge in T.edges())
        if in_tree:
            continue
        disabled_ports.setdefault(link.src.dpid, []).append(link.src.port_no)

    for dp in disabled_ports:
        ip = self.dpid_to_ip[hex(dp)]
        print("logging into " + ip)
        session = spawn("ssh %s@%s" %(username, ip))
        session.expect(".*assword")
        session.sendline(password)
        session.expect("Press any key to continue")
        session.sendline("\r")
        session.sendline("config")
        for n in disabled_ports[dp]:
            session.sendline("interface ethernet " + repr(n) + " disable")
        # save the configuration, log out and confirm
        for command in ("save", "logo", "y"):
            session.sendline(command)

    print("CREATED SPANNING TREE")
def find_min_spanning_tree(A):
    """
    Compute a minimum spanning tree, caching the graph on disk.

    Input:
        A : Adjacency matrix in scipy.sparse format.
    Output:
        T : Minimum spanning tree.
        run_time : Total runtime to find the minimum spanning tree,
                   including loading or building the graph.
    """
    started_at = time.time()
    cache_path = '../Data/dcg_graph.json'

    if not os.path.exists(cache_path):
        # First run: convert the matrix to a graph and save it as JSON.
        G = from_scipy_sparse_matrix(A)
        payload = json_graph.node_link_data(G)
        with open(cache_path, 'w') as outfile:
            json.dump(payload, outfile)
    else:
        # The graph was already pre-processed; reuse the saved copy.
        with open(cache_path) as cached:
            payload = json.load(cached)
        G = json_graph.node_link_graph(payload)

    T = minimum_spanning_tree(G)

    return T, time.time() - started_at
def compute_initial_guess(num_nodes, relative_rotations, relative_edges):
    """Chain relative rotations along a spanning tree, starting at node 0.

    Each relative edge becomes a graph edge weighted by the rotation angle
    of its relative rotation. The minimum spanning tree is walked
    depth-first from node 0, and each child's global rotation is the
    relative rotation (transposed when the measurement runs child to
    parent) applied to the parent's global rotation. Nodes the walk never
    reaches keep the identity.

    Returns a list of ``num_nodes`` 3x3 matrices.
    """
    graph = nx.Graph()
    graph.add_nodes_from(range(num_nodes))
    for ind, edge in enumerate(relative_edges):
        _, theta = so3.matrix_to_axis_angle(relative_rotations[ind])
        graph.add_edge(edge[0], edge[1], weight=theta, index=ind)

    tree = nx.minimum_spanning_tree(graph)

    global_rotation = [numpy.identity(3) for _ in range(num_nodes)]

    for parent, child in nx.dfs_edges(tree, 0):
        ind = graph[parent][child]["index"]
        mat = relative_rotations[ind]
        first, second = relative_edges[ind][0], relative_edges[ind][1]
        same_direction = first == parent and second == child
        if not same_direction:
            if first == child and second == parent:
                # measured in the opposite direction: invert the rotation
                mat = mat.transpose()
            else:
                logging.error("GRAPH ERROR")
        global_rotation[child] = mat.dot(global_rotation[parent])

    return global_rotation
def SpanningFeatures(self):
    """Build ``self.Gs`` from pairwise shortest paths and store its MST.

    For every unordered pair of nodes of ``self.gr`` the shortest path in
    ``self.G`` is appended to ``self.shortestPath`` and its nodes and
    consecutive edges are added to ``self.Gs``. The minimum spanning tree
    of ``self.Gs`` is stored in ``self.SpanG``.

    Raises whatever ``nx.shortest_path`` raises when a pair is not
    connected in ``self.G``.
    """
    nodes = list(self.gr.nodes())

    # Graph of hop counts between every node pair.
    # NOTE(review): nothing in this method reads tempG afterwards - confirm
    # whether the spanning tree was meant to be computed on it.
    tempG = nx.Graph()
    tempG.add_nodes_from(nodes)

    # Visit each unordered pair exactly once, in node order.
    for position, source in enumerate(nodes):
        for target in nodes[position + 1:]:
            path = nx.shortest_path(self.G, source, target)
            self.shortestPath.append(path)
            # The hop count must be passed as a keyword attribute; a bare
            # third positional argument is rejected by add_edge.
            tempG.add_edge(source, target, weight=len(path) - 1)
            self.Gs.add_nodes_from(path)
            # Consecutive pairs only, so no index ever runs past the end.
            self.Gs.add_edges_from(zip(path, path[1:]))

    self.SpanG = nx.minimum_spanning_tree(self.Gs)
    return
def test_kruskal_minimum_spanning_tree_disconnected(self):
    """Kruskal returns a spanning forest for a disconnected graph."""
    G = nx.Graph()
    # Graph.add_path no longer exists in newer NetworkX; a two-node path
    # is just a single edge.
    G.add_edge(1, 2)
    G.add_edge(10, 20)
    T = nx.minimum_spanning_tree(G, algorithm="kruskal")
    # Sort inside each edge first so endpoint order does not matter.
    assert_equal(sorted(map(sorted, T.edges())), [[1, 2], [10, 20]])
    assert_equal(sorted(T.nodes()), [1, 2, 10, 20])
def hidden_image_maze(fname, style='jittery'):
    """Plot a maze whose solution is taken from the image in ``fname``.

    Supported styles: jittery, smooth, sketch
    """
    H = models.image_grid_graph(fname)  # a subgraph of the grid
    G = H.base_graph

    # every edge of H gets weight 0 in G, so the spanning tree below
    # prefers those edges
    for u, v in H.edges():
        G[u][v]['weight'] = 0

    # find a minimum spanning tree on G (which will include the maze
    # solution)
    T = nx.minimum_spanning_tree(G)

    # find the maze solution in the spanning tree
    solution_nodes = nx.shortest_path(T, (0, 0), max(H.nodes()))
    P = models.my_path_graph(solution_nodes)

    # generate the dual graph, including edges not crossed by the
    # spanning tree
    D = models.dual_grid(G, T)
    views.add_maze_boundary(D, max(G.nodes()))
    views.make_entry_and_exit(D, max(G.nodes()))

    use_fast_layout = style == 'jittery'
    pos = views.layout_maze(D, fast=use_fast_layout)
    views.plot_maze(D, pos, P, G.pos)

    # make it stylish if requested: two extra fast layouts drawn over
    # the first figure
    if style == 'sketch':
        plt.figure(1)
        for _ in range(2):
            D_pos = views.layout_maze(D, fast=True)
            nx.draw_networkx_edges(D, D_pos, width=1, edge_color='k')

    # show the pixel colors loaded from the file, for "debugging"
    plt.figure(2)
    for v in G:
        x, y = G.pos[v][0], G.pos[v][1]
        plt.plot([x], [y], '.', alpha=.5, color=G.node[v]['color'])
def test_local_threshold(self):
    """Local thresholding keeps the graph connected at the requested size."""
    net = self.a
    net.import_adj_file(self.MODIF_FILE, delimiter=",")
    net.apply_threshold()
    reference_tree = nx.minimum_spanning_tree(net.G)

    # Default call: as many edges as the spanning tree
    net.local_thresholding()
    self.assertEqual(reference_tree.number_of_edges(),
                     net.G.number_of_edges())
    self.assertTrue(nx.is_connected(net.G))

    # threshold_type="totalEdges"
    # ordinary target
    net.local_thresholding(threshold_type="totalEdges", value=20)
    self.assertEqual(net.G.number_of_edges(), 20)
    self.assertTrue(nx.is_connected(net.G))

    # target below the tree size: falls back to the tree size
    net.local_thresholding(threshold_type="totalEdges", value=1)
    self.assertEqual(net.G.number_of_edges(),
                     reference_tree.number_of_edges())
    self.assertTrue(nx.is_connected(net.G))

    # very large target: only connectivity is checked
    net.local_thresholding(threshold_type="totalEdges", value=500000)
    self.assertTrue(nx.is_connected(net.G))

    # threshold_type="edgePC"
    net.apply_threshold()
    all_edges = net.G.number_of_edges()

    net.local_thresholding(threshold_type="edgePC", value=100)
    self.assertEqual(net.G.number_of_edges(), all_edges)
    self.assertTrue(nx.is_connected(net.G))

    net.local_thresholding(threshold_type="edgePC", value=20)
    self.assertEqual(net.G.number_of_edges(), int(0.2 * all_edges))
    self.assertTrue(nx.is_connected(net.G))
def plotGraph(g,filename):
    """
    Plot the minimum spanning tree of ``g`` and save it to ``filename``.

    The tree is computed on the ``'difference'`` edge attribute and laid
    out with a seeded spring layout. Node size and node colour both scale
    with the node's degree in the tree. Reducing the graph to its MST
    keeps the edge density low enough to be readable.
    """
    plt.figure(figsize=(15, 10))
    np.random.seed(5)

    tree = nx.minimum_spanning_tree(g, weight='difference')
    layout = nx.spring_layout(tree, iterations=900, k=.008,
                              weight='difference')
    tree_edges = list(nx.minimum_spanning_edges(g, weight='difference'))

    degree_of = tree.degree()
    node_sizes = [degree_of[v]*80 for v in tree]
    tree_nodes = tree.nodes()

    nx.draw_networkx_edges(g, layout, edgelist=tree_edges, alpha=.2)
    nx.draw_networkx_nodes(g, layout, nodelist = tree_nodes,
                           node_size=node_sizes, node_color=node_sizes)
    nx.draw_networkx_labels(g, layout, font_color='k', font_size=7)

    plt.title("Artist Network", fontsize=18)
    plt.xticks([])
    plt.yticks([])
    plt.savefig(filename)
def mst_of_g(g,terminals,verbose=False,weighted=True,cutoff=7,return_gL=False,bidir=False):
    """Connect ``terminals`` through the MST of their metric closure.

    For each terminal present in ``g`` the shortest paths (Dijkstra on
    ``'weight'`` when ``weighted``, hop-based otherwise, both bounded by
    ``cutoff``) to the other terminals are collected as weighted edges of
    a closure graph. With ``bidir`` every ordered pair is considered,
    otherwise only pairs ``i < j``. The paths behind the closure's MST
    edges are merged into an ``AnnotatedGraph`` and attributes are copied
    over from ``g``.

    Returns the merged graph, or ``(mst, gL, shortest_network)`` when
    ``return_gL`` is true.
    """
    if verbose:
        logger.info("Starting MST construction")
        sys.stdout.flush()

    STARTTIME = time.time()
    closure_edges = []
    shortest_network = model.AnnotatedGraph()

    for i, src in enumerate(terminals):
        if src not in g:
            if verbose:
                logger.info("Node %s not in g"%(src))
            continue

        if weighted:
            costs, paths = nx.single_source_dijkstra(
                g, src, weight='weight', cutoff=cutoff)
        else:
            paths = nx.single_source_shortest_path(g, src, cutoff=cutoff)
            # unweighted cost of a path is its node count
            costs = dict((k, len(v)) for k, v in paths.items())

        first_target = 0 if bidir else i + 1
        for j in range(first_target, len(terminals)):
            if j == i:
                continue
            tgt = terminals[j]
            if tgt not in paths:
                if verbose:
                    logger.info("no paths between %s and %s"%(src,tgt))
                continue
            shortest_network.add_path(paths[tgt])
            closure_edges.append(
                (src, tgt, {'weight': costs[tgt], 'path': paths[tgt]}))

        if verbose:
            logger.info("Done %s. Still %d to go"%(src,len(terminals)-i))
            sys.stdout.flush()

    if verbose:
        logger.info("Computed Metric closure in %f seconds"%(time.time() - STARTTIME))
        STARTTIME = time.time()
        sys.stdout.flush()

    gL = nx.Graph()
    gL.add_edges_from(closure_edges)

    # Min spanning Tree
    tL = nx.minimum_spanning_tree(gL)
    if verbose:
        logger.info("Computed Min spanning tree in %f seconds"%(time.time() - STARTTIME))
        STARTTIME = time.time()
        sys.stdout.flush()

    # Expand every closure edge of the tree back into its path in g.
    mst = model.AnnotatedGraph()
    for _, _, attributes in tL.edges(data=True):
        mst.add_path(attributes["path"])
    copy_attributes_from_g(mst, g)

    if return_gL:
        return mst, gL, shortest_network
    return mst
def set_pred_relations(self, candids, relations):
    """Build the relation multigraph and store its minimum spanning tree.

    ``candids`` and ``relations`` are walked in lockstep. Every
    ``(rel, w)`` entry of a candidate pair adds an edge weighted ``1 - w``,
    except relations named ``'X'`` and relations starting with ``"A"``
    whose weight is below 0.4. The tree is stored in ``self.mst`` and its
    edge set in ``self.mst_edges``.
    """
    # NOTE: digraph doesn't play well with MST algorithms in networkx
    self.G = nx.MultiGraph()

    for candid, subrels in zip(candids, relations):
        rel_probs = dict(subrels)
        if 'X' in rel_probs:
            first_idx = self.stroke_groups.index(candid[0])
            second_idx = self.stroke_groups.index(candid[1])
            if second_idx - first_idx > 1:
                print("X probability: " + str(rel_probs['X']) + ", Idx dist: " + str(second_idx - first_idx))
                # NOTE(review): the nesting of this check was not
                # recoverable from the source layout - confirm.
                if rel_probs['X'] > 1:
                    continue

        for rel, w in subrels:
            # Heuristic, but increases accuracy by a lot in practice
            if rel.startswith("A") and w < 0.4:
                continue
            if rel == 'X':
                continue
            # Don't forget to do 1-w (since we're looking for minimum)
            self.G.add_edge(candid[0], candid[1], weight=(1-w), rel=rel.tolist())

    tree = nx.minimum_spanning_tree(self.G)
    self.mst_edges = set(tree.edges())  # optimization
    self.mst = tree
def mst_weight( taxa, patterns, matrices, characters ):
    """
    Calculate minimal weight of unsorted trees.

    Every pair of taxa is joined by an edge whose weight is the sum, over
    all (pattern, matrix, characters) triples, of the smallest matrix score
    between any state of the first taxon and any state of the second. The
    result is the total edge weight of the minimum spanning tree divided
    by two.
    """
    G = nx.Graph()
    for i, tA in enumerate(taxa):
        for j, tB in enumerate(taxa):
            if i >= j:
                continue
            pair_weight = sum(
                min(mt[cs.index(pA)][cs.index(pB)]
                    for pA in pt[i]
                    for pB in pt[j])
                for pt, mt, cs in zip(patterns, matrices, characters)
            )
            G.add_edge(tA, tB, weight=pair_weight)

    tree = nx.minimum_spanning_tree(G)
    tree_weight = sum(data['weight'] for _, _, data in tree.edges(data=True))
    return tree_weight / 2
def test_wrong_value(self):
    """Request a spanning tree with an unrecognised algorithm name.

    NOTE(review): there is no assertion here; presumably an
    expected-exception decorator outside this view checks the failure.
    """
    unsupported_algorithm = 'random'
    nx.minimum_spanning_tree(self.G, algorithm=unsupported_algorithm)
def test_disconnected(self):
    """A disconnected graph yields a spanning forest keeping all nodes."""
    weighted_edges = [(0, 1, {"weight": 1}), (2, 3, {"weight": 2})]
    G = nx.Graph(weighted_edges)
    forest = nx.minimum_spanning_tree(G, algorithm=self.algo)
    expected_nodes = list(range(4))
    assert nodes_equal(list(forest), expected_nodes)
    assert edges_equal(list(forest.edges()), [(0, 1), (2, 3)])
nx.read_weighted_edgelist(f, create_using=Y, nodetype=str, encoding="utf-8") pos = nx.spring_layout(Y) edge_labels = {(u, v): d['weight'] for u, v, d in Y.edges(data=True)} nx.draw_networkx_nodes(Y, pos, node_size=700) nx.draw_networkx_edges(Y, pos) nx.draw_networkx_labels(Y, pos) nx.draw_networkx_edge_labels(Y, pos, edge_labels=edge_labels) plt.show() pylab.show() if (menu == 11): T = nx.minimum_spanning_tree(Y, weight="weight") os.system("clear") print("Algoritmo A*\n") source = input("Digite o começo :").upper() target = input("Digite o final :").upper() print("Resultado (BFS) : ", list(nx.bfs_edges(Y, source))) print("Resultado (DFS) : ", list(nx.dfs_edges(Y, source))) print("Resultado (Algoritmo A*) : ", list(nx.astar_path(Y, source, target, heuristic=None)), "\n") print("Resultado (Algoritmo Dijkstra) : ", list(nx.dijkstra_path(Y, source, target, weight="weight")), "\n") print( "Caminho (Algoritmo Dijkstra) : ", str(nx.dijkstra_predecessor_and_distance(Y, source)).replace( ",", "\n").replace("(", " ").replace("{", "\n ").replace(
def local_thresholding_prop(conn_matrix, thr):
    """
    Threshold the adjacency matrix by building from the minimum spanning tree
    (MST) and adding successive N-nearest neighbour degree graphs to achieve
    target proportional threshold.

    Parameters
    ----------
    conn_matrix : array
        Weighted NxN matrix.
    thr : float
        A proportional threshold, between 0 and 1, to achieve through local
        thresholding.

    Returns
    -------
    conn_matrix_thr : array
        Weighted local-thresholding using MST, NxN matrix.

    Notes
    -----
    NaNs in the input are replaced via ``np.nan_to_num`` first. If the
    matrix sums to zero it is returned unchanged. If the final
    ``np.multiply`` raises ``ValueError`` the error is printed and the
    function falls off the end, returning ``None``.

    References
    ----------
    .. [1] Alexander-Bloch, A. F., Gogtay, N., Meunier, D., Birn, R., Clasen,
      L., Lalonde, F., … Bullmore, E. T. (2010). Disrupted modularity and
      local connectivity of brain functional networks in childhood-onset
      schizophrenia. Frontiers in Systems Neuroscience.
      https://doi.org/10.3389/fnsys.2010.00147
    .. [2] Tewarie, P., van Dellen, E., Hillebrand, A., & Stam, C. J. (2015).
      The minimum spanning tree: An unbiased method for brain network
      analysis. NeuroImage. https://doi.org/10.1016/j.neuroimage.2014.10.015

    """
    from pynets.core import thresholding

    # Length of the window of recent edge counts that the loop below
    # inspects to detect a lack of progress.
    fail_tol = 100
    conn_matrix = np.nan_to_num(conn_matrix)

    if np.sum(conn_matrix) == 0:
        print(UserWarning('Empty connectivity matrix detected!'))
        return conn_matrix

    # Graph of absolute connection strengths
    G = nx.from_numpy_matrix(np.abs(conn_matrix))

    maximum_edges = G.number_of_edges()

    # Largest connected component first
    Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
    G0 = G.subgraph(Gcc[0])

    # MST of the largest component, computed on the "distance" attribute
    # of the graph returned by weight_to_distance
    min_t = nx.minimum_spanning_tree(thresholding.weight_to_distance(G0),
                                     weight="distance")

    # Re-add every node of G so min_t spans the full node set
    min_t.add_nodes_from(G.nodes())
    len_edges = min_t.number_of_edges()
    upper_values = np.triu_indices(np.shape(conn_matrix)[0], k=1)
    weights = np.array(conn_matrix[upper_values])
    # Target edge count: thr times the number of upper-triangle entries
    edgenum = int(float(thr) * float(len(weights[~np.isnan(weights)])))

    if len_edges > edgenum:
        print(f"Warning: The minimum spanning tree already has: {len_edges} "
              f"edges, select more edges. Local Threshold "
              f"will be applied by just retaining the Minimum Spanning Tree")
        # NOTE(review): this returns the matrix of the full absolute graph
        # G, not of min_t as the message above suggests - confirm intent.
        conn_matrix_thr = nx.to_numpy_array(G)
        return conn_matrix_thr

    k = 1
    len_edge_list = []
    # Keep growing while the target is not reached, k does not exceed the
    # number of nodes, the recent edge counts are not (almost) all
    # repeats, and min_t is connected.
    while (len_edges < edgenum and k <= np.shape(conn_matrix)[0] and
           (len(len_edge_list[-fail_tol:]) -
            len(set(len_edge_list[-fail_tol:]))) < (fail_tol - 1)) and \
            nx.is_connected(min_t) is True:
        # print(k)
        # print(len_edges)
        len_edge_list.append(len_edges)
        # Create nearest neighbour graph
        nng = thresholding.knn(conn_matrix, k)

        # Remove edges from the NNG that exist already in the new graph/MST
        nng.remove_edges_from(min_t.edges())

        # Edge count taken after the removal above
        number_before = nng.number_of_edges()
        if nng.number_of_edges() == 0 and number_before >= maximum_edges:
            break

        # Add weights to NNG
        for e in nng.edges():
            nng.edges[e[0], e[1]]["weight"] = float(conn_matrix[e[0], e[1]])

        # Obtain list of edges from the NNG in order of weight
        edge_list = sorted(nng.edges(data=True),
                           key=lambda t: t[2]["weight"], reverse=True)

        # Add edges in order of connectivity strength
        for edge in edge_list:
            # print(f"Adding edge to mst: {edge}")
            min_t.add_edges_from([edge])
            len_edges = min_t.number_of_edges()
            if len_edges >= edgenum:
                # print(len_edges)
                break
        k += 1

    # Binarised matrix of the grown tree, with nodes in sorted order
    conn_matrix_bin = thresholding.binarize(
        nx.to_numpy_array(min_t, nodelist=sorted(G.nodes()),
                          dtype=np.float64))

    try:
        # Keep the original weights only where the grown tree has an edge
        conn_matrix_thr = np.multiply(conn_matrix, conn_matrix_bin)
        return conn_matrix_thr
    except ValueError as e:
        print(
            e, f"MST thresholding failed. Check raw graph output manually "
            f"for debugging.")
pieces.append(g.nodes()) num_invalid += 1 return num_invalid def buildSubGraph(self, nodes, g): graph = nx.Graph() for n in nodes: graph.add_node(n) for k in nodes: if n < k and g.has_edge(n, k): graph.add_edge(n, k, weight=g[n][k]['weight']) return graph if __name__ == '__main__': from graph_loader import GraphGenerator as GG test_file = "input2.csv" g = GG(test_file) mst = nx.minimum_spanning_tree(g.getGraph()) l = 3 u = 15 cutter = MSTCutter(l, u) num_invalid, pieces, weights = cutter.cutFromTSP(mst, g.getGraph()) print("# of invalid pieces: " + str(num_invalid)) for p in pieces: cor = [g.getGraph().node[n]['position'] for n in p] print(cor) print(weights) #cutter.findCuts(mst) #print(cutter.comp)
def scaffold_via_wells_mst(g):
    """Simplify scaffold graph ``g`` in place using spanning forests.

    The networkx view of ``g`` is re-weighted by shared wells and a spanning
    forest is computed. While the forest still has branches (at most 10
    iterations) the function removes edges of ``g`` that are not in the
    forest, contracts edges, recomputes the forest, removes edges touching
    vertices outside the forest's trunk, contracts again and recomputes the
    forest once more. Nothing is returned; ``g`` is modified.

    This is Python 2 code (print statements).
    """
    # initialize internal contig labels (used for downstream qc)
    for v in g.vertices:
        v.initialize_contigs()

    # construct well-based scaffold graph in networkx format
    nxg = g.nxgraph
    # nxg = _construct_graph(g)

    # weigh edges according to how many wells they are sharing:
    _reweigh_edges(nxg, g, type_='wells')

    # find the maximum spanning forest
    # NOTE(review): the call is minimum_spanning_tree; presumably
    # _reweigh_edges assigns weights for which the minimum tree is the
    # intended "maximum" forest - confirm.
    msf = nx.minimum_spanning_tree(nxg)

    # keep simplifying the graph until the msf has no branching nodes:
    n_iter = 1
    while _has_branches(msf) and n_iter <= 10:
        print 'MSF simplificaiton iteration %d' % n_iter
        # print '...', max(msf.degree(weight=None).values())
        # print '...', sorted(msf.degree(weight=None).iteritems(), key=lambda x: x[1], reverse=True)[:10]
        # vg = sorted(msf.degree(weight=None).iteritems(), key=lambda x: x[1], reverse=True)[0][0]
        # v = g.vertex_from_id(vg[0])
        # N = [n.id for n in g.vertices if v in n.neighbors]
        # print ',,,', N
        # print msf.neighbors(v)

        # remove edges of g not selected in forest MSF
        # (iterate over a copy because g is modified inside the loop)
        E = [e for e in g.edges]
        n_removed = 0
        for e in E:
            # forest nodes are (vertex id, connection) pairs
            e_nx = ((e.v1.id, e.connection[e.v1]), (e.v2.id, e.connection[e.v2]))
            if not msf.has_edge(*e_nx):
                g.remove_edge(e)
                n_removed += 1

        print '%d edges not in MST removed.' % n_removed

        # contract edges
        n_contracted = contract_edges(g, store_ordering=True)
        print '%d edges contracted.' % n_contracted

        # now we are going to compute the trunk

        # get the networkx graph again
        nxg = g.nxgraph
        _reweigh_edges(nxg, g, type_='wells')

        # FIXME: do this once
        # recompute the maximum spanning forest
        msf = nx.minimum_spanning_tree(g.nxgraph)

        # for each tree in forest:
        trunk = list()
        for mst in nx.connected_component_subgraphs(msf):
            # add to mst trunk (only trees with at least 4 nodes)
            if len(mst) >= 4:
                trunk.extend(_mst_trunk(mst, g))

        # remove edges not in trunk:
        E = [e for e in g.edges]
        print trunk
        # first element of every trunk entry, compared with vertex ids below
        trunk_v = set([v[0] for v in trunk])
        n_removed = 0
        for e in E:
            v1_id, v2_id = e.v1.id, e.v2.id
            if v1_id not in trunk_v or v2_id not in trunk_v:
                g.remove_edge(e)
                n_removed += 1

        # NOTE(review): keyboard() looks like a leftover interactive
        # debugging hook - confirm before running unattended.
        if n_iter >= 4:
            keyboard()

        print '%d edges not in trunk removed.' % n_removed

        # contract one last time
        n_contracted = contract_edges(g, store_ordering=True)
        print '%d edges contracted.' % n_contracted

        # construct well-based scaffold graph in networkx format
        nxg = g.nxgraph
        # nxg = _construct_graph(g)

        # weigh edges according to how many wells they are sharing:
        _reweigh_edges(nxg, g, type_='wells')

        # find the maximum spanning forest
        msf = nx.minimum_spanning_tree(nxg)

        n_iter += 1
def get_weights(closes, robust, ddev, cats, graph_path):
    """Compute portfolio weights and, best effort, save an MST plot.

    Log returns are derived from ``closes``; their covariance (robust via
    ``cov_robust`` when ``robust`` is true, otherwise the sample covariance)
    is converted to a correlation matrix, turned into a distance with
    ``distance`` and clustered with Ward linkage.  The dendrogram leaf order
    is passed to ``getRecBipart`` together with ``closes`` and ``ddev`` to
    obtain the weights, whose index is relabelled with the column names of
    ``closes``.

    Afterwards a minimum spanning tree of the rescaled correlation matrix is
    drawn with ``draw_net`` and saved as ``<graph_path>/<last date>.png``.
    Plotting is best effort: a failure is logged and does not affect the
    returned weights.

    Args:
        closes: DataFrame of prices, one column per asset, with a datetime
            index (``closes.index[-1].date()`` is used for the file name).
        robust: use ``cov_robust`` instead of the sample covariance.
        ddev: forwarded unchanged to ``getRecBipart``.
        cats: optional DataFrame whose columns are asset names; after
            transposition its ``Strategy`` and ``Colors`` columns are used
            for the legend.  ``None`` selects a default colour/shape.
        graph_path: directory the figure is written to.

    Returns:
        The weights returned by ``getRecBipart``, indexed by asset name.
    """
    import logging

    ret = np.log(closes / closes.shift()).fillna(0.0)
    corr = cov2cor(cov_robust(ret) if robust else ret.cov())
    dist = distance(corr)
    link = linkage(dist, 'ward')
    quasiIdx = np.array(dendrogram(link)['leaves'])
    # The quasi-diagonal leaf order is used directly as the clustering.
    clusters = quasiIdx

    weights = getRecBipart(closes, ddev, clusters)
    weights.index = closes.columns[weights.index]

    try:
        # Must read cats with the original column labels, i.e. before the
        # labels of corr are replaced by their multi-line versions below.
        if cats is not None:
            ccats = cats[corr.columns].copy()

        names = [s.replace(' ', '\n') for s in corr.columns]
        corr.index = names
        corr.columns = names
        corr = ((corr - corr.min()) / corr.max()).round(2)
        mst = nx.minimum_spanning_tree(
            nx.from_pandas_adjacency(corr, create_using=nx.MultiGraph()))

        if cats is not None:
            ccats.columns = names
            ccats = ccats.T
        else:
            ccats = pd.DataFrame({
                'Colors': list('b' * len(names)),
                'Shapes': list('o' * len(names))
            }, index=names)
        ccats['Sizes'] = pd.DataFrame(
            {i.replace(' ', '\n'): w for i, w in zip(weights.index, weights)},
            index=['Sizes']).T

        fs = min(20, len(weights))
        fig = plt.figure(figsize=(fs + 5, fs + 2), dpi=80)
        cf = fig.add_subplot(111)
        draw_net(mst, ccats, cf)

        if cats is not None:
            # One legend entry per strategy; later rows win on duplicates.
            sdict = dict()
            for _, row in ccats.iterrows():
                sdict[row.Strategy] = row.Colors
            leg = [mpatches.Patch(color=v, label=k) for k, v in sdict.items()]
            fig.legend(handles=leg, title='Strategy', loc=7,
                       fontsize='xx-large')

        fig.savefig(join(graph_path, str(closes.index[-1].date()) + '.png'))
    except Exception:
        # Plotting must never prevent the weights from being returned, but
        # the failure should be visible instead of silently swallowed.
        logging.getLogger(__name__).exception(
            'get_weights: could not render the MST figure')
    finally:
        # Close figures on the error path too, so they do not accumulate.
        plt.close('all')
    return weights
def path(pts, covars, weights, vertical=False):
    '''
    This method returns the path (as a list of indices) through pts which
    maximizes the minimum probability over the links of the paths.

    This function takes three arguments as inputs:
        pts      - The points of interest as a (2,N) array.
        covars   - The covariance for each point; the code indexes it as
                   covars[i] for point i.
                   NOTE(review): the original docstring said "(2,2,N)
                   array", which does not match covars[i] -- confirm the
                   expected layout with the callers.
        weights  - The weights to assign to each point. Currently not used.
        vertical - Default is False. If False uses the min and max points in
                   x (the first dimension) as starting and ending points. If
                   True uses the min and max in y (the second dimension).

    Points that have no *other* point within a Bhattacharyya distance of 5
    are ignored.  The returned indices always refer to columns of pts.
    '''
    max_neighbour_dist = 5

    x, y = pts
    n_pts = len(x)
    dists = np.zeros((n_pts, n_pts))
    for i in range(n_pts):
        for j in range(n_pts):
            if i != j:
                dists[i, j] = bhattacharyyaDistance(pts[:, i], pts[:, j],
                                                    covars[i], covars[j])

    # Keep a point only if some other point is close to it.  The diagonal
    # of dists is 0, so it has to be excluded or every point would pass.
    keepIndices = [
        i for i in range(n_pts)
        if any(dists[i, j] < max_neighbour_dist
               for j in range(n_pts) if j != i)
    ]
    if not keepIndices:
        # Nothing has a close neighbour (e.g. a single point): fall back to
        # using every point, which is what the unfiltered code did.
        keepIndices = list(range(n_pts))

    x = x[keepIndices]
    y = y[keepIndices]

    # argmin/argmax index the filtered arrays; map back to indices of pts.
    if vertical:
        start = keepIndices[np.argmin(y)]
        stop = keepIndices[np.argmax(y)]
    else:
        start = keepIndices[np.argmin(x)]
        stop = keepIndices[np.argmax(x)]

    # Complete graph over the kept points, labelled with original indices
    # so that the edge weights are read from the matching dists entries.
    G = nx.Graph()
    G.add_nodes_from(keepIndices)
    for i in keepIndices:
        for j in keepIndices:
            if i != j:
                G.add_edge(i, j, weight=np.exp(dists[i][j]))

    # The path between two nodes inside a minimum spanning tree minimizes
    # the largest edge weight along the way.
    t = nx.minimum_spanning_tree(G, weight='weight')
    pth = nx.shortest_path(t, start, stop)
    return pth
def get_shortest_path_recommendation_set(db, user, target_papers):
    """Recommend papers that connect what ``user`` has read to the targets.

    The papers already read by ``user`` are linked pairwise in the citation
    network.  A metric-closure graph is then built over ``target_papers``
    plus one artificial ``'known'`` node standing for everything the user
    has read; each of its edges stores the shortest path it represents.
    The minimum spanning tree of that graph is walked breadth-first from
    ``'known'`` and the stored paths are merged with ``add_paper_to_path``.

    Args:
        db: data access object providing ``get_citation_network``,
            ``list_papers_read`` and ``list_papers_list``.
        user: user whose read papers form the source set.
        target_papers: list of paper ids to reach.

    Returns:
        ``list(db.list_papers_list(ids))`` for the collected ids in reversed
        collection order, or ``[]`` when the user has read nothing.
    """
    G = db.get_citation_network()
    source_papers = [p['paper_id'] for p in db.list_papers_read(user)]
    if not source_papers:
        return []

    # Create contracted graph: every pair of read papers is joined by a
    # zero-weight edge.
    # NOTE(review): G_contracted is the same object as G, so the graph
    # returned by db.get_citation_network() is modified -- confirm that it
    # is not shared/cached.  Also, none of the shortest-path calls in this
    # function pass weight=..., so the 0.0 weights are only relevant if
    # get_shortest_path_from_set reads them -- confirm.
    G_contracted = G
    for i, node1 in enumerate(source_papers):
        for node2 in source_papers[:i]:
            if not G.has_edge(node1, node2):
                G_contracted.add_edge(node1, node2)
            G_contracted[node1][node2]['weight'] = 0.0

    # Construct metric closure over the target papers
    G_metric_closure = nx.Graph()
    for i, node1 in enumerate(target_papers):
        G_metric_closure.add_node(node1)
        for node2 in target_papers[:i]:
            G_metric_closure.add_node(node2)
            this_weight = nx.shortest_path_length(G_contracted, node1, node2)
            path_list = nx.shortest_path(G_contracted, node1, node2)
            G_metric_closure.add_edge(node1, node2, weight=this_weight,
                                      path_list=path_list)

    # Add known set: one node connected to every target paper
    known_set_label = 'known'
    G_metric_closure.add_node(known_set_label)
    for target_paper in target_papers:
        G_metric_closure.add_node(target_paper)
        (this_weight, closest_source) = get_shortest_path_from_set(
            G_contracted, source_papers, target_paper)
        path_list = nx.shortest_path(G, closest_source, target_paper)
        G_metric_closure.add_edge(known_set_label, target_paper,
                                  weight=this_weight, path_list=path_list)

    # Get minimal spanning tree and traverse it level by level from known
    min_tree = nx.minimum_spanning_tree(G_metric_closure)
    papers_in_path_id = []
    next_set = {known_set_label}
    to_visit = set(min_tree.nodes())
    while to_visit:
        current_set = next_set
        next_set = set()
        for node in current_set:
            to_visit.remove(node)
            for neighbour in min_tree.neighbors(node):
                if neighbour in to_visit:
                    next_set.add(neighbour)
                    path_to_add = G_metric_closure[node][neighbour][
                        'path_list']
                    papers_in_path_id = add_paper_to_path(
                        papers_in_path_id, path_to_add, source_papers)

    papers_in_path_id.reverse()
    papers_in_path = db.list_papers_list(papers_in_path_id)
    # The former "additional_nodes" bookkeeping was removed: its result was
    # never used and set.remove() could only add a spurious KeyError.
    return list(papers_in_path)
def test_kruskal_minimum_spanning_tree_isolate(self):
    """Kruskal on two isolated nodes keeps both nodes and yields no edges."""
    isolated = nx.Graph()
    isolated.add_nodes_from([1, 2])
    tree = nx.minimum_spanning_tree(isolated, algorithm='kruskal')
    tree_nodes = sorted(tree.nodes())
    tree_edges = sorted(tree.edges())
    assert_equal(tree_nodes, [1, 2])
    assert_equal(tree_edges, [])
def test_kruskal_minimum_spanning_tree(self):
    """Kruskal's tree on the fixture graph matches the expected edge list."""
    tree = nx.minimum_spanning_tree(self.G, algorithm='kruskal')
    found_edges = sorted(tree.edges(data=True))
    assert_equal(found_edges, self.minimum_spanning_edgelist)
def unfold(mesh):
    """Unfold ``mesh`` into a net and cut it where the net overlaps itself.

    A dual graph is built with one node per face and one edge per mesh edge,
    weighted so that longer mesh edges get smaller weights.  Its minimum
    spanning tree is unfolded with ``unfoldSpanningTree``.  Every pair of
    unfolded triangles that intersect (``triangleIntersection``) defines a
    path in the spanning tree; edges are then chosen greedily, preferring
    the edge that lies on the most not-yet-cut paths, until every path is
    cut.  The chosen edges are removed from the tree and every remaining
    connected component is unfolded on its own.

    Args:
        mesh: mesh object providing ``edges``, ``calc_edge_length``,
            ``face_handle``, ``halfedge_handle``, ``fv`` and ``point``.

    Returns:
        ``(fullUnfolding, unfoldings)``: the result of ``unfoldSpanningTree``
        for the whole spanning tree, and a list with one such result per
        connected component after cutting.
    """
    # Generate the dual graph of the mesh and calculate the weights
    dualGraph = nx.Graph()

    # For the weights: find the shortest and the longest edge of the mesh
    minLength = float('inf')
    maxLength = 0
    for edge in mesh.edges():
        edgelength = mesh.calc_edge_length(edge)
        if edgelength < minLength:
            minLength = edgelength
        if edgelength > maxLength:
            maxLength = edgelength
    lengthRange = maxLength - minLength

    # All edges in the net
    for edge in mesh.edges():
        # The two faces adjacent to the edge
        face1 = mesh.face_handle(mesh.halfedge_handle(edge, 0))
        face2 = mesh.face_handle(mesh.halfedge_handle(edge, 1))

        # The weight: 1 for the shortest edge down to 0 for the longest.
        # When all edges are equally long the normalisation is undefined
        # (0 / 0), so every edge gets the same weight.
        if lengthRange > 0:
            edgeweight = 1.0 - (mesh.calc_edge_length(edge) -
                                minLength) / lengthRange
        else:
            edgeweight = 1.0

        # Calculate the centres of the faces (only necessary for
        # visualisation)
        center1 = (0, 0)
        for vertex in mesh.fv(face1):
            center1 = center1 + 0.3333333333333333 * np.array(
                [mesh.point(vertex)[0], mesh.point(vertex)[2]])
        center2 = (0, 0)
        for vertex in mesh.fv(face2):
            center2 = center2 + 0.3333333333333333 * np.array(
                [mesh.point(vertex)[0], mesh.point(vertex)[2]])

        # Add the new nodes and edge to the dual graph
        dualGraph.add_node(face1.idx(), pos=center1)
        dualGraph.add_node(face2.idx(), pos=center2)
        dualGraph.add_edge(face1.idx(), face2.idx(), idx=edge.idx(),
                           weight=edgeweight)

    # Calculate the minimum spanning tree
    spanningTree = nx.minimum_spanning_tree(dualGraph)

    # Unfold the tree
    fullUnfolding = unfoldSpanningTree(mesh, spanningTree)
    [unfoldedMesh, isFoldingEdge, connections, glueNumber,
     foldingDirection] = fullUnfolding

    def _corners(face):
        # Corner points of an unfolded face, in halfedge order.
        return [
            unfoldedMesh.point(unfoldedMesh.from_vertex_handle(halfedge))
            for halfedge in unfoldedMesh.fh(face)
        ]

    # Resolve the intersections
    # Find all intersections
    epsilon = 1E-12  # Accuracy
    faceIntersections = []
    for face1 in unfoldedMesh.faces():
        for face2 in unfoldedMesh.faces():
            # so that we do not check a couple twice
            if face2.idx() < face1.idx():
                triangle1 = _corners(face1)
                triangle2 = _corners(face2)
                if triangleIntersection(triangle1, triangle2, epsilon):
                    faceIntersections.append(
                        [connections[face1.idx()],
                         connections[face2.idx()]])

    # Find the paths
    # We look for the minimum number of cuts to resolve any
    # self-intersection.  Search all paths between overlapping triangles.
    paths = [
        nx.algorithms.shortest_paths.shortest_path(
            spanningTree, source=intersection[0], target=intersection[1])
        for intersection in faceIntersections
    ]

    # Express every path as its list of consecutive (node, node) edges
    edgepaths = [list(zip(path[:-1], path[1:])) for path in paths]

    # List of all edges in all paths
    allEdgesInPaths = list(set().union(*edgepaths))

    # Count how often each edge occurs
    numEdgesInPaths = [
        sum(1 for path in edgepaths if edge in path)
        for edge in allEdgesInPaths
    ]

    S = []  # edges selected for cutting
    C = []  # paths that are already cut
    while len(C) != len(paths):
        # Calculate the weights to decide which edge to cut
        cutWeights = np.empty(len(allEdgesInPaths))
        for i, currentEdge in enumerate(allEdgesInPaths):
            # Count how many of the paths in which the edge occurs have
            # already been cut
            numInC = sum(1 for path in C if currentEdge in path)

            # Determine the weight (true division on Python 2 as well)
            remaining = numEdgesInPaths[i] - numInC
            if remaining > 0:
                cutWeights[i] = 1.0 / remaining
            else:
                cutWeights[i] = 1000  # 1000 = infinite

        # Find the edge with the least weight
        minimalIndex = np.argmin(cutWeights)
        cutEdge = allEdgesInPaths[minimalIndex]
        S.append(cutEdge)

        # Find all paths where the edge occurs and add them to C
        for path in edgepaths:
            if cutEdge in path and path not in C:
                C.append(path)

    # Now we remove the cut edges from the minimum spanning tree
    spanningTree.remove_edges_from(S)

    # Find the connected components
    connectedComponentList = list(
        nx.algorithms.components.connected_components(spanningTree))

    # Unfolding of the components
    unfoldings = [
        unfoldSpanningTree(mesh, spanningTree.subgraph(component))
        for component in connectedComponentList
    ]

    return fullUnfolding, unfoldings
def main():
    """Build allele/locus graphs from a gzipped read table and write trees.

    Command line: ``-i`` gzipped, tab-separated input with the columns
    allele, read, read position and strand, where the allele is
    ``locus_ref_alt_refalt``; ``-e`` edge output file; ``-p`` haplotype
    output file; ``--strict`` (parsed but not used in this function).

    Steps, as implemented below:

    1. Every pair of alleles seen on the same read becomes an edge of the
       allele graph ``Gall`` carrying ``count``, ``dist`` and ``dist_sq``.
    2. Mean and standard deviation of the distance are stored per edge and
       the counts are added to the locus graph through ``add_counts_edge``.
    3. Locus edges are pruned into a loose graph (``Gloc``) and a confident
       graph (``Gconf``).
    4. For every confident component the haplotypes returned by
       ``phase_conf_component`` are turned into minimum spanning forests
       (weight ``mean_dist``); tree edges go to the edge file and the paths
       between all pairs of leaves go to the haplotype file.

    Progress and peak memory are printed to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', type=str, default=None)
    parser.add_argument('-e', '--edgefile', type=str, default=None)
    parser.add_argument('-p', '--hapfile', type=str, default=None)
    parser.add_argument('--strict', default=False, action='store_true')
    args = parser.parse_args()

    Gall = nx.Graph()
    Gloc = nx.Graph()
    infile = args.infile
    usage_denom = 1024 * 1000

    def report_memory():
        # Print ru_maxrss of this process scaled by usage_denom.
        print('Memory usage info (Mb):\t' + str(
            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
            usage_denom))

    print('before')
    report_memory()

    # load allele graph
    reads = defaultdict(dict)
    # Text mode: the lines are split with str patterns below, which fails
    # on the bytes that binary mode yields under Python 3.
    with gzip.open(infile, 'rt') as fp:
        for raw_line in fp:
            line = raw_line.strip()
            if not line:
                # As before, the first empty line ends the input.
                break
            [allele, read, readpos, strand] = re.split('[\t]', line)
            [locus, ref, alt, refalt] = re.split('_', allele)
            readpos = int(readpos)
            reads[read][allele] = readpos
            if locus not in Gloc.nodes:
                add_locus_node(Gloc, locus, allele)
            if allele not in Gall.nodes:
                add_allele_node(Gall, allele)
            # Connect this allele to every other allele seen on the read.
            for r1 in reads[read]:
                if r1 == allele:
                    continue
                if not Gall.has_edge(allele, r1):
                    Gall.add_edge(allele, r1, count=0, dist=0, dist_sq=0)
                attrs = Gall.edges[allele, r1]
                dist = abs(reads[read][r1] - reads[read][allele])
                attrs['count'] += 1
                attrs['dist'] += dist
                attrs['dist_sq'] += dist * dist

    print('allele graph')
    report_memory()

    # load locus graph
    for edge in Gall.edges:
        attrs = Gall.edges[edge]
        mndist = attrs['dist'] * 1.0 / attrs['count']
        variance = attrs['dist_sq'] * 1.0 / attrs['count'] - mndist * mndist
        attrs['mean_dist'] = mndist
        # Rounding can push a zero variance slightly below 0, which would
        # make math.sqrt raise ValueError.
        attrs['sd_dist'] = math.sqrt(max(variance, 0.0))
        node1 = Gall.nodes[edge[0]]
        node2 = Gall.nodes[edge[1]]
        add_counts_edge(Gloc, node1, node2, attrs['count'])

    print('locus graph')
    report_memory()

    # create strict locus graph and prune edges
    Gconf = Gloc.copy()
    for edge in list(Gloc.edges):
        curedge = Gloc.edges[edge]
        [aa, bb, cc, dd] = [
            curedge['r', 'r'], curedge['r', 'a'], curedge['a', 'r'],
            curedge['a', 'a']
        ]
        # An empty row or column of the 2x2 table while both entries of the
        # opposite row/column are populated: drop the edge everywhere.
        if (aa + bb == 0 and cc * dd > 0) or (
                cc + dd == 0 and aa * bb > 0) or (
                    aa + cc == 0 and bb * dd > 0) or (bb + dd == 0
                                                      and aa * cc > 0):
            remove_allele_edges(Gloc, Gall, edge)
            Gloc.remove_edge(edge[0], edge[1])
            Gconf.remove_edge(edge[0], edge[1])
        else:
            oddsratio = (aa + 1.0) * (dd + 1.0) / (bb + 1.0) / (cc + 1.0)
            confident = (oddsratio > 2 or oddsratio < 0.5) and (
                aa * dd > 0 or bb * cc > 0)
            if not confident:
                # Stays in the loose graph only.
                Gconf.remove_edge(edge[0], edge[1])

    print('strict locus graph')
    report_memory()

    with open(args.edgefile, 'w') as outf, open(args.hapfile, 'w') as outh:
        comp_loose = list(
            Gloc.subgraph(c).copy() for c in nx.connected_components(Gloc))

        # iterate over connected components of loose locus graph and phase
        for ii, gg_loose in enumerate(comp_loose):
            gg_conf = Gconf.subgraph(list(gg_loose.nodes())).copy()
            gg = list(
                gg_conf.subgraph(cc).copy()
                for cc in nx.connected_components(gg_conf))
            for jj, ggsub1 in enumerate(gg):
                [h1, h2] = phase_conf_component(ggsub1)
                print(
                    str(ii) + '\t' + str(jj) + '\t' +
                    str(gg_conf.number_of_nodes()) + '\t' +
                    str(ggsub1.number_of_nodes()) + '\t' + str(len(h1)))

            # The merging of components is disabled in this version, so
            # this yields the same components again as fresh copies.
            gg = list(
                gg_conf.subgraph(cc).copy()
                for cc in nx.connected_components(gg_conf))
            for jj, ggsub1 in enumerate(gg):
                haps = phase_conf_component(ggsub1, strict=False)
                print(
                    str(ii) + '\t' + str(jj) + '\t' +
                    str(gg_conf.number_of_nodes()) + '\t' +
                    str(ggsub1.number_of_nodes()) + '\t' +
                    str(len(haps[0])))

                # subgraphs of allele graph corresponding to the alleles on
                # each haplotype
                for hapid in range(2):
                    Gallsub = Gall.subgraph(haps[hapid]).copy()
                    minforest = nx.minimum_spanning_tree(Gallsub,
                                                         weight='mean_dist')
                    mintree = list(
                        minforest.subgraph(cc).copy()
                        for cc in nx.connected_components(minforest))
                    for treeid, tree in enumerate(mintree):
                        label = (str(ii) + '_' + str(jj) + '_' +
                                 str(hapid + 1) + '_' + str(treeid))

                        # one line per tree edge
                        for tredge in list(tree.edges):
                            curedge = tree.edges[tredge]
                            edgestr = (str(round(curedge['mean_dist'], 3)) +
                                       ';' +
                                       str(round(curedge['sd_dist'], 3)) +
                                       ';' + str(curedge['count']))
                            print(label + '\t' + edgestr + '\t' +
                                  tredge[0] + '\t' + tredge[1],
                                  file=outf)

                        # one block per pair of leaves: the path between
                        # them, node by node
                        terminal_nodes = [
                            node1 for node1 in list(tree.nodes)
                            if tree.degree(node1) == 1
                        ]
                        for aa in range(len(terminal_nodes) - 1):
                            for bb in range(aa + 1, len(terminal_nodes)):
                                sp = nx.shortest_path(tree,
                                                      terminal_nodes[aa],
                                                      terminal_nodes[bb])
                                for nodeii, node1 in enumerate(sp):
                                    if nodeii == 0:
                                        outstr = str(
                                            tree.degree(node1)) + '\t.'
                                    else:
                                        prevedge = tree.edges[
                                            sp[nodeii], sp[nodeii - 1]]
                                        outstr = (
                                            str(tree.degree(node1)) + '\t' +
                                            str(prevedge['count']) + '_' +
                                            str(prevedge['dist']) + '_' +
                                            str(prevedge['dist_sq']))
                                    print(label + '\t' + str(aa) + '_' +
                                          str(bb) + '\t' + outstr + '\t' +
                                          node1,
                                          file=outh)

    print(
        '-----------------------------------------------------------------'
    )
    report_memory()
def main():
    """Build allele/locus graphs from a gzipped read table and write trees.

    Command line: ``-i`` gzipped, tab-separated input with the columns
    allele, read, read position and strand, where the allele is
    ``loc_ref_alt_refalt``; ``-e`` edge output; ``-p`` haplotype output;
    ``-b`` BED output; ``--strict`` disables the merging of haplotype
    subgraphs.

    Reads and loci are replaced by compact ids while loading (``id2loc``
    maps the locus ids back to ``loc_ref_alt``).  Locus edges are removed or
    flagged through ``pre_filter_loose_pass`` / ``pre_filter_strict_pass``;
    the confident components are phased with ``phase_conf_component`` and,
    unless ``--strict`` is given, merged with ``merge_subgraphs``.  For each
    haplotype a minimum spanning forest (weight ``mean_dist``) of the allele
    graph is computed; each tree yields one BED line, one line per tree edge
    and the paths between all pairs of leaves.

    The whole run is profiled with ``cProfile``; the statistics, progress
    and peak memory are printed to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', type=str, default=None)
    parser.add_argument('-e', '--edgefile', type=str, default=None)
    parser.add_argument('-p', '--hapfile', type=str, default=None)
    parser.add_argument('-b', '--bedfile', type=str, default=None)
    parser.add_argument('--strict', default=False, action='store_true')
    args = parser.parse_args()

    cp = cProfile.Profile()
    cp.enable()

    usage_denom = 1024 * 1000

    def report_memory():
        # Print ru_maxrss of this process scaled by usage_denom.
        print('Memory usage info (Mb):\t' + str(
            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
            usage_denom))

    print('before')
    report_memory()

    Gall = nx.Graph()
    Gloc = nx.Graph()
    infile = args.infile

    # load allele graph
    reads = defaultdict(dict)
    readid = {}
    locid = {}
    id2loc = {}
    [readct, locct] = [0, 0]
    # Text mode: the lines are split with str patterns below, which fails
    # on the bytes that binary mode yields under Python 3.
    with gzip.open(infile, 'rt') as fp:
        for raw_line in fp:
            line = raw_line.strip()
            if not line:
                # As before, the first empty line ends the input.
                break
            [allele, readstr, readpos, strand] = re.split('[\t]', line)
            [loc, ref, alt, refalt] = re.split('_', allele)
            locstr = loc + '_' + ref + '_' + alt
            readpos = int(readpos)

            # compact integer id per read
            if readstr not in readid:
                readid[readstr] = readct
                readct += 1
            read = readid[readstr]
            print(str(read))

            # compact string id per locus, reversible through id2loc
            if locstr not in locid:
                lid = 'loc' + str(locct)
                locid[locstr] = lid
                id2loc[lid] = locstr
                locct += 1
            locus = locid[locstr]
            allele = locus + '_' + refalt

            reads[read][allele] = readpos
            if not Gloc.has_node(locus):
                Gloc.add_node(locus)
            if not Gall.has_node(allele):
                Gall.add_node(allele, refalt=refalt)

            # Connect this allele to every other allele seen on the read.
            for r1 in reads[read]:
                if r1 == allele:
                    continue
                if not Gall.has_edge(allele, r1):
                    Gall.add_edge(allele, r1, count=0, dist=0, dist_sq=0,
                                  reads=[])
                curedge = Gall.edges[allele, r1]
                dist = abs(reads[read][r1] - reads[read][allele])
                curedge['reads'].append(read)
                curedge['count'] += 1
                curedge['dist'] += dist
                curedge['dist_sq'] += dist * dist
    del reads

    print('allele graph')
    report_memory()

    # load locus graph
    for edge in Gall.edges:
        curedge = Gall.edges[edge]
        mndist = curedge['dist'] * 1.0 / curedge['count']
        variance = (curedge['dist_sq'] * 1.0 / curedge['count'] -
                    mndist * mndist)
        # Rounding can push a zero variance slightly below 0, which would
        # make math.sqrt raise ValueError.
        sddist = math.sqrt(max(variance, 0.0))
        curedge.update({'mean_dist': mndist, 'sd_dist': sddist})
        add_counts_edge(Gloc, Gall, edge[0], edge[1], curedge['count'])

    print('locus graph')
    report_memory()

    # prune edges
    for edge in list(Gloc.edges):
        curedge = Gloc.edges[edge]
        [aa, bb, cc, dd] = [
            curedge['r', 'r'], curedge['r', 'a'], curedge['a', 'r'],
            curedge['a', 'a']
        ]
        pfloose = pre_filter_loose_pass(aa, bb, cc, dd)
        pfstrict = pre_filter_strict_pass(aa, bb, cc, dd)
        if not pfloose:
            remove_allele_edges(edge[0], edge[1], Gall)
            Gloc.remove_edge(edge[0], edge[1])
        elif not pfstrict:
            set_allele_edge_conf(edge[0], edge[1], Gall, False)
            Gloc.edges[edge[0], edge[1]]['conf'] = False
        else:
            # NOTE(review): 'conf' is not set on the locus edge here, so the
            # e['conf'] lookup below presumably relies on add_counts_edge
            # creating it -- confirm.
            set_allele_edge_conf(edge[0], edge[1], Gall, True)

    with open(args.edgefile, 'w') as outf, open(args.hapfile,
                                                'w') as outh, open(
                                                    args.bedfile,
                                                    'w') as outb:
        comp_loose = list(
            Gloc.subgraph(c) for c in sorted(
                nx.connected_components(Gloc), key=len, reverse=True))

        # iterate over connected components of loose locus graph, phase and
        # merge if possible
        for ii, gg_loose in enumerate(comp_loose):
            selected_edges = [(u, v)
                              for u, v, e in gg_loose.edges(data=True)
                              if e['conf'] == True]
            gg_conf = gg_loose.edge_subgraph(selected_edges)
            gg = list(
                gg_conf.subgraph(cc).copy() for cc in sorted(
                    nx.connected_components(gg_conf), key=len, reverse=True))

            subgraphs1 = []
            subgraphs2 = []
            for component in gg:
                [h1, h2] = phase_conf_component(component)
                subgraphs1.append(h1)
                subgraphs2.append(h2)

            if len(gg) > 1 and not args.strict:
                allhaps = merge_subgraphs(subgraphs1, subgraphs2, Gall)
            else:
                allhaps = [subgraphs1, subgraphs2]
            # The second column is the index of the last confident
            # component.  It used to be the leftover loop variable, which
            # was undefined or stale when a component had no confident
            # edges.
            print(str(ii) + '\t' + str(len(gg) - 1) + '\t' +
                  str(len(allhaps[0])) + '*')

            for jj in range(len(allhaps[0])):
                for hapid in range(2):
                    minforest = nx.minimum_spanning_tree(
                        Gall.subgraph(allhaps[hapid][jj]),
                        weight='mean_dist')
                    mintree = list(
                        minforest.subgraph(cc)
                        for cc in nx.connected_components(minforest))
                    for treeid, tree in enumerate(mintree):
                        tree_id = (str(ii) + '_' + str(jj) + '_' +
                                   str(hapid + 1) + '_' + str(treeid))

                        # BED line: positions are the second field of the
                        # original locus string, split on ':' or '_'.
                        nodes = sorted(
                            int(
                                re.split('[:_]',
                                         id2loc[re.split('_', node)[0]])[1])
                            for node in tree.nodes)
                        dists = [pos - nodes[0] + 1 for pos in nodes]
                        ones = [1] * len(nodes)
                        dstr = ','.join(map(str, dists))
                        onestr = ','.join(map(str, ones))
                        print('chr20\t' + str(nodes[0]) + '\t' +
                              str(max(nodes)) + '\t' + tree_id +
                              '\t100\t.\t' + str(nodes[0]) + '\t' +
                              str(max(nodes)) + '\t150,150,0\t' +
                              str(len(dists)) + '\t' + onestr + '\t' + dstr,
                              file=outb)

                        # one line per tree edge, with original locus names
                        for tredge in list(tree.edges):
                            curedge = tree.edges[tredge]
                            [id1, refalt1] = re.split('_', tredge[0])
                            [id2, refalt2] = re.split('_', tredge[1])
                            edgestr = (str(round(curedge['mean_dist'], 3)) +
                                       ';' +
                                       str(round(curedge['sd_dist'], 3)) +
                                       ';' + str(curedge['count']))
                            print(tree_id + '\t' + edgestr + '\t' +
                                  id2loc[id1] + '_' + refalt1 + '\t' +
                                  id2loc[id2] + '_' + refalt2,
                                  file=outf)

                        # one block per pair of leaves: the path between
                        # them, node by node
                        terminal_nodes = [
                            node1 for node1 in list(tree.nodes)
                            if tree.degree(node1) == 1
                        ]
                        for aa in range(len(terminal_nodes) - 1):
                            for bb in range(aa + 1, len(terminal_nodes)):
                                sp = nx.shortest_path(tree,
                                                      terminal_nodes[aa],
                                                      terminal_nodes[bb])
                                for nodeii, node1 in enumerate(sp):
                                    [id1, refalt1] = re.split('_', node1)
                                    if nodeii == 0:
                                        outstr = str(
                                            tree.degree(node1)) + '\t.'
                                    else:
                                        prevedge = tree.edges[
                                            sp[nodeii], sp[nodeii - 1]]
                                        outstr = (
                                            str(tree.degree(node1)) + '\t' +
                                            str(prevedge['count']) + '_' +
                                            str(prevedge['dist']) + '_' +
                                            str(prevedge['dist_sq']))
                                    # refalt1 belongs to this node; the
                                    # plain "refalt" used before was a
                                    # leftover from the input loop.
                                    print(tree_id + '\t' + str(aa) + '_' +
                                          str(bb) + '\t' + outstr + '\t' +
                                          id2loc[id1] + '_' + refalt1,
                                          file=outh)

    print(
        '-----------------------------------------------------------------'
    )
    report_memory()
    cp.disable()
    cp.print_stats()
# logger.info('time_deltas = %r' % (time_deltas,)) maxweight = vt.safe_max(time_deltas, nans=False, fill=0) + 1 time_deltas[np.isnan(time_deltas)] = maxweight time_delta_weight = 10 * time_deltas / (time_deltas.max() + 1) is_comp = infr.guess_if_comparable(candidate_mst_edges) comp_weight = 10 * (1 - is_comp) extra_weight = comp_weight + time_delta_weight # logger.info('time_deltas = %r' % (time_deltas,)) nx.set_edge_attributes( aug_graph, name='weight', values={ edge: 10.0 + extra for edge, extra in zip(candidate_mst_edges, extra_weight) }, ) except Exception: logger.info('FAILED WEIGHTING USING TIME') nx.set_edge_attributes( aug_graph, name='weight', values={edge: 10.0 + _randint() for edge in candidate_mst_edges}, ) new_edges = [] for cc_sub_graph in nx.connected_component_subgraphs(aug_graph): mst_sub_graph = nx.minimum_spanning_tree(cc_sub_graph) # Only add edges not in the original graph for edge in mst_sub_graph.edges(): if not graph.has_edge(*edge): new_edges.append(e_(*edge))
def draw(self, stix_name=0, is_width_as_weight=False,
         is_draw_min_spin_tree=False, pic_num_minspintree=100000):
    """Draw the structure graph and optionally its minimum spanning tree.

    Args:
        stix_name: label used in the figure titles.  An int is shown as
            ``#<n>``; anything else is concatenated as is and therefore has
            to be a string.
        is_width_as_weight: when true, edge weights are (re)computed and
            used as line widths.
        is_draw_min_spin_tree: when true, a second figure with the minimum
            spanning tree of the undirected graph is drawn and stored in
            ``self.UnDiG``.
        pic_num_minspintree: not used by this method.

    Side effects: sets ``self.DiG`` (same object as ``self.G``) and creates
    matplotlib figures that are resized to the maximum window size.
    """
    self.DiG = self.G

    if isinstance(stix_name, int):
        # str() is required: '#' + <int> raises TypeError, which made the
        # default argument unusable.
        stix_name = '#' + str(stix_name)

    plt.figure("Structure Tree for STIX PACKAGE [ " + stix_name + ' ]')
    mng = plt.get_current_fig_manager()
    mng.resize(*mng.window.maxsize())

    pos = graphviz_layout(self.DiG, prog='dot', args='-Grankdir=LR')

    draw_kwargs = dict(node_size=40, pos=pos, edge_color='y',
                       with_labels=False)
    if is_width_as_weight:
        self.__set_edge_weights(self.DiG)
        draw_kwargs['width'] = self.__get_edge_weights(self.DiG)
    nx.draw(self.DiG, **draw_kwargs)

    if not self.is_clustering_node_by_name:
        labels = self.__get_display_labels(self.DiG)
        nx.draw_networkx_labels(self.DiG, pos=pos, labels=labels,
                                font_color='b')
    else:
        nx.draw_networkx_labels(self.DiG, pos=pos, font_color='b')

    if is_draw_min_spin_tree:
        self.UnDiG = self.G.to_undirected()
        self.UnDiG = nx.minimum_spanning_tree(self.UnDiG)
        plt.figure("Minimun Spinning Tree for STIX PACKAGE [ " + stix_name +
                   ' ]')
        mng = plt.get_current_fig_manager()
        mng.resize(*mng.window.maxsize())
        pos = graphviz_layout(self.UnDiG, prog='dot', args='-Grankdir=LR')
        nx.draw(self.UnDiG, node_size=40, pos=pos, edge_color='y')
        nx.draw_networkx_labels(self.UnDiG, pos=pos, font_color='b')
14: "vie", 15: "zag", 16: "rom" } switch_link_matrix = [(1, 2), (1, 4), (2, 3), (2, 5), (3, 4), (3, 6), (4, 7), (4, 9), (5, 6), (5, 10), (6, 7), (6, 11), (7, 8), (8, 9), (8, 12), (10, 11), (10, 13), (11, 12), (11, 14), (12, 16), (13, 14), (14, 15), (15, 16)] host_count_per_switch = 1 topology = nx.Graph() nodes = list(switch_names.keys()) topology.add_nodes_from(nodes) topology.add_edges_from(switch_link_matrix) result = minimum_spanning_tree(topology) no_flood_links = list(set(switch_link_matrix) - set(result.edges)) # ---------- initialize network ----------------------------- #dpid = DPID_BASE OpenFlow14Switch = partial(OVSKernelSwitch, protocols=OPENFLOW_PROTOCOL) #STPEnabledSwitch = partial(OVSKernelSwitch, protocols=OPENFLOW_PROTOCOL, failMode="standalone", stp=True) net = Containernet(ipBase=IP_BASE) net.addController("c0", controller=RemoteController, link=OVSLink, ip=CONTROLLER_IP, port=CONTROLLER_PORT)
def min_spanning_tree(G):
    """Return the minimum spanning tree of ``G`` weighted by 'length'."""
    tree = nx.minimum_spanning_tree(G, weight='length')
    return tree
def _remove_overlap(nodes, overlap_frac):
    """Implement GTree algorithm https://arxiv.org/pdf/1608.02653.pdf.

    Each node is a dict with the keys ``'name'``, ``'x'``, ``'y'`` and
    ``'radius'``.  The input dicts are shallow-copied and not modified.

    For at most 10 rounds the node centres are triangulated, every
    triangulation edge is weighted with the gap between the two circles
    (negative when they overlap by more than ``overlap_frac`` allows), and
    the minimum spanning tree of that graph is walked from a leaf.  For each
    tree edge with a negative weight the far node and its whole subtree are
    pushed away by the size of the overlap.  The rounds stop early once no
    edge overlaps.

    Args:
        nodes: iterable of node dicts as described above.
        overlap_frac: fraction of the summed radii that may overlap.

    Returns:
        dict mapping node name to ``np.array([x, y])`` after the shifts.
    """
    nodes = [n.copy() for n in nodes]

    def dist(idx1, idx2, pos, nodes):
        # Centre distance minus the allowed part of the two radii.
        d = pos[idx1] - pos[idx2]
        center_to_center = np.sqrt((d * d).sum())
        return center_to_center - (1.0 - overlap_frac) * (
            nodes[idx1]['radius'] + nodes[idx2]['radius'])

    def get_next(mst, previous, current):
        # Tree neighbours of `current` other than the one we came from.
        next_nodes = []
        for e in list(mst.edges(current)):
            if previous is None or previous not in e:
                next_nodes.append(e[1] if e[0] == current else e[0])
        return next_nodes

    def shift_nodes(nodes, mst, source, target, delta_x, delta_y):
        # shift the target
        trg_node = nodes[target]
        trg_node['x'] += delta_x
        trg_node['y'] += delta_y
        # shift nodes recursively
        for next_n in get_next(mst, source, target):
            shift_nodes(nodes, mst, target, next_n, delta_x, delta_y)

    def process_tree(nodes, mst, previous, current):
        # process mst recursively
        for next_n in get_next(mst, previous, current):
            wt = mst.edges[(current, next_n)]['weight']
            if wt < 0:
                # compute the shift x, y
                src_node = nodes[current]
                trg_node = nodes[next_n]
                dx = trg_node['x'] - src_node['x']
                dy = trg_node['y'] - src_node['y']
                length = np.sqrt(dx**2 + dy**2)
                if length > 0:
                    frac_x = dx / length
                    frac_y = dy / length
                else:
                    # Coincident centres have no direction; pick the x axis
                    # instead of dividing by zero.
                    frac_x, frac_y = 1.0, 0.0
                delta_x = -wt * frac_x
                delta_y = -wt * frac_y
                # shift target and its children
                shift_nodes(nodes, mst, current, next_n, delta_x, delta_y)
            process_tree(nodes, mst, current, next_n)

    max_steps = 10
    for step in range(max_steps):
        # extract position data to numpy
        pos = np.array([[n['x'], n['y']] for n in nodes])

        # Candidate edges: Delaunay triangulation of the centres.  With
        # fewer than 3 nodes there is nothing to triangulate, so all pairs
        # (at most one) are used instead.
        raw_edges = set()
        if len(nodes) < 3:
            for i in range(len(nodes)):
                for j in range(i + 1, len(nodes)):
                    raw_edges.add((i, j))
        else:
            tri = sp.spatial.Delaunay(pos)
            for sim in tri.simplices:
                raw_edges.add((sim[0], sim[1]))
                raw_edges.add((sim[1], sim[2]))
                raw_edges.add((sim[2], sim[0]))

        # build weighted networkx graph. Weight is distance between node
        # edges
        nw = nx.Graph()
        n_overlap = 0
        for e in raw_edges:
            d = dist(e[0], e[1], pos, nodes)
            nw.add_edge(e[0], e[1], weight=d)
            if d < 0:
                n_overlap += 1
        print(f'Step {step} n_overlap = {n_overlap}')

        # quit looping if all weights are positive
        if n_overlap == 0:
            break

        # get minimal spanning tree of weighted graph
        mst = nx.minimum_spanning_tree(nw)
        # roots have degree == 1
        root = [n for n, d in mst.degree if d == 1][0]
        # recursively process mst from root
        process_tree(nodes, mst, None, root)

    return {n['name']: np.array([n['x'], n['y']]) for n in nodes}
id0 = 0 (totIdx, atomIdx, cfg, H, normal, d, pt0, nbrlist, slice_nbrlist_u, slice_nbrlist_d, pairs) = load_status_0(dirname) for path_id in path: for key, val in ID.iteritems(): if val == path_id: nucleus.append(str2bits(key)) potential.append(db[key]) fp.write(str(db[key]) + "\n") writecncfg(cfg[bits2nucleus(str2bits(key), totIdx), :], H, dirname + "path-" + str(id0)) id0 += 1 ''' Find MST of G, (supposed to be MEP?) ''' T = nx.minimum_spanning_tree(G) P = sorted(T.edges(data=True)) print(P) #quit() ''' Find all simple path starting from the shortest one ''' simple_paths = nx.shortest_simple_paths(G, source=idfewest, target=idmost) #sz = 0 #for path in simple_paths: # print('sz = {0}'.format(sz)) # sz += 1 print("size of simple_paths = {0}".format(sz))
def test_minimum_tree(self):
    """The tree built by the configured algorithm has the expected edges."""
    tree = nx.minimum_spanning_tree(self.G, algorithm=self.algo)
    found_edges = sorted(tree.edges(data=True))
    expected_edges = self.minimum_spanning_edgelist
    assert edges_equal(found_edges, expected_edges)
def network_graph_pre(historic_df, windows, rolling_corr):
    '''Generate, in advance, one plotly MST figure per rolling window.

    For every start offset in ``windows`` the rows
    ``window : window + rolling_corr`` of ``historic_df`` are correlated,
    ``1 - correlation`` is used as edge weight between every pair of
    columns, and the minimum spanning tree of that graph is laid out with a
    spring layout that pins the node ``'bitcoin'`` at the origin.

    Args:
        historic_df: DataFrame with one column per coin; it must contain a
            column named 'bitcoin'.
        windows: iterable of integer start offsets into ``historic_df``.
        rolling_corr: number of rows per correlation window.

    Returns:
        list of ``go.Figure``, one per window, each holding an edge trace
        and a node trace.  Marker size encodes the number of tree
        neighbours, marker colour the betweenness centrality.
    '''
    network_plotly_figures = []

    for window in windows:
        correlates_df = historic_df.iloc[window:(rolling_corr + window)]

        # sequence corr-matrix estimation (weight = 1 - correlation)
        corr_matrix = correlates_df.corr()
        dist_matrix = (1 - corr_matrix).stack().reset_index()
        dist_matrix.columns = ['coin1', 'coin2', 'weight']
        graph_matrice = dist_matrix.loc[
            dist_matrix['coin1'] != dist_matrix['coin2']]

        # package networkx: standard & mst-graph construction
        graph_TS = nx.from_pandas_edgelist(graph_matrice, 'coin1', 'coin2',
                                           ['weight'])
        graph_MST = nx.minimum_spanning_tree(graph_TS)

        # graph-nodes coordinates, with bitcoin pinned at the centre
        fix_posit = {'bitcoin': (0, 0)}
        fix_nodes = fix_posit.keys()
        positions = nx.spring_layout(graph_MST, weight='weight',
                                     pos=fix_posit, fixed=fix_nodes,
                                     scale=3, center=(0, 0))

        # mst-graph centrality params: betweenness & to-bitcoin-distance
        bitcenter = nx.single_source_shortest_path_length(
            graph_MST, 'bitcoin')
        betweenness = nx.betweenness_centrality(graph_MST)

        # Coordinates are collected in plain lists and handed to plotly
        # once; growing the trace properties with "+=" re-assigns the whole
        # tuple for every element.
        edge_x, edge_y = [], []
        for edge1, edge2 in graph_MST.edges():
            x0, y0 = positions[edge1]
            x1, y1 = positions[edge2]
            # None separates the individual line segments
            edge_x.extend((x0, x1, None))
            edge_y.extend((y0, y1, None))

        # package plotly: mst edge trace
        edge_trace = go.Scatter(x=edge_x, y=edge_y,
                                line=dict(width=1.5, color='#888'),
                                opacity=0.3, hoverinfo='none', name=window,
                                mode='lines')

        # plot parametrization - node coordinates
        node_x, node_y = [], []
        for node in graph_MST.nodes():
            x, y = positions[node]
            node_x.append(x)
            node_y.append(y)

        # information for nodes interactive display capabilities
        node_color, node_size, node_line_width, node_text = [], [], [], []
        for coin, neighbours in graph_MST.adjacency():
            node_color.append(betweenness[coin])
            node_size.append(len(neighbours) * 5 + 20)
            node_line_width.append(2 if coin == 'bitcoin' else .2)
            node_text.append(
                "COIN: <b>> {} <</b>"
                "<br><i>distance to Bitcoin: {}</i>"
                "<br><i>neighbours: {}</i>"
                "<br><i>betweenness: {}</i>"
                "".format(coin, bitcenter[coin], len(neighbours),
                          round(betweenness[coin], 6)))

        # package plotly: mst node trace
        node_trace = go.Scatter(
            x=node_x, y=node_y, text=node_text, hoverinfo='text',
            name=window, mode='markers',
            marker=dict(showscale=True, colorscale='YlGnBu',
                        reversescale=False, opacity=0.9, cmin=0.0,
                        cmax=0.6, size=node_size, color=node_color,
                        colorbar=dict(thickness=12,
                                      title="Graph Centrality Metrics: "
                                      "Degree [size] & Betweenes [color]",
                                      xanchor='left', titleside='right'),
                        line=dict(width=node_line_width, color='black')))

        # package plotly: network constant layout-aesthetics
        mst_layout = go.Layout(
            title="<br><b>MST graph representation"
            "of Cryptocurrency Market</b>",
            titlefont=dict(size=15), showlegend=False, hovermode='closest',
            margin=dict(b=5, l=5, r=5, t=15),
            xaxis=dict(showgrid=False, zeroline=False,
                       showticklabels=False, range=[-3, 4]),
            yaxis=dict(showgrid=False, zeroline=False,
                       showticklabels=False, range=[-5, 5]))

        network_plotly_figures.append(
            go.Figure(data=[edge_trace, node_trace], layout=mst_layout))

    return network_plotly_figures
def test_empty_graph(self):
    """The MST of an edgeless graph keeps every node and has no edges."""
    edgeless = nx.empty_graph(3)
    spanning = nx.minimum_spanning_tree(edgeless, algorithm=self.algo)

    expected_nodes = list(range(3))
    assert nodes_equal(sorted(spanning), expected_nodes)
    assert spanning.number_of_edges() == 0
header=None, names=['a', 'b']) n = len(data) cost_matrix = np.zeros((n, n)) a, b = data['a'].values, data['b'].values for i in range(n): x1, y1 = a[i], b[i] for j in range(n): x2, y2 = a[j], b[j] cost_matrix[i, j] = ((x1 - x2)**2 + (y1 - y2)**2)**0.5 if i == j: cost_matrix[i, j] = np.inf g = nx.from_numpy_array(cost_matrix) gcc = g.subgraph(max(nx.connected_components(g), key=len)) mst = nx.minimum_spanning_tree(gcc) multigraph = nx.MultiGraph() mst_cost = 0 for i in mst.edges: w = mst.edges[i[0], i[1]]['weight'] mst_cost += w multigraph.add_edge(i[0], i[1], weight=w) multigraph.add_edge(i[0], i[1], weight=w) print("Start finding cycle") tour = [u for u, v in nx.eulerian_circuit(multigraph)] emb_tour, visited = [], set() for i in tour: if i not in visited: emb_tour.append(i) visited.add(i)
print( "Random test cases genereted with following values for nodes (vertices):", n) for i in n: graph = nx.cycle_graph(i) # print("Original Graph") # print(nx.info(graph)) # nx.draw(graph, with_labels=True) # plt.show() for edge in graph.edges(): graph.edges[edge]["weight"] = int(np.random.uniform(2, 100)) tree1 = nx.minimum_spanning_tree(graph) a = [] for edge in tree1.edges(): a.append(tree1.edges[edge]["weight"]) c1 = sum(a) print("MST 1 COST :", c1) # print("MST 1 edges",tree1.edges(data=True)) x = graph.edges() y = list(x) graph.add_edge(1, graph.size() - 1, weight=1) tree2 = nx.minimum_spanning_tree(graph) b = []
def bridges(density, restrictions, costs, topological_correction_value):
    """Connect the separate solid regions of a 2-D density map with bridges.

    Cells with ``density > 0.5`` are treated as solid. The solid regions are
    labelled, a grid graph over all cells is built, pairwise path costs
    between one representative cell per region are collected into a small
    graph, and the cells along the paths chosen by that graph's minimum
    spanning tree are written into ``density`` with
    ``topological_correction_value``.

    Args:
        density: 2-D array indexed ``[x, y]``; modified in place.
        restrictions: not read; the name is only rebound locally on the last
            line of the visible code.
        costs: 2-D array indexed like ``density``, giving the cost of
            entering a non-solid cell.
        topological_correction_value: value written into ``density`` for
            every interior cell of a bridge path.

    Returns:
        ``density`` unchanged when there is at most one solid region.
        NOTE(review): no return statement is visible on the main path and
        the final ``restrictions`` assignment is never used, so the tail of
        this function may be missing from this view -- confirm.
    """
    binary_map = np.greater(density, 0.5)
    # Snapshot taken before any bridge cells are added.
    save_binary_map = binary_map.copy()

    # One-cell border of zeros, so padded index = unpadded index + 1.
    pad_density = np.pad(density, ((1, 1), (1, 1)), mode='constant')
    pad_binary_map = np.greater(pad_density, 0.5)

    density_shape = density.shape
    width = density_shape[0]
    height = density_shape[1]

    pad_costs = np.pad(costs, ((1, 1), (1, 1)), mode='constant')

    # NOTE(review): `neighbors=` is the older scikit-image spelling of
    # 4-connectivity; newer releases expect `connectivity=` -- confirm
    # against the installed version.
    [solid_labels, num_solid_labels] = skim.label(pad_binary_map,
                                                  neighbors=4,
                                                  return_num=True)

    # Zero or one solid region: nothing to connect.
    if num_solid_labels <= 1:
        return density

    # Grid graph over the interior cells. Node ids are flattened padded
    # coordinates: id = padded_x * padded_row_length + padded_y.
    density_graph = nx.Graph()
    for x_idx in range(0, width):
        for y_idx in range(0, height):
            center_node_id = (x_idx + 1) * (pad_density.shape[1]) + (y_idx + 1)

            for x_offset in range(0, 3):
                for y_offset in range(0, 3):
                    # Skip the cell itself and the four diagonal neighbours.
                    if ((x_offset == 1) and (y_offset == 1)) or (
                        (np.abs(x_offset - 1) + np.abs(y_offset - 1)) > 1):
                        continue

                    # Neighbour position in padded coordinates.
                    next_x_idx = x_idx + x_offset
                    next_y_idx = y_idx + y_offset

                    # Skip neighbours that fall on the padding border.
                    if ((next_x_idx == 0) or (next_y_idx == 0)
                            or (next_x_idx == (pad_density.shape[0] - 1))
                            or (next_y_idx == (pad_density.shape[1] - 1))):
                        continue

                    next_node_id = next_x_idx * (
                        pad_density.shape[1]) + next_y_idx

                    next_density_value = pad_binary_map[next_x_idx, next_y_idx]
                    cost_value = pad_costs[next_x_idx, next_y_idx]

                    # Stepping onto an already-solid cell is free.
                    if next_density_value:
                        cost_value = 0

                    #
                    # todo(groberts): this should be directed because as it stands you are just overwriting edges of adjacent
                    # nodes, so you aren't capturing that moving in one direction or another incurs a different cost!.
                    #
                    density_graph.add_edge(center_node_id,
                                           next_node_id,
                                           weight=cost_value)

    # First cell (in scan order) with non-zero density for each label,
    # stored in unpadded coordinates.
    label_to_representative_pt = {}
    for x_idx in range(0, width):
        for y_idx in range(0, height):
            density_value = pad_density[1 + x_idx, 1 + y_idx]
            component_label = solid_labels[1 + x_idx, 1 + y_idx]

            if (component_label in label_to_representative_pt.keys()) or (
                    not density_value):
                continue

            label_to_representative_pt[component_label] = [x_idx, y_idx]

    # Complete graph over region labels 1..num_solid_labels, weighted by a
    # cost accumulated along the cheapest grid path between representatives.
    mst_graph = nx.Graph()

    for label_idx_start in range(0, num_solid_labels):
        component_start = 1 + label_idx_start
        source_pt = label_to_representative_pt[component_start]
        source_node_id = (source_pt[0] + 1) * (pad_density.shape[1]) + (
            source_pt[1] + 1)

        # With no target given this holds the path to every reachable node.
        min_path_all = nx.shortest_path(density_graph,
                                        source=source_node_id,
                                        weight='weight')

        for label_idx_end in range(1 + label_idx_start, num_solid_labels):
            component_end = 1 + label_idx_end
            target_pt = label_to_representative_pt[component_end]
            target_node_id = (target_pt[0] + 1) * (pad_density.shape[1]) + (
                target_pt[1] + 1)

            min_path = min_path_all[target_node_id]

            # Sum over the interior nodes of the path (endpoints excluded).
            min_path_distance = 0
            for path_idx in range(1, (len(min_path) - 1)):
                node_id = min_path[path_idx]
                # Node id back to unpadded coordinates.
                source_x = int(node_id / pad_density.shape[1]) - 1
                source_y = node_id % pad_density.shape[1] - 1

                # NOTE(review): source_x/source_y are unpadded here but
                # index the padded cost array, which reads the cell one step
                # up-left of the path cell -- confirm whether `costs` or a
                # +1 offset was intended.
                min_path_distance += pad_costs[source_x, source_y]

            mst_graph.add_edge(component_start,
                               component_end,
                               weight=min_path_distance)

    mst = nx.minimum_spanning_tree(mst_graph)

    # Not used in the visible code.
    mst_edges = nx.edges(mst)

    # Materialise a bridge for every edge kept by the spanning tree.
    for edge in mst.edges():
        edge_start, edge_end = edge

        source_pt = label_to_representative_pt[edge_start]
        target_pt = label_to_representative_pt[edge_end]

        source_node_id = (source_pt[0] + 1) * (pad_density.shape[1]) + (
            source_pt[1] + 1)
        target_node_id = (target_pt[0] + 1) * (pad_density.shape[1]) + (
            target_pt[1] + 1)

        min_path = nx.shortest_path(density_graph,
                                    source=source_node_id,
                                    target=target_node_id,
                                    weight='weight')

        # Fill every interior path cell and keep all four maps in sync.
        for path_idx in range(1, (len(min_path) - 1)):
            node_id = min_path[path_idx]
            source_x = int(node_id / pad_density.shape[1]) - 1
            source_y = node_id % pad_density.shape[1] - 1

            density[source_x, source_y] = topological_correction_value
            pad_density[1 + source_x, 1 + source_y] = topological_correction_value
            binary_map[source_x, source_y] = True
            pad_binary_map[1 + source_x, 1 + source_y] = True

    # True where the solid/void state is the same as before bridging.
    # Rebinds the local name only; the caller's array is not modified.
    restrictions = np.logical_not(np.logical_xor(binary_map, save_binary_map))
def seeded_mst(cleaned_edges, edge_weights, seed_labels, _node_sizes=None):
    """
    Partition a graph using a minimum spanning tree.

    To ensure that seeded nodes cannot be merged together prematurely,
    a virtual root node is inserted into the graph and given artificially
    strong affinity (low edge weight) to all seeded nodes.
    Thanks to their low weights, the root node's edges will always be
    included in the MST, thus ensuring that seeded nodes can only be
    joined via the root node.

    After the MST is computed, the root node is deleted, leaving behind a
    forest in which each connected component contains at most one seed
    label.

    Args:
        cleaned_edges:
            array, (E,2), uint32
        edge_weights:
            array, (E,), float32
            May be empty, in which case every node is its own component.
        seed_labels:
            array (N,), uint32
            All un-seeded nodes should be marked as 0.
        _node_sizes:
            Not used by this function.

    Returns:
        (output_labels, disconnected_components, contains_unlabeled_components)

        Where:
            output_labels:
                array (N,), uint32
                Agglomerated node labeling.
            disconnected_components:
                A set of seeds which ended up with more than one component
                in the result.
            contains_unlabeled_components:
                True if the input contains one or more disjoint components
                that were not seeded and thus not labeled during
                agglomeration. False otherwise.
    """
    assert len(cleaned_edges) == len(edge_weights)

    g = nx.Graph()
    g.add_nodes_from(np.arange(len(seed_labels)))

    # The root edges must be lighter than every real edge. ndarray.min()
    # raises ValueError on an empty array, so an edgeless input needs a
    # fallback; with no real edges any finite weight will do.
    # fixme: would -np.inf work here?
    if len(edge_weights) > 0:
        TINY_WEIGHT = edge_weights.min() - 1000.0
    else:
        TINY_WEIGHT = -1000.0

    for (u, v), w in zip(cleaned_edges, edge_weights):
        g.add_edge(u, v, weight=w)

    # Add a special root node and connect it to all seed nodes.
    # Its id is one past the largest real node id.
    root = len(seed_labels)
    for seed_node in seed_labels.nonzero()[0]:
        g.add_edge(root, seed_node, weight=TINY_WEIGHT)

    # Perform MST and then drop the root node
    # (and all its edges), leaving a forest.
    mst = nx.minimum_spanning_tree(g)
    mst.remove_node(root)

    # Every node was added to the graph up front, so the components below
    # cover every index of output_labels.
    output_labels = np.empty_like(seed_labels)
    contains_unlabeled_components = False
    for cc in nx.connected_components(mst):
        cc = list(cc)
        cc_seeds = set(pd.unique(seed_labels[cc])) - {0}
        assert len(cc_seeds) <= 1
        if len(cc_seeds) == 1:
            output_labels[cc] = cc_seeds.pop()
        else:
            # No seed reached this component; leave it unlabeled.
            output_labels[cc] = 0
            contains_unlabeled_components = True

    disconnected_components = _find_disconnected_components(
        cleaned_edges, output_labels)

    return CleaveResults(output_labels, disconnected_components,
                         contains_unlabeled_components)
node_size=500, node_color='gray', font_size=10, edge_width=10, alpha=1, arrows=False) plt.savefig('elimina3.png') #// Outros formatos: pdf, svg, ... plt.axis('off') plt.show() algPrim(A, V) G = nx.from_numpy_matrix(A) #converte a matriz em um grafo T = nx.minimum_spanning_tree(G) #faz uma MST automatica W = [int(T[u][v]['weight']) for u, v in T.edges()] #lista com os pesos da MST gerada automatico #imprimir a soma das arestas print "\n\n T \n\n" print " - Soma dos pesos da MST gerada automatico: ", sum(W) print " - Número de vértices da MST gerada automatico: ", len(T.edges()) print "\n\n T \n\n" plt.figure() #// Cria figura para desenhar grafo: 15 é a dimensão da imagem nx.draw(T, dim=100, with_labels=True, node_color='gray',
def main():
    """Build per-component spanning trees from pairwise locus records.

    Reads tab-separated locus-pair records from the gzip file ``--infile``
    and a list of locus names from ``--infile2``, classifies each pair by
    how many of its loci appear in that list, builds graphs from the pairs
    that pass the project filters, and then (in the code after the profiler
    dump) derives spanning trees, BED files and a pickle from them.

    NOTE(review): ``sys.exit(1)`` is called unconditionally right after the
    profiler statistics are printed, so as written nothing after that point
    runs. This looks like leftover profiling scaffolding -- confirm before
    relying on any of the later outputs.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', type=str, default=None)
    parser.add_argument('-s', '--singletons', type=str, default=None)
    parser.add_argument('-t', '--temp_prefix', type=str, default=None)
    parser.add_argument('-f', '--infile2', type=str, default=None)
    parser.add_argument('-c', '--compfile', type=str, default=None)
    parser.add_argument('-b', '--bedfile', type=str, default=None)
    parser.add_argument('-e', '--edgefile', type=str, default=None)
    parser.add_argument('-p', '--hapfile', type=str, default=None)
    args = parser.parse_args()

    min_counts_strict = 5
    # Profile everything up to cp.disable() below.
    cp = cProfile.Profile()
    cp.enable()
    # Divisor applied to ru_maxrss before it is logged.
    usage_denom = 1024

    Gloc = nx.Graph()  # pairs where neither locus is in homvar
    Ghom = nx.Graph()  # pairs where both loci are in homvar

    # Locus names from --infile2; only membership is used, the value 1 is a
    # placeholder. Reading stops at the first empty line or at EOF.
    homvar = {}
    with gzip.open(args.infile2, 'rt') as fp:
        line = fp.readline().strip()
        while line:
            homvar[line] = 1
            line = fp.readline().strip()

    # First pass over the pair records. Pairs are routed into Gloc/Ghom or
    # spilled to the two temporary files for the later passes.
    with gzip.open(args.infile, 'rt') as fp, gzip.open(
            args.temp_prefix + '.het.txt.gz',
            'wt') as fhet, gzip.open(args.temp_prefix + '.mixed.txt.gz',
                                     'wt') as fmix:
        line = fp.readline().strip()
        ct = 0
        while line:
            # Progress / peak-memory log every 1000 records.
            if ct % 1000 == 0:
                sys.stderr.write(
                    str(ct) + '\t' + str(
                        resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
                        usage_denom) + '\n')
            # Fields 0-1: locus names; 2-5: four ints; 6-9: four floats
            # (presumably per-orientation counts and mean distances -- TODO
            # confirm against the producer of this file).
            ll = re.split('[\t]', line)
            cts = list(map(int, ll[2:6]))
            mns = list(map(float, ll[6:10]))
            tot = sum(cts)
            [loc1, loc2] = ll[0:2]
            # Pair type by membership in homvar: both, neither, or one.
            tp = 'het-hom'
            if loc1 in homvar and loc2 in homvar:
                tp = 'hom-hom'
            elif not (loc1 in homvar or loc2 in homvar):
                tp = 'het-het'
            # Put the pair in string order; the two middle entries of cts
            # and mns are swapped along with the loci.
            if loc2 < loc1:
                [loc1, loc2] = [loc2, loc1]
                cts = [cts[0], cts[2], cts[1], cts[3]]
                mns = [mns[0], mns[2], mns[1], mns[3]]
            conf = False
            if pre_filter_loose_pass(cts, 0.90, 1, tp):
                # Mixed pairs are deferred to the second pass.
                if tp == 'het-hom':
                    print(line, file=fmix)
                if pre_filter_strict_pass(cts, 0.95, tp):
                    conf = True
                if tp == 'het-het':
                    Gloc.add_edge(loc1,
                                  loc2,
                                  conf=conf,
                                  cts=cts,
                                  mns=mns,
                                  wt=tot)
                elif tp == 'hom-hom':
                    Ghom.add_edge(loc1,
                                  loc2,
                                  conf=conf,
                                  cts=cts,
                                  mns=mns,
                                  wt=tot)
            else:
                # Failed the loose filter: keep the loci as isolated nodes
                # and save het-het records for the third pass.
                if tp == 'het-het':
                    print(line, file=fhet)
                    Gloc.add_node(loc1)
                    Gloc.add_node(loc2)
                if tp == 'het-hom':
                    # Only the locus that is not in homvar goes into Gloc.
                    if loc1 in homvar:
                        Gloc.add_node(loc2)
                    else:
                        Gloc.add_node(loc1)
            line = fp.readline().strip()
            ct += 1

    # Ghom edges: 'dist' is the mns entry whose cts entry holds more than
    # 90% of the total; edges with no such entry get no 'dist'.
    for edge in Ghom.edges(data=True):
        nn = sum(edge[2]['cts'])
        for ii in range(4):
            if edge[2]['cts'][ii] > 0.9 * nn:
                edge[2]['dist'] = edge[2]['mns'][ii]

    # Gloc edges: entries 0+3 dominating marks the edge 'outer', entries
    # 1+2 dominating marks it 'inner'; 'dist' is the mean of that pair.
    for edge in Gloc.edges(data=True):
        nn = sum(edge[2]['cts'])
        if edge[2]['cts'][0] + edge[2]['cts'][3] > 0.8 * nn:
            edge[2]['orient'] = 'outer'
            edge[2]['dist'] = 0.5 * (edge[2]['mns'][0] + edge[2]['mns'][3])
        if edge[2]['cts'][1] + edge[2]['cts'][2] > 0.8 * nn:
            edge[2]['orient'] = 'inner'
            edge[2]['dist'] = 0.5 * (edge[2]['mns'][1] + edge[2]['mns'][2])

    # The return values are not used below; presumably remove_bridges
    # edits the graphs in place -- verify against its definition.
    het_bridges = remove_bridges(Gloc, min_counts_strict)
    hom_bridges = remove_bridges(Ghom, min_counts_strict)

    loc2comphom = {}
    comp2treehom = {}
    loc2comp = {}
    comp2tree = {}

    cp.disable()
    cp.print_stats()
    # NOTE(review): unconditional exit -- everything below is unreachable.
    sys.exit(1)

    # Spanning tree per connected component of Ghom, largest first.
    gg = list(
        Ghom.subgraph(cc) for cc in sorted(
            nx.connected_components(Ghom), key=len, reverse=True))
    for ii in range(len(gg)):
        print(str(ii))
        tr = nx.minimum_spanning_tree(gg[ii], weight='dist')
        comp2treehom[ii] = tr
        for node in tr.nodes():
            loc2comphom[node] = ii

    Ghom = None

    # Same for Gloc; components with one or two nodes are kept as they are.
    gg = list(
        Gloc.subgraph(cc) for cc in sorted(
            nx.connected_components(Gloc), key=len, reverse=True))
    for ii in range(len(gg)):
        print(
            str(ii) + '\t' + str(
                resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
                usage_denom / usage_denom))
        if gg[ii].number_of_nodes() > 2:
            tr = nx.minimum_spanning_tree(gg[ii], weight='dist')
        else:
            tr = gg[ii]
        comp2tree[ii] = tr
        for node in tr.nodes():
            loc2comp[node] = ii

    # One BED line per hom component. Node names are split on ':' or '_';
    # the first piece is used as the chromosome and the second as an
    # integer position. Nodes whose name contains 'ctg' are skipped.
    with gzip.open(args.bedfile + 'hom.bed.gz', 'wt') as outb:
        for comp in comp2treehom.keys():
            id = 'hom_' + str(comp)
            mintree = comp2treehom[comp]
            nodes = [
                int(re.split('[:_]', node)[1]) for node in mintree.nodes
                if not 'ctg' in node
            ]
            chrs = [
                re.split('[:_]', node)[0] for node in mintree.nodes
                if not 'ctg' in node
            ]
            # Most frequent chromosome name in the component.
            # NOTE(review): unlike the het writers below there is no guard
            # for an empty `nodes` list here.
            chr = Counter(chrs).most_common(1)[0][0]
            nodes.sort()
            dists = []
            ones = []
            # Offsets from the smallest position (plus one), each with
            # block size 1.
            for kk in range(len(nodes)):
                dists.append(nodes[kk] - nodes[0] + 1)
                ones.append(1)
            dstr = ','.join(map(str, dists))
            onestr = ','.join(map(str, ones))
            print(chr + '\t' + str(nodes[0]) + '\t' + str(max(nodes)) + '\t' +
                  id + '\t100\t.\t' + str(nodes[0]) + '\t' + str(max(nodes)) +
                  '\t150,150,0\t' + str(len(dists)) + '\t' + onestr + '\t' +
                  dstr,
                  file=outb)

    # Same for het components, skipping components with no usable node.
    with gzip.open(args.bedfile + 'het.bed.gz', 'wt') as outb:
        for comp in comp2tree.keys():
            id = 'het_' + str(comp)
            print(id)
            mintree = comp2tree[comp]
            nodes = [
                int(re.split('[:_]', node)[1]) for node in mintree.nodes
                if not 'ctg' in node
            ]
            if len(nodes) > 0:
                chrs = [
                    re.split('[:_]', node)[0] for node in mintree.nodes
                    if not 'ctg' in node
                ]
                chr = Counter(chrs).most_common(1)[0][0]
                nodes.sort()
                dists = []
                ones = []
                for kk in range(len(nodes)):
                    dists.append(nodes[kk] - nodes[0] + 1)
                    ones.append(1)
                dstr = ','.join(map(str, dists))
                onestr = ','.join(map(str, ones))
                print(chr + '\t' + str(nodes[0]) + '\t' + str(max(nodes)) +
                      '\t' + id + '\t100\t.\t' + str(nodes[0]) + '\t' +
                      str(max(nodes)) + '\t0,150,0\t' + str(len(dists)) +
                      '\t' + onestr + '\t' + dstr,
                      file=outb)

    # Persist the component maps and trees to a fixed relative path.
    ll = [loc2comp, comp2tree, loc2comphom, comp2treehom]
    with open("../hom.p", 'wb') as f:
        pickle.dump(ll, f)

    Gmix = nx.Graph()  # locus-level edges between a het and a hom locus
    superg = nx.Graph()  # component-level graph: 'het_<n>' -- 'hom_<n>'

    # Second pass: the mixed records spilled during the first pass.
    with gzip.open(args.temp_prefix + '.mixed.txt.gz', 'rt') as fp:
        line = fp.readline().strip()
        ct = 0
        while line:
            if ct % 1000 == 0:
                sys.stderr.write(
                    str(ct) + '\t' + str(
                        resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
                        usage_denom) + '\n')
            ll = re.split('[\t]', line)
            cts = list(map(int, ll[2:6]))
            mns = list(map(float, ll[6:10]))
            tot = sum(cts)
            [loc1, loc2] = ll[0:2]
            # Exactly one of the two loci is in homvar.
            if (loc1 in homvar and loc2 not in homvar) or (
                    loc2 in homvar and loc1 not in homvar):
                tp = 'het-hom'
                if pre_filter_strict_pass(cts, 0.95, tp):
                    conf = True
                    # Orient the pair so loc1 is the het locus and loc2 the
                    # hom locus.
                    if not loc2 in homvar:
                        [loc1, loc2] = [loc2, loc1]
                        cts = [cts[0], cts[2], cts[1], cts[3]]
                        mns = [mns[0], mns[2], mns[1], mns[3]]
                    if loc1 in loc2comp and loc2 in loc2comphom:
                        # Unweighted mean of the mns entries whose cts
                        # entry is positive.
                        num = 0
                        denom = 0
                        for ii in range(4):
                            if cts[ii] > 0:
                                num += mns[ii]
                                denom += 1
                        mn_dist = 1.0 * num / denom
                        Gmix.add_edge(loc1,
                                      loc2,
                                      conf=conf,
                                      cts=cts,
                                      mns=mns,
                                      dist=mn_dist,
                                      wt=tot)
                        hetcomp = loc2comp[loc1]
                        homcomp = loc2comphom[loc2]
                        node1 = 'het_' + str(hetcomp)
                        node2 = 'hom_' + str(homcomp)
                        # Accumulate evidence per component pair.
                        if not superg.has_edge(node1, node2):
                            superg.add_edge(node1,
                                            node2,
                                            dist=[],
                                            wt=0,
                                            ct=0,
                                            sum_dist=0)
                        superg.edges[node1, node2]['wt'] += tot
                        superg.edges[node1, node2]['dist'].append(mn_dist)
                        superg.edges[node1, node2]['ct'] += 1
                        superg.edges[node1, node2]['sum_dist'] += mn_dist
            line = fp.readline().strip()
            ct += 1

    # Connected groups of het/hom components, largest first.
    gg = list(
        superg.subgraph(cc) for cc in sorted(
            nx.connected_components(superg), key=len, reverse=True))
    supercomp2tree = {}
    with gzip.open(args.bedfile + '.mixed.bed.gz', 'wt') as outb:
        for ii in range(len(gg)):
            id = 'mixed_' + str(ii)
            tocomp = []
            for node in gg[ii].nodes():
                # NOTE(review): this rebinds `id`, so the BED line below is
                # written with the numeric part of the last component name
                # rather than 'mixed_<ii>' -- confirm intent.
                [tp, id] = re.split('_', node)
                if tp == 'het':
                    tr = comp2tree[int(id)]
                else:
                    tr = comp2treehom[int(id)]
                # Tag every locus with the type of its component. The inner
                # loop reuses the name `node` of the outer loop.
                for node in tr.nodes():
                    tr.nodes[node]['tp'] = tp
                tocomp.append(tr)
            # Member trees plus the Gmix edges among their loci, then one
            # spanning tree over the union.
            Gcomp = nx.compose_all(tocomp)
            Gsub = Gmix.subgraph(Gcomp.nodes())
            Gcomp1 = nx.compose(Gsub, Gcomp)
            mintree = nx.minimum_spanning_tree(Gcomp1, weight='dist')
            supercomp2tree[ii] = mintree
            nodes = [
                int(re.split('[:_]', node)[1]) for node in mintree.nodes
                if not 'ctg' in node
            ]
            chrs = [
                re.split('[:_]', node)[0] for node in mintree.nodes
                if not 'ctg' in node
            ]
            chr = Counter(chrs).most_common(1)[0][0]
            nodes.sort()
            dists = []
            ones = []
            for kk in range(len(nodes)):
                dists.append(nodes[kk] - nodes[0] + 1)
                ones.append(1)
            dstr = ','.join(map(str, dists))
            onestr = ','.join(map(str, ones))
            print(chr + '\t' + str(nodes[0]) + '\t' + str(max(nodes)) + '\t' +
                  id + '\t100\t.\t' + str(nodes[0]) + '\t' + str(max(nodes)) +
                  '\t150,150,0\t' + str(len(dists)) + '\t' + onestr + '\t' +
                  dstr,
                  file=outb)

    # Component name ('het_<n>' / 'hom_<n>') -> index of its group.
    comp2supercomp = {}
    for ii in range(len(gg)):
        for node in gg[ii].nodes():
            comp2supercomp[node] = ii

    # Third pass: het-het records that failed the first loose filter are
    # retried with a different third argument (2 instead of 1).
    Gloose = nx.Graph()
    with gzip.open(args.temp_prefix + '.het.txt.gz', 'rt') as fp:
        line = fp.readline().strip()
        ct = 0
        while line:
            if ct % 1000 == 0:
                sys.stderr.write(
                    str(ct) + '\t' + str(
                        resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
                        usage_denom) + '\n')
            ll = re.split('[\t]', line)
            cts = list(map(int, ll[2:6]))
            mns = list(map(float, ll[6:10]))
            tot = sum(cts)
            [loc1, loc2] = ll[0:2]
            if not (loc1 in homvar or loc2 in homvar):
                tp = 'het-het'
                if loc2 < loc1:
                    [loc1, loc2] = [loc2, loc1]
                    cts = [cts[0], cts[2], cts[1], cts[3]]
                    mns = [mns[0], mns[2], mns[1], mns[3]]
                conf = False
                if pre_filter_loose_pass(cts, 0.90, 2, tp):
                    Gloose.add_edge(loc1,
                                    loc2,
                                    conf=conf,
                                    cts=cts,
                                    mns=mns,
                                    wt=tot)
            line = fp.readline().strip()
            ct += 1

    # Promote a loose edge into Gloc when it joins two different het
    # components of the same group and its own mean distance is within 1000
    # of the path length between its endpoints in that group's tree.
    for edge in Gloose.edges(data=True):
        if edge[0] in loc2comp and edge[1] in loc2comp and not loc2comp[
                edge[0]] == loc2comp[edge[1]]:
            [comp0, comp1] = [
                'het_' + str(loc2comp[edge[0]]),
                'het_' + str(loc2comp[edge[1]])
            ]
            if comp0 in comp2supercomp and comp1 in comp2supercomp:
                [s0, s1] = [comp2supercomp[comp0], comp2supercomp[comp1]]
                if s0 == s1 and edge[2]['wt'] > 1:
                    spl = nx.shortest_path_length(supercomp2tree[s0],
                                                  source=edge[0],
                                                  target=edge[1],
                                                  weight='dist')
                    numer = 0
                    denom = 0
                    for ii in range(4):
                        if edge[2]['cts'][ii] > 0:
                            numer += edge[2]['mns'][ii]
                            denom += 1
                    edge[2]['dist'] = 1.0 * numer / denom
                    if abs(edge[2]['dist'] - spl) < 1000:
                        Gloc.add_edge(edge[0],
                                      edge[1],
                                      cts=edge[2]['cts'],
                                      mns=edge[2]['mns'],
                                      dist=edge[2]['dist'],
                                      wt=edge[2]['wt'])

    # Recompute the het components and trees on the augmented Gloc.
    comp2tree = {}
    loc2comp = {}
    gg = list(
        Gloc.subgraph(cc) for cc in sorted(
            nx.connected_components(Gloc), key=len, reverse=True))
    for ii in range(len(gg)):
        print(
            str(ii) + '\t' + str(
                resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
                usage_denom / usage_denom))
        if gg[ii].number_of_nodes() > 2:
            tr = nx.minimum_spanning_tree(gg[ii], weight='dist')
        else:
            tr = gg[ii]
        comp2tree[ii] = tr
        for node in tr.nodes():
            loc2comp[node] = ii

    # Second het BED file, written from the recomputed components.
    with gzip.open(args.bedfile + 'het.1.bed.gz', 'wt') as outb:
        for comp in comp2tree.keys():
            id = 'het_' + str(comp)
            print(id)
            mintree = comp2tree[comp]
            nodes = [
                int(re.split('[:_]', node)[1]) for node in mintree.nodes
                if not 'ctg' in node
            ]
            if len(nodes) > 0:
                chrs = [
                    re.split('[:_]', node)[0] for node in mintree.nodes
                    if not 'ctg' in node
                ]
                chr = Counter(chrs).most_common(1)[0][0]
                nodes.sort()
                dists = []
                ones = []
                for kk in range(len(nodes)):
                    dists.append(nodes[kk] - nodes[0] + 1)
                    ones.append(1)
                dstr = ','.join(map(str, dists))
                onestr = ','.join(map(str, ones))
                print(chr + '\t' + str(nodes[0]) + '\t' + str(max(nodes)) +
                      '\t' + id + '\t100\t.\t' + str(nodes[0]) + '\t' +
                      str(max(nodes)) + '\t0,150,0\t' + str(len(dists)) +
                      '\t' + onestr + '\t' + dstr,
                      file=outb)

    # Drops into an interactive console with all locals available.
    code.interact(local=locals())
def main():
    """Build spanning trees for the hom-hom locus graph and pickle them.

    Locus names are read from ``--infile2``. Each tab-separated pair record
    in ``--infile`` is then routed: pairs with neither locus in that list
    are copied to ``<temp_prefix>.het.txt.gz``, pairs with exactly one are
    copied to ``<temp_prefix>.mixed.txt.gz``, and pairs with both are run
    through ``pre_filter_strict_pass`` and, on success, added as graph
    edges. After ``remove_bridges``, one minimum spanning tree (weighted by
    ``dist``) is computed per connected component and the result is pickled
    to ``<temp_prefix>.hom.p``.
    """
    arg_parser = argparse.ArgumentParser()
    for short_flag, long_flag in (('-i', '--infile'),
                                  ('-s', '--singletons'),
                                  ('-t', '--temp_prefix'),
                                  ('-f', '--infile2')):
        arg_parser.add_argument(short_flag, long_flag, type=str, default=None)
    args = arg_parser.parse_args()

    strict_min_counts = 5
    rss_divisor = 1024 * 1024
    hom_graph = nx.Graph()

    def stripped_lines(handle):
        """Iterate over stripped lines up to the first empty one or EOF."""
        return iter(lambda: handle.readline().strip(), '')

    # Only membership is ever tested, so a set is enough.
    with gzip.open(args.infile2, 'rt') as hom_file:
        hom_loci = set(stripped_lines(hom_file))

    with gzip.open(args.infile, 'rt') as pairs_file, \
            gzip.open(args.temp_prefix + '.het.txt.gz', 'wt') as het_out, \
            gzip.open(args.temp_prefix + '.mixed.txt.gz', 'wt') as mixed_out:
        for line_no, record in enumerate(stripped_lines(pairs_file)):
            # Progress / peak-memory log every 1000 records.
            if line_no % 1000 == 0:
                peak_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
                sys.stderr.write('{}\t{}\n'.format(line_no,
                                                   peak_rss / rss_divisor))

            fields = re.split('[\t]', record)
            [first, second] = fields[0:2]
            first_is_hom = first in hom_loci
            second_is_hom = second in hom_loci

            # het-het: neither locus is in the hom list.
            if not first_is_hom and not second_is_hom:
                print(record, file=het_out)
                continue
            # het-hom: exactly one locus is in the hom list.
            if not (first_is_hom and second_is_hom):
                print(record, file=mixed_out)
                continue

            # hom-hom: parse the numeric fields and try to add an edge.
            counts = [int(value) for value in fields[2:6]]
            means = [float(value) for value in fields[6:10]]
            if second < first:
                # Keep the pair in string order; the two middle entries are
                # swapped together with the loci.
                first, second = second, first
                counts = [counts[0], counts[2], counts[1], counts[3]]
                means = [means[0], means[2], means[1], means[3]]

            passed, orient, weight, dist = pre_filter_strict_pass(
                counts, means, 0.95, 'hom-hom')
            if passed:
                hom_graph.add_edge(first, second,
                                   orient=orient, dist=int(dist), wt=weight)

    # Return value is not needed here.
    remove_bridges(hom_graph, strict_min_counts, 'hom-hom')

    # One spanning tree per connected component, largest component first.
    components = sorted(nx.connected_components(hom_graph),
                        key=len, reverse=True)
    subgraphs = [hom_graph.subgraph(members) for members in components]

    loc2comphom = {}
    comp2treehom = {}
    for comp_idx, subgraph in enumerate(subgraphs):
        print(str(comp_idx))
        tree = nx.minimum_spanning_tree(subgraph, weight='dist')
        comp2treehom[comp_idx] = tree
        loc2comphom.update(dict.fromkeys(tree.nodes(), comp_idx))

    hom_graph = None

    with open(args.temp_prefix + '.hom.p', 'wb') as pickle_file:
        pickle.dump([loc2comphom, comp2treehom], pickle_file)