Beispiel #1
0
 def __remove_random_fermat_point(self, input_graph, fermat_points):
     """Try dropping one randomly chosen Fermat point and rebuild the tree.

     The removal is attempted on a copy of ``input_graph`` and only when
     more than one Fermat point is available.  If the copy is still
     connected afterwards, its minimum spanning tree and the copy itself
     are returned.  Otherwise the minimum spanning tree of the untouched
     ``input_graph`` is returned together with ``input_graph``.
     """
     pruned = input_graph.copy()
     n_points = len(fermat_points)
     removable = n_points > 1
     if removable:
         # NOTE(review): the upper bound is treated as inclusive, which holds
         # if ``rnd`` is the stdlib ``random`` module -- confirm.
         victim_index = rnd.randint(0, n_points - 1)
         pruned.remove_node(fermat_points[victim_index])
     if removable and nx.is_connected(pruned):
         chosen = pruned
     else:
         chosen = input_graph
     return nx.minimum_spanning_tree(chosen), chosen
 def test_mst_edges_specify_weight(self):
     """The ``weight`` keyword selects which edge attribute drives the MST."""
     graph = nx.Graph()
     edge_specs = [
         (1, 2, dict(weight=1, color='red', distance=7)),
         (1, 3, dict(weight=30, color='blue', distance=1)),
         (2, 3, dict(weight=1, color='green', distance=1)),
     ]
     for u, v, attrs in edge_specs:
         graph.add_edge(u, v, **attrs)
     # An isolated node is expected to show up in the result as well.
     graph.add_node(13, color='purple')
     graph.graph['foo'] = 'bar'

     expected_nodes = [1, 2, 3, 13]

     # Default attribute ("weight").
     by_weight = nx.minimum_spanning_tree(graph)
     assert_equal(sorted(by_weight.nodes()), expected_nodes)
     assert_equal(sorted(by_weight.edges()), [(1, 2), (2, 3)])

     # Explicit attribute ("distance") picks a different tree.
     by_distance = nx.minimum_spanning_tree(graph, weight='distance')
     assert_equal(sorted(by_distance.edges()), [(1, 3), (2, 3)])
     assert_equal(sorted(by_distance.nodes()), expected_nodes)
Beispiel #3
0
 def test_prim_minimum_spanning_tree_edges_specify_weight(self):
     """Prim's algorithm honours the attribute named by ``weight``."""
     graph = nx.Graph()
     graph.add_edges_from([
         (1, 2, {"weight": 1, "color": "red", "distance": 7}),
         (1, 3, {"weight": 30, "color": "blue", "distance": 1}),
         (2, 3, {"weight": 1, "color": "green", "distance": 1}),
     ])
     # Isolated node plus a graph-level attribute.
     graph.add_node(13, color="purple")
     graph.graph["foo"] = "bar"

     all_nodes = [1, 2, 3, 13]

     tree = nx.minimum_spanning_tree(graph, algorithm="prim")
     assert_equal(sorted(tree.nodes()), all_nodes)
     assert_equal(sorted(tree.edges()), [(1, 2), (2, 3)])

     tree = nx.minimum_spanning_tree(
         graph, weight="distance", algorithm="prim")
     assert_equal(sorted(tree.edges()), [(1, 3), (2, 3)])
     assert_equal(sorted(tree.nodes()), all_nodes)
Beispiel #4
0
def spanning_trees():
    """Build a graph made of two half-grid spanning trees plus cross links.

    The nodes of ``full_grid_nocut()`` are split by their first coordinate:
    ``< 5`` is tagged as plant 1, everything else as plant 2.  A minimum
    spanning tree is computed for each half and its edges are copied into a
    fresh graph with ``difficulty`` 1.  After ``assign_difficulties`` has
    run, the halves are joined by the edges ``(4, i) -- (5, i)`` for
    ``i`` in ``0..10`` with ``difficulty`` 3.

    Returns the new graph.  Its ``nests`` graph attribute is copied from the
    grid and its ``name`` is ``'span_trees'``.
    """
    grid = full_grid_nocut()
    result = nx.Graph()
    halves = {1: [], 2: []}

    for node in grid.nodes():
        result.add_node(node)
        label = 1 if node[0] < 5 else 2
        halves[label].append(node)
        result.node[node]['plant'] = label

    # One spanning tree per half of the grid.
    trees = [nx.minimum_spanning_tree(grid.subgraph(halves[label]))
             for label in (1, 2)]

    for tree in trees:
        for a, b in tree.edges():
            result.add_edge(a, b)
            result[a][b]['difficulty'] = 1

    result.graph['nests'] = grid.graph['nests']
    result.graph['name'] = 'span_trees'

    # Disabled alternative: run partition_plants on the result and then
    # reset the graph name to 'span_trees'.

    assign_difficulties(result)

    # Bridge the two halves along the boundary between columns 4 and 5.
    for i in xrange(11):
        left, right = (4, i), (5, i)
        result.add_edge(left, right)
        result[left][right]['difficulty'] = 3

    # Disabled alternative: a chain of difficulty-1 edges
    # (i, 5) -- (i + 1, 5) for i in 0..9.

    return result
Beispiel #5
0
def chords(G):
  """Split the edges of G into a spanning tree and its chords.

  The chords are all the edges of G that are not in a spanning tree of G.

  Parameters
  ----------
  G : graph
     A NetworkX graph.  For a directed graph the spanning tree is computed
     on an undirected copy.

  Returns
  -------
  T : A copy of G (same type as G) reduced to the spanning-tree edges.
  C : A copy of G (same type as G) reduced to the chords.

  The tuple is returned in the order ``(T, C)``.

  """
  if G.is_directed():
    # The spanning-tree routine is run on an undirected view of G.
    undirected_type = nx.MultiGraph if G.is_multigraph() else nx.Graph
    T = nx.minimum_spanning_tree(undirected_type(G))
  else:
    T = nx.minimum_spanning_tree(G)

  C = G.copy()
  # edges() works on both old (list) and new (view) NetworkX releases; the
  # snapshot keeps the iteration independent of later graph changes.
  for u, v in list(T.edges()):
    try:
      C.remove_edge(u, v)
    except nx.NetworkXError:
      # For a directed G the tree edge may be stored as (v, u).
      C.remove_edge(v, u)

  # Recreate T from G to get the same type as G: drop every chord.
  T = G.copy()
  if G.is_multigraph():
    chord_edges = list(C.edges(keys=True))
  else:
    chord_edges = list(C.edges())

  for e in chord_edges:
    T.remove_edge(*e)

  return T, C
def test_min_edges():
    """mod_boruvka on the high-mv-max / long-edge graph must yield an MST."""
    # Run mod_boruvka on a graph with a high mv max and a long edge and make
    # sure the result has the same edges as the MST networkx computes on
    # the complete distance graph of the same coordinates.
    g = graph_high_mvmax_long_edge()

    subgraphs = UnionFind()
    rtree = Rtree()

    # build min span forest via mod_boruvka
    msf_g = mod_boruvka(g, subgraphs=subgraphs, rtree=rtree)

    # Use networkx to build the reference MST.  The values are materialised
    # into a list first: np.array() on a Python 3 dict view produces a 0-d
    # object array, which breaks the broadcasting below.
    # NOTE(review): assumes node ids equal the position of their entry in
    # msf_g.coords -- confirm.
    c = np.array(list(msf_g.coords.values()))
    deltas = c[np.newaxis, :, :] - c[:, np.newaxis, :]
    all_dists = np.sqrt((deltas ** 2).sum(2))

    complete_g = nx.Graph(all_dists)
    mst_g = nx.minimum_spanning_tree(complete_g)

    # Compare as unordered pairs so edge orientation does not matter.
    mst_edge_set = {frozenset(e) for e in mst_g.edges()}
    msf_edge_set = {frozenset(e) for e in msf_g.edges()}
    assert msf_edge_set == mst_edge_set
Beispiel #7
0
    def order_contigs(self):
        """
        Derive an initial contig order from the linking contacts alone.

        The contig graph from ``create_contig_graph`` is split into
        subgraphs by ``decompose_graph`` (Louvain modularity, according to
        the original notes).  For every subgraph with more than one node the
        edge weights are inverted, a minimum spanning tree is taken, the
        weights are inverted again, and the tree is walked with
        ``dfs_weighted`` to obtain a node order.  The per-subgraph orders
        are concatenated.

        Contigs from single-node subgraphs are appended at the end, in an
        indeterminate order.

        :return: order of contigs
        """
        contig_graph = self.create_contig_graph()

        ordered = []
        singletons = []
        for component in decompose_graph(contig_graph):
            if component.order() <= 1:
                singletons.extend(component.nodes())
                continue

            # Invert for the MST computation, then invert again afterwards
            # (presumably restoring the original weights -- confirm).
            inverse_edge_weights(component)
            tree = nx.minimum_spanning_tree(component)
            inverse_edge_weights(component)

            ordered.extend(edgeiter_to_nodelist(dfs_weighted(tree)))

        return ordered + singletons
Beispiel #8
0
  def __init__(self, topology_file = "gates_topology.dot"):
    """Load the switch topology and derive a spanning tree from it.

    Reads ``topology_file`` with pygraphviz, fills the dpid/switch lookup
    tables and the set of core switches, records which ports connect
    switches to each other, and stores a minimum spanning tree of the
    topology in ``self.nx_topo``.

    NOTE(review): ``dpid_to_switch_dict``, ``switch_to_dpid_dict``,
    ``core_switches`` and ``port_mappings`` are only written to here, so
    they are assumed to exist before this runs -- confirm.
    """
    frenetic.App.__init__(self)

    logging.info("---> Reading Topology from "+topology_file)
    self.agraph = pgv.AGraph(topology_file)
    for node in self.agraph.nodes():
      dpid = str(node.attr['dpid'])
      name = str(node)
      self.dpid_to_switch_dict[dpid] = name
      self.switch_to_dpid_dict[name] = dpid
      if node.attr['core']:
        self.core_switches.add(name)

    # It's faster to denormalize this now
    logging.info("---> Remembering internal ports")
    self.switch_internal_ports = dict(
      (name, set()) for name in self.switch_to_dpid_dict
    )
    for link in self.agraph.edges():
      src_sw, dst_sw = str(link[0]), str(link[1])
      src_port = int(link.attr["src_port"])
      dst_port = int(link.attr["dport"])
      # Record both directions of the link: (local switch, peer, local port).
      directions = ((src_sw, dst_sw, src_port), (dst_sw, src_sw, dst_port))
      for local, peer, port in directions:
        self.switch_internal_ports[local].add(port)
        if local not in self.port_mappings:
          self.port_mappings[local] = {}
        self.port_mappings[local][peer] = port

    logging.info("---> Calculating spanning tree")
    self.nx_topo = nx.minimum_spanning_tree(nx.from_agraph(self.agraph))
Beispiel #9
0
def rand_spanning_tree(N, rand_weights=False):
    '''Create a random spanning tree on N nodes.

    A complete graph with random edge weights (drawn with ``rand.rand``) is
    built and its minimum spanning tree is returned as an adjacency matrix.

    Args:
        N (int): Number of nodes
        rand_weights (bool): When true, the entries of the tree edges are
            multiplied by a symmetric matrix of fresh random values instead
            of being left at 1.

    Returns:
        A NxN numpy array representing the adjacency matrix of the graph.

    '''

    # Create random graph.  Only pairs with j < i are connected: a self-loop
    # (j == i) can never belong to a spanning tree, so it is not added.
    A_rand = rand.rand(N, N)
    G_rand = nx.Graph()
    G_rand.add_nodes_from(range(N))
    G_rand.add_weighted_edges_from(
        (i, j, A_rand[i, j]) for i in range(N) for j in range(i)
    )
    # Find minimal spanning tree
    spanning_tree = nx.minimum_spanning_tree(G_rand)
    # Create adjacency matrix and flatten the edge weights to 1
    final_graph = nx.adj_matrix(spanning_tree).toarray()
    final_graph[final_graph > 0] = 1
    # Randomize weights if requested
    if rand_weights:
        # Lower triangle mirrored upwards gives a symmetric weight matrix.
        R = np.tril(rand.rand(N, N))
        R = R + np.transpose(R)
        final_graph = final_graph * R
    return final_graph
Beispiel #10
0
    def from_graph(self, G):
        """Initialise the clique-tree state from the graph ``G``.

        Stores a copy of ``G``, builds the maximal-clique graph, weights
        every clique-graph edge by the negated size of the shared node set
        (so the minimum spanning tree favours large separators), and keeps
        that tree in ``self.cliquetree``.  The node/clique membership tables
        are then filled and ``update_insertable`` is called for every node.

        NOTE(review): ``node_in_cliques`` and ``nodes_in_clique`` are only
        written to here and are assumed to exist already -- confirm.
        """
        self.G = G.copy()
        cliques = nx.clique.find_cliques(G)
        cliquegraph = nx.clique.make_max_clique_graph(G)
        # NOTE(review): relies on the clique-graph nodes being numbered in
        # the order find_cliques yields the cliques -- confirm.
        clique_dict = dict(zip(cliquegraph.nodes(), cliques))

        # Iterate over a snapshot: the loop removes and re-adds edges, which
        # is not safe while walking a live edge view of the same graph.
        for u, v in list(cliquegraph.edges()):
            cliquegraph.remove_edge(u, v)
            sep = set(clique_dict[u]).intersection(set(clique_dict[v]))
            cliquegraph.add_edge(u, v, nodes=sep, weight=-len(sep))
        self.cliquetree = nx.minimum_spanning_tree(cliquegraph)

        for v in self.G:
            self.node_in_cliques[v] = set()
        for v in clique_dict:
            self.nodes_in_clique[v] = set()
            for node in clique_dict[v]:
                self.nodes_in_clique[v].add(node)
                self.node_in_cliques[node].add(v)
        self.uid = len(G) + 1
        self.insertable = set()
        for v in self.G:
            self.update_insertable(v)
Beispiel #11
0
def visualize_mst(votes):
    """Draw the minimum spanning tree of ``votes`` using a spring layout.

    The tree is computed on the ``'difference'`` edge attribute and every
    node is coloured by its ``'color'`` node attribute.  The figure is
    created on the current pyplot state; nothing is returned.
    """
    min_spanning_tree = nx.minimum_spanning_tree(votes, weight='difference')

    # this makes sure draw_spring results are the same at each call
    np.random.seed(1)

    color = [min_spanning_tree.node[senator]['color']
             for senator in min_spanning_tree.nodes()]

    # determine position of each node using a spring layout
    pos = nx.spring_layout(min_spanning_tree, iterations=200)
    plt.figure(figsize=(25, 25))

    # plot the edges
    nx.draw_networkx_edges(min_spanning_tree, pos, alpha=.5)

    # plot the nodes
    nx.draw_networkx_nodes(min_spanning_tree, pos, node_color=color)

    # draw the labels; alpha is an opacity and must lie in [0, 1] (the
    # former value 5 is rejected by current matplotlib), so labels are
    # drawn fully opaque
    nx.draw_networkx_labels(min_spanning_tree, pos, alpha=1, font_size=8)

    # coordinate information is meaningless here, so let's remove it
    plt.xticks([])
    plt.yticks([])
    remove_border(left=False, bottom=False)
 def test_mst_disconnected(self):
     """A disconnected graph yields a spanning forest covering all nodes."""
     G = nx.Graph()
     # add_edge instead of add_path: the latter is no longer a Graph
     # method in recent NetworkX releases.
     G.add_edge(1, 2)
     G.add_edge(10, 20)
     T = nx.minimum_spanning_tree(G)
     # Sort the endpoints of every edge: the orientation in which an
     # undirected edge is reported is an implementation detail.
     assert_equal(sorted(map(sorted, T.edges())), [[1, 2], [10, 20]])
     assert_equal(sorted(T.nodes()), [1, 2, 10, 20])
Beispiel #13
0
def threshold_matrix(M, cost):
    '''
    Binarise the association matrix M at the given cost.

    M is the full association matrix.
    cost is the percentage (0 to 100) at which you'd like to threshold

    threshold_matrix first creates a copy of the input matrix, then
    sets all diagonal values to -999 so that they are treated as very weak
    entries. It next calculates the spanning tree of the strongest
    associations (the minimum spanning tree of the negated matrix).

    It then sets all values below the appropriate percentile of the upper
    triangle to 0 and all remaining values to 1. Finally the spanning tree
    edges are set to 1, so they are *always* included in the thresholded
    matrix.

    Returns the thresholded copy; M itself is not modified.
    '''
    # Make a copy of the matrix
    thr_M = np.copy(M)

    # Set all diagonal values to -999
    thr_M[np.diag_indices_from(thr_M)] = -999

    # Calculate the spanning tree of the strongest associations: negate the
    # weights so the minimum spanning tree of -M is the maximum spanning
    # tree of M.  (The former weight='weight'*-1 evaluated to the empty
    # string, i.e. an attribute no edge has.)
    G = nx.from_numpy_matrix(-np.asarray(M))
    mst = nx.minimum_spanning_tree(G, weight='weight')

    # Calculate the threshold value
    thr = np.percentile(thr_M[np.triu_indices_from(thr_M, k=1)], cost)

    # Set all values that are less than the threshold to 0
    thr_M[thr_M < thr] = 0

    # Set all values that are not zero to 1
    thr_M[thr_M != 0] = 1

    # Always keep the spanning tree edges, in both triangles
    for u, v in mst.edges():
        thr_M[u, v] = 1
        thr_M[v, u] = 1

    return thr_M
Beispiel #14
0
 def test_prim_minimum_spanning_tree_disconnected(self):
     """Prim's algorithm returns a spanning forest for two components."""
     graph = nx.Graph()
     graph.add_edges_from([(1, 2), (10, 20)])
     forest = nx.minimum_spanning_tree(graph, algorithm='prim')
     # Normalise the endpoint order before comparing.
     normalised_edges = sorted(sorted(edge) for edge in forest.edges())
     assert_equal(normalised_edges, [[1, 2], [10, 20]])
     assert_equal(sorted(forest.nodes()), [1, 2, 10, 20])
Beispiel #15
0
    def _retrieve_skycoords(V):
        """Convert every connected component of ``V`` into an ordered SkyCoord.

        Each component is expected to be one border cycle (the external
        border or a hole, according to the original notes).  The nodes are
        relabelled to integers, a minimum spanning tree is taken, and a
        depth-first walk starting at a degree-one node of that tree gives
        the order in which the ``ra``/``dec`` node attributes are emitted.

        Returns a list with one ``SkyCoord`` (unit ``"deg"``) per component.
        """
        ordered_borders = []
        for border in nx.connected_component_subgraphs(V):
            # Integer labels double as row indices into the coordinate array.
            border = nx.convert_node_labels_to_integers(border)
            tree = nx.minimum_spanning_tree(border)

            # First tree node of degree one, or None when there is none.
            start = next(
                (node for node, degree in tree.degree() if degree == 1),
                None,
            )

            # Unordered longitudes and latitudes, one row per node.
            ra = np.asarray(list(nx.get_node_attributes(border, 'ra').values()))
            dec = np.asarray(list(nx.get_node_attributes(border, 'dec').values()))
            lonlat = np.vstack((ra, dec)).T

            # The depth-first visiting order along the tree sorts the rows.
            visit_order = np.asarray(list(nx.dfs_preorder_nodes(tree, start)))
            ordered_borders.append(SkyCoord(lonlat[visit_order], unit="deg"))

        return ordered_borders
Beispiel #16
0
def chow_liu(data, mi_estimator=discrete_mutual_information):
    """Build a tree-shaped model over the columns of ``data``.

    Every pair of columns is connected with a weight equal to the negated
    value of ``mi_estimator`` for that pair, so the minimum spanning tree
    keeps the pairs with the largest estimates.  The tree is oriented by a
    depth-first search from the first column and wrapped in ``DGM``.
    """
    columns = list(data.columns)

    graph = nx.Graph()
    graph.add_nodes_from(columns)
    for left, right in combinations(columns, 2):
        estimate = mi_estimator(data[[left]], data[[right]])
        graph.add_edge(left, right, weight=-estimate)

    tree = nx.minimum_spanning_tree(graph)
    return DGM(nx.dfs_tree(tree, columns[0]))
    def verify_solution(self, sol):
        """Check ``sol`` against the MST computed by NetworkX.

        Only works if the solution is unique (=> edges have unique weights).
        Returns True when ``sol`` equals the set of edges of NetworkX's
        minimum spanning tree of ``self.graph``.

        NOTE(review): the comparison is between edge tuples, so ``(u, v)``
        and ``(v, u)`` count as different edges -- confirm ``sol`` uses the
        same orientation as NetworkX.
        """
        reference_tree = nx.minimum_spanning_tree(self.graph)
        reference_edges = set(reference_tree.edges())
        return reference_edges == sol
Beispiel #18
0
def minimal_couplers(subgraphs, edges):
    '''Use the fewest possible number of couplers between and within
    subgraphs.

    subgraphs is a mapping from a subgraph key to its graph.  edges maps a
    pair of keys (q1, q2), with q1 before q2 in sorted key order, to the
    candidate edges between those two subgraphs.

    Every subgraph is replaced by its minimum spanning tree.  Each tree
    gets a root (first entry of sort_dict over the summed path lengths) and
    every node is assigned its path length to that root as a cost.  For
    each pair of subgraphs only the first entry of sort_dict over the
    summed end-node costs is kept in edges.

    Returns (trees, edges): the mapping of keys to spanning trees and the
    edges mapping, which is updated in place.

    NOTE(review): sort_dict is presumed to return keys ordered by
    ascending value -- confirm.
    '''
    # map each subgraph to its minimum spanning tree, keeping the keys so
    # the pairwise lookup below can still address the subgraphs
    trees = {key: nx.minimum_spanning_tree(subgraphs[key])
             for key in subgraphs}

    # for each tree, find a root node and store the shortest path to each
    # node as a cost metric.
    costs = {}
    for tree in trees.values():
        # identify the root; dict() accepts both the mapping returned by
        # older NetworkX and the (node, lengths) pairs of newer releases
        path_lengths = dict(nx.shortest_path_length(tree))
        root_weights = {k: sum(path_lengths[k].values()) for k in path_lengths}
        root = sort_dict(root_weights)[0]
        # assign path lengths as node costs
        costs.update(path_lengths[root])

    # for each pair of subgraphs, keep the inter-subgraph edge with the
    # minimum total cost of its end nodes
    nodes = sorted(trees)
    for i, q1 in enumerate(nodes):
        for q2 in nodes[i + 1:]:
            edge_costs = {e: costs[e[0]] + costs[e[1]]
                          for e in edges[(q1, q2)]}
            edges[(q1, q2)] = sort_dict(edge_costs)[0]

    return trees, edges
Beispiel #19
0
def main():
    """Draw a fixed weighted graph and its minimum spanning tree.

    Both graphs are handed to ``desenhaGrafo`` together with the target
    names ``grafo-c.png`` and ``arv-min-c.png``.
    """
    weighted_edges = [
        (1, 2, 13.0), (1, 3, 8.0), (1, 5, 33.0), (2, 3, 22.0), (2, 4, 17.0),
        (2, 6, 20.0), (3, 5, 18.0), (4, 5, 10.0), (4, 6, 5.0), (5, 6, 15.0),
    ]

    # G is an undirected weighted graph, built from its edge list
    G = nx.Graph()
    G.add_weighted_edges_from(weighted_edges)
    desenhaGrafo(G, "grafo-c.png")

    desenhaGrafo(nx.minimum_spanning_tree(G), "arv-min-c.png")
Beispiel #20
0
    def create_spanning_tree(self, username="******", password="******"):
        """Disable every source port whose link is not on the spanning tree.

        A minimum spanning tree of ``self.graph`` is computed.  For each
        link in ``self.links`` that is not a tree edge, the link's source
        port is collected under its source dpid.  Every affected switch is
        then reached over ssh (address from ``self.dpid_to_ip``) and the
        collected ports are disabled through its configuration prompt.

        :param username: login name used for the ssh sessions
        :param password: password sent at the ssh password prompt
        """
        T = nx.minimum_spanning_tree(self.graph)

        disabled_ports = {}
        for link in self.links:
            src, dst = hex(link.src.dpid), hex(link.dst.dpid)
            # T is undirected, so has_edge matches either orientation.
            if not T.has_edge(src, dst):
                disabled_ports.setdefault(link.src.dpid, []).append(
                    link.src.port_no)

        for dp in disabled_ports:
            ip = self.dpid_to_ip[hex(dp)]
            print("logging into " + ip)
            s = spawn("ssh %s@%s" %(username, ip))
            s.expect(".*assword")
            s.sendline(password)
            s.expect("Press any key to continue")
            s.sendline("\r")
            s.sendline("config")
            for n in disabled_ports[dp]:
                # repr() is the portable spelling of the old backtick form.
                s.sendline("interface ethernet " + repr(n) + " disable")
            s.sendline("save")
            s.sendline("logo")
            s.sendline("y")
        print("CREATED SPANNING TREE")
Beispiel #21
0
def find_min_spanning_tree(A):
	"""
		Compute a minimum spanning tree, caching the input graph as JSON.

		If '../Data/dcg_graph.json' exists the graph is loaded from it and
		A is not looked at; otherwise A is converted to a graph and written
		to that file first.

		Input:
			A : Adjacency matrix in scipy.sparse format.
		Output:
			T : Minimum spanning tree.
			run_time : Total runtime (seconds, including loading or
			           converting the graph) to find the minimum spanning tree

	"""
	started_at = time.time()
	cache_path = '../Data/dcg_graph.json'

	if not os.path.exists(cache_path):
		# First run: convert the matrix and store the result for next time.
		G = from_scipy_sparse_matrix(A)
		payload = json_graph.node_link_data(G)
		with open(cache_path, 'w') as cache_file:
			json.dump(payload, cache_file)
	else:
		# Graph was pre-processed earlier: reuse it instead of converting A.
		with open(cache_path) as cache_file:
			payload = json.load(cache_file)
		G = json_graph.node_link_graph(payload)

	T = minimum_spanning_tree(G)

	elapsed = time.time() - started_at
	return T, elapsed
def compute_initial_guess(num_nodes, relative_rotations, relative_edges):
	"""Chain relative rotations along a spanning tree into global rotations.

	An undirected graph over ``range(num_nodes)`` gets one edge per entry
	of ``relative_edges``, weighted by the angle that
	``so3.matrix_to_axis_angle`` reports for the matching relative
	rotation.  The minimum spanning tree of that graph is walked
	depth-first from node 0 and each visited node's rotation is set to the
	edge rotation (transposed when the edge is stored in the opposite
	direction) applied to its parent's rotation.

	Returns a list of ``num_nodes`` matrices; entries never reached by the
	walk stay at the 3x3 identity.
	"""
	graph = nx.Graph()
	graph.add_nodes_from(range(num_nodes))

	for ind, edge in enumerate(relative_edges):
		_axis, theta = so3.matrix_to_axis_angle(relative_rotations[ind])
		graph.add_edge(edge[0], edge[1], weight=theta, index=ind)

	tree = nx.minimum_spanning_tree(graph)

	global_rotation = [numpy.identity(3) for _ in range(num_nodes)]

	for parent, child in nx.dfs_edges(tree, 0):
		ind = graph[parent][child]["index"]
		mat = relative_rotations[ind]
		stored = relative_edges[ind]

		same_direction = stored[0] == parent and stored[1] == child
		if not same_direction:
			if stored[0] == child and stored[1] == parent:
				# Walking the edge backwards: use the inverse rotation.
				mat = mat.transpose()
			else:
				logging.error("GRAPH ERROR")

		global_rotation[child] = mat.dot(global_rotation[parent])

	return global_rotation
 def SpanningFeatures(self):
     """Collect pairwise shortest paths and span their union with an MST.

     For every unordered pair of nodes of ``self.gr`` the shortest path in
     ``self.G`` is appended to ``self.shortestPath`` and its nodes and
     consecutive edges are added to ``self.Gs``.  The minimum spanning
     tree of ``self.Gs`` is stored in ``self.SpanG``.  Returns None.
     """
     nodes = list(self.gr.nodes())

     # Hop-distance graph between the nodes of self.gr.
     # NOTE(review): it is filled but not read again in this method.
     tempG = nx.Graph()
     tempG.add_nodes_from(nodes)

     # Visit each unordered pair once, in node order.
     for position, source in enumerate(nodes):
         for target in nodes[position + 1:]:
             path = nx.shortest_path(self.G, source, target)
             self.shortestPath.append(path)
             # The hop count has to be passed as a keyword attribute; a
             # bare third positional argument is not an edge weight.
             tempG.add_edge(source, target, weight=len(path) - 1)
             # Consecutive pairs only, so there is no step past the end.
             self.Gs.add_nodes_from(path)
             self.Gs.add_edges_from(zip(path, path[1:]))
     self.SpanG = nx.minimum_spanning_tree(self.Gs)

     # Compute Features
     # self.gS_avgMSTWeight=float(weight)/count

     return
Beispiel #24
0
 def test_kruskal_minimum_spanning_tree_disconnected(self):
     """Kruskal's algorithm returns a spanning forest for two components."""
     G = nx.Graph()
     # add_edge instead of add_path: the latter is no longer a Graph
     # method in recent NetworkX releases.
     G.add_edge(1, 2)
     G.add_edge(10, 20)
     T = nx.minimum_spanning_tree(G, algorithm="kruskal")
     # Endpoint order within an undirected edge is not significant.
     assert_equal(sorted(map(sorted, T.edges())), [[1, 2], [10, 20]])
     assert_equal(sorted(T.nodes()), [1, 2, 10, 20])
Beispiel #25
0
def hidden_image_maze(fname, style='jittery'):
    """Plot a maze whose solution follows the image stored in ``fname``.

    Supported styles: jittery, smooth, sketch

    The image subgraph's edges get weight 0 in the base grid so that the
    minimum spanning tree of the grid prefers them; the maze is the dual
    of that tree and the solution is the tree path from ``(0, 0)`` to the
    largest node of the image subgraph.
    """
    # subgraph of the grid corresponding to edges between black pixels
    H = models.image_grid_graph(fname)
    G = H.base_graph

    # for every edge in H, make the corresponding edge in G have weight 0
    for a, b in H.edges():
        G[a][b]['weight'] = 0

    # minimum spanning tree on G (which will include the maze solution)
    T = nx.minimum_spanning_tree(G)

    # the maze solution is the path through the spanning tree
    solution_nodes = nx.shortest_path(T, (0,0), max(H.nodes()))
    P = models.my_path_graph(solution_nodes)

    # the dual graph, including edges not crossed by the spanning tree
    D = models.dual_grid(G, T)
    views.add_maze_boundary(D, max(G.nodes()))
    views.make_entry_and_exit(D, max(G.nodes()))
    use_fast_layout = (style == 'jittery')
    pos = views.layout_maze(D, fast=use_fast_layout)
    views.plot_maze(D, pos, P, G.pos)

    # make it stylish if requested: two fast layouts drawn on top of the
    # maze figure
    if style == 'sketch':
        plt.figure(1)
        for _ in range(2):
            D_pos = views.layout_maze(D, fast=True)
            nx.draw_networkx_edges(D, D_pos, width=1, edge_color='k')

    # show the pixel colors loaded from the file, for "debugging"
    plt.figure(2)
    for node in G:
        x, y = G.pos[node][0], G.pos[node][1]
        plt.plot([x], [y], '.', alpha=.5, color=G.node[node]['color'])
Beispiel #26
0
    def test_local_threshold(self):
        """Exercise local_thresholding in default, totalEdges and edgePC mode.

        After every call the graph must be connected; the expected edge
        counts are checked against the minimum spanning tree taken before
        thresholding or against the requested totals.
        """
        def edge_count():
            return self.a.G.number_of_edges()

        def check_connected():
            self.assertTrue(nx.is_connected(self.a.G))

        self.a.import_adj_file(self.MODIF_FILE, delimiter=",")
        self.a.apply_threshold()
        reference_tree = nx.minimum_spanning_tree(self.a.G)

        # Default call: exactly as many edges as the spanning tree.
        self.a.local_thresholding()
        self.assertEqual(reference_tree.number_of_edges(), edge_count())
        check_connected()

        # totalEdges: a reachable target is met exactly ...
        self.a.local_thresholding(threshold_type="totalEdges", value=20)
        self.assertEqual(edge_count(), 20)
        check_connected()
        # ... a target below the tree size falls back to the tree ...
        self.a.local_thresholding(threshold_type="totalEdges", value=1)
        self.assertEqual(edge_count(), reference_tree.number_of_edges())
        check_connected()
        # ... and an oversized target still gives a connected graph.
        self.a.local_thresholding(threshold_type="totalEdges", value=500000)
        check_connected()

        # edgePC: percentages of the edge count after plain thresholding.
        self.a.apply_threshold()
        all_edges = edge_count()

        self.a.local_thresholding(threshold_type="edgePC", value=100)
        self.assertEqual(edge_count(), all_edges)
        check_connected()

        self.a.local_thresholding(threshold_type="edgePC", value=20)
        self.assertEqual(edge_count(), int(0.2 * all_edges))
        check_connected()
Beispiel #27
0
def plotGraph(g,filename):
    """
    Plot the minimum spanning tree of the graph and save it to ``filename``.

    The full graph is reduced to its minimum spanning tree (on the
    'difference' edge attribute).  The MST of a graph like this has some
    significance (but also some locally strange paths) and is nice to look
    at due to the reduced edge density.  Nodes are sized and coloured by
    their degree in the tree.
    """

    plt.figure(figsize=(15, 10))
    # fixed seed so the layout is computed from the same random state
    np.random.seed(5)
    tree = nx.minimum_spanning_tree(g, weight='difference')
    layout = nx.spring_layout(tree, iterations=900, k=.008, weight='difference')

    tree_edges = list(nx.minimum_spanning_edges(g, weight='difference'))
    degree_of = tree.degree()
    sizes = [degree_of[node]*80 for node in tree]
    tree_nodes = tree.nodes()

    nx.draw_networkx_edges(g, layout, edgelist=tree_edges, alpha=.2)
    nx.draw_networkx_nodes(
        g, layout, nodelist=tree_nodes, node_size=sizes, node_color=sizes)
    nx.draw_networkx_labels(g, layout, font_color='k', font_size=7)

    plt.title("Artist Network", fontsize=18)
    # axis ticks are removed from the plot
    plt.xticks([])
    plt.yticks([])
    plt.savefig(filename)
def mst_of_g(g,terminals,verbose=False,weighted=True,cutoff=7,return_gL=False,bidir=False):
	"""Connect the terminals through the MST of their shortest-path closure.

	For every terminal present in ``g`` the shortest paths (Dijkstra on
	'weight' when ``weighted``, plain shortest paths otherwise, both limited
	by ``cutoff``) to the other terminals are collected as edges of a
	closure graph.  The paths behind the minimum spanning tree edges of
	that closure graph are merged into an AnnotatedGraph, which receives
	the attributes of ``g`` via ``copy_attributes_from_g``.

	With ``bidir`` every ordered pair of terminals is considered, otherwise
	only pairs (i, j) with j > i.  ``verbose`` enables progress logging.

	Returns the merged graph, or ``(mst, gL, shortest_network)`` when
	``return_gL`` is true, where ``gL`` is the closure graph and
	``shortest_network`` holds every collected path.
	"""
	if verbose:
		logger.info("Starting MST construction")
		sys.stdout.flush()

	STARTTIME=time.time()
	closure_edges=[]
	shortest_network=model.AnnotatedGraph()
	n_terminals=len(terminals)

	for i,src in enumerate(terminals):
		if src not in g:
			if verbose:
				logger.info("Node %s not in g"%(src))
			continue

		if weighted:
			costs,paths=nx.single_source_dijkstra(g, src, weight='weight',cutoff=cutoff)
		else:
			paths=nx.single_source_shortest_path(g,src,cutoff=cutoff)
			# unweighted cost of a path is its number of nodes
			costs={node:len(path) for node,path in paths.items()}

		first_partner=0 if bidir else i+1
		for j in range(first_partner,n_terminals):
			if j==i:
				continue
			tgt=terminals[j]
			if tgt not in paths:
				if verbose:
					logger.info("no paths between %s and %s"%(src,tgt))
				continue
			found_path=paths[tgt]
			shortest_network.add_path(found_path)
			closure_edges.append((src,tgt,{'weight':costs[tgt],'path':found_path}))

		if verbose:
			logger.info("Done %s. Still %d to go"%(src,len(terminals)-i))
			sys.stdout.flush()

	if verbose:
		logger.info("Computed Metric closure in %f seconds"%(time.time() - STARTTIME))
		STARTTIME=time.time()
		sys.stdout.flush()

	gL=nx.Graph()
	gL.add_edges_from(closure_edges)
	# Min spanning Tree of the closure graph
	tL=nx.minimum_spanning_tree(gL)
	if verbose:
		logger.info("Computed Min spanning tree in %f seconds"%(time.time() - STARTTIME))
		STARTTIME=time.time()
		sys.stdout.flush()

	# Expand every closure edge of the tree back into its underlying path.
	mst=model.AnnotatedGraph()
	for _u,_v,attrs in tL.edges(data=True):
		mst.add_path(attrs["path"])
	copy_attributes_from_g(mst,g)

	if return_gL:
		return mst,gL,shortest_network
	return mst
Beispiel #29
0
    def set_pred_relations(self, candids, relations):
        """Build the relation multigraph and its minimum spanning tree.

        ``candids`` and ``relations`` are walked in lockstep; each entry of
        ``relations`` is a sequence of ``(relation, weight)`` pairs for the
        candidate pair at the same position.  Every kept relation becomes
        an edge weighted ``1 - w``.  The graph is stored in ``self.G``, the
        tree in ``self.mst`` and its edges as a set in ``self.mst_edges``.
        """
        #NOTE: digraph doesn't play well with MST algorithms in networkx
        self.G = nx.MultiGraph()
        for candid, subrels in zip(candids, relations):
            rel_weights = dict(subrels)
            if 'X' in rel_weights:
                idx = self.stroke_groups.index(candid[0])
                idx2 = self.stroke_groups.index(candid[1])
                distance = idx2 - idx
                if distance > 1:
                    print("X probability: " + str(rel_weights['X']) +
                            ", Idx dist: " + str(distance))
                # Skip the whole candidate pair when 'X' exceeds 1.
                if rel_weights['X'] > 1:
                    continue

            for rel, w in subrels:
                # Heuristic, but increases accuracy by a lot in practise
                if rel.startswith("A") and w < 0.4:
                    continue
                if rel == 'X':
                    continue
                # Don't forget to do 1-w (since we're looking for minimum)
                self.G.add_edge(candid[0], candid[1], weight=(1-w), rel=rel.tolist())

        tree = nx.minimum_spanning_tree(self.G)
        self.mst_edges = set(tree.edges())  # optimization
        self.mst = tree
Beispiel #30
0
def mst_weight(
        taxa,
        patterns,
        matrices,
        characters
        ):
    """
    Calculate minimal weight of unsorted trees.

    Every pair of taxa is connected with a weight that sums, over all
    (pattern, matrix, character list) triples, the cheapest transition
    between any state of the first taxon and any state of the second.  The
    result is the total edge weight of the minimum spanning tree of that
    graph, divided by 2.

    ``patterns[k][i]`` holds the states of taxon ``i``, ``characters[k]``
    lists the states in the index order of the cost matrix ``matrices[k]``.
    """

    G = nx.Graph()
    for i, tA in enumerate(taxa):
        for j, tB in enumerate(taxa):
            if i >= j:
                continue
            all_scores = []
            for pt, mt, cs in zip(patterns, matrices, characters):
                # Score every (state of tA, state of tB) combination; the
                # previous code only kept the last state of tB per state
                # of tA because the append sat outside the inner loop.
                scores = [
                    mt[cs.index(pA)][cs.index(pB)]
                    for pA in pt[i]
                    for pB in pt[j]
                ]
                all_scores.append(min(scores))
            G.add_edge(tA, tB, weight=sum(all_scores))
    g = nx.minimum_spanning_tree(G)

    return sum(w[2]['weight'] for w in g.edges(data=True)) / 2
Beispiel #31
0
 def test_wrong_value(self):
     """Request a spanning tree with an algorithm name that is not valid."""
     # NOTE(review): presumably wrapped by an expected-exception decorator
     # that is not visible in this excerpt -- confirm.
     unknown_algorithm = 'random'
     nx.minimum_spanning_tree(self.G, algorithm=unknown_algorithm)
 def test_disconnected(self):
     """Two separate weighted edges must both survive in the forest."""
     graph = nx.Graph()
     graph.add_edge(0, 1, weight=1)
     graph.add_edge(2, 3, weight=2)
     forest = nx.minimum_spanning_tree(graph, algorithm=self.algo)
     expected_nodes = list(range(4))
     expected_edges = [(0, 1), (2, 3)]
     assert nodes_equal(list(forest), expected_nodes)
     assert edges_equal(list(forest.edges()), expected_edges)
Beispiel #33
0
        nx.read_weighted_edgelist(f,
                                  create_using=Y,
                                  nodetype=str,
                                  encoding="utf-8")
        pos = nx.spring_layout(Y)

        edge_labels = {(u, v): d['weight'] for u, v, d in Y.edges(data=True)}
        nx.draw_networkx_nodes(Y, pos, node_size=700)
        nx.draw_networkx_edges(Y, pos)
        nx.draw_networkx_labels(Y, pos)
        nx.draw_networkx_edge_labels(Y, pos, edge_labels=edge_labels)
        plt.show()
        pylab.show()

    if (menu == 11):
        T = nx.minimum_spanning_tree(Y, weight="weight")
        os.system("clear")
        print("Algoritmo A*\n")
        source = input("Digite o começo :").upper()
        target = input("Digite o final :").upper()

        print("Resultado (BFS) : ", list(nx.bfs_edges(Y, source)))
        print("Resultado (DFS) : ", list(nx.dfs_edges(Y, source)))
        print("Resultado (Algoritmo A*) : ",
              list(nx.astar_path(Y, source, target, heuristic=None)), "\n")
        print("Resultado (Algoritmo Dijkstra) : ",
              list(nx.dijkstra_path(Y, source, target, weight="weight")), "\n")
        print(
            "Caminho (Algoritmo Dijkstra) : ",
            str(nx.dijkstra_predecessor_and_distance(Y, source)).replace(
                ",", "\n").replace("(", " ").replace("{", "\n ").replace(
Beispiel #34
0
def local_thresholding_prop(conn_matrix, thr):
    """
    Threshold the adjacency matrix by building from the minimum spanning tree
    (MST) and adding successive N-nearest neighbour degree graphs to achieve
    target proportional threshold.

    Parameters
    ----------
    conn_matrix : array
        Weighted NxN matrix. NaNs are replaced via ``np.nan_to_num`` before
        any further processing.
    thr : float
        A proportional threshold, between 0 and 1, to achieve through local
        thresholding.

    Returns
    -------
    conn_matrix_thr : array
        Weighted local-thresholding using MST, NxN matrix. The (NaN-cleaned)
        input is returned unchanged when it sums to zero. ``None`` is
        returned if the final element-wise masking raises ``ValueError``
        (a message is printed in that case).

    References
    ----------
    .. [1] Alexander-Bloch, A. F., Gogtay, N., Meunier, D., Birn, R., Clasen,
      L., Lalonde, F., … Bullmore, E. T. (2010). Disrupted modularity and local
      connectivity of brain functional networks in childhood-onset
      schizophrenia. Frontiers in Systems Neuroscience.
      https://doi.org/10.3389/fnsys.2010.00147
    .. [2] Tewarie, P., van Dellen, E., Hillebrand, A., & Stam, C. J. (2015).
      The minimum spanning tree: An unbiased method for brain network analysis.
      NeuroImage. https://doi.org/10.1016/j.neuroimage.2014.10.015

    """
    from pynets.core import thresholding

    # Number of trailing iterations inspected for "no progress" detection.
    fail_tol = 100
    conn_matrix = np.nan_to_num(conn_matrix)

    if np.sum(conn_matrix) == 0:
        print(UserWarning('Empty connectivity matrix detected!'))
        return conn_matrix

    # from_numpy_array is the replacement for from_numpy_matrix, which was
    # removed in networkx 3.0.
    G = nx.from_numpy_array(np.abs(conn_matrix))

    maximum_edges = G.number_of_edges()

    # The MST is built on the largest connected component only.
    Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
    G0 = G.subgraph(Gcc[0])

    min_t = nx.minimum_spanning_tree(thresholding.weight_to_distance(G0),
                                     weight="distance")

    # Re-attach nodes outside the largest component so the final adjacency
    # matrix keeps the NxN shape.
    min_t.add_nodes_from(G.nodes())
    len_edges = min_t.number_of_edges()
    n_nodes = np.shape(conn_matrix)[0]
    upper_values = np.triu_indices(n_nodes, k=1)
    weights = np.array(conn_matrix[upper_values])
    edgenum = int(float(thr) * float(len(weights[~np.isnan(weights)])))

    if len_edges > edgenum:
        print(f"Warning: The minimum spanning tree already has: {len_edges} "
              f"edges, select more edges. Local Threshold "
              f"will be applied by just retaining the Minimum Spanning Tree")
        # NOTE(review): despite the message above, this returns the full
        # absolute-value matrix of G rather than an MST-masked matrix.
        # Behaviour kept as-is; confirm which one is intended.
        conn_matrix_thr = nx.to_numpy_array(G)
        return conn_matrix_thr

    k = 1
    len_edge_list = []
    # Grow the MST with k-nearest-neighbour edges until the edge budget is
    # reached, k exceeds the node count, the edge count has been identical
    # for the last `fail_tol` iterations, or the graph is not connected.
    while (len_edges < edgenum and k <= n_nodes and
           (len(len_edge_list[-fail_tol:]) -
            len(set(len_edge_list[-fail_tol:]))) < (fail_tol - 1)
           and nx.is_connected(min_t) is True):
        len_edge_list.append(len_edges)
        # Create nearest neighbour graph
        nng = thresholding.knn(conn_matrix, k)

        # The size must be recorded BEFORE pruning; reading it afterwards
        # made the saturation check below impossible to satisfy.
        number_before = nng.number_of_edges()

        # Remove edges from the NNG that exist already in the new graph/MST
        nng.remove_edges_from(min_t.edges())

        # Every candidate edge is already present and the NNG was at least
        # as large as the full graph: nothing more can be added.
        if nng.number_of_edges() == 0 and number_before >= maximum_edges:
            break

        # Add weights to NNG
        for e in nng.edges():
            nng.edges[e[0], e[1]]["weight"] = float(conn_matrix[e[0], e[1]])

        # Obtain list of edges from the NNG in order of weight
        edge_list = sorted(nng.edges(data=True),
                           key=lambda t: t[2]["weight"],
                           reverse=True)

        # Add edges in order of connectivity strength
        for edge in edge_list:
            min_t.add_edges_from([edge])
            len_edges = min_t.number_of_edges()
            if len_edges >= edgenum:
                break
        k += 1

    # Binary mask of retained edges, in the node order of the input matrix.
    conn_matrix_bin = thresholding.binarize(
        nx.to_numpy_array(min_t, nodelist=sorted(G.nodes()), dtype=np.float64))

    try:
        conn_matrix_thr = np.multiply(conn_matrix, conn_matrix_bin)
        return conn_matrix_thr

    except ValueError as e:
        print(
            e, f"MST thresholding failed. Check raw graph output manually "
            f"for debugging.")
        # Best-effort behaviour preserved: callers receive None on failure.
        return None
Beispiel #35
0
                pieces.append(g.nodes())
                num_invalid += 1
        return num_invalid

    def buildSubGraph(self, nodes, g):
        """Return a new weighted graph holding `nodes` and the edges of `g`
        that join two of them.

        Each unordered pair is examined once (only when ``a < b``), and the
        ``'weight'`` attribute of the edge in `g` is copied over.
        """
        sub = nx.Graph()
        for a in nodes:
            sub.add_node(a)
            for b in nodes:
                # Skip self-pairs and the mirrored half of every pair.
                if not a < b:
                    continue
                if not g.has_edge(a, b):
                    continue
                sub.add_edge(a, b, weight=g[a][b]['weight'])
        return sub


if __name__ == '__main__':
    # Demo driver: load a graph from CSV, cut its minimum spanning tree with
    # MSTCutter and print the resulting pieces and weights.
    from graph_loader import GraphGenerator as GG

    loader = GG("input2.csv")
    spanning_tree = nx.minimum_spanning_tree(loader.getGraph())
    l_arg, u_arg = 3, 15
    cutter = MSTCutter(l_arg, u_arg)
    num_invalid, pieces, weights = cutter.cutFromTSP(spanning_tree,
                                                     loader.getGraph())
    print("# of invalid pieces: " + str(num_invalid))
    for piece in pieces:
        # Print the stored 'position' attribute of every node in the piece.
        positions = []
        for node in piece:
            positions.append(loader.getGraph().node[node]['position'])
        print(positions)
    print(weights)
    # Alternative entry point kept for reference:
    #cutter.findCuts(mst)
    #print(cutter.comp)
Beispiel #36
0
def scaffold_via_wells_mst(g):
    """Iteratively simplify scaffold graph `g` using spanning forests.

    `g` is modified in place; nothing is returned. The function

    1. initializes contig labels on every vertex of `g`,
    2. reweighs the edges of ``g.nxgraph`` with ``type_='wells'`` and
       computes a spanning forest with ``nx.minimum_spanning_tree``,
    3. while that forest still has branches (``_has_branches``) and at most
       10 iterations have run: removes edges of `g` that are not in the
       forest, contracts edges, recomputes the forest, keeps only edges whose
       two endpoints both belong to the "trunk" of a forest component with
       at least 4 nodes, contracts again and recomputes the forest.

    NOTE(review): this is Python 2 code (print statements).
    NOTE(review): the comments speak of a *maximum* spanning forest while the
    code calls ``nx.minimum_spanning_tree``; presumably ``_reweigh_edges``
    assigns weights so that the two coincide -- confirm.
    """
    # initialize internal contig labels (used for downstream qc)
    for v in g.vertices:
        v.initialize_contigs()

    # construct well-based scaffold graph in networkx format
    nxg = g.nxgraph
    # nxg = _construct_graph(g)

    # weigh edges according to how many wells they are sharing:
    _reweigh_edges(nxg, g, type_='wells')

    # find the maximum spanning forest
    msf = nx.minimum_spanning_tree(nxg)

    # keep simplifying the graph until the msf has no branching nodes
    # (hard cap of 10 iterations):
    n_iter = 1
    while _has_branches(msf) and n_iter <= 10:
        print 'MSF simplificaiton iteration %d' % n_iter

        # print '...', max(msf.degree(weight=None).values())
        # print '...', sorted(msf.degree(weight=None).iteritems(), key=lambda x: x[1], reverse=True)[:10]
        # vg = sorted(msf.degree(weight=None).iteritems(), key=lambda x: x[1], reverse=True)[0][0]
        # v = g.vertex_from_id(vg[0])
        # N = [n.id for n in g.vertices if v in n.neighbors]
        # print ',,,', N
        # print msf.neighbors(v)

        # remove edges of g not selected in forest MSF
        # (snapshot the edge list first because g is mutated in the loop)
        E = [e for e in g.edges]
        n_removed = 0
        for e in E:
            # forest nodes are (vertex id, connection) pairs
            e_nx = ((e.v1.id, e.connection[e.v1]), (e.v2.id,
                                                    e.connection[e.v2]))
            if not msf.has_edge(*e_nx):
                g.remove_edge(e)
                n_removed += 1

        print '%d edges not in MST removed.' % n_removed

        # contract edges
        n_contracted = contract_edges(g, store_ordering=True)
        print '%d edges contracted.' % n_contracted

        # now we are going to compute the trunk

        # get the networkx graph again
        nxg = g.nxgraph
        _reweigh_edges(nxg, g, type_='wells')  # FIXME: do this once

        # recompute the maximum spanning forest
        # NOTE(review): this passes g.nxgraph rather than the reweighed `nxg`
        # used everywhere else; equivalent only if g.nxgraph returns the same
        # graph object on every access -- confirm.
        msf = nx.minimum_spanning_tree(g.nxgraph)

        # for each tree in forest:
        trunk = list()
        for mst in nx.connected_component_subgraphs(msf):
            # add to mst trunk (components with fewer than 4 nodes are skipped)
            if len(mst) >= 4:
                trunk.extend(_mst_trunk(mst, g))

        # remove edges not in trunk:
        E = [e for e in g.edges]
        print trunk
        # first element of every trunk entry is compared against vertex ids
        trunk_v = set([v[0] for v in trunk])
        n_removed = 0
        for e in E:
            v1_id, v2_id = e.v1.id, e.v2.id
            if v1_id not in trunk_v or v2_id not in trunk_v:
                g.remove_edge(e)
                n_removed += 1

        # NOTE(review): looks like a leftover interactive debugging hook that
        # triggers from the 4th iteration onwards -- confirm it is intended.
        if n_iter >= 4: keyboard()

        print '%d edges not in trunk removed.' % n_removed

        # contract one last time
        n_contracted = contract_edges(g, store_ordering=True)
        print '%d edges contracted.' % n_contracted

        # construct well-based scaffold graph in networkx format
        nxg = g.nxgraph
        # nxg = _construct_graph(g)

        # weigh edges according to how many wells they are sharing:
        _reweigh_edges(nxg, g, type_='wells')

        # find the maximum spanning forest
        msf = nx.minimum_spanning_tree(nxg)

        n_iter += 1
Beispiel #37
0
def get_weights(closes, robust, ddev, cats, graph_path):
    """Compute asset weights and save a spanning-tree plot of the assets.

    Log returns of `closes` are turned into a correlation matrix, clustered
    with Ward linkage, and the resulting leaf order is handed to
    ``getRecBipart`` to obtain the weights. A minimum spanning tree of the
    rescaled correlation matrix is then drawn and saved as a PNG.

    Plotting is best-effort: any failure is logged and the weights are still
    returned.

    Parameters
    ----------
    closes : pandas.DataFrame
        Price series, one column per asset. The last index entry must offer
        ``.date()``; it names the output file.
    robust : bool
        Use ``cov_robust`` instead of the sample covariance.
    ddev
        Passed through unchanged to ``getRecBipart``.
    cats : pandas.DataFrame or None
        Optional per-asset categories with one column per asset. After
        transposition the rows are read through ``Strategy`` and ``Colors``
        for the legend. When None, every asset gets colour ``'b'`` and
        shape ``'o'``.
    graph_path : str
        Directory in which ``<last date>.png`` is written.

    Returns
    -------
    pandas.Series
        The weights from ``getRecBipart``, re-indexed by the column labels
        of `closes`.
    """
    import logging

    ret = np.log(closes / closes.shift()).fillna(0.0)
    corr = cov2cor(cov_robust(ret) if robust else ret.cov())
    dist = distance(corr)
    link = linkage(dist, 'ward')
    # Only the leaf ordering is needed; no_plot=True avoids drawing a
    # throwaway dendrogram onto the current matplotlib figure.
    quasiIdx = np.array(dendrogram(link, no_plot=True)['leaves'])
    clusters = quasiIdx
    weights = getRecBipart(closes, ddev, clusters)
    weights.index = closes.columns[weights.index]

    try:
        if cats is not None:
            # Select the categories while corr still has its original labels.
            ccats = cats[corr.columns].copy()

        # Lookup kept from the original code: on pandas versions where
        # missing labels raise, a weights/corr label mismatch aborts the
        # plot here instead of producing a partially labelled figure.
        _ = weights.loc[corr.index]

        # Break multi-word labels over several lines for the plot.
        names = [s.replace(' ', '\n') for s in corr.columns]
        corr.index = names
        corr.columns = names
        corr = ((corr - corr.min()) / corr.max()).round(2)
        mst = nx.minimum_spanning_tree(
            nx.from_pandas_adjacency(corr, create_using=nx.MultiGraph()))

        if cats is not None:
            ccats.columns = names
            ccats = ccats.T
        else:
            ccats = pd.DataFrame({
                'Colors': list('b' * len(names)),
                'Shapes': list('o' * len(names))
            }, index=names)

        ccats['Sizes'] = pd.DataFrame(
            {i.replace(' ', '\n'): w for i, w in zip(weights.index, weights)},
            index=['Sizes']).T
        fs = np.min((20, len(weights)))
        fig = plt.figure(figsize=(fs + 5, fs + 2), dpi=80)
        cf = fig.add_subplot(111)
        draw_net(mst, ccats, cf)

        if cats is not None:
            # One legend patch per strategy; a later row overrides the colour
            # of an earlier row with the same strategy.
            strategy_colors = {}
            for _, row in ccats.iterrows():
                strategy_colors[row.Strategy] = row.Colors
            handles = [mpatches.Patch(color=color, label=strategy)
                       for strategy, color in strategy_colors.items()]
            fig.legend(handles=handles, title='Strategy', loc=7,
                       fontsize='xx-large')

        fig.savefig(join(graph_path, str(closes.index[-1].date()) + '.png'))
    except Exception:
        # Plotting must never prevent the weights from being returned, but
        # the failure should not vanish silently either.
        logging.getLogger(__name__).exception(
            "get_weights: could not render the spanning-tree plot")
    finally:
        # Release figures on both the success and the failure path.
        plt.close('all')

    return weights
def path(pts, covars, weights, vertical=False):
    '''
    Return a path (as a list of indices into pts) between the two extreme
    points, following the minimum spanning tree of the complete graph whose
    edge weights are exp(Bhattacharyya distance) between points.

    Arguments:
        pts      - The points of interest as a (2,N) array.
        covars   - Per-point covariances, indexed by point along the first
                   axis: covars[i] is passed to bhattacharyyaDistance for
                   point i.
        weights  - The weights to assign to each point. Currently not used.
        vertical - Default is False. If False uses the min and max points in
                   x (the first dimension) as starting and ending points.
                   If True uses the min and max in y (the second dimension).

    Points whose distance to every OTHER point is 5 or more are dropped
    before the path is computed. The returned indices always refer to the
    original pts. An empty list is returned when no point survives.
    '''

    x, y = pts
    n_pts = len(x)

    dists = np.zeros((n_pts, n_pts))
    for i in range(n_pts):
        for j in range(n_pts):
            if i != j:
                dists[i, j] = bhattacharyyaDistance(pts[:, i], pts[:, j],
                                                    covars[i], covars[j])

    if n_pts > 1:
        # The diagonal (self-distance 0) must be excluded, otherwise every
        # point trivially counts as its own close neighbour and nothing is
        # ever filtered out.
        has_neighbour = dists < 5
        np.fill_diagonal(has_neighbour, False)
        keepIndices = np.flatnonzero(has_neighbour.any(axis=1))
    else:
        # A lone point has nothing to be compared against; keep it.
        keepIndices = np.arange(n_pts)

    if len(keepIndices) == 0:
        return []

    # Restrict coordinates AND distances to the kept points so that local
    # indices below address matching rows/columns.
    x = x[keepIndices]
    y = y[keepIndices]
    dists = dists[np.ix_(keepIndices, keepIndices)]

    if vertical:
        start = np.argmin(y)
        stop = np.argmax(y)
    else:
        start = np.argmin(x)
        stop = np.argmax(x)

    G = nx.Graph()
    G.add_nodes_from(range(len(x)))

    for i in range(len(x)):
        for j in range(len(x)):
            if i != j:
                G.add_edge(i, j, weight=np.exp(dists[i][j]))

    t = nx.minimum_spanning_tree(G, weight='weight')
    pth = nx.shortest_path(t, start, stop)

    # Translate local indices back to indices into the original pts.
    return [int(keepIndices[k]) for k in pth]
Beispiel #39
0
def get_shortest_path_recommendation_set(db, user, target_papers):
    """Return papers linking what `user` has read to `target_papers`.

    Steps:

    1. Join all papers the user has read pairwise with zero-weight edges in
       the citation network.
    2. Build a complete graph over `target_papers` (plus one ``'known'`` node
       standing for the read papers) whose edges carry the hop count and the
       node list of a shortest path in the citation network.
    3. Walk the minimum spanning tree of that graph breadth-first from
       ``'known'``, feeding every tree edge's stored path to
       ``add_paper_to_path``.

    Parameters
    ----------
    db
        Data access object providing ``get_citation_network``,
        ``list_papers_read`` and ``list_papers_list``.
    user
        User identifier forwarded to ``db.list_papers_read``.
    target_papers : sequence
        Paper ids (nodes of the citation network) to reach.

    Returns
    -------
    list
        Result of ``db.list_papers_list`` for the collected paper ids, in
        reverse collection order; an empty list when the user has read
        nothing.
    """
    G = db.get_citation_network()
    source_papers = [p['paper_id'] for p in db.list_papers_read(user)]
    if not source_papers:
        return []

    # Create contracted graph.
    # NOTE(review): this is an alias, not a copy, so the zero-weight edges
    # are added to the graph returned by db.get_citation_network() and are
    # also visible to the nx.shortest_path(G, ...) call further down.
    # Kept as-is; confirm whether a copy was intended.
    G_contracted = G
    for i, node1 in enumerate(source_papers):
        for j in range(i):
            node2 = source_papers[j]
            if not G.has_edge(node1, node2):
                G_contracted.add_edge(node1, node2)
            G_contracted[node1][node2]['weight'] = 0.0

    # Construct metric closure over the target papers.
    G_metric_closure = nx.Graph()
    for i, node1 in enumerate(target_papers):
        G_metric_closure.add_node(node1)
        for j in range(i):
            node2 = target_papers[j]
            # One search yields both the path and its (unweighted) length.
            path_list = nx.shortest_path(G_contracted, node1, node2)
            G_metric_closure.add_edge(node1,
                                      node2,
                                      weight=len(path_list) - 1,
                                      path_list=path_list)

    # Add known set: a single node representing every paper already read.
    known_set_label = 'known'
    G_metric_closure.add_node(known_set_label)
    for target_paper in target_papers:
        (this_weight,
         closest_source) = get_shortest_path_from_set(G_contracted,
                                                      source_papers,
                                                      target_paper)
        path_list = nx.shortest_path(G, closest_source, target_paper)
        G_metric_closure.add_edge(known_set_label,
                                  target_paper,
                                  weight=this_weight,
                                  path_list=path_list)

    # Get minimal spanning tree and traverse it level by level from known.
    min_tree = nx.minimum_spanning_tree(G_metric_closure)
    papers_in_path_id = []

    next_set = {known_set_label}
    to_visit = set(min_tree.nodes())

    # Stop as well when the frontier runs dry, so unreachable nodes can
    # never cause an endless loop.
    while to_visit and next_set:
        current_set = next_set
        next_set = set()
        for node in current_set:
            to_visit.remove(node)
            for neighbour in min_tree.neighbors(node):
                if neighbour in to_visit:
                    next_set.add(neighbour)
                    path_to_add = G_metric_closure[node][neighbour][
                        'path_list']
                    papers_in_path_id = add_paper_to_path(
                        papers_in_path_id, path_to_add, source_papers)

    papers_in_path_id.reverse()
    papers_in_path = db.list_papers_list(papers_in_path_id)

    return list(papers_in_path)
Beispiel #40
0
 def test_kruskal_minimum_spanning_tree_isolate(self):
     """Kruskal on an edgeless graph keeps both isolated nodes and no edges."""
     isolated = nx.Graph()
     for node in (1, 2):
         isolated.add_node(node)
     tree = nx.minimum_spanning_tree(isolated, algorithm='kruskal')
     found_nodes = sorted(tree.nodes())
     assert_equal(found_nodes, [1, 2])
     found_edges = sorted(tree.edges())
     assert_equal(found_edges, [])
Beispiel #41
0
 def test_kruskal_minimum_spanning_tree(self):
     """Kruskal's tree on the fixture graph matches the expected edge list."""
     tree = nx.minimum_spanning_tree(self.G, algorithm='kruskal')
     actual_edges = sorted(tree.edges(data=True))
     expected_edges = self.minimum_spanning_edgelist
     assert_equal(actual_edges, expected_edges)
Beispiel #42
0
def _face_center(mesh, face):
    """Return the mean of components 0 and 2 of the vertices of `face`.

    Assumes three vertices per face (each contributes one third). Only used
    as a node position for visualisation.
    """
    center = (0, 0)
    for vertex in mesh.fv(face):
        center = center + 0.3333333333333333 * np.array(
            [mesh.point(vertex)[0],
             mesh.point(vertex)[2]])
    return center


def unfold(mesh):
    """Unfold a triangle mesh into a net and split it where it overlaps.

    The dual graph of the mesh (one node per face, one edge per mesh edge)
    is weighted so that longer mesh edges are cheaper, and its minimum
    spanning tree is unfolded with ``unfoldSpanningTree``. Overlapping
    triangles of the result are then separated by greedily cutting the tree
    edges that lie on the most not-yet-separated tree paths between
    overlapping faces.

    Parameters
    ----------
    mesh
        Mesh offering ``edges``, ``calc_edge_length``, ``halfedge_handle``,
        ``face_handle``, ``fv`` and ``point``.

    Returns
    -------
    tuple
        ``(fullUnfolding, unfoldings)``: the result of ``unfoldSpanningTree``
        for the whole spanning tree, and a list with one such result per
        connected component that remains after cutting.
    """
    # Generate the dual graph of the mesh and calculate the weights
    dualGraph = nx.Graph()

    # Weights are normalised by the true shortest and longest edge; no
    # hard-coded sentinel values are involved.
    edgeLengths = [mesh.calc_edge_length(edge) for edge in mesh.edges()]
    minLength = min(edgeLengths) if edgeLengths else 0.0
    maxLength = max(edgeLengths) if edgeLengths else 0.0
    lengthRange = maxLength - minLength

    # All edges in the net
    for edge, edgelength in zip(mesh.edges(), edgeLengths):
        # The two faces adjacent to the edge
        face1 = mesh.face_handle(mesh.halfedge_handle(edge, 0))
        face2 = mesh.face_handle(mesh.halfedge_handle(edge, 1))

        # The weight: 1 for the shortest edge, 0 for the longest. When all
        # edges are equally long the range is zero and every edge gets the
        # same weight instead of triggering a division by zero.
        if lengthRange > 0:
            edgeweight = 1.0 - (edgelength - minLength) / lengthRange
        else:
            edgeweight = 1.0

        # Add the new nodes and edge to the dual graph (positions are only
        # necessary for visualisation)
        dualGraph.add_node(face1.idx(), pos=_face_center(mesh, face1))
        dualGraph.add_node(face2.idx(), pos=_face_center(mesh, face2))
        dualGraph.add_edge(face1.idx(),
                           face2.idx(),
                           idx=edge.idx(),
                           weight=edgeweight)

    # Calculate the minimum spanning tree
    spanningTree = nx.minimum_spanning_tree(dualGraph)

    # Unfold the tree
    fullUnfolding = unfoldSpanningTree(mesh, spanningTree)
    unfoldedMesh, _, connections, _, _ = fullUnfolding

    # Find all intersections between pairs of unfolded faces
    epsilon = 1E-12  # Accuracy
    faceIntersections = []
    for face1 in unfoldedMesh.faces():
        for face2 in unfoldedMesh.faces():
            # so that we do not double check the couples
            if face2.idx() < face1.idx():
                triangle1 = [
                    unfoldedMesh.point(
                        unfoldedMesh.from_vertex_handle(halfedge))
                    for halfedge in unfoldedMesh.fh(face1)
                ]
                triangle2 = [
                    unfoldedMesh.point(
                        unfoldedMesh.from_vertex_handle(halfedge))
                    for halfedge in unfoldedMesh.fh(face2)
                ]
                if triangleIntersection(triangle1, triangle2, epsilon):
                    faceIntersections.append(
                        [connections[face1.idx()], connections[face2.idx()]])

    # Tree path between every pair of overlapping triangles
    paths = [
        nx.shortest_path(spanningTree,
                         source=intersection[0],
                         target=intersection[1])
        for intersection in faceIntersections
    ]

    # Each path as a list of (node, next node) edges
    edgepaths = [list(zip(nodes, nodes[1:])) for nodes in paths]

    # List of all edges in all paths
    allEdgesInPaths = list(set().union(*edgepaths))

    # Count how often each edge occurs
    numEdgesInPaths = [
        sum(1 for edgepath in edgepaths if edge in edgepath)
        for edge in allEdgesInPaths
    ]

    # S: edges chosen for cutting. `covered`: indices of paths that are
    # already separated. Tracking indices (rather than comparing paths by
    # value) lets duplicate paths be covered individually, and paths without
    # edges start out covered because there is nothing to cut on them;
    # either case would otherwise keep the loop from terminating.
    S = []
    covered = {idx for idx, edgepath in enumerate(edgepaths) if not edgepath}

    while len(covered) != len(edgepaths):
        # Calculate the weights to decide which edge to cut
        cutWeights = np.empty(len(allEdgesInPaths))
        for i, currentEdge in enumerate(allEdgesInPaths):
            # Count how many of the paths in which the edge occurs have
            # already been cut
            numInC = sum(1 for idx in covered
                         if currentEdge in edgepaths[idx])
            remaining = numEdgesInPaths[i] - numInC
            # Float division keeps the weight exact on every Python version;
            # edges with nothing left to separate are never selected.
            cutWeights[i] = 1.0 / remaining if remaining > 0 else np.inf

        # Find the edge with the least weight
        cutEdge = allEdgesInPaths[np.argmin(cutWeights)]
        S.append(cutEdge)
        # Mark all paths where the edge occurs as separated
        covered.update(idx for idx, edgepath in enumerate(edgepaths)
                       if cutEdge in edgepath)

    # Now we remove the cut edges from the minimum spanning tree
    spanningTree.remove_edges_from(S)

    # Find the connected components
    connectedComponentList = list(nx.connected_components(spanningTree))

    # Unfolding of the components
    unfoldings = [
        unfoldSpanningTree(mesh, spanningTree.subgraph(component))
        for component in connectedComponentList
    ]

    return fullUnfolding, unfoldings
Beispiel #43
0
def main():
    """Build allele and locus graphs from a gzipped read table and write
    per-haplotype spanning-tree edges and terminal-to-terminal paths.

    Command line options: ``-i/--infile`` (gzipped input), ``-e/--edgefile``
    and ``-p/--hapfile`` (outputs), ``--strict``.

    NOTE(review): ``--strict`` is parsed but only referenced from
    commented-out code below.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', type=str, default=None)
    parser.add_argument('-e', '--edgefile', type=str, default=None)
    parser.add_argument('-p', '--hapfile', type=str, default=None)
    parser.add_argument('--strict', default=False, action='store_true')
    args = parser.parse_args()

    # Gall: allele graph, Gloc: locus graph
    Gall = nx.Graph()
    Gloc = nx.Graph()
    infile = args.infile
    # divisor applied to ru_maxrss before it is printed with an "Mb" label
    usage_denom = 1024 * 1000

    print('before')
    print(
        'Memory usage info (Mb):\t' +
        str(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / usage_denom))
    # Load the allele graph. Each input line holds four tab-separated fields
    # (allele, read, read position, strand); the allele name itself has four
    # '_'-separated parts. reads[read][allele] stores the read position.
    # NOTE(review): the file is opened in binary mode but parsed with str
    # regex patterns; on Python 3 that would raise TypeError -- confirm the
    # target interpreter.
    reads = defaultdict(dict)
    with gzip.open(infile, 'rb') as fp:
        line = fp.readline().strip()
        # the loop ends at EOF or at the first line that is empty after
        # stripping
        while line:
            [allele, read, readpos, strand] = re.split('[\t]', line)
            [locus, ref, alt, refalt] = re.split('_', allele)
            readpos = int(readpos)
            reads[read][allele] = readpos
            if not locus in Gloc.nodes:
                add_locus_node(Gloc, locus, allele)
            if not allele in Gall.nodes:
                add_allele_node(Gall, allele)
            # connect this allele to every other allele already seen on the
            # same read, accumulating count, distance and squared distance
            for r1 in reads[read].keys():
                if r1 != allele:
                    edge0 = [allele, r1]
                    if not edge0 in Gall.edges:
                        Gall.add_edge(allele, r1)
                        Gall.edges[edge0]['count'] = 0
                        Gall.edges[edge0]['dist'] = 0
                        Gall.edges[edge0]['dist_sq'] = 0
                    Gall.edges[edge0]['count'] = Gall.edges[edge0]['count'] + 1
                    dist = abs(reads[read][r1] - reads[read][allele])
                    Gall.edges[edge0][
                        'dist'] = Gall.edges[edge0]['dist'] + dist
                    Gall.edges[edge0][
                        'dist_sq'] = Gall.edges[edge0]['dist_sq'] + dist * dist
            line = fp.readline().strip()

    print('allele graph')
    print(
        'Memory usage info (Mb):\t' +
        str(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / usage_denom))

    # Load the locus graph: derive mean and standard deviation of the
    # distance for every allele edge and pass its count on to Gloc.
    for edge in Gall.edges:
        mndist = Gall.edges[edge]['dist'] * 1.0 / Gall.edges[edge]['count']
        Gall.edges[edge]['mean_dist'] = mndist
        # sqrt(E[d^2] - E[d]^2)
        Gall.edges[edge]['sd_dist'] = math.sqrt(
            Gall.edges[edge]['dist_sq'] * 1.0 / Gall.edges[edge]['count'] -
            mndist * mndist)
        # node attribute dicts (not node names) are handed to the helper
        node1 = Gall.nodes[edge[0]]
        node2 = Gall.nodes[edge[1]]
        add_counts_edge(Gloc, node1, node2, Gall.edges[edge]['count'])

    #import code
    #code.interact(local=locals())

    print('locus graph')
    print(
        'Memory usage info (Mb):\t' +
        str(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / usage_denom))

    # Create the strict locus graph (Gconf) and prune edges. Every locus
    # edge carries four counts keyed ('r','r'), ('r','a'), ('a','r') and
    # ('a','a').
    Gconf = Gloc.copy()
    bad_edges = []
    # snapshot, because edges are removed from Gloc inside the loop
    edgelist = list(Gloc.edges)
    for edge in edgelist:
        curedge = Gloc.edges[edge]
        [aa, bb, cc, dd] = [
            curedge['r', 'r'], curedge['r', 'a'], curedge['a', 'r'],
            curedge['a', 'a']
        ]
        # an edge is "bad" when one row or column of the 2x2 table is empty
        # while both cells of the opposite row/column are populated; such
        # edges are dropped from both locus graphs
        if (aa + bb == 0
                and cc * dd > 0) or (cc + dd == 0 and aa * bb > 0) or (
                    aa + cc == 0 and bb * dd > 0) or (bb + dd == 0
                                                      and aa * cc > 0):
            bad_edges.append(edge)
            remove_allele_edges(Gloc, Gall, edge)
            Gloc.remove_edge(edge[0], edge[1])
            Gconf.remove_edge(edge[0], edge[1])
        else:
            # odds ratio with +1 added to every cell; Gconf keeps the edge
            # only if the ratio is above 2 or below 0.5 and at least one
            # diagonal has both cells populated
            oddsratio = (aa + 1.0) * (dd + 1.0) / (bb + 1.0) / (cc + 1.0)
            if (oddsratio > 2 or oddsratio < 0.5) and (aa * dd > 0
                                                       or bb * cc > 0):
                pass
            else:
                Gconf.remove_edge(edge[0], edge[1])

    print('strict locus  graph')
    print(
        'Memory usage info (Mb):\t' +
        str(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / usage_denom))

    with open(args.edgefile, 'w') as outf, open(args.hapfile, 'w') as outh:
        comp_loose = list(
            Gloc.subgraph(c).copy() for c in nx.connected_components(Gloc))
        #iterate over connected components of loose locus graph, phase and merge if possible
        for ii in range(len(comp_loose)):
            gg_loose = comp_loose[ii].copy()
            gg_conf = Gconf.subgraph(list(gg_loose.nodes())).copy()
            gg = list(
                gg_conf.subgraph(cc).copy()
                for cc in nx.connected_components(gg_conf))
            # first pass: phase each strict component and print a summary
            # line (component ids, node counts, size of the first haplotype)
            for jj in range(len(gg)):
                ggsub1 = gg[jj]
                [h1, h2] = phase_conf_component(ggsub1)
                print(
                    str(ii) + '\t' + str(jj) + '\t' +
                    str(gg_conf.number_of_nodes()) + '\t' +
                    str(ggsub1.number_of_nodes()) + '\t' + str(len(h1)))
            #if len(gg)>1:
            #  merge_all(gg_conf, ii, Gloc)
            #gg=list(gg_conf.subgraph(cc).copy() for cc in nx.connected_components(gg_conf))
            #if len(gg)>1 and args.strict==False:
            #  merge_all(gg_conf, ii, Gloc, strict=False)
            #list of connected graphs, post-merging
            gg = list(
                gg_conf.subgraph(cc).copy()
                for cc in nx.connected_components(gg_conf))
            # second pass: phase again with strict=False and write output
            for jj in range(len(gg)):
                ggsub1 = gg[jj]
                haps = phase_conf_component(ggsub1, strict=False)
                print(
                    str(ii) + '\t' + str(jj) + '\t' +
                    str(gg_conf.number_of_nodes()) + '\t' +
                    str(ggsub1.number_of_nodes()) + '\t' + str(len(haps[0])))
                #subgraphs of allele graph corresponding to allele on each haplotype
                for hapid in range(2):
                    Gallsub = Gall.subgraph(haps[hapid]).copy()
                    # spanning forest by mean distance, split into its trees
                    minforest = nx.minimum_spanning_tree(Gallsub,
                                                         weight='mean_dist')
                    mintree = list(
                        minforest.subgraph(cc).copy()
                        for cc in nx.connected_components(minforest))
                    for treeid in range(len(mintree)):
                        # edge file: one line per tree edge with
                        # "mean;sd;count" and the two endpoint names
                        treelist = list(mintree[treeid].edges)
                        for tredge in treelist:
                            curedge = mintree[treeid].edges[tredge]
                            edgestr = str(round(
                                curedge['mean_dist'], 3)) + ';' + str(
                                    round(curedge['sd_dist'], 3)) + ';' + str(
                                        curedge['count'])
                            print(str(ii) + '_' + str(jj) + '_' +
                                  str(hapid + 1) + '_' + str(treeid) + '\t' +
                                  edgestr + '\t' + tredge[0] + '\t' +
                                  tredge[1],
                                  file=outf)
                        # terminal nodes are the degree-1 nodes of the tree
                        terminal_nodes = []
                        for node1 in list(mintree[treeid].nodes):
                            if mintree[treeid].degree(node1) == 1:
                                terminal_nodes.append(node1)
                        # hap file: for every pair of terminal nodes, one
                        # line per node on the tree path between them
                        for aa in range(len(terminal_nodes) - 1):
                            for bb in range(aa + 1, len(terminal_nodes)):
                                sp = nx.shortest_path(mintree[treeid],
                                                      terminal_nodes[aa],
                                                      terminal_nodes[bb])
                                for nodeii in range(len(sp)):
                                    node1 = sp[nodeii]
                                    if nodeii == 0:
                                        # first node has no incoming edge
                                        outstr = str(mintree[treeid].degree(
                                            node1)) + '\t.'
                                    else:
                                        # "count_dist_distsq" of the edge
                                        # leading to this node
                                        prevedge = mintree[treeid].edges[
                                            sp[nodeii], sp[nodeii - 1]]
                                        outstr = str(
                                            mintree[treeid].degree(node1)
                                        ) + '\t' + str(
                                            prevedge['count']) + '_' + str(
                                                prevedge['dist']) + '_' + str(
                                                    prevedge['dist_sq'])
                                    print(str(ii) + '_' + str(jj) + '_' +
                                          str(hapid + 1) + '_' + str(treeid) +
                                          '\t' + str(aa) + '_' + str(bb) +
                                          '\t' + outstr + '\t' + node1,
                                          file=outh)

            print(
                '-----------------------------------------------------------------'
            )
            print('Memory usage info (Mb):\t' + str(
                resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
                usage_denom))
Beispiel #44
0
def main():
    """Build allele/locus graphs from a read table, phase them and write reports.

    Command-line options (parsed here):
        -i/--infile    gzipped, tab-separated input; each line holds
                       allele, read name, read position and strand.
        -e/--edgefile  output: one line per spanning-tree edge.
        -p/--hapfile   output: per-node records along leaf-to-leaf tree paths.
        -b/--bedfile   output: one 12-column BED-like line per tree.
        --strict       do not merge the confidently phased sub-components.

    The function profiles itself with cProfile and prints peak memory usage
    at several checkpoints. It returns nothing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', type=str, default=None)
    parser.add_argument('-e', '--edgefile', type=str, default=None)
    parser.add_argument('-p', '--hapfile', type=str, default=None)
    parser.add_argument('-b', '--bedfile', type=str, default=None)
    parser.add_argument('--strict', default=False, action='store_true')
    args = parser.parse_args()

    cp = cProfile.Profile()
    cp.enable()
    usage_denom = 1024 * 1000
    print('before')
    print(
        'Memory usage info (Mb):\t' +
        str(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / usage_denom))

    Gall = nx.Graph()  # allele graph: nodes are '<locus id>_<refalt>'
    Gloc = nx.Graph()  # locus graph: nodes are locus ids ('loc<N>')
    infile = args.infile

    # load allele graph
    reads = defaultdict(dict)  # read id -> {allele node: position in read}
    readid = {}  # read name -> integer id
    locid = {}  # 'loc_ref_alt' string -> locus id
    id2loc = {}  # locus id -> 'loc_ref_alt' string
    readct = 0
    locct = 0
    # NOTE(review): with mode 'rb' Python 3 yields bytes, which re.split with
    # a str pattern rejects; presumably this targets Python 2 -- confirm
    # before switching the mode to text.
    with gzip.open(infile, 'rb') as fp:
        line = fp.readline().strip()
        # Reading stops at the first empty (or whitespace-only) line.
        while line:
            [allele, readstr, readpos, strand] = re.split('[\t]', line)
            [loc, ref, alt, refalt] = re.split('_', allele)
            locstr = loc + '_' + ref + '_' + alt
            readpos = int(readpos)
            if readstr not in readid:
                readid[readstr] = readct
                readct += 1
            read = readid[readstr]
            print(str(read))
            if locstr not in locid:
                lid = 'loc' + str(locct)
                locid[locstr] = lid
                id2loc[lid] = locstr
                locct += 1
            locus = locid[locstr]
            allele = locus + '_' + refalt
            reads[read][allele] = readpos

            if not Gloc.has_node(locus):
                Gloc.add_node(locus)
            if not Gall.has_node(allele):
                Gall.add_node(allele, refalt=refalt)
            # Link this allele to every other allele already seen on the
            # same read, accumulating count / distance statistics per edge.
            for r1 in reads[read]:
                if r1 != allele:
                    if not Gall.has_edge(allele, r1):
                        Gall.add_edge(allele,
                                      r1,
                                      count=0,
                                      dist=0,
                                      dist_sq=0,
                                      reads=[])
                    curedge = Gall.edges[allele, r1]
                    rr = curedge['reads']
                    rr.append(read)
                    dist = abs(reads[read][r1] - reads[read][allele])
                    Gall.edges[allele, r1].update({
                        'count':
                        curedge['count'] + 1,
                        'dist':
                        curedge['dist'] + dist,
                        'dist_sq':
                        curedge['dist_sq'] + dist * dist,
                        'reads':
                        rr
                    })
            line = fp.readline().strip()

    del reads
    print('allele graph')
    print(
        'Memory usage info (Mb):\t' +
        str(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / usage_denom))

    # load locus graph
    for edge in Gall.edges:
        curedge = Gall.edges[edge]
        mndist = curedge['dist'] * 1.0 / curedge['count']
        # Clamp at zero: rounding can push the variance marginally negative,
        # which would make math.sqrt raise ValueError.
        variance = max(
            0.0,
            curedge['dist_sq'] * 1.0 / curedge['count'] - mndist * mndist)
        sddist = math.sqrt(variance)
        Gall.edges[edge].update({'mean_dist': mndist, 'sd_dist': sddist})
        add_counts_edge(Gloc, Gall, edge[0], edge[1], curedge['count'])
    print('locus graph')
    print(
        'Memory usage info (Mb):\t' +
        str(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / usage_denom))

    # prune edges
    # Iterate over a snapshot because edges are removed inside the loop.
    edgelist = list(Gloc.edges)
    for edge in edgelist:
        curedge = Gloc.edges[edge]
        [aa, bb, cc, dd] = [
            curedge['r', 'r'], curedge['r', 'a'], curedge['a', 'r'],
            curedge['a', 'a']
        ]
        pfloose = pre_filter_loose_pass(aa, bb, cc, dd)
        pfstrict = pre_filter_strict_pass(aa, bb, cc, dd)
        if not pfloose:
            remove_allele_edges(edge[0], edge[1], Gall)
            Gloc.remove_edge(edge[0], edge[1])
        elif not pfstrict:
            set_allele_edge_conf(edge[0], edge[1], Gall, False)
            Gloc.edges[edge[0], edge[1]]['conf'] = False
        else:
            # NOTE(review): only the allele edges are marked here; the locus
            # edge's 'conf' is presumably initialised by add_counts_edge --
            # confirm, since it is read unconditionally below.
            set_allele_edge_conf(edge[0], edge[1], Gall, True)

    with open(args.edgefile,
              'w') as outf, open(args.hapfile,
                                 'w') as outh, open(args.bedfile, 'w') as outb:
        comp_loose = list(
            Gloc.subgraph(c) for c in sorted(
                nx.connected_components(Gloc), key=len, reverse=True))
        # iterate over connected components of loose locus graph, phase and
        # merge if possible
        for ii in range(len(comp_loose)):
            gg_loose = comp_loose[ii]
            selected_edges = [(u, v) for u, v, e in gg_loose.edges(data=True)
                              if e['conf'] == True]
            gg_conf = gg_loose.edge_subgraph(selected_edges)
            gg = list(
                gg_conf.subgraph(cc).copy() for cc in sorted(
                    nx.connected_components(gg_conf), key=len, reverse=True))
            subgraphs1 = []
            subgraphs2 = []
            for component in gg:
                [h1, h2] = phase_conf_component(component)
                subgraphs1.append(h1)
                subgraphs2.append(h2)
            if (len(gg) > 1 and not args.strict):
                allhaps = merge_subgraphs(subgraphs1, subgraphs2, Gall)
            else:
                allhaps = [subgraphs1, subgraphs2]
            # Report the index of the last phased sub-component. Computed from
            # len(gg) so it is defined (as -1) even when gg is empty, instead
            # of relying on a leftover loop variable.
            print(str(ii) + '\t' + str(len(gg) - 1) + '\t' +
                  str(len(allhaps[0])) + '*')
            for jj in range(len(allhaps[0])):
                for hapid in range(2):
                    minforest = nx.minimum_spanning_tree(Gall.subgraph(
                        allhaps[hapid][jj]),
                                                         weight='mean_dist')
                    mintree = list(
                        minforest.subgraph(cc)
                        for cc in nx.connected_components(minforest))
                    for treeid in range(len(mintree)):
                        tree = mintree[treeid]
                        treelabel = (str(ii) + '_' + str(jj) + '_' +
                                     str(hapid + 1) + '_' + str(treeid))
                        # Second ':'/'_'-separated field of the locus string
                        # is used as the integer coordinate.
                        nodes = [
                            int(
                                re.split('[:_]', id2loc[re.split('_',
                                                                 node)[0]])[1])
                            for node in tree.nodes
                        ]
                        nodes.sort()
                        dists = []
                        ones = []
                        for kk in range(len(nodes)):
                            dists.append(nodes[kk] - nodes[0] + 1)
                            ones.append(1)
                        dstr = ','.join(map(str, dists))
                        onestr = ','.join(map(str, ones))
                        print('chr20\t' + str(nodes[0]) + '\t' +
                              str(max(nodes)) + '\t' + treelabel +
                              '\t100\t.\t' + str(nodes[0]) + '\t' +
                              str(max(nodes)) + '\t150,150,0\t' +
                              str(len(dists)) + '\t' + onestr + '\t' + dstr,
                              file=outb)
                        for tredge in list(tree.edges):
                            curedge = tree.edges[tredge]
                            [id1, refalt1] = re.split('_', tredge[0])
                            [id2, refalt2] = re.split('_', tredge[1])
                            edgestr = str(round(
                                curedge['mean_dist'], 3)) + ';' + str(
                                    round(curedge['sd_dist'], 3)) + ';' + str(
                                        curedge['count'])
                            print(treelabel + '\t' + edgestr + '\t' +
                                  id2loc[id1] + '_' + refalt1 + '\t' +
                                  id2loc[id2] + '_' + refalt2,
                                  file=outf)
                        # Leaves of the tree (degree-1 nodes).
                        terminal_nodes = []
                        for node1 in list(tree.nodes):
                            if tree.degree(node1) == 1:
                                terminal_nodes.append(node1)
                        # Walk the path between every pair of leaves and emit
                        # one record per node on it.
                        for aa in range(len(terminal_nodes) - 1):
                            for bb in range(aa + 1, len(terminal_nodes)):
                                sp = nx.shortest_path(tree,
                                                      terminal_nodes[aa],
                                                      terminal_nodes[bb])
                                for nodeii in range(len(sp)):
                                    node1 = sp[nodeii]
                                    [id1, refalt1] = re.split('_', node1)
                                    if nodeii == 0:
                                        outstr = str(
                                            tree.degree(node1)) + '\t.'
                                    else:
                                        prevedge = tree.edges[sp[nodeii],
                                                              sp[nodeii - 1]]
                                        outstr = str(
                                            tree.degree(node1)
                                        ) + '\t' + str(
                                            prevedge['count']) + '_' + str(
                                                prevedge['dist']) + '_' + str(
                                                    prevedge['dist_sq'])
                                    # Use the ref/alt tag parsed from this
                                    # node, not the stale value left over from
                                    # the input-loading loop.
                                    print(treelabel + '\t' + str(aa) + '_' +
                                          str(bb) + '\t' + outstr + '\t' +
                                          id2loc[id1] + '_' + refalt1,
                                          file=outh)
            print(
                '-----------------------------------------------------------------'
            )
            print('Memory usage info (Mb):\t' + str(
                resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
                usage_denom))

    cp.disable()
    cp.print_stats()
Beispiel #45
0
    # logger.info('time_deltas = %r' % (time_deltas,))
    maxweight = vt.safe_max(time_deltas, nans=False, fill=0) + 1
    time_deltas[np.isnan(time_deltas)] = maxweight
    time_delta_weight = 10 * time_deltas / (time_deltas.max() + 1)
    is_comp = infr.guess_if_comparable(candidate_mst_edges)
    comp_weight = 10 * (1 - is_comp)
    extra_weight = comp_weight + time_delta_weight

    # logger.info('time_deltas = %r' % (time_deltas,))
    nx.set_edge_attributes(
        aug_graph,
        name='weight',
        values={
            edge: 10.0 + extra for edge, extra in zip(candidate_mst_edges, extra_weight)
        },
    )
except Exception:
    logger.info('FAILED WEIGHTING USING TIME')
    nx.set_edge_attributes(
        aug_graph,
        name='weight',
        values={edge: 10.0 + _randint() for edge in candidate_mst_edges},
    )
new_edges = []
for cc_sub_graph in nx.connected_component_subgraphs(aug_graph):
    mst_sub_graph = nx.minimum_spanning_tree(cc_sub_graph)
    # Only add edges not in the original graph
    for edge in mst_sub_graph.edges():
        if not graph.has_edge(*edge):
            new_edges.append(e_(*edge))
    def draw(self,
             stix_name=0,
             is_width_as_weight=False,
             is_draw_min_spin_tree=False,
             pic_num_minspintree=100000):
        """Draw ``self.G`` and, optionally, its minimum spanning tree.

        Args:
            stix_name: label used in the figure titles. An integer is
                rendered as ``'#<n>'``.
            is_width_as_weight: when true, edge weights are (re)computed and
                used as line widths.
            is_draw_min_spin_tree: when true, a second figure with the
                minimum spanning tree of the undirected graph is drawn.
            pic_num_minspintree: accepted for compatibility; not used here.

        Side effects: sets ``self.DiG`` (and ``self.UnDiG`` when the spanning
        tree is drawn) and creates matplotlib figures.
        """
        self.DiG = self.G

        if isinstance(stix_name, int):
            # Convert before concatenating: str + int raises TypeError, which
            # made the default stix_name=0 unusable.
            stix_name = '#' + str(stix_name)
        plt.figure("Structure Tree for STIX PACKAGE [ " + stix_name + ' ]')
        mng = plt.get_current_fig_manager()
        # NOTE(review): window.maxsize() looks backend specific -- confirm
        # which matplotlib backend this is meant to run under.
        mng.resize(*mng.window.maxsize())
        pos = graphviz_layout(self.DiG, prog='dot', args='-Grankdir=LR')

        draw_options = dict(node_size=40,
                            pos=pos,
                            edge_color='y',
                            with_labels=False)
        if is_width_as_weight:
            self.__set_edge_weights(self.DiG)
            draw_options['width'] = self.__get_edge_weights(self.DiG)
        nx.draw(self.DiG, **draw_options)

        if not self.is_clustering_node_by_name:
            labels = self.__get_display_labels(self.DiG)
            nx.draw_networkx_labels(self.DiG,
                                    pos=pos,
                                    labels=labels,
                                    font_color='b')
        else:
            nx.draw_networkx_labels(self.DiG, pos=pos, font_color='b')

        if is_draw_min_spin_tree:
            self.UnDiG = self.G.to_undirected()
            self.UnDiG = nx.minimum_spanning_tree(self.UnDiG)

            plt.figure("Minimun Spinning Tree for STIX PACKAGE [ " +
                       stix_name + ' ]')
            mng = plt.get_current_fig_manager()
            mng.resize(*mng.window.maxsize())
            pos = graphviz_layout(self.UnDiG, prog='dot', args='-Grankdir=LR')
            nx.draw(self.UnDiG, node_size=40, pos=pos, edge_color='y')
            nx.draw_networkx_labels(self.UnDiG, pos=pos, font_color='b')
Beispiel #47
0
        14: "vie",
        15: "zag",
        16: "rom"
    }
    switch_link_matrix = [(1, 2), (1, 4), (2, 3), (2, 5), (3, 4),
                          (3, 6), (4, 7), (4, 9), (5, 6), (5, 10), (6, 7),
                          (6, 11), (7, 8), (8, 9), (8, 12), (10, 11), (10, 13),
                          (11, 12), (11, 14), (12, 16), (13, 14), (14, 15),
                          (15, 16)]
    host_count_per_switch = 1

    topology = nx.Graph()
    nodes = list(switch_names.keys())
    topology.add_nodes_from(nodes)
    topology.add_edges_from(switch_link_matrix)
    result = minimum_spanning_tree(topology)

    no_flood_links = list(set(switch_link_matrix) - set(result.edges))

    # ---------- initialize network  -----------------------------
    #dpid = DPID_BASE
    OpenFlow14Switch = partial(OVSKernelSwitch, protocols=OPENFLOW_PROTOCOL)
    #STPEnabledSwitch = partial(OVSKernelSwitch, protocols=OPENFLOW_PROTOCOL, failMode="standalone", stp=True)

    net = Containernet(ipBase=IP_BASE)
    net.addController("c0",
                      controller=RemoteController,
                      link=OVSLink,
                      ip=CONTROLLER_IP,
                      port=CONTROLLER_PORT)
Beispiel #48
0
def min_spanning_tree(G):
    """Return the minimum spanning tree of ``G`` weighted by the 'length' edge attribute."""
    tree = nx.minimum_spanning_tree(G, weight='length')
    return tree
def _remove_overlap(nodes, overlap_frac):
    """Implement GTree algorithm https://arxiv.org/pdf/1608.02653.pdf.

    ``nodes`` is a sequence of dict-like records providing 'name', 'x', 'y'
    and 'radius'. The records are copied, so the caller's objects are not
    moved. Returns a dict mapping each node name to a numpy ``[x, y]`` array.
    """
    nodes = [record.copy() for record in nodes]

    def edge_gap(i, j, coords):
        # Centre distance minus the scaled sum of both radii; a negative
        # value means the two nodes overlap more than allowed.
        offset = coords[i] - coords[j]
        centre_dist = np.sqrt((offset * offset).sum())
        radii = nodes[i]['radius'] + nodes[j]['radius']
        return centre_dist - (1.0 - overlap_frac) * radii

    def children_of(tree, parent, node):
        # Neighbours of `node` in the tree, leaving out the edge to `parent`.
        found = []
        for edge in tree.edges(node):
            if parent is not None and parent in edge:
                continue
            first, second = edge
            found.append(second if first == node else first)
        return found

    def translate_subtree(tree, parent, node, shift_x, shift_y):
        # Move `node` and everything below it (away from `parent`).
        moved = nodes[node]
        moved['x'] += shift_x
        moved['y'] += shift_y
        for child in children_of(tree, parent, node):
            translate_subtree(tree, node, child, shift_x, shift_y)

    def resolve(tree, parent, node):
        # Depth-first walk over the tree, pushing overlapping children away.
        for child in children_of(tree, parent, node):
            gap = tree.edges[(node, child)]['weight']
            if gap < 0:
                origin = nodes[node]
                target = nodes[child]
                dx = target['x'] - origin['x']
                dy = target['y'] - origin['y']
                length = np.sqrt(dx**2 + dy**2)
                unit_x = dx / length
                unit_y = dy / length
                # Shift by the (negative) gap along the connecting direction.
                translate_subtree(tree, node, child, -gap * unit_x,
                                  -gap * unit_y)
            resolve(tree, node, child)

    max_steps = 10
    for step in range(max_steps):
        # Current positions as an (n, 2) array.
        pos = np.array([[record['x'], record['y']] for record in nodes])
        triangulation = sp.spatial.Delaunay(pos)
        # Collect the three sides of every triangle.
        raw_edges = set()
        for tri in triangulation.simplices:
            raw_edges.update(((tri[0], tri[1]), (tri[1], tri[2]),
                              (tri[2], tri[0])))
        # Weighted graph; the weight is the gap between node boundaries.
        proximity = nx.Graph()
        overlaps = 0
        for u, v in raw_edges:
            gap = edge_gap(u, v, pos)
            proximity.add_edge(u, v, weight=gap)
            if gap < 0:
                overlaps += 1
        print(f'Step {step} n_overlap = {overlaps}')
        # Stop as soon as no pair overlaps any more.
        if overlaps == 0:
            break
        mst = nx.minimum_spanning_tree(proximity)
        # Start from the first leaf (degree-1 node) of the tree.
        leaves = [node for node, degree in mst.degree if degree == 1]
        root = leaves[0]
        resolve(mst, None, root)
    return {
        record['name']: np.array([record['x'], record['y']])
        for record in nodes
    }
Beispiel #50
0
    id0 = 0
    (totIdx, atomIdx, cfg, H, normal, d, pt0, nbrlist, slice_nbrlist_u,
     slice_nbrlist_d, pairs) = load_status_0(dirname)
    for path_id in path:
        for key, val in ID.iteritems():
            if val == path_id:
                nucleus.append(str2bits(key))
                potential.append(db[key])
                fp.write(str(db[key]) + "\n")
                writecncfg(cfg[bits2nucleus(str2bits(key), totIdx), :], H,
                           dirname + "path-" + str(id0))
                id0 += 1
'''
Find MST of G, (supposed to be MEP?)
'''
# G is expected to exist already; it is not built in this part of the script.
T = nx.minimum_spanning_tree(G)
# Tree edges, with their attribute dicts, in sorted order.
P = sorted(T.edges(data=True))

print(P)

#quit()
'''
Find all simple path starting from the shortest one
'''

# shortest_simple_paths returns a lazy generator; nothing is enumerated here.
simple_paths = nx.shortest_simple_paths(G, source=idfewest, target=idmost)
#sz = 0
#for path in simple_paths:
#    print('sz = {0}'.format(sz))
#    sz += 1
# NOTE(review): the only visible assignment of `sz` is commented out above, so
# this print depends on `sz` being defined elsewhere -- confirm, otherwise it
# raises NameError.
print("size of simple_paths = {0}".format(sz))
 def test_minimum_tree(self):
     """The spanning tree built with ``self.algo`` matches the expected edge list."""
     tree = nx.minimum_spanning_tree(self.G, algorithm=self.algo)
     found_edges = sorted(tree.edges(data=True))
     expected_edges = self.minimum_spanning_edgelist
     assert edges_equal(found_edges, expected_edges)
Beispiel #52
0
def network_graph_pre(historic_df, windows, rolling_corr):
    '''Pre-build one MST network figure per rolling-correlation window.

    For every start offset in ``windows`` the rows
    ``historic_df.iloc[window:window + rolling_corr]`` are correlated, the
    correlations are turned into distances (1 - corr), and the minimum
    spanning tree of the resulting complete graph is laid out and drawn.

    Args:
        historic_df: DataFrame with one column per coin; a 'bitcoin' column
            is required because that node is pinned at the origin.
        windows: iterable of integer row offsets.
        rolling_corr: number of rows in each correlation window.

    Returns:
        A list of plotly figures, one per entry of ``windows``.
    '''

    network_plotly_figures = []

    for window in windows:
        correlates_df = historic_df.iloc[window:(rolling_corr + window)]

        # correlation matrix -> long-format distance table, without the
        # self-pairs on the diagonal
        corr_matrix = correlates_df.corr()
        dist_matrix = (1 - corr_matrix).stack().reset_index()
        dist_matrix.columns = ['coin1', 'coin2', 'weight']
        graph_matrice = dist_matrix.loc[
            dist_matrix['coin1'] != dist_matrix['coin2']]

        # full graph and its minimum spanning tree
        graph_TS = nx.from_pandas_edgelist(graph_matrice, 'coin1', 'coin2',
                                           ['weight'])
        graph_MST = nx.minimum_spanning_tree(graph_TS)

        # node coordinates, with bitcoin fixed at the centre
        fix_posit = {'bitcoin': (0, 0)}
        fix_nodes = fix_posit.keys()
        positions = nx.spring_layout(graph_MST,
                                     weight='weight',
                                     pos=fix_posit,
                                     fixed=fix_nodes,
                                     scale=3,
                                     center=(0, 0))

        # centrality parameters: hop distance to bitcoin and betweenness
        bitcenter = nx.single_source_shortest_path_length(graph_MST, 'bitcoin')
        betweenness = nx.betweenness_centrality(graph_MST)

        # edge trace
        edge_trace = go.Scatter(x=[],
                                y=[],
                                line=dict(width=1.5, color='#888'),
                                opacity=0.3,
                                hoverinfo='none',
                                name=window,
                                mode='lines')

        # each edge contributes its two end points followed by None, which
        # separates consecutive segments
        for edge1, edge2 in graph_MST.edges():
            x0, y0 = positions[edge1]
            x1, y1 = positions[edge2]
            edge_trace['x'] += tuple([x0, x1, None])
            edge_trace['y'] += tuple([y0, y1, None])

        # node trace
        node_trace = go.Scatter(
            x=[],
            y=[],
            text=[],
            hoverinfo='text',
            name=window,
            mode='markers',
            marker=dict(showscale=True,
                        colorscale='YlGnBu',
                        reversescale=False,
                        opacity=0.9,
                        cmin=0.0,
                        cmax=0.6,
                        size=[],
                        color=[],
                        colorbar=dict(thickness=12,
                                      title="Graph Centrality Metrics: "
                                      "Degree [size] & Betweenes [color]",
                                      xanchor='left',
                                      titleside='right'),
                        line=dict(width=[], color='black')))

        # node coordinates
        for node in graph_MST.nodes():
            x, y = positions[node]
            node_trace['x'] += tuple([x])
            node_trace['y'] += tuple([y])

        # per-node marker styling and hover text; adjacency() is walked in
        # the same node order as nodes() above
        for coin, neighbours in graph_MST.adjacency():
            node_trace['marker']['color'] += tuple([betweenness[coin]])
            node_trace['marker']['size'] += tuple([len(neighbours) * 5 + 20])
            node_trace['marker']['line']['width'] += tuple(
                [2 if coin == 'bitcoin' else .2])
            node_trace['text'] += tuple([
                "COIN: <b>> {} <</b>"
                "<br><i>distance to Bitcoin: {}</i>"
                "<br><i>neighbours: {}</i>"
                "<br><i>betweenness: {}</i>"
                "".format(coin, bitcenter[coin], len(neighbours),
                          round(betweenness[coin], 6))
            ])

        # constant layout; the title fragments need the space after
        # "representation", otherwise they join into "representationof"
        mst_layout = go.Layout(
            title="<br><b>MST graph representation "
            "of Cryptocurrency Market</b>",
            titlefont=dict(size=15),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=5, l=5, r=5, t=15),
            xaxis=dict(showgrid=False,
                       zeroline=False,
                       showticklabels=False,
                       range=[-3, 4]),
            yaxis=dict(showgrid=False,
                       zeroline=False,
                       showticklabels=False,
                       range=[-5, 5]))

        network_plotly_figures.append(
            go.Figure(data=[edge_trace, node_trace], layout=mst_layout))

    return network_plotly_figures
 def test_empty_graph(self):
     """A graph with three isolated nodes yields a forest with no edges."""
     node_count = 3
     isolated = nx.empty_graph(node_count)
     forest = nx.minimum_spanning_tree(isolated, algorithm=self.algo)
     expected_nodes = list(range(node_count))
     assert nodes_equal(sorted(forest), expected_nodes)
     assert forest.number_of_edges() == 0
Beispiel #54
0
                   header=None,
                   names=['a', 'b'])

# Pairwise Euclidean distances between the points (columns 'a' and 'b').
n = len(data)
cost_matrix = np.zeros((n, n))
a, b = data['a'].values, data['b'].values
for i in range(n):
    x1, y1 = a[i], b[i]
    for j in range(n):
        x2, y2 = a[j], b[j]
        cost_matrix[i, j] = ((x1 - x2)**2 + (y1 - y2)**2)**0.5
        # The diagonal is overwritten with infinity rather than left at 0.
        if i == j: cost_matrix[i, j] = np.inf

# NOTE(review): the infinite diagonal entries presumably become self-loop
# edges in the graph -- confirm this is intended.
g = nx.from_numpy_array(cost_matrix)
# Keep only the largest connected component.
gcc = g.subgraph(max(nx.connected_components(g), key=len))
mst = nx.minimum_spanning_tree(gcc)
# Every MST edge is inserted twice, so each node of the multigraph has even
# degree and an Eulerian circuit can be requested below.
multigraph = nx.MultiGraph()
mst_cost = 0
for i in mst.edges:
    w = mst.edges[i[0], i[1]]['weight']
    mst_cost += w
    multigraph.add_edge(i[0], i[1], weight=w)
    multigraph.add_edge(i[0], i[1], weight=w)

print("Start finding cycle")
# Start node of every edge on the circuit, in traversal order.
tour = [u for u, v in nx.eulerian_circuit(multigraph)]
# Shortcut the circuit: keep only the first visit of each node.
emb_tour, visited = [], set()
for i in tour:
    if i not in visited:
        emb_tour.append(i)
        visited.add(i)
Beispiel #55
0
print(
    "Random test cases genereted with following values for nodes (vertices):",
    n)

# `n` is iterated here, so it is a collection of node counts.
for i in n:
    # Start from a single cycle on i nodes.
    graph = nx.cycle_graph(i)
    # print("Original Graph")
    # print(nx.info(graph))

    # nx.draw(graph, with_labels=True)
    # plt.show()

    # Assign every edge a random integer weight (uniform draw from [2, 100),
    # truncated by int()).
    for edge in graph.edges():
        graph.edges[edge]["weight"] = int(np.random.uniform(2, 100))

    # Cost of the spanning tree of the plain cycle.
    tree1 = nx.minimum_spanning_tree(graph)
    a = []
    for edge in tree1.edges():
        a.append(tree1.edges[edge]["weight"])

    c1 = sum(a)
    print("MST 1 COST :", c1)
    # print("MST 1 edges",tree1.edges(data=True))

    # NOTE(review): x and y are not used in the visible part of the loop.
    x = graph.edges()
    y = list(x)

    # Add (or overwrite) a weight-1 edge between node 1 and node
    # graph.size() - 1, where graph.size() is the current number of edges.
    graph.add_edge(1, graph.size() - 1, weight=1)

    # Spanning tree after the extra edge was inserted.
    tree2 = nx.minimum_spanning_tree(graph)
    b = []
Beispiel #56
0
def bridges(density, restrictions, costs, topological_correction_value):
    """Connect the separate solid regions of a 2-D density map.

    Cells with density above 0.5 are treated as solid. The solid cells are
    labelled into components, a grid graph weighted by ``costs`` is built,
    the components are linked through a minimum spanning tree over the
    path costs between their representative cells, and the cells along the
    chosen paths are set to ``topological_correction_value`` in ``density``
    (in place).

    Args:
        density: 2-D array of density values; modified in place.
        restrictions: overwritten locally at the end of the visible code;
            the incoming value is not read.
        costs: 2-D array of per-cell costs, padded the same way as density.
        topological_correction_value: value written into the bridging cells.

    Returns:
        ``density`` unchanged when at most one solid component is found.
        NOTE(review): no return statement is visible on the main path; the
        listing may be truncated -- confirm against the full source.
    """
    binary_map = np.greater(density, 0.5)
    # Snapshot of the solid mask before any bridging cells are added.
    save_binary_map = binary_map.copy()

    # One-cell border of zeros around the map.
    pad_density = np.pad(density, ((1, 1), (1, 1)), mode='constant')

    pad_binary_map = np.greater(pad_density, 0.5)

    density_shape = density.shape
    width = density_shape[0]
    height = density_shape[1]

    pad_costs = np.pad(costs, ((1, 1), (1, 1)), mode='constant')

    # Label the solid components (neighbors=4 is passed to the labeller).
    [solid_labels, num_solid_labels] = skim.label(pad_binary_map,
                                                  neighbors=4,
                                                  return_num=True)

    # Nothing to bridge with zero or one component.
    if num_solid_labels <= 1:
        return density

    # Grid graph: node ids are flattened indices into the padded array.
    density_graph = nx.Graph()
    for x_idx in range(0, width):
        for y_idx in range(0, height):

            center_node_id = (x_idx + 1) * (pad_density.shape[1]) + (y_idx + 1)

            for x_offset in range(0, 3):
                for y_offset in range(0, 3):

                    # Skip the centre cell itself and the diagonal neighbours.
                    if ((x_offset == 1) and (y_offset == 1)) or (
                        (np.abs(x_offset - 1) + np.abs(y_offset - 1)) > 1):
                        continue

                    # Neighbour position in padded coordinates.
                    next_x_idx = x_idx + x_offset
                    next_y_idx = y_idx + y_offset

                    # Skip neighbours that fall on the padding border.
                    if ((next_x_idx == 0) or (next_y_idx == 0)
                            or (next_x_idx == (pad_density.shape[0] - 1))
                            or (next_y_idx == (pad_density.shape[1] - 1))):
                        continue

                    next_node_id = next_x_idx * (
                        pad_density.shape[1]) + next_y_idx

                    next_density_value = pad_binary_map[next_x_idx, next_y_idx]
                    cost_value = pad_costs[next_x_idx, next_y_idx]

                    # Stepping onto an already solid cell is free.
                    if next_density_value:
                        cost_value = 0

                    #
                    # todo(groberts): this should be directed because as it stands you are just overwriting edges of adjacent
                    # nodes, so you aren't capturing that moving in one direction or another incurs a different cost!.
                    #
                    density_graph.add_edge(center_node_id,
                                           next_node_id,
                                           weight=cost_value)

    # First cell met in scan order for each label (unpadded coordinates).
    label_to_representative_pt = {}

    for x_idx in range(0, width):
        for y_idx in range(0, height):
            density_value = pad_density[1 + x_idx, 1 + y_idx]
            component_label = solid_labels[1 + x_idx, 1 + y_idx]

            # Skip labels that already have a representative and cells whose
            # density is zero.
            if (component_label in label_to_representative_pt.keys()) or (
                    not density_value):
                continue

            label_to_representative_pt[component_label] = [x_idx, y_idx]

    # Graph over component labels, weighted by the cost of linking them.
    mst_graph = nx.Graph()

    for label_idx_start in range(0, num_solid_labels):
        # Labels used here run from 1 to num_solid_labels.
        component_start = 1 + label_idx_start
        source_pt = label_to_representative_pt[component_start]
        source_node_id = (source_pt[0] + 1) * (pad_density.shape[1]) + (
            source_pt[1] + 1)

        # Cheapest paths from this representative to every reachable node.
        min_path_all = nx.shortest_path(density_graph,
                                        source=source_node_id,
                                        weight='weight')

        for label_idx_end in range(1 + label_idx_start, num_solid_labels):

            component_end = 1 + label_idx_end

            target_pt = label_to_representative_pt[component_end]
            target_node_id = (target_pt[0] + 1) * (pad_density.shape[1]) + (
                target_pt[1] + 1)

            min_path = min_path_all[target_node_id]

            min_path_distance = 0

            # Sum the costs of the interior nodes of the path (end points
            # excluded).
            for path_idx in range(1, (len(min_path) - 1)):
                node_id = min_path[path_idx]

                # Convert the flattened padded id back to unpadded coordinates.
                source_x = int(node_id / pad_density.shape[1]) - 1
                source_y = node_id % pad_density.shape[1] - 1

                # NOTE(review): unpadded coordinates are used to index the
                # padded cost array here -- looks like an off-by-one; confirm.
                min_path_distance += pad_costs[source_x, source_y]

            mst_graph.add_edge(component_start,
                               component_end,
                               weight=min_path_distance)

    mst = nx.minimum_spanning_tree(mst_graph)

    # NOTE(review): mst_edges is not used in the visible code.
    mst_edges = nx.edges(mst)

    # Fill in the cells along the path of every selected component link.
    for edge in mst.edges():
        edge_start, edge_end = edge

        source_pt = label_to_representative_pt[edge_start]
        target_pt = label_to_representative_pt[edge_end]

        source_node_id = (source_pt[0] + 1) * (pad_density.shape[1]) + (
            source_pt[1] + 1)
        target_node_id = (target_pt[0] + 1) * (pad_density.shape[1]) + (
            target_pt[1] + 1)

        min_path = nx.shortest_path(density_graph,
                                    source=source_node_id,
                                    target=target_node_id,
                                    weight='weight')

        for path_idx in range(1, (len(min_path) - 1)):
            node_id = min_path[path_idx]

            source_x = int(node_id / pad_density.shape[1]) - 1
            source_y = node_id % pad_density.shape[1] - 1

            # Mark the cell as solid in both the plain and the padded arrays.
            density[source_x, source_y] = topological_correction_value
            pad_density[1 + source_x,
                        1 + source_y] = topological_correction_value
            binary_map[source_x, source_y] = True
            pad_binary_map[1 + source_x, 1 + source_y] = True

    # True where the solid mask is unchanged, False on newly bridged cells.
    restrictions = np.logical_not(np.logical_xor(binary_map, save_binary_map))
Beispiel #57
0
def seeded_mst(cleaned_edges, edge_weights, seed_labels, _node_sizes=None):
    """
    Partition a graph using a minimum spanning tree.

    To ensure that seeded nodes cannot be merged together prematurely,
    a virtual root node is inserted into the graph and given artificially
    strong affinity (low edge weight) to all seeded nodes.
    Thanks to their low weights, the root node's edges will always be
    included in the MST, thus ensuring that seeded nodes can only be
    joined via the root node. After the MST is computed, the root node is
    deleted, leaving behind a forest in which each connected component
    contains at most one seed node.

    Args:
        cleaned_edges:
            array, (E,2), uint32

        edge_weights:
            array, (E,), float32
            May be empty (a graph without edges).

        seed_labels:
            array (N,), uint32
            All un-seeded nodes should be marked as 0.
            May contain no seeds at all, in which case every node ends up
            labeled 0.

        _node_sizes:
            Unused; accepted only so the signature stays compatible.

    Returns:
        CleaveResults(output_labels, disconnected_components,
                      contains_unlabeled_components)

        Where:
            output_labels:
                array (N,), uint32
                Agglomerated node labeling.

            disconnected_components:
                Whatever _find_disconnected_components() reports for
                (cleaned_edges, output_labels): the seeds which ended up
                with more than one component in the result.

            contains_unlabeled_components:
                True if the input contains one or more disjoint components
                that were not seeded and thus not labeled during
                agglomeration. False otherwise.
    """
    assert len(cleaned_edges) == len(edge_weights)

    num_nodes = len(seed_labels)
    root = num_nodes

    g = nx.Graph()
    g.add_nodes_from(np.arange(num_nodes))
    # Register the root explicitly: without seeds no root edge is ever added,
    # and removing a node that is not in the graph raises NetworkXError.
    g.add_node(root)

    # Root edges must be lighter than every real edge. With no real edges
    # any value works, and calling .min() on an empty array would raise.
    # fixme: would -np.inf work here?
    if len(edge_weights) > 0:
        root_weight = edge_weights.min() - 1000.0
    else:
        root_weight = -1000.0

    for (u, v), w in zip(cleaned_edges, edge_weights):
        g.add_edge(u, v, weight=w)

    # Connect the special root node to all seed nodes.
    for seed_node in seed_labels.nonzero()[0]:
        g.add_edge(root, seed_node, weight=root_weight)

    # Perform MST and then drop the root node
    # (and all its edges), leaving a forest
    mst = nx.minimum_spanning_tree(g)
    mst.remove_node(root)

    output_labels = np.empty_like(seed_labels)
    contains_unlabeled_components = False

    for component in nx.connected_components(mst):
        members = list(component)
        component_seeds = set(pd.unique(seed_labels[members])) - {0}
        assert len(component_seeds) <= 1
        if component_seeds:
            output_labels[members] = component_seeds.pop()
        else:
            output_labels[members] = 0
            contains_unlabeled_components = True

    disconnected_components = _find_disconnected_components(
        cleaned_edges, output_labels)
    return CleaveResults(output_labels, disconnected_components,
                         contains_unlabeled_components)
            node_size=500,
            node_color='gray',
            font_size=10,
            edge_width=10,
            alpha=1,
            arrows=False)
    plt.savefig('elimina3.png')  #// Outros formatos: pdf, svg, ...

    plt.axis('off')
    plt.show()


# Run algPrim on the adjacency matrix A and vertex list V
# (presumably the hand-written Prim implementation; defined outside this excerpt).
algPrim(A, V)

G = nx.from_numpy_matrix(A)  # convert the matrix into a graph
T = nx.minimum_spanning_tree(G)  # let networkx compute an MST for comparison

W = [int(T[u][v]['weight'])
     for u, v in T.edges()]  # weights of the edges in the networkx-generated MST

# Print the sum of the edge weights.
print "\n\n T \n\n"
print " - Soma dos pesos da MST gerada automatico: ", sum(W)
# NOTE(review): the label below says "number of vertices", but the value
# printed is the number of edges, len(T.edges()).
print " - Número de vértices da MST gerada automatico: ", len(T.edges())
print "\n\n T \n\n"

plt.figure()  #// Cria figura para desenhar grafo: 15 é a dimensão da imagem
nx.draw(T,
        dim=100,
        with_labels=True,
        node_color='gray',
Beispiel #59
0
def _write_bed_record(mintree, rec_id, rgb, outb):
    """Write one 12-column, tab-separated BED-style line for *mintree*.

    Node names containing 'ctg' are ignored. The remaining names are split
    on ':' or '_'; field 0 is taken as the chromosome and field 1 as an
    integer position. The line spans the smallest to the largest position,
    is named *rec_id*, coloured *rgb*, and lists one block of size 1 per
    position at offset ``position - start + 1``.

    Nothing is written when no usable node remains.
    """
    parsed = [
        re.split('[:_]', node) for node in mintree.nodes
        if 'ctg' not in node
    ]
    if not parsed:
        return
    positions = sorted(int(fields[1]) for fields in parsed)
    # Most frequent chromosome; ties resolve in node iteration order.
    chrom = Counter(fields[0] for fields in parsed).most_common(1)[0][0]
    start = positions[0]
    end = positions[-1]
    dists = [pos - start + 1 for pos in positions]
    onestr = ','.join('1' for _ in positions)
    dstr = ','.join(map(str, dists))
    columns = [
        chrom, str(start), str(end), rec_id, '100', '.',
        str(start), str(end), rgb, str(len(dists)), onestr, dstr,
    ]
    print('\t'.join(columns), file=outb)


def main():
    """Build het/hom linkage graphs from pair counts and emit BED summaries.

    Stages, in order:

    1. Read the set of homozygous loci from ``--infile2`` (gzip, one per
       line) and the pair table from ``--infile`` (gzip, tab separated:
       loc1, loc2, four counts, four mean distances). Pairs passing the
       loose filter are routed to the het graph (``Gloc``), the hom graph
       (``Ghom``) or to ``<temp_prefix>.mixed.txt.gz`` /
       ``<temp_prefix>.het.txt.gz`` for later passes.
    2. Annotate edges with ``dist`` / ``orient`` and call remove_bridges()
       on both graphs.
    3. Dump profiler statistics and exit (see the NOTE at that point).
    4. (Currently unreachable.) Compute a minimum spanning tree per
       connected component, write hom/het BED files, pickle the component
       maps to ``../hom.p``, join het and hom components through the mixed
       pairs, rescue loose het-het edges, rebuild the het trees and finally
       open an interactive console.

    Every input loop stops at the first empty line, not only at end of file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', type=str, default=None)
    parser.add_argument('-s', '--singletons', type=str, default=None)
    parser.add_argument('-t', '--temp_prefix', type=str, default=None)
    parser.add_argument('-f', '--infile2', type=str, default=None)
    parser.add_argument('-c', '--compfile', type=str, default=None)
    parser.add_argument('-b', '--bedfile', type=str, default=None)
    parser.add_argument('-e', '--edgefile', type=str, default=None)
    parser.add_argument('-p', '--hapfile', type=str, default=None)

    args = parser.parse_args()
    min_counts_strict = 5
    cp = cProfile.Profile()
    cp.enable()
    usage_denom = 1024
    Gloc = nx.Graph()
    Ghom = nx.Graph()

    # Loci listed in infile2 are treated as homozygous variants.
    homvar = {}
    with gzip.open(args.infile2, 'rt') as fp:
        line = fp.readline().strip()
        while line:
            homvar[line] = 1
            line = fp.readline().strip()

    with gzip.open(args.infile, 'rt') as fp, gzip.open(
            args.temp_prefix + '.het.txt.gz',
            'wt') as fhet, gzip.open(args.temp_prefix + '.mixed.txt.gz',
                                     'wt') as fmix:
        line = fp.readline().strip()
        ct = 0
        while line:
            if ct % 1000 == 0:
                # Progress: line counter and peak RSS scaled by usage_denom.
                sys.stderr.write(
                    str(ct) + '\t' + str(
                        resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
                        usage_denom) + '\n')
            ll = re.split('[\t]', line)
            cts = list(map(int, ll[2:6]))
            mns = list(map(float, ll[6:10]))
            tot = sum(cts)
            [loc1, loc2] = ll[0:2]
            tp = 'het-hom'
            if loc1 in homvar and loc2 in homvar:
                tp = 'hom-hom'
            elif not (loc1 in homvar or loc2 in homvar):
                tp = 'het-het'
            if loc2 < loc1:
                # Keep loci ordered; swapping them swaps the two middle
                # entries of the count / mean vectors.
                loc1, loc2 = loc2, loc1
                cts = [cts[0], cts[2], cts[1], cts[3]]
                mns = [mns[0], mns[2], mns[1], mns[3]]
            conf = False
            if pre_filter_loose_pass(cts, 0.90, 1, tp):
                if tp == 'het-hom':
                    print(line, file=fmix)
                if pre_filter_strict_pass(cts, 0.95, tp):
                    conf = True
                    if tp == 'het-het':
                        Gloc.add_edge(loc1,
                                      loc2,
                                      conf=conf,
                                      cts=cts,
                                      mns=mns,
                                      wt=tot)
                    elif tp == 'hom-hom':
                        Ghom.add_edge(loc1,
                                      loc2,
                                      conf=conf,
                                      cts=cts,
                                      mns=mns,
                                      wt=tot)
                else:
                    # Loose-only pairs: remember the het loci as nodes and
                    # keep het-het lines for the later rescue pass.
                    if tp == 'het-het':
                        print(line, file=fhet)
                        Gloc.add_node(loc1)
                        Gloc.add_node(loc2)
                    if tp == 'het-hom':
                        if loc1 in homvar:
                            Gloc.add_node(loc2)
                        else:
                            Gloc.add_node(loc1)
            line = fp.readline().strip()
            ct += 1

    # hom-hom edges: take the mean distance of a category holding more than
    # 90% of the counts (no 'dist' is set when no category dominates).
    for edge in Ghom.edges(data=True):
        nn = sum(edge[2]['cts'])
        for ii in range(4):
            if edge[2]['cts'][ii] > 0.9 * nn:
                edge[2]['dist'] = edge[2]['mns'][ii]

    # het-het edges: categories 0+3 -> 'outer', 1+2 -> 'inner' when they
    # hold more than 80% of the counts.
    for edge in Gloc.edges(data=True):
        nn = sum(edge[2]['cts'])
        if edge[2]['cts'][0] + edge[2]['cts'][3] > 0.8 * nn:
            edge[2]['orient'] = 'outer'
            edge[2]['dist'] = 0.5 * (edge[2]['mns'][0] + edge[2]['mns'][3])
        if edge[2]['cts'][1] + edge[2]['cts'][2] > 0.8 * nn:
            edge[2]['orient'] = 'inner'
            edge[2]['dist'] = 0.5 * (edge[2]['mns'][1] + edge[2]['mns'][2])

    # Return values are not used below; presumably the calls matter for
    # their effect on the graphs - TODO confirm against remove_bridges().
    het_bridges = remove_bridges(Gloc, min_counts_strict)
    hom_bridges = remove_bridges(Ghom, min_counts_strict)
    loc2comphom = {}
    comp2treehom = {}
    loc2comp = {}
    comp2tree = {}

    cp.disable()
    cp.print_stats()

    # NOTE(review): this unconditional exit makes everything below
    # unreachable. It looks like a profiling leftover; it is kept because
    # removing it changes what the script does - confirm before deleting.
    sys.exit(1)

    gg = [
        Ghom.subgraph(cc)
        for cc in sorted(nx.connected_components(Ghom), key=len, reverse=True)
    ]
    for ii, sub in enumerate(gg):
        print(str(ii))
        tr = nx.minimum_spanning_tree(sub, weight='dist')
        comp2treehom[ii] = tr
        for node in tr.nodes():
            loc2comphom[node] = ii

    Ghom = None
    gg = [
        Gloc.subgraph(cc)
        for cc in sorted(nx.connected_components(Gloc), key=len, reverse=True)
    ]
    for ii, sub in enumerate(gg):
        print(
            str(ii) + '\t' + str(
                resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
                usage_denom / usage_denom))
        if sub.number_of_nodes() > 2:
            tr = nx.minimum_spanning_tree(sub, weight='dist')
        else:
            # One or two nodes: the component is used as its own tree.
            tr = sub
        comp2tree[ii] = tr
        for node in tr.nodes():
            loc2comp[node] = ii

    with gzip.open(args.bedfile + 'hom.bed.gz', 'wt') as outb:
        for comp, mintree in comp2treehom.items():
            _write_bed_record(mintree, 'hom_' + str(comp), '150,150,0', outb)

    with gzip.open(args.bedfile + 'het.bed.gz', 'wt') as outb:
        for comp, mintree in comp2tree.items():
            rec_id = 'het_' + str(comp)
            print(rec_id)
            _write_bed_record(mintree, rec_id, '0,150,0', outb)

    ll = [loc2comp, comp2tree, loc2comphom, comp2treehom]
    with open("../hom.p", 'wb') as f:
        pickle.dump(ll, f)

    Gmix = nx.Graph()
    superg = nx.Graph()

    with gzip.open(args.temp_prefix + '.mixed.txt.gz', 'rt') as fp:
        line = fp.readline().strip()
        ct = 0
        while line:
            if ct % 1000 == 0:
                sys.stderr.write(
                    str(ct) + '\t' + str(
                        resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
                        usage_denom) + '\n')
            ll = re.split('[\t]', line)
            cts = list(map(int, ll[2:6]))
            mns = list(map(float, ll[6:10]))
            tot = sum(cts)
            [loc1, loc2] = ll[0:2]
            # Exactly one of the two loci is homozygous.
            if (loc1 in homvar) != (loc2 in homvar):
                tp = 'het-hom'
                if pre_filter_strict_pass(cts, 0.95, tp):
                    conf = True
                    if loc2 not in homvar:
                        # Orient the pair as (het, hom).
                        loc1, loc2 = loc2, loc1
                        cts = [cts[0], cts[2], cts[1], cts[3]]
                        mns = [mns[0], mns[2], mns[1], mns[3]]
                    if loc1 in loc2comp and loc2 in loc2comphom:
                        # Mean of the distances of categories with counts.
                        num = 0
                        denom = 0
                        for ii in range(4):
                            if cts[ii] > 0:
                                num += mns[ii]
                                denom += 1
                        mn_dist = 1.0 * num / denom
                        Gmix.add_edge(loc1,
                                      loc2,
                                      conf=conf,
                                      cts=cts,
                                      mns=mns,
                                      dist=mn_dist,
                                      wt=tot)
                        hetcomp = loc2comp[loc1]
                        homcomp = loc2comphom[loc2]
                        node1 = 'het_' + str(hetcomp)
                        node2 = 'hom_' + str(homcomp)
                        if not superg.has_edge(node1, node2):
                            superg.add_edge(node1,
                                            node2,
                                            dist=[],
                                            wt=0,
                                            ct=0,
                                            sum_dist=0)
                        link = superg.edges[node1, node2]
                        link['wt'] += tot
                        link['dist'].append(mn_dist)
                        link['ct'] += 1
                        link['sum_dist'] += mn_dist
            line = fp.readline().strip()
            ct += 1

    gg = [
        superg.subgraph(cc) for cc in sorted(
            nx.connected_components(superg), key=len, reverse=True)
    ]

    supercomp2tree = {}
    with gzip.open(args.bedfile + '.mixed.bed.gz', 'wt') as outb:
        for ii, supercomp in enumerate(gg):
            rec_id = 'mixed_' + str(ii)
            tocomp = []
            # Separate names for the component type / index so the record
            # name above is not overwritten while collecting the trees.
            for comp_node in supercomp.nodes():
                [comp_tp, comp_idx] = re.split('_', comp_node)
                if comp_tp == 'het':
                    tr = comp2tree[int(comp_idx)]
                else:
                    tr = comp2treehom[int(comp_idx)]
                for loc in tr.nodes():
                    tr.nodes[loc]['tp'] = comp_tp
                tocomp.append(tr)
            Gcomp = nx.compose_all(tocomp)
            Gsub = Gmix.subgraph(Gcomp.nodes())
            Gcomp1 = nx.compose(Gsub, Gcomp)
            mintree = nx.minimum_spanning_tree(Gcomp1, weight='dist')
            supercomp2tree[ii] = mintree
            _write_bed_record(mintree, rec_id, '150,150,0', outb)

    comp2supercomp = {}
    for ii, supercomp in enumerate(gg):
        for node in supercomp.nodes():
            comp2supercomp[node] = ii

    Gloose = nx.Graph()
    with gzip.open(args.temp_prefix + '.het.txt.gz', 'rt') as fp:
        line = fp.readline().strip()
        ct = 0
        while line:
            if ct % 1000 == 0:
                sys.stderr.write(
                    str(ct) + '\t' + str(
                        resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
                        usage_denom) + '\n')
            ll = re.split('[\t]', line)
            cts = list(map(int, ll[2:6]))
            mns = list(map(float, ll[6:10]))
            tot = sum(cts)
            [loc1, loc2] = ll[0:2]
            if not (loc1 in homvar or loc2 in homvar):
                tp = 'het-het'
                if loc2 < loc1:
                    loc1, loc2 = loc2, loc1
                    cts = [cts[0], cts[2], cts[1], cts[3]]
                    mns = [mns[0], mns[2], mns[1], mns[3]]
                conf = False
                if pre_filter_loose_pass(cts, 0.90, 2, tp):
                    Gloose.add_edge(loc1,
                                    loc2,
                                    conf=conf,
                                    cts=cts,
                                    mns=mns,
                                    wt=tot)
            line = fp.readline().strip()
            ct += 1

    # Rescue loose het-het edges that join two different het components of
    # the same super-component, provided their mean distance is within 1000
    # of the path length through that super-component's tree.
    for edge in Gloose.edges(data=True):
        if edge[0] in loc2comp and edge[1] in loc2comp and not loc2comp[
                edge[0]] == loc2comp[edge[1]]:
            comp0 = 'het_' + str(loc2comp[edge[0]])
            comp1 = 'het_' + str(loc2comp[edge[1]])
            if comp0 in comp2supercomp and comp1 in comp2supercomp:
                s0 = comp2supercomp[comp0]
                s1 = comp2supercomp[comp1]
                if s0 == s1 and edge[2]['wt'] > 1:
                    spl = nx.shortest_path_length(supercomp2tree[s0],
                                                  source=edge[0],
                                                  target=edge[1],
                                                  weight='dist')
                    numer = 0
                    denom = 0
                    for ii in range(4):
                        if edge[2]['cts'][ii] > 0:
                            numer += edge[2]['mns'][ii]
                            denom += 1
                    edge[2]['dist'] = 1.0 * numer / denom
                    if abs(edge[2]['dist'] - spl) < 1000:
                        Gloc.add_edge(edge[0],
                                      edge[1],
                                      cts=edge[2]['cts'],
                                      mns=edge[2]['mns'],
                                      dist=edge[2]['dist'],
                                      wt=edge[2]['wt'])

    # Rebuild the het component trees now that Gloc gained rescued edges.
    comp2tree = {}
    loc2comp = {}
    gg = [
        Gloc.subgraph(cc)
        for cc in sorted(nx.connected_components(Gloc), key=len, reverse=True)
    ]
    for ii, sub in enumerate(gg):
        print(
            str(ii) + '\t' + str(
                resource.getrusage(resource.RUSAGE_SELF).ru_maxrss /
                usage_denom / usage_denom))
        if sub.number_of_nodes() > 2:
            tr = nx.minimum_spanning_tree(sub, weight='dist')
        else:
            tr = sub
        comp2tree[ii] = tr
        for node in tr.nodes():
            loc2comp[node] = ii

    with gzip.open(args.bedfile + 'het.1.bed.gz', 'wt') as outb:
        for comp, mintree in comp2tree.items():
            rec_id = 'het_' + str(comp)
            print(rec_id)
            _write_bed_record(mintree, rec_id, '0,150,0', outb)

    # Debugging aid: drop into an interactive console with the local state.
    code.interact(local=locals())
Beispiel #60
0
def main():
  """Split a pair table by zygosity and build per-component hom-hom trees.

  Reads the homozygous loci from ``--infile2`` and the pair table from
  ``--infile`` (both gzip text). het-het lines are copied to
  ``<temp_prefix>.het.txt.gz``, het-hom lines to
  ``<temp_prefix>.mixed.txt.gz``, and hom-hom pairs that pass
  pre_filter_strict_pass() become edges of ``Ghom``. After remove_bridges(),
  a minimum spanning tree (weight ``dist``) is computed per connected
  component and the locus->component and component->tree maps are pickled to
  ``<temp_prefix>.hom.p``.

  NOTE(review): ``--singletons`` is parsed but not used in the visible code.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('-i', '--infile', type=str, default=None)
  parser.add_argument('-s', '--singletons', type=str, default=None)
  parser.add_argument('-t', '--temp_prefix', type=str, default=None)
  parser.add_argument('-f', '--infile2', type=str, default=None)

  args = parser.parse_args()
  min_counts_strict=5
  # Divisor applied to ru_maxrss in the progress output
  # (presumably converts KiB to GiB on Linux - TODO confirm).
  usage_denom=1024*1024
  Ghom=nx.Graph()

  # Every non-empty line of infile2 names one homozygous locus; reading
  # stops at the first empty line.
  homvar={}
  with gzip.open(args.infile2, 'rt') as fp:
    line=fp.readline().strip()
    while line:
      homvar[line]=1
      line=fp.readline().strip()

  with gzip.open(args.infile, 'rt') as fp, gzip.open(args.temp_prefix+'.het.txt.gz', 'wt') as fhet, gzip.open(args.temp_prefix+'.mixed.txt.gz', 'wt') as fmix:
    line=fp.readline().strip()
    ct=0
    while line:
      # Progress report every 1000 lines: line counter and scaled peak RSS.
      if ct%1000==0:
        sys.stderr.write(str(ct)+'\t'+str(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/usage_denom)+'\n')
      ll=re.split('[\t]', line)
      [loc1, loc2]=ll[0:2]      
      if not (loc1 in homvar or loc2 in homvar):
        # Neither locus is homozygous: defer to the het-het file.
        tp='het-het'
        print(line, file=fhet)
      elif not (loc1 in homvar and loc2 in homvar):
        # Exactly one locus is homozygous: defer to the mixed file.
        tp='het-hom'
        print(line, file=fmix)
      else:
        tp='hom-hom'
        # Columns 2-5 are integer counts, columns 6-9 float means.
        cts=list(map(int, ll[2:6]))
        mns=list(map(float, ll[6:10]))
        if loc2<loc1:
          # Keep loci ordered; swapping them swaps the two middle entries.
          [loc1, loc2]=[loc2, loc1]
          cts=[cts[0], cts[2], cts[1], cts[3]]
          mns=[mns[0], mns[2], mns[1], mns[3]]
        # The filter returns (pass flag, orientation, count, distance).
        [passf, orient, nn, dist]=pre_filter_strict_pass(cts, mns, 0.95, tp)
        if passf:
          Ghom.add_edge(loc1, loc2, orient=orient, dist=int(dist), wt=nn)
      line=fp.readline().strip()
      ct+=1

  # The return value is not used below; presumably the call matters for its
  # effect on Ghom - TODO confirm against remove_bridges().
  hom_bridges=remove_bridges(Ghom, min_counts_strict, 'hom-hom')
  loc2comphom={}; comp2treehom={};

  # Components are numbered from largest to smallest.
  gg=list(Ghom.subgraph(cc) for cc in sorted(nx.connected_components(Ghom), key=len, reverse=True))
  for ii in range(len(gg)):
    print(str(ii))
    tr=nx.minimum_spanning_tree(gg[ii], weight='dist')
    comp2treehom[ii]=tr
    for node in tr.nodes():
      loc2comphom[node]=ii
  
  # Drop the reference to the full graph before pickling the results.
  Ghom=None
  ll=[loc2comphom, comp2treehom]
  with open(args.temp_prefix+'.hom.p', 'wb') as f:
    pickle.dump(ll, f)