def test_connected_components_on_very_deep_graph(self):
    """Check that connected_components leaves the recursion limit untouched.

    Builds a path graph 0-1-2-...-5000, runs connected_components on it and
    asserts that the interpreter recursion limit is the same before and
    after the call.
    """
    last_node = 5000
    gr = pygraph.classes.graph.graph()
    gr.add_nodes(range(last_node + 1))
    for node in range(last_node):
        gr.add_edge((node, node + 1))
    limit_before = getrecursionlimit()
    connected_components(gr)
    limit_after = getrecursionlimit()
    assert limit_after == limit_before
def test_connected_components_on_very_deep_graph(self):
    """Run connected_components on a 5001-node chain.

    The test passes when the recursion limit reported by getrecursionlimit()
    is identical before and after the call.
    """
    gr = pygraph.classes.graph.graph()
    chain_nodes = range(5001)
    gr.add_nodes(chain_nodes)
    for left in range(5000):
        right = left + 1
        gr.add_edge((left, right))
    expected_limit = getrecursionlimit()
    connected_components(gr)
    assert expected_limit == getrecursionlimit()
def get_strings():
    """Cluster the fingerprints in 'fingerprints.uniq' and pick representatives.

    Fingerprints are read one per line. Two fingerprints are linked when
    they share the key f[:15] or f[16:] and hamming_distance() between
    them is below 2; the connected components of that graph are the
    clusters. Each cluster is represented by consensus_string() of its
    members, unless that string is more than 1 away from some member and
    center_string() yields a strictly smaller maximum distance.

    The clusters are pretty-printed to the file 'clusters'.

    Returns:
        dict mapping every fingerprint to its cluster's representative.
    """
    with open('fingerprints.uniq', 'r') as f:
        fingerprints = [line.rstrip() for line in f]

    gr = graph()
    gr.add_nodes(fingerprints)

    # Bucket fingerprints by the two slices so that only fingerprints
    # sharing a slice are compared pairwise.
    fs = {}
    for fp in fingerprints:
        for h in (fp[:15], fp[16:]):
            fs.setdefault(h, []).append(fp)

    edges = set()
    for shared in fs.values():
        for f1 in shared:
            for f2 in shared:
                # f1 < f2 keeps one orientation per pair and skips self-pairs.
                if f1 < f2 and hamming_distance(f1, f2) < 2:
                    edges.add((f1, f2))
    for e in edges:
        gr.add_edge(e)

    # Invert node -> component id into component id -> [nodes].
    real_components = {}
    for node, comp in connected_components(gr).items():
        real_components.setdefault(comp, []).append(node)

    all_consensus = {k: consensus_string(v)
                     for (k, v) in real_components.items()}
    all_consensus_distances = {k: max_hamming_distance(v, real_components[k])
                               for (k, v) in all_consensus.items()}

    for k, members in real_components.items():
        cost = all_consensus_distances[k]
        if cost > 1:
            c = center_string(members)
            c_cost = max_hamming_distance(c, members)
            if c_cost < cost:
                # Fix: the original assigned both values to
                # all_consensus_distances[k], so the better representative
                # was never recorded in all_consensus.
                all_consensus[k] = c
                all_consensus_distances[k] = c_cost

    # Map every fingerprint to the representative of its cluster.
    representatives = {}
    for k, members in real_components.items():
        representatives.update((w, all_consensus[k]) for w in members)

    with open('clusters', 'w') as f:
        pprint.PrettyPrinter(indent=4, stream=f).pprint(real_components)
    return representatives
def test_cut_edges_in_graph(self):
    """Removing any cut edge must increase the number of connected components.

    The original version computed the comparison but never asserted it, and
    rebound ``gr`` to a single shared copy so later iterations ran on an
    already-modified graph. Each edge is now checked on its own fresh copy.
    """
    gr = testlib.new_graph()
    gr.add_nodes(['x', 'y'])
    gr.add_edge(('x', 'y'))
    gr.add_edge(('x', 0))
    pristine = deepcopy(gr)
    for each in cut_edges(gr):
        # Fresh copy per edge so earlier deletions cannot affect this check.
        trial = deepcopy(pristine)
        before = number_of_connected_components(connected_components(trial))
        trial.del_edge(each)
        after = number_of_connected_components(connected_components(trial))
        assert after > before
def test_cut_edges_in_graph(self):
    """Removing any cut edge must increase the number of connected components.

    The original version computed the comparison but never asserted it, and
    rebound ``gr`` to a single shared copy so later iterations ran on an
    already-modified graph. Each edge is now checked on its own fresh copy.
    """
    gr = testlib.new_graph()
    gr.add_nodes(['x', 'y'])
    gr.add_edge(('x', 'y'))
    gr.add_edge(('x', 0))
    pristine = deepcopy(gr)
    for each in cut_edges(gr):
        # Fresh copy per edge so earlier deletions cannot affect this check.
        trial = deepcopy(pristine)
        before = number_of_connected_components(connected_components(trial))
        trial.del_edge(each)
        after = number_of_connected_components(connected_components(trial))
        assert after > before
def outlier_nn(self, positive=1, negative=-1):
    """Label every neuron by whether it belongs to the largest cluster.

    Clusters are the connected components of ``self.gr``. Neurons in the
    component with the most members get ``positive``; all others get
    ``negative``. The label list is printed and returned together with
    ``self.nodes`` as ``(self.nodes, labels_final)``.
    """
    groups = connected_components(self.gr)
    members_by_group = dict_reverse(groups)

    # Find the largest group; the first one encountered wins on ties.
    best_size = 0
    for group_id, members in members_by_group.items():
        size = len(members)
        if size > best_size:
            best_size, max_group = size, group_id

    # One label per component id.
    affines = {}
    for group_id in groups.values():
        affines[group_id] = positive if group_id == max_group else negative

    labels_final = [affines[groups[i]] for i in range(len(self.nodes))]
    print(labels_final)
    return self.nodes, labels_final
def parsimonous_protein_identification(peptides):
    """Identify proteins from peptide evidence using parsimony.

    ``peptides`` maps each peptide sequence to the list of proteins it may
    come from: ``{<peptide_seq>: [<protein_name>, ...]}``.

    Proteins that are the only candidate for some peptide are accepted
    directly. The remaining proteins are grouped into clusters (connected
    components of the "shares a peptide" graph) and, for each cluster, the
    smallest set of proteins covering all of the cluster's peptides is
    accepted.

    Side effect: uniquely-assigned peptides are removed from ``peptides``
    (kept from the original implementation).

    Returns:
        dict mapping each detected protein to the list of peptides
        supporting it.

    Exits the process with status 1 if a cluster cannot be covered.
    """
    detected_proteins = {}
    protein2peptides = {}

    # Start with the uniquely determined proteins. Iterate over a snapshot
    # because entries are popped from ``peptides`` inside the loop.
    for peptide, proteins in list(peptides.items()):
        if len(proteins) == 1:
            # Fix: append instead of overwrite, so a protein with several
            # unique peptides keeps all of them.
            detected_proteins.setdefault(proteins[0], []).append(peptide)
            peptides.pop(peptide)
        else:
            for p in proteins:
                protein2peptides.setdefault(p, []).append(peptide)

    # Remaining peptides have multiple potential proteins: link every pair
    # of proteins that share a peptide.
    g = graph()
    for proteins in peptides.values():
        for protein in proteins:
            if not g.has_node(protein):
                g.add_node(protein)
        for p1, p2 in combinations(proteins, 2):
            if not g.has_edge((p1, p2)):
                g.add_edge((p1, p2))

    # Group proteins by connected component.
    clusters = {}
    for protein, subgraph in connected_components(g).items():
        clusters.setdefault(subgraph, set()).add(protein)

    def find_covering(proteins):
        """Return the smallest protein subset covering all shared peptides."""
        target = set(chain.from_iterable(protein2peptides[p] for p in proteins))
        for k in range(1, len(proteins) + 1):
            for covering in combinations(proteins, k):
                covered = set(chain.from_iterable(
                    protein2peptides[p] for p in covering))
                # covered is always a subset of target, so equal sizes
                # mean equal sets.
                if len(covered) == len(target):
                    return covering
        return None

    # Find the minimal protein covering of each cluster.
    for cluster in clusters.values():
        covering = find_covering(cluster)
        if covering is None:
            # Fix: report the cluster that failed, not a stale loop variable.
            print("Error, failed to cover " + str(cluster))
            sys.exit(1)
        for protein in covering:
            # Fix: merge with unique-peptide evidence instead of replacing it.
            detected_proteins.setdefault(protein, []).extend(
                protein2peptides[protein])
    return detected_proteins
def test_connected_components_hypergraph(self):
    """Verify connected_components on two small nine-node hypergraphs."""

    def build(specs):
        # Each spec is [hyperedge_name, node, node, ...].
        gr = hypergraph()
        gr.add_nodes(range(9))
        gr.add_hyperedges([spec[0] for spec in specs])
        for spec in specs:
            edge, members = spec[0], spec[1:]
            for node in members:
                gr.link(node, edge)
        return gr

    # Three disjoint size-3 hyperedges -> three components.
    cc = connected_components(
        build([['a', 0, 1, 2], ['b', 3, 4, 5], ['c', 6, 7, 8]]))
    assert len(set(cc.values())) == 3
    for first in (0, 3, 6):
        assert cc[first] == cc[first + 1] and cc[first + 1] == cc[first + 2]

    # Two components, each made of two overlapping hyperedges.
    cc = connected_components(
        build([['a', 0, 1, 2], ['b', 2, 3, 4], ['c', 5, 6, 7], ['d', 6, 7, 8]]))
    assert len(set(cc.values())) == 2
    for i in (0, 1, 2, 3, 5, 6, 7):
        assert cc[i] == cc[i + 1]
    assert cc[4] != cc[5]
def test_connected_components_hypergraph(self):
    """Check component labels for hypergraphs with 3 and then 2 components."""
    # Case 1: three disjoint hyperedges over nine nodes.
    disjoint = (('a', (0, 1, 2)), ('b', (3, 4, 5)), ('c', (6, 7, 8)))
    gr = hypergraph()
    gr.add_nodes(range(9))
    gr.add_hyperedges([name for name, _ in disjoint])
    for name, members in disjoint:
        for node in members:
            gr.link(node, name)

    cc = connected_components(gr)
    assert len(set(cc.values())) == 3
    assert cc[0] == cc[1] and cc[1] == cc[2]
    assert cc[3] == cc[4] and cc[4] == cc[5]
    assert cc[6] == cc[7] and cc[7] == cc[8]

    # Case 2: two components, each spanned by two overlapping hyperedges.
    overlapping = (('a', (0, 1, 2)), ('b', (2, 3, 4)),
                   ('c', (5, 6, 7)), ('d', (6, 7, 8)))
    gr = hypergraph()
    gr.add_nodes(range(9))
    gr.add_hyperedges([name for name, _ in overlapping])
    for name, members in overlapping:
        for node in members:
            gr.link(node, name)

    cc = connected_components(gr)
    assert len(set(cc.values())) == 2
    for low in range(4):
        assert cc[low] == cc[low + 1]
    for low in range(5, 8):
        assert cc[low] == cc[low + 1]
    assert cc[4] != cc[5]
def draw_2d(self, scale=1, axis_=False):
    """Draw the topology structure (edges) and the neurons.

    scale  -- real number; distances between neurons of the same cluster are
              multiplied by it before the MDS embedding, which adjusts the
              drawn size of the clusters. Only used for non-2D data.
    axis_  -- when False (default) the axes are hidden.

    If the neurons are not 2-dimensional, MDS from scikit-learn embeds them
    in 2D first; scikit-learn is imported only in that case.

    Neurons are drawn with one colour/marker style per cluster. There are
    24 styles; they are reused cyclically when there are more clusters
    (the original raised IndexError in that case).
    """
    groups = connected_components(self.gr)
    if len(self.nodes[0]) != 2:
        print('using MDS for none 2d drawing')
        from sklearn import manifold
        from sklearn.metrics import euclidean_distances
        similarities = euclidean_distances(self.nodes)
        for i in range(len(self.nodes)):
            for j in range(len(self.nodes)):
                if groups[i] == groups[j]:
                    similarities[i, j] *= scale
        mds = manifold.MDS(n_components=2, max_iter=500, eps=1e-7,
                           dissimilarity="precomputed", n_jobs=1)
        draw_nodes = mds.fit(similarities).embedding_
    else:
        draw_nodes = self.nodes

    print('now_drawing')
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Edges first, so the node markers are drawn on top of them.
    node_count = len(draw_nodes)
    for i in range(node_count):
        for j in range(i, node_count):
            if self.gr.has_edge((i, j)):
                ax.plot([draw_nodes[i][0], draw_nodes[j][0]],
                        [draw_nodes[i][1], draw_nodes[j][1]],
                        color='k', linestyle='-', linewidth=1)

    style_tail = ['.', 'o', 'x', '^', 's', '+']
    style_head = ['b', 'r', 'g', 'k']
    style_list = [color + marker
                  for color, marker in itertools.product(style_head, style_tail)]

    for i, each in enumerate(draw_nodes):
        # Modulo keeps the index valid when there are more clusters than styles.
        style = style_list[(groups[i] - 1) % len(style_list)]
        ax.plot(each[0], each[1], style)

    if not axis_:
        plt.axis('off')
    plt.show()
def test_connected_components_in_graph(self):
    """Component labels must agree with depth-first-search reachability.

    For every ordered node pair (n, m): m is found by a DFS rooted at n
    exactly when both nodes carry the same component label.
    """
    gr = testlib.new_graph()
    gr.add_nodes(['a', 'b', 'c'])
    gr.add_edge(('a', 'b'))
    cc = connected_components(gr)
    for n in gr:
        for m in gr:
            same_component = cc[n] == cc[m]
            reachable = depth_first_search(gr, n)[0]
            if same_component:
                assert m in reachable
            else:
                assert m not in reachable
def extract_tracklets(self):
    """Build ``self.tracklets``: one Tracklet per connected component.

    ``self.cc`` receives the node -> component-id mapping of
    ``self.ct.graph``; each node is then added to its component's Tracklet.
    """
    component_of = algos.connected_components(self.ct.graph)
    self.cc = component_of

    # A Tracklet is constructed for every node's component id; for a
    # repeated id the last one constructed is the one that is kept.
    tracklets = {}
    for comp_id in component_of.values():
        tracklets[comp_id] = Tracklet(comp_id)
    self.tracklets = tracklets

    for node, comp_id in component_of.items():
        tracklets[comp_id].add_node(node)
def test_connected_components_in_graph(self):
    """Check connected_components against depth_first_search reachability."""
    gr = testlib.new_graph()
    gr.add_nodes(['a', 'b', 'c'])
    gr.add_edge(('a', 'b'))
    labels = connected_components(gr)
    for source in gr:
        for target in gr:
            visited = depth_first_search(gr, source)[0]
            if labels[source] != labels[target]:
                assert target not in visited
            else:
                assert target in visited
def spaghettify_digraph(g, head, tail):
    """Return a copy of ``g`` with its components chained between head and tail.

    head = old release, tail = new release. The copy first loses
    node->tail edges for nodes that reach more than two nodes, and
    head->node edges for nodes reached from more than two nodes. The
    connected components of ``_trim_digraph(copy, head, tail)`` are then
    linked in component-number order: every tail-adjacent node of
    component i gets an edge to every head-adjacent node of component
    i + 1, and the now-redundant edges to ``head`` / ``tail`` are removed.
    """
    original = digraph()
    original.add_nodes(g.nodes())
    for edge in g.edges():
        original.add_edge(edge)

    heads = set(original.neighbors(head))
    tails = set(original.incidents(tail))

    # Reduce task to release edges:
    # drop node -> tail when the pre-order traversal from node visits
    # more than two nodes.
    for node in tails:
        visited = list(traversal(original, node, 'pre'))
        if len(visited) > 2:
            original.del_edge((node, tail))

    # Reduce release to task edges:
    # drop head -> node when the traversal on the reversed graph visits
    # more than two nodes.
    for node in heads:
        visited = list(traversal(original.reverse(), node, 'pre'))
        if len(visited) > 2:
            original.del_edge((head, node))

    # Recompute after the deletions above.
    heads = set(original.neighbors(head))
    tails = set(original.incidents(tail))

    trimmed = _trim_digraph(original, head, tail)
    components = connected_components(trimmed)

    hc = {}  # {frozenset(heads in component): component}
    tc = {}  # {frozenset(tails in component): component}
    for component in set(components.values()):
        # Find the nodes in the component
        nodes = set(k for k, v in components.iteritems() if v == component)
        hc[frozenset(heads & nodes)] = component
        tc[frozenset(tails & nodes)] = component

    for component in xrange(1, len(hc)):
        current_heads = next(t for t, c in hc.iteritems() if c == component + 1)
        current_tails = next(t for t, c in tc.iteritems() if c == component)
        for current_head, current_tail in product(current_heads, current_tails):
            original.add_edge((current_tail, current_head))
            if (head, current_head) in original.edges():
                original.del_edge((head, current_head))
            if (current_tail, tail) in original.edges():
                original.del_edge((current_tail, tail))

    return original
def spaghettify_digraph(g, head, tail):
    """Copy ``g`` and serialise its components between ``head`` and ``tail``.

    head = old release, tail = new release. After pruning head/tail edges
    of nodes that are connected to more than one other node, the connected
    components of the trimmed graph are chained in component-number order:
    tails of component i are wired to heads of component i + 1, and the
    direct edges to ``head`` / ``tail`` that this makes redundant are
    deleted. The modified copy is returned; ``g`` is only read.
    """
    original = digraph()
    original.add_nodes(g.nodes())
    for edge in g.edges():
        original.add_edge(edge)

    heads = set(original.neighbors(head))
    tails = set(original.incidents(tail))

    # Reduce task to release edges
    for node in tails:
        reached = list(traversal(original, node, "pre"))
        if len(reached) > 2:
            original.del_edge((node, tail))

    # Reduce release to task edges
    for node in heads:
        reached = list(traversal(original.reverse(), node, "pre"))
        if len(reached) > 2:
            original.del_edge((head, node))

    # The deletions above may have changed both neighbourhoods.
    heads = set(original.neighbors(head))
    tails = set(original.incidents(tail))

    trimmed = _trim_digraph(original, head, tail)
    components = connected_components(trimmed)

    hc = {}  # {frozenset(heads in component): component}
    tc = {}  # {frozenset(tails in component): component}
    for component in set(components.values()):
        # Find the nodes in the component
        members = set(k for k, v in components.iteritems() if v == component)
        hc[frozenset(heads & members)] = component
        tc[frozenset(tails & members)] = component

    for component in xrange(1, len(hc)):
        following = component + 1
        current_heads = next(t for t, c in hc.iteritems() if c == following)
        current_tails = next(t for t, c in tc.iteritems() if c == component)
        for current_head, current_tail in product(current_heads, current_tails):
            original.add_edge((current_tail, current_head))
            if (head, current_head) in original.edges():
                original.del_edge((head, current_head))
            if (current_tail, tail) in original.edges():
                original.del_edge((current_tail, tail))

    return original
def connected_components(self):
    """
    Connected components.

    Only keys of the underlying graph whose second element is 'n' are
    reported; each is listed under its first element.

    @rtype:  dictionary
    @return: Pairing that associates each node to its connected component.
    """
    raw = accessibility.connected_components(self.graph)
    return dict(
        (key[0], component)
        for key, component in raw.items()
        if key[1] == 'n'
    )
def teo_2(g, k):
    """Build a forest over g's nodes whose components all have >= k nodes.

    Works by induction on k: the forest for k - 1 is grown by repeatedly
    taking a component with fewer than k nodes and adding the minimum-weight
    edge of ``g`` that leaves it.

    Args:
        g: weighted graph exposing nodes(), edges() and edge_weight(edge).
        k: minimum component size, 1 <= k <= number of nodes.

    Returns:
        A new graph containing all nodes of ``g`` and the selected edges.

    Raises:
        ValueError: if k is out of range, or if a too-small component has no
            edge leaving it (``g`` is disconnected).
    """
    # Guard clauses
    if k > len(g.nodes()):
        raise ValueError('FORBIDDEN: K > |V|')
    if k <= 0:
        raise ValueError('FORBIDDEN: K <= 0')

    # Base case: every node is its own component.
    if k == 1:
        forest = graph()
        for node in g.nodes():
            forest.add_node(node)
        return forest

    # Inductive hypothesis: all components already have >= k - 1 nodes.
    forest = teo_2(g, k - 1)

    # Loop while some connected component still violates the size condition.
    while True:
        # Recompute the components; the previous addition may have merged two.
        # presumably _transform_cc turns node -> id into a list of node
        # collections -- verify against its definition
        cc = _transform_cc(connected_components(forest))

        # Pick one component with fewer than k nodes, if any.
        selected_component = next((c for c in cc if len(c) < k), None)
        if selected_component is None:
            break

        # Candidate edges have exactly one endpoint in the component.
        # Fix: the original only tested e[0], so an edge with both
        # endpoints inside could be chosen and create a cycle.
        used_edges = set(forest.edges())
        neighbor_edges = [
            e for e in g.edges()
            if e not in used_edges
            and e[0] in selected_component
            and e[1] not in selected_component
        ]
        if not neighbor_edges:
            raise ValueError(
                'no edge leaves a component smaller than K; graph is disconnected')
        forest.add_edge(min(neighbor_edges, key=g.edge_weight))

    return forest
def GetExactParsimonyList(peptides, protein_dict):
    """Return a parsimonious list of proteins explaining the peptides.

    Args:
        peptides: dict mapping each peptide to the sequence of proteins it
            may belong to.
        protein_dict: dict mapping each protein to the set of its peptides.
            NOTE: it is modified in place -- every set is reduced to the
            peptides left unexplained after the first step.

    Steps:
        1. Every protein owning a peptide that maps to it alone is accepted,
           and all of its peptides count as explained.
        2. Proteins of the unexplained peptides are linked when they share a
           peptide; each connected component is solved independently with
           getExactSolution().

    Returns:
        list of accepted proteins.
    """
    parsimony_list = []
    explained = set()
    for pr, pe in protein_dict.items():
        if any(len(peptides[p]) == 1 for p in pe):
            parsimony_list.append(pr)
            explained.update(pe)

    unexp_peptides = set(peptides).difference(explained)
    for pr in list(protein_dict):
        protein_dict[pr] = protein_dict[pr].intersection(unexp_peptides)
    print("There are " + str(len(unexp_peptides)) + " unexplained peptides")

    gr = graph()
    for pe in unexp_peptides:
        proteins = peptides[pe]
        # Fix: add each missing node individually. The original added nodes
        # only when *all* proteins were new, leaving some nodes missing.
        for protein in proteins:
            if not gr.has_node(protein):
                gr.add_node(protein)
        # Add edges between all these proteins, skipping edges that an
        # earlier peptide already created.
        for i in range(len(proteins) - 1):
            for j in range(i + 1, len(proteins)):
                edge = (proteins[i], proteins[j])
                if not gr.has_edge(edge):
                    gr.add_edge(edge)
    print("%d unexplained peptides, %d proteins in the graph, with %d connections " % (
        len(unexp_peptides), len(gr.nodes()), len(gr.edges())))

    # Group proteins by connected component.
    subgraphs_dict = {}
    for pr, comp in connected_components(gr).items():
        subgraphs_dict.setdefault(comp, set()).add(pr)
    subgraphs = list(subgraphs_dict.values())

    if subgraphs:
        sizes = [len(s) for s in subgraphs]
        print("%d independent subgraphs, with %d to %d proteins" % (
            len(subgraphs), min(sizes), max(sizes)))
    else:
        # Fix: min()/max() raised ValueError when nothing was left to solve.
        print("0 independent subgraphs")

    for sub in subgraphs:
        parsimony_list += getExactSolution(sub, protein_dict)

    print("%d proteins in the final list" % (len(parsimony_list), ))
    print("Done parsimony")
    return parsimony_list
def connected_components(self):
    """
    Return a set of a connected components.

    Each connected component set can be passed to `self.copy()`
    to be copied as a subgraph.

    This builds on python-graph's version of a function with the
    same name but also adds awareness of our conventions about there
    being both a node/edge for relations/CDUs.

    The result is a frozenset of frozensets of nodes.
    """
    ccs = accessibility.connected_components(self)
    # Invert node -> component id into component id -> set of nodes.
    subgraphs = collections.defaultdict(set)
    for node, i in ccs.items():
        subgraphs[i].add(node)

    # the basic idea here: if any member of connected component is
    # one of our hybrid node/edge creatures, we need to help the
    # graph library recognise that that anything connected *via*
    # the edge should also be considered as connected *to* the
    # edge, so we merge the components
    eaten = set()  # ids of components already absorbed into another one
    merged = {}
    prior = subgraphs.keys()
    # Repeat the merge pass until the sorted key list of `merged` equals
    # the one recorded at the start of the pass.
    # NOTE(review): on the first test `prior` is the raw keys() result, not
    # a sorted list -- confirm the first comparison behaves as intended.
    while sorted(merged.keys()) != prior:
        prior = sorted(merged.keys())
        merged = {}
        for k in subgraphs:
            if k in eaten:
                continue
            subg = subgraphs[k]
            # Copy, so growing merged[k] does not change the set iterated below.
            merged[k] = copy.copy(subg)
            for n in subg:
                e = self.mirror(n)
                if e is not None:
                    links = set(self.links(e))
                    # Absorb every other component that shares a node with
                    # the links of the mirrored edge.
                    for k2 in subgraphs.keys():
                        links2 = subgraphs[k2]
                        if k2 != k and not links2.isdisjoint(links):
                            eaten.add(k2)
                            merged[k] |= links2
        subgraphs = merged
    ccs = frozenset([frozenset(v) for v in subgraphs.values()])
    return ccs
def GetExactParsimonyList(peptides, protein_dict):
    """Return a parsimonious list of proteins explaining the peptides.

    Args:
        peptides: dict mapping each peptide to the sequence of proteins it
            may belong to.
        protein_dict: dict mapping each protein to the set of its peptides.
            NOTE: it is modified in place -- every set is reduced to the
            peptides left unexplained after the first step.

    Steps:
        1. Every protein owning a peptide that maps to it alone is accepted,
           and all of its peptides count as explained.
        2. Proteins of the unexplained peptides are linked when they share a
           peptide; each connected component is solved independently with
           getExactSolution().

    Returns:
        list of accepted proteins.
    """
    parsimony_list = []
    explained = set()
    for pr, pe in protein_dict.items():
        if any(len(peptides[p]) == 1 for p in pe):
            parsimony_list.append(pr)
            explained.update(pe)

    unexp_peptides = set(peptides).difference(explained)
    for pr in list(protein_dict):
        protein_dict[pr] = protein_dict[pr].intersection(unexp_peptides)
    print("There are " + str(len(unexp_peptides)) + " unexplained peptides")

    gr = graph()
    for pe in unexp_peptides:
        proteins = peptides[pe]
        # Fix: add each missing node individually. The original added nodes
        # only when *all* proteins were new, leaving some nodes missing.
        for protein in proteins:
            if not gr.has_node(protein):
                gr.add_node(protein)
        # Add edges between all these proteins, skipping edges that an
        # earlier peptide already created.
        for i in range(len(proteins) - 1):
            for j in range(i + 1, len(proteins)):
                edge = (proteins[i], proteins[j])
                if not gr.has_edge(edge):
                    gr.add_edge(edge)
    print("%d unexplained peptides, %d proteins in the graph, with %d connections " % (
        len(unexp_peptides), len(gr.nodes()), len(gr.edges())))

    # Group proteins by connected component.
    subgraphs_dict = {}
    for pr, comp in connected_components(gr).items():
        subgraphs_dict.setdefault(comp, set()).add(pr)
    subgraphs = list(subgraphs_dict.values())

    if subgraphs:
        sizes = [len(s) for s in subgraphs]
        print("%d independent subgraphs, with %d to %d proteins" % (
            len(subgraphs), min(sizes), max(sizes)))
    else:
        # Fix: min()/max() raised ValueError when nothing was left to solve.
        print("0 independent subgraphs")

    for sub in subgraphs:
        parsimony_list += getExactSolution(sub, protein_dict)

    print("%d proteins in the final list" % (len(parsimony_list), ))
    print("Done parsimony")
    return parsimony_list
def getCcp(self, targetFluent):
    """
    Returns all connected components in this graph that contain targetFluent.

    A component counts as containing targetFluent when at least one of its
    nodes reports ``node.contains(targetFluent)``. Each component is
    returned as a list of its nodes.
    """
    containers = []
    for candidate in self.nodes():
        if candidate.contains(targetFluent):
            containers.append(candidate)

    ccpMap = connected_components(self)  # dict: node -> ccpNumber

    # Component numbers that hold at least one containing node.
    tarCcpNumbers = set(ccpMap[node] for node in containers)

    return [
        [node for node, num in ccpMap.items() if num == wanted]
        for wanted in tarCcpNumbers
    ]
def heuristic(sequences, weights):
    """Build a tree over the sequences by repeated clustering.

    In each phase (labelled 'a', 'b', ...) heuristic_1() produces a graph
    over the current sequences; every connected component becomes a new
    internal node named <phase><component id>, linked to its members and
    represented in the next phase by a randomly generated typical sequence.

    Returns the tuple (result graph, root label); root is None when the
    loop stops in the first phase.
    """
    def gen_typical_sequence(sequences, seq_names):
        # For each column across the named sequences, pick one of the
        # characters at random.
        return ''.join([random.choice(x) for x in zip(*[sequences[n] for n in seq_names])])
    result = graph()
    result.add_nodes(sequences.keys())
    internal_node_label_phases = "abcdefghijklmnopqrstuvwxyz"
    root = None
    for phase in internal_node_label_phases:
        gr = heuristic_1(sequences, weights)
        print_dot(result,phase + ".png")
        cc = connected_components(gr)
        #print cc
        # NOTE(review): len(cc) is the number of nodes in the mapping, not
        # the number of distinct components -- confirm this is the intended
        # stop condition.
        size_cc = len(cc)
        if size_cc == 1:
            break
        cc_seqs = {}
        # Transform from {seq_name : component_index}
        # to {component_index : [seq_name]} joining by
        # component_index
        for seq_name in cc.keys():
            if cc[seq_name] not in cc_seqs:
                cc_seqs[cc[seq_name]] = []
            cc_seqs[cc[seq_name]].append(seq_name)
        typical_sequences = {}
        for k in cc_seqs.keys():
            typical_sequences[phase + str(k)] = gen_typical_sequence(sequences, cc_seqs[k])
        # add edges to the result graph, giving each component its own subtree
        for k in cc_seqs.keys():
            result.add_node(phase+str(k))
            for seq in cc_seqs[k]:
                result.add_edge((phase + str(k),seq))
        # The next phase clusters the typical sequences of this one.
        sequences = typical_sequences
        weights = gen_sequence_weights(sequences)
        # NOTE(review): assumes component ids start at 1 -- confirm.
        root = phase + str(1)
    return (result,root)
def colorMap(m):
    """Group the cells of height map ``m`` by steepest-descent neighbour.

    Every cell (row, col) is a node. A cell is linked to the 4-neighbour
    with the largest drop ``m[cell] - m[neighbour]`` when that drop is
    positive; ties go to the first neighbour in the order up, left, right,
    down. Returns connected_components() of the resulting graph.
    """
    g = pygraph.graph()
    height = len(m)
    width = len(m[0])
    for row in range(height):
        g.add_nodes([(row, col) for col in range(width)])

    # Neighbour offsets in the tie-breaking order: up, left, right, down.
    offsets = ((-1, 0), (0, -1), (0, 1), (1, 0))
    for i in range(height):
        for j in range(width):
            candidates = [
                (i + di, j + dj)
                for di, dj in offsets
                if 0 <= i + di < height and 0 <= j + dj < width
            ]
            if not candidates:
                continue

            # find the best candidate and add the edge to the graph
            val = m[i][j]
            best_candidate = candidates[0]
            best_fall = val - m[best_candidate[0]][best_candidate[1]]
            for cand in candidates[1:]:
                fall = val - m[cand[0]][cand[1]]
                if fall > best_fall:
                    best_fall, best_candidate = fall, cand
            if best_fall > 0:
                g.add_edge((i, j), best_candidate)
    return connected_components(g)
def teo_2(g, k):
    """Build a forest over g's nodes whose components all have >= k nodes.

    Works by induction on k: the forest for k - 1 is grown by repeatedly
    taking a component with fewer than k nodes and adding the minimum-weight
    edge of ``g`` that leaves it.

    Args:
        g: weighted graph exposing nodes(), edges() and edge_weight(edge).
        k: minimum component size, 1 <= k <= number of nodes.

    Returns:
        A new graph containing all nodes of ``g`` and the selected edges.

    Raises:
        ValueError: if k is out of range, or if a too-small component has no
            edge leaving it (``g`` is disconnected).
    """
    if k > len(g.nodes()):
        raise ValueError('FORBIDDEN: K > |V|')
    if k <= 0:
        raise ValueError('FORBIDDEN: K <= 0')

    # Base case: every node is its own component.
    if k == 1:
        forest = graph()
        for node in g.nodes():
            forest.add_node(node)
        return forest

    # Inductive step: start from a forest whose components have >= k - 1 nodes.
    forest = teo_2(g, k - 1)

    while True:
        # Recompute the components; the previous addition may have merged two.
        # presumably _transform_cc turns node -> id into a list of node
        # collections -- verify against its definition
        cc = _transform_cc(connected_components(forest))

        selected_component = next((c for c in cc if len(c) < k), None)
        if selected_component is None:
            break

        # Candidate edges have exactly one endpoint in the component.
        # Fix: the original only tested e[0], so an edge with both
        # endpoints inside could be chosen and create a cycle.
        used_edges = set(forest.edges())
        neighbor_edges = [
            e for e in g.edges()
            if e not in used_edges
            and e[0] in selected_component
            and e[1] not in selected_component
        ]
        if not neighbor_edges:
            raise ValueError(
                'no edge leaves a component smaller than K; graph is disconnected')
        forest.add_edge(min(neighbor_edges, key=g.edge_weight))

    return forest
def plot_nuclei_at_timestamp_with_tracklet_coloring(self):
    """
    3-d scatter plot of all nuclei, coloured by tracklet.

    A tracklet is a connected component of ``self.cell_tracker.graph``;
    each one gets a randomly assigned position on the jet colour map.
    """
    tracker = self.cell_tracker
    cc_dict = connected_components(tracker.graph)

    # Imported before the 3-d subplot is requested below.
    from mpl_toolkits.mplot3d import Axes3D

    fig = pylab.figure(figsize=(10,3), facecolor='white')
    fig.canvas.set_window_title("3-D plot of nuclei color coded by tracklet")
    ax = fig.add_subplot(111, projection='3d')

    # One colour-map position per tracklet id, in shuffled order.
    color_idx = np.linspace(0, 1, len(np.unique(cc_dict.values()))+1)
    np.random.shuffle(color_idx)

    x_vals, y_vals, z_vals, colors = [], [], [], []
    for node in tracker.graph.nodes():
        t, cp = tracker.get_cell_profile_info_from_node_name(node)
        profiles = tracker.list_of_cell_profiles_per_timestamp[t].list_of_cell_profiles
        x_vals.append(profiles[cp].nucleus.x)
        y_vals.append(profiles[cp].nucleus.y)
        z_vals.append(profiles[cp].nucleus.z)
        colors.append(pylab.cm.jet(color_idx[cc_dict[node]]))

    ax.scatter(x_vals, y_vals, z_vals, s=20, c = colors)
    ax.set_xlabel('X axis')
    ax.set_ylabel('Y axis')
    ax.set_zlabel('Z axis')
    ax.pbaspect = [1., 1., 0.3]
    pylab.show()
# Fragment: U, gr, h1 and h2 are defined before this point (not visible here).
W = []
E = []
# Build 30 edges between 'u<h1(i)>' and 'w<h2(i)>' nodes.
for i in range(30):
    x = 'u' + str(h1(i))
    y = 'w' + str(h2(i))
    U.append(x)
    W.append(y)
    E.append((x,y))
# De-duplicate the node names before adding them to the graph.
gr.add_nodes(list(set(U)))
gr.add_nodes(list(set(W)))
# NOTE(review): E may contain the same pair twice -- confirm add_edge
# tolerates duplicate edges.
for edge in E:
    gr.add_edge(edge)
# Invert node -> component id into component id -> [nodes].
inv_map = {}
for k, v in connected_components(gr).iteritems():
    inv_map[v] = inv_map.get(v, [])
    inv_map[v].append(k)
nodemapping = {}
newmap = {}
for k in inv_map.keys():
    # Smallest node suffix (name without its 'u'/'w' prefix) in the component.
    # NOTE(review): the suffixes are strings, so min() compares them
    # lexicographically, not numerically -- confirm this is intended.
    C = min(map(lambda x : x[1:], inv_map[k]))
    # Tag every node of the component with that suffix.
    for v in inv_map[k]:
        nodemapping[v] = v + '__' + str(C)
newgraph = graph()
newgraph.add_nodes(map(lambda x : nodemapping[x], gr.nodes()))
for edge in E:
    # The edge expressed in renamed nodes (the fragment is cut off here).
    Q = (nodemapping[edge[0]], nodemapping[edge[1]])
# Fragment: num_lists, nodes_list and rep are defined before this point.
for z in range( num_lists ):
    # Graph creation
    gr = graph()
    gr.add_nodes( nodes_list )
    # One edge per line: two whitespace-separated integer node ids.
    f_edges = open( 'tmp/edgesLists/permutation_test/edgesList.txt'+str(rep+1)+'_'+str(z+1) , 'r' )
    for line in f_edges:
        v = line.split()
        gr.add_edge( int(v[0]) , int(v[1]) )
        # gr.add_edge( (int(v[0]) , int(v[1])) )
    f_edges.close()
    connect_comp_dict = connected_components( gr )
    connect_comp = connect_comp_dict.keys()
    # cc: component id -> number of nodes in that component.
    cc = dict()
    for i in range( len( connect_comp ) ):
        id_cc = connect_comp_dict[ connect_comp[ i ] ]
        if id_cc not in cc:
            cc[ id_cc ] = 0
        cc[ id_cc ] += 1
    cc_ids = cc.keys()
    max_cc_size = 0
    count_1=0
    count_2=0
    count_3=0
    count_4=0
    count_5=0
    # The body of this loop lies beyond the visible fragment.
    for i in range( len( cc_ids ) ):
# Generates a random weighted graph and prints its nodes, followed by the
# edges whose first endpoint lies in connected component 1.
import pygraph.algorithms.generators as gen
import pygraph.algorithms.accessibility as acc
import pygraph.algorithms.minmax as minmax

# The arguments 5000 and 10000 are presumably the node and edge counts --
# verify against gen.generate.
graph = gen.generate(5000, 10000, weight_range=(50, 2000))
components = acc.connected_components(graph)
# Nodes belonging to component 1.
# NOTE(review): `nodes` is never used below; the node listing prints every
# node of the graph -- confirm which was intended.
nodes = [g for g in graph if components[g] == 1]
print "GRAPH NODES"
for n in graph.nodes():
    print n
print "GRAPH EDGES"
for e in graph.edges():
    # Only edges whose first endpoint is in component 1.
    if components[e[0]] == 1:
        w = graph.edge_weight(e)
        print (e[0], e[1], w)
# MST = minmax.minimal_spanning_tree(graph)
# print "MST NODES"
# for n in MST.keys():
#     print n
# print "MST EDGES"
# for k in MST.keys():
#     if MST[k] is not None:
#         print "(%d, %d)" % (k, MST[k])
#     else:
#         print "(%d, %d)" % (k, k)
def cell_lineage_gui(self, init_t1, init_t2, init_z1, init_z2):
    """
    Display original slice and segmentation for two time stamps.
    When clicking on one cell, show the daughter cells in the second time stamp.
    Slider bars to navigate time and z.

    init_t1 / init_z1 are the initial time stamp and z slice of the first
    (left) view, init_t2 / init_z2 those of the second view. Six subplots
    are drawn: image, segmentation and selection mask for view 1, then
    image, segmentation and lineage mask for view 2. Blocks in pylab.show().
    """
    fig = pylab.figure(figsize=(19,8), facecolor='white')
    fig.canvas.set_window_title("Cell lineage visualization")
    from matplotlib.widgets import Slider
    from matplotlib.widgets import Button
    t1 = init_t1
    z1 = init_z1
    t2 = init_t2
    z2 = init_z2
    # Tracklet (connected component) ids currently selected. The list is
    # mutated in place by the callbacks below.
    cc_of_interest = [-1]
    # node name -> tracklet id
    cc_dict = connected_components(self.cell_tracker.graph)
    I1 = self.fetch_slice_at(t1,z1)
    # One-element lists act as mutable cells that the nested callbacks
    # can update with Seg1[0] = ... / Seg2[0] = ...
    Seg1 = []
    Seg1.append(self.fetch_seg_at(t1,z1))
    Seg2 = []

    def make_cell_lineage_mask(tval, target_cc_index, Seg):
        # Build a mask over Seg: pixels with label 0 get max_val, and
        # segments whose tracklet is in target_cc_index get half_val added.
        labels = np.unique(Seg)
        target_cc_index = set(target_cc_index)
        # make the segment border max value
        max_val = len( self.cell_tracker.list_of_cell_profiles_per_timestamp[tval].list_of_cell_profiles)
        half_val = int(max_val / 2.)
        mask = np.uint(Seg == 0) * max_val
        for label in labels:
            # Labels 0 and 1 are reported as border and background in onpick.
            if label>1:
                cp_index = self.cell_tracker.list_of_cell_profiles_per_timestamp[tval].seg_label_to_cp_list_index[label]
                node_name = "t%d_c%d" % (tval, cp_index)
                cc_index = cc_dict[node_name]
                if cc_index in target_cc_index:
                    mask += np.uint(Seg==label) * half_val
                    print "--- LINK: Label:", label,"| Tracklet:", cc_index,"| CellProfile:", cp_index, "| Node:", node_name, "| t:",tval
        return mask

    def select_similar_cells(event):
        # Button callback: extend the selection with the tracklets of cells
        # that get_similar() returns for the currently selected cells.
        print "Select similar cells"
        # get cell profile indices of cells of interest
        target_cp_index = [ ]
        cc_of_interest_set = set(cc_of_interest)
        t1 = int(s_t1.val)
        # TODO: no need to search all time series, just search current time
        for node in self.cell_tracker.graph.nodes():
            t,c = self.cell_tracker.get_cell_profile_info_from_node_name(node)
            if t == t1:
                if cc_dict[node] in cc_of_interest_set:
                    target_cp_index.append(c)
        similar_cp_index = self.cell_tracker.list_of_cell_profiles_per_timestamp[t1].get_similar(target_cp_index)
        # get the connected components corresponding to these
        for cp_index in similar_cp_index:
            node_name = "t" + str(t1) + "_c" + str(cp_index)
            cc_of_interest.append(cc_dict[node_name])
        # remake selection mask and lineage_mask
        lineage_mask = make_cell_lineage_mask(int(s_t2.val), cc_of_interest,Seg2[0])
        selection_mask = make_cell_lineage_mask(t1,cc_of_interest, Seg1[0])
        l4.set_data(lineage_mask )
        l21.set_data(selection_mask)
        pylab.draw()

    I2 = self.fetch_slice_at(t2,z2)
    Seg2.append(self.fetch_seg_at(t2,z2))
    lineage_mask = make_cell_lineage_mask(init_t2, cc_of_interest,Seg2[0])
    selection_mask = make_cell_lineage_mask(init_t1,cc_of_interest, Seg1[0])
    #draw_points_at_t_z(init_time, init_z)
    # image subplot
    ax1 = pylab.subplot(161)
    pylab.subplots_adjust(bottom=0.25)
    l1 = pylab.imshow(I1, cmap = "gray")
    pylab.axis([0, I1.shape[1], I1.shape[0], 0])
    # segmentation subplot
    ax2 = pylab.subplot(162)
    pylab.subplots_adjust(bottom=0.25)
    l2 = pylab.imshow(Seg1[0], cmap = self.color_map, vmin = 0, vmax = len( self.cell_tracker.list_of_cell_profiles_per_timestamp[t1].list_of_cell_profiles), picker = True)
    pylab.axis([0, Seg1[0].shape[1], Seg1[0].shape[0], 0])
    # user selection mask
    ax21 = pylab.subplot(163)
    pylab.subplots_adjust(bottom=0.25)
    l21 = pylab.imshow(selection_mask, cmap = "gist_heat", vmin = 0, vmax = len( self.cell_tracker.list_of_cell_profiles_per_timestamp[t1].list_of_cell_profiles), picker = True)
    pylab.axis([0, Seg1[0].shape[1], Seg1[0].shape[0], 0])
    # image subplot
    ax3 = pylab.subplot(164)
    pylab.subplots_adjust(bottom=0.25)
    l3 = pylab.imshow(I2, cmap = "gray")
    pylab.axis([0, I2.shape[1], I2.shape[0], 0])
    # segmentation subplot
    # NOTE(review): vmax uses t1 here although this view shows t2 -- confirm.
    ax31 = pylab.subplot(165)
    pylab.subplots_adjust(bottom=0.25)
    l31 = pylab.imshow(Seg2[0], cmap = self.color_map, vmin = 0, vmax = len( self.cell_tracker.list_of_cell_profiles_per_timestamp[t1].list_of_cell_profiles))
    pylab.axis() #[0, Seg2[0].shape[1], 0, Seg2[0].shape[0]])
    # lineage mask subplot
    ax4 = pylab.subplot(166)
    pylab.subplots_adjust(bottom=0.25)
    l4 = pylab.imshow(lineage_mask, cmap = "gist_heat", vmin = 0, vmax = len( self.cell_tracker.list_of_cell_profiles_per_timestamp[t2].list_of_cell_profiles))
    pylab.axis([0, Seg2[0].shape[1], Seg2[0].shape[0], 0])
    # slider for time
    axcolor = 'lightgoldenrodyellow'
    ax_t1 = pylab.axes([0.2, 0.2, 0.25, 0.03], axisbg=axcolor)
    s_t1 = Slider(ax_t1, 'time-stamp', 0, len(self.cell_tracker.list_of_cell_profiles_per_timestamp)-1, valinit=init_t1)
    # presumably stored on the figure to keep a reference to the widget -- confirm
    fig._s_t1 = s_t1
    # slider for z
    axcolor = 'lightgoldenrodyellow'
    ax_z1 = pylab.axes([0.2, 0.15, 0.25, 0.03], axisbg=axcolor)
    s_z1 = Slider(ax_z1, 'z-slice', 0, int(CellECT.track_tool.globals.PARAMETER_DICT["z-slices-per-stack"])-1, valinit=init_z1)
    fig._s_z1 = s_z1
    # slider for time
    axcolor = 'lightgoldenrodyellow'
    ax_t2 = pylab.axes([0.6, 0.2, 0.25, 0.03], axisbg=axcolor)
    s_t2 = Slider(ax_t2, 'time-stamp', 0, len(self.cell_tracker.list_of_cell_profiles_per_timestamp)-1, valinit=init_t2)
    fig._s_t2 = s_t2
    # slider for z
    axcolor = 'lightgoldenrodyellow'
    ax_z2 = pylab.axes([0.6, 0.15, 0.25, 0.03], axisbg=axcolor)
    s_z2 = Slider(ax_z2, 'z-slice', 0, int(CellECT.track_tool.globals.PARAMETER_DICT["z-slices-per-stack"])-1, valinit=init_z2)
    fig._s_z2 = s_z2
    # select similar cells button
    ax_button1 = pylab.axes([0.3, 0.025, 0.4, 0.05])
    button1 = Button(ax_button1, "Select similar cells.")
    button1.on_clicked(select_similar_cells)
    fig._btn = button1
    t1 = init_t1
    z1 = init_z1
    # Last (z, t) drawn for each view; -1 forces the first slider update
    # to redraw.
    z_old1 = [-1]
    t_old1 = [-1]
    t2 = init_t2
    z2 = init_z2
    z_old2 = [-1]
    t_old2 = [-1]

    # call back for time slider
    def update_t1(val):
        # Redraw view 1 only when the integer (t, z) actually changed.
        t1 = int(s_t1.val)
        t2 = int(s_t2.val)
        z1 = int(s_z1.val)
        z_seg1 = int(s_z1.val)
        if (z_old1[0] != z1) or (t_old1[0] !=t1):
            I1 = self.fetch_slice_at(t1,z1)
            Seg1[0] = self.fetch_seg_at(t1,z1)
            selection_mask = make_cell_lineage_mask(t1, cc_of_interest, Seg1[0])
            l1.set_data(I1)
            l2.set_data(Seg1[0])
            l21.set_data(selection_mask)
            pylab.draw()
            z_old1[0] = z1
            t_old1[0] = t1

    # call back for z slider
    def update_z1(val):
        # Same body as update_t1.
        t1 = int(s_t1.val)
        t2 = int(s_t2.val)
        z1 = int(s_z1.val)
        z_seg1 = int(s_z1.val)
        if (z_old1[0] != z1) or (t_old1[0] !=t1):
            I1 = self.fetch_slice_at(t1,z1)
            Seg1[0] = self.fetch_seg_at(t1,z1)
            selection_mask = make_cell_lineage_mask(t1, cc_of_interest, Seg1[0])
            l1.set_data(I1)
            l2.set_data(Seg1[0])
            l21.set_data(selection_mask)
            pylab.draw()
            z_old1[0] = z1
            t_old1[0] = t1

    # call back for time slider
    def update_t2(val):
        # Redraw view 2 only when the integer (t, z) actually changed.
        t2 = int(s_t2.val)
        t1 = int(s_t1.val)
        z2 = int(s_z2.val )
        z_seg2 = int(s_z2.val)
        if (z_old2[0] != z2) or (t_old2[0] !=t2):
            I2 = self.fetch_slice_at(t2,z2)
            Seg2[0] = self.fetch_seg_at(t2,z2)
            lineage_mask = make_cell_lineage_mask(t2, cc_of_interest, Seg2[0])
            l3.set_data(I2)
            l31.set_data(Seg2[0])
            l4.set_data(lineage_mask)
            pylab.draw()
            z_old2[0] = z2
            t_old2[0] = t2

    # call back for z slider
    def update_z2(val):
        # Same body as update_t2.
        t2 = int(s_t2.val)
        t1 = int(s_t1.val)
        z2 = int(s_z2.val )
        z_seg2 = int(s_z2.val)
        if (z_old2[0] != z2) or (t_old2[0] !=t2):
            I2 = self.fetch_slice_at(t2,z2)
            Seg2[0] = self.fetch_seg_at(t2,z2)
            lineage_mask = make_cell_lineage_mask(t2, cc_of_interest, Seg2[0])
            l3.set_data(I2)
            l31.set_data(Seg2[0])
            l4.set_data(lineage_mask)
            pylab.draw()
            z_old2[0] = z2
            t_old2[0] = t2

    def onpick(event):
        # Pick callback: map the clicked pixel of Seg1 to its tracklet and
        # update the selection.
        xval = int(event.mouseevent.xdata)
        yval = int(event.mouseevent.ydata)
        tval = int(s_t1.val)
        label = Seg1[0][yval, xval]
        if label == 0:
            print "Border selected."
        if label == 1:
            print "Background selected."
        if label >1:
            cp_index = self.cell_tracker.list_of_cell_profiles_per_timestamp[tval].seg_label_to_cp_list_index[label]
            node_name = "t" + str(tval) + "_c" + str(cp_index)
            if event.mouseevent.button == 1:
                # left click starts new set
                # (the list is emptied in place so the other closures see it)
                for i in xrange(0,len(cc_of_interest)):
                    cc_of_interest.pop(0)
                cc_of_interest.append(cc_dict[node_name])
            elif event.mouseevent.button == 3:
                # right click toggles the tracklet in the selection set
                cc_name = cc_dict[node_name]
                if not cc_name in cc_of_interest:
                    cc_of_interest.append(cc_name)
                else:
                    cc_of_interest.remove(cc_name)
            print cc_of_interest
            print event.mouseevent.button
            if len(cc_of_interest):
                print "SELECTED: Label:", label, "| Tracklet:", cc_of_interest[-1], "| CellProfile:", cp_index, "| Node:", node_name, "| t:", tval
            t2 = int(s_t2.val)
            t1 = int(s_t1.val)
            lineage_mask = make_cell_lineage_mask( t2, cc_of_interest, Seg2[0])
            selection_mask = make_cell_lineage_mask( t1, cc_of_interest, Seg1[0])
            l4.set_data(lineage_mask )
            l21.set_data(selection_mask)
            pylab.draw()

    fig.canvas.mpl_connect('pick_event', onpick)
    s_t1.on_changed(update_t1)
    s_z1.on_changed(update_z1)
    s_t2.on_changed(update_t2)
    s_z2.on_changed(update_z2)
    pylab.show()
def plot_color_tracklets_time_sequence(self, init_time, init_z):
    """
    Show one grayscale slice of the volume and overlay every nucleus that
    lies close to that slice as a dot (plus a text label) coloured by the
    tracklet, i.e. the connected component of the tracking graph, it
    belongs to.

    Two sliders under the image select the time stamp and the z slice; the
    view is redrawn whenever either of them changes.

    init_time -- time stamp shown when the window opens
    init_z    -- z slice shown when the window opens
    """
    from matplotlib.widgets import Slider

    fig = pylab.figure(figsize=(10, 10), facecolor='white')
    fig.canvas.set_window_title("Plot tracklets as color coded nuclei at slice")

    # A nucleus is drawn when its z coordinate is less than this far from
    # the displayed slice.
    depth_range = 5

    def draw_points_at_t_z(t, z, ax):
        # Place one colour coded dot and one tracklet label per nearby nucleus.
        profiles = self.cell_tracker.list_of_cell_profiles_per_timestamp[t].list_of_cell_profiles
        for i, cp in enumerate(profiles):
            if not np.abs(cp.nucleus.z - z) < depth_range:
                continue
            tracklet = cc_dict["t%d_c%d" % (t, i)]
            color = pylab.cm.jet(color_idx[tracklet])
            ax.plot(cp.nucleus.y, cp.nucleus.x, "o", color=color)
            ax.text(cp.nucleus.y - 0.2, cp.nucleus.x + 0.2, tracklet,
                    fontsize=10, color=color)

    # Graph node name ("t<time>_c<index>") -> component id; the ids index a
    # shuffled table of colour-map positions.
    cc_dict = connected_components(self.cell_tracker.graph)
    color_idx = np.linspace(0, 1, len(np.unique(cc_dict.values())) + 1)
    np.random.shuffle(color_idx)

    image = self.fetch_slice_at(init_time, init_z)

    # Image subplot, with room left at the bottom for the sliders.
    ax1 = pylab.subplot(111)
    pylab.subplots_adjust(bottom=0.25)
    l1 = pylab.imshow(image, cmap="gray")
    pylab.axis([0, image.shape[1], image.shape[0], 0])
    draw_points_at_t_z(init_time, init_z, ax1)

    axcolor = 'lightgoldenrodyellow'

    # Slider for time.
    ax_t = pylab.axes([0.2, 0.1, 0.65, 0.03], axisbg=axcolor)
    last_timestamp = len(self.cell_tracker.list_of_cell_profiles_per_timestamp) - 1
    s_t = Slider(ax_t, 'time-stamp', 0, last_timestamp, valinit=init_time)
    fig._s_t = s_t

    # Slider for z.
    ax_z = pylab.axes([0.2, 0.05, 0.65, 0.03], axisbg=axcolor)
    last_slice = int(CellECT.track_tool.globals.PARAMETER_DICT["z-slices-per-stack"]) - 1
    s_z = Slider(ax_z, 'z-slice', 0, last_slice, valinit=init_z)
    fig._s_z = s_z

    # (t, z) of the view drawn by the last callback; (-1, -1) forces the
    # first callback to redraw.
    shown = [(-1, -1)]

    def refresh(val):
        # Shared call back for both sliders.
        current = (int(s_t.val), int(s_z.val))
        if current == shown[0]:
            return
        t, z = current
        l1.set_data(self.fetch_slice_at(t, z))
        # to remove old dots from current view
        ax1.lines = []
        ax1.texts = []
        draw_points_at_t_z(t, z, ax1)
        pylab.draw()
        shown[0] = current

    s_t.on_changed(refresh)
    s_z.on_changed(refresh)
    pylab.show()
def plot_tracklets_in_slice_with_seg(self, init_time, init_z):
    """
    Plot a slice of the volume at given t and z next to its segmentation.

    Left panel: the grayscale slice with the nearby nuclei drawn as dots
    colour coded (and labelled) by tracklet, i.e. by connected component of
    the tracking graph.  Right panel: the segmentation of the same slice
    with every segment label above 1 replaced by its tracklet id, shown with
    the "jet" colour map.  Clicking the segmentation prints the value under
    the cursor.  Slider bars navigate time and z.

    init_time -- time stamp shown when the window opens
    init_z    -- z slice shown when the window opens
    """
    from matplotlib.widgets import Slider
    import time

    fig = pylab.figure(figsize=(10, 10), facecolor='white')
    fig.canvas.set_window_title("Segmentation color coded by tracklets")

    # A nucleus is drawn when its z coordinate is less than this far from
    # the displayed slice.
    depth_range = 5
    t = init_time
    z = init_z

    # Graph node name ("t<time>_c<index>") -> component (tracklet) id.
    cc_dict = connected_components(self.cell_tracker.graph)
    color_idx = np.linspace(0, 1, len(np.unique(cc_dict.values())) + 1)
    np.random.shuffle(color_idx)

    def draw_points_at_t_z(t, z, ax):
        # place color coded dots
        profiles = self.cell_tracker.list_of_cell_profiles_per_timestamp[t].list_of_cell_profiles
        # NOTE(review): despite its name this is the number of time stamps
        # minus one, and the expression below divides by it, so it raises
        # ZeroDivisionError when only one time stamp is loaded.  Kept as is
        # because the intended colour scaling is not clear from here.
        total_tracklets = len(self.cell_tracker.list_of_cell_profiles_per_timestamp) - 1
        for i, cp in enumerate(profiles):
            if np.abs(cp.nucleus.z - z) < depth_range:
                tracklet = cc_dict["t%d_c%d" % (t, i)]
                index = min(tracklet, int(tracklet / float(total_tracklets) * 255))
                color = pylab.cm.jet(color_idx[index])
                ax.plot(cp.nucleus.y, cp.nucleus.x, "o", color=color)
                ax.text(cp.nucleus.y, cp.nucleus.x, tracklet, fontsize=10, color=color)

    # TODO: Reassign color map instead of segmentation
    def relabel_to_tracklet(Seg, tval):
        # Overwrite, in place, every label > 1 with the id of the tracklet
        # that the corresponding cell profile belongs to.  Labels 0 and 1
        # are left untouched.  The mapping is evaluated once per distinct
        # label and applied through the inverse index of np.unique rather
        # than by visiting every pixel in a Python loop.
        profiles = self.cell_tracker.list_of_cell_profiles_per_timestamp[tval]
        labels, inverse = np.unique(Seg, return_inverse=True)
        relabeled = labels.copy()
        for k, label in enumerate(labels):
            if label > 1:
                cp_index = profiles.seg_label_to_cp_list_index[label]
                relabeled[k] = cc_dict["t%d_c%d" % (tval, cp_index)]
        Seg[...] = relabeled[inverse].reshape(Seg.shape)

    I = self.fetch_slice_at(t, z)
    Seg = self.fetch_seg_at(t, z)

    # Report how long the initial relabelling takes.
    time1 = time.time()
    relabel_to_tracklet(Seg, init_time)
    print(time.time() - time1)

    # Segmentation currently on screen.  The slider call backs replace the
    # entry so that picking always reports what is actually displayed and
    # not the slice the window was opened with.
    shown_seg = [Seg]

    def onpick(event):
        x = int(event.mouseevent.xdata)
        y = int(event.mouseevent.ydata)
        print(shown_seg[0][y, x])

    # image subplot
    ax1 = pylab.subplot(121)
    pylab.subplots_adjust(bottom=0.25)
    l1 = pylab.imshow(I, cmap="gray")
    pylab.axis([0, I.shape[1], I.shape[0], 0])
    draw_points_at_t_z(t, z, ax1)

    # segmentation subplot
    ax2 = pylab.subplot(122)
    pylab.subplots_adjust(bottom=0.25)
    l2 = pylab.imshow(Seg, vmin=0, vmax=len(color_idx), cmap="jet", picker=True)
    pylab.axis([0, Seg.shape[1], Seg.shape[0], 0])

    axcolor = 'lightgoldenrodyellow'

    # slider for time
    ax_t = pylab.axes([0.2, 0.1, 0.65, 0.03], axisbg=axcolor)
    s_t = Slider(ax_t, 'time-stamp', 0,
                 len(self.cell_tracker.list_of_cell_profiles_per_timestamp) - 1,
                 valinit=init_time)
    fig._s_t = s_t

    # slider for z
    ax_z = pylab.axes([0.2, 0.05, 0.65, 0.03], axisbg=axcolor)
    s_z = Slider(ax_z, 'z-slice', 0,
                 int(CellECT.track_tool.globals.PARAMETER_DICT["z-slices-per-stack"]) - 1,
                 valinit=init_z)
    fig._s_z = s_z

    # (z, t) of the view drawn by the last call back; -1 forces the first
    # call back to redraw.
    z_old = [-1]
    t_old = [-1]

    def show_view(t, z):
        # Redraw both panels for (t, z) unless that view is already shown.
        if (z_old[0] == z) and (t_old[0] == t):
            return
        l1.set_data(self.fetch_slice_at(t, z))
        seg = self.fetch_seg_at(t, z)
        relabel_to_tracklet(seg, t)
        l2.set_data(seg)
        shown_seg[0] = seg
        # to remove old dots from current view
        ax1.lines = []
        ax1.texts = []
        draw_points_at_t_z(t, z, ax1)
        pylab.draw()
        z_old[0] = z
        t_old[0] = t

    # call back for time slider
    def update_t(val):
        # The time call back snaps z down to an even slice, the z call back
        # does not.  NOTE(review): the asymmetry is preserved from the
        # original code; confirm whether it is intended.
        show_view(int(s_t.val), int(s_z.val / 2) * 2)

    # call back for z slider
    def update_z(val):
        show_view(int(s_t.val), int(s_z.val))

    s_t.on_changed(update_t)
    s_z.on_changed(update_z)
    fig.canvas.mpl_connect('pick_event', onpick)
    pylab.show()
def calculate_components(self, graph):
    """
    Tag every edge of ``graph`` with a connected-component attribute.

    The attribute is the pair (Visualization.ATTRIBUTE_KEY_COMPONENT, id),
    where id is the larger of the component ids that connected_components()
    reports for the two end nodes of the edge.  The graph is modified in
    place; nothing is returned.
    """
    component_of = connected_components(graph)
    for edge in graph.edges():
        key = Visualization.ATTRIBUTE_KEY_COMPONENT
        label = max(component_of[edge[0]], component_of[edge[1]])
        graph.add_edge_attribute(edge, (key, label))
def calculate_components(self, graph):
    """
    Store on each edge of ``graph`` the id of the connected component it
    lies in.

    For every edge an attribute (Visualization.ATTRIBUTE_KEY_COMPONENT, id)
    is added, id being the maximum of the component ids of the edge's two
    end nodes as computed by connected_components().  Works in place and
    returns None.
    """
    membership = connected_components(graph)

    def component_attribute(link):
        # Attribute tuple for one edge.
        return (Visualization.ATTRIBUTE_KEY_COMPONENT,
                max((membership[link[0]], membership[link[1]])))

    for link in graph.edges():
        graph.add_edge_attribute(link, component_attribute(link))
import pygraph.algorithms.generators as gen
import pygraph.algorithms.accessibility as acc
import pygraph.algorithms.minmax as minmax

# Generate a random weighted graph (5000 nodes, 10000 edges, weights drawn
# from 50..2000) and dump its nodes, followed by the weighted edges that
# belong to connected component number 1.
graph = gen.generate(5000, 10000, weight_range=(50, 2000))
components = acc.connected_components(graph)

# Nodes of component 1 (collected but not used further below).
nodes = [g for g in graph if components[g] == 1]

print("GRAPH NODES")
for node in graph.nodes():
    print(node)

print("GRAPH EDGES")
for edge in graph.edges():
    if components[edge[0]] != 1:
        continue
    # Under Python 2, which this script targets, the parenthesised form
    # prints the tuple (source, target, weight).
    print(edge[0], edge[1], graph.edge_weight(edge))

# Disabled: dump of the minimal spanning tree.
# MST = minmax.minimal_spanning_tree(graph)
# print "MST NODES"
# for n in MST.keys():
#     print n
# print "MST EDGES"
# for k in MST.keys():
#     if MST[k] is not None:
#         print "(%d, %d)" % (k, MST[k])
#     else:
#         print "(%d, %d)" % (k, k)
def get_parsimony_list(peptides, qval_threshold, verbosity=2):
    '''
    Build a parsimonious list of proteins for a list of PercolatorPeptide
    objects.

    Proteins owning at least one peptide that maps to no other protein are
    always included.  The peptides they leave unexplained are used to build
    a graph that links proteins sharing such a peptide; each connected
    component of that graph is then solved by get_exact_solution() and the
    result appended.

    peptides       -- list of PercolatorPeptide objects
    qval_threshold -- passed on to get_dict()
    verbosity      -- passed on to print_message() with message level 2

    Returns the list of selected proteins.
    '''
    def report(text):
        # Every progress message of this function is emitted at level 2.
        print_message(text, verbosity, 2)

    def report_final_size():
        report("\n------\n{} proteins in the final list\n".format(
            len(parsimony_list)))

    # create dictionaries
    peptide_dict, protein_dict = get_dict(peptides, qval_threshold)
    report("\n{} peptides, mapping to {} proteins are used".format(
        len(peptide_dict), len(protein_dict)))

    parsimony_list = []
    explained = set()

    # include the proteins that have at least one unique peptide
    report("\nFind the proteins with unique peptides...")
    for pr, pe in protein_dict.iteritems():
        for p in pe:
            if len(peptide_dict[p]) == 1:
                parsimony_list.append(pr)
                explained.update(pe)
                break
    report("{} such proteins found, explaining {} peptides".format(
        len(parsimony_list), len(explained)))

    # restrict every protein to the peptides that are still unexplained
    unexp_peptides = set(peptide_dict.keys()).difference(explained)
    for pr in protein_dict.keys():
        protein_dict[pr] = protein_dict[pr].intersection(unexp_peptides)

    if not unexp_peptides:
        report_final_size()
        return parsimony_list

    # build a graph: one node per protein, one edge per pair of proteins
    # that share an unexplained peptide
    report("\nBuild a graph...")
    gr = graph()
    for pe in unexp_peptides:
        proteins = peptide_dict[pe]
        for my_protein in proteins:
            if not gr.has_node(my_protein):
                gr.add_node(my_protein)
        for i, first in enumerate(proteins[:-1]):
            for second in proteins[i + 1:]:
                pair = (first, second)
                if not gr.has_edge(pair):
                    gr.add_edge(pair)
    report("{} unexplained peptides, {} nodes, with {} edges ".format(
        len(unexp_peptides), len(gr.nodes()), len(gr.edges())))

    # get the connected components, grouped as component id -> proteins
    report("\nGet the connected components ...")
    con_components = connected_components(gr)
    subgraphs_dict = {}
    for pr, comp in con_components.iteritems():
        members = subgraphs_dict.get(comp, set())
        subgraphs_dict[comp] = members.union(set([pr]))
    subgraphs = subgraphs_dict.values()
    sizes = [len(s) for s in subgraphs]
    report("{} subgraphs, with {} to {} proteins".format(
        len(subgraphs_dict), min(sizes), max(sizes)))

    # get the exact solution in each subgraph
    report("\nPerform exhaustive search in subgraphs")
    for sub in subgraphs:
        parsimony_list += get_exact_solution(sub, protein_dict)

    report_final_size()
    return parsimony_list
# NOTE(review): the loop below appears to be the tail of a function whose
# header lies outside this chunk (it reads yourcut, max_degree and gr, none
# of which are defined here).  It is reproduced unchanged; its real
# indentation level cannot be recovered from this view.
# Delete every edge whose cut value is below max_degree + 1 and that is
# still present in the graph, reporting each deletion.
for edge_name, cut_degree in yourcut.items():
    if (cut_degree < max_degree + 1) and (gr.has_edge(edge_name)):
        gr.del_edge(edge_name)
        print 'cluster break x 1', edge_name


# Demo: eight 2-D points on a graph whose edges form two groups of nodes,
# {0, 1, 2, 3, 4} and {5, 6, 7}.  group() is run on it with write_mark=True,
# minimum_cluster=1 and alpha=0.01, then the graph and its connected
# components are printed.
if __name__ == '__main__':
    gr = graph()
    # Coordinates of node i are setN[i].
    setN = [
        array([0.0, 0.0]),
        array([1.0, 0.0]),
        array([0.5, 0.5]),
        array([0.5, 1.0]),
        array([-0.1, -0.1]),
        array([2.0, 1.0]),
        array([2.0, 0.0]),
        array([1.5, 0.5])
    ]
    gr.add_nodes(range(8))
    # first group
    gr.add_edge((0, 1))
    gr.add_edge((0, 2))
    gr.add_edge((2, 1))
    gr.add_edge((3, 2))
    gr.add_edge((0, 4))
    # second group
    gr.add_edge((5, 6))
    gr.add_edge((5, 7))
    gr.add_edge((7, 6))
    group(setN, gr, True, 1, 0.01)
    print gr, connected_components(gr)
def group(setN, gr, write_mark, minimum_cluster, alpha):
    """
    Split the graph ``gr`` into clusters by deleting edges, in place.

    Each node gets a density derived from the mean distance to its graph
    neighbours.  Nodes are then labelled with a cluster, edges joining two
    different clusters ("borders") are collected, and a border is removed
    from the graph when the density at both cluster labels exceeds the
    density at the border by at least a per-component threshold.

    setN            -- sequence of coordinate vectors; setN[i] belongs to
                       node i and must support subtraction (the demo in this
                       file passes numpy arrays)
    gr              -- graph whose nodes are the integers 0 .. len(setN)-1;
                       edges are deleted from it
    write_mark      -- when true, run the final cut_tree based pass
    minimum_cluster -- component count that triggers the final pass: it
                       only runs when the graph has fewer components
    alpha           -- scale factor of the per-component density threshold

    Returns None.
    """
    N = len(setN)

    # Density of node i: 1 / (1 + mean distance to its neighbours)^2.  A node
    # without neighbours, or whose summed distance is zero, gets density 1.
    density = zeros(N)
    for i in range(N):
        distances = 0.0
        neighbor_set = gr.neighbors(i)
        for each_node in neighbor_set:
            tmp_d = setN[i] - setN[each_node]
            distances += sqrt(inner(tmp_d, tmp_d))
        if distances == 0.0 or len(neighbor_set) == 0:
            distances = 0.0
        else:
            distances = distances / float(len(neighbor_set))
        density[i] = 1.0 / pow(1 + distances, 2.0)
    # Working copy in which already labelled nodes are marked with -1 so
    # that argmax skips them.
    density_copy = deepcopy(density)

    #* 1 *
    # Nodes accepted by is_max() become the label of their own cluster
    # (presumably is_max tests for a local density maximum -- it is defined
    # elsewhere, confirm there).
    remain_set = set(range(N))
    clusters = {}
    for i in range(N):
        if is_max(i, gr, density):
            clusters[i] = i
            remain_set.remove(i)
            density_copy[i] = -1

    #* 2 *
    # Label the remaining nodes in order of decreasing density.
    while len(remain_set) > 0:
        unlabeled_max = argmax(density_copy)
        density_copy[unlabeled_max] = -1
        #* 3 *
        # The node inherits the cluster of its densest neighbour.
        # NOTE(review): this assumes the node has at least one neighbour and
        # that this neighbour is already labelled -- confirm against is_max.
        neighbor_set = gr.neighbors(unlabeled_max)
        tmp_density = density[neighbor_set]
        label_index = neighbor_set[argmax(tmp_density)]
        clusters[unlabeled_max] = clusters[label_index]
        remain_set.remove(unlabeled_max)

    #algorithm 2
    #find boundary edges
    cluster_centers = clusters.values()  # not used below
    # Border edges join two different clusters; the index comparison keeps
    # only one orientation of each edge.
    borders = []
    for each_edge in gr.edges():
        if clusters[each_edge[0]] != clusters[each_edge[1]]:
            if each_edge[0] < each_edge[1]:
                borders.append(each_edge)

    #find big clusters
    # Pick one representative node ("head") for every connected component.
    connected_groups = connected_components(gr)
    group_count = len(set(connected_groups.values()))
    big_cluster_head = []
    tmp_set_appeared = set([])
    for head, group_index in connected_groups.items():
        if group_index not in tmp_set_appeared:
            big_cluster_head.append(head)
            tmp_set_appeared.add(group_index)

    #construct edge set
    # Component id -> what accessibility() reports for the component's head
    # (presumably the nodes reachable from it -- verify against the library).
    heads_tails = accessibility(gr)
    head_and_tail = {}
    for head, tail in heads_tails.items():
        if head in big_cluster_head:
            head_and_tail[connected_groups[head]] = tail

    #thresholds of the super clusters
    # Gc[component] = alpha * mean absolute density difference over the
    # pairs of ``tail`` that are joined by an edge; 0 when there is no such
    # pair.  (``head`` is a component id in this loop.)
    Gc = {}
    for head, tail in head_and_tail.items():
        tmp_tresh = 0.0
        count = 0.0
        for each_edge in itertools.combinations(tail, 2):
            if gr.has_edge(each_edge):
                count += 1.0
                tmp_tresh += abs(density[each_edge[0]] - density[each_edge[1]])
        if count == 0.0:
            Gc[head] = 0.0
        else:
            Gc[head] = alpha * tmp_tresh / count

    #* 2 *
    # Examine every border edge once.
    while len(borders) > 0:
        current_border = borders.pop()
        # Density at the border, and at the two cluster labels it separates.
        Dab = max(density[current_border[0]], density[current_border[1]])
        Dca = density[clusters[current_border[0]]]
        Dcb = density[clusters[current_border[1]]]
        Gtresh = Gc[connected_groups[current_border[0]]]
        # Both ends of an edge are expected to lie in the same component
        # (connected_groups was computed before any deletion); otherwise
        # execution pauses for the operator.
        if connected_groups[current_border[0]] != connected_groups[
                current_border[1]]:
            raw_input('there be a problem')
        # Separate the clusters only when neither drop in density from a
        # cluster label to the border is below the threshold.
        if ((Dca - Dab < Gtresh) | (Dcb - Dab < Gtresh)) == False:
            clusterA = clusters[current_border[0]]
            clusterB = clusters[current_border[1]]
            gr.del_edge(current_border)
            # Also remove every other pending border between the same two
            # clusters; iterate over a copy because ``borders`` is modified.
            tmp_borders = deepcopy(borders)
            for each_edge in tmp_borders:
                if (clusters[each_edge[0]] == clusterA and
                        clusters[each_edge[1]] == clusterB) or (
                        clusters[each_edge[1]] == clusterA and
                        clusters[each_edge[0]] == clusterB):
                    gr.del_edge(each_edge)
                    borders.remove(each_edge)

    # Optional final pass: if the graph still has fewer components than
    # minimum_cluster, delete additional edges according to their cut_tree
    # values.
    if write_mark:
        connected_groups = connected_components(gr)
        group_count = len(set(connected_groups.values()))
        if minimum_cluster > group_count:
            grade = minimum_cluster - group_count
            from pygraph.algorithms.minmax import cut_tree
            yourcut = cut_tree(gr)
            # Needs a list here (Python 2 dict.values()) because entries are
            # removed from it below.
            yourset = yourcut.values()
            # Drop the ``grade`` smallest cut values; the smallest one left
            # becomes the deletion bound.
            for i in range(grade):
                print min(yourset)
                yourset.remove(min(yourset))
            max_degree = min(yourset)
            # Delete every edge whose cut value is below max_degree + 1.
            for edge_name, cut_degree in yourcut.items():
                if (cut_degree < max_degree + 1) and (gr.has_edge(edge_name)):
                    gr.del_edge(edge_name)
                    print 'cluster break x 1', edge_name