def compute_transitions_old(self):
    """Score directed transitions between groups from single-cell edges.

    Reads the sparse matrix stored under ``uns['velocyto_transitions']``,
    collapses it onto the groups given by ``obs[self._groups_key]`` and, for
    every pair of groups connected in the collapsed graph, stores two scores:

    * ``transitions_ttest``: ``-log10`` of the p-value (floored at ``1e-10``)
      of a one-sample t-test of the signed edge weights against zero,
      written into the direction indicated by the sign of the t-statistic;
    * ``transitions_confidence``: the difference between the numbers of
      forward and backward edges, divided by the geometric mean of
      ``n_neighbors * group size`` of both groups.

    Pairs with fewer than five edge observations get zero in both matrices.
    Both results are stored transposed on ``self``.
    """
    import igraph

    velocity_adjacency = self._adata.uns['velocyto_transitions']

    # Weighted graph: collapsing without combining keeps one edge per
    # single-cell edge in the cluster graph.
    weighted_graph = utils.get_igraph_from_adjacency(
        velocity_adjacency, directed=True)
    weighted_clustering = igraph.VertexClustering(
        weighted_graph,
        membership=self._adata.obs[self._groups_key].cat.codes.values)
    per_cell_edges = weighted_clustering.cluster_graph(combine_edges=False)

    # Boolean graph: summing the collapsed edges simply counts them.
    boolean_graph = utils.get_igraph_from_adjacency(
        velocity_adjacency.astype('bool'), directed=True)
    boolean_clustering = igraph.VertexClustering(
        boolean_graph,
        membership=self._adata.obs[self._groups_key].cat.codes.values)
    counted_edges = boolean_clustering.cluster_graph(combine_edges='sum')

    transitions = utils.get_sparse_from_igraph(
        counted_edges, weight_attr='weight')
    total_n = self._neighbors.n_neighbors * np.array(boolean_clustering.sizes())
    ttest_scores = transitions.copy()
    confidence = transitions.copy()

    from scipy.stats import ttest_1samp

    for src in range(transitions.shape[0]):
        for dst in transitions[src].nonzero()[1]:
            forward = per_cell_edges.es.select(_source=src, _target=dst)['weight']
            backward = per_cell_edges.es.select(_source=dst, _target=src)['weight']
            # Edges in the backward direction enter with a minus sign.
            values = np.array(list(forward) + list(-np.array(backward)))

            # Too few observations: no statement about this pair.
            if len(values) < 5:
                ttest_scores[src, dst] = 0
                ttest_scores[dst, src] = 0
                confidence[src, dst] = 0
                confidence[dst, src] = 0
                continue

            t, prob = ttest_1samp(values, 0.0)
            # A positive statistic means more outgoing than ingoing weight,
            # i.e. a transition from src to dst; otherwise the reverse.
            if t > 0:
                winner, loser = (src, dst), (dst, src)
            else:
                winner, loser = (dst, src), (src, dst)
            ttest_scores[winner[0], winner[1]] = -np.log10(max(prob, 1e-10))
            ttest_scores[loser[0], loser[1]] = 0

            geom_mean = np.sqrt(total_n[src] * total_n[dst])
            diff = (len(forward) - len(backward)) / geom_mean
            if diff > 0:
                confidence[src, dst] = diff
                confidence[dst, src] = 0
            else:
                confidence[dst, src] = -diff
                confidence[src, dst] = 0

    ttest_scores.eliminate_zeros()
    confidence.eliminate_zeros()
    # Transposed so that entry ij means a transition from j to i, the
    # convention used for stochastic matrices.
    self.transitions_ttest = ttest_scores.T
    self.transitions_confidence = confidence.T
def clusters(
    self,
    value: Union[ig.VertexClustering, dict[int, list[int]], list[int]],
) -> None:
    """Store the partition of ``self.graph`` in ``self._partition``.

    Parameters
    ----------
    value
        One of:

        * an ``ig.VertexClustering``, stored as is;
        * a dict whose values are sequences; the entries are visited in
          sorted key order and the first element of each value becomes the
          membership of the corresponding vertex;
        * a membership list, passed directly to ``ig.VertexClustering``.

    Raises
    ------
    TypeError
        If ``value`` is none of the supported types. Previously such values
        were ignored silently, leaving the old partition in place.
    """
    if isinstance(value, ig.VertexClustering):
        self._partition = value
    elif isinstance(value, dict):
        membership = [
            communities[0] for _, communities in sorted(value.items())
        ]
        self._partition = ig.VertexClustering(self.graph, membership=membership)
    elif isinstance(value, list):
        self._partition = ig.VertexClustering(self.graph, membership=value)
    else:
        raise TypeError(
            "clusters must be a VertexClustering, a dict or a membership "
            f"list, got {type(value).__name__}"
        )
def compute(self, two):
    """Detect communities on the graph returned by ``self.build_graph()``.

    Parameters
    ----------
    two
        When truthy, force a dichotomy: every assignment produced by
        ``twocomms(memb, i)`` for ``i`` in ``range(2 ** n_communities)`` is
        scored and the best one is kept. This is exponential in the number
        of detected communities.

    Returns
    -------
    tuple
        ``(membership, modularity)`` where the modularity is computed with
        the ``"weight"`` edge attribute.
    """
    g = self.build_graph()
    comms = igraph.Graph.community_multilevel(
        g, weights="weight", return_levels=False)
    memb = comms.membership

    if two:
        bestmod = -1
        best = None
        for i in range(2 ** len(comms)):
            memb2 = twocomms(memb, i)
            # Score candidates with the same weighted modularity that is
            # used for detection and for the returned value; the previous
            # unweighted score could select a different dichotomy.
            m = g.modularity(memb2, weights="weight")
            if m >= bestmod:  # ">=": the last of equally good candidates wins
                bestmod = m
                best = memb2
        # If no candidate could be scored (e.g. NaN modularity), keep the
        # multilevel result instead of passing None on.
        if best is not None:
            memb = best

    mod = g.modularity(memb, weights="weight")
    return memb, mod
def gan_community_detection():
    """Run spinglass community detection on the graph from ``load_gan()``.

    The weighted edges are copied into a networkx ``DiGraph``, written to
    ``GRAPHML_PATH`` and read back as an igraph graph, on which
    ``community_spinglass`` is run. The clustering is also plotted.

    Returns
    -------
    tuple
        ``(communities, modularity)`` where ``communities`` is a list of
        sets holding the ``'id'`` attribute of the member vertices.
    """
    gan = load_gan()

    # This igraph graph is replaced below before it is used; the calls are
    # kept so that load_apps() still runs exactly as before.
    graph = ig.Graph(directed=True)
    graph.add_vertices(load_apps())

    weighted_edges = [
        (u, v, float(gan[u][v]['weight'])) for u, v in gan.edges()
    ]

    # Round-trip through GraphML: networkx writes, igraph reads.
    graph = nx.DiGraph()
    graph.add_weighted_edges_from(weighted_edges)
    nx.write_graphml(graph, GRAPHML_PATH)
    graph = ig.Graph.Read_GraphML(GRAPHML_PATH)
    graph.vs['size'] = [10] * len(graph.vs)

    clusters = graph.community_spinglass()
    vc = ig.VertexClustering(graph, clusters.membership)
    result = [
        set([graph.vs[i]['id'] for i in community]) for community in vc
    ]
    ig.plot(vc, bbox=(2400, 1400))
    return result, clusters.modularity
def plot(drama, caption=True):
    """Render the network of ``drama`` to ``<outputfolder><title>.png``.

    Parameters
    ----------
    drama
        Mapping providing ``"title"`` and ``"graph"`` (an igraph graph with
        a ``"name"`` vertex attribute and a ``"weight"`` edge attribute).
    caption
        When true, the title is drawn centred near the bottom of the image.

    Only the largest connected component is drawn. The previous code built
    ``ig.VertexClustering(graph)`` without a membership, which puts every
    vertex into one cluster, so ``giant()`` returned the whole graph.

    Drawing the network is best effort: if it fails, the failure is logged
    and the (possibly empty) plot is still captioned and saved.
    """
    import logging

    canvas = ig.Plot(outputfolder + drama.get("title") + ".png",
                     bbox=(600, 600), background="white")
    try:
        graph = drama.get("graph").components(mode="weak").giant()
        visual_style = {
            "layout": graph.layout_fruchterman_reingold(),
            "vertex_color": "#0000ff",
            "vertex_shape": "rectangle",
            "vertex_size": 8,
            "vertex_label": graph.vs["name"],
            "vertex_label_size": 15,
            "vertex_label_dist": 1.5,
            "edge_color": "#6495ed",
            "edge_width": graph.es["weight"],
            "bbox": (600, 600),
            "margin": 50,
        }
        canvas.add(graph, **visual_style)
    except Exception:
        # Keep going so that one bad graph does not abort a batch, but do
        # not hide the reason.
        logging.getLogger(__name__).warning(
            "Could not draw the network of %r; saving the plot without it",
            drama.get("title"), exc_info=True)

    if caption:
        # Make the plot draw itself on the Cairo surface.
        canvas.redraw()
        # Grab the surface, construct a drawing context and a TextDrawer.
        ctx = cairo.Context(canvas.surface)
        ctx.set_font_size(15)
        drawer = TextDrawer(ctx, drama.get("title"), halign=TextDrawer.CENTER)
        drawer.draw_at(0, 597, width=600)

    canvas.save()
def _compute_connectivities_v1_2(self):
    """Compute group connectivities relative to a random-edge expectation.

    Every stored entry of ``self._neighbors.distances`` is treated as a
    directed edge of weight 1. Edges are counted between the groups given
    by ``obs[self._groups_key]`` and each symmetrised count is divided by
    ``(es[i] * ns[j] + es[j] * ns[i]) / (n - 1)``, capped at 1, where ``ns``
    are the group sizes, ``n`` their sum and ``es`` the edge counts per group
    (inner edges plus the row sum of the inter-group counts).

    Sets ``self.ns``, ``self.expected_n_edges_random``,
    ``self.connectivities`` and ``self.connectivities_tree``.

    Returns
    -------
    tuple
        ``(inter_group_edge_counts_csr, connectivities)``.
    """
    import igraph

    unit_edges = self._neighbors.distances.copy()
    unit_edges.data = np.ones(len(unit_edges.data))
    # Distances are not symmetric, so the graph has to be directed.
    graph = utils.get_igraph_from_adjacency(unit_edges, directed=True)
    clustering = igraph.VertexClustering(
        graph, membership=self._adata.obs[self._groups_key].cat.codes.values)

    ns = clustering.sizes()
    n = sum(ns)
    inner_edge_counts = [
        clustering.subgraph(group).ecount() for group in range(len(ns))
    ]
    cluster_graph = clustering.cluster_graph(combine_edges='sum')
    inter_es = utils.get_sparse_from_igraph(cluster_graph, weight_attr='weight')
    es = np.array(inner_edge_counts) + inter_es.sum(axis=1).A1
    inter_es = inter_es + inter_es.T  # \epsilon_i + \epsilon_j

    connectivities = inter_es.copy()
    expected_n_edges = inter_es.copy()
    inter_es = inter_es.tocoo()
    for i, j, observed in zip(inter_es.row, inter_es.col, inter_es.data):
        expected_random_null = (es[i] * ns[j] + es[j] * ns[i]) / (n - 1)
        # Without an expectation the pair counts as fully connected.
        ratio = observed / expected_random_null if expected_random_null != 0 else 1
        connectivities[i, j] = min(ratio, 1)
        expected_n_edges[i, j] = expected_random_null

    # set attributes
    self.ns = ns
    self.expected_n_edges_random = expected_n_edges
    self.connectivities = connectivities
    self.connectivities_tree = self._get_connectivities_tree_v1_2()
    return inter_es.tocsr(), connectivities
def _compute_connectivities_v1_0(self):
    """Compute group connectivities scaled by a geometric-mean reference.

    Every stored entry of ``self._neighbors.connectivities`` is treated as
    an edge of weight 1. The summed edge counts between the groups given by
    ``obs[self._groups_key]`` are halved and divided by
    ``sqrt(n_neighbors**2 * ns[i] * ns[j])``, with ``ns`` the group sizes.

    Sets ``self.ns``, ``self.connectivities`` and
    ``self.connectivities_tree``.

    Returns
    -------
    tuple
        ``(halved_inter_group_edge_counts_csr, connectivities)``.
    """
    import igraph

    unit_edges = self._neighbors.connectivities.copy()
    unit_edges.data = np.ones(len(unit_edges.data))
    graph = utils.get_igraph_from_adjacency(unit_edges)
    clustering = igraph.VertexClustering(
        graph, membership=self._adata.obs[self._groups_key].cat.codes.values)

    ns = clustering.sizes()
    cluster_graph = clustering.cluster_graph(combine_edges='sum')
    inter_es = utils.get_sparse_from_igraph(
        cluster_graph, weight_attr='weight') / 2
    connectivities = inter_es.copy()
    inter_es = inter_es.tocoo()

    n_neighbors_sq = self._neighbors.n_neighbors ** 2
    for i, j, observed in zip(inter_es.row, inter_es.col, inter_es.data):
        # n_neighbors**2 stays inside the sqrt for backwards compatibility.
        geom_mean_approx_knn = np.sqrt(n_neighbors_sq * ns[i] * ns[j])
        if geom_mean_approx_knn != 0:
            connectivities[i, j] = observed / geom_mean_approx_knn
        else:
            connectivities[i, j] = 1

    # set attributes
    self.ns = ns
    self.connectivities = connectivities
    self.connectivities_tree = self._get_connectivities_tree_v1_0(inter_es)
    return inter_es.tocsr(), connectivities
def get_communities(g, n, path, filename, algorithm='label_propagation'):
    """
    Gets a number of igraph.VertexClustering objects.

    These objects are loaded from file if possible, otherwise they are found
    using the given algorithm.

    :param g: The graph to find communities in.
    :param n: The number of communities to find.
    :param path: The path to the base folder for the graph.
    :param filename: The filename of the graph to use.
    :param algorithm: The name of the clustering algorithm to use; a key of
        ``_algorithms``.

    The filename and path arguments are used to find clusters stored on disk
    (``<path>/communities/<algorithm>/<filename>.json``). Any new clusters
    are stored along with the ones already present for future use. The file
    is locked through ``multithreading.lock_file_handle`` while it is read
    and rewritten.

    :return: A list of VertexClustering objects

    Examples
    --------
    >>> path = 'data/testing'
    >>> filename = 'test1'
    >>> g = load_network(path, filename)
    >>> comms = get_communities(g, 10, path, filename, algorithm='random_walk')
    >>> len(comms)
    10
    """
    cluster_path = '{}/communities/{}/{}.json'.format(path, algorithm, filename)
    ensure_folder(cluster_path)

    # Opening in append mode creates the file if needed without truncating
    # it; this handle is only used for locking. All handles are managed by
    # `with` so none is left open if an algorithm or the JSON codec raises.
    with open(cluster_path, 'a') as lock_handle:
        try:
            multithreading.lock_file_handle(lock_handle)

            # load any preexisting clusters
            try:
                with open(cluster_path, 'r') as stored:
                    cluster_sets = json.load(stored)
            except ValueError:
                # the file is probably empty because we just made it
                cluster_sets = []
            logger.info('Loaded {} communities'.format(len(cluster_sets)))

            # add new clusters if needed
            while len(cluster_sets) < n:
                logger.debug('{} / {} communities'.format(len(cluster_sets), n))
                clustering = _algorithms[algorithm](g)
                cluster_sets.append({
                    'membership': clustering.membership,
                    'modularity_params': clustering._modularity_params
                })

            # save the cluster sets
            with open(cluster_path, 'w') as stored:
                json.dump(cluster_sets, stored, indent=2)
        finally:
            multithreading.unlock_file_handle(lock_handle)

    # construct a list of objects
    clusters = [igraph.VertexClustering(g, **c) for c in cluster_sets]
    return clusters[:n]  # return only the first n
def compute_transitions_coarse(self):
    """Collapse the RNA-velocity graph onto the groups.

    Builds a directed graph from ``uns['rna_velocity']['graph']``, clusters
    its vertices by the category codes of ``obs[self._groups]`` and sums the
    edge weights between clusters.

    Sets ``self.vc`` (the vertex clustering) and ``self.transitions_coarse``
    (the summed weights as a sparse matrix).
    """
    import igraph

    velocity_graph = utils.get_igraph_from_adjacency(
        self._adata.uns['rna_velocity']['graph'], directed=True)
    group_codes = self._adata.obs[self._groups].cat.codes.values
    self.vc = igraph.VertexClustering(velocity_graph, membership=group_codes)

    collapsed = self.vc.cluster_graph(combine_edges='sum')
    self.transitions_coarse = utils.get_sparse_from_igraph(
        collapsed, weight_attr='weight')
def modularity(p_list, g):
    """
    Calculates the modularity of a partition of g without accounting for
    edge weight.

    Uses the constraints implied in the publication as well as constraints
    necessary to partition a graph that is disconnected:

    * a graph consisting of a single isolated node has modularity one;
    * a graph of several isolated nodes (no edges at all) has modularity
      zero;
    * a module that contains a single isolated node adds 1 / (number of
      modules);
    * every other module adds ``(ls / L - (ds / L)**2) / c`` where ``ls`` and
      ``ds`` are the edge count and degree sum of the module's induced
      subgraph, ``L`` is the edge count of ``g`` and ``c`` the number of
      components of the induced subgraph.

    Note: This modularity calculation is much faster than modularity_weights
    but is less accurate.

    Parameters
    ----------
    p_list: list
        Membership list of interest.
    g: igraph.Graph
        Graph of interest.

    Returns
    -------
    m: float
        Modularity of partition.

    References
    ----------
    R. Guimera, L. Amaral
    """
    L = g.ecount()

    # Graphs without edges are handled up front. `vcount` is a method and
    # has to be called: comparing the bound method itself never matched
    # `== 1` and raised TypeError for `> 1` on Python 3. Handling this before
    # the loop also keeps the division by L below away from zero.
    if L == 0:
        return 1 if g.vcount() == 1 else 0

    p = igraph.VertexClustering(g, p_list)
    n_modules = len(p)

    m = 0
    for mod in p:
        # Skip if empty module
        if len(mod) == 0:
            continue

        # A module that contains a single isolated node adds 1/# of modules
        if len(mod) == 1 and g.degree(mod[0]) == 0:
            m += 1.0 / n_modules
            continue

        # Create subgraph containing module of interest
        g_sub = g.subgraph(mod)
        ls = float(g_sub.ecount())
        ds = float(sum(g_sub.degree()))

        # Penalty applied is proportional to the number of components in
        # the subgraph
        m += ((ls / L) - (ds / L) ** 2) / len(g_sub.components())
    return m
def Build_random_clustering(cls, vc):
    '''
    Build a random vertex clustering with the same community sizes as ``vc``.

    The membership list of ``vc`` is shuffled, so every community keeps its
    number of nodes while the nodes assigned to it are random.

    :param vc: clustering providing ``membership`` and ``graph``.
    :return: a new igraph.VertexClustering on ``vc.graph``.
    '''
    # Shuffle a copy so the input clustering is never modified, whatever
    # kind of list its `membership` attribute hands out.
    shuffled_membership = list(vc.membership)
    random.shuffle(shuffled_membership)
    return igraph.VertexClustering(vc.graph, shuffled_membership)
def LocalMove(self, elligible):
    """Propose moving single nodes to another community and accept or reject.

    Random nodes drawn from ``elligible`` are reassigned to a random
    different community from ``self.comms_set`` until the candidate uses
    exactly ``self.number_of_groups`` distinct communities. The candidate is
    accepted when its modularity is not worse, or otherwise with probability
    ``exp((new - old) / self.temp)``. The temperature is multiplied by 0.995
    in either case.

    Parameters
    ----------
    elligible
        Nodes (keys of ``self.comm``) that may be moved.

    Returns
    -------
    bool
        True if the candidate was accepted (``self.comm``,
        ``self.modularity`` and ``self.t_modularity_list`` are updated),
        False otherwise.
    """
    # Work on a real copy. Assigning `self.comm` directly only aliased it,
    # so rejected moves still altered the "true" communities and left
    # `self.modularity` out of step with them.
    comm = self.comm.copy()

    # Keep moving until no group is empty.
    num_comm = 0
    while num_comm != self.number_of_groups:
        node = np.random.choice(elligible)
        selected = comm[node]
        # Candidate targets: every known community except the current one.
        new_comms_list = [val for val in self.comms_set if val != selected]
        comm[node] = np.random.choice(new_comms_list)
        num_comm = len(set(comm.values()))

    # Modularity of the candidate partition; the membership follows the
    # order of the dict values.
    partition = ig.VertexClustering(self.ntwk, membership=list(comm.values()))
    ltemp_modularity = partition.q

    better = bool(ltemp_modularity >= self.modularity)
    temp_move = bool(
        np.random.rand() <= np.exp(
            (ltemp_modularity - self.modularity) * (1 / self.temp)))

    accepted = better or temp_move
    if accepted:
        self.comm = comm
        self.modularity = ltemp_modularity
        self.t_modularity_list.append(ltemp_modularity)
    self.temp *= 0.995
    return accepted
def compute_connectivities_coarse(self):
    """Count the edges between groups in ``self.connectivities``.

    Every stored entry of ``self.connectivities`` is turned into an edge of
    weight 1, the vertices are clustered by the category codes of
    ``obs[self._groups]`` and the edge weights between clusters are summed
    and halved.

    Sets ``self.vc`` (the vertex clustering) and
    ``self.connectivities_coarse`` (the halved counts as a sparse matrix).
    """
    import igraph

    # graph where edges carry weight 1
    unit_edges = self.connectivities.copy()
    unit_edges.data = np.ones(len(unit_edges.data))
    graph = utils.get_igraph_from_adjacency(unit_edges)

    group_codes = self._adata.obs[self._groups].cat.codes.values
    self.vc = igraph.VertexClustering(graph, membership=group_codes)

    collapsed = self.vc.cluster_graph(combine_edges='sum')
    edge_counts = utils.get_sparse_from_igraph(collapsed, weight_attr='weight')
    self.connectivities_coarse = edge_counts / 2
def compute_transitions(self):
    """Compute the confidence of directed transitions between groups.

    Uses the sparse matrix stored under ``uns['velocity_graph']``; if only
    the old key ``'velocyto_transitions'`` is present, it is copied to the
    new key. The boolean graph is collapsed onto the groups given by
    ``obs[self._groups_key]``, the collapsed counts are antisymmetrised
    (``T - T.T``) and every non-negative entry is divided by
    ``sqrt(total_n[i] * total_n[j])`` with
    ``total_n = n_neighbors * group size``; negative entries become zero.

    Sets ``self.transitions_confidence`` (stored transposed).

    Raises
    ------
    ValueError
        If neither key is present in ``uns`` or the matrix is not of shape
        ``(n_obs, n_obs)``.
    """
    vkey = 'velocity_graph'
    if vkey not in self._adata.uns:
        if 'velocyto_transitions' not in self._adata.uns:
            raise ValueError(
                'The passed AnnData needs to have an `uns` annotation '
                "with key 'velocity_graph' - a sparse matrix from RNA velocity."
            )
        self._adata.uns[vkey] = self._adata.uns['velocyto_transitions']
        logg.debug(
            "The key 'velocyto_transitions' has been changed to 'velocity_graph'."
        )

    expected_shape = (self._adata.n_obs, self._adata.n_obs)
    if self._adata.uns[vkey].shape != expected_shape:
        raise ValueError(
            f"The passed 'velocity_graph' have shape {self._adata.uns[vkey].shape} "
            f"but shoud have shape {expected_shape}"
        )
    # NOTE: a check that uns['paga'] already holds 'expected_n_edges_random'
    # (i.e. that PAGA was run with use_rna_velocity=False first) used to
    # live here and is meant to be restored at some point.

    import igraph

    graph = _utils.get_igraph_from_adjacency(
        self._adata.uns[vkey].astype('bool'),
        directed=True,
    )
    clustering = igraph.VertexClustering(
        graph, membership=self._adata.obs[self._groups_key].cat.codes.values)
    # set combine_edges to False if you want self loops
    collapsed = clustering.cluster_graph(combine_edges='sum')

    transitions = _utils.get_sparse_from_igraph(collapsed, weight_attr='weight')
    transitions = transitions - transitions.T
    transitions_conf = transitions.copy()
    transitions = transitions.tocoo()
    total_n = self._neighbors.n_neighbors * np.array(clustering.sizes())

    # The reference is the geometric mean of both groups' total_n. An
    # alternative based on the expected number of random edges from uns['paga']
    # (factor 0.5 because of asymmetry) is not active.
    for i, j, net_edges in zip(transitions.row, transitions.col, transitions.data):
        reference = np.sqrt(total_n[i] * total_n[j])
        if net_edges < 0:
            transitions_conf[i, j] = 0
        else:
            transitions_conf[i, j] = net_edges / reference
    transitions_conf.eliminate_zeros()

    # transpose in order to match convention of stochastic matrices
    # entry ij means transition from j to i
    self.transitions_confidence = transitions_conf.T
def GlobalMove(self, elligible):
    """Propose moving several nodes at once and accept or reject the result.

    A random subset of ``elligible`` (without replacement) is reassigned to
    one random community from ``self.comms_set`` that differs from the
    current community of a randomly picked member of the subset. This is
    repeated until the candidate uses exactly ``self.number_of_groups``
    distinct communities. The candidate is accepted when its modularity is
    not worse, or otherwise with probability
    ``exp((new - old) / self.temp)``. The temperature is updated as
    ``temp / (1 + cooling * temp)`` in either case.

    Parameters
    ----------
    elligible
        Nodes (keys of ``self.comm``) that may be moved.

    Returns
    -------
    bool
        True if the candidate was accepted (``self.comm``,
        ``self.modularity`` and ``self.t_modularity_list`` are updated),
        False otherwise.
    """
    # Work on a copy: mutating `self.comm` directly made rejected moves
    # permanent and left `self.modularity` out of step with `self.comm`.
    comm = self.comm.copy()

    # Possible subset sizes; they do not change between attempts.
    if len(elligible) == 2:
        node_choice = list(range(2, len(elligible) + 1))
    else:
        node_choice = list(range(2, len(elligible)))

    # Keep moving until no group is empty.
    num_comm = 0
    while num_comm != self.number_of_groups:
        node_num = np.random.choice(node_choice)
        node_list = list(set(
            np.random.choice(elligible, node_num, replace=False)))
        # The community of one picked node decides which target is excluded.
        s_node = np.random.choice(node_list)
        selected = comm[s_node]
        new_comms_list = [val for val in self.comms_set if val != selected]
        change_comm = np.random.choice(new_comms_list)
        for node in node_list:
            comm[node] = change_comm
        num_comm = len(set(comm.values()))

    partition = ig.VertexClustering(self.ntwk, membership=list(comm.values()))
    gtemp_modularity = partition.q

    better = bool(gtemp_modularity >= self.modularity)
    temp_move = bool(
        np.random.rand() <= np.exp(
            (gtemp_modularity - self.modularity) * (1 / self.temp)))

    accepted = better or temp_move
    if accepted:
        self.comm = comm
        self.modularity = gtemp_modularity
        self.t_modularity_list.append(gtemp_modularity)
    self.temp = self.temp / (1 + (self.cooling * self.temp))
    return accepted
def LocalMove(self, elligible):
    """Propose moving single nodes to another community and accept or reject.

    Random nodes drawn from ``elligible`` are reassigned to a random
    different community from ``self.comms_set`` until the candidate uses
    exactly ``self.number_of_groups`` distinct communities. The candidate is
    accepted when its modularity is not worse, or otherwise with probability
    ``exp((new - old) / self.temp)``. The temperature is only cooled
    (``temp / (1 + cooling * temp)``) when the candidate is accepted.

    Parameters
    ----------
    elligible
        Nodes (keys of ``self.comm``) that may be moved.

    Returns
    -------
    bool
        True if the candidate was accepted (``self.optim_partition``,
        ``self.comm``, ``self.modularity`` and ``self.t_modularity_list`` are
        updated), False otherwise.
    """
    # Work on a real copy. Assigning `self.comm` directly only aliased it,
    # so rejected moves still altered the "true" communities.
    comm = self.comm.copy()

    # Keep moving until no group is empty.
    num_comm = 0
    while num_comm != self.number_of_groups:
        node = np.random.choice(elligible)
        selected = comm[node]
        new_comms_list = [val for val in self.comms_set if val != selected]
        comm[node] = np.random.choice(new_comms_list)
        num_comm = len(set(comm.values()))

    # Modularity of the candidate partition.
    partition = ig.VertexClustering(self.ntwk, membership=list(comm.values()))
    ltemp_modularity = partition.q

    better = bool(ltemp_modularity >= self.modularity)
    print("L, Former mod {}, After mod {}".format(self.modularity, ltemp_modularity))
    temp_move = bool(
        np.random.rand() <= np.exp(
            (ltemp_modularity - self.modularity) * (1 / self.temp)))

    if better or temp_move:
        self.optim_partition = partition
        self.comm = comm
        self.modularity = ltemp_modularity
        self.t_modularity_list.append(ltemp_modularity)
        self.temp = self.temp / (1 + (self.cooling * self.temp))
        return True
    # A rejected local move leaves the temperature unchanged.
    return False
def export_word_graph(keyword, dictionary, modelname, model, num_topics,
                      num_words, threshold, depth):
    """
    Constructs a network of relations between words and topics.

    This can be seen as a bipartite network (topics carry ``partition=0``,
    words ``partition=1``), which is then transformed into a unipartite
    network of word-word relations. Of this network the giant component is
    taken and visualized.

    The projection is written to ``<keyword><modelname>_<depth>_<num_topics>
    x<num_words>.gml`` and the drawing to the same name with ``_FR.svg``.

    Changes over the previous version:

    * the word side of the bipartite graph is selected through the
      ``partition`` node attribute; ``bipartite.sets`` cannot tell the two
      sides apart on a disconnected graph;
    * the giant component is really extracted (a ``VertexClustering`` built
      without a membership has a single cluster, i.e. the whole graph);
    * layout and labels are computed on the graph that is plotted.
    """
    H = nx.Graph()
    for token, token_id in dictionary.token2id.items():
        H.add_node(token_id, text=token, partition=1)

    topics = model.show_topics(num_topics, num_words, formatted=False)
    for n, topic in enumerate(topics):
        # Topic ids start right after the dictionary ids.
        topic_node = len(dictionary) + n + 1
        H.add_node(topic_node, partition=0)
        for word in range(num_words):
            if topic[word][0] > threshold:  # only positive weights
                H.add_edge(topic_node, dictionary.token2id[topic[word][1]])

    # create unipartite projection for words
    word_nodes = [
        node for node, attrs in H.nodes(data=True) if attrs["partition"] == 1
    ]
    W = nx.algorithms.bipartite.weighted_projected_graph(H, word_nodes)

    stem = "{0}_{1}_{2}x{3}".format(keyword + modelname, depth,
                                    num_topics, num_words)

    # write to disk as GML, then read it back as igraph.Graph
    nx.write_gml(W, stem + ".gml")
    G = ig.read(stem + ".gml", "gml")

    # filter to giant component
    gc = G.components(mode="weak").giant()

    visual_style = {
        "layout": gc.layout_fruchterman_reingold(),
        "vertex_size": 8,
        "vertex_label": gc.vs["text"],
        "edge_width": 0.5,
        "bbox": (1200, 1200),
        "margin": 50,
    }
    ig.plot(gc, stem + "_FR.svg", **visual_style)
def average_odf_and_internal_density(
        graph: igraph.Graph, members: List[int]) -> Tuple[float, float]:
    """
    Compute the average ODF and the internal-density score in a single pass.

    For every edge that crosses communities, ``1 / degree`` of each endpoint
    is added to that endpoint's community; every other edge is counted as
    internal to the community of its source. Communities with at most one
    vertex are dropped. Per community the ODF sum is divided by the community
    size, and the internal score is
    ``1 - 2 * internal_edges / (size * (size - 1))``. Both are averaged over
    the remaining communities.

    :param graph: graph to calculate the metrics on
    :param members: community of each node
    :return: ``(average_odf, internal_density)``
    """
    clustering = igraph.VertexClustering(graph, membership=members)
    crossing_flags = clustering.crossing()
    subgraphs = clustering.subgraphs()
    n_communities = len(subgraphs)

    odf_sums = np.zeros(n_communities)
    internal_edges = np.zeros(n_communities)
    sizes = np.array([sub.vcount() for sub in subgraphs])

    for edge_index, is_crossing in enumerate(crossing_flags):
        edge = graph.es[edge_index]
        if not is_crossing:
            # edge inside a cluster: counts towards internal density
            internal_edges[members[edge.source]] += 1
            continue
        # edge between clusters: both endpoints contribute to the ODF
        for endpoint in (edge.source, edge.target):
            odf_sums[members[endpoint]] += 1.0 / graph.degree(endpoint)

    # drop communities with fewer than two vertices
    kept = np.flatnonzero(sizes > 1)
    sizes = sizes[kept]
    odf_sums = odf_sums[kept]
    internal_edges = internal_edges[kept]

    # aggregate values
    odf_per_community = odf_sums / sizes
    density_per_community = 1.0 - (2.0 * internal_edges) / (sizes * (sizes - 1.0))
    return np.mean(odf_per_community), np.mean(density_per_community)
def kmeans_clustering(h5_data, data_matrix, adjacency_matrix, k):
    """Cluster ``data_matrix`` with k-means and attach the result to a graph.

    Parameters
    ----------
    h5_data, adjacency_matrix
        Passed to ``base_graph_structure`` to build the graph.
    data_matrix
        Data handed to ``k_means``.
    k
        Number of clusters.

    Returns
    -------
    tuple
        ``(graph, vertex_clustering, membership_list, centroids)``; the
        inertia returned by ``k_means`` is not used.
    """
    centroids, membership_list, _inertia = k_means(data_matrix, n_clusters=k)

    # Creation of the actual graph
    kmeans_community_G = base_graph_structure(h5_data, adjacency_matrix)

    # Set communities based on the clusters found by k-means. (A dict of
    # members per cluster used to be built here first, but it was replaced
    # by this clustering before anything read it.)
    kmeans_communities = ig.VertexClustering(
        kmeans_community_G, membership=membership_list)
    return kmeans_community_G, kmeans_communities, membership_list, centroids
def outqueue2res(g, nprocs, mod_res, memship_res):
    """Take output from outqueue and calculate the final results for the
    community detection.

    Values are read from ``output_queue`` (a name that is not defined in
    this function). Each value is indexed as ``val[0]`` (a modularity score)
    and ``val[1]`` (a membership list). Whenever a score exceeds
    ``mod_res.value``, both shared results are overwritten in place.

    :param g: igraph.Graph object
    :param nprocs: number of processes running in parallel; one ``"STOP"``
        sentinel is consumed from the queue per process
    :type nprocs: int
    :param mod_res: highest modularity score; object with a ``value``
        attribute that is updated in place
    :param memship_res: membership list corresponding to highest modularity
        score; updated in place through slice assignment
    """
    for work in range(nprocs):
        # iter(callable, sentinel) keeps calling output_queue.get() until it
        # returns "STOP".
        for val in iter(output_queue.get, "STOP"):
            # NOTE(review): the source this was recovered from had lost its
            # indentation; the statements below are assumed to belong to
            # this inner loop (so every value is examined) - confirm.
            pass
            modaux = val[0]
            if modaux > mod_res.value:
                mod_res.value = modaux
                loug = ig.VertexClustering(g, val[1])
                memship_res[:] = loug.membership
def radicchi(G, measure='weak'):
    """
    Run the Radicchi community-detection algorithm on ``G``.

    A copy of ``G`` whose vertices carry their index in an ``'id'``
    attribute is handed to ``radicchi_internal`` together with ``G``. The
    lists of vertices it returns are turned into a membership vector, each
    vertex receiving the index of the list that contains it.

    :param G: graph to partition.
    :param measure: ``'weak'`` or ``'strong'``.
    :return: an ``ig.VertexClustering`` of ``G``.
    :raises Exception: for any other ``measure``.
    """
    working_copy = G.copy()
    working_copy.vs['id'] = list(range(working_copy.vcount()))

    # `clustering` argument passed on for each supported measure.
    clustering_arg = {'weak': 4, 'strong': 3}
    if measure not in clustering_arg:
        raise Exception('Other measures of community not yet supported')
    communities = radicchi_internal(
        G, working_copy, 0, measure=measure, clustering=clustering_arg[measure])

    membership = [0] * G.vcount()
    for community_index, vertices in enumerate(communities):
        for vertex in vertices:
            membership[vertex] = community_index
    return ig.VertexClustering(G, membership)
def pan_community_detection(uid):
    """Run spinglass community detection on the graph built for ``uid``.

    The graph comes from ``convert_to_igraph(uid)``; every vertex gets a
    ``'size'`` attribute of 30 before ``community_spinglass`` is run.

    Returns
    -------
    tuple
        ``(number_of_communities, average_community_size, modularity)``.
    """
    graph = convert_to_igraph(uid)
    graph.vs['size'] = [30] * len(graph.vs)

    # Spinglass is one of the methods usable on directed graphs; edge
    # betweenness would be the other option.
    clusters = graph.community_spinglass()
    vc = ig.VertexClustering(graph, clusters.membership)

    # 'id' attributes of the members, one list per community.
    result = [[graph.vs[i]['id'] for i in community] for community in vc]
    member_total = sum([len(community) for community in result])
    scale = member_total / float(len(result))
    return len(result), scale, clusters.modularity
def get_expected_edges_ml(part_obj, layer_vec, weight='weight'):
    """
    Multilayer calculation of expected edges.

    The partition is split by layer: for every distinct value in
    ``layer_vec`` the induced subgraph and the matching slice of the
    membership vector are wrapped in a new ``ig.VertexClustering`` and
    passed to ``get_expected_edges``. The per-layer results are summed, so
    each layer is evaluated against its own subgraph.

    :param part_obj: partition object providing ``graph`` and ``membership``.
    :param layer_vec: array with one entry per node giving the node's layer.
    :param weight: weight attribute on the network.
    :return: sum of the expected edges over all layers.
    """
    def layer_expected_edges(layer):
        node_indices = np.where(layer_vec == layer)[0]
        layer_graph = part_obj.graph.subgraph(node_indices)
        layer_membership = np.array(part_obj.membership)[node_indices]
        layer_partition = ig.VertexClustering(
            graph=layer_graph, membership=layer_membership)
        return get_expected_edges(
            layer_partition, weight=weight, directed=layer_graph.is_directed())

    P_tot = 0
    for layer in np.unique(layer_vec):
        P_tot += layer_expected_edges(layer)
    return P_tot
def GlobalMove(self, elligible):
    """Propose moving several nodes at once and accept or reject the result.

    Between 2 and ``len(elligible)`` nodes are drawn from ``elligible``
    without replacement and all reassigned to one random community from
    ``self.comms_set`` that differs from the current community of a
    randomly picked member of the subset. The candidate is accepted when its
    modularity is not worse, or otherwise with probability
    ``exp((new - old) / self.temp)``. The temperature is multiplied by 0.995
    in either case.

    Parameters
    ----------
    elligible
        Nodes (keys of ``self.comm``) that may be moved.

    Returns
    -------
    bool
        True if the candidate was accepted (``self.comm``,
        ``self.modularity`` and ``self.t_modularity_list`` are updated),
        False otherwise.
    """
    # Possible subset sizes: 2 .. len(elligible).
    node_choice = list(range(2, len(elligible) + 1))
    node_num = np.random.choice(node_choice)
    node_list = list(set(np.random.choice(elligible, node_num, replace=False)))

    # Work on a copy: mutating `self.comm` directly made rejected moves
    # permanent and left `self.modularity` out of step with `self.comm`.
    comm = self.comm.copy()

    # The community of one picked node decides which target is excluded.
    s_node = np.random.choice(node_list)
    selected = comm[s_node]
    new_comms_list = [val for val in self.comms_set if val != selected]
    change_comm = np.random.choice(new_comms_list)
    for node in node_list:
        comm[node] = change_comm

    partition = ig.VertexClustering(self.ntwk, membership=list(comm.values()))
    gtemp_modularity = partition.q

    better = bool(gtemp_modularity >= self.modularity)
    temp_move = bool(
        np.random.rand() <= np.exp(
            (gtemp_modularity - self.modularity) * (1 / self.temp)))

    accepted = better or temp_move
    if accepted:
        self.comm = comm
        self.modularity = gtemp_modularity
        self.t_modularity_list.append(gtemp_modularity)
    self.temp *= 0.995
    return accepted
# Parse the first clustering file. `f` is opened before this section. Every
# line is split on single spaces and the second field, with the newline
# removed, is read as the integer cluster id.
first_clustering_mem_list = []
for line in f:
    split_string = line.split(" ")
    first_clustering_mem_list.append(int(split_string[1].replace("\n", "")))
f.close()

# do the same for second clustering file
f = open(second_clustering_path, 'r')
second_clustering_mem_list = []
for line in f:
    split_string = line.split(" ")
    second_clustering_mem_list.append(int(split_string[1].replace("\n", "")))
f.close()

# create corresponding Vertex Clusterings
# Both membership lists are applied to the same graph, `input_network`.
first_clustering = igraph.VertexClustering(input_network, first_clustering_mem_list)
second_clustering = igraph.VertexClustering(input_network, second_clustering_mem_list)
print "done creating clusterings."
if verbosity:
    print first_clustering
    print second_clustering

############ COMPARE CLUSTERINGS ############
# One call per comparison method offered by igraph.compare_communities;
# remove_none=False is passed to every call.
vi = igraph.compare_communities(first_clustering, second_clustering, method='vi', remove_none=False)
nmi = igraph.compare_communities(first_clustering, second_clustering, method='nmi', remove_none=False)
split_join = igraph.compare_communities(first_clustering, second_clustering, method='split-join', remove_none=False)
rand = igraph.compare_communities(first_clustering, second_clustering, method='rand', remove_none=False)
adj_rand = igraph.compare_communities(first_clustering, second_clustering, method='adjusted_rand', remove_none=False)
print "\nSeparated by tabs:"
def community_sa(g, mod_calc, t0 = 2.5 *10**-4, C = 0.75, f = 0.5):
    """
    Partitions the graph using the SA community detection algorithm proposed
    in Guimera and Amaral's publication.

    Assumptions made when implementing the algorithm include randomly
    selecting the node n for which to locally modify and using a 50% chance
    to determine whether a global split or merge is proposed. The split
    algorithm follows the detection algorithm exactly.

    For each temperature, ``int((f * S)**2)`` local changes and
    ``int(f * S)`` collective (merge or split) changes are proposed, where
    ``S`` is the number of vertices. After each temperature step the current
    modularity is recorded; once more than three values have been recorded,
    the oldest is dropped and the search stops when the remaining three
    differ from the first of them by less than ``2 * 10**-3`` in total.
    Otherwise the temperature is multiplied by ``C``.

    Parameters
    ----------
    g: igraph.Graph
        The graph of interest.
    mod_calc: lambda
        Function indicating which modularity measure to use. It is called
        as ``mod_calc(p, g)`` with a membership sequence and the graph.
    t0: float
        The initial temperature. The default is 2.5 * 10**-4, as proposed in
        Brockman's supplemental materials.
    C: float
        The cooling factor. The default is 0.75, as proposed in
        Brockman's supplemental materials.
    f: float
        The proportion of changes made. The default is 0.5.

    Returns
    -------
    igraph.VertexClustering
        Returns a clustering of the vertex set of the graph.

    References
    ----------
    R. Guimera, L. Amaral

    Examples
    --------
    >>> mod_calc = lambda p, g: modularity(p, g)
    >>> parts = community_sa(g, mod_calc, f = 0.65)
    >>> type(parts)
    igraph.clustering.VertexClustering
    """
    t = float(t0)
    S = g.vcount()

    # Initialize p such that each N node is in its own module
    p = range(S)

    accept = False
    # Modularity after each temperature step; trimmed to the last three.
    list_steps = []
    while(not accept):
        logger.info('temp: {} modularity: {}'.format(t, mod_calc(p, g)))

        # Propose fS**2 individual node movements
        for i in range(int((f * S)**2)):
            # The helpers receive a copy so that p itself is only replaced
            # on acceptance.
            pnew = _local_update(list(p))
            # Accept new partition according to equation 2 from the publication
            if _accept_update(mod_calc, g, pnew, p, t):
                p = list(pnew)
            if i % 1000 == 0:
                logger.info('{} of {} local updates complete'.format(i, int((f * S)**2)))

        # Propose fS collective movements
        # Change probability of merge given previous proposal rejections
        merge_prob = 2
        for i in range(int(f * S)):
            if i % 100 == 0:
                logger.info('{} of {} local updates complete'.format(i, int(f * S)))
            # With a changing probability, merge modules
            # (randint draws from 1..int(merge_prob); every draw except 1
            # leads to a merge.)
            if randint(1, int(merge_prob)) != 1:
                pnew = _merge_update(list(p))
                # Accept new partition according to equation 2 from the publication
                if _accept_update(mod_calc, g, pnew, p, t):
                    p = list(pnew)
            # Otherwise split modules
            else:
                # Split module using simplified SA community detection algorithm
                # The module to split is the one of a randomly sampled entry of p.
                pnew = _split_update(igraph.VertexClustering(g, p), sample(list(p), 1)[0], t0, t, S, C, f, mod_calc)
                # Accept new partition according to equation 2 from the publication
                if _accept_update(mod_calc, g, pnew, p, t):
                    p = list(pnew)
                else:
                    # For every 1000th rejection reduce probability of split
                    merge_prob += 0.001

        # Append current modularity
        list_steps.append(mod_calc(p, g))
        # Check if modularity has improved within three last temp steps
        if len(list_steps) > 3:
            # Maintain length of 3
            list_steps.remove(list_steps[0])
            if (abs(list_steps[0] - list_steps[1]) + abs(list_steps[0] - list_steps[2])) < 2 * 10**-3:
                # If M has seen no improvement, accept the partition and exit the while loop
                accept = True
        # Cool t
        t *= C
    return igraph.VertexClustering(g, p)
reposCG = reposCG.Read_Pickle('reposCG.pickle') #%% ''' CALCULO CLUSTERS CON FAST GREEDY reposcom creo que es una lista de listas donde cada lista es una comunidad ''' reposcom = reposCG.community_fastgreedy(weights=None) #fgclust = reposcom.as_clustering() fgclust2 = reposcom.as_clustering(n=20) ''' Ir a savecsv.py y guardar! Los clusters con ifastgreedy se hicieron a la componente gigante del grafo no dirigido ''' #%% ''' CREO OBJETO VERTEX CLUSTERING (que es lo que me devuelve la función community) A partir de: - lista membership - grafo ''' #udvc2 = ig.VertexClustering(udg, membership = membership) rdvc = ig.VertexClustering(rdg, membership=membership) #%% #sizesu = sorted(list(udvc.sizes())) sizesr = sorted(list(rdvc.sizes()))
def shuffled_comdet(g, numiter, parallel=True):
    """Run Louvain community detections with shuffled node sequence.

    Perform ``numiter`` Louvain community detections of the input graph ``g``
    and return the VertexClustering object with the highest modularity score.
    The first detection runs here on the original node order; the remaining
    ``numiter - 1`` are handed to worker processes through the input queue.
    The number of worker processes is the number of CPUs minus one, but never
    less than one.

    :param g: graph for community detection
    :type g: igraph.Graph
    :param numiter: number of reshuffled community detections to run
    :type numiter: int
    :param parallel: if ``True``, the numiter community detections are
        performed in parallel, otherwise in sequence
    :type parallel: bool
    :returns: VertexClustering with highest modularity score
    :rtype: igraph.VertexClustering
    """
    # get edgelist and nodelist from input graph
    edgelist = [edge.tuple for edge in g.es]
    nodelist = range(g.vcount())

    # run first community detection:
    loug = g.community_multilevel(return_levels=False)
    mod = g.modularity(loug)

    # create variables which are shared among all the parallel workers
    mod_res = mp.Value('d', mod)
    memship_res = mp.Array('i', loug.membership)

    # One CPU is left free, but at least one worker is needed: with zero
    # workers (single-CPU machine) nothing would ever consume the input
    # queue and no shuffled detection would run.
    if parallel:
        numprocs = max(1, mp.cpu_count() - 1)
    else:
        numprocs = 1

    # processes which add elements to input and output queue
    p_inqueue = mp.Process(target=add2inqueue, args=(numiter - 1, numprocs))
    p_outqueue = mp.Process(target=outqueue2res,
                            args=(g, numprocs, mod_res, memship_res))

    # create worker processes
    ps = [mp.Process(target=comdet_worker, args=(nodelist, edgelist))
          for _ in range(numprocs)]

    # start queues
    p_inqueue.start()
    p_outqueue.start()

    # start workers
    for p in ps:
        p.start()

    # end processes once they are done
    p_inqueue.join()
    for p in ps:
        p.join()
    p_outqueue.join()

    # Parenthesised form prints the same text on Python 2 and Python 3.
    print('Done.')
    return ig.VertexClustering(g, memship_res[:])
def find_partition(graph, method, initial_membership=None, weight=None,
                   resolution_parameter=1.0, consider_comms=ALL_NEIGH_COMMS):
    """ Detect communities with the Louvain algorithm.

    Finds the optimal partition of ``graph`` for the given ``method``. See the
    package documentation for the available methods.

    Keyword arguments:

    graph
      The graph for which to find the optimal partition.

    method
      The type of partition which will be used during optimisation.

    initial_membership=None
      If provided, the optimisation starts from this membership. It should be
      a list holding any unique identifier per community; the identifiers are
      converted to a numeric representation. Since communities can never be
      split, the number of communities in this initial partition provides an
      upper bound.

    weight=None
      If provided, the edge weights to use: either the name of an edge
      attribute (a string) or an iterable of weights, which is copied into a
      list. (N.B. Significance is not suited for weighted graphs.)

    resolution_parameter=1.0
      The resolution parameter, for those methods that use one.

    consider_comms=ALL_NEIGH_COMMS
      Determines which communities are considered when moving a node.

      ALL_COMMS
        Always consider all communities.

      ALL_NEIGH_COMMS
        Consider all communities of the neighbours.

      RAND_COMM
        Consider only a single random community.

      RAND_NEIGH_COMM
        Consider only a single random community of the neighbours. It is
        sampled from the set of all neighbours, so communities are sampled
        with their respective frequency.
        NOTE(review): the previous docstring repeated ALL_NEIGH_COMMS for this
        entry; RAND_NEIGH_COMM is the presumed constant name -- confirm.

      In ordinary cases it is usually not necessary to alter this parameter.
      The default of considering all communities of the neighbours works
      relatively well and is relatively fast. With negative weights, however,
      it may be better to move a node to a community to which it is not
      connected, so that all communities need to be considered.
      Alternatively, considering only a single random community of the
      neighbours can speed up the algorithm considerably without losing too
      much quality.

    The quality of the partition, as measured by the indicated method, is
    available on the returned partition as ``partition.quality``.

    returns: optimized partition."""
    capsule = __get_py_capsule(graph)

    # Resolve the weights: attribute name -> edge attribute values,
    # any other iterable -> plain list, None stays None.
    if isinstance(weight, str):
        edge_weights = graph.es[weight]
    elif weight is not None:
        edge_weights = list(weight)
    else:
        edge_weights = None

    # Map arbitrary community labels onto numeric ids.
    start_membership = None
    if initial_membership is not None:
        label_ids = _ig.UniqueIdGenerator()
        start_membership = [label_ids[label] for label in initial_membership]

    found_membership, found_quality = _c_louvain._find_partition(
        capsule, method, start_membership, edge_weights,
        resolution_parameter, consider_comms)

    result = _ig.VertexClustering(graph, found_membership)
    result.quality = found_quality
    return result
def compute_transitions(self):
    """Compute cluster-to-cluster transition confidences from the velocity graph.

    Reads the graph stored in ``self._adata.uns[f"{self.vkey}_graph"]``,
    optionally filters its edges with a time prior (``self.use_time_prior``)
    and with end/root annotations (``self.end_key`` / ``self.root_key``),
    aggregates it over the groups in ``self._adata.obs[self.groups]`` and
    prunes weak or redundant cluster-level edges.

    The method returns nothing; it sets two attributes:

    * ``self.transitions_confidence`` -- sparse matrix, the transpose of the
      pruned confidence matrix.
    * ``self.threshold`` -- the smallest column-wise maximum of the positive
      confidences minus ``1e-6``, but at least ``0.01``.

    Raises:
        ImportError: if ``igraph`` cannot be imported.
        ValueError: if the graph is missing from ``uns`` or its shape is not
            ``(n_obs, n_obs)``.
    """
    try:
        import igraph
    except ImportError as err:
        raise ImportError(
            "To run paga, you need to install `pip install python-igraph`"
        ) from err

    vkey = f"{self.vkey}_graph"
    if vkey not in self._adata.uns:
        raise ValueError(
            "The passed AnnData needs to have an `uns` annotation "
            f"with key '{vkey}' - a sparse matrix from RNA velocity."
        )
    graph_shape = self._adata.uns[vkey].shape
    expected_shape = (self._adata.n_obs, self._adata.n_obs)
    if graph_shape != expected_shape:
        raise ValueError(
            f"The passed '{vkey}' has shape {graph_shape} "
            f"but should have shape {expected_shape}"
        )

    clusters = self._adata.obs[self.groups]
    cats = clusters.cat.categories
    # keep only the edges whose weight exceeds 0.1
    vgraph = self._adata.uns[vkey] > 0.1
    time_prior = self.use_time_prior

    if isinstance(time_prior, str) and time_prior in self._adata.obs.keys():
        vpt = self._adata.obs[time_prior].values
        vpt_mean = self._adata.obs.groupby(self.groups)[time_prior].mean()
        # per-cell value of its own cluster's mean prior time
        vpt_means = np.array([vpt_mean[cat] for cat in clusters])
        rows, cols, vals = [], [], []
        for i in range(vgraph.shape[0]):
            row = vgraph[i]
            indices = row.indices
            # keep edge i -> k only if k has a larger prior time than i ...
            idx_bool = vpt[i] < vpt[indices]
            # ... and k's cluster mean is not more than 0.1 below i's
            idx_bool &= vpt_means[indices] > vpt_means[i] - 0.1
            cols.extend(indices[idx_bool])
            vals.extend(row.data[idx_bool])
            rows.extend([i] * np.sum(idx_bool))
        vgraph = vals_to_csr(vals, rows, cols, shape=vgraph.shape)

    if isinstance(self.end_key, str) and self.end_key in self._adata.obs.keys():
        # NOTE(review): presumably clears the rows of cells whose end_key
        # value exceeds 0.7 -- verify against set_row_csr.
        set_row_csr(vgraph, rows=np.where(self._adata.obs[self.end_key] > 0.7)[0])
    if isinstance(self.root_key, str) and self.root_key in self._adata.obs.keys():
        # zero the columns of cells whose root_key value exceeds 0.7
        vgraph[:, np.where(self._adata.obs[self.root_key] > 0.7)[0]] = 0
    # drop explicit zeros written by the masking steps above
    vgraph.eliminate_zeros()

    # collapse the cell-level graph into a cluster-level graph, summing edges
    membership = self._adata.obs[self.groups].cat.codes.values
    g = get_igraph_from_adjacency(vgraph, directed=True)
    vc = igraph.VertexClustering(g, membership=membership)
    cg_full = vc.cluster_graph(combine_edges="sum")
    transitions = get_sparse_from_igraph(cg_full, weight_attr="weight")
    # net flow between each pair of clusters (antisymmetric)
    transitions = transitions - transitions.T
    transitions_conf = transitions.copy()
    transitions = transitions.tocoo()
    total_n = self._neighbors.n_neighbors * np.array(vc.sizes())
    # keep only the positive direction of each pair, scaled by the geometric
    # mean of the two clusters' total_n values
    for i, j, v in zip(transitions.row, transitions.col, transitions.data):
        reference = np.sqrt(total_n[i] * total_n[j])
        transitions_conf[i, j] = 0 if v < 0 else v / reference
    transitions_conf.eliminate_zeros()

    # remove non-confident direct paths if more confident indirect path is found.
    # ``.toarray()`` replaces the ``.A`` shorthand, which newer SciPy releases
    # no longer provide on sparse matrices.
    T = transitions_conf.toarray()
    # T / (T > 0) turns zero entries into NaN so nanmax/nanmin skip them
    threshold = max(np.nanmin(np.nanmax(T / (T > 0), axis=0)) - 1e-6, 0.01)
    T *= T > threshold
    for i in range(len(T)):
        idx = T[i] > 0
        if np.any(idx):
            # for every intermediate k with T[i, k] > 0 the two-step path
            # i -> k -> j is worth min(T[i, k], T[k, j]); take the best k
            indirect = np.clip(T[idx], None, T[i][idx][:, None]).max(0)
            T[i, T[i] < indirect] = 0

    if self.minimum_spanning_tree:
        T_tmp = T.copy()
        T_num = T > 0
        T_sum = np.sum(T_num, 0)
        T_max = np.max(T_tmp)
        # a column with exactly one positive entry gets that entry raised to
        # the global maximum (presumably so the tree below retains it)
        for i in range(len(T_tmp)):
            if T_sum[i] == 1:
                T_tmp[np.where(T_num[:, i])[0][0], i] = T_max
        from scipy.sparse.csgraph import minimum_spanning_tree

        # negate so that the minimum spanning tree favours the largest values
        T_tmp = np.abs(minimum_spanning_tree(-T_tmp).toarray()) > 0
        T = T_tmp * T

    transitions_conf = csr_matrix(T)
    self.transitions_confidence = transitions_conf.T

    # set threshold for minimal spanning tree.
    df = pd.DataFrame(T, index=cats, columns=cats)
    self.threshold = np.nanmin(np.nanmax(df.values / (df.values > 0), axis=0))
    self.threshold = max(self.threshold - 1e-6, 0.01)