def test_diff_move():
    """Compare the WeightedLayers partition with the single-layer RB partition.

    Every vertex is placed in layer 0, and the test asserts that

    * ``diff_move()`` equals the observed change in ``quality()``,
    * ``diff_move()`` and ``quality()`` equal the single-layer values,
    * optimising both partition types from the same RNG seed yields the same
      quality over a range of resolution parameters.
    """
    graph = ig.Graph.Read_Ncol("multilayer_SBM_interslice_edges.csv",
                               directed=False)
    num_vertices = graph.vcount()
    single_layer = [0] * num_vertices
    singletons = list(range(num_vertices))

    rbc = louvain.RBConfigurationVertexPartition(
        graph, resolution_parameter=1.0, initial_membership=singletons)
    weighted = louvain.RBConfigurationVertexPartitionWeightedLayers(
        graph, resolution_parameter=1.0, layer_vec=single_layer,
        initial_membership=singletons)

    # check diff_move() - quality() consistency across 100 random moves
    for _ in range(100):
        vertex = randint(0, num_vertices - 1)
        community = randint(0, num_vertices - 1)

        quality_before = weighted.quality()
        predicted_diff = weighted.diff_move(vertex, community)
        weighted.move_node(vertex, community)
        observed_diff = weighted.quality() - quality_before

        single_layer_diff = rbc.diff_move(vertex, community)
        rbc.move_node(vertex, community)

        assert isclose(predicted_diff, observed_diff), \
            "WeightedLayers diff_move() inconsistent with quality()"
        assert isclose(predicted_diff, single_layer_diff), \
            "WeightedLayers diff_move() inconsistent with single-layer"
        assert isclose(weighted.quality(), rbc.quality()), \
            "WeightedLayers quality() inconsistent with single-layer"

    def optimise_from_seed(partition_class, seed, gamma, **extra):
        # Seed first, then build and optimise, so both partition types
        # consume the random stream from the same starting point.
        louvain.set_rng_seed(seed)
        partition = partition_class(graph, resolution_parameter=gamma, **extra)
        louvain.Optimiser().optimise_partition(partition=partition)
        return partition

    # check rng consistency between RBConfigurationVertexPartition and its
    # WeightedLayers variant with various seeds and resolution parameters
    for gamma in np.linspace(0.5, 1.5, 10):
        shared_seed = randint(-1 << 31, (1 << 31) - 1)  # random int32

        weighted = optimise_from_seed(
            louvain.RBConfigurationVertexPartitionWeightedLayers,
            shared_seed, gamma, layer_vec=single_layer)
        rbc = optimise_from_seed(
            louvain.RBConfigurationVertexPartition, shared_seed, gamma)

        weighted_quality = weighted.quality(resolution_parameter=gamma)
        rbc_quality = rbc.quality(resolution_parameter=gamma)
        assert isclose(weighted_quality, rbc_quality), \
            "Intra-layer optimisation inconsistent with single-layer"
def multilayer_louvain(G_intralayer, G_interlayer, layer_vec, gamma, omega, optimiser=None, return_partition=False):
    """Run multilayer Louvain once at a single (gamma, omega) value.

    The intralayer graph is optimised with
    ``RBConfigurationVertexPartitionWeightedLayers`` (a multilayer version of
    "standard" modularity, i.e. Reichardt and Bornholdt's Potts model with
    configuration null model) and the interlayer graph with a
    ``CPMVertexPartition`` at resolution 0, combined with layer weights
    ``[1, omega]``.

    Both graphs receive a unit ``'weight'`` edge attribute if they do not
    already have one (the graphs are modified in place).

    :param G_intralayer: intralayer igraph graph
    :param G_interlayer: interlayer igraph graph
    :param layer_vec: layer membership of each vertex
    :param gamma: intralayer resolution parameter
    :param omega: weight given to the interlayer partition
    :param optimiser: optional ``louvain.Optimiser`` to reuse; a new one is
        created when ``None``
    :param return_partition: if True return the intralayer partition object,
        otherwise a tuple of community memberships
    :return: the optimised intralayer partition or its membership tuple
    """
    check_multilayer_louvain_capabilities()

    # Look the attribute up among the edge *attribute names*. A membership
    # test on ``graph.es`` compares against the edges themselves, so existing
    # weights would be overwritten with 1.0.
    for layer_graph in (G_intralayer, G_interlayer):
        if 'weight' not in layer_graph.edge_attributes():
            layer_graph.es['weight'] = [1.0] * layer_graph.ecount()

    if optimiser is None:
        optimiser = louvain.Optimiser()

    intralayer_part = louvain.RBConfigurationVertexPartitionWeightedLayers(
        G_intralayer, layer_vec=layer_vec, weights='weight', resolution_parameter=gamma)
    interlayer_part = louvain.CPMVertexPartition(
        G_interlayer, resolution_parameter=0.0, weights='weight')
    optimiser.optimise_partition_multiplex(
        [intralayer_part, interlayer_part], layer_weights=[1, omega])

    if return_partition:
        return intralayer_part
    return tuple(intralayer_part.membership)
def plot_number_clusters(card_data_df, G, resolution_range):
    """Plot how the number of clusters changes with the resolution parameter.

    A resolution profile of ``G`` is computed with ``RBERVertexPartition``
    over ``resolution_range`` and the number of communities of every
    partition in the profile is plotted against that partition's own
    resolution parameter.

    Parameters:
    -----------
    card_data_df: pandas DataFrame containing as columns card name and the
        decks that each card belongs to as a set. Its ``"Count"`` column is
        passed to the profile as node sizes.
    G: igraph Graph representation of card_data_df.
    resolution_range: tuple of two values to vary resolution_parameter
        between.

    See also:
    ---------
    create_card_df: function that creates card_data_df.
    create_graph: function that creates G.
    """
    optimiser = lv.Optimiser()
    profile = optimiser.resolution_profile(
        G,
        lv.RBERVertexPartition,
        resolution_range=resolution_range,
        node_sizes=card_data_df["Count"].tolist(),
    )

    # The profile is found by bisection, so its partitions are not evenly
    # spaced over the range; read each partition's actual resolution instead
    # of assuming a linear grid.
    x = np.array([partition.resolution_parameter for partition in profile])
    y = np.array([len(partition) for partition in profile])

    plt.plot(x, y)
    plt.xlabel("resolution_parameter")
    plt.ylabel("Number of clusters")
    plt.show()
def louvain_modified(snapshots, randomise_constraint=0.02):
    """Cluster a sequence of snapshots, seeding each from the previous one.

    The first snapshot (and every snapshot when ``randomise_constraint >= 1``)
    is clustered with ``louvain.find_partition``. Otherwise the partition is
    initialised from ``init_clusters`` applied to the current and previous
    graphs and optimised repeatedly until a pass yields no improvement.
    The resulting membership is stored on each graph as the
    ``"cluster_seed"`` vertex attribute.

    Returns the list of partitions, one per snapshot.
    """
    optimiser = louvain.Optimiser()
    results = []
    current = None
    previous_snapshot = None

    for snapshot in snapshots:
        seeded = current is not None and randomise_constraint < 1
        if not seeded:
            current = louvain.find_partition(
                snapshot.get_graph(), louvain.ModularityVertexPartition)
        else:
            target_graph = snapshot.get_graph()
            seed_membership = init_clusters(
                snapshot.get_graph(),
                previous_snapshot.get_graph(),
                current,
                randomise_constraint,
            ).membership
            current = louvain.ModularityVertexPartition(
                target_graph, seed_membership)
            # Keep optimising for as long as a pass still improves quality.
            while True:
                gain = optimiser.optimise_partition(current)
                if not gain > 0:
                    break

        snapshot.get_graph().vs["cluster_seed"] = current.membership
        results.append(current)
        previous_snapshot = snapshot

    return results
def ms_avg(snapshots, weights=None):
    """Return the weighted average modularity over a list of snapshots.

    Each snapshot's graph is optimised independently with the two Louvain
    phases (node moving, then aggregation) until no snapshot improves any
    more; the final modularities are then averaged.

    :param snapshots: sequence of objects exposing ``get_graph()``
    :param weights: optional mapping from snapshot index to weight; indices
        that are missing get weight 1. The mapping is not modified.
    :return: the weighted mean modularity, or ``None`` when ``snapshots`` is
        empty
    """
    if not snapshots:
        return None

    # Work on a private copy: a shared ``{}`` default (or the caller's dict)
    # must not accumulate entries between calls.
    weights = {} if weights is None else dict(weights)
    snapshot_weights = [weights.get(idx, 1) for idx in range(len(snapshots))]

    optimiser = louvain.Optimiser()
    static_modularities = [0] * len(snapshots)
    partitions = [louvain.ModularityVertexPartition(snap.get_graph())
                  for snap in snapshots]
    partitions_agg = [partition.aggregate_partition()
                      for partition in partitions]

    improved = True
    while improved:
        improved = False
        # phase 1: move nodes within every aggregated partition
        for idx, aggregated in enumerate(partitions_agg):
            if optimiser.move_nodes(aggregated) > 0:
                improved = True
            static_modularities[idx] = aggregated.quality()
        # phase 2: push the result down and aggregate again
        if improved:
            for idx, aggregated in enumerate(partitions_agg):
                partitions[idx].from_coarse_partition(aggregated)
                partitions_agg[idx] = aggregated.aggregate_partition()

    weighted_total = sum(modularity * weight for modularity, weight
                         in zip(static_modularities, snapshot_weights))
    # Normalise by the weights actually applied to the snapshots.
    return weighted_total / sum(snapshot_weights)
def louvain_find_partition_multiplex(graphs, partition_type, layer_weights=None, seed=None, **kwargs):
    """ Detect communities for multiplex graphs.

    One partition of type ``partition_type`` is built per graph and all of
    them are optimised together with
    :func:`Optimiser.optimise_partition_multiplex`.

    Parameters
    ----------
    graphs : list of :class:`ig.Graph`
      The layers to optimise, one graph per layer.

    partition_type : type of :class:`MutableVertexPartition`
      The type of partition to use for optimisation (identical for all graphs).

    layer_weights : list, optional
      Weight of each layer; every layer gets weight 1 when not given.

    seed : int
      Seed for the random number generator. It is only set on the optimiser
      when a value other than ``None`` is given.

    **kwargs
      Remaining keyword arguments, passed on to constructor of
      ``partition_type``.

    Returns
    -------
    list of int
      membership of nodes, taken from the first layer's partition.

    float
      Improvement in quality of combined partitions, see
      :func:`Optimiser.optimise_partition_multiplex`.

    See Also
    --------
    :func:`Optimiser.optimise_partition_multiplex`

    :func:`slices_to_layers`

    Examples
    --------
    >>> n = 100
    >>> G_1 = ig.Graph.Lattice([n], 1)
    >>> G_2 = ig.Graph.Lattice([n], 1)
    >>> membership, improvement = louvain.find_partition_multiplex([G_1, G_2],
    ...                                                            louvain.ModularityVertexPartition)
    """
    layer_count = len(graphs)
    if layer_weights is None:
        layer_weights = [1] * layer_count

    layer_partitions = [partition_type(layer, **kwargs) for layer in graphs]

    optimiser = louvain.Optimiser()
    if seed is not None:
        optimiser.set_rng_seed(seed)

    gain = optimiser.optimise_partition_multiplex(layer_partitions, layer_weights)
    return layer_partitions[0].membership, gain
def louvain(
    data: AnnData,
    rep: str = "pca",
    resolution: float = 1.3,
    random_state: int = 0,
    class_label: str = "louvain_labels",
) -> None:
    """Cluster the cells using Louvain algorithm.

    Parameters
    ----------
    data: ``anndata.AnnData``
        Annotated data matrix with rows for cells and columns for genes.

    rep: ``str``, optional, default: ``"pca"``
        The embedding representation whose affinity matrix is clustered.
        Key ``'W_' + rep`` must exist in ``data.uns``.

    resolution: ``float``, optional, default: ``1.3``
        Resolution factor. Higher resolution tends to find more clusters with smaller sizes.

    random_state: ``int``, optional, default: ``0``
        Random seed for reproducing results.

    class_label: ``str``, optional, default: ``"louvain_labels"``
        Key name for storing cluster labels in ``data.obs``.

    Returns
    -------
    ``None``

    Update ``data.obs``:
        * ``data.obs[class_label]``: Cluster labels of cells as categorical data.

    Raises
    ------
    ValueError
        If ``'W_' + rep`` is not a key of ``data.uns``.

    Examples
    --------
    >>> pg.louvain(adata)
    """
    start = time.time()

    rep_key = "W_" + rep
    if rep_key not in data.uns:
        raise ValueError("Cannot find affinity matrix. Please run neighbors first!")
    W = data.uns[rep_key]

    G = construct_graph(W)
    partition = louvain_module.RBConfigurationVertexPartition(
        G, resolution_parameter=resolution, weights="weight"
    )
    optimiser = louvain_module.Optimiser()
    optimiser.set_rng_seed(random_state)
    optimiser.optimise_partition(partition)

    # Cluster labels are 1-based strings, ordered naturally ("2" before "10").
    labels = np.array([str(x + 1) for x in partition.membership])
    categories = natsorted(np.unique(labels))
    data.obs[class_label] = pd.Categorical(values=labels, categories=categories)

    end = time.time()
    logger.info("Louvain clustering is done. Time spent = {:.2f}s.".format(end - start))
def louvain(i, j, val, dim, partition_method, initial_membership, weights, resolution, node_sizes, seed, verbose):
    """Run Louvain on a graph given as sparse-matrix triplets.

    The triplets ``(i, j, val)`` with shape ``dim`` are assembled into a
    sparse matrix; every nonzero entry becomes an edge of the graph that is
    partitioned.

    :param i: row indices of the nonzero entries
    :param j: column indices of the nonzero entries
    :param val: values of the nonzero entries
    :param dim: shape of the matrix
    :param partition_method: name of the ``louvain`` partition class to use;
        one of ``ModularityVertexPartition``,
        ``RBConfigurationVertexPartition``, ``RBERVertexPartition``,
        ``CPMVertexPartition``, ``SignificanceVertexPartition`` or
        ``SurpriseVertexPartition``
    :param initial_membership: initial membership passed to the partition
    :param weights: edge weights, passed to partition types that accept them
    :param resolution: resolution parameter, passed to partition types that
        accept one
    :param node_sizes: node sizes, passed to partition types that accept them
    :param seed: RNG seed; the generator is only seeded when not ``None``
    :param verbose: unused
    :return: the optimised partition
    :raises ValueError: if ``partition_method`` is not supported
    """
    # Optional constructor arguments understood by each partition class.
    optional_kwargs = {
        'ModularityVertexPartition': {
            'weights': weights},
        'RBConfigurationVertexPartition': {
            'weights': weights, 'resolution_parameter': resolution},
        'RBERVertexPartition': {
            'weights': weights, 'node_sizes': node_sizes,
            'resolution_parameter': resolution},
        'CPMVertexPartition': {
            'weights': weights, 'node_sizes': node_sizes,
            'resolution_parameter': resolution},
        'SignificanceVertexPartition': {
            'node_sizes': node_sizes},
        'SurpriseVertexPartition': {
            'weights': weights, 'node_sizes': node_sizes},
    }
    # Validate before importing the heavy dependencies and building the graph.
    if partition_method not in optional_kwargs:
        raise ValueError('partition_method ' + partition_method + ' is NOT supported.')

    import louvain
    import igraph as ig
    from scipy.sparse import csc_matrix

    data = csc_matrix((val, (i, j)), shape=dim)
    sources, targets = data.nonzero()
    G = ig.Graph(edges=list(zip(sources.tolist(), targets.tolist())))

    # Instantiate the class that was actually requested.
    partition_class = getattr(louvain, partition_method)
    partition = partition_class(
        G, initial_membership=initial_membership,
        **optional_kwargs[partition_method])

    if seed is not None:
        louvain.set_rng_seed(seed)

    optimiser = louvain.Optimiser()
    optimiser.optimise_partition(partition)
    return partition
def louvain_hierarchy_output(partition):
    """Run Louvain level by level and collect the communities of each level.

    Nodes are moved on the aggregated partition; after every improving pass
    the result is pushed back into ``partition`` (which is modified in
    place), the partition is aggregated again and the current communities
    are recorded.

    Returns a list with one entry per level, each entry being
    ``list(partition)`` at that level.
    """
    optimiser = louvain.Optimiser()
    coarse = partition.aggregate_partition()
    levels = []

    while True:
        gain = optimiser.move_nodes(coarse)
        if not gain > 0:
            return levels
        partition.from_coarse_partition(coarse)
        coarse = coarse.aggregate_partition()
        levels.append(list(partition))
def multilayer_louvain(G_intralayer, G_interlayer, layer_vec, gamma, omega, optimiser=None, return_partition=False):
    r"""Run the Louvain modularity maximization algorithm at a single (:math:`\gamma, \omega`) value.

    Both graphs receive a unit ``'weight'`` edge attribute if they do not
    already have one (the graphs are modified in place).

    :param G_intralayer: intralayer graph of interest
    :type G_intralayer: igraph.Graph
    :param G_interlayer: interlayer graph of interest
    :type G_interlayer: igraph.Graph
    :param layer_vec: list of each vertex's layer membership
    :type layer_vec: list[int]
    :param gamma: gamma (intralayer resolution parameter) to run Louvain at
    :type gamma: float
    :param omega: omega (interlayer resolution parameter) to run Louvain at
    :type omega: float
    :param optimiser: if not None, use passed-in (potentially custom) louvain optimiser
    :type optimiser: louvain.Optimiser
    :param return_partition: if True, return a louvain partition. Otherwise, return a community membership tuple
    :type return_partition: bool
    :return: partition from louvain
    :rtype: tuple[int] or louvain.RBConfigurationVertexPartitionWeightedLayers
    """
    # RBConfigurationVertexPartitionWeightedLayers implements a multilayer version of "standard" modularity (i.e.
    # the Reichardt and Bornholdt's Potts model with configuration null model).
    check_multilayer_louvain_capabilities()

    # Look the attribute up among the edge *attribute names*. A membership
    # test on ``graph.es`` compares against the edges themselves, so existing
    # weights would be overwritten with 1.0.
    for layer_graph in (G_intralayer, G_interlayer):
        if 'weight' not in layer_graph.edge_attributes():
            layer_graph.es['weight'] = [1.0] * layer_graph.ecount()

    if optimiser is None:
        optimiser = louvain.Optimiser()

    intralayer_part = louvain.RBConfigurationVertexPartitionWeightedLayers(
        G_intralayer, layer_vec=layer_vec, weights='weight', resolution_parameter=gamma)
    # The interlayer partition uses CPM at resolution 0 and is weighted by omega below.
    interlayer_part = louvain.CPMVertexPartition(
        G_interlayer, resolution_parameter=0.0, weights='weight')
    optimiser.optimise_partition_multiplex(
        [intralayer_part, interlayer_part], layer_weights=[1, omega])

    if return_partition:
        return intralayer_part
    return tuple(intralayer_part.membership)
def louvain_multiplex(graphs, partition_type, interslice_weight, resolution_parameter):
    """Optimise a set of time slices as one multiplex problem.

    The slices are converted to layers with
    ``louvain.time_slices_to_layers`` (vertices are matched on their
    ``'name'`` attribute), one partition is created per layer plus one for
    the interslice layer, and all of them are optimised together.

    :param graphs: list of igraph graphs, one per slice
    :param partition_type: ``louvain`` partition class used for every layer
    :param interslice_weight: weight of the interslice coupling edges
    :param resolution_parameter: resolution parameter given to every
        partition; ignored for ``ModularityVertexPartition``, which has none
    :return: tuple ``(partition, quality)`` with the first layer's partition
        and the summed quality of all partitions
    """
    layers, interslice_layer, G_full = louvain.time_slices_to_layers(
        graphs, vertex_id_attr='name', interslice_weight=interslice_weight)

    # The resolution must be passed by keyword: the second positional
    # argument of the partition constructors is ``initial_membership``.
    if partition_type == louvain.ModularityVertexPartition:
        resolution_kwargs = {}
    else:
        resolution_kwargs = {'resolution_parameter': resolution_parameter}

    partitions = [partition_type(H, **resolution_kwargs) for H in layers]
    interslice_partition = partition_type(
        interslice_layer, weights='weight', **resolution_kwargs)
    all_partitions = partitions + [interslice_partition]

    optimiser = louvain.Optimiser()
    optimiser.optimise_partition_multiplex(all_partitions)

    quality = sum(p.quality() for p in all_partitions)
    return partitions[0], quality
def test_multilayer_louvain():
    """Check WeightedLayers ``quality()`` against an explicit calculation.

    For several interlayer weights the multiplex optimisation is run and the
    summed quality of the intralayer and interlayer partitions is compared
    with ``calculate_coefficient`` applied to the intralayer adjacency
    matrix, the negated per-layer null-model matrix and the scaled
    interlayer adjacency matrix.
    """
    intraslice = ig.Graph.Read_Ncol("multilayer_SBM_intraslice_edges.csv",
                                    directed=False)
    interslice = ig.Graph.Read_Ncol("multilayer_SBM_interslice_edges.csv",
                                    directed=False)

    n_layers = 4
    layer_size = intraslice.vcount() // n_layers
    total = layer_size * n_layers
    layer_vec = np.array([vertex // layer_size for vertex in range(total)])

    intraslice.es['weight'] = 1.0
    intralayer_part = louvain.RBConfigurationVertexPartitionWeightedLayers(
        intraslice, resolution_parameter=1.0, layer_vec=layer_vec,
        weights='weight')

    for omega in np.linspace(0.5, 1.5, 10):
        interslice.es['weight'] = omega
        interlayer_part = louvain.RBConfigurationVertexPartition(
            interslice, resolution_parameter=0.0, weights='weight')

        optimiser = louvain.Optimiser()
        optimiser.optimise_partition_multiplex(
            partitions=[intralayer_part, interlayer_part])

        intra_quality = intralayer_part.quality(resolution_parameter=1.0)
        louvain_mod = intra_quality + interlayer_part.quality()

        adjacency = np.array(intraslice.get_adjacency()._get_data())
        coupling = omega * np.array(interslice.get_adjacency()._get_data())

        # Block-diagonal null model: one degree outer product per layer.
        null_model = np.zeros((total, total))
        for layer in range(n_layers):
            first = layer_size * layer
            degrees = np.array(
                intraslice.degree(list(range(first, first + layer_size))))
            members = np.where(layer_vec == layer)[0]
            block = np.outer(degrees, degrees.T) / (1.0 * np.sum(degrees))
            null_model[np.ix_(members, members)] = block

        membership = np.array(intralayer_part.membership)
        true_mod = sum(calculate_coefficient(membership, matrix)
                       for matrix in (adjacency, -null_model, coupling))

        assert isclose(louvain_mod, true_mod), \
            "WeightedLayers quality() inconsistent with alternate calculation"
def layer_partition():
    """Partition a fixed subgraph into communities and print the result.

    The subgraph around six hard-coded node ids is written to
    ``song-tmp.graphml`` in ``VIS_DATA_DIR`` and read back as an igraph
    graph. Its positively and negatively weighted edges form two layers that
    are optimised together with layer weights ``[1, -1]``. Diagnostics about
    the positive-layer partition and the community of each of the six nodes
    are printed.
    """
    giants = ['1384', '3762', '1493', '3767', '1762', '7364']
    giant_names = ['歐陽修', '蘇洵', '蘇轍', '蘇軾', '王安石', '曾鞏']

    sub_g = get_subgraph(node_lists=list(giants), depth=0)

    # Round-trip through GraphML to hand the graph over to igraph.
    graphml_path = os.path.join(VIS_DATA_DIR, 'song-tmp.graphml')
    nx.write_graphml(sub_g, graphml_path)
    G = ig.Graph.Read_GraphML(graphml_path)

    # Split by edge sign while keeping the full vertex set in both layers;
    # the negative layer stores absolute weights.
    G_pos = G.subgraph_edges(G.es.select(weight_gt=0), delete_vertices=False)
    G_neg = G.subgraph_edges(G.es.select(weight_lt=0), delete_vertices=False)
    G_neg.es['weight'] = [-w for w in G_neg.es['weight']]

    # Each partition is built exactly once, immediately before optimisation.
    optimiser = louvain.Optimiser()
    part_pos = louvain.ModularityVertexPartition(G_pos, weights='weight')
    part_neg = louvain.ModularityVertexPartition(G_neg, weights='weight')
    optimiser.optimise_partition_multiplex([part_pos, part_neg], layer_weights=[1, -1])

    print(dir(part_pos))
    print(part_pos.summary())
    print(part_pos.modularity, part_pos.q, part_pos)

    # label -> vertex index, then label -> community id
    label_to_index = {}
    for v in G.vs:
        label_to_index[v["label"]] = v.index
    memberships = list(part_pos.membership)
    # Fails when two vertices share a label.
    assert len(memberships) == len(label_to_index)
    label_to_community = {label: memberships[index]
                          for label, index in label_to_index.items()}

    for giant, name in zip(giants, giant_names):
        print(label_to_community[giant], giant, name)
def run_approximated_louvain(data, rep_key, n_jobs = 1, resolution = 1.3, random_state = 0, n_clusters = 30, n_init = 20, class_label = 'approx_louvain_labels'):
    """Approximate Louvain clustering seeded by repeated k-means runs.

    ``run_one_instance_of_kmeans`` is executed in ``n_jobs`` threads on
    ``data.obsm[rep_key]``; the label combination each cell receives over
    all runs defines its initial community. Louvain is then run on the
    aggregated partition of the graph built from ``data.uns['W_norm']`` and
    the resulting 1-based labels are stored as a categorical in
    ``data.obs[class_label]``.
    """
    start = time.time()

    X = data.obsm[rep_key].astype('float64')
    np.random.seed(random_state)
    seeds = np.random.randint(np.iinfo(np.int32).max, size = n_init)

    old_n = set_numpy_thread(1)

    results = [None] * n_jobs
    workers = []
    for job_id in range(n_jobs):
        worker = threading.Thread(
            target=run_one_instance_of_kmeans,
            args=(job_id, results, n_init, n_clusters, n_jobs, X, seeds))
        workers.append(worker)
        worker.start()
    for worker in workers:
        worker.join()

    set_numpy_thread(old_n)

    # One tuple per cell holding its label from every k-means run.
    per_run_labels = [run for job in results for run in job]
    label_tuples = list(zip(*per_run_labels))
    uniqs = np.unique(label_tuples, axis = 0)
    # Every distinct label combination becomes one initial community.
    community_of = {tuple(row): idx for idx, row in enumerate(uniqs)}
    initial_membership = [community_of[combo] for combo in label_tuples]

    G = construct_graph(data.uns['W_norm'])
    partition = louvain.RBConfigurationVertexPartition(
        G, resolution_parameter = resolution,
        initial_membership = initial_membership)
    partition_agg = partition.aggregate_partition()

    optimiser = louvain.Optimiser()
    optimiser.set_rng_seed(random_state)
    optimiser.optimise_partition(partition_agg)
    partition.from_coarse_partition(partition_agg)

    labels = np.array([str(community + 1) for community in partition.membership])
    data.obs[class_label] = pd.Categorical(
        values = labels, categories = natsorted(np.unique(labels)))

    end = time.time()
    print("Approximated Louvain clustering is done. Time spent = {:.2f}s.".format(end - start))
def layer_partition(sub_g):
    """Partition ``sub_g`` into communities using its signed edge weights.

    The graph is written to ``song-tmp.graphml`` in ``VIS_DATA_DIR`` and read
    back as an igraph graph. Its positively and negatively weighted edges
    form two layers that are optimised together with layer weights
    ``[1, -1]``.

    :param sub_g: graph accepted by ``nx.write_graphml``
    :return: dict mapping each vertex ``"label"`` to its community id in the
        positive-layer partition
    :raises AssertionError: if the number of distinct labels differs from the
        number of vertices
    """
    # Round-trip through GraphML to hand the graph over to igraph.
    graphml_path = os.path.join(VIS_DATA_DIR, 'song-tmp.graphml')
    nx.write_graphml(sub_g, graphml_path)
    G = ig.Graph.Read_GraphML(graphml_path)

    # Split by edge sign while keeping the full vertex set in both layers;
    # the negative layer stores absolute weights.
    G_pos = G.subgraph_edges(G.es.select(weight_gt=0), delete_vertices=False)
    G_neg = G.subgraph_edges(G.es.select(weight_lt=0), delete_vertices=False)
    G_neg.es['weight'] = [-w for w in G_neg.es['weight']]

    # Each partition is built exactly once, immediately before optimisation.
    optimiser = louvain.Optimiser()
    part_pos = louvain.ModularityVertexPartition(G_pos, weights='weight')
    part_neg = louvain.ModularityVertexPartition(G_neg, weights='weight')
    optimiser.optimise_partition_multiplex([part_pos, part_neg],
                                           layer_weights=[1, -1])

    # label -> vertex index, then label -> community id
    label_to_index = {}
    for v in G.vs:
        label_to_index[v["label"]] = v.index
    memberships = list(part_pos.membership)
    assert len(memberships) == len(label_to_index)

    return {label: memberships[index]
            for label, index in label_to_index.items()}
def run_louvain(data, affinity = 'W_norm', resolution = 1.3, random_state = 0):
    """Cluster cells with Louvain on the selected affinity matrix.

    ``affinity == 'W_norm'`` uses ``data.uns['W_norm']``; ``'W_diffmap'``
    uses the affinity matrix computed from the diffusion-map kNN indices and
    distances; any other value uses the normalised version of that diffmap
    affinity matrix. The 1-based cluster labels are stored as a categorical
    in ``data.obs[aff2lab[affinity]]``.
    """
    start = time.time()

    if affinity == 'W_norm':
        W = data.uns['W_norm']
    else:
        W_diffmap = calculate_affinity_matrix(
            data.uns['diffmap_knn_indices'],
            data.uns['diffmap_knn_distances'])
        if affinity == 'W_diffmap':
            W = W_diffmap
        else:
            W, _, _ = calculate_normalized_affinity(W_diffmap)

    G = construct_graph(W)
    partition = louvain.RBConfigurationVertexPartition(
        G, resolution_parameter = resolution)

    optimiser = louvain.Optimiser()
    optimiser.set_rng_seed(random_state)
    optimiser.optimise_partition(partition)

    labels = np.array([str(community + 1) for community in partition.membership])
    data.obs[aff2lab[affinity]] = pd.Categorical(
        values = labels, categories = natsorted(np.unique(labels)))

    end = time.time()
    print("Louvain clustering is done. Time spent = {:.2f}s.".format(end - start))
def _cluster(self, aData, resolution, clusterMin=10,
             clusteringAlgorithm='leiden'
             ) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """ Performs the clustering.

    This function is a little more complicated than strictly necessary
    because it preserves the cluster label of each cell during the
    iterations of the modularity optimization, and returns those labels
    alongside the final assignment. It is sometimes useful to explore the
    cluster labels of cells from modularities prior to steady state, as
    they generally reflect coherent groupings that are more granular than
    the final assignments.

    Args:
        aData: anndata object to use for clustering
        resolution: resolution for modularity calculation
        clusterMin: minimum number of cells that must be in a cluster to
            keep that cluster; cells of smaller clusters are labelled -1
        clusteringAlgorithm: choice of algorithm to use for modularity
            optimization, currently leiden and louvain are supported
    Returns:
        a tuple of dataframes, first is a dataframe containing the cluster
        labels from all rounds of modularity optimization, second is
        just the final round of optimization. Index is always cell id
    Raises:
        ValueError: if clusteringAlgorithm is neither 'louvain' nor 'leiden'
    """
    # Reject unknown algorithms up front instead of failing later on an
    # unbound module name.
    if clusteringAlgorithm not in ('louvain', 'leiden'):
        raise ValueError(
            "Unsupported clusteringAlgorithm '{}'; "
            "expected 'louvain' or 'leiden'".format(clusteringAlgorithm))

    g = Neighbors(aData).to_igraph()

    if clusteringAlgorithm == 'louvain':
        import louvain as clAlgo
        print('using louvain algorithm')
    else:
        import leidenalg as clAlgo
        print('using leiden algorithm')

    optimiser = clAlgo.Optimiser()
    tracking = []
    partition = clAlgo.RBConfigurationVertexPartition(
        g, weights='weight', resolution_parameter=resolution)
    partition_agg = partition.aggregate_partition()
    print(partition.summary())

    # Move nodes on the aggregate, push the result down, re-aggregate and
    # record the per-cell membership after every improving round.
    diff = optimiser.move_nodes(partition_agg)
    while diff > 0.0:
        partition.from_coarse_partition(partition_agg)
        partition_agg = partition_agg.aggregate_partition()
        tracking.append(partition.membership)
        print(partition_agg.summary())
        diff = optimiser.move_nodes(partition_agg)

    # One column per round, one row per cell; the last column is the final
    # assignment.
    df = pd.DataFrame(tracking, columns=aData.obs.index).T
    clusteringOutput = df.iloc[:, [-1]].copy(deep=True)
    # NOTE(review): the label is built from self.resolution, not from the
    # ``resolution`` argument - confirm the two are always the same.
    colLabel = 'kValue_{}_resolution_{}'.format(self.kValue,
                                                int(self.resolution))
    clusteringOutput.columns = [colLabel]

    # Relabel cells of clusters smaller than clusterMin as -1.
    clusteringOutputGrouped = clusteringOutput.groupby(colLabel).size()
    toZero = clusteringOutputGrouped[
        clusteringOutputGrouped < int(clusterMin)].index.values.tolist()
    mask = clusteringOutput[colLabel].isin(toZero)
    clusteringOutput[colLabel] = clusteringOutput[colLabel].where(
        ~mask, other=-1)

    print('Clustering yields {} clusters with at least {} cells'.format(
        clusteringOutput[colLabel].unique().astype(int).max(), clusterMin))

    return df, clusteringOutput
def run_louvain(graph,
                config_model='Default',
                overlap=False,
                directed=False,
                deep=False,
                interslice_weight=0.1,
                resolution_parameter=0.1,
                seed=None):
    """Run Louvain on edge-list file(s) and write the clusters to stdout.

    Each input file holds one edge per line as ``source target [weight]``
    with integer node ids. The result is written to stdout as
    ``parent,child,type;`` records, where ``type`` is ``c-m`` for a
    cluster-to-member link and ``c-c`` for a cluster-to-cluster link.

    :param graph: path of an edge-list file, or a list of such paths
        (multiplex mode)
    :param config_model: 'RB', 'RBER', 'CPM', 'Surprise' or 'Significance';
        any other value selects plain modularity
    :param overlap: bool; when True a single input file is treated as four
        identical slices of a multiplex problem
    :param directed: bool, whether the edges are directed
    :param deep: bool, whether to output the full hierarchy instead of only
        the final clusters; not supported in multiplex mode
    :param interslice_weight: coupling weight between slices (multiplex mode)
    :param resolution_parameter: resolution for the partition types that
        accept one
    :param seed: RNG seed; the generator is only seeded when not ``None``
    :return: 0 on success, 1 when a negative edge weight is found or no
        cluster is produced
    """
    if seed is not None:
        louvain.set_rng_seed(seed)

    def resolution_kwargs(partition_type, resolution_parameter):
        # Only these partition types take a resolution parameter.
        if partition_type in (louvain.RBConfigurationVertexPartition,
                              louvain.RBERVertexPartition,
                              louvain.CPMVertexPartition):
            return {'resolution_parameter': resolution_parameter}
        return {}

    def louvain_hierarchy_output(partition):
        # Collect the communities after every improving aggregation level.
        optimiser = louvain.Optimiser()
        partition_agg = partition.aggregate_partition()
        partition_layers = []
        while optimiser.move_nodes(partition_agg) > 0:
            partition.from_coarse_partition(partition_agg)
            partition_agg = partition_agg.aggregate_partition()
            partition_layers.append(list(partition))
        return partition_layers

    def louvain_multiplex(graphs, partition_type, interslice_weight,
                          resolution_parameter):
        # Optimise all slices plus the interslice coupling layer together.
        layers, interslice_layer, G_full = louvain.time_slices_to_layers(
            graphs, vertex_id_attr='name', interslice_weight=interslice_weight)
        extra = resolution_kwargs(partition_type, resolution_parameter)
        partitions = [partition_type(H, **extra) for H in layers]
        interslice_partition = partition_type(
            interslice_layer, weights='weight', **extra)
        all_partitions = partitions + [interslice_partition]
        optimiser = louvain.Optimiser()
        optimiser.optimise_partition_multiplex(all_partitions)
        quality = sum(p.quality() for p in all_partitions)
        return partitions[0], quality

    def partition_to_clust(graphs, partition, min_size_cut=2):
        # Translate vertex indices to node names; drop clusters smaller than
        # min_size_cut and order the rest from largest to smallest.
        if not isinstance(graphs, list):
            graphs = [graphs]
        node_names = []
        for g in graphs:
            node_names.extend(g.vs['name'])
        clusts = []
        for i in range(len(partition)):
            clust = list(set(node_names[vertex] for vertex in partition[i]))
            if len(clust) < min_size_cut:
                continue
            clust.sort()
            clusts.append(clust)
        return sorted(clusts, key=len, reverse=True)

    def read_edge_list(path):
        # Parse one edge-list file into (edge tuples, weighted flag); report
        # a negative weight on stderr and return None.
        with open(path, 'r') as f:
            rows = f.read().splitlines()
        # The first line decides whether the file carries a weight column.
        is_weighted = len(rows[0].split()) == 3
        edges = []
        for row in rows:
            elts = row.split()
            for j in range(2):
                elts[j] = int(elts[j])
            if is_weighted:
                elts[2] = float(elts[2])
                if elts[2] < 0:
                    sys.stderr.write("negative edge weight not allowed")
                    return None
            edges.append(tuple(elts))
        return edges, is_weighted

    def emit(parent, child, kind):
        # Write one ``parent,child,kind;`` record.
        sys.stdout.write('{},{},{};'.format(parent, child, kind))

    multi = isinstance(graph, list)
    if overlap and not multi:
        # Overlap mode: treat the single network as four identical slices.
        multi = True
        graph = [graph] * 4

    if multi and deep:
        sys.stderr.write(
            'louvain does not support hierarchical clustering with overlapped communities'
        )
        sys.exit()

    if config_model == 'RB':
        partition_type = louvain.RBConfigurationVertexPartition
    elif config_model == 'RBER':
        partition_type = louvain.RBERVertexPartition
    elif config_model == 'CPM':
        partition_type = louvain.CPMVertexPartition
    elif config_model == 'Surprise':
        partition_type = louvain.SurpriseVertexPartition
    elif config_model == "Significance":
        partition_type = louvain.SignificanceVertexPartition
    else:
        sys.stderr.write("Not specifying the configuration model; "
                         "perform simple Louvain.")
        partition_type = louvain.ModularityVertexPartition

    weighted = False
    if multi:
        G = []
        weighted_flags = []
        for path in graph:
            parsed = read_edge_list(path)
            if parsed is None:
                return 1
            edges, weighted = parsed
            G.append(igraph.Graph.TupleList(edges, directed=directed,
                                            weights=weighted))
            weighted_flags.append(weighted)
        if True in weighted_flags and False in weighted_flags:
            raise Exception('all graphs should follow the same format')
        if partition_type == louvain.CPMVertexPartition and directed is True:
            raise Exception('graph for CPMVertexPartition must be undirected')
        if partition_type == louvain.SignificanceVertexPartition and weighted is True:
            raise Exception('SignificanceVertexPartition only support '
                            'unweighted graphs')
        partition, _ = louvain_multiplex(G, partition_type,
                                         interslice_weight,
                                         resolution_parameter)
    else:
        parsed = read_edge_list(graph)
        if parsed is None:
            return 1
        edges, weighted = parsed
        G = igraph.Graph.TupleList(edges, directed=directed, weights=weighted)
        weights = G.es['weight'] if weighted else None
        # NOTE(review): ``weights`` is passed to every partition type here,
        # including SignificanceVertexPartition - confirm it accepts it.
        partition = partition_type(
            G, weights=weights,
            **resolution_kwargs(partition_type, resolution_parameter))
        if not deep:
            optimiser = louvain.Optimiser()
            optimiser.optimise_partition(partition)

    if not deep:
        clusts = partition_to_clust(G, partition)
        if len(clusts) == 0:
            sys.stderr.write(
                "No cluster; Resolution parameter may be too extreme")
            return 1

        # Cluster ids start right after the largest node id.
        maxNode = 0
        for clust in clusts:
            maxNode = max(maxNode, max(clust))

        root = maxNode + len(partition) + 1
        for i, clust in enumerate(clusts):
            emit(root, maxNode + i + 1, 'c-c')
            for n in clust:
                emit(maxNode + i + 1, n, 'c-m')
    else:
        partitions = louvain_hierarchy_output(partition)
        clusts_layers = [partition_to_clust(G, p) for p in partitions]
        # No improving level at all also means there is nothing to report.
        if not clusts_layers or len(clusts_layers[0]) == 0:
            sys.stderr.write(
                "No cluster; Resolution parameter may be too extreme")
            return 1

        maxNode = 0
        for clust in clusts_layers[0]:
            maxNode = max(maxNode, max(clust))

        # Lowest level: link every cluster to its member nodes.
        for i, clust in enumerate(clusts_layers[0]):
            for n in clust:
                emit(maxNode + i + 1, n, 'c-m')
        maxNode = maxNode + len(clusts_layers[0])

        # Higher levels: link each cluster to the first cluster of the next
        # level that contains all of its members.
        for i in range(1, len(clusts_layers)):
            lower = clusts_layers[i - 1]
            upper_sets = [set(clust) for clust in clusts_layers[i]]
            for j, child in enumerate(lower):
                for k, parent_members in enumerate(upper_sets):
                    if parent_members.issuperset(child):
                        emit(maxNode + k + 1, maxNode - len(lower) + j + 1,
                             'c-c')
                        break
            maxNode = maxNode + len(clusts_layers[i])

        # Attach every top-level cluster to a single root.
        top = clusts_layers[-1]
        for i in range(len(top)):
            emit(maxNode + 1, maxNode - len(top) + i + 1, 'c-c')

    sys.stdout.flush()
    return 0
def setUp(self):
    """Give the test case its own ``louvain.Optimiser`` instance."""
    fresh_optimiser = louvain.Optimiser()
    self.optimiser = fresh_optimiser
def run_alg(Gs, alg, gamma=1.0, sample=1.0, layer_weights=None):
    '''
    Run community detection algorithm with a resolution parameter.
    Right now only use RB in Louvain/Leiden

    Parameters
    ----------
    Gs : a list of igraph.Graph
        a single graph selects the plain mode, several graphs the
        multiplex mode
    alg : str
        choose between 'louvain' and 'leiden'
    gamma : float
        resolution parameter
    sample : float
        if smaller than 1, randomly delete a fraction of edges each time
    layer_weights : a list of float
        specifying layer weights in the multilayer setting; defaults to
        1.0 for every layer

    Returns
    ------
    C: scipy.sparse.csr_matrix
        a matrix recording the membership of each cluster

    Raises
    ------
    ValueError
        if `alg` is neither 'louvain' nor 'leiden'
    '''
    # Reject unknown algorithms up front; otherwise no branch below binds
    # `partitions` and the failure only surfaces later as an
    # UnboundLocalError.
    if alg not in ('louvain', 'leiden'):
        raise ValueError(
            "alg must be 'louvain' or 'leiden', got {!r}".format(alg))

    if len(Gs) == 1:
        G = Gs[0]
        # work on a perturbed graph when sub-sampling, on a copy otherwise
        G1 = network_perturb(G, sample) if sample < 1 else G.copy()
        if alg == 'louvain':
            partition = louvain.find_partition(
                G1,
                louvain.RBConfigurationVertexPartition,
                resolution_parameter=gamma)
        else:
            partition = leidenalg.find_partition(
                G1,
                leidenalg.RBConfigurationVertexPartition,
                resolution_parameter=gamma)
        partitions = [partition]
    else:
        # multiplex mode
        if layer_weights is None:
            layer_weights = [1.0 for _ in Gs]
        assert len(layer_weights) == len(
            Gs), 'layer weights inconsistent with the number of input networks'
        if sample < 1:
            Gs1 = [network_perturb(G, sample) for G in Gs]
        else:
            Gs1 = [G.copy() for G in Gs]
        if alg == 'louvain':
            partition_type = louvain.RBConfigurationVertexPartition
            optimiser = louvain.Optimiser()
            partitions = [
                partition_type(G, resolution_parameter=gamma) for G in Gs1
            ]
            optimiser.optimise_partition_multiplex(
                partitions, layer_weights=layer_weights)
        else:
            partition_type = leidenalg.RBConfigurationVertexPartition
            optimiser = leidenalg.Optimiser()
            partitions = [
                partition_type(G, resolution_parameter=gamma) for G in Gs1
            ]
            # -1 means iterate until no further optimization
            optimiser.optimise_partition_multiplex(
                partitions, n_iterations=-1, layer_weights=layer_weights)

    # only the first partition is reported and converted below
    LOGGER.info('Resolution: {:.4f}; find {} clusters'.format(
        gamma, len(partitions[0])))
    return partition_to_membership_matrix(partitions[0])
def spectral_louvain(
    data: AnnData,
    rep: str = "pca",
    resolution: float = 1.3,
    rep_kmeans: str = "diffmap",
    n_clusters: int = 30,
    n_clusters2: int = 50,
    n_init: int = 10,
    n_jobs: int = -1,
    random_state: int = 0,
    class_label: str = "spectral_louvain_labels",
) -> None:
    """ Cluster the data using Spectral Louvain algorithm.

    A KMeans-based labelling seeds a louvain RB configuration partition of
    the affinity graph; the aggregated partition is then optimised and the
    result is projected back onto the cells.

    Parameters
    ----------
    data: ``anndata.AnnData``
        Annotated data matrix with rows for cells and columns for genes.

    rep: ``str``, optional, default: ``"pca"``
        The embedding representation used for clustering. The affinity matrix ``'W_' + rep`` must exist in ``data.uns``.

    resolution: ``float``, optional, default: ``1.3``
        Resolution factor. Higher resolution tends to find more clusters with smaller sizes.

    rep_kmeans: ``str``, optional, default: ``"diffmap"``
        The embedding representation on which the KMeans runs. ``'X_' + rep_kmeans`` must exist in ``data.obsm``; if it does not, PCA coordinates are used instead.

    n_clusters: ``int``, optional, default: ``30``
        The number of first level clusters.

    n_clusters2: ``int``, optional, default: ``50``
        The number of second level clusters.

    n_init: ``int``, optional, default: ``10``
        Number of kmeans tries for the first level clustering.

    n_jobs: ``int``, optional, default: ``-1``
        Number of threads to use. If ``-1``, use all available threads.

    random_state: ``int``, optional, default: ``0``
        Random seed for reproducing results.

    class_label: ``str``, optional, default: ``"spectral_louvain_labels"``
        Key name for storing cluster labels in ``data.obs``.

    Returns
    -------
    ``None``

    Update ``data.obs``:
        * ``data.obs[class_label]``: Cluster labels for cells as categorical data.

    Raises
    ------
    ValueError
        If neither the requested KMeans representation nor PCA is available, or if the affinity matrix is missing.

    Examples
    --------
    >>> pg.spectral_louvain(adata)
    """
    start = time.time()

    def embedding_missing(name):
        return "X_" + name not in data.obsm.keys()

    if embedding_missing(rep_kmeans):
        logger.warning("{} is not calculated, switch to pca instead.".format(rep_kmeans))
        rep_kmeans = "pca"
        if embedding_missing(rep_kmeans):
            raise ValueError("Please run {} first!".format(rep_kmeans))

    affinity_key = "W_" + rep
    if affinity_key not in data.uns:
        raise ValueError("Cannot find affinity matrix. Please run neighbors first!")

    # KMeans labels are the starting membership for the louvain partition.
    seed_labels = partition_cells_by_kmeans(
        data, rep_kmeans, n_jobs, n_clusters, n_clusters2, n_init, random_state
    )

    graph = construct_graph(data.uns[affinity_key])
    fine_partition = louvain_module.RBConfigurationVertexPartition(
        graph,
        resolution_parameter=resolution,
        weights="weight",
        initial_membership=seed_labels,
    )

    # Optimise on the aggregated graph, then project the result back.
    coarse_partition = fine_partition.aggregate_partition()
    optimiser = louvain_module.Optimiser()
    optimiser.set_rng_seed(random_state)
    optimiser.optimise_partition(coarse_partition)
    fine_partition.from_coarse_partition(coarse_partition)

    # Cluster labels are 1-based strings, ordered naturally as categories.
    cluster_names = np.array([str(member + 1) for member in fine_partition.membership])
    data.obs[class_label] = pd.Categorical(
        values=cluster_names, categories=natsorted(np.unique(cluster_names))
    )

    end = time.time()
    logger.info(
        "Spectral Louvain clustering is done. Time spent = {:.2f}s.".format(end - start)
    )
def iterative_multilayer_resolution_parameter_estimation(
        G_intralayer,
        G_interlayer,
        layer_vec,
        gamma=1.0,
        omega=1.0,
        gamma_tol=1e-2,
        omega_tol=5e-2,
        omega_max=1000,
        max_iter=25,
        model='temporal',
        verbose=False):
    """
    Multilayer variant of ALG. 1 from "Relating modularity maximization and stochastic block models in multilayer
    networks." The nested functions here are just used to match the pseudocode in the paper.

    Note that the 'weight' edge attribute of both input graphs is written to: missing intralayer weights are set
    to 1.0, and interlayer weights are overwritten with the current omega on every iteration.

    :param G_intralayer: input graph containing all intra-layer edges
    :param G_interlayer: input graph containing all inter-layer edges
    :param layer_vec: vector of each vertex's layer membership
    :param gamma: starting gamma value
    :param omega: starting omega value
    :param gamma_tol: convergence tolerance for gamma
    :param omega_tol: convergence tolerance for omega
    :param omega_max: maximum allowed value for omega
    :param max_iter: maximum number of iterations
    :param model: network layer topology (temporal, multilevel, multiplex)
    :param verbose: whether or not to print verbose output
    :return: gamma, omega to which the iteration converged and the resulting partition
    :raises ValueError: if ``model`` is not one of the supported topologies, or if an iteration yields a degenerate
                        partition or an estimate of p outside [0, 1]
    """
    # Compare model names by value (==), never by identity: `is` only works
    # for strings the interpreter happens to have interned.
    if model not in ('temporal', 'multilevel', 'multiplex'):
        raise ValueError(
            "Model {} is not temporal, multilevel, or multiplex".format(model))

    # Look the attribute up among the edge attribute names; `in` on the edge
    # sequence itself does not test attribute names.
    if 'weight' not in G_intralayer.edge_attributes():
        G_intralayer.es['weight'] = [1.0] * G_intralayer.ecount()
    G_interlayer.es['weight'] = [omega] * G_interlayer.ecount()

    T = max(layer_vec) + 1  # layer count
    optimiser = louvain.Optimiser()

    # total intralayer edge weight per layer
    m_t = [0] * T
    for e in G_intralayer.es:
        m_t[layer_vec[e.source]] += e['weight']

    # node counts: N assumes equally sized layers, Nt is the actual count
    N = G_intralayer.vcount() // T
    Nt = [0] * T
    for layer in layer_vec:
        Nt[layer] += 1

    check_multilayer_graph_consistency(G_intralayer, G_interlayer, layer_vec,
                                       model, m_t, T, N, Nt)

    # The omega update divides by T for the multiplex model and by 2 otherwise.
    omega_scale = T if model == 'multiplex' else 2

    def update_omega(theta_in, theta_out, p, K):
        # if p is 1, the optimal omega is infinite (here, omega_max)
        if not p < 1.0:
            return omega_max
        if theta_out == 0:
            log_ratio = log(theta_in)
        else:
            log_ratio = log(theta_in) - log(theta_out)
        return log(1 + p * K / (1 - p)) / (omega_scale * log_ratio)

    # TODO: non-uniform cases
    # model affects SBM parameter estimation and the updating of omega
    if model == 'temporal':

        def calculate_persistence(community):
            # ordinal persistence
            return sum(community[e.source] == community[e.target]
                       for e in G_interlayer.es) / (N * (T - 1))
    elif model == 'multilevel':

        def calculate_persistence(community):
            # multilevel persistence
            pers_per_layer = [0] * T
            for e in G_interlayer.es:
                pers_per_layer[layer_vec[e.target]] += (
                    community[e.source] == community[e.target])
            pers_per_layer = [pers_per_layer[t] / Nt[t] for t in range(T)]
            return sum(pers_per_layer) / (T - 1)
    else:  # multiplex (validated above)

        def calculate_persistence(community):
            # categorical persistence
            return sum(community[e.source] == community[e.target]
                       for e in G_interlayer.es) / (N * T * (T - 1))

    def maximize_modularity(intralayer_resolution, interlayer_resolution):
        # RBConfigurationVertexPartitionWeightedLayers implements a multilayer version of "standard" modularity (i.e.
        # the Reichardt and Bornholdt's Potts model with configuration null model).
        G_interlayer.es['weight'] = interlayer_resolution
        intralayer_part = louvain.RBConfigurationVertexPartitionWeightedLayers(
            G_intralayer,
            layer_vec=layer_vec,
            weights='weight',
            resolution_parameter=intralayer_resolution)
        interlayer_part = louvain.CPMVertexPartition(G_interlayer,
                                                     resolution_parameter=0.0,
                                                     weights='weight')
        optimiser.optimise_partition_multiplex(
            [intralayer_part, interlayer_part])
        return intralayer_part

    def estimate_SBM_parameters(partition):
        K = len(partition)
        community = partition.membership

        # per-layer weight of intralayer edges that stay inside a community
        m_t_in = [0] * T
        for e in G_intralayer.es:
            if community[e.source] == community[e.target] and layer_vec[
                    e.source] == layer_vec[e.target]:
                m_t_in[layer_vec[e.source]] += e['weight']

        # per-layer, per-community sum of weighted degrees
        kappa_t_r_list = [[0] * K for _ in range(T)]
        for e in G_intralayer.es:
            layer = layer_vec[e.source]
            kappa_t_r_list[layer][community[e.source]] += e['weight']
            kappa_t_r_list[layer][community[e.target]] += e['weight']
        sum_kappa_t_sqr = [
            sum(x**2 for x in kappa_t_r_list[t]) for t in range(T)
        ]

        theta_in = sum(2 * m_t_in[t] for t in range(T)) / sum(
            sum_kappa_t_sqr[t] / (2 * m_t[t]) for t in range(T))

        # guard for div by zero with single community partition
        if K > 1:
            theta_out = sum(2 * m_t[t] - 2 * m_t_in[t] for t in range(T)) / \
                sum(2 * m_t[t] - sum_kappa_t_sqr[t] / (2 * m_t[t]) for t in range(T))
        else:
            theta_out = 0

        pers = calculate_persistence(community)
        if model == 'multiplex':
            # estimate p by solving polynomial root-finding problem with starting estimate p=0.5
            def f(x):
                coeff = 2 * (1 - 1 / K) / (T * (T - 1))
                return coeff * sum(
                    (T - n) * x**n for n in range(1, T)) + 1 / K - pers

            # guard for div by zero with single community partition
            # (in this case, all community assignments persist across layers)
            p = fsolve(f, np.array([0.5]))[0] if pers < 1.0 and K > 1 else 1.0
        else:
            # guard for div by zero with single community partition
            # (in this case, all community assignments persist across layers)
            p = max(
                (K * pers - 1) / (K - 1), 0) if pers < 1.0 and K > 1 else 1.0

        return theta_in, theta_out, p, K

    def update_gamma(theta_in, theta_out):
        if theta_out == 0:
            return theta_in / log(theta_in)
        return (theta_in - theta_out) / (log(theta_in) - log(theta_out))

    part, K, last_gamma, last_omega = (None, ) * 4
    for iteration in range(max_iter):
        part = maximize_modularity(gamma, omega)
        theta_in, theta_out, p, K = estimate_SBM_parameters(part)

        # theta_in of 0 or 1 makes log(theta_in) unusable in the updates
        if theta_in == 0 or theta_in == 1:
            raise ValueError(
                "gamma={:.3f}, omega={:.3f} resulted in degenerate partition".
                format(gamma, omega))

        if not 0.0 <= p <= 1.0:
            raise ValueError(
                "gamma={:.3f}, omega={:.3f} resulted in impossible estimate p={:.3f}"
                "".format(gamma, omega, p))

        last_gamma, last_omega = gamma, omega
        gamma = update_gamma(theta_in, theta_out)
        omega = update_omega(theta_in, theta_out, p, K)

        if verbose:
            print(
                "Iter {:>2}: {} communities with Q={:.3f}, gamma={:.3f}->{:.3f}, omega={:.3f}->{:.3f}, and p={:.3f}"
                "".format(iteration, K, part.q, last_gamma, gamma, last_omega,
                          omega, p))

        if abs(gamma - last_gamma) < gamma_tol and abs(omega -
                                                       last_omega) < omega_tol:
            break  # gamma and omega converged
    else:
        if verbose:
            print(
                "Parameters failed to converge within {} iterations. "
                "Final move of ({:.3f}, {:.3f}) was not within tolerance ({}, {})"
                "".format(max_iter, abs(gamma - last_gamma),
                          abs(omega - last_omega), gamma_tol, omega_tol))

    if verbose:
        print("Returned {} communities with Q={:.3f}, gamma={:.3f}, "
              "and omega={:.3f}".format(K, part.q, gamma, omega))

    return gamma, omega, part
def spectral_louvain(
    data: MultimodalData,
    rep: str = "pca",
    resolution: float = 1.3,
    rep_kmeans: str = "diffmap",
    n_clusters: int = 30,
    n_clusters2: int = 50,
    n_init: int = 10,
    n_jobs: int = -1,
    random_state: int = 0,
    class_label: str = "spectral_louvain_labels",
) -> None:
    """ Cluster the data using Spectral Louvain algorithm. [Li20]_

    A KMeans-based labelling seeds a louvain RB configuration partition of
    the affinity graph; the aggregated partition is then optimised and the
    result is projected back onto the cells.

    Parameters
    ----------
    data: ``pegasusio.MultimodalData``
        Annotated data matrix with rows for cells and columns for genes.

    rep: ``str``, optional, default: ``"pca"``
        The embedding representation used for clustering. The affinity matrix ``'W_' + rep`` must exist in ``data.obsp``.

    resolution: ``float``, optional, default: ``1.3``
        Resolution factor. Higher resolution tends to find more clusters with smaller sizes.

    rep_kmeans: ``str``, optional, default: ``"diffmap"``
        The embedding representation on which the KMeans runs. ``'X_' + rep_kmeans`` must exist in ``data.obsm``; if it does not, PCA coordinates are used instead.

    n_clusters: ``int``, optional, default: ``30``
        The number of first level clusters.

    n_clusters2: ``int``, optional, default: ``50``
        The number of second level clusters.

    n_init: ``int``, optional, default: ``10``
        Number of kmeans tries for the first level clustering.

    n_jobs : ``int``, optional, default: ``-1``
        Number of threads to use for the KMeans step. -1 refers to using all physical CPU cores.

    random_state: ``int``, optional, default: ``0``
        Random seed for reproducing results.

    class_label: ``str``, optional, default: ``"spectral_louvain_labels"``
        Key name for storing cluster labels in ``data.obs``.

    Returns
    -------
    ``None``

    Update ``data.obs``:
        * ``data.obs[class_label]``: Cluster labels for cells as categorical data.

    Raises
    ------
    ValueError
        If neither the requested KMeans representation nor PCA is available, or if the affinity matrix is missing.

    Examples
    --------
    >>> pg.spectral_louvain(data)
    """
    # louvain is imported lazily; the process exits if it is unavailable.
    try:
        import louvain as louvain_module
    except ImportError:
        import sys

        logger.error(
            "Need louvain! Try 'pip install louvain' or 'conda install -c conda-forge louvain'."
        )
        sys.exit(-1)

    def embedding_missing(name):
        return f"X_{name}" not in data.obsm.keys()

    if embedding_missing(rep_kmeans):
        logger.warning(
            f"{rep_kmeans} is not calculated, switch to pca instead.")
        rep_kmeans = "pca"
        if embedding_missing(rep_kmeans):
            raise ValueError(f"Please run {rep_kmeans} first!")

    affinity_key = f"W_{rep}"
    if affinity_key not in data.obsp:
        raise ValueError(
            "Cannot find affinity matrix. Please run neighbors first!")

    # KMeans labels are the starting membership for the louvain partition.
    seed_labels = partition_cells_by_kmeans(
        data.obsm[f"X_{rep_kmeans}"],
        n_clusters,
        n_clusters2,
        n_init,
        n_jobs,
        random_state,
    )

    graph = construct_graph(data.obsp[affinity_key])
    fine_partition = louvain_module.RBConfigurationVertexPartition(
        graph,
        resolution_parameter=resolution,
        weights="weight",
        initial_membership=seed_labels,
    )

    # Optimise on the aggregated graph, then project the result back.
    coarse_partition = fine_partition.aggregate_partition()
    optimiser = louvain_module.Optimiser()
    optimiser.set_rng_seed(random_state)
    optimiser.optimise_partition(coarse_partition)
    fine_partition.from_coarse_partition(coarse_partition)

    # Cluster labels are 1-based strings, ordered naturally as categories.
    cluster_names = np.array(
        [str(member + 1) for member in fine_partition.membership])
    data.obs[class_label] = pd.Categorical(
        values=cluster_names, categories=natsorted(np.unique(cluster_names)))
    data.register_attr(class_label, "cluster")

    n_clusters = data.obs[class_label].cat.categories.size
    logger.info(
        f"Spectral Louvain clustering is done. Get {n_clusters} clusters.")
def run_louvain_multilayer(intralayer_graph,
                           interlayer_graph,
                           layer_vec,
                           weight='weight',
                           resolution=1.0,
                           omega=1.0,
                           nruns=1):
    """Run multilayer louvain `nruns` times on randomly relabelled graphs.

    For every run the vertices of both graphs are permuted, the intralayer
    partition(s) and a coupling partition on the interlayer graph are
    optimised together, and the resulting membership is mapped back to the
    original vertex order.

    :param intralayer_graph: igraph graph holding all intralayer edges
    :param interlayer_graph: igraph graph holding all interlayer edges
    :param layer_vec: layer index of every vertex
    :param weight: name of the edge weight attribute
    :param resolution: intralayer resolution; either a scalar or a sequence
        indexed by layer
    :param omega: weight given to the coupling partition
    :param nruns: number of independent runs
    :return: list with one dict per run, keyed by 'partition', 'resolution',
        'coupling', 'orig_mod', 'int_edges', 'exp_edges', 'int_inter_edges'
    """
    logging.debug('Shuffling node ids')
    t = time()
    mu = np.sum(intralayer_graph.es[weight]) + interlayer_graph.ecount()
    # newer louvain builds offer a single partition type covering all layers
    use_RBCweighted = hasattr(louvain,
                              'RBConfigurationVertexPartitionWeightedLayers')
    outparts = []
    for _ in range(nruns):
        rand_perm = list(np.random.permutation(interlayer_graph.vcount()))
        rperm = rev_perm(rand_perm)
        interslice_layer_rand = interlayer_graph.permute_vertices(rand_perm)
        rlayer_vec = permute_vector(rand_perm, layer_vec)
        rintralayer_graph = intralayer_graph.permute_vertices(rand_perm)

        if use_RBCweighted:
            # one layer representing all intralayer connections here.
            # It has to be the *permuted* graph: it is optimised together
            # with the permuted layer vector and permuted coupling graph,
            # and its membership is reverse-permuted below.
            rlayers = [rintralayer_graph]
        else:
            rlayers = _create_multilayer_igraphs_from_super_adj_igraph(
                rintralayer_graph, layer_vec=rlayer_vec)
        logging.debug('time: {:.4f}'.format(time() - t))

        # create the partition objects
        logging.debug('creating partition objects')
        t = time()
        layer_partition_objs = []
        for i, layer in enumerate(
                rlayers):  # these are the shuffled igraph slice objects
            # resolution may be per-layer (indexable) or a single scalar
            try:
                res = resolution[i]
            except (TypeError, IndexError, KeyError):
                res = resolution

            if use_RBCweighted:
                cpart = louvain.RBConfigurationVertexPartitionWeightedLayers(
                    layer,
                    layer_vec=rlayer_vec,
                    weights=weight,
                    resolution_parameter=res)
            else:
                # This creates individual VertexPartition for each layer. Much slower to optimize.
                cpart = louvain.RBConfigurationVertexPartition(
                    layer, weights=weight, resolution_parameter=res)
            layer_partition_objs.append(cpart)

        coupling_partition = louvain.RBConfigurationVertexPartition(
            interslice_layer_rand, weights=weight, resolution_parameter=0)
        all_layer_partobjs = layer_partition_objs + [coupling_partition]
        optimiser = louvain.Optimiser()
        logging.debug('time: {:.4f}'.format(time() - t))

        logging.debug('running optimiser')
        t = time()
        layer_weights = [1] * len(rlayers) + [omega]
        optimiser.optimise_partition_multiplex(all_layer_partobjs,
                                               layer_weights=layer_weights)

        # the membership for each of the partitions is tied together.
        finalpartition = permute_vector(rperm,
                                        all_layer_partobjs[0].membership)

        # go back and reverse the graphs associated with each of the partobj.
        # this allows for properly calculating exp edges with partobj
        # This is not ideal. Could we just reverse the permutation?
        reversed_partobj = []
        for layer in layer_partition_objs:
            if use_RBCweighted:
                reversed_partobj.append(
                    louvain.RBConfigurationVertexPartitionWeightedLayers(
                        graph=layer.graph.permute_vertices(rperm),
                        initial_membership=finalpartition,
                        weights=weight,
                        layer_vec=layer_vec,
                        resolution_parameter=layer.resolution_parameter))
            else:
                reversed_partobj.append(
                    louvain.RBConfigurationVertexPartition(
                        graph=layer.graph.permute_vertices(rperm),
                        initial_membership=finalpartition,
                        weights=weight,
                        resolution_parameter=layer.resolution_parameter))
        coupling_partition_rev = louvain.RBConfigurationVertexPartition(
            graph=coupling_partition.graph.permute_vertices(rperm),
            initial_membership=finalpartition,
            weights=weight,
            resolution_parameter=0)

        # use only the intralayer part objs
        A = _get_sum_internal_edges_from_partobj_list(reversed_partobj,
                                                      weight=weight)
        if use_RBCweighted:
            # should only one partobj here representing all layers
            P = get_expected_edges_ml(reversed_partobj[0],
                                      layer_vec=layer_vec,
                                      weight=weight)
        else:
            P = _get_sum_expected_edges_from_partobj_list(reversed_partobj,
                                                          weight=weight)
        C = get_sum_internal_edges(coupling_partition_rev, weight=weight)

        orig_mod = (.5 / mu) * (
            _get_modularity_from_partobj_list(reversed_partobj) +
            omega * coupling_partition_rev.quality())
        outparts.append({
            'partition': np.array(finalpartition),
            'resolution': resolution,
            'coupling': omega,
            'orig_mod': orig_mod,
            'int_edges': A,
            'exp_edges': P,
            'int_inter_edges': C
        })
        logging.debug('time: {:.4f}'.format(time() - t))
    return outparts
def iterative_multilayer_resolution_parameter_estimation(
        G_intralayer,
        G_interlayer,
        layer_vec,
        gamma=1.0,
        omega=1.0,
        gamma_tol=1e-2,
        omega_tol=5e-2,
        omega_max=1000,
        max_iter=25,
        model='temporal',
        verbose=False):
    """
    Multilayer variant of ALG. 1 from "Relating modularity maximization and stochastic block models in multilayer
    networks." The nested functions here are just used to match the pseudocode in the paper.

    Graphs lacking a 'weight' edge attribute get one set to 1.0 on every edge (the inputs are modified in place).

    :param G_intralayer: intralayer graph of interest
    :type G_intralayer: igraph.Graph
    :param G_interlayer: interlayer graph of interest
    :type G_interlayer: igraph.Graph
    :param layer_vec: list of each vertex's layer membership
    :type layer_vec: list[int]
    :param gamma: starting gamma value
    :type gamma: float
    :param omega: starting omega value
    :type omega: float
    :param gamma_tol: convergence tolerance for gamma
    :type gamma_tol: float
    :param omega_tol: convergence tolerance for omega
    :type omega_tol: float
    :param omega_max: maximum allowed value for omega
    :type omega_max: float
    :param max_iter: maximum number of iterations
    :type max_iter: int
    :param model: network layer topology (temporal, multilevel, multiplex)
    :type model: str
    :param verbose: whether or not to print verbose output
    :type verbose: bool
    :return:
        - gamma to which the iteration converged
        - omega to which the iteration converged
        - the resulting partition object, as returned by ``multilayer_louvain(..., return_partition=True)``
    :rtype: tuple
    :raises ValueError: if an iteration yields an estimate of p outside [0, 1] or a degenerate partition
    """
    # Look the attribute up among the edge attribute names; `in` on the edge
    # sequence itself does not test attribute names.
    if 'weight' not in G_intralayer.edge_attributes():
        G_intralayer.es['weight'] = [1.0] * G_intralayer.ecount()

    if 'weight' not in G_interlayer.edge_attributes():
        G_interlayer.es['weight'] = [1.0] * G_interlayer.ecount()

    T = max(layer_vec) + 1  # layer count
    optimiser = louvain.Optimiser()

    # compute total edge weights per layer
    m_t = [0] * T
    for e in G_intralayer.es:
        m_t[layer_vec[e.source]] += e['weight']

    # compute total node counts per layer
    N = G_intralayer.vcount() // T
    Nt = [0] * T
    for layer in layer_vec:
        Nt[layer] += 1

    check_multilayer_graph_consistency(G_intralayer, G_interlayer, layer_vec,
                                       model, m_t, T, N, Nt)
    update_omega = omega_function_from_model(model, omega_max, T=T)
    update_gamma = gamma_estimate_from_parameters

    def maximize_modularity(intralayer_resolution, interlayer_resolution):
        return multilayer_louvain(G_intralayer,
                                  G_interlayer,
                                  layer_vec,
                                  intralayer_resolution,
                                  interlayer_resolution,
                                  optimiser=optimiser,
                                  return_partition=True)

    def estimate_SBM_parameters(partition):
        return estimate_multilayer_SBM_parameters(G_intralayer,
                                                  G_interlayer,
                                                  layer_vec,
                                                  partition,
                                                  model,
                                                  N=N,
                                                  T=T,
                                                  Nt=Nt,
                                                  m_t=m_t)

    part, K, last_gamma, last_omega = (None, ) * 4
    for iteration in range(max_iter):
        part = maximize_modularity(gamma, omega)
        theta_in, theta_out, p, K = estimate_SBM_parameters(part)

        if not 0.0 <= p <= 1.0:
            raise ValueError(
                f"gamma={gamma:.3f}, omega={omega:.3f} resulted in impossible estimate p={p:.3f}"
            )

        last_gamma, last_omega = gamma, omega
        gamma = update_gamma(theta_in, theta_out)

        # a None gamma estimate is reported as a degenerate partition
        if gamma is None:
            raise ValueError(
                f"gamma={last_gamma:.3f}, omega={last_omega:.3f} resulted in degenerate partition"
            )

        omega = update_omega(theta_in, theta_out, p, K)

        if verbose:
            print(
                f"Iter {iteration:>2}: {K} communities with Q={part.q:.3f}, gamma={last_gamma:.3f}->{gamma:.3f}, "
                f"omega={last_omega:.3f}->{omega:.3f}, and p={p:.3f}")

        if abs(gamma - last_gamma) < gamma_tol and abs(omega -
                                                       last_omega) < omega_tol:
            break  # gamma and omega converged
    else:
        if verbose:
            print(
                f"Parameters failed to converge within {max_iter} iterations. "
                f"Final move of ({abs(gamma - last_gamma):.3f}, {abs(omega - last_omega):.3f}) "
                f"was not within tolerance ({gamma_tol}, {omega_tol})")

    if verbose:
        print(
            f"Returned {K} communities with Q={part.q:.3f}, gamma={gamma:.3f}, and omega={omega:.3f}"
        )

    return gamma, omega, part
def run_louvain(graph,
                config_model='RB',
                overlap=False,
                directed=False,
                interslice_weight=0.1,
                resolution_parameter=0.1):
    """
    Detect communities with louvain and write the hierarchy to stdout.

    The output is a sequence of ``parent,child,type;`` records: one
    ``term-term`` record linking the root to each community, and one
    ``term-gene`` record per community member. Community ids are numbered
    above the largest node id found in the input.

    :param graph: input edge-list file (``source target [weight]`` per line
        with integer node ids), or a list of such files for multiplex mode
    :param config_model: 'RB', 'RBER', 'CPM', 'Surprise', 'Significance';
        anything else falls back to plain modularity
    :param overlap: bool, whether to enable overlapping community detection
        (a single input file is then used as four identical slices)
    :param directed: bool, whether the edge list is directed
    :param interslice_weight: coupling weight between slices (multiplex mode)
    :param resolution_parameter: resolution passed to the partition type
    :return: 0 on success, 1 when no community was found
    :raises Exception: in multiplex mode, when the input files mix weighted
        and unweighted formats, or the partition type does not support the
        requested directed/weighted input
    """

    def louvain_multiplex(graphs, partition_type, interslice_weight,
                          resolution_parameter=None):
        layers, interslice_layer, G_full = louvain.time_slices_to_layers(
            graphs, vertex_id_attr='name', interslice_weight=interslice_weight)
        # Pass the resolution by keyword (the second positional slot of the
        # partition constructors is not the resolution) and leave it out for
        # plain modularity, which takes none.
        if partition_type == louvain.ModularityVertexPartition:
            partitions = [partition_type(H) for H in layers]
            interslice_partition = partition_type(interslice_layer,
                                                  weights='weight')
        else:
            partitions = [
                partition_type(H, resolution_parameter=resolution_parameter)
                for H in layers
            ]
            interslice_partition = partition_type(
                interslice_layer,
                resolution_parameter=resolution_parameter,
                weights='weight')
        optimiser = louvain.Optimiser()
        optimiser.optimise_partition_multiplex(partitions +
                                               [interslice_partition])
        quality = sum(
            [p.quality() for p in partitions + [interslice_partition]])
        return partitions[0], quality

    def read_edge_list(path):
        # Parse one edge-list file into tuples; the first row decides
        # whether the whole file is treated as weighted.
        with open(path, 'r') as f:
            rows = f.read().splitlines()
        is_weighted = len(rows[0].split()) == 3
        edges = []
        for row in rows:
            elts = row.split()
            for j in range(2):
                elts[j] = int(elts[j])
            if is_weighted:
                elts[2] = float(elts[2])
            edges.append(tuple(elts))
        return edges, is_weighted

    multi = isinstance(graph, list)
    if overlap and not multi:
        # overlapping detection is run as a multiplex of identical slices
        multi = True
        graph = [graph] * 4

    model_classes = {
        'RB': 'RBConfigurationVertexPartition',
        'RBER': 'RBERConfigurationVertexPartition',
        'CPM': 'CPMVertexPartition',
        'Surprise': 'SurpriseVertexPartition',
        'Significance': 'SignificanceVertexPartition',
    }
    if config_model in model_classes:
        partition_type = getattr(louvain, model_classes[config_model])
    else:
        sys.stderr.write(
            "Not specifying the configuration model; perform simple Louvain.")
        partition_type = louvain.ModularityVertexPartition

    weighted = False
    if multi:
        wL = []
        G = []
        for path in graph:
            edges, weighted = read_edge_list(path)
            G.append(
                igraph.Graph.TupleList(edges,
                                       directed=directed,
                                       weights=weighted))
            wL.append(weighted)
        if True in wL and False in wL:
            raise Exception('all graphs should follow the same format')
        if partition_type == louvain.CPMVertexPartition and directed:
            raise Exception('graph for CPMVertexPartition must be undirected')
        if partition_type == louvain.SignificanceVertexPartition and weighted:
            raise Exception(
                'SignificanceVertexPartition only support unweighted graphs')
        partition, _ = louvain_multiplex(G, partition_type, interslice_weight,
                                         resolution_parameter)
        # Vertex index -> original node id.
        # NOTE(review): assumes the layer graphs keep the slices' vertices in
        # concatenated order — confirm against louvain.time_slices_to_layers.
        node_names = [name for g in G for name in g.vs['name']]
    else:
        edges, weighted = read_edge_list(graph)
        G = igraph.Graph.TupleList(edges, directed=directed, weights=weighted)
        weights = G.es['weight'] if weighted else None
        if partition_type == louvain.ModularityVertexPartition:
            # plain modularity has no resolution parameter
            partition = louvain.find_partition(G,
                                               partition_type,
                                               weights=weights)
        else:
            partition = louvain.find_partition(
                G,
                partition_type,
                weights=weights,
                resolution_parameter=resolution_parameter)
        optimiser = louvain.Optimiser()
        optimiser.optimise_partition(partition)
        # TupleList stores the original node id of each vertex under 'name'
        node_names = G.vs['name']

    n_comms = len(partition)
    if n_comms == 0:
        sys.stderr.write("No cluster; Resolution parameter may be too extreme")
        return 1

    # community ids start right above the largest node id; the root follows
    maxNode = max(node_names)
    root_id = str(maxNode + n_comms + 1)
    records = []
    for i in range(n_comms):
        comm_id = str(maxNode + i + 1)
        records.append(root_id + ',' + comm_id + ',' + 'term-term' + ';')
        # a node present in several slices is listed once per community
        members = dict.fromkeys(node_names[n] for n in partition[i])
        for name in members:
            records.append(comm_id + ',' + str(name) + ',' + 'term-gene' +
                           ';')
    sys.stdout.write(''.join(records))
    sys.stdout.flush()
    return 0
# For every resolution value: run the multiplex optimisation n_repl times,
# collect one membership vector per run, and write them to one CSV file.
for resolution in resolutions:
    memberships = []
    print('Detecting communities using resolution parameter {0}'.format(resolution))
    for itr in range(n_repl):
        print('\tRun {0:02d}'.format(itr))
        # One RB configuration partition per intraslice graph, plus a CPM
        # partition with resolution 0 on the interslice graph.
        partition_intraslice = [louvain.RBConfigurationVertexPartition(H, weights='weight', resolution_parameter=resolution) for H in G_intraslice]
        partition_interslice = louvain.CPMVertexPartition(G_interslice, weights='weight', node_sizes=G_interslice.vs['node_size'], resolution_parameter=0)
        ##%% Optimise partitions
        opt = louvain.Optimiser()
        opt.consider_comms = louvain.ALL_NEIGH_COMMS
        opt.optimise_partition_multiplex(partition_intraslice + [partition_interslice])
        # The membership in all partitions will be identical, so simply
        # consider the membership for the interslice partition and graph.
        memberships.append(partition_interslice.membership)
    ##%% Write results to file
    # One row per vertex of the interslice graph: every vertex attribute
    # as a column, followed by one 'run_<i>' membership column per run.
    cluster_df = pd.DataFrame({attr: G_interslice.vs[attr] for attr in G_interslice.vertex_attributes()}, index=[v.index for v in G_interslice.vs])
    membership_df = pd.DataFrame.from_records(zip(*memberships), columns=['run_{0}'.format(itr) for itr in range(n_repl)]);
    cluster_df = pd.concat([cluster_df, membership_df], axis=1)
    # NOTE(review): 'statenme' and 'year' are presumably vertex attributes of
    # G_interslice — confirm they exist before relying on this sort.
    cluster_df = cluster_df.sort_values(['statenme', 'year'])
    cluster_df.to_csv(output_dir + 'comms_{0}.csv'.format(resolution), index=False)
def run_louvain(graph, config_model='Default', overlap=False, directed=False,
                deep=False, interslice_weight=0.1, resolution_parameter=0.1,
                seed=None):
    """
    Run Louvain community detection on edge-list input and write the
    resulting cluster hierarchy to standard output.

    Every input row is ``source target [weight]`` separated by whitespace,
    with integer node ids. A file is treated as weighted when its first
    row has exactly three columns.

    The hierarchy is written as ``parent,child,type;`` records, where
    ``type`` is ``c-m`` when the child is a leaf of the hierarchy and
    ``c-c`` otherwise. Cluster ids are numbered above the largest node id.

    :param graph: path to an edge-list file, or a list of such paths; a
                  list is converted with ``louvain.time_slices_to_layers``
                  and optimised as a multiplex network
    :param config_model: 'RB', 'RBER', 'CPM', 'Surprise' or 'Significance';
                         any other value selects
                         ``louvain.ModularityVertexPartition``
    :param overlap: bool, whether to enable overlapping community
                    detection; for a single input file the same file is
                    then used four times as multiplex input
    :param directed: passed to ``igraph.Graph.TupleList``
    :param deep: when True, report every level returned by the nested
                 ``louvain_hierarchy_output`` instead of one flat level;
                 not available for multiplex input
    :param interslice_weight: passed to ``louvain.time_slices_to_layers``
    :param resolution_parameter: passed to every partition type except
                                 ``louvain.ModularityVertexPartition``
    :param seed: if not None, passed to ``louvain.set_rng_seed``
    :return: 0 on success;
             1 if ``deep`` is requested for multiplex input;
             2 if a negative edge weight is found in multiplex input;
             3 if ``graph`` is not a file, or a negative edge weight is
             found in a single input file;
             4 if the input file is empty, or no cluster is left when
             ``deep`` is False;
             5 if ``deep`` is True and no hierarchy level was produced;
             6 if ``deep`` is True and the first level has no cluster
    :raises Exception: for multiplex input when files mix weighted and
                       unweighted formats, when 'CPM' is combined with
                       ``directed``, or when 'Significance' is combined
                       with weighted input
    """
    if seed is not None:
        louvain.set_rng_seed(seed)

    def louvain_hierarchy_output(partition):
        """Return the clusters of ``partition`` after each aggregation
        round in which ``move_nodes`` still moved at least one node."""
        optimiser = louvain.Optimiser()
        partition_agg = partition.aggregate_partition()
        partition_layers = []
        while optimiser.move_nodes(partition_agg) > 0:
            partition.from_coarse_partition(partition_agg)
            partition_agg = partition_agg.aggregate_partition()
            partition_layers.append(list(partition))
        return partition_layers

    def louvain_multiplex(graphs, partition_type, interslice_weight,
                          resolution_parameter):
        """Optimise all layers of ``graphs`` together; return the
        partition of the first layer and the summed quality."""
        layers, interslice_layer, G_full = louvain.time_slices_to_layers(
            graphs, vertex_id_attr='name',
            interslice_weight=interslice_weight)
        if partition_type == louvain.ModularityVertexPartition:
            # this partition type is built without a resolution parameter
            partitions = [partition_type(H) for H in layers]
            interslice_partition = partition_type(interslice_layer,
                                                  weights='weight')
        else:
            partitions = [
                partition_type(H, resolution_parameter=resolution_parameter)
                for H in layers
            ]
            interslice_partition = partition_type(
                interslice_layer,
                resolution_parameter=resolution_parameter,
                weights='weight')
        all_partitions = partitions + [interslice_partition]
        optimiser = louvain.Optimiser()
        optimiser.optimise_partition_multiplex(all_partitions)
        quality = sum([p.quality() for p in all_partitions])
        return partitions[0], quality

    def partition_to_clust(graphs, partition, min_size_cut=2):
        """Translate vertex indices to node names and return the sorted,
        de-duplicated clusters with at least ``min_size_cut`` members,
        largest cluster first."""
        if not isinstance(graphs, list):
            graphs = [graphs]
        node_names = []
        for g in graphs:
            node_names.extend(g.vs['name'])
        clusts = []
        for members in partition:
            clust = sorted(set(node_names[idx] for idx in members))
            if len(clust) >= min_size_cut:
                clusts.append(clust)
        clusts.sort(key=len, reverse=True)
        return clusts

    def read_edge_list(path):
        """Parse one edge-list file into ``(rows, weighted)``.

        Returns None, after reporting the row on stderr, when a negative
        edge weight is encountered.
        """
        with open(path, 'r') as f:
            raw_rows = f.read().splitlines()
        # the first row decides the format of the whole file
        is_weighted = len(raw_rows[0].split()) == 3
        rows = []
        for i, text in enumerate(raw_rows):
            elts = text.split()
            elts[0] = int(elts[0])
            elts[1] = int(elts[1])
            if is_weighted:
                elts[2] = float(elts[2])
                if elts[2] < 0:
                    sys.stderr.write('encountered a negative edge weight '
                                     'on row ' + str(i) + ' (' + str(text) +
                                     ') which is not allowed\n')
                    return None
            rows.append(tuple(elts))
        return rows, is_weighted

    # NOTE: `overlap` and `deep` are compared explicitly (== True / == False)
    # so that non-bool arguments keep behaving exactly as before.
    multi = isinstance(graph, list)
    if overlap == True and not multi:
        multi = True
        graph = [graph] * 4

    if multi and deep == True:
        sys.stderr.write('louvain does not support hierarchical '
                         'clustering with overlapped communities\n')
        return 1

    if config_model == 'RB':
        partition_type = louvain.RBConfigurationVertexPartition
    elif config_model == 'RBER':
        partition_type = louvain.RBERConfigurationVertexPartition
    elif config_model == 'CPM':
        partition_type = louvain.CPMVertexPartition
    elif config_model == 'Surprise':
        partition_type = louvain.SurpriseVertexPartition
    elif config_model == "Significance":
        partition_type = louvain.SignificanceVertexPartition
    else:
        sys.stderr.write("Configuration model not set "
                         "performing simple Louvain.\n")
        partition_type = louvain.ModularityVertexPartition

    weighted = False
    if multi:
        formats = []
        G = []
        for path in graph:
            parsed = read_edge_list(path)
            if parsed is None:
                return 2
            rows, weighted = parsed
            G.append(igraph.Graph.TupleList(rows, directed=directed,
                                            weights=weighted))
            formats.append(weighted)
        if True in formats and False in formats:
            raise Exception('all graphs should follow the same format')
        if partition_type == louvain.CPMVertexPartition and directed is True:
            raise Exception('graph for CPMVertexPartition must be undirected')
        if partition_type == louvain.SignificanceVertexPartition and weighted:
            raise Exception('SignificanceVertexPartition only support '
                            'unweighted graphs')
        partition, _ = louvain_multiplex(G, partition_type,
                                         interslice_weight,
                                         resolution_parameter)
    else:
        if not os.path.isfile(graph):
            sys.stderr.write(str(graph) + ' is not a file\n')
            return 3
        if os.path.getsize(graph) == 0:
            sys.stderr.write(str(graph) + ' is an empty file\n')
            return 4
        parsed = read_edge_list(graph)
        if parsed is None:
            # same code as "not a file"; kept for existing callers
            return 3
        rows, weighted = parsed
        G = igraph.Graph.TupleList(rows, directed=directed, weights=weighted)
        weights = G.es['weight'] if weighted else None
        if partition_type == louvain.ModularityVertexPartition:
            partition = partition_type(G, weights=weights)
        else:
            partition = partition_type(
                G, weights=weights,
                resolution_parameter=resolution_parameter)
        if deep == False:
            optimiser = louvain.Optimiser()
            optimiser.optimise_partition(partition)

    # Build the hierarchy as "parent<TAB>child" rows.
    lines = []
    if deep == False:
        clusts = partition_to_clust(G, partition)
        if not clusts:
            sys.stderr.write(DEFAULT_ERR_MSG)
            return 4
        max_node = max(0, max(max(clust) for clust in clusts))
        root_id = str(max_node + len(partition) + 1)
        for i, clust in enumerate(clusts):
            cluster_id = str(max_node + i + 1)
            lines.append(root_id + '\t' + cluster_id)
            for n in clust:
                lines.append(cluster_id + '\t' + str(n))
    else:
        clusts_layers = [partition_to_clust(G, p)
                         for p in louvain_hierarchy_output(partition)]
        if not clusts_layers:
            sys.stderr.write(DEFAULT_ERR_MSG)
            return 5
        if not clusts_layers[0]:
            sys.stderr.write(DEFAULT_ERR_MSG)
            return 6
        max_node = max(0, max(max(clust) for clust in clusts_layers[0]))
        for i, clust in enumerate(clusts_layers[0]):
            for n in clust:
                lines.append(str(max_node + i + 1) + '\t' + str(n))
        max_node += len(clusts_layers[0])
        for i in range(1, len(clusts_layers)):
            finer = clusts_layers[i - 1]
            # sets give constant-time membership for the containment test
            coarser = [set(clust) for clust in clusts_layers[i]]
            for j, child in enumerate(finer):
                for k, parent in enumerate(coarser):
                    if parent.issuperset(child):
                        # link the child to the first cluster containing it
                        lines.append(
                            str(max_node + k + 1) + '\t' +
                            str(max_node - len(finer) + j + 1))
                        break
            max_node += len(clusts_layers[i])
        # a single root above all clusters of the last level
        for i in range(len(clusts_layers[-1])):
            lines.append(
                str(max_node + 1) + '\t' +
                str(max_node - len(clusts_layers[-1]) + i + 1))

    # trim the hierarchy to remove contigs
    up_tree = {}
    down_tree = {}
    for line in lines:
        parent, child = line.split()
        down_tree.setdefault(parent, []).append(child)
        up_tree.setdefault(child, []).append(parent)

    # store root and leaves
    parents = set(down_tree)
    children = set(up_tree)
    roots = list(parents.difference(children))
    leaves = children.difference(parents)
    visited = dict.fromkeys(parents.union(children), 0)

    # find all contigs in the DAG
    # `stack` holds the pending nodes with the next one to visit last, so
    # pop()/extend() replace the former pop(0)/list concatenation.
    contigs = []
    stack = roots[::-1]
    work_path = []
    new_path = False
    while stack:
        key = stack.pop()
        if new_path:
            # start the path at the parent through which `key` was reached
            work_path.append(up_tree[key][visited[key]])
        work_path.append(key)
        if key in leaves or len(down_tree[key]) > 1 or visited[key] > 0:
            new_path = True
            contigs.append(work_path)
            work_path = []
        if visited[key] == 0 and key not in leaves:
            stack.extend(reversed(down_tree[key]))
        visited[key] += 1

    # write trimmed DAG (the first path found is not written)
    for path in contigs[1:]:
        edge_type = 'c-m' if path[-1] in leaves else 'c-c'
        sys.stdout.write(path[0] + ',' + path[-1] + ',')
        sys.stdout.write(edge_type + ';')
    sys.stdout.flush()
    return 0