def test_diff_move():
    intraslice = ig.Graph.Read_Ncol("multilayer_SBM_interslice_edges.csv",
                                    directed=False)
    n = intraslice.vcount()
    layer_vec = [0] * n
    membership = list(range(n))

    part_rbc = louvain.RBConfigurationVertexPartition(
        intraslice, resolution_parameter=1.0, initial_membership=membership)
    part_weighted_layers = louvain.RBConfigurationVertexPartitionWeightedLayers(
        intraslice,
        resolution_parameter=1.0,
        layer_vec=layer_vec,
        initial_membership=membership)

    # check diff_move() - quality() consistency across 100 random moves
    for repeat in range(100):
        v = randint(0, n - 1)
        c = randint(0, n - 1)
        old_quality = part_weighted_layers.quality()
        wl_diff = part_weighted_layers.diff_move(v, c)
        part_weighted_layers.move_node(v, c)
        true_diff = part_weighted_layers.quality() - old_quality

        rbc_diff = part_rbc.diff_move(v, c)
        part_rbc.move_node(v, c)

        assert isclose(
            wl_diff, true_diff
        ), "WeightedLayers diff_move() inconsistent with quality()"
        assert isclose(
            wl_diff, rbc_diff
        ), "WeightedLayers diff_move() inconsistent with single-layer"
        assert isclose(
            part_weighted_layers.quality(), part_rbc.quality()
        ), "WeightedLayers quality() inconsistent with single-layer"

    # check rng consistency between RBConfigurationVertexPartition and its WeightedLayers variant
    # with various seeds and intraslice resolution parameters
    for gamma in np.linspace(0.5, 1.5, 10):
        shared_seed = randint(-1 << 31, (1 << 31) - 1)  # random int32

        louvain.set_rng_seed(shared_seed)
        part_weighted_layers = louvain.RBConfigurationVertexPartitionWeightedLayers(
            intraslice, resolution_parameter=gamma, layer_vec=layer_vec)
        opt = louvain.Optimiser()
        opt.optimise_partition(partition=part_weighted_layers)

        louvain.set_rng_seed(shared_seed)
        part_rbc = louvain.RBConfigurationVertexPartition(
            intraslice, resolution_parameter=gamma)
        opt = louvain.Optimiser()
        opt.optimise_partition(partition=part_rbc)

        quality_weighted_layers = part_weighted_layers.quality(
            resolution_parameter=gamma)
        quality_rbc = part_rbc.quality(resolution_parameter=gamma)
        assert isclose(
            quality_weighted_layers, quality_rbc
        ), "Intra-layer optimisation inconsistent with single-layer"
Example No. 2
def multilayer_louvain(G_intralayer,
                       G_interlayer,
                       layer_vec,
                       gamma,
                       omega,
                       optimiser=None,
                       return_partition=False):
    # RBConfigurationVertexPartitionWeightedLayers implements a multilayer version of "standard" modularity (i.e.
    # the Reichardt and Bornholdt's Potts model with configuration null model).
    check_multilayer_louvain_capabilities()

    if 'weight' not in G_intralayer.es.attributes():
        G_intralayer.es['weight'] = [1.0] * G_intralayer.ecount()

    if 'weight' not in G_interlayer.es.attributes():
        G_interlayer.es['weight'] = [1.0] * G_interlayer.ecount()

    if optimiser is None:
        optimiser = louvain.Optimiser()

    intralayer_part = louvain.RBConfigurationVertexPartitionWeightedLayers(
        G_intralayer,
        layer_vec=layer_vec,
        weights='weight',
        resolution_parameter=gamma)
    interlayer_part = louvain.CPMVertexPartition(G_interlayer,
                                                 resolution_parameter=0.0,
                                                 weights='weight')
    optimiser.optimise_partition_multiplex([intralayer_part, interlayer_part],
                                           layer_weights=[1, omega])

    if return_partition:
        return intralayer_part
    else:
        return tuple(intralayer_part.membership)
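A usage sketch for the function above, on a hypothetical two-layer temporal network (the graphs, coupling, and parameter values here are placeholders; RBConfigurationVertexPartitionWeightedLayers requires a louvain build that provides it):

import igraph as ig

n = 50
G_intralayer = ig.Graph.Erdos_Renyi(n=2 * n, p=0.1)  # both layers' internal edges
G_interlayer = ig.Graph(2 * n, [(i, i + n) for i in range(n)])  # couple layer 0 to layer 1
layer_vec = [0] * n + [1] * n

membership = multilayer_louvain(G_intralayer, G_interlayer, layer_vec,
                                gamma=1.0, omega=1.0)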
Example No. 3
def plot_number_clusters(card_data_df, G, resolution_range):
    """Take a card_data_df and the graph that represents it as well as
    a range of resolution parameters, and plot a graph showing how the
    number of clusters changes with resolution parameter.
    Parameters:
    -----------
    card_data_df: pandas DataFrame containing as columns card name and
    the decks that each card belongs to as a set.

    G: igraph Graph representation of card_data_df.

    resolution_range: tuple of two values to vary resolution_parameter
    between.
    See also:
    ---------
    create_card_df: function that creates card_data_df.

    create_graph: function that creates G.
    """
    optimiser = lv.Optimiser()
    profile = optimiser.resolution_profile(
        G,
        lv.RBERVertexPartition,
        resolution_range=resolution_range,
        node_sizes=card_data_df["Count"].tolist(),
    )
    x = np.linspace(
        resolution_range[0], resolution_range[1], len(profile)
    )
    y = np.array([len(partition) for partition in profile])
    plt.plot(x, y)
    plt.xlabel("resolution_parameter")
    plt.ylabel("Number of clusters")
    plt.show()
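A hypothetical call, assuming create_card_df and create_graph (referenced in the docstring) have already produced the inputs:

card_data_df = create_card_df(deck_data)  # deck_data is a placeholder
G = create_graph(card_data_df)
plot_number_clusters(card_data_df, G, resolution_range=(0.1, 1.0))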
def louvain_modified(snapshots, randomise_constraint=0.02):
    optimiser = louvain.Optimiser()
    partitions = []
    partition = None

    for i, snapshot in enumerate(snapshots):
        if partition is not None and randomise_constraint < 1:
            improv = 1
            optimiser_decay = 2

            partition = louvain.ModularityVertexPartition(
                snapshot.get_graph(),
                init_clusters(snapshots[i].get_graph(),
                              snapshots[i - 1].get_graph(), partition,
                              randomise_constraint).membership)

            while improv > 0 and optimiser_decay > 0:
                improv = optimiser.optimise_partition(partition)
                if improv == 0:
                    optimiser_decay -= 1
                else:
                    optimiser_decay = 2
        else:
            partition = louvain.find_partition(
                snapshot.get_graph(), louvain.ModularityVertexPartition)
            snapshots[i].get_graph().vs["cluster_seed"] = partition.membership

        partitions.append(partition)

    return partitions
Example No. 5
def ms_avg(snapshots, weights=None):
    if not snapshots:
        return None

    # avoid a shared mutable default; per-snapshot weights default to 1
    if weights is None:
        weights = {}

    optimiser = louvain.Optimiser()

    static_modularities = [0 for s in snapshots]
    partitions = [louvain.ModularityVertexPartition(snap.get_graph()) for snap in snapshots]
    partitions_agg = [partition.aggregate_partition() for partition in partitions]

    for idx in range(len(snapshots)):
        weights.setdefault(idx, 1)

    improv = 1
    while improv > 0:
        improv = 0

        # phase 1
        for idx in range(len(partitions_agg)):
            if optimiser.move_nodes(partitions_agg[idx]) > 0:
                improv = 1
            static_modularities[idx] = partitions_agg[idx].quality()

        # phase 2
        if improv > 0:
            for idx in range(len(partitions_agg)):
                partitions[idx].from_coarse_partition(partitions_agg[idx])
                partitions_agg[idx] = partitions_agg[idx].aggregate_partition()

    return (sum([static_modularities[idx] * weights[idx] for idx in range(len(static_modularities))])
            / sum(weights.values()))
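A usage sketch: averaging static modularity over three snapshots, weighting the most recent one twice as heavily (snapshot objects are assumed to expose get_graph() as above):

avg_q = ms_avg(snapshots, weights={0: 1, 1: 1, 2: 2})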
Example No. 6
def louvain_find_partition_multiplex(graphs,
                                     partition_type,
                                     layer_weights=None,
                                     seed=None,
                                     **kwargs):
    """ Detect communities for multiplex graphs.
	Each graph should be defined on the same set of vertices, only the edges may
	differ for different graphs. See
	:func:`Optimiser.optimise_partition_multiplex` for a more detailed
	explanation.
	Parameters
	----------
	graphs : list of :class:`ig.Graph`
		Graphs to optimise; one layer per graph.
	partition_type : type of :class:`MutableVertexPartition`
		The type of partition to use for optimisation (identical for all graphs).
	layer_weights : list of float
		Weight of each layer in the overall quality function. Defaults to 1
		for every layer.
	seed : int
		Seed for the random number generator. By default uses a random seed
		if nothing is specified.
	**kwargs
		Remaining keyword arguments, passed on to constructor of ``partition_type``.
	Returns
	-------
	list of int
		membership of nodes.
	float
		Improvement in quality of combined partitions, see
		:func:`Optimiser.optimise_partition_multiplex`.
	Notes
	-----
	We don't return a partition in this case because a partition is always
	defined on a single graph. We therefore simply return the membership (which
	is the same for all layers).
	See Also
	--------
	:func:`Optimiser.optimise_partition_multiplex`
	:func:`slices_to_layers`
	Examples
	--------
	>>> n = 100
	>>> G_1 = ig.Graph.Lattice([n], 1)
	>>> G_2 = ig.Graph.Lattice([n], 1)
	>>> membership, improvement = louvain.find_partition_multiplex([G_1, G_2],
	...                                                            louvain.ModularityVertexPartition)
	"""
    n_layers = len(graphs)
    partitions = []
    if layer_weights is None:
        layer_weights = [1] * n_layers
    for graph in graphs:
        partitions.append(partition_type(graph, **kwargs))
    optimiser = louvain.Optimiser()

    if seed is not None:
        optimiser.set_rng_seed(seed)

    improvement = optimiser.optimise_partition_multiplex(
        partitions, layer_weights)
    return partitions[0].membership, improvement
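A usage sketch with explicit layer weights, down-weighting the second layer (both graphs must share the same vertex set):

import igraph as ig
import louvain

n = 100
G_1 = ig.Graph.Lattice([n], 1)
G_2 = ig.Graph.Erdos_Renyi(n=n, p=0.05)
membership, improvement = louvain_find_partition_multiplex(
    [G_1, G_2], louvain.ModularityVertexPartition,
    layer_weights=[1.0, 0.5], seed=0)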
Example No. 7
def louvain(
    data: AnnData,
    rep: str = "pca",
    resolution: float = 1.3,
    random_state: int = 0,
    class_label: str = "louvain_labels",
) -> None:
    """Cluster the cells using Louvain algorithm.

    Parameters
    ----------
    data: ``anndata.AnnData``
        Annotated data matrix with rows for cells and columns for genes.

    rep: ``str``, optional, default: ``"pca"``
        The embedding representation used for clustering. Keyword ``'X_' + rep`` must exist in ``data.obsm``. By default, use PCA coordinates.

    resolution: ``float``, optional, default: ``1.3``
        Resolution factor. Higher resolution tends to find more clusters with smaller sizes.

    random_state: ``int``, optional, default: ``0``
        Random seed for reproducing results.

    class_label: ``str``, optional, default: ``"louvain_labels"``
        Key name for storing cluster labels in ``data.obs``.

    Returns
    -------
    ``None``

    Update ``data.obs``:
        * ``data.obs[class_label]``: Cluster labels of cells as categorical data.

    Examples
    --------
    >>> pg.louvain(adata)
    """

    start = time.time()

    rep_key = "W_" + rep
    if rep_key not in data.uns:
        raise ValueError("Cannot find affinity matrix. Please run neighbors first!")
    W = data.uns[rep_key]

    G = construct_graph(W)
    partition_type = louvain_module.RBConfigurationVertexPartition
    partition = partition_type(G, resolution_parameter=resolution, weights="weight")
    optimiser = louvain_module.Optimiser()
    optimiser.set_rng_seed(random_state)
    diff = optimiser.optimise_partition(partition)

    labels = np.array([str(x + 1) for x in partition.membership])
    categories = natsorted(np.unique(labels))
    data.obs[class_label] = pd.Categorical(values=labels, categories=categories)

    end = time.time()
    logger.info("Louvain clustering is done. Time spent = {:.2f}s.".format(end - start))
Example No. 8
def louvain(i, j, val, dim, partition_method, initial_membership, weights,
            resolution, node_sizes, seed, verbose):
    import louvain
    import igraph as ig
    import numpy
    from scipy.sparse import csc_matrix
    data = csc_matrix((val, (i, j)), shape=dim)
    # vcount = max(data.shape)
    sources, targets = data.nonzero()
    edgelist = zip(sources.tolist(), targets.tolist())
    G = ig.Graph(edges=list(edgelist))

    # G = ig.Graph.Adjacency(data.tolist())

    if partition_method == 'ModularityVertexPartition':
        partition = louvain.ModularityVertexPartition(
            G, initial_membership=initial_membership, weights=weights)
    elif partition_method == 'RBConfigurationVertexPartition':
        partition = louvain.RBConfigurationVertexPartition(
            G,
            initial_membership=initial_membership,
            weights=weights,
            resolution_parameter=resolution)
    elif partition_method == 'RBERVertexPartition':
        partition = louvain.RBERVertexPartition(
            G,
            initial_membership=initial_membership,
            weights=weights,
            node_sizes=node_sizes,
            resolution_parameter=resolution)
    elif partition_method == 'CPMVertexPartition':
        partition = louvain.CPMVertexPartition(
            G,
            initial_membership=initial_membership,
            weights=weights,
            node_sizes=node_sizes,
            resolution_parameter=resolution)
    elif partition_method == 'SignificanceVertexPartition':
        partition = louvain.SignificanceVertexPartition(
            G, initial_membership=initial_membership, node_sizes=node_sizes)
    elif partition_method == 'SurpriseVertexPartition':
        partition = louvain.SurpriseVertexPartition(
            G,
            initial_membership=initial_membership,
            weights=weights,
            node_sizes=node_sizes)
    else:
        raise ValueError('partition_method ' + partition_method +
                         ' is NOT supported.')

    if seed is not None:
        louvain.set_rng_seed(seed)

    optimiser = louvain.Optimiser()
    diff = optimiser.optimise_partition(partition)

    # ig.plot(partition)
    return partition
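A hypothetical invocation on a tiny COO-style input (i, j, val are edge coordinates, dim the square matrix shape; the values here are placeholders):

import numpy as np

i = np.array([0, 1, 2, 3])
j = np.array([1, 2, 3, 0])
val = np.array([1.0, 1.0, 1.0, 1.0])
part = louvain(i, j, val, dim=(4, 4),
               partition_method='ModularityVertexPartition',
               initial_membership=None, weights=None, resolution=1.0,
               node_sizes=None, seed=0, verbose=False)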
Example No. 9
def louvain_hierarchy_output(partition):
    optimiser = louvain.Optimiser()
    partition_agg = partition.aggregate_partition()
    partition_layers = []
    while optimiser.move_nodes(partition_agg) > 0:
        partition.from_coarse_partition(partition_agg)
        partition_agg = partition_agg.aggregate_partition()
        partition_layers.append(list(partition))
    return partition_layers
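A usage sketch: each element of the returned list is one aggregation level, given as the communities (lists of vertex ids) of the original graph at that depth:

import igraph as ig
import louvain

G = ig.Graph.Famous('Zachary')
partition = louvain.ModularityVertexPartition(G)
levels = louvain_hierarchy_output(partition)
print(len(levels), 'aggregation levels')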
Example No. 10
def multilayer_louvain(G_intralayer,
                       G_interlayer,
                       layer_vec,
                       gamma,
                       omega,
                       optimiser=None,
                       return_partition=False):
    r"""Run the Louvain modularity maximization algorithm at a single (:math:`\gamma, \omega`) value.

    :param G_intralayer: intralayer graph of interest
    :type G_intralayer: igraph.Graph
    :param G_interlayer: interlayer graph of interest
    :type G_interlayer: igraph.Graph
    :param layer_vec: list of each vertex's layer membership
    :type layer_vec: list[int]
    :param gamma: gamma (intralayer resolution parameter) to run Louvain at
    :type gamma: float
    :param omega: omega (interlayer resolution parameter) to run Louvain at
    :type omega: float
    :param optimiser: if not None, use passed-in (potentially custom) louvain optimiser
    :type optimiser: louvain.Optimiser
    :param return_partition: if True, return a louvain partition. Otherwise, return a community membership tuple
    :type return_partition: bool
    :return: partition from louvain
    :rtype: tuple[int] or louvain.RBConfigurationVertexPartitionWeightedLayers
    """

    # RBConfigurationVertexPartitionWeightedLayers implements a multilayer version of "standard" modularity (i.e.
    # the Reichardt and Bornholdt's Potts model with configuration null model).
    check_multilayer_louvain_capabilities()

    if 'weight' not in G_intralayer.es.attributes():
        G_intralayer.es['weight'] = [1.0] * G_intralayer.ecount()

    if 'weight' not in G_interlayer.es.attributes():
        G_interlayer.es['weight'] = [1.0] * G_interlayer.ecount()

    if optimiser is None:
        optimiser = louvain.Optimiser()

    intralayer_part = louvain.RBConfigurationVertexPartitionWeightedLayers(
        G_intralayer,
        layer_vec=layer_vec,
        weights='weight',
        resolution_parameter=gamma)
    interlayer_part = louvain.CPMVertexPartition(G_interlayer,
                                                 resolution_parameter=0.0,
                                                 weights='weight')
    optimiser.optimise_partition_multiplex([intralayer_part, interlayer_part],
                                           layer_weights=[1, omega])

    if return_partition:
        return intralayer_part
    else:
        return tuple(intralayer_part.membership)
Example No. 11
def louvain_multiplex(graphs, partition_type, interslice_weight,
                      resolution_parameter):
    layers, interslice_layer, G_full = louvain.time_slices_to_layers(
        graphs, vertex_id_attr='name', interslice_weight=interslice_weight)
    partitions = [
        partition_type(H, resolution_parameter=resolution_parameter)
        for H in layers
    ]
    interslice_partition = partition_type(
        interslice_layer,
        resolution_parameter=resolution_parameter,
        weights='weight')
    optimiser = louvain.Optimiser()
    optimiser.optimise_partition_multiplex(partitions +
                                           [interslice_partition])
    quality = sum(
        [p.quality() for p in partitions + [interslice_partition]])
    return partitions[0], quality
def test_multilayer_louvain():
    intraslice = ig.Graph.Read_Ncol("multilayer_SBM_intraslice_edges.csv",
                                    directed=False)
    interslice = ig.Graph.Read_Ncol("multilayer_SBM_interslice_edges.csv",
                                    directed=False)
    n_layers = 4
    n = intraslice.vcount() // n_layers
    layer_vec = np.array([i // n for i in range(n * n_layers)])

    intraslice.es['weight'] = 1.0
    intralayer_part = louvain.RBConfigurationVertexPartitionWeightedLayers(
        intraslice,
        resolution_parameter=1.0,
        layer_vec=layer_vec,
        weights='weight')

    for omega in np.linspace(0.5, 1.5, 10):
        interslice.es['weight'] = omega

        interlayer_part = louvain.RBConfigurationVertexPartition(
            interslice, resolution_parameter=0.0, weights='weight')

        opt = louvain.Optimiser()
        opt.optimise_partition_multiplex(
            partitions=[intralayer_part, interlayer_part])

        louvain_mod = intralayer_part.quality(
            resolution_parameter=1.0) + interlayer_part.quality()

        A = np.array(intraslice.get_adjacency().data)
        C = omega * np.array(interslice.get_adjacency().data)
        P = np.zeros((n_layers * n, n_layers * n))
        for i in range(n_layers):
            c_degrees = np.array(
                intraslice.degree(list(range(n * i, n * i + n))))
            c_inds = np.where(layer_vec == i)[0]
            P[np.ix_(c_inds, c_inds)] = np.outer(
                c_degrees, c_degrees.T) / (1.0 * np.sum(c_degrees))

        membership = np.array(intralayer_part.membership)
        true_mod = sum(
            calculate_coefficient(membership, X) for X in (A, -P, C))

        assert isclose(
            louvain_mod, true_mod
        ), "WeightedLayers quality() inconsistent with alternate calculation"
Example No. 13
def layer_partition():
    sub_g = get_subgraph(node_lists = ['1384', '3762', '1493', '3767', '1762', '7364'], depth=0)
    #8175
    #sub_g = get_subgraph(node_lists = ['8175', '8008'], depth=1)
    
    graphml_path = os.path.join(VIS_DATA_DIR, 'song-tmp.graphml')
    nx.write_graphml(sub_g, graphml_path)
    G = ig.Graph.Read_GraphML(graphml_path)
    G_pos = G.subgraph_edges(G.es.select(weight_gt = 0), delete_vertices=False)
    G_neg = G.subgraph_edges(G.es.select(weight_lt = 0), delete_vertices=False)
    G_neg.es['weight'] = [-w for w in G_neg.es['weight']]
    part_pos = louvain.ModularityVertexPartition(G_pos, weights='weight')
    part_neg = louvain.ModularityVertexPartition(G_neg, weights='weight')
    optimiser = louvain.Optimiser()
    diff = optimiser.optimise_partition_multiplex([part_pos, part_neg],layer_weights=[1,-1])
    # while diff > 0:
    #     diff = optimiser.optimise_partition_multiplex([part_pos, part_neg],layer_weights=[1,-1])
    # print(diff)
    # print(part_neg)
    # print(part_pos)
    # for v in G.vs:
    #     print(v.index, v["label"])
    # print(dir(part_pos), part_pos.membership)
    print(dir(part_pos))
    print(part_pos.summary())
    print(part_pos.modularity, part_pos.q, part_pos)
    
    node_partition = {}
    for v in G.vs:
        node_partition[v["label"]] = v.index
    node_partition2 = {}
    memberships = [i for i in part_pos.membership]
    assert len(memberships) == len(node_partition)
    for i in node_partition:
        # if node_partition[i] == 0:
        #     print(i)
        node_partition2[i] = memberships[node_partition[i]]
    # print(node_partition2)
    giants = ['1384', '3762', '1493', '3767', '1762', '7364']
    giants_name = ['歐陽修', '蘇洵', '蘇轍', '蘇軾', '王安石', '曾鞏']

    for giant, name in zip(giants, giants_name):
        print(node_partition2[giant], giant, name)
Example No. 14
def run_approximated_louvain(data, rep_key, n_jobs = 1, resolution = 1.3, random_state = 0, n_clusters = 30, n_init = 20, class_label = 'approx_louvain_labels'):
	start = time.time()

	X = data.obsm[rep_key].astype('float64')
	np.random.seed(random_state)
	seeds = np.random.randint(np.iinfo(np.int32).max, size = n_init)
	
	old_n = set_numpy_thread(1)

	threads = [None] * n_jobs
	results = [None] * n_jobs

	for i in range(n_jobs):
		t = threading.Thread(target=run_one_instance_of_kmeans, args=(i, results, n_init, n_clusters, n_jobs, X, seeds))
		threads[i] = t
		t.start()

	for i in range(n_jobs):
		threads[i].join()

	set_numpy_thread(old_n)
	
	labels = list(zip(*[x for y in results for x in y]))
	uniqs = np.unique(labels, axis = 0)
	transfer_dict = {tuple(k):v for k, v in zip(uniqs, range(uniqs.shape[0]))}
	labels = [transfer_dict[x] for x in labels]

	G = construct_graph(data.uns['W_norm'])

	partition = louvain.RBConfigurationVertexPartition(G, resolution_parameter = resolution, initial_membership = labels)
	partition_agg = partition.aggregate_partition()
	optimiser = louvain.Optimiser()
	optimiser.set_rng_seed(random_state)
	diff = optimiser.optimise_partition(partition_agg)
	partition.from_coarse_partition(partition_agg)

	labels = np.array([str(x + 1) for x in partition.membership])
	categories = natsorted(np.unique(labels))
	data.obs[class_label] = pd.Categorical(values = labels, categories = categories)

	end = time.time()
	print("Approximated Louvain clustering is done. Time spent = {:.2f}s.".format(end - start))
Example No. 15
def layer_partition(sub_g):

    graphml_path = os.path.join(VIS_DATA_DIR, 'song-tmp.graphml')
    nx.write_graphml(sub_g, graphml_path)
    G = ig.Graph.Read_GraphML(graphml_path)
    G_pos = G.subgraph_edges(G.es.select(weight_gt=0), delete_vertices=False)
    G_neg = G.subgraph_edges(G.es.select(weight_lt=0), delete_vertices=False)
    G_neg.es['weight'] = [-w for w in G_neg.es['weight']]
    part_pos = louvain.ModularityVertexPartition(G_pos, weights='weight')
    part_neg = louvain.ModularityVertexPartition(G_neg, weights='weight')
    optimiser = louvain.Optimiser()
    diff = optimiser.optimise_partition_multiplex([part_pos, part_neg],
                                                  layer_weights=[1, -1])
    # while diff > 0:
    #     diff = optimiser.optimise_partition_multiplex([part_pos, part_neg],layer_weights=[1,-1])
    # print(diff)
    # print(part_neg)
    # print(part_pos)
    # for v in G.vs:
    #     print(v.index, v["label"])
    # print(dir(part_pos), part_pos.membership)
    # print(dir(part_pos))
    # print(part_pos.summary())
    # print(part_pos.modularity, part_pos.q, part_pos)

    node_partition = {}
    for v in G.vs:
        node_partition[v["label"]] = v.index
    node_partition2 = {}
    memberships = [i for i in part_pos.membership]
    assert len(memberships) == len(node_partition)
    for i in node_partition:
        node_partition2[i] = memberships[node_partition[i]]

    return node_partition2
Example No. 16
def run_louvain(data, affinity = 'W_norm', resolution = 1.3, random_state = 0):
	start = time.time()

	W = None
	if affinity == 'W_norm':
		W = data.uns['W_norm']
	elif affinity == 'W_diffmap':
		W = calculate_affinity_matrix(data.uns['diffmap_knn_indices'], data.uns['diffmap_knn_distances'])
	else:
		W_diffmap = calculate_affinity_matrix(data.uns['diffmap_knn_indices'], data.uns['diffmap_knn_distances'])
		W, diag_tmp, diag_half_tmp = calculate_normalized_affinity(W_diffmap)

	G = construct_graph(W)

	partition = louvain.RBConfigurationVertexPartition(G, resolution_parameter = resolution)
	optimiser = louvain.Optimiser()
	optimiser.set_rng_seed(random_state)
	diff = optimiser.optimise_partition(partition)

	labels = np.array([str(x + 1) for x in partition.membership])
	categories = natsorted(np.unique(labels))
	data.obs[aff2lab[affinity]] = pd.Categorical(values = labels, categories = categories)
	end = time.time()
	print("Louvain clustering is done. Time spent = {:.2f}s.".format(end - start))
Example No. 17
    def _cluster(self,
                 aData,
                 resolution,
                 clusterMin=10,
                 clusteringAlgorithm='leiden'
                 ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Performs the clustering. This function is a little more complicated
        than strictly necessary because it preserves the information about
        the cluster label of each cell during the iterations of the modularity
        optimization. The final result where global modularity has been
        optimized is saved in the task's output subdir, whereas the iteration
        results are saved in output/iterations. It is sometimes useful to explore
        the cluster labels of cells from modularities prior to steady state, as
        they generally reflect coherent groupings that are more granular than
        the final assignments.

        Args:
            aData: anndata object to use for clustering
            resolution: resolution for modularity calculation
            clusterMin: minimum number of cells that must be in a cluster
                        to keep that cluster
            clusteringAlgorithm: choice of algorithm to use for modularity
                                 optimization, currently leiden and louvain are
                                 supported
        Returns:
            a tuple of dataframes, first is a dataframe containing the cluster
            labels from all rounds of modularity optimization, second is just
            the final round of optimization. Index is always cell id
        """
        g = Neighbors(aData).to_igraph()

        if clusteringAlgorithm == 'louvain':
            import louvain as clAlgo
            print('using louvain algorithm')
        elif clusteringAlgorithm == 'leiden':
            import leidenalg as clAlgo
            print('using leiden algorithm')
        else:
            raise ValueError('clusteringAlgorithm must be '
                             '"louvain" or "leiden"')

        optimiser = clAlgo.Optimiser()
        tracking = []
        partition = clAlgo.RBConfigurationVertexPartition(
            g, weights='weight', resolution_parameter=resolution)
        partition_agg = partition.aggregate_partition()
        print(partition.summary())

        diff = optimiser.move_nodes(partition_agg)
        while diff > 0.0:
            partition.from_coarse_partition(partition_agg)
            partition_agg = partition_agg.aggregate_partition()
            tracking.append(partition.membership)
            print(partition_agg.summary())
            diff = optimiser.move_nodes(partition_agg)

        df = pd.DataFrame(tracking, columns=aData.obs.index).T

        clusteringOutput = df.iloc[:, [-1]].copy(deep=True)
        colLabel = 'kValue_{}_resolution_{}'.format(self.kValue,
                                                    int(self.resolution))
        clusteringOutput.columns = [colLabel]
        clusteringOutputGrouped = clusteringOutput.groupby(colLabel).size()

        toZero = clusteringOutputGrouped[
            clusteringOutputGrouped < int(clusterMin)].index.values.tolist()
        mask = clusteringOutput[colLabel].isin(toZero)
        clusteringOutput[colLabel] = clusteringOutput[colLabel].where(~mask,
                                                                      other=-1)
        print('Clustering yields {} clusters with at least {} cells'.format(
            clusteringOutput[colLabel].unique().astype(int).max(), clusterMin))

        return df, clusteringOutput
Example No. 18
def run_louvain(graph,
                config_model='Default',
                overlap=False,
                directed=False,
                deep=False,
                interslice_weight=0.1,
                resolution_parameter=0.1,
                seed=None):
    """
    :outdir: the output directory to comprehend the output link file
    :param graph: input file
    :param config_model: 'RB', 'RBER', 'CPM', 'Surprise', 'Significance'
    :param overlap: bool, whether to enable overlapping community detection
    :param directed
    :param deep
    :param interslice_weight
    :param resolution_parameter
    :return
    """

    if seed is not None:
        louvain.set_rng_seed(seed)

    def louvain_hierarchy_output(partition):
        optimiser = louvain.Optimiser()
        partition_agg = partition.aggregate_partition()
        partition_layers = []
        while optimiser.move_nodes(partition_agg) > 0:
            partition.from_coarse_partition(partition_agg)
            partition_agg = partition_agg.aggregate_partition()
            partition_layers.append(list(partition))
        return partition_layers

    def louvain_multiplex(graphs, partition_type, interslice_weight,
                          resolution_parameter):
        layers, interslice_layer, G_full = louvain.time_slices_to_layers(
            graphs, vertex_id_attr='name', interslice_weight=interslice_weight)
        if partition_type == louvain.ModularityVertexPartition:
            partitions = [partition_type(H) for H in layers]
            interslice_partition = partition_type(interslice_layer,
                                                  weights='weight')
        else:
            partitions = [
                partition_type(H, resolution_parameter=resolution_parameter)
                for H in layers
            ]
            interslice_partition = partition_type(
                interslice_layer,
                resolution_parameter=resolution_parameter,
                weights='weight')
        optimiser = louvain.Optimiser()
        optimiser.optimise_partition_multiplex(partitions +
                                               [interslice_partition])
        quality = sum(
            [p.quality() for p in partitions + [interslice_partition]])
        return partitions[0], quality

    def partition_to_clust(graphs, partition, min_size_cut=2):
        clusts = []
        node_names = []
        if not isinstance(graphs, list):
            graphs = [graphs]
        for g in graphs:
            node_names.extend(g.vs['name'])
        for i in range(len(partition)):
            clust = [node_names[id] for id in partition[i]]
            clust = list(set(clust))
            if len(clust) < min_size_cut:
                continue
            clust.sort()
            clusts.append(clust)
        clusts = sorted(clusts, key=lambda x: len(x), reverse=True)
        return clusts

    multi = isinstance(graph, list)

    if overlap and not multi:
        multi = True
        net = graph
        graph = [net for _ in range(4)]

    if multi and deep:
        sys.stderr.write(
            'louvain does not support hierarchical clustering with overlapped communities'
        )
        sys.exit()

    if config_model == 'RB':
        partition_type = louvain.RBConfigurationVertexPartition
    elif config_model == 'RBER':
        partition_type = louvain.RBERVertexPartition
    elif config_model == 'CPM':
        partition_type = louvain.CPMVertexPartition
    elif config_model == 'Surprise':
        partition_type = louvain.SurpriseVertexPartition
    elif config_model == "Significance":
        partition_type = louvain.SignificanceVertexPartition
    else:
        sys.stderr.write("Not specifying the configuration model; "
                         "perform simple Louvain.")
        partition_type = louvain.ModularityVertexPartition

    weighted = False
    if multi:
        wL = []
        G = []
        for file in graph:
            with open(file, 'r') as f:
                lines = f.read().splitlines()
            elts = lines[0].split()
            weighted = len(elts) == 3
            for i in range(len(lines)):
                elts = lines[i].split()
                for j in range(2):
                    elts[j] = int(elts[j])
                if weighted:
                    elts[2] = float(elts[2])
                    if elts[2] < 0:
                        sys.stderr.write("negative edge weight not allowed")
                        return 1
                lines[i] = tuple(elts)
            g = igraph.Graph.TupleList(lines,
                                       directed=directed,
                                       weights=weighted)
            G.append(g)
            wL.append(weighted)
        if True in wL and False in wL:
            raise Exception('all graphs should follow the same format')
        if partition_type == louvain.CPMVertexPartition and directed is True:
            raise Exception('graph for CPMVertexPartition must be undirected')
        if partition_type == louvain.SignificanceVertexPartition and weighted is True:
            raise Exception('SignificanceVertexPartition only support '
                            'unweighted graphs')
        partition, quality = louvain_multiplex(G, partition_type,
                                               interslice_weight,
                                               resolution_parameter)

    else:
        with open(graph, 'r') as f:
            lines = f.read().splitlines()
        elts = lines[0].split()
        weighted = len(elts) == 3

        for i in range(len(lines)):
            elts = lines[i].split()
            for j in range(2):
                elts[j] = int(elts[j])
            if weighted:
                elts[2] = float(elts[2])
                if elts[2] < 0:
                    sys.stderr.write("negative edge weight not allowed")
                    return 1
            lines[i] = tuple(elts)

        G = igraph.Graph.TupleList(lines, directed=directed, weights=weighted)
        weights = G.es['weight'] if weighted else None
        if partition_type == louvain.ModularityVertexPartition:
            partition = partition_type(G, weights=weights)
        else:
            partition = partition_type(
                G, weights=weights, resolution_parameter=resolution_parameter)
        if not deep:
            optimiser = louvain.Optimiser()
            optimiser.optimise_partition(partition)

    if not deep:
        clusts = partition_to_clust(G, partition)
        if len(clusts) == 0:
            sys.stderr.write(
                "No cluster; Resolution parameter may be too extreme")
            return 1

        maxNode = 0
        for clust in clusts:
            maxNode = max(maxNode, max(clust))

        for i in range(len(clusts)):
            sys.stdout.write(
                str(maxNode + len(partition) + 1) + ',' +
                str(maxNode + i + 1) + ',' + 'c-c' + ';')
            for n in clusts[i]:
                sys.stdout.write(
                    str(maxNode + i + 1) + ',' + str(n) + ',' + 'c-m' + ';')
    else:
        partitions = louvain_hierarchy_output(partition)
        clusts_layers = []
        for p in partitions:
            clusts_layers.append(partition_to_clust(G, p))
        if len(clusts_layers[0]) == 0:
            sys.stderr.write(
                "No cluster; Resolution parameter may be too extreme")
            return 1
        maxNode = 0
        for clust in clusts_layers[0]:
            maxNode = max(maxNode, max(clust))
        for i in range(len(clusts_layers[0])):
            for n in clusts_layers[0][i]:
                sys.stdout.write(
                    str(maxNode + i + 1) + ',' + str(n) + ',' + 'c-m' + ';')
        maxNode = maxNode + len(clusts_layers[0])
        for i in range(1, len(clusts_layers)):
            for j in range(len(clusts_layers[i - 1])):
                for k in range(len(clusts_layers[i])):
                    if all(x in clusts_layers[i][k]
                           for x in clusts_layers[i - 1][j]):
                        sys.stdout.write(
                            str(maxNode + k + 1) + ',' +
                            str(maxNode - len(clusts_layers[i - 1]) + j + 1) +
                            ',' + 'c-c' + ';')
                        break
            maxNode = maxNode + len(clusts_layers[i])
        for i in range(len(clusts_layers[-1])):
            sys.stdout.write(
                str(maxNode + 1) + ',' +
                str(maxNode - len(clusts_layers[-1]) + i + 1) + ',' + 'c-c' +
                ';')

    sys.stdout.flush()
    return 0
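For reference, the non-hierarchical branch above streams the clustering to stdout as a flat, semicolon-delimited edge list: 'c-c' records link the root to each cluster node, and 'c-m' records link each cluster to its member nodes. An illustrative (not actual) output for two clusters {1, 2} and {3, 4} with maxNode = 4:

# 7,5,c-c;5,1,c-m;5,2,c-m;7,6,c-c;6,3,c-m;6,4,c-m;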
    def setUp(self):
        self.optimiser = louvain.Optimiser()
Example No. 20
def run_alg(Gs, alg, gamma=1.0, sample=1.0, layer_weights=None):
    '''
    Run a community detection algorithm with a resolution parameter. Currently
    only the RB configuration model is used in Louvain/Leiden.

    Parameters
    ----------
    Gs : a list of igraph.Graph
    alg : str
        choose between 'louvain' and 'leiden'
    gamma : float
        resolution parameter
    sample : float
        if smaller than 1, randomly delete a fraction of edges each time
    layer_weights : a list of float
        specifying layer weights in the multilayer setting

    Returns
    -------
    C : scipy.sparse.csr_matrix
        a matrix recording the membership of each cluster
    '''
    if len(Gs) == 1:
        G = Gs[0]
        G1 = G.copy()
        if sample < 1:
            G1 = network_perturb(G, sample)
        if alg == 'louvain':
            partition_type = louvain.RBConfigurationVertexPartition
            partition = louvain.find_partition(G1,
                                               partition_type,
                                               resolution_parameter=gamma)
        elif alg == 'leiden':
            partition_type = leidenalg.RBConfigurationVertexPartition
            partition = leidenalg.find_partition(G1,
                                                 partition_type,
                                                 resolution_parameter=gamma)
        partitions = [partition]
    else:  # multiplex mode
        if layer_weights is None:
            layer_weights = [1.0 for _ in Gs]
        assert len(layer_weights) == len(
            Gs), 'layer weights inconsistent with the number of input networks'
        Gs1 = [G.copy() for G in Gs]
        if sample < 1:
            Gs1 = [network_perturb(G, sample) for G in Gs]
        if alg == 'louvain':
            partition_type = louvain.RBConfigurationVertexPartition
            optimiser = louvain.Optimiser()
            partitions = [
                partition_type(G, resolution_parameter=gamma) for G in Gs1
            ]
            _ = optimiser.optimise_partition_multiplex(
                partitions, layer_weights=layer_weights)
        elif alg == 'leiden':
            partition_type = leidenalg.RBConfigurationVertexPartition
            # partition = leidenalg.find_partition_multiplex(Gs1, partition_type, resolution_parameter=gamma,
            #                                                layer_weights=layer_weights)
            optimiser = leidenalg.Optimiser()
            partitions = [
                partition_type(G, resolution_parameter=gamma) for G in Gs1
            ]
            _ = optimiser.optimise_partition_multiplex(
                partitions, n_iterations=-1, layer_weights=layer_weights
            )  # -1 means iterate until no further optimization
            # print([len(p) for p in partitions]) # debug

    # partition = sorted(partition, key=len, reverse=True)
    LOGGER.info('Resolution: {:.4f}; find {} clusters'.format(
        gamma, len(partitions[0])))

    return partition_to_membership_matrix(partitions[0])
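The helper partition_to_membership_matrix is not shown in this example; a plausible implementation (an assumption, not the original) builds a clusters-by-nodes binary indicator matrix from the partition's membership vector:

import numpy as np
from scipy.sparse import csr_matrix

def partition_to_membership_matrix(partition):
    # row r of the result is the indicator vector of cluster r (assumed layout)
    membership = np.asarray(partition.membership)
    n_nodes = membership.size
    n_clusters = membership.max() + 1
    data = np.ones(n_nodes)
    return csr_matrix((data, (membership, np.arange(n_nodes))),
                      shape=(n_clusters, n_nodes))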
Example No. 21
def spectral_louvain(
    data: AnnData,
    rep: str = "pca",
    resolution: float = 1.3,
    rep_kmeans: str = "diffmap",
    n_clusters: int = 30,
    n_clusters2: int = 50,
    n_init: int = 10,
    n_jobs: int = -1,
    random_state: int = 0,
    class_label: str = "spectral_louvain_labels",
) -> None:
    """ Cluster the data using Spectral Louvain algorithm.

    Parameters
    ----------
    data: ``anndata.AnnData``
        Annotated data matrix with rows for cells and columns for genes.

    rep: ``str``, optional, default: ``"pca"``
        The embedding representation used for clustering. Keyword ``'X_' + rep`` must exist in ``data.obsm``. By default, use PCA coordinates.

    resolution: ``float``, optional, default: ``1.3``
        Resolution factor. Higher resolution tends to find more clusters with smaller sizes.

    rep_kmeans: ``str``, optional, default: ``"diffmap"``
        The embedding representation on which the KMeans runs. Keyword must exist in ``data.obsm``. By default, use Diffusion Map coordinates. If diffmap is not calculated, use PCA coordinates instead.

    n_clusters: ``int``, optional, default: ``30``
        The number of first level clusters.

    n_clusters2: ``int``, optional, default: ``50``
        The number of second level clusters.

    n_init: ``int``, optional, default: ``10``
        Number of kmeans tries for the first level clustering. Default is set to be the same as scikit-learn Kmeans function.

    n_jobs: ``int``, optional, default: ``-1``
        Number of threads to use. If ``-1``, use all available threads.

    random_state: ``int``, optional, default: ``0``
        Random seed for reproducing results.

    class_label: ``str``, optional, default: ``"spectral_louvain_labels"``
        Key name for storing cluster labels in ``data.obs``.

    Returns
    -------
    ``None``

    Update ``data.obs``:
        * ``data.obs[class_label]``: Cluster labels for cells as categorical data.

    Examples
    --------
    >>> pg.spectral_louvain(adata)
    """

    start = time.time()

    if "X_" + rep_kmeans not in data.obsm.keys():
        logger.warning("{} is not calculated, switch to pca instead.".format(rep_kmeans))
        rep_kmeans = "pca"
        if "X_" + rep_kmeans not in data.obsm.keys():
            raise ValueError("Please run {} first!".format(rep_kmeans))
    if "W_" + rep not in data.uns:
        raise ValueError("Cannot find affinity matrix. Please run neighbors first!")

    labels = partition_cells_by_kmeans(
        data, rep_kmeans, n_jobs, n_clusters, n_clusters2, n_init, random_state,
    )

    W = data.uns["W_" + rep]

    G = construct_graph(W)
    partition_type = louvain_module.RBConfigurationVertexPartition
    partition = partition_type(
        G, resolution_parameter=resolution, weights="weight", initial_membership=labels
    )
    partition_agg = partition.aggregate_partition()

    optimiser = louvain_module.Optimiser()
    optimiser.set_rng_seed(random_state)
    diff = optimiser.optimise_partition(partition_agg)
    partition.from_coarse_partition(partition_agg)

    labels = np.array([str(x + 1) for x in partition.membership])
    categories = natsorted(np.unique(labels))
    data.obs[class_label] = pd.Categorical(values=labels, categories=categories)

    end = time.time()
    logger.info(
        "Spectral Louvain clustering is done. Time spent = {:.2f}s.".format(end - start)
    )
Example No. 22
def iterative_multilayer_resolution_parameter_estimation(
        G_intralayer,
        G_interlayer,
        layer_vec,
        gamma=1.0,
        omega=1.0,
        gamma_tol=1e-2,
        omega_tol=5e-2,
        omega_max=1000,
        max_iter=25,
        model='temporal',
        verbose=False):
    """
    Multilayer variant of ALG. 1 from "Relating modularity maximization and stochastic block models in multilayer
    networks." The nested functions here are just used to match the pseudocode in the paper.

    :param G_intralayer: input graph containing all intra-layer edges
    :param G_interlayer: input graph containing all inter-layer edges
    :param layer_vec: vector of each vertex's layer membership
    :param gamma: starting gamma value
    :param omega: starting omega value
    :param gamma_tol: convergence tolerance for gamma
    :param omega_tol: convergence tolerance for omega
    :param max_iter: maximum number of iterations
    :param omega_max: maximum allowed value for omega
    :param model: network layer topology (temporal, multilevel, multiplex)
    :param verbose: whether or not to print verbose output
    :return: gamma, omega to which the iteration converged and the resulting partition
    """

    if 'weight' not in G_intralayer.es.attributes():
        G_intralayer.es['weight'] = [1.0] * G_intralayer.ecount()

    G_interlayer.es['weight'] = [omega] * G_interlayer.ecount()
    T = max(layer_vec) + 1  # layer count
    optimiser = louvain.Optimiser()
    m_t = [0] * T
    for e in G_intralayer.es:
        m_t[layer_vec[e.source]] += e['weight']

    N = G_intralayer.vcount() // T
    Nt = [0] * T
    for l in layer_vec:
        Nt[l] += 1

    check_multilayer_graph_consistency(G_intralayer, G_interlayer, layer_vec,
                                       model, m_t, T, N, Nt)

    if model == 'multiplex':

        def update_omega(theta_in, theta_out, p, K):
            if theta_out == 0:
                return log(1 + p * K /
                           (1 - p)) / (T *
                                       log(theta_in)) if p < 1.0 else omega_max
            # if p is 1, the optimal omega is infinite (here, omega_max)
            return log(1 + p * K / (1 - p)) / (
                T * (log(theta_in) - log(theta_out))) if p < 1.0 else omega_max
    else:

        def update_omega(theta_in, theta_out, p, K):
            if theta_out == 0:
                return log(1 + p * K /
                           (1 - p)) / (2 *
                                       log(theta_in)) if p < 1.0 else omega_max
            # if p is 1, the optimal omega is infinite (here, omega_max)
            return log(1 + p * K / (1 - p)) / (
                2 * (log(theta_in) - log(theta_out))) if p < 1.0 else omega_max

    # TODO: non-uniform cases
    # model affects SBM parameter estimation and the updating of omega
    if model == 'temporal':

        def calculate_persistence(community):
            # ordinal persistence
            return sum(community[e.source] == community[e.target]
                       for e in G_interlayer.es) / (N * (T - 1))
    elif model == 'multilevel':

        def calculate_persistence(community):
            # multilevel persistence
            pers_per_layer = [0] * T
            for e in G_interlayer.es:
                pers_per_layer[layer_vec[e.target]] += (
                    community[e.source] == community[e.target])

            pers_per_layer = [pers_per_layer[l] / Nt[l] for l in range(T)]
            return sum(pers_per_layer) / (T - 1)
    elif model == 'multiplex':

        def calculate_persistence(community):
            # categorical persistence
            return sum(community[e.source] == community[e.target]
                       for e in G_interlayer.es) / (N * T * (T - 1))
    else:
        raise ValueError(
            "Model {} is not temporal, multilevel, or multiplex".format(model))

    def maximize_modularity(intralayer_resolution, interlayer_resolution):
        # RBConfigurationVertexPartitionWeightedLayers implements a multilayer version of "standard" modularity (i.e.
        # the Reichardt and Bornholdt's Potts model with configuration null model).
        G_interlayer.es['weight'] = interlayer_resolution
        intralayer_part = \
            louvain.RBConfigurationVertexPartitionWeightedLayers(G_intralayer, layer_vec=layer_vec, weights='weight',
                                                                 resolution_parameter=intralayer_resolution)
        interlayer_part = louvain.CPMVertexPartition(G_interlayer,
                                                     resolution_parameter=0.0,
                                                     weights='weight')
        optimiser.optimise_partition_multiplex(
            [intralayer_part, interlayer_part])
        return intralayer_part

    def estimate_SBM_parameters(partition):
        K = len(partition)

        community = partition.membership
        m_t_in = [0] * T
        for e in G_intralayer.es:
            if community[e.source] == community[e.target] and layer_vec[
                    e.source] == layer_vec[e.target]:
                m_t_in[layer_vec[e.source]] += e['weight']

        kappa_t_r_list = [[0] * K for _ in range(T)]
        for e in G_intralayer.es:
            layer = layer_vec[e.source]
            kappa_t_r_list[layer][community[e.source]] += e['weight']
            kappa_t_r_list[layer][community[e.target]] += e['weight']
        sum_kappa_t_sqr = [
            sum(x**2 for x in kappa_t_r_list[t]) for t in range(T)
        ]

        theta_in = sum(2 * m_t_in[t]
                       for t in range(T)) / sum(sum_kappa_t_sqr[t] /
                                                (2 * m_t[t]) for t in range(T))
        # guard for div by zero with single community partition
        theta_out = sum(2 * m_t[t] - 2 * m_t_in[t] for t in range(T)) / \
                    sum(2 * m_t[t] - sum_kappa_t_sqr[t] / (2 * m_t[t]) for t in range(T)) if K > 1 else 0

        pers = calculate_persistence(community)
        if model == 'multiplex':
            # estimate p by solving polynomial root-finding problem with starting estimate p=0.5
            def f(x):
                coeff = 2 * (1 - 1 / K) / (T * (T - 1))
                return coeff * sum(
                    (T - n) * x**n for n in range(1, T)) + 1 / K - pers

            # guard for div by zero with single community partition
            # (in this case, all community assignments persist across layers)
            p = fsolve(f, np.array([0.5]))[0] if pers < 1.0 and K > 1 else 1.0
        else:
            # guard for div by zero with single community partition
            # (in this case, all community assignments persist across layers)
            p = max(
                (K * pers - 1) / (K - 1), 0) if pers < 1.0 and K > 1 else 1.0

        return theta_in, theta_out, p, K

    def update_gamma(theta_in, theta_out):
        if theta_out == 0:
            return theta_in / log(theta_in)
        return (theta_in - theta_out) / (log(theta_in) - log(theta_out))

    part, K, last_gamma, last_omega = (None, ) * 4
    for iteration in range(max_iter):
        part = maximize_modularity(gamma, omega)
        theta_in, theta_out, p, K = estimate_SBM_parameters(part)

        if theta_in == 0 or theta_in == 1:
            raise ValueError(
                "gamma={:.3f}, omega={:.3f} resulted in degenerate partition".
                format(gamma, omega))

        if not 0.0 <= p <= 1.0:
            raise ValueError(
                "gamma={:.3f}, omega={:.3f} resulted in impossible estimate p={:.3f}"
                "".format(gamma, omega, p))

        last_gamma, last_omega = gamma, omega
        gamma = update_gamma(theta_in, theta_out)
        omega = update_omega(theta_in, theta_out, p, K)

        if verbose:
            print(
                "Iter {:>2}: {} communities with Q={:.3f}, gamma={:.3f}->{:.3f}, omega={:.3f}->{:.3f}, and p={:.3f}"
                "".format(iteration, K, part.q, last_gamma, gamma, last_omega,
                          omega, p))

        if abs(gamma - last_gamma) < gamma_tol and abs(omega -
                                                       last_omega) < omega_tol:
            break  # gamma and omega converged
    else:
        if verbose:
            print(
                "Parameters failed to converge within {} iterations. "
                "Final move of ({:.3f}, {:.3f}) was not within tolerance ({}, {})"
                "".format(max_iter, abs(gamma - last_gamma),
                          abs(omega - last_omega), gamma_tol, omega_tol))

    if verbose:
        print("Returned {} communities with Q={:.3f}, gamma={:.3f}, "
              "and omega={:.3f}".format(K, part.q, gamma, omega))

    return gamma, omega, part
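A sketch of a call at the default tolerances on a temporal multilayer network; G_intralayer, G_interlayer, and layer_vec are as described in the docstring (this mirrors the multilayer_louvain example above):

gamma, omega, part = iterative_multilayer_resolution_parameter_estimation(
    G_intralayer, G_interlayer, layer_vec,
    gamma=1.0, omega=1.0, model='temporal', verbose=True)
print("converged to gamma={:.3f}, omega={:.3f}".format(gamma, omega))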
Example No. 23
def spectral_louvain(
    data: MultimodalData,
    rep: str = "pca",
    resolution: float = 1.3,
    rep_kmeans: str = "diffmap",
    n_clusters: int = 30,
    n_clusters2: int = 50,
    n_init: int = 10,
    n_jobs: int = -1,
    random_state: int = 0,
    class_label: str = "spectral_louvain_labels",
) -> None:
    """ Cluster the data using Spectral Louvain algorithm. [Li20]_

    Parameters
    ----------
    data: ``pegasusio.MultimodalData``
        Annotated data matrix with rows for cells and columns for genes.

    rep: ``str``, optional, default: ``"pca"``
        The embedding representation used for clustering. Keyword ``'X_' + rep`` must exist in ``data.obsm``. By default, use PCA coordinates.

    resolution: ``float``, optional, default: ``1.3``
        Resolution factor. Higher resolution tends to find more clusters with smaller sizes.

    rep_kmeans: ``str``, optional, default: ``"diffmap"``
        The embedding representation on which the KMeans runs. Keyword must exist in ``data.obsm``. By default, use Diffusion Map coordinates. If diffmap is not calculated, use PCA coordinates instead.

    n_clusters: ``int``, optional, default: ``30``
        The number of first level clusters.

    n_clusters2: ``int``, optional, default: ``50``
        The number of second level clusters.

    n_init: ``int``, optional, default: ``10``
        Number of kmeans tries for the first level clustering. Default is set to be the same as scikit-learn Kmeans function.

    n_jobs : `int`, optional (default: -1)
        Number of threads to use for the KMeans step. -1 refers to using all physical CPU cores.

    random_state: ``int``, optional, default: ``0``
        Random seed for reproducing results.

    class_label: ``str``, optional, default: ``"spectral_louvain_labels"``
        Key name for storing cluster labels in ``data.obs``.

    Returns
    -------
    ``None``

    Update ``data.obs``:
        * ``data.obs[class_label]``: Cluster labels for cells as categorical data.

    Examples
    --------
    >>> pg.spectral_louvain(data)
    """
    try:
        import louvain as louvain_module
    except ImportError:
        import sys
        logger.error(
            "Need louvain! Try 'pip install louvain' or 'conda install -c conda-forge louvain'."
        )
        sys.exit(-1)

    if f"X_{rep_kmeans}" not in data.obsm.keys():
        logger.warning(
            f"{rep_kmeans} is not calculated, switch to pca instead.")
        rep_kmeans = "pca"
        if f"X_{rep_kmeans}" not in data.obsm.keys():
            raise ValueError(f"Please run {rep_kmeans} first!")
    if f"W_{rep}" not in data.obsp:
        raise ValueError(
            "Cannot find affinity matrix. Please run neighbors first!")

    labels = partition_cells_by_kmeans(
        data.obsm[f"X_{rep_kmeans}"],
        n_clusters,
        n_clusters2,
        n_init,
        n_jobs,
        random_state,
    )

    W = data.obsp[f"W_{rep}"]

    G = construct_graph(W)
    partition_type = louvain_module.RBConfigurationVertexPartition
    partition = partition_type(G,
                               resolution_parameter=resolution,
                               weights="weight",
                               initial_membership=labels)
    partition_agg = partition.aggregate_partition()

    optimiser = louvain_module.Optimiser()
    optimiser.set_rng_seed(random_state)
    diff = optimiser.optimise_partition(partition_agg)
    partition.from_coarse_partition(partition_agg)

    labels = np.array([str(x + 1) for x in partition.membership])
    categories = natsorted(np.unique(labels))
    data.obs[class_label] = pd.Categorical(values=labels,
                                           categories=categories)
    data.register_attr(class_label, "cluster")

    n_clusters = data.obs[class_label].cat.categories.size
    logger.info(
        f"Spectral Louvain clustering is done. Found {n_clusters} clusters.")
Example #24
def run_louvain_multilayer(intralayer_graph,
                           interlayer_graph,
                           layer_vec,
                           weight='weight',
                           resolution=1.0,
                           omega=1.0,
                           nruns=1):
    logging.debug('Shuffling node ids')
    t = time()
    # sum of intralayer edge weights plus the interlayer edge count
    # (used below to normalise the modularity)
    mu = np.sum(intralayer_graph.es[weight]) + interlayer_graph.ecount()

    use_RBCweighted = hasattr(louvain,
                              'RBConfigurationVertexPartitionWeightedLayers')

    outparts = []
    for run in range(nruns):
        rand_perm = list(np.random.permutation(interlayer_graph.vcount()))
        # rand_perm = list(range(interlayer_graph.vcount()))
        rperm = rev_perm(rand_perm)
        interslice_layer_rand = interlayer_graph.permute_vertices(rand_perm)
        rlayer_vec = permute_vector(rand_perm, layer_vec)

        rintralayer_graph = intralayer_graph.permute_vertices(rand_perm)
        if use_RBCweighted:
            # a single (shuffled) graph representing all intralayer connections
            rlayers = [rintralayer_graph]
        else:
            rlayers = _create_multilayer_igraphs_from_super_adj_igraph(
                rintralayer_graph, layer_vec=rlayer_vec)

        logging.debug('time: {:.4f}'.format(time() - t))

        # create the partition objects
        layer_partition_objs = []

        logging.debug('creating partition objects')
        t = time()

        for i, layer in enumerate(
                rlayers):  # these are the shuffled igraph slice objects
            # resolution may be a scalar or a per-layer sequence
            try:
                res = resolution[i]
            except (TypeError, IndexError):
                res = resolution

            if use_RBCweighted:
                cpart = louvain.RBConfigurationVertexPartitionWeightedLayers(
                    layer,
                    layer_vec=rlayer_vec,
                    weights=weight,
                    resolution_parameter=res)
            else:
                #This creates individual VertexPartition for each layer.  Much slower to optimize.
                cpart = louvain.RBConfigurationVertexPartition(
                    layer, weights=weight, resolution_parameter=res)

            layer_partition_objs.append(cpart)

        coupling_partition = louvain.RBConfigurationVertexPartition(
            interslice_layer_rand, weights=weight, resolution_parameter=0)

        all_layer_partobjs = layer_partition_objs + [coupling_partition]

        optimiser = louvain.Optimiser()
        logging.debug('time: {:.4f}'.format(time() - t))
        logging.debug('running optimiser')
        t = time()

        # intralayer slices get unit weight; the interslice coupling is weighted by omega
        layer_weights = [1] * len(rlayers) + [omega]
        improvement = optimiser.optimise_partition_multiplex(
            all_layer_partobjs, layer_weights=layer_weights)

        # the membership for each of the partitions is tied together.
        finalpartition = permute_vector(rperm,
                                        all_layer_partobjs[0].membership)
        reversed_partobj = []
        # Reverse the permutation of the graph underlying each partition object so
        # that expected edge counts are computed in the original vertex order.
        # This is not ideal; could we just reverse the permutation instead?
        for layer in layer_partition_objs:
            if use_RBCweighted:
                reversed_partobj.append(
                    louvain.RBConfigurationVertexPartitionWeightedLayers(
                        graph=layer.graph.permute_vertices(rperm),
                        initial_membership=finalpartition,
                        weights=weight,
                        layer_vec=layer_vec,
                        resolution_parameter=layer.resolution_parameter))
            else:
                reversed_partobj.append(
                    louvain.RBConfigurationVertexPartition(
                        graph=layer.graph.permute_vertices(rperm),
                        initial_membership=finalpartition,
                        weights=weight,
                        resolution_parameter=layer.resolution_parameter))
        coupling_partition_rev = louvain.RBConfigurationVertexPartition(
            graph=coupling_partition.graph.permute_vertices(rperm),
            initial_membership=finalpartition,
            weights=weight,
            resolution_parameter=0)
        # use only the intralayer partition objects
        A = _get_sum_internal_edges_from_partobj_list(reversed_partobj,
                                                      weight=weight)
        if use_RBCweighted:  # only one partobj here, representing all layers
            P = get_expected_edges_ml(reversed_partobj[0],
                                      layer_vec=layer_vec,
                                      weight=weight)
        else:
            P = _get_sum_expected_edges_from_partobj_list(reversed_partobj,
                                                          weight=weight)
        C = get_sum_internal_edges(coupling_partition_rev, weight=weight)
        outparts.append({
            'partition': np.array(finalpartition),
            'resolution': resolution,
            'coupling': omega,
            'orig_mod': (0.5 / mu) * (_get_modularity_from_partobj_list(reversed_partobj)
                                      + omega * coupling_partition_rev.quality()),
            'int_edges': A,
            'exp_edges': P,
            'int_inter_edges': C})

    logging.debug('time: {:.4f}'.format(time() - t))
    return outparts
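
# rev_perm and permute_vector are used above but defined elsewhere in the
# module. A minimal sketch of implementations consistent with their use here
# (an assumption, not the original code): permute_vector applies a permutation
# so that permute_vector(perm, vec)[perm[i]] == vec[i], matching igraph's
# permute_vertices() convention, and rev_perm inverts a permutation.
def rev_perm(perm):
    inverse = [0] * len(perm)
    for i, p in enumerate(perm):
        inverse[p] = i
    return inverse

def permute_vector(perm, vec):
    out = [None] * len(vec)
    for i, p in enumerate(perm):
        out[p] = vec[i]
    return out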
Example #25
def iterative_multilayer_resolution_parameter_estimation(
        G_intralayer,
        G_interlayer,
        layer_vec,
        gamma=1.0,
        omega=1.0,
        gamma_tol=1e-2,
        omega_tol=5e-2,
        omega_max=1000,
        max_iter=25,
        model='temporal',
        verbose=False):
    """
    Multilayer variant of ALG. 1 from "Relating modularity maximization and stochastic block models in multilayer
    networks." The nested functions here are just used to match the pseudocode in the paper.

    :param G_intralayer: intralayer graph of interest
    :type G_intralayer: igraph.Graph
    :param G_interlayer: interlayer graph of interest
    :type G_interlayer: igraph.Graph
    :param layer_vec: list of each vertex's layer membership
    :type layer_vec: list[int]
    :param gamma: starting gamma value
    :type gamma: float
    :param omega: starting omega value
    :type omega: float
    :param gamma_tol: convergence tolerance for gamma
    :type gamma_tol: float
    :param omega_tol: convergence tolerance for omega
    :type omega_tol: float
    :param omega_max: maximum allowed value for omega
    :type omega_max: float
    :param max_iter: maximum number of iterations
    :type max_iter: int
    :param model: network layer topology (temporal, multilevel, multiplex)
    :type model: str
    :param verbose: whether or not to print verbose output
    :type verbose: bool
    :return:
        - gamma to which the iteration converged
        - omega to which the iteration converged
        - the resulting partition
    :rtype: tuple[float, float, tuple[int]]
    """

    # note: attribute presence must be checked on .attributes(); 'weight' in
    # g.es would test membership among the edges themselves, not the attributes
    if 'weight' not in G_intralayer.es.attributes():
        G_intralayer.es['weight'] = [1.0] * G_intralayer.ecount()

    if 'weight' not in G_interlayer.es.attributes():
        G_interlayer.es['weight'] = [1.0] * G_interlayer.ecount()

    T = max(layer_vec) + 1  # layer count
    optimiser = louvain.Optimiser()

    # compute total edge weights per layer
    m_t = [0] * T
    for e in G_intralayer.es:
        m_t[layer_vec[e.source]] += e['weight']

    # compute node counts per layer (N assumes layers of equal size)
    N = G_intralayer.vcount() // T
    Nt = [0] * T
    for layer in layer_vec:
        Nt[layer] += 1

    check_multilayer_graph_consistency(G_intralayer, G_interlayer, layer_vec,
                                       model, m_t, T, N, Nt)
    update_omega = omega_function_from_model(model, omega_max, T=T)
    update_gamma = gamma_estimate_from_parameters

    def maximize_modularity(intralayer_resolution, interlayer_resolution):
        return multilayer_louvain(G_intralayer,
                                  G_interlayer,
                                  layer_vec,
                                  intralayer_resolution,
                                  interlayer_resolution,
                                  optimiser=optimiser,
                                  return_partition=True)

    def estimate_SBM_parameters(partition):
        return estimate_multilayer_SBM_parameters(G_intralayer,
                                                  G_interlayer,
                                                  layer_vec,
                                                  partition,
                                                  model,
                                                  N=N,
                                                  T=T,
                                                  Nt=Nt,
                                                  m_t=m_t)

    part, K, last_gamma, last_omega = (None, ) * 4
    for iteration in range(max_iter):
        part = maximize_modularity(gamma, omega)
        theta_in, theta_out, p, K = estimate_SBM_parameters(part)

        if not 0.0 <= p <= 1.0:
            raise ValueError(
                f"gamma={gamma:.3f}, omega={omega:.3f} resulted in impossible estimate p={p:.3f}"
            )

        last_gamma, last_omega = gamma, omega
        gamma = update_gamma(theta_in, theta_out)

        if gamma is None:
            raise ValueError(
                f"gamma={last_gamma:.3f}, omega={last_omega:.3f} resulted in degenerate partition"
            )

        omega = update_omega(theta_in, theta_out, p, K)

        if verbose:
            print(
                f"Iter {iteration:>2}: {K} communities with Q={part.q:.3f}, gamma={last_gamma:.3f}->{gamma:.3f}, "
                f"omega={last_omega:.3f}->{omega:.3f}, and p={p:.3f}")

        if abs(gamma - last_gamma) < gamma_tol and abs(omega -
                                                       last_omega) < omega_tol:
            break  # gamma and omega converged
    else:
        if verbose:
            print(
                f"Parameters failed to converge within {max_iter} iterations. "
                f"Final move of ({abs(gamma - last_gamma):.3f}, {abs(omega - last_omega):.3f}) "
                f"was not within tolerance ({gamma_tol}, {omega_tol})")

    if verbose:
        print(
            f"Returned {K} communities with Q={part.q:.3f}, gamma={gamma:.3f}, and omega={omega:.3f}"
        )

    return gamma, omega, part
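
# gamma_estimate_from_parameters is not shown in this excerpt. In Pamfil et
# al.'s planted-partition framework (the paper cited in the docstring above),
# the gamma update is the log-ratio estimate sketched below; returning None
# for degenerate partitions matches how the loop above handles that case.
from math import log

def gamma_estimate_from_parameters_sketch(theta_in, theta_out):
    if theta_out <= 0 or theta_in <= theta_out:
        return None  # degenerate partition: gamma is undefined
    return (theta_in - theta_out) / (log(theta_in) - log(theta_out))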
Example #26
def run_louvain(graph,
                config_model='RB',
                overlap=False,
                directed=False,
                interslice_weight=0.1,
                resolution_parameter=0.1):
    """
    :outdir: the output directory to comprehend the output link file
    :param graph: input file
    :param config_model: 'RB', 'RBER', 'CPM', 'Surprise', 'Significance'
    :param overlap: bool, whether to enable overlapping community detection
    :param directed
    :param interslice_weight
    :param resolution_parameter
    :return
    """
    def louvain_multiplex(graphs, partition_type, interslice_weight,
                          resolution_parameter=None):
        layers, interslice_layer, G_full = louvain.time_slices_to_layers(
            graphs, vertex_id_attr='name', interslice_weight=interslice_weight)
        if partition_type == louvain.SignificanceVertexPartition:
            # Significance takes neither weights nor a resolution parameter
            partitions = [partition_type(H) for H in layers]
            interslice_partition = partition_type(interslice_layer)
        elif partition_type in (louvain.ModularityVertexPartition,
                                louvain.SurpriseVertexPartition):
            # these quality functions take no resolution parameter
            partitions = [partition_type(H) for H in layers]
            interslice_partition = partition_type(interslice_layer,
                                                  weights='weight')
        else:
            # pass the resolution by keyword: positionally it would be taken
            # as initial_membership
            partitions = [
                partition_type(H, resolution_parameter=resolution_parameter)
                for H in layers
            ]
            interslice_partition = partition_type(
                interslice_layer,
                resolution_parameter=resolution_parameter,
                weights='weight')
        optimiser = louvain.Optimiser()
        optimiser.optimise_partition_multiplex(partitions +
                                               [interslice_partition])
        quality = sum(p.quality() for p in partitions + [interslice_partition])
        return partitions[0], quality

    multi = isinstance(graph, list)

    if overlap and not multi:
        # emulate overlapping community detection by running the same graph
        # as four identical slices of a multiplex network
        multi = True
        graph = [graph] * 4

    if config_model == 'RB':
        partition_type = louvain.RBConfigurationVertexPartition
    elif config_model == 'RBER':
        partition_type = louvain.RBERConfigurationVertexPartition
    elif config_model == 'CPM':
        partition_type = louvain.CPMVertexPartition
    elif config_model == 'Surprise':
        partition_type = louvain.SurpriseVertexPartition
    elif config_model == "Significance":
        partition_type = louvain.SignificanceVertexPartition
    else:
        sys.stderr.write(
            "Configuration model not specified; performing simple Louvain.\n")
        partition_type = louvain.ModularityVertexPartition

    weighted = False
    if multi:
        wL = []
        G = []
        for file in graph:
            with open(file, 'r') as f:
                lines = f.read().splitlines()
            elts = lines[0].split()
            if len(elts) == 3:
                weighted = True
            else:
                weighted = False
            for i in range(len(lines)):
                elts = lines[i].split()
                for j in range(2):
                    elts[j] = int(elts[j])
                if weighted:
                    elts[2] = float(elts[2])
                lines[i] = tuple(elts)
            g = igraph.Graph.TupleList(lines,
                                       directed=directed,
                                       weights=weighted)
            G.append(g)
            wL.append(weighted)
        if True in wL and False in wL:
            raise Exception('all graphs should follow the same format')
        if partition_type == louvain.CPMVertexPartition and directed:
            raise Exception('graph for CPMVertexPartition must be undirected')
        if partition_type == louvain.SignificanceVertexPartition and weighted:
            raise Exception(
                'SignificanceVertexPartition only supports unweighted graphs')
        if partition_type == louvain.ModularityVertexPartition:
            partition, quality = louvain_multiplex(G, partition_type,
                                                   interslice_weight)
        else:
            partition, quality = louvain_multiplex(G, partition_type,
                                                   interslice_weight,
                                                   resolution_parameter)

    else:
        with open(graph, 'r') as f:
            lines = f.read().splitlines()
        Node2Index = {}
        elts = lines[0].split()
        if len(elts) == 3:
            weighted = True
        else:
            weighted = False
        index = 0
        for i in range(len(lines)):
            elts = lines[i].split()
            for j in range(2):
                elts[j] = int(elts[j])
                if elts[j] not in Node2Index:
                    Node2Index[elts[j]] = index
                    index += 1
            if weighted:
                elts[2] = float(elts[2])
            lines[i] = tuple(elts)
        Index2Node = {}
        for node in Node2Index:
            Index2Node[Node2Index[node]] = node
        G = igraph.Graph.TupleList(lines, directed=directed, weights=weighted)
        weights = G.es['weight'] if weighted else None
        if partition_type in (louvain.RBConfigurationVertexPartition,
                              louvain.RBERConfigurationVertexPartition,
                              louvain.CPMVertexPartition):
            partition = louvain.find_partition(
                G,
                partition_type,
                weights=weights,
                resolution_parameter=resolution_parameter)
        else:
            # Modularity, Surprise and Significance take no resolution parameter
            partition = louvain.find_partition(G, partition_type, weights=weights)
        # find_partition already optimises; this second pass tries to refine further
        optimiser = louvain.Optimiser()
        optimiser.optimise_partition(partition)

    if len(partition) == 0:
        sys.stderr.write("No cluster; resolution parameter may be too extreme\n")
        return 1

    if multi:
        # the multiplex path never builds Node2Index/Index2Node; recover the
        # original node ids from the vertex names kept by time_slices_to_layers
        Index2Node = {v.index: v['name'] for v in partition.graph.vs}
        Node2Index = {name: idx for idx, name in Index2Node.items()}

    maxNode = max(Node2Index.keys())

    # emit 'parent,child,type;' records: a synthetic root term links to each
    # cluster term, and each cluster term links to its member genes
    for i in range(len(partition)):
        sys.stdout.write(
            str(maxNode + len(partition) + 1) + ',' + str(maxNode + i + 1) +
            ',' + 'term-term' + ';')
        for n in partition[i]:
            sys.stdout.write(
                str(maxNode + i + 1) + ',' + str(Index2Node[n]) + ',' +
                'term-gene' + ';')
    sys.stdout.flush()
    return 0
Example #27
for resolution in resolutions:
    memberships = []
    print('Detecting communities using resolution parameter {0}'.format(resolution))
    for itr in range(n_repl):
        print('\tRun {0:02d}'.format(itr))

        partition_intraslice = [louvain.RBConfigurationVertexPartition(
                                    H, weights='weight',
                                    resolution_parameter=resolution)
                                for H in G_intraslice]
        partition_interslice = louvain.CPMVertexPartition(
            G_interslice,
            weights='weight',
            node_sizes=G_interslice.vs['node_size'],
            resolution_parameter=0)

        ##%% Optimise partitions
        opt = louvain.Optimiser()
        opt.consider_comms = louvain.ALL_NEIGH_COMMS
        opt.optimise_partition_multiplex(partition_intraslice + [partition_interslice])

        # The membership in all partitions will be identical, so simply
        # consider the membership for the interslice partition and graph.
        memberships.append(partition_interslice.membership)

    ##%% Write results to file
    cluster_df = pd.DataFrame({attr: G_interslice.vs[attr]
                               for attr in G_interslice.vertex_attributes()},
                              index=[v.index for v in G_interslice.vs])
    membership_df = pd.DataFrame.from_records(
        zip(*memberships),
        columns=['run_{0}'.format(itr) for itr in range(n_repl)])
    cluster_df = pd.concat([cluster_df, membership_df], axis=1)
    cluster_df = cluster_df.sort_values(['statenme', 'year'])
    cluster_df.to_csv(output_dir + 'comms_{0}.csv'.format(resolution), index=False)
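
# G_intraslice, G_interslice and the other free names above are defined
# earlier in the original script. A plausible construction sketch using
# louvain.time_slices_to_layers; the toy slice graphs g1 and g2 are
# illustrative assumptions, not the original data.
import igraph as ig
import louvain

g1 = ig.Graph.Erdos_Renyi(n=20, p=0.2)
g2 = ig.Graph.Erdos_Renyi(n=20, p=0.2)
for g in (g1, g2):
    g.vs['name'] = [str(i) for i in range(g.vcount())]
    g.es['weight'] = 1.0

# couples same-named vertices in consecutive slices and attaches the
# 'node_size' vertex attribute consumed by the CPM interslice partition above
G_intraslice, G_interslice, G_full = louvain.time_slices_to_layers(
    [g1, g2], vertex_id_attr='name', interslice_weight=0.1)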
Example #28
def run_louvain(graph,
                config_model='Default',
                overlap=False,
                directed=False,
                deep=False,
                interslice_weight=0.1,
                resolution_parameter=0.1,
                seed=None):
    """
    :outdir: the output directory to comprehend the output link file
    :param graph: input file
    :param config_model: 'RB', 'RBER', 'CPM', 'Surprise', 'Significance'
    :param overlap: bool, whether to enable overlapping community detection
    :param directed
    :param deep
    :param interslice_weight
    :param resolution_parameter
    :return
    """

    if seed is not None:
        louvain.set_rng_seed(seed)

    def louvain_hierarchy_output(partition):
        optimiser = louvain.Optimiser()
        partition_agg = partition.aggregate_partition()
        partition_layers = []
        while optimiser.move_nodes(partition_agg) > 0:
            partition.from_coarse_partition(partition_agg)
            partition_agg = partition_agg.aggregate_partition()
            partition_layers.append(list(partition))
        return partition_layers
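
    # Note on louvain_hierarchy_output: each call to Optimiser.move_nodes()
    # performs one round of local moves on the aggregate graph; projecting the
    # result back with from_coarse_partition() and re-aggregating reproduces
    # the levels of the classic Louvain hierarchy, so partition_layers holds
    # one progressively coarser clustering per level.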

    def louvain_multiplex(graphs, partition_type, interslice_weight,
                          resolution_parameter):
        layers, interslice_layer, G_full = louvain.time_slices_to_layers(
            graphs, vertex_id_attr='name', interslice_weight=interslice_weight)
        if partition_type == louvain.SignificanceVertexPartition:
            # Significance takes neither weights nor a resolution parameter
            partitions = [partition_type(H) for H in layers]
            interslice_partition = partition_type(interslice_layer)
        elif partition_type in (louvain.ModularityVertexPartition,
                                louvain.SurpriseVertexPartition):
            # these quality functions take no resolution parameter
            partitions = [partition_type(H) for H in layers]
            interslice_partition = partition_type(interslice_layer,
                                                  weights='weight')
        else:
            partitions = [
                partition_type(H, resolution_parameter=resolution_parameter)
                for H in layers
            ]
            interslice_partition = partition_type(
                interslice_layer,
                resolution_parameter=resolution_parameter,
                weights='weight')
        optimiser = louvain.Optimiser()
        optimiser.optimise_partition_multiplex(partitions +
                                               [interslice_partition])
        quality = sum(p.quality() for p in partitions + [interslice_partition])
        return partitions[0], quality

    def partition_to_clust(graphs, partition, min_size_cut=2):
        clusts = []
        node_names = []
        if not isinstance(graphs, list):
            graphs = [graphs]
        for g in graphs:
            node_names.extend(g.vs['name'])
        for i in range(len(partition)):
            clust = [node_names[id] for id in partition[i]]
            clust = list(set(clust))
            if len(clust) < min_size_cut:
                continue
            clust.sort()
            clusts.append(clust)
        clusts = sorted(clusts, key=len, reverse=True)
        return clusts

    multi = isinstance(graph, list)

    if overlap and not multi:
        # emulate overlapping community detection by running the same graph
        # as four identical slices of a multiplex network
        multi = True
        graph = [graph] * 4

    if multi and deep:
        sys.stderr.write('louvain does not support hierarchical '
                         'clustering with overlapped communities\n')
        return 1

    if config_model == 'RB':
        partition_type = louvain.RBConfigurationVertexPartition
    elif config_model == 'RBER':
        partition_type = louvain.RBERConfigurationVertexPartition
    elif config_model == 'CPM':
        partition_type = louvain.CPMVertexPartition
    elif config_model == 'Surprise':
        partition_type = louvain.SurpriseVertexPartition
    elif config_model == "Significance":
        partition_type = louvain.SignificanceVertexPartition
    else:
        sys.stderr.write("Configuration model not set; "
                         "performing simple Louvain.\n")
        partition_type = louvain.ModularityVertexPartition

    weighted = False
    if multi:
        wL = []
        G = []
        for file in graph:
            with open(file, 'r') as f:
                lines = f.read().splitlines()
            elts = lines[0].split()
            if len(elts) == 3:
                weighted = True
            else:
                weighted = False
            for i in range(len(lines)):
                elts = lines[i].split()
                for j in range(2):
                    elts[j] = int(elts[j])
                if weighted:
                    elts[2] = float(elts[2])
                    if elts[2] < 0:
                        sys.stderr.write('encountered a negative edge weight '
                                         'on row ' + str(i) + ' (' +
                                         str(lines[i]) +
                                         ') which is not allowed\n')
                        return 2
                lines[i] = tuple(elts)
            g = igraph.Graph.TupleList(lines,
                                       directed=directed,
                                       weights=weighted)
            G.append(g)
            wL.append(weighted)
        if True in wL and False in wL:
            raise Exception('all graphs should follow the same format')
        if partition_type == louvain.CPMVertexPartition and directed:
            raise Exception('graph for CPMVertexPartition must be undirected')
        if partition_type == louvain.SignificanceVertexPartition and weighted:
            raise Exception('SignificanceVertexPartition only supports '
                            'unweighted graphs')
        partition, quality = louvain_multiplex(G, partition_type,
                                               interslice_weight,
                                               resolution_parameter)

    else:
        if not os.path.isfile(graph):
            sys.stderr.write(str(graph) + ' is not a file\n')
            return 3
        if os.path.getsize(graph) == 0:
            sys.stderr.write(str(graph) + ' is an empty file\n')
            return 4
        with open(graph, 'r') as f:
            lines = f.read().splitlines()
        elts = lines[0].split()
        if len(elts) == 3:
            weighted = True
        else:
            weighted = False

        for i in range(len(lines)):
            elts = lines[i].split()
            for j in range(2):
                elts[j] = int(elts[j])
            if weighted:
                elts[2] = float(elts[2])
                if elts[2] < 0:
                    sys.stderr.write('encountered a negative edge weight '
                                     'on row ' + str(i) + ' (' +
                                     str(lines[i]) +
                                     ') which is not allowed\n')
                    return 3
            lines[i] = tuple(elts)

        G = igraph.Graph.TupleList(lines, directed=directed, weights=weighted)
        weights = G.es['weight'] if weighted else None
        if partition_type in (louvain.ModularityVertexPartition,
                              louvain.SurpriseVertexPartition):
            partition = partition_type(G, weights=weights)
        elif partition_type == louvain.SignificanceVertexPartition:
            # Significance takes neither weights nor a resolution parameter
            partition = partition_type(G)
        else:
            partition = partition_type(
                G, weights=weights, resolution_parameter=resolution_parameter)
        if not deep:
            optimiser = louvain.Optimiser()
            optimiser.optimise_partition(partition)

    lines = []
    if not deep:
        clusts = partition_to_clust(G, partition)
        if len(clusts) == 0:
            sys.stderr.write(DEFAULT_ERR_MSG)
            return 4

        maxNode = 0
        for clust in clusts:
            maxNode = max(maxNode, max(clust))

        for i in range(len(clusts)):
            lines.append(
                str(maxNode + len(partition) + 1) + '\t' +
                str(maxNode + i + 1))
            for n in clusts[i]:
                lines.append(str(maxNode + i + 1) + '\t' + str(n))
    else:
        partitions = louvain_hierarchy_output(partition)
        clusts_layers = []
        for p in partitions:
            clusts_layers.append(partition_to_clust(G, p))
        if len(clusts_layers) == 0:
            sys.stderr.write(DEFAULT_ERR_MSG)
            return 5
        if len(clusts_layers[0]) == 0:
            sys.stderr.write(DEFAULT_ERR_MSG)
            return 6
        maxNode = 0
        for clust in clusts_layers[0]:
            maxNode = max(maxNode, max(clust))
        for i in range(len(clusts_layers[0])):
            for n in clusts_layers[0][i]:
                lines.append(str(maxNode + i + 1) + '\t' + str(n))
        maxNode = maxNode + len(clusts_layers[0])
        for i in range(1, len(clusts_layers)):
            for j in range(len(clusts_layers[i - 1])):
                for k in range(len(clusts_layers[i])):
                    if all(x in clusts_layers[i][k]
                           for x in clusts_layers[i - 1][j]):
                        lines.append(
                            str(maxNode + k + 1) + '\t' +
                            str(maxNode - len(clusts_layers[i - 1]) + j + 1))
                        break
            maxNode = maxNode + len(clusts_layers[i])
        for i in range(len(clusts_layers[-1])):
            lines.append(
                str(maxNode + 1) + '\t' +
                str(maxNode - len(clusts_layers[-1]) + i + 1))

    # trim the hierarchy: collapse non-branching chains ('contigs') in the DAG
    up_tree = {}
    down_tree = {}
    for line in lines:
        elts = line.split()
        down_tree.setdefault(elts[0], [])
        down_tree[elts[0]].append(elts[1])
        up_tree.setdefault(elts[1], [])
        up_tree[elts[1]].append(elts[0])

    # store root and leaves
    set1 = set(down_tree.keys())
    set2 = set(up_tree.keys())
    root_l = list(set1.difference(set2))
    leaf_l = list(set2.difference(set1))
    node_l = list(set1.union(set2))

    # find all contigs in the DAG
    Contigs = []
    work_list = root_l
    visited = {}
    for node in node_l:
        visited[node] = 0
    work_path = []
    new_path = False
    while work_list:
        key = work_list.pop(0)
        if not new_path:
            work_path.append(key)
        else:
            # starting a new chain: record the parent we arrived from, then the node
            work_path.append(up_tree[key][visited[key]])
            work_path.append(key)
        if key in leaf_l:
            # a leaf terminates the current chain
            new_path = True
            Contigs.append(work_path)
            work_path = []
        elif len(down_tree[key]) > 1 or visited[key] > 0:
            # a branch point or revisited node also terminates the chain
            new_path = True
            Contigs.append(work_path)
            work_path = []
        if visited[key] == 0 and key not in leaf_l:
            work_list = down_tree[key] + work_list  # depth-first expansion
        visited[key] += 1

    # write the trimmed DAG: each chain is collapsed into a single edge from
    # its first to its last node
    for path in Contigs[1:]:
        sys.stdout.write(path[0] + ',' + path[-1] + ',')
        if path[-1] in leaf_l:
            sys.stdout.write('c-m' + ';')
        else:
            sys.stdout.write('c-c' + ';')

    sys.stdout.flush()
    return 0
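
# A hedged usage sketch for run_louvain above: write a tiny unweighted edge
# list to a temporary file (hypothetical data) and run the single-graph 'RB'
# path with a fixed seed.
import tempfile

edges = [(0, 1), (1, 2), (2, 0), (3, 4), (4, 5), (5, 3), (2, 3)]
with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as tmp:
    for u, v in edges:
        tmp.write('{0} {1}\n'.format(u, v))

run_louvain(tmp.name, config_model='RB', resolution_parameter=1.0, seed=42)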