Example #1
def identify_clusters(vlm,
                      conn,
                      correct_tags=False,
                      tag_correction_list=[],
                      method_name='ModularityVertexPartition',
                      seed=360):
    """
    Cluster identification via the Louvain algorithm. Can be used for cluster discovery. If clusters have been identified
    manually (e.g. with visualize_protein_markers()), they can be renumbered or combined using the tag correction list.
    method_name can be the name of any partition type accepted by louvain.find_partition.
    """

    g = ig.Graph.Adjacency(conn.todense().tolist())
    method = getattr(louvain, method_name)
    louvain.set_rng_seed(seed)
    partition = louvain.find_partition(g, method)
    tag_list = np.zeros(conn.shape[0])
    for x in range(len(partition)):
        tag_list[partition[x]] = int(x)
    if correct_tags:
        cluster_ID = [tag_correction_list[int(X)] for X in tag_list]
    else:
        cluster_ID = [int(X) for X in tag_list]

    num_clusters = max(cluster_ID) + 1

    vlm.cluster_ID = cluster_ID
    vlm.num_clusters = int(num_clusters)
    return [cluster_ID, num_clusters]
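
A minimal usage sketch (not from the original source): `conn` is assumed to be a SciPy sparse k-NN connectivity matrix, and a `SimpleNamespace` stands in for the velocyto-style `vlm` object, which only needs to accept the `cluster_ID` and `num_clusters` attributes.

# Hypothetical usage of identify_clusters(); data and parameters are illustrative.
import numpy as np
import igraph as ig
import louvain
from types import SimpleNamespace
from sklearn.neighbors import kneighbors_graph

X = np.random.rand(200, 10)                 # toy expression matrix: 200 cells, 10 features
conn = kneighbors_graph(X, n_neighbors=15)  # sparse k-NN connectivity matrix
vlm = SimpleNamespace()                     # stand-in for the velocyto object
cluster_ID, num_clusters = identify_clusters(vlm, conn)
print(num_clusters, np.bincount(cluster_ID))
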
def test_diff_move():
    intraslice = ig.Graph.Read_Ncol("multilayer_SBM_interslice_edges.csv",
                                    directed=False)
    n = intraslice.vcount()
    layer_vec = [0] * n
    membership = list(range(n))

    part_rbc = louvain.RBConfigurationVertexPartition(
        intraslice, resolution_parameter=1.0, initial_membership=membership)
    part_weighted_layers = louvain.RBConfigurationVertexPartitionWeightedLayers(
        intraslice,
        resolution_parameter=1.0,
        layer_vec=layer_vec,
        initial_membership=membership)

    # check diff_move() - quality() consistency across 100 random moves
    for repeat in range(100):
        v = randint(0, n - 1)
        c = randint(0, n - 1)
        old_quality = part_weighted_layers.quality()
        wl_diff = part_weighted_layers.diff_move(v, c)
        part_weighted_layers.move_node(v, c)
        true_diff = part_weighted_layers.quality() - old_quality

        rbc_diff = part_rbc.diff_move(v, c)
        part_rbc.move_node(v, c)

        assert isclose(
            wl_diff, true_diff
        ), "WeightedLayers diff_move() inconsistent with quality()"
        assert isclose(
            wl_diff, rbc_diff
        ), "WeightedLayers diff_move() inconsistent with single-layer"
        assert isclose(part_weighted_layers.quality(), part_rbc.quality(
        )), "WeightedLayers quality() inconsistent with single-layer"

    # check rng consistency between RBConfigurationVertexPartition and its WeightedLayers variant
    # with various seeds and intraslice resolution parameters
    for gamma in np.linspace(0.5, 1.5, 10):
        shared_seed = randint(-1 << 31, (1 << 31) - 1)  # random int32

        louvain.set_rng_seed(shared_seed)
        part_weighted_layers = louvain.RBConfigurationVertexPartitionWeightedLayers(
            intraslice, resolution_parameter=gamma, layer_vec=layer_vec)
        opt = louvain.Optimiser()
        opt.optimise_partition(partition=part_weighted_layers)

        louvain.set_rng_seed(shared_seed)
        part_rbc = louvain.RBConfigurationVertexPartition(
            intraslice, resolution_parameter=gamma)
        opt = louvain.Optimiser()
        opt.optimise_partition(partition=part_rbc)

        quality_weighted_layers = part_weighted_layers.quality(
            resolution_parameter=gamma)
        quality_rbc = part_rbc.quality(resolution_parameter=gamma)
        assert isclose(
            quality_weighted_layers, quality_rbc
        ), "Intra-layer optimisation inconsistent with single-layer"
Example #3
def louvain(i, j, val, dim, partition_method, initial_membership, weights,
            resolution, node_sizes, seed, verbose):
    import louvain
    import igraph as ig
    import numpy
    from scipy.sparse import csc_matrix
    data = csc_matrix((val, (i, j)), shape=dim)
    # vcount = max(data.shape)
    sources, targets = data.nonzero()
    edgelist = zip(sources.tolist(), targets.tolist())
    G = ig.Graph(edges=list(edgelist))

    # G = ig.Graph.Adjacency(data.tolist())

    if partition_method == 'ModularityVertexPartition':
        partition = louvain.ModularityVertexPartition(
            G, initial_membership=initial_membership, weights=weights)
    elif partition_method == 'RBConfigurationVertexPartition':
        partition = louvain.RBConfigurationVertexPartition(
            G,
            initial_membership=initial_membership,
            weights=weights,
            resolution_parameter=resolution)
    elif partition_method == 'RBERVertexPartition':
        partition = louvain.RBERVertexPartition(
            G,
            initial_membership=initial_membership,
            weights=weights,
            node_sizes=node_sizes,
            resolution_parameter=resolution)
    elif partition_method == 'CPMVertexPartition':
        partition = louvain.CPMVertexPartition(
            G,
            initial_membership=initial_membership,
            weights=weights,
            node_sizes=node_sizes,
            resolution_parameter=resolution)
    elif partition_method == 'SignificanceVertexPartition':
        partition = louvain.SignificanceVertexPartition(
            G, initial_membership=initial_membership, node_sizes=node_sizes)
    elif partition_method == 'SurpriseVertexPartition':
        partition = louvain.SurpriseVertexPartition(
            G,
            initial_membership=initial_membership,
            weights=weights,
            node_sizes=node_sizes)
    else:
        raise ValueError('partition_method ' + partition_method +
                         ' is NOT supported.')

    if seed is not None:
        louvain.set_rng_seed(seed)

    optimiser = louvain.Optimiser()
    diff = optimiser.optimise_partition(partition)

    # ig.plot(partition)
    return partition
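
A hypothetical call of the function above, feeding a tiny adjacency matrix in coordinate (i, j, val) form; all values are illustrative:

# Illustrative call of louvain() with a 4-node ring given in COO form.
import numpy as np

i = np.array([0, 1, 2, 3])
j = np.array([1, 2, 3, 0])
val = np.array([1.0, 1.0, 1.0, 1.0])
part = louvain(i, j, val, dim=(4, 4),
               partition_method='ModularityVertexPartition',
               initial_membership=None, weights=None,
               resolution=1.0, node_sizes=None, seed=42, verbose=False)
print(part.membership)
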
def louvain_method(user_interaction_graph):
    '''
    https://github.com/vtraag/louvain-igraph
    Fast unfolding of communities in large networks, Vincent D Blondel, Jean-Loup Guillaume, Renaud Lambiotte, Renaud Lefebvre, Journal of Statistical Mechanics: Theory and Experiment 2008(10), P10008 (12pp)
    :param user_interaction_graph: igraph Graph
    '''
    louvain.set_rng_seed(43)
    node_names = user_interaction_graph.vs
    return [[node_names[node]['name'] for node in community]
            for community in louvain.find_partition(user_interaction_graph,
                                                    louvain.ModularityVertexPartition)]
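
A quick illustrative call, building a toy graph with named vertices (graph and vertex names are made up here):

# Illustrative use of louvain_method() on a small named graph.
import igraph as ig
import louvain

g = ig.Graph.Famous("Zachary")
g.vs['name'] = ['user_%d' % v.index for v in g.vs]
communities = louvain_method(g)
print(len(communities), communities[0][:5])
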
Example #5
def get_louvain(mknn, min_cluster_size=10, resolution_parameter=1.0, seed=0):
    g = ig.Graph(n=mknn.shape[0], edges=list(zip(mknn.row, mknn.col)), directed=False)

    # Louvain clustering over the mKNN graph
    louvain.set_rng_seed(seed)
    part = louvain.find_partition(g,
                louvain.RBConfigurationVertexPartition,
                resolution_parameter=resolution_parameter)
    
    return CellLabels(clean_labels(part.membership, min_cluster_size=min_cluster_size)) 
    def optimal_modularity_community_detection(self, visual=True, name='optimal_modularity'):
        """
        Community detection Function using Louvain algorithm and maximization of modularity.
        Inputs:
            - visual: (Default = True) Visualize the communities computed
            - name: name of the .png exported file
        """
        louvain.set_rng_seed(123456)
        partition = louvain.find_partition(self.G, louvain.ModularityVertexPartition,
                                           weights=self.G.es['weight'])
        self.G.vs['community_optimal_modularity'] = partition.membership
        
        print("The estimated number of communities is",len(set(partition.membership)))
        print('\n')
        print("Communities")
        for n in range(0,len(partition)):
            print('Community number', n, '- size:', len(partition[n]))

        #Create a dictionary with channels (our node names) as keys and the community each belongs to as values
        comm_detect = dict(zip(self.G.vs['label'], self.G.vs['community_optimal_modularity']))
        print()
        print('The communities are:')
        print()
        comms = {}

        for item in comm_detect.items():
            if item[1] not in comms.keys():
                comms[item[1]] = []

            comms[item[1]].append(item[0])
            
        comms = OrderedDict(sorted(comms.items(), key=lambda t:t[0]))

        print(comms.items())
        
        if visual:
            visual_style = {}
            visual_style["vertex_size"] = 25
            #visual_style["vertex_color"] = "white"
            visual_style["vertex_label"] = self.G.vs["label"]
            #visual_style["edge_width"] = [math.exp(weight)*0.5 for weight in self.G.es["weight"]]
            visual_style["edge_width"] = 0.2
            visual_style["layout"] = self.G.vs["coords"]
            pal = igraph.drawing.colors.ClusterColoringPalette(len(set(self.G.vs['community_optimal_modularity'])))
            visual_style["vertex_color"] = pal.get_many(self.G.vs['community_optimal_modularity'])
            self.G.es['arrow_size'] = [0.1 for edge in self.G.es]



            graph = igraph.plot(self.G,bbox=(0, 0, 600, 600), **visual_style)
            graph.save(name + '.png')
            
            return comms, graph
        
        return comms
Example #7
def louvain_clusters(latent, k=10, rands=0, mutual=False):
    nn_matrix = kneighbors_graph(latent, k)
    rows, cols = nn_matrix.nonzero()
    if mutual:
        edges = [(row, col) if row < col else (col, row)
                 for row, col in zip(rows, cols)]
        edges = np.asarray(edges)
        unique_edges, edges_count = np.unique(edges,
                                              return_counts=True,
                                              axis=0)
        edges = unique_edges[edges_count == 2]
    else:
        edges = [(row, col) for row, col in zip(rows, cols)]
    g = ig.Graph()
    g.add_vertices(latent.shape[0])
    g.add_edges(edges)
    louvain.set_rng_seed(rands)
    res = louvain.find_partition(g, louvain.ModularityVertexPartition)
    clusters = np.asarray(res.membership)
    return clusters
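
A toy usage sketch (data and parameters are illustrative), clustering a random latent embedding via its mutual k-NN graph:

# Illustrative call of louvain_clusters() on random data.
import numpy as np
import igraph as ig
import louvain
from sklearn.neighbors import kneighbors_graph

latent = np.random.rand(300, 8)    # toy latent embedding
labels = louvain_clusters(latent, k=15, rands=0, mutual=True)
print(len(np.unique(labels)), "clusters")
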
Example #8
def louvain(
    adata: AnnData,
    resolution: Optional[float] = None,
    random_state: _utils.AnyRandom = 0,
    restrict_to: Optional[Tuple[str, Sequence[str]]] = None,
    key_added: str = 'louvain',
    adjacency: Optional[spmatrix] = None,
    flavor: Literal['vtraag', 'igraph', 'rapids'] = 'vtraag',
    directed: bool = True,
    use_weights: bool = False,
    partition_type: Optional[Type[MutableVertexPartition]] = None,
    partition_kwargs: Mapping[str, Any] = MappingProxyType({}),
    neighbors_key: Optional[str] = None,
    obsp: Optional[str] = None,
    copy: bool = False,
) -> Optional[AnnData]:
    """\
    Cluster cells into subgroups [Blondel08]_ [Levine15]_ [Traag17]_.

    Cluster cells using the Louvain algorithm [Blondel08]_ in the implementation
    of [Traag17]_. The Louvain algorithm has been proposed for single-cell
    analysis by [Levine15]_.

    This requires having run :func:`~scanpy.pp.neighbors` or
    :func:`~scanpy.external.pp.bbknn` first,
    or explicitly passing an ``adjacency`` matrix.

    Parameters
    ----------
    adata
        The annotated data matrix.
    resolution
        For the default flavor (``'vtraag'``) or for ``'rapids'``, you can provide a
        resolution (higher resolution means finding more and smaller clusters),
        which defaults to 1.0.
        See “Time as a resolution parameter” in [Lambiotte09]_.
    random_state
        Change the initialization of the optimization.
    restrict_to
        Restrict the clustering to the categories within the key for sample
        annotation, tuple needs to contain ``(obs_key, list_of_categories)``.
    key_added
        Key under which to add the cluster labels. (default: ``'louvain'``)
    adjacency
        Sparse adjacency matrix of the graph, defaults to neighbors connectivities.
    flavor
        Choose between two packages for computing the clustering.
        ``'vtraag'`` is much more powerful, and the default.
    directed
        Interpret the ``adjacency`` matrix as directed graph?
    use_weights
        Use weights from knn graph.
    partition_type
        Type of partition to use.
        Only a valid argument if ``flavor`` is ``'vtraag'``.
    partition_kwargs
        Key word arguments to pass to partitioning,
        if ``vtraag`` method is being used.
    neighbors_key
        Use neighbors connectivities as adjacency.
        If not specified, louvain looks at .obsp['connectivities'] for connectivities
        (default storage place for pp.neighbors).
        If specified, louvain looks at
        .obsp[.uns[neighbors_key]['connectivities_key']] for connectivities.
    obsp
        Use .obsp[obsp] as adjacency. You can't specify both
        `obsp` and `neighbors_key` at the same time.
    copy
        Copy adata or modify it inplace.

    Returns
    -------
    :obj:`None`
        By default (``copy=False``), updates ``adata`` with the following fields:

        ``adata.obs['louvain']`` (:class:`pandas.Series`, dtype ``category``)
            Array of dim (number of samples) that stores the subgroup id
            (``'0'``, ``'1'``, ...) for each cell.

    :class:`~anndata.AnnData`
        When ``copy=True`` is set, a copy of ``adata`` with those fields is returned.
    """
    partition_kwargs = dict(partition_kwargs)
    start = logg.info('running Louvain clustering')
    if (flavor != 'vtraag') and (partition_type is not None):
        raise ValueError('`partition_type` is only a valid argument '
                         'when `flavor` is "vtraag"')
    adata = adata.copy() if copy else adata
    if adjacency is None:
        adjacency = _choose_graph(adata, obsp, neighbors_key)
    if restrict_to is not None:
        restrict_key, restrict_categories = restrict_to
        adjacency, restrict_indices = restrict_adjacency(
            adata,
            restrict_key,
            restrict_categories,
            adjacency,
        )
    if flavor in {'vtraag', 'igraph'}:
        if flavor == 'igraph' and resolution is not None:
            logg.warning(
                '`resolution` parameter has no effect for flavor "igraph"')
        if directed and flavor == 'igraph':
            directed = False
        if not directed:
            logg.debug('    using the undirected graph')
        g = _utils.get_igraph_from_adjacency(adjacency, directed=directed)
        if use_weights:
            weights = np.array(g.es["weight"]).astype(np.float64)
        else:
            weights = None
        if flavor == 'vtraag':
            import louvain

            if partition_type is None:
                partition_type = louvain.RBConfigurationVertexPartition
            if resolution is not None:
                partition_kwargs["resolution_parameter"] = resolution
            if use_weights:
                partition_kwargs["weights"] = weights
            if version.parse(louvain.__version__) < version.parse("0.7.0"):
                louvain.set_rng_seed(random_state)
            else:
                partition_kwargs["seed"] = random_state
            logg.info('    using the "louvain" package of Traag (2017)')
            part = louvain.find_partition(
                g,
                partition_type,
                **partition_kwargs,
            )
            # adata.uns['louvain_quality'] = part.quality()
        else:
            part = g.community_multilevel(weights=weights)
        groups = np.array(part.membership)
    elif flavor == 'rapids':
        # nvLouvain only works with undirected graphs,
        # so `adjacency` must contain each edge in both directions
        import cudf
        import cugraph

        offsets = cudf.Series(adjacency.indptr)
        indices = cudf.Series(adjacency.indices)
        if use_weights:
            sources, targets = adjacency.nonzero()
            weights = adjacency[sources, targets]
            if isinstance(weights, np.matrix):
                weights = weights.A1
            weights = cudf.Series(weights)
        else:
            weights = None
        g = cugraph.Graph()

        if hasattr(g, 'add_adj_list'):
            g.add_adj_list(offsets, indices, weights)
        else:
            g.from_cudf_adjlist(offsets, indices, weights)

        logg.info('    using the "louvain" package of rapids')
        if resolution is not None:
            louvain_parts, _ = cugraph.louvain(g, resolution=resolution)
        else:
            louvain_parts, _ = cugraph.louvain(g)
        groups = (louvain_parts.to_pandas().sort_values('vertex')[[
            'partition'
        ]].to_numpy().ravel())
    elif flavor == 'taynaud':
        # this is deprecated
        import networkx as nx
        import community

        g = nx.Graph(adjacency)
        partition = community.best_partition(g)
        groups = np.zeros(len(partition), dtype=int)
        for k, v in partition.items():
            groups[k] = v
    else:
        raise ValueError(
            '`flavor` needs to be "vtraag", "igraph", "rapids" or "taynaud".')
    if restrict_to is not None:
        if key_added == 'louvain':
            key_added += '_R'
        groups = rename_groups(
            adata,
            key_added,
            restrict_key,
            restrict_categories,
            restrict_indices,
            groups,
        )
    adata.obs[key_added] = pd.Categorical(
        values=groups.astype('U'),
        categories=natsorted(map(str, np.unique(groups))),
    )
    adata.uns['louvain'] = {}
    adata.uns['louvain']['params'] = dict(
        resolution=resolution,
        random_state=random_state,
    )
    logg.info(
        '    finished',
        time=start,
        deep=(
            f'found {len(np.unique(groups))} clusters and added\n'
            f'    {key_added!r}, the cluster labels (adata.obs, categorical)'),
    )
    return adata if copy else None
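
A minimal end-to-end sketch with scanpy (the bundled PBMC dataset and the parameters are illustrative; the default ``'vtraag'`` flavor additionally requires the ``louvain`` package to be installed):

# Illustrative scanpy workflow: neighbors graph first, then Louvain clustering.
import scanpy as sc

adata = sc.datasets.pbmc68k_reduced()
sc.pp.neighbors(adata, n_neighbors=15)
sc.tl.louvain(adata, resolution=1.0, random_state=0, key_added='louvain')
print(adata.obs['louvain'].value_counts())
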
Example #9
def louvain(adata,
            resolution=None,
            random_state=0,
            restrict_to=None,
            key_added=None,
            adjacency=None,
            flavor='vtraag',
            directed=True,
            copy=False):
    """Cluster cells into subgroups [Blondel08]_ [Levine15]_ [Traag17]_.

    Cluster cells using the Louvain algorithm [Blondel08]_ in the implementation
    of [Traag17]_. The Louvain algorithm has been proposed for single-cell
    analysis by [Levine15]_.

    This requires running :func:`~scanpy.api.pp.neighbors` first.

    Parameters
    ----------
    adata : :class:`~scanpy.api.AnnData`
        The annotated data matrix.
    resolution : `float` or `None`, optional (default: 1)
        For the default flavor ('vtraag'), you can provide a resolution (higher
        resolution means finding more and smaller clusters), which defaults to
        1.0. See “Time as a resolution parameter” in [Lambiotte09]_.
    random_state : `int`, optional (default: 0)
        Change the initialization of the optimization.
    restrict_to : `tuple`, optional (default: None)
        Restrict the clustering to the categories within the key for sample
        annotation, tuple needs to contain (obs key, list of categories).
    key_added : `str`, optional (default: 'louvain')
        Key under which to add the cluster labels.
    adjacency : sparse matrix or `None`, optional (default: `None`)
        Sparse adjacency matrix of the graph, defaults to
        `adata.uns['neighbors']['connectivities']`.
    flavor : {'vtraag', 'igraph'}
        Choose between two packages for computing the clustering. 'vtraag' is
        much more powerful.
    copy : `bool` (default: `False`)
        Copy adata or modify it inplace.

    Returns
    -------
    Depending on `copy`, returns or updates `adata` with the following fields.

    louvain : `pd.Series` (``adata.obs``, dtype `category`)
        Array of dim (number of samples) that stores the subgroup id ('0',
        '1', ...) for each cell.
    """
    logg.info('running Louvain clustering', r=True)
    adata = adata.copy() if copy else adata
    if adjacency is None and 'neighbors' not in adata.uns:
        raise ValueError(
            'You need to run `pp.neighbors` first to compute a neighborhood graph.'
        )
    if adjacency is None:
        adjacency = adata.uns['neighbors']['connectivities']
    if restrict_to is not None:
        restrict_key, restrict_categories = restrict_to
        if not isinstance(restrict_categories[0], str):
            raise ValueError('You need to use strings to label categories, '
                             'e.g. \'1\' instead of 1.')
        for c in restrict_categories:
            if c not in adata.obs[restrict_key].cat.categories:
                raise ValueError(
                    '\'{}\' is not a valid category for \'{}\''.format(
                        c, restrict_key))
        restrict_indices = adata.obs[restrict_key].isin(
            restrict_categories).values
        adjacency = adjacency[restrict_indices, :]
        adjacency = adjacency[:, restrict_indices]
    if flavor in {'vtraag', 'igraph'}:
        if flavor == 'igraph' and resolution is not None:
            logg.warn(
                '`resolution` parameter has no effect for flavor "igraph"')
        if directed and flavor == 'igraph':
            directed = False
        if not directed: logg.m('    using the undirected graph', v=4)
        g = utils.get_igraph_from_adjacency(adjacency, directed=directed)
        if flavor == 'vtraag':
            import louvain
            if resolution is None: resolution = 1
            try:
                logg.info('    using the "louvain" package of Traag (2017)')
                louvain.set_rng_seed(random_state)
                part = louvain.find_partition(
                    g,
                    louvain.RBConfigurationVertexPartition,
                    resolution_parameter=resolution)
                # adata.uns['louvain_quality'] = part.quality()
            except AttributeError:
                logg.warn('Did not find package louvain>=0.6, '
                          'the clustering result will therefore not '
                          'be 100% reproducible, '
                          'but still meaningful. '
                          'If you want 100% reproducible results, '
                          'update via "pip install louvain --upgrade".')
                part = louvain.find_partition(g,
                                              method='RBConfiguration',
                                              resolution_parameter=resolution)
        elif flavor == 'igraph':
            part = g.community_multilevel()
        groups = np.array(part.membership)
    elif flavor == 'taynaud':
        # this is deprecated
        import networkx as nx
        import community
        g = nx.Graph(adjacency)
        partition = community.best_partition(g)
        groups = np.zeros(len(partition), dtype=int)
        for k, v in partition.items():
            groups[k] = v
    else:
        raise ValueError(
            '`flavor` needs to be "vtraag" or "igraph" or "taynaud".')
    unique_groups = np.unique(groups)
    n_clusters = len(unique_groups)
    if restrict_to is None:
        groups = groups.astype('U')
        key_added = 'louvain' if key_added is None else key_added
        adata.obs[key_added] = pd.Categorical(values=groups,
                                              categories=natsorted(
                                                  unique_groups.astype('U')))
    else:
        key_added = restrict_key + '_R' if key_added is None else key_added
        all_groups = adata.obs[restrict_key].astype('U')
        prefix = '-'.join(restrict_categories) + ','
        new_groups = [prefix + g for g in groups.astype('U')]
        all_groups.iloc[restrict_indices] = new_groups
        adata.obs[key_added] = pd.Categorical(values=all_groups,
                                              categories=natsorted(
                                                  all_groups.unique()))
    adata.uns['louvain'] = {}
    adata.uns['louvain']['params'] = {
        'resolution': resolution,
        'random_state': random_state
    }
    logg.info('    finished',
              time=True,
              end=' ' if settings.verbosity > 2 else '\n')
    logg.hint('found {} clusters and added\n'
              '    \'{}\', the cluster labels (adata.obs, categorical)'.format(
                  n_clusters, key_added))
    return adata if copy else None
Example #10
def getNclusters2(
    adata,
    G,
    n_clusters,
    seed,
    cluster_func,
    flavor,
    weights,
    range_min=0,
    range_max=3,
    max_steps=20,
):
    this_step = 0
    this_min = float(range_min)
    this_max = float(range_max)

    weighted = weights is not None

    while this_step < max_steps:
        #         print('step ' + str(this_step))
        this_resolution = this_min + ((this_max - this_min) / 2)

        if cluster_func == "louvain":

            if flavor == "scanpy":
                sc.tl.louvain(
                    adata,
                    resolution=this_resolution,
                    random_state=seed,
                    use_weights=weighted,
                )
                clus = np.array(adata.obs["louvain"]).astype(int)
                this_clusters = adata.obs["louvain"].nunique()

            elif flavor == "base":
                louvain.set_rng_seed(seed)
                part_louvain = louvain.find_partition(
                    graph=G,
                    partition_type=louvain.RBConfigurationVertexPartition,
                    weights=weights,
                    resolution_parameter=this_resolution,
                )
                clus = np.array(part_louvain.membership)
                this_clusters = len(np.unique(clus))

        elif cluster_func == "leiden":

            if flavor == "scanpy":
                sc.tl.leiden(
                    adata,
                    resolution=this_resolution,
                    random_state=seed,
                    use_weights=weighted,
                )
                clus = np.array(adata.obs["leiden"]).astype(int)
                this_clusters = adata.obs["leiden"].nunique()

            elif flavor == "base":
                part_leiden = leidenalg.find_partition(
                    graph=G,
                    partition_type=leidenalg.RBConfigurationVertexPartition,
                    weights=weights,
                    resolution_parameter=this_resolution,
                    seed=seed,
                )
                clus = np.array(part_leiden.membership)
                this_clusters = len(np.unique(clus))

        else:
            raise ValueError(
                "incorrect cluster_func, choose 'leiden' or 'louvain'")

        #         print('got ' + str(this_clusters) + ' at resolution ' + str(this_resolution))

        if this_clusters > n_clusters:
            this_max = this_resolution
        elif this_clusters < n_clusters:
            this_min = this_resolution
        else:
            return clus, dict(resolution=this_resolution, succeeded=True)
        this_step += 1

    print("Cannot find the number of clusters")
    print("Clustering solution from last iteration is used:" +
          str(this_clusters) + " at resolution " + str(this_resolution))

    return clus, dict(resolution=this_resolution, succeeded=False)
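
A possible way to drive the bisection above (the dataset, target cluster count and graph construction are illustrative; `G` is rebuilt here directly from the scanpy connectivities):

# Illustrative call of getNclusters2(): search a resolution giving ~10 clusters.
import numpy as np
import scanpy as sc
import igraph as ig
import louvain
import leidenalg

adata = sc.datasets.pbmc68k_reduced()
sc.pp.neighbors(adata, n_neighbors=15)
conn = adata.obsp['connectivities'].tocoo()
G = ig.Graph(n=conn.shape[0],
             edges=list(zip(conn.row.tolist(), conn.col.tolist())))
clus, info = getNclusters2(adata, G, n_clusters=10, seed=0,
                           cluster_func="louvain", flavor="base", weights=None)
print(info, len(np.unique(clus)))
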
Example #11
def clustering_analysis(
    adata,
    true_labels,
    do_norm,
    norm_scale,
    do_log,
    do_pca,
    n_neighbors,
    n_clusters,
    metric,
    weighted,  # weighted adjmat for louvain/leiden clustering ?
    seed,
    n_comps=50,
    hubness_methods={
        "mp_normal": ("mp", {
            "method": "normal"
        }),
        "ls": ("ls", None),
        "ls_nicdm": ("ls", {
            "method": "nicdm"
        }),
        "dsl": ("dsl", None),
    },
    retained_cells_idx=None,
):

    results_dict = {}
    results_dict["params"] = dict(
        do_norm=do_norm,
        norm_scale=norm_scale,
        do_log=do_log,
        do_pca=do_pca,
        n_neighbors=n_neighbors,
        n_clusters=n_clusters,
        metric=metric,
        weighted=weighted,
        seed=seed,
        n_comps=n_comps,
    )

    start = time.time()

    ### preprocess, prepare clustering input ###
    if retained_cells_idx is None:
        retained_cells_idx = range(len(adata.X))

    if type(do_norm) is str:
        adata.X = scipy.sparse.csr_matrix(adata.X)

        if do_norm == "seurat":
            recipe_seurat(adata, do_log, norm_scale)
            print(f"\t\tseurat norm retained {adata.X.shape[1]} genes")
        elif do_norm == "zheng17":
            recipe_zheng17(adata, do_log, norm_scale, n_top_genes=5000)
            print(f"\t\tzheng norm retained {adata.X.shape[1]} genes")
        elif do_norm == "duo":
            recipe_duo(adata, do_log, renorm=norm_scale)
            print(f"\t\tduo norm retained {adata.X.shape[1]} genes")
        else:
            raise ValueError("do_norm not in 'duo', seurat', 'zheng17'")

    if scipy.sparse.issparse(adata.X):
        adata.X = adata.X.toarray()

    if do_log and not (type(do_norm) is str):
        print("\t\tlog_transformed data")
        sc.pp.log1p(adata)

    if do_pca:
        use_rep = "X_pca"
        sc.pp.pca(adata,
                  n_comps=min(adata.X.shape[1] - 1,
                              min(len(adata.X) - 1, n_comps)))
        X = adata.obsm["X_pca"]
        res_key = results_dict["X_pca"] = {}
    else:
        # already computed pca
        use_rep = "X_pca"
        X = adata.obsm["X_pca"]
        res_key = results_dict["X_pca"] = {}

    print("\t\t\tPreprocessing done:", round((time.time() - start) / 60, 2),
          "mn")
    start = time.time()

    adjmat, affinity_matrix, G, weights, scores = generate_clustering_inputs2(
        X,
        metric=metric,
        n_neighbors=n_neighbors,
        weighted=weighted,
        seed=seed,
        hubness=None,
        hubness_params=None,
    )

    print("\t\t\tInputs generated:", round((time.time() - start) / 60, 2),
          "mn")
    start = time.time()

    res_key["hubness_df"] = pd.DataFrame.from_dict(data=scores,
                                                   orient="index",
                                                   columns=["base"])
    # import pdb;pdb.set_trace()

    ### base and scanpy clustering ###
    # main dictionaries
    res_key["clus"] = {}
    res_key["clus_info"] = {}
    res_key["clus_scores"] = {}

    # sub dictionaries
    clus_methods_keys = [
        "scanpy_default_umap",
        "scanpy_default_gauss",
        "base_default",
        "scanpy_umap",
        "scanpy_gauss",
        "base",
    ]
    for k in clus_methods_keys:
        res_key["clus"][k] = {}
        res_key["clus_info"][k] = {}

    # cluster with default params
    # scanpy
    for method in ["umap", "gauss"]:
        # compute neighbors
        try:
            sc.pp.neighbors(
                adata,
                n_neighbors=n_neighbors + 1,
                metric=metric,
                use_rep=use_rep,
                method=method,
            )
        except:
            sc.pp.neighbors(
                adata,
                n_neighbors=n_neighbors + 1,
                metric=metric,
                use_rep=use_rep,
                method=method,
                knn=False,
            )

        # cluster
        sc.tl.louvain(adata,
                      resolution=1.0,
                      random_state=seed,
                      use_weights=weighted)
        res_key["clus"]["scanpy_default_" + method]["louvain"] = np.array(
            adata.obs["louvain"]).astype(int)
        res_key["clus_info"]["scanpy_default_" + method]["louvain"] = dict(
            n_clusters=len(
                np.unique(np.array(adata.obs["louvain"]).astype(int))))

        sc.tl.leiden(adata,
                     resolution=1.0,
                     random_state=seed,
                     use_weights=weighted)
        res_key["clus"]["scanpy_default_" + method]["leiden"] = np.array(
            adata.obs["leiden"]).astype(int)
        res_key["clus_info"]["scanpy_default_" + method]["leiden"] = dict(
            n_clusters=len(np.unique(
                np.array(adata.obs["leiden"]).astype(int))))

    print("\t\t\tScanpy louvain/leiden clus:",
          round((time.time() - start) / 60, 2), "mn")
    start = time.time()

    # base
    louvain.set_rng_seed(seed)
    part_louvain = louvain.find_partition(
        graph=G,
        partition_type=louvain.RBConfigurationVertexPartition,
        weights=weights,
        resolution_parameter=1.0,
    )
    res_key["clus"]["base_default"]["louvain"] = np.array(
        part_louvain.membership)
    res_key["clus_info"]["base_default"]["louvain"] = dict(
        n_clusters=len(np.unique(np.array(part_louvain.membership))))

    part_leiden = leidenalg.find_partition(
        graph=G,
        partition_type=leidenalg.RBConfigurationVertexPartition,
        weights=weights,
        resolution_parameter=1.0,
        seed=seed,
    )
    res_key["clus"]["base_default"]["leiden"] = np.array(
        part_leiden.membership)
    res_key["clus_info"]["base_default"]["leiden"] = dict(
        n_clusters=len(np.unique(np.array(part_leiden.membership))))

    print("\t\t\tLouvain/leiden clus:", round((time.time() - start) / 60, 2),
          "mn")
    start = time.time()

    # cluster with ground truth number of clusters
    # scanpy
    for method in ["umap", "gauss"]:
        # compute neighbors
        try:
            sc.pp.neighbors(
                adata,
                n_neighbors=n_neighbors + 1,
                metric=metric,
                use_rep=use_rep,
                method=method,
            )
        except:
            sc.pp.neighbors(
                adata,
                n_neighbors=n_neighbors + 1,
                metric=metric,
                use_rep=use_rep,
                method=method,
                knn=False,
            )
        # cluster
        (
            res_key["clus"]["scanpy_" + method]["louvain"],
            res_key["clus_info"]["scanpy_" + method]["louvain"],
        ) = getNclusters2(
            adata,
            G,
            n_clusters=n_clusters,
            seed=seed,
            cluster_func="louvain",
            flavor="scanpy",
            weights=weights,
        )
        (
            res_key["clus"]["scanpy_" + method]["leiden"],
            res_key["clus_info"]["scanpy_" + method]["leiden"],
        ) = getNclusters2(
            adata,
            G,
            n_clusters=n_clusters,
            seed=seed,
            cluster_func="leiden",
            flavor="scanpy",
            weights=weights,
        )

    print(
        "\t\t\tScanpy louvain/leiden clus, searching ground truth:",
        round((time.time() - start) / 60, 2),
        "mn",
    )
    start = time.time()

    (
        res_key["clus"]["base"]["louvain"],
        res_key["clus_info"]["base"]["louvain"],
    ) = getNclusters2(
        adata,
        G,
        n_clusters=n_clusters,
        seed=seed,
        cluster_func="louvain",
        flavor="base",
        weights=weights,
    )
    (
        res_key["clus"]["base"]["leiden"],
        res_key["clus_info"]["base"]["leiden"],
    ) = getNclusters2(
        adata,
        G,
        n_clusters=n_clusters,
        seed=seed,
        cluster_func="leiden",
        flavor="base",
        weights=weights,
    )
    print(
        "\t\t\tBase louvain/leiden clus, searching ground truth:",
        round((time.time() - start) / 60, 2),
        "mn",
    )
    start = time.time()

    # store scores, info
    for k in clus_methods_keys:
        res_key["clus_scores"][k] = get_scores(true_labels, res_key["clus"][k],
                                               retained_cells_idx, X, metric,
                                               seed)
        res_key["clus_info"][k] = pd.DataFrame(res_key["clus_info"][k])

    print("\t\t\tScoring:", round((time.time() - start) / 60, 2), "mn")
    start = time.time()

    del adjmat, affinity_matrix, G, weights, scores

    ### hubness-aware clustering ###

    for method_name, (hubness, hubness_params) in hubness_methods.items():
        res_key["clus"][method_name] = {}
        res_key["clus"][method_name + "_default"] = {}
        res_key["clus_info"][method_name] = {}
        res_key["clus_info"][method_name + "_default"] = {}

        (
            hub_adjmat,
            hub_affinity_matrix,
            hub_G,
            hub_weights,
            hub_scores,
        ) = generate_clustering_inputs(
            X,
            metric=metric,
            n_neighbors=n_neighbors,
            weighted=weighted,
            seed=seed,
            hubness=hubness,
            hubness_params=hubness_params,
        )

        # store hubness information
        res_key["hubness_df"] = pd.concat(
            (
                res_key["hubness_df"],
                pd.DataFrame.from_dict(
                    data=hub_scores, orient="index", columns=[method_name]),
            ),
            axis=1,
        )

        # cluster with default params
        louvain.set_rng_seed(seed)
        part_louvain = louvain.find_partition(
            graph=hub_G,
            partition_type=louvain.RBConfigurationVertexPartition,
            weights=hub_weights,
            resolution_parameter=1.0,
        )
        res_key["clus"][method_name + "_default"]["louvain"] = np.array(
            part_louvain.membership)
        res_key["clus_info"][method_name + "_default"]["louvain"] = dict(
            n_clusters=len(np.unique(np.array(part_louvain.membership))))

        part_leiden = leidenalg.find_partition(
            graph=hub_G,
            partition_type=leidenalg.RBConfigurationVertexPartition,
            weights=hub_weights,
            resolution_parameter=1.0,
            seed=seed,
        )
        res_key["clus"][method_name + "_default"]["leiden"] = np.array(
            part_leiden.membership)
        res_key["clus_info"][method_name + "_default"]["leiden"] = dict(
            n_clusters=len(np.unique(np.array(part_leiden.membership))))

        # cluster with ground truth number of clusters
        (
            res_key["clus"][method_name]["louvain"],
            res_key["clus_info"][method_name]["louvain"],
        ) = getNclusters2(
            adata,
            hub_G,
            n_clusters=n_clusters,
            seed=seed,
            cluster_func="louvain",
            flavor="base",
            weights=hub_weights,
        )
        (
            res_key["clus"][method_name]["leiden"],
            res_key["clus_info"][method_name]["leiden"],
        ) = getNclusters2(
            adata,
            hub_G,
            n_clusters=n_clusters,
            seed=seed,
            cluster_func="leiden",
            flavor="base",
            weights=hub_weights,
        )

        # store clustering scores, info
        res_key["clus_scores"][method_name + "_default"] = get_scores(
            true_labels,
            res_key["clus"][method_name + "_default"],
            retained_cells_idx,
            X,
            metric,
            seed,
        )
        res_key["clus_scores"][method_name] = get_scores(
            true_labels,
            res_key["clus"][method_name],
            retained_cells_idx,
            X,
            metric,
            seed,
        )

        res_key["clus_info"][method_name + "_default"] = pd.DataFrame(
            res_key["clus_info"][method_name + "_default"])
        res_key["clus_info"][method_name] = pd.DataFrame(
            res_key["clus_info"][method_name])

    print(
        "\t\t\tHubness methods full pipeline:",
        round((time.time() - start) / 60, 2),
        "mn",
    )

    return results_dict
Example #12
def run_louvain(graph,
                config_model='Default',
                overlap=False,
                directed=False,
                deep=False,
                interslice_weight=0.1,
                resolution_parameter=0.1,
                seed=None):
    """
    :outdir: the output directory that will contain the output link file
    :param graph: input file
    :param config_model: 'RB', 'RBER', 'CPM', 'Surprise', 'Significance'
    :param overlap: bool, whether to enable overlapping community detection
    :param directed: bool, whether the input graph is directed
    :param deep: bool, whether to output the hierarchical (multi-level) clustering
    :param interslice_weight: weight of the interslice edges for multiplex clustering
    :param resolution_parameter: resolution parameter of the quality function
    :return:
    """

    if seed is not None:
        louvain.set_rng_seed(seed)

    def louvain_hierarchy_output(partition):
        optimiser = louvain.Optimiser()
        partition_agg = partition.aggregate_partition()
        partition_layers = []
        while optimiser.move_nodes(partition_agg) > 0:
            partition.from_coarse_partition(partition_agg)
            partition_agg = partition_agg.aggregate_partition()
            partition_layers.append(list(partition))
        return partition_layers

    def louvain_multiplex(graphs, partition_type, interslice_weight,
                          resolution_parameter):
        layers, interslice_layer, G_full = louvain.time_slices_to_layers(
            graphs, vertex_id_attr='name', interslice_weight=interslice_weight)
        if partition_type == louvain.ModularityVertexPartition:
            partitions = [partition_type(H) for H in layers]
            interslice_partition = partition_type(interslice_layer,
                                                  weights='weight')
        else:
            partitions = [
                partition_type(H, resolution_parameter=resolution_parameter)
                for H in layers
            ]
            interslice_partition = partition_type(
                interslice_layer,
                resolution_parameter=resolution_parameter,
                weights='weight')
        optimiser = louvain.Optimiser()
        optimiser.optimise_partition_multiplex(partitions +
                                               [interslice_partition])
        quality = sum(
            [p.quality() for p in partitions + [interslice_partition]])
        return partitions[0], quality

    def partition_to_clust(graphs, partition, min_size_cut=2):
        clusts = []
        node_names = []
        if not isinstance(graphs, list):
            graphs = [graphs]
        for g in graphs:
            node_names.extend(g.vs['name'])
        for i in range(len(partition)):
            clust = [node_names[id] for id in partition[i]]
            clust = list(set(clust))
            if len(clust) < min_size_cut:
                continue
            clust.sort()
            clusts.append(clust)
        clusts = sorted(clusts, key=lambda x: len(x), reverse=True)
        return clusts

    multi = False
    if isinstance(graph, list):
        multi = True

    if overlap and not multi:
        multi = True
        net = graph
        graph = []
        for i in range(4):
            graph.append(net)

    if multi and deep:
        sys.stderr.write(
            'louvain does not support hierarchical clustering with overlapping communities'
        )
        sys.exit()

    if config_model == 'RB':
        partition_type = louvain.RBConfigurationVertexPartition
    elif config_model == 'RBER':
        partition_type = louvain.RBERConfigurationVertexPartition
    elif config_model == 'CPM':
        partition_type = louvain.CPMVertexPartition
    elif config_model == 'Surprise':
        partition_type = louvain.SurpriseVertexPartition
    elif config_model == "Significance":
        partition_type = louvain.SignificanceVertexPartition
    else:
        sys.stderr.write("Not specifying the configuration model; "
                         "perform simple Louvain.")
        partition_type = louvain.ModularityVertexPartition

    weighted = False
    if multi:
        wL = []
        G = []
        for file in graph:
            with open(file, 'r') as f:
                lines = f.read().splitlines()
            elts = lines[0].split()
            if len(elts) == 3:
                weighted = True
            else:
                weighted = False
            for i in range(len(lines)):
                elts = lines[i].split()
                for j in range(2):
                    elts[j] = int(elts[j])
                if weighted == True:
                    elts[2] = float(elts[2])
                    if elts[2] < 0:
                        sys.stderr.write("negative edge weight not allowed")
                        return 1
                lines[i] = tuple(elts)
            g = igraph.Graph.TupleList(lines,
                                       directed=directed,
                                       weights=weighted)
            G.append(g)
            wL.append(weighted)
            f.close()
        if True in wL and False in wL:
            raise Exception('all graphs should follow the same format')
        if partition_type == louvain.CPMVertexPartition and directed is True:
            raise Exception('graph for CPMVertexPartition must be undirected')
        if partition_type == louvain.SignificanceVertexPartition and weighted is True:
            raise Exception('SignificanceVertexPartition only supports '
                            'unweighted graphs')
        partition, quality = louvain_multiplex(G, partition_type,
                                               interslice_weight,
                                               resolution_parameter)

    else:
        with open(graph, 'r') as f:
            lines = f.read().splitlines()
        elts = lines[0].split()
        if len(elts) == 3:
            weighted = True
        else:
            weighted = False

        for i in range(len(lines)):
            elts = lines[i].split()
            for j in range(2):
                elts[j] = int(elts[j])
            if weighted is True:
                elts[2] = float(elts[2])
                if elts[2] < 0:
                    sys.stderr.write("negative edge weight not allowed")
                    return 1
            lines[i] = tuple(elts)
        f.close()

        G = igraph.Graph.TupleList(lines, directed=directed, weights=weighted)
        if weighted is False:
            weights = None
        else:
            weights = G.es['weight']
        if partition_type == louvain.ModularityVertexPartition:
            partition = partition_type(G, weights=weights)
        else:
            partition = partition_type(
                G, weights=weights, resolution_parameter=resolution_parameter)
        if deep == False:
            optimiser = louvain.Optimiser()
            optimiser.optimise_partition(partition)

    if deep == False:
        clusts = partition_to_clust(G, partition)
        if len(clusts) == 0:
            sys.stderr.write(
                "No cluster; Resolution parameter may be too extreme")
            return 1

        maxNode = 0
        for clust in clusts:
            maxNode = max(maxNode, max(clust))

        for i in range(len(clusts)):
            sys.stdout.write(
                str(maxNode + len(partition) + 1) + ',' +
                str(maxNode + i + 1) + ',' + 'c-c' + ';')
            for n in clusts[i]:
                sys.stdout.write(
                    str(maxNode + i + 1) + ',' + str(n) + ',' + 'c-m' + ';')
    else:
        partitions = louvain_hierarchy_output(partition)
        clusts_layers = []
        for p in partitions:
            clusts_layers.append(partition_to_clust(G, p))
        if len(clusts_layers[0]) == 0:
            sys.stderr.write(
                "No cluster; Resolution parameter may be too extreme")
            return 1
        maxNode = 0
        for clust in clusts_layers[0]:
            maxNode = max(maxNode, max(clust))
        for i in range(len(clusts_layers[0])):
            for n in clusts_layers[0][i]:
                sys.stdout.write(
                    str(maxNode + i + 1) + ',' + str(n) + ',' + 'c-m' + ';')
        maxNode = maxNode + len(clusts_layers[0])
        for i in range(1, len(clusts_layers)):
            for j in range(len(clusts_layers[i - 1])):
                for k in range(len(clusts_layers[i])):
                    if all(x in clusts_layers[i][k]
                           for x in clusts_layers[i - 1][j]):
                        sys.stdout.write(
                            str(maxNode + k + 1) + ',' +
                            str(maxNode - len(clusts_layers[i - 1]) + j + 1) +
                            ',' + 'c-c' + ';')
                        break
            maxNode = maxNode + len(clusts_layers[i])
        for i in range(len(clusts_layers[-1])):
            sys.stdout.write(
                str(maxNode + 1) + ',' +
                str(maxNode - len(clusts_layers[-1]) + i + 1) + ',' + 'c-c' +
                ';')

    sys.stdout.flush()
    return 0
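
An illustrative run of the function above on a tiny unweighted edge list written to a temporary file (file contents and seed are made up); with config_model='Default' it falls back to plain modularity and writes the 'c-c'/'c-m' records to stdout:

# Illustrative single-graph run of run_louvain() on a throwaway edge-list file.
import os
import sys
import tempfile
import igraph
import louvain

edges = "0 1\n1 2\n2 0\n2 3\n3 4\n4 5\n5 3\n"
with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as f:
    f.write(edges)
    path = f.name

run_louvain(path, config_model='Default', seed=42)  # prints cluster records to stdout
os.remove(path)
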
Example #13
def louvain(
    adata: AnnData,
    resolution: Optional[float] = None,
    random_state: Optional[Union[int, RandomState]] = 0,
    log_fname: str = '',
    restrict_to: Optional[Tuple[str, Sequence[str]]] = None,
    key_added: Optional[str] = 'louvain',
    adjacency: Optional[spmatrix] = None,
    flavor: str = 'vtraag',
    directed: bool = True,
    use_weights: bool = False,
    partition_type: Optional[Type[MutableVertexPartition]] = None,
    partition_kwargs: Optional[Mapping[str, Any]] = None,
    copy: bool = False,
) -> Optional[AnnData]:
    """Cluster cells into subgroups [Blondel08]_ [Levine15]_ [Traag17]_.

    Cluster cells using the Louvain algorithm [Blondel08]_ in the implementation
    of [Traag17]_. The Louvain algorithm has been proposed for single-cell
    analysis by [Levine15]_.

    This requires having run :func:`~scanpy.pp.neighbors` or :func:`~scanpy.external.pp.bbknn` first,
    or explicitly passing an ``adjacency`` matrix.

    Parameters
    ----------
    adata
        The annotated data matrix.
    resolution
        For the default flavor (``'vtraag'``), you can provide a resolution
        (higher resolution means finding more and smaller clusters),
        which defaults to 1.0. See “Time as a resolution parameter” in [Lambiotte09]_.
    random_state
        Change the initialization of the optimization.
    restrict_to
        Restrict the clustering to the categories within the key for sample
        annotation, tuple needs to contain ``(obs_key, list_of_categories)``.
    key_added
        Key under which to add the cluster labels. (default: ``'louvain'``)
    adjacency
        Sparse adjacency matrix of the graph, defaults to
        ``adata.uns['neighbors']['connectivities']``.
    flavor : {``'vtraag'``, ``'igraph'``}
        Choose between two packages for computing the clustering.
        ``'vtraag'`` is much more powerful, and the default.
    directed
        Interpret the ``adjacency`` matrix as directed graph?
    use_weights
        Use weights from knn graph.
    partition_type
        Type of partition to use.
        Only a valid argument if ``flavor`` is ``'vtraag'``.
    partition_kwargs
        Key word arguments to pass to partitioning,
        if ``vtraag`` method is being used.
    copy
        Copy adata or modify it inplace.

    Returns
    -------
    :obj:`None`
        By default (``copy=False``), updates ``adata`` with the following fields:

        ``adata.obs['louvain']`` (:class:`pandas.Series`, dtype ``category``)
            Array of dim (number of samples) that stores the subgroup id
            (``'0'``, ``'1'``, ...) for each cell.

    :class:`~anndata.AnnData`
        When ``copy=True`` is set, a copy of ``adata`` with those fields is returned.
    """
    start = logg.info('running Louvain clustering')
    if (flavor != 'vtraag') and (partition_type is not None):
        raise ValueError(
            '`partition_type` is only a valid argument when `flavor` is "vtraag"'
        )
    adata = adata.copy() if copy else adata
    if adjacency is None and 'neighbors' not in adata.uns:
        raise ValueError(
            'You need to run `pp.neighbors` first to compute a neighborhood graph.'
        )
    if adjacency is None:
        adjacency = adata.uns['neighbors']['connectivities']
    if restrict_to is not None:
        restrict_key, restrict_categories = restrict_to
        adjacency, restrict_indices = restrict_adjacency(
            adata, restrict_key, restrict_categories, adjacency)
    if flavor in {'vtraag', 'igraph'}:
        if flavor == 'igraph' and resolution is not None:
            logg.warning(
                '`resolution` parameter has no effect for flavor "igraph"')
        if directed and flavor == 'igraph':
            directed = False
        if not directed: logg.debug('    using the undirected graph')
        g = utils.get_igraph_from_adjacency(adjacency, directed=directed)
        if use_weights:
            weights = np.array(g.es["weight"]).astype(np.float64)
        else:
            weights = None
        if flavor == 'vtraag':
            import louvain
            if partition_kwargs is None:
                partition_kwargs = {}
            if partition_type is None:
                partition_type = louvain.RBConfigurationVertexPartition
            if resolution is not None:
                partition_kwargs["resolution_parameter"] = resolution
            if use_weights:
                partition_kwargs["weights"] = weights
            logg.info('    using the "louvain" package of Traag (2017)')
            louvain.set_rng_seed(random_state)
            part = louvain.find_partition(
                g,
                partition_type,
                log_fname=log_fname,
                **partition_kwargs,
            )
            # adata.uns['louvain_quality'] = part.quality()
        else:
            part = g.community_multilevel(weights=weights)
        groups = np.array(part.membership)
    elif flavor == 'taynaud':
        # this is deprecated
        import networkx as nx
        import community
        g = nx.Graph(adjacency)
        partition = community.best_partition(g)
        groups = np.zeros(len(partition), dtype=int)
        for k, v in partition.items():
            groups[k] = v
    else:
        raise ValueError(
            '`flavor` needs to be "vtraag" or "igraph" or "taynaud".')
    if restrict_to is not None:
        if key_added == 'louvain':
            key_added += '_R'
        groups = rename_groups(adata, key_added, restrict_key,
                               restrict_categories, restrict_indices, groups)
    adata.obs[key_added] = pd.Categorical(
        values=groups.astype('U'),
        categories=natsorted(np.unique(groups).astype('U')),
    )
    adata.uns['louvain'] = {}
    adata.uns['louvain']['params'] = {
        'resolution': resolution,
        'random_state': random_state
    }
    logg.info(
        '    finished',
        time=start,
        #deep=(
        #    f'found {len(np.unique(groups))} clusters and added\n'
        #    f'    {key_added!r}, the cluster labels (adata.obs, categorical)'
        #),
    )
    return adata if copy else None
Example #14
def temporal_louvain(net,
                     iter_n=1,
                     resolution_parameter=1,
                     interslice_weight=1,
                     quality_function='NewmanGirvan2004',
                     seed=100):
    """
    Temporal louvain clustering, run iter_n times.

    Parameters
    ----------
    net : array, dict
        network representation (contact sequences or graphlet)
    iter_n : int
        number of repeated louvain clustering runs
    resolution_parameter : int
        Spatial resolution parameter. Only valid for some quality functions. Default=1.
        The resolution parameter is only used by ReichardtBornholdt2006 and TraagVanDoorenNesterov2011.
    interslice_weight : int
        The weight that connects the different graphlets/snapshots to each other. Default=1.
    quality_function : str
        What type of louvain clustering is done. Options: NewmanGirvan2004, TraagVanDoorenNesterov2011,
        ReichardtBornholdt2006, TraagKringsVanDooren2013, TraagAldecoaDelvenne2015
    seed : int
        Seed for reproducibility
    consensus_threshold : float
        Value between 0 and 1. When creating consensus matrix, ignore if value only occurs in specified fraction of iterations. If 0.5 two nodes must be in the same community 50% of the time to be considered in the consensus matrix.

    Returns
    -------
    communities : array
        Louvain clustering. Dimensions: [iter_n], communities, time

    Quality function sources
    --------------------------

    NewmanGirvan2004 :
        Newman, M. E. J., & Girvan, M. (2004). Finding and evaluating community structure in networks. Physical Review E, 69(2), 026113. 10.1103/PhysRevE.69.026113
        `Read more <http://louvain-igraph.readthedocs.io/en/latest/reference.html#modularityvertexpartition>`_
    ReichardtBornholdt2006 :
        Reichardt, J., & Bornholdt, S. (2006). Statistical mechanics of community detection. Physical Review E, 74(1), 016110. 10.1103/PhysRevE.74.016110
        `Read more <http://louvain-igraph.readthedocs.io/en/latest/reference.html#rbconfigurationvertexpartition>`_
    TraagVanDoorenNesterov2011 :
        Traag, V. A., Van Dooren, P., & Nesterov, Y. (2011). Narrow scope for resolution-limit-free community detection. Physical Review E, 84(1), 016114. 10.1103/PhysRevE.84.016114
        `Read more <http://louvain-igraph.readthedocs.io/en/latest/reference.html#cpmvertexpartition>`_
    TraagKringsVanDooren2013 :
        Traag, V. A., Krings, G., & Van Dooren, P. (2013). Significant scales in community structure. Scientific Reports, 3, 2930. 10.1038/srep02930
        `Read more <http://louvain-igraph.readthedocs.io/en/latest/reference.html#significancevertexpartition>`_
    TraagAldecoaDelvenne2015 :
        Traag, V. A., Aldecoa, R., & Delvenne, J.-C. (2015). Detecting communities using asymptotical surprise. Physical Review E, 92(2), 022816. 10.1103/PhysRevE.92.022816
        `Read more <http://louvain-igraph.readthedocs.io/en/latest/reference.html#surprisevertexpartition>`_

    Dependencies
    ------------
    These functions make use of igraph (http://igraph.org/python/) and louvain-igraph (http://louvain-igraph.readthedocs.io/en/latest/).

    Note
    ----
    At the moment, the input should generally contain only positive edge weights.
    """
    if isinstance(net, dict):
        dict_input = True
    else:
        dict_input = False
    if teneto.utils.checkInput(net, conMat=1) != 'M':
        net, netinfo = teneto.utils.process_input(net, ['C', 'G', 'TO'])
    if quality_function == 'TraagVanDoorenNesterov2011':
        louvain_alg = louvain.CPMVertexPartition
        louvain_kwargs = {'resolution_parameter': resolution_parameter}
    elif quality_function == 'ReichardtBornholdt2006':
        louvain_alg = louvain.RBConfigurationVertexPartition
        louvain_kwargs = {'resolution_parameter': resolution_parameter}
    elif quality_function == 'NewmanGirvan2004':
        louvain_alg = louvain.ModularityVertexPartition
        louvain_kwargs = {}
    elif quality_function == 'TraagKringsVanDooren2013':
        louvain_alg = louvain.SignificanceVertexPartition
        louvain_kwargs = {}
    elif quality_function == 'TraagAldecoaDelvenne2015':
        louvain_alg = louvain.SurpriseVertexPartition
        louvain_kwargs = {}
    else:
        raise ValueError('unrecognised quality_function: ' + str(quality_function))
    g_to_ig = []
    if len(net.shape) == 3:
        for i in range(net.shape[-1]):
            g_to_ig.append(ig.Graph.Weighted_Adjacency(net[:, :, i].tolist()))
        for n in range(net.shape[0]):
            for t in range(net.shape[-1]):
                g_to_ig[t].vs[n]['id'] = n
    elif len(net.shape) == 2:
        g_to_ig.append(ig.Graph.Weighted_Adjacency(net.tolist()))

    membership = []
    louvain.set_rng_seed(seed)
    if interslice_weight != 0:
        for n in range(0, iter_n):
            mem, improvement = louvain.find_partition_temporal(
                g_to_ig,
                louvain_alg,
                interslice_weight=interslice_weight,
                **louvain_kwargs)
            membership.append(mem)
        com_membership = np.array(membership).transpose([0, 2, 1])
    else:
        com_membership = []
        for n in range(0, iter_n):
            membership = []
            for snapshot in g_to_ig:
                mem = louvain.find_partition(snapshot, louvain_alg,
                                             **louvain_kwargs)
                membership.append(mem.membership)
            com_membership.append(membership)
        com_membership = np.array(com_membership).transpose([0, 2, 1])

    if dict_input:
        C = teneto.utils.graphlet2contact(net, netinfo)
        C['communities'] = np.squeeze(com_membership)
        return C
    else:
        return np.squeeze(com_membership)
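A minimal usage sketch (not part of the original source): it assumes a small symmetric graphlet stack of shape (node, node, time) is accepted unchanged by teneto's input checks, and runs two Louvain repetitions with the Newman-Girvan quality function.

import numpy as np

# Hypothetical input: a symmetric 4-node, 3-snapshot graphlet stack with no self-loops.
rng = np.random.default_rng(2019)
net = rng.random((4, 4, 3))
net = (net + net.transpose(1, 0, 2)) / 2
for t in range(net.shape[-1]):
    np.fill_diagonal(net[:, :, t], 0)

communities = temporal_louvain(net, iter_n=2,
                               quality_function='NewmanGirvan2004',
                               interslice_weight=1, seed=2019)
print(communities.shape)  # expected: (2, 4, 3) = (iter_n, node, time)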
Example No. 15
def louvain(adata,
            n_neighbors=None,
            resolution=None,
            n_pcs=50,
            random_state=0,
            restrict_to=None,
            key_added=None,
            flavor='vtraag',
            directed=True,
            recompute_pca=False,
            recompute_distances=False,
            recompute_graph=False,
            n_dcs=None,
            n_jobs=None,
            copy=False):
    """Cluster cells into subgroups [Blondel08]_ [Levine15]_ [Traag17]_.

    Cluster cells using the Louvain algorithm [Blondel08]_ in the implementation
    of [Traag17]_. The Louvain algorithm has been proposed for single-cell
    analysis by [Levine15]_.

    Parameters
    ----------
    adata : :class:`~scanpy.api.AnnData`
        The annotated data matrix.
    n_neighbors : `int`, optional (default: 30)
        Number of neighbors to use for construction of knn graph.
    resolution : `float` or `None`, optional (default: 1)
        For the default flavor ('vtraag'), you can provide a resolution (higher
        resolution means finding more and smaller clusters), which defaults to
        1.0.
    n_pcs : int, optional (default: 50)
        Number of PCs to use for computation of data point graph.
    random_state : int, optional (default: 0)
        Change the initialization of the optimization.
    key_added : str, optional (default: `None`)
        Key under which to add the cluster labels.
    restrict_to : tuple, optional (default: None)
        Restrict the clustering to the categories within the key for sample
        annotation, tuple needs to contain (smp key, list of categories).
    flavor : {'vtraag', 'igraph'}
        Choose between two packages for computing the clustering. 'vtraag' is
        much more powerful.
    copy : `bool` (default: False)
        Copy adata or modify it in place.

    Returns
    -------
    Depending on `copy`, returns or updates `adata` with the following fields.

    louvain_groups : `pd.Series` (``adata.smp``, dtype `category`)
        Array of dim (number of samples) that stores the subgroup id ('0',
        '1', ...) for each cell.
    """
    logg.info('running Louvain clustering', r=True)
    adata = adata.copy() if copy else adata
    add_or_update_graph_in_adata(adata,
                                 n_neighbors=n_neighbors,
                                 n_pcs=n_pcs,
                                 n_dcs=n_dcs,
                                 recompute_pca=recompute_pca,
                                 recompute_distances=recompute_distances,
                                 recompute_graph=recompute_graph,
                                 n_jobs=n_jobs)
    adjacency = adata.uns['data_graph_norm_weights']
    if restrict_to is not None:
        restrict_key, restrict_categories = restrict_to
        if not isinstance(restrict_categories[0], str):
            raise ValueError('You need to use strings to label categories, '
                             'e.g. \'1\' instead of 1.')
        restrict_indices = adata.smp[restrict_key].isin(
            restrict_categories).values
        adjacency = adjacency[restrict_indices, :]
        adjacency = adjacency[:, restrict_indices]
    if flavor in {'vtraag', 'igraph'}:
        if flavor == 'igraph' and resolution is not None:
            logg.warn(
                '`resolution` parameter has no effect for flavor "igraph"')
        if directed and flavor == 'igraph':
            directed = False
        if not directed: logg.m('    using the undirected graph', v=4)
        g = utils.get_igraph_from_adjacency(adjacency, directed=directed)
        if flavor == 'vtraag':
            import louvain
            if resolution is None: resolution = 1
            try:
                logg.info('    using the "louvain" package of Traag (2017)')
                louvain.set_rng_seed(random_state)
                part = louvain.find_partition(
                    g,
                    louvain.RBConfigurationVertexPartition,
                    resolution_parameter=resolution)
                # adata.uns['louvain_quality'] = part.quality()
            except AttributeError:
                logg.warn('Did not find package louvain>=0.6, '
                          'the clustering result will therefore not '
                          'be 100% reproducible, '
                          'but still meaningful. '
                          'If you want 100% reproducible results, '
                          'update via "pip install louvain --upgrade".')
                part = louvain.find_partition(g,
                                              method='RBConfiguration',
                                              resolution_parameter=resolution)
        elif flavor == 'igraph':
            part = g.community_multilevel()
        groups = np.array(part.membership)
    elif flavor == 'taynaud':
        # this is deprecated
        import networkx as nx
        import community
        g = nx.Graph(adata.uns['data_graph_distance_local'])
        partition = community.best_partition(g)
        groups = np.zeros(len(partition), dtype=int)
        for k, v in partition.items():
            groups[k] = v
    else:
        raise ValueError(
            '`flavor` needs to be "vtraag" or "igraph" or "taynaud".')
    unique_groups = np.unique(groups)
    n_clusters = len(unique_groups)
    if restrict_to is None:
        groups = groups.astype('U')
        adata.smp['louvain_groups'] = pd.Categorical(
            values=groups, categories=natsorted(unique_groups.astype('U')))
        key_added = 'louvain_groups' if key_added is None else key_added
    else:
        key_added = restrict_key + '_R' if key_added is None else key_added
        groups += 1
        adata.smp[key_added] = adata.smp[restrict_key].astype('U')
        adata.smp[key_added] += ','
        adata.smp[key_added].iloc[restrict_indices] += groups.astype('U')
        adata.smp[key_added].iloc[~restrict_indices] += '0'
        adata.smp[key_added] = adata.smp[key_added].astype(
            'category', categories=natsorted(adata.smp[key_added].unique()))
    adata.uns['louvain_params'] = np.array((
        resolution,
        random_state,
    ),
                                           dtype=[('resolution', float),
                                                  ('random_state', int)])
    logg.info('    finished', t=True, end=': ')
    logg.info(
        'found {} clusters and added\n'
        '    \'{}\', the cluster labels (adata.smp, dtype=category)'.format(
            n_clusters, key_added))
    return adata if copy else None
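A hedged usage sketch, not part of the original source: `adata` is assumed to be an AnnData object from the older scanpy version this snippet targets (annotations live in `adata.smp`). It clusters once, then re-clusters only two of the resulting groups via `restrict_to`.

# Hypothetical usage of the function above; `adata` is assumed to exist.
louvain(adata, resolution=1.0, random_state=0)
louvain(adata, resolution=2.0, random_state=0,
        restrict_to=('louvain_groups', ['0', '1']))
print(np.unique(adata.smp['louvain_groups']))
print(np.unique(adata.smp['louvain_groups_R']))  # default key when restrict_to is set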
Example No. 16
def graph_clustering(adata,
                     resolution=None,
                     random_state=0,
                     restrict_to=None,
                     key_added=None,
                     adjacency=None,
                     flavor='vtraag',
                     directed=True,
                     use_weights=False,
                     partition_type=None,
                     partition_kwargs=None,
                     copy=False):
    """Cluster cells into subgroups [Blondel08]_ [Levine15]_ [Traag17]_.
    Cluster cells using the Louvain algorithm [Blondel08]_ in the implementation
    of [Traag17]_. The Louvain algorithm has been proposed for single-cell
    analysis by [Levine15]_.
    This requires to run :func:`~scanpy.api.pp.neighbors`, first.
    Parameters
    ----------
    adata : :class:`~anndata.AnnData`
        The annotated data matrix.
    resolution : `float` or `None`, optional (default: 1)
        For the default flavor ('vtraag'), you can provide a resolution (higher
        resolution means finding more and smaller clusters), which defaults to
        1.0. See “Time as a resolution parameter” in [Lambiotte09]_.
    random_state : `int`, optional (default: 0)
        Change the initialization of the optimization.
    restrict_to : `tuple`, optional (default: None)
        Restrict the clustering to the categories within the key for sample
        annotation, tuple needs to contain (obs key, list of categories).
    key_added : `str`, optional (default: 'clustering')
        Key under which to add the cluster labels.
    adjacency : sparse matrix or `None`, optional (default: `None`)
        Sparse adjacency matrix of the graph, defaults to
        `adata.uns['neighbors']['connectivities']`.
    flavor : {'vtraag', 'igraph', 'leiden'}
        Choose which package computes the clustering. 'vtraag' is much more
        powerful than 'igraph' and is the default.
    use_weights : `bool`, optional (default: `False`)
        Use weights from knn graph.
    partition_type : `~louvain.MutableVertexPartition`, optional (default: `None`)
        Type of partition to use. Only a valid argument if `flavor` is
        `'vtraag'` or `'leiden'`.
    partition_kwargs : `dict`, optional (default: `None`)
        Keyword arguments to pass to the partitioning, if the `'vtraag'` or
        `'leiden'` flavor is being used.
    copy : `bool` (default: `False`)
        Copy adata or modify it inplace.
    Returns
    -------
    None
        By default (`copy=False`), updates ``adata`` with the following fields:
        clustering : :class:`pandas.Series` (``adata.obs``, dtype `category`)
            Array of dim (number of samples) that stores the subgroup id ('0',
            '1', ...) for each cell.
    
    AnnData
        When `copy=True` is set, a copy of ``adata`` with those fields is returned.
    """
    logg.info('running clustering', r=True)
    if (flavor not in {'vtraag', 'leiden'}) and (partition_type is not None):
        raise ValueError(
            '`partition_type` is only a valid argument when `flavor` is "vtraag" or "leiden"'
        )
    adata = adata.copy() if copy else adata
    if adjacency is None and 'neighbors' not in adata.uns:
        raise ValueError(
            'You need to run `pp.neighbors` first to compute a neighborhood graph.'
        )
    if adjacency is None:
        adjacency = adata.uns['neighbors']['connectivities']
    if restrict_to is not None:
        restrict_key, restrict_categories = restrict_to
        if not isinstance(restrict_categories[0], str):
            raise ValueError('You need to use strings to label categories, '
                             'e.g. \'1\' instead of 1.')
        for c in restrict_categories:
            if c not in adata.obs[restrict_key].cat.categories:
                raise ValueError(
                    '\'{}\' is not a valid category for \'{}\''.format(
                        c, restrict_key))
        restrict_indices = adata.obs[restrict_key].isin(
            restrict_categories).values
        adjacency = adjacency[restrict_indices, :]
        adjacency = adjacency[:, restrict_indices]
    if flavor in {'vtraag', 'igraph', 'leiden'}:
        if flavor == 'igraph' and resolution is not None:
            logg.warn(
                '`resolution` parameter has no effect for flavor "igraph"')
        if directed and flavor == 'igraph':
            directed = False
        if not directed: logg.m('    using the undirected graph', v=4)
        g = utils.get_igraph_from_adjacency(adjacency, directed=directed)
        if use_weights:
            weights = np.array(g.es["weight"]).astype(np.float64)
        else:
            weights = None
        if flavor == 'leiden':
            import leidenalg
            if partition_kwargs is None:
                partition_kwargs = {}
            if partition_type is None:
                partition_type = leidenalg.ModularityVertexPartition
            if resolution is not None:
                partition_kwargs["resolution_parameter"] = resolution
            if use_weights:
                partition_kwargs["weights"] = weights
            logg.info('    using the "leiden" package of Traag (2018)')
            # leidenalg has no set_rng_seed(); reproducibility is handled via the
            # `seed` argument of find_partition()
            part = leidenalg.find_partition(
                g, partition_type, seed=random_state, **partition_kwargs)
            adata.uns['leiden_quality'] = part.quality()
        elif flavor == 'vtraag':
            import louvain
            if partition_kwargs is None:
                partition_kwargs = {}
            if partition_type is None:
                partition_type = louvain.RBConfigurationVertexPartition
            if resolution is not None:
                partition_kwargs["resolution_parameter"] = resolution
            if use_weights:
                partition_kwargs["weights"] = weights
            logg.info('    using the "louvain" package of Traag (2017)')
            louvain.set_rng_seed(random_state)
            part = louvain.find_partition(g, partition_type,
                                          **partition_kwargs)
        elif flavor == 'igraph':
            part = g.community_multilevel(weights=weights)
        groups = np.array(part.membership)
    elif flavor == 'taynaud':
        # this is deprecated
        import networkx as nx
        import community
        g = nx.Graph(adjacency)
        partition = community.best_partition(g)
        groups = np.zeros(len(partition), dtype=int)
        for k, v in partition.items():
            groups[k] = v
    else:
        raise ValueError(
            '`flavor` needs to be "leiden" or "vtraag" or "igraph" or "taynaud".'
        )
    unique_groups = np.unique(groups)
    n_clusters = len(unique_groups)
    if restrict_to is None:
        groups = groups.astype('U')
        key_added = 'clustering' if key_added is None else key_added
        adata.obs[key_added] = pd.Categorical(values=groups,
                                              categories=natsorted(
                                                  unique_groups.astype('U')))
    else:
        key_added = restrict_key + '_R' if key_added is None else key_added
        all_groups = adata.obs[restrict_key].astype('U')
        prefix = '-'.join(restrict_categories) + ','
        new_groups = [prefix + g for g in groups.astype('U')]
        all_groups.iloc[restrict_indices] = new_groups
        adata.obs[key_added] = pd.Categorical(values=all_groups,
                                              categories=natsorted(
                                                  all_groups.unique()))
    adata.uns['clustering'] = {}
    adata.uns['clustering']['params'] = {
        'resolution': resolution,
        'random_state': random_state
    }
    logg.info('    finished',
              time=True,
              end=' ' if settings.verbosity > 2 else '\n')
    logg.hint('found {} clusters and added\n'
              '    \'{}\', the cluster labels (adata.obs, categorical)'.format(
                  n_clusters, key_added))
    return adata if copy else None
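A hedged usage sketch showing the prerequisite `pp.neighbors` call. It assumes a scanpy version that still stores the connectivities under `adata.uns['neighbors']`, and uses the bundled pbmc68k_reduced dataset purely for illustration.

import scanpy as sc

adata = sc.datasets.pbmc68k_reduced()          # small demo AnnData shipped with scanpy
sc.pp.neighbors(adata, n_neighbors=15)         # builds the kNN graph this function needs
graph_clustering(adata, resolution=1.0, random_state=0,
                 flavor='vtraag', use_weights=True)
print(adata.obs['clustering'].value_counts())  # default key_added is 'clustering'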
Example No. 17
maxflows = []
for year, H in zip(years, G_t):
  print('\tCalculating maxflow for {0}'.format(year))
  maxflow = [(s['ccode'], t['ccode'], year, 
              H.maxflow_value(s.index, t.index, capacity='weight')) 
              for s in H.vs
                for t in H.vs
              if s != t]
  maxflows.extend(maxflow)

#%% Write maxflow to file
maxflow_df = pd.DataFrame(maxflows, columns=['source_ccode', 'target_ccode', 'year', 'maxflow'])
maxflow_df.to_csv(output_dir + 'maxflow.csv', index=False)

#%% Set seed for random number generator
louvain.set_rng_seed(0)

#%% Convert slices to layers
interslice_weight = 0
G_intraslice, G_interslice, G_all = louvain.time_slices_to_layers(G_t, 
                                                    interslice_weight=interslice_weight,
                                                    slice_attr='t',
                                                    vertex_id_attr='ccode')


#%% Do community detection
print('\nDoing community detection...')
n_repl = 100
resolutions = [0.6, 1.1, 1.7]
for resolution in resolutions:
    memberships = []
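The snippet above is cut off inside the detection loop. Purely as a hedged sketch of how such a loop is typically completed with the louvain-igraph multiplex recipe (CPM partitions on the intraslice layers, a zero-resolution CPM partition on the interslice layer); this is not the original continuation, and any names beyond those already defined are illustrative.

# Hedged sketch, not the original continuation of the script above.
for resolution in resolutions:
    memberships = []
    for _ in range(n_repl):
        parts = [louvain.CPMVertexPartition(H, weights='weight',
                                            node_sizes='node_size',
                                            resolution_parameter=resolution)
                 for H in G_intraslice]
        interslice_part = louvain.CPMVertexPartition(G_interslice,
                                                     weights='weight',
                                                     node_sizes='node_size',
                                                     resolution_parameter=0)
        opt = louvain.Optimiser()
        opt.optimise_partition_multiplex(parts + [interslice_part])
        memberships.append(parts[0].membership)  # all layers share one membership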
Example No. 18
def compare_algorithms(n, network_dict, log_file_name):
    network, network_oslom = network_dict['igraph'], network_dict['tuple']
    all_partitions = {'Louvain':[], 'Directed Louvain':[], 'Leiden':[], 'Infomap':[], 'Oslom':[]}
    modularity_table = pd.DataFrame()
    size_table = pd.DataFrame()
    for i in range(0, n): # run alg n times
        louvain.set_rng_seed(i)
        #
        start = time.time() 
        ### 1) directed Louvain
        partition_dl = run_louvain(network)
        all_partitions['Directed Louvain'].append(partition_dl)
        modularity_table.at[i, 'Directed Louvain'] = partition_dl.quality()
        size_table.at[i, 'Directed Louvain'] = len(partition_dl)
        #
        end = time.time()
        with open(log_file_name + ".txt", "a") as f:
                f.write('CD - dir_louvain -: ' + str(i) + '  TIME: ' + str(round((end-start)/60,4)) + '\n')
        #
        start = time.time()
        ### 2) directed Leiden
        partition_lei = run_leiden(network, i)
        all_partitions['Leiden'].append(partition_lei)
        modularity_table.at[i, 'Leiden'] = partition_lei.quality()
        size_table.at[i, 'Leiden'] = len(partition_lei)
        #
        end = time.time()
        with open(log_file_name + ".txt", "a") as f:
                f.write('CD - dir_leiden -: ' + str(i) + '  TIME: ' + str(round((end-start)/60,4)) + '\n')
        #
        start = time.time()
        ### 3) undirected Louvain
        # create an undirected network for comparison
        network_ud = directed_to_undirected(network)
        partition_l = run_louvain(network_ud)
        all_partitions['Louvain'].append(partition_l)
        modularity_table.at[i, 'Louvain'] = partition_l.quality()
        size_table.at[i, 'Louvain'] = len(partition_l)
        #
        end = time.time()
        with open(log_file_name + ".txt", "a") as f:
                f.write('CD - undir_louvain -: ' + str(i) + '  TIME: ' + str(round((end-start)/60,4)) + '\n')
        #
        start = time.time()
        ### 4) directed infomap
        partition_i = network.community_infomap(edge_weights = network.es['weight'], trials=1)
        all_partitions['Infomap'].append(partition_i)
        size_table.at[i, 'Infomap'] = len(set(partition_i.membership))
        # modularity
        community_dict_infomap = get_community_dict(partition_i, network, filter_eu_members = False)['mod_dict']
        modularity_table.at[i, 'Infomap'] = get_modularity(network, community_dict_infomap)
        #
        end = time.time()
        with open(log_file_name + ".txt", "a") as f:
                f.write('CD - infomap -: ' + str(i) + '  TIME: ' + str(round((end-start)/60,4)) + '\n')
        #
        start = time.time()
        ### 5) directed oslom
        clusters = run_oslom(network_oslom, i)
        all_partitions['Oslom'].append(clusters[0])
        size_table.at[i, 'Oslom'] = clusters[0]['num_found'] # number of clusters found
        # modularity
        community_dict_oslom = get_community_dict_oslom(clusters[0], network, filter_eu_members = False)['mod_dict']
        modularity_table.at[i, 'Oslom'] = get_modularity(network, community_dict_oslom)
        #
        end = time.time()
        with open(log_file_name + ".txt", "a") as f:
                f.write('CD - oslom -: ' + str(i) + '  TIME: ' + str(round((end-start)/60,4)) + '\n')
        #
    return {'size_table':size_table, 'modularity_table':modularity_table, 'all_partitions':all_partitions}
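compare_algorithms() relies on helpers (run_louvain, run_leiden, run_oslom, get_modularity, ...) that are not shown here. Below is a hedged sketch of what the two partition helpers might look like, using only documented louvain-igraph and leidenalg calls; these definitions are assumptions, not the original implementations.

import louvain
import leidenalg

def run_louvain(network):
    # Hypothetical helper (assumption): modularity Louvain on a weighted igraph network.
    return louvain.find_partition(network,
                                  louvain.ModularityVertexPartition,
                                  weights='weight')

def run_leiden(network, seed):
    # Hypothetical helper (assumption): Leiden with an explicit seed so each
    # repetition i of compare_algorithms() is reproducible.
    return leidenalg.find_partition(network,
                                    leidenalg.ModularityVertexPartition,
                                    weights='weight',
                                    seed=seed)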
Example No. 19
def louvain(adata,
            n_neighbors=30,
            resolution=None,
            n_pcs=50,
            random_state=0,
            flavor='vtraag',
            directed=True,
            recompute_pca=False,
            recompute_distances=False,
            recompute_graph=False,
            n_dcs=15,
            n_jobs=None,
            copy=False):
    """Cluster cells into subgroups [Blondel08]_ [Levine15]_ [Traag17]_.

    `[source] <tl.louvain_>`__ Cluster cells using the Louvain algorithm
    [Blondel08]_ in the implementation of [Traag17]_. The Louvain algorithm has
    been proposed for single-cell analysis by [Levine15]_.

    *Examples:* See this `use case <17-05-05_>`__.

    .. _tl.louvain: https://github.com/theislab/scanpy/tree/master/scanpy/tools/louvain.py

    Parameters
    ----------
    adata : AnnData
        The annotated data matrix. 
    n_neighbors : int, optional (default: 30)
        Number of neighbors to use for construction of knn graph.
    resolution : float or None, optional
        For the default flavor ('vtraag'), you can provide a resolution (higher
        resolution means finding more and smaller clusters), which defaults to
        1.0.
    n_pcs : int, optional (default: 50)
        Number of PCs to use for computation of data point graph.
    random_state : int, optional (default: 0)
        Change the initialization of the optimization.
    flavor : {'vtraag', 'igraph'}
        Choose between two packages for computing the clustering. 'vtraag' is
        much more powerful.
    copy : bool (default: False)
        Copy adata or modify it in place.

    References
    ----------
    - implementation of Louvain algorithm: Traag, doi:10.5281/zenodo.35117 (2017)
    - Louvain algorithm: Blondel et al., J. Stat. Mech., P10008 (2008)
    - base graph package: Csardi et al., InterJournal Complex Systems, 1695 (2006)
    - basic suggestion for single-cell: Levine et al., Cell 162, 184-197 (2015)
    - combination with "attachedness" matrix: Wolf et al., bioRxiv (2017)
    """
    logg.info('running Louvain clustering', r=True)
    adata = adata.copy() if copy else adata
    add_or_update_graph_in_adata(adata,
                                 n_neighbors=n_neighbors,
                                 n_pcs=n_pcs,
                                 n_dcs=n_dcs,
                                 recompute_pca=recompute_pca,
                                 recompute_distances=recompute_distances,
                                 recompute_graph=recompute_graph,
                                 n_jobs=n_jobs)
    adjacency = adata.add['data_graph_norm_weights']
    if flavor in {'vtraag', 'igraph'}:
        if flavor == 'igraph' and resolution is not None:
            logg.warn(
                '`resolution` parameter has no effect for flavor "igraph"')
        if directed and flavor == 'igraph':
            directed = False
        if not directed: logg.m('    using the undirected graph', v=4)
        g = utils.get_igraph_from_adjacency(adjacency, directed=directed)
        if flavor == 'vtraag':
            import louvain
            if resolution is None: resolution = 1
            try:
                logg.info('    using the "louvain" package of Traag (2017)')
                louvain.set_rng_seed(random_state)
                part = louvain.find_partition(
                    g,
                    louvain.RBConfigurationVertexPartition,
                    resolution_parameter=resolution)
                adata.add['louvain_quality'] = part.quality()
            except AttributeError:
                logg.warn(
                    'Did not find package louvain>=0.6, '
                    'the clustering result will therefore not be 100% reproducible, '
                    'but still meaningful! '
                    'If you want 100% reproducible results, but louvain 0.6 is not yet '
                    'available via "pip install louvain", '
                    'either get the latest (development) version from '
                    'https://github.com/vtraag/louvain-igraph or use the option '
                    '`flavor=igraph` in sc.tl.louvain(). '
                    'The latter does not provide a `resolution` parameter, though.'
                )
                part = louvain.find_partition(g,
                                              method='RBConfiguration',
                                              resolution_parameter=resolution)
        elif flavor == 'igraph':
            part = g.community_multilevel()
        groups = np.array(part.membership, dtype='U')
    elif flavor == 'taynaud':
        # this is deprecated
        import networkx as nx
        import community
        g = nx.Graph(adata.add['data_graph_distance_local'])
        partition = community.best_partition(g)
        groups = np.zeros(len(partition), dtype=int)
        for k, v in partition.items():
            groups[k] = v
        groups = groups.astype('U')
    else:
        raise ValueError(
            '`flavor` needs to be "vtraag" or "igraph" or "taynaud".')
    adata.smp['louvain_groups'] = groups
    from natsort import natsorted
    adata.add['louvain_groups_order'] = np.array(natsorted(np.unique(groups)))
    adata.add['louvain_params'] = np.array((resolution, ),
                                           dtype=[('resolution', float)])
    logg.m('    finished', t=True, end=' ')
    logg.m(
        'and found', len(adata.add['louvain_groups_order']),
        'clusters, added\n'
        '    "louvain_groups", the cluster labels (adata.smp)\n'
        '    "louvain_groups_order", the unique cluster labels (adata.add)')
    return adata if copy else None
Example No. 20
def louvain(graph):
    #lv.set_rng_seed(0)
    lv.set_rng_seed(random.randint(1, 100000))
    raw_partitions = lv.find_partition(graph, lv.ModularityVertexPartition)

    return raw_partitions
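A short usage sketch; it assumes the snippet's imports are `import louvain as lv`, `import random`, and `import igraph as ig`.

import igraph as ig

# Hypothetical usage of the wrapper above on a small built-in graph.
g = ig.Graph.Famous('Zachary')
parts = louvain(g)
print(len(parts), parts.membership[:5])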
Example No. 21
def run_louvain(graph,
                config_model='Default',
                overlap=False,
                directed=False,
                deep=False,
                interslice_weight=0.1,
                resolution_parameter=0.1,
                seed=None):
    """
    :param graph: input edge-list file, or a list of files for multi-slice input
    :param config_model: 'RB', 'RBER', 'CPM', 'Surprise', 'Significance'
        (anything else falls back to plain modularity)
    :param overlap: bool, whether to enable overlapping community detection
    :param directed: bool, whether the input graph(s) are directed
    :param deep: bool, whether to output the full Louvain hierarchy
    :param interslice_weight: weight of the coupling edges between slices
    :param resolution_parameter: resolution parameter of the quality function
    :param seed: seed for the random number generator
    :return: 0 on success, a positive error code otherwise
    """

    if seed is not None:
        louvain.set_rng_seed(seed)

    def louvain_hierarchy_output(partition):
        optimiser = louvain.Optimiser()
        partition_agg = partition.aggregate_partition()
        partition_layers = []
        while optimiser.move_nodes(partition_agg) > 0:
            partition.from_coarse_partition(partition_agg)
            partition_agg = partition_agg.aggregate_partition()
            partition_layers.append(list(partition))
        return partition_layers

    def louvain_multiplex(graphs, partition_type, interslice_weight,
                          resolution_parameter):
        layers, interslice_layer, G_full = louvain.time_slices_to_layers(
            graphs, vertex_id_attr='name', interslice_weight=interslice_weight)
        if partition_type == louvain.ModularityVertexPartition:
            partitions = [partition_type(H) for H in layers]
            interslice_partition = partition_type(interslice_layer,
                                                  weights='weight')
        else:
            partitions = [
                partition_type(H, resolution_parameter=resolution_parameter)
                for H in layers
            ]
            interslice_partition = partition_type(
                interslice_layer,
                resolution_parameter=resolution_parameter,
                weights='weight')
        optimiser = louvain.Optimiser()
        optimiser.optimise_partition_multiplex(partitions +
                                               [interslice_partition])
        quality = sum(
            [p.quality() for p in partitions + [interslice_partition]])
        return partitions[0], quality

    def partition_to_clust(graphs, partition, min_size_cut=2):
        clusts = []
        node_names = []
        if not isinstance(graphs, list):
            graphs = [graphs]
        for g in graphs:
            node_names.extend(g.vs['name'])
        for i in range(len(partition)):
            clust = [node_names[id] for id in partition[i]]
            clust = list(set(clust))
            if len(clust) < min_size_cut:
                continue
            clust.sort()
            clusts.append(clust)
        clusts = sorted(clusts, key=lambda x: len(x), reverse=True)
        return clusts

    multi = False
    if isinstance(graph, list):
        multi = True

    if overlap == True and multi == False:
        multi = True
        net = graph
        graph = []
        for i in range(4):
            graph.append(net)

    if multi == True and deep == True:
        sys.stderr.write('louvain does not support hierarchical '
                         'clustering with overlapped communities\n')
        return 1

    if config_model == 'RB':
        partition_type = louvain.RBConfigurationVertexPartition
    elif config_model == 'RBER':
        partition_type = louvain.RBERConfigurationVertexPartition
    elif config_model == 'CPM':
        partition_type = louvain.CPMVertexPartition
    elif config_model == 'Surprise':
        partition_type = louvain.SurpriseVertexPartition
    elif config_model == "Significance":
        partition_type = louvain.SignificanceVertexPartition
    else:
        sys.stderr.write("Configuration model not set "
                         "performing simple Louvain.\n")
        partition_type = louvain.ModularityVertexPartition

    weighted = False
    if multi:
        wL = []
        G = []
        for file in graph:
            with open(file, 'r') as f:
                lines = f.read().splitlines()
            elts = lines[0].split()
            if len(elts) == 3:
                weighted = True
            else:
                weighted = False
            for i in range(len(lines)):
                elts = lines[i].split()
                for j in range(2):
                    elts[j] = int(elts[j])
                if weighted == True:
                    elts[2] = float(elts[2])
                    if elts[2] < 0:
                        sys.stderr.write('encountered a negative edge weight '
                                         'on row ' + str(i) + ' (' +
                                         str(lines[i]) +
                                         ') which is not allowed\n')
                        return 2
                lines[i] = tuple(elts)
            g = igraph.Graph.TupleList(lines,
                                       directed=directed,
                                       weights=weighted)
            G.append(g)
            wL.append(weighted)
        if True in wL and False in wL:
            raise Exception('all graphs should follow the same format')
        if partition_type == louvain.CPMVertexPartition and directed is True:
            raise Exception('graph for CPMVertexPartition must be undirected')
        if partition_type == louvain.SignificanceVertexPartition and weighted is True:
            raise Exception('SignificanceVertexPartition only support '
                            'unweighted graphs')
        partition, quality = louvain_multiplex(G, partition_type,
                                               interslice_weight,
                                               resolution_parameter)

    else:
        if not os.path.isfile(graph):
            sys.stderr.write(str(graph) + ' is not a file\n')
            return 3
        if os.path.getsize(graph) == 0:
            sys.stderr.write(str(graph) + ' is an empty file\n')
            return 4
        with open(graph, 'r') as f:
            lines = f.read().splitlines()
        elts = lines[0].split()
        if len(elts) == 3:
            weighted = True
        else:
            weighted = False

        for i in range(len(lines)):
            elts = lines[i].split()
            for j in range(2):
                elts[j] = int(elts[j])
            if weighted is True:
                elts[2] = float(elts[2])
                if elts[2] < 0:
                    sys.stderr.write('encountered a negative edge weight '
                                     'on row ' + str(i) + ' (' +
                                     str(lines[i]) +
                                     ') which is not allowed\n')
                    return 3
            lines[i] = tuple(elts)

        G = igraph.Graph.TupleList(lines, directed=directed, weights=weighted)
        if weighted is False:
            weights = None
        else:
            weights = G.es['weight']
        if partition_type == louvain.ModularityVertexPartition:
            partition = partition_type(G, weights=weights)
        else:
            partition = partition_type(
                G, weights=weights, resolution_parameter=resolution_parameter)
        if deep == False:
            optimiser = louvain.Optimiser()
            optimiser.optimise_partition(partition)

    lines = []
    if deep == False:
        clusts = partition_to_clust(G, partition)
        if len(clusts) == 0:
            sys.stderr.write(DEFAULT_ERR_MSG)
            return 4

        maxNode = 0
        for clust in clusts:
            maxNode = max(maxNode, max(clust))

        for i in range(len(clusts)):
            lines.append(
                str(maxNode + len(partition) + 1) + '\t' +
                str(maxNode + i + 1))
            for n in clusts[i]:
                lines.append(str(maxNode + i + 1) + '\t' + str(n))
    else:
        partitions = louvain_hierarchy_output(partition)
        clusts_layers = []
        for p in partitions:
            clusts_layers.append(partition_to_clust(G, p))
        if len(clusts_layers) == 0:
            sys.stderr.write(DEFAULT_ERR_MSG)
            return 5
        if len(clusts_layers[0]) == 0:
            sys.stderr.write(DEFAULT_ERR_MSG)
            return 6
        maxNode = 0
        for clust in clusts_layers[0]:
            maxNode = max(maxNode, max(clust))
        for i in range(len(clusts_layers[0])):
            for n in clusts_layers[0][i]:
                lines.append(str(maxNode + i + 1) + '\t' + str(n))
        maxNode = maxNode + len(clusts_layers[0])
        for i in range(1, len(clusts_layers)):
            for j in range(len(clusts_layers[i - 1])):
                for k in range(len(clusts_layers[i])):
                    if all(x in clusts_layers[i][k]
                           for x in clusts_layers[i - 1][j]):
                        lines.append(
                            str(maxNode + k + 1) + '\t' +
                            str(maxNode - len(clusts_layers[i - 1]) + j + 1))
                        break
            maxNode = maxNode + len(clusts_layers[i])
        for i in range(len(clusts_layers[-1])):
            lines.append(
                str(maxNode + 1) + '\t' +
                str(maxNode - len(clusts_layers[-1]) + i + 1))

    # trim the hierarchy to remove contigs
    up_tree = {}
    down_tree = {}
    for line in lines:
        elts = line.split()
        down_tree.setdefault(elts[0], [])
        down_tree[elts[0]].append(elts[1])
        up_tree.setdefault(elts[1], [])
        up_tree[elts[1]].append(elts[0])

    # store root and leaves
    set1 = set(down_tree.keys())
    set2 = set(up_tree.keys())
    root_l = list(set1.difference(set2))
    leaf_l = list(set2.difference(set1))
    node_l = list(set1.union(set2))

    # find all contigs in the DAG
    Contigs = []
    work_list = root_l
    visited = {}
    for node in node_l:
        visited[node] = 0
    work_path = []
    new_path = False
    while work_list:
        key = work_list.pop(0)
        if new_path == False:
            work_path.append(key)
        else:
            work_path.append(up_tree[key][visited[key]])
            work_path.append(key)
        if key in leaf_l:
            new_path = True
            Contigs.append(work_path)
            work_path = []
        elif len(down_tree[key]) > 1 or visited[key] > 0:
            new_path = True
            Contigs.append(work_path)
            work_path = []
        if visited[key] == 0 and key not in leaf_l:
            work_list = down_tree[key] + work_list
        visited[key] += 1

    # write trimmed DAG
    for path in Contigs[1:]:
        sys.stdout.write(path[0] + ',' + path[-1] + ',')
        if path[-1] in leaf_l:
            sys.stdout.write('c-m' + ';')
        else:
            sys.stdout.write('c-c' + ';')

    sys.stdout.flush()
    return 0
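A hedged usage sketch; the file name below is illustrative only. The input is expected to be a whitespace-separated edge list ("source target [weight]" per line); the function prints the trimmed hierarchy to stdout and returns 0 on success or a small positive error code otherwise.

# Hypothetical call; 'edges.txt' stands in for a real edge-list file.
status = run_louvain('edges.txt',
                     config_model='CPM',
                     directed=False,
                     resolution_parameter=0.1,
                     seed=42)
assert status == 0, 'run_louvain reported error code {}'.format(status)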