Esempio n. 1
0
    def test_graph(self):
        """
        Louvain accepts graphs too.

        A k-nearest-neighbour graph is built from the iris data matrix and
        passed to both entry points: calling the clusterer directly and
        calling ``get_model``.
        """
        graph = matrix_to_knn_graph(self.iris.X, 30, "l2")
        self.assertIsNotNone(graph)
        # No third positional argument here: unittest would interpret it as
        # the failure message, and a bare ``1`` carries no information.
        self.assertEqual(networkx.Graph, type(graph))

        # basic clustering - get clusters
        c = self.louvain(graph)
        self.assertEqual(np.ndarray, type(c))
        # One label per iris instance
        self.assertEqual(len(self.iris), len(c))
        # First 20 iris belong to one cluster
        self.assertEqual(1, len(set(c[:20].ravel())))

        # clustering - get model
        c = self.louvain.get_model(graph)
        self.assertEqual(ClusteringModel, type(c))
        # The model stores one label per iris instance
        self.assertEqual(len(self.iris), len(c.labels))
 def __init__(self, gene_names: list, data: np.ndarray,
              neighbours: int = 30):
     """
     Prepare the graph used for Louvain clustering.

     The base initializer is called without a distance matrix
     (``distance_matrix=None``); the graph is instead built directly from
     ``data`` with ``orange_louvain_graph.matrix_to_knn_graph`` using the
     cosine metric.

     :param gene_names: List of names forwarded to the base initializer
         (presumably one name per row of ``data`` - verify against the
         base class).
     :param data: Preprocessed data from which the neighbour graph is built.
     :param neighbours: Number of nearest neighbours, passed to the graph
         constructor as ``k_neighbors``.
     """
     super().__init__(distance_matrix=None, gene_names=gene_names, data=data)
     knn_graph = orange_louvain_graph.matrix_to_knn_graph(
         data=data,
         k_neighbors=neighbours,
         metric='cosine',
     )
     self._graph = knn_graph
Esempio n. 3
0
def run_on_data(data, normalize, pca_components, k_neighbors, metric,
                resolution, state):
    # type: (Table, bool, Optional[int], int, str, float, TaskState) -> Results
    """
    Run the louvain clustering on `data`.

    `state` is used to report progress and partial results. The function
    returns early, with whatever has been computed so far, whenever
    `state.is_interuption_requested()` returns true.

    Parameters
    ----------
    data : Table
        Data table
    normalize : bool
        Recorded in the returned `Results`. This function does not itself
        normalize the data.
        NOTE(review): normalization presumably happens in the caller or in
        `Louvain.preprocess` - confirm.
    pca_components : Optional[int]
        If not `None` then the data is first projected onto first
        `pca_components` principal components.
    k_neighbors : int
        Passed to `matrix_to_knn_graph`
    metric : str
        Passed to `matrix_to_knn_graph`
    resolution : float
        Passed to `Louvain`
    state : TaskState
        Receives status messages, progress values and partial results, and
        is polled for interruption requests.

    Returns
    -------
    res : Results
        Holds the input parameters plus, as far as the computation got,
        `pca_projection`, `graph` and `partition`.
    """
    res = Results(
        normalize=normalize,
        pca_components=pca_components,
        k_neighbors=k_neighbors,
        metric=metric,
        resolution=resolution,
    )
    # `step` counts finished stages, `steps` is the total number of stages;
    # together they scale the progress value into the 0..100 range.
    step = 0
    if state.is_interuption_requested():
        return res

    if pca_components is not None:
        # Three stages: PCA, graph construction, community detection.
        steps = 3
        state.set_status("Computing PCA...")
        pca = PCA(n_components=pca_components, random_state=0)

        data = res.pca_projection = pca(data)(data)
        assert isinstance(data, Table)
        state.set_partial_results(("pca_projection", res.pca_projection))
        step += 1
    else:
        # Two stages: graph construction, community detection.
        steps = 2

    if state.is_interuption_requested():
        return res

    state.set_progress_value(100. * step / steps)
    state.set_status("Building graph...")

    # Apply Louvain preprocessing before converting the table into a graph
    louvain = Louvain(resolution=resolution, random_state=0)
    data = louvain.preprocess(data)

    if state.is_interuption_requested():
        return res

    def pcallback(val):
        # `val` is the progress value reported by the graph builder
        # (treated here as a fraction of the current stage); map it onto
        # the overall progress and abort the build if interruption was
        # requested.
        state.set_progress_value((100. * step + 100 * val) / steps)
        if state.is_interuption_requested():
            raise InteruptRequested()

    try:
        res.graph = graph = matrix_to_knn_graph(data.X,
                                                k_neighbors=k_neighbors,
                                                metric=metric,
                                                progress_callback=pcallback)
    except InteruptRequested:
        # Raised by `pcallback`; hand back the partial results.
        return res

    state.set_partial_results(("graph", res.graph))

    step += 1
    state.set_progress_value(100 * step / steps)
    state.set_status("Detecting communities...")
    if state.is_interuption_requested():
        return res

    res.partition = louvain(graph)
    state.set_partial_results(("partition", res.partition))
    return res