def test_graph(self):
    """Louvain accepts a prebuilt k-NN graph in place of tabular data."""
    graph = matrix_to_knn_graph(self.iris.X, 30, "l2")
    self.assertIsNotNone(graph)
    # The exact type is checked on purpose (no message argument needed).
    self.assertEqual(networkx.Graph, type(graph))

    # Basic clustering: calling the clusterer on the graph yields an
    # array with one entry per iris row.
    c = self.louvain(graph)
    self.assertEqual(np.ndarray, type(c))
    self.assertEqual(len(self.iris), len(c))
    # First 20 iris belong to one cluster
    self.assertEqual(1, len(set(c[:20].ravel())))

    # Clustering through get_model yields a model whose labels cover
    # every iris row.
    c = self.louvain.get_model(graph)
    self.assertEqual(ClusteringModel, type(c))
    self.assertEqual(len(self.iris), len(c.labels))
def __init__(self, gene_names: list, data: np.ndarray, neighbours: int = 30):
    """
    Prepare the graph used for Louvain clustering.

    The parent initialiser is called without a distance matrix
    (``distance_matrix=None``). The graph itself is built from ``data`` by
    ``orange_louvain_graph.matrix_to_knn_graph`` with the cosine metric and
    stored on ``self._graph``.

    :param gene_names: List of names, forwarded unchanged to the parent
        initialiser.
    :param data: Preprocessed data; forwarded to the parent initialiser and
        used as input for the k-nearest-neighbour graph.
    :param neighbours: Number of neighbours, passed as ``k_neighbors`` to
        the graph constructor.
    """
    super().__init__(distance_matrix=None, gene_names=gene_names, data=data)

    # Build the graph first, then attach it to the instance.
    knn_graph = orange_louvain_graph.matrix_to_knn_graph(
        data=data,
        k_neighbors=neighbours,
        metric='cosine',
    )
    self._graph = knn_graph
def run_on_data(data, normalize, pca_components, k_neighbors, metric,
                resolution, state):
    # type: (Table, bool, Optional[int], int, str, float, TaskState) -> Results
    """
    Run the louvain clustering on `data`.

    `state` is used to report progress and partial results. Returns early
    (with whatever has been computed so far) if
    `state.is_interuption_requested()` returns true.

    Parameters
    ----------
    data : Table
        Data table
    normalize : bool
        Recorded on the returned `Results`.
        NOTE(review): this function does not normalize the data itself;
        any preprocessing is delegated to `Louvain.preprocess` - confirm
        that this flag is honoured elsewhere.
    pca_components : Optional[int]
        If not `None` then the data is first projected onto first
        `pca_components` principal components.
    k_neighbors : int
        Passed to `matrix_to_knn_graph`
    metric : str
        Passed to `matrix_to_knn_graph`
    resolution : float
        Passed to `Louvain`
    state : TaskState

    Returns
    -------
    res : Results
    """
    res = Results(
        normalize=normalize, pca_components=pca_components,
        k_neighbors=k_neighbors, metric=metric, resolution=resolution,
    )
    step = 0
    if state.is_interuption_requested():
        return res

    # The optional PCA projection adds one step to the progress total.
    if pca_components is not None:
        steps = 3
        state.set_status("Computing PCA...")
        pca = PCA(n_components=pca_components, random_state=0)

        data = res.pca_projection = pca(data)(data)
        assert isinstance(data, Table)
        state.set_partial_results(("pca_projection", res.pca_projection))
        step += 1
    else:
        steps = 2

    if state.is_interuption_requested():
        return res

    state.set_progress_value(100. * step / steps)
    state.set_status("Building graph...")

    # Apply Louvain preprocessing before converting the table into a graph
    louvain = Louvain(resolution=resolution, random_state=0)
    data = louvain.preprocess(data)

    if state.is_interuption_requested():
        return res

    def pcallback(val):
        # Reads `step`/`steps` from the enclosing scope; `val` is scaled by
        # 100 here, so it is presumably a fraction in [0, 1] - TODO confirm.
        state.set_progress_value((100. * step + 100 * val) / steps)
        if state.is_interuption_requested():
            # Abort graph construction from inside the progress callback.
            raise InteruptRequested()

    try:
        res.graph = graph = matrix_to_knn_graph(
            data.X, k_neighbors=k_neighbors, metric=metric,
            progress_callback=pcallback)
    except InteruptRequested:
        return res

    state.set_partial_results(("graph", res.graph))

    step += 1
    state.set_progress_value(100 * step / steps)
    state.set_status("Detecting communities...")
    if state.is_interuption_requested():
        return res

    res.partition = louvain(graph)
    state.set_partial_results(("partition", res.partition))
    return res