def _compute_partition(self):
    """Detect communities on ``self.graph`` unless a partition is cached.

    Nothing happens when ``self.partition`` is already set. Otherwise the
    status message is updated, ``setBlocking(True)`` is called and a
    ``Louvain`` instance configured with ``self.resolution`` is run on
    ``self.graph``; its output is stored in ``self.partition``.
    """
    if self.partition is not None:
        # A partition is already available; keep it.
        return

    self.setStatusMessage('Detecting communities...')
    self.setBlocking(True)

    community_detector = Louvain(resolution=self.resolution)
    self.partition = community_detector.fit_predict(self.graph)
def run_on_graph(graph, resolution, state):
    # type: (nx.Graph, float, TaskState) -> Results
    """
    Run the louvain clustering on `graph`.

    Parameters
    ----------
    graph : nx.Graph
        Graph passed to `Louvain.fit_predict`.
    resolution : float
        Passed to `Louvain`.
    state : TaskState
        Used to set the status text, to check for an interruption request
        and to publish the partition as a partial result.

    Returns
    -------
    res : Results
        Carries `resolution`; `partition` is assigned only when the run
        was not interrupted before clustering started.
    """
    results = Results(resolution=resolution)
    clusterer = Louvain(resolution=resolution, random_state=0)
    state.set_status("Detecting communities...")

    # Skip the clustering entirely when an interruption was requested.
    if not state.is_interuption_requested():
        results.partition = clusterer.fit_predict(graph)
        state.set_partial_results(("partition", results.partition))
    return results
def test_louvain_parameters(self):
    """Cluster iris with explicitly supplied ``Louvain`` parameters.

    Checks that the result is a numpy array with one entry per row.
    """
    parameters = dict(
        k_neighbors=3, resolution=1.2, random_state=42, metric="l2"
    )
    clusterer = Louvain(**parameters)
    labels = clusterer(self.iris)

    self.assertEqual(np.ndarray, type(labels))
    self.assertEqual(len(self.iris), len(labels))
def commit(self):
    """Validate the input and schedule the computations still missing.

    Clears shown errors and cancels running jobs first. Returns without
    scheduling anything when there is no data, or (after raising the
    ``empty_dataset`` error) when the domain has no attributes.
    Otherwise the data is preprocessed once, a task queue is filled with
    the PCA / graph / partition steps whose results are not cached yet,
    and ``queue.start`` is submitted to the executor.
    """
    self.Error.clear()

    # Stop whatever is still running from a previous commit.
    self.cancel()

    if self.data is None:
        return

    # A dataset without attributes cannot be processed.
    if len(self.data.domain.attributes) == 0:
        self.Error.empty_dataset()
        return

    # Preprocess only when no preprocessed copy is cached.
    if self.preprocessed_data is None:
        preprocessor = Louvain()
        self.preprocessed_data = preprocessor.preprocess(self.data)

    # Queue only the steps whose results are not available yet.
    task_queue = TaskQueue(parent=self)
    if self.pca_projection is None and self.apply_pca:
        task_queue.push(namespace(task=self._compute_pca_projection))
    if self.graph is None:
        task_queue.push(
            namespace(task=self._compute_graph, progress_callback=True)
        )
    if self.partition is None:
        task_queue.push(namespace(task=self._compute_partition))

    # Hook up progress, completion and error callbacks.
    task_queue.on_progress.connect(
        lambda val: self.progressBarSet(100 * val)
    )
    task_queue.on_complete.connect(self._on_complete)
    task_queue.on_exception.connect(self._handle_exceptions)
    self.__queue = task_queue

    # Start the queue on the executor.
    self.progressBarInit()
    self.setBlocking(True)
    self.__future = self.__executor.submit(task_queue.start)
def run_on_data(data, pca_components, k_neighbors, metric, resolution, state):
    # type: (Table, Optional[int], int, str, float, TaskState) -> Results
    """
    Run the louvain clustering on `data`.

    `state` is used to report progress and partial results. The function
    returns early, with whatever has been computed so far, as soon as
    `state.is_interuption_requested()` returns true.

    Parameters
    ----------
    data : Table
        Data table
    pca_components : Optional[int]
        If not `None` then the data is first projected onto first
        `pca_components` principal components.
    k_neighbors : int
        Passed to `table_to_knn_graph`
    metric : str
        Passed to `table_to_knn_graph`
    resolution : float
        Passed to `Louvain`
    state : TaskState

    Returns
    -------
    res : Results
    """
    results = Results(
        pca_components=pca_components,
        k_neighbors=k_neighbors,
        metric=metric,
        resolution=resolution,
    )
    use_pca = pca_components is not None
    # PCA (optional), graph construction and community detection.
    steps = 3 if use_pca else 2
    step = 0

    if state.is_interuption_requested():
        return results

    if use_pca:
        state.set_status("Computing PCA...")
        pca = PCA(n_components=pca_components, random_state=0)
        projected = pca(data)(data)
        # The projection replaces the input for the remaining steps.
        data = projected
        results.pca_projection = projected
        assert isinstance(data, Table)
        state.set_partial_results(
            ("pca_projection", results.pca_projection)
        )
        step += 1

    if state.is_interuption_requested():
        return results

    state.set_progress_value(100. * step / steps)
    state.set_status("Building graph...")

    def report_graph_progress(val):
        # Map the graph step's own progress onto the overall progress,
        # and abort the graph construction when interruption is requested.
        state.set_progress_value((100. * step + 100 * val) / steps)
        if state.is_interuption_requested():
            raise InteruptRequested()

    try:
        knn_graph = table_to_knn_graph(
            data,
            k_neighbors=k_neighbors,
            metric=metric,
            progress_callback=report_graph_progress,
        )
        results.graph = knn_graph
    except InteruptRequested:
        return results

    state.set_partial_results(("graph", results.graph))
    step += 1
    state.set_progress_value(100 * step / steps)
    state.set_status("Detecting communities...")

    if state.is_interuption_requested():
        return results

    clusterer = Louvain(resolution=resolution, random_state=0)
    results.partition = clusterer.fit_predict(knn_graph)
    state.set_partial_results(("partition", results.partition))
    return results
def commit(self):
    """Start the clustering task for the current settings, reusing caches.

    Stops the commit timer, resets the invalidated/modified flags, clears
    errors and cancels the current task without waiting. The widget is
    set ready and the method returns when there is no data, when the
    domain has no attributes (``empty_dataset`` error), or when a
    partition already exists (the data is then sent). Otherwise a task is
    assembled from the cached partial results and started.
    """
    self.__commit_timer.stop()
    self.__invalidated = False
    self._set_modified(False)
    self.Error.clear()

    # Cancel current running task
    self.__cancel_task(wait=False)

    if self.data is None:
        self.__set_state_ready()
        return

    # Make sure the dataset is ok
    if len(self.data.domain.attributes) < 1:
        self.Error.empty_dataset()
        self.__set_state_ready()
        return

    # Nothing to compute: just output the existing partition.
    if self.partition is not None:
        self.__set_state_ready()
        self._send_data()
        return

    # Preprocess the dataset
    if self.preprocessed_data is None:
        preprocessor = Louvain(random_state=0)
        self.preprocessed_data = preprocessor.preprocess(self.data)

    state = TaskState(self)

    # Default: run on the preprocessed data without PCA. With PCA enabled
    # either reuse the cached projection or ask the task to compute one.
    data = self.preprocessed_data
    pca_components = None
    if self.apply_pca:
        if self.pca_projection is not None:
            data = self.pca_projection
        else:
            pca_components = self.pca_components

    if self.graph is not None:
        # run on graph only; no need to do PCA and k-nn search ...
        task = partial(
            run_on_graph,
            self.graph,
            resolution=self.resolution,
            state=state,
        )
    else:
        task = partial(
            run_on_data,
            data,
            pca_components=pca_components,
            k_neighbors=self.k_neighbors,
            metric=METRICS[self.metric_idx][1],
            resolution=self.resolution,
            state=state,
        )

    self.__set_state_busy()
    self.__start_task(task, state)
def setUp(self):
    """Load the iris table and create a default ``Louvain`` instance."""
    iris_table = Table('iris')
    self.iris = iris_table
    self.louvain = Louvain()
class TestLouvain(unittest.TestCase):
    """Tests for ``Louvain`` on tables, numpy/sparse arrays and graphs."""

    def setUp(self):
        """Load the iris table and create a default ``Louvain`` instance."""
        self.iris = Table('iris')
        self.louvain = Louvain()

    def test_louvain(self):
        """Calling the instance on a table yields one label per row."""
        c = self.louvain(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))
        # First 20 iris belong to one cluster
        self.assertEqual(1, len(set(c[:20].ravel())))

    def test_louvain_parameters(self):
        """Explicitly supplied parameters are accepted."""
        louvain = Louvain(
            k_neighbors=3, resolution=1.2, random_state=42, metric="l2"
        )
        c = louvain(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_predict_table(self):
        """A table input produces a numpy array of labels."""
        c = self.louvain(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_predict_numpy(self):
        """``fit`` on a numpy array returns a ``ClusteringModel``."""
        c = self.louvain.fit(self.iris.X)
        self.assertEqual(ClusteringModel, type(c))
        self.assertEqual(np.ndarray, type(c.labels))
        self.assertEqual(len(self.iris), len(c.labels))

    def test_predict_sparse_csc(self):
        """Clustering works when ``X`` is a CSC sparse matrix."""
        with self.iris.unlocked():
            self.iris.X = csc_matrix(self.iris.X[::5])
        c = self.louvain(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_predict_sparse_csr(self):
        """Clustering works when ``X`` is a CSR sparse matrix."""
        with self.iris.unlocked():
            self.iris.X = csr_matrix(self.iris.X[::5])
        c = self.louvain(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_model(self):
        """``get_model`` returns a model that cannot be called on a table."""
        c = self.louvain.get_model(self.iris)
        self.assertEqual(ClusteringModel, type(c))
        self.assertEqual(len(self.iris), len(c.labels))
        self.assertRaises(NotImplementedError, c, self.iris)

    def test_model_np(self):
        """
        Test with numpy array as an input in model.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(NotImplementedError, c, self.iris.X)

    def test_model_sparse(self):
        """
        Test with sparse array as an input in model.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X))

    def test_model_instance(self):
        """
        Test with instance as an input in model.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(NotImplementedError, c, self.iris[0])

    def test_model_list(self):
        """
        Test with list as an input in model.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(NotImplementedError, c, self.iris.X.tolist())

    def test_graph(self):
        """
        Louvain accepts graphs too.
        """
        graph = matrix_to_knn_graph(self.iris.X, 30, "l2")
        self.assertIsNotNone(graph)
        # The original passed a stray third argument (1) here, which
        # unittest would have used as the failure message.
        self.assertEqual(networkx.Graph, type(graph))

        # basic clustering - get clusters
        c = self.louvain(graph)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))
        # First 20 iris belong to one cluster
        self.assertEqual(1, len(set(c[:20].ravel())))

        # clustering - get model; only type and label count are checked
        c = self.louvain.get_model(graph)
        self.assertEqual(ClusteringModel, type(c))
        self.assertEqual(len(self.iris), len(c.labels))

    def test_model_bad_datatype(self):
        """
        Check model with data-type that is not supported.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(TypeError, c, 10)
def run_on_data(data, pca_components, k_neighbors, metric, resolution, state):
    # type: (Table, Optional[int], int, str, float, TaskState) -> Results
    """
    Run the louvain clustering on `data`.

    Progress and partial results are reported through `state`. Whenever
    `state.is_interuption_requested()` returns true the function stops
    and returns the results gathered up to that point.

    Parameters
    ----------
    data : Table
        Data table
    pca_components : Optional[int]
        If not `None` then the data is first projected onto first
        `pca_components` principal components.
    k_neighbors : int
        Passed to `table_to_knn_graph`
    metric : str
        Passed to `table_to_knn_graph`
    resolution : float
        Passed to `Louvain`
    state : TaskState

    Returns
    -------
    res : Results
    """
    out = Results(
        pca_components=pca_components,
        k_neighbors=k_neighbors,
        metric=metric,
        resolution=resolution,
    )
    step = 0

    if state.is_interuption_requested():
        return out

    if pca_components is None:
        # Graph construction and community detection only.
        steps = 2
    else:
        # PCA comes first, followed by the two steps above.
        steps = 3
        state.set_status("Computing PCA...")
        projector = PCA(n_components=pca_components, random_state=0)
        projection = projector(data)(data)
        data = projection
        out.pca_projection = projection
        assert isinstance(data, Table)
        state.set_partial_results(("pca_projection", out.pca_projection))
        step += 1

    if state.is_interuption_requested():
        return out

    state.set_progress_value(100. * step / steps)
    state.set_status("Building graph...")

    def on_graph_progress(val):
        # Forward the graph step's progress, then honour interruption
        # requests by raising out of the graph construction.
        state.set_progress_value((100. * step + 100 * val) / steps)
        if state.is_interuption_requested():
            raise InteruptRequested()

    try:
        graph = table_to_knn_graph(
            data,
            k_neighbors=k_neighbors,
            metric=metric,
            progress_callback=on_graph_progress,
        )
        out.graph = graph
    except InteruptRequested:
        return out

    state.set_partial_results(("graph", out.graph))
    step += 1
    state.set_progress_value(100 * step / steps)
    state.set_status("Detecting communities...")

    if state.is_interuption_requested():
        return out

    community_detector = Louvain(resolution=resolution, random_state=0)
    out.partition = community_detector.fit_predict(graph)
    state.set_partial_results(("partition", out.partition))
    return out
def commit(self):
    """Launch the clustering task, reusing cached partial results.

    The commit timer is stopped, the invalidated/modified flags are
    reset, errors are cleared and the current task is cancelled without
    waiting. The widget is put back in the ready state and the method
    returns when there is no data, when the domain has no attributes
    (after raising the ``empty_dataset`` error) or when a partition is
    already available (after sending the data). Otherwise the task is
    built and started.
    """
    self.__commit_timer.stop()
    self.__invalidated = False
    self._set_modified(False)
    self.Error.clear()

    # Cancel current running task
    self.__cancel_task(wait=False)

    if self.data is None:
        self.__set_state_ready()
        return

    # Make sure the dataset is ok
    if len(self.data.domain.attributes) < 1:
        self.Error.empty_dataset()
        self.__set_state_ready()
        return

    # A partition already exists, so only the output needs sending.
    if self.partition is not None:
        self.__set_state_ready()
        self._send_data()
        return

    # Preprocess the dataset
    if self.preprocessed_data is None:
        louvain_preprocessor = Louvain(random_state=0)
        self.preprocessed_data = louvain_preprocessor.preprocess(self.data)

    state = TaskState(self)

    # Prepare/assemble the task(s) to run; reuse partial results
    reuse_projection = self.apply_pca and self.pca_projection is not None
    if reuse_projection:
        data = self.pca_projection
    else:
        data = self.preprocessed_data

    if self.apply_pca and not reuse_projection:
        pca_components = self.pca_components
    else:
        pca_components = None

    cached_graph = self.graph
    if cached_graph is None:
        # No graph yet: run PCA (if requested), k-nn search and clustering.
        task = partial(
            run_on_data,
            data,
            pca_components=pca_components,
            k_neighbors=self.k_neighbors,
            metric=METRICS[self.metric_idx][1],
            resolution=self.resolution,
            state=state,
        )
    else:
        # run on graph only; no need to do PCA and k-nn search ...
        task = partial(
            run_on_graph,
            cached_graph,
            resolution=self.resolution,
            state=state,
        )

    self.__set_state_busy()
    self.__start_task(task, state)
def setUpClass(cls):
    """Create the fixtures shared by all tests of the class.

    Stores the iris table in ``cls.data`` and a default ``Louvain``
    instance in ``cls.louvain``.
    """
    # NOTE(review): unittest invokes setUpClass on the class itself, so a
    # @classmethod decorator is presumably applied above this definition;
    # it is not visible here -- confirm.
    iris_table = Table('iris')
    cls.data = iris_table
    cls.louvain = Louvain()