def commit(self):
    """
    Queue and start the computations that are still missing.

    Clears the widget errors and cancels whatever is running. Nothing is
    started when there is no data or when the data has no attributes (the
    latter raises the `empty_dataset` error). Otherwise the data is
    preprocessed once, and a task is queued for each of the PCA
    projection, the graph and the partition that has not been computed
    yet. The queue is then submitted to the executor.
    """
    self.Error.clear()

    # Stop whatever is still running before scheduling new work
    self.cancel()

    if self.data is None:
        return

    # A dataset without attributes can not be clustered
    if len(self.data.domain.attributes) < 1:
        self.Error.empty_dataset()
        return

    # The preprocessed table is cached; compute it only when missing
    if self.preprocessed_data is None:
        self.preprocessed_data = Louvain().preprocess(self.data)

    # Queue only the stages whose results are not available yet
    tasks = TaskQueue(parent=self)

    if self.pca_projection is None and self.apply_pca:
        tasks.push(namespace(task=self._compute_pca_projection))

    if self.graph is None:
        tasks.push(
            namespace(task=self._compute_graph, progress_callback=True)
        )

    if self.partition is None:
        tasks.push(namespace(task=self._compute_partition))

    def report_progress(val):
        # Received values are scaled by 100 for the progress bar
        self.progressBarSet(100 * val)

    # Wire up the queue notifications
    tasks.on_progress.connect(report_progress)
    tasks.on_complete.connect(self._on_complete)
    tasks.on_exception.connect(self._handle_exceptions)

    self.__queue = tasks

    # Block the widget and hand the queue over to the executor
    self.progressBarInit()
    self.setBlocking(True)
    self.__future = self.__executor.submit(tasks.start)
def commit(self):
    """
    Start the clustering task for the current data, reusing cached results.

    Stops the commit timer, resets the invalidated/modified flags, clears
    errors and cancels the current task. The widget goes straight back to
    the ready state when there is no data, when the data has no attributes
    (the `empty_dataset` error is raised), or when a partition is already
    available (the data is then just sent). Otherwise a task is assembled
    from whatever partial results exist and started.
    """
    self.__commit_timer.stop()
    self.__invalidated = False
    self._set_modified(False)
    self.Error.clear()

    # Cancel current running task without waiting for it
    self.__cancel_task(wait=False)

    if self.data is None:
        self.__set_state_ready()
        return

    # A dataset without attributes can not be clustered
    if len(self.data.domain.attributes) < 1:
        self.Error.empty_dataset()
        self.__set_state_ready()
        return

    # The partition is already known; only the output needs sending
    if self.partition is not None:
        self.__set_state_ready()
        self._send_data()
        return

    # The preprocessed table is cached; compute it only when missing
    if self.preprocessed_data is None:
        self.preprocessed_data = Louvain(random_state=0).preprocess(self.data)

    state = TaskState(self)

    # Choose the task input. By default the task runs on the preprocessed
    # data without PCA; with PCA enabled it either reuses the cached
    # projection or is asked to compute `pca_components` components.
    data = self.preprocessed_data
    pca_components = None
    if self.apply_pca:
        if self.pca_projection is not None:
            data = self.pca_projection
        else:
            pca_components = self.pca_components

    if self.graph is None:
        # NOTE(review): `run_on_data` as defined in this file also declares
        # a required `normalize` parameter that is not supplied here --
        # confirm which signature this call is meant to target.
        task = partial(
            run_on_data,
            data,
            pca_components=pca_components,
            k_neighbors=self.k_neighbors,
            metric=METRICS[self.metric_idx][1],
            resolution=self.resolution,
            state=state,
        )
    else:
        # A graph is available: run on it directly, no PCA or k-nn search
        task = partial(
            run_on_graph,
            self.graph,
            resolution=self.resolution,
            state=state,
        )

    self.__set_state_busy()
    self.__start_task(task, state)
def run_on_data(data, normalize, pca_components, k_neighbors, metric,
                resolution, state):
    # type: (Table, bool, Optional[int], int, str, float, TaskState) -> Results
    """
    Run the Louvain clustering pipeline on `data`.

    The stages are: optional PCA projection, Louvain preprocessing,
    k-nearest-neighbour graph construction and community detection.
    `state` receives status messages, progress values and the partial
    result of each finished stage. `state.is_interuption_requested()` is
    polled between stages and from the graph progress callback; once it
    returns true the partially filled results are returned immediately.

    Parameters
    ----------
    data : Table
        Data table.
    normalize : bool
        Only recorded on the returned `Results`; this function does not
        normalize `data` itself.
    pca_components : Optional[int]
        If not `None`, `data` is first projected onto the first
        `pca_components` principal components.
    k_neighbors : int
        Passed to `matrix_to_knn_graph`.
    metric : str
        Passed to `matrix_to_knn_graph`.
    resolution : float
        Passed to `Louvain`.
    state : TaskState
        Used to report progress and to check for interruption requests.

    Returns
    -------
    res : Results
        Holds the input parameters and every result computed before the
        function returned.
    """
    res = Results(
        normalize=normalize,
        pca_components=pca_components,
        k_neighbors=k_neighbors,
        metric=metric,
        resolution=resolution,
    )

    # The PCA projection counts as an additional stage for progress
    with_pca = pca_components is not None
    steps = 3 if with_pca else 2
    step = 0

    if state.is_interuption_requested():
        return res

    if with_pca:
        state.set_status("Computing PCA...")
        projector = PCA(n_components=pca_components, random_state=0)
        data = res.pca_projection = projector(data)(data)
        assert isinstance(data, Table)
        state.set_partial_results(("pca_projection", res.pca_projection))
        step += 1

    if state.is_interuption_requested():
        return res

    state.set_progress_value(100. * step / steps)
    state.set_status("Building graph...")

    # Apply Louvain preprocessing before converting the table into a graph
    louvain = Louvain(resolution=resolution, random_state=0)
    data = louvain.preprocess(data)

    if state.is_interuption_requested():
        return res

    def report_graph_progress(val):
        # `val` is added on top of the stages that are already finished
        state.set_progress_value((100. * step + 100 * val) / steps)
        # Raising is how the graph construction is aborted from the callback
        if state.is_interuption_requested():
            raise InteruptRequested()

    try:
        res.graph = knn_graph = matrix_to_knn_graph(
            data.X,
            k_neighbors=k_neighbors,
            metric=metric,
            progress_callback=report_graph_progress,
        )
    except InteruptRequested:
        return res

    state.set_partial_results(("graph", res.graph))
    step += 1

    state.set_progress_value(100 * step / steps)
    state.set_status("Detecting communities...")

    if state.is_interuption_requested():
        return res

    res.partition = louvain(knn_graph)
    state.set_partial_results(("partition", res.partition))
    return res
def commit(self):
    """
    Start the clustering task for the current data, reusing cached results.

    Stops the commit timer, resets the invalidated/modified flags, clears
    errors and cancels the current task. The widget returns to the ready
    state without starting anything when there is no data, when the data
    has no attributes (the `empty_dataset` error is raised), or when a
    partition already exists (in which case the data is sent). Otherwise
    a task is built from the available partial results and started.
    """
    self.__commit_timer.stop()
    self.__invalidated = False
    self._set_modified(False)
    self.Error.clear()

    # Cancel current running task without waiting for it
    self.__cancel_task(wait=False)

    if self.data is None:
        self.__set_state_ready()
        return

    # A dataset without attributes can not be clustered
    if len(self.data.domain.attributes) < 1:
        self.Error.empty_dataset()
        self.__set_state_ready()
        return

    # Nothing to compute when the partition is already available
    if self.partition is not None:
        self.__set_state_ready()
        self._send_data()
        return

    # The preprocessed table is cached; compute it only when missing
    if self.preprocessed_data is None:
        self.preprocessed_data = Louvain(random_state=0).preprocess(self.data)

    state = TaskState(self)

    # Pick the task input: preprocessed data without PCA by default; with
    # PCA enabled either the cached projection is reused or the task is
    # asked to compute `pca_components` components itself.
    data = self.preprocessed_data
    pca_components = None
    if self.apply_pca:
        if self.pca_projection is not None:
            data = self.pca_projection
        else:
            pca_components = self.pca_components

    if self.graph is None:
        # NOTE(review): `run_on_data` as defined in this file also declares
        # a required `normalize` parameter that is not supplied here --
        # confirm which signature this call is meant to target.
        task = partial(
            run_on_data,
            data,
            pca_components=pca_components,
            k_neighbors=self.k_neighbors,
            metric=METRICS[self.metric_idx][1],
            resolution=self.resolution,
            state=state,
        )
    else:
        # Run on the graph only; no need to do PCA and k-nn search
        task = partial(
            run_on_graph,
            self.graph,
            resolution=self.resolution,
            state=state,
        )

    self.__set_state_busy()
    self.__start_task(task, state)
def run_on_data(data, normalize, pca_components, k_neighbors, metric,
                resolution, state):
    # type: (Table, bool, Optional[int], int, str, float, TaskState) -> Results
    """
    Run the Louvain clustering pipeline on `data`.

    The stages are: optional PCA projection, Louvain preprocessing,
    k-nearest-neighbour graph construction and community detection.
    `state` receives status messages, progress values and the partial
    result of each finished stage. `state.is_interuption_requested()` is
    polled between stages and from the graph progress callback; once it
    returns true the partially filled results are returned immediately.

    Parameters
    ----------
    data : Table
        Data table.
    normalize : bool
        Only recorded on the returned `Results`; this function does not
        normalize `data` itself.
    pca_components : Optional[int]
        If not `None`, `data` is first projected onto the first
        `pca_components` principal components.
    k_neighbors : int
        Passed to `table_to_knn_graph`.
    metric : str
        Passed to `table_to_knn_graph`.
    resolution : float
        Passed to `Louvain`.
    state : TaskState
        Used to report progress and to check for interruption requests.

    Returns
    -------
    res : Results
        Holds the input parameters and every result computed before the
        function returned.
    """
    res = Results(
        normalize=normalize,
        pca_components=pca_components,
        k_neighbors=k_neighbors,
        metric=metric,
        resolution=resolution,
    )

    # The PCA projection counts as an additional stage for progress
    with_pca = pca_components is not None
    steps = 3 if with_pca else 2
    step = 0

    if state.is_interuption_requested():
        return res

    if with_pca:
        state.set_status("Computing PCA...")
        projector = PCA(n_components=pca_components, random_state=0)
        data = res.pca_projection = projector(data)(data)
        assert isinstance(data, Table)
        state.set_partial_results(("pca_projection", res.pca_projection))
        step += 1

    if state.is_interuption_requested():
        return res

    state.set_progress_value(100. * step / steps)
    state.set_status("Building graph...")

    # Apply Louvain preprocessing before converting the table into a graph
    louvain = Louvain(resolution=resolution, random_state=0)
    data = louvain.preprocess(data)

    if state.is_interuption_requested():
        return res

    def report_graph_progress(val):
        # `val` is added on top of the stages that are already finished
        state.set_progress_value((100. * step + 100 * val) / steps)
        # Raising is how the graph construction is aborted from the callback
        if state.is_interuption_requested():
            raise InteruptRequested()

    try:
        res.graph = knn_graph = table_to_knn_graph(
            data,
            k_neighbors=k_neighbors,
            metric=metric,
            progress_callback=report_graph_progress,
        )
    except InteruptRequested:
        return res

    state.set_partial_results(("graph", res.graph))
    step += 1

    state.set_progress_value(100 * step / steps)
    state.set_status("Detecting communities...")

    if state.is_interuption_requested():
        return res

    res.partition = louvain.fit_predict(knn_graph)
    state.set_partial_results(("partition", res.partition))
    return res