Beispiel #1
0
    def commit(self):
        self.Error.clear()
        # Kill any running jobs
        self.cancel()

        if self.data is None:
            return

        # Make sure the dataset is ok
        if len(self.data.domain.attributes) < 1:
            self.Error.empty_dataset()
            return

        # Preprocess the dataset
        if self.preprocessed_data is None:
            louvain = Louvain()
            self.preprocessed_data = louvain.preprocess(self.data)

        # Prepare the tasks to run
        queue = TaskQueue(parent=self)

        if self.pca_projection is None and self.apply_pca:
            queue.push(namespace(task=self._compute_pca_projection))

        if self.graph is None:
            queue.push(
                namespace(task=self._compute_graph, progress_callback=True))

        if self.partition is None:
            queue.push(namespace(task=self._compute_partition))

        # Prepare callbacks
        queue.on_progress.connect(lambda val: self.progressBarSet(100 * val))
        queue.on_complete.connect(self._on_complete)
        queue.on_exception.connect(self._handle_exceptions)
        self.__queue = queue

        # Run the task queue
        self.progressBarInit()
        self.setBlocking(True)
        self.__future = self.__executor.submit(queue.start)
    def commit(self):
        self.__commit_timer.stop()
        self.__invalidated = False
        self._set_modified(False)
        self.Error.clear()

        # Cancel current running task
        self.__cancel_task(wait=False)

        if self.data is None:
            self.__set_state_ready()
            return

        # Make sure the dataset is ok
        if len(self.data.domain.attributes) < 1:
            self.Error.empty_dataset()
            self.__set_state_ready()
            return

        if self.partition is not None:
            self.__set_state_ready()
            self._send_data()
            return

        # Preprocess the dataset
        if self.preprocessed_data is None:
            louvain = Louvain(random_state=0)
            self.preprocessed_data = louvain.preprocess(self.data)

        state = TaskState(self)

        # Prepare/assemble the task(s) to run; reuse partial results
        if self.apply_pca:
            if self.pca_projection is not None:
                data = self.pca_projection
                pca_components = None
            else:
                data = self.preprocessed_data
                pca_components = self.pca_components
        else:
            data = self.preprocessed_data
            pca_components = None

        if self.graph is not None:
            # run on graph only; no need to do PCA and k-nn search ...
            graph = self.graph
            k_neighbors = metric = None
        else:
            k_neighbors, metric = self.k_neighbors, METRICS[self.metric_idx][1]
            graph = None

        if graph is None:
            task = partial(run_on_data,
                           data,
                           pca_components=pca_components,
                           k_neighbors=k_neighbors,
                           metric=metric,
                           resolution=self.resolution,
                           state=state)
        else:
            task = partial(run_on_graph,
                           graph,
                           resolution=self.resolution,
                           state=state)

        self.__set_state_busy()
        self.__start_task(task, state)
Beispiel #3
0
def run_on_data(data, normalize, pca_components, k_neighbors, metric,
                resolution, state):
    # type: (Table, Optional[int], int, str, float, bool, TaskState) -> Results
    """
    Run the louvain clustering on `data`.

    state is used to report progress and partial results. Returns early if
    `task.is_interuption_requested()` returns true.

    Parameters
    ----------
    data : Table
        Data table
    normalize : bool
        If `True`, the data is first normalized before computing PCA.
    pca_components : Optional[int]
        If not `None` then the data is first projected onto first
        `pca_components` principal components.
    k_neighbors : int
        Passed to `table_to_knn_graph`
    metric : str
        Passed to `table_to_knn_graph`
    resolution : float
        Passed to `Louvain`
    state : TaskState

    Returns
    -------
    res : Results
    """
    state = state  # type: TaskState
    res = Results(
        normalize=normalize,
        pca_components=pca_components,
        k_neighbors=k_neighbors,
        metric=metric,
        resolution=resolution,
    )
    step = 0
    if state.is_interuption_requested():
        return res

    if pca_components is not None:
        steps = 3
        state.set_status("Computing PCA...")
        pca = PCA(n_components=pca_components, random_state=0)

        data = res.pca_projection = pca(data)(data)
        assert isinstance(data, Table)
        state.set_partial_results(("pca_projection", res.pca_projection))
        step += 1
    else:
        steps = 2

    if state.is_interuption_requested():
        return res

    state.set_progress_value(100. * step / steps)
    state.set_status("Building graph...")

    # Apply Louvain preprocessing before converting the table into a graph
    louvain = Louvain(resolution=resolution, random_state=0)
    data = louvain.preprocess(data)

    if state.is_interuption_requested():
        return res

    def pcallback(val):
        state.set_progress_value((100. * step + 100 * val) / steps)
        if state.is_interuption_requested():
            raise InteruptRequested()

    try:
        res.graph = graph = matrix_to_knn_graph(data.X,
                                                k_neighbors=k_neighbors,
                                                metric=metric,
                                                progress_callback=pcallback)
    except InteruptRequested:
        return res

    state.set_partial_results(("graph", res.graph))

    step += 1
    state.set_progress_value(100 * step / steps)
    state.set_status("Detecting communities...")
    if state.is_interuption_requested():
        return res

    res.partition = louvain(graph)
    state.set_partial_results(("partition", res.partition))
    return res
    def commit(self):
        self.__commit_timer.stop()
        self.__invalidated = False
        self._set_modified(False)
        self.Error.clear()

        # Cancel current running task
        self.__cancel_task(wait=False)

        if self.data is None:
            self.__set_state_ready()
            return

        # Make sure the dataset is ok
        if len(self.data.domain.attributes) < 1:
            self.Error.empty_dataset()
            self.__set_state_ready()
            return

        if self.partition is not None:
            self.__set_state_ready()
            self._send_data()
            return

        # Preprocess the dataset
        if self.preprocessed_data is None:
            louvain = Louvain(random_state=0)
            self.preprocessed_data = louvain.preprocess(self.data)

        state = TaskState(self)

        # Prepare/assemble the task(s) to run; reuse partial results
        if self.apply_pca:
            if self.pca_projection is not None:
                data = self.pca_projection
                pca_components = None
            else:
                data = self.preprocessed_data
                pca_components = self.pca_components
        else:
            data = self.preprocessed_data
            pca_components = None

        if self.graph is not None:
            # run on graph only; no need to do PCA and k-nn search ...
            graph = self.graph
            k_neighbors = metric = None
        else:
            k_neighbors, metric = self.k_neighbors, METRICS[self.metric_idx][1]
            graph = None

        if graph is None:
            task = partial(
                run_on_data, data, pca_components=pca_components,
                k_neighbors=k_neighbors, metric=metric,
                resolution=self.resolution, state=state
            )
        else:
            task = partial(
                run_on_graph, graph, resolution=self.resolution, state=state
            )

        self.__set_state_busy()
        self.__start_task(task, state)
def run_on_data(data, normalize, pca_components, k_neighbors, metric, resolution, state):
    # type: (Table, Optional[int], int, str, float, bool, TaskState) -> Results
    """
    Run the louvain clustering on `data`.

    state is used to report progress and partial results. Returns early if
    `task.is_interuption_requested()` returns true.

    Parameters
    ----------
    data : Table
        Data table
    normalize : bool
        If `True`, the data is first normalized before computing PCA.
    pca_components : Optional[int]
        If not `None` then the data is first projected onto first
        `pca_components` principal components.
    k_neighbors : int
        Passed to `table_to_knn_graph`
    metric : str
        Passed to `table_to_knn_graph`
    resolution : float
        Passed to `Louvain`
    state : TaskState

    Returns
    -------
    res : Results
    """
    state = state  # type: TaskState
    res = Results(
        normalize=normalize, pca_components=pca_components,
        k_neighbors=k_neighbors, metric=metric, resolution=resolution,
    )
    step = 0
    if state.is_interuption_requested():
        return res

    if pca_components is not None:
        steps = 3
        state.set_status("Computing PCA...")
        pca = PCA(n_components=pca_components, random_state=0)

        data = res.pca_projection = pca(data)(data)
        assert isinstance(data, Table)
        state.set_partial_results(("pca_projection", res.pca_projection))
        step += 1
    else:
        steps = 2

    if state.is_interuption_requested():
        return res

    state.set_progress_value(100. * step / steps)
    state.set_status("Building graph...")

    # Apply Louvain preprocessing before converting the table into a graph
    louvain = Louvain(resolution=resolution, random_state=0)
    data = louvain.preprocess(data)

    if state.is_interuption_requested():
        return res

    def pcallback(val):
        state.set_progress_value((100. * step + 100 * val) / steps)
        if state.is_interuption_requested():
            raise InteruptRequested()

    try:
        res.graph = graph = table_to_knn_graph(
            data, k_neighbors=k_neighbors, metric=metric,
            progress_callback=pcallback
        )
    except InteruptRequested:
        return res

    state.set_partial_results(("graph", res.graph))

    step += 1
    state.set_progress_value(100 * step / steps)
    state.set_status("Detecting communities...")
    if state.is_interuption_requested():
        return res

    res.partition = louvain.fit_predict(graph)
    state.set_partial_results(("partition", res.partition))
    return res