Esempio n. 1
0
    def _compute_partition(self):
        """Compute the Louvain partition of ``self.graph`` if it is missing.

        Does nothing when ``self.partition`` is already set. Otherwise the
        status message is updated, ``setBlocking(True)`` is called and a
        ``Louvain`` instance configured with ``self.resolution`` is run on
        ``self.graph``; its result is stored in ``self.partition``.
        """
        if self.partition is not None:
            # A partition is already stored; nothing to compute.
            return

        self.setStatusMessage('Detecting communities...')
        self.setBlocking(True)

        clustering = Louvain(resolution=self.resolution)
        self.partition = clustering.fit_predict(self.graph)
Esempio n. 2
0
def run_on_graph(graph, resolution, state):
    # type: (nx.Graph, float, TaskState) -> Results
    """
    Run the louvain clustering on `graph`.

    Parameters
    ----------
    graph : nx.Graph
        Graph passed to `Louvain.fit_predict`.
    resolution : float
        Passed to `Louvain`.
    state : TaskState
        Used to report the status message and the partial result, and to
        check whether an interruption was requested.

    Returns
    -------
    res : Results
        Carries `resolution`; `partition` is assigned only when no
        interruption was requested before the clustering started.
    """
    results = Results(resolution=resolution)
    clusterer = Louvain(resolution=resolution, random_state=0)
    state.set_status("Detecting communities...")
    if not state.is_interuption_requested():
        results.partition = clusterer.fit_predict(graph)
        state.set_partial_results(("partition", results.partition))
    return results
Esempio n. 3
0
def run_on_graph(graph, resolution, state):
    # type: (nx.Graph, float, TaskState) -> Results
    """
    Run the louvain clustering on `graph`.

    Parameters
    ----------
    graph : nx.Graph
        Graph passed to `Louvain.fit_predict`.
    resolution : float
        Passed to `Louvain`.
    state : TaskState
        Used to report the status message and the partial result, and to
        check whether an interruption was requested.

    Returns
    -------
    res : Results
        Carries `resolution`; `partition` is assigned only when no
        interruption was requested before the clustering started.
    """
    outcome = Results(resolution=resolution)
    community_detector = Louvain(resolution=resolution, random_state=0)
    state.set_status("Detecting communities...")

    interrupted = state.is_interuption_requested()
    if not interrupted:
        outcome.partition = community_detector.fit_predict(graph)
        state.set_partial_results(("partition", outcome.partition))
    return outcome
Esempio n. 4
0
 def test_louvain_parameters(self):
     """Louvain built with explicit parameters returns a label array.

     The result must be a plain ``np.ndarray`` with as many entries as
     ``len(self.iris)``.
     """
     params = dict(
         k_neighbors=3,
         resolution=1.2,
         random_state=42,
         metric="l2",
     )
     clusterer = Louvain(**params)
     labels = clusterer(self.iris)
     self.assertEqual(np.ndarray, type(labels))
     self.assertEqual(len(self.iris), len(labels))
Esempio n. 5
0
    def commit(self):
        """Validate the input and submit the outstanding computations.

        Clears previous errors and cancels running jobs first. Returns early
        when there is no data, or (after reporting ``empty_dataset``) when
        the data domain has no attributes. Otherwise the data is
        preprocessed if no preprocessed data is stored, and a task queue
        holding the still-missing steps (PCA projection, graph, partition)
        is submitted to the executor.
        """
        self.Error.clear()
        # Stop anything still running from an earlier commit
        self.cancel()

        if self.data is None:
            return

        # Report datasets that have no attributes and stop
        if len(self.data.domain.attributes) < 1:
            self.Error.empty_dataset()
            return

        # Preprocess only when no preprocessed data is stored yet
        if self.preprocessed_data is None:
            self.preprocessed_data = Louvain().preprocess(self.data)

        # Queue only the steps whose results are still missing
        pending = TaskQueue(parent=self)

        if self.pca_projection is None and self.apply_pca:
            pca_job = namespace(task=self._compute_pca_projection)
            pending.push(pca_job)

        if self.graph is None:
            graph_job = namespace(
                task=self._compute_graph, progress_callback=True)
            pending.push(graph_job)

        if self.partition is None:
            partition_job = namespace(task=self._compute_partition)
            pending.push(partition_job)

        def report_progress(val):
            # The queue's progress value is scaled by 100 for the bar
            return self.progressBarSet(100 * val)

        # Wire up the queue signals
        pending.on_progress.connect(report_progress)
        pending.on_complete.connect(self._on_complete)
        pending.on_exception.connect(self._handle_exceptions)
        self.__queue = pending

        # Launch the queue on the executor
        self.progressBarInit()
        self.setBlocking(True)
        self.__future = self.__executor.submit(pending.start)
Esempio n. 6
0
def run_on_data(data, pca_components, k_neighbors, metric, resolution, state):
    # type: (Table, Optional[int], int, str, float, TaskState) -> Results
    """
    Run the louvain clustering on `data`.

    The optional PCA projection, the k-nn graph and the partition are
    computed in that order. `state` is used to report status, progress and
    partial results. Returns early, with whatever has been computed so far,
    if `state.is_interuption_requested()` returns true.

    Parameters
    ----------
    data : Table
        Data table
    pca_components : Optional[int]
        If not `None` then the data is first projected onto first
        `pca_components` principal components.
    k_neighbors : int
        Passed to `table_to_knn_graph`
    metric : str
        Passed to `table_to_knn_graph`
    resolution : float
        Passed to `Louvain`
    state : TaskState
        Receives status messages, progress values and partial results, and
        is polled for interruption requests.

    Returns
    -------
    res : Results
        Carries the input parameters plus the `pca_projection`, `graph` and
        `partition` values that were computed before returning.
    """
    results = Results(
        pca_components=pca_components,
        k_neighbors=k_neighbors,
        metric=metric,
        resolution=resolution,
    )
    if state.is_interuption_requested():
        return results

    use_pca = pca_components is not None
    # PCA is one extra step on top of graph construction and clustering
    steps = 3 if use_pca else 2
    step = 0

    if use_pca:
        state.set_status("Computing PCA...")
        projector = PCA(n_components=pca_components, random_state=0)
        projection = projector(data)(data)
        # From here on the graph is built from the projected data
        data = projection
        results.pca_projection = projection
        assert isinstance(data, Table)
        state.set_partial_results(("pca_projection", results.pca_projection))
        step += 1

    if state.is_interuption_requested():
        return results

    state.set_progress_value(100. * step / steps)
    state.set_status("Building graph...")

    def pcallback(val):
        # Map the graph-building progress into the overall progress range
        state.set_progress_value((100. * step + 100 * val) / steps)
        # Abort the graph construction by raising; caught right below
        if state.is_interuption_requested():
            raise InteruptRequested()

    try:
        graph = table_to_knn_graph(
            data,
            k_neighbors=k_neighbors,
            metric=metric,
            progress_callback=pcallback,
        )
    except InteruptRequested:
        return results

    results.graph = graph
    state.set_partial_results(("graph", results.graph))

    step += 1
    state.set_progress_value(100 * step / steps)
    state.set_status("Detecting communities...")
    if state.is_interuption_requested():
        return results

    clusterer = Louvain(resolution=resolution, random_state=0)
    results.partition = clusterer.fit_predict(graph)
    state.set_partial_results(("partition", results.partition))
    return results
Esempio n. 7
0
    def commit(self):
        """Start the clustering for the current data, or reuse its result.

        Stops the commit timer, resets the invalidated/modified flags and
        the errors, and cancels a running task without waiting for it.
        Switches to the ready state and returns when there is no data, when
        the data has no attributes (reporting ``empty_dataset``), or when a
        partition is already stored (in which case the data is sent).
        Otherwise a task is assembled that reuses the stored PCA projection
        or graph when present, and is started in the busy state.
        """
        self.__commit_timer.stop()
        self.__invalidated = False
        self._set_modified(False)
        self.Error.clear()

        # Do not wait for the previous task to finish
        self.__cancel_task(wait=False)

        if self.data is None:
            self.__set_state_ready()
            return

        # Report datasets that have no attributes and stop
        if len(self.data.domain.attributes) < 1:
            self.Error.empty_dataset()
            self.__set_state_ready()
            return

        if self.partition is not None:
            # The partition is already stored; just send the data
            self.__set_state_ready()
            self._send_data()
            return

        # Preprocess only when no preprocessed data is stored yet
        if self.preprocessed_data is None:
            preprocessor = Louvain(random_state=0)
            self.preprocessed_data = preprocessor.preprocess(self.data)

        state = TaskState(self)

        # Default: run on the preprocessed data without a PCA step
        data = self.preprocessed_data
        pca_components = None
        if self.apply_pca:
            if self.pca_projection is None:
                pca_components = self.pca_components
            else:
                # A projection is already stored; feed it in directly
                data = self.pca_projection

        graph = self.graph
        if graph is None:
            task = partial(
                run_on_data,
                data,
                pca_components=pca_components,
                k_neighbors=self.k_neighbors,
                metric=METRICS[self.metric_idx][1],
                resolution=self.resolution,
                state=state,
            )
        else:
            # run on graph only; no need to do PCA and k-nn search ...
            task = partial(
                run_on_graph,
                graph,
                resolution=self.resolution,
                state=state,
            )

        self.__set_state_busy()
        self.__start_task(task, state)
Esempio n. 8
0
 def setUp(self):
     """Create the iris table and a default ``Louvain`` before each test."""
     self.iris = Table("iris")
     self.louvain = Louvain()
Esempio n. 9
0
class TestLouvain(unittest.TestCase):
    """Tests for ``Louvain`` clustering run on the iris data set."""

    def setUp(self):
        """Create the iris table and a default ``Louvain`` for each test."""
        self.iris = Table('iris')
        self.louvain = Louvain()

    def _assert_label_array(self, labels):
        """Check `labels` is a plain ndarray as long as ``self.iris``."""
        self.assertEqual(np.ndarray, type(labels))
        self.assertEqual(len(self.iris), len(labels))

    def test_louvain(self):
        """Calling the clusterer on a table returns cluster labels."""
        c = self.louvain(self.iris)
        self._assert_label_array(c)
        # First 20 iris belong to one cluster
        self.assertEqual(1, len(set(c[:20].ravel())))

    def test_louvain_parameters(self):
        """Non-default constructor parameters are accepted."""
        louvain = Louvain(k_neighbors=3,
                          resolution=1.2,
                          random_state=42,
                          metric="l2")
        c = louvain(self.iris)
        self._assert_label_array(c)

    def test_predict_table(self):
        """A table input yields a label array."""
        c = self.louvain(self.iris)
        self._assert_label_array(c)

    def test_predict_numpy(self):
        """Fitting on a numpy array yields a model with a label array."""
        c = self.louvain.fit(self.iris.X)
        self.assertEqual(ClusteringModel, type(c))
        self._assert_label_array(c.labels)

    def test_predict_sparse_csc(self):
        """A table whose X is a CSC sparse matrix can be clustered."""
        with self.iris.unlocked():
            self.iris.X = csc_matrix(self.iris.X[::5])
        c = self.louvain(self.iris)
        self._assert_label_array(c)

    def test_predict_sparse_csr(self):
        """A table whose X is a CSR sparse matrix can be clustered."""
        with self.iris.unlocked():
            self.iris.X = csr_matrix(self.iris.X[::5])
        c = self.louvain(self.iris)
        self._assert_label_array(c)

    def test_model(self):
        """``get_model`` returns a model that cannot be called on a table."""
        c = self.louvain.get_model(self.iris)
        self.assertEqual(ClusteringModel, type(c))
        self.assertEqual(len(self.iris), len(c.labels))

        self.assertRaises(NotImplementedError, c, self.iris)

    def test_model_np(self):
        """
        Test with numpy array as an input in model.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(NotImplementedError, c, self.iris.X)

    def test_model_sparse(self):
        """
        Test with sparse array as an input in model.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X))

    def test_model_instance(self):
        """
        Test with instance as an input in model.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(NotImplementedError, c, self.iris[0])

    def test_model_list(self):
        """
        Test with list as an input in model.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(NotImplementedError, c, self.iris.X.tolist())

    def test_graph(self):
        """
        Louvain accepts graphs too.
        """
        graph = matrix_to_knn_graph(self.iris.X, 30, "l2")
        self.assertIsNotNone(graph)
        # The third positional argument of assertEqual is the failure
        # message, so no extra value is passed here.
        self.assertEqual(networkx.Graph, type(graph))

        # basic clustering - get clusters
        c = self.louvain(graph)
        self._assert_label_array(c)
        # First 20 iris belong to one cluster
        self.assertEqual(1, len(set(c[:20].ravel())))

        # clustering - get model; only its type and label count are checked
        c = self.louvain.get_model(graph)
        self.assertEqual(ClusteringModel, type(c))
        self.assertEqual(len(self.iris), len(c.labels))

    def test_model_bad_datatype(self):
        """
        Check model with data-type that is not supported.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(TypeError, c, 10)
Esempio n. 10
0
def run_on_data(data, pca_components, k_neighbors, metric, resolution, state):
    # type: (Table, Optional[int], int, str, float, TaskState) -> Results
    """
    Run the louvain clustering on `data`.

    The optional PCA projection, the k-nn graph and the partition are
    computed in that order. `state` is used to report status, progress and
    partial results. Returns early, with whatever has been computed so far,
    if `state.is_interuption_requested()` returns true.

    Parameters
    ----------
    data : Table
        Data table
    pca_components : Optional[int]
        If not `None` then the data is first projected onto first
        `pca_components` principal components.
    k_neighbors : int
        Passed to `table_to_knn_graph`
    metric : str
        Passed to `table_to_knn_graph`
    resolution : float
        Passed to `Louvain`
    state : TaskState
        Receives status messages, progress values and partial results, and
        is polled for interruption requests.

    Returns
    -------
    res : Results
        Carries the input parameters plus the `pca_projection`, `graph` and
        `partition` values that were computed before returning.
    """
    outcome = Results(
        pca_components=pca_components,
        k_neighbors=k_neighbors,
        metric=metric,
        resolution=resolution,
    )
    if state.is_interuption_requested():
        return outcome

    with_pca = pca_components is not None
    # PCA is one extra step on top of graph construction and clustering
    steps = 3 if with_pca else 2
    step = 0

    if with_pca:
        state.set_status("Computing PCA...")
        pca_learner = PCA(n_components=pca_components, random_state=0)
        projected = pca_learner(data)(data)
        # From here on the graph is built from the projected data
        data = projected
        outcome.pca_projection = projected
        assert isinstance(data, Table)
        state.set_partial_results(("pca_projection", outcome.pca_projection))
        step += 1

    if state.is_interuption_requested():
        return outcome

    state.set_progress_value(100. * step / steps)
    state.set_status("Building graph...")

    def pcallback(val):
        # Map the graph-building progress into the overall progress range
        state.set_progress_value((100. * step + 100 * val) / steps)
        # Abort the graph construction by raising; caught right below
        if state.is_interuption_requested():
            raise InteruptRequested()

    try:
        knn_graph = table_to_knn_graph(
            data,
            k_neighbors=k_neighbors,
            metric=metric,
            progress_callback=pcallback,
        )
    except InteruptRequested:
        return outcome

    outcome.graph = knn_graph
    state.set_partial_results(("graph", outcome.graph))

    step += 1
    state.set_progress_value(100 * step / steps)
    state.set_status("Detecting communities...")
    if state.is_interuption_requested():
        return outcome

    community_detector = Louvain(resolution=resolution, random_state=0)
    outcome.partition = community_detector.fit_predict(knn_graph)
    state.set_partial_results(("partition", outcome.partition))
    return outcome
Esempio n. 11
0
    def commit(self):
        """Start the clustering for the current data, or reuse its result.

        Stops the commit timer, resets the invalidated/modified flags and
        the errors, and cancels a running task without waiting for it.
        Switches to the ready state and returns when there is no data, when
        the data has no attributes (reporting ``empty_dataset``), or when a
        partition is already stored (in which case the data is sent).
        Otherwise a task is assembled that reuses the stored PCA projection
        or graph when present, and is started in the busy state.
        """
        self.__commit_timer.stop()
        self.__invalidated = False
        self._set_modified(False)
        self.Error.clear()

        # Do not wait for the previous task to finish
        self.__cancel_task(wait=False)

        if self.data is None:
            self.__set_state_ready()
            return

        # Report datasets that have no attributes and stop
        if len(self.data.domain.attributes) < 1:
            self.Error.empty_dataset()
            self.__set_state_ready()
            return

        if self.partition is not None:
            # The partition is already stored; just send the data
            self.__set_state_ready()
            self._send_data()
            return

        # Preprocess only when no preprocessed data is stored yet
        if self.preprocessed_data is None:
            preprocessor = Louvain(random_state=0)
            self.preprocessed_data = preprocessor.preprocess(self.data)

        state = TaskState(self)

        # Default: run on the preprocessed data without a PCA step
        data = self.preprocessed_data
        pca_components = None
        if self.apply_pca:
            if self.pca_projection is None:
                pca_components = self.pca_components
            else:
                # A projection is already stored; feed it in directly
                data = self.pca_projection

        stored_graph = self.graph
        if stored_graph is None:
            knn_settings = dict(
                k_neighbors=self.k_neighbors,
                metric=METRICS[self.metric_idx][1],
            )
            task = partial(
                run_on_data,
                data,
                pca_components=pca_components,
                resolution=self.resolution,
                state=state,
                **knn_settings
            )
        else:
            # run on graph only; no need to do PCA and k-nn search ...
            task = partial(
                run_on_graph,
                stored_graph,
                resolution=self.resolution,
                state=state,
            )

        self.__set_state_busy()
        self.__start_task(task, state)
Esempio n. 12
0
 @classmethod
 def setUpClass(cls):
     """Load the iris table and create a default ``Louvain`` for the class.

     unittest invokes this hook on the class with no arguments, so it has
     to be a classmethod; the fixtures are stored as class attributes.
     """
     cls.data = Table('iris')
     cls.louvain = Louvain()