예제 #1
0
    def fit(self, X, y=None):
        """Run DiviK on ``X`` and store the fitted attributes on the estimator.

        Parameters
        ----------
        X : array-like or sparse matrix, shape=(n_samples, n_features)
            Training instances to cluster. Earlier documentation of this
            method states that the data will be converted to C ordering,
            which causes a memory copy if the given data is not
            C-contiguous; that conversion is not performed in this method
            body itself.
        y : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        self
            The estimator, with ``result_``, ``labels_``, ``paths_``,
            ``reverse_paths_``, ``filters_``, ``centroids_``, ``depth_``
            and ``n_clusters_`` set.

        Raises
        ------
        ValueError
            If ``X`` contains NaN values.
        """
        contains_nan = np.isnan(X).any()
        if contains_nan:
            raise ValueError("NaN values are not supported.")

        n_samples = X.shape[0]
        # The progress bar is only created when ``self.verbose`` is truthy;
        # ``context_if`` decides what ``progress`` is otherwise.
        progress_context = context_if(self.verbose,
                                      tqdm.tqdm,
                                      total=n_samples,
                                      file=sys.stdout,
                                      smoothing=0)
        with progress_context as progress:
            self.result_ = self._divik(X, progress)

        unsplit = self.result_ is None

        # Labels and the label -> path mapping.
        if unsplit:
            # No result tree: every sample falls into the single cluster 0.
            self.labels_ = np.zeros((n_samples, ), dtype=int)
            self.paths_ = {0: (0, )}
        else:
            partition = summary.merged_partition(self.result_,
                                                 return_paths=True)
            self.labels_, self.paths_ = partition

        # Inverse mapping: path -> label.
        self.reverse_paths_ = {
            path: label
            for label, path in self.paths_.items()
        }

        # One boolean feature mask per path.
        if unsplit:
            self.filters_ = np.ones([1, X.shape[1]], dtype=bool)
        else:
            path_filters = []
            for path in self.reverse_paths_:
                path_filters.append(self._get_filter(path))
            self.filters_ = np.array(path_filters, dtype=bool)

        # Per-cluster feature means, ordered by sorted label.
        grouped_by_label = pd.DataFrame(X).groupby(self.labels_, sort=True)
        self.centroids_ = grouped_by_label.mean().values

        self.depth_ = summary.depth(self.result_)
        self.n_clusters_ = summary.total_number_of_clusters(self.result_)

        return self
예제 #2
0
def make_plot():
    """Build the clusters panel: a graph and a slider selecting the level.

    The slider spans the integer levels ``1 .. depth - 1``, where ``depth``
    is ``depth(divik_result())``, and starts at level 1.

    Returns
    -------
    html.Div
        Container with id ``Fields.CLUSTERS_CONTAINER`` holding the clusters
        graph, a ``Level`` heading and the level slider.
    """
    result_depth = depth(divik_result())
    # Dash documents slider mark values as strings (or ``{label, style}``
    # dicts), so the labels are stringified; the positions stay numeric.
    level_marks = {level: str(level) for level in range(1, result_depth)}
    return html.Div(id=Fields.CLUSTERS_CONTAINER,
                    children=[
                        dcc.Graph(id=Fields.CLUSTERS_GRAPH,
                                  figure=default_clusters_figure(),
                                  style={'min-height': 600}),
                        html.H4('Level'),
                        dcc.Slider(
                            id=Fields.LEVEL,
                            value=1,
                            min=1,
                            max=result_depth - 1,
                            step=1,
                            marks=level_marks)
                    ],
                    className='eight columns')
예제 #3
0
 def test_without_rejection_updates_merged_and_nothing_else(self):
     """``reject_split(DUMMY_RESULT, 0)`` keeps score and depth intact.

     Also checks that the ``merged`` field of the returned result equals
     the merged partition computed from the untouched ``DUMMY_RESULT``.
     """
     rejected = sm.reject_split(DUMMY_RESULT, 0)

     actual_score = rejected.clustering.best_score_
     expected_score = DUMMY_RESULT.clustering.best_score_
     self.assertEqual(actual_score, expected_score)

     actual_depth = sm.depth(rejected)
     expected_depth = sm.depth(DUMMY_RESULT)
     self.assertEqual(actual_depth, expected_depth)

     actual_partition = rejected.merged
     expected_partition = sm.merged_partition(DUMMY_RESULT)
     npt.assert_equal(actual_partition, expected_partition)
예제 #4
0
 def test_resolves_tree_depth(self):
     """``sm.depth`` reports a depth of 3 for ``DUMMY_RESULT``."""
     observed_depth = sm.depth(DUMMY_RESULT)
     self.assertEqual(observed_depth, 3)
예제 #5
0
파일: _sklearn.py 프로젝트: gmrukwa/divik
def make_merged(result: Optional[DivikResult]) -> np.ndarray:
    """Stack the merged partitions for every depth limit as columns.

    For each limit from 1 up to ``summary.depth(result)`` the merged
    partition is computed, reshaped into a single column, and the columns
    are concatenated horizontally in increasing order of the limit.

    Parameters
    ----------
    result : Optional[DivikResult]
        Result passed through to ``summary.depth`` and
        ``summary.merged_partition``.

    Returns
    -------
    np.ndarray
        Two-dimensional array with one column per depth limit.
    """
    n_levels = summary.depth(result)
    columns = []
    for level in range(1, n_levels + 1):
        partition = summary.merged_partition(result, level)
        columns.append(partition.reshape(-1, 1))
    return np.hstack(columns)
예제 #6
0
 def test_resolves_tree_depth(self):
     """``sm.depth`` reports a depth of 3 for ``DUMMY_RESULT``."""
     observed_depth = sm.depth(DUMMY_RESULT)
     assert observed_depth == 3