def fit(self, X, y=None):
    """Compute DiviK clustering and store the fitted attributes.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        Training instances to cluster.
        NOTE(review): earlier documentation also listed sparse matrices
        and mentioned a conversion to C ordering; neither is visible in
        this method (``np.isnan`` is applied to ``X`` directly) - confirm
        before relying on either.

    y : Ignored
        Not used, present here for API consistency by convention.

    Returns
    -------
    self
        The estimator, with ``result_``, ``labels_``, ``paths_``,
        ``reverse_paths_``, ``filters_``, ``centroids_``, ``depth_`` and
        ``n_clusters_`` set.

    Raises
    ------
    ValueError
        If ``X`` contains any NaN value.
    """
    if np.isnan(X).any():
        raise ValueError("NaN values are not supported.")

    n_samples = X.shape[0]
    with context_if(self.verbose, tqdm.tqdm, total=n_samples,
                    file=sys.stdout, smoothing=0) as progress:
        self.result_ = self._divik(X, progress)

    # ``_divik`` may return None; in that case every sample ends up in
    # a single cluster labelled 0.
    unsplit = self.result_ is None

    if unsplit:
        self.labels_ = np.zeros((n_samples, ), dtype=int)
        self.paths_ = {0: (0, )}
    else:
        self.labels_, self.paths_ = summary.merged_partition(
            self.result_, return_paths=True)

    # Inverse lookup: path -> label.
    self.reverse_paths_ = {
        path: label for label, path in self.paths_.items()
    }

    if unsplit:
        # One row with every feature enabled.
        self.filters_ = np.ones([1, X.shape[1]], dtype=bool)
    else:
        per_path_filters = [
            self._get_filter(path) for path in self.reverse_paths_
        ]
        self.filters_ = np.array(per_path_filters, dtype=bool)

    # Mean of the rows of X within each label, in sorted label order.
    grouped_by_label = pd.DataFrame(X).groupby(self.labels_, sort=True)
    self.centroids_ = grouped_by_label.mean().values

    self.depth_ = summary.depth(self.result_)
    self.n_clusters_ = summary.total_number_of_clusters(self.result_)
    return self
def make_plot():
    """Build the clusters container: a graph plus a level slider.

    The slider ranges from 1 to ``depth(divik_result()) - 1`` in steps
    of 1, starts at 1 and has one mark per selectable level.
    """
    result_depth = depth(divik_result())
    top_level = result_depth - 1

    clusters_graph = dcc.Graph(
        id=Fields.CLUSTERS_GRAPH,
        figure=default_clusters_figure(),
        style={'min-height': 600},
    )
    level_heading = html.H4('Level')
    level_slider = dcc.Slider(
        id=Fields.LEVEL,
        value=1,
        min=1,
        max=top_level,
        step=1,
        marks={level: level for level in range(1, result_depth)},
    )

    return html.Div(
        id=Fields.CLUSTERS_CONTAINER,
        children=[clusters_graph, level_heading, level_slider],
        className='eight columns',
    )
def test_without_rejection_updates_merged_and_nothing_else(self):
    """Check ``sm.reject_split(DUMMY_RESULT, 0)`` against ``DUMMY_RESULT``.

    The best clustering score and the tree depth must match the input,
    and ``merged`` must equal ``sm.merged_partition(DUMMY_RESULT)``.
    NOTE(review): presumably the second argument ``0`` means nothing is
    rejected - confirm against ``reject_split``.
    """
    filtered = sm.reject_split(DUMMY_RESULT, 0)

    expected_score = DUMMY_RESULT.clustering.best_score_
    self.assertEqual(filtered.clustering.best_score_, expected_score)

    expected_depth = sm.depth(DUMMY_RESULT)
    self.assertEqual(sm.depth(filtered), expected_depth)

    expected_merged = sm.merged_partition(DUMMY_RESULT)
    npt.assert_equal(filtered.merged, expected_merged)
def test_resolves_tree_depth(self):
    """``sm.depth`` must report a depth of 3 for ``DUMMY_RESULT``."""
    resolved_depth = sm.depth(DUMMY_RESULT)
    self.assertEqual(resolved_depth, 3)
def make_merged(result: Optional[DivikResult]) -> np.ndarray:
    """Stack merged partitions for successive depth limits as columns.

    For every ``limit`` from 1 up to ``summary.depth(result)`` inclusive,
    ``summary.merged_partition(result, limit)`` is reshaped into a single
    column. The columns are joined horizontally in increasing ``limit``
    order.

    Parameters
    ----------
    result : Optional[DivikResult]
        Result passed through unchanged to ``summary.depth`` and
        ``summary.merged_partition``.

    Returns
    -------
    np.ndarray
        Array with one column per depth limit.
    """
    levels = summary.depth(result)
    columns = []
    for limit in range(1, levels + 1):
        partition = summary.merged_partition(result, limit)
        columns.append(partition.reshape(-1, 1))
    return np.hstack(columns)
def test_resolves_tree_depth(self):
    """``sm.depth`` must report a depth of 3 for ``DUMMY_RESULT``."""
    resolved_depth = sm.depth(DUMMY_RESULT)
    assert resolved_depth == 3