def test_on_trivial_input(inp):
    """Check both gap-based clusterers on the trivial input fixture.

    ``inp`` unpacks to ``(n_points_per_cluster, n_clusters, dim, pts)``.
    With one cluster made of one point, the number of clusters reported
    after fitting must equal ``n_clusters`` regardless of where the point
    lies. The check is run for ``FirstSimpleGap`` and then for
    ``FirstHistogramGap``.
    """
    _, expected_n_clusters, _, points = inp

    for clusterer_cls in (FirstSimpleGap, FirstHistogramGap):
        fitted = clusterer_cls().fit(points)
        assert fitted.n_clusters_ == expected_n_clusters
def test_max_fraction_clusters(inp, max_frac):
    """Check that ``max_fraction`` bounds the number of clusters found.

    ``inp`` unpacks to ``(n_points_per_cluster, n_clusters, dim, pts)``.
    For both ``FirstSimpleGap`` and ``FirstHistogramGap`` constructed with
    ``max_fraction=max_frac``, the fitted ``n_clusters_`` must not exceed
    ``floor(max_frac * n_points_per_cluster * n_clusters)``.
    """
    n_points_per_cluster, n_clusters, _, pts = inp
    cluster_cap = np.floor(max_frac * n_points_per_cluster * n_clusters)

    for clusterer_cls in (FirstSimpleGap, FirstHistogramGap):
        clusterer = clusterer_cls(max_fraction=max_frac)
        # Only the fitted attribute matters here; the labels are discarded.
        clusterer.fit_predict(pts)
        assert clusterer.n_clusters_ <= cluster_cap
def test_firstsimplegap(inp):
    """Check ``FirstSimpleGap`` recovers the synthetic clusters exactly.

    ``inp`` unpacks to ``(n_points_per_cluster, n_clusters, dim, pts)``.
    With the parameters below, the predicted labels must contain exactly
    ``n_clusters`` distinct values, and every label must occur
    ``n_points_per_cluster`` times.
    """
    n_points_per_cluster, n_clusters, _, pts = inp

    clusterer_kwargs = {
        'relative_gap_size': 0.5,
        'max_fraction': 1.,
        'affinity': 'euclidean',
        'memory': None,
        'linkage': 'single',
    }
    labels = FirstSimpleGap(**clusterer_kwargs).fit_predict(pts).astype(int)
    distinct_labels, label_counts = np.unique(labels, return_counts=True)

    # The number of labels found must match the number of synthetic clusters.
    assert len(distinct_labels) == n_clusters
    # Each cluster must hold the expected number of points.
    assert_almost_equal(label_counts, n_points_per_cluster)
def test_pipeline_cloned(X, clone_pipeline, layout_dim,
                         color_by_columns_dropdown):
    """Check that widget interaction mutates the pipeline only when it is
    not cloned.

    An interactive mapper figure is built from a pipeline with known initial
    parameters. For every parameter, the widgets whose ``description`` trait
    matches it are set to a new value. The pipeline passed in must then
    still report the initial value when ``clone_pipeline`` is truthy, and
    the new value otherwise.
    """
    # TODO: Monitor development of the ipytest project to convert these into
    # true notebook tests integrated with pytest
    params = {
        "cover": {
            "initial": {"n_intervals": 10, "kind": "uniform",
                        "overlap_frac": 0.1},
            "new": {"n_intervals": 15, "kind": "balanced",
                    "overlap_frac": 0.2}
        },
        "clusterer": {
            "initial": {"affinity": "euclidean"},
            "new": {"affinity": "manhattan"}
        },
        "contract_nodes": {"initial": True, "new": False},
        "min_intersection": {"initial": 4, "new": 1},
    }

    pipe = make_mapper_pipeline(
        cover=CubicalCover(**params["cover"]["initial"]),
        clusterer=FirstSimpleGap(**params["clusterer"]["initial"]),
        contract_nodes=params["contract_nodes"]["initial"],
        min_intersection=params["min_intersection"]["initial"]
    )
    fig = plot_interactive_mapper_graph(
        pipe, X, clone_pipeline=clone_pipeline, layout_dim=layout_dim,
        color_by_columns_dropdown=color_by_columns_dropdown
    )

    # Flatten the spec into one list of
    # (widget description, mapper-params key, initial value, new value),
    # keeping the iteration order of ``params``. Steps holding estimators
    # expose nested parameters keyed as "<step>__<param>"; the remaining
    # steps are plain values keyed by the step name itself.
    estimator_steps = ("cover", "clusterer")
    checks = []
    for step, values in params.items():
        if step in estimator_steps:
            checks.extend(
                (name, f"{step}__{name}", initial, values["new"][name])
                for name, initial in values["initial"].items()
            )
        else:
            checks.append((step, step, values["initial"], values["new"]))

    # Drive the widgets one parameter at a time and check the pipeline
    # immediately after each change.
    for description, key, initial_value, new_value in checks:
        for widget in _get_widgets_by_trait(fig, "description", description):
            widget.set_state({'value': new_value})
        expected = initial_value if clone_pipeline else new_value
        assert pipe.get_mapper_params()[key] == expected
def _runMapper(self):
    """
    creates mapper graphs based on train data

    One mapper pipeline is built per component ``k`` in
    ``range(self.n_components)``: the filter function is ``self.rep``
    followed by a ``Projection`` onto column ``k``, the cover is a balanced
    ``OneDimensionalCover`` and the clusterer is a shared ``FirstSimpleGap``.
    Results are stored in ``self.mapper_pipes`` (list of ``("PCA<k+1>",
    pipeline)`` tuples) and ``self.graphs`` (one fitted graph per pipeline),
    and both are pickled under ``TEMP_DATA`` keyed by ``self.label``.

    If ``self.remake`` is falsy and both cache files already exist, they are
    loaded instead and nothing is recomputed. If only one of them exists the
    cache is treated as incomplete and everything is rebuilt.

    :return: None
    """
    log.debug("--->creating mappers...")
    graphs_path = TEMP_DATA + "%s_firstsimplegap_graphs" % self.label
    pipes_path = TEMP_DATA + "%s_mapper_pipes" % self.label

    cache_complete = os.path.exists(graphs_path) and os.path.exists(pipes_path)
    if not self.remake and cache_complete:
        # NOTE(security): pickle.load can execute arbitrary code; these files
        # must only ever be ones written by this method.
        with open(graphs_path, "rb") as fgin:
            self.graphs = pickle.load(fgin)
        with open(pipes_path, "rb") as fpin:
            self.mapper_pipes = pickle.load(fpin)
        return

    clusterer = FirstSimpleGap()
    # Pipelines are verbose only when the logger itself is at DEBUG level.
    verbose = log.getEffectiveLevel() == logging.DEBUG
    self.mapper_pipes = []
    log.debug("------> creating projection components...")
    for k in range(self.n_components):
        log.debug("---------> on component {}/{}...".format(k + 1, self.n_components))
        proj = Projection(columns=k)
        filter_func = Pipeline(steps=[('pca', self.rep), ('proj', proj)])
        filtered_data = filter_func.fit_transform(self.data)
        cover = OneDimensionalCover(n_intervals=self.n_intervals,
                                    overlap_frac=self.overlap_frac,
                                    kind='balanced')
        cover.fit(filtered_data)
        mapper_pipe = make_mapper_pipeline(scaler=None,
                                           filter_func=filter_func,
                                           cover=cover,
                                           clusterer=clusterer,
                                           verbose=verbose,
                                           n_jobs=1)
        mapper_pipe.set_params(filter_func__proj__columns=k)
        self.mapper_pipes.append(("PCA%d" % (k + 1), mapper_pipe))

    # The pipelines are fitted sequentially. A threaded joblib variant is
    # kept below for reference but is disabled:
    #   Parallel(n_jobs=5, prefer="threads")(
    #       delayed(mapper_pipe[1].fit_transform)(self.data)
    #       for mapper_pipe in self.mapper_pipes)
    log.debug("------> entering parallelization...")
    self.graphs = [pipe.fit_transform(self.data)
                   for _, pipe in self.mapper_pipes]

    # Context managers guarantee the files are closed even if pickling fails.
    with open(graphs_path, "wb") as fg:
        pickle.dump(self.graphs, fg)
    with open(pipes_path, "wb") as fp:
        pickle.dump(self.mapper_pipes, fp)
def test_cluster_sizes(self):
    """Check that displayed node sizes add up to the real node sizes.

    The sizes parsed from the hovertext of the second trace of the static
    mapper figure are summed and compared with the total number of elements
    over all nodes of the graph returned by fitting the same pipeline.
    """
    pipe = make_mapper_pipeline(clusterer=FirstSimpleGap())

    fig = plot_static_mapper_graph(pipe, X_arr)
    hovertexts = fig.data[1].hovertext
    displayed_total = sum(
        _get_size_from_hovertext(text) for text in hovertexts
    )

    graph = pipe.fit_transform(X_arr)
    actual_total = sum(
        len(elements) for elements in graph.vs['node_elements']
    )

    assert displayed_total == actual_total
def test_cluster_sizes(self):
    """Check that displayed node sizes add up to the real node sizes.

    The sizes parsed from the hovertext of the second entry of the scatter
    widget's ``_data`` state are summed and compared with the total number
    of elements over all nodes of the graph returned by fitting the same
    pipeline.

    Warnings raised while plotting and fitting are suppressed, but only
    inside this test: the previous warning filters are restored on exit so
    the suppression does not leak into code that runs afterwards.
    """
    pipe = make_mapper_pipeline(clusterer=FirstSimpleGap())

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fig = plot_interactive_mapper_graph(pipe, X)
        w_scatter = self._get_widget_by_trait(fig, 'data')
        hovertexts = w_scatter.get_state()['_data'][1]['hovertext']
        node_sizes_vis = [self._get_size_from_hovertext(s_)
                          for s_ in hovertexts]
        g = pipe.fit_transform(X)

    node_size_real = [len(node)
                      for node in g['node_metadata']['node_elements']]
    assert sum(node_sizes_vis) == sum(node_size_real)