def test_invalid_color_features_as_array_of_indices():
    """Check that an array of column indices passed as `color_features`
    makes the static plot raise a `ValueError`."""
    mapper = make_mapper_pipeline()
    all_column_indices = np.arange(X_arr.shape[1])
    with pytest.raises(ValueError):
        plot_static_mapper_graph(mapper,
                                 X_arr,
                                 color_data=X_arr,
                                 color_features=all_column_indices)
def test_valid_colorscale():
    """Check that a custom node colorscale is applied identically in 2D and
    3D, and that it differs from the default (viridis) colorscale."""
    pipe = make_mapper_pipeline()

    # A fresh ``plotly_params`` dictionary is built for every call, so the
    # two figures never share a parameter object.
    custom_figures = {
        dim: plot_static_mapper_graph(
            pipe, X, layout_dim=dim,
            plotly_params={"node_trace": {"marker_colorscale": "blues"}}
        )
        for dim in (2, 3)
    }

    # The custom colorscale must come out the same in both dimensions
    custom_2d = custom_figures[2].data[1]["marker"]["colorscale"]
    custom_3d = custom_figures[3].data[1]["marker"]["colorscale"]
    assert custom_2d == custom_3d

    # Without plotly_params the colorscale must be viridis, which in turn
    # must not coincide with the custom one
    default_figure = plot_static_mapper_graph(pipe, X)
    default_scale = default_figure.data[1]["marker"]["colorscale"]
    assert default_scale == viridis_colorscale
    assert custom_2d != default_scale
def test_valid_hoverlabel_bgcolor(X, layout_dim):
    """Check that a user-supplied hover label background colour ends up on
    the node trace of the static figure."""
    node_trace_params = {"hoverlabel_bgcolor": "white"}
    figure = plot_static_mapper_graph(
        make_mapper_pipeline(),
        X,
        layout_dim=layout_dim,
        plotly_params={"node_trace": node_trace_params},
    )
    node_trace = figure.data[1]
    assert node_trace["hoverlabel"]["bgcolor"] == "white"
def test_valid_layout_dim(X, layout_dim):
    """Check that the edge trace always has "x" and "y" attributes, and has
    a "z" attribute exactly when `layout_dim` is 3."""
    figure = plot_static_mapper_graph(make_mapper_pipeline(), X,
                                      layout_dim=layout_dim)
    edges = figure.data[0]
    for axis in ("x", "y"):
        assert hasattr(edges, axis)

    # "z" must be present if and only if a 3D layout was requested
    has_z = hasattr(edges, "z")
    assert has_z == (layout_dim == 3)
def test_unsuitable_colorscale_for_hoverlabel_3d(X):
    """Check that plotting in 3D with `hsl_colorscale` as the node colorscale
    emits a `RuntimeWarning`."""
    mapper = make_mapper_pipeline()
    node_trace_params = {"marker_colorscale": hsl_colorscale}
    with pytest.warns(RuntimeWarning):
        plot_static_mapper_graph(
            mapper, X, layout_dim=3,
            plotly_params={"node_trace": node_trace_params}
        )
def test_user_hoverlabel_bgcolor_interactive_3d():
    """Check that the interactive plotter honours a user-defined hover label
    background colour when plotting in 3D."""
    requested_colour = {"hoverlabel_bgcolor": "blue"}
    plotter = MapperInteractivePlotter(make_mapper_pipeline(), X_arr)
    plotter.plot(layout_dim=3,
                 plotly_params={"node_trace": requested_colour})

    node_trace = plotter.figure_.data[1]
    assert node_trace.hoverlabel.bgcolor == "blue"
def test_mapper_pipeline_picklable():
    """Regression test for issue #596.

    Fits a Mapper pipeline created with ``memory`` pointing at a temporary
    cache directory on random 2D data; the test passes if fitting completes
    without raising.
    """
    X = np.random.random((100, 2))
    cachedir = mkdtemp()
    try:
        pipe = make_mapper_pipeline(memory=cachedir)
        pipe.fit_transform(X)
    finally:
        # Remove the cache directory even when the pipeline fails, so that a
        # failing run does not leave temporary directories behind.
        rmtree(cachedir)
def test_invalid_node_color_statistic_interactive():
    """Check that the interactive plot raises a `ValueError` when
    `node_color_statistic` is given as an array with one entry per node."""
    mapper = make_mapper_pipeline()
    n_nodes = len(mapper.fit_transform(X_arr).vs)
    per_node_values = np.arange(n_nodes)
    with pytest.raises(ValueError):
        plot_interactive_mapper_graph(
            mapper, X_arr, node_color_statistic=per_node_values
        )
def test_edge_elements(X):
    """Check the effect of `store_edge_elements` on the Mapper graph.

    Without the option the edges carry no "edge_elements" attribute; with it,
    the graph is otherwise unchanged and each edge stores exactly the
    intersection of its two endpoint nodes' elements, whose size equals the
    edge weight.
    """
    # TODO: Replace pipe and graph by Nerve transformer
    # TODO: Improve the Hypothesis strategy to avoid needing to hardcode the
    # min_side to be greater than n_intervals (10 by default).
    plain_pipe = make_mapper_pipeline()
    storing_pipe = make_mapper_pipeline(store_edge_elements=True)
    plain_graph = plain_pipe.fit_transform(X)
    storing_graph = storing_pipe.fit_transform(X)

    # By default (store_edge_elements=False) the attribute must be absent
    with pytest.raises(KeyError):
        plain_graph.es["edge_elements"]

    # Apart from "edge_elements", both graphs must agree.
    # -- vertices
    assert plain_graph.vs.indices == storing_graph.vs.indices
    for vertex_attr in ("pullback_set_label", "partial_cluster_label"):
        assert plain_graph.vs[vertex_attr] == storing_graph.vs[vertex_attr]
    paired_node_elements = zip(plain_graph.vs["node_elements"],
                               storing_graph.vs["node_elements"])
    assert all([np.array_equal(plain_elems, storing_elems)
                for plain_elems, storing_elems in paired_node_elements])

    # -- edges
    assert plain_graph.es.indices == storing_graph.es.indices
    assert plain_graph.es["weight"] == storing_graph.es["weight"]
    assert all([plain_edge.tuple == storing_edge.tuple
                for plain_edge, storing_edge
                in zip(plain_graph.es, storing_graph.es)])

    # Every edge must store precisely the indices shared by its two endpoint
    # nodes, and its weight must be the number of those indices. All edges
    # are evaluated before the single final assertion.
    edge_checks = []
    for edge in storing_graph.es:
        source, target = edge.vertex_tuple
        shared = np.intersect1d(source["node_elements"],
                                target["node_elements"])
        edge_checks.append(np.array_equal(edge["edge_elements"], shared))
        edge_checks.append(len(edge["edge_elements"]) == edge["weight"])
    assert all(edge_checks)
def test_node_color_statistic_as_ndarray_wrong_length():
    """Check that a `node_color_statistic` array with one entry more than
    there are nodes makes the static plot raise a `ValueError`."""
    mapper = make_mapper_pipeline()
    n_nodes = len(mapper.fit_transform(X_arr).vs)
    one_too_many = np.arange(n_nodes + 1)
    with pytest.raises(ValueError):
        plot_static_mapper_graph(mapper, X_arr,
                                 node_color_statistic=one_too_many)
def test_node_color_statistic_as_ndarray():
    """Check that a `node_color_statistic` array with one entry per node is
    used verbatim as the node marker colours."""
    mapper = make_mapper_pipeline()
    n_nodes = len(mapper.fit_transform(X_arr).vs)
    per_node_values = np.arange(n_nodes)

    figure = plot_static_mapper_graph(mapper, X_arr,
                                      node_color_statistic=per_node_values)

    plotted_colors = figure.data[1].marker.color
    assert np.array_equal(plotted_colors, per_node_values)
def test_min_intersection(X, min_intersection):
    """Check that no edge of the Mapper graph has a weight smaller than
    `min_intersection`."""
    # TODO: Replace pipe and graph by Nerve transformer
    # TODO: Improve the Hypothesis strategy to avoid needing to hardcode the
    # min_side to be greater than n_intervals (10 by default).
    mapper = make_mapper_pipeline(min_intersection=min_intersection)
    edge_weights = mapper.fit_transform(X).es["weight"]

    heavy_enough = [weight >= min_intersection for weight in edge_weights]
    assert all(heavy_enough)
def test_pipeline_cloned(X, clone_pipeline, layout_dim,
                         color_by_columns_dropdown):
    """Verify that the pipeline is changed on interaction if and only if
    `clone_pipeline` is False (with `layout_dim` set to 2 or 3).

    Each parameter is changed through the widget(s) whose description matches
    it, and the value held by the original pipeline is checked right after.
    """
    # TODO: Monitor development of the ipytest project to convert these into
    #  true notebook tests integrated with pytest
    params = {
        "cover": {
            "initial": {"n_intervals": 10, "kind": "uniform",
                        "overlap_frac": 0.1},
            "new": {"n_intervals": 15, "kind": "balanced",
                    "overlap_frac": 0.2}
        },
        "clusterer": {
            "initial": {"affinity": "euclidean"},
            "new": {"affinity": "manhattan"}
        },
        "contract_nodes": {"initial": True, "new": False},
        "min_intersection": {"initial": 4, "new": 1},
    }

    pipe = make_mapper_pipeline(
        cover=CubicalCover(**params["cover"]["initial"]),
        clusterer=FirstSimpleGap(**params["clusterer"]["initial"]),
        contract_nodes=params["contract_nodes"]["initial"],
        min_intersection=params["min_intersection"]["initial"]
    )
    fig = plot_interactive_mapper_graph(
        pipe, X, clone_pipeline=clone_pipeline, layout_dim=layout_dim,
        color_by_columns_dropdown=color_by_columns_dropdown
    )

    # Flatten the specification into one entry per parameter:
    # (widget description, key in get_mapper_params(), initial, new).
    # Sub-parameters of estimator steps are keyed as "<step>__<name>".
    estimator_steps = ("cover", "clusterer")
    changes = []
    for step, values in params.items():
        if step in estimator_steps:
            for name, initial_value in values["initial"].items():
                changes.append((name, f"{step}__{name}",
                                initial_value, values["new"][name]))
        else:
            changes.append((step, step, values["initial"], values["new"]))

    # Drive the widgets one parameter at a time, checking after each change
    for description, mapper_key, initial_value, new_value in changes:
        for widget in _get_widgets_by_trait(fig, "description", description):
            widget.set_state({'value': new_value})
        expected_value = initial_value if clone_pipeline else new_value
        assert pipe.get_mapper_params()[mapper_key] == expected_value
def test_colors_same_2d_3d(X, color_variable, node_color_statistic):
    """Check that the node colours of the static plot do not depend on the
    layout dimension."""
    pipe = make_mapper_pipeline()
    node_colors = {}
    for dim in (2, 3):
        figure = plot_static_mapper_graph(
            pipe, X, layout_dim=dim, color_variable=color_variable,
            node_color_statistic=node_color_statistic
        )
        node_colors[dim] = figure.data[1].marker.color

    assert node_colors[2] == node_colors[3]
def _runMapper(self):
    """
    creates mapper graphs based on train data

    One Mapper pipeline is built per projection component (``self.rep``
    followed by a projection onto column ``k``), each is fitted on
    ``self.data``, and the results are stored in ``self.mapper_pipes`` (a
    list of ``("PCA<k+1>", pipeline)`` tuples) and ``self.graphs``. Both are
    pickled under ``TEMP_DATA``; unless ``self.remake`` is set, an existing
    graphs file is loaded instead of recomputing.

    :return: None
    """
    log.debug("--->creating mappers...")
    graphs_path = TEMP_DATA + "%s_firstsimplegap_graphs" % self.label
    pipes_path = TEMP_DATA + "%s_mapper_pipes" % self.label

    if not self.remake and os.path.exists(graphs_path):
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # this is only safe while TEMP_DATA is written by this program alone.
        # Only the graphs file is tested for existence; a missing pipes file
        # still raises when opened below.
        with open(graphs_path, "rb") as fgin:
            self.graphs = pickle.load(fgin)
        with open(pipes_path, "rb") as fpin:
            self.mapper_pipes = pickle.load(fpin)
        return

    # A single clusterer instance is passed to every pipeline.
    clusterer = FirstSimpleGap()
    self.mapper_pipes = []

    log.debug("------> creating projection components...")
    for k in range(self.n_components):
        log.debug("---------> on component {}/{}...".format(
            k + 1, self.n_components))
        proj = Projection(columns=k)
        filter_func = Pipeline(steps=[('pca', self.rep), ('proj', proj)])
        filtered_data = filter_func.fit_transform(self.data)
        cover = OneDimensionalCover(n_intervals=self.n_intervals,
                                    overlap_frac=self.overlap_frac,
                                    kind='balanced')
        cover.fit(filtered_data)
        mapper_pipe = make_mapper_pipeline(
            scaler=None,
            filter_func=filter_func,
            cover=cover,
            clusterer=clusterer,
            verbose=(log.getEffectiveLevel() == logging.DEBUG),
            n_jobs=1)
        mapper_pipe.set_params(filter_func__proj__columns=k)
        self.mapper_pipes.append(("PCA%d" % (k + 1), mapper_pipe))

    # The pipelines are fitted one after another here; a threaded variant
    # was tried before and is kept for reference:
    #   Parallel(n_jobs=5, prefer="threads")(
    #       delayed(mapper_pipe[1].fit_transform)(self.data)
    #       for mapper_pipe in self.mapper_pipes)
    log.debug("------> entering parallelization...")
    self.graphs = [mapper_pipe[1].fit_transform(self.data)
                   for mapper_pipe in self.mapper_pipes]

    # Context managers guarantee the files are closed even if pickling fails
    with open(graphs_path, "wb") as fg:
        pickle.dump(self.graphs, fg)
    with open(pipes_path, "wb") as fp:
        pickle.dump(self.mapper_pipes, fp)
def test_colors_same_2d_3d(X, color_data, node_color_statistic):
    """Check that the node colours of the static plot do not depend on the
    layout dimension."""
    pipe = make_mapper_pipeline()
    node_colors = {}
    for dim in (2, 3):
        figure = plot_static_mapper_graph(
            pipe, X, layout_dim=dim, color_data=color_data,
            node_color_statistic=node_color_statistic
        )
        node_colors[dim] = figure.data[1].marker.color

    assert np.array_equal(node_colors[2], node_colors[3])
def test_cluster_sizes(self):
    """Verify that the total number of calculated clusters is equal to the
    number of displayed clusters.

    The displayed sizes are parsed from the node hover texts of the static
    figure; the calculated sizes are the lengths of the "node_elements" of
    the graph produced by the same pipeline.
    """
    mapper = make_mapper_pipeline(clusterer=FirstSimpleGap())

    # Sizes as displayed in the figure
    hovertexts = plot_static_mapper_graph(mapper, X_arr).data[1].hovertext
    displayed_total = sum(
        [_get_size_from_hovertext(text) for text in hovertexts]
    )

    # Sizes as computed by the pipeline
    mapper_graph = mapper.fit_transform(X_arr)
    computed_total = sum(
        [len(elements) for elements in mapper_graph.vs['node_elements']]
    )

    assert displayed_total == computed_total
def test_node_color_statistic_as_ndarray(is_2d):
    """Check that `node_color_statistic` given as an array determines the
    node marker colours.

    With `is_2d` the statistic is a two-column array whose columns are both
    ``0, 1, ..., n_nodes - 1``; otherwise it is that 1D array itself. In both
    cases the plotted colours must equal the 1D array.
    """
    mapper = make_mapper_pipeline()
    n_nodes = len(mapper.fit_transform(X_arr).vs)
    first_column = np.arange(n_nodes)

    statistic = first_column
    if is_2d:
        statistic = np.column_stack([first_column, first_column])

    figure = plot_static_mapper_graph(mapper, X_arr,
                                      node_color_statistic=statistic)
    assert np.array_equal(figure.data[1].marker.color, first_column)
def test_is_data_present(self):
    """Verify that what we see in the graph corresponds to the number of
    samples in the graph."""
    pipe = make_mapper_pipeline()
    # Silence warnings only while plotting: a bare simplefilter("ignore")
    # would permanently alter the process-wide warning filters and hide
    # warnings in every test that runs afterwards.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fig = plot_static_mapper_graph(pipe, X,
                                       color_variable=colors,
                                       clone_pipeline=False)

    node_trace = fig.get_state()['_data'][1]
    xy = np.stack([node_trace[c] for c in ['x', 'y']]).transpose()
    # NOTE(review): tuples compare lexicographically, so this is in effect a
    # check on the leading dimension (samples vs. nodes) - confirm intent.
    assert X.shape >= xy.shape

    real_colors = node_trace['marker']['color']
    assert len(real_colors) == xy.shape[0]
def test_color_features_as_estimator_or_callable(color_features):
    """Check the node colours obtained when `color_features` is supplied by
    the test parameter.

    The expected colour of each node is the mean, over the node's elements,
    of the first component of a 2-component PCA fitted on `X_arr`.
    """
    mapper = make_mapper_pipeline()
    node_elements = mapper.fit_transform(X_arr).vs["node_elements"]

    first_component = PCA(n_components=2).fit_transform(X_arr)[:, 0]
    expected_colors = np.array(
        [np.mean(first_component[members]) for members in node_elements]
    )

    figure = plot_static_mapper_graph(mapper, X_arr, color_data=X_arr,
                                      color_features=color_features)
    assert_almost_equal(figure.data[1].marker.color, expected_colors)
def test_column_dropdown(X, columns, layout_dim):
    """Check that the colours stored in each dropdown button match a figure
    coloured directly by the corresponding column."""
    pipe = make_mapper_pipeline()
    dropdown_fig = plot_static_mapper_graph(pipe, X, color_data=X,
                                            layout_dim=layout_dim)
    buttons = dropdown_fig.layout.updatemenus[0].buttons

    def button_colors(position):
        # Node colours carried by the button at the given position
        return list(buttons[position].args[0]["marker.color"][1])

    def node_colors(figure):
        return list(figure.data[1].marker.color)

    # The figure initially shows the colours of the first button
    assert node_colors(dropdown_fig) == button_colors(0)

    for position, column in enumerate(columns):
        single_column_fig = plot_static_mapper_graph(
            pipe, X, layout_dim=layout_dim, color_data=X,
            color_features=column
        )
        assert node_colors(single_column_fig) == button_colors(position)
def test_is_data_present(self):
    """Verify that what we see in the graph corresponds to the number of
    samples in the graph.

    The node trace must have equally many x and y coordinates, no more nodes
    than there are samples, and one colour per node.
    """
    figure = plot_static_mapper_graph(make_mapper_pipeline(), X_arr,
                                      color_data=colors,
                                      clone_pipeline=False)
    node_trace = figure.data[1]

    n_x = node_trace.x.shape[0]
    n_y = node_trace.y.shape[0]
    assert n_x == n_y
    assert len(X_arr) >= n_x

    assert len(node_trace.marker.color) == n_x
def Mappe(data, intervals):
    """Run a Mapper pipeline on `data` and return the result as a networkx
    graph.

    The pipeline uses an `Eccentricity` filter, a `CubicalCover` with
    `intervals` intervals and an overlap fraction of 0.3, `DBSCAN` clustering
    and two jobs.
    """
    mapper = make_mapper_pipeline(
        filter_func=Eccentricity(),
        cover=CubicalCover(n_intervals=intervals, overlap_frac=0.3),
        clusterer=DBSCAN(),
        verbose=False,
        n_jobs=2,
    )
    mapper_graph = mapper.fit_transform(data)
    # NOTE(review): the networkx graph is built from the edge list alone, so
    # nodes without any edge presumably do not appear in it - confirm that
    # this is intended.
    return nx.Graph(mapper_graph.get_edgelist())
def test_cluster_sizes(self):
    """Verify that the total number of calculated clusters is equal to the
    number of displayed clusters."""
    pipe = make_mapper_pipeline(clusterer=FirstSimpleGap())
    # Warnings stay silenced for the plotting and fitting below, but the
    # filters are restored on exit: a bare simplefilter("ignore") would
    # permanently alter the process-wide warning filters.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fig = plot_interactive_mapper_graph(pipe, X)
        w_scatter = self._get_widget_by_trait(fig, 'data')

        # Sizes as displayed: parsed from the node trace hover texts
        hovertexts = w_scatter.get_state()['_data'][1]['hovertext']
        node_sizes_vis = [self._get_size_from_hovertext(s_)
                          for s_ in hovertexts]

        # Sizes as computed by the pipeline itself
        g = pipe.fit_transform(X)
    node_size_real = [len(node)
                      for node in g['node_metadata']['node_elements']]

    assert sum(node_sizes_vis) == sum(node_size_real)
def test_interactive_plotter_attrs(X, color_data, layout_dim):
    """Simple tests on the attributes stored by MapperInteractivePlotter when
    plotting."""
    pipe = make_mapper_pipeline()
    plotter = MapperInteractivePlotter(pipe, X)
    plotter.plot(color_data=color_data, layout_dim=layout_dim)

    # 1 graph_ must be isomorphic to the graph produced by the pipeline
    graph = pipe.fit_transform(X)
    assert plotter.graph_.isomorphic(graph)

    # 2 pipeline_ must have the same string representation as the pipeline
    assert str(plotter.pipeline_) == str(pipe)

    # 3 color_features_ must be color_data, or the column of sample indices
    # when no color_data was passed
    if color_data is None:
        expected_color_features = np.arange(len(X)).reshape(-1, 1)
    else:
        expected_color_features = color_data
    assert np.array_equal(plotter.color_features_, expected_color_features)

    # 4 node_summaries_ must have one entry per node
    assert len(plotter.node_summaries_) == len(graph.vs)

    # 5 figure_ must agree with the equivalent static figure
    static_fig = plot_static_mapper_graph(pipe, X,
                                          color_data=color_data,
                                          layout_dim=layout_dim)
    interactive_fig = plotter.figure_

    # Each entry selects a sub-object of a figure and names the attributes
    # compared on it; the node marker is compared separately from the other
    # node trace attributes.
    comparisons = [
        (lambda fig: fig.data[0],
         ["hoverinfo", "line", "name", "x", "y"]),
        (lambda fig: fig.data[1],
         ["hoverinfo", "hovertext", "mode", "name", "x", "y"]),
        (lambda fig: fig.data[1].marker,
         ["color", "colorbar", "colorscale", "line", "opacity",
          "reversescale", "showscale", "size", "sizemin", "sizemode",
          "sizeref"]),
    ]
    for select, attr_names in comparisons:
        for attr in attr_names:
            assert np.all(getattr(select(interactive_fig), attr) ==
                          getattr(select(static_fig), attr))
def test_node_intersection(X):
    """Check that the two nodes joined by any edge of the Mapper graph have
    at least one element in common."""
    # TODO: Replace pipe and graph by Nerve transformer
    # TODO: Improve the Hypothesis strategy to avoid needing to hardcode the
    # min_side to be greater than n_intervals (10 by default).
    graph = make_mapper_pipeline().fit_transform(X)
    node_elements = graph.vs['node_elements']

    # An edge between two nodes with disjoint element sets is incorrect;
    # one flag is collected per edge, True marking such an incorrect edge.
    edge_is_wrong = []
    for source, target in graph.get_edgelist():
        source_elements = set(node_elements[source])
        edge_is_wrong.append(
            source_elements.isdisjoint(node_elements[target])
        )

    # No edge may join a disjoint pair of nodes
    assert not any(edge_is_wrong)
def test_color_by_column_dropdown_2d(layout_dim):
    """Check that, with `color_by_columns_dropdown` enabled, the figure
    starts with the colours of the first button and that the button at
    position ``i + 1`` carries the colours obtained with
    ``color_variable=i``."""
    pipe = make_mapper_pipeline()
    dropdown_fig = plot_static_mapper_graph(pipe, X, layout_dim=layout_dim,
                                            color_by_columns_dropdown=True)
    buttons = dropdown_fig.layout.updatemenus[0].buttons

    def button_colors(position):
        # Node colours carried by the button at the given position
        return list(buttons[position].args[0]["marker.color"][1])

    def node_colors(figure):
        return list(figure.data[1].marker.color)

    assert node_colors(dropdown_fig) == button_colors(0)

    n_columns = X.shape[1]
    for column in range(n_columns):
        column_fig = plot_static_mapper_graph(pipe, X,
                                              layout_dim=layout_dim,
                                              color_variable=column)
        # Buttons for the columns start at position 1
        assert node_colors(column_fig) == button_colors(column + 1)
def test_is_data_present(self):
    """Verify that what we see in the graph corresponds to the number of
    samples in the graph.

    The node trace must have equally many x and y coordinates, no more nodes
    than there are samples, and one colour per node.
    """
    pipe = make_mapper_pipeline()
    # Silence warnings only while plotting: a bare simplefilter("ignore")
    # would permanently alter the process-wide warning filters and hide
    # warnings in every test that runs afterwards.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fig = plot_static_mapper_graph(pipe, X,
                                       color_variable=colors,
                                       clone_pipeline=False)

    # In the widget state the coordinates are dictionaries exposing the
    # array shape under the "shape" key.
    node_trace = fig.get_state()['_data'][1]
    node_trace_x = node_trace["x"]
    node_trace_y = node_trace["y"]

    assert node_trace_x["shape"][0] == node_trace_y["shape"][0]
    num_nodes = node_trace_x["shape"][0]
    assert len(X) >= num_nodes

    fig_colors = node_trace['marker']['color']
    assert len(fig_colors) == num_nodes
def test_contract_nodes():
    """Test that, on a pathological dataset, we generate a graph without edges
    when `contract_nodes` is set to True and with edges when it is set to
    False.

    The dataset is obtained from two concentric circles by removing, for
    every pair of consecutive cover intervals, the points whose filter value
    lies within `gap` of either end of the intervals' overlap.
    """
    X = make_circles(n_samples=2000)[0]

    filter_func = Projection()
    cover = OneDimensionalCover(n_intervals=5, overlap_frac=0.4)
    p = filter_func.fit_transform(X)
    m = cover.fit_transform(p)

    gap = 0.1
    # A set makes the membership tests below O(1) instead of scanning a list
    idx_to_remove = set()
    for i in range(m.shape[1] - 1):
        # Filter values of the points lying in both interval i and i + 1
        inters = np.logical_and(m[:, i], m[:, i + 1])
        p_inters = p[np.flatnonzero(inters)]
        min_p, max_p = np.min(p_inters), np.max(p_inters)
        # Drop the points within `gap` of the lower and upper end of the
        # overlap region
        near_lower = (min_p <= p) & (p <= min_p + gap)
        near_upper = (max_p - gap <= p) & (p <= max_p)
        idx_to_remove.update(np.flatnonzero(near_lower).tolist())
        idx_to_remove.update(np.flatnonzero(near_upper).tolist())
    X_f = X[[x for x in range(len(X)) if x not in idx_to_remove]]

    clusterer = DBSCAN(eps=0.05)
    pipe = make_mapper_pipeline(filter_func=filter_func,
                                cover=cover,
                                clusterer=clusterer,
                                contract_nodes=True)
    # With node contraction no edge is left ...
    graph = pipe.fit_transform(X_f)
    assert not len(graph.es)

    # ... while without it the graph has edges
    pipe.set_params(contract_nodes=False)
    graph = pipe.fit_transform(X_f)
    assert len(graph.es)
def test_invalid_layout_algorithm(X):
    """Check that an unknown `layout` name makes the static plot raise a
    `KeyError`."""
    # Build the pipeline outside the `raises` block so that only the plotting
    # call can satisfy the expectation; a KeyError raised during setup must
    # fail the test rather than make it pass.
    pipe = make_mapper_pipeline()
    with pytest.raises(KeyError):
        plot_static_mapper_graph(pipe, X, layout="foobar")