Ejemplo n.º 1
0
def test_invalid_color_features_as_array_of_indices():
    """Expect a ValueError when `color_features` is an array of indices.

    The array holds one index per column of `X_arr`, which is also passed as
    `color_data`.
    """
    mapper_pipe = make_mapper_pipeline()
    column_indices = np.arange(X_arr.shape[1])
    with pytest.raises(ValueError):
        plot_static_mapper_graph(
            mapper_pipe, X_arr, color_data=X_arr,
            color_features=column_indices
        )
Ejemplo n.º 2
0
def test_valid_colorscale():
    """Check custom and default node marker colorscales of the static plot.

    A custom "blues" colorscale must come out identical in the 2D and 3D
    figures, the default colorscale must equal `viridis_colorscale`, and the
    custom one must differ from the default.
    """
    pipe = make_mapper_pipeline()

    def _plot_with_blues(dim):
        # A fresh params dict is built for every call, as in two separate
        # literal arguments.
        return plot_static_mapper_graph(
            pipe,
            X,
            layout_dim=dim,
            plotly_params={"node_trace": {"marker_colorscale": "blues"}},
        )

    figure_2d = _plot_with_blues(2)
    figure_3d = _plot_with_blues(3)

    # The custom colorscale must be applied identically in 2d and in 3d
    custom_2d = figure_2d.data[1]["marker"]["colorscale"]
    custom_3d = figure_3d.data[1]["marker"]["colorscale"]
    assert custom_2d == custom_3d

    # The default colorscale must be "viridis" and differ from the custom one
    default_figure = plot_static_mapper_graph(pipe, X)
    default_scale = default_figure.data[1]["marker"]["colorscale"]
    assert default_scale == viridis_colorscale
    assert custom_2d != default_scale
Ejemplo n.º 3
0
def test_valid_hoverlabel_bgcolor(X, layout_dim):
    """A user-supplied hover label background colour reaches the node trace."""
    mapper_pipe = make_mapper_pipeline()
    node_trace_params = {"hoverlabel_bgcolor": "white"}
    figure = plot_static_mapper_graph(
        mapper_pipe, X, layout_dim=layout_dim,
        plotly_params={"node_trace": node_trace_params},
    )
    node_trace = figure.data[1]
    assert node_trace["hoverlabel"]["bgcolor"] == "white"
Ejemplo n.º 4
0
def test_valid_layout_dim(X, layout_dim):
    """The edge trace has "x" and "y", and "z" exactly when `layout_dim` is 3."""
    mapper_pipe = make_mapper_pipeline()
    figure = plot_static_mapper_graph(mapper_pipe, X, layout_dim=layout_dim)
    edges = figure.data[0]

    has_x = hasattr(edges, "x")
    has_y = hasattr(edges, "y")
    assert has_x and has_y

    has_z = hasattr(edges, "z")
    if layout_dim == 3:
        assert has_z
    else:
        assert not has_z
Ejemplo n.º 5
0
def test_unsuitable_colorscale_for_hoverlabel_3d(X):
    """Expect a RuntimeWarning when `hsl_colorscale` is the node marker
    colorscale of a 3D static plot."""
    mapper_pipe = make_mapper_pipeline()
    node_trace_params = {"marker_colorscale": hsl_colorscale}
    with pytest.warns(RuntimeWarning):
        plot_static_mapper_graph(
            mapper_pipe, X, layout_dim=3,
            plotly_params={"node_trace": node_trace_params},
        )
Ejemplo n.º 6
0
def test_user_hoverlabel_bgcolor_interactive_3d():
    """A user-chosen hover label background colour is kept by the 3D
    interactive plotter's figure."""
    mapper_pipe = make_mapper_pipeline()
    interactive_plotter = MapperInteractivePlotter(mapper_pipe, X_arr)
    user_params = {"node_trace": {"hoverlabel_bgcolor": "blue"}}
    interactive_plotter.plot(layout_dim=3, plotly_params=user_params)

    node_trace = interactive_plotter.figure_.data[1]
    assert node_trace.hoverlabel.bgcolor == "blue"
Ejemplo n.º 7
0
def test_mapper_pipeline_picklable():
    """Regression test for issue #596.

    Fits a Mapper pipeline configured with an on-disk `memory` cache
    directory on random 2D data. The test passes if fitting completes
    without raising.
    """
    X = np.random.random((100, 2))
    cachedir = mkdtemp()
    try:
        pipe = make_mapper_pipeline(memory=cachedir)
        pipe.fit_transform(X)
    finally:
        # Remove the temporary cache directory even when fitting fails, so
        # that a failing run does not leave stray directories behind.
        rmtree(cachedir)
Ejemplo n.º 8
0
def test_invalid_node_color_statistic_interactive():
    """Expect a ValueError when the interactive plot receives an array as
    `node_color_statistic` (here with one entry per graph node)."""
    pipe = make_mapper_pipeline()
    fitted_graph = pipe.fit_transform(X_arr)
    per_node_values = np.arange(len(fitted_graph.vs))
    with pytest.raises(ValueError):
        plot_interactive_mapper_graph(
            pipe, X_arr, node_color_statistic=per_node_values
        )
Ejemplo n.º 9
0
def test_edge_elements(X):
    """Check the "edge_elements" edge attribute produced on request.

    Two pipelines are fitted on the same data, one with the default settings
    and one with ``store_edge_elements=True``. The test verifies that:

    - the default graph has no "edge_elements" edge attribute;
    - both graphs otherwise agree on vertices and edges;
    - each "edge_elements" array equals the intersection of the element
      sets of the edge's two vertices, and its length equals the edge weight.
    """
    # TODO: Replace pipe and graph by Nerve transformer
    # TODO: Improve the Hypothesis strategy to avoid needing to hardcode the
    # min_side to be greater than n_intervals (10 by default).
    pipe = make_mapper_pipeline()
    pipe_edge_elems = make_mapper_pipeline(store_edge_elements=True)

    graph = pipe.fit_transform(X)
    graph_edge_elems = pipe_edge_elems.fit_transform(X)

    # Check that when store_edge_elements=False (default) there is no
    # "edge_elements" attribute.
    with pytest.raises(KeyError):
        _ = graph.es["edge_elements"]

    # Check that graph and graph_edge_elems agree otherwise
    # Vertices
    assert graph.vs.indices == graph_edge_elems.vs.indices
    for attr_name in ["pullback_set_label", "partial_cluster_label"]:
        assert graph.vs[attr_name] == graph_edge_elems.vs[attr_name]
    node_elements = graph.vs["node_elements"]
    node_elements_ee = graph_edge_elems.vs["node_elements"]
    assert all(
        np.array_equal(node, node_ee)
        for node, node_ee in zip(node_elements, node_elements_ee)
    )
    # Edges
    assert graph.es.indices == graph_edge_elems.es.indices
    assert graph.es["weight"] == graph_edge_elems.es["weight"]
    assert all(
        edge.tuple == edge_ee.tuple
        for edge, edge_ee in zip(graph.es, graph_edge_elems.es)
    )

    # Check that the arrays edge_elements contain precisely those indices which
    # are in the element sets associated to both the first and second vertex,
    # and that the edge weight equals the size of edge_elements. Asserting per
    # edge makes a failure point at the offending edge.
    for edge in graph_edge_elems.es:
        v1, v2 = edge.vertex_tuple
        edge_elements = edge["edge_elements"]
        assert np.array_equal(
            edge_elements,
            np.intersect1d(v1["node_elements"], v2["node_elements"]))
        assert len(edge_elements) == edge["weight"]
Ejemplo n.º 10
0
def test_node_color_statistic_as_ndarray_wrong_length():
    """Expect a ValueError when the `node_color_statistic` array has one
    entry more than the graph has nodes."""
    pipe = make_mapper_pipeline()
    n_nodes = len(pipe.fit_transform(X_arr).vs)
    oversized_statistic = np.arange(n_nodes + 1)

    with pytest.raises(ValueError):
        plot_static_mapper_graph(
            pipe, X_arr, node_color_statistic=oversized_statistic
        )
Ejemplo n.º 11
0
def test_node_color_statistic_as_ndarray():
    """An array with one value per node, passed as `node_color_statistic`,
    is used unchanged as the node marker colours."""
    pipe = make_mapper_pipeline()
    n_nodes = len(pipe.fit_transform(X_arr).vs)
    per_node_values = np.arange(n_nodes)

    figure = plot_static_mapper_graph(
        pipe, X_arr, node_color_statistic=per_node_values
    )
    plotted_colors = figure.data[1].marker.color

    assert np.array_equal(plotted_colors, per_node_values)
Ejemplo n.º 12
0
def test_min_intersection(X, min_intersection):
    """No edge of the fitted graph may weigh less than `min_intersection`."""
    # TODO: Replace pipe and graph by Nerve transformer
    # TODO: Improve the Hypothesis strategy to avoid needing to hardcode the
    # min_side to be greater than n_intervals (10 by default).
    mapper_pipe = make_mapper_pipeline(min_intersection=min_intersection)
    fitted_graph = mapper_pipe.fit_transform(X)

    # Every edge weight must reach the requested minimum intersection size
    for weight in fitted_graph.es["weight"]:
        assert weight >= min_intersection
Ejemplo n.º 13
0
def test_pipeline_cloned(X, clone_pipeline, layout_dim,
                         color_by_columns_dropdown):
    """Verify that the pipeline is changed on interaction if and only if
    `clone_pipeline` is False (with `layout_dim` set to 2 or 3).

    For each parameter, the widgets whose description matches are set to a
    new value, after which the corresponding entry of
    ``pipe.get_mapper_params()`` is compared with the expected value.
    """
    # TODO: Monitor development of the ipytest project to convert these into
    # true notebook tests integrated with pytest
    params = {
        "cover": {
            "initial": {"n_intervals": 10, "kind": "uniform",
                        "overlap_frac": 0.1},
            "new": {"n_intervals": 15, "kind": "balanced", "overlap_frac": 0.2}
            },
        "clusterer": {
            "initial": {"affinity": "euclidean"},
            "new": {"affinity": "manhattan"}
            },
        "contract_nodes": {"initial": True, "new": False},
        "min_intersection": {"initial": 4, "new": 1},
        }

    pipe = make_mapper_pipeline(
        cover=CubicalCover(**params["cover"]["initial"]),
        clusterer=FirstSimpleGap(**params["clusterer"]["initial"]),
        contract_nodes=params["contract_nodes"]["initial"],
        min_intersection=params["min_intersection"]["initial"]
        )
    fig = plot_interactive_mapper_graph(
        pipe, X, clone_pipeline=clone_pipeline, layout_dim=layout_dim,
        color_by_columns_dropdown=color_by_columns_dropdown
        )

    def _change_and_check(description, mapper_key, initial_value, new_value):
        # Push the new value into every matching widget, then compare the
        # value held by the original pipeline with the expected one.
        for widget in _get_widgets_by_trait(fig, "description", description):
            widget.set_state({'value': new_value})
        actual = pipe.get_mapper_params()[mapper_key]
        expected = initial_value if clone_pipeline else new_value
        assert actual == expected

    nested_steps = ["cover", "clusterer"]
    for step, values in params.items():
        if step not in nested_steps:
            # Top-level parameters: the widget description is the step name
            _change_and_check(step, step, values["initial"], values["new"])
            continue
        # Nested estimator parameters are keyed "<step>__<param_name>"
        for param_name, initial_value in values["initial"].items():
            _change_and_check(param_name, f"{step}__{param_name}",
                              initial_value, values["new"][param_name])
Ejemplo n.º 14
0
def test_colors_same_2d_3d(X, color_variable, node_color_statistic):
    """Node marker colours must be equal for the 2D and 3D static plots."""
    pipe = make_mapper_pipeline()

    def _plot(dim):
        return plot_static_mapper_graph(
            pipe, X, layout_dim=dim, color_variable=color_variable,
            node_color_statistic=node_color_statistic,
        )

    figure_2d = _plot(2)
    figure_3d = _plot(3)
    colors_2d = figure_2d.data[1].marker.color
    colors_3d = figure_3d.data[1].marker.color
    assert colors_2d == colors_3d
Ejemplo n.º 15
0
    def _runMapper(self):
        """
        Create one Mapper graph per projection component of the train data.

        Unless ``self.remake`` is set, previously pickled graphs and pipelines
        are loaded from ``TEMP_DATA`` when both cache files exist. Otherwise a
        pipeline is built and fitted for each of the ``self.n_components``
        components, and the results are stored in ``self.graphs`` and
        ``self.mapper_pipes`` and pickled to ``TEMP_DATA``.

        :return: None
        """
        log.debug("--->creating mappers...")
        graphs_path = TEMP_DATA + "%s_firstsimplegap_graphs" % self.label
        pipes_path = TEMP_DATA + "%s_mapper_pipes" % self.label

        # The cache is only usable when both files are present; otherwise
        # fall through and rebuild instead of failing on the missing file.
        cache_available = os.path.exists(graphs_path) and os.path.exists(pipes_path)
        if not self.remake and cache_available:
            # NOTE(review): pickle.load can execute arbitrary code; these
            # files must only ever come from a trusted location.
            with open(graphs_path, "rb") as fgin:
                self.graphs = pickle.load(fgin)
            with open(pipes_path, "rb") as fpin:
                self.mapper_pipes = pickle.load(fpin)
            return

        # A single clusterer instance is handed to every pipeline below.
        clusterer = FirstSimpleGap()
        self.mapper_pipes = []

        log.debug("------> creating projection components...")

        for k in range(self.n_components):
            log.debug("---------> on component {}/{}...".format(k + 1, self.n_components))
            proj = Projection(columns=k)
            filter_func = Pipeline(steps=[('pca', self.rep), ('proj', proj)])
            filtered_data = filter_func.fit_transform(self.data)
            cover = OneDimensionalCover(n_intervals=self.n_intervals, overlap_frac=self.overlap_frac, kind='balanced')
            cover.fit(filtered_data)
            mapper_pipe = make_mapper_pipeline(scaler=None,
                                               filter_func=filter_func,
                                               cover=cover,
                                               clusterer=clusterer,
                                               verbose=(log.getEffectiveLevel() == logging.DEBUG),
                                               n_jobs=1)
            mapper_pipe.set_params(filter_func__proj__columns=k)
            self.mapper_pipes.append(("PCA%d" % (k + 1), mapper_pipe))

        # try parallelization
        log.debug("------> entering parallelization...")

        # Each entry of self.mapper_pipes is a (name, pipeline) pair.
        self.graphs = [mapper_pipe[1].fit_transform(self.data) for mapper_pipe in self.mapper_pipes]

        #
        # self.graphs = Parallel(n_jobs=5, prefer="threads")(
        #     delayed(mapper_pipe[1].fit_transform)(self.data) for mapper_pipe in self.mapper_pipes
        # )

        # Context managers close the files even if pickling raises.
        with open(graphs_path, "wb") as fg:
            pickle.dump(self.graphs, fg)

        with open(pipes_path, "wb") as fp:
            pickle.dump(self.mapper_pipes, fp)
Ejemplo n.º 16
0
def test_colors_same_2d_3d(X, color_data, node_color_statistic):
    """Node marker colours must be equal for the 2D and 3D static plots."""
    pipe = make_mapper_pipeline()

    def _plot(dim):
        return plot_static_mapper_graph(
            pipe, X, layout_dim=dim, color_data=color_data,
            node_color_statistic=node_color_statistic,
        )

    figure_2d = _plot(2)
    figure_3d = _plot(3)
    colors_2d = figure_2d.data[1].marker.color
    colors_3d = figure_3d.data[1].marker.color
    assert np.array_equal(colors_2d, colors_3d)
Ejemplo n.º 17
0
    def test_cluster_sizes(self):
        """Verify that the node sizes parsed from the hover text of the
        static plot sum to the total number of elements over all nodes of
        the fitted graph."""
        pipe = make_mapper_pipeline(clusterer=FirstSimpleGap())
        figure = plot_static_mapper_graph(pipe, X_arr)
        hovertexts = figure.data[1].hovertext

        displayed_total = sum(
            _get_size_from_hovertext(text) for text in hovertexts
        )

        fitted_graph = pipe.fit_transform(X_arr)
        actual_total = sum(
            len(elements) for elements in fitted_graph.vs['node_elements']
        )

        assert displayed_total == actual_total
Ejemplo n.º 18
0
def test_node_color_statistic_as_ndarray(is_2d):
    """An array `node_color_statistic` determines the node marker colours.

    With `is_2d` set, the per-node values are duplicated into a two-column
    array; in both cases the plotted colours must equal the first column.
    """
    pipe = make_mapper_pipeline()
    n_nodes = len(pipe.fit_transform(X_arr).vs)
    first_column = np.arange(n_nodes)
    node_color_statistic = (
        np.vstack([first_column, first_column]).T if is_2d else first_column
    )

    figure = plot_static_mapper_graph(
        pipe, X_arr, node_color_statistic=node_color_statistic
    )
    plotted_colors = figure.data[1].marker.color

    assert np.array_equal(plotted_colors, first_column)
Ejemplo n.º 19
0
    def test_is_data_present(self):
        """Verify that what we see in the graph corresponds to
        the number of samples in the graph.

        The number of plotted nodes must not exceed the number of samples in
        `X`, and there must be exactly one colour per plotted node.
        """
        pipe = make_mapper_pipeline()
        # Silence warnings only for the duration of this test body instead of
        # permanently changing the process-wide warning filters.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            fig = plot_static_mapper_graph(pipe,
                                           X,
                                           color_variable=colors,
                                           clone_pipeline=False)
            node_trace = fig.get_state()['_data'][1]
            xy = np.stack([node_trace[c] for c in ['x', 'y']]).transpose()
            # Compare row counts explicitly; comparing the shape tuples would
            # be lexicographic and also depend on the number of columns.
            assert X.shape[0] >= xy.shape[0]

            real_colors = node_trace['marker']['color']
            assert len(real_colors) == xy.shape[0]
Ejemplo n.º 20
0
def test_color_features_as_estimator_or_callable(color_features):
    """Node colours produced with `color_features` must match the per-node
    mean of the first component of a 2-component PCA of `X_arr`."""
    pipe = make_mapper_pipeline()
    node_elements = pipe.fit_transform(X_arr).vs["node_elements"]

    # Reference values: mean of the first PCA component over each node
    transformed = PCA(n_components=2).fit_transform(X_arr)
    per_node_means = [
        np.mean(transformed[members, 0]) for members in node_elements
    ]
    expected_colors = np.array(per_node_means)

    figure = plot_static_mapper_graph(
        pipe, X_arr, color_data=X_arr, color_features=color_features
    )

    assert_almost_equal(figure.data[1].marker.color, expected_colors)
Ejemplo n.º 21
0
def test_column_dropdown(X, columns, layout_dim):
    """Colours stored in the dropdown buttons must match direct plots.

    The initially displayed colours must equal those of the first button, and
    the i-th button must equal a plot made with the i-th entry of `columns`
    as `color_features`.
    """
    pipe = make_mapper_pipeline()
    dropdown_figure = plot_static_mapper_graph(
        pipe, X, color_data=X, layout_dim=layout_dim
    )
    buttons = dropdown_figure.layout.updatemenus[0].buttons

    def _button_colors(position):
        return list(buttons[position].args[0]["marker.color"][1])

    def _node_colors(figure):
        return list(figure.data[1].marker.color)

    assert _node_colors(dropdown_figure) == _button_colors(0)

    for position, column in enumerate(columns):
        column_figure = plot_static_mapper_graph(
            pipe, X, layout_dim=layout_dim, color_data=X,
            color_features=column,
        )
        assert _node_colors(column_figure) == _button_colors(position)
Ejemplo n.º 22
0
    def test_is_data_present(self):
        """Verify that the plotted node trace is consistent with the data:
        equally many x and y coordinates, no more nodes than samples in
        `X_arr`, and one colour per node."""
        pipe = make_mapper_pipeline()
        figure = plot_static_mapper_graph(
            pipe, X_arr, color_data=colors, clone_pipeline=False
        )
        node_trace = figure.data[1]

        n_x = node_trace.x.shape[0]
        n_y = node_trace.y.shape[0]
        assert n_x == n_y

        # A node groups samples, so there cannot be more nodes than samples
        assert len(X_arr) >= n_x

        assert len(node_trace.marker.color) == n_x
Ejemplo n.º 23
0
def Mappe(data, intervals, overlap_frac=0.3, n_jobs=2):
    """Fit a Mapper pipeline on `data` and return its edges as a networkx graph.

    The pipeline uses an `Eccentricity` filter, a `CubicalCover` and a
    `DBSCAN` clusterer with default settings.

    Parameters
    ----------
    data
        Input passed to the pipeline's ``fit_transform``.
    intervals
        Value of ``n_intervals`` for the cover.
    overlap_frac
        Value of ``overlap_frac`` for the cover (default 0.3).
    n_jobs
        Value of ``n_jobs`` for the pipeline (default 2).

    Returns
    -------
    A ``networkx.Graph`` built from the edge list of the Mapper graph.
    NOTE(review): because only the edge list is used, Mapper nodes without
    any edge presumably do not appear in the result — confirm this is intended.
    """
    pipe = make_mapper_pipeline(
        filter_func=Eccentricity(),
        cover=CubicalCover(n_intervals=intervals, overlap_frac=overlap_frac),
        clusterer=DBSCAN(),
        verbose=False,
        n_jobs=n_jobs,
    )
    mapper_graph = pipe.fit_transform(data)
    return nx.Graph(mapper_graph.get_edgelist())
Ejemplo n.º 24
0
    def test_cluster_sizes(self):
        """Verify that the node sizes parsed from the hover text of the
        interactive plot sum to the total number of elements over all nodes
        of the fitted graph."""
        pipe = make_mapper_pipeline(clusterer=FirstSimpleGap())
        # Silence warnings only for the duration of this test body instead of
        # permanently changing the process-wide warning filters.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            fig = plot_interactive_mapper_graph(pipe, X)
            w_scatter = self._get_widget_by_trait(fig, 'data')

            node_sizes_vis = [self._get_size_from_hovertext(s_)
                              for s_ in w_scatter.get_state()
                              ['_data'][1]['hovertext']]

            g = pipe.fit_transform(X)
            node_size_real = [len(node)
                              for node in g['node_metadata']['node_elements']]

        assert sum(node_sizes_vis) == sum(node_size_real)
Ejemplo n.º 25
0
def test_interactive_plotter_attrs(X, color_data, layout_dim):
    """Simple tests on the attributes stored by MapperInteractivePlotter when
    plotting: `graph_`, `pipeline_`, `color_features_`, `node_summaries_` and
    `figure_`."""
    pipe = make_mapper_pipeline()
    plotter = MapperInteractivePlotter(pipe, X)
    plotter.plot(color_data=color_data, layout_dim=layout_dim)

    def _assert_same_attrs(interactive_obj, static_obj, attr_names):
        # Values may be arrays, so reduce the element-wise comparison
        for attr_name in attr_names:
            assert np.all(getattr(interactive_obj, attr_name) ==
                          getattr(static_obj, attr_name))

    # 1 Test graph_
    graph = pipe.fit_transform(X)
    assert plotter.graph_.isomorphic(graph)

    # 2 Test pipeline_
    assert str(plotter.pipeline_) == str(pipe)

    # 3 Test color_features_
    # Without colour data, the expected features are the sample indices as a
    # single column
    if color_data is None:
        expected_color_features = np.arange(len(X)).reshape(-1, 1)
    else:
        expected_color_features = color_data
    assert np.array_equal(plotter.color_features_, expected_color_features)

    # 4 Test node_summaries_
    assert len(plotter.node_summaries_) == len(graph.vs)

    # 5 Test figure_
    static_fig = plot_static_mapper_graph(pipe, X, color_data=color_data,
                                          layout_dim=layout_dim)
    interactive_fig = plotter.figure_

    _assert_same_attrs(interactive_fig.data[0], static_fig.data[0],
                       ["hoverinfo", "line", "name", "x", "y"])

    # Excluding marker, which gets treated separately below
    _assert_same_attrs(interactive_fig.data[1], static_fig.data[1],
                       ["hoverinfo", "hovertext", "mode", "name", "x", "y"])

    _assert_same_attrs(interactive_fig.data[1].marker,
                       static_fig.data[1].marker,
                       ["color", "colorbar", "colorscale", "line", "opacity",
                        "reversescale", "showscale", "size", "sizemin",
                        "sizemode", "sizeref"])
Ejemplo n.º 26
0
def test_node_intersection(X):
    """Every edge must join two nodes that share at least one element."""
    # TODO: Replace pipe and graph by Nerve transformer
    # TODO: Improve the Hypothesis strategy to avoid needing to hardcode the
    # min_side to be greater than n_intervals (10 by default).
    mapper_pipe = make_mapper_pipeline()
    graph = mapper_pipe.fit_transform(X)

    # An edge whose two endpoint nodes have disjoint element sets would be
    # incorrect, so no such edge may exist.
    for source, target in graph.get_edgelist():
        source_elements = set(graph.vs['node_elements'][source])
        target_elements = graph.vs['node_elements'][target]
        assert not source_elements.isdisjoint(target_elements)
Ejemplo n.º 27
0
def test_color_by_column_dropdown_2d(layout_dim):
    """Colours stored in the dropdown buttons must match direct plots.

    The initially displayed colours must equal those of the first button, and
    button ``i + 1`` must equal a plot made with ``color_variable=i``.
    """
    pipe = make_mapper_pipeline()
    dropdown_figure = plot_static_mapper_graph(
        pipe, X, layout_dim=layout_dim, color_by_columns_dropdown=True
    )
    buttons = dropdown_figure.layout.updatemenus[0].buttons

    def _button_colors(position):
        return list(buttons[position].args[0]["marker.color"][1])

    def _node_colors(figure):
        return list(figure.data[1].marker.color)

    assert _node_colors(dropdown_figure) == _button_colors(0)

    n_columns = X.shape[1]
    for column in range(n_columns):
        column_figure = plot_static_mapper_graph(
            pipe, X, layout_dim=layout_dim, color_variable=column
        )
        # Button 0 is compared with the initial colours above, so column i
        # is checked against button i + 1
        assert _node_colors(column_figure) == _button_colors(column + 1)
Ejemplo n.º 28
0
    def test_is_data_present(self):
        """Verify that what we see in the graph corresponds to
        the number of samples in the graph.

        The node trace must have equally many x and y coordinates, no more
        nodes than there are samples in `X`, and one colour per node.
        """
        pipe = make_mapper_pipeline()
        # Silence warnings only for the duration of this test body instead of
        # permanently changing the process-wide warning filters.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            fig = plot_static_mapper_graph(pipe, X,
                                           color_variable=colors,
                                           clone_pipeline=False)
            node_trace_x = fig.get_state()['_data'][1]["x"]
            node_trace_y = fig.get_state()['_data'][1]["y"]

            assert node_trace_x["shape"][0] == node_trace_y["shape"][0]

            num_nodes = node_trace_x["shape"][0]
            assert len(X) >= num_nodes

            fig_colors = fig.get_state()['_data'][1]['marker']['color']
            assert len(fig_colors) == num_nodes
Ejemplo n.º 29
0
def test_contract_nodes():
    """Test that, on a pathological dataset, we generate a graph without edges
    when `contract_nodes` is set to True and with edges when it is set to
    False.

    The dataset is built from `make_circles` by removing the points whose
    filter value lies within `gap` of either end of each overlap between
    consecutive cover intervals.
    """
    X = make_circles(n_samples=2000)[0]

    filter_func = Projection()
    cover = OneDimensionalCover(n_intervals=5, overlap_frac=0.4)
    p = filter_func.fit_transform(X)
    m = cover.fit_transform(p)

    gap = 0.1
    idx_to_remove = []
    for i in range(m.shape[1] - 1):
        # Points belonging to both interval i and interval i + 1
        inters = np.logical_and(m[:, i], m[:, i + 1])
        inters_idx = np.flatnonzero(inters)
        p_inters = p[inters_idx]
        min_p, max_p = np.min(p_inters), np.max(p_inters)
        idx_to_remove += list(np.flatnonzero((min_p <= p)
                                             & (p <= min_p + gap)))
        idx_to_remove += list(np.flatnonzero((max_p - gap <= p)
                                             & (p <= max_p)))

    # A set gives constant-time membership tests when filtering the samples
    removed = set(idx_to_remove)
    X_f = X[[x for x in range(len(X)) if x not in removed]]

    clusterer = DBSCAN(eps=0.05)
    pipe = make_mapper_pipeline(filter_func=filter_func,
                                cover=cover,
                                clusterer=clusterer,
                                contract_nodes=True)
    graph = pipe.fit_transform(X_f)
    assert not len(graph.es)

    pipe.set_params(contract_nodes=False)
    graph = pipe.fit_transform(X_f)
    assert len(graph.es)
Ejemplo n.º 30
0
def test_invalid_layout_algorithm(X):
    """Expect a KeyError when an unknown `layout` name is passed to the
    static plot."""
    # Build the pipeline outside the `raises` block so that only the plotting
    # call can satisfy the expected KeyError.
    pipe = make_mapper_pipeline()
    with pytest.raises(KeyError):
        _ = plot_static_mapper_graph(pipe, X, layout="foobar")