Ejemplo n.º 1
0
    def test_projection(self):
        """Projecting onto column indices should reproduce those input columns.

        The comparison uses a loose absolute tolerance: the inputs lie in
        (0, 1) and, per the original author's note, are scaled slightly.
        """
        tolerance = 0.1  # accommodate the slight scaling of values in (0, 1)

        km = KeplerMapper()
        points = np.random.rand(100, 5)

        # Two leading columns.
        two_col_lens = km.fit_transform(points, projection=[0, 1])
        np.testing.assert_allclose(two_col_lens, points[:, :2], atol=tolerance)

        # Single leading column (kept 2-D by slicing with ``:1``).
        one_col_lens = km.fit_transform(points, projection=[0])
        np.testing.assert_allclose(one_col_lens, points[:, :1], atol=tolerance)
Ejemplo n.º 2
0
    def test_distance_matrix(self):
        """``distance_matrix="euclidean"`` must match a hand-built euclidean matrix."""
        # TODO: test other distance_matrix functions
        km = KeplerMapper(verbose=4)
        points = np.random.rand(100, 10)
        lens_from_option = km.fit_transform(points, distance_matrix="euclidean")

        # Build the same square pairwise-distance matrix by hand and project it.
        condensed = distance.pdist(points, metric="euclidean")
        square = distance.squareform(condensed)
        lens_from_matrix = km.fit_transform(square)

        np.testing.assert_array_equal(lens_from_option, lens_from_matrix)
Ejemplo n.º 3
0
    def test_agglomerative_clustering(self):
        """Smoke test: ``map`` accepts an ``AgglomerativeClustering`` clusterer."""
        km = KeplerMapper()

        points = np.random.rand(100, 2)
        projected = km.fit_transform(points)

        # No assertion: the test passes as long as no exception is raised.
        clusterer = cluster.AgglomerativeClustering()
        km.map(projected, points, clusterer=clusterer)
Ejemplo n.º 4
0
    def test_format_mapper_data(self, jinja_env):
        """Formatted graph data should mention node, column and tooltip names."""
        km = KeplerMapper()
        points, labels = make_circles(1000, random_state=0)
        projected = km.fit_transform(points, projection=[0])
        graph = km.map(projected, points)

        color_function = projected[:, 0]
        projected_names = ["projected_%s" % (col) for col in range(projected.shape[1])]
        inverse_names = ["inverse_%s" % (col) for col in range(points.shape[1])]
        tooltips = np.array(["customized_%s" % (label) for label in labels])

        formatted = format_mapper_data(
            graph,
            color_function,
            points,
            inverse_names,
            projected,
            projected_names,
            tooltips,
            jinja_env,
        )
        # Serialize to a JSON string so plain substring checks can see every field.
        serialized = json.dumps(formatted)

        # TODO test more properties!
        expected_fragments = (
            "name",
            "cube2_cluster0",
            "projected_0",
            "inverse_0",
            "customized_",
        )
        for fragment in expected_fragments:
            assert fragment in serialized
Ejemplo n.º 5
0
def profile():
    """Profile ``fit_transform`` + ``map`` over random blob datasets and print the result.

    Only profiler entries matching "kmapper" are printed, sorted by
    cumulative time.
    """
    num_sets = 100
    blob_size = 1000
    nr_cubes = 10
    overlap = 0.2

    # Build every dataset before the profiler is enabled so that data
    # generation is not part of the measurement.
    blob_list = [datasets.make_blobs(blob_size)[0] for _ in range(num_sets)]

    mapper = KeplerMapper(verbose=0)

    profiler = cProfile.Profile()
    profiler.enable()
    for blobs in blob_list:
        projected = mapper.fit_transform(blobs)
        mapper.map(projected, blobs, nr_cubes=nr_cubes, overlap_perc=overlap)
    profiler.disable()

    report = io.StringIO()
    stats = pstats.Stats(profiler, stream=report)
    stats.strip_dirs()
    stats.sort_stats("cumulative")
    stats.print_stats("kmapper")

    summary = "Ran {} blobs of size {} with params (nr_cubes:{}\toverlap:{})".format(
        num_sets, blob_size, nr_cubes, overlap
    )
    print(summary)
    print(report.getvalue())
Ejemplo n.º 6
0
    def test_precomputed(self):
        """Mapping a precomputed distance matrix should give the same graph as raw data."""
        km = KeplerMapper()

        points = np.random.rand(100, 2)
        pairwise = distance.squareform(distance.pdist(points, metric="euclidean"))

        projected = km.fit_transform(pairwise)

        # Same cover and DBSCAN settings for both runs; only the metric and the
        # form of X (distance matrix vs. raw points) differ.
        from_matrix = km.map(
            projected,
            X=pairwise,
            cover=Cover(n_cubes=10, perc_overlap=0.8),
            clusterer=cluster.DBSCAN(metric="precomputed", min_samples=3),
            precomputed=True,
        )
        from_points = km.map(
            projected,
            X=points,
            cover=Cover(n_cubes=10, perc_overlap=0.8),
            clusterer=cluster.DBSCAN(metric="euclidean", min_samples=3),
        )

        for key in ("links", "nodes", "simplices"):
            assert from_matrix[key] == from_points[key]
Ejemplo n.º 7
0
def test_color_function_deprecated_replaced():
    """The deprecated ``color_function`` keyword should still work in plotlyviz but warn."""
    km = KeplerMapper()
    X, labels = make_circles(1000, random_state=0)
    lens = km.fit_transform(X, projection=[0])
    color_values = lens[:, 0]
    sc = km.map(lens, X)
    tooltips = np.array(["customized_%s" % (label) for label in labels])

    with warnings.catch_warnings(record=True) as caught:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")

        # TODO: plotlyviz.plotlyviz

        # plotlyviz.get_mapper_graph (unpacking also checks a 3-item return)
        _graph, _summary, _distribution = get_mapper_graph(
            sc, color_function=color_values
        )
        _test_raised_deprecation_warning(caught)

        # plotlyviz.scomplex_to_graph
        # NOTE(review): ``caught`` still holds the warning from the call above,
        # so this second check may pass regardless -- confirm against the helper.
        scomplex_to_graph(
            simplicial_complex=sc,
            color_function=color_values,
            X=X,
            X_names=[],
            lens=lens,
            lens_names=[],
            custom_tooltips=tooltips,
            colorscale=default_colorscale,
        )
        _test_raised_deprecation_warning(caught)
Ejemplo n.º 8
0
def profile():
    """Profile ``fit_transform`` + ``map`` (with an explicit ``Cover``) and print the result.

    Only profiler entries matching "kmapper" are printed, sorted by
    cumulative time.
    """
    num_sets = 100
    blob_size = 1000
    n_cubes = 10
    overlap = 0.2

    # Build every dataset before the profiler is enabled so that data
    # generation is not part of the measurement.
    blob_list = [datasets.make_blobs(blob_size)[0] for _ in range(num_sets)]

    mapper = KeplerMapper(verbose=0)

    profiler = cProfile.Profile()
    profiler.enable()
    for blobs in blob_list:
        projected = mapper.fit_transform(blobs)
        cover = Cover(n_cubes=n_cubes, perc_overlap=overlap)
        mapper.map(projected, blobs, cover=cover)
    profiler.disable()

    report = io.StringIO()
    stats = pstats.Stats(profiler, stream=report)
    stats.strip_dirs()
    stats.sort_stats("cumulative")
    stats.print_stats("kmapper")

    summary = "Ran {} blobs of size {} with params (n_cubes:{}\toverlap:{})".format(
        num_sets, blob_size, n_cubes, overlap
    )
    print(summary)
    print(report.getvalue())
Ejemplo n.º 9
0
    def test_lens_size(self):
        """The lens must have exactly one row per input sample."""
        km = KeplerMapper()

        points = np.random.rand(100, 10)
        projected = km.fit_transform(points)

        n_lens_rows = projected.shape[0]
        n_samples = points.shape[0]
        assert n_lens_rows == n_samples
Ejemplo n.º 10
0
    def test_logging_in_fit_transform(self, capsys):
        """With ``verbose=2``, ``fit_transform`` should report the pipeline length on stdout."""
        km = KeplerMapper(verbose=2)
        points = np.random.rand(100, 2)
        km.fit_transform(points)

        # readouterr() yields (stdout, stderr); only stdout is inspected.
        stdout, _stderr = capsys.readouterr()
        assert "Composing projection pipeline of length 1" in stdout
Ejemplo n.º 11
0
    def test_affinity_prop_clustering(self):
        """Smoke test: ``map`` accepts an ``AffinityPropagation`` clusterer."""
        km = KeplerMapper()

        points = np.random.rand(100, 2)
        projected = km.fit_transform(points)

        # No assertion: the test passes as long as no exception is raised.
        clusterer = cluster.AffinityPropagation()
        km.map(projected, points, clusterer=clusterer)
Ejemplo n.º 12
0
 def test_visualize_graph_with_cluster_stats_above_below(self):
     """Smoke test: ``visualize`` runs when both ``X`` and ``lens`` are supplied."""
     km = KeplerMapper()
     points = np.ones((1000, 3))
     # Raise the first column to 10 on the drawn rows (indices in [0, 20)),
     # leaving every other entry at 1.
     drawn_rows = np.random.choice(20, 1000)
     points[drawn_rows, 0] = 10
     projected = km.fit_transform(points, projection=[0])
     graph = km.map(projected, points)
     # The raw data is deliberately passed as both ``X`` and ``lens``.
     km.visualize(graph, X=points, lens=points)
Ejemplo n.º 13
0
    def test_format_meta_with_meta(self):
        """``format_meta`` should pass custom metadata through under "custom_meta"."""
        km = KeplerMapper()
        points = np.random.rand(1000, 10)
        projected = km.fit_transform(points, projection=[0])
        graph = km.map(projected, points)

        custom = "My custom_meta"
        formatted = format_meta(graph, custom)
        assert formatted["custom_meta"] == custom
Ejemplo n.º 14
0
    def test_precomputed_with_knn_lens(self):
        """A ``knn_distance_3`` lens over a chebyshev distance matrix is one column wide."""
        km = KeplerMapper()

        points = np.random.rand(100, 5)

        projected = km.fit_transform(
            points,
            projection="knn_distance_3",
            distance_matrix="chebyshev",
        )
        expected_shape = (100, 1)
        assert projected.shape == expected_shape
Ejemplo n.º 15
0
    def test_lens_names(self):
        """Omitting ``lens_names`` should render the same as passing an empty list."""
        km = KeplerMapper()
        points = np.random.rand(100, 5)
        projected = km.fit_transform(points)
        graph = km.map(projected, points)

        rendered_default = km.visualize(graph=graph)
        rendered_empty_names = km.visualize(graph=graph, lens_names=[])

        assert rendered_default == rendered_empty_names
Ejemplo n.º 16
0
    def test_visualize_standalone(self):
        """Smoke test: ``visualize`` runs on a mapper that did not build the graph."""
        builder = KeplerMapper()

        points = np.random.rand(100, 10)
        projected = builder.fit_transform(points, projection=[0])
        graph = builder.map(projected, points)

        # A fresh mapper object that has never seen the data.
        fresh = KeplerMapper()
        fresh.visualize(graph)
Ejemplo n.º 17
0
    def test_visualize_one_color_function(self):
        """Smoke test: ``visualize`` runs with 1-D color values and one function name."""
        km = KeplerMapper()
        points, _labels = make_circles(1000, random_state=0)
        projected = km.fit_transform(points, projection=[0])
        graph = km.map(projected, points)

        first_lens_column = projected[:, 0]
        km.visualize(
            graph,
            color_values=first_lens_column,
            color_function_name=["hotdog"],
        )
Ejemplo n.º 18
0
    def test_format_meta(self):
        """``format_meta`` output should include the node count and custom metadata."""
        km = KeplerMapper()
        points = np.random.rand(1000, 10)
        projected = km.fit_transform(points, projection=[0])
        graph = km.map(projected, points)

        # The node count appears as its own paragraph.
        node_count_html = "<p>%s</p>" % (len(graph["nodes"]))
        assert node_count_html in format_meta(graph)

        # A (title, text) pair is rendered as a heading followed by a paragraph.
        description_html = "<h3>Description</h3>\n<p>A short description</p>"
        with_description = format_meta(
            graph, custom_meta=[("Description", "A short description")]
        )
        assert description_html in with_description
Ejemplo n.º 19
0
    def test_file_not_written(self, tmpdir):
        """``visualize(..., save_file=False)`` must not create the HTML file.

        ``tmpdir`` is pytest's per-test temporary directory; it must still be
        empty after the call.
        """
        mapper = KeplerMapper()

        html_file = tmpdir.join('output.html')

        data = np.random.rand(1000, 10)
        lens = mapper.fit_transform(data, projection=[0])
        graph = mapper.map(lens, data)
        mapper.visualize(graph, path_html=html_file.strpath, save_file=False)

        # The message is shown on failure, so it describes the violation.
        assert len(tmpdir.listdir()) == 0, "a file was written despite save_file=False"
Ejemplo n.º 20
0
    def test_file_not_written(self, tmpdir):
        """``visualize(..., save_file=False)`` must not create the HTML file.

        Runs with ``verbose=1``. ``tmpdir`` is pytest's per-test temporary
        directory; it must still be empty after the call.
        """
        mapper = KeplerMapper(verbose=1)

        html_file = tmpdir.join("output.html")

        data = np.random.rand(1000, 10)
        lens = mapper.fit_transform(data, projection=[0])
        graph = mapper.map(lens, data)
        mapper.visualize(graph, path_html=html_file.strpath, save_file=False)

        # The message is shown on failure, so it describes the violation.
        assert len(tmpdir.listdir()) == 0, "a file was written despite save_file=False"
Ejemplo n.º 21
0
    def test_no_warn_normally(self, recwarn):
        """Confirm that the current ``map`` API records no warnings.

        ``recwarn`` is pytest's warning recorder for the whole test; after a
        default ``fit_transform`` + ``map`` it must be empty and, in
        particular, hold no ``DeprecationWarning``.
        """
        mapper = KeplerMapper()
        data = np.random.rand(100, 10)
        lens = mapper.fit_transform(data)

        # Make sure repeated warnings are not suppressed while mapping.
        warnings.simplefilter("always")
        mapper.map(lens, data)

        assert len(recwarn) == 0
        # ``DeprecationWarning not in recwarn`` would compare the class against
        # recorded warning objects and always be true; inspect each recorded
        # warning's category instead.
        assert not any(
            issubclass(recorded.category, DeprecationWarning) for recorded in recwarn
        )
Ejemplo n.º 22
0
    def test_format_meta_with_meta(self):
        """``_format_meta`` should pass custom metadata through under "custom_meta"."""
        km = KeplerMapper()
        points = np.random.rand(1000, 10)
        projected = km.fit_transform(points, projection=[0])
        graph = km.map(projected, points)

        function_names = ["Row number"]
        node_aggregation = "mean"
        custom = "My custom_meta"

        formatted = _format_meta(graph, function_names, node_aggregation, custom)
        assert formatted["custom_meta"] == custom
Ejemplo n.º 23
0
    def test_file_written(self, tmpdir):
        """``visualize`` with ``path_html`` writes exactly one file holding the returned HTML.

        ``tmpdir`` is pytest's per-test temporary directory.
        """
        mapper = KeplerMapper()

        html_file = tmpdir.join("output.html")

        data = np.random.rand(1000, 2)
        lens = mapper.fit_transform(data, projection=[0])
        graph = mapper.map(lens, data)
        viz = mapper.visualize(graph, path_html=html_file.strpath)

        # Check existence first so a missing file fails with a clear message
        # instead of an I/O error from read().
        assert len(tmpdir.listdir()) == 1, "expected exactly one file to be written"
        assert html_file.read() == viz
Ejemplo n.º 24
0
    def test_no_color_values_many_color_function_exception(self):
        """Several color function names with no color values must be rejected."""
        km = KeplerMapper()
        points, _labels = make_circles(1000, random_state=0)
        projected = km.fit_transform(points, projection=[0])
        graph = km.map(projected, points)

        names = ["hotdog", "hotdog", "hotdiggitydog"]
        with pytest.raises(Exception) as excinfo:
            km.visualize(graph, color_values=None, color_function_name=names)

        error_text = str(excinfo.value)
        assert "Refusing to proceed" in error_text
Ejemplo n.º 25
0
 def test_visualize_multiple_node_color_functions(self):
     """Smoke test: ``visualize`` with several node color functions but 1-D color values."""
     km = KeplerMapper()
     points, _labels = make_circles(1000, random_state=0)
     projected = km.fit_transform(points, projection=[0])
     graph = km.map(projected, points)

     first_lens_column = projected[:, 0]
     aggregations = ["mean", "std", "median", "max"]
     km.visualize(
         graph,
         color_values=first_lens_column,
         color_function_name="hotdog",
         node_color_function=aggregations,
     )
Ejemplo n.º 26
0
    def test_new_api_old_defaults(self):
        """Passing one legacy keyword should leave the other cover setting at its expected value.

        With only ``nr_cubes`` given, the cover's ``perc_overlap`` must be 0.1;
        with only ``overlap_perc`` given, its ``n_cubes`` must be 10.
        """
        km = KeplerMapper()
        points = np.random.rand(100, 10)
        projected = km.fit_transform(points)

        km.map(projected, points, nr_cubes=10)
        cover_after_nr_cubes = km.cover
        assert cover_after_nr_cubes.perc_overlap == 0.1

        km.map(projected, points, overlap_perc=0.1)
        cover_after_overlap = km.cover
        assert cover_after_overlap.n_cubes == 10
Ejemplo n.º 27
0
    def test_color_function_deprecated_replaced(self, default_colorscale):
        """The deprecated ``color_function`` keyword should still be accepted but warn.

        Exercises ``KeplerMapper.visualize``, ``_format_mapper_data`` and
        ``_graph_data_distribution`` in turn.
        """
        km = KeplerMapper()
        points, labels = make_circles(1000, random_state=0)
        projected = km.fit_transform(points, projection=[0])
        graph = km.map(projected, points)

        color_values = projected[:, 0]
        aggregation = "mean"
        lens_names = ["projected_%s" % (col) for col in range(projected.shape[1])]
        X_names = ["inverse_%s" % (col) for col in range(points.shape[1])]
        tooltips = np.array(["customized_%s" % (label) for label in labels])

        # https://docs.python.org/3/library/warnings.html#testing-warnings
        with warnings.catch_warnings(record=True) as caught:
            # Cause all warnings to always be triggered.
            warnings.simplefilter("always")

            # kmapper.visualize
            km.visualize(
                graph, color_function=projected, color_function_name="lens[:, 0]"
            )
            _test_raised_deprecation_warning(caught)

            # NOTE(review): ``caught`` keeps accumulating, so the checks below
            # may be satisfied by the first warning -- confirm against the helper.

            # visuals._format_mapper_data
            _format_mapper_data(
                graph=graph,
                color_function=color_values,
                node_color_function=aggregation,
                X=points,
                X_names=X_names,
                lens=projected,
                lens_names=lens_names,
                custom_tooltips=tooltips,
            )
            _test_raised_deprecation_warning(caught)

            # visuals._graph_data_distribution
            _graph_data_distribution(
                graph,
                color_function=projected,
                node_color_function=aggregation,
                colorscale=default_colorscale,
            )
            _test_raised_deprecation_warning(caught)
Ejemplo n.º 28
0
    def test_no_color_values_one_color_function_no_exception_yes_warning(self):
        """One color function name with no color values should warn rather than raise."""
        km = KeplerMapper()
        points, _labels = make_circles(1000, random_state=0)
        projected = km.fit_transform(points, projection=[0])
        graph = km.map(projected, points)

        with warnings.catch_warnings(record=True) as caught:
            # Cause all warnings to always be triggered.
            warnings.simplefilter("always")

            km.visualize(graph, color_values=None, color_function_name=["hotdog"])

        # Only the most recent warning is inspected.
        latest_message = str(caught[-1].message)
        assert "unexpected" in latest_message
Ejemplo n.º 29
0
    def test_visualize_multiple_color_functions(self):
        """Smoke test: ``visualize`` with two columns of color values and two names."""
        km = KeplerMapper()
        points, _labels = make_circles(1000, random_state=0)
        projected = km.fit_transform(points, projection=[0])
        graph = km.map(projected, points)

        # Second color column is the lens reversed.
        forward = np.array(projected)
        backward = np.flip(forward)
        stacked = np.column_stack([forward, backward])

        km.visualize(
            graph,
            color_values=stacked,
            color_function_name=["hotdog", "hotdiggitydog"],
        )
Ejemplo n.º 30
0
    def test_str_options(self):
        """Each string projection should equal the matching NumPy reduction per row.

        Only the first and last rows are checked, with scaling disabled.
        """
        km = KeplerMapper()

        points = np.random.rand(100, 10)

        # Projection name -> reference reduction applied to a single row.
        reference = {
            "sum": np.sum,
            "mean": np.mean,
            "median": np.median,
            "max": np.max,
            "min": np.min,
            "std": np.std,
            "l2norm": np.linalg.norm,
        }

        first_row, last_row = points[0], points[-1]
        for name, reduce_row in reference.items():
            projected = km.fit_transform(points, projection=name, scaler=None)
            np.testing.assert_almost_equal(projected[0][0], reduce_row(first_row))
            np.testing.assert_almost_equal(projected[-1][0], reduce_row(last_row))

        # For dist_mean, just make sure the code runs without breaking, not sure how to test this best
        km.fit_transform(points, projection="dist_mean", scaler=None)
Ejemplo n.º 31
0
    def test_warn_old_api(self):
        """Confirm the old keyword API still works but emits a deprecation warning."""
        km = KeplerMapper()
        points = np.random.rand(100, 10)
        projected = km.fit_transform(points)

        # Each legacy keyword alone, then both together.
        legacy_keyword_sets = (
            {"nr_cubes": 10},
            {"overlap_perc": 10},
            {"nr_cubes": 10, "overlap_perc": 0.1},
        )
        for legacy_kwargs in legacy_keyword_sets:
            with pytest.deprecated_call():
                km.map(projected, points, **legacy_kwargs)
Ejemplo n.º 32
0
    def test_visualize_standalone_same(self, tmpdir):
        """Ensure the visualization does not depend on the mapper object that renders it."""
        builder = KeplerMapper()

        target = tmpdir.join("output.html")
        html_path = target.strpath

        points = np.random.rand(1000, 10)
        projected = builder.fit_transform(points, projection=[0])
        graph = builder.map(projected, points)
        rendered_by_builder = builder.visualize(graph, path_html=html_path)

        # A fresh mapper renders the same graph to the same path.
        fresh = KeplerMapper()
        rendered_by_fresh = fresh.visualize(graph, path_html=html_path)

        assert rendered_by_builder == rendered_by_fresh
Ejemplo n.º 33
0
    def test_visualize_standalone_same(self, tmpdir):
        """Ensure the visualization does not depend on the mapper object that renders it."""
        builder = KeplerMapper()

        target = tmpdir.join("output.html")
        html_path = target.strpath

        points = np.random.rand(1000, 10)
        projected = builder.fit_transform(points, projection=[0])
        graph = builder.map(projected, points)
        rendered_by_builder = builder.visualize(graph, path_html=html_path)

        # A fresh mapper renders the same graph to the same path.
        fresh = KeplerMapper()
        rendered_by_fresh = fresh.visualize(graph, path_html=html_path)

        assert rendered_by_builder == rendered_by_fresh