Beispiel #1
0
    def test_convertions(self):
        """A mapper graph passed through ``to_networkx`` comes back as ``nx.Graph``."""
        km = KeplerMapper(verbose=0)
        points = np.random.rand(100, 2)
        simplicial_complex = km.map(points)

        converted = to_networkx(simplicial_complex)
        assert isinstance(converted, nx.Graph)
    def test_agglomerative_clustering(self):
        """Smoke test: ``map`` runs with an ``AgglomerativeClustering`` clusterer."""
        km = KeplerMapper()

        points = np.random.rand(100, 2)
        projected = km.fit_transform(points)

        # No assertion follows: the test passes as long as mapping does not raise.
        clusterer = cluster.AgglomerativeClustering()
        km.map(projected, points, clusterer=clusterer)
Beispiel #3
0
 def test_membership(self):
     """The ``membership`` attribute of a converted node equals the mapper node's value."""
     km = KeplerMapper(verbose=0)
     points = np.random.rand(100, 2)
     simplicial_complex = km.map(points)

     # Use the first node of the mapper output as the sample to verify.
     first_item = list(simplicial_complex["nodes"].items())[0]
     node_id, members = first_item

     converted = to_networkx(simplicial_complex)
     assert converted.nodes[node_id]["membership"] == members
Beispiel #4
0
    def test_lens_size(self):
        """``fit_transform`` returns one lens row per input sample."""
        km = KeplerMapper()

        samples = np.random.rand(100, 10)
        projected = km.fit_transform(samples)

        n_projected = projected.shape[0]
        n_samples = samples.shape[0]
        assert n_projected == n_samples
def profile():
    """Profile ``fit_transform`` followed by ``map`` over a batch of blob datasets.

    The datasets are generated before the profiler is enabled, so only the
    mapper calls are measured. Afterwards a summary line and the
    cumulative-time statistics filtered by "kmapper" are printed.
    """
    num_sets = 100
    blob_size = 1000
    nr_cubes = 10
    overlap = 0.2

    # Keep only the first element returned by make_blobs; the second is unused.
    blob_list = [datasets.make_blobs(blob_size)[0] for _ in range(num_sets)]

    mapper = KeplerMapper(verbose=0)

    profiler = cProfile.Profile()
    profiler.enable()

    for blobs in blob_list:
        projected = mapper.fit_transform(blobs)
        mapper.map(projected, blobs, nr_cubes=nr_cubes, overlap_perc=overlap)

    profiler.disable()

    report = io.StringIO()
    stats = pstats.Stats(profiler, stream=report)
    stats.strip_dirs()
    stats.sort_stats('cumulative')
    stats.print_stats("kmapper")

    header = "Ran {} blobs of size {} with params (nr_cubes:{}\toverlap:{})"
    print(header.format(num_sets, blob_size, nr_cubes, overlap))
    print(report.getvalue())
def test_color_function_deprecated_replaced():
    """Call the plotly helpers with ``color_function`` and check the recorded warnings.

    After each helper call the recorded warnings are handed to
    ``_test_raised_deprecation_warning`` for verification.
    """
    km = KeplerMapper()
    X, labels = make_circles(1000, random_state=0)
    lens = km.fit_transform(X, projection=[0])
    color_values = lens[:, 0]
    sc = km.map(lens, X)
    custom_tooltips = np.array(["customized_%s" % label for label in labels])

    with warnings.catch_warnings(record=True) as caught:
        # Make sure every warning is recorded, including repeated ones.
        warnings.simplefilter("always")

        # TODO: plotlyviz.plotlyviz

        # plotlyviz.get_mapper_graph
        _graph, _summary, _distribution = get_mapper_graph(
            sc, color_function=color_values
        )
        _test_raised_deprecation_warning(caught)

        # plotlyviz.scomplex_to_graph
        scomplex_to_graph(
            simplicial_complex=sc,
            color_function=color_values,
            X=X,
            X_names=[],
            lens=lens,
            lens_names=[],
            custom_tooltips=custom_tooltips,
            colorscale=default_colorscale,
        )
        _test_raised_deprecation_warning(caught)
Beispiel #7
0
    def test_affinity_prop_clustering(self):
        """Smoke test: ``map`` runs with an ``AffinityPropagation`` clusterer."""
        km = KeplerMapper()

        points = np.random.rand(100, 2)
        projected = km.fit_transform(points)

        # No assertion follows: the test passes as long as mapping does not raise.
        clusterer = cluster.AffinityPropagation()
        km.map(projected, points, clusterer=clusterer)
def profile():
    """Profile ``fit_transform`` followed by ``map`` over a batch of blob datasets.

    The datasets are generated before the profiler is enabled, so only the
    mapper calls (including construction of the ``Cover``) are measured.
    Afterwards a summary line and the cumulative-time statistics filtered by
    "kmapper" are printed.
    """
    num_sets = 100
    blob_size = 1000
    n_cubes = 10
    overlap = 0.2

    # Keep only the first element returned by make_blobs; the second is unused.
    blob_list = [datasets.make_blobs(blob_size)[0] for _ in range(num_sets)]

    mapper = KeplerMapper(verbose=0)

    profiler = cProfile.Profile()
    profiler.enable()

    for blobs in blob_list:
        projected = mapper.fit_transform(blobs)
        cover = Cover(n_cubes=n_cubes, perc_overlap=overlap)
        mapper.map(projected, blobs, cover=cover)

    profiler.disable()

    report = io.StringIO()
    stats = pstats.Stats(profiler, stream=report)
    stats.strip_dirs()
    stats.sort_stats("cumulative")
    stats.print_stats("kmapper")

    header = "Ran {} blobs of size {} with params (n_cubes:{}\toverlap:{})"
    print(header.format(num_sets, blob_size, n_cubes, overlap))
    print(report.getvalue())
Beispiel #9
0
 def test_map_custom_lens(self):
     """``map`` called on raw data reports ``"custom"`` under ``meta_graph``."""
     # Original author's note: map may currently require fit_transform to be
     # called first; this test calls map directly without it.
     km = KeplerMapper()
     points = np.random.rand(100, 2)
     simplicial_complex = km.map(points)

     reported = simplicial_complex["meta_graph"]
     assert reported == "custom"
Beispiel #10
0
    def test_logging_in_project(self, capsys):
        """With ``verbose=2``, ``project`` writes a "Projecting on" line to stdout."""
        km = KeplerMapper(verbose=2)
        points = np.random.rand(100, 2)
        km.project(points)

        # readouterr() yields (stdout, stderr); only stdout is inspected.
        stdout, _stderr = capsys.readouterr()
        assert "Projecting on" in stdout
    def test_format_mapper_data(self, jinja_env):
        """``format_mapper_data`` output mentions node, column and tooltip names."""
        km = KeplerMapper()
        points, labels = make_circles(1000, random_state=0)
        projected = km.fit_transform(points, projection=[0])
        simplicial_complex = km.map(projected, points)

        # Synthetic column names, one per column of each array.
        projected_names = ["projected_%s" % col for col in range(projected.shape[1])]
        inverse_names = ["inverse_%s" % col for col in range(points.shape[1])]
        tooltips = np.array(["customized_%s" % label for label in labels])

        formatted = format_mapper_data(
            simplicial_complex,
            projected[:, 0],  # color function: first lens column
            points,
            inverse_names,
            projected,
            projected_names,
            tooltips,
            jinja_env,
        )
        # Serialise to JSON text so simple substring checks can inspect it.
        serialised = json.dumps(formatted)

        # TODO test more properties!
        expected_fragments = (
            "name",
            "cube2_cluster0",
            "projected_0",
            "inverse_0",
            "customized_",
        )
        for fragment in expected_fragments:
            assert fragment in serialised
Beispiel #12
0
 def test_map_custom_lens(self):
     """``map`` on raw data records a "custom" projection and a "None" scaler."""
     # Original author's note: map may currently require fit_transform to be
     # called first; this test calls map directly without it.
     km = KeplerMapper()
     points = np.random.rand(100, 2)
     meta = km.map(points)["meta_data"]

     assert meta["projection"] == "custom"
     assert meta["scaler"] == "None"
Beispiel #13
0
    def test_no_link(self):
        """Groups with no shared members produce no links."""
        km = KeplerMapper()

        disjoint_groups = {"a": [1, 2, 3, 4], "b": [5, 6, 7]}
        found = km._create_links(disjoint_groups)

        assert not found
Beispiel #14
0
    def test_wrong_id(self):
        """An unknown cluster id yields an empty array from ``data_from_cluster_id``."""
        km = KeplerMapper(verbose=1)
        points = np.random.rand(100, 2)

        simplicial_complex = km.map(points)
        members = km.data_from_cluster_id("new node", simplicial_complex, points)

        expected = np.array([])
        np.testing.assert_array_equal(members, expected)
Beispiel #15
0
    def test_runs_with_logging_0(self, capsys):
        """With ``verbose=0``, ``map`` writes nothing to stdout."""
        km = KeplerMapper(verbose=0)
        points = np.random.rand(100, 2)
        km.map(points)

        # readouterr() yields (stdout, stderr); only stdout is inspected.
        stdout, _stderr = capsys.readouterr()
        assert stdout == ""
Beispiel #16
0
    def test_node_color_function_must_be_np_function(self, sc):
        """``visualize`` raises ``AttributeError`` for an unknown ``node_color_function``."""
        km = KeplerMapper()
        expected_message = r".*must be a function available on `numpy` class.*"

        with pytest.raises(AttributeError, match=expected_message):
            km.visualize(sc, node_color_function=["yinz"])
Beispiel #17
0
    def test_precomputed(self):
        """Mapping a precomputed distance matrix matches mapping the raw points.

        Both runs share the same lens and cover settings; only the DBSCAN
        metric and the ``precomputed`` flag differ.
        """
        km = KeplerMapper()

        points = np.random.rand(100, 2)
        condensed = distance.pdist(points, metric="euclidean")
        pairwise = distance.squareform(condensed)

        lens = km.fit_transform(pairwise)

        from_distances = km.map(
            lens,
            X=pairwise,
            cover=Cover(n_cubes=10, perc_overlap=0.8),
            clusterer=cluster.DBSCAN(metric="precomputed", min_samples=3),
            precomputed=True,
        )
        from_points = km.map(
            lens,
            X=points,
            cover=Cover(n_cubes=10, perc_overlap=0.8),
            clusterer=cluster.DBSCAN(metric="euclidean", min_samples=3),
        )

        for key in ("links", "nodes", "simplices"):
            assert from_distances[key] == from_points[key]
Beispiel #18
0
    def test_logging_in_fit_transform(self, capsys):
        """With ``verbose=2``, ``fit_transform`` reports the pipeline length on stdout."""
        km = KeplerMapper(verbose=2)
        points = np.random.rand(100, 2)
        km.fit_transform(points)

        # readouterr() yields (stdout, stderr); only stdout is inspected.
        stdout, _stderr = capsys.readouterr()
        assert "Composing projection pipeline of length 1" in stdout
def test_BasicCover():
    """Smoke test: map and visualize circle data cast to float64."""
    # TODO: add a mock that asserts the cover was called appropriately.. or test number of cubes etc.
    points, _labels = datasets.make_circles()
    points = points.astype(np.float64)

    km = KeplerMapper()
    simplicial_complex = km.map(points)
    km.visualize(simplicial_complex)
Beispiel #20
0
 def test_visualize_graph_with_cluster_stats_above_below(self):
     """Smoke test: ``visualize`` runs on data where some rows have a larger first column.

     All entries start at 1; rows drawn at random from indices below 20 get
     their first column set to 10. Nothing is asserted on the output.
     """
     km = KeplerMapper()
     X = np.ones((1000, 3))
     chosen_rows = np.random.choice(20, 1000)
     X[chosen_rows, 0] = 10

     projected = km.fit_transform(X, projection=[0])
     simplicial_complex = km.map(projected, X)
     # Note: the raw data X is passed as the lens here, not `projected`.
     km.visualize(simplicial_complex, X=X, lens=X)
Beispiel #21
0
    def test_complete_pipeline(self, CoverClass):
        """Smoke test: map with a default ``CoverClass`` instance, then visualize."""
        # TODO: add a mock that asserts the cover was called appropriately.. or test number of cubes etc.
        points, _labels = datasets.make_circles()
        points = points.astype(np.float64)

        km = KeplerMapper()
        simplicial_complex = km.map(points, cover=CoverClass())
        km.visualize(simplicial_complex)
Beispiel #22
0
    def test_tuple_projection_fit(self):
        """``project`` accepts a ``(model, X, y)`` tuple as the projection."""
        km = KeplerMapper()
        features = np.random.rand(100, 5)
        target = np.random.rand(100, 1)

        projection = (Lasso(), features, target)
        lens = km.project(features, projection=projection, scaler=None)

        # Hard to test more precisely; at least it runs and has the right shape.
        assert lens.shape == (100, 1)
Beispiel #23
0
    def test_finds_a_link(self):
        """Two groups with identical members are linked, in either direction.

        The link may be recorded under "a" (pointing at "b") or under "b"
        (pointing at "a"); both layouts are accepted.
        """
        mapper = KeplerMapper()

        groups = {"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}
        links = mapper._create_links(groups)

        assert "a" in links or "b" in links
        # Look keys up with .get: indexing a missing "a" in a plain dict would
        # raise KeyError before the "b" alternative is ever evaluated, and on a
        # defaultdict it would silently insert an empty entry.
        assert links.get("a") == ["b"] or links.get("b") == ["a"]
Beispiel #24
0
    def test_pass_through_result(self):
        """The dict handed to ``_create_links`` equals the returned links."""
        km = KeplerMapper()

        disjoint_groups = {"a": [1], "b": [2]}

        accumulator = {}
        returned = km._create_links(disjoint_groups, accumulator)

        assert accumulator == returned
Beispiel #25
0
    def test_precomputed_with_knn_lens(self):
        """A ``knn_distance_3`` lens over a chebyshev distance matrix is one column wide."""
        km = KeplerMapper()

        points = np.random.rand(100, 5)

        options = {"projection": "knn_distance_3", "distance_matrix": "chebyshev"}
        lens = km.fit_transform(points, **options)

        assert lens.shape == (100, 1)
    def test_project_sklearn_class(self):
        """Projecting with a ``PCA`` instance equals calling ``PCA.fit_transform`` directly."""
        km = KeplerMapper()
        points = np.random.rand(100, 5)
        lens = km.project(points, projection=PCA(n_components=1), scaler=None)

        # Reference result from a fresh, identically configured PCA.
        expected = PCA(n_components=1).fit_transform(points)

        assert lens.shape == (100, 1)
        np.testing.assert_array_equal(lens, expected)
    def test_format_meta_with_meta(self):
        """``format_meta`` returns the supplied custom meta under ``custom_meta``."""
        km = KeplerMapper()
        points = np.random.rand(1000, 10)
        projected = km.fit_transform(points, projection=[0])
        simplicial_complex = km.map(projected, points)

        custom = "My custom_meta"
        formatted = format_meta(simplicial_complex, custom)

        assert formatted["custom_meta"] == custom
Beispiel #28
0
    def test_projection(self):
        """A list projection selects the leading data columns, up to scaling."""
        # Accommodate scaling: values are in (0, 1) but will be scaled slightly.
        tolerance = 0.1

        km = KeplerMapper()
        points = np.random.rand(100, 5)

        # First two columns, then the first column only.
        for columns in ([0, 1], [0]):
            lens = km.fit_transform(points, projection=columns)
            expected = points[:, : len(columns)]
            np.testing.assert_allclose(lens, expected, atol=tolerance)
Beispiel #29
0
    def test_distance_matrix(self):
        """``distance_matrix="euclidean"`` gives the same lens as a precomputed matrix."""
        # todo, test other distance_matrix functions
        km = KeplerMapper(verbose=4)
        points = np.random.rand(100, 10)
        lens_from_option = km.fit_transform(points, distance_matrix="euclidean")

        # Build the same euclidean distance matrix by hand with scipy.
        condensed = distance.pdist(points, metric="euclidean")
        pairwise = distance.squareform(condensed)
        lens_from_matrix = km.fit_transform(pairwise)

        np.testing.assert_array_equal(lens_from_option, lens_from_matrix)
    def test_file_not_written(self, tmpdir):
        """``visualize`` with ``save_file=False`` leaves the temp directory empty."""
        km = KeplerMapper(verbose=1)

        html_target = tmpdir.join("output.html")

        points = np.random.rand(1000, 10)
        projected = km.fit_transform(points, projection=[0])
        simplicial_complex = km.map(projected, points)
        km.visualize(
            simplicial_complex, path_html=html_target.strpath, save_file=False
        )

        written = tmpdir.listdir()
        assert len(written) == 0, "file was never written to"
    def test_no_warn_normally(self, recwarn):
        """Confirm that a plain ``map`` call emits no warnings.

        ``recwarn`` is pytest's warning recorder; the test requires it to be
        empty and, in particular, to hold no ``DeprecationWarning``.
        """
        mapper = KeplerMapper()
        data = np.random.rand(100, 10)
        lens = mapper.fit_transform(data)

        # Let every warning through the filters so nothing raised by map() is
        # suppressed before the recorder sees it.
        warnings.simplefilter("always")
        mapper.map(lens, data)

        assert len(recwarn) == 0
        # The recorder stores warning records, not warning classes, so the
        # check has to look at each record's category; testing the class itself
        # for membership in the recorder would always succeed.
        assert not any(
            issubclass(record.category, DeprecationWarning) for record in recwarn
        )
    def test_file_not_written(self, tmpdir):
        """``visualize`` with ``save_file=False`` leaves the temp directory empty."""
        km = KeplerMapper()

        html_target = tmpdir.join('output.html')

        points = np.random.rand(1000, 10)
        projected = km.fit_transform(points, projection=[0])
        simplicial_complex = km.map(projected, points)
        km.visualize(
            simplicial_complex, path_html=html_target.strpath, save_file=False
        )

        written = tmpdir.listdir()
        assert len(written) == 0, "file was never written to"
    def test_visualize_standalone_same(self, tmpdir):
        """Ensure the visualization does not depend on the mapper object used.

        The same graph is rendered by the mapper that built it and by a fresh
        mapper; both results must be equal.
        """
        km = KeplerMapper()

        html_target = tmpdir.join('output.html')

        points = np.random.rand(1000, 10)
        projected = km.fit_transform(points, projection=[0])
        simplicial_complex = km.map(projected, points)
        from_original = km.visualize(simplicial_complex, path_html=html_target.strpath)

        fresh_mapper = KeplerMapper()
        from_fresh = fresh_mapper.visualize(
            simplicial_complex, path_html=html_target.strpath
        )

        assert from_original == from_fresh
def test_BasicCover():
    """Smoke test: map and visualize circle data with default settings."""
    # TODO: add a mock that asserts the cover was called appropriately.. or test number of cubes etc.
    points, _labels = datasets.make_circles()

    km = KeplerMapper()
    simplicial_complex = km.map(points)
    km.visualize(simplicial_complex)