def test_projection(self):
    """Projecting onto column indices returns approximately those columns."""
    # Inputs are drawn from (0, 1) but get rescaled slightly by
    # fit_transform, so compare with a generous absolute tolerance.
    tolerance = 0.1

    km = KeplerMapper()
    points = np.random.rand(100, 5)

    two_columns = km.fit_transform(points, projection=[0, 1])
    np.testing.assert_allclose(two_columns, points[:, :2], atol=tolerance)

    one_column = km.fit_transform(points, projection=[0])
    np.testing.assert_allclose(one_column, points[:, :1], atol=tolerance)
def test_distance_matrix(self):
    """A lens built with ``distance_matrix`` equals one built from pdist output."""
    # TODO: exercise the other distance_matrix metrics as well.
    km = KeplerMapper(verbose=4)
    points = np.random.rand(100, 10)

    lens_from_option = km.fit_transform(points, distance_matrix="euclidean")

    # Build the same square distance matrix by hand and feed it in directly.
    condensed = distance.pdist(points, metric="euclidean")
    square = distance.squareform(condensed)
    lens_from_matrix = km.fit_transform(square)

    np.testing.assert_array_equal(lens_from_option, lens_from_matrix)
def test_agglomerative_clustering(self):
    """Smoke test: ``map`` runs with an AgglomerativeClustering clusterer."""
    km = KeplerMapper()
    points = np.random.rand(100, 2)
    projected = km.fit_transform(points)

    clusterer = cluster.AgglomerativeClustering()
    # No assertion: the test passes as long as mapping does not raise.
    graph = km.map(projected, points, clusterer=clusterer)
def test_format_mapper_data(self, jinja_env):
    """Formatted graph data contains the expected names and tooltips."""
    km = KeplerMapper()
    points, targets = make_circles(1000, random_state=0)
    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)

    color_function = projected[:, 0]
    lens_names = ["projected_%s" % (col) for col in range(projected.shape[1])]
    feature_names = ["inverse_%s" % (col) for col in range(points.shape[1])]
    tooltips = np.array(["customized_%s" % (target) for target in targets])

    formatted = format_mapper_data(
        graph,
        color_function,
        points,
        feature_names,
        projected,
        lens_names,
        tooltips,
        jinja_env,
    )

    # Serialise to JSON text so plain substring checks can inspect the content.
    serialised = json.dumps(formatted)

    # TODO test more properties!
    expected_fragments = (
        "name",
        "cube2_cluster0",
        "projected_0",
        "inverse_0",
        "customized_",
    )
    for fragment in expected_fragments:
        assert fragment in serialised
def profile():
    """Profile fit_transform + map over random blob datasets and print the stats.

    Only profiler entries matching "kmapper" are printed, sorted by
    cumulative time.
    """
    num_sets = 100
    blob_size = 1000
    nr_cubes = 10
    overlap = 0.2

    # Build every dataset before the profiler is enabled.
    blob_list = [datasets.make_blobs(blob_size)[0] for _ in range(num_sets)]

    mapper = KeplerMapper(verbose=0)

    profiler = cProfile.Profile()
    profiler.enable()
    for blob in blob_list:
        projected = mapper.fit_transform(blob)
        mapper.map(projected, blob, nr_cubes=nr_cubes, overlap_perc=overlap)
    profiler.disable()

    buffer = io.StringIO()
    stats = pstats.Stats(profiler, stream=buffer)
    stats.strip_dirs()
    stats.sort_stats('cumulative')
    stats.print_stats("kmapper")

    summary = "Ran {} blobs of size {} with params (nr_cubes:{}\toverlap:{})".format(
        num_sets, blob_size, nr_cubes, overlap
    )
    print(summary)
    print(buffer.getvalue())
def test_precomputed(self):
    """Mapping a precomputed distance matrix matches mapping the raw points."""
    km = KeplerMapper()
    points = np.random.rand(100, 2)
    pairwise = distance.squareform(distance.pdist(points, metric="euclidean"))
    projected = km.fit_transform(pairwise)

    # Clustering on the precomputed pairwise distances.
    from_distances = km.map(
        projected,
        X=pairwise,
        cover=Cover(n_cubes=10, perc_overlap=0.8),
        clusterer=cluster.DBSCAN(metric="precomputed", min_samples=3),
        precomputed=True,
    )

    # Clustering on the raw points with the equivalent metric.
    from_points = km.map(
        projected,
        X=points,
        cover=Cover(n_cubes=10, perc_overlap=0.8),
        clusterer=cluster.DBSCAN(metric="euclidean", min_samples=3),
    )

    for key in ("links", "nodes", "simplices"):
        assert from_distances[key] == from_points[key]
def test_color_function_deprecated_replaced():
    """Passing ``color_function`` to the plotlyviz helpers raises a deprecation warning."""
    mapper = KeplerMapper()
    points, targets = make_circles(1000, random_state=0)
    projected = mapper.fit_transform(points, projection=[0])
    color_values = projected[:, 0]
    simplicial_complex = mapper.map(projected, points)
    feature_names = []
    projection_names = []
    tooltips = np.array(["customized_%s" % (target) for target in targets])

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, even ones that would normally be suppressed.
        warnings.simplefilter("always")

        # TODO: plotlyviz.plotlyviz

        # plotlyviz.get_mapper_graph
        json_graph, mapper_summary, colorf_distribution = get_mapper_graph(
            simplicial_complex, color_function=color_values
        )
        _test_raised_deprecation_warning(caught)

        # plotlyviz.scomplex_to_graph
        _ = scomplex_to_graph(
            simplicial_complex=simplicial_complex,
            color_function=color_values,
            X=points,
            X_names=feature_names,
            lens=projected,
            lens_names=projection_names,
            custom_tooltips=tooltips,
            colorscale=default_colorscale,
        )
        _test_raised_deprecation_warning(caught)
def profile():
    """Profile fit_transform + map (with an explicit Cover) over random blobs.

    Only profiler entries matching "kmapper" are printed, sorted by
    cumulative time.
    """
    num_sets = 100
    blob_size = 1000
    n_cubes = 10
    overlap = 0.2

    # Build every dataset before the profiler is enabled.
    blob_list = [datasets.make_blobs(blob_size)[0] for _ in range(num_sets)]

    mapper = KeplerMapper(verbose=0)

    profiler = cProfile.Profile()
    profiler.enable()
    for blob in blob_list:
        projected = mapper.fit_transform(blob)
        cover = Cover(n_cubes=n_cubes, perc_overlap=overlap)
        mapper.map(projected, blob, cover=cover)
    profiler.disable()

    buffer = io.StringIO()
    stats = pstats.Stats(profiler, stream=buffer)
    stats.strip_dirs()
    stats.sort_stats("cumulative")
    stats.print_stats("kmapper")

    summary = "Ran {} blobs of size {} with params (n_cubes:{}\toverlap:{})".format(
        num_sets, blob_size, n_cubes, overlap
    )
    print(summary)
    print(buffer.getvalue())
def test_lens_size(self):
    """The lens has exactly one row per input sample."""
    km = KeplerMapper()
    points = np.random.rand(100, 10)
    projected = km.fit_transform(points)

    lens_rows = projected.shape[0]
    sample_rows = points.shape[0]
    assert lens_rows == sample_rows
def test_logging_in_fit_transform(self, capsys):
    """With verbose=2, fit_transform reports the projection pipeline on stdout."""
    km = KeplerMapper(verbose=2)
    points = np.random.rand(100, 2)
    km.fit_transform(points)

    # readouterr() yields an (out, err) pair; only stdout is inspected.
    stdout, _stderr = capsys.readouterr()
    assert "Composing projection pipeline of length 1" in stdout
def test_affinity_prop_clustering(self):
    """Smoke test: ``map`` runs with an AffinityPropagation clusterer."""
    km = KeplerMapper()
    points = np.random.rand(100, 2)
    projected = km.fit_transform(points)

    clusterer = cluster.AffinityPropagation()
    # No assertion: the test passes as long as mapping does not raise.
    graph = km.map(projected, points, clusterer=clusterer)
def test_visualize_graph_with_cluster_stats_above_below(self):
    """Smoke test: visualize a graph whose first feature has a few outliers."""
    km = KeplerMapper()
    points = np.ones((1000, 3))

    # Indices are sampled from range(20), so only rows among the first 20
    # receive the value 10 in column 0.
    rows = np.random.choice(20, 1000)
    points[rows, 0] = 10

    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)

    # The raw data is deliberately passed as both X and lens.
    output = km.visualize(graph, X=points, lens=points)
def test_format_meta_with_meta(self):
    """Custom meta passed to format_meta is returned under ``custom_meta``."""
    km = KeplerMapper()
    points = np.random.rand(1000, 10)
    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)

    custom_meta = "My custom_meta"
    formatted = format_meta(graph, custom_meta)

    assert formatted["custom_meta"] == custom_meta
def test_precomputed_with_knn_lens(self):
    """A knn-distance lens over a chebyshev distance matrix is a single column."""
    km = KeplerMapper()
    points = np.random.rand(100, 5)

    projected = km.fit_transform(
        points,
        projection="knn_distance_3",
        distance_matrix="chebyshev",
    )

    expected_shape = (100, 1)
    assert projected.shape == expected_shape
def test_lens_names(self):
    """Omitting ``lens_names`` renders the same output as passing an empty list."""
    km = KeplerMapper()
    points = np.random.rand(100, 5)
    projected = km.fit_transform(points)
    graph = km.map(projected, points)

    default_html = km.visualize(graph=graph)
    empty_names_html = km.visualize(graph=graph, lens_names=[])

    assert default_html == empty_names_html
def test_visualize_standalone(self):
    """Smoke test: a fresh mapper can visualize a graph built by another mapper."""
    builder = KeplerMapper()
    points = np.random.rand(100, 10)
    projected = builder.fit_transform(points, projection=[0])
    graph = builder.map(projected, points)

    # This mapper never saw the data; it only receives the finished graph.
    fresh_mapper = KeplerMapper()
    viz = fresh_mapper.visualize(graph)
def test_visualize_one_color_function(self):
    """Smoke test: visualize with 1-d color values and a single function name."""
    km = KeplerMapper()
    points, _targets = make_circles(1000, random_state=0)
    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)

    first_lens_column = projected[:, 0]
    km.visualize(
        graph,
        color_values=first_lens_column,
        color_function_name=["hotdog"],
    )
def test_format_meta(self):
    """format_meta output includes the node count and any custom meta section."""
    km = KeplerMapper()
    points = np.random.rand(1000, 10)
    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)

    # The number of nodes should appear as its own paragraph.
    node_count_html = "<p>%s</p>" % (len(graph["nodes"]))
    assert node_count_html in format_meta(graph)

    # A (title, text) pair should be rendered as a heading plus paragraph.
    description_html = "<h3>Description</h3>\n<p>A short description</p>"
    with_description = format_meta(
        graph, custom_meta=[("Description", "A short description")]
    )
    assert description_html in with_description
def test_file_not_written(self, tmpdir):
    """With save_file=False, visualize leaves the target directory empty."""
    km = KeplerMapper()
    target = tmpdir.join('output.html')

    points = np.random.rand(1000, 10)
    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)

    km.visualize(graph, path_html=target.strpath, save_file=False)

    directory_entries = tmpdir.listdir()
    assert len(directory_entries) == 0, "file was never written to"
def test_file_not_written(self, tmpdir):
    """With save_file=False, a verbose mapper still writes no file."""
    km = KeplerMapper(verbose=1)
    target = tmpdir.join("output.html")

    points = np.random.rand(1000, 10)
    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)

    km.visualize(graph, path_html=target.strpath, save_file=False)

    directory_entries = tmpdir.listdir()
    assert len(directory_entries) == 0, "file was never written to"
def test_no_warn_normally(self, recwarn):
    """Confirm that a plain ``map`` call emits no warnings at all.

    ``recwarn`` is the pytest fixture that records warnings raised while
    the test runs.
    """
    mapper = KeplerMapper()
    data = np.random.rand(100, 10)
    lens = mapper.fit_transform(data)

    # Make sure no warning is hidden by an "ignore"/"once" filter.
    warnings.simplefilter("always")
    graph = mapper.map(lens, data)

    assert len(recwarn) == 0
    # The recorder holds WarningMessage objects, so a bare
    # ``DeprecationWarning not in recwarn`` could never fail; inspect each
    # record's category instead.
    assert not any(
        issubclass(record.category, DeprecationWarning) for record in recwarn
    )
def test_format_meta_with_meta(self):
    """Custom meta passed to _format_meta is returned under ``custom_meta``."""
    km = KeplerMapper()
    points = np.random.rand(1000, 10)
    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)

    custom_meta = "My custom_meta"
    formatted = _format_meta(graph, ["Row number"], "mean", custom_meta)

    assert formatted["custom_meta"] == custom_meta
def test_file_written(self, tmpdir):
    """visualize writes its returned HTML to ``path_html`` and nothing else."""
    km = KeplerMapper()
    target = tmpdir.join("output.html")

    points = np.random.rand(1000, 2)
    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)

    html = km.visualize(graph, path_html=target.strpath)

    # The file content must equal the returned markup.
    assert target.read() == html
    directory_entries = tmpdir.listdir()
    assert len(directory_entries) == 1, "file was written to"
def test_no_color_values_many_color_function_exception(self):
    """Several color function names without color values must raise."""
    km = KeplerMapper()
    points, _targets = make_circles(1000, random_state=0)
    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)

    function_names = ["hotdog", "hotdog", "hotdiggitydog"]
    with pytest.raises(Exception) as excinfo:
        color_values = km.visualize(
            graph,
            color_values=None,
            color_function_name=function_names,
        )

    # Checked after the block: code following the raising call inside the
    # ``with`` body would never run.
    assert "Refusing to proceed" in str(excinfo.value)
def test_visualize_multiple_node_color_functions(self):
    """Smoke test: several node color functions combined with 1-d color values."""
    km = KeplerMapper()
    points, _targets = make_circles(1000, random_state=0)
    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)

    first_lens_column = projected[:, 0]
    aggregations = ["mean", "std", "median", "max"]
    km.visualize(
        graph,
        color_values=first_lens_column,
        color_function_name="hotdog",
        node_color_function=aggregations,
    )
def test_new_api_old_defaults(self):
    """Passing one legacy cover keyword leaves the other at the expected value."""
    km = KeplerMapper()
    points = np.random.rand(100, 10)
    projected = km.fit_transform(points)

    # Only nr_cubes given: the resulting cover should have perc_overlap 0.1.
    _ = km.map(projected, points, nr_cubes=10)
    cover_after_cubes = km.cover
    assert cover_after_cubes.perc_overlap == 0.1

    # Only overlap_perc given: the resulting cover should have n_cubes 10.
    _ = km.map(projected, points, overlap_perc=0.1)
    cover_after_overlap = km.cover
    assert cover_after_overlap.n_cubes == 10
def test_color_function_deprecated_replaced(self, default_colorscale):
    """Passing ``color_function`` to the visuals helpers raises a deprecation warning."""
    mapper = KeplerMapper()
    points, targets = make_circles(1000, random_state=0)
    projected = mapper.fit_transform(points, projection=[0])
    graph = mapper.map(projected, points)

    color_values = projected[:, 0]
    node_color_function = "mean"
    projection_names = ["projected_%s" % (col) for col in range(projected.shape[1])]
    feature_names = ["inverse_%s" % (col) for col in range(points.shape[1])]
    tooltips = np.array(["customized_%s" % (target) for target in targets])

    # https://docs.python.org/3/library/warnings.html#testing-warnings
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, even ones that would normally be suppressed.
        warnings.simplefilter("always")

        # kmapper.visualize
        html = mapper.visualize(
            graph, color_function=projected, color_function_name="lens[:, 0]"
        )
        _test_raised_deprecation_warning(caught)

        # visuals._format_mapper_data
        graph_data = _format_mapper_data(
            graph=graph,
            color_function=color_values,
            node_color_function=node_color_function,
            X=points,
            X_names=feature_names,
            lens=projected,
            lens_names=projection_names,
            custom_tooltips=tooltips,
        )
        _test_raised_deprecation_warning(caught)

        # visuals._graph_data_distribution
        histogram = _graph_data_distribution(
            graph,
            color_function=projected,
            node_color_function=node_color_function,
            colorscale=default_colorscale,
        )
        _test_raised_deprecation_warning(caught)
def test_no_color_values_one_color_function_no_exception_yes_warning(self):
    """One color function name without color values only warns, it does not raise."""
    km = KeplerMapper()
    points, _targets = make_circles(1000, random_state=0)
    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, even ones that would normally be suppressed.
        warnings.simplefilter("always")

        color_values = km.visualize(
            graph, color_values=None, color_function_name=["hotdog"]
        )

        latest_message = str(caught[-1].message)
        assert "unexpected" in latest_message
def test_visualize_multiple_color_functions(self):
    """Smoke test: visualize with a two-column array of color values."""
    km = KeplerMapper()
    points, _targets = make_circles(1000, random_state=0)
    projected = km.fit_transform(points, projection=[0])
    graph = km.map(projected, points)
    color_values = projected[:, 0]

    # Second color column is the first one flipped.
    forward = np.array(projected)
    backward = np.flip(forward)
    stacked = np.column_stack([forward, backward])

    km.visualize(
        graph,
        color_values=stacked,
        color_function_name=["hotdog", "hotdiggitydog"],
    )
def test_str_options(self):
    """Each string projection matches its numpy counterpart when unscaled."""
    km = KeplerMapper()
    points = np.random.rand(100, 10)

    # (projection name, numpy function expected to give the same row value)
    named_reductions = (
        ("sum", np.sum),
        ("mean", np.mean),
        ("median", np.median),
        ("max", np.max),
        ("min", np.min),
        ("std", np.std),
        ("l2norm", np.linalg.norm),
    )

    first_row = points[0]
    last_row = points[-1]

    for name, reduce_row in named_reductions:
        projected = km.fit_transform(points, projection=name, scaler=None)
        np.testing.assert_almost_equal(projected[0][0], reduce_row(first_row))
        np.testing.assert_almost_equal(projected[-1][0], reduce_row(last_row))

    # For dist_mean there is no obvious expected value, so this only checks
    # that the projection runs without raising.
    projected = km.fit_transform(points, projection="dist_mean", scaler=None)
def test_warn_old_api(self):
    """Confirm the old cover keywords still work but trigger a deprecation warning."""
    km = KeplerMapper()
    points = np.random.rand(100, 10)
    projected = km.fit_transform(points)

    # Each legacy keyword on its own, then both together.
    legacy_keyword_sets = (
        {"nr_cubes": 10},
        {"overlap_perc": 10},
        {"nr_cubes": 10, "overlap_perc": 0.1},
    )
    for legacy_kwargs in legacy_keyword_sets:
        with pytest.deprecated_call():
            graph = km.map(projected, points, **legacy_kwargs)
def test_visualize_standalone_same(self, tmpdir):
    """Check that the rendered output does not depend on which mapper renders it."""
    builder = KeplerMapper()
    target = tmpdir.join("output.html")

    points = np.random.rand(1000, 10)
    projected = builder.fit_transform(points, projection=[0])
    graph = builder.map(projected, points)

    html_from_builder = builder.visualize(graph, path_html=target.strpath)

    # A mapper that never saw the data renders the same graph to the same path.
    fresh_mapper = KeplerMapper()
    html_from_fresh = fresh_mapper.visualize(graph, path_html=target.strpath)

    assert html_from_builder == html_from_fresh
def test_visualize_standalone_same(self, tmpdir):
    """Check that the rendered output does not depend on which mapper renders it."""
    builder = KeplerMapper()
    target = tmpdir.join('output.html')

    points = np.random.rand(1000, 10)
    projected = builder.fit_transform(points, projection=[0])
    graph = builder.map(projected, points)

    html_from_builder = builder.visualize(graph, path_html=target.strpath)

    # A mapper that never saw the data renders the same graph to the same path.
    fresh_mapper = KeplerMapper()
    html_from_fresh = fresh_mapper.visualize(graph, path_html=target.strpath)

    assert html_from_builder == html_from_fresh