def create_my_strategy(daily_returns, daily_index_returns, previous_weights):
    """Creates the core strategy using Topological Data Analysis."""
    epsilon = 1.0 / (2 * daily_returns.shape[1])
    threshold = epsilon
    alpha = parameters.ALPHA_MULTIPLIER * epsilon
    beta = parameters.BETA_MULTIPLIER * epsilon

    daily_returns = np.transpose(daily_returns.values)

    mapper = km.KeplerMapper(verbose=0)
    lens1 = mapper.fit_transform(daily_returns, projection='mean')
    lens2 = mapper.fit_transform(daily_returns, projection='std')
    simplicial_complex = mapper.map(
        np.c_[lens1, lens2],
        X=daily_returns,
        clusterer=sklearn.cluster.DBSCAN(eps=0.5, min_samples=3),
        # n_cubes must be an int, not a numpy float
        cover=km.Cover(n_cubes=int(np.ceil(np.power(daily_returns.shape[0], 0.25))),
                       perc_overlap=0.1))

    if create_my_strategy.counter in (0, 79, 158):
        mapper.visualize(simplicial_complex,
                         path_html=parameters.RESULT_DIRECTORY + "\\" +
                         parameters.STOCK_INDEX + "_simplex_" +
                         str(create_my_strategy.counter) + ".html")
    create_my_strategy.counter += 1

    if previous_weights is None:
        current_portfolio = _get_max_sortino_ratios(
            daily_returns, simplicial_complex['nodes']).values()
        weights = np.zeros(daily_returns.shape[0])
        for stock_index in current_portfolio:
            weights[stock_index] = 1.0
        weights /= np.sum(weights)
        return weights

    weights = previous_weights
    alpha_weights = np.zeros(weights.shape)
    beta_weights = np.zeros(weights.shape)
    current_champion_stocks = _get_max_sortino_ratios(
        daily_returns, simplicial_complex['nodes'])
    for cluster, stock_index in current_champion_stocks.items():
        if stock_index != -1:
            # Sum over a list, not a generator: np.sum over a generator is
            # deprecated and does not reduce the values.
            previous_weight_sum = np.sum(
                [previous_weights[i] for i in simplicial_complex['nodes'][cluster]])
            alpha_weights[stock_index] = alpha * previous_weight_sum
            links_dict = simplicial_complex['links']
            for curr_neighbour in links_dict[cluster]:
                neighbour_stock_index = current_champion_stocks[curr_neighbour]
                if neighbour_stock_index != -1:
                    beta_weights[neighbour_stock_index] += beta * previous_weight_sum
    # alpha and beta were already applied when alpha_weights / beta_weights
    # were filled in, so they must not be multiplied in a second time here.
    weights = weights + alpha_weights + beta_weights
    weights[weights / np.sum(weights) < threshold] = 0
    weights /= np.sum(weights)
    return weights
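# A minimal usage sketch for create_my_strategy, assuming it runs in the same
# module so the global `parameters` name below resolves, and that the module's
# _get_max_sortino_ratios helper is available. The multiplier values are
# illustrative stand-ins, not the project's real configuration.
import types

import numpy as np
import pandas as pd

parameters = types.SimpleNamespace(
    ALPHA_MULTIPLIER=2.0,    # assumed value
    BETA_MULTIPLIER=1.0,     # assumed value
    RESULT_DIRECTORY=".",    # assumed value
    STOCK_INDEX="DEMO",      # assumed value
)

# Synthetic daily returns: 250 trading days x 40 stocks.
rng = np.random.default_rng(0)
returns = pd.DataFrame(rng.normal(0.0, 0.01, size=(250, 40)))

# The counter attribute must be initialized before the first call.
create_my_strategy.counter = 0
weights = create_my_strategy(returns, None, previous_weights=None)
assert np.isclose(weights.sum(), 1.0)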
def do_analysis(data, lens, name_prefix, nc, po, metric='euclidean'):
    graph = mapper.map(lens, data.values,
                       clusterer=lk.LinkageGap(verbose=0, metric=metric),
                       cover=km.Cover(n_cubes=nc, perc_overlap=po))
    name = "{}_n{}_o{}".format(name_prefix, nc, po)
    mapper.visualize(graph,
                     # color_values=target.values,
                     color_values=lens,
                     color_function_name=name_prefix,
                     path_html=name + "_nm_diabetes_RM.html",
                     title=name + "_nm_diabetes_RM")
    nx_graph_simple = expo.kmapper_to_nxmapper(graph)
    expo.cytoscapejson_dump(nx_graph_simple, name + "_nm_diabetes_RM_simple.cyjs")
    extra_data = {x: list(data.loc[:, x]) for x in data.columns}
    extra_transforms = {x: np.mean for x in extra_data
                        if x != "Clinical_classification"}
    nx_graph = expo.kmapper_to_nxmapper(graph,
                                        node_extra_data=extra_data,
                                        node_transforms=extra_transforms)
    expo.cytoscapejson_dump(nx_graph, name + "_nm_diabetes_RM.cyjs")
def get_topological_graph(data, exp, clusterer_param, projection=True,
                          cover=(10, 0.1)):
    mapper = km.KeplerMapper(verbose=0)
    clusterer_param *= np.sqrt(data.shape[1])
    clusterer = sklearn.cluster.DBSCAN(eps=clusterer_param, min_samples=1)
    # Guard against bool: isinstance(True, int) is True in Python, so the
    # default projection=True would otherwise be treated as the integer 1.
    if isinstance(projection, int) and not isinstance(projection, bool):
        level_set = mapper.fit_transform(
            exp, projection=np.arange(projection).tolist())
    else:
        level_set = exp
    cover = km.Cover(cover[0], cover[1])
    graph = mapper.map(level_set, data, clusterer=clusterer, cover=cover)
    return remove_duplicated_links(remove_graph_duplicates(graph))
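# Hedged usage sketch for get_topological_graph on synthetic data with a
# precomputed 2-D lens; assumes the module's remove_graph_duplicates and
# remove_duplicated_links helpers are importable. All values are illustrative.
import numpy as np

rng = np.random.default_rng(3)
data = rng.normal(size=(400, 6))   # point cloud: 400 points in R^6
exp = data[:, :2]                  # toy lens: the first two coordinates

# projection=2 keeps the first two lens columns via KeplerMapper; the DBSCAN
# eps of 0.1 is scaled by sqrt(n_features) inside the function.
graph = get_topological_graph(data, exp, clusterer_param=0.1, projection=2)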
def do_analysis(data, lens, name_prefix, nc, po):
    name = "{}_n{}_o{}".format(name_prefix, nc, po)
    graph = mapper.map(lens, data,
                       clusterer=lk.LinkageGap(verbose=0),
                       cover=km.Cover(n_cubes=nc, perc_overlap=po))
    mapper.visualize(graph,
                     color_function=lens,
                     path_html=name + "_cat.html",
                     title=name + "_cat")
def do_analysis(data, lens, cf, name_prefix, nc, po):
    name = "{:s}_n{:d}_o{:.2f}".format(name_prefix, nc, po)
    graph = mapper.map(lens, data,
                       clusterer=lk.LinkageGap(verbose=0, metric='euclidean'),
                       cover=km.Cover(n_cubes=nc, perc_overlap=po))
    mapper.visualize(graph,
                     color_function=cf,
                     path_html=name + "_travel.html",
                     title=name + "_travel")
def do_analysis(data, lens, name_prefix, nc, po, metric='euclidean'):
    graph = mapper.map(lens, data.values,
                       clusterer=lk.LinkageGap(verbose=0, metric=metric),
                       cover=km.Cover(n_cubes=nc, perc_overlap=po))
    name = "{}_n{}_o{}".format(name_prefix, nc, po)
    mapper.visualize(graph,
                     # color_function=target.values,
                     color_function=lens,
                     path_html=name + "_nm_diabetes_RM.html",
                     title=name + "_nm_diabetes_RM")
def perform_TDA(data, labels, NR, PO, n_clusters, filt="knn_distance_2",
                save_string="TDA", title="TDA", color_values=None,
                color_function_name=None):
    """Perform Topological Data Analysis.

    Args:
        data: 2-dimensional array, where the first dimension indexes members
        labels: labels for the first dimension of data
        NR: number of hypercubes
        PO: percent overlap between hypercubes
        n_clusters: number of clusters per hypercube
        filt: filtering scheme; default is "knn_distance_2"
        save_string: path prefix where the html and json are saved
        title: title of the Mapper graph
        color_values: optional per-sample values used to colour the nodes
        color_function_name: optional name shown for the colouring function
    """
    # Step 1. Initiate a Mapper
    mapper = km.KeplerMapper(verbose=2)

    # Step 2. Projection
    projected_data = mapper.fit_transform(data, projection=filt)

    # Step 3. Covering, clustering & mapping
    graph = mapper.map(projected_data, data,
                       cover=km.Cover(n_cubes=NR, perc_overlap=PO),
                       clusterer=AgglomerativeClustering(n_clusters=n_clusters,
                                                         linkage="ward",
                                                         affinity="euclidean",
                                                         memory=None,
                                                         connectivity=None,
                                                         compute_full_tree="auto"))

    with open(save_string + ".json", "w") as f:
        json.dump(graph, f)

    if color_values is None or color_function_name is None:
        mapper.visualize(graph,
                         X_names=labels,
                         path_html=save_string + ".html",
                         title=title,
                         custom_tooltips=np.array(labels))
    else:
        mapper.visualize(graph,
                         X_names=labels,
                         path_html=save_string + ".html",
                         title=title,
                         custom_tooltips=np.array(labels),
                         color_function_name=color_function_name,
                         color_values=color_values,
                         node_color_function=['mean', 'std', 'median', 'max'])
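# Illustrative call on synthetic data; the file names and parameter values are
# placeholders, not the original experiment's settings.
import numpy as np

rng = np.random.default_rng(42)
X = rng.normal(size=(200, 5))
member_labels = ["sample_%d" % i for i in range(len(X))]

# "knn_distance_2" projects each point to the distance to its 2nd nearest
# neighbour; NR and PO set the cover resolution and overlap.
perform_TDA(X, member_labels, NR=10, PO=0.3, n_clusters=3,
            save_string="tda_demo", title="TDA demo")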
def do_analysis(lens, name_prefix, nc, po):
    graph = mapper.map(lens, data,
                       clusterer=lk.LinkageGap(verbose=0),
                       # clusterer=sklearn.cluster.DBSCAN(1, min_samples=0),
                       cover=km.Cover(n_cubes=nc, perc_overlap=po))
    name = "{}_n{}_o{}".format(name_prefix, nc, po)
    mapper.visualize(graph,
                     color_function=target,
                     path_html=name + "_iris.html",
                     title=name + "_iris")
def do_analysis(data, dists, lens, name_prefix, nc, po):
    name = "{}_n{}_o{}".format(name_prefix, nc, po)
    graph = mapper.map(lens, dists,
                       clusterer=lk.LinkageGap(verbose=0, metric="precomputed"),
                       precomputed=True,
                       cover=km.Cover(n_cubes=nc, perc_overlap=po))
    mapper.visualize(graph,
                     color_values=lens,
                     color_function_name=name_prefix,
                     path_html=name + "_cat.html",
                     title=name + "_cat")
def execute(self, matrix):
    matrix = matrixmodule.distMatrix(matrix)
    mapper = kmapper.KeplerMapper()
    mycover = kmapper.Cover(n_cubes=self.cover_n_cubes,
                            perc_overlap=self.cover_perc_overlap)
    mynerve = kmapper.GraphNerve(
        min_intersection=self.graph_nerve_min_intersection)
    original_data = matrix if self.use_original_data else None
    projected_data = mapper.fit_transform(matrix,
                                          projection=self.projection,
                                          scaler=MapperAnalysis.scalers[self.scaler],
                                          distance_matrix=False)
    graph = mapper.map(projected_data,
                       X=original_data,
                       clusterer=MapperAnalysis.clusterers[self.clusterer],
                       cover=mycover,
                       nerve=mynerve,
                       precomputed=False,
                       remove_duplicate_nodes=self.remove_duplicate_nodes)
    output_graph = mapper.visualize(graph, save_file=True)
    self.graph = output_graph
def run_mapper(X=None, y=None, X_inverse=True, lens=None, zscore=False,
               verbose=0, **params):
    """Wrap KeplerMapper calls.

    Notes
    -----
    - See PCA_metadata.ipynb
    """
    X_ = np.copy(X)
    if zscore is True:
        X_ = scipy.stats.zscore(X_, axis=0)

    # init MAPPER params
    projection = params.get('projection',
                            TSNE(perplexity=50, init='pca', random_state=0))
    clusterer = params.get('clusterer', HDBSCAN(allow_single_cluster=True))
    cover = params.get('cover', km.Cover(10, 0.67))
    X_inverse = X_ if X_inverse is True else X_inverse

    # fit
    if lens is None:
        mapper = km.KeplerMapper(verbose=verbose - 1)
        lens = mapper.fit_transform(X_, projection=projection)

    # map (current kmapper takes the cover via the `cover` keyword)
    mapper = km.KeplerMapper(verbose=verbose)
    graph = mapper.map(lens, X_inverse, clusterer=clusterer, cover=cover)

    # dG
    dG = ds.DyNeuGraph(G=graph, y=y)

    # save results (store the inverse actually used, not just the copy of X)
    results = Bunch(
        X=X_,
        y=y,
        X_inverse=X_inverse,
        lens=lens.copy(),
        graph=dict(graph),
        projection=projection,
        clusterer=clusterer,
        cover=cover,
        dG=dG,
    )
    return results
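# Sketch of a call with the defaults (t-SNE lens, HDBSCAN clustering, a
# 10-cube / 67%-overlap cover); the data here is synthetic and illustrative.
import numpy as np

rng = np.random.default_rng(7)
X_demo = rng.normal(size=(300, 10))
y_demo = rng.integers(0, 2, size=300)

results = run_mapper(X=X_demo, y=y_demo, zscore=True, verbose=1)
print(len(results.graph['nodes']), "Mapper nodes")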
def mapper(self, data):
    """Run the mapper algorithm on the data.

    Parameters
    ----------
    data : array-like
        The data to run the algorithm on; can have almost any shape.

    Returns
    -------
    graph : The graph output from km.KeplerMapper(...).map(...)
    """
    # Initialize
    logging.info("Applying the mapping algorithm.")
    mapper = km.KeplerMapper(verbose=2)

    # We create a custom 1-D lens with Isolation Forest
    model = ensemble.IsolationForest()
    model.fit(data)
    isolation_forest = model.decision_function(data).reshape(
        (data.shape[0], 1))

    # Fit to and transform the data
    tsne_projection = mapper.fit_transform(
        data,
        projection=sklearn.manifold.TSNE(n_components=2,
                                         perplexity=20,
                                         init='pca'))
    lens = np.c_[isolation_forest, tsne_projection]

    # Create dictionary called 'graph' with nodes, edges and meta-information.
    # Note: mapping is done on the t-SNE projection alone; the combined
    # Isolation Forest + t-SNE lens above is built but not used here.
    graph = mapper.map(tsne_projection,
                       cover=km.Cover(10, 0.2),
                       clusterer=sklearn.cluster.DBSCAN(eps=1.0,
                                                        min_samples=2))

    color_function = np.array(
        [self._label_to_color(self.labels[i]) for i in range(len(data))])

    # Visualize it
    mapper.visualize(graph,
                     path_html="actions.html",
                     title="chunk",
                     custom_tooltips=self.tooltips,
                     color_function=color_function)
    return graph
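# For reference, a self-contained sketch of the same combined-lens pattern
# (Isolation Forest score stacked with a t-SNE embedding) outside the class;
# all names and parameter values here are illustrative.
import numpy as np
import sklearn.cluster
import sklearn.manifold
from sklearn import ensemble
import kmapper as km

rng = np.random.default_rng(0)
points = rng.normal(size=(300, 8))

forest = ensemble.IsolationForest(random_state=0)
forest.fit(points)
anomaly_lens = forest.decision_function(points).reshape(-1, 1)

demo_mapper = km.KeplerMapper(verbose=0)
tsne_lens = demo_mapper.fit_transform(
    points, projection=sklearn.manifold.TSNE(n_components=2, init='pca'))

# 3-D lens: one anomaly-score column plus two t-SNE coordinates.
combined_lens = np.c_[anomaly_lens, tsne_lens]
demo_graph = demo_mapper.map(combined_lens, points,
                             cover=km.Cover(n_cubes=10, perc_overlap=0.2),
                             clusterer=sklearn.cluster.DBSCAN(eps=1.0,
                                                              min_samples=2))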
def do_analysis(lens, name_prefix):
    viz.scatter3d(data3, lens, show=False)
    viz.plt.savefig(name_prefix + "circle.png")
    viz.plt.close("all")
    graph = mapper.map(lens, data,
                       clusterer=sklearn.cluster.DBSCAN(eps=0.1, min_samples=5),
                       cover=km.Cover(n_cubes=10, perc_overlap=0.2))
    mapper.visualize(graph,
                     color_function=lens,
                     path_html=name_prefix + "_circle_output.html",
                     title=name_prefix + " circle",
                     lens=lens)
def test_kmapper_sample():
    data = np.array([[0], [1], [2]])
    lens = data
    graph = km.KeplerMapper().map(lens, data,
                                  clusterer=sklearn.cluster.DBSCAN(
                                      eps=1, min_samples=0),
                                  cover=km.Cover(n_cubes=2, perc_overlap=0.5))
    nxgraph = td.kmapper_to_nxmapper(graph)
    assert len(nxgraph.edges) == 1
    assert len(nxgraph.nodes) == 2
    # Loop variables renamed so they no longer shadow the `data` array above.
    for _, _, edge_data in nxgraph.edges.data():
        assert 'membership' in edge_data
    for _, node_data in nxgraph.nodes.data():
        assert 'membership' in node_data
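# Follow-on sketch: the converted object is a plain networkx graph, so the
# usual graph queries apply. Assumes the same `td` import as the test;
# min_samples=1 here is an assumption to keep DBSCAN valid on newer sklearn.
import numpy as np
import sklearn.cluster
import kmapper as km

pts = np.array([[0], [1], [2]])
g = km.KeplerMapper().map(pts, pts,
                          clusterer=sklearn.cluster.DBSCAN(eps=1, min_samples=1),
                          cover=km.Cover(n_cubes=2, perc_overlap=0.5))
nxg = td.kmapper_to_nxmapper(g)

# Each node's 'membership' attribute lists the original row indices it covers.
for node, attrs in nxg.nodes.data():
    print(node, attrs['membership'])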
def do_analysis(data, lens, cf, name_prefix, nc, po):
    name = "{:s}_n{:d}_o{:.2f}".format(name_prefix, nc, po)
    graph = mapper.map(lens, data.values,
                       clusterer=lk.LinkageGap(verbose=0, metric='euclidean'),
                       cover=km.Cover(n_cubes=nc, perc_overlap=po))
    mapper.visualize(graph,
                     color_function=cf,
                     path_html=name + "_travel.html",
                     title=name + "_travel")
    extra_data = {x: list(data.loc[:, x]) for x in data.columns}
    extra_transforms = {x: np.mean for x in extra_data}
    nx_graph = expo.kmapper_to_nxmapper(graph,
                                        node_extra_data=extra_data,
                                        node_transforms=extra_transforms)
    expo.cytoscapejson_dump(nx_graph, name + "_travel.cyjs")
def run_mapper(X=None, y=None, X_inverse=True, lens=None, verbose=0, **params):
    """Wrap KeplerMapper calls.

    Notes
    -----
    - See PCA_metadata.ipynb
    """
    # init MAPPER params
    projection = params.get('projection',
                            TSNE(perplexity=50, init='random', random_state=0))
    clusterer = params.get('clusterer', HDBSCAN(allow_single_cluster=True))
    cover = params.get('cover', km.Cover(10, 0.67))
    X_inverse = X if X_inverse is True else X_inverse

    # fit
    if lens is None:
        mapper = km.KeplerMapper(verbose=verbose - 1)
        lens = mapper.fit_transform(X, projection=projection)

    # map (current kmapper takes the cover via the `cover` keyword)
    mapper = km.KeplerMapper(verbose=verbose)
    graph = mapper.map(lens, X_inverse, clusterer=clusterer, cover=cover)

    # dG
    dG = ds.DyNeuGraph()
    dG.fit(graph, y=y)

    # save results (store the inverse actually used, not just X)
    results = Bunch(X=X.copy(),
                    X_inverse=X_inverse,
                    lens=lens.copy(),
                    graph=graph,
                    params=params,
                    cover=cover,
                    dG=dG,
                    G=dG.G_,
                    TCM=dG.tcm_.copy())
    return results
def make_tda_covering_scheme(tda_lens, precfg_tda_covering_scheme, verbo_lvl):
    tda_intvls_lowerbound0 = round_dw(
        min(tda_lens[:, 0]), precfg_tda_covering_scheme['lens_bound_rounding0'])
    tda_intvls_upperbound0 = round_up(
        max(tda_lens[:, 0]), precfg_tda_covering_scheme['lens_bound_rounding0'])
    tda_intvls_lowerbound1 = round_dw(
        min(tda_lens[:, 1]), precfg_tda_covering_scheme['lens_bound_rounding1'])
    tda_intvls_upperbound1 = round_up(
        max(tda_lens[:, 1]), precfg_tda_covering_scheme['lens_bound_rounding1'])

    cfg_tda_covering_scheme = dict()
    cfg_tda_covering_scheme['bound'] = np.array(
        [[tda_intvls_lowerbound0, tda_intvls_upperbound0],
         [tda_intvls_lowerbound1, tda_intvls_upperbound1]])
    cfg_tda_covering_scheme['count'] = [
        precfg_tda_covering_scheme['intvls_count0'],
        precfg_tda_covering_scheme['intvls_count1']
    ]
    cfg_tda_covering_scheme['overlap'] = [
        precfg_tda_covering_scheme['intvls_overlap0'],
        precfg_tda_covering_scheme['intvls_overlap1']
    ]

    tda_covering_scheme = km.Cover(limits=cfg_tda_covering_scheme['bound'],
                                   n_cubes=cfg_tda_covering_scheme['count'],
                                   perc_overlap=cfg_tda_covering_scheme['overlap'],
                                   verbose=verbo_lvl)
    return tda_covering_scheme
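# Hedged usage sketch: a synthetic 2-D lens and an illustrative pre-config
# dict. round_dw / round_up are assumed to be the module's own rounding
# helpers and are not redefined here.
import numpy as np

precfg_demo = {
    'lens_bound_rounding0': 1, 'lens_bound_rounding1': 1,   # decimals to round to
    'intvls_count0': 10, 'intvls_count1': 10,               # cubes per lens axis
    'intvls_overlap0': 0.3, 'intvls_overlap1': 0.3,         # overlap per axis
}
demo_lens = np.random.default_rng(1).normal(size=(500, 2))
demo_cover = make_tda_covering_scheme(demo_lens, precfg_demo, verbo_lvl=0)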
lens['lens_2norm'] = mapper.fit_transform(X, projection='l2norm')
pca = sklearn.decomposition.PCA(n_components=2)
lens_pca = pd.DataFrame(pca.fit_transform(X))
for i, c in enumerate(lens_pca.columns):
    lens[f'pca{i}'] = lens_pca[c]
lens_columns = lens.columns
# lens = create_lens(df[columns])

lens['color'] = df['color']
fig = xp.scatter_matrix(lens, dimensions=lens_columns, color='color')
html_xformed = fig.to_html()
with open(f'plots/{data_name}_lensed.html', 'w') as fout:
    fout.write(html_xformed)

# TODO: plot the projected data with colors for the labels

# Create dictionary of the graph (network) of nodes, edges and meta-information
graph = mapper.map(lens[lens_columns], df[columns],
                   cover=km.Cover(n_cubes=N_CUBES, perc_overlap=PERC_OVERLAP),
                   clusterer=sklearn.cluster.KMeans(n_clusters=2))

# Visualize it
html_mapper = mapper.visualize(
    graph,
    path_html=f'plots/{data_name}_kmapper.html',
    title=f"sklearn..{data_name}({argify(data_kwargs)})",
    custom_tooltips=df['color'])
import kmapper as km
import mapperutils.linkage_gap as lk
import mapperutils.visualization as viz
import numpy as np
import trimesh

mapper = km.KeplerMapper(verbose=2)

hand = trimesh.load_mesh("../0_data/hand/hand_simplified3k5.stl")
data = np.array(hand.vertices)
lens = data[:, 1:2]

plot = True
if plot:
    viz.scatter3d(data, lens, colorsMap='viridis', show=False)
    viz.plt.gca().view_init(elev=90, azim=0)
    viz.axisEqual3D(viz.plt.gca())
    viz.plt.show()

n = 7
p = 0.2
graph = mapper.map(lens, data,
                   clusterer=lk.LinkageGap(verbose=0),
                   cover=km.Cover(n_cubes=n, perc_overlap=p))
name = "n{}_p{}".format(n, p)
mapper.visualize(graph,
                 color_function=lens,
                 path_html="hand_only_" + name + ".html",
                 title="hand, " + name)
import kmapper as km
import sklearn.cluster
from sklearn import datasets

data, labels = datasets.make_circles(n_samples=5000, noise=0.05, factor=0.3)

# Initialize
mapper = km.KeplerMapper(verbose=2)

# Fit to and transform the data
projected_data = mapper.fit_transform(data, projection="dist_mean")

# Create dictionary called 'simplicial_complex' with nodes, edges and meta-information
simplicial_complex = mapper.map(
    projected_data,
    X=data,
    clusterer=sklearn.cluster.DBSCAN(eps=0.1, min_samples=5),
    cover=km.Cover(perc_overlap=0.2),
)

# Visualize it
mapper.visualize(
    simplicial_complex,
    path_html="keplermapper-makecircles-distmean.html",
    custom_meta={
        "Data:": "datasets.make_circles(n_samples=5000, noise=0.05, factor=0.3)"
    },
    custom_tooltips=labels,
    color_values=labels,
)
import numpy as np
import sklearn.cluster
import kmapper as km

data = np.genfromtxt('lion-reference.csv', delimiter=',')

mapper = km.KeplerMapper(verbose=1)
lens = mapper.fit_transform(data)
graph = mapper.map(lens, data,
                   clusterer=sklearn.cluster.DBSCAN(eps=0.1, min_samples=5),
                   cover=km.Cover(n_cubes=10, perc_overlap=0.2))
mapper.visualize(graph, path_html="lion_keplermapper_output.html")

# You may want to visualize the original point cloud data in 3D scatter too
"""
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(data[:, 0], data[:, 1], data[:, 2])
plt.savefig("lion-reference.csv.png")
plt.show()
"""
import kmapper as km

# Make very noisy circles
import sklearn.cluster
from sklearn import datasets

data, labels = datasets.make_circles(n_samples=5000, noise=0.05, factor=0.3)

# Initialize
mapper = km.KeplerMapper(verbose=2)

# Fit to and transform the data
projected_data = mapper.fit_transform(data, projection="dist_mean")

# Create dictionary called 'simplicial_complex' with nodes, edges and meta-information
simplicial_complex = mapper.map(projected_data,
                                X=data,
                                clusterer=sklearn.cluster.DBSCAN(eps=0.1,
                                                                 min_samples=5),
                                cover=km.Cover())

# Visualize it
mapper.visualize(simplicial_complex,
                 path_html="keplermapper-makecircles-distmean.html",
                 custom_meta={"Data:": "datasets.make_circles(n_samples=5000, noise=0.05, factor=0.3)"},
                 custom_tooltips=labels)
numerical_cols = [
    df.columns[pos] for pos, item in enumerate(df.dtypes)
    if item in [np.float64, np.int64]
]
print(numerical_cols)
data = df[numerical_cols].values
print(data.shape)

# In[11]:

import kmapper as km
from kmapper import jupyter
from sklearn import cluster
from IPython.display import IFrame

mapper = km.KeplerMapper(verbose=1)
projected_data = mapper.project(data, projection="sum")
# Ward linkage only supports euclidean distances, so cosine affinity needs a
# different linkage; average linkage is used here.
graph = mapper.map(projected_data, data,
                   cover=km.Cover(n_cubes=5, perc_overlap=0.75),
                   clusterer=cluster.AgglomerativeClustering(n_clusters=100,
                                                             affinity="cosine",
                                                             linkage="average"))

# Visualize it
mapper.visualize(graph,
                 path_html="map-dataframe-test.html",
                 title="Map Dataframe Test")

IFrame("map-dataframe-test.html", 800, 600)
fig = plt.figure()
cd = hdb_unweighted.condensed_tree_.plot()
# fig.suptitle('Unweighted HDBSCAN condensed tree plot')
fig.savefig(TOPO_DIR + "HDBSCAN_%s_%s.pdf" %
            (KERAS_MODEL_NAME.split(".")[0], layer))
# plt.show()

# Create the graph (we cluster on the projected data and suffer projection loss)
graph = mapper.map(
    projected_data,
    clusterer=sklearn.cluster.DBSCAN(eps=0.8, min_samples=3),
    # clusterer=sklearn.cluster.DBSCAN(eps=5),
    # clusterer=HDBSCAN(min_cluster_size=5, gen_min_span_tree=True, allow_single_cluster=True),
    # cover=km.Cover(35, 0.9)
    cover=km.Cover(n_cubes=10, perc_overlap=0.2),
)
print(layer, "map successfully")
simplicial_complex = graph
print(labels)
print(tooltip_s)

try:
    # mapper.visualize(graph,
    #                  path_html="label_layer_%s_keplermapper_weights_visualization.html" % (layer),
    #                  custom_tooltips=labels
    #                  )
    mapper.visualize(graph,
                     path_html=TOPO_DIR + "picture_overlap_layer_%s_%s.html" %
import numpy as np
import pandas as pd
import sklearn.cluster
from sklearn import ensemble
import kmapper as km

df = pd.read_csv("data.csv")
feature_names = [c for c in df.columns if c not in ["id", "diagnosis"]]
df["diagnosis"] = df["diagnosis"].apply(lambda x: 1 if x == "M" else 0)
X = np.array(df[feature_names].fillna(0))  # quick and dirty imputation
y = np.array(df["diagnosis"])

# We create a custom 1-D lens with Isolation Forest
model = ensemble.IsolationForest(random_state=1729)
model.fit(X)
lens1 = model.decision_function(X).reshape((X.shape[0], 1))

# We create another 1-D lens with L2-norm
mapper = km.KeplerMapper(verbose=3)
lens2 = mapper.fit_transform(X, projection="l2norm")

# Combine both lenses to create a 2-D [Isolation Forest, L^2-Norm] lens
lens = np.c_[lens1, lens2]

# Create the simplicial complex
graph = mapper.map(lens, X,
                   cover=km.Cover(n_cubes=15, perc_overlap=0.7),
                   clusterer=sklearn.cluster.KMeans(n_clusters=2,
                                                    random_state=1618033))

# Visualization
mapper.visualize(graph,
                 path_html="breast-cancer.html",
                 title="Wisconsin Breast Cancer Dataset",
                 custom_tooltips=y)
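# Follow-on to the snippet above: per-node membership can be read straight
# from the graph dict, giving e.g. a quick malignant fraction per node.
for node_id, members in list(graph["nodes"].items())[:5]:
    frac_malignant = y[members].mean()
    print("%s: %d samples, %.0f%% malignant"
          % (node_id, len(members), 100 * frac_malignant))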
def full(n_layer=2, n_file=2):
    # Get weights
    model = VGG16(weights='imagenet', include_top=False)
    weights = model.layers[n_layer].get_weights()[0]

    # Reshape weights: (h, w, in, out) -> (in*out, h*w) kernel patches.
    # Using s[0]*s[1] (h*w) here; the kernels are square for VGG16, but this
    # is the correct general form.
    s = weights.shape
    kern = weights.transpose([2, 3, 0, 1]).reshape(s[2] * s[3], s[0] * s[1])
    kern_disp = kern.reshape(len(kern), s[0], s[1])

    # Normalize
    kern_means = kern.mean(axis=1)
    kern_std = kern.std(axis=1)
    kern_scaled = np.asarray([kern[i] - kern_means[i] for i in range(len(kern))])
    kern_scaled = np.asarray([kern_scaled[i] / kern_std[i]
                              for i in range(len(kern_scaled))])

    # Select top density points
    kern20nn = top_dens_nn(kern_scaled, n=100, p=0.3)

    name = 'VGG_layer_' + str(n_file)
    proj_pca = PCA(n_components=2, whiten=False)
    mapper = km.KeplerMapper(verbose=1)
    lens = mapper.fit_transform(kern20nn,
                                projection=proj_pca,
                                scaler=None,
                                distance_matrix=False)
    plt.figure()
    plt.scatter(lens[:, 0], lens[:, 1], s=5)
    plt.title(name)

    V = kern20nn.std(axis=0)

    # d = distance.cdist(kern20nn, kern20nn)
    # Z = linkage(d, 'single')
    # plt.figure()
    # dendrogram(Z)
    # plt.show()
    # lens = np.zeros(Z.shape[0] + 1)
    # lens[:-1] = Z[:, 2]
    # lens[-1] = d.max()
    # hst, bins = np.histogram(lens, bins=64)
    # plt.figure()
    # plt.hist(lens, bins=64)
    # z = np.nonzero(hst == 0)[0]
    # print(hst[z[0]:len(hst)].sum())
    # print(z.shape)
    # print(z[:10])

    graph = mapper.map(
        lens, kern20nn,
        # clusterer=AgglomerativeClustering(n_clusters=2, linkage='single', affinity='euclidean'),
        clusterer=Single_linkage(),
        # clusterer=DBSCAN(metric=SNE(V)),
        cover=km.Cover(n_cubes=30, perc_overlap=0.66),
    )
    ht = mapper.visualize(graph, path_html=name + ".html", title=name)
    return graph
import numpy as np
import sklearn.cluster
import kmapper as km

data = np.genfromtxt('horse-reference.csv', delimiter=',')

mapper = km.KeplerMapper(verbose=2)
lens = mapper.fit_transform(data)
graph = mapper.map(lens, data,
                   clusterer=sklearn.cluster.DBSCAN(eps=0.1, min_samples=5),
                   cover=km.Cover(30, 0.2))
mapper.visualize(graph,
                 path_html="horse_keplermapper_output.html",
                 custom_tooltips=np.arange(len(lens)))

# You may want to visualize the original point cloud data in 3D scatter too
"""
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(data[:, 0], data[:, 1], data[:, 2])
plt.savefig("horse-reference.csv.png")
plt.show()
"""
import numpy as np
import sklearn.cluster
import kmapper as km

data = np.genfromtxt(
    'C:/Users/lili/courses/ComputationalTopology/FinalProject/Mapper/kepler-mapper/examples/horse/horse-reference.csv',
    delimiter=',')

mapper = km.KeplerMapper(verbose=2)
lens = mapper.fit_transform(data)
graph = mapper.map(lens, data,
                   clusterer=sklearn.cluster.DBSCAN(eps=0.1, min_samples=5),
                   cover=km.Cover(n_cubes=30, perc_overlap=0.2))
mapper.visualize(
    graph,
    path_html="C:/Users/lili/courses/ComputationalTopology/FinalProject/Mapper/horse_keplermapper_output.html",
    custom_tooltips=np.arange(len(lens)))

# You may want to visualize the original point cloud data in 3D scatter too
"""
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(data[:, 0], data[:, 1], data[:, 2])
plt.savefig("horse-reference.csv.png")
"""
sq_distance_matrix[col, row] = distance_matrix[index]

# In[73]:

numerical_cols = [
    sub_df.columns[pos] for pos, item in enumerate(sub_df.dtypes)
    if item in [np.float64, np.int64]
]
new_data = sub_df[numerical_cols].to_numpy()
dimensional_data = np.array([row[0] for row in new_data])
print(dimensional_data)

mapper = km.KeplerMapper(verbose=1)
graph = mapper.map(dimensional_data,
                   X=sq_distance_matrix,
                   precomputed=True,
                   cover=km.Cover(n_cubes=35, perc_overlap=0.2),
                   clusterer=sklearn.cluster.DBSCAN(algorithm='auto',
                                                    eps=0.40,
                                                    leaf_size=30,
                                                    metric='precomputed',
                                                    min_samples=3,
                                                    n_jobs=4))

# In[74]:

# Visualize it
mapper.visualize(graph,
                 path_html="map-dataframe-test.html",
                 title="Map Dataframe Test",
                 color_function=dimensional_data)

IFrame("map-dataframe-test.html", 800, 600)
    tooltip_s.append(img_tag)
output.close()

tooltip_s = np.array(
    tooltip_s)  # need to make sure to feed it as a NumPy array, not a list

# Initialize to use t-SNE with 2 components (reduces data to 2 dimensions).
# Also note the high overlap percentage.
mapper = km.KeplerMapper(verbose=2)

# Fit and transform data
projected_data = mapper.fit_transform(data, projection=sklearn.manifold.TSNE())

# Create the graph (we cluster on the projected data and suffer projection loss)
graph = mapper.map(projected_data,
                   clusterer=sklearn.cluster.DBSCAN(eps=0.3, min_samples=15),
                   cover=km.Cover(35, 0.4))

# Create the visualizations (increased the graph_gravity for a tighter graph-look.)
print("Output graph examples to html")

# Tooltips with image data for every cluster member
mapper.visualize(graph,
                 title="Handwritten digits Mapper",
                 path_html="output/digits_custom_tooltips.html",
                 color_values=labels,
                 custom_tooltips=tooltip_s)

# Tooltips with the target y-labels for every cluster member
mapper.visualize(graph,
                 title="Handwritten digits Mapper",
                 path_html="output/digits_ylabel_tooltips.html",
                 custom_tooltips=labels)