def run_mapper(X=None, y=None, X_inverse=True, lens=None, zscore=False,
               verbose=0, **params):
    """Wrap KeplerMapper calls.

    Notes
    -----
    - See PCA_metadata.ipynb
    """
    X_ = np.copy(X)
    if zscore is True:
        X_ = scipy.stats.zscore(X_, axis=0)

    # init MAPPER params
    projection = params.get('projection',
                            TSNE(perplexity=50, init='pca', random_state=0))
    clusterer = params.get('clusterer', HDBSCAN(allow_single_cluster=True))
    cover = params.get('cover', km.Cover(10, 0.67))
    X_inverse = X_ if X_inverse is True else X_inverse

    # fit
    if lens is None:
        mapper = km.KeplerMapper(verbose=verbose - 1)
        lens = mapper.fit_transform(X_, projection=projection)

    # map
    mapper = km.KeplerMapper(verbose=verbose)
    graph = mapper.map(lens, X_inverse, clusterer=clusterer, coverer=cover)

    # dG
    dG = ds.DyNeuGraph(G=graph, y=y)

    # save results
    results = Bunch(
        X=X_,
        y=y,
        X_inverse=X_inverse,  # store the inverse data actually passed to map()
        lens=lens.copy(),
        graph=dict(graph),
        projection=projection,
        clusterer=clusterer,
        cover=cover,
        dG=dG,
    )
    return results
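# A minimal usage sketch for run_mapper above — not from the original
# notebook. The synthetic data, label vector, and reliance on the default
# TSNE/HDBSCAN parameters are assumptions for illustration only.
import numpy as np

X_demo = np.random.rand(200, 10)
y_demo = np.random.randint(0, 2, size=200)
demo = run_mapper(X=X_demo, y=y_demo, zscore=True, verbose=1)
print(len(demo.graph['nodes']), 'Mapper nodes')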
def projectWords(gloveFile, destination, n_components, perplexity, n_vectors,
                 writeVectors):
    model, vectors, keys = loadGloveModel(gloveFile)
    mapper = km.KeplerMapper(verbose=2)

    np.savetxt(destination + "word-vectors" + "-c" + str(n_vectors) + ".csv",
               vectors[0:n_vectors], delimiter=',')
    subset = np.array(vectors[0:n_vectors])
    subsetKeys = keys[0:n_vectors]

    if writeVectors:
        with open(destination + 'words' + str(n_vectors) + ".csv", 'w') as thefile:
            for item in subsetKeys:
                thefile.write("%s\n" % item)

    print("projecting")
    projected_data = mapper.fit_transform(
        subset,
        projection=sklearn.manifold.TSNE(n_components=n_components,
                                         perplexity=perplexity))

    if writeVectors:
        print("saving vectors")
        np.savetxt(destination + "tsne-vectors" + "-nc" + str(n_components) +
                   "-p" + str(perplexity) + "-c" + str(n_vectors) + ".csv",
                   projected_data, delimiter=',')

    return subsetKeys, projected_data
def test(x):
    """Basic usage"""
    # instantiate a Mapper object
    start = time.time()
    filter = Projection(ax=2)
    cover = KeplerCover(nintervals=25, overlap=0.4)
    cluster = Linkage(method='single', metric='euclidean',
                      cutoff=FirstGap(0.05))
    mapper = lm.Mapper(data=x, filter=filter, cover=cover, cluster=cluster)
    mapper.fit(skeleton_only=True).plot()
    print('Martino mapper: {0:.4f} sec'.format(time.time() - start))

    start = time.time()
    mapper = km.KeplerMapper(verbose=2)
    projected_data = mapper.fit_transform(x, projection=[2])
    graph = mapper.map(
        projected_data, x, nr_cubes=25, overlap_perc=0.4,
        clusterer=sklearn.cluster.AgglomerativeClustering(linkage='single'))
    print('Kepler mapper: {0:.4f} sec'.format(time.time() - start))
    return 0
def create_my_strategy(daily_returns, daily_index_returns, previous_weights):
    """Create the core strategy using Topological Data Analysis."""
    epsilon = 1.0 / (2 * daily_returns.shape[1])
    threshold = epsilon
    alpha = parameters.ALPHA_MULTIPLIER * epsilon
    beta = parameters.BETA_MULTIPLIER * epsilon
    daily_returns = np.transpose(daily_returns.values)

    mapper = km.KeplerMapper(verbose=0)
    lens1 = mapper.fit_transform(daily_returns, projection='mean')
    lens2 = mapper.fit_transform(daily_returns, projection='std')
    simplicial_complex = mapper.map(
        np.c_[lens1, lens2],
        X=daily_returns,
        clusterer=sklearn.cluster.DBSCAN(eps=0.5, min_samples=3),
        # cast to int: np.ceil returns a float, km.Cover expects an
        # integer cube count
        cover=km.Cover(
            n_cubes=int(np.ceil(np.power(daily_returns.shape[0], 0.25))),
            perc_overlap=0.1))

    if create_my_strategy.counter in (0, 79, 158):
        mapper.visualize(
            simplicial_complex,
            path_html=parameters.RESULT_DIRECTORY + "\\" +
            parameters.STOCK_INDEX + "_simplex_" +
            str(create_my_strategy.counter) + ".html")
    create_my_strategy.counter += 1

    if previous_weights is None:
        current_portfolio = _get_max_sortino_ratios(
            daily_returns, simplicial_complex['nodes']).values()
        weights = np.zeros(daily_returns.shape[0])
        for stock_index in current_portfolio:
            weights[stock_index] = 1.0
        weights /= np.sum(weights)
        return weights

    weights = previous_weights
    alpha_weights = np.zeros(weights.shape)
    beta_weights = np.zeros(weights.shape)
    current_champion_stocks = _get_max_sortino_ratios(
        daily_returns, simplicial_complex['nodes'])
    for cluster, stock_index in current_champion_stocks.items():
        if stock_index != -1:
            # builtin sum(): np.sum over a generator is deprecated
            previous_weight_sum = sum(
                previous_weights[i]
                for i in simplicial_complex['nodes'][cluster])
            alpha_weights[stock_index] = alpha * previous_weight_sum
            links_dict = simplicial_complex['links']
            for curr_neighbour in links_dict[cluster]:
                neighbour_stock_index = current_champion_stocks[curr_neighbour]
                if neighbour_stock_index != -1:
                    beta_weights[neighbour_stock_index] += \
                        beta * previous_weight_sum
    weights = weights + alpha * alpha_weights + beta * beta_weights
    weights[weights / np.sum(weights) < threshold] = 0
    weights /= np.sum(weights)
    return weights
def test_no_warn_normally(self, recwarn):
    """Confirm that deprecation warnings behave as expected."""
    mapper = km.KeplerMapper()
    data = np.random.rand(100, 10)
    lens = mapper.fit_transform(data)
    warnings.simplefilter('always')
    graph = mapper.map(lens, data)
    assert len(recwarn) == 0
    assert DeprecationWarning not in recwarn
def test_from_kmapper_mapping_nodes(self):
    km = kmapper.KeplerMapper()
    np.random.seed(0)
    data = np.random.random((300, 5))
    lens = km.project(data)
    graph = km.map(lens, data)
    f = {k: np.random.random() for k in graph['nodes']}
    e = Extended.from_kmapper(graph, f)
    assert len(f) == len(e.f)
    assert set(e.f.values()) == set(f.values())
def test_from_kmapper_mapping_lens(self):
    km = kmapper.KeplerMapper()
    np.random.seed(0)
    data = np.random.random((300, 5))
    lens = km.project(data)
    graph = km.map(lens, data)
    e = Extended.from_kmapper(graph, lens)
    assert len(graph['nodes']) == len(e.f)
    assert set(e.f.values()) == set(
        np.mean(lens[v]) for v in graph['nodes'].values())
def perform_TDA(data, labels, NR, PO, n_clusters, filt="knn_distance_2",
                save_string="TDA", title="TDA", color_values=None,
                color_function_name=None):
    """Perform Topological Data Analysis.

    Args:
        data: 2-dimensional array, where the first dimension is kept as
            member label
        labels: labels for the first dimension of data
        NR: number of hypercubes
        PO: percent overlap between hypercubes
        n_clusters: number of clusters per hypercube
        filt: filtering scheme, default is "knn_distance_2"
        save_string: path where to save html and json
        title: title of map graph
    """
    # Step 1. initiate a Mapper
    mapper = km.KeplerMapper(verbose=2)

    # Step 2. Projection
    projected_data = mapper.fit_transform(data, projection=filt)

    # Step 3. Covering, clustering & mapping
    graph = mapper.map(projected_data, data,
                       cover=km.Cover(n_cubes=NR, perc_overlap=PO),
                       clusterer=AgglomerativeClustering(
                           n_clusters=n_clusters,
                           linkage="ward",
                           affinity="euclidean",
                           memory=None,
                           connectivity=None,
                           compute_full_tree="auto",
                       ))

    with open(save_string + ".json", "w") as f:
        json.dump(graph, f)

    if color_values is None or color_function_name is None:
        mapper.visualize(graph,
                         X_names=labels,
                         path_html=save_string + ".html",
                         title=title,
                         custom_tooltips=np.array(labels))
    else:
        mapper.visualize(graph,
                         X_names=labels,
                         path_html=save_string + ".html",
                         title=title,
                         custom_tooltips=np.array(labels),
                         color_function_name=color_function_name,
                         color_values=color_values,
                         node_color_function=['mean', 'std', 'median', 'max'])
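# A hypothetical call to perform_TDA above, illustrating typical argument
# choices; the toy dataset and parameter values are assumptions, not taken
# from the original analysis.
import numpy as np

toy = np.random.rand(150, 8)
toy_labels = ['sample_{}'.format(i) for i in range(150)]
perform_TDA(toy, toy_labels, NR=10, PO=0.3, n_clusters=2,
            save_string="toy_TDA", title="Toy TDA")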
def test_from_kmapper_simplices(self):
    km = kmapper.KeplerMapper()
    data = np.random.random((300, 5))
    lens = km.project(data)
    graph = km.map(lens, data)
    e = Extended.from_kmapper(graph, lens)
    vs = [s for s in e.simplices if len(s) == 1]
    ls = [s for s in e.simplices if len(s) == 2]
    assert len(vs) == len(graph['nodes'])
    assert len(ls) == sum(len(v) for v in graph['links'].values())
    assert len(e.simplices) == len(graph['simplices'])
def execute(self, matrix):
    matrix = matrixmodule.distMatrix(matrix)
    mapper = kmapper.KeplerMapper()
    mycover = kmapper.Cover(n_cubes=self.cover_n_cubes,
                            perc_overlap=self.cover_perc_overlap)
    mynerve = kmapper.GraphNerve(
        min_intersection=self.graph_nerve_min_intersection)
    original_data = matrix if self.use_original_data else None
    projected_data = mapper.fit_transform(
        matrix,
        projection=self.projection,
        scaler=MapperAnalysis.scalers[self.scaler],
        distance_matrix=False)
    graph = mapper.map(projected_data,
                       X=original_data,
                       clusterer=MapperAnalysis.clusterers[self.clusterer],
                       cover=mycover,
                       nerve=mynerve,
                       precomputed=False,
                       remove_duplicate_nodes=self.remove_duplicate_nodes)
    output_graph = mapper.visualize(graph, save_file=True)
    self.graph = output_graph
def main(args):
    """Entry point of the program."""
    data = np.genfromtxt(args.path, delimiter=',')
    mapper = km.KeplerMapper(verbose=0)
    lens = mapper.fit_transform(data, projection='l2norm')
    graph = mapper.map(lens, data, nr_cubes=20, overlap_perc=0.7,
                       clusterer=sklearn.cluster.KMeans(n_clusters=2))
    print(json.dumps(graph))
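# A minimal, hypothetical driver for main() above, assuming the function is
# used as a script entry point and that args only needs a `path` attribute;
# the argument parser is illustrative, not from the original program.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run Mapper on a CSV file.')
    parser.add_argument('path', help='path to a comma-delimited data file')
    main(parser.parse_args())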
def run_mapper(X=None, y=None, X_inverse=True, lens=None, verbose=0, **params):
    """Wrap KeplerMapper calls.

    Notes
    -----
    - See PCA_metadata.ipynb
    """
    # init MAPPER params
    projection = params.get('projection',
                            TSNE(perplexity=50, init='random', random_state=0))
    clusterer = params.get('clusterer', HDBSCAN(allow_single_cluster=True))
    cover = params.get('cover', km.Cover(10, 0.67))
    X_inverse = X if X_inverse is True else X_inverse

    # fit
    if lens is None:
        mapper = km.KeplerMapper(verbose=verbose - 1)
        lens = mapper.fit_transform(X, projection=projection)

    # map
    mapper = km.KeplerMapper(verbose=verbose)
    graph = mapper.map(lens, X_inverse, clusterer=clusterer, coverer=cover)

    # dG
    dG = ds.DyNeuGraph()
    dG.fit(graph, y=y)

    # save results
    results = Bunch(X=X.copy(),
                    X_inverse=X_inverse,  # the inverse data actually mapped
                    lens=lens.copy(),
                    graph=graph,
                    params=params,
                    cover=cover,
                    dG=dG,
                    G=dG.G_,
                    TCM=dG.tcm_.copy())
    return results
def test_new_api_old_defaults(self):
    mapper = km.KeplerMapper()
    data = np.random.rand(100, 10)
    lens = mapper.fit_transform(data)

    _ = mapper.map(lens, data, nr_cubes=10)
    c2 = mapper.coverer
    assert c2.overlap_perc == 0.1

    _ = mapper.map(lens, data, overlap_perc=0.1)
    c2 = mapper.coverer
    assert c2.nr_cubes == 10
def test_warn_old_api(self):
    """Confirm old api works but throws warning."""
    mapper = km.KeplerMapper()
    data = np.random.rand(100, 10)
    lens = mapper.fit_transform(data)

    with pytest.deprecated_call():
        graph = mapper.map(lens, data, nr_cubes=10)

    with pytest.deprecated_call():
        graph = mapper.map(lens, data, overlap_perc=10)

    with pytest.deprecated_call():
        graph = mapper.map(lens, data, nr_cubes=10, overlap_perc=0.1)
def mapper(self, data):
    """Run the mapper algorithm on the data.

    Parameters
    ----------
    data : array-like
        The data to run the algorithm on; can have almost any shape.

    Returns
    -------
    graph :
        The graph output from km.KeplerMapper(...).map(...)
    """
    # Initialize
    logging.info("Applying the mapping algorithm.")
    mapper = km.KeplerMapper(verbose=2)

    # We create a custom 1-D lens with Isolation Forest
    model = ensemble.IsolationForest()
    model.fit(data)
    isolation_forest = model.decision_function(data).reshape(
        (data.shape[0], 1))

    # Fit to and transform the data
    tsne_projection = mapper.fit_transform(
        data,
        projection=sklearn.manifold.TSNE(n_components=2,
                                         perplexity=20,
                                         init='pca'))
    lens = np.c_[isolation_forest, tsne_projection]

    # Create dictionary called 'graph' with nodes, edges and meta-information
    # (note: the t-SNE projection, not the combined lens built above, is
    # what actually gets mapped here)
    graph = mapper.map(tsne_projection,
                       coverer=km.Cover(10, 0.2),
                       clusterer=sklearn.cluster.DBSCAN(eps=1.0,
                                                        min_samples=2))

    color_function = np.array(
        [self._label_to_color(self.labels[i]) for i in range(len(data))])

    # Visualize it
    mapper.visualize(graph,
                     path_html="actions.html",
                     title="chunk",
                     custom_tooltips=self.tooltips,
                     color_function=color_function)
    return graph
def visKMapper(data: np.ndarray, id: str):
    '''
    Bundles the functions used to calculate a KMapper visualization.
    Exports it as an .html object.
    '''
    mapper = km.KeplerMapper(verbose=1)  # init
    # fit, transform data to X-Y axis
    projected_data = mapper.fit_transform(data, projection=[0, 1])
    # Create dictionary called 'graph' with nodes, edges and meta-information
    graph = mapper.map(projected_data, data)
    mapper.visualize(
        graph,
        path_html="make_circles_keplermapper_output" + id + ".html",
        title="make_circles(n_samples=5000, noise=0.03, factor=0.3)")
    return
def def_lenses_features(df, fs):
    mapper = km.KeplerMapper()
    keys = []
    values = []
    for idx, col in enumerate(fs):
        keys.append("lens_{}".format(col))
        values.append(
            # .to_numpy(): DataFrame.as_matrix() was removed in pandas 1.0
            mapper.fit_transform(df[fs].to_numpy(),
                                 projection=[idx],
                                 scaler=MinMaxScaler()))
    lenses_features = dict(zip(keys, values))
    return lenses_features
def test_kmapper_sample():
    data = np.array([[0], [1], [2]])
    lens = data
    graph = km.KeplerMapper().map(lens, data,
                                  clusterer=sklearn.cluster.DBSCAN(
                                      eps=1, min_samples=0),
                                  cover=km.Cover(n_cubes=2, perc_overlap=0.5))
    nxgraph = td.kmapper_to_nxmapper(graph)
    assert len(nxgraph.edges) == 1
    assert len(nxgraph.nodes) == 2
    for _, _, data in nxgraph.edges.data():
        assert 'membership' in data
    for _, data in nxgraph.nodes.data():
        assert 'membership' in data
def random(args):
    data, labels = datasets.make_circles(n_samples=5000, noise=0.03,
                                         factor=0.3)

    # Initialize
    mapper = km.KeplerMapper(verbose=1)

    # Fit to and transform the data
    projected_data = mapper.fit_transform(data, projection=[0, 1])  # X-Y axis

    # Create dictionary called 'graph' with nodes, edges and meta-information
    graph = mapper.map(projected_data, data, nr_cubes=10)

    # Visualize it
    mapper.visualize(
        graph,
        path_html="out/{}.html".format(args.action),
        title="make_circles(n_samples=5000, noise=0.03, factor=0.3)")
def get_topological_graph(data, exp, clusterer_param, projection=True,
                          cover=(10, 0.1)):
    mapper = km.KeplerMapper(verbose=0)
    clusterer_param *= np.sqrt(data.shape[1])
    clusterer = sklearn.cluster.DBSCAN(eps=clusterer_param, min_samples=1)
    # Note: isinstance(True, int) is True in Python, so the default
    # projection=True projects onto np.arange(True) == [0].
    if isinstance(projection, int):
        level_set = mapper.fit_transform(
            exp, projection=np.arange(projection).tolist())
    else:
        level_set = exp
    cover = km.Cover(cover[0], cover[1])
    graph = mapper.map(level_set, data, clusterer=clusterer, cover=cover)
    return remove_duplicated_links(remove_graph_duplicates(graph))
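# A usage sketch for get_topological_graph above — assumed, not from the
# original code. `data` is clustered in the original space while `exp`
# supplies the lens; passing projection=2 keeps the first two lens columns.
import numpy as np

data_demo = np.random.rand(300, 6)
exp_demo = np.random.rand(300, 3)
graph_demo = get_topological_graph(data_demo, exp_demo,
                                   clusterer_param=0.2,
                                   projection=2,
                                   cover=(15, 0.2))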
def def_lenses_dimred(df, fs, get_PCA, get_isomap, get_LLE, get_MDS,
                      get_spectral_embedding, get_SVD):
    scaler = MinMaxScaler()
    mapper = km.KeplerMapper()
    keys = []
    values = []

    minmax_scaler = MinMaxScaler()
    # .to_numpy(): DataFrame.as_matrix() was removed in pandas 1.0
    df_minmax = minmax_scaler.fit_transform(df[fs].to_numpy())

    # PCA
    if get_PCA:
        keys.append('lens_pca_0')
        keys.append('lens_pca_1')
        pca = mapper.fit_transform(df_minmax,
                                   projection=PCA(n_components=2),
                                   scaler=None)
        values.append(scaler.fit_transform(pca[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(pca[:, 1].reshape(-1, 1)))

    # Isomap
    if get_isomap:
        keys.append('lens_isomap_0')
        keys.append('lens_isomap_1')
        isomap = manifold.Isomap(n_components=2,
                                 n_neighbors=3).fit_transform(df_minmax)
        values.append(scaler.fit_transform(isomap[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(isomap[:, 1].reshape(-1, 1)))

    # Locally linear embedding
    if get_LLE:
        keys.append('lens_LLE_0')
        keys.append('lens_LLE_1')
        LLE = manifold.locally_linear_embedding(df_minmax,
                                                n_neighbors=3,
                                                n_components=2,
                                                random_state=0)[0]
        values.append(scaler.fit_transform(LLE[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(LLE[:, 1].reshape(-1, 1)))

    # Multi-dimensional scaling
    if get_MDS:
        keys.append('lens_MDS_0')
        keys.append('lens_MDS_1')
        MDS = manifold.MDS(n_components=2).fit_transform(df_minmax)
        values.append(scaler.fit_transform(MDS[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(MDS[:, 1].reshape(-1, 1)))

    # Spectral embedding
    if get_spectral_embedding:
        keys.append('lens_spectral_embedding_0')
        keys.append('lens_spectral_embedding_1')
        L = manifold.SpectralEmbedding(
            n_components=2, n_neighbors=1,
            random_state=0).fit_transform(df_minmax)
        values.append(scaler.fit_transform(L[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(L[:, 1].reshape(-1, 1)))

    # Truncated SVD
    if get_SVD:
        keys.append('lens_SVD_0')
        keys.append('lens_SVD_1')
        svd = TruncatedSVD(n_components=2,
                           random_state=42).fit_transform(df_minmax)
        values.append(scaler.fit_transform(svd[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(svd[:, 1].reshape(-1, 1)))

    lenses_dimred = dict(zip(keys, values))
    return lenses_dimred
# (truncated: the line below is the tail of a clusterer definition whose
# opening is missing from this fragment)
                     n_jobs=None)

tda_lens = np.log2(data[feature['5']].to_numpy())
# min(tda_lens[:,0]) == -3.77595972578207
# max(tda_lens[:,0]) == 18.194602975157967
# min(tda_lens[:,1]) == -4.832890014164741
# max(tda_lens[:,1]) == 15.189531985610547

precfg_tda_covering_scheme = dict()
precfg_tda_covering_scheme['lens_bound_rounding0'] = 0.5
precfg_tda_covering_scheme['lens_bound_rounding1'] = 0.5
precfg_tda_covering_scheme['intvls_count0'] = 8
precfg_tda_covering_scheme['intvls_count1'] = 8
precfg_tda_covering_scheme['intvls_overlap0'] = 0.4
precfg_tda_covering_scheme['intvls_overlap1'] = 0.4
tda_covering_scheme = make_tda_covering_scheme(tda_lens,
                                               precfg_tda_covering_scheme,
                                               verbo_lvl)

tda_mapper = km.KeplerMapper(verbose=verbo_lvl)
tda_model = tda_mapper.map(X=tda_data,
                           lens=tda_lens,
                           cover=tda_covering_scheme,
                           clusterer=tda_clusterer,
                           remove_duplicate_nodes=True)
tda_mapper.visualize(tda_model,
                     path_html=filename_tda_model,
                     title=title_tda_model)
def def_lenses_geometry(df, fs, get_density, get_eccentricity,
                        eccentricity_exponent, get_inf_centrality, others,
                        metric):
    scaler = MinMaxScaler()
    # .to_numpy(): DataFrame.as_matrix() was removed in pandas 1.0
    X = df[fs].to_numpy()

    if metric == 'cosine':
        X_cosine_distance = cosine_similarity(X)
        X_dist = np.abs(X_cosine_distance - 1)
    if metric == 'euclidean':
        X_dist = euclidean_distances(X)
    if metric == 'correlation':
        X_dist = pairwise_distances(X, metric='correlation')

    keys = []
    values = []

    # density - see: https://scikit-learn.org/stable/modules/density.html
    if get_density:
        keys.append('lens_density')
        # calc bandwidth using Scott's Rule, see
        # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gaussian_kde.html
        n = np.shape(X)[0]
        d = np.shape(X)[1]
        bandwidth = n ** (-1. / (d + 4))
        # calc density
        kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(X)
        density = kde.score_samples(X)
        values.append(scaler.fit_transform(density.reshape(-1, 1)))

    # eccentricity
    if get_eccentricity:
        keys.append('lens_eccentricity')
        a = X_dist ** eccentricity_exponent
        b = np.sum(a, axis=1)
        c = b / np.shape(X_dist)[0]
        eccentricity = c ** (1 / eccentricity_exponent)
        values.append(scaler.fit_transform(eccentricity.reshape(-1, 1)))

    # inf centrality
    if get_inf_centrality:
        keys.append('lens_inf_centrality')
        inf_centrality = np.amax(X_dist, axis=1)
        values.append(scaler.fit_transform(inf_centrality.reshape(-1, 1)))

    mapper = km.KeplerMapper()
    if others:
        # renamed loop variable: the original reused `metric`, shadowing
        # the distance-metric parameter above
        for proj in ["sum", "mean", "median", "max", "min", "std",
                     "dist_mean", "l2norm"]:
            keys.append("lens_{}".format(proj))
            values.append(
                mapper.fit_transform(X, projection=proj,
                                     scaler=MinMaxScaler()))

    lenses_geometry = dict(zip(keys, values))
    return lenses_geometry
def full(n_layer=2, n_file=2):
    # Get weights
    model = VGG16(weights='imagenet', include_top=False)
    weights = model.layers[n_layer].get_weights()[0]

    # Reshape weights
    s = weights.shape
    kern = weights.transpose([2, 3, 0, 1]).reshape(s[2] * s[3], s[0] * s[0])
    kern_disp = kern.reshape(len(kern), s[0], s[0])

    # Normalize
    kern_means = kern.mean(axis=1)
    kern_std = kern.std(axis=1)
    kern_scaled = kern
    kern_scaled = np.asarray([kern_scaled[i] - kern_means[i]
                              for i in range(len(kern_scaled))])
    kern_scaled = np.asarray([kern_scaled[i] / kern_std[i]
                              for i in range(len(kern_scaled))])

    # Select top density points
    kern20nn = top_dens_nn(kern_scaled, n=100, p=0.3)

    name = 'VGG_layer_' + str(n_file)
    proj_pca = PCA(n_components=2, whiten=False)
    mapper = km.KeplerMapper(verbose=1)
    lens = mapper.fit_transform(kern20nn, projection=proj_pca, scaler=None,
                                distance_matrix=False)

    plt.figure()
    plt.scatter(lens[:, 0], lens[:, 1], s=5)
    plt.title(name)

    V = kern20nn.std(axis=0)

    # d = distance.cdist(kern20nn, kern20nn)
    # Z = linkage(d, 'single')
    # plt.figure()
    # dendrogram(Z)
    # plt.show()
    # lens = np.zeros(Z.shape[0] + 1)
    # lens[:-1] = Z[:, 2]
    # lens[-1] = d.max()
    # hst, bins = np.histogram(lens, bins=64)
    # plt.figure()
    # plt.hist(lens, bins=64)
    # z = np.nonzero(hst == 0)[0]
    # print(hst[z[0]:len(hst)].sum())
    # print(z.shape)
    # print(z[:10])

    graph = mapper.map(
        lens, kern20nn,
        # clusterer=AgglomerativeClustering(n_clusters=2, linkage='single',
        #                                   affinity='euclidean'),
        clusterer=Single_linkage(),
        # clusterer=DBSCAN(metric=SNE(V)),
        coverer=km.Cover(nr_cubes=30, overlap_perc=0.66),
    )
    ht = mapper.visualize(graph, path_html=name + ".html", title=name)
    return graph
def mapper_parameter_gridsearch(df, fs, labels, metric, lenses_dict,
                                parameter_values, num_connected_components,
                                filepath):
    mapper = km.KeplerMapper()
    X = np.array(df[fs])

    # rows for the results dataframe
    df_temp = []

    for lens1, lens2, int1, int2, pc1, pc2, eps in parameter_values:
        # Combine lenses
        lens = np.c_[lenses_dict[lens1], lenses_dict[lens2]]

        if metric == 'cosine':
            X_cosine_distance = cosine_similarity(X)
            X_dist = np.abs(X_cosine_distance - 1)
            scomplex = mapper.map(lens, X_dist,
                                  cover=km.cover.Cover(
                                      n_cubes=[int1, int2],
                                      perc_overlap=[pc1, pc2]),
                                  clusterer=DBSCAN(metric='precomputed',
                                                   eps=eps,
                                                   min_samples=1),
                                  precomputed=True)
        if metric == 'euclidean':
            scomplex = mapper.map(lens, X,
                                  cover=km.cover.Cover(
                                      n_cubes=[int1, int2],
                                      perc_overlap=[pc1, pc2]),
                                  clusterer=DBSCAN(metric='euclidean',
                                                   eps=eps,
                                                   min_samples=1),
                                  precomputed=False)
        if metric == 'correlation':
            scomplex = mapper.map(lens, X,
                                  cover=km.cover.Cover(
                                      n_cubes=[int1, int2],
                                      perc_overlap=[pc1, pc2]),
                                  clusterer=DBSCAN(metric='correlation',
                                                   eps=eps,
                                                   min_samples=1),
                                  precomputed=False)

        # Calculate number of connected components
        n_v, n_cc = count_connected_components(scomplex)

        # Append data to the list only if the complex has
        # num_connected_components or fewer connected components
        if n_cc <= num_connected_components:
            df_temp.append([lens1, lens2, int1, int2, pc1, pc2, eps,
                            n_v, n_cc])

    # Create dataframe
    print('Saving to data frame...')
    columns = ['lens1', 'lens2', 'lens1_n_cubes', 'lens2_n_cubes',
               'lens1_perc_overlap', 'lens2_perc_overlap', 'eps',
               'n_vertices', 'n_connected_components']
    df_sc = pd.DataFrame(data=df_temp, columns=columns)

    # Save df to file
    print('Saving to file...')
    df_sc.to_csv(filepath)
    print('Done...')
    return df_sc
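# A minimal sketch (not from the original code) of how the lens dictionaries
# built by def_lenses_features / def_lenses_dimred / def_lenses_geometry could
# feed mapper_parameter_gridsearch. The feature list, parameter grid, and
# output path are illustrative assumptions.
import itertools

def run_gridsearch_example(df, fs, labels):
    lenses = {}
    lenses.update(def_lenses_features(df, fs))
    lenses.update(def_lenses_dimred(df, fs, True, False, False, False,
                                    False, False))
    # every (lens pair, cover, DBSCAN eps) combination to try
    parameter_values = itertools.product(
        ['lens_pca_0'], ['lens_pca_1'],   # lens1, lens2
        [5, 10], [5, 10],                 # n_cubes per lens
        [0.3], [0.3],                     # perc_overlap per lens
        [0.5, 1.0])                       # DBSCAN eps
    return mapper_parameter_gridsearch(df, fs, labels, 'euclidean', lenses,
                                       parameter_values, 1, 'gridsearch.csv')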
def getMapper():
    return kmapper.KeplerMapper(verbose=0)
# (truncated: this snippet starts inside a loop over model layers)
filters, biases = layer.get_weights()
print(layer.name, filters.shape)
# print(filters.shape)
# print(filters)

print(model_djia.layers[5].get_weights()[0].shape)
# print(model_djia.layers[5].get_weights()[0][4][4][63])

raw_data = []
for i in range(5):
    for j in range(5):
        for k in range(64):
            raw_data.append(model_djia.layers[14].get_weights()[0][i][j][k])
raw_data_arr = numpy.array(raw_data)
print(raw_data)
# print(len(model_djia.layers[5].get_weights()[0].flatten()))

mapper = kmapper.KeplerMapper(verbose=1)  # select value for verbose
neigh.fit(raw_data_arr)

# from sklearn.decomposition import PCA
pca = PCA(n_components=2)  # select value for n_components
# data_trans = mapper.fit_transform(filters, projection=[0, 1])
# choose which projection to use
projected_data = mapper.project(raw_data_arr, projection=pca)
# projected_data = mapper.project(raw_data_arr, "knn_distance_5")
# change which kind of projection to use
# lens should be equal to the data_trans or projected_data
# choose which clusterer to use
# choose which cover to use
simplicial_complex = mapper.map(projected_data)
# (truncated: the visualize() call below is cut off mid-argument-list)
mapper.visualize(simplicial_complex,
                 color_function=None,
# (truncated: the opening ''' of the commented-out block below is missing)
               for x in imgs])
    col += 1

col = 5
for t in range(10, 60, 10):
    data[:, col] = [len(np.unique(label(threshold(x, t)))) for x in imgs]
    col += 1
'''

from scipy.misc import imresize

data = arr([imresize(img, (256, 256)) for img in imgs])
data = data.reshape(data.shape[0], -1).astype(np.float64)

import sklearn

mapper = km.KeplerMapper()
data_projected = mapper.fit_transform(
    data,
    projection=[0, 1],
    # projection='knn_distance_5',
    scaler=sklearn.preprocessing.MinMaxScaler())
graph = mapper.map(
    data_projected,
    # inverse_X=data,
    nr_cubes=10,
    # perc_overlap=0.1,
    clusterer=sklearn.cluster.DBSCAN())
_ = mapper.visualize(graph, path_html="tda_white.html", inverse_X=data)
# inverse_X_names=[
# (truncated: this snippet starts inside nested loops that fill the square
# distance matrix from a condensed one)
        if row == col:
            sq_distance_matrix[row, col] = 0.0
        else:
            sq_distance_matrix[row, col] = distance_matrix[index]
            sq_distance_matrix[col, row] = distance_matrix[index]

# In[73]:

numerical_cols = [
    sub_df.columns[pos] for pos, item in enumerate(sub_df.dtypes)
    if item in [np.float64, np.int64]
]
new_data = sub_df[numerical_cols].to_numpy()
dimensional_data = np.array([row[0] for row in new_data])
print(dimensional_data)

mapper = km.KeplerMapper(verbose=1)
graph = mapper.map(dimensional_data,
                   X=sq_distance_matrix,
                   precomputed=True,
                   cover=km.Cover(n_cubes=35, perc_overlap=0.2),
                   clusterer=sklearn.cluster.DBSCAN(algorithm='auto',
                                                    eps=0.40,
                                                    leaf_size=30,
                                                    metric='precomputed',
                                                    min_samples=3,
                                                    n_jobs=4))

# In[74]:

# Visualize it
# (truncated: the visualize() call below is cut off mid-argument-list)
mapper.visualize(graph,
def mapper():
    mapper = km.KeplerMapper(verbose=0)
    data = np.random.rand(100, 2)
    graph = mapper.map(data)
    return graph