def dimensionality_reduction(sample: pd.Series,
                             background_df: pd.DataFrame,
                             genes: List[str],
                             col: str,
                             method='trimap') -> Tuple[pd.DataFrame, hv.Scatter]:
    """
    Wrapper for returning a trimap plot with columns for `color_index` and `size_index`

    Args:
        sample: n-of-1 sample. Gets its own label
        background_df: Background dataset
        genes: Genes to use in dimensionality reduction
        col: Column to use for color_index
        method: Method of dimensionality reduction. `trimap` or `tsne`

    Returns:
        DataFrame of embedded points and Holoviews Scatter object of the plot with associated vdims
    """
    assert method in ('trimap', 'tsne'), '`method` must be either `trimap` or `tsne`'
    # DataFrame.append was removed in pandas 2.0; this snippet assumes an older pandas.
    combined = background_df.append(sample)
    if method == 'trimap':
        reduced = trimap.TRIMAP().fit_transform(combined[genes])
    else:
        reduced = t_sne.TSNE().fit_transform(combined[genes])
    df = pd.DataFrame(reduced, columns=['x', 'y'])
    df[col] = background_df[col].tolist() + [f'N-of-1 - {sample[col]}']
    df['size'] = [1 for _ in background_df[col]] + [5]
    return df, hv.Scatter(data=df, kdims=['x'], vdims=['y', col, 'size'])
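
# Hypothetical usage sketch for the wrapper above. The background data, gene names and the
# `tissue` column are made up; it also assumes pandas < 2.0 (for DataFrame.append) and a
# trimap version that accepts the DataFrame slice passed by the wrapper.
import numpy as np
import pandas as pd

_rng = np.random.default_rng(0)
background = pd.DataFrame(_rng.random((100, 3)), columns=['g1', 'g2', 'g3'])
background['tissue'] = 'background'
sample = pd.Series({'g1': 0.1, 'g2': 0.2, 'g3': 0.3, 'tissue': 'tumor'}, name='n_of_1')
df_embed, scatter = dimensionality_reduction(sample, background,
                                             genes=['g1', 'g2', 'g3'],
                                             col='tissue', method='trimap')
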
def calculateTestResults():
    # `embeddings` is expected to hold (digits_embedding, fashion_embedding) pairs;
    # `digits_labels`, `fashion_labels`, `digits_hd_data`, `fashion_hd_data` and
    # `results` are module-level globals. `pb` is progressbar2, `tri` is trimap.
    for x in pb.progressbar(iter(embeddings), redirect_stdout=True):
        silh = metrics.silhouette_score(x[0], digits_labels)
        davd = metrics.davies_bouldin_score(x[0], digits_labels)
        globalStruct = tri.TRIMAP(verbose=False).global_score(digits_hd_data, x[0])
        localStruct = manifold.trustworthiness(digits_hd_data, x[0])
        results.append([silh, davd, globalStruct, localStruct])
        print("Processed a result")

        silh2 = metrics.silhouette_score(x[1], fashion_labels)
        davd2 = metrics.davies_bouldin_score(x[1], fashion_labels)
        globalStruct2 = tri.TRIMAP(verbose=False).global_score(fashion_hd_data, x[1])
        localStruct2 = manifold.trustworthiness(fashion_hd_data, x[1])
        results.append([silh2, davd2, globalStruct2, localStruct2])
        print("Processed a result")
def run_DR_Algorithm(name, data_features):
    """
    Runs each DR algorithm and returns the embedding.

    Parameters:
    -----------------
    name : String, name of algorithm
    data_features : nD array, original features

    Returns:
    -----------------
    points : nD array, embedding
    """
    int_dim = 2
    if name == "UMAP":
        reducer = umap.UMAP(n_neighbors=15, n_components=int_dim)
        points = reducer.fit_transform(data_features)
    elif name == "tSNE":
        tsne = TSNE(n_components=int_dim, perplexity=30)
        points = tsne.fit_transform(data_features)
    elif name == "PCA":
        pca = PCA(n_components=int_dim)
        points = pca.fit_transform(data_features)
    elif name == "Trimap":
        points = trimap.TRIMAP().fit_transform(data_features)
    elif name == "M_Core_tSNE":
        tsne = M_TSNE(n_components=int_dim, perplexity=30, n_jobs=8)
        points = tsne.fit_transform(data_features)
    elif name == "MDS":
        mds = MDS(n_components=int_dim)
        points = mds.fit_transform(data_features)
    elif name == "Isomap":
        isomap = Isomap(n_components=int_dim)
        points = isomap.fit_transform(data_features)
    elif name == "KernelPCA":
        kpca = KernelPCA(n_components=int_dim)
        points = kpca.fit_transform(data_features)
    else:
        # avoid an UnboundLocalError on `points` for unrecognized names
        raise ValueError("Unknown DR algorithm: {}".format(name))
    return points
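
# Illustrative call for the dispatcher above (assumes the libraries it dispatches to --
# umap, trimap, and sklearn's TSNE/PCA/MDS/Isomap/KernelPCA -- are imported in the module;
# the digits dataset is just a stand-in):
from sklearn.datasets import load_digits

digits = load_digits()
embedding_2d = run_DR_Algorithm("Trimap", digits.data)
print(embedding_2d.shape)  # expected: (1797, 2)
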
def experiment(X, method='PaCMAP', **kwargs):
    if method == 'PaCMAP':
        transformer = PaCMAP(**kwargs)
    elif method == 'UMAP':
        transformer = umap.UMAP(**kwargs)
    elif method == 'TriMAP':
        transformer = trimap.TRIMAP(**kwargs)
    else:
        # replaces the original print + assert(False) with an explicit error
        raise ValueError("Incorrect method specified: {}".format(method))
    start_time = time()
    X_low = transformer.fit_transform(X)
    total_time = time() - start_time
    return X_low, total_time
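
# Minimal timing sketch for the helper above, using random data. It assumes the same
# imports as the snippet (umap, trimap, PaCMAP, and `time` from the time module).
import numpy as np

_rng = np.random.default_rng(0)
X_demo = _rng.normal(size=(500, 30)).astype(np.float32)
for _m in ('TriMAP', 'UMAP'):
    _emb, _secs = experiment(X_demo, method=_m)
    print(_m, _emb.shape, "{:.2f}s".format(_secs))
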
def embedding(X, args):
    print("... preprocess: normalization and PCA")
    method = args.method
    preprocess_start_time = time.time()
    X = normalize(X)
    if X.shape[1] > 20:
        prep_model = PCA(n_components=20)
        X = prep_model.fit_transform(X)
    preprocess_interval = time.time() - preprocess_start_time

    """
    print("... saving preprocessed data")
    filename_x = output_path + "/" + feature + "_x.npy"
    filename_y = output_path + "/" + feature + "_y.npy"
    np.save(filename_x, X)
    np.save(filename_y, Y)
    """

    print("... embedding")
    embedding_start_time = time.time()
    if method == "song":
        import song
        # SONG is supervised here: `Y`, `n_max_epoch` and `b` are not parameters of this
        # function and must be available in the surrounding scope (see the full `plot`
        # routine below, where they are passed in explicitly).
        model = song.song_.SONG(n_max_epoch=n_max_epoch, b=b)
        model.fit(X, Y)
        embedding = model.raw_embeddings[:, :]
    else:
        if method == "tsne":
            model = TSNE(n_components=2, random_state=42)
        elif method == "trimap":
            model = trimap.TRIMAP(n_iters=500)
        else:
            model = umap.UMAP()
        embedding = model.fit_transform(X)
    embedding_interval = time.time() - embedding_start_time

    print("Preprocess time\t{}\n".format(preprocess_interval))
    print("Embedding time\t{}\n".format(embedding_interval))
    return embedding
def trimap_fromR(data, n_dims, n_inliers, n_outliers, n_random, distance, lr,
                 n_iters, knn_tuple, apply_pca, opt_method, verbose, weight_adj,
                 return_seq):
    import trimap
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    import sys

    class Capturing(list):
        """Capture everything TriMap prints to stdout so it can be returned to the caller."""

        def __enter__(self):
            self._stdout = sys.stdout
            sys.stdout = self._stringio = StringIO()
            return self

        def __exit__(self, *args):
            self.extend(self._stringio.getvalue().splitlines())
            del self._stringio  # free up some memory
            sys.stdout = self._stdout

    # NOTE: the knn_tuple argument is currently ignored and forced to None.
    knn_tuple = None
    with Capturing() as output:
        embedding = trimap.TRIMAP(
            n_dims=int(n_dims),
            n_inliers=int(n_inliers),
            n_outliers=int(n_outliers),
            n_random=int(n_random),
            distance=str(distance),
            lr=float(lr),
            n_iters=int(n_iters),
            apply_pca=bool(apply_pca),
            opt_method=str(opt_method),
            verbose=bool(verbose),
            weight_adj=float(weight_adj),
            return_seq=bool(return_seq)).fit_transform(data)
    return [output, embedding]
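
# The wrapper above mirrors an R/reticulate calling convention (hence the explicit casts).
# A direct Python call might look like the sketch below; the argument values are only
# examples and are not taken from the source.
import numpy as np

_out, _emb = trimap_fromR(
    np.random.rand(300, 10), n_dims=2, n_inliers=10, n_outliers=5, n_random=5,
    distance="euclidean", lr=1000.0, n_iters=400, knn_tuple=None, apply_pca=True,
    opt_method="dbd", verbose=True, weight_adj=500.0, return_seq=False)
print(_emb.shape, len(_out))  # embedding coordinates plus the captured log lines
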
def plot(input_path, resample, output_path, feature, method, limit_length, n_max_epoch, b):
    x = np.load(input_path + "/data_x." + feature + ".npy")
    y = np.load(input_path + "/data_y." + feature + ".npy")
    print("X (input file):", x.shape)

    if len(x.shape) == 3:
        print("... generating sliding window")
        s = np.load(input_path + "/data_s." + feature + ".npy")
        x, y = make_sliding_window(x, y, s, window=10, limit_length=limit_length)
        print("X (sliding window):", x.shape)

    if resample is not None:
        print("... resampling")
        idx = list(range(x.shape[0]))
        np.random.shuffle(idx)
        idx = idx[:resample]
        X = x[idx, :]
        Y = y[idx]
    else:
        X = x
        Y = y

    print("... preprocess: normalization and PCA")
    preprocess_start_time = time.time()
    X = normalize(X)
    if X.shape[1] > 20:
        prep_model = PCA(n_components=20)
        X = prep_model.fit_transform(X)
    preprocess_interval = time.time() - preprocess_start_time

    """
    print("... saving preprocessed data")
    filename_x = output_path + "/" + feature + "_x.npy"
    filename_y = output_path + "/" + feature + "_y.npy"
    np.save(filename_x, X)
    np.save(filename_y, Y)
    """

    print("... embedding")
    embedding_start_time = time.time()
    if method == "song":
        import song
        model = song.song_.SONG(n_max_epoch=n_max_epoch, b=b)
        model.fit(X, Y)
        embedding = model.raw_embeddings[:, :]
    else:
        if method == "tsne":
            model = TSNE(n_components=2, random_state=42)
        elif method == "trimap":
            model = trimap.TRIMAP(n_iters=500)
        else:
            model = umap.UMAP()
        embedding = model.fit_transform(X)
    embedding_interval = time.time() - embedding_start_time

    print("... plotting embedded points")
    os.makedirs(output_path, exist_ok=True)
    np.save(output_path + "/" + method + "." + feature + ".npy", embedding)
    print(method, "time:", embedding_interval)
    title = method
    filename = output_path + "/" + method + "." + feature + ".png"
    plot_scatter(embedding, Y, filename, title)

    print("... evaluation")
    classifier = KNeighborsClassifier(n_neighbors=10)
    pred_y = cross_val_predict(classifier, embedding, Y, cv=5)
    acc = sklearn.metrics.accuracy_score(Y, pred_y)
    print("accuracy:", acc)
    with open(output_path + "/" + method + "." + feature + ".txt", "w") as fp:
        fp.write("Preprocess time\t{}\n".format(preprocess_interval))
        fp.write("Embedding time\t{}\n".format(embedding_interval))
        fp.write("Accuracy\t{}\n".format(acc))
def run_reduce_dim(
    adata,
    X_data,
    n_components,
    n_pca_components,
    reduction_method,
    embedding_key,
    n_neighbors,
    neighbor_key,
    cores,
    kwargs,
):
    if reduction_method == "trimap":
        import trimap

        triplemap = trimap.TRIMAP(
            n_inliers=20,
            n_outliers=10,
            n_random=10,
            distance="euclidean",  # cosine
            weight_adj=1000.0,
            apply_pca=False,
        )
        X_dim = triplemap.fit_transform(X_data)

        adata.obsm[embedding_key] = X_dim
        adata.uns[neighbor_key] = {
            "params": {"n_neighbors": n_neighbors, "method": reduction_method},
            # "connectivities": "connectivities",
            # "distances": "distances",
            # "indices": "indices",
        }
    elif reduction_method == "diffusion_map":
        pass
    elif reduction_method.lower() == "tsne":
        try:
            from fitsne import FItSNE
        except ImportError:
            print(
                "Please first install fitsne to perform accelerated tSNE method. Install instruction is "
                "provided here: https://pypi.org/project/fitsne/")

        X_dim = FItSNE(X_data, nthreads=cores)  # use FItSNE
        # bh_tsne = TSNE(n_components=n_components)
        # X_dim = bh_tsne.fit_transform(X)

        adata.obsm[embedding_key] = X_dim
        adata.uns[neighbor_key] = {
            "params": {"n_neighbors": n_neighbors, "method": reduction_method},
            # "connectivities": "connectivities",
            # "distances": "distances",
            # "indices": "indices",
        }
    elif reduction_method == "umap":
        _umap_kwargs = {
            "n_components": n_components,
            "metric": "euclidean",
            "min_dist": 0.5,
            "spread": 1.0,
            "n_epochs": 0,
            "alpha": 1.0,
            "gamma": 1.0,
            "negative_sample_rate": 5,
            "init_pos": "spectral",
            "random_state": 0,
            "densmap": False,
            "dens_lambda": 2.0,
            "dens_frac": 0.3,
            "dens_var_shift": 0.1,
            "output_dens": False,
            "verbose": False,
        }
        umap_kwargs = update_dict(_umap_kwargs, kwargs)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            (
                mapper,
                graph,
                knn_indices,
                knn_dists,
                X_dim,
            ) = umap_conn_indices_dist_embedding(X_data, n_neighbors, **umap_kwargs)  # X

        adata.obsm[embedding_key] = X_dim
        knn_dists = knn_to_adj(knn_indices, knn_dists)
        adata.uns[neighbor_key] = {
            "params": {"n_neighbors": n_neighbors, "method": reduction_method},
            # "connectivities": "connectivities",
            # "distances": "distances",
            "indices": knn_indices,
        }

        layer = neighbor_key.split("_")[0] if neighbor_key.__contains__("_") else None
        conn_key = "connectivities" if layer is None else layer + "_connectivities"
        dist_key = "distances" if layer is None else layer + "_distances"
        adata.obsp[conn_key], adata.obsp[dist_key] = graph, knn_dists
        adata.uns["umap_fit"] = {"fit": mapper, "n_pca_components": n_pca_components}
    elif reduction_method == "psl":
        adj_mat, X_dim = psl(X_data, d=n_components, K=n_neighbors)  # this need to be updated
        adata.obsm[embedding_key] = X_dim
        adata.uns[neighbor_key] = adj_mat
    else:
        raise Exception(
            "reduction_method {} is not supported.".format(reduction_method))

    return adata
import trimap
import numpy
from sklearn.datasets import fetch_openml

mnist = fetch_openml(name="Fashion-MNIST")

embedding = trimap.TRIMAP().fit_transform(mnist['data'])

# Write the "<n_points> <n_dims>" header and the coordinates into the same file handle;
# calling numpy.savetxt with the file name instead would reopen and truncate the file,
# discarding the header that was just written.
with open(r"Trimap_2D.txt", "w") as output:
    output.write("70000 2 \n")
    numpy.savetxt(output, embedding)
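
# Quick sanity check for the export above: the file should contain one header line
# followed by 70000 rows of 2D coordinates.
check = numpy.loadtxt("Trimap_2D.txt", skiprows=1)
print(check.shape)  # expected: (70000, 2)
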
# X = X / 255.
# L = L[:10000]
X = X[:n]
L = L[:n].astype(int)
print("Dataset size = ({},{})".format(X.shape[0], X.shape[1]))

# y_trimap = trimap.TRIMAP(verbose=True).fit_transform(X)

# # Outlier
# index = 9423
# c = np.random.normal(size=X.shape[1])  # create a random direction
# Xo = X.copy()
# Xo[index,:] += 5.0 * c

# NOTE: `hub` is not a parameter of the released trimap package; this snippet appears to
# rely on a hubness-aware TriMap variant that accepts e.g. hub='mp3_gauss'.
yo_trimap = trimap.TRIMAP(verbose=True, hub='mp3_gauss').fit_transform(X)
# yo_trimap = umap.UMAP().fit_transform(X)

plt.scatter(yo_trimap[:, 0], yo_trimap[:, 1], s=0.1, c=cols[L, :])
# plt.scatter(yo_trimap[index,0], yo_trimap[index,1], s=80, c='red', marker='x')
plt.show()

# yo_pca = PCA(n_components=2).fit_transform(Xo)
# plt.scatter(yo_pca[:,0], yo_pca[:,1], s=0.1, c=cols[L,:])
# plt.scatter(yo_pca[index,0], yo_pca[index,1], s=80, c='red', marker='x')
# plt.show()

# AUC
auc = calculate_AUC(X, yo_trimap)
print("AUC: ", auc)
def __init__(self, outdim=2, **kwargs):
    import trimap

    self.reducer = trimap.TRIMAP(n_dims=outdim)
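
# The __init__ above is presumably part of a small reducer-wrapper class; a minimal,
# hypothetical version of such a wrapper is sketched below (class and method names are
# illustrative, not taken from the source).
class TrimapReducer:
    def __init__(self, outdim=2, **kwargs):
        import trimap
        self.reducer = trimap.TRIMAP(n_dims=outdim)

    def fit_transform(self, X):
        # delegate directly to the wrapped TriMap instance
        return self.reducer.fit_transform(X)
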
def apply_panel_of_manifold_learning_methods(X, color, Color_by_branches=[], precomputed_results={},
                                             color_map='cool', ColorByFeature='', variable_names=[],
                                             ElMapFolder='', n_neighbors=20, n_components=2,
                                             title_fontsize=30, points_size=30, methods_to_apply=[],
                                             n_subplots_x=4, n_subplots_y=3, figsizex=20, figsizey=20):
    # Note: the mutable default arguments (lists/dict) are shared between calls; in
    # particular, results cached in `precomputed_results` persist across invocations.
    viz_results = precomputed_results

    # Set figure parameters
    n_points = X.shape[0]
    # cmap = plt.cm.Paired
    # cmap = 'hot'
    cmap = color_map
    # cmap = plt.cm.tab20
    plt.style.use('ggplot')
    fig = plt.figure(figsize=(figsizex, figsizey))

    applyAllMethods = True
    if len(methods_to_apply) > 0:
        applyAllMethods = False

    color1 = color
    if len(Color_by_branches) > 0:
        # color1 = vec_labels_by_branches
        color2 = Color_by_branches
        color_seq = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 0],
                     [1, 0, 0.5], [1, 0.5, 0], [0.5, 0, 1], [0.5, 1, 0],
                     [0.5, 0.5, 1], [0.5, 1, 0.5], [1, 0.5, 0.5],
                     [0, 0.5, 0.5], [0.5, 0, 0.5], [0.5, 0.5, 0], [0.5, 0.5, 0.5],
                     [0, 0, 0.5], [0, 0.5, 0], [0.5, 0, 0],
                     [0, 0.25, 0.5], [0, 0.5, 0.25], [0.25, 0, 0.5], [0.25, 0.5, 0], [0.5, 0, 0.25], [0.5, 0.25, 0],
                     [0.25, 0.25, 0.5], [0.25, 0.5, 0.25], [0.5, 0.25, 0.25], [0.25, 0.25, 0.5], [0.25, 0.5, 0.25],
                     [0.25, 0.25, 0.5], [0.25, 0.5, 0.25], [0.5, 0, 0.25], [0.5, 0.25, 0]]
        color2_unique, color2_count = np.unique(color2, return_counts=True)
        inds = sorted(range(len(color2_count)), key=lambda k: color2_count[k], reverse=True)
        newc = []
        for i, c in enumerate(color2):
            k = np.where(color2_unique == c)[0][0]
            count = color2_count[k]
            # position of k in the frequency-sorted order (`inds` is a list, so compare as an array)
            k1 = np.where(np.array(inds) == k)[0][0]
            k1 = k1 % len(color_seq)
            col = color_seq[k1]
            newc.append(col)
        color2 = newc
        color1 = color2

    if not ColorByFeature == '':
        k = variable_names.index(ColorByFeature)
        # color1 = X_original[:,k]
        color1 = X[:, k]

    onlyDraw = not len(precomputed_results) == 0
    print('Start computations...')

    # some standard methods
    i = 1

    ####################### PCA #########################
    if applyAllMethods or 'PCA' in methods_to_apply:
        pca = PCA(n_components=n_components)
        t0 = time()
        if not onlyDraw or not 'PCA' in precomputed_results:
            Y_PCA = pca.fit_transform(X)
            viz_results['PCA'] = Y_PCA
        else:
            Y_PCA = precomputed_results['PCA']
        t1 = time()
        print("PCA: %.2g sec" % (t1 - t0))
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
        plt.scatter(Y_PCA[:, 0], Y_PCA[:, 1], c=color1, cmap=cmap, s=points_size)
        plt.title("PCA", fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    ### LLE ###
    if applyAllMethods or 'LLE' in methods_to_apply:
        t0 = time()
        if not onlyDraw or not 'LLE' in precomputed_results:
            print('Computing LLE...')
            Y_LLE = manifold.LocallyLinearEmbedding(n_neighbors=n_neighbors, n_components=n_components,
                                                    eigen_solver='auto', method='standard').fit_transform(X)
            viz_results['LLE'] = Y_LLE
        else:
            Y_LLE = viz_results['LLE']
        t1 = time()
        print("%s: %.2g sec" % ('LLE', t1 - t0))
        i += 1
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
        plt.scatter(Y_LLE[:, 0], Y_LLE[:, 1], c=color1, cmap=cmap, s=points_size)
        plt.title("LLE", fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    ### Modified LLE ###
    if applyAllMethods or 'MLLE' in methods_to_apply:
        t0 = time()
        if not onlyDraw or not 'MLLE' in precomputed_results:
            Y_MLLE = manifold.LocallyLinearEmbedding(n_neighbors=n_neighbors, n_components=n_components,
                                                     eigen_solver='auto', method='modified').fit_transform(X)
            viz_results['MLLE'] = Y_MLLE
        else:
            Y_MLLE = viz_results['MLLE']
        t1 = time()
        print("%s: %.2g sec" % ('Modified LLE', t1 - t0))
        i += 1
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
        plt.scatter(Y_MLLE[:, 0], Y_MLLE[:, 1], c=color1, cmap=cmap, s=points_size)
        plt.title("MLLE", fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    ### ISOMAP ###
    if applyAllMethods or 'ISOMAP' in methods_to_apply:
        i += 1
        t0 = time()
        if not onlyDraw or not 'ISOMAP' in precomputed_results:
            Y_ISOMAP = manifold.Isomap(n_neighbors=n_neighbors, n_components=n_components).fit_transform(X)
            viz_results['ISOMAP'] = Y_ISOMAP
        else:
            Y_ISOMAP = viz_results['ISOMAP']
        t1 = time()
        print("Isomap: %.2g sec" % (t1 - t0))
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
        plt.scatter(Y_ISOMAP[:, 0], Y_ISOMAP[:, 1], c=color1, cmap=cmap, s=points_size)
        plt.title("Isomap", fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    ### MDS ###
    if applyAllMethods or 'MDS' in methods_to_apply:
        i += 1
        t0 = time()
        if not onlyDraw or not 'MDS' in precomputed_results:
            mds = manifold.MDS(n_components, max_iter=100, n_init=1)
            Y_MDS = mds.fit_transform(X)
            viz_results['MDS'] = Y_MDS
        else:
            Y_MDS = viz_results['MDS']
        t1 = time()
        print("MDS: %.2g sec" % (t1 - t0))
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
        plt.scatter(Y_MDS[:, 0], Y_MDS[:, 1], c=color1, cmap=cmap, s=points_size)
        plt.title("MDS", fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    ### SpectralEmbedding ###
    if applyAllMethods or 'SE' in methods_to_apply:
        i += 1
        t0 = time()
        if not onlyDraw or not 'SE' in precomputed_results:
            se = manifold.SpectralEmbedding(n_components=n_components, n_neighbors=n_neighbors)
            Y_se = se.fit_transform(X)
            viz_results['SE'] = Y_se
        else:
            Y_se = viz_results['SE']
        t1 = time()
        print("SpectralEmbedding: %.2g sec" % (t1 - t0))
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
        plt.scatter(Y_se[:, 0], Y_se[:, 1], c=color1, cmap=cmap, s=points_size)
        plt.title("SpectralEmbedding", fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    ### t-SNE ###
    if applyAllMethods or 'TSNE' in methods_to_apply:
        i += 1
        t0 = time()
        if not onlyDraw or not 'TSNE' in precomputed_results:
            tsne = manifold.TSNE(n_components=n_components, init='pca', random_state=0, perplexity=100)
            Y_TSNE = tsne.fit_transform(X)
            viz_results['TSNE'] = Y_TSNE
        else:
            Y_TSNE = viz_results['TSNE']
        t1 = time()
        print("t-SNE: %.2g sec" % (t1 - t0))
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
        plt.scatter(Y_TSNE[:, 0], Y_TSNE[:, 1], c=color1, cmap=cmap, s=points_size)
        plt.title("t-SNE", fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    ### UMAP ###
    if applyAllMethods or 'UMAP' in methods_to_apply:
        i += 1
        t0 = time()
        if not onlyDraw or not 'UMAP' in precomputed_results:
            um = UMAP(n_neighbors=n_neighbors, n_components=n_components)
            Y_UMAP = um.fit_transform(X)
            viz_results['UMAP'] = Y_UMAP
        else:
            Y_UMAP = viz_results['UMAP']
        t1 = time()
        print("UMAP: %.2g sec" % (t1 - t0))
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
        plt.scatter(Y_UMAP[:, 0], Y_UMAP[:, 1], c=color1, cmap=cmap, s=points_size)
        plt.title("UMAP", fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')
    ### TRIMAP ###
    if applyAllMethods or 'TRIMAP' in methods_to_apply:
        t0 = time()
        if not onlyDraw or not 'TRIMAP' in precomputed_results:
            Y_TRIMAP = trimap.TRIMAP(verbose=False).fit_transform(X)
            viz_results['TRIMAP'] = Y_TRIMAP
        else:
            Y_TRIMAP = viz_results['TRIMAP']
        t1 = time()
        print("TRIMAP: %.2g sec" % (t1 - t0))
        i += 1
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
        plt.scatter(Y_TRIMAP[:, 0], Y_TRIMAP[:, 1], c=color1, cmap=cmap, s=points_size)
        plt.title("TRIMAP", fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    ### MDE ###
    if applyAllMethods or 'MDE' in methods_to_apply:
        t0 = time()
        if not onlyDraw or not 'MDE' in precomputed_results:
            Y_MDE = pymde.preserve_neighbors(X, embedding_dim=2, verbose=False).embed()
            viz_results['MDE'] = Y_MDE
        else:
            Y_MDE = viz_results['MDE']
        t1 = time()
        print("MDE: %.2g sec" % (t1 - t0))
        i += 1
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
        plt.scatter(Y_MDE[:, 0], Y_MDE[:, 1], c=color1, cmap=cmap, s=points_size)
        plt.title("MDE", fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    ### Autoencoder ###
    if applyAllMethods or 'AUTOENCODER' in methods_to_apply:
        layer_sizes = [64, 32, 16, 8, 4]
        # encoder
        inputs = Input(shape=(X.shape[1],), name='encoder_input')
        x = inputs
        for size in layer_sizes:
            x = Dense(size, activation='relu', kernel_initializer='he_uniform')(x)
        latent = Dense(n_components, kernel_initializer='he_uniform', name='latent_vector')(x)
        encoder = Model(inputs, latent, name='encoder')
        # decoder
        latent_inputs = Input(shape=(n_components,), name='decoder_input')
        x = latent_inputs
        for size in layer_sizes[::-1]:
            x = Dense(size, activation='relu', kernel_initializer='he_uniform')(x)
        outputs = Dense(X.shape[1], activation='sigmoid', kernel_initializer='he_uniform', name='decoder_output')(x)
        decoder = Model(latent_inputs, outputs, name='decoder')
        # autoencoder
        autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
        # model summary
        # encoder.summary()
        # decoder.summary()
        # autoencoder.summary()
        X_01 = (X - X.min()) / (X.max() - X.min())
        autoencoder.compile(loss='mse', optimizer='adam')
        t0 = time()
        if not onlyDraw or not 'AUTOENCODER' in precomputed_results:
            autoencoder.fit(x=X_01, y=X_01, epochs=200, verbose=0)
            Y_AUTOENCODER = encoder.predict(X_01)
            viz_results['AUTOENCODER'] = Y_AUTOENCODER
        else:
            Y_AUTOENCODER = viz_results['AUTOENCODER']
        t1 = time()
        print("Autoencoder: %.2g sec" % (t1 - t0))
        i += 1
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
        plt.scatter(Y_AUTOENCODER[:, 0], Y_AUTOENCODER[:, 1], c=color1, cmap=cmap, s=points_size)
        plt.title("Autoencoder", fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    ### VAE ###
    if applyAllMethods or 'VAE' in methods_to_apply:
        def sampling(args):
            z_mean, z_log_var = args
            epsilon = K.random_normal(shape=(n_components,))
            return z_mean + K.exp(z_log_var) * epsilon

        layer_sizes = [64, 32, 16, 8]
        # encoder
        inputs = Input(shape=(X.shape[1],), name='encoder_input')
        x = inputs
        for size in layer_sizes:
            x = Dense(size, activation='relu', kernel_initializer='he_uniform')(x)
        z_mean = Dense(n_components, kernel_initializer='he_uniform', name='latent_mean')(x)
        z_log_var = Dense(n_components, kernel_initializer='he_uniform', name='latent_sigma')(x)
        z = Lambda(sampling, output_shape=(n_components,))([z_mean, z_log_var])
        encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
        # decoder
        latent_inputs = Input(shape=(n_components,), name='decoder_input_sampling')
        x = latent_inputs
        for size in layer_sizes[::-1]:
            x = Dense(size, activation='relu', kernel_initializer='he_uniform')(x)
        outputs = Dense(X.shape[1], activation='sigmoid', kernel_initializer='he_uniform', name='decoder_output')(x)
        decoder = Model(latent_inputs, outputs, name='decoder')
        # variational autoencoder
        vae = Model(inputs, decoder(encoder(inputs)[2]), name='vae')

        def vae_loss(x, x_decoded_mean):
            xent_loss = K.mean(K.square(x - x_decoded_mean))
            kl_loss = -0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
            # return K.sum(xent_loss, kl_loss)
            # return tf.convert_to_tensor(kl_loss)
            return xent_loss + kl_loss

        vae.compile(optimizer='adam', loss=vae_loss)
        X_01 = (X - X.min()) / (X.max() - X.min())
        # X_01 = X.copy()
        t0 = time()
        if not onlyDraw or not 'VAE' in precomputed_results:
            vae.fit(x=X_01, y=X_01, epochs=200, verbose=0)
            Y_VAE = encoder.predict(X_01)[0]
            viz_results['VAE'] = Y_VAE
        else:
            Y_VAE = viz_results['VAE']
        t1 = time()
        print("VAE: %.2g sec" % (t1 - t0))
        i += 1
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
        plt.scatter(Y_VAE[:, 0], Y_VAE[:, 1], c=color1, cmap=cmap)
        plt.title("VAE", fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    plt.tight_layout()
    return viz_results
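
# Hedged usage sketch for the panel function above: a small random dataset and a subset
# of methods that avoids the heavier deep-learning branches. It assumes the imports the
# function relies on (numpy, matplotlib, NullFormatter, sklearn's PCA/manifold, UMAP,
# trimap, time) are available in the module.
import numpy as np

_X_demo = np.random.rand(300, 10)
_labels_demo = np.random.randint(0, 3, size=300)
panel_results = apply_panel_of_manifold_learning_methods(
    _X_demo, _labels_demo,
    methods_to_apply=['PCA', 'TSNE', 'UMAP', 'TRIMAP'],
    n_subplots_x=2, n_subplots_y=2)
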
def run_DR_Algorithm(name, data_features, data_target, int_dim=2):
    """
    Runs each DR algorithm and returns the embedding.

    Parameters
    ----------
    name : String
        name of the DR algorithm
    data_features : nD array
        original features
    data_target : list
        original labels
    int_dim : integer
        intrinsic dimensionality

    Returns
    ----------
    points : nD array
        embedding
    """
    if name == "UMAP":
        reducer = umap.UMAP()
        points = reducer.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "tSNE":
        tsne = TSNE(n_components=int_dim, n_iter=1000, random_state=RANDOM_STATE)
        points = tsne.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "PCA":
        pca = PCA(n_components=int_dim)
        points = pca.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "Trimap":
        points = trimap.TRIMAP().fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "FIt_SNE":
        points = fast_tsne(data_features, perplexity=50, seed=42)
        plot_scatter(points, data_target)
    elif name == "M_Core_tSNE":
        tsne = M_TSNE(n_jobs=4)
        points = tsne.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "dPCA":
        dpca = dPCA.dPCA(labels='st', n_components=int_dim)
        points = dpca.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "LTSA":
        ltsa = LocallyLinearEmbedding(n_neighbors=12, n_components=int_dim, method='ltsa')
        points = ltsa.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "MLLE":
        mlle = LocallyLinearEmbedding(n_neighbors=6, n_components=int_dim, method='modified')
        points = mlle.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "openTSNE":
        tsne = opTSNE(
            n_components=int_dim,
            perplexity=30,
            learning_rate=200,
            n_jobs=4,
            initialization="pca",
            metric="euclidean",
            early_exaggeration_iter=250,
            early_exaggeration=12,
            n_iter=750,
            neighbors="exact",
            negative_gradient_method="bh",
        )
        points = tsne.fit(data_features)
        plot_scatter(points, data_target)
    elif name == "MDS":
        mds = MDS(n_components=int_dim)
        points = mds.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "Isomap":
        isomap = Isomap(n_components=int_dim)
        points = isomap.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "KernelPCA":
        kpca = KernelPCA(n_components=int_dim, kernel='linear')
        points = kpca.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "LLE":
        lle = LocallyLinearEmbedding(n_components=int_dim)
        points = lle.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "HessianLLE":
        hlle = LocallyLinearEmbedding(n_neighbors=6, n_components=int_dim, method='hessian')
        points = hlle.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "LEM":
        lapeig = SpectralEmbedding(n_components=int_dim)
        points = lapeig.fit_transform(data_features)
        plot_scatter(points, data_target)
    elif name == "LVis":
        outdim = int_dim
        threads = 24
        samples = -1
        prop = -1
        alpha = -1
        trees = -1
        neg = -1
        neigh = -1
        gamma = -1
        perp = -1
        # DataFrame.as_matrix() was removed in pandas 1.0; use .to_numpy() instead.
        features_matrix = data_features.to_numpy()
        with open('largevis_input.txt', 'w') as out:
            out.write("{}\t{}\n".format(*features_matrix.shape))
            for row in features_matrix:
                out.write('\t'.join(row.astype(str)) + '\n')
        LargeVis.loadfile("largevis_input.txt")
        points = LargeVis.run(outdim, threads, samples, prop, alpha, trees, neg, neigh, gamma, perp)
        plot_scatter(np.array(points), data_target)
    else:
        # avoid an UnboundLocalError on `points` for unrecognized names
        raise ValueError("Unknown DR algorithm: {}".format(name))
    return points
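
# Example invocation of the variant above (assumes the DR libraries it dispatches to are
# installed and that plot_scatter and RANDOM_STATE are defined in the same module; the
# digits dataset is only a stand-in):
from sklearn.datasets import load_digits

digits = load_digits()
points_trimap = run_DR_Algorithm("Trimap", digits.data, digits.target.tolist(), int_dim=2)
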
def do_trimap():
    # `embed_dimensions` and `X` come from the enclosing script's scope.
    return trimap.TRIMAP(n_dims=embed_dimensions, verbose=False).fit_transform(X)
def reduceDimension(adata, n_pca_components=25, n_components=2, n_neighbors=10,
                    reduction_method='trimap', velocity_key='velocity_S', cores=1):
    """Compute a low dimensional projection of an AnnData object, first with PCA, followed by a
    non-linear dimension reduction method.

    Arguments
    ---------
    adata: :class:`~anndata.AnnData`
        an AnnData object
    n_pca_components: 'int' (optional, default 25)
        Number of PCA components.
    n_components: 'int' (optional, default 2)
        The dimension of the space to embed into.
    n_neighbors: 'int' (optional, default 10)
        Number of nearest neighbors when constructing adjacency matrix.
    reduction_method: 'str' (optional, default trimap)
        Non-linear dimension reduction method to further reduce dimension based on the top
        n_pca_components PCA components. Currently, PSL (probabilistic structure learning, a new
        dimension reduction by us), tSNE (fitsne instead of traditional tSNE used) or umap are supported.
    velocity_key: 'str' (optional, default velocity_S)
        The dictionary key that corresponds to the estimated velocity values.
    cores: `int` (optional, default `1`)
        Number of cores. Used only when the tSNE reduction_method is used.

    Returns
    -------
        Returns an updated `adata` with reduced dimension data for spliced counts, projected future
        transcript counts 'Y_dim' and adjacency matrix when possible.
    """
    n_obs = adata.shape[0]

    if 'use_for_dynamo' in adata.var.keys():
        X = adata.X[:, adata.var.use_for_dynamo.values]
        if velocity_key is not None:
            X_t = adata.X[:, adata.var.use_for_dynamo.values] + \
                  adata.layers[velocity_key][:, adata.var.use_for_dynamo.values]
    else:
        X = adata.X
        if velocity_key is not None:
            X_t = adata.X + adata.layers[velocity_key]

    # The original string comparisons used `is` (e.g. `reduction_method is "pca"`), which is
    # unreliable for str literals; they are replaced with `==` throughout.
    if ((not 'X_pca' in adata.obsm.keys()) or 'pca_fit' not in adata.uns.keys()) or reduction_method == "pca":
        transformer = TruncatedSVD(n_components=n_pca_components + 1, random_state=0)
        X_fit = transformer.fit(X)
        X_pca = X_fit.transform(X)[:, 1:]
        adata.obsm['X_pca'] = X_pca
        if velocity_key is not None and "_velocity_pca" not in adata.obsm.keys():
            X_t_pca = X_fit.transform(X_t)[:, 1:]
            adata.obsm['_velocity_pca'] = X_t_pca - X_pca
    else:
        X_pca = adata.obsm['X_pca'][:, :n_pca_components]
        if velocity_key is not None and "_velocity_pca" not in adata.obsm.keys():
            X_t_pca = adata.uns['pca_fit'].fit_transform(X_t)
            adata.obsm['_velocity_pca'] = X_t_pca[:, 1:(n_pca_components + 1)] - X_pca
        adata.obsm['X_pca'] = X_pca

    if reduction_method == "trimap":
        import trimap
        triplemap = trimap.TRIMAP(
            n_inliers=20,
            n_outliers=10,
            n_random=10,
            distance='angular',  # cosine
            weight_adj=1000.0,
            apply_pca=False)
        X_dim = triplemap.fit_transform(X_pca)

        adata.obsm['X_trimap'] = X_dim
        adata.uns['neighbors'] = {'params': {'n_neighbors': n_neighbors, 'method': reduction_method},
                                  'connectivities': None, 'distances': None, 'indices': None}
    elif reduction_method == 'tSNE':
        try:
            from fitsne import FItSNE
        except ImportError:
            print('Please first install fitsne to perform accelerated tSNE method. '
                  'Install instruction is provided here: https://pypi.org/project/fitsne/')

        X_dim = FItSNE(X_pca, nthreads=cores)  # use FItSNE
        # bh_tsne = TSNE(n_components=n_components)
        # X_dim = bh_tsne.fit_transform(X_pca)

        adata.obsm['X_tSNE'] = X_dim
        adata.uns['neighbors'] = {'params': {'n_neighbors': n_neighbors, 'method': reduction_method},
                                  'connectivities': None, 'distances': None, 'indices': None}
    elif reduction_method == 'umap':
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            graph, knn_indices, knn_dists, X_dim = umap_conn_indices_dist_embedding(X_pca)  # X_pca

        adata.obsm['X_umap'] = X_dim
        adata.uns['neighbors'] = {'params': {'n_neighbors': n_neighbors, 'method': reduction_method},
                                  'connectivities': graph, 'distances': knn_dists, 'indices': knn_indices}
    elif reduction_method == 'psl':
        adj_mat, X_dim = psl_py(X_pca, d=n_components, K=n_neighbors)  # this needs to be updated
        adata.obsm['X_psl'] = X_dim
        adata.uns['PSL_adj_mat'] = adj_mat
    else:
        raise Exception('reduction_method {} is not supported.'.format(reduction_method))

    return adata
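
# Hedged sketch of calling reduceDimension on a toy AnnData object. It assumes anndata and
# trimap are installed and that the function's own dependencies (e.g. TruncatedSVD from
# sklearn.decomposition) are imported in the module; velocity_key=None skips the velocity
# projection and the small random matrix is purely illustrative.
import anndata
import numpy as np

adata_demo = anndata.AnnData(np.random.rand(200, 50).astype(np.float32))
adata_demo = reduceDimension(adata_demo, n_pca_components=25,
                             reduction_method='trimap', velocity_key=None)
print(adata_demo.obsm['X_trimap'].shape)  # expected: (200, 2)
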