def plot_pca(self, vlm):
    """Scatter the first two principal components and label every cluster.

    Cells are drawn at ``vlm.pcs[:, 0]`` / ``vlm.pcs[:, 1]`` and coloured with
    ``vlm.colorandum``.  For each distinct value of ``vlm.ca["ClusterName"]``
    the first matching entry of ``vlm.cluster_labels`` is written at the
    median PCA position of that cluster's cells.  The axes are hidden.

    Parameters
    ----------
    vlm
        Object exposing ``pcs``, ``colorandum``, ``ca`` and ``cluster_labels``
        (presumably a velocyto ``VelocytoLoom`` -- confirm against callers).
    """
    vcy.scatter_viz(vlm.pcs[:, 0], vlm.pcs[:, 1], c=vlm.colorandum, s=7.5)

    for cluster_name in set(vlm.ca["ClusterName"]):
        # Boolean mask selecting the cells that belong to this cluster.
        in_cluster = vlm.ca["ClusterName"] == cluster_name
        center = np.median(vlm.pcs[in_cluster, :], 0)
        label = str(vlm.cluster_labels[in_cluster][0])
        plt.text(
            center[0],
            center[1],
            label,
            fontsize=13,
            bbox={"facecolor": "w", "alpha": 0.6},
        )

    plt.axis("off")
def marker_gene_plot(self, vlm, cluster, gene_df, bad_gene_list=None, cols=2,
                     figsize=(16.5, 7.5), num_genes=6, dpi=120):
    """Plot phase portrait, velocity and expression panels for marker genes.

    Genes are taken, in index order, from the rows of ``gene_df`` whose
    ``'cluster'`` column equals ``cluster``.  A gene is kept when it occurs in
    ``vlm.ra["Gene"]`` and is not listed in ``bad_gene_list``; selection stops
    once ``num_genes`` genes were collected.  Each kept gene gets three
    adjacent grid cells: a scatter of ``Sx_sz`` against ``Ux_sz`` with the
    line ``gammas * x + q`` on top, then ``vlm.plot_velocity_as_color`` and
    ``vlm.plot_expression_as_color``.

    Parameters
    ----------
    vlm
        Object exposing ``ra``, ``Sx``, ``Sx_sz``, ``Ux_sz``, ``gammas``, ``q``,
        ``colorandum`` and the two ``plot_*_as_color`` methods (presumably a
        velocyto ``VelocytoLoom`` -- confirm against callers).
    cluster
        Value compared against ``gene_df['cluster']``.
    gene_df
        DataFrame indexed by gene name with a ``'cluster'`` column.
    bad_gene_list
        Optional collection of gene names to skip.  ``None`` (the default)
        means nothing is skipped.
    cols
        Number of genes per grid row (each gene occupies three grid columns).
    figsize, dpi
        Passed to ``plt.figure``.
    num_genes
        Maximum number of genes to plot; also determines the number of rows.
    """
    # ``None`` instead of a mutable ``[]`` default, which would be shared
    # between calls.
    excluded = () if bad_gene_list is None else bad_gene_list

    gene_list = []
    for gene in gene_df.loc[gene_df['cluster'] == cluster].index:
        if len(gene_list) >= num_genes:
            break
        if gene in vlm.ra["Gene"] and gene not in excluded:
            gene_list.append(gene)

    # Ceiling division: a partially filled last row still needs a grid row.
    rows, leftover = divmod(num_genes, cols)
    if leftover:
        rows += 1

    plt.figure(None, figsize, dpi=dpi)
    gs = plt.GridSpec(rows, cols * 3)
    for i, gn in enumerate(gene_list):
        # Makes the first of the gene's three cells the current axes.
        plt.subplot(gs[i * 3])
        try:
            ix = np.where(vlm.ra["Gene"] == gn)[0][0]
        except IndexError:
            # Gene has no row in ``vlm.ra["Gene"]``; leave its cells empty.
            continue

        vcy.scatter_viz(vlm.Sx_sz[ix, :], vlm.Ux_sz[ix, :], c=vlm.colorandum,
                        s=5, alpha=0.4, rasterized=True)
        plt.title(gn)

        xnew = np.linspace(0, vlm.Sx[ix, :].max())
        plt.plot(xnew, vlm.gammas[ix] * xnew + vlm.q[ix], c="k")

        # 2% headroom above the largest value on each axis.
        y_top = np.max(vlm.Ux_sz[ix, :]) * 1.02
        x_top = np.max(vlm.Sx_sz[ix, :]) * 1.02
        plt.ylim(0, y_top)
        plt.xlim(0, x_top)
        self.minimal_yticks(0, y_top)
        self.minimal_xticks(0, x_top)
        self.despline()

        vlm.plot_velocity_as_color(gene_name=gn, gs=gs[i * 3 + 1], s=3, rasterized=True)
        vlm.plot_expression_as_color(gene_name=gn, gs=gs[i * 3 + 2], s=3, rasterized=True)

    plt.tight_layout()
def locate_source_sink(self, vlm, figsize=(14,7), dpi=100):
    """Draw side-by-side "Sinks" and "Sources" maps from two Markov runs.

    A 100 x 100 grid is laid over ``vlm.embedding``; the cells that are the
    nearest neighbour of a grid point and lie closer than half a grid
    diagonal to it are kept.  On those cells ``vlm.prepare_markov`` /
    ``vlm.run_markov`` are executed first with ``direction='forward'``
    (left panel, "Sinks") and then with ``direction='backwards'`` (right
    panel, "Sources").  ``vlm.diffused`` is shifted by its 3rd percentile,
    divided by the 97th percentile of the shifted values and clipped to
    [0, 1] before being used as the colour.

    Parameters
    ----------
    vlm
        Object exposing ``embedding``, ``prepare_markov``, ``run_markov`` and
        ``diffused`` (presumably a velocyto ``VelocytoLoom`` -- confirm).
    figsize, dpi
        Passed to ``plt.subplots``.
    """
    steps = (100, 100)

    axis_grids = []
    for axis in range(vlm.embedding.shape[1]):
        lo = np.min(vlm.embedding[:, axis])
        hi = np.max(vlm.embedding[:, axis])
        lo = lo - 0.025 * np.abs(hi - lo)
        # Note: ``lo`` has already been lowered, so the upper padding is
        # computed from the widened range.
        hi = hi + 0.025 * np.abs(hi - lo)
        axis_grids.append(np.linspace(lo, hi, steps[axis]))

    meshes = np.meshgrid(*axis_grids)
    grid_points = np.vstack([mesh.flat for mesh in meshes]).T

    nn = NearestNeighbors()
    nn.fit(vlm.embedding)
    dist, ixs = nn.kneighbors(grid_points, 1)

    # Diagonal of a single grid cell.
    step_x = meshes[0][0, 0] - meshes[0][0, 1]
    step_y = meshes[1][0, 0] - meshes[1][1, 0]
    diag_step_dist = np.sqrt(step_x**2 + step_y**2)
    min_dist = diag_step_dist / 2

    # Keep only grid points that have a cell within ``min_dist``.
    close_enough = dist < min_dist
    ixs = ixs[close_enough]
    grid_points = grid_points[close_enough.flat[:], :]
    dist = dist[close_enough]
    ixs = np.unique(ixs)

    def run_diffusion(direction):
        # One Markov run in the given direction, rescaled to [0, 1].
        vlm.prepare_markov(sigma_D=diag_step_dist, sigma_W=diag_step_dist/2.,
                           direction=direction, cells_ixs=ixs)
        vlm.run_markov(starting_p=np.ones(len(ixs)), n_steps=2500)
        scaled = vlm.diffused - np.percentile(vlm.diffused, 3)
        scaled /= np.percentile(scaled, 97)
        return np.clip(scaled, 0, 1)

    def draw_panel(position, colours, title):
        # Scatter the selected cells in one of the two subplot slots.
        plt.subplot(position)
        vcy.scatter_viz(vlm.embedding[ixs, 0], vlm.embedding[ixs, 1],
                        c=colours, alpha=0.5, s=50, lw=0., edgecolor="",
                        cmap="viridis_r", rasterized=True)
        plt.title(title)
        plt.axis("off")

    sink_density = run_diffusion('forward')
    plt.subplots(1, 2, figsize=figsize, dpi=dpi)
    draw_panel(121, sink_density, "Sinks")

    source_density = run_diffusion('backwards')
    draw_panel(122, source_density, "Sources")
def plot_doublets_tsne(self, vlm):
    """Scatter ``vlm.ts`` coloured by doublet prediction, with cluster labels.

    Points are coloured by ``vlm.ca['doublet_predictions']`` using the
    ``'binary'`` colormap after it has been passed through
    ``self.truncate_colormap(..., 0.2)``.  For each distinct value of
    ``vlm.ca["ClusterName"]`` the first matching entry of
    ``vlm.cluster_labels`` is written at the median position of that
    cluster's cells.

    Parameters
    ----------
    vlm
        Object exposing ``ts``, ``ca`` and ``cluster_labels`` (presumably a
        velocyto ``VelocytoLoom`` -- confirm against callers).
    """
    coords = vlm.ts
    vcy.scatter_viz(
        coords[:, 0],
        coords[:, 1],
        c=vlm.ca['doublet_predictions'],
        s=7.5,
        cmap=self.truncate_colormap(plt.get_cmap('binary'), 0.2),
    )

    for cluster_name in set(vlm.ca["ClusterName"]):
        # Boolean mask selecting the cells that belong to this cluster.
        in_cluster = vlm.ca["ClusterName"] == cluster_name
        center = np.median(vlm.ts[in_cluster, :], 0)
        label = str(vlm.cluster_labels[in_cluster][0])
        plt.text(
            center[0],
            center[1],
            label,
            fontsize=13,
            bbox={"facecolor": "w", "alpha": 0.6},
        )
def calculate_gammas(vlm):
    """Fit gammas and save phase portraits for a few genes.

    Runs ``knn_imputation(k=1)``, ``normalize_median()`` and
    ``fit_gammas(limit_gamma=False, fit_offset=False)`` on ``vlm``.  It then
    draws, for the genes ``vlm.ra["Gene"][1:5]``, a scatter of ``vlm.S``
    against ``vlm.U`` with the line ``gammas * x + q`` on top.  The figure is
    written to ``<prefix>_gn_gamma.png`` where ``<prefix>`` is
    ``sys.argv[1]`` up to the first ``".loom"``.

    Parameters
    ----------
    vlm
        Object exposing the methods and attributes used above (presumably a
        velocyto ``VelocytoLoom`` -- confirm against callers).
    """
    #vlm.normalize("S", size=True, log=False)
    #vlm.normalize("U", size=True, log=False)
    vlm.knn_imputation(k = 1)
    vlm.normalize_median()
    vlm.fit_gammas(limit_gamma=False, fit_offset=False)

    plt.figure(None, (17,2.8), dpi=80)
    gs = plt.GridSpec(1,6)
    for i, gn in enumerate(vlm.ra["Gene"][1:5]):
        # Makes this gene's grid cell the current axes.
        plt.subplot(gs[i])
        try:
            ix = np.where(vlm.ra["Gene"] == gn)[0][0]
        except IndexError:
            # No row matches this gene name; leave the cell empty.
            continue

        vcy.scatter_viz(vlm.S[ix,:], vlm.U[ix,:], s=5, alpha=0.4, rasterized=True)
        plt.title(gn)

        xnew = np.linspace(0, vlm.S[ix,:].max())
        plt.plot(xnew, vlm.gammas[ix] * xnew + vlm.q[ix], c="k")

        # 2% headroom above the largest value on each axis.
        plt.ylim(0, np.max(vlm.U[ix,:])*1.02)
        plt.xlim(0, np.max(vlm.S[ix,:])*1.02)
        #minimal_yticks(0, np.max(vlm.U[ix,:])*1.02)
        #minimal_xticks(0, np.max(vlm.S[ix,:])*1.02)

    plt.savefig(sys.argv[1].split(".loom")[0] + "_gn" + "_gamma.png")
vlm.ts = vlm.ca['umap'] cluster_list = vlm.ca["cluster"] #Use correlation to estimate transition probabilities for every cells to its embedding neighborhood vlm.estimate_transition_prob(hidim="Sx_sz", embed="ts", transform="sqrt", psc=1, n_neighbors=2000, knn_random=True, sampled_fraction=0.5) #Use the transition probability to project the velocity direction on the embedding vlm.calculate_embedding_shift(sigma_corr = 0.1, expression_scaling=True) #plot umap with cluster labels plt.figure(figsize=(10,10)) vcy.scatter_viz(vlm.ts[:,0], vlm.ts[:,1], c=vlm.colorandum, s=2) for i in range(len(cluster_list)): ts_m = np.median(vlm.ts[vlm.ca["cluster"] == cluster_list[i], :], 0) plt.text(ts_m[0], ts_m[1], str(vlm.cluster_labels[vlm.ca["cluster"] == cluster_list[i]][0]), fontsize=13, bbox={"facecolor":"w", "alpha":0.6}) plt.savefig(os.path.join(out_dir,"velocity_label_plot.png")) #Calculate the velocity using a points on a regular grid and a gaussian kernel vlm.calculate_grid_arrows(smooth=0.5, steps=(40, 40), n_neighbors=100) #vlm.flow = vlm.flow*50 #plt.figure(None,(20,10)) plt.figure(figsize=(10,10)) vlm.plot_grid_arrows(quiver_scale=0.1,
def pc_plot(vlm):
    """Run PCA on ``vlm`` and save a PC1-vs-PC2 scatter plot.

    Calls ``vlm.perform_PCA()``, scatters ``vlm.pcs[:, 0]`` against
    ``vlm.pcs[:, 1]`` and writes the figure to ``<prefix>_pca.png`` where
    ``<prefix>`` is ``sys.argv[1]`` up to the first ``".loom"``.

    Parameters
    ----------
    vlm
        Object exposing ``perform_PCA`` and ``pcs`` (presumably a velocyto
        ``VelocytoLoom`` -- confirm against callers).
    """
    vlm.perform_PCA()

    plt.figure(None, (17, 3.5))
    first_pc = vlm.pcs[:, 0]
    second_pc = vlm.pcs[:, 1]
    vcy.scatter_viz(first_pc, second_pc, s=10)
    plt.xlabel("PC1")
    plt.ylabel("PC2")

    # Output file sits next to the input, named after it.
    output_prefix = sys.argv[1].split(".loom")[0]
    plt.savefig(output_prefix + "_pca.png")
# Select embedded cells that sit close to a grid point, plot them, then run a
# forward Markov process on them and rescale the result to [0, 1].
# NOTE(review): `nn`, `gridpoints_coordinates`, `meshes_tuple` and `vlm` are
# defined earlier in the script, outside this view.

# Query one neighbour per grid point (presumably `nn` is a fitted
# NearestNeighbors model -- confirm).
dist, ixs = nn.kneighbors(gridpoints_coordinates, 1)
# Diagonal of a single grid cell, from the spacing along each mesh axis.
diag_step_dist = np.sqrt((meshes_tuple[0][0, 0] - meshes_tuple[0][0, 1])**2 + (meshes_tuple[1][0, 0] - meshes_tuple[1][1, 0])**2)
min_dist = diag_step_dist / 2
# Keep only the grid points whose neighbour is closer than half a diagonal.
ixs = ixs[dist < min_dist]
gridpoints_coordinates = gridpoints_coordinates[dist.flat[:] < min_dist, :]
dist = dist[dist < min_dist]
# The same cell can be the neighbour of several grid points.
ixs = np.unique(ixs)
plt.figure(None, (8, 8))
vcy.scatter_viz(vlm.embedding[ixs, 0], vlm.embedding[ixs, 1], c=vlm.colorandum[ixs], alpha=1, s=30, lw=0.4, edgecolor="0.4")
vlm.prepare_markov(sigma_D=diag_step_dist, sigma_W=diag_step_dist / 2., direction='forward', cells_ixs=ixs)
vlm.run_markov(starting_p=np.ones(len(ixs)), n_steps=2500)
# Shift by the 3rd percentile, scale by the 97th percentile of the shifted
# values, and clip to [0, 1].
diffused_n = vlm.diffused - np.percentile(vlm.diffused, 3)
diffused_n /= np.percentile(diffused_n, 97)
diffused_n = np.clip(diffused_n, 0, 1)
'OPC': np.array([0.61, 0.13, 0.72352941]), 'nIPC': np.array([0.9, 0.8, 0.3]), 'Nbl1': np.array([0.7, 0.82, 0.6]), 'Nbl2': np.array([0.448, 0.85490196, 0.95098039]), 'ImmGranule1': np.array([0.35, 0.4, 0.82]), 'ImmGranule2': np.array([0.23, 0.3, 0.7]), 'Granule': np.array([0.05, 0.11, 0.51]), 'CA': np.array([0.2, 0.53, 0.71]), 'CA1-Sub': np.array([0.1, 0.45, 0.3]), 'CA2-3-4': np.array([0.3, 0.35, 0.5]) } vlm.set_clusters(vlm.ca["ClusterName"], cluster_colors_dict=colors_dict) # Plot TSNE plt.figure(figsize=(10, 10)) vcy.scatter_viz(vlm.ts[:, 0], vlm.ts[:, 1], c=vlm.colorandum, s=2) for i in range(max(vlm.ca["Clusters"])): ts_m = np.median(vlm.ts[vlm.ca["Clusters"] == i, :], 0) plt.text(ts_m[0], ts_m[1], str(vlm.cluster_labels[vlm.ca["Clusters"] == i][0]), fontsize=13, bbox={ "facecolor": "w", "alpha": 0.6 }) plt.axis("off") vlm.plot_fractions() #Velocity Analysis vlm.filter_cells(bool_array=vlm.initial_Ucell_size > np.percentile(
plt.savefig(print_dir + "raw_velocity.pdf") # %% velocity tsne ind.calculate_grid_arrows(smooth=0.8, steps=(30, 30), n_neighbors=300) f, ax = plt.subplots(1,1, figsize=(10,10)) ind.plot_grid_arrows(quiver_scale=0.05, scatter_kwargs_dict={"alpha":0.35, "lw":0.35, "edgecolor":"0.4", "s":38, "rasterized":True}, min_mass=1, angles='xy', scale_units='xy', headaxislength=2.75, headlength=5, headwidth=4.8, minlength=0.35, plot_random=False, scale_type='absolute') plt.axis("off"); plt.savefig(print_dir + "average_velocity.pdf") # %%tsne with cluster names f, ax = plt.subplots(1,1, figsize=(10,10)) vcy.scatter_viz(ind.ts[:,0], ind.ts[:,1], c=ind.colorandum, s=7.5) for i in list(set(ind.ca["ClusterName"])): ts_m = np.median(ind.ts[ind.ca["ClusterName"] == i, :], 0) plt.text(ts_m[0], ts_m[1], str(ind.cluster_labels[ind.ca["ClusterName"] == i][0]), fontsize=13, bbox={"facecolor":"w", "alpha":0.6}) plt.axis("off"); ################################################################################ # Pseudotime projection # ################################################################################ #make a copy of ind object for pseudotime Analysis ind_pseudotime = deepcopy(ind) def array_to_rmatrix(X): nr, nc = X.shape
gene_list = [ 'Pdgfra', 'Cspg4', 'Olig1', 'Olig2', "Chd8", "Smarca4", ] for i, gene in enumerate(gene_list): plt.subplot(gs[i]) this_colorandum = Sx_sz[np.where(genenames == gene)[0][0], :] vcy.scatter_viz(vlm.ts[:, 0], vlm.ts[:, 1], c=this_colorandum, cmap="magma_r", alpha=0.35, s=3, rasterized=True) plt.title(gene) plt.axis("off") #plt.savefig("../figures/Haber_cellcycle_genes.pdf") plt.savefig('gene.pdf') ############# plt.figure(None, (5, 8)) gs = plt.GridSpec(3, 2) gene_list = [
# Save the velocity vector field drawn on the embedding grid.
plt.figure(None, (20, 10))
vlm.plot_grid_arrows(quiver_scale=3.0, plot_random=True, scale_type="relative")
plt.savefig("vectorfield.pdf")

# Per-gene panels: each gene occupies consecutive grid cells starting at
# index i * 3 (phase portrait first, velocity-as-colour next).
genes = ["Pdpn", "Hopx", "Emp2", "Trp53", "Top2a", "Aqp5", "Rtkn2", "Ager"]
plt.figure(None, (17, 24), dpi=80)
gs = plt.GridSpec(10, 6)
for i, gn in enumerate(genes):
    ax = plt.subplot(gs[i * 3])
    try:
        ix = np.where(vlm.ra["Gene"] == gn)[0][0]
    except IndexError:
        # Gene is not present in vlm.ra["Gene"]; leave its cells empty.
        continue
    vcy.scatter_viz(vlm.Sx_sz[ix, :], vlm.Ux_sz[ix, :], c=vlm.colorandum, s=5, alpha=0.4, rasterized=True)
    plt.title(gn)
    # Fitted line gammas * x + q drawn over the scatter.
    xnew = np.linspace(0, vlm.Sx[ix, :].max())
    plt.plot(xnew, vlm.gammas[ix] * xnew + vlm.q[ix], c="k")
    # 2% headroom above the largest value on each axis.
    plt.ylim(0, np.max(vlm.Ux_sz[ix, :]) * 1.02)
    plt.xlim(0, np.max(vlm.Sx_sz[ix, :]) * 1.02)
    minimal_yticks(0, np.max(vlm.Ux_sz[ix, :]) * 1.02)
    minimal_xticks(0, np.max(vlm.Sx_sz[ix, :]) * 1.02)
    despline()
    vlm.plot_velocity_as_color(gene_name=gn, gs=gs[i * 3 + 1], s=3, rasterized=True)
def plot_genes_velocity(vlm, genes):
    """Plot spliced/unspliced counts, steady state and velocity per gene.

    For every gene three adjacent panels are drawn: a scatter of spliced
    (``Sx_sz``) against unspliced (``Ux_sz``) counts with the estimated
    steady-state line ``gammas * x + q`` on top, the velocity as colour on
    the embedding, and the expression as colour on the embedding.  Two genes
    share one grid row.

    Parameters
    --------
    vlm: VelocytoLoom object
    genes: list
        list of genes to consider; genes that are not found in
        ``vlm.ra["Gene"]`` are skipped and their panels stay empty
    """
    n_genes = len(genes)
    if n_genes == 0:
        # Nothing to draw, and a zero-row grid is not a usable layout.
        return

    # Two genes (three panels each) per row.  The builtin ``int`` replaces
    # ``np.int``, which no longer exists in current NumPy releases.
    n_row = int(np.ceil(n_genes / 2))
    n_col = 6
    plt.figure(None, (17, 2.8 * n_row), dpi=80)
    gs = plt.GridSpec(n_row, n_col)
    for i, gn in enumerate(genes):
        # Makes the first of the gene's three cells the current axes.
        plt.subplot(gs[i * 3])
        try:
            ix = np.where(vlm.ra["Gene"] == gn)[0][0]
        except IndexError:
            # No row matches this gene name.
            continue

        # make a scatter plot of spliced and unspliced counts
        vcy.scatter_viz(vlm.Sx_sz[ix, :], vlm.Ux_sz[ix, :], c=vlm.colorandum,
                        s=5, alpha=0.4, rasterized=True)
        plt.title(gn)
        plt.xlabel('Spliced')
        plt.ylabel('Unspliced')

        # add the trend showing the estimated steady state
        xnew = np.linspace(0, vlm.Sx[ix, :].max())
        plt.plot(xnew, vlm.gammas[ix] * xnew + vlm.q[ix], c='k')

        # change the axis limits (2% headroom above the largest value)
        y_top = np.max(vlm.Ux_sz[ix, :]) * 1.02
        x_top = np.max(vlm.Sx_sz[ix, :]) * 1.02
        plt.ylim(0, y_top)
        plt.xlim(0, x_top)

        # have fewer ticks on both axes
        minimal_yticks(0, y_top)
        minimal_xticks(0, x_top)

        # get rid of the top and right axis
        despline()

        # plot velocities and expression on the embedding
        vlm.plot_velocity_as_color(gene_name=gn, gs=gs[i * 3 + 1], s=3, rasterized=True)
        vlm.plot_expression_as_color(gene_name=gn, gs=gs[i * 3 + 2], s=3, rasterized=True)

    plt.tight_layout()