def __init__(self, matrix=None, latent=None, name="Dim", barcodes=None):
    """
    Set up the metadata shared by dimensionality-reduction (DR) objects.

    matrix = input matrix to save as metadata (optional)
    latent = n_cells x n_features matrix containing latent space output from DR method
    name = name of DR method for plotting and metadata
    barcodes = pd.DataFrame containing cell barcodes. Header of cell barcode column
        should be named 'Barcode'. Only the first column of a DataFrame is kept;
        any other value (including None) is stored unchanged.
    """
    # input matrix and placeholder technique name are kept as metadata
    self.input = pd.DataFrame(matrix)
    self.name = name

    # results/clu are only created here when a precomputed latent space is supplied
    if latent is not None:
        self.results = np.ascontiguousarray(latent)
        # density-peak cluster information for results, used for plotting
        self.clu = Cluster(self.results, autoplot=False)

    # maintain given barcode information as pd.Series when a DataFrame is passed
    if isinstance(barcodes, pd.DataFrame):
        self.barcodes = barcodes.iloc[:, 0]
    else:
        self.barcodes = barcodes
def __init__(self, matrix, perplexity, seed=None, barcodes=None, **kwargs):
    """
    Embed `matrix` with UMAP and cluster the resulting embedding.

    matrix = input matrix, handed to DR.__init__ (which stores it as self.input)
    perplexity = value passed to UMAP as n_neighbors; also stored as metadata
    seed = value passed to UMAP as random_state
    barcodes = cell barcodes, handed to DR.__init__
    **kwargs = any further keyword arguments for the UMAP constructor
    """
    DR.__init__(self, matrix=matrix, barcodes=barcodes)  # inherits from DR object
    self.name = "UMAP"
    self.perplexity = perplexity  # store n_neighbors value as metadata
    self.fit = UMAP(n_neighbors=self.perplexity, random_state=seed, **kwargs)
    # fit and embed in one pass: calling .fit() and then .fit_transform() on the
    # same estimator would train the model twice on the same data
    self.results = self.fit.fit_transform(self.input)
    # get density-peak cluster information for results to use for plotting
    self.clu = Cluster(self.results.astype("double"), autoplot=False)
def __init__(self, matrix, n_components, barcodes=None):
    """
    Project `matrix` onto its principal components and cluster the projection.

    matrix = input matrix, handed to DR.__init__ (which stores it as self.input)
    n_components = number of principal components to keep; stored as metadata
    barcodes = cell barcodes, handed to DR.__init__
    """
    DR.__init__(self, matrix=matrix, barcodes=barcodes)  # inherits from DR object
    self.name = "PC"
    self.components = n_components  # store number of components as metadata
    self.fit = PCA(n_components=self.components).fit(self.input)  # fit PCA to data
    self.results = self.fit.transform(self.input)  # transform data to fit
    # get density-peak cluster information for results to use for plotting;
    # cast to double like the other DR constructors do before calling Cluster
    self.clu = Cluster(self.results.astype("double"), autoplot=False)
def __init__(self, matrix, perplexity, seed=None, barcodes=None, **kwargs):
    """
    Embed `matrix` with t-SNE and cluster the resulting embedding.

    matrix = input matrix, handed to DR.__init__ (which stores it as self.input)
    perplexity = t-SNE perplexity; also stored as metadata
    seed = value passed to TSNE as random_state
    barcodes = cell barcodes, handed to DR.__init__
    **kwargs = any further keyword arguments for the TSNE constructor
    """
    DR.__init__(self, matrix=matrix, barcodes=barcodes)  # inherits from DR object
    self.name = "t-SNE"
    self.perplexity = perplexity  # store tSNE perplexity as metadata
    self.fit = TSNE(perplexity=self.perplexity, random_state=seed, **kwargs)
    # fit and embed in one pass: calling .fit() and then .fit_transform() on the
    # same estimator would run the whole optimisation twice
    self.results = self.fit.fit_transform(self.input)
    # get density-peak cluster information for results to use for plotting
    self.clu = Cluster(self.results.astype("double"), autoplot=False)
def setup_class(cls):
    """Generate a random 2-D point set and build the clustering stored on the class."""
    # data generation
    cls.npoints = 1000
    cls.mux = 1.8
    cls.muy = 1.8
    cls.fraction = 0.02
    cls.points = np.zeros(shape=(cls.npoints, 2), dtype=np.float64)
    # each coordinate is standard-normal noise shifted by +mu or -mu, the sign
    # being drawn independently per point (noise first, then sign, per column)
    for column, offset in enumerate((cls.mux, cls.muy)):
        noise = np.random.randn(cls.npoints)
        sign = (-1)**np.random.randint(0, high=2, size=cls.npoints)
        cls.points[:, column] = noise + offset * sign
    # cluster initialisation
    cls.dpc = Cluster(cls.points, cls.fraction, autoplot=False)
    cls.dpc.assign(20, 1.5)
def __init__(
    self,
    matrix,
    mode="latent",
    hidden_size=(64, 32, 64),
    norm=True,
    seed=None,
    barcodes=None,
    n_threads=2,
):
    """
    Run DCA on `matrix` and cluster its output.

    matrix = input matrix, handed to DR.__init__ and wrapped in an AnnData object
    mode = 'latent' to return n-dimensional latent space from hidden layer of
        autoencoder, 'denoise' to return the denoised data
    hidden_size = size of layers for encoder (m, n, p), where n determines number
        of dimensions of latent space in 'latent' mode
    norm = normalize output of DCA?
    seed = random number generator seed for reproducible result
    barcodes = cell barcodes, handed to DR.__init__
    n_threads = parallelization of training (# of cores)

    Raises ValueError if mode is neither 'latent' nor 'denoise'.
    """
    # reject unsupported modes before training: otherwise self.results would never
    # be assigned and the failure would only surface after the DCA run
    if mode not in ("latent", "denoise"):
        raise ValueError(
            "mode must be 'latent' or 'denoise', got {!r}".format(mode))

    DR.__init__(self, matrix=matrix, barcodes=barcodes)  # inherits from DR object
    self.name = "DCA"
    self.DCA_norm = norm  # store normalization decision as metadata
    # generate AnnData object (https://github.com/theislab/scanpy) for passing to DCA
    self.adata = sc.AnnData(matrix)
    sc.pp.filter_genes(self.adata, min_counts=1)  # remove features with 0 counts for all cells
    # perform DCA analysis on AnnData object
    dca.api.dca(
        self.adata,
        mode=mode,
        threads=n_threads,
        random_state=seed,
        hidden_size=hidden_size,
        normalize_per_cell=False,
    )

    if self.DCA_norm:
        sc.pp.normalize_per_cell(self.adata)  # normalize features for each cell with scanpy's method
        sc.pp.log1p(self.adata)  # log-transform data with scanpy's method

    if mode == "latent":
        self.results = self.adata.obsm["X_dca"]  # return latent space as np.ndarray
    else:
        self.results = self.adata.X  # return the denoised data as a np.ndarray

    self.clu = Cluster(self.results.astype("double"), autoplot=False)
def __init__(self, matrix, perplexity, seed=-1, barcodes=None, clean_workspace=True):
    """
    Embed `matrix` with FIt-SNE (fast_tsne) and cluster the resulting embedding.

    matrix = input matrix, handed to DR.__init__ (which stores it as self.input)
    perplexity = t-SNE perplexity; also stored as metadata
    seed = value passed to fast_tsne as seed
    barcodes = cell barcodes, handed to DR.__init__
    clean_workspace = remove the 'data.dat' and 'result.dat' files from the
        current working directory once the embedding has been computed
    """
    DR.__init__(self, matrix=matrix, barcodes=barcodes)  # inherits from DR object
    self.name = "FIt-SNE"
    self.perplexity = perplexity  # store tSNE perplexity as metadata
    self.results = fast_tsne(self.input, perplexity=self.perplexity, seed=seed)
    # get density-peak cluster information for results to use for plotting
    self.clu = Cluster(self.results.astype("double"), autoplot=False)

    if clean_workspace:
        # get rid of files used by C++ to run FFT t-SNE
        for scratch_file in ("data.dat", "result.dat"):
            try:
                os.remove(scratch_file)
            except FileNotFoundError:
                # nothing to clean up; a missing scratch file must not discard
                # an embedding that was computed successfully
                pass
def runDPC(self, dr, x_cutoff, y_cutoff, force_rerun=False):
    """
    Build the density-peak clustering if needed, then assign clusters.

    dr = array to cluster; cast to float64 before being passed to Cluster
    x_cutoff, y_cutoff = cutoffs forwarded to Cluster.assign
    force_rerun = rebuild the Cluster object even if self.DPC already exists

    The Cluster object is cached in self.DPC, so repeated calls with different
    cutoffs only redo the assignment unless force_rerun is set.
    """
    if self.DPC is None or force_rerun:
        self.DPC = Cluster(dr.astype('float64'))
    self.DPC.assign(x_cutoff, y_cutoff)
class gate_visualize(object):
    """
    Plot density-peak clusters and gene overlays on precomputed embeddings, and
    gate selected clusters out.

    The embeddings (PCA, UMAP, TSNE) and library metadata are copied from the
    object given to the constructor; self.DPC holds the Cluster object created
    by runDPC and is None until runDPC has been called.
    """

    def __init__(self, dr_in, subset=False):
        """
        dr_in = object exposing lib_values, lib_size, lib_rank, PCA, UMAP, TSNE,
            seed and lib_geneID attributes
        subset = accepted but not used by this constructor
        """
        self.lib_values = dr_in.lib_values
        self.lib_size = dr_in.lib_size
        self.lib_rank = dr_in.lib_rank
        self.PCA = dr_in.PCA
        self.UMAP = dr_in.UMAP
        self.TSNE = dr_in.TSNE
        self.seed = dr_in.seed
        self.lib_geneID = dr_in.lib_geneID
        self.DPC = None  # set by runDPC

    @staticmethod
    def _subplot_grid(n_panels, n_cols=3):
        """Return (rows, cols) holding n_panels; never fewer than 3 rows, so up
        to nine panels keep the 3 x 3 layout."""
        n_rows = max(3, -(-n_panels // n_cols))  # ceiling division
        return n_rows, n_cols

    def _split_clusters(self, gate_out):
        """Return (IDs of clusters to keep, boolean mask of cells in those clusters)."""
        kept_ids = np.delete(np.arange(len(self.DPC.clusters)), gate_out)
        keep_mask = np.isin(self.DPC.membership, kept_ids)
        return kept_ids, keep_mask

    def runDPC(self, dr, x_cutoff, y_cutoff, force_rerun=False):
        """
        Build the density-peak clustering if needed, then assign clusters.

        dr = array to cluster; cast to float64 before being passed to Cluster
        x_cutoff, y_cutoff = cutoffs forwarded to Cluster.assign
        force_rerun = rebuild the Cluster object even if self.DPC already exists
        """
        if self.DPC is None or force_rerun:
            self.DPC = Cluster(dr.astype('float64'))
        self.DPC.assign(x_cutoff, y_cutoff)

    def plotDPC(self):
        """
        Draw three UMAP panels: cluster membership (with each cluster centre
        labelled by its index), library size, and ranked library size.
        Requires runDPC to have been called first.
        """
        fig = plt.figure(figsize=(30, 10))
        ax1 = fig.add_subplot(131)
        ax2 = fig.add_subplot(132)
        ax3 = fig.add_subplot(133)
        ax1.scatter(self.UMAP[:, 0], self.UMAP[:, 1],
                    c=self.DPC.membership, cmap='gist_rainbow', s=10)
        ax2.scatter(self.UMAP[:, 0], self.UMAP[:, 1],
                    c=self.lib_size, cmap='seismic', s=10)
        ax3.scatter(self.UMAP[:, 0], self.UMAP[:, 1],
                    c=self.lib_rank, cmap='seismic', s=10)
        # self.DPC.clusters[i] is used as the row of the embedding at which the
        # label for cluster i is drawn
        for i, centre in enumerate(self.DPC.clusters):
            ax1.text(self.UMAP[centre, 0], self.UMAP[centre, 1], i,
                     fontsize=15, color='black',
                     horizontalalignment='center',
                     verticalalignment='center', weight='bold',
                     bbox=dict(facecolor='white', edgecolor='black',
                               boxstyle='Circle', pad=0.1, alpha=.5))
        ax1.set_title("Density Peak Clusters", size=15, weight="bold")
        ax2.set_title("Library Size (Blue = Low Quality)", size=15, weight="bold")
        ax3.set_title("Ranked Library Sizes (Blue = Low Quality)", size=15,
                      weight="bold")

    def plotGenes(self, feature_list, embedding="UMAP"):
        """
        Overlay summed feature values on an embedding, one panel per entry of
        feature_list.

        feature_list = patterns matched against the gene IDs with
            pd.Series.str.contains; values of all matching columns of
            lib_values are summed per cell
        embedding = "UMAP", "TSNE" or "PCA"; the first two columns of the
            chosen embedding are plotted

        Raises ValueError for any other embedding name. Panels are laid out
        three per row; rows are added beyond the third when more than nine
        features are requested.
        """
        if embedding not in ("UMAP", "TSNE", "PCA"):
            raise ValueError(
                "embedding must be 'UMAP', 'TSNE' or 'PCA', got {!r}".format(
                    embedding))
        coords = getattr(self, embedding)

        self.lib_geneID = pd.Series(self.lib_geneID)
        gene_overlays = []
        for pattern in feature_list:
            columns = np.where(self.lib_geneID.str.contains(pattern))[0]
            gene_overlays.append(
                np.array(np.sum(self.lib_values[:, columns], axis=1)).flatten())

        n_rows, n_cols = self._subplot_grid(len(gene_overlays))
        plt.figure(figsize=(30, 10 * n_rows))
        for i, overlay in enumerate(gene_overlays):
            plt.subplot(n_rows, n_cols, i + 1)
            plt.scatter(coords[:, 0], coords[:, 1], c=overlay, cmap='hot', s=20)
            plt.title(feature_list[i], size=15, weight="bold")

    def manual_gating(self, gate_out):
        """
        Plot the UMAP with the clusters listed in gate_out greyed out and return
        the indices of the cells that belong to the remaining clusters.

        gate_out = cluster index or indices (as accepted by np.delete) to remove

        Kept clusters are labelled at their centres with their original cluster
        index, i.e. the same index plotDPC shows and gate_out refers to.
        Requires runDPC to have been called first.
        """
        kept_ids, keep_mask = self._split_clusters(gate_out)
        gated_embedding = self.UMAP[keep_mask]
        fig = plt.figure(figsize=(10, 10))
        ax1 = fig.add_subplot(111)
        ax1.scatter(gated_embedding[:, 0], gated_embedding[:, 1],
                    alpha=1, s=20,
                    c=self.DPC.membership[keep_mask], cmap='gist_rainbow')
        ax1.scatter(self.UMAP[~keep_mask, 0], self.UMAP[~keep_mask, 1],
                    alpha=0.5, s=20, c='gray')
        for cluster_id in kept_ids:
            centre = self.DPC.clusters[cluster_id]
            ax1.text(self.UMAP[centre, 0], self.UMAP[centre, 1],
                     int(cluster_id),
                     fontsize=15, color='black',
                     horizontalalignment='center',
                     verticalalignment='center', weight='bold',
                     bbox=dict(facecolor='white', edgecolor='black',
                               boxstyle='Circle', pad=0.1, alpha=.5))
        return np.where(keep_mask)[0]
def __init__(self, matrix, K, barcodes=None):
    """
    Fit a ZIFA model to `matrix` and cluster the fitted output.

    matrix = input matrix, handed to DR.__init__ and to block_ZIFA.fitModel
    K = second argument of block_ZIFA.fitModel
    barcodes = cell barcodes, handed to DR.__init__
    """
    DR.__init__(self, matrix=matrix, barcodes=barcodes)  # inherits from DR object
    self.name = "ZIFA"
    # fitModel returns a pair: the results and the fitted model parameters
    fitted = block_ZIFA.fitModel(matrix, K)
    self.results, self.model_params = fitted
    # density-peak cluster information for the results
    as_double = self.results.astype("double")
    self.clu = Cluster(as_double, autoplot=False)
# Disabled diagnostic plot of the raw points with guide lines at +/- mux, muy:
# fig, ax = plt.subplots(figsize=(5, 5))
# ax.scatter(points[:, 0], points[:, 1], s=40)
# ax.plot([-mux, -mux], [-1.5 * muy, 1.5 * muy], '--', linewidth=2, color="red")
# ax.plot([mux, mux], [-1.5 * muy, 1.5 * muy], '--', linewidth=2, color="red")
# ax.plot([-1.5 * mux, 1.5 * mux], [-muy, -muy], '--', linewidth=2, color="red")
# ax.plot([-1.5 * mux, 1.5 * mux], [muy, muy], '--', linewidth=2, color="red")
# ax.set_xlabel(r"x / a.u.", fontsize=20)
# ax.set_ylabel(r"y / a.u.", fontsize=20)
# ax.tick_params(labelsize=15)
# ax.set_xlim([-7, 7])
# ax.set_ylim([-7, 7])
# ax.set_aspect('equal')
# fig.tight_layout()
# plt.show()

# Cluster the points and assign clusters with cutoffs (50, 150)
clu = Cluster(points)
clu.assign(50, 150)

# Three panels of the same points: plain, coloured by density, coloured by membership
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
_panel_styles = (
    dict(s=1),
    dict(s=1, c=clu.density),
    dict(s=1, c=clu.membership, cmap=mpl.cm.cool),
)
for _panel, _style in zip(ax, _panel_styles):
    _panel.scatter(points[:, 0], points[:, 1], **_style)
# highlight the points indexed by clu.clusters in red on the first panel
ax[0].scatter(points[clu.clusters, 0], points[clu.clusters, 1], s=10, c="red")
# for _ax in ax:
#     _ax.plot([-mux, -mux], [-1.5 * muy, 1.5 * muy], '--', linewidth=2, color="red")
#     _ax.plot([mux, mux], [-1.5 * muy, 1.5 * muy], '--', linewidth=2, color="red")