Ejemplo n.º 1
0
    def __init__(self, matrix=None, latent=None, name="Dim", barcodes=None):
        """
        Set up the state shared by every dimensionality-reduction (DR) object.

        matrix = input matrix to save as metadata (optional)
        latent = n_cells x n_features matrix containing latent space output from DR method (optional)
        name = name of DR method for plotting and metadata
        barcodes = cell barcodes; for a pd.DataFrame only the first column is kept (as pd.Series),
                   any other value (including None) is stored unchanged
        """
        # metadata: the input wrapped in a DataFrame and a placeholder DR label
        self.input = pd.DataFrame(matrix)
        self.name = name

        # results/clu only exist when the object is initiated from a results matrix
        if latent is not None:
            self.results = np.ascontiguousarray(latent)
            # density-peak cluster information for results, used for plotting
            self.clu = Cluster(self.results, autoplot=False)

        # None is not a DataFrame, so it falls through and is stored as None
        barcodes_is_frame = isinstance(barcodes, pd.DataFrame)
        self.barcodes = barcodes.iloc[:, 0] if barcodes_is_frame else barcodes
Ejemplo n.º 2
0
 def __init__(self, matrix, perplexity, seed=None, barcodes=None, **kwargs):
     """
     Embed the input matrix with UMAP and cluster the embedding.

     matrix = input matrix, handed to DR.__init__ and embedded via self.input
     perplexity = neighborhood size, passed to UMAP as n_neighbors
     seed = passed to UMAP as random_state
     barcodes = cell barcodes, handed to DR.__init__
     **kwargs = any further keyword arguments for the UMAP constructor
     """
     DR.__init__(self, matrix=matrix,
                 barcodes=barcodes)  # inherits from DR object
     self.name = "UMAP"
     self.perplexity = perplexity  # store neighborhood size as metadata
     self.fit = UMAP(n_neighbors=self.perplexity,
                     random_state=seed,
                     **kwargs)
     # fit_transform() trains the estimator in place and returns the
     # embedding; one call is enough, a separate fit() beforehand would only
     # train the same model a second time
     self.results = self.fit.fit_transform(self.input)
     # density-peak cluster information for results, used for plotting
     self.clu = Cluster(self.results.astype("double"), autoplot=False)
Ejemplo n.º 3
0
 def __init__(self, matrix, n_components, barcodes=None):
     """
     Project the input matrix onto principal components and cluster the projection.

     matrix = input matrix, handed to DR.__init__ and decomposed via self.input
     n_components = number of principal components to keep
     barcodes = cell barcodes, handed to DR.__init__
     """
     DR.__init__(self, barcodes=barcodes, matrix=matrix)  # inherits from DR object
     self.name = "PC"
     self.components = n_components  # number of components, kept as metadata

     # fit the decomposition first, then project the same data onto it
     pca_model = PCA(n_components=self.components)
     self.fit = pca_model.fit(self.input)
     self.results = self.fit.transform(self.input)

     # density-peak cluster information for results, used for plotting
     self.clu = Cluster(self.results, autoplot=False)
Ejemplo n.º 4
0
 def __init__(self, matrix, perplexity, seed=None, barcodes=None, **kwargs):
     """
     Embed the input matrix with t-SNE and cluster the embedding.

     matrix = input matrix, handed to DR.__init__ and embedded via self.input
     perplexity = t-SNE perplexity
     seed = passed to TSNE as random_state
     barcodes = cell barcodes, handed to DR.__init__
     **kwargs = any further keyword arguments for the TSNE constructor
     """
     DR.__init__(self, matrix=matrix,
                 barcodes=barcodes)  # inherits from DR object
     self.name = "t-SNE"
     self.perplexity = perplexity  # store tSNE perplexity as metadata
     self.fit = TSNE(perplexity=self.perplexity,
                     random_state=seed,
                     **kwargs)
     # fit_transform() trains the estimator in place and returns the
     # embedding; one call is enough, a separate fit() beforehand would only
     # run the whole optimisation a second time
     self.results = self.fit.fit_transform(self.input)
     self.clu = Cluster(
         self.results.astype("double"), autoplot=False
     )  # get density-peak cluster information for results to use for plotting
Ejemplo n.º 5
0
 def setup_class(cls):
     """
     Create the shared fixture: a random 2-D point cloud stored on the class
     and a Cluster built from it with assignments already made.
     """
     # data generation
     cls.npoints = 1000
     cls.mux = 1.8
     cls.muy = 1.8
     cls.fraction = 0.02
     cls.points = np.zeros(shape=(cls.npoints, 2), dtype=np.float64)
     # each coordinate is standard-normal noise shifted by +/- its offset;
     # the noise is drawn before the signs, column 0 before column 1
     for column, offset in enumerate((cls.mux, cls.muy)):
         noise = np.random.randn(cls.npoints)
         signs = (-1)**np.random.randint(0, high=2, size=cls.npoints)
         cls.points[:, column] = noise + offset * signs
     # cluster initialisation
     cls.dpc = Cluster(cls.points, cls.fraction, autoplot=False)
     cls.dpc.assign(20, 1.5)
Ejemplo n.º 6
0
    def __init__(
        self,
        matrix,
        mode="latent",
        hidden_size=(64, 32, 64),
        norm=True,
        seed=None,
        barcodes=None,
        n_threads=2,
    ):
        """
        Run DCA on the input matrix and cluster the output.

        matrix = input matrix, handed to DR.__init__ and wrapped in an AnnData object for DCA
        mode = 'latent' to return n-dimensional latent space from hidden layer of autoencoder,
               'denoise' to return the denoised data matrix
        hidden_size = size of layers for encoder (m, n, p), where n determines number of dimensions of latent space in 'latent' mode
        norm = normalize output of DCA?
        seed = random number generator seed for reproducible result
        barcodes = cell barcodes, handed to DR.__init__
        n_threads = parallelization of training (# of cores)

        Raises ValueError if mode is neither 'latent' nor 'denoise'.
        """
        if mode not in ("latent", "denoise"):
            # reject unsupported modes before the training run; self.results
            # is only defined for these two values
            raise ValueError(
                "mode must be 'latent' or 'denoise', got {!r}".format(mode))

        DR.__init__(self, matrix=matrix,
                    barcodes=barcodes)  # inherits from DR object
        self.name = "DCA"
        self.DCA_norm = norm  # store normalization decision as metadata
        self.adata = sc.AnnData(
            matrix
        )  # generate AnnData object (https://github.com/theislab/scanpy) for passing to DCA
        sc.pp.filter_genes(
            self.adata,
            min_counts=1)  # remove features with 0 counts for all cells
        dca.api.dca(
            self.adata,
            mode=mode,
            threads=n_threads,
            random_state=seed,
            hidden_size=hidden_size,
            normalize_per_cell=False,
        )  # perform DCA analysis on AnnData object

        if self.DCA_norm:
            sc.pp.normalize_per_cell(
                self.adata
            )  # normalize features for each cell with scanpy's method
            sc.pp.log1p(self.adata)  # log-transform data with scanpy's method

        if mode == "latent":
            self.results = self.adata.obsm[
                "X_dca"]  # return latent space as np.ndarray
        else:  # mode == "denoise", guaranteed by the check above
            self.results = self.adata.X  # return the denoised data as a np.ndarray

        # density-peak cluster information for results, used for plotting
        self.clu = Cluster(self.results.astype("double"), autoplot=False)
Ejemplo n.º 7
0
 def __init__(self,
              matrix,
              perplexity,
              seed=-1,
              barcodes=None,
              clean_workspace=True):
     """
     Embed the input matrix with FIt-SNE (fast_tsne) and cluster the embedding.

     matrix = input matrix, handed to DR.__init__ and embedded via self.input
     perplexity = t-SNE perplexity
     seed = forwarded to fast_tsne as seed
     barcodes = cell barcodes, handed to DR.__init__
     clean_workspace = delete the data.dat / result.dat files from the working directory afterwards?
     """
     DR.__init__(self, matrix=matrix,
                 barcodes=barcodes)  # inherits from DR object
     self.name = "FIt-SNE"
     self.perplexity = perplexity  # store tSNE perplexity as metadata
     self.results = fast_tsne(self.input,
                              perplexity=self.perplexity,
                              seed=seed)
     self.clu = Cluster(
         self.results.astype("double"), autoplot=False
     )  # get density-peak cluster information for results to use for plotting
     if clean_workspace:
         # get rid of files used by C++ to run FFT t-SNE; a file that is
         # already gone is not an error and must not keep the other one
         # from being removed
         for leftover in ("data.dat", "result.dat"):
             try:
                 os.remove(leftover)
             except FileNotFoundError:
                 pass
Ejemplo n.º 8
0
 def runDPC(self, dr, x_cutoff, y_cutoff, force_rerun=False):
     """
     Build the density-peak Cluster object if needed, then (re)assign clusters.

     dr = embedding to cluster; converted with .astype('float64') before use
     x_cutoff, y_cutoff = the two thresholds handed to Cluster.assign
     force_rerun = rebuild the Cluster object even if one is already stored
     """
     # identity test: "no Cluster yet" means exactly None, independent of how
     # the stored object implements ==
     if self.DPC is None or force_rerun:
         self.DPC = Cluster(dr.astype('float64'))
     self.DPC.assign(x_cutoff, y_cutoff)
Ejemplo n.º 9
0
class gate_visualize(object):
    """
    Plot and manually gate density-peak clusters on precomputed embeddings.

    The constructor copies library metrics and embeddings from an existing
    object. runDPC() has to be called before plotDPC() or manual_gating(),
    because both read self.DPC.
    """

    def __init__(self, dr_in, subset=False):
        """
        dr_in = object providing the lib_values, lib_size, lib_rank, PCA, UMAP,
                TSNE, seed and lib_geneID attributes, which are copied onto self
        subset = accepted but not used by this constructor
        """
        self.lib_values = dr_in.lib_values
        self.lib_size = dr_in.lib_size
        self.lib_rank = dr_in.lib_rank
        self.PCA = dr_in.PCA
        self.UMAP = dr_in.UMAP
        self.TSNE = dr_in.TSNE
        self.seed = dr_in.seed
        self.lib_geneID = dr_in.lib_geneID
        self.DPC = None  # filled in by runDPC()

    def runDPC(self, dr, x_cutoff, y_cutoff, force_rerun=False):
        """
        Build the density-peak Cluster object if needed, then (re)assign clusters.

        dr = embedding to cluster; converted with .astype('float64') before use
        x_cutoff, y_cutoff = the two thresholds handed to Cluster.assign
        force_rerun = rebuild the Cluster object even if one is already stored
        """
        # identity test: "no Cluster yet" means exactly None, independent of
        # how the stored object implements ==
        if self.DPC is None or force_rerun:
            self.DPC = Cluster(dr.astype('float64'))
        self.DPC.assign(x_cutoff, y_cutoff)

    def _label_cluster_centers(self, ax, cluster_ids):
        """Write each cluster ID at the UMAP position of that cluster's center point."""
        for cluster_id in cluster_ids:
            center = self.DPC.clusters[cluster_id]  # row index of the center
            ax.text(self.UMAP[center, 0],
                    self.UMAP[center, 1],
                    int(cluster_id),
                    fontsize=15,
                    color='black',
                    horizontalalignment='center',
                    verticalalignment='center',
                    weight='bold',
                    bbox=dict(facecolor='white',
                              edgecolor='black',
                              boxstyle='Circle',
                              pad=0.1,
                              alpha=.5))

    def plotDPC(self):
        """
        Draw three UMAP panels: cluster membership (with labelled cluster
        centers), library size and ranked library size.
        """
        fig = plt.figure(figsize=(30, 10))
        panels = (
            (fig.add_subplot(131), self.DPC.membership, 'gist_rainbow',
             "Density Peak Clusters"),
            (fig.add_subplot(132), self.lib_size, 'seismic',
             "Library Size (Blue = Low Quality)"),
            (fig.add_subplot(133), self.lib_rank, 'seismic',
             "Ranked Library Sizes (Blue = Low Quality)"),
        )
        for ax, values, cmap, title in panels:
            ax.scatter(self.UMAP[:, 0],
                       self.UMAP[:, 1],
                       c=values,
                       cmap=cmap,
                       s=10)
            ax.set_title(title, size=15, weight="bold")

        # only the membership panel carries the cluster ID labels
        self._label_cluster_centers(panels[0][0],
                                    range(len(self.DPC.clusters)))

    def plotGenes(self, feature_list, embedding="UMAP"):
        """
        Overlay summed expression of matching genes on an embedding, one panel per feature.

        feature_list = patterns matched against the gene IDs with str.contains;
                       the values of all matching columns are summed per cell
        embedding = "UMAP" or "TSNE"

        Panels are laid out three per row; the grid has at least three rows
        and grows by whole rows when more than nine features are given.
        Raises ValueError for an unknown embedding name.
        """
        if embedding == "UMAP":
            coords = self.UMAP
        elif embedding == "TSNE":
            coords = self.TSNE
        else:
            # an unknown name would otherwise produce no plot and no message
            raise ValueError(
                "embedding must be 'UMAP' or 'TSNE', got {!r}".format(
                    embedding))

        self.lib_geneID = pd.Series(self.lib_geneID)
        gene_overlays = []
        for pattern in feature_list:
            columns = np.where(self.lib_geneID.str.contains(pattern))[0]
            gene_overlays.append(
                np.array(np.sum(self.lib_values[:, columns],
                                axis=1)).flatten())

        # 3 x 3 grid on a 30 x 30 figure for up to nine features; beyond that
        # add rows (and height) so every feature still gets a panel
        n_rows = max(3, (len(gene_overlays) + 2) // 3)
        plt.figure(figsize=(30, 10 * n_rows))
        for position, (feature, overlay) in enumerate(
                zip(feature_list, gene_overlays), start=1):
            plt.subplot(n_rows, 3, position)
            plt.scatter(coords[:, 0],
                        coords[:, 1],
                        c=overlay,
                        cmap='hot',
                        s=20)
            plt.title(feature, size=15, weight="bold")

    def manual_gating(self, gate_out):  #embedding = "UMAP"
        """
        Plot the UMAP with the clusters in gate_out greyed out and return the
        row indices of the cells that belong to the remaining clusters.

        gate_out = cluster ID(s) to remove, in any form np.delete accepts as indices

        Retained clusters are labelled with their original IDs, i.e. the same
        numbers plotDPC() shows and gate_out expects.
        """
        # IDs of the clusters to keep; the same array drives the cell mask and
        # the labels so the two always agree
        kept_ids = np.delete(np.arange(len(self.DPC.clusters)), gate_out)
        clust_mask = np.isin(self.DPC.membership, kept_ids)

        fig = plt.figure(figsize=(10, 10))
        ax1 = fig.add_subplot(111)
        ax1.scatter(self.UMAP[clust_mask, 0],
                    self.UMAP[clust_mask, 1],
                    alpha=1,
                    s=20,
                    c=self.DPC.membership[clust_mask],
                    cmap='gist_rainbow')
        # gated-out cells stay visible, but grey and semi-transparent
        ax1.scatter(self.UMAP[~clust_mask, 0],
                    self.UMAP[~clust_mask, 1],
                    alpha=0.5,
                    s=20,
                    c='gray')

        self._label_cluster_centers(ax1, kept_ids)
        return (np.where(clust_mask)[0])
Ejemplo n.º 10
0
 def __init__(self, matrix, K, barcodes=None):
     """
     Fit a ZIFA model to the input matrix and cluster its output.

     matrix = input matrix, handed to DR.__init__ and to block_ZIFA.fitModel
     K = second argument of block_ZIFA.fitModel
     barcodes = cell barcodes, handed to DR.__init__
     """
     DR.__init__(self, barcodes=barcodes, matrix=matrix)  # inherits from DR object
     self.name = "ZIFA"

     # fitModel returns a pair: the results matrix and the fitted parameters
     fitted = block_ZIFA.fitModel(matrix, K)
     self.results, self.model_params = fitted

     # density-peak cluster information for results, used for plotting
     results_as_double = self.results.astype("double")
     self.clu = Cluster(results_as_double, autoplot=False)
Ejemplo n.º 11
0
# fig, ax = plt.subplots(figsize=(5, 5))
# ax.scatter(points[:, 0], points[:, 1], s=40)
# ax.plot([-mux, -mux], [-1.5 * muy, 1.5 * muy], '--', linewidth=2, color="red")
# ax.plot([mux, mux], [-1.5 * muy, 1.5 * muy], '--', linewidth=2, color="red")
# ax.plot([-1.5 * mux,  1.5 * mux], [-muy, -muy], '--', linewidth=2, color="red")
# ax.plot([-1.5 * mux,  1.5 * mux], [muy, muy], '--', linewidth=2, color="red")
# ax.set_xlabel(r"x / a.u.", fontsize=20)
# ax.set_ylabel(r"y / a.u.", fontsize=20)
# ax.tick_params(labelsize=15)
# ax.set_xlim([-7, 7])
# ax.set_ylim([-7, 7])
# ax.set_aspect('equal')
# fig.tight_layout()
# plt.show()

# Build the Cluster object for `points`.
# NOTE(review): `points` is not defined in the visible part of this script;
# the indexing below treats it as a 2-column array -- confirm where it is created.
clu = Cluster(points)

# Assign clusters using the two thresholds 50 and 150.
# NOTE(review): presumably the decision-graph cutoffs -- confirm their meaning
# and order against the Cluster.assign documentation.
clu.assign(50, 150)

# One row of three panels, all showing the same point cloud.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
# Panel 0: every point, with the rows indexed by clu.clusters redrawn larger in red.
ax[0].scatter(points[:, 0], points[:, 1], s=1)
ax[0].scatter(points[clu.clusters, 0], points[clu.clusters, 1], s=10, c="red")
# Panel 1: points coloured by clu.density.
ax[1].scatter(points[:, 0], points[:, 1], s=1, c=clu.density)
# Panel 2: points coloured by clu.membership, using the "cool" colormap.
ax[2].scatter(points[:, 0],
              points[:, 1],
              s=1,
              c=clu.membership,
              cmap=mpl.cm.cool)
# for _ax in ax:
#     _ax.plot([-mux, -mux], [-1.5 * muy, 1.5 * muy], '--', linewidth=2, color="red")
#     _ax.plot([mux, mux], [-1.5 * muy, 1.5 * muy], '--', linewidth=2, color="red")