Python TenxAnalysis.load Exemples, interface.tenxanalysis.TenxAnalysis.load Python Exemples

Exemple #1

0

Afficher le fichier

def umap_by_gene(rdata, gene, prefix, pcs):
    """Save a UMAP scatter plot with cells coloured by one gene's expression.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        gene: Gene used to index the scanpy object and to name the output.
        prefix: Label appended to the plot title.
        pcs: Not used by this function; kept so existing callers still work.

    The figure is written to ``figures/umap_by_<gene>.png``.
    """
    # NOTE(review): ``tenx_analysis`` is not a parameter of this function, so
    # it has to be provided by the enclosing module -- confirm it is defined.
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    barcodes = sce.colData["Barcode"]
    transcripts = sce.rowData["Symbol"]
    # Fixed: the original passed the undefined name ``symbols`` here.
    adata = tenx.create_scanpy_adata(barcodes=barcodes,
                                     transcripts=transcripts)
    gene_column = adata[:, gene]
    assert len(barcodes) == len(gene_column)
    expression = dict(zip(barcodes, gene_column))
    # The embedding is reshaped into two rows that are read as x and y.
    umap_dims = numpy.array(sce.reducedDims["UMAP"]).reshape(
        2, len(barcodes))
    x_coded = dict(zip(barcodes, umap_dims[0]))
    y_coded = dict(zip(barcodes, umap_dims[1]))
    x = [x_coded[barcode] for barcode in barcodes]
    y = [y_coded[barcode] for barcode in barcodes]
    levels = [float(expression[barcode]) for barcode in barcodes]
    f, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=levels, alpha=0.85)
    ax.set_title("PCA - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("figures/umap_by_{}.png".format(gene))

Exemple #2

0

Afficher le fichier

def plot_by_genes(rdata, tenx_analysis, genes, prefix, rep, pcs):
    """Save one embedding scatter plot per gene, coloured by expression.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument for the ``TenxAnalysis`` constructor.
        genes: Iterable of genes; one figure is written per gene.
        prefix: Not used by this function; kept for signature compatibility.
        rep: Name of the reduced-dimension representation to plot.
        pcs: Not used by this function; kept for signature compatibility.

    Figures are written to ``figures/expression/expression_<gene>.png``.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    dims = sce.getReducedDims(rep)
    barcodes = sce.colData["Barcode"]
    transcripts = sce.rowData["Symbol"]
    adata = tenx.create_scanpy_adata(barcodes=barcodes,
                                     transcripts=transcripts)
    x_coded = dict(zip(barcodes, dims[0]))
    y_coded = dict(zip(barcodes, dims[1]))
    if not os.path.exists("figures/expression"):
        os.makedirs("figures/expression")
    x = [x_coded[barcode] for barcode in barcodes]
    y = [y_coded[barcode] for barcode in barcodes]
    for gene in genes:
        expression = [float(adata[barcode, gene].X) for barcode in barcodes]
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.scatterplot(x=x, y=y, hue=expression, alpha=0.85)
        ax.set_title("{} Counts".format(gene))
        ax.legend()
        plt.tight_layout()
        plt.savefig("figures/expression/expression_{}.png".format(gene))
        # Close the figure once saved; otherwise every gene leaves an open
        # figure behind and memory grows with the length of ``genes``.
        plt.close(fig)

Exemple #3

0

Afficher le fichier

def Run(sampleid, before, finished):
    """Register a sample's analysis, SCE and plots, then export the results.

    Args:
        sampleid: Sample identifier handed to ``TenxDataStorage`` and
            ``QualityControl``.
        before: Not used by this function; kept for signature compatibility.
        finished: Path of a marker file that receives the text "Completed"
            after both exports have been called.
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    plots = qc.plots
    # The cell-type plot is looked up in a sibling directory of ``plots``.
    cellassign = os.path.join(os.path.split(plots)[0], "cellassignanalysis")
    results = Results(config.jobpath)

    results.add_analysis(tenx.tenx_path)
    results.add_sce(qc.qcdsce)

    tfbc = os.path.join(plots, "total_features_by_counts.png")
    tcvfc = os.path.join(plots, "total_counts_v_features_by_counts.png")
    labelled_plots = (
        (os.path.join(plots, "umi.png"), "UMI Distribution"),
        (os.path.join(plots, "mito.png"), "Mito Distribution"),
        (os.path.join(plots, "ribo.png"), "Ribo Distribution"),
        (os.path.join(plots, "total_counts.png"),
         "Total Counts Distribution"),
        (tcvfc, "Total Counts"),
        # Fixed: the original registered ``tcvfc`` a second time under this
        # label and never used the ``tfbc`` path it had built.
        (tfbc, "Total Features by Counts"),
        (os.path.join(cellassign, "cell_types.png"), "Cell Types"),
    )
    for path, label in labelled_plots:
        results.add_plot(path, label)

    exportMD(results)
    exportUpload(results)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")

Exemple #4

0

Afficher le fichier

def Analysis(sampleid, before, finished):
    """Produce the cell-type plots for a sample from its pickled fit.

    Loads ``.cache/<sampleid>/cell_types.pkl`` and passes it to
    ``celltypes``, ``tsne_by_cell_type`` and ``umap_by_cell_type``, writing
    into ``.cache/<sampleid>/cellassignanalysis/``.

    Args:
        sampleid: Sample identifier used for storage lookup and cache paths.
        before: Not used by this function; kept for signature compatibility.
        finished: Path of a marker file that receives the text "Completed".

    Raises:
        AssertionError: If the pickled fit file does not exist.
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    cellassign_analysis = ".cache/{}/cellassignanalysis/".format(sampleid)
    if not os.path.exists(cellassign_analysis):
        os.makedirs(cellassign_analysis)
    pyfit_path = ".cache/{}/cell_types.pkl".format(sampleid)
    assert os.path.exists(pyfit_path), "No Pyfit Found."
    # NOTE: unpickling can execute arbitrary code; the fit file must come
    # from a trusted pipeline step.
    with open(pyfit_path, "rb") as handle:
        pyfit = pickle.load(handle)
    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    cell_types = marker_list.celltypes()
    if "B cell" not in cell_types:
        cell_types.append("B cell")
    celltypes(pyfit, sampleid, cellassign_analysis, known_types=cell_types)
    for plot in (tsne_by_cell_type, umap_by_cell_type):
        plot(qc.sce,
             pyfit,
             sampleid,
             cellassign_analysis,
             known_types=cell_types)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")

Exemple #5

0

Afficher le fichier

def Search(sampleid):
    """Collect adata objects for a sample and for cached project samples.

    The main sample is downloaded and loaded first. Every directory matching
    ``../../*/runs/.cache/*/`` that contains a ``metrics_summary.csv`` is
    then visited; directories without a ``<dirname>.rdata`` file are reported
    and skipped.

    Args:
        sampleid: Identifier of the main sample.

    Returns:
        List of the values returned by ``TenxAnalysis.adata``, main sample
        first.
    """
    storage = TenxDataStorage(sampleid, version="v3")
    storage.download()
    main_path = storage.tenx_path
    print(main_path)
    main_analysis = TenxAnalysis(main_path)
    main_analysis.load()
    main_analysis.extract()
    main_qc = QualityControl(main_analysis, sampleid)
    collected = [main_analysis.adata(main_qc.sce)]
    print ("Loading main sce {}".format(sampleid))
    sys.stdout.flush()
    for summary in glob.glob("../../*/runs/.cache/*/metrics_summary.csv"):
        print ("Loading project sample {}".format(summary))
        sys.stdout.flush()
        sample_dir = os.path.split(summary)[0]
        # The directory name doubles as the sample id of the cached run.
        dir_name = sample_dir.split("/")[-1]
        sce_path = os.path.join(sample_dir, "{0}.rdata".format(dir_name))
        if os.path.exists(sce_path):
            project_analysis = TenxAnalysis(sample_dir)
            project_analysis.load()
            project_analysis.extract()
            collected.append(project_analysis.adata(sce_path))
        else:
            print("Not found",sce_path)
    print ("Finished project tree search.")
    sys.stdout.flush()
    return collected

Exemple #6

0

Afficher le fichier

def Run(sampleid, species, umi_plot, mito_plot, ribo_plot, counts_plot,
        raw_sce):
    """Run quality control for a sample and copy its outputs to given paths.

    Args:
        sampleid: Sample identifier handed to ``TenxDataStorage`` and
            ``QualityControl``.
        species: Not used by this function.
        umi_plot: Destination for the ``umi.png`` plot.
        mito_plot: Destination for the ``mito.png`` plot.
        ribo_plot: Destination for the ``ribo.png`` plot.
        counts_plot: Destination for the ``counts.png`` plot.
        raw_sce: Destination for the file at ``qc.sce``.
    """
    print("Running QC.")
    storage = TenxDataStorage(sampleid)
    analysis = TenxAnalysis(storage.tenx_path)
    analysis.load()
    analysis.extract()
    qc = QualityControl(analysis, sampleid)
    qc.run(mito=config.mito)
    plot_dir = qc.plots
    sources = [
        os.path.join(plot_dir, name)
        for name in ("umi.png", "mito.png", "ribo.png", "counts.png")
    ]
    # Built for parity with the other plot paths; it is not copied anywhere.
    cvf = os.path.join(plot_dir, "total_counts_v_features.png")

    results_dir = os.path.join(config.jobpath, "results")
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    destinations = (umi_plot, mito_plot, ribo_plot, counts_plot)
    for source, destination in zip(sources, destinations):
        shutil.copyfile(source, destination)
    shutil.copyfile(qc.sce, raw_sce)

Exemple #7

0

Afficher le fichier

def RunExtract(sample_to_path, rdata_path):
    """Load one sample from a JSON mapping and copy its SCE to ``rdata_path``.

    Args:
        sample_to_path: Path to a JSON file holding an object of
            ``sampleid -> path`` entries; the last entry is the one used.
        rdata_path: Destination for the file at ``qc.sce``.

    Quality control is only run when the file at ``qc.sce`` does not exist
    yet.
    """
    # Context manager closes the JSON file instead of leaking the handle.
    with open(sample_to_path, "r") as handle:
        sample = json.load(handle)
    sampleid, path = list(sample.items()).pop()
    tenx_analysis = TenxAnalysis(path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    if not os.path.exists(qc.sce):
        qc.run(mito=config.mito)
    shutil.copyfile(qc.sce, rdata_path)

Exemple #8

0

Afficher le fichier

def scvis_by_cluster_markers(rdata, tenx_analysis, prefix, pcs,
                             embedding_file):
    """Compute cluster markers on the SCVIS representation, best effort.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument for the ``TenxAnalysis`` constructor.
        prefix: Not used by this function; kept for signature compatibility.
        pcs: Forwarded to ``markers_by_clusters``.
        embedding_file: Forwarded to ``markers_by_clusters``.

    Returns:
        None. Any exception is logged and suppressed, so callers are never
        interrupted by a failure here.
    """
    import logging

    try:
        tenx = TenxAnalysis(tenx_analysis)
        tenx.load()
        sce = SingleCellExperiment.fromRData(rdata)
        tenx.markers_by_clusters(
            sce, rep="SCVIS", pcs=pcs, embedding_file=embedding_file)
    except Exception:
        # Still best effort, but the failure is recorded rather than
        # disappearing silently as it did before.
        logging.getLogger(__name__).exception(
            "scvis_by_cluster_markers failed")
        return

Exemple #9

0

Afficher le fichier

def cluster_markers(rdata, tenx_analysis, rep, pcs, embedding_file, prefix):
    """Plot the ranked marker genes of every cluster.

    Markers are computed with ``markers_by_clusters`` on the "PCA"
    representation; each cluster's genes are then passed to
    ``plot_by_markers`` together with ``rep`` and a
    "Cluster <i> <prefix>" label.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument for the ``TenxAnalysis`` constructor.
        rep: Representation forwarded to ``plot_by_markers``.
        pcs: Forwarded to both ``markers_by_clusters`` and
            ``plot_by_markers``.
        embedding_file: Forwarded to ``plot_by_markers``.
        prefix: Suffix of each per-cluster label.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    ranked = tenx.markers_by_clusters(sce, rep="PCA", pcs=pcs)
    names = ranked["rank_genes_groups"]["names"]
    # Transpose so that each entry holds the genes of a single cluster.
    genes_per_cluster = list(zip(*names))
    for index, cluster_genes in enumerate(genes_per_cluster):
        label = "Cluster {} {}".format(index, prefix)
        plot_by_markers(rdata, tenx_analysis, cluster_genes, label, rep,
                        pcs, embedding_file)

Exemple #10

0

Afficher le fichier

def Run(sampleid, before, finished):
    """Run CellAssign on a sample's SCE and write a completion marker.

    Args:
        sampleid: Sample identifier used for storage lookup and for the
            ``.cache/<sampleid>/celltypes.rdata`` output path.
        before: Not used by this function; kept for signature compatibility.
        finished: Path of a marker file that receives the text "Completed".
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    CellAssign.run(qc.sce, config.rho_matrix,
                   ".cache/{}/celltypes.rdata".format(sampleid))
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")

Exemple #11

0

Afficher le fichier

 def __init__(self, sampleids, chem="v2", output="./"):
     """Download, load and extract the 10x analysis of every sample.

     Args:
         sampleids: Sample identifiers; stored as ``self.samples``.
         chem: Passed as ``version`` to ``TenxDataStorage``.
         output: Stored as ``self.output``.

     The loaded analyses are collected in ``self.tenxs`` in sample order.
     """
     self.output = output
     self.samples = sampleids
     self.tenxs = []
     for current_id in self.samples:
         storage = TenxDataStorage(current_id, version=chem)
         storage.download()
         analysis = TenxAnalysis(storage.tenx_path)
         analysis.load()
         analysis.extract()
         self.tenxs.append(analysis)

Exemple #12

0

Afficher le fichier

def Run(sampleid, before, finished):
    """Run quality control for a sample, upload it and write a marker file.

    Args:
        sampleid: Sample identifier handed to ``TenxDataStorage`` and
            ``QualityControl``.
        before: Not used by this function; kept for signature compatibility.
        finished: Path of a marker file that receives the text "Completed".
    """
    print("Running QC.")
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    print("Extracted.")
    qc = QualityControl(tenx_analysis, sampleid)
    qc.run(mito=config.mito)
    print("Uploading")
    qc.upload_raw()
    qc.upload()
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")

Exemple #13

0

Afficher le fichier

def main():
    """Run the Kallisto ``de`` step for the hard-coded sample "patient2"."""
    sample_name = "patient2"
    scratch_dir = "/igo_large/scratch/test_kallisto"

    storage = TenxDataStorage(sample_name, version="v2")
    storage.download()
    analysis = TenxAnalysis(storage.tenx_path)
    analysis.load()
    fastq_source = "/igo_large/scratch/allen/bams/xfastqs2/McGilvery_Sonya__TLH_MissingLibrary_1_CB8R9ANXX/"
    fastq_directory = FastQDirectory(fastq_source, sample_name, scratch_dir)

    Kallisto(fastq_directory, analysis).de()

Exemple #14

0

Afficher le fichier

def cell_type_by_cluster(rdata, cell_assign_fit, tenx_analysis, prefix):
    """Plot, for every cluster, the fraction of cells of each cell type.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        cell_assign_fit: Path to a pickle providing ``"Barcode"`` and
            ``"cell_type"`` sequences.
        tenx_analysis: Argument for the ``TenxAnalysis`` constructor.
        prefix: Label appended to the plot title.

    Barcodes in the fit that have no cluster label are skipped. The bar plot
    is saved to ``figures/cell_type_by_cluster.png``.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    # NOTE: unpickling can execute arbitrary code; the fit file must come
    # from a trusted pipeline step.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    cell_types = dict(zip(fit["Barcode"], fit["cell_type"]))
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce)
    clusters = dict(zip(sce.colData["Barcode"], cluster_labels))

    # counts[cluster][cell type] -> number of cells
    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    # Unique cell types in order of first appearance, so the hue order of
    # the plot stays the same as before.
    ordered_types = []
    seen_types = set()
    for barcode, cell in cell_types.items():
        try:
            label = str(clusters[barcode])
        except KeyError:
            # The fit knows a barcode that was not clustered; ignore it.
            continue
        counts[label][cell] += 1
        if cell not in seen_types:
            seen_types.add(cell)
            ordered_types.append(cell)

    f, ax = plt.subplots(figsize=(16, 8))
    fclusters = []
    fcelltypes = []
    fpercentages = []
    for label, type_counts in counts.items():
        total = float(sum(type_counts.values()))
        # Fixed: the original looped over the per-barcode list of cell types
        # here, emitting one duplicate row per cell instead of one row per
        # (cluster, cell type) pair.
        for cell in ordered_types:
            fclusters.append(label)
            fcelltypes.append(cell)
            fpercentages.append(type_counts.get(cell, 0) / total)
    df = pandas.DataFrame({
        "Cluster": fclusters,
        "Cell Type": fcelltypes,
        "Percentage": fpercentages
    })
    ax = sns.barplot(x="Cluster",
                     y="Percentage",
                     hue="Cell Type",
                     data=df,
                     palette="tab10")
    ax.set_title("Cell Type by Cluster - {}".format(prefix))
    plt.tight_layout()
    plt.savefig("figures/cell_type_by_cluster.png")

Exemple #15

0

Afficher le fichier

def Run(sampleid, before, finished, use_corrected=False):
    """Run CellAssign for a sample unless its output already exists.

    Args:
        sampleid: Sample identifier used for storage lookup and for the
            ``.cache/<sampleid>/celltypes.rdata`` output path.
        before: Not used by this function; kept for signature compatibility.
        finished: Path of a marker file that receives the text "Completed".
        use_corrected: When true and ``.cache/corrected/`` exists, use
            ``.cache/corrected/corrected_sce.rdata`` (built with
            ``DropletUtils.read10xCounts`` if missing) instead of the
            sample's own SCE.
    """
    corrected_dir = ".cache/corrected/"
    if use_corrected and os.path.exists(corrected_dir):
        sce = ".cache/corrected/corrected_sce.rdata"
        if not os.path.exists(sce):
            utils = DropletUtils()
            utils.read10xCounts(corrected_dir, sce)
    else:
        tenx = TenxDataStorage(sampleid, version="v3")
        tenx.download()
        tenx_analysis = TenxAnalysis(tenx.tenx_path)
        tenx_analysis.load()
        tenx_analysis.extract()
        qc = QualityControl(tenx_analysis, sampleid)
        sce = qc.sce
    celltypes_rdata = ".cache/{}/celltypes.rdata".format(sampleid)
    if not os.path.exists(celltypes_rdata):
        CellAssign.run(sce, config.rho_matrix, celltypes_rdata)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")

Exemple #16

0

Afficher le fichier

def Run(sampleid, before, finished):
    """Cluster a sample (or reuse cached clusters) and plot the clusters.

    Clusters are cached as a pickle at
    ``.cache/<sampleid>/clustering/<sampleid>_clusters.pkl`` and passed to
    ``tsne_by_cluster`` and ``umap_by_cluster``.

    Args:
        sampleid: Sample identifier used for storage lookup and cache paths.
        before: Not used by this function; kept for signature compatibility.
        finished: Path of a marker file that receives the text "Completed".
    """
    clustering = ".cache/{}/clustering/".format(sampleid)
    if not os.path.exists(clustering):
        os.makedirs(clustering)
    cluster_results = os.path.join(clustering,
                                   "{}_clusters.pkl".format(sampleid))
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    if os.path.exists(cluster_results):
        # NOTE: unpickling can execute arbitrary code; the cache must only
        # ever be written by this pipeline.
        with open(cluster_results, "rb") as handle:
            clusters = pickle.load(handle)
    else:
        clusters = tenx_analysis.clusters(qc.sce)
        # Closing the handle guarantees the cache is fully written before a
        # later run tries to read it.
        with open(cluster_results, "wb") as handle:
            pickle.dump(clusters, handle)
    tsne_by_cluster(qc.sce, clusters, sampleid, clustering)
    umap_by_cluster(qc.sce, clusters, sampleid, clustering)
    with open(finished, "w") as marker:
        marker.write("Completed")

Exemple #17

0

Afficher le fichier

def Analysis(sampleid, before, finished, use_corrected=False):
    """Produce the cell-type plots for a sample from its pickled fit.

    Loads ``.cache/<sampleid>/cell_types.pkl`` and passes it to
    ``celltypes``, ``tsne_by_cell_type`` and ``umap_by_cell_type``, writing
    into ``.cache/<sampleid>/cellassignanalysis/``.

    Args:
        sampleid: Sample identifier used for storage lookup and cache paths.
        before: Not used by this function; kept for signature compatibility.
        finished: Path of a marker file that receives the text "Completed".
        use_corrected: When true and ``.cache/corrected`` exists, plot from
            ``.cache/corrected/corrected_sce.rdata`` (built with
            ``DropletUtils.read10xCounts`` if missing); otherwise plot from
            ``sce_cas.rdata`` next to the sample's ``qc.sce`` file.

    Raises:
        AssertionError: If the pickled fit file does not exist.
    """
    if use_corrected and os.path.exists(".cache/corrected"):
        filtered_sce = ".cache/corrected/corrected_sce.rdata"
        if not os.path.exists(filtered_sce):
            utils = DropletUtils()
            utils.read10xCounts(".cache/corrected/", filtered_sce)
    else:
        tenx = TenxDataStorage(sampleid, version="v3")
        tenx.download()
        tenx_analysis = TenxAnalysis(tenx.tenx_path)
        tenx_analysis.load()
        tenx_analysis.extract()
        qc = QualityControl(tenx_analysis, sampleid)
        filtered_sce = os.path.join(os.path.split(qc.sce)[0], "sce_cas.rdata")
    cellassign_analysis = ".cache/{}/cellassignanalysis/".format(sampleid)
    if not os.path.exists(cellassign_analysis):
        os.makedirs(cellassign_analysis)
    pyfit_path = ".cache/{}/cell_types.pkl".format(sampleid)
    assert os.path.exists(pyfit_path), "No Pyfit Found."
    # NOTE: unpickling can execute arbitrary code; the fit file must come
    # from a trusted pipeline step.
    with open(pyfit_path, "rb") as handle:
        pyfit = pickle.load(handle)
    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    cell_types = marker_list.celltypes()
    if "B cell" not in cell_types:
        cell_types.append("B cell")
    celltypes(pyfit, sampleid, cellassign_analysis, known_types=cell_types)

    for plot in (tsne_by_cell_type, umap_by_cell_type):
        plot(filtered_sce,
             pyfit,
             sampleid,
             cellassign_analysis,
             known_types=cell_types)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")

Exemple #18

0

Afficher le fichier

 def run_transcript(self, fastqs=[]):
     """Build Kallisto design matrices per sample and report DE genes.

     For every id in ``self.samples`` the 10x analysis is downloaded, loaded
     and extracted, Kallisto's ``run_pseudo`` and ``run_bus`` are called, and
     the result of ``design_matrix()`` is stored under the sample id in
     ``self.matrices``. The matrices of the first two samples become
     ``self.matrix1`` / ``self.matrix2`` and their shared keys become
     ``self.common_genes``.

     What happens next depends on the file
     ``<samples[0]>_<samples[1]>_de.tsv``:

     * If it exists, it is parsed as a header line followed by
       ``gene pvalue`` rows, the p-values are passed through
       ``multitest.multipletests``, the number of genes below 0.05, 0.01 and
       0.001 is printed, and the genes sorted by adjusted p-value are
       returned.
     * If it does not exist, the method returns ``None`` right away (see the
       review note on that ``return``).

     Args:
         fastqs: One fastq object per entry of ``self.samples``. The default
             list is never mutated in this method.

     Returns:
         A list of ``(gene, adjusted p-value)`` pairs in ascending p-value
         order when the DE file exists, otherwise ``None``.

     Raises:
         AssertionError: If ``len(fastqs)`` differs from
             ``len(self.samples)``.
     """
     matrices = dict()
     assert len(fastqs) == len(
         self.samples), "Provide fastq object for each sample."
     # NOTE(review): this iterates ``self.fastqs`` rather than the ``fastqs``
     # argument validated above, and the loop variable ``fastq`` is never
     # used -- confirm which one is intended.
     for sampleid, fastq in zip(self.samples, self.fastqs):
         tenx = TenxDataStorage(sampleid, version="v2")
         tenx.download()
         tenx_analysis = TenxAnalysis(tenx.tenx_path)
         tenx_analysis.load()
         tenx_analysis.extract()
         # NOTE(review): ``chem`` is not bound anywhere in this method, and
         # the whole ``fastqs`` list (not the per-sample ``fastq``) is passed
         # -- confirm against the Kallisto constructor.
         self.krunner = Kallisto(fastqs, tenx_analysis, chem=chem)
         self.krunner.run_pseudo()
         self.krunner.run_bus()
         matrix = self.krunner.design_matrix()
         matrices[sampleid] = matrix
     self.matrices = matrices
     # NOTE(review): ``sampleids`` is not bound in this method; the rest of
     # the method indexes ``self.samples`` for the same purpose -- confirm.
     self.matrix1 = self.matrices[sampleids[0]]
     self.matrix2 = self.matrices[sampleids[1]]
     self.common_genes = set(self.matrix1.keys()).intersection(
         set(self.matrix2.keys()))
     self.model = LogisticRegression(random_state=0,
                                     solver='lbfgs',
                                     multi_class='multinomial')
     de_file = "{}_{}_de.tsv".format(self.samples[0], self.samples[1])
     if not os.path.exists(de_file):
         # NOTE(review): this ``return`` exits before any of the code below
         # in this branch runs, so the per-gene test and the DE file are
         # never produced as written -- confirm whether it is intentional.
         return
         output = open(
             "{}_{}_de.tsv".format(self.samples[0], self.samples[1]), "w")
         output.write("Gene\tPValue\n")
         differential_genes = dict()
         for gene in tqdm.tqdm(self.common_genes):
             # Only keys present for this gene in both matrices are used as
             # predictors.
             tcc_common = set(self.matrix1[gene].keys()).intersection(
                 set(self.matrix2[gene].keys()))
             if len(tcc_common) == 0:
                 continue
             Y = []
             X = []
             cells1 = list(
                 itertools.chain.from_iterable([
                     list(self.matrix1[gene][tcc].keys())
                     for tcc in tcc_common
                 ]))
             cells2 = list(
                 itertools.chain.from_iterable([
                     list(self.matrix2[gene][tcc].keys())
                     for tcc in tcc_common
                 ]))
             if len(cells1) == 0 or len(cells2) == 0:
                 continue
             # One row per cell: label is the sample id, features are the
             # per-key values with 0 where the cell has no entry.
             for cell in cells1:
                 Y.append(self.samples[0])
                 predictors = []
                 for tcc in tcc_common:
                     try:
                         predictors.append(self.matrix1[gene][tcc][cell])
                     except KeyError:
                         predictors.append(0)
                 X.append(predictors)
             for cell in cells2:
                 Y.append(self.samples[1])
                 predictors = []
                 for tcc in tcc_common:
                     try:
                         predictors.append(self.matrix2[gene][tcc][cell])
                     except KeyError:
                         predictors.append(0)
                 X.append(predictors)
             classes = set(Y)
             Y = numpy.array(Y)
             X = numpy.array(X)
             if Y.shape[0] < 2 or len(classes) == 1:
                 continue
             self.model.fit(X, Y)
             # NOTE(review): the null probabilities are set to 2 / n for
             # every row -- confirm this is the intended null model.
             null_prob = 2.0 / float(Y.shape[0]) * numpy.ones(Y.shape)
             df = X.shape[1]
             alt_prob = self.model.predict_proba(X)
             alt_log_likelihood = -log_loss(Y, alt_prob, normalize=False)
             null_log_likelihood = -log_loss(Y, null_prob, normalize=False)
             # Statistic is twice the log-likelihood difference, evaluated
             # with chi2.sf using the number of predictors as ``df``.
             G = 2 * (alt_log_likelihood - null_log_likelihood)
             p_value = chi2.sf(G, df)
             differential_genes[gene] = p_value
             output.write("{}\t{}\n".format(gene, p_value))
         sorted_genes = sorted(differential_genes.items(),
                               key=operator.itemgetter(1))
         print("**************** Differential Genes ********************")
         for gene, pvalue in sorted_genes[:100]:
             print(gene, pvalue)
         output.close()
     else:
         differential_genes = dict()
         differential_genes_adj = dict()
         genes = open(de_file, "r").read().splitlines()
         # Drop the header line.
         genes.pop(0)
         _genes = []
         pvalues = []
         # NOTE(review): ``adjpvalues`` is never used; ``adj_pvalues`` below
         # holds the adjusted values.
         adjpvalues = []
         for gene in genes:
             gene, pvalue = gene.split()
             differential_genes[gene] = float(pvalue)
             pvalues.append(float(pvalue))
             _genes.append(gene)
         # Element [1] of the result is used as the adjusted p-values.
         adj_pvalues = list(multitest.multipletests(pvalues)[1])
         print(adj_pvalues)
         for gene, pvalue, adjp in zip(_genes, pvalues, adj_pvalues):
             differential_genes_adj[gene] = adjp
         sorted_genes = sorted(differential_genes_adj.items(),
                               key=operator.itemgetter(1))
         thresholds = (0.05, 0.01, 0.001)
         import collections
         sig_genes = collections.defaultdict(list)
         for gene, pvalue in sorted_genes:
             for threshold in thresholds:
                 if pvalue < threshold:
                     sig_genes[str(threshold)].append(gene)
         print("**************** Differential Genes ********************")
         # The loop variable rebinds ``sig_genes`` to each per-threshold list
         # while iterating the items of the original dict.
         for thresh, sig_genes in sig_genes.items():
             print(thresh, len(sig_genes))
         for gene, pvalue in sorted_genes[:100]:
             print(gene, pvalue)
     return sorted_genes

Exemple #19

0

Afficher le fichier

def umap_by_cluster_markers(rdata, tenx_analysis, prefix, pcs):
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    markers = tenx.markers_by_clusters(sce, rep="UMAP", pcs=pcs)
    print(markers.keys())