コード例 #1
0
def Run(sampleid, species, umi_plot, mito_plot, ribo_plot, counts_plot,
        raw_sce):
    """Run quality control for a sample and copy its outputs to given paths.

    Args:
        sampleid: Sample identifier passed to ``TenxDataStorage`` and
            ``QualityControl``.
        species: Not used by this function.
        umi_plot: Destination path for ``umi.png``.
        mito_plot: Destination path for ``mito.png``.
        ribo_plot: Destination path for ``ribo.png``.
        counts_plot: Destination path for ``counts.png``.
        raw_sce: Destination path for the file located at ``qc.sce``.
    """
    print("Running QC.")
    tenx = TenxDataStorage(sampleid)
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    qc.run(mito=config.mito)

    # exist_ok avoids the check-then-create race when the directory is
    # created by another process between the test and the makedirs call.
    os.makedirs(os.path.join(config.jobpath, "results"), exist_ok=True)

    plots = qc.plots
    copies = (
        ("umi.png", umi_plot),
        ("mito.png", mito_plot),
        ("ribo.png", ribo_plot),
        ("counts.png", counts_plot),
    )
    for plot_name, destination in copies:
        shutil.copyfile(os.path.join(plots, plot_name), destination)
    shutil.copyfile(qc.sce, raw_sce)
コード例 #2
0
def Analysis(sampleid, before, finished):
    """Plot cell-type assignments for a sample from a cached pickle fit.

    Loads ``.cache/<sampleid>/cell_types.pkl``, then calls ``celltypes``,
    ``tsne_by_cell_type`` and ``umap_by_cell_type`` with
    ``.cache/<sampleid>/cellassignanalysis/`` as the output directory.

    Args:
        sampleid: Sample identifier used for download and cache paths.
        before: Not used by this function.
        finished: Path of a marker file that receives the text "Completed".

    Raises:
        AssertionError: If the cached pickle fit does not exist.
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)

    cellassign_analysis = ".cache/{}/cellassignanalysis/".format(sampleid)
    os.makedirs(cellassign_analysis, exist_ok=True)

    pyfit_path = ".cache/{}/cell_types.pkl".format(sampleid)
    assert os.path.exists(pyfit_path), "No Pyfit Found."
    # NOTE(review): pickle.load executes arbitrary code from the file; this
    # is only acceptable because the cache is produced by this pipeline.
    with open(pyfit_path, "rb") as handle:
        pyfit = pickle.load(handle)

    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    cell_types = marker_list.celltypes()
    if "B cell" not in cell_types:
        cell_types.append("B cell")

    celltypes(pyfit, sampleid, cellassign_analysis, known_types=cell_types)
    for plot_by_cell_type in (tsne_by_cell_type, umap_by_cell_type):
        plot_by_cell_type(qc.sce,
                          pyfit,
                          sampleid,
                          cellassign_analysis,
                          known_types=cell_types)

    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
コード例 #3
0
def Run(sampleid, before, finished):
    """Collect QC and CellAssign plots into a ``Results`` object and export it.

    Args:
        sampleid: Sample identifier used for download and quality control.
        before: Not used by this function.
        finished: Path of a marker file that receives the text "Completed".
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    plots = qc.plots
    # CellAssign output lives next to the plots directory.
    cellassign = os.path.join(os.path.split(plots)[0], "cellassignanalysis")
    results = Results(config.jobpath)

    results.add_analysis(tenx.tenx_path)
    results.add_sce(qc.qcdsce)

    umi = os.path.join(plots, "umi.png")
    mito = os.path.join(plots, "mito.png")
    ribo = os.path.join(plots, "ribo.png")
    total_counts = os.path.join(plots, "total_counts.png")
    tfbc = os.path.join(plots, "total_features_by_counts.png")
    tcvfc = os.path.join(plots, "total_counts_v_features_by_counts.png")
    # Named *_plot so the local does not shadow a `celltypes` function.
    cell_types_plot = os.path.join(cellassign, "cell_types.png")

    results.add_plot(umi, "UMI Distribution")
    results.add_plot(mito, "Mito Distribution")
    results.add_plot(ribo, "Ribo Distribution")
    results.add_plot(total_counts, "Total Counts Distribution")
    # NOTE(review): the "Total Counts" title for the counts-vs-features plot
    # is kept from the original -- confirm the intended title.
    results.add_plot(tcvfc, "Total Counts")
    # Previously tcvfc was added a second time here and tfbc was never used.
    results.add_plot(tfbc, "Total Features by Counts")
    results.add_plot(cell_types_plot, "Cell Types")

    exportMD(results)
    exportUpload(results)
    with open(finished, "w") as marker:
        marker.write("Completed")
コード例 #4
0
def Search(sampleid):
    """Gather ``adata`` results for a sample and for sibling project samples.

    The main sample is downloaded and converted first. Every
    ``metrics_summary.csv`` matched by ``../../*/runs/.cache/*/`` is then
    visited, and each directory that also holds ``<dirname>.rdata`` is
    loaded and converted.

    Args:
        sampleid: Identifier of the main sample.

    Returns:
        List of ``TenxAnalysis.adata(...)`` results, main sample first.
    """
    storage = TenxDataStorage(sampleid, version="v3")
    storage.download()
    main_path = storage.tenx_path
    print(main_path)
    main_analysis = TenxAnalysis(main_path)
    main_analysis.load()
    main_analysis.extract()
    main_qc = QualityControl(main_analysis, sampleid)
    collected = [main_analysis.adata(main_qc.sce)]
    print("Loading main sce {}".format(sampleid))
    sys.stdout.flush()

    for summary in glob.glob("../../*/runs/.cache/*/metrics_summary.csv"):
        print("Loading project sample {}".format(summary))
        sys.stdout.flush()
        sample_dir = os.path.dirname(summary)
        # Last "/"-separated component of the directory is the sample id.
        sid = sample_dir.rpartition("/")[2]
        sidsce = os.path.join(sample_dir, "{0}.rdata".format(sid))
        if os.path.exists(sidsce):
            project_analysis = TenxAnalysis(sample_dir)
            project_analysis.load()
            project_analysis.extract()
            collected.append(project_analysis.adata(sidsce))
        else:
            print("Not found", sidsce)

    print("Finished project tree search.")
    sys.stdout.flush()
    return collected
コード例 #5
0
def RunExtract(sample_to_path, rdata_path):
    """Produce the QC output file for a sample and copy it to ``rdata_path``.

    Args:
        sample_to_path: Path to a JSON file holding an object that maps a
            sample id to its analysis path; the last item is used.
        rdata_path: Destination path for the file located at ``qc.sce``.
    """
    # Context manager closes the JSON file instead of leaking the handle.
    with open(sample_to_path, "r") as handle:
        sample = json.load(handle)
    sampleid, path = list(sample.items()).pop()
    tenx_analysis = TenxAnalysis(path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    # Only run quality control when its output is not already present.
    if not os.path.exists(qc.sce):
        qc.run(mito=config.mito)
    shutil.copyfile(qc.sce, rdata_path)
コード例 #6
0
def Run(sampleid, before, finished):
    """Run CellAssign on a sample's QC output.

    The result path handed to ``CellAssign.run`` is
    ``.cache/<sampleid>/celltypes.rdata``.

    Args:
        sampleid: Sample identifier used for download and cache paths.
        before: Not used by this function.
        finished: Path of a marker file that receives the text "Completed".
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    CellAssign.run(qc.sce, config.rho_matrix,
                   ".cache/{}/celltypes.rdata".format(sampleid))
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
コード例 #7
0
 def __init__(self, sampleids, chem="v2", output="./"):
     """Download and load the 10x analysis for every sample.

     Args:
         sampleids: Iterable of sample identifiers; stored as
             ``self.samples``.
         chem: Value passed as ``version`` to ``TenxDataStorage``; also
             kept as ``self.chem`` so later steps can reuse it.
         output: Stored as ``self.output``.
     """
     self.output = output
     self.samples = sampleids
     # Retained so methods that run after construction can use the same
     # chemistry instead of relying on a name that is out of scope.
     self.chem = chem
     self.tenxs = []
     for sampleid in self.samples:
         tenx = TenxDataStorage(sampleid, version=chem)
         tenx.download()
         tenx_analysis = TenxAnalysis(tenx.tenx_path)
         tenx_analysis.load()
         tenx_analysis.extract()
         self.tenxs.append(tenx_analysis)
コード例 #8
0
def Run(sampleid, before, finished):
    """Run quality control for a sample and upload the results.

    Args:
        sampleid: Sample identifier used for download and quality control.
        before: Not used by this function.
        finished: Path of a marker file that receives the text "Completed".
    """
    print("Running QC.")
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    print("Extracted.")
    qc = QualityControl(tenx_analysis, sampleid)
    qc.run(mito=config.mito)
    print("Uploading")
    qc.upload_raw()
    qc.upload()
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
コード例 #9
0
def Run(sampleid, before, finished, use_corrected=False):
    """Run CellAssign unless its output for the sample already exists.

    Args:
        sampleid: Sample identifier used for download and cache paths.
        before: Not used by this function.
        finished: Path of a marker file that receives the text "Completed".
        use_corrected: When true and ``.cache/corrected/`` exists, use
            ``.cache/corrected/corrected_sce.rdata`` as input (built with
            ``DropletUtils.read10xCounts`` if missing) instead of the
            sample's QC output.
    """
    corrected_dir = ".cache/corrected/"
    celltypes_path = ".cache/{}/celltypes.rdata".format(sampleid)

    if use_corrected and os.path.exists(corrected_dir):
        sce = ".cache/corrected/corrected_sce.rdata"
        if not os.path.exists(sce):
            utils = DropletUtils()
            utils.read10xCounts(corrected_dir, sce)
    else:
        tenx = TenxDataStorage(sampleid, version="v3")
        tenx.download()
        tenx_analysis = TenxAnalysis(tenx.tenx_path)
        tenx_analysis.load()
        tenx_analysis.extract()
        qc = QualityControl(tenx_analysis, sampleid)
        sce = qc.sce

    # Skip the assignment step when a previous run already produced output.
    if not os.path.exists(celltypes_path):
        CellAssign.run(sce, config.rho_matrix, celltypes_path)

    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
コード例 #10
0
def Run(sampleid, before, finished):
    """Cluster a sample (or reuse cached clusters) and plot the clusters.

    Clusters are cached as a pickle at
    ``.cache/<sampleid>/clustering/<sampleid>_clusters.pkl``.

    Args:
        sampleid: Sample identifier used for download and cache paths.
        before: Not used by this function.
        finished: Path of a marker file that receives the text "Completed".
    """
    clustering = ".cache/{}/clustering/".format(sampleid)
    os.makedirs(clustering, exist_ok=True)
    cluster_results = os.path.join(clustering,
                                   "{}_clusters.pkl".format(sampleid))

    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)

    if not os.path.exists(cluster_results):
        clusters = tenx_analysis.clusters(qc.sce)
        # Closing the file explicitly ensures the pickle is fully written
        # before anything else tries to read the cache.
        with open(cluster_results, "wb") as handle:
            pickle.dump(clusters, handle)
    else:
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # acceptable only because this cache is written by the branch above.
        with open(cluster_results, "rb") as handle:
            clusters = pickle.load(handle)

    tsne_by_cluster(qc.sce, clusters, sampleid, clustering)
    umap_by_cluster(qc.sce, clusters, sampleid, clustering)
    with open(finished, "w") as marker:
        marker.write("Completed")
コード例 #11
0
def Analysis(sampleid, before, finished, use_corrected=False):
    """Plot cell-type assignments for a sample from a cached pickle fit.

    Args:
        sampleid: Sample identifier used for download and cache paths.
        before: Not used by this function.
        finished: Path of a marker file that receives the text "Completed".
        use_corrected: When true and ``.cache/corrected`` exists, plot from
            ``.cache/corrected/corrected_sce.rdata`` (built with
            ``DropletUtils.read10xCounts`` if missing). Otherwise plot from
            ``sce_cas.rdata`` in the directory that holds ``qc.sce``.

    Raises:
        AssertionError: If ``.cache/<sampleid>/cell_types.pkl`` is missing.
    """
    if use_corrected and os.path.exists(".cache/corrected"):
        filtered_sce = ".cache/corrected/corrected_sce.rdata"
        if not os.path.exists(filtered_sce):
            utils = DropletUtils()
            utils.read10xCounts(".cache/corrected/", filtered_sce)
    else:
        tenx = TenxDataStorage(sampleid, version="v3")
        tenx.download()
        tenx_analysis = TenxAnalysis(tenx.tenx_path)
        tenx_analysis.load()
        tenx_analysis.extract()
        qc = QualityControl(tenx_analysis, sampleid)
        filtered_sce = os.path.join(os.path.split(qc.sce)[0], "sce_cas.rdata")

    cellassign_analysis = ".cache/{}/cellassignanalysis/".format(sampleid)
    os.makedirs(cellassign_analysis, exist_ok=True)

    pyfit_path = ".cache/{}/cell_types.pkl".format(sampleid)
    assert os.path.exists(pyfit_path), "No Pyfit Found."
    # NOTE(review): pickle.load executes arbitrary code from the file; this
    # is only acceptable because the cache is produced by this pipeline.
    with open(pyfit_path, "rb") as handle:
        pyfit = pickle.load(handle)

    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    cell_types = marker_list.celltypes()
    if "B cell" not in cell_types:
        cell_types.append("B cell")
    celltypes(pyfit, sampleid, cellassign_analysis, known_types=cell_types)

    for plot_by_cell_type in (tsne_by_cell_type, umap_by_cell_type):
        plot_by_cell_type(filtered_sce,
                          pyfit,
                          sampleid,
                          cellassign_analysis,
                          known_types=cell_types)

    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
コード例 #12
0
 def run_transcript(self, fastqs=None):
     """Build Kallisto design matrices and rank genes between two samples.

     For each sample the 10x data is downloaded and a ``Kallisto`` runner
     produces a design matrix. The first two samples are then compared
     gene by gene with a logistic-regression likelihood-ratio test.
     P-values are written to ``<sample0>_<sample1>_de.tsv``. When that
     file already exists it is read back instead and the p-values are
     adjusted with ``multitest.multipletests``.

     Args:
         fastqs: One fastq object per entry of ``self.samples``.

     Returns:
         List of ``(gene, p-value)`` tuples sorted ascending: raw p-values
         when freshly computed, adjusted p-values when read from the file.

     Raises:
         AssertionError: If ``fastqs`` and ``self.samples`` differ in
             length.
     """
     fastqs = [] if fastqs is None else fastqs
     assert len(fastqs) == len(
         self.samples), "Provide fastq object for each sample."
     # `chem` was referenced below without being defined in this method;
     # use the instance's chemistry when it has one, else the "v2" that is
     # also hard-coded for the download.
     chem = getattr(self, "chem", "v2")
     matrices = dict()
     # Iterates the `fastqs` argument that was just validated (the
     # original read `self.fastqs` here).
     for sampleid, fastq in zip(self.samples, fastqs):
         tenx = TenxDataStorage(sampleid, version="v2")
         tenx.download()
         tenx_analysis = TenxAnalysis(tenx.tenx_path)
         tenx_analysis.load()
         tenx_analysis.extract()
         # NOTE(review): the per-sample `fastq` is unused and the whole
         # `fastqs` list is passed, as in the original -- confirm intent.
         self.krunner = Kallisto(fastqs, tenx_analysis, chem=chem)
         self.krunner.run_pseudo()
         self.krunner.run_bus()
         matrices[sampleid] = self.krunner.design_matrix()
     self.matrices = matrices
     # `sampleids` is not in scope here; the ids live on self.samples.
     first, second = self.samples[0], self.samples[1]
     self.matrix1 = self.matrices[first]
     self.matrix2 = self.matrices[second]
     self.common_genes = set(self.matrix1.keys()).intersection(
         set(self.matrix2.keys()))
     self.model = LogisticRegression(random_state=0,
                                     solver='lbfgs',
                                     multi_class='multinomial')
     de_file = "{}_{}_de.tsv".format(first, second)
     if not os.path.exists(de_file):
         # NOTE(review): a bare `return` used to sit here, which made this
         # whole branch unreachable and returned None -- confirm it was a
         # debugging leftover.
         differential_genes = dict()
         labelled = ((first, self.matrix1), (second, self.matrix2))
         with open(de_file, "w") as output:
             output.write("Gene\tPValue\n")
             for gene in tqdm.tqdm(self.common_genes):
                 tcc_common = set(self.matrix1[gene].keys()).intersection(
                     set(self.matrix2[gene].keys()))
                 if len(tcc_common) == 0:
                     continue
                 cells_by_sample = [
                     list(
                         itertools.chain.from_iterable([
                             list(matrix[gene][tcc].keys())
                             for tcc in tcc_common
                         ])) for _, matrix in labelled
                 ]
                 if any(len(cells) == 0 for cells in cells_by_sample):
                     continue
                 Y = []
                 X = []
                 # One row per cell: label is the sample id, predictors
                 # are the cell's value for each shared TCC (0 if absent).
                 for (label, matrix), cells in zip(labelled,
                                                   cells_by_sample):
                     for cell in cells:
                         Y.append(label)
                         predictors = []
                         for tcc in tcc_common:
                             try:
                                 predictors.append(matrix[gene][tcc][cell])
                             except KeyError:
                                 predictors.append(0)
                         X.append(predictors)
                 classes = set(Y)
                 Y = numpy.array(Y)
                 X = numpy.array(X)
                 if Y.shape[0] < 2 or len(classes) == 1:
                     continue
                 self.model.fit(X, Y)
                 null_prob = 2.0 / float(Y.shape[0]) * numpy.ones(Y.shape)
                 df = X.shape[1]
                 alt_prob = self.model.predict_proba(X)
                 alt_log_likelihood = -log_loss(
                     Y, alt_prob, normalize=False)
                 null_log_likelihood = -log_loss(
                     Y, null_prob, normalize=False)
                 # Likelihood-ratio statistic against a chi-squared
                 # distribution with one degree of freedom per predictor.
                 G = 2 * (alt_log_likelihood - null_log_likelihood)
                 p_value = chi2.sf(G, df)
                 differential_genes[gene] = p_value
                 output.write("{}\t{}\n".format(gene, p_value))
         sorted_genes = sorted(differential_genes.items(),
                               key=operator.itemgetter(1))
         print("**************** Differential Genes ********************")
         for gene, pvalue in sorted_genes[:100]:
             print(gene, pvalue)
     else:
         with open(de_file, "r") as handle:
             rows = handle.read().splitlines()[1:]  # drop the header row
         _genes = []
         pvalues = []
         for row in rows:
             gene, pvalue = row.split()
             pvalues.append(float(pvalue))
             _genes.append(gene)
         adj_pvalues = list(multitest.multipletests(pvalues)[1])
         print(adj_pvalues)
         differential_genes_adj = dict(zip(_genes, adj_pvalues))
         sorted_genes = sorted(differential_genes_adj.items(),
                               key=operator.itemgetter(1))
         thresholds = (0.05, 0.01, 0.001)
         import collections
         sig_genes = collections.defaultdict(list)
         for gene, pvalue in sorted_genes:
             for threshold in thresholds:
                 if pvalue < threshold:
                     sig_genes[str(threshold)].append(gene)
         print("**************** Differential Genes ********************")
         # Distinct loop name so the dict being iterated is not rebound.
         for thresh, genes_below in sig_genes.items():
             print(thresh, len(genes_below))
         for gene, pvalue in sorted_genes[:100]:
             print(gene, pvalue)
     return sorted_genes