Beispiel #1
0
def Analysis(sampleid, before, finished):
    """Run the CellAssign cell-type plotting helpers for one sample.

    Downloads and loads the 10x data for ``sampleid``, builds a
    ``QualityControl`` object, loads the pickled fit from
    ``.cache/<sampleid>/cell_types.pkl`` and passes it to ``celltypes``,
    ``tsne_by_cell_type`` and ``umap_by_cell_type`` with
    ``.cache/<sampleid>/cellassignanalysis/`` as the analysis directory.

    Args:
        sampleid: Sample identifier used for the storage lookup and for
            the cache paths.
        before: Not read by this function (presumably a workflow
            dependency marker -- confirm against the caller).
        finished: Path of a file that receives the text ``"Completed"``
            after all helpers have returned.

    Raises:
        AssertionError: If the pickled fit does not exist.
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)

    cellassign_analysis = ".cache/{}/cellassignanalysis/".format(sampleid)
    if not os.path.exists(cellassign_analysis):
        os.makedirs(cellassign_analysis)

    pyfit_path = ".cache/{}/cell_types.pkl".format(sampleid)
    assert os.path.exists(pyfit_path), "No Pyfit Found."
    # NOTE: pickle must only be used on trusted files; this one comes from
    # the local cache directory.
    with open(pyfit_path, "rb") as handle:
        pyfit = pickle.load(handle)

    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    cell_types = marker_list.celltypes()
    # "B cell" is always part of the known types, even when the marker
    # matrix does not list it.
    if "B cell" not in cell_types:
        cell_types.append("B cell")

    celltypes(pyfit, sampleid, cellassign_analysis, known_types=cell_types)
    tsne_by_cell_type(qc.sce,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)
    umap_by_cell_type(qc.sce,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)

    # Close the marker file explicitly so the text is flushed to disk.
    with open(finished, "w") as marker:
        marker.write("Completed")
Beispiel #2
0
def Analysis(sampleid, sce_cas, celltype_plot, tsne, umap):
    """Run the cell-type plotting helpers and copy the plots to their targets.

    Loads the pickled fit stored as ``cell_types.pkl`` next to ``sce_cas``,
    calls ``celltypes``, ``tsne_by_cell_type`` and ``umap_by_cell_type`` with
    ``.cache/<sampleid>/cellassignanalysis/`` as the analysis directory, and
    then copies ``cell_types.png``, ``umap_by_cell_type.png`` and
    ``tsne_by_cell_type.png`` from that directory to the requested paths.

    Args:
        sampleid: Sample identifier used for the cache path.
        sce_cas: Path handed to the t-SNE/UMAP helpers; its directory must
            also contain ``cell_types.pkl``.
        celltype_plot: Destination path for ``cell_types.png``.
        tsne: Destination path for ``tsne_by_cell_type.png``.
        umap: Destination path for ``umap_by_cell_type.png``.

    Raises:
        AssertionError: If the pickled fit does not exist.
    """
    cellassign_analysis = ".cache/{}/cellassignanalysis/".format(sampleid)
    if not os.path.exists(cellassign_analysis):
        os.makedirs(cellassign_analysis)

    pyfit_path = os.path.join(os.path.split(sce_cas)[0], "cell_types.pkl")
    assert os.path.exists(pyfit_path), "No Pyfit Found."
    # NOTE: pickle must only be used on trusted files.
    with open(pyfit_path, "rb") as handle:
        pyfit = pickle.load(handle)

    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    cell_types = marker_list.celltypes()
    # "B cell" is always part of the known types, even when the marker
    # matrix does not list it.
    if "B cell" not in cell_types:
        cell_types.append("B cell")

    celltypes(pyfit, sampleid, cellassign_analysis, known_types=cell_types)
    tsne_by_cell_type(sce_cas,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)
    umap_by_cell_type(sce_cas,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)

    # Copy the plots expected in the analysis directory to the
    # caller-supplied destinations.
    shutil.copyfile(os.path.join(cellassign_analysis, "cell_types.png"),
                    celltype_plot)
    shutil.copyfile(
        os.path.join(cellassign_analysis, "umap_by_cell_type.png"), umap)
    shutil.copyfile(
        os.path.join(cellassign_analysis, "tsne_by_cell_type.png"), tsne)
Beispiel #3
0
def RunSeuratViz(seurat, tsne, umap, tsne_celltype, umap_celltype, ridge,
                 exprs):
    """Generate Seurat plots with an R script and copy them to their targets.

    Writes ``viz.R`` next to the ``seurat`` RDS file, runs it with
    ``Rscript`` and copies the six PNG files it is expected to produce
    (t-SNE, UMAP, both again grouped by ``cell_type``, a ridge plot and a
    feature plot of the marker genes from ``config.rho_matrix``) to the
    paths given by the caller.

    Args:
        seurat: Path of the RDS file read by the R script; its directory is
            also used for the script and the intermediate PNG files.
        tsne: Destination for ``tsne.png``.
        umap: Destination for ``umap.png``.
        tsne_celltype: Destination for ``tsne_celltype.png``.
        umap_celltype: Destination for ``umap_celltype.png``.
        ridge: Destination for ``ridge.png``.
        exprs: Destination for ``features.png``.
    """
    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    # Quote every gene name so the list can be spliced into an R c(...) call.
    markers = ["'" + marker + "'" for marker in marker_list.genes]

    workdir = os.path.split(seurat)[0]
    tsne_plot = os.path.join(workdir, "tsne.png")
    umap_plot = os.path.join(workdir, "umap.png")
    tsne_celltype_plot = os.path.join(workdir, "tsne_celltype.png")
    umap_celltype_plot = os.path.join(workdir, "umap_celltype.png")
    ridge_plot = os.path.join(workdir, "ridge.png")
    exprs_plot = os.path.join(workdir, "features.png")

    # "height" is spelled out instead of relying on R's partial argument
    # matching of "heigh".
    rcode = """
    library(Seurat)
    library(ggplot2)
    seurat <- readRDS("{seurat}")

    png("{tsne}")
    DimPlot(object = seurat, reduction = "tsne")
    dev.off()
    png("{umap}")
    DimPlot(object = seurat, reduction = "umap")
    dev.off()

    png("{tsne_celltype}")
    DimPlot(object = seurat, reduction = "tsne", group.by = "cell_type")
    dev.off()
    png("{umap_celltype}")
    DimPlot(object = seurat, reduction = "umap", group.by = "cell_type")
    dev.off()

    png("{ridge}",width=600,height=5000)
    RidgePlot(object = seurat, features = c({markers}), ncol = 2)
    dev.off()

    png("{exprs}",width=600,height=5000)
    FeaturePlot(object = seurat, features = c({markers}), ncol= 2)
    dev.off()
    """
    qc_script = os.path.join(workdir, "viz.R")
    script_text = rcode.format(seurat=seurat,
                               tsne=tsne_plot,
                               umap=umap_plot,
                               tsne_celltype=tsne_celltype_plot,
                               umap_celltype=umap_celltype_plot,
                               markers=",".join(markers),
                               ridge=ridge_plot,
                               exprs=exprs_plot)
    with open(qc_script, "w") as output:
        output.write(script_text)

    # The exit status of Rscript is not checked; a failed run shows up as a
    # missing-file error in the copies below.
    subprocess.call(["Rscript", "{}".format(qc_script)])

    shutil.copyfile(tsne_plot, tsne)
    shutil.copyfile(umap_plot, umap)
    shutil.copyfile(tsne_celltype_plot, tsne_celltype)
    shutil.copyfile(umap_celltype_plot, umap_celltype)
    shutil.copyfile(ridge_plot, ridge)
    shutil.copyfile(exprs_plot, exprs)
Beispiel #4
0
 def test_cell_assign_em(self):
     """Run ``run_em`` on the example RData file under ``tests/``.

     The result is not inspected; the test passes as long as nothing raises.
     """
     marker_genes = [
         "Gene161", "Gene447", "Gene519", "Gene609", "Gene677", "Gene750",
         "Gene754", "Gene860", "Gene929", "Gene979"
     ]
     cell_groups = ["Groups1", "Groups2"]
     experiment = SingleCellExperiment.fromRData(
         os.path.join(base_dir, "tests/cell_assign_test.RData"))
     assigner = CellAssign()
     marker_matrix = GeneMarkerMatrix(genes=marker_genes, cells=cell_groups)
     assigner.run_em(experiment, marker_matrix)
Beispiel #5
0
 def test_cell_assign_pkl(self):
     """Run ``run_em`` with a marker matrix loaded from ``tests/rho_up.pkl``.

     The counts are read from ``tests/pre_igo``. The result is not
     inspected; the test passes as long as nothing raises.
     """
     import pickle
     tenx = TenxAnalysis("tests/pre_igo")
     sce = TenX.read10xCounts(tenx)
     # The context manager closes the fixture even if unpickling fails.
     with open("tests/rho_up.pkl", "rb") as handle:
         rho_matrix = pickle.load(handle)
     rho = GeneMarkerMatrix(rho_matrix)
     cellassigner = CellAssign()
     cellassigner.run_em(sce, rho)
Beispiel #6
0
def IntegratedSummary(sce, sampleid, report):
    """Build the JSON summary for one sample and copy it to ``report``.

    The summary is cached as ``viz/<sampleid>.json``. When that file does
    not exist yet, it is built from the experiment loaded from ``sce``:
    column data, row data, the non-zero ``logcounts`` entries per barcode
    and gene symbol, the cell-type label per barcode, integer UMAP
    coordinates per barcode and the marker list from ``config.rho_matrix``.

    Args:
        sce: Path passed to ``SingleCellExperiment.fromRData``.
        sampleid: Key under which the sample data is stored and name of
            the cached JSON file.
        report: Destination path that receives a copy of the JSON file.
    """
    json_path = "viz/{}.json".format(sampleid)
    if not os.path.exists("viz/"):
        os.makedirs("viz")
    if not os.path.exists("viz/html/"):
        os.makedirs("viz/html/")

    if not os.path.exists(json_path):
        sce = SingleCellExperiment.fromRData(sce)
        column_data = dump_all_coldata(sce)
        gene_data = dump_all_rowdata(sce)
        patient_data = collections.defaultdict(dict)
        patient_data[sampleid]["celldata"] = column_data
        patient_data[sampleid]["genedata"] = gene_data

        # Sparse representation: only non-zero values are stored, keyed by
        # barcode and then by gene symbol.
        logcounts = sce.assays["logcounts"].todense().tolist()
        cell_barcodes = column_data["Barcode"]
        log_count_matrix = collections.defaultdict(dict)
        for symbol, row in zip(gene_data["Symbol"], logcounts):
            for barcode, value in zip(cell_barcodes, row):
                if float(value) != 0.0:
                    log_count_matrix[barcode][symbol] = value
        patient_data[sampleid]["log_count_matrix"] = dict(log_count_matrix)

        barcodes = sce.colData["Barcode"]
        # NOTE(review): the reshape assumes the UMAP values are laid out as
        # all x coordinates followed by all y coordinates -- confirm.
        rdims = numpy.array(sce.reducedDims["UMAP"]).reshape(
            2, len(barcodes))

        # Dots in the stored labels are turned back into the separators
        # used for display.
        labels = []
        for label in sce.colData["cell_type"]:
            if label == "Monocyte.Macrophage":
                labels.append("Monocyte/Macrophage")
            else:
                labels.append(label.replace(".", " "))
        fit = dict(zip(barcodes, labels))

        x_coded = dict(zip(barcodes, rdims[0]))
        y_coded = dict(zip(barcodes, rdims[1]))
        coords = dict()
        for barcode in fit:
            try:
                # Coordinates are truncated to integers.
                point = (int(x_coded[barcode]), int(y_coded[barcode]))
            except (KeyError, TypeError, ValueError, OverflowError):
                # Skip barcodes without a usable coordinate (missing,
                # NaN/inf or non-numeric).
                continue
            coords[barcode] = point
        patient_data[sampleid]["cellassign"] = fit
        patient_data[sampleid]["umap"] = coords
        patient_data["rho"] = GeneMarkerMatrix.read_yaml(
            config.rho_matrix).marker_list

        # Serialise before opening the file so that a serialisation error
        # does not leave an empty cache file behind.
        payload = json.dumps(patient_data)
        with open(json_path, "w") as output:
            output.write(payload)

    shutil.copyfile(json_path, report)
def RunSeuratViz(custom_output, seurat, umap_celltype, ridge, exprs):
    """Generate per-sample Seurat plots and copy them to their targets.

    Reads the sample id from the JSON file ``custom_output``, writes
    ``viz_<sampleid>.R`` next to the ``seurat`` RDS file and, unless the
    feature plot already exists, runs it with ``Rscript``. The plots are
    created under ``<config.jobpath>/results`` and then copied to the paths
    given by the caller.

    Args:
        custom_output: Path of a JSON object file; the key of its last
            entry is used as sample id.
        seurat: Path of the RDS file read by the R script.
        umap_celltype: Destination for the UMAP plot grouped by cell type.
        ridge: Destination for the ridge plot of the marker genes.
        exprs: Destination for the feature plot of the marker genes.
    """
    with open(custom_output, "r") as handle:
        sample = json.load(handle)
    # Only the key of the last entry is needed; the mapped value is unused.
    sampleid, _ = list(sample.items()).pop()

    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    # Quote every gene name so the list can be spliced into an R c(...) call.
    markers = ["'" + marker + "'" for marker in marker_list.genes]

    results_dir = os.path.join(config.jobpath, "results")
    umap_celltype_plot = os.path.join(
        results_dir, "umap_celltype_{}.png".format(sampleid))
    ridge_plot = os.path.join(results_dir, "ridge_{}.png".format(sampleid))
    exprs_plot = os.path.join(results_dir,
                              "features_{}.png".format(sampleid))
    rcode = """
    library(Seurat)
    library(ggplot2)
    seurat <- readRDS("{seurat}")

    png("{umap_celltype}", width=1000, height=1000)
    DimPlot(object = seurat, reduction = "umap", group.by = "cell_type", dark.theme=TRUE, plot.title="{sample}")
    dev.off()

    png("{ridge}",width=600,height=5000)
    RidgePlot(object = seurat, features = c({markers}), ncol = 2)
    dev.off()

    png("{exprs}",width=600,height=5000)
    FeaturePlot(object = seurat, features = c({markers}), ncol= 2)
    dev.off()
    """
    qc_script = os.path.join(
        os.path.split(seurat)[0], "viz_{}.R".format(sampleid))
    script_text = rcode.format(seurat=seurat,
                               umap_celltype=umap_celltype_plot,
                               markers=",".join(markers),
                               ridge=ridge_plot,
                               exprs=exprs_plot,
                               sample=sampleid)
    with open(qc_script, "w") as output:
        output.write(script_text)

    # The feature plot is the last one the script writes, so its presence
    # is used as the marker that a previous run already produced the plots.
    if not os.path.exists(exprs_plot):
        subprocess.call(["Rscript", "{}".format(qc_script)])

    shutil.copyfile(umap_celltype_plot, umap_celltype)
    shutil.copyfile(ridge_plot, ridge)
    shutil.copyfile(exprs_plot, exprs)
Beispiel #8
0
def Analysis(sampleid, before, finished, use_corrected=False):
    """Run the CellAssign cell-type plotting helpers for one sample.

    The experiment handed to the t-SNE/UMAP helpers is either the corrected
    one under ``.cache/corrected`` (when ``use_corrected`` is set and that
    directory exists) or ``sce_cas.rdata`` next to the quality-control
    output of the downloaded 10x data. The pickled fit is always read from
    ``.cache/<sampleid>/cell_types.pkl``.

    Args:
        sampleid: Sample identifier used for the storage lookup and for
            the cache paths.
        before: Not read by this function (presumably a workflow
            dependency marker -- confirm against the caller).
        finished: Path of a file that receives the text ``"Completed"``
            after all helpers have returned.
        use_corrected: Prefer the corrected experiment when available.

    Raises:
        AssertionError: If the pickled fit does not exist.
    """
    if use_corrected and os.path.exists(".cache/corrected"):
        filtered_sce = ".cache/corrected/corrected_sce.rdata"
        if not os.path.exists(filtered_sce):
            # Build the corrected experiment on first use.
            utils = DropletUtils()
            utils.read10xCounts(".cache/corrected/",
                                ".cache/corrected/corrected_sce.rdata")
    else:
        tenx = TenxDataStorage(sampleid, version="v3")
        tenx.download()
        tenx_analysis = TenxAnalysis(tenx.tenx_path)
        tenx_analysis.load()
        tenx_analysis.extract()
        qc = QualityControl(tenx_analysis, sampleid)
        filtered_sce = os.path.join(os.path.split(qc.sce)[0], "sce_cas.rdata")

    cellassign_analysis = ".cache/{}/cellassignanalysis/".format(sampleid)
    if not os.path.exists(cellassign_analysis):
        os.makedirs(cellassign_analysis)

    pyfit_path = ".cache/{}/cell_types.pkl".format(sampleid)
    assert os.path.exists(pyfit_path), "No Pyfit Found."
    # NOTE: pickle must only be used on trusted files; this one comes from
    # the local cache directory.
    with open(pyfit_path, "rb") as handle:
        pyfit = pickle.load(handle)

    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    cell_types = marker_list.celltypes()
    # "B cell" is always part of the known types, even when the marker
    # matrix does not list it.
    if "B cell" not in cell_types:
        cell_types.append("B cell")

    celltypes(pyfit, sampleid, cellassign_analysis, known_types=cell_types)
    tsne_by_cell_type(filtered_sce,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)
    umap_by_cell_type(filtered_sce,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)

    # Close the marker file explicitly so the text is flushed to disk.
    with open(finished, "w") as marker:
        marker.write("Completed")
Beispiel #9
0
def RunSeuratViz(seurat, umap, umap_celltype, umap_clone):
    """Generate UMAP plots with an R script and copy them to their targets.

    Writes ``viz.R`` next to the ``seurat`` RDS file and, unless
    ``umap_clone.png`` already exists there, runs it with ``Rscript``. The
    script draws the UMAP reduction ungrouped, grouped by ``cell_type`` and
    grouped by ``clone``. The three PNG files are then copied to the paths
    given by the caller.

    The marker list is not needed for these plots, so it is not loaded.

    Args:
        seurat: Path of the RDS file read by the R script; its directory is
            also used for the script and the intermediate PNG files.
        umap: Destination for ``umap.png``.
        umap_celltype: Destination for ``umap_celltype.png``.
        umap_clone: Destination for ``umap_clone.png``.
    """
    workdir = os.path.split(seurat)[0]
    umap_plot = os.path.join(workdir, "umap.png")
    umap_celltype_plot = os.path.join(workdir, "umap_celltype.png")
    umap_clone_plot = os.path.join(workdir, "umap_clone.png")
    rcode = """
    library(Seurat)
    library(ggplot2)
    seurat <- readRDS("{seurat}")

    png("{umap}")
    DimPlot(object = seurat, reduction = "umap")
    dev.off()

    png("{umap_celltype}")
    DimPlot(object = seurat, reduction = "umap", group.by = "cell_type")
    dev.off()

    png("{umap_clone}")
    DimPlot(object = seurat, reduction = "umap", group.by = "clone")
    dev.off()
    """
    qc_script = os.path.join(workdir, "viz.R")
    script_text = rcode.format(seurat=seurat,
                               umap=umap_plot,
                               umap_celltype=umap_celltype_plot,
                               umap_clone=umap_clone_plot)
    with open(qc_script, "w") as output:
        output.write(script_text)

    # The clone plot is the last one the script writes, so its presence is
    # used as the marker that a previous run already produced the plots.
    if not os.path.exists(umap_clone_plot):
        subprocess.call(["Rscript", "{}".format(qc_script)])

    shutil.copyfile(umap_plot, umap)
    shutil.copyfile(umap_celltype_plot, umap_celltype)
    shutil.copyfile(umap_clone_plot, umap_clone)
 def run(rdata,
         rho_yaml,
         results,
         rho_csv=".cache/rho.csv",
         lsf=False,
         B=10,
         min_delta=2,
         script_prefix=""):
     """Run CellAssign and store the assignments as a pickled dictionary.

     Converts the marker YAML into the CSV matrix ``rho_csv``, calls
     ``CellAssign.cmd`` and then reads ``<script_prefix>cell_types.tsv``
     from the directory of ``rdata``. The second and third comma-separated
     fields of every data line are collected under the keys ``"Barcode"``
     and ``"cell_type"`` and pickled to ``<script_prefix>cell_types.pkl``
     in the same directory.

     Args:
         rdata: Path forwarded to ``CellAssign.cmd``; its directory holds
             the result table and receives the pickle.
         rho_yaml: Marker definition read with ``GeneMarkerMatrix.read_yaml``.
         results: Forwarded to ``CellAssign.cmd``.
         rho_csv: Path the marker matrix is written to.
         lsf: Forwarded to ``CellAssign.cmd``.
         B: Forwarded to ``CellAssign.cmd``.
         min_delta: Forwarded to ``CellAssign.cmd``.
         script_prefix: Forwarded to ``CellAssign.cmd`` and used as prefix
             of the result file names.

     Raises:
         AssertionError: If ``rho_csv`` was not created.
         IndexError: If the result table is empty or a line has fewer than
             three fields.
     """
     if not os.path.exists(".cache"):
         os.makedirs(".cache")
     marker_list = GeneMarkerMatrix.read_yaml(rho_yaml)
     marker_list.write_matrix(rho_csv)
     assert os.path.exists(rho_csv)
     CellAssign.cmd(rdata,
                    rho_csv,
                    results,
                    lsf=lsf,
                    B=B,
                    min_delta=min_delta,
                    script_prefix=script_prefix)
     print("CellAssign finished.")

     result_dir = os.path.split(rdata)[0]
     matched_results = os.path.join(
         result_dir, "{}cell_types.tsv".format(script_prefix))
     pkl_fit = os.path.join(
         result_dir, "{}cell_types.pkl".format(script_prefix))

     with open(matched_results, "r") as handle:
         lines = handle.read().splitlines()
     lines.pop(0)  # discard the header row

     barcodes = []
     cell_types = []
     for line in lines:
         # Fields are comma separated despite the .tsv extension; quotes
         # around the values are stripped.
         fields = [field.replace('"', '') for field in line.split(",")]
         barcodes.append(fields[1])
         cell_types.append(fields[2])

     pyfit = {"Barcode": barcodes, "cell_type": cell_types}
     # Close the pickle explicitly so the data is flushed before callers
     # read it back.
     with open(pkl_fit, "wb") as handle:
         pickle.dump(pyfit, handle)
     print("Results written.")
Beispiel #11
0
def RunSampleSummary(sample_to_path, summary, sce, cellassign_fit, metrics,
                     report):
    """Build the JSON summary for one sample and copy it to ``report``.

    The sample id is taken from the JSON file ``sample_to_path`` and
    translated through ``config.sample_mapping``. The summary is cached as
    ``viz/<sampleid>.json``. When that file does not exist yet, it is built
    from the experiment loaded from ``sce`` (column data, row data and
    non-zero ``logcounts``), the web summary, the pickled CellAssign fit,
    integer UMAP coordinates, the result of ``get_statistics`` and the
    marker list from ``config.rho_matrix``.

    Args:
        sample_to_path: Path of a JSON object file; the key of its last
            entry is the original sample id.
        summary: Web summary HTML file; copied to
            ``viz/html/<sampleid>_web_summary.html`` and forwarded to
            ``get_statistics``.
        sce: Path passed to ``SingleCellExperiment.fromRData``.
        cellassign_fit: Pickle holding a mapping with the keys
            ``"Barcode"`` and ``"cell_type"``.
        metrics: Forwarded to ``get_statistics``.
        report: Destination path that receives a copy of the JSON file;
            also forwarded to ``get_statistics``.

    Raises:
        KeyError: If the sample id is missing from the sample mapping.
    """
    # Each line of the mapping file holds two whitespace-separated fields:
    # original sample id and mapped sample id.
    with open(config.sample_mapping, "r") as handle:
        sample_map = dict(
            [line.split() for line in handle.read().splitlines()])
    with open(sample_to_path, "r") as handle:
        sample = json.load(handle)
    # Only the key of the last entry is needed; the mapped value is unused.
    sample_original, _ = list(sample.items()).pop()
    sampleid = sample_map[sample_original]

    json_path = "viz/{}.json".format(sampleid)
    if not os.path.exists(json_path):
        if not os.path.exists("viz/"):
            os.makedirs("viz")
        if not os.path.exists("viz/html/"):
            os.makedirs("viz/html/")
        sce = SingleCellExperiment.fromRData(sce)
        column_data = dump_all_coldata(sce)
        patient_data = collections.defaultdict(dict)
        patient_data[sampleid]["celldata"] = column_data
        gene_data = dump_all_rowdata(sce)
        patient_data[sampleid]["genedata"] = gene_data

        # Sparse representation: only non-zero values are stored, keyed by
        # barcode and then by gene symbol.
        logcounts = sce.assays["logcounts"].todense().tolist()
        cell_barcodes = column_data["Barcode"]
        log_count_matrix = collections.defaultdict(dict)
        for symbol, row in zip(gene_data["Symbol"], logcounts):
            for barcode, value in zip(cell_barcodes, row):
                if float(value) != 0.0:
                    log_count_matrix[barcode][symbol] = value
        patient_data[sampleid]["log_count_matrix"] = dict(log_count_matrix)

        final_summary = "viz/html/{}_web_summary.html".format(sampleid)
        shutil.copyfile(summary, final_summary)
        patient_data[sampleid]["web_summary"] = final_summary

        barcodes = sce.colData["Barcode"]
        # NOTE(review): the reshape assumes the UMAP values are laid out as
        # all x coordinates followed by all y coordinates -- confirm.
        rdims = numpy.array(sce.reducedDims["UMAP"]).reshape(
            2, len(barcodes))

        # NOTE: pickle must only be used on trusted files.
        with open(cellassign_fit, "rb") as handle:
            cellassign = pickle.load(handle)
        # Dots in the stored labels are turned back into the separators
        # used for display.
        labels = []
        for label in cellassign["cell_type"]:
            if label == "Monocyte.Macrophage":
                labels.append("Monocyte/Macrophage")
            else:
                labels.append(label.replace(".", " "))
        fit = dict(zip(cellassign["Barcode"], labels))

        x_coded = dict(zip(barcodes, rdims[0]))
        y_coded = dict(zip(barcodes, rdims[1]))
        coords = dict()
        for barcode in fit:
            try:
                # Coordinates are truncated to integers.
                point = (int(x_coded[barcode]), int(y_coded[barcode]))
            except (KeyError, TypeError, ValueError, OverflowError):
                # Skip barcodes from the fit that have no usable coordinate
                # in the experiment (missing, NaN/inf or non-numeric).
                continue
            coords[barcode] = point
        patient_data[sampleid]["cellassign"] = fit
        patient_data[sampleid]["umap"] = coords

        # Count the cells below the mitochondrial threshold with a small
        # R script; the table is written to the stats file.
        qc_script = ".cache/runqc_{}.R".format(sampleid)
        rdata = ".cache/{0}/{0}.rdata".format(sample_original)
        stats = ".cache/{0}_stats.tsv".format(sampleid)
        rcode = """
        library(SingleCellExperiment)
        rdata <- readRDS('{sce}')
        sce <- as(rdata, 'SingleCellExperiment')
        cells_to_keep <- sce$pct_counts_mito < as.numeric(20)
        table_cells_to_keep <- table(cells_to_keep)
        write.table(table_cells_to_keep, file='{stats}',sep="\t")
        """
        with open(qc_script, "w") as output:
            output.write(rcode.format(sce=rdata, stats=stats))
        # The exit status of Rscript is not checked here.
        subprocess.call(["Rscript", qc_script])

        patient_data["statistics"] = get_statistics(sampleid, summary, metrics,
                                                    report, stats)
        patient_data["rho"] = GeneMarkerMatrix.read_yaml(
            config.rho_matrix).marker_list

        # Serialise before opening the file so that a serialisation error
        # does not leave an empty cache file behind.
        payload = json.dumps(patient_data)
        with open(json_path, "w") as output:
            output.write(payload)

    shutil.copyfile(json_path, report)