Esempio n. 1
0
def RunCellAssign(sce, annot_sce):
    """Run CellAssign on ``sce`` and copy the annotated result to ``annot_sce``.

    The rho CSV, the fit file and the annotated ``sce_cas.rdata`` all live in
    the same directory as ``sce``.

    Args:
        sce: Path to the input SingleCellExperiment file.
        annot_sce: Destination path that receives a copy of ``sce_cas.rdata``.

    Raises:
        OSError: If ``sce_cas.rdata`` is missing after the run or the
            destination cannot be written (raised by ``shutil.copyfile``).
    """
    workdir = os.path.dirname(sce)
    rho_csv = os.path.join(workdir, "rho_csv_sub.csv")
    fit = os.path.join(workdir, "fit_sub.pkl")
    CellAssign.run(sce, config.rho_matrix, fit, rho_csv=rho_csv)
    # NOTE(review): CellAssign.run is presumed to leave sce_cas.rdata next to
    # the input; nothing in this function creates it - confirm.
    shutil.copyfile(os.path.join(workdir, "sce_cas.rdata"), annot_sce)
Esempio n. 2
0
 def test_cell_assign_em(self):
     """Run CellAssign EM on the example RData with a two-group marker matrix.

     No assertion is made on the result; the test only fails if one of the
     calls raises.
     """
     marker_genes = [
         "Gene161", "Gene447", "Gene519", "Gene609", "Gene677",
         "Gene750", "Gene754", "Gene860", "Gene929", "Gene979",
     ]
     cell_groups = ["Groups1", "Groups2"]
     rdata_path = os.path.join(base_dir, "tests/cell_assign_test.RData")
     experiment = SingleCellExperiment.fromRData(rdata_path)
     assigner = CellAssign()
     marker_matrix = GeneMarkerMatrix(genes=marker_genes, cells=cell_groups)
     result = assigner.run_em(experiment, marker_matrix)
Esempio n. 3
0
def Run(sampleid, before, finished):
    """Download a 10x sample, run QC and CellAssign, then write a marker file.

    Args:
        sampleid: Sample identifier; also names the ``.cache/<sampleid>``
            directory that receives ``celltypes.rdata``.
        before: Unused by this function; kept for the caller's signature.
        finished: Path of the marker file that receives the text
            ``"Completed"`` once every step has returned.
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    CellAssign.run(qc.sce, config.rho_matrix,
                   ".cache/{}/celltypes.rdata".format(sampleid))
    # Context manager so the marker is flushed and closed deterministically.
    with open(finished, "w") as marker:
        marker.write("Completed")
Esempio n. 4
0
 def test_cell_assign_pkl(self):
     """Run CellAssign EM with a marker matrix loaded from a pickle fixture.

     Counts are read from ``tests/pre_igo`` and the rho matrix from
     ``tests/rho_up.pkl``. No assertion is made on the result; the test
     only fails if one of the calls raises.
     """
     import pickle
     tenx = TenxAnalysis("tests/pre_igo")
     sce = TenX.read10xCounts(tenx)
     # The fixture is a repository-local file; pickle must never be pointed
     # at untrusted data. The context manager closes the handle even if
     # unpickling fails.
     with open("tests/rho_up.pkl", "rb") as handle:
         rho_matrix = pickle.load(handle)
     rho = GeneMarkerMatrix(rho_matrix)
     cellassigner = CellAssign()
     res = cellassigner.run_em(sce, rho)
Esempio n. 5
0
 def test_cell_assign_em(self):
     """Call ``CellAssign.run_em`` with file names instead of loaded objects.

     No assertion is made on the result; the test only fails if the call
     raises.
     """
     print("Init CellEM")
     sce_name = "sce_final.rdata"
     assigner = CellAssign()
     # An earlier variant loaded tests/cell_assign_test.RData and built a
     # GeneMarkerMatrix for "Group1"/"Group2"; this one passes names only.
     fit_name = "cell_assign_fit.rdata"
     result = assigner.run_em(sce_name, fit_name, "test")
Esempio n. 6
0
def RunCellAssign(custom_output, sce, annot_sce):
    """Run CellAssign for one sample, caching the result under the job path.

    The annotated experiment is cached as
    ``<config.jobpath>/results/sce_cas_<sampleid>.rdata``. CellAssign is only
    run when that cache file does not exist yet.

    Args:
        custom_output: Path to a JSON file holding an object whose last
            entry's key is the sample id.
        sce: Path to the input SingleCellExperiment file; intermediate files
            are placed next to it.
        annot_sce: Destination path that receives a copy of the cached,
            annotated experiment.
    """
    # Context manager so the JSON handle is closed even if parsing fails.
    with open(custom_output, "r") as handle:
        sample = json.load(handle)
    sampleid, _ = list(sample.items()).pop()

    workdir = os.path.dirname(sce)
    rho_csv = os.path.join(workdir, "rho_csv_sub.csv")
    fit = os.path.join(workdir, "fit_sub.pkl")
    local_result = os.path.join(workdir, "sce_cas.rdata")
    cached_result = os.path.join(config.jobpath, "results",
                                 "sce_cas_{}.rdata".format(sampleid))
    if not os.path.exists(cached_result):
        rho = os.path.join(config.jobpath, config.rho_matrix)
        CellAssign.run(sce, rho, fit, rho_csv=rho_csv, lsf=True)
        shutil.copyfile(local_result, cached_result)
    shutil.copyfile(cached_result, annot_sce)
Esempio n. 7
0
def RunHRD(custom_input, sce, rdata, umap):
    """Run the HRD CellAssign model for one sample and plot a UMAP.

    Results are cached under ``<config.jobpath>/results`` as
    ``hrd_<sampleid>.rdata`` and ``hrd_umap_<sampleid>.png``. CellAssign is
    only run when the cached rdata is missing; the R plotting script is
    written next to ``sce`` and executed on every call.

    Args:
        custom_input: Path to a JSON file holding an object whose last
            entry's key is the sample id.
        sce: Path to the input SingleCellExperiment file; intermediate files
            and the generated R script are placed next to it.
        rdata: Destination path for a copy of the cached rdata.
        umap: Destination path for a copy of the cached UMAP image.
    """
    # Context manager so the JSON handle is closed even if parsing fails.
    with open(custom_input, "r") as handle:
        sample = json.load(handle)
    sampleid, _ = list(sample.items()).pop()
    rdata_cached = os.path.join(config.jobpath, "results",
                                "hrd_{}.rdata".format(sampleid))
    umap_cached = os.path.join(config.jobpath, "results",
                               "hrd_umap_{}.png".format(sampleid))

    temp = os.path.dirname(sce)
    rho_csv = os.path.join(temp, "hrd_rho_csv.csv")
    fit = os.path.join(temp, "fit_sub_hrd.pkl")
    filtered_sce = os.path.join(temp, "hrd_sce_cas.rdata")

    if not os.path.exists(rdata_cached):
        rho = "/codebase/hrd.yaml"
        CellAssign.run(sce,
                       rho,
                       fit,
                       rho_csv=rho_csv,
                       lsf=True,
                       script_prefix="hrd_")
        shutil.copyfile(filtered_sce, rdata_cached)

    rcode = """
    library(Seurat)
    library(ggplot2)

    fit <- readRDS("{fit}")
    sce <- readRDS("{sce}")
    sce$repairtype <- fit$cell_type
    sce$HRD_prob <- fit$mle_params$gamma[,"HRD"]
    sce$HR_prob <- fit$mle_params$gamma[,"HR"]
    seurat <- as.Seurat(sce, counts = "counts", data = "logcounts")
    seurat$repairtype <- sce$repairtype

    png("{umap}", width=1000, height=1000)
    DimPlot(object = seurat, reduction = "UMAP", group.by = "repairtype", dark.theme=TRUE, plot.title="{sample}")
    dev.off()
    saveRDS(sce, file="{sce}")
    """
    qc_script = os.path.join(temp, "hrd_viz_{}.R".format(sampleid))
    # The script must be fully written and closed before Rscript reads it.
    with open(qc_script, "w") as output:
        output.write(
            rcode.format(sce=rdata_cached,
                         umap=umap_cached,
                         sample=sampleid,
                         fit=fit))
    # NOTE(review): the Rscript exit status is ignored, as before; a failed
    # script surfaces only through the copies below.
    subprocess.call(["Rscript", qc_script])
    shutil.copyfile(rdata_cached, rdata)
    shutil.copyfile(umap_cached, umap)
Esempio n. 8
0
def RunCellAssign(sce, annot_sce):
    """Run CellAssign unless already done, then publish the result and fit.

    ``sce_cas.rdata`` next to ``sce`` is treated as the cached result:
    CellAssign is only run when it is missing. The annotated experiment is
    copied to ``annot_sce`` and ``fit.rdata`` is copied into the current
    working directory.

    Args:
        sce: Path to the input SingleCellExperiment file. A bare file name
            (no directory part) is accepted and refers to the current
            working directory.
        annot_sce: Destination path for the copy of ``sce_cas.rdata``.
    """
    workdir = os.path.dirname(sce)
    filtered_sce = os.path.join(workdir, "sce_cas.rdata")
    rho_csv = os.path.join(workdir, "rho_csv_sub.csv")
    fit = os.path.join(workdir, "fit.rdata")
    if not os.path.exists(filtered_sce):
        CellAssign.run(sce,
                       config.rho_matrix,
                       fit,
                       rho_csv=rho_csv,
                       lsf=False)
    shutil.copyfile(filtered_sce, annot_sce)
    try:
        shutil.copyfile(fit, os.path.join(os.getcwd(), "fit.rdata"))
    except shutil.SameFileError:
        # The fit already lives in the working directory; nothing to copy.
        pass
Esempio n. 9
0
def RunCellAssign(sce, annot_sce, cellfit):
    """Run CellAssign with a sample-specific marker matrix and publish results.

    CellAssign is only run when ``sce_cas.rdata`` or ``fit_sub.pkl`` is
    missing next to ``sce``. The marker matrix is chosen from the name of the
    directory containing ``sce``: ``config.negative_rho_matrix`` when it
    contains "CD45N", ``config.positive_rho_matrix`` when it contains
    "CD45P", otherwise ``config.rho_matrix``.

    Args:
        sce: Path to the input SingleCellExperiment file.
        annot_sce: Destination path for the copy of ``sce_cas.rdata``.
        cellfit: Destination path for the copy of ``cell_types.pkl``.
    """
    workdir = os.path.dirname(sce)
    rho_csv = os.path.join(workdir, "rho_csv_sub.csv")
    fit = os.path.join(workdir, "fit_sub.pkl")
    filtered_sce = os.path.join(workdir, "sce_cas.rdata")
    # Built from the directory rather than by string replacement on the full
    # fit path, so a directory name containing "fit_sub" is left intact.
    cell_types = os.path.join(workdir, "cell_types.pkl")
    if not (os.path.exists(filtered_sce) and os.path.exists(fit)):
        sampleid = os.path.basename(workdir)
        if "CD45N" in sampleid:
            rho = config.negative_rho_matrix
        elif "CD45P" in sampleid:
            rho = config.positive_rho_matrix
        else:
            rho = config.rho_matrix
        CellAssign.run(sce, rho, fit, rho_csv=rho_csv)
    shutil.copyfile(filtered_sce, annot_sce)
    shutil.copyfile(cell_types, cellfit)
Esempio n. 10
0
def RunExhaustion(custom_input, sce, rdata, umap):
    """Run the exhaustion CellAssign model for one sample and plot a UMAP.

    Results are cached under ``<config.jobpath>/results`` as
    ``exhaustion_<sampleid>.rdata`` and ``exhaustion_umap_<sampleid>.png``.
    CellAssign is only run when the cached rdata is missing; the R plotting
    script is written next to ``sce`` and executed on every call.

    Args:
        custom_input: Path to a JSON file holding an object whose last
            entry's key is the sample id.
        sce: Path to the input SingleCellExperiment file; intermediate files
            and the generated R script are placed next to it.
        rdata: Destination path for a copy of the cached rdata; the R script
            reads the experiment from this copy.
        umap: Destination path for a copy of the cached UMAP image.
    """
    # Context manager so the JSON handle is closed even if parsing fails.
    with open(custom_input, "r") as handle:
        sample = json.load(handle)
    sampleid, _ = list(sample.items()).pop()
    rdata_cached = os.path.join(config.jobpath, "results",
                                "exhaustion_{}.rdata".format(sampleid))
    umap_cached = os.path.join(config.jobpath, "results",
                               "exhaustion_umap_{}.png".format(sampleid))

    temp = os.path.dirname(sce)
    _rho_csv = os.path.join(temp, "exhaustion_rho_csv.csv")
    _fit = os.path.join(temp, "fit_sub_exhaustion.pkl")
    _filtered_sce = os.path.join(temp, "exhaustion_sce_cas.rdata")

    if not os.path.exists(rdata_cached):
        rho = "/codebase/exhaustion.yaml"
        CellAssign.run(sce,
                       rho,
                       _fit,
                       rho_csv=_rho_csv,
                       lsf=True,
                       script_prefix="exhaustion_")
        shutil.copyfile(_filtered_sce, rdata_cached)
    shutil.copyfile(rdata_cached, rdata)
    rcode = """
    library(Seurat)
    library(ggplot2)
    sce <- readRDS("{sce}")
    fit <- readRDS("{fit}")

    sce$Exhaustion_prob <- fit$mle_params$gamma[,"Exhausted.T.cell"]

    seurat <- as.Seurat(sce, counts = "counts", data = "logcounts")
    seurat$Exhaustion_prob <- sce$Exhaustion_prob
    png("{umap}", width=1000, height=1000)
    FeaturePlot(object = seurat, reduction = "UMAP", features=c("Exhaustion_prob"))
    dev.off()
    """
    qc_script = os.path.join(temp, "exhaustion_viz_{}.R".format(sampleid))
    # The script must be fully written and closed before Rscript reads it.
    with open(qc_script, "w") as output:
        output.write(
            rcode.format(sce=rdata, umap=umap_cached, sample=sampleid,
                         fit=_fit))
    # NOTE(review): the Rscript exit status is ignored, as before; a failed
    # script surfaces only through the copy below.
    subprocess.call(["Rscript", qc_script])
    shutil.copyfile(umap_cached, umap)
Esempio n. 11
0
def Run(sampleid, before, finished, use_corrected=False):
    """Run CellAssign for a sample unless its cell types are already cached.

    With ``use_corrected`` set and a ``.cache/corrected/`` directory present,
    the corrected experiment ``.cache/corrected/corrected_sce.rdata`` is used
    (and built with ``DropletUtils.read10xCounts`` if missing). Otherwise the
    sample is downloaded and passed through quality control. CellAssign is
    skipped when ``.cache/<sampleid>/celltypes.rdata`` already exists.

    Args:
        sampleid: Sample identifier.
        before: Unused by this function; kept for the caller's signature.
        finished: Path of the marker file that receives the text
            ``"Completed"``.
        use_corrected: Prefer the corrected counts under ``.cache/corrected/``
            when that directory exists.
    """
    if use_corrected and os.path.exists(".cache/corrected/"):
        sce = ".cache/corrected/corrected_sce.rdata"
        if not os.path.exists(sce):
            utils = DropletUtils()
            utils.read10xCounts(".cache/corrected/", sce)
    else:
        tenx = TenxDataStorage(sampleid, version="v3")
        tenx.download()
        tenx_analysis = TenxAnalysis(tenx.tenx_path)
        tenx_analysis.load()
        tenx_analysis.extract()
        qc = QualityControl(tenx_analysis, sampleid)
        sce = qc.sce
    celltypes = ".cache/{}/celltypes.rdata".format(sampleid)
    if not os.path.exists(celltypes):
        CellAssign.run(sce, config.rho_matrix, celltypes)
    # Context manager so the marker is flushed and closed deterministically.
    with open(finished, "w") as marker:
        marker.write("Completed")
Esempio n. 12
0
def Run(sampleid, raw_sce, sce_cas, rho, B, min_delta):
    """Run CellAssign on ``raw_sce`` if needed and copy the annotated result.

    ``celltypes.rdata`` next to ``raw_sce`` marks a completed run; when it is
    present CellAssign is skipped. ``sce_cas.rdata`` from the same directory
    is then copied to ``sce_cas``.

    Args:
        sampleid: Not used by this function.
        raw_sce: Path to the input SingleCellExperiment file.
        sce_cas: Destination path for the copy of ``sce_cas.rdata``.
        rho: Marker matrix argument forwarded to ``CellAssign.run``.
        B: Forwarded positionally to ``CellAssign.run``.
        min_delta: Forwarded positionally to ``CellAssign.run``.
    """
    workdir = os.path.split(raw_sce)[0]
    fit_path = os.path.join(workdir, "celltypes.rdata")
    already_fitted = os.path.exists(fit_path)
    if not already_fitted:
        CellAssign.run(raw_sce, rho, fit_path, B, min_delta)
    annotated = os.path.join(workdir, "sce_cas.rdata")
    shutil.copyfile(annotated, sce_cas)