Example #1
0
def make_bcbiornaseq_object(data):
    """
    Generate and run the bcbioRNASeq loading scripts for a sample.

    Returns ``data`` untouched when "bcbiornaseq" is not among the enabled
    tools. Otherwise, for the gene level and then the transcript level, an
    R loading script is written into ``<upload dir>/bcbioRNASeq``, executed
    with Rscript from that directory, and ``write_counts`` is called on the
    corresponding ``bcb.rda`` path. Finally ``make_quality_report`` is
    invoked and ``data`` is returned.
    """
    if "bcbiornaseq" not in dd.get_tools_on(data):
        return data
    upload_dir = tz.get_in(("upload", "dir"), data)
    report_dir = os.path.join(upload_dir, "bcbioRNASeq")
    safe_makedir(report_dir)
    organism = dd.get_bcbiornaseq(data).get("organism", None)
    groups = dd.get_bcbiornaseq(data).get("interesting_groups", None)
    # (level, R script name, sub-directory holding bcb.rda, log message)
    passes = (
        ("gene", "load_bcbioRNAseq.R", "data",
         "Loading bcbioRNASeq object."),
        ("transcript", "load_transcript_bcbioRNAseq.R", "data-transcript",
         "Loading transcript-level bcbioRNASeq object."),
    )
    for level, script_name, rda_subdir, description in passes:
        load_code = create_load_string(upload_dir, groups, organism, level)
        script_path = os.path.join(report_dir, script_name)
        with file_transaction(script_path) as tx_script:
            memoize_write_file(load_code, tx_script)
        rscript = Rscript_cmd()
        # Both the R run and the counts export happen from report_dir.
        with chdir(report_dir):
            do.run([rscript, "--vanilla", script_path], description)
            write_counts(os.path.join(report_dir, rda_subdir, "bcb.rda"),
                         level)
    make_quality_report(data)
    return data
Example #2
0
def rnaseq_vardict_variant_calling(data):
    """
    Call RNA-seq variants with a VarDict shell pipeline.

    The output is ``<work dir>/variation/<sample>-vardict.vcf.gz``. When that
    file already exists the pipeline is skipped. In both cases the path is
    recorded on ``data`` via ``dd.set_vrn_file`` and the updated ``data`` is
    returned.
    """
    sample = dd.get_sample_name(data)
    variation_dir = os.path.join(dd.get_work_dir(data), "variation")
    safe_makedir(variation_dir)
    out_file = os.path.join(variation_dir, sample + "-vardict.vcf.gz")
    if not file_exists(out_file):
        # Gather every placeholder of the command template explicitly.
        fields = {
            "sample": sample,
            "vardict_cmd": vardict.get_vardict_command(data),
            "strandbias": "teststrandbias.R",
            "var2vcf": "var2vcf_valid.pl",
            "vcfstreamsort": config_utils.get_program("vcfstreamsort", data),
            "compress_cmd": "| bgzip -c",
            # The configured value is divided by 100 to get a fraction.
            "freq": float(dd.get_min_allele_fraction(data, 20) / 100.0),
            "var2vcf_opts": "-v 50",
            "fix_ambig": vcfutils.fix_ambiguous_cl(),
            "remove_dup": vcfutils.remove_dup_cl(),
        }
        rscript_dir = os.path.dirname(Rscript_cmd())
        # Put the directory of the Rscript executable first on PATH.
        fields["r_setup"] = f"unset R_HOME && export PATH={rscript_dir}:$PATH && "
        fields["ref_file"] = dd.get_ref_file(data)
        fields["bamfile"] = dd.get_work_bam(data)
        fields["bed_file"] = gtf.gtf_to_bed(dd.get_gtf_file(data))
        fields["opts"] = " -c 1 -S 2 -E 3 -g 4 "
        template = (
            "{r_setup}{jvm_opts}{vardict_cmd} "
            "-G {ref_file} -f {freq} -N {sample} -b {bamfile} {opts} {bed_file} "
            "| {strandbias}"
            "| {var2vcf} -N {sample} -E -f {freq} {var2vcf_opts} "
            "| {fix_ambig} "
            "| {remove_dup} "
            "| {vcfstreamsort} {compress_cmd} "
            "> {tx_out_file}"
        )
        with file_transaction(out_file) as tx_out_file:
            fields["jvm_opts"] = vardict._get_jvm_opts(data, tx_out_file)
            fields["tx_out_file"] = tx_out_file
            do.run(template.format(**fields),
                   "Calling RNA-seq variants with VarDict")
    return dd.set_vrn_file(data, out_file)
Example #3
0
def write_counts(bcb, level="gene"):
    """
    Export counts and sample metadata from a saved bcbioRNASeq object.

    ``bcb`` is the path to the ``bcb.rda`` file. Output goes to
    ``<dir of bcb>/../results/<YYYY-MM-DD>/<level>/counts``. If
    ``counts.csv.gz`` is already there, its path is returned without running
    R. Otherwise an inline R program loads the object and writes
    ``counts.csv.gz`` and ``metadata.csv.gz``. Returns the path to
    ``counts.csv.gz``.
    """
    today = dt.strftime(dt.now(), "%Y-%m-%d")
    out_dir = os.path.join(
        os.path.dirname(bcb), "..", "results", today, level, "counts")
    out_dir_string = _quotestring(out_dir)
    out_file = os.path.join(out_dir, "counts.csv.gz")
    safe_makedir(out_dir)
    if file_exists(out_file):
        return out_file
    bcb_string = _quotestring(bcb)
    rscript = Rscript_cmd()
    # Each entry is one R statement, terminated by its own semicolon.
    r_statements = [
        f'load({bcb_string});',
        'date=format(Sys.time(), "%Y-%m-%d");',
        f'dir={out_dir_string};',
        'library(tidyverse);',
        'library(bcbioRNASeq);',
        'counts = bcbioRNASeq::counts(bcb) %>% as.data.frame() %>% round() %>% tibble::rownames_to_column("gene");',
        'metadata = colData(bcb) %>% as.data.frame() %>% tibble::rownames_to_column("sample");',
        'readr::write_csv(counts, file.path(dir, "counts.csv.gz"));',
        'readr::write_csv(metadata, file.path(dir, "metadata.csv.gz"));',
    ]
    r_program = "".join(r_statements)
    do.run([rscript, "--vanilla", "-e", r_program],
           f"Writing counts table to {out_file}.")
    return out_file
Example #4
0
def load_tximport(data):
    """
    Combine salmon quantifications with tximport through an inline R script.

    Writes ``tximport-tpm.csv`` and ``tximport-counts.csv`` into
    ``<work dir>/salmon/combined``. The R step is skipped when both files
    already exist. Returns a dict with the keys ``gene_tpm`` and
    ``gene_counts`` pointing at those two files.
    """
    rscript = Rscript_cmd()
    work_dir = dd.get_work_dir(data)
    salmon_dir = os.path.join(work_dir, "salmon")
    tx2gene_file = os.path.join(
        dd.get_work_dir(data), "inputs", "transcriptome", "tx2gene.csv")
    out_dir = os.path.join(salmon_dir, "combined")
    safe_makedir(out_dir)
    tpm_file = os.path.join(out_dir, "tximport-tpm.csv")
    counts_file = os.path.join(out_dir, "tximport-counts.csv")
    outputs = {"gene_tpm": tpm_file, "gene_counts": counts_file}
    if file_exists(tpm_file) and file_exists(counts_file):
        return outputs
    with file_transaction(tpm_file) as tx_tpm_file:
        with file_transaction(counts_file) as tx_counts_file:
            # One R statement per entry; concatenated without separators.
            r_statements = [
                'library(tidyverse);',
                f'salmon_files = list.files("{salmon_dir}", pattern="quant.sf", recursive=TRUE, full.names=TRUE);',
                f'tx2gene = readr::read_csv("{tx2gene_file}", col_names=c("transcript", "gene")); ',
                'samples = basename(dirname(salmon_files));',
                'names(salmon_files) = samples;',
                'txi = tximport::tximport(salmon_files, type="salmon", tx2gene=tx2gene, countsFromAbundance="lengthScaledTPM", dropInfReps=TRUE);',
                f'readr::write_csv(round(txi$counts) %>% as.data.frame() %>% tibble::rownames_to_column("gene"), "{tx_counts_file}");',
                f'readr::write_csv(txi$abundance %>% as.data.frame() %>% tibble::rownames_to_column("gene"), "{tx_tpm_file}");',
            ]
            do.run([rscript, "--vanilla", "-e", "".join(r_statements)],
                   "Loading tximport.")
    return outputs
Example #5
0
def make_scrnaseq_object(samples):
    """
    Prepare the R script that builds the initial se.rda object
    (SingleCellExperiment).

    When ``se.rda`` does not yet exist next to the combined counts file, this
    writes ``se-run.R`` (rendered from the module-level ``_script`` template)
    and obtains an rRNA gene list via ``_find_rRNA_genes``. The Rscript
    invocation itself is currently commented out, so the script is written
    but not executed here. The function has no explicit return statement.
    """
    # Not referenced directly below; it is only reachable through the
    # ``_script.format(**locals())`` call, so the template presumably uses it
    # -- TODO confirm against ``_script``.
    local_sitelib = R_sitelib()
    counts_dir = os.path.dirname(
        dd.get_in_samples(samples, dd.get_combined_counts))
    # Prefer the transcriptome GTF, falling back to the regular GTF file.
    gtf_file = dd.get_in_samples(samples, dd.get_transcriptome_gtf)
    if not gtf_file:
        gtf_file = dd.get_in_samples(samples, dd.get_gtf_file)
    rda_file = os.path.join(counts_dir, "se.rda")
    if not file_exists(rda_file):
        with file_transaction(rda_file) as tx_out_file:
            # Both helper files share the rda path minus its extension.
            rcode = "%s-run.R" % os.path.splitext(rda_file)[0]
            rrna_file = "%s-rrna.txt" % os.path.splitext(rda_file)[0]
            rrna_file = _find_rRNA_genes(gtf_file, rrna_file)
            with open(rcode, "w") as out_handle:
                # The template is filled from the local variables, so the
                # local names in this function must not be renamed.
                out_handle.write(_script.format(**locals()))
            rscript = Rscript_cmd()
            try:
                # do.run([rscript, "--vanilla", rcode],
                #        "SingleCellExperiment",
                #        log_error=False)
                # With the run disabled, rda_file is rebound to the script
                # path; nothing later in this function reads it.
                rda_file = rcode
            # NOTE(review): with do.run commented out, nothing in the try
            # body runs a subprocess, so this handler looks unreachable.
            except subprocess.CalledProcessError as msg:
                # NOTE(review): no message is passed; the stdlib
                # logging.Logger.exception requires one -- confirm which
                # logger implementation ``logger`` is before re-enabling.
                logger.exception()
Example #6
0
def load_summarizedexperiment(samples):
    """
    Create the SummarizedExperiment RDS object and attach it to every sample.

    Runs the bundled ``bcbio2se.R`` script to produce
    ``<work dir>/salmon/bcbio-se.rds`` unless it already exists. Failures of
    the R step and of the QC report are logged and not re-raised. If the RDS
    file exists afterwards, each sample is updated with its path and returned
    as a list of single-item lists; otherwise ``samples`` is returned
    unchanged.

    Per the original author's note: fails with n_samples = 1.
    """
    # using r36 (4.0) - will eventually drop R3.5
    rscript = Rscript_cmd("r36")
    se_script = os.path.join(os.path.dirname(__file__), os.pardir, "scripts",
                             "R", "bcbio2se.R")
    # The work directory is taken from the first sample.
    work_dir = dd.get_work_dir(samples[0][0])
    se_file = os.path.join(work_dir, "salmon", "bcbio-se.rds")
    if not file_exists(se_file):
        with file_transaction(se_file) as tx_out_file:
            cmd = f"{rscript} --vanilla {se_script} {work_dir} {tx_out_file}"
            try:
                do.run(cmd, "Loading SummarizedExperiment.")
            except Exception:
                logger.error("SE creation failed")
    # Re-check: the R step above may have failed without raising.
    if not file_exists(se_file):
        return samples
    try:
        generate_se_qc_report(work_dir)
    except Exception:
        logger.error("SE QC failed")
    return [[dd.set_summarized_experiment(sample, se_file)]
            for sample in dd.sample_data_iterator(samples)]
Example #7
0
def _sleuthify_sailfish(sailfish_dir):
    """
    Convert Sailfish output for sleuth using the wasabi R package.

    Returns None when ``R_package_path("wasabi")`` is falsy. Otherwise runs
    wasabi's ``prepare_fish_for_sleuth`` on ``sailfish_dir`` and returns the
    path ``<sailfish_dir>/abundance.h5``.
    """
    if not R_package_path("wasabi"):
        return None
    rscript = Rscript_cmd()
    r_expr = f'library("wasabi"); prepare_fish_for_sleuth(c("{sailfish_dir}"))'
    # The R expression is single-quoted for the shell command line.
    shell_cmd = f"{rscript} -e '{r_expr}'"
    do.run(shell_cmd, "Converting Sailfish to Sleuth format.")
    return os.path.join(sailfish_dir, "abundance.h5")
Example #8
0
def render_rmarkdown_file(filename):
    """
    Render an R Markdown file with ``rmarkdown::render``.

    Rscript is run with ``--no-environ`` from the directory containing
    ``filename``. Returns ``filename``.
    """
    r_expr = f'rmarkdown::render("{filename}")'
    rscript = Rscript_cmd()
    with chdir(os.path.dirname(filename)):
        do.run([rscript, "--no-environ", "-e", r_expr],
               "Rendering bcbioRNASeq quality control report.")
    return filename
Example #9
0
def load_summarizedexperiment(data):
    """
    Create ``<work dir>/salmon/bcbio-se.rds`` with the bundled bcbio2se.R script.

    The R step is skipped when the file already exists. Returns the path to
    the RDS file.
    """
    rscript = Rscript_cmd()
    script_path = os.path.join(os.path.dirname(__file__), os.pardir,
                               "scripts", "R", "bcbio2se.R")
    work_dir = dd.get_work_dir(data)
    rds_path = os.path.join(work_dir, "salmon", "bcbio-se.rds")
    if not file_exists(rds_path):
        with file_transaction(rds_path) as tx_out_file:
            do.run(f"{rscript} --vanilla {script_path} {work_dir} {tx_out_file}",
                   "Loading SummarizedExperiment.")
    return rds_path
def make_bcbiornaseq_object(data):
    """
    Write and run the R script that loads the bcbioRNASeq object.

    Returns ``data`` unchanged when "bcbiornaseq" is not among the enabled
    tools. Otherwise ``load_bcbioRNAseq.R`` is written into
    ``<upload dir>/bcbioRNASeq`` and executed with Rscript from that
    directory, after which ``data`` is returned.
    """
    if "bcbiornaseq" not in dd.get_tools_on(data):
        return data
    upload_dir = tz.get_in(("upload", "dir"), data)
    report_dir = os.path.join(upload_dir, "bcbioRNASeq")
    safe_makedir(report_dir)
    organism = dd.get_bcbiornaseq(data).get("organism", None)
    groups = dd.get_bcbiornaseq(data).get("interesting_groups", None)
    load_code = create_load_string(upload_dir, groups, organism)
    script_path = os.path.join(report_dir, "load_bcbioRNAseq.R")
    with file_transaction(script_path) as tx_script:
        write_load_bcbiornaseq_file(load_code, tx_script)
    rscript = Rscript_cmd()
    with chdir(report_dir):
        do.run([rscript, script_path], "Loading bcbioRNASeq object.")
    return data
Example #11
0
def generate_se_qc_report(work_dir):
    """
    Render the QC HTML report from the SummarizedExperiment RDS object.

    Renders the bundled ``se2qc.Rmd`` with ``rmarkdown::render``, passing
    ``<work_dir>/salmon/bcbio-se.rds`` as the ``rds_file`` parameter. The
    report is written to ``<work_dir>/qc/bcbio-se.html``; rendering is
    skipped when that file already exists. Returns the report path.
    """
    rscript = Rscript_cmd("r36")
    rmd_path = os.path.join(os.path.dirname(__file__), os.pardir, "scripts",
                            "R", "se2qc.Rmd")
    report_path = os.path.join(work_dir, "qc", "bcbio-se.html")
    rds_path = os.path.join(work_dir, "salmon", "bcbio-se.rds")
    if not file_exists(report_path):
        with file_transaction(report_path) as tx_out_file:
            r_expr = (
                f'rmarkdown::render("{rmd_path}", '
                f'params = list(rds_file="{rds_path}"), '
                f'output_file="{tx_out_file}")'
            )
            # The R expression is single-quoted for the shell command line.
            do.run(f"{rscript} --vanilla -e '{r_expr}'",
                   "Creating SE QC report")
    return report_path
Example #12
0
def rmarkdown_draft(filename, template, package):
    """
    Create a draft R Markdown file from an installed template.

    Args:
        filename: path of the R Markdown file to create.
        template: name of the template passed to ``rmarkdown::draft``.
        package: R package that provides the template.

    Returns:
        ``filename``. If the file already exists it is returned immediately
        and nothing is run.
    """
    if file_exists(filename):
        return filename
    draft_template = Template(
        'rmarkdown::draft("$filename", template="$template", package="$package", edit=FALSE)'
    )
    draft_string = draft_template.substitute(
        filename=filename, template=template, package=package)
    report_dir = os.path.dirname(filename)
    rcmd = Rscript_cmd()
    with chdir(report_dir):
        do.run([rcmd, "--no-environ", "-e", draft_string],
               "Creating bcbioRNASeq quality control template.")
        # Raw string: "\/" is not a valid Python escape sequence and triggers
        # a DeprecationWarning/SyntaxWarning in a normal literal. The sed
        # program is unchanged: it deletes every "YYYY-MM-DD/" occurrence in
        # the drafted file, in place.
        do.run(["sed", "-i", r"s/YYYY-MM-DD\///g", filename],
               "Editing bcbioRNAseq quality control template.")
    return filename