コード例 #1
0
def _run_qc_tools(bam_file, data):
    """Run a set of third party quality control tools, returning QC directory and metrics.

    Tools are selected from ``data["analysis"]``, compared case-insensitively:

    - always: fastqc
    - analysis starting with "rna-seq": rnaseqc and complexity
    - analysis starting with "chip-seq": bamtools
    - anything else: bamtools and gemini

    Every tool is called as ``qc_fn(bam_file, data, <qc_dir>/<program_name>)``
    and the mapping it returns is merged into one metrics dict with
    ``dict.update`` (later tools overwrite earlier keys).

    :param bam_file: alignment file handed to every QC function
    :param data: sample dict; reads ``analysis``, ``dirs.work``,
        ``description``, ``name`` and ``config.algorithm``
    :returns: dict with "qc" (the per-sample QC directory) and "metrics"
        (merged tool metrics plus "Name" and "Quality format")
    """
    analysis = data["analysis"].lower()
    to_run = [("fastqc", _run_fastqc)]
    if analysis.startswith("rna-seq"):
        to_run.append(("rnaseqc", bcbio.rnaseq.qc.sample_summary))
        # The gene coverage step (_run_gene_coverage) is currently disabled.
        to_run.append(("complexity", _run_complexity))
    elif analysis.startswith("chip-seq"):
        to_run.append(("bamtools", _run_bamtools_stats))
    else:
        to_run += [("bamtools", _run_bamtools_stats), ("gemini", _run_gemini_stats)]
    qc_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "qc", data["description"]))
    metrics = {}
    for program_name, qc_fn in to_run:
        cur_qc_dir = os.path.join(qc_dir, program_name)
        cur_metrics = qc_fn(bam_file, data, cur_qc_dir)
        metrics.update(cur_metrics)
    # Kraken only runs when it is configured, the analysis is exactly
    # "rna-seq" (not just a prefix match) and fewer than 60% of reads aligned.
    # The aligned-read ratio is computed only once the cheap checks pass, so
    # samples that can never trigger kraken skip that extra pass over the BAM.
    kraken_enabled = data['config']["algorithm"].get("kraken", False)
    if kraken_enabled and analysis == "rna-seq":
        ratio = bam.get_aligned_reads(bam_file, data)
        if ratio < 0.60:
            metrics.update(_run_kraken(data, ratio))
    metrics["Name"] = data["name"][-1]
    metrics["Quality format"] = utils.get_in(data,
                                             ("config", "algorithm",
                                              "quality_format"),
                                             "standard").lower()
    return {"qc": qc_dir, "metrics": metrics}
コード例 #2
0
def _run_qc_tools(bam_file, data):
    """Run third party quality control tools and collect their metrics.

        :param bam_file: alignments in bam format
        :param data: dict with all configuration information

        :returns: dict with the QC directory ("qc") and the merged output of
                  the different tools ("metrics")
    """
    qc_steps = []
    disabled_tools = tz.get_in(("config", "algorithm", "tools_off"), data, [])
    if "fastqc" not in disabled_tools:
        qc_steps.append(("fastqc", _run_fastqc))

    # Analysis type is matched by case-insensitive prefix.
    analysis = data["analysis"].lower()
    if analysis.startswith("rna-seq"):
        # rnaseqc, gene coverage and complexity are disabled here; qualimap
        # is the only RNA-seq specific step that is scheduled.
        qc_steps.append(("qualimap", _rnaseq_qualimap))
    elif analysis.startswith("chip-seq"):
        qc_steps.append(("bamtools", _run_bamtools_stats))
    elif not analysis.startswith("smallrna-seq"):
        qc_steps.extend([("bamtools", _run_bamtools_stats),
                         ("gemini", _run_gemini_stats)])

    if analysis.startswith(("standard", "variant2")):
        qc_steps.append(("qsignature", _run_qsignature_generator))
        enabled_tools = tz.get_in(("config", "algorithm", "tools_on"), data, [])
        if "qualimap" in enabled_tools:
            qc_steps.append(("qualimap", _run_qualimap))

    sample_qc_path = os.path.join(data["dirs"]["work"], "qc", data["description"])
    qc_dir = utils.safe_makedir(sample_qc_path)

    # Each tool writes into its own sub-directory and returns a metrics
    # mapping; later tools overwrite clashing keys.
    metrics = {}
    for program_name, qc_fn in qc_steps:
        metrics.update(qc_fn(bam_file, data, os.path.join(qc_dir, program_name)))

    # Kraken runs whenever it is configured; the former gating on an aligned
    # ratio below 0.60 and on the analysis type is no longer applied.
    if data['config']["algorithm"].get("kraken", None):
        aligned_ratio = bam.get_aligned_reads(bam_file, data)
        metrics.update(_run_kraken(data, aligned_ratio))

    # presumably cleans up a downsampled copy of the BAM made by a QC step --
    # TODO confirm against bam.remove
    bam_base, bam_ext = os.path.splitext(bam_file)
    bam.remove("%s-downsample%s" % (bam_base, bam_ext))

    metrics["Name"] = data["name"][-1]
    quality_format = utils.get_in(
        data, ("config", "algorithm", "quality_format"), "standard")
    metrics["Quality format"] = quality_format.lower()
    return {"qc": qc_dir, "metrics": metrics}
コード例 #3
0
def _run_qc_tools(bam_file, data):
    """Select and run the QC tools for one sample, gathering their metrics.

        :param bam_file: alignments in bam format
        :param data: dict with all configuration information

        :returns: dict with "qc" (sample QC directory) and "metrics"
                  (combined output of the different tools)
    """
    def _listed(switch, tool):
        # True when `tool` appears under config -> algorithm -> <switch>.
        return tool in tz.get_in(("config", "algorithm", switch), data, [])

    to_run = []
    if not _listed("tools_off", "fastqc"):
        to_run += [("fastqc", _run_fastqc)]

    kind = data["analysis"].lower()
    if kind.startswith("rna-seq"):
        # Only qualimap is scheduled for RNA-seq; the rnaseqc, coverage and
        # complexity steps are switched off.
        to_run += [("qualimap", _rnaseq_qualimap)]
    elif kind.startswith("chip-seq"):
        to_run += [("bamtools", _run_bamtools_stats)]
    elif not kind.startswith("smallrna-seq"):
        to_run += [("bamtools", _run_bamtools_stats), ("gemini", _run_gemini_stats)]

    if kind.startswith(("standard", "variant2")):
        to_run += [("qsignature", _run_qsignature_generator)]
        if _listed("tools_on", "qualimap"):
            to_run += [("qualimap", _run_qualimap)]

    qc_dir = utils.safe_makedir(
        os.path.join(data["dirs"]["work"], "qc", data["description"]))

    metrics = {}
    for tool_name, tool_fn in to_run:
        tool_dir = os.path.join(qc_dir, tool_name)
        tool_metrics = tool_fn(bam_file, data, tool_dir)
        metrics.update(tool_metrics)

    # Kraken is driven purely by configuration: the older check on the
    # aligned ratio (< 0.60) and analysis type is not applied any more.
    kraken_setting = data['config']["algorithm"].get("kraken", None)
    if kraken_setting:
        ratio = bam.get_aligned_reads(bam_file, data)
        kraken_metrics = _run_kraken(data, ratio)
        metrics.update(kraken_metrics)

    # NOTE(review): looks like this removes a "-downsample" sibling of the
    # input BAM -- confirm what bam.remove does with a missing file.
    downsampled_bam = "%s-downsample%s" % os.path.splitext(bam_file)
    bam.remove(downsampled_bam)

    metrics.update({
        "Name": data["name"][-1],
        "Quality format": utils.get_in(
            data, ("config", "algorithm", "quality_format"), "standard").lower(),
    })
    return {"qc": qc_dir, "metrics": metrics}
コード例 #4
0
def run(bam_file, data, out_dir):
    """Run kraken, generating report in specified directory and parsing metrics.

    Only the first input file is classified (the original note says "using
    only first paired reads"). The run is skipped when
    ``<out_dir>/kraken_out`` already exists; existing results are then just
    parsed again.

    :param bam_file: not used by this function; kept so the signature stays
        unchanged for callers
    :param data: sample dict; reads ``config.algorithm.kraken`` (database
        path, or "minikraken" for the installed database),
        ``config.algorithm.num_cores`` and ``files`` / ``files_orig``
    :param out_dir: directory that receives "kraken_out" and "kraken_stats"
    :returns: ``{"kraken_report": "null"}`` when the database path does not
        exist or the input is a BAM file, otherwise the result of
        ``_parse_kraken_output(out_dir, db, data)``
    """
    logger.info("Running kraken to determine contaminant: %s" %
                dd.get_sample_name(data))
    db = data['config']["algorithm"]["kraken"]
    kraken_cmd = config_utils.get_program("kraken", data["config"])
    if db == "minikraken":
        db = os.path.join(install._get_data_dir(), "genomes", "kraken",
                          "minikraken")

    if not os.path.exists(db):
        logger.info("kraken: no database found %s, skipping" % db)
        return {"kraken_report": "null"}

    if not os.path.exists(os.path.join(out_dir, "kraken_out")):
        # Make sure the parent exists so the finished run can be moved in.
        work_dir = os.path.dirname(out_dir)
        utils.safe_makedir(work_dir)
        num_cores = data["config"]["algorithm"].get("num_cores", 1)
        fn_file = data["files_orig"][0] if dd.get_save_diskspace(
            data) else data["files"][0]
        if fn_file.endswith("bam"):
            # The command below streams reads with --fastq-input, so BAM
            # input cannot be classified.
            logger.info("kraken: need fastq files as input")
            return {"kraken_report": "null"}
        with tx_tmpdir(data) as tx_tmp_dir:
            with utils.chdir(tx_tmp_dir):
                out = os.path.join(tx_tmp_dir, "kraken_out")
                out_stats = os.path.join(tx_tmp_dir, "kraken_stats")
                cat = "zcat" if fn_file.endswith(".gz") else "cat"
                # Explicit keyword arguments instead of **locals(): the
                # command only depends on the names listed here.
                cl = ("{cat} {fn_file} | {kraken_cmd} --db {db} --quick "
                      "--preload --min-hits 2 "
                      "--threads {num_cores} "
                      "--out {out} --fastq-input /dev/stdin  2> {out_stats}"
                      ).format(cat=cat, fn_file=fn_file,
                               kraken_cmd=kraken_cmd, db=db,
                               num_cores=num_cores, out=out,
                               out_stats=out_stats)
                do.run(cl, "kraken: %s" % dd.get_sample_name(data))
                # Replace any stale output directory with the completed run.
                if os.path.exists(out_dir):
                    shutil.rmtree(out_dir)
                shutil.move(tx_tmp_dir, out_dir)
    metrics = _parse_kraken_output(out_dir, db, data)
    return metrics
コード例 #5
0
ファイル: kraken.py プロジェクト: DoaneAS/bcbio-nextgen
def run(bam_file, data, out_dir):
    """Run kraken, generating report in specified directory and parsing metrics.

    Classifies the first input file only (original note: "using only first
    paired reads"). Work happens in a transactional temporary directory that
    is moved to ``out_dir`` once kraken finishes, replacing any previous
    ``out_dir``.

    :param bam_file: unused here; retained for backward compatibility of the
        call signature
    :param data: sample dict providing ``config.algorithm.kraken`` (database
        path or the "minikraken" shortcut), ``config.algorithm.num_cores``
        and the input files (``files_orig`` when ``dd.get_save_diskspace``
        is set, else ``files``)
    :param out_dir: final directory holding "kraken_out" and "kraken_stats"
    :returns: ``{"kraken_report": "null"}`` if the database is missing or the
        input file is a BAM; otherwise whatever
        ``_parse_kraken_output(out_dir, db, data)`` returns
    """
    sample_name = dd.get_sample_name(data)
    logger.info("Running kraken to determine contaminant: %s" % sample_name)
    db = data['config']["algorithm"]["kraken"]
    kraken_cmd = config_utils.get_program("kraken", data["config"])
    if db == "minikraken":
        db = os.path.join(install._get_data_dir(), "genomes", "kraken", "minikraken")

    if not os.path.exists(db):
        logger.info("kraken: no database found %s, skipping" % db)
        return {"kraken_report": "null"}

    # A previous run already produced output: just parse it again.
    if os.path.exists(os.path.join(out_dir, "kraken_out")):
        return _parse_kraken_output(out_dir, db, data)

    utils.safe_makedir(os.path.dirname(out_dir))
    num_cores = data["config"]["algorithm"].get("num_cores", 1)
    fn_file = data["files_orig"][0] if dd.get_save_diskspace(data) else data["files"][0]
    if fn_file.endswith("bam"):
        # Reads are piped in with --fastq-input, so BAM input is rejected.
        logger.info("kraken: need fastq files as input")
        return {"kraken_report": "null"}

    with tx_tmpdir(data) as tx_tmp_dir:
        with utils.chdir(tx_tmp_dir):
            out = os.path.join(tx_tmp_dir, "kraken_out")
            out_stats = os.path.join(tx_tmp_dir, "kraken_stats")
            cat = "zcat" if fn_file.endswith(".gz") else "cat"
            # Named placeholders are filled explicitly rather than through
            # **locals(), so renaming a local cannot silently break the
            # command line.
            cl = ("{cat} {fn_file} | {kraken_cmd} --db {db} --quick "
                  "--preload --min-hits 2 "
                  "--threads {num_cores} "
                  "--out {out} --fastq-input /dev/stdin  2> {out_stats}").format(
                      cat=cat, fn_file=fn_file, kraken_cmd=kraken_cmd, db=db,
                      num_cores=num_cores, out=out, out_stats=out_stats)
            do.run(cl, "kraken: %s" % sample_name)
            # Swap the finished temporary directory into place.
            if os.path.exists(out_dir):
                shutil.rmtree(out_dir)
            shutil.move(tx_tmp_dir, out_dir)
    return _parse_kraken_output(out_dir, db, data)