Beispiel #1
0
def _run_qc_tools(bam_file, data):
    """Run a set of third party quality control tools, returning QC directory and metrics.

        The tools are selected from the analysis type in ``data["analysis"]``
        and from the ``tools_off`` / ``tools_on`` / ``kraken`` settings under
        ``data["config"]["algorithm"]``.

        :param bam_file: alignments in bam format
        :param data: dict with all configuration information

        :returns: dict with the QC output directory under ``"qc"`` and the
            merged metrics of every tool that ran under ``"metrics"``
    """
    # get_in only falls back to its default for a missing key; ``or []`` also
    # covers a key that is present but explicitly set to None.
    tools_off = tz.get_in(("config", "algorithm", "tools_off"), data, []) or []
    analysis = data["analysis"].lower()

    # Ordered (program name, callable) pairs; each callable is invoked as
    # fn(bam_file, data, output_dir) and must return a dict of metrics.
    to_run = []
    if "fastqc" not in tools_off:
        to_run.append(("fastqc", _run_fastqc))
    if analysis.startswith("rna-seq"):
        # rnaseqc, gene coverage and complexity checks are currently disabled.
        to_run.append(("qualimap", _rnaseq_qualimap))
    elif analysis.startswith("chip-seq"):
        to_run.append(("bamtools", _run_bamtools_stats))
    elif not analysis.startswith("smallrna-seq"):
        to_run += [("bamtools", _run_bamtools_stats),
                   ("gemini", _run_gemini_stats)]
    if analysis.startswith(("standard", "variant2")):
        to_run.append(("qsignature", _run_qsignature_generator))
        tools_on = tz.get_in(("config", "algorithm", "tools_on"), data,
                             []) or []
        if "qualimap" in tools_on:
            to_run.append(("qualimap", _run_qualimap))

    qc_dir = utils.safe_makedir(
        os.path.join(data["dirs"]["work"], "qc", data["description"]))
    metrics = {}
    for program_name, qc_fn in to_run:
        cur_qc_dir = os.path.join(qc_dir, program_name)
        metrics.update(qc_fn(bam_file, data, cur_qc_dir))

    # Kraken runs whenever it is configured; a narrower gate (ratio < 0.60 and
    # rna-seq or standard analyses only) is left disabled.
    if data["config"]["algorithm"].get("kraken", None):
        ratio = bam.get_aligned_reads(bam_file, data)
        metrics.update(_run_kraken(data, ratio))

    # Clean up the "<base>-downsample<ext>" file that sits next to the input.
    bam.remove("%s-downsample%s" % os.path.splitext(bam_file))

    metrics["Name"] = data["name"][-1]
    metrics["Quality format"] = utils.get_in(
        data, ("config", "algorithm", "quality_format"), "standard").lower()
    return {"qc": qc_dir, "metrics": metrics}
def _run_qc_tools(bam_file, data):
    """Run a set of third party quality control tools, returning QC directory and metrics.

        Which tools run depends on the analysis type (``data["analysis"]``)
        and on the ``tools_off``, ``tools_on`` and ``kraken`` entries of
        ``data["config"]["algorithm"]``.

        :param bam_file: alignments in bam format
        :param data: dict with all configuration information

        :returns: dict with two keys: ``"qc"`` (the QC output directory) and
            ``"metrics"`` (metrics merged from all tools that were run)
    """
    # A key that exists with a None value bypasses get_in's default, so
    # normalise it to an empty list before the membership test.
    tools_off = tz.get_in(("config", "algorithm", "tools_off"), data, []) or []
    # Compute the normalised analysis name once instead of in every branch.
    analysis = data["analysis"].lower()

    # (program name, callable) pairs in execution order. Every callable is
    # called as fn(bam_file, data, output_dir) and returns a metrics dict.
    to_run = []
    if "fastqc" not in tools_off:
        to_run.append(("fastqc", _run_fastqc))
    if analysis.startswith("rna-seq"):
        # rnaseqc, gene coverage and complexity checks are currently disabled.
        to_run.append(("qualimap", _rnaseq_qualimap))
    elif analysis.startswith("chip-seq"):
        to_run.append(("bamtools", _run_bamtools_stats))
    elif not analysis.startswith("smallrna-seq"):
        to_run.extend([("bamtools", _run_bamtools_stats),
                       ("gemini", _run_gemini_stats)])
    if analysis.startswith(("standard", "variant2")):
        to_run.append(("qsignature", _run_qsignature_generator))
        tools_on = tz.get_in(("config", "algorithm", "tools_on"), data, []) or []
        if "qualimap" in tools_on:
            to_run.append(("qualimap", _run_qualimap))

    qc_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "qc", data["description"]))
    metrics = {}
    for program_name, qc_fn in to_run:
        cur_qc_dir = os.path.join(qc_dir, program_name)
        cur_metrics = qc_fn(bam_file, data, cur_qc_dir)
        metrics.update(cur_metrics)

    # Kraken is run whenever it is configured; the stricter condition
    # (ratio < 0.60 and an rna-seq or standard analysis) is left disabled.
    if data["config"]["algorithm"].get("kraken", None):
        ratio = bam.get_aligned_reads(bam_file, data)
        cur_metrics = _run_kraken(data, ratio)
        metrics.update(cur_metrics)

    # Remove the "<base>-downsample<ext>" companion of the input BAM.
    bam.remove("%s-downsample%s" % os.path.splitext(bam_file))

    metrics["Name"] = data["name"][-1]
    metrics["Quality format"] = utils.get_in(data,
                                             ("config", "algorithm",
                                              "quality_format"),
                                             "standard").lower()
    return {"qc": qc_dir, "metrics": metrics}
Beispiel #3
0
def _run_qc_tools(bam_file, data):
    """Run the configured third party quality control tools for one sample.

        The programs to run are read, in order, from
        ``data["config"]["algorithm"]["qc"]``; each name must be a key of the
        ``tools`` table below. Nothing is run when that setting is absent or
        empty.

        :param bam_file: alignments in bam format
        :param data: dict with all configuration information

        :returns: dict with ``"qc"`` (per-program output files, keyed by
            program name) and ``"metrics"`` (metrics merged from every tool
            that returned a dict)
        :raises KeyError: if a configured program name is not in ``tools``
    """
    from bcbio.qc import (fastqc, gemini, kraken, qsignature, qualimap,
                          samtools, picard, srna, umi)
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring
    tools = {
        "fastqc": fastqc.run,
        "small-rna": srna.run,
        "samtools": samtools.run,
        "qualimap": qualimap.run,
        "qualimap_rnaseq": qualimap.run_rnaseq,
        "gemini": gemini.run,
        "qsignature": qsignature.run,
        "coverage": _run_coverage_qc,
        "variants": _run_variants_qc,
        "kraken": kraken.run,
        "picard": picard.run,
        "umi": umi.run
    }
    qc_dir = utils.safe_makedir(
        os.path.join(data["dirs"]["work"], "qc", data["description"]))
    metrics = {}
    qc_out = {}
    # get_in returns None when the qc list is missing; treat that as "no tools".
    for program_name in tz.get_in(["config", "algorithm", "qc"], data) or []:
        qc_fn = tools[program_name]
        cur_qc_dir = os.path.join(qc_dir, program_name)
        out = qc_fn(bam_file, data, cur_qc_dir)
        qc_files = None
        if out and isinstance(out, dict):
            # A dict result is a set of metrics to merge.
            metrics.update(out)
        elif out and isinstance(out, string_types) and os.path.exists(out):
            # A path to an existing file is the tool's primary output.
            qc_files = {"base": out, "secondary": []}
        if not qc_files:
            # Otherwise collect whatever the tool left in its directory.
            qc_files = _organize_qc_files(program_name, cur_qc_dir)
        if qc_files:
            qc_out[program_name] = qc_files

    # Clean up the "<base>-downsample<ext>" file that sits next to the input.
    bam.remove("%s-downsample%s" % os.path.splitext(bam_file))

    metrics["Name"] = dd.get_sample_name(data)
    metrics["Quality format"] = dd.get_quality_format(data).lower()
    return {"qc": qc_out, "metrics": metrics}
Beispiel #4
0
def _run_qc_tools(bam_file, data):
    """Run the configured third party quality control tools for one sample.

        Program names are taken, in order, from
        ``data["config"]["algorithm"]["qc"]`` and looked up in the ``tools``
        table below. A missing or empty setting means no tool is run.

        :param bam_file: alignments in bam format
        :param data: dict with all configuration information

        :returns: dict with ``"qc"`` (output files per program name) and
            ``"metrics"`` (metrics merged from the tools that returned a dict)
        :raises KeyError: if a configured program name is not in ``tools``
    """
    from bcbio.qc import (fastqc, gemini, kraken, qsignature, qualimap,
                          samtools, picard, srna)
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring
    tools = {"fastqc": fastqc.run,
             "small-rna": srna.run,
             "samtools": samtools.run,
             "qualimap": qualimap.run,
             "qualimap_rnaseq": qualimap.run_rnaseq,
             "gemini": gemini.run,
             "qsignature": qsignature.run,
             "coverage": _run_coverage_qc,
             "variants": _run_variants_qc,
             "kraken": kraken.run,
             "picard": picard.run}
    qc_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "qc", data["description"]))
    metrics = {}
    qc_out = {}
    # get_in yields None when no qc list is configured; iterate nothing then.
    configured = tz.get_in(["config", "algorithm", "qc"], data) or []
    for program_name in configured:
        qc_fn = tools[program_name]
        cur_qc_dir = os.path.join(qc_dir, program_name)
        out = qc_fn(bam_file, data, cur_qc_dir)
        qc_files = None
        if out and isinstance(out, dict):
            # Dict results are metrics and get merged into the summary.
            metrics.update(out)
        elif out and isinstance(out, string_types) and os.path.exists(out):
            # A returned path that exists is recorded as the base output file.
            qc_files = {"base": out, "secondary": []}
        if not qc_files:
            # Fall back to collecting files from the tool's output directory.
            qc_files = _organize_qc_files(program_name, cur_qc_dir)
        if qc_files:
            qc_out[program_name] = qc_files

    # Remove the "<base>-downsample<ext>" companion of the input BAM.
    bam.remove("%s-downsample%s" % os.path.splitext(bam_file))

    metrics["Name"] = dd.get_sample_name(data)
    metrics["Quality format"] = dd.get_quality_format(data).lower()
    return {"qc": qc_out, "metrics": metrics}