def _run_qc_tools(bam_file, data):
    """Run a set of third party quality control tools, returning QC directory and metrics.

    Tools are selected from ``data["analysis"]`` (compared case-insensitively
    by prefix) and from the ``tools_off`` / ``tools_on`` entries under
    ``data["config"]["algorithm"]``. Every selected tool is called as
    ``qc_fn(bam_file, data, tool_qc_dir)`` and the mapping it returns is merged
    into one combined metrics dict; later tools overwrite earlier keys.

    :param bam_file: alignments in bam format
    :param data: dict with all configuration information
    :returns: dict with ``"qc"`` (the value returned by ``utils.safe_makedir``
        for ``<work>/qc/<description>``) and ``"metrics"`` (the merged metrics,
        plus the ``"Name"`` and ``"Quality format"`` entries)
    """
    # ``or []`` covers an option that is present in the configuration but set
    # to None, which would otherwise make the membership tests raise TypeError.
    tools_off = tz.get_in(("config", "algorithm", "tools_off"), data, []) or []
    tools_on = tz.get_in(("config", "algorithm", "tools_on"), data, []) or []
    analysis = data["analysis"].lower()

    to_run = []
    if "fastqc" not in tools_off:
        to_run.append(("fastqc", _run_fastqc))
    if analysis.startswith("rna-seq"):
        to_run.append(("qualimap", _rnaseq_qualimap))
    elif analysis.startswith("chip-seq"):
        to_run.append(("bamtools", _run_bamtools_stats))
    elif not analysis.startswith("smallrna-seq"):
        to_run.extend([("bamtools", _run_bamtools_stats),
                       ("gemini", _run_gemini_stats)])
    if analysis.startswith(("standard", "variant2")):
        to_run.append(("qsignature", _run_qsignature_generator))
    if "qualimap" in tools_on:
        to_run.append(("qualimap", _run_qualimap))

    qc_dir = utils.safe_makedir(
        os.path.join(data["dirs"]["work"], "qc", data["description"]))
    metrics = {}
    for program_name, qc_fn in to_run:
        # Each tool gets its own sub-directory named after the program.
        cur_qc_dir = os.path.join(qc_dir, program_name)
        metrics.update(qc_fn(bam_file, data, cur_qc_dir))

    # NOTE: kraken runs whenever it is configured. An earlier, stricter gate
    # (aligned ratio < 0.60 and only rna-seq/standard analyses) was disabled.
    if data["config"]["algorithm"].get("kraken", None):
        ratio = bam.get_aligned_reads(bam_file, data)
        metrics.update(_run_kraken(data, ratio))

    # Targets "<bam base>-downsample<ext>" next to the input; presumably a
    # temporary downsampled BAM left by the tools -- confirm against bam.remove.
    bam.remove("%s-downsample%s" % os.path.splitext(bam_file))

    metrics["Name"] = data["name"][-1]
    metrics["Quality format"] = utils.get_in(
        data, ("config", "algorithm", "quality_format"), "standard").lower()
    return {"qc": qc_dir, "metrics": metrics}
def _run_qc_tools(bam_file, data):
    """Run a set of third party quality control tools, returning QC directory and metrics.

    Tools are selected from ``data["analysis"]`` (compared case-insensitively
    by prefix) and from the ``tools_off`` / ``tools_on`` entries under
    ``data["config"]["algorithm"]``. Every selected tool is called as
    ``qc_fn(bam_file, data, tool_qc_dir)`` and the mapping it returns is merged
    into one combined metrics dict; later tools overwrite earlier keys.

    :param bam_file: alignments in bam format
    :param data: dict with all configuration information
    :returns: dict with ``"qc"`` (the value returned by ``utils.safe_makedir``
        for ``<work>/qc/<description>``) and ``"metrics"`` (the merged metrics,
        plus the ``"Name"`` and ``"Quality format"`` entries)
    """
    # ``or []`` covers an option that is present in the configuration but set
    # to None, which would otherwise make the membership tests raise TypeError.
    tools_off = tz.get_in(("config", "algorithm", "tools_off"), data, []) or []
    tools_on = tz.get_in(("config", "algorithm", "tools_on"), data, []) or []
    analysis = data["analysis"].lower()

    to_run = []
    if "fastqc" not in tools_off:
        to_run.append(("fastqc", _run_fastqc))
    if analysis.startswith("rna-seq"):
        to_run.append(("qualimap", _rnaseq_qualimap))
    elif analysis.startswith("chip-seq"):
        to_run.append(("bamtools", _run_bamtools_stats))
    elif not analysis.startswith("smallrna-seq"):
        to_run.extend([("bamtools", _run_bamtools_stats),
                       ("gemini", _run_gemini_stats)])
    if analysis.startswith(("standard", "variant2")):
        to_run.append(("qsignature", _run_qsignature_generator))
    if "qualimap" in tools_on:
        to_run.append(("qualimap", _run_qualimap))

    qc_dir = utils.safe_makedir(
        os.path.join(data["dirs"]["work"], "qc", data["description"]))
    metrics = {}
    for program_name, qc_fn in to_run:
        # Each tool gets its own sub-directory named after the program.
        cur_qc_dir = os.path.join(qc_dir, program_name)
        metrics.update(qc_fn(bam_file, data, cur_qc_dir))

    # NOTE: kraken runs whenever it is configured. An earlier, stricter gate
    # (aligned ratio < 0.60 and only rna-seq/standard analyses) was disabled.
    if data["config"]["algorithm"].get("kraken", None):
        ratio = bam.get_aligned_reads(bam_file, data)
        metrics.update(_run_kraken(data, ratio))

    # Targets "<bam base>-downsample<ext>" next to the input; presumably a
    # temporary downsampled BAM left by the tools -- confirm against bam.remove.
    bam.remove("%s-downsample%s" % os.path.splitext(bam_file))

    metrics["Name"] = data["name"][-1]
    metrics["Quality format"] = utils.get_in(
        data, ("config", "algorithm", "quality_format"), "standard").lower()
    return {"qc": qc_dir, "metrics": metrics}
def _run_qc_tools(bam_file, data):
    """Run the configured third party quality control tools, returning QC files and metrics.

    The program names listed under ``data["config"]["algorithm"]["qc"]`` are
    looked up in a fixed dispatch table and each tool is called as
    ``qc_fn(bam_file, data, tool_qc_dir)``. A non-empty dict result is merged
    into the combined metrics. A string result naming an existing path is
    recorded as that program's ``"base"`` output file. When no file output was
    recorded from the return value, ``_organize_qc_files`` is asked for the
    files of the program's QC directory.

    :param bam_file: alignments in bam format
    :param data: dict with all configuration information
    :returns: dict with ``"qc"`` (program name -> output files, only for
        programs that produced files) and ``"metrics"`` (merged metrics plus
        the ``"Name"`` and ``"Quality format"`` entries)
    :raises KeyError: if a configured program name is not in the dispatch table
    """
    # Imported inside the function, as in the original code.
    from bcbio.qc import (fastqc, gemini, kraken, qsignature, qualimap,
                          samtools, picard, srna, umi)
    try:
        string_types = basestring  # Python 2: matches both str and unicode
    except NameError:
        string_types = str  # Python 3: basestring no longer exists
    tools = {
        "fastqc": fastqc.run,
        "small-rna": srna.run,
        "samtools": samtools.run,
        "qualimap": qualimap.run,
        "qualimap_rnaseq": qualimap.run_rnaseq,
        "gemini": gemini.run,
        "qsignature": qsignature.run,
        "coverage": _run_coverage_qc,
        "variants": _run_variants_qc,
        "kraken": kraken.run,
        "picard": picard.run,
        "umi": umi.run,
    }
    qc_dir = utils.safe_makedir(
        os.path.join(data["dirs"]["work"], "qc", data["description"]))
    metrics = {}
    qc_out = {}
    # ``or []``: a missing or None "qc" entry means no tools to run, instead of
    # failing with "NoneType is not iterable".
    for program_name in tz.get_in(["config", "algorithm", "qc"], data) or []:
        qc_fn = tools[program_name]
        cur_qc_dir = os.path.join(qc_dir, program_name)
        out = qc_fn(bam_file, data, cur_qc_dir)
        qc_files = None
        if out and isinstance(out, dict):
            metrics.update(out)
        elif out and isinstance(out, string_types) and os.path.exists(out):
            qc_files = {"base": out, "secondary": []}
        if not qc_files:
            # Also reached for dict results, which never set qc_files above.
            qc_files = _organize_qc_files(program_name, cur_qc_dir)
        if qc_files:
            qc_out[program_name] = qc_files

    # Targets "<bam base>-downsample<ext>" next to the input; presumably a
    # temporary downsampled BAM left by the tools -- confirm against bam.remove.
    bam.remove("%s-downsample%s" % os.path.splitext(bam_file))

    metrics["Name"] = dd.get_sample_name(data)
    metrics["Quality format"] = dd.get_quality_format(data).lower()
    return {"qc": qc_out, "metrics": metrics}
def _run_qc_tools(bam_file, data):
    """Run the configured third party quality control tools, returning QC files and metrics.

    The program names listed under ``data["config"]["algorithm"]["qc"]`` are
    looked up in a fixed dispatch table and each tool is called as
    ``qc_fn(bam_file, data, tool_qc_dir)``. A non-empty dict result is merged
    into the combined metrics. A string result naming an existing path is
    recorded as that program's ``"base"`` output file. When no file output was
    recorded from the return value, ``_organize_qc_files`` is asked for the
    files of the program's QC directory.

    :param bam_file: alignments in bam format
    :param data: dict with all configuration information
    :returns: dict with ``"qc"`` (program name -> output files, only for
        programs that produced files) and ``"metrics"`` (merged metrics plus
        the ``"Name"`` and ``"Quality format"`` entries)
    :raises KeyError: if a configured program name is not in the dispatch table
    """
    # Imported inside the function, as in the original code.
    from bcbio.qc import (fastqc, gemini, kraken, qsignature, qualimap,
                          samtools, picard, srna)
    try:
        string_types = basestring  # Python 2: matches both str and unicode
    except NameError:
        string_types = str  # Python 3: basestring no longer exists
    tools = {
        "fastqc": fastqc.run,
        "small-rna": srna.run,
        "samtools": samtools.run,
        "qualimap": qualimap.run,
        "qualimap_rnaseq": qualimap.run_rnaseq,
        "gemini": gemini.run,
        "qsignature": qsignature.run,
        "coverage": _run_coverage_qc,
        "variants": _run_variants_qc,
        "kraken": kraken.run,
        "picard": picard.run,
    }
    qc_dir = utils.safe_makedir(
        os.path.join(data["dirs"]["work"], "qc", data["description"]))
    metrics = {}
    qc_out = {}
    # ``or []``: a missing or None "qc" entry means no tools to run, instead of
    # failing with "NoneType is not iterable".
    for program_name in tz.get_in(["config", "algorithm", "qc"], data) or []:
        qc_fn = tools[program_name]
        cur_qc_dir = os.path.join(qc_dir, program_name)
        out = qc_fn(bam_file, data, cur_qc_dir)
        qc_files = None
        if out and isinstance(out, dict):
            metrics.update(out)
        elif out and isinstance(out, string_types) and os.path.exists(out):
            qc_files = {"base": out, "secondary": []}
        if not qc_files:
            # Also reached for dict results, which never set qc_files above.
            qc_files = _organize_qc_files(program_name, cur_qc_dir)
        if qc_files:
            qc_out[program_name] = qc_files

    # Targets "<bam base>-downsample<ext>" next to the input; presumably a
    # temporary downsampled BAM left by the tools -- confirm against bam.remove.
    bam.remove("%s-downsample%s" % os.path.splitext(bam_file))

    metrics["Name"] = dd.get_sample_name(data)
    metrics["Quality format"] = dd.get_quality_format(data).lower()
    return {"qc": qc_out, "metrics": metrics}