def run(bam_file, data, out_dir):
    """Produce Picard hybrid-selection metrics for a BAM file.

    Nothing is run unless "picard" is listed by ``dd.get_tools_on(data)``.
    The result is expected at ``<out_dir>/<sample>-sort.hs_metrics``; when
    that file already exists it is returned without re-running the report.

    Args:
        bam_file: Path to the BAM file to analyse.
        data: Sample dictionary; ``data["config"]`` is read directly and the
            remaining values through the ``dd`` accessors.
        out_dir: Output directory, created when missing.

    Returns:
        ``{}`` when Picard is not enabled, otherwise the hs_metrics path.
    """
    config = data["config"]
    if "picard" not in dd.get_tools_on(data):
        return {}
    ref_file = dd.get_ref_file(data)
    sample = dd.get_sample_name(data)
    target_file = dd.get_variant_regions(data)
    broad_runner = broad.PicardCmdRunner("picard", config)
    bam_fname = os.path.abspath(bam_file)
    utils.safe_makedir(out_dir)
    hsmetric_file = os.path.join(out_dir, "%s-sort.hs_metrics" % sample)
    if utils.file_exists(hsmetric_file):
        return hsmetric_file
    with utils.chdir(out_dir):
        with tx_tmpdir() as tmp_dir:
            # The report is run against a link to the BAM placed in out_dir
            # (the current directory at this point).
            cur_bam = os.path.basename(bam_fname)
            if not os.path.exists(cur_bam):
                os.symlink(bam_fname, cur_bam)
            gen_metrics = PicardMetrics(broad_runner, tmp_dir)
            # The variant regions are passed in both the bait and target
            # positions of the report call.
            gen_metrics.report(cur_bam, ref_file, bam.is_paired(bam_fname),
                               target_file, target_file, None, config)
    # Strip the "-sort.bam" text from the metrics file. Only attempt this
    # when the report actually produced the file: `sed -i` on a missing path
    # exits non-zero and would turn a missing metric into a shell failure.
    if utils.file_exists(hsmetric_file):
        do.run("sed -i 's/-sort.bam//g' %s" % hsmetric_file, "")
    return hsmetric_file
def run(bam_file, data, out_dir):
    """Produce Picard hybrid-selection and insert-size metrics for a BAM.

    Nothing is run unless "picard" is listed by ``dd.get_tools_on(data)``.
    Output names are derived from the BAM base name:
    ``<out_dir>/<base>.hs_metrics`` and ``<out_dir>/<base>.insert_metrics``.
    The report is only generated when neither file exists yet.

    Args:
        bam_file: Path to the BAM file to analyse.
        data: Sample dictionary; ``data["config"]`` is read directly and the
            remaining values through the ``dd`` accessors.
        out_dir: Output directory, created when missing.

    Returns:
        ``{}`` when Picard is not enabled, otherwise the hs_metrics path
        (returned whether or not that file was produced).
    """
    if "picard" not in dd.get_tools_on(data):
        return {}
    ref_file = dd.get_ref_file(data)
    sample = dd.get_sample_name(data)
    # Fall back to the sample callable regions when no variant regions are set.
    target_file = dd.get_variant_regions(data) or dd.get_sample_callable(data)
    broad_runner = broad.PicardCmdRunner("picard", data["config"])
    bam_fname = os.path.abspath(bam_file)
    utils.safe_makedir(out_dir)
    out_base = utils.splitext_plus(os.path.basename(bam_fname))[0]
    hsmetric_file = os.path.join(out_dir, "%s.hs_metrics" % out_base)
    hsinsert_file = os.path.join(out_dir, "%s.insert_metrics" % out_base)
    if (not utils.file_exists(hsmetric_file)
            and not utils.file_exists(hsinsert_file)):
        with utils.chdir(out_dir):
            with tx_tmpdir() as tmp_dir:
                # The report is run against a link to the BAM placed in
                # out_dir (the current directory at this point).
                cur_bam = os.path.basename(bam_fname)
                if not os.path.exists(cur_bam):
                    os.symlink(bam_fname, cur_bam)
                gen_metrics = PicardMetrics(broad_runner, tmp_dir)
                gen_metrics.report(cur_bam, ref_file,
                                   bam.is_paired(bam_fname),
                                   target_file, target_file, None,
                                   data["config"])
        # Remove the BAM-name remainder (base name minus the sample name,
        # plus ".bam") from whichever metrics files were produced.
        bam_suffix = out_base.replace(sample, "")
        for metrics_file in (hsmetric_file, hsinsert_file):
            if utils.file_exists(metrics_file):
                do.run("sed -i 's/%s.bam//g' %s" % (bam_suffix, metrics_file),
                       "")
    return hsmetric_file
def run(bam_file, data, out_dir):
    """Produce Picard hybrid-selection and insert-size metrics for a BAM.

    Nothing is run unless "picard" is listed by ``dd.get_tools_on(data)``.
    Results are expected at ``<out_dir>/<sample>-sort.hs_metrics`` and
    ``<out_dir>/<sample>-sort.insert_metrics``; an existing hs_metrics file
    short-circuits the run.

    Args:
        bam_file: Path to the BAM file to analyse.
        data: Sample dictionary; ``data["config"]`` is read directly and the
            remaining values through the ``dd`` accessors.
        out_dir: Output directory, created when missing.

    Returns:
        ``{}`` when Picard is not enabled, otherwise the hs_metrics path.
    """
    config = data["config"]
    if "picard" not in dd.get_tools_on(data):
        return {}
    ref_file = dd.get_ref_file(data)
    sample = dd.get_sample_name(data)
    target_file = dd.get_variant_regions(data)
    broad_runner = broad.PicardCmdRunner("picard", config)
    bam_fname = os.path.abspath(bam_file)
    utils.safe_makedir(out_dir)
    hsmetric_file = os.path.join(out_dir, "%s-sort.hs_metrics" % sample)
    hsinsert_file = os.path.join(out_dir, "%s-sort.insert_metrics" % sample)
    if utils.file_exists(hsmetric_file):
        return hsmetric_file
    with utils.chdir(out_dir):
        with tx_tmpdir() as tmp_dir:
            # The report is run against a link to the BAM placed in out_dir
            # (the current directory at this point).
            cur_bam = os.path.basename(bam_fname)
            if not os.path.exists(cur_bam):
                os.symlink(bam_fname, cur_bam)
            gen_metrics = PicardMetrics(broad_runner, tmp_dir)
            gen_metrics.report(cur_bam, ref_file, bam.is_paired(bam_fname),
                               target_file, target_file, None, config)
    # Strip the "-sort.bam" text from each metrics file that was produced.
    # The existence check matters: `sed -i` on a missing path exits non-zero,
    # so an absent insert_metrics file would otherwise abort the whole step.
    for metrics_file in (hsmetric_file, hsinsert_file):
        if utils.file_exists(metrics_file):
            do.run("sed -i 's/-sort.bam//g' %s" % metrics_file, "")
    return hsmetric_file
def run(bam_file, data, out_dir):
    """Run the Picard metrics report for one BAM and tidy its output.

    Returns early with ``{}`` when "picard" is not among the tools reported
    by ``dd.get_tools_on(data)``. Output files are named after the BAM base
    name (``<base>.hs_metrics`` / ``<base>.insert_metrics`` in ``out_dir``)
    and the report is skipped when either of them is already present.

    Args:
        bam_file: Path to the BAM file to analyse.
        data: Sample dictionary; ``data["config"]`` is read directly and the
            remaining values through the ``dd`` accessors.
        out_dir: Output directory, created when missing.

    Returns:
        ``{}`` when Picard is not enabled, otherwise the hs_metrics path
        (returned whether or not that file was produced).
    """
    if "picard" not in dd.get_tools_on(data):
        return {}
    config = data["config"]
    ref_file = dd.get_ref_file(data)
    sample = dd.get_sample_name(data)
    # Fall back to the sample callable regions when no variant regions are set.
    target_file = dd.get_variant_regions(data) or dd.get_sample_callable(data)
    broad_runner = broad.PicardCmdRunner("picard", config)
    bam_fname = os.path.abspath(bam_file)
    utils.safe_makedir(out_dir)
    out_base = utils.splitext_plus(os.path.basename(bam_fname))[0]
    hsmetric_file = os.path.join(out_dir, "%s.hs_metrics" % out_base)
    hsinsert_file = os.path.join(out_dir, "%s.insert_metrics" % out_base)
    already_done = (utils.file_exists(hsmetric_file)
                    or utils.file_exists(hsinsert_file))
    if not already_done:
        with utils.chdir(out_dir):
            with tx_tmpdir() as tmp_dir:
                # The report is run against a link to the BAM placed in
                # out_dir (the current directory at this point).
                cur_bam = os.path.basename(bam_fname)
                if not os.path.exists(cur_bam):
                    os.symlink(bam_fname, cur_bam)
                gen_metrics = PicardMetrics(broad_runner, tmp_dir)
                gen_metrics.report(cur_bam, ref_file,
                                   bam.is_paired(bam_fname),
                                   target_file, target_file, None, config)
        # Remove the BAM-name remainder (base name minus the sample name,
        # plus ".bam") from whichever metrics files were produced.
        bam_suffix = out_base.replace(sample, "")
        for metrics_file in (hsmetric_file, hsinsert_file):
            if utils.file_exists(metrics_file):
                do.run("sed -i 's/%s.bam//g' %s" % (bam_suffix, metrics_file),
                       "")
    return hsmetric_file
def _generate_metrics(bam_fname, config_file, ref_file, bait_file, target_file):
    """Run Picard commands to generate metrics files when missing.

    The report is run inside a ``metrics`` directory created next to the
    BAM file, against a symlink to the BAM placed in that directory.

    Args:
        bam_fname: Path to the BAM file to analyse.
        config_file: Path to a YAML configuration file used to build the
            Broad/Picard runner.
        ref_file: Reference file handed to the metrics report.
        bait_file: Bait file handed to the metrics report.
        target_file: Target file handed to the metrics report.

    Returns:
        Path of the ``metrics`` directory.
    """
    with open(config_file) as in_handle:
        # safe_load instead of yaml.load: a bare yaml.load() can construct
        # arbitrary Python objects and is rejected without an explicit
        # Loader by PyYAML >= 6.
        config = yaml.safe_load(in_handle)
    broad_runner = broad.runner_from_config(config)
    bam_fname = os.path.abspath(bam_fname)
    out_dir = os.path.join(os.path.dirname(bam_fname), "metrics")
    utils.safe_makedir(out_dir)
    with utils.chdir(out_dir):
        with tx_tmpdir() as tmp_dir:
            cur_bam = os.path.basename(bam_fname)
            if not os.path.exists(cur_bam):
                os.symlink(bam_fname, cur_bam)
            gen_metrics = PicardMetrics(broad_runner, tmp_dir)
            gen_metrics.report(cur_bam, ref_file, _bam_is_paired(bam_fname),
                               bait_file, target_file)
    return out_dir
def _generate_metrics(bam_fname, config_file, ref_file, bait_file, target_file):
    """Run the Picard metrics report for a BAM in a sibling ``metrics`` dir.

    The configuration is read with ``load_config`` and used to build the
    Broad runner. The BAM is symlinked into ``<bam dir>/metrics`` (unless a
    file of that name is already there) and the report is run on the link.

    Args:
        bam_fname: Path to the BAM file to analyse.
        config_file: Configuration file passed to ``load_config``.
        ref_file: Reference file handed to the metrics report.
        bait_file: Bait file handed to the metrics report.
        target_file: Target file handed to the metrics report.

    Returns:
        Path of the ``metrics`` directory.
    """
    runner = broad.runner_from_config(load_config(config_file))
    bam_path = os.path.abspath(bam_fname)
    metrics_dir = os.path.join(os.path.dirname(bam_path), "metrics")
    utils.safe_makedir(metrics_dir)
    with utils.chdir(metrics_dir), utils.curdir_tmpdir() as scratch_dir:
        link_name = os.path.basename(bam_path)
        if not os.path.exists(link_name):
            os.symlink(bam_path, link_name)
        reporter = PicardMetrics(runner, scratch_dir)
        paired = _bam_is_paired(bam_path)
        reporter.report(link_name, ref_file, paired, bait_file, target_file)
    return metrics_dir
def main(picard_dir, align_bam, ref_file, fastq_one, fastq_pair=None,
         bait_file=None, target_file=None, do_sort=False, sample_name="",
         config=None):
    """Build a LaTeX summary report for an alignment and run pdflatex on it.

    Collects the Picard metrics report, fastq statistics plots and SolexaQA
    plots, renders them through ``section_template`` / ``base_template``
    into ``<base>-summary.tex`` (``base`` being the BAM path without its
    extension, dots replaced by dashes) and passes that file to
    ``run_pdflatex``.

    Args:
        picard_dir: Picard location handed to ``BroadRunner``.
        align_bam: Alignment BAM file.
        ref_file: Reference file; its base name is shown as the reference
            organism in the summary table.
        fastq_one: First fastq file.
        fastq_pair: Optional second fastq file; whether it is given is
            passed to the Picard report as the paired flag.
        bait_file: Optional bait file for the Picard report.
        target_file: Optional target file for the Picard report.
        do_sort: Sort the BAM with ``picard_sort`` before computing metrics.
        sample_name: Name shown in the report section heading.
        config: Optional YAML file; its ``program`` section replaces
            ``PARAM_DEFAULTS``.
    """
    tmp_dir = _make_tmpdir()
    try:
        work_dir = os.getcwd()
        if config:
            with open(config) as in_handle:
                # safe_load instead of yaml.load: a bare yaml.load() can
                # construct arbitrary Python objects and is rejected without
                # an explicit Loader by PyYAML >= 6.
                params = yaml.safe_load(in_handle)["program"]
        else:
            params = PARAM_DEFAULTS
        picard = BroadRunner(picard_dir)
        if do_sort:
            align_bam = picard_sort(picard, align_bam, tmp_dir)
        metrics = PicardMetrics(picard, tmp_dir)
        summary_table, metrics_graphs = metrics.report(
            align_bam, ref_file, fastq_pair is not None, bait_file,
            target_file)
        base = os.path.splitext(align_bam)[0].replace(".", "-")
        _total_count, read_size, fastq_graphs = plot_fastq_stats(
            [fastq_one, fastq_pair], base, params)
        qa_graphs = solexaqa_plots([fastq_one, fastq_pair], params, work_dir)
        # add read_size to the total summary table
        first_row = summary_table[0]
        summary_table[0] = (first_row[0], first_row[1],
                            "%sbp %s" % (read_size, first_row[-1]))
        ref_org = os.path.splitext(os.path.split(ref_file)[-1])[0]
        summary_table.insert(
            0, ("Reference organism", ref_org.replace("_", " "), ""))
        tmpl = Template(section_template)
        # Underscores are backslash-escaped for the .tex output; "\\_" is the
        # same two characters the original "\_" produced, without relying on
        # an invalid escape sequence.
        sample_name = "%s (%s)" % (sample_name.replace("_", "\\_"),
                                   base.replace("_", " "))
        all_graphs = metrics_graphs + fastq_graphs + qa_graphs
        section = tmpl.render(
            name=sample_name, summary=None, summary_table=summary_table,
            figures=[(f, c) for (f, c) in all_graphs if f],
            recal_figures=_get_recal_plots(work_dir, align_bam))
        out_file = "%s-summary.tex" % base
        out_tmpl = Template(base_template)
        with open(out_file, "w") as out_handle:
            out_handle.write(out_tmpl.render(parts=[section]))
        run_pdflatex(out_file, params)
    finally:
        # Always remove the scratch directory, including when a step fails.
        shutil.rmtree(tmp_dir)
def main(picard_dir, align_bam, ref_file, is_paired, bait_file=None,
         target_file=None, do_sort=False, sample_name="", config=None):
    """Build a LaTeX summary report for an alignment and run pdflatex on it.

    Combines the Picard metrics report with the FastQC report, renders them
    through ``section_template`` / ``base_template`` into
    ``<base>-summary.tex`` (``base`` being the BAM path without its
    extension, dots replaced by dashes) and passes that file to
    ``run_pdflatex``. All work happens inside a ``utils.curdir_tmpdir()``
    context.

    Args:
        picard_dir: Picard location handed to ``BroadRunner``.
        align_bam: Alignment BAM file.
        ref_file: Reference file.
        is_paired: Paired flag passed to the Picard report.
        bait_file: Optional bait file for the Picard report.
        target_file: Optional target file for the Picard report.
        do_sort: Sort the BAM with ``picard_sort`` before computing metrics.
        sample_name: Name for the section heading; when ``None`` the FastQC
            "Filename" statistic is used instead.
        config: Optional YAML file providing a ``program`` section (report
            parameters) and an ``algorithm`` section (``java_memory``).
    """
    with utils.curdir_tmpdir() as tmp_dir:
        work_dir = os.getcwd()
        params = {}
        java_memory = ""
        if config:
            with open(config) as in_handle:
                # safe_load instead of yaml.load: a bare yaml.load() can
                # construct arbitrary Python objects and is rejected without
                # an explicit Loader by PyYAML >= 6.
                info = yaml.safe_load(in_handle)
            params = info["program"]
            java_memory = info["algorithm"].get("java_memory", "")
        picard = BroadRunner(picard_dir, max_memory=java_memory)
        if do_sort:
            align_bam = picard_sort(picard, align_bam, tmp_dir)
        metrics = PicardMetrics(picard, tmp_dir)
        summary_table, metrics_graphs = metrics.report(
            align_bam, ref_file, is_paired, bait_file, target_file)
        # Give every Picard graph a third element of 0.75 so its tuples have
        # the three-field shape the figures filter below unpacks.
        metrics_graphs = [(p, c, 0.75) for p, c in metrics_graphs]
        base = os.path.splitext(align_bam)[0].replace(".", "-")
        fastqc_graphs, fastqc_stats, fastqc_overrep = fastqc_report(
            align_bam, params)
        all_graphs = fastqc_graphs + metrics_graphs
        summary_table = _update_summary_table(summary_table, ref_file,
                                              fastqc_stats)
        tmpl = Template(section_template)
        if sample_name is None:
            sample_name = fastqc_stats["Filename"]
        # Underscores are backslash-escaped for the .tex output; "\\_" is the
        # same two characters the original "\_" produced, without relying on
        # an invalid escape sequence.
        sample_name = "%s (%s)" % (sample_name.replace("_", "\\_"),
                                   base.replace("_", " "))
        section = tmpl.render(
            name=sample_name, summary=None, summary_table=summary_table,
            figures=[(f, c, i) for (f, c, i) in all_graphs if f],
            overrep=fastqc_overrep,
            recal_figures=_get_recal_plots(work_dir, align_bam))
        out_file = "%s-summary.tex" % base
        out_tmpl = Template(base_template)
        with open(out_file, "w") as out_handle:
            out_handle.write(out_tmpl.render(parts=[section]))
        run_pdflatex(out_file, params)
def _graphs_and_summary(bam_file, sam_ref, is_paired, tmp_dir, config):
    """Collect Picard and FastQC graphs plus the combined summary table.

    Bait and target files come from the ``hybrid_bait`` / ``hybrid_target``
    entries of ``config["algorithm"]`` (``None`` when absent).

    Args:
        bam_file: BAM file to report on.
        sam_ref: Reference passed to the Picard report and to the summary
            table update.
        is_paired: Paired flag passed to the Picard report.
        tmp_dir: Scratch directory for ``PicardMetrics``.
        config: Configuration used to build the runner and by FastQC.

    Returns:
        Tuple ``(all_graphs, summary_table, fastqc_overrep)`` with the
        FastQC graphs listed before the Picard graphs.
    """
    algorithm = config["algorithm"]
    bait_file = algorithm.get("hybrid_bait")
    target_file = algorithm.get("hybrid_target")
    picard = PicardMetrics(runner_from_config(config), tmp_dir)
    table, picard_graphs = picard.report(bam_file, sam_ref, is_paired,
                                         bait_file, target_file)
    # Picard graphs arrive as pairs; append 0.75 as a third element.
    sized_graphs = []
    for graph_path, caption in picard_graphs:
        sized_graphs.append((graph_path, caption, 0.75))
    fastqc_graphs, fastqc_stats, overrep = fastqc_report(bam_file, config)
    combined = fastqc_graphs + sized_graphs
    table = _update_summary_table(table, sam_ref, fastqc_stats)
    return combined, table, overrep
def _graphs_and_summary(bam_file, sam_ref, bait_file, output_dir, tmp_dir, config):
    """Collect Picard and FastQC graphs plus the combined summary table.

    ``bait_file`` is handed to the Picard report in both the bait and the
    target position; pairing is determined with ``is_paired(bam_file)``.

    Args:
        bam_file: BAM file to report on.
        sam_ref: Reference passed to the Picard report and to the summary
            table update.
        bait_file: Bait (and target) file for the Picard report.
        output_dir: Output directory passed to ``fastqc_report``.
        tmp_dir: Scratch directory for ``PicardMetrics``.
        config: Configuration used to build the runner, by the Picard
            report and by FastQC.

    Returns:
        Tuple ``(all_graphs, summary_table, fastqc_overrep)`` with the
        FastQC graphs listed before the Picard graphs.
    """
    picard = PicardMetrics(runner_from_config(config), tmp_dir)
    paired = is_paired(bam_file)
    table, picard_graphs = picard.report(bam_file, sam_ref, paired,
                                         bait_file, bait_file, False, config)
    # Picard graphs arrive as pairs; append 0.75 as a third element.
    sized_graphs = []
    for graph_path, caption in picard_graphs:
        sized_graphs.append((graph_path, caption, 0.75))
    fastqc_graphs, fastqc_stats, overrep = fastqc_report(bam_file, output_dir,
                                                         config)
    combined = fastqc_graphs + sized_graphs
    table = _update_summary_table(table, sam_ref, fastqc_stats)
    return combined, table, overrep
def _graphs_and_summary(bam_file, sam_ref, tmp_dir, config):
    """Collect Picard and FastQC graphs plus the combined summary table.

    Bait and target files come from the ``hybrid_bait`` / ``hybrid_target``
    entries of ``config["algorithm"]`` (``None`` when absent); pairing is
    determined with ``is_paired(bam_file)``.

    Args:
        bam_file: BAM file to report on.
        sam_ref: Reference passed to the Picard report and to the summary
            table update.
        tmp_dir: Scratch directory for ``PicardMetrics``.
        config: Configuration used to build the runner and by FastQC.

    Returns:
        Tuple ``(all_graphs, summary_table, fastqc_overrep)`` with the
        FastQC graphs listed before the Picard graphs.
    """
    algorithm = config["algorithm"]
    bait_file = algorithm.get("hybrid_bait")
    target_file = algorithm.get("hybrid_target")
    picard = PicardMetrics(runner_from_config(config), tmp_dir)
    paired = is_paired(bam_file)
    table, picard_graphs = picard.report(bam_file, sam_ref, paired,
                                         bait_file, target_file)
    # Picard graphs arrive as pairs; append 0.75 as a third element.
    sized_graphs = []
    for graph_path, caption in picard_graphs:
        sized_graphs.append((graph_path, caption, 0.75))
    fastqc_graphs, fastqc_stats, overrep = fastqc_report(bam_file, config)
    combined = fastqc_graphs + sized_graphs
    table = _update_summary_table(table, sam_ref, fastqc_stats)
    return combined, table, overrep
def main(picard_dir, align_bam, ref_file, fastq_one, fastq_pair=None,
         bait_file=None, target_file=None, do_sort=False, sample_name="",
         config=None):
    """Generate a PDF-ready LaTeX summary for an alignment.

    Steps: optional BAM sort, Picard metrics report, fastq statistics
    plots, SolexaQA plots, rendering of ``section_template`` /
    ``base_template`` into ``<base>-summary.tex`` and a final
    ``run_pdflatex`` call. ``base`` is the BAM path without its extension,
    with dots replaced by dashes.

    Args:
        picard_dir: Picard location handed to ``BroadRunner``.
        align_bam: Alignment BAM file.
        ref_file: Reference file; its base name is shown as the reference
            organism in the summary table.
        fastq_one: First fastq file.
        fastq_pair: Optional second fastq file; whether it is given is
            passed to the Picard report as the paired flag.
        bait_file: Optional bait file for the Picard report.
        target_file: Optional target file for the Picard report.
        do_sort: Sort the BAM with ``picard_sort`` before computing metrics.
        sample_name: Name shown in the report section heading.
        config: Optional YAML file; its ``program`` section replaces
            ``PARAM_DEFAULTS``.
    """
    tmp_dir = _make_tmpdir()
    try:
        work_dir = os.getcwd()
        params = PARAM_DEFAULTS
        if config:
            with open(config) as in_handle:
                # safe_load instead of yaml.load: a bare yaml.load() can
                # construct arbitrary Python objects and is rejected without
                # an explicit Loader by PyYAML >= 6.
                params = yaml.safe_load(in_handle)["program"]
        picard = BroadRunner(picard_dir)
        if do_sort:
            align_bam = picard_sort(picard, align_bam, tmp_dir)
        metrics = PicardMetrics(picard, tmp_dir)
        summary_table, metrics_graphs = metrics.report(
            align_bam, ref_file, fastq_pair is not None, bait_file,
            target_file)
        base = os.path.splitext(align_bam)[0].replace(".", "-")
        fastq_files = [fastq_one, fastq_pair]
        _total_count, read_size, fastq_graphs = plot_fastq_stats(
            fastq_files, base, params)
        qa_graphs = solexaqa_plots(fastq_files, params, work_dir)
        # add read_size to the total summary table
        first_row = summary_table[0]
        summary_table[0] = (first_row[0], first_row[1],
                            "%sbp %s" % (read_size, first_row[-1]))
        ref_org = os.path.splitext(os.path.split(ref_file)[-1])[0]
        summary_table.insert(
            0, ("Reference organism", ref_org.replace("_", " "), ""))
        tmpl = Template(section_template)
        # Underscores are backslash-escaped for the .tex output; "\\_" is the
        # same two characters the original "\_" produced, without relying on
        # an invalid escape sequence.
        sample_name = "%s (%s)" % (sample_name.replace("_", "\\_"),
                                   base.replace("_", " "))
        all_graphs = metrics_graphs + fastq_graphs + qa_graphs
        section = tmpl.render(
            name=sample_name, summary=None, summary_table=summary_table,
            figures=[(f, c) for (f, c) in all_graphs if f],
            recal_figures=_get_recal_plots(work_dir, align_bam))
        out_file = "%s-summary.tex" % base
        out_tmpl = Template(base_template)
        with open(out_file, "w") as out_handle:
            out_handle.write(out_tmpl.render(parts=[section]))
        run_pdflatex(out_file, params)
    finally:
        # Always remove the scratch directory, including when a step fails.
        shutil.rmtree(tmp_dir)
def main(picard_dir, align_bam, ref_file, is_paired, bait_file=None,
         target_file=None, do_sort=False, sample_name="", config=None):
    """Generate a PDF-ready LaTeX summary for an alignment.

    Steps, all inside a ``utils.curdir_tmpdir()`` context: optional BAM
    sort, Picard metrics report, FastQC report, rendering of
    ``section_template`` / ``base_template`` into ``<base>-summary.tex``
    and a final ``run_pdflatex`` call. ``base`` is the BAM path without its
    extension, with dots replaced by dashes.

    Args:
        picard_dir: Picard location handed to ``BroadRunner``.
        align_bam: Alignment BAM file.
        ref_file: Reference file.
        is_paired: Paired flag passed to the Picard report.
        bait_file: Optional bait file for the Picard report.
        target_file: Optional target file for the Picard report.
        do_sort: Sort the BAM with ``picard_sort`` before computing metrics.
        sample_name: Name for the section heading; when ``None`` the FastQC
            "Filename" statistic is used instead.
        config: Optional YAML file providing a ``program`` section (report
            parameters) and an ``algorithm`` section (``java_memory``).
    """
    with utils.curdir_tmpdir() as tmp_dir:
        work_dir = os.getcwd()
        params = {}
        java_memory = ""
        if config:
            with open(config) as in_handle:
                # safe_load instead of yaml.load: a bare yaml.load() can
                # construct arbitrary Python objects and is rejected without
                # an explicit Loader by PyYAML >= 6.
                info = yaml.safe_load(in_handle)
            params = info["program"]
            java_memory = info["algorithm"].get("java_memory", "")
        picard = BroadRunner(picard_dir, max_memory=java_memory)
        if do_sort:
            align_bam = picard_sort(picard, align_bam, tmp_dir)
        metrics = PicardMetrics(picard, tmp_dir)
        summary_table, metrics_graphs = metrics.report(
            align_bam, ref_file, is_paired, bait_file, target_file)
        # Give every Picard graph a third element of 0.75 so its tuples have
        # the three-field shape the figures filter below unpacks.
        metrics_graphs = [(p, c, 0.75) for p, c in metrics_graphs]
        base = os.path.splitext(align_bam)[0].replace(".", "-")
        fastqc_graphs, fastqc_stats, fastqc_overrep = fastqc_report(
            align_bam, params)
        all_graphs = fastqc_graphs + metrics_graphs
        summary_table = _update_summary_table(summary_table, ref_file,
                                              fastqc_stats)
        tmpl = Template(section_template)
        if sample_name is None:
            sample_name = fastqc_stats["Filename"]
        # Underscores are backslash-escaped for the .tex output; "\\_" is the
        # same two characters the original "\_" produced, without relying on
        # an invalid escape sequence.
        sample_name = "%s (%s)" % (sample_name.replace("_", "\\_"),
                                   base.replace("_", " "))
        section = tmpl.render(
            name=sample_name, summary=None, summary_table=summary_table,
            figures=[(f, c, i) for (f, c, i) in all_graphs if f],
            overrep=fastqc_overrep,
            recal_figures=_get_recal_plots(work_dir, align_bam))
        out_file = "%s-summary.tex" % base
        out_tmpl = Template(base_template)
        with open(out_file, "w") as out_handle:
            out_handle.write(out_tmpl.render(parts=[section]))
        run_pdflatex(out_file, params)