Example #1
0
def run(bam_file, data, out_dir):
    """Generate Picard hybrid-selection metrics for ``bam_file``.

    Args:
        bam_file: path to the BAM file to analyse.
        data: sample dictionary; must contain a "config" entry and is
            queried through the ``dd`` accessors for tools, reference,
            sample name and variant regions.
        out_dir: directory in which the metrics are written (created when
            missing).

    Returns:
        An empty dict when "picard" is not in the tools enabled for
        ``data``; otherwise the path ``<out_dir>/<sample>-sort.hs_metrics``.
        When ``utils.file_exists`` already reports that file, Picard is not
        run again.
    """
    config = data["config"]
    if "picard" not in dd.get_tools_on(data):
        return {}
    ref_file = dd.get_ref_file(data)
    sample = dd.get_sample_name(data)
    target_file = dd.get_variant_regions(data)
    broad_runner = broad.PicardCmdRunner("picard", config)
    bam_fname = os.path.abspath(bam_file)
    utils.safe_makedir(out_dir)
    hsmetric_file = os.path.join(out_dir, "%s-sort.hs_metrics" % sample)
    if utils.file_exists(hsmetric_file):
        return hsmetric_file
    with utils.chdir(out_dir):
        with tx_tmpdir() as tmp_dir:
            # Picard is pointed at a symlink inside out_dir (the current
            # directory here), so the BAM is referenced by its base name.
            cur_bam = os.path.basename(bam_fname)
            if not os.path.exists(cur_bam):
                os.symlink(bam_fname, cur_bam)
            gen_metrics = PicardMetrics(broad_runner, tmp_dir)
            # The variant regions serve as both bait and target intervals.
            gen_metrics.report(cur_bam, ref_file,
                               bam.is_paired(bam_fname),
                               target_file, target_file, None, config)
    # Strip the "-sort.bam" suffix from names recorded in the metrics file.
    # Only post-process a file that was actually produced, so sed is never
    # invoked on a missing path.
    if utils.file_exists(hsmetric_file):
        do.run("sed -i 's/-sort.bam//g' %s" % hsmetric_file, "")
    return hsmetric_file
Example #2
0
def run(bam_file, data, out_dir):
    """Generate Picard hybrid-selection and insert-size metrics for a BAM.

    Args:
        bam_file: path to the BAM file to analyse.
        data: sample dictionary; must contain a "config" entry and is
            queried through the ``dd`` accessors.
        out_dir: directory in which the metrics are written (created when
            missing).

    Returns:
        An empty dict when "picard" is not in the tools enabled for
        ``data``; otherwise the path ``<out_dir>/<bam base>.hs_metrics``.
        Picard is only run when neither the hs_metrics nor the
        insert_metrics file is reported by ``utils.file_exists``.
    """
    if "picard" not in dd.get_tools_on(data):
        return {}
    ref_file = dd.get_ref_file(data)
    sample = dd.get_sample_name(data)
    # Fall back to the sample callable regions when no variant regions exist.
    target_file = dd.get_variant_regions(data) or dd.get_sample_callable(data)
    broad_runner = broad.PicardCmdRunner("picard", data["config"])
    bam_fname = os.path.abspath(bam_file)
    utils.safe_makedir(out_dir)
    out_base = utils.splitext_plus(os.path.basename(bam_fname))[0]
    hsmetric_file = os.path.join(out_dir, "%s.hs_metrics" % out_base)
    hsinsert_file = os.path.join(out_dir, "%s.insert_metrics" % out_base)
    if not utils.file_exists(hsmetric_file) and not utils.file_exists(
            hsinsert_file):
        with utils.chdir(out_dir):
            with tx_tmpdir() as tmp_dir:
                # Picard is pointed at a symlink inside out_dir (the
                # current directory here), referenced by its base name.
                cur_bam = os.path.basename(bam_fname)
                if not os.path.exists(cur_bam):
                    os.symlink(bam_fname, cur_bam)
                gen_metrics = PicardMetrics(broad_runner, tmp_dir)
                # The same regions serve as both bait and target intervals.
                gen_metrics.report(cur_bam, ref_file, bam.is_paired(bam_fname),
                                   target_file, target_file, None,
                                   data["config"])
        # Remove "<suffix>.bam" (BAM base name minus the sample name) from
        # whichever metrics files were produced.
        for metrics_file in (hsmetric_file, hsinsert_file):
            if utils.file_exists(metrics_file):
                do.run(
                    "sed -i 's/%s.bam//g' %s" %
                    (out_base.replace(sample, ""), metrics_file), "")
    return hsmetric_file
Example #3
0
def run(bam_file, data, out_dir):
    """Generate Picard hybrid-selection and insert-size metrics for a BAM.

    Args:
        bam_file: path to the BAM file to analyse.
        data: sample dictionary; must contain a "config" entry and is
            queried through the ``dd`` accessors.
        out_dir: directory in which the metrics are written (created when
            missing).

    Returns:
        An empty dict when "picard" is not in the tools enabled for
        ``data``; otherwise the path ``<out_dir>/<sample>-sort.hs_metrics``.
        When ``utils.file_exists`` already reports that file, Picard is not
        run again.
    """
    config = data["config"]
    if "picard" not in dd.get_tools_on(data):
        return {}
    ref_file = dd.get_ref_file(data)
    sample = dd.get_sample_name(data)
    target_file = dd.get_variant_regions(data)
    broad_runner = broad.PicardCmdRunner("picard", config)
    bam_fname = os.path.abspath(bam_file)
    utils.safe_makedir(out_dir)
    hsmetric_file = os.path.join(out_dir, "%s-sort.hs_metrics" % sample)
    hsinsert_file = os.path.join(out_dir, "%s-sort.insert_metrics" % sample)
    if utils.file_exists(hsmetric_file):
        return hsmetric_file
    with utils.chdir(out_dir):
        with tx_tmpdir() as tmp_dir:
            # Picard is pointed at a symlink inside out_dir (the current
            # directory here), so the BAM is referenced by its base name.
            cur_bam = os.path.basename(bam_fname)
            if not os.path.exists(cur_bam):
                os.symlink(bam_fname, cur_bam)
            gen_metrics = PicardMetrics(broad_runner, tmp_dir)
            # The variant regions serve as both bait and target intervals.
            gen_metrics.report(cur_bam, ref_file, bam.is_paired(bam_fname),
                               target_file, target_file, None, config)
    # Strip the "-sort.bam" suffix from names recorded in the metrics files.
    # Only post-process files that were actually produced, so sed is never
    # invoked on a missing path.
    for metrics_file in (hsmetric_file, hsinsert_file):
        if utils.file_exists(metrics_file):
            do.run("sed -i 's/-sort.bam//g' %s" % metrics_file, "")
    return hsmetric_file
Example #4
0
def run(bam_file, data, out_dir):
    """Generate Picard hybrid-selection and insert-size metrics for a BAM.

    Args:
        bam_file: path to the BAM file to analyse.
        data: sample dictionary; must contain a "config" entry and is
            queried through the ``dd`` accessors.
        out_dir: directory in which the metrics are written (created when
            missing).

    Returns:
        An empty dict when "picard" is not in the tools enabled for
        ``data``; otherwise the path ``<out_dir>/<bam base>.hs_metrics``.
        Picard is only run when neither the hs_metrics nor the
        insert_metrics file is reported by ``utils.file_exists``.
    """
    if "picard" not in dd.get_tools_on(data):
        return {}
    ref_file = dd.get_ref_file(data)
    sample = dd.get_sample_name(data)
    # Fall back to the sample callable regions when no variant regions exist.
    target_file = dd.get_variant_regions(data) or dd.get_sample_callable(data)
    broad_runner = broad.PicardCmdRunner("picard", data["config"])
    bam_fname = os.path.abspath(bam_file)
    utils.safe_makedir(out_dir)
    out_base = utils.splitext_plus(os.path.basename(bam_fname))[0]
    hsmetric_file = os.path.join(out_dir, "%s.hs_metrics" % out_base)
    hsinsert_file = os.path.join(out_dir, "%s.insert_metrics" % out_base)
    if not utils.file_exists(hsmetric_file) and not utils.file_exists(hsinsert_file):
        with utils.chdir(out_dir):
            with tx_tmpdir() as tmp_dir:
                # Picard is pointed at a symlink inside out_dir (the
                # current directory here), referenced by its base name.
                cur_bam = os.path.basename(bam_fname)
                if not os.path.exists(cur_bam):
                    os.symlink(bam_fname, cur_bam)
                gen_metrics = PicardMetrics(broad_runner, tmp_dir)
                # The same regions serve as both bait and target intervals.
                gen_metrics.report(cur_bam, ref_file,
                                   bam.is_paired(bam_fname),
                                   target_file, target_file, None, data["config"])
        # Remove "<suffix>.bam" (BAM base name minus the sample name) from
        # whichever metrics files were produced.
        for metrics_file in (hsmetric_file, hsinsert_file):
            if utils.file_exists(metrics_file):
                do.run("sed -i 's/%s.bam//g' %s"
                       % (out_base.replace(sample, ""), metrics_file), "")
    return hsmetric_file
Example #5
0
def main(picard_dir,
         align_bam,
         ref_file,
         fastq_one,
         fastq_pair=None,
         bait_file=None,
         target_file=None,
         do_sort=False,
         sample_name="",
         config=None):
    """Build a LaTeX summary report for an alignment and run pdflatex on it.

    Args:
        picard_dir: passed to ``BroadRunner`` to locate Picard.
        align_bam: alignment BAM file; replaced by the output of
            ``picard_sort`` when ``do_sort`` is true.
        ref_file: reference file; its base name (without extension) is
            shown as the reference organism.
        fastq_one: first fastq file.
        fastq_pair: second fastq file; the run is treated as paired when
            this is not None.
        bait_file: bait intervals handed to the Picard metrics report.
        target_file: target intervals handed to the Picard metrics report.
        do_sort: sort the BAM with Picard before collecting metrics.
        sample_name: name used in the report section heading.
        config: optional path to a YAML file whose "program" section
            supplies the parameters; ``PARAM_DEFAULTS`` is used otherwise.

    The report is written to ``<base>-summary.tex``, where ``<base>`` is
    ``align_bam`` without its extension and with "." replaced by "-".
    """
    tmp_dir = _make_tmpdir()
    # try/finally so the temporary directory is removed even when one of
    # the reporting steps raises.
    try:
        work_dir = os.getcwd()
        if config:
            with open(config) as in_handle:
                # NOTE(review): yaml.load without an explicit Loader can
                # construct arbitrary objects; consider yaml.safe_load if
                # the config may come from an untrusted source.
                params = yaml.load(in_handle)["program"]
        else:
            params = PARAM_DEFAULTS
        picard = BroadRunner(picard_dir)
        if do_sort:
            align_bam = picard_sort(picard, align_bam, tmp_dir)

        metrics = PicardMetrics(picard, tmp_dir)
        summary_table, metrics_graphs = metrics.report(align_bam, ref_file,
                                                       fastq_pair is not None,
                                                       bait_file, target_file)
        base = os.path.splitext(align_bam)[0].replace(".", "-")
        _, read_size, fastq_graphs = plot_fastq_stats(
            [fastq_one, fastq_pair], base, params)
        qa_graphs = solexaqa_plots([fastq_one, fastq_pair], params, work_dir)

        # add read_size to the total summary table
        summary_table[0] = (summary_table[0][0], summary_table[0][1],
                            "%sbp %s" % (read_size, summary_table[0][-1]))
        ref_org = os.path.splitext(os.path.split(ref_file)[-1])[0]
        summary_table.insert(
            0, ("Reference organism", ref_org.replace("_", " "), ""))
        tmpl = Template(section_template)
        # Underscores are escaped for LaTeX; "\\_" is the explicit spelling
        # of the backslash-underscore pair.
        sample_name = "%s (%s)" % (sample_name.replace("_", "\\_"),
                                   base.replace("_", " "))
        section = tmpl.render(name=sample_name,
                              summary=None,
                              summary_table=summary_table,
                              # keep only graphs that have a file
                              figures=[(f, c) for (f, c) in metrics_graphs +
                                       fastq_graphs + qa_graphs if f],
                              recal_figures=_get_recal_plots(work_dir,
                                                             align_bam))
        out_file = "%s-summary.tex" % base
        out_tmpl = Template(base_template)
        with open(out_file, "w") as out_handle:
            out_handle.write(out_tmpl.render(parts=[section]))
        run_pdflatex(out_file, params)
    finally:
        shutil.rmtree(tmp_dir)
Example #6
0
def main(picard_dir,
         align_bam,
         ref_file,
         is_paired,
         bait_file=None,
         target_file=None,
         do_sort=False,
         sample_name="",
         config=None):
    """Build a LaTeX summary report for an alignment and run pdflatex on it.

    Args:
        picard_dir: passed to ``BroadRunner`` to locate Picard.
        align_bam: alignment BAM file; replaced by the output of
            ``picard_sort`` when ``do_sort`` is true.
        ref_file: reference file passed to the metrics report and to
            ``_update_summary_table``.
        is_paired: whether the alignment is paired-end; forwarded to the
            Picard metrics report.
        bait_file: bait intervals handed to the Picard metrics report.
        target_file: target intervals handed to the Picard metrics report.
        do_sort: sort the BAM with Picard before collecting metrics.
        sample_name: name used in the report section heading; when None,
            the FastQC "Filename" statistic is used instead.
        config: optional path to a YAML file; its "program" section gives
            the parameters and "algorithm"/"java_memory" the Picard memory.

    The report is written to ``<base>-summary.tex``, where ``<base>`` is
    ``align_bam`` without its extension and with "." replaced by "-".
    """
    with utils.curdir_tmpdir() as tmp_dir:
        work_dir = os.getcwd()
        params = {}
        java_memory = ""
        if config:
            with open(config) as in_handle:
                # NOTE(review): yaml.load without an explicit Loader can
                # construct arbitrary objects; consider yaml.safe_load if
                # the config may come from an untrusted source.
                info = yaml.load(in_handle)
                params = info["program"]
                java_memory = info["algorithm"].get("java_memory", "")
        picard = BroadRunner(picard_dir, max_memory=java_memory)
        if do_sort:
            align_bam = picard_sort(picard, align_bam, tmp_dir)

        metrics = PicardMetrics(picard, tmp_dir)
        summary_table, metrics_graphs = metrics.report(align_bam, ref_file,
                                                       is_paired, bait_file,
                                                       target_file)
        # Picard graphs get a fixed 0.75 third element to match the
        # three-element FastQC graph tuples.
        metrics_graphs = [(p, c, 0.75) for p, c in metrics_graphs]
        base = os.path.splitext(align_bam)[0].replace(".", "-")
        fastqc_graphs, fastqc_stats, fastqc_overrep = \
            fastqc_report(align_bam, params)

        all_graphs = fastqc_graphs + metrics_graphs
        summary_table = _update_summary_table(summary_table, ref_file,
                                              fastqc_stats)
        tmpl = Template(section_template)
        # NOTE(review): the default sample_name is "", so this branch only
        # triggers when a caller passes None explicitly -- confirm intent.
        if sample_name is None:
            sample_name = fastqc_stats["Filename"]
        # Underscores are escaped for LaTeX; "\\_" is the explicit spelling
        # of the backslash-underscore pair.
        sample_name = "%s (%s)" % (sample_name.replace("_", "\\_"),
                                   base.replace("_", " "))
        section = tmpl.render(
            name=sample_name,
            summary=None,
            summary_table=summary_table,
            # keep only graphs that have a file
            figures=[(f, c, i) for (f, c, i) in all_graphs if f],
            overrep=fastqc_overrep,
            recal_figures=_get_recal_plots(work_dir, align_bam))
        out_file = "%s-summary.tex" % base
        out_tmpl = Template(base_template)
        with open(out_file, "w") as out_handle:
            out_handle.write(out_tmpl.render(parts=[section]))
        run_pdflatex(out_file, params)
Example #7
0
def _graphs_and_summary(bam_file, sam_ref, is_paired, tmp_dir, config):
    """Collect Picard and FastQC graphs plus the summary table for a BAM.

    Bait and target intervals come from the "hybrid_bait" and
    "hybrid_target" entries of ``config["algorithm"]`` (None when absent).
    Returns ``(all_graphs, summary_table, fastqc_overrep)``, with the
    FastQC graphs listed before the Picard ones.
    """
    algorithm = config["algorithm"]
    bait_intervals = algorithm.get("hybrid_bait", None)
    target_intervals = algorithm.get("hybrid_target", None)
    picard_metrics = PicardMetrics(runner_from_config(config), tmp_dir)
    picard_table, picard_graphs = picard_metrics.report(
        bam_file, sam_ref, is_paired, bait_intervals, target_intervals)
    # Extend each Picard (path, caption) pair with a fixed 0.75 value.
    sized_graphs = [(path, caption, 0.75)
                    for (path, caption) in picard_graphs]
    fqc_graphs, fqc_stats, fqc_overrep = fastqc_report(bam_file, config)
    final_table = _update_summary_table(picard_table, sam_ref, fqc_stats)
    return fqc_graphs + sized_graphs, final_table, fqc_overrep
def _graphs_and_summary(bam_file, sam_ref, bait_file, output_dir, tmp_dir, config):
    """Collect Picard and FastQC graphs plus the summary table for a BAM.

    ``bait_file`` is handed to the Picard report as both the bait and the
    target intervals. Returns ``(all_graphs, summary_table,
    fastqc_overrep)``, with the FastQC graphs listed before the Picard ones.
    """
    picard_metrics = PicardMetrics(runner_from_config(config), tmp_dir)
    paired = is_paired(bam_file)
    picard_table, picard_graphs = picard_metrics.report(
        bam_file, sam_ref, paired, bait_file, bait_file, False, config)
    # Extend each Picard (path, caption) pair with a fixed 0.75 value.
    sized_graphs = [(path, caption, 0.75)
                    for (path, caption) in picard_graphs]
    fqc_graphs, fqc_stats, fqc_overrep = fastqc_report(
        bam_file, output_dir, config)
    final_table = _update_summary_table(picard_table, sam_ref, fqc_stats)
    return fqc_graphs + sized_graphs, final_table, fqc_overrep
Example #9
0
def _graphs_and_summary(bam_file, sam_ref, tmp_dir, config):
    """Collect Picard and FastQC graphs plus the summary table for a BAM.

    Bait and target intervals come from the "hybrid_bait" and
    "hybrid_target" entries of ``config["algorithm"]`` (None when absent);
    pairing is determined by calling ``is_paired`` on the BAM file.
    Returns ``(all_graphs, summary_table, fastqc_overrep)``, with the
    FastQC graphs listed before the Picard ones.
    """
    algorithm = config["algorithm"]
    bait_intervals = algorithm.get("hybrid_bait", None)
    target_intervals = algorithm.get("hybrid_target", None)
    picard_metrics = PicardMetrics(runner_from_config(config), tmp_dir)
    paired = is_paired(bam_file)
    picard_table, picard_graphs = picard_metrics.report(
        bam_file, sam_ref, paired, bait_intervals, target_intervals)
    # Extend each Picard (path, caption) pair with a fixed 0.75 value.
    sized_graphs = [(path, caption, 0.75)
                    for (path, caption) in picard_graphs]
    fqc_graphs, fqc_stats, fqc_overrep = fastqc_report(bam_file, config)
    final_table = _update_summary_table(picard_table, sam_ref, fqc_stats)
    return fqc_graphs + sized_graphs, final_table, fqc_overrep
Example #10
0
def main(picard_dir, align_bam, ref_file, fastq_one, fastq_pair=None,
        bait_file=None, target_file=None, do_sort=False, sample_name="",
        config=None):
    """Build a LaTeX summary report for an alignment and run pdflatex on it.

    Args:
        picard_dir: passed to ``BroadRunner`` to locate Picard.
        align_bam: alignment BAM file; replaced by the output of
            ``picard_sort`` when ``do_sort`` is true.
        ref_file: reference file; its base name (without extension) is
            shown as the reference organism.
        fastq_one: first fastq file.
        fastq_pair: second fastq file; the run is treated as paired when
            this is not None.
        bait_file: bait intervals handed to the Picard metrics report.
        target_file: target intervals handed to the Picard metrics report.
        do_sort: sort the BAM with Picard before collecting metrics.
        sample_name: name used in the report section heading.
        config: optional path to a YAML file whose "program" section
            supplies the parameters; ``PARAM_DEFAULTS`` is used otherwise.

    The report is written to ``<base>-summary.tex``, where ``<base>`` is
    ``align_bam`` without its extension and with "." replaced by "-".
    """
    tmp_dir = _make_tmpdir()
    # try/finally so the temporary directory is removed even when one of
    # the reporting steps raises.
    try:
        work_dir = os.getcwd()
        if config:
            with open(config) as in_handle:
                # NOTE(review): yaml.load without an explicit Loader can
                # construct arbitrary objects; consider yaml.safe_load if
                # the config may come from an untrusted source.
                params = yaml.load(in_handle)["program"]
        else:
            params = PARAM_DEFAULTS
        picard = BroadRunner(picard_dir)
        if do_sort:
            align_bam = picard_sort(picard, align_bam, tmp_dir)

        metrics = PicardMetrics(picard, tmp_dir)
        summary_table, metrics_graphs = metrics.report(
                align_bam, ref_file, fastq_pair is not None,
                bait_file, target_file)
        base = os.path.splitext(align_bam)[0].replace(".", "-")
        _, read_size, fastq_graphs = plot_fastq_stats(
                [fastq_one, fastq_pair], base, params)
        qa_graphs = solexaqa_plots([fastq_one, fastq_pair], params, work_dir)

        # add read_size to the total summary table
        summary_table[0] = (summary_table[0][0], summary_table[0][1],
                "%sbp %s" % (read_size, summary_table[0][-1]))
        ref_org = os.path.splitext(os.path.split(ref_file)[-1])[0]
        summary_table.insert(0, ("Reference organism",
            ref_org.replace("_", " "), ""))
        tmpl = Template(section_template)
        # Underscores are escaped for LaTeX; "\\_" is the explicit spelling
        # of the backslash-underscore pair.
        sample_name = "%s (%s)" % (sample_name.replace("_", "\\_"),
                base.replace("_", " "))
        section = tmpl.render(name=sample_name, summary=None,
                summary_table=summary_table,
                # keep only graphs that have a file
                figures=[(f, c) for (f, c) in metrics_graphs + fastq_graphs +
                         qa_graphs if f],
                recal_figures=_get_recal_plots(work_dir, align_bam))
        out_file = "%s-summary.tex" % base
        out_tmpl = Template(base_template)
        with open(out_file, "w") as out_handle:
            out_handle.write(out_tmpl.render(parts=[section]))
        run_pdflatex(out_file, params)
    finally:
        shutil.rmtree(tmp_dir)
def _generate_metrics(bam_fname, config_file, ref_file, bait_file,
                      target_file):
    """Run the Picard metrics report for a BAM file.

    Output goes to a "metrics" directory next to the BAM file (created
    when missing); the path of that directory is returned.
    """
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; consider yaml.safe_load for untrusted configs.
    with open(config_file) as in_handle:
        config = yaml.load(in_handle)
    picard_runner = broad.runner_from_config(config)
    bam_fname = os.path.abspath(bam_fname)
    out_dir = os.path.join(os.path.dirname(bam_fname), "metrics")
    utils.safe_makedir(out_dir)
    with utils.chdir(out_dir), tx_tmpdir() as tmp_dir:
        # Work on a symlink to the BAM placed in the current directory
        # (out_dir), referenced by its base name.
        link_name = os.path.basename(bam_fname)
        if not os.path.exists(link_name):
            os.symlink(bam_fname, link_name)
        reporter = PicardMetrics(picard_runner, tmp_dir)
        paired = _bam_is_paired(bam_fname)
        reporter.report(link_name, ref_file, paired, bait_file, target_file)
    return out_dir
Example #12
0
def _generate_metrics(bam_fname, config_file, ref_file,
                      bait_file, target_file):
    """Run the Picard metrics report for a BAM file.

    Output goes to a "metrics" directory next to the BAM file (created
    when missing); the path of that directory is returned.
    """
    picard_runner = broad.runner_from_config(load_config(config_file))
    bam_fname = os.path.abspath(bam_fname)
    out_dir = os.path.join(os.path.dirname(bam_fname), "metrics")
    utils.safe_makedir(out_dir)
    with utils.chdir(out_dir), utils.curdir_tmpdir() as tmp_dir:
        # Work on a symlink to the BAM placed in the current directory
        # (out_dir), referenced by its base name.
        link_name = os.path.basename(bam_fname)
        if not os.path.exists(link_name):
            os.symlink(bam_fname, link_name)
        reporter = PicardMetrics(picard_runner, tmp_dir)
        paired = _bam_is_paired(bam_fname)
        reporter.report(link_name, ref_file, paired, bait_file, target_file)
    return out_dir
Example #13
0
def main(picard_dir, align_bam, ref_file, is_paired, bait_file=None,
         target_file=None, do_sort=False, sample_name="", config=None):
    """Build a LaTeX summary report for an alignment and run pdflatex on it.

    Args:
        picard_dir: passed to ``BroadRunner`` to locate Picard.
        align_bam: alignment BAM file; replaced by the output of
            ``picard_sort`` when ``do_sort`` is true.
        ref_file: reference file passed to the metrics report and to
            ``_update_summary_table``.
        is_paired: whether the alignment is paired-end; forwarded to the
            Picard metrics report.
        bait_file: bait intervals handed to the Picard metrics report.
        target_file: target intervals handed to the Picard metrics report.
        do_sort: sort the BAM with Picard before collecting metrics.
        sample_name: name used in the report section heading; when None,
            the FastQC "Filename" statistic is used instead.
        config: optional path to a YAML file; its "program" section gives
            the parameters and "algorithm"/"java_memory" the Picard memory.

    The report is written to ``<base>-summary.tex``, where ``<base>`` is
    ``align_bam`` without its extension and with "." replaced by "-".
    """
    with utils.curdir_tmpdir() as tmp_dir:
        work_dir = os.getcwd()
        params = {}
        java_memory = ""
        if config:
            with open(config) as in_handle:
                # NOTE(review): yaml.load without an explicit Loader can
                # construct arbitrary objects; consider yaml.safe_load if
                # the config may come from an untrusted source.
                info = yaml.load(in_handle)
                params = info["program"]
                java_memory = info["algorithm"].get("java_memory", "")
        picard = BroadRunner(picard_dir, max_memory=java_memory)
        if do_sort:
            align_bam = picard_sort(picard, align_bam, tmp_dir)

        metrics = PicardMetrics(picard, tmp_dir)
        summary_table, metrics_graphs = metrics.report(
                align_bam, ref_file, is_paired, bait_file, target_file)
        # Picard graphs get a fixed 0.75 third element to match the
        # three-element FastQC graph tuples.
        metrics_graphs = [(p, c, 0.75) for p, c in metrics_graphs]
        base = os.path.splitext(align_bam)[0].replace(".", "-")
        fastqc_graphs, fastqc_stats, fastqc_overrep = \
            fastqc_report(align_bam, params)

        all_graphs = fastqc_graphs + metrics_graphs
        summary_table = _update_summary_table(summary_table, ref_file, fastqc_stats)
        tmpl = Template(section_template)
        # NOTE(review): the default sample_name is "", so this branch only
        # triggers when a caller passes None explicitly -- confirm intent.
        if sample_name is None:
            sample_name = fastqc_stats["Filename"]
        # Underscores are escaped for LaTeX; "\\_" is the explicit spelling
        # of the backslash-underscore pair.
        sample_name = "%s (%s)" % (sample_name.replace("_", "\\_"),
                                   base.replace("_", " "))
        section = tmpl.render(name=sample_name, summary=None,
                              summary_table=summary_table,
                              # keep only graphs that have a file
                              figures=[(f, c, i) for (f, c, i) in all_graphs if f],
                              overrep=fastqc_overrep,
                              recal_figures=_get_recal_plots(work_dir, align_bam))
        out_file = "%s-summary.tex" % base
        out_tmpl = Template(base_template)
        with open(out_file, "w") as out_handle:
            out_handle.write(out_tmpl.render(parts=[section]))
        run_pdflatex(out_file, params)