Example #1
0
def postprocess_alignment(data):
    """Perform post-processing steps required on full BAM files.
    Prepares list of callable genome regions allowing subsequent parallelization.
    """
    data = cwlutils.normalize_missing(utils.to_single_data(data))
    data = cwlutils.unpack_tarballs(data, data)
    bam_file = data.get("align_bam") or data.get("work_bam")
    if vmulti.bam_needs_processing(data) and bam_file and bam_file.endswith(".bam"):
        ref_file = dd.get_ref_file(data)
        out_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data), "align",
                                                  dd.get_sample_name(data)))
        bam_file_ready = os.path.join(out_dir, os.path.basename(bam_file))
        if not utils.file_exists(bam_file_ready):
            utils.symlink_plus(bam_file, bam_file_ready)
        bam.index(bam_file_ready, data["config"])
        covinfo = callable.sample_callable_bed(bam_file_ready, ref_file, data)
        callable_region_bed, nblock_bed, callable_bed = \
            callable.block_regions(covinfo.raw_callable, bam_file_ready, ref_file, data)
        data["regions"] = {"nblock": nblock_bed,
                           "callable": callable_bed,
                           "sample_callable": covinfo.callable,
                           "mapped_stats": readstats.get_cache_file(data)}
        data["depth"] = covinfo.depth_files
        data = coverage.assign_interval(data)
        if (os.path.exists(callable_region_bed) and
                not data["config"]["algorithm"].get("variant_regions")):
            data["config"]["algorithm"]["variant_regions"] = callable_region_bed
            data = clean_inputs(data)
        data = recalibrate.prep_recal(data)
        data = recalibrate.apply_recal(data)
    return [[data]]
Example #2
0
def postprocess_alignment(data):
    """Perform post-processing steps required on full BAM files.
    Prepares list of callable genome regions allowing subsequent parallelization.
    """
    data = cwlutils.normalize_missing(utils.to_single_data(data))
    data = cwlutils.unpack_tarballs(data, data)
    bam_file = data.get("align_bam") or data.get("work_bam")
    ref_file = dd.get_ref_file(data)
    artifacts = gatk.collect_artifact_metrics(data)
    if artifacts:
        data = dd.update_summary_qc(data, "picard", artifacts.pop(), artifacts)
        oxog = gatk.collect_oxog_metrics(data)
        data = dd.update_summary_qc(data, "picard", oxog.pop(), oxog)
    if vmulti.bam_needs_processing(data) and bam_file and bam_file.endswith(
            ".bam"):
        out_dir = utils.safe_makedir(
            os.path.join(dd.get_work_dir(data), "align",
                         dd.get_sample_name(data)))
        bam_file_ready = os.path.join(out_dir, os.path.basename(bam_file))
        if not utils.file_exists(bam_file_ready):
            utils.symlink_plus(bam_file, bam_file_ready)
        bam.index(bam_file_ready, data["config"])
        covinfo = callable.sample_callable_bed(bam_file_ready, ref_file, data)
        callable_region_bed, nblock_bed = \
            callable.block_regions(covinfo.raw_callable, bam_file_ready, ref_file, data)
        data["regions"] = {
            "nblock": nblock_bed,
            "callable": covinfo.raw_callable,
            "sample_callable": covinfo.callable,
            "mapped_stats": readstats.get_cache_file(data)
        }
        data["depth"] = covinfo.depth_files
        data = coverage.assign_interval(data)
        data = samtools.run_and_save(data)
        data = recalibrate.prep_recal(data)
        data = recalibrate.apply_recal(data)
    elif dd.get_variant_regions(data):
        callable_region_bed, nblock_bed = \
            callable.block_regions(dd.get_variant_regions(data), bam_file, ref_file, data)
        data["regions"] = {
            "nblock": nblock_bed,
            "callable": dd.get_variant_regions(data),
            "sample_callable": dd.get_variant_regions(data)
        }
    return [[data]]
Example #3
0
def prep_recal(*args):
    return recalibrate.prep_recal(*args)
Example #4
0
def _recal_no_markduplicates(data):
    orig_config = copy.deepcopy(data["config"])
    data["config"]["algorithm"]["mark_duplicates"] = False
    data = recalibrate.prep_recal(data)[0][0]
    data["config"] = orig_config
    return data
Example #5
0
def prep_recal(*args):
    return recalibrate.prep_recal(*args)
Example #6
0
def _recal_no_markduplicates(data):
    orig_config = copy.deepcopy(data["config"])
    data["config"]["algorithm"]["mark_duplicates"] = False
    data = recalibrate.prep_recal(data)[0][0]
    data["config"] = orig_config
    return data