Esempio n. 1
0
def postprocess_alignment(data):
    """Run post-alignment preparation steps on a sample's full BAM file.

    Processing only happens when ``vmulti.bam_needs_processing`` accepts the
    sample and its ``work_bam`` path ends in ``.bam``; any other sample is
    passed through untouched. For processed samples the function:

    - computes block regions, the ``highdepth.identify`` result, the sample
      callable BED and off-target statistics, storing them in
      ``data["regions"]``;
    - indexes the working BAM;
    - assigns coverage intervals via ``coverage.assign_interval``;
    - uses the callable regions BED as ``variant_regions`` when that file
      exists and no ``variant_regions`` is configured, then re-cleans inputs;
    - finishes with ``_recal_no_markduplicates``.

    Returns the sample wrapped as ``[[data]]``.
    """
    if not vmulti.bam_needs_processing(data):
        return [[data]]
    if not data["work_bam"].endswith(".bam"):
        return [[data]]

    reference = dd.get_ref_file(data)
    block_beds = callable.block_regions(data["work_bam"], reference, data)
    callable_region_bed, nblock_bed, callable_bed = block_beds
    deep_bed = highdepth.identify(data)
    bam.index(data["work_bam"], data["config"])
    per_sample_bed = callable.sample_callable_bed(
        data["work_bam"], reference, data)
    offtarget = callable.calculate_offtarget(
        data["work_bam"], reference, data)

    region_info = {"nblock": nblock_bed,
                   "callable": callable_bed,
                   "highdepth": deep_bed,
                   "sample_callable": per_sample_bed,
                   "offtarget_stats": offtarget}
    data["regions"] = region_info
    data = coverage.assign_interval(data)

    # Fall back to the computed callable regions only when the file is on
    # disk and the configuration does not already name variant regions.
    if os.path.exists(callable_region_bed):
        algorithm = data["config"]["algorithm"]
        if not algorithm.get("variant_regions"):
            algorithm["variant_regions"] = callable_region_bed
            data = bedutils.clean_inputs(data)

    return [[_recal_no_markduplicates(data)]]
Esempio n. 2
0
def postprocess_alignment(data):
    """Run post-alignment preparation steps on a sample's full BAM file.

    Input BED files are cleaned first through ``bedutils.clean_inputs``.
    When ``vmulti.bam_needs_processing`` accepts the sample and ``work_bam``
    ends in ``.bam``, block regions are computed and stored under
    ``data["regions"]``; the callable regions BED becomes ``variant_regions``
    if the file exists and none is configured (inputs are then cleaned
    again), and ``_recal_no_markduplicates`` is applied.

    Returns the sample wrapped as ``[[data]]``.
    """
    data = bedutils.clean_inputs(data)
    if not vmulti.bam_needs_processing(data):
        return [[data]]
    if not data["work_bam"].endswith(".bam"):
        return [[data]]

    block_beds = callable.block_regions(data["work_bam"], data["sam_ref"],
                                        data["config"])
    callable_region_bed, nblock_bed, callable_bed = block_beds
    data["regions"] = {
        "nblock": nblock_bed,
        "callable": callable_bed,
    }

    # Fall back to the computed callable regions only when the file is on
    # disk and the configuration does not already name variant regions.
    if os.path.exists(callable_region_bed):
        algorithm = data["config"]["algorithm"]
        if not algorithm.get("variant_regions"):
            algorithm["variant_regions"] = callable_region_bed
            data = bedutils.clean_inputs(data)

    return [[_recal_no_markduplicates(data)]]
Esempio n. 3
0
def postprocess_alignment(data):
    """Run post-alignment preparation steps on a sample's full BAM file.

    The input is first normalized with ``utils.to_single_data``. The BAM to
    process is ``align_bam`` if set, otherwise ``work_bam``. Processing only
    happens when ``vmulti.bam_needs_processing`` accepts the sample and that
    BAM path is non-empty and ends in ``.bam``; other samples pass through.

    For processed samples the BAM is symlinked into
    ``<work_dir>/align/<sample_name>/`` (unless already present there) and
    indexed; region BEDs, off-target statistics, coverage intervals and the
    ``highdepth.identify`` result are then attached to the sample. The
    callable regions BED becomes ``variant_regions`` when the file exists and
    none is configured, followed by ``_recal_no_markduplicates``.

    Returns the sample wrapped as ``[[data]]``.
    """
    data = utils.to_single_data(data)
    source_bam = data.get("align_bam") or data.get("work_bam")
    if not vmulti.bam_needs_processing(data):
        return [[data]]
    if not source_bam or not source_bam.endswith(".bam"):
        return [[data]]

    reference = dd.get_ref_file(data)
    align_dir = os.path.join(dd.get_work_dir(data), "align",
                             dd.get_sample_name(data))
    align_dir = utils.safe_makedir(align_dir)

    # Work on a link inside the sample's align directory rather than on the
    # original BAM location.
    ready_bam = os.path.join(align_dir, os.path.basename(source_bam))
    if not utils.file_exists(ready_bam):
        utils.symlink_plus(source_bam, ready_bam)
    bam.index(ready_bam, data["config"])

    block_beds = callable.block_regions(ready_bam, reference, data)
    callable_region_bed, nblock_bed, callable_bed = block_beds
    per_sample_bed = callable.sample_callable_bed(ready_bam, reference, data)
    offtarget = callable.calculate_offtarget(ready_bam, reference, data)
    data["regions"] = {
        "nblock": nblock_bed,
        "callable": callable_bed,
        "sample_callable": per_sample_bed,
        "offtarget_stats": offtarget,
    }
    data = coverage.assign_interval(data)

    # High depth identification runs after interval assignment, so its
    # result is added to the already stored regions.
    deep_bed = highdepth.identify(data)
    data["regions"]["highdepth"] = deep_bed

    # Fall back to the computed callable regions only when the file is on
    # disk and the configuration does not already name variant regions.
    if os.path.exists(callable_region_bed):
        algorithm = data["config"]["algorithm"]
        if not algorithm.get("variant_regions"):
            algorithm["variant_regions"] = callable_region_bed
            data = bedutils.clean_inputs(data)

    return [[_recal_no_markduplicates(data)]]
Esempio n. 4
0
def postprocess_alignment(data):
    """Run post-alignment preparation steps on a sample's full BAM file.

    Input BED files are cleaned first through ``bedutils.clean_inputs``.
    When ``vmulti.bam_needs_processing`` accepts the sample, block regions
    are computed and stored under ``data["regions"]``; the callable regions
    BED becomes ``variant_regions`` if the file exists and none is configured
    (inputs are then cleaned again), and ``_recal_no_markduplicates`` is
    applied.

    Returns the sample wrapped in a single list, ``[data]``.
    """
    data = bedutils.clean_inputs(data)
    if not vmulti.bam_needs_processing(data):
        return [data]

    block_beds = callable.block_regions(data["work_bam"], data["sam_ref"],
                                        data["config"])
    callable_region_bed, nblock_bed, callable_bed = block_beds
    data["regions"] = {
        "nblock": nblock_bed,
        "callable": callable_bed,
    }

    # Fall back to the computed callable regions only when the file is on
    # disk and the configuration does not already name variant regions.
    if os.path.exists(callable_region_bed):
        algorithm = data["config"]["algorithm"]
        if not algorithm.get("variant_regions"):
            algorithm["variant_regions"] = callable_region_bed
            data = bedutils.clean_inputs(data)

    return [_recal_no_markduplicates(data)]
Esempio n. 5
0
def prep_samples(*items):
    """Run global preparation steps for samples that may share data.

    Doing this work up front avoids race conditions in postprocess alignment
    when several similar samples rely on the same shared files.

    Each argument is expected to be a sequence whose first element is the
    sample; that sample's input BED files are cleaned to avoid issues with
    overlapping input segments.

    Returns a list with one single-element list per cleaned sample.
    """
    return [[bedutils.clean_inputs(item[0])] for item in items]
Esempio n. 6
0
def prep_samples(*items):
    """Run global preparation steps for samples that may share data.

    Doing this work up front avoids race conditions in postprocess alignment
    when several similar samples rely on the same shared files.

    Each argument is normalized with ``utils.to_single_data`` and its input
    BED files are then cleaned to avoid issues with overlapping input
    segments.

    Returns a list with one single-element list per cleaned sample.
    """
    return [[bedutils.clean_inputs(utils.to_single_data(item))]
            for item in items]
Esempio n. 7
0
def prep_samples(*items):
    """Run global preparation steps for samples that may share data.

    Doing this work up front avoids race conditions in postprocess alignment
    when several similar samples rely on the same shared files.

    Input BED files of every sample are cleaned to avoid issues with
    overlapping input segments.

    Accepts both single sample cases (CWL) and all sample cases (standard
    bcbio): an argument that is a one-element list or tuple is unwrapped,
    anything else is used as given.

    Returns a list with one single-element list per cleaned sample.
    """
    prepared = []
    for item in items:
        # Unwrap samples that arrive nested in a one-element list or tuple.
        if isinstance(item, (list, tuple)) and len(item) == 1:
            item = item[0]
        prepared.append([bedutils.clean_inputs(item)])
    return prepared
Esempio n. 8
0
def postprocess_alignment(data):
    """Run post-alignment preparation steps on a sample's full BAM file.

    Processing only happens when ``vmulti.bam_needs_processing`` accepts the
    sample and its ``work_bam`` path ends in ``.bam``; any other sample is
    passed through untouched. For processed samples, block regions and the
    ``highdepth.identify`` result are stored under ``data["regions"]``; the
    callable regions BED becomes ``variant_regions`` if the file exists and
    none is configured (inputs are then cleaned), and
    ``_recal_no_markduplicates`` is applied.

    Returns the sample wrapped as ``[[data]]``.
    """
    if not vmulti.bam_needs_processing(data):
        return [[data]]
    if not data["work_bam"].endswith(".bam"):
        return [[data]]

    block_beds = callable.block_regions(data["work_bam"], data["sam_ref"],
                                        data["config"])
    callable_region_bed, nblock_bed, callable_bed = block_beds
    deep_bed = highdepth.identify(data)
    data["regions"] = {"nblock": nblock_bed,
                       "callable": callable_bed,
                       "highdepth": deep_bed}

    # Fall back to the computed callable regions only when the file is on
    # disk and the configuration does not already name variant regions.
    if os.path.exists(callable_region_bed):
        algorithm = data["config"]["algorithm"]
        if not algorithm.get("variant_regions"):
            algorithm["variant_regions"] = callable_region_bed
            data = bedutils.clean_inputs(data)

    return [[_recal_no_markduplicates(data)]]
Esempio n. 9
0
def postprocess_alignment(data):
    """Run post-alignment preparation steps on a sample's full BAM file.

    Processing only happens when ``vmulti.bam_needs_processing`` accepts the
    sample and its ``work_bam`` path ends in ``.bam``; any other sample is
    passed through untouched. For processed samples the function:

    - computes block regions, the ``highdepth.identify`` result, the sample
      callable BED and off-target statistics, storing them in
      ``data["regions"]``;
    - assigns coverage intervals via ``coverage.assign_interval``;
    - uses the callable regions BED as ``variant_regions`` when that file
      exists and no ``variant_regions`` is configured, then re-cleans inputs;
    - finishes with ``_recal_no_markduplicates``.

    Returns the sample wrapped as ``[[data]]``.
    """
    if not vmulti.bam_needs_processing(data):
        return [[data]]
    if not data["work_bam"].endswith(".bam"):
        return [[data]]

    reference = dd.get_ref_file(data)
    block_beds = callable.block_regions(data["work_bam"], reference, data)
    callable_region_bed, nblock_bed, callable_bed = block_beds
    deep_bed = highdepth.identify(data)
    per_sample_bed = callable.sample_callable_bed(
        data["work_bam"], reference, data)
    offtarget = callable.calculate_offtarget(
        data["work_bam"], reference, data)

    region_info = {
        "nblock": nblock_bed,
        "callable": callable_bed,
        "highdepth": deep_bed,
        "sample_callable": per_sample_bed,
        "offtarget_stats": offtarget,
    }
    data["regions"] = region_info
    data = coverage.assign_interval(data)

    # Fall back to the computed callable regions only when the file is on
    # disk and the configuration does not already name variant regions.
    if os.path.exists(callable_region_bed):
        algorithm = data["config"]["algorithm"]
        if not algorithm.get("variant_regions"):
            algorithm["variant_regions"] = callable_region_bed
            data = bedutils.clean_inputs(data)

    return [[_recal_no_markduplicates(data)]]