def postprocess_alignment(data):
    """Perform post-processing steps required on full BAM files.

    Prepares list of callable genome regions allowing subsequent parallelization.
    Takes a sample dictionary; returns it wrapped as [[data]] for the
    parallel runner.
    """
    if vmulti.bam_needs_processing(data) and data["work_bam"].endswith(".bam"):
        ref_file = dd.get_ref_file(data)
        # Index the BAM before any region/coverage calculations: block_regions,
        # sample_callable_bed and calculate_offtarget all read the alignment
        # file by region, which requires a .bai. The CWL-aware variant of this
        # function also indexes first; previously indexing happened only after
        # block_regions/highdepth ran.
        bam.index(data["work_bam"], data["config"])
        callable_region_bed, nblock_bed, callable_bed = \
            callable.block_regions(data["work_bam"], ref_file, data)
        highdepth_bed = highdepth.identify(data)
        sample_callable = callable.sample_callable_bed(data["work_bam"], ref_file, data)
        offtarget_stats = callable.calculate_offtarget(data["work_bam"], ref_file, data)
        data["regions"] = {"nblock": nblock_bed, "callable": callable_bed,
                           "highdepth": highdepth_bed,
                           "sample_callable": sample_callable,
                           "offtarget_stats": offtarget_stats}
        data = coverage.assign_interval(data)
        # Fall back to the computed callable regions when the user supplied no
        # explicit variant_regions, then re-clean inputs so the new BED file
        # gets the same normalization as user-provided ones.
        if (os.path.exists(callable_region_bed) and
                not data["config"]["algorithm"].get("variant_regions")):
            data["config"]["algorithm"]["variant_regions"] = callable_region_bed
            data = bedutils.clean_inputs(data)
        data = _recal_no_markduplicates(data)
    return [[data]]
def postprocess_alignment(data):
    """Perform post-processing steps required on full BAM files.

    Prepares list of callable genome regions allowing subsequent parallelization.
    Cleans input BED files to avoid issues with overlapping input segments.
    """
    # Normalize user-supplied BED inputs before any region calculations.
    data = bedutils.clean_inputs(data)
    if vmulti.bam_needs_processing(data) and data["work_bam"].endswith(".bam"):
        region_bed, nblock, callable_regions = callable.block_regions(
            data["work_bam"], data["sam_ref"], data["config"])
        data["regions"] = {"nblock": nblock, "callable": callable_regions}
        alg = data["config"]["algorithm"]
        # When no explicit variant_regions were configured, fall back to the
        # computed callable regions and re-clean so the new BED file receives
        # the same normalization as user-supplied inputs.
        if not alg.get("variant_regions") and os.path.exists(region_bed):
            alg["variant_regions"] = region_bed
            data = bedutils.clean_inputs(data)
        data = _recal_no_markduplicates(data)
    return [[data]]
def postprocess_alignment(data):
    """Perform post-processing steps required on full BAM files.

    Prepares list of callable genome regions allowing subsequent parallelization.
    """
    data = utils.to_single_data(data)
    # Prefer the finalized aligned BAM; fall back to the working BAM.
    bam_file = data.get("align_bam") or data.get("work_bam")
    if vmulti.bam_needs_processing(data) and bam_file and bam_file.endswith(".bam"):
        ref_file = dd.get_ref_file(data)
        # Stage the BAM under the per-sample align directory via symlink so
        # downstream steps work from a stable, sample-local path.
        align_dir = utils.safe_makedir(
            os.path.join(dd.get_work_dir(data), "align", dd.get_sample_name(data)))
        staged_bam = os.path.join(align_dir, os.path.basename(bam_file))
        if not utils.file_exists(staged_bam):
            utils.symlink_plus(bam_file, staged_bam)
        bam.index(staged_bam, data["config"])
        region_bed, nblock_bed, callable_bed = \
            callable.block_regions(staged_bam, ref_file, data)
        data["regions"] = {
            "nblock": nblock_bed,
            "callable": callable_bed,
            "sample_callable": callable.sample_callable_bed(staged_bam, ref_file, data),
            "offtarget_stats": callable.calculate_offtarget(staged_bam, ref_file, data),
        }
        data = coverage.assign_interval(data)
        # High depth regions are identified only after interval assignment.
        data["regions"]["highdepth"] = highdepth.identify(data)
        # Fall back to the computed callable regions when the user supplied no
        # explicit variant_regions; re-clean so the new BED is normalized too.
        if (os.path.exists(region_bed) and
                not data["config"]["algorithm"].get("variant_regions")):
            data["config"]["algorithm"]["variant_regions"] = region_bed
            data = bedutils.clean_inputs(data)
        data = _recal_no_markduplicates(data)
    return [[data]]
def postprocess_alignment(data):
    """Perform post-processing steps required on full BAM files.

    Prepares list of callable genome regions allowing subsequent parallelization.
    Cleans input BED files to avoid issues with overlapping input segments.
    """
    data = bedutils.clean_inputs(data)
    if vmulti.bam_needs_processing(data):
        blocks = callable.block_regions(data["work_bam"], data["sam_ref"],
                                        data["config"])
        region_bed, nblock_bed, callable_bed = blocks
        data["regions"] = {"nblock": nblock_bed, "callable": callable_bed}
        algorithm = data["config"]["algorithm"]
        # Use the computed callable regions as variant_regions when the user
        # supplied none; re-clean so the new BED file is normalized as well.
        if os.path.exists(region_bed) and not algorithm.get("variant_regions"):
            algorithm["variant_regions"] = region_bed
            data = bedutils.clean_inputs(data)
        data = _recal_no_markduplicates(data)
    return [data]
def prep_samples(*items):
    """Handle any global preparatory steps for samples with potentially shared data.

    Avoids race conditions in postprocess alignment when performing prep tasks
    on shared files between multiple similar samples.

    Cleans input BED files to avoid issues with overlapping input segments.
    """
    prepped = []
    for item in items:
        # Each sample arrives as a one-element list; unwrap, clean, rewrap.
        sample = bedutils.clean_inputs(item[0])
        prepped.append([sample])
    return prepped
def prep_samples(*items):
    """Handle any global preparatory steps for samples with potentially shared data.

    Avoids race conditions in postprocess alignment when performing prep tasks
    on shared files between multiple similar samples.

    Cleans input BED files to avoid issues with overlapping input segments.
    """
    # Unwrap each packed sample, clean its BED inputs and rewrap for the
    # parallel runner.
    return [[bedutils.clean_inputs(utils.to_single_data(item))]
            for item in items]
def prep_samples(*items):
    """Handle any global preparatory steps for samples with potentially shared data.

    Avoids race conditions in postprocess alignment when performing prep tasks
    on shared files between multiple similar samples.

    Cleans input BED files to avoid issues with overlapping input segments.
    Handles both single sample cases (CWL) and all sample cases (standard bcbio).
    """
    def _unwrap(item):
        # Standard bcbio packs each sample as a one-element list/tuple; CWL
        # passes the sample directly.
        if isinstance(item, (list, tuple)) and len(item) == 1:
            return item[0]
        return item

    out = []
    for raw in items:
        out.append([bedutils.clean_inputs(_unwrap(raw))])
    return out
def postprocess_alignment(data):
    """Perform post-processing steps required on full BAM files.

    Prepares list of callable genome regions allowing subsequent parallelization.
    """
    if vmulti.bam_needs_processing(data) and data["work_bam"].endswith(".bam"):
        region_bed, nblock_bed, callable_bed = callable.block_regions(
            data["work_bam"], data["sam_ref"], data["config"])
        data["regions"] = {"nblock": nblock_bed,
                           "callable": callable_bed,
                           "highdepth": highdepth.identify(data)}
        algorithm = data["config"]["algorithm"]
        # Fall back to the computed callable regions when the user supplied no
        # explicit variant_regions; re-clean so the new BED is normalized too.
        if os.path.exists(region_bed) and not algorithm.get("variant_regions"):
            algorithm["variant_regions"] = region_bed
            data = bedutils.clean_inputs(data)
        data = _recal_no_markduplicates(data)
    return [[data]]
def postprocess_alignment(data):
    """Perform post-processing steps required on full BAM files.

    Prepares list of callable genome regions allowing subsequent parallelization.
    """
    if vmulti.bam_needs_processing(data) and data["work_bam"].endswith(".bam"):
        ref_file = dd.get_ref_file(data)
        region_bed, nblock_bed, callable_bed = callable.block_regions(
            data["work_bam"], ref_file, data)
        highdepth_bed = highdepth.identify(data)
        data["regions"] = {
            "nblock": nblock_bed,
            "callable": callable_bed,
            "highdepth": highdepth_bed,
            "sample_callable": callable.sample_callable_bed(data["work_bam"],
                                                            ref_file, data),
            "offtarget_stats": callable.calculate_offtarget(data["work_bam"],
                                                            ref_file, data),
        }
        data = coverage.assign_interval(data)
        algorithm = data["config"]["algorithm"]
        # When no explicit variant_regions were configured, use the computed
        # callable regions and re-clean inputs so the new BED is normalized.
        if os.path.exists(region_bed) and not algorithm.get("variant_regions"):
            algorithm["variant_regions"] = region_bed
            data = bedutils.clean_inputs(data)
        data = _recal_no_markduplicates(data)
    return [[data]]