def align_to_sort_bam(fastq1, fastq2, aligner, data):
    """Align to the named genome build, returning a sorted BAM file.

    The alignment output directory is ``<work>/align/<sample>``, with the
    disambiguation genome build appended as an extra path component when
    ``data["disambiguate"]`` is set. The directory is created if needed.

    Args:
        fastq1: Path to the first input file. When it ends with ``.bam`` the
            input is routed through ``_align_from_bam``; otherwise through
            ``_align_from_fastq``.
        fastq2: Second input file, only passed on to ``_align_from_fastq``.
        aligner: Aligner identifier, used to look up the aligner index and
            passed to the alignment helper.
        data: Sample dictionary. Read here: ``rgnames`` (with ``sample``),
            ``dirs.work``, ``config``, ``reference.fasta.base`` and the
            optional ``disambiguate`` and ``align_split`` entries.

    Returns:
        The ``data`` dictionary returned by the alignment helper. If its
        ``work_bam`` exists on disk it has been indexed (or given a
        placeholder index), along with any existing ``-sr``/``-disc``
        companion BAM files.
    """
    names = data["rgnames"]
    align_dir_parts = [data["dirs"]["work"], "align", names["sample"]]
    if data.get("disambiguate"):
        align_dir_parts.append(data["disambiguate"]["genome_build"])
    aligner_index = _get_aligner_index(aligner, data)
    # Argument unpacking replaces the Python 2-only ``apply`` builtin, which
    # no longer exists in Python 3.
    align_dir = utils.safe_makedir(os.path.join(*align_dir_parts))
    ref_file = tz.get_in(("reference", "fasta", "base"), data)
    if fastq1.endswith(".bam"):
        data = _align_from_bam(fastq1, aligner, aligner_index, ref_file,
                               names, align_dir, data)
    else:
        data = _align_from_fastq(fastq1, fastq2, aligner, aligner_index,
                                 ref_file, names, align_dir, data)
    if data["work_bam"] and utils.file_exists(data["work_bam"]):
        if data.get("align_split") and dd.get_mark_duplicates(data):
            # If merging later with bamsormadup we need query sorted inputs,
            # but CWL requires a bai file. Create a fake one to make it happy.
            bam.fake_index(data["work_bam"], data)
        else:
            bam.index(data["work_bam"], data["config"])
        # Index the companion BAMs (suffixes "-sr" and "-disc") when present.
        for extra in ["-sr", "-disc"]:
            extra_bam = utils.append_stem(data['work_bam'], extra)
            if utils.file_exists(extra_bam):
                bam.index(extra_bam, data["config"])
    return data