def convert_sam_to_bam(sam_paths, output_dir="02_reference_alignment"):
    """Convert each SAM file to BAM and return the resulting BAM paths.

    The BAM path is derived from the SAM path by replacing ".sam" with
    ".bam". A SAM file whose BAM counterpart already exists on disk is
    skipped, but its BAM path is still included in the result.

    Args:
        sam_paths: Iterable of SAM file paths (each is passed through
            ``str()`` before the suffix is replaced).
        output_dir: Not read by this function; kept so existing callers
            that pass it keep working.

    Returns:
        A list with one BAM path per input path, in input order.

    Raises:
        AssertionError: If ``sam_paths`` yields no paths.
    """
    bam_paths = []
    for sam_path in sam_paths:
        bam_path = str(sam_path).replace(".sam", ".bam")
        bam_paths.append(bam_path)
        if os.path.exists(bam_path):
            # Already converted on an earlier run; do not redo the work.
            continue
        # The output must be the BAM path. Passing the SAM path as the
        # destination would target the input file and never create the BAM
        # file that is returned to the caller.
        samtools.view(sam_path, bam_path)
    assert bam_paths, "convert_sam_to_bam() was given no SAM paths"
    return bam_paths
def prepare_alignment(fasta_path, sam_paths, output_dir, add_sequence_dicts=False):
    """Run the SAM post-processing steps and return the sorted BAM path.

    Steps, in order: optionally add sequence dictionaries, add read groups,
    sort the SAM files, merge them when there is more than one, convert the
    result to BAM (skipped if the BAM file already exists) and sort the BAM.
    A progress line is printed before each step.

    Args:
        fasta_path: Passed to ``add_sequence_dict``; only used when
            ``add_sequence_dicts`` is true.
        sam_paths: Sequence of SAM file path strings.
        output_dir: Directory handed to every step; created if missing.
        add_sequence_dicts: When true, run ``add_sequence_dict`` first.

    Returns:
        Whatever ``sort_bam`` returns for the converted BAM file.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Step: Add sequence dicts.
    if add_sequence_dicts:
        print("+++ Adding sequence dictionaries to: {}".format(", ".join(sam_paths)))
        sam_paths = add_sequence_dict(fasta_path, sam_paths, output_dir)

    # Step: Picardtools: Add read groups.
    print("+++ Adding read groups to: {}".format(", ".join(sam_paths)))
    sam_paths = add_read_groups(sam_paths, output_dir)

    # Step: Picardtools: Sort.
    print("+++ Sorting: {}".format(", ".join(sam_paths)))
    sam_paths = sort_sams(sam_paths, output_dir)

    # Step: Picardtools: Merge (a single file is used as-is).
    print("+++ Merging: {}".format(", ".join(sam_paths)))
    if len(sam_paths) == 1:
        sam_path = sam_paths[0]
    else:
        sam_path = merge_sams(sam_paths, output_dir)

    # Step: Samtools: BAM
    # Report the file that is actually converted (the merged SAM), not the
    # list of pre-merge inputs.
    print("+++ Converting to BAM format: {}".format(sam_path))
    bam_path = sam_path.replace(".sam", ".bam")
    if not os.path.exists(bam_path):
        # NOTE(review): bam_path is rebound to samtools.view's return value;
        # presumably that is the output path -- confirm against samtools.view.
        bam_path = samtools.view(sam_path, bam_path)

    # Step: Sort BAM
    print("+++ Sorting: {}".format(bam_path))
    bam_path = sort_bam(bam_path, output_dir)
    return bam_path