def create_bed_file(bed_txt_path, bed_coordinates):
    # Write the coordinates as tab-separated BED rows, then compress and index the file.
    with bed_txt_path.open("w") as bed_file:
        for bed_coordinate in bed_coordinates:
            print(*bed_coordinate, sep="\t", file=bed_file)
    bed_gz_path = compress_using_bgzip(bed_txt_path)
    _ = index_using_tabix(bed_gz_path)
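# A minimal sketch of the helpers assumed above (compress_using_bgzip and
# index_using_tabix), written here against the htslib command-line tools.
# The real helpers may differ: the `force` flag mirrors the force_overwrite
# argument seen in _prepare_annotation, and the "bed" preset is an assumption.
import subprocess
from pathlib import Path


def compress_using_bgzip(txt_path: Path, force: bool = False) -> Path:
    """Compress a plain-text file with bgzip and return the resulting .gz path."""
    args = ["bgzip"]
    if force:
        args.append("-f")  # overwrite an existing .gz file
    subprocess.run([*args, str(txt_path)], check=True)
    return Path(str(txt_path) + ".gz")


def index_using_tabix(gz_path: Path, force: bool = False, preset: str = "bed") -> Path:
    """Build a tabix index for a bgzipped file and return the .tbi path."""
    args = ["tabix", "-p", preset]
    if force:
        args.append("-f")  # overwrite an existing index
    subprocess.run([*args, str(gz_path)], check=True)
    return Path(str(gz_path) + ".tbi")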
def _prepare_annotation(self) -> Tuple[Path, Path]:
    log.print_progress("Data preprocessing to prepare CWAS annotation step")

    with self.bed_key_list_path.open() as bed_key_list_file:
        bed_key_list = yaml.safe_load(bed_key_list_file)

    bed_file_and_keys = []
    for bed_filename, bed_key in bed_key_list.items():
        bed_file_path = self.annot_data_dir / bed_filename
        bed_file_and_keys.append((bed_file_path, bed_key))

    log.print_progress("Merge all of your annotation BED files into one BED file")
    merge_bed_path = self.workspace / "merged_annotation.bed"
    merge_bed_files(
        merge_bed_path,
        bed_file_and_keys,
        self.num_proc,
        self.force_overwrite,
    )

    log.print_progress("Compress your BED file.")
    bed_gz_path = compress_using_bgzip(merge_bed_path, self.force_overwrite)

    log.print_progress("Make an index of your BED file.")
    bed_idx_path = index_using_tabix(bed_gz_path, self.force_overwrite)

    return bed_gz_path, bed_idx_path
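# A minimal sketch of the BED key list consumed by _prepare_annotation.
# The filenames and keys below are hypothetical; the only structure the code
# above relies on is a flat YAML mapping of BED filename -> annotation key.
import yaml

example_bed_key_list = yaml.safe_load(
    "encode_dnase.bed.gz: DNase\n"
    "encode_tfbs.bed.gz: TFBS\n"
)
for bed_filename, bed_key in example_bed_key_list.items():
    print(bed_filename, "->", bed_key)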
def create_bed_files(bed_txt_paths, input_coordinates):
    # create_bed_file already compresses and indexes each BED file,
    # so no separate bgzip/tabix calls are needed here.
    for bed_txt_path, coordinates_per_file in zip(bed_txt_paths, input_coordinates):
        create_bed_file(bed_txt_path, bed_entries(coordinates_per_file))
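# A hypothetical usage sketch for create_bed_files. The paths and coordinate
# tuples are illustrative only, and bed_entries is assumed to turn each
# per-file specification into rows that print as tab-separated BED columns.
from pathlib import Path

bed_txt_paths = [Path("annot_a.bed"), Path("annot_b.bed")]
input_coordinates = [
    [("chr1", 100, 200), ("chr1", 300, 400)],
    [("chr2", 500, 600)],
]
create_bed_files(bed_txt_paths, input_coordinates)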
def process_vep_vcf(self):
    print_progress("Compress the VEP output using bgzip")
    vcf_gz_path = compress_using_bgzip(self.vep_output_vcf_path)

    print_progress("Create an index of the VEP output using tabix")
    index_using_tabix(vcf_gz_path)
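# For VCF input, tabix uses its "vcf" preset rather than the "bed" preset
# assumed in the helper sketch above. A minimal sketch of the equivalent
# htslib commands, using a hypothetical path output.vep.vcf:
import subprocess

subprocess.run(["bgzip", "-f", "output.vep.vcf"], check=True)
subprocess.run(["tabix", "-p", "vcf", "output.vep.vcf.gz"], check=True)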