Esempio n. 1
0
def create_bed_file(bed_txt_path, bed_coordinates):
    """Write coordinates to a text file, then compress and index it.

    Each item of ``bed_coordinates`` becomes one line of ``bed_txt_path``:
    its elements are converted with ``str`` and joined by tab characters.
    The written path is then handed to ``compress_using_bgzip`` and the
    value that returns is handed to ``index_using_tabix``.

    Args:
        bed_txt_path: Object exposing ``open("w")`` (presumably a
            ``pathlib.Path`` -- confirm against callers).
        bed_coordinates: Iterable of iterables, one per output line.

    Returns:
        None. The result of ``index_using_tabix`` is discarded.
    """
    with bed_txt_path.open("w") as out_stream:
        for fields in bed_coordinates:
            row = "\t".join(map(str, fields))
            out_stream.write(row + "\n")

    # Compression must finish before indexing; the index helper is given
    # whatever the compression helper returns.
    index_using_tabix(compress_using_bgzip(bed_txt_path))
Esempio n. 2
0
    def _prepare_annotation(self) -> Tuple[Path, Path]:
        """Merge, compress and index the annotation BED files.

        Reads the YAML document at ``self.bed_key_list_path`` as a mapping
        of BED file name to key, resolves every file name against
        ``self.annot_data_dir``, and merges the resulting files into
        ``merged_annotation.bed`` inside ``self.workspace``. The merged
        file is then passed to ``compress_using_bgzip`` and the result of
        that to ``index_using_tabix``.

        Returns:
            A pair ``(bed_gz_path, bed_idx_path)``: the values returned by
            ``compress_using_bgzip`` and ``index_using_tabix`` respectively.
        """
        log.print_progress(
            "Data preprocessing to prepare CWAS annotation step")

        with self.bed_key_list_path.open() as key_list_stream:
            key_by_filename = yaml.safe_load(key_list_stream)

        # Pair each annotation file's full path with its key, keeping the
        # order in which the entries appear in the loaded mapping.
        annotation_sources = [
            (self.annot_data_dir / filename, key)
            for filename, key in key_by_filename.items()
        ]

        log.print_progress(
            "Merge all of your annotation BED files into one BED file")
        merged_path = self.workspace / "merged_annotation.bed"
        merge_bed_files(
            merged_path,
            annotation_sources,
            self.num_proc,
            self.force_overwrite,
        )

        log.print_progress("Compress your BED file.")
        compressed_path = compress_using_bgzip(
            merged_path, self.force_overwrite)

        log.print_progress("Make an index of your BED file.")
        index_path = index_using_tabix(
            compressed_path, self.force_overwrite)

        return compressed_path, index_path
Esempio n. 3
0
def create_bed_files(bed_txt_paths, input_coordinates):
    """Create, compress and index one BED file per path.

    Paths and coordinate groups are paired positionally with ``zip``, so
    iteration stops at the end of the shorter of the two inputs. For each
    pair, the coordinates are converted by ``bed_entries`` and written via
    ``create_bed_file``; the path is then passed to
    ``compress_using_bgzip`` and that result to ``index_using_tabix``.

    NOTE(review): if the ``create_bed_file`` in scope already compresses
    and indexes its output, the second pass here repeats that work --
    confirm against its definition.

    Args:
        bed_txt_paths: Iterable of destination paths.
        input_coordinates: Iterable of coordinate collections, one per path.
    """
    path_coordinate_pairs = zip(bed_txt_paths, input_coordinates)
    for txt_path, coordinates in path_coordinate_pairs:
        entries = bed_entries(coordinates)
        create_bed_file(txt_path, entries)
        index_using_tabix(compress_using_bgzip(txt_path))
Esempio n. 4
0
 def process_vep_vcf(self):
     """Compress the VEP output VCF and build an index for it.

     Passes ``self.vep_output_vcf_path`` to ``compress_using_bgzip`` and
     the value that returns to ``index_using_tabix``, reporting progress
     before each step. Returns None.
     """
     print_progress("Compress the VEP output using bgzip")
     compressed_vcf = compress_using_bgzip(self.vep_output_vcf_path)

     # The index helper is given the compression helper's result, not the
     # original path.
     print_progress("Create an index of the VEP output using tabix")
     index_using_tabix(compressed_vcf)