def _sam2bam2sorted_bam(self):
    """Convert the mapped reads to BAM and sort them in one shell pipeline.

    Removes every file whose name starts with the sorted file name and the
    previous conversion log, pipes ``samtools view`` into ``samtools sort``,
    writes the combined command output to the conversion log file and logs
    the sorted file name. All file names are read from ``self._pk``.
    """
    import shlex

    sorted_file = self._pk.samtools_sorted_file_name
    log_file = self._pk.samtools_converted_log_file_name
    # Paths are shell-quoted so names with spaces or metacharacters survive;
    # the trailing glob stays outside the quotes so the shell still expands it.
    subprocess.getoutput("rm -f {}*".format(shlex.quote(str(sorted_file))))
    Utilities.batch_remove(log_file)
    # SamTools details: http://www.htslib.org/doc/samtools.html
    # Piping "view" straight into "sort" avoids writing the intermediate
    # self._pk.samtools_converted_file_name.
    # NOTE(review): in samtools >= 1.3 "-o" expects the output file as its
    # argument, so "-o -@ 1 <file>" may not be parsed as intended - confirm
    # against the samtools version this pipeline targets.
    command = "samtools view -bu -@ 1 {a} | samtools sort - -o -@ 1 {b}".format(
        a=shlex.quote(str(self._pk.mapped_reads_file_name)),
        b=shlex.quote(str(sorted_file)))
    output = subprocess.getoutput(command)
    Utilities.dump_string(string=output, file=log_file)
    logging.info("Sorted SAM file: '{}'".format(sorted_file))
def compile(input_file: str, output_dir: str, preserve_headers: bool = False, chop: bool = False,
            chunk_length: int = int(3.6 * 10**9)):
    """Index every nFASTA derived from *input_file* and write a JSON linker file.

    The input is handed to ``FASTAArray.prepare_nfasta_for_indexing``; every
    returned entry that carries a non-empty "reference_nfasta" value is
    indexed through ``RefDataLine`` and collected. The collected dictionaries
    are dumped as indented JSON to "<output_dir><input name>_refdata.json".

    :param input_file: source file passed to the nFASTA preparation step
    :param output_dir: directory for all outputs; created when missing
    :param preserve_headers: forwarded to the preparation step
    :param chop: forwarded to the preparation step
    :param chunk_length: forwarded to the preparation step
    :return: name of the created JSON file
    """
    import json
    from modules.FASTAArray import FASTAArray
    from modules.RefDataLine import RefDataLine

    output_dir = Utilities.ends_with_slash(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    prepared = FASTAArray.prepare_nfasta_for_indexing(
        input_file=input_file,
        output_dir=output_dir,
        preserve_headers=preserve_headers,
        chop=chop,
        chunk_length=chunk_length)

    linker = {}
    for sequence_id, annotation in prepared.items():
        nfasta = annotation.get("reference_nfasta")
        if not nfasta:
            # Nothing to index for this entry
            continue
        # Later updates win: the annotation overrides the computed fields
        entry = {"alias": Utilities.filename_only(nfasta)}
        entry.update(RefDataLine.fill_dict(nfasta))
        entry.update(annotation)
        print("Processing nFASTA: '{}'".format(nfasta))
        RefDataLine(entry).index()
        linker[sequence_id] = entry

    directory = Utilities.ends_with_slash(output_dir)
    stem = Utilities.filename_only(input_file)
    output_file = "{a}{b}_refdata.json".format(a=directory, b=stem)
    serialized = json.dumps(linker, sort_keys=False, indent=4) + "\n"
    Utilities.dump_string(string=serialized, file=output_file)
    print("Created reference data linker: '{}'".format(output_file))
    return output_file
def __samtools_faidx(self):
    """Build the SAMtools FAI index for ``self._nfasta``.

    Runs ``samtools faidx``, stores the command output in
    "<reference mask>_samtools_faidx.log", renames the produced
    "<nfasta>.fai" to ``self.samtools_index_file`` and then calls
    ``self.___fai2genome()``.

    :raises OSError: from ``os.rename`` when the ".fai" file was not created
    """
    import shlex

    # The path is shell-quoted so names with spaces or metacharacters survive
    output = subprocess.getoutput("samtools faidx {}".format(shlex.quote(str(self._nfasta))))
    Utilities.dump_string(string=output, file="{}_samtools_faidx.log".format(self._reference_mask))
    os.rename("{}.fai".format(self._nfasta), self.samtools_index_file)
    print("Created SAMTools FAI file: '{}'".format(self.samtools_index_file))
    self.___fai2genome()
def __bowtie2_build(self):
    """Build the bowtie2 index for ``self._nfasta``.

    Runs ``bowtie2-build`` with ``self.bowtie2_index_mask`` as the index base
    name and stores the command output in "<reference mask>_bowtie2-build.log".
    """
    import shlex

    # Both paths are shell-quoted so names with spaces or metacharacters survive
    command = "bowtie2-build {a} {b}".format(
        a=shlex.quote(str(self._nfasta)),
        b=shlex.quote(str(self.bowtie2_index_mask)))
    output = subprocess.getoutput(command)
    Utilities.dump_string(string=output, file="{}_bowtie2-build.log".format(self._reference_mask))
    print("Created bowtie2 index with mask: '{}'".format(self.bowtie2_index_mask))
def _index_bam(self):
    """Index the sorted BAM file with ``samtools index``.

    Removes the previous index and index log, runs ``samtools index`` on
    ``self._pk.samtools_sorted_file_name``, writes the command output to the
    index log file and logs the index file name.
    """
    import shlex

    log_file = self._pk.samtools_index_log_file_name
    Utilities.batch_remove(self._pk.samtools_index_file_name, log_file)
    # The path is shell-quoted so names with spaces or metacharacters survive
    output = subprocess.getoutput(
        "samtools index {}".format(shlex.quote(str(self._pk.samtools_sorted_file_name))))
    Utilities.dump_string(string=output, file=log_file)
    logging.info("Indexed BAM file: '{}'".format(self._pk.samtools_index_file_name))