def prepare_nfasta_for_indexing(input_file: str, output_dir: str, preserve_headers: bool = False,
                                chop: bool = False, chunk_length: int = int(3.6 * 10 ** 9)):
    array = FASTAArray.parse(Utilities.load_string(input_file))
    if not preserve_headers:
        array._fix_headers()
    output_dir = Utilities.ends_with_slash(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    output_file_mask = output_dir + Utilities.filename_only(input_file)
    annotation_file = "{}_annotation.tsv".format(output_file_mask)
    array.dump_annotation(annotation_file)
    arrays_dict = {"{}.fasta".format(output_file_mask): array}
    if chop and array.get_total_length() >= chunk_length:
        print("Too large reference nFASTA file: '{}'. Splitting sequences".format(input_file))
        arrays_dict = array._chop_sequences(chunk_length)
        arrays_dict = {"{a}_{i}.fasta".format(a=output_file_mask, i=i): arrays_dict[i] for i in arrays_dict}
    refdatas_dict = {}
    counter = 0
    for chunk_file in arrays_dict:
        counter += 1
        arrays_dict[chunk_file].dump_fastas(chunk_file)
        refdatas_dict["sequence_{}".format(counter)] = {"reference_nfasta": chunk_file,
                                                        "annotation": annotation_file}
    print("FASTA files created: {}".format(counter))
    return refdatas_dict
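# A minimal usage sketch of the helper above; the paths are hypothetical.
# With chop=False (or a reference shorter than chunk_length), the returned
# dict holds a single entry:
#
#   refdatas = FASTAArray.prepare_nfasta_for_indexing(
#       input_file="/data/reference/ref.fasta", output_dir="/data/reference/index/")
#   # refdatas -> {"sequence_1": {"reference_nfasta": "/data/reference/index/ref.fasta",
#   #                             "annotation": "/data/reference/index/ref_annotation.tsv"}}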
def _bam2stats(self):
    def __get_base_alignment_stats(string: str):
        d = {}
        # SamTools stats file columns: ID, stat, value, comment
        for line_list in Utilities.string_to_2d_array(string):
            if len(line_list) < 3 or line_list[0] != "SN":
                continue
            d[re.sub(":$", "", line_list[1])] = line_list[2]
        if len(d) == 0:
            logging.critical("Bad alignment: no SAMTools stats to extract!")
            return {}
        try:
            out = {"total_reads": d["raw total sequences"],
                   "mapped_reads": d["reads mapped"],
                   "total_bp": d["total length"],
                   "mapped_bp": d["bases mapped"]}
        except KeyError:
            return {}
        return {"sample_{}".format(k): int(out[k]) for k in out}

    Utilities.batch_remove(self._pk.samtools_stats_file_name, self._pk.samtools_stats_log_file_name)
    s = subprocess.getoutput("samtools stats {a} 2> {b}".format(a=self._pk.samtools_sorted_file_name,
                                                                b=self._pk.samtools_stats_log_file_name))
    Utilities.dump_string(string=s, file=self._pk.samtools_stats_file_name)
    logging.info("Saved SAMTools total coverage statistics: '{}'".format(self._pk.samtools_stats_file_name))
    self._samtools_stats_dict = __get_base_alignment_stats(s)
    del s
def __init__(self, path_keeper: PathsKeeper, threads_number: int):
    self._pk = path_keeper
    self._threads_number = threads_number
    Utilities.batch_remove(self._pk.mapped_reads_file_name,
                           self._pk.samtools_converted_file_name,
                           self._pk.samtools_sorted_file_name,
                           self._pk.unmapped_reads_file_name,
                           *self._pk.pairwise_unmapped_reads_files_list,
                           self._pk.aligner_log_file_name)
def run(self):
    subprocess.getoutput("rm -f {}*".format(self._pk.samtools_sorted_file_name))
    Utilities.batch_remove(self._pk.aligner_log_file_name)
    bwt_cmd_string = " ".join(self._get_cmd())
    pipeline = """{a} 2> {b} | \
samtools view - -bu -@ {c} | \
samtools sort - -@ {c} -o {d}""".format(a=bwt_cmd_string,
                                        b=self._pk.aligner_log_file_name,
                                        c=self._threads_number,
                                        d=self._pk.samtools_sorted_file_name)
    logging.debug("Started alignment pipeline with arguments: '{}'".format(pipeline))
    s = subprocess.getoutput(pipeline)
    logging.info("Completed alignment pipeline with arguments: '{a}' and output:\n{b}\n".format(a=pipeline, b=s))
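# For illustration only: with hypothetical paths, and assuming self._get_cmd()
# yields a bowtie2 invocation (the exact aligner arguments are an assumption
# here), the rendered pipeline would look roughly like:
#
#   bowtie2 -x /data/index/ref_bowtie2 -U sample.fastq.gz 2> sample_aligner.log | \
#       samtools view - -bu -@ 4 | \
#       samtools sort - -@ 4 -o sample_sorted.bam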
def _bam2idxstats(self):
    Utilities.batch_remove(self._pk.samtools_idxstats_file_name, self._pk.samtools_idxstats_log_file_name)
    s = subprocess.getoutput("samtools idxstats {a} 2> {b}".format(a=self._pk.samtools_sorted_file_name,
                                                                   b=self._pk.samtools_idxstats_log_file_name))
    Utilities.dump_string(string=s, file=self._pk.samtools_idxstats_file_name)
    logging.info("Saved SAMTools mapped reads statistics: '{}'".format(self._pk.samtools_idxstats_file_name))
    self._samtools_idxstats_df = pd.DataFrame(Utilities.string_to_2d_array(s),
                                              columns=[self._index_column, "id_bp", "id_mapped_reads",
                                                       "id_unmapped_reads"])
    del s
def __init__(self, parsed_dictionary: dict):
    self._nfasta = parsed_dictionary["reference_nfasta"]
    self.db_name = parsed_dictionary.get("alias")
    if not self.db_name:
        self.db_name = Utilities.filename_only(self._nfasta)
    self._reference_mask = Utilities.ends_with_slash(
        os.path.dirname(os.path.realpath(self._nfasta))) + self.db_name
    self.bowtie_index_mask = parsed_dictionary["ebwt_mask"]
    self.bowtie2_index_mask = parsed_dictionary["bt2_mask"]
    self.samtools_index_file = parsed_dictionary["fai"]
    self.bedtools_genome_file = parsed_dictionary["genome"]
    self.annotation_file = parsed_dictionary["annotation"]
def _sam2bam2sorted_bam(self):
    subprocess.getoutput("rm -f {}*".format(self._pk.samtools_sorted_file_name))
    Utilities.batch_remove(self._pk.samtools_converted_log_file_name)
    # SamTools details: http://www.htslib.org/doc/samtools.html
    # Pipe 'view' into 'sort' to avoid creating self._pk.samtools_converted_file_name.
    # Note: the output file must directly follow the '-o' flag of 'samtools sort'
    s = subprocess.getoutput("samtools view -bu -@ 1 {a} | \
samtools sort - -@ 1 -o {b}".format(a=self._pk.mapped_reads_file_name,
                                    b=self._pk.samtools_sorted_file_name))
    Utilities.dump_string(string=s, file=self._pk.samtools_converted_log_file_name)
    logging.info("Sorted SAM file: '{}'".format(self._pk.samtools_sorted_file_name))
    del s
def _bam2histogram(self):
    Utilities.batch_remove(self._pk.bedtools_histogram_file_name, self._pk.genomeCoverageBed_log_file_name)
    s = subprocess.getoutput("genomeCoverageBed -ibam {a} 2> {b}".format(
        a=self._pk.samtools_sorted_file_name, b=self._pk.genomeCoverageBed_log_file_name))
    # GenomeCoverageBed details: https://bedtools.readthedocs.io/en/stable/content/tools/genomecov.html
    # Cannot be converted to a DataFrame before stacking
    Utilities.dump_string(string=s, file=self._pk.bedtools_histogram_file_name)
    self._bedtools_histogram_2d_array = Utilities.string_to_2d_array(s)
    if len(self._bedtools_histogram_2d_array) == 0:
        logging.critical("Bad alignment: no BEDTools coverage histogram to save!")
    logging.info("Saved BEDTools coverage histogram data: '{}'".format(self._pk.bedtools_histogram_file_name))
    del s
def read(file: str):
    wrapper = open(file=file, mode="r", encoding="utf-8")
    try:
        if file.endswith(".json"):
            return RefDataArray._parse_json_refdata(wrapper)
        else:
            return RefDataArray._parse_table_refdata(wrapper)
    except ValueError:
        traceback.print_exc()
        Utilities.log_and_raise("Bad reference data file: {}".format(file))
    finally:
        # Close the file handle on both the success and failure paths
        wrapper.close()
def export(self):
    sampledatas_2d_array = [i for i in self._queue
                            if i[0] in self._no_coverages_df["sample_name"].values.tolist()]
    if len(sampledatas_2d_array) == 0:
        print("All files have the correct number of lines. No files to process")
    else:
        os.makedirs(os.path.dirname(mainInitializer.output), exist_ok=True)
        Utilities.dump_2d_array(sampledatas_2d_array, file=mainInitializer.output)
        print("Files to process: {}\nDumped sample data: '{}'".format(len(self._no_coverages_df),
                                                                      mainInitializer.output))
    if mainInitializer.debugging_bool:
        debug_table = "{}_debug.tsv".format(mainInitializer.output)
        self._verified_df.to_csv(debug_table, sep='\t', header=True, index=False)
        print("Dumped debug table: '{}'".format(debug_table))
def ___fai2genome(self):
    """Process an existing FASTA index; depends on the '__samtools_faidx' method"""
    def ____parse_fai_line(split_line: list):
        if len(split_line) >= 2:
            return split_line[:2]
        print("Bad FAI file line: {}".format("\t".join(split_line)))

    fai_2d_array = Utilities.load_2d_array("{}_samtools.fai".format(self._reference_mask))
    genome_2d_array = []
    for line in fai_2d_array:
        genome_2d_array.append(____parse_fai_line(line))
    out = "{}_samtools.genome".format(self._reference_mask)
    Utilities.dump_2d_array(array=Utilities.remove_empty_values(genome_2d_array), file=out)
    print("Created BEDTools genome index: '{}'".format(out))
def __init__(self, single_sampledata_row):
    body_list = Utilities.remove_empty_values(
        [i.strip() for i in re.sub("[\r\n]", "", single_sampledata_row).split("\t")])
    if len(body_list) < 2:
        Utilities.log_and_raise(
            "Failed to parse sample data row (not enough columns): {}".format(single_sampledata_row))
    self.name = body_list[0]
    self.raw_reads_files_list = body_list[1:]
    if len(self.raw_reads_files_list) > 2:
        logging.warning("Only up to two input files are supported for alignment, using the first two values. "
                        "Given sample data row: '{}'".format(single_sampledata_row))
        self.raw_reads_files_list = self.raw_reads_files_list[:2]
    for file in self.raw_reads_files_list:
        if not os.path.isfile(file):
            logging.warning("Raw reads file not found: '{}'".format(file))
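# For reference, a sample data row is expected to be tab-separated: the sample
# name first, then one (single-end) or two (paired-end) raw reads file paths.
# A hypothetical row:
#
#   sample_01<TAB>/data/reads/sample_01_R1.fastq.gz<TAB>/data/reads/sample_01_R2.fastq.gz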
def __init__(self):
    namespace = self._parse_args()
    self.input_file_name = namespace.input
    self.refdata_file_name = namespace.refdata
    self.chunk_id = namespace.chunk
    self.mapped_reads_directory = Utilities.ends_with_slash(
        os.path.dirname(os.path.abspath(self.input_file_name)))
    self._output_directory = Utilities.ends_with_slash("/".join(
        os.path.dirname(os.path.abspath(self.input_file_name)).split("/")[:-1]))
    self.logs_directory = "{}Logs/".format(self._output_directory)
    self.statistics_directory = "{}Statistics/".format(self._output_directory)
    self._create_dirs()
def set_root_dir(self, inputDirPath):
    """Set the root directory"""
    if os.path.isdir(inputDirPath):
        self.rootDir = inputDirPath
        logging.debug("ScriptsBrowser::set_root_dir-> pathDirName: %s" % self.rootDir)
        # Save the root directory path to a JSON file
        Utilities.save_json(self.rootDirSaveFile, self.rootDir)
    else:
        logging.error("ERROR << ScriptsBrowser::set_root_dir-> '%s' is not a valid path" % inputDirPath)
        raise Exception("'%s' is not a valid path" % inputDirPath)
def parse(string: str):
    string = "\n" + re.sub("[\r\n]+", "\n", string).strip()
    q = [">{}".format(j) for j in Utilities.remove_empty_values(
        [i.strip() for i in string.split("\n>")])]
    return FASTAArray(sorted(set([FASTALine(i) for i in q]), reverse=True))
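# A minimal sketch of the parser above; the headers and sequences are made up.
# Records are split on "\n>", deduplicated via set(), and ordered by
# FASTALine's reverse sort order:
#
#   raw = ">seq_a\nACGT\n>seq_b\nGGCCAT\n>seq_a\nACGT\n"
#   array = FASTAArray.parse(raw)  # two unique FASTALine records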
def __init__(self, parsed_fastas_list: list):
    self._parsed_fastas_list = Utilities.remove_empty_values(parsed_fastas_list)
    self._parsed_fastas_list.sort(key=len, reverse=True)
    self._annotations_2d_array = [["reference_id", "id_bp"]]
    for fasta in self._parsed_fastas_list:
        self._annotations_2d_array.append([fasta.header, str(len(fasta))])
def __init__(self, ScriptsBrowserWidget, ScriptsBrowserInstance):
    logging.debug("SettingsDialog::__init__-> initializing MainWindow class")
    super().__init__()
    self.setupUi(self)
    self.scriptsBrowserInstance = ScriptsBrowserInstance
    self.scriptsBrowserWidget = ScriptsBrowserWidget

    # Load json files
    # ----------------
    # Load the saved root directory
    try:
        rootDirSaveFile = self.scriptsBrowserInstance.rootDirSaveFile
        self.scriptsBrowserWidget.rootDirModel.directory = Utilities.load_json(rootDirSaveFile)
        self.rootDirectory_lineEdit.setText(self.scriptsBrowserWidget.rootDirModel.directory)
        self.entered_root_dir()
    except Exception:
        logging.error("ERROR << SettingsDialog::__init__-> Utilities.load_json call failed")

    # Get the path to the Nuke Indie executable from the ScriptsBrowser class
    self.NukeExe_lineEdit.setText(self.scriptsBrowserInstance.exePath)

    # Slot-Signal connections
    # -----------------------
    # self.NukeExe_lineEdit.selectionChanged.connect()
    self.ok_buttonBox.accepted.connect(self.enter_confirm_settings)
    self.ok_buttonBox.rejected.connect(self.close_settings_dialog_window)
def __init__(self, sampledata_file_name):
    if not os.path.isfile(sampledata_file_name):
        Utilities.log_and_raise(
            "Sample data linker file not found: {}".format(sampledata_file_name))
    self._sampledatas_list = []
    with open(sampledata_file_name, "r", encoding="utf-8") as f:
        for r in f:
            r = r.strip()
            if len(r) > 0:
                try:
                    self._sampledatas_list.append(SampleDataLine(r))
                except ValueError:
                    continue
    self._sampledatas_list = Utilities.remove_empty_values(self._sampledatas_list)
def _reference2statistics(self):
    Utilities.batch_remove(self._pk.final_coverage_file_name)
    stats_dict = self._samtools_stats_dict
    if len(stats_dict) == 0:
        logging.critical("Bad alignment: empty SAMTools stats: '{}'".format(self._pk.samtools_stats_file_name))
        return
    if len(self._stacked_coverages_df) == 0:
        logging.critical("Bad alignment: empty stacked BEDTools coverage: '{}'".format(
            self._pk.stacked_coverage_file_name))
        return
    chunk_size = 10 ** 6
    reader = pd.read_table(self._pk.bedtools_genome_file, sep='\t', header="infer",
                           names=[self._index_column, "id_bp"], chunksize=chunk_size)
    for chunk_number, reference_df in enumerate(reader):
        genomes_coverages_df = reference_df.merge(
            self._stacked_coverages_df.loc[:, [self._index_column] + [
                i for i in list(self._stacked_coverages_df) if i not in list(reference_df)]],
            on=self._index_column, how="left")
        genomes_coverages_df = genomes_coverages_df[
            ~genomes_coverages_df[self._index_column].isin(["*", "genome"])]
        if self._non_zero_bool:
            genomes_coverages_df = genomes_coverages_df[genomes_coverages_df.id_coverage_breadth.notnull()]
        else:
            genomes_coverages_df = genomes_coverages_df.fillna(0)
        genomes_coverages_df["id_total_relative_abundance"] = (10 ** 12) * genomes_coverages_df[
            "id_mapped_bp"].astype(int) / (genomes_coverages_df["id_bp"].astype(int) *
                                           int(stats_dict["sample_total_bp"]))
        genomes_coverages_df["id_mapped_relative_abundance"] = (10 ** 12) * genomes_coverages_df[
            "id_mapped_bp"].astype(int) / (genomes_coverages_df["id_bp"].astype(int) *
                                           int(stats_dict["sample_mapped_bp"]))
        # MRA details: http://www.ibmc.msk.ru/content/thesisDocs/TyakhtAV_thesis.pdf (p. 63)
        genomes_coverages_df["sample_total_reads"] = stats_dict["sample_total_reads"]
        genomes_coverages_df["sample_mapped_reads"] = stats_dict["sample_mapped_reads"]
        genomes_coverages_df["sample_total_bp"] = stats_dict["sample_total_bp"]
        genomes_coverages_df["sample_mapped_bp"] = stats_dict["sample_mapped_bp"]
        genomes_coverages_df["sample_average_total_reads_bp"] = float(
            stats_dict["sample_total_reads"]) / float(stats_dict["sample_total_bp"])
        genomes_coverages_df["sample_average_mapped_reads_bp"] = float(
            stats_dict["sample_mapped_reads"]) / float(stats_dict["sample_total_bp"])
        genomes_coverages_df["sample_mapped_reads_to_total_reads"] = float(
            stats_dict["sample_mapped_reads"]) / float(stats_dict["sample_total_reads"])
        genomes_coverages_df = genomes_coverages_df.merge(
            self._samtools_idxstats_df.loc[:, [self._index_column] + [
                i for i in list(self._samtools_idxstats_df) if i not in list(genomes_coverages_df)]],
            on=self._index_column, how="left")
        genomes_coverages_df["id_mapped_reads_per_million_sample_total_reads"] = genomes_coverages_df[
            "id_mapped_reads"].astype(int) * (10 ** 6) / int(stats_dict["sample_total_reads"])
        genomes_coverages_df["id_mapped_reads_per_million_sample_mapped_reads"] = genomes_coverages_df[
            "id_mapped_reads"].astype(int) * (10 ** 6) / int(stats_dict["sample_mapped_reads"])
        # RPM details: https://www.biostars.org/p/273537/
        genomes_coverages_df["id_mapped_reads_per_kbp_per_million_sample_total_reads"] = genomes_coverages_df[
            "id_mapped_reads"].astype(int) * (10 ** 9) / (int(stats_dict["sample_total_reads"]) *
                                                          genomes_coverages_df["id_bp"])
        genomes_coverages_df["id_mapped_reads_per_kbp_per_million_sample_mapped_reads"] = genomes_coverages_df[
            "id_mapped_reads"].astype(int) * (10 ** 9) / (int(stats_dict["sample_mapped_reads"]) *
                                                          genomes_coverages_df["id_bp"])
        # RPKM details: https://www.biostars.org/p/273537/
        for int_column in ["id_bp", "id_coverage_breadth", "id_mapped_bp", "id_maximal_coverage_depth",
                           "id_mapped_reads", "sample_total_reads", "sample_mapped_reads",
                           "sample_total_bp", "sample_mapped_bp"]:
            genomes_coverages_df[int_column] = genomes_coverages_df[int_column].astype(int)
        genomes_coverages_df = genomes_coverages_df.loc[:, [i for i in list(genomes_coverages_df)
                                                            if len(i.strip()) > 0]]
        if chunk_number == 0:
            genomes_coverages_df.to_csv(self._pk.final_coverage_file_name, sep='\t', header=True, index=False)
        else:
            with open(file=self._pk.final_coverage_file_name, mode="a", encoding="utf-8") as f:
                genomes_coverages_df.to_csv(f, sep='\t', header=False, index=False)
        logging.info("Processed chunk {} with size of {} lines".format(chunk_number, chunk_size))
    logging.info("Finished processing coverage table: '{}'".format(self._pk.final_coverage_file_name))
def __init__(self):
    self._namespace = self.parse_args()
    self.input_nfasta = self._namespace.input
    self.preserve_headers_bool = self._namespace.preserve_headers
    self.not_large_index_bool = self._namespace.not_large_index
    self.chunk_length = int(self._namespace.size * 10 ** 9)
    self.output_dir = Utilities.ends_with_slash(self._namespace.output)
    os.makedirs(self.output_dir, exist_ok=True)
def fill_dict(nfasta_file: str):
    mask = Utilities.ends_with_slash(os.path.dirname(os.path.realpath(nfasta_file))) + \
           Utilities.filename_only(nfasta_file)
    d = {"ebwt_mask": "{}_colorspace".format(mask),
         "bt2_mask": "{}_bowtie2".format(mask),
         "fai": "{}_samtools.fai".format(mask),
         "genome": "{}_samtools.genome".format(mask),
         "annotation": "{}_annotation.tsv".format(mask)}
    return d
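# For illustration: given the hypothetical input '/data/reference/ref.fasta',
# fill_dict returns the index file masks sharing the '/data/reference/ref' stem:
#
#   {"ebwt_mask": "/data/reference/ref_colorspace",
#    "bt2_mask": "/data/reference/ref_bowtie2",
#    "fai": "/data/reference/ref_samtools.fai",
#    "genome": "/data/reference/ref_samtools.genome",
#    "annotation": "/data/reference/ref_annotation.tsv"}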
def _stack_coverage(self):
    Utilities.batch_remove(self._pk.stacked_coverage_file_name)
    # genomecov file columns: reference sequence name, depth of coverage,
    # breadth of coverage with that depth, sequence length, coverage ratio
    stacked_coverages_2d_array = []
    row_processing_2d_array = []
    counting_id = ""

    def __stack_rows():
        # Collapse all depth rows collected for one reference into a single line.
        # Output columns: reference sequence name, maximal depth of coverage, total breadth
        # of coverage, sequence length, coverage ratio, total mapped bases
        if len(row_processing_2d_array) == 0:
            return
        id_maximal_coverage_depth = max([int(i[1]) for i in row_processing_2d_array])
        id_coverage_breadth = sum([int(i[2]) for i in row_processing_2d_array])
        id_bp = int(row_processing_2d_array[0][3])
        id_coverage_breadth_to_id_bp = sum([float(i[4]) for i in row_processing_2d_array])
        id_mapped_bp = sum([int(i[1]) * int(i[2]) for i in row_processing_2d_array])
        stacked_coverages_2d_array.append([counting_id, id_maximal_coverage_depth, id_coverage_breadth,
                                           id_bp, id_coverage_breadth_to_id_bp, id_mapped_bp])

    for row_list in self._bedtools_histogram_2d_array:
        if len(row_list) != 5:
            logging.warning("Cannot parse coverage histogram row '{a}' from file '{b}'".format(
                a=row_list, b=self._pk.bedtools_histogram_file_name))
            continue
        reference_id, id_local_coverage_depth = row_list[:2]
        if reference_id == 'genome' or '*' in reference_id:
            continue
        if reference_id != counting_id:
            # A new reference begins: flush the accumulated rows first
            __stack_rows()
            row_processing_2d_array = []
            counting_id = reference_id
        if int(id_local_coverage_depth) > 0:
            row_processing_2d_array.append(row_list)
    # Flush the final reference group after the loop ends
    __stack_rows()
    if len(stacked_coverages_2d_array) == 0:
        logging.critical("Bad alignment: no coverage to stack!")
        return
    self._stacked_coverages_df = pd.DataFrame(stacked_coverages_2d_array,
                                              columns=[self._index_column, "id_maximal_coverage_depth",
                                                       "id_coverage_breadth", "id_bp",
                                                       "id_coverage_breadth_to_id_bp", "id_mapped_bp"])
    self._stacked_coverages_df.to_csv(self._pk.stacked_coverage_file_name, sep='\t', index=False)
    logging.info("Stacked BEDTools coverage: '{}'".format(self._pk.stacked_coverage_file_name))
    del self._bedtools_histogram_2d_array, stacked_coverages_2d_array
    gc.collect()
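# A worked example of the stacking arithmetic with made-up genomecov rows for
# one reference 'ref_1' (columns: name, depth, breadth at that depth, length, ratio):
#
#   ref_1  0  600   1000  0.6
#   ref_1  2  300   1000  0.3
#   ref_1  5  100   1000  0.1
#
# Zero-depth rows are skipped, so the stacked line would hold:
#   id_maximal_coverage_depth = 5
#   id_coverage_breadth = 300 + 100 = 400
#   id_coverage_breadth_to_id_bp = 0.3 + 0.1 = 0.4
#   id_mapped_bp = 2 * 300 + 5 * 100 = 1100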
def __init__(self):
    self._namespace = self._parse_args()
    self.sampledata = self._namespace.input
    self.target_length = CoveragesVerifier.get_wc_l(self._namespace.genome) + 1
    self.prefix = self._namespace.prefix
    self.suffix = self._namespace.suffix
    self.debugging_bool = self._namespace.debug
    self.output = self._namespace.output
    if len(self.output) == 0:
        self.output = "{}sampledata/{}.sampledata".format(
            Utilities.ends_with_slash(os.path.dirname(self.prefix)), Utilities.get_time())
def __init__(self):
    namespace = self._parse_args()
    self.sampledata_file_name = namespace.input
    self.refdata_file_name = namespace.refdata
    self.input_mask = namespace.mask
    # *_output_mask are attributes of the RefDataLine class
    self.threads_number = self._parse_threads_number(namespace.threads)
    self.no_coverage_bool = namespace.no_coverage
    self.output_dir = Utilities.ends_with_slash(namespace.output)
    self.logs_directory = "{}Logs/".format(self.output_dir)
    for directory in (self.output_dir, self.logs_directory):
        os.makedirs(directory, exist_ok=True)
def compile(input_file: str, output_dir: str, preserve_headers: bool = False, chop: bool = False,
            chunk_length: int = int(3.6 * 10 ** 9)):
    import json
    from modules.FASTAArray import FASTAArray
    from modules.RefDataLine import RefDataLine
    output_dir = Utilities.ends_with_slash(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    refdatas_dict = FASTAArray.prepare_nfasta_for_indexing(
        input_file=input_file, output_dir=output_dir, preserve_headers=preserve_headers,
        chop=chop, chunk_length=chunk_length)
    output_dict = {}
    for sequence_id in refdatas_dict:
        annotation_dict = refdatas_dict[sequence_id]
        nfasta_file = annotation_dict.get("reference_nfasta")
        if not nfasta_file:
            continue
        indexing_dict = {"alias": Utilities.filename_only(nfasta_file)}
        indexing_dict.update(RefDataLine.fill_dict(nfasta_file))
        indexing_dict.update(annotation_dict)
        print("Processing nFASTA: '{}'".format(nfasta_file))
        refdata = RefDataLine(indexing_dict)
        refdata.index()
        output_dict[sequence_id] = indexing_dict
    output_file = "{a}{b}_refdata.json".format(a=Utilities.ends_with_slash(output_dir),
                                               b=Utilities.filename_only(input_file))
    Utilities.dump_string(string=json.dumps(output_dict, sort_keys=False, indent=4) + "\n",
                          file=output_file)
    print("Created reference data linker: '{}'".format(output_file))
    return output_file
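# A minimal usage sketch, assuming the module layout imported above; both
# paths are hypothetical:
#
#   refdata_json = compile(input_file="/data/reference/ref.fasta",
#                          output_dir="/data/reference/index/", chop=True)
#   # -> '/data/reference/index/ref_refdata.json'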
def run(self):
    Utilities.single_core_queue(func=self._run_aligner, queue=sampleFilesList)
    if not mainInitializer.no_coverage_bool:
        Utilities.single_core_queue(func=self._run_extractor, queue=sampleFilesList)
        self.chunks_list = RefDataArray.read(
            mainInitializer.refdata_file_name).get_parsed_list()

    @staticmethod
    def _run_pipeline(refdata: RefDataLine):
        handler = PipelineHandler(refdata)
        handler.run()

    def run(self):
        Utilities.single_core_queue(func=self._run_pipeline, queue=self.chunks_list)


if __name__ == '__main__':
    mainInitializer = Initializer()
    launchTime = Utilities.get_time()
    nodeName = subprocess.getoutput("hostname").strip()
    mainLogFile = "{a}nBee_{b}_{c}.log".format(a=mainInitializer.logs_directory, b=nodeName, c=launchTime)
    print("Started main workflow with log file: '{}'".format(mainLogFile))
    logging.basicConfig(format=u'%(levelname)-8s [%(asctime)s] %(message)s', level=logging.DEBUG,
                        handlers=[logging.FileHandler(mainLogFile), logging.StreamHandler()])
    sampleDataParser = SampleDataParser(mainInitializer.sampledata_file_name)
    sampleFilesList = sampleDataParser.get_parsed_list()
    if len(sampleFilesList) == 0:
        Utilities.log_and_raise("No files to process, exiting: '{}'".format(sampleFilesList))
    chunksHandler = ChunksHandler()
def __samtools_faidx(self):
    s = subprocess.getoutput("samtools faidx {}".format(self._nfasta))
    Utilities.dump_string(string=s, file="{}_samtools_faidx.log".format(self._reference_mask))
    # 'samtools faidx' writes the index next to the FASTA file; move it to the expected name
    os.rename("{}.fai".format(self._nfasta), self.samtools_index_file)
    print("Created SAMTools FAI file: '{}'".format(self.samtools_index_file))
    self.___fai2genome()