def unsplit_fa(self, input_file_path, output_file_path):
    """Re-write every record of a FASTA file into a new file with ``split=False``.

    Records are pulled one at a time from ``fa.SequenceSource`` and handed to
    ``fa.FastaOutput.store``. Presumably ``split=False`` keeps each sequence
    on a single line -- confirm against the ``fa`` module.

    Args:
        input_file_path: path passed to ``fa.SequenceSource``.
        output_file_path: path passed to ``fa.FastaOutput``.
    """
    reader = fa.SequenceSource(input_file_path)
    writer = fa.FastaOutput(output_file_path)
    while True:
        # ``next()`` returns a falsy value once the source is exhausted.
        if not reader.next():
            break
        writer.store(reader, split=False)
    writer.close()
def combine_w_gast_fa(self, input_file_path, output_file_path):
    """Copy a FASTA file, appending a field from its ``.gast`` file to each id.

    For every record, the lines of ``input_file_path + ".gast"`` are searched
    with ``self.lines_that_contain`` for the record id. The first match is
    split on tabs and its second field is appended to the id as
    ``"<id>|<field>"`` before the record is stored with ``split=False``.

    The ``.gast`` file is read a single time, on the first record, and its
    handle is closed immediately. Previously it was re-opened and re-read for
    every record and never closed. It is still not touched when the FASTA
    input yields no records.

    Args:
        input_file_path: FASTA file to read; the GAST file is expected next
            to it with a ``.gast`` suffix.
        output_file_path: path passed to ``fa.FastaOutput``.

    Raises:
        IndexError: if ``lines_that_contain`` returns an empty sequence for
            a record id, or the first match has fewer than two
            tab-separated fields.
    """
    output = fa.FastaOutput(output_file_path)
    fa_input = fa.SequenceSource(input_file_path)
    gast_file_name = input_file_path + ".gast"
    # Loaded lazily so an input with no records never opens the GAST file.
    # Assumes lines_that_contain does not modify the list it is given.
    gast_file_content = None
    try:
        while fa_input.next():
            if gast_file_content is None:
                with open(gast_file_name, "r") as gast_file:
                    gast_file_content = gast_file.readlines()
            matches = self.lines_that_contain(fa_input.id, gast_file_content)
            gast_taxonomy = matches[0].split("\t")
            fa_input.id = fa_input.id + "|" + gast_taxonomy[1]
            output.store(fa_input, split=False)
    finally:
        # Close the output even if a lookup or write fails part-way.
        output.close()
def move_out_chimeric(self):
    """Write a chimera-free copy of every input FASTA file.

    For each entry of ``self.input_file_names`` the file under ``self.indir``
    is streamed, and every record whose id is not among
    ``self.get_chimeric_ids()`` is stored (with ``store_frequencies=False``)
    into ``<fasta path> + self.nonchimeric_suffix``.
    """
    # Normalise to a set once so the per-record membership test does not
    # depend on the container type returned by get_chimeric_ids().
    chimeric_ids = set(self.get_chimeric_ids())
    for idx_key in self.input_file_names:
        fasta_file_path = os.path.join(self.indir,
                                       self.input_file_names[idx_key])
        # NOTE(review): the file is opened with ReadFasta and closed again
        # without anything being used here. Kept as in the original in case
        # the constructor is relied on to validate the file -- confirm.
        read_fasta = fa.ReadFasta(fasta_file_path)
        read_fasta.close()

        non_chimeric_file = fasta_file_path + self.nonchimeric_suffix
        non_chimeric_fasta = fa.FastaOutput(non_chimeric_file)
        try:
            fasta = fa.SequenceSource(fasta_file_path, lazy_init=False)
            while fasta.next():
                if fasta.id not in chimeric_ids:
                    non_chimeric_fasta.store(fasta, store_frequencies=False)
        finally:
            # Close the output even if reading or writing fails.
            non_chimeric_fasta.close()
def move_out_chimeric(self):
    """Write the records of ``self.chg_file`` that are not flagged chimeric.

    Chimeric ids are collected from two files under ``self.dir_name``
    (``self.chimeric_file_name_txt`` and ``self.chimeric_file_name_db``) via
    ``self.get_chimeric_ids`` and merged into one set, whose size is printed.
    Every record whose id is absent from that set is stored, with
    ``store_frequencies=False``, into ``self.output_file_name`` in the same
    directory.
    """
    txt_path = os.path.join(self.dir_name, self.chimeric_file_name_txt)
    ids_from_txt = self.get_chimeric_ids(txt_path)
    db_path = os.path.join(self.dir_name, self.chimeric_file_name_db)
    ids_from_db = self.get_chimeric_ids(db_path)

    # Union of both id collections.
    chimeric = set(ids_from_txt)
    chimeric.update(ids_from_db)

    print("len(all_chimeric_ids) = ")
    print(len(chimeric))

    out_path = os.path.join(self.dir_name, self.output_file_name)
    writer = fa.FastaOutput(out_path)
    source_path = os.path.join(self.dir_name, self.chg_file)
    reader = fa.SequenceSource(source_path, lazy_init=False)

    while next(reader):
        if reader.id in chimeric:
            continue
        writer.store(reader, store_frequencies=False)
    writer.close()
def write_clean_fasta_file(self):
    """Rewrite each lane's trimmed FASTA file without its deleted ids.

    For every lane key with a non-empty ``self.deleted_ids[lane_key]``:

    1. ``<lane>.trimmed.fa`` is streamed and every record whose id is not
       deleted is stored into ``<lane>.newtrimmed.fa``.
    2. The original is renamed to ``<lane>.trimmed_with_chimera.fa``.
    3. The filtered file is renamed to ``<lane>.trimmed.fa``.

    All files live in ``self.trim_dir``. Lanes with nothing deleted are
    skipped *before* any file is opened. Previously the output and the
    source were created first, which left an unclosed output object and a
    ``.newtrimmed.fa`` target behind for skipped lanes.
    """
    # NOTE(review): fixed two-second pause kept from the original; its
    # purpose is not visible from this function.
    sleep(2)
    for lane_key in self.lane_keys:
        logger.debug("write_clean_fasta_file working on lanekey: " + lane_key)
        logger.debug(
            "write_clean_fasta_file about to check trimmedfasta file")

        deleted_id_list = self.deleted_ids[lane_key]
        if not deleted_id_list:
            continue
        # Set lookup keeps the per-record membership test cheap.
        deleted_ids = set(deleted_id_list)

        original_trimmed_file = os.path.join(self.trim_dir,
                                             lane_key + ".trimmed.fa")
        new_trimmed_file_name = os.path.join(self.trim_dir,
                                             lane_key + ".newtrimmed.fa")

        new_trimmed_file = fa.FastaOutput(new_trimmed_file_name)
        try:
            trimmedfasta = fa.SequenceSource(original_trimmed_file)
            while trimmedfasta.next():
                if trimmedfasta.id not in deleted_ids:
                    new_trimmed_file.store(trimmedfasta)
        finally:
            # Close the output even if reading or writing fails.
            new_trimmed_file.close()

        # Keep the unfiltered file under a new name, then move the filtered
        # file into the original's place.
        os.rename(
            original_trimmed_file,
            os.path.join(self.trim_dir,
                         lane_key + ".trimmed_with_chimera.fa"))
        os.rename(new_trimmed_file_name, original_trimmed_file)
def write_clean_uniques_file(self):
    """Rewrite each lane's unique FASTA file without its deleted ids.

    For every lane key with a non-empty ``self.deleted_ids[lane_key]``:

    1. ``<lane>.unique.fa`` is streamed. A record whose id is a key of
       ``self.orphans[lane_key]`` first has its id replaced by element
       ``[0]`` of the corresponding orphan entry.
    2. Records whose (possibly replaced) id is not deleted are stored into
       ``<lane>.newunique.fa``.
    3. The original is renamed to ``<lane>.unique_dirty.fa`` and the
       filtered file takes over the ``<lane>.unique.fa`` name.

    All files live in ``self.trim_dir``. Lanes with nothing deleted are
    skipped *before* any file is opened. Previously the output was created
    first, which left an unclosed output object and a ``.newunique.fa``
    target behind for skipped lanes.
    """
    for lane_key in self.lane_keys:
        deleted_id_list = self.deleted_ids[lane_key]
        if not deleted_id_list:
            continue
        # Set lookup keeps the per-record membership test cheap.
        deleted_ids = set(deleted_id_list)

        new_unique_file_name = os.path.join(self.trim_dir,
                                            lane_key + ".newunique.fa")
        original_unique_file = os.path.join(self.trim_dir,
                                            lane_key + ".unique.fa")

        new_unique_file = fa.FastaOutput(new_unique_file_name)
        try:
            uniquesfasta = fa.SequenceSource(original_unique_file)
            while uniquesfasta.next():
                # Looked up per record, as before, so a lane without an
                # orphans entry only fails if the file has records.
                lane_orphans = self.orphans[lane_key]
                if uniquesfasta.id in lane_orphans:
                    uniquesfasta.id = lane_orphans[uniquesfasta.id][0]
                if uniquesfasta.id not in deleted_ids:
                    new_unique_file.store(uniquesfasta)
        finally:
            # Close the output even if reading or writing fails.
            new_unique_file.close()

        # Keep the unfiltered file under a new name, then move the filtered
        # file into the original's place.
        os.rename(
            original_unique_file,
            os.path.join(self.trim_dir, lane_key + ".unique_dirty.fa"))
        os.rename(new_unique_file_name, original_unique_file)