def test_potential_target(self):
    """Check that ``potential_target`` writes the expected target FASTA.

    Writes the example sequence and GFF fixtures into the test folder, runs
    ``pt.potential_target`` for ``CDS`` features with ``tar_start=2`` and
    ``tar_end=10``, then compares the lines read back from
    ``aaa_target.fa`` (joined with newlines) against
    ``self.example.all_result``.
    """
    seq_file = os.path.join(self.test_folder, "seq")
    gff_file = os.path.join(self.test_folder, "gff")
    gen_file(seq_file, self.example.seq_file)
    gen_file(gff_file, self.example.gff_file)

    args = self.mock_args.mock()
    args.tar_start = 2
    args.tar_end = 10
    args.features = ["CDS"]

    pt.potential_target(gff_file, seq_file, self.test_folder, args)

    data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
    # assertTrue(expr, msg) treats its second argument as the failure
    # message, so the previous call passed for any non-empty output and
    # never compared it with the expected result.
    self.assertEqual("\n".join(data), self.example.all_result)
def test_potential_target(self):
    """Verify the target FASTA produced by ``pt.potential_target``.

    The example sequence and annotation fixtures are written to the test
    folder, ``pt.potential_target`` is run with ``features=["CDS"]``,
    ``tar_start=2`` and ``tar_end=10``, and the content read back from
    ``aaa_target.fa`` must equal ``self.example.all_result``.
    """
    seq_file = os.path.join(self.test_folder, "seq")
    gff_file = os.path.join(self.test_folder, "gff")
    gen_file(seq_file, self.example.seq_file)
    gen_file(gff_file, self.example.gff_file)

    args = self.mock_args.mock()
    args.tar_start = 2
    args.tar_end = 10
    args.features = ["CDS"]

    pt.potential_target(gff_file, seq_file, self.test_folder, args)

    data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
    # The second positional argument of assertTrue is the failure message,
    # not an expected value; assertEqual performs the intended comparison.
    self.assertEqual("\n".join(data), self.example.all_result)
def _gen_seq(self, prefixs, args_tar):
    """Generate sRNA FASTA files and chunked target FASTA files.

    Phase 1: for every ``*_sRNA.gff`` in ``self.srna_path`` the prefix is
    appended to ``prefixs`` and ``<prefix>_sRNA.fa`` is produced in
    ``self.srna_seq_path`` -- through ``self.helper.get_seq`` when ``"all"``
    is in ``args_tar.query``, otherwise through ``self._get_specific_seq``
    (after deleting a stale output file).  The result is then passed to
    ``self._sort_srna_fasta``.

    Phase 2: for every ``*.gff`` in ``self.gff_path``, ``potential_target``
    is called and the resulting ``<prefix>_target.fa`` is copied into
    numbered chunk files ``<prefix>_target_<n>.fa``.

    Args:
        prefixs: list extended in place with each sRNA file prefix.
        args_tar: argument container; ``query`` is read here and the whole
            object is forwarded to ``potential_target``.
    """
    print("Generating sRNA fasta files")
    for srna in os.listdir(self.srna_path):
        if not srna.endswith("_sRNA.gff"):
            continue
        prefix = srna.replace("_sRNA.gff", "")
        prefixs.append(prefix)
        srna_gff = os.path.join(self.srna_path, srna)
        genome_fa = os.path.join(self.fasta_path, prefix + ".fa")
        srna_out = os.path.join(self.srna_seq_path,
                                "_".join([prefix, "sRNA.fa"]))
        if "all" in args_tar.query:
            self.helper.get_seq(srna_gff, genome_fa, srna_out)
        else:
            # Remove output left over from an earlier run before the
            # selected sRNAs are extracted.
            if "_".join([prefix, "sRNA.fa"]) in os.listdir(
                    self.srna_seq_path):
                os.remove(srna_out)
            self._get_specific_seq(srna_gff, genome_fa, srna_out,
                                   args_tar.query)
        self._sort_srna_fasta(srna_out, prefix, self.srna_seq_path)

    print("Generating target fasta files")
    for gff in os.listdir(self.gff_path):
        if not gff.endswith(".gff"):
            continue
        prefix = gff.replace(".gff", "")
        potential_target(os.path.join(self.gff_path, gff),
                         os.path.join(self.fasta_path, prefix + ".fa"),
                         os.path.join(self.target_seq_path), args_tar)
        file_num = 1
        num = 0
        sub_prefix = os.path.join(self.target_seq_path,
                                  "_".join([prefix, "target"]))
        sub_out = open("_".join([sub_prefix, str(file_num) + ".fa"]), "w")
        # try/finally guarantees the current chunk handle is closed even
        # if reading or writing fails part-way through.
        try:
            with open(sub_prefix + ".fa", "r") as t_f:
                for line in t_f:
                    line = line.strip()
                    if line.startswith(">"):
                        num += 1
                        # Start a new chunk when the header counter hits
                        # 100.  The counter restarts at 0, so the first
                        # chunk holds 99 records and later ones 100.
                        if num == 100:
                            num = 0
                            file_num += 1
                            sub_out.close()
                            sub_out = open(
                                "_".join([sub_prefix,
                                          str(file_num) + ".fa"]), "w")
                    sub_out.write(line + "\n")
        finally:
            sub_out.close()
def _gen_seq(self, prefixs, args_tar):
    """Write sRNA FASTA files, then target FASTA files split into chunks.

    sRNA step: each ``*_sRNA.gff`` found in ``self.srna_path`` contributes
    its prefix to ``prefixs`` and gets a ``<prefix>_sRNA.fa`` in
    ``self.srna_seq_path``.  With ``"all"`` in ``args_tar.query`` the file
    is produced by ``self.helper.get_seq``; otherwise any existing output
    is deleted and ``self._get_specific_seq`` is used.  Both paths finish
    with ``self._sort_srna_fasta``.

    Target step: each ``*.gff`` found in ``self.gff_path`` is handed to
    ``potential_target``; the ``<prefix>_target.fa`` it leaves in
    ``self.target_seq_path`` is then copied line by line into
    ``<prefix>_target_<n>.fa`` chunk files.

    Args:
        prefixs: list that receives every sRNA prefix (mutated in place).
        args_tar: options object; only ``query`` is read directly, the
            object itself is forwarded to ``potential_target``.
    """
    print("Generating sRNA fasta files")
    for srna in os.listdir(self.srna_path):
        if not srna.endswith("_sRNA.gff"):
            continue
        prefix = srna.replace("_sRNA.gff", "")
        prefixs.append(prefix)
        srna_gff = os.path.join(self.srna_path, srna)
        genome_fa = os.path.join(self.fasta_path, prefix + ".fa")
        srna_out = os.path.join(self.srna_seq_path,
                                "_".join([prefix, "sRNA.fa"]))
        if "all" in args_tar.query:
            self.helper.get_seq(srna_gff, genome_fa, srna_out)
        else:
            # Drop a leftover file from a previous run before extracting
            # only the requested sRNAs.
            if "_".join([prefix, "sRNA.fa"]) in os.listdir(
                    self.srna_seq_path):
                os.remove(srna_out)
            self._get_specific_seq(srna_gff, genome_fa, srna_out,
                                   args_tar.query)
        self._sort_srna_fasta(srna_out, prefix, self.srna_seq_path)

    print("Generating target fasta files")
    for gff in os.listdir(self.gff_path):
        if not gff.endswith(".gff"):
            continue
        prefix = gff.replace(".gff", "")
        potential_target(os.path.join(self.gff_path, gff),
                         os.path.join(self.fasta_path, prefix + ".fa"),
                         os.path.join(self.target_seq_path), args_tar)
        file_num = 1
        num = 0
        sub_prefix = os.path.join(self.target_seq_path,
                                  "_".join([prefix, "target"]))
        sub_out = open("_".join([sub_prefix, str(file_num) + ".fa"]), "w")
        # The chunk handle is rebound on every rollover; the finally
        # clause closes whichever one is current, even on error.
        try:
            with open(sub_prefix + ".fa", "r") as t_f:
                for line in t_f:
                    line = line.strip()
                    if line.startswith(">"):
                        num += 1
                        # Roll over to the next chunk file once the
                        # header counter reaches 100 (counter restarts at
                        # 0: first chunk has 99 records, the rest 100).
                        if num == 100:
                            num = 0
                            file_num += 1
                            sub_out.close()
                            sub_out = open(
                                "_".join([sub_prefix,
                                          str(file_num) + ".fa"]), "w")
                    sub_out.write(line + "\n")
        finally:
            sub_out.close()
def _gen_seq(self, prefixs, target_prefixs, args_tar):
    """Generate chunked target FASTA files, then sRNA FASTA files.

    Target step: the prefix of every ``*.gff`` in ``self.gff_path`` is
    first collected into ``target_prefixs`` (the complete list is passed
    to each ``potential_target`` call).  For each annotation file
    ``potential_target`` is run; if it produced ``<prefix>_target.fa`` the
    file is copied into ``<prefix>_target_<n>.fa`` chunk files, otherwise
    an empty ``<prefix>_target.fa`` is created.  If no target line was
    written for any genome, a hint is printed and the process exits via
    ``sys.exit()``.

    sRNA step: each ``*_sRNA.gff`` in ``self.srna_path`` contributes its
    prefix to ``prefixs`` and gets a ``<prefix>_sRNA.fa`` in
    ``self.srna_seq_path`` -- via ``self.helper.get_seq`` when ``"all"`` is
    in ``args_tar.query``, otherwise via ``self._get_specific_seq`` after
    removing a stale output file -- followed by ``self._sort_srna_fasta``.

    Args:
        prefixs: list extended in place with each sRNA file prefix.
        target_prefixs: list extended in place with each annotation prefix.
        args_tar: options object; ``query`` is read here and the object is
            forwarded to ``potential_target``.

    Raises:
        SystemExit: when no target sequence line was found at all.
    """
    # This phase builds the target files; the message previously said
    # "sRNA", duplicating the message of the later sRNA phase.
    print("Generating target fasta files")
    for gff in os.listdir(self.gff_path):
        if gff.endswith(".gff"):
            target_prefixs.append(gff.replace(".gff", ""))

    detect = False
    for gff in os.listdir(self.gff_path):
        if not gff.endswith(".gff"):
            continue
        prefix = gff.replace(".gff", "")
        potential_target(os.path.join(self.gff_path, gff),
                         os.path.join(self.fasta_path, prefix + ".fa"),
                         os.path.join(self.target_seq_path), args_tar,
                         target_prefixs)
        file_num = 1
        num = 0
        sub_prefix = os.path.join(self.target_seq_path,
                                  "_".join([prefix, "target"]))
        if not os.path.exists(sub_prefix + ".fa"):
            # No targets for this genome: leave an empty placeholder file.
            open(sub_prefix + ".fa", "w").close()
            continue
        sub_out = open("_".join([sub_prefix, str(file_num) + ".fa"]), "w")
        # try/finally closes whichever chunk handle is current, even if
        # reading or writing fails part-way through.
        try:
            with open(sub_prefix + ".fa", "r") as t_f:
                for line in t_f:
                    line = line.strip()
                    if line.startswith(">"):
                        num += 1
                        # Start a new chunk when the header counter hits
                        # 100.  The counter restarts at 0, so the first
                        # chunk holds 99 records and later ones 100.
                        if num == 100:
                            num = 0
                            file_num += 1
                            sub_out.close()
                            sub_out = open(
                                "_".join([sub_prefix,
                                          str(file_num) + ".fa"]), "w")
                    # Any line at all means at least one feature matched.
                    detect = True
                    sub_out.write(line + "\n")
        finally:
            sub_out.close()

    if not detect:
        print("No assigned features can be found. "
              "Please check your genome annotation. "
              "And assign correct features to --target_feature.")
        sys.exit()

    print("Generating sRNA fasta files")
    for srna in os.listdir(self.srna_path):
        if not srna.endswith("_sRNA.gff"):
            continue
        prefix = srna.replace("_sRNA.gff", "")
        prefixs.append(prefix)
        srna_gff = os.path.join(self.srna_path, srna)
        genome_fa = os.path.join(self.fasta_path, prefix + ".fa")
        srna_out = os.path.join(self.srna_seq_path,
                                "_".join([prefix, "sRNA.fa"]))
        if "all" in args_tar.query:
            self.helper.get_seq(srna_gff, genome_fa, srna_out)
        else:
            # Remove output left over from an earlier run before the
            # selected sRNAs are extracted.
            if "_".join([prefix, "sRNA.fa"]) in os.listdir(
                    self.srna_seq_path):
                os.remove(srna_out)
            self._get_specific_seq(srna_gff, genome_fa, srna_out,
                                   args_tar.query)
        self._sort_srna_fasta(srna_out, prefix, self.srna_seq_path)