def _scan_extract_rfam(self, prefixs, args_ribo):
    '''Run the extract / pre-scan / regenerate / scan steps per GFF file.

    For every file in ``self.gff_path`` whose name ends with ".gff":

    1. ``extract_potential_rbs`` is called with the matching genome FASTA,
       the GFF itself, the TSS GFF, the transcript GFF and ``first_seq``
       (a path under ``self.tmp_files["fasta"]``).
    2. ``self._run_infernal`` is run on ``first_seq`` (mode "txt").
    3. ``regenerate_seq`` is called with that scan result, ``first_seq``,
       the first table path and the path of the regenerated FASTA.
    4. ``self._run_infernal`` is run on the regenerated FASTA ("re_txt").
    5. ``reextract_rbs`` is called, the second table is moved over the
       first one, and ``modify_table`` is applied to it.

    Args:
        prefixs: list that receives the prefix (file name without the
            ".gff" extension) of every processed file; mutated in place.
        args_ribo: options object forwarded to the helpers; its
            ``output_all`` attribute is passed to ``modify_table``.

    Returns:
        The same ``prefixs`` list that was passed in.
    '''
    gff_suffix = ".gff"
    # os.listdir() returns names in arbitrary order; sorting keeps the
    # processing order and the returned prefix list reproducible.
    for gff in sorted(os.listdir(self.gff_path)):
        if not gff.endswith(gff_suffix):
            continue
        # Drop only the trailing extension. str.replace() would also remove
        # any ".gff" occurring inside the name and yield a wrong prefix.
        prefix = gff[:-len(gff_suffix)]
        first_seq = os.path.join(self.tmp_files["fasta"], prefix + ".fa")
        prefixs.append(prefix)
        print("extracting seq of riboswitch candidates of {0}".format(
            prefix))
        extract_potential_rbs(
            os.path.join(self.fasta_path, prefix + ".fa"),
            os.path.join(self.gff_path, gff),
            os.path.join(self.tss_path, prefix + "_TSS.gff"),
            os.path.join(self.tran_path, prefix + "_transcript.gff"),
            first_seq, args_ribo)
        print("pre-scanning of {0}".format(prefix))
        first_scan_file = self._run_infernal(
            args_ribo, first_seq, "txt", prefix)
        sec_seq = os.path.join(
            self.tmp_files["fasta"], "_".join([prefix, "regenerate.fa"]))
        first_table = os.path.join(
            self.tmp_files["table"],
            "_".join([prefix, self.suffixs["csv"]]))
        regenerate_seq(first_scan_file, first_seq, first_table, sec_seq)
        print("scanning of {0}".format(prefix))
        sec_scan_file = self._run_infernal(
            args_ribo, sec_seq, "re_txt", prefix)
        sec_table = os.path.join(
            self.tmp_files["table"],
            "_".join([prefix, self.suffixs["re_csv"]]))
        reextract_rbs(sec_scan_file, first_table, sec_table)
        # The second-pass table replaces the first-pass one before the
        # final clean-up step.
        shutil.move(sec_table, first_table)
        modify_table(first_table, args_ribo.output_all)
    return prefixs
# Test for mrt.modify_table. The table path is self.test_folder/"test";
# gen_file(table, self.example.ribos) is called before each run (presumably
# to (re)create the input file - confirm against gen_file), then
# mrt.modify_table is invoked once with True and once with False as its
# second argument. In both cases the data returned by import_data(table),
# joined with "\n", must equal `result`.
# NOTE(review): in this view the definition is collapsed onto a single line,
# so the original whitespace inside the `result` literal (row separators and
# any column separators not written as \t) cannot be confirmed here - verify
# against the upstream file before touching the literal.
def test_modify_table(self): result = """#ID\tstrain\tstrand\tassociated_CDS\tstart_genome\tend_genome Rfam e_value start_align end_align riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013\t15948\t16046 RF00162 1.6e-18 1 99 riboswitch_11\tStaphylococcus_aureus_HG003\t-\tSAOUHSC_00007\t27955\t28053 RF00162 1.6e-18 1 99 riboswitch_183\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00372\t377996\t378098 RF00167 2.2e-18 1 103""" table = os.path.join(self.test_folder, "test") gen_file(table, self.example.ribos) mrt.modify_table(table, True) data = import_data(table) self.assertEqual("\n".join(data), result) gen_file(table, self.example.ribos) mrt.modify_table(table, False) data = import_data(table) self.assertEqual("\n".join(data), result)
# Test for mrt.modify_table against an expected table whose header lists
# eleven \t-separated columns (#ID, Genome, Strand, Associated_CDS,
# Start_genome, End_genome, Rfam, E_value, Score, Start_align, End_align).
# gen_file(table, self.example.ribos) is called before each run (presumably
# to (re)create the input file - confirm against gen_file), then
# mrt.modify_table is invoked once with True and once with False as its
# second argument. In both cases the data returned by import_data(table),
# joined with "\n", must equal `result`.
# NOTE(review): in this view the definition is collapsed onto a single line,
# so the row separators inside the `result` literal cannot be confirmed
# here - verify against the upstream file before touching the literal.
def test_modify_table(self): result = """#ID\tGenome\tStrand\tAssociated_CDS\tStart_genome\tEnd_genome\tRfam\tE_value\tScore\tStart_align\tEnd_align riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013\t15948\t16046\tRF00162\t1.6e-18\t74\t1\t99 riboswitch_11\tStaphylococcus_aureus_HG003\t-\tSAOUHSC_00007\t27955\t28053\tRF00162\t1.6e-18\t74\t1\t99 riboswitch_183\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00372\t377996\t378098\tRF00167\t2.2e-18\t45\t1\t103""" table = os.path.join(self.test_folder, "test") gen_file(table, self.example.ribos) mrt.modify_table(table, True) data = import_data(table) self.assertEqual("\n".join(data), result) gen_file(table, self.example.ribos) mrt.modify_table(table, False) data = import_data(table) self.assertEqual("\n".join(data), result)
def _scan_extract_rfam(self, prefixs, args_ribo, tmp_files, suffixs,
                       feature, rfam):
    '''Extract the candidate sequences and scan them, per GFF file.

    For every file in ``self.gff_path`` whose name ends with ".gff":

    1. ``extract_potential_rbs`` is called with the matching genome FASTA,
       the GFF itself, the TSS GFF, the transcript GFF, ``first_seq``
       (a path under ``tmp_files["fasta"]``), ``args_ribo`` and ``feature``.
    2. ``self._run_cmscan`` is run on ``first_seq`` (mode "txt").
    3. ``regenerate_seq`` is called with that scan result, ``first_seq``,
       the first table path and the path of the regenerated FASTA.
    4. ``self._run_cmscan`` is run on the regenerated FASTA ("re_txt").
    5. ``reextract_rbs`` is called, the second table is moved over the
       first one, and ``modify_table`` is applied to it.

    Args:
        prefixs: list that receives the prefix (file name without the
            ".gff" extension) of every processed file; mutated in place.
        args_ribo: options object forwarded to the helpers; its
            ``output_all`` attribute is passed to ``modify_table``.
        tmp_files: mapping read with the keys "fasta" and "table" to
            obtain the directories for intermediate files.
        suffixs: mapping read with the keys "csv" and "re_csv" to build
            the table file names; also forwarded to ``_run_cmscan``.
        feature: forwarded unchanged to ``extract_potential_rbs``.
        rfam: forwarded unchanged to ``_run_cmscan``.

    Returns:
        The same ``prefixs`` list that was passed in.
    '''
    gff_suffix = ".gff"
    # os.listdir() returns names in arbitrary order; sorting keeps the
    # processing order and the returned prefix list reproducible.
    for gff in sorted(os.listdir(self.gff_path)):
        if not gff.endswith(gff_suffix):
            continue
        # Drop only the trailing extension. str.replace() would also remove
        # any ".gff" occurring inside the name and yield a wrong prefix.
        prefix = gff[:-len(gff_suffix)]
        first_seq = os.path.join(tmp_files["fasta"], prefix + ".fa")
        prefixs.append(prefix)
        print("Extracting sequences of candidates for {0}".format(
            prefix))
        extract_potential_rbs(
            os.path.join(self.fasta_path, prefix + ".fa"),
            os.path.join(self.gff_path, gff),
            os.path.join(self.tss_path, prefix + "_TSS.gff"),
            os.path.join(self.tran_path, prefix + "_transcript.gff"),
            first_seq, args_ribo, feature)
        print("Pre-scanning of {0}".format(prefix))
        first_scan_file = self._run_cmscan(
            args_ribo, first_seq, "txt", prefix, tmp_files,
            suffixs, rfam)
        sec_seq = os.path.join(
            tmp_files["fasta"], "_".join([prefix, "regenerate.fa"]))
        first_table = os.path.join(
            tmp_files["table"], "_".join([prefix, suffixs["csv"]]))
        regenerate_seq(first_scan_file, first_seq, first_table, sec_seq)
        print("Scanning of {0}".format(prefix))
        sec_scan_file = self._run_cmscan(
            args_ribo, sec_seq, "re_txt", prefix, tmp_files,
            suffixs, rfam)
        sec_table = os.path.join(
            tmp_files["table"], "_".join([prefix, suffixs["re_csv"]]))
        reextract_rbs(sec_scan_file, first_table, sec_table)
        # The second-pass table replaces the first-pass one before the
        # final clean-up step.
        shutil.move(sec_table, first_table)
        modify_table(first_table, args_ribo.output_all)
    return prefixs
def _scan_extract_rfam(self, prefixs, args_ribo, tmp_files, suffixs,
                       feature, rfam, log):
    '''Extract the candidate sequences and scan them, per GFF file.

    For every file in ``self.gff_path`` whose name ends with ".gff":

    1. ``extract_potential_rbs`` is called with the matching genome FASTA,
       the GFF itself, the TSS GFF (or None), the transcript GFF,
       ``first_seq`` (a path under ``tmp_files["fasta"]``), ``args_ribo``
       and ``feature``.
    2. ``self._run_cmscan`` is run on ``first_seq`` (mode "txt").
    3. ``regenerate_seq`` is called with that scan result, ``first_seq``,
       the first table path and the path of the regenerated FASTA.
    4. ``self._run_cmscan`` is run on the regenerated FASTA ("re_txt").
    5. ``reextract_rbs`` is called with ``args_ribo.cutoff``, the second
       table is moved over the first one, and ``modify_table`` is applied.

    A progress note is written to ``log`` before each step.

    Args:
        prefixs: list that receives the prefix (file name without the
            ".gff" extension) of every processed file; mutated in place.
        args_ribo: options object forwarded to the helpers; its ``cutoff``
            and ``output_all`` attributes are read here.
        tmp_files: mapping read with the keys "fasta" and "table" to
            obtain the directories for intermediate files.
        suffixs: mapping read with the keys "csv" and "re_csv" to build
            the table file names; also forwarded to ``_run_cmscan``.
        feature: forwarded unchanged to ``extract_potential_rbs``.
        rfam: forwarded unchanged to ``_run_cmscan``.
        log: object with a ``write(str)`` method that receives the
            progress messages; also forwarded to ``_run_cmscan``.

    Returns:
        The same ``prefixs`` list that was passed in.
    '''
    gff_suffix = ".gff"
    # os.listdir() returns names in arbitrary order; sorting keeps the
    # processing order, the log and the returned prefix list reproducible.
    for gff in sorted(os.listdir(self.gff_path)):
        if not gff.endswith(gff_suffix):
            continue
        # Drop only the trailing extension. str.replace() would also remove
        # any ".gff" occurring inside the name and yield a wrong prefix.
        prefix = gff[:-len(gff_suffix)]
        first_seq = os.path.join(tmp_files["fasta"], prefix + ".fa")
        prefixs.append(prefix)
        print("Extracting sequences of candidates for {0}".format(
            prefix))
        # The TSS folder is optional; None is handed to the extractor
        # when it is not configured.
        if self.tss_path is not None:
            tss_file = os.path.join(self.tss_path, prefix + "_TSS.gff")
        else:
            tss_file = None
        log.write("Running extract_RBS.py to extract potential "
                  "sequences of riboswitches/RNA thermometers for "
                  "{0}.\n".format(prefix))
        extract_potential_rbs(
            os.path.join(self.fasta_path, prefix + ".fa"),
            os.path.join(self.gff_path, gff), tss_file,
            os.path.join(self.tran_path, prefix + "_transcript.gff"),
            first_seq, args_ribo, feature)
        log.write("\t" + first_seq + " is temporary generated.\n")
        print("Pre-scanning of {0}".format(prefix))
        log.write("Using Infernal to pre-scan riboswitches/RNA "
                  "thermometers for {0}.\n".format(prefix))
        log.write("Please make sure the version of Infernal is at "
                  "least 1.1.1.\n")
        first_scan_file = self._run_cmscan(
            args_ribo, first_seq, "txt", prefix, tmp_files,
            suffixs, rfam, log)
        sec_seq = os.path.join(
            tmp_files["fasta"], "_".join([prefix, "regenerate.fa"]))
        first_table = os.path.join(
            tmp_files["table"], "_".join([prefix, suffixs["csv"]]))
        log.write("Running recompute_RBS.py to update the potential "
                  "sequences of riboswitches/RNA thermometers for {0} "
                  "based on the pre-scanning results.\n".format(prefix))
        regenerate_seq(first_scan_file, first_seq, first_table, sec_seq)
        log.write("\t" + sec_seq + " is temporary generated.\n")
        print("Scanning of {0}".format(prefix))
        log.write("Using Infernal to scan riboswitches/RNA "
                  "thermometers for {0}.\n".format(prefix))
        log.write("Please make sure the version of Infernal is at "
                  "least 1.1.1.\n")
        sec_scan_file = self._run_cmscan(
            args_ribo, sec_seq, "re_txt", prefix, tmp_files,
            suffixs, rfam, log)
        sec_table = os.path.join(
            tmp_files["table"], "_".join([prefix, suffixs["re_csv"]]))
        log.write("Running recompute_RBS.py and modify_rbs_table.py "
                  "to generate tables for {0} "
                  "based on the scanning results.\n".format(prefix))
        reextract_rbs(sec_scan_file, first_table, sec_table,
                      args_ribo.cutoff)
        # The second-pass table replaces the first-pass one before the
        # final clean-up step.
        shutil.move(sec_table, first_table)
        modify_table(first_table, args_ribo.output_all)
    return prefixs
def _scan_extract_rfam(self, prefixs, args_ribo, tmp_files, suffixs,
                       feature, rfam, log):
    '''Extract the candidate sequences and scan them, per GFF file.

    Each file in ``self.gff_path`` whose name ends with ".gff" goes through
    the following steps, with a progress note written to ``log`` before
    each one:

    1. ``extract_potential_rbs`` receives the matching genome FASTA, the
       GFF itself, the TSS GFF (or None), the transcript GFF,
       ``first_seq`` (a path under ``tmp_files["fasta"]``), ``args_ribo``
       and ``feature``.
    2. ``self._run_cmscan`` is run on ``first_seq`` (mode "txt").
    3. ``regenerate_seq`` receives that scan result, ``first_seq``, the
       first table path and the path of the regenerated FASTA.
    4. ``self._run_cmscan`` is run on the regenerated FASTA ("re_txt").
    5. ``reextract_rbs`` is called with ``args_ribo.cutoff``, the second
       table is moved over the first one, and ``modify_table`` is applied.

    Args:
        prefixs: list that receives the prefix (file name without the
            ".gff" extension) of every processed file; mutated in place.
        args_ribo: options object forwarded to the helpers; its ``cutoff``
            and ``output_all`` attributes are read here.
        tmp_files: mapping read with the keys "fasta" and "table" to
            obtain the directories for intermediate files.
        suffixs: mapping read with the keys "csv" and "re_csv" to build
            the table file names; also forwarded to ``_run_cmscan``.
        feature: forwarded unchanged to ``extract_potential_rbs``.
        rfam: forwarded unchanged to ``_run_cmscan``.
        log: object with a ``write(str)`` method that receives the
            progress messages; also forwarded to ``_run_cmscan``.

    Returns:
        The same ``prefixs`` list that was passed in.
    '''
    gff_suffix = ".gff"
    # The same reminder is logged before both Infernal runs.
    version_note = ("Please make sure the version of Infernal is at "
                    "least 1.1.1.\n")
    fasta_dir = tmp_files["fasta"]
    table_dir = tmp_files["table"]
    # os.listdir() returns names in arbitrary order; sorting keeps the
    # processing order, the log and the returned prefix list reproducible.
    for gff in sorted(os.listdir(self.gff_path)):
        if not gff.endswith(gff_suffix):
            continue
        # Drop only the trailing extension. str.replace() would also remove
        # any ".gff" occurring inside the name and yield a wrong prefix.
        prefix = gff[:-len(gff_suffix)]
        first_seq = os.path.join(fasta_dir, prefix + ".fa")
        prefixs.append(prefix)
        print("Extracting sequences of candidates for {0}".format(
            prefix))
        # The TSS folder is optional; None is handed to the extractor
        # when it is not configured.
        if self.tss_path is not None:
            tss_file = os.path.join(self.tss_path, prefix + "_TSS.gff")
        else:
            tss_file = None
        log.write("Running extract_RBS.py to extract potential "
                  "sequences of riboswitches/RNA thermometers for "
                  "{0}.\n".format(prefix))
        extract_potential_rbs(
            os.path.join(self.fasta_path, prefix + ".fa"),
            os.path.join(self.gff_path, gff), tss_file,
            os.path.join(self.tran_path, prefix + "_transcript.gff"),
            first_seq, args_ribo, feature)
        log.write("\t" + first_seq + " is temporary generated.\n")

        # First pass: pre-scan the extracted candidates.
        print("Pre-scanning of {0}".format(prefix))
        log.write("Using Infernal to pre-scan riboswitches/RNA "
                  "thermometers for {0}.\n".format(prefix))
        log.write(version_note)
        first_scan_file = self._run_cmscan(args_ribo, first_seq, "txt",
                                           prefix, tmp_files, suffixs,
                                           rfam, log)
        sec_seq = os.path.join(fasta_dir,
                               "_".join([prefix, "regenerate.fa"]))
        first_table = os.path.join(table_dir,
                                   "_".join([prefix, suffixs["csv"]]))
        log.write(
            "Running recompute_RBS.py to update the potential "
            "sequences of riboswitches/RNA thermometers for {0} "
            "based on the pre-scanning results.\n".format(prefix))
        regenerate_seq(first_scan_file, first_seq, first_table, sec_seq)
        log.write("\t" + sec_seq + " is temporary generated.\n")

        # Second pass: scan the regenerated sequences.
        print("Scanning of {0}".format(prefix))
        log.write("Using Infernal to scan riboswitches/RNA "
                  "thermometers for {0}.\n".format(prefix))
        log.write(version_note)
        sec_scan_file = self._run_cmscan(args_ribo, sec_seq, "re_txt",
                                         prefix, tmp_files, suffixs,
                                         rfam, log)
        sec_table = os.path.join(table_dir,
                                 "_".join([prefix, suffixs["re_csv"]]))
        log.write("Running recompute_RBS.py and modify_rbs_table.py "
                  "to generate tables for {0} "
                  "based on the scanning results.\n".format(prefix))
        reextract_rbs(sec_scan_file, first_table, sec_table,
                      args_ribo.cutoff)
        # The second-pass table replaces the first-pass one before the
        # final clean-up step.
        shutil.move(sec_table, first_table)
        modify_table(first_table, args_ribo.output_all)
    return prefixs