Exemple #1
0
 def _scan_extract_rfam(self, prefixs, args_ribo):
     """Scan the candidates of every ".gff" file found in self.gff_path.

     For each annotation file the steps are: extract candidate sequences
     (``extract_potential_rbs``), run a first scan (``_run_infernal`` with
     "txt"), rebuild the sequences from that scan (``regenerate_seq``),
     run a second scan ("re_txt"), re-extract the hits
     (``reextract_rbs``), move the second table over the first one and
     hand the result to ``modify_table``.

     The file name with ".gff" removed is appended to ``prefixs``; that
     list is modified in place and is also the return value.
     """
     for gff in os.listdir(self.gff_path):
         # Anything that is not an annotation file is ignored.
         if not gff.endswith(".gff"):
             continue
         prefix = gff.replace(".gff", "")
         candidate_fa = os.path.join(self.tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)

         # Step 1: pull out the candidate sequences.
         print("extracting seq of riboswitch candidates of {0}".format(
             prefix))
         genome_fa = os.path.join(self.fasta_path, prefix + ".fa")
         annotation = os.path.join(self.gff_path, gff)
         tss_gff = os.path.join(self.tss_path, prefix + "_TSS.gff")
         transcript_gff = os.path.join(
             self.tran_path, prefix + "_transcript.gff")
         extract_potential_rbs(genome_fa, annotation, tss_gff,
                               transcript_gff, candidate_fa, args_ribo)

         # Step 2: first scan, then regenerate the sequences from it.
         print("pre-scanning of {0}".format(prefix))
         pre_scan = self._run_infernal(args_ribo, candidate_fa, "txt", prefix)
         regenerated_fa = os.path.join(
             self.tmp_files["fasta"], "_".join([prefix, "regenerate.fa"]))
         table = os.path.join(
             self.tmp_files["table"],
             "_".join([prefix, self.suffixs["csv"]]))
         regenerate_seq(pre_scan, candidate_fa, table, regenerated_fa)

         # Step 3: second scan; its table replaces the first one.
         print("scanning of {0}".format(prefix))
         final_scan = self._run_infernal(
             args_ribo, regenerated_fa, "re_txt", prefix)
         rescan_table = os.path.join(
             self.tmp_files["table"],
             "_".join([prefix, self.suffixs["re_csv"]]))
         reextract_rbs(final_scan, table, rescan_table)
         shutil.move(rescan_table, table)
         modify_table(table, args_ribo.output_all)
     return prefixs
Exemple #2
0
 def _scan_extract_rfam(self, prefixs, args_ribo):
     """Process each ".gff" entry of self.gff_path through both scans.

     Every matching file goes through ``extract_potential_rbs``, a first
     ``_run_infernal`` call ("txt"), ``regenerate_seq``, a second
     ``_run_infernal`` call ("re_txt") and ``reextract_rbs``. The table of
     the second pass is then moved onto the first table, which is finally
     passed to ``modify_table``.

     Returns ``prefixs`` after appending one prefix (file name without
     ".gff") per processed file; the caller's list is mutated.
     """
     gff_names = (entry for entry in os.listdir(self.gff_path)
                  if entry.endswith(".gff"))
     for gff in gff_names:
         prefix = gff.replace(".gff", "")
         seq_round1 = os.path.join(self.tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)

         message = "extracting seq of riboswitch candidates of {0}"
         print(message.format(prefix))
         extract_potential_rbs(
             os.path.join(self.fasta_path, prefix + ".fa"),
             os.path.join(self.gff_path, gff),
             os.path.join(self.tss_path, prefix + "_TSS.gff"),
             os.path.join(self.tran_path, prefix + "_transcript.gff"),
             seq_round1,
             args_ribo,
         )

         print("pre-scanning of {0}".format(prefix))
         scan_round1 = self._run_infernal(
             args_ribo, seq_round1, "txt", prefix)
         seq_round2 = os.path.join(
             self.tmp_files["fasta"], "_".join((prefix, "regenerate.fa")))
         table_round1 = os.path.join(
             self.tmp_files["table"],
             "_".join((prefix, self.suffixs["csv"])))
         regenerate_seq(scan_round1, seq_round1, table_round1, seq_round2)

         print("scanning of {0}".format(prefix))
         scan_round2 = self._run_infernal(
             args_ribo, seq_round2, "re_txt", prefix)
         table_round2 = os.path.join(
             self.tmp_files["table"],
             "_".join((prefix, self.suffixs["re_csv"])))
         reextract_rbs(scan_round2, table_round1, table_round2)
         # The second-pass table takes the place of the first-pass table.
         shutil.move(table_round2, table_round1)
         modify_table(table_round1, args_ribo.output_all)
     return prefixs
    def test_reextract_rbs(self):
        """Run rr.reextract_rbs against two different first-pass tables.

        With the RF00162 row as first table, the output must equal that
        row. With the RF00178 row as first table, the output must hold
        the RF00162 row followed by the RF00178 row.
        """
        folder = self.test_folder
        align_path = os.path.join(folder, "align")
        first_path = os.path.join(folder, "first")
        out_path = os.path.join(folder, "output")

        # Case 1: the first table holds the RF00162 row.
        rf00162_row = (
            "riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013"
            "\t10\t16\tRF00162\t6.2e-18\t5\t12")
        gen_file(align_path, self.example.scan_file)
        gen_file(first_path, rf00162_row)
        rr.reextract_rbs(align_path, first_path, out_path)
        produced = "\n".join(import_data(out_path))
        self.assertEqual(produced, rf00162_row)

        # Case 2: the first table holds the RF00178 row.
        rf00178_row = (
            "riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013"
            "\t10\t16\tRF00178\t6.2e-20\t13\t17")
        gen_file(first_path, rf00178_row)
        rr.reextract_rbs(align_path, first_path, out_path)
        produced = "\n".join(import_data(out_path))
        self.assertEqual(
            produced,
            "riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013"
            "\t10\t16\tRF00162\t6.2e-18\t5\t12\n"
            "riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013"
            "\t10\t16\tRF00178\t6.2e-20\t13\t17")
    def test_reextract_rbs(self):
        """Check the table written by rr.reextract_rbs for two inputs.

        First input (RF00162 row): the output is expected to be exactly
        that row. Second input (RF00178 row): the output is expected to
        be the RF00162 row and the RF00178 row, in that order.
        """
        align_file, first_file, output_file = (
            os.path.join(self.test_folder, leaf)
            for leaf in ("align", "first", "output"))

        initial_row = (
            "riboswitch_5\tStaphylococcus_aureus_HG003\t+\t"
            "SAOUHSC_00013\t10\t16\tRF00162\t6.2e-18\t5\t12")
        gen_file(align_file, self.example.scan_file)
        gen_file(first_file, initial_row)
        rr.reextract_rbs(align_file, first_file, output_file)
        lines = import_data(output_file)
        self.assertEqual("\n".join(lines), initial_row)

        replacement_row = (
            "riboswitch_5\tStaphylococcus_aureus_HG003\t+\t"
            "SAOUHSC_00013\t10\t16\tRF00178\t6.2e-20\t13\t17")
        gen_file(first_file, replacement_row)
        rr.reextract_rbs(align_file, first_file, output_file)
        lines = import_data(output_file)
        both_rows = (
            "riboswitch_5\tStaphylococcus_aureus_HG003\t+\t"
            "SAOUHSC_00013\t10\t16\tRF00162\t6.2e-18\t5\t12\n"
            "riboswitch_5\tStaphylococcus_aureus_HG003\t+\t"
            "SAOUHSC_00013\t10\t16\tRF00178\t6.2e-20\t13\t17")
        self.assertEqual("\n".join(lines), both_rows)
Exemple #5
0
 def _scan_extract_rfam(self, prefixs, args_ribo, tmp_files, suffixs,
                        feature, rfam):
     '''Extract candidate sequences and scan them, one ".gff" at a time.

     For each ".gff" file in ``self.gff_path`` this calls
     ``extract_potential_rbs``, ``_run_cmscan`` ("txt"),
     ``regenerate_seq``, ``_run_cmscan`` ("re_txt") and
     ``reextract_rbs``; the second table is then moved over the first
     one, which is passed to ``modify_table``.

     ``tmp_files`` is indexed with "fasta" and "table", ``suffixs`` with
     "csv" and "re_csv". ``feature`` is forwarded to
     ``extract_potential_rbs`` and ``rfam`` to ``_run_cmscan``.

     Returns ``prefixs`` with one prefix appended per processed file
     (the list is changed in place).
     '''
     for gff in os.listdir(self.gff_path):
         if not gff.endswith(".gff"):
             continue
         prefix = gff.replace(".gff", "")
         raw_fasta = os.path.join(tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)

         print("Extracting sequences of candidates for {0}".format(
             prefix))
         genome = os.path.join(self.fasta_path, prefix + ".fa")
         annotation = os.path.join(self.gff_path, gff)
         tss = os.path.join(self.tss_path, prefix + "_TSS.gff")
         transcripts = os.path.join(
             self.tran_path, prefix + "_transcript.gff")
         extract_potential_rbs(genome, annotation, tss, transcripts,
                               raw_fasta, args_ribo, feature)

         print("Pre-scanning of {0}".format(prefix))
         pre_scan_out = self._run_cmscan(
             args_ribo, raw_fasta, "txt", prefix, tmp_files, suffixs,
             rfam)
         refined_fasta = os.path.join(
             tmp_files["fasta"], "_".join([prefix, "regenerate.fa"]))
         main_table = os.path.join(
             tmp_files["table"], "_".join([prefix, suffixs["csv"]]))
         regenerate_seq(pre_scan_out, raw_fasta, main_table, refined_fasta)

         print("Scanning of {0}".format(prefix))
         scan_out = self._run_cmscan(
             args_ribo, refined_fasta, "re_txt", prefix, tmp_files,
             suffixs, rfam)
         rescan_table = os.path.join(
             tmp_files["table"], "_".join([prefix, suffixs["re_csv"]]))
         reextract_rbs(scan_out, main_table, rescan_table)
         # Keep only the table of the second pass, under the first name.
         shutil.move(rescan_table, main_table)
         modify_table(main_table, args_ribo.output_all)
     return prefixs
Exemple #6
0
 def _scan_extract_rfam(self, prefixs, args_ribo, tmp_files, suffixs,
                        feature, rfam, log):
     '''Extract candidate sequences and scan them, logging each step.

     For each ".gff" file in ``self.gff_path`` this calls
     ``extract_potential_rbs``, ``_run_cmscan`` ("txt"),
     ``regenerate_seq``, ``_run_cmscan`` ("re_txt") and
     ``reextract_rbs`` (with ``args_ribo.cutoff``); the second table is
     moved over the first one, which is passed to ``modify_table``.
     Progress goes to stdout via ``print`` and to ``log`` via
     ``log.write``.

     When ``self.tss_path`` is None, None is passed as the TSS file.

     Returns ``prefixs`` with one prefix appended per processed file
     (the list is changed in place).
     '''
     version_note = ("Please make sure the version of Infernal is at "
                     "least 1.1.1.\n")
     for gff in os.listdir(self.gff_path):
         if not gff.endswith(".gff"):
             continue
         prefix = gff.replace(".gff", "")
         first_seq = os.path.join(tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)

         # --- extraction of the candidate sequences ---
         print("Extracting sequences of candidates for {0}".format(
             prefix))
         tss_file = (os.path.join(self.tss_path, prefix + "_TSS.gff")
                     if self.tss_path is not None else None)
         log.write(
             "Running extract_RBS.py to extract potential sequences of "
             "riboswitches/RNA thermometers for {0}.\n".format(prefix))
         genome_fa = os.path.join(self.fasta_path, prefix + ".fa")
         annotation = os.path.join(self.gff_path, gff)
         transcript_gff = os.path.join(
             self.tran_path, prefix + "_transcript.gff")
         extract_potential_rbs(genome_fa, annotation, tss_file,
                               transcript_gff, first_seq, args_ribo,
                               feature)
         log.write("\t" + first_seq + " is temporary generated.\n")

         # --- first scan ---
         print("Pre-scanning of {0}".format(prefix))
         log.write(
             "Using Infernal to pre-scan riboswitches/RNA thermometers "
             "for {0}.\n".format(prefix))
         log.write(version_note)
         first_scan_file = self._run_cmscan(
             args_ribo, first_seq, "txt", prefix, tmp_files, suffixs,
             rfam, log)

         # --- regenerate the sequences from the first scan ---
         sec_seq = os.path.join(
             tmp_files["fasta"], "_".join([prefix, "regenerate.fa"]))
         first_table = os.path.join(
             tmp_files["table"], "_".join([prefix, suffixs["csv"]]))
         log.write(
             "Running recompute_RBS.py to update the potential sequences "
             "of riboswitches/RNA thermometers for {0} based on the "
             "pre-scanning results.\n".format(prefix))
         regenerate_seq(first_scan_file, first_seq, first_table, sec_seq)
         log.write("\t" + sec_seq + " is temporary generated.\n")

         # --- second scan ---
         print("Scanning of {0}".format(prefix))
         log.write(
             "Using Infernal to scan riboswitches/RNA thermometers "
             "for {0}.\n".format(prefix))
         log.write(version_note)
         sec_scan_file = self._run_cmscan(
             args_ribo, sec_seq, "re_txt", prefix, tmp_files, suffixs,
             rfam, log)

         # --- final table ---
         sec_table = os.path.join(
             tmp_files["table"], "_".join([prefix, suffixs["re_csv"]]))
         log.write(
             "Running recompute_RBS.py and modify_rbs_table.py to "
             "generate tables for {0} based on the scanning "
             "results.\n".format(prefix))
         reextract_rbs(sec_scan_file, first_table, sec_table,
                       args_ribo.cutoff)
         shutil.move(sec_table, first_table)
         modify_table(first_table, args_ribo.output_all)
     return prefixs
Exemple #7
0
 def _scan_extract_rfam(self, prefixs, args_ribo, tmp_files, suffixs,
                        feature, rfam, log):
     '''Run extraction and both scans for each ".gff" in self.gff_path.

     Per file, in order: ``extract_potential_rbs``, ``_run_cmscan``
     ("txt"), ``regenerate_seq``, ``_run_cmscan`` ("re_txt"),
     ``reextract_rbs`` (given ``args_ribo.cutoff``), a move of the second
     table onto the first, and ``modify_table``. Each stage is announced
     on stdout and recorded through ``log.write``.

     The TSS path handed to ``extract_potential_rbs`` is None whenever
     ``self.tss_path`` is None.

     ``prefixs`` receives one entry per processed file and is returned.
     '''
     matching = (name for name in os.listdir(self.gff_path)
                 if name.endswith(".gff"))
     for gff in matching:
         prefix = gff.replace(".gff", "")
         seq_pass1 = os.path.join(tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)
         print("Extracting sequences of candidates for {0}".format(prefix))

         tss_file = None
         if self.tss_path is not None:
             tss_file = os.path.join(self.tss_path, prefix + "_TSS.gff")

         extract_msg = ("Running extract_RBS.py to extract potential "
                        "sequences of riboswitches/RNA thermometers for "
                        "{0}.\n")
         log.write(extract_msg.format(prefix))
         extract_potential_rbs(
             os.path.join(self.fasta_path, prefix + ".fa"),
             os.path.join(self.gff_path, gff),
             tss_file,
             os.path.join(self.tran_path, prefix + "_transcript.gff"),
             seq_pass1,
             args_ribo,
             feature,
         )
         log.write("\t" + seq_pass1 + " is temporary generated.\n")

         print("Pre-scanning of {0}".format(prefix))
         prescan_msg = ("Using Infernal to pre-scan riboswitches/RNA "
                        "thermometers for {0}.\n")
         log.write(prescan_msg.format(prefix))
         log.write("Please make sure the version of Infernal is "
                   "at least 1.1.1.\n")
         scan_pass1 = self._run_cmscan(
             args_ribo, seq_pass1, "txt", prefix, tmp_files, suffixs,
             rfam, log)

         seq_pass2 = os.path.join(
             tmp_files["fasta"], "_".join((prefix, "regenerate.fa")))
         table_pass1 = os.path.join(
             tmp_files["table"], "_".join((prefix, suffixs["csv"])))
         update_msg = ("Running recompute_RBS.py to update the potential "
                       "sequences of riboswitches/RNA thermometers for "
                       "{0} based on the pre-scanning results.\n")
         log.write(update_msg.format(prefix))
         regenerate_seq(scan_pass1, seq_pass1, table_pass1, seq_pass2)
         log.write("\t" + seq_pass2 + " is temporary generated.\n")

         print("Scanning of {0}".format(prefix))
         scan_msg = ("Using Infernal to scan riboswitches/RNA "
                     "thermometers for {0}.\n")
         log.write(scan_msg.format(prefix))
         log.write("Please make sure the version of Infernal is "
                   "at least 1.1.1.\n")
         scan_pass2 = self._run_cmscan(
             args_ribo, seq_pass2, "re_txt", prefix, tmp_files, suffixs,
             rfam, log)

         table_pass2 = os.path.join(
             tmp_files["table"], "_".join((prefix, suffixs["re_csv"])))
         table_msg = ("Running recompute_RBS.py and modify_rbs_table.py "
                      "to generate tables for {0} based on the scanning "
                      "results.\n")
         log.write(table_msg.format(prefix))
         reextract_rbs(scan_pass2, table_pass1, table_pass2,
                       args_ribo.cutoff)
         # The second-pass table replaces the first-pass table on disk.
         shutil.move(table_pass2, table_pass1)
         modify_table(table_pass1, args_ribo.output_all)
     return prefixs