Exemple #1
0
 def test_extract_blast(self):
     """Exercise ``esi.extract_blast`` on the nr table, then the sRNA table.

     Both runs write to the same ``out.gff`` / ``out.csv`` pair inside
     ``self.test_folder``.  The GFF output is compared with the expected
     fixture after each run; the CSV output is compared only after the
     sRNA run.
     """
     # Module-level patch; it is not restored when the test finishes.
     esi.read_gff = Mock_func().mock_read_gff
     folder = self.test_folder
     nr_table = os.path.join(folder, "nr_table")
     gen_file(nr_table, self.example.blast_nr_all)
     srna_table = os.path.join(folder, "srna_table")
     gen_file(srna_table, self.example.blast_srna_all)
     gff_out = os.path.join(folder, "out.gff")
     csv_out = os.path.join(folder, "out.csv")

     # --- nr database -----------------------------------------------------
     esi.extract_blast(nr_table, "test.srna", gff_out, csv_out,
                       "nr", None, None)
     records, attrs = extract_info(gff_out, "file")
     expected, _expected_attrs = extract_info(self.example.out_nr_gff,
                                              "string")
     self.assertEqual(set(records), set(expected[1:]))
     # NOTE(review): each side of these assertions is the same object, so
     # they only verify that two attribute records exist.  Presumably the
     # reference attributes were meant to be compared -- confirm.
     for index in (0, 1):
         self.assertEqual(set(attrs[index]), set(attrs[index]))
     # The nr table is only loaded here; its content is not asserted.
     import_data(csv_out)

     # --- sRNA database ---------------------------------------------------
     esi.extract_blast(srna_table, "test.srna", gff_out, csv_out,
                       "sRNA", None, None)
     records, attrs = extract_info(gff_out, "file")
     expected, _expected_attrs = extract_info(self.example.out_srna_gff,
                                              "string")
     self.assertEqual(set(records), set(expected[1:]))
     # NOTE(review): same self-comparison as in the nr section above.
     for index in (0, 1):
         self.assertEqual(set(attrs[index]), set(attrs[index]))
     table_rows = import_data(csv_out)
     expected_rows = self.example.out_srna_csv.split("\n")
     self.assertEqual(set(table_rows), set(expected_rows))
Exemple #2
0
 def _blast(self, database, database_format, data_type, args_srna, prefixs,
            program, database_type, e, filters):
     """Blast the sRNA sequences of every prefix and merge the hits back.

     For each prefix the sRNA sequences are extracted (when needed),
     searched against ``database`` and the raw result is handed to
     ``extract_blast``; the file it writes then replaces the prefix's
     sRNA file.

     Args:
         database: Database to search.  When ``None`` an error message is
             printed and nothing else happens.
         database_format: If truthy, ``database`` is first passed through
             ``self._formatdb`` and replaced by its return value.
         data_type: Forwarded to ``self._formatdb``.
         args_srna: Options object; ``out_folder``, ``blastdb`` and
             ``para_blast`` are read from it.
         prefixs: Iterable of prefixes to process.
         program: Forwarded to ``self._run_para_blast``.
         database_type: Label used in the file names; ``"nr"`` selects the
             strand-separated search.
         e: Forwarded to ``self._run_para_blast`` (presumably the e-value
             cutoff -- confirm against that method).
         filters: Container of filter names, or ``None``.  When it holds
             ``"sec_str"`` and ``database_type`` is ``"nr"`` the sequence
             file is regenerated even if it already exists.
     """
     if database is None:
         print("Error: No database was assigned!")
         return
     if database_format:
         database = self._formatdb(database, data_type,
                                   args_srna.out_folder,
                                   args_srna.blastdb, database_type)
     misc_folder = os.path.join(args_srna.out_folder,
                                "blast_results_and_misc")
     for prefix in prefixs:
         blast_file = os.path.join(
             misc_folder,
             "_".join([database_type, "blast", prefix + ".txt"]))
         # Start from a clean result file for this prefix.
         if os.path.exists(blast_file):
             os.remove(blast_file)
         srna_file = "_".join([self.prefixs["basic"], prefix])
         out_file = os.path.join(
             args_srna.out_folder,
             "_".join(["tmp", database_type, prefix]))
         print("Running Blast of {0} in {1}".format(prefix, database))
         seq_file = os.path.join(args_srna.out_folder,
                                 "_".join(["sRNA_seq", prefix]))
         # ``filters`` may be None; only probe it when it is a container.
         force_refresh = (database_type == "nr" and
                          filters is not None and
                          "sec_str" in filters)
         # ``seq_file`` is a full path, so it has to be tested with
         # os.path.exists(); os.listdir() yields bare entry names and a
         # membership test against it could never match.
         if force_refresh or not os.path.exists(seq_file):
             self.helper.get_seq(
                 srna_file, os.path.join(self.fasta_path,
                                         prefix + ".fa"), seq_file)
         if database_type == "nr":
             # Search each strand separately, then merge both results
             # into ``blast_file``.
             tmp_plus, tmp_minus = self._get_strand_fasta(
                 seq_file, args_srna.out_folder)
             tmp_blast = os.path.join(misc_folder, "tmp_blast.txt")
             if os.path.exists(tmp_blast):
                 os.remove(tmp_blast)
             self._run_para_blast(program, database, e, tmp_plus,
                                  tmp_blast, "plus",
                                  args_srna.para_blast)
             self._run_para_blast(program, database, e, tmp_minus,
                                  blast_file, "minus",
                                  args_srna.para_blast)
             self.helper.merge_file(tmp_blast, blast_file)
             os.remove(tmp_plus)
             os.remove(tmp_minus)
         else:
             self._run_para_blast(program, database, e, seq_file,
                                  blast_file, "both",
                                  args_srna.para_blast)
         extract_blast(blast_file, srna_file, out_file,
                       out_file + ".csv", database_type)
         # The annotated output becomes the new sRNA file of the prefix.
         shutil.move(out_file, srna_file)
Exemple #3
0
 def _blast(self, database, database_format, data_type, args_srna,
            prefixs, program, database_type, e):
     """Blast the sRNA sequences of every prefix and merge the hits back.

     For each prefix the sRNA sequences are extracted (when the sequence
     file does not exist yet), searched against ``database`` and the raw
     result is handed to ``extract_blast``; the file it writes then
     replaces the prefix's sRNA file.

     Args:
         database: Database to search.  When ``None`` an error message is
             printed and nothing else happens.
         database_format: If truthy, ``self._formatdb`` is called on
             ``database`` before searching.
         data_type: Forwarded to ``self._formatdb``.
         args_srna: Options object; ``out_folder`` and ``blast_path`` are
             read from it.
         prefixs: Iterable of prefixes to process.
         program: Forwarded to ``self._run_blast``.
         database_type: Label used in the file names; ``"nr"`` selects the
             strand-separated search.
         e: Forwarded to ``self._run_blast`` (presumably the e-value
             cutoff -- confirm against that method).
     """
     if database is None:
         print("Error: No database assigned!")
         return
     if database_format:
         self._formatdb(database, data_type, args_srna.out_folder,
                        args_srna.blast_path, database_type)
     for prefix in prefixs:
         blast_file = os.path.join(
             args_srna.out_folder, "blast_result_and_misc",
             "_".join([database_type, "blast", prefix + ".txt"]))
         srna_file = "_".join([self.prefixs["basic"], prefix])
         out_file = os.path.join(
             args_srna.out_folder,
             "_".join(["tmp", database_type, prefix]))
         print("Running Blast of {0}".format(prefix))
         seq_file = os.path.join(
             args_srna.out_folder, "_".join(["sRNA_seq", prefix]))
         # ``seq_file`` is a full path, so it has to be tested with
         # os.path.exists(); os.listdir() yields bare entry names and a
         # membership test against it could never match.
         if not os.path.exists(seq_file):
             self.helper.get_seq(
                 srna_file,
                 os.path.join(self.fasta_path, prefix + ".fa"),
                 seq_file)
         if database_type == "nr":
             # Search each strand separately, then merge both results
             # into ``blast_file``.
             tmp_plus, tmp_minus = self._get_strand_fasta(
                 seq_file, args_srna.out_folder)
             # Scratch file, relative to the current working directory.
             tmp_blast = "tmp_blast.txt"
             self._run_blast(args_srna.blast_path, program, database, e,
                             tmp_plus, tmp_blast, "plus")
             self._run_blast(args_srna.blast_path, program, database, e,
                             tmp_minus, blast_file, "minus")
             self.helper.merge_file(tmp_blast, blast_file)
             os.remove(tmp_blast)
             os.remove(tmp_plus)
             os.remove(tmp_minus)
         else:
             self._run_blast(args_srna.blast_path, program, database, e,
                             seq_file, blast_file, "both")
         extract_blast(blast_file, srna_file, out_file,
                       out_file + ".csv", database_type)
         # The annotated output becomes the new sRNA file of the prefix.
         shutil.move(out_file, srna_file)
    def test_extract_blast(self):
        """Exercise ``esi.extract_blast`` on the nr table, then the sRNA table.

        Both runs write to the same ``out.gff`` / ``out.csv`` pair inside
        ``self.test_folder``.  The GFF output is compared with the expected
        fixture after each run; the CSV output is compared only after the
        sRNA run.
        """
        # Module-level patch; it is not restored when the test finishes.
        esi.read_gff = Mock_func().mock_read_gff
        folder = self.test_folder
        nr_table = os.path.join(folder, "nr_table")
        gen_file(nr_table, self.example.blast_nr_all)
        srna_table = os.path.join(folder, "srna_table")
        gen_file(srna_table, self.example.blast_srna_all)
        gff_out = os.path.join(folder, "out.gff")
        csv_out = os.path.join(folder, "out.csv")

        # --- nr database --------------------------------------------------
        esi.extract_blast(nr_table, "test.srna", gff_out, csv_out, "nr")
        records, attrs = extract_info(gff_out, "file")
        expected, _expected_attrs = extract_info(self.example.out_nr_gff,
                                                 "string")
        self.assertEqual(set(records), set(expected[1:]))
        # NOTE(review): each side of these assertions is the same object, so
        # they only verify that two attribute records exist.  Presumably the
        # reference attributes were meant to be compared -- confirm.
        for index in (0, 1):
            self.assertEqual(set(attrs[index]), set(attrs[index]))
        # The nr table is only loaded here; its content is not asserted.
        import_data(csv_out)
        # Disabled check, kept for reference:
        #   self.assertEqual(set(datas),
        #                    set(self.example.out_nr_csv.split("\n")))

        # --- sRNA database ------------------------------------------------
        esi.extract_blast(srna_table, "test.srna", gff_out, csv_out, "sRNA")
        records, attrs = extract_info(gff_out, "file")
        expected, _expected_attrs = extract_info(self.example.out_srna_gff,
                                                 "string")
        self.assertEqual(set(records), set(expected[1:]))
        # NOTE(review): same self-comparison as in the nr section above.
        for index in (0, 1):
            self.assertEqual(set(attrs[index]), set(attrs[index]))
        table_rows = import_data(csv_out)
        expected_rows = self.example.out_srna_csv.split("\n")
        self.assertEqual(set(table_rows), set(expected_rows))