예제 #1
0
class TestConverter(unittest.TestCase):
    def setUp(self):
        self.converter = Converter()
        self.example = Example()
        self.converter.gff3parser = Mock_gff3_parser
        self.converter._print_rntptt_title = Mock_func().print_rntptt_title
        self.converter.tsspredator = Mock_TSSPredatorReader()
        self.converter._read_file = Mock_func().mock_read_file
        self.gff_file = self.example.gff_file
        self.ptt_out = self.example.ptt_out
        self.rnt_out = self.example.rnt_out
        self.srna_out = self.example.srna_out
        self.embl_file = self.example.embl_file
        self.embl_out = self.example.embl_out
        self.multi_embl = self.example.multi_embl
        self.gff_out = self.example.gff_out
        self.mastertable = self.example.mastertable
        self.tss_file = self.example.tss_file
        self.fasta_file = self.example.fasta_file
        self.transterm = self.example.transterm
        self.term_file = self.example.term_file
        self.circ_file = self.example.circrna_table
        self.circ_all = self.example.circrna_all
        self.circ_best = self.example.circrna_best
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_print_rntptt_file(self):
        cdss = []
        genes = []
        rnas = []
        gff_dict = Example().gff_dict
        for gff in gff_dict:
            if gff["feature"] == "gene":
                genes.append(self.converter.gff3parser.entries(self, gff))
            elif gff["feature"] == "CDS":
                cdss.append(self.converter.gff3parser.entries(self, gff))
            elif gff["feature"] == "tRNA":
                rnas.append(self.converter.gff3parser.entries(self, gff))
        out_p = StringIO()
        out_r = StringIO()
        self.converter._print_rntptt_file(out_p, cdss, genes)
        self.converter._print_rntptt_file(out_r, rnas, genes)
        self.assertEqual(out_p.getvalue().split("\n")[:-1],
                         self.example.ptt_out_list)
        self.assertEqual(out_r.getvalue().split("\n")[:-1],
                         self.example.rnt_out_list)
        out_p.close()
        out_r.close()

    def test_srna2pttrnt(self):
        srna_input_file = os.path.join(self.test_folder, "srna.gff")
        srna_output_file = os.path.join(self.test_folder, "srna.out")
        with open(srna_input_file, "w") as fh:
            fh.write(self.gff_file)
        srnas = []
        self.converter._srna2rntptt(srna_input_file, srna_output_file, srnas,
                                    1234567)
        datas = import_data(srna_output_file)
        self.assertEqual(set(datas), set(self.srna_out.split("\n")))

    def test_multi_embl_pos(self):
        embls = []
        for line in self.embl_file.split("\n"):
            datas = self.converter._multi_embl_pos(line.strip())
            if datas != "Wrong":
                embls.append(datas)
        for index in range(0, 7):
            self.assertDictEqual(embls[index], self.embl_out[index])
        for index in range(0, 2):
            self.assertDictEqual(embls[-1]["pos"][index],
                                 self.multi_embl[index])

    def test_parser_embl_data(self):
        embl_file = os.path.join(self.test_folder, "test.embl")
        embl_out = os.path.join(self.test_folder, "test.embl_out")
        out = StringIO()
        with open(embl_file, "w") as eh:
            for line in self.embl_file.split("\n"):
                eh.write(line + "\n")
        info = self.converter._parser_embl_data(embl_file, out)
        datas = out.getvalue().split("\n")
        self.assertEqual(set(datas[:-1]), set(self.gff_out.split("\n")))
        self.assertEqual(info[0], "NC_007795.1")
        for index in range(0, 2):
            self.assertDictEqual(info[1]["pos"][index], self.multi_embl[index])
        out.close()

    def test_multi_tss_class(self):
        nums = {"tss": 0, "tss_uni": 0, "class": 1}
        utrs = {"total": [], "pri": [], "sec": []}
        tss_features = {"tss_types": [], "locus_tags": [], "utr_lengths": []}
        tss_index = defaultdict(lambda: 0)
        master_file = os.path.join(self.test_folder, "test.tsv")
        fh = StringIO(self.mastertable)
        for tss in self.converter.tsspredator.entries(fh):
            self.converter._multi_tss_class(tss, tss_index, tss_features, nums,
                                            utrs)
        fh.close()
        self.assertDictEqual(nums, {'tss_uni': 0, 'class': 5, 'tss': 2})

    def test_convert_mastertable2gff(self):
        master_file = os.path.join(self.test_folder, "test.tsv")
        with open(master_file, "w") as th:
            th.write(self.mastertable)
        out_gff = os.path.join(self.test_folder, "test.tsv_out")
        self.converter.convert_mastertable2gff(master_file, "ANNOgesic", "TSS",
                                               "aaa", out_gff)
        datas = import_data(out_gff)
        self.assertEqual(set(datas), set(self.tss_file.split("\n")))

    def test_convert_gff2rntptt(self):
        srna_input_file = os.path.join(self.test_folder, "srna.gff")
        srna_output_file = os.path.join(self.test_folder, "srna.out")
        gff_file = os.path.join(self.test_folder, "test.gff")
        rnt_file = os.path.join(self.test_folder, "test.rnt")
        ptt_file = os.path.join(self.test_folder, "test.ptt")
        fasta_file = os.path.join(self.test_folder, "test.fa")
        with open(srna_input_file, "w") as fh:
            fh.write(self.gff_file)
        with open(gff_file, "w") as fh:
            fh.write(self.gff_file)
        with open(fasta_file, "w") as fh:
            fh.write(self.fasta_file)
        self.converter.convert_gff2rntptt(gff_file, fasta_file, ptt_file,
                                          rnt_file, srna_input_file,
                                          srna_output_file)
        self.assertTrue(srna_output_file)
        self.assertTrue(rnt_file)
        self.assertTrue(ptt_file)

    def test_convert_embl2gff(self):
        embl_file = os.path.join(self.test_folder, "test.embl")
        gff_file = os.path.join(self.test_folder, "test.embl_out")
        with open(embl_file, "w") as eh:
            for line in self.embl_file.split("\n"):
                eh.write(line + "\n")
        self.converter.convert_embl2gff(embl_file, gff_file)
        datas = import_data(gff_file)
        self.assertEqual(set(datas[1:-2]), set(self.gff_out.split("\n")))

    def test_convert_transtermhp2gff(self):
        transterm_file = os.path.join(self.test_folder,
                                      "test_best_terminator_after_gene.bag")
        gff_file = os.path.join(self.test_folder, "transterm.gff")
        with open(transterm_file, "w") as th:
            th.write(self.transterm)
        self.converter.convert_transtermhp2gff(transterm_file, gff_file)
        datas = import_data(gff_file)
        self.assertEqual(set(datas), set(self.term_file.split("\n")))

    def get_info(datas):
        f_datas = []
        for data in datas:
            if not data.startswith("#"):
                f_datas.append("\t".join(data.split("\t")[:8]))
        return f_datas

    def test_convert_circ2gff(self):
        circ_file = os.path.join(self.test_folder, "circ.csv")
        out_all = os.path.join(self.test_folder, "all.gff")
        out_filter = os.path.join(self.test_folder, "best.gff")
        with open(circ_file, "w") as ch:
            ch.write(self.circ_file)
        args = self.mock_args.mock()
        args.start_ratio = 0.5
        args.end_ratio = 0.5
        args.support = 5
        self.converter.convert_circ2gff(circ_file, args, out_all, out_filter)
        datas = import_data(out_all)
        f_datas = []
        for data in datas:
            if not data.startswith("#"):
                f_datas.append("\t".join(data.split("\t")[:8]))
        c_datas = []
        for data in self.circ_all.split("\n"):
            if not data.startswith("#"):
                c_datas.append("\t".join(data.split("\t")[:8]))
        self.assertListEqual(f_datas, c_datas)
        datas = import_data(out_filter)
        f_datas = []
        for data in datas:
            if not data.startswith("#"):
                f_datas.append("\t".join(data.split("\t")[:8]))
        c_datas = []
        for data in self.circ_best.split("\n"):
            if not data.startswith("#"):
                c_datas.append("\t".join(data.split("\t")[:8]))
        self.assertListEqual(f_datas, c_datas)
예제 #2
0
class RATT(object):
    '''annotation transfer'''
    def __init__(self, args_ratt):
        self.multiparser = Multiparser()
        self.converter = Converter()
        self.format_fixer = FormatFixer()
        self.helper = Helper()
        if args_ratt.ref_gbk:
            self.gbk = os.path.join(args_ratt.ref_gbk, "gbk_tmp")
            self.gbk_tmp = os.path.join(self.gbk, "tmp")
            self.embl = os.path.join(args_ratt.ref_gbk, "embls")
        if args_ratt.ref_embls:
            self.embl = args_ratt.ref_embls
        self.ratt_log = os.path.join(args_ratt.output_path, "ratt_log.txt")
        self.tmp_files = {
            "tar": os.path.join(args_ratt.tar_fastas, "tmp"),
            "ref": os.path.join(args_ratt.ref_fastas, "tmp"),
            "out_gff": os.path.join(args_ratt.gff_outfolder, "tmp"),
            "gff": os.path.join(args_ratt.gff_outfolder, "tmp.gff"),
            "ptt": os.path.join(args_ratt.gff_outfolder, "tmp.ptt"),
            "rnt": os.path.join(args_ratt.gff_outfolder, "tmp.rnt")
        }

    def _convert_to_pttrnt(self, gffs, files):
        for gff in files:
            if gff.endswith(".gff"):
                gff = os.path.join(gffs, gff)
                filename = gff.split("/")
                prefix = filename[-1][:-4]
                rnt = gff[:-3] + "rnt"
                ptt = gff[:-3] + "ptt"
                fasta = self.helper.get_correct_file(self.tmp_files["tar"],
                                                     ".fa", prefix, None, None)
                if fasta:
                    self.converter.convert_gff2rntptt(gff, fasta, ptt, rnt,
                                                      None, None)

    def _remove_files(self, args_ratt, out_gbk):
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".gff", "file")
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".ptt", "file")
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".rnt", "file")
        self.helper.move_all_content(self.tmp_files["out_gff"],
                                     args_ratt.gff_outfolder, None)
        shutil.rmtree(self.tmp_files["out_gff"])
        shutil.rmtree(self.tmp_files["tar"])
        shutil.rmtree(self.tmp_files["ref"])
        self.helper.remove_tmp_dir(args_ratt.tar_fastas)
        self.helper.remove_tmp_dir(args_ratt.ref_fastas)
        self.helper.remove_tmp_dir(args_ratt.ref_embls)
        self.helper.remove_tmp_dir(args_ratt.ref_gbk)

    def _convert_to_gff(self, ratt_result, args_ratt, files):
        name = ratt_result.split(".")
        filename = ".".join(name[1:-2]) + ".gff"
        output_file = os.path.join(args_ratt.output_path, filename)
        self.converter.convert_embl2gff(
            os.path.join(args_ratt.output_path, ratt_result), output_file)
        self.format_fixer.fix_ratt(output_file, ".".join(name[1:-2]),
                                   "tmp_gff")
        shutil.move("tmp_gff", output_file)
        shutil.copy(output_file, os.path.join(args_ratt.gff_outfolder,
                                              filename))
        files.append(filename)

    def _parser_embl_gbk(self, files):
        self.helper.check_make_folder(self.gbk)
        for file_ in files:
            close = False
            with open(file_, "r") as f_h:
                for line in f_h:
                    if (line.startswith("LOCUS")):
                        out = open(self.gbk_tmp, "w")
                        datas = line.split(" ")
                        for data in datas:
                            if (len(data) != 0) and (data != "LOCUS"):
                                filename = ".".join([data, "gbk"])
                                break
                    elif (line.startswith("VERSION")):
                        datas = line.split(" ")
                        for data in datas:
                            if (len(data) != 0) and (data != "VERSION"):
                                new_filename = ".".join([data, "gbk"])
                                break
                        if new_filename.find(filename):
                            filename = new_filename
                    if out:
                        out.write(line)
                    if line.startswith("//"):
                        out.close()
                        close = True
                        shutil.move(self.gbk_tmp,
                                    os.path.join(self.gbk, filename))
            if not close:
                out.close()
        return self.gbk

    def _convert_embl(self, ref_embls):
        '''convert gbk to embl'''
        detect_gbk = False
        gbks = []
        out_gbk = None
        for embl in os.listdir(ref_embls):
            if (embl.endswith(".gbk")) or (embl.endswith(".gbff")) or (
                    embl.endswith(".gb")):
                detect_gbk = True
                gbks.append(os.path.join(ref_embls, embl))
        if not detect_gbk:
            print("Error: Please assign proper Genebank files!")
            sys.exit()
        elif detect_gbk:
            out_gbk = self._parser_embl_gbk(gbks)
            self.converter.convert_gbk2embl(out_gbk)
            self.helper.check_make_folder(self.embl)
            self.helper.move_all_content(out_gbk, self.embl, [".embl"])
        return out_gbk

    def _run_ratt(self, args_ratt, tar, ref, out):
        call([
            args_ratt.ratt_path, self.embl,
            os.path.join(self.tmp_files["tar"], tar + ".fa"),
            args_ratt.element, args_ratt.transfer_type,
            os.path.join(self.tmp_files["ref"], ref + ".fa")
        ],
             stdout=out,
             stderr=DEVNULL)

    def _format_and_run(self, args_ratt):
        print("Running RATT")
        for pair in args_ratt.pairs:
            ref = pair.split(":")[0]
            tar = pair.split(":")[1]
            out = open(self.ratt_log, "w+")
            self._run_ratt(args_ratt, tar, ref, out)
            for filename in os.listdir():
                if ("final" in filename):
                    shutil.move(filename,
                                os.path.join(args_ratt.output_path, filename))
                elif (args_ratt.element in filename) or (
                        "query" in filename) or ("Reference" in filename) or (
                            "Query" in filename) or ("Sequences" in filename):
                    if os.path.isfile(filename):
                        os.remove(filename)
                    if os.path.isdir(filename):
                        shutil.rmtree(filename)
        out.close()

    def annotation_transfer(self, args_ratt):
        self.multiparser.parser_fasta(args_ratt.tar_fastas)
        self.multiparser.parser_fasta(args_ratt.ref_fastas)
        out_gbk = None
        if args_ratt.ref_embls is None:
            out_gbk = self._convert_embl(args_ratt.ref_gbk)
        self._format_and_run(args_ratt)
        if args_ratt.convert:
            files = []
            for data in os.listdir(args_ratt.output_path):
                if "final.embl" in data:
                    self._convert_to_gff(data, args_ratt, files)
                    self._convert_to_pttrnt(args_ratt.gff_outfolder, files)
            self.helper.check_make_folder(self.tmp_files["out_gff"])
            for folder in os.listdir(args_ratt.tar_fastas):
                files = []
                if "_folder" in folder:
                    datas = folder.split("_folder")
                    prefix = ".".join(datas[0].split(".")[:-1])
                    for file_ in os.listdir(
                            os.path.join(args_ratt.tar_fastas, folder)):
                        files.append(file_[:-3])
                    for gff in os.listdir(args_ratt.gff_outfolder):
                        for file_ in files:
                            if (".gff" in gff) and (file_ == gff[:-4]):
                                self.helper.merge_file(
                                    os.path.join(args_ratt.gff_outfolder, gff),
                                    self.tmp_files["gff"])
                            if (".ptt" in gff) and (file_ == gff[:-4]):
                                self.helper.merge_file(
                                    os.path.join(args_ratt.gff_outfolder, gff),
                                    self.tmp_files["ptt"])
                            if (".rnt" in gff) and (file_ == gff[:-4]):
                                self.helper.merge_file(
                                    os.path.join(args_ratt.gff_outfolder, gff),
                                    self.tmp_files["rnt"])
                    if os.path.exists(self.tmp_files["gff"]):
                        shutil.move(
                            self.tmp_files["gff"],
                            os.path.join(self.tmp_files["out_gff"],
                                         prefix + ".gff"))
                        shutil.move(
                            self.tmp_files["ptt"],
                            os.path.join(self.tmp_files["out_gff"],
                                         prefix + ".ptt"))
                        shutil.move(
                            self.tmp_files["rnt"],
                            os.path.join(self.tmp_files["out_gff"],
                                         prefix + ".rnt"))
                    else:
                        print("Error: Please check your fasta or "
                              "annotation files, they should only contain "
                              "the query genome. And make sure your RATT can "
                              "work properly (check $ANNOgesic/output/"
                              "annotation_transfer/ratt_log.txt).")
        self._remove_files(args_ratt, out_gbk)
예제 #3
0
class RATT(object):

    def __init__(self, args_ratt):
        self.multiparser = Multiparser()
        self.converter = Converter()
        self.format_fixer = FormatFixer()
        self.helper = Helper()
        self.gbk = os.path.join(args_ratt.ref_embls, "gbk_tmp")
        self.gbk_tmp = os.path.join(self.gbk, "tmp")
        self.embl = os.path.join(args_ratt.ref_embls, "embls")
        self.ratt_log = os.path.join(args_ratt.output_path, "ratt_log.txt")
        self.tmp_files = {"tar": os.path.join(args_ratt.tar_fastas, "tmp"),
                          "ref": os.path.join(args_ratt.ref_fastas, "tmp"),
                          "out_gff": os.path.join(args_ratt.gff_outfolder,
                                                  "tmp"),
                          "gff": os.path.join(args_ratt.gff_outfolder,
                                              "tmp.gff"),
                          "ptt": os.path.join(args_ratt.gff_outfolder,
                                              "tmp.ptt"),
                          "rnt": os.path.join(args_ratt.gff_outfolder,
                                              "tmp.rnt")}

    def _convert_to_pttrnt(self, gffs, files):
        for gff in files:
            if gff.endswith(".gff"):
                gff = os.path.join(gffs, gff)
                filename = gff.split("/")
                prefix = filename[-1][:-4]
                rnt = gff[:-3] + "rnt"
                ptt = gff[:-3] + "ptt"
                fasta = self.helper.get_correct_file(self.tmp_files["tar"],
                                                     ".fa", prefix, None, None)
                if fasta:
                    self.converter.convert_gff2rntptt(gff, fasta, ptt, rnt,
                                                      None, None)

    def _remove_files(self, args_ratt, out_gbk):
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".gff", "file")
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".ptt", "file")
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".rnt", "file")
        self.helper.move_all_content(self.tmp_files["out_gff"],
                                     args_ratt.gff_outfolder, None)
        shutil.rmtree(self.tmp_files["out_gff"])
        shutil.rmtree(self.tmp_files["tar"])
        shutil.rmtree(self.tmp_files["ref"])
        shutil.rmtree(self.embl)
        self.helper.remove_all_content(args_ratt.tar_fastas, "_folder", "dir")
        self.helper.remove_all_content(args_ratt.ref_fastas, "_folder", "dir")
        if out_gbk:
            shutil.rmtree(out_gbk)

    def _convert_to_gff(self, ratt_result, args_ratt, files):
        name = ratt_result.split(".")
        filename = ".".join(name[1:-2]) + ".gff"
        output_file = os.path.join(args_ratt.output_path, filename)
        self.converter.convert_embl2gff(
             os.path.join(args_ratt.output_path, ratt_result), output_file)
        self.format_fixer.fix_ratt(output_file, ".".join(name[1:-2]),
                                   "tmp_gff")
        shutil.move("tmp_gff", output_file)
        shutil.copy(output_file, os.path.join(args_ratt.gff_outfolder,
                                              filename))
        files.append(filename)

    def _parser_embl_gbk(self, files):
        self.helper.check_make_folder(self.gbk)
        for file_ in files:
            close = False
            with open(file_, "r") as f_h:
                for line in f_h:
                    if (line.startswith("LOCUS")):
                        out = open(self.gbk_tmp, "w")
                        datas = line.split(" ")
                        for data in datas:
                            if (len(data) != 0) and (data != "LOCUS"):
                                filename = ".".join([data, "gbk"])
                                break
                    elif (line.startswith("VERSION")):
                        datas = line.split(" ")
                        for data in datas:
                            if (len(data) != 0) and (data != "VERSION"):
                                new_filename = ".".join([data, "gbk"])
                                break
                        if new_filename.find(filename):
                            filename = new_filename
                    if out:
                        out.write(line)
                    if line.startswith("//"):
                        out.close()
                        close = True
                        shutil.move(self.gbk_tmp,
                                    os.path.join(self.gbk, filename))
            if not close:
                out.close()
        return self.gbk

    def _convert_embl(self, ref_embls):
        detect_gbk = False
        gbks = []
        out_gbk = None
        for embl in os.listdir(ref_embls):
            if embl.endswith(".gbk"):
                detect_gbk = True
                gbks.append(os.path.join(ref_embls, embl))
        if not detect_gbk:
            print("Error: please assign proper folder for Genebank file!!!")
            sys.exit()
        elif detect_gbk:
            out_gbk = self._parser_embl_gbk(gbks)
            self.converter.convert_gbk2embl(out_gbk)
            self.helper.check_make_folder(self.embl)
            self.helper.move_all_content(out_gbk, self.embl, [".embl"])
        return out_gbk

    def _run_ratt(self, args_ratt, tar, ref, out):
        call([args_ratt.ratt_path, self.embl,
              os.path.join(self.tmp_files["tar"], tar + ".fa"),
              args_ratt.element, args_ratt.transfer_type,
              os.path.join(self.tmp_files["ref"], ref + ".fa")],
             stdout=out, stderr=DEVNULL)

    def _format_and_run(self, args_ratt):
        print("Running RATT...")
        for pair in args_ratt.pairs:
            ref = pair.split(":")[0]
            tar = pair.split(":")[1]
            out = open(self.ratt_log, "w+")
            print(tar)
            self._run_ratt(args_ratt, tar, ref, out)
            for filename in os.listdir():
                if ("final" in filename):
                    shutil.move(filename, os.path.join(args_ratt.output_path,
                                                       filename))
                elif (args_ratt.element in filename) or (
                      "query" in filename) or (
                      "Reference" in filename) or (
                      "Query" in filename) or (
                      "Sequences" in filename):
                    if os.path.isfile(filename):
                        os.remove(filename)
                    if os.path.isdir(filename):
                        shutil.rmtree(filename)
        out.close()

    def annotation_transfer(self, args_ratt):
        self.multiparser.parser_fasta(args_ratt.tar_fastas)
        self.multiparser.parser_fasta(args_ratt.ref_fastas)
        out_gbk = self._convert_embl(args_ratt.ref_embls)
        self._format_and_run(args_ratt)
        if args_ratt.convert:
            files = []
            for data in os.listdir(args_ratt.output_path):
                if "final.embl" in data:
                    self._convert_to_gff(data, args_ratt, files)
                    self._convert_to_pttrnt(args_ratt.gff_outfolder, files)
            self.helper.check_make_folder(self.tmp_files["out_gff"])
            for folder in os.listdir(args_ratt.tar_fastas):
                files = []
                if "_folder" in folder:
                    datas = folder.split("_folder")
                    prefix = datas[0][:-3]
                    for file_ in os.listdir(os.path.join(args_ratt.tar_fastas,
                                                         folder)):
                        files.append(file_[:-3])
                    for gff in os.listdir(args_ratt.gff_outfolder):
                        for file_ in files:
                            if (".gff" in gff) and (file_ == gff[:-4]):
                                self.helper.merge_file(os.path.join(
                                     args_ratt.gff_outfolder, gff),
                                     self.tmp_files["gff"])
                            if (".ptt" in gff) and (file_ == gff[:-4]):
                                self.helper.merge_file(os.path.join(
                                     args_ratt.gff_outfolder, gff),
                                     self.tmp_files["ptt"])
                            if (".rnt" in gff) and (file_ == gff[:-4]):
                                self.helper.merge_file(os.path.join(
                                     args_ratt.gff_outfolder, gff),
                                     self.tmp_files["rnt"])
                    shutil.move(self.tmp_files["gff"], os.path.join(
                                self.tmp_files["out_gff"], prefix + ".gff"))
                    shutil.move(self.tmp_files["ptt"], os.path.join(
                                self.tmp_files["out_gff"], prefix + ".ptt"))
                    shutil.move(self.tmp_files["rnt"], os.path.join(
                                self.tmp_files["out_gff"], prefix + ".rnt"))
        self._remove_files(args_ratt, out_gbk)
예제 #4
0
class RATT(object):
    '''annotation transfer'''
    def __init__(self, args_ratt):
        self.multiparser = Multiparser()
        self.converter = Converter()
        self.format_fixer = FormatFixer()
        self.helper = Helper()
        if args_ratt.ref_gbk:
            self.gbk = os.path.join(args_ratt.ref_gbk, "gbk_tmp")
            self.gbk_tmp = os.path.join(self.gbk, "tmp")
            self.embl = os.path.join(args_ratt.ref_gbk, "embls")
        if args_ratt.ref_embls:
            self.embl = args_ratt.ref_embls
        self.ratt_log = os.path.join(args_ratt.output_path, "ratt_log.txt")
        self.tmp_files = {
            "tar": os.path.join(args_ratt.tar_fastas, "tmp"),
            "ref": os.path.join(args_ratt.ref_fastas, "tmp"),
            "out_gff": os.path.join(args_ratt.gff_outfolder, "tmp"),
            "gff": os.path.join(args_ratt.gff_outfolder, "tmp.gff"),
            "ptt": os.path.join(args_ratt.gff_outfolder, "tmp.ptt"),
            "rnt": os.path.join(args_ratt.gff_outfolder, "tmp.rnt")
        }

    def _convert_to_pttrnt(self, gffs, files, log):
        for gff in files:
            if gff.endswith(".gff"):
                gff = os.path.join(gffs, gff)
                filename = gff.split("/")
                prefix = filename[-1][:-4]
                rnt = gff[:-3] + "rnt"
                ptt = gff[:-3] + "ptt"
                fasta = self.helper.get_correct_file(self.tmp_files["tar"],
                                                     ".fa", prefix, None, None)
                if fasta:
                    self.converter.convert_gff2rntptt(gff, fasta, ptt, rnt,
                                                      None, None)
                    log.write("\t" + ptt + " is generated.\n")
                    log.write("\t" + rnt + " is generated.\n")

    def _remove_files(self, args_ratt, out_gbk, log):
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".gff", "file")
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".ptt", "file")
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".rnt", "file")
        log.write("Moving the final output files to {0}.\n".format(
            args_ratt.gff_outfolder))
        self.helper.move_all_content(self.tmp_files["out_gff"],
                                     args_ratt.gff_outfolder, None)
        log.write("Remove the temperary files.\n")
        shutil.rmtree(self.tmp_files["out_gff"])
        shutil.rmtree(self.tmp_files["tar"])
        shutil.rmtree(self.tmp_files["ref"])
        self.helper.remove_tmp_dir(args_ratt.tar_fastas)
        self.helper.remove_tmp_dir(args_ratt.ref_fastas)
        self.helper.remove_tmp_dir(args_ratt.ref_embls)
        self.helper.remove_tmp_dir(args_ratt.ref_gbk)

    def _convert_to_gff(self, ratt_result, args_ratt, files, log):
        name = ratt_result.split(".")
        filename = ".".join(name[1:-2]) + ".gff"
        output_file = os.path.join(args_ratt.output_path, filename)
        self.converter.convert_embl2gff(
            os.path.join(args_ratt.output_path, ratt_result), output_file)
        self.format_fixer.fix_ratt(output_file, ".".join(name[1:-2]),
                                   "tmp_gff")
        shutil.move("tmp_gff", output_file)
        shutil.copy(output_file, os.path.join(args_ratt.gff_outfolder,
                                              filename))
        log.write("\t" + os.path.join(args_ratt.gff_outfolder, filename) +
                  " is generated.\n")
        files.append(filename)

    def _parser_embl_gbk(self, files):
        self.helper.check_make_folder(self.gbk)
        for file_ in files:
            close = False
            with open(file_, "r") as f_h:
                for line in f_h:
                    if (line.startswith("LOCUS")):
                        out = open(self.gbk_tmp, "w")
                        datas = line.split(" ")
                        for data in datas:
                            if (len(data) != 0) and (data != "LOCUS"):
                                filename = ".".join([data.strip(), "gbk"])
                                break
                    elif (line.startswith("VERSION")):
                        datas = line.split(" ")
                        for data in datas:
                            if (len(data) != 0) and (data != "VERSION"):
                                new_filename = ".".join([data.strip(), "gbk"])
                                break
                        if new_filename.find(filename):
                            filename = new_filename
                    if out:
                        out.write(line)
                    if line.startswith("//"):
                        out.close()
                        close = True
                        shutil.move(self.gbk_tmp,
                                    os.path.join(self.gbk, filename))
            if not close:
                out.close()
        return self.gbk

    def _convert_embl(self, ref_embls, log):
        '''convert gbk to embl'''
        detect_gbk = False
        gbks = []
        out_gbk = None
        for embl in os.listdir(ref_embls):
            if (embl.endswith(".gbk")) or (embl.endswith(".gbff")) or (
                    embl.endswith(".gb")):
                detect_gbk = True
                gbks.append(os.path.join(ref_embls, embl))
        if not detect_gbk:
            log.write(
                "--related_gbk_files is assigned, but not gbk files are detected.\n"
                "The gbk file names need to be ended at .gbk, .gb, or .gbff. \n"
            )
            print("Error: Please assign proper Genebank files!")
            sys.exit()
        elif detect_gbk:
            out_gbk = self._parser_embl_gbk(gbks)
            log.write(
                "Running converter.py to convert gbk file to embl format.\n")
            self.converter.convert_gbk2embl(out_gbk)
            self.helper.check_make_folder(self.embl)
            self.helper.move_all_content(out_gbk, self.embl, [".embl"])
            log.write("\t" + self.embl +
                      " is generated and the embl files are stored in it.\n")
        return out_gbk

    def _run_ratt(self, args_ratt, tar, ref, out, log):
        if (not os.path.exists(self.embl)) or (not os.path.exists(
                os.path.join(self.tmp_files["tar"], tar + ".fa"))) or (
                    not os.path.exists(
                        os.path.join(self.tmp_files["ref"], ref + ".fa"))):
            print("Error: Please check --compare_pair, the strain names "
                  "should be the same as the strain names in fasta, "
                  "genbank or embl files!")
            log.write(
                "The strain names in --compare_pair should be the same "
                "as the strain names in fasta, genbank, or embl files.\n")
            sys.exit()
        log.write("Make sure your RATT version is at least 1.64.\n")
        log.write("If the RATT can not run properly, please check the "
                  "RATT_HOME and PAGIT_HOME is assigned correctly.\n")
        log.write(" ".join([
            args_ratt.ratt_path, self.embl,
            os.path.join(self.tmp_files["tar"], tar +
                         ".fa"), args_ratt.element, args_ratt.transfer_type,
            os.path.join(self.tmp_files["ref"], ref + ".fa")
        ]) + "\n")
        call([
            args_ratt.ratt_path, self.embl,
            os.path.join(self.tmp_files["tar"], tar + ".fa"),
            args_ratt.element, args_ratt.transfer_type,
            os.path.join(self.tmp_files["ref"], ref + ".fa")
        ],
             stdout=out,
             stderr=DEVNULL)
        log.write("Done!\n")

    def _format_and_run(self, args_ratt, log):
        print("Running RATT")
        for pair in args_ratt.pairs:
            ref = pair.split(":")[0]
            tar = pair.split(":")[1]
            out = open(self.ratt_log, "w+")
            self._run_ratt(args_ratt, tar, ref, out, log)
            log.write("The following files are generatd:\n")
            for filename in os.listdir():
                if ("final" in filename):
                    log.write("\t" + filename + "\n")
                    shutil.move(filename,
                                os.path.join(args_ratt.output_path, filename))
                elif (args_ratt.element in filename) or (
                        "query" in filename) or ("Reference" in filename) or (
                            "Query" in filename) or ("Sequences" in filename):
                    log.write("\t" + filename + "\n")
                    if os.path.isfile(filename):
                        os.remove(filename)
                    if os.path.isdir(filename):
                        shutil.rmtree(filename)
        out.close()

    def annotation_transfer(self, args_ratt, log):
        self.multiparser.parser_fasta(args_ratt.tar_fastas)
        self.multiparser.parser_fasta(args_ratt.ref_fastas)
        out_gbk = None
        if args_ratt.ref_embls is None:
            out_gbk = self._convert_embl(args_ratt.ref_gbki, log)
        self._format_and_run(args_ratt, log)
        files = []
        for data in os.listdir(args_ratt.output_path):
            if "final.embl" in data:
                log.write(
                    "Running converter.py to convert embl "
                    "files in {0} to gff, ptt, and rnt format.\n".format(data))
                self._convert_to_gff(data, args_ratt, files, log)
                self._convert_to_pttrnt(args_ratt.gff_outfolder, files, log)
        self.helper.check_make_folder(self.tmp_files["out_gff"])
        log.write("Merging the output of {0}.\n".format(data))
        for folder in os.listdir(args_ratt.tar_fastas):
            files = []
            if "_folder" in folder:
                datas = folder.split("_folder")
                prefix = ".".join(datas[0].split(".")[:-1])
                for file_ in os.listdir(
                        os.path.join(args_ratt.tar_fastas, folder)):
                    files.append(file_[:-3])
                for gff in os.listdir(args_ratt.gff_outfolder):
                    for file_ in files:
                        if (".gff" in gff) and (file_ == gff[:-4]):
                            self.helper.merge_file(
                                os.path.join(args_ratt.gff_outfolder, gff),
                                self.tmp_files["gff"])
                        if (".ptt" in gff) and (file_ == gff[:-4]):
                            self.helper.merge_file(
                                os.path.join(args_ratt.gff_outfolder, gff),
                                self.tmp_files["ptt"])
                        if (".rnt" in gff) and (file_ == gff[:-4]):
                            self.helper.merge_file(
                                os.path.join(args_ratt.gff_outfolder, gff),
                                self.tmp_files["rnt"])
                if os.path.exists(self.tmp_files["gff"]):
                    shutil.move(
                        self.tmp_files["gff"],
                        os.path.join(self.tmp_files["out_gff"],
                                     prefix + ".gff"))
                    shutil.move(
                        self.tmp_files["ptt"],
                        os.path.join(self.tmp_files["out_gff"],
                                     prefix + ".ptt"))
                    shutil.move(
                        self.tmp_files["rnt"],
                        os.path.join(self.tmp_files["out_gff"],
                                     prefix + ".rnt"))
                else:
                    print("Error: Please check your fasta or "
                          "annotation files, they should only contain "
                          "the query genome. And make sure your RATT can "
                          "work properly (check $ANNOgesic/output/"
                          "annotation_transfer/ratt_log.txt).")
                    log.write("Please check your fasta or "
                              "annotation files, they should only contain "
                              "the query genome. And make sure your RATT can "
                              "work properly (check $ANNOgesic/output/"
                              "annotation_transfer/ratt_log.txt).\n")
        self._remove_files(args_ratt, out_gbk, log)
예제 #5
0
class TestConverter(unittest.TestCase):

    def setUp(self):
        self.converter = Converter()
        self.example = Example()
        self.converter.gff3parser = Mock_gff3_parser
        self.converter._print_rntptt_title = Mock_func().print_rntptt_title
        self.converter.tsspredator = Mock_TSSPredatorReader()
        self.converter._read_file = Mock_func().mock_read_file
        self.gff_file = self.example.gff_file
        self.ptt_out = self.example.ptt_out
        self.rnt_out = self.example.rnt_out
        self.srna_out = self.example.srna_out
        self.embl_file = self.example.embl_file
        self.embl_out = self.example.embl_out
        self.multi_embl = self.example.multi_embl
        self.gff_out = self.example.gff_out
        self.mastertable = self.example.mastertable
        self.tss_file = self.example.tss_file
        self.fasta_file = self.example.fasta_file
        self.transterm = self.example.transterm
        self.term_file = self.example.term_file
        self.circ_file = self.example.circrna_table
        self.circ_all = self.example.circrna_all
        self.circ_best = self.example.circrna_best
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_print_rntptt_file(self):
        cdss = []
        genes = []
        rnas = []
        gff_dict = Example().gff_dict
        for gff in gff_dict:
            if gff["feature"] == "gene":
                genes.append(self.converter.gff3parser.entries(self, gff))
            elif gff["feature"] == "CDS":
                cdss.append(self.converter.gff3parser.entries(self, gff))
            elif gff["feature"] == "tRNA":
                rnas.append(self.converter.gff3parser.entries(self, gff))
        out_p = StringIO()
        out_r = StringIO()
        self.converter._print_rntptt_file(out_p, cdss, genes)
        self.converter._print_rntptt_file(out_r, rnas, genes)
        self.assertEqual(out_p.getvalue().split("\n")[:-1],
                         self.example.ptt_out_list)
        self.assertEqual(out_r.getvalue().split("\n")[:-1],
                         self.example.rnt_out_list)
        out_p.close()
        out_r.close()

    def test_srna2pttrnt(self):
        srna_input_file = os.path.join(self.test_folder, "srna.gff")
        srna_output_file = os.path.join(self.test_folder, "srna.out")
        with open(srna_input_file, "w") as fh:
            fh.write(self.gff_file)
        srnas = []
        self.converter._srna2rntptt(srna_input_file, srna_output_file,
                                    srnas, 1234567)
        datas = import_data(srna_output_file)
        self.assertEqual(set(datas), set(self.srna_out.split("\n")))

    def test_multi_embl_pos(self):
        embls = []
        for line in self.embl_file.split("\n"):
            datas = self.converter._multi_embl_pos(line.strip())
            if datas != "Wrong":
                embls.append(datas)
        for index in range(0, 7):
            self.assertDictEqual(embls[index], self.embl_out[index])
        for index in range(0, 2):
            self.assertDictEqual(embls[-1]["pos"][index],
                                 self.multi_embl[index])
        
    def test_parser_embl_data(self):
        embl_file = os.path.join(self.test_folder, "test.embl")
        embl_out = os.path.join(self.test_folder, "test.embl_out")
        out = StringIO()
        with open(embl_file, "w") as eh:
            for line in self.embl_file.split("\n"):
                eh.write(line + "\n")
        info = self.converter._parser_embl_data(embl_file, out)
        datas = out.getvalue().split("\n")
        self.assertEqual(set(datas[:-1]), set(self.gff_out.split("\n")))
        self.assertEqual(info[0], "NC_007795.1")
        for index in range(0, 2):
            self.assertDictEqual(info[1]["pos"][index], self.multi_embl[index])
        out.close()

    def test_multi_tss_class(self):
        nums = {"tss": 0, "tss_uni": 0, "class": 1}
        utrs = {"total": [], "pri": [], "sec": []}
        tss_features = {"tss_types": [], "locus_tags": [], "utr_lengths": []}
        tss_index = defaultdict(lambda: 0)
        master_file = os.path.join(self.test_folder, "test.tsv")
        fh = StringIO(self.mastertable)
        for tss in self.converter.tsspredator.entries(fh):
            self.converter._multi_tss_class(
                tss, tss_index, tss_features, nums, utrs)
        fh.close()
        self.assertDictEqual(nums, {'tss_uni': 0, 'class': 5, 'tss': 2})

    def test_convert_mastertable2gff(self):
        master_file = os.path.join(self.test_folder, "test.tsv")
        with open(master_file, "w") as th:
            th.write(self.mastertable)
        out_gff = os.path.join(self.test_folder, "test.tsv_out")
        self.converter.convert_mastertable2gff(master_file, "ANNOgesic", "TSS",
                                               "aaa", out_gff)
        datas = import_data(out_gff)
        self.assertEqual(set(datas), set(self.tss_file.split("\n")))

    def test_convert_gff2rntptt(self):
        srna_input_file = os.path.join(self.test_folder, "srna.gff")
        srna_output_file = os.path.join(self.test_folder, "srna.out")
        gff_file = os.path.join(self.test_folder, "test.gff")
        rnt_file = os.path.join(self.test_folder, "test.rnt")
        ptt_file = os.path.join(self.test_folder, "test.ptt")
        fasta_file = os.path.join(self.test_folder, "test.fa")
        with open(srna_input_file, "w") as fh:
            fh.write(self.gff_file)
        with open(gff_file, "w") as fh:
            fh.write(self.gff_file)
        with open(fasta_file, "w") as fh:
            fh.write(self.fasta_file)
        self.converter.convert_gff2rntptt(
             gff_file, fasta_file, ptt_file, rnt_file,
             srna_input_file, srna_output_file)
        self.assertTrue(srna_output_file)
        self.assertTrue(rnt_file)
        self.assertTrue(ptt_file)

    def test_convert_embl2gff(self):
        embl_file = os.path.join(self.test_folder, "test.embl")
        gff_file = os.path.join(self.test_folder, "test.embl_out")
        with open(embl_file, "w") as eh:
            for line in self.embl_file.split("\n"):
                eh.write(line + "\n")
        self.converter.convert_embl2gff(embl_file, gff_file)
        datas = import_data(gff_file)
        self.assertEqual(set(datas[1:-2]), set(self.gff_out.split("\n")))

    def test_convert_transtermhp2gff(self):
        transterm_file = os.path.join(
            self.test_folder, "test_best_terminator_after_gene.bag")
        gff_file = os.path.join(self.test_folder, "transterm.gff")
        with open(transterm_file, "w") as th:
            th.write(self.transterm)
        self.converter.convert_transtermhp2gff(transterm_file, gff_file)
        datas = import_data(gff_file)
        self.assertEqual(set(datas), set(self.term_file.split("\n")))

    def get_info(datas):
        f_datas = []
        for data in datas:
            if not data.startswith("#"):
                f_datas.append("\t".join(data.split("\t")[:8]))
        return f_datas

    def test_convert_circ2gff(self):
        circ_file = os.path.join(self.test_folder, "circ.csv")
        out_all = os.path.join(self.test_folder, "all.gff")
        out_filter = os.path.join(self.test_folder, "best.gff")  
        with open(circ_file, "w") as ch:
            ch.write(self.circ_file)
        args = self.mock_args.mock()
        args.start_ratio = 0.5
        args.end_ratio = 0.5
        args.support = 5
        self.converter.convert_circ2gff(circ_file, args, out_all, out_filter)
        datas = import_data(out_all)
        f_datas = []
        for data in datas:
            if not data.startswith("#"):
                f_datas.append("\t".join(data.split("\t")[:8]))
        c_datas = []
        for data in self.circ_all.split("\n"):
            if not data.startswith("#"):
                c_datas.append("\t".join(data.split("\t")[:8]))
        self.assertListEqual(f_datas, c_datas)
        datas = import_data(out_filter)
        f_datas = []
        for data in datas:
            if not data.startswith("#"):
                f_datas.append("\t".join(data.split("\t")[:8]))
        c_datas = []
        for data in self.circ_best.split("\n"):
            if not data.startswith("#"):
                c_datas.append("\t".join(data.split("\t")[:8]))
        self.assertListEqual(f_datas, c_datas)
예제 #6
0
파일: ratt.py 프로젝트: Sung-Huan/ANNOgesic
class RATT(object):
    '''annotation transfer'''

    def __init__(self, args_ratt):
        self.multiparser = Multiparser()
        self.converter = Converter()
        self.format_fixer = FormatFixer()
        self.helper = Helper()
        if args_ratt.ref_gbk:
            self.gbk = os.path.join(args_ratt.ref_gbk, "gbk_tmp")
            self.gbk_tmp = os.path.join(self.gbk, "tmp")
            self.embl = os.path.join(args_ratt.ref_gbk, "embls")
        if args_ratt.ref_embls:
            self.embl = args_ratt.ref_embls
        self.ratt_log = os.path.join(args_ratt.output_path, "ratt_log.txt")
        self.tmp_files = {"tar": os.path.join(args_ratt.tar_fastas, "tmp"),
                          "ref": os.path.join(args_ratt.ref_fastas, "tmp"),
                          "out_gff": os.path.join(args_ratt.gff_outfolder,
                                                  "tmp"),
                          "gff": os.path.join(args_ratt.gff_outfolder,
                                              "tmp.gff"),
                          "ptt": os.path.join(args_ratt.gff_outfolder,
                                              "tmp.ptt"),
                          "rnt": os.path.join(args_ratt.gff_outfolder,
                                              "tmp.rnt")}

    def _convert_to_pttrnt(self, gffs, files, log):
        for gff in files:
            if gff.endswith(".gff"):
                gff = os.path.join(gffs, gff)
                filename = gff.split("/")
                prefix = filename[-1][:-4]
                rnt = gff[:-3] + "rnt"
                ptt = gff[:-3] + "ptt"
                fasta = self.helper.get_correct_file(self.tmp_files["tar"],
                                                     ".fa", prefix, None, None)
                if fasta:
                    self.converter.convert_gff2rntptt(gff, fasta, ptt, rnt,
                                                      None, None)
                    log.write("\t" + ptt + " is generated.\n")
                    log.write("\t" + rnt + " is generated.\n")

    def _remove_files(self, args_ratt, out_gbk, log):
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".gff", "file")
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".ptt", "file")
        self.helper.remove_all_content(args_ratt.gff_outfolder, ".rnt", "file")
        log.write("Moving the final output files to {0}.\n".format(args_ratt.gff_outfolder))
        self.helper.move_all_content(self.tmp_files["out_gff"],
                                     args_ratt.gff_outfolder, None)
        log.write("Remove the temperary files.\n")
        shutil.rmtree(self.tmp_files["out_gff"])
        shutil.rmtree(self.tmp_files["tar"])
        shutil.rmtree(self.tmp_files["ref"])
        self.helper.remove_tmp_dir(args_ratt.tar_fastas)
        self.helper.remove_tmp_dir(args_ratt.ref_fastas)
        self.helper.remove_tmp_dir(args_ratt.ref_embls)
        self.helper.remove_tmp_dir(args_ratt.ref_gbk)

    def _convert_to_gff(self, ratt_result, args_ratt, files, log):
        name = ratt_result.split(".")
        filename = ".".join(name[1:-2]) + ".gff"
        output_file = os.path.join(args_ratt.output_path, filename)
        self.converter.convert_embl2gff(
             os.path.join(args_ratt.output_path, ratt_result), output_file)
        self.format_fixer.fix_ratt(output_file, ".".join(name[1:-2]),
                                   "tmp_gff")
        shutil.move("tmp_gff", output_file)
        shutil.copy(output_file, os.path.join(args_ratt.gff_outfolder,
                                              filename))
        log.write("\t" + os.path.join(args_ratt.gff_outfolder, filename) + 
                  " is generated.\n")
        files.append(filename)

    def _parser_embl_gbk(self, files):
        self.helper.check_make_folder(self.gbk)
        for file_ in files:
            close = False
            with open(file_, "r") as f_h:
                for line in f_h:
                    if (line.startswith("LOCUS")):
                        out = open(self.gbk_tmp, "w")
                        datas = line.split(" ")
                        for data in datas:
                            if (len(data) != 0) and (data != "LOCUS"):
                                filename = ".".join([data.strip(), "gbk"])
                                break
                    elif (line.startswith("VERSION")):
                        datas = line.split(" ")
                        for data in datas:
                            if (len(data) != 0) and (data != "VERSION"):
                                new_filename = ".".join([data.strip(), "gbk"])
                                break
                        if new_filename.find(filename):
                            filename = new_filename
                    if out:
                        out.write(line)
                    if line.startswith("//"):
                        out.close()
                        close = True
                        shutil.move(self.gbk_tmp,
                                    os.path.join(self.gbk, filename))
            if not close:
                out.close()
        return self.gbk

    def _convert_embl(self, ref_embls, log):
        '''convert gbk to embl'''
        detect_gbk = False
        gbks = []
        out_gbk = None
        for embl in os.listdir(ref_embls):
            if (embl.endswith(".gbk")) or (
                    embl.endswith(".gbff")) or (
                    embl.endswith(".gb")):
                detect_gbk = True
                gbks.append(os.path.join(ref_embls, embl))
        if not detect_gbk:
            log.write("--related_gbk_files is assigned, but not gbk files are detected.\n"
                      "The gbk file names need to be ended at .gbk, .gb, or .gbff. \n")
            print("Error: Please assign proper Genebank files!")
            sys.exit()
        elif detect_gbk:
            out_gbk = self._parser_embl_gbk(gbks)
            log.write("Running converter.py to convert gbk file to embl format.\n")
            self.converter.convert_gbk2embl(out_gbk)
            self.helper.check_make_folder(self.embl)
            self.helper.move_all_content(out_gbk, self.embl, [".embl"])
            log.write("\t" + self.embl + " is generated and the embl files are stored in it.\n")
        return out_gbk

    def _run_ratt(self, args_ratt, tar, ref, out, log):
        if (not os.path.exists(self.embl)) or (
                not os.path.exists(os.path.join(
                    self.tmp_files["tar"], tar + ".fa"))) or (
                not os.path.exists(os.path.join(
                    self.tmp_files["ref"], ref + ".fa"))):
            print("Error: Please check --compare_pair, the strain names "
                  "should be the same as the strain names in fasta, "
                  "genbank or embl files!")
            log.write("The strain names in --compare_pair should be the same "
                      "as the strain names in fasta, genbank, or embl files.\n")
            sys.exit()
        log.write("Make sure your RATT version is at least 1.64.\n")
        log.write("If the RATT can not run properly, please check the "
                  "RATT_HOME and PAGIT_HOME is assigned correctly.\n")
        log.write(" ".join([args_ratt.ratt_path, self.embl,
              os.path.join(self.tmp_files["tar"], tar + ".fa"),
              args_ratt.element, args_ratt.transfer_type,
              os.path.join(self.tmp_files["ref"], ref + ".fa")]) + "\n")
        call([args_ratt.ratt_path, self.embl,
              os.path.join(self.tmp_files["tar"], tar + ".fa"),
              args_ratt.element, args_ratt.transfer_type,
              os.path.join(self.tmp_files["ref"], ref + ".fa")],
             stdout=out, stderr=DEVNULL)
        log.write("Done!\n")

    def _format_and_run(self, args_ratt, log):
        print("Running RATT")
        for pair in args_ratt.pairs:
            ref = pair.split(":")[0]
            tar = pair.split(":")[1]
            out = open(self.ratt_log, "w+")
            self._run_ratt(args_ratt, tar, ref, out, log)
            log.write("The following files are generatd:\n")
            for filename in os.listdir():
                if ("final" in filename):
                    log.write("\t" + filename + "\n")
                    shutil.move(filename, os.path.join(args_ratt.output_path,
                                                       filename))
                elif (args_ratt.element in filename) or (
                      "query" in filename) or (
                      "Reference" in filename) or (
                      "Query" in filename) or (
                      "Sequences" in filename):
                    log.write("\t" + filename + "\n")
                    if os.path.isfile(filename):
                        os.remove(filename)
                    if os.path.isdir(filename):
                        shutil.rmtree(filename)
        out.close()

    def annotation_transfer(self, args_ratt, log):
        self.multiparser.parser_fasta(args_ratt.tar_fastas)
        self.multiparser.parser_fasta(args_ratt.ref_fastas)
        out_gbk = None
        if args_ratt.ref_embls is None:
            out_gbk = self._convert_embl(args_ratt.ref_gbki, log)
        self._format_and_run(args_ratt, log)
        files = []
        for data in os.listdir(args_ratt.output_path):
            if "final.embl" in data:
                log.write("Running converter.py to convert embl "
                          "files in {0} to gff, ptt, and rnt format.\n".format(data))
                self._convert_to_gff(data, args_ratt, files, log)
                self._convert_to_pttrnt(args_ratt.gff_outfolder, files, log)
        self.helper.check_make_folder(self.tmp_files["out_gff"])
        log.write("Merging the output of {0}.\n".format(data))
        for folder in os.listdir(args_ratt.tar_fastas):
            files = []
            if "_folder" in folder:
                datas = folder.split("_folder")
                prefix = ".".join(datas[0].split(".")[:-1])
                for file_ in os.listdir(os.path.join(args_ratt.tar_fastas,
                                                     folder)):
                    files.append(file_[:-3])
                for gff in os.listdir(args_ratt.gff_outfolder):
                    for file_ in files:
                        if (".gff" in gff) and (file_ == gff[:-4]):
                            self.helper.merge_file(os.path.join(
                                 args_ratt.gff_outfolder, gff),
                                 self.tmp_files["gff"])
                        if (".ptt" in gff) and (file_ == gff[:-4]):
                            self.helper.merge_file(os.path.join(
                                 args_ratt.gff_outfolder, gff),
                                 self.tmp_files["ptt"])
                        if (".rnt" in gff) and (file_ == gff[:-4]):
                            self.helper.merge_file(os.path.join(
                                 args_ratt.gff_outfolder, gff),
                                 self.tmp_files["rnt"])
                if os.path.exists(self.tmp_files["gff"]):
                    shutil.move(self.tmp_files["gff"], os.path.join(
                                self.tmp_files["out_gff"], prefix + ".gff"))
                    shutil.move(self.tmp_files["ptt"], os.path.join(
                                self.tmp_files["out_gff"], prefix + ".ptt"))
                    shutil.move(self.tmp_files["rnt"], os.path.join(
                                self.tmp_files["out_gff"], prefix + ".rnt"))
                else:
                    print("Error: Please check your fasta or "
                          "annotation files, they should only contain "
                          "the query genome. And make sure your RATT can "
                          "work properly (check $ANNOgesic/output/"
                          "annotation_transfer/ratt_log.txt).")
                    log.write("Please check your fasta or "
                              "annotation files, they should only contain "
                              "the query genome. And make sure your RATT can "
                              "work properly (check $ANNOgesic/output/"
                              "annotation_transfer/ratt_log.txt).\n")
        self._remove_files(args_ratt, out_gbk, log)