예제 #1
0
    def test_find_gembase_file_basename_file_not_in_gembase(self):
        """
        Exercise GembaseDB with replicon files which are not located in the gembase
        and whose names look like the output of a split operation:

        * a file containing one contig
        * a file representing a chunk

        Finally check that the GembaseDB constructor raises a FileNotFoundError
        whose message reports that no lst file matches the replicon.
        """
        gembase_path = self.find_data('Gembase')

        one_contig_path = self.find_data(
            os.path.join('Replicons', 'ACBA.0917.00019.0001.fst'))
        chunk_path = os.path.join(self.tmp_dir,
                                  'ESCO001.C.00001.C001_chunk_1.fst')
        file_names = {
            'ACBA.0917.00019': one_contig_path,
            'ESCO001.C.00001.C001.fst': chunk_path,
        }

        # the "chunk" is a copy of a gembase replicon saved under a chunk-like name
        chunk_source = os.path.join(gembase_path, 'Replicons',
                                    'ESCO001.C.00001.C001.fst')
        shutil.copyfile(chunk_source, chunk_path)

        for base_file_name, replicon_path in file_names.items():
            self.args.replicon = replicon_path
            self.args.gembase_path = gembase_path
            cfg = Config(self.args)
            records = read_multi_prot_fasta(replicon_path)
            replicon = next(records)
            replicon.path = replicon_path
            os.makedirs(cfg.tmp_dir(replicon.id))

            with self.catch_log():
                db = GembaseDB(replicon, cfg, gembase_path=gembase_path)
            # NOTE(review): assertTrue takes its second argument as the failure
            # message, so this only checks that the returned basename is truthy;
            # it is never compared with base_file_name. Before switching to
            # assertEqual, confirm the expected keys (one of them ends with '.fst').
            found_basename = db._find_gembase_file_basename(gembase_path,
                                                            replicon_path)
            self.assertTrue(found_basename, base_file_name)

        # a replicon for which the constructor is expected to fail
        replicon_path = self.find_data(
            os.path.join('Replicons', 'acba.007.p01.13.fst'))
        self.args.replicon = replicon_path
        self.args.gembase_path = gembase_path
        cfg = Config(self.args)
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        with self.assertRaises(FileNotFoundError) as ctx, self.catch_log():
            GembaseDB(replicon, cfg, gembase_path=gembase_path)
        expected_msg = 'cannot find lst file matching {} sequence'.format(replicon_path)
        self.assertEqual(str(ctx.exception), expected_msg)
예제 #2
0
    def test_protfile(self):
        """
        Check that ProdigalDB.protfile is the expected '.prt' file
        inside the temporary directory of the replicon.
        """
        file_name = 'acba.007.p01.13'
        prot_name = 'ACBA.007.P01_13.prt'

        replicon_file = os.path.join('Replicons', file_name + '.fst')
        replicon_path = self.find_data(replicon_file)
        self.args.replicon = replicon_path
        cfg = Config(self.args)

        # only the first sequence of the file is used as replicon
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        db = ProdigalDB(replicon, cfg)
        expected_path = os.path.join(cfg.tmp_dir(replicon.id), prot_name)
        self.assertEqual(expected_path, db.protfile)
예제 #3
0
    def test_protfile(self):
        """
        Check that GembaseDB.protfile is '<replicon id>.prt' inside the
        temporary directory of the replicon, for two gembase replicon files.
        """
        gembase_replicons = (('ACBA.0917.00019', '.fna'), ('ESCO001.C.00001.C001', '.fst'))
        for seq_name, ext in gembase_replicons:
            replicon_file = os.path.join('Gembase', 'Replicons', seq_name + ext)
            replicon_path = self.find_data(replicon_file)
            self.args.replicon = replicon_path
            cfg = Config(self.args)

            # only the first sequence of the file is used as replicon
            records = read_multi_prot_fasta(replicon_path)
            replicon = next(records)
            replicon.path = replicon_path
            os.makedirs(cfg.tmp_dir(replicon.id))

            with self.catch_log():
                db = GembaseDB(replicon, cfg)
            expected_path = os.path.join(cfg.tmp_dir(replicon.id), replicon.id + '.prt')
            self.assertEqual(expected_path, db.protfile)
예제 #4
0
    def test_protfile(self):
        """
        Check that ProdigalDB.protfile is the expected '.prt' file
        inside the temporary directory of the replicon.
        """
        file_name = 'acba.007.p01.13'
        prot_name = 'ACBA.007.P01_13.prt'

        replicon_file = os.path.join('Replicons', file_name + '.fst')
        replicon_path = self.find_data(replicon_file)
        self.args.replicon = replicon_path
        cfg = Config(self.args)

        # only the first sequence of the file is used as replicon
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        db = ProdigalDB(replicon, cfg)
        expected_path = os.path.join(cfg.tmp_dir(replicon.id), prot_name)
        self.assertEqual(expected_path, db.protfile)
예제 #5
0
    def test_getitem(self):
        """
        Check that GembaseDB gives access by id to the proteins of a replicon.

        For each gembase replicon, every protein of the reference '.prt' file
        whose id matches the pattern built from the replicon id must be
        returned by the database with the same id and the same sequence.
        Asking for an unknown id must raise a KeyError.
        """
        file_name = (('ACBA.0917.00019', '.fna'), ('ESCO001.C.00001.C001',
                                                   '.fst'))
        for seq_name, ext in file_name:
            replicon_path = self.find_data(
                os.path.join('Gembase', 'Replicons', seq_name + ext))
            self.args.replicon = replicon_path
            cfg = Config(self.args)
            seq_db = read_multi_prot_fasta(replicon_path)
            replicon = next(seq_db)
            os.makedirs(cfg.tmp_dir(replicon.id))

            with self.catch_log():
                db = GembaseDB(replicon, cfg)
            exp = read_multi_prot_fasta(
                self.find_data(
                    os.path.join('Gembase', 'Proteins', seq_name + '.prt')))

            specie, date, strain, contig = replicon.id.split('.')
            # Raw string: the backslashes are meant for the regex engine. In a
            # plain literal they are invalid string escape sequences, which
            # recent Python versions report with a warning.
            pattern = re.compile(
                r'{}\.{}\.{}\.\w?{}'.format(specie, date, strain, contig))

            for prot_expected in exp:
                # the reference file may hold proteins of other sequences;
                # only those matching the replicon id are looked up
                if pattern.match(prot_expected.id):
                    prot_received = db[prot_expected.id]
                    self.assertEqual(prot_received.id, prot_expected.id)
                    self.assertEqual(prot_received.seq, prot_expected.seq)
        # db is the database built for the last replicon of the loop
        with self.assertRaises(KeyError) as ctx:
            db['nimport_naoik']
        self.assertEqual(str(ctx.exception), "'nimport_naoik'")
예제 #6
0
    def test_get_description(self):
        """
        Check the descriptions returned by GembaseDB.get_description.

        Known protein ids must give the expected SeqDesc, an id which is not
        a Gembase protein identifier must raise an IntegronError and a well
        formed but unknown id must raise a KeyError.
        """
        # SeqDesc(id, strand, start, stop)
        expected_by_replicon = {
            ('ACBA.0917.00019', '.fna'): {
                'ACBA.0917.00019.b0001_00001': SeqDesc('ACBA.0917.00019.b0001_00001', -1, 266, 1480),
                'ACBA.0917.00019.i0001_03957': SeqDesc('ACBA.0917.00019.i0001_03957', -1, 4043755, 4044354),
            },
        }

        for (seq_name, ext), descriptions in expected_by_replicon.items():
            replicon_file = os.path.join('Gembase', 'Replicons', seq_name + ext)
            replicon_path = self.find_data(replicon_file)
            self.args.replicon = replicon_path
            cfg = Config(self.args)
            records = read_multi_prot_fasta(replicon_path)
            replicon = next(records)
            replicon.path = replicon_path
            os.makedirs(cfg.tmp_dir(replicon.id))

            db = GembaseDB(replicon, cfg)

            for seq_id, desc in descriptions.items():
                self.assertEqual(desc, db.get_description(seq_id))

        # db is the database built for the last replicon of the loop
        with self.assertRaises(IntegronError) as ctx:
            db.get_description('nimport_naoik')
        malformed_msg = "'nimport_naoik' is not a valid Gembase protein identifier."
        self.assertEqual(str(ctx.exception), malformed_msg)

        with self.assertRaises(KeyError) as ctx:
            db.get_description('FOO.BAR.00019.i0001_03924')
        self.assertEqual(str(ctx.exception), "'FOO.BAR.00019.i0001_03924'")
예제 #7
0
    def test_getitem(self):
        """
        Check that GembaseDB gives access by id to the proteins of a replicon.

        For each gembase replicon, every protein of the reference '.prt' file
        whose id matches the pattern built from the replicon id must be
        returned by the database with the same id and the same sequence.
        Asking for an unknown id must raise a KeyError.
        """
        file_name = (('ACBA.0917.00019', '.fna'), ('ESCO001.C.00001.C001', '.fst'))
        for seq_name, ext in file_name:
            replicon_path = self.find_data(os.path.join('Gembase', 'Replicons', seq_name + ext))
            self.args.replicon = replicon_path
            cfg = Config(self.args)
            seq_db = read_multi_prot_fasta(replicon_path)
            replicon = next(seq_db)
            os.makedirs(cfg.tmp_dir(replicon.id))

            with self.catch_log():
                db = GembaseDB(replicon, cfg)
            exp = read_multi_prot_fasta(self.find_data(os.path.join('Gembase', 'Proteins', seq_name + '.prt')))

            specie, date, strain, contig = replicon.id.split('.')
            # Raw string: the backslashes are meant for the regex engine. In a
            # plain literal they are invalid string escape sequences, which
            # recent Python versions report with a warning.
            pattern = re.compile(r'{}\.{}\.{}\.\w?{}'.format(specie, date, strain, contig))

            for prot_expected in exp:
                # the reference file may hold proteins of other sequences;
                # only those matching the replicon id are looked up
                if pattern.match(prot_expected.id):
                    prot_received = db[prot_expected.id]
                    self.assertEqual(prot_received.id,
                                     prot_expected.id)
                    self.assertEqual(prot_received.seq,
                                     prot_expected.seq)
        # db is the database built for the last replicon of the loop
        with self.assertRaises(KeyError) as ctx:
            db['nimport_naoik']
        self.assertEqual(str(ctx.exception), "'nimport_naoik'")
예제 #8
0
    def test_make_protfile(self):
        """
        Compare, sequence by sequence, the ids of the proteins written in
        GembaseDB.protfile with those of the reference '.prt' file found in
        the 'Gembase/Proteins' data directory.
        """
        file_name = (('ACBA.0917.00019', '.fna', 3870),
                     ('ESCO001.C.00001.C001', '.fst', 3870))
        for seq_name, ext, seq_nb in file_name:
            replicon_file = os.path.join('Gembase', 'Replicons', seq_name + ext)
            replicon_path = self.find_data(replicon_file)
            self.args.replicon = replicon_path
            cfg = Config(self.args)
            records = read_multi_prot_fasta(replicon_path)
            replicon = next(records)
            replicon.path = replicon_path
            os.makedirs(cfg.tmp_dir(replicon.id))

            with self.catch_log():
                db = GembaseDB(replicon, cfg)

            reference_path = self.find_data(
                os.path.join('Gembase', 'Proteins', seq_name + '.prt'))
            reference_seqs = read_multi_prot_fasta(reference_path)
            produced_seqs = read_multi_prot_fasta(db.protfile)
            for seq_nb, (expected, test) in enumerate(zip(reference_seqs, produced_seqs), 1):
                self.assertEqual(expected.id, test.id)
            # NOTE(review): the loop counter reuses the name seq_nb, so the
            # expected number unpacked from file_name is overwritten and this
            # assertion compares the counter with itself (it cannot fail).
            # The expected counts must be confirmed before making it effective.
            self.assertEqual(seq_nb, seq_nb)
예제 #9
0
    def test_protfile(self):
        """
        Check that GembaseDB.protfile is '<replicon id>.prt' inside the
        temporary directory of the replicon, for two gembase replicon files.
        """
        gembase_replicons = (('ACBA.0917.00019', '.fna'),
                             ('ESCO001.C.00001.C001', '.fst'))
        for seq_name, ext in gembase_replicons:
            replicon_file = os.path.join('Gembase', 'Replicons', seq_name + ext)
            replicon_path = self.find_data(replicon_file)
            self.args.replicon = replicon_path
            cfg = Config(self.args)

            # only the first sequence of the file is used as replicon
            records = read_multi_prot_fasta(replicon_path)
            replicon = next(records)
            replicon.path = replicon_path
            os.makedirs(cfg.tmp_dir(replicon.id))

            with self.catch_log():
                db = GembaseDB(replicon, cfg)
            expected_path = os.path.join(cfg.tmp_dir(replicon.id),
                                         replicon.id + '.prt')
            self.assertEqual(expected_path, db.protfile)
예제 #10
0
    def test_find_gembase_file_basename_file_not_in_gembase(self):
        """
        Exercise GembaseDB with replicon files which are not located in the gembase
        and whose names look like the output of a split operation:

        * a file containing one contig
        * a file representing a chunk

        Finally check that the GembaseDB constructor raises a FileNotFoundError
        whose message reports that no lst file matches the replicon.
        """
        gembase_path = self.find_data('Gembase')

        one_contig_path = self.find_data(os.path.join('Replicons', 'ACBA.0917.00019.0001.fst'))
        chunk_path = os.path.join(self.tmp_dir, 'ESCO001.C.00001.C001_chunk_1.fst')
        file_names = {
            'ACBA.0917.00019': one_contig_path,
            'ESCO001.C.00001.C001.fst': chunk_path,
        }

        # the "chunk" is a copy of a gembase replicon saved under a chunk-like name
        chunk_source = os.path.join(gembase_path, 'Replicons', 'ESCO001.C.00001.C001.fst')
        shutil.copyfile(chunk_source, chunk_path)

        for base_file_name, replicon_path in file_names.items():
            self.args.replicon = replicon_path
            self.args.gembase_path = gembase_path
            cfg = Config(self.args)
            records = read_multi_prot_fasta(replicon_path)
            replicon = next(records)
            replicon.path = replicon_path
            os.makedirs(cfg.tmp_dir(replicon.id))

            with self.catch_log():
                db = GembaseDB(replicon, cfg, gembase_path=gembase_path)
            # NOTE(review): assertTrue takes its second argument as the failure
            # message, so this only checks that the returned basename is truthy;
            # it is never compared with base_file_name. Before switching to
            # assertEqual, confirm the expected keys (one of them ends with '.fst').
            found_basename = db._find_gembase_file_basename(gembase_path, replicon_path)
            self.assertTrue(found_basename, base_file_name)

        # a replicon for which the constructor is expected to fail
        replicon_path = self.find_data(os.path.join('Replicons', 'acba.007.p01.13.fst'))
        self.args.replicon = replicon_path
        self.args.gembase_path = gembase_path
        cfg = Config(self.args)
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        with self.assertRaises(FileNotFoundError) as ctx, self.catch_log():
            GembaseDB(replicon, cfg, gembase_path=gembase_path)
        expected_msg = 'cannot find lst file matching {} sequence'.format(replicon_path)
        self.assertEqual(str(ctx.exception), expected_msg)
예제 #11
0
    def test_ProteinDB(self):
        """
        Check that a ProdigalDB exposes the replicon it was built from.

        The database is created from the first sequence of the
        'acba.007.p01.13' replicon file; the id of ``db.replicon`` must be
        the id of that sequence.
        """
        file_name = 'acba.007.p01.13'
        replicon_path = self.find_data(os.path.join('Replicons', file_name + '.fst'))
        self.args.replicon = replicon_path
        cfg = Config(self.args)
        seq_db = read_multi_prot_fasta(replicon_path)
        replicon = next(seq_db)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        db = ProdigalDB(replicon, cfg)
        # assertEqual, not assertTrue: assertTrue uses its second argument as
        # the failure message only, so the two ids were never compared.
        self.assertEqual(db.replicon.id, replicon.id)
예제 #12
0
    def test_ProteinDB_no_prodigal(self):
        """
        Check that building a ProdigalDB raises a RuntimeError
        when the 'prodigal' argument is None.
        """
        file_name = 'acba.007.p01.13'
        replicon_file = os.path.join('Replicons', file_name + '.fst')
        replicon_path = self.find_data(replicon_file)
        self.args.replicon = replicon_path
        cfg = Config(self.args)
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        # the option is cleared after cfg was created from self.args
        # NOTE(review): presumably cfg reads through to self.args - confirm
        self.args.prodigal = None
        with self.assertRaises(RuntimeError):
            ProdigalDB(replicon, cfg)
예제 #13
0
    def test_ProteinDB(self):
        """
        Check that a ProdigalDB exposes the replicon it was built from.

        The database is created from the first sequence of the
        'acba.007.p01.13' replicon file; the id of ``db.replicon`` must be
        the id of that sequence.
        """
        file_name = 'acba.007.p01.13'
        replicon_path = self.find_data(
            os.path.join('Replicons', file_name + '.fst'))
        self.args.replicon = replicon_path
        cfg = Config(self.args)
        seq_db = read_multi_prot_fasta(replicon_path)
        replicon = next(seq_db)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        db = ProdigalDB(replicon, cfg)
        # assertEqual, not assertTrue: assertTrue uses its second argument as
        # the failure message only, so the two ids were never compared.
        self.assertEqual(db.replicon.id, replicon.id)
예제 #14
0
    def test_ProteinDB_no_prodigal(self):
        """
        Check that building a ProdigalDB raises a RuntimeError
        when the 'prodigal' argument is None.
        """
        file_name = 'acba.007.p01.13'
        replicon_file = os.path.join('Replicons', file_name + '.fst')
        replicon_path = self.find_data(replicon_file)
        self.args.replicon = replicon_path
        cfg = Config(self.args)
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        # the option is cleared after cfg was created from self.args
        # NOTE(review): presumably cfg reads through to self.args - confirm
        self.args.prodigal = None
        with self.assertRaises(RuntimeError):
            ProdigalDB(replicon, cfg)
예제 #15
0
    def test_ProteinDB(self):
        """
        Check that a GembaseDB exposes the replicon it was built from.

        Run for a Gembase Draft and a Gembase Complete replicon file; in both
        cases the id of ``db.replicon`` must be the id of the first sequence
        read from the file.
        """
        # From Gembase Draft , Gembase Complete
        file_names = ('ACBA.0917.00019.fna', 'ESCO001.C.00001.C001.fst')
        for file_name in file_names:
            replicon_path = self.find_data(os.path.join('Gembase', 'Replicons', file_name))
            self.args.replicon = replicon_path
            cfg = Config(self.args)
            seq_db = read_multi_prot_fasta(replicon_path)
            replicon = next(seq_db)
            replicon.path = replicon_path
            os.makedirs(cfg.tmp_dir(replicon.id))

            with self.catch_log():
                db = GembaseDB(replicon, cfg)
            # assertEqual, not assertTrue: assertTrue uses its second argument
            # as the failure message only, so the ids were never compared.
            self.assertEqual(db.replicon.id, replicon.id)
예제 #16
0
    def test_iter(self):
        """
        Check that iterating over a ProdigalDB yields the same ids, in the
        same order, as the index of the reference protein file.
        """
        file_name = 'acba.007.p01.13'
        prot_name = 'ACBA.007.P01_13.prt'

        replicon_file = os.path.join('Replicons', file_name + '.fst')
        replicon_path = self.find_data(replicon_file)
        self.args.replicon = replicon_path
        cfg = Config(self.args)
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        db = ProdigalDB(replicon, cfg)
        reference_path = self.find_data(os.path.join('Proteins', prot_name))
        # NOTE(review): the alphabet argument relies on Bio.Alphabet, which was
        # dropped in Biopython 1.78 - confirm the Biopython version targeted.
        idx = SeqIO.index(reference_path, 'fasta', alphabet=Seq.IUPAC.extended_protein)
        # zip stops at the shortest iterable: ids are compared pairwise only
        for exp_seq_id, get_seq_id in zip(idx, db):
            self.assertEqual(exp_seq_id, get_seq_id)
예제 #17
0
    def test_ProteinDB(self):
        """
        Check that a GembaseDB exposes the replicon it was built from.

        Run for a Gembase Draft and a Gembase Complete replicon file; in both
        cases the id of ``db.replicon`` must be the id of the first sequence
        read from the file.
        """
        # From Gembase Draft , Gembase Complete
        file_names = ('ACBA.0917.00019.fna', 'ESCO001.C.00001.C001.fst')
        for file_name in file_names:
            replicon_path = self.find_data(
                os.path.join('Gembase', 'Replicons', file_name))
            self.args.replicon = replicon_path
            cfg = Config(self.args)
            seq_db = read_multi_prot_fasta(replicon_path)
            replicon = next(seq_db)
            replicon.path = replicon_path
            os.makedirs(cfg.tmp_dir(replicon.id))

            with self.catch_log():
                db = GembaseDB(replicon, cfg)
            # assertEqual, not assertTrue: assertTrue uses its second argument
            # as the failure message only, so the ids were never compared.
            self.assertEqual(db.replicon.id, replicon.id)
예제 #18
0
    def test_get_description(self):
        """
        Check that ProdigalDB.get_description returns the expected SeqDesc
        for two protein ids of the 'acba.007.p01.13' replicon.
        """
        file_name = 'acba.007.p01.13'
        replicon_file = os.path.join('Replicons', file_name + '.fst')
        replicon_path = self.find_data(replicon_file)
        self.args.replicon = replicon_path
        cfg = Config(self.args)
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        db = ProdigalDB(replicon, cfg)

        # SeqDesc(id, strand, start, stop)
        descriptions = {
            'ACBA.007.P01_13_23': SeqDesc('ACBA.007.P01_13_23', -1, 19721, 20254),
            'ACBA.007.P01_13_1': SeqDesc('ACBA.007.P01_13_1', 1, 55, 1014),
        }
        for seq_id, expected_desc in descriptions.items():
            self.assertEqual(expected_desc, db.get_description(seq_id))
예제 #19
0
    def test_iter(self):
        """
        Check that iterating over a ProdigalDB yields the same ids, in the
        same order, as the index of the reference protein file.
        """
        file_name = 'acba.007.p01.13'
        prot_name = 'ACBA.007.P01_13.prt'

        replicon_file = os.path.join('Replicons', file_name + '.fst')
        replicon_path = self.find_data(replicon_file)
        self.args.replicon = replicon_path
        cfg = Config(self.args)
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        db = ProdigalDB(replicon, cfg)
        reference_path = self.find_data(os.path.join('Proteins', prot_name))
        # NOTE(review): the alphabet argument relies on Bio.Alphabet, which was
        # dropped in Biopython 1.78 - confirm the Biopython version targeted.
        idx = SeqIO.index(reference_path,
                          'fasta',
                          alphabet=Seq.IUPAC.extended_protein)
        # zip stops at the shortest iterable: ids are compared pairwise only
        for exp_seq_id, get_seq_id in zip(idx, db):
            self.assertEqual(exp_seq_id, get_seq_id)
예제 #20
0
    def test_make_protfile(self):
        """
        Compare, sequence by sequence, the ids of the proteins written in
        GembaseDB.protfile with those of the reference '.prt' file found in
        the 'Gembase/Proteins' data directory.
        """
        file_name = (('ACBA.0917.00019', '.fna', 3870), ('ESCO001.C.00001.C001', '.fst', 3870))
        for seq_name, ext, seq_nb in file_name:
            replicon_file = os.path.join('Gembase', 'Replicons', seq_name + ext)
            replicon_path = self.find_data(replicon_file)
            self.args.replicon = replicon_path
            cfg = Config(self.args)
            records = read_multi_prot_fasta(replicon_path)
            replicon = next(records)
            replicon.path = replicon_path
            os.makedirs(cfg.tmp_dir(replicon.id))

            with self.catch_log():
                db = GembaseDB(replicon, cfg)

            reference_path = self.find_data(os.path.join('Gembase', 'Proteins', seq_name + '.prt'))
            reference_seqs = read_multi_prot_fasta(reference_path)
            produced_seqs = read_multi_prot_fasta(db.protfile)
            for seq_nb, (expected, test) in enumerate(zip(reference_seqs, produced_seqs), 1):
                self.assertEqual(expected.id, test.id)
            # NOTE(review): the loop counter reuses the name seq_nb, so the
            # expected number unpacked from file_name is overwritten and this
            # assertion compares the counter with itself (it cannot fail).
            # The expected counts must be confirmed before making it effective.
            self.assertEqual(seq_nb, seq_nb)
예제 #21
0
    def test_make_protfile(self):
        """
        Check the protein file produced by ProdigalDB against the reference
        one: ids must be identical pairwise and 23 pairs must be compared.
        """
        file_name = 'acba.007.p01.13'
        prot_name = 'ACBA.007.P01_13.prt'

        replicon_file = os.path.join('Replicons', file_name + '.fst')
        replicon_path = self.find_data(replicon_file)
        self.args.replicon = replicon_path
        cfg = Config(self.args)
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        db = ProdigalDB(replicon, cfg)
        reference_seqs = read_multi_prot_fasta(self.find_data(os.path.join('Proteins', prot_name)))
        produced_seqs = read_multi_prot_fasta(db.protfile)
        # counting starts at 1 so that seq_nb ends as the number of compared pairs
        for seq_nb, (expected, test) in enumerate(zip(reference_seqs, produced_seqs), 1):
            self.assertEqual(expected.id, test.id)
        self.assertEqual(seq_nb, 23)
예제 #22
0
    def test_find_gembase_file_basename(self):
        """
        test if find_gembase_file_basename gets the right basename
        for files in gembase

        The expected basename is the replicon file name without its extension.
        """
        gembase_path = self.find_data('Gembase')
        file_names = ('ACBA.0917.00019.fna', 'ESCO001.C.00001.C001.fst')
        for file_name in file_names:
            replicon_path = self.find_data(os.path.join('Gembase', 'Replicons', file_name))
            self.args.replicon = replicon_path
            cfg = Config(self.args)
            seq_db = read_multi_prot_fasta(replicon_path)
            replicon = next(seq_db)
            replicon.path = replicon_path
            os.makedirs(cfg.tmp_dir(replicon.id))

            with self.catch_log():
                db = GembaseDB(replicon, cfg)
            # assertEqual, not assertTrue: assertTrue uses its second argument
            # as the failure message only, so the basename was never compared
            # with the expected value.
            self.assertEqual(db._find_gembase_file_basename(gembase_path, replicon_path),
                             os.path.splitext(file_name)[0])
예제 #23
0
    def test_get_description(self):
        """
        Check that ProdigalDB.get_description returns the expected SeqDesc
        for two protein ids of the 'acba.007.p01.13' replicon.
        """
        file_name = 'acba.007.p01.13'
        replicon_file = os.path.join('Replicons', file_name + '.fst')
        replicon_path = self.find_data(replicon_file)
        self.args.replicon = replicon_path
        cfg = Config(self.args)
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        db = ProdigalDB(replicon, cfg)

        # SeqDesc(id, strand, start, stop)
        descriptions = {
            'ACBA.007.P01_13_23': SeqDesc('ACBA.007.P01_13_23', -1, 19721, 20254),
            'ACBA.007.P01_13_1': SeqDesc('ACBA.007.P01_13_1', 1, 55, 1014),
        }
        for seq_id, expected_desc in descriptions.items():
            self.assertEqual(expected_desc, db.get_description(seq_id))
예제 #24
0
    def test_find_gembase_file_basename(self):
        """
        test if find_gembase_file_basename gets the right basename
        for files in gembase

        The expected basename is the replicon file name without its extension.
        """
        gembase_path = self.find_data('Gembase')
        file_names = ('ACBA.0917.00019.fna', 'ESCO001.C.00001.C001.fst')
        for file_name in file_names:
            replicon_path = self.find_data(
                os.path.join('Gembase', 'Replicons', file_name))
            self.args.replicon = replicon_path
            cfg = Config(self.args)
            seq_db = read_multi_prot_fasta(replicon_path)
            replicon = next(seq_db)
            replicon.path = replicon_path
            os.makedirs(cfg.tmp_dir(replicon.id))

            with self.catch_log():
                db = GembaseDB(replicon, cfg)
            # assertEqual, not assertTrue: assertTrue uses its second argument
            # as the failure message only, so the basename was never compared
            # with the expected value.
            self.assertEqual(
                db._find_gembase_file_basename(gembase_path, replicon_path),
                os.path.splitext(file_name)[0])
예제 #25
0
    def test_getitem(self):
        """
        Check that every protein of the reference file can be retrieved by
        its id from a ProdigalDB, with the same id and the same sequence,
        and that an unknown id raises a KeyError.
        """
        file_name = 'acba.007.p01.13'
        prot_name = 'ACBA.007.P01_13.prt'

        replicon_file = os.path.join('Replicons', file_name + '.fst')
        replicon_path = self.find_data(replicon_file)
        self.args.replicon = replicon_path
        cfg = Config(self.args)
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        db = ProdigalDB(replicon, cfg)
        reference_path = self.find_data(os.path.join('Proteins', prot_name))
        for prot_expected in read_multi_prot_fasta(reference_path):
            prot_received = db[prot_expected.id]
            self.assertEqual(prot_received.id, prot_expected.id)
            self.assertEqual(prot_received.seq, prot_expected.seq)

        with self.assertRaises(KeyError) as ctx:
            db['nimport_naoik']
        # str() of a KeyError is the repr of the missing key, hence the quotes
        self.assertEqual(str(ctx.exception), "'nimport_naoik'")
예제 #26
0
    def test_getitem(self):
        """
        Check that every protein of the reference file can be retrieved by
        its id from a ProdigalDB, with the same id and the same sequence,
        and that an unknown id raises a KeyError.
        """
        file_name = 'acba.007.p01.13'
        prot_name = 'ACBA.007.P01_13.prt'

        replicon_file = os.path.join('Replicons', file_name + '.fst')
        replicon_path = self.find_data(replicon_file)
        self.args.replicon = replicon_path
        cfg = Config(self.args)
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        db = ProdigalDB(replicon, cfg)
        reference_path = self.find_data(os.path.join('Proteins', prot_name))
        for prot_expected in read_multi_prot_fasta(reference_path):
            prot_received = db[prot_expected.id]
            self.assertEqual(prot_received.id, prot_expected.id)
            self.assertEqual(prot_received.seq, prot_expected.seq)

        with self.assertRaises(KeyError) as ctx:
            db['nimport_naoik']
        # str() of a KeyError is the repr of the missing key, hence the quotes
        self.assertEqual(str(ctx.exception), "'nimport_naoik'")
예제 #27
0
    def test_make_protfile(self):
        """
        Check the protein file produced by ProdigalDB against the reference
        one: ids must be identical pairwise and 23 pairs must be compared.
        """
        file_name = 'acba.007.p01.13'
        prot_name = 'ACBA.007.P01_13.prt'

        replicon_file = os.path.join('Replicons', file_name + '.fst')
        replicon_path = self.find_data(replicon_file)
        self.args.replicon = replicon_path
        cfg = Config(self.args)
        records = read_multi_prot_fasta(replicon_path)
        replicon = next(records)
        replicon.path = replicon_path
        os.makedirs(cfg.tmp_dir(replicon.id))

        db = ProdigalDB(replicon, cfg)
        reference_seqs = read_multi_prot_fasta(
            self.find_data(os.path.join('Proteins', prot_name)))
        produced_seqs = read_multi_prot_fasta(db.protfile)
        # counting starts at 1 so that seq_nb ends as the number of compared pairs
        for seq_nb, (expected, test) in enumerate(zip(reference_seqs, produced_seqs), 1):
            self.assertEqual(expected.id, test.id)
        self.assertEqual(seq_nb, 23)
예제 #28
0
    def test_get_description(self):
        """
        Check the descriptions returned by GembaseDB.get_description.

        Known protein ids must give the expected SeqDesc, an id which is not
        a Gembase protein identifier must raise an IntegronError and a well
        formed but unknown id must raise a KeyError.
        """
        # SeqDesc(id, strand, start, stop)
        expected_by_replicon = {
            ('ACBA.0917.00019', '.fna'): {
                'ACBA.0917.00019.b0001_00001':
                    SeqDesc('ACBA.0917.00019.b0001_00001', -1, 266, 1480),
                'ACBA.0917.00019.i0001_03957':
                    SeqDesc('ACBA.0917.00019.i0001_03957', -1, 4043755, 4044354),
            },
        }

        for (seq_name, ext), descriptions in expected_by_replicon.items():
            replicon_file = os.path.join('Gembase', 'Replicons', seq_name + ext)
            replicon_path = self.find_data(replicon_file)
            self.args.replicon = replicon_path
            cfg = Config(self.args)
            records = read_multi_prot_fasta(replicon_path)
            replicon = next(records)
            replicon.path = replicon_path
            os.makedirs(cfg.tmp_dir(replicon.id))

            db = GembaseDB(replicon, cfg)

            for seq_id, desc in descriptions.items():
                self.assertEqual(desc, db.get_description(seq_id))

        # db is the database built for the last replicon of the loop
        with self.assertRaises(IntegronError) as ctx:
            db.get_description('nimport_naoik')
        malformed_msg = "'nimport_naoik' is not a valid Gembase protein identifier."
        self.assertEqual(str(ctx.exception), malformed_msg)

        with self.assertRaises(KeyError) as ctx:
            db.get_description('FOO.BAR.00019.i0001_03924')
        self.assertEqual(str(ctx.exception), "'FOO.BAR.00019.i0001_03924'")