예제 #1
0
class TestOptionsParser(unittest.TestCase):
    def setUp(self):
        self.options_parser = OptionsParser(-1)

        pass

    def tearDown(self):
        pass

    def test__verify_genome_id__valid(self):
        """ Test that a valid genome id returns True. """
        self.assertTrue(self.options_parser._verify_genome_id('genome_1'))

    def test__verify_genome_id__invalid(self):
        """ Test that invalid genome ids throw an exception. """
        for c in list('()[],;='):
            self.assertRaises(GenomeNameInvalid,
                              self.options_parser._verify_genome_id,
                              'genome%s1' % c)

    def test__genomes_to_process__genome_dir__valid(self):
        """ Test that the expected results are returned when using genome_dir. """
        try:
            tmp_genome_dir = tempfile.mkdtemp()
            open(os.path.join(tmp_genome_dir, 'genome_1.fna'), 'a').close()
            open(os.path.join(tmp_genome_dir, 'genome_2.fna'), 'a').close()
            open(os.path.join(tmp_genome_dir, 'other_file.txt'), 'a').close()
            results = self.options_parser._genomes_to_process(
                tmp_genome_dir, '', 'fna')
            expected = {
                'genome_1': os.path.join(tmp_genome_dir, 'genome_1.fna'),
                'genome_2': os.path.join(tmp_genome_dir, 'genome_2.fna')
            }
            self.assertDictEqual(results, expected)
        finally:
            shutil.rmtree(tmp_genome_dir)

    def test__genomes_to_process__batchfile__valid(self):
        """ Test that the expected results are returned when using batchfile """
        try:
            tmp_genome_dir = tempfile.mkdtemp()
            path_batchfile = os.path.join(tmp_genome_dir, 'batchfile.txt')
            path_genome_1 = os.path.join(tmp_genome_dir, 'genome_1.fna')
            path_genome_2 = os.path.join(tmp_genome_dir, 'genome_2.fna')
            open(path_genome_1, 'a').close()
            open(path_genome_2, 'a').close()

            with open(path_batchfile, 'a') as f:
                f.write('%s\tgenome_1\n' % path_genome_1)
                f.write('\n')
                f.write('%s\tgenome_2\n' % path_genome_2)

            results = self.options_parser._genomes_to_process(
                '', path_batchfile, 'fna')
            expected = {'genome_1': path_genome_1, 'genome_2': path_genome_2}
            self.assertDictEqual(results, expected)
        finally:
            shutil.rmtree(tmp_genome_dir)

    def test__genomes_to_process__batchfile__invalid_columns(self):
        """ Test that a batchfile containing columns not equal to 2 throws an exception. """
        try:
            tmp_genome_dir = tempfile.mkdtemp()
            path_batchfile = os.path.join(tmp_genome_dir, 'batchfile.txt')
            path_genome_1 = os.path.join(tmp_genome_dir, 'genome_1.fna')
            path_genome_2 = os.path.join(tmp_genome_dir, 'genome_2.fna')
            open(path_genome_1, 'a').close()
            open(path_genome_2, 'a').close()

            with open(path_batchfile, 'a') as f:
                f.write('%s\tgenome_1\n' % path_genome_1)
                f.write('\n')
                f.write('%s\tgenome_2\tfoo\n' % path_genome_2)

            self.assertRaises(GenomeBatchfileMalformed,
                              self.options_parser._genomes_to_process, '',
                              path_batchfile, 'fna')
        finally:
            shutil.rmtree(tmp_genome_dir)

    def test__genomes_to_process__batchfile__blank_genome_path(self):
        """ Test that a batchfile containing a blank genome path throws an exception. """
        try:
            tmp_genome_dir = tempfile.mkdtemp()
            path_batchfile = os.path.join(tmp_genome_dir, 'batchfile.txt')
            path_genome_1 = os.path.join(tmp_genome_dir, 'genome_1.fna')
            path_genome_2 = os.path.join(tmp_genome_dir, 'genome_2.fna')
            open(path_genome_1, 'a').close()
            open(path_genome_2, 'a').close()

            with open(path_batchfile, 'a') as f:
                f.write('%s\tgenome_1\n' % path_genome_1)
                f.write('\n')
                f.write('%s\tgenome_2\n' % '')

            self.assertRaises(GenomeBatchfileMalformed,
                              self.options_parser._genomes_to_process, '',
                              path_batchfile, 'fna')
        finally:
            shutil.rmtree(tmp_genome_dir)

    def test__genomes_to_process__batchfile__blank_genome_id(self):
        """ Test that a batchfile containing a blank genome id throws an exception. """
        try:
            tmp_genome_dir = tempfile.mkdtemp()
            path_batchfile = os.path.join(tmp_genome_dir, 'batchfile.txt')
            path_genome_1 = os.path.join(tmp_genome_dir, 'genome_1.fna')
            path_genome_2 = os.path.join(tmp_genome_dir, 'genome_2.fna')
            open(path_genome_1, 'a').close()
            open(path_genome_2, 'a').close()

            with open(path_batchfile, 'a') as f:
                f.write('%s\tgenome_1\n' % path_genome_1)
                f.write('\n')
                f.write('%s\t\n' % path_genome_2)

            self.assertRaises(GenomeBatchfileMalformed,
                              self.options_parser._genomes_to_process, '',
                              path_batchfile, 'fna')
        finally:
            shutil.rmtree(tmp_genome_dir)

    def test__genomes_to_process__batchfile__duplicate_genome_id(self):
        """ Test that a batchfile containing duplicate genome ids throws an exception. """
        # Branch 1: The number of columns are not equal to 2.
        try:
            tmp_genome_dir = tempfile.mkdtemp()
            path_batchfile = os.path.join(tmp_genome_dir, 'batchfile.txt')
            path_genome_1 = os.path.join(tmp_genome_dir, 'genome_1.fna')
            path_genome_2 = os.path.join(tmp_genome_dir, 'genome_2.fna')
            open(path_genome_1, 'a').close()
            open(path_genome_2, 'a').close()

            with open(path_batchfile, 'a') as f:
                f.write('%s\tgenome_1\n' % path_genome_1)
                f.write('\n')
                f.write('%s\tgenome_1\n' % path_genome_2)

            self.assertRaises(GenomeBatchfileMalformed,
                              self.options_parser._genomes_to_process, '',
                              path_batchfile, 'fna')
        finally:
            shutil.rmtree(tmp_genome_dir)

    def test__genomes_to_process__batchfile__invalid_genome_id(self):
        """ Test that a batchfile containing duplicate genome ids throws an exception. """
        # Branch 1: The number of columns are not equal to 2.
        try:
            tmp_genome_dir = tempfile.mkdtemp()
            path_batchfile_1 = os.path.join(tmp_genome_dir, 'batchfile_1.txt')
            path_batchfile_2 = os.path.join(tmp_genome_dir, 'batchfile_2.txt')
            path_batchfile_3 = os.path.join(tmp_genome_dir, 'batchfile_3.txt')
            path_genome_1 = os.path.join(tmp_genome_dir, 'genome_1.fna')
            path_genome_2 = os.path.join(tmp_genome_dir, 'genome_2.fna')
            open(path_genome_1, 'a').close()
            open(path_genome_2, 'a').close()

            with open(path_batchfile_1, 'a') as f:
                f.write('%s\tgenome_1\n' % path_genome_1)
                f.write('\n')
                f.write('%s\tGB_genome_2\n' % path_genome_2)

            with open(path_batchfile_2, 'a') as f:
                f.write('%s\tgenome_1\n' % path_genome_1)
                f.write('\n')
                f.write('%s\tRS_genome_2\n' % path_genome_2)

            with open(path_batchfile_3, 'a') as f:
                f.write('%s\tgenome_1\n' % path_genome_1)
                f.write('\n')
                f.write('%s\tUBAgenome_2\n' % path_genome_2)

            self.assertRaises(GenomeNameInvalid,
                              self.options_parser._genomes_to_process, '',
                              path_batchfile_1, 'fna')
            self.assertRaises(GenomeNameInvalid,
                              self.options_parser._genomes_to_process, '',
                              path_batchfile_2, 'fna')
            self.assertRaises(GenomeNameInvalid,
                              self.options_parser._genomes_to_process, '',
                              path_batchfile_3, 'fna')
        finally:
            shutil.rmtree(tmp_genome_dir)

    def test__genomes_to_process__no_files(self):
        """ Test that an exception is thrown if no files are found to process """
        # Branch 1 : genome_dir is specified
        try:
            tmp_genome_dir = tempfile.mkdtemp()
            self.assertRaises(NoGenomesFound,
                              self.options_parser._genomes_to_process,
                              tmp_genome_dir, '', 'fna')
        finally:
            shutil.rmtree(tmp_genome_dir)

        # Branch 2: batchfile is specified
        try:
            tmp_genome_dir = tempfile.mkdtemp()
            path_batchfile = os.path.join(tmp_genome_dir, 'batchfile.txt')
            open(path_batchfile, 'a').close()
            self.assertRaises(NoGenomesFound,
                              self.options_parser._genomes_to_process, '',
                              path_batchfile, 'fna')
        finally:
            shutil.rmtree(tmp_genome_dir)

    def test__marker_set_id(self):
        """ Test that the correct marker set id is returned """
        self.assertEqual(
            self.options_parser._marker_set_id(True, False, False), 'bac120')
        self.assertEqual(
            self.options_parser._marker_set_id(False, True, False), 'ar122')
        self.assertEqual(
            self.options_parser._marker_set_id(False, False, True), 'rps23')

    def test_identify__genome_dir_raises_io_exception(self):
        """ Test that the identify method raises an exception on invalid genome_dir """
        options = argparse.ArgumentParser()
        options.genome_dir = os.path.join(tempfile.gettempdir(),
                                          'non-existent-dir')
        self.assertRaises(BioLibDirNotFound, self.options_parser.identify,
                          options)

    def test_identify__batchfile_raises_io_exception(self):
        """ Test that the identify method raises an exception on invalid batchfile """
        options = argparse.ArgumentParser()
        options.genome_dir = None
        options.batchfile = os.path.join(tempfile.gettempdir(),
                                         'non-existent-file.txt')
        self.assertRaises(BioLibFileNotFound, self.options_parser.identify,
                          options)

    def test_align__identify_dir_raises_io_exception(self):
        """ Test that the align method raises an exception on invalid identify dir """
        options = argparse.ArgumentParser()
        options.identify_dir = os.path.join(tempfile.gettempdir(),
                                            'non-existent-dir')
        self.assertRaises(BioLibDirNotFound, self.options_parser.align,
                          options)

    def test_infer__msa_raises_io_exception(self):
        """ Test that the infer method raises an exception on invalid MSA """
        options = argparse.ArgumentParser()
        options.msa_file = os.path.join(tempfile.gettempdir(),
                                        'non-existent-msa.txt')
        self.assertRaises(BioLibFileNotFound, self.options_parser.infer,
                          options)

    def test_classify__align_dir_raises_io_exception(self):
        """ Test that the classify method raises an exception on invalid align dir """
        options = argparse.ArgumentParser()
        options.align_dir = os.path.join(tempfile.gettempdir(),
                                         'non-existent-dir')
        self.assertRaises(BioLibDirNotFound, self.options_parser.classify,
                          options)

    def test_root__no_tree_raises_io_exception(self):
        """ Test that the infer method raises an exception on invalid tree """
        options = argparse.ArgumentParser()
        options.input_tree = os.path.join(tempfile.gettempdir(),
                                          'non-existent-tree.tree')
        self.assertRaises(BioLibFileNotFound, self.options_parser.root,
                          options)

    def test_decorate__no_tree_raises_io_exception(self):
        """ Test that the infer method raises an exception on invalid tree """
        options = argparse.ArgumentParser()
        options.input_tree = os.path.join(tempfile.gettempdir(),
                                          'non-existent-tree.tree')
        self.assertRaises(BioLibFileNotFound, self.options_parser.decorate,
                          options)

    def test_trim_msa__mask_file(self):
        """ Test that the expected result is returned when running trim_msa with mask_file """
        try:
            tmp_out_dir = tempfile.mkdtemp()
            path_untrimmed_msa = os.path.join(tmp_out_dir,
                                              'untrimmed_msa.fasta')
            path_mask_file = os.path.join(tmp_out_dir, 'mask_file.txt')
            path_output = os.path.join(tmp_out_dir, 'trimmed_msa.fasta')

            with open(path_untrimmed_msa, 'w') as f:
                f.write('>genome_1\n')
                f.write('ALGPVW\n')
                f.write('>genome_2\n')
                f.write('WVPGLA\n')

            with open(path_mask_file, 'w') as f:
                f.write('010010\n')

            options = argparse.ArgumentParser()
            # Required arguments
            options.untrimmed_msa = path_untrimmed_msa
            options.output = path_output
            # Mutex arguments
            options.mask_file = path_mask_file
            options.reference_mask = None

            self.options_parser.trim_msa(options)

            results = dict()
            with open(path_output, 'r') as f:
                re_hits = re.findall(r'>(.+)\n(.+)\n', f.read())
                for gid, seq in re_hits:
                    results[gid] = seq

            expected = {'genome_1': 'LV', 'genome_2': 'VL'}

            self.assertDictEqual(results, expected)

            pass
        finally:
            shutil.rmtree(tmp_out_dir)

    def test_trim_msa__reference_mask_arc(self):
        """ Test that the expected result is returned when running trim_msa with archaeal reference_mask """
        try:
            tmp_out_dir = tempfile.mkdtemp()
            path_untrimmed_msa = os.path.join(tmp_out_dir,
                                              'untrimmed_msa.fasta')
            path_output = os.path.join(tmp_out_dir, 'trimmed_msa.fasta')

            msa_str = str()
            while len(msa_str) < 32675:
                msa_str += 'ALGPVW'
            msa_str = msa_str[0:32675]

            with open(path_untrimmed_msa, 'w') as f:
                f.write('>genome_1\n')
                f.write('%s\n' % msa_str)
                f.write('>genome_2\n')
                f.write('%s\n' % msa_str[::-1])

            options = argparse.ArgumentParser()
            # Required arguments
            options.untrimmed_msa = path_untrimmed_msa
            options.output = path_output
            # Mutex arguments
            options.mask_file = None
            options.reference_mask = 'arc'

            self.options_parser.trim_msa(options)

            results = dict()
            with open(path_output, 'r') as f:
                re_hits = re.findall(r'>(.+)\n(.+)\n', f.read())
                for gid, seq in re_hits:
                    results[gid] = hashlib.sha256(seq).hexdigest()

            expected = {
                'genome_1':
                '332b8cd125a36c375196064e136efab78db38e41bbd8bd8484243531bc57df6d',
                'genome_2':
                '84e91b9f5fa1ec0bedc0097233044e6dd0e79557bb6df3625928dc9573795989'
            }

            self.assertDictEqual(results, expected)
        finally:
            shutil.rmtree(tmp_out_dir)

    def test_trim_msa__reference_mask_bac(self):
        """ Test that the expected result is returned when running trim_msa with bacterial reference_mask """
        try:
            tmp_out_dir = tempfile.mkdtemp()
            path_untrimmed_msa = os.path.join(tmp_out_dir,
                                              'untrimmed_msa.fasta')
            path_output = os.path.join(tmp_out_dir, 'trimmed_msa.fasta')

            msa_str = str()
            while len(msa_str) < 41155:
                msa_str += 'ALGPVW'
            msa_str = msa_str[0:41155]

            with open(path_untrimmed_msa, 'w') as f:
                f.write('>genome_1\n')
                f.write('%s\n' % msa_str)
                f.write('>genome_2\n')
                f.write('%s\n' % msa_str[::-1])

            options = argparse.ArgumentParser()
            # Required arguments
            options.untrimmed_msa = path_untrimmed_msa
            options.output = path_output
            # Mutex arguments
            options.mask_file = None
            options.reference_mask = 'bac'

            self.options_parser.trim_msa(options)

            results = dict()
            with open(path_output, 'r') as f:
                re_hits = re.findall(r'>(.+)\n(.+)\n', f.read())
                for gid, seq in re_hits:
                    results[gid] = hashlib.sha256(seq).hexdigest()

            expected = {
                'genome_1':
                '35e080f9ab7d318e8f4a7cef46ce6044bd9c538e6fbe8a69b17431df44bd5a81',
                'genome_2':
                'bb4beed69063dad0092a809ee5854ff124da0b55c651edd50c47b1d8fdff0d7b'
            }

            self.assertDictEqual(results, expected)
        finally:
            shutil.rmtree(tmp_out_dir)

    def test_export_msa__arc(self):
        """ Test that the untrimmed archaeal MSA is exported correctly """
        try:
            tmp_out_dir = tempfile.mkdtemp()
            path_out = os.path.join(tmp_out_dir, 'output.fasta')

            options = argparse.ArgumentParser()
            options.domain = 'arc'
            options.output = path_out

            self.options_parser.export_msa(options)

            with open(path_out, 'r') as f:
                out_hash = hashlib.sha256(f.read()).hexdigest()
            self.assertEqual(
                out_hash,
                '11eb12b91ab20c43824abafb909ccc20bed84a8609a9bf82748b2cdbdd8b7aad'
            )
        finally:
            shutil.rmtree(tmp_out_dir)

    def test_export_msa__bac(self):
        """ Test that the untrimmed bacterial MSA is exported correctly """
        try:
            tmp_out_dir = tempfile.mkdtemp()
            path_out = os.path.join(tmp_out_dir, 'output.fasta')

            options = argparse.ArgumentParser()
            options.domain = 'bac'
            options.output = path_out

            self.options_parser.export_msa(options)

            with open(path_out, 'r') as f:
                out_hash = hashlib.sha256(f.read()).hexdigest()
            self.assertEqual(
                out_hash,
                '50dde1e96df9533def7c7047a1e8627d4ad566db10f8ab3de72751e62c4ac10a'
            )
        finally:
            shutil.rmtree(tmp_out_dir)
예제 #2
0
파일: test_main.py 프로젝트: alienzj/GTDBTk
class TestOptionsParser(unittest.TestCase):
    def setUp(self):
        self.options_parser = OptionsParser('-1')
        self.dir_tmp = tempfile.mkdtemp(prefix='gtdbtk_tmp_')
        pass

    def tearDown(self):
        shutil.rmtree(self.dir_tmp)

    def test__verify_genome_id__valid(self):
        """ Test that a valid genome id returns True. """
        self.assertTrue(self.options_parser._verify_genome_id('genome_1'))

    def test__verify_genome_id__invalid(self):
        """ Test that invalid genome ids throw an exception. """
        for c in list('()[],;='):
            self.assertRaises(GTDBTkExit,
                              self.options_parser._verify_genome_id,
                              'genome%s1' % c)

    def test__genomes_to_process__genome_dir__valid(self):
        """ Test that the expected results are returned when using genome_dir. """
        open(os.path.join(self.dir_tmp, 'genome_1.fna'), 'a').close()
        open(os.path.join(self.dir_tmp, 'genome_2.fna'), 'a').close()
        open(os.path.join(self.dir_tmp, 'other_file.txt'), 'a').close()
        results, tln_table = self.options_parser._genomes_to_process(
            self.dir_tmp, '', 'fna')
        expected = {
            'genome_1': os.path.join(self.dir_tmp, 'genome_1.fna'),
            'genome_2': os.path.join(self.dir_tmp, 'genome_2.fna')
        }
        self.assertDictEqual(results, expected)

    def test__genomes_to_process__batchfile__valid(self):
        """ Test that the expected results are returned when using batchfile """
        path_batchfile = os.path.join(self.dir_tmp, 'batchfile.txt')
        path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna')
        path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
        open(path_genome_1, 'a').close()
        open(path_genome_2, 'a').close()

        with open(path_batchfile, 'a') as f:
            f.write(f'{path_genome_1}\tgenome_1\n')
            f.write('\n')
            f.write(f'{path_genome_2}\tgenome_2\t4\n')

        results, tln_table = self.options_parser._genomes_to_process(
            '', path_batchfile, 'fna')
        expected = {'genome_1': path_genome_1, 'genome_2': path_genome_2}
        expected_tln = {'genome_2': 4}
        self.assertDictEqual(results, expected)
        self.assertDictEqual(tln_table, expected_tln)

    def test__genomes_to_process__batchfile__invalid_columns(self):
        """ Test that a batchfile containing columns not equal to 2 throws an exception. """
        path_batchfile = os.path.join(self.dir_tmp, 'batchfile.txt')
        path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna')
        path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
        open(path_genome_1, 'a').close()
        open(path_genome_2, 'a').close()

        with open(path_batchfile, 'a') as f:
            f.write('%s\tgenome_1\n' % path_genome_1)
            f.write('\n')
            f.write('%s\tgenome_2\tfoo\n' % path_genome_2)

        self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    def test__genomes_to_process__batchfile__blank_genome_path(self):
        """ Test that a batchfile containing a blank genome path throws an exception. """
        path_batchfile = os.path.join(self.dir_tmp, 'batchfile.txt')
        path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna')
        path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
        open(path_genome_1, 'a').close()
        open(path_genome_2, 'a').close()

        with open(path_batchfile, 'a') as f:
            f.write('%s\tgenome_1\n' % path_genome_1)
            f.write('\n')
            f.write('%s\tgenome_2\n' % '')

        self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    def test__genomes_to_process__batchfile__blank_genome_id(self):
        """ Test that a batchfile containing a blank genome id throws an exception. """
        path_batchfile = os.path.join(self.dir_tmp, 'batchfile.txt')
        path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna')
        path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
        open(path_genome_1, 'a').close()
        open(path_genome_2, 'a').close()

        with open(path_batchfile, 'a') as f:
            f.write('%s\tgenome_1\n' % path_genome_1)
            f.write('\n')
            f.write('%s\t\n' % path_genome_2)

        self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    def test__genomes_to_process__batchfile__duplicate_genome_id(self):
        """ Test that a batchfile containing duplicate genome ids throws an exception. """
        # Branch 1: The number of columns are not equal to 2.
        path_batchfile = os.path.join(self.dir_tmp, 'batchfile.txt')
        path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna')
        path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
        open(path_genome_1, 'a').close()
        open(path_genome_2, 'a').close()

        with open(path_batchfile, 'a') as f:
            f.write('%s\tgenome_1\n' % path_genome_1)
            f.write('\n')
            f.write('%s\tgenome_1\n' % path_genome_2)

        self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    # def test__genomes_to_process__batchfile__invalid_genome_id(self):
    #     """ Test that a batchfile containing duplicate genome ids throws an exception. """
    #     # Branch 1: The number of columns are not equal to 2.
    #     path_batchfile_1 = os.path.join(self.dir_tmp, 'batchfile_1.txt')
    #     path_batchfile_2 = os.path.join(self.dir_tmp, 'batchfile_2.txt')
    #     path_batchfile_3 = os.path.join(self.dir_tmp, 'batchfile_3.txt')
    #     path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna')
    #     path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
    #     open(path_genome_1, 'a').close()
    #     open(path_genome_2, 'a').close()
    #
    #     with open(path_batchfile_1, 'a') as f:
    #         f.write('%s\tgenome_1\n' % path_genome_1)
    #         f.write('\n')
    #         f.write('%s\tGB_genome_2\n' % path_genome_2)
    #
    #     with open(path_batchfile_2, 'a') as f:
    #         f.write('%s\tgenome_1\n' % path_genome_1)
    #         f.write('\n')
    #         f.write('%s\tRS_genome_2\n' % path_genome_2)
    #
    #     with open(path_batchfile_3, 'a') as f:
    #         f.write('%s\tgenome_1\n' % path_genome_1)
    #         f.write('\n')
    #         f.write('%s\tUBAgenome_2\n' % path_genome_2)
    #
    #     self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_1, 'fna')
    #     self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_2, 'fna')
    #     self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_3, 'fna')

    def test__genomes_to_process__no_files(self):
        """ Test that an exception is thrown if no files are found to process """
        # Branch 1 : genome_dir is specified
        tmp_genome_dir = tempfile.mkdtemp()
        try:
            self.assertRaises(GTDBTkExit,
                              self.options_parser._genomes_to_process,
                              tmp_genome_dir, '', 'fna')
        finally:
            shutil.rmtree(tmp_genome_dir)

        # Branch 2: batchfile is specified
        tmp_genome_dir = tempfile.mkdtemp()
        try:
            path_batchfile = os.path.join(tmp_genome_dir, 'batchfile.txt')
            open(path_batchfile, 'a').close()
            self.assertRaises(GTDBTkExit,
                              self.options_parser._genomes_to_process, '',
                              path_batchfile, 'fna')
        finally:
            shutil.rmtree(tmp_genome_dir)

    def test_identify__genome_dir_raises_io_exception(self):
        """ Test that the identify method raises an exception on invalid genome_dir """
        options = argparse.ArgumentParser()
        options.genome_dir = os.path.join(tempfile.gettempdir(),
                                          'non-existent-dir')
        self.assertRaises(BioLibDirNotFound, self.options_parser.identify,
                          options)

    def test_identify__batchfile_raises_io_exception(self):
        """ Test that the identify method raises an exception on invalid batchfile """
        options = argparse.ArgumentParser()
        options.genome_dir = None
        options.batchfile = os.path.join(tempfile.gettempdir(),
                                         'non-existent-file.txt')
        self.assertRaises(BioLibFileNotFound, self.options_parser.identify,
                          options)

    def test_align__identify_dir_raises_io_exception(self):
        """ Test that the align method raises an exception on invalid identify dir """
        options = argparse.ArgumentParser()
        options.identify_dir = os.path.join(tempfile.gettempdir(),
                                            'non-existent-dir')
        self.assertRaises(BioLibDirNotFound, self.options_parser.align,
                          options)

    def test_infer__msa_raises_io_exception(self):
        """ Test that the infer method raises an exception on invalid MSA """
        options = argparse.ArgumentParser()
        options.msa_file = os.path.join(tempfile.gettempdir(),
                                        'non-existent-msa.txt')
        self.assertRaises(BioLibFileNotFound, self.options_parser.infer,
                          options)

    def test_run_test(self):
        """Test that the user-test method runs correctly"""
        options = argparse.ArgumentParser()
        options.out_dir = self.dir_tmp
        options.cpus = 3
        self.assertTrue(self.options_parser.run_test(options))

    # def test_run_test__throws_exception(self):
    #     """Test that the user-test method fails correctly"""
    #     options = argparse.ArgumentParser()
    #     options.out_dir = self.dir_tmp
    #     os.mkdir(os.path.join(self.dir_tmp, 'genomes'))
    #     options.cpus = 3
    #     self.assertRaises(GTDBTkTestFailure, self.options_parser.run_test, options)

    def test_classify__align_dir_raises_io_exception(self):
        """ Test that the classify method raises an exception on invalid align dir """
        options = argparse.ArgumentParser()
        options.align_dir = os.path.join(tempfile.gettempdir(),
                                         'non-existent-dir')
        self.assertRaises(BioLibDirNotFound, self.options_parser.classify,
                          options)

    def test_root__no_tree_raises_io_exception(self):
        """ Test that the infer method raises an exception on invalid tree """
        options = argparse.ArgumentParser()
        options.input_tree = os.path.join(tempfile.gettempdir(),
                                          'non-existent-tree.tree')
        self.assertRaises(BioLibFileNotFound, self.options_parser.root,
                          options)

    def test_decorate__no_tree_raises_io_exception(self):
        """ Test that the infer method raises an exception on invalid tree """
        options = argparse.ArgumentParser()
        options.input_tree = os.path.join(tempfile.gettempdir(),
                                          'non-existent-tree.tree')
        self.assertRaises(BioLibFileNotFound, self.options_parser.decorate,
                          options)
예제 #3
0
class TestOptionsParser(unittest.TestCase):
    """Unit tests for ``OptionsParser``.

    ``setUp`` creates a scratch directory (``self.dir_tmp``) for every test
    and ``tearDown`` removes it again, so tests may freely create files there.
    """

    def setUp(self):
        self.options_parser = OptionsParser('-1')
        self.dir_tmp = tempfile.mkdtemp(prefix='gtdbtk_tmp_')

    def tearDown(self):
        shutil.rmtree(self.dir_tmp)

    # ------------------------------------------------------------------
    # Fixture helpers (leading underscore: not collected as tests)
    # ------------------------------------------------------------------

    def _tmp_path(self, name):
        """ Return the path of ``name`` inside the per-test scratch directory. """
        return os.path.join(self.dir_tmp, name)

    def _touch(self, name):
        """ Create an empty file ``name`` in the scratch directory; return its path. """
        path = self._tmp_path(name)
        with open(path, 'a'):
            pass
        return path

    def _touch_genomes(self):
        """ Create the empty files genome_1.fna and genome_2.fna; return both paths. """
        return self._touch('genome_1.fna'), self._touch('genome_2.fna')

    def _write_lines(self, name, lines):
        """ Write ``lines`` verbatim to ``name`` in the scratch directory; return its path. """
        path = self._tmp_path(name)
        with open(path, 'w') as fh:
            fh.writelines(lines)
        return path

    @staticmethod
    def _options(**attrs):
        """ Build the options object passed to the ``OptionsParser`` methods.

        An ``argparse.ArgumentParser`` instance is used purely as an attribute
        holder; every keyword argument becomes an attribute on it.
        """
        options = argparse.ArgumentParser()
        for key, value in attrs.items():
            setattr(options, key, value)
        return options

    @staticmethod
    def _missing(name):
        """ Return a path called ``name`` in the system temp dir (expected not to exist). """
        return os.path.join(tempfile.gettempdir(), name)

    @staticmethod
    def _read_fasta(path):
        """ Parse two-line FASTA records from ``path`` into ``{id: sequence}``. """
        with open(path, 'r') as fh:
            return dict(re.findall(r'>(.+)\n(.+)\n', fh.read()))

    @staticmethod
    def _file_sha256(path):
        """ Return the SHA-256 hex digest of the bytes stored in ``path``. """
        with open(path, 'rb') as fh:
            return hashlib.sha256(fh.read()).hexdigest()

    @staticmethod
    def _repeat_to_length(length, unit='ALGPVW'):
        """ Return ``unit`` repeated and cut to exactly ``length`` characters. """
        return (unit * (length // len(unit) + 1))[:length]

    def _assert_batchfile_raises(self, exc_type, last_row, name='batchfile.txt'):
        """ Assert that ``_genomes_to_process`` rejects a batchfile.

        The batchfile holds a valid row for genome_1, a blank line and then
        ``last_row``, in which ``{path}`` is replaced by the genome_2 path.
        """
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_lines(name, [
            '%s\tgenome_1\n' % path_genome_1,
            '\n',
            last_row.format(path=path_genome_2),
        ])
        self.assertRaises(exc_type, self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    def _trim_msa(self, records, mask_file, reference_mask):
        """ Run ``trim_msa`` on ``records`` and return the parsed output.

        ``records`` is a sequence of ``(genome_id, sequence)`` pairs that is
        written as the untrimmed MSA; the result is ``{genome_id: sequence}``
        read back from the trimmed output file.
        """
        path_untrimmed_msa = self._write_lines(
            'untrimmed_msa.fasta',
            ['>%s\n%s\n' % (gid, seq) for gid, seq in records])
        path_output = self._tmp_path('trimmed_msa.fasta')
        options = self._options(
            # Required arguments
            untrimmed_msa=path_untrimmed_msa,
            output=path_output,
            # Mutex arguments
            mask_file=mask_file,
            reference_mask=reference_mask)
        self.options_parser.trim_msa(options)
        return self._read_fasta(path_output)

    def _trim_with_reference_mask(self, length, reference_mask):
        """ Trim a synthetic two-genome MSA of ``length`` columns.

        Returns ``{genome_id: sha256 hex digest of the trimmed sequence}``.
        """
        msa_str = self._repeat_to_length(length)
        trimmed = self._trim_msa(
            [('genome_1', msa_str), ('genome_2', msa_str[::-1])],
            mask_file=None, reference_mask=reference_mask)
        return {
            gid: hashlib.sha256(seq.encode('utf-8')).hexdigest()
            for gid, seq in trimmed.items()
        }

    def _export_msa_digest(self, domain):
        """ Run ``export_msa`` for ``domain`` and return the SHA-256 of its output. """
        path_out = self._tmp_path('output.fasta')
        self.options_parser.export_msa(
            self._options(domain=domain, output=path_out))
        return self._file_sha256(path_out)

    # ------------------------------------------------------------------
    # _verify_genome_id
    # ------------------------------------------------------------------

    def test__verify_genome_id__valid(self):
        """ Test that a valid genome id returns True. """
        self.assertTrue(self.options_parser._verify_genome_id('genome_1'))

    def test__verify_genome_id__invalid(self):
        """ Test that invalid genome ids throw an exception. """
        for char in '()[],;=':
            # subTest: report every offending character, not just the first.
            with self.subTest(char=char):
                self.assertRaises(GenomeNameInvalid,
                                  self.options_parser._verify_genome_id,
                                  'genome%s1' % char)

    # ------------------------------------------------------------------
    # _genomes_to_process
    # ------------------------------------------------------------------

    def test__genomes_to_process__genome_dir__valid(self):
        """ Test that the expected results are returned when using genome_dir. """
        path_genome_1, path_genome_2 = self._touch_genomes()
        # A file without the requested extension must not be reported.
        self._touch('other_file.txt')

        results = self.options_parser._genomes_to_process(
            self.dir_tmp, '', 'fna')

        expected = {'genome_1': path_genome_1, 'genome_2': path_genome_2}
        self.assertDictEqual(results, expected)

    def test__genomes_to_process__batchfile__valid(self):
        """ Test that the expected results are returned when using batchfile """
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_lines('batchfile.txt', [
            '%s\tgenome_1\n' % path_genome_1,
            '\n',
            '%s\tgenome_2\n' % path_genome_2,
        ])

        results = self.options_parser._genomes_to_process(
            '', path_batchfile, 'fna')

        expected = {'genome_1': path_genome_1, 'genome_2': path_genome_2}
        self.assertDictEqual(results, expected)

    def test__genomes_to_process__batchfile__invalid_columns(self):
        """ Test that a batchfile containing columns not equal to 2 throws an exception. """
        self._assert_batchfile_raises(GenomeBatchfileMalformed,
                                      '{path}\tgenome_2\tfoo\n')

    def test__genomes_to_process__batchfile__blank_genome_path(self):
        """ Test that a batchfile containing a blank genome path throws an exception. """
        self._assert_batchfile_raises(GenomeBatchfileMalformed,
                                      '\tgenome_2\n')

    def test__genomes_to_process__batchfile__blank_genome_id(self):
        """ Test that a batchfile containing a blank genome id throws an exception. """
        self._assert_batchfile_raises(GenomeBatchfileMalformed, '{path}\t\n')

    def test__genomes_to_process__batchfile__duplicate_genome_id(self):
        """ Test that a batchfile containing duplicate genome ids throws an exception. """
        # Both rows use the id genome_1, each with a different genome path.
        self._assert_batchfile_raises(GTDBTkExit, '{path}\tgenome_1\n')

    def test__genomes_to_process__batchfile__invalid_genome_id(self):
        """ Test that genome ids starting with GB_, RS_ or UBA throw an exception. """
        # NOTE(review): presumably these prefixes are reserved for reference
        # genomes - confirm against OptionsParser._genomes_to_process.
        cases = (
            ('batchfile_1.txt', 'GB_genome_2'),
            ('batchfile_2.txt', 'RS_genome_2'),
            ('batchfile_3.txt', 'UBAgenome_2'),
        )
        for name, genome_id in cases:
            with self.subTest(genome_id=genome_id):
                self._assert_batchfile_raises(
                    GTDBTkExit, '{path}\t%s\n' % genome_id, name=name)

    def test__genomes_to_process__no_files(self):
        """ Test that an exception is thrown if no files are found to process """
        # Branch 1: genome_dir is specified but contains no genomes.
        with tempfile.TemporaryDirectory() as empty_dir:
            self.assertRaises(GTDBTkExit,
                              self.options_parser._genomes_to_process,
                              empty_dir, '', 'fna')

        # Branch 2: batchfile is specified but is empty.
        with tempfile.TemporaryDirectory() as batch_dir:
            path_batchfile = os.path.join(batch_dir, 'batchfile.txt')
            with open(path_batchfile, 'a'):
                pass
            self.assertRaises(GTDBTkExit,
                              self.options_parser._genomes_to_process, '',
                              path_batchfile, 'fna')

    # ------------------------------------------------------------------
    # _marker_set_id
    # ------------------------------------------------------------------

    def test__marker_set_id(self):
        """ Test that the correct marker set id is returned """
        cases = (
            ((True, False, False), 'bac120'),
            ((False, True, False), 'ar122'),
            ((False, False, True), 'rps23'),
        )
        for flags, expected in cases:
            with self.subTest(flags=flags):
                self.assertEqual(
                    self.options_parser._marker_set_id(*flags), expected)

    # ------------------------------------------------------------------
    # Missing input paths
    # ------------------------------------------------------------------

    def test_identify__genome_dir_raises_io_exception(self):
        """ Test that the identify method raises an exception on invalid genome_dir """
        options = self._options(genome_dir=self._missing('non-existent-dir'))
        self.assertRaises(BioLibDirNotFound, self.options_parser.identify,
                          options)

    def test_identify__batchfile_raises_io_exception(self):
        """ Test that the identify method raises an exception on invalid batchfile """
        options = self._options(
            genome_dir=None,
            batchfile=self._missing('non-existent-file.txt'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.identify,
                          options)

    def test_align__identify_dir_raises_io_exception(self):
        """ Test that the align method raises an exception on invalid identify dir """
        options = self._options(
            identify_dir=self._missing('non-existent-dir'))
        self.assertRaises(BioLibDirNotFound, self.options_parser.align,
                          options)

    def test_infer__msa_raises_io_exception(self):
        """ Test that the infer method raises an exception on invalid MSA """
        options = self._options(
            msa_file=self._missing('non-existent-msa.txt'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.infer,
                          options)

    def test_run_test(self):
        """Test that the user-test method runs correctly"""
        options = self._options(out_dir=self.dir_tmp, cpus=3)
        self.assertTrue(self.options_parser.run_test(options))

    # Disabled test, kept commented out for reference.
    # def test_run_test__throws_exception(self):
    #     """Test that the user-test method fails correctly"""
    #     options = argparse.ArgumentParser()
    #     options.out_dir = self.dir_tmp
    #     os.mkdir(os.path.join(self.dir_tmp, 'genomes'))
    #     options.cpus = 3
    #     self.assertRaises(GTDBTkTestFailure, self.options_parser.run_test, options)

    def test_classify__align_dir_raises_io_exception(self):
        """ Test that the classify method raises an exception on invalid align dir """
        options = self._options(align_dir=self._missing('non-existent-dir'))
        self.assertRaises(BioLibDirNotFound, self.options_parser.classify,
                          options)

    def test_root__no_tree_raises_io_exception(self):
        """ Test that the root method raises an exception on invalid tree """
        options = self._options(
            input_tree=self._missing('non-existent-tree.tree'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.root,
                          options)

    def test_decorate__no_tree_raises_io_exception(self):
        """ Test that the decorate method raises an exception on invalid tree """
        options = self._options(
            input_tree=self._missing('non-existent-tree.tree'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.decorate,
                          options)

    # ------------------------------------------------------------------
    # trim_msa
    # ------------------------------------------------------------------

    def test_trim_msa__mask_file(self):
        """ Test that the expected result is returned when running trim_msa with mask_file """
        path_mask_file = self._write_lines('mask_file.txt', ['010010\n'])

        results = self._trim_msa(
            [('genome_1', 'ALGPVW'), ('genome_2', 'WVPGLA')],
            mask_file=path_mask_file, reference_mask=None)

        # Only the columns flagged '1' in the mask (2nd and 5th) are kept.
        expected = {'genome_1': 'LV', 'genome_2': 'VL'}
        self.assertDictEqual(results, expected)

    def test_trim_msa__reference_mask_arc(self):
        """ Test that the expected result is returned when running trim_msa with archaeal reference_mask """
        results = self._trim_with_reference_mask(32675, 'arc')

        expected = {
            'genome_1':
            '4975c04d640415de4c715552f6f6b460a8996226239440faa6539ac777622515',
            'genome_2':
            '7b53881aecb13bbe54612962e22736db7ab83271ffe4685d63c16e962e3561d9'
        }
        self.assertDictEqual(results, expected)

    def test_trim_msa__reference_mask_bac(self):
        """ Test that the expected result is returned when running trim_msa with bacterial reference_mask """
        results = self._trim_with_reference_mask(41155, 'bac')

        expected = {
            'genome_1':
            '32798bdc3245b2ac5ecd8a15ea2cfb21011b22b6021baa51066864b1c02d72b4',
            'genome_2':
            '0b63d416c72e9641011f80fcf64fa41eb3f0e8e85dbaa4bd8feba12cf3b64c62'
        }
        self.assertDictEqual(results, expected)

    # ------------------------------------------------------------------
    # export_msa
    # ------------------------------------------------------------------

    def test_export_msa__arc(self):
        """ Test that the untrimmed archaeal MSA is exported correctly """
        self.assertEqual(
            self._export_msa_digest('arc'),
            'e84edf65511002b73f110ff44c9acee3ae44220448dfc971a2778d43c966bbba')

    def test_export_msa__bac(self):
        """ Test that the untrimmed bacterial MSA is exported correctly """
        self.assertEqual(
            self._export_msa_digest('bac'),
            '5e37bc123819061490681068b49450fc43587d09b87df90ef62452bd73f961cc')
예제 #4
0
class TestOptionsParser(unittest.TestCase):
    """Unit tests for ``OptionsParser``.

    ``setUp`` creates a scratch directory (``self.dir_tmp``) for every test
    and ``tearDown`` removes it again, so tests may freely create files there.
    """

    def setUp(self):
        self.options_parser = OptionsParser('-1')
        self.dir_tmp = tempfile.mkdtemp(prefix='gtdbtk_tmp_')

    def tearDown(self):
        shutil.rmtree(self.dir_tmp)

    # ------------------------------------------------------------------
    # Fixture helpers (leading underscore: not collected as tests)
    # ------------------------------------------------------------------

    def _tmp_path(self, name):
        """ Return the path of ``name`` inside the per-test scratch directory. """
        return os.path.join(self.dir_tmp, name)

    def _touch(self, name):
        """ Create an empty file ``name`` in the scratch directory; return its path. """
        path = self._tmp_path(name)
        with open(path, 'a'):
            pass
        return path

    def _touch_genomes(self):
        """ Create the empty files genome_1.fna and genome_2.fna; return both paths. """
        return self._touch('genome_1.fna'), self._touch('genome_2.fna')

    def _write_lines(self, name, lines):
        """ Write ``lines`` verbatim to ``name`` in the scratch directory; return its path. """
        path = self._tmp_path(name)
        with open(path, 'w') as fh:
            fh.writelines(lines)
        return path

    @staticmethod
    def _options(**attrs):
        """ Build the options object passed to the ``OptionsParser`` methods.

        An ``argparse.ArgumentParser`` instance is used purely as an attribute
        holder; every keyword argument becomes an attribute on it.
        """
        options = argparse.ArgumentParser()
        for key, value in attrs.items():
            setattr(options, key, value)
        return options

    @staticmethod
    def _missing(name):
        """ Return a path called ``name`` in the system temp dir (expected not to exist). """
        return os.path.join(tempfile.gettempdir(), name)

    @staticmethod
    def _file_sha256(path):
        """ Return the SHA-256 hex digest of the bytes stored in ``path``. """
        with open(path, 'rb') as fh:
            return hashlib.sha256(fh.read()).hexdigest()

    def _assert_batchfile_raises(self, exc_type, last_row, name='batchfile.txt'):
        """ Assert that ``_genomes_to_process`` rejects a batchfile.

        The batchfile holds a valid row for genome_1, a blank line and then
        ``last_row``, in which ``{path}`` is replaced by the genome_2 path.
        """
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_lines(name, [
            '%s\tgenome_1\n' % path_genome_1,
            '\n',
            last_row.format(path=path_genome_2),
        ])
        self.assertRaises(exc_type, self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    def _trim_options(self, path_untrimmed_msa, path_output, mask_file,
                      reference_mask):
        """ Build the options object for a ``trim_msa`` call. """
        return self._options(
            # Required arguments
            untrimmed_msa=path_untrimmed_msa,
            output=path_output,
            # Mutex arguments
            mask_file=mask_file,
            reference_mask=reference_mask)

    def _assert_reference_trim_digest(self, source_msa, reference_mask,
                                      expected):
        """ Trim a copy of ``source_msa`` and compare the output digest.

        The digest is computed with the module-level ``sha256`` helper on the
        trimmed output file and compared against ``expected``.
        """
        path_untrimmed_msa = self._tmp_path('untrimmed_msa.fasta')
        path_output = self._tmp_path('trimmed_msa.fasta')
        shutil.copyfile(source_msa, path_untrimmed_msa)

        options = self._trim_options(path_untrimmed_msa, path_output,
                                     mask_file=None,
                                     reference_mask=reference_mask)
        self.options_parser.trim_msa(options)

        self.assertEqual(sha256(path_output), expected)

    def _export_msa_digest(self, domain):
        """ Run ``export_msa`` for ``domain`` and return the SHA-256 of its output. """
        path_out = self._tmp_path('output.fasta')
        self.options_parser.export_msa(
            self._options(domain=domain, output=path_out))
        return self._file_sha256(path_out)

    # ------------------------------------------------------------------
    # _verify_genome_id
    # ------------------------------------------------------------------

    def test__verify_genome_id__valid(self):
        """ Test that a valid genome id returns True. """
        self.assertTrue(self.options_parser._verify_genome_id('genome_1'))

    def test__verify_genome_id__invalid(self):
        """ Test that invalid genome ids throw an exception. """
        for char in '()[],;=':
            # subTest: report every offending character, not just the first.
            with self.subTest(char=char):
                self.assertRaises(GenomeNameInvalid,
                                  self.options_parser._verify_genome_id,
                                  'genome%s1' % char)

    # ------------------------------------------------------------------
    # _genomes_to_process (returns a pair: genomes dict, translation table dict)
    # ------------------------------------------------------------------

    def test__genomes_to_process__genome_dir__valid(self):
        """ Test that the expected results are returned when using genome_dir. """
        path_genome_1, path_genome_2 = self._touch_genomes()
        # A file without the requested extension must not be reported.
        self._touch('other_file.txt')

        results, _ = self.options_parser._genomes_to_process(
            self.dir_tmp, '', 'fna')

        expected = {'genome_1': path_genome_1, 'genome_2': path_genome_2}
        self.assertDictEqual(results, expected)

    def test__genomes_to_process__batchfile__valid(self):
        """ Test that the expected results are returned when using batchfile """
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_lines('batchfile.txt', [
            f'{path_genome_1}\tgenome_1\n',
            '\n',
            # Optional third column; it is expected back in tln_table as an int.
            f'{path_genome_2}\tgenome_2\t4\n',
        ])

        results, tln_table = self.options_parser._genomes_to_process(
            '', path_batchfile, 'fna')

        expected = {'genome_1': path_genome_1, 'genome_2': path_genome_2}
        expected_tln = {'genome_2': 4}
        self.assertDictEqual(results, expected)
        self.assertDictEqual(tln_table, expected_tln)

    def test__genomes_to_process__batchfile__invalid_columns(self):
        """ Test that a batchfile row whose third column is 'foo' throws an exception. """
        # NOTE(review): a third column of '4' is accepted in the valid
        # batchfile test, so presumably the non-numeric value is what is
        # rejected here rather than the column count - confirm.
        self._assert_batchfile_raises(GTDBTkExit, '{path}\tgenome_2\tfoo\n')

    def test__genomes_to_process__batchfile__blank_genome_path(self):
        """ Test that a batchfile containing a blank genome path throws an exception. """
        self._assert_batchfile_raises(GTDBTkExit, '\tgenome_2\n')

    def test__genomes_to_process__batchfile__blank_genome_id(self):
        """ Test that a batchfile containing a blank genome id throws an exception. """
        self._assert_batchfile_raises(GTDBTkExit, '{path}\t\n')

    def test__genomes_to_process__batchfile__duplicate_genome_id(self):
        """ Test that a batchfile containing duplicate genome ids throws an exception. """
        # Both rows use the id genome_1, each with a different genome path.
        self._assert_batchfile_raises(GTDBTkExit, '{path}\tgenome_1\n')

    # Disabled test, kept commented out for reference.
    # def test__genomes_to_process__batchfile__invalid_genome_id(self):
    #     """ Test that a batchfile containing duplicate genome ids throws an exception. """
    #     # Branch 1: The number of columns are not equal to 2.
    #     path_batchfile_1 = os.path.join(self.dir_tmp, 'batchfile_1.txt')
    #     path_batchfile_2 = os.path.join(self.dir_tmp, 'batchfile_2.txt')
    #     path_batchfile_3 = os.path.join(self.dir_tmp, 'batchfile_3.txt')
    #     path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna')
    #     path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
    #     open(path_genome_1, 'a').close()
    #     open(path_genome_2, 'a').close()
    #
    #     with open(path_batchfile_1, 'a') as f:
    #         f.write('%s\tgenome_1\n' % path_genome_1)
    #         f.write('\n')
    #         f.write('%s\tGB_genome_2\n' % path_genome_2)
    #
    #     with open(path_batchfile_2, 'a') as f:
    #         f.write('%s\tgenome_1\n' % path_genome_1)
    #         f.write('\n')
    #         f.write('%s\tRS_genome_2\n' % path_genome_2)
    #
    #     with open(path_batchfile_3, 'a') as f:
    #         f.write('%s\tgenome_1\n' % path_genome_1)
    #         f.write('\n')
    #         f.write('%s\tUBAgenome_2\n' % path_genome_2)
    #
    #     self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_1, 'fna')
    #     self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_2, 'fna')
    #     self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_3, 'fna')

    def test__genomes_to_process__no_files(self):
        """ Test that an exception is thrown if no files are found to process """
        # Branch 1: genome_dir is specified but contains no genomes.
        with tempfile.TemporaryDirectory() as empty_dir:
            self.assertRaises(GTDBTkExit,
                              self.options_parser._genomes_to_process,
                              empty_dir, '', 'fna')

        # Branch 2: batchfile is specified but is empty.
        with tempfile.TemporaryDirectory() as batch_dir:
            path_batchfile = os.path.join(batch_dir, 'batchfile.txt')
            with open(path_batchfile, 'a'):
                pass
            self.assertRaises(GTDBTkExit,
                              self.options_parser._genomes_to_process, '',
                              path_batchfile, 'fna')

    # ------------------------------------------------------------------
    # Missing input paths
    # ------------------------------------------------------------------

    def test_identify__genome_dir_raises_io_exception(self):
        """ Test that the identify method raises an exception on invalid genome_dir """
        options = self._options(genome_dir=self._missing('non-existent-dir'))
        self.assertRaises(BioLibDirNotFound, self.options_parser.identify,
                          options)

    def test_identify__batchfile_raises_io_exception(self):
        """ Test that the identify method raises an exception on invalid batchfile """
        options = self._options(
            genome_dir=None,
            batchfile=self._missing('non-existent-file.txt'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.identify,
                          options)

    def test_align__identify_dir_raises_io_exception(self):
        """ Test that the align method raises an exception on invalid identify dir """
        options = self._options(
            identify_dir=self._missing('non-existent-dir'))
        self.assertRaises(BioLibDirNotFound, self.options_parser.align,
                          options)

    def test_infer__msa_raises_io_exception(self):
        """ Test that the infer method raises an exception on invalid MSA """
        options = self._options(
            msa_file=self._missing('non-existent-msa.txt'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.infer,
                          options)

    def test_run_test(self):
        """Test that the user-test method runs correctly"""
        options = self._options(out_dir=self.dir_tmp, cpus=3)
        self.assertTrue(self.options_parser.run_test(options))

    # Disabled test, kept commented out for reference.
    # def test_run_test__throws_exception(self):
    #     """Test that the user-test method fails correctly"""
    #     options = argparse.ArgumentParser()
    #     options.out_dir = self.dir_tmp
    #     os.mkdir(os.path.join(self.dir_tmp, 'genomes'))
    #     options.cpus = 3
    #     self.assertRaises(GTDBTkTestFailure, self.options_parser.run_test, options)

    def test_classify__align_dir_raises_io_exception(self):
        """ Test that the classify method raises an exception on invalid align dir """
        options = self._options(align_dir=self._missing('non-existent-dir'))
        self.assertRaises(BioLibDirNotFound, self.options_parser.classify,
                          options)

    def test_root__no_tree_raises_io_exception(self):
        """ Test that the root method raises an exception on invalid tree """
        options = self._options(
            input_tree=self._missing('non-existent-tree.tree'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.root,
                          options)

    def test_decorate__no_tree_raises_io_exception(self):
        """ Test that the decorate method raises an exception on invalid tree """
        options = self._options(
            input_tree=self._missing('non-existent-tree.tree'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.decorate,
                          options)

    # ------------------------------------------------------------------
    # trim_msa
    # ------------------------------------------------------------------

    def test_trim_msa__mask_file(self):
        """ Test that the expected result is returned when running trim_msa with mask_file """
        path_untrimmed_msa = self._write_lines('untrimmed_msa.fasta', [
            '>genome_1\n',
            'ALGPVW\n',
            '>genome_2\n',
            'WVPGLA\n',
        ])
        path_mask_file = self._write_lines('mask_file.txt', ['010010\n'])
        path_output = self._tmp_path('trimmed_msa.fasta')

        options = self._trim_options(path_untrimmed_msa, path_output,
                                     mask_file=path_mask_file,
                                     reference_mask=None)
        self.options_parser.trim_msa(options)

        with open(path_output, 'r') as fh:
            results = dict(re.findall(r'>(.+)\n(.+)\n', fh.read()))

        # Only the columns flagged '1' in the mask (2nd and 5th) are kept.
        expected = {'genome_1': 'LV', 'genome_2': 'VL'}
        self.assertDictEqual(results, expected)

    # NOTE(review): the expected digests in the two reference-mask tests are
    # 40 hex characters long, which is the length of a SHA-1 digest rather
    # than SHA-256 - confirm which algorithm the ``sha256`` helper uses.

    def test_trim_msa__reference_mask_arc(self):
        """ Test that the expected result is returned when running trim_msa with archaeal reference_mask """
        self._assert_reference_trim_digest(
            Config.CONCAT_AR122, 'arc',
            '1146351be59ae8d27668256c5b2c425a6f38c37c')

    def test_trim_msa__reference_mask_bac(self):
        """ Test that the expected result is returned when running trim_msa with bacterial reference_mask """
        self._assert_reference_trim_digest(
            Config.CONCAT_BAC120, 'bac',
            'ae6e24e89540fed03b81436147f99bcd120d059a')

    # ------------------------------------------------------------------
    # export_msa
    # ------------------------------------------------------------------

    def test_export_msa__arc(self):
        """ Test that the untrimmed archaeal MSA is exported correctly """
        self.assertEqual(
            self._export_msa_digest('arc'),
            '8706b42a3f4b2445273058e7e876f0d8332bd8dec95c0fc8bc024d76a5a5aade')

    def test_export_msa__bac(self):
        """ Test that the untrimmed bacterial MSA is exported correctly """
        self.assertEqual(
            self._export_msa_digest('bac'),
            '3c5dfa4dc5ef943459e6d0ed4da1e5a5858332c824739630beffb57fab303486')