class TestOptionsParser(unittest.TestCase):
    """Tests for ``OptionsParser``: genome discovery, input checks and MSA helpers.

    Every test runs against its own scratch directory (``self.dir_tmp``),
    created in ``setUp`` and removed in ``tearDown``, so individual tests do
    not need their own ``try``/``finally`` clean-up.
    """

    def setUp(self):
        """Create the parser under test and a fresh scratch directory."""
        self.options_parser = OptionsParser(-1)
        self.dir_tmp = tempfile.mkdtemp(prefix='gtdbtk_tmp_')

    def tearDown(self):
        """Remove the scratch directory together with everything written to it."""
        shutil.rmtree(self.dir_tmp)

    # ------------------------------------------------------------------
    # Fixture helpers. Their names do not start with ``test`` so that
    # unittest does not collect them as test cases.
    # ------------------------------------------------------------------

    def _touch(self, name):
        """Create an empty file called ``name`` in the scratch directory.

        Returns:
            The full path of the created file.
        """
        path = os.path.join(self.dir_tmp, name)
        with open(path, 'a'):
            pass
        return path

    def _touch_genomes(self):
        """Create empty ``genome_1.fna`` and ``genome_2.fna``; return both paths."""
        return self._touch('genome_1.fna'), self._touch('genome_2.fna')

    def _write_batchfile(self, path_genome_1, last_row, name='batchfile.txt'):
        """Write a three-line batchfile into the scratch directory.

        The file always contains a well-formed row for ``genome_1``, then a
        blank line, then ``last_row`` verbatim (the row under test).

        Args:
            path_genome_1: Path written in the first column of the first row.
            last_row: Complete final line, including its trailing newline.
            name: File name of the batchfile inside the scratch directory.

        Returns:
            The full path of the batchfile.
        """
        path_batchfile = os.path.join(self.dir_tmp, name)
        with open(path_batchfile, 'w') as f:
            f.write(f'{path_genome_1}\tgenome_1\n')
            f.write('\n')
            f.write(last_row)
        return path_batchfile

    @staticmethod
    def _options(**attrs):
        """Return an options object carrying ``attrs`` as attributes.

        An ``argparse.ArgumentParser`` instance is used purely as an attribute
        holder, exactly as the tests have always done.
        """
        options = argparse.ArgumentParser()
        for key, value in attrs.items():
            setattr(options, key, value)
        return options

    @staticmethod
    def _missing_path(name):
        """Return a path below the system temp directory that is expected not to exist."""
        return os.path.join(tempfile.gettempdir(), name)

    def _run_trim_msa(self, seq_1, seq_2, mask_file=None, reference_mask=None):
        """Run ``trim_msa`` on a two-sequence FASTA file and parse the output.

        Args:
            seq_1: Sequence written for ``genome_1``.
            seq_2: Sequence written for ``genome_2``.
            mask_file: Value for ``options.mask_file`` (or None).
            reference_mask: Value for ``options.reference_mask`` (or None).

        Returns:
            Dict mapping each genome id found in the output to its sequence.
        """
        path_untrimmed_msa = os.path.join(self.dir_tmp, 'untrimmed_msa.fasta')
        path_output = os.path.join(self.dir_tmp, 'trimmed_msa.fasta')
        with open(path_untrimmed_msa, 'w') as f:
            f.write('>genome_1\n')
            f.write(f'{seq_1}\n')
            f.write('>genome_2\n')
            f.write(f'{seq_2}\n')

        options = self._options(
            # Required arguments
            untrimmed_msa=path_untrimmed_msa,
            output=path_output,
            # Mutex arguments
            mask_file=mask_file,
            reference_mask=reference_mask,
        )
        self.options_parser.trim_msa(options)

        with open(path_output, 'r') as f:
            return dict(re.findall(r'>(.+)\n(.+)\n', f.read()))

    @staticmethod
    def _repeated_msa(length):
        """Return ``'ALGPVW'`` repeated and truncated to exactly ``length`` characters."""
        unit = 'ALGPVW'
        return (unit * (length // len(unit) + 1))[:length]

    @staticmethod
    def _sha256_of_sequences(sequences):
        """Map each genome id to the SHA-256 hex digest of its sequence.

        hashlib only accepts bytes, so every sequence is encoded first.
        """
        return {gid: hashlib.sha256(seq.encode('utf-8')).hexdigest()
                for gid, seq in sequences.items()}

    def _export_msa_sha256(self, domain):
        """Run ``export_msa`` for ``domain`` and return the SHA-256 of the output file."""
        path_out = os.path.join(self.dir_tmp, 'output.fasta')
        self.options_parser.export_msa(self._options(domain=domain, output=path_out))
        # Read in binary mode: hashlib requires bytes, and the digest should
        # cover the file exactly as it was written to disk.
        with open(path_out, 'rb') as f:
            return hashlib.sha256(f.read()).hexdigest()

    # ------------------------------------------------------------------
    # _verify_genome_id
    # ------------------------------------------------------------------

    def test__verify_genome_id__valid(self):
        """Test that a valid genome id returns True."""
        self.assertTrue(self.options_parser._verify_genome_id('genome_1'))

    def test__verify_genome_id__invalid(self):
        """Test that each of the characters ``()[],;=`` in a genome id raises."""
        for char in '()[],;=':
            self.assertRaises(GenomeNameInvalid,
                              self.options_parser._verify_genome_id,
                              f'genome{char}1')

    # ------------------------------------------------------------------
    # _genomes_to_process
    # ------------------------------------------------------------------

    def test__genomes_to_process__genome_dir__valid(self):
        """Test that only files with the requested extension are returned for genome_dir."""
        path_genome_1, path_genome_2 = self._touch_genomes()
        self._touch('other_file.txt')  # wrong extension, must be ignored

        results = self.options_parser._genomes_to_process(self.dir_tmp, '', 'fna')
        expected = {'genome_1': path_genome_1, 'genome_2': path_genome_2}
        self.assertDictEqual(results, expected)

    def test__genomes_to_process__batchfile__valid(self):
        """Test that a well-formed batchfile (with a blank line) is parsed correctly."""
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_batchfile(path_genome_1,
                                               f'{path_genome_2}\tgenome_2\n')

        results = self.options_parser._genomes_to_process('', path_batchfile, 'fna')
        expected = {'genome_1': path_genome_1, 'genome_2': path_genome_2}
        self.assertDictEqual(results, expected)

    def test__genomes_to_process__batchfile__invalid_columns(self):
        """Test that a batchfile row with a column count other than 2 raises."""
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_batchfile(path_genome_1,
                                               f'{path_genome_2}\tgenome_2\tfoo\n')
        self.assertRaises(GenomeBatchfileMalformed,
                          self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    def test__genomes_to_process__batchfile__blank_genome_path(self):
        """Test that a batchfile row with an empty genome path raises."""
        path_genome_1, _ = self._touch_genomes()
        path_batchfile = self._write_batchfile(path_genome_1, '\tgenome_2\n')
        self.assertRaises(GenomeBatchfileMalformed,
                          self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    def test__genomes_to_process__batchfile__blank_genome_id(self):
        """Test that a batchfile row with an empty genome id raises."""
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_batchfile(path_genome_1,
                                               f'{path_genome_2}\t\n')
        self.assertRaises(GenomeBatchfileMalformed,
                          self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    def test__genomes_to_process__batchfile__duplicate_genome_id(self):
        """Test that a batchfile using the same genome id twice raises."""
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_batchfile(path_genome_1,
                                               f'{path_genome_2}\tgenome_1\n')
        self.assertRaises(GenomeBatchfileMalformed,
                          self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    def test__genomes_to_process__batchfile__invalid_genome_id(self):
        """Test that genome ids starting with ``GB_``, ``RS_`` or ``UBA`` raise.

        NOTE(review): presumably these prefixes are reserved for reference
        genomes; confirm against ``OptionsParser._verify_genome_id``.
        """
        path_genome_1, path_genome_2 = self._touch_genomes()
        bad_ids = ('GB_genome_2', 'RS_genome_2', 'UBAgenome_2')
        batchfiles = [
            self._write_batchfile(path_genome_1,
                                  f'{path_genome_2}\t{bad_id}\n',
                                  name=f'batchfile_{idx}.txt')
            for idx, bad_id in enumerate(bad_ids, start=1)
        ]
        for path_batchfile in batchfiles:
            self.assertRaises(GenomeNameInvalid,
                              self.options_parser._genomes_to_process,
                              '', path_batchfile, 'fna')

    def test__genomes_to_process__no_files(self):
        """Test that an exception is thrown if no files are found to process."""
        # Branch 1: genome_dir is specified but holds no genome files
        # (the scratch directory is still empty at this point).
        self.assertRaises(NoGenomesFound,
                          self.options_parser._genomes_to_process,
                          self.dir_tmp, '', 'fna')

        # Branch 2: batchfile is specified but contains no rows.
        path_batchfile = self._touch('batchfile.txt')
        self.assertRaises(NoGenomesFound,
                          self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    # ------------------------------------------------------------------
    # _marker_set_id
    # ------------------------------------------------------------------

    def test__marker_set_id(self):
        """Test that the correct marker set id is returned for each flag."""
        marker_set_id = self.options_parser._marker_set_id
        self.assertEqual(marker_set_id(True, False, False), 'bac120')
        self.assertEqual(marker_set_id(False, True, False), 'ar122')
        self.assertEqual(marker_set_id(False, False, True), 'rps23')

    # ------------------------------------------------------------------
    # Missing input files / directories
    # ------------------------------------------------------------------

    def test_identify__genome_dir_raises_io_exception(self):
        """Test that the identify method raises an exception on invalid genome_dir."""
        options = self._options(genome_dir=self._missing_path('non-existent-dir'))
        self.assertRaises(BioLibDirNotFound, self.options_parser.identify, options)

    def test_identify__batchfile_raises_io_exception(self):
        """Test that the identify method raises an exception on invalid batchfile."""
        options = self._options(
            genome_dir=None,
            batchfile=self._missing_path('non-existent-file.txt'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.identify, options)

    def test_align__identify_dir_raises_io_exception(self):
        """Test that the align method raises an exception on invalid identify dir."""
        options = self._options(identify_dir=self._missing_path('non-existent-dir'))
        self.assertRaises(BioLibDirNotFound, self.options_parser.align, options)

    def test_infer__msa_raises_io_exception(self):
        """Test that the infer method raises an exception on invalid MSA."""
        options = self._options(msa_file=self._missing_path('non-existent-msa.txt'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.infer, options)

    def test_classify__align_dir_raises_io_exception(self):
        """Test that the classify method raises an exception on invalid align dir."""
        options = self._options(align_dir=self._missing_path('non-existent-dir'))
        self.assertRaises(BioLibDirNotFound, self.options_parser.classify, options)

    def test_root__no_tree_raises_io_exception(self):
        """Test that the root method raises an exception on invalid tree."""
        options = self._options(input_tree=self._missing_path('non-existent-tree.tree'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.root, options)

    def test_decorate__no_tree_raises_io_exception(self):
        """Test that the decorate method raises an exception on invalid tree."""
        options = self._options(input_tree=self._missing_path('non-existent-tree.tree'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.decorate, options)

    # ------------------------------------------------------------------
    # trim_msa
    # ------------------------------------------------------------------

    def test_trim_msa__mask_file(self):
        """Test that trim_msa keeps only the columns flagged ``1`` in mask_file."""
        path_mask_file = os.path.join(self.dir_tmp, 'mask_file.txt')
        with open(path_mask_file, 'w') as f:
            f.write('010010\n')

        results = self._run_trim_msa('ALGPVW', 'WVPGLA', mask_file=path_mask_file)
        expected = {'genome_1': 'LV', 'genome_2': 'VL'}
        self.assertDictEqual(results, expected)

    def test_trim_msa__reference_mask_arc(self):
        """Test that trim_msa gives the expected output with the archaeal reference_mask."""
        msa_str = self._repeated_msa(32675)
        trimmed = self._run_trim_msa(msa_str, msa_str[::-1], reference_mask='arc')

        results = self._sha256_of_sequences(trimmed)
        expected = {
            'genome_1': '332b8cd125a36c375196064e136efab78db38e41bbd8bd8484243531bc57df6d',
            'genome_2': '84e91b9f5fa1ec0bedc0097233044e6dd0e79557bb6df3625928dc9573795989'
        }
        self.assertDictEqual(results, expected)

    def test_trim_msa__reference_mask_bac(self):
        """Test that trim_msa gives the expected output with the bacterial reference_mask."""
        msa_str = self._repeated_msa(41155)
        trimmed = self._run_trim_msa(msa_str, msa_str[::-1], reference_mask='bac')

        results = self._sha256_of_sequences(trimmed)
        expected = {
            'genome_1': '35e080f9ab7d318e8f4a7cef46ce6044bd9c538e6fbe8a69b17431df44bd5a81',
            'genome_2': 'bb4beed69063dad0092a809ee5854ff124da0b55c651edd50c47b1d8fdff0d7b'
        }
        self.assertDictEqual(results, expected)

    # ------------------------------------------------------------------
    # export_msa
    # ------------------------------------------------------------------

    def test_export_msa__arc(self):
        """Test that the untrimmed archaeal MSA is exported correctly."""
        self.assertEqual(
            self._export_msa_sha256('arc'),
            '11eb12b91ab20c43824abafb909ccc20bed84a8609a9bf82748b2cdbdd8b7aad'
        )

    def test_export_msa__bac(self):
        """Test that the untrimmed bacterial MSA is exported correctly."""
        self.assertEqual(
            self._export_msa_sha256('bac'),
            '50dde1e96df9533def7c7047a1e8627d4ad566db10f8ab3de72751e62c4ac10a'
        )
class TestOptionsParser(unittest.TestCase):
    """Tests for ``OptionsParser``: genome discovery and input validation.

    Every test runs against its own scratch directory (``self.dir_tmp``),
    created in ``setUp`` and removed in ``tearDown``.
    """

    def setUp(self):
        """Create the parser under test and a fresh scratch directory."""
        self.options_parser = OptionsParser('-1')
        self.dir_tmp = tempfile.mkdtemp(prefix='gtdbtk_tmp_')

    def tearDown(self):
        """Remove the scratch directory together with everything written to it."""
        shutil.rmtree(self.dir_tmp)

    # ------------------------------------------------------------------
    # Fixture helpers. Their names do not start with ``test`` so that
    # unittest does not collect them as test cases.
    # ------------------------------------------------------------------

    def _touch(self, name):
        """Create an empty file called ``name`` in the scratch directory.

        Returns:
            The full path of the created file.
        """
        path = os.path.join(self.dir_tmp, name)
        with open(path, 'a'):
            pass
        return path

    def _touch_genomes(self):
        """Create empty ``genome_1.fna`` and ``genome_2.fna``; return both paths."""
        return self._touch('genome_1.fna'), self._touch('genome_2.fna')

    def _write_batchfile(self, path_genome_1, last_row, name='batchfile.txt'):
        """Write a three-line batchfile into the scratch directory.

        The file always contains a well-formed row for ``genome_1``, then a
        blank line, then ``last_row`` verbatim (the row under test).

        Args:
            path_genome_1: Path written in the first column of the first row.
            last_row: Complete final line, including its trailing newline.
            name: File name of the batchfile inside the scratch directory.

        Returns:
            The full path of the batchfile.
        """
        path_batchfile = os.path.join(self.dir_tmp, name)
        with open(path_batchfile, 'w') as f:
            f.write(f'{path_genome_1}\tgenome_1\n')
            f.write('\n')
            f.write(last_row)
        return path_batchfile

    @staticmethod
    def _options(**attrs):
        """Return an options object carrying ``attrs`` as attributes.

        An ``argparse.ArgumentParser`` instance is used purely as an attribute
        holder, exactly as the tests have always done.
        """
        options = argparse.ArgumentParser()
        for key, value in attrs.items():
            setattr(options, key, value)
        return options

    @staticmethod
    def _missing_path(name):
        """Return a path below the system temp directory that is expected not to exist."""
        return os.path.join(tempfile.gettempdir(), name)

    def _assert_batchfile_rejected(self, last_row):
        """Assert that a batchfile ending in ``last_row`` makes the parser raise GTDBTkExit."""
        path_genome_1, _ = self._touch_genomes()
        path_batchfile = self._write_batchfile(path_genome_1, last_row)
        self.assertRaises(GTDBTkExit,
                          self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    # ------------------------------------------------------------------
    # _verify_genome_id
    # ------------------------------------------------------------------

    def test__verify_genome_id__valid(self):
        """Test that a valid genome id returns True."""
        self.assertTrue(self.options_parser._verify_genome_id('genome_1'))

    def test__verify_genome_id__invalid(self):
        """Test that each of the characters ``()[],;=`` in a genome id raises."""
        for char in '()[],;=':
            self.assertRaises(GTDBTkExit,
                              self.options_parser._verify_genome_id,
                              f'genome{char}1')

    # ------------------------------------------------------------------
    # _genomes_to_process
    # ------------------------------------------------------------------

    def test__genomes_to_process__genome_dir__valid(self):
        """Test that only files with the requested extension are returned for genome_dir."""
        path_genome_1, path_genome_2 = self._touch_genomes()
        self._touch('other_file.txt')  # wrong extension, must be ignored

        # The second element of the returned pair is not checked by this test.
        results, _ = self.options_parser._genomes_to_process(self.dir_tmp, '', 'fna')
        expected = {'genome_1': path_genome_1, 'genome_2': path_genome_2}
        self.assertDictEqual(results, expected)

    def test__genomes_to_process__batchfile__valid(self):
        """Test that a well-formed batchfile is parsed correctly.

        The second row carries an optional third column (``4``), which must be
        returned as an integer in the second result, keyed by genome id.
        """
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_batchfile(path_genome_1,
                                               f'{path_genome_2}\tgenome_2\t4\n')

        results, tln_table = self.options_parser._genomes_to_process(
            '', path_batchfile, 'fna')
        expected = {'genome_1': path_genome_1, 'genome_2': path_genome_2}
        expected_tln = {'genome_2': 4}
        self.assertDictEqual(results, expected)
        self.assertDictEqual(tln_table, expected_tln)

    def test__genomes_to_process__batchfile__invalid_columns(self):
        """Test that a batchfile row whose third column is ``foo`` raises.

        NOTE(review): a third column is accepted when it is ``4`` (see the
        valid-batchfile test), so presumably ``foo`` is rejected for not being
        a valid translation table; confirm against ``_genomes_to_process``.
        """
        path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
        self._assert_batchfile_rejected(f'{path_genome_2}\tgenome_2\tfoo\n')

    def test__genomes_to_process__batchfile__blank_genome_path(self):
        """Test that a batchfile row with an empty genome path raises."""
        self._assert_batchfile_rejected('\tgenome_2\n')

    def test__genomes_to_process__batchfile__blank_genome_id(self):
        """Test that a batchfile row with an empty genome id raises."""
        path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
        self._assert_batchfile_rejected(f'{path_genome_2}\t\n')

    def test__genomes_to_process__batchfile__duplicate_genome_id(self):
        """Test that a batchfile using the same genome id twice raises."""
        path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
        self._assert_batchfile_rejected(f'{path_genome_2}\tgenome_1\n')

    # NOTE(review): the test below was already disabled in this revision; the
    # reason is not recorded here. It is kept verbatim as a comment.
    #
    # def test__genomes_to_process__batchfile__invalid_genome_id(self):
    #     """ Test that a batchfile containing duplicate genome ids throws an exception. """
    #     # Branch 1: The number of columns are not equal to 2.
    #     path_batchfile_1 = os.path.join(self.dir_tmp, 'batchfile_1.txt')
    #     path_batchfile_2 = os.path.join(self.dir_tmp, 'batchfile_2.txt')
    #     path_batchfile_3 = os.path.join(self.dir_tmp, 'batchfile_3.txt')
    #     path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna')
    #     path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
    #     open(path_genome_1, 'a').close()
    #     open(path_genome_2, 'a').close()
    #
    #     with open(path_batchfile_1, 'a') as f:
    #         f.write('%s\tgenome_1\n' % path_genome_1)
    #         f.write('\n')
    #         f.write('%s\tGB_genome_2\n' % path_genome_2)
    #
    #     with open(path_batchfile_2, 'a') as f:
    #         f.write('%s\tgenome_1\n' % path_genome_1)
    #         f.write('\n')
    #         f.write('%s\tRS_genome_2\n' % path_genome_2)
    #
    #     with open(path_batchfile_3, 'a') as f:
    #         f.write('%s\tgenome_1\n' % path_genome_1)
    #         f.write('\n')
    #         f.write('%s\tUBAgenome_2\n' % path_genome_2)
    #
    #     self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_1, 'fna')
    #     self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_2, 'fna')
    #     self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_3, 'fna')

    def test__genomes_to_process__no_files(self):
        """Test that an exception is thrown if no files are found to process."""
        # Branch 1: genome_dir is specified but holds no genome files.
        with tempfile.TemporaryDirectory() as empty_dir:
            self.assertRaises(GTDBTkExit,
                              self.options_parser._genomes_to_process,
                              empty_dir, '', 'fna')

        # Branch 2: batchfile is specified but contains no rows.
        with tempfile.TemporaryDirectory() as batch_dir:
            path_batchfile = os.path.join(batch_dir, 'batchfile.txt')
            with open(path_batchfile, 'a'):
                pass
            self.assertRaises(GTDBTkExit,
                              self.options_parser._genomes_to_process,
                              '', path_batchfile, 'fna')

    # ------------------------------------------------------------------
    # Missing input files / directories
    # ------------------------------------------------------------------

    def test_identify__genome_dir_raises_io_exception(self):
        """Test that the identify method raises an exception on invalid genome_dir."""
        options = self._options(genome_dir=self._missing_path('non-existent-dir'))
        self.assertRaises(BioLibDirNotFound, self.options_parser.identify, options)

    def test_identify__batchfile_raises_io_exception(self):
        """Test that the identify method raises an exception on invalid batchfile."""
        options = self._options(
            genome_dir=None,
            batchfile=self._missing_path('non-existent-file.txt'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.identify, options)

    def test_align__identify_dir_raises_io_exception(self):
        """Test that the align method raises an exception on invalid identify dir."""
        options = self._options(identify_dir=self._missing_path('non-existent-dir'))
        self.assertRaises(BioLibDirNotFound, self.options_parser.align, options)

    def test_infer__msa_raises_io_exception(self):
        """Test that the infer method raises an exception on invalid MSA."""
        options = self._options(msa_file=self._missing_path('non-existent-msa.txt'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.infer, options)

    def test_run_test(self):
        """Test that the user-test method runs correctly."""
        options = self._options(out_dir=self.dir_tmp, cpus=3)
        self.assertTrue(self.options_parser.run_test(options))

    # NOTE(review): the test below was already disabled in this revision; the
    # reason is not recorded here. It is kept verbatim as a comment.
    #
    # def test_run_test__throws_exception(self):
    #     """Test that the user-test method fails correctly"""
    #     options = argparse.ArgumentParser()
    #     options.out_dir = self.dir_tmp
    #     os.mkdir(os.path.join(self.dir_tmp, 'genomes'))
    #     options.cpus = 3
    #     self.assertRaises(GTDBTkTestFailure, self.options_parser.run_test, options)

    def test_classify__align_dir_raises_io_exception(self):
        """Test that the classify method raises an exception on invalid align dir."""
        options = self._options(align_dir=self._missing_path('non-existent-dir'))
        self.assertRaises(BioLibDirNotFound, self.options_parser.classify, options)

    def test_root__no_tree_raises_io_exception(self):
        """Test that the root method raises an exception on invalid tree."""
        options = self._options(input_tree=self._missing_path('non-existent-tree.tree'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.root, options)

    def test_decorate__no_tree_raises_io_exception(self):
        """Test that the decorate method raises an exception on invalid tree."""
        options = self._options(input_tree=self._missing_path('non-existent-tree.tree'))
        self.assertRaises(BioLibFileNotFound, self.options_parser.decorate, options)
class TestOptionsParser(unittest.TestCase): def setUp(self): self.options_parser = OptionsParser('-1') self.dir_tmp = tempfile.mkdtemp(prefix='gtdbtk_tmp_') pass def tearDown(self): shutil.rmtree(self.dir_tmp) def test__verify_genome_id__valid(self): """ Test that a valid genome id returns True. """ self.assertTrue(self.options_parser._verify_genome_id('genome_1')) def test__verify_genome_id__invalid(self): """ Test that invalid genome ids throw an exception. """ for c in list('()[],;='): self.assertRaises(GenomeNameInvalid, self.options_parser._verify_genome_id, 'genome%s1' % c) def test__genomes_to_process__genome_dir__valid(self): """ Test that the expected results are returned when using genome_dir. """ open(os.path.join(self.dir_tmp, 'genome_1.fna'), 'a').close() open(os.path.join(self.dir_tmp, 'genome_2.fna'), 'a').close() open(os.path.join(self.dir_tmp, 'other_file.txt'), 'a').close() results = self.options_parser._genomes_to_process( self.dir_tmp, '', 'fna') expected = { 'genome_1': os.path.join(self.dir_tmp, 'genome_1.fna'), 'genome_2': os.path.join(self.dir_tmp, 'genome_2.fna') } self.assertDictEqual(results, expected) def test__genomes_to_process__batchfile__valid(self): """ Test that the expected results are returned when using batchfile """ path_batchfile = os.path.join(self.dir_tmp, 'batchfile.txt') path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna') path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna') open(path_genome_1, 'a').close() open(path_genome_2, 'a').close() with open(path_batchfile, 'a') as f: f.write('%s\tgenome_1\n' % path_genome_1) f.write('\n') f.write('%s\tgenome_2\n' % path_genome_2) results = self.options_parser._genomes_to_process( '', path_batchfile, 'fna') expected = {'genome_1': path_genome_1, 'genome_2': path_genome_2} self.assertDictEqual(results, expected) def test__genomes_to_process__batchfile__invalid_columns(self): """ Test that a batchfile containing columns not equal to 2 throws an exception. 
""" path_batchfile = os.path.join(self.dir_tmp, 'batchfile.txt') path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna') path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna') open(path_genome_1, 'a').close() open(path_genome_2, 'a').close() with open(path_batchfile, 'a') as f: f.write('%s\tgenome_1\n' % path_genome_1) f.write('\n') f.write('%s\tgenome_2\tfoo\n' % path_genome_2) self.assertRaises(GenomeBatchfileMalformed, self.options_parser._genomes_to_process, '', path_batchfile, 'fna') def test__genomes_to_process__batchfile__blank_genome_path(self): """ Test that a batchfile containing a blank genome path throws an exception. """ path_batchfile = os.path.join(self.dir_tmp, 'batchfile.txt') path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna') path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna') open(path_genome_1, 'a').close() open(path_genome_2, 'a').close() with open(path_batchfile, 'a') as f: f.write('%s\tgenome_1\n' % path_genome_1) f.write('\n') f.write('%s\tgenome_2\n' % '') self.assertRaises(GenomeBatchfileMalformed, self.options_parser._genomes_to_process, '', path_batchfile, 'fna') def test__genomes_to_process__batchfile__blank_genome_id(self): """ Test that a batchfile containing a blank genome id throws an exception. """ path_batchfile = os.path.join(self.dir_tmp, 'batchfile.txt') path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna') path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna') open(path_genome_1, 'a').close() open(path_genome_2, 'a').close() with open(path_batchfile, 'a') as f: f.write('%s\tgenome_1\n' % path_genome_1) f.write('\n') f.write('%s\t\n' % path_genome_2) self.assertRaises(GenomeBatchfileMalformed, self.options_parser._genomes_to_process, '', path_batchfile, 'fna') def test__genomes_to_process__batchfile__duplicate_genome_id(self): """ Test that a batchfile containing duplicate genome ids throws an exception. """ # Branch 1: The number of columns are not equal to 2. 
path_batchfile = os.path.join(self.dir_tmp, 'batchfile.txt') path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna') path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna') open(path_genome_1, 'a').close() open(path_genome_2, 'a').close() with open(path_batchfile, 'a') as f: f.write('%s\tgenome_1\n' % path_genome_1) f.write('\n') f.write('%s\tgenome_1\n' % path_genome_2) self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile, 'fna') def test__genomes_to_process__batchfile__invalid_genome_id(self): """ Test that a batchfile containing duplicate genome ids throws an exception. """ # Branch 1: The number of columns are not equal to 2. path_batchfile_1 = os.path.join(self.dir_tmp, 'batchfile_1.txt') path_batchfile_2 = os.path.join(self.dir_tmp, 'batchfile_2.txt') path_batchfile_3 = os.path.join(self.dir_tmp, 'batchfile_3.txt') path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna') path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna') open(path_genome_1, 'a').close() open(path_genome_2, 'a').close() with open(path_batchfile_1, 'a') as f: f.write('%s\tgenome_1\n' % path_genome_1) f.write('\n') f.write('%s\tGB_genome_2\n' % path_genome_2) with open(path_batchfile_2, 'a') as f: f.write('%s\tgenome_1\n' % path_genome_1) f.write('\n') f.write('%s\tRS_genome_2\n' % path_genome_2) with open(path_batchfile_3, 'a') as f: f.write('%s\tgenome_1\n' % path_genome_1) f.write('\n') f.write('%s\tUBAgenome_2\n' % path_genome_2) self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_1, 'fna') self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_2, 'fna') self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_3, 'fna') def test__genomes_to_process__no_files(self): """ Test that an exception is thrown if no files are found to process """ # Branch 1 : genome_dir is specified tmp_genome_dir = tempfile.mkdtemp() try: 
self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, tmp_genome_dir, '', 'fna') finally: shutil.rmtree(tmp_genome_dir) # Branch 2: batchfile is specified tmp_genome_dir = tempfile.mkdtemp() try: path_batchfile = os.path.join(tmp_genome_dir, 'batchfile.txt') open(path_batchfile, 'a').close() self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile, 'fna') finally: shutil.rmtree(tmp_genome_dir) def test__marker_set_id(self): """ Test that the correct marker set id is returned """ self.assertEqual( self.options_parser._marker_set_id(True, False, False), 'bac120') self.assertEqual( self.options_parser._marker_set_id(False, True, False), 'ar122') self.assertEqual( self.options_parser._marker_set_id(False, False, True), 'rps23') def test_identify__genome_dir_raises_io_exception(self): """ Test that the identify method raises an exception on invalid genome_dir """ options = argparse.ArgumentParser() options.genome_dir = os.path.join(tempfile.gettempdir(), 'non-existent-dir') self.assertRaises(BioLibDirNotFound, self.options_parser.identify, options) def test_identify__batchfile_raises_io_exception(self): """ Test that the identify method raises an exception on invalid batchfile """ options = argparse.ArgumentParser() options.genome_dir = None options.batchfile = os.path.join(tempfile.gettempdir(), 'non-existent-file.txt') self.assertRaises(BioLibFileNotFound, self.options_parser.identify, options) def test_align__identify_dir_raises_io_exception(self): """ Test that the align method raises an exception on invalid identify dir """ options = argparse.ArgumentParser() options.identify_dir = os.path.join(tempfile.gettempdir(), 'non-existent-dir') self.assertRaises(BioLibDirNotFound, self.options_parser.align, options) def test_infer__msa_raises_io_exception(self): """ Test that the infer method raises an exception on invalid MSA """ options = argparse.ArgumentParser() options.msa_file = os.path.join(tempfile.gettempdir(), 
'non-existent-msa.txt') self.assertRaises(BioLibFileNotFound, self.options_parser.infer, options) def test_run_test(self): """Test that the user-test method runs correctly""" options = argparse.ArgumentParser() options.out_dir = self.dir_tmp options.cpus = 3 self.assertTrue(self.options_parser.run_test(options)) # def test_run_test__throws_exception(self): # """Test that the user-test method fails correctly""" # options = argparse.ArgumentParser() # options.out_dir = self.dir_tmp # os.mkdir(os.path.join(self.dir_tmp, 'genomes')) # options.cpus = 3 # self.assertRaises(GTDBTkTestFailure, self.options_parser.run_test, options) def test_classify__align_dir_raises_io_exception(self): """ Test that the classify method raises an exception on invalid align dir """ options = argparse.ArgumentParser() options.align_dir = os.path.join(tempfile.gettempdir(), 'non-existent-dir') self.assertRaises(BioLibDirNotFound, self.options_parser.classify, options) def test_root__no_tree_raises_io_exception(self): """ Test that the infer method raises an exception on invalid tree """ options = argparse.ArgumentParser() options.input_tree = os.path.join(tempfile.gettempdir(), 'non-existent-tree.tree') self.assertRaises(BioLibFileNotFound, self.options_parser.root, options) def test_decorate__no_tree_raises_io_exception(self): """ Test that the infer method raises an exception on invalid tree """ options = argparse.ArgumentParser() options.input_tree = os.path.join(tempfile.gettempdir(), 'non-existent-tree.tree') self.assertRaises(BioLibFileNotFound, self.options_parser.decorate, options) def test_trim_msa__mask_file(self): """ Test that the expected result is returned when running trim_msa with mask_file """ path_untrimmed_msa = os.path.join(self.dir_tmp, 'untrimmed_msa.fasta') path_mask_file = os.path.join(self.dir_tmp, 'mask_file.txt') path_output = os.path.join(self.dir_tmp, 'trimmed_msa.fasta') with open(path_untrimmed_msa, 'w') as f: f.write('>genome_1\n') f.write('ALGPVW\n') 
f.write('>genome_2\n') f.write('WVPGLA\n') with open(path_mask_file, 'w') as f: f.write('010010\n') options = argparse.ArgumentParser() # Required arguments options.untrimmed_msa = path_untrimmed_msa options.output = path_output # Mutex arguments options.mask_file = path_mask_file options.reference_mask = None self.options_parser.trim_msa(options) results = dict() with open(path_output, 'r') as f: re_hits = re.findall(r'>(.+)\n(.+)\n', f.read()) for gid, seq in re_hits: results[gid] = seq expected = {'genome_1': 'LV', 'genome_2': 'VL'} self.assertDictEqual(results, expected) def test_trim_msa__reference_mask_arc(self): """ Test that the expected result is returned when running trim_msa with archaeal reference_mask """ path_untrimmed_msa = os.path.join(self.dir_tmp, 'untrimmed_msa.fasta') path_output = os.path.join(self.dir_tmp, 'trimmed_msa.fasta') msa_str = str() while len(msa_str) < 32675: msa_str += 'ALGPVW' msa_str = msa_str[0:32675] with open(path_untrimmed_msa, 'w') as f: f.write('>genome_1\n') f.write('%s\n' % msa_str) f.write('>genome_2\n') f.write('%s\n' % msa_str[::-1]) options = argparse.ArgumentParser() # Required arguments options.untrimmed_msa = path_untrimmed_msa options.output = path_output # Mutex arguments options.mask_file = None options.reference_mask = 'arc' self.options_parser.trim_msa(options) results = dict() with open(path_output, 'r') as f: re_hits = re.findall(r'>(.+)\n(.+)\n', f.read()) for gid, seq in re_hits: results[gid] = hashlib.sha256(seq.encode('utf-8')).hexdigest() expected = { 'genome_1': '4975c04d640415de4c715552f6f6b460a8996226239440faa6539ac777622515', 'genome_2': '7b53881aecb13bbe54612962e22736db7ab83271ffe4685d63c16e962e3561d9' } self.assertDictEqual(results, expected) def test_trim_msa__reference_mask_bac(self): """ Test that the expected result is returned when running trim_msa with bacterial reference_mask """ path_untrimmed_msa = os.path.join(self.dir_tmp, 'untrimmed_msa.fasta') path_output = os.path.join(self.dir_tmp, 
'trimmed_msa.fasta') msa_str = str() while len(msa_str) < 41155: msa_str += 'ALGPVW' msa_str = msa_str[0:41155] with open(path_untrimmed_msa, 'w') as f: f.write('>genome_1\n') f.write('%s\n' % msa_str) f.write('>genome_2\n') f.write('%s\n' % msa_str[::-1]) options = argparse.ArgumentParser() # Required arguments options.untrimmed_msa = path_untrimmed_msa options.output = path_output # Mutex arguments options.mask_file = None options.reference_mask = 'bac' self.options_parser.trim_msa(options) results = dict() with open(path_output, 'r') as f: re_hits = re.findall(r'>(.+)\n(.+)\n', f.read()) for gid, seq in re_hits: results[gid] = hashlib.sha256(seq.encode('utf-8')).hexdigest() expected = { 'genome_1': '32798bdc3245b2ac5ecd8a15ea2cfb21011b22b6021baa51066864b1c02d72b4', 'genome_2': '0b63d416c72e9641011f80fcf64fa41eb3f0e8e85dbaa4bd8feba12cf3b64c62' } self.assertDictEqual(results, expected) def test_export_msa__arc(self): """ Test that the untrimmed archaeal MSA is exported correctly """ path_out = os.path.join(self.dir_tmp, 'output.fasta') options = argparse.ArgumentParser() options.domain = 'arc' options.output = path_out self.options_parser.export_msa(options) with open(path_out, 'rb') as f: out_hash = hashlib.sha256(f.read()).hexdigest() self.assertEqual( out_hash, 'e84edf65511002b73f110ff44c9acee3ae44220448dfc971a2778d43c966bbba') def test_export_msa__bac(self): """ Test that the untrimmed bacterial MSA is exported correctly """ path_out = os.path.join(self.dir_tmp, 'output.fasta') options = argparse.ArgumentParser() options.domain = 'bac' options.output = path_out self.options_parser.export_msa(options) with open(path_out, 'rb') as f: out_hash = hashlib.sha256(f.read()).hexdigest() self.assertEqual( out_hash, '5e37bc123819061490681068b49450fc43587d09b87df90ef62452bd73f961cc')
class TestOptionsParser(unittest.TestCase):
    """ Unit tests for OptionsParser.

    Each test receives a fresh scratch directory (``self.dir_tmp``) that is
    created in ``setUp`` and removed again in ``tearDown``.
    """

    def setUp(self):
        """ Create the parser under test and a per-test scratch directory. """
        self.options_parser = OptionsParser('-1')
        self.dir_tmp = tempfile.mkdtemp(prefix='gtdbtk_tmp_')

    def tearDown(self):
        """ Remove the scratch directory and everything the test wrote to it. """
        shutil.rmtree(self.dir_tmp)

    # ------------------------------------------------------------------
    # Fixture helpers (not collected by unittest: no ``test`` prefix).
    # ------------------------------------------------------------------

    def _touch_genomes(self):
        """ Create two empty genome files in the scratch directory.

        Returns:
            tuple: (path to genome_1.fna, path to genome_2.fna).
        """
        path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna')
        path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
        for path in (path_genome_1, path_genome_2):
            open(path, 'a').close()
        return path_genome_1, path_genome_2

    def _write_batchfile(self, lines):
        """ Write ``lines`` verbatim to batchfile.txt in the scratch directory.

        Args:
            lines: Iterable of strings, each already newline-terminated.

        Returns:
            str: Path to the batchfile that was written.
        """
        path_batchfile = os.path.join(self.dir_tmp, 'batchfile.txt')
        # The scratch directory is new for every test, so the file never
        # pre-exists and truncating on open is safe.
        with open(path_batchfile, 'w') as f:
            f.writelines(lines)
        return path_batchfile

    # ------------------------------------------------------------------
    # _verify_genome_id
    # ------------------------------------------------------------------

    def test__verify_genome_id__valid(self):
        """ Test that a valid genome id returns True. """
        self.assertTrue(self.options_parser._verify_genome_id('genome_1'))

    def test__verify_genome_id__invalid(self):
        """ Test that invalid genome ids throw an exception. """
        for c in '()[],;=':
            # subTest reports every offending character instead of stopping
            # at the first one that fails.
            with self.subTest(char=c):
                self.assertRaises(GenomeNameInvalid,
                                  self.options_parser._verify_genome_id,
                                  'genome%s1' % c)

    # ------------------------------------------------------------------
    # _genomes_to_process
    # ------------------------------------------------------------------

    def test__genomes_to_process__genome_dir__valid(self):
        """ Test that the expected results are returned when using genome_dir.

        Only the two ``.fna`` files are expected back; ``other_file.txt`` must
        not appear in the result.
        """
        for file_name in ('genome_1.fna', 'genome_2.fna', 'other_file.txt'):
            open(os.path.join(self.dir_tmp, file_name), 'a').close()

        # The second return value (translation table map) is not under test.
        results, _ = self.options_parser._genomes_to_process(
            self.dir_tmp, '', 'fna')
        expected = {
            'genome_1': os.path.join(self.dir_tmp, 'genome_1.fna'),
            'genome_2': os.path.join(self.dir_tmp, 'genome_2.fna')
        }
        self.assertDictEqual(results, expected)

    def test__genomes_to_process__batchfile__valid(self):
        """ Test that the expected results are returned when using batchfile.

        The second row carries an optional third column (``4``), which is
        expected to be returned in the translation table mapping.
        """
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_batchfile([
            f'{path_genome_1}\tgenome_1\n',
            '\n',
            f'{path_genome_2}\tgenome_2\t4\n',
        ])

        results, tln_table = self.options_parser._genomes_to_process(
            '', path_batchfile, 'fna')
        expected = {'genome_1': path_genome_1, 'genome_2': path_genome_2}
        expected_tln = {'genome_2': 4}
        self.assertDictEqual(results, expected)
        self.assertDictEqual(tln_table, expected_tln)

    def test__genomes_to_process__batchfile__invalid_columns(self):
        """ Test that a batchfile row with a non-numeric third column throws an exception. """
        # NOTE(review): the valid-batchfile test accepts a third column of
        # ``4``, so this row is presumably rejected because of the value
        # ``foo`` rather than the column count -- confirm against
        # OptionsParser._genomes_to_process.
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_batchfile([
            '%s\tgenome_1\n' % path_genome_1,
            '\n',
            '%s\tgenome_2\tfoo\n' % path_genome_2,
        ])
        self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    def test__genomes_to_process__batchfile__blank_genome_path(self):
        """ Test that a batchfile containing a blank genome path throws an exception. """
        path_genome_1, _ = self._touch_genomes()
        path_batchfile = self._write_batchfile([
            '%s\tgenome_1\n' % path_genome_1,
            '\n',
            '\tgenome_2\n',  # empty path column
        ])
        self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    def test__genomes_to_process__batchfile__blank_genome_id(self):
        """ Test that a batchfile containing a blank genome id throws an exception. """
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_batchfile([
            '%s\tgenome_1\n' % path_genome_1,
            '\n',
            '%s\t\n' % path_genome_2,  # empty id column
        ])
        self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    def test__genomes_to_process__batchfile__duplicate_genome_id(self):
        """ Test that a batchfile containing duplicate genome ids throws an exception. """
        path_genome_1, path_genome_2 = self._touch_genomes()
        path_batchfile = self._write_batchfile([
            '%s\tgenome_1\n' % path_genome_1,
            '\n',
            '%s\tgenome_1\n' % path_genome_2,  # same id, different file
        ])
        self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process,
                          '', path_batchfile, 'fna')

    # NOTE(review): disabled test, kept for reference.
    # def test__genomes_to_process__batchfile__invalid_genome_id(self):
    #     """ Test that a batchfile containing duplicate genome ids throws an exception. """
    #     # Branch 1: The number of columns are not equal to 2.
    #     path_batchfile_1 = os.path.join(self.dir_tmp, 'batchfile_1.txt')
    #     path_batchfile_2 = os.path.join(self.dir_tmp, 'batchfile_2.txt')
    #     path_batchfile_3 = os.path.join(self.dir_tmp, 'batchfile_3.txt')
    #     path_genome_1 = os.path.join(self.dir_tmp, 'genome_1.fna')
    #     path_genome_2 = os.path.join(self.dir_tmp, 'genome_2.fna')
    #     open(path_genome_1, 'a').close()
    #     open(path_genome_2, 'a').close()
    #
    #     with open(path_batchfile_1, 'a') as f:
    #         f.write('%s\tgenome_1\n' % path_genome_1)
    #         f.write('\n')
    #         f.write('%s\tGB_genome_2\n' % path_genome_2)
    #
    #     with open(path_batchfile_2, 'a') as f:
    #         f.write('%s\tgenome_1\n' % path_genome_1)
    #         f.write('\n')
    #         f.write('%s\tRS_genome_2\n' % path_genome_2)
    #
    #     with open(path_batchfile_3, 'a') as f:
    #         f.write('%s\tgenome_1\n' % path_genome_1)
    #         f.write('\n')
    #         f.write('%s\tUBAgenome_2\n' % path_genome_2)
    #
    #     self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_1, 'fna')
    #     self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_2, 'fna')
    #     self.assertRaises(GTDBTkExit, self.options_parser._genomes_to_process, '', path_batchfile_3, 'fna')

    def test__genomes_to_process__no_files(self):
        """ Test that an exception is thrown if no files are found to process """
        # Branch 1: genome_dir is specified (an empty directory).
        with tempfile.TemporaryDirectory() as tmp_genome_dir:
            self.assertRaises(GTDBTkExit,
                              self.options_parser._genomes_to_process,
                              tmp_genome_dir, '', 'fna')

        # Branch 2: batchfile is specified (an empty batchfile).
        with tempfile.TemporaryDirectory() as tmp_genome_dir:
            path_batchfile = os.path.join(tmp_genome_dir, 'batchfile.txt')
            open(path_batchfile, 'a').close()
            self.assertRaises(GTDBTkExit,
                              self.options_parser._genomes_to_process,
                              '', path_batchfile, 'fna')

    # ------------------------------------------------------------------
    # Missing-input handling of the public sub-commands
    # ------------------------------------------------------------------
    # NOTE(review): the tests below use an ``argparse.ArgumentParser``
    # instance purely as an attribute bag for options;
    # ``argparse.Namespace`` would be the conventional type. Left unchanged
    # because OptionsParser is not visible from here.

    def test_identify__genome_dir_raises_io_exception(self):
        """ Test that the identify method raises an exception on invalid genome_dir """
        options = argparse.ArgumentParser()
        options.genome_dir = os.path.join(tempfile.gettempdir(), 'non-existent-dir')
        self.assertRaises(BioLibDirNotFound, self.options_parser.identify, options)

    def test_identify__batchfile_raises_io_exception(self):
        """ Test that the identify method raises an exception on invalid batchfile """
        options = argparse.ArgumentParser()
        options.genome_dir = None
        options.batchfile = os.path.join(tempfile.gettempdir(), 'non-existent-file.txt')
        self.assertRaises(BioLibFileNotFound, self.options_parser.identify, options)

    def test_align__identify_dir_raises_io_exception(self):
        """ Test that the align method raises an exception on invalid identify dir """
        options = argparse.ArgumentParser()
        options.identify_dir = os.path.join(tempfile.gettempdir(), 'non-existent-dir')
        self.assertRaises(BioLibDirNotFound, self.options_parser.align, options)

    def test_infer__msa_raises_io_exception(self):
        """ Test that the infer method raises an exception on invalid MSA """
        options = argparse.ArgumentParser()
        options.msa_file = os.path.join(tempfile.gettempdir(), 'non-existent-msa.txt')
        self.assertRaises(BioLibFileNotFound, self.options_parser.infer, options)

    def test_run_test(self):
        """Test that the user-test method runs correctly"""
        options = argparse.ArgumentParser()
        options.out_dir = self.dir_tmp
        options.cpus = 3
        self.assertTrue(self.options_parser.run_test(options))

    # NOTE(review): disabled test, kept for reference.
    # def test_run_test__throws_exception(self):
    #     """Test that the user-test method fails correctly"""
    #     options = argparse.ArgumentParser()
    #     options.out_dir = self.dir_tmp
    #     os.mkdir(os.path.join(self.dir_tmp, 'genomes'))
    #     options.cpus = 3
    #     self.assertRaises(GTDBTkTestFailure, self.options_parser.run_test, options)

    def test_classify__align_dir_raises_io_exception(self):
        """ Test that the classify method raises an exception on invalid align dir """
        options = argparse.ArgumentParser()
        options.align_dir = os.path.join(tempfile.gettempdir(), 'non-existent-dir')
        self.assertRaises(BioLibDirNotFound, self.options_parser.classify, options)

    def test_root__no_tree_raises_io_exception(self):
        """ Test that the root method raises an exception on invalid tree """
        options = argparse.ArgumentParser()
        options.input_tree = os.path.join(tempfile.gettempdir(), 'non-existent-tree.tree')
        self.assertRaises(BioLibFileNotFound, self.options_parser.root, options)

    def test_decorate__no_tree_raises_io_exception(self):
        """ Test that the decorate method raises an exception on invalid tree """
        options = argparse.ArgumentParser()
        options.input_tree = os.path.join(tempfile.gettempdir(), 'non-existent-tree.tree')
        self.assertRaises(BioLibFileNotFound, self.options_parser.decorate, options)

    # ------------------------------------------------------------------
    # trim_msa
    # ------------------------------------------------------------------

    def test_trim_msa__mask_file(self):
        """ Test that the expected result is returned when running trim_msa with mask_file """
        path_untrimmed_msa = os.path.join(self.dir_tmp, 'untrimmed_msa.fasta')
        path_mask_file = os.path.join(self.dir_tmp, 'mask_file.txt')
        path_output = os.path.join(self.dir_tmp, 'trimmed_msa.fasta')

        with open(path_untrimmed_msa, 'w') as f:
            f.write('>genome_1\n')
            f.write('ALGPVW\n')
            f.write('>genome_2\n')
            f.write('WVPGLA\n')

        # The expected output below keeps the 2nd and 5th columns, i.e. the
        # positions where the mask is '1'.
        with open(path_mask_file, 'w') as f:
            f.write('010010\n')

        options = argparse.ArgumentParser()
        # Required arguments
        options.untrimmed_msa = path_untrimmed_msa
        options.output = path_output
        # Mutex arguments
        options.mask_file = path_mask_file
        options.reference_mask = None

        self.options_parser.trim_msa(options)

        # Each FASTA record is parsed as a (genome id, sequence) pair.
        with open(path_output, 'r') as f:
            results = dict(re.findall(r'>(.+)\n(.+)\n', f.read()))

        expected = {'genome_1': 'LV', 'genome_2': 'VL'}
        self.assertDictEqual(results, expected)

    def test_trim_msa__reference_mask_arc(self):
        """ Test that the expected result is returned when running trim_msa with archaeal reference_mask """
        path_untrimmed_msa = os.path.join(self.dir_tmp, 'untrimmed_msa.fasta')
        path_output = os.path.join(self.dir_tmp, 'trimmed_msa.fasta')
        shutil.copyfile(Config.CONCAT_AR122, path_untrimmed_msa)

        options = argparse.ArgumentParser()
        # Required arguments
        options.untrimmed_msa = path_untrimmed_msa
        options.output = path_output
        # Mutex arguments
        options.mask_file = None
        options.reference_mask = 'arc'

        self.options_parser.trim_msa(options)

        # NOTE(review): the expected digest has 40 hex characters, which is
        # the length of a SHA-1 digest rather than SHA-256 -- confirm what
        # the project's ``sha256`` helper actually computes.
        actual = sha256(path_output)
        expected = '1146351be59ae8d27668256c5b2c425a6f38c37c'
        self.assertEqual(actual, expected)

    def test_trim_msa__reference_mask_bac(self):
        """ Test that the expected result is returned when running trim_msa with bacterial reference_mask """
        path_untrimmed_msa = os.path.join(self.dir_tmp, 'untrimmed_msa.fasta')
        path_output = os.path.join(self.dir_tmp, 'trimmed_msa.fasta')
        shutil.copyfile(Config.CONCAT_BAC120, path_untrimmed_msa)

        options = argparse.ArgumentParser()
        # Required arguments
        options.untrimmed_msa = path_untrimmed_msa
        options.output = path_output
        # Mutex arguments
        options.mask_file = None
        options.reference_mask = 'bac'

        self.options_parser.trim_msa(options)

        # NOTE(review): 40-hex-character digest; see the archaeal test.
        actual = sha256(path_output)
        expected = 'ae6e24e89540fed03b81436147f99bcd120d059a'
        self.assertEqual(actual, expected)

    # ------------------------------------------------------------------
    # export_msa
    # ------------------------------------------------------------------

    def test_export_msa__arc(self):
        """ Test that the untrimmed archaeal MSA is exported correctly """
        path_out = os.path.join(self.dir_tmp, 'output.fasta')
        options = argparse.ArgumentParser()
        options.domain = 'arc'
        options.output = path_out

        self.options_parser.export_msa(options)

        with open(path_out, 'rb') as f:
            out_hash = hashlib.sha256(f.read()).hexdigest()
        self.assertEqual(
            out_hash,
            '8706b42a3f4b2445273058e7e876f0d8332bd8dec95c0fc8bc024d76a5a5aade')

    def test_export_msa__bac(self):
        """ Test that the untrimmed bacterial MSA is exported correctly """
        path_out = os.path.join(self.dir_tmp, 'output.fasta')
        options = argparse.ArgumentParser()
        options.domain = 'bac'
        options.output = path_out

        self.options_parser.export_msa(options)

        with open(path_out, 'rb') as f:
            out_hash = hashlib.sha256(f.read()).hexdigest()
        self.assertEqual(
            out_hash,
            '3c5dfa4dc5ef943459e6d0ed4da1e5a5858332c824739630beffb57fab303486')