class TestCoreGene(MacsyTest):
    """Check construction, attributes and hashing of CoreGene objects."""

    def setUp(self):
        """Build the Config, ModelLocation and ProfileFactory shared by the tests."""
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 30
        self.cfg = Config(MacsyDefaults(), args)
        self.model_name = 'foo'
        self.model_location = ModelLocation(path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)

    def tearDown(self):
        """Remove the working directory; a failing cleanup never fails the test."""
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # Best-effort cleanup (the directory may never have been created).
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            pass

    def test_core_gene(self):
        """A CoreGene exposes its name, family and profile, and hashes consistently."""
        model_fqn = "foo/bar"
        model = Model(model_fqn, 10)
        gene_name = 'toto'
        cg = CoreGene(self.model_location, gene_name, self.profile_factory)
        self.assertEqual(cg.name, gene_name)
        self.assertEqual(cg.model_family_name, model.family_name)
        self.assertEqual(cg.profile, self.profile_factory.get_profile(cg, self.model_location))

        # Two CoreGene built from the same arguments must share the same hash.
        cg2 = CoreGene(self.model_location, gene_name, self.profile_factory)
        self.assertIsInstance(hash(cg), int)
        self.assertEqual(hash(cg), hash(cg2))

        # A different gene name must give a different hash.
        gene_name = 'totote'
        cg3 = CoreGene(self.model_location, gene_name, self.profile_factory)
        self.assertNotEqual(hash(cg), hash(cg3))
class TestReport(MacsyTest):
    """Fixture for the report tests: private output dir, sequence copy and indexes."""

    def setUp(self):
        """Create a clean output directory, copy the sequence db into it and index it."""
        models_dir = self.find_data('models')
        res_search_dir = tempfile.gettempdir()
        out_dir = os.path.join(res_search_dir, 'test_macsyfinder_Report')

        # Always start from an empty output directory.
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.mkdir(out_dir)

        # The tests work on a private copy of the sequence file placed in out_dir.
        fasta_src = self.find_data("base", "test_base.fa")
        shutil.copy(fasta_src, out_dir)

        args = argparse.Namespace(
            db_type='gembase',
            models_dir=models_dir,
            res_search_dir=res_search_dir,
            log_level=30,
            out_dir=out_dir,
            sequence_db=os.path.join(out_dir, os.path.basename(fasta_src)),
        )
        self.cfg = Config(MacsyDefaults(), args)

        hmmer_dir = os.path.join(self.cfg.out_dir(), self.cfg.hmmer_dir())
        os.mkdir(hmmer_dir)

        self.model_name = 'foo'
        self.model_location = ModelLocation(path=os.path.join(models_dir, self.model_name))

        # We need to reset the ProfileFactory because it behaves like a
        # singleton: other tests are influenced by the ProfileFactory and its
        # configuration (for instance search_genes gets profiles without
        # hmmer_exe).
        self.profile_factory = ProfileFactory(self.cfg)

        indexes = Indexes(self.cfg)
        indexes.build()

    def tearDown(self):
        """Remove the working directory, ignoring any error raised while doing so."""
        try:
            working_dir = self.cfg.working_dir()
            shutil.rmtree(working_dir)
        except Exception:
            pass
def test_working_dir(self):
    """The working directory of a Config must be the same as its output directory."""
    config = Config(self.defaults, self.parsed_args)
    out_dir = config.out_dir()
    working_dir = config.working_dir()
    self.assertEqual(out_dir, working_dir)
class TestModelParser(MacsyTest):
    """Tests of DefinitionParser on the model definitions of the 'foo' test family."""

    def setUp(self):
        """Create the config, banks, registry and parser used by every test."""
        self.args = argparse.Namespace()
        self.args.sequence_db = self.find_data("base", "test_1.fasta")
        self.args.db_type = 'gembase'
        self.args.models_dir = self.find_data('models')
        self.args.res_search_dir = tempfile.gettempdir()
        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.profile_factory = ProfileFactory(self.cfg)
        self.model_registry = self._scan_registry()
        self.parser = DefinitionParser(self.cfg, self.model_bank, self.gene_bank,
                                       self.model_registry, self.profile_factory)

    def tearDown(self):
        """Remove the working directory; a failing cleanup never fails the test."""
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # Best-effort cleanup; unlike a bare ``except:`` this lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

    # ------------------------------------------------------------------ #
    # private helpers
    # ------------------------------------------------------------------ #

    def _scan_registry(self):
        """Return a new ModelRegistry filled with every model found in self.args.models_dir."""
        registry = ModelRegistry()
        for model_loc in scan_models_dir(self.args.models_dir):
            registry.add(model_loc)
        return registry

    def _reset_parser(self, fresh_banks=True):
        """
        Rebuild self.cfg from self.args and create a new parser on top of it.

        :param bool fresh_banks: if True the model bank, the gene bank and the
                                 model registry are recreated too; if False the
                                 existing ones are reused by the new parser.
        """
        self.cfg = Config(MacsyDefaults(), self.args)
        if fresh_banks:
            self.model_bank = ModelBank()
            self.gene_bank = GeneBank()
            self.model_registry = self._scan_registry()
        # The profile factory created in setUp is deliberately reused.
        self.parser = DefinitionParser(self.cfg, self.model_bank, self.gene_bank,
                                       self.model_registry, self.profile_factory)

    def _definitions(self, *fqns):
        """Return the list of definitions of the 'foo' family matching the given fqn."""
        return [self.model_registry['foo'].get_definition(fqn) for fqn in fqns]

    def _parse_model(self, fqn):
        """Parse the definition *fqn* and return the corresponding entry of the model bank."""
        self.parser.parse(self._definitions(fqn))
        return self.model_bank[fqn]

    def _assert_parse_error(self, fqn, exc_type, expected_msg):
        """Check that parsing *fqn* (log muted) raises *exc_type* with exactly *expected_msg*."""
        with self.assertRaises(exc_type) as ctx:
            with self.catch_log():
                self.parser.parse(self._definitions(fqn))
        self.assertEqual(str(ctx.exception), expected_msg)

    def _assert_parse_error_logged(self, fqn, expected_log):
        """Check that parsing *fqn* raises MacsypyError and logs exactly *expected_log*."""
        with self.catch_log(log_name='macsypy') as log:
            with self.assertRaises(MacsypyError):
                self.parser.parse(self._definitions(fqn))
            log_msg = log.get_value().strip()
        self.assertEqual(log_msg, expected_log)

    # ------------------------------------------------------------------ #
    # valid definitions
    # ------------------------------------------------------------------ #

    def test_parse_with_exchangeable(self):
        model_name = 'model_1'
        model_family = 'foo'
        fqn = f"{model_family}/{model_name}"
        self.parser.parse(self._definitions(fqn))
        self.assertEqual(len(self.model_bank), 1)

        m1 = self.model_bank[fqn]
        self.assertEqual(m1.name, model_name)
        self.assertEqual(m1.fqn, fqn)
        self.assertEqual(m1.inter_gene_max_space, 20)
        self.assertEqual(m1.min_mandatory_genes_required, 2)
        self.assertEqual(m1.min_genes_required, 4)
        self.assertTrue(m1.multi_loci)

        # Expected gene names for each presence category of the model.
        expected_genes = {
            'mandatory_genes': ["sctJ_FLG", "sctN_FLG"],
            'accessory_genes': ["flgB", "flgC"],
            'neutral_genes': ["fliE", "tadZ"],
            'forbidden_genes': ["sctC"],
        }
        for category, theoric_list in expected_genes.items():
            with self.subTest(category=category):
                genes = getattr(m1, category)
                self.assertEqual(len(genes), len(theoric_list))
                self.assertListEqual(sorted(g.name for g in genes), sorted(theoric_list))

        sctJ_FLG = m1.get_gene('sctJ_FLG')
        sctJ_FLG_exchangeables = sctJ_FLG.exchangeables
        self.assertEqual(len(sctJ_FLG_exchangeables), 2)
        self.assertEqual(sctJ_FLG_exchangeables[0].name, 'sctJ')
        self.assertEqual(sctJ_FLG_exchangeables[1].name, 'abc')
        self.assertIsInstance(sctJ_FLG_exchangeables[0], Exchangeable)
        self.assertIsInstance(sctJ_FLG_exchangeables[0]._gene, CoreGene)
        self.assertIsInstance(sctJ_FLG_exchangeables[0].alternate_of(), ModelGene)
        self.assertTrue(sctJ_FLG_exchangeables[0].loner)
        self.assertFalse(sctJ_FLG.is_exchangeable)
        sctJ = m1.get_gene('sctJ')
        self.assertTrue(sctJ.is_exchangeable)

    def test_exchangeable_inheritance(self):
        m1 = self._parse_model('foo/model_1')

        sctJ = m1.get_gene('sctJ')
        self.assertTrue(sctJ.is_exchangeable)
        self.assertTrue(sctJ.loner)
        self.assertTrue(sctJ.multi_system)
        self.assertFalse(sctJ.multi_model)

        sctJ_FLG = m1.get_gene('sctJ_FLG')
        self.assertTrue(sctJ_FLG.multi_system)
        abc = m1.get_gene('abc')
        self.assertFalse(abc.multi_system)

        sctN = m1.get_gene('sctN')
        sctN_FLG = m1.get_gene('sctN_FLG')
        self.assertFalse(sctN_FLG.loner)
        self.assertTrue(sctN.loner)
        self.assertIsNone(sctN_FLG.inter_gene_max_space)
        self.assertEqual(sctN.inter_gene_max_space, 10)
        self.assertFalse(sctN_FLG.multi_model)
        self.assertFalse(sctN.multi_model)

        gspD = m1.get_gene('gspD')
        self.assertFalse(sctN_FLG.multi_system)
        self.assertTrue(gspD.multi_model)
        self.assertTrue(gspD.multi_system)

    # ------------------------------------------------------------------ #
    # invalid definitions
    # ------------------------------------------------------------------ #

    def test_model_w_unkown_attr(self):
        self._assert_parse_error(
            'foo/model_w_unknown_attribute',
            MacsypyError,
            "unable to parse model definition 'foo/model_w_unknown_attribute' : "
            "The model definition model_w_unknown_attribute.xml has an unknow attribute 'multi-loci'. "
            "Please fix the definition.")

    def test_gene_w_unkown_attr(self):
        self._assert_parse_error(
            'foo/gene_w_unknown_attribute',
            MacsypyError,
            "unable to parse model definition 'foo/gene_w_unknown_attribute' : "
            "The model definition gene_w_unknown_attribute.xml has an unknown attribute 'multi-system' for a gene."
            " Please fix the definition.")

    def test_wo_presence(self):
        self._assert_parse_error(
            'foo/fail_wo_presence',
            SyntaxError,
            "Invalid model definition 'foo/fail_wo_presence': gene 'sctN_FLG' without presence")

    def test_invalid_presence(self):
        self._assert_parse_error(
            'foo/fail_invalid_presence',
            SyntaxError,
            "Invalid model 'fail_invalid_presence' definition: presence value must be either: "
            "'mandatory', 'accessory', 'neutral', 'forbidden' not foo_bar")

    def test_gene_no_name(self):
        self._assert_parse_error(
            'foo/gene_no_name',
            SyntaxError,
            "Invalid model definition 'foo/gene_no_name': gene without name")

    def test_invalid_homolog(self):
        model_2_detect = self._definitions('foo/invalid_homolog')
        # The log is not muted in this test.
        with self.assertRaises(MacsypyError) as context:
            self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception), "'foo/foo_bar': No such profile")

    def test_invalid_homolog_2(self):
        self._assert_parse_error(
            'foo/invalid_homolog_2',
            SyntaxError,
            "Invalid model definition 'foo/invalid_homolog_2': gene without name")

    def test_bad_min_genes_required(self):
        self._assert_parse_error(
            'foo/bad_min_genes_required',
            ModelInconsistencyError,
            'model \'bad_min_genes_required\' is not consistent: min_genes_required 16 must be lesser '
            'or equal than the number of "accessory" and "mandatory" components in the model: 6')

    def test_bad_min_genes_required_2(self):
        model_2_detect = self._definitions('foo/bad_min_genes_required_2')
        with self.catch_log():
            with self.assertRaisesRegex(SyntaxError,
                                        "Invalid model definition (.*): "
                                        "min_genes_required must be an integer: 16.5"):
                self.parser.parse(model_2_detect)

    def test_bad_min_mandatory_genes_required(self):
        self._assert_parse_error(
            'foo/bad_min_mandatory_genes_required',
            ModelInconsistencyError,
            'model \'bad_min_mandatory_genes_required\' is not consistent: min_genes_required 16 must '
            'be lesser or equal than the number of "accessory" and "mandatory" components in the model: 6')

    def test_bad_min_mandatory_genes_required_2(self):
        # The error is raised by the System initialization, which occurs
        # before check_consistency, so the last test of check_consistency:
        #   not(model.min_mandatory_genes_required <= model.min_genes_required)
        # seems to be useless.
        self._assert_parse_error(
            'foo/bad_min_mandatory_genes_required_2',
            ModelInconsistencyError,
            "foo/bad_min_mandatory_genes_required_2: min_genes_required '6' must be greater or equal"
            " than min_mandatory_genes_required '8'")

    def test_bad_min_mandatory_genes_required_4(self):
        model_2_detect = self._definitions('foo/bad_min_mandatory_genes_required_4')
        with self.assertRaisesRegex(SyntaxError,
                                    "Invalid model definition (.*): "
                                    "min_mandatory_genes_required must be an integer: 12.5"):
            with self.catch_log():
                self.parser.parse(model_2_detect)

    def test_min_mandatory_genes_required_lesser_than_mandatory_genes(self):
        self._assert_parse_error(
            'foo/bad_min_mandatory_genes_required_3',
            ModelInconsistencyError,
            "model 'bad_min_mandatory_genes_required_3' is not consistent:"
            " 'min_mandatory_genes_required': 6 must be lesser or equal than the number of 'mandatory' "
            "components in the model: 5")

    def test_only_one_accessory(self):
        self._assert_parse_error(
            'foo/only_one_accessory',
            ModelInconsistencyError,
            "model 'only_one_accessory' is not consistent: there is only one gene in your model. "
            "So its status should be 'mandatory'.")

    def test_bad_max_nb_genes(self):
        model_2_detect = self._definitions('foo/bad_max_nb_genes')
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        model_name, def_name = model_2_detect[0].split_fqn(model_2_detect[0].fqn)
        definition_path = os.path.join(self.cfg.models_dir()[0], model_name, 'definitions', def_name)
        self.assertEqual(
            str(context.exception),
            "Invalid model definition ({0}.xml): max_nb_genes must be an integer: HOHOHO".format(definition_path))

    def test_bad_inter_gene_max_space(self):
        fqn = 'foo/bad_inter_gene_max_space'
        model_family, model_name = fqn.split('/')
        definition_path = os.path.join(self.cfg.models_dir()[0], model_family,
                                       'definitions', model_name + ".xml")
        self._assert_parse_error(
            fqn,
            SyntaxError,
            "Invalid model definition ({}): inter_gene_max_space must be an integer: 12.5".format(definition_path))

    def test_no_inter_gene_max_space(self):
        definition_path = os.path.join(self.cfg.models_dir()[0],
                                       "foo/definitions/no_inter_gene_max_space.xml")
        self._assert_parse_error(
            'foo/no_inter_gene_max_space',
            SyntaxError,
            "Invalid model definition ({}): inter_gene_max_space must be defined".format(definition_path))

    # ------------------------------------------------------------------ #
    # gene attributes
    # ------------------------------------------------------------------ #

    def test_loner(self):
        m5 = self._parse_model('foo/model_5')
        m5_flgC = m5.get_gene('flgC')
        self.assertFalse(m5_flgC.loner)
        m5_tadZ = m5.get_gene('tadZ')
        self.assertTrue(m5_tadZ.loner)

        m6 = self._parse_model('foo/model_6')
        m6_flgC = m6.get_gene('flgC')
        self.assertFalse(m6_flgC.loner)
        m6_tadZ = m6.get_gene('tadZ')
        self.assertFalse(m6_tadZ.loner)

    def test_multi_system(self):
        m = self._parse_model('foo/model_5')
        flgC = m.get_gene('flgC')
        self.assertFalse(flgC.multi_system)
        fliE = m.get_gene('fliE')
        self.assertTrue(fliE.multi_system)

    def test_multi_model(self):
        m = self._parse_model('foo/model_5')
        flgC = m.get_gene('flgC')
        self.assertFalse(flgC.multi_model)
        abc = m.get_gene('abc')
        self.assertTrue(abc.multi_model)

    def test_gene_inter_gene_max_space(self):
        self.parser.parse(self._definitions('foo/model_5', 'foo/model_6'))

        m5 = self.model_bank['foo/model_5']
        self.assertEqual(m5.name, 'model_5')
        self.assertEqual(m5.fqn, 'foo/model_5')
        self.assertEqual(m5.inter_gene_max_space, 20)
        m5_flgB = m5.get_gene('flgB')
        m5_flgC = m5.get_gene('flgC')
        self.assertIsNone(m5_flgB.inter_gene_max_space)
        self.assertEqual(m5_flgC.inter_gene_max_space, 2)

        m6 = self.model_bank['foo/model_6']
        m6_flgC = m6.get_gene('flgC')
        self.assertEqual(m6_flgC.inter_gene_max_space, 12)

    # ------------------------------------------------------------------ #
    # values overloaded by the configuration
    # ------------------------------------------------------------------ #

    def test_inter_gene_max_space_cfg(self):
        # inter_gene_max_space is specified from the configuration,
        # so this value must overload the value read from the xml.
        model_fqn = 'foo/model_5'
        self.args.inter_gene_max_space = [[model_fqn, '222']]
        self._reset_parser()
        m = self._parse_model(model_fqn)
        self.assertEqual(m.inter_gene_max_space, 222)

    def test_min_mandatory_genes_required_cfg(self):
        # min_mandatory_genes_required is specified from the configuration,
        # so this value must overload the value read from the xml.
        model_fqn = 'foo/model_5'
        self.args.min_mandatory_genes_required = [[model_fqn, '3']]
        self._reset_parser()
        m = self._parse_model(model_fqn)
        self.assertEqual(m.min_mandatory_genes_required, 3)

    def test_min_genes_required_cfg(self):
        # min_genes_required is specified from the configuration,
        # so this value must overload the value read from the xml.
        model_fqn = 'foo/model_5'
        self.args.min_genes_required = [[model_fqn, '4']]
        self._reset_parser()
        m = self._parse_model(model_fqn)
        self.assertEqual(m.min_genes_required, 4)

    def test_max_nb_genes_cfg(self):
        # max_nb_genes is specified in the xml, no user configuration on it.
        self._reset_parser()
        model_fqn = 'foo/model_6'  # 4 genes in this model but the xml specifies 3
        m = self._parse_model(model_fqn)
        self.assertEqual(m.max_nb_genes, 3)

        # max_nb_genes is specified from the configuration,
        # so this value must overload the value read from the xml.
        model_fqn = 'foo/model_5'  # 4 genes in this model
        self.args.max_nb_genes = [[model_fqn, '6']]
        # Only the config and the parser are renewed: the banks and the
        # registry filled by the first parsing are kept.
        self._reset_parser(fresh_banks=False)
        m = self._parse_model(model_fqn)
        self.assertEqual(m.max_nb_genes, 6)

    def test_multi_loci_cfg(self):
        # multi_loci is specified from the configuration,
        # so this value must overload the value read from the xml.
        model_fqn = 'foo/model_5'
        self.args.multi_loci = model_fqn
        self._reset_parser()
        m = self._parse_model(model_fqn)
        self.assertTrue(m.multi_loci)

    def test_bad_gene_inter_gene_max_space_2(self):
        self._assert_parse_error(
            'foo/bad_inter_gene_max_space_2',
            SyntaxError,
            "Invalid model definition 'bad_inter_gene_max_space_2': "
            "inter_gene_max_space must be an integer: 2.5")

    def test_bad_exchangeable_inter_gene_max_space(self):
        self._assert_parse_error(
            'foo/bad_exchangeable_inter_gene_max_space',
            SyntaxError,
            "Invalid model definition 'bad_exchangeable_inter_gene_max_space': "
            "inter_gene_max_space must be an integer: 1.5")

    def test_parse_model_old_syntax(self):
        # Each entry: (definition name, diagnostic expected in the logged message).
        old_definitions = [
            ('model_old_1', 'is not versioned'),  # the attribute vers is not set
            ('model_old_2', 'is obsolete'),       # the root is system instead of model
            ('model_old_3', 'is obsolete'),       # there is still a system_ref attribute
            ('model_old_4', 'is obsolete'),       # there is still a homologs tag
            ('model_old_5', 'is obsolete'),       # there is still an analogs tag
        ]
        for def_name, diagnostic in old_definitions:
            model_fqn = f'foo/{def_name}'
            with self.subTest(model_fqn=model_fqn):
                self._assert_parse_error_logged(
                    model_fqn,
                    f"unable to parse model definition '{model_fqn}' : "
                    f"The model definition {def_name}.xml {diagnostic}. Please update your model.")
class TestProfile(MacsyTest):
    """Tests of Profile: length, GA threshold detection, str and hmmsearch execution."""

    def setUp(self):
        """Build the config on a fresh, empty working directory."""
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 50
        self.cfg = Config(MacsyDefaults(), args)
        if os.path.exists(self.cfg.working_dir()):
            shutil.rmtree(self.cfg.working_dir())
        os.makedirs(self.cfg.working_dir())
        self.model_name = 'foo'
        self.model_location = ModelLocation(path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)

    def tearDown(self):
        """Remove the working directory; a failing cleanup never fails the test."""
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # Best-effort cleanup; unlike a bare ``except:`` this lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

    # ------------------------------------------------------------------ #
    # private helpers
    # ------------------------------------------------------------------ #

    def _new_profile(self, gene_name, model=None, cfg=None):
        """
        Build a ModelGene named *gene_name* and a Profile for it.

        :param str gene_name: name of the gene, also used to look up the profile path.
        :param model: the model the gene belongs to; a new Model("foo/T2SS", 10) if None.
        :param cfg: the configuration given to the Profile; self.cfg if None.
        :return: the tuple (gene, profile path, profile)
        """
        if model is None:
            model = Model("foo/T2SS", 10)
        if cfg is None:
            cfg = self.cfg
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)
        path = self.model_location.get_profile(gene_name)
        return gene, path, Profile(gene, cfg, path)

    @staticmethod
    def _squeeze(text):
        """Return *text* stripped, with every run of whitespace replaced by one space."""
        return " ".join(text.split())

    @staticmethod
    def _read_lines(handle, nb_lines):
        """Read *nb_lines* lines from *handle* and return the last one read."""
        line = ''
        for _ in range(nb_lines):
            line = handle.readline()
        return line

    def _check_hmmsearch_header(self, raw_output, profile_path, threshold_line):
        """
        Check the header of a hmmsearch raw output file.

        The first line must be the hmmsearch banner, the 6th line must mention
        *profile_path* and the 9th line must be *threshold_line*. Whitespace
        runs are normalized before comparing, so the check does not depend on
        the column padding of the header.
        """
        with open(raw_output, 'r') as raw_file:
            first_l = raw_file.readline()
            # a hmmsearch output file has been produced
            self.assertTrue(self._squeeze(first_l).startswith(
                "# hmmsearch :: search profile(s) against a sequence database"))
            # the 5th line after the banner is expected to be
            # "# query HMM file: {the path to the hmm profile used}"
            query_line = self._read_lines(raw_file, 5)
            self.assertIn(profile_path, query_line)
            # the threshold line is 3 lines further
            found_threshold = self._read_lines(raw_file, 3)
        self.assertEqual(threshold_line, self._squeeze(found_threshold))

    def _assert_bad_ga(self, gene_name, model, expected_msg):
        """Check that a profile with a malformed GA line logs *expected_msg* and has no GA threshold."""
        with self.catch_log(log_name='macsypy'):
            # When a CoreGene is created a Profile is automatically
            # instantiated, so the log is muted to not pollute the output.
            c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)
        path = self.model_location.get_profile(gene_name)
        with self.catch_log(log_name='macsypy') as log:
            profile = Profile(gene, self.cfg, path)
            catch_msg = log.get_value().strip()
        self.assertFalse(profile.ga_threshold)
        self.assertEqual(catch_msg, expected_msg)

    # ------------------------------------------------------------------ #
    # tests
    # ------------------------------------------------------------------ #

    def test_len(self):
        _, _, profile = self._new_profile('abc')
        self.assertEqual(len(profile), 501)

    def test_ga_threshold(self):
        model = Model("foo/T2SS", 10)

        # No GA threshold
        _, _, profile = self._new_profile('abc', model=model)
        self.assertFalse(profile.ga_threshold)

        # GA threshold line ends with ;
        _, _, profile = self._new_profile('T5aSS_PF03797', model=model)
        self.assertTrue(profile.ga_threshold)

        # GA threshold line does NOT end with ;
        _, _, profile = self._new_profile('PF05930.13', model=model)
        self.assertTrue(profile.ga_threshold)

        # GA threshold invalid format: strings instead of floats
        self._assert_bad_ga(
            'bad_GA', model,
            "bad_GA GA score is not well formatted expected 2 floats got ''22.00'' ''23.00''.\n"
            "GA score will not used for gene 'bad_GA'.")

        # GA threshold invalid format: only one score
        self._assert_bad_ga(
            'bad_GA_2', model,
            "bad_GA_2 GA score is not well formatted. expected: 'GA float float' got 'GA 22.00'.\n"
            "GA score will not used for gene 'bad_GA_2'.")

    def test_str(self):
        gene, path, profile = self._new_profile('abc')
        self.assertEqual(str(profile), f"{gene.name} : {path}")

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute_hmm_with_GA(self):
        for db_type in ("gembase", "ordered_replicon", "unordered"):
            self.cfg._set_db_type(db_type)
            # case GA threshold in profile
            _, profile_path, profile = self._new_profile('T5aSS_PF03797')
            report = profile.execute()
            self._check_hmmsearch_header(profile.hmm_raw_output,
                                         profile_path,
                                         "# model-specific thresholding: GA cutoffs")
            # test if profile is executed only once per run
            report_bis = profile.execute()
            self.assertIs(report, report_bis)

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute_hmm_protected_path(self):
        # create a hmmdir with a space in its name
        self.cfg.hmmer_dir = lambda: 'hmmer results'
        # create a sequence_db path with a space in it
        seq_path = os.path.join(self.cfg.working_dir(), "test test1.fasta")
        shutil.copyfile(self.find_data("base", "test_1.fasta"), seq_path)
        self.cfg._set_sequence_db(seq_path)

        # case GA threshold in profile
        _, profile_path, profile = self._new_profile('T5aSS_PF03797')
        profile.execute()
        self._check_hmmsearch_header(profile.hmm_raw_output,
                                     profile_path,
                                     "# model-specific thresholding: GA cutoffs")

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute_hmm_w_GA_n_nocutga(self):
        # case GA threshold in profile but --no-cut-ga is set
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 0
        args.e_value_search = 0.5
        args.no_cut_ga = True
        cfg = Config(MacsyDefaults(), args)

        _, _, profile = self._new_profile('T5aSS_PF03797', cfg=cfg)
        profile.execute()
        with open(profile.hmm_raw_output, 'r') as hmmer_raw_out_file:
            # the threshold is reported on the 9th line of the output
            threshold = self._read_lines(hmmer_raw_out_file, 9)
        self.assertEqual("# sequence reporting threshold: E-value <= 0.5",
                         self._squeeze(threshold))

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute_hmm_wo_GA(self):
        # case cut-ga but no GA threshold in the hmm profile
        _, profile_path, profile = self._new_profile('abc')
        with self.catch_log():
            profile.execute()
        self._check_hmmsearch_header(profile.hmm_raw_output,
                                     profile_path,
                                     '# sequence reporting threshold: E-value <= 0.1')

    def test_execute_unknown_binary(self):
        self.cfg._options['hmmer'] = "Nimportnaoik"
        _, _, profile = self._new_profile('abc')
        with self.catch_log():
            with self.assertRaises(RuntimeError):
                profile.execute()

    def test_execute_hmmer_failed(self):
        fake_hmmer = os.path.join(tempfile.gettempdir(), 'hmmer_failed')
        try:
            # The fake hmmer is a script which always exits with code 127.
            # It is created inside the try block so that it is removed even
            # if writing it or making it executable fails.
            with open(fake_hmmer, 'w') as hmmer:
                hmmer.write("#! {}\nimport sys\nsys.exit(127)\n".format(sysconfig.sys.executable))
            os.chmod(fake_hmmer, 0o755)
            self.cfg._options['hmmer'] = fake_hmmer
            _, _, profile = self._new_profile('abc')
            with self.catch_log():
                with self.assertRaisesRegex(RuntimeError,
                                            "an error occurred during Hmmer "
                                            "execution: command = .* : return code = 127 .*"):
                    profile.execute()
        finally:
            try:
                os.unlink(fake_hmmer)
            except OSError:
                # the script may not have been created
                pass
class TestModelGene(MacsyTest):
    """Unit tests for ``ModelGene`` and its relation to ``CoreGene`` / ``Exchangeable``."""

    def setUp(self):
        """Create a gembase configuration, the 'foo' model location and a profile factory."""
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 30
        self.cfg = Config(MacsyDefaults(), args)
        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)

    def tearDown(self):
        """Remove the working directory; the clean-up is best effort."""
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # Deliberately ignored: a missing directory must not fail the test.
            # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still propagate.
            pass

    def test_init(self):
        """A ModelGene can only be built from a CoreGene, not from another ModelGene."""
        model_foo = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene_1 = ModelGene(c_gene, model_foo)
        with self.assertRaises(MacsypyError) as ctx:
            ModelGene(gene_1, model_foo)
        self.assertEqual(
            str(ctx.exception),
            "The ModeleGene gene argument must be a CoreGene not <class 'macsypy.gene.ModelGene'>."
        )

    def test_hash(self):
        """Hash is an int, stable for one object and different for two distinct ModelGene."""
        model_foo = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene_1 = ModelGene(c_gene, model_foo)
        gene_2 = ModelGene(c_gene, model_foo)
        self.assertIsInstance(hash(gene_1), int)
        self.assertEqual(hash(gene_1), hash(gene_1))
        self.assertNotEqual(hash(gene_1), hash(gene_2))

    def test_unknown_attribute(self):
        """Accessing an attribute that does not exist raises a regular AttributeError."""
        model_foo = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model_foo)
        with self.assertRaises(AttributeError) as ctx:
            gene.foo
        self.assertEqual(str(ctx.exception),
                         "'ModelGene' object has no attribute 'foo'")

    def test_add_exchangeable(self):
        """An Exchangeable added to a gene is listed in its ``exchangeables``."""
        model_foo = Model("foo", 10)
        gene_name = 'sctJ'
        c_gene_ref = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene_ref = ModelGene(c_gene_ref, model_foo)

        h_gene_name = 'sctJ_FLG'
        h_c_gene = CoreGene(self.model_location, h_gene_name, self.profile_factory)
        homolog = Exchangeable(h_c_gene, gene_ref)
        gene_ref.add_exchangeable(homolog)

        self.assertEqual(len(gene_ref.exchangeables), 1)
        self.assertEqual(gene_ref.exchangeables[0], homolog)

    def test_exhangeables(self):
        """``exchangeables`` returns the added Exchangeable in insertion order."""
        model_foo = Model("foo", 10)
        gene_name = 'sctN'
        c_sctn = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctn = ModelGene(c_sctn, model_foo)

        gene_name = 'sctJ_FLG'
        c_sctJ_FLG = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene_name = 'sctJ'
        c_sctJ = CoreGene(self.model_location, gene_name, self.profile_factory)

        homolog_1 = Exchangeable(c_sctJ, sctn)
        sctn.add_exchangeable(homolog_1)
        homolog_2 = Exchangeable(c_sctJ_FLG, sctn)
        sctn.add_exchangeable(homolog_2)
        self.assertEqual(sctn.exchangeables, [homolog_1, homolog_2])

    def test_is_exchangeable(self):
        """Only Exchangeable objects report ``is_exchangeable``; ModelGene never do."""
        model_foo = Model("foo", 10)
        gene_name = 'sctN'
        c_sctn = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctn = ModelGene(c_sctn, model_foo)

        gene_name = 'sctJ_FLG'
        c_sctj_flg = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctj_flg = ModelGene(c_sctj_flg, model_foo)

        gene_name = 'sctJ'
        c_sctj = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctj = ModelGene(c_sctj, model_foo)

        homolog = Exchangeable(c_sctj_flg, sctj)
        sctj.add_exchangeable(homolog)

        self.assertFalse(sctj_flg.is_exchangeable)
        self.assertFalse(sctj.is_exchangeable)
        self.assertTrue(homolog.is_exchangeable)
        self.assertFalse(sctn.is_exchangeable)

    def test_alternate_of(self):
        """``alternate_of`` called on a ModelGene returns the gene itself."""
        model_foo = Model("foo", 10)
        gene_name = 'sctJ'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctj = ModelGene(c_gene, model_foo)

        gene_name = 'sctJ_FLG'
        c_sctj_flg = CoreGene(self.model_location, gene_name, self.profile_factory)
        analog = Exchangeable(c_sctj_flg, sctj)
        sctj.add_exchangeable(analog)
        self.assertEqual(sctj.alternate_of(), sctj)

    def test_model(self):
        """
        test getter for model property
        """
        model_foo = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene, model_foo)
        self.assertEqual(sctJ_FLG.model, model_foo)

    def test_loner(self):
        """
        test getter for loner property
        """
        model_foo = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene, model_foo)
        self.assertFalse(sctJ_FLG.loner)

        gene_name = 'sctJ'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ = ModelGene(c_gene, model_foo, loner=True)
        self.assertTrue(sctJ.loner)

    def test_is_mandatory(self):
        """
        test if gene belong to model mandatory genes
        """
        model_foo = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene, model_foo)
        model_foo.add_mandatory_gene(sctJ_FLG)
        self.assertTrue(sctJ_FLG.is_mandatory(model_foo))

        gene_name = 'sctJ'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ = ModelGene(c_gene, model_foo)
        model_foo.add_accessory_gene(sctJ)
        self.assertFalse(sctJ.is_mandatory(model_foo))

    def test_is_accessory(self):
        """
        test if gene belong to model accessory genes
        """
        model_foo = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene, model_foo)
        model_foo.add_mandatory_gene(sctJ_FLG)
        self.assertFalse(sctJ_FLG.is_accessory(model_foo))

        gene_name = 'sctJ'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ = ModelGene(c_gene, model_foo)
        model_foo.add_accessory_gene(sctJ)
        self.assertTrue(sctJ.is_accessory(model_foo))

    def test_is_Forbidden(self):
        """
        test if gene belong to model forbidden genes
        """
        model_foo = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene, model_foo)
        model_foo.add_mandatory_gene(sctJ_FLG)
        self.assertFalse(sctJ_FLG.is_forbidden(model_foo))

        gene_name = 'sctJ'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ = ModelGene(c_gene, model_foo)
        model_foo.add_forbidden_gene(sctJ)
        self.assertTrue(sctJ.is_forbidden(model_foo))

    def test_multi_system(self):
        """
        test getter for multi_system property
        """
        model_foo = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene, model_foo)
        self.assertFalse(sctJ_FLG.multi_system)

        gene_name = 'sctJ'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ = ModelGene(c_gene, model_foo, multi_system=True)
        self.assertTrue(sctJ.multi_system)

    def test_inter_gene_max_space(self):
        """
        test getter for inter_gene_max_space property
        """
        system_inter_gene_max_space = 40
        gene_inter_gene_max_space = 50
        model_foo = Model("foo", system_inter_gene_max_space)

        # without a gene level value, the value given to the model is returned
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene, model_foo)
        self.assertEqual(sctJ_FLG.inter_gene_max_space, system_inter_gene_max_space)

        # a value given to the gene takes precedence
        gene_name = 'sctJ'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ = ModelGene(c_gene, model_foo,
                         inter_gene_max_space=gene_inter_gene_max_space)
        self.assertEqual(sctJ.inter_gene_max_space, gene_inter_gene_max_space)

    def test_str(self):
        """
        test the string representation of a gene, with exchangeables
        then with the loner and multi_system flags
        """
        model_foo = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene, model_foo)

        gene_name = 'sctJ'
        c_sctJ = CoreGene(self.model_location, gene_name, self.profile_factory)
        homolog = Exchangeable(c_sctJ, sctJ_FLG)
        sctJ_FLG.add_exchangeable(homolog)

        gene_name = 'sctN'
        c_sctN = CoreGene(self.model_location, gene_name, self.profile_factory)
        analog = Exchangeable(c_sctN, sctJ_FLG)
        sctJ_FLG.add_exchangeable(analog)

        # NOTE(review): verify the whitespace / line breaks inside the two expected
        # literals below against the actual output of ModelGene.__str__.
        s = """name : sctJ_FLG inter_gene_max_space: 10 exchangeables: sctJ, sctN"""
        self.assertEqual(str(sctJ_FLG), s)

        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene, model_foo,
                             loner=True,
                             multi_system=True,
                             inter_gene_max_space=10)
        s = """name : sctJ_FLG inter_gene_max_space: 10 loner multi_system"""
        self.assertEqual(str(sctJ_FLG), s)
class Test(MacsyTest):
    """Unit tests for ``Indexes`` (lookup and construction of the sequence index file)."""

    def _make_config(self, seq_file_name):
        """
        Create an empty output directory, copy a sequence file into it and build a config.

        :param str seq_file_name: name of the sequence file located in the 'base' test data
        :return: a gembase configuration whose sequence_db is the copied file
        :rtype: :class:`Config` object
        """
        args = argparse.Namespace()
        args.db_type = 'gembase'
        args.e_value_res = 1
        args.i_evalue_sel = 0.5
        args.models_dir = self.find_data('models')
        args.res_search_suffix = ''
        args.log_level = 30
        args.out_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_indexes')
        # always start from a fresh directory, a previous run may have left files behind
        if os.path.exists(args.out_dir):
            shutil.rmtree(args.out_dir)
        os.makedirs(args.out_dir)
        seq_db = self.find_data("base", seq_file_name)
        shutil.copy(seq_db, args.out_dir)
        args.sequence_db = os.path.join(args.out_dir, os.path.basename(seq_db))
        return Config(MacsyDefaults(), args)

    def setUp(self):
        """Work on a private copy of test_1.fasta so index files never pollute test data."""
        self.cfg = self._make_config("test_1.fasta")

    def tearDown(self):
        """Remove the working directory; the clean-up is best effort."""
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # Deliberately ignored: a missing directory must not fail the test.
            # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still propagate.
            pass

    def test_find_my_indexes(self):
        """find_my_indexes returns None without index file, and its path once it exists."""
        idx = Indexes(self.cfg)
        self.assertIsNone(idx.find_my_indexes())
        new_idx = os.path.join(os.path.dirname(self.cfg.sequence_db()), idx.name + ".idx")
        with open(new_idx, 'w'):
            pass
        self.assertEqual(idx.find_my_indexes(), new_idx)

    def test_build_no_idx(self):
        """build creates '<name>.idx' beside the sequence file when there is no index."""
        idx = Indexes(self.cfg)
        idx.build()
        my_idx = idx.find_my_indexes()
        self.assertEqual(my_idx,
                         os.path.join(os.path.dirname(self.cfg.sequence_db()), idx.name + ".idx"))

    def test_build_with_idx(self):
        """build without force leaves a pre-existing (here empty) index file untouched."""
        idx = Indexes(self.cfg)
        with open(os.path.join(os.path.dirname(self.cfg.sequence_db()), idx.name + ".idx"), 'w'):
            pass
        idx.build()
        my_idx = idx.find_my_indexes()
        self.assertEqual(os.path.getsize(my_idx), 0)

    def test_build_force(self):
        """build(force=True) produces a non empty index file."""
        idx = Indexes(self.cfg)
        idx.build(force=True)
        my_idx = idx.find_my_indexes()
        self.assertNotEqual(os.path.getsize(my_idx), 0)

    @unittest.skipIf(platform.system() == 'Windows' or os.getuid() == 0,
                     'Skip test on Windows or if run as root')
    def test_build_not_writable(self):
        """build raises IOError when the directory of the sequence file is not writable."""
        # Skipped on Windows, since setting the folder permissions does not affect
        # the files inside.
        # In a Singularity container the tests are run as root, so this test makes no sense.
        idx = Indexes(self.cfg)
        idx_dir = os.path.dirname(self.cfg.sequence_db())
        os.chmod(idx_dir, 0o000)
        try:
            with self.assertRaises(IOError) as ctx:
                with self.catch_log():
                    idx.build()
            # raw string: '\(' is not a valid escape sequence in a regular literal
            self.assertRegex(str(ctx.exception),
                             r"cannot build indexes, \(.+/test_macsyfinder_indexes\) is not writable")
        finally:
            # restore the permissions, otherwise the directory cannot be removed
            os.chmod(idx_dir, 0o777)

    def test_build_my_indexes(self):
        """_build_my_indexes raises MacsypyError on a sequence file containing errors."""
        cfg = self._make_config("test_base_with_errors.fa")
        idx = Indexes(cfg)
        with self.assertRaises(MacsypyError) as e:
            with self.catch_log():
                idx._build_my_indexes()
        self.assertTrue(str(e.exception).startswith("unable to index the sequence dataset:"))
class TestModel(MacsyTest):
    """Unit tests for ``Model`` (thresholds, gene categories, ordering, hit filtering)."""

    def setUp(self):
        """Create a gembase configuration with a fresh output dir, and a profile factory."""
        self.args = argparse.Namespace()
        self.args.sequence_db = self.find_data("base", "test_1.fasta")
        self.args.db_type = 'gembase'
        self.args.models_dir = self.find_data('models')
        self.args.res_search_dir = tempfile.gettempdir()
        self.args.log_level = 30
        self.args.out_dir = os.path.join(self.args.res_search_dir, 'test_macsyfinder_Model')
        if os.path.exists(self.args.out_dir):
            shutil.rmtree(self.args.out_dir)
        os.mkdir(self.args.out_dir)

        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(self.args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)

    def tearDown(self):
        self.clean_working_dir()

    def clean_working_dir(self):
        """Remove the working directory; the clean-up is best effort."""
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # Deliberately ignored: the directory may already have been removed.
            # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still propagate.
            pass

    def test_fqn(self):
        """``fqn`` is the full name, ``name`` is its last component."""
        fqn = 'foo/bla'
        model = Model(fqn, 10)
        self.assertEqual(model.fqn, fqn)
        self.assertEqual(model.name, 'bla')

    def test_inter_gene_max_space(self):
        """``inter_gene_max_space`` returns the value given at construction."""
        model_fqn = 'foo/bar'
        inter_gene_max_space_xml = 40
        # test inter_gene_max_space from xml
        model = Model(model_fqn, inter_gene_max_space_xml)
        self.assertEqual(model.inter_gene_max_space, inter_gene_max_space_xml)
        self.clean_working_dir()

    def test_min_genes_required(self):
        """``min_genes_required`` is the given value, else the number of mandatory genes."""
        model_fqn = 'foo/model_1'
        min_genes_required_xml = 40
        model = Model(model_fqn, 10, min_genes_required=min_genes_required_xml)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)
        model.add_mandatory_gene(gene)
        self.assertEqual(model.min_genes_required, min_genes_required_xml)

        model = Model(model_fqn, 10)
        self.assertEqual(model.min_genes_required, len(model.mandatory_genes))
        self.clean_working_dir()

    def test_min_mandatory_genes_required(self):
        """``min_mandatory_genes_required`` is the given value, else the number of mandatory genes."""
        model_fqn = 'foo/bar'
        min_mandatory_genes_required_xml = 40
        model = Model(model_fqn, 10,
                      min_mandatory_genes_required=min_mandatory_genes_required_xml)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)
        model.add_mandatory_gene(gene)
        self.assertEqual(model.min_mandatory_genes_required, min_mandatory_genes_required_xml)

        system = Model(model_fqn, 10)
        self.assertEqual(system.min_mandatory_genes_required, len(system.mandatory_genes))
        self.clean_working_dir()

    def test_max_nb_genes(self):
        """``max_nb_genes`` is the given value, else the count of mandatory + accessory genes."""
        model_fqn = 'foo/bar'
        inter_gene_max_space = 40
        max_nb_genes_xml = 10
        model = Model(model_fqn, inter_gene_max_space, max_nb_genes=max_nb_genes_xml)
        self.assertEqual(model.max_nb_genes, max_nb_genes_xml)

        model = Model(model_fqn, inter_gene_max_space)
        self.assertEqual(model.max_nb_genes, 0)
        c_gene_sctc = CoreGene(self.model_location, "sctC", self.profile_factory)
        gene_sctc = ModelGene(c_gene_sctc, model)
        c_gene_abc = CoreGene(self.model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model)
        model.add_mandatory_gene(gene_sctc)
        model.add_accessory_gene(gene_abc)
        self.assertEqual(model.max_nb_genes, 2)
        self.clean_working_dir()

    def test_multi_loci(self):
        """``multi_loci`` is False by default and follows the constructor argument."""
        model_fqn = 'foo/True'
        inter_gene_max_space = 40
        model = Model(model_fqn, inter_gene_max_space, multi_loci=True)
        self.assertTrue(model.multi_loci)

        model_fqn = 'foo/False'
        inter_gene_max_space = 40
        model = Model(model_fqn, inter_gene_max_space)
        self.assertFalse(model.multi_loci)
        self.clean_working_dir()

        self.args.multi_loci = 'foo/False'
        model_fqn = 'foo/False'
        inter_gene_max_space = 40
        model = Model(model_fqn, inter_gene_max_space, multi_loci=False)
        self.assertFalse(model.multi_loci)

    def test_accessor_mutator(self):
        """``add_<category>_gene`` fills only ``<category>_genes``, for every gene category."""
        model = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)
        categories = set(model.gene_category)
        for cat in categories:
            other_cat = categories - {cat}
            getattr(model, f'add_{cat}_gene')(gene)
            self.assertEqual(getattr(model, f'{cat}_genes'), [gene])
            for other in other_cat:
                self.assertEqual(getattr(model, f'{other}_genes'), [])
            # don't forget to reset the model
            # to avoid accumulating genes from one category to the next
            model = Model("foo", 10)

    def test_get_gene(self):
        """``get_gene`` finds a gene, or one of its exchangeables, in whatever category it is."""
        model = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)
        for meth in [getattr(model, f'add_{cat}_gene') for cat in model.gene_category]:
            # empty every category so the gene lives in exactly one of them
            for cat in model.gene_category:
                setattr(model, f'_{cat}_genes', [])
            meth(gene)
            self.assertEqual(gene, model.get_gene(gene_name))

        self.assertRaises(KeyError, model.get_gene, 'bar')

        homolog_name = 'sctJ'
        c_gene_homolog = CoreGene(self.model_location, homolog_name, self.profile_factory)
        homolog = Exchangeable(c_gene_homolog, gene)
        gene.add_exchangeable(homolog)
        for meth in [getattr(model, f'add_{cat}_gene') for cat in model.gene_category]:
            for cat in model.gene_category:
                setattr(model, f'_{cat}_genes', [])
            meth(gene)
            self.assertEqual(homolog, model.get_gene(homolog_name))

    def test_str(self):
        """The string representation lists the genes of the four categories."""
        model_fqn = "foo/bar"
        model = Model(model_fqn, 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        mandatory_gene = ModelGene(c_gene, model)
        model.add_mandatory_gene(mandatory_gene)
        homolog_name = 'sctJ'
        c_gene_homolg = CoreGene(self.model_location, homolog_name, self.profile_factory)
        homolog = Exchangeable(c_gene_homolg, mandatory_gene)
        mandatory_gene.add_exchangeable(homolog)

        gene_name = 'sctN_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        accessory_gene = ModelGene(c_gene, model)
        model.add_accessory_gene(accessory_gene)
        analog_name = 'sctN'
        c_gene_analog = CoreGene(self.model_location, analog_name, self.profile_factory)
        analog = Exchangeable(c_gene_analog, accessory_gene)
        accessory_gene.add_exchangeable(analog)

        gene_name = 'toto'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        neutral_gene = ModelGene(c_gene, model)
        model.add_neutral_gene(neutral_gene)

        gene_name = 'sctC'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        forbidden_gene = ModelGene(c_gene, model)
        model.add_forbidden_gene(forbidden_gene)

        # NOTE(review): verify the whitespace / line breaks inside this expected
        # literal against the actual output of Model.__str__.
        exp_str = """name: bar fqn: foo/bar ==== mandatory genes ==== sctJ_FLG ==== accessory genes ==== sctN_FLG ==== neutral genes ==== toto ==== forbidden genes ==== sctC ============== end pprint model ================ """
        self.assertEqual(str(model), exp_str)

    def test_eq(self):
        """Two models built with the same fqn are equal."""
        aa1 = Model("aaa", 10)
        aa2 = Model("aaa", 10)
        self.assertEqual(aa1, aa2)

    def test_lt(self):
        aaa = Model("aaa", 10)
        zzz = Model("zzz", 10)
        self.assertLess(aaa, zzz)

    def test_gt(self):
        aaa = Model("aaa", 10)
        zzz = Model("zzz", 10)
        self.assertGreater(zzz, aaa)

    def test_filter(self):
        """``filter`` keeps the hits on genes (or exchangeables) of the model, drops the others."""
        model_fqn = "foo/bar"
        model = Model(model_fqn, 10)
        model_2 = Model("foo/buz", 10)

        gene_name = 'sctJ_FLG'
        sctJ_FLG_core = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(sctJ_FLG_core, model)
        model.add_mandatory_gene(sctJ_FLG)
        gene_name = 'sctJ'
        sctJ_core = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctj = Exchangeable(sctJ_core, sctJ_FLG)
        sctJ_FLG.add_exchangeable(sctj)

        gene_name = 'sctN_FLG'
        sctN_FLG_core = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctN_FLG = ModelGene(sctN_FLG_core, model)
        model.add_accessory_gene(sctN_FLG)
        gene_name = 'sctN'
        sctN_core = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctn = Exchangeable(sctN_core, sctN_FLG)
        sctN_FLG.add_exchangeable(sctn)

        gene_name = 'sctC'
        sctC_core = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctC = ModelGene(sctC_core, model)
        model.add_forbidden_gene(sctC)

        gene_name = 'toto'
        toto_core = CoreGene(self.model_location, gene_name, self.profile_factory)
        toto = ModelGene(toto_core, model)
        model.add_neutral_gene(toto)
        gene_name = 'totote'
        totote_core = CoreGene(self.model_location, gene_name, self.profile_factory)
        totote = Exchangeable(totote_core, toto)
        toto.add_exchangeable(totote)

        # gspD and its exchangeable tadZ belong to model_2 only
        gene_name = 'gspD'
        gspd_core = CoreGene(self.model_location, gene_name, self.profile_factory)
        gspd = ModelGene(gspd_core, model_2)
        gene_name = 'tadZ'
        tadz_core = CoreGene(self.model_location, gene_name, self.profile_factory)
        tadz = Exchangeable(tadz_core, gspd)
        gspd.add_exchangeable(tadz)

        hit_to_keep = [
            CoreHit(gene, f"PSAE001c01_{gene.name}", 1, "PSAE001c01", 1, 1.0, 1.0, 1.0, 1.0, 1, 2)
            for gene in (sctJ_FLG, sctN_FLG, sctC, toto, totote)
        ]
        hit_to_filter_out = [
            CoreHit(gene, f"PSAE001c01_{gene.name}", 1, "PSAE001c01", 1, 1.0, 1.0, 1.0, 1.0, 1, 2)
            for gene in (gspd, tadz)
        ]

        filtered_hits = model.filter(hit_to_keep + hit_to_filter_out)
        self.assertListEqual(sorted(hit_to_keep), sorted(filtered_hits))

    def test_hash(self):
        """Hash is an int, equal for models with the same fqn and different otherwise."""
        model_bar = Model('Foo/bar', 10)
        model_bar_bis = Model('Foo/bar', 10)
        model_buz = Model('Foo/buz', 10)
        self.assertIsInstance(hash(model_bar), int)
        self.assertEqual(hash(model_bar), hash(model_bar_bis))
        self.assertNotEqual(hash(model_bar), hash(model_buz))
def main(args=None, loglevel=None):
    """
    main entry point to MacSyFinder.

    Parse the command line, create the working directory, initialize the loggers,
    check the options, call :func:`search_systems`, then write the result files
    in the working directory. The set of files written depends on the db_type:
    ordered ('gembase', 'ordered_replicon') or unordered.

    :param args: the arguments passed on the command line without the program name.
                 If None, ``sys.argv[1:]`` is used.
    :type args: List of string
    :param loglevel: the output verbosity. If not set, the level comes from the configuration.
    :type loglevel: a positive int or a string among 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
    :raise ValueError: if the working directory already exists and is not empty,
                       or exists and is not a directory.
    :raise OptionError: if there is no --previous-run and one of --models, --sequence-db
                        or --db-type is missing.
    """
    args = sys.argv[1:] if args is None else args
    parser, parsed_args = parse_args(args)

    defaults = MacsyDefaults()
    config = Config(defaults, parsed_args)

    ###########################
    # creation of working dir
    ###########################
    working_dir = config.working_dir()
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)
    else:
        # an existing working dir is accepted only if it is an empty directory
        if os.path.isdir(working_dir):
            if os.listdir(working_dir):
                raise ValueError(
                    f"'{working_dir}' already exists and is not a empty")
        else:
            raise ValueError(
                f"'{working_dir}' already exists and is not a directory")

    ################
    # init loggers #
    ################
    macsypy.init_logger(log_file=os.path.join(config.working_dir(),
                                              config.log_file()),
                        out=not config.mute())
    if not loglevel:
        # the log level is specified by the args options
        macsypy.logger_set_level(level=config.log_level())
    else:
        # used by unit tests to mute or unmute logs
        macsypy.logger_set_level(level=loglevel)

    logger = logging.getLogger('macsypy.macsyfinder')

    if parsed_args.list_models:
        # only list the available models, then quit
        print(list_models(parsed_args), file=sys.stdout)
        sys.exit(0)
    else:
        # Without --previous-run, --models, --sequence-db and --db-type are required.
        # The traceback is disabled so that only the help and the error message are displayed.
        if not parsed_args.previous_run and not parsed_args.models:
            parser.print_help()
            print()
            sys.tracebacklimit = 0
            raise OptionError(
                "argument --models or --previous-run is required.")
        elif not parsed_args.previous_run and not parsed_args.sequence_db:
            parser.print_help()
            print()
            sys.tracebacklimit = 0
            raise OptionError(
                "argument --sequence-db or --previous-run is required.")
        elif not parsed_args.previous_run and not parsed_args.db_type:
            parser.print_help()
            print()
            sys.tracebacklimit = 0
            raise OptionError(
                "argument --db-type or --previous-run is required.")

        # NOTE(review): _log is not defined in this function,
        # presumably a module level logger - confirm.
        _log.info(f"command used: {' '.join(sys.argv)}")

        models = ModelBank()
        genes = GeneBank()
        profile_factory = ProfileFactory(config)

        # set the weights of the hits (module level attribute of macsypy.hit)
        macsypy.hit.hit_weight = macsypy.hit.HitWeight(itself=3,
                                                       exchangeable=.75,
                                                       mandatory=2,
                                                       accessory=.25,
                                                       neutral=1.5)

        logger.info("\n{:#^70}".format(" Searching systems "))
        all_systems, rejected_clusters = search_systems(
            config, models, genes, profile_factory, logger)

        track_multi_systems_hit = HitSystemTracker(all_systems)
        if config.db_type() in ('gembase', 'ordered_replicon'):
            #############################
            # Ordered/Gembase replicons #
            #############################

            ###########################
            # select the best systems #
            ###########################
            logger.info("\n{:#^70}".format(" Computing best solutions "))
            best_solutions = []
            one_best_solution = []

            # group the systems found by replicon
            # before searching for the best combination of systems
            # NOTE(review): itertools.groupby only groups consecutive items, so this
            # presumably relies on all_systems being ordered by replicon_name - confirm
            # against search_systems.
            import time
            for rep_name, syst_group in itertools.groupby(
                    all_systems, key=lambda s: s.replicon_name):
                syst_group = list(syst_group)
                logger.info(
                    f"Computing best solutions for {rep_name} (nb of systems {len(syst_group)})"
                )
                t0 = time.time()
                best_sol_4_1_replicon, score = find_best_solutions(syst_group)
                t1 = time.time()
                logger.info(
                    f"It took {t1 - t0:.2f}sec to find best solution ({score:.2f}) for replicon {rep_name}"
                )
                # if several solutions are equivalent (same number of systems and same score)
                # store all the equivalent solutions in best_solutions => all_best_solutions.tsv
                # and pick the first one in one_best_solution => best_solution.tsv
                best_solutions.extend(best_sol_4_1_replicon)
                one_best_solution.append(best_sol_4_1_replicon[0])

            ##############################
            # Write the results in files #
            ##############################
            logger.info("\n{:#^70}".format(" Writing down results "))
            system_filename = os.path.join(config.working_dir(),
                                           "all_systems.txt")
            tsv_filename = os.path.join(config.working_dir(),
                                        "all_systems.tsv")

            with open(system_filename, "w") as sys_file:
                systems_to_txt(all_systems, track_multi_systems_hit, sys_file)

            with open(tsv_filename, "w") as tsv_file:
                systems_to_tsv(all_systems, track_multi_systems_hit, tsv_file)

            cluster_filename = os.path.join(config.working_dir(),
                                            "rejected_clusters.txt")
            with open(cluster_filename, "w") as clst_file:
                # the list is sorted in place before being written
                rejected_clusters.sort(key=lambda clst: (
                    clst.replicon_name, clst.model, clst.hits))
                rejected_clst_to_txt(rejected_clusters, clst_file)
            if not (all_systems or rejected_clusters):
                logger.info("No Systems found in this dataset.")

            tsv_filename = os.path.join(config.working_dir(),
                                        "all_best_solutions.tsv")
            with open(tsv_filename, "w") as tsv_file:
                solutions_to_tsv(best_solutions, track_multi_systems_hit,
                                 tsv_file)

            tsv_filename = os.path.join(config.working_dir(),
                                        "best_solution.tsv")
            with open(tsv_filename, "w") as tsv_file:
                # flatten the list of solutions (one per replicon) into a list of systems
                # and sort it
                one_best_solution = [
                    syst for sol in one_best_solution for syst in sol
                ]
                one_best_solution.sort(
                    key=lambda syst: (syst.replicon_name, syst.position[0],
                                      syst.model.fqn, -syst.score))
                systems_to_tsv(one_best_solution, track_multi_systems_hit,
                               tsv_file)
        else:
            #######################
            # Unordered replicons #
            #######################

            ##############################
            # Write the results in files #
            ##############################
            logger.info("\n{:#^70}".format(" Writing down results "))

            system_filename = os.path.join(config.working_dir(),
                                           "all_systems.txt")
            with open(system_filename, "w") as sys_file:
                likely_systems_to_txt(all_systems, track_multi_systems_hit,
                                      sys_file)

            # forbidden = [s for s in all_systems if s.forbidden_occ]
            # system_filename = os.path.join(config.working_dir(), "forbidden_components.tsv")
            # with open(system_filename, "w") as sys_file:
            #     likely_systems_to_tsv(forbidden, track_multi_systems_hit, sys_file)

            system_filename = os.path.join(config.working_dir(),
                                           "all_systems.tsv")
            with open(system_filename, "w") as sys_file:
                likely_systems_to_tsv(all_systems, track_multi_systems_hit,
                                      sys_file)

            cluster_filename = os.path.join(config.working_dir(),
                                            "uncomplete_systems.txt")
            with open(cluster_filename, "w") as clst_file:
                unlikely_systems_to_txt(rejected_clusters, clst_file)

            if not (all_systems or rejected_clusters):
                logger.info("No Systems found in this dataset.")
    logger.info("END")
class Test(MacsyTest): def __init__(self, methodName='runTest'): super(Test, self).__init__(methodName) def fake_init(obj, cfg): obj.cfg = cfg obj._idx = Indexes(cfg) obj.sequence_idx = obj._idx.find_my_indexes() obj.topology_file = cfg.topology_file() obj._DB = {} self.fake_init = fake_init self.real_init = RepliconDB.__init__ def setUp(self): self.args = argparse.Namespace() self.args.db_type = 'gembase' self.args.models_dir = self.find_data('models') self.args.res_search_dir = tempfile.gettempdir() self.args.log_level = 30 self.args.out_dir = os.path.join(self.args.res_search_dir, 'test_macsyfinder_repliconDB') if os.path.exists(self.args.out_dir): shutil.rmtree(self.args.out_dir) os.mkdir(self.args.out_dir) seq_db = self.find_data("base", "test_base.fa") shutil.copy(seq_db, self.args.out_dir) self.args.sequence_db = os.path.join(self.args.out_dir, os.path.basename(seq_db)) self.cfg = Config(MacsyDefaults(), self.args) self.ESCO030p01_genes = [('000010', 886), ('000020', 291), ('000030', 656), ('000040', 500), ('000050', 407), ('000060', 144), ('000070', 183), ('000080', 121), ('000090', 199), ('000100', 325), ('000110', 425), ('000120', 171), ('000130', 277), ('000140', 133), ('000150', 108), ('000160', 295), ('000170', 273), ('000180', 367), ('000190', 573), ('000200', 343), ('000210', 295), ('000220', 108), ('000230', 117), ('000240', 153), ('000250', 479), ('000260', 706), ('000270', 998), ('000280', 171), ('000290', 108), ('000300', 295), ('000310', 165), ('000320', 243), ('000330', 295), ('000340', 108), ('000350', 1755), ('000360', 248), ('000370', 286), ('000380', 186), ('000390', 83), ('000400', 153), ('000410', 69), ('000420', 295), ('000430', 108), ('000440', 145), ('000450', 59), ('000460', 124), ('000470', 246), ('000480', 325), ('000490', 54), ('000500', 95), ('000510', 83), ('000520', 56), ('000530', 401), ('000540', 320), ('000550', 256), ('000560', 73), ('000570', 144), ('000580', 258), ('000590', 133), ('000600', 140), ('000610', 63), 
('000620', 138), ('000630', 68), ('000640', 169), ('000650', 127), ('000660', 295), ('000670', 108), ('000670', 108)] self.PSAE001c01_genes = [('006940', 803), ('013980', 759), ('017350', 600), ('018920', 776), ('026600', 273), ('031420', 658), ('043580', 416), ('051090', 714), ('055870', 449), ('055880', 447), ('055890', 588), ('055900', 292), ('055910', 262), ('055920', 166), ('055930', 288), ('055940', 194), ('055950', 567), ('055960', 188), ('055970', 247), ('055980', 252), ('055990', 455), ('056000', 450), ('056010', 260), ('056020', 246), ('056030', 70), ('056040', 133), ('056050', 284), ('056060', 585), ('056070', 435), ('056080', 342), ('056090', 252), ('056100', 122), ('056110', 213), ('056120', 400), ('056130', 134), ('056140', 138), ('056150', 397), ('056160', 298), ('056170', 186), ('056180', 445), ('056190', 414), ('056200', 132), ('056210', 674), ('056220', 319), ('056230', 394), ('056240', 207), ('056250', 401), ('056260', 611), ('056270', 257), ('056280', 169), ('056290', 454), ('056300', 141), ('056310', 458), ('056320', 286), ('056330', 514), ('056340', 178), ('056350', 156), ('056360', 85), ('056370', 289), ('056380', 126), ('056390', 290), ('056400', 262), ('056410', 214), ('056420', 630), ('056430', 127), ('056440', 455), ('056440', 455)] self.NCDB_genes = [('056134', 289), ('056135', 126), ('056136', 290), ('056137', 262), ('056138', 214), ('056139', 630), ('056140', 127), ('056141', 803), ('056141', 803)] self.idx = Indexes(self.cfg) self.idx.build() def tearDown(self): try: shutil.rmtree(self.cfg.working_dir()) except: pass RepliconDB.__init__ = self.real_init def test_fill_topology(self): self.args.topology_file = self.args.sequence_db + ".topo" db_send = {'ESCO030p01': 'circular', 'PSAE001c01': 'linear'} with open(self.args.topology_file, 'w') as f: for k, v in list(db_send.items()): f.write('{0} : {1}\n'.format(k, v)) cfg = Config(MacsyDefaults(), self.args) RepliconDB.__init__ = self.fake_init db = RepliconDB(cfg) rcv_topo = 
db._fill_topology() self.assertDictEqual(db_send, rcv_topo) def test_fill_ordered_replicon_min_max(self): seq_ori = self.find_data("base", "ordered_replicon_base.fasta") shutil.copy(seq_ori, self.args.out_dir) self.args.sequence_db = os.path.join(self.args.out_dir, os.path.basename(seq_ori)) cfg = Config(MacsyDefaults(), self.args) idx = Indexes(cfg) idx.build() RepliconDB.__init__ = self.fake_init db = RepliconDB(cfg) db._fill_ordered_min_max(cfg.replicon_topology()) self.assertEqual(len(db._DB), 1) rep = db[RepliconDB.ordered_replicon_name] self.assertEqual(rep.topology, cfg.replicon_topology()) self.assertEqual(rep.min, 1) self.assertEqual(rep.max, 52) def test_fill_gembase_min_max_default_topology(self): RepliconDB.__init__ = self.fake_init db = RepliconDB(self.cfg) db._fill_gembase_min_max({}, self.cfg.replicon_topology()) self.assertEqual(len(db._DB), 3) self.assertEqual(set(db._DB.keys()), set(['ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'])) PRRU001c01 = db['ESCO030p01'] self.assertEqual(PRRU001c01.topology, 'circular') self.assertEqual(PRRU001c01.min, 1) self.assertEqual(PRRU001c01.max, 67) self.assertEqual(PRRU001c01.genes, self.ESCO030p01_genes) PSAE001c01 = db['PSAE001c01'] self.assertEqual(PSAE001c01.topology, 'circular') self.assertEqual(PSAE001c01.min, 68) self.assertEqual(PSAE001c01.max, 133) self.assertEqual(PSAE001c01.genes, self.PSAE001c01_genes) DBNC = db['NC_xxxxx_xx'] self.assertEqual(DBNC.topology, 'circular') self.assertEqual(DBNC.min, 134) self.assertEqual(DBNC.max, 141) self.assertEqual(DBNC.genes, self.NCDB_genes) def test_fill_gembase_min_max_oredered_replicon(self): seq_ori = self.find_data("base", "ordered_replicon_base.fasta") shutil.copy(seq_ori, self.args.out_dir) self.args.sequence_db = os.path.join(self.args.out_dir, os.path.basename(seq_ori)) cfg = Config(MacsyDefaults(), self.args) idx = Indexes(cfg) idx.build() RepliconDB.__init__ = self.fake_init db = RepliconDB(cfg) with self.assertRaises(MacsypyError) as ctx: with 
self.catch_log() as log: db._fill_gembase_min_max({}, self.cfg.replicon_topology()) self.assertEqual( str(ctx.exception), f"Error during sequence-db '{self.args.sequence_db}' parsing. " f"Are you sure db-type is 'gembase'?") def test_fill_gembase_min_max_with_topology(self): self.args.topology_file = self.args.sequence_db + ".topo" with open(self.args.topology_file, 'w') as f: f.write( '# topology file\nESCO030p01 : circular\nPSAE001c01 : linear\n' ) cfg = Config(MacsyDefaults(), self.args) RepliconDB.__init__ = self.fake_init db = RepliconDB(cfg) topo_dict = db._fill_topology() db._fill_gembase_min_max(topo_dict, 'circular') self.assertEqual(len(db._DB), 3) self.assertEqual(set(db._DB.keys()), set(['ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'])) ESCO030p01 = db['ESCO030p01'] self.assertEqual(ESCO030p01.topology, 'circular') self.assertEqual(ESCO030p01.min, 1) self.assertEqual(ESCO030p01.max, 67) self.assertEqual(ESCO030p01.genes, self.ESCO030p01_genes) PSAE001c01 = db['PSAE001c01'] self.assertEqual(PSAE001c01.topology, 'linear') self.assertEqual(PSAE001c01.min, 68) self.assertEqual(PSAE001c01.max, 133) self.assertEqual(PSAE001c01.genes, self.PSAE001c01_genes) DBNC = db['NC_xxxxx_xx'] self.assertEqual(DBNC.topology, 'circular') self.assertEqual(DBNC.min, 134) self.assertEqual(DBNC.max, 141) self.assertEqual(DBNC.genes, self.NCDB_genes) def test_in(self): db = RepliconDB(self.cfg) self.assertIn('ESCO030p01', db) self.assertIn('PSAE001c01', db) self.assertIn('NC_xxxxx_xx', db) self.assertNotIn('toto', db) def test_getitem(self): db = RepliconDB(self.cfg) ESCO030p01 = RepliconInfo(self.cfg.replicon_topology(), 1, 67, self.ESCO030p01_genes) PSAE001c01 = RepliconInfo(self.cfg.replicon_topology(), 68, 133, self.PSAE001c01_genes) NCXX = RepliconInfo("circular", 134, 141, self.NCDB_genes) self.assertEqual(ESCO030p01, db['ESCO030p01']) self.assertEqual(PSAE001c01, db['PSAE001c01']) self.assertEqual(NCXX, db['NC_xxxxx_xx']) self.assertRaises(KeyError, db.__getitem__, 'foo') def 
test_get(self): db = RepliconDB(self.cfg) ESCO030p01 = RepliconInfo(self.cfg.replicon_topology(), 1, 67, self.ESCO030p01_genes) PSAE001c01 = RepliconInfo(self.cfg.replicon_topology(), 68, 133, self.PSAE001c01_genes) NCXX = RepliconInfo("circular", 134, 141, self.NCDB_genes) self.assertEqual(ESCO030p01, db.get('ESCO030p01')) self.assertEqual(PSAE001c01, db.get('PSAE001c01')) self.assertEqual(NCXX, db.get('NC_xxxxx_xx', 'foo')) self.assertIsNone(db.get('foo')) self.assertEqual('bar', db.get('foo', 'bar')) def test_items(self): db = RepliconDB(self.cfg) ESCO030p01 = RepliconInfo(self.cfg.replicon_topology(), 1, 67, self.ESCO030p01_genes) PSAE001c01 = RepliconInfo(self.cfg.replicon_topology(), 68, 133, self.PSAE001c01_genes) NCXX = RepliconInfo("circular", 134, 141, self.NCDB_genes) self.assertCountEqual(list(db.items()), [('ESCO030p01', ESCO030p01), ('NC_xxxxx_xx', NCXX), ('PSAE001c01', PSAE001c01)]) def test_iteritems(self): db = RepliconDB(self.cfg) ESCO030p01 = RepliconInfo(self.cfg.replicon_topology(), 1, 67, self.ESCO030p01_genes) PSAE001c01 = RepliconInfo(self.cfg.replicon_topology(), 68, 133, self.PSAE001c01_genes) NCXX = RepliconInfo("circular", 134, 141, self.NCDB_genes) iter_items = db.iteritems() for item in [('ESCO030p01', ESCO030p01), ('PSAE001c01', PSAE001c01), ('NC_xxxxx_xx', NCXX)]: with self.subTest(item=item): self.assertEqual(next(iter_items), item) def test_names(self): db = RepliconDB(self.cfg) exp_name = ['ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'] self.assertListEqual(db.replicon_names(), exp_name) def test_replicon_infos(self): db = RepliconDB(self.cfg) ESCO030p01 = RepliconInfo(self.cfg.replicon_topology(), 1, 67, self.ESCO030p01_genes) PSAE001c01 = RepliconInfo(self.cfg.replicon_topology(), 68, 133, self.PSAE001c01_genes) NCXX = RepliconInfo("circular", 134, 141, self.NCDB_genes) values = db.replicon_infos() self.assertCountEqual(values, [ESCO030p01, NCXX, PSAE001c01])
class Test(MacsyTest):
    """
    Tests for RepliconDB / RepliconInfo on the gembase dataset ``test_base.fa``.

    Several tests replace ``RepliconDB.__init__`` with a lightweight
    initializer so that the private ``_fill_*`` methods can be exercised
    one at a time on an empty database.
    """

    def __init__(self, methodName='runTest'):
        super().__init__(methodName)

        def fake_init(obj, cfg):
            # Minimal replacement for RepliconDB.__init__: set the attributes
            # the _fill_* methods rely on, but leave the database empty.
            obj.cfg = cfg
            idx = Indexes(cfg)
            obj.sequence_idx = idx.find_my_indexes()
            obj.topology_file = cfg.topology_file()
            obj._DB = {}

        self.fake_init = fake_init
        # keep a reference on the genuine initializer to restore it in tearDown
        self.real_init = RepliconDB.__init__

    def setUp(self):
        self.args = argparse.Namespace()
        self.args.db_type = 'gembase'
        self.args.models_dir = self.find_data('models')
        self.args.res_search_dir = tempfile.gettempdir()
        self.args.log_level = 30
        self.args.out_dir = os.path.join(self.args.res_search_dir,
                                         'test_macsyfinder_repliconDB')
        # always start from a fresh output directory
        if os.path.exists(self.args.out_dir):
            shutil.rmtree(self.args.out_dir)
        os.mkdir(self.args.out_dir)

        # work on a copy of the sequence file placed in the output directory
        seq_db = self.find_data("base", "test_base.fa")
        shutil.copy(seq_db, self.args.out_dir)
        self.args.sequence_db = os.path.join(self.args.out_dir, os.path.basename(seq_db))
        self.cfg = Config(MacsyDefaults(), self.args)

        # expected genes for each replicon of test_base.fa
        self.ESCO030p01_genes = [
            ('000010', '886'), ('000020', '291'), ('000030', '656'), ('000040', '500'),
            ('000050', '407'), ('000060', '144'), ('000070', '183'), ('000080', '121'),
            ('000090', '199'), ('000100', '325'), ('000110', '425'), ('000120', '171'),
            ('000130', '277'), ('000140', '133'), ('000150', '108'), ('000160', '295'),
            ('000170', '273'), ('000180', '367'), ('000190', '573'), ('000200', '343'),
            ('000210', '295'), ('000220', '108'), ('000230', '117'), ('000240', '153'),
            ('000250', '479'), ('000260', '706'), ('000270', '998'), ('000280', '171'),
            ('000290', '108'), ('000300', '295'), ('000310', '165'), ('000320', '243'),
            ('000330', '295'), ('000340', '108'), ('000350', '1755'), ('000360', '248'),
            ('000370', '286'), ('000380', '186'), ('000390', '83'), ('000400', '153'),
            ('000410', '69'), ('000420', '295'), ('000430', '108'), ('000440', '145'),
            ('000450', '59'), ('000460', '124'), ('000470', '246'), ('000480', '325'),
            ('000490', '54'), ('000500', '95'), ('000510', '83'), ('000520', '56'),
            ('000530', '401'), ('000540', '320'), ('000550', '256'), ('000560', '73'),
            ('000570', '144'), ('000580', '258'), ('000590', '133'), ('000600', '140'),
            ('000610', '63'), ('000620', '138'), ('000630', '68'), ('000640', '169'),
            ('000650', '127'), ('000660', '295'), ('000670', '108'), ('000670', '108'),
        ]
        self.PSAE001c01_genes = [
            ('006940', '803'), ('013980', '759'), ('017350', '600'), ('018920', '776'),
            ('026600', '273'), ('031420', '658'), ('043580', '416'), ('051090', '714'),
            ('055870', '449'), ('055880', '447'), ('055890', '588'), ('055900', '292'),
            ('055910', '262'), ('055920', '166'), ('055930', '288'), ('055940', '194'),
            ('055950', '567'), ('055960', '188'), ('055970', '247'), ('055980', '252'),
            ('055990', '455'), ('056000', '450'), ('056010', '260'), ('056020', '246'),
            ('056030', '70'), ('056040', '133'), ('056050', '284'), ('056060', '585'),
            ('056070', '435'), ('056080', '342'), ('056090', '252'), ('056100', '122'),
            ('056110', '213'), ('056120', '400'), ('056130', '134'), ('056140', '138'),
            ('056150', '397'), ('056160', '298'), ('056170', '186'), ('056180', '445'),
            ('056190', '414'), ('056200', '132'), ('056210', '674'), ('056220', '319'),
            ('056230', '394'), ('056240', '207'), ('056250', '401'), ('056260', '611'),
            ('056270', '257'), ('056280', '169'), ('056290', '454'), ('056300', '141'),
            ('056310', '458'), ('056320', '286'), ('056330', '514'), ('056340', '178'),
            ('056350', '156'), ('056360', '85'), ('056370', '289'), ('056380', '126'),
            ('056390', '290'), ('056400', '262'), ('056410', '214'), ('056420', '630'),
            ('056430', '127'), ('056440', '455'), ('056440', '455'),
        ]
        self.NCDB_genes = [
            ('056134', '289'), ('056135', '126'), ('056136', '290'), ('056137', '262'),
            ('056138', '214'), ('056139', '630'), ('056140', '127'), ('056141', '803'),
            ('056141', '803'),
        ]

        idx = Indexes(self.cfg)
        idx._build_my_indexes()

    def tearDown(self):
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # best-effort cleanup: a failure to remove the directory must not
            # fail the test (but do not swallow KeyboardInterrupt/SystemExit)
            pass
        finally:
            # always undo the monkey patching performed by some tests
            RepliconDB.__init__ = self.real_init

    def _expected_infos(self):
        """
        :return: the RepliconInfo expected for ESCO030p01, PSAE001c01 and
                 NC_xxxxx_xx (in this order) when no topology file is given.
        """
        return (
            RepliconInfo(self.cfg.replicon_topology(), 1, 67, self.ESCO030p01_genes),
            RepliconInfo(self.cfg.replicon_topology(), 68, 133, self.PSAE001c01_genes),
            RepliconInfo("circular", 134, 141, self.NCDB_genes),
        )

    def _assert_replicon(self, db, name, topology, first, last, genes):
        """
        Check topology, min, max and genes of the replicon *name* stored in *db*.
        """
        info = db[name]
        self.assertEqual(info.topology, topology)
        self.assertEqual(info.min, first)
        self.assertEqual(info.max, last)
        self.assertEqual(info.genes, genes)

    def test_fill_topology(self):
        self.args.topology_file = self.args.sequence_db + ".topo"
        db_send = {'ESCO030p01': 'circular', 'PSAE001c01': 'linear'}
        with open(self.args.topology_file, 'w') as f:
            for replicon, topology in db_send.items():
                f.write(f'{replicon} : {topology}\n')
        cfg = Config(MacsyDefaults(), self.args)
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        rcv_topo = db._fill_topology()
        self.assertDictEqual(db_send, rcv_topo)

    def test_fill_ordered_replicon_min_max(self):
        seq_ori = self.find_data("base", "ordered_replicon_base.fasta")
        shutil.copy(seq_ori, self.args.out_dir)
        self.args.sequence_db = os.path.join(self.args.out_dir, os.path.basename(seq_ori))
        cfg = Config(MacsyDefaults(), self.args)
        idx = Indexes(cfg)
        idx._build_my_indexes()
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        db._fill_ordered_min_max(cfg.replicon_topology())

        self.assertEqual(len(db._DB), 1)
        rep = db[RepliconDB.ordered_replicon_name]
        self.assertEqual(rep.topology, cfg.replicon_topology())
        self.assertEqual(rep.min, 1)
        self.assertEqual(rep.max, 52)

    def test_fill_gembase_min_max_default_topology(self):
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(self.cfg)
        db._fill_gembase_min_max({}, self.cfg.replicon_topology())

        self.assertEqual(len(db._DB), 3)
        self.assertEqual(set(db._DB.keys()), {'ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'})
        self._assert_replicon(db, 'ESCO030p01', 'circular', 1, 67, self.ESCO030p01_genes)
        self._assert_replicon(db, 'PSAE001c01', 'circular', 68, 133, self.PSAE001c01_genes)
        self._assert_replicon(db, 'NC_xxxxx_xx', 'circular', 134, 141, self.NCDB_genes)

    def test_fill_gembase_min_max_with_topology(self):
        self.args.topology_file = self.args.sequence_db + ".topo"
        with open(self.args.topology_file, 'w') as f:
            f.write('# topology file\nESCO030p01 : circular\nPSAE001c01 : linear\n')
        cfg = Config(MacsyDefaults(), self.args)
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        topo_dict = db._fill_topology()
        db._fill_gembase_min_max(topo_dict, 'circular')

        self.assertEqual(len(db._DB), 3)
        self.assertEqual(set(db._DB.keys()), {'ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'})
        self._assert_replicon(db, 'ESCO030p01', 'circular', 1, 67, self.ESCO030p01_genes)
        # PSAE001c01 is declared linear in the topology file
        self._assert_replicon(db, 'PSAE001c01', 'linear', 68, 133, self.PSAE001c01_genes)
        # NC_xxxxx_xx is not in the topology file: the default is used
        self._assert_replicon(db, 'NC_xxxxx_xx', 'circular', 134, 141, self.NCDB_genes)

    def test_in(self):
        db = RepliconDB(self.cfg)
        self.assertIn('ESCO030p01', db)
        self.assertIn('PSAE001c01', db)
        self.assertIn('NC_xxxxx_xx', db)
        self.assertNotIn('toto', db)

    def test_getitem(self):
        db = RepliconDB(self.cfg)
        ESCO030p01, PSAE001c01, NCXX = self._expected_infos()
        self.assertEqual(ESCO030p01, db['ESCO030p01'])
        self.assertEqual(PSAE001c01, db['PSAE001c01'])
        self.assertEqual(NCXX, db['NC_xxxxx_xx'])
        self.assertRaises(KeyError, db.__getitem__, 'foo')

    def test_get(self):
        db = RepliconDB(self.cfg)
        ESCO030p01, PSAE001c01, NCXX = self._expected_infos()
        self.assertEqual(ESCO030p01, db.get('ESCO030p01'))
        self.assertEqual(PSAE001c01, db.get('PSAE001c01'))
        self.assertEqual(NCXX, db.get('NC_xxxxx_xx', 'foo'))
        self.assertIsNone(db.get('foo'))
        self.assertEqual('bar', db.get('foo', 'bar'))

    def test_items(self):
        db = RepliconDB(self.cfg)
        ESCO030p01, PSAE001c01, NCXX = self._expected_infos()
        self.assertCountEqual(list(db.items()),
                              [('ESCO030p01', ESCO030p01),
                               ('NC_xxxxx_xx', NCXX),
                               ('PSAE001c01', PSAE001c01)])

    def test_iteritems(self):
        db = RepliconDB(self.cfg)
        ESCO030p01, PSAE001c01, NCXX = self._expected_infos()
        self.assertCountEqual(iter(db.items()),
                              [('ESCO030p01', ESCO030p01),
                               ('NC_xxxxx_xx', NCXX),
                               ('PSAE001c01', PSAE001c01)])

    def test_replicon_infos(self):
        db = RepliconDB(self.cfg)
        ESCO030p01, PSAE001c01, NCXX = self._expected_infos()
        values = db.replicon_infos()
        self.assertCountEqual(values, [ESCO030p01, NCXX, PSAE001c01])
class TestSearchGenes(MacsyTest):
    """
    Tests for search_genes: a fresh hmmsearch run, then a second run which
    recovers the results of the first one.
    """

    def setUp(self):
        # every job directory of this test case lives under self.tmp_dir
        self.tmp_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_search_genes')
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        os.mkdir(self.tmp_dir)

        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_base.fa")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.log_level = 30
        args.out_dir = os.path.join(self.tmp_dir, 'job_1')
        args.res_search_dir = args.out_dir
        os.mkdir(args.out_dir)

        self.cfg = Config(MacsyDefaults(), args)
        self.model_name = 'foo'
        self.model_location = ModelLocation(path=os.path.join(args.models_dir, self.model_name))
        idx = Indexes(self.cfg)
        idx._build_my_indexes()
        self.profile_factory = ProfileFactory(self.cfg)

    def tearDown(self):
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # best-effort cleanup, must not make the test fail
            pass
        # setUp (job_1) and test_search_recover (job_2) create directories
        # under tmp_dir: remove the whole tree so nothing is left behind
        shutil.rmtree(self.tmp_dir, ignore_errors=True)

    @staticmethod
    def _expected_abc_hit(c_gene):
        """
        :param c_gene: the CoreGene 'abc' used for the search
        :return: the Hit expected when searching 'abc' in test_base.fa
        """
        return Hit(c_gene, "ESCO030p01_000260", 706, "ESCO030p01", 26,
                   1.000e-200, 660.800, 1.000, 0.714, 160, 663)

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_search(self):
        gene_name = "abc"
        c_gene_abc = CoreGene(self.model_location, gene_name, self.profile_factory)
        report = search_genes([c_gene_abc], self.cfg)
        expected_hit = self._expected_abc_hit(c_gene_abc)
        self.assertEqual(len(report), 1)
        self.assertEqual(expected_hit, report[0].hits[0])

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_search_recover(self):
        # first job searching using hmmsearch
        gene_name = "abc"
        c_gene_abc = CoreGene(self.model_location, gene_name, self.profile_factory)
        report = search_genes([c_gene_abc], self.cfg)
        # the expected hit refers to the gene of the first job
        expected_hit = self._expected_abc_hit(c_gene_abc)

        # second job using recover
        # disable hmmer to be sure that the test uses the recover inner function
        self.cfg.hmmer = lambda: "hmmer_disable"
        # and create a new dir for the second job
        previous_job_path = self.cfg.working_dir()
        self.cfg.previous_run = lambda: previous_job_path
        self.cfg.out_dir = lambda: os.path.join(self.tmp_dir, 'job_2')
        os.mkdir(self.cfg.out_dir())

        # rerun with previous run
        # but we have to reset the profile attached to the gene gene._profile._report
        self.profile_factory = ProfileFactory(self.cfg)
        c_gene_abc = CoreGene(self.model_location, gene_name, self.profile_factory)
        report = search_genes([c_gene_abc], self.cfg)
        self.assertEqual(len(report), 1)
        self.assertEqual(expected_hit, report[0].hits[0])
class TestIndex(MacsyTest):
    """
    Tests for Indexes: locating, building, validating and iterating over
    the index of the ``test_1.fasta`` sequence file.
    """

    # (sequence id, length, rank) expected in the index built from test_1.fasta.
    # Shared by test_build_with_idx (file content) and test_iter (parsed values).
    _EXPECTED_ENTRIES = [
        ('VICH001.B.00001.C001_01359', 200, 1),
        ('VICH001.B.00001.C001_01360', 484, 2),
        ('VICH001.B.00001.C001_01361', 406, 3),
        ('VICH001.B.00001.C001_01390', 326, 4),
        ('VICH001.B.00001.C001_01391', 54, 5),
        ('VICH001.B.00001.C001_01392', 206, 6),
        ('VICH001.B.00001.C001_01393', 477, 7),
        ('VICH001.B.00001.C001_01394', 126, 8),
        ('VICH001.B.00001.C001_01395', 405, 9),
        ('VICH001.B.00001.C001_01396', 572, 10),
        ('VICH001.B.00001.C001_01397', 721, 11),
        ('VICH001.B.00001.C001_01398', 467, 12),
        ('VICH001.B.00001.C001_01399', 720, 13),
        ('VICH001.B.00001.C001_01400', 559, 14),
        ('VICH001.B.00001.C001_01401', 153, 15),
        ('VICH001.B.00001.C001_01402', 4558, 16),
        ('VICH001.B.00001.C001_01500', 120, 17),
        ('VICH001.B.00001.C001_01501', 344, 18),
        ('VICH001.B.00001.C001_01502', 478, 19),
        ('VICH001.B.00001.C001_01503', 724, 20),
        ('VICH001.B.00001.C001_01504', 309, 21),
        ('VICH001.B.00001.C001_01505', 390, 22),
        ('VICH001.B.00001.C001_01506', 419, 23),
        ('VICH001.B.00001.C001_01540', 353, 24),
        ('VICH001.B.00001.C001_01541', 229, 25),
        ('VICH001.B.00001.C001_01542', 267, 26),
        ('VICH001.B.00001.C001_01543', 328, 27),
        ('VICH001.B.00001.C001_01544', 258, 28),
        ('VICH001.B.00001.C001_01545', 228, 29),
        ('VICH001.B.00001.C001_01546', 538, 30),
        ('VICH001.B.00001.C001_01547', 77, 31),
        ('VICH001.B.00001.C001_01548', 476, 32),
        ('VICH001.B.00001.C001_01549', 324, 33),
        ('VICH001.B.00001.C001_01550', 387, 34),
        ('VICH001.B.00001.C001_01551', 382, 35),
        ('VICH001.B.00001.C001_01552', 149, 36),
        ('VICH001.B.00001.C001_01553', 319, 37),
        ('VICH001.B.00001.C001_01554', 237, 38),
        ('VICH001.B.00001.C001_01555', 74, 39),
        ('VICH001.B.00001.C001_01556', 362, 40),
        ('VICH001.B.00001.C001_01557', 170, 41),
        ('VICH001.B.00001.C001_01558', 77, 42),
        ('VICH001.B.00001.C001_01559', 296, 43),
        ('VICH001.B.00001.C001_01560', 405, 44),
        ('VICH001.B.00001.C001_01561', 182, 45),
        ('VICH001.B.00001.C001_01562', 445, 46),
        ('VICH001.B.00001.C001_01563', 212, 47),
        ('VICH001.B.00001.C001_01564', 387, 48),
        ('VICH001.B.00001.C001_01565', 414, 49),
    ]

    def setUp(self):
        args = argparse.Namespace()
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.out_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_indexes')
        # always start from a fresh output directory
        if os.path.exists(args.out_dir):
            shutil.rmtree(args.out_dir)
        os.makedirs(args.out_dir)

        # work on a copy of the sequence file, the index is written beside it
        seq_db = self.find_data("base", "test_1.fasta")
        shutil.copy(seq_db, args.out_dir)
        args.index_dir = args.out_dir
        args.sequence_db = os.path.join(args.out_dir, os.path.basename(seq_db))
        self.cfg = Config(MacsyDefaults(), args)

    def tearDown(self):
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # best-effort cleanup, must not make the test fail
            pass

    def _index_dir_args(self, sequence_db, index_dir=None):
        """
        Build the command line namespace used by test_index_dir.

        :param str sequence_db: the value of the sequence_db option
        :param index_dir: the value of the index_dir option;
                          the attribute is not set at all when None
        :return: a new :class:`argparse.Namespace`
        """
        args = argparse.Namespace()
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.out_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_indexes')
        if index_dir is not None:
            args.index_dir = index_dir
        args.sequence_db = sequence_db
        return args

    def test_find_my_indexes(self):
        idx = Indexes(self.cfg)
        self.assertIsNone(idx.find_my_indexes())
        new_idx = os.path.join(os.path.dirname(self.cfg.sequence_db()), idx.name + ".idx")
        # an empty file is enough to be found
        with open(new_idx, 'w'):
            pass
        self.assertEqual(idx.find_my_indexes(), new_idx)

    def test_build_no_idx(self):
        idx = Indexes(self.cfg)
        my_idx = idx.build()
        self.assertEqual(my_idx,
                         os.path.join(os.path.dirname(self.cfg.sequence_db()), idx.name + ".idx"))

    def test_build_with_idx(self):
        idx = Indexes(self.cfg)
        sep = idx._field_separator
        idx_path = os.path.join(os.path.dirname(self.cfg.sequence_db()), idx.name + ".idx")
        old_format_msg = f"The '{idx_path}' index file is in old format. Force index building."

        # case new style idx
        with open(idx_path, 'w') as idx_file:
            idx_content_new = f"{self.cfg.sequence_db()}\nVICH001.B.00001.C001_01359{sep}200{sep}1\n"
            idx_file.write(idx_content_new)
        idx.build()
        # the existing index is valid: it must not have been rebuilt
        self.assertEqual(os.path.getsize(idx_file.name), len(idx_content_new))

        # case old style no path as first line
        with open(idx_path, 'w') as idx_file:
            idx_content_old = "VICH001.B.00001.C001_01359;200;1\n"
            idx_file.write(idx_content_old)
        with self.catch_log(log_name='macsypy') as log:
            _ = idx.build()
            log_msg = log.get_value().strip()
        self.assertEqual(log_msg, old_format_msg)

        # case old style bad separator
        with open(idx_path, 'w') as idx_file:
            idx_content_old = f"{self.cfg.sequence_db()}\nVICH001.B.00001.C001_01359;200;1\n"
            idx_file.write(idx_content_old)
        with self.catch_log(log_name='macsypy') as log:
            _ = idx.build()
            log_msg = log.get_value().strip()
        self.assertEqual(log_msg, old_format_msg)

        # case idx seems valid read it
        with open(idx_path) as idx_file_test:
            data = idx_file_test.read()
        # first line: path of the sequence file, then one line per sequence
        expected_lines = [self.cfg.sequence_db()]
        expected_lines.extend(f"{seq_id}{sep}{length}{sep}{rank}"
                              for seq_id, length, rank in self._EXPECTED_ENTRIES)
        new_content = "\n".join(expected_lines) + "\n"
        self.assertEqual(data, new_content)

    def test_build_force(self):
        idx = Indexes(self.cfg)
        idx.build(force=True)
        my_idx = idx.find_my_indexes()
        self.assertNotEqual(os.path.getsize(my_idx), 0)

    @unittest.skipIf(platform.system() == 'Windows' or os.getuid() == 0,
                     'Skip test on Windows or if run as root')
    def test_build_not_writable(self):
        # Skipped on Windows, since setting the folder permissions does not affect the files inside.
        # In a Singularity container the tests are run as root, so this test makes no sense there.
        idx = Indexes(self.cfg)
        idx_dir = os.path.join(os.path.dirname(self.cfg.sequence_db()))
        os.chmod(idx_dir, 0o000)
        try:
            with self.assertRaises(IOError) as ctx:
                with self.catch_log():
                    idx.build()
            self.assertEqual(f"The '{idx_dir}' dir is not writable.", str(ctx.exception))
        finally:
            # restore the rights, otherwise the directory cannot be cleaned
            os.chmod(idx_dir, 0o777)

    @unittest.skipIf(platform.system() == 'Windows' or os.getuid() == 0,
                     'Skip test on Windows or if run as root')
    def test_index_dir(self):
        out_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_indexes')
        seq_db_name = os.path.basename(self.cfg.sequence_db())

        # case index-dir is not specified and the sequence-db dir is not writable
        args = self._index_dir_args(os.path.join(out_dir, seq_db_name))
        cfg = Config(MacsyDefaults(), args)
        idx = Indexes(cfg)
        index_dir = idx._index_dir(build=False)
        expc_idx_dir = os.path.dirname(cfg.sequence_db())
        self.assertEqual(index_dir, expc_idx_dir)
        try:
            os.chmod(index_dir, 0o000)
            with self.assertRaises(ValueError) as ctx:
                _ = idx._index_dir(build=True)
            self.assertEqual(
                f"The '{index_dir}' dir is not writable. Change rights or specify --index-dir.",
                str(ctx.exception))
        finally:
            os.chmod(index_dir, 0o777)

        args = self._index_dir_args(os.path.join(out_dir, seq_db_name),
                                    index_dir=os.path.join(out_dir, 'index_dir'))
        cfg = Config(MacsyDefaults(), args)
        idx = Indexes(cfg)

        # case --index-dir does not exist
        with self.assertRaises(ValueError) as ctx:
            _ = idx._index_dir(build=False)
        self.assertEqual(str(ctx.exception), f"No such directory: {args.index_dir}")

        # case --index-dir is not writable
        os.makedirs(args.index_dir)
        os.chmod(args.index_dir, 0o000)
        try:
            # but I do not care I only read
            index_dir = idx._index_dir(build=False)
            self.assertEqual(index_dir, args.index_dir)
            # it's important to build indexes
            with self.assertRaises(ValueError) as ctx:
                _ = idx._index_dir(build=True)
            self.assertEqual(str(ctx.exception), f"The '{index_dir}' dir is not writable.")
        finally:
            os.chmod(args.index_dir, 0o777)

        # case the sequence_db value is just a filename not a path
        current_dir = os.getcwd()
        try:
            os.chdir(args.out_dir)
            args = self._index_dir_args(seq_db_name)
            cfg = Config(MacsyDefaults(), args)
            idx = Indexes(cfg)
            idx_dir = idx._index_dir(build=True)
            self.assertEqual(idx_dir, os.getcwd())
        finally:
            os.chdir(current_dir)

    def test_build_my_indexes(self):
        args = argparse.Namespace()
        args.db_type = 'gembase'
        args.out_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_indexes')
        if os.path.exists(args.out_dir):
            shutil.rmtree(args.out_dir)
        os.makedirs(args.out_dir)
        seq_db = self.find_data("base", "test_base_with_errors.fa")
        shutil.copy(seq_db, args.out_dir)
        args.sequence_db = os.path.join(args.out_dir, os.path.basename(seq_db))
        self.cfg = Config(MacsyDefaults(), args)

        idx = Indexes(self.cfg)
        with self.assertRaises(MacsypyError) as e:
            # the directory for index exists and is writable but
            # the sequence file is corrupted and cannot be read correctly
            with self.catch_log():
                idx._build_my_indexes(args.out_dir)
        self.assertTrue(str(e.exception).startswith("unable to index the sequence dataset:"))

    def test_iter(self):
        idx = Indexes(self.cfg)
        # iterating before the index has been built is an error
        with self.assertRaises(MacsypyError) as ctx:
            next(iter(idx))
        self.assertEqual(str(ctx.exception), 'Build index before to use it.')

        idx.build()
        expected_idx = list(self._EXPECTED_ENTRIES)
        self.assertListEqual(list(iter(idx)), expected_idx)
class TestProfile(MacsyTest):
    """
    Tests for Profile: length, GA threshold detection, string representation
    and execution of the hmmer binary (success and failure paths).
    """

    def setUp(self):
        """Create a configuration, an empty working directory and the model helpers."""
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 0
        self.cfg = Config(MacsyDefaults(), args)

        # start every test from an empty working directory
        working_dir = self.cfg.working_dir()
        if os.path.exists(working_dir):
            shutil.rmtree(working_dir)
        os.makedirs(working_dir)

        self.model_name = 'foo'
        self.model_location = ModelLocation(path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)

    def tearDown(self):
        """Remove the working directory created by setUp."""
        # best-effort cleanup: a directory which is already gone or cannot be
        # removed must not turn a passing test into a failure
        shutil.rmtree(self.cfg.working_dir(), ignore_errors=True)

    def _make_gene(self, gene_name, model=None):
        """
        Build the ModelGene used by the tests.

        :param str gene_name: the name of the gene to create.
        :param model: the model the gene belongs to;
                      a new ``Model("foo/T2SS", 10)`` is created when omitted.
        :return: the gene wrapping a CoreGene built from the test model location.
        """
        if model is None:
            model = Model("foo/T2SS", 10)
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        return ModelGene(c_gene, model)

    @staticmethod
    def _read_header(raw_out_path, nb_lines=9):
        """
        Read the first lines of a hmmer raw output file.

        :param str raw_out_path: the path of the file to read.
        :param int nb_lines: how many lines to read.
        :return: the *nb_lines* first lines; lines beyond the end of the file
                 are returned as empty strings (the behavior of ``readline``).
        :rtype: list of str
        """
        with open(raw_out_path, 'r') as raw_out_file:
            return [raw_out_file.readline() for _ in range(nb_lines)]

    def test_len(self):
        gene = self._make_gene('abc')
        path = self.model_location.get_profile("abc")
        profile = Profile(gene, self.cfg, path)
        self.assertEqual(len(profile), 501)

    def test_ga_threshold(self):
        model = Model("foo/T2SS", 10)

        gene = self._make_gene('abc', model=model)
        path = self.model_location.get_profile("abc")
        profile = Profile(gene, self.cfg, path)
        self.assertFalse(profile.ga_threshold)

        gene = self._make_gene('T5aSS_PF03797', model=model)
        path = self.model_location.get_profile("T5aSS_PF03797")
        profile = Profile(gene, self.cfg, path)
        self.assertTrue(profile.ga_threshold)

    def test_str(self):
        gene = self._make_gene('abc')
        path = self.model_location.get_profile("abc")
        profile = Profile(gene, self.cfg, path)
        s = "{0} : {1}".format(gene.name, path)
        self.assertEqual(str(profile), s)

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute(self):
        # NOTE(review): hmmsearch seems to pad its header fields with several
        # spaces; confirm that the expected header literals below match the
        # output of the installed hmmsearch.
        banner = "# hmmsearch :: search profile(s) against a sequence database"

        for db_type in ("gembase", "ordered_replicon", "unordered"):
            self.cfg._set_db_type(db_type)
            model = Model("foo/T2SS", 10)

            # case GA threshold in profile
            gene = self._make_gene('T5aSS_PF03797', model=model)
            profile_path = self.model_location.get_profile("T5aSS_PF03797")
            profile = Profile(gene, self.cfg, profile_path)
            report = profile.execute()
            header = self._read_header(profile.hmm_raw_output)
            # a hmmsearch output file has been produced
            self.assertTrue(header[0].startswith(banner))
            # the 6th line is the "# query HMM file:" one, it must mention the profile used
            self.assertIn(profile_path, header[5])
            # the 9th line reports which threshold has been applied
            self.assertEqual("# model-specific thresholding: GA cutoffs", header[8].strip())

            # test if profile is executed only once per run
            report_bis = profile.execute()
            self.assertIs(report, report_bis)

            # case GA threshold in profile but --no-cut-ga is set
            args = argparse.Namespace()
            args.sequence_db = self.find_data("base", "test_1.fasta")
            args.db_type = 'gembase'
            args.models_dir = self.find_data('models')
            args.res_search_dir = tempfile.gettempdir()
            args.log_level = 0
            args.e_value_search = 0.5
            args.no_cut_ga = True
            cfg = Config(MacsyDefaults(), args)
            profile = Profile(gene, cfg, profile_path)
            profile.execute()
            header = self._read_header(profile.hmm_raw_output)
            self.assertEqual("# sequence reporting threshold: E-value <= 0.5", header[8].strip())

            # case cut-ga but no GA threshold in hmmprofile
            gene = self._make_gene('abc', model=model)
            profile_path = self.model_location.get_profile("abc")
            profile = Profile(gene, self.cfg, profile_path)
            with self.catch_log():
                profile.execute()
            header = self._read_header(profile.hmm_raw_output)
            # a hmmsearch output file has been produced
            self.assertTrue(header[0].startswith(banner))
            # the 6th line is the "# query HMM file:" one, it must mention the profile used
            self.assertIn(profile_path, header[5])
            self.assertEqual('# sequence reporting threshold: E-value <= 0.1', header[8].strip())

    def test_execute_unknown_binary(self):
        self.cfg._options['hmmer'] = "Nimportnaoik"
        gene = self._make_gene('abc')
        path = self.model_location.get_profile("abc")
        profile = Profile(gene, self.cfg, path)
        with self.catch_log():
            with self.assertRaises(RuntimeError):
                profile.execute()

    def test_execute_hmmer_failed(self):
        # a fake hmmer: a tiny script which always exits with the return code 127
        fake_hmmer = os.path.join(tempfile.gettempdir(), 'hmmer_failed')
        with open(fake_hmmer, 'w') as hmmer:
            hmmer.write("#! {}\nimport sys\nsys.exit(127)\n".format(sysconfig.sys.executable))
        try:
            os.chmod(fake_hmmer, 0o755)
            self.cfg._options['hmmer'] = fake_hmmer
            gene = self._make_gene('abc')
            path = self.model_location.get_profile("abc")
            profile = Profile(gene, self.cfg, path)
            with self.catch_log():
                with self.assertRaisesRegex(RuntimeError,
                                            "an error occurred during Hmmer "
                                            "execution: command = .* : return code = 127 .*"):
                    profile.execute()
        finally:
            # best-effort removal of the fake binary
            try:
                os.unlink(fake_hmmer)
            except Exception:
                pass