def test_min_genes_required_cfg(self):
    # min_genes_required is specified from the configuration,
    # so this value must override the one read from the xml definition
    model_fqn = 'foo/model_5'
    self.args.min_genes_required = [[model_fqn, '4']]
    self.cfg = Config(MacsyDefaults(), self.args)

    # the configuration changed: rebuild banks, registry and parser from scratch
    self.model_bank = ModelBank()
    self.gene_bank = GeneBank()
    self.model_registry = ModelRegistry()
    for ml in scan_models_dir(self.args.models_dir):
        self.model_registry.add(ml)
    self.parser = DefinitionParser(self.cfg, self.model_bank, self.gene_bank,
                                   self.model_registry, self.profile_factory)

    models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
    self.parser.parse(models_2_detect)
    m = self.model_bank[model_fqn]
    self.assertEqual(m.min_genes_required, 4)
def test_get_def_to_detect(self):
    # register every model package found in the fake models directory
    cmd_args = argparse.Namespace()
    cmd_args.models_dir = os.path.join(self._data_dir, 'fake_model_dir')
    cmd_args.models = ('set_1', 'def_1_1', 'def_1_2', 'def_1_3')
    registry = ModelRegistry()
    for location in scan_models_dir(cmd_args.models_dir):
        registry.add(location)

    # case where models are specified on command line
    requested = ('set_1', ['def_1_1', 'def_1_2', 'def_1_3'])
    res, model_family, model_vers = get_def_to_detect(requested, registry)
    model_loc = registry['set_1']
    self.assertEqual(model_family, 'set_1')
    self.assertEqual(model_vers, '0.0b2')
    expected_fqns = ('set_1/def_1_1', 'set_1/def_1_2', 'set_1/def_1_3')
    exp = []
    for name in expected_fqns:
        exp.append(model_loc.get_definition(name))
    self.assertListEqual(res, exp)

    # case we search all models
    res, model_family, model_vers = get_def_to_detect(('set_1', ['all']), registry)
    self.assertEqual(model_family, 'set_1')
    self.assertEqual(model_vers, '0.0b2')
    self.assertListEqual(res, model_loc.get_all_definitions())

    # case the required models do not exist
    with self.assertRaises(ValueError):
        get_def_to_detect(('set_1', ['FOO', 'BAR']), registry)
def test_inter_gene_max_space_cfg(self):
    # inter_gene_max_space is specified from the configuration,
    # so this value must override the one read from the xml definition
    model_fqn = 'foo/model_5'
    self.args.inter_gene_max_space = [[model_fqn, '222']]
    self.cfg = Config(MacsyDefaults(), self.args)

    # start again from empty banks and a freshly populated registry
    self.model_bank = ModelBank()
    self.gene_bank = GeneBank()
    self.model_registry = ModelRegistry()
    for location in scan_models_dir(self.args.models_dir):
        self.model_registry.add(location)
    self.parser = DefinitionParser(self.cfg,
                                   self.model_bank,
                                   self.gene_bank,
                                   self.model_registry,
                                   self.profile_factory)

    definition = self.model_registry['foo'].get_definition(model_fqn)
    self.parser.parse([definition])
    model = self.model_bank[model_fqn]
    self.assertEqual(model.inter_gene_max_space, 222)
def test_add_get(self):
    registry = ModelRegistry()
    complex_loc = ModelLocation(path=self.complex_dir)

    # looking up a model which has not been registered yet must fail
    with self.assertRaises(KeyError) as ctx:
        registry[complex_loc.name]
    self.assertEqual(str(ctx.exception),
                     '"No such model definition: \'complex\'"')

    # once added, the lookup by name gives back the same location
    registry.add(complex_loc)
    found = registry[complex_loc.name]
    self.assertEqual(complex_loc, found)
def list_models(args):
    """
    Scan the configured models directories, register every model location
    found and render the resulting registry as text.

    :param args: The command line argument once parsed
    :type args: :class:`argparse.Namespace` object
    :return: a string representation of all models and submodels installed.
    :rtype: str
    """
    cfg = Config(MacsyDefaults(), args)
    registry = ModelRegistry()
    scan_options = {'profile_suffix': cfg.profile_suffix(),
                    'relative_path': cfg.relative_path()}
    for location in scan_models_dir(cfg.models_dir(), **scan_options):
        registry.add(location)
    return str(registry)
def _find_all_installed_packages() -> ModelRegistry:
    """
    Register the model packages found in the configured models directory
    and, when it exists, in the user directory ``~/.macsyfinder/data``.

    A directory which raises :class:`PermissionError` while being scanned
    is skipped and a warning is logged.

    :return: all models installed
    """
    config = Config(MacsyDefaults(), argparse.Namespace())
    system_model_dir = config.models_dir()
    user_model_dir = os.path.join(os.path.expanduser('~'), '.macsyfinder', 'data')
    model_dirs = [system_model_dir]
    if os.path.exists(user_model_dir):
        model_dirs.append(user_model_dir)

    # like models_dir(), profile_suffix is an accessor which must be called:
    # scan_models_dir needs the suffix value, not the bound method
    profile_suffix = config.profile_suffix()

    registry = ModelRegistry()
    for model_dir in model_dirs:
        try:
            for model_loc in scan_models_dir(model_dir, profile_suffix=profile_suffix):
                registry.add(model_loc)
        except PermissionError as err:
            _log.warning(f"{model_dir} is not readable: {err} : skip it.")
    return registry
def setUp(self):
    # command line like arguments shared by all the tests
    args = argparse.Namespace()
    args.sequence_db = self.find_data("base", "test_1.fasta")
    args.db_type = 'gembase'
    args.models_dir = self.find_data('models')
    args.res_search_dir = tempfile.gettempdir()
    self.args = args

    self.cfg = Config(MacsyDefaults(), self.args)
    self.model_bank = ModelBank()
    self.gene_bank = GeneBank()
    self.profile_factory = ProfileFactory(self.cfg)

    # registry holding every model location found in the test models directory
    self.model_registry = ModelRegistry()
    for location in scan_models_dir(self.args.models_dir):
        self.model_registry.add(location)

    self.parser = DefinitionParser(self.cfg,
                                   self.model_bank,
                                   self.gene_bank,
                                   self.model_registry,
                                   self.profile_factory)
def _find_all_installed_packages(models_dir=None) -> ModelRegistry:
    """
    Register the model packages found in every configured models directory.

    When *models_dir* is given it is set on the arguments used to build the
    configuration. A directory which raises :class:`PermissionError` while
    being scanned is skipped and a warning is logged.

    :param models_dir: value to use as ``models_dir`` in the configuration,
                       or None to leave the configuration untouched
    :return: all models installed
    """
    cli_args = argparse.Namespace()
    if models_dir is not None:
        cli_args.models_dir = models_dir
    config = Config(MacsyDefaults(), cli_args)

    registry = ModelRegistry()
    for model_dir in config.models_dir():
        try:
            locations = scan_models_dir(model_dir,
                                        profile_suffix=config.profile_suffix())
            for model_loc in locations:
                registry.add(model_loc)
        except PermissionError as err:
            _log.warning(f"{model_dir} is not readable: {err} : skip it.")
    return registry
def test_models(self):
    registry = ModelRegistry()
    complex_loc = ModelLocation(path=self.complex_dir)
    simple_loc = ModelLocation(path=self.simple_dir)
    for location in (complex_loc, simple_loc):
        registry.add(location)

    # the registry must give back exactly the locations which were added
    received = sorted(registry.models())
    expected = sorted([complex_loc, simple_loc])
    self.assertListEqual(received, expected)
def test_max_nb_genes_cfg(self):
    self.model_bank = ModelBank()
    self.gene_bank = GeneBank()
    self.model_registry = ModelRegistry()
    for ml in scan_models_dir(self.args.models_dir):
        self.model_registry.add(ml)

    # max_nb_genes is specified in xml
    # no user configuration on this
    model_fqn = 'foo/model_6'  # 4 genes in this model but xml specify 3
    # the configuration is built once here (it used to be built twice in a row)
    self.cfg = Config(MacsyDefaults(), self.args)
    self.parser = DefinitionParser(self.cfg, self.model_bank, self.gene_bank,
                                   self.model_registry, self.profile_factory)
    models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
    self.parser.parse(models_2_detect)
    m = self.model_bank[model_fqn]
    self.assertEqual(m.max_nb_genes, 3)

    # max_nb_genes is specified from configuration
    # so this value must override the value read from xml
    model_fqn = 'foo/model_5'  # 4 genes in this model
    self.args.max_nb_genes = [[model_fqn, '6']]
    self.cfg = Config(MacsyDefaults(), self.args)
    # only the parser is rebuilt: banks and registry are kept from the first part
    self.parser = DefinitionParser(self.cfg, self.model_bank, self.gene_bank,
                                   self.model_registry, self.profile_factory)
    models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
    self.parser.parse(models_2_detect)
    m = self.model_bank[model_fqn]
    self.assertEqual(m.max_nb_genes, 6)
def test_multi_loci_cfg(self):
    # multi_loci is specified from the configuration,
    # so this value must override the one read from the xml definition
    model_fqn = 'foo/model_5'
    self.args.multi_loci = model_fqn
    self.cfg = Config(MacsyDefaults(), self.args)

    # start again from empty banks and a freshly populated registry
    self.model_bank = ModelBank()
    self.gene_bank = GeneBank()
    self.model_registry = ModelRegistry()
    for location in scan_models_dir(self.args.models_dir):
        self.model_registry.add(location)
    self.parser = DefinitionParser(self.cfg,
                                   self.model_bank,
                                   self.gene_bank,
                                   self.model_registry,
                                   self.profile_factory)

    definition = self.model_registry['foo'].get_definition(model_fqn)
    self.parser.parse([definition])
    model = self.model_bank[model_fqn]
    self.assertTrue(model.multi_loci)
def test_str(self):
    registry = ModelRegistry()
    complex_loc = ModelLocation(path=self.complex_dir)
    simple_loc = ModelLocation(path=self.simple_dir)
    for location in (complex_loc, simple_loc):
        registry.add(location)

    # textual rendering expected for the registry holding both packages
    expected_output = """complex /subdef_1 /def_1_1 /def_1_2 /subdef_2 /def_2_1 /def_2_2 simple /def_1 /def_2 """
    rendered = str(registry)
    self.assertEqual(expected_output, rendered)
class TestModelParser(MacsyTest):
    """
    Tests of :class:`DefinitionParser`: parsing of valid model definitions,
    rejection of invalid ones and overriding of model attributes
    by the configuration.
    """

    def setUp(self):
        self.args = argparse.Namespace()
        self.args.sequence_db = self.find_data("base", "test_1.fasta")
        self.args.db_type = 'gembase'
        self.args.models_dir = self.find_data('models')
        self.args.res_search_dir = tempfile.gettempdir()
        self.cfg = Config(MacsyDefaults(), self.args)
        self.profile_factory = ProfileFactory(self.cfg)
        self._init_parser()

    def tearDown(self):
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # best effort clean up: the working dir may not exist.
            # Exception (instead of a bare except) lets KeyboardInterrupt
            # and SystemExit go through.
            pass

    def _init_parser(self):
        """
        (Re)build empty banks, a registry populated from ``self.args.models_dir``
        and a parser bound to the current ``self.cfg``.
        Must be called each time ``self.cfg`` is replaced.
        """
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.model_registry = ModelRegistry()
        for ml in scan_models_dir(self.args.models_dir):
            self.model_registry.add(ml)
        self.parser = DefinitionParser(self.cfg, self.model_bank, self.gene_bank,
                                       self.model_registry, self.profile_factory)

    def test_parse_with_exchangeable(self):
        model_name = 'model_1'
        model_family = 'foo'
        fqn = f"{model_family}/{model_name}"
        models_2_detect = [self.model_registry['foo'].get_definition(fqn)]
        self.parser.parse(models_2_detect)
        self.assertEqual(len(self.model_bank), 1)

        m1 = self.model_bank[fqn]
        self.assertEqual(m1.name, model_name)
        self.assertEqual(m1.fqn, fqn)
        self.assertEqual(m1.inter_gene_max_space, 20)
        self.assertEqual(m1.min_mandatory_genes_required, 2)
        self.assertEqual(m1.min_genes_required, 4)
        self.assertTrue(m1.multi_loci)

        self.assertEqual(len(m1.mandatory_genes), 2)
        mandatory_genes_name = sorted([g.name for g in m1.mandatory_genes])
        theoric_list = sorted(["sctJ_FLG", "sctN_FLG"])
        self.assertListEqual(mandatory_genes_name, theoric_list)

        self.assertEqual(len(m1.accessory_genes), 2)
        accessory_genes_name = sorted([g.name for g in m1.accessory_genes])
        theoric_list = sorted(["flgB", "flgC"])
        self.assertListEqual(accessory_genes_name, theoric_list)

        self.assertEqual(len(m1.neutral_genes), 2)
        neutral_genes_name = sorted([g.name for g in m1.neutral_genes])
        theoric_list = sorted(["fliE", "tadZ"])
        self.assertListEqual(neutral_genes_name, theoric_list)

        self.assertEqual(len(m1.forbidden_genes), 1)
        forbidden_genes_name = sorted([g.name for g in m1.forbidden_genes])
        theoric_list = sorted(["sctC"])
        self.assertListEqual(forbidden_genes_name, theoric_list)

        sctJ_FLG = m1.get_gene('sctJ_FLG')
        sctJ_FLG_exchangeables = sctJ_FLG.exchangeables
        self.assertEqual(len(sctJ_FLG_exchangeables), 2)
        self.assertEqual(sctJ_FLG_exchangeables[0].name, 'sctJ')
        self.assertEqual(sctJ_FLG_exchangeables[1].name, 'abc')
        self.assertTrue(isinstance(sctJ_FLG_exchangeables[0], Exchangeable))
        self.assertTrue(isinstance(sctJ_FLG_exchangeables[0]._gene, CoreGene))
        self.assertTrue(isinstance(sctJ_FLG_exchangeables[0].alternate_of(), ModelGene))
        self.assertTrue(sctJ_FLG_exchangeables[0].loner)
        self.assertFalse(sctJ_FLG.is_exchangeable)
        sctJ = m1.get_gene('sctJ')
        self.assertTrue(sctJ.is_exchangeable)

    def test_exchangeable_inheritance(self):
        models_2_detect = [self.model_registry['foo'].get_definition('foo/model_1')]
        self.parser.parse(models_2_detect)
        m1 = self.model_bank['foo/model_1']

        sctJ = m1.get_gene('sctJ')
        self.assertTrue(sctJ.is_exchangeable)
        self.assertTrue(sctJ.loner)
        self.assertTrue(sctJ.multi_system)
        self.assertFalse(sctJ.multi_model)

        sctJ_FLG = m1.get_gene('sctJ_FLG')
        self.assertTrue(sctJ_FLG.multi_system)
        abc = m1.get_gene('abc')
        self.assertFalse(abc.multi_system)

        sctN = m1.get_gene('sctN')
        sctN_FLG = m1.get_gene('sctN_FLG')
        self.assertFalse(sctN_FLG.loner)
        self.assertTrue(sctN.loner)
        self.assertIsNone(sctN_FLG.inter_gene_max_space)
        self.assertEqual(sctN.inter_gene_max_space, 10)
        self.assertFalse(sctN_FLG.multi_model)
        self.assertFalse(sctN.multi_model)

        gspD = m1.get_gene('gspD')
        self.assertFalse(sctN_FLG.multi_system)
        self.assertTrue(gspD.multi_model)
        self.assertTrue(gspD.multi_system)

    def test_model_w_unkown_attr(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/model_w_unknown_attribute')]
        with self.assertRaises(MacsypyError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(
            str(context.exception),
            "unable to parse model definition 'foo/model_w_unknown_attribute' : "
            "The model definition model_w_unknown_attribute.xml has an unknow attribute 'multi-loci'. "
            "Please fix the definition.")

    def test_gene_w_unkown_attr(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/gene_w_unknown_attribute')]
        with self.assertRaises(MacsypyError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(
            str(context.exception),
            "unable to parse model definition 'foo/gene_w_unknown_attribute' : "
            "The model definition gene_w_unknown_attribute.xml has an unknown attribute 'multi-system' for a gene."
            " Please fix the definition.")

    def test_wo_presence(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/fail_wo_presence')]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(
            str(context.exception),
            "Invalid model definition 'foo/fail_wo_presence': gene 'sctN_FLG' without presence")

    def test_invalid_presence(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/fail_invalid_presence')]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(
            str(context.exception),
            "Invalid model 'fail_invalid_presence' definition: presence value must be either: "
            "'mandatory', 'accessory', 'neutral', 'forbidden' not foo_bar")

    def test_gene_no_name(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/gene_no_name')]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(
            str(context.exception),
            "Invalid model definition 'foo/gene_no_name': gene without name")

    def test_invalid_homolog(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/invalid_homolog')]
        with self.assertRaises(MacsypyError) as context:
            self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception), "'foo/foo_bar': No such profile")

    def test_invalid_homolog_2(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/invalid_homolog_2')]
        with self.assertRaises(SyntaxError) as ctx:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(
            str(ctx.exception),
            "Invalid model definition 'foo/invalid_homolog_2': gene without name")

    def test_bad_min_genes_required(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_min_genes_required')]
        with self.assertRaises(ModelInconsistencyError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(
            str(context.exception),
            'model \'bad_min_genes_required\' is not consistent: min_genes_required 16 must be lesser '
            'or equal than the number of "accessory" and "mandatory" components in the model: 6')

    def test_bad_min_genes_required_2(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_min_genes_required_2')]
        with self.catch_log():
            with self.assertRaisesRegex(SyntaxError,
                                        "Invalid model definition (.*): "
                                        "min_genes_required must be an integer: 16.5"):
                self.parser.parse(model_2_detect)

    def test_bad_min_mandatory_genes_required(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_min_mandatory_genes_required')]
        with self.catch_log():
            with self.assertRaises(ModelInconsistencyError) as context:
                self.parser.parse(model_2_detect)
        self.assertEqual(
            str(context.exception),
            'model \'bad_min_mandatory_genes_required\' is not consistent: min_genes_required 16 must '
            'be lesser or equal than the number of "accessory" and "mandatory" components in the model: 6')

    def test_bad_min_mandatory_genes_required_2(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_min_mandatory_genes_required_2')]
        with self.assertRaises(ModelInconsistencyError) as context:
            with self.catch_log():
                # error raised by System initialization
                # which occur before check_consistency
                # the last test : not(model.min_mandatory_genes_required <= model.min_genes_required)
                # seems to be useless
                self.parser.parse(model_2_detect)
        self.assertEqual(
            str(context.exception),
            "foo/bad_min_mandatory_genes_required_2: min_genes_required '6' must be greater or equal"
            " than min_mandatory_genes_required '8'")

    def test_bad_min_mandatory_genes_required_4(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_min_mandatory_genes_required_4')]
        with self.assertRaisesRegex(SyntaxError,
                                    "Invalid model definition (.*): "
                                    "min_mandatory_genes_required must be an integer: 12.5"):
            with self.catch_log():
                self.parser.parse(model_2_detect)

    def test_min_mandatory_genes_required_lesser_than_mandatory_genes(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_min_mandatory_genes_required_3')]
        with self.assertRaises(ModelInconsistencyError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(
            str(context.exception),
            "model 'bad_min_mandatory_genes_required_3' is not consistent:"
            " 'min_mandatory_genes_required': 6 must be lesser or equal than the number of 'mandatory' "
            "components in the model: 5")

    def test_only_one_accessory(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/only_one_accessory')]
        with self.assertRaises(ModelInconsistencyError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(
            str(context.exception),
            "model 'only_one_accessory' is not consistent: there is only one gene in your model. "
            "So its status should be 'mandatory'.")

    def test_bad_max_nb_genes(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_max_nb_genes')]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        model_name, def_name = model_2_detect[0].split_fqn(model_2_detect[0].fqn)
        # path of the definition file, without its extension
        def_path = os.path.join(self.cfg.models_dir()[0], model_name, 'definitions', def_name)
        self.assertEqual(
            str(context.exception),
            "Invalid model definition ({0}.xml): max_nb_genes must be an integer: HOHOHO".format(def_path))

    def test_bad_inter_gene_max_space(self):
        fqn = 'foo/bad_inter_gene_max_space'
        model_family, model_name = fqn.split('/')
        model_2_detect = [self.model_registry['foo'].get_definition(fqn)]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        def_path = os.path.join(self.cfg.models_dir()[0], model_family, 'definitions', model_name + ".xml")
        self.assertEqual(
            str(context.exception),
            "Invalid model definition ({}): inter_gene_max_space must be an integer: 12.5".format(def_path))

    def test_no_inter_gene_max_space(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/no_inter_gene_max_space')]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        def_path = os.path.join(self.cfg.models_dir()[0], "foo/definitions/no_inter_gene_max_space.xml")
        self.assertEqual(
            str(context.exception),
            "Invalid model definition ({}): inter_gene_max_space must be defined".format(def_path))

    def test_loner(self):
        model_fqn = 'foo/model_5'
        model_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(model_2_detect)
        m5 = self.model_bank[model_fqn]
        m5_flgC = m5.get_gene('flgC')
        self.assertFalse(m5_flgC.loner)
        m5_tadZ = m5.get_gene('tadZ')
        self.assertTrue(m5_tadZ.loner)

        model_fqn = 'foo/model_6'
        model_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(model_2_detect)
        m6 = self.model_bank[model_fqn]
        m6_flgC = m6.get_gene('flgC')
        self.assertFalse(m6_flgC.loner)
        m6_tadZ = m6.get_gene('tadZ')
        self.assertFalse(m6_tadZ.loner)

    def test_multi_system(self):
        model_fqn = 'foo/model_5'
        model_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(model_2_detect)
        m = self.model_bank[model_fqn]
        flgC = m.get_gene('flgC')
        self.assertFalse(flgC.multi_system)
        fliE = m.get_gene('fliE')
        self.assertTrue(fliE.multi_system)

    def test_multi_model(self):
        model_fqn = 'foo/model_5'
        model_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(model_2_detect)
        m = self.model_bank[model_fqn]
        flgC = m.get_gene('flgC')
        self.assertFalse(flgC.multi_model)
        abc = m.get_gene('abc')
        self.assertTrue(abc.multi_model)

    def test_gene_inter_gene_max_space(self):
        model_fqn = ['foo/model_5', 'foo/model_6']
        models_2_detect = [self.model_registry['foo'].get_definition(fqn) for fqn in model_fqn]
        self.parser.parse(models_2_detect)

        m5 = self.model_bank['foo/model_5']
        self.assertEqual(m5.name, 'model_5')
        self.assertEqual(m5.fqn, 'foo/model_5')
        self.assertEqual(m5.inter_gene_max_space, 20)
        m5_flgB = m5.get_gene('flgB')
        m5_flgC = m5.get_gene('flgC')
        self.assertIsNone(m5_flgB.inter_gene_max_space)
        self.assertEqual(m5_flgC.inter_gene_max_space, 2)

        m6 = self.model_bank['foo/model_6']
        m6_flgC = m6.get_gene('flgC')
        self.assertEqual(m6_flgC.inter_gene_max_space, 12)

    def test_inter_gene_max_space_cfg(self):
        # test inter_gene_max_space is specified from configuration
        # so this value must override the value read from xml
        model_fqn = 'foo/model_5'
        self.args.inter_gene_max_space = [[model_fqn, '222']]
        self.cfg = Config(MacsyDefaults(), self.args)
        self._init_parser()
        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.inter_gene_max_space, 222)

    def test_min_mandatory_genes_required_cfg(self):
        # test min_mandatory_genes_required is specified from configuration
        # so this value must override the value read from xml
        model_fqn = 'foo/model_5'
        self.args.min_mandatory_genes_required = [[model_fqn, '3']]
        self.cfg = Config(MacsyDefaults(), self.args)
        self._init_parser()
        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.min_mandatory_genes_required, 3)

    def test_min_genes_required_cfg(self):
        # test min_genes_required is specified from configuration
        # so this value must override the value read from xml
        model_fqn = 'foo/model_5'
        self.args.min_genes_required = [[model_fqn, '4']]
        self.cfg = Config(MacsyDefaults(), self.args)
        self._init_parser()
        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.min_genes_required, 4)

    def test_max_nb_genes_cfg(self):
        # max_nb_genes is specified in xml
        # no user configuration on this
        self.cfg = Config(MacsyDefaults(), self.args)
        self._init_parser()
        model_fqn = 'foo/model_6'  # 4 genes in this model but xml specify 3
        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.max_nb_genes, 3)

        # max_nb_genes is specified from configuration
        # so this value must override the value read from xml
        model_fqn = 'foo/model_5'  # 4 genes in this model
        self.args.max_nb_genes = [[model_fqn, '6']]
        self.cfg = Config(MacsyDefaults(), self.args)
        # only the parser is rebuilt here: banks and registry are kept from the first part
        self.parser = DefinitionParser(self.cfg, self.model_bank, self.gene_bank,
                                       self.model_registry, self.profile_factory)
        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.max_nb_genes, 6)

    def test_multi_loci_cfg(self):
        # test multi_loci is specified from configuration
        # so this value must override the value read from xml
        model_fqn = 'foo/model_5'
        self.args.multi_loci = model_fqn
        self.cfg = Config(MacsyDefaults(), self.args)
        self._init_parser()
        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertTrue(m.multi_loci)

    def test_bad_gene_inter_gene_max_space_2(self):
        model_fqn = 'foo/bad_inter_gene_max_space_2'
        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        with self.assertRaises(SyntaxError) as ctx:
            with self.catch_log():
                self.parser.parse(models_2_detect)
        self.assertEqual(
            str(ctx.exception),
            "Invalid model definition 'bad_inter_gene_max_space_2': "
            "inter_gene_max_space must be an integer: 2.5")

    def test_bad_exchangeable_inter_gene_max_space(self):
        fqn = 'foo/bad_exchangeable_inter_gene_max_space'
        model_2_detect = [self.model_registry['foo'].get_definition(fqn)]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(
            str(context.exception),
            "Invalid model definition 'bad_exchangeable_inter_gene_max_space': "
            "inter_gene_max_space must be an integer: 1.5")

    def test_parse_model_old_syntax(self):
        # each definition uses an outdated syntax; the second item is the
        # diagnostic expected in the log for it
        old_definitions = (
            ('model_old_1', 'is not versioned'),  # the attribute vers is not set
            ('model_old_2', 'is obsolete'),       # the root is system instead of model
            ('model_old_3', 'is obsolete'),       # there still system_ref attribute
            ('model_old_4', 'is obsolete'),       # there still homologs tag
            ('model_old_5', 'is obsolete'),       # there still analogs tag
        )
        for def_name, diagnostic in old_definitions:
            model_fqn = f'foo/{def_name}'
            models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
            with self.catch_log(log_name='macsypy') as log:
                with self.assertRaises(MacsypyError):
                    self.parser.parse(models_2_detect)
                # read the log while it is still captured
                log_msg = log.get_value().strip()
            self.assertEqual(
                log_msg,
                f"unable to parse model definition '{model_fqn}' : "
                f"The model definition {def_name}.xml {diagnostic}. Please update your model.")
def search_systems(config, model_bank, gene_bank, profile_factory, logger):
    """
    Do the job, this function is the orchestrator of all the macsyfinder mechanics
    at the end several files are produced containing the results

      - macsyfinder.conf: The set of variables used to run this job
      - macsyfinder.systems: The list of the potential systems
      - macsyfinder.rejected_cluster: The list of all clusters and clusters combination
                                      which has been rejected and the reason
      - macsyfinder.log: the copy of the standard output

    :param config: The MacSyFinder Configuration
    :type config: :class:`macsypy.config.Config` object
    :param model_bank: The bank populated with the available models
    :type model_bank: :class:`macsypy.model.ModelBank` object
    :param gene_bank: the bank containing all genes
    :type gene_bank: :class:`macsypy.gene.GeneBank` object
    :param profile_factory: The profile factory
    :type profile_factory: :class:`macsypy.gene.ProfileFactory`
    :param logger: The logger use to display information to the user.
                   It must be initialized. see :func:`macsypy.init_logger`
    :type logger: :class:`colorlog.Logger` object
    :return: the systems and rejected clusters found
    :rtype: ([:class:`macsypy.system.System`, ...], [:class:`macsypy.cluster.RejectedCluster`, ...])
    :raise AssertionError: if some hits are found and ``config.db_type()`` is not one of
                           'ordered_replicon', 'gembase', 'unordered'
    :raise SystemExit: if a requested model raises a KeyError when looked up,
                       or if the genes search fails
    """
    working_dir = config.working_dir()
    config.save(path_or_buf=os.path.join(working_dir, config.cfg_name))

    registry = ModelRegistry()
    models_loc_available = scan_models_dir(config.models_dir(),
                                           profile_suffix=config.profile_suffix(),
                                           relative_path=config.relative_path())
    for model_loc in models_loc_available:
        registry.add(model_loc)

    # build indexes
    idx = Indexes(config)
    # NOTE(review): config.idx is passed without being called whereas the other
    # configuration values in this function are obtained through calls - confirm
    idx.build(force=config.idx)

    # create models
    parser = DefinitionParser(config, model_bank, gene_bank, registry, profile_factory)
    try:
        models_def_to_detect = get_def_to_detect(config.models(), registry)
    except KeyError as err:
        sys.exit(f"macsyfinder: {err}")
    parser.parse(models_def_to_detect)

    logger.info(f"MacSyFinder's results will be stored in working_dir{working_dir}")
    logger.info(f"Analysis launched on {config.sequence_db()} for model(s):")
    for m in models_def_to_detect:
        logger.info(f"\t- {m.fqn}")

    models_to_detect = [model_bank[model_loc.fqn] for model_loc in models_def_to_detect]
    all_genes = []
    for model in models_to_detect:
        genes = model.mandatory_genes + model.accessory_genes + model.neutral_genes + model.forbidden_genes
        # Exchangeable (formerly homologs/analogs) are also added because they can "replace" an important gene...
        ex_genes = []
        for g in genes:
            ex_genes += g.exchangeables
        all_genes += (genes + ex_genes)

    #############################################
    # this part of code is executed in parallel
    #############################################
    try:
        all_reports = search_genes(all_genes, config)
    except Exception as err:
        sys.exit(str(err))
    #############################################
    # end of parallel code
    #############################################

    all_hits = [hit for report in all_reports for hit in report.hits]
    if not all_hits:
        # No hits detected
        return [], []

    # It's important to keep this sorting to have in last all_hits version
    # the hits with the same replicon_name and position sorted by score
    # the best score in first
    hits_by_replicon = {}
    for hit in all_hits:
        hits_by_replicon.setdefault(hit.replicon_name, []).append(hit)

    for rep_name, rep_hits in hits_by_replicon.items():
        best_hits = get_best_hits(rep_hits, key='score')
        best_hits.sort(key=attrgetter('position'))
        # only the value of an existing key is replaced: safe while iterating
        hits_by_replicon[rep_name] = best_hits

    models_to_detect = sorted(models_to_detect, key=attrgetter('name'))
    db_type = config.db_type()
    if db_type in ('ordered_replicon', 'gembase'):
        systems, rejected_clusters = _search_in_ordered_replicon(hits_by_replicon, models_to_detect,
                                                                 config, logger)
        return systems, rejected_clusters
    if db_type == "unordered":
        likely_systems, rejected_hits = _search_in_unordered_replicon(hits_by_replicon, models_to_detect,
                                                                      logger)
        return likely_systems, rejected_hits
    # raised explicitly (rather than "assert False") so the check is not
    # stripped when python runs with -O, which would make the function return None
    raise AssertionError(f"dbtype have an invalid value {db_type}")