Esempio n. 1
0
    def test_get_def_to_detect(self):
        """
        Check what ``get_def_to_detect`` returns for the ``set_1`` family:
        explicitly named definitions, the ``all`` keyword, and unknown
        definition names (which must raise :class:`ValueError`).
        """
        cmd_args = argparse.Namespace()
        cmd_args.models_dir = os.path.join(self._data_dir, 'fake_model_dir')
        cmd_args.models = ('set_1', 'def_1_1', 'def_1_2', 'def_1_3')

        # fill the registry with every model found in the fake models dir
        registry = ModelRegistry()
        for location in scan_models_dir(cmd_args.models_dir):
            registry.add(location)

        # case where models are specified on command line
        requested = ('set_1', ['def_1_1', 'def_1_2', 'def_1_3'])
        found, family, version = get_def_to_detect(requested, registry)
        set_1_loc = registry['set_1']
        self.assertEqual(family, 'set_1')
        self.assertEqual(version, '0.0b2')
        expected = []
        for fqn in ('set_1/def_1_1', 'set_1/def_1_2', 'set_1/def_1_3'):
            expected.append(set_1_loc.get_definition(fqn))
        self.assertListEqual(found, expected)

        # case where all the definitions of the family are requested
        found, family, version = get_def_to_detect(('set_1', ['all']), registry)
        self.assertEqual(family, 'set_1')
        self.assertEqual(version, '0.0b2')
        self.assertListEqual(found, set_1_loc.get_all_definitions())

        # case where the requested definitions do not exist
        with self.assertRaises(ValueError):
            get_def_to_detect(('set_1', ['FOO', 'BAR']), registry)
Esempio n. 2
0
 def test_models(self):
     """``models()`` gives back every location added to the registry."""
     registry = ModelRegistry()
     complex_loc = ModelLocation(path=self.complex_dir)
     simple_loc = ModelLocation(path=self.simple_dir)
     for location in (complex_loc, simple_loc):
         registry.add(location)
     # compare regardless of the order in which the registry yields them
     received = sorted(registry.models())
     expected = sorted([complex_loc, simple_loc])
     self.assertListEqual(received, expected)
Esempio n. 3
0
    def test_add_get(self):
        """
        Looking up a model by name raises :class:`KeyError` before it is
        added, and returns the added location afterwards.
        """
        registry = ModelRegistry()
        complex_loc = ModelLocation(path=self.complex_dir)
        missing_msg = '"No such model definition: \'complex\'"'

        # the registry is still empty: the lookup must fail
        with self.assertRaises(KeyError) as raised:
            registry[complex_loc.name]
        self.assertEqual(str(raised.exception), missing_msg)

        registry.add(complex_loc)
        self.assertEqual(complex_loc, registry[complex_loc.name])
Esempio n. 4
0
def list_models(args):
    """
    Build a registry from the models directory of the configuration and
    render it as text.

    :param args: The command line argument once parsed
    :type args: :class:`argparse.Namespace` object
    :return: a string representation of all models and submodels installed.
    :rtype: str
    """
    cfg = Config(MacsyDefaults(), args)
    installed = ModelRegistry()
    # scan options all come from the configuration
    scan_options = {
        'profile_suffix': cfg.profile_suffix(),
        'relative_path': cfg.relative_path(),
    }
    for location in scan_models_dir(cfg.models_dir(), **scan_options):
        installed.add(location)
    return str(installed)
Esempio n. 5
0
def _find_all_installed_packages() -> ModelRegistry:
    """
    Collect the models found in the configured models directory and,
    when it exists, in ``~/.macsyfinder/data``.

    A directory whose scan raises :class:`PermissionError` is skipped
    with a warning instead of aborting the whole search.

    :return: all models installed
    :rtype: :class:`ModelRegistry` object
    """
    config = Config(MacsyDefaults(), argparse.Namespace())
    model_dirs = [config.models_dir()]
    user_model_dir = os.path.join(os.path.expanduser('~'), '.macsyfinder', 'data')
    if os.path.exists(user_model_dir):
        model_dirs.append(user_model_dir)

    # Config options are read by *calling* the accessor: passing the bare
    # attribute would hand a callable, not the suffix, to scan_models_dir.
    profile_suffix = config.profile_suffix()

    registry = ModelRegistry()
    for model_dir in model_dirs:
        try:
            for model_loc in scan_models_dir(model_dir, profile_suffix=profile_suffix):
                registry.add(model_loc)
        except PermissionError as err:
            _log.warning(f"{model_dir} is not readable: {err} : skip it.")
    return registry
Esempio n. 6
0
    def test_str(self):
        mr = ModelRegistry()
        model_complex_expected = ModelLocation(path=self.complex_dir)
        model_simple_expected = ModelLocation(path=self.simple_dir)
        mr.add(model_complex_expected)
        mr.add(model_simple_expected)
        expected_output = """complex
        /subdef_1
                 /def_1_1
                 /def_1_2
        /subdef_2
                 /def_2_1
                 /def_2_2
simple
       /def_1
       /def_2
"""
        self.assertEqual(expected_output, str(mr))
Esempio n. 7
0
def _find_all_installed_packages(models_dir=None) -> ModelRegistry:
    """
    Scan every directory returned by the configuration's ``models_dir()``
    and register the models found there. A directory whose scan raises
    :class:`PermissionError` is skipped with a warning.

    :param models_dir: when not None, set as ``models_dir`` on the arguments
                       used to build the configuration.
    :return: all models installed
    """
    cli_args = argparse.Namespace()
    if models_dir is not None:
        cli_args.models_dir = models_dir
    cfg = Config(MacsyDefaults(), cli_args)

    installed = ModelRegistry()
    for directory in cfg.models_dir():
        try:
            locations = scan_models_dir(directory,
                                        profile_suffix=cfg.profile_suffix())
            for location in locations:
                installed.add(location)
        except PermissionError as err:
            _log.warning(f"{directory} is not readable: {err} : skip it.")
    return installed
class TestModelParser(MacsyTest):
    """
    Tests of :class:`DefinitionParser` against the definitions of the
    ``foo`` model family found in the ``models`` test data directory.

    Each test parses one or several definitions and checks either the
    attributes of the resulting model(s) or the error raised by the parser.
    """

    def setUp(self):
        """Build the configuration, banks, registry and the parser under test."""
        self.args = argparse.Namespace()
        self.args.sequence_db = self.find_data("base", "test_1.fasta")
        self.args.db_type = 'gembase'
        self.args.models_dir = self.find_data('models')
        self.args.res_search_dir = tempfile.gettempdir()

        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.profile_factory = ProfileFactory(self.cfg)
        self.model_registry = self._build_registry()
        self.parser = self._new_parser()

    def tearDown(self):
        """Remove the working directory of the current configuration."""
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # Best-effort cleanup: a missing or unremovable working dir must
            # not fail the test, but KeyboardInterrupt/SystemExit must still
            # propagate (a bare ``except:`` would swallow them).
            pass

    # ------------------------------------------------------------------ #
    # helpers
    # ------------------------------------------------------------------ #

    def _build_registry(self):
        """Return a new registry holding every model found in ``self.args.models_dir``."""
        registry = ModelRegistry()
        for model_loc in scan_models_dir(self.args.models_dir):
            registry.add(model_loc)
        return registry

    def _new_parser(self):
        """Return a parser wired to the current cfg, banks, registry and profile factory."""
        return DefinitionParser(self.cfg, self.model_bank,
                                self.gene_bank, self.model_registry,
                                self.profile_factory)

    def _reset_parser(self):
        """
        Rebuild cfg, banks, registry and parser from the current ``self.args``.

        Used by tests which add an option to ``self.args`` after ``setUp``.
        The profile factory created in ``setUp`` is deliberately reused.
        """
        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.model_registry = self._build_registry()
        self.parser = self._new_parser()

    def _definitions(self, *fqns):
        """Return the definitions of the ``foo`` family matching *fqns*, in order."""
        return [self.model_registry['foo'].get_definition(fqn) for fqn in fqns]

    def _assert_parse_error(self, fqn, exc_type, expected_msg):
        """Check that parsing *fqn* raises *exc_type* whose message is exactly *expected_msg*."""
        definitions = self._definitions(fqn)
        with self.assertRaises(exc_type) as ctx:
            with self.catch_log():
                self.parser.parse(definitions)
        self.assertEqual(str(ctx.exception), expected_msg)

    # ------------------------------------------------------------------ #
    # successful parsing
    # ------------------------------------------------------------------ #

    def test_parse_with_exchangeable(self):
        """Parse ``foo/model_1`` and check the model, its genes and exchangeables."""
        model_name = 'model_1'
        model_family = 'foo'
        fqn = f"{model_family}/{model_name}"
        self.parser.parse(self._definitions(fqn))
        self.assertEqual(len(self.model_bank), 1)

        m1 = self.model_bank[fqn]
        self.assertEqual(m1.name, model_name)
        self.assertEqual(m1.fqn, fqn)
        self.assertEqual(m1.inter_gene_max_space, 20)
        self.assertEqual(m1.min_mandatory_genes_required, 2)
        self.assertEqual(m1.min_genes_required, 4)
        self.assertTrue(m1.multi_loci)

        # expected gene names for each presence category
        expected_genes = (
            (m1.mandatory_genes, ["sctJ_FLG", "sctN_FLG"]),
            (m1.accessory_genes, ["flgB", "flgC"]),
            (m1.neutral_genes, ["fliE", "tadZ"]),
            (m1.forbidden_genes, ["sctC"]),
        )
        for genes, expected_names in expected_genes:
            self.assertEqual(len(genes), len(expected_names))
            self.assertListEqual(sorted(g.name for g in genes),
                                 sorted(expected_names))

        sctJ_FLG = m1.get_gene('sctJ_FLG')
        exchangeables = sctJ_FLG.exchangeables
        self.assertEqual(len(exchangeables), 2)
        self.assertEqual(exchangeables[0].name, 'sctJ')
        self.assertEqual(exchangeables[1].name, 'abc')
        self.assertIsInstance(exchangeables[0], Exchangeable)
        self.assertIsInstance(exchangeables[0]._gene, CoreGene)
        self.assertIsInstance(exchangeables[0].alternate_of(), ModelGene)
        self.assertTrue(exchangeables[0].loner)
        self.assertFalse(sctJ_FLG.is_exchangeable)
        sctJ = m1.get_gene('sctJ')
        self.assertTrue(sctJ.is_exchangeable)

    def test_exchangeable_inheritance(self):
        """Check loner / multi_system / multi_model / inter_gene_max_space on genes of ``foo/model_1``."""
        self.parser.parse(self._definitions('foo/model_1'))
        m1 = self.model_bank['foo/model_1']

        sctJ = m1.get_gene('sctJ')
        self.assertTrue(sctJ.is_exchangeable)
        self.assertTrue(sctJ.loner)
        self.assertTrue(sctJ.multi_system)
        self.assertFalse(sctJ.multi_model)
        sctJ_FLG = m1.get_gene('sctJ_FLG')
        self.assertTrue(sctJ_FLG.multi_system)
        abc = m1.get_gene('abc')
        self.assertFalse(abc.multi_system)

        sctN = m1.get_gene('sctN')
        sctN_FLG = m1.get_gene('sctN_FLG')

        self.assertFalse(sctN_FLG.loner)
        self.assertTrue(sctN.loner)
        self.assertIsNone(sctN_FLG.inter_gene_max_space)
        self.assertEqual(sctN.inter_gene_max_space, 10)
        self.assertFalse(sctN_FLG.multi_model)
        self.assertFalse(sctN.multi_model)
        gspD = m1.get_gene('gspD')
        self.assertFalse(sctN_FLG.multi_system)
        self.assertTrue(gspD.multi_model)
        self.assertTrue(gspD.multi_system)

    # ------------------------------------------------------------------ #
    # invalid definitions
    # ------------------------------------------------------------------ #

    def test_model_w_unkown_attr(self):
        """An unknown attribute on the model is rejected."""
        self._assert_parse_error(
            'foo/model_w_unknown_attribute', MacsypyError,
            "unable to parse model definition 'foo/model_w_unknown_attribute' : "
            "The model definition model_w_unknown_attribute.xml has an unknow attribute 'multi-loci'. "
            "Please fix the definition.")

    def test_gene_w_unkown_attr(self):
        """An unknown attribute on a gene is rejected."""
        self._assert_parse_error(
            'foo/gene_w_unknown_attribute', MacsypyError,
            "unable to parse model definition 'foo/gene_w_unknown_attribute' : "
            "The model definition gene_w_unknown_attribute.xml has an unknown attribute 'multi-system' for a gene."
            " Please fix the definition.")

    def test_wo_presence(self):
        """A gene without presence is rejected."""
        self._assert_parse_error(
            'foo/fail_wo_presence', SyntaxError,
            "Invalid model definition 'foo/fail_wo_presence': gene 'sctN_FLG' without presence")

    def test_invalid_presence(self):
        """A gene with an unsupported presence value is rejected."""
        self._assert_parse_error(
            'foo/fail_invalid_presence', SyntaxError,
            "Invalid model 'fail_invalid_presence' definition: presence value must be either: "
            "'mandatory', 'accessory', 'neutral', 'forbidden' not foo_bar")

    def test_gene_no_name(self):
        """A gene without name is rejected."""
        self._assert_parse_error(
            'foo/gene_no_name', SyntaxError,
            "Invalid model definition 'foo/gene_no_name': gene without name")

    def test_invalid_homolog(self):
        """A gene referring to a missing profile is rejected."""
        model_2_detect = self._definitions('foo/invalid_homolog')
        with self.assertRaises(MacsypyError) as context:
            self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "'foo/foo_bar': No such profile")

    def test_invalid_homolog_2(self):
        """``foo/invalid_homolog_2`` is rejected with a 'gene without name' error."""
        self._assert_parse_error(
            'foo/invalid_homolog_2', SyntaxError,
            "Invalid model definition 'foo/invalid_homolog_2': gene without name")

    def test_bad_min_genes_required(self):
        """min_genes_required greater than the number of components is rejected."""
        self._assert_parse_error(
            'foo/bad_min_genes_required', ModelInconsistencyError,
            'model \'bad_min_genes_required\' is not consistent: min_genes_required 16 must be lesser '
            'or equal than the number of "accessory" and "mandatory" components in the model: 6')

    def test_bad_min_genes_required_2(self):
        """A non integer min_genes_required is rejected."""
        model_2_detect = self._definitions('foo/bad_min_genes_required_2')
        with self.catch_log():
            with self.assertRaisesRegex(
                    SyntaxError, "Invalid model definition (.*): "
                    "min_genes_required must be an integer: 16.5"):
                self.parser.parse(model_2_detect)

    def test_bad_min_mandatory_genes_required(self):
        """``foo/bad_min_mandatory_genes_required`` is rejected as inconsistent."""
        self._assert_parse_error(
            'foo/bad_min_mandatory_genes_required', ModelInconsistencyError,
            'model \'bad_min_mandatory_genes_required\' is not consistent: min_genes_required 16 must '
            'be lesser or equal than the number of "accessory" and "mandatory" components in the model: 6')

    def test_bad_min_mandatory_genes_required_2(self):
        """min_genes_required lower than min_mandatory_genes_required is rejected."""
        # error raised by System initialization
        # which occur before check_consistency
        # the last test : not(model.min_mandatory_genes_required <= model.min_genes_required)
        # seems to be useless
        self._assert_parse_error(
            'foo/bad_min_mandatory_genes_required_2', ModelInconsistencyError,
            "foo/bad_min_mandatory_genes_required_2: min_genes_required '6' must be greater or equal"
            " than min_mandatory_genes_required '8'")

    def test_bad_min_mandatory_genes_required_4(self):
        """A non integer min_mandatory_genes_required is rejected."""
        model_2_detect = self._definitions(
            'foo/bad_min_mandatory_genes_required_4')
        with self.assertRaisesRegex(
                SyntaxError, "Invalid model definition (.*): "
                "min_mandatory_genes_required must be an integer: 12.5"):
            with self.catch_log():
                self.parser.parse(model_2_detect)

    def test_min_mandatory_genes_required_lesser_than_mandatory_genes(self):
        """min_mandatory_genes_required greater than the number of mandatory genes is rejected."""
        self._assert_parse_error(
            'foo/bad_min_mandatory_genes_required_3', ModelInconsistencyError,
            "model 'bad_min_mandatory_genes_required_3' is not consistent:"
            " 'min_mandatory_genes_required': 6 must be lesser or equal than the number of 'mandatory' "
            "components in the model: 5")

    def test_only_one_accessory(self):
        """A model whose single gene is not mandatory is rejected."""
        self._assert_parse_error(
            'foo/only_one_accessory', ModelInconsistencyError,
            "model 'only_one_accessory' is not consistent: there is only one gene in your model. "
            "So its status should be 'mandatory'.")

    def test_bad_max_nb_genes(self):
        """A non integer max_nb_genes is rejected; the message holds the definition path."""
        fqn = 'foo/bad_max_nb_genes'
        definition = self._definitions(fqn)[0]
        model_name, def_name = definition.split_fqn(definition.fqn)
        def_path = os.path.join(self.cfg.models_dir()[0], model_name,
                                'definitions', def_name)
        self._assert_parse_error(
            fqn, SyntaxError,
            f"Invalid model definition ({def_path}.xml): max_nb_genes must be an integer: HOHOHO")

    def test_bad_inter_gene_max_space(self):
        """A non integer inter_gene_max_space on the model is rejected."""
        fqn = 'foo/bad_inter_gene_max_space'
        model_family, model_name = fqn.split('/')
        def_path = os.path.join(self.cfg.models_dir()[0], model_family,
                                'definitions', model_name + ".xml")
        self._assert_parse_error(
            fqn, SyntaxError,
            f"Invalid model definition ({def_path}): inter_gene_max_space must be an integer: 12.5")

    def test_no_inter_gene_max_space(self):
        """A model without inter_gene_max_space is rejected."""
        def_path = os.path.join(self.cfg.models_dir()[0],
                                "foo/definitions/no_inter_gene_max_space.xml")
        self._assert_parse_error(
            'foo/no_inter_gene_max_space', SyntaxError,
            f"Invalid model definition ({def_path}): inter_gene_max_space must be defined")

    # ------------------------------------------------------------------ #
    # gene attributes
    # ------------------------------------------------------------------ #

    def test_loner(self):
        """Check the ``loner`` flag of flgC and tadZ in model_5 and model_6."""
        model_fqn = 'foo/model_5'
        self.parser.parse(self._definitions(model_fqn))
        m5 = self.model_bank[model_fqn]
        self.assertFalse(m5.get_gene('flgC').loner)
        self.assertTrue(m5.get_gene('tadZ').loner)

        model_fqn = 'foo/model_6'
        self.parser.parse(self._definitions(model_fqn))
        m6 = self.model_bank[model_fqn]
        self.assertFalse(m6.get_gene('flgC').loner)
        self.assertFalse(m6.get_gene('tadZ').loner)

    def test_multi_system(self):
        """Check the ``multi_system`` flag of flgC and fliE in model_5."""
        model_fqn = 'foo/model_5'
        self.parser.parse(self._definitions(model_fqn))

        m = self.model_bank[model_fqn]
        self.assertFalse(m.get_gene('flgC').multi_system)
        self.assertTrue(m.get_gene('fliE').multi_system)

    def test_multi_model(self):
        """Check the ``multi_model`` flag of flgC and abc in model_5."""
        model_fqn = 'foo/model_5'
        self.parser.parse(self._definitions(model_fqn))

        m = self.model_bank[model_fqn]
        self.assertFalse(m.get_gene('flgC').multi_model)
        self.assertTrue(m.get_gene('abc').multi_model)

    def test_gene_inter_gene_max_space(self):
        """Check inter_gene_max_space at model and gene level for model_5 and model_6."""
        self.parser.parse(self._definitions('foo/model_5', 'foo/model_6'))

        m5 = self.model_bank['foo/model_5']
        self.assertEqual(m5.name, 'model_5')
        self.assertEqual(m5.fqn, 'foo/model_5')
        self.assertEqual(m5.inter_gene_max_space, 20)
        self.assertIsNone(m5.get_gene('flgB').inter_gene_max_space)
        self.assertEqual(m5.get_gene('flgC').inter_gene_max_space, 2)
        m6 = self.model_bank['foo/model_6']
        self.assertEqual(m6.get_gene('flgC').inter_gene_max_space, 12)

    # ------------------------------------------------------------------ #
    # values overloaded by the configuration
    # ------------------------------------------------------------------ #

    def test_inter_gene_max_space_cfg(self):
        """inter_gene_max_space from the configuration overloads the xml value."""
        model_fqn = 'foo/model_5'
        self.args.inter_gene_max_space = [[model_fqn, '222']]
        self._reset_parser()

        self.parser.parse(self._definitions(model_fqn))
        m = self.model_bank[model_fqn]
        self.assertEqual(m.inter_gene_max_space, 222)

    def test_min_mandatory_genes_required_cfg(self):
        """min_mandatory_genes_required from the configuration overloads the xml value."""
        model_fqn = 'foo/model_5'
        self.args.min_mandatory_genes_required = [[model_fqn, '3']]
        self._reset_parser()

        self.parser.parse(self._definitions(model_fqn))
        m = self.model_bank[model_fqn]
        self.assertEqual(m.min_mandatory_genes_required, 3)

    def test_min_genes_required_cfg(self):
        """min_genes_required from the configuration overloads the xml value."""
        model_fqn = 'foo/model_5'
        self.args.min_genes_required = [[model_fqn, '4']]
        self._reset_parser()

        self.parser.parse(self._definitions(model_fqn))
        m = self.model_bank[model_fqn]
        self.assertEqual(m.min_genes_required, 4)

    def test_max_nb_genes_cfg(self):
        """max_nb_genes comes from the xml unless the configuration overloads it."""
        # max_nb_genes is specified in xml
        # no user configuration on this
        self._reset_parser()
        model_fqn = 'foo/model_6'  # 4 genes in this model but xml specify 3
        self.parser.parse(self._definitions(model_fqn))
        m = self.model_bank[model_fqn]
        self.assertEqual(m.max_nb_genes, 3)

        # max_nb_genes is specified from configuration
        # so this value must overload the value read from xml
        # (same banks and registry, only cfg and parser are renewed)
        model_fqn = 'foo/model_5'  # 4 genes in this model
        self.args.max_nb_genes = [[model_fqn, '6']]
        self.cfg = Config(MacsyDefaults(), self.args)
        self.parser = self._new_parser()

        self.parser.parse(self._definitions(model_fqn))
        m = self.model_bank[model_fqn]
        self.assertEqual(m.max_nb_genes, 6)

    def test_multi_loci_cfg(self):
        """multi_loci from the configuration overloads the xml value."""
        model_fqn = 'foo/model_5'
        self.args.multi_loci = model_fqn
        self._reset_parser()

        self.parser.parse(self._definitions(model_fqn))
        m = self.model_bank[model_fqn]
        self.assertTrue(m.multi_loci)

    # ------------------------------------------------------------------ #
    # invalid gene level values
    # ------------------------------------------------------------------ #

    def test_bad_gene_inter_gene_max_space_2(self):
        """``foo/bad_inter_gene_max_space_2`` is rejected: inter_gene_max_space 2.5 is not an integer."""
        self._assert_parse_error(
            'foo/bad_inter_gene_max_space_2', SyntaxError,
            "Invalid model definition 'bad_inter_gene_max_space_2': "
            "inter_gene_max_space must be an integer: 2.5")

    def test_bad_exchangeable_inter_gene_max_space(self):
        """``foo/bad_exchangeable_inter_gene_max_space`` is rejected: inter_gene_max_space 1.5 is not an integer."""
        self._assert_parse_error(
            'foo/bad_exchangeable_inter_gene_max_space', SyntaxError,
            "Invalid model definition 'bad_exchangeable_inter_gene_max_space': "
            "inter_gene_max_space must be an integer: 1.5")

    def test_parse_model_old_syntax(self):
        """Definitions written with an old syntax are rejected and the reason is logged."""
        old_definitions = (
            # the attribute vers is not set
            ('foo/model_old_1',
             "The model definition model_old_1.xml is not versioned. Please update your model."),
            # the root is system instead of model
            ('foo/model_old_2',
             "The model definition model_old_2.xml is obsolete. Please update your model."),
            # there still system_ref attribute
            ('foo/model_old_3',
             "The model definition model_old_3.xml is obsolete. Please update your model."),
            # there still homologs tag
            ('foo/model_old_4',
             "The model definition model_old_4.xml is obsolete. Please update your model."),
            # there still analogs tag
            ('foo/model_old_5',
             "The model definition model_old_5.xml is obsolete. Please update your model."),
        )
        for model_fqn, reason in old_definitions:
            with self.subTest(model=model_fqn):
                models_2_detect = self._definitions(model_fqn)
                with self.catch_log(log_name='macsypy') as log:
                    with self.assertRaises(MacsypyError):
                        self.parser.parse(models_2_detect)
                    log_msg = log.get_value().strip()
                self.assertEqual(
                    log_msg,
                    f"unable to parse model definition '{model_fqn}' : {reason}")
Esempio n. 9
0
def search_systems(config, model_bank, gene_bank, profile_factory, logger):
    """
    Do the job, this function is the orchestrator of all the macsyfinder mechanics.
    At the end several files are produced containing the results

      - macsyfinder.conf: The set of variables used to run this job
      - macsyfinder.systems: The list of the potential systems
      - macsyfinder.rejected_cluster: The list of all clusters and cluster combinations
                                      which have been rejected and the reason
      - macsyfinder.log: the copy of the standard output

    The process exits (:func:`sys.exit`) if the requested models cannot be resolved
    (``KeyError`` from ``get_def_to_detect``) or if the gene search raises.

    :param config: The MacSyFinder Configuration
    :type config: :class:`macsypy.config.Config` object
    :param model_bank: The bank populated with the available models
    :type model_bank: :class:`macsypy.model.ModelBank` object
    :param gene_bank: the bank containing all genes
    :type gene_bank: :class:`macsypy.gene.GeneBank` object
    :param profile_factory: The profile factory
    :type profile_factory: :class:`macsypy.gene.ProfileFactory`
    :param logger: The logger use to display information to the user.
                   It must be initialized. see :func:`macsypy.init_logger`
    :type logger: :class:`colorlog.Logger` object
    :return: the systems and rejected clusters found; two empty lists when no hit is detected
    :rtype: ([:class:`macsypy.system.System`, ...], [:class:`macsypy.cluster.RejectedCluster`, ...])
    :raise AssertionError: if ``config.db_type()`` is not one of
                           'ordered_replicon', 'gembase' or 'unordered'
    """
    working_dir = config.working_dir()
    config.save(path_or_buf=os.path.join(working_dir, config.cfg_name))

    # register every model location found in the models directory
    registry = ModelRegistry()
    models_loc_available = scan_models_dir(
        config.models_dir(),
        profile_suffix=config.profile_suffix(),
        relative_path=config.relative_path())
    for model_loc in models_loc_available:
        registry.add(model_loc)

    # build indexes
    idx = Indexes(config)
    # NOTE(review): the other options are read by calling the config (e.g. config.db_type());
    # here the attribute itself is passed -- confirm that `config.idx` is a value and not a callable.
    idx.build(force=config.idx)

    # create models
    parser = DefinitionParser(config, model_bank, gene_bank, registry,
                              profile_factory)
    # NOTE(review): only KeyError is handled here -- confirm this is the exception
    # get_def_to_detect raises when a requested model is unknown.
    try:
        models_def_to_detect = get_def_to_detect(config.models(), registry)
    except KeyError as err:
        sys.exit(f"macsyfinder: {err}")

    parser.parse(models_def_to_detect)

    logger.info(
        f"MacSyFinder's results will be stored in working_dir {working_dir}")
    logger.info(f"Analysis launched on {config.sequence_db()} for model(s):")

    for m in models_def_to_detect:
        logger.info(f"\t- {m.fqn}")

    models_to_detect = [
        model_bank[model_loc.fqn] for model_loc in models_def_to_detect
    ]

    # collect every gene to search: the genes of each model followed by their exchangeables
    all_genes = []
    for model in models_to_detect:
        genes = model.mandatory_genes + model.accessory_genes + model.neutral_genes + model.forbidden_genes
        # Exchangeable (formerly homologs/analogs) are also added because they can "replace" an important gene...
        ex_genes = []
        for g in genes:
            ex_genes.extend(g.exchangeables)
        all_genes += (genes + ex_genes)

    #############################################
    # this part of code is executed in parallel
    #############################################
    try:
        all_reports = search_genes(all_genes, config)
    except Exception as err:
        sys.exit(str(err))
    #############################################
    # end of parallel code
    #############################################
    all_hits = [hit for report in all_reports for hit in report.hits]

    if not all_hits:
        # No hits detected
        return [], []

    # It's important to keep this sorting to have in last all_hits version
    # the hits with the same replicon_name and position sorted by score
    # the best score in first
    hits_by_replicon = {}
    for hit in all_hits:
        hits_by_replicon.setdefault(hit.replicon_name, []).append(hit)

    for rep_name in hits_by_replicon:
        hits_by_replicon[rep_name] = get_best_hits(
            hits_by_replicon[rep_name], key='score')
        hits_by_replicon[rep_name].sort(key=attrgetter('position'))

    models_to_detect = sorted(models_to_detect, key=attrgetter('name'))
    db_type = config.db_type()
    if db_type in ('ordered_replicon', 'gembase'):
        systems, rejected_clusters = _search_in_ordered_replicon(
            hits_by_replicon, models_to_detect, config, logger)
        return systems, rejected_clusters
    if db_type == "unordered":
        likely_systems, rejected_hits = _search_in_unordered_replicon(
            hits_by_replicon, models_to_detect, logger)
        return likely_systems, rejected_hits
    # Raise explicitly instead of `assert False`: an assert statement is stripped
    # under `python -O`, which would make this function silently return None.
    raise AssertionError(f"dbtype have an invalid value {db_type}")