def test_MacsyDefaults_virtual_env(self):
    # Check MacsyDefaults when VIRTUAL_ENV points to a temporary directory
    # containing 'share/macsyfinder/models'.
    virtual_env = os.environ.get('VIRTUAL_ENV')
    with tempfile.TemporaryDirectory() as fake_virtual_env:
        # everything that runs while the environment is altered is protected
        # by the try/finally so the environment is always put back
        try:
            os.environ['VIRTUAL_ENV'] = fake_virtual_env
            system_models_dir = os.path.join(fake_virtual_env, 'share', 'macsyfinder', 'models')
            os.makedirs(system_models_dir)
            candidates = (system_models_dir,
                          os.path.join(os.path.expanduser('~'), '.macsyfinder', 'models'))
            self.defaults['system_models_dir'] = [path for path in candidates if os.path.exists(path)]

            defaults = MacsyDefaults()
            self.maxDiff = None
            self.assertDictEqual(defaults, self.defaults)

            new_defaults = {k: v for k, v in self.defaults.items()}
            new_defaults['previous_run'] = True
            new_defaults['worker'] = 5
            defaults = MacsyDefaults(previous_run=True, worker=5)
            self.assertDictEqual(defaults, new_defaults)
        finally:
            if virtual_env is None:
                # VIRTUAL_ENV was not set before the test:
                # remove the fake value instead of leaking it to the other tests
                os.environ.pop('VIRTUAL_ENV', None)
            else:
                os.environ['VIRTUAL_ENV'] = virtual_env
def test_MacsyDefaults_no_virtual_env(self):
    # Check MacsyDefaults when the VIRTUAL_ENV environment variable is not set.
    virtual_env = os.environ.get('VIRTUAL_ENV')
    common_path = os.path.join('share', 'macsyfinder')
    prefixes = ('/', os.path.join('/', 'usr', 'local'))
    system_models_dir = [os.path.join(root, common_path) for root in prefixes]
    system_models_dir.append(os.path.join(os.path.expanduser('~'), '.macsyfinder', 'models'))
    self.defaults['system_models_dir'] = [path for path in system_models_dir if os.path.exists(path)]

    # pop() rather than del: the variable does not exist
    # when the tests are run outside of a virtual environment
    os.environ.pop('VIRTUAL_ENV', None)
    try:
        defaults = MacsyDefaults()
        self.maxDiff = None
        self.assertDictEqual(defaults, self.defaults)

        new_defaults = {k: v for k, v in self.defaults.items()}
        new_defaults['previous_run'] = True
        new_defaults['worker'] = 5
        defaults = MacsyDefaults(previous_run=True, worker=5)
        self.assertDictEqual(defaults, new_defaults)
    finally:
        # restore the value found at the beginning of the test (even an empty one)
        if virtual_env is not None:
            os.environ['VIRTUAL_ENV'] = virtual_env
def test_MacsyDefaults(self):
    # without argument MacsyDefaults must be equal to the expected defaults
    self.assertDictEqual(MacsyDefaults(), self.defaults)

    # values given as keyword arguments replace the corresponding defaults
    expected = dict(self.defaults.items())
    expected['previous_run'] = True
    expected['worker'] = 5
    overridden = MacsyDefaults(previous_run=True, worker=5)
    self.assertDictEqual(overridden, expected)
def setUp(self):
    """
    Remember the current working directory, create an empty temporary
    directory and instantiate the default values and an empty Namespace.
    """
    self._current_dir = os.getcwd()
    self.tmp_dir = work_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_Config')
    # remove what a previous run may have left behind
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)
    os.mkdir(work_dir)
    self.defaults = MacsyDefaults()
    self.parsed_args = Namespace()
def setUp(self):
    """
    Create an empty temporary directory with a 'job_1' output directory,
    build the configuration, the model location, the indexes
    and the profile factory used by the tests.
    """
    self.tmp_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_search_genes')
    if os.path.exists(self.tmp_dir):
        shutil.rmtree(self.tmp_dir)
    os.mkdir(self.tmp_dir)

    job_dir = os.path.join(self.tmp_dir, 'job_1')
    args = argparse.Namespace(
        sequence_db=self.find_data("base", "test_base.fa"),
        db_type='gembase',
        models_dir=self.find_data('models'),
        log_level=30,
        out_dir=job_dir,
        res_search_dir=job_dir,
    )
    os.mkdir(job_dir)

    self.cfg = Config(MacsyDefaults(), args)
    self.model_name = 'foo'
    self.model_location = ModelLocation(path=os.path.join(args.models_dir, self.model_name))

    Indexes(self.cfg)._build_my_indexes()
    self.profile_factory = ProfileFactory(self.cfg)
def test_build_my_indexes(self):
    # start from an empty output directory
    out_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_indexes')
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    # work on a copy of the sequence file located in the output directory
    seq_db = self.find_data("base", "test_base_with_errors.fa")
    shutil.copy(seq_db, out_dir)

    args = argparse.Namespace(
        db_type='gembase',
        out_dir=out_dir,
        sequence_db=os.path.join(out_dir, os.path.basename(seq_db)),
    )
    self.cfg = Config(MacsyDefaults(), args)
    idx = Indexes(self.cfg)

    # the directory for index exist and is writable but
    # the sequence file is corrupted and cannot be read correctly
    with self.assertRaises(MacsypyError) as e, self.catch_log():
        idx._build_my_indexes(args.out_dir)
    message = str(e.exception)
    self.assertTrue(message.startswith("unable to index the sequence dataset:"))
def test_set_base_options(self):
    msf_cfg.theme = msf_cfg.Theme()
    cp = msf_cfg.ConfigParserWithComments()
    ask_ori = msf_cfg.ask
    defaults = MacsyDefaults()
    # the answers returned, one after the other, by the fake 'ask' function
    answers = iter([
        "Yes",      # enter section ?
        "ordered",  # db_type
        "linear",   # replicon_topology
        "None",     # sequence_db
    ])

    def fake_ask(*args, **kwargs):
        return next(answers)

    try:
        # replace the interactive function for the duration of the test
        msf_cfg.ask = fake_ask
        with self.catch_io(out=True):
            msf_cfg.set_base_options(cp, defaults)
            stdout = sys.stdout.getvalue().strip()
        self.assertEqual(stdout,
                         f"{msf_cfg.theme.SECTION}Configuring base options:{msf_cfg.theme.RESET}")
        self.assertTrue(cp.has_section("base"))
        expected_options = (
            ("db_type", "ordered"),
            ("replicon_topology", "linear"),
            ("sequence_db", "None"),
        )
        for option, value in expected_options:
            self.assertEqual(cp.get("base", option), value)
    finally:
        msf_cfg.ask = ask_ori
def test_set_general_options(self):
    msf_cfg.theme = msf_cfg.Theme()
    cp = msf_cfg.ConfigParserWithComments()
    ask_ori = msf_cfg.ask
    defaults = MacsyDefaults()
    # the answers returned, one after the other, by the fake 'ask' function
    answers = iter([
        "Yes",      # enter section ?
        'warning',  # log_level
        0,          # worker
        True,       # mute
    ])

    def fake_ask(*args, **kwargs):
        return next(answers)

    try:
        # replace the interactive function for the duration of the test
        msf_cfg.ask = fake_ask
        with self.catch_io(out=True):
            msf_cfg.set_general_options(cp, defaults)
            stdout = sys.stdout.getvalue().strip()
        self.assertEqual(stdout,
                         f"{msf_cfg.theme.SECTION}Configuring general options:{msf_cfg.theme.RESET}")
        self.assertTrue(cp.has_section("general"))
        expected_options = (
            ("log_level", "warning"),
            ("worker", "0"),
            ("mute", "True"),
        )
        for option, value in expected_options:
            self.assertEqual(cp.get("general", option), value)
    finally:
        msf_cfg.ask = ask_ori
def test_set_section_use_defaults(self):
    # with use_defaults=True the test expects the section to be created
    # but the 'hmmer' option not to be written in it
    msf_cfg.theme = msf_cfg.Theme()
    cp = msf_cfg.ConfigParserWithComments()
    sec_name = "new_section"
    hmmer_option = {
        "question": "that is the question",
        "validator": msf_cfg.check_str,
        "default": "Yes",
        "explanation": "",
    }
    options = {'hmmer': hmmer_option}
    defaults = MacsyDefaults(hmmer="Yes")

    with self.catch_io(out=True):
        msf_cfg.set_section(sec_name, options, cp, defaults, use_defaults=True)
        stdout = sys.stdout.getvalue().strip()

    self.assertTrue(cp.has_section(sec_name))
    self.assertFalse(cp.has_option(sec_name, 'hmmer'))
    self.assertEqual(stdout,
                     f"{msf_cfg.theme.SECTION}Configuring new_section options:{msf_cfg.theme.RESET}")
def test_fill_gembase_min_max_with_topology(self):
    # write a topology file next to the sequence file
    self.args.topology_file = self.args.sequence_db + ".topo"
    with open(self.args.topology_file, 'w') as f:
        f.write('# topology file\nESCO030p01 : circular\nPSAE001c01 : linear\n')
    cfg = Config(MacsyDefaults(), self.args)
    # RepliconDB is instantiated with the fake constructor provided by the test case
    RepliconDB.__init__ = self.fake_init
    db = RepliconDB(cfg)
    topo_dict = db._fill_topology()
    db._fill_gembase_min_max(topo_dict, 'circular')

    self.assertEqual(len(db._DB), 3)
    self.assertEqual(set(db._DB.keys()), {'ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'})

    # replicon name, expected topology, min, max, genes
    expected = (
        ('ESCO030p01', 'circular', 1, 67, self.ESCO030p01_genes),
        ('PSAE001c01', 'linear', 68, 133, self.PSAE001c01_genes),
        ('NC_xxxxx_xx', 'circular', 134, 141, self.NCDB_genes),
    )
    for name, topology, first, last, genes in expected:
        replicon = db[name]
        self.assertEqual(replicon.topology, topology)
        self.assertEqual(replicon.min, first)
        self.assertEqual(replicon.max, last)
        self.assertEqual(replicon.genes, genes)
def test_get_def_to_detect(self):
    fake_model_dir = os.path.join(self._data_dir, 'fake_model_dir')
    cmd_args = argparse.Namespace(
        models_dir=fake_model_dir,
        models=[('set_1', 'def_1_1', 'def_1_2', 'def_1_3')],
    )
    # the configuration is built but the resulting object is not used below
    Config(MacsyDefaults(models_dir=os.path.join(self._data_dir, 'fake_model_dir')), cmd_args)

    registry = ModelRegistry()
    for model_location in scan_models_dir(cmd_args.models_dir):
        registry.add(model_location)
    model_loc = registry['set_1']

    # case where models are specified on command line
    def_names = ['def_1_1', 'def_1_2', 'def_1_3']
    detected = get_def_to_detect([('set_1', def_names)], registry)
    expected = [model_loc.get_definition(f'set_1/{name}') for name in ('def_1_1', 'def_1_2', 'def_1_3')]
    self.assertListEqual(detected, expected)

    # case we search all models
    detected = get_def_to_detect([('set_1', ['all'])], registry)
    self.assertListEqual(detected, model_loc.get_all_definitions())

    # case the models required does not exists
    with self.assertRaises(ValueError):
        get_def_to_detect([('set_1', ['FOO', 'BAR'])], registry)
def test_build_my_db(self):
    gene_name = "gspD"
    args = argparse.Namespace(
        db_type='gembase',
        models_dir=self.find_data('models'),
        log_level=30,
    )
    self.cfg = Config(MacsyDefaults(), args)
    gspD_hmmer_path = self.find_data(os.path.join('hmm', 'gspD.search_hmm.out'))
    hmm_prof = macsyprofile.HmmProfile(gene_name, 596, gspD_hmmer_path, self.cfg)

    db = hmm_prof._build_my_db(gspD_hmmer_path)

    # every hit identifier is expected as a key, associated with None
    expected_ids = (
        'PSAE001c01_031420',
        'PSAE001c01_051090',
        'PSAE001c01_018920',
        'PSAE001c01_043580',
        'PSAE001c01_017350',
        'PSAE001c01_013980',
        'PSAE001c01_026600',
        'NC_xxxxx_xx_056141',
        'PSAE001c01_006940',
    )
    self.assertDictEqual(db, dict.fromkeys(expected_ids))
def test_min_genes_required_cfg(self):
    # test min_genes_required is specified from configuration
    # so this value must overload the value read from xml
    model_fqn = 'foo/model_5'
    min_genes_required = [[model_fqn, '4']]
    self.args.min_genes_required = min_genes_required
    self.cfg = Config(MacsyDefaults(), self.args)
    self.model_bank = ModelBank()
    self.gene_bank = GeneBank()
    self.model_registry = ModelRegistry()
    models_location = scan_models_dir(self.args.models_dir)
    for ml in models_location:
        self.model_registry.add(ml)
    self.parser = DefinitionParser(self.cfg,
                                   self.model_bank,
                                   self.gene_bank,
                                   self.model_registry,
                                   self.profile_factory)
    models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
    self.parser.parse(models_2_detect)

    # the value set in the configuration ('4') is expected, as an integer, on the parsed model
    m = self.model_bank[model_fqn]
    self.assertEqual(m.min_genes_required, 4)
def test_execute_hmm_w_GA_n_nocutga(self):
    # case GA threshold in profile but --no-cut-ga is set
    args = argparse.Namespace(
        sequence_db=self.find_data("base", "test_1.fasta"),
        db_type='gembase',
        models_dir=self.find_data('models'),
        res_search_dir=tempfile.gettempdir(),
        log_level=0,
        e_value_search=0.5,
        no_cut_ga=True,
    )
    cfg = Config(MacsyDefaults(), args)

    model = Model("foo/T2SS", 10)
    gene_name = 'T5aSS_PF03797'
    c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
    gene = ModelGene(c_gene, model)
    profile_path = self.model_location.get_profile("T5aSS_PF03797")
    profile = Profile(gene, cfg, profile_path)
    profile.execute()

    # the threshold used is checked on the 9th line of the hmmer raw output
    with open(profile.hmm_raw_output, 'r') as hmmer_raw_out_file:
        first_lines = [hmmer_raw_out_file.readline() for _ in range(9)]
    self.assertEqual("# sequence reporting threshold: E-value <= 0.5",
                     first_lines[-1].strip())
def test_fill_my_db(self):
    gene_name = "gspD"
    args = argparse.Namespace(
        db_type='gembase',
        models_dir=self.find_data('models'),
        log_level=30,
        sequence_db=self.find_data("base", "test_base.fa"),
        index_dir=self.tmpdir,
    )
    cfg = Config(MacsyDefaults(), args)
    gspD_hmmer_path = self.find_data('hmm', 'gspD.search_hmm.out')

    macsyfinder_idx = Indexes(cfg).build()
    hmm_prof = macsyprofile.HmmProfile(gene_name, 596, gspD_hmmer_path, cfg)
    db = hmm_prof._build_my_db(gspD_hmmer_path)
    # _fill_my_db works in place on the dictionary built above
    hmm_prof._fill_my_db(macsyfinder_idx, db)

    expected_db = {
        'PSAE001c01_031420': (658, 73),
        'PSAE001c01_051090': (714, 75),
        'PSAE001c01_018920': (776, 71),
        'PSAE001c01_043580': (416, 74),
        'PSAE001c01_017350': (600, 70),
        'PSAE001c01_013980': (759, 69),
        'PSAE001c01_026600': (273, 72),
        'NC_xxxxx_xx_056141': (803, 141),
        'PSAE001c01_006940': (803, 68),
    }
    self.assertDictEqual(db, expected_db)
def test_search_systems_unordered(self):
    logger = logging.getLogger('macsypy.macsyfinder')
    macsypy.logger_set_level(level='ERROR')
    defaults = MacsyDefaults()

    out_dir = os.path.join(self.tmp_dir, 'macsyfinder_test_search_systems')
    os.mkdir(out_dir)
    seq_db = self.find_data('base', 'VICH001.B.00001.C001.prt')
    model_dir = self.find_data('data_set', 'models')

    # test unordered replicon
    args = f"--sequence-db {seq_db} --db-type=unordered --models-dir {model_dir} --models set_1 all -w 4 -o {out_dir}"
    _, parsed_args = parse_args(args.split())
    config = Config(defaults, parsed_args)
    model_bank = ModelBank()
    gene_bank = GeneBank()
    profile_factory = ProfileFactory(config)

    systems, uncomplete_sys = search_systems(config, model_bank, gene_bank, profile_factory, logger)

    found_ids = [s.id for s in systems]
    self.assertListEqual(found_ids,
                         ['Unordered_T2SS_4',
                          'Unordered_MSH_3',
                          'Unordered_T4P_5',
                          'Unordered_T4bP_6'])

    uncomplete_ids = [s.id for s in uncomplete_sys]
    self.assertListEqual(uncomplete_ids,
                         ['Unordered_Archaeal-T4P_1',
                          'Unordered_ComM_2',
                          'Unordered_Tad_7'])
def test_search_systems_model_unknown(self):
    logger = logging.getLogger('macsypy.macsyfinder')
    macsypy.logger_set_level(level='ERROR')
    defaults = MacsyDefaults()

    out_dir = os.path.join(self.tmp_dir, 'macsyfinder_test_search_systems')
    os.mkdir(out_dir)
    seq_db = self.find_data('base', 'test_1.fasta')
    model_dir = self.find_data('data_set', 'models')

    args = f"--sequence-db {seq_db} --db-type=gembase --models-dir {model_dir} --models nimporaoik -w 4 -o {out_dir}"
    _, parsed_args = parse_args(args.split())
    config = Config(defaults, parsed_args)
    model_bank = ModelBank()
    gene_bank = GeneBank()
    profile_factory = ProfileFactory(config)

    # sys.exit is replaced by the fake exit of the test case,
    # the test expects a TypeError carrying the error message
    exit_ori = sys.exit
    sys.exit = self.fake_exit
    try:
        with self.assertRaises(TypeError) as ctx:
            search_systems(config, model_bank, gene_bank, profile_factory, logger)
        error_message = str(ctx.exception)
        self.assertEqual(error_message,
                         "macsyfinder: \"No such model definition: 'nimporaoik'\"")
    finally:
        sys.exit = exit_ori
def setUp(self) -> None:
    """
    Build a model 'foo/T2SS' with gspD (mandatory) and sctJ (accessory),
    four core hits and the corresponding model hits.
    """
    args = argparse.Namespace(
        sequence_db=self.find_data("base", "test_1.fasta"),
        db_type='gembase',
        models_dir=self.find_data('models'),
    )
    cfg = Config(MacsyDefaults(), args)
    models_location = ModelLocation(path=os.path.join(args.models_dir, 'foo'))
    model = Model("foo/T2SS", 10)
    profile_factory = ProfileFactory(cfg)

    self.cg_gspd = CoreGene(models_location, "gspD", profile_factory)
    self.mg_gspd = ModelGene(self.cg_gspd, model, loner=True, multi_system=True)
    self.cg_sctj = CoreGene(models_location, "sctJ", profile_factory)
    self.mg_sctj = ModelGene(self.cg_sctj, model)
    model.add_mandatory_gene(self.mg_gspd)
    model.add_accessory_gene(self.mg_sctj)

    def core_hit(core_gene, hit_id, position):
        # the hits only differ by their gene, their identifier and their position
        return CoreHit(core_gene, hit_id, 803, "replicon_id", position, 1.0, 1.0, 1.0, 1.0, 10, 20)

    self.chit_1 = core_hit(self.cg_gspd, "hit_1", 2)
    self.chit_2 = core_hit(self.cg_sctj, "hit_2", 3)
    self.chit_3 = core_hit(self.cg_gspd, "hit_3", 10)
    self.chit_4 = core_hit(self.cg_gspd, "hit_4", 20)

    self.mhit_1 = ModelHit(self.chit_1, self.mg_gspd, GeneStatus.MANDATORY)
    self.mhit_2 = ModelHit(self.chit_2, self.mg_sctj, GeneStatus.ACCESSORY)
    self.mhit_3 = ModelHit(self.chit_3, self.mg_gspd, GeneStatus.MANDATORY)
    self.mhit_4 = ModelHit(self.chit_4, self.mg_gspd, GeneStatus.MANDATORY)
def setUp(self):
    """
    Create an empty output directory holding a copy of the sequence file,
    then build the configuration, the hmmer directory, the model location,
    the profile factory and the indexes.
    """
    models_dir = self.find_data('models')
    res_search_dir = tempfile.gettempdir()
    out_dir = os.path.join(res_search_dir, 'test_macsyfinder_Report')
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)
    seq_db = self.find_data("base", "test_base.fa")
    shutil.copy(seq_db, out_dir)

    args = argparse.Namespace(
        db_type='gembase',
        models_dir=models_dir,
        res_search_dir=res_search_dir,
        log_level=30,
        out_dir=out_dir,
        sequence_db=os.path.join(out_dir, os.path.basename(seq_db)),
    )
    self.cfg = Config(MacsyDefaults(), args)
    os.mkdir(os.path.join(self.cfg.out_dir(), self.cfg.hmmer_dir()))

    self.model_name = 'foo'
    self.model_location = ModelLocation(path=os.path.join(args.models_dir, self.model_name))

    # we need to reset the ProfileFactory
    # because it's a like a singleton
    # so other tests are influenced by ProfileFactory and it's configuration
    # for instance search_genes get profile without hmmer_exe
    self.profile_factory = ProfileFactory(self.cfg)

    Indexes(self.cfg).build()
def setUp(self) -> None:
    """
    Build a model 'foo/T2SS' with gspD (mandatory) and sctJ (accessory)
    and one hit for each of these two genes.
    """
    args = argparse.Namespace(
        sequence_db=self.find_data("base", "test_1.fasta"),
        db_type='gembase',
        models_dir=self.find_data('models'),
    )
    cfg = Config(MacsyDefaults(), args)
    models_location = ModelLocation(path=os.path.join(args.models_dir, 'foo'))
    model = Model("foo/T2SS", 10)
    profile_factory = ProfileFactory(cfg)

    self.c_gene_gspd = CoreGene(models_location, "gspD", profile_factory)
    self.gene_gspd = ModelGene(self.c_gene_gspd, model)
    self.c_gene_sctj = CoreGene(models_location, "sctJ", profile_factory)
    self.gene_sctj = ModelGene(self.c_gene_sctj, model)
    model.add_mandatory_gene(self.gene_gspd)
    model.add_accessory_gene(self.gene_sctj)

    # the two hits share everything but the gene, the identifier and the position
    common = (1.0, 1.0, 1.0, 1.0, 10, 20)
    self.hit_1 = Hit(self.c_gene_gspd, "hit_1", 803, "replicon_id", 2, *common)
    self.hit_2 = Hit(self.c_gene_sctj, "hit_2", 803, "replicon_id", 3, *common)
def test_inter_gene_max_space_cfg(self):
    # test inter_gene_max_space is specified from configuration
    # so this value must overload the value read from xml
    model_fqn = 'foo/model_5'
    self.args.inter_gene_max_space = [[model_fqn, '222']]
    self.cfg = Config(MacsyDefaults(), self.args)

    self.model_bank = ModelBank()
    self.gene_bank = GeneBank()
    self.model_registry = ModelRegistry()
    for model_location in scan_models_dir(self.args.models_dir):
        self.model_registry.add(model_location)

    self.parser = DefinitionParser(self.cfg,
                                   self.model_bank,
                                   self.gene_bank,
                                   self.model_registry,
                                   self.profile_factory)
    definition = self.model_registry['foo'].get_definition(model_fqn)
    self.parser.parse([definition])

    parsed_model = self.model_bank[model_fqn]
    self.assertEqual(parsed_model.inter_gene_max_space, 222)
def main(args=None) -> None:
    """
    The main entrypoint of the script

    Parse the command line, select the color theme, ask for the options
    section by section and write the result in 'macsyfinder.conf'
    in the current directory.

    :param args: the arguments passed on the command line (without the program name),
                 if None ``sys.argv[1:]`` is used.
    """
    global theme
    if args is None:
        args = sys.argv[1:]
    parsed_args = parse_args(args)

    col_init()
    if parsed_args.no_color:
        theme = Theme(ERROR=Style.BRIGHT,
                      WARN='',
                      SECTION='',
                      RESET=Style.RESET_ALL,
                      RETRY='',
                      QUESTION='',
                      EMPHASIZE='',
                      EXPLANATION='',
                      DEFAULT='')
    elif parsed_args.white_bg:
        theme = Theme(ERROR=Style.BRIGHT + Fore.RED,
                      WARN=Fore.LIGHTRED_EX,
                      RESET=Style.RESET_ALL,
                      RETRY=Style.BRIGHT + Fore.MAGENTA,
                      QUESTION=Style.BRIGHT,
                      EMPHASIZE=Style.BRIGHT,
                      EXPLANATION=Style.RESET_ALL,
                      DEFAULT=Style.BRIGHT + Fore.LIGHTBLACK_EX)
    # otherwise the theme is left untouched:
    # parsed_args.dark_bg is always True
    # add in options only for coherence
    # and is the default

    config = ConfigParserWithComments()
    defaults = MacsyDefaults()
    conf_path = 'macsyfinder.conf'
    if os.path.exists(conf_path):
        # never overwrite an existing configuration file without asking
        go_on = ask(f"The '{conf_path}' already exists Overwrite/abort",
                    check_choice,
                    expected=["O", "a"],
                    default="O",
                    question_color=theme.QUESTION)
        if go_on == "a":
            sys.exit(1)

    print(prolog())
    # the sections are configured in this order
    section_setters = (set_path_options,
                       set_hmmer_options,
                       set_score_options,
                       set_general_options,
                       set_base_options)
    for set_options in section_setters:
        set_options(config, defaults, use_defaults=parsed_args.defaults)

    serialize(config, conf_path)
    print(epilog(conf_path))
def setUp(self):
    """Build the configuration and an empty ModelBank used by the tests."""
    args = argparse.Namespace(
        sequence_db=self.find_data("base", "test_1.fasta"),
        db_type='gembase',
        models_dir=self.find_data('models'),
        res_search_dir=tempfile.gettempdir(),
        log_level=30,
    )
    self.cfg = Config(MacsyDefaults(), args)
    self.system_bank = ModelBank()
def test_MacsyDefaults_with_MACSY_DATA(self):
    # check the defaults when macsypy.config.__MACSY_DATA__ is set to a custom value
    import macsypy.config
    config_module = macsypy.config

    macsydata = config_module.__MACSY_DATA__
    config_module.__MACSY_DATA__ = 'niportnaoik'
    self.defaults['models_dir'] = 'niportnaoik/data/models'
    try:
        self.assertDictEqual(MacsyDefaults(), self.defaults)
    finally:
        # put back the original value whatever the result of the test
        config_module.__MACSY_DATA__ = macsydata
def setUp(self) -> None:
    """
    Build a model 'foo' with gspD (mandatory), sctJ (accessory) and sctN
    (exchangeable of sctJ), five core hits and, for each of them,
    a ModelHit and a MultiSystem hit.
    """
    args = argparse.Namespace(
        sequence_db=self.find_data("base", "test_1.fasta"),
        db_type='gembase',
        models_dir=self.find_data('models'),
    )
    cfg = Config(MacsyDefaults(), args)
    model_name = 'foo'
    self.models_location = ModelLocation(path=os.path.join(args.models_dir, model_name))

    # we need to reset the ProfileFactory
    # because it's a like a singleton
    # so other tests are influenced by ProfileFactory and it's configuration
    # for instance search_genes get profile without hmmer_exe
    ProfileFactory(cfg)
    model = Model(model_name, 10)
    self.profile_factory = ProfileFactory(cfg)

    c_gene_gspd = CoreGene(self.models_location, "gspD", self.profile_factory)
    gene_gspd = ModelGene(c_gene_gspd, model, multi_system=True)
    c_gene_sctj = CoreGene(self.models_location, "sctJ", self.profile_factory)
    gene_sctj = ModelGene(c_gene_sctj, model, multi_system=True)
    c_gene_sctn = CoreGene(self.models_location, "sctN", self.profile_factory)
    gene_sctn = Exchangeable(c_gene_sctn, gene_sctj)
    gene_sctj.add_exchangeable(gene_sctn)

    model.add_mandatory_gene(gene_gspd)
    model.add_accessory_gene(gene_sctj)

    def core_hit(core_gene, hit_id, position, score):
        # CoreHit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match
        return CoreHit(core_gene, hit_id, 803, "replicon_id", position, 1.0, score, 1.0, 1.0, 10, 20)

    #                                       pos  score
    chit_1 = core_hit(c_gene_gspd, "hit_1", 2, 1.0)
    chit_2 = core_hit(c_gene_sctj, "hit_2", 3, 1.0)
    chit_3 = core_hit(c_gene_gspd, "hit_3", 10, 3.0)
    chit_4 = core_hit(c_gene_sctn, "hit_4", 14, 4.0)
    chit_5 = core_hit(c_gene_gspd, "hit_5", 20, 2.0)

    self.mhit_1 = ModelHit(chit_1, gene_gspd, GeneStatus.MANDATORY)
    self.mhit_2 = ModelHit(chit_2, gene_sctj, GeneStatus.ACCESSORY)
    self.mhit_3 = ModelHit(chit_3, gene_gspd, GeneStatus.MANDATORY)
    self.mhit_4 = ModelHit(chit_4, gene_sctn, GeneStatus.ACCESSORY)
    self.mhit_5 = ModelHit(chit_5, gene_gspd, GeneStatus.MANDATORY)

    self.ms_1 = MultiSystem(chit_1, gene_ref=gene_gspd, gene_status=GeneStatus.MANDATORY)
    self.ms_2 = MultiSystem(chit_2, gene_ref=gene_sctj, gene_status=GeneStatus.ACCESSORY)
    self.ms_3 = MultiSystem(chit_3, gene_ref=gene_gspd, gene_status=GeneStatus.MANDATORY)
    self.ms_4 = MultiSystem(chit_4, gene_ref=gene_sctn, gene_status=GeneStatus.ACCESSORY)
    self.ms_5 = MultiSystem(chit_5, gene_ref=gene_gspd, gene_status=GeneStatus.MANDATORY)
def setUp(self) -> None:
    """Build the configuration, the location of the model 'foo' and the profile factory."""
    args = argparse.Namespace(
        sequence_db=self.find_data("base", "test_1.fasta"),
        db_type='gembase',
        models_dir=self.find_data('models'),
    )
    self.cfg = Config(MacsyDefaults(), args)
    self.model_name = 'foo'
    model_path = os.path.join(args.models_dir, self.model_name)
    self.model_location = ModelLocation(path=model_path)
    self.profile_factory = ProfileFactory(self.cfg)
def main(args=None, log_level=None) -> None:
    """
    main entry point to macsy_merge_results

    Check (and create if needed) the output directory, set up the logging
    then merge the results directories given on the command line.

    :param args: the arguments passed on the command line
    :type args: list of str
    :param log_level: the output verbosity
    :type log_level: a positive int or a string among 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
    :raise IOError: if the output directory cannot be created (permission denied),
                    is not a directory or is not writable.
    """
    global _log

    if args is None:
        args = sys.argv[1:]
    parsed_args = parse_args(args)
    out_dir = parsed_args.out_dir

    outdir_err = None
    if not os.path.exists(out_dir):
        try:
            os.mkdir(out_dir)
        except PermissionError as err:
            outdir_err = f"Cannot create {parsed_args.out_dir} : {err}"
    elif not os.path.isdir(out_dir):
        outdir_err = f"{parsed_args.out_dir} is not a directory"
    elif not os.access(out_dir, os.W_OK):
        outdir_err = f"{parsed_args.out_dir} is not writable"

    if outdir_err:
        # the regular logger is not initialized yet (it needs the output directory)
        # so the problem is reported through a dedicated handler on stdout
        log = colorlog.getLogger('macsypy.merge')
        stdout_handler = colorlog.StreamHandler(sys.stdout)
        stdout_handler.setFormatter(
            colorlog.ColoredFormatter("%(log_color)s%(message)s",
                                      datefmt=None,
                                      reset=True,
                                      log_colors={'CRITICAL': 'bold_red'},
                                      secondary_log_colors={},
                                      style='%')
        )
        log.addHandler(stdout_handler)
        log.critical(outdir_err)
        # the message has been logged, do not display the traceback in addition
        sys.tracebacklimit = 0
        raise IOError() from None

    macsypy.init_logger(log_file=os.path.join(out_dir, 'macsy_merge_results.out'),
                        out=not parsed_args.mute)
    _log = colorlog.getLogger('macsypy.merge')

    if not log_level:
        # logs are specify from args options
        config = MacsyDefaults()
        verbosity_shift = (10 * parsed_args.quiet) - (10 * parsed_args.verbose)
        log_level = max(config.log_level + verbosity_shift, 1)
    # otherwise log_level is used as is (used by unit tests to mute or unmute logs)
    macsypy.logger_set_level(log_level)

    merge_results(parsed_args.results_dirs, out_dir=out_dir)
def test_fill_topology(self):
    # the topology read by RepliconDB must be the one written in the topology file
    self.args.topology_file = self.args.sequence_db + ".topo"
    db_send = {'ESCO030p01': 'circular',
               'PSAE001c01': 'linear'}
    with open(self.args.topology_file, 'w') as topo_file:
        topo_file.writelines(f'{replicon} : {topology}\n' for replicon, topology in db_send.items())

    cfg = Config(MacsyDefaults(), self.args)
    # RepliconDB is instantiated with the fake constructor provided by the test case
    RepliconDB.__init__ = self.fake_init
    db = RepliconDB(cfg)
    rcv_topo = db._fill_topology()
    self.assertDictEqual(db_send, rcv_topo)
def setUp(self) -> None:
    """
    Build the command line namespace, the configuration, the location of the
    model 'foo', the profile factory and the hit weights used by the tests.
    """
    self.args = argparse.Namespace(
        sequence_db=self.find_data("base", "test_1.fasta"),
        db_type='gembase',
        models_dir=self.find_data('models'),
        res_search_dir="blabla",
    )
    self.cfg = Config(MacsyDefaults(), self.args)
    self.model_name = 'foo'
    model_path = os.path.join(self.args.models_dir, self.model_name)
    self.model_location = ModelLocation(path=model_path)
    self.profile_factory = ProfileFactory(self.cfg)
    weights = self.cfg.hit_weights()
    self.hit_weights = HitWeight(**weights)
def test_hit_weight_default(self):
    # weights expected when nothing but the mandatory options is specified
    args = argparse.Namespace(
        sequence_db=self.find_data("base", "test_1.fasta"),
        db_type='gembase',
        models_dir=self.find_data('models'),
    )
    cfg = Config(MacsyDefaults(), args)
    hit_weight = HitWeight(**cfg.hit_weights())

    expected_weights = (
        ('mandatory', 1),
        ('accessory', 0.5),
        ('itself', 1),
        ('exchangeable', 0.8),
        ('loner_multi_system', 0.7),
    )
    for kind, weight in expected_weights:
        self.assertEqual(getattr(hit_weight, kind), weight)