def _search_in_ordered_replicon(hits_by_replicon, models_to_detect, config, logger):
    """
    Search systems, model by model, on every replicon of an ordered dataset.

    For each replicon and each model, the hits are filtered by the model,
    grouped into clusters, and every candidate combination of clusters is
    submitted to an :class:`OrderedMatchMaker`.

    :param hits_by_replicon: mapping replicon name -> hits found on this replicon
    :param models_to_detect: the models to search; iterated once per replicon,
                             so it must be re-iterable (e.g. a list)
    :param config: the configuration; ``hit_weights()`` and ``redundancy_penalty()`` are read from it
    :param logger: the logger used to report progress
    :return: the systems found, sorted on
             (replicon_name, position[0], model.fqn, -score),
             and every match result that is not a :class:`System`
    :rtype: tuple (list, list)
    """
    systems = []
    rejected_clusters = []
    rep_db = RepliconDB(config)
    # These two values depend only on the configuration: compute them once
    # instead of once per (replicon, model) and once per cluster combination.
    hit_weights = HitWeight(**config.hit_weights())
    redundancy_penalty = config.redundancy_penalty()

    for rep_name in hits_by_replicon:
        logger.info("\n{:#^60}".format(f" Hits analysis for replicon {rep_name} "))
        rep_info = rep_db[rep_name]
        for model in models_to_detect:
            logger.info(f"Check model {model.fqn}")
            hits_related_one_model = model.filter(hits_by_replicon[rep_name])
            logger.debug("{:#^80}".format(" hits related to {} ".format(model.name)))
            logger.debug("".join([str(h) for h in hits_related_one_model]))
            logger.debug("#" * 80)

            logger.info("Building clusters")
            clusters = cluster.build_clusters(hits_related_one_model, rep_info, model, hit_weights)
            logger.debug("{:#^80}".format("CLUSTERS"))
            logger.debug("\n" + "\n".join([str(c) for c in clusters]))
            logger.debug("#" * 80)

            logger.info("Searching systems")
            if model.multi_loci:
                # The loners are already in the clusters list with their context,
                # so they are taken into account: try every non-empty
                # combination of clusters.
                clusters_combination = [itertools.combinations(clusters, i)
                                        for i in range(1, len(clusters) + 1)]
            else:
                # The loners must be added manually,
                # but only if the cluster does not already contain them.
                loners = cluster.get_loners(hits_related_one_model, model, hit_weights)
                clusters_combination = []
                for one_cluster in clusters:
                    one_clust_combination = [one_cluster]
                    one_clust_combination.extend(cluster.filter_loners(one_cluster, loners))
                    # wrapped in a list so both branches yield "sets of combinations"
                    clusters_combination.append([one_clust_combination])

            for one_combination_set in clusters_combination:
                for one_clust_combination in one_combination_set:
                    ordered_matcher = OrderedMatchMaker(model, redundancy_penalty=redundancy_penalty)
                    res = ordered_matcher.match(one_clust_combination)
                    if isinstance(res, System):
                        systems.append(res)
                    else:
                        rejected_clusters.append(res)

    # sorting an empty list is a no-op, so no guard is needed
    systems.sort(key=lambda syst: (syst.replicon_name, syst.position[0], syst.model.fqn, -syst.score))
    return systems, rejected_clusters
def test_hit_weight_default(self):
    """A HitWeight built from a Config with no weight option set carries the expected values."""
    args = argparse.Namespace(
        sequence_db=self.find_data("base", "test_1.fasta"),
        db_type='gembase',
        models_dir=self.find_data('models'),
    )
    weights = HitWeight(**Config(MacsyDefaults(), args).hit_weights())
    # (attribute, expected value), checked in this order
    expectations = (
        ('mandatory', 1),
        ('accessory', 0.5),
        ('itself', 1),
        ('exchangeable', 0.8),
        ('loner_multi_system', 0.7),
    )
    for attr_name, expected in expectations:
        self.assertEqual(getattr(weights, attr_name), expected)
def setUp(self) -> None:
    """Build the configuration, model location, profile factory and hit weights shared by the tests."""
    self.args = argparse.Namespace(
        sequence_db=self.find_data("base", "test_1.fasta"),
        db_type='gembase',
        models_dir=self.find_data('models'),
        res_search_dir="blabla",
    )
    self.cfg = Config(MacsyDefaults(), self.args)
    self.model_name = 'foo'
    model_path = os.path.join(self.args.models_dir, self.model_name)
    self.model_location = ModelLocation(path=model_path)
    self.profile_factory = ProfileFactory(self.cfg)
    weights = self.cfg.hit_weights()
    self.hit_weights = HitWeight(**weights)
def setUp(self) -> None:
    """Build the shared fixtures and restart the AbstractSetOfHits id counter."""
    namespace = argparse.Namespace(
        sequence_db=self.find_data("base", "test_1.fasta"),
        db_type='gembase',
        models_dir=self.find_data('models'),
    )
    self.cfg = Config(MacsyDefaults(), namespace)
    self.model_name = 'foo'
    model_path = os.path.join(namespace.models_dir, self.model_name)
    self.model_location = ModelLocation(path=model_path)
    self.profile_factory = ProfileFactory(self.cfg)
    weights = self.cfg.hit_weights()
    self.hit_weights = HitWeight(**weights)
    # restart the unique id counter of AbstractSetOfHits at 1
    # so that every test gets predictable ids
    AbstractSetOfHits._id = itertools.count(1)
def test_hit_weight_not_default(self):
    """Weights given through the command-line namespace are the ones carried by HitWeight."""
    args = argparse.Namespace(
        sequence_db=self.find_data("base", "test_1.fasta"),
        db_type='gembase',
        models_dir=self.find_data('models'),
        mandatory_weight=2.0,
        accessory_weight=3.0,
        neutral_weight=4.0,
        exchangeable_weight=5.0,
        itself_weight=6.0,
        loner_multi_system_weight=12,
    )
    weights = HitWeight(**Config(MacsyDefaults(), args).hit_weights())
    # (attribute, expected value), checked in this order
    expectations = (
        ('mandatory', 2.0),
        ('accessory', 3.0),
        ('neutral', 4.0),
        ('exchangeable', 5.0),
        ('itself', 6.0),
        ('loner_multi_system', 12.0),
    )
    for attr_name, expected in expectations:
        self.assertEqual(getattr(weights, attr_name), expected)
def test_systems_to_txt(self):
    """
    Check the text report written by ``systems_to_txt``.

    Two cases are exercised: an empty list of systems, then one system made
    of a single cluster holding a mandatory hit (gspD) and an accessory hit (sctJ).
    """
    # --- case 1: no system at all ---
    # NOTE(review): the whitespace inside the expected-output literals is kept
    # exactly as found here; confirm the line breaks against the real report.
    system_str = f"""# macsyfinder {macsypy.__version__} # {' '.join(sys.argv)} # No Systems found """
    f_out = StringIO()
    track_multi_systems_hit = HitSystemTracker([])
    systems_to_txt([], track_multi_systems_hit, f_out)
    self.assertMultiLineEqual(system_str, f_out.getvalue())

    # --- case 2: one system ---
    args = argparse.Namespace()
    args.sequence_db = self.find_data("base", "test_1.fasta")
    args.db_type = 'gembase'
    args.models_dir = self.find_data('models')
    cfg = Config(MacsyDefaults(), args)
    model_name = 'foo'
    models_location = ModelLocation(path=os.path.join(args.models_dir, model_name))

    # We need a fresh ProfileFactory built from this test's configuration:
    # it behaves like a singleton, so other tests are influenced by a
    # ProfileFactory and its configuration
    # (for instance search_genes gets a profile without hmmer_exe).
    profile_factory = ProfileFactory(cfg)

    model = Model("foo/T2SS", 10)
    # test if the id is well incremented
    gene_name = "gspD"
    c_gene_gspd = CoreGene(models_location, gene_name, profile_factory)
    gene_gspd = ModelGene(c_gene_gspd, model)
    model.add_mandatory_gene(gene_gspd)
    gene_name = "sctJ"
    c_gene_sctj = CoreGene(models_location, gene_name, profile_factory)
    gene_sctj = ModelGene(c_gene_sctj, model)
    model.add_accessory_gene(gene_sctj)

    hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
    hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
    system_1 = System(model, [Cluster([v_hit_1, v_hit_2], model, HitWeight(**cfg.hit_weights()))],
                      cfg.redundancy_penalty())

    # The expected id is derived from the class-level counter System._id.
    # Evaluating this f-string consumes one value of that counter, so it must
    # stay after the creation of system_1
    # (presumably "next - 1" is the number given to system_1 -- confirm in System).
    system_str = f"""# macsyfinder {macsypy.__version__} # {' '.join(sys.argv)} # Systems found: system id = replicon_id_T2SS_{next(System._id) - 1} model = foo/T2SS replicon = replicon_id clusters = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 1)] occ = 1 wholeness = 1.000 loci nb = 1 score = 1.500 mandatory genes: \t- gspD: 1 (gspD) accessory genes: \t- sctJ: 1 
(sctJ) neutral genes: ============================================================ """

    f_out = StringIO()
    track_multi_systems_hit = HitSystemTracker([system_1])
    systems_to_txt([system_1], track_multi_systems_hit, f_out)
    self.assertMultiLineEqual(system_str, f_out.getvalue())
def test_rejected_clst_to_txt(self):
    """
    Check the text report written by ``rejected_clst_to_txt``.

    First with one ``RejectedClusters`` made of two clusters and one reason,
    then with an empty list of rejected clusters.
    """
    args = argparse.Namespace()
    args.sequence_db = self.find_data("base", "test_1.fasta")
    args.db_type = 'gembase'
    args.models_dir = self.find_data('models')
    args.res_search_dir = "blabla"

    cfg = Config(MacsyDefaults(), args)
    model_name = 'foo'
    models_location = ModelLocation(path=os.path.join(args.models_dir, model_name))
    profile_factory = ProfileFactory(cfg)

    # one model with a mandatory gene (gspD) and an accessory gene (sctC)
    model = Model("foo/T2SS", 11)
    gene_name = "gspD"
    c_gene_gspd = CoreGene(models_location, gene_name, profile_factory)
    gene_1 = ModelGene(c_gene_gspd, model)
    gene_name = "sctC"
    c_gene_sctc = CoreGene(models_location, gene_name, profile_factory)
    gene_2 = ModelGene(c_gene_sctc, model)
    model.add_mandatory_gene(gene_1)
    model.add_accessory_gene(gene_2)

    # Hit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
    #     profile_coverage, sequence_coverage, begin_match, end_match)
    # NOTE(review): argument names inferred from the 11 positional values passed
    # below (an older comment also listed a `model` argument) -- confirm against Hit.
    h10 = Hit(c_gene_gspd, "h10", 10, "replicon_1", 10, 1.0, 10.0, 1.0, 1.0, 10, 20)
    v_h10 = ValidHit(h10, gene_1, GeneStatus.MANDATORY)
    h20 = Hit(c_gene_sctc, "h20", 10, "replicon_1", 20, 1.0, 20.0, 1.0, 1.0, 10, 20)
    v_h20 = ValidHit(h20, gene_2, GeneStatus.ACCESSORY)
    # h40 / h50 reuse the ids "h10" / "h20"; the expected report below relies on it
    h40 = Hit(c_gene_gspd, "h10", 10, "replicon_1", 40, 1.0, 10.0, 1.0, 1.0, 10, 20)
    v_h40 = ValidHit(h40, gene_1, GeneStatus.MANDATORY)
    h50 = Hit(c_gene_sctc, "h20", 10, "replicon_1", 50, 1.0, 20.0, 1.0, 1.0, 10, 20)
    v_h50 = ValidHit(h50, gene_2, GeneStatus.ACCESSORY)
    hit_weights = HitWeight(**cfg.hit_weights())
    c1 = Cluster([v_h10, v_h20], model, hit_weights)
    c2 = Cluster([v_h40, v_h50], model, hit_weights)
    r_c = RejectedClusters(model, [c1, c2], ["The reasons to reject this clusters"])

    # --- case 1: one set of rejected clusters ---
    # NOTE(review): the whitespace inside the expected-output literals is kept
    # exactly as found here; confirm the line breaks against the real report.
    rej_clst_str = f"""# macsyfinder {macsypy.__version__} # {' '.join(sys.argv)} # Rejected clusters: Cluster: - model = T2SS - replicon = replicon_1 - hits = (h10, gspD, 10), (h20, sctC, 20) Cluster: - model = T2SS - replicon = replicon_1 - hits = (h10, gspD, 40), (h20, sctC, 50) These clusters have been rejected because: \t- The reasons to reject this 
clusters ============================================================ """

    f_out = StringIO()
    rejected_clst_to_txt([r_c], f_out)
    # show the whole diff if the comparison fails
    self.maxDiff = None
    self.assertMultiLineEqual(rej_clst_str, f_out.getvalue())

    # --- case 2: nothing rejected ---
    rej_clst_str = f"""# macsyfinder {macsypy.__version__} # {' '.join(sys.argv)} # No Rejected clusters """
    f_out = StringIO()
    rejected_clst_to_txt([], f_out)
    self.assertMultiLineEqual(rej_clst_str, f_out.getvalue())
def test_solutions_to_tsv(self):
    """
    Check the tabulated report written by ``solutions_to_tsv``.

    Three models (A, B, C) and three systems (sys_A, sys_B, sys_C) are built
    by hand, then grouped in two solutions: [sys_A, sys_B] and [sys_A, sys_C].
    The expected report has one row per hit, one blank line after each system.
    """
    args = argparse.Namespace()
    args.sequence_db = self.find_data("base", "test_1.fasta")
    args.db_type = 'gembase'
    args.models_dir = self.find_data('models')
    cfg = Config(MacsyDefaults(), args)

    model_name = 'foo'
    models_location = ModelLocation(path=os.path.join(args.models_dir, model_name))

    # We need a fresh ProfileFactory built from this test's configuration:
    # it behaves like a singleton, so other tests are influenced by a
    # ProfileFactory and its configuration
    # (for instance search_genes gets a profile without hmmer_exe).
    profile_factory = ProfileFactory(cfg)

    model_A = Model("foo/A", 10)
    model_B = Model("foo/B", 10)
    model_C = Model("foo/C", 10)

    # genes attached to model B
    c_gene_sctn_flg = CoreGene(models_location, "sctN_FLG", profile_factory)
    gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B)
    c_gene_sctj_flg = CoreGene(models_location, "sctJ_FLG", profile_factory)
    gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B)
    c_gene_flgB = CoreGene(models_location, "flgB", profile_factory)
    gene_flgB = ModelGene(c_gene_flgB, model_B)
    c_gene_tadZ = CoreGene(models_location, "tadZ", profile_factory)
    gene_tadZ = ModelGene(c_gene_tadZ, model_B)

    # genes attached to model A, each one with an exchangeable
    c_gene_sctn = CoreGene(models_location, "sctN", profile_factory)
    gene_sctn = ModelGene(c_gene_sctn, model_A)
    gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn)
    gene_sctn.add_exchangeable(gene_sctn_hom)

    c_gene_sctj = CoreGene(models_location, "sctJ", profile_factory)
    gene_sctj = ModelGene(c_gene_sctj, model_A)
    gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj)
    gene_sctj.add_exchangeable(gene_sctj_an)

    c_gene_gspd = CoreGene(models_location, "gspD", profile_factory)
    gene_gspd = ModelGene(c_gene_gspd, model_A)
    gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd)
    gene_gspd.add_exchangeable(gene_gspd_an)

    c_gene_abc = CoreGene(models_location, "abc", profile_factory)
    gene_abc = ModelGene(c_gene_abc, model_A)
    gene_abc_ho = Exchangeable(c_gene_tadZ, gene_abc)
    gene_abc.add_exchangeable(gene_abc_ho)

    # composition of the three models
    model_A.add_mandatory_gene(gene_sctn)
    model_A.add_mandatory_gene(gene_sctj)
    model_A.add_accessory_gene(gene_gspd)
    model_A.add_forbidden_gene(gene_abc)

    model_B.add_mandatory_gene(gene_sctn_flg)
    model_B.add_mandatory_gene(gene_sctj_flg)
    model_B.add_accessory_gene(gene_flgB)
    model_B.add_accessory_gene(gene_tadZ)

    # model C reuses ModelGene objects that were created for models A and B
    model_C.add_mandatory_gene(gene_sctn_flg)
    model_C.add_mandatory_gene(gene_sctj_flg)
    model_C.add_mandatory_gene(gene_flgB)
    model_C.add_accessory_gene(gene_tadZ)
    model_C.add_accessory_gene(gene_gspd)

    # one hit per core gene, all of them at position 1 of "replicon_id"
    h_sctj = Hit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    h_sctn = Hit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    h_gspd = Hit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)

    h_sctj_flg = Hit(c_gene_sctj_flg, "hit_sctj_flg", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    h_flgB = Hit(c_gene_flgB, "hit_flgB", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    h_tadZ = Hit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)

    # the quorum attributes are set directly on the private fields
    model_A._min_mandatory_genes_required = 2
    model_A._min_genes_required = 2
    hit_weights = HitWeight(**cfg.hit_weights())
    c1 = Cluster([ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
                  ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY),
                  ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
                  ],
                 model_A, hit_weights)

    c2 = Cluster([ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
                  ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY)
                  ],
                 model_A, hit_weights)

    model_B._min_mandatory_genes_required = 1
    model_B._min_genes_required = 2
    c3 = Cluster([ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
                  ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
                  ValidHit(h_flgB, gene_flgB, GeneStatus.ACCESSORY)
                  ],
                 model_B, hit_weights)

    model_C._min_mandatory_genes_required = 1
    model_C._min_genes_required = 2
    c4 = Cluster([ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
                  ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
                  ValidHit(h_flgB, gene_flgB, GeneStatus.MANDATORY),
                  ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
                  ],
                 model_C, hit_weights)

    # the ids are overwritten so that the expected report does not depend
    # on the automatically generated system ids
    sys_A = System(model_A, [c1, c2], cfg.redundancy_penalty())
    sys_A.id = "sys_id_A"
    sys_B = System(model_B, [c3], cfg.redundancy_penalty())
    sys_B.id = "sys_id_B"
    sys_C = System(model_C, [c4], cfg.redundancy_penalty())
    sys_C.id = "sys_id_C"

    sol_1 = [sys_A, sys_B]
    sol_2 = [sys_A, sys_C]
    sol_id_1 = '1'
    sol_id_2 = '2'

    # ---- expected report: header ----
    # NOTE(review): the whitespace inside this literal is kept exactly as found
    # here; confirm the line breaks against the real report.
    sol_tsv = f"""# macsyfinder {macsypy.__version__} # {' '.join(sys.argv)} # Systems found: """
    sol_tsv += "\t".join(["sol_id", "replicon", "hit_id", "gene_name", "hit_pos", "model_fqn", "sys_id",
                          "sys_loci", "sys_wholeness", "sys_score", "sys_occ", "hit_gene_ref", "hit_status",
                          "hit_seq_len", "hit_i_eval", "hit_score", "hit_profile_cov", "hit_seq_cov",
                          "hit_begin_match", "hit_end_match", "used_in"])
    sol_tsv += "\n"

    # ---- solution 1 / sys_A: hits of c1 then hits of c2 ----
    sol_tsv += '\t'.join([sol_id_1, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A', 'sys_id_A',
                          '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_1, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A', 'sys_id_A',
                          '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_1, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/A', 'sys_id_A',
                          '2', '1.000', '1.500', '2', 'gspD', 'accessory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_1, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A', 'sys_id_A',
                          '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_1, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A', 'sys_id_A',
                          '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    # blank line closing the system
    sol_tsv += "\n"

    # ---- solution 1 / sys_B ----
    sol_tsv += '\t'.join([sol_id_1, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '1', 'foo/B', 'sys_id_B',
                          '1', '0.750', '2.000', '1', 'sctJ_FLG', 'mandatory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_1, 'replicon_id', 'hit_tadZ', 'tadZ', '1', 'foo/B', 'sys_id_B',
                          '1', '0.750', '2.000', '1', 'tadZ', 'accessory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_1, 'replicon_id', 'hit_flgB', 'flgB', '1', 'foo/B', 'sys_id_B',
                          '1', '0.750', '2.000', '1', 'flgB', 'accessory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    # blank line closing the system
    sol_tsv += "\n"

    # ---- solution 2 / sys_A: same rows as in solution 1, with the other sol_id ----
    sol_tsv += '\t'.join([sol_id_2, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A', 'sys_id_A',
                          '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_2, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A', 'sys_id_A',
                          '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_2, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/A', 'sys_id_A',
                          '2', '1.000', '1.500', '2', 'gspD', 'accessory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_2, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A', 'sys_id_A',
                          '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_2, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A', 'sys_id_A',
                          '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', ''])
    sol_tsv += "\n"
    # blank line closing the system
    sol_tsv += "\n"

    # ---- solution 2 / sys_C ----
    # The last column ("used_in") is filled here: the tracker below is built
    # from [sys_A, sys_B] and the hits of sys_C also belong to one of them.
    sol_tsv += '\t'.join([sol_id_2, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '1', 'foo/C', 'sys_id_C',
                          '1', '0.800', '3.000', '1', 'sctJ_FLG', 'mandatory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', 'sys_id_B'])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_2, 'replicon_id', 'hit_tadZ', 'tadZ', '1', 'foo/C', 'sys_id_C',
                          '1', '0.800', '3.000', '1', 'tadZ', 'accessory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', 'sys_id_B'])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_2, 'replicon_id', 'hit_flgB', 'flgB', '1', 'foo/C', 'sys_id_C',
                          '1', '0.800', '3.000', '1', 'flgB', 'mandatory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', 'sys_id_B'])
    sol_tsv += "\n"
    sol_tsv += '\t'.join([sol_id_2, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/C', 'sys_id_C',
                          '1', '0.800', '3.000', '1', 'gspD', 'accessory', '803', '1.0', '1.000',
                          '1.000', '1.000', '10', '20', 'sys_id_A'])
    sol_tsv += "\n"
    # blank line closing the system
    sol_tsv += "\n"

    f_out = StringIO()
    hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B])
    solutions_to_tsv([sol_1, sol_2], hit_multi_sys_tracker, f_out)
    self.assertMultiLineEqual(sol_tsv, f_out.getvalue())
def test_systems_to_tsv(self):
    """
    Check the tabulated report written by ``systems_to_tsv``.

    First with one system made of a single cluster (a mandatory gspD hit and
    an accessory sctJ hit), then with an empty list of systems.
    """
    args = argparse.Namespace()
    args.sequence_db = self.find_data("base", "test_1.fasta")
    args.db_type = 'gembase'
    args.models_dir = self.find_data('models')
    cfg = Config(MacsyDefaults(), args)

    model_name = 'foo'
    models_location = ModelLocation(path=os.path.join(args.models_dir, model_name))

    # We need a fresh ProfileFactory built from this test's configuration:
    # it behaves like a singleton, so other tests are influenced by a
    # ProfileFactory and its configuration
    # (for instance search_genes gets a profile without hmmer_exe).
    profile_factory = ProfileFactory(cfg)

    model = Model("foo/T2SS", 10)
    gene_name = "gspD"
    c_gene_gspd = CoreGene(models_location, gene_name, profile_factory)
    gene_gspd = ModelGene(c_gene_gspd, model)
    model.add_mandatory_gene(gene_gspd)
    gene_name = "sctJ"
    c_gene_sctj = CoreGene(models_location, gene_name, profile_factory)
    gene_sctj = ModelGene(c_gene_sctj, model)
    model.add_accessory_gene(gene_sctj)

    hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
    hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
    system_1 = System(model, [Cluster([v_hit_1, v_hit_2], model, HitWeight(**cfg.hit_weights()))],
                      cfg.redundancy_penalty())

    # --- case 1: one system; expected header, one row per hit, then a blank line ---
    # NOTE(review): the whitespace inside the expected-output literals is kept
    # exactly as found here; confirm the line breaks against the real report.
    system_tsv = f"""# macsyfinder {macsypy.__version__} # {' '.join(sys.argv)} # Systems found: """
    system_tsv += "\t".join(["replicon", "hit_id", "gene_name", "hit_pos", "model_fqn", "sys_id", "sys_loci",
                             "sys_wholeness", "sys_score", "sys_occ", "hit_gene_ref", "hit_status",
                             "hit_seq_len", "hit_i_eval", "hit_score", "hit_profile_cov", "hit_seq_cov",
                             "hit_begin_match", "hit_end_match", "used_in"])
    system_tsv += "\n"
    # the system id is read from the object instead of being hard-coded
    system_tsv += "\t".join(["replicon_id", "hit_1", "gspD", "1", "foo/T2SS", system_1.id, "1", "1.000", "1.500",
                             "1", "gspD", "mandatory", "803", "1.0", "1.000", "1.000", "1.000", "10", "20", ""])
    system_tsv += "\n"
    system_tsv += "\t".join(["replicon_id", "hit_2", "sctJ", "1", "foo/T2SS", system_1.id, "1", "1.000", "1.500",
                             "1", "sctJ", "accessory", "803", "1.0", "1.000", "1.000", "1.000", "10", "20", ""])
    system_tsv += "\n\n"

    f_out = StringIO()
    track_multi_systems_hit = HitSystemTracker([system_1])
    systems_to_tsv([system_1], track_multi_systems_hit, f_out)
    self.assertMultiLineEqual(system_tsv, f_out.getvalue())

    # --- case 2: no system found ---
    system_str = f"""# macsyfinder {macsypy.__version__} # {' '.join(sys.argv)} # No Systems found """
    f_out = StringIO()
    track_multi_systems_hit = HitSystemTracker([])
    systems_to_tsv([], track_multi_systems_hit, f_out)
    self.assertMultiLineEqual(system_str, f_out.getvalue())
def _build_clusters(cfg, profile_factory):
    """
    Build the models and clusters used as fixtures by the tests.

    :param cfg: the configuration; the first element of ``cfg.models_dir()`` is used
                to locate the 'foo' models and ``cfg.hit_weights()`` provides the weights
    :param profile_factory: the profile factory given to every CoreGene
    :return: a dict of models keyed 'A' to 'O' and a dict of clusters keyed 'c1' to 'c26'
    :rtype: tuple (dict, dict)
    """
    model_name = 'foo'
    model_location = ModelLocation(path=os.path.join(cfg.models_dir()[0], model_name))

    models = {}

    # the core genes shared by all the models below
    cg_sctn_flg = CoreGene(model_location, "sctN_FLG", profile_factory)
    cg_sctj_flg = CoreGene(model_location, "sctJ_FLG", profile_factory)
    cg_flgB = CoreGene(model_location, "flgB", profile_factory)
    cg_tadZ = CoreGene(model_location, "tadZ", profile_factory)
    cg_sctn = CoreGene(model_location, "sctN", profile_factory)
    cg_sctj = CoreGene(model_location, "sctJ", profile_factory)
    cg_gspd = CoreGene(model_location, "gspD", profile_factory)
    cg_abc = CoreGene(model_location, "abc", profile_factory)
    cg_sctc = CoreGene(model_location, "sctC", profile_factory)

    ###########
    # Model A #
    ###########
    # every gene of model A has one exchangeable
    models['A'] = Model("foo/A", 10)
    mgA_sctn = ModelGene(cg_sctn, models['A'])
    mgA_sctn_hom = Exchangeable(cg_sctn_flg, mgA_sctn)
    mgA_sctn.add_exchangeable(mgA_sctn_hom)
    mgA_sctj = ModelGene(cg_sctj, models['A'])
    mgA_sctj_an = Exchangeable(cg_sctj_flg, mgA_sctj)
    mgA_sctj.add_exchangeable(mgA_sctj_an)
    mgA_gspd = ModelGene(cg_gspd, models['A'])
    mgA_gspd_an = Exchangeable(cg_flgB, mgA_gspd)
    mgA_gspd.add_exchangeable(mgA_gspd_an)
    mgA_abc = ModelGene(cg_abc, models['A'])
    mgA_abc_ho = Exchangeable(cg_tadZ, mgA_abc)
    mgA_abc.add_exchangeable(mgA_abc_ho)

    models['A'].add_mandatory_gene(mgA_sctn)
    models['A'].add_mandatory_gene(mgA_sctj)
    models['A'].add_accessory_gene(mgA_gspd)
    models['A'].add_forbidden_gene(mgA_abc)

    # the quorum attributes are set directly on the private fields
    models['A']._min_mandatory_genes_required = 2
    models['A']._min_genes_required = 2

    ###########
    # Model B #
    ###########
    models['B'] = Model("foo/B", 10)
    mgB_sctn_flg = ModelGene(cg_sctn_flg, models['B'])
    mgB_sctj_flg = ModelGene(cg_sctj_flg, models['B'])
    mgB_flgB = ModelGene(cg_flgB, models['B'])
    mgB_tadZ = ModelGene(cg_tadZ, models['B'])

    models['B'].add_mandatory_gene(mgB_sctn_flg)
    models['B'].add_mandatory_gene(mgB_sctj_flg)
    models['B'].add_accessory_gene(mgB_flgB)
    models['B'].add_accessory_gene(mgB_tadZ)

    models['B']._min_mandatory_genes_required = 1
    models['B']._min_genes_required = 2

    ###########
    # Model C #
    ###########
    models['C'] = Model("foo/C", 10)
    mgC_sctn_flg = ModelGene(cg_sctn_flg, models['C'])
    mgC_sctj_flg = ModelGene(cg_sctj_flg, models['C'])
    mgC_flgB = ModelGene(cg_flgB, models['C'])
    mgC_tadZ = ModelGene(cg_tadZ, models['C'])
    mgC_gspd = ModelGene(cg_gspd, models['C'])

    models['C'].add_mandatory_gene(mgC_sctn_flg)
    models['C'].add_mandatory_gene(mgC_sctj_flg)
    models['C'].add_mandatory_gene(mgC_flgB)
    models['C'].add_accessory_gene(mgC_tadZ)
    models['C'].add_accessory_gene(mgC_gspd)

    models['C']._min_mandatory_genes_required = 1
    models['C']._min_genes_required = 2

    ###########
    # Model D #
    ###########
    models['D'] = Model("foo/D", 10)
    mgD_abc = ModelGene(cg_abc, models['D'])
    mgD_sctn = ModelGene(cg_sctn, models['D'])
    models['D'].add_mandatory_gene(mgD_abc)
    models['D'].add_accessory_gene(mgD_sctn)

    models['D']._min_mandatory_genes_required = 1
    models['D']._min_genes_required = 1

    ###########
    # Model E #
    ###########
    # only one accessory gene, no mandatory gene required
    models['E'] = Model("foo/E", 10)
    mgE_gspd = ModelGene(cg_gspd, models['E'])
    models['E'].add_accessory_gene(mgE_gspd)

    models['E']._min_mandatory_genes_required = 0
    models['E']._min_genes_required = 1

    ###########
    # Model F #
    ###########
    models['F'] = Model("foo/F", 10)
    mgF_abc = ModelGene(cg_abc, models['F'])
    models['F'].add_mandatory_gene(mgF_abc)

    models['F']._min_mandatory_genes_required = 1
    models['F']._min_genes_required = 1

    #####################
    # Model G idem as C #
    #####################
    # same genes as model C, but the quorum attributes are not overridden here
    models['G'] = Model("foo/G", 10)
    mgG_sctn_flg = ModelGene(cg_sctn_flg, models['G'])
    mgG_sctj_flg = ModelGene(cg_sctj_flg, models['G'])
    mgG_flgB = ModelGene(cg_flgB, models['G'])
    mgG_tadZ = ModelGene(cg_tadZ, models['G'])
    mgG_gspd = ModelGene(cg_gspd, models['G'])
    models['G'].add_mandatory_gene(mgG_sctn_flg)
    models['G'].add_mandatory_gene(mgG_sctj_flg)
    models['G'].add_mandatory_gene(mgG_flgB)
    models['G'].add_accessory_gene(mgG_tadZ)
    models['G'].add_accessory_gene(mgG_gspd)

    #####################
    # Model H idem as D #
    #####################
    models['H'] = Model("foo/H", 10)
    mgH_abc = ModelGene(cg_abc, models['H'])
    mgH_sctn = ModelGene(cg_sctn, models['H'])
    models['H'].add_mandatory_gene(mgH_abc)
    models['H'].add_accessory_gene(mgH_sctn)

    models['H']._min_mandatory_genes_required = 1
    models['H']._min_genes_required = 1

    ###########
    # Model I #
    ###########
    models['I'] = Model("foo/I", 10)
    mgI_abc = ModelGene(cg_abc, models['I'])
    mgI_flgB = ModelGene(cg_flgB, models['I'])
    mgI_tadZ = ModelGene(cg_tadZ, models['I'])
    models['I'].add_mandatory_gene(mgI_abc)
    models['I'].add_mandatory_gene(mgI_flgB)
    models['I'].add_accessory_gene(mgI_tadZ)

    models['I']._min_mandatory_genes_required = 1
    models['I']._min_genes_required = 1

    ###########
    # Model J #
    ###########
    models['J'] = Model("foo/J", 10)
    mgJ_abc = ModelGene(cg_abc, models['J'])
    mgJ_gspd = ModelGene(cg_gspd, models['J'])
    mgJ_tadZ = ModelGene(cg_tadZ, models['J'])
    mgJ_sctc = ModelGene(cg_sctc, models['J'])
    models['J'].add_mandatory_gene(mgJ_abc)
    models['J'].add_mandatory_gene(mgJ_gspd)
    models['J'].add_accessory_gene(mgJ_tadZ)
    models['J'].add_accessory_gene(mgJ_sctc)

    models['J']._min_mandatory_genes_required = 1
    models['J']._min_genes_required = 1

    ###########
    # Model K #
    ###########
    models['K'] = Model("foo/K", 10)
    mgK_flgB = ModelGene(cg_flgB, models['K'])
    mgK_sctn_flg = ModelGene(cg_sctn_flg, models['K'])
    mgK_sctj_flg = ModelGene(cg_sctj_flg, models['K'])
    mgK_sctn = ModelGene(cg_sctn, models['K'])
    models['K'].add_mandatory_gene(mgK_flgB)
    models['K'].add_mandatory_gene(mgK_sctn_flg)
    models['K'].add_accessory_gene(mgK_sctj_flg)
    models['K'].add_accessory_gene(mgK_sctn)

    models['K']._min_mandatory_genes_required = 1
    models['K']._min_genes_required = 1

    ###########
    # Model L #
    ###########
    # same genes as model K, but sctN is declared as a loner
    models['L'] = Model("foo/L", 10)
    mgL_flgB = ModelGene(cg_flgB, models['L'])
    mgL_sctn_flg = ModelGene(cg_sctn_flg, models['L'])
    mgL_sctj_flg = ModelGene(cg_sctj_flg, models['L'])
    mgL_sctn = ModelGene(cg_sctn, models['L'], loner=True)
    models['L'].add_mandatory_gene(mgL_flgB)
    models['L'].add_mandatory_gene(mgL_sctn_flg)
    models['L'].add_accessory_gene(mgL_sctj_flg)
    models['L'].add_accessory_gene(mgL_sctn)

    ###########
    # Model M #
    ###########
    # NOTE(review): model 'M' is created with the fqn "foo/L", the same as
    # model 'L' -- looks like a copy/paste; confirm whether "foo/M" was meant.
    models['M'] = Model("foo/L", 10)
    mgM_sctj = ModelGene(cg_sctj, models['M'])
    mgM_gspd = ModelGene(cg_gspd, models['M'])
    mgM_sctn = ModelGene(cg_sctn, models['M'], multi_system=True)
    mgM_tadZ = ModelGene(cg_tadZ, models['M'])
    mgM_abc = ModelGene(cg_abc, models['M'])
    models['M'].add_mandatory_gene(mgM_sctj)
    models['M'].add_mandatory_gene(mgM_gspd)
    models['M'].add_accessory_gene(mgM_sctn)
    models['M'].add_accessory_gene(mgM_tadZ)
    models['M'].add_accessory_gene(mgM_abc)

    ###########
    # Model N #
    ###########
    # two loner genes: sctN and tadZ
    models['N'] = Model("foo/N", 10)
    mgN_flgB = ModelGene(cg_flgB, models['N'])
    mgN_sctn_flg = ModelGene(cg_sctn_flg, models['N'])
    mgN_sctj = ModelGene(cg_sctj, models['N'])
    mgN_sctj_flg = ModelGene(cg_sctj_flg, models['N'])
    mgN_sctn = ModelGene(cg_sctn, models['N'], loner=True)
    mgN_tadZ = ModelGene(cg_tadZ, models['N'], loner=True)
    models['N'].add_mandatory_gene(mgN_flgB)
    models['N'].add_mandatory_gene(mgN_sctn_flg)
    models['N'].add_accessory_gene(mgN_sctj)
    models['N'].add_accessory_gene(mgN_sctj_flg)
    models['N'].add_accessory_gene(mgN_sctn)
    models['N'].add_accessory_gene(mgN_tadZ)

    ###########
    # Model O #
    ###########
    # mixes multi_system genes, a loner + multi_system gene, a loner,
    # exchangeables and a neutral gene
    models['O'] = Model("foo/O", 10)
    mgO_sctj = ModelGene(cg_sctj, models['O'], multi_system=True)
    mgO_sctj_flg = Exchangeable(cg_sctj_flg, mgO_sctj)
    mgO_sctj.add_exchangeable(mgO_sctj_flg)
    mgO_gspd = ModelGene(cg_gspd, models['O'], loner=True, multi_system=True)
    mgO_sctn = ModelGene(cg_sctn, models['O'], multi_system=True)
    mgO_sctn_flg = Exchangeable(cg_sctn_flg, mgO_sctn)
    mgO_sctn.add_exchangeable(mgO_sctn_flg)
    mgO_tadZ = ModelGene(cg_tadZ, models['O'], loner=True)
    mgO_abc = ModelGene(cg_abc, models['O'])
    models['O'].add_mandatory_gene(mgO_sctj)
    models['O'].add_mandatory_gene(mgO_gspd)
    models['O'].add_accessory_gene(mgO_sctn)
    models['O'].add_accessory_gene(mgO_tadZ)
    models['O'].add_neutral_gene(mgO_abc)

    #############
    # Core hits #
    #############
    # one hit per core gene on "replicon_id"; only the position (5th argument) varies
    ch_sctj = CoreHit(cg_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    ch_sctn = CoreHit(cg_sctn, "hit_sctn", 803, "replicon_id", 2, 1.0, 1.0, 1.0, 1.0, 10, 20)
    ch_gspd = CoreHit(cg_gspd, "hit_gspd", 803, "replicon_id", 3, 1.0, 1.0, 1.0, 1.0, 10, 20)
    ch_sctn_flg = CoreHit(cg_sctn_flg, "hit_sctn_flg", 803, "replicon_id", 4, 1.0, 1.0, 1.0, 1.0, 10, 20)
    # NOTE(review): ch_sctj is assigned a second time here, so the hit at
    # position 1 created above is never used -- confirm this is intended.
    ch_sctj = CoreHit(cg_sctj, "hit_sctj", 803, "replicon_id", 5, 1.0, 1.0, 1.0, 1.0, 10, 20)
    ch_sctj_flg = CoreHit(cg_sctj_flg, "hit_sctj_flg", 803, "replicon_id", 6, 1.0, 1.0, 1.0, 1.0, 10, 20)
    ch_flgB = CoreHit(cg_flgB, "hit_flgB", 803, "replicon_id", 7, 1.0, 1.0, 1.0, 1.0, 10, 20)
    ch_tadZ = CoreHit(cg_tadZ, "hit_tadZ", 803, "replicon_id", 8, 1.0, 1.0, 1.0, 1.0, 10, 20)
    ch_abc = CoreHit(cg_abc, "hit_abc", 803, "replicon_id", 9, 1.0, 1.0, 1.0, 1.0, 10, 20)

    ############
    # Clusters #
    ############
    # all the clusters share the same HitWeight instance
    hit_weights = HitWeight(**cfg.hit_weights())
    clusters = {}

    # c1, c2: model A
    clusters['c1'] = Cluster([ModelHit(ch_sctj, gene_ref=mgA_sctj, gene_status=GeneStatus.MANDATORY),
                              ModelHit(ch_sctn, gene_ref=mgA_sctn, gene_status=GeneStatus.MANDATORY),
                              ModelHit(ch_gspd, gene_ref=mgA_gspd, gene_status=GeneStatus.ACCESSORY)
                              ],
                             models['A'], hit_weights)

    clusters['c2'] = Cluster([ModelHit(ch_sctj, gene_ref=mgA_sctj, gene_status=GeneStatus.MANDATORY),
                              ModelHit(ch_sctn, gene_ref=mgA_sctn, gene_status=GeneStatus.MANDATORY)
                              ],
                             models['A'], hit_weights)

    # c3: model B
    clusters['c3'] = Cluster([ModelHit(ch_sctj_flg, gene_ref=mgB_sctj_flg, gene_status=GeneStatus.MANDATORY),
                              ModelHit(ch_tadZ, gene_ref=mgB_tadZ, gene_status=GeneStatus.ACCESSORY),
                              ModelHit(ch_flgB, gene_ref=mgB_flgB, gene_status=GeneStatus.ACCESSORY)
                              ],
                             models['B'], hit_weights)

    # c4: model C
    clusters['c4'] = Cluster([ModelHit(ch_sctj_flg, gene_ref=mgC_sctj_flg, gene_status=GeneStatus.MANDATORY),
                              ModelHit(ch_tadZ, gene_ref=mgC_tadZ, gene_status=GeneStatus.ACCESSORY),
                              ModelHit(ch_flgB, gene_ref=mgC_flgB, gene_status=GeneStatus.MANDATORY),
                              ModelHit(ch_gspd, gene_ref=mgC_gspd, gene_status=GeneStatus.ACCESSORY)
                              ],
                             models['C'], hit_weights)

    # c5: model D
    clusters['c5'] = Cluster([ModelHit(ch_abc, gene_ref=mgD_abc, gene_status=GeneStatus.MANDATORY),
                              ModelHit(ch_sctn, gene_ref=mgD_sctn, gene_status=GeneStatus.ACCESSORY)
                              ],
                             models['D'], hit_weights)

    # c6: model E
    clusters['c6'] = Cluster([ModelHit(ch_gspd, gene_ref=mgE_gspd, gene_status=GeneStatus.ACCESSORY)
                              ],
                             models['E'], hit_weights)

    # c7: model F
    clusters['c7'] = Cluster([ModelHit(ch_abc, gene_ref=mgF_abc, gene_status=GeneStatus.MANDATORY)],
                             models['F'], hit_weights)

    # c8: model I
    clusters['c8'] = Cluster([ModelHit(ch_flgB, gene_ref=mgI_flgB, gene_status=GeneStatus.MANDATORY),
                              ModelHit(ch_tadZ, gene_ref=mgI_tadZ, gene_status=GeneStatus.ACCESSORY)
                              ],
                             models['I'], hit_weights)

    # c9: model J
    clusters['c9'] = Cluster([ModelHit(ch_abc, gene_ref=mgJ_abc, gene_status=GeneStatus.MANDATORY),
                              ModelHit(ch_tadZ, gene_ref=mgJ_tadZ, gene_status=GeneStatus.ACCESSORY)
                              ],
                             models['J'], hit_weights)

    # c10: model K
    clusters['c10'] = Cluster([ModelHit(ch_flgB, gene_ref=mgK_flgB, gene_status=GeneStatus.MANDATORY),
                               ModelHit(ch_sctn, gene_ref=mgK_sctn, gene_status=GeneStatus.ACCESSORY)
                               ],
                              models['K'], hit_weights)

    # c11, c12, c13: model L (c13 holds a single Loner hit)
    clusters['c11'] = Cluster([ModelHit(ch_flgB, gene_ref=mgL_flgB, gene_status=GeneStatus.MANDATORY),
                               ModelHit(ch_sctn_flg, gene_ref=mgL_sctn_flg, gene_status=GeneStatus.MANDATORY)
                               ],
                              models['L'], hit_weights)

    clusters['c12'] = Cluster([ModelHit(ch_sctj_flg, gene_ref=mgL_sctj_flg, gene_status=GeneStatus.ACCESSORY),
                               ModelHit(ch_sctn, gene_ref=mgL_sctn, gene_status=GeneStatus.ACCESSORY)
                               ],
                              models['L'], hit_weights)

    clusters['c13'] = Cluster([Loner(ch_sctn, gene_ref=mgL_sctn, gene_status=GeneStatus.ACCESSORY)],
                              models['L'], hit_weights)

    # c14, c15, c16: model M (c14 and c16 hold a MultiSystem hit)
    clusters['c14'] = Cluster([ModelHit(ch_sctj, mgM_sctj, gene_status=GeneStatus.MANDATORY),
                               MultiSystem(ch_sctn, gene_ref=mgM_sctn, gene_status=GeneStatus.ACCESSORY),
                               ModelHit(ch_gspd, gene_ref=mgM_gspd, gene_status=GeneStatus.ACCESSORY)
                               ],
                              models['M'], hit_weights)

    clusters['c15'] = Cluster([ModelHit(ch_tadZ, gene_ref=mgM_tadZ, gene_status=GeneStatus.ACCESSORY),
                               ModelHit(ch_abc, gene_ref=mgM_abc, gene_status=GeneStatus.ACCESSORY)
                               ],
                              models['M'], hit_weights)

    clusters['c16'] = Cluster([MultiSystem(ch_sctn, gene_ref=mgM_sctn, gene_status=GeneStatus.ACCESSORY)
                               ],
                              models['M'], hit_weights)

    # c17 to c20: model N
    # (several hits below reference ModelGene objects created for model L)
    clusters['c17'] = Cluster([ModelHit(ch_flgB, mgL_flgB, GeneStatus.MANDATORY),
                               ModelHit(ch_sctn_flg, mgL_sctn_flg, GeneStatus.MANDATORY)
                               ],
                              models['N'], hit_weights)

    clusters['c18'] = Cluster([ModelHit(ch_sctj, mgN_sctj, GeneStatus.MANDATORY),
                               ModelHit(ch_sctj_flg, mgL_sctj_flg, GeneStatus.MANDATORY)
                               ],
                              models['N'], hit_weights)

    clusters['c19'] = Cluster([Loner(ch_sctn, mgL_sctn, GeneStatus.ACCESSORY)],
                              models['N'], hit_weights)

    clusters['c20'] = Cluster([Loner(ch_tadZ, mgN_tadZ, GeneStatus.ACCESSORY)],
                              models['N'], hit_weights)

    # c21 to c26: model O
    clusters['c21'] = Cluster([ModelHit(ch_sctj, mgO_sctj, GeneStatus.MANDATORY),
                               ModelHit(ch_abc, mgO_abc, GeneStatus.NEUTRAL),
                               ModelHit(ch_tadZ, mgO_tadZ, GeneStatus.ACCESSORY)
                               ],
                              models['O'], hit_weights)

    clusters['c22'] = Cluster([ModelHit(ch_sctn_flg, mgO_sctn_flg, GeneStatus.ACCESSORY),
                               ModelHit(ch_gspd, mgO_gspd, GeneStatus.MANDATORY),
                               ModelHit(ch_tadZ, mgO_tadZ, GeneStatus.ACCESSORY)
                               ],
                              models['O'], hit_weights)

    # c23 and c24 wrap the same core hit, once as a Loner and once as a MultiSystem
    clusters['c23'] = Cluster([Loner(ch_gspd, mgO_gspd, gene_status=GeneStatus.MANDATORY)],
                              models['O'], hit_weights)

    clusters['c24'] = Cluster([MultiSystem(ch_gspd, mgO_gspd, gene_status=GeneStatus.MANDATORY)],
                              models['O'], hit_weights)

    clusters['c25'] = Cluster([MultiSystem(ch_sctn, mgO_sctn, gene_status=GeneStatus.ACCESSORY)],
                              models['O'], hit_weights)

    clusters['c26'] = Cluster([MultiSystem(ch_sctj_flg, mgO_sctj_flg, gene_status=GeneStatus.MANDATORY)
                               ],
                              models['O'], hit_weights)

    return models, clusters
def _build_systems(cfg, profile_factory):
    """
    Build the eight reference systems 'A' to 'H' used as test fixtures.

    Models ``foo/A`` ... ``foo/H`` are created, populated with genes,
    then clusters of valid hits are assembled and wrapped into systems.

    :param cfg: configuration object; only ``models_dir()``, ``hit_weights()``
                and ``redundancy_penalty()`` are called on it.
    :param profile_factory: handed over unchanged to every :class:`CoreGene`.
    :return: the systems keyed by the letter of their model ('A' ... 'H');
             each system id is forced to ``replicon_id_<letter>``.
    :rtype: dict
    """
    model_name = 'foo'
    model_location = ModelLocation(
        path=os.path.join(cfg.models_dir(), model_name))
    models = {letter: Model(f"{model_name}/{letter}", 10)
              for letter in "ABCDEFGH"}

    core_genes = {}
    model_genes = {}

    # Genes attached to model B.
    for gene_name in ("sctN_FLG", "sctJ_FLG", "flgB", "tadZ"):
        core_genes[gene_name] = CoreGene(model_location, gene_name,
                                         profile_factory)
        model_genes[gene_name] = ModelGene(core_genes[gene_name], models['B'])

    # Genes attached to model A; each one gets a single exchangeable
    # built on one of the core genes created above.
    exchangeable_of = (
        ("sctN", "sctN_FLG"),
        ("sctJ", "sctJ_FLG"),
        ("gspD", "flgB"),
        ("abc", "tadZ"),
    )
    for gene_name, alternate_name in exchangeable_of:
        core_genes[gene_name] = CoreGene(model_location, gene_name,
                                         profile_factory)
        current = ModelGene(core_genes[gene_name], models['A'])
        current.add_exchangeable(
            Exchangeable(core_genes[alternate_name], current))
        model_genes[gene_name] = current

    # (model letter, mandatory genes, accessory genes, forbidden genes)
    # G has the same gene composition as C, and H the same as D.
    composition = (
        ('A', ("sctN", "sctJ"), ("gspD",), ("abc",)),
        ('B', ("sctN_FLG", "sctJ_FLG"), ("flgB", "tadZ"), ()),
        ('C', ("sctN_FLG", "sctJ_FLG", "flgB"), ("tadZ", "gspD"), ()),
        ('D', ("abc",), ("sctN",), ()),
        ('E', (), ("gspD",), ()),
        ('F', ("abc",), (), ()),
        ('G', ("sctN_FLG", "sctJ_FLG", "flgB"), ("tadZ", "gspD"), ()),
        ('H', ("abc",), ("sctN",), ()),
    )
    for letter, mandatory, accessory, forbidden in composition:
        target = models[letter]
        for gene_name in mandatory:
            target.add_mandatory_gene(model_genes[gene_name])
        for gene_name in accessory:
            target.add_accessory_gene(model_genes[gene_name])
        for gene_name in forbidden:
            target.add_forbidden_gene(model_genes[gene_name])

    # One hit per gene (none for sctN_FLG). The hits differ only by their
    # core gene, their id and the fifth argument, which runs from 1 to 7
    # (presumably the position on the replicon -- confirm against Hit).
    hit_ids = (
        ("sctJ", "hit_sctj"),
        ("sctN", "hit_sctn"),
        ("gspD", "hit_gspd"),
        ("sctJ_FLG", "hit_sctj_flg"),
        ("flgB", "hit_flgB"),
        ("tadZ", "hit_tadZ"),
        ("abc", "hit_abc"),
    )
    hits = {}
    for rank, (gene_name, hit_id) in enumerate(hit_ids, start=1):
        hits[gene_name] = Hit(core_genes[gene_name], hit_id, 803,
                              "replicon_id", rank,
                              1.0, 1.0, 1.0, 1.0, 10, 20)

    def set_quorum(letter, min_mandatory, min_genes):
        # Overwrite the private quorum attributes of the model directly.
        models[letter]._min_mandatory_genes_required = min_mandatory
        models[letter]._min_genes_required = min_genes

    set_quorum('A', 2, 2)
    hit_weights = HitWeight(**cfg.hit_weights())

    def build_cluster(letter, *members):
        # members: (gene name, gene status) pairs, in cluster order.
        valid_hits = [
            ValidHit(hits[gene_name], model_genes[gene_name], status)
            for gene_name, status in members
        ]
        return Cluster(valid_hits, models[letter], hit_weights)

    mandatory_status = GeneStatus.MANDATORY
    accessory_status = GeneStatus.ACCESSORY

    c1 = build_cluster('A',
                       ("sctJ", mandatory_status),
                       ("sctN", mandatory_status),
                       ("gspD", accessory_status))
    c2 = build_cluster('A',
                       ("sctJ", mandatory_status),
                       ("sctN", mandatory_status))

    set_quorum('B', 1, 2)
    c3 = build_cluster('B',
                       ("sctJ_FLG", mandatory_status),
                       ("tadZ", accessory_status),
                       ("flgB", accessory_status))

    set_quorum('C', 1, 2)
    c4 = build_cluster('C',
                       ("sctJ_FLG", mandatory_status),
                       ("tadZ", accessory_status),
                       ("flgB", mandatory_status),
                       ("gspD", accessory_status))

    set_quorum('D', 1, 1)
    c5 = build_cluster('D',
                       ("abc", mandatory_status),
                       ("sctN", accessory_status))

    set_quorum('E', 0, 1)
    c6 = build_cluster('E', ("gspD", accessory_status))

    set_quorum('F', 1, 1)
    c7 = build_cluster('F', ("abc", mandatory_status))

    # Models G and H reuse the clusters built for C and D.
    system_clusters = (
        ('A', [c1, c2]),  # 5 hits
        ('B', [c3]),      # 3 hits
        ('C', [c4]),      # 4 hits
        ('D', [c5]),      # 2 hits
        ('E', [c6]),      # 1 hit
        ('F', [c7]),      # 1 hit
        ('G', [c4]),      # 4 hits
        ('H', [c5]),      # 2 hits
    )
    systems = {}
    for letter, clusters in system_clusters:
        systems[letter] = System(models[letter], clusters,
                                 cfg.redundancy_penalty())
        # The id is overridden so that results stay stable whatever the
        # number of tests run before this one, or the order of the tests.
        systems[letter].id = f"replicon_id_{letter}"
    return systems