def test_add_forbidden_gene(self):
    """A gene added as forbidden must appear only in the forbidden list.

    The accessory and mandatory lists are expected to stay empty.
    """
    owner = System(self.cfg, "foo", 10)
    forbidden = Gene(self.cfg, 'sctJ_FLG', owner, self.profile_registry)

    owner.add_forbidden_gene(forbidden)

    self.assertEqual(owner._forbidden_genes, [forbidden])
    # the two other categories must not have been touched
    self.assertEqual(owner._accessory_genes, [])
    self.assertEqual(owner._mandatory_genes, [])
def test_multi_loci(self):
    """multi_loci is truthy when requested at construction, falsy when omitted."""
    max_space = 40

    flagged = System(self.cfg, 'True', max_space, multi_loci=True)
    self.assertTrue(flagged.multi_loci)

    unflagged = System(self.cfg, 'False', max_space)
    self.assertFalse(unflagged.multi_loci)
def test_iter(self):
    """Iterating system_bank yields only added systems, as many as were added."""
    expected = [System(self.cfg, sys_name, 10) for sys_name in ('foo', 'bar')]
    for item in expected:
        system_bank.add_system(item)

    seen = 0
    for seen, item in enumerate(system_bank, 1):
        self.assertIn(item, expected)
    # `seen` is the number of items the bank yielded
    self.assertEqual(seen, len(expected))
def test_add_homolog(self):
    """A homolog attached with add_homolog becomes the gene's single homolog."""
    foo_system = System(self.cfg, "foo", 10)
    bar_system = System(self.cfg, "bar", 10)
    gene = Gene(self.cfg, 'sctJ_FLG', foo_system, self.profile_registry)
    reference = Gene(self.cfg, 'sctJ', bar_system, self.profile_registry)
    attached = Homolog(self.cfg, gene, reference)

    gene.add_homolog(attached)

    self.assertEqual(len(gene.homologs), 1)
    self.assertEqual(gene.homologs[0], attached)
def test_min_mandatory_genes_required(self):
    """min_mandatory_genes_required: explicit value, then the default.

    When given at construction the value is returned unchanged; when omitted
    it must equal the number of mandatory genes of the system.
    """
    system_name = 'foo'
    required = 40

    explicit = System(self.cfg, system_name, 10,
                      min_mandatory_genes_required=required)
    mandatory = Gene(self.cfg, 'sctJ_FLG', explicit, self.profile_registry)
    explicit.add_mandatory_gene(mandatory)
    self.assertEqual(explicit.min_mandatory_genes_required, required)

    # see https://projets.pasteur.fr/issues/1850
    defaulted = System(self.cfg, system_name, 10)
    self.assertEqual(defaulted.min_mandatory_genes_required,
                     len(defaulted.mandatory_genes))
def test_get_homologs(self):
    """get_homologs returns the attached homologs in insertion order."""
    foo_system = System(self.cfg, "foo", 10)
    bar_system = System(self.cfg, "bar", 10)
    host = Gene(self.cfg, 'sctN', foo_system, self.profile_registry)
    sctJ_FLG = Gene(self.cfg, 'sctJ_FLG', foo_system, self.profile_registry)
    sctJ = Gene(self.cfg, 'sctJ', bar_system, self.profile_registry)

    attached = []
    for candidate in (sctJ_FLG, sctJ):
        homolog = Homolog(candidate, host)
        host.add_homolog(homolog)
        attached.append(homolog)

    self.assertEqual(host.get_homologs(), attached)
def test_max_nb_genes(self):
    """max_nb_genes returns the constructor value, or None when not given."""
    max_space = 40
    limit = 10

    bounded = System(self.cfg, 'foo', max_space, max_nb_genes=limit)
    self.assertEqual(bounded.max_nb_genes, limit)

    unbounded = System(self.cfg, 'bar', max_space)
    self.assertIsNone(unbounded.max_nb_genes)
def test_str(self):
    """str(gene) must equal the expected text for a gene with one homolog."""
    foo_system = System(self.cfg, "foo", 10)
    gene = Gene(self.cfg, 'sctJ_FLG', foo_system, self.profile_registry)
    bar_system = System(self.cfg, "bar", 20)
    homolog_gene = Gene(self.cfg, 'sctJ', bar_system, self.profile_registry)
    gene.add_homolog(Homolog(homolog_gene, gene, self.cfg))

    # NOTE(review): the expected text sits on a single line in this view;
    # confirm its exact whitespace against Gene.__str__.
    expected = """name : sctJ_FLG inter_gene_max_space: 10 homologs: sctJ"""
    self.assertEqual(str(gene), expected)
def test_contains(self):
    """`in` on gene_bank is true for an added gene, false for one never added."""
    foo_system = System(self.cfg, "foo", 10)

    stored = Gene(self.cfg, 'sctJ_FLG', foo_system, self.profile_registry)
    gene_bank.add_gene(stored)
    self.assertIn(stored, gene_bank)

    # built but deliberately not added to the bank
    stranger = Gene(self.cfg, 'abc', foo_system, self.profile_registry)
    self.assertNotIn(stranger, gene_bank)
def test_add_get_system(self):
    """A system added to the bank can be fetched back by its name.

    Looking the name up before the system is added must raise KeyError.
    """
    system_name = 'foo'
    self.assertRaises(KeyError, system_bank.__getitem__, system_name)

    system_foo = System(self.cfg, system_name, 10)
    system_bank.add_system(system_foo)

    system_from_bank = system_bank[system_name]
    # Check the object handed back by the bank: asserting the type of the
    # locally built system would pass no matter what the bank stores.
    self.assertIsInstance(system_from_bank, System)
    self.assertEqual(system_foo, system_from_bank)
def test_get_uniq_object(self):
    """Asking the factory twice for the same gene's profile gives one object."""
    system_foo = System(self.cfg, "foo", 10)
    gene = Gene(self.cfg, 'sctJ_FLG', system_foo, self.profile_registry)
    # NOTE(review): test_get_profile passes the registry itself as third
    # argument, this test passes the looked-up path -- confirm which one
    # get_profile expects.
    path = self.profile_registry.get('sctJ_FLG')

    profile1 = profile_factory.get_profile(gene, self.cfg, path)
    profile2 = profile_factory.get_profile(gene, self.cfg, path)

    self.assertEqual(profile1, profile2)
    # equality alone does not prove uniqueness: also require the same object
    self.assertIs(profile1, profile2)
def test_get_profile(self):
    """get_profile returns a Profile bound to the gene it was asked for."""
    foo_system = System(self.cfg, "foo", 10)
    gene_name = 'sctJ_FLG'
    gene = Gene(self.cfg, gene_name, foo_system, self.profile_registry)

    built = profile_factory.get_profile(gene, self.cfg, self.profile_registry)

    self.assertIsInstance(built, Profile)
    self.assertEqual(built.gene.name, gene_name)
def test_str(self):
    """str(hit) must equal the tab-separated line built with %-formatting."""
    system = System(self.cfg, "T2SS", 10)
    gene_name = "gspD"
    gene = Gene(self.cfg, gene_name, system, self.profile_registry)

    hit_prop = dict(
        id="PSAE001c01_006940",
        hit_seq_len=803,
        replicon_name="PSAE001c01",
        position=694,
        i_eval=1.2e-234,
        score=779.2,
        gene_name=gene.name,
        system_name=system.name,
        profil_coverage=1.0,
        sequence_coverage=638.0,
        begin=104,
        end=741,
    )
    # positional order expected by Hit after (gene, system)
    ctor_fields = ('id', 'hit_seq_len', 'replicon_name', 'position', 'i_eval',
                   'score', 'profil_coverage', 'sequence_coverage', 'begin', 'end')
    hit = Hit(gene, system, *[hit_prop[field] for field in ctor_fields])

    expected = "%(id)s\t%(replicon_name)s\t%(position)d\t%(hit_seq_len)d\t%(gene_name)s\t%(system_name)s\t%(i_eval)s\t%(score)s\t%(profil_coverage)f\t%(sequence_coverage)f\t%(begin)d\t%(end)d\n" % hit_prop
    self.assertEqual(expected, str(hit))
def test_str(self):
    """str(hit) must equal the tab-separated line built with str.format."""
    system = System(self.cfg, "T2SS", 10)
    gene_name = "gspD"
    gene = Gene(self.cfg, gene_name, system, self.profile_registry)

    hit_prop = dict(
        id="PSAE001c01_006940",
        hit_seq_len=803,
        replicon_name="PSAE001c01",
        position=694,
        i_eval=1.2e-234,
        score=779.2,
        gene_name=gene.name,
        system_name=system.name,
        profil_coverage=1.0,
        sequence_coverage=638.0,
        begin=104,
        end=741,
    )
    # positional order expected by Hit after (gene, system)
    ctor_fields = ('id', 'hit_seq_len', 'replicon_name', 'position', 'i_eval',
                   'score', 'profil_coverage', 'sequence_coverage', 'begin', 'end')
    hit = Hit(gene, system, *[hit_prop[field] for field in ctor_fields])

    expected = "{id}\t{replicon_name}\t{position:d}\t{hit_seq_len:d}\t{gene_name}\t{system_name}\t{i_eval:.3e}\t{score:.3f}\t{profil_coverage:.3f}\t{sequence_coverage:.3f}\t{begin:d}\t{end:d}\n".format(**hit_prop)
    self.assertEqual(expected, str(hit))
def test_str(self):
    """str(profile) must read '<gene name> : <profile path>'."""
    system = System(self.cfg, "T2SS", 10)
    gene = Gene(self.cfg, "abc", system, self.profile_registry)
    profile_path = self.profile_registry.get("abc")
    profile = Profile(gene, self.cfg, profile_path)

    expected = "{0} : {1}".format(gene.name, profile_path)
    self.assertEqual(str(profile), expected)
def test_execute_unknown_binary(self):
    """execute() must raise RuntimeError when hmmer_exe names a bogus binary."""
    # point the configuration at an executable name that should not exist
    self.cfg.options['hmmer_exe'] = "Nimportnaoik"
    system = System(self.cfg, "T2SS", 10)
    gene = Gene(self.cfg, "abc", system, self.profile_registry)
    profile_path = self.profile_registry.get("abc")
    profile = Profile(gene, self.cfg, profile_path)

    with self.assertRaises(RuntimeError):
        profile.execute()
def test_GembaseHMMReport_extract(self):
    """extract() on the stored gspD search output must give the six expected hits."""
    system = System(self.cfg, "T2SS", 10)
    gene_name = "gspD"
    gene = Gene(self.cfg, gene_name, system, self.profile_registry)

    # copy the canned search result into the working directory
    result_file = gene_name + self.cfg.res_search_suffix
    shutil.copy(os.path.join(self._data_dir, "hmm", result_file),
                self.cfg.working_dir)
    report_path = os.path.join(self.cfg.working_dir, result_file)

    report = GembaseHMMReport(gene, report_path, self.cfg)
    report.extract()
    self.assertEqual(len(report.hits), 6)

    # columns: hit_id, hit_seq_length, replicon_name, pos_hit, i_eval, score,
    #          profile_coverage, sequence_coverage, begin_match, end_match
    rows = [
        ("NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141, 2e-236, 779.2, 1.0, (741.0 - 104.0 + 1) / 803, 104, 741),
        ("PSAE001c01_006940", 803, "PSAE001c01", 68, 1.2e-234, 779.2, 1.0, (741.0 - 104.0 + 1) / 803, 104, 741),
        ("PSAE001c01_013980", 759, "PSAE001c01", 69, 3.7e-76, 255.8, 1.0, (736.0 - 105.0 + 1) / 759, 105, 736),
        ("PSAE001c01_017350", 600, "PSAE001c01", 70, 3.2e-27, 94.2, 0.5, (506.0 - 226.0 + 1) / 600, 226, 506),
        ("PSAE001c01_018920", 776, "PSAE001c01", 71, 6.1e-183, 608.4, 1.0, (606.0 - 48.0 + 1) / 776, 48, 606),
        ("PSAE001c01_031420", 658, "PSAE001c01", 73, 1.8e-210, 699.3, 1.0, (614.0 - 55.0 + 1) / 658, 55, 614),
    ]
    expected_hits = [Hit(gene, system, *row) for row in rows]
    self.assertListEqual(expected_hits, report.hits)
def test_system(self):
    """The system property returns the system the gene was built with."""
    owner = System(self.cfg, "foo", 10)
    gene = Gene(self.cfg, 'sctJ_FLG', owner, self.profile_registry)
    self.assertEqual(gene.system, owner)
def test_get_uniq_object(self):
    """Two lookups of the same name in gene_bank return one and the same object."""
    foo_system = System(self.cfg, "foo", 10)
    stored = Gene(self.cfg, 'sctJ_FLG', foo_system, self.profile_registry)
    gene_bank.add_gene(stored)

    first_lookup = gene_bank['sctJ_FLG']
    second_lookup = gene_bank['sctJ_FLG']

    self.assertEqual(first_lookup, second_lookup)
    # identity, not just equality
    self.assertIs(first_lookup, second_lookup)
def test_is_aligned(self):
    """is_aligned() is false by default and true when aligned=True is passed."""
    system = System(self.cfg, "T2SS", 10)
    reference = Gene(self.cfg, 'sctJ_FLG', system, self.profile_registry)
    gene = Gene(self.cfg, 'sctJ', system, self.profile_registry)

    default_homolog = Homolog(gene, reference)
    self.assertFalse(default_homolog.is_aligned())

    aligned_homolog = Homolog(gene, reference, aligned=True)
    self.assertTrue(aligned_homolog.is_aligned())
def test_add_get_gene(self):
    """A gene added to the bank can be fetched back by its name.

    Looking the name up before the gene is added must raise KeyError.
    """
    gene_name = 'sctJ_FLG'
    self.assertRaises(KeyError, gene_bank.__getitem__, gene_name)

    # Configuration first, then the name: the same argument order as the
    # other System(...) calls in these tests (the two were swapped here).
    system_foo = System(self.cfg, "foo", 10)
    gene = Gene(self.cfg, gene_name, system_foo, self.profile_registry)
    gene_bank.add_gene(gene)

    gene_from_bank = gene_bank[gene_name]
    self.assertIsInstance(gene_from_bank, Gene)
    self.assertEqual(gene_from_bank, gene)
def test_SystemSerializer_tsv(self):
    """TsvSystemSerializer output for a two-cluster system must match the expected TSV.

    The model has gspD as mandatory gene and sctJ and sctN (with sctN_FLG as
    exchangeable) as accessory genes. The system is built from three hits
    (gspD, sctJ, sctN_FLG) spread over two clusters.
    """
    model = Model("foo/T2SS", 10)

    core_gspd = CoreGene(self.model_location, "gspD", self.profile_factory)
    gspd = ModelGene(core_gspd, model)
    model.add_mandatory_gene(gspd)

    core_sctj = CoreGene(self.model_location, "sctJ", self.profile_factory)
    sctj = ModelGene(core_sctj, model)
    model.add_accessory_gene(sctj)

    core_sctn = CoreGene(self.model_location, "sctN", self.profile_factory)
    sctn = ModelGene(core_sctn, model)
    core_sctn_flg = CoreGene(self.model_location, "sctN_FLG", self.profile_factory)
    sctn_flg = Exchangeable(core_sctn_flg, sctn)
    sctn.add_exchangeable(sctn_flg)
    model.add_accessory_gene(sctn)

    hit_gspd = Hit(core_gspd, "h_gspd", 803, "replicon_id", 10, 1.0, 1.0, 1.0, 1.0, 10, 20)
    valid_gspd = ValidHit(hit_gspd, gspd, GeneStatus.MANDATORY)
    hit_sctj = Hit(core_sctj, "h_sctj", 803, "replicon_id", 20, 1.0, 1.0, 1.0, 1.0, 20, 30)
    valid_sctj = ValidHit(hit_sctj, sctj, GeneStatus.ACCESSORY)
    hit_sctn_flg = Hit(core_sctn_flg, "h_sctn_flg", 803, "replicon_id", 30, 1.0, 1.0, 1.0, 1.0, 30, 40)
    valid_sctn_flg = ValidHit(hit_sctn_flg, sctn_flg, GeneStatus.ACCESSORY)

    first_cluster = Cluster([valid_gspd, valid_sctj], model, self.hit_weights)
    second_cluster = Cluster([valid_sctn_flg], model, self.hit_weights)
    multi_loci_system = System(model, [first_cluster, second_cluster],
                               self.cfg.redundancy_penalty())
    tracker = HitSystemTracker([multi_loci_system])
    serializer = TsvSystemSerializer()

    system_id = multi_loci_system.id

    def expected_line(hit_id, gene_name, position, gene_ref, status, begin, end):
        # one tab-separated record; only the per-hit columns vary
        fields = [
            "replicon_id", hit_id, gene_name, position, "foo/T2SS", system_id,
            "1", "1.000", "1.900", "1", gene_ref, status, "803", "1.0",
            "1.000", "1.000", "1.000", begin, end, "",
        ]
        return "\t".join(fields) + "\n"

    expected_tsv = "".join([
        expected_line("h_gspd", "gspD", "10", "gspD", "mandatory", "10", "20"),
        expected_line("h_sctj", "sctJ", "20", "sctJ", "accessory", "20", "30"),
        expected_line("h_sctn_flg", "sctN_FLG", "30", "sctN", "accessory", "30", "40"),
    ])
    self.assertEqual(expected_tsv,
                     serializer.serialize(multi_loci_system, tracker))
def test_iter(self):
    """Iterating gene_bank yields only added genes, as many as were added."""
    foo_system = System(self.cfg, "foo", 10)
    expected = [Gene(self.cfg, gene_name, foo_system, self.profile_registry)
                for gene_name in ('sctJ_FLG', 'abc')]
    for item in expected:
        gene_bank.add_gene(item)

    seen = 0
    for seen, item in enumerate(gene_bank, 1):
        self.assertIn(item, expected)
    # `seen` is the number of items the bank yielded
    self.assertEqual(seen, len(expected))
def test_HMMReport(self):
    """Instantiating HMMReport directly must raise TypeError."""
    system = System(self.cfg, "T2SS", 10)
    gene_name = "gspD"
    gene = Gene(self.cfg, gene_name, system, self.profile_registry)

    # copy the canned search result into the working directory
    result_file = gene_name + self.cfg.res_search_suffix
    shutil.copy(os.path.join(self._data_dir, "hmm", result_file),
                self.cfg.working_dir)
    report_path = os.path.join(self.cfg.working_dir, result_file)

    with self.assertRaises(TypeError):
        HMMReport(gene, report_path, self.cfg)
def test_GembaseHMMReport_extract_concurent(self):
    """Run extract() on five reports of the same file from separate threads.

    Once every worker thread has finished, save_extract() is called on each
    report and each must hold the same six expected hits.
    """
    import threading

    system = System(self.cfg, "T2SS", 10)
    gene_name = "gspD"
    gene = Gene(self.cfg, gene_name, system, self.profile_registry)

    # copy the canned search result into the working directory
    result_file = gene_name + self.cfg.res_search_suffix
    shutil.copy(os.path.join(self._data_dir, "hmm", result_file),
                self.cfg.working_dir)
    report_path = os.path.join(self.cfg.working_dir, result_file)

    reports = [GembaseHMMReport(gene, report_path, self.cfg) for _ in range(5)]

    # Keep a handle on every thread started here and join only those.
    # Joining everything listed by threading.enumerate() would also wait on
    # threads this test does not own, and needed the deprecated
    # threading.currentThread() to skip the main thread.
    workers = [threading.Thread(target=report.extract) for report in reports]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # columns: hit_id, hit_seq_length, replicon_name, pos_hit, i_eval, score,
    #          profile_coverage, sequence_coverage, begin_match, end_match
    rows = [
        ("NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141, 2e-236, 779.2, 1.0, (741.0 - 104.0 + 1) / 803, 104, 741),
        ("PSAE001c01_006940", 803, "PSAE001c01", 68, 1.2e-234, 779.2, 1.0, (741.0 - 104.0 + 1) / 803, 104, 741),
        ("PSAE001c01_013980", 759, "PSAE001c01", 69, 3.7e-76, 255.8, 1.0, (736.0 - 105.0 + 1) / 759, 105, 736),
        ("PSAE001c01_017350", 600, "PSAE001c01", 70, 3.2e-27, 94.2, 0.5, (506.0 - 226.0 + 1) / 600, 226, 506),
        ("PSAE001c01_018920", 776, "PSAE001c01", 71, 6.1e-183, 608.4, 1.0, (606.0 - 48.0 + 1) / 776, 48, 606),
        ("PSAE001c01_031420", 658, "PSAE001c01", 73, 1.8e-210, 699.3, 1.0, (614.0 - 55.0 + 1) / 658, 55, 614),
    ]
    hits = [Hit(gene, system, *row) for row in rows]

    for report in reports:
        report.save_extract()
        self.assertEqual(len(report.hits), len(hits))
        self.assertListEqual(report.hits, hits)
def test_cmp(self):
    """The second hit must compare greater than the first, and the first less."""
    system = System(self.cfg, "T2SS", 10)
    gene_name = "gspD"
    gene = Gene(self.cfg, gene_name, system, self.profile_registry)

    lower = Hit(gene, system, "PSAE001c01_006940", 803, "PSAE001c01", 3450,
                1.2e-234, 779.2, 1.0, (741.0 - 104.0 + 1) / 803, 104, 741)
    higher = Hit(gene, system, "PSAE001c01_013980", 759, "PSAE001c01", 4146,
                 3.7e-76, 255.8, 1.0, (736.0 - 105.0 + 1) / 759, 105, 736)

    self.assertGreater(higher, lower)
    self.assertLess(lower, higher)
def test_exchangeable(self):
    """exchangeable is falsy by default and truthy when set at construction."""
    foo_system = System(self.cfg, "foo", 10)

    plain = Gene(self.cfg, 'sctJ_FLG', foo_system, self.profile_registry)
    self.assertFalse(plain.exchangeable)

    flagged = Gene(self.cfg, 'sctJ', foo_system, self.profile_registry,
                   exchangeable=True)
    self.assertTrue(flagged.exchangeable)
def test_multi_system(self):
    """multi_system is falsy by default and truthy when set at construction."""
    foo_system = System(self.cfg, "foo", 10)

    plain = Gene(self.cfg, 'sctJ_FLG', foo_system, self.profile_registry)
    self.assertFalse(plain.multi_system)

    flagged = Gene(self.cfg, 'sctJ', foo_system, self.profile_registry,
                   multi_system=True)
    self.assertTrue(flagged.multi_system)
def test_loner(self):
    """loner is falsy by default and truthy when set at construction."""
    foo_system = System(self.cfg, "foo", 10)

    plain = Gene(self.cfg, 'sctJ_FLG', foo_system, self.profile_registry)
    self.assertFalse(plain.loner)

    flagged = Gene(self.cfg, 'sctJ', foo_system, self.profile_registry,
                   loner=True)
    self.assertTrue(flagged.loner)
def test_inter_gene_max_space(self):
    """inter_gene_max_space: the system's value by default, the gene's own if given."""
    system_space = 40
    gene_space = 50
    foo_system = System(self.cfg, "foo", system_space)

    # no value on the gene: the system's value is expected
    inheriting = Gene(self.cfg, 'sctJ_FLG', foo_system, self.profile_registry)
    self.assertEqual(inheriting.inter_gene_max_space, system_space)

    # value given on the gene: that value is expected
    overriding = Gene(self.cfg, 'sctJ', foo_system, self.profile_registry,
                      inter_gene_max_space=gene_space)
    self.assertEqual(overriding.inter_gene_max_space, gene_space)
def test_execute(self):
    """Executing a profile must produce a hmmsearch raw output file for it.

    The first line of the raw output must be the hmmsearch banner, and the
    sixth line must contain the path of the gene's profile file.
    """
    system = System(self.cfg, "T2SS", 10)
    gene = Gene(self.cfg, "abc", system, self.profile_registry)
    profile = Profile(gene, self.cfg, self.profile_registry.get("abc"))
    profile.execute()

    with open(profile.hmm_raw_output, 'r') as raw_out:
        banner = raw_out.readline()
        # a hmmsearch output file has been produced
        self.assertTrue(banner.startswith("# hmmsearch :: search profile(s) against a sequence database"))
        # read five more lines; the last one read is the one to inspect
        for _ in range(5):
            query_line = raw_out.readline()
        # that line should name the abc profile ("# query HMM file:")
        profile_path = os.path.join(self.cfg.profile_dir,
                                    gene.name + self.cfg.profile_suffix)
        self.assertIn(profile_path, query_line)
def test_get_gene_ref(self):
    """get_gene_ref: homolog -> reference gene, reference -> None, unknown -> KeyError.

    The homolog lookup is checked with the reference gene registered in turn
    as forbidden, accessory and mandatory.
    """
    system = System(self.cfg, "foo", 10)
    gene_ref = Gene(self.cfg, 'sctJ_FLG', system, self.profile_registry)
    homolog_gene = Gene(self.cfg, 'sctJ', system, self.profile_registry)
    homolog = Homolog(homolog_gene, gene_ref)
    gene_ref.add_homolog(homolog)

    registrars = (system.add_forbidden_gene,
                  system.add_accessory_gene,
                  system.add_mandatory_gene)
    for register in registrars:
        # empty all three categories so only `register` decides where it goes
        system._mandatory_genes, system._accessory_genes, system._forbidden_genes = [], [], []
        register(gene_ref)
        self.assertEqual(gene_ref, system.get_gene_ref(homolog))

    self.assertIsNone(system.get_gene_ref(gene_ref))

    unknown = Gene(self.cfg, 'abc', system, self.profile_registry)
    with self.assertRaises(KeyError):
        system.get_gene_ref(unknown)
def test_mandatory_genes(self):
    """mandatory_genes lists the gene added with add_mandatory_gene."""
    owner = System(self.cfg, "foo", 10)
    mandatory = Gene(self.cfg, 'sctJ_FLG', owner, self.profile_registry)

    owner.add_mandatory_gene(mandatory)

    self.assertEqual(owner.mandatory_genes, [mandatory])
def test_get_gene(self):
    """get_gene finds a gene, its homolog and its analog by name.

    Each lookup is checked with the gene registered in turn as forbidden,
    accessory and mandatory; an unknown name must raise KeyError.
    """
    system = System(self.cfg, "foo", 10)
    gene_name = 'sctJ_FLG'
    gene = Gene(self.cfg, gene_name, system, self.profile_registry)

    registrars = (system.add_forbidden_gene,
                  system.add_accessory_gene,
                  system.add_mandatory_gene)

    def register_alone(register):
        # empty all three categories, then add the gene through `register`
        system._mandatory_genes = []
        system._accessory_genes = []
        system._forbidden_genes = []
        register(gene)

    for register in registrars:
        register_alone(register)
        self.assertEqual(gene, system.get_gene(gene_name))

    with self.assertRaises(KeyError):
        system.get_gene('bar')

    homolog_name = 'sctJ'
    homolog_gene = Gene(self.cfg, homolog_name, system, self.profile_registry)
    homolog = Homolog(homolog_gene, gene)
    gene.add_homolog(homolog)
    for register in registrars:
        register_alone(register)
        self.assertEqual(homolog, system.get_gene(homolog_name))

    analog_name = 'sctC'
    analog_gene = Gene(self.cfg, analog_name, system, self.profile_registry)
    analog = Analog(analog_gene, gene)
    gene.add_analog(analog)
    for register in registrars:
        register_alone(register)
        self.assertEqual(analog, system.get_gene(analog_name))