def test_items(self):
    """Check that ``RepliconDB.items()`` returns every expected (name, info) pair.

    The comparison ignores ordering: only the collection of pairs (with
    multiplicity) has to match.
    """
    db = RepliconDB(self.cfg)
    # NOTE(review): ``replicon_topology`` is read as a plain attribute here, while
    # some other tests in this file call it -- confirm which form ``cfg`` exposes.
    ESCO030p01 = RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)
    PSAE001c01 = RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)
    NCXX = RepliconInfo("circular", 134, 141, self.NCDB_genes)
    # assertItemsEqual only exists on Python 2; assertCountEqual is its
    # Python 3 name and performs the same order-insensitive comparison.
    self.assertCountEqual(
        db.items(),
        [
            ('ESCO030p01', ESCO030p01),
            ('NC_xxxxx_xx', NCXX),
            ('PSAE001c01', PSAE001c01),
        ],
    )
def test_getitem(self):
    """Check item access on ``RepliconDB`` by replicon name.

    Known names must return a ``RepliconInfo`` equal to the expected one and
    an unknown name must raise ``KeyError``.
    """
    db = RepliconDB(self.cfg)
    expected_by_name = (
        ('ESCO030p01', RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)),
        ('PSAE001c01', RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)),
    )
    for replicon_name, expected_info in expected_by_name:
        self.assertEqual(expected_info, db[replicon_name])

    # an unknown replicon name is reported through KeyError
    with self.assertRaises(KeyError):
        db['foo']
def test_getitem(self):
    """Check item access on ``RepliconDB`` by replicon name.

    The three known names must return a ``RepliconInfo`` equal to the
    expected one and an unknown name must raise ``KeyError``.
    """
    db = RepliconDB(self.cfg)
    expected_by_name = (
        ('ESCO030p01', RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)),
        ('PSAE001c01', RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)),
        ('NC_xxxxx_xx', RepliconInfo("circular", 134, 141, self.NCDB_genes)),
    )
    for replicon_name, expected_info in expected_by_name:
        self.assertEqual(expected_info, db[replicon_name])

    # an unknown replicon name is reported through KeyError
    with self.assertRaises(KeyError):
        db['foo']
def test_replicon_infos(self):
    """Check that ``RepliconDB.replicon_infos()`` returns the three expected infos.

    The comparison ignores ordering.
    """
    db = RepliconDB(self.cfg)
    expected_infos = [
        RepliconInfo(self.cfg.replicon_topology(), 1, 67, self.ESCO030p01_genes),
        RepliconInfo("circular", 134, 141, self.NCDB_genes),
        RepliconInfo(self.cfg.replicon_topology(), 68, 133, self.PSAE001c01_genes),
    ]
    self.assertCountEqual(db.replicon_infos(), expected_infos)
def test_get(self):
    """Check ``RepliconDB.get`` for known names, unknown names and defaults.

    Known names return the expected ``RepliconInfo``; an unknown name gives
    ``None`` without a default and the supplied default otherwise.
    """
    db = RepliconDB(self.cfg)
    expected_by_name = (
        ('ESCO030p01', RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)),
        ('PSAE001c01', RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)),
    )
    for replicon_name, expected_info in expected_by_name:
        self.assertEqual(expected_info, db.get(replicon_name))

    # unknown name: None without a default, the default when one is given
    missing = db.get('foo')
    self.assertIsNone(missing)
    fallback = db.get('foo', 'bar')
    self.assertEqual('bar', fallback)
def test_get(self):
    """Check ``RepliconDB.get`` for known names, unknown names and defaults.

    Known names return the expected ``RepliconInfo`` (even when a default is
    supplied); an unknown name gives ``None`` without a default and the
    supplied default otherwise.
    """
    db = RepliconDB(self.cfg)
    expected_by_name = (
        ('ESCO030p01', RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)),
        ('PSAE001c01', RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)),
    )
    ncxx_info = RepliconInfo("circular", 134, 141, self.NCDB_genes)
    for replicon_name, expected_info in expected_by_name:
        self.assertEqual(expected_info, db.get(replicon_name))

    # the default must be ignored when the name is known
    self.assertEqual(ncxx_info, db.get('NC_xxxxx_xx', 'foo'))

    # unknown name: None without a default, the default when one is given
    missing = db.get('foo')
    self.assertIsNone(missing)
    fallback = db.get('foo', 'bar')
    self.assertEqual('bar', fallback)
def test_iteritems(self):
    """Check that ``RepliconDB.iteritems()`` yields the expected pairs in order.

    Each yielded (name, info) pair is compared, one by one, with the
    expected sequence; every comparison runs in its own sub-test.
    """
    db = RepliconDB(self.cfg)
    expected_pairs = [
        ('ESCO030p01', RepliconInfo(self.cfg.replicon_topology(), 1, 67, self.ESCO030p01_genes)),
        ('PSAE001c01', RepliconInfo(self.cfg.replicon_topology(), 68, 133, self.PSAE001c01_genes)),
        ('NC_xxxxx_xx', RepliconInfo("circular", 134, 141, self.NCDB_genes)),
    ]
    yielded = db.iteritems()
    for expected_pair in expected_pairs:
        with self.subTest(item=expected_pair):
            self.assertEqual(next(yielded), expected_pair)
def test_items(self):
    """Check that ``RepliconDB.items()`` returns every expected (name, info) pair.

    The comparison ignores ordering: only the collection of pairs (with
    multiplicity) has to match.
    """
    db = RepliconDB(self.cfg)
    # NOTE(review): ``replicon_topology`` is read as a plain attribute here, while
    # some other tests in this file call it -- confirm which form ``cfg`` exposes.
    ESCO030p01 = RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)
    PSAE001c01 = RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)
    # assertItemsEqual only exists on Python 2; assertCountEqual is its
    # Python 3 name and performs the same order-insensitive comparison.
    self.assertCountEqual(
        db.items(),
        [
            ('ESCO030p01', ESCO030p01),
            ('PSAE001c01', PSAE001c01),
        ],
    )
    # The objects that used to be rebuilt after this assertion were never
    # checked against anything, so they have been dropped.
def test_get_loners(self):
    """Check that ``get_loners`` returns only the hits of genes flagged as loner.

    Five genes are added to the model and two of them ('sctN' and 'abc') are
    flagged as loners.  One hit per gene is submitted; the first hit of each
    returned cluster must be, in order, the hits of those two genes.
    """
    model = Model("foo/T2SS", 11)

    core_genes = []
    model_genes = []
    for g_name in ('gspD', 'sctC', 'sctJ', 'sctN', 'abc'):
        core_gene = CoreGene(self.model_location, g_name, self.profile_factory)
        core_genes.append(core_gene)
        model_genes.append(ModelGene(core_gene, model))
    # flag 'sctN' and 'abc' as loners
    model_genes[3]._loner = True
    model_genes[4]._loner = True

    model.add_mandatory_gene(model_genes[0])
    model.add_mandatory_gene(model_genes[1])
    model.add_accessory_gene(model_genes[2])
    model.add_accessory_gene(model_genes[3])
    model.add_neutral_gene(model_genes[4])

    # Hit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
    #     profile_coverage, sequence_coverage, begin_match, end_match)
    h10 = Hit(core_genes[0], "h10", 10, "replicon_1", 10, 1.0, 10.0, 1.0, 1.0, 10, 20)
    h20 = Hit(core_genes[1], "h20", 10, "replicon_1", 20, 1.0, 20.0, 1.0, 1.0, 10, 20)
    h30 = Hit(core_genes[2], "h30", 10, "replicon_1", 30, 1.0, 30.0, 1.0, 1.0, 10, 20)
    h61 = Hit(core_genes[3], "h61", 10, "replicon_1", 60, 1.0, 61.0, 1.0, 1.0, 10, 20)
    h80 = Hit(core_genes[4], "h80", 10, "replicon_1", 80, 1.0, 80.0, 1.0, 1.0, 10, 20)

    # loners are clusters of one hit
    loners = get_loners([h10, h20, h30, h61, h80], model, self.hit_weights)
    hit_from_clusters = [cluster.hits[0] for cluster in loners]
    self.assertListEqual(hit_from_clusters, [h61, h80])
def test_build_clusters(self):
    """Exercise ``build_clusters`` on linear and circular replicons.

    The same model (five genes, only 'abc' flagged as loner) is reused for a
    series of scenarios.  Each scenario builds a list of hits, usually
    shuffles it, and checks the number of clusters returned and the hits each
    cluster holds.  Several hit names (``h10``, ``h11``, ``h50``, ``h51``,
    ``h80``) and ``rep_info`` are deliberately re-bound along the way, so the
    scenarios must stay in this order.
    """
    # RepliconInfo holds the topology type, the min/max positions in the sequence
    # dataset for a replicon and the list of genes.
    # Each gene is represented by a tuple (seq_id, length).
    rep_info = RepliconInfo('linear', 1, 60, [(f"g_{i}", i * 10) for i in range(1, 7)])
    model = Model("foo/T2SS", 11)

    core_genes = []
    model_genes = []
    for g_name in ('gspD', 'sctC', 'sctJ', 'sctN', 'abc'):
        core_gene = CoreGene(self.model_location, g_name, self.profile_factory)
        core_genes.append(core_gene)
        model_genes.append(ModelGene(core_gene, model))
    # only 'abc' is a loner in this test
    model_genes[4]._loner = True

    model.add_mandatory_gene(model_genes[0])
    model.add_mandatory_gene(model_genes[1])
    model.add_accessory_gene(model_genes[2])
    model.add_accessory_gene(model_genes[3])
    model.add_neutral_gene(model_genes[4])

    # Hit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
    #     profile_coverage, sequence_coverage, begin_match, end_match)
    h10 = Hit(core_genes[0], "h10", 10, "replicon_1", 10, 1.0, 10.0, 1.0, 1.0, 10, 20)
    h11 = Hit(core_genes[0], "h11", 10, "replicon_1", 10, 1.0, 11.0, 1.0, 1.0, 10, 20)
    h20 = Hit(core_genes[1], "h20", 10, "replicon_1", 20, 1.0, 20.0, 1.0, 1.0, 10, 20)
    h21 = Hit(core_genes[2], "h21", 10, "replicon_1", 20, 1.0, 21.0, 1.0, 1.0, 10, 20)
    h30 = Hit(core_genes[2], "h30", 10, "replicon_1", 30, 1.0, 30.0, 1.0, 1.0, 10, 20)
    h31 = Hit(core_genes[1], "h31", 10, "replicon_1", 30, 1.0, 31.0, 1.0, 1.0, 10, 20)
    h50 = Hit(core_genes[2], "h50", 10, "replicon_1", 50, 1.0, 50.0, 1.0, 1.0, 10, 20)
    h51 = Hit(core_genes[2], "h51", 10, "replicon_1", 50, 1.0, 51.0, 1.0, 1.0, 10, 20)
    h60 = Hit(core_genes[2], "h60", 10, "replicon_1", 60, 1.0, 60.0, 1.0, 1.0, 10, 20)
    h61 = Hit(core_genes[3], "h61", 10, "replicon_1", 60, 1.0, 61.0, 1.0, 1.0, 10, 20)

    # case replicon is linear, 2 clusters
    hits = [h10, h11, h20, h21, h30, h31, h50, h51, h60, h61]
    random.shuffle(hits)
    clusters = build_clusters(hits, rep_info, model, self.hit_weights)
    self.assertEqual(len(clusters), 2)
    self.assertListEqual(clusters[0].hits, [h11, h21, h31])
    self.assertListEqual(clusters[1].hits, [h51, h61])

    # case replicon is linear with a single hit (not loner) between 2 clusters
    h70 = Hit(core_genes[3], "h70", 10, "replicon_1", 70, 1.0, 80.0, 1.0, 1.0, 10, 20)
    h80 = Hit(core_genes[4], "h80", 10, "replicon_1", 80, 1.0, 80.0, 1.0, 1.0, 10, 20)
    hits = [h10, h11, h20, h21, h50, h51, h70, h80]
    random.shuffle(hits)
    clusters = build_clusters(hits, rep_info, model, self.hit_weights)
    self.assertEqual(len(clusters), 2)
    self.assertListEqual(clusters[0].hits, [h11, h21])
    self.assertListEqual(clusters[1].hits, [h70, h80])

    # replicon is linear, 3 clusters, the last one contains only one hit (loner)
    rep_info = RepliconInfo('linear', 1, 100, [(f"g_{i}", i*10) for i in range(1, 101)])
    h80 = Hit(core_genes[4], "h80", 10, "replicon_1", 80, 1.0, 80.0, 1.0, 1.0, 10, 20)
    hits = [h10, h11, h20, h21, h30, h31, h50, h51, h60, h61, h80]
    random.shuffle(hits)
    clusters = build_clusters(hits, rep_info, model, self.hit_weights)
    self.assertEqual(len(clusters), 3)
    self.assertListEqual(clusters[0].hits, [h11, h21, h31])
    self.assertListEqual(clusters[1].hits, [h51, h61])
    self.assertListEqual(clusters[2].hits, [h80])

    # replicon is circular and contains only one cluster
    rep_info = RepliconInfo('circular', 1, 60, [(f"g_{i}", i*10) for i in range(1, 7)])
    hits = [h10, h20, h30]
    clusters = build_clusters(hits, rep_info, model, self.hit_weights)
    self.assertEqual(len(clusters), 1)
    self.assertListEqual(clusters[0].hits, [h10, h20, h30])

    # replicon is circular: the last cluster is merged with the first one,
    # so we have only one cluster
    rep_info = RepliconInfo('circular', 1, 60, [(f"g_{i}", i*10) for i in range(1, 7)])
    hits = [h10, h11, h20, h21, h30, h31, h50, h51, h60, h61]
    clusters = build_clusters(hits, rep_info, model, self.hit_weights)
    self.assertEqual(len(clusters), 1)
    self.assertListEqual(clusters[0].hits, [h51, h61, h11, h21, h31])

    # replicon is circular: the last hit is incorporated into the first cluster
    rep_info = RepliconInfo('circular', 1, 80, [(f"g_{i}", i*10) for i in range(1, 9)])
    h80 = Hit(core_genes[3], "h80", 10, "replicon_1", 80, 1.0, 80.0, 1.0, 1.0, 10, 20)
    hits = [h10, h11, h20, h21, h30, h31, h50, h51, h60, h61, h80]
    random.shuffle(hits)
    clusters = build_clusters(hits, rep_info, model, self.hit_weights)
    self.assertEqual(len(clusters), 2)
    self.assertListEqual(clusters[0].hits, [h80, h11, h21, h31])
    self.assertListEqual(clusters[1].hits, [h51, h61])

    # same hits but the replicon is linear: the last hit is not merged with the first cluster
    rep_info = RepliconInfo('linear', 1, 80, [(f"g_{i}", i*10) for i in range(1, 9)])
    hits = [h10, h11, h20, h21, h30, h31, h50, h51, h60, h61, h80]
    random.shuffle(hits)
    clusters = build_clusters(hits, rep_info, model, self.hit_weights)
    self.assertEqual(len(clusters), 2)
    self.assertListEqual(clusters[0].hits, [h11, h21, h31])
    self.assertListEqual(clusters[1].hits, [h51, h61])

    # case replicon is linear, 2 clusters, the hits at positions 10, 11, 12
    # and 50, 51 are contiguous
    h10 = Hit(core_genes[0], "h10", 10, "replicon_1", 10, 1.0, 11.0, 1.0, 1.0, 10, 20)
    h11 = Hit(core_genes[2], "h11", 10, "replicon_1", 11, 1.0, 21.0, 1.0, 1.0, 10, 20)
    h12 = Hit(core_genes[1], "h12", 10, "replicon_1", 12, 1.0, 31.0, 1.0, 1.0, 10, 20)
    h50 = Hit(core_genes[2], "h50", 10, "replicon_1", 50, 1.0, 51.0, 1.0, 1.0, 10, 20)
    h51 = Hit(core_genes[3], "h51", 10, "replicon_1", 51, 1.0, 61.0, 1.0, 1.0, 10, 20)
    hits = [h10, h11, h12, h50, h51]
    random.shuffle(hits)
    clusters = build_clusters(hits, rep_info, model, self.hit_weights)
    self.assertEqual(len(clusters), 2)
    self.assertListEqual(clusters[0].hits, [h10, h11, h12])
    self.assertListEqual(clusters[1].hits, [h50, h51])

    # case replicon is linear
    # one cluster with one hit loner
    h80 = Hit(core_genes[4], "h80", 10, "replicon_1", 80, 1.0, 80.0, 1.0, 1.0, 10, 20)
    hits = [h80]
    random.shuffle(hits)
    clusters = build_clusters(hits, rep_info, model, self.hit_weights)
    self.assertEqual(len(clusters), 1)
    self.assertListEqual(clusters[0].hits, [h80])

    # case replicon is linear, no hits
    clusters = build_clusters([], rep_info, model, self.hit_weights)
    self.assertListEqual(clusters, [])