Example #1
0
 def test_items(self):
     """Check that ``db.items()`` holds exactly the expected (name, info) pairs.

     Three replicons are expected; the comparison does not depend on the
     order in which ``items()`` returns them.
     """
     db = RepliconDB(self.cfg)
     ESCO030p01 = RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)
     PSAE001c01 = RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)
     NCXX = RepliconInfo("circular", 134, 141, self.NCDB_genes)
     expected = [
         ('ESCO030p01', ESCO030p01),
         ('NC_xxxxx_xx', NCXX),
         ('PSAE001c01', PSAE001c01),
     ]
     # unittest renamed assertItemsEqual to assertCountEqual in Python 3 and
     # dropped the old name, so pick whichever one this interpreter provides.
     assert_same_items = getattr(self, 'assertCountEqual', None) or self.assertItemsEqual
     assert_same_items(db.items(), expected)
Example #2
0
 def test_getitem(self):
     """Index the database by replicon name; an unknown name must raise KeyError."""
     db = RepliconDB(self.cfg)
     expected_by_name = (
         ('ESCO030p01',
          RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)),
         ('PSAE001c01',
          RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)),
     )
     for replicon_name, expected_info in expected_by_name:
         self.assertEqual(expected_info, db[replicon_name])
     # 'foo' is not one of the replicon names looked up above.
     with self.assertRaises(KeyError):
         db.__getitem__('foo')
Example #3
0
 def test_getitem(self):
     """Look up the three expected replicons by name and reject an unknown name."""
     db = RepliconDB(self.cfg)
     esco_info = RepliconInfo(self.cfg.replicon_topology, 1, 67,
                              self.ESCO030p01_genes)
     psae_info = RepliconInfo(self.cfg.replicon_topology, 68, 133,
                              self.PSAE001c01_genes)
     # This replicon is built with an explicit "circular" topology rather
     # than the one read from the configuration.
     ncxx_info = RepliconInfo("circular", 134, 141, self.NCDB_genes)

     lookups = [
         (esco_info, 'ESCO030p01'),
         (psae_info, 'PSAE001c01'),
         (ncxx_info, 'NC_xxxxx_xx'),
     ]
     for expected_info, replicon_name in lookups:
         self.assertEqual(expected_info, db[replicon_name])
     self.assertRaises(KeyError, db.__getitem__, 'foo')
 def test_replicon_infos(self):
     """Check ``replicon_infos()`` returns the three expected infos, in any order."""
     db = RepliconDB(self.cfg)
     esco_info = RepliconInfo(self.cfg.replicon_topology(), 1, 67, self.ESCO030p01_genes)
     psae_info = RepliconInfo(self.cfg.replicon_topology(), 68, 133, self.PSAE001c01_genes)
     ncxx_info = RepliconInfo("circular", 134, 141, self.NCDB_genes)
     # assertCountEqual compares contents without regard to order.
     self.assertCountEqual(db.replicon_infos(), [esco_info, ncxx_info, psae_info])
Example #5
0
 def test_get(self):
     """Exercise ``db.get``: known names, a missing name, and an explicit default."""
     db = RepliconDB(self.cfg)
     known = [
         ('ESCO030p01',
          RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)),
         ('PSAE001c01',
          RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)),
     ]
     for replicon_name, expected_info in known:
         self.assertEqual(expected_info, db.get(replicon_name))

     # Missing name: None without a default, otherwise the supplied default.
     missing = db.get('foo')
     self.assertIsNone(missing)
     fallback = db.get('foo', 'bar')
     self.assertEqual('bar', fallback)
Example #6
0
 def test_get(self):
     """Exercise ``db.get`` on three known replicons and on a missing name."""
     db = RepliconDB(self.cfg)
     esco_info = RepliconInfo(self.cfg.replicon_topology, 1, 67,
                              self.ESCO030p01_genes)
     psae_info = RepliconInfo(self.cfg.replicon_topology, 68, 133,
                              self.PSAE001c01_genes)
     ncxx_info = RepliconInfo("circular", 134, 141, self.NCDB_genes)

     self.assertEqual(esco_info, db.get('ESCO030p01'))
     self.assertEqual(psae_info, db.get('PSAE001c01'))
     # A default is supplied here, but the stored info is what must come back.
     self.assertEqual(ncxx_info, db.get('NC_xxxxx_xx', 'foo'))

     # Missing name: None without a default, otherwise the supplied default.
     self.assertIsNone(db.get('foo'))
     self.assertEqual('bar', db.get('foo', 'bar'))
 def test_iteritems(self):
     """Check ``db.iteritems()`` yields the expected (name, info) pairs in order.

     Each expected pair is compared with the next value pulled from the
     iterator, inside its own sub-test.
     """
     db = RepliconDB(self.cfg)
     esco_info = RepliconInfo(self.cfg.replicon_topology(), 1, 67, self.ESCO030p01_genes)
     psae_info = RepliconInfo(self.cfg.replicon_topology(), 68, 133, self.PSAE001c01_genes)
     ncxx_info = RepliconInfo("circular", 134, 141, self.NCDB_genes)
     expected_pairs = [
         ('ESCO030p01', esco_info),
         ('PSAE001c01', psae_info),
         ('NC_xxxxx_xx', ncxx_info),
     ]
     pairs_from_db = db.iteritems()
     for expected_pair in expected_pairs:
         with self.subTest(item=expected_pair):
             self.assertEqual(next(pairs_from_db), expected_pair)
Example #8
0
 def test_items(self):
     """Check that ``db.items()`` holds exactly the two expected (name, info) pairs.

     The comparison does not depend on the order in which ``items()``
     returns the pairs.
     """
     db = RepliconDB(self.cfg)
     ESCO030p01 = RepliconInfo(self.cfg.replicon_topology, 1, 67,
                               self.ESCO030p01_genes)
     PSAE001c01 = RepliconInfo(self.cfg.replicon_topology, 68, 133,
                               self.PSAE001c01_genes)
     # unittest renamed assertItemsEqual to assertCountEqual in Python 3 and
     # dropped the old name, so pick whichever one this interpreter provides.
     assert_same_items = getattr(self, 'assertCountEqual', None) or self.assertItemsEqual
     assert_same_items(db.items(), [('ESCO030p01', ESCO030p01),
                                    ('PSAE001c01', PSAE001c01)])
     # NOTE(review): the database and the two RepliconInfo objects below are
     # rebuilt but never asserted on -- this looks like the setup of a second
     # scenario whose assertions are missing. TODO confirm, then complete or
     # remove.
     db = RepliconDB(self.cfg)
     PRRU001c01 = RepliconInfo(self.cfg.replicon_topology, 1, 67,
                               self.ESCO030p01_genes)
     PSAE001c01 = RepliconInfo(self.cfg.replicon_topology, 68, 133,
                               self.PSAE001c01_genes)
Example #9
0
    def test_get_loners(self):
        """Check that ``get_loners`` returns only the hits on genes flagged as loner.

        Five genes are added to the model and two of them (``sctN`` and
        ``abc``) have ``_loner`` set. Of the five hits passed in, the first
        hit of each returned cluster must be the hit on ``sctN`` then the hit
        on ``abc``.
        """
        model = Model("foo/T2SS", 11)

        core_genes = []
        model_genes = []
        for g_name in ('gspD', 'sctC', 'sctJ', 'sctN', 'abc'):
            core_gene = CoreGene(self.model_location, g_name, self.profile_factory)
            core_genes.append(core_gene)
            model_genes.append(ModelGene(core_gene, model))
        # Flag 'sctN' (index 3) and 'abc' (index 4) as loners.
        model_genes[3]._loner = True
        model_genes[4]._loner = True

        model.add_mandatory_gene(model_genes[0])
        model.add_mandatory_gene(model_genes[1])
        model.add_accessory_gene(model_genes[2])
        model.add_accessory_gene(model_genes[3])
        model.add_neutral_gene(model_genes[4])

        #     Hit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match)
        h10 = Hit(core_genes[0], "h10", 10, "replicon_1", 10, 1.0, 10.0, 1.0, 1.0, 10, 20)
        h20 = Hit(core_genes[1], "h20", 10, "replicon_1", 20, 1.0, 20.0, 1.0, 1.0, 10, 20)
        h30 = Hit(core_genes[2], "h30", 10, "replicon_1", 30, 1.0, 30.0, 1.0, 1.0, 10, 20)
        h61 = Hit(core_genes[3], "h61", 10, "replicon_1", 60, 1.0, 61.0, 1.0, 1.0, 10, 20)
        h80 = Hit(core_genes[4], "h80", 10, "replicon_1", 80, 1.0, 80.0, 1.0, 1.0, 10, 20)

        # loners are clusters of one hit
        loners = get_loners([h10, h20, h30, h61, h80], model, self.hit_weights)
        hit_from_clusters = [h.hits[0] for h in loners]
        self.assertListEqual(hit_from_clusters, [h61, h80])
Example #10
0
    def test_build_clusters(self):
        """Exercise ``build_clusters`` on linear and circular replicons.

        The scenarios run in sequence and share state: ``rep_info`` and some
        hit variables (``h10``, ``h11``, ``h50``, ``h51``, ``h80``) are rebound
        along the way, so the order of the scenarios matters. Most scenarios
        shuffle the hits first, so the expected clusters must not depend on
        the input order.
        """
        # RepliconInfo takes the topology type, the min/max positions in the
        # sequence dataset for a replicon, and a list of genes;
        # each gene is represented by a tuple (seq_id, length)
        rep_info = RepliconInfo('linear', 1, 60, [(f"g_{i}", i * 10) for i in range(1, 7)])

        model = Model("foo/T2SS", 11)

        core_genes = []
        model_genes = []
        for g_name in ('gspD', 'sctC', 'sctJ', 'sctN', 'abc'):
            core_gene = CoreGene(self.model_location, g_name, self.profile_factory)
            core_genes.append(core_gene)
            model_genes.append(ModelGene(core_gene, model))
        # 'abc' (index 4) is the only gene flagged as loner
        model_genes[4]._loner = True

        model.add_mandatory_gene(model_genes[0])
        model.add_mandatory_gene(model_genes[1])
        model.add_accessory_gene(model_genes[2])
        model.add_accessory_gene(model_genes[3])
        model.add_neutral_gene(model_genes[4])

        #     Hit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match)
        h10 = Hit(core_genes[0], "h10", 10, "replicon_1", 10, 1.0, 10.0, 1.0, 1.0, 10, 20)
        h11 = Hit(core_genes[0], "h11", 10, "replicon_1", 10, 1.0, 11.0, 1.0, 1.0, 10, 20)
        h20 = Hit(core_genes[1], "h20", 10, "replicon_1", 20, 1.0, 20.0, 1.0, 1.0, 10, 20)
        h21 = Hit(core_genes[2], "h21", 10, "replicon_1", 20, 1.0, 21.0, 1.0, 1.0, 10, 20)
        h30 = Hit(core_genes[2], "h30", 10, "replicon_1", 30, 1.0, 30.0, 1.0, 1.0, 10, 20)
        h31 = Hit(core_genes[1], "h31", 10, "replicon_1", 30, 1.0, 31.0, 1.0, 1.0, 10, 20)
        h50 = Hit(core_genes[2], "h50", 10, "replicon_1", 50, 1.0, 50.0, 1.0, 1.0, 10, 20)
        h51 = Hit(core_genes[2], "h51", 10, "replicon_1", 50, 1.0, 51.0, 1.0, 1.0, 10, 20)
        h60 = Hit(core_genes[2], "h60", 10, "replicon_1", 60, 1.0, 60.0, 1.0, 1.0, 10, 20)
        h61 = Hit(core_genes[3], "h61", 10, "replicon_1", 60, 1.0, 61.0, 1.0, 1.0, 10, 20)

        # case replicon is linear, 2 clusters
        hits = [h10, h11, h20, h21, h30, h31, h50, h51, h60, h61]
        random.shuffle(hits)
        clusters = build_clusters(hits, rep_info, model, self.hit_weights)
        self.assertEqual(len(clusters), 2)
        self.assertListEqual(clusters[0].hits, [h11, h21, h31])
        self.assertListEqual(clusters[1].hits, [h51, h61])

        # case replicon is linear with a single hit (not loner) between 2 clusters
        h70 = Hit(core_genes[3], "h70", 10, "replicon_1", 70, 1.0, 80.0, 1.0, 1.0, 10, 20)
        h80 = Hit(core_genes[4], "h80", 10, "replicon_1", 80, 1.0, 80.0, 1.0, 1.0, 10, 20)
        hits = [h10, h11, h20, h21, h50, h51, h70, h80]
        random.shuffle(hits)
        clusters = build_clusters(hits, rep_info, model, self.hit_weights)
        self.assertEqual(len(clusters), 2)
        self.assertListEqual(clusters[0].hits, [h11, h21])
        self.assertListEqual(clusters[1].hits, [h70, h80])

        # replicon is linear, 3 clusters, the last one contains only one hit (loner)
        rep_info = RepliconInfo('linear', 1, 100, [(f"g_{i}", i*10) for i in range(1, 101)])
        h80 = Hit(core_genes[4], "h80", 10, "replicon_1", 80, 1.0, 80.0, 1.0, 1.0, 10, 20)
        hits = [h10, h11, h20, h21, h30, h31, h50, h51, h60, h61, h80]
        random.shuffle(hits)
        clusters = build_clusters(hits, rep_info, model, self.hit_weights)
        self.assertEqual(len(clusters), 3)
        self.assertListEqual(clusters[0].hits, [h11, h21, h31])
        self.assertListEqual(clusters[1].hits, [h51, h61])
        self.assertListEqual(clusters[2].hits, [h80])

        # replicon is circular and contains only one cluster
        rep_info = RepliconInfo('circular', 1, 60, [(f"g_{i}", i*10) for i in range(1, 7)])
        hits = [h10, h20, h30]
        clusters = build_clusters(hits, rep_info, model, self.hit_weights)
        self.assertEqual(len(clusters), 1)
        self.assertListEqual(clusters[0].hits, [h10, h20, h30])

        # replicon is circular, the last cluster is merged with the first,
        # so we have only one cluster
        rep_info = RepliconInfo('circular', 1, 60, [(f"g_{i}", i*10) for i in range(1, 7)])
        hits = [h10, h11, h20, h21, h30, h31, h50, h51, h60, h61]
        clusters = build_clusters(hits, rep_info, model, self.hit_weights)
        self.assertEqual(len(clusters), 1)
        self.assertListEqual(clusters[0].hits, [h51, h61, h11, h21, h31])

        # replicon is circular, the last hit is incorporated into the first cluster
        rep_info = RepliconInfo('circular', 1, 80, [(f"g_{i}", i*10) for i in range(1, 9)])
        # h80 is rebound to a hit on 'sctN' (index 3), which is not a loner
        h80 = Hit(core_genes[3], "h80", 10, "replicon_1", 80, 1.0, 80.0, 1.0, 1.0, 10, 20)
        hits = [h10, h11, h20, h21, h30, h31, h50, h51, h60, h61, h80]
        random.shuffle(hits)
        clusters = build_clusters(hits, rep_info, model, self.hit_weights)
        self.assertEqual(len(clusters), 2)
        self.assertListEqual(clusters[0].hits, [h80, h11, h21, h31])
        self.assertListEqual(clusters[1].hits, [h51, h61])

        # same hits, but the replicon is linear:
        # the last hit is not merged with the first cluster
        rep_info = RepliconInfo('linear', 1, 80, [(f"g_{i}", i*10) for i in range(1, 9)])
        hits = [h10, h11, h20, h21, h30, h31, h50, h51, h60, h61, h80]
        random.shuffle(hits)
        clusters = build_clusters(hits, rep_info, model, self.hit_weights)
        self.assertEqual(len(clusters), 2)
        self.assertListEqual(clusters[0].hits, [h11, h21, h31])
        self.assertListEqual(clusters[1].hits, [h51, h61])

        # case replicon is linear, 2 clusters,
        # the hits h10,h11,h12 and h50,h51 are at contiguous positions
        h10 = Hit(core_genes[0], "h10", 10, "replicon_1", 10, 1.0, 11.0, 1.0, 1.0, 10, 20)
        h11 = Hit(core_genes[2], "h11", 10, "replicon_1", 11, 1.0, 21.0, 1.0, 1.0, 10, 20)
        h12 = Hit(core_genes[1], "h12", 10, "replicon_1", 12, 1.0, 31.0, 1.0, 1.0, 10, 20)
        h50 = Hit(core_genes[2], "h50", 10, "replicon_1", 50, 1.0, 51.0, 1.0, 1.0, 10, 20)
        h51 = Hit(core_genes[3], "h51", 10, "replicon_1", 51, 1.0, 61.0, 1.0, 1.0, 10, 20)
        hits = [h10, h11, h12, h50, h51]
        random.shuffle(hits)
        clusters = build_clusters(hits, rep_info, model, self.hit_weights)
        self.assertEqual(len(clusters), 2)
        self.assertListEqual(clusters[0].hits, [h10, h11, h12])
        self.assertListEqual(clusters[1].hits, [h50, h51])

        # case replicon is linear
        # one cluster with one hit loner
        h80 = Hit(core_genes[4], "h80", 10, "replicon_1", 80, 1.0, 80.0, 1.0, 1.0, 10, 20)
        hits = [h80]
        clusters = build_clusters(hits, rep_info, model, self.hit_weights)
        self.assertEqual(len(clusters), 1)
        self.assertListEqual(clusters[0].hits, [h80])

        # case replicon is linear, no hits
        clusters = build_clusters([], rep_info, model, self.hit_weights)
        self.assertListEqual(clusters, [])