Beispiel #1
0
def merge_grid_cover(final_db, other_dbs):
    """Merge the grid cover databases in `other_dbs` into `final_db`.

    When no file exists at `final_db`, the first entry of `other_dbs` is
    copied there to seed it and only the remaining entries are merged in.

    Args:
        final_db: Filepath of the database that receives the merged data.
        other_dbs: Sequence of filepaths of the databases to merge.
    """
    pending = other_dbs
    if not isfile(final_db):
        # Seed the target with a copy of the first source database.
        copyfile(other_dbs[0], final_db)
        pending = other_dbs[1:]
    merged = GridCoverDB.load_from_filepath(final_db)
    for source_path in pending:
        merged.load_other(GridCoverDB.load_from_filepath(source_path))
Beispiel #2
0
 def test_get_centroids(self):
     """Three k-mers added at two distinct points give a (2, 4) centroid array."""
     rotation = RotatingRamifier.from_file(4, KMER_ROTATION)
     grid_db = GridCoverDB(
         sqlite3.connect(':memory:'), ramifier=rotation, box_side_len=0.5
     )
     # Two k-mers share the origin; the third sits one unit away on axis 0.
     for first_coord, last_base in ((0., 'A'), (0., 'T'), (1., 'C')):
         point = np.array([first_coord, 0., 0., 0.])
         grid_db.py_add_point_to_cluster(point, KMER_30 + last_base)
     grid_db.commit()
     self.assertEqual(grid_db.centroids().shape, (2, 4))
Beispiel #3
0
def cli_dump_kmers(outfile, cluster_ids, grid_cover):
    """Print every k-mer stored in a grid cover database to `outfile`.

    Args:
        outfile: Writable file object that receives one line per k-mer.
        cluster_ids: When truthy, each line is written as
            `<centroid index>,<kmer>`; otherwise only the k-mer is written.
        grid_cover: Filepath of the grid cover database to read.
    """
    db = GridCoverDB.load_from_filepath(grid_cover)
    for cluster, seq in db.get_kmers():
        record = f'{cluster},{seq}' if cluster_ids else seq
        print(record, file=outfile)
Beispiel #4
0
def cli_dump_kmers(outfile, grid_cover):
    """Write the number of k-mers in each cluster of a grid cover database.

    One `<centroid index>,<count>` line is printed per centroid index, in
    the order the indices are first returned by `get_kmers()`.

    Args:
        outfile: Writable file object that receives the count lines.
        grid_cover: Filepath of the grid cover database to read.
    """
    # Local import keeps this block self-contained; Counter replaces the
    # hand-rolled `dict.get(..., 0) + 1` tally and keeps insertion order.
    from collections import Counter

    grid = GridCoverDB.load_from_filepath(grid_cover)
    counts = Counter(centroid_index for centroid_index, _ in grid.get_kmers())
    for centroid_index, count in counts.items():
        print(f'{centroid_index},{count}', file=outfile)
Beispiel #5
0
def build_grid_cover(grid_db):
    """Build and store a bloom grid for every centroid of a grid cover DB.

    A progress bar is shown while the grids are built. After the database
    has been closed, the centroid count and the elapsed time are echoed
    with `err=True`.

    Args:
        grid_db: Filepath of the grid cover database to process.
    """
    cover = GridCoverDB.load_from_filepath(grid_db)
    # The clock starts before the centroids are read, so that lookup is
    # part of the reported time.
    started_at = time()
    n_centers = cover.centroids().shape[0]
    all_ids = list(range(n_centers))
    with click.progressbar(all_ids) as progress:
        for current_id in progress:
            cover.build_and_store_bloom_grid(current_id)
    cover.close()
    elapsed = time() - started_at
    click.echo(f'Built {n_centers} bloom filters in {elapsed:.5}s.', err=True)
Beispiel #6
0
def cli_dump_kmers(grid_cover):
    """Echo summary statistics for a grid cover database.

    The given path is echoed first, followed by tab-separated name/value
    lines for the number of centroids, the number of stored k-mers, the
    box side length and the ramifier's `d` attribute.

    Args:
        grid_cover: Filepath of the grid cover database to describe.
    """
    click.echo(grid_cover)
    db = GridCoverDB.load_from_filepath(grid_cover)
    # Each value is looked up immediately before it is echoed, so earlier
    # lines are already printed if a later lookup fails.
    click.echo(f'centers\t{db.centroids().shape[0]}')
    click.echo(f'kmers\t{len(db.get_kmers())}')
    click.echo(f'box_side\t{db.box_side_len}')
    click.echo(f'dims\t{db.ramifier.d}')
Beispiel #7
0
 def test_build_grid_cover_from_fasta(self):
     """All 98 FASTA k-mers are stored, grouped into fewer than 98 clusters."""
     rotation = RotatingRamifier.from_file(4, KMER_ROTATION)
     grid_db = GridCoverDB(
         sqlite3.connect(':memory:'), ramifier=rotation, box_side_len=0.5
     )
     builder = GridCoverBuilder(grid_db)
     builder.fast_add_kmers_from_fasta(KMER_FASTA)
     builder.commit()
     # Read the results back through the builder's own database handle.
     n_centers = builder.db.centroids().shape[0]
     n_points = len(builder.db.get_kmers())
     self.assertGreater(n_centers, 0)
     self.assertLess(n_centers, 98)
     self.assertEqual(n_points, 98)
Beispiel #8
0
 def test_add_kmer(self):
     """A k-mer added at the origin is the only member of cluster 0."""
     rotation = RotatingRamifier.from_file(4, KMER_ROTATION)
     grid_db = GridCoverDB(
         sqlite3.connect(':memory:'), ramifier=rotation, box_side_len=0.5
     )
     origin = np.array([0., 0., 0., 0.])
     grid_db.py_add_point_to_cluster(origin, KMER_31)
     grid_db.commit()
     members = grid_db.py_get_cluster_members(0)
     self.assertEqual(len(members), 1)
     # Members are decoded with reverse_convert_kmer before being compared
     # against the original string.
     decoded = [reverse_convert_kmer(member) for member in members]
     self.assertIn(KMER_31, decoded)
Beispiel #9
0
 def test_save_and_reload(self):
     """A k-mer written to an on-disk database is still there after reload.

     The temporary SQLite file is removed in a `finally` clause, so a failed
     assertion no longer leaves it behind to be reopened by a later run.
     """
     DB_SAVE_TEMP_FILE = join(dirname(__file__), 'temp.db_save_temp.sqlite')
     ramifier = RotatingRamifier.from_file(4, KMER_ROTATION)
     try:
         db = GridCoverDB(sqlite3.connect(DB_SAVE_TEMP_FILE),
                          ramifier=ramifier,
                          box_side_len=0.5)
         db.py_add_point_to_cluster(np.array([0., 0., 0., 0.]), KMER_31)
         # No commit() call here: the reload below only passes if close()
         # persisted the point.
         db.close()
         del db
         db = GridCoverDB.load_from_filepath(DB_SAVE_TEMP_FILE)
         members = db.py_get_cluster_members(0)
         self.assertEqual(len(members), 1)
         self.assertIn(KMER_31,
                       [reverse_convert_kmer(member) for member in members])
     finally:
         try:
             remove(DB_SAVE_TEMP_FILE)
         except FileNotFoundError:
             # The failure happened before the file was created; there is
             # nothing to clean up and the original error should surface.
             pass
Beispiel #10
0
 def test_save(self):
     """Adding a point to an on-disk database and closing it raises no error.

     The temporary SQLite file is removed in a `finally` clause, so a failure
     while writing or closing no longer leaves it behind on disk.
     """
     DB_SAVE_TEMP_FILE = join(dirname(__file__), 'temp.db_save_temp.sqlite')
     ramifier = RotatingRamifier.from_file(4, KMER_ROTATION)
     try:
         db = GridCoverDB(sqlite3.connect(DB_SAVE_TEMP_FILE),
                          ramifier=ramifier,
                          box_side_len=0.5)
         db.py_add_point_to_cluster(np.array([0., 0., 0., 0.]), KMER_31)
         db.close()
     finally:
         try:
             remove(DB_SAVE_TEMP_FILE)
         except FileNotFoundError:
             # The failure happened before the file was created; there is
             # nothing to clean up and the original error should surface.
             pass
Beispiel #11
0
def cli_dump_kmers(outfile, grid_cover):
    """Write the centroids of a grid cover database to `outfile` as CSV.

    The centroids are wrapped in a DataFrame and written with
    `header=None, index=None`.

    Args:
        outfile: Path or file object handed to `DataFrame.to_csv`.
        grid_cover: Filepath of the grid cover database to read.
    """
    db = GridCoverDB.load_from_filepath(grid_cover)
    centroid_table = pd.DataFrame(db.centroids())
    centroid_table.to_csv(outfile, header=None, index=None)
Beispiel #12
0
 def test_pre_build_blooms(self):
     """Bloom grids can be built, stored and retrieved per centroid."""
     rotation = RotatingRamifier.from_file(4, KMER_ROTATION)
     grid_db = GridCoverDB(
         sqlite3.connect(':memory:'), ramifier=rotation, box_side_len=0.5
     )
     # Two k-mers share the origin; the third sits one unit away on axis 0.
     for first_coord, last_base in ((0., 'A'), (0., 'T'), (1., 'C')):
         point = np.array([first_coord, 0., 0., 0.])
         grid_db.py_add_point_to_cluster(point, KMER_30 + last_base)
     grid_db.commit()
     for centroid_id in (0, 1):
         grid_db.build_and_store_bloom_grid(centroid_id)
     bloom_0 = grid_db.retrieve_bloom_grid(0)
     bloom_1 = grid_db.retrieve_bloom_grid(1)
     # Each grid is queried with a k-mer that was added to its own cluster.
     for bloom, last_base in ((bloom_0, 'A'), (bloom_1, 'C')):
         hits = bloom.py_count_grid_contains(KMER_30 + last_base)
         self.assertEqual(max(hits), 32 - bloom.col_k)
     # Grids were only built for centroids 0 and 1, so index 2 must fail.
     with self.assertRaises(IndexError):
         grid_db.retrieve_bloom_grid(2)
Beispiel #13
0
 def test_merge_dbs(self):
     """Loading one database into another combines k-mers and centroids."""
     rotation = RotatingRamifier.from_file(4, KMER_ROTATION)

     def make_db(entries):
         # Fresh in-memory database filled with (coords, last base) pairs.
         new_db = GridCoverDB(sqlite3.connect(':memory:'),
                              ramifier=rotation,
                              box_side_len=0.5)
         for coords, last_base in entries:
             new_db.py_add_point_to_cluster(np.array(coords),
                                            KMER_30 + last_base)
         new_db.commit()
         return new_db

     target = make_db([([0., 0., 0., 0.], 'A'), ([1., 0., 0., 0.], 'T')])
     source = make_db([([0., 0., 0., 0.], 'C'), ([1., 1., 0., 0.], 'G')])
     target.load_other(source)
     # Both databases contain the origin, so three distinct points remain.
     self.assertEqual(target.centroids().shape, (3, 4))
     merged_kmers = [entry[1] for entry in target.get_kmers()]
     self.assertEqual(len(merged_kmers), 4)
     for last_base in 'ATCG':
         self.assertIn(KMER_30 + last_base, merged_kmers)