Ejemplo n.º 1
0
def build_grid_cover_fasta(radius, dimension, threads, outfile, predb_list):
    """Build a grid cover at `outfile` from a list of pre-DB files.

    The first listed pre-DB seeds the grid through
    `GridCoverBuilder.build_from_predb`; each remaining pre-DB is merged in
    with `add_kmers_from_predb`. Indices are built once at the end and a
    one-line summary is echoed to stderr.

    Args:
        radius: forwarded to `GridCoverBuilder.build_from_predb`.
        dimension: accepted but not used by this function.
        threads: value written to the OPENBLAS_NUM_THREADS and
            MKL_NUM_THREADS environment variables.
        outfile: destination forwarded to `GridCoverBuilder.build_from_predb`.
        predb_list: iterable of lines, each naming one pre-DB file.
            Surrounding whitespace is stripped and blank lines are ignored.

    Raises:
        click.UsageError: if `predb_list` names no files.
    """
    environ[
        'OPENBLAS_NUM_THREADS'] = f'{threads}'  # numpy uses one of these two libraries
    environ['MKL_NUM_THREADS'] = f'{threads}'
    # Drop blank lines (e.g. a trailing newline in the list file) so they are
    # not passed on as empty filenames.
    predb_list = [line.strip() for line in predb_list if line.strip()]
    if not predb_list:
        raise click.UsageError('No pre-DB files were listed.')

    def logger(n):
        # '\r' rewrites the same terminal line on every call.
        stderr.write(f'\rAdded {n:,} k-mers to db')

    start = time()
    predb = PreDB.load_from_filepath(predb_list[0])
    grid = GridCoverBuilder.build_from_predb(outfile,
                                             predb,
                                             radius,
                                             logger=logger)
    # Bound up front: the loop below never assigns it when only the seed
    # pre-DB was listed, which previously raised NameError at the final echo.
    # NOTE(review): this reports the value returned by the most recent
    # `add_kmers_from_predb` call and does not include the seed pre-DB;
    # confirm whether a running total is wanted.
    n_added = 0
    with click.progressbar(predb_list) as predbs:
        for i, predb_filename in enumerate(predbs):
            if i == 0:
                continue  # already consumed as the seed above
            predb = PreDB.load_from_filepath(predb_filename)
            n_added = grid.add_kmers_from_predb(predb, logger=logger)
    grid.db._build_indices()  # indices are disabled by `GCB.build_from_predb`
    n_centers = grid.db.centroids().shape[0]
    grid.close()
    add_time = time() - start
    click.echo((f'Added {n_added:,} kmers to {outfile} in {add_time:.5}s. '
                f'{n_centers:,} clusters.'),
               err=True)
Ejemplo n.º 2
0
 def test_build_grid_cover_from_pre(self):
     """Build an in-memory grid cover from the PRE_DB fixture and check its size.

     Expects 98 stored k-mers grouped into at least 1 and fewer than 98
     centroids.
     """
     builder = GridCoverBuilder.build_from_predb(
         ':memory:', PreDB.load_from_filepath(PRE_DB), 0.5)
     builder.db._build_indices()
     builder.commit()
     centroid_count = builder.db.centroids().shape[0]
     kmer_count = len(builder.db.get_kmers())
     self.assertGreater(centroid_count, 0)
     self.assertLess(centroid_count, 98)
     self.assertEqual(kmer_count, 98)
Ejemplo n.º 3
0
def build_grid_cover(radius, dimension, threads, num_kmers, start_offset,
                     outfile, preload, rotation, kmer_table):
    """Build a grid cover at `outfile` from a k-mer table and echo a summary.

    A `RotatingRamifier` is loaded from `rotation` with `dimension`, a
    `GridCoverBuilder` is opened on `outfile` with `radius`, and up to
    `num_kmers` k-mers from `kmer_table` are added. The summary line
    (k-mer count, elapsed seconds, cluster count) goes to stderr.

    `threads` is written to the OPENBLAS_NUM_THREADS and MKL_NUM_THREADS
    environment variables. `start_offset` and `preload` are accepted but not
    used by this function.
    """
    # numpy uses one of these two libraries
    for blas_var in ('OPENBLAS_NUM_THREADS', 'MKL_NUM_THREADS'):
        environ[blas_var] = f'{threads}'
    builder = GridCoverBuilder.from_filepath(
        outfile, RotatingRamifier.from_file(dimension, rotation), radius)
    # Timing covers adding, committing, counting centroids and closing.
    began = time()
    kmers_added = builder.fast_add_kmers_from_file(kmer_table,
                                                   num_to_add=num_kmers)
    builder.commit()
    cluster_count = builder.db.centroids().shape[0]
    builder.close()
    elapsed = time() - began
    summary = (f'Added {kmers_added:,} kmers to {outfile} in {elapsed:.5}s. '
               f'{cluster_count:,} clusters.')
    click.echo(summary, err=True)
Ejemplo n.º 4
0
def build_grid_cover_fasta(radius, dimension, threads, outfile, rotation,
                           fasta_list):
    """Build a grid cover at `outfile` from a list of FASTA files.

    A `RotatingRamifier` is loaded from `rotation` with `dimension`, a
    `GridCoverBuilder` is opened on `outfile` with `radius`, and every listed
    FASTA file is added with `fast_add_kmers_from_fasta`. A one-line summary
    is echoed to stderr.

    Args:
        radius: forwarded to `GridCoverBuilder.from_filepath`.
        dimension: forwarded to `RotatingRamifier.from_file`.
        threads: value written to the OPENBLAS_NUM_THREADS and
            MKL_NUM_THREADS environment variables.
        outfile: destination forwarded to `GridCoverBuilder.from_filepath`.
        rotation: forwarded to `RotatingRamifier.from_file`.
        fasta_list: iterable of lines, each naming one FASTA file.
            Surrounding whitespace is stripped and blank lines are ignored.
    """
    environ[
        'OPENBLAS_NUM_THREADS'] = f'{threads}'  # numpy uses one of these two libraries
    environ['MKL_NUM_THREADS'] = f'{threads}'
    # Drop blank lines (e.g. a trailing newline in the list file) so they are
    # not passed on as empty filenames.
    fasta_list = [line.strip() for line in fasta_list if line.strip()]
    ramifier = RotatingRamifier.from_file(dimension, rotation)
    grid = GridCoverBuilder.from_filepath(outfile, ramifier, radius)
    start = time()
    # Bound up front: with an empty list the loop never assigns it, which
    # previously raised NameError at the final echo.
    # NOTE(review): this reports the value returned by the most recent
    # `fast_add_kmers_from_fasta` call; confirm whether a running total
    # across files is wanted.
    n_added = 0
    with click.progressbar(fasta_list) as fastas:
        for fasta_filename in fastas:
            n_added = grid.fast_add_kmers_from_fasta(fasta_filename)
    n_centers = grid.db.centroids().shape[0]
    grid.close()
    add_time = time() - start
    click.echo((f'Added {n_added:,} kmers to {outfile} in {add_time:.5}s. '
                f'{n_centers:,} clusters.'),
               err=True)
Ejemplo n.º 5
0
 def test_build_grid_cover_from_fasta(self):
     """Fill an in-memory grid cover from the KMER_FASTA fixture and check its size.

     Expects 98 stored k-mers grouped into at least 1 and fewer than 98
     centroids.
     """
     rotation = RotatingRamifier.from_file(4, KMER_ROTATION)
     connection = sqlite3.connect(':memory:')
     builder = GridCoverBuilder(
         GridCoverDB(connection, ramifier=rotation, box_side_len=0.5))
     builder.fast_add_kmers_from_fasta(KMER_FASTA)
     builder.commit()
     centroid_count = builder.db.centroids().shape[0]
     kmer_count = len(builder.db.get_kmers())
     self.assertGreater(centroid_count, 0)
     self.assertLess(centroid_count, 98)
     self.assertEqual(kmer_count, 98)