def query_by_sqlite(self, queries, db):
    """Find rows of the ``otus`` table whose sequence matches a query.

    Every query in ``queries`` is matched on its ``sequence`` attribute
    against the ``sequence`` column of the ``otus`` table. When several
    queries share a sequence, each of them receives its own result for
    every matching row.

    Parameters
    ----------
    queries: iterable of objects exposing a ``sequence`` attribute.
    db: connection object providing ``table('otus').where_in(...).get()``.

    Returns
    -------
    list of QueryResult, one per (matching row, query) pair. The third
    QueryResult argument is always 0 (presumably the divergence of an
    exact match -- confirm against QueryResult).
    """
    # Cannot query sqlite with > 999 '?' entries, so query in batches.
    batch_size = 999

    indexed_queries = list(queries)
    unique_sequences = set()
    # sequence -> positions (in indexed_queries) of the queries carrying it.
    positions_by_sequence = {}
    for position, query in enumerate(indexed_queries):
        sequence = query.sequence
        unique_sequences.add(sequence)
        positions_by_sequence.setdefault(sequence, []).append(position)

    hits = []
    for batch in SequenceDatabase.grouper(unique_sequences, batch_size):
        otu_rows = db.table('otus')
        # None entries are dropped before querying (presumably padding
        # emitted by grouper for the final short batch -- confirm).
        wanted = [sequence for sequence in batch if sequence is not None]
        for row in otu_rows.where_in('sequence', wanted).get():
            for position in positions_by_sequence[row.sequence]:
                # A separate entry object is built for each query so that
                # results never share an OtuTableEntry instance.
                otu = OtuTableEntry()
                otu.marker = row.marker
                otu.sample_name = row.sample_name
                otu.sequence = row.sequence
                otu.count = row.num_hits
                otu.coverage = row.coverage
                otu.taxonomy = row.taxonomy
                hits.append(QueryResult(indexed_queries[position], otu, 0))
    return hits
def print_samples(self, **kwargs):
    """Write the OTU table entries of selected samples or taxonomy.

    Keyword arguments (all four keys must be present):

    db: passed to ``SequenceDatabase.acquire``.
    sample_names: iterable of sample names to select on; when truthy it
        takes precedence over ``taxonomy``.
    taxonomy: string matched anywhere within the ``taxonomy`` column
        (SQL ``LIKE`` with a ``%`` on either side); used only when
        ``sample_names`` is falsy.
    output_io: handed to ``OtuTable.write_to`` once all rows are collected.

    Raises
    ------
    KeyError: if one of the four keys is missing.
    Exception: if extra keyword arguments are supplied, or if both
        ``sample_names`` and ``taxonomy`` are falsy.
    """
    db = SequenceDatabase.acquire(kwargs.pop('db'))
    sample_names = kwargs.pop('sample_names')
    taxonomy = kwargs.pop('taxonomy')
    output_io = kwargs.pop('output_io')
    if kwargs:
        raise Exception("Unexpected arguments detected: %s" % kwargs)

    dbm = self._connect_to_sqlite(db)

    def _as_otu(row):
        # Copy the fields of one database row into a fresh OtuTableEntry.
        otu = OtuTableEntry()
        otu.marker = row.marker
        otu.sample_name = row.sample_name
        otu.sequence = row.sequence
        otu.count = row.num_hits
        otu.coverage = row.coverage
        otu.taxonomy = row.taxonomy
        return otu

    # Cannot query sqlite with > 999 '?' entries, so query in batches.
    batch_size = 999
    # A taxonomy search is a single query, so it forms a one-item "batch".
    query_chunks = set(sample_names) if sample_names else [taxonomy]

    otus = OtuTable()
    n_entries = 0
    for batch in SequenceDatabase.grouper(query_chunks, batch_size):
        if sample_names:
            otu_rows = dbm.table('otus')
            # None entries are dropped before querying (presumably padding
            # emitted by grouper for the final short batch -- confirm).
            wanted = [name for name in batch if name is not None]
            rows = otu_rows.where_in('sample_name', wanted).get()
        elif taxonomy:
            otu_rows = dbm.table('otus')
            pattern = "%%%s%%" % taxonomy
            rows = otu_rows.where('taxonomy', 'like', pattern).get()
        else:
            raise Exception("Programming error")

        for row in rows:
            otus.add([_as_otu(row)])
            n_entries += 1

    otus.write_to(output_io)
    logging.info("Printed %i OTU table entries" % n_entries)
def print_samples(self, **kwargs):
    """Write the OTU table entries of selected samples or taxonomy.

    NOTE(review): a definition with the same name and the same logic
    directly precedes this one in the visible source; if both live in the
    same scope, this later one is the definition that takes effect.

    Keyword arguments (all four keys must be present):

    db: passed to ``SequenceDatabase.acquire``.
    sample_names: iterable of sample names to select on; when truthy it
        takes precedence over ``taxonomy``.
    taxonomy: string matched anywhere within the ``taxonomy`` column
        (SQL ``LIKE`` with a ``%`` on either side); used only when
        ``sample_names`` is falsy.
    output_io: handed to ``OtuTable.write_to`` once all rows are collected.

    Raises
    ------
    KeyError: if one of the four keys is missing.
    Exception: if extra keyword arguments are supplied, or if both
        ``sample_names`` and ``taxonomy`` are falsy.
    """
    db = SequenceDatabase.acquire(kwargs.pop('db'))
    sample_names = kwargs.pop('sample_names')
    taxonomy = kwargs.pop('taxonomy')
    output_io = kwargs.pop('output_io')
    if kwargs:
        raise Exception("Unexpected arguments detected: %s" % kwargs)

    dbm = self._connect_to_sqlite(db)

    def _as_otu(row):
        # Copy the fields of one database row into a fresh OtuTableEntry.
        otu = OtuTableEntry()
        otu.marker = row.marker
        otu.sample_name = row.sample_name
        otu.sequence = row.sequence
        otu.count = row.num_hits
        otu.coverage = row.coverage
        otu.taxonomy = row.taxonomy
        return otu

    # Cannot query sqlite with > 999 '?' entries, so query in batches.
    batch_size = 999
    # A taxonomy search is a single query, so it forms a one-item "batch".
    query_chunks = set(sample_names) if sample_names else [taxonomy]

    otus = OtuTable()
    n_entries = 0
    for batch in SequenceDatabase.grouper(query_chunks, batch_size):
        if sample_names:
            otu_rows = dbm.table('otus')
            # None entries are dropped before querying (presumably padding
            # emitted by grouper for the final short batch -- confirm).
            wanted = [name for name in batch if name is not None]
            rows = otu_rows.where_in('sample_name', wanted).get()
        elif taxonomy:
            otu_rows = dbm.table('otus')
            pattern = "%%%s%%" % taxonomy
            rows = otu_rows.where('taxonomy', 'like', pattern).get()
        else:
            raise Exception("Programming error")

        for row in rows:
            otus.add([_as_otu(row)])
            n_entries += 1

    otus.write_to(output_io)
    logging.info("Printed %i OTU table entries" % n_entries)