def sketch_scalar_product(M, c):
    """Return a new sketch holding every cell of ``M`` multiplied by ``c``.

    A fresh ``CMSketch`` is created with the same ``m`` and ``d`` as ``M``.
    For every ``(i, j)`` with ``i`` in ``range(M.d)`` and ``j`` in
    ``range(M.m)``, the product ``c * M.val_at(i, j)`` is passed to the new
    sketch's ``update(i, j, value)``.  ``M`` itself is only read.

    NOTE(review): this presumably relies on a fresh ``CMSketch`` starting
    out empty so that ``update`` leaves exactly the scaled value in each
    cell -- confirm against the ``CMSketch`` implementation.
    """
    scaled = CMSketch(M.m, M.d)
    for table in range(M.d):
        for slot in range(M.m):
            scaled.update(table, slot, c * M.val_at(table, slot))
    return scaled
def _make_sketch(kmer_counts_dict: defaultdict) -> CountMinSketch:
    """Copy the k-mer counts from a dictionary into a ``CountMinSketch``.

    The dictionary is read into a compressed data structure so that the
    caller can delete ``kmer_counts_dict`` afterwards.

    Args:
        kmer_counts_dict: Mapping whose ``items()`` yields ``(kmer, count)``
            pairs.

    Returns:
        The sketch after ``update(kmer, count)`` has been called once for
        every entry of the dictionary.
    """
    # The single constructor argument; the original author named it NUM_ROWS.
    num_rows = 8
    sketch = CountMinSketch(num_rows)
    for sequence, occurrences in kmer_counts_dict.items():
        sketch.update(sequence, occurrences)
    return sketch
def sketch_sum(M1, M2):
    """Return a new sketch holding the cell-wise sum of ``M1`` and ``M2``.

    Both sketches must agree on ``m`` (hashtable length) and ``d`` (number
    of hashtables).  When they do, a fresh ``CMSketch(M2.m, M2.d)`` is
    created and, for every ``(i, j)`` with ``i`` in ``range(M2.d)`` and
    ``j`` in ``range(M2.m)``, the value
    ``M1.val_at(i, j) + M2.val_at(i, j)`` is passed to its
    ``update(i, j, value)``.

    Returns:
        The summed sketch, or ``None`` (after printing a diagnostic) when
        the dimensions of the two sketches do not match.
    """
    # Single-argument print(...) is used instead of the Python 2 print
    # statement: it is valid syntax on Python 3 and produces the same output
    # on Python 2.
    if M1.m != M2.m:
        print("Sketches don't align on hashtable length.\n")
        return None
    if M1.d != M2.d:
        print("Sketches don't align on # of hashtables.\n")
        return None

    result_sketch = CMSketch(M2.m, M2.d)
    for i in range(M2.d):
        for j in range(M2.m):
            result_sketch.update(i, j, M1.val_at(i, j) + M2.val_at(i, j))
    return result_sketch
def _make_sketch(self, kmer_counts_dict: defaultdict) -> CountMinSketch:
    """Load the k-mer counts from a dictionary into a ``CountMinSketch``.

    Progress is reported on stdout when ``self.print_runtime`` is truthy
    (timestamps are seconds elapsed since ``self.start_time``), and the
    size of the finished sketch is printed when ``self.print_syssizeof``
    is truthy.

    Args:
        kmer_counts_dict: Mapping whose ``items()`` yields ``(kmer, count)``
            pairs.

    Returns:
        The sketch after ``update(kmer, count)`` has been called once for
        every entry of the dictionary.
    """

    def elapsed():
        # Seconds since the instance's recorded start time.
        return time.time() - self.start_time

    if self.print_runtime:
        print("\n>--- STARTING TO MAKE COUNTMIN SKETCH AT T = {:.2f} ---".format(
            elapsed()))

    # Read the dictionary into a compressed data structure.
    num_rows = 10
    sketch = CountMinSketch(num_rows)

    processed = 0
    for kmer, count in kmer_counts_dict.items():
        # Report before inserting, once every 50000 entries (including the
        # very first one).
        if self.print_runtime and processed % 50000 == 0:
            print(">Processed {0} kmers by time T={1:.2f}".format(
                processed, elapsed()))
        sketch.update(kmer, count)
        processed += 1

    if self.print_runtime:
        print(">FINISHED MAKING COUNTMIN SKETCH AT T = {:.2f}".format(
            elapsed()))

    if self.print_syssizeof:
        # NOTE: sys.getsizeof is shallow; it does not follow references
        # into objects contained by the sketch.
        print(">SIZE OF COUNTMIN SKETCH: {:,}".format(
            sys.getsizeof(sketch)))

    return sketch