Esempio n. 1
0
def sketch_scalar_product(M, c):
    """Return a new CMSketch built from the cells of ``M`` multiplied by ``c``.

    A fresh ``CMSketch(M.m, M.d)`` is created and ``update(i, j, value)`` is
    called on it once for every ``i`` in ``range(M.d)`` and ``j`` in
    ``range(M.m)``, with ``value = c * M.val_at(i, j)``.  Only ``M.m``,
    ``M.d`` and ``M.val_at`` are used from ``M``.

    NOTE(review): the result is an exact scalar multiple only if
    ``update(i, j, v)`` on a fresh sketch leaves cell (i, j) equal to ``v``
    -- confirm against the CMSketch implementation.
    """
    scaled = CMSketch(M.m, M.d)
    # Lazily enumerate every (row, column) coordinate, row by row.
    cells = ((row, col) for row in range(M.d) for col in range(M.m))
    for row, col in cells:
        scaled.update(row, col, c * M.val_at(row, col))
    return scaled
Esempio n. 2
0
 def _make_sketch(kmer_counts_dict: defaultdict) -> CountMinSketch:
     """Copy every (kmer, count) item of the dict into a new CountMinSketch.

     The sketch serves as a compressed stand-in for the dictionary so that
     the caller can delete ``kmer_counts_dict`` afterwards.  The dictionary
     itself is only iterated here, not modified.
     """
     # Sole constructor argument; used here as the sketch's number of rows.
     row_count = 8
     sketch = CountMinSketch(row_count)
     for sequence, occurrences in kmer_counts_dict.items():
         sketch.update(sequence, occurrences)
     return sketch
Esempio n. 3
0
def sketch_sum(M1, M2):
    """Return a new CMSketch built from the cell-wise sum of ``M1`` and ``M2``.

    The two sketches must agree on ``m`` (hashtable length) and on ``d``
    (number of hashtables).  If they do not, a message is printed to stdout
    and ``None`` is returned, so callers should check the result.

    Otherwise a fresh ``CMSketch(M2.m, M2.d)`` is created and ``update`` is
    called on it once per (i, j) cell with
    ``M1.val_at(i, j) + M2.val_at(i, j)``.
    """
    # print(...) with a single string argument prints the same text on
    # Python 2 and Python 3; the bare print statement used previously is a
    # SyntaxError on Python 3.
    if M1.m != M2.m:
        print("Sketches don't align on hashtable length.\n")
        return None
    if M1.d != M2.d:
        print("Sketches don't align on # of hashtables.\n")
        return None

    result_sketch = CMSketch(M2.m, M2.d)
    for i in range(M2.d):
        for j in range(M2.m):
            result_sketch.update(i, j, M1.val_at(i, j) + M2.val_at(i, j))
    return result_sketch
Esempio n. 4
0
    def _make_sketch(self, kmer_counts_dict: defaultdict) -> CountMinSketch:
        """Load ``kmer_counts_dict`` into a new CountMinSketch and return it.

        Every (kmer, count) item of the dictionary is passed to the sketch's
        ``update`` method.  When ``self.print_runtime`` is truthy, a start
        banner, a progress line every 50000 items and a finish line are
        printed, each timed relative to ``self.start_time``.  When
        ``self.print_syssizeof`` is truthy, ``sys.getsizeof`` of the sketch
        is printed as well.
        """

        def elapsed():
            # Time passed since self.start_time (assumes start_time came
            # from time.time() -- TODO confirm against where it is set).
            return time.time() - self.start_time

        if self.print_runtime:
            start_banner = "\n>--- STARTING TO MAKE COUNTMIN SKETCH AT T = {:.2f} ---"
            print(start_banner.format(elapsed()))

        # Read the dictionary into a compressed data structure.
        row_count = 10
        report_every = 50000
        sketch = CountMinSketch(row_count)
        for seen, (kmer, count) in enumerate(kmer_counts_dict.items()):
            if self.print_runtime and seen % report_every == 0:
                progress_line = ">Processed {0} kmers by time T={1:.2f}"
                print(progress_line.format(seen, elapsed()))
            sketch.update(kmer, count)

        if self.print_runtime:
            finish_line = ">FINISHED MAKING COUNTMIN SKETCH AT T = {:.2f}"
            print(finish_line.format(elapsed()))
        if self.print_syssizeof:
            # NOTE: sys.getsizeof is shallow -- it does not follow references
            # to objects the sketch holds, so this may understate its size.
            size_line = ">SIZE OF COUNTMIN SKETCH: {:,}"
            print(size_line.format(sys.getsizeof(sketch)))
        return sketch