def create_kmer_hash(f, s, w=1, wheel_path='/mnt/Wheels.txt', block_size=10000,
                     out_path='/mnt/Kmer_Hash.txt', reverse_compliments=True):
    """Build a bit table of occupied k-mer hash bins and write it to disk.

    Rewinds ``f``, reads it in blocks of ``block_size`` reads, hashes each
    block with ``generator_to_bins`` and sets the bit for every bin index
    returned for every wheel. The finished table is written to ``out_path``
    and also returned.

    Args:
        f: File object holding the reads; must support ``seek`` and ``tell``.
        s: Spoke limit passed to ``get_wheels``; the table has ``2**s`` bits.
        w: Wheel limit passed to ``get_wheels``.
        wheel_path: Path handed to ``get_wheels``.
        block_size: ``max_reads`` given to ``read_generator`` for each block.
        out_path: File the bit table is written to (opened in binary mode).
        reverse_compliments: Forwarded to ``generator_to_bins`` as ``rc``.

    Returns:
        The ``bitarray`` of length ``2**s``.
    """
    W = get_wheels(wheel_path, spoke_limit=s, wheel_limit=w)
    # The k-mer size handed to the reader is the length of the first wheel's 'p'.
    k = len(W[0]['p'])
    f.seek(0)
    H = bitarray(2**s)
    # bitarray(n) is not guaranteed to start out zeroed, so clear it explicitly.
    H.setall(False)
    last = None
    # Keep going until an iteration no longer advances the file position.
    while last != f.tell():
        last = f.tell()
        try:
            A, B = generator_to_bins(
                read_generator(f, max_reads=block_size, kmer_size=k),
                W, rc=reverse_compliments)
            num_entries = len(A)
            for wheel_bins in B:
                for i in range(num_entries):
                    H[wheel_bins[i]] = True
        except Exception as err:
            # Still best effort (a bad block does not abort the run), but the
            # failure is reported instead of being silently discarded, and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print(str(err))
        # Report the current file offset after each block.
        print(f.tell())
    with open(out_path, 'wb') as fo:
        H.tofile(fo)
    return H
def write_hashed_reads(read_file, out_file, s, w=1, wheel_path='/mnt/Wheels.txt', block_size=10000):
    """Write one line per read listing the hash bins of its k-mers.

    Rewinds ``read_file``, reads it in blocks of ``block_size`` reads and
    hashes each block with ``generator_to_bins``. Consecutive entries that
    share the same read id are grouped, and for each group a line of the form
    ``<read id>k, bins: [k, bin, bin, ...]`` is written to ``out_file``.
    Only the bins of the first wheel (``B[0]``) are written.

    Args:
        read_file: File object holding the reads; must support ``seek`` and
            ``tell``.
        out_file: Object with a ``write`` method that receives the lines.
        s: Spoke limit passed to ``get_wheels``.
        w: Wheel limit passed to ``get_wheels``.
        wheel_path: Path handed to ``get_wheels``.
        block_size: ``max_reads`` given to ``read_generator`` for each block.

    Returns:
        None.
    """
    W = get_wheels(wheel_path, spoke_limit=s, wheel_limit=w)
    # The k-mer size handed to the reader is the length of the first wheel's 'p'.
    k = len(W[0]['p'])
    hash_prefix = 'k, bins: '
    read_file.seek(0)
    last = None
    # Keep going until an iteration no longer advances the file position.
    while last != read_file.tell():
        last = read_file.tell()
        try:
            A, B = generator_to_bins(
                read_generator(read_file, max_reads=block_size,
                               verbose_ids=True, kmer_size=k),
                W)
            # Writing just one wheel here; grouping relies on the entries of A
            # being sorted by read, so equal ids are adjacent.
            B0 = []
            last_a = None
            for a, read_id in enumerate(A):
                if read_id != last_a and B0:
                    out_file.write(last_a + hash_prefix + str([k] + B0) + '\n')
                    B0 = []
                last_a = read_id
                B0.append(B[0][a])
            # Flush the final group: without this the last read of every
            # block was accumulated but never written.
            if B0:
                out_file.write(last_a + hash_prefix + str([k] + B0) + '\n')
        except Exception as err:
            # Report the actual exception class rather than the Exception base.
            print('%s: %s' % (type(err).__name__, err))
        # Report the current file offset after each block.
        print(read_file.tell())
def hash_test_kmers(W):
    """Collect every hash bin hit by the k-mers stored in ``test_genome``.

    Fetches all documents from ``conn['test_genome'].kmers``, hashes them with
    ``generator_to_bins(..., return_terminals=True)`` and marks each bin index
    returned for every wheel.

    Args:
        W: Wheels passed through to ``generator_to_bins``.

    Returns:
        A ``defaultdict`` (created without a default factory) mapping each bin
        index seen to ``True``.
    """
    # NOTE(review): conn is presumably a MongoDB connection - confirm.
    kmer_docs = conn['test_genome'].kmers.find({}, timeout=False)
    A, B = generator_to_bins(kmer_docs, W, return_terminals=True)
    H = defaultdict()
    for wheel_bins in B:
        for i in xrange(len(A)):
            H[wheel_bins[i]] = True
    return H
def hash_test_kmers(W):
    """Collect every hash bin hit by the k-mers stored in ``test_genome``.

    Fetches all documents from ``conn['test_genome'].kmers``, hashes them with
    ``generator_to_bins(..., return_terminals=True)`` and marks each bin index
    returned for every wheel.

    Args:
        W: Wheels passed through to ``generator_to_bins``.

    Returns:
        A ``defaultdict`` (created without a default factory) mapping each bin
        index seen to ``True``.
    """
    # NOTE(review): conn is presumably a MongoDB connection - confirm.
    kmer_docs = conn['test_genome'].kmers.find({}, timeout=False)
    A, B = generator_to_bins(kmer_docs, W, return_terminals=True)
    H = defaultdict()
    for wheel_bins in B:
        for i in xrange(len(A)):
            H[wheel_bins[i]] = True
    return H
def hash_count_part(args):
    """Count hash-bin hits for one chunk of reads.

    Args:
        args: A ``(read_lines, k, W)`` tuple. ``read_lines`` is given to
            ``reads_from_string`` together with ``kmersize=k``; ``W`` is
            passed to ``generator_to_bins`` (always with ``rc=True``).

    Returns:
        A ``defaultdict(int)`` mapping each bin index to the number of times
        it was returned, summed over all wheels. If hashing raises, the error
        is printed and whatever was counted up to that point is returned.
    """
    read_lines, k, W = args
    H = defaultdict(int)
    try:
        A, B = generator_to_bins(
            reads_from_string(read_lines, kmersize=k), W, rc=True)
        num_entries = len(A)
        for wheel_bins in B:
            for i in range(num_entries):
                H[wheel_bins[i]] += 1
    except Exception as err:
        # Best effort: report the failure and fall through to the return.
        print(str(err))
    # The counts were previously built and then dropped (implicit None).
    return H
def hash_count_part(args):
    """Count hash-bin hits for one chunk of reads.

    Args:
        args: A ``(read_lines, k, W)`` tuple. ``read_lines`` is given to
            ``reads_from_string`` together with ``kmersize=k``; ``W`` is
            passed to ``generator_to_bins`` (always with ``rc=True``).

    Returns:
        A ``defaultdict(int)`` mapping each bin index to the number of times
        it was returned, summed over all wheels. If hashing raises, the error
        is printed and whatever was counted up to that point is returned.
    """
    read_lines, k, W = args
    H = defaultdict(int)
    try:
        A, B = generator_to_bins(
            reads_from_string(read_lines, kmersize=k), W, rc=True)
        num_entries = len(A)
        for wheel_bins in B:
            for i in range(num_entries):
                H[wheel_bins[i]] += 1
    except Exception as err:
        # Best effort: report the failure and fall through to the return.
        print(str(err))
    # The counts were previously built and then dropped (implicit None).
    return H
def create_kmer_hash_counts_fasta(f, s, w=1, wheel_path='/mnt/Wheels.txt', block_size=1,
                                  out_path='/mnt/Kmer_Hash_Counts.txt'):
    """Build a table of saturating 8-bit hit counts per k-mer hash bin.

    Rewinds ``f``, reads it in blocks of ``block_size`` reads, hashes each
    block with ``generator_to_bins`` and increments the counter of every bin
    index returned for every wheel. Counters stop at 255.

    Args:
        f: File object holding the reads; must support ``seek`` and ``tell``.
        s: Spoke limit passed to ``get_wheels``; the table has ``2**s``
            entries.
        w: Wheel limit passed to ``get_wheels``.
        wheel_path: Path handed to ``get_wheels``.
        block_size: ``max_reads`` given to ``read_generator`` for each block.
        out_path: Accepted for interface compatibility.
            NOTE(review): nothing is written to this path here - confirm
            whether the table is meant to be persisted.

    Returns:
        The ``ctypes`` array of ``2**s`` ``c_uint8`` counters.
    """
    W = get_wheels(wheel_path, spoke_limit=s, wheel_limit=w)
    # The k-mer size handed to the reader is the length of the first wheel's 'p'.
    k = len(W[0]['p'])
    # One unsigned byte per bin.
    H = (c_uint8 * 2**s)()
    last = None
    f.seek(0)
    # Keep going until an iteration no longer advances the file position.
    while last != f.tell():
        last = f.tell()
        try:
            A, B = generator_to_bins(
                read_generator(f, max_reads=block_size, kmer_size=k), W)
            num_entries = len(A)
            for wheel_bins in B:
                for i in range(num_entries):
                    bin_index = wheel_bins[i]
                    # Clamp at 255 so the stored value never exceeds one byte.
                    H[bin_index] = min(255, H[bin_index] + 1)
        except Exception as err:
            # Best effort: report the failure and move on.
            print(str(err))
    # The table was previously built and then dropped (implicit None).
    return H
def create_kmer_hash_counts_fasta(f, s, w=1, wheel_path='/mnt/Wheels.txt', block_size=1,
                                  out_path='/mnt/Kmer_Hash_Counts.txt'):
    """Build a table of saturating 8-bit hit counts per k-mer hash bin.

    Rewinds ``f``, reads it in blocks of ``block_size`` reads, hashes each
    block with ``generator_to_bins`` and increments the counter of every bin
    index returned for every wheel. Counters stop at 255.

    Args:
        f: File object holding the reads; must support ``seek`` and ``tell``.
        s: Spoke limit passed to ``get_wheels``; the table has ``2**s``
            entries.
        w: Wheel limit passed to ``get_wheels``.
        wheel_path: Path handed to ``get_wheels``.
        block_size: ``max_reads`` given to ``read_generator`` for each block.
        out_path: Accepted for interface compatibility.
            NOTE(review): nothing is written to this path here - confirm
            whether the table is meant to be persisted.

    Returns:
        The ``ctypes`` array of ``2**s`` ``c_uint8`` counters.
    """
    W = get_wheels(wheel_path, spoke_limit=s, wheel_limit=w)
    # The k-mer size handed to the reader is the length of the first wheel's 'p'.
    k = len(W[0]['p'])
    # One unsigned byte per bin.
    H = (c_uint8 * 2**s)()
    last = None
    f.seek(0)
    # Keep going until an iteration no longer advances the file position.
    while last != f.tell():
        last = f.tell()
        try:
            A, B = generator_to_bins(
                read_generator(f, max_reads=block_size, kmer_size=k), W)
            num_entries = len(A)
            for wheel_bins in B:
                for i in range(num_entries):
                    bin_index = wheel_bins[i]
                    # Clamp at 255 so the stored value never exceeds one byte.
                    H[bin_index] = min(255, H[bin_index] + 1)
        except Exception as err:
            # Best effort: report the failure and move on.
            print(str(err))
    # The table was previously built and then dropped (implicit None).
    return H
def create_kmer_hash(f, s, w=1, wheel_path='/mnt/Wheels.txt', block_size=10000,
                     out_path='/mnt/Kmer_Hash.txt', reverse_compliments=True):
    """Build a bit table of occupied k-mer hash bins and write it to disk.

    Rewinds ``f``, reads it in blocks of ``block_size`` reads, hashes each
    block with ``generator_to_bins`` and sets the bit for every bin index
    returned for every wheel. The finished table is written to ``out_path``
    and also returned.

    Args:
        f: File object holding the reads; must support ``seek`` and ``tell``.
        s: Spoke limit passed to ``get_wheels``; the table has ``2**s`` bits.
        w: Wheel limit passed to ``get_wheels``.
        wheel_path: Path handed to ``get_wheels``.
        block_size: ``max_reads`` given to ``read_generator`` for each block.
        out_path: File the bit table is written to (opened in binary mode).
        reverse_compliments: Forwarded to ``generator_to_bins`` as ``rc``.

    Returns:
        The ``bitarray`` of length ``2**s``.
    """
    W = get_wheels(wheel_path, spoke_limit=s, wheel_limit=w)
    # The k-mer size handed to the reader is the length of the first wheel's 'p'.
    k = len(W[0]['p'])
    f.seek(0)
    H = bitarray(2**s)
    # bitarray(n) is not guaranteed to start out zeroed, so clear it explicitly.
    H.setall(False)
    last = None
    # Keep going until an iteration no longer advances the file position.
    while last != f.tell():
        last = f.tell()
        try:
            A, B = generator_to_bins(
                read_generator(f, max_reads=block_size, kmer_size=k),
                W, rc=reverse_compliments)
            num_entries = len(A)
            for wheel_bins in B:
                for i in range(num_entries):
                    H[wheel_bins[i]] = True
        except Exception as err:
            # Still best effort (a bad block does not abort the run), but the
            # failure is reported instead of being silently discarded, and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print(str(err))
        # Report the current file offset after each block.
        print(f.tell())
    with open(out_path, 'wb') as fo:
        H.tofile(fo)
    return H
def write_hashed_reads(read_file, out_file, s, w=1, wheel_path='/mnt/Wheels.txt', block_size=10000):
    """Write one line per read listing the hash bins of its k-mers.

    Rewinds ``read_file``, reads it in blocks of ``block_size`` reads and
    hashes each block with ``generator_to_bins``. Consecutive entries that
    share the same read id are grouped, and for each group a line of the form
    ``<read id>k, bins: [k, bin, bin, ...]`` is written to ``out_file``.
    Only the bins of the first wheel (``B[0]``) are written.

    Args:
        read_file: File object holding the reads; must support ``seek`` and
            ``tell``.
        out_file: Object with a ``write`` method that receives the lines.
        s: Spoke limit passed to ``get_wheels``.
        w: Wheel limit passed to ``get_wheels``.
        wheel_path: Path handed to ``get_wheels``.
        block_size: ``max_reads`` given to ``read_generator`` for each block.

    Returns:
        None.
    """
    W = get_wheels(wheel_path, spoke_limit=s, wheel_limit=w)
    # The k-mer size handed to the reader is the length of the first wheel's 'p'.
    k = len(W[0]['p'])
    hash_prefix = 'k, bins: '
    read_file.seek(0)
    last = None
    # Keep going until an iteration no longer advances the file position.
    while last != read_file.tell():
        last = read_file.tell()
        try:
            A, B = generator_to_bins(
                read_generator(read_file, max_reads=block_size,
                               verbose_ids=True, kmer_size=k),
                W)
            # Writing just one wheel here; grouping relies on the entries of A
            # being sorted by read, so equal ids are adjacent.
            B0 = []
            last_a = None
            for a, read_id in enumerate(A):
                if read_id != last_a and B0:
                    out_file.write(last_a + hash_prefix + str([k] + B0) + '\n')
                    B0 = []
                last_a = read_id
                B0.append(B[0][a])
            # Flush the final group: without this the last read of every
            # block was accumulated but never written.
            if B0:
                out_file.write(last_a + hash_prefix + str([k] + B0) + '\n')
        except Exception as err:
            # Report the actual exception class rather than the Exception base.
            print('%s: %s' % (type(err).__name__, err))
        # Report the current file offset after each block.
        print(read_file.tell())