예제 #1
0
def create_kmer_hash(f,
                     s,
                     w=1,
                     wheel_path='/mnt/Wheels.txt',
                     block_size=10000,
                     out_path='/mnt/Kmer_Hash.txt',
                     reverse_compliments=True):
    """Set a bit for every hash bin hit by the k-mers read from `f`, then save.

    The input is processed in blocks of at most `block_size` reads. For each
    block, `generator_to_bins` returns a pair `(A, B)`; for every entry of `B`
    the first `len(A)` bin indices are marked in a bit table of `2**s` bits.
    The finished table is written to `out_path` and returned.

    Args:
        f: Seekable file object holding the reads; it is rewound with
            `seek(0)` before processing.
        s: Exponent of the table size (the table has `2**s` bits); also
            forwarded to `get_wheels` as `spoke_limit`.
        w: Forwarded to `get_wheels` as `wheel_limit`.
        wheel_path: Path forwarded to `get_wheels`.
        block_size: Forwarded to `read_generator` as `max_reads`.
        out_path: Path of the binary file the bit table is written to.
        reverse_compliments: Forwarded to `generator_to_bins` as `rc`.

    Returns:
        The populated `bitarray`.
    """
    W = get_wheels(wheel_path, spoke_limit=s, wheel_limit=w)
    # The k-mer size is the length of the first wheel's 'p' entry.
    k = len(W[0]['p'])
    f.seek(0)
    H = bitarray(2**s)
    # A freshly created bitarray is apparently not guaranteed to be zeroed,
    # so clear it explicitly.
    H.setall(False)
    last = None
    # Keep going until a pass leaves the file position unchanged.
    while last != f.tell():
        last = f.tell()
        try:
            reads = read_generator(f, max_reads=block_size, kmer_size=k)
            A, B = generator_to_bins(reads, W, rc=reverse_compliments)
            n_kmers = len(A)
            for wheel_bins in B:
                for a in range(n_kmers):
                    H[wheel_bins[a]] = True
        except Exception as err:
            # Best effort: a failing block is reported and skipped instead of
            # aborting the run. KeyboardInterrupt/SystemExit are deliberately
            # not caught so the loop can still be interrupted.
            print(str(err))
        # Progress indicator: current offset in the input file.
        print(f.tell())
    # The context manager closes the file even if tofile() raises.
    with open(out_path, 'wb') as fo:
        H.tofile(fo)
    return H
예제 #2
0
def write_hashed_reads(read_file,
                       out_file,
                       s,
                       w=1,
                       wheel_path='/mnt/Wheels.txt',
                       block_size=10000):
    """Write, for each read in `read_file`, its ID followed by its hash bins.

    The input is processed in blocks of at most `block_size` reads. For each
    block, `generator_to_bins` returns `(A, B)`; consecutive entries of `A`
    that compare equal are treated as belonging to the same read (this
    assumes the entries are grouped by read). For every such run one line is
    written to `out_file`:

        <read id> + 'k, bins: ' + str([k] + bins) + '\n'

    Only the bins of the first wheel (`B[0]`) are written.

    Args:
        read_file: Seekable file object holding the reads; rewound with
            `seek(0)` before processing.
        out_file: Writable file-like object receiving the output lines.
        s: Forwarded to `get_wheels` as `spoke_limit`.
        w: Forwarded to `get_wheels` as `wheel_limit`.
        wheel_path: Path forwarded to `get_wheels`.
        block_size: Forwarded to `read_generator` as `max_reads`.
    """
    W = get_wheels(wheel_path, spoke_limit=s, wheel_limit=w)
    # The k-mer size is the length of the first wheel's 'p' entry.
    k = len(W[0]['p'])
    hash_prefix = 'k, bins: '

    def write_read(read_id, bins):
        # One output line per read: id, prefix, then [k, bin, bin, ...].
        out_file.write(read_id + hash_prefix + str([k] + bins) + '\n')

    read_file.seek(0)
    last = None
    # Keep going until a pass leaves the file position unchanged.
    while last != read_file.tell():
        last = read_file.tell()
        try:
            reads = read_generator(read_file,
                                   max_reads=block_size,
                                   verbose_ids=True,
                                   kmer_size=k)
            A, B = generator_to_bins(reads, W)
            # Writing just one wheel here, assuming A is sorted by read.
            B0 = []
            last_a = None
            for a, read_id in enumerate(A):
                if read_id != last_a:
                    if B0:
                        write_read(last_a, B0)
                        B0 = []
                    last_a = read_id
                B0.append(B[0][a])
            # The loop above only flushes a read when the next one starts, so
            # the final read of the block must be written here or it is lost.
            if B0:
                write_read(last_a, B0)
        except Exception as err:
            # Best effort: report the actual error type and message, then
            # continue with the next block.
            print('%s: %s' % (type(err).__name__, err))
        # Progress indicator: current offset in the input file.
        print(read_file.tell())
예제 #3
0
def hash_test_kmers(W):
    """Collect every bin index produced for the k-mers of the test genome.

    Queries all documents of the `kmers` collection in the `test_genome`
    database (via the module-level `conn`), runs them through
    `generator_to_bins` with `return_terminals=True`, and records each bin
    index found in the first `len(A)` positions of every entry of `B`.

    Args:
        W: Wheels, forwarded unchanged to `generator_to_bins`.

    Returns:
        A `defaultdict` (created without a default factory, so it behaves
        like a plain dict) mapping each seen bin index to True.
    """
    # timeout=False is passed straight through to find().
    kmer_docs = conn['test_genome'].kmers.find({}, timeout=False)
    A, B = generator_to_bins(kmer_docs, W, return_terminals=True)
    H = defaultdict()
    for wheel_bins in B:
        for i in xrange(len(A)):
            H[wheel_bins[i]] = True
    return H
예제 #4
0
def hash_test_kmers(W):
    """Return a lookup of all bin indices hit by the test genome's k-mers.

    Every document of `conn['test_genome'].kmers` is passed through
    `generator_to_bins` (with `return_terminals=True`). For each entry of the
    returned `B`, the first `len(A)` bin indices are stored as keys.

    Args:
        W: Wheels, forwarded unchanged to `generator_to_bins`.

    Returns:
        A `defaultdict` without a default factory (i.e. dict-like) whose keys
        are the bin indices seen and whose values are all True.
    """
    database = conn['test_genome']
    cursor = database.kmers.find({}, timeout=False)
    A, B = generator_to_bins(cursor, W, return_terminals=True)
    H = defaultdict()
    for bins_for_wheel in B:
        # Only the first len(A) positions of each entry are consulted.
        for position in xrange(len(A)):
            bin_index = bins_for_wheel[position]
            H[bin_index] = True
    return H
예제 #5
0
def hash_count_part(args):
    """Count how often each hash bin is hit by the k-mers of a chunk of reads.

    Takes a single tuple so it can be used with map-style dispatch.

    Args:
        args: Tuple `(read_lines, k, W)`. `read_lines` is forwarded to
            `reads_from_string`, `k` is forwarded as `kmersize`, and `W` is
            forwarded to `generator_to_bins`.

    Returns:
        A `defaultdict(int)` mapping bin index to hit count. If processing
        fails, the error message is printed and the counts gathered so far
        (possibly none) are returned.
    """
    read_lines, k, W = args
    H = defaultdict(int)
    try:
        kmers = reads_from_string(read_lines, kmersize=k)
        A, B = generator_to_bins(kmers, W, rc=True)
        n_kmers = len(A)
        for wheel_bins in B:
            for a in range(n_kmers):
                H[wheel_bins[a]] += 1
    except Exception as err:
        # Best effort: report the problem and fall through to return
        # whatever has been counted so far.
        print(str(err))
    # Hand the table back to the caller; without this the counts are lost.
    return H
예제 #6
0
def hash_count_part(args):
    """Tally hash-bin hits for the k-mers contained in one chunk of reads.

    The single tuple argument makes the function usable with map-style
    dispatch.

    Args:
        args: Tuple `(read_lines, k, W)`. `read_lines` goes to
            `reads_from_string` together with `kmersize=k`; `W` goes to
            `generator_to_bins`.

    Returns:
        A `defaultdict(int)` keyed by bin index with the number of hits as
        value. On failure the error message is printed and the partial
        (possibly empty) table is returned.
    """
    read_lines, k, W = args
    H = defaultdict(int)
    try:
        A, B = generator_to_bins(reads_from_string(read_lines, kmersize=k),
                                 W,
                                 rc=True)
        kmer_count = len(A)
        for bins in B:
            for position in range(kmer_count):
                H[bins[position]] += 1
    except Exception as err:
        # Best effort: a failing chunk is reported, not fatal.
        print(str(err))
    # Return the table so the caller actually receives the counts.
    return H
예제 #7
0
def create_kmer_hash_counts_fasta(f,
                                  s,
                                  w=1,
                                  wheel_path='/mnt/Wheels.txt',
                                  block_size=1,
                                  out_path='/mnt/Kmer_Hash_Counts.txt'):
    """Count, per hash bin, how many k-mers read from `f` fall into it.

    The input is processed in blocks of at most `block_size` reads. Counts
    are kept in an array of `2**s` unsigned 8-bit integers and saturate at
    255 instead of wrapping around.

    Args:
        f: Seekable file object holding the reads; rewound with `seek(0)`
            before processing.
        s: Exponent of the table size (`2**s` counters); also forwarded to
            `get_wheels` as `spoke_limit`.
        w: Forwarded to `get_wheels` as `wheel_limit`.
        wheel_path: Path forwarded to `get_wheels`.
        block_size: Forwarded to `read_generator` as `max_reads`.
        out_path: Currently unused by this function.
            NOTE(review): presumably the counts were meant to be persisted
            here, as `create_kmer_hash` does with its table - confirm the
            intended file format before adding that.

    Returns:
        The ctypes `c_uint8` array of saturating counts.
    """
    W = get_wheels(wheel_path, spoke_limit=s, wheel_limit=w)
    # The k-mer size is the length of the first wheel's 'p' entry.
    k = len(W[0]['p'])
    H = (c_uint8 * 2**s)()
    last = None
    f.seek(0)
    # Keep going until a pass leaves the file position unchanged.
    while last != f.tell():
        last = f.tell()
        try:
            reads = read_generator(f, max_reads=block_size, kmer_size=k)
            A, B = generator_to_bins(reads, W)
            n_kmers = len(A)
            for wheel_bins in B:
                for a in range(n_kmers):
                    idx = wheel_bins[a]
                    # Saturate at the uint8 maximum rather than overflowing.
                    H[idx] = min(255, H[idx] + 1)
        except Exception as err:
            # Best effort: report the failing block and continue.
            print(str(err))
    # Return the table; otherwise the computed counts are discarded.
    return H
예제 #8
0
def create_kmer_hash_counts_fasta(f,
                                  s,
                                  w=1,
                                  wheel_path='/mnt/Wheels.txt',
                                  block_size=1,
                                  out_path='/mnt/Kmer_Hash_Counts.txt'):
    """Build a table of saturating per-bin k-mer counts for the reads in `f`.

    Reads are consumed in blocks of at most `block_size`. Each bin index
    returned by `generator_to_bins` increments one of `2**s` unsigned 8-bit
    counters; a counter that reaches 255 stays at 255.

    Args:
        f: Seekable file object holding the reads; rewound with `seek(0)`
            before processing.
        s: Exponent of the table size (`2**s` counters); also forwarded to
            `get_wheels` as `spoke_limit`.
        w: Forwarded to `get_wheels` as `wheel_limit`.
        wheel_path: Path forwarded to `get_wheels`.
        block_size: Forwarded to `read_generator` as `max_reads`.
        out_path: Currently unused by this function.
            NOTE(review): the sibling `create_kmer_hash` writes its table to
            its `out_path`; confirm whether (and in which format) the counts
            should be written as well.

    Returns:
        The ctypes `c_uint8` array holding the counts.
    """
    W = get_wheels(wheel_path, spoke_limit=s, wheel_limit=w)
    # The k-mer size is the length of the first wheel's 'p' entry.
    k = len(W[0]['p'])
    H = (c_uint8 * 2**s)()
    last = None
    f.seek(0)
    # Stop once a pass no longer advances the file position.
    while last != f.tell():
        last = f.tell()
        try:
            A, B = generator_to_bins(
                read_generator(f, max_reads=block_size, kmer_size=k), W)
            kmer_count = len(A)
            for bins in B:
                for position in range(kmer_count):
                    slot = bins[position]
                    # Clamp to 255 so the 8-bit counter never wraps.
                    H[slot] = min(255, H[slot] + 1)
        except Exception as err:
            # Best effort: a failing block is reported, not fatal.
            print(str(err))
    # Give the counts back to the caller instead of dropping them.
    return H
예제 #9
0
def create_kmer_hash(f,
                     s,
                     w=1,
                     wheel_path='/mnt/Wheels.txt',
                     block_size=10000,
                     out_path='/mnt/Kmer_Hash.txt',
                     reverse_compliments=True):
    """Build and save a bit table marking every hash bin hit by `f`'s k-mers.

    Reads are consumed in blocks of at most `block_size`. For each block,
    `generator_to_bins` returns `(A, B)`, and the first `len(A)` bin indices
    of every entry of `B` are set in a table of `2**s` bits. The table is
    written to `out_path` in binary form and returned.

    Args:
        f: Seekable file object holding the reads; rewound with `seek(0)`
            before processing.
        s: Exponent of the table size (`2**s` bits); also forwarded to
            `get_wheels` as `spoke_limit`.
        w: Forwarded to `get_wheels` as `wheel_limit`.
        wheel_path: Path forwarded to `get_wheels`.
        block_size: Forwarded to `read_generator` as `max_reads`.
        out_path: Path of the binary output file.
        reverse_compliments: Forwarded to `generator_to_bins` as `rc`.

    Returns:
        The populated `bitarray`.
    """
    W = get_wheels(wheel_path, spoke_limit=s, wheel_limit=w)
    # The k-mer size is the length of the first wheel's 'p' entry.
    k = len(W[0]['p'])
    f.seek(0)
    H = bitarray(2**s)
    # A new bitarray is apparently not guaranteed to start out empty, so
    # reset every bit explicitly.
    H.setall(False)
    last = None
    # Stop once a pass no longer advances the file position.
    while last != f.tell():
        last = f.tell()
        try:
            A, B = generator_to_bins(
                read_generator(f, max_reads=block_size, kmer_size=k),
                W,
                rc=reverse_compliments)
            kmer_count = len(A)
            for bins in B:
                for position in range(kmer_count):
                    H[bins[position]] = True
        except Exception as err:
            # Best effort: report and skip a failing block. Unlike a bare
            # `except:`, this lets KeyboardInterrupt/SystemExit propagate.
            print(str(err))
        # Progress indicator: current offset in the input file.
        print(f.tell())
    # `with` guarantees the output file is closed even if tofile() fails.
    with open(out_path, 'wb') as fo:
        H.tofile(fo)
    return H
예제 #10
0
def write_hashed_reads(read_file,
                       out_file,
                       s,
                       w=1,
                       wheel_path='/mnt/Wheels.txt',
                       block_size=10000):
    """Emit one line per read: its ID followed by the read's hash bins.

    Reads are consumed in blocks of at most `block_size`. For each block,
    `generator_to_bins` returns `(A, B)`; runs of consecutive equal entries
    in `A` are treated as one read (this assumes the entries are grouped by
    read). Each run produces the line

        <read id> + 'k, bins: ' + str([k] + bins) + '\n'

    using only the bins of the first wheel (`B[0]`).

    Args:
        read_file: Seekable file object holding the reads; rewound with
            `seek(0)` before processing.
        out_file: Writable file-like object receiving the output lines.
        s: Forwarded to `get_wheels` as `spoke_limit`.
        w: Forwarded to `get_wheels` as `wheel_limit`.
        wheel_path: Path forwarded to `get_wheels`.
        block_size: Forwarded to `read_generator` as `max_reads`.
    """
    W = get_wheels(wheel_path, spoke_limit=s, wheel_limit=w)
    # The k-mer size is the length of the first wheel's 'p' entry.
    k = len(W[0]['p'])
    hash_prefix = 'k, bins: '

    def emit(read_id, bins):
        # One output line per read: id, prefix, then [k, bin, bin, ...].
        out_file.write(read_id + hash_prefix + str([k] + bins) + '\n')

    read_file.seek(0)
    last = None
    # Stop once a pass no longer advances the file position.
    while last != read_file.tell():
        last = read_file.tell()
        try:
            A, B = generator_to_bins(
                read_generator(read_file,
                               max_reads=block_size,
                               verbose_ids=True,
                               kmer_size=k), W)
            # Writing just one wheel here, assuming A is sorted by read.
            current_bins = []
            current_id = None
            for position, read_id in enumerate(A):
                if read_id != current_id:
                    if current_bins:
                        emit(current_id, current_bins)
                        current_bins = []
                    current_id = read_id
                current_bins.append(B[0][position])
            # A read is only flushed when the next one begins, so the last
            # read of the block has to be written explicitly.
            if current_bins:
                emit(current_id, current_bins)
        except Exception as err:
            # Best effort: show the concrete error type and message, then
            # move on to the next block.
            print('%s: %s' % (type(err).__name__, err))
        # Progress indicator: current offset in the input file.
        print(read_file.tell())