Esempio n. 1
0
#counts = {}

# For every read, test each k-mer against every filter in `bfs` and record a
# '1' (found, on either strand) or '0' (not found) per (k-mer, filter) pair.
# Reads with at least one '0' are reported: first the number of misses, then
# the full hit/miss string.
# (A disabled experiment also collected the indices of the matching filters
# in a `was_in` list.)
for read in screed.open(sample_filename):
	seq = read.sequence
	flags = []
	for start in range(len(seq) - k + 1):
		kmer = seq[start:start + k]
		for bf in bfs:
			# the reverse complement is only computed when the forward
			# k-mer is not in the filter (short-circuit `or`)
			hit = kmer in bf or dna.reverse_complement(kmer) in bf
			flags.append('1' if hit else '0')
	miss_str = ''.join(flags)
	# every '0' in the string is exactly one miss
	misses = miss_str.count('0')
	if misses > 0:
		print(misses)
		print(miss_str)
		
#		if str(was_in) in counts:
#			counts[str(was_in)] += 1
#		else:
#			counts[str(was_in)] = 1
Esempio n. 2
0
	# Per-filter state machine run over the k-mers of the current read.
	# An "error" here is a k-mer that is found in the filter neither as-is nor
	# as its reverse complement.
	# NOTE(review): `read`, `k`, `bfs` and `dna` come from outside this excerpt.
	#
	# error_state tracks the state of the read
	# 0 : no error yet
	# 1 : in the first error
	# 2 : past the first error
	# 3 : second error found
	error_state = [0] * len(bfs)
	# error_len tracks the length of the first error, counted in consecutive
	# missing k-mers
	error_len = [0] * len(bfs)

	for i in range(len(read.sequence) - k + 1):
		kmer = read.sequence[i:i+k]
		# NOTE(review): the inner loop reuses the name `i`. This is harmless
		# today because `kmer` is sliced before the inner loop starts and the
		# outer `for` rebinds `i` from its own iterator on the next pass, but
		# any code added after the inner loop would see the filter index, not
		# the k-mer position.
		for i in range(0,len(bfs)):
			# if the state is 3, then no need to check
			if error_state[i] != 3:
				bf = bfs[i]
				if (kmer in bf) or (dna.reverse_complement(kmer) in bf):
					# move out of the first error
					# (a hit in states 0 and 2 changes nothing)
					if error_state[i] == 1:
						error_state[i] = 2
				else:
					# first error, switch states and up the length
					if error_state[i] == 0:
						error_state[i] = 1
						error_len[i] += 1
					# continuation of the first error, up the length
					elif error_state[i] == 1:
						error_len[i] += 1
					#second error found
					# (terminal state: this filter is skipped from now on)
					elif error_state[i] == 2:
						error_state[i] = 3
Esempio n. 3
0
	# Read the whole file into memory, inflate it with zlib and unpickle the
	# result into the filter's `tables` attribute.
	# NOTE(review): `bf` and `filename` are set up outside this excerpt.
	# NOTE(review): cPickle.loads can execute arbitrary code while
	# unpickling; only load files that come from a trusted source.
	with open(filename, 'rb') as fp:
		data = zlib.decompress(fp.read())
		bf.tables = cPickle.loads(data)
	# keep the loaded filter; the scoring loop below queries every entry of bfs
	bfs.append(bf)

# Histogram of k-mer coverage, one row per filter in `bfs`:
# buckets[f][b] is the number of reads whose fraction of k-mers found in
# filter f (on either strand) falls into bucket b, where the range [0, 1] is
# split into options.num_buckets equal-width buckets and a fraction of
# exactly 1 is counted in the last bucket.
buckets = [[0] * options.num_buckets for _ in bfs]
# Width of one bucket. The binning below uses exact integer arithmetic
# instead; the name is kept in case code outside this section reads it.
step = 1.0 / float(options.num_buckets)
# Highest valid bucket index (was hard-coded to 9, which is only right for
# exactly 10 buckets).
last_bucket = options.num_buckets - 1

for read in screed.open(options.sample_filename):
	kmers_found = [0] * len(bfs)
	# <= 0 when the read is shorter than k; the loop below then does nothing
	num_kmers = len(read.sequence) - k + 1
	for start in range(num_kmers):
		kmer = read.sequence[start:start + k]
		for idx, bf in enumerate(bfs):
			# reverse complement is only computed if the forward k-mer misses
			if kmer in bf or dna.reverse_complement(kmer) in bf:
				kmers_found[idx] += 1

	# One output line per read: the per-filter hit counts followed by the
	# bucket chosen for each filter, separated by single spaces.
	fields = [str(kmers_found)]
	for idx, found in enumerate(kmers_found):
		if num_kmers > 0:
			# floor(found / num_kmers * num_buckets) in integer arithmetic:
			# the float form truncates wrongly at bucket boundaries
			# (e.g. 0.3 / 0.1 evaluates to 2.9999999999999996).
			bucket = min(last_bucket,
				found * options.num_buckets // num_kmers)
		else:
			# read too short to contain a k-mer: count it as 0% found
			# instead of dividing by zero
			bucket = 0
		buckets[idx][bucket] += 1
		fields.append(str(bucket))
	print(" ".join(fields))

# Final table: one tab-separated row of bucket counts per filter.
for counts in buckets:
	print("\t".join(map(str, counts)))

		#if str(was_in) in counts:
			#counts[str(was_in)] += 1
		#else: