Exemple #1
0
def matrix_from_file_paths(path_list,s):
	"""Load one count vector per path and stack the vectors row-wise.

	Each path is read with ``open_count_hash(p, s)`` and the result is
	converted to a float32 array.

	Args:
		path_list: iterable of paths handed to ``open_count_hash``.
		s: second argument forwarded unchanged to ``open_count_hash``.

	Returns:
		``[]`` when ``path_list`` is empty; a one-element list holding the
		single vector when exactly one path is given; otherwise an ndarray
		whose first axis follows the order of ``path_list``.
	"""
	rows = [array(open_count_hash(p,s),dtype=float32) for p in path_list]
	if len(rows) < 2:
		# Zero or one vector is handed back as a plain list, exactly as before.
		return rows
	# Stack once at the end: concatenating inside the loop re-copied every
	# previously accumulated row on each iteration.
	return concatenate([[row] for row in rows])
def condition_and_map_clusters(eigenvectors,nonzeros,global_weights,file_paths,completed_count_files,s,out_prefix='/mnt/'):
	"""Build a conditioned, projected count vector for every cluster file.

	For each path in ``file_paths`` the cluster id is the path component
	that ends right before ``'_velvet'``. Counts are loaded from
	``out_prefix + cluster_id + '.txt'`` when that name is listed in
	``completed_count_files``; otherwise they are computed from the file at
	``fp`` with ``create_kmer_hash_counts_fasta`` (which is given that same
	name as ``out_path``). The counts are then cast to float32, indexed by
	``nonzeros``, transformed as ``log(counts + 1) * global_weights`` and
	multiplied into ``eigenvectors`` with ``dot``.

	Args:
		eigenvectors: right-hand operand of the final ``dot``.
		nonzeros: index applied to each count vector before conditioning.
		global_weights: multiplier applied after the log transform.
		file_paths: iterable of path strings, each containing ``'_velvet'``.
		completed_count_files: container of count-file names already on disk.
		s: forwarded unchanged to the count-loading helpers.
		out_prefix: prefix used to build each cluster's count-file name.

	Returns:
		``[]`` when ``file_paths`` is empty; a one-element list holding the
		single projected vector when exactly one path is given; otherwise an
		ndarray whose first axis follows the order of ``file_paths``.

	Raises:
		ValueError: if a path does not contain ``'_velvet'``.

	Side effects:
		Prints each cluster id after it has been processed.
	"""
	rows = []
	for fp in file_paths:
		# Locate the marker once; everything between the last '/' before it
		# (or the start of the string) and the marker is the cluster id.
		marker = fp.index('_velvet')
		cluster_id = fp[fp.rfind('/',0,marker)+1:marker]
		outfile = out_prefix+cluster_id+'.txt'
		if outfile in completed_count_files:
			H = open_count_hash(outfile,s)
		else:
			# 'with' guarantees the input handle is released even if counting fails.
			with open(fp,'r') as f:
				# Earlier variant of this call, kept for reference:
				#H = create_kmer_hash_counts(f,s,block_size=15000,out_path=outfile,temp_file_size=5*10**5)
				H = create_kmer_hash_counts_fasta(f,s,block_size=1,out_path=outfile)
		H = array(H,dtype=float32)[nonzeros]
		# WARNING: the conditioning below is written out inline rather than
		# calling the original conditioning function, so the two can silently
		# drift apart if that function ever changes.
		H = log(H + 1)*global_weights
		H = dot(H,eigenvectors)
		rows.append(H)
		# Parenthesised form prints the same text on Python 2 and also parses on Python 3.
		print(cluster_id)
	if len(rows) < 2:
		# Zero or one vector is handed back as a plain list, exactly as before.
		return rows
	# Stack once at the end: concatenating inside the loop re-copied every
	# previously accumulated row on each iteration.
	return concatenate([[row] for row in rows])