Code example #1
File: assembler.py  Project: shulp2211/hyperseq
def do_wheels(s, w=1):
    # conn is presumably a module-level pymongo client defined elsewhere in assembler.py.
    db = conn['test_genome']
    # Infer the kmer length d from the length of one stored kmer's sequence string.
    d = len(db.kmers.find_one()['s'])
    # Build w hash wheels with s spokes each; output files get the '/mnt/test_' prefix.
    set_wheels(d,
               realm='test_genome',
               spokes=s,
               wheels=w,
               out_path='/mnt/test_')
    # Load the wheels back from the Wheels.txt file just written.
    W = get_wheels('/mnt/test_Wheels.txt')
    return W
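A minimal usage sketch, assuming conn is a pymongo client created at module level and that set_wheels/get_wheels live in the project's hyper_sequences module (the module name is taken from the procedure notes in the last example below; the actual imports in assembler.py may differ):

from pymongo import MongoClient
from hyper_sequences import set_wheels, get_wheels  # assumed location of these helpers

conn = MongoClient('localhost', 27017)  # hypothetical connection to the MongoDB holding 'test_genome'

# Build one wheel with, e.g., 30 spokes and get the loaded wheel data back.
W = do_wheels(30, w=1)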
Code example #2
# Latent Strain Analysis - Full Procedure

# CREATING THE HASH
	# Sample random kmers (length 35, presumably 2000 of them) from the fastq files in /mnt/grinder_output;
	# this writes /mnt/grinder_output/random_kmers.fastq, which is used to define the hash below.
	from fastq_reader import rand_kmers_for_wheel
	rand_kmers_for_wheel('/mnt/grinder_output',35,2000)
	# Build one hash wheel for kmer length 35 and write Wheels.txt under /mnt/grinder_output/.
	from hyper_sequences import set_wheels
	set_wheels(35,random_kmer_path='/mnt/grinder_output/random_kmers.fastq',wheels=1,out_path='/mnt/grinder_output/')
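	# For intuition only (NOT the hyperseq implementation): a wheel can be thought of as a set of random
	# hyperplanes ("spokes"); a kmer is encoded as a numeric vector and the sign of its projection onto
	# each spoke contributes one bit of its hash. Illustrative sketch with an assumed one-hot encoding
	# and Gaussian spokes instead of the random kmers sampled above:
	import numpy as np
	rng = np.random.default_rng(0)
	spokes = rng.standard_normal((16, 35*4))                         # 16 spokes over one-hot encoded 35-mers (ACGT)
	kmer = 'ACGT'*8 + 'AAA'                                          # any 35-mer
	v = np.zeros(35*4)
	for i,c in enumerate(kmer): v[4*i + 'ACGT'.index(c)] = 1.0
	bucket = sum(int(b) << j for j,b in enumerate(spokes @ v > 0))   # a 16-bit hash bucket for this kmer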

# HASHING THE RAW READS
	# via MapReduce (large data sets)
		# test locally like: $ head -n100 somefile.fastq | python fastq_read_mapper.py | sort -k1 | python read_reducer.py
		# (a generic sketch of this mapper | sort | reducer contract appears after the hash-count step below)
		# upload all fastq files to the S3 bucket's data directory (e.g. s3://cleary-metagenomics/data)
		# create an Elastic MapReduce (Hadoop streaming) job:
			# input dir: s3n://cleary-metagenomics/data
			# output dir: s3n://cleary-metagenomics/hashed_reads
			# mapper: s3n://cleary-metagenomics/job/fastq_read_mapper.py
			# reducer: s3n://cleary-metagenomics/job/read_reducer.py
			# extra args: -cacheFile s3n://cleary-metagenomics/cache_files/Wheels.txt#Wheels.txt
		# with 20M reads this took 2h18m running on 19 m1.large instances
		# download results: s3cmd get s3://cleary-metagenomics/hashed_reads/part*
		# merge the downloaded MapReduce part files in /mnt2/mr_out/ into hashed-read files (out_prefix: /mnt/grinder_output/hashed_reads/)
		from merge_read_bins import merge_files
		merge_files('/mnt2/mr_out/','/mnt/grinder_output/',out_prefix='/mnt/grinder_output/hashed_reads/')
		# merging 34 parts of ~700MB each took a couple of hours
		from read_partitioning import hash_counts_from_hashq
		import glob
		hashed_read_files = glob.glob('/mnt/grinder_output/hashed_reads/*')  # assumed: the merged files from the step above
		for hf in hashed_read_files:
			# write one Kmer_Hash_Counts.txt per hashed-read file (28 presumably being the hash size in bits)
			hash_counts_from_hashq(28,hf,out_path='/mnt/grinder_output/'+hf[hf.rfind('/')+1:hf.index('.')]+'Kmer_Hash_Counts.txt')
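		# For reference (not the project's fastq_read_mapper.py / read_reducer.py): a minimal sketch of the
		# mapper | sort | reducer streaming contract exercised by the local test above. The tab-separated
		# key/value format and the counting reducer are illustrative assumptions.
		import sys, itertools
		def sketch_mapper(lines, hash_fn=hash):
			# Emit one "key<TAB>value" line per input line; a real mapper would emit hash-bucket keys per read.
			for line in lines:
				sys.stdout.write('%s\t%s\n' % (hash_fn(line.strip()) % 1000, line.strip()))
		def sketch_reducer(lines):
			# Reducer input arrives sorted by key (the sort -k1 stage), so equal keys are adjacent and groupby works.
			pairs = (l.rstrip('\n').split('\t', 1) for l in lines)
			for key, group in itertools.groupby(pairs, key=lambda kv: kv[0]):
				sys.stdout.write('%s\t%d\n' % (key, sum(1 for _ in group)))
		# usage mirrors the test above: sketch_mapper(sys.stdin), then sort -k1, then sketch_reducer(sys.stdin)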
	# on a single node (small data sets)
		# use create_kmer_hash_counts from read_partitioning (note: it should write hashq output!)

# CONDITION, SVD, AND CLUSTER KMER ABUNDANCE MATRIX
	from eigenhashes import *
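	# As a generic illustration only (not the eigenhashes API): "condition, SVD, and cluster" on a
	# kmer-abundance matrix could, in principle, look like the following; the conditioning, rank, and
	# clustering choices here are assumptions.
	import numpy as np
	from scipy.sparse.linalg import svds
	from scipy.cluster.vq import kmeans2
	A = np.abs(np.random.default_rng(0).standard_normal((2**10, 8)))  # toy stand-in: hash buckets x samples
	L = np.log1p(A)                                                   # assumed conditioning: log transform...
	Ac = L - L.mean(axis=1, keepdims=True)                            # ...and centering across samples
	U, S, Vt = svds(Ac, k=5)                                          # truncated SVD of the conditioned matrix
	centroids, labels = kmeans2(U * S, 5, minit='++')                 # cluster hash buckets in the eigen-space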