# Compute k-mer totals for a single read file, cache them as a pickle inside
# work_dir, then embed the reads using the cached totals.
# Relies on name, k, work_dir, path_reads, path_model, v, nr and a being
# defined earlier in the script.
fn_totalkmers = '%s_%s_totalkmers.pkl' % (name, str(k))
path_totalkmers = os.path.join(work_dir, fn_totalkmers)

if not os.path.exists(work_dir):
    os.makedirs(work_dir)

# Guard on the full output path: the pickle is written into work_dir, so
# testing the bare file name would only ever look in the current directory
# and would not detect a previously written totals file.
if os.path.exists(path_totalkmers):
    print('%s already exists' % (path_totalkmers))
    sys.exit()

print('Calculating kmer totals for %s using model %s.' % (path_reads,
                                                          path_model))
total_kmers = r2v.calc_total_kmers(path_reads, path_model, k,
                                   verbose=True, v=v)

print('Dump total kmers to %s' % (path_totalkmers))
# Close the handle explicitly so the pickle is fully written before its path
# is handed to embed_reads below.
with open(path_totalkmers, 'wb') as fh:
    six.moves.cPickle.dump(total_kmers, fh, protocol=4)

r2v.embed_reads(path_reads, path_totalkmers, path_model, work_dir,
                normread=nr, k=k, a=a, verbose=True, v=v)
# Embed one sample file for the 'ag' dataset with a given model, using a
# totals pickle located under the dataset's total_kmers directory.
# Positional arguments: argv[1] is the sample path, argv[2] is the model path.
name = 'ag'
a = 1e-05

path_sample = argv[1]
path_model = argv[2]

# The model file name is split on '_': field 1 is parsed as k, and fields
# 1..-2 (everything but the first and last) form the tag used in output names.
model_fields = path_model.rsplit('/', 1)[-1].split('_')
k = int(model_fields[1])
fn_model_base = '_'.join(model_fields[1:-1])

fn_out = '%s_%s_total_kmers_split.pkl' % (name, fn_model_base)
dir_totalkmers = '/mnt/HA/groups/rosenGrp/embed_samples/data/%s/total_kmers' % (
    name)
path_totalkmers = os.path.join(dir_totalkmers, fn_out)

# Embeddings are written to a per-model subdirectory, created on demand.
path_out = os.path.join(
    '/mnt/HA/groups/rosenGrp/embed_samples/data/%s/embeddings_split' % (name),
    fn_model_base)
if not os.path.exists(path_out):
    os.makedirs(path_out)

embed_options = dict(k=k, a=a, delim=' ', verbose=True, v=1000)
r2v.embed_reads(path_sample, path_totalkmers, path_model, path_out,
                **embed_options)
# Compute k-mer totals across the samples in samp_dir, cache them as a
# pickle at path_totalkmers, then embed every entry found in samp_dir.
# Relies on path_totalkmers, samp_dir, path_model, work_dir, k, a and v being
# defined earlier in the script.

# Guard on the path the pickle is actually written to rather than the bare
# file name, which would be resolved against the current directory.
# NOTE(review): assumes path_totalkmers is fn_totalkmers placed inside the
# output directory -- confirm against the setup earlier in the script.
if os.path.exists(path_totalkmers):
    print('%s already exists' % (path_totalkmers))
    sys.exit()

print('Calculating kmer totals for samples in %s using model %s.' % (
    samp_dir, path_model))
total_kmers = r2v.calc_total_kmers_split(samp_dir, path_model, k,
                                         verbose=True, v=v)

print('Dump total kmers to %s' % (path_totalkmers))
# Close the handle explicitly so the pickle is fully written before its path
# is handed to embed_reads in the loop below.
with open(path_totalkmers, 'wb') as fh:
    six.moves.cPickle.dump(total_kmers, fh, protocol=4)

for samp in glob(samp_dir + '/*'):
    print('Embedding sample %s.' % (samp))
    r2v.embed_reads(samp, path_totalkmers, path_model, work_dir,
                    k=k, a=a, svm=False, normread=False, to_sample=True,
                    delim=' ', verbose=True, v=1000)