Exemple #1
0
# Compute the total k-mer counts for one read file, cache them as a pickle
# inside work_dir, then embed the reads using that pickle.
# Relies on names defined earlier in the script: name, k, work_dir,
# path_reads, path_model, nr, a, v, plus the r2v and six modules.
fn_totalkmers = '%s_%s_totalkmers.pkl' % (name, str(k))
path_totalkmers = os.path.join(work_dir, fn_totalkmers)

# exist_ok avoids the check-then-create race when several jobs share work_dir.
os.makedirs(work_dir, exist_ok=True)

# Test the full path that is written below. The bare file name would be
# resolved against the current directory, so an existing pickle in work_dir
# would go unnoticed and be overwritten.
if os.path.exists(path_totalkmers):
    print('%s already exists' % (path_totalkmers))
    sys.exit()

print('Calculating kmer totals for %s using model %s.' %
      (path_reads, path_model))
total_kmers = r2v.calc_total_kmers(path_reads,
                                   path_model,
                                   k,
                                   verbose=True,
                                   v=v)

print('Dump total kmers to %s' % (path_totalkmers))
# Close the handle deterministically so the pickle is flushed to disk before
# its path is handed to embed_reads (presumably loaded there -- not visible
# from this script).
with open(path_totalkmers, 'wb') as fh_totalkmers:
    six.moves.cPickle.dump(total_kmers, fh_totalkmers, protocol=4)

r2v.embed_reads(path_reads,
                path_totalkmers,
                path_model,
                work_dir,
                normread=nr,
                k=k,
                a=a,
                verbose=True,
                v=v)
Exemple #2
0
# Embed a single sample with a given model.
# Usage (positional): <script> <path_sample> <path_model>
name = 'ag'
a = 1e-05

path_sample, path_model = argv[1], argv[2]

# The model file name is split on '_': token 1 is parsed as k, and tokens
# 1..-2 (first and last dropped) are re-joined to tag the output files.
model_tokens = path_model.rsplit('/', 1)[-1].split('_')
k = int(model_tokens[1])
fn_model_base = '_'.join(model_tokens[1:-1])
fn_out = '%s_%s_total_kmers_split.pkl' % (name, fn_model_base)

# Location of the total-kmers pickle passed to embed_reads below.
dir_totalkmers = (
    '/mnt/HA/groups/rosenGrp/embed_samples/data/%s/total_kmers' % name)
path_totalkmers = os.path.join(dir_totalkmers, fn_out)

# Embeddings go into a per-model subdirectory, created on first use.
path_out = os.path.join(
    '/mnt/HA/groups/rosenGrp/embed_samples/data/%s/embeddings_split' % name,
    fn_model_base)
if not os.path.exists(path_out):
    os.makedirs(path_out)

r2v.embed_reads(
    path_sample, path_totalkmers, path_model, path_out,
    k=k, a=a, delim=' ', verbose=True, v=1000)
# Compute k-mer totals across every sample in samp_dir, cache them as a
# pickle, then embed each sample with those totals.
# Relies on names defined earlier in the script: path_totalkmers, samp_dir,
# path_model, work_dir, k, a, v, plus the r2v, six and glob imports.

# Guard on the path that is actually written and consumed below, so an
# existing totals pickle is never silently overwritten.
if os.path.exists(path_totalkmers):
    print('%s already exists' % (path_totalkmers))
    sys.exit()

print('Calculating kmer totals for samples in %s using model %s.' %
      (samp_dir, path_model))
total_kmers = r2v.calc_total_kmers_split(samp_dir,
                                         path_model,
                                         k,
                                         verbose=True,
                                         v=v)

print('Dump total kmers to %s' % (path_totalkmers))
# Close the handle deterministically so the pickle is flushed to disk before
# its path is handed to embed_reads (presumably loaded there -- not visible
# from this script).
with open(path_totalkmers, 'wb') as fh_totalkmers:
    six.moves.cPickle.dump(total_kmers, fh_totalkmers, protocol=4)

# Every entry directly under samp_dir is treated as one sample.
for samp in glob(samp_dir + '/*'):
    print('Embedding sample %s.' % (samp))
    r2v.embed_reads(samp,
                    path_totalkmers,
                    path_model,
                    work_dir,
                    k=k,
                    a=a,
                    svm=False,
                    normread=False,
                    to_sample=True,
                    delim=' ',
                    verbose=True,
                    v=1000)