Exemple #1
0
def word2wav(text, PATH='/data/lisa/exp/kumarrit/vctk'):
    """Synthesize speech from raw text via a word-to-phoneme NMT model.

    Tokenizes *text*, converts every token to a phoneme sequence with
    ``word2phon`` (punctuation becomes a pause phoneme), then runs the
    vocoder predictor on the phoneme codes and writes the result as
    'generated_sample_16.wav' with a randomly chosen speaker.

    :param text: input sentence to synthesize.
    :param PATH: directory containing the ``phon2code.pkl`` mapping.
    """
    lib.load_params('cmudict_attention_nmt_best.pkl')
    # NOTE(review): CMUdict is loaded but no longer used — the dictionary
    # lookup path was disabled in favor of word2phon for every token.
    model = nltk.corpus.cmudict.dict()
    tknzr = nltk.tokenize.WordPunctTokenizer()
    tokens = tknzr.tokenize(text.lower())
    # Use a context manager so the pickle file handle is closed
    # deterministically instead of being leaked.
    with open(PATH + '/phon2code.pkl') as f:
        char_to_idx = pickle.load(f)
    # items() works on both Python 2 and 3; iteritems() is Python-2-only.
    idx_to_char = {x: y for y, x in char_to_idx.items()}
    result = []
    for tkn in tokens:
        if tkn in ['.', ',']:
            # Punctuation is rendered as an explicit pause phoneme.
            result += ['pau']
        else:
            result += [idx_to_char[x] for x in word2phon(tkn)]

    print(result)
    # 36 / 0 bracket the sequence — presumably start/end-of-sequence codes;
    # TODO confirm against the model's vocabulary.
    ctx = np.asarray([36] + [char_to_idx[x] for x in result] + [0]).reshape((1, -1)).astype('int32')
    pred_X = predict_fn(
          np.asarray([np.random.choice(N_SPEAKERS), ]).reshape((1,)).astype('int32'),
          ctx
        )
    generate.generate_wav(pred_X[0], base='generated_sample_16', do_post_filtering=False)
Exemple #2
0
def vocoder2wav(vocoder_frames,
                path_to_save=".",
                name_prefix="samples",
                dataset="vctk",
                post_filter=False):
    """
    Render vocoder frames to wav files and load them back.

    Writes one wav per entry of *vocoder_frames* into *path_to_save* as
    '<name_prefix>_<i>.wav', then reads each file back.

    Returns a list of raw audio and a list of (sampled_frequencies, time, specgram_values)
    """
    # Guard against un-normalized input; normalized vocoder features are
    # expected to be small in magnitude.
    assert (numpy.abs(vocoder_frames).max() <
            20.), "Make sure that range of frame values is small"

    if not os.path.exists(path_to_save):
        os.makedirs(path_to_save)

    # Loop-invariant: resolve the normalization file path once, not per sample.
    norm_info_file = os.path.join(os.environ['FUEL_DATA_PATH'],
                                  dataset, NORM_FILE)

    raw_audio = []
    audio_spec = []

    for i, this_sample in enumerate(vocoder_frames):
        # Build the sample name once; it is needed both for writing and
        # for reading the wav back.
        sample_name = name_prefix + '_' + str(i)
        generate_wav(this_sample,
                     path_to_save,
                     sample_name,
                     sptk_dir=SPTK_DIR,
                     world_dir=WORLD_DIR,
                     norm_info_file=norm_info_file,
                     do_post_filtering=post_filter)

        rate, data = scipy.io.wavfile.read(
            os.path.join(path_to_save, sample_name + '.wav'))
        raw_audio.append(data)
        f, t, Sxx = signal.spectrogram(data, rate)
        audio_spec.append((f, t, Sxx))

    return raw_audio, audio_spec
Exemple #3
0
def char2wav(text, PATH='/data/lisa/exp/kumarrit/vctk'):
    """Synthesize speech directly from characters via the character NMT model.

    Encodes *text* character-by-character, translates the character codes to
    phoneme codes with ``nmt_fn``, truncates at the first end-of-sequence
    code (0), then renders 'generated_sample_16.wav' with a random speaker.

    :param text: input string to synthesize.
    :param PATH: directory containing the ``char2code.pkl`` mapping.
    """
    lib.load_params('vctk_nmt_best.pkl')
    # NOTE(review): CMUdict is loaded but unused here — kept for parity
    # with word2wav; confirm it can be dropped.
    model = nltk.corpus.cmudict.dict()
    # Context manager closes the pickle file instead of leaking the handle.
    with open(PATH + '/char2code.pkl') as f:
        char_to_idx = pickle.load(f)
    result = list(text)
    # 36 / 0 bracket the sequence — presumably start/end-of-sequence codes;
    # TODO confirm against the model's vocabulary.
    ctx = np.asarray([36] + [char_to_idx[x] for x in result] + [0]).reshape((1, -1)).astype('int32')
    phons = nmt_fn(ctx, np.ones_like(ctx).astype('float32')).flatten()
    # Truncate at the first end-of-sequence code (0).  The original used a
    # bare except around an IndexError; an explicit check is both narrower
    # and clearer: if no 0 is present, keep the whole sequence.
    zero_positions = np.where(phons == 0)[0]
    if len(zero_positions) > 0:
        phons = phons[:zero_positions[0]].tolist()
    else:
        phons = phons.tolist()
    pred_X = predict_fn(
          np.asarray([np.random.choice(N_SPEAKERS), ]).reshape((1,)).astype('int32'),
          np.asarray(phons, dtype=np.int32).reshape((1, -1))
        )
    generate.generate_wav(pred_X[0], base='generated_sample_16', do_post_filtering=False)
def writewav():
    """Dump the test and train vocoder features to wav files.

    Loads 'test_X.npy' and 'train_X.npy', truncates each sequence at its
    first all-zero frame (zero padding), and renders every sequence with
    ``generate.generate_wav`` ('generated_sample_<i>' for test,
    'original_sample_<i>' for train).
    """
    data = np.load('test_X.npy')
    out_data = []
    for i in range(data.shape[0]):
        # Find the first padding (all-zero) frame.
        # NOTE(review): this loop starts at 1 while the train loop below
        # starts at 0 — confirm the asymmetry is intentional.
        for j in range(1, data.shape[1]):
            if data[i][j].sum() == 0:
                break
        out_data.append(data[i][:j])
    # Plain loop instead of a list comprehension: generate_wav is called
    # purely for its side effect.
    for i in range(data.shape[0]):
        generate.generate_wav(out_data[i], base='generated_sample_%d' % i,
                              do_post_filtering=False)

    data = np.load('train_X.npy')
    out_data = []
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            if data[i][j].sum() == 0:
                break
        out_data.append(data[i][:j])
    print(data.shape)
    for i in range(data.shape[0]):
        generate.generate_wav(out_data[i], base='original_sample_%d' % i,
                              do_post_filtering=False)
Exemple #5
0
        # Continuation of a block whose header is above this chunk:
        # create the output directory before sampling.
        os.makedirs(to_save_path)

    # Sample raw audio with the SampleRNN backend; gen_x is swapped to
    # batch-major before the call — presumably from (time, batch, ...);
    # TODO confirm.
    parrot.sampleRnn.sample_raw(
        gen_x.swapaxes(0, 1).copy(), features_lengths, args.samples_name,
        to_save_path)
    print "Successfully sampled raw audio..."

# Per-dataset normalization statistics consumed by the WORLD vocoder pipeline.
norm_info_file = os.path.join(data_dir, args.dataset,
                              'norm_info_mgc_lf0_vuv_bap_63_MVN.dat')

# Render each generated sample, trimmed to its true (unpadded) length.
for idx, this_sample in enumerate(gen_x):
    this_sample = this_sample[:features_lengths[idx]]
    generate_wav(this_sample,
                 os.path.join(args.save_dir, 'samples'),
                 args.samples_name + '_' + str(idx),
                 sptk_dir=args.sptk_dir,
                 world_dir=args.world_dir,
                 norm_info_file=norm_info_file,
                 do_post_filtering=args.do_post_filtering)

# Optionally write the ground-truth raw audio as 16 kHz wav files.
if args.plot_raw:
    from scipy.io import wavfile
    raw_audio = data_tr['raw_audio'].swapaxes(0, 1)

    for idx in range(args.num_samples):
        # 80 raw samples per masked vocoder frame — assumed frame/sample
        # ratio; TODO confirm against the dataset's hop size.
        this_raw = numpy.concatenate(
            raw_audio[idx])[:80 * int(features_mask_tr.sum(axis=0)[idx])]
        wavfile.write(
            os.path.join(args.save_dir, 'samples',
                         'raw_' + args.samples_name + '_' + str(idx) + '.wav'),
            16000, this_raw)
Exemple #6
0
def sampler(save_dir, samples_name, do_post_filtering):
    """Sample the model on one test batch and write wav files.

    Draws NUM_REPEAT latent codes, renders each test utterance once per
    latent code into <save_dir>/samples, and renders the ground-truth
    utterances into <save_dir>/actual_samples.

    :param save_dir: root output directory.
    :param samples_name: filename prefix for all generated wavs.
    :param do_post_filtering: forwarded to ``generate_wav``.
    """
    test_stream = datasets.parrot_stream(DATASET,
                                         use_speaker=False,
                                         which_sets=('test', ),
                                         batch_size=BATCH_SIZE,
                                         seq_size=10000)

    test_iterator = test_stream.get_epoch_iterator()

    # One shared set of latent draws, tiled across the batch so every
    # utterance is rendered with the same NUM_REPEAT latent codes.
    latents_generated = numpy.random.normal(size=(NUM_REPEAT, LATENT_DIM))

    latents_generated = lib.floatX(
        numpy.tile(latents_generated, (BATCH_SIZE, 1)))

    actual_so_far_raw, mask_raw, text_features_raw, reset = next(test_iterator)

    # Repeat the text features along the batch axis to pair each utterance
    # with every latent draw.
    text_features_raw_repeated = numpy.repeat(text_features_raw,
                                              NUM_REPEAT,
                                              axis=1)

    samples_so_far = sample_fn(text_features_raw_repeated, latents_generated)

    mask_so_far = mask_raw

    actual_so_far = actual_so_far_raw.transpose((1, 0, 2))

    mask_so_far_repeated = numpy.repeat(mask_so_far, NUM_REPEAT, axis=1)

    norm_info_file = os.path.join(data_dir, DATASET,
                                  'norm_info_mgc_lf0_vuv_bap_63_MVN.dat')

    # Hoisted: the two output directories are loop-invariant.
    samples_dir = os.path.join(save_dir, 'samples')
    actual_dir = os.path.join(save_dir, 'actual_samples')

    if not os.path.exists(samples_dir):
        os.makedirs(samples_dir)

    if not os.path.exists(actual_dir):
        os.makedirs(actual_dir)

    # Ground-truth utterances, trimmed to their unpadded length via the mask.
    for i, this_sample in enumerate(actual_so_far):
        this_sample = this_sample[:int(mask_so_far.sum(axis=0)[i])]

        generate_wav(this_sample,
                     actual_dir,
                     samples_name + '_' + str(i),
                     sptk_dir=SPTK_DIR,
                     world_dir=WORLD_DIR,
                     norm_info_file=norm_info_file,
                     do_post_filtering=do_post_filtering)

    # Generated utterances: i // NUM_REPEAT indexes the utterance,
    # i % NUM_REPEAT indexes the latent code.
    for i, this_sample in enumerate(samples_so_far):
        this_sample = this_sample[:int(mask_so_far_repeated.sum(axis=0)[i])]

        generate_wav(this_sample,
                     samples_dir,
                     samples_name + '_' + str(i // NUM_REPEAT) + '_latent_' +
                     str(i % NUM_REPEAT),
                     sptk_dir=SPTK_DIR,
                     world_dir=WORLD_DIR,
                     norm_info_file=norm_info_file,
                     do_post_filtering=do_post_filtering)
Exemple #7
0
def save(gen, orig, i=0):
    """Write one generated/original sample pair to wav files.

    Renders *gen* as 'generated_sample_<i>' and *orig* as
    'original_sample_<i>', both without post-filtering.
    """
    pairs = [('generated_sample_%d' % i, gen),
             ('original_sample_%d' % i, orig)]
    for base_name, sample in pairs:
        generate.generate_wav(sample, base=base_name, do_post_filtering=False)
Exemple #8
0
            # (Tail of a disabled alternative stream configuration left by
            # the author.)
            #     10000, sorting_mult=1, labels_type=saved_args.labels_type,
            #     quantize_features=saved_args.quantized_input)

            # Sample LATENT_NUM * num_samples utterances for the adapted
            # speakers at this adaptation iteration.
            gen_x, gen_k, gen_w, gen_pi, gen_phi, gen_pi_att = parrot.sample_model(
                labels_tr_old, labels_mask_tr_old, features_mask_tr_old,
                new_speakers_tr_old, latent_var_tr_old,
                LATENT_NUM * args.num_samples)

            # gen_x is swapped to batch-major before iterating — presumably
            # from (time, batch, ...); TODO confirm.  Each sample is trimmed
            # to its unpadded length via the feature mask.
            for j, this_sample in enumerate(gen_x.swapaxes(1, 0)):
                this_sample = this_sample[:int(
                    features_mask_tr_old.sum(axis=0)[j])]
                generate_wav(this_sample,
                             os.path.join(args.save_dir, 'samples',
                                          'adaptation'),
                             "sample_{}_{}_iters_{}".format(
                                 args.samples_name, j, i),
                             sptk_dir=args.sptk_dir,
                             world_dir=args.world_dir,
                             norm_info_file=norm_info_file,
                             do_post_filtering=args.do_post_filtering)
            # Report the running mean of the most recent 200 adaptation costs.
            if len(costs) != 0:
                print "cost at iter {} is {}".format(i,
                                                     numpy.mean(costs[-200:]))

print "Successfully sampled the parrot."

if saved_args.quantized_input:
    _, dequantize = get_quantizers()
    gen_x = dequantize(gen_x)

# import ipdb; ipdb.set_trace()