def word2wav(text,PATH='/data/lisa/exp/kumarrit/vctk'): lib.load_params('cmudict_attention_nmt_best.pkl') model = nltk.corpus.cmudict.dict() tknzr = nltk.tokenize.WordPunctTokenizer() tokens = tknzr.tokenize(text.lower()) char_to_idx = pickle.load(open(PATH+'/phon2code.pkl')) idx_to_char = {x:y for y,x in char_to_idx.iteritems()} result = [] for tkn in tokens: if tkn in ['.',',']: result += ['pau'] else: # try: # result += [re.sub('\d+','',c.lower()) for c in random.choice(model[tkn])] # except: # result += [idx_to_char[x] for x in word2phon(tkn)] result += [idx_to_char[x] for x in word2phon(tkn)] print result ctx = np.asarray([36]+[char_to_idx[x] for x in result]+[0]).reshape((1,-1)).astype('int32') pred_X = predict_fn( np.asarray([np.random.choice(N_SPEAKERS),]).reshape((1,)).astype('int32'), ctx ) generate.generate_wav(pred_X[0],base='generated_sample_16',do_post_filtering=False)
def vocoder2wav(vocoder_frames, path_to_save=".", name_prefix="samples", dataset="vctk", post_filter=False):
    """ Returns a list of raw audio and a list of
    (sampled_frequencies, time, specgram_values) """
    assert (numpy.abs(vocoder_frames).max() < 20.), "Make sure that range of frame values is small"
    if not os.path.exists(path_to_save):
        os.makedirs(path_to_save)
    # Normalization stats live under $FUEL_DATA_PATH/<dataset>/.
    norm_file = os.path.join(os.environ['FUEL_DATA_PATH'], dataset, NORM_FILE)
    raw_audio = []
    audio_spec = []
    for idx, frames in enumerate(vocoder_frames):
        wav_name = name_prefix + '_' + str(idx)
        # Render this sample to <path_to_save>/<wav_name>.wav ...
        generate_wav(frames, path_to_save, wav_name,
                     sptk_dir=SPTK_DIR,
                     world_dir=WORLD_DIR,
                     norm_info_file=norm_file,
                     do_post_filtering=post_filter)
        # ... then read it back and compute its spectrogram.
        rate, data = scipy.io.wavfile.read(
            os.path.join(path_to_save, wav_name + '.wav'))
        raw_audio.append(data)
        audio_spec.append(signal.spectrogram(data, rate))
    return raw_audio, audio_spec
def char2wav(text, PATH='/data/lisa/exp/kumarrit/vctk'):
    """Convert raw character text to a generated wav file.

    Encodes `text` character-by-character, translates it to a phoneme
    sequence with the char->phoneme NMT model, then runs the frame
    predictor and writes the audio with generate.generate_wav.

    text: input text (str).
    PATH: directory containing char2code.pkl (character -> code mapping).
    """
    lib.load_params('vctk_nmt_best.pkl')
    # Close the pickle file instead of leaking the handle.
    with open(PATH + '/char2code.pkl') as f:
        char_to_idx = pickle.load(f)
    result = list(text)
    # 36/0 appear to be start/end-of-sequence codes -- TODO confirm.
    ctx = np.asarray(
        [36] + [char_to_idx[x] for x in result] + [0]
    ).reshape((1, -1)).astype('int32')
    phons = nmt_fn(ctx, np.ones_like(ctx).astype('float32')).flatten()
    # Truncate at the first end-of-sequence code (0); if the model never
    # emitted one, keep the full sequence.
    try:
        end_idx = np.where(phons == 0)[0][0]
        phons = phons[:end_idx].tolist()
    except IndexError:
        phons = phons.tolist()
    pred_X = predict_fn(
        np.asarray([np.random.choice(N_SPEAKERS), ]).reshape((1,)).astype('int32'),
        np.asarray(phons, dtype=np.int32).reshape((1, -1))
    )
    generate.generate_wav(pred_X[0], base='generated_sample_16',
                          do_post_filtering=False)
def writewav(): data = np.load('test_X.npy') out_data = [] for i in xrange(data.shape[0]): for j in xrange(1,data.shape[1]): if data[i][j].sum()==0: break out_data.append(data[i][:j]) [generate.generate_wav(out_data[i],base='generated_sample_%d'%i,do_post_filtering=False) for i in xrange(data.shape[0])] data = np.load('train_X.npy') out_data = [] for i in xrange(data.shape[0]): for j in xrange(data.shape[1]): if data[i][j].sum()==0: break out_data.append(data[i][:j]) print data.shape [generate.generate_wav(out_data[i],base='original_sample_%d'%i,do_post_filtering=False) for i in xrange(data.shape[0])]
# Script fragment: depends on to_save_path, gen_x, features_lengths, args,
# data_dir, data_tr and features_mask_tr defined outside this view.
os.makedirs(to_save_path)
# Save the sampleRnn raw-audio samples; gen_x is swapped from time-major
# to batch-major first.
parrot.sampleRnn.sample_raw(
    gen_x.swapaxes(0, 1).copy(), features_lengths, args.samples_name,
    to_save_path)
print "Successfully sampled raw audio..."
norm_info_file = os.path.join(data_dir, args.dataset,
                              'norm_info_mgc_lf0_vuv_bap_63_MVN.dat')
# Vocode each generated sample, trimmed to its own length.
for idx, this_sample in enumerate(gen_x):
    this_sample = this_sample[:features_lengths[idx]]
    generate_wav(this_sample,
                 os.path.join(args.save_dir, 'samples'),
                 args.samples_name + '_' + str(idx),
                 sptk_dir=args.sptk_dir,
                 world_dir=args.world_dir,
                 norm_info_file=norm_info_file,
                 do_post_filtering=args.do_post_filtering)
if args.plot_raw:
    from scipy.io import wavfile
    # Also dump the ground-truth raw audio; 80 raw samples per vocoder
    # frame -- presumably the frame shift, verify against the dataset.
    raw_audio = data_tr['raw_audio'].swapaxes(0, 1)
    for idx in range(args.num_samples):
        this_raw = numpy.concatenate(
            raw_audio[idx])[:80 * int(features_mask_tr.sum(axis=0)[idx])]
        wavfile.write(
            os.path.join(args.save_dir, 'samples',
                         'raw_' + args.samples_name + '_' +
                         str(idx) + '.wav'),
            16000, this_raw)
def sampler(save_dir, samples_name, do_post_filtering):
    """Draw vocoder samples from the test set and write them as wavs.

    Takes one test batch, repeats each utterance's text features
    NUM_REPEAT times with distinct random latent vectors, vocodes the
    generated frames into save_dir/samples, and also renders the
    ground-truth frames into save_dir/actual_samples for comparison.
    """
    test_stream = datasets.parrot_stream(
        DATASET, use_speaker=False, which_sets=('test', ),
        batch_size=BATCH_SIZE, seq_size=10000)
    test_iterator = test_stream.get_epoch_iterator()
    # One latent vector per repeat, tiled across the batch so every
    # utterance is sampled with the same NUM_REPEAT latents.
    latents_generated = numpy.random.normal(size=(NUM_REPEAT, LATENT_DIM))
    latents_generated = lib.floatX(
        numpy.tile(latents_generated, (BATCH_SIZE, 1)))
    actual_so_far_raw, mask_raw, text_features_raw, reset = next(test_iterator)
    text_features_raw_repeated = numpy.repeat(
        text_features_raw, NUM_REPEAT, axis=1)
    samples_so_far = sample_fn(text_features_raw_repeated, latents_generated)
    mask_so_far = mask_raw
    actual_so_far = actual_so_far_raw.transpose((1, 0, 2))
    mask_so_far_repeated = numpy.repeat(mask_so_far, NUM_REPEAT, axis=1)
    norm_info_file = os.path.join(
        data_dir, DATASET, 'norm_info_mgc_lf0_vuv_bap_63_MVN.dat')
    if not os.path.exists(os.path.join(save_dir, 'samples')):
        os.makedirs(os.path.join(save_dir, 'samples'))
    if not os.path.exists(os.path.join(save_dir, 'actual_samples')):
        os.makedirs(os.path.join(save_dir, 'actual_samples'))
    # Ground-truth utterances, each trimmed to its mask length.
    for i, this_sample in enumerate(actual_so_far):
        this_sample = this_sample[:int(mask_so_far.sum(axis=0)[i])]
        generate_wav(this_sample,
                     os.path.join(save_dir, 'actual_samples'),
                     samples_name + '_' + str(i),
                     sptk_dir=SPTK_DIR,
                     world_dir=WORLD_DIR,
                     norm_info_file=norm_info_file,
                     do_post_filtering=do_post_filtering)
    # Generated samples: i // NUM_REPEAT indexes the utterance,
    # i % NUM_REPEAT the latent draw.
    for i, this_sample in enumerate(samples_so_far):
        this_sample = this_sample[:int(mask_so_far_repeated.sum(axis=0)[i])]
        generate_wav(this_sample,
                     os.path.join(save_dir, 'samples'),
                     samples_name + '_' + str(i // NUM_REPEAT) +
                     '_latent_' + str(i % NUM_REPEAT),
                     sptk_dir=SPTK_DIR,
                     world_dir=WORLD_DIR,
                     norm_info_file=norm_info_file,
                     do_post_filtering=do_post_filtering)
def save(gen, orig, i=0):
    """Write a generated/original vocoder-frame pair to wav files.

    Both files are named with index `i`; post-filtering is disabled.
    """
    pairs = ((gen, 'generated_sample_%d'),
             (orig, 'original_sample_%d'))
    for frames, stem in pairs:
        generate.generate_wav(frames, base=stem % i,
                              do_post_filtering=False)
# 10000, sorting_mult=1, labels_type=saved_args.labels_type, # quantize_features=saved_args.quantized_input) gen_x, gen_k, gen_w, gen_pi, gen_phi, gen_pi_att = parrot.sample_model( labels_tr_old, labels_mask_tr_old, features_mask_tr_old, new_speakers_tr_old, latent_var_tr_old, LATENT_NUM * args.num_samples) for j, this_sample in enumerate(gen_x.swapaxes(1, 0)): this_sample = this_sample[:int( features_mask_tr_old.sum(axis=0)[j])] generate_wav(this_sample, os.path.join(args.save_dir, 'samples', 'adaptation'), "sample_{}_{}_iters_{}".format( args.samples_name, j, i), sptk_dir=args.sptk_dir, world_dir=args.world_dir, norm_info_file=norm_info_file, do_post_filtering=args.do_post_filtering) if len(costs) != 0: print "cost at iter {} is {}".format(i, numpy.mean(costs[-200:])) print "Successfully sampled the parrot." if saved_args.quantized_input: _, dequantize = get_quantizers() gen_x = dequantize(gen_x) # import ipdb; ipdb.set_trace()