def score(batch_size=16): valid_stream = parrot_stream('blizzard', False, ('valid', ), batch_size, noise_level=0., labels_type='phonemes', seq_size=100000) itr = valid_stream.get_epoch_iterator() costs = [] times = [] for val_X, val_mask, val_ctx, val_reset, val_noise_level in itr: val_X = val_X.transpose((1, 0, 2)) val_mask = val_mask.T val_ctx = val_ctx.T start = time.time() _loss = test_fn(val_X, val_ctx, val_mask) costs.append(_loss) times.append(time.time() - start) print "\n\nValidation Completed!" print "\tMean cost: ", np.mean(np.asarray(costs), axis=0) / EMB_DIM print "\tMean time: ", np.mean(times) return np.mean(np.asarray(costs), axis=0) / EMB_DIM
# NOTE(review): this chunk starts mid-conditional -- the `if` that selects
# the 'last_' parameter snapshot is outside the visible source, so the
# leading assignment / `else:` pair cannot be re-indented correctly here.
params_mode = 'last_'
else:
    params_mode = 'best_'

# Prefix the output sample names with the snapshot mode that was loaded.
args.samples_name = params_mode + args.samples_name

# Load the pickled model parameters saved during training.
with open(
        os.path.join(
            args.save_dir, "pkl",
            params_mode + args.experiment_name + ".tar"), 'rb') as src:
    parameters = load_parameters(src)

# Build a test-set stream matching the saved training configuration.
test_stream = parrot_stream(
    args.dataset, saved_args.use_speaker, ('test', ), args.num_samples,
    args.num_steps, sorting_mult=1, labels_type=saved_args.labels_type,
    raw_data=args.plot_raw)

# Take a single batch and index it by stream source name.
data_tr = next(test_stream.get_epoch_iterator())
data_tr = {source: data
           for source, data in zip(test_stream.sources, data_tr)}
print "Loaded sources from test_stream: ", data_tr.keys()

# Sources may be absent depending on the stream configuration, hence the
# .get(..., None) defaults.
features_tr = data_tr.get('features', None)
features_mask_tr = data_tr.get('features_mask', None)
speaker_tr = data_tr.get('speaker_index', None)
labels_tr = data_tr.get('labels', None)
labels_mask_tr = data_tr.get('labels_mask', None)
start_flag_tr = data_tr.get('start_flag', None)
raw_audio_tr = data_tr.get('raw_audio', None)
def sampler(save_dir, samples_name, do_post_filtering):
    # Draw one test batch, synthesise NUM_REPEAT samples per utterance
    # from random latents, and write both generated and ground-truth
    # audio to disk via generate_wav.
    #
    # Parameters:
    #   save_dir          -- root directory; 'samples' and
    #                        'actual_samples' subdirectories are created
    #                        beneath it.
    #   samples_name      -- filename prefix for the emitted files.
    #   do_post_filtering -- forwarded unchanged to generate_wav.
    #
    # NOTE(review): relies on module-level DATASET, BATCH_SIZE,
    # NUM_REPEAT, LATENT_DIM, SPTK_DIR, WORLD_DIR, data_dir, sample_fn,
    # generate_wav, datasets, numpy, lib and os -- none visible here.
    test_stream = datasets.parrot_stream(
        DATASET, use_speaker=False, which_sets=('test', ),
        batch_size=BATCH_SIZE, seq_size=10000)
    test_iterator = test_stream.get_epoch_iterator()
    # One latent per repeat, tiled across the batch so every utterance
    # is rendered with the same NUM_REPEAT latent vectors.
    latents_generated = numpy.random.normal(size=(NUM_REPEAT, LATENT_DIM))
    latents_generated = lib.floatX(
        numpy.tile(latents_generated, (BATCH_SIZE, 1)))
    actual_so_far_raw, mask_raw, text_features_raw, reset = \
        next(test_iterator)
    # Repeat the conditioning text features along axis 1 to match the
    # repeated latents -- presumably axis 1 is the batch axis; confirm
    # against the stream layout.
    text_features_raw_repeated = numpy.repeat(
        text_features_raw, NUM_REPEAT, axis=1)
    samples_so_far = sample_fn(text_features_raw_repeated,
                               latents_generated)
    mask_so_far = mask_raw
    actual_so_far = actual_so_far_raw.transpose((1, 0, 2))
    mask_so_far_repeated = numpy.repeat(mask_so_far, NUM_REPEAT, axis=1)
    norm_info_file = os.path.join(
        data_dir, DATASET, 'norm_info_mgc_lf0_vuv_bap_63_MVN.dat')
    # Ensure both output directories exist before writing.
    if not os.path.exists(os.path.join(save_dir, 'samples')):
        os.makedirs(os.path.join(save_dir, 'samples'))
    if not os.path.exists(os.path.join(save_dir, 'actual_samples')):
        os.makedirs(os.path.join(save_dir, 'actual_samples'))
    """ TODO: Remove this commented section. """
    # NOTE(review): the bare string above is a stale no-op statement --
    # the loops below are live code, not a commented-out section.
    for i, this_sample in enumerate(actual_so_far):
        # Trim each ground-truth utterance to its unpadded length as
        # given by the mask.
        this_sample = this_sample[:int(mask_so_far.sum(axis=0)[i])]
        generate_wav(
            this_sample, os.path.join(save_dir, 'actual_samples'),
            samples_name + '_' + str(i), sptk_dir=SPTK_DIR,
            world_dir=WORLD_DIR, norm_info_file=norm_info_file,
            do_post_filtering=do_post_filtering)
    for i, this_sample in enumerate(samples_so_far):
        # Generated samples are grouped NUM_REPEAT-per-utterance; the
        # filename encodes both the utterance and the latent index.
        this_sample = this_sample[
            :int(mask_so_far_repeated.sum(axis=0)[i])]
        generate_wav(
            this_sample, os.path.join(save_dir, 'samples'),
            samples_name + '_' + str(i // NUM_REPEAT) + '_latent_' +
            str(i % NUM_REPEAT), sptk_dir=SPTK_DIR, world_dir=WORLD_DIR,
            norm_info_file=norm_info_file,
            do_post_filtering=do_post_filtering)
# NOTE(review): this chunk starts mid-loop-body -- the `for` header that
# binds `this_sample` and `i` is outside the visible source, so this
# trailing generate_wav call is shown un-indented here.
generate_wav(
    this_sample, os.path.join(save_dir, 'samples'),
    samples_name + '_' + str(i // NUM_REPEAT) + '_latent_' +
    str(i % NUM_REPEAT), sptk_dir=SPTK_DIR, world_dir=WORLD_DIR,
    norm_info_file=norm_info_file,
    do_post_filtering=do_post_filtering)

# Emit an initial batch of samples before any training happens.
sampler(os.path.join(OUT_DIR, "samples", "initial_samples"), "sample",
        False)

# Data streams for the optimisation loop.
train_stream = datasets.parrot_stream(
    DATASET, use_speaker=False, which_sets=('train', ),
    batch_size=BATCH_SIZE, seq_size=10000)
valid_stream = datasets.parrot_stream(
    DATASET, use_speaker=False, which_sets=('valid', ),
    batch_size=BATCH_SIZE, seq_size=10000)

# Bookkeeping accumulators for the training loop that follows.
total_iters = 0
total_time = 0.
train_costs = []
valid_costs = []
# Parse the training CLI arguments and persist them so later scripts
# (sampling / evaluation) can reload the exact configuration.
args = train_parse()
exp_name = args.experiment_name
save_dir = args.save_dir

print "Saving config ..."
# NOTE(review): opened in text mode ('w'); cPickle's default protocol on
# py2 is ASCII so this works -- would need 'wb' under py3.
with open(os.path.join(save_dir, 'config', exp_name + '.pkl'), 'w') as f:
    cPickle.dump(args, f)
print "Finished saving."

# Weight and bias initialisation schemes shared by the model.
w_init = initialization.IsotropicGaussian(0.01)
b_init = initialization.Constant(0.)

train_stream = parrot_stream(
    args.dataset, args.use_speaker, ('train',), args.batch_size,
    noise_level=args.feedback_noise_level, labels_type=args.labels_type,
    seq_size=args.seq_size, raw_data=args.raw_output)

# Validation uses zero feedback noise, but only when noise is enabled at
# all: None must stay None so the stream omits the noise source entirely.
if args.feedback_noise_level is None:
    val_noise_level = None
else:
    val_noise_level = 0.

valid_stream = parrot_stream(
    args.dataset, args.use_speaker, ('valid',), args.batch_size,
    noise_level=val_noise_level, labels_type=args.labels_type,
    seq_size=args.seq_size, raw_data=args.raw_output)

# Peek one batch, e.g. to inspect source shapes.
example_batch = next(train_stream.get_epoch_iterator())

# NOTE(review): chunk ends mid-statement -- the body of this `for` is
# outside the visible source.
for idx, source in enumerate(train_stream.sources):
# NOTE(review): this chunk starts mid-call -- the line below closes a
# theano.function(...) call whose opening arguments are outside the
# visible source.
    on_unused_input='warn')

# Compile the evaluation function: (X, ctx, mask) -> test_cost.
test_fn = theano.function([X, ctx, mask], test_cost,
                          on_unused_input='warn')

# Load pretrained parameters from a fixed absolute path --
# NOTE(review): machine-specific; breaks anywhere else.
lib.load_params(
    '/data/lisa/exp/kumarrit/ppgn-speech/blizzard_tanh_recognizer_best.pkl'
)
print "Compiled Function!"

i = 0
train_stream = parrot_stream(
    'blizzard', False, ('train', ), BATCH_SIZE, noise_level=0.,
    labels_type='phonemes', seq_size=100000)
i = 0  # NOTE(review): redundant -- i is already 0 a few lines up.
best_score = 1000.
for i in xrange(i, NB_EPOCHS):
    costs = []
    iter = 0  # NOTE(review): shadows the builtin `iter`.
    times = []
    itr = train_stream.get_epoch_iterator()
    for train_X, train_mask, train_ctx, train_reset, \
            train_noise_level in itr:
        # Reorder batch axes before the update -- presumably
        # batch-major -> time-major; confirm stream layout.
        train_X = train_X.transpose((1, 0, 2))
        train_mask = train_mask.T
        train_ctx = train_ctx.T
        iter += 1
# NOTE(review): chunk ends mid-loop-body -- the remainder of the inner
# training loop is outside the visible source.
# NOTE(review): this chunk starts mid-call and mid-branch -- the
# allocation closed on the first code line (presumably numpy.zeros for
# padded_labels_tr) and the `if` matching the `else:` below are outside
# the visible source; the leading statements are shown with their
# if-branch indentation.
        (args.num_samples, max_length, saved_args.input_dim),
        dtype='float32')
    for i, sample in enumerate(labels_tr):
        # Copy each variable-length label sequence into the padded
        # buffer and mark its valid timesteps in the mask.
        padded_labels_tr[i, :len(sample)] = sample
        features_mask_tr[i, :len(sample)] = 1.
    labels_tr = padded_labels_tr
    # Swap the first two axes -- presumably batch-major -> time-major;
    # confirm against the model's expected layout.
    features_mask_tr = features_mask_tr.swapaxes(0, 1)
    labels_tr = labels_tr.swapaxes(0, 1)
else:
    # Stream a test batch from a *different* dataset than training
    # (args.new_dataset), reusing the saved labels/quantization config.
    test_stream = parrot_stream(
        args.new_dataset, False, ('test', ), args.num_samples, 10000,
        sorting_mult=1, labels_type=saved_args.labels_type,
        quantize_features=saved_args.quantized_input)
    data_tr = next(test_stream.get_epoch_iterator())
    # Index the batch by stream source name.
    data_tr = {
        source: data
        for source, data in zip(test_stream.sources, data_tr)
    }
    print "Loaded sources from test_stream: ", data_tr.keys()
    # Sources may be absent depending on configuration.
    features_tr = data_tr.get('features', None)
    features_mask_tr = data_tr.get('features_mask', None)
    speaker_tr = data_tr.get('speaker_index', None)
    labels_tr = data_tr.get('labels', None)
# NOTE(review): this chunk starts mid-call and mid-branch -- the
# allocation closed on the first code line (presumably numpy.zeros for
# padded_labels_tr) and the `if` matching the `else:` below are outside
# the visible source; the leading statements are shown with their
# if-branch indentation. It near-duplicates the sibling chunk that
# streams args.new_dataset -- only the parrot_stream arguments differ.
        (args.num_samples, max_length, saved_args.input_dim),
        dtype='float32')
    for i, sample in enumerate(labels_tr):
        # Copy each variable-length label sequence into the padded
        # buffer and mark its valid timesteps in the mask.
        padded_labels_tr[i, :len(sample)] = sample
        features_mask_tr[i, :len(sample)] = 1.
    labels_tr = padded_labels_tr
    # Swap the first two axes -- presumably batch-major -> time-major;
    # confirm against the model's expected layout.
    features_mask_tr = features_mask_tr.swapaxes(0, 1)
    labels_tr = labels_tr.swapaxes(0, 1)
else:
    # Stream a test batch from the training dataset, reusing the saved
    # speaker/labels/quantization/raw-output configuration.
    test_stream = parrot_stream(
        args.dataset, saved_args.use_speaker, ('test', ),
        args.num_samples, args.num_steps, sorting_mult=1,
        labels_type=saved_args.labels_type,
        quantize_features=saved_args.quantized_input,
        raw_data=saved_args.raw_output)
    data_tr = next(test_stream.get_epoch_iterator())
    # Index the batch by stream source name.
    data_tr = {
        source: data
        for source, data in zip(test_stream.sources, data_tr)
    }
    print "Loaded sources from test_stream: ", data_tr.keys()
    # Sources may be absent depending on configuration.
    features_tr = data_tr.get('features', None)
    features_mask_tr = data_tr.get('features_mask', None)
    speaker_tr = data_tr.get('speaker_index', None)
    labels_tr = data_tr.get('labels', None)