Example #1
0
def score(batch_size=16):
    valid_stream = parrot_stream('blizzard',
                                 False, ('valid', ),
                                 batch_size,
                                 noise_level=0.,
                                 labels_type='phonemes',
                                 seq_size=100000)
    itr = valid_stream.get_epoch_iterator()
    costs = []
    times = []
    for val_X, val_mask, val_ctx, val_reset, val_noise_level in itr:
        val_X = val_X.transpose((1, 0, 2))
        val_mask = val_mask.T
        val_ctx = val_ctx.T
        start = time.time()
        _loss = test_fn(val_X, val_ctx, val_mask)
        costs.append(_loss)
        times.append(time.time() - start)

    print "\n\nValidation Completed!"
    print "\tMean cost: ", np.mean(np.asarray(costs), axis=0) / EMB_DIM
    print "\tMean time: ", np.mean(times)
    return np.mean(np.asarray(costs), axis=0) / EMB_DIM
Example #2
0
    # NOTE(review): the `if` guarding this branch is above this chunk;
    # presumably it selects the "last" vs "best" checkpoint — confirm.
    params_mode = 'last_'
else:
    params_mode = 'best_'

# Tag the output sample files with the checkpoint flavor chosen above.
args.samples_name = params_mode + args.samples_name

# Load the pickled model parameters for the selected checkpoint.
with open(
        os.path.join(args.save_dir, "pkl",
                     params_mode + args.experiment_name + ".tar"),
        'rb') as src:
    parameters = load_parameters(src)

# Build a test-split stream; a single batch of args.num_samples
# sequences is drawn from it below.
test_stream = parrot_stream(args.dataset,
                            saved_args.use_speaker, ('test', ),
                            args.num_samples,
                            args.num_steps,
                            sorting_mult=1,
                            labels_type=saved_args.labels_type,
                            raw_data=args.plot_raw)

# Take the first batch and map each array to its named source.
data_tr = next(test_stream.get_epoch_iterator())
data_tr = {source: data for source, data in zip(test_stream.sources, data_tr)}

print "Loaded sources from test_stream: ", data_tr.keys()
# Unpack the sources that may be present; missing ones default to None.
features_tr = data_tr.get('features', None)
features_mask_tr = data_tr.get('features_mask', None)
speaker_tr = data_tr.get('speaker_index', None)
labels_tr = data_tr.get('labels', None)
labels_mask_tr = data_tr.get('labels_mask', None)
start_flag_tr = data_tr.get('start_flag', None)
raw_audio_tr = data_tr.get('raw_audio', None)
Example #3
0
def sampler(save_dir, samples_name, do_post_filtering):
    """Generate and save WAV samples from one test-set batch.

    Draws a single batch from the test stream, synthesizes NUM_REPEAT
    samples per utterance from random normal latent vectors via
    ``sample_fn``, and writes both the ground-truth audio (under
    'actual_samples') and the generated audio (under 'samples').

    Parameters
    ----------
    save_dir : str
        Directory under which 'samples' and 'actual_samples' are created.
    samples_name : str
        Filename prefix for the written wav files.
    do_post_filtering : bool
        Forwarded to ``generate_wav``.
    """
    test_stream = datasets.parrot_stream(DATASET,
                                         use_speaker=False,
                                         which_sets=('test', ),
                                         batch_size=BATCH_SIZE,
                                         seq_size=10000)

    test_iterator = test_stream.get_epoch_iterator()

    # One latent vector per repeat, tiled so every utterance in the
    # batch is decoded with the same NUM_REPEAT latents.
    latents_generated = numpy.random.normal(size=(NUM_REPEAT, LATENT_DIM))

    latents_generated = lib.floatX(
        numpy.tile(latents_generated, (BATCH_SIZE, 1)))

    actual_so_far_raw, mask_raw, text_features_raw, reset = next(test_iterator)

    # Repeat text features along axis 1 to match the tiled latents
    # (assumed to be the batch axis — TODO confirm).
    text_features_raw_repeated = numpy.repeat(text_features_raw,
                                              NUM_REPEAT,
                                              axis=1)

    samples_so_far = sample_fn(text_features_raw_repeated, latents_generated)

    mask_so_far = mask_raw

    # Put the batch axis first so we can slice per utterance below.
    actual_so_far = actual_so_far_raw.transpose((1, 0, 2))

    mask_so_far_repeated = numpy.repeat(mask_so_far, NUM_REPEAT, axis=1)

    norm_info_file = os.path.join(data_dir, DATASET,
                                  'norm_info_mgc_lf0_vuv_bap_63_MVN.dat')

    # Ensure both output directories exist.
    for subdir in ('samples', 'actual_samples'):
        out_path = os.path.join(save_dir, subdir)
        if not os.path.exists(out_path):
            os.makedirs(out_path)

    # Write ground-truth audio, trimmed to each utterance's true length
    # (the mask column sum gives the number of valid steps).
    for i, this_sample in enumerate(actual_so_far):
        this_sample = this_sample[:int(mask_so_far.sum(axis=0)[i])]

        generate_wav(this_sample,
                     os.path.join(save_dir, 'actual_samples'),
                     samples_name + '_' + str(i),
                     sptk_dir=SPTK_DIR,
                     world_dir=WORLD_DIR,
                     norm_info_file=norm_info_file,
                     do_post_filtering=do_post_filtering)

    # Write generated audio; the filename encodes both the utterance
    # index (i // NUM_REPEAT) and the latent index (i % NUM_REPEAT).
    for i, this_sample in enumerate(samples_so_far):
        this_sample = this_sample[:int(mask_so_far_repeated.sum(axis=0)[i])]

        generate_wav(this_sample,
                     os.path.join(save_dir, 'samples'),
                     samples_name + '_' + str(i // NUM_REPEAT) + '_latent_' +
                     str(i % NUM_REPEAT),
                     sptk_dir=SPTK_DIR,
                     world_dir=WORLD_DIR,
                     norm_info_file=norm_info_file,
                     do_post_filtering=do_post_filtering)
Example #4
0
        generate_wav(this_sample,
                     os.path.join(save_dir, 'samples'),
                     samples_name + '_' + str(i // NUM_REPEAT) + '_latent_' +
                     str(i % NUM_REPEAT),
                     sptk_dir=SPTK_DIR,
                     world_dir=WORLD_DIR,
                     norm_info_file=norm_info_file,
                     do_post_filtering=do_post_filtering)


# Emit an initial round of samples before any training happens.
sampler(os.path.join(OUT_DIR, "samples", "initial_samples"), "sample", False)

# Train and validation streams share the same configuration; only the
# chosen split differs.
train_stream = datasets.parrot_stream(DATASET,
                                      use_speaker=False,
                                      which_sets=('train', ),
                                      batch_size=BATCH_SIZE,
                                      seq_size=10000)

valid_stream = datasets.parrot_stream(DATASET,
                                      use_speaker=False,
                                      which_sets=('valid', ),
                                      batch_size=BATCH_SIZE,
                                      seq_size=10000)

# Running counters for the training loop (which continues past this chunk).
total_iters = 0
total_time = 0.

train_costs = []
valid_costs = []
Example #5
0
args = train_parse()

exp_name = args.experiment_name
save_dir = args.save_dir

# Persist the parsed arguments so the experiment can be reloaded later.
print "Saving config ..."
with open(os.path.join(save_dir, 'config', exp_name + '.pkl'), 'w') as f:
    cPickle.dump(args, f)
print "Finished saving."

# Weight/bias initializers used when building the model.
w_init = initialization.IsotropicGaussian(0.01)
b_init = initialization.Constant(0.)

train_stream = parrot_stream(
    args.dataset, args.use_speaker, ('train',), args.batch_size,
    noise_level=args.feedback_noise_level, labels_type=args.labels_type,
    seq_size=args.seq_size, raw_data=args.raw_output)

# Validation uses zero feedback noise, but only when noise is enabled
# at all (None appears to disable the noise source entirely — confirm).
if args.feedback_noise_level is None:
    val_noise_level = None
else:
    val_noise_level = 0.

valid_stream = parrot_stream(
    args.dataset, args.use_speaker, ('valid',), args.batch_size,
    noise_level=val_noise_level, labels_type=args.labels_type,
    seq_size=args.seq_size, raw_data=args.raw_output)

# Pull one batch so the stream's sources can be inspected in the loop
# below (its body continues past this chunk).
example_batch = next(train_stream.get_epoch_iterator())

for idx, source in enumerate(train_stream.sources):
Example #6
0
                               on_unused_input='warn')

    # Compile the evaluation function: test cost given input, context, mask.
    test_fn = theano.function([X, ctx, mask],
                              test_cost,
                              on_unused_input='warn')

    # Warm-start from a previously trained recognizer checkpoint.
    lib.load_params(
        '/data/lisa/exp/kumarrit/ppgn-speech/blizzard_tanh_recognizer_best.pkl'
    )

    print "Compiled Function!"

    i = 0
    train_stream = parrot_stream('blizzard',
                                 False, ('train', ),
                                 BATCH_SIZE,
                                 noise_level=0.,
                                 labels_type='phonemes',
                                 seq_size=100000)

    i = 0
    best_score = 1000.
    # Epoch loop; the per-batch body continues past this chunk.
    for i in xrange(i, NB_EPOCHS):
        costs = []
        iter = 0
        times = []
        itr = train_stream.get_epoch_iterator()
        for train_X, train_mask, train_ctx, train_reset, train_noise_level in itr:
            # Reorder arrays into the layout the compiled functions
            # expect — assumed (time, batch, ...); TODO confirm.
            train_X = train_X.transpose((1, 0, 2))
            train_mask = train_mask.T
            train_ctx = train_ctx.T
            iter += 1
Example #7
0
        (args.num_samples, max_length, saved_args.input_dim), dtype='float32')

    # Right-pad each label sequence to max_length and mark the valid
    # steps with 1. in the feature mask.
    for i, sample in enumerate(labels_tr):
        padded_labels_tr[i, :len(sample)] = sample
        features_mask_tr[i, :len(sample)] = 1.

    labels_tr = padded_labels_tr

    # Swap the first two axes — presumably batch-major to time-major;
    # TODO confirm against the model's expected layout.
    features_mask_tr = features_mask_tr.swapaxes(0, 1)
    labels_tr = labels_tr.swapaxes(0, 1)
else:

    # No precomputed labels: draw a batch from the new dataset's test split.
    test_stream = parrot_stream(args.new_dataset,
                                False, ('test', ),
                                args.num_samples,
                                10000,
                                sorting_mult=1,
                                labels_type=saved_args.labels_type,
                                quantize_features=saved_args.quantized_input)

    # Take the first batch and map each array to its named source.
    data_tr = next(test_stream.get_epoch_iterator())
    data_tr = {
        source: data
        for source, data in zip(test_stream.sources, data_tr)
    }

    print "Loaded sources from test_stream: ", data_tr.keys()
    # Unpack the sources that may be present; missing ones default to None.
    features_tr = data_tr.get('features', None)
    features_mask_tr = data_tr.get('features_mask', None)
    speaker_tr = data_tr.get('speaker_index', None)
    labels_tr = data_tr.get('labels', None)
Example #8
0
        (args.num_samples, max_length, saved_args.input_dim), dtype='float32')

    # Right-pad each label sequence to max_length and mark the valid
    # steps with 1. in the feature mask.
    for i, sample in enumerate(labels_tr):
        padded_labels_tr[i, :len(sample)] = sample
        features_mask_tr[i, :len(sample)] = 1.

    labels_tr = padded_labels_tr

    # Swap the first two axes — presumably batch-major to time-major;
    # TODO confirm against the model's expected layout.
    features_mask_tr = features_mask_tr.swapaxes(0, 1)
    labels_tr = labels_tr.swapaxes(0, 1)
else:

    # No precomputed labels: draw a batch from the test split instead.
    test_stream = parrot_stream(args.dataset,
                                saved_args.use_speaker, ('test', ),
                                args.num_samples,
                                args.num_steps,
                                sorting_mult=1,
                                labels_type=saved_args.labels_type,
                                quantize_features=saved_args.quantized_input,
                                raw_data=saved_args.raw_output)

    # Take the first batch and map each array to its named source.
    data_tr = next(test_stream.get_epoch_iterator())
    data_tr = {
        source: data
        for source, data in zip(test_stream.sources, data_tr)
    }

    print "Loaded sources from test_stream: ", data_tr.keys()
    # Unpack the sources that may be present; missing ones default to None.
    features_tr = data_tr.get('features', None)
    features_mask_tr = data_tr.get('features_mask', None)
    speaker_tr = data_tr.get('speaker_index', None)
    labels_tr = data_tr.get('labels', None)