Example #1
#cost = cost.sum(axis=1) / target_mask.sum(axis=1)
#cost = cost.mean(axis=0)

# Use this one instead.
cost = cost.sum()
cost = cost / target_mask.sum()
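# (Summing over the whole batch and dividing by the total mask count weights
# every valid timestep equally, rather than averaging per sequence first as
# the commented-out version above would.)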

# By default we report cross-entropy cost in bits.
# Switch to nats by commenting out this line:
# log_2(e) = 1.44269504089
cost = cost * lib.floatX(numpy.log2(numpy.e))
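# Standalone check of the conversion factor (a sketch, independent of the
# training graph): multiplying a nat-valued cost by log2(e) rescales it
# from nats to bits.
import numpy
assert abs(numpy.log2(numpy.e) - 1.44269504089) < 1e-11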

### Getting the params, grads, updates, and Theano functions ###
params = lib.get_params(cost,
                        lambda x: hasattr(x, 'param') and x.param == True)
lib.print_params_info(params, path=FOLDER_PREFIX)

grads = T.grad(cost, wrt=params, disconnected_inputs='warn')
grads = [
    T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads
]
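# (Each gradient tensor is clipped elementwise to [-GRAD_CLIP, GRAD_CLIP]; an
# alternative, not used here, would be global-norm clipping via
# lasagne.updates.total_norm_constraint(grads, max_norm=GRAD_CLIP).)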

updates = lasagne.updates.adam(grads, params, learning_rate=LEARNING_RATE)

# Training function
train_fn = theano.function([sequences, h0, reset, mask], [cost, new_h0],
                           updates=updates,
                           on_unused_input='warn')

# Validation and Test function, hence no updates
test_fn = theano.function([sequences, h0, reset, mask], [cost, new_h0],
                          on_unused_input='warn')

Example #2
#prev_samples = prev_samples.reshape((1, BATCH_SIZE, 1, -1))
#prev_samples = T.nnet.neighbours.images2neibs(prev_samples, (1, FRAME_SIZE), neib_step=(1, 1), mode='valid')
#prev_samples = prev_samples.reshape((BATCH_SIZE,SEQ_LEN, FRAME_SIZE))

encoder_outputs, new_h0 = encoder(input_sequences, h0, reset)

#decoder_outputs = decoder(encoder_outputs,prev_samples)

cost = T.nnet.categorical_crossentropy(T.nnet.softmax(encoder_outputs),
                                       target_sequences.flatten()).mean()

cost = cost * lib.floatX(1.44269504089)
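# (1.44269504089 is log2(e): it converts the cross-entropy from nats to
# bits, the same factor as numpy.log2(numpy.e) in Example #1.)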

params = lib.search(cost, lambda x: hasattr(x, 'param'))

lib.print_params_info(cost, params)

grads = T.grad(cost, wrt=params, disconnected_inputs='warn')

grads = [
    T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads
]

print "Gradients Computed"

updates = lasagne.updates.adam(grads, params, learning_rate=lr)

train_fn = theano.function([sequences, h0, reset, lr], [cost, new_h0],
                           updates=updates,
                           on_unused_input='warn')
Example #3
readout = lib.ops.Linear(
    'Generator.GRU.Output.MLP.1',
    T.concatenate([state[:, :, -1], tiled_speaker], axis=-1),
    DEC_DIM + SPEAKER_DIM,
    OUTPUT_DIM
)

mask_mult = T.shape_padright(mask)

cost = T.sum(T.sqr(X - readout) * mask_mult) / (T.sum(mask) * 63.)
test_cost = (T.sum(T.sqr(X - predict_readout) * T.shape_padright(mask))
             / (T.sum(mask) * 63.))
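
# Standalone numpy sketch of the masked cost above (the literal 63. is
# assumed to be OUTPUT_DIM, the per-frame feature count):
import numpy as np
X_np = np.random.rand(2, 4, 63)
readout_np = np.random.rand(2, 4, 63)
mask_np = np.array([[1., 1., 0., 0.], [1., 1., 1., 1.]])
# Sum of squared error over valid frames, divided by the number of valid
# scalar elements -> a mean over unmasked entries only.
cost_np = (((X_np - readout_np) ** 2 * mask_np[:, :, None]).sum()
           / (mask_np.sum() * 63.))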

params = lib.search(cost, lambda x: hasattr(x, 'param') and x.param == True)
lib.print_params_info(params)

grads = T.grad(cost, wrt=params, disconnected_inputs='warn')
grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads]

print "Gradients Computed"

updates = lasagne.updates.adam(grads, params, learning_rate=learn_rate)

train_fn = theano.function(
    [noise_vocoder, X, spkr_ids, ctx, mask, learn_rate],
    cost,
    updates=updates,
    on_unused_input='warn'
)
Example #4
#params = ip_params + other_params
#lib.print_params_info(params, path=FOLDER_PREFIX)
#
#grads = T.grad(cost, wrt=params, disconnected_inputs='warn')
#grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads]
#
#updates = lasagne.updates.adam(grads, params, learning_rate=LEARNING_RATE)

###########
all_params = lib.get_params(cost,
                            lambda x: hasattr(x, 'param') and x.param == True)
ip_params = lib.get_params(
    ip_cost, lambda x: hasattr(x, 'param') and x.param == True
    and 'BigFrameLevel' in x.name)
other_params = [p for p in all_params if p not in ip_params]
all_params = ip_params + other_params
lib.print_params_info(ip_params, path=FOLDER_PREFIX)
lib.print_params_info(other_params, path=FOLDER_PREFIX)
lib.print_params_info(all_params, path=FOLDER_PREFIX)

ip_grads = T.grad(ip_cost, wrt=ip_params, disconnected_inputs='warn')
ip_grads = [
    T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in ip_grads
]

other_grads = T.grad(cost, wrt=other_params, disconnected_inputs='warn')
other_grads = [
    T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP))
    for g in other_grads
]

grads = T.grad(cost, wrt=all_params, disconnected_inputs='warn')
Example #5
# all zero for some of the shorter files in mini-batch.
#cost = cost.sum(axis=1) / target_mask.sum(axis=1)
#cost = cost.mean(axis=0)

# Use this one instead.
cost = cost.sum()
cost = cost / target_mask.sum()

# By default we report cross-entropy cost in bits.
# Switch to nats by commenting out this line:
# log_2(e) = 1.44269504089
cost = cost * lib.floatX(numpy.log2(numpy.e))

### Getting the params, grads, updates, and Theano functions ###
params = lib.get_params(cost, lambda x: hasattr(x, 'param') and x.param==True)
lib.print_params_info(params, path=FOLDER_PREFIX)

grads = T.grad(cost, wrt=params, disconnected_inputs='warn')
grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads]

updates = lasagne.updates.adam(grads, params, learning_rate=LEARNING_RATE)

# Training function
train_fn = theano.function(
    [sequences, mask],
    cost,
    updates=updates,
    on_unused_input='warn'
)

# Validation and Test function
Example #6
#prev_samples = prev_samples.reshape((BATCH_SIZE,SEQ_LEN, FRAME_SIZE))

encoder_outputs, new_h0 = encoder(input_sequences, h0, reset)

#decoder_outputs = decoder(encoder_outputs,prev_samples)

cost = T.nnet.categorical_crossentropy(
    T.nnet.softmax(encoder_outputs),
    target_sequences.flatten()
).mean()

cost = cost * lib.floatX(1.44269504089)
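# (1.44269504089 is log2(e): as in Example #1, this reports the
# cross-entropy in bits rather than nats.)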

params = lib.search(cost, lambda x: hasattr(x, 'param'))

lib.print_params_info(cost, params)

grads = T.grad(cost, wrt=params, disconnected_inputs='warn')

grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads]

print "Gradients Computed"

updates = lasagne.updates.adam(grads, params, learning_rate=lr)

train_fn = theano.function(
    [sequences, h0, reset, lr],
    [cost, new_h0],
    updates=updates,
    on_unused_input='warn'
)
Example #7
def train_loop(inputs,
               cost,
               train_data,
               times,
               prints=None,
               inject_total_iters=False,
               test_data=None,
               callback=None,
               optimizer=lasagne.updates.adam,
               save_params=False,
               nan_guard=False):

    params = lib.search(cost, lambda x: hasattr(x, 'param'))
    lib.print_params_info(params)

    grads = T.grad(cost, wrt=params, disconnected_inputs='warn')

    grads = [T.clip(g, lib.floatX(-1), lib.floatX(1)) for g in grads]
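    # Note: the clip bounds are hard-coded to [-1, 1] here, unlike the
    # GRAD_CLIP constant used in the standalone training scripts above.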

    updates = optimizer(grads, params)

    if prints is None:
        prints = [('cost', cost)]
    else:
        prints = [('cost', cost)] + prints

    print "Compiling train function..."
    if nan_guard:
        from theano.compile.nanguardmode import NanGuardMode
        mode = NanGuardMode(nan_is_error=True,
                            inf_is_error=True,
                            big_is_error=True)
    else:
        mode = None
    train_fn = theano.function(inputs, [p[1] for p in prints],
                               updates=updates,
                               on_unused_input='warn',
                               mode=mode)

    print "Compiling eval function..."
    eval_fn = theano.function(inputs, [p[1] for p in prints],
                              on_unused_input='warn')

    print "Training!"

    total_iters = 0
    total_seconds = 0.
    last_print = 0
    last_gen = 0

    if len(times) >= 4:
        gen_every = times[3]
    else:
        gen_every = times[1]

    if len(times) >= 5:
        early_stop = times[4]
        if len(times) >= 6:
            early_stop_min = times[5]
        else:
            early_stop_min = 0
    else:
        early_stop = None
        early_stop_min = None
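    # Schedule convention implied by the indexing above and below:
    #   times = (unit, print_every, stop_after[, gen_every[, early_stop[, early_stop_min]]])
    # where unit is either 'iters' or 'seconds'.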

    best_test_cost = np.inf
    best_test_cost_iter = 0.

    all_outputs = []
    all_stats = []
    for epoch in itertools.count():

        generator = train_data()
        while True:
            try:
                inputs = next(generator)
            except StopIteration:
                break

            if inject_total_iters:
                inputs = [np.int32(total_iters)] + list(inputs)

            start_time = time.time()
            outputs = train_fn(*inputs)
            total_seconds += time.time() - start_time
            total_iters += 1

            all_outputs.append(outputs)

            if total_iters == 1:
                try:  # This only matters on Ishaan's computer
                    import experiment_tools
                    experiment_tools.register_crash_notifier()
                except ImportError:
                    pass

            if (times[0]=='iters' and total_iters-last_print == times[1]) or \
                (times[0]=='seconds' and total_seconds-last_print >= times[1]):

                mean_outputs = np.array(all_outputs).mean(axis=0)

                if test_data is not None:
                    if inject_total_iters:
                        test_outputs = [
                            eval_fn(np.int32(total_iters), *inputs)
                            for inputs in test_data()
                        ]
                    else:
                        test_outputs = [
                            eval_fn(*inputs) for inputs in test_data()
                        ]
                    test_mean_outputs = np.array(test_outputs).mean(axis=0)

                stats = collections.OrderedDict()
                stats['epoch'] = epoch
                stats['iters'] = total_iters
                for i, p in enumerate(prints):
                    stats['train ' + p[0]] = mean_outputs[i]
                if test_data is not None:
                    for i, p in enumerate(prints):
                        stats['test ' + p[0]] = test_mean_outputs[i]
                stats['secs'] = total_seconds
                stats['secs/iter'] = total_seconds / total_iters

                if test_data is not None and (
                        stats['test cost'] < best_test_cost
                        or (early_stop_min is not None
                            and total_iters <= early_stop_min)):
                    best_test_cost = stats['test cost']
                    best_test_cost_iter = total_iters

                print_str = ""
                for k, v in stats.items():
                    if isinstance(v, int):
                        print_str += "{}:{}\t".format(k, v)
                    else:
                        print_str += "{}:{:.4f}\t".format(k, v)
                print print_str[:-1]  # omit the last \t

                all_stats.append(stats)

                all_outputs = []
                last_print += times[1]

            if (times[0]=='iters' and total_iters-last_gen==gen_every) or \
                (times[0]=='seconds' and total_seconds-last_gen >= gen_every):
                tag = "iters{}_time{}".format(total_iters, total_seconds)
                if callback is not None:
                    callback(tag)
                if save_params:
                    lib.save_params('params_{}.pkl'.format(tag))

                last_gen += gen_every

            early_stopping = (test_data is not None and early_stop is not None
                              and total_iters > (3 * early_stop)
                              and (total_iters - best_test_cost_iter) > early_stop)

            if (times[0]=='iters' and total_iters == times[2]) or \
                (times[0]=='seconds' and total_seconds >= times[2]) or \
                early_stopping:

                if early_stopping:
                    print "Early stop! Best test cost was {} at iter {}".format(
                        best_test_cost, best_test_cost_iter)

                print "Done!"

                try:  # This only matters on Ishaan's computer
                    import experiment_tools
                    experiment_tools.send_sms("done!")
                except ImportError:
                    pass

                return all_stats
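
# Minimal usage sketch for train_loop (hypothetical: it assumes `lib` and
# `lasagne` are importable and that lib.search collects shared variables
# tagged with a .param attribute, as the loop above expects):
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
W = theano.shared(np.zeros((4, 4), dtype=theano.config.floatX), name='W')
W.param = True
toy_cost = T.sqr(T.dot(x, W) - x).mean()

def toy_batches():
    # A generator of input tuples matching the `inputs` variables.
    for _ in range(100):
        yield (np.random.rand(8, 4).astype(theano.config.floatX),)

stats = train_loop([x], toy_cost, toy_batches, times=('iters', 10, 100))
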
Example #8
def create_encoder_decoder():

    input_var = T.tensor3('input')
    input_var_normalised = (input_var - lib.floatX(0.5))

    mu, log_square_sigma = Encoder(input_var_normalised)

    mu = lib.floatX(2.) * T.tanh(mu / lib.floatX(2.))

    sampled_z = gaussian_sampler(mu, log_square_sigma)
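    # gaussian_sampler presumably applies the reparameterization trick:
    # z = mu + exp(0.5 * log_square_sigma) * eps, with eps ~ N(0, I).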

    reconstructed = Decoder(sampled_z)

    reconstruction_cost = T.nnet.binary_crossentropy(
        reconstructed.reshape((reconstructed.shape[0], -1)),
        input_var.reshape((input_var.shape[0], -1))).sum(axis=1)

    kl_cost = KL_with_standard_gaussian(mu, log_square_sigma)
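    # For reference, the closed-form KL to a standard Gaussian (which
    # KL_with_standard_gaussian presumably implements) is
    # 0.5 * sum(mu^2 + exp(log_square_sigma) - log_square_sigma - 1) per example.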

    loss = T.mean(kl_cost + reconstruction_cost)

    params = lib.search(loss,
                        lambda x: hasattr(x, 'param') and x.param == True)
    lib.print_params_info(params)

    grads = T.grad(loss, wrt=params, disconnected_inputs='warn')
    grads = [T.clip(g, lib.floatX(-1.), lib.floatX(1.)) for g in grads]

    lr = T.scalar('lr')

    updates = lasagne.updates.adam(grads,
                                   params,
                                   learning_rate=lr,
                                   epsilon=EPS)

    generated_z = T.matrix('generated_z')

    generated_samples = Decoder(generated_z)

    print "Compiling functions ..."

    train_fn = theano.function(
        [input_var, lr],
        [
            loss,
            kl_cost.mean(),
            mu.min(),
            mu.max(), mu,
            sampled_z.min(),
            sampled_z.max()
        ],
        updates=updates,
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    reconstruct_fn = theano.function([input_var], reconstructed)

    val_fn = theano.function(
        [input_var],
        [
            loss,
            kl_cost.mean(),
            mu.min(),
            mu.max(), mu,
            sampled_z.min(),
            sampled_z.max()
        ],
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    generate_fn = theano.function([generated_z], generated_samples)

    encode_fn = theano.function([input_var], mu)

    return train_fn, val_fn, generate_fn, reconstruct_fn, encode_fn
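
# Hypothetical usage (a sketch: Encoder, Decoder, gaussian_sampler,
# KL_with_standard_gaussian, EPS and lib must be defined elsewhere):
#   train_fn, val_fn, generate_fn, reconstruct_fn, encode_fn = create_encoder_decoder()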
Example #9
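# T.tile below broadcasts the per-utterance speaker embedding across all
# seq_len timesteps so it can be concatenated with the context features.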
tiled_speaker = T.tile(emb_spkr[:, None, :], [1, seq_len, 1])
if ENCODING:
    emb_ctx = T.concatenate(
        [T.nnet.relu(ConvolutionalMapper(aligned_ctx, mode='train')),
         tiled_speaker],
        axis=-1
    )

predict_readout = lib.ops.RNN(
    'GRU',
    'Generator.GRU',
    emb_ctx,
    EMB_DIM+SPEAKER_DIM,
    DEC_DIM,
    OUTPUT_DIM,
    n_layers=N_RNN,
    mode='open-loop-rnn'
)

lib.print_params_info(lib._params.values())

predict_fn = theano.function(
    [spkr_ids, ctx],
    predict_readout,
    on_unused_input='warn'
)

direct_fn = theano.function(
    [spkr_ids, aligned_ctx],
    predict_readout,
    on_unused_input='warn'
)

nmt_fn = theano.function(
    [chars, chars_mask],
Example #10
def train_loop(
    inputs,
    cost,
    train_data,
    times,
    prints=None,
    inject_total_iters=False,
    test_data=None,
    callback=None,
    optimizer=lasagne.updates.adam,
    save_params=False,
    nan_guard=False
    ):

    params = lib.search(cost, lambda x: hasattr(x, 'param'))
    lib.print_params_info(params)

    grads = T.grad(cost, wrt=params, disconnected_inputs='warn')
    grads = [T.clip(g, lib.floatX(-1), lib.floatX(1)) for g in grads]

    updates = optimizer(grads, params)

    if prints is None:
        prints = [('cost', cost)]
    else:
        prints = [('cost', cost)] + prints

    print "Compiling train function..."
    if nan_guard:
        from theano.compile.nanguardmode import NanGuardMode
        mode = NanGuardMode(
            nan_is_error=True, 
            inf_is_error=True, 
            big_is_error=True
        )
    else:
        mode = None
    train_fn = theano.function(
        inputs,
        [p[1] for p in prints],
        updates=updates,
        on_unused_input='warn',
        mode=mode
    )

    print "Compiling eval function..."
    eval_fn = theano.function(
        inputs,
        [p[1] for p in prints],
        on_unused_input='warn'
    )

    print "Training!"
    total_iters = 0
    total_seconds = 0.
    last_print = 0
    all_outputs = []
    all_stats = []
    for epoch in itertools.count():

        for inputs in train_data():

            if inject_total_iters:
                inputs = [np.int32(total_iters)] + list(inputs)

            start_time = time.time()
            outputs = train_fn(*inputs)
            total_seconds += time.time() - start_time
            total_iters += 1

            all_outputs.append(outputs)

            if total_iters == 1:
                try: # This only matters on Ishaan's computer
                    import experiment_tools
                    experiment_tools.register_crash_notifier()
                except ImportError:
                    pass

            if (times[0]=='iters' and total_iters-last_print == times[1]) or \
                (times[0]=='seconds' and total_seconds-last_print >= times[1]):

                mean_outputs = np.array(all_outputs).mean(axis=0)

                if test_data is not None:
                    if inject_total_iters:
                        test_outputs = [
                            eval_fn(np.int32(total_iters), *inputs)
                            for inputs in test_data()
                        ]
                    else:
                        test_outputs = [
                            eval_fn(*inputs) 
                            for inputs in test_data()
                        ]
                    test_mean_outputs = np.array(test_outputs).mean(axis=0)

                stats = collections.OrderedDict()
                stats['epoch'] = epoch
                stats['iters'] = total_iters
                for i,p in enumerate(prints):
                    stats['train '+p[0]] = mean_outputs[i]
                if test_data is not None:
                    for i,p in enumerate(prints):
                        stats['test '+p[0]] = test_mean_outputs[i]
                stats['secs'] = total_seconds
                stats['secs/iter'] = total_seconds / total_iters

                print_str = ""
                for k,v in stats.items():
                    if isinstance(v, int):
                        print_str += "{}:{}\t".format(k,v)
                    else:
                        print_str += "{}:{:.4f}\t".format(k,v)
                print print_str[:-1] # omit the last \t

                all_stats.append(stats)

                tag = "iters{}_time{}".format(total_iters, total_seconds)
                if callback is not None:
                    callback(tag)
                if save_params:
                    lib.save_params('params_{}.pkl'.format(tag))

                all_outputs = []
                last_print += times[1]

            if (times[0]=='iters' and total_iters == times[2]) or \
                (times[0]=='seconds' and total_seconds >= times[2]):

                print "Done!"

                try: # This only matters on Ishaan's computer
                    import experiment_tools
                    experiment_tools.send_sms("done!")
                except ImportError:
                    pass

                return all_stats
Example #11
#other_params = [p for p in params if p not in ip_params]
#params = ip_params + other_params
#lib.print_params_info(params, path=FOLDER_PREFIX)
#
#grads = T.grad(cost, wrt=params, disconnected_inputs='warn')
#grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads]
#
#updates = lasagne.updates.adam(grads, params, learning_rate=LEARNING_RATE)

###########
all_params = lib.get_params(cost, lambda x: hasattr(x, 'param') and x.param==True)
ip_params = lib.get_params(
    ip_cost, lambda x: hasattr(x, 'param') and x.param == True
    and 'BigFrameLevel' in x.name)
other_params = [p for p in all_params if p not in ip_params]
all_params = ip_params + other_params
lib.print_params_info(ip_params, path=FOLDER_PREFIX)
lib.print_params_info(other_params, path=FOLDER_PREFIX)
lib.print_params_info(all_params, path=FOLDER_PREFIX)

ip_grads = T.grad(ip_cost, wrt=ip_params, disconnected_inputs='warn')
ip_grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in ip_grads]

other_grads = T.grad(cost, wrt=other_params, disconnected_inputs='warn')
other_grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in other_grads]

grads = T.grad(cost, wrt=all_params, disconnected_inputs='warn')
grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads]

ip_updates = lasagne.updates.adam(ip_grads, ip_params)
other_updates = lasagne.updates.adam(other_grads, other_params)
updates = lasagne.updates.adam(grads, all_params)
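
# A sketch of the presumed intent, since the source is truncated here (an
# assumption, not code from the original file): each update dict would be
# compiled into its own Theano function, e.g. pretraining the
# 'BigFrameLevel' tier with ip_updates, adapting the remaining tiers with
# other_updates, or training everything jointly with updates.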