Example #1

h, _ = theano.scan(step, sequences=[in_fork], outputs_info=[h0])

h_o = slice_state(h, n_hid)

y_pred = softmax([h_o], [n_hid], n_classes, name="h2", random_state=random_state)
loss = categorical_crossentropy(y_pred, y_sym)
cost = loss.mean(axis=1).sum(axis=0)

params = list(get_params().values())
grads = tensor.grad(cost, params)

learning_rate = 0.0001
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym, h0], [cost, h], updates=updates)
cost_function = theano.function([X_sym, y_sym, h0], [cost, h])
predict_function = theano.function([X_sym, h0], [y_pred, h])


def train_loop(itr):
    mb = next(itr)
    X_mb, y_mb = mb[:-1], mb[1:]
    cost, h = fit_function(X_mb, y_mb, train_h_init)
    train_h_init[:] = h[-1, :]
    return [cost]
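
A minimal sketch of how train_loop might be driven follows; it is not part of the original example. The names n_epochs, n_minibatches_per_epoch, minibatch_size, n_hid, and train_itr are assumptions, and train_itr is assumed to yield arrays shaped (n_timesteps, minibatch_size) so that mb[:-1] / mb[1:] give inputs and next-step targets.

import numpy as np

# Hypothetical driver loop; n_epochs, n_minibatches_per_epoch, minibatch_size,
# n_hid, and train_itr are assumed names not defined in the snippet above.
n_epochs = 10
n_minibatches_per_epoch = 100
train_h_init = np.zeros((minibatch_size, n_hid), dtype="float32")

for epoch in range(n_epochs):
    epoch_costs = []
    for _ in range(n_minibatches_per_epoch):
        # one gradient step; train_loop writes the final hidden state back
        # into train_h_init so state carries across minibatches
        epoch_costs += train_loop(train_itr)
    print("epoch %i, mean cost %f" % (epoch, np.mean(epoch_costs)))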

Example #2
samp = gaussian_log_sample_layer([code_mu], [code_log_sigma], graph, 'samp',
                                 random_state)

# decode path aka p
l1_dec = softplus_layer([samp], graph, 'l1_dec', n_dec_layer[0], random_state)
l2_dec = softplus_layer([l1_dec], graph, 'l2_dec', n_dec_layer[1], random_state)
out = linear_layer([l2_dec], graph, 'out', n_input, random_state)

nll = squared_error(out, X_sym).mean()
# log p(x | z) = -nll, so the variational lower bound is -nll - kl
# we minimize the cost during optimization, so multiply by -1
cost = -1 * (-nll - kl)
params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0003
opt = adam(params)
updates = opt.updates(params, grads, learning_rate)

# Checkpointing
try:
    checkpoint_dict = load_last_checkpoint()
    fit_function = checkpoint_dict["fit_function"]
    cost_function = checkpoint_dict["cost_function"]
    encode_function = checkpoint_dict["encode_function"]
    decode_function = checkpoint_dict["decode_function"]
    previous_epoch_results = checkpoint_dict["previous_epoch_results"]
except KeyError:
    fit_function = theano.function([X_sym], [nll, kl, nll + kl],
                                   updates=updates)
    cost_function = theano.function([X_sym], [nll + kl])
    encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
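
The kl term used in the cost above is defined outside the visible part of this example. For a diagonal Gaussian encoder with parameters code_mu and code_log_sigma and a standard normal prior, it is typically the closed-form KL divergence sketched below; this is the standard formula, not necessarily the helper the original code uses.

from theano import tensor

# Sketch of KL(q(z | x) || N(0, I)) for a diagonal Gaussian parameterized by
# code_mu and code_log_sigma; the standard closed form, not necessarily the
# exact expression the original example builds.
kl = 0.5 * tensor.sum(tensor.exp(2 * code_log_sigma) + code_mu ** 2
                      - 2 * code_log_sigma - 1, axis=-1).mean()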
Example #3
out = sigmoid_layer([l3_dec], graph, 'out', n_input, random_state=random_state)

nll = binary_crossentropy(out, X_sym).mean()
# log p(x | z) = -nll, so the variational lower bound is -nll - kl
# we minimize the cost during optimization, so multiply by -1
base_cost = -1 * (-nll - kl)

# categorical_crossentropy already gives -log q(y | x), the classifier's
# negative log likelihood, so no sign flip is needed
alpha = 0.1
err = categorical_crossentropy(y_pred, y_sym).mean()
cost = base_cost + alpha * err

params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0001
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [nll, kl, nll + kl],
                               updates=updates)
cost_function = theano.function([X_sym, y_sym], [nll + kl])
predict_function = theano.function([X_sym], [y_pred])
encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
decode_function = theano.function([samp, y_sym], [out])

checkpoint_dict = create_or_continue_from_checkpoint_dict(locals())

train_itr = minibatch_iterator([X, y],
                               minibatch_size,
                               stop_index=train_end,
                               axis=0)
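
The snippet ends after building train_itr; one pass over it could look like the sketch below. This is an assumption about usage rather than part of the original: train_itr is assumed to yield one (X_mb, y_mb) pair per step, since it was constructed from [X, y].

import numpy as np

# Hypothetical fitting pass; train_itr is assumed to yield (X_mb, y_mb) pairs.
epoch_results = []
for X_mb, y_mb in train_itr:
    # fit_function applies the adam updates and returns [nll, kl, nll + kl]
    epoch_results.append(fit_function(X_mb, y_mb))
nll_m, kl_m, total_m = np.mean(np.asarray(epoch_results), axis=0)
print("nll %f, kl %f, total %f" % (nll_m, kl_m, total_m))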