def test_feedforward_theano_mix():
    """Smoke-test a linear -> softmax model with a raw Theano op in between.

    Builds the graph, compiles a training function and a cost function in
    ``FAST_COMPILE`` mode, and calls each once on the first minibatch drawn
    from ``X`` / ``y``.  ``X``, ``y`` and ``n_classes`` come from the
    enclosing scope.  There are no assertions: the test succeeds when
    nothing raises.
    """
    del_shared()
    batch_size = 100
    rng = np.random.RandomState(1999)

    X_sym = tensor.fmatrix()
    y_sym = tensor.fmatrix()

    hidden = linear([X_sym], [X.shape[1]], proj_dim=20, name='l1',
                    random_state=rng)
    # The "mix": a plain Theano expression applied to a layer output.
    hidden = .999 * hidden
    y_pred = softmax([hidden], [20], proj_dim=n_classes, name='out',
                     random_state=rng)
    cost = categorical_crossentropy(y_pred, y_sym).mean()

    params = list(get_params().values())
    grads = theano.grad(cost, params)
    opt = sgd(params, 0.001)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, y_sym], [cost],
                                   updates=updates, mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")

    # Two independent iterators over the same data; take one batch from each.
    train_itr = minibatch_iterator([X, y], batch_size, axis=0)
    valid_itr = minibatch_iterator([X, y], batch_size, axis=0)
    X_train, y_train = next(train_itr)
    X_valid, y_valid = next(valid_itr)

    fit_function(X_train, y_train)
    cost_function(X_valid, y_valid)
predict_function = theano.function([X_sym], [y_pred])
# locals() is handed to the checkpoint helper, so the names bound above are
# visible to it; keep them stable.
checkpoint_dict = create_checkpoint_dict(locals())


def error(*args):
    """Return the fraction of rows where prediction and target argmax differ.

    Every positional argument except the last is forwarded to
    ``predict_function``; the last one is the target array.  Both the
    prediction and the target are reduced with ``argmax`` along axis 1
    (presumably one-hot / per-class score rows -- confirm with callers).
    """
    inputs, targets = args[:-1], args[-1]
    predictions = predict_function(*inputs)[0]
    predicted_labels = np.argmax(predictions, axis=1).ravel()
    target_labels = np.argmax(targets, axis=1).ravel()
    return 1 - np.mean(predicted_labels == target_labels)


train_itr = minibatch_iterator([X, y], minibatch_size, axis=0,
                               stop_index=60000)
valid_itr = minibatch_iterator([X, y], minibatch_size, axis=0,
                               start_index=60000)

TL = TrainingLoop(
    fit_function,
    cost_function,
    train_itr,
    valid_itr,
    checkpoint_dict=checkpoint_dict,
    list_of_train_output_names=["train_cost"],
    valid_output_name="valid_cost",
    n_epochs=100,
    optimizer_object=opt,
)
# Optimiser setup over the parameters returned by get_params(); `cost` is
# defined before this excerpt.
params = list(get_params().values())
grads = theano.grad(cost, params)
learning_rate = 0.0003
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

# fit_function outputs, in order: nll, kl, nll + kl.
fit_function = theano.function([X_sym], [nll, kl, nll + kl],
                               updates=updates)
cost_function = theano.function([X_sym], [nll + kl])
encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
decode_function = theano.function([samp], [out])
# locals() is passed on, so the names bound above must keep their spelling.
checkpoint_dict = create_checkpoint_dict(locals())

train_itr = minibatch_iterator([X], minibatch_size, stop_index=60000,
                               axis=0)
valid_itr = minibatch_iterator([X], minibatch_size, start_index=60000,
                               stop_index=70000, axis=0)


def train_loop(itr):
    """Run one training step on the next minibatch and return ``[nll + kl]``.

    Only the third output of ``fit_function`` is kept, wrapped in a
    one-element list.
    """
    batch_outputs = fit_function(next(itr))
    return [batch_outputs[2]]


def valid_loop(itr):
    """Evaluate ``cost_function`` on the next minibatch; return its result."""
    return cost_function(next(itr))
# Optimiser setup.  `params` and `grads` are defined before this excerpt.
learning_rate = 0.0001
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

# Training step: outputs nll, kl and their sum (in that order) and applies
# the optimiser updates on every call.
fit_function = theano.function([X_sym, y_sym], [nll, kl, nll + kl],
                               updates=updates)
# Same summed cost without any parameter update.
cost_function = theano.function([X_sym, y_sym], [nll + kl])
predict_function = theano.function([X_sym], [y_pred])
encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
# Decoding takes both the sampled code and the label input.
decode_function = theano.function([samp, y_sym], [out])

# locals() is passed to the checkpoint helper, so every name bound above is
# visible to it; what it stores or restores is not visible in this excerpt.
checkpoint_dict = create_or_continue_from_checkpoint_dict(locals())

# NOTE(review): presumably rows before `train_end` are used for training and
# the rest for validation -- confirm against minibatch_iterator.
train_itr = minibatch_iterator([X, y], minibatch_size, stop_index=train_end,
                               axis=0)
valid_itr = minibatch_iterator([X, y], minibatch_size, start_index=train_end,
                               axis=0)

# The three train output names line up with fit_function's three outputs.
TL = TrainingLoop(fit_function, cost_function, train_itr, valid_itr,
                  checkpoint_dict=checkpoint_dict,
                  list_of_train_output_names=["nll", "kl", "lower_bound"],
                  valid_output_name="valid_lower_bound", n_epochs=2000)
epoch_results = TL.run()
from collections import OrderedDict
import numpy as np

random_state = np.random.RandomState(1999)
graph = OrderedDict()

# Targets: every distinct ordering of the letters of `base_string`, sorted.
base_string = "cat"
true_strings = sorted({"".join(letters)
                       for letters in itertools.permutations(base_string)})

ocr = make_ocr(true_strings)
X = ocr["data"]
vocab = ocr["vocabulary"]
one_hot_targets = convert_to_one_hot(ocr["target"], n_classes=len(vocab))
y = one_hot_targets.astype(theano.config.floatX)

minibatch_size = 2
mbs = minibatch_size

train_itr = minibatch_iterator([X, y], minibatch_size, make_mask=True,
                               axis=1)
# Draw one minibatch (reused below as test values), then reset the iterator.
X_mb, X_mb_mask, y_mb, y_mb_mask = next(train_itr)
train_itr.reset()
valid_itr = minibatch_iterator([X, y], minibatch_size, make_mask=True,
                               axis=1)

datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list,
    names_list,
    graph,
    list_of_test_values=datasets_list,
)

n_hid = 256
n_out = 8
h = location_attention_tanh_recurrent_layer(
    [X_sym],
    [y_sym],
    X_mask_sym,
    y_mask_sym,
    n_hid,
    graph,
    'l1_att_rec',
    random_state=random_state,
)
# NOTE(review): the two indented statements below are the tail of a function
# whose `def` line precedes this excerpt (presumably `make_sines`, which is
# called further down -- confirm).  They insert a new length-1 axis at
# position 2 and return the array.
    full_sines = full_sines[:, :, None]
    return full_sines


n_timesteps = 50
minibatch_size = 4
full_sines = make_sines(10 * n_timesteps, minibatch_size)
# Keep only the first n_timesteps entries along axis 0.
all_sines = full_sines[:n_timesteps]
n_full = 10 * n_timesteps
# Next-step prediction pairs along axis 0: y[t] is X[t + 1].
X = all_sines[:-1]
y = all_sines[1:]

n_in = 1
n_hid = 20
n_out = 1

train_itr = minibatch_iterator([X, y], minibatch_size, axis=1)
valid_itr = minibatch_iterator([X, y], minibatch_size, axis=1)

# Zero initial state of width 2 * n_hid per minibatch row
# (presumably hidden and cell state side by side -- TODO confirm).
h_init = np.zeros((minibatch_size, 2 * n_hid)).astype("float32")

X_sym = tensor.tensor3()
y_sym = tensor.tensor3()
h0 = tensor.fmatrix()

random_state = np.random.RandomState(1999)
X_fork = lstm_fork([X_sym], [n_in], n_hid, name="h1",
                   random_state=random_state)


def step(in_t, h_tm1):
    """Compute one recurrence step by delegating to ``lstm``.

    ``in_t`` and ``h_tm1`` are passed straight through together with the
    module-level ``n_in``, ``n_hid`` and ``random_state``; the layer is
    created with ``name=None``.  Returns whatever ``lstm`` returns.
    NOTE(review): presumably used as the step function of a scan loop --
    the call site is outside this excerpt.
    """
    h_t = lstm(in_t, h_tm1, [n_in], n_hid, name=None,
               random_state=random_state)
    return h_t
# Output layer and reconstruction term; `l2_dec`, `kl` and the code/sample
# variables used below are defined before this excerpt.
out = sigmoid_layer([l2_dec], graph, 'out', n_input, random_state=random_state)
nll = binary_crossentropy(out, X_sym).mean()
# log p(x) = -nll, so the quantity to maximise is (-nll - kl).
# The optimiser minimises, so multiply by -1 (algebraically nll + kl).
cost = -1 * (-nll - kl)
params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0003
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

# Training step: outputs nll, kl and their sum (in that order) and applies
# the optimiser updates on every call.
fit_function = theano.function([X_sym], [nll, kl, nll + kl], updates=updates)
cost_function = theano.function([X_sym], [nll + kl])
encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
decode_function = theano.function([samp], [out])

# locals() is passed to the checkpoint helper, so every name bound above is
# visible to it; what it stores or restores is not visible in this excerpt.
checkpoint_dict = create_or_continue_from_checkpoint_dict(locals())

# NOTE(review): presumably rows before `train_end` are used for training and
# the rest for validation -- confirm against minibatch_iterator.
train_itr = minibatch_iterator([X], minibatch_size, stop_index=train_end,
                               axis=0)
valid_itr = minibatch_iterator([X], minibatch_size, start_index=train_end,
                               axis=0)

# The three train output names line up with fit_function's three outputs.
TL = TrainingLoop(
    fit_function, cost_function, train_itr, valid_itr,
    checkpoint_dict=checkpoint_dict,
    list_of_train_output_names=["nll", "kl", "lower_bound"],
    valid_output_name="valid_lower_bound", n_epochs=2000)
epoch_results = TL.run()