def test_batch_normalization():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph,
                                         list_of_test_values=[X, y])
    # mode_switch flag: 0 selects training mode, 1 selects inference mode
    on_off = tensor.iscalar()
    on_off.tag.test_value = 1
    l1 = relu_layer([X_sym], graph, "proj", proj_dim=5, batch_normalize=True,
                    mode_switch=on_off, random_state=random_state)
    l2 = relu_layer([l1], graph, "proj2", proj_dim=5, batch_normalize=True,
                    mode_switch=on_off, random_state=random_state)
    # compile once to check that the batch-normalized graph builds
    f = theano.function([X_sym, on_off], [l2], mode="FAST_COMPILE")
    params, grads = get_params_and_grads(graph, l2.mean())
    opt = sgd(params, .1)
    updates = opt.updates(params, grads)
    train_f = theano.function([X_sym, on_off], [l2], mode="FAST_COMPILE",
                              updates=updates)
    valid_f = theano.function([X_sym, on_off], [l2], mode="FAST_COMPILE")
    X1 = random_state.rand(*X.shape)
    X2 = np.vstack([X1, .5 * X1])
    t1 = train_f(X1, 0)[0]
    t2 = valid_f(X1, 1)[0]
    t3 = train_f(X2, 0)[0]
    t4 = valid_f(X1, 1)[0]
    t5 = valid_f(X1, 1)[0]
    # inference is deterministic: back-to-back calls with no training
    # step between them must agree
    assert_almost_equal(t4, t5)
    # the training step on X2 changed the statistics used at inference,
    # so t2 and t4 must differ
    assert_raises(AssertionError, assert_almost_equal, t2, t4)
def test_vae():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    # encoder / recognition model, conditioned on both X and y
    l1_enc = relu_layer([X_sym, y_sym], graph, 'l1_enc', proj_dim=20,
                        random_state=random_state)
    mu = linear_layer([l1_enc], graph, 'mu', proj_dim=10,
                      random_state=random_state)
    log_sigma = linear_layer([l1_enc], graph, 'log_sigma', proj_dim=10,
                             random_state=random_state)
    # reparameterized Gaussian sample from (mu, log_sigma)
    samp = gaussian_log_sample_layer([mu], [log_sigma], graph,
                                     'gaussian_log_sample',
                                     random_state=random_state)
    # decoder / generative model
    l1_dec = relu_layer([samp], graph, 'l1_dec', proj_dim=20,
                        random_state=random_state)
    out = sigmoid_layer([l1_dec], graph, 'out', proj_dim=X.shape[1],
                        random_state=random_state)
    # variational cost: reconstruction term plus KL term
    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    cost = binary_crossentropy(out, X_sym).mean() + kl
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)
    train_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                     mode="FAST_COMPILE")
    iterate_function(train_function, [X, y], minibatch_size,
                     list_of_output_names=["cost"], n_epochs=1)
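Since mu and log_sigma are ordinary outputs of the same graph, an encoder-only function can be compiled alongside train_function to inspect the learned latent statistics. A minimal sketch, assuming the same X and y arrays added to the graph above; encode_function and the sliced inputs are illustrative, not part of the test:

# Hedged sketch: compile the recognition half of the VAE on its own.
encode_function = theano.function([X_sym, y_sym], [mu, log_sigma],
                                  mode="FAST_COMPILE")
mu_vals, log_sigma_vals = encode_function(X[:minibatch_size],
                                          y[:minibatch_size])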
valid_indices = mnist["valid_indices"]
X = mnist["data"]
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 20
n_hid = 1000

l1 = relu_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2

params, grads = get_params_and_grads(graph, cost)
learning_rate = 1E-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
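fit_function only returns the training cost, so scoring the held-out rows needs a separate prediction function compiled from the same graph. A minimal sketch, assuming valid_indices from above indexes rows of X and the one-hot y; predict_function and valid_acc are illustrative names:

# Hedged sketch: y_pred depends only on X_sym, so it compiles with one input.
predict_function = theano.function([X_sym], [y_pred])
valid_probs = predict_function(X[valid_indices])[0]
# accuracy against the one-hot targets
valid_acc = np.mean(np.argmax(valid_probs, axis=1) ==
                    np.argmax(y[valid_indices], axis=1))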
X = mnist["data"]
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 100
n_hid = 512

# q(y_pred | x)
l1 = relu_layer([X_sym], graph, 'l1', n_hid, random_state)
l2 = relu_layer([l1], graph, 'l2', n_hid, random_state)
y_pred = softmax_layer([l2], graph, 'y_pred', n_targets, random_state)
nll = categorical_crossentropy(y_pred, y_sym).mean()
cost = nll

params, grads = get_params_and_grads(graph, cost)
learning_rate = 0.0002
opt = adam(params)
updates = opt.updates(params, grads, learning_rate)

# Checkpointing
try:
    checkpoint_dict = load_last_checkpoint()
    fit_function = checkpoint_dict["fit_function"]
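# The snippet above breaks off inside the try block. What follows is a hedged
# sketch of the fresh-run fallback this checkpointing pattern implies, not the
# source's own code: the exception types caught and the checkpoint_dict layout
# are assumptions.
except (KeyError, IOError):
    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
    checkpoint_dict = {"fit_function": fit_function}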
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph,
                                     list_of_test_values=[X[:10], y[:10]])
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 128
n_hid = 1000

on_off = tensor.iscalar()
on_off.tag.test_value = 0
l1 = relu_layer([X_sym], graph, 'l1', proj_dim=n_hid, batch_normalize=True,
                mode_switch=on_off, random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2

params, grads = get_params_and_grads(graph, cost)
learning_rate = 0.1
momentum = 0.9
opt = sgd_nesterov(params, learning_rate, momentum)
updates = opt.updates(params, grads)
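Because the batch-normalized layer takes the mode_switch scalar as an extra input, any function compiled from this graph needs on_off passed alongside the data. A minimal sketch of a single training step, with 0 for training mode as in the batch normalization test earlier; fit_function and batch_cost are illustrative names:

# Hedged sketch: compile and take one step in batch-norm training mode.
fit_function = theano.function([X_sym, y_sym, on_off], [cost],
                               updates=updates)
batch_cost = fit_function(X[:minibatch_size], y[:minibatch_size], 0)[0]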