def test_vae():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    l1_enc = relu_layer([X_sym, y_sym], graph, 'l1_enc', proj_dim=20,
                        random_state=random_state)
    mu = linear_layer([l1_enc], graph, 'mu', proj_dim=10,
                      random_state=random_state)
    log_sigma = linear_layer([l1_enc], graph, 'log_sigma', proj_dim=10,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma], graph,
                                     'gaussian_log_sample',
                                     random_state=random_state)
    l1_dec = relu_layer([samp], graph, 'l1_dec', proj_dim=20,
                        random_state=random_state)
    out = sigmoid_layer([l1_dec], graph, 'out', proj_dim=X.shape[1],
                        random_state=random_state)
    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    cost = binary_crossentropy(out, X_sym).mean() + kl
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)
    train_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                     mode="FAST_COMPILE")
    iterate_function(train_function, [X, y], minibatch_size,
                     list_of_output_names=["cost"], n_epochs=1)
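# For reference, a minimal NumPy sketch of the closed-form KL term that the
# gaussian_log_kl node presumably computes above: the divergence of a
# diagonal Gaussian q = N(mu, sigma ** 2), parameterized through log sigma,
# from a standard normal prior. This is an illustrative assumption, not
# dagbldr's actual implementation.
def _diagonal_gaussian_log_kl_sketch(mu, log_sigma):
    # KL(N(mu, sigma^2) || N(0, I)) summed over the code dimension:
    return -0.5 * np.sum(1. + 2. * log_sigma - mu ** 2
                         - np.exp(2. * log_sigma), axis=-1)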
def test_gaussian_kl():
    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    fake_sigma = (.5 * X_sym + .001) ** 2
    kl = gaussian_kl([X_sym, X_sym], [fake_sigma, fake_sigma], graph,
                     'gaussian_kl')
    theano.function([X_sym], [kl], mode="FAST_COMPILE")
def test_feedforward_theano_mix():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    l1_o = linear_layer([X_sym], graph, 'l1', proj_dim=20,
                        random_state=random_state)
    l1_o = .999 * l1_o
    y_pred = softmax_layer([l1_o], graph, 'pred', n_classes,
                           random_state=random_state)
    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)
    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function, checkpoint_dict,
                           [X, y], minibatch_size,
                           train_indices, valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
def test_batch_normalization():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph,
                                         list_of_test_values=[X, y])
    on_off = tensor.iscalar()
    on_off.tag.test_value = 1
    l1 = relu_layer([X_sym], graph, "proj", proj_dim=5,
                    batch_normalize=True, mode_switch=on_off,
                    random_state=random_state)
    l2 = relu_layer([l1], graph, "proj2", proj_dim=5,
                    batch_normalize=True, mode_switch=on_off,
                    random_state=random_state)
    f = theano.function([X_sym, on_off], [l2], mode="FAST_COMPILE")
    params, grads = get_params_and_grads(graph, l2.mean())
    opt = sgd(params, .1)
    updates = opt.updates(params, grads)
    train_f = theano.function([X_sym, on_off], [l2], mode="FAST_COMPILE",
                              updates=updates)
    valid_f = theano.function([X_sym, on_off], [l2], mode="FAST_COMPILE")
    X1 = random_state.rand(*X.shape)
    X2 = np.vstack([X1, .5 * X1])
    t1 = train_f(X1, 0)[0]
    t2 = valid_f(X1, 1)[0]
    t3 = train_f(X2, 0)[0]
    t4 = valid_f(X1, 1)[0]
    t5 = valid_f(X1, 1)[0]
    assert_almost_equal(t4, t5)
    assert_raises(AssertionError, assert_almost_equal, t2, t4)
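# A rough NumPy sketch of the behavior the batch_normalize / mode_switch
# pair is exercising above (an assumption about standard batch
# normalization, not dagbldr's exact implementation): mode 0 normalizes
# with the statistics of the current minibatch, while mode 1 uses stored
# running statistics, which is why repeated valid_f calls on the same input
# agree (t4 == t5) while train and valid outputs differ (t2 != t4).
def _batch_norm_sketch(x, running_mean, running_std, mode, eps=1e-8):
    if mode == 0:  # train: use minibatch statistics
        mean, std = x.mean(axis=0), x.std(axis=0)
    else:          # inference: use accumulated running statistics
        mean, std = running_mean, running_std
    return (x - mean) / (std + eps)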
def test_tanh_rnn():
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    # home of the computational graph
    graph = OrderedDict()
    # number of hidden features
    n_hid = 10
    # number of output_features = input_features
    n_out = X.shape[-1]
    # input (where first dimension is time)
    datasets_list = [X, X_mask, y, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    test_values_list = [X, X_mask, y, y_mask]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph,
        list_of_test_values=test_values_list)
    # Setup weights
    l1 = linear_layer([X_sym], graph, 'l1_proj', n_hid, random_state)
    h = tanh_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                             random_state)
    # linear output activation
    y_hat = linear_layer([h], graph, 'l2_proj', n_out, random_state)
    # error between output and target
    cost = squared_error(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    params, grads = get_params_and_grads(graph, cost)
    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.001
    updates = opt.updates(params, grads, learning_rate)
    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(X.shape[1])
    valid_indices = np.arange(X.shape[1])
    early_stopping_trainer(fit_function, cost_function, checkpoint_dict,
                           [X, y], minibatch_size,
                           train_indices, valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
def test_conditional_gru_recurrent():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    n_hid = 5
    n_out = n_chars
    # input (where first dimension is time)
    datasets_list = [X_mb, X_mask, y_mb, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)
    h = gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph, 'l1_end',
                            random_state)
    shifted_y_sym = shift_layer([y_sym], graph, 'shift')
    h_dec, context = conditional_gru_recurrent_layer([y_sym], [h], y_mask_sym,
                                                     n_hid, graph, 'l2_dec',
                                                     random_state)
    # linear output activation
    y_hat = softmax_layer([h_dec, context, shifted_y_sym], graph, 'l2_proj',
                          n_out, random_state=random_state)
    # error between output and target
    cost = categorical_crossentropy(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    """
    params, grads = get_params_and_grads(graph, cost)
    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.00000
    updates = opt.updates(params, grads, learning_rate)
    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    """
    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(cost_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict, [X, y], minibatch_size,
                           list_of_minibatch_functions=[text_minibatch_func],
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost",
                           n_epochs=1)
def test_conditional_gru_recurrent():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    n_hid = 5
    n_out = n_chars
    # input (where first dimension is time)
    datasets_list = [X_mb, X_mask, y_mb, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)
    h = gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph, 'l1_end',
                            random_state)
    shifted_y_sym = shift_layer([y_sym], graph, 'shift')
    h_dec, context = conditional_gru_recurrent_layer([y_sym], [h], y_mask_sym,
                                                     n_hid, graph, 'l2_dec',
                                                     random_state)
    # linear output activation
    y_hat = softmax_layer([h_dec, context, shifted_y_sym], graph, 'l2_proj',
                          n_out, random_state)
    # error between output and target
    cost = categorical_crossentropy(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    """
    params, grads = get_params_and_grads(graph, cost)
    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.00000
    updates = opt.updates(params, grads, learning_rate)
    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    """
    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(cost_function, cost_function, checkpoint_dict,
                           [X, y], minibatch_size,
                           train_indices, valid_indices,
                           list_of_minibatch_functions=[text_minibatch_func],
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
def test_tanh_rnn():
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    # home of the computational graph
    graph = OrderedDict()
    # number of hidden features
    n_hid = 10
    # number of output_features = input_features
    n_out = X.shape[-1]
    # input (where first dimension is time)
    datasets_list = [X, X_mask, y, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    test_values_list = [X, X_mask, y, y_mask]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph,
        list_of_test_values=test_values_list)
    # Setup weights
    l1 = linear_layer([X_sym], graph, 'l1_proj', proj_dim=n_hid,
                      random_state=random_state)
    h = tanh_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                             random_state)
    # linear output activation
    y_hat = linear_layer([h], graph, 'l2_proj', proj_dim=n_out,
                         random_state=random_state)
    # error between output and target
    cost = squared_error(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    params, grads = get_params_and_grads(graph, cost)
    # Use stochastic gradient descent to optimize
    learning_rate = 0.001
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)
    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(X.shape[1])
    valid_indices = np.arange(X.shape[1])
    early_stopping_trainer(fit_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict, [X, y], minibatch_size,
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost",
                           n_epochs=1)
def test_conv2d_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    # fake 3-channel MNIST-sized data; use the seeded state for determinism
    X_r = random_state.randn(10, 3, 28, 28).astype(theano.config.floatX)
    X_sym = add_datasets_to_graph([X_r], ["X"], graph)
    l1 = conv2d_layer([X_sym], graph, 'l1', 5, random_state=random_state)
    # test that conv layers can stack as well
    l2 = conv2d_layer([l1], graph, 'l2', 6, random_state=random_state)
    f = theano.function([X_sym], [l1, l2], mode="FAST_COMPILE")
    l1, l2 = f(X_r)
def test_softmax_sample_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    softmax = softmax_layer([X_sym], graph, 'softmax', proj_dim=20,
                            random_state=random_state)
    samp = softmax_sample_layer([softmax], graph, 'softmax_sample',
                                random_state=random_state)
    out = linear_layer([samp], graph, 'out', proj_dim=10,
                       random_state=random_state)
    f = theano.function([X_sym], [out], mode="FAST_COMPILE")
def test_gaussian_sample_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    mu = linear_layer([X_sym], graph, 'mu', proj_dim=20,
                      random_state=random_state)
    sigma = softplus_layer([X_sym], graph, 'sigma', proj_dim=20,
                           random_state=random_state)
    samp = gaussian_sample_layer([mu], [sigma], graph, 'gaussian_sample',
                                 random_state=random_state)
    out = linear_layer([samp], graph, 'out', proj_dim=10,
                       random_state=random_state)
    f = theano.function([X_sym], [out], mode="FAST_COMPILE")
def test_dropout_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    on_off = tensor.iscalar()
    dropped = dropout_layer([X_sym], graph, 'dropout', on_off,
                            random_state=random_state)
    f = theano.function([X_sym, on_off], [dropped], mode="FAST_COMPILE")
    drop = f(np.ones_like(X), 1)[0]
    full = f(np.ones_like(X), 0)[0]
    # Make sure drop switch works
    assert_almost_equal((full.sum() / 2) / drop.sum(), 1., decimal=2)
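# Why the assertion above holds: with a drop probability of 0.5 and no
# rescaling of survivors at train time (an assumption consistent with the
# factor-of-two check), about half the units of an all-ones input survive,
# so (full.sum() / 2) / drop.sum() is close to 1. A minimal sketch:
def _dropout_sketch(x, random_state, p_drop=0.5):
    # Bernoulli keep-mask; surviving units are left unscaled
    mask = random_state.binomial(1, 1. - p_drop, size=x.shape)
    return x * mask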
def test_maxout_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    single_o = maxout_layer([X_sym], graph, 'single', proj_dim=5,
                            random_state=random_state)
    concat_o = maxout_layer([X_sym, y_sym], graph, 'concat', proj_dim=5,
                            random_state=random_state)
    # Check that strict mode raises an error if a name is repeated
    assert_raises(AssertionError, maxout_layer, [X_sym], graph, 'concat')
    f = theano.function([X_sym, y_sym], [single_o, concat_o],
                        mode="FAST_COMPILE")
    single, concat = f(X, y)
def test_softmax_zeros_layer():
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    single_o = softmax_zeros_layer([X_sym], graph, 'single', proj_dim=5)
    concat_o = softmax_zeros_layer([X_sym, y_sym], graph, 'concat',
                                   proj_dim=5)
    # Check that things can be reused
    repeated_o = softmax_layer([X_sym], graph, 'single', strict=False)
    # Check that strict mode raises an error if a name is repeated
    assert_raises(AttributeError, softmax_layer, [X_sym], graph, 'concat')
    f = theano.function([X_sym, y_sym], [single_o, concat_o, repeated_o],
                        mode="FAST_COMPILE")
    single, concat, repeat = f(X, y)
    assert_almost_equal(single, repeat)
def run_common_layer(layer):
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    single_o = layer([X_sym], graph, 'single', proj_dim=5,
                     random_state=random_state)
    concat_o = layer([X_sym, y_sym], graph, 'concat', proj_dim=5,
                     random_state=random_state)
    # Check that things can be reused
    repeated_o = layer([X_sym], graph, 'single', strict=False)
    # Check that strict mode raises an error if a name is repeated
    assert_raises(AttributeError, layer, [X_sym], graph, 'concat')
    f = theano.function([X_sym, y_sym], [single_o, concat_o, repeated_o],
                        mode="FAST_COMPILE")
    single, concat, repeat = f(X, y)
    assert_almost_equal(single, repeat)
def test_vae():
    minibatch_size = 10
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    l1_enc = softplus_layer([X_sym], graph, 'l1_enc', proj_dim=100,
                            random_state=random_state)
    mu = linear_layer([l1_enc], graph, 'mu', proj_dim=50,
                      random_state=random_state)
    log_sigma = linear_layer([l1_enc], graph, 'log_sigma', proj_dim=50,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma], graph,
                                     'gaussian_log_sample',
                                     random_state=random_state)
    l1_dec = softplus_layer([samp], graph, 'l1_dec', proj_dim=100,
                            random_state=random_state)
    out = sigmoid_layer([l1_dec], graph, 'out', proj_dim=X.shape[1],
                        random_state=random_state)
    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    cost = binary_crossentropy(out, X_sym).mean() + kl
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.00000
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)
    fit_function = theano.function([X_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym], [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict, [X], minibatch_size,
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost",
                           n_epochs=1)
def test_feedforward_classifier():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    l1_o = linear_layer([X_sym], graph, "l1", proj_dim=20,
                        random_state=random_state)
    y_pred = softmax_layer([l1_o], graph, "pred", n_classes,
                           random_state=random_state)
    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)
    train_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                     mode="FAST_COMPILE")
    iterate_function(train_function, [X, y], minibatch_size,
                     list_of_output_names=["cost"], n_epochs=1)
def test_rnn_correlated_mixture_density():
    # graph holds information necessary to build layers from parents
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    minibatch_size = 5
    X_seq = np.array([bernoulli_X for i in range(minibatch_size)])
    y_seq = np.array([bernoulli_y for i in range(minibatch_size)])
    X_mb, X_mb_mask = make_masked_minibatch(X_seq, slice(0, minibatch_size))
    y_mb, y_mb_mask = make_masked_minibatch(y_seq, slice(0, minibatch_size))
    datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)
    n_hid = 5
    train_indices = np.arange(len(X_seq))
    valid_indices = np.arange(len(X_seq))
    l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                    random_state=random_state)
    h = gru_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                            random_state=random_state)
    rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
        [h], graph, 'hw', proj_dim=2, n_components=3,
        random_state=random_state)
    binary, coeffs, mus, log_sigmas, corr = rval
    cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
        binary, coeffs, mus, log_sigmas, corr, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")
    checkpoint_dict = create_checkpoint_dict(locals())
    epoch_results = fixed_n_epochs_trainer(
        cost_function, cost_function, train_indices, valid_indices,
        checkpoint_dict, [X_seq, y_seq], minibatch_size,
        list_of_minibatch_functions=[make_masked_minibatch,
                                     make_masked_minibatch],
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
def test_fixed_projection_layer():
    random_state = np.random.RandomState(1999)
    rand_projection = random_state.randn(64, 12)
    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    out = fixed_projection_layer([X_sym], rand_projection, graph, 'proj')
    out2 = fixed_projection_layer([X_sym], rand_projection, graph, 'proj',
                                  pre=rand_projection[:, 0])
    out3 = fixed_projection_layer([X_sym], rand_projection, graph, 'proj',
                                  post=rand_projection[0])
    final = linear_layer([out2], graph, 'linear', 17,
                         random_state=random_state)
    # Test that it compiles with and without bias
    f = theano.function([X_sym], [out, out2, out3, final],
                        mode="FAST_COMPILE")
    # Test updates
    params, grads = get_params_and_grads(graph, final.mean())
    opt = sgd(params)
    updates = opt.updates(params, grads, .1)
    f2 = theano.function([X_sym], [out2, final], updates=updates)
    ret = f(np.ones_like(X))[0]
    assert ret.shape[1] != X.shape[1]
    ret2 = f(np.ones_like(X))[1]
    assert ret2.shape[1] != X.shape[1]
    out1, final1 = f2(X)
    out2, final2 = f2(X)
    # Make sure fixed basis is unchanged
    assert_almost_equal(out1, out2)
    # Make sure linear layer is updated
    assert_raises(AssertionError, assert_almost_equal, final1, final2)
def test_correlated_mixture_density():
    # graph holds information necessary to build layers from parents
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([bernoulli_X, bernoulli_y],
                                         ["X", "y"], graph)
    n_hid = 20
    minibatch_size = len(bernoulli_X)
    train_indices = np.arange(len(bernoulli_X))
    valid_indices = np.arange(len(bernoulli_X))
    l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                    random_state=random_state)
    rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
        [l1], graph, 'hw', proj_dim=2, n_components=3,
        random_state=random_state)
    binary, coeffs, mus, log_sigmas, corr = rval
    cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
        binary, coeffs, mus, log_sigmas, corr, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 1E-6
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)
    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")
    checkpoint_dict = create_checkpoint_dict(locals())
    epoch_results = fixed_n_epochs_trainer(
        fit_function, cost_function, train_indices, valid_indices,
        checkpoint_dict, [bernoulli_X, bernoulli_y], minibatch_size,
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
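# Background for the cost above, as a hedged sketch: the layer/cost pair
# appears to follow the well-known Bernoulli + correlated bivariate Gaussian
# mixture from Graves-style handwriting models. For one component with means
# mu, log standard deviations log_sigma and correlation corr, the bivariate
# log density is (illustrative NumPy, not dagbldr's implementation):
def _correlated_gaussian_log_density_sketch(y, mu, log_sigma, corr):
    # log N(y | mu, sigma, rho) for a single 2D component
    s1, s2 = np.exp(log_sigma[0]), np.exp(log_sigma[1])
    d1, d2 = y[0] - mu[0], y[1] - mu[1]
    z = (d1 / s1) ** 2 + (d2 / s2) ** 2 - 2. * corr * d1 * d2 / (s1 * s2)
    return (-z / (2. * (1. - corr ** 2))
            - np.log(2 * np.pi * s1 * s2 * np.sqrt(1. - corr ** 2)))
# The full cost would then be -log(sum_k coeffs_k * exp(log_density_k)) plus
# a Bernoulli cross-entropy term for the binary output dimension.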
def test_fixed_projection_layer():
    random_state = np.random.RandomState(1999)
    rand_projection = random_state.randn(64, 12)
    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    out = fixed_projection_layer([X_sym], rand_projection, graph, 'proj')
    out2 = fixed_projection_layer([X_sym], rand_projection, graph, 'proj',
                                  pre=rand_projection[:, 0])
    out3 = fixed_projection_layer([X_sym], rand_projection, graph, 'proj',
                                  post=rand_projection[0])
    final = linear_layer([out2], graph, 'linear', 17,
                         random_state=random_state)
    # Test that it compiles with and without bias
    f = theano.function([X_sym], [out, out2, out3, final],
                        mode="FAST_COMPILE")
    # Test updates
    params, grads = get_params_and_grads(graph, final.mean())
    opt = sgd(params, .1)
    updates = opt.updates(params, grads)
    f2 = theano.function([X_sym], [out2, final], updates=updates)
    ret = f(np.ones_like(X))[0]
    assert ret.shape[1] != X.shape[1]
    ret2 = f(np.ones_like(X))[1]
    assert ret2.shape[1] != X.shape[1]
    out1, final1 = f2(X)
    out2, final2 = f2(X)
    # Make sure fixed basis is unchanged
    assert_almost_equal(out1, out2)
    # Make sure linear layer is updated
    assert_raises(AssertionError, assert_almost_equal, final1, final2)
def test_gaussian_log_sample_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    mu = linear_layer([X_sym], graph, 'mu', proj_dim=20,
                      random_state=random_state)
    log_sigma = linear_layer([X_sym], graph, 'log_sigma', proj_dim=20,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma], graph,
                                     'gaussian_sample',
                                     random_state=random_state)
    out = linear_layer([samp], graph, 'out', proj_dim=10,
                       random_state=random_state)
    f = theano.function([X_sym], [out], mode="FAST_COMPILE")
minibatch_size = 32
n_emb = 50
n_hid = 100
X_story_mb, X_story_mask = make_embedding_minibatch(
    X_story, slice(0, minibatch_size))
X_query_mb, X_query_mask = make_embedding_minibatch(
    X_query, slice(0, minibatch_size))
embedding_datasets = [X_story_mb, X_query_mb]
masks = [X_story_mask, X_query_mask]
r = add_embedding_datasets_to_graph(embedding_datasets, masks, "babi_data",
                                    graph)
(X_story_syms, X_query_syms), (X_story_mask_sym, X_query_mask_sym) = r
y_sym = add_datasets_to_graph([y_answer], ["y"], graph)

l1_story = embedding_layer(X_story_syms, vocab_size, n_emb, graph,
                           'l1_story', random_state=random_state)
# broadcast the (time, batch) mask over the embedding dimension
masked_story = X_story_mask_sym.dimshuffle(0, 1, 'x') * l1_story
h_story = gru_recurrent_layer([masked_story], X_story_mask_sym, n_hid, graph,
                              'story_rec', random_state)
l1_query = embedding_layer(X_query_syms, vocab_size, n_emb, graph,
                           'l1_query', random_state)
h_query = gru_recurrent_layer([l1_query], X_query_mask_sym, n_hid, graph,
                              'query_rec', random_state)
y_pred = softmax_layer([h_query[-1], h_story[-1]], graph, 'y_pred',
                       y_answer.shape[1], random_state=random_state)
cost = categorical_crossentropy(y_pred, y_sym).mean()
minibatch_size = 32
n_emb = 50
n_hid = 100
X_story_mb, X_story_mask = make_embedding_minibatch(
    X_story, slice(0, minibatch_size))
X_query_mb, X_query_mask = make_embedding_minibatch(
    X_query, slice(0, minibatch_size))
embedding_datasets = [X_story_mb, X_query_mb]
masks = [X_story_mask, X_query_mask]
r = add_embedding_datasets_to_graph(embedding_datasets, masks, "babi_data",
                                    graph)
(X_story_syms, X_query_syms), (X_story_mask_sym, X_query_mask_sym) = r
y_sym = add_datasets_to_graph([y_answer], ["y"], graph)

l1_story = embedding_layer(X_story_syms, vocab_size, n_emb, graph,
                           'l1_story', random_state)
masked_story = X_story_mask_sym.dimshuffle(0, 1, 'x') * l1_story
h_story = gru_recurrent_layer([masked_story], X_story_mask_sym, n_hid, graph,
                              'story_rec', random_state)
l1_query = embedding_layer(X_query_syms, vocab_size, n_emb, graph,
                           'l1_query', random_state)
h_query = gru_recurrent_layer([l1_query], X_query_mask_sym, n_hid, graph,
                              'query_rec', random_state)
y_pred = softmax_layer([h_query[-1], h_story[-1]], graph, 'y_pred',
                       y_answer.shape[1], random_state)
cost = categorical_crossentropy(y_pred, y_sym).mean()
from dagbldr.utils import TrainingLoop
from dagbldr.nodes import tanh_layer, softmax_zeros_layer
from dagbldr.nodes import categorical_crossentropy

mnist = fetch_mnist()
train_indices = mnist["train_indices"]
valid_indices = mnist["valid_indices"]
X = mnist["data"]
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph,
                                     list_of_test_values=[X[:10], y[:10]])
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 128
n_hid = 1000

on_off = tensor.iscalar()
on_off.tag.test_value = 0
l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid, batch_normalize=True,
                mode_switch=on_off, random_state=random_state)
from dagbldr.nodes import gaussian_log_sample_layer, gaussian_log_kl
from dagbldr.nodes import squared_error

fer = fetch_fer()
data = fer["data"]
mean_norm = fer["mean0"]
train_indices = fer["train_indices"]
valid_indices = fer["valid_indices"]
X = data - mean_norm
pca_tf = fer["pca_matrix"]
X = np.dot(X, pca_tf.T)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym = add_datasets_to_graph([X], ["X"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 100
n_code = 400
n_enc_layer = [600, 600]
n_dec_layer = [600, 600]
width = 48
height = 48
n_input = width * height

# encode path aka q
l1_enc = softplus_layer([X_sym], graph, 'l1_enc', n_enc_layer[0],
                        random_state)
l2_enc = softplus_layer([l1_enc], graph, 'l2_enc', n_enc_layer[1],
                        random_state)
base_string = "cat" true_strings = sorted(list(set(["".join(i) for i in [ s for s in itertools.permutations(base_string)]]))) ocr = make_ocr(true_strings) X = ocr["data"] vocab = ocr["vocabulary"] y = convert_to_one_hot(ocr["target"], n_classes=len(vocab)).astype( theano.config.floatX) minibatch_size = mbs = 2 train_itr = minibatch_iterator([X, y], minibatch_size, make_mask=True, axis=1) X_mb, X_mb_mask, y_mb, y_mb_mask = next(train_itr) train_itr.reset() valid_itr = minibatch_iterator([X, y], minibatch_size, make_mask=True, axis=1) datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask] names_list = ["X", "X_mask", "y", "y_mask"] X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph( datasets_list, names_list, graph, list_of_test_values=datasets_list) n_hid = 256 n_out = 8 h = location_attention_tanh_recurrent_layer( [X_sym], [y_sym], X_mask_sym, y_mask_sym, n_hid, graph, 'l1_att_rec', random_state=random_state) X_hat = sigmoid_layer([h], graph, 'output', proj_dim=n_out, random_state=random_state) cost = binary_crossentropy(X_hat, X_sym).mean() cost = masked_cost(cost, X_mask_sym).mean() params, grads = get_params_and_grads(graph, cost) opt = adadelta(params) updates = opt.updates(params, grads)
def test_gaussian_log_kl():
    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    kl = gaussian_log_kl([X_sym, X_sym], [X_sym, X_sym], graph,
                         'gaussian_log_kl')
    theano.function([X_sym], [kl], mode="FAST_COMPILE")
from dagbldr.utils import early_stopping_trainer
from dagbldr.nodes import relu_layer, softmax_zeros_layer
from dagbldr.nodes import categorical_crossentropy

mnist = fetch_mnist()
train_indices = mnist["train_indices"]
valid_indices = mnist["valid_indices"]
X = mnist["data"]
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph,
                                     list_of_test_values=[X[:10], y[:10]])
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 128
n_hid = 1000

on_off = tensor.iscalar()
on_off.tag.test_value = 0
l1 = relu_layer([X_sym], graph, 'l1', proj_dim=n_hid, batch_normalize=True,
                mode_switch=on_off, random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
sine_x, sine_y = make_noisy_sinusoid(n_samples=10000)
# Swap X and Y to create a one to many relationship
sine_x, sine_y = sine_y, sine_x
# Make 1 minibatch with feature dimension 1
sine_x = sine_x[:, None]
sine_y = sine_y[:, None]
X = sine_x
y = sine_y

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph,
                                     list_of_test_values=[sine_x, sine_y])
# random state so script is deterministic
random_state = np.random.RandomState(1999)
# integer division so minibatch_size is an int under Python 3 as well
minibatch_size = len(sine_y) // 20
n_hid = 20
n_out = 1

l1 = tanh_layer([X_sym], graph, "l1", proj_dim=n_hid,
                random_state=random_state)
coeffs, mus, log_sigmas = log_gaussian_mixture_layer(
    [l1], graph, "mdn", proj_dim=1, n_components=24,
    random_state=random_state)
cost = log_gaussian_mixture_cost(coeffs, mus, log_sigmas, y_sym).mean()
params, grads = get_params_and_grads(graph, cost)

opt = adadelta(params)
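# For orientation: log_gaussian_mixture_cost presumably computes the usual
# negative log-likelihood of a diagonal Gaussian mixture. A minimal NumPy
# sketch under that assumption (y broadcasts against the per-component
# parameters along the last axis):
def _gaussian_mixture_nll_sketch(coeffs, mus, log_sigmas, y):
    sigmas = np.exp(log_sigmas)
    densities = np.exp(-0.5 * ((y - mus) / sigmas) ** 2) / (
        np.sqrt(2 * np.pi) * sigmas)
    return -np.log(np.sum(coeffs * densities, axis=-1))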
def test_log_gaussian_error():
    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    cost = log_gaussian_error(.5 * X_sym, .5 * X_sym, X_sym)
    theano.function([X_sym], cost, mode="FAST_COMPILE")
def test_categorical_crossentropy():
    graph = OrderedDict()
    y_sym = add_datasets_to_graph([y], ["y"], graph)
    cost = categorical_crossentropy(.99 * y_sym + .001, y_sym)
    theano.function([y_sym], cost, mode="FAST_COMPILE")
def test_binary_entropy():
    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    cost = binary_entropy(X_sym)
    theano.function([X_sym], cost, mode="FAST_COMPILE")
sine_x, sine_y = make_noisy_sinusoid(n_samples=10000)
# Swap X and Y to create a one to many relationship
sine_x, sine_y = sine_y, sine_x
# Make 1 minibatch with feature dimension 1
sine_x = sine_x[:, None]
sine_y = sine_y[:, None]
X = sine_x
y = sine_y

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph,
                                     list_of_test_values=[sine_x, sine_y])
# random state so script is deterministic
random_state = np.random.RandomState(1999)
# integer division so minibatch_size is an int under Python 3 as well
minibatch_size = len(sine_y) // 20
n_hid = 20
n_out = 1

l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
coeffs, mus, log_sigmas = log_gaussian_mixture_layer(
    [l1], graph, 'mdn', proj_dim=1, n_components=24,
    random_state=random_state)
from dagbldr.utils import convert_to_one_hot, early_stopping_trainer
from dagbldr.nodes import conv2d_layer, pool2d_layer
from dagbldr.nodes import softmax_layer, categorical_crossentropy

mnist = fetch_mnist()
train_indices = mnist["train_indices"]
valid_indices = mnist["valid_indices"]
X = mnist["images"]
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)
minibatch_size = 128

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X[:minibatch_size],
                                      y[:minibatch_size]], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

l1 = conv2d_layer([X_sym], graph, 'conv1', 8, random_state=random_state)
l2 = pool2d_layer([l1], graph, 'pool1')
l3 = conv2d_layer([l2], graph, 'conv2', 16, random_state=random_state)
l4 = pool2d_layer([l3], graph, 'pool2')
l5 = l4.reshape((l4.shape[0], -1))
y_pred = softmax_layer([l5], graph, 'y_pred', n_targets,
                       random_state=random_state)
nll = categorical_crossentropy(y_pred, y_sym).mean()
cost = nll
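# Note: the reshape above flattens the pooled (batch, channels, height,
# width) feature maps to (batch, features) so the fully connected softmax
# layer can consume them; -1 lets Theano infer the flattened dimension.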
def test_masked_cost():
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    cost = gaussian_error(.5 * X_sym, .5 * X_sym, X_sym)
    masked = masked_cost(X_sym, y_sym)
    theano.function([X_sym, y_sym], [cost, masked], mode="FAST_COMPILE")
def test_vae():
    minibatch_size = 10
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    l1_enc = softplus_layer([X_sym], graph, 'l1_enc', proj_dim=100,
                            random_state=random_state)
    mu = linear_layer([l1_enc], graph, 'mu', proj_dim=50,
                      random_state=random_state)
    log_sigma = linear_layer([l1_enc], graph, 'log_sigma', proj_dim=50,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma], graph,
                                     'gaussian_log_sample',
                                     random_state=random_state)
    l1_dec = softplus_layer([samp], graph, 'l1_dec', proj_dim=100,
                            random_state=random_state)
    out = sigmoid_layer([l1_dec], graph, 'out', proj_dim=X.shape[1],
                        random_state=random_state)
    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    cost = binary_crossentropy(out, X_sym).mean() + kl
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.00000
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)
    fit_function = theano.function([X_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym], [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function, checkpoint_dict,
                           [X], minibatch_size,
                           train_indices, valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
X = data["data"] y = data["target"] vocab_size = data["vocabulary_size"] vocab = data["vocabulary"] train_indices = data["train_indices"] valid_indices = train_indices X_mb, X_mb_mask = make_masked_minibatch(X, slice(0, len(X))) y_mb, y_mb_mask = make_masked_minibatch(y, slice(0, len(y))) n_hid = 256 n_out = vocab_size + 1 datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask] names_list = ["X", "X_mask", "y", "y_mask"] X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph( datasets_list, names_list, graph) h = gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph, 'l1_rec', random_state=random_state) y_pred = softmax_layer([h], graph, 'l2_proj', n_out, random_state=random_state) cost = log_ctc_cost(y_sym, y_mask_sym, y_pred, X_mask_sym).mean() params, grads = get_params_and_grads(graph, cost) opt = adadelta(params) updates = opt.updates(params, grads)
X = data["data"] y = data["target"] vocab_size = data["vocabulary_size"] vocab = data["vocabulary"] train_indices = data["train_indices"] valid_indices = train_indices X_mb, X_mb_mask = make_masked_minibatch(X, slice(0, len(X))) y_mb, y_mb_mask = make_masked_minibatch(y, slice(0, len(y))) n_hid = 256 n_out = vocab_size + 1 datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask] names_list = ["X", "X_mask", "y", "y_mask"] X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(datasets_list, names_list, graph) h = gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph, "l1_rec", random_state=random_state) y_pred = softmax_layer([h], graph, "l2_proj", n_out, random_state=random_state) cost = log_ctc_cost(y_sym, y_mask_sym, y_pred, X_mask_sym).mean() params, grads = get_params_and_grads(graph, cost) opt = adadelta(params) updates = opt.updates(params, grads) checkpoint_dict = {} fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost], updates=updates) cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost]) predict_function = theano.function([X_sym, X_mask_sym], [y_pred])
from dagbldr.utils import TrainingLoop
from dagbldr.utils import create_checkpoint_dict
from dagbldr.nodes import relu_layer, softmax_zeros_layer
from dagbldr.nodes import categorical_crossentropy

mnist = fetch_mnist()
train_indices = mnist["train_indices"]
valid_indices = mnist["valid_indices"]
X = mnist["data"]
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)
minibatch_size = 20
n_hid = 1000

l1 = relu_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
from dagbldr.utils import TrainingLoop
from dagbldr.nodes import tanh_layer, softmax_zeros_layer
from dagbldr.nodes import categorical_crossentropy

mnist = fetch_mnist()
train_indices = mnist["train_indices"]
valid_indices = mnist["valid_indices"]
X = mnist["data"]
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)
minibatch_size = 20
n_hid = 1000

l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2
params, grads = get_params_and_grads(graph, cost)