def test_rnn_correlated_mixture_density():
    # Smoke test (no assertions): wires a tanh projection into a GRU
    # recurrent layer and a bernoulli + correlated log-gaussian mixture
    # output, compiles the masked mean cost, then runs a single epoch
    # through fixed_n_epochs_trainer.

    # graph holds information necessary to build layers from parents
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    minibatch_size = 5

    # Each "sequence" dataset is the same example repeated minibatch_size
    # times.
    X_seq = np.array([bernoulli_X for i in range(minibatch_size)])
    y_seq = np.array([bernoulli_y for i in range(minibatch_size)])

    # make_masked_minibatch yields a (minibatch, mask) pair for the slice.
    X_mb, X_mb_mask = make_masked_minibatch(X_seq, slice(0, minibatch_size))
    y_mb, y_mb_mask = make_masked_minibatch(y_seq, slice(0, minibatch_size))

    datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)

    n_hid = 5
    train_indices = np.arange(len(X_seq))
    valid_indices = np.arange(len(X_seq))

    l1 = tanh_layer(
        [X_sym], graph, "l1",
        proj_dim=n_hid,
        random_state=random_state)
    h = gru_recurrent_layer(
        [l1], X_mask_sym, n_hid, graph, "l1_rec",
        random_state=random_state)
    rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
        [h], graph, "hw",
        proj_dim=2,
        n_components=3,
        random_state=random_state)
    binary, coeffs, mus, log_sigmas, corr = rval

    # Mixture cost, masked with y_mask_sym, then averaged.
    cost = masked_cost(
        bernoulli_and_correlated_log_gaussian_mixture_cost(
            binary, coeffs, mus, log_sigmas, corr, y_sym),
        y_mask_sym).mean()

    # Compiled without `updates`; the same function is handed to the trainer
    # for both of its function arguments below.
    cost_function = theano.function(
        [X_sym, X_mask_sym, y_sym, y_mask_sym],
        [cost],
        mode="FAST_COMPILE")

    # NOTE: locals() is passed wholesale, so the local names bound above may
    # be observable inside create_checkpoint_dict (not visible from here);
    # keep them stable.
    checkpoint_dict = create_checkpoint_dict(locals())
    epoch_results = fixed_n_epochs_trainer(
        cost_function,
        cost_function,
        train_indices,
        valid_indices,
        checkpoint_dict,
        [X_seq, y_seq],
        minibatch_size,
        list_of_minibatch_functions=[make_masked_minibatch] * 2,
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
def test_correlated_mixture_density():
    # Smoke test (no assertions): tanh layer feeding a bernoulli +
    # correlated log-gaussian mixture output, fitted with sgd for one epoch
    # through fixed_n_epochs_trainer.

    # graph holds information necessary to build layers from parents
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph(
        [bernoulli_X, bernoulli_y], ["X", "y"], graph)

    n_hid = 20
    # One minibatch spans the whole dataset; train and validation index
    # ranges both cover every row.
    minibatch_size = len(bernoulli_X)
    train_indices = np.arange(minibatch_size)
    valid_indices = np.arange(minibatch_size)

    l1 = tanh_layer(
        [X_sym], graph, "l1",
        proj_dim=n_hid,
        random_state=random_state)
    rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
        [l1], graph, "hw",
        proj_dim=2,
        n_components=3,
        random_state=random_state)
    binary, coeffs, mus, log_sigmas, corr = rval

    # Mixture cost, then its mean.
    cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
        binary, coeffs, mus, log_sigmas, corr, y_sym)
    cost = cost.mean()

    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 1e-6
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    # fit_function applies the sgd updates; cost_function only evaluates.
    fit_function = theano.function(
        [X_sym, y_sym], [cost],
        updates=updates,
        mode="FAST_COMPILE")
    cost_function = theano.function(
        [X_sym, y_sym], [cost],
        mode="FAST_COMPILE")

    # NOTE: locals() is passed wholesale, so the local names bound above may
    # be observable inside create_checkpoint_dict (not visible from here);
    # keep them stable.
    checkpoint_dict = create_checkpoint_dict(locals())
    epoch_results = fixed_n_epochs_trainer(
        fit_function,
        cost_function,
        train_indices,
        valid_indices,
        checkpoint_dict,
        [bernoulli_X, bernoulli_y],
        minibatch_size,
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
# Script excerpt: one tanh hidden layer feeding a softmax output, with a
# cross-entropy + L2 cost and a fit function that applies rmsprop updates.
# `mnist` is defined earlier in the script, outside this excerpt.
valid_indices = mnist["valid_indices"]
X = mnist["data"]
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)

# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 20
n_hid = 1000

l1 = tanh_layer(
    [X_sym], graph, "l1",
    proj_dim=n_hid,
    random_state=random_state)
y_pred = softmax_zeros_layer(
    [l1], graph, "y_pred",
    proj_dim=n_targets)

# Mean categorical cross-entropy plus an L2 penalty (sum of squared
# entries of every weight in the graph) scaled by 1e-4.
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + 0.0001 * L2

params, grads = get_params_and_grads(graph, cost)

learning_rate = 1e-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

# Each call returns [cost] and applies the optimizer updates.
fit_function = theano.function(
    [X_sym, y_sym],
    [cost],
    updates=updates)
# Script excerpt: one tanh hidden layer feeding a softmax output, with a
# cross-entropy + L2 cost and the rmsprop update rules for its parameters.
# `mnist` and `X` are defined earlier in the script, outside this excerpt.
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)

# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 20
n_hid = 1000

l1 = tanh_layer(
    [X_sym], graph, "l1",
    proj_dim=n_hid,
    random_state=random_state)
y_pred = softmax_zeros_layer(
    [l1], graph, "y_pred",
    proj_dim=n_targets)

# Mean categorical cross-entropy plus an L2 penalty (sum of squared
# entries of every weight in the graph) scaled by 1e-4.
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + 0.0001 * L2

params, grads = get_params_and_grads(graph, cost)

learning_rate = 1e-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)
# Script excerpt: batch-normalised tanh hidden layer feeding a softmax
# output, with a cross-entropy + L2 cost and sgd_nesterov update rules.
# `X`, `y` and `n_targets` are defined earlier in the script, outside this
# excerpt.
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph(
    [X, y], ["X", "y"], graph,
    list_of_test_values=[X[:10], y[:10]])

# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 128
n_hid = 1000

# Symbolic integer scalar passed to the layer as `mode_switch`; its test
# value is 0.
on_off = tensor.iscalar()
on_off.tag.test_value = 0

l1 = tanh_layer(
    [X_sym], graph, "l1",
    proj_dim=n_hid,
    batch_normalize=True,
    mode_switch=on_off,
    random_state=random_state)
y_pred = softmax_zeros_layer(
    [l1], graph, "y_pred",
    proj_dim=n_targets)

# Mean categorical cross-entropy plus an L2 penalty (sum of squared
# entries of every weight in the graph) scaled by 1e-4.
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + 0.0001 * L2

params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.1
momentum = 0.9
opt = sgd_nesterov(params, learning_rate, momentum)
updates = opt.updates(params, grads)
# Script excerpt: batch-normalised tanh hidden layer feeding a softmax
# output, with a cross-entropy + L2 cost and a fit function that applies
# sgd_nesterov updates. `X`, `y` and `n_targets` are defined earlier in the
# script, outside this excerpt.
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph(
    [X, y], ["X", "y"], graph,
    list_of_test_values=[X[:10], y[:10]])

# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 128
n_hid = 1000

# Symbolic integer scalar passed to the layer as `mode_switch`; its test
# value is 0. It is also an input of fit_function below.
on_off = tensor.iscalar()
on_off.tag.test_value = 0

l1 = tanh_layer(
    [X_sym], graph, "l1",
    proj_dim=n_hid,
    batch_normalize=True,
    mode_switch=on_off,
    random_state=random_state)
y_pred = softmax_zeros_layer(
    [l1], graph, "y_pred",
    proj_dim=n_targets)

# Mean categorical cross-entropy plus an L2 penalty (sum of squared
# entries of every weight in the graph) scaled by 1e-4.
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + 0.0001 * L2

params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.1
momentum = 0.9
opt = sgd_nesterov(params, learning_rate, momentum)
updates = opt.updates(params, grads)

# Each call returns [cost] and applies the optimizer updates.
fit_function = theano.function(
    [X_sym, y_sym, on_off],
    [cost],
    updates=updates)