def test_tanh_rnn():
    """Smoke-test a tanh RNN: build the graph, compile it, train one epoch.

    Relies on module-level ``X``, ``X_mask``, ``y``, ``y_mask`` and
    ``minibatch_size``; none of them are defined inside this function.
    """
    # Fixed seed so the test is deterministic.
    rng = np.random.RandomState(1999)
    # Container that holds the computational graph.
    graph = OrderedDict()

    # Output width equals the input feature width; hidden width is fixed.
    n_out = X.shape[-1]
    n_hid = 10

    # Register the datasets (first dimension is time) as symbolic inputs.
    datasets = [X, X_mask, y, y_mask]
    names = ["X", "X_mask", "y", "y_mask"]
    test_values = list(datasets)
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets, names, graph, list_of_test_values=test_values)

    # Input projection -> tanh recurrence -> linear output projection.
    projected = linear_layer([X_sym], graph, 'l1_proj', n_hid, rng)
    hidden = tanh_recurrent_layer([projected], X_mask_sym, n_hid, graph,
                                  'l1_rec', rng)
    y_hat = linear_layer([hidden], graph, 'l2_proj', n_out, rng)

    # Masked squared error between prediction and target, averaged.
    raw_cost = squared_error(y_hat, y_sym)
    cost = masked_cost(raw_cost, y_mask_sym).mean()

    # Model parameters and their gradients with respect to the cost.
    params, grads = get_params_and_grads(graph, cost)

    # Plain stochastic gradient descent.
    optimizer = sgd(params)
    learning_rate = 0.001
    updates = optimizer.updates(params, grads, learning_rate)

    symbolic_inputs = [X_sym, X_mask_sym, y_sym, y_mask_sym]
    fit_function = theano.function(symbolic_inputs, [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function(symbolic_inputs, [cost],
                                    mode="FAST_COMPILE")

    checkpoint_dict = {}
    # Train and validate on the same indices along axis 1 of X.
    train_indices = np.arange(X.shape[1])
    valid_indices = np.arange(X.shape[1])
    early_stopping_trainer(
        fit_function, cost_function, checkpoint_dict, [X, y],
        minibatch_size, train_indices, valid_indices,
        fit_function_output_names=["cost"],
        cost_function_output_name="valid_cost",
        n_epochs=1)
def test_tanh_rnn():
    """Smoke-test a tanh RNN: build the graph, compile it, train one epoch.

    Relies on module-level ``X``, ``X_mask``, ``y``, ``y_mask`` and
    ``minibatch_size``; none of them are defined inside this function.
    """
    # Fixed seed so the test is deterministic.
    rng = np.random.RandomState(1999)
    # Container that holds the computational graph.
    graph = OrderedDict()

    # Output width equals the input feature width; hidden width is fixed.
    n_out = X.shape[-1]
    n_hid = 10

    # Register the datasets (first dimension is time) as symbolic inputs.
    datasets = [X, X_mask, y, y_mask]
    names = ["X", "X_mask", "y", "y_mask"]
    test_values = list(datasets)
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets, names, graph, list_of_test_values=test_values)

    # Input projection -> tanh recurrence -> linear output projection.
    projected = linear_layer([X_sym], graph, 'l1_proj', proj_dim=n_hid,
                             random_state=rng)
    hidden = tanh_recurrent_layer([projected], X_mask_sym, n_hid, graph,
                                  'l1_rec', rng)
    y_hat = linear_layer([hidden], graph, 'l2_proj', proj_dim=n_out,
                         random_state=rng)

    # Masked squared error between prediction and target, averaged.
    raw_cost = squared_error(y_hat, y_sym)
    cost = masked_cost(raw_cost, y_mask_sym).mean()

    # Model parameters and their gradients with respect to the cost.
    params, grads = get_params_and_grads(graph, cost)

    # Plain stochastic gradient descent; the optimizer owns the step size.
    learning_rate = 0.001
    optimizer = sgd(params, learning_rate)
    updates = optimizer.updates(params, grads)

    symbolic_inputs = [X_sym, X_mask_sym, y_sym, y_mask_sym]
    fit_function = theano.function(symbolic_inputs, [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function(symbolic_inputs, [cost],
                                    mode="FAST_COMPILE")

    checkpoint_dict = {}
    # Train and validate on the same indices along axis 1 of X.
    train_indices = np.arange(X.shape[1])
    valid_indices = np.arange(X.shape[1])
    early_stopping_trainer(
        fit_function, cost_function, train_indices, valid_indices,
        checkpoint_dict, [X, y], minibatch_size,
        list_of_train_output_names=["cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
l1_enc = softplus_layer([X_sym], graph, 'l1_enc', n_enc_layer[0], random_state) l2_enc = softplus_layer([l1_enc], graph, 'l2_enc', n_enc_layer[1], random_state) code_mu = linear_layer([l2_enc], graph, 'code_mu', n_code, random_state) code_log_sigma = linear_layer([l2_enc], graph, 'code_log_sigma', n_code, random_state) kl = gaussian_log_kl([code_mu], [code_log_sigma], graph, 'kl').mean() samp = gaussian_log_sample_layer([code_mu], [code_log_sigma], graph, 'samp', random_state) # decode path aka p l1_dec = softplus_layer([samp], graph, 'l1_dec', n_dec_layer[0], random_state) l2_dec = softplus_layer([l1_dec], graph, 'l2_dec', n_dec_layer[1], random_state) out = linear_layer([l2_dec], graph, 'out', n_input, random_state) nll = squared_error(out, X_sym).mean() # log p(x) = -nll so swap sign # want to minimize cost in optimization so multiply by -1 cost = -1 * (-nll - kl) params, grads = get_params_and_grads(graph, cost) learning_rate = 0.0003 opt = adam(params) updates = opt.updates(params, grads, learning_rate) # Checkpointing try: checkpoint_dict = load_last_checkpoint() fit_function = checkpoint_dict["fit_function"] cost_function = checkpoint_dict["cost_function"] encode_function = checkpoint_dict["encode_function"]
def test_squared_error():
    """Check that a squared-error cost graph compiles under theano.

    Registers the module-level ``X`` as a symbolic input, builds the cost
    between a half-scaled copy of that input and the input itself, and
    compiles it. Nothing is evaluated or asserted beyond compilation.
    """
    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    half_scaled = .5 * X_sym
    cost = squared_error(half_scaled, X_sym)
    theano.function([X_sym], cost, mode="FAST_COMPILE")
def test_squared_error():
    """Check that a squared-error cost graph compiles under theano.

    Uses an ``X_sym`` that is not defined in this function (it must exist
    in an enclosing scope). Nothing is evaluated or asserted beyond
    compilation.
    """
    half_scaled = .5 * X_sym
    cost = squared_error(half_scaled, X_sym)
    theano.function([X_sym], cost, mode="FAST_COMPILE")