# VAE example: compiled Theano functions, data iterators, and training loop.
# fit_function, nll, kl, code_mu, code_log_sigma, samp, out, X, and
# minibatch_size are defined earlier in the script.
cost_function = theano.function([X_sym], [nll + kl])
encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
decode_function = theano.function([samp], [out])

checkpoint_dict = create_checkpoint_dict(locals())

train_itr = minibatch_iterator([X], minibatch_size, stop_index=60000, axis=0)
valid_itr = minibatch_iterator([X], minibatch_size, start_index=60000,
                               stop_index=70000, axis=0)


def train_loop(itr):
    X_mb = next(itr)
    # keep only the third output of fit_function (the overall cost)
    return [fit_function(X_mb)[2]]


def valid_loop(itr):
    X_mb = next(itr)
    return cost_function(X_mb)


TL = TrainingLoop(train_loop, train_itr,
                  valid_loop, valid_itr,
                  n_epochs=5000,
                  checkpoint_every_n_epochs=50,
                  checkpoint_dict=checkpoint_dict)
epoch_results = TL.run()

def error(*args):
    xargs = args[:-1]
    y = args[-1]
    final_args = xargs
    y_pred = predict_function(*final_args)[0]
    return 1 - np.mean((np.argmax(y_pred, axis=1).ravel()) ==
                       (np.argmax(y, axis=1).ravel()))


train_itr = minibatch_iterator([X, y], minibatch_size, axis=0,
                               stop_index=60000)
valid_itr = minibatch_iterator([X, y], minibatch_size, axis=0,
                               start_index=60000)
TL = TrainingLoop(fit_function, cost_function,
                  train_itr, valid_itr,
                  checkpoint_dict=checkpoint_dict,
                  list_of_train_output_names=["train_cost"],
                  valid_output_name="valid_cost",
                  n_epochs=100,
                  optimizer_object=opt)
epoch_results = TL.run()

opt = adam(params, learning_rate)
updates = opt.updates(params, grads)
fit_function = theano.function([X_sym, y_sym], [nll, kl, nll + kl],
                               updates=updates)
cost_function = theano.function([X_sym, y_sym], [nll + kl])
predict_function = theano.function([X_sym], [y_pred])
encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
decode_function = theano.function([samp, y_sym], [out])

# continue from an existing checkpoint if one is found, otherwise start fresh
checkpoint_dict = create_or_continue_from_checkpoint_dict(locals())

train_itr = minibatch_iterator([X, y], minibatch_size, stop_index=train_end,
                               axis=0)
valid_itr = minibatch_iterator([X, y], minibatch_size, start_index=train_end,
                               axis=0)
TL = TrainingLoop(fit_function, cost_function,
                  train_itr, valid_itr,
                  checkpoint_dict=checkpoint_dict,
                  list_of_train_output_names=["nll", "kl", "lower_bound"],
                  valid_output_name="valid_lower_bound",
                  n_epochs=2000)
epoch_results = TL.run()

learning_rate = 1E-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)
fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])

checkpoint_dict = create_checkpoint_dict(locals())


def error(*args):
    # classification error: fraction of minibatch examples whose argmax
    # prediction does not match the one-hot target
    xargs = args[:-1]
    y = args[-1]
    final_args = xargs
    y_pred = predict_function(*final_args)[0]
    return 1 - np.mean((np.argmax(y_pred, axis=1).ravel()) ==
                       (np.argmax(y, axis=1).ravel()))


TL = TrainingLoop(fit_function, error,
                  train_indices, valid_indices,
                  checkpoint_dict=checkpoint_dict,
                  minibatch_size=minibatch_size,
                  list_of_train_output_names=["train_cost"],
                  valid_output_name="valid_error",
                  n_epochs=1000,
                  optimizer_object=opt)
epoch_results = TL.run([X, y])

    # tail of prediction_strings: indices, not_same, vocab, and
    # non_ctc_string are computed earlier in the function (not shown here)
    last_char = vocab[indices[-1]]
    indices = indices[not_same]
    s = "".join([vocab[i] for i in indices])
    ctc_string = s + last_char
    return ctc_string, non_ctc_string


def print_ctc_prediction(X_sym, X_mask_sym, y_sym, y_mask_sym):
    all_y_pred = predict_function(X_sym, X_mask_sym)[0]
    for n in range(all_y_pred.shape[1]):
        y_pred = all_y_pred[:, n]
        ctc_string, non_ctc_string = prediction_strings(y_pred)
        print(ctc_string)
        print(non_ctc_string)


TL = TrainingLoop(fit_function, cost_function,
                  train_indices, valid_indices,
                  checkpoint_dict=checkpoint_dict,
                  minibatch_size=len(y),
                  monitor_function=print_ctc_prediction,
                  list_of_minibatch_functions=[make_masked_minibatch,
                                               make_masked_minibatch],
                  list_of_train_output_names=["cost"],
                  valid_output_name="valid_cost",
                  valid_frequency=100,
                  n_epochs=1000)
TL.run([X, y])

# assumes numpy as np, theano, OrderedDict, and the helpers used below
# (TrainingLoop, add_datasets_to_graph, softmax_zeros_layer, ...) are imported
# at module level, along with X, y, train_indices, valid_indices, n_targets
def test_loop():
    # graph holds information necessary to build layers from parents
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    minibatch_size = 10

    y_pred = softmax_zeros_layer([X_sym], graph, "y_pred", proj_dim=n_targets)
    nll = categorical_crossentropy(y_pred, y_sym).mean()
    weights = get_weights_from_graph(graph)
    cost = nll

    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.13
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
    cost_function = theano.function([X_sym, y_sym], [cost])
    predict_function = theano.function([X_sym], [y_pred])

    checkpoint_dict = {
        "fit_function": fit_function,
        "cost_function": cost_function,
        "predict_function": predict_function,
    }

    def error(*args):
        xargs = args[:-1]
        y = args[-1]
        final_args = xargs
        y_pred = predict_function(*final_args)[0]
        return 1 - np.mean((np.argmax(y_pred, axis=1).ravel()) ==
                           (np.argmax(y, axis=1).ravel()))

    TL1 = TrainingLoop(fit_function, error,
                       train_indices[:10], valid_indices[:10],
                       minibatch_size,
                       checkpoint_dict=checkpoint_dict,
                       list_of_train_output_names=["train_cost"],
                       valid_output_name="valid_error",
                       n_epochs=1,
                       optimizer_object=opt)
    epoch_results1 = TL1.run([X, y])

    # continue the same loop on the next slice of the data
    TL1.train_indices = train_indices[10:20]
    TL1.valid_indices = valid_indices[10:20]
    epoch_results1 = TL1.run([X, y])

    TL2 = TrainingLoop(fit_function, error,
                       train_indices[:20], valid_indices[:20],
                       minibatch_size,
                       checkpoint_dict=checkpoint_dict,
                       list_of_train_output_names=["train_cost"],
                       valid_output_name="valid_error",
                       n_epochs=1,
                       optimizer_object=opt)
    epoch_results2 = TL2.run([X, y])

    # both loops record into the same checkpoint_dict; the last train_cost
    # seen by each should agree
    r1 = TL1.__dict__["checkpoint_dict"]["previous_results"]["train_cost"][-1]
    r2 = TL2.__dict__["checkpoint_dict"]["previous_results"]["train_cost"][-1]
    assert r1 == r2