# The reason for the closure is so that the gradient can depend
# on both the input to the original function (x) and the output of the
# original function (ans).
def make_grad_logsumexp(ans, x):
    # If you want to be able to take higher-order derivatives, then all the
    # code inside this function must itself be differentiable by autogradwithbay.
    def gradient_product(g):
        # This closure multiplies g with the Jacobian of logsumexp (d_ans/d_x).
        # Because autogradwithbay uses reverse-mode differentiation, g contains
        # the gradient of the objective w.r.t. ans, the output of logsumexp.
        return np.full(x.shape, g) * np.exp(x - np.full(x.shape, ans))
    return gradient_product

# Now we tell autogradwithbay that logsumexp has a gradient-making function.
logsumexp.defgrad(make_grad_logsumexp)

if __name__ == '__main__':
    # Now we can use logsumexp() inside a larger function that we want
    # to differentiate.
    def example_func(y):
        z = y**2
        lse = logsumexp(z)
        return np.sum(lse)

    grad_of_example = grad(example_func)
    print("Gradient: ", grad_of_example(npr.randn(10)))

    # Check the gradients numerically, just to be safe.
    quick_grad_check(example_func, npr.randn(10))
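# A quick sanity check on the gradient rule above (a minimal sketch in plain
# NumPy, separate from the autogradwithbay example): the Jacobian of logsumexp
# is the softmax of x, i.e. d_ans/d_x = exp(x - ans), which is exactly what
# gradient_product multiplies g by. The helper names below (plain_logsumexp,
# softmax) are illustrative, not part of the example; the check compares the
# analytic gradient with a central finite-difference estimate.
import numpy as onp  # ordinary NumPy, kept separate from autograd's wrapped np

def plain_logsumexp(x):
    m = onp.max(x)
    return m + onp.log(onp.sum(onp.exp(x - m)))

def softmax(x):
    e = onp.exp(x - onp.max(x))
    return e / onp.sum(e)

x0 = onp.random.randn(5)
analytic = softmax(x0)  # same values as onp.exp(x0 - plain_logsumexp(x0))
eps = 1e-5
numeric = onp.array([(plain_logsumexp(x0 + eps * onp.eye(5)[i]) -
                      plain_logsumexp(x0 - eps * onp.eye(5)[i])) / (2 * eps)
                     for i in range(5)])
assert onp.allclose(analytic, numeric, atol=1e-6)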
        # Show the training text next to the model's predictions for time step t.
        training_text  = one_hot_to_string(train_inputs[:,t,:])
        predicted_text = one_hot_to_string(logprobs[:,t,:])
        print(training_text.replace('\n', ' ') + "|" +
              predicted_text.replace('\n', ' '))

# Wrap the loss function so it takes only one argument, for scipy.optimize.minimize.
def training_loss(weights):
    return -loglike_fun(weights, train_inputs, train_inputs)

def callback(weights):
    print("Train loss:", training_loss(weights))
    print_training_prediction(weights)

# Build the gradient of the loss function using autogradwithbay.
# value_and_grad returns a function computing both the loss and its gradient
# in one call, which matches what scipy expects when jac=True.
training_loss_and_grad = value_and_grad(training_loss)

init_weights = npr.randn(num_weights) * param_scale

# Check the gradients numerically, just to be safe.
quick_grad_check(training_loss, init_weights)

print("Training LSTM...")
result = minimize(training_loss_and_grad, init_weights, jac=True, method='CG',
                  options={'maxiter':train_iters}, callback=callback)
trained_weights = result.x

print()
print("Generating text from RNN...")
num_letters = 30
for t in range(20):
    text = ""
    for i in range(num_letters):
        # Feed the text generated so far back in as a one-hot sequence.
        seqs = string_to_one_hot(text, output_size)[:, np.newaxis, :]