def check_vjp(func, func_vjp, args):
    # Compare the gradient of func_vjp w.r.t. each argument against a numerical JVP of func.
    for i in range(len(args)):
        out = grad(func_vjp, wrt=id(args[i]))(*args)
        expected = numerical_jvp(func, args, i)
        np.testing.assert_allclose(out, expected, atol=default_tol, rtol=default_tol)
def differentiate_n_times_num(graph, wrt_vars, order):
    if order == 1:
        backprop_graphs = ad.grad(graph, wrt_vars)
        numeric_grads = [numerical_gradient(graph, var) for var in wrt_vars]
    else:
        for o in range(order):
            if o == 0:
                backprop_graphs = ad.grad(graph, wrt_vars)
            else:
                if o == order - 1:
                    numeric_grads = [
                        numerical_gradient(graph, var)
                        for graph, var in zip(backprop_graphs, wrt_vars)
                    ]
                backprop_graphs = [
                    ad.grad(graph, [var])[0]
                    for graph, var in zip(backprop_graphs, wrt_vars)
                ]
    return backprop_graphs, numeric_grads
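# The numerical_gradient helper used above is not shown here. As an illustrative sketch
# only -- an assumption, not the library's actual implementation -- a central-difference
# version could look like the following. It relies only on behaviour visible in the
# surrounding code: Variables expose a mutable .value, graphs are re-evaluated by calling
# them, and numpy is imported as np. It approximates d(sum of graph outputs)/d(var).
def numerical_gradient_sketch(graph, var, eps=1e-6):
    original = np.array(var.value, dtype=float)
    grad_approx = np.zeros_like(original)
    for idx in np.ndindex(original.shape):
        plus = original.copy()
        plus[idx] += eps
        minus = original.copy()
        minus[idx] -= eps
        var.value = plus
        f_plus = np.sum(graph())
        var.value = minus
        f_minus = np.sum(graph())
        grad_approx[idx] = (f_plus - f_minus) / (2 * eps)
    var.value = original  # restore the unperturbed value
    return grad_approx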
def differentiate_n_times(my_graph, tf_graph, my_vars, tf_vars,
                          order=1, my_curr_grad=None, tf_curr_grad=None):
    # All vars share the first graph derivation, which speeds up the test,
    # but the higher-order graphs are different for each variable.
    for i in range(order):
        if i == 0:
            my_graphs = ad.grad(my_graph, my_vars, previous_grad=my_curr_grad)
            if tf_graph is not None:
                tf_graphs = tf.gradients(tf_graph, tf_vars, grad_ys=tf_curr_grad)
        else:
            for j in range(len(my_graphs)):
                my_graphs[j] = ad.grad(my_graphs[j], [my_vars[j]])[0]
                if tf_graphs[j] is not None:
                    tf_graphs[j] = tf.gradients(tf_graphs[j], tf_vars[j])[0]
    return my_graphs, tf_graphs
def loss_domain(model, points):
    """
    Calculate the loss at a point.

    Three loss terms overall:
        L1: domain (PDE residual)
        L2: initial condition
        L3: boundary condition
    """
    t = ad.Variable(np.array([0.5]), name="t")
    x = ad.Variable(np.array([points[1]]), name="x")
    points = ad.Reshape(ad.Concat(t, x, 0), (1, 2))

    # The (1 + x) * (1 - x) factor vanishes at x = -1 and x = 1,
    # so the boundary condition is satisfied by construction.
    u = (1 + x) * (1 - x) * model.output(points)
    du_dt, du_dx = ad.grad(u, [t, x])
    d2u_dx2 = diff_n_times(u, x, 2)

    # Burgers' equation residual: u_t + u * u_x - (0.01 / pi) * u_xx,
    # where 0.00318309886 is 0.01 / pi.
    total_loss = du_dt + u * du_dx - (0.00318309886 * d2u_dx2)
    return ad.Pow(total_loss, 2)
def loss_domain(model, points):
    """
    Calculate the loss at a point.

    Three loss terms overall:
        L1: domain (PDE residual)
        L2: initial condition
        L3: boundary condition
    """
    t = ad.Variable(np.array([points[0]]), name="t")
    x = ad.Variable(np.array([points[1]]), name="x")
    points = ad.Reshape(ad.Concat(t, x, 0), (1, 2))

    u = model.output(points)
    du_dt, du_dx = ad.grad(u, [t, x])
    print("du_dt", du_dt())
    print("du_dx", du_dx())
    d2u_dx2 = diff_n_times(u, x, 2)
    print("d2u_dx2", d2u_dx2())

    # Burgers' equation residual: u_t + u * u_x - (0.01 / pi) * u_xx
    total_loss = du_dt + u * du_dx - (0.01 / np.pi) * d2u_dx2
    print("loss", total_loss())
    return total_loss
def diff_n_times(graph, wrt, n):
    for _ in range(n):
        graph = ad.grad(graph, [wrt])[0]
    return graph
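# Usage sketch (illustrative only, relying on the ad.Variable / ad.Pow / callable-graph
# API already used in this code): the second derivative of x**3 at x = 2 is 6 * x = 12.
x_demo = ad.Variable(np.array([2.0]), name="x_demo")
f_demo = ad.Pow(x_demo, 3)
d2f_dx2 = diff_n_times(f_demo, x_demo, 2)
print("d2f_dx2", d2f_dx2())  # expected to be close to [12.]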
for i in range(100):
    L1.value = L1.value + (loss_domain(model, samplings_domain[i])()) + (
        loss_initial(model, samplings_initial[i])())
    # L2.value = L2.value + (loss_initial(model, samplings[i])())

init_loss = L1 / 100
print("initial_loss", init_loss())

for i in range(1):
    # L2 = loss_initial(model, samplings[i])
    # total_loss = L1
    params = model.get_weights()
    grad_params = [0 for _ in params]
    grad_params = ad.grad(
        loss_domain(model, samplings_domain[i]) +
        loss_initial(model, samplings_initial[i]), params)
    new_params = [0 for _ in params]
    # print("gradients taken!")
    """
    for i in range(len(params)):
        new_params[i] = params[i] - lr * grad_params[i]
    """
    new_params = optimizer([i() for i in params], [i() for i in grad_params])
    model.set_weights(new_params)

L3 = ad.Variable(0, name="L3")
# L4 = ad.Variable(0, name="L4")
for i in range(100):
mnist = MNIST(batch_size=batch_size)

start = time.time()
for epoch in range(2):
    for step, (images, labels) in enumerate(mnist.train_loader):
        x = ad.Variable(images.view(-1, 28 * 28).numpy(), name="images")
        y_true = ad.Variable(np.eye(10)[labels.numpy()], name="labels")

        y_logit = nn(x)
        sce = ad.SoftmaxCEWithLogits(labels=y_true, logits=y_logit)
        cost = ad.Einsum("i->", sce) / batch_size
        cost.plot_comp_graph()
        input()

        w_list_grads = ad.grad(cost, nn.w)
        w_list_grads[0].plot_comp_graph()
        input()

        new_w_list = optimizer([i() for i in nn.w], [i() for i in w_list_grads])
        optimizer.apply_new_weights(nn.w, new_w_list)

        if step % 100 == 0:
            text = "epoch {}, step {}, cost {:.2f}, grad norm {:.2f}, time {:.2f}"
            print(text.format(epoch, step, cost(),
                              ad.FrobeniusNorm(*w_list_grads)(),
                              time.time() - start))
            start = time.time()
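# The optimizer above is only used through two entry points: calling it with the current
# parameter values and gradient values to get new values, and apply_new_weights() to write
# those values back into the Variables. As an illustrative sketch only -- an assumed
# stand-in, not the repo's optimizer class -- plain SGD matching that interface could look
# like this (it relies on Variables exposing a mutable .value, as in the code above):
class SGDSketch:
    def __init__(self, lr=1e-3):
        self.lr = lr

    def __call__(self, param_values, grad_values):
        # return updated parameter values without touching the Variables themselves
        return [p - self.lr * g for p, g in zip(param_values, grad_values)]

    def apply_new_weights(self, variables, new_values):
        # write the updated values back into the graph's Variables
        for var, new_value in zip(variables, new_values):
            var.value = new_value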
):  # autoregressively generate new characters
        next_char_onehot, next_char = sample_char(logits)
        seed_text += next_char

        x = ad.Variable(next_char_onehot, name="x")
        h = ad.Tanh(h @ w + x @ u + b_h)
        logits = h @ v + b_o
    return seed_text


for step in range(10000):
    x_batch_onehot = text_loader.to_one_hot(
        text_loader.next_batch(batch_size, seq_len=unroll_steps))

    h = ad.Variable(np.zeros((1, hidden_size)), name="h")
    costs = []
    for unroll_step in range(unroll_steps - 1):
        x = ad.Variable(x_batch_onehot[:, unroll_step, :], name="x")
        h = ad.Tanh(h @ w + x @ u + b_h)
        logits = h @ v + b_o

        y = ad.Variable(x_batch_onehot[:, unroll_step + 1, :])
        cost = ad.Einsum("i->", ad.SoftmaxCEWithLogits(labels=y, logits=logits))
        costs.append(cost)

    total_cost = ad.Add(*costs) / unroll_steps
    param_grads = ad.grad(total_cost, params)

    new_params = optimizer([i() for i in params], [i() for i in param_grads])
    optimizer.apply_new_weights(params, new_params)

    if step % 20 == 0:
        text = "step: {}, cost: {:.2f} \n------------------------------ \n {} \n------------------------------"
        print(text.format(step, float(total_cost()), sample_text()))
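# sample_char, used in the generation loop above, is not defined in this snippet. As an
# illustrative sketch only -- a hypothetical helper with an assumed `vocab` list mapping
# indices to characters -- it could evaluate the logits graph, softmax it, sample an
# index, and return the one-hot row plus the character fed back into the RNN:
def sample_char_sketch(logits, vocab):
    values = logits().ravel()
    probs = np.exp(values - np.max(values))
    probs = probs / probs.sum()
    idx = np.random.choice(len(probs), p=probs)
    onehot = np.eye(len(probs))[idx][None, :]  # shape (1, vocab_size), like the x above
    return onehot, vocab[idx]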