Example #1
def check_vjp(func, func_vjp, args):
    # Check a custom VJP: for every argument, compare the reverse-mode gradient
    # of func_vjp against a numerical derivative of func w.r.t. that argument.
    for i in range(len(args)):
        out = grad(func_vjp, wrt=id(args[i]))(*args)
        expected = numerical_jvp(func, args, i)
        np.testing.assert_allclose(out,
                                   expected,
                                   atol=default_tol,
                                   rtol=default_tol)
Example #2
def differentiate_n_times_num(graph, wrt_vars, order):
    # First-order symbolic derivatives w.r.t. each variable.
    backprop_graphs = ad.grad(graph, wrt_vars)
    if order == 1:
        numeric_grads = [numerical_gradient(graph, var) for var in wrt_vars]
    else:
        for o in range(1, order):
            if o == order - 1:
                # Numerically differentiate the (order-1)-th derivative graphs
                # so they can be compared against the final symbolic derivatives.
                numeric_grads = [
                    numerical_gradient(g, var)
                    for g, var in zip(backprop_graphs, wrt_vars)
                ]
            backprop_graphs = [
                ad.grad(g, [var])[0]
                for g, var in zip(backprop_graphs, wrt_vars)
            ]
    return backprop_graphs, numeric_grads
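
The numerical_gradient helper used by these tests is not shown in this excerpt. A minimal central-difference sketch, assuming (as the snippets below suggest) that graph nodes are evaluated by calling them, that an ad.Variable keeps its data in a mutable .value array, and that the graph output may be reduced with a sum, might look like this:

import numpy as np

def numerical_gradient(graph, var, eps=1e-6):
    # Central differences: perturb one entry of var.value at a time and
    # measure the change in the (summed) graph output.
    grad = np.zeros_like(var.value, dtype=float)
    it = np.nditer(var.value, flags=["multi_index"])
    while not it.finished:
        idx = it.multi_index
        original = var.value[idx]
        var.value[idx] = original + eps
        plus = np.sum(graph())
        var.value[idx] = original - eps
        minus = np.sum(graph())
        var.value[idx] = original  # restore the original entry
        grad[idx] = (plus - minus) / (2 * eps)
        it.iternext()
    return grad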
Example #3
def differentiate_n_times(my_graph,
                          tf_graph,
                          my_vars,
                          tf_vars,
                          order=1,
                          my_curr_grad=None,
                          tf_curr_grad=None):
    # All vars share the first-order graph derivation, which speeds up the test,
    # but the higher-order graphs are differentiated per variable.
    tf_graphs = None
    for i in range(order):
        if i == 0:
            my_graphs = ad.grad(my_graph, my_vars, previous_grad=my_curr_grad)
            if tf_graph is not None:
                tf_graphs = tf.gradients(tf_graph,
                                         tf_vars,
                                         grad_ys=tf_curr_grad)
        else:
            for j in range(len(my_graphs)):
                my_graphs[j] = ad.grad(my_graphs[j], [my_vars[j]])[0]
                if tf_graphs is not None and tf_graphs[j] is not None:
                    tf_graphs[j] = tf.gradients(tf_graphs[j], tf_vars[j])[0]
    return my_graphs, tf_graphs
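
A hypothetical usage of the helper above, comparing second derivatives of a simple squaring graph against TensorFlow 1.x-style tf.gradients; the x_val array and the squaring expressions are made up for illustration:

x_val = np.random.rand(3).astype(np.float32)
my_x = ad.Variable(x_val, name="x")
tf_x = tf.constant(x_val)

# d^2(x*x)/dx^2 is 2 everywhere; both returned graph lists should evaluate to that.
my_graphs, tf_graphs = differentiate_n_times(my_x * my_x, tf_x * tf_x,
                                             [my_x], [tf_x], order=2)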
def loss_domain(model, points):
    """
    Calculate the loss at a point.
    The full loss has three terms:
    L1: domain (PDE residual)
    L2: initial condition
    L3: boundary condition
    """
    t = ad.Variable(np.array([0.5]), name="t")  # t is hard-coded in this variant
    x = ad.Variable(np.array([points[1]]), name="x")
    points = ad.Reshape(ad.Concat(t, x, 0), (1, 2))

    # The (1 + x) * (1 - x) factor forces the trial solution to vanish at x = +/-1.
    du_dt, du_dx = ad.grad((1 + x) * (1 - x) * model.output(points), [t, x])

    # 0.00318309886 ~= 0.01 / pi, the viscosity coefficient.
    d2u_dx2 = diff_n_times((1 + x) * (1 - x) * model.output(points), x, 2)
    total_loss = (du_dt) + (
        (1 + x) *
        (1 - x) * model.output(points) * du_dx) - (0.00318309886 * d2u_dx2)

    return ad.Pow(total_loss, 2)
def loss_domain(model, points):
    """
    Calculate the loss at a point.
    The full loss has three terms:
    L1: domain (PDE residual)
    L2: initial condition
    L3: boundary condition
    """
    t = ad.Variable(np.array([points[0]]), name="t")
    x = ad.Variable(np.array([points[1]]), name="x")  # second coordinate of the (t, x) sample

    points = ad.Reshape(ad.Concat(t, x, 0), (1, 2))
    u = model.output(points)
    du_dt, du_dx = ad.grad(u, [t, x])
    print("du_dt", du_dt())
    print("du_dx", du_dx())

    d2u_dx2 = diff_n_times(u, x, 2)
    print("d2u_dx2", d2u_dx2())
    # Residual of Burgers' equation with viscosity 0.01 / pi.
    total_loss = du_dt + u * du_dx - (0.01 / np.pi) * d2u_dx2
    print("loss", total_loss())

    return total_loss
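
Both loss_domain variants above compute the residual of the 1-D viscous Burgers equation; the first variant additionally multiplies the network output by (1 + x)(1 - x), so the trial solution vanishes at x = ±1, and squares the residual:

    u_t + u u_x - (0.01/\pi) u_{xx} = 0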
def diff_n_times(graph, wrt, n):
    # Differentiate the graph n times with respect to the same variable.
    for i in range(n):
        graph = ad.grad(graph, [wrt])[0]
    return graph
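
A hypothetical sanity check for diff_n_times, reusing the ad.Variable and operator-overloading style of the examples above:

x = ad.Variable(np.array([2.0]), name="x")
y = x * x * x * x                # y = x**4
d3y_dx3 = diff_n_times(y, x, 3)  # third derivative: 24 * x
print(d3y_dx3())                 # expected: roughly [48.]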
        # Accumulate the domain and initial-condition losses over 100 sampled points.
        for i in range(100):
            L1.value = L1.value + (loss_domain(
                model, samplings_domain[i])()) + (loss_initial(
                    model, samplings_initial[i])())
            #L2.value =L2.value +(loss_initial(model,samplings[i])())

        init_loss = L1 / 100
        print("initial_loss", init_loss())
        for i in range(1):

            #L2 = loss_initial(model,samplings[i])
            #total_loss = L1
            params = model.get_weights()
            grad_params = [0 for _ in params]
            # Backpropagate the combined domain + initial-condition loss w.r.t. the weights.
            grad_params = ad.grad(
                loss_domain(model, samplings_domain[i]) +
                loss_initial(model, samplings_initial[i]), params)

            new_params = [0 for _ in params]
            #print("gradients taken!")
            """
            for i in range(len(params)):
                new_params[i] = params[i] - lr* grad_params[i]
            """
            new_params = optimizer([i() for i in params],
                                   [i() for i in grad_params])
            model.set_weights(new_params)

        L3 = ad.Variable(0, name="L3")
        #L4= ad.Variable(0,name="L4")
        for i in range(100):
Example #8
mnist = MNIST(batch_size=batch_size)

start = time.time()
for epoch in range(2):
    for step, (images, labels) in enumerate(mnist.train_loader):
        # Flatten each image to a 784-vector; one-hot encode the integer labels.
        x = ad.Variable(images.view(-1, 28 * 28).numpy(), name="images")
        y_true = ad.Variable(np.eye(10)[labels.numpy()], name="labels")

        y_logit = nn(x)

        # Per-example softmax cross-entropy; Einsum "i->" sums it and dividing
        # by batch_size gives the mean cost over the batch.
        sce = ad.SoftmaxCEWithLogits(labels=y_true, logits=y_logit)
        cost = ad.Einsum("i->", sce) / batch_size
        cost.plot_comp_graph()  # visualize the computation graph, then pause below
        input()

        # Gradients of the cost w.r.t. every weight matrix in the network.
        w_list_grads = ad.grad(cost, nn.w)
        w_list_grads[0].plot_comp_graph()
        input()

        new_w_list = optimizer([i() for i in nn.w],
                               [i() for i in w_list_grads])
        optimizer.apply_new_weights(nn.w, new_w_list)

        if step % 100 == 0:
            text = "epoch {}, step {}, cost {:.2f}, grad norm {:.2f}, time {:.2f}"
            print(
                text.format(epoch, step, cost(),
                            ad.FrobeniusNorm(*w_list_grads)(),
                            time.time() - start))
            start = time.time()
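
The optimizer object is not defined in this excerpt. Its calling convention, visible above (it takes lists of evaluated weight and gradient arrays, returns updated arrays, and apply_new_weights writes them back), suggests something like the following plain-SGD sketch; the class name and learning rate are assumptions:

class SGDOptimizer:
    def __init__(self, lr=0.01):
        self.lr = lr

    def __call__(self, weights, grads):
        # weights and grads are lists of plain numpy arrays (the i() calls above).
        return [w - self.lr * g for w, g in zip(weights, grads)]

    def apply_new_weights(self, weight_vars, new_weights):
        # Write the updated arrays back into the ad.Variable nodes.
        for var, new_w in zip(weight_vars, new_weights):
            var.value = new_w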
Example #9
                   ):  # autoregressively generate new characters
        next_char_onehot, next_char = sample_char(logits)
        seed_text += next_char
        x = ad.Variable(next_char_onehot, name="x")
        h = ad.Tanh(h @ w + x @ u + b_h)
        logits = h @ v + b_o
    return seed_text


for step in range(10000):
    x_batch_onehot = text_loader.to_one_hot(
        text_loader.next_batch(batch_size, seq_len=unroll_steps))
    h = ad.Variable(np.zeros((1, hidden_size)), name="h")
    costs = []
    for unroll_step in range(unroll_steps - 1):
        x = ad.Variable(x_batch_onehot[:, unroll_step, :], name="x")
        h = ad.Tanh(h @ w + x @ u + b_h)
        logits = h @ v + b_o
        # The target at each unroll step is the next character in the sequence.
        y = ad.Variable(x_batch_onehot[:, unroll_step + 1, :])
        cost = ad.Einsum("i->", ad.SoftmaxCEWithLogits(labels=y,
                                                       logits=logits))
        costs.append(cost)
    total_cost = ad.Add(*costs) / unroll_steps
    param_grads = ad.grad(total_cost, params)
    new_params = optimizer([i() for i in params], [i() for i in param_grads])
    optimizer.apply_new_weights(params, new_params)

    if step % 20 == 0:
        text = "step: {}, cost: {:.2f} \n------------------------------ \n {} \n------------------------------"
        print(text.format(step, float(total_cost()), sample_text()))
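
The sample_char helper called at the top of Example #9 is not shown. A minimal sketch, assuming a vocab list mapping indices back to characters (an extra parameter not present in the original call), could be:

def sample_char(logits, vocab):
    # Softmax over the evaluated logits, then sample one character index.
    scores = logits().ravel()
    probs = np.exp(scores - np.max(scores))
    probs /= probs.sum()
    idx = np.random.choice(len(probs), p=probs)
    onehot = np.zeros((1, len(probs)))
    onehot[0, idx] = 1.0
    return onehot, vocab[idx]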