# The reason for the closure is so that the gradient can depend
# on both the input to the original function (x), and the output of the
# original function (ans).
def make_grad_logsumexp(ans, x):
    # If you want to be able to take higher-order derivatives, then all the
    # code inside this function must be itself differentiable by autogradwithbay.
    def gradient_product(g):
        # This closure multiplies g with the Jacobian of logsumexp (d_ans/d_x).
        # Because autogradwithbay uses reverse-mode differentiation, g contains
        # the gradient of the objective w.r.t. ans, the output of logsumexp.
        return np.full(x.shape, g) * np.exp(x - np.full(x.shape, ans))
    return gradient_product

# Now we tell autogradwithbay that logsumexmp has a gradient-making function.
logsumexp.defgrad(make_grad_logsumexp)


if __name__ == '__main__':
    # Now we can use logsumexp() inside a larger function that we want
    # to differentiate.
    def example_func(y):
        z = y**2
        lse = logsumexp(z)
        return np.sum(lse)

    grad_of_example = grad(example_func)
    print("Gradient: ", grad_of_example(npr.randn(10)))

    # Check the gradients numerically, just to be safe.
    quick_grad_check(example_func, npr.randn(10))
def logistic_predictions(weights, inputs):
    # Outputs probability of a label being true according to logistic model.
    return sigmoid(np.dot(inputs, weights))

def training_loss(weights):
    # Training loss is the negative log-likelihood of the training labels.
    preds = logistic_predictions(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

# Build a toy dataset.
inputs = np.array([[0.52, 1.12,  0.77],
                   [0.88, -1.08, 0.15],
                   [0.52, 0.06, -1.30],
                   [0.74, -2.49, 1.39]])
targets = np.array([True, True, False, True])

# Build a function that returns gradients of training loss using autogradwithbay.
training_gradient_fun = grad(training_loss)

# Check the gradients numerically, just to be safe.
weights = np.array([0.0, 0.0, 0.0])
quick_grad_check(training_loss, weights)

# Optimize weights using gradient descent.
print("Initial loss:", training_loss(weights))
for i in range(100):
    weights -= training_gradient_fun(weights) * 0.01

print("Trained loss:", training_loss(weights))
Ejemplo n.º 3
0
    batch_size = 256
    num_epochs = 50

    # Load and process MNIST data (borrowing from Kayak)
    N_data, train_images, train_labels, test_images, test_labels = load_mnist()

    # Make neural net functions
    N_weights, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
    loss_grad = grad(loss_fun)

    # Initialize weights
    rs = npr.RandomState()
    W = rs.randn(N_weights) * param_scale

    # Check the gradients numerically, just to be safe
    quick_grad_check(loss_fun, W, (train_images, train_labels))

    print("    Epoch      |    Train err  |   Test err  ")

    def print_perf(epoch, W):
        test_perf  = frac_err(W, test_images, test_labels)
        train_perf = frac_err(W, train_images, train_labels)
        print("{0:15}|{1:15}|{2:15}".format(epoch, train_perf, test_perf))

    # Train with sgd
    batch_idxs = make_batches(train_images.shape[0], batch_size)
    cur_dir = np.zeros(N_weights)

    for epoch in range(num_epochs):
        print_perf(epoch, W)
        for idxs in batch_idxs: