def black_box_variational_inference(logprob, D, num_samples):
    """Implements http://arxiv.org/abs/1401.0118, and uses the
    local reparameterization trick from http://arxiv.org/abs/1506.02557"""

    def unpack_params(params):
        # Variational dist is a diagonal Gaussian.
        mean, log_std = params[:D], params[D:]
        return mean, log_std

    def gaussian_entropy(log_std):
        return 0.5 * D * (1.0 + np.log(2*np.pi)) + np.sum(log_std)

    rs = npr.RandomState(0)
    def variational_objective(params, t):
        """Provides a stochastic estimate of the variational lower bound."""
        mean, log_std = unpack_params(params)
        samples = rs.randn(num_samples, D) * np.exp(log_std) + mean
        mean_logprob = np.mean(logprob(samples, t))
        lower_bound = gaussian_entropy(log_std) + mean_logprob
        print("mean log-probability is " + str(mean_logprob))
        return -lower_bound

    gradient = grad(variational_objective)

    return variational_objective, gradient, unpack_params
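
# A minimal usage sketch (not from the original source): fit the diagonal
# Gaussian q to a toy 2-D target density with adam.  The import paths below
# assume autogradwithbay mirrors autograd's layout, and the location of `adam`
# is an assumption.
import autogradwithbay.numpy as np
from autogradwithbay.optimizers import adam

def toy_logprob(z, t):
    # Unnormalized log-density of an isotropic Gaussian centred at (1, -1).
    return -0.5 * np.sum((z - np.array([1.0, -1.0]))**2, axis=1)

objective, gradient, unpack_params = \
    black_box_variational_inference(toy_logprob, D=2, num_samples=100)
init_params = np.concatenate([np.zeros(2), -3 * np.ones(2)])  # [mean, log_std]
var_params = adam(gradient, init_params, step_size=0.1, num_iters=500)
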
def fit_maxlike(x, r_guess):
    # follows Wikipedia's section on negative binomial max likelihood
    assert np.var(x) > np.mean(x), "Likelihood-maximizing parameters don't exist!"
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, x))
    p = lambda r: np.sum(x) / np.sum(r+x)
    rprime = lambda r: grad(loglike)(r, p(r))
    r = newton(rprime, r_guess)
    return r, p(r)
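
# Sketch of the negbin_loglike helper assumed by fit_maxlike above (it is not
# shown in this snippet).  gammaln is assumed to be available through the
# autograd-style scipy wrapper; the parameterization is the standard
# negative-binomial log-likelihood in r (dispersion) and p (success probability).
from autogradwithbay.scipy.special import gammaln

def negbin_loglike(r, p, x):
    # Elementwise log-pmf of the counts x under NegBin(r, p); summed by the caller.
    return gammaln(r + x) - gammaln(r) - gammaln(x + 1) \
        + x * np.log(p) + r * np.log(1 - p)
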
def sigmoid(x):
    # Logistic function written via tanh, as in the autograd examples.
    return 0.5 * (np.tanh(x) + 1)

def logistic_predictions(weights, inputs):
    # Outputs probability of a label being true according to logistic model.
    return sigmoid(np.dot(inputs, weights))

def training_loss(weights):
    # Training loss is the negative log-likelihood of the training labels.
    preds = logistic_predictions(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

# Build a toy dataset.
inputs = np.array([[0.52, 1.12,  0.77],
                   [0.88, -1.08, 0.15],
                   [0.52, 0.06, -1.30],
                   [0.74, -2.49, 1.39]])
targets = np.array([True, True, False, True])

# Build a function that returns gradients of training loss using autogradwithbay.
training_gradient_fun = grad(training_loss)

# Check the gradients numerically, just to be safe.
weights = np.array([0.0, 0.0, 0.0])
quick_grad_check(training_loss, weights)

# Optimize weights using gradient descent.
print("Initial loss:", training_loss(weights))
for i in range(100):
    weights -= training_gradient_fun(weights) * 0.01

print("Trained loss:", training_loss(weights))
def newton(f, x0):
    # wrap scipy.optimize.newton with our automatic derivatives
    return scipy.optimize.newton(f, x0, fprime=grad(f), fprime2=grad(grad(f)))
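
# A small usage sketch (not from the original example): generate synthetic
# counts and recover (r, p) via the Newton wrapper above.  The fitted (r, p)
# follow the parameterization used in negbin_loglike, which may differ from
# numpy's negative_binomial convention; grad is assumed imported as elsewhere.
import scipy.optimize
import numpy.random as npr

npr.seed(0)
counts = npr.negative_binomial(10, 0.3, size=2000)
r_hat, p_hat = fit_maxlike(counts, r_guess=1.0)
print("max-likelihood r = {}, p = {}".format(r_hat, p_hat))
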
Example #5
    def predictive_gradients(self, Xnew):
        # TODO: check whether this gradient is correct.
        """
        Compute the derivatives of the predicted latent function with respect to X*.

        Given a set of points X* (size [N*, Q]) at which to predict, compute the
        derivatives of the mean and variance. The resulting arrays are sized:

         dmu_dX* -- [N*, Q, D], where D is the number of outputs of this GP (usually one)
         dv_dX*  -- [N*, Q],    (since all outputs have the same variance)

        Note that this is not the same as computing the mean and variance of the
        derivative of the function!

        :param Xnew: the points at which to get the predictive gradients
        :type Xnew: np.ndarray (N* x self.input_dim)
        :returns: dmu_dX, dv_dX
        :rtype: [np.ndarray (N*, Q, D), np.ndarray (N*, Q)]
        """
        def meanCal(CurX):
            mean, log_std = self.unpack_params(self.update_param)
            rs = npr.RandomState(0)

            # A single deterministic weight vector: the variational mean
            # shifted by one standard deviation.
            samples = np.array([np.exp(log_std) + mean])

            # num_samples noisy copies of each test point.
            inputnew = []
            for i in range(0, Xnew.shape[0]):
                inputnew.append(rs.randn(self.num_samples, 1) + CurX[i])
            inputs = np.array(inputnew)

            # Forward pass through the network.
            for W, b in self.unpack_layers(samples):
                outputs = np.einsum("mnd,mdo->mno", inputs, W) + b
                inputs = self.nonlinearity(outputs)

            # Average the network outputs over the noisy copies (mirrors
            # varCal below, which takes the variance of `outputs`).
            meanresult = np.mean(outputs, axis=1)
            meanresult = np.expand_dims(meanresult, 0)
            return meanresult

        hypergrad = grad(meanCal)

        def varCal(CurX):

            inputs = np.expand_dims(CurX, 0)

            mean, log_std = self.unpack_params(self.update_param)
            rs = npr.RandomState(0)

            samples = []
            samples.append(np.exp(log_std) + mean)
            samples = np.array(samples)
            inputs = rs.randn(self.num_samples, 1) + inputs
            for W, b in self.unpack_layers(samples):
                outputs = np.einsum("mnd,mdo->mno", inputs, W) + b
                inputs = self.nonlinearity(outputs)
            variance = outputs.var(axis=1)
            return variance

        hypergrad1 = grad(varCal)

        mean1, var1 = self.predict(Xnew)
        mean_gra = np.expand_dims(hypergrad(Xnew), 0)
        var_gra = hypergrad1(Xnew)
        return mean_gra, var_gra
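
    def _check_predictive_gradients(self, Xnew, eps=1e-6):
        # A finite-difference spot check for the TODO above, added for
        # illustration (not part of the original class).  It perturbs the
        # first coordinate of the first test point and compares the change in
        # the predictive mean against the analytic gradient.
        mean_grad, _ = self.predictive_gradients(Xnew)
        mean0, _ = self.predict(Xnew)
        Xp = Xnew.copy()
        Xp[0, 0] += eps
        mean1, _ = self.predict(Xp)
        numeric = (np.ravel(mean1)[0] - np.ravel(mean0)[0]) / eps
        print("analytic (first entry):", np.ravel(mean_grad)[0])
        print("numeric  (first entry):", numeric)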
    import imp, urllib
    add_color_channel = lambda x : x.reshape((x.shape[0], 1, x.shape[1], x.shape[2]))
    one_hot = lambda x, K : np.array(x[:,None] == np.arange(K)[None, :], dtype=int)
    source, _ = urllib.urlretrieve(
        'https://raw.githubusercontent.com/HIPS/Kayak/master/examples/data.py')
    data = imp.load_source('data', source).mnist()
    train_images, train_labels, test_images, test_labels = data
    train_images = add_color_channel(train_images) / 255.0
    test_images  = add_color_channel(test_images)  / 255.0
    train_labels = one_hot(train_labels, 10)
    test_labels = one_hot(test_labels, 10)
    N_data = train_images.shape[0]

    # Make neural net functions
    N_weights, pred_fun, loss_fun, frac_err = make_nn_funs(input_shape, layer_specs, L2_reg)
    loss_grad = grad(loss_fun)

    # Initialize weights
    rs = npr.RandomState()
    W = rs.randn(N_weights) * param_scale

    # Check the gradients numerically, just to be safe
    # quick_grad_check(loss_fun, W, (train_images[:50], train_labels[:50]))

    print("    Epoch      |    Train err  |   Test error  ")
    def print_perf(epoch, W):
        test_perf  = frac_err(W, test_images, test_labels)
        train_perf = frac_err(W, train_images, train_labels)
        print("{0:15}|{1:15}|{2:15}".format(epoch, train_perf, test_perf))

    # Train with sgd
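    # A sketch of the SGD-with-momentum loop that the comment above refers to
    # (reconstructed for illustration).  batch_size, num_epochs, learning_rate
    # and momentum are assumed to be hyperparameters defined earlier in the
    # enclosing script.
    def make_batches(N_total, batch_size):
        return [slice(i, i + batch_size) for i in range(0, N_total, batch_size)]

    batch_idxs = make_batches(N_data, batch_size)
    cur_dir = np.zeros(N_weights)
    for epoch in range(num_epochs):
        print_perf(epoch, W)
        for idxs in batch_idxs:
            grad_W = loss_grad(W, train_images[idxs], train_labels[idxs])
            cur_dir = momentum * cur_dir + (1.0 - momentum) * grad_W
            W -= learning_rate * cur_dir
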
from __future__ import absolute_import
from __future__ import print_function
import autogradwithbay.numpy as np
import matplotlib.pyplot as plt
from autogradwithbay import grad
from builtins import range, map

def fun(x):
    return np.sin(x)

d_fun = grad(fun)    # First derivative
dd_fun = grad(d_fun) # Second derivative

x = np.linspace(-10, 10, 100)
plt.plot(x, list(map(fun, x)), x, list(map(d_fun, x)), x, list(map(dd_fun, x)))

plt.xlim([-10, 10])
plt.ylim([-1.2, 1.2])
plt.axis('off')
plt.savefig("sinusoid.png")
plt.clf()

# Taylor approximation to sin function
def fun(x):
    currterm = x
    ans = currterm
    for i in range(1000):
        print(i, end=' ')
        currterm = - currterm * x ** 2 / ((2 * i + 3) * (2 * i + 2))
        ans = ans + currterm
        if np.abs(currterm) < 0.2: break  # (Very generous tolerance!)
    return ans

    # Implement a 3-hidden layer neural network.
    num_weights, predictions, logprob = make_nn_funs(layer_sizes=[1, 20, 20, 20, 1], nonlinearity=rbf)

    inputs, targets = build_toy_dataset()
    objective = lambda weights, t: -logprob(weights, inputs, targets)

    # Set up figure.
    fig = plt.figure(figsize=(12, 8), facecolor="white")
    ax = fig.add_subplot(111, frameon=False)
    plt.show(block=False)

    def callback(params, t, g):
        print("Iteration {} log likelihood {}".format(t, -objective(params, t)))

        # Plot data and functions.
        plt.cla()
        ax.plot(inputs.ravel(), targets.ravel(), "bx")
        plot_inputs = np.reshape(np.linspace(-7, 7, num=300), (300, 1))
        outputs = predictions(params, plot_inputs)
        ax.plot(plot_inputs, outputs)
        ax.set_ylim([-1, 1])
        plt.draw()
        plt.pause(1.0 / 60.0)

    rs = npr.RandomState(0)
    init_params = 0.1 * rs.randn(num_weights)

    print("Optimizing network parameters...")
    optimized_params = adam(grad(objective), init_params, step_size=0.01, num_iters=1000, callback=callback)
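
    # Sketches of two helpers assumed above but not shown in this snippet
    # (rbf and build_toy_dataset); their definitions in the original script
    # may differ.  make_nn_funs is taken to return
    # (num_weights, predictions, logprob) as used above.
    def rbf(x):
        # Radial-basis-function nonlinearity.
        return np.exp(-x ** 2)

    def build_toy_dataset(n_data=40, noise_std=0.1):
        # Noisy 1-D regression targets, shaped (n_data, 1).
        rs = npr.RandomState(0)
        inputs = np.linspace(-5, 5, num=n_data).reshape((n_data, 1))
        targets = np.cos(inputs) + rs.randn(n_data, 1) * noise_std
        return inputs, targets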