Example #1
def gradient_lower_bound(params,y,X,num_samples,N):
    eps = np.random.normal(0,1,(num_samples,np.shape(X)[-1]+1))
    n = np.shape(y)[0]
    u = np.random.uniform(0,1,num_samples*N*n)
    gradient_lower_bound = grad(lower_bound)
    g = gradient_lower_bound(params,y,X,eps,N,u)
    return g
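
The helper above simply wraps autograd's grad around a scalar-valued lower_bound (not shown here). A minimal sketch of that pattern, with a made-up toy_lower_bound standing in for the real objective:

import autograd.numpy as np
from autograd import grad

def toy_lower_bound(params, y, X, eps):
    # stand-in for the real lower_bound: a Monte Carlo average over noise samples
    beta = params[None, :] + eps               # one coefficient vector per noise sample
    preds = np.dot(beta, X.T)                  # (num_samples, n) predictions
    return -np.mean((preds - y[None, :]) ** 2)

X = np.random.randn(5, 3)
y = np.random.randn(5)
eps = np.random.normal(0, 1, (10, 3))

g = grad(toy_lower_bound)(np.zeros(3), y, X, eps)  # gradient w.r.t. the first argument only
print(g.shape)                                     # (3,)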
Example #2
def expectation(params,y,X,eps,N,z,u):
    mu = params[0:(len(params)-2)//2]
    Sigma = np.exp(params[(len(params)-2)//2:-2])
    tauParams = params[-2:]
    E = 0
    n = X.shape[0]
    for j in range(np.shape(eps)[0]):
        beta = mu+Sigma*eps[j,:]
        ll=log_likelihood(beta,y,X,z[j*(n*N):(j+1)*(n*N)],u[j*(n*N):(j+1)*(n*N)],tauParams,N)
        E += ll
    return E/np.shape(eps)[0]
Example #3
def natural_sample(natparam, num_samples):
    neghalfJ, h, _, _ = unpack_dense(natparam)
    sample_shape = np.shape(h) + (num_samples,)
    J = -2*neghalfJ
    L = np.linalg.cholesky(J)
    noise = np.linalg.solve(T(L), npr.randn(*sample_shape))
    return np.linalg.solve(J, h)[...,None,:] + T(noise)
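
natural_sample draws Gaussian samples from natural parameters: J = -2 * neghalfJ is the precision matrix, solve(J, h) is the mean, and solving the transposed Cholesky factor of J against standard normal noise yields samples with covariance J^{-1}. A small numerical check of that last identity in plain NumPy (unpack_dense and T from the original module are omitted):

import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((3, 3))
J = A @ A.T + 3 * np.eye(3)              # a positive-definite precision matrix

L = np.linalg.cholesky(J)                # J = L L^T
noise = rng.standard_normal((3, 200000))
samples = np.linalg.solve(L.T, noise)    # each column ~ N(0, J^{-1})

emp_cov = samples @ samples.T / samples.shape[1]
print(np.allclose(emp_cov, np.linalg.inv(J), atol=2e-2))  # True, up to Monte Carlo error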
Example #4
def KL_via_sampling(params,eps):
    #also need to include lognormal as a replacement for gamma distribution
    #this is giving log of negatives
    d = np.shape(params)[0]-1
    mu = params[0:d,0]
    Sigma = params[0:d,1:d+1]
    di = np.diag_indices(d)
    Sigma[di] = np.exp(Sigma[di])
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    E = 0
    for j in range(np.shape(eps)[0]):
        beta = mu+np.dot(Sigma,eps[j,:])
        E+= np.log(normal_pdf(beta,mu,Sigma)/normal_pdf(beta,muPrior,sigmaPrior))
    E = np.mean(E)
    return E
Example #5
def make_grad_logsumexp(ans, x, axis=None, b=1.0, keepdims=False):
    shape, dtype = anp.shape(x), anp.result_type(x)
    def vjp(g):
        g_repeated,   _ = repeat_to_match_shape(g,   shape, dtype, axis, keepdims)
        ans_repeated, _ = repeat_to_match_shape(ans, shape, dtype, axis, keepdims)
        return g_repeated * b * anp.exp(x - ans_repeated)
    return vjp
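
The VJP above encodes the identity that the gradient of logsumexp is the softmax of its input, scaled by b and broadcast back to the input shape. A quick check of that identity with a hand-rolled stable logsumexp and autograd:

import autograd.numpy as anp
from autograd import grad

def logsumexp(x):
    # numerically stable log-sum-exp over a 1-D array
    a = anp.max(x)
    return a + anp.log(anp.sum(anp.exp(x - a)))

x = anp.array([0.5, -1.2, 3.0, 0.1])
g = grad(logsumexp)(x)
softmax = anp.exp(x - logsumexp(x))
print(anp.allclose(g, softmax))  # True: d/dx logsumexp(x) = softmax(x)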
 def init():
     offset = 2.0
     #if optimum[0] < np.inf:
     #    xmin = min(results['ADAM'][0][0], optimum[0]) - offset
     #    xmax = max(results['ADAM'][0][0], optimum[0]) + offset
     #else:
     xmin = domain[0, 0]
     xmax = domain[0, 1]
     #if optimum[1] < np.inf:
     #    ymin = min(results['ADAM'][1][0], optimum[1]) - offset
     #    ymax = max(results['ADAM'][1][0], optimum[1]) + offset
     #else:
     ymin = domain[1, 0]
     ymax = domain[1, 1]
     x = np.arange(xmin, xmax, 0.01)
     y = np.arange(ymin, ymax, 0.01)
     X, Y = np.meshgrid(x, y)
     Z = np.zeros(np.shape(Y))
     for a, _ in np.ndenumerate(Y):
         Z[a] = func(X[a], Y[a])
     level = fdict['level']
     if level is None:
         level = np.linspace(Z.min(), Z.max(), 20)
     else:
         if level[0] == 'normal':
             level = np.linspace(Z.min(), Z.max(), level[1])
         if level[0] == 'log':
             level = np.logspace(np.log(Z.min()), np.log(Z.max()), level[1])
     CF = ax[0].contour(X,Y,Z, levels=level)
     #plt.colorbar(CF, orientation='horizontal', format='%.2f')
     ax[0].grid()
     ax[0].plot(results['ADAM'][0][0], results['ADAM'][1][0], 
         'h', markersize=15, color = '0.75')
     if optimum[0] < np.inf and optimum[1] < np.inf:
         ax[0].plot(optimum[0], optimum[1], '*', markersize=40, 
             markeredgewidth = 2, alpha = 0.5, color = '0.75')
     ax[0].legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.15))
     
     ax[1].plot(0, results['ADAM'][2][0], 'o')
     ax[1].axis([0, T, -0.5, max_err + 0.5])
     ax[1].set_xlabel('num. iteration')
     ax[1].set_ylabel('loss')
     
     line1.set_data([], [])
     line2.set_data([], [])
     line3.set_data([], [])
     line4.set_data([], [])
     line5.set_data([], [])
     
     err1.set_data([], [])
     err2.set_data([], [])
     err3.set_data([], [])
     err4.set_data([], [])
     err5.set_data([], [])
     
     return line1, line2, line3, line4, line5, \
         err1, err2, err3, err4, err5, 
Example #7
def expectation(params,y,X,eps,N,u):
    mu = params[0:len(params)//2]
    Sigma = np.exp(params[len(params)//2:])
    E = 0
    n = X.shape[0]
    for j in range(np.shape(eps)[0]):
        beta = mu+Sigma*eps[j,:]
        E+=log_likelihood(beta,y,X)#,u[j*(n*N):(j+1)*(n*N)])
    return E/len(beta)
Example #8
def KL_two_gaussians(params):
    d = np.shape(params)[0]-1
    mu = params[0:d,0]
    toSigma = params[0:d,1:d+1]
    intSigma = toSigma-np.diag(np.diag(toSigma))+np.diag(np.exp(np.diag(toSigma)))
    Sigma = intSigma-np.tril(intSigma)+np.transpose(np.triu(intSigma))
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    #print Sigma
    #print np.linalg.det(Sigma)
    return 0.5*(np.log(np.linalg.det(Sigma)/np.linalg.det(sigmaPrior))-d+np.trace(np.dot(np.linalg.inv(Sigma),sigmaPrior))+np.dot(np.transpose(mu-muPrior),np.dot(np.linalg.inv(Sigma),mu-muPrior)))
Example #9
def expectation(params,y,X,eps,N,u):
    #for each sample of theta, calculate likelihood
    #likelihood has participants
    #for each participant, we have N particles
    #with L samples, n participants, N particles per participant and sample, we have
    #L*n*N particles
    #get the first column to be mu
    d = np.shape(X)[-1]+1
    mu = params[0:d,0]
    toSigma = params[0:d,1:d+1]
    intSigma = toSigma-np.diag(np.diag(toSigma))+np.diag(np.exp(np.diag(toSigma)))
    Sigma = intSigma-np.tril(intSigma)+np.transpose(np.triu(intSigma))
    print(mu)
    print(Sigma)
    n = X.shape[0]
    E = 0
    #iterate over number of samples of theta
    for j in range(np.shape(eps)[0]):
        beta = mu+np.dot(Sigma,eps[j,:])
        #this log likelihood will iterate over both the participants and the particles
        E+=log_likelihood(beta,y,X,u[j*(n*N):(j+1)*(n*N)])
    return E/len(beta)
Example #10
def grad_odeint(yt, func, y0, t, func_args, **kwargs):
    # Extended from "Scalable Inference of Ordinary Differential
    # Equation Models of Biochemical Processes", Sec. 2.4.2
    # Fabian Froehlich, Carolin Loos, Jan Hasenauer, 2017
    # https://arxiv.org/abs/1711.08079
    
    T, D = np.shape(yt)
    flat_args, unflatten = flatten(func_args)
    
    def flat_func(y, t, flat_args):
        return func(y, t, *unflatten(flat_args))

    def unpack(x):
        #      y,      vjp_y,      vjp_t,    vjp_args
        return x[0:D], x[D:2 * D], x[2 * D], x[2 * D + 1:]

    def augmented_dynamics(augmented_state, t, flat_args):
        # Original system augmented with vjp_y, vjp_t and vjp_args.
        y, vjp_y, _, _ = unpack(augmented_state)
        vjp_all, dy_dt = make_vjp(flat_func, argnum=(0, 1, 2))(y, t, flat_args)
        vjp_y, vjp_t, vjp_args = vjp_all(-vjp_y)
        return np.hstack((dy_dt, vjp_y, vjp_t, vjp_args))

    def vjp_all(g):
        
        vjp_y = g[-1, :]
        vjp_t0 = 0
        time_vjp_list = []
        vjp_args = np.zeros(np.size(flat_args))
        
        for i in range(T - 1, 0, -1):

            # Compute effect of moving measurement time.
            vjp_cur_t = np.dot(func(yt[i, :], t[i], *func_args), g[i, :])
            time_vjp_list.append(vjp_cur_t)
            vjp_t0 = vjp_t0 - vjp_cur_t

            # Run augmented system backwards to the previous observation.
            aug_y0 = np.hstack((yt[i, :], vjp_y, vjp_t0, vjp_args))
            aug_ans = odeint(augmented_dynamics, aug_y0,
                             np.array([t[i], t[i - 1]]), tuple((flat_args,)), **kwargs)
            _, vjp_y, vjp_t0, vjp_args = unpack(aug_ans[1])

            # Add gradient from current output.
            vjp_y = vjp_y + g[i - 1, :]

        time_vjp_list.append(vjp_t0)
        vjp_times = np.hstack(time_vjp_list)[::-1]

        return None, vjp_y, vjp_times, unflatten(vjp_args)
    return vjp_all
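
This is the adjoint construction that autograd's autograd.scipy.integrate.odeint uses as its VJP: the ODE is re-solved backwards between observation times with the state augmented by vector-Jacobian products w.r.t. the state, the time points, and the flattened parameters. A hedged usage sketch, assuming that odeint wrapper is available:

import autograd.numpy as np
from autograd import grad
from autograd.scipy.integrate import odeint

def dynamics(y, t, params):
    return -params[0] * y                     # simple exponential decay

t = np.linspace(0.0, 1.0, 5)
y0 = np.array([1.0])

def final_value(params):
    yt = odeint(dynamics, y0, t, tuple((params,)))
    return yt[-1, 0]

# y(1) = exp(-theta), so d/dtheta y(1) = -exp(-theta) ~= -0.4966 at theta = 0.7
print(grad(final_value)(np.array([0.7])))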
Example #11
def expectation(params, y, X, eps, N, u):
    #for each sample of theta, calculate likelihood
    #likelihood has participants
    #for each participant, we have N particles
    #with L samples, n participants, N particles per participant and sample, we have
    #L*n*N particles
    #get the first column to be mu
    d = np.shape(X)[-1] + 1
    mu = params[0:d, 0]
    toSigma = params[0:d, 1:d + 1]
    intSigma = toSigma - np.diag(np.diag(toSigma)) + np.diag(
        np.exp(np.diag(toSigma)))
    Sigma = intSigma - np.tril(intSigma) + np.transpose(np.triu(intSigma))
    print(mu)
    print(Sigma)
    n = X.shape[0]
    E = 0
    #iterate over number of samples of theta
    for j in range(np.shape(eps)[0]):
        beta = mu + np.dot(Sigma, eps[j, :])
        #this log likelihood will iterate over both the participants and the particles
        E += log_likelihood(beta, y, X, u[j * (n * N):(j + 1) * (n * N)])
    return E / len(beta)
Example #12
def RMSprop(g, alpha, max_its, w, num_pts, batch_size, **kwargs):
    # rmsprop params
    gamma = 0.9
    eps = 10**-8
    if 'gamma' in kwargs:
        gamma = kwargs['gamma']
    if 'eps' in kwargs:
        eps = kwargs['eps']

    # flatten the input function, create gradient based on flat function
    g_flat, unflatten, w = flatten_func(g, w)
    grad = value_and_grad(g_flat)

    # initialize average gradient
    avg_sq_grad = np.ones(np.size(w))

    # record history
    w_hist = [unflatten(w)]
    train_hist = [g_flat(w, np.arange(num_pts))]

    # how many mini-batches equal the entire dataset?
    num_batches = int(np.ceil(np.divide(num_pts, batch_size)))

    # over the line
    for k in range(max_its):
        # loop over each minibatch
        train_cost = 0
        for b in range(num_batches):
            # collect indices of current mini-batch
            batch_inds = np.arange(b * batch_size,
                                   min((b + 1) * batch_size, num_pts))

            # plug in value into func and derivative
            cost_eval, grad_eval = grad(w, batch_inds)
            grad_eval.shape = np.shape(w)

            # update exponential average of past gradients
            avg_sq_grad = gamma * avg_sq_grad + (1 - gamma) * grad_eval**2

            # take descent step
            w = w - alpha * grad_eval / (avg_sq_grad**(0.5) + eps)

        # update training and validation cost
        train_cost = g_flat(w, np.arange(num_pts))

        # record weight update, train and val costs
        w_hist.append(unflatten(w))
        train_hist.append(train_cost)

    return w_hist, train_hist
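
A hedged usage sketch for the optimizer above, assuming flatten_func comes from autograd.misc.flatten and value_and_grad from autograd (as the snippet implies), and that the cost takes (w, batch_inds); the data and cost below are made up for illustration:

import autograd.numpy as np
from autograd import value_and_grad
from autograd.misc.flatten import flatten_func

# toy regression data and a least-squares cost taking (w, batch_inds)
X = np.random.randn(100, 3)
y = np.dot(X, np.array([1.0, -2.0, 0.5])) + 0.1 * np.random.randn(100)

def least_squares(w, inds):
    return np.mean((np.dot(X[inds], w) - y[inds]) ** 2)

w_hist, train_hist = RMSprop(least_squares, alpha=0.01, max_its=50,
                             w=np.zeros(3), num_pts=100, batch_size=20)
print(train_hist[0], train_hist[-1])  # the cost should decrease over the run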
Example #13
 def predict(self, Xnew, predvar=False):
     """ Returns the predictive mean and variance of the GP
     """
     Xnew = (Xnew - self.Xmean)/self.Xstd
     alpha = solve(self.L.T, solve(self.L,self.Y*self.Ystd+self.Ymean) )
     if predvar:
         m = np.shape(Xnew)[0]
         Knew_N,_ = self.K(self.lengthscale, Xnew, self.X)
         Knew_new = np.array( [self.scalarK(Xnew[i], Xnew[i], self.lengthscale) for i in range(m)] ).reshape([m,1])
         v = solve(self.L, Knew_N.T)
         return np.dot(Knew_N, alpha), np.diag( Knew_new + self.likelihood_variance - np.dot(v.T, v) ).reshape(m,1)
     else:
         Knew_N,_ = self.K(self.lengthscale, Xnew, self.X)
         return np.dot(Knew_N, alpha)
    def maxout_feature_transforms(self,a, w):    
        # loop through each layer matrix
        for W1,W2 in w:
            #  pad with ones (to compactly take care of bias) for next layer computation        
            o = np.ones((1,np.shape(a)[1]))
            a = np.vstack((o,a))

            # compute inner product with current layer weights
            a1 = np.dot(a.T, W1).T
            a2 = np.dot(a.T, W2).T

            # output of layer activation
            a = self.activation(a1,a2)
        return a
Example #15
def sample_latent_variables_from_posterior(encoder_output):

    # Params of a diagonal Gaussian.

    D = np.shape(encoder_output)[-1] // 2
    mean, log_std = encoder_output[:, :D], encoder_output[:, D:]
    # TODO use the reparametrization trick to generate one sample from q(z|x) per each batch datapoint
    # use npr.randn for that.
    # The output of this function is a matrix of size the batch x the number of latent dimensions

    # the sampling is based on Eq. 15; the noise is generated randomly (via npr.randn)
    # for log_std, the log is removed via exp (the commented-out variant multiplies by 0.5, treating it as a log-variance)
    #return mean + np.exp(0.5 * log_std) * npr.randn(mean.shape[0], mean.shape[1])
    return mean + np.exp(log_std) *  npr.randn(mean.shape[0],  mean.shape[1])
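
The return statement above is the reparameterization trick: z = mean + exp(log_std) * eps with eps ~ N(0, I) is a deterministic, differentiable function of the variational parameters, so gradients flow through the sampling step. A minimal sketch with toy shapes and a toy loss (not the VAE objective from this code):

import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import grad

def sample_then_score(params, eps):
    mean, log_std = params
    z = mean + np.exp(log_std) * eps      # reparameterized sample
    return np.mean(z ** 2)                # toy downstream loss

mean, log_std = np.zeros((4, 3)), np.zeros((4, 3))
eps = npr.randn(4, 3)

g_mean, g_log_std = grad(sample_then_score)((mean, log_std), eps)
print(g_mean.shape, g_log_std.shape)      # gradients w.r.t. both variational parameters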
Example #16
    def choose_convolutions(self, kernel_sizes, **kwargs):
        # setup convolution layer
        #img_size = int(self.x.shape[0]**(0.5))
        transformer = convolutional_layer.Setup(kernel_sizes, **kwargs)
        self.conv_layer = transformer.conv_layer
        self.conv_initializer = transformer.conv_initializer

        # determine output size of conv layer based on image size / kernel sizes
        # by passing image through the convolution layer
        kernels = self.conv_initializer()
        if 'kernels' in kwargs:
            kernels = kwargs['kernels']
        final_features = self.conv_layer(self.x[:, :1].T, kernels)
        self.conv_output_size = np.shape(final_features)[1]
Example #17
def compute_maxout_features(x, inner_weights):
    # pad data with ones to deal with bias
    o = np.ones((np.shape(x)[0], 1))
    a_padded = np.concatenate((o, x), axis=1)

    # loop through weights and update each layer of the network
    for W1, W2 in inner_weights:
        # output of layer activation
        a = activation(np.dot(a_padded, W1), np.dot(a_padded, W2))

        ### normalize output of activation
        # compute the mean and standard deviation of the activation output distributions
        a_means = np.mean(a, axis=0)
        a_stds = np.std(a, axis=0)

        # normalize the activation outputs
        a_normed = normalize(a, a_means, a_stds)

        # pad with ones for bias
        o = np.ones((np.shape(a_normed)[0], 1))
        a_padded = np.concatenate((o, a_normed), axis=1)

    return a_padded
Example #18
def reflect_over_XZ_plane(input_vector):
    # Takes in a vector or an array and flips the y-coordinates.
    output_vector = input_vector
    shape = np.shape(output_vector)
    if len(shape) == 1 and shape[0] == 3:  # Vector of 3 items
        output_vector = output_vector * np.array([1, -1, 1])
    elif len(shape) == 2 and shape[1] == 3:  # 2D Nx3 vector
        output_vector = output_vector * np.array([1, -1, 1])
    elif len(shape) == 3 and shape[2] == 3:  # 3D MxNx3 vector
        output_vector = output_vector * np.array([1, -1, 1])
    else:
        raise Exception("Invalid input for reflect_over_XZ_plane!")

    return output_vector
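
A small usage sketch for reflect_over_XZ_plane, assuming the function above is in scope:

import numpy as np

v = np.array([1.0, 2.0, 3.0])
pts = np.array([[1.0, 2.0, 3.0],
                [4.0, -5.0, 6.0]])

print(reflect_over_XZ_plane(v))    # [ 1. -2.  3.]
print(reflect_over_XZ_plane(pts))  # the y column of every row is negated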
def matvec_mul_last2dims(x, y):
    # x is m1 x m2 ....x m_d2 x s x r
    # y is m1 x m2 ... x m_d1 x s

    # we do x^T y, along the last two dimension of x
    # y can have more dimensions than x (d1 >= d2),
    # in which case we sum over y's extra leading dimensions

    assert np.shape(x)[-2] == np.shape(y)[-1]

    d1 = len(np.shape(y)) - 1
    d2 = len(np.shape(x)) - 2

    assert d2 <= d1

    einsum_indx1 = list(range(d2))
    einsum_indx1.append(d1)
    einsum_indx1.append(d1 + 1)
    einsum_indx2 = list(range(d1 + 1))
    einsum_indx_out = list(range(d2))
    einsum_indx_out.append(d1 + 1)

    return np.einsum(x, einsum_indx1, y, einsum_indx2, einsum_indx_out)
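
A numerical check of matvec_mul_last2dims for the simple case d1 == d2, comparing against an explicit einsum over named indices (assumes the function above is in scope):

import numpy as np

x = np.random.randn(2, 3, 4, 5)    # leading dims (2, 3), then s = 4, r = 5
y = np.random.randn(2, 3, 4)       # leading dims (2, 3), then s = 4

out = matvec_mul_last2dims(x, y)
print(out.shape)                                           # (2, 3, 5)
print(np.allclose(out, np.einsum('ijsr,ijs->ijr', x, y)))  # True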
def mat_mul_last2dims(x1, x2):
    # multiply the last two dimensions of two arrays

    # x1 is m1 x m2 ....x m_d x s x r
    # x2 is m1 x m2 ... x m_d x s x t

    # x1 and x2 are an (m1 x m2 ....x m_d) array of matrices,
    # whose last two dimensions specify a matrix.

    # We return 'x1^T x2', with the matrix multiplication done along
    # the last two dimensions

    assert len(np.shape(x1)) == (len(np.shape(x2)))
    assert np.shape(x2)[-2] == np.shape(x1)[-2]

    d = len(np.shape(x1))
    einsum_indx2 = list(range(d - 1))
    einsum_indx2.append(d)
    einsum_indx_out = list(range(d - 2))
    einsum_indx_out.append(d - 1)
    einsum_indx_out.append(d)

    return np.einsum(x1, list(range(d)), x2, einsum_indx2, einsum_indx_out)
    def conv_model(self,x,w):   
        c = self.conv_layer(x.T,w[0]).T
        
        # pass through fully connected layers
        f = self.feature_transforms(c,w[1])
  
        # tack a 1 onto the top of each input point all at once
        o = np.ones((1,np.shape(f)[1]))
        f = np.vstack((o,f))

        # compute linear combination and return
        a = np.dot(f.T,w[2])

        return a.T
Example #22
def forward_pass(W1, W2, W3, b1, b2, b3, x):
    """
    forward-pass for a fully connected neural network with 2 hidden layers of M neurons
    Inputs:
        W1 : (M, 784) weights of first (hidden) layer
        W2 : (M, M) weights of second (hidden) layer
        W3 : (10, M) weights of third (output) layer
        b1 : (M, 1) biases of first (hidden) layer
        b2 : (M, 1) biases of second (hidden) layer
        b3 : (10, 1) biases of third (output) layer
        x : (N, 784) training inputs
    Outputs:
        Fhat : (N, 10) output of the neural network at training inputs
    """
    H1 = np.maximum(0,
                    np.dot(x, W1.T) +
                    b1.T)  # layer 1 neurons with ReLU activation, shape (N, M)
    H2 = np.maximum(0,
                    np.dot(H1, W2.T) +
                    b2.T)  # layer 2 neurons with ReLU activation, shape (N, M)
    Fhat = np.dot(
        H2, W3.T
    ) + b3.T  # layer 3 (output) neurons with linear activation, shape (N, 10)

    # Implement a stable log-softmax activation function at the output layer

    # Compute max of each row
    a = np.ones(np.shape(Fhat)) * np.expand_dims(
        np.amax(Fhat, axis=1),
        axis=1)  # a is the row-wise max of Fhat, broadcast to the same shape as Fhat
    log_sum_exp = np.ones(np.shape(Fhat)) * np.expand_dims(
        np.log(np.sum(np.exp(np.subtract(Fhat, a)),
                      axis=1)), axis=1)  # Compute using logSumExp trick
    # Element-wise subtraction
    Fhat = np.subtract(np.subtract(Fhat, a), log_sum_exp)

    return Fhat
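
The lines after the ReLU layers implement a numerically stable log-softmax: subtract the row-wise max, then subtract the log-sum-exp of the shifted logits. A small standalone check that this stays finite where a naive softmax would overflow:

import numpy as np

def log_softmax_stable(F):
    a = np.max(F, axis=1, keepdims=True)                 # row-wise max
    return F - a - np.log(np.sum(np.exp(F - a), axis=1, keepdims=True))

F = np.array([[1000.0, 1001.0, 1002.0],                  # exp(1000) would overflow
              [-5.0, 0.0, 5.0]])
out = log_softmax_stable(F)
print(np.all(np.isfinite(out)))                          # True
print(np.allclose(np.sum(np.exp(out), axis=1), 1.0))     # rows exponentiate to distributions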
def get_mixture_weights_from_stick_break_propns(stick_break_propns):
    """
    Computes stick lengths (i.e. mixture weights) from stick breaking
    proportions.

    Parameters
    ----------
    stick_break_propns : ndarray
        Array of stick breaking proportions, with sticks along last dimension

    Returns
    -------
    mixture_weights : ndarray
        An array of the same size as stick_break_propns,
        with the mixture weights computed for each row of
        stick breaking proportions.

    """

    # if input is a vector, make it a 1 x k_approx array
    if len(np.shape(stick_break_propns)) == 1:
        stick_break_propns = np.array([stick_break_propns])

    # number of components
    k_approx = np.shape(stick_break_propns)[-1]
    # number of mixtures
    ones_shape = stick_break_propns.shape[0:-1] + (1,)

    stick_break_propns_1m = 1 - stick_break_propns
    stick_remain = np.concatenate((np.ones(ones_shape),
                        _cumprod_through_log(stick_break_propns_1m, axis = -1)), axis = -1)
    stick_add = np.concatenate((stick_break_propns,
                                np.ones(ones_shape)), axis = -1)

    mixture_weights = (stick_remain * stick_add).squeeze()

    return mixture_weights
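
In the stick-breaking construction above, the k-th mixture weight is the k-th breaking proportion times the stick length remaining after the first k-1 breaks, and a final component absorbs the leftover stick. A small numeric illustration, assuming the function above and its _cumprod_through_log helper are in scope:

import numpy as np

props = np.array([0.5, 0.5, 0.5])        # break off half of the remaining stick each time
weights = get_mixture_weights_from_stick_break_propns(props)
print(weights)                            # ~ [0.5, 0.25, 0.125, 0.125]
print(np.isclose(np.sum(weights), 1.0))   # the weights always sum to one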
Example #24
def gradient_descent(g, alpha, max_its, w, num_pts, batch_size, **kwargs):
    # pluck out args
    beta = 0
    if 'beta' in kwargs:
        beta = kwargs['beta']
    normalize = False
    if 'normalize' in kwargs:
        normalize = kwargs['normalize']

    # flatten the input function, create gradient based on flat function
    g_flat, unflatten, w = flatten_func(g, w)
    grad = value_and_grad(g_flat)

    # record history
    w_hist = []
    w_hist.append(unflatten(w))

    # how many mini-batches equal the entire dataset?
    num_batches = int(np.ceil(np.divide(num_pts, batch_size)))

    # initialization for momentum direction
    h = np.zeros((w.shape))

    # over the line
    for k in range(max_its):
        # loop over each minibatch
        for b in range(num_batches):
            # collect indices of current mini-batch
            batch_inds = np.arange(b * batch_size,
                                   min((b + 1) * batch_size, num_pts))

            # plug in value into func and derivative
            cost_eval, grad_eval = grad(w, batch_inds)
            grad_eval.shape = np.shape(w)

            # normalize?
            if normalize == True:
                grad_eval = np.sign(grad_eval)

            # momentum step
            # h = beta*h - (1 - beta)*grad_eval

            # take descent step with momentum
            w = w - alpha * grad_eval

        # record weight update
        w_hist.append(unflatten(w))

    return w_hist
Example #25
def KL_two_gaussians(params):
    d = np.shape(params)[0] - 1
    mu = params[0:d, 0]
    toSigma = params[0:d, 1:d + 1]
    intSigma = toSigma - np.diag(np.diag(toSigma)) + np.diag(
        np.exp(np.diag(toSigma)))
    Sigma = intSigma - np.tril(intSigma) + np.transpose(np.triu(intSigma))
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    #print Sigma
    #print np.linalg.det(Sigma)
    return 0.5 * (np.log(np.linalg.det(Sigma) / np.linalg.det(sigmaPrior)) -
                    d + np.trace(np.dot(np.linalg.inv(Sigma), sigmaPrior)) +
                    np.dot(np.transpose(mu - muPrior),
                           np.dot(np.linalg.inv(Sigma), mu - muPrior)))
Example #26
    def plot_data(self):
        # construct figure
        fig, axs = plt.subplots(1, 3, figsize=(9,4))

        # create subplot with 2 panels
        gs = gridspec.GridSpec(1, 3, width_ratios=[1,5,1]) 
        ax1 = plt.subplot(gs[0]); ax1.axis('off') 
        ax2 = plt.subplot(gs[1]); 
        ax3 = plt.subplot(gs[2]); ax3.axis('off')
        
        if np.shape(self.x)[1] == 2:
            ax2 = plt.subplot(gs[1],projection='3d'); 

        # scatter points
        self.scatter_pts(ax2)
Example #27
def KW(TempK, Sal, Pbar, RGas, WhichKs):
    """Calculate water dissociation constant for the given options."""
    # Evaluate at atmospheric pressure
    KW = np.full(np.shape(TempK), np.nan)
    KW = np.where(WhichKs == 6, 0.0, KW)  # GEOSECS doesn't include OH effects
    KW = np.where(WhichKs == 7, p1atm.kH2O_SWS_M79(TempK, Sal), KW)
    KW = np.where(WhichKs == 8, p1atm.kH2O_SWS_HO58_M79(TempK, Sal), KW)
    KW = np.where(
        (WhichKs != 6) & (WhichKs != 7) & (WhichKs != 8),
        p1atm.kH2O_SWS_M95(TempK, Sal),
        KW,
    )
    # Now correct for seawater pressure
    KW = KW * pcx.KWfac(TempK, Pbar, RGas, WhichKs)
    return KW
def gradient_descent(g, w_init, alpha, max_its, verbose):
    # flatten the input function
    g_flat, unflatten, w = flatten_func(g, w_init)

    # compute gradient of flattened input function
    # when evaluated this returns both the evaluation of the gradient and the original function
    grad = value_and_grad(g_flat)
    cost_eval, grad_eval = grad(w)
    grad_eval.shape = np.shape(w)

    # record history
    w_hist = [unflatten(w)]
    train_hist = [cost_eval]

    # gradient descent loop
    for k in range(max_its):
        # take descent step with momentum
        w = w - alpha * grad_eval

        # plug in updated w into func and gradient
        cost_eval, grad_eval = grad(w)
        grad_eval.shape = np.shape(w)

        # store updates
        w_hist.append(unflatten(w))
        train_hist.append(cost_eval)

        # print update
        if verbose == True:
            print('step ' + str(k + 1) + ' complete, train cost = ' +
                  str(np.round(train_hist[-1], 4)[0]))

    # print update and return
    if verbose == True:
        print('finished all ' + str(max_its) + ' steps')
    return w_hist, train_hist
Example #29
def normalized_gradient_descent(g, alpha, max_its, w, beta):
    # flatten the input function, create gradient based on flat function
    g_flat, unflatten, w = flatten_func(g, w)
    grad = compute_grad(g_flat)

    # Record histories
    weight_hist = []
    weight_hist.append(unflatten(w))
    cost_hist = []

    # run the gradient descent loop
    z = np.zeros((np.shape(w)))  # momentum term

    for k in range(max_its):
        # evaluate the gradient, compute its length
        grad_eval = grad(w)
        grad_eval.shape = np.shape(w)
        grad_norm = np.linalg.norm(grad_eval)

        # check that magnitude of gradient is not too small, if yes pick a random direction to move
        if grad_norm == 0:
            # pick random direction and normalize to have unit length
            grad_eval = 10**-6 * np.sign(2 * np.random.rand(len(w)) - 1)
            grad_norm = np.linalg.norm(grad_eval)

        grad_eval /= grad_norm

        # take descent step with momentum
        z = beta * z + grad_eval
        w = w - alpha * z

        # Record and update histories
        weight_hist.append(unflatten(w))
        cost_hist.append(g_flat(w))

    return weight_hist, cost_hist
Example #30
def gaussian_mix_init(mu_arr, var_arr, prob_arr):
    # default, dimension>1
    gs = list()
    if mu_arr.ndim == 1:
        num_g, d = np.shape(mu_arr)[0], 1
        gs = [(gaussian_init(np.array([mu_arr[i]]), np.array(var_arr[i])))
              for i in range(num_g)]
    else:
        num_g, d = np.shape(mu_arr)
        gs = [(gaussian_init(mu_arr[i, :], var_arr[i, :, :]))
              for i in range(num_g)]

    def log_gaussian_mix(x):
        log_gs = np.array([g[0](x) for g in gs]).T
        prob_gs = np.exp(log_gs)
        probs = np.sum(prob_gs * prob_arr, 1)
        return np.log(probs)

    def generator(size):
        indices = np.argmax(npr.multinomial(1, prob_arr, size), axis=1)
        samples = [gs[id][1](1)[0] for id in indices]
        return np.array(samples)

    return log_gaussian_mix, generator
Example #31
def rms_norm(array):
    """
    Compute the rms norm of the array.

    Arguments:
    array :: ndarray (N) - The array to compute the norm of.

    Returns:
    norm :: float - The rms norm of the array.
    """
    square_norm = anp.sum(array * anp.conjugate(array))
    size = anp.prod(anp.shape(array))
    rms_norm_ = anp.sqrt(square_norm / size)

    return rms_norm_
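
A quick usage check for rms_norm, with anp being autograd.numpy as in the snippet above:

import autograd.numpy as anp

x = anp.array([3.0, 4.0])
print(rms_norm(x))                  # sqrt((9 + 16) / 2) ~= 3.5355
print(rms_norm(anp.ones((2, 3))))   # 1.0 for an all-ones array of any shape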
Example #32
    def _laplace_neg_hessian_params(self, data, input, mask, tag, x, Ez,
                                    Ezzp1):
        T, D = np.shape(x)
        x_mask = np.ones((T, D), dtype=bool)

        J_ini, J_dyn_11, J_dyn_21, J_dyn_22 = self.dynamics.\
            neg_hessian_expected_log_dynamics_prob(Ez, x, input, x_mask, tag)
        J_transitions = self.transitions.\
            neg_hessian_expected_log_trans_prob(x, input, x_mask, tag, Ezzp1)
        J_dyn_11 += J_transitions

        J_obs = self.emissions.\
            neg_hessian_log_emissions_prob(data, input, mask, tag, x, Ez)

        return J_ini, J_dyn_11, J_dyn_21, J_dyn_22, J_obs
Example #33
def gradient_descent(g, w, alpha, max_its, beta, version):
    g_flat, unflatten, w = flatten_func(g, w)
    grad = compute_grad(g_flat)

    w_hist = []
    w_hist.append(unflatten(w))

    z = np.zeros((np.shape(w)))

    for k in range(max_its):
        grad_eval = grad(w)
        grad_eval.shape = np.shape(w)

        if version == 'normalized':
            grad_norm = np.linalg.norm(grad_eval)
            if grad_norm == 0:
                grad_norm += 10 ** -6 * np.sign(2 * np.random.rand(1) - 1)
            grad_eval /= grad_norm

        z = beta * z + grad_eval
        w = w - alpha * z

        w_hist.append(unflatten(w))
    return w_hist
Example #34
def KWfac(TempK, Pbar, RGas, WhichKs):
    """Calculate pressure correction factor for KW."""
    TempC = convert.TempK2C(TempK)
    deltaV = np.full(np.shape(TempK), np.nan)
    Kappa = np.full(np.shape(TempK), np.nan)
    F = WhichKs == 8  # freshwater case
    # This is from Millero, 1983.
    deltaV = np.where(F, -25.6 + 0.2324 * TempC - 0.0036246 * TempC**2, deltaV)
    Kappa = np.where(F, (-7.33 + 0.1368 * TempC - 0.001233 * TempC**2) / 1000,
                     Kappa)
    # Note: the temperature dependence of KappaK1 and KappaKW for freshwater
    # in Millero, 1983 are the same.
    F = WhichKs != 8
    # GEOSECS doesn't include OH term, so this won't matter.
    # Peng et al didn't include pressure, but here I assume that the KW
    # correction is the same as for the other seawater cases.
    # This is from Millero, 1983 and his programs CO2ROY(T).BAS.
    deltaV = np.where(F, -20.02 + 0.1119 * TempC - 0.001409 * TempC**2, deltaV)
    # Millero, 1992 and Millero, 1995 have:
    Kappa = np.where(F, (-5.13 + 0.0794 * TempC) / 1000,
                     Kappa)  # Millero, 1983
    # Millero, 1995 has this too, but Millero, 1992 is different.
    # Millero, 1979 does not list values for these.
    return Kfac(deltaV, Kappa, Pbar, TempK, RGas)
Example #35
def gradient_descent(g, alpha, max_its, w, beta):
    # flatten the input function, create gradient based on flat function
    g_flat, unflatten, w = flatten_func(g, w)
    grad = compute_grad(g_flat)

    # record history
    w_hist = []
    w_hist.append(unflatten(w))

    # start gradient descent loop
    z = np.zeros((np.shape(w)))  # momentum term

    # over the line
    for k in range(max_its):
        # plug in value into func and derivative
        grad_eval = grad(w)
        grad_eval.shape = np.shape(w)
        """ normalized or unnormalized descent step? """
        # take descent step with momentum
        z = beta * z + grad_eval
        w = w - alpha * z

        # record weight update
        w_hist.append(unflatten(w))

    return w_hist
Example #36
def get_link_g(w, q, ln_q, ln_1_q, ln_s):

    w = w.reshape(-1, 3)

    n = numpy.shape(w)[0]

    g = numpy.zeros((n, 3))

    for i in range(0, 3):
        tmp_grad = autograd.elementwise_grad(e_link_log_lik, i)
        g[:, i] = tmp_grad(w[:, 0].reshape(-1, 1), w[:, 1].reshape(-1, 1),
                           w[:, 2].reshape(-1, 1), q, ln_q, ln_1_q, ln_s).ravel()

    return g.ravel()
    def _line_search(self, x, dk):
        t = 1
        delta_x = dk
        grad_x = copy(-dk)
        f_x = self.objectiveFunction(x)
        if np.shape(f_x) != ():
            print('oops')
            f_x = np.dot(f_x.T, f_x)
        f_x_tdeltax = self.objectiveFunction(x + t * delta_x)

        if np.shape(f_x_tdeltax) != ():
            f_x_tdeltax = np.dot(f_x_tdeltax.T, f_x_tdeltax)

        while ~np.isclose(f_x_tdeltax,
                          f_x + self.alpha * t *
                          (np.transpose(grad_x) @ delta_x),
                          rtol=1e-3):
            t = self.beta * t
            f_x_tdeltax = self.objectiveFunction(x + t * delta_x)
            if np.shape(f_x_tdeltax) != ():
                f_x_tdeltax = np.dot(f_x_tdeltax.T, f_x_tdeltax)
            if t < 2 * self.xtol:
                break
        return t, f_x_tdeltax
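
The method above is a backtracking line search: the step t shrinks by beta until a sufficient-decrease test (phrased here via np.isclose) is met or t falls below the tolerance. For comparison, a minimal standalone Armijo backtracking sketch with made-up test functions, not the class's exact criterion:

import numpy as np

def backtracking_line_search(f, grad_f, x, direction, alpha=0.3, beta=0.8, t0=1.0):
    # shrink the step until f(x + t d) <= f(x) + alpha * t * grad_f(x)^T d
    t = t0
    fx = f(x)
    slope = np.dot(grad_f(x), direction)
    while f(x + t * direction) > fx + alpha * t * slope:
        t *= beta
    return t

f = lambda x: np.sum(x ** 2)
grad_f = lambda x: 2 * x
x = np.array([2.0, -1.0])
d = -grad_f(x)                           # descent direction
t = backtracking_line_search(f, grad_f, x, d)
print(t, f(x + t * d) < f(x))            # the chosen step strictly decreases f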
Example #38
    def test_random_point(self):
        # Just test that rand returns a point on the manifold and two
        # different matrices generated by rand aren't too close together
        n = self.n
        manifold = self.manifold
        x = manifold.random_point()

        assert np.shape(x) == (n, n)

        # Check symmetry
        np_testing.assert_allclose(x, multisym(x))

        # Check positivity of eigenvalues
        w = np.linalg.eigvalsh(x)
        assert (w > [0]).all()
Example #39
def minibatch_gradient_descent(g, alpha_choice, max_its, w, batch_size,
                               num_pts):
    # flatten the input function, create gradient based on flat function
    g_flat, unflatten, w = flatten_func(g, w)

    # compute the gradient function of our input function - note this is a function too
    # that - when evaluated - returns both the gradient and function evaluations (remember
    # as discussed in Chapter 3 we always get the function evaluation 'for free' when we use
    # an Automatic Differentiator to evaluate the gradient)
    gradient = value_and_grad(g_flat)

    # run the gradient descent loop
    weight_history = []  # container for weight history
    cost_history = []  # container for corresponding cost function history
    alpha = 0

    # record history
    weight_history.append(unflatten(w))
    cost_history.append(g_flat(w, np.arange(num_pts)))

    # how many mini-batches equal the entire dataset?
    num_batches = int(np.ceil(np.divide(num_pts, batch_size)))
    # over the line
    for k in range(max_its):
        # check if diminishing steplength rule used
        if alpha_choice == 'diminishing':
            alpha = 1 / float(k + 1)  # avoid division by zero on the first iteration
        else:
            alpha = alpha_choice

        # loop over each minibatch
        for b in range(num_batches):
            # collect indices of current mini-batch
            batch_inds = np.arange(b * batch_size,
                                   min((b + 1) * batch_size, num_pts))

            # plug in value into func and derivative
            cost_eval, grad_eval = gradient(w, batch_inds)
            grad_eval.shape = np.shape(w)

            # take descent step with momentum
            w = w - alpha * grad_eval

        # record weight update
        weight_history.append(unflatten(w))
        cost_history.append(g_flat(w, np.arange(num_pts)))

    return weight_history, cost_history
def gradient_descent(g,w,x_train,y_train,alpha,max_its,batch_size,**kwargs): 
    verbose = True
    if 'verbose' in kwargs:
        verbose = kwargs['verbose']
    
    # flatten the input function, create gradient based on flat function
    g_flat, unflatten, w = flatten_func(g, w)
    grad = value_and_grad(g_flat)

    # record history
    num_train = y_train.shape[1]
    w_hist = [unflatten(w)]
    train_hist = [g_flat(w,x_train,y_train,np.arange(num_train))]
    
    # how many mini-batches equal the entire dataset?
    num_batches = int(np.ceil(np.divide(num_train, batch_size)))

    # over the line
    for k in range(max_its):                   
        # loop over each minibatch
        start = timer()
        train_cost = 0
        for b in range(num_batches):
            # collect indices of current mini-batch
            batch_inds = np.arange(b*batch_size, min((b+1)*batch_size, num_train))
            
            # plug in value into func and derivative
            cost_eval,grad_eval = grad(w,x_train,y_train,batch_inds)
            grad_eval.shape = np.shape(w)
    
            # take descent step with momentum
            w = w - alpha*grad_eval

        end = timer()
        
        # update training and validation cost
        train_cost = g_flat(w,x_train,y_train,np.arange(num_train))

        # record weight update, train and val costs
        w_hist.append(unflatten(w))
        train_hist.append(train_cost)

        if verbose == True:
            print ('step ' + str(k+1) + ' done in ' + str(np.round(end - start,1)) + ' secs, train cost = ' + str(np.round(train_hist[-1][0],4)))

    if verbose == True:
        print ('finished all ' + str(max_its) + ' steps')
    return w_hist,train_hist
Example #41
def log_likelihood(beta, y,X,z,u,tauParams,N):
    ll = 0
    #generate N*n particles
    inv_lognormal = 1./generate_lognormal(tauParams,u)
    if np.isnan(inv_lognormal).any():
        raise ValueError('NaNs encountered in inv_lognormal')
    alpha = np.zeros(len(inv_lognormal))#np.sqrt(inv_lognormal)*z
    print('mean inv lognormal')
    print(np.mean(inv_lognormal))
    count = 0
    ll = 0
    t = np.shape(y)[1]
    #iterate over participants
    for i in range(y.shape[0]):
        l_individual = likelihood_individual(beta,y[i,:],X[i,:,:],alpha[i*N:(i+1)*N])
        ll += np.log(l_individual)
    return ll
Example #42
def quick_grad_check(fun, arg0, extra_args=(), kwargs={}, verbose=True,
                     eps=EPS, rtol=RTOL, atol=ATOL, rs=None):
    """Checks the gradient of a function (w.r.t. to its first arg) in a random direction"""

    if verbose:
        print("Checking gradient of {0} at {1}".format(fun, arg0))

    if rs is None:
        rs = np.random.RandomState()

    random_dir = rs.standard_normal(np.shape(arg0))
    random_dir = random_dir / np.sqrt(np.sum(random_dir * random_dir))
    unary_fun = lambda x : fun(arg0 + x * random_dir, *extra_args, **kwargs)
    numeric_grad = unary_nd(unary_fun, 0.0, eps=eps)

    analytic_grad = np.sum(grad(fun)(arg0, *extra_args, **kwargs) * random_dir)

    assert np.allclose(numeric_grad, analytic_grad, rtol=rtol, atol=atol), \
        "Check failed! nd={0}, ad={1}".format(numeric_grad, analytic_grad)

    if verbose:
        print("Gradient projection OK (numeric grad: {0}, analytic grad: {1})".format(
            numeric_grad, analytic_grad))
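
A hedged usage sketch for quick_grad_check above, assuming the module-level defaults it references (EPS, RTOL, ATOL, unary_nd, grad) are available as in autograd's test utilities:

import autograd.numpy as np

def loss(w):
    return np.sum(np.tanh(w) ** 2)

# compares a 1-D numeric derivative along a random direction against the
# autograd gradient projected onto that same direction
quick_grad_check(loss, np.array([0.1, -0.3, 2.0]))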

def lower_half(mat):
    # fold the upper triangle onto the lower and halve the diagonal
    if len(mat.shape) == 2:
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    if len(mat.shape) == 1:
        return np.outer(mat, mat)
    elif len(mat.shape) == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    else:
        raise ArithmeticError

def covgrad(x, mean, cov):
    # I think once we have Cholesky we can make this nicer.
    solved = np.linalg.solve(cov, (x - mean).T).T
    return lower_half(np.linalg.inv(cov) - generalized_outer_product(solved))

logpdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(vs, gvs, -np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=0)
logpdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(vs, gvs,  np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=1)
logpdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(vs, gvs, -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov)), argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(vs, gvs, -g * ans * np.linalg.solve(cov, x - mean)), argnum=0)
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(vs, gvs,  g * ans * np.linalg.solve(cov, x - mean)), argnum=1)
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(vs, gvs, -g * ans * covgrad(x, mean, cov)),          argnum=2)

entropy.defvjp_is_zero(argnums=(0,))
entropy.defvjp(lambda g, ans, vs, gvs, mean, cov: unbroadcast(vs, gvs, 0.5 * g * np.linalg.inv(cov).T), argnum=1)

def lower_half(mat):
    # fold the upper triangle onto the lower and halve the diagonal
    if len(mat.shape) == 2:
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    if len(mat.shape) == 1:
        return np.outer(mat, mat)
    elif len(mat.shape) == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    else:
        raise ArithmeticError

def covgrad(x, mean, cov):
    # I think once we have Cholesky we can make this nicer.
    solved = np.linalg.solve(cov, (x - mean).T).T
    return lower_half(np.linalg.inv(cov) - generalized_outer_product(solved))

logpdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, x,    lambda g: -np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=0)
logpdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, mean, lambda g:  np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=1)
logpdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, cov,  lambda g: -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov)), argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, x,    lambda g: -g * ans * np.linalg.solve(cov, x - mean)), argnum=0)
pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, mean, lambda g:  g * ans * np.linalg.solve(cov, x - mean)), argnum=1)
pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, cov,  lambda g: -g * ans * covgrad(x, mean, cov)),          argnum=2)

entropy.defgrad_is_zero(argnums=(0,))
entropy.defgrad(lambda ans, mean, cov: unbroadcast(ans, cov, lambda g:  0.5 * g * np.linalg.inv(cov).T), argnum=1)
Example #45
def add_binop_size_check(binop):
    def wrapped(a, b):
        assert shape(a) == shape(b)
        return binop(a, b)
    return wrapped
make_binop = (lambda make_binop: lambda *args:
              add_binop_size_check(make_binop(*args)))(make_binop)

add      = make_binop(operator.add,     tuple)
sub      = make_binop(operator.sub,     tuple)
mul      = make_binop(operator.mul,     tuple)
div      = make_binop(operator.truediv, tuple)
allclose = make_binop(np.allclose,      all)
contract = make_binop(inner,            sum)

shape      = make_unop(np.shape, tuple)
unbox      = make_unop(getval,   tuple)
sqrt       = make_unop(np.sqrt,  tuple)
square     = make_unop(lambda a: a**2, tuple)
randn_like = make_unop(lambda a: npr.normal(size=np.shape(a)), tuple)
zeros_like = make_unop(lambda a: np.zeros(np.shape(a)), tuple)
flatten    = make_unop(lambda a: np.ravel(a), np.concatenate)

scale      = make_scalar_op(operator.mul, tuple)
add_scalar = make_scalar_op(operator.add, tuple)

norm = lambda x: np.sqrt(contract(x, x))
rand_dir_like = lambda x: scale(1./norm(x), randn_like(x))

isobjarray = lambda x: isinstance(x, np.ndarray) and x.dtype == np.object
tuplify = Y(lambda f: lambda a: a if not istuple(a) and not isobjarray(a) else tuple(map(f, a)))
depth = Y(lambda f: lambda a: np.ndim(a) if not istuple(a) else 1+(min(map(f, a)) if len(a) else 1))
Example #46
def randn_like(x):
    return npr.RandomState(0).randn(*np.shape(x))
def unpack_gaussian_params(params):
    # Variational dist is a diagonal Gaussian.
    D = np.shape(params)[0] // 2
    mean, log_std = params[:D], params[D:]
    return mean, log_std
Example #48
def covgrad(x, mean, cov, allow_singular=False):
    J = np.linalg.inv(cov)
    solved = np.matmul(J, np.expand_dims(x - mean, -1))
    return 1./2 * (generalized_outer_product(solved) - J)

def solve(allow_singular):
    if allow_singular:
        return lambda A, x: np.dot(np.linalg.pinv(A), x)
    else:
        return np.linalg.solve

defvjp(logpdf,
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(x, lambda g: -np.expand_dims(g, 1) * solve(allow_singular)(cov, (x - mean).T).T),
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(mean, lambda g:  np.expand_dims(g, 1) * solve(allow_singular)(cov, (x - mean).T).T),
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(cov, lambda g: -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov, allow_singular)))

# Same as log pdf, but multiplied by the pdf (ans).
defvjp(pdf,
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(x, lambda g: -np.expand_dims(ans * g, 1) * solve(allow_singular)(cov, (x - mean).T).T),
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(mean, lambda g:  np.expand_dims(ans * g, 1) * solve(allow_singular)(cov, (x - mean).T).T),
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(cov, lambda g: -np.reshape(ans * g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov, allow_singular)))

defvjp(entropy, None,
       lambda ans, mean, cov:
       unbroadcast_f(cov, lambda g: 0.5 * g * np.linalg.inv(cov).T))
def sample_diag_gaussian(params, num_samples, rs):
    mean, log_std = unpack_gaussian_params(params)
    D = np.shape(mean)[0]
    return rs.randn(num_samples, D) * np.exp(log_std) + mean

def generalized_outer_product(mat):
    if len(mat.shape) == 1:
        return np.outer(mat, mat)
    elif len(mat.shape) == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    else:
        raise ArithmeticError

def covgrad(x, mean, cov, allow_singular=False):
    if allow_singular:
        raise NotImplementedError("The multivariate normal pdf is not "
                "differentiable w.r.t. a singular covariance matix")
    # I think once we have Cholesky we can make this nicer.
    solved = np.linalg.solve(cov, (x - mean).T).T
    return lower_half(np.linalg.inv(cov) - generalized_outer_product(solved))

def solve(allow_singular):
    if allow_singular:
        return lambda A, x: np.dot(np.linalg.pinv(A), x)
    else:
        return np.linalg.solve

logpdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(vs, gvs, -np.expand_dims(g, 1) * solve(allow_singular)(cov, (x - mean).T).T), argnum=0)
logpdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(vs, gvs,  np.expand_dims(g, 1) * solve(allow_singular)(cov, (x - mean).T).T), argnum=1)
logpdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(vs, gvs, -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov, allow_singular)), argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(vs, gvs, -np.expand_dims(ans * g, 1) * solve(allow_singular)(cov, (x - mean).T).T), argnum=0)
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(vs, gvs,  np.expand_dims(ans * g, 1) * solve(allow_singular)(cov, (x - mean).T).T), argnum=1)
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(vs, gvs, -np.reshape(ans * g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov, allow_singular)), argnum=2)

entropy.defvjp_is_zero(argnums=(0,))
entropy.defvjp(lambda g, ans, vs, gvs, mean, cov: unbroadcast(vs, gvs, 0.5 * g * np.linalg.inv(cov).T), argnum=1)
Example #51
 def getshape(val):
     val = getval(val)
     assert np.isscalar(val) or isinstance(val, np.ndarray), \
         'Jacobian requires input and output to be scalar- or array-valued'
     return np.shape(val)
def unpack_gaussian_params(params):
    # Params of a diagonal Gaussian.
    D = np.shape(params)[-1] // 2
    mean, log_std = params[:, :D], params[:, D:]
    return mean, log_std
Example #53
def lower_half(mat):
    # fold the upper triangle onto the lower and halve the diagonal
    if len(mat.shape) == 2:
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    if len(mat.shape) == 1:
        return np.outer(mat, mat)
    elif len(mat.shape) == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    else:
        raise ArithmeticError

def covgrad(x, mean, cov):
    # I think once we have Cholesky we can make this nicer.
    solved = np.linalg.solve(cov, (x - mean).T).T
    return lower_half(np.linalg.inv(cov) - generalized_outer_product(solved))

logpdf.defgrad(lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(ans, x,    lambda g: -np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=0)
logpdf.defgrad(lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(ans, mean, lambda g:  np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=1)
logpdf.defgrad(lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(ans, cov,  lambda g: -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov)), argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defgrad(lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(ans, x,    lambda g: -g * ans * np.linalg.solve(cov, x - mean)), argnum=0)
pdf.defgrad(lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(ans, mean, lambda g:  g * ans * np.linalg.solve(cov, x - mean)), argnum=1)
pdf.defgrad(lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(ans, cov,  lambda g: -g * ans * covgrad(x, mean, cov)),          argnum=2)

entropy.defgrad_is_zero(argnums=(0,))
entropy.defgrad(lambda ans, mean, cov: unbroadcast(ans, cov, lambda g:  0.5 * g * np.linalg.inv(cov).T), argnum=1)