Example #1
 def RBF(self,hyper,xi,xj=None):
     if xj is None:
         xj = xi
     sigma_f     = np.array(hyper[0])
     lengthscale = np.array(hyper[1:])
     r           = np.expand_dims(xi*lengthscale,1) - np.expand_dims(xj*lengthscale,0)
     return sigma_f * np.exp(-0.5 * np.sum(r**2,axis=2))
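A quick usage sketch of the broadcasting pattern above. The free function below is a hypothetical standalone mirror of the method, written only so the shape check runs without the surrounding class:

import numpy as np

def rbf_gram(hyper, xi, xj=None):
    # Same computation as the RBF method above, without self
    if xj is None:
        xj = xi
    sigma_f = np.array(hyper[0])
    lengthscale = np.array(hyper[1:])
    r = np.expand_dims(xi * lengthscale, 1) - np.expand_dims(xj * lengthscale, 0)
    return sigma_f * np.exp(-0.5 * np.sum(r ** 2, axis=2))

xi = np.random.randn(5, 2)                 # 5 points, 2 input dimensions
K = rbf_gram([1.0, 1.0, 1.0], xi)          # hyper = (sigma_f, l1, l2)
assert K.shape == (5, 5)
assert np.allclose(np.diag(K), 1.0)        # k(x, x) = sigma_f = 1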
Example #2
def line_point_dist(lines, ps):
    """
    Closest distance of a point to a line segment defined by two points (a, b).
    The arguments can also be lists of lines and points, in that case the distance for
    each combination is returned, with shape lines.shape[:-2] + ps.shape[:-1].
    """

    assert(lines.shape[-2:] == (2, 2))
    assert(ps.shape[-1] == 2)
    a = lines[...,0,:]
    b = lines[...,1,:]
    for _ in range(max(len(ps.shape)-1, 1)):
        a = np.expand_dims(a, -2)
        b = np.expand_dims(b, -2)
    # ps = np.expand_dims(ps, 0)

    v_hat = (b - a) / np.expand_dims(norm(b - a), -1)
    
    # d_along.shape == (v_hat.shape[0], ps.shape[0])
    # i.e. one scalar product for each line-point combination
    d_along = np.sum(v_hat*(ps - a), axis=-1)
    d_normal = np.abs(cross(v_hat, ps - a))
    assert(d_along.shape == d_normal.shape)

    d_ends = np.min(np.array([norm(ps-a), norm(ps-b)]), axis=0)

    # if p lies along the sides of the line use the normal distance,
    # else the distance to one of the ends
    mask = (0 <= d_along) & (d_along <= norm(b - a))
    return np.where(mask, d_normal, d_ends)
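A quick usage sketch, assuming `norm` is a Euclidean norm over the last axis and `cross` is the scalar 2-D cross product; both are hypothetical helpers matching how the function uses them:

import numpy as np

norm = lambda v: np.linalg.norm(v, axis=-1)   # assumed helper: norm over last axis
cross = lambda u, v: np.cross(u, v)           # assumed helper: scalar 2-D cross product

lines = np.array([[[0.0, 0.0], [1.0, 0.0]]])  # one segment from (0, 0) to (1, 0)
ps = np.array([[0.5, 1.0], [2.0, 0.0]])       # one point above it, one past its end

d = line_point_dist(lines, ps)                # shape (1, 2): lines.shape[:-2] + ps.shape[:-1]
assert np.allclose(d, [[1.0, 1.0]])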
Example #3
def periodic_kernel(x, xstar, hyp):
    """
    Implements the periodic kernel function for Gaussian Process

    x: input data with shape (N,d)
    xstar: input data with shape (Nstar,d)
    hyp: (log(sigma_f),log(l1),log(l2),...,log(period)) with shape (d+2,)
    
    returns:
        a covariance matrix with shape (N,Nstar)
    """
    sigma_f = np.exp(hyp[0])
    N = x.shape[0]
    Nstar = xstar.shape[0]
    l = np.exp(hyp[1:-1])  #shape (d,)
    l = np.repeat(np.repeat(l[np.newaxis, :], Nstar, axis=0)[np.newaxis, :],
                  N,
                  axis=0)  #shape (N,Nstar,d)
    period = np.exp(hyp[-1])

    diff = np.sin(
        np.pi * np.abs(np.expand_dims(x, 1) - np.expand_dims(xstar, 0)) /
        period) / l  #result of shape (N,Nstar,d)
    K = sigma_f * np.exp(-2 *
                         (diff**2).sum(axis=2))  #should be of shape (N,Nstar)

    return K
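A quick shape check with all log-hyperparameters set to zero (toy values):

import numpy as np

x = np.random.randn(5, 2)          # N = 5, d = 2
xstar = np.random.randn(3, 2)      # Nstar = 3
hyp = np.zeros(4)                  # (log sigma_f, log l1, log l2, log period)

K = periodic_kernel(x, xstar, hyp)
assert K.shape == (5, 3)
assert np.all((K > 0) & (K <= 1.0))   # sigma_f = 1, so every entry lies in (0, 1]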
Example #4
def rbf_covariance(kernel_params, x, xp):
    output_scale = np.exp(kernel_params[0])
    lengthscales = np.exp(kernel_params[1:])
    diffs = np.expand_dims(x / lengthscales[:, np.newaxis],
                           2) - np.expand_dims(
                               xp / lengthscales[:, np.newaxis], 1)
    return output_scale * np.exp(-0.5 * np.sum(diffs**2, axis=0))
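Note the layout here: with `lengthscales[:, np.newaxis]` and the sum over axis 0, the inputs are expected feature-major, i.e. x is (D, N) and xp is (D, M). A quick shape sketch with toy values:

import numpy as np

D, N, M = 2, 5, 3
kernel_params = np.zeros(1 + D)    # (log output scale, log lengthscales)
x = np.random.randn(D, N)
xp = np.random.randn(D, M)

K = rbf_covariance(kernel_params, x, xp)
assert K.shape == (N, M)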
Example #5
def angle_axis_rotation_matrix(angle, axis, axis_already_normalized=False):
    # Gives the rotation matrix from an angle and an axis.
    # An implementation of https://en.wikipedia.org/wiki/Rotation_matrix#Rotation_matrix_from_axis_and_angle
    # Inputs:
    #   * angle: can be one angle or a vector (1d ndarray) of angles. Given in radians.
    #   * axis: a 1d numpy array of length 3 (x,y,z). Represents the axis of rotation.
    #   * axis_already_normalized: boolean, skips normalization for speed if you flag this true.
    # Outputs:
    #   * If angle is a scalar, returns a 3x3 rotation matrix.
    #   * If angle is a vector, returns a 3x3xN rotation matrix.
    if not axis_already_normalized:
        axis = axis / np.linalg.norm(axis)

    sintheta = np.sin(angle)
    costheta = np.cos(angle)
    cpm = np.array(
        [[0, -axis[2], axis[1]],
         [axis[2], 0, -axis[0]],
         [-axis[1], axis[0], 0]]
    )  # The cross product matrix of the rotation axis vector
    outer_axis = np.outer(axis, axis)

    angle = np.array(angle)  # make sure angle is a ndarray
    if len(angle.shape) == 0:  # is a scalar
        rot_matrix = costheta * np.eye(3) + sintheta * cpm + (1 - costheta) * outer_axis
        return rot_matrix
    else:  # angle is assumed to be a 1d ndarray
        rot_matrix = costheta * np.expand_dims(np.eye(3), 2) + sintheta * np.expand_dims(cpm, 2) + (
                1 - costheta) * np.expand_dims(outer_axis, 2)
        return rot_matrix
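A quick sanity check with toy values: rotating the x unit vector by 90 degrees about the z axis, plus the vectorized case:

import numpy as np

R = angle_axis_rotation_matrix(np.pi / 2, np.array([0.0, 0.0, 1.0]))
assert np.allclose(R @ np.array([1.0, 0.0, 0.0]), [0.0, 1.0, 0.0])

Rs = angle_axis_rotation_matrix(np.linspace(0, np.pi, 4), np.array([0.0, 0.0, 1.0]))
assert Rs.shape == (3, 3, 4)   # one 3x3 matrix per angle, stacked along the last axis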
Example #6
def log_py_zM_ord_j(lambda_ord_j, y_oh_j, zM, k, nj_ord_j): 
    ''' Compute log p(y_j | zM, s1 = k1) of each ordinal variable 
    
    lambda_ord_j ( (nj_ord_j + r - 1) 1darray): Coefficients of the ordinal distributions in the GLLVM layer
    y_oh_j (numobs 1darray): The jth ordinal variable in the dataset
    zM (M x r x k ndarray): M Monte Carlo copies of z for each component k1 of the mixture
    k (int): The number of components of the mixture
    nj_ord_j (int): The number of possible values of the jth ordinal variable
    --------------------------------------------------------------
    returns (ndarray): The p(y_j | zM, s1 = k1) for the jth ordinal variable
    '''    

    r = zM.shape[1]
    M = zM.shape[0]
    epsilon = 1E-10 # Numeric stability
    lambda0 = lambda_ord_j[:(nj_ord_j - 1)]
    Lambda = lambda_ord_j[-r:]
 
    broad_lambda0 = lambda0.reshape((nj_ord_j - 1, 1, 1, 1))
    eta = broad_lambda0 - (np.transpose(zM, (0, 2, 1)) @ Lambda.reshape((1, r, 1)))[np.newaxis]
    
    gamma = expit(eta)
    
    gamma_prev = np.concatenate([np.zeros((1,M, k, 1)), gamma])
    gamma_next = np.concatenate([gamma, np.ones((1,M, k, 1))])
    pi = gamma_next - gamma_prev
    
    pi = np.where(pi <= 0, epsilon, pi)
    pi = np.where(pi >= 1, 1 - epsilon, pi)
    
    yg = np.expand_dims(y_oh_j.T, 1)[..., np.newaxis, np.newaxis] 
    
    log_p_y_z = yg * np.log(np.expand_dims(pi, axis=2)) 
   
    return log_p_y_z.sum((0))
Example #7
    def sample(self, n, seed=3):
        dim = self.d

        def mu(x):
            mu = np.zeros((dim))
            mu[0] = x
            return mu

        mu0, cov0, = mu(self.params['mu0']), self.params['sig0'] * np.eye(dim)
        mu1, cov1, = mu(self.params['mu1']), self.params['sig1'] * np.eye(dim)
        muR, covR, = mu(self.params['muR']), self.params['sigR'] * np.eye(dim)

        with NumpySeedContext(seed=seed):
            self.p0 = stats.multivariate_normal(mu0, cov0)
            self.p1 = stats.multivariate_normal(mu1, cov1)
            self.q = stats.multivariate_normal(muR, covR)
            X = self.p0.rvs(size=n)
            Y = self.p1.rvs(size=n)
            Q = self.q.rvs(size=n)

        if X.ndim == 1:
            X = np.expand_dims(X, axis=1)
        if Y.ndim == 1:
            Y = np.expand_dims(Y, axis=1)
        if Q.ndim == 1:
            Q = np.expand_dims(Q, axis=1)
        return Data(X), \
               Data(Y), \
               Data(Q)
Example #8
def RBF_eKdK(mu, sigma, X, lengthscales=None, kernel_variance=1):
    """
    x ~ N(mu, sigma), Dx1
    X is DxM
    Return E_x [  k(X, x) * dk(x, X)  ], an M x (D x M) array
    """
    if lengthscales is None:
        lengthscales = np.ones((mu.shape[0], 1))

    # d x m1 x m2
    exKK = RBF_exKK(mu=mu,
                    sigma=sigma,
                    X=X,
                    lengthscales=lengthscales,
                    kernel_variance=kernel_variance)

    # m1 x m2
    eKK = RBF_eKK(mu=mu,
                  sigma=sigma,
                  X=X,
                  lengthscales=lengthscales,
                  kernel_variance=kernel_variance)

    # d x m1 x m2,
    # As exKK naturally uses the first argument and
    # X is the second argument in the derivative kernel, we should expand it, such that we iterate along m2 dimension
    eKdK = (exKK - np.expand_dims(X, axis=1) * np.expand_dims(eKK, axis=0)) / (
        (lengthscales**2)[:, :, None])

    # We then finally modify the order of axis and the dimensionality to get
    # the expected m1 - d - m2 order with M x (DM) shape

    return np.reshape(eKdK.swapaxes(0, 1), (X.shape[1], -1), order='F')
Example #9
def RBF_exxKK(mu, sigma, X, lengthscales=None, kernel_variance=1):
    """
    x ~ N(mu, sigma), Dx1
    X is DxM
    Return E_x [ (x*x.T) * k(X, x) * k(x, X) ], a D x D x M x M array
    """
    if lengthscales is None:
        lengthscales = np.ones((mu.shape[0], 1))

    # M x M array
    eKK = RBF_eKK(mu=mu,
                  sigma=sigma,
                  X=X,
                  lengthscales=lengthscales,
                  kernel_variance=kernel_variance)

    # D x 1 array
    var_gauss = 1 / (1 / ((lengthscales**2) / 2) + (1 / sigma))

    X_pairwise_sums = X[:, :, None] + X[:, :, None].swapaxes(1, 2)

    # D x M x M array
    mean_gauss = ((X_pairwise_sums / 2) /
                  (((lengthscales**2) / 2)[:, :, None]) +
                  (mu / sigma)[:, :, None]) * (var_gauss[:, :, None])

    # D x D x M x M array
    mean_outer = np.expand_dims(mean_gauss, axis=1) * np.expand_dims(
        mean_gauss, axis=0)

    return np.expand_dims(np.expand_dims(eKK, axis=0),
                          axis=0) * (var_gauss[:, :, None, None] + mean_outer)
Example #10
def generate_text(n, model):
    result = []
    start = ''
    if PAN_TADEUSZ:
        start = "Jam jest Jacek"
    else:
        start = "no it was n't black monday"
    raw_start = list(map(lambda l: c2i[l], start))
    placeholders = model.reset_state(1)
    placeholders[model.scope.is_training] = False

    expected_nodes = [model.predicts[0], model.first_memory]
    for i in raw_start:
        x = np.expand_dims(onehot(i), axis=0)
        x = np.expand_dims(x, axis=0)
        placeholders[model.batch] = x
        predicts, state = sess.run(expected_nodes, feed_dict=placeholders)
        for i in range(len(model.cells)):
            placeholders[model.cells[i].h_t] = state[i][0]
            placeholders[model.cells[i].c_t] = state[i][1]

    letter = c2i[" "]
    for i in range(n):
        x = np.expand_dims(onehot(letter), axis=0)
        x = np.expand_dims(x, axis=0)
        placeholders[model.batch] = x
        predicts, state = sess.run(expected_nodes, feed_dict=placeholders)
        letter = get_letter(predicts)

        for i in range(len(model.cells)):
            placeholders[model.cells[i].h_t] = state[i][0]
            placeholders[model.cells[i].c_t] = state[i][1]
        result.append(i2c[letter])

    return start + ''.join(result)
Example #11
 def kernel(self, x, xp, hyp):
     output_scale = np.exp(hyp[0])
     lengthscales = np.exp(hyp[1])
     print(lengthscales)
     diffs = np.expand_dims(x / lengthscales, 1) - \
             np.expand_dims(xp / lengthscales, 0)
     return output_scale * np.exp(-0.5 * np.sum(diffs ** 2, axis=2))
Example #12
def log_py_zM_categ_j(lambda_categ_j, y_categ_j, zM, k, nj_categ_j):
    ''' Compute log p(y_j | zM, s1 = k1) of each categorical variable 
    
    lambda_categ_j (nj_categ x (r + 1) ndarray): Coefficients of the categorical distributions in the GLLVM layer
    y_categ_j (numobs 1darray): The jth categorical variable in the dataset
    zM (M x r x k ndarray): M Monte Carlo copies of z for each component k1 of the mixture
    k (int): The number of components of the mixture
    nj_categ_j (int): The number of possible values of the jth categorical variable
    --------------------------------------------------------------
    returns (ndarray): The p(y_j | zM, s1 = k1) for the jth categorical variable
    '''  
    epsilon = 1E-10

    r = zM.shape[1]
    nj = y_categ_j.shape[1]
        
    zM_broad = np.expand_dims(np.expand_dims(np.transpose(zM, (0, 2, 1)), 2), 3)
    lambda_categ_j_ = lambda_categ_j.reshape(nj, r + 1, order = 'C')

    eta = zM_broad @ lambda_categ_j_[:, 1:][n_axis, n_axis, ..., n_axis] # Check that we use r and not k?
    eta = eta + lambda_categ_j_[:,0].reshape(1, 1, nj_categ_j, 1, 1) # Add the constant
    
    pi = softmax_(eta.astype(float), axis = 2)
    # Numeric stability
    pi = np.where(pi <= 0, epsilon, pi)
    pi = np.where(pi >= 1, 1 - epsilon, pi)

    yg = np.expand_dims(np.expand_dims(y_categ_j, 1), 1)[..., np.newaxis, np.newaxis] 
    log_p_y_z = yg * np.log(pi[n_axis]) 
    
    # Reshaping output
    log_p_y_z = log_p_y_z.sum((3)) # Summing over the modalities nj
    log_p_y_z = log_p_y_z[:,:,:,0,0] # Deleting useless axes
        
    return np.transpose(log_p_y_z,(1,0, 2))
Example #13
def make_pinwheel_data(num_spokes=5, points_per_spoke=40, rate=1.0, noise_std=0.005):
    """Make synthetic data in the shape of a pinwheel."""
    spoke_angles = np.linspace(0, 2 * np.pi, num_spokes + 1)[:-1]
    rs = npr.RandomState(0)
    x = np.linspace(0.1, 1, points_per_spoke)
    xs = np.concatenate([x * np.cos(angle + x * rate) + noise_std * rs.randn(len(x)) for angle in spoke_angles])
    ys = np.concatenate([x * np.sin(angle + x * rate) + noise_std * rs.randn(len(x)) for angle in spoke_angles])
    return np.concatenate([np.expand_dims(xs, 1), np.expand_dims(ys, 1)], axis=1)
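Usage sketch, assuming `npr` is `numpy.random` (the function refers to a module-level `npr`):

import numpy as np
import numpy.random as npr

data = make_pinwheel_data(num_spokes=5, points_per_spoke=40)
assert data.shape == (200, 2)   # 5 spokes * 40 points, stacked as (x, y) columns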
Example #14
def fwd_grad_logsumexp(g, ans, x, axis=None, b=1.0, keepdims=False):
    if not keepdims:
        if isinstance(axis, int):
            ans = anp.expand_dims(ans, axis)
        elif isinstance(axis, tuple):
            for ax in sorted(axis):
                ans = anp.expand_dims(ans, ax)
    return anp.sum(g * b * anp.exp(x - ans), axis=axis, keepdims=keepdims)
Example #15
 def vec_compute_params(self, params, x, xp):
     n_params = params.shape[0]
     var = params[:, 0]
     hvar = params[:, 1]
     off = params[:, 2]
     diffs = agnp.expand_dims(var * (x - off), 1) * agnp.expand_dims(
         (xp - off), 0)
     return hvar + diffs
Example #16
def fwd_grad_logsumexp(g, ans, x, axis=None, b=1.0, keepdims=False):
    if not keepdims:
        if isinstance(axis, int):
            ans = np.expand_dims(ans, axis)
        elif isinstance(axis, tuple):
            for ax in sorted(axis):
                ans = np.expand_dims(ans, ax)
    return np.sum(g * b * np.exp(x - ans), axis=axis, keepdims=keepdims)
Example #17
def non_uniform_approx_nearest(points, values):
    """Approximate derivatives using nearest points in non-uniform grid."""
    ndim = points.shape[-1]
    k = triangular(ndim + 1)
    diffs = np.expand_dims(points, axis=0) - np.expand_dims(points, axis=1)
    norms = np.linalg.norm(diffs, axis=-1)
    nearest_k = np.argpartition(norms, k)[..., :k]
    return taylor_approx(points, points[nearest_k], values[nearest_k])
Example #18
 def exp_sin_squared(x,y,a,b,c):
     if y is None:
         y = x
     diffs = np.expand_dims(x,1)-np.expand_dims(y,0)
     sqdist = np.sum(diffs**2, axis=2)
     assert np.all(sqdist>=0),sqdist[sqdist<0]
     out = b*b*np.exp(-np.sin(sqdist/c*np.pi)**2/a**2*2)
     return out
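A quick check with toy hyperparameters; passing `y=None` reuses `x`:

import numpy as np

x = np.random.randn(4, 1)
K = exp_sin_squared(x, None, a=1.0, b=1.0, c=2.0)
assert K.shape == (4, 4)
assert np.allclose(np.diag(K), 1.0)   # zero distance: b**2 * exp(0) = 1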
Example #19
    def differentiate(self):

        "get gradient values using finite difference"

        # np.repeat with a tuple of reps and no axis would fail here;
        # np.tile replicates the added leading (time) axis as intended
        C = np.tile(np.expand_dims(self.cost_fn.C, axis=0), (self.T, 1, 1))
        F = np.tile(np.expand_dims(self.dyn_model.F, axis=0), (self.T, 1, 1))
        c = np.tile(np.expand_dims(self.cost_fn.c, axis=0), (self.T, 1))
        f = np.tile(np.expand_dims(self.dyn_model.f, axis=0), (self.T, 1))
        return C, F, c, f
Example #20
    def rbf_covariance(self, X, y=None, signal_variance=1.0, length_scale=1.0):

        if y is None:
            y = X

        D = np.expand_dims(X / length_scale, 1) - np.expand_dims(
            y / length_scale, 0)

        return signal_variance * np.exp(-0.5 * np.sum(D**2, axis=2))
Example #21
    def gp0(self, m, s):
        """
        Compute joint predictions for MGP with uncertain inputs.
        """
        assert hasattr(self, "hyp")
        if not hasattr(self, "K"):
            self.cache()

        x = np.atleast_2d(self.inputs)
        y = np.atleast_2d(self.targets)
        n, D = x.shape
        n, E = y.shape

        X = self.hyp
        iK = self.iK
        beta = self.alpha

        m = np.atleast_2d(m)
        inp = x - m

        # Compute the predicted mean and IO covariance.
        iL = np.stack([np.diag(exp(-X[i, :D])) for i in range(E)])
        iN = np.matmul(inp, iL)
        B = iL @ s @ iL + np.eye(D)
        t = np.stack([solve(B[i].T, iN[i].T).T for i in range(E)])
        q = exp(-np.sum(iN * t, 2) / 2)
        qb = q * beta.T
        tiL = np.matmul(t, iL)
        c = exp(2 * X[:, D]) / sqrt(det(B))

        M = np.sum(qb, 1) * c
        V = (np.transpose(tiL, [0, 2, 1]) @ np.expand_dims(qb, 2)).reshape(
            E, D).T * c
        k = 2 * X[:, D].reshape(E, 1) - np.sum(iN**2, 2) / 2

        # Compute the predicted covariance.
        inp = np.expand_dims(inp, 0) / np.expand_dims(exp(2 * X[:, :D]), 1)
        ii = np.repeat(inp[:, newaxis, :, :], E, 1)
        ij = np.repeat(inp[newaxis, :, :, :], E, 0)

        iL = np.stack([np.diag(exp(-2 * X[i, :D])) for i in range(E)])
        siL = np.expand_dims(iL, 0) + np.expand_dims(iL, 1)
        R = np.matmul(s, siL) + np.eye(D)
        t = 1 / sqrt(det(R))
        iRs = np.stack(
            [solve(R.reshape(-1, D, D)[i], s) for i in range(E * E)])
        iRs = iRs.reshape(E, E, D, D)
        Q = exp(k[:, newaxis, :, newaxis] + k[newaxis, :, newaxis, :] +
                maha(ii, -ij, iRs / 2))

        S = np.einsum('ji,iljk,kl->il', beta, Q, beta)
        tr = np.hstack([np.sum(Q[i, i] * iK[i]) for i in range(E)])
        S = (S - np.diag(tr)) * t + np.diag(exp(2 * X[:, D]))
        S = S - np.matmul(M[:, newaxis], M[newaxis, :])

        return M, S, V
Example #22
    def predict_mean(self, x_new):

        k_dims = [self.kernels[d].eval(self.kernels[d].params,
                                   np.expand_dims(np.unique(self.X[:, d]), 1),
                                   np.expand_dims(x_new[:, d], 1))
                  for d in range(self.X.shape[1])]
        kx = np.squeeze(kron_list(k_dims))
        mean = np.sum(np.multiply(kx, self.alpha)) + self.mu[0]

        return mean
Example #23
 def kernel1(self, x, xp, hyp, active_dims=None):
     if active_dims is None:
         active_dims = np.arange(self.dim)
     output_scale = np.exp(hyp[0])
     lengthscales = np.exp(hyp[1:])
     lengthscales = lengthscales + 0.000001
     diffs = np.expand_dims(
         (x[active_dims].T / lengthscales).T, 2) - np.expand_dims(
             (xp[active_dims].T / lengthscales).T, 1)
     return output_scale * np.exp(-0.5 * np.sum(diffs**2, axis=0))
Example #24
def make_pinwheel_data(num_classes, num_per_class, rate=2.0, noise_std=0.001):
    spoke_angles = np.linspace(0, 2*np.pi, num_classes+1)[:-1]

    rs = npr.RandomState(0)
    x = np.linspace(0.1, 1, num_per_class)
    xs = np.concatenate([rate *x * np.cos(angle + x * rate) + noise_std * rs.randn(num_per_class)
                         for angle in spoke_angles])
    ys = np.concatenate([rate *x * np.sin(angle + x * rate) + noise_std * rs.randn(num_per_class)
                         for angle in spoke_angles])
    return np.concatenate([np.expand_dims(xs, 1), np.expand_dims(ys,1)], axis=1)
Example #25
def mykron(A, B):
    """
    Efficient Kronecker product.
    """
    a1, a2 = A.shape
    b1, b2 = B.shape
    C = np.reshape(
        np.expand_dims(A, (1, 3)) * np.expand_dims(B, (0, 2)),
        (a1 * b1, a2 * b2))
    return C
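This is the Kronecker product written with broadcasting (np.expand_dims with a tuple of axes needs NumPy >= 1.18); a quick check against np.kron:

import numpy as np

A = np.random.randn(2, 3)
B = np.random.randn(4, 5)
assert mykron(A, B).shape == (8, 15)
assert np.allclose(mykron(A, B), np.kron(A, B))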
Example #26
def make_pinwheel_data(num_spokes=5, points_per_spoke=40, rate=1.0, noise_std=0.005):
    """Make synthetic data in the shape of a pinwheel."""
    spoke_angles = np.linspace(0, 2 * np.pi, num_spokes + 1)[:-1]
    rs = npr.RandomState(0)
    x = np.linspace(0.1, 1, points_per_spoke)
    xs = np.concatenate([x * np.cos(angle + x * rate) + noise_std * rs.randn(len(x))
                         for angle in spoke_angles])
    ys = np.concatenate([x * np.sin(angle + x * rate) + noise_std * rs.randn(len(x))
                         for angle in spoke_angles])
    return np.concatenate([np.expand_dims(xs, 1), np.expand_dims(ys,1)], axis=1)
Example #27
def softmax(x):
    y = np.atleast_2d(x)
    axis = 1
    y = y - np.expand_dims(np.max(y, axis=axis), axis)
    y = np.exp(y)
    ax_sum = np.expand_dims(np.sum(y, axis=axis), axis)
    p = y / ax_sum
    if len(x.shape) == 1:
        p = p.flatten()
    return p
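A quick check that rows normalize to one and that 1-D input comes back flattened:

import numpy as np

p = softmax(np.array([1.0, 2.0, 3.0]))
assert p.shape == (3,) and np.isclose(p.sum(), 1.0)

P = softmax(np.random.randn(4, 3))
assert P.shape == (4, 3) and np.allclose(P.sum(axis=1), 1.0)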
Example #28
def fwd_grad_chooser(g, ans, gvs, vs, x, axis=None, keepdims=False):
    if anp.isscalar(x):
        return g
    if not keepdims:
        if isinstance(axis, int):
            ans = anp.expand_dims(ans, axis)
        elif isinstance(axis, tuple):
            for ax in sorted(axis):
                ans = anp.expand_dims(ans, ax)
    chosen_locations = x == ans
    return anp.sum(g * chosen_locations, axis=axis, keepdims=keepdims)
Example #29
def sample_qf_q_and_p(logprob, t, combined_params, k, num_samples, rs):
    # Tuple-parameter lambdas are Python 2 only; unpack the triple explicitly instead
    add_dim_to_pair = lambda abc: tuple(np.expand_dims(v, 1) for v in abc)
    combined_qs_and_samples = [
        add_dim_to_pair(sample_q_and_p(logprob, t, params, num_samples, rs))
        for params in np.split(combined_params, k)
    ]
    combined_qs, combined_ps, combined_samples = zip(*combined_qs_and_samples)
    return np.concatenate(combined_qs, axis=1),\
           np.concatenate(combined_ps, axis=1),\
           np.concatenate(combined_samples, axis=1)  # should be NxK, and NxKxD
Example #30
def _sqdist(x,y,Torch=False):
    if y is None:
        y = x
    if Torch:
        diffs = torch.unsqueeze(x,1)-torch.unsqueeze(y,0)
        sqdist = torch.sum(diffs**2, axis=2, keepdim=False)
    else:
        diffs = np.expand_dims(x,1)-np.expand_dims(y,0)
        sqdist = np.sum(diffs**2, axis=2)
        del diffs
    return sqdist
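A quick check of the NumPy branch (torch is only needed when Torch=True); the result is the (N, M) matrix of squared Euclidean distances:

import numpy as np

x = np.random.randn(5, 3)
y = np.random.randn(4, 3)
D = _sqdist(x, y)
assert D.shape == (5, 4)
assert np.isclose(D[0, 0], np.sum((x[0] - y[0]) ** 2))   # spot-check one entry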
Example #31
def rbf(X, Y, gamma):
    # X: (num_samples1, num_features)
    # Y: (num_samples2, num_features)
    # return: (num_samples1, num_samples2)

    X_ = np.expand_dims(X, 1)  # unsqueeze -> (num_samples1, 1, num_features)
    Y_ = np.expand_dims(Y, 0)  # unsqueeze -> (1, num_samples2, num_features)
    dm = X_ - Y_  # (num_samples1, num_samples2, num_features)

    norm = np.sum(dm**2, axis=2)
    return np.exp(-gamma * norm)
Example #32
 def get_log_prob_nk(self, x):
     # Up to a constant.
     v = self.mix_par.values
     x_centered = np.expand_dims(x, 2) - np.expand_dims(np.transpose(v['loc']), 0)
     log_det_k = np.array(
         [ np.linalg.slogdet(v['info'][k,:,:])[1] \
         for k in range(self.num_components) ])
     lp_nk = \
         -0.5 * np.einsum('nik,kij,njk->nk',
                          x_centered, v['info'], x_centered) + \
         0.5 * np.expand_dims(log_det_k, 0)
     return lp_nk
Example #33
def train_pruned_model(args, mdl, results, top_vec, coeff):

    all_w = []
    results['args'] = args
    init_loss = mdl.loss(mdl.params_flat)
    init_grad_norm = np.linalg.norm(mdl.gradient(mdl.params_flat))

    print('Initial loss: {}, norm grad: {}'.format(init_loss, init_grad_norm))
    results['init_full_loss'] = init_loss
    results['init_full_grad_norm'] = init_grad_norm

    results['history1'] = []
    results['history1_columns'] = [
        'iter_no', 'batch_loss', 'batch_grad_norm', 'batch_param_norm'
    ]
    results['history2'] = []
    results['history2_columns'] = ['full_hessian', 'full_hessian_evals']

    for iter_no in tqdm(range(args.max_iterations)):
        inputs, targets = get_batch_samples(iter_no, args, mdl)
        batch_loss = mdl.loss(mdl.params_flat, inputs, targets)
        batch_grad = mdl.gradient(mdl.params_flat, inputs, targets)
        batch_grad_norm = np.linalg.norm(batch_grad)
        batch_param_norm = np.linalg.norm(mdl.params_flat)

        if iter_no % args.freq == 0:

            # Calculating the Hessian
            hess = mdl.hessian(mdl.params_flat)
            # Converting the Hessian to Tensor
            hess = torch.tensor(hess).float()
            c = torch.mv(hess.transpose(0, 1), torch.tensor(top_vec).float())
            if np.size(coeff) == 0:
                coeff = c.detach().cpu().numpy()
                coeff = np.expand_dims(coeff, axis=0)
            else:
                coeff = np.concatenate(
                    (coeff, np.expand_dims(c.detach().cpu().numpy(), axis=0)),
                    0)

        # saving weights in all iterations
        if batch_grad_norm <= args.stopping_grad_norm:
            break
        mdl.params_flat -= batch_grad * args.learning_rate
        all_w.append(np.power(math.e, mdl.params_flat))

    final_loss = mdl.loss(mdl.params_flat)
    final_grad_norm = np.linalg.norm(mdl.gradient(mdl.params_flat))
    print('Final loss: {}, norm grad: {}\n'.format(final_loss,
                                                   final_grad_norm))

    return mdl.params, coeff
Example #34
def calcSigma(x1, x2,l):
	''' Creating a Covariance Matrix
		INPUTS:
			X1, X2: arrays containing the x values from two separate samples
			l     : length scale parameter
		OUTPUTS:
			Sigma: a covariance matrix between x1 and X2
		--------------------------------------------------------
		Notes:
	'''
	length_scale = l
	diffs = np.expand_dims(x1 /length_scale,1)\
		 - np.expand_dims(x2 /length_scale,0)
	return np.exp(-0.5 * np.sum(diffs**2,axis=2))
Example #35
 def predictions(weights, inputs):
     inputs = np.expand_dims(inputs, 0)
     for W, b in unpack_layers(weights):
         outputs = np.einsum('mnd,mdo->mno', inputs, W) + b
         inputs = nonlinearity(outputs)
     #return outputs - logsumexp(outputs, axis=1, keepdims=True)
     return outputs
Example #36
def covgrad(x, mean, cov, allow_singular=False):
    if allow_singular:
        raise NotImplementedError("The multivariate normal pdf is not "
                "differentiable w.r.t. a singular covariance matrix")
    J = np.linalg.inv(cov)
    solved = np.matmul(J, np.expand_dims(x - mean, -1))
    return 1./2 * (generalized_outer_product(solved) - J)
Example #37
 def plot_gmm(params, ax, num_points=100):
     angles = np.expand_dims(np.linspace(0, 2*np.pi, num_points), 1)
     xs, ys = np.cos(angles), np.sin(angles)
     circle_pts = np.concatenate([xs, ys], axis=1) * 2.0
     for log_proportion, mean, chol in zip(*unpack_params(params)):
         cur_pts = mean + np.dot(circle_pts, chol)
         ax.plot(cur_pts[:, 0], cur_pts[:, 1], '-')
Example #38
 def log_marginal_likelihood(params, data):
     cluster_lls = []
     for log_proportion, mean, chol in zip(*unpack_params(params)):
         cov = np.dot(chol.T, chol) + 0.000001 * np.eye(D)
         cluster_log_likelihood = log_proportion + mvn.logpdf(data, mean, cov)
         cluster_lls.append(np.expand_dims(cluster_log_likelihood, axis=0))
     cluster_lls = np.concatenate(cluster_lls, axis=0)
     return np.sum(logsumexp(cluster_lls, axis=0))
Example #39
 def predictions(weights, inputs):
     """weights is shape (num_weight_samples x num_weights)
        inputs  is shape (num_datapoints x D)"""
     inputs = np.expand_dims(inputs, 0)
     for W, b in unpack_layers(weights):
         outputs = np.einsum('mnd,mdo->mno', inputs, W) + b
         inputs = nonlinearity(outputs)
     return outputs
Example #40
def cost(theta):
    # Unpack parameters
    nu = np.concatenate([theta[1], [0]], axis=0)
    
    S = theta[0]
    logdetS = np.expand_dims(np.linalg.slogdet(S)[1], 1)
    y = np.concatenate([samples.T, np.ones((1, N))], axis=0)

    # Calculate log_q
    y = np.expand_dims(y, 0)
    
    # 'Probability' of y belonging to each cluster
    log_q = -0.5 * (np.sum(y * np.linalg.solve(S, y), axis=1) + logdetS)

    alpha = np.exp(nu)
    alpha = alpha / np.sum(alpha)
    alpha = np.expand_dims(alpha, 1)
    
    loglikvec = logsumexp(np.log(alpha) + log_q, axis=0)
    return -np.sum(loglikvec)
Example #41
 def unpack_params(params):
     """Unpacks parameter vector into the proportions, means and covariances
     of each mixture component.  The covariance matrices are parametrized by
     their Cholesky decompositions."""
     log_proportions    = parser.get(params, 'log proportions')
     normalized_log_proportions = log_proportions - logsumexp(log_proportions)
     means              = parser.get(params, 'means')
     lower_tris = np.tril(parser.get(params, 'lower triangles'), k=-1)
     diag_chols = np.exp( parser.get(params, 'log diagonals'))
     chols = []
     for lower_tri, diag in zip(lower_tris, diag_chols):
         chols.append(np.expand_dims(lower_tri + np.diag(diag), 0))
     chols = np.concatenate(chols, axis=0)
     return normalized_log_proportions, means, chols
    def callback(params, t, g):
        print("Iteration {} lower bound {}".format(t, -objective(params, t)))

        # Sample functions from posterior.
        mean, cov = unpack_params(params)
        rs = npr.RandomState(0)
        sample_weights = rs.randn(10, num_weights) * np.sqrt(cov) + mean
        plot_inputs = np.linspace(-8, 8, num=200)
        outputs = predictions(sample_weights, np.expand_dims(plot_inputs, 1))

        # Plot data and functions.
        plt.cla()
        ax.plot(inputs.ravel(), targets.ravel(), 'bx')
        ax.plot(plot_inputs, outputs[:, :, 0].T)
        ax.set_ylim([-2, 3])
        plt.draw()
        plt.pause(1.0/60.0)
    # Initialize variational parameters
    rs = npr.RandomState(0)
    num_samples = 2
    init_mean = rs.randn(num_weights)
    init_log_std = -5 * np.ones(num_weights)
    variational_params = np.concatenate([init_mean, init_log_std])



    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_data[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        variational_params = update_nn(variational_params, batch_data, batch_labels)

        if (step % 10) == 0:
            correct = 0
            num_test = len(test_labels)
            for ix, val in enumerate(test_labels):
                outputs = generate_nn_output(variational_params,
                                            np.expand_dims(test_data[ix,:],0),
                                            num_weights,
                                            num_samples)
                predicted_class = np.argmax(np.mean(outputs, axis=0))
                actual_class = np.argmax(val)
                if actual_class == predicted_class:
                    correct += 1

            print ('Accuracy at step %d: %2.3f' % (step, float(correct)/num_test*100))
Example #44
    elif len(mat.shape) == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    else:
        raise ArithmeticError

def covgrad(x, mean, cov, allow_singular=False):
    if allow_singular:
        raise NotImplementedError("The multivariate normal pdf is not "
                "differentiable w.r.t. a singular covariance matrix")
    # I think once we have Cholesky we can make this nicer.
    solved = np.linalg.solve(cov, (x - mean).T).T
    return lower_half(np.linalg.inv(cov) - generalized_outer_product(solved))

def solve(allow_singular):
    if allow_singular:
        return lambda A, x: np.dot(np.linalg.pinv(A), x)
    else:
        return np.linalg.solve

logpdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(vs, gvs, -np.expand_dims(g, 1) * solve(allow_singular)(cov, (x - mean).T).T), argnum=0)
logpdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(vs, gvs,  np.expand_dims(g, 1) * solve(allow_singular)(cov, (x - mean).T).T), argnum=1)
logpdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(vs, gvs, -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov, allow_singular)), argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(vs, gvs, -np.expand_dims(ans * g, 1) * solve(allow_singular)(cov, (x - mean).T).T), argnum=0)
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(vs, gvs,  np.expand_dims(ans * g, 1) * solve(allow_singular)(cov, (x - mean).T).T), argnum=1)
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(vs, gvs, -np.reshape(ans * g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov, allow_singular)), argnum=2)

entropy.defvjp_is_zero(argnums=(0,))
entropy.defvjp(lambda g, ans, vs, gvs, mean, cov: unbroadcast(vs, gvs, 0.5 * g * np.linalg.inv(cov).T), argnum=1)
Example #45
gammaln      = primitive(scipy.special.gammaln)
gammainc     = primitive(scipy.special.gammainc)
gammaincc    = primitive(scipy.special.gammaincc)
gammasgn     = primitive(scipy.special.gammasgn)
rgamma       = primitive(scipy.special.rgamma)
multigammaln = primitive(scipy.special.multigammaln)

defvjp(gammasgn, None)
defvjp(polygamma, None, lambda ans, n, x: lambda g: g * polygamma(n + 1, x))
defvjp(psi,      lambda ans, x: lambda g: g * polygamma(1, x))
defvjp(digamma,  lambda ans, x: lambda g: g * polygamma(1, x))
defvjp(gamma,    lambda ans, x: lambda g: g * ans * psi(x))
defvjp(gammaln,  lambda ans, x: lambda g: g * psi(x))
defvjp(rgamma,   lambda ans, x: lambda g: g * psi(x) / -gamma(x))
defvjp(multigammaln,lambda ans, a, d: lambda g:
       g * np.sum(digamma(np.expand_dims(a, -1) - np.arange(d)/2.), -1),
       None)

def make_gammainc_vjp_arg1(sign):
    def gammainc_vjp_arg1(ans, a, x):
        coeffs = sign * np.exp(-x) * np.power(x, a - 1) / gamma(a)
        return unbroadcast_f(x, lambda g: g * coeffs)
    return gammainc_vjp_arg1
defvjp(gammainc, make_gammainc_vjp_arg1(1), argnums=[1])
defvjp(gammaincc, make_gammainc_vjp_arg1(-1), argnums=[1])

### Bessel functions ###

j0 = primitive(scipy.special.j0)
y0 = primitive(scipy.special.y0)
j1 = primitive(scipy.special.j1)
Example #46
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    if len(mat.shape) == 1:
        return np.outer(mat, mat)
    elif len(mat.shape) == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    else:
        raise ArithmeticError

def covgrad(x, mean, cov):
    # I think once we have Cholesky we can make this nicer.
    solved = np.linalg.solve(cov, (x - mean).T).T
    return lower_half(np.linalg.inv(cov) - generalized_outer_product(solved))

logpdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, x,    lambda g: -np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=0)
logpdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, mean, lambda g:  np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=1)
logpdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, cov,  lambda g: -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov)), argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, x,    lambda g: -g * ans * np.linalg.solve(cov, x - mean)), argnum=0)
pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, mean, lambda g:  g * ans * np.linalg.solve(cov, x - mean)), argnum=1)
pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, cov,  lambda g: -g * ans * covgrad(x, mean, cov)),          argnum=2)

entropy.defgrad_is_zero(argnums=(0,))
entropy.defgrad(lambda ans, mean, cov: unbroadcast(ans, cov, lambda g:  0.5 * g * np.linalg.inv(cov).T), argnum=1)
Example #47
    if allow_singular:
        raise NotImplementedError("The multivariate normal pdf is not "
                "differentiable w.r.t. a singular covariance matrix")
    J = np.linalg.inv(cov)
    solved = np.matmul(J, np.expand_dims(x - mean, -1))
    return 1./2 * (generalized_outer_product(solved) - J)

def solve(allow_singular):
    if allow_singular:
        return lambda A, x: np.dot(np.linalg.pinv(A), x)
    else:
        return np.linalg.solve

defvjp(logpdf,
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(x, lambda g: -np.expand_dims(g, 1) * solve(allow_singular)(cov, (x - mean).T).T),
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(mean, lambda g:  np.expand_dims(g, 1) * solve(allow_singular)(cov, (x - mean).T).T),
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(cov, lambda g: -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov, allow_singular)))

# Same as log pdf, but multiplied by the pdf (ans).
defvjp(pdf,
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(x, lambda g: -np.expand_dims(ans * g, 1) * solve(allow_singular)(cov, (x - mean).T).T),
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(mean, lambda g:  np.expand_dims(ans * g, 1) * solve(allow_singular)(cov, (x - mean).T).T),
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(cov, lambda g: -np.reshape(ans * g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov, allow_singular)))

defvjp(entropy, None,
       lambda ans, mean, cov: unbroadcast_f(cov, lambda g: 0.5 * g * np.linalg.inv(cov).T))
Example #48
def rbf_covariance(kernel_params, x, xp):
    output_scale = np.exp(kernel_params[0])
    lengthscales = np.exp(kernel_params[1:])
    diffs = np.expand_dims(x /lengthscales, 1)\
          - np.expand_dims(xp/lengthscales, 0)
    return output_scale * np.exp(-0.5 * np.sum(diffs**2, axis=2))
Example #49
File: niw.py Project: mattjj/svae
def standard_to_natural(S, m, kappa, nu):
    b = np.expand_dims(kappa, -1) * m
    A = S + outer(b, m)
    return pack_dense(A, b, kappa, nu)
Example #50
File: niw.py Project: mattjj/svae
def natural_to_standard(natparam):
    A, b, kappa, nu = unpack_dense(natparam)
    m = b / np.expand_dims(kappa, -1)
    S = A - outer(b, m)
    return S, m, kappa, nu
Example #51
 def fun(x): return to_scalar(np.expand_dims(x, 2))
 d_fun = lambda x : to_scalar(grad(fun)(x))
Example #52
def fast_array_from_list(xs):
    # import pdb; pdb.set_trace()
    # print 'I am in fast_array_from_list'
    return np.concatenate([np.expand_dims(x, axis=0) for x in xs], axis=0)
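This is equivalent to np.stack along a new leading axis; a quick check with toy values:

import numpy as np

xs = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]
out = fast_array_from_list(xs)
assert out.shape == (2, 2)
assert np.allclose(out, np.stack(xs, axis=0))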
Example #53
 def RBF(x,xp):
     output_scale = params[num+'kernel_noise']
     lengthscales = params[num+'kernel_lenscale']
     diffs =  np.expand_dims(x / lengthscales, 1) - np.expand_dims(xp / lengthscales, 0)
     return  output_scale * np.exp(-0.5 * np.sum(diffs ** 2, axis=2))
Example #54
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    if len(mat.shape) == 1:
        return np.outer(mat, mat)
    elif len(mat.shape) == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    else:
        raise ArithmeticError

def covgrad(x, mean, cov):
    # I think once we have Cholesky we can make this nicer.
    solved = np.linalg.solve(cov, (x - mean).T).T
    return lower_half(np.linalg.inv(cov) - generalized_outer_product(solved))

logpdf.defgrad(lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(ans, x,    lambda g: -np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=0)
logpdf.defgrad(lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(ans, mean, lambda g:  np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=1)
logpdf.defgrad(lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(ans, cov,  lambda g: -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov)), argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defgrad(lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(ans, x,    lambda g: -g * ans * np.linalg.solve(cov, x - mean)), argnum=0)
pdf.defgrad(lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(ans, mean, lambda g:  g * ans * np.linalg.solve(cov, x - mean)), argnum=1)
pdf.defgrad(lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(ans, cov,  lambda g: -g * ans * covgrad(x, mean, cov)),          argnum=2)

entropy.defgrad_is_zero(argnums=(0,))
entropy.defgrad(lambda ans, mean, cov: unbroadcast(ans, cov, lambda g:  0.5 * g * np.linalg.inv(cov).T), argnum=1)
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    if len(mat.shape) == 1:
        return np.outer(mat, mat)
    elif len(mat.shape) == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    else:
        raise ArithmeticError

def covgrad(x, mean, cov):
    # I think once we have Cholesky we can make this nicer.
    solved = np.linalg.solve(cov, (x - mean).T).T
    return lower_half(np.linalg.inv(cov) - generalized_outer_product(solved))

logpdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(vs, gvs, -np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=0)
logpdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(vs, gvs,  np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=1)
logpdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(vs, gvs, -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov)), argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(vs, gvs, -g * ans * np.linalg.solve(cov, x - mean)), argnum=0)
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(vs, gvs,  g * ans * np.linalg.solve(cov, x - mean)), argnum=1)
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(vs, gvs, -g * ans * covgrad(x, mean, cov)),          argnum=2)

entropy.defvjp_is_zero(argnums=(0,))
entropy.defvjp(lambda g, ans, vs, gvs, mean, cov: unbroadcast(vs, gvs, 0.5 * g * np.linalg.inv(cov).T), argnum=1)
    fig = plt.figure(figsize=(8,8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.ion()
    plt.show(block=False)

    for step in range(num_steps):
        # Grab a random datum
        datum_id = npr.randint(0, num_datums)

        # Assess expected reward across all possible actions (loop over context + action vectors)
        rewards = []
        contexts = np.zeros((num_actions, F))
        for aa in range(num_actions):
            contexts[aa,:] = np.hstack((x[datum_id, :], [aa]))
            outputs = generate_nn_output(variational_params,
                                         np.expand_dims(contexts[aa,:],0),
                                         num_weights,
                                         num_samples)
            rewards.append(np.mean(outputs))

        # Check which is greater and choose that [1,0] = eat | [0,1] do not eat
        # If argmax returns 0, then we eat, otherwise we don't
        action_chosen = np.argmax(rewards)
        reward, oracle_reward = reward_function(action_chosen, y[datum_id])

        # Calculate the cumulative regret
        cumulative_regret += oracle_reward - reward

        # Store the experience of that reward as a training/data pair
        experience.append([contexts[action_chosen, :], reward])