Example #1
    def log_pdf(self, x):
        if self.theta is None:
            raise RuntimeError("Model not fitted yet.")
        assert_array_shape(x, ndim=1, dims={0: self.D})

        phi = rff_feature_map_single(x, self.omega, self.u)
        return np.dot(phi, self.theta)
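All of these snippets guard their inputs with assert_array_shape before doing any work. The helper itself is not shown on this page; the following is a minimal sketch of what it might look like, assuming it only checks the rank and selected axis lengths and raises a ValueError on mismatch (the library's actual helper may behave differently).

import numpy as np

def assert_array_shape(a, ndim=None, dims=None):
    # minimal sketch (assumption): validate rank and selected axis lengths
    if ndim is not None and a.ndim != ndim:
        raise ValueError("Expected %d dimensions, got %d." % (ndim, a.ndim))
    for axis, size in (dims or {}).items():
        if a.shape[axis] != size:
            raise ValueError("Expected axis %d to have size %d, got %d."
                             % (axis, size, a.shape[axis]))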
Example #2
 def log_pdf(self, x):
     if self.theta is None:
         raise RuntimeError("Model not fitted yet.")
     assert_array_shape(x, ndim=1, dims={0: self.D})
     
     phi = rff_feature_map_single(x, self.omega, self.u)
     return np.dot(phi, self.theta)
Example #3
    def fit(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})
        if self.basis is None:
            self.basis = X

        self.alpha, self.beta = fit(self.basis, X, self.sigma, self.lmbda)
        self.X = X
Example #4
    def objective(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        L_X = self.inc_cholesky["R"].T
        L_Y = incomplete_cholesky_new_points_gaussian(
            self.X, X, self.sigma, self.inc_cholesky["I"], self.inc_cholesky["R"], self.inc_cholesky["nu"]
        ).T
        b = compute_b(self.X, X, L_X, L_Y, self.sigma)
        return objective(self.X, X, self.sigma, self.lmbda, self.alpha, L_X, L_Y, b)
Example #5
    def objective(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        L_X = self.inc_cholesky["R"].T
        L_Y = incomplete_cholesky_new_points_gaussian(
            self.X, X, self.sigma, self.inc_cholesky['I'],
            self.inc_cholesky['R'], self.inc_cholesky['nu']).T
        b = compute_b(self.X, X, L_X, L_Y, self.sigma)
        return objective(self.X, X, self.sigma, self.lmbda, self.alpha, L_X,
                         L_Y, b)
Example #6
 def fit(self, X):
     assert_array_shape(X, ndim=2, dims={1: self.D})
     
     # sub-sample if data is larger than previously set N
     if len(X) > self.N:
         inds = np.random.permutation(len(X))[:self.N]
         self.X = X[inds]
     else:
         self.X = np.copy(X)
         
     self.fit_wrapper_()
Example #7
    def fit(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        # sub-sample if data is larger than previously set N
        if len(X) > self.N:
            inds = np.random.permutation(len(X))[: self.N]
            self.X = X[inds]
        else:
            self.X = np.copy(X)

        self.alpha = self.fit_wrapper_()
Example #8
    def fit(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        # sub-sample if data is larger than previously set N
        if len(X) > self.N:
            logger.info("Sub-sampling %d/%d data." % (self.N, len(X)))
            inds = np.random.permutation(len(X))[:self.N]
            self.X = X[inds]
        else:
            self.X = np.copy(X)

        self.alpha = self.fit_wrapper_()
Example #9
    def update_fit(self, X, log_weights=None):
        assert_array_shape(X, ndim=2, dims={1: self.D})
        N = len(X)

        # don't do anything if no data observed
        if N == 0:
            return

        if log_weights is None:
            log_weights = np.log(np.ones(N))
        assert_array_shape(log_weights, ndim=1, dims={0: N})

        # first update: use first of X and log_weights, and then discard
        if self.log_sum_weights is None:
            # assume we have observed fake terms, which is needed to make the system well-posed
            # L_C says the fake terms had covariance self.lmbda, which acts as a regulariser
            self.L_C = np.eye(self.m) * np.sqrt(self.lmbda)
            self.log_sum_weights = log_weights[0]
            self.b = compute_b(X[0].reshape(1, self.D), self.omega, self.u)
            self.n = 1

            X = X[1:]
            log_weights = log_weights[1:]
            N -= 1

        # don't do anything if no data observed
        if N == 0:
            return

        old_L_C = np.array(self.L_C, copy=True)
        self.b, self.L_C = update_b_L_C_weighted(X, self.b, self.L_C,
                                                 self.log_sum_weights,
                                                 log_weights, self.omega,
                                                 self.u)

        if np.any(np.isnan(self.L_C)) or np.any(np.isinf(self.L_C)):
            logger.warning(
                "Numerical error while updating Cholesky factor of C.\n"
                "Before update:\n%s\n"
                "After update:\n%s\n"
                "Updating data:\n%s\n"
                "Updating log weights:\n%s\n" %
                (str(old_L_C), str(self.L_C), str(X), str(log_weights)))
            raise RuntimeError(
                "Numerical error while updating Cholesky factor of C.")

        # update terms and weights
        self.n += len(X)
        self.log_sum_weights = log_sum_exp(
            list(log_weights) + [self.log_sum_weights])

        # finally update solution
        self.theta = fit_L_C_precomputed(self.b, self.L_C)
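The running total of the importance weights is maintained in log-space through log_sum_exp. A minimal, numerically stable sketch of such a helper (an assumption, not necessarily the library's version):

import numpy as np

def log_sum_exp(log_values):
    # stable log(sum(exp(log_values))): shift by the maximum before exponentiating
    log_values = np.asarray(log_values, dtype=float)
    m = np.max(log_values)
    return m + np.log(np.sum(np.exp(log_values - m)))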
Example #10
 def grad(self, x):
     assert_array_shape(x, ndim=1, dims={0: self.D})
     # now x is of shape (D,)
     # assume M datapoints in x
     Kxx = self.sigma**2
     KxX, Kl = Matern_kernel(x[np.newaxis, :], self.X, sigma=self.sigma)	 # shape (1, K)
     xX_grad = (self.X - x) * Kl.T
     tmp = np.dot(KxX, self.K_inv)	# should be of shape (1, K)
     A = Kxx + self.lmbda - np.sum(tmp * KxX)	# should be a scalar
     B = np.dot(KxX, self.X_grad) - np.dot(tmp + 1, xX_grad)		# shape (1, D) 
     gradient = -B[0] / A	# shape (D,)
     return gradient
Example #11
def grad(x, basis, sigma, alpha, beta):
    m, D = basis.shape
    assert_array_shape(x, ndim=1, dims={0: D})

    xi_grad = 0
    betasum_grad = 0
    for a, x_a in enumerate(basis):
        xi_grad += np.sum(gaussian_kernel_dx_i_dx_i_dx_j(x, x_a, sigma), axis=0) / m
        left_arg_hessian = gaussian_kernel_dx_i_dx_j(x, x_a, sigma)
        betasum_grad += beta[a, :].dot(left_arg_hessian)

    return alpha * xi_grad + betasum_grad
Example #12
 def grad(self, x):
     assert_array_shape(x, ndim=1, dims={0: self.D})
     # now x is of shape (D,)
     # assume M datapoints in x
     Kxx = 1	# should be a scalar: Kxx = exp(-(x-x)**2 / self.sigma) = 1
     KxX = gaussian_kernel(x[np.newaxis, :], self.X, sigma=self.sigma)	 # shape (1, K)
     xX_grad = gaussian_kernel_grad(x, self.X, self.sigma)	# should be shape (K, D)
     tmp = np.dot(KxX, self.K_inv)	# should be of shape (1, K)
     A = Kxx + self.lmbda - np.sum(tmp * KxX)	# should be a scalar
     B = np.dot(KxX, self.X_grad) - np.dot(tmp + 1, xX_grad)		# shape (1, D) 
     gradient = -B[0] / A	# shape (D,)
     return gradient
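The shape comments above are consistent with a Gaussian kernel k(x, y) = exp(-||x - y||^2 / sigma) and its gradient with respect to the first argument. A self-contained sketch under that assumption (the library's gaussian_kernel and gaussian_kernel_grad may use a different bandwidth convention):

import numpy as np

def gaussian_kernel(X, Y, sigma):
    # pairwise k(x, y) = exp(-||x - y||^2 / sigma); result has shape (len(X), len(Y))
    sq_dists = np.sum(X**2, 1)[:, np.newaxis] - 2 * X.dot(Y.T) + np.sum(Y**2, 1)[np.newaxis, :]
    return np.exp(-sq_dists / sigma)

def gaussian_kernel_grad(x, Y, sigma):
    # gradient of k(x, y) w.r.t. x, stacked over the rows of Y; result has shape (len(Y), D)
    k = gaussian_kernel(x[np.newaxis, :], Y, sigma)  # shape (1, len(Y))
    return 2.0 / sigma * (Y - x) * k.T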
Example #13
        def hessian(self, x):
            """
            Computes the Hessian of the learned log-density function.
            
            WARNING: This implementation is slow, so don't call repeatedly.
            """
            assert_array_shape(x, ndim=1, dims={0: self.D})

            H = np.zeros((self.D, self.D))
            for i, a in enumerate(self.alpha):
                H += a * gaussian_kernel_hessian_theano(x, self.X[i], self.sigma)

            return H
Example #14
        def third_order_derivative_tensor(self, x):
            """
            Computes the third order derivative tensor of the learned log-density function.
            
            WARNING: This implementation is slow, so don't call repeatedly.
            """
            assert_array_shape(x, ndim=1, dims={0: self.D})

            G3 = np.zeros((self.D, self.D, self.D))
            for i, a in enumerate(self.alpha):
                G3 += a * gaussian_kernel_third_order_derivative_tensor_theano(x, self.X[i], self.sigma)

            return G3
Example #15
 def hessian(self, x):
     """
     Computes the Hessian of the learned log-density function.
     
     WARNING: This implementation is slow, so don't call repeatedly.
     """
     assert_array_shape(x, ndim=1, dims={0: self.D})
     
     H = np.zeros((self.D, self.D))
     for i, a in enumerate(self.alpha):
         H += a * gaussian_kernel_hessian_theano(x, self.X[i], self.sigma)
 
     return H
Example #16
 def grad(self, x):
     assert_array_shape(x, ndim=1, dims={0: self.D})
     # now x is of shape (D,)
     # assume M datapoints in x
     Kxx = 1  # should be a scalar: Kxx = (1 + (x - x)^2 / 2sigma)^-1 = 1
     KxX = Cauchy_kernel(x[np.newaxis, :], self.X,
                         sigma=self.sigma)  # shape (1, K)
     xX_grad = (self.X - x) / self.sigma * KxX.T**2
     tmp = np.dot(KxX, self.K_inv)  # should be of shape (1, K)
     A = Kxx + self.lmbda - np.sum(tmp * KxX)  # should be a scalar
     B = np.dot(KxX, self.X_grad) - np.dot(tmp + 1, xX_grad)  # shape (1, D)
     gradient = -B[0] / A  # shape (D,)
     return gradient
Example #17
 def third_order_derivative_tensor(self, x):
     """
     Computes the third order derivative tensor of the learned log-density function.
     
     WARNING: This implementation is slow, so don't call repeatedly.
     """
     assert_array_shape(x, ndim=1, dims={0: self.D})
     
     G3 = np.zeros((self.D, self.D, self.D))
     for i, a in enumerate(self.alpha):
         G3 += a * gaussian_kernel_third_order_derivative_tensor_theano(x, self.X[i], self.sigma)
 
     return G3
Example #18
 def update_fit(self, X, log_weights=None):
     assert_array_shape(X, ndim=2, dims={1: self.D})
     N = len(X)
     
     # don't do anything if no data observed
     if N == 0:
         return
     
     if log_weights is None:
         log_weights = np.log(np.ones(N))
     assert_array_shape(log_weights, ndim=1, dims={0: N})
     
     # first update: use first of X and log_weights, and then discard
     if self.log_sum_weights is None:
         # assume we have observed fake terms, which is needed to make the system well-posed
         # L_C says the fake terms had covariance self.lmbda, which acts as a regulariser
         self.L_C = np.eye(self.m) * np.sqrt(self.lmbda)
         self.log_sum_weights = log_weights[0]
         self.b = compute_b(X[0].reshape(1, self.D), self.omega, self.u)
         self.n = 1
         
         X = X[1:]
         log_weights = log_weights[1:]
         N -= 1
         
     # don't do anything if no data observed
     if N == 0:
         return
     
     old_L_C = np.array(self.L_C, copy=True)
     self.b, self.L_C = update_b_L_C_weighted(X, self.b, self.L_C,
                                              self.log_sum_weights,
                                              log_weights,
                                              self.omega, self.u)
     
     if np.any(np.isnan(self.L_C)) or np.any(np.isinf(self.L_C)):
         logger.warning("Numerical error while updating Cholesky factor of C.\n"
                        "Before update:\n%s\n"
                        "After update:\n%s\n"
                        "Updating data:\n%s\n"
                        "Updating log weights:\n%s\n"
                        % (str(old_L_C), str(self.L_C), str(X), str(log_weights))
                        )
         raise RuntimeError("Numerical error while updating Cholesky factor of C.")
     
     # update terms and weights
     self.n += len(X)
     self.log_sum_weights = log_sum_exp(list(log_weights) + [self.log_sum_weights])
     
     # finally update solution
     self.theta = fit_L_C_precomputed(self.b, self.L_C)
Example #19
 def xvalidate_objective(self, X, num_folds=5, num_repetitions=1):
     assert_array_shape(X, ndim=2, dims={1: self.D})
     assert_positive_int(num_folds)
     assert_positive_int(num_repetitions)
     
     O = np.zeros((num_repetitions, num_folds))
     for i in range(num_repetitions):
         
         xval = XVal(N=len(X), num_folds=num_folds)
         for j, (train, test) in enumerate(xval):
             self.fit(X[train])
             O[i, j] = self.objective(X[test])
     
     return O
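XVal is expected to yield (train, test) index pairs, one pair per fold, freshly randomised on each repetition. A minimal, self-contained sketch of such a fold generator (an assumption; the library's XVal may shuffle or stratify differently):

import numpy as np

class XVal(object):
    def __init__(self, N, num_folds):
        self.N = N
        self.num_folds = num_folds

    def __iter__(self):
        # randomly permute indices and split them into num_folds disjoint test folds
        inds = np.random.permutation(self.N)
        folds = np.array_split(inds, self.num_folds)
        for i in range(self.num_folds):
            test = folds[i]
            train = np.hstack([folds[j] for j in range(self.num_folds) if j != i])
            yield train, test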
Example #20
 def hessian(self, x):
     """
     Computes the Hessian of the learned log-density function.
     
     WARNING: This implementation is slow, so don't call repeatedly.
     """
     assert_array_shape(x, ndim=1, dims={0: self.D})
     
     H = np.zeros((self.D, self.D))
     for i, theta_i in enumerate(self.theta):
         H += theta_i * rff_feature_map_comp_hessian_theano(x, self.omega[:, i], self.u[i])
 
     # RFF is a Monte Carlo average, so we have to normalise by np.sqrt(m) here
     return H / np.sqrt(self.m)
Example #21
 def third_order_derivative_tensor(self, x):
     """
     Computes the third order derivative tensor of the learned log-density function.
     
     WARNING: This implementation is slow, so don't call repeatedly.
     """
     assert_array_shape(x, ndim=1, dims={0: self.D})
     
     G3 = np.zeros((self.D, self.D, self.D))
     for i, theta_i in enumerate(self.theta):
         G3 += theta_i * rff_feature_map_comp_third_order_tensor_theano(x, self.omega[:, i], self.u[i])
 
     # RFF is a Monte Carlo average, so we have to normalise by np.sqrt(m) here
     return G3 / np.sqrt(self.m)
Example #22
 def xvalidate_objective(self, X, num_folds=5, num_repetitions=1):
     assert_array_shape(X, ndim=2, dims={1: self.D})
     assert_positive_int(num_folds)
     assert_positive_int(num_repetitions)
     
     O = np.zeros((num_repetitions, num_folds))
     for i in range(num_repetitions):
         
         xval = XVal(N=len(X), num_folds=num_folds)
         for j, (train, test) in enumerate(xval):
             self.fit(X[train])
             O[i, j] = self.objective(X[test])
     
     return O
Example #23
        def hessian(self, x):
            """
            Computes the Hessian of the learned log-density function.
            
            WARNING: This implementation is slow, so don't call repeatedly.
            """
            assert_array_shape(x, ndim=1, dims={0: self.D})

            H = np.zeros((self.D, self.D))
            for i, theta_i in enumerate(self.theta):
                H += theta_i * rff_feature_map_comp_hessian_theano(
                    x, self.omega[:, i], self.u[i])

            # RFF is a Monte Carlo average, so we have to normalise by np.sqrt(m) here
            return H / np.sqrt(self.m)
Example #24
        def third_order_derivative_tensor(self, x):
            """
            Computes the third order derivative tensor of the learned log-density function.
            
            WARNING: This implementation is slow, so don't call repeatedly.
            """
            assert_array_shape(x, ndim=1, dims={0: self.D})

            G3 = np.zeros((self.D, self.D, self.D))
            for i, theta_i in enumerate(self.theta):
                G3 += theta_i * rff_feature_map_comp_third_order_tensor_theano(
                    x, self.omega[:, i], self.u[i])

            # RFF is a Monte Carlo average, so we have to normalise by np.sqrt(m) here
            return G3 / np.sqrt(self.m)
Example #25
def log_pdf(x, basis, sigma, alpha, beta):
    m, D = basis.shape
    assert_array_shape(x, ndim=1, dims={0: D})

    SE_dx_dx_l = lambda x, y: gaussian_kernel_dx_dx(x, y.reshape(1, -1), sigma)
    SE_dx_l = lambda x, y: gaussian_kernel_grad(x, y.reshape(1, -1), sigma)

    xi = 0
    betasum = 0
    for a in range(m):
        x_a = basis[a]
        xi += np.sum(SE_dx_dx_l(x, x_a)) / m
        gradient_x_xa = np.squeeze(SE_dx_l(x, x_a))
        betasum += np.dot(gradient_x_xa, beta[a, :])

    return float(alpha * xi + betasum)
Example #26
def second_order_grad(x, X, sigma, alpha, beta, basis=None):
    """ Computes $\frac{\partial^2 log p(x)}{\partial x_i^2} """

    if basis is None:
        basis = X

    _, D = X.shape
    m, _ = basis.shape
    assert_array_shape(x, ndim=1, dims={0: D})

    xi_grad = 0
    betasum_grad = 0
    for a, x_a in enumerate(basis):
        xi_grad += np.sum(gaussian_kernel_dx_i_dx_i_dx_j_dx_j(x, x_a, sigma),
                          axis=0) / m
        left_arg_hessian = gaussian_kernel_dx_i_dx_i_dx_j(x, x_a, sigma)
        betasum_grad += beta[a, :].dot(left_arg_hessian)

    return alpha * xi_grad + betasum_grad
Example #27
    def fit(self, X, log_weights=None):
        assert_array_shape(X, ndim=2, dims={1: self.D})
        N = len(X)

        # in any case, delete previous solution
        self._initialise_solution()

        if N <= 0:
            # don't do anything if no data observed
            return
        elif N == 1:
            # can get away with single update as not expensive
            self.update_fit(X, log_weights)
            return

        if log_weights is None:
            # b is the same, since the first x is used as b straight away in update_fit
            self.b = compute_b(X, self.omega, self.u)

            # remove first term from C computation as it is replaced with regulariser
            C = compute_C(X[1:], self.omega, self.u)

            # C so far consists of the average of N-1 terms
            # additional term is regulariser C (first in update_fit)
            # use Knuth online-update for new mean, which is average of N terms
            # effectively, this is equal to
            # C = (C * (N - 1) + np.eye(self.m) * self.lmbda) / N
            delta = np.eye(self.m) * self.lmbda - C
            C += delta / N
            self.L_C = np.linalg.cholesky(C)

            # as all weights are equal, this corresponds to repeated update_fit calls
            self.log_sum_weights = np.log(N)

            self.n = N

            self.theta = fit_L_C_precomputed(self.b, self.L_C)
        else:
            # weighted batch learning here corresponds to repeated online-learning
            assert_array_shape(log_weights, ndim=1, dims={0: N})
            self.update_fit(X, log_weights)
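The delta / N step above is the standard online-mean update; as the in-code comment states, it is algebraically the same as averaging the N - 1 kernel terms together with one extra regulariser term. A small numerical check of that identity on toy matrices (stand-ins only, not the library's compute_C):

import numpy as np

np.random.seed(0)
m, N, lmbda = 3, 10, 0.1
terms = np.random.randn(N - 1, m, m)   # stand-ins for the N - 1 kernel terms
C = terms.mean(axis=0)                 # average of N - 1 terms

# online-mean update that folds in the regulariser as one extra "observation"
delta = np.eye(m) * lmbda - C
C_online = C + delta / N

# direct average over all N terms, i.e. C = (C * (N - 1) + np.eye(m) * lmbda) / N
C_direct = (terms.sum(axis=0) + np.eye(m) * lmbda) / N
print(np.allclose(C_online, C_direct))  # True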
Example #28
    def fit(self, X, log_weights=None):
        assert_array_shape(X, ndim=2, dims={1: self.D})
        N = len(X)

        # in any case, delete previous solution
        self._initialise_solution()
        
        if N <= 0:
            # don't do anything if no data observed
            return
        elif N == 1:
            # can get away with single update as not expensive
            self.update_fit(X, log_weights)
            return
        
        if log_weights is None:
            # b is the same, since the first x is used as b straight away in update_fit
            self.b = compute_b(X, self.omega, self.u)
            
            # remove first term from C computation as it is replaced with regulariser
            C = compute_C(X[1:], self.omega, self.u)
            
            # C so far consists of the average of N-1 terms
            # additional term is regulariser C (first in update_fit)
            # use Knuth online-update for new mean, which is average of N terms
            # effectively, this is equal to
            # C = (C * (N - 1) + np.eye(self.m) * self.lmbda) / N
            delta = np.eye(self.m) * self.lmbda - C
            C += delta / N
            self.L_C = np.linalg.cholesky(C)
            
            # as all weights are equal, this corresponds to repeated update_fit calls
            self.log_sum_weights = np.log(N)
            
            self.n = N
            
            self.theta = fit_L_C_precomputed(self.b, self.L_C)
        else:
            # weighted batch learning here corresponds to repeated online-learning
            assert_array_shape(log_weights, ndim=1, dims={0: N})
            self.update_fit(X, log_weights)
Example #29
    def log_pdf(self, x):
        assert_array_shape(x, ndim=1, dims={0: self.D})

        k = gaussian_kernel(self.X, x.reshape(1, self.D), self.sigma)[:, 0]
        return np.dot(self.alpha, k)
Example #30
 def log_pdf_multiple(self, X):
     assert_array_shape(X, ndim=2, dims={1: self.D})
     
     Phi = rff_feature_map(X, self.omega, self.u)
     return np.dot(Phi, self.theta)
Example #31
 def objective(self, X):
     assert_array_shape(X, ndim=2, dims={1: self.D})
     
     # note we need to recompute b and C here
     return objective_L_C_precomputed(X, self.theta, self.omega, self.u, self.b, self.L_C)
Example #32
    def log_pdf_multiple(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        k = gaussian_kernel(self.X, X, self.sigma)
        return np.dot(self.alpha, k)
Example #33
    def log_pdf_multiple(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        Phi = rff_feature_map(X, self.omega, self.u)
        return np.dot(Phi, self.theta)
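The feature maps used here follow the standard random Fourier feature construction, phi_i(x) = sqrt(2/m) * cos(omega_i . x + u_i), so that log_pdf is linear in theta. A minimal sketch under that assumption (the library's rff_feature_map may differ in scaling or in how omega and u are stored):

import numpy as np

def rff_feature_map(X, omega, u):
    # X has shape (N, D), omega has shape (D, m), u has shape (m,); result has shape (N, m)
    m = omega.shape[1]
    return np.cos(np.dot(X, omega) + u) * np.sqrt(2.0 / m)

def rff_feature_map_single(x, omega, u):
    # single-point version used by log_pdf; returns a vector of length m
    return rff_feature_map(x[np.newaxis, :], omega, u)[0]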
Example #34
    def objective(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        # note we need to recompute b and C here
        return objective_L_C_precomputed(X, self.theta, self.omega, self.u,
                                         self.b, self.L_C)
Example #35
    def grad(self, x):
        assert_array_shape(x, ndim=1, dims={0: self.D})

        k = gaussian_kernel_grad(x, self.X, self.sigma)
        return np.dot(self.alpha, k)
Example #36
    def log_pdf(self, x):
        assert_array_shape(x, ndim=1, dims={0: self.D})

        k = gaussian_kernel(self.X, x.reshape(1, self.D), self.sigma)[:, 0]
        return np.dot(self.alpha, k)
Example #37
    def objective(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        return objective(self.X, X, self.sigma, self.lmbda, self.alpha, self.K)
Example #38
    def grad(self, x):
        assert_array_shape(x, ndim=1, dims={0: self.D})

        k = gaussian_kernel_grad(x, self.X, self.sigma)
        return np.dot(self.alpha, k)
Example #39
    def log_pdf_multiple(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        k = gaussian_kernel(self.X, X, self.sigma)
        return np.dot(self.alpha, k)
Example #40
    def objective(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        return objective(self.X, X, self.sigma, self.lmbda, self.alpha, self.K)
Example #41
 def objective(self, X):
     assert_array_shape(X, ndim=2, dims={1: self.D})
     return self.compute_objective(X, self.X, self.sigma, self.alpha,
                                   self.beta)
Example #42
 def grad(self, x):
     assert_array_shape(x, ndim=1, dims={0: self.D})
     KXx = Cauchy_kernel(self.X, x[np.newaxis, :], sigma=self.sigma)    
     xX_grad = (self.X - x) / self.sigma * KXx**2
     gradient = np.dot(self.alpha, xX_grad)
     return gradient
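The gradient expression above matches a Cauchy kernel of the form k(x, y) = (1 + ||x - y||^2 / (2 * sigma))^{-1}, for which the derivative with respect to the first argument is (y - x) / sigma * k(x, y)^2. A self-contained sketch under that assumption (the library's Cauchy_kernel may be parameterised differently):

import numpy as np

def Cauchy_kernel(X, Y, sigma):
    # pairwise k(x, y) = 1 / (1 + ||x - y||^2 / (2 * sigma)); result has shape (len(X), len(Y))
    sq_dists = np.sum(X**2, 1)[:, np.newaxis] - 2 * X.dot(Y.T) + np.sum(Y**2, 1)[np.newaxis, :]
    return 1.0 / (1.0 + sq_dists / (2.0 * sigma))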