Example #1
    def update(self, Z, num_new=1, log_weights=None):
        assert len(Z) >= num_new

        # don't do anything if no new data was observed
        if num_new == 0:
            return

        if log_weights is not None:
            assert len(log_weights) == len(Z)
        else:
            log_weights = np.zeros(len(Z))

        Z_new = Z[-num_new:]
        log_weights_new = log_weights[-num_new:]

        # first update: use the first element of Z_new and log_weights_new, then discard it
        if self.log_sum_weights is None:
            # assume we have already observed fake terms, which is needed to make the system well-posed
            # L_C encodes that these fake terms had covariance self.gamma2 * I, which acts as a regulariser
            self.L_C = np.eye(self.D) * np.sqrt(self.gamma2)
            self.log_sum_weights = log_weights_new[0]
            self.mu = Z_new[0]

            Z_new = Z_new[1:]
            log_weights_new = log_weights_new[1:]
            num_new -= 1

        # don't do anything if there is nothing left to process
        if len(Z_new) == 0:
            return

        # generate lmbdas (per-term mixing coefficients) that correspond to weighted averages
        lmbdas = log_weights_to_lmbdas(self.log_sum_weights, log_weights_new)

        # low-rank update of Cholesky, costs O(d^2) only
        old_L_C = np.array(self.L_C, copy=True)
        self.mu, self.L_C = update_mean_cov_L_lmbda(Z_new, self.mu, self.L_C,
                                                    lmbdas)

        if np.any(np.isnan(self.L_C)) or np.any(np.isinf(self.L_C)):
            logger.warning(
                "Numerical error while updating Cholesky factor of C.\n"
                "Before update:\n%s\n"
                "After update:\n%s\n"
                "Updating data:\n%s\n"
                "Updating log weights:\n%s\n"
                "Updating lmbdas:\n%s\n" % (str(old_L_C), str(
                    self.L_C), str(Z_new), str(log_weights_new), str(lmbdas)))
            raise RuntimeError(
                "Numerical error while updating Cholesky factor of C.")

        # update the running log-sum of weights; only the newly consumed
        # weights are added, since older terms are already accounted for
        # in self.log_sum_weights
        self.log_sum_weights = logsumexp(
            list(log_weights_new) + [self.log_sum_weights])
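
For context, `log_weights_to_lmbdas` and `update_mean_cov_L_lmbda` are not shown in this example. Based on how they are used above (and on the test further down), they appear to convert the running log-sum of weights plus the new log-weights into per-term mixing coefficients lmbda_t = w_t / W_t, and then fold each new point into the running weighted mean and the Cholesky factor of the biased weighted covariance. The following is only a rough, dense-covariance sketch of that recursion under those assumptions; the `_ref` names are hypothetical, and the real library routine works on the Cholesky factor L_C directly via rank-one updates.

import numpy as np
from scipy.special import logsumexp


def log_weights_to_lmbdas_ref(log_sum_weights, log_weights_new):
    # lmbda_t = w_t / (W_{t-1} + w_1 + ... + w_t), computed in log-space for stability
    lmbdas = np.zeros(len(log_weights_new))
    for t, lw in enumerate(log_weights_new):
        log_sum_weights = logsumexp([log_sum_weights, lw])
        lmbdas[t] = np.exp(lw - log_sum_weights)
    return lmbdas


def update_mean_cov_lmbda_ref(Z_new, mu, C, lmbdas):
    # online update of a weighted mean and biased (ddof=0) weighted covariance:
    #   mu_t = (1 - lmbda_t) * mu_{t-1} + lmbda_t * z_t
    #   C_t  = (1 - lmbda_t) * C_{t-1} + lmbda_t * (1 - lmbda_t) * outer(z_t - mu_{t-1}, z_t - mu_{t-1})
    for z, lmbda in zip(Z_new, lmbdas):
        diff = z - mu
        mu = mu + lmbda * diff
        C = (1.0 - lmbda) * C + lmbda * (1.0 - lmbda) * np.outer(diff, diff)
    return mu, C

The dense recursion above also costs O(d^2) per point; the advantage of updating L_C in place is that it avoids an O(d^3) re-factorisation whenever the Cholesky factor itself is needed, e.g. for drawing Gaussian samples.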
Example #2
def test_update_mean_cov_L_lmbda_converges_to_weighted_mean_and_cov():
    N_init = 10
    N = 10000
    D = 2
    X = np.random.randn(N, D)
    weights = np.random.rand(N)

    old_mean = np.average(X[:N_init], axis=0, weights=weights[:N_init])
    old_cov_L = np.linalg.cholesky(np.cov(X[:N_init].T, ddof=0))

    sum_old_weights = np.sum(weights[:N_init])
    lmbdas = weights_to_lmbdas(sum_old_weights, weights[N_init:])

    mean, cov_L = update_mean_cov_L_lmbda(X[N_init:], old_mean, old_cov_L,
                                          lmbdas)

    full_mean = np.average(X, axis=0, weights=weights)

    # update_mean_cov_L_lmbda normalises by the total weight rather than
    # applying Bessel's correction, so compare against the biased (ddof=0)
    # weighted covariance
    full_cov = np.cov(X.T, ddof=0, aweights=weights)
    cov = np.dot(cov_L, cov_L.T)

    assert_allclose(full_mean, mean)
    assert_allclose(full_cov, cov, atol=1e-2)
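
`weights_to_lmbdas` is the linear-domain counterpart of `log_weights_to_lmbdas` used by `update`. A minimal sketch that is consistent with how the test calls it (the name `weights_to_lmbdas_ref` is hypothetical, shown only to make the test's setup concrete):

import numpy as np


def weights_to_lmbdas_ref(sum_old_weights, weights_new):
    # lmbda_t = w_t / (old weight mass + cumulative new weight mass up to and including t)
    return weights_new / (sum_old_weights + np.cumsum(weights_new))

The loose atol=1e-2 on the covariance check reflects that the seed covariance of the first N_init points is unweighted; its influence on the final estimate scales like the ratio of the initial weight mass to the total, so the recursion only approaches the exact weighted covariance as N grows.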