Example #1
def update_hyperstate(agent, hyperstate, hyperparameters, datum, dim,
                      learn_diff):
    state, action, reward, next_state, _ = [
        np.atleast_2d(np.copy(dat)) for dat in datum
    ]
    Llowers, Xy = [list(ele) for ele in hyperstate]
    assert len(Llowers) == len(hyperparameters)
    assert len(Xy) == len(hyperparameters)
    assert len(hyperparameters) == dim
    state_action = np.concatenate([state, action], axis=-1)
    y = np.concatenate(
        [next_state - state if learn_diff else next_state, reward],
        axis=-1)[..., :dim]

    for i in range(len(Llowers)):
        Llowers[i] = Llowers[i].transpose([0, 2, 1])
    for i, hp in zip(range(dim), hyperparameters):
        length_scale, signal_sd, noise_sd, prior_sd = hp
        basis = _basis(state_action, agent.random_matrices[i], agent.biases[i],
                       agent.basis_dims[i], length_scale, signal_sd)
        cholupdate(Llowers[i][0], basis[0].copy())
        Xy[i] += np.matmul(basis[:, None, :].transpose([0, 2, 1]),
                           y[:, None, :][..., i:i + 1])
    for i in range(len(Llowers)):
        Llowers[i] = Llowers[i].transpose([0, 2, 1])

    return [Llowers, Xy]
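The same sufficient-statistics recursion recurs in several examples below: a lower Cholesky factor of the regularized feature Gram matrix is kept alongside the running product of features and targets, and each new observation is folded in with a rank-one update. A minimal self-contained sketch of that recursion; reg, phi, and y_new are illustrative names, not part of the snippet's API:

import numpy as np
from choldate import cholupdate

rng = np.random.default_rng(0)
d, reg = 5, 1.0
Phi = rng.normal(size=(20, d))
y = rng.normal(size=(20, 1))

Llower = np.linalg.cholesky(Phi.T @ Phi + reg * np.eye(d))  # lower factor
Xy = Phi.T @ y

phi = rng.normal(size=(1, d))    # one new feature row
y_new = rng.normal(size=(1, 1))

U = Llower.T.copy()              # choldate updates upper factors in place
cholupdate(U, phi[0].copy())     # U.T @ U gains phi.T @ phi
Llower = U.T
Xy += phi.T @ y_new              # running Phi.T @ y

Phi2 = np.vstack([Phi, phi])
assert np.allclose(Llower, np.linalg.cholesky(Phi2.T @ Phi2 + reg * np.eye(d)))
assert np.allclose(Xy, Phi2.T @ np.vstack([y, y_new]))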
Example #2
def update_hyperstate(agent, hyperstate_params, hyperparameters_state,
                      hyperparameters_reward, datum, learn_diff):
    state, action, reward, next_state, _ = [
        np.atleast_2d(np.copy(dat)) for dat in datum
    ]
    Llower_state, Xy_state, Llower_reward, Xy_reward = hyperstate_params

    state_action = np.concatenate([state, action], axis=-1)
    state_ = next_state - state if learn_diff else next_state

    basis_state = _basis(state_action, agent.random_matrix_state,
                         agent.bias_state, agent.basis_dim_state,
                         hyperparameters_state[0], hyperparameters_state[1])
    Llower_state = Llower_state.transpose([0, 2, 1])
    for i in range(len(Llower_state)):
        cholupdate(Llower_state[i], basis_state[i].copy())
    Llower_state = Llower_state.transpose([0, 2, 1])
    Xy_state += np.matmul(basis_state[..., None, :].transpose([0, 2, 1]),
                          state_[..., None, :])

    basis_reward = _basis(state_action, agent.random_matrix_reward,
                          agent.bias_reward, agent.basis_dim_reward,
                          hyperparameters_reward[0], hyperparameters_reward[1])
    Llower_reward = Llower_reward.transpose([0, 2, 1])
    for i in range(len(Llower_reward)):
        cholupdate(Llower_reward[i], basis_reward[i].copy())
    Llower_reward = Llower_reward.transpose([0, 2, 1])
    Xy_reward += np.matmul(basis_reward[..., None, :].transpose([0, 2, 1]),
                           reward[..., None, :])

    return [Llower_state, Xy_state, Llower_reward, Xy_reward]
Example #3
from choldate import cholupdate, choldowndate

def cholup(R, x, sgn):
    # rank-one update ('+') or downdate ('-') of the upper factor R, in place
    u = x.copy()
    if sgn == '+':
        cholupdate(R, u)
    elif sgn == '-':
        choldowndate(R, u)
    else:
        raise ValueError("sgn must be '+' or '-'")
    return R
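Hypothetical usage of the wrapper, assuming R holds the upper Cholesky factor of a positive definite V and that V - np.outer(x, x) stays positive definite for the '-' branch:

import numpy as np

X = np.random.normal(size=(50, 4))
V = X.T @ X
R = np.linalg.cholesky(V).T.copy()
x = np.random.normal(size=4)

R = cholup(R, x, '+')   # R.T @ R is now V + np.outer(x, x)
R = cholup(R, x, '-')   # downdating with the same x restores V
assert np.allclose(R.T @ R, V)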
Example #4
    def _insertOne(self, x):
        assert x.ndim <= 1
        assert x.size == self._dim
        self._count += 1
        self._n += 1
        self._k += 1
        self._mu = self._mu + (x - self._mu) / self._k
        choldate.cholupdate(self._U, np.copy(x))
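A compact standalone version of the same recursion; since the surrounding class is not shown, this sketch assumes _U starts as the upper factor of an identity prior and accumulates the raw outer products, with the mean following the usual Welford recursion:

import numpy as np
from choldate import cholupdate

data = np.random.default_rng(1).normal(size=(100, 3))
mu = np.zeros(3)
U = np.eye(3)                    # upper factor of the identity prior
for k, x in enumerate(data, start=1):
    mu += (x - mu) / k
    cholupdate(U, x.copy())      # U.T @ U += np.outer(x, x)

assert np.allclose(U.T @ U, np.eye(3) + data.T @ data)
assert np.allclose(mu, data.mean(axis=0))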
Example #5
    def test_update(self):
        V = numpy.dot(self.X.transpose(), self.X)
        R = numpy.linalg.cholesky(V).transpose()
        u = numpy.random.normal(size=R.shape[0])
        V1 = V + numpy.outer(u, u)
        R1 = numpy.linalg.cholesky(V1).transpose()
        R_ = R.copy()
        u_ = u.copy()
        cholupdate(R_, u_)
        self.assertAlmostEqual(numpy.max((numpy.abs(R_) - numpy.abs(R1))**2), 0)
Example #6
    def _update_hyperstate(self, X, y, update_hyperstate):
        if update_hyperstate:
            basis = _basis(X, self.random_matrix, self.bias, self.basis_dim, self.length_scale, self.signal_sd)

            self.Llower_tiled = self.Llower_tiled.transpose([0, 2, 1])
            assert len(self.Llower_tiled) == len(basis)
            for i in range(len(self.Llower_tiled)):
                cholupdate(self.Llower_tiled[i], basis[i].copy())
            self.Llower_tiled = self.Llower_tiled.transpose([0, 2, 1])

            self.Xy_tiled += np.matmul(basis[:, None, :].transpose([0, 2, 1]), y[:, None, :])
Example #7
    def _downdate(self, Ktt, Yt, i, Ht=None, computeSigma2=True):
        if self.post is None:
            raise RuntimeError('you should call fit or autoFit before')

        n = Ktt.shape[0]
        T = numpy.r_[numpy.arange(i), numpy.arange(i + 1, n)]
        Yt1 = Yt[T]

        # Covariance
        Kti = Ktt[T, i]
        Kii = Ktt[i, i]

        # Cholesky downdates (cf. Osborne 2010, p. 216)
        RC = self.post.RC
        RC11 = RC[:i, :i]
        RC13 = RC[:i, i + 1:]
        S23 = RC[i, i + 1:].copy()
        S33 = RC[i + 1:, i + 1:].copy()
        cholupdate(S33, S23)  # inplace
        RC33 = S33
        RC1 = numpy.r_[numpy.c_[RC11, RC13], numpy.c_[numpy.zeros(RC13.T.shape), RC33]]

        if Ht is not None:
            Ht1 = Ht[T, :]
            Hi = Ht[i, :]

            RHCH1 = cholpsd(numpy.dot(Ht1.T, solve_chol(RC1, Ht1)))

            # System resolution (cf. Rasmussen & Williams 2006, Ch. 2, p. 28, Eq. 2.42)
            Ri = Hi - numpy.dot(Ht1.T, solve_chol(RC1, Kti))
            bet = solve_chol(RHCH1, numpy.dot(Ht1.T, solve_chol(RC1, Yt1)))
            invCY = solve_chol(RC1, (Yt1 - numpy.dot(Ht1, bet)))
            mu = numpy.dot(Hi.T, bet) + numpy.dot(Kti.T, invCY)
        else:
            invCY = solve_chol(RC1, Yt1)
            mu = numpy.dot(Kti.T, invCY)
            bet = None

        # sigma2
        if computeSigma2:
            Vf = linalg.solve(RC1.T, Kti)
            covf = Kii - (Vf * Vf).sum(axis=0).reshape(-1, 1)

            if Ht is not None:
                Vb = linalg.solve(RHCH1.T, Ri)
                covb = (Vb * Vb).sum(axis=0).reshape(-1, 1)
                sigma2 = covb + covf
            else:
                sigma2 = covf
            return mu, sigma2
        else:
            return mu
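The downdate at the heart of this method is a standalone identity: if R is the upper Cholesky factor of A and row/column i is deleted, the trailing block of the new factor is exactly R[i+1:, i+1:] rank-one-updated with R[i, i+1:]. A numeric check:

import numpy as np
from choldate import cholupdate

rng = np.random.default_rng(0)
n, i = 6, 2
B = rng.normal(size=(n, n))
A = B @ B.T + n * np.eye(n)
R = np.linalg.cholesky(A).T.copy()   # upper factor

S33 = R[i + 1:, i + 1:].copy()
S23 = R[i, i + 1:].copy()
cholupdate(S33, S23)                 # in place, as above

T = np.r_[np.arange(i), np.arange(i + 1, n)]
R1 = np.r_[np.c_[R[:i, :i], R[:i, i + 1:]],
           np.c_[np.zeros((n - 1 - i, i)), S33]]
assert np.allclose(R1.T @ R1, A[np.ix_(T, T)])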
Example #8
    def update_cluster_params(mean, cov_chol, data_point, n_cluster):
        kappa_0 = cgs_utils.init_kappa_0()
        new_mean = (mean * (kappa_0 + n_cluster) +
                    data_point) / (kappa_0 + n_cluster + 1)

        u_vec = np.sqrt(
            (kappa_0 + n_cluster + 1) /
            (kappa_0 + n_cluster)) * (data_point - new_mean).astype(np.float64)
        current_cov_chol = cov_chol.astype(np.float64).T

        choldate.cholupdate(current_cov_chol, u_vec.copy())

        return new_mean.astype(np.float32), current_cov_chol.T.astype(
            np.float32)
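The update vector u_vec scales the residual against the *new* mean; algebraically this equals the residual against the old mean scaled by sqrt(kappa_n / (kappa_n + 1)), so either form gives the same rank-one update. A quick check, with kappa_0 = 0.01 standing in for whatever cgs_utils.init_kappa_0() returns:

import numpy as np

kappa_0, n = 0.01, 7
rng = np.random.default_rng(4)
mean, x = rng.normal(size=3), rng.normal(size=3)

new_mean = (mean * (kappa_0 + n) + x) / (kappa_0 + n + 1)
u1 = np.sqrt((kappa_0 + n + 1) / (kappa_0 + n)) * (x - new_mean)
u2 = np.sqrt((kappa_0 + n) / (kappa_0 + n + 1)) * (x - mean)
assert np.allclose(u1, u2)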
Example #9
    def _loss(self, thetas, X, Llower_state, XXtr_state, Xytr_state, hyperparameters_state):
        X = X.copy()
        Llower_state = Llower_state.copy()
        XXtr_state = XXtr_state.copy()
        Xytr_state = Xytr_state.copy()
        hyperparameters_state = hyperparameters_state.copy()
        rng_state = np.random.get_state()
        np.random.seed(2)

        rewards = []
        state = X
        for unroll_step in range(self.unroll_steps):
            action = self._forward(thetas, state, hyperstate_params=[Llower_state, Xytr_state])
            state_action = np.concatenate([state, action], axis=-1)

            length_scale, signal_sd, noise_sd, prior_sd = hyperparameters_state
            basis_state = _basis(state_action, self.random_matrix_state, self.bias_state, self.basis_dim_state, length_scale, signal_sd)
            basis_state = basis_state[:, None, ...]
            mu, sigma = self._predict(Llower_state, Xytr_state, basis_state, noise_sd)
            state_ = mu + np.sqrt(sigma) * np.random.standard_normal(size=mu.shape)

            if self.learn_diff:
                state_tmp = state.copy()
                state = np.clip(state + state_, self.observation_space_low, self.observation_space_high)
                state_ = state - state_tmp
            else:
                state_ = np.clip(state_, self.observation_space_low, self.observation_space_high)
                state = state_.copy()

            reward = -self.env.loss_func(state)
            rewards.append((self.discount_factor**unroll_step)*reward)

            if self.update_hyperstate == 1 or self.policy_use_hyperstate == 1:
                #Update state hyperstate
                Llower_state = Llower_state.transpose([0, 2, 1])
                for i in range(len(Llower_state)):
                    cholupdate(Llower_state[i], basis_state[i, 0].copy())
                Llower_state = Llower_state.transpose([0, 2, 1])
                Xytr_state += np.matmul(basis_state.transpose([0, 2, 1]), state_[..., None, :])

        rewards = np.stack(rewards, axis=-1).sum(axis=-1)
        loss = -np.mean(rewards)
        np.random.set_state(rng_state)
        return loss
Example #10
def choldate_calc_gram_chol(jac):
    """Calculate Cholesky factor of Jacobian Gram matrix using choldate.

    This is **only** valid for generators in which the Jacobian J has a
    block structure [A | B] where B is lower triangular (or diagonal). The
    product is equal to A.dot(A.T) + B.dot(B.T) and so if B is triangular
    and A.dot(A.T) is low-rank (A has more rows than columns) then the
    Cholesky of J.dot(J.T) can be more efficiently computed in this case
    by performing low-rank Cholesky updates of B by the columns of A.
    """
    if choldate_available:
        gram_chol = jac[:, -jac.shape[0]:].T * 1.
        for col in jac[:, :-jac.shape[0]].T:
            cholupdate(gram_chol, col.copy())
        return gram_chol, False
    else:
        logger.warning('choldate not installed; falling back to SciPy cho_factor.')
        return scipy_calc_gram_chol(jac)
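A numeric check of the block trick the docstring describes, using a small random J = [A | B] with B unit lower triangular:

import numpy as np
from choldate import cholupdate

rng = np.random.default_rng(0)
m, c = 5, 2
A = rng.normal(size=(m, c))
B = np.tril(rng.normal(size=(m, m)), k=-1) + np.eye(m)  # unit lower triangular
jac = np.hstack([A, B])

gram_chol = jac[:, -m:].T * 1.      # start from B.T, copied
for col in jac[:, :-m].T:
    cholupdate(gram_chol, col.copy())

assert np.allclose(gram_chol.T @ gram_chol, jac @ jac.T)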
Example #11
def UpdateFactor(factor, index, Z, theta_rbf, theta_band, K, X_var_d):

    # make sure we use upper matrix
    assert factor[1] == False

    K_new = kernelRBF(Z, theta_rbf, theta_band)
    K_new[np.diag_indices_from(K_new)] += X_var_d
    u = K_new[index, :] - K[index, :]
    u[index] = 1.
    cholupdate(factor[0], u.copy())
    u[index] = 0.
    choldowndate(factor[0], u)
    w = np.zeros_like(u)
    w[index] = 1.
    choldowndate(factor[0], w)

    return factor, K_new
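The three factor operations realize a symmetric rank-two change of row and column index: with e the index-th standard basis vector and v the row difference (zeroed at index), (v+e)(v+e)^T - vv^T - ee^T = ve^T + ev^T. A standalone check, assuming the perturbation is small enough that every intermediate matrix stays positive definite:

import numpy as np
from choldate import cholupdate, choldowndate

rng = np.random.default_rng(5)
n, i = 5, 1
B = rng.normal(size=(n, n))
K = B @ B.T + n * np.eye(n)
R = np.linalg.cholesky(K).T.copy()   # upper factor of K

e = np.eye(n)[i]
v = 0.1 * rng.normal(size=n)
v[i] = 0.                            # diagonal entry unchanged
K_new = K + np.outer(v, e) + np.outer(e, v)

u = v.copy(); u[i] = 1.
cholupdate(R, u.copy())              # add (v+e)(v+e)^T
choldowndate(R, v.copy())            # remove v v^T
choldowndate(R, e.copy())            # remove e e^T
assert np.allclose(R.T @ R, K_new)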
Example #12
    def step_simplex(self, state, randomization, logpdf):

        self.total_l1_part += 1
        lam = self.lagrange

        data, opt_vars = state
        simplex, cube = opt_vars

        if self.lagrange is None:
            raise NotImplementedError(
                "The bound form has not been implemented")

        nactive = simplex.shape[0]
        stepsize = 1.5 / np.sqrt(nactive)

        rand = randomization
        random_sample = rand.rvs(size=nactive)
        step = np.dot(self.chol_adapt, random_sample)
        proposal = np.fabs(simplex + step)

        log_ratio = (logpdf((data, (proposal, cube))) - logpdf(state))

        # update cholesky factor

        alpha = np.minimum(np.exp(log_ratio), 1)
        target = 2.4 / np.sqrt(nactive)
        multiplier = ((self.total_l1_part + 1)**(-0.8) *
                      (np.exp(log_ratio) - target))
        rank_one = np.sqrt(
            np.fabs(multiplier)) * step / np.linalg.norm(random_sample)

        if multiplier > 0:
            cholupdate(self.chol_adapt, rank_one)  # update done in place
        else:
            choldowndate(self.chol_adapt, rank_one)  # update done in place

        if np.log(np.random.uniform()) < log_ratio:
            simplex = proposal
            self.accept_l1_part += 1

        return simplex
Example #14
def chol_rank1_update(L, x):
    # choldate works on float64 upper factors; updating through the
    # transposed view modifies the lower factor L in place
    cholupdate(L.T, x.copy())
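choldate only exposes updates of upper factors, but because the update happens in place it can be applied through a transposed view to maintain a lower factor, which is what the helper above relies on:

import numpy as np
from choldate import cholupdate

rng = np.random.default_rng(2)
B = rng.normal(size=(4, 4))
M = B @ B.T + 4 * np.eye(4)
L = np.linalg.cholesky(M)            # lower factor
x = rng.normal(size=4)

chol_rank1_update(L, x)              # updates L through the view L.T
assert np.allclose(L @ L.T, M + np.outer(x, x))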
Example #15
def MaskSolve(A, b, w=5, progress=True, niter=None):
    '''
    Finds the solution `x` to the linear problem

        A x = b

    for all contiguous `w`-sized masks applied to
    the rows and columns of `A` and to the entries
    of `b`.

    Returns an array `X` of shape `(N - w + 1, N - w)`,
    where the `nth` row is the solution to the equation

        A[![n,n+w)] x = b[![n,n+w)]

    where ![n,n+w) indicates that indices in the range
    [n,n+w) have been masked.

    '''

    # Ensure we have choldate installed
    if cholupdate is None:
        log.info("Running the slow version of `MaskSolve`.")
        log.info("Install the `choldate` package for better performance.")
        log.info("https://github.com/rodluger/choldate")
        return MaskSolveSlow(A, b, w=w, progress=progress, niter=niter)

    # Number of data points
    N = b.shape[0]

    # How many iterations? Default is to go through
    # the entire dataset
    if niter is None:
        niter = N - w + 1

    # Our result matrix
    X = np.empty((niter, N - w))

    # Solve the first two steps explicitly.
    for n in range(2):
        mask = np.arange(n, w + n)
        A_ = np.delete(np.delete(A, mask, axis=0), mask, axis=1)
        b_ = np.delete(b, mask)
        U = cholesky(A_)
        X[n] = cho_solve((U, False), b_)

    # Iterate!
    for n in prange(1, niter - 1):

        # Update the data vector.
        b_[n] = b[n]

        # Remove a row.
        S33 = U[n + 1:, n + 1:]
        S23 = U[n, n + 1:]
        cholupdate(S33, S23)

        # Add a row.
        A12 = A[:n, n]
        A22 = A[n, n]
        A23 = A[n, n + w + 1:]
        S11 = U[:n, :n]
        S12 = solve_triangular(S11.T, A12, lower=True,
                               check_finite=False, trans=0, overwrite_b=True)
        S22 = np.sqrt(A22 - np.dot(S12.T, S12))
        S13 = U[:n, n + 1:]
        S23 = (A23 - np.dot(S12.T, S13)) / S22
        choldowndate(S33, np.array(S23))
        U[:n, n] = S12
        U[n, n] = S22
        U[n, n + 1:] = S23
        U[n + 1:, n + 1:] = S33

        # Now we can solve our linear equation
        X[n + 1] = cho_solve((U, False), b_)

    # Return the matrix
    return X
Example #16
def solve_maxent(Sigma,
                 tol=1e-5,
                 verbose=False,
                 num_iter=10,
                 smoothing=0,
                 converge_tol=1e-4):
    """
	Uses a coordinate-descent algorithm to find the solution to the smoothed
	maximum entropy problem. 
	:param Sigma: p x p covariance matrix
	:param tol: Minimum eigenvalue of 2Sigma - S and S
	:param num_iter: Number of coordinate descent iterations
	:param verbose: if true, will give progress reports
	:param smoothing: computes smoothed maxent loss
	"""

    if smoothing > 0:
        raise NotImplementedError("Smoothing is not implemented yet")

    # Initial constants
    time0 = time.time()
    V = Sigma  # I'm too lazy to write Sigma out over and over
    p = V.shape[0]
    inds = np.arange(p)
    loss = np.inf

    # Initialize values
    decayed_improvement = 1
    S = np.linalg.eigh(V)[0].min() * np.eye(p)
    L = np.linalg.cholesky(2 * V - S)

    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            diff = 2 * V - S

            # Solve cholesky equation
            tildey = 2 * V[j].copy()
            tildey[j] = 0
            x = sp.linalg.solve_triangular(a=L, b=tildey, lower=True)

            # Use cholesky eq to get new update
            zeta = diff[j, j]
            x22 = np.power(x, 2).sum()
            qinvterm = zeta * x22 / (zeta + x22)

            # Inverse of Qj using SWM formula
            sjstar = (2 * V[j, j] - qinvterm) / 2

            # Rank one update for cholesky
            delta = S[j, j] - sjstar
            x = np.zeros(p)
            x[j] = np.sqrt(np.abs(delta))
            if delta > 0:
                choldate.cholupdate(L.T, x)
            else:
                choldate.choldowndate(L.T, x)

            # Set new value for S
            S[j, j] = sjstar

        # Check for convergence
        prev_loss = loss
        loss = maxent_loss(V, S, smoothing=smoothing)
        if i != 0:
            decayed_improvement = decayed_improvement / 10 + 9 * (prev_loss -
                                                                  loss) / 10
        if verbose:
            print(
                f"After iter {i} at time {np.around(time.time() - time0,3)}, loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol:
            if verbose:
                print(f"Converged after iteration {i} with loss={loss}")
            break

    # Ensure validity of solution
    S = utilities.shift_until_PSD(S, tol=tol)
    S, _ = utilities.scale_until_PSD(V, S, tol=tol, num_iter=10)
    return S
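Each coordinate-descent step only changes one diagonal entry of S, so 2*Sigma - S moves by a multiple of e_j e_j^T and its Cholesky factor can be maintained with a single rank-one update or downdate, exactly as in the loop above. The pattern in isolation:

import numpy as np
from choldate import cholupdate

rng = np.random.default_rng(3)
p, j, delta = 6, 2, 0.1
B = rng.normal(size=(p, p))
V = B @ B.T + p * np.eye(p)
S = 0.5 * np.eye(p)
L = np.linalg.cholesky(2 * V - S)

S[j, j] -= delta                     # shrinking S[j, j] grows 2V - S
x = np.zeros(p)
x[j] = np.sqrt(delta)
cholupdate(L.T, x)                   # update the factor through its transpose
assert np.allclose(L @ L.T, 2 * V - S)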
Example #17
    def _loss(self,
              thetas,
              X,
              Llowers,
              XXtr,
              Xytr,
              A=[],
              hyperparameters=None,
              sess=None):
        rng_state = np.random.get_state()
        X = np.copy(X)
        Llowers = [np.copy(ele) for ele in Llowers]
        XXtr = [np.copy(ele) for ele in XXtr]
        Xytr = [np.copy(ele) for ele in Xytr]
        hyperparameters = [np.copy(ele) for ele in hyperparameters]
        try:
            np.random.seed(2)

            rewards = []
            state = X
            for unroll_step in range(self.unroll_steps):
                action = self._forward(thetas,
                                       state,
                                       hyperstate=[Llowers, Xytr])
                reward, basis_reward = self._reward(state, action, sess,
                                                    Llowers[-1], Xytr[-1],
                                                    hyperparameters[-1])
                rewards.append((self.discount_factor**unroll_step) * reward)
                state_action = np.concatenate([state, action], axis=-1)

                means = []
                covs = []
                bases = []
                for i in range(self.state_dim):
                    length_scale, signal_sd, noise_sd, prior_sd = hyperparameters[
                        i]
                    basis = _basis(state_action, self.random_matrices[i],
                                   self.biases[i], self.basis_dims[i],
                                   length_scale, signal_sd)
                    basis = np.expand_dims(basis, axis=1)
                    bases.append(basis)
                    pred_mu, pred_sigma = self._predict(
                        Llowers[i], Xytr[i], basis, noise_sd)
                    means.append(pred_mu)
                    covs.append(pred_sigma)
                means = np.concatenate(means, axis=-1)
                covs = np.concatenate(covs, axis=-1)

                bases.append(basis_reward)

                state_ = np.stack([
                    np.random.multivariate_normal(mean=mean, cov=np.diag(cov))
                    for mean, cov in zip(means, covs)
                ],
                                  axis=0)
                state = state + state_ if self.learn_diff else state_
                if self.learn_diff == 0:
                    state_ = np.clip(state_, self.observation_space_low,
                                     self.observation_space_high)
                state = np.clip(state, self.observation_space_low,
                                self.observation_space_high)

                if self.update_hyperstate == 1 or self.policy_use_hyperstate == 1:
                    y = np.concatenate([state_, reward],
                                       axis=-1)[..., :self.state_dim +
                                                self.learn_reward]
                    y = y[..., np.newaxis, np.newaxis]
                    for i in range(self.state_dim + self.learn_reward):
                        Llowers[i] = Llowers[i].transpose([0, 2, 1])
                    for i in range(self.state_dim + self.learn_reward):
                        for j in range(len(Llowers[i])):
                            cholupdate(Llowers[i][j], bases[i][j, 0].copy())
                        Xytr[i] += np.matmul(bases[i].transpose([0, 2, 1]),
                                             y[:, i, ...])


                    for i in range(self.state_dim + self.learn_reward):
                        Llowers[i] = Llowers[i].transpose([0, 2, 1])

            rewards = np.concatenate(rewards, axis=-1)
            rewards = np.sum(rewards, axis=-1)
            loss = -np.mean(rewards)
            np.random.set_state(rng_state)
            return loss
        except Exception as e:
            np.random.set_state(rng_state)
            print(e, 'Returning 10e100')
            return 10e100
Example #18
def solve_mvr(Sigma,
              tol=1e-5,
              verbose=False,
              num_iter=10,
              smoothing=0,
              rej_rate=0,
              converge_tol=1):
    """
	Uses a coordinate-descent algorithm to find the solution to the smoothed
	MVR problem. 
	:param Sigma: p x p covariance matrix
	:param tol: Minimum eigenvalue of 2Sigma - S and S
	:param num_iter: Number of coordinate descent iterations
	:param rej_rate: Expected proportion of rejections for knockoffs under the
	metropolized knockoff sampling framework.
	:param verbose: if true, will give progress reports
	:param smoothing: computes smoothed mvr loss
	"""

    # Initial constants
    time0 = time.time()
    V = Sigma  # I'm too lazy to write Sigma out over and over
    p = V.shape[0]
    inds = np.arange(p)
    loss = np.inf
    acc_rate = 1 - rej_rate
    # Takes a bit longer for rej_rate adjusted to converge
    if acc_rate < 1:
        converge_tol = 1e-2

    # Initialize values
    decayed_improvement = 10
    min_eig = np.linalg.eigh(V)[0].min()
    S = min_eig * np.eye(p)
    L = np.linalg.cholesky(2 * V - S + smoothing * np.eye(p))

    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            # 1. Compute coefficients cn and cd
            ej = np.zeros(p)  # jth basis element
            ej[j] = 1
            # 1a. Compute cd
            vd = sp.linalg.solve_triangular(a=L, b=ej, lower=True)
            cd = np.power(vd, 2).sum()
            # 1b. Compute vn
            vn = sp.linalg.solve_triangular(a=L.T, b=vd, lower=False)
            cn = -1 * np.power(vn, 2).sum()

            # 2. Construct quadratic equation
            # We want to minimize 1/(sj + delta) - (delta * cn)/(1 - delta * cd)
            coef2 = -1 * cn - np.power(cd, 2)
            coef1 = 2 * (-1 * cn * (S[j, j] + smoothing) + cd)
            coef0 = -1 * cn * (S[j, j] + smoothing)**2 - 1
            orig_options = np.roots(np.array([coef2, coef1, coef0]))

            # 3. Eliminate complex solutions
            options = np.array(
                [delta for delta in orig_options if np.imag(delta) == 0])
            # Eliminate solutions which violate PSD-ness
            upper_bound = 1 / cd
            lower_bound = -1 * S[j, j]
            options = np.array([
                delta for delta in options
                if delta < upper_bound and delta > lower_bound
            ])
            if options.shape[0] == 0:
                raise RuntimeError(
                    f"All quadratic solutions ({orig_options}) were infeasible or imaginary"
                )

            # 4. If multiple solutions left (unlikely), pick the smaller one
            losses = 1 / (S[j, j] + options) - (options * cn) / (1 -
                                                                 options * cd)
            if losses[0] == losses.min():
                delta = options[0]
            else:
                delta = options[1]

            # 5. Account for rejections
            if acc_rate < 1:
                extra_space = min(min_eig, 0.02) / (
                    i + 2)  # Helps deal with coord desc
                opt_postrej_value = S[j, j] + delta
                opt_prerej_value = opt_postrej_value / (acc_rate)
                opt_prerej_value = min(S[j, j] + upper_bound - extra_space,
                                       max(opt_prerej_value, extra_space))
                delta = opt_prerej_value - S[j, j]

            # Update S and L
            x = np.zeros(p)
            x[j] = np.sqrt(np.abs(delta))
            if delta > 0:
                choldate.choldowndate(L.T, x)
            else:
                choldate.cholupdate(L.T, x)

            # Set new value for S
            S[j, j] += delta

        # Check for convergence
        prev_loss = loss
        loss = mvr_loss(V, acc_rate * S, smoothing=smoothing)
        if i != 0:
            decayed_improvement = decayed_improvement / 10 + 9 * (prev_loss -
                                                                  loss) / 10
        if verbose:
            print(
                f"After iter {i} at time {np.around(time.time() - time0,3)}, loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol:
            if verbose:
                print(f"Converged after iteration {i} with loss={loss}")
            break
    # Ensure validity of solution
    S = utilities.shift_until_PSD(S, tol=tol)
    S, _ = utilities.scale_until_PSD(V, S, tol=tol, num_iter=10)
    return S
Example #19
'''
Created on Feb 15, 2013

@author: jasonrudy
'''
from choldate import cholupdate, choldowndate
import numpy

#Create a random positive definite matrix, V
numpy.random.seed(1)
X = numpy.random.normal(size=(100,10))
V = numpy.dot(X.transpose(),X)

#Calculate the upper Cholesky factor, R
R = numpy.linalg.cholesky(V).transpose()

#Create a random update vector, u
u = numpy.random.normal(size=R.shape[0])

#Calculate the updated positive definite matrix, V1, and its Cholesky factor, R1
V1 = V + numpy.outer(u,u)
R1 = numpy.linalg.cholesky(V1).transpose()

#The following is equivalent to the above
R1_ = R.copy()
cholupdate(R1_,u.copy())
assert(numpy.all((R1 - R1_)**2 < 1e-16))

#And downdating is the inverse of updating
R_ = R1.copy()
choldowndate(R_,u.copy())
assert(numpy.all((R - R_)**2 < 1e-16))
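For reference, the classic algorithm behind cholupdate fits in a few lines of NumPy; this didactic sketch is much slower than the compiled choldate routine but produces the same factor:

def cholupdate_ref(R, u):
    #Rank-one update of the upper factor R, in place: afterwards
    #R.T @ R has grown by numpy.outer(u, u)
    u = u.astype(float).copy()
    for k in range(u.size):
        r = numpy.hypot(R[k, k], u[k])
        c, s = r / R[k, k], u[k] / R[k, k]
        R[k, k] = r
        R[k, k + 1:] = (R[k, k + 1:] + s * u[k + 1:]) / c
        u[k + 1:] = c * u[k + 1:] - s * R[k, k + 1:]
    return R

R2 = R.copy()
cholupdate_ref(R2, u)
assert numpy.allclose(R1, R2)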
Example #20
import time
import numpy as np
import scipy.linalg as spla
from choldate import cholupdate

batch_size = 16  # example values; the snippet leaves these two undefined
samples = 200
dim = 150

X = np.random.normal(size=[batch_size, samples, dim])
M = np.matmul(X.transpose([0, 2, 1]), X) + np.tile(np.eye(X.shape[-1])[None, ...], [len(X), 1, 1])

u = np.random.normal(size=[batch_size, 1, dim])
U = np.matmul(u.transpose([0, 2, 1]), u)

MU = M + U

A = np.linalg.cholesky(M).transpose([0, 2, 1])

start = time.time()
B = np.linalg.cholesky(MU)
print(time.time() - start)
B = B.transpose([0, 2, 1])

start = time.time()
for i in range(batch_size):
    cholupdate(A[i], u[i, 0].copy())
print(time.time() - start)

start = time.time()
C = [spla.cholesky(MU[i]) for i in range(batch_size)]
print(time.time() - start)
C = np.stack(C, axis=0)

print(np.allclose(A, B))
print(np.allclose(A, C))
print(np.allclose(B, C))
Example #22
def _solve_maxent_sdp_cd(
    Sigma,
    solve_sdp,
    tol=1e-5,
    verbose=False,
    num_iter=50,
    converge_tol=1e-4,
    choldate_warning=True,
    mu=0.9,
    lambd=0.5,
):
    """
    This function is internally used to compute the S-matrices
    used to generate maximum entropy and SDP knockoffs. Users
    should not call this function---they should call ``solve_maxent``
    or ``solve_sdp`` directly.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 50.
    converge_tol : float
        A parameter specifying the criteria for convergence.
    choldate_warning : bool
        If True, will warn the user if choldate is not installed. 
        Defaults to True
    solve_sdp : bool
        If True, will solve SDP. Otherwise, will solve maxent formulation.
    lambd : float
        Initial barrier constant
    mu : float
        Barrier decay constant

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """

    # Warning if choldate not available
    if not CHOLDATE_AVAILABLE and choldate_warning:
        warnings.warn(constants.CHOLDATE_WARNING)

    # Initial constants
    time0 = time.time()
    V = Sigma  # Shorthand prevents lines from spilling over
    p = V.shape[0]
    inds = np.arange(p)
    loss = np.inf

    # Initialize values
    decayed_improvement = 1
    mineig = np.linalg.eigh(V)[0].min()
    if solve_sdp:
        S = 0.01 * mineig * np.eye(p)
    else:
        S = mineig * np.eye(p)
    L = np.linalg.cholesky(2 * V - S)
    lambd = min(2 * mineig, lambd)

    # Loss function
    if solve_sdp:
        loss_fn = lambda V, S: S.shape[0] - np.diag(S).sum()
    else:
        loss_fn = maxent_loss

    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            diff = 2 * V - S

            # Solve cholesky equation
            tildey = 2 * V[j].copy()
            tildey[j] = 0
            x = sp.linalg.solve_triangular(a=L, b=tildey, lower=True)

            # Use cholesky eq to get new update
            zeta = diff[j, j]
            x22 = np.power(x, 2).sum()
            qinvterm = zeta * x22 / (zeta + x22)

            # Inverse of Qj using SWM formula
            if solve_sdp:
                sjstar = max(min(1, 2 * V[j, j] - qinvterm - lambd), 0)
            else:
                sjstar = (2 * V[j, j] - qinvterm) / 2

            # Rank one update for cholesky
            delta = S[j, j] - sjstar
            x = np.zeros(p)
            x[j] = np.sqrt(np.abs(delta))
            if delta > 0:
                if CHOLDATE_AVAILABLE:
                    choldate.cholupdate(L.T, x)
                else:
                    cholupdate(L.T, x, add=False)
            else:
                if CHOLDATE_AVAILABLE:
                    choldate.choldowndate(L.T, x)
                else:
                    cholupdate(L.T, x, add=True)

            # Set new value for S
            S[j, j] = sjstar

        # Check for convergence
        prev_loss = loss
        loss = loss_fn(V, S)
        if i != 0:
            loss_diff = prev_loss - loss
            if solve_sdp:
                loss_diff = max(loss_diff, lambd)
            decayed_improvement = decayed_improvement / 10 + 9 * (
                loss_diff) / 10
        if verbose:
            print(
                f"After iter {i} at time {np.around(time.time() - time0,3)}, loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol:
            if verbose:
                print(f"Converged after iteration {i} with loss={loss}")
            break

        # Update barrier parameter if solving SDP
        if solve_sdp:
            lambd = mu * lambd

    # Ensure validity of solution
    S = utilities.shift_until_PSD(S, tol=tol)
    S, _ = utilities.scale_until_PSD(V, S, tol=tol, num_iter=10)
    return S
Example #23
def _solve_mvr_ungrouped(
    Sigma,
    tol=1e-5,
    verbose=False,
    num_iter=50,
    smoothing=0,
    rej_rate=0,
    converge_tol=1e-2,
    choldate_warning=True,
):
    """
    Computes S-matrix used to generate minimum variance-based
    reconstructability knockoffs using coordinate descent.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 50.
    smoothing : float
        Add ``smoothing`` to all eigenvalues of the feature-knockoff
        precision matrix before inverting to avoid numerical
        instability. Defaults to 0.
    converge_tol : float
        A parameter specifying the criteria for convergence.
    choldate_warning : bool
        If True, will warn the user if choldate is not installed. 
        Defaults to True.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """

    # Warning if choldate not available
    if not CHOLDATE_AVAILABLE and choldate_warning:
        warnings.warn(constants.CHOLDATE_WARNING)

    # Initial constants
    time0 = time.time()
    V = Sigma  # Shorthand prevents lines from spilling over
    p = V.shape[0]
    inds = np.arange(p)
    loss = np.inf
    acc_rate = 1 - rej_rate

    # Initialize values
    decayed_improvement = 10
    min_eig = np.linalg.eigh(V)[0].min()
    S = min_eig * np.eye(p)
    L = np.linalg.cholesky(2 * V - S + smoothing * np.eye(p))

    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            # 1. Compute coefficients cn and cd
            ej = np.zeros(p)  # jth basis element
            ej[j] = 1
            # 1a. Compute cd
            vd = sp.linalg.solve_triangular(a=L, b=ej, lower=True)
            cd = np.power(vd, 2).sum()
            # 1b. Compute vn
            vn = sp.linalg.solve_triangular(a=L.T, b=vd, lower=False)
            cn = -1 * np.power(vn, 2).sum()

            # 2. Construct/solve quadratic equation
            delta = _solve_mvr_quadratic(
                cn=cn,
                cd=cd,
                sj=S[j, j],
                min_eig=min_eig,
                i=i,
                smoothing=smoothing,
                acc_rate=acc_rate,
            )

            # 3. Update S and L
            x = np.zeros(p)
            x[j] = np.sqrt(np.abs(delta))
            if delta > 0:
                if CHOLDATE_AVAILABLE:
                    choldate.choldowndate(L.T, x)
                else:
                    cholupdate(L.T, x, add=False)
            else:
                if CHOLDATE_AVAILABLE:
                    choldate.cholupdate(L.T, x)
                else:
                    cholupdate(L.T, x, add=True)

            # Set new value for S
            S[j, j] += delta

        # Check for convergence
        prev_loss = loss
        loss = mvr_loss(V, acc_rate * S, smoothing=smoothing)
        if i != 0:
            decayed_improvement = decayed_improvement / 10 + 9 * (prev_loss -
                                                                  loss) / 10
        if verbose:
            print(
                f"After iter {i} at time {np.around(time.time() - time0,3)}, loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol:
            if verbose:
                print(f"Converged after iteration {i} with loss={loss}")
            break

    return S