def update_hyperstate(agent, hyperstate, hyperparameters, datum, dim, learn_diff):
    state, action, reward, next_state, _ = [
        np.atleast_2d(np.copy(dat)) for dat in datum
    ]
    Llowers, Xy = [list(ele) for ele in hyperstate]
    assert len(Llowers) == len(hyperparameters)
    assert len(Xy) == len(hyperparameters)
    assert len(hyperparameters) == dim
    state_action = np.concatenate([state, action], axis=-1)
    y = np.concatenate(
        [next_state - state if learn_diff else next_state, reward],
        axis=-1)[..., :dim]
    for i in range(len(Llowers)):
        Llowers[i] = Llowers[i].transpose([0, 2, 1])
    for i, hp in zip(range(dim), hyperparameters):
        length_scale, signal_sd, noise_sd, prior_sd = hp
        basis = _basis(state_action, agent.random_matrices[i], agent.biases[i],
                       agent.basis_dims[i], length_scale, signal_sd)
        cholupdate(Llowers[i][0], basis[0].copy())
        Xy[i] += np.matmul(basis[:, None, :].transpose([0, 2, 1]),
                           y[:, None, :][..., i:i + 1])
    for i in range(len(Llowers)):
        Llowers[i] = Llowers[i].transpose([0, 2, 1])
    return [Llowers, Xy]
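# A minimal sketch (names and shapes assumed for illustration) of the
# identity the update above relies on: rank-one updating the upper Cholesky
# factor of a regularized Gram matrix Phi.T @ Phi with a new basis row phi
# matches re-factorizing from scratch.
import numpy as np
from choldate import cholupdate

rng = np.random.RandomState(0)
Phi = rng.normal(size=(50, 8))      # existing basis rows
phi = rng.normal(size=8)            # new basis row
G = Phi.T @ Phi + np.eye(8)         # regularized Gram matrix
R = np.linalg.cholesky(G).T         # upper factor: R.T @ R == G

cholupdate(R, phi.copy())           # in place; copy since x is overwritten
R_direct = np.linalg.cholesky(G + np.outer(phi, phi)).T
assert np.allclose(np.abs(R), np.abs(R_direct))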
def update_hyperstate(agent, hyperstate_params, hyperparameters_state,
                      hyperparameters_reward, datum, learn_diff):
    state, action, reward, next_state, _ = [
        np.atleast_2d(np.copy(dat)) for dat in datum
    ]
    Llower_state, Xy_state, Llower_reward, Xy_reward = hyperstate_params
    state_action = np.concatenate([state, action], axis=-1)
    state_ = next_state - state if learn_diff else next_state

    basis_state = _basis(state_action, agent.random_matrix_state,
                         agent.bias_state, agent.basis_dim_state,
                         hyperparameters_state[0], hyperparameters_state[1])
    Llower_state = Llower_state.transpose([0, 2, 1])
    for i in range(len(Llower_state)):
        cholupdate(Llower_state[i], basis_state[i].copy())
    Llower_state = Llower_state.transpose([0, 2, 1])
    Xy_state += np.matmul(basis_state[..., None, :].transpose([0, 2, 1]),
                          state_[..., None, :])

    basis_reward = _basis(state_action, agent.random_matrix_reward,
                          agent.bias_reward, agent.basis_dim_reward,
                          hyperparameters_reward[0], hyperparameters_reward[1])
    Llower_reward = Llower_reward.transpose([0, 2, 1])
    for i in range(len(Llower_reward)):
        cholupdate(Llower_reward[i], basis_reward[i].copy())
    Llower_reward = Llower_reward.transpose([0, 2, 1])
    Xy_reward += np.matmul(basis_reward[..., None, :].transpose([0, 2, 1]),
                           reward[..., None, :])
    return [Llower_state, Xy_state, Llower_reward, Xy_reward]
def cholup(R, x, sgn):
    u = x.copy()
    if sgn == '+':
        cholupdate(R, u)
    elif sgn == '-':
        choldowndate(R, u)
    return R
def _insertOne(self, x):
    assert x.ndim <= 1
    assert x.size == self._dim
    self._count += 1
    self._n += 1
    self._k += 1
    self._mu = self._mu + (x - self._mu) / self._k
    choldate.cholupdate(self._U, np.copy(x))
def test_update(self):
    V = numpy.dot(self.X.transpose(), self.X)
    R = numpy.linalg.cholesky(V).transpose()
    u = numpy.random.normal(size=R.shape[0])
    V1 = V + numpy.outer(u, u)
    R1 = numpy.linalg.cholesky(V1).transpose()
    R_ = R.copy()
    u_ = u.copy()
    cholupdate(R_, u_)
    self.assertAlmostEqual(numpy.max((numpy.abs(R_) - numpy.abs(R1))**2), 0)
def _update_hyperstate(self, X, y, update_hyperstate):
    if update_hyperstate:
        basis = _basis(X, self.random_matrix, self.bias, self.basis_dim,
                       self.length_scale, self.signal_sd)
        self.Llower_tiled = self.Llower_tiled.transpose([0, 2, 1])
        assert len(self.Llower_tiled) == len(basis)
        for i in range(len(self.Llower_tiled)):
            cholupdate(self.Llower_tiled[i], basis[i].copy())
        self.Llower_tiled = self.Llower_tiled.transpose([0, 2, 1])
        self.Xy_tiled += np.matmul(basis[:, None, :].transpose([0, 2, 1]),
                                   y[:, None, :])
def _downdate(self, Ktt, Yt, i, Ht=None, computeSigma2=True):
    if self.post is None:
        raise RuntimeError('you should call fit or autoFit before')
    n = Ktt.shape[0]
    T = numpy.r_[numpy.arange(i), numpy.arange(i + 1, n)]
    Yt1 = Yt[T]
    # Covariance
    Kti = Ktt[T, i]
    Kii = Ktt[i, i]
    # Cholesky downdates (cf. Osborne 2010, p. 216)
    RC = self.post.RC
    RC11 = RC[:i, :i]
    RC13 = RC[:i, i + 1:]
    S23 = RC[i, i + 1:].copy()
    S33 = RC[i + 1:, i + 1:].copy()
    cholupdate(S33, S23)  # in place
    RC33 = S33
    RC1 = numpy.r_[numpy.c_[RC11, RC13],
                   numpy.c_[numpy.zeros(RC13.T.shape), RC33]]
    if Ht is not None:
        Ht1 = Ht[T, :]
        Hi = Ht[i, :]
        RHCH1 = cholpsd(numpy.dot(Ht1.T, solve_chol(RC1, Ht1)))
        # System resolution (cf. Rasmussen & Williams 2006, Ch. 2, p. 28, Eq. 2.42)
        Ri = Hi - numpy.dot(Ht1.T, solve_chol(RC1, Kti))
        bet = solve_chol(RHCH1, numpy.dot(Ht1.T, solve_chol(RC1, Yt1)))
        invCY = solve_chol(RC1, (Yt1 - numpy.dot(Ht1, bet)))
        mu = numpy.dot(Hi.T, bet) + numpy.dot(Kti.T, invCY)
    else:
        invCY = solve_chol(RC1, Yt1)
        mu = numpy.dot(Kti.T, invCY)
        bet = None
    # sigma2
    if computeSigma2:
        Vf = linalg.solve(RC1.T, Kti)
        covf = Kii - (Vf * Vf).sum(axis=0).reshape(-1, 1)
        if Ht is not None:
            Vb = linalg.solve(RHCH1.T, Ri)
            covb = (Vb * Vb).sum(axis=0).reshape(-1, 1)
            sigma2 = covb + covf
        else:
            sigma2 = covf
        return mu, sigma2
    else:
        return mu
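# Sketch of the row-removal identity `_downdate` uses (cf. Osborne 2010):
# if U is the upper factor of K, rank-one updating the trailing block
# U[i+1:, i+1:] by the row U[i, i+1:] yields the trailing block of the
# factor of K with row/column i deleted. Matrices below are illustrative.
import numpy as np
from choldate import cholupdate

rng = np.random.RandomState(0)
n, i = 6, 2
G = rng.normal(size=(n, n))
K = G @ G.T + n * np.eye(n)           # positive definite test matrix
U = np.linalg.cholesky(K).T           # upper factor of K

S33 = U[i + 1:, i + 1:].copy()
cholupdate(S33, U[i, i + 1:].copy())  # absorb the deleted row's contribution

T = np.r_[np.arange(i), np.arange(i + 1, n)]
U_del = np.linalg.cholesky(K[np.ix_(T, T)]).T
assert np.allclose(np.abs(S33), np.abs(U_del[i:, i:]))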
def update_cluster_params(mean, cov_chol, data_point, n_cluster):
    kappa_0 = cgs_utils.init_kappa_0()
    new_mean = (mean * (kappa_0 + n_cluster) + data_point) / (
        kappa_0 + n_cluster + 1)
    u_vec = np.sqrt(
        (kappa_0 + n_cluster + 1) /
        (kappa_0 + n_cluster)) * (data_point - new_mean).astype(np.float64)
    current_cov_chol = cov_chol.astype(np.float64).T
    choldate.cholupdate(current_cov_chol, u_vec.copy())
    return new_mean.astype(np.float32), current_cov_chol.T.astype(np.float32)
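# The u_vec scaling above comes from the Normal-inverse-Wishart posterior
# update. A quick check of the underlying identity (kappa_n below is an
# assumed prior strength plus count): the rank-one term added to the
# posterior scatter matrix is the same whether written via the old mean or
# the new mean.
import numpy as np

rng = np.random.RandomState(1)
kappa_n = 3.0
mean = rng.normal(size=4)
x = rng.normal(size=4)
new_mean = (kappa_n * mean + x) / (kappa_n + 1)
u_vec = np.sqrt((kappa_n + 1) / kappa_n) * (x - new_mean)
assert np.allclose(np.outer(u_vec, u_vec),
                   (kappa_n / (kappa_n + 1)) * np.outer(x - mean, x - mean))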
def _loss(self, thetas, X, Llower_state, XXtr_state, Xytr_state,
          hyperparameters_state):
    X = X.copy()
    Llower_state = Llower_state.copy()
    XXtr_state = XXtr_state.copy()
    Xytr_state = Xytr_state.copy()
    hyperparameters_state = hyperparameters_state.copy()
    rng_state = np.random.get_state()
    np.random.seed(2)
    rewards = []
    state = X
    for unroll_step in range(self.unroll_steps):
        action = self._forward(thetas, state,
                               hyperstate_params=[Llower_state, Xytr_state])
        state_action = np.concatenate([state, action], axis=-1)
        length_scale, signal_sd, noise_sd, prior_sd = hyperparameters_state
        basis_state = _basis(state_action, self.random_matrix_state,
                             self.bias_state, self.basis_dim_state,
                             length_scale, signal_sd)
        basis_state = basis_state[:, None, ...]
        mu, sigma = self._predict(Llower_state, Xytr_state, basis_state,
                                  noise_sd)
        state_ = mu + np.sqrt(sigma) * np.random.standard_normal(size=mu.shape)
        if self.learn_diff:
            state_tmp = state.copy()
            state = np.clip(state + state_, self.observation_space_low,
                            self.observation_space_high)
            state_ = state - state_tmp
        else:
            state_ = np.clip(state_, self.observation_space_low,
                             self.observation_space_high)
            state = state_.copy()
        reward = -self.env.loss_func(state)
        rewards.append((self.discount_factor**unroll_step) * reward)
        if self.update_hyperstate == 1 or self.policy_use_hyperstate == 1:
            # Update state hyperstate
            Llower_state = Llower_state.transpose([0, 2, 1])
            for i in range(len(Llower_state)):
                cholupdate(Llower_state[i], basis_state[i, 0].copy())
            Llower_state = Llower_state.transpose([0, 2, 1])
            Xytr_state += np.matmul(basis_state.transpose([0, 2, 1]),
                                    state_[..., None, :])
    rewards = np.stack(rewards, axis=-1).sum(axis=-1)
    loss = -np.mean(rewards)
    np.random.set_state(rng_state)
    return loss
def choldate_calc_gram_chol(jac):
    """Calculate Cholesky factor of Jacobian Gram matrix using choldate.

    This is **only** valid for generators in which the Jacobian J has a
    block structure [A | B] where B is lower triangular (or diagonal). The
    product J.dot(J.T) is then equal to A.dot(A.T) + B.dot(B.T), so if B is
    triangular and A.dot(A.T) is low-rank (A has more rows than columns),
    the Cholesky factor of J.dot(J.T) can be computed more efficiently by
    performing low-rank Cholesky updates of B by the columns of A.
    """
    if choldate_available:
        gram_chol = jac[:, -jac.shape[0]:].T * 1.
        for col in jac[:, :-jac.shape[0]].T:
            cholupdate(gram_chol, col.copy())
        return gram_chol, False
    else:
        logger.warn('choldate not installed falling back to SciPy cho_factor.')
        return scipy_calc_gram_chol(jac)
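# A hedged check of the block trick described in the docstring: with
# J = [A | B] and B lower triangular with positive diagonal, B.T is already
# an upper factor of B @ B.T, and updating it by each column of A gives a
# factor of J @ J.T without ever forming the Gram matrix.
import numpy as np
from choldate import cholupdate

rng = np.random.RandomState(0)
m, k = 6, 3
A = rng.normal(size=(m, k))
B = np.tril(rng.normal(size=(m, m)))
np.fill_diagonal(B, np.abs(np.diag(B)) + 1.)   # ensure positive diagonal
jac = np.hstack([A, B])

gram_chol = jac[:, -m:].T * 1.                 # fresh copy of B.T
for col in jac[:, :-m].T:                      # columns of A
    cholupdate(gram_chol, col.copy())
assert np.allclose(gram_chol.T @ gram_chol, jac @ jac.T)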
def UpdateFactor(factor, index, Z, theta_rbf, theta_band, K, X_var_d):
    # make sure we use the upper factor
    assert factor[1] == False
    # Z_new = np.array(Z)
    # Z_new[index, :] = pars
    K_new = kernelRBF(Z, theta_rbf, theta_band)
    K_new[np.diag_indices_from(K_new)] += X_var_d
    u = K_new[index, :] - K[index, :]
    u[index] = 1.
    # Replacing row/column `index` of K changes it by e_i u'^T + u' e_i^T,
    # where u' is u with u'[index] = 0 (the RBF diagonal is unchanged).
    # Since u u^T - u' u'^T - e_i e_i^T = e_i u'^T + u' e_i^T for
    # u = u' + e_i, this is one rank-one update followed by two downdates.
    cholupdate(factor[0], u.copy())
    u[index] = 0.
    choldowndate(factor[0], u)
    w = np.zeros_like(u)
    w[index] = 1.
    choldowndate(factor[0], w)
    return factor, K_new
def step_simplex(self, state, randomization, logpdf):
    self.total_l1_part += 1
    lam = self.lagrange
    data, opt_vars = state
    simplex, cube = opt_vars
    if self.lagrange is None:
        raise NotImplementedError("The bound form has not been implemented")
    nactive = simplex.shape[0]
    stepsize = 1.5 / np.sqrt(nactive)
    rand = randomization
    random_sample = rand.rvs(size=nactive)
    step = np.dot(self.chol_adapt, random_sample)
    proposal = np.fabs(simplex + step)
    log_ratio = (logpdf((data, (proposal, cube))) - logpdf(state))

    # update cholesky factor
    alpha = np.minimum(np.exp(log_ratio), 1)
    target = 2.4 / np.sqrt(nactive)
    multiplier = ((self.total_l1_part + 1)**(-0.8) *
                  (np.exp(log_ratio) - target))
    rank_one = np.sqrt(
        np.fabs(multiplier)) * step / np.linalg.norm(random_sample)
    if multiplier > 0:
        cholupdate(self.chol_adapt, rank_one)  # update done in place
    else:
        choldowndate(self.chol_adapt, rank_one)  # update done in place

    if np.log(np.random.uniform()) < log_ratio:
        simplex = proposal
        self.accept_l1_part += 1
    return simplex
def chol_rank1_update(L, x):
    # choldate only supports float64 and operates on upper factors, so
    # update the lower factor L in place through its transposed view
    cholupdate(L.T, x.copy())
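# Sketch of the transposed-view trick (assumed convention: L is a lower
# float64 factor with L @ L.T == A). choldate mutates the upper factor in
# place, so updating L.T updates L itself.
import numpy as np
from choldate import cholupdate

rng = np.random.RandomState(0)
X = rng.normal(size=(20, 5))
A = X.T @ X + np.eye(5)
L = np.linalg.cholesky(A)           # lower factor, float64
x = rng.normal(size=5)

cholupdate(L.T, x.copy())           # writes through the view into L
assert np.allclose(L @ L.T, A + np.outer(x, x))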
def MaskSolve(A, b, w=5, progress=True, niter=None):
    '''
    Finds the solution `x` to the linear problem

        A x = b

    for all contiguous `w`-sized masks applied to the rows and columns of
    `A` and to the entries of `b`. Returns an array `X` of shape
    `(N - w + 1, N - w)`, where the `nth` row is the solution to the
    equation

        A[![n,n+w)] x = b[![n,n+w)]

    where ![n,n+w) indicates that indices in the range [n,n+w) have been
    masked.
    '''
    # Ensure we have choldate installed
    if cholupdate is None:
        log.info("Running the slow version of `MaskSolve`.")
        log.info("Install the `choldate` package for better performance.")
        log.info("https://github.com/rodluger/choldate")
        return MaskSolveSlow(A, b, w=w, progress=progress, niter=niter)

    # Number of data points
    N = b.shape[0]

    # How many iterations? Default is to go through the entire dataset
    if niter is None:
        niter = N - w + 1

    # Our result matrix
    X = np.empty((niter, N - w))

    # Solve the first two steps explicitly.
    for n in range(2):
        mask = np.arange(n, w + n)
        A_ = np.delete(np.delete(A, mask, axis=0), mask, axis=1)
        b_ = np.delete(b, mask)
        U = cholesky(A_)
        X[n] = cho_solve((U, False), b_)

    # Iterate!
    for n in prange(1, niter - 1):
        # Update the data vector.
        b_[n] = b[n]

        # Remove a row.
        S33 = U[n + 1:, n + 1:]
        S23 = U[n, n + 1:]
        cholupdate(S33, S23)

        # Add a row.
        A12 = A[:n, n]
        A22 = A[n, n]
        A23 = A[n, n + w + 1:]
        S11 = U[:n, :n]
        S12 = solve_triangular(S11.T, A12, lower=True, check_finite=False,
                               trans=0, overwrite_b=True)
        S22 = np.sqrt(A22 - np.dot(S12.T, S12))
        S13 = U[:n, n + 1:]
        S23 = (A23 - np.dot(S12.T, S13)) / S22
        choldowndate(S33, np.array(S23))
        U[:n, n] = S12
        U[n, n] = S22
        U[n, n + 1:] = S23
        U[n + 1:, n + 1:] = S33

        # Now we can solve our linear equation
        X[n + 1] = cho_solve((U, False), b_)

    # Return the matrix
    return X
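# Hypothetical usage sketch for MaskSolve, checking one row of the output
# against a direct dense solve of the masked system (per the docstring's
# contract). Assumes MaskSolve and its dependencies are importable.
import numpy as np

N, w = 12, 3
rng = np.random.RandomState(0)
G = rng.normal(size=(N, N))
A = G @ G.T + N * np.eye(N)         # positive definite
b = rng.normal(size=N)

X = MaskSolve(A, b, w=w, progress=False)
n = 4
mask = np.arange(n, n + w)
A_ = np.delete(np.delete(A, mask, axis=0), mask, axis=1)
b_ = np.delete(b, mask)
assert np.allclose(X[n], np.linalg.solve(A_, b_))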
def solve_maxent(Sigma, tol=1e-5, verbose=False, num_iter=10, smoothing=0,
                 converge_tol=1e-4):
    """
    Uses a coordinate-descent algorithm to find the solution to the smoothed
    maximum entropy problem.

    :param Sigma: p x p covariance matrix
    :param tol: Minimum eigenvalue of 2Sigma - S and S
    :param num_iter: Number of coordinate descent iterations
    :param verbose: if true, will give progress reports
    :param smoothing: computes smoothed maxent loss
    """
    if smoothing > 0:
        raise NotImplementedError(f"Smoothing is not implemented yet")

    # Initial constants
    time0 = time.time()
    V = Sigma  # I'm too lazy to write Sigma out over and over
    p = V.shape[0]
    inds = np.arange(p)
    loss = np.inf

    # Initialize values
    decayed_improvement = 1
    S = np.linalg.eigh(V)[0].min() * np.eye(p)
    L = np.linalg.cholesky(2 * V - S)

    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            diff = 2 * V - S

            # Solve cholesky equation
            tildey = 2 * V[j].copy()
            tildey[j] = 0
            x = sp.linalg.solve_triangular(a=L, b=tildey, lower=True)

            # Use cholesky eq to get new update
            zeta = diff[j, j]
            x22 = np.power(x, 2).sum()
            qinvterm = zeta * x22 / (zeta + x22)

            # Inverse of Qj using SWM formula
            sjstar = (2 * V[j, j] - qinvterm) / 2

            # Rank one update for cholesky
            delta = S[j, j] - sjstar
            x = np.zeros(p)
            x[j] = np.sqrt(np.abs(delta))
            if delta > 0:
                choldate.cholupdate(L.T, x)
            else:
                choldate.choldowndate(L.T, x)

            # Set new value for S
            S[j, j] = sjstar

        # Check for convergence
        prev_loss = loss
        loss = maxent_loss(V, S, smoothing=smoothing)
        if i != 0:
            decayed_improvement = decayed_improvement / 10 + 9 * (prev_loss -
                                                                  loss) / 10
        if verbose:
            print(
                f"After iter {i} at time {np.around(time.time() - time0, 3)}, loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol:
            if verbose:
                print(f"Converged after iteration {i} with loss={loss}")
            break

    # Ensure validity of solution
    S = utilities.shift_until_PSD(S, tol=tol)
    S, _ = utilities.scale_until_PSD(V, S, tol=tol, num_iter=10)
    return S
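# Sanity sketch of the factor maintenance in the inner loop above: after
# changing S[j, j] by -delta (so 2*V - S gains +delta on that diagonal
# entry), a rank-one update of L.T by sqrt(delta) * e_j agrees with
# re-factorizing 2*V - S from scratch (up to row signs). Values below are
# illustrative.
import numpy as np
import choldate

rng = np.random.RandomState(0)
p = 5
G = rng.normal(size=(p, p))
V = G @ G.T + p * np.eye(p)
S = 0.1 * np.eye(p)
L = np.linalg.cholesky(2 * V - S)

j, delta = 2, 0.05                  # shrink S[j, j] by delta
x = np.zeros(p)
x[j] = np.sqrt(delta)
choldate.cholupdate(L.T, x)         # in place through the transposed view
S[j, j] -= delta
assert np.allclose(np.abs(L), np.abs(np.linalg.cholesky(2 * V - S)))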
def _loss(self, thetas, X, Llowers, XXtr, Xytr, A=[], hyperparameters=None,
          sess=None):
    rng_state = np.random.get_state()
    X = np.copy(X)
    Llowers = [np.copy(ele) for ele in Llowers]
    XXtr = [np.copy(ele) for ele in XXtr]
    Xytr = [np.copy(ele) for ele in Xytr]
    hyperparameters = [np.copy(ele) for ele in hyperparameters]
    try:
        np.random.seed(2)
        rewards = []
        state = X
        for unroll_step in range(self.unroll_steps):
            action = self._forward(thetas, state, hyperstate=[Llowers, Xytr])
            reward, basis_reward = self._reward(state, action, sess,
                                                Llowers[-1], Xytr[-1],
                                                hyperparameters[-1])
            rewards.append((self.discount_factor**unroll_step) * reward)
            state_action = np.concatenate([state, action], axis=-1)
            means = []
            covs = []
            bases = []
            for i in range(self.state_dim):
                length_scale, signal_sd, noise_sd, prior_sd = hyperparameters[i]
                basis = _basis(state_action, self.random_matrices[i],
                               self.biases[i], self.basis_dims[i],
                               length_scale, signal_sd)
                basis = np.expand_dims(basis, axis=1)
                bases.append(basis)
                pred_mu, pred_sigma = self._predict(Llowers[i], Xytr[i],
                                                    basis, noise_sd)
                means.append(pred_mu)
                covs.append(pred_sigma)
            means = np.concatenate(means, axis=-1)
            covs = np.concatenate(covs, axis=-1)
            bases.append(basis_reward)

            state_ = np.stack([
                np.random.multivariate_normal(mean=mean, cov=np.diag(cov))
                for mean, cov in zip(means, covs)
            ], axis=0)
            state = state + state_ if self.learn_diff else state_
            if self.learn_diff == 0:
                state_ = np.clip(state_, self.observation_space_low,
                                 self.observation_space_high)
            state = np.clip(state, self.observation_space_low,
                            self.observation_space_high)

            if self.update_hyperstate == 1 or self.policy_use_hyperstate == 1:
                y = np.concatenate(
                    [state_, reward],
                    axis=-1)[..., :self.state_dim + self.learn_reward]
                y = y[..., np.newaxis, np.newaxis]
                for i in range(self.state_dim + self.learn_reward):
                    Llowers[i] = Llowers[i].transpose([0, 2, 1])
                for i in range(self.state_dim + self.learn_reward):
                    for j in range(len(Llowers[i])):
                        cholupdate(Llowers[i][j], bases[i][j, 0].copy())
                    Xytr[i] += np.matmul(bases[i].transpose([0, 2, 1]),
                                         y[:, i, ...])
                for i in range(self.state_dim + self.learn_reward):
                    Llowers[i] = Llowers[i].transpose([0, 2, 1])

        rewards = np.concatenate(rewards, axis=-1)
        rewards = np.sum(rewards, axis=-1)
        loss = -np.mean(rewards)
        np.random.set_state(rng_state)
        return loss
    except Exception as e:
        np.random.set_state(rng_state)
        print(e, 'Returning 10e100')
        return 10e100
def solve_mvr(Sigma, tol=1e-5, verbose=False, num_iter=10, smoothing=0,
              rej_rate=0, converge_tol=1):
    """
    Uses a coordinate-descent algorithm to find the solution to the smoothed
    MVR problem.

    :param Sigma: p x p covariance matrix
    :param tol: Minimum eigenvalue of 2Sigma - S and S
    :param num_iter: Number of coordinate descent iterations
    :param rej_rate: Expected proportion of rejections for knockoffs under
        the metropolized knockoff sampling framework.
    :param verbose: if true, will give progress reports
    :param smoothing: computes smoothed mvr loss
    """
    # Initial constants
    time0 = time.time()
    V = Sigma  # I'm too lazy to write Sigma out over and over
    p = V.shape[0]
    inds = np.arange(p)
    loss = np.inf
    acc_rate = 1 - rej_rate
    # Takes a bit longer for rej_rate adjusted to converge
    if acc_rate < 1:
        converge_tol = 1e-2

    # Initialize values
    decayed_improvement = 10
    min_eig = np.linalg.eigh(V)[0].min()
    S = min_eig * np.eye(p)
    L = np.linalg.cholesky(2 * V - S + smoothing * np.eye(p))

    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            # 1. Compute coefficients cn and cd
            ej = np.zeros(p)  # jth basis element
            ej[j] = 1
            # 1a. Compute cd
            vd = sp.linalg.solve_triangular(a=L, b=ej, lower=True)
            cd = np.power(vd, 2).sum()
            # 1b. Compute vn
            vn = sp.linalg.solve_triangular(a=L.T, b=vd, lower=False)
            cn = -1 * np.power(vn, 2).sum()

            # 2. Construct quadratic equation
            # We want to minimize 1/(sj + delta) - (delta * cn)/(1 - delta * cd)
            coef2 = -1 * cn - np.power(cd, 2)
            coef1 = 2 * (-1 * cn * (S[j, j] + smoothing) + cd)
            coef0 = -1 * cn * (S[j, j] + smoothing)**2 - 1
            orig_options = np.roots(np.array([coef2, coef1, coef0]))

            # 3. Eliminate complex solutions
            options = np.array(
                [delta for delta in orig_options if np.imag(delta) == 0])
            # Eliminate solutions which violate PSD-ness
            upper_bound = 1 / cd
            lower_bound = -1 * S[j, j]
            options = np.array([
                delta for delta in options
                if delta < upper_bound and delta > lower_bound
            ])
            if options.shape[0] == 0:
                raise RuntimeError(
                    f"All quadratic solutions ({orig_options}) were infeasible or imaginary"
                )

            # 4. If multiple solutions left (unlikely), pick the smaller one
            losses = 1 / (S[j, j] + options) - (options * cn) / (1 -
                                                                 options * cd)
            if losses[0] == losses.min():
                delta = options[0]
            else:
                delta = options[1]

            # 5. Account for rejections
            if acc_rate < 1:
                extra_space = min(min_eig, 0.02) / (i + 2)  # Helps deal with coord desc
                opt_postrej_value = S[j, j] + delta
                opt_prerej_value = opt_postrej_value / (acc_rate)
                opt_prerej_value = min(S[j, j] + upper_bound - extra_space,
                                       max(opt_prerej_value, extra_space))
                delta = opt_prerej_value - S[j, j]

            # Update S and L
            x = np.zeros(p)
            x[j] = np.sqrt(np.abs(delta))
            if delta > 0:
                choldate.choldowndate(L.T, x)
            else:
                choldate.cholupdate(L.T, x)

            # Set new value for S
            S[j, j] += delta

        # Check for convergence
        prev_loss = loss
        loss = mvr_loss(V, acc_rate * S, smoothing=smoothing)
        if i != 0:
            decayed_improvement = decayed_improvement / 10 + 9 * (prev_loss -
                                                                  loss) / 10
        if verbose:
            print(
                f"After iter {i} at time {np.around(time.time() - time0, 3)}, loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol:
            if verbose:
                print(f"Converged after iteration {i} with loss={loss}")
            break

    # Ensure validity of solution
    S = utilities.shift_until_PSD(S, tol=tol)
    S, _ = utilities.scale_until_PSD(V, S, tol=tol, num_iter=10)
    return S
'''
Created on Feb 15, 2013

@author: jasonrudy
'''
from choldate import cholupdate, choldowndate
import numpy

#Create a random positive definite matrix, V
numpy.random.seed(1)
X = numpy.random.normal(size=(100, 10))
V = numpy.dot(X.transpose(), X)

#Calculate the upper Cholesky factor, R
R = numpy.linalg.cholesky(V).transpose()

#Create a random update vector, u
u = numpy.random.normal(size=R.shape[0])

#Calculate the updated positive definite matrix, V1, and its Cholesky factor, R1
V1 = V + numpy.outer(u, u)
R1 = numpy.linalg.cholesky(V1).transpose()

#The following is equivalent to the above
R1_ = R.copy()
cholupdate(R1_, u.copy())
assert numpy.all((R1 - R1_)**2 < 1e-16)

#And downdating is the inverse of updating
R_ = R1.copy()
choldowndate(R_, u.copy())
assert numpy.all((R - R_)**2 < 1e-16)
import time

import numpy as np
import scipy.linalg as spla
from choldate import cholupdate

# Benchmark: batched rank-one updates vs. full re-factorization.
# batch_size and samples were not defined in the original snippet; the
# values below are assumptions for illustration.
batch_size = 32
samples = 200
dim = 150

X = np.random.normal(size=[batch_size, samples, dim])
M = np.matmul(X.transpose([0, 2, 1]), X) + np.tile(
    np.eye(X.shape[-1])[None, ...], [len(X), 1, 1])
u = np.random.normal(size=[batch_size, 1, dim])
U = np.matmul(u.transpose([0, 2, 1]), u)
MU = M + U

A = np.linalg.cholesky(M).transpose([0, 2, 1])

start = time.time()
B = np.linalg.cholesky(MU)
print(time.time() - start)
B = B.transpose([0, 2, 1])

start = time.time()
for i in range(batch_size):
    cholupdate(A[i], u[i, 0].copy())
print(time.time() - start)

start = time.time()
C = [spla.cholesky(MU[i]) for i in range(batch_size)]
print(time.time() - start)
C = np.stack(C, axis=0)

print(np.allclose(A, B))
print(np.allclose(A, C))
print(np.allclose(B, C))
def _solve_maxent_sdp_cd(
    Sigma,
    solve_sdp,
    tol=1e-5,
    verbose=False,
    num_iter=50,
    converge_tol=1e-4,
    choldate_warning=True,
    mu=0.9,
    lambd=0.5,
):
    """
    This function is internally used to compute the S-matrices used to
    generate maximum entropy and SDP knockoffs. Users should not call this
    function---they should call ``solve_maxent`` or ``solve_sdp`` directly.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 50.
    converge_tol : float
        A parameter specifying the criteria for convergence.
    choldate_warning : bool
        If True, will warn the user if choldate is not installed.
        Defaults to True.
    solve_sdp : bool
        If True, will solve SDP. Otherwise, will solve maxent formulation.
    lambd : float
        Initial barrier constant.
    mu : float
        Barrier decay constant.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Warning if choldate not available
    if not CHOLDATE_AVAILABLE and choldate_warning:
        warnings.warn(constants.CHOLDATE_WARNING)

    # Initial constants
    time0 = time.time()
    V = Sigma  # Shorthand prevents lines from spilling over
    p = V.shape[0]
    inds = np.arange(p)
    loss = np.inf

    # Initialize values
    decayed_improvement = 1
    mineig = np.linalg.eigh(V)[0].min()
    if solve_sdp:
        S = 0.01 * mineig * np.eye(p)
    else:
        S = mineig * np.eye(p)
    L = np.linalg.cholesky(2 * V - S)
    lambd = min(2 * mineig, lambd)

    # Loss function
    if solve_sdp:
        loss_fn = lambda V, S: S.shape[0] - np.diag(S).sum()
    else:
        loss_fn = maxent_loss

    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            diff = 2 * V - S

            # Solve cholesky equation
            tildey = 2 * V[j].copy()
            tildey[j] = 0
            x = sp.linalg.solve_triangular(a=L, b=tildey, lower=True)

            # Use cholesky eq to get new update
            zeta = diff[j, j]
            x22 = np.power(x, 2).sum()
            qinvterm = zeta * x22 / (zeta + x22)

            # Inverse of Qj using SWM formula
            if solve_sdp:
                sjstar = max(min(1, 2 * V[j, j] - qinvterm - lambd), 0)
            else:
                sjstar = (2 * V[j, j] - qinvterm) / 2

            # Rank one update for cholesky
            delta = S[j, j] - sjstar
            x = np.zeros(p)
            x[j] = np.sqrt(np.abs(delta))
            if delta > 0:
                if CHOLDATE_AVAILABLE:
                    choldate.cholupdate(L.T, x)
                else:
                    cholupdate(L.T, x, add=False)
            else:
                if CHOLDATE_AVAILABLE:
                    choldate.choldowndate(L.T, x)
                else:
                    cholupdate(L.T, x, add=True)

            # Set new value for S
            S[j, j] = sjstar

        # Check for convergence
        prev_loss = loss
        loss = loss_fn(V, S)
        if i != 0:
            loss_diff = prev_loss - loss
            if solve_sdp:
                loss_diff = max(loss_diff, lambd)
            decayed_improvement = decayed_improvement / 10 + 9 * (loss_diff) / 10
        if verbose:
            print(
                f"After iter {i} at time {np.around(time.time() - time0, 3)}, loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol:
            if verbose:
                print(f"Converged after iteration {i} with loss={loss}")
            break

        # Update barrier parameter if solving SDP
        if solve_sdp:
            lambd = mu * lambd

    # Ensure validity of solution
    S = utilities.shift_until_PSD(S, tol=tol)
    S, _ = utilities.scale_until_PSD(V, S, tol=tol, num_iter=10)
    return S
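# The knockoff solvers above fall back to a pure-Python
# cholupdate(L.T, x, add=...) when choldate is unavailable. That routine is
# not shown here; below is a minimal NumPy sketch of such a fallback using
# the classic O(p^2) recurrence. The name and the meaning of `add`
# (True = rank-one update, False = downdate) are assumptions for
# illustration and may not match the package's own convention.
import numpy as np

def cholupdate_fallback(R, x, add=True):
    """Rank-one update (add=True) or downdate (add=False) of an upper
    factor R, in place, with R.T @ R == A; x is used as scratch space.
    A downdate is only valid if A - outer(x, x) stays positive definite."""
    sign = 1.0 if add else -1.0
    p = x.shape[0]
    for k in range(p):
        r = np.sqrt(R[k, k] ** 2 + sign * x[k] ** 2)
        c = r / R[k, k]
        s = x[k] / R[k, k]
        R[k, k] = r
        if k + 1 < p:
            R[k, k + 1:] = (R[k, k + 1:] + sign * s * x[k + 1:]) / c
            x[k + 1:] = c * x[k + 1:] - s * R[k, k + 1:]
    return R

# Quick self-check of the sketch
rng = np.random.RandomState(0)
X0 = rng.normal(size=(30, 5))
A = X0.T @ X0 + np.eye(5)
x = rng.normal(size=5)
R = np.linalg.cholesky(A).T
cholupdate_fallback(R, x.copy(), add=True)
assert np.allclose(R.T @ R, A + np.outer(x, x))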
def _solve_mvr_ungrouped(
    Sigma,
    tol=1e-5,
    verbose=False,
    num_iter=50,
    smoothing=0,
    rej_rate=0,
    converge_tol=1e-2,
    choldate_warning=True,
):
    """
    Computes S-matrix used to generate minimum variance-based
    reconstructability knockoffs using coordinate descent.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 50.
    smoothing : float
        Add ``smoothing`` to all eigenvalues of the feature-knockoff
        precision matrix before inverting to avoid numerical instability.
        Defaults to 0.
    converge_tol : float
        A parameter specifying the criteria for convergence.
    choldate_warning : bool
        If True, will warn the user if choldate is not installed.
        Defaults to True.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Warning if choldate not available
    if not CHOLDATE_AVAILABLE and choldate_warning:
        warnings.warn(constants.CHOLDATE_WARNING)

    # Initial constants
    time0 = time.time()
    V = Sigma  # Shorthand prevents lines from spilling over
    p = V.shape[0]
    inds = np.arange(p)
    loss = np.inf
    acc_rate = 1 - rej_rate

    # Initialize values
    decayed_improvement = 10
    min_eig = np.linalg.eigh(V)[0].min()
    S = min_eig * np.eye(p)
    L = np.linalg.cholesky(2 * V - S + smoothing * np.eye(p))

    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            # 1. Compute coefficients cn and cd
            ej = np.zeros(p)  # jth basis element
            ej[j] = 1
            # 1a. Compute cd
            vd = sp.linalg.solve_triangular(a=L, b=ej, lower=True)
            cd = np.power(vd, 2).sum()
            # 1b. Compute vn
            vn = sp.linalg.solve_triangular(a=L.T, b=vd, lower=False)
            cn = -1 * np.power(vn, 2).sum()

            # 2. Construct/solve quadratic equation
            delta = _solve_mvr_quadratic(
                cn=cn,
                cd=cd,
                sj=S[j, j],
                min_eig=min_eig,
                i=i,
                smoothing=smoothing,
                acc_rate=acc_rate,
            )

            # 3. Update S and L
            x = np.zeros(p)
            x[j] = np.sqrt(np.abs(delta))
            if delta > 0:
                if CHOLDATE_AVAILABLE:
                    choldate.choldowndate(L.T, x)
                else:
                    cholupdate(L.T, x, add=False)
            else:
                if CHOLDATE_AVAILABLE:
                    choldate.cholupdate(L.T, x)
                else:
                    cholupdate(L.T, x, add=True)

            # Set new value for S
            S[j, j] += delta

        # Check for convergence
        prev_loss = loss
        loss = mvr_loss(V, acc_rate * S, smoothing=smoothing)
        if i != 0:
            decayed_improvement = decayed_improvement / 10 + 9 * (prev_loss -
                                                                  loss) / 10
        if verbose:
            print(
                f"After iter {i} at time {np.around(time.time() - time0, 3)}, loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol:
            if verbose:
                print(f"Converged after iteration {i} with loss={loss}")
            break
    return S