def create(**kwargs):  # m > k
    k = kwargs['k']  # class
    m = kwargs['m']  # instance
    n = kwargs['n']  # dim
    p = 5  # p-largest
    q = 10
    X = problem_util.normalized_data_matrix(m, n, 1)
    Y = np.random.randint(0, k-1, (q, m))

    Theta = cp.Variable(n, k)
    t = cp.Variable(q)
    texp = cp.Variable(m)
    f = cp.sum_largest(t, p) + cp.sum_entries(texp) + cp.sum_squares(Theta)
    C = []
    C.append(cp.log_sum_exp(X*Theta, axis=1) <= texp)
    for i in range(q):
        Yi = one_hot(Y[i], k)
        C.append(-cp.sum_entries(cp.mul_elemwise(X.T.dot(Yi), Theta)) == t[i])

    t_eval = lambda: np.array([
        -cp.sum_entries(cp.mul_elemwise(X.T.dot(one_hot(Y[i], k)), Theta)).value
        for i in range(q)])
    f_eval = lambda: cp.sum_largest(t_eval(), p).value \
        + cp.sum_entries(cp.log_sum_exp(X*Theta, axis=1)).value \
        + cp.sum_squares(Theta).value

    return cp.Problem(cp.Minimize(f), C), f_eval
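# The snippet above calls a one_hot() helper that is not defined here. A minimal
# sketch, assuming scipy.sparse and consistent with the local definition inside
# maxSoftMaxEpigraphProblem further below:
import numpy as np
import scipy.sparse as sp

def one_hot(y, k):
    """Return a dense m x k one-hot encoding of the integer labels y."""
    m = len(y)
    return sp.coo_matrix((np.ones(m), (np.arange(m), y)), shape=(m, k)).todense()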
def logit_estimation(X, y: dict, avail: dict, attributes):
    '''
    :argument y: chosen edges in path i
    :argument avail: choice scenarios (set) for trip i
    :argument X: network attributes
    :argument attributes: attributes to fit discrete choice model
    '''
    # Estimated parameters to be optimized (learned)
    cp_theta = {i: cp.Variable(1) for i in attributes}

    nodes_decision = {i: [y_j[0] for y_j in y_i]
                      for i, y_i in zip(range(len(y)), y.values())}
    nodes_chosen = {i: [y_j[1] for y_j in y_i]
                    for i, y_i in zip(range(len(y)), y.values())}

    X_avail = {}
    for i, avail_path in avail.items():
        X_avail[i] = {attribute: get_avail_attribute(avail_path, X[attribute])
                      for attribute in attributes}
    # X_avail = {attribute: get_avail_attribute(avail, X[attribute]) for attribute in attributes}

    # Log-likelihood function
    Z = []
    for i, observed_path in avail.items():
        Z_i = []
        for j, k in zip(nodes_decision[i], nodes_chosen[i]):
            Z_chosen_attr = []
            Z_logsum_attr = []
            for attribute in attributes:
                Z_chosen_attr.append(X[attribute][j, k] * cp_theta[attribute])
                Z_logsum_attr.append(X_avail[i][attribute][j] * cp_theta[attribute])
            Z_i.append(cp.sum(Z_chosen_attr) - cp.log_sum_exp(cp.sum(Z_logsum_attr)))
        Z.append(cp.sum(Z_i))

    # Z = [X['travel_time'][i,j] * cp_theta['travel_time'] + X['cost'][i,j] * cp_theta['cost'] + X['h'][i,j] * cp_theta['h']
    #      - cp.log_sum_exp(X_avail['travel_time'][i] * cp_theta['travel_time'] + X_avail['cost'][i] * cp_theta['cost'] + X_avail['h'][i] * cp_theta['h'])
    #      for i,j in zip(nodes_decision,nodes_chosen)
    #      ]

    # axis = 1 is for rows
    cp_objective_logit = cp.Maximize(cp.sum(Z))
    cp_problem_logit = cp.Problem(cp_objective_logit, constraints=[])  # Excluding extra attributes
    cp_problem_logit.solve()

    return {key: val.value for key, val in cp_theta.items()}
def test_logistic_regression(self) -> None:
    np.random.seed(0)
    N, n = 5, 2
    X_np = np.random.randn(N, n)
    a_true = np.random.randn(n, 1)

    def sigmoid(z):
        return 1 / (1 + np.exp(-z))

    y = np.round(sigmoid(X_np @ a_true + np.random.randn(N, 1) * 0.5))

    a = cp.Variable((n, 1))
    X = cp.Parameter((N, n))
    lam = cp.Parameter(nonneg=True)
    log_likelihood = cp.sum(
        cp.multiply(y, X @ a) -
        cp.log_sum_exp(cp.hstack([np.zeros((N, 1)), X @ a]).T,
                       axis=0, keepdims=True).T)
    problem = cp.Problem(
        cp.Minimize(-log_likelihood + lam * cp.sum_squares(a)))

    X.value = X_np
    lam.value = 1
    # TODO(akshayka): too low but this problem is ill-conditioned
    gradcheck(problem, atol=1e-1, eps=1e-8)
    perturbcheck(problem, atol=1e-4)
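# A short numerical check of the identity used above (and in several of the
# logistic-regression snippets below): log(1 + exp(z)) equals log_sum_exp([0, z]),
# which is how the logistic loss term is written in DCP form. Standalone
# illustration with numpy only, not part of any snippet:
import numpy as np
z = np.array([-2.0, 0.0, 3.0])
assert np.allclose(np.log1p(np.exp(z)), np.logaddexp(0.0, z))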
def test_logistic_regression(self):
    set_seed(243)
    N, n = 10, 2
    X_np = np.random.randn(N, n)
    a_true = np.random.randn(n, 1)
    y_np = np.round(sigmoid(X_np @ a_true + np.random.randn(N, 1) * 0.5))

    X_tch = torch.from_numpy(X_np)
    X_tch.requires_grad_(True)
    lam_tch = 0.1 * torch.ones(1, requires_grad=True, dtype=torch.double)

    a = cp.Variable((n, 1))
    X = cp.Parameter((N, n))
    lam = cp.Parameter(1, nonneg=True)
    y = y_np
    log_likelihood = cp.sum(
        cp.multiply(y, X @ a) -
        cp.log_sum_exp(cp.hstack([np.zeros((N, 1)), X @ a]).T,
                       axis=0, keepdims=True).T)
    prob = cp.Problem(
        cp.Minimize(-log_likelihood + lam * cp.sum_squares(a)))
    fit_logreg = CvxpyLayer(prob, [X, lam], [a])

    def layer_eps(*x):
        return fit_logreg(*x, solver_args={"eps": 1e-12})

    torch.autograd.gradcheck(layer_eps,
                             (X_tch, lam_tch),
                             eps=1e-4,
                             atol=1e-3,
                             rtol=1e-3)
def test_logistic_regression(self):
    key = random.PRNGKey(0)
    N, n = 5, 2
    key, k1, k2, k3 = random.split(key, num=4)
    X_np = random.normal(k1, shape=(N, n))
    a_true = random.normal(k2, shape=(n, 1))
    y_np = jnp.round(
        sigmoid(X_np @ a_true + random.normal(k3, shape=(N, 1)) * 0.5))
    X_jax = jnp.array(X_np)
    lam_jax = 0.1 * jnp.ones(1)

    a = cp.Variable((n, 1))
    X = cp.Parameter((N, n))
    lam = cp.Parameter(1, nonneg=True)
    y = y_np
    log_likelihood = cp.sum(
        cp.multiply(y, X @ a) -
        cp.log_sum_exp(cp.hstack([np.zeros((N, 1)), X @ a]).T,
                       axis=0, keepdims=True).T)
    prob = cp.Problem(
        cp.Minimize(-log_likelihood + lam * cp.sum_squares(a)))
    fit_logreg = CvxpyLayer(prob, [X, lam], [a])

    check_grads(fit_logreg, (X_jax, lam_jax), order=1, modes=['rev'])
def _solve(self, sensitive, X, y):
    n_obs, n_features = X.shape
    theta = cp.Variable(n_features)
    y_hat = X @ theta

    log_likelihood = cp.sum(
        cp.multiply(y, y_hat) -
        cp.log_sum_exp(
            cp.hstack([np.zeros((n_obs, 1)), cp.reshape(y_hat, (n_obs, 1))]),
            axis=1))
    if self.penalty == "l1":
        log_likelihood -= cp.sum((1 / self.C) * cp.norm(theta[1:]))

    constraints = self.constraints(y_hat, y, sensitive, n_obs)
    problem = cp.Problem(cp.Maximize(log_likelihood), constraints)
    problem.solve(max_iters=self.max_iter)

    if problem.status in ["infeasible", "unbounded"]:
        raise ValueError(f"problem was found to be {problem.status}")

    self.n_iter_ = problem.solver_stats.num_iters

    if self.fit_intercept:
        self.coef_ = theta.value[np.newaxis, 1:]
        self.intercept_ = theta.value[0:1]
    else:
        self.coef_ = theta.value[np.newaxis, :]
        self.intercept_ = np.array([0.0])
def softmax_loss(Theta, X, y):
    m = len(y)
    n, k = Theta.size
    Y = sp.coo_matrix((np.ones(m), (np.arange(m), y)), shape=(m, k))
    print(cp.__file__)
    return (cp.sum_entries(cp.log_sum_exp(X*Theta, axis=1)) -
            cp.sum_entries(cp.mul_elemwise(Y, X*Theta)))
def tune_temp(logits, labels, binary_search=True, lower=0.2, upper=5.0, eps=0.0001):
    logits = np.array(logits)

    if binary_search:
        import torch
        import torch.nn.functional as F

        logits = torch.FloatTensor(logits)
        labels = torch.LongTensor(labels)
        t_guess = torch.FloatTensor([0.5*(lower + upper)]).requires_grad_()

        while upper - lower > eps:
            if torch.autograd.grad(F.cross_entropy(logits / t_guess, labels), t_guess)[0] > 0:
                upper = 0.5 * (lower + upper)
            else:
                lower = 0.5 * (lower + upper)
            t_guess = t_guess * 0 + 0.5 * (lower + upper)

        t = min([lower, 0.5 * (lower + upper), upper],
                key=lambda x: float(F.cross_entropy(logits / x, labels)))
    else:
        import cvxpy as cx

        set_size = np.array(logits).shape[0]

        t = cx.Variable()
        expr = sum((cx.Minimize(cx.log_sum_exp(logits[i, :] * t) - logits[i, labels[i]] * t)
                    for i in range(set_size)))
        p = cx.Problem(expr, [lower <= t, t <= upper])
        p.solve()  # p.solve(solver=cx.SCS)
        t = 1 / t.value

    return t
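# A hypothetical usage sketch for tune_temp above, with made-up data (the names
# demo_logits/demo_labels are illustrative only): fits a temperature on random
# logits/labels via the binary-search branch.
import numpy as np
rng = np.random.RandomState(0)
demo_logits = rng.randn(100, 10)             # 100 samples, 10 classes
demo_labels = rng.randint(0, 10, size=100)   # integer class labels
t_opt = tune_temp(demo_logits, demo_labels, binary_search=True)
print("tuned temperature:", t_opt)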
def maxSoftMaxEpigraphProblem(problemOptions, solverOptions):
    k = problemOptions['k']  # class
    m = problemOptions['m']  # instances
    n = problemOptions['n']  # dim
    p = problemOptions['p']  # p-largest
    X = __normalized_data_matrix(m, n, 1)
    Y = np.random.randint(0, k, m)

    # Problem construction
    def one_hot(y, k):
        m = len(y)
        return sps.coo_matrix((np.ones(m), (np.arange(m), y)), shape=(m, k)).todense()

    Theta = cp.Variable(n, k)
    beta = cp.Variable(1, k)
    t = cp.Variable(m)
    texp = cp.Variable(m)
    f = cp.sum_largest(t + texp, p) + cp.sum_squares(Theta)
    C = []
    C.append(cp.log_sum_exp(X*Theta + np.ones((m, 1))*beta, axis=1) <= texp)
    Yi = one_hot(Y, k)
    C.append(t == cp.vstack([-(X[i]*Theta + beta)[Y[i]] for i in range(m)]))

    prob = cp.Problem(cp.Minimize(f), C)
    prob.solve(**solverOptions)
    return {'Problem': prob, 'name': 'maxSoftMaxEpigraphProblem'}
def get_RCK_weights(returns, minimalWealthFraction=0.7, confidence=0.3, max_expo=0.25):
    n = len(returns.columns)
    pi = np.array([1. / len(returns)] * len(returns))
    r = (returns + 1.).as_matrix().T

    b_rck = cvx.Variable(n)
    lambda_rck = cvx.Parameter(sign='positive')
    lambda_rck.value = np.log(confidence) / np.log(minimalWealthFraction)

    growth_rate = pi.T * cvx.log(r.T * b_rck)
    risk_constraint = cvx.log_sum_exp(
        np.log(pi) - lambda_rck * cvx.log(r.T * b_rck)) <= 0
    constraints = [
        cvx.sum_entries(b_rck) == 1,
        b_rck >= 0,
        b_rck <= max_expo,
        risk_constraint
    ]
    rck = cvx.Problem(cvx.Maximize(growth_rate), constraints)
    rck.solve(verbose=False)
    # print rck.value
    # print b_rck.value
    w = pd.Series(data=np.asarray(b_rck.value).flatten(), index=returns.columns)
    w = w / w.abs().sum()
    return w
def regress(genes, lambd, alpha, xs, ys, left, S):
    '''To perform the regression using convex optimisation.'''
    cost = 0
    n_genes = np.shape(genes)[1]
    constr = []
    beta = cvxpy.Variable(n_genes)
    # to prevent beta becoming very large.
    constr.append(cvxpy.norm(beta) <= 1)
    x0, y0, k1, k2 = get_kink_point(xs, ys)
    if left:
        filtered_genes = genes[ys > y0]
    else:
        filtered_genes = genes[ys < y0]
    for i, gene_set in enumerate(genes):
        cost += beta.T*gene_set
    # the log sum exp constraint
    cost -= np.shape(filtered_genes)[0]*cvxpy.log_sum_exp(filtered_genes*beta)
    # if a linear regression is being used, this allows S to be an empty matrix.
    if lambd > 0.0:
        cost -= lambd*alpha*cvxpy.power(cvxpy.norm(beta), 2)
        cost -= lambd*(1.0-alpha)*cvxpy.quad_form(beta, S)
    prob = cvxpy.Problem(cvxpy.Maximize(cost), constr)
    # a slightly increased tolerance (default is 1e-7) to reduce run times
    a = prob.solve(solver=cvxpy.SCS, eps=1e-5)
    return beta.value
def test_log_sum_exp(self):
    expr = cp.log_sum_exp(self.x)
    self.x.value = [0, 1]
    e = np.exp(1)
    self.assertItemsAlmostEqual(expr.grad[self.x].toarray(),
                                [1.0/(1+e), e/(1+e)])

    expr = cp.log_sum_exp(self.A)
    self.A.value = np.array([[0, 1], [-1, 0]])
    self.assertItemsAlmostEqual(expr.grad[self.A].toarray(),
                                [1.0/(2+e+1.0/e), 1.0/e/(2+e+1.0/e),
                                 e/(2+e+1.0/e), 1.0/(2+e+1.0/e)])

    expr = cp.log_sum_exp(self.A, axis=0)
    self.A.value = np.array([[0, 1], [-1, 0]])
    self.assertItemsAlmostEqual(
        expr.grad[self.A].toarray(),
        np.transpose(np.array([[1.0/(1+1.0/e), 1.0/e/(1+1.0/e), 0, 0],
                               [0, 0, e/(1+e), 1.0/(1+e)]])))
def log_cash(self, phi):
    tmp = [np.log(b) + phi[g] for g, b in izip(self.goods, self.b)]
    tmp = cvx.log_sum_exp(cvx.vstack(*tmp))
    if tmp.is_constant():
        return tmp.value
    else:
        return tmp
def exp_prob():  # {LP, EXP}
    x = cp.Variable(2)
    A = np.eye(2)
    prob = cp.Problem(cp.Minimize(cp.log_sum_exp(x)), [A * x >= 0])
    return CVXProblem(problemID="exp_prob", problem=prob, opt_val=float('-inf'))
def logistic_regression(N, p, suppfrac):
    """
    Create a logistic regression problem with N examples, p dimensions,
    and at most suppfrac of the optimal solution to be non-zero.
    """
    X = np.random.randn(N, p)
    betastar = np.random.randn(p)
    nnz = int(np.floor((1.0 - suppfrac) * p))  # Num. nonzeros
    assert nnz <= p
    idxes = np.random.randint(0, p, nnz)
    betastar[idxes] = 0
    probplus1 = 1.0 / (1.0 + np.exp(-X.dot(betastar)))
    y = np.random.binomial(1, probplus1)
    lam = 1.0

    # Solve by ECOS.
    betahat = cp.Variable(p)
    logloss = sum(cp.log_sum_exp(cp.hstack(0, y[i] * X[i, :] * betahat))
                  for i in range(N))
    prob = cp.Problem(cp.Minimize(logloss + lam * cp.norm(betahat, 1)))

    data = prob.get_problem_data(cp.SCS)
    data['beta_from_x'] = cvxpy_beta_from_x(prob, betahat, data['A'].shape[0])
    return (betahat, prob, data)
def neihgborhood(X, lambd_1, lambd_2, node):
    '''
    Neighborhood selection using CVXPY

    Inputs:
        - X: List of n ndarray of shape (p * n_i)
        - lambd_1: Hyperparameter related to the fused penalty
        - lambd_2: Hyperparameter related to the lasso penalty
        - node: Considered node

    Output:
        - beta: ndarray of shape ((p-1) * n) containing the n learned neighborhoods of node
    '''
    n = len(X)
    p = X[0].shape[0]
    beta = cp.Variable((p - 1, n))
    not_a = list(range(p))
    del not_a[node]

    log_lik = 0
    # Construction of the objective function
    for i in range(n):
        n_i = X[i].shape[1]
        blob = beta[:, i] @ X[i][not_a, :]
        log_lik += (1 / n_i) * cp.sum(
            -cp.reshape(cp.multiply(X[i][node, :], blob), (n_i, )) +
            cp.log_sum_exp(cp.hstack(
                [-cp.reshape(blob, (n_i, 1)), cp.reshape(blob, (n_i, 1))]),
                axis=1))

    l1 = cp.Parameter(nonneg=True)
    l2 = cp.Parameter(nonneg=True)
    reg = l2 * cp.norm(beta, p=1) + l1 * \
        cp.sum(cp.norm(beta[:, 1:] - beta[:, :-1], p=2, axis=0))  # Penalty function

    function = 0.01 * log_lik + reg  # Divide by 100 for numerical issues
    problem = cp.Problem(cp.Minimize(function))
    l1.value = lambd_1
    l2.value = lambd_2
    problem.solve(solver=cp.ECOS, verbose=False)  # Solve problem
    beta = np.round(beta.value, 5)
    return beta
def __init__(self, theta_shape, X, y, y_orig, init_lam=1, per_target_model=False):
    self.X = X
    self.y = y
    self.y_orig = y_orig
    self.per_target_model = per_target_model

    self.theta_intercept = cp.Variable()
    self.theta = cp.Variable(theta_shape[0], theta_shape[1])
    theta_norm = cp.norm(self.theta, 1)
    self.lam = cp.Parameter(sign="positive", value=init_lam)

    # This is the log denominator of the probability of mutating (-log(1 + exp(-theta)))
    log_ll = -cp.sum_entries(
        cp.logistic(-(X * (self.theta[:, 0:1] + self.theta_intercept))))

    # If no mutation happened, then we also need the log numerator of probability of not mutating
    # since exp(-theta)/(1 + exp(-theta)) is prob not mutate
    no_mutate_X = X[y == 0, :]
    no_mutate_numerator = -(no_mutate_X * (self.theta[:, 0:1] + self.theta_intercept))
    log_ll = log_ll + cp.sum_entries(no_mutate_numerator)

    if per_target_model:
        # If per target, need the substitution probabilities too
        for orig_i in range(NUM_NUCLEOTIDES):
            for i in range(NUM_NUCLEOTIDES):
                if orig_i == i:
                    continue
                # Find the elements that mutated to y and mutated from y_orig
                mutate_X_targ = X[(y == (i + 1)) & (y_orig == (orig_i + 1)), :]

                # Create the 3-column theta excluding the column corresponding to y_orig
                theta_3col = []
                for j in range(NUM_NUCLEOTIDES):
                    if j != orig_i:
                        theta_3col.append(self.theta[:, j + 1] + self.theta_intercept)
                theta_3col = cp.hstack(theta_3col)

                target_ll = (
                    # log of numerator in softmax
                    -(mutate_X_targ * (self.theta[:, i + 1] + self.theta_intercept))
                    # log of denominator in softmax
                    - cp.log_sum_exp(-(mutate_X_targ * theta_3col), axis=1))
                log_ll += cp.sum_entries(target_ll)

    self.problem = cp.Problem(cp.Maximize(log_ll - self.lam * theta_norm))
def linear_softmax_reg(X, Y, params):
    m, n = X.shape[0], X.shape[1]
    Theta = cp.Variable(n, len(params['d']))
    f = cp.sum_entries(
        cp.log_sum_exp(X * Theta, axis=1) -
        cp.sum_entries(cp.mul_elemwise(Y, X * Theta), axis=1)) / m
    lam = 1e-5  # regularization
    cp.Problem(cp.Minimize(f + lam * cp.sum_squares(Theta)), []).solve()
    Theta = np.asarray(Theta.value)
    return Theta
def marginal_optimization(self, seed=None):
    logging.debug("Starting to merge marginals")
    # get number of cliques: n
    node_card = self.node_card; cliques = self.cliques
    d = self.nodes_num; n = self.cliques_num; m = self.clusters_num
    # get the junction tree matrix representation: O
    O = self.jt_rep()
    # get log_p: the array of numbers of sum(log(attribute's domain))
    log_p = self.log_p_func()
    # get log_node_card: log(C1), log(C2), ..., log(Cd)
    log_node_card = np.log(node_card)
    # get value of sum_log_node_card: log(C1 * C2 * ... * Cd)
    sum_log_node_card = sum(log_node_card)
    # get the difference operator M on cluster number: m
    M = self.construct_difference()
    # initial a seed Z
    prev_Z = seed
    if prev_Z is None:
        prev_Z = np.random.rand(n, m)

    # run the convex optimization for max_iter times
    logging.debug("Optimization starting...")
    for i in range(self.max_iter):
        logging.debug("The optimization iteration: " + str(i+1))
        # sum of rows of prev_Z
        tmp1 = cvx.sum_entries(prev_Z, axis=0).value
        # tmp2 = math.log(tmp1)-1+sum_log_node_card
        tmp2 = np.log(tmp1) - 1 + sum_log_node_card
        # tmp3: difference of pairwise columns = prev_Z * M
        tmp3 = np.dot(prev_Z, M)

        # convex optimization
        Z = cvx.Variable(n, m)
        t = cvx.Variable(1, m)
        r = cvx.Variable()
        objective = cvx.Minimize(cvx.log_sum_exp(t) - self._lambda*r)
        constraints = [
            Z >= 0,
            Z*np.ones((m, 1), dtype=int) == np.ones((n, 1), dtype=int),
            r*np.ones((1, m*(m-1)/2), dtype=int)
                - 2*np.ones((1, n), dtype=int)*(cvx.mul_elemwise(tmp3, (Z*M)))
                + cvx.sum_entries(tmp3 * tmp3, axis=0) <= 0,
            np.ones((1, n), dtype=int)*Z >= 1,
            log_p*Z - t - np.dot(log_node_card, O)*Z + tmp2
                + cvx.mul_elemwise(np.power(tmp1, -1), np.ones((1, n), dtype=int)*Z) == 0
        ]
        prob = cvx.Problem(objective, constraints)
        result = prob.solve(solver='SCS', verbose=False)
        prev_Z[0:n, 0:m] = Z.value

    return prev_Z, O
def fit_OLD(self, x, y):
    # Detect the number of samples and classes
    nsamples = x.shape[0]
    ncols = x.shape[1]
    classes, cnt = np.unique(y, return_counts=True)
    nclasses = len(classes)
    # Convert classes to a categorical format
    yc = keras.utils.to_categorical(y, num_classes=nclasses)
    # Build a disciplined convex programming model
    w = cp.Variable(shape=(ncols, nclasses))
    # Additional variables representing the actual predictions.
    yhat = cp.Variable(shape=(nsamples, nclasses), boolean=True)
    bigM = 1e3
    constraints = [
        cp.sum(yhat, axis=1) == 1,  # only one class per sample.
    ]
    constraints += [
        x @ w[:, i] - x @ w[:, i+1] <= bigM * (yhat[:, i] - yhat[:, i+1])
        for i in range(nclasses - 1)
    ]
    log_reg = x @ w
    # out_xpr = [cp.exp(log_out_xpr[c]) for c in range(nclasses)]
    Z = [cp.log_sum_exp(log_reg[i]) for i in range(nsamples)]
    # log_likelihood = cp.sum(
    #     cp.sum([cp.multiply(yc[:, c], log_out_xpr[c])
    #             for c in range(nclasses)]) - Z
    # )
    log_likelihood = cp.sum(
        cp.sum([cp.multiply(yc[:, c], log_reg[:, c])
                for c in range(nclasses)])) - cp.sum(Z)
    reg = 0
    # Compute counts
    maxc = int(np.ceil(nsamples / nclasses))
    for c in classes:
        reg += cp.square(maxc - cp.sum(yhat[c]))
    # Start the training process
    obj_func = - log_likelihood / nsamples + self.alpha * reg
    problem = cp.Problem(cp.Minimize(obj_func), constraints)
    problem.solve()
    # for c in range(nclasses):
    #     wgt[c] = cp.Variable(ncols)
    #     # xpr = cp.sum(cp.multiply(y, x @ wgt) - cp.logistic(x @ wgt))
    #     log_out_xpr[c] = x @ wgt[c]
    #     out_xpr[c] = cp.exp(x @ wgt[c])
    #     if c == 0: log_likelihood = xpr
    #     else: log_likelihood += xpr
    # problem = cp.Problem(cp.Maximize(log_likelihood/nsamples))
    # # Start the training process
    # problem.solve()
    # Store the weights
    # print(wgt.value)
    self.weights = w.value
def newton_solver(G):
    """Solve for lambda using the matrix of moment conditions."""
    n = G.shape[0]  # dimension of the lambda vector to be found
    lambd = cp.Variable(n)  # variable to solve for
    objective = cp.Minimize(cp.log_sum_exp(lambd * G))
    constraints = []
    prob = cp.Problem(objective, constraints)
    # The optimal objective value is returned by `prob.solve()`.
    result = prob.solve(solver=cp.SCS)
    return lambd.value
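# A hypothetical usage sketch for newton_solver above: G is assumed to hold one
# moment condition per row and one observation per column. The data here is made
# up and centered so that the dual problem is bounded.
import numpy as np
rng = np.random.RandomState(0)
G_demo = rng.randn(2, 50)
G_demo -= G_demo.mean(axis=1, keepdims=True)  # center each moment condition
lam_hat = newton_solver(G_demo)
print("estimated lambda:", lam_hat)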
def test_log_sum_exp(self): """Test log_sum_exp function that failed in Github issue. """ import cvxpy as cp import numpy as np np.random.seed(1) m = 5 n = 2 X = np.matrix(np.ones((m,n))) w = cp.Variable(n) expr2 = [cp.log_sum_exp(cp.vstack(0, X[i,:]*w)) for i in range(m)] expr3 = sum(expr2) obj = cp.Minimize(expr3) p = cp.Problem(obj) p.solve(solver=SCS, max_iters=1) # # Risk return tradeoff curve # def test_risk_return_tradeoff(self): # from math import sqrt # from cvxopt import matrix # from cvxopt.blas import dot # from cvxopt.solvers import qp, options # import scipy # n = 4 # S = matrix( [[ 4e-2, 6e-3, -4e-3, 0.0 ], # [ 6e-3, 1e-2, 0.0, 0.0 ], # [-4e-3, 0.0, 2.5e-3, 0.0 ], # [ 0.0, 0.0, 0.0, 0.0 ]] ) # pbar = matrix([.12, .10, .07, .03]) # N = 100 # # CVXPY # Sroot = numpy.asmatrix(scipy.linalg.sqrtm(S)) # x = cp.Variable(n, name='x') # mu = cp.Parameter(name='mu') # mu.value = 1 # TODO cp.Parameter("positive") # objective = cp.Minimize(-pbar*x + mu*quad_over_lin(Sroot*x,1)) # constraints = [sum_entries(x) == 1, x >= 0] # p = cp.Problem(objective, constraints) # mus = [ 10**(5.0*t/N-1.0) for t in range(N) ] # xs = [] # for mu_val in mus: # mu.value = mu_val # p.solve() # xs.append(x.value) # returns = [ dot(pbar,x) for x in xs ] # risks = [ sqrt(dot(x, S*x)) for x in xs ] # # QP solver
def tune_temp(logits, labels, correct):
    logits = np.array(logits)
    set_size = np.array(logits).shape[0]

    t = cx.Variable()
    expr = sum([cx.Minimize(cx.log_sum_exp(logits[i, :] * t) - logits[i, labels[i]] * t)
                for i in range(set_size)])
    p = cx.Problem(expr, [0.25 <= t, t <= 4])
    p.solve()
    t = 1 / t.value

    return t
def train(self, level=0, lamb=0.01):
    """
    :param level: 0: no regularization; 1: L1 regularization; 2: L2 regularization
    :param lamb: regularization coefficient
    :return: None
    """
    L = cvx.Parameter(sign="positive")
    L.value = lamb  # regularization coefficient
    w = cvx.Variable(self.n + 1)  # parameter vector
    loss = 0
    for i in range(self.m):
        # build the cost function; the regularization term is added below
        loss += self.y_trans[i] * \
            cvx.log_sum_exp(cvx.vstack(0, self.x_trans[i, :].T * w)) + \
            (1 - self.y_trans[i]) * \
            cvx.log_sum_exp(cvx.vstack(0, -1 * self.x_trans[i, :].T * w))
        # Why must log_sum_exp be used? Why doesn't cvx.log(1 + cvx.exp(x[i, :].T * w)) work?
        # (log(1 + exp(z)) is not DCP-composable, because log requires a concave argument;
        #  log_sum_exp([0, z]) expresses the same function in DCP form.)
    if level > 0:
        reg = cvx.norm(w[:self.n], level)
        prob = cvx.Problem(cvx.Minimize(loss / self.m + L / (2 * self.m) * reg))
    else:
        prob = cvx.Problem(cvx.Minimize(loss / self.m))
    prob.solve()
    self.w = np.array(w.value)
def test_log_sum_exp(self): """Test log_sum_exp function that failed in Github issue. """ import numpy as np np.random.seed(1) m = 5 n = 2 X = np.ones((m, n)) w = cvx.Variable(n) expr2 = [cvx.log_sum_exp(cvx.hstack([0, X[i, :]*w])) for i in range(m)] expr3 = sum(expr2) obj = cvx.Minimize(expr3) p = cvx.Problem(obj) p.solve(solver=cvx.SCS, max_iters=1)
def __init__(self, seed, wsupport, expwsq, rvala=1, rvalb=1, tv=None):
    wmax = max(wsupport)
    assert wmax > 1
    assert wmax >= expwsq
    assert min(wsupport) < 1
    self.wsupport = np.sort(np.array(wsupport))

    wnice = self.wsupport / wmax
    A = np.array([wnice, wnice * wnice]).reshape(2, -1)
    b = np.array([1 / wmax, expwsq / (wmax * wmax)])
    mu = cp.Variable(len(b))
    prob = cp.Problem(cp.Maximize(mu.T @ b - cp.log_sum_exp(mu.T @ A)), [])
    tol = 5e-12
    prob.solve(solver='ECOS', verbose=False, max_iters=1000,
               feastol=tol, reltol=tol, abstol=tol)
    assert prob.status == 'optimal'
    logits = np.asarray((mu.T @ A).value).ravel()
    self.pw = softmax(logits)
    assert np.allclose(self.pw.dot(self.wsupport * self.wsupport), expwsq), pformat({
        'self.pw.dot(self.wsupport * self.wsupport)':
            self.pw.dot(self.wsupport * self.wsupport),
        'expwsq': expwsq
    })
    assert np.allclose(self.pw.dot(self.wsupport), 1), pformat({
        'self.pw.dot(self.wsupport)': self.pw.dot(self.wsupport),
    })
    assert np.allclose(np.sum(self.pw), 1), pformat({'np.sum(self.pw)': np.sum(self.pw)})

    self.rvala = rvala
    self.rvalb = rvalb
    self.tv = tv
    self.state = np.random.RandomState(seed)
    self.perm_state = None
    self.seed = seed
def CV(kfold, name, X_tr, Y_tr, X_te, Y_te, lambda_vals, kde_bandwidth, SAVE=True):
    X_tr = np.concatenate((X_tr, np.ones((X_tr.shape[0], 1))), axis=1)
    n = X_tr.shape[1]
    m = X_tr.shape[0]
    Y_tr = Y_tr.reshape(m, 1)
    X_te = np.concatenate((X_te, np.ones((X_te.shape[0], 1))), axis=1)
    Y_te = Y_te.reshape(X_te.shape[0], 1)
    test = np.concatenate((np.ones(n - 1), 0.0), axis=None).reshape(1, n)

    beta = cp.Variable((n, 1))
    res = np.array([1])
    constraints = [beta >= 0, test @ beta == res]
    lambd = cp.Parameter(nonneg=True)
    log_likelihood = cp.sum(
        cp.reshape(cp.multiply(Y_tr, X_tr @ beta), (m, )) -
        cp.log_sum_exp(cp.hstack([np.zeros((m, 1)), X_tr @ beta]), axis=1) -
        lambd * cp.norm(beta, 2))
    problem = cp.Problem(cp.Maximize(log_likelihood), constraints)

    beta_vals = []
    lambd.value = lambda_vals
    problem.solve()
    beta_vals.append(beta.value)

    res = sigmoid(np.dot(X_tr, beta.value))
    res2 = sigmoid(np.dot(X_te, beta.value))
    auc = metrics.roc_auc_score(Y_te, res2)

    if SAVE:
        if not os.path.exists('pdf/{}/{}/{}/model'.format(name, kde_bandwidth, kfold)):
            os.mkdir('pdf/{}/{}/{}/model'.format(name, kde_bandwidth, kfold))
        np.save('./pdf/{}/{}/{}/model/model.npy'.format(name, kde_bandwidth, kfold),
                beta.value)
    return auc
def maxSoftMaxProblem(problemOptions, solverOptions):
    k = problemOptions['k']  # class
    m = problemOptions['m']  # instances
    n = problemOptions['n']  # dim
    p = problemOptions['p']  # p-largest
    X = __normalized_data_matrix(m, n, 1)
    Y = np.random.randint(0, k, m)

    # Problem construction
    Theta = cp.Variable(n, k)
    beta = cp.Variable(1, k)
    obs = cp.vstack([-(X[i]*Theta + beta)[Y[i]] + cp.log_sum_exp(X[i]*Theta + beta)
                     for i in range(m)])
    prob = cp.Problem(cp.Minimize(cp.sum_largest(obs, p) + cp.sum_squares(Theta)))

    prob.solve(**solverOptions)
    return {'Problem': prob, 'name': 'maxSoftMaxProblem'}
def test_log_sum_exp(self): """Test log_sum_exp function that failed in Github issue. """ import cvxpy as cp import numpy as np np.random.seed(1) m = 5 n = 2 X = np.matrix(np.ones((m, n))) w = cp.Variable(n) expr2 = [cp.log_sum_exp(cp.vstack(0, X[i, :]*w)) for i in range(m)] expr3 = sum(expr2) obj = cp.Minimize(expr3) p = cp.Problem(obj) p.solve(solver=SCS, max_iters=1)
def gstar(self, x, dh):
    # This corresponds to Step 3 of Algorithm 1 (DSLEA) and to the primal problem with
    # linearized concave part. See the detailed comments on the computation in function dh.
    #
    # Instead of numpy, we use expressions from cvxpy, which are equivalent.
    var_in_inverse = cp.diag(cp.inv_pos(self.var_in))
    # vec_exp = cp.exp(cp.matmul(cp.matmul(cp.transpose(self.kWeightsTop), var_in_inverse), (x - self.mean_in))
    #                  + cp.transpose(self.kBiasTop))
    # return cp.log(cp.sum(vec_exp)) - self.mean_out / self.var_out - cp.transpose(x) @ dh
    return cp.log_sum_exp(
        cp.matmul(
            cp.matmul(cp.transpose(self.kWeightsTop), var_in_inverse),
            (x - self.mean_in)) + self.kBiasTop
    ) - self.mean_out / self.var_out - cp.transpose(x) @ dh
def test_paper_example_logreg_is_dpp(self) -> None:
    N, n = 3, 2
    beta = cp.Variable((n, 1))
    b = cp.Variable((1, 1))
    X = cp.Parameter((N, n))
    Y = np.ones((N, 1))
    lambd1 = cp.Parameter(nonneg=True)
    lambd2 = cp.Parameter(nonneg=True)
    log_likelihood = (1. / N) * cp.sum(
        cp.multiply(Y, X @ beta + b) -
        cp.log_sum_exp(cp.hstack([np.zeros((N, 1)), X @ beta + b]).T,
                       axis=0, keepdims=True).T)
    regularization = -lambd1 * cp.norm(beta, 1) - lambd2 * cp.sum_squares(beta)
    problem = cp.Problem(cp.Maximize(log_likelihood + regularization))
    self.assertTrue(log_likelihood.is_dpp())
    self.assertTrue(problem.is_dcp())
    self.assertTrue(problem.is_dpp())
def form_agent_constr0(A, logB, x, phi):
    """
    This formulation seems to reduce the number of variables, constraints,
    and NNZ in the A matrix.
    """
    m, n = A.shape
    constr = []
    for i in range(m):
        logcash = cvx.log_sum_exp(phi + logB[i, :])
        ag_exp = cvx.log(x[i, :]*A[i, :]) - logcash
        t = cvx.Variable()
        constr += [ag_exp >= t]
        for j in range(n):
            expr = t >= np.log(A[i, j]) - phi[j]
            constr += [expr]
    return constr
def test_CVXPY(self):
    try:
        import cvxpy as cvx
    except ImportError:
        return
    m, n = 40, 30
    A = np.random.randn(m, n)
    b = np.random.randn(m)
    x = cvx.Variable(n)
    p = cvx.Problem(cvx.Minimize(cvx.sum_squares(x)),
                    [cvx.log_sum_exp(x) <= 10, A @ x <= b])
    cvxpy_solve(p, presolve=True, iters=10, scs_opts={'eps': 1E-10})
    self.assertTrue(np.alltrue(A @ x.value - b <= 1E-8))
def partB():
    c0 = np.loadtxt("../Data/data/quiz4_class0.txt")
    c1 = np.loadtxt("../Data/data/quiz4_class1.txt")
    row0, col0 = c0.shape
    row1, col1 = c1.shape
    x = np.column_stack((np.vstack((c0, c1)),
                         np.ones(row0 + row1).reshape(-1, 1)))
    y = np.vstack((np.zeros(row0).reshape(-1, 1),
                   np.ones(row1).reshape(-1, 1)))
    lambd = 0.01
    theta = cvx.Variable((3, 1))
    loss = -cvx.sum(cvx.multiply(y, x @ theta)) + cvx.sum(
        cvx.log_sum_exp(cvx.hstack([np.zeros((row1 + row0, 1)), x @ theta]),
                        axis=1))
    reg = cvx.sum_squares(theta)
    prob = cvx.Problem(cvx.Minimize(loss / (row1 + row0) + lambd * reg))
    prob.solve()
    w = theta.value
    print(w)
    return w
def test_logistic_regression(self):
    np.random.seed(243)
    N, n = 10, 2

    def sigmoid(z):
        return 1 / (1 + np.exp(-z))

    X_np = np.random.randn(N, n)
    a_true = np.random.randn(n, 1)
    y_np = np.round(sigmoid(X_np @ a_true + np.random.randn(N, 1) * 0.5))

    X_tf = tf.Variable(X_np)
    lam_tf = tf.Variable(1.0 * tf.ones(1))

    a = cp.Variable((n, 1))
    X = cp.Parameter((N, n))
    lam = cp.Parameter(1, nonneg=True)
    y = y_np
    log_likelihood = cp.sum(
        cp.multiply(y, X @ a) -
        cp.log_sum_exp(cp.hstack([np.zeros((N, 1)), X @ a]).T,
                       axis=0, keepdims=True).T)
    prob = cp.Problem(
        cp.Minimize(-log_likelihood + lam * cp.sum_squares(a)))
    fit_logreg = CvxpyLayer(prob, [X, lam], [a])

    with tf.GradientTape(persistent=True) as tape:
        weights = fit_logreg(X_tf, lam_tf, solver_args={'eps': 1e-8})[0]
        summed = tf.math.reduce_sum(weights)
    grad_X_tf, grad_lam_tf = tape.gradient(summed, [X_tf, lam_tf])

    def f_train():
        prob.solve(solver=cp.SCS, eps=1e-8)
        return np.sum(a.value)

    numgrad_X_tf, numgrad_lam_tf = numerical_grad(
        f_train, [X, lam], [X_tf, lam_tf], delta=1e-6)
    np.testing.assert_allclose(grad_X_tf, numgrad_X_tf, atol=1e-2)
    np.testing.assert_allclose(grad_lam_tf, numgrad_lam_tf, atol=1e-2)
def partC(kernel):
    c0 = np.loadtxt("../Data/data/quiz4_class0.txt")
    c1 = np.loadtxt("../Data/data/quiz4_class1.txt")
    x = np.vstack((c0, c1))
    row0, col0 = c0.shape
    row1, col1 = c1.shape
    y = np.vstack((np.zeros(row0).reshape(-1, 1),
                   np.ones(row1).reshape(-1, 1))).reshape(-1, 1)
    lambd = 0.01
    alpha = cvx.Variable((row0 + row1, 1))
    loss = -y.T @ kernel @ alpha + cvx.sum(
        cvx.log_sum_exp(cvx.hstack([np.zeros((row1 + row0, 1)), kernel @ alpha]),
                        axis=1))
    reg = cvx.quad_form(alpha, kernel)
    prob = cvx.Problem(cvx.Minimize(loss / (row1 + row0) + lambd * reg))
    prob.solve()
    w = alpha.value
    print(w[:2])
    return w
def foo_prox(a, b, x0, phi0, rho):
    n = len(a)
    x = cvx.Variable(n)
    phi = cvx.Variable(n)

    logb = np.log(b)
    logcash = cvx.log_sum_exp(phi + logb)
    ag_exp = cvx.log(x.T*a) - logcash
    t = cvx.Variable()
    constr = [x >= 0, ag_exp >= t]
    for j in range(n):
        expr = t >= np.log(a[j]) - phi[j]
        constr += [expr]

    obj = cvx.sum_squares(x-x0) + cvx.sum_squares(phi-phi0)
    obj = obj*rho/2.0

    prob = cvx.Problem(cvx.Minimize(obj), constr)
    prob.solve(verbose=False, solver='ECOS')

    return np.array(x.value).flatten(), np.array(phi.value).flatten()
def softmax_loss(Theta, X, y):
    k = Theta.size[1]
    Y = one_hot(y, k)
    return (cp.sum_entries(cp.log_sum_exp(X*Theta, axis=1)) -
            cp.sum_entries(cp.mul_elemwise(X.T.dot(Y), Theta)))
    return [cp.norm2(randn()*x) <= randn()*t]

def C_soc_translated():
    return [cp.norm2(x + randn()) <= t + randn()]

def C_soc_scaled_translated():
    return [cp.norm2(randn()*x + randn()) <= randn()*t + randn()]

# Proximal operators
PROX_TESTS = [
    #prox("MATRIX_FRAC", lambda: cp.matrix_frac(p, X)),
    #prox("SIGMA_MAX", lambda: cp.sigma_max(X)),
    prox("AFFINE", lambda: randn(n).T*x),
    prox("CONSTANT", lambda: 0),
    prox("LAMBDA_MAX", lambda: cp.lambda_max(X)),
    prox("LOG_SUM_EXP", lambda: cp.log_sum_exp(x)),
    prox("MAX", lambda: cp.max_entries(x)),
    prox("NEG_LOG_DET", lambda: -cp.log_det(X)),
    prox("NON_NEGATIVE", None, C_non_negative_scaled),
    prox("NON_NEGATIVE", None, C_non_negative_scaled_elemwise),
    prox("NON_NEGATIVE", None, lambda: [x >= 0]),
    prox("NORM_1", f_norm1_weighted),
    prox("NORM_1", lambda: cp.norm1(x)),
    prox("NORM_2", lambda: cp.norm(X, "fro")),
    prox("NORM_2", lambda: cp.norm2(x)),
    prox("NORM_NUCLEAR", lambda: cp.norm(X, "nuc")),
    prox("SECOND_ORDER_CONE", None, C_soc_scaled),
    prox("SECOND_ORDER_CONE", None, C_soc_scaled_translated),
    prox("SECOND_ORDER_CONE", None, C_soc_translated),
    prox("SECOND_ORDER_CONE", None, lambda: [cp.norm(X, "fro") <= t]),
    prox("SECOND_ORDER_CONE", None, lambda: [cp.norm2(x) <= t]),
    return [cp.norm2(randn()*x) <= randn()*t]

def C_soc_translated():
    return [cp.norm2(x + randn()) <= t + randn()]

def C_soc_scaled_translated():
    return [cp.norm2(randn()*x + randn()) <= randn()*t + randn()]

# Proximal operators
PROX_TESTS = [
    #prox("MATRIX_FRAC", lambda: cp.matrix_frac(p, X)),
    #prox("SIGMA_MAX", lambda: cp.sigma_max(X)),
    prox("AFFINE", lambda: randn(n).T*x),
    prox("CONSTANT", lambda: 0),
    prox("LAMBDA_MAX", lambda: cp.lambda_max(X)),
    prox("LOG_SUM_EXP", lambda: cp.log_sum_exp(x)),
    prox("MAX", lambda: cp.max_entries(x)),
    prox("NEG_LOG_DET", lambda: -cp.log_det(X)),
    prox("NON_NEGATIVE", None, C_non_negative_scaled),
    prox("NON_NEGATIVE", None, C_non_negative_scaled_elemwise),
    prox("NON_NEGATIVE", None, lambda: [x >= 0]),
    prox("NORM_1", f_norm1_weighted),
    prox("NORM_1", lambda: cp.norm1(x)),
    prox("NORM_2", lambda: cp.norm(X, "fro")),
    prox("NORM_2", lambda: cp.norm2(x)),
    prox("NORM_NUCLEAR", lambda: cp.norm(X, "nuc")),
    #prox("QUAD_OVER_LIN", lambda: cp.quad_over_lin(p, q1)),
    prox("SECOND_ORDER_CONE", None, C_soc_scaled),
    prox("SECOND_ORDER_CONE", None, C_soc_scaled_translated),
    prox("SECOND_ORDER_CONE", None, C_soc_translated),
    prox("SECOND_ORDER_CONE", None, lambda: [cp.norm(X, "fro") <= t]),