def fit_ellipse_squared(x, y):
    """
    Fit an ellipse to 2D points using a squared (L2) loss.

    The conic without cross term, A*x^2 + C*y^2 + D*x + E*y + F = 0, is
    fitted by minimising norm2(X * theta) over theta = (A, C, D, E, F),
    subject to A + C = 1, which rules out the trivial solution theta = 0.

    @param x: x coordinates of the points (sequence or numpy array)
    @param y: y coordinates of the points, same length as x
    @return: result of conic_to_ellipse() applied to the fitted theta
    """

    # accept plain sequences too: x*x below needs element-wise semantics
    x = numpy.array(x)
    y = numpy.array(y)

    assert len(x) == len(y)

    N = len(x)
    D = 5

    # one row per point: [x^2, y^2, x, y, 1]
    dat = numpy.zeros((N, D))
    dat[:, 0] = x * x
    dat[:, 1] = y * y
    dat[:, 2] = x
    dat[:, 3] = y
    dat[:, 4] = numpy.ones(N)

    print(dat.shape)
    dat = cvxmod.matrix(dat)

    #### parameters

    # data
    X = cvxmod.param("X", N, D)

    #### variables

    # parameter vector
    theta = cvxmod.optvar("theta", D)

    # simple objective
    objective = cvxmod.atoms.norm2(X * theta)

    # create problem
    p = cvxmod.problem(cvxmod.minimize(objective))

    # normalisation A + C = 1 (excludes theta = 0)
    p.constr.append(theta[0] + theta[1] == 1)

    ###### set values
    X.value = dat

    p.solve()

    cvxmod.printval(theta)

    theta_ = numpy.array(cvxmod.value(theta))
    ellipse = conic_to_ellipse(theta_)

    return ellipse
def compute_bbox_set_agreement(example_boxes, gold_boxes):
    """
    Score how well a set of boxes agrees with a set of gold boxes.

    Solves a linear program that assigns example boxes to gold boxes such
    that every box takes part in at most one (fractional) pairing and the
    summed overlap_score of the chosen pairs is maximal.  The optimum is
    divided by max(len(example_boxes), len(gold_boxes)).

    @param example_boxes: boxes providing overlap_score(other_box)
    @param gold_boxes: reference boxes
    @return: 1 if both sets are empty, 0 if exactly one is empty,
             otherwise the normalised optimal overlap
    """

    nExB = len(example_boxes)
    nGtB = len(gold_boxes)

    if nExB == 0:
        if nGtB == 0:
            return 1
        return 0

    if nGtB == 0:
        print("WARNING: new object")
        return 0

    n_pairs = nExB * nGtB

    # Pair (iBox, jBox) lives at flat index iBox * nGtB + jBox.  The score
    # vector is built directly in that order so it lines up with the
    # selector matrices below (reshaping a 2D matrix would use the matrix
    # library's own storage order instead).
    scores = [float(ex.overlap_score(gt))
              for ex in example_boxes
              for gt in gold_boxes]
    A2 = cvxmod.matrix(scores, (1, n_pairs))

    # S: row iBox selects all pair variables of example box iBox
    S = []
    for iBox in range(nExB):
        S_tmp = [0] * n_pairs
        for jBox in range(nGtB):
            S_tmp[iBox * nGtB + jBox] = 1
        S.append(S_tmp)

    # S2: row jBox selects all pair variables of gold box jBox
    S2 = []
    for jBox in range(nGtB):
        S2_tmp = [0] * n_pairs
        for iBox in range(nExB):
            S2_tmp[iBox * nGtB + jBox] = 1
        S2.append(S2_tmp)

    # a list of lists is read as a list of columns, hence the transpose
    S = cvxmod.transpose(cvxmod.matrix(S, size=(n_pairs, nExB)))
    S2 = cvxmod.transpose(cvxmod.matrix(S2, size=(n_pairs, nGtB)))

    x = cvxmod.optvar('x', rows=n_pairs, cols=1)

    p = cvxmod.problem(cvxmod.maximize(A2 * x))
    p.constr.append(x <= 1)
    p.constr.append(x >= 0)
    # each example box / gold box is used at most once
    p.constr.append(S * x <= 1)
    p.constr.append(S2 * x <= 1)
    p.solve(True)

    overlap = cvxmod.value(p) / max(nExB, nGtB)
    assert (overlap < 1.0001)
    return overlap
def interior_point(X, y, lam):
    """
    Solve the lasso problem with cvxmod's interior point solver.

    Minimises sum((X*theta - y)^2) + 2*lam*norm1(theta) over theta.

    requires cvxmod (Jacob Mattingley and Stephen Boyd)
    http://cvxmod.net/

    @param X: 2D design matrix (one row per example)
    @param y: targets
    @param lam: L1 regularisation constant
    @return: numpy array holding the fitted theta
    """

    import cvxmod as cvx

    _, num_features = X.shape

    design = cvx.matrix(np.array(X))
    targets = cvx.matrix(np.array(y))

    theta = cvx.optvar('theta', num_features)

    # squared residuals plus L1 penalty
    residual = design * theta - targets
    squared_loss = cvx.sum(cvx.atoms.power(residual, 2))
    l1_penalty = (2 * lam) * cvx.norm1(theta)

    problem = cvx.problem(cvx.minimize(squared_loss + l1_penalty))
    problem.solve()

    return np.array(cvx.value(theta))
def fit(self, data):
    """
    Fit a parameter vector to the feature representation of data.

    The features phi_of_x(data) form the matrix X; theta minimises
    norm2(X * theta) subject to theta[0] + theta[1] == 1.

    @param data: input passed on to phi_of_x()
    @return: numpy array holding the fitted theta
    """

    features = phi_of_x(data)
    num_rows = features.shape[0]
    num_cols = features.shape[1]
    features = cvxmod.matrix(features)

    # data enters the problem as a parameter, theta is the unknown
    X = cvxmod.param("X", num_rows, num_cols)
    theta = cvxmod.optvar("theta", num_cols)

    problem = cvxmod.problem(cvxmod.minimize(cvxmod.atoms.norm2(X * theta)))

    # normalisation constraint
    problem.constr.append(theta[0] + theta[1] == 1)

    X.value = features
    problem.solve()

    cvxmod.printval(theta)

    return numpy.array(cvxmod.value(theta))
def fit_ellipse_stack_squared(dx, dy, dz, di):
    """
    Fit one ellipse per z layer using a squared loss.

    All layers are learned jointly: each layer gets its own conic parameter
    vector theta = (A, C, D, E, F) for A*x^2 + C*y^2 + D*x + E*y + F = 0,
    and a squared penalty on the difference of the thetas of neighbouring
    layers enforces smoothness along z.  Every point is weighted by the
    logarithm of its intensity.

    @param dx: x coordinates
    @param dy: y coordinates
    @param dz: z layer of each point
    @param di: intensity of each point
    @return: dict mapping z layer -> ellipse (from conic_to_ellipse, with
             its cz attribute set to the layer); empty dict for empty input
    """

    # sanity check
    assert len(dx) == len(dy)
    assert len(dx) == len(dz)
    assert len(dx) == len(di)

    # group points by z layer
    dat = defaultdict(list)
    for px, py, pz, pi in zip(dx, dy, dz, di):
        dat[pz].append([px, py, pi])

    if not dat:
        return {}

    # Visit the layers in ascending z.  The smoothness term couples
    # consecutive entries of `thetas`, so these must be neighbouring layers
    # (plain dict key order gives no such guarantee).
    active_layers = sorted(dat.keys())

    total_N = len(dx)
    M = len(active_layers)
    D = 5

    X_matrix = []
    thetas = []

    for z in active_layers:

        layer = numpy.array(dat[z])
        x = layer[:, 0]
        y = layer[:, 1]

        # log intensities act as per-point weights
        log_intensity = numpy.log(layer[:, 2])

        # dimensionality
        N = len(x)
        d = numpy.zeros((N, D))

        d[:, 0] = x * x
        d[:, 1] = y * y
        d[:, 2] = x
        d[:, 3] = y
        d[:, 4] = numpy.ones(N)

        # consider intensities: scale row k by weight k (same result as
        # multiplying with diag(weights), without building an N x N matrix)
        d = log_intensity[:, numpy.newaxis] * d

        print(d.shape)
        d = cvxmod.matrix(d)

        #### parameters

        # data of this layer
        X = cvxmod.param("X" + str(z), N, D)
        X.value = d
        X_matrix.append(X)

        #### variables

        # parameter vector of this layer
        theta = cvxmod.optvar("theta" + str(z), D)
        thetas.append(theta)

    # construct objective
    objective = 0
    for X, theta in zip(X_matrix, thetas):
        objective += cvxmod.sum(cvxmod.atoms.square(X * theta))

    # add smoothness regularization; needs at least two layers (this also
    # avoids dividing by M - 1 == 0 for a single layer)
    if M > 1:
        reg_const = float(total_N) / float(M - 1)
        for lower, upper in zip(thetas[:-1], thetas[1:]):
            objective += reg_const * cvxmod.sum(cvxmod.atoms.square(lower - upper))

    print(objective)

    # create problem
    p = cvxmod.problem(cvxmod.minimize(objective))

    # add constraints: A + C = 1 per layer (excludes theta = 0)
    for theta in thetas:
        p.constr.append(theta[0] + theta[1] == 1)

    p.solve()

    # wrap up result
    ellipse_stack = {}
    for z_layer, theta in zip(active_layers, thetas):
        theta_ = numpy.array(cvxmod.value(theta))
        ellipse_stack[z_layer] = conic_to_ellipse(theta_)
        ellipse_stack[z_layer].cz = z_layer

    return ellipse_stack
def fit_ellipse_stack_abs(dx, dy, dz, di): """ fit ellipoid using squared loss idea to learn all stacks together including smoothness """ # sanity check assert len(dx) == len(dy) assert len(dx) == len(dz) assert len(dx) == len(di) # unique zs dat = defaultdict(list) # resort data for idx in range(len(dx)): dat[dz[idx]].append( [dx[idx], dy[idx], di[idx]] ) # init ret ellipse_stack = [] for idx in range(max(dz)): ellipse_stack.append(Ellipse(0, 0, idx, 1, 1, 0)) total_N = len(dx) M = len(dat.keys()) D = 5 X_matrix = [] thetas = [] slacks = [] eps_slacks = [] mean_di = float(numpy.mean(di)) for z in dat.keys(): x = numpy.array(dat[z])[:,0] y = numpy.array(dat[z])[:,1] # intensities i = numpy.array(dat[z])[:,2] # log intensities i = numpy.log(i) # create matrix ity = numpy.diag(i)# / mean_di # dimensionality N = len(x) d = numpy.zeros((N, D)) d[:,0] = x*x d[:,1] = y*y #d[:,2] = x*y d[:,2] = x d[:,3] = y d[:,4] = numpy.ones(N) #d[:,0] = x*x #d[:,1] = y*y #d[:,2] = x*y #d[:,3] = x #d[:,4] = y #d[:,5] = numpy.ones(N) print "old", d # consider intensities old_shape = d.shape d = numpy.dot(ity, d) print "new", d assert d.shape == old_shape print d.shape d = cvxmod.matrix(d) #### parameters # da X = cvxmod.param("X" + str(z), N, D) X.value = d X_matrix.append(X) #### varibales # parameter vector theta = cvxmod.optvar("theta" + str(z), D) thetas.append(theta) # construct obj objective = 0 # loss term for i in xrange(M): objective += cvxmod.atoms.norm1(X_matrix[i] * thetas[i]) # add smoothness regularization reg_const = 5 * float(total_N) / float(M-1) for i in xrange(M-1): objective += reg_const * cvxmod.norm1(thetas[i] - thetas[i+1]) # create problem prob = cvxmod.problem(cvxmod.minimize(objective)) # add constraints """ for (i,X) in enumerate(X_matrix): p.constr.append(X*thetas[i] <= slacks[i]) p.constr.append(-X*thetas[i] <= slacks[i]) #eps = 0.5 #p.constr.append(slacks[i] - eps <= eps_slacks[i]) #p.constr.append(0 <= eps_slacks[i]) """ # add non-degeneracy constraints for 
i in xrange(1, M-1): prob.constr.append(thetas[i][0] + thetas[i][1] == 1.0) # A + C = 1 # pinch ends prob.constr.append(cvxmod.sum(thetas[0]) >= -0.01) prob.constr.append(cvxmod.sum(thetas[-1]) >= -0.01) print prob ###### set values from cvxopt import solvers solvers.options['reltol'] = 1e-1 solvers.options['abstol'] = 1e-1 print solvers.options prob.solve() # wrap up result ellipse_stack = {} active_layers = dat.keys() assert len(active_layers) == M # reconstruct original parameterization for i in xrange(M): theta_ = numpy.array(cvxmod.value(thetas[i])) z_layer = active_layers[i] ellipse_stack[z_layer] = conic_to_ellipse(theta_) ellipse_stack[z_layer].cz = z_layer return ellipse_stack
def Main(): options, _ = MakeOpts().parse_args(sys.argv) assert options.genes_filename assert options.protein_levels_a and options.protein_levels_b print 'Reading genes list from', options.genes_filename gene_ids = util.ReadProteinIDs(options.genes_filename) print 'Reading protein data A from', options.protein_levels_a gene_counts_a = util.ReadProteinCounts(options.protein_levels_a) print 'Reading protein data B from', options.protein_levels_b gene_counts_b = util.ReadProteinCounts(options.protein_levels_b) my_counts_a = dict((id, (count, name)) for id, name, count in util.ExtractCounts(gene_counts_a, gene_ids)) my_counts_b = dict((id, (count, name)) for id, name, count in util.ExtractCounts(gene_counts_b, gene_ids)) overlap_ids = set(my_counts_a.keys()).intersection(my_counts_b.keys()) x = pylab.matrix([my_counts_a[id][0] for id in overlap_ids]) y = pylab.matrix([my_counts_b[id][0] for id in overlap_ids]) labels = [my_counts_b[id][1] for id in overlap_ids] xlog = pylab.log10(x) ylog = pylab.log10(y) a = cvxmod.optvar('a', 1) mx = cvxmod.matrix(xlog.T) my = cvxmod.matrix(ylog.T) p = cvxmod.problem(cvxmod.minimize(cvxmod.norm2(my - a - mx))) p.solve(quiet=True) offset = cvxmod.value(a) lin_factor = 10**offset lin_label = 'Y = %.2g*X' % lin_factor log_label = 'log10(Y) = %.2g + log10(X)' % offset f1 = pylab.figure(0) pylab.title('Linear scale') xylim = max([x.max(), y.max()]) + 5000 linxs = pylab.arange(0.0, xylim, 0.1) linys = linxs * lin_factor pylab.plot(x.tolist()[0], y.tolist()[0], 'g.', label='Protein Data') pylab.plot(linxs, linys, 'b-', label=lin_label) for x_val, y_val, label in zip(x.tolist()[0], y.tolist()[0], labels): pylab.text(x_val, y_val, label, fontsize=8) pylab.xlabel(options.a_label) pylab.ylabel(options.b_label) pylab.legend() pylab.xlim((0.0, xylim)) pylab.ylim((0.0, xylim)) f2 = pylab.figure(1) pylab.title('Log10 scale') xylim = max([xlog.max(), ylog.max()]) + 1.0 pylab.plot(xlog.tolist()[0], ylog.tolist()[0], 'g.', label='Log10 Protein Data') 
linxs = pylab.arange(0.0, xylim, 0.1) linys = linxs + offset pylab.plot(linxs, linys, 'b-', label=log_label) for x_val, y_val, label in zip(xlog.tolist()[0], ylog.tolist()[0], labels): pylab.text(x_val, y_val, label, fontsize=8) pylab.xlabel(options.a_label + ' (log10)') pylab.ylabel(options.b_label + ' (log10)') pylab.legend() pylab.xlim((0.0, xylim)) pylab.ylim((0.0, xylim)) pylab.show()
def solve_boosting(out, labels, nu, solver):
    '''
    solve boosting formulation used by Gehler and Nowozin

    minimises  -rho + 1/(nu*N) * sum(chi)
    subject to sum(beta) == 1, beta >= 0, chi >= 0,
               labels[i] * (out * beta)[i] + chi[i] >= rho

    @param out: matrix (N,F) of predictions (for each f_i) for all examples
    @param labels: vector (N,1) label for each example
    @param nu: regularization constant
    @param solver: LP solver name handed to p.solve(lpsolver=...)
    @return: the solved cvxmod problem
    '''

    N = out.size[0]
    F = out.size[1]

    assert(N == len(labels))

    norm_fact = 1.0 / (nu * float(N))
    print(norm_fact)

    # labels on the diagonal; avoids a point-wise product
    label_matrix = cvxmod.zeros((N, N))
    for i in range(N):
        label_matrix[i, i] = labels[i]

    #### parameters

    f = cvxmod.param("f", N, F)
    y = cvxmod.param("y", N, N, symm=True)
    # NOTE: not referenced by objective or constraints (norm_fact is used
    # directly); kept so the set of created parameters is unchanged
    norm = cvxmod.param("norm", 1)

    #### variables

    # rho
    rho = cvxmod.optvar("rho", 1)

    # dim = (N x 1)
    chi = cvxmod.optvar("chi", N)

    # dim = (F x 1)
    beta = cvxmod.optvar("beta", F)

    objective = -rho + cvxmod.sum(chi) * norm_fact

    print(objective)

    # create problem
    p = cvxmod.problem(cvxmod.minimize(objective))

    # create constraints for probability simplex
    p.constr.append(cvxmod.sum(beta) == 1.0)
    p.constr.append(beta >= 0.0)

    # slack variables must be non-negative (as in solve_svm); without this
    # bound the linear program is unbounded
    p.constr.append(chi >= 0.0)

    #    y       f     beta          y    f*beta      y*f*beta
    # (N x N) (N x F) (F x 1) --> (N x N) (N x 1) --> (N x 1)
    p.constr.append(y * (f * beta) + chi >= rho)

    ###### set values
    f.value = out
    y.value = label_matrix
    norm.value = norm_fact

    p.solve(lpsolver=solver)

    cvxmod.printval(chi)
    cvxmod.printval(beta)
    cvxmod.printval(rho)

    return p
def solve_svm(out, labels, nu, solver):
    '''
    Solve the boosting linear program of Gehler and Nowozin.

    minimises  -rho + 1/(nu*N) * sum(chi)
    subject to sum(beta) == 1, beta >= 0, chi >= 0,
               labels[i] * (out * beta)[i] + chi[i] >= rho

    @param out: matrix (N,F) of predictions (for each f_i) for all examples
    @param labels: vector (N,1) label for each example
    @param nu: regularization constant
    @param solver: which solver to use. options: 'mosek', 'glpk'
    @return: numpy array holding the fitted beta
    '''

    # problem dimensions
    num_examples = out.size[0]
    num_features = out.size[1]

    assert num_examples == len(labels), str(num_examples) + " " + str(len(labels))

    slack_weight = 1.0 / (nu * float(num_examples))
    print("normalization factor %f" % (slack_weight))

    # diagonal label matrix replaces a point-wise product
    label_matrix = cvxmod.zeros((num_examples, num_examples))
    for idx in range(num_examples):
        label_matrix[idx, idx] = labels[idx]

    # parameters
    f = cvxmod.param("f", num_examples, num_features)
    y = cvxmod.param("y", num_examples, num_examples, symm=True)
    norm = cvxmod.param("norm", 1)

    # variables: margin, slacks (N x 1), weights (F x 1)
    rho = cvxmod.optvar("rho", 1)
    chi = cvxmod.optvar("chi", num_examples)
    beta = cvxmod.optvar("beta", num_features)

    objective = -rho + cvxmod.sum(chi) * slack_weight
    print(objective)

    p = cvxmod.problem(cvxmod.minimize(objective))

    # beta lies on the probability simplex
    p.constr.append(cvxmod.sum(beta) == 1.0)
    p.constr.append(beta >= 0.0)

    # non-negative slacks
    p.constr.append(chi >= 0.0)

    # margin constraint:
    # (N x N) * ((N x F) * (F x 1)) + (N x 1) >= rho
    p.constr.append(y * (f * beta) + chi >= rho)

    # bind parameter values
    f.value = out
    y.value = label_matrix
    norm.value = slack_weight

    print("solving problem")
    print("=============================================")
    print(p)
    print("=============================================")

    # run the solver
    p.solve(lpsolver=solver)

    # report the solution
    cvxmod.printval(chi)
    cvxmod.printval(beta)
    cvxmod.printval(rho)

    return numpy.array(cvxmod.value(beta))
def fit_ellipse_linear(x, y): """ fit ellipse stack using absolute loss """ x = numpy.array(x) y = numpy.array(y) print "shapes", x.shape, y.shape assert len(x) == len(y) N = len(x) D = 6 dat = numpy.zeros((N, D)) dat[:,0] = x*x dat[:,1] = y*y dat[:,2] = y*x dat[:,3] = x dat[:,4] = y dat[:,5] = numpy.ones(N) print dat.shape dat = cvxmod.matrix(dat) # norm norm = numpy.zeros((N,N)) for i in range(N): norm[i,i] = numpy.sqrt(numpy.dot(dat[i], numpy.transpose(dat[i]))) norm = cvxmod.matrix(norm) #### parameters # data X = cvxmod.param("X", N, D) Q_grad = cvxmod.param("Q_grad", N, N) #### varibales # parameter vector theta = cvxmod.optvar("theta", D) # dim = (N x 1) s = cvxmod.optvar("s", N) # simple objective objective = cvxmod.sum(s) # create problem p = cvxmod.problem(cvxmod.minimize(objective)) # add constraints # (N x D) * (D X 1) = (N x N) * (N X 1) p.constr.append(X*theta <= Q_grad*s) p.constr.append(-X*theta <= Q_grad*s) #p.constr.append(theta[4] == 1) # trace constraint p.constr.append(theta[0] + theta[1] == 1) ###### set values X.value = dat Q_grad.value = norm #solver = "mosek" #p.solve(lpsolver=solver) p.solve() cvxmod.printval(theta) theta_ = numpy.array(cvxmod.value(theta)) ellipse = conic_to_ellipse(theta_) return ellipse
def fit_ellipse_eps_insensitive(x, y): """ fit ellipse using epsilon-insensitive loss """ x = numpy.array(x) y = numpy.array(y) print "shapes", x.shape, y.shape assert len(x) == len(y) N = len(x) D = 5 dat = numpy.zeros((N, D)) dat[:,0] = x*x dat[:,1] = y*y #dat[:,2] = y*x dat[:,2] = x dat[:,3] = y dat[:,4] = numpy.ones(N) print dat.shape dat = cvxmod.matrix(dat) #### parameters # data X = cvxmod.param("X", N, D) # parameter for eps-insensitive loss eps = cvxmod.param("eps", 1) #### varibales # parameter vector theta = cvxmod.optvar("theta", D) # dim = (N x 1) s = cvxmod.optvar("s", N) t = cvxmod.optvar("t", N) # simple objective objective = cvxmod.sum(t) # create problem p = cvxmod.problem(cvxmod.minimize(objective)) # add constraints # (N x D) * (D X 1) = (N X 1) p.constr.append(X*theta <= s) p.constr.append(-X*theta <= s) p.constr.append(s - eps <= t) p.constr.append(0 <= t) #p.constr.append(theta[4] == 1) # trace constraint p.constr.append(theta[0] + theta[1] == 1) ###### set values X.value = dat eps.value = 0.0 #solver = "mosek" #p.solve(lpsolver=solver) p.solve() cvxmod.printval(theta) theta_ = numpy.array(cvxmod.value(theta)) ellipse = conic_to_ellipse(theta_) return ellipse
def Main(): options, _ = MakeOpts().parse_args(sys.argv) assert options.genes_filename assert options.protein_levels_a and options.protein_levels_b print 'Reading genes list from', options.genes_filename gene_ids = util.ReadProteinIDs(options.genes_filename) print 'Reading protein data A from', options.protein_levels_a gene_counts_a = util.ReadProteinCounts(options.protein_levels_a) print 'Reading protein data B from', options.protein_levels_b gene_counts_b = util.ReadProteinCounts(options.protein_levels_b) my_counts_a = dict( (id, (count, name)) for id, name, count in util.ExtractCounts(gene_counts_a, gene_ids)) my_counts_b = dict( (id, (count, name)) for id, name, count in util.ExtractCounts(gene_counts_b, gene_ids)) overlap_ids = set(my_counts_a.keys()).intersection(my_counts_b.keys()) x = pylab.matrix([my_counts_a[id][0] for id in overlap_ids]) y = pylab.matrix([my_counts_b[id][0] for id in overlap_ids]) labels = [my_counts_b[id][1] for id in overlap_ids] xlog = pylab.log10(x) ylog = pylab.log10(y) a = cvxmod.optvar('a', 1) mx = cvxmod.matrix(xlog.T) my = cvxmod.matrix(ylog.T) p = cvxmod.problem(cvxmod.minimize(cvxmod.norm2(my - a - mx))) p.solve(quiet=True) offset = cvxmod.value(a) lin_factor = 10**offset lin_label = 'Y = %.2g*X' % lin_factor log_label = 'log10(Y) = %.2g + log10(X)' % offset f1 = pylab.figure(0) pylab.title('Linear scale') xylim = max([x.max(), y.max()]) + 5000 linxs = pylab.arange(0.0, xylim, 0.1) linys = linxs * lin_factor pylab.plot(x.tolist()[0], y.tolist()[0], 'g.', label='Protein Data') pylab.plot(linxs, linys, 'b-', label=lin_label) for x_val, y_val, label in zip(x.tolist()[0], y.tolist()[0], labels): pylab.text(x_val, y_val, label, fontsize=8) pylab.xlabel(options.a_label) pylab.ylabel(options.b_label) pylab.legend() pylab.xlim((0.0, xylim)) pylab.ylim((0.0, xylim)) f2 = pylab.figure(1) pylab.title('Log10 scale') xylim = max([xlog.max(), ylog.max()]) + 1.0 pylab.plot(xlog.tolist()[0], ylog.tolist()[0], 'g.', label='Log10 Protein Data') 
linxs = pylab.arange(0.0, xylim, 0.1) linys = linxs + offset pylab.plot(linxs, linys, 'b-', label=log_label) for x_val, y_val, label in zip(xlog.tolist()[0], ylog.tolist()[0], labels): pylab.text(x_val, y_val, label, fontsize=8) pylab.xlabel(options.a_label + ' (log10)') pylab.ylabel(options.b_label + ' (log10)') pylab.legend() pylab.xlim((0.0, xylim)) pylab.ylim((0.0, xylim)) pylab.show()