def iterate(cself, svm, obj_val, classes):
    """One CCCP step: re-linearize, re-solve the QP, rebuild the NSK."""
    # Bags classified exactly on the boundary are pushed to the
    # positive side so every label is strictly +/-1.
    classes[np.nonzero(classes == 0.0)] = 1.0

    cself.mention('Linearalizing constraints...')
    stacked = np.matrix(np.vstack([-np.ones((bs.L_n, 1)),
                                   classes.reshape((-1, 1)),
                                   np.ones((bs.X_p, 1))]))
    signs = spdiag(stacked)

    # Refresh the QP with the new label assignment and re-solve.
    qp.update_H(signs * K * signs)
    qp.update_Aeq(stacked.T)
    alphas, obj = qp.solve(self.verbose)

    # Rebuild an NSK classifier from the QP solution.
    svm = NSK(kernel=self.kernel, gamma=self.gamma, p=self.p,
              verbose=self.verbose, sv_cutoff=self.sv_cutoff)
    svm._bags = self._all_bags
    svm._y = stacked
    svm._alphas = alphas
    svm._objective = obj
    svm._compute_separator(K)
    svm._K = K

    # Converged when the objective stops improving.
    if cself.check_tolerance(obj_val, obj):
        return None, svm

    # Use precomputed classifications from SVM
    new_classes = np.sign(svm._bag_predictions[bs.L_n:-bs.X_p])
    return {'svm': svm, 'obj_val': obj, 'classes': new_classes}, None
def _compute_separator(self, K):
    """Compute the bias and instance predictions from the current alphas."""
    # Mask of support vectors: alphas strictly above the cutoff.
    support = (self._alphas.flat > self.sv_cutoff)
    labels = spdiag(self._y)
    # alpha^T * diag(y) * V * K appears twice below; evaluate once
    # (same left-to-right grouping as before, so results are identical).
    weighted_row = self._alphas.T * labels * self._V * K
    # Bias averages the margin residual over the support vectors.
    self._b = (np.sum(labels * support) - np.sum(weighted_row)) / np.sum(support)
    self._dotprods = weighted_row.T
    self._predictions = self._b + self._dotprods
def K(X, Y):
    """
    Normalized set kernel between collections of bags.

    Lists of bags get the per-bag summed/normalized treatment; plain
    instance matrices fall straight through to the base kernel k.
    """
    if isinstance(X, list):
        norm = lambda x: normalizer(x, k)
        # FIX: map() returns a lazy iterator on Python 3; cvxopt's
        # matrix() needs a materialized sequence.
        x_norm = matrix(list(map(norm, X)))
        if id(X) == id(Y):
            # Optimize for symmetric case
            norms = x_norm.T * x_norm
            if all(len(bag) == 1 for bag in X):
                # Optimize for singleton bags
                instX = vstack(X)
                raw_kernel = k(instX, instX)
            else:
                # Only need to compute half of
                # the matrix if it's symmetric
                upper = matrix([i * [0] + [np.sum(k(x, y)) for y in Y[i:]]
                                for i, x in enumerate(X, 1)])
                diag = np.array([np.sum(k(x, x)) for x in X])
                raw_kernel = upper + upper.T + spdiag(diag)
        else:
            y_norm = matrix(list(map(norm, Y)))
            norms = x_norm.T * y_norm
            raw_kernel = k(vstack(X), vstack(Y))
            # FIX: these were map() iterators; the first any(...) below
            # exhausted them, so slices(...) saw an empty iterable and
            # the per-bag collapsing silently did nothing on Python 3.
            lensX = [len(bag) for bag in X]
            lensY = [len(bag) for bag in Y]
            if any(l != 1 for l in lensX):
                raw_kernel = vstack([np.sum(raw_kernel[i:j, :], axis=0)
                                     for i, j in slices(lensX)])
            if any(l != 1 for l in lensY):
                raw_kernel = hstack([np.sum(raw_kernel[:, i:j], axis=1)
                                     for i, j in slices(lensY)])
        return np.divide(raw_kernel, norms)
    else:
        return k(X, Y)
def _setup_svm(self, examples, classes, C):
    """
    Construct the pieces of the standard SVM dual QP.

    @param examples : sequence of n examples (or bags) used to build the
                      kernel matrix
    @param classes : n-by-1 matrix of -1/+1 labels
    @param C : box-constraint parameter; a float is broadcast to every
               alpha, anything else is used directly as per-alpha bounds
    @return : tuple (K, H, f, A, b, lb, ub): kernel matrix, Hessian,
              linear term, equality constraint (A, b), and box bounds
    """
    kernel = kernel_by_name(self.kernel, gamma=self.gamma, p=self.p)
    n = len(examples)
    e = np.matrix(np.ones((n, 1)))

    # Kernel and Hessian
    if kernel is None:
        K = None
        H = None
    else:
        K = _smart_kernel(kernel, examples)
        D = spdiag(classes)
        H = D * K * D

    # Term for -sum of alphas
    f = -e

    # Sum(y_i * alpha_i) = 0
    A = classes.T.astype(float)
    b = np.matrix([0.0])

    # 0 <= alpha_i <= C
    lb = np.matrix(np.zeros((n, 1)))
    # FIX: isinstance instead of exact type comparison. NOTE(review): an
    # integer C still falls through to the array branch — looks like
    # callers always pass a float or an array; confirm before widening.
    if isinstance(C, float):
        ub = C * e
    else:
        # Allow for C to be an array
        ub = C
    return K, H, f, A, b, lb, ub
def predict(self, bags):
    """
    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @return : an array of length n containing real-valued label predictions
              (threshold at zero to produce binary predictions)
    """
    # Without support vectors (untrained or degenerate fit) predict zeros.
    if self._sv_bags is None or len(self._sv_bags) == 0:
        return np.zeros(len(bags))
    kern = kernel_by_name(self.kernel, p=self.p, gamma=self.gamma)
    gram = kern([np.asmatrix(bag) for bag in bags], self._sv_bags)
    raw = self._b + gram * spdiag(self._sv_y) * self._sv_alphas
    return np.array(raw).reshape((-1,))
def predict(self, X):
    """
    @param X : an n-by-m array-like object containing n examples with
               m features
    @return : an array of length n containing real-valued label predictions
              (threshold at zero to produce binary predictions)
    """
    # No support vectors means the separator is undefined: predict zeros.
    if self._sv_X is None or len(self._sv_X) == 0:
        return np.zeros(len(X))
    kern = kernel_by_name(self.kernel, p=self.p, gamma=self.gamma)
    gram = kern(np.asmatrix(X), self._sv_X)
    raw = self._b + gram * spdiag(self._sv_y) * self._sv_alphas
    return np.array(raw).reshape((-1,))
def predict(self, bags):
    """
    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @return : an array of length n containing real-valued label predictions
              (threshold at zero to produce binary predictions)
    """
    if self._sv_bags is None or len(self._sv_bags) == 0:
        return np.zeros(len(bags))
    else:
        kernel = kernel_by_name(self.kernel, p=self.p, gamma=self.gamma)
        # FIX: map() is a lazy iterator on Python 3, but the kernel
        # expects a list of bags (it checks for the list type and
        # indexes/measures it), so the result must be materialized.
        K = kernel(list(map(np.asmatrix, bags)), self._sv_bags)
        return np.array(self._b + K * spdiag(self._sv_y)
                        * self._sv_alphas).reshape((-1,))
def iterate(cself, svm, selectors, instances, K):
    """One MISVM step: solve the QP, reselect witnesses, update the QP."""
    cself.mention('Training SVM...')
    alphas, obj = qp.solve(cself.verbose)

    # Build a fresh SVM from the current QP solution.
    svm = SVM(kernel=self.kernel, gamma=self.gamma, p=self.p,
              verbose=self.verbose, sv_cutoff=self.sv_cutoff)
    svm._X = instances
    svm._y = classes
    svm._alphas = alphas
    svm._objective = obj
    svm._compute_separator(K)
    svm._K = K

    cself.mention('Recomputing classes...')
    confidences = svm.predict(bs.pos_instances)
    # For each positive bag, pick its most confidently positive instance.
    chosen_pos = bs.L_n + np.array([lo + np.argmax(confidences[lo:hi])
                                    for lo, hi in slices(bs.pos_groups)])
    updated = np.hstack([neg_selectors, chosen_pos])

    if selectors is None:
        changed = len(updated)
    else:
        changed = np.nonzero(updated - selectors)[0].size
    cself.mention('Selector differences: %d' % changed)

    if changed == 0:
        return None, svm
    elif changed > 5:
        # Clear results to avoid a
        # bad starting point in
        # the next iteration
        qp.clear_results()

    cself.mention('Updating QP...')
    picked = (updated,)
    K = K_all[picked].T[picked].T
    signs = spdiag(classes)
    qp.update_H(signs * K * signs)
    return {'svm': svm,
            'selectors': updated,
            'instances': bs.instances[picked],
            'K': K}, None
def predict(self, bags):
    """
    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @return : an array of length n containing real-valued label predictions
              (threshold at zero to produce binary predictions)
    """
    # Not fitted yet: fall back to all-zero predictions.
    if self._b is None:
        return np.zeros(len(bags))
    matrices = [np.asmatrix(bag) for bag in bags]
    kern = kernel_by_name(self.kernel, p=self.p, gamma=self.gamma)
    labels = spdiag(self._y)
    # alpha^T * diag(y) * V does not depend on the bag; hoist it out of
    # the loop (same multiplication grouping, so results are identical).
    row = self._alphas.T * labels * self._V
    return np.array([np.max(self._b + row * kern(self._X, bag))
                     for bag in matrices])
def _compute_separator(self, K):
    """Extract the support vectors and compute bias and bag predictions."""
    # Indices of support vectors: alphas strictly above the cutoff.
    self._sv = np.nonzero(self._alphas.flat > self.sv_cutoff)
    self._sv_alphas = self._alphas[self._sv]
    self._sv_bags = [self._bags[i] for i in self._sv[0]]
    self._sv_y = self._y[self._sv]

    n_sv = len(self._sv_bags)
    if n_sv == 0:
        # Degenerate solution: flat separator, zero predictions.
        self._b = 0.0
        self._bag_predictions = np.zeros(len(self._bags))
        return

    # Kernel rows of the support vectors against all bags, and the
    # square sub-kernel among the support vectors themselves.
    sv_rows = K[self._sv]
    sv_gram = sv_rows.T[self._sv].T
    ones = np.matrix(np.ones((n_sv, 1)))
    labels = spdiag(self._sv_y)
    self._b = float(ones.T * labels * ones
                    - self._sv_alphas.T * labels * sv_gram * ones) / n_sv
    self._bag_predictions = np.array(
        self._b + self._sv_alphas.T * labels * sv_rows).reshape((-1,))
def iterate(cself, svm, obj_val, classes):
    """One CCCP iteration: relabel, resolve the QP, rebuild the NSK model."""
    # Zero classifications are ambiguous; treat them as positive.
    classes[np.nonzero(classes == 0.0)] = 1.0

    cself.mention('Linearalizing constraints...')
    label_col = np.matrix(np.vstack([-np.ones((bs.L_n, 1)),
                                     classes.reshape((-1, 1)),
                                     np.ones((bs.X_p, 1))]))
    D = spdiag(label_col)

    # Update QP with the relabeled data, then re-solve it.
    qp.update_H(D * K * D)
    qp.update_Aeq(label_col.T)
    alphas, obj = qp.solve(self.verbose)

    # Construct the next NSK model from the solution.
    svm = NSK(kernel=self.kernel, gamma=self.gamma, p=self.p,
              verbose=self.verbose, sv_cutoff=self.sv_cutoff)
    svm._bags = self._all_bags
    svm._y = label_col
    svm._alphas = alphas
    svm._objective = obj
    svm._compute_separator(K)
    svm._K = K

    if cself.check_tolerance(obj_val, obj):
        return None, svm

    # Use precomputed classifications from SVM
    relabeled = np.sign(svm._bag_predictions[bs.L_n:-bs.X_p])
    return {'svm': svm,
            'obj_val': obj,
            'classes': relabeled}, None
def K(X, Y):
    """
    Normalized set kernel between collections of bags.

    Lists of bags get the per-bag summed/normalized treatment; plain
    instance matrices fall straight through to the base kernel k.
    """
    # FIX: isinstance instead of exact type comparison; also replaced the
    # redundant `[elem for elem in map(...)]` wrappers (list(map(...)) or
    # a direct comprehension does the same with less noise).
    if isinstance(X, list):
        def norm(x):
            return normalizer(x, k)

        x_norm = matrix(list(map(norm, X)))
        if id(X) == id(Y):
            # Optimize for symmetric case
            norms = x_norm.T * x_norm
            if all(len(bag) == 1 for bag in X):
                # Optimize for singleton bags
                instX = vstack(X)
                raw_kernel = k(instX, instX)
            else:
                # Only need to compute half of
                # the matrix if it's symmetric
                upper = matrix([
                    i * [0] + [np.sum(k(x, y)) for y in Y[i:]]
                    for i, x in enumerate(X, 1)
                ])
                diag = np.array([np.sum(k(x, x)) for x in X])
                raw_kernel = upper + upper.T + spdiag(diag)
        else:
            y_norm = matrix(list(map(norm, Y)))
            norms = x_norm.T * y_norm
            raw_kernel = k(vstack(X), vstack(Y))
            lensX = [len(bag) for bag in X]
            lensY = [len(bag) for bag in Y]
            if any(l != 1 for l in lensX):
                raw_kernel = vstack([
                    np.sum(raw_kernel[i:j, :], axis=0)
                    for i, j in slices(lensX)
                ])
            if any(l != 1 for l in lensY):
                raw_kernel = hstack([
                    np.sum(raw_kernel[:, i:j], axis=1)
                    for i, j in slices(lensY)
                ])
        return np.divide(raw_kernel, norms)
    else:
        return k(X, Y)
def iterate(cself, svm, classes):
    """One heuristic MISVM step: solve the QP and relabel positive instances."""
    cself.mention('Training SVM...')
    signs = spdiag(classes)
    qp.update_H(signs * K * signs)
    qp.update_Aeq(classes.T)
    alphas, obj = qp.solve(cself.verbose)

    # Construct SVM from solution
    svm = SVM(kernel=self.kernel, gamma=self.gamma, p=self.p,
              verbose=self.verbose, sv_cutoff=self.sv_cutoff)
    svm._X = bs.instances
    svm._y = classes
    svm._alphas = alphas
    svm._objective = obj
    svm._compute_separator(K)
    svm._K = K

    cself.mention('Recomputing classes...')
    pos_confidence = svm._predictions[-bs.L_p:]
    # Relabel each positive bag's instances from its confidences.
    relabeled_pos = np.vstack([_update_classes(part)
                               for part in partition(pos_confidence,
                                                     bs.pos_groups)])
    relabeled = np.vstack([-np.ones((bs.L_n, 1)), relabeled_pos])

    flips = round(np.sum(np.abs(classes - relabeled) / 2))
    cself.mention('Class Changes: %d' % flips)
    if flips == 0:
        return None, svm

    return {'svm': svm, 'classes': relabeled}, None
def fit(self, bags, y):
    """
    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @param y : an array-like object of length n containing -1/+1 labels
    """
    # FIX: map() is a lazy iterator on Python 3; len(self._bags) below
    # and the final self.predict(self._bags) need a real list.
    self._bags = list(map(np.asmatrix, bags))
    bs = BagSplitter(self._bags, np.asmatrix(y).reshape((-1, 1)))
    self._X = np.vstack([bs.pos_instances,
                         bs.pos_instances,
                         bs.pos_instances,
                         bs.neg_instances])
    self._y = np.vstack([np.matrix(np.ones((bs.X_p + bs.L_p, 1))),
                         -np.matrix(np.ones((bs.L_p + bs.L_n, 1)))])
    if self.scale_C:
        C = self.C / float(len(self._bags))
    else:
        C = self.C

    # Setup SVM and adjust constraints
    _, _, f, A, b, lb, ub = self._setup_svm(self._y, self._y, C)
    ub[:bs.X_p] *= (float(bs.L_n) / float(bs.X_p))
    ub[bs.X_p: bs.X_p + 2 * bs.L_p] *= (float(bs.L_n) / float(bs.L_p))

    K = kernel_by_name(self.kernel, gamma=self.gamma, p=self.p)(self._X, self._X)
    D = spdiag(self._y)
    ub0 = np.matrix(ub)
    ub0[bs.X_p: bs.X_p + 2 * bs.L_p] *= 0.5

    def get_V(pos_classifications):
        # Linearization matrix: softmin gradient rows for the positive
        # bags, identity for the remaining instances.
        eye_n = bs.L_n + 2 * bs.L_p
        top = np.zeros((bs.X_p, bs.L_p))
        for row, (i, j) in enumerate(slices(bs.pos_groups)):
            top[row, i:j] = _grad_softmin(-pos_classifications[i:j],
                                          self.alpha).flat
        return sp.bmat([[sp.coo_matrix(top), None],
                        [None, sp.eye(eye_n, eye_n)]])

    V0 = get_V(np.matrix(np.zeros((bs.L_p, 1))))

    qp = IterativeQP(D * V0 * K * V0.T * D, f, A, b, lb, ub0)

    best_obj = float('inf')
    best_svm = None
    for rr in range(self.restarts + 1):
        if rr == 0:
            if self.verbose:
                print('Non-random start...')
            # Train on instances
            alphas, obj = qp.solve(self.verbose)
        else:
            if self.verbose:
                print('Random restart %d of %d...' % (rr, self.restarts))
            alphas = np.matrix([uniform(0.0, 1.0)
                                for i in range(len(lb))]).T
            obj = Objective(0.0, 0.0)
        svm = MICA(kernel=self.kernel, gamma=self.gamma, p=self.p,
                   verbose=self.verbose, sv_cutoff=self.sv_cutoff)
        svm._X = self._X
        svm._y = self._y
        svm._V = V0
        svm._alphas = alphas
        svm._objective = obj
        svm._compute_separator(K)
        svm._K = K

        class missCCCP(CCCP):

            def bailout(cself, svm, obj_val):
                return svm

            def iterate(cself, svm, obj_val):
                cself.mention('Linearizing constraints...')
                classifications = svm._predictions[bs.X_p: bs.X_p + bs.L_p]
                V = get_V(classifications)

                cself.mention('Computing slacks...')
                # Difference is [1 - y_i*(w*phi(x_i) + b)]
                pos_differences = 1.0 - classifications
                neg_differences = 1.0 + classifications
                # Slacks are positive differences only
                pos_slacks = np.multiply(pos_differences > 0,
                                         pos_differences)
                neg_slacks = np.multiply(neg_differences > 0,
                                         neg_differences)
                all_slacks = np.hstack([pos_slacks, neg_slacks])

                cself.mention('Linearizing...')
                # Compute gradient across pairs
                slack_grads = np.vstack([_grad_softmin(pair, self.alpha)
                                         for pair in all_slacks])
                # Stack results into one column
                slack_grads = np.vstack([np.ones((bs.X_p, 1)),
                                         slack_grads[:, 0],
                                         slack_grads[:, 1],
                                         np.ones((bs.L_n, 1))])
                # Update QP
                qp.update_H(D * V * K * V.T * D)
                qp.update_ub(np.multiply(ub, slack_grads))

                # Re-solve
                cself.mention('Solving QP...')
                alphas, obj = qp.solve(self.verbose)
                new_svm = MICA(kernel=self.kernel, gamma=self.gamma,
                               p=self.p, verbose=self.verbose,
                               sv_cutoff=self.sv_cutoff)
                new_svm._X = self._X
                new_svm._y = self._y
                new_svm._V = V
                new_svm._alphas = alphas
                new_svm._objective = obj
                new_svm._compute_separator(K)
                new_svm._K = K

                if cself.check_tolerance(obj_val, obj):
                    return None, new_svm

                return {'svm': new_svm, 'obj_val': obj}, None

        cccp = missCCCP(verbose=self.verbose, svm=svm,
                        obj_val=None, max_iters=self.max_iters)
        svm = cccp.solve()

        if svm is not None:
            obj = float(svm._objective)
            if obj < best_obj:
                best_svm = svm
                best_obj = obj

    if best_svm is not None:
        self._V = best_svm._V
        self._alphas = best_svm._alphas
        self._objective = best_svm._objective
        self._compute_separator(best_svm._K)
        self._bag_predictions = self.predict(self._bags)
def fit(self, bags, y):
    """
    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @param y : an array-like object of length n containing -1/+1 labels
    """
    # FIX: map() is a lazy iterator on Python 3; len(self._bags) below
    # and the final self.predict(self._bags) need a real list.
    self._bags = list(map(np.asmatrix, bags))
    bs = BagSplitter(self._bags, np.asmatrix(y).reshape((-1, 1)))
    self._X = bs.instances
    Ln = bs.L_n
    Lp = bs.L_p
    Xp = bs.X_p
    m = Ln + Xp
    if self.scale_C:
        C = self.C / float(len(self._bags))
    else:
        C = self.C

    K = kernel_by_name(self.kernel, gamma=self.gamma, p=self.p)(self._X, self._X)
    new_classes = np.matrix(np.vstack([-np.ones((Ln, 1)),
                                       np.ones((Xp, 1))]))
    self._y = new_classes
    D = spdiag(new_classes)
    setup = list(self._setup_svm(new_classes, new_classes, C))[1:]
    setup[0] = np.matrix([0])
    qp = IterativeQP(*setup)

    # LP pieces for the upsilon subproblem
    c = cvxmat(np.hstack([np.zeros(Lp + 1), np.ones(Xp + Ln)]))
    b = cvxmat(np.ones((Xp, 1)))
    A = spz(Xp, Lp + 1 + Xp + Ln)
    for row, (i, j) in enumerate(slices(bs.pos_groups)):
        A[row, i:j] = 1.0

    bottom_left = sparse(t([[-spI(Lp), spz(Lp)],
                            [spz(m, Lp), spz(m)]]))
    bottom_right = sparse([spz(Lp, m), -spI(m)])
    inst_cons = sparse(t([[spz(Xp, Lp), -spo(Xp)],
                          [spz(Ln, Lp), spo(Ln)]]))
    G = sparse(t([[inst_cons, -spI(m)],
                  [bottom_left, bottom_right]]))
    h = cvxmat(np.vstack([-np.ones((Xp, 1)),
                          np.zeros((Ln + Lp + m, 1))]))

    def to_V(upsilon):
        # Expand the per-instance weights into the block-diagonal V.
        bot = np.zeros((Xp, Lp))
        for row, (i, j) in enumerate(slices(bs.pos_groups)):
            bot[row, i:j] = upsilon.flat[i:j]
        return sp.bmat([[sp.eye(Ln, Ln), None],
                        [None, sp.coo_matrix(bot)]])

    class MICACCCP(CCCP):

        def bailout(cself, alphas, upsilon, svm):
            return svm

        def iterate(cself, alphas, upsilon, svm):
            V = to_V(upsilon)
            cself.mention('Update QP...')
            qp.update_H(D * V * K * V.T * D)
            cself.mention('Solve QP...')
            alphas, obj = qp.solve(self.verbose)

            svm = MICA(kernel=self.kernel, gamma=self.gamma, p=self.p,
                       verbose=self.verbose, sv_cutoff=self.sv_cutoff)
            svm._X = self._X
            svm._y = self._y
            svm._V = V
            svm._alphas = alphas
            svm._objective = obj
            svm._compute_separator(K)
            svm._K = K

            cself.mention('Update LP...')
            for row, (i, j) in enumerate(slices(bs.pos_groups)):
                G[row, i:j] = cvxmat(-svm._dotprods[Ln + i: Ln + j].T)
            h[Xp: Xp + Ln] = cvxmat(-(1 + svm._dotprods[:Ln]))

            cself.mention('Solve LP...')
            sol, _ = linprog(c, G, h, A, b, verbose=self.verbose)
            new_upsilon = sol[:Lp]

            if cself.check_tolerance(np.linalg.norm(upsilon - new_upsilon)):
                return None, svm

            return {'alphas': alphas,
                    'upsilon': new_upsilon,
                    'svm': svm}, None

    best_obj = float('inf')
    best_svm = None
    for rr in range(self.restarts + 1):
        if rr == 0:
            if self.verbose:
                print('Non-random start...')
            upsilon0 = np.matrix(np.vstack([np.ones((size, 1)) / float(size)
                                            for size in bs.pos_groups]))
        else:
            if self.verbose:
                print('Random restart %d of %d...' % (rr, self.restarts))
            upsilon0 = np.matrix(np.vstack([rand_convex(size).T
                                            for size in bs.pos_groups]))
        cccp = MICACCCP(verbose=self.verbose, alphas=None,
                        upsilon=upsilon0, svm=None,
                        max_iters=self.max_iters)
        svm = cccp.solve()
        if svm is not None:
            obj = float(svm._objective)
            if obj < best_obj:
                best_svm = svm
                best_obj = obj

    if best_svm is not None:
        self._V = best_svm._V
        self._alphas = best_svm._alphas
        self._objective = best_svm._objective
        self._compute_separator(best_svm._K)
        self._bag_predictions = self.predict(self._bags)
def fit(self, bags, y):
    """
    @param bags : a sequence of n bags; each bag is an m-by-k array-like
                  object containing m instances with k features
    @param y : an array-like object of length n containing -1/+1 labels
    """
    self._bags = [np.asmatrix(bag) for bag in bags]
    split = BagSplitter(self._bags, np.asmatrix(y).reshape((-1, 1)))
    self._X = np.vstack([split.pos_instances,
                         split.pos_instances,
                         split.pos_instances,
                         split.neg_instances])
    self._y = np.vstack([np.matrix(np.ones((split.X_p + split.L_p, 1))),
                         -np.matrix(np.ones((split.L_p + split.L_n, 1)))])
    C = self.C / float(len(self._bags)) if self.scale_C else self.C

    # Build the base QP and rescale the box constraints per block.
    _, _, f, A, b, lb, ub = self._setup_svm(self._y, self._y, C)
    ub[:split.X_p] *= (float(split.L_n) / float(split.X_p))
    ub[split.X_p:split.X_p + 2 * split.L_p] *= (float(split.L_n)
                                                / float(split.L_p))

    K = kernel_by_name(self.kernel, gamma=self.gamma,
                       p=self.p)(self._X, self._X)
    D = spdiag(self._y)
    ub_start = np.matrix(ub)
    ub_start[split.X_p:split.X_p + 2 * split.L_p] *= 0.5

    def get_V(pos_classifications):
        # Softmin-gradient rows for positive bags; identity elsewhere.
        eye_n = split.L_n + 2 * split.L_p
        top = np.zeros((split.X_p, split.L_p))
        for row, (i, j) in enumerate(slices(split.pos_groups)):
            top[row, i:j] = _grad_softmin(-pos_classifications[i:j],
                                          self.alpha).flat
        return sp.bmat([[sp.coo_matrix(top), None],
                        [None, sp.eye(eye_n, eye_n)]])

    V_init = get_V(np.matrix(np.zeros((split.L_p, 1))))

    qp = IterativeQP(D * V_init * K * V_init.T * D, f, A, b, lb, ub_start)

    best_obj = float('inf')
    best_svm = None
    for restart in range(self.restarts + 1):
        if restart == 0:
            if self.verbose:
                print('Non-random start...')
            # Train on instances
            alphas, obj = qp.solve(self.verbose)
        else:
            if self.verbose:
                print('Random restart %d of %d...' % (restart,
                                                      self.restarts))
            alphas = np.matrix([uniform(0.0, 1.0)
                                for _ in range(len(lb))]).T
            obj = Objective(0.0, 0.0)
        svm = MICA(kernel=self.kernel, gamma=self.gamma, p=self.p,
                   verbose=self.verbose, sv_cutoff=self.sv_cutoff)
        svm._X = self._X
        svm._y = self._y
        svm._V = V_init
        svm._alphas = alphas
        svm._objective = obj
        svm._compute_separator(K)
        svm._K = K

        class missCCCP(CCCP):

            def bailout(cself, svm, obj_val):
                return svm

            def iterate(cself, svm, obj_val):
                cself.mention('Linearizing constraints...')
                scores = svm._predictions[split.X_p:split.X_p + split.L_p]
                V = get_V(scores)

                cself.mention('Computing slacks...')
                # Difference is [1 - y_i*(w*phi(x_i) + b)]
                diff_pos = 1.0 - scores
                diff_neg = 1.0 + scores
                # Slacks are positive differences only
                slack_pos = np.multiply(diff_pos > 0, diff_pos)
                slack_neg = np.multiply(diff_neg > 0, diff_neg)
                slack_pairs = np.hstack([slack_pos, slack_neg])

                cself.mention('Linearizing...')
                # Gradient of the softmin over each slack pair
                grads = np.vstack([_grad_softmin(pair, self.alpha)
                                   for pair in slack_pairs])
                # Stack results into one column
                grads = np.vstack([np.ones((split.X_p, 1)),
                                   grads[:, 0],
                                   grads[:, 1],
                                   np.ones((split.L_n, 1))])
                # Refresh the QP with the new linearization
                qp.update_H(D * V * K * V.T * D)
                qp.update_ub(np.multiply(ub, grads))

                cself.mention('Solving QP...')
                alphas, obj = qp.solve(self.verbose)
                new_svm = MICA(kernel=self.kernel, gamma=self.gamma,
                               p=self.p, verbose=self.verbose,
                               sv_cutoff=self.sv_cutoff)
                new_svm._X = self._X
                new_svm._y = self._y
                new_svm._V = V
                new_svm._alphas = alphas
                new_svm._objective = obj
                new_svm._compute_separator(K)
                new_svm._K = K

                if cself.check_tolerance(obj_val, obj):
                    return None, new_svm

                return {'svm': new_svm, 'obj_val': obj}, None

        cccp = missCCCP(verbose=self.verbose, svm=svm,
                        obj_val=None, max_iters=self.max_iters)
        svm = cccp.solve()

        if svm is not None:
            obj = float(svm._objective)
            if obj < best_obj:
                best_svm = svm
                best_obj = obj

    if best_svm is not None:
        self._V = best_svm._V
        self._alphas = best_svm._alphas
        self._objective = best_svm._objective
        self._compute_separator(best_svm._K)
        self._bag_predictions = self.predict(self._bags)