def predict(self, Y):
    # build the test kernel between test points and the support vectors
    kernel = get_kernel(Y, self.X[:, self.svs], self.kernel, self.kparam)
    # for SVDD we additionally need the test data norms k(y, y)
    norms = get_diag_kernel(Y, self.kernel)
    # squared distance to the center: c'c - 2 * k(y, sv)' * alpha_sv + k(y, y)
    res = self.cTc - 2. * kernel.dot(self.get_support()).T + norms
    # positive scores lie outside the ball of squared radius self.radius2
    return res.reshape(Y.shape[1]) - self.radius2
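# A hedged sketch that recomputes the SVDD score with plain numpy, to make
# the formula above explicit. All names (`K_ts`, `K_tt_diag`, `alphas_sv`,
# `cTc`, `radius2`) are illustrative stand-ins for the quantities computed
# in fit() and predict(), not part of the class API.
import numpy as np

def svdd_scores(K_ts, K_tt_diag, alphas_sv, cTc, radius2):
    # squared distance of each test point to the center, minus squared radius
    dist2 = cTc - 2. * K_ts.dot(alphas_sv) + K_tt_diag
    return dist2 - radius2  # > 0 means outside the sphere (anomalous)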
import numpy as np

from tilitools.lp_ocsvm_primal_sgd import LpOcSvmPrimalSGD
from tilitools.ocsvm_dual_qp import OcSvmDualQP
from tilitools.utils_kernel import get_kernel, center_kernel, normalize_kernel
from tilitools.profiler import print_profiles

if __name__ == '__main__':
    # kernel type and outlier-fraction parameter
    ktype = 'linear'
    nu = 0.25

    # generate raw training data
    Dtrain = np.random.rand(2, 100) * 3.
    Dtrain[1, :] = np.random.rand(1, Dtrain.shape[1]) * 0.9

    kernel = get_kernel(Dtrain, Dtrain, ktype)
    svm = OcSvmDualQP(kernel, nu)
    svm.fit()

    skl_svm = LpOcSvmPrimalSGD(pnorm=2., nu=nu)
    skl_svm.fit(Dtrain)

    # generate a test grid for plotting
    delta = 0.1
    x = np.arange(-4.0, 4.0, delta)
    y = np.arange(-4.0, 4.0, delta)
    X, Y = np.meshgrid(x, y)
    sx, sy = X.shape
    Xf = np.reshape(X, (1, sx * sy))
    Yf = np.reshape(Y, (1, sx * sy))
Dtest = np.append(Xf, Yf, axis=0)

# 1.4. build the training kernels:
# - same training sample for each kernel, hence they all have the same size
# - each kernel captures one feature representation of the samples Dtrain
#   (e.g., kernel1 could be a BOW kernel and kernel2 a lexical diversity kernel;
#   here, all kernels are Gaussian with different shape parameters)
train_kernels = []
test_kernels = []
rbf_vals = [0.01, 1., 10., 100.]
data = np.concatenate((Dtrain, Dtest), axis=1)
print(data.shape)
for val in rbf_vals:
    kernel = get_kernel(data, data, type='rbf', param=val)
    kernel = center_kernel(kernel)
    kernel = normalize_kernel(kernel)
    # rows belonging to the training samples, later split into train/test columns
    train_kernel = kernel[:Dtrain.shape[1], :].copy()
    test_kernel = kernel[:Dtrain.shape[1], :].copy()
    train_kernels.append(train_kernel[:, :Dtrain.shape[1]])
    test_kernels.append(test_kernel[:, Dtrain.shape[1]:].T)

# MKL: (default) use SSAD; the kernel argument stays empty since the
# MKLWrapper supplies the (weighted) kernel
ad = ConvexSSAD([], Dy, 1.0, 1.0, 1.0 / (100 * 0.05), 1.0)

# 2. STEP: TRAIN WITH A LIST OF KERNELS
ssad = MKLWrapper(ad, train_kernels, P_NORM)
ssad.fit()
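# Hedged sketch of the slicing above: a kernel on the stacked data (train
# columns first, then test columns) is cut into the train/train and
# train/test blocks. Shapes only; the random matrix stands in for any
# centered/normalized kernel.
import numpy as np

n_tr, n_te = 5, 3
K = np.random.rand(n_tr + n_te, n_tr + n_te)
K = 0.5 * (K + K.T)            # symmetrize, as a kernel matrix would be
K_train = K[:n_tr, :n_tr]      # train x train block
K_test = K[:n_tr, n_tr:].T     # test x train block, as appended above
assert K_train.shape == (n_tr, n_tr) and K_test.shape == (n_te, n_tr)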
def fit(self, X, max_iter=-1, center=False, normalize=False):
    """
    :param X: Data matrix, assumed to be feats x samples.
    :param max_iter: *ignored*, just for compatibility.
    :return: Alphas and threshold for the dual SVDD.
    """
    self.X = X.copy()
    dims, self.samples = X.shape
    if self.samples < 1:
        print('Invalid training data.')
        return -1

    # number of training examples
    N = self.samples

    kernel = get_kernel(X, X, self.kernel, self.kparam)
    if center:
        kernel = center_kernel(kernel)
    if normalize:
        kernel = normalize_kernel(kernel)
    norms = np.diag(kernel).copy()

    if self.nu >= 1.0:
        print("Center-of-mass solution.")
        self.alphas = np.ones(self.samples) / float(self.samples)
        self.radius2 = 0.0
        self.svs = np.array(range(self.samples), dtype='i')
        self.pobj = 0.0  # TODO: calculate real primal objective
        self.cTc = self.alphas[self.svs].T.dot(
            kernel[self.svs, :][:, self.svs].dot(self.alphas[self.svs]))
        return self.alphas, self.radius2

    C = 1. / float(self.samples * self.nu)

    # quadratic part: the kernel matrix
    P = 2.0 * matrix(kernel)
    # linear part: the (negated) diagonal of the kernel matrix
    q = -matrix(norms)
    # equality constraint: sum_i alpha_i = 1, encoded as A * alpha = b
    A = matrix(1.0, (1, N))
    b = matrix(1.0, (1, 1))
    # box constraints: 0 <= alpha_i <= C
    G1 = spmatrix(1.0, range(N), range(N))
    G = sparse([G1, -G1])
    h1 = matrix(C, (N, 1))
    h2 = matrix(0.0, (N, 1))
    h = matrix([h1, h2])
    sol = qp(P, q, G, h, A, b)

    # store solution
    self.alphas = np.array(sol['x'], dtype=float)
    self.pobj = -sol['primal objective']

    # find support vectors
    self.svs = np.where(self.alphas > self.PRECISION)[0]
    self.cTc = self.alphas.T.dot(kernel.dot(self.alphas))

    # evaluate the support vectors to obtain the threshold (squared radius)
    self.radius2 = 0.
    thres = self.predict(X[:, self.svs])
    self.radius2 = np.min(thres)
    return self.alphas, thres
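# Hedged sanity check for the QP solution above: the dual SVDD constraints
# are sum_i alpha_i = 1 and 0 <= alpha_i <= C with C = 1/(N*nu). The names
# and tolerance are illustrative, not part of the class.
import numpy as np

def check_svdd_dual(alphas, C, tol=1e-6):
    alphas = np.asarray(alphas).ravel()
    assert abs(alphas.sum() - 1.0) < tol   # sum_i alpha_i = 1
    assert np.all(alphas >= -tol)          # alpha_i >= 0
    assert np.all(alphas <= C + tol)       # alpha_i <= C
    return True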
# generate training data
co.setseed(11)
Dtrainp = co.normal(2, N_pos) * 0.6
Dtrainu = co.normal(2, N_unl) * 0.6
Dtrainn = co.normal(2, N_neg) * 0.6
Dtrain21 = Dtrainn - 1
Dtrain21[0, :] = Dtrainn[0, :] + 1
Dtrain22 = -Dtrain21

# training data
Dtrain = co.matrix([[Dtrainp], [Dtrainu], [Dtrainn + 0.8]])
Dtrain = np.array(Dtrain)

# build the training kernel
kernel = get_kernel(Dtrain, Dtrain, type=k_type, param=k_param)

# use SSAD
ssad = ConvexSSAD(kernel, Dy, 1. / (10. * 0.1), 1. / (10. * 0.1), 1., 1. / (10. * 0.1))
ssad.fit()

# generate test data from a grid for nicer plots
delta = 0.25
x = np.arange(-3.0, 3.0, delta)
y = np.arange(-3.0, 3.0, delta)
X, Y = np.meshgrid(x, y)
sx, sy = X.shape
Xf = np.reshape(X, (1, sx * sy))
Yf = np.reshape(Y, (1, sx * sy))
Dtest = np.append(Xf, Yf, axis=0)
print(Dtest.shape)
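# Hedged sketch for scoring the grid: a generic kernel-expansion decision
# function f(x) = sum_i y_i * alpha_i * k(x_i, x). `alphas` below is an
# illustrative stand-in for the dual solution held by the fitted model;
# consult the ConvexSSAD class for its exact accessor before using this.
test_kernel = get_kernel(Dtest, Dtrain, type=k_type, param=k_param)  # (test x train)
# scores = test_kernel.dot(np.array(Dy, dtype=float) * alphas)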
Dtrainn = co.normal(2, N_neg) * 0.3
Dtrain21 = Dtrainn - 1
Dtrain21[0, :] = Dtrainn[0, :] + 1
Dtrain22 = -Dtrain21

# 1.3. concatenate training data
Dtrain = co.matrix([[Dtrainp], [Dtrainu], [Dtrainn + 1.0], [Dtrainn - 1.0], [Dtrain21], [Dtrain22]])
Dtrain = np.array(Dtrain)

# 1.4. build the training kernels:
# - same training sample for each kernel, hence they all have the same size
# - each kernel captures one feature representation of the samples Dtrain
#   (e.g., kernel1 could be a BOW kernel and kernel2 a lexical diversity kernel)
# - here: kernel1-kernel3 are Gaussian kernels with different shape parameters
#   and kernel4 is a simple linear kernel
kernel1 = get_kernel(Dtrain, Dtrain, type='rbf', param=1.0)
kernel2 = get_kernel(Dtrain, Dtrain, type='rbf', param=1.0 / 50.0)
kernel3 = get_kernel(Dtrain, Dtrain, type='rbf', param=1.0 / 100.0)
kernel4 = get_kernel(Dtrain, Dtrain, type='linear')

# MKL: (default) use SSAD; the kernel argument stays empty since the
# MKLWrapper supplies the (weighted) kernel
ad = ConvexSSAD([], Dy, 1.0, 1.0, 1.0 / (100 * 0.05), 1.0)
# ad = OCSVM(kernel1, C=0.02)  # alternative anomaly detector

# 2. STEP: TRAIN WITH A LIST OF KERNELS
ssad = MKLWrapper(ad, [kernel1, kernel2, kernel3, kernel4], Dy, P_NORM)
ssad.fit()

# 3. TEST THE TRAINING DATA (just because we are curious)
# 3.1. build the test kernel
kernel1 = get_kernel(Dtrain, Dtrain[:, ssad.get_support_dual()], type='rbf', param=1.0)
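# Hedged sketch of what the MKL wrapper optimizes: a non-negative weighted
# sum K = sum_m d_m * K_m of the base kernels, with the weight vector d
# constrained by a p-norm ball ||d||_p <= 1 (p plays the role of P_NORM).
# Self-contained toy with random PSD matrices; the learned weights come
# from MKLWrapper, not from this snippet.
import numpy as np

base = [np.random.rand(10, 10) for _ in range(4)]
base = [B.dot(B.T) for B in base]      # make each base kernel PSD
p = 2.0
d = np.ones(len(base))                 # uniform initial weights
d /= np.linalg.norm(d, ord=p)          # enforce ||d||_p = 1
K_combined = sum(w * K for w, K in zip(d, base))
print(K_combined.shape)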
import numpy as np
import matplotlib.pyplot as plt

from tilitools.lp_ocsvm_primal_sgd import LpOcSvmPrimalSGD
from tilitools.ocsvm_dual_qp import OcSvmDualQP
from tilitools.huber_ocsvm_primal import HuberOcsvmPrimal
from tilitools.utils_kernel import get_kernel
from tilitools.profiler import print_profiles

if __name__ == '__main__':
    nu = 0.25

    # generate raw training data
    Dtrain = np.random.rand(2, 200) * 3.
    Dtrain[1, :] = np.random.rand(1, Dtrain.shape[1]) * 0.9

    kernel = get_kernel(Dtrain, Dtrain, 'linear')

    huber = HuberOcsvmPrimal(nu)
    huber.fit(Dtrain)

    svm = OcSvmDualQP(kernel, nu)
    svm.fit()

    skl_svm = LpOcSvmPrimalSGD(pnorm=2., nu=nu)
    skl_svm.fit(Dtrain)

    # generate a test grid for plotting
    delta = 0.1
    x = np.arange(-4.0, 4.0, delta)
    y = np.arange(-4.0, 4.0, delta)
    X, Y = np.meshgrid(x, y)
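    # Hedged check of the nu-property: nu upper-bounds the fraction of margin
    # violations and lower-bounds the fraction of support vectors. get_alphas()
    # is the accessor the latent training routine below also calls on
    # OcSvmDualQP; the 1e-6 cut-off is an illustrative tolerance.
    alphas = np.asarray(svm.get_alphas()).ravel()
    sv_frac = np.sum(alphas > 1e-6) / float(Dtrain.shape[1])
    print('support vector fraction: {0:.2f} (expected >= nu = {1})'.format(sv_frac, nu))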
def fit(self, max_iter=50, hotstart=None, prec=1e-3, center=False, normalize=False):
    """
    Solve the optimization problem with a sequential convex
    programming/DC-programming approach: iteratively, find the most likely
    configuration of the latent variables, then optimize the model
    parameters with the latent states fixed.
    """
    N = self.sobj.get_num_samples()
    DIMS = self.sobj.get_num_dims()

    # intermediate solutions and latent variables
    latent = [0.0] * N
    sol = self.sobj.get_hotstart_sol()
    if hotstart is not None and hotstart.size == DIMS:
        print('New hotstart position defined.')
        sol = hotstart

    psi = np.zeros((DIMS, N))      # (dim x exm)
    old_psi = np.zeros((DIMS, N))  # (dim x exm)
    threshold = 0.

    # terminate if the objective function value does not change much
    n_iter = 0
    allobjs = []
    while n_iter < max_iter and (n_iter < 2 or np.sum(abs(psi - old_psi)) >= prec):
        print('Starting iteration {0}.'.format(n_iter))
        print(np.sum(abs(psi - old_psi)))
        n_iter += 1
        old_psi = psi.copy()

        # 1. most likely configuration:
        # for the current solution, compute the most likely latent
        # variable configuration of each example
        for i in range(N):
            _, latent[i], psi[:, i] = self.sobj.argmax(sol, i)
            psi[:, i] /= np.linalg.norm(psi[:, i], ord=self.norm_ord)

        # 2. solve the intermediate convex optimization problem
        kernel = get_kernel(psi, psi)
        if center:
            kernel = center_kernel(kernel)
        if normalize:
            kernel = normalize_kernel(kernel)
        svm = OcSvmDualQP(kernel, self.nu)
        svm.fit()
        threshold = svm.get_threshold()
        self.svs_inds = svm.get_support_dual()
        sol = psi.dot(svm.get_alphas())

        # calculate the objective
        self.threshold = threshold
        slacks = threshold - sol.T.dot(psi)
        slacks[slacks < 0.0] = 0.0
        obj = 0.5 * sol.T.dot(sol) - threshold + 1. / (float(N) * self.nu) * np.sum(slacks)
        print("Iter {0}: Values (Threshold-Slacks-Objective) = {1}-{2}-{3}".format(
            n_iter, threshold, np.sum(slacks), obj))
        allobjs.append(obj)

    self.slacks = slacks
    self.sol = sol
    self.latent = latent
    return sol, latent, threshold
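# Hedged toy illustrating the alternation above on a trivial problem:
# pick latent signs z_i in {-1, +1} given the current center (step 1),
# then refit the center with the latents fixed (step 2), so that
# sum_i (z_i * x_i - w)^2 decreases every round. Purely illustrative; it
# shares only the fix-latents/refit structure with the method in this file.
import numpy as np

x = np.random.randn(100) * 2. + 3.
w = 0.
for _ in range(10):
    z = np.where(np.abs(x - w) <= np.abs(-x - w), 1., -1.)  # step 1: best latent signs
    w = np.mean(z * x)                                      # step 2: refit with latents fixed
print('converged center: {0:.3f}'.format(w))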
import numpy as np
import pylab as pl

from tilitools.bdd import BDD
from tilitools.utils_kernel import get_kernel, get_diag_kernel

if __name__ == '__main__':
    kparam = 0.1
    ktype = 'rbf'

    # generate raw training data
    Dtrain1 = np.random.randn(2, 100) * 0.2
    Dtrain2 = np.random.randn(2, 100) * 0.3 + 0.8
    Dtrain = np.concatenate([Dtrain1.T, Dtrain2.T]).T
    print(Dtrain.shape)

    kernel = get_kernel(Dtrain, Dtrain, ktype, kparam)
    bdd = BDD(kernel)
    bdd.fit()

    # generate test data grid
    delta = 0.1
    x = np.arange(-4.0, 4.0, delta)
    y = np.arange(-4.0, 4.0, delta)
    X, Y = np.meshgrid(x, y)
    sx, sy = X.shape
    Xf = np.reshape(X, (1, sx * sy))
    Yf = np.reshape(Y, (1, sx * sy))
    Dtest = np.append(Xf, Yf, axis=0)
    print(Dtest.shape)

    # build kernel map
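    # Hedged continuation of the "build kernel map" step above: pair every
    # grid point with the training data, mirroring how get_kernel is called
    # for training (get_diag_kernel is imported for models that additionally
    # need the test self-similarities, as in SVDD-style scoring).
    kernel_map = get_kernel(Dtest, Dtrain, ktype, kparam)  # (test x train)
    print(kernel_map.shape)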
Dtest = np.append(Xf, Yf, axis=0)

# 1.4. build the training kernels:
# - same training sample for each kernel, hence they all have the same size
# - each kernel captures one feature representation of the samples Dtrain
#   (e.g., one could be a BOW kernel, another a lexical diversity kernel;
#   here, all kernels are Gaussian with different shape parameters)
train_kernels = []
test_kernels = []
rbf_vals = [0.001, 0.01, 1., 2., 4., 10., 100.]
eigenvalues = []
data = np.concatenate((Dtrain, Dtest), axis=1)
print(data.shape)
for val in rbf_vals:
    kernel = get_kernel(data, data, type='rbf', param=val)
    kernel = center_kernel(kernel)
    kernel = normalize_kernel(kernel)
    train_kernel = kernel[:Dtrain.shape[1], :].copy()
    test_kernel = kernel[:Dtrain.shape[1], :].copy()
    # ratio of the two largest eigenvalues as a crude summary of the spectrum
    values = np.sort(np.real(np.linalg.eigvals(kernel)))
    eigenvalues.append(values[-1] / values[-2])
    train_kernels.append(train_kernel[:, :Dtrain.shape[1]])
    test_kernels.append(test_kernel[:, Dtrain.shape[1]:].T)

# MKL: (default) use SSAD; the kernel argument stays empty since the
# MKLWrapper supplies the (weighted) kernel
ad = ConvexSSAD(None, Dy, 0.0, 10.0, 1.0 / (N_unl * 0.5), 100.0)
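# Hedged aside: centered/normalized kernel matrices are symmetric, so
# np.linalg.eigvalsh is the cheaper and numerically safer choice here; it
# returns real eigenvalues in ascending order, matching the sorted-ratio
# use above. Minimal self-contained demo on a random PSD matrix:
import numpy as np

M = np.random.rand(6, 6)
M = M.dot(M.T)                  # symmetric positive semi-definite
values = np.linalg.eigvalsh(M)  # real eigenvalues, ascending order
print(values[-1] / values[-2])  # ratio of the two largest eigenvalues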