def train_dc(self, zero_shot=False, max_iter=50, hotstart=matrix([])):
    """ Solve the optimization problem with a sequential convex
        programming/DC-programming approach:
        Iteratively, find the most likely configuration of the latent
        variables and then optimize for the model parameters using
        fixed latent states.
    """
    N = self.sobj.get_num_samples()
    DIMS = self.sobj.get_num_dims()

    # intermediate solutions and latent variables
    latent = [0.0] * N

    sol = self.sobj.get_hotstart_sol()
    if hotstart.size == (DIMS, 1):
        print('New hotstart position defined.')
        sol = hotstart

    psi = matrix(0.0, (DIMS, N))      # (dim x exm)
    old_psi = matrix(0.0, (DIMS, N))  # (dim x exm)
    threshold = 0
    obj = -1
    iter = 0
    allobjs = []
    restarts = 0

    # terminate if the objective function value doesn't change much
    while iter < max_iter and (iter < 2 or sum(sum(abs(np.array(psi - old_psi)))) >= 0.001):
        print('Starting iteration {0}.'.format(iter))
        print(sum(sum(abs(np.array(psi - old_psi)))))
        iter += 1
        old_psi = matrix(psi)
        old_sol = sol

        # 1. linearize
        # for the current solution compute the
        # most likely latent variable configuration
        for i in range(N):
            (foo, latent[i], psi[:, i]) = self.sobj.argmax(sol, i, add_prior=True)
            # normalize each example's joint feature map
            psi[:, i] /= np.linalg.norm(psi[:, i], ord=self.norm_ord)
            # alternative (partially supervised) initialization:
            # if i > 10:
            #     (foo, latent[i], psi[:, i]) = self.sobj.argmax(sol, i)
            # else:
            #     psi[:, i] = self.sobj.get_joint_feature_map(i)
            #     latent[i] = self.sobj.y[i]
        print(psi)

        # 2. solve the intermediate convex optimization problem
        kernel = Kernel.get_kernel(psi, psi)
        svm = OCSVM(kernel, self.C)
        svm.train_dual()
        threshold = svm.get_threshold()
        self.svs_inds = svm.get_support_dual()
        sol = psi * svm.get_alphas()
        print(matrix([sol.trans(), old_sol.trans()]).trans())

        # degenerate solution: all examples are support vectors although
        # C would allow for outliers -> restart with a fresh initialization
        if len(self.svs_inds) == N and self.C > (1.0 / float(N)):
            print('###################################')
            print('Degenerate solution.')
            print('###################################')
            restarts += 1
            if restarts > 10:
                print('###################################')
                print('Too many restarts...')
                print('###################################')
                # calculate objective
                self.threshold = threshold
                slacks = [max([0.0, np.single(threshold - sol.trans() * psi[:, i])]) for i in range(N)]
                obj = 0.5 * np.single(sol.trans() * sol) - np.single(threshold) + self.C * sum(slacks)
                print("Iter {0}: Values (Threshold-Slacks-Objective) = {1}-{2}-{3}".format(
                    int(iter), np.single(threshold), np.single(sum(slacks)), np.single(obj)))
                allobjs.append(float(np.single(obj)))
                break

            # restart: re-initialize intermediate solutions and latent variables
            latent = [0.0] * N
            sol = self.sobj.get_hotstart_sol()
            if hotstart.size == (DIMS, 1):
                print('New hotstart position defined.')
                sol = hotstart
            psi = matrix(0.0, (DIMS, N))      # (dim x exm)
            old_psi = matrix(0.0, (DIMS, N))  # (dim x exm)
            threshold = 0
            obj = -1
            iter = 0
            allobjs = []

        # calculate objective
        self.threshold = threshold
        slacks = [max([0.0, np.single(threshold - sol.trans() * psi[:, i])]) for i in range(N)]
        obj = 0.5 * np.single(sol.trans() * sol) - np.single(threshold) + self.C * sum(slacks)
        print("Iter {0}: Values (Threshold-Slacks-Objective) = {1}-{2}-{3}".format(
            int(iter), np.single(threshold), np.single(sum(slacks)), np.single(obj)))
        allobjs.append(float(np.single(obj)))

        # zero-shot learning: single iteration, hence random
        # structure coefficients
        if zero_shot:
            print('LatentOcSvm: Zero shot learning.')
            break

    print('+++++++++')
    print(threshold)
    print(slacks)
    print(obj)
    print('+++++++++')
    self.slacks = slacks
    print(allobjs)
    print(sum(sum(abs(np.array(psi - old_psi)))))
    print('+++++++++ SAD END')

    self.sol = sol
    self.latent = latent
    return sol, latent, threshold
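# --- Hedged sketch (not part of the original module) -------------------------
# The per-iteration bookkeeping in train_dc() evaluates the one-class SVM
# primal objective 0.5*||w||^2 - rho + C * sum_i max(0, rho - w'psi_i).
# Below is a minimal NumPy-only illustration of that computation; w, rho, Psi
# and C are plain stand-ins for sol, threshold, psi and self.C.
import numpy as np

def ocsvm_primal_objective(w, rho, Psi, C):
    # w: (dims x 1) weight vector, Psi: (dims x samples) joint feature maps
    scores = w.T.dot(Psi)                   # projections w'psi_i, shape (1 x samples)
    slacks = np.maximum(0.0, rho - scores)  # hinge slacks per example
    return 0.5 * w.T.dot(w).item() - rho + C * slacks.sum()

# toy usage with random stand-in values
w = np.random.randn(5, 1)
Psi = np.random.randn(5, 20)
print(ocsvm_primal_objective(w, rho=0.5, Psi=Psi, C=1.0))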
# train a one-class SVM on the toy data and visualize its decision surface
# (kernel, Dtrain, ktype and kparam are assumed to be defined earlier in the script)
svm = OCSVM(kernel, 0.1)
svm.train_dual()

# build a regular 2d grid of test points
delta = 0.07
x = np.arange(-4.0, 4.0, delta)
y = np.arange(-4.0, 4.0, delta)
X, Y = np.meshgrid(x, y)
(sx, sy) = X.shape
Xf = np.reshape(X, (1, sx * sy))
Yf = np.reshape(Y, (1, sx * sy))
Dtest = np.append(Xf, Yf, axis=0)
print(Dtest.shape)

# build the test kernel between grid points and support vectors
kernel = Kernel.get_kernel(co.matrix(Dtest), Dtrain[:, svm.get_support_dual()], ktype, kparam)
(res, state) = svm.apply_dual(kernel)
print(res.size)

# plot anomaly scores, the decision boundary at the learned threshold,
# the support vectors (black) and the training data
Z = np.reshape(res, (sx, sy))
plt.contourf(X, Y, Z)
plt.contour(X, Y, Z, [np.array(svm.get_threshold())[0, 0]])
plt.scatter(Dtrain[0, svm.get_support_dual()], Dtrain[1, svm.get_support_dual()], 40, c='k')
plt.scatter(Dtrain[0, :], Dtrain[1, :], 10)
plt.show()
print('finished')
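# --- Hedged sketch (not part of the original script) --------------------------
# The demo above assumes a 2d training matrix Dtrain and kernel settings
# ktype/kparam defined earlier in the script. A hypothetical setup that could
# precede it is sketched below; the concrete values are illustrative only.
import numpy as np
import cvxopt as co

np.random.seed(1)
Dtrain = co.matrix(0.7 * np.random.randn(2, 100))  # (dims x samples) toy 2d blob
ktype = 'rbf'    # assumed kernel identifier understood by Kernel.get_kernel
kparam = 1.0     # assumed kernel parameter (e.g. RBF width)
kernel = Kernel.get_kernel(Dtrain, Dtrain, ktype, kparam)  # training kernel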
def train_dc(self, max_iter=50, hotstart=matrix([])):
    """ Solve the optimization problem with a sequential convex
        programming/DC-programming approach:
        Iteratively, find the most likely configuration of the latent
        variables and then optimize for the model parameters using
        fixed latent states.
    """
    N = self.sobj.get_num_samples()
    DIMS = self.sobj.get_num_dims()

    # intermediate solutions and latent variables
    latent = [0.0] * N

    sol = self.sobj.get_hotstart_sol()
    if hotstart.size == (DIMS, 1):
        print('New hotstart position defined.')
        sol = hotstart

    psi = matrix(0.0, (DIMS, N))      # (dim x exm)
    old_psi = matrix(0.0, (DIMS, N))  # (dim x exm)
    threshold = 0
    obj = -1
    iter = 0
    allobjs = []

    # terminate if the objective function value doesn't change much
    while iter < max_iter and (iter < 3 or sum(sum(abs(np.array(psi - old_psi)))) >= 0.001):
        print('Starting iteration {0}.'.format(iter))
        print(sum(sum(abs(np.array(psi - old_psi)))))
        iter += 1
        old_psi = matrix(psi)
        old_sol = sol

        # 1. linearize
        # for the current solution compute the
        # most likely latent variable configuration
        for i in range(N):
            (foo, latent[i], psi[:, i]) = self.sobj.argmax(sol, i, add_prior=True)
            # normalize each example's joint feature map
            norm = np.linalg.norm(psi[:, i], 2)
            psi[:, i] /= norm
            # alternative (partially supervised) initialization:
            # if i > 10:
            #     (foo, latent[i], psi[:, i]) = self.sobj.argmax(sol, i)
            # else:
            #     psi[:, i] = self.sobj.get_joint_feature_map(i)
            #     latent[i] = self.sobj.y[i]

        # 2. solve the intermediate convex optimization problem
        psi_star = matrix(psi)
        kernel = Kernel.get_kernel(psi_star, psi_star)
        svm = OCSVM(kernel, self.C)
        svm.train_dual()
        threshold = svm.get_threshold()
        sol = psi_star * svm.get_alphas()
        print(matrix([sol.trans(), old_sol.trans()]).trans())

        # calculate objective
        slacks = [max([0.0, np.single(threshold - sol.trans() * psi[:, i])]) for i in range(N)]
        obj = 0.5 * np.single(sol.trans() * sol) - np.single(threshold) + self.C * sum(slacks)
        print("Iter {0}: Values (Threshold-Slacks-Objective) = {1}-{2}-{3}".format(
            int(iter), np.single(threshold), np.single(sum(slacks)), np.single(obj)))
        allobjs.append(float(np.single(obj)))

    print('+++++++++')
    print(threshold)
    print(slacks)
    print(obj)
    print('+++++++++')
    print(allobjs)
    print(sum(sum(abs(np.array(psi - old_psi)))))
    print('+++++++++ SAD END')

    self.sol = sol
    self.latent = latent
    return sol, latent, threshold
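# --- Hedged sketch (not part of the original module) -------------------------
# The linearization step above normalizes each example's joint feature map
# column by column before the kernel is built. The same operation, vectorized
# with plain NumPy (Psi is an array stand-in for the cvxopt matrix psi):
import numpy as np

Psi = np.random.randn(8, 50)                               # (dims x samples)
norms = np.linalg.norm(Psi, ord=2, axis=0, keepdims=True)  # one L2 norm per column
Psi_normalized = Psi / norms                               # unit-norm columns
print(np.allclose(np.linalg.norm(Psi_normalized, axis=0), 1.0))  # expect True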