def solve(self, C, all_xt, all_lt, task_indicator, M):
    """ use finite differences to compute gradient, use generic solver """

    num_xt = len(all_xt)

    # start in the middle of the box
    alphas = np.ones(num_xt) * C * 0.5

    # add box constraints
    bounds = [(0, C) for idx in range(num_xt)]

    fix_args = (all_xt, all_lt, task_indicator, M)

    # step size for the finite-difference gradient approximation
    epsilon = C * 0.1

    print "using C:", C

    # call solver
    self.alpha_opt, nfeval, rc = scipy.optimize.fmin_tnc(
        compute_dual_objective, alphas, bounds=bounds, approx_grad=True,
        messages=5, args=fix_args, maxfun=500, epsilon=epsilon)

    # compute W from alphas
    self.W = alphas_to_w(self.alpha_opt, all_xt, all_lt, task_indicator, M)

    return True
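
# For reference, a minimal sketch of what compute_dual_objective (minimized
# above) is assumed to evaluate: the negated SVM dual,
# 0.5 * alpha' Q alpha - sum(alpha), with Q as constructed in the cvxopt
# solver below. This is an illustration consistent with the surrounding code,
# not necessarily the exact implementation.
def compute_dual_objective_sketch(alphas, xt, lt, task_indicator, M):
    """ hypothetical reference implementation of the dual objective """
    num_xt = len(xt)
    obj = -np.sum(alphas)
    for i in xrange(num_xt):
        for j in xrange(num_xt):
            s = task_indicator[i]
            t = task_indicator[j]
            obj += 0.5 * alphas[i] * alphas[j] * M[s, t] * lt[i] * lt[j] \
                * np.dot(xt[i], xt[j])
    return obj
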
def solve(self, C, xt, lt, task_indicator, M):
    """ solve dual using cvxopt """

    num_xt = len(xt)

    # set up quadratic term
    Q = np.zeros((num_xt, num_xt))
    for i in xrange(num_xt):
        for j in xrange(num_xt):
            s = task_indicator[i]
            t = task_indicator[j]
            Q[i, j] = M[s, t] * lt[i] * lt[j] * np.dot(xt[i], xt[j])

    # set up linear term
    lin = -np.ones(num_xt)

    # if we would like to use bias
    #b = np.zeros((M,1))
    #label_matrix = numpy.zeros((M,N))

    # set up QP
    p = QP(Q, lin, lb=np.zeros(num_xt), ub=C*np.ones(num_xt))
    #Aeq=label_matrix, beq=b
    p.debug = 1

    # run solver
    r = p.solve('cvxopt_qp', iprint=0)

    # recover result
    self.alphas = r.xf
    self.dual_obj = self.obj = r.ff

    # compute W from alphas
    self.W = alphas_to_w(self.alphas, xt, lt, task_indicator, M)

    return True
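
# A minimal sketch of the behavior assumed of alphas_to_w: each task weight
# vector is a task-similarity-weighted combination of the example
# contributions, W[t] = sum_i M[t, s_i] * alpha_i * y_i * x_i. This is
# consistent with the V update and the v_to_w call in the coordinate descent
# solvers below, though the helper's actual implementation may differ.
def alphas_to_w_sketch(alphas, xt, lt, task_indicator, M):
    """ hypothetical reference implementation of alphas_to_w """
    num_tasks = M.shape[0]
    num_dim = len(xt[0])
    W = np.zeros((num_tasks, num_dim))
    for i in xrange(len(alphas)):
        s = task_indicator[i]
        for t in xrange(num_tasks):
            W[t, :] += M[t, s] * alphas[i] * lt[i] * xt[i]
    return W
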
def solver_mtk_shogun(C, all_xt, all_lt, task_indicator, M, L, eps,
                      target_obj):
    """ implementation using multitask kernel """

    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)
    tsm = numpy.array(M)

    print "task_sim:", tsm

    num_tasks = L.shape[0]

    # sanity checks
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects
    if type(xt[0]) == numpy.string_:
        feat = StringCharFeatures(DNA)
        xt = [str(a) for a in xt]
        feat.set_features(xt)
        base_kernel = WeightedDegreeStringKernel(feat, feat, 8)
    else:
        feat = RealFeatures(xt.T)
        base_kernel = LinearKernel(feat, feat)

    lab = Labels(lt)

    # set up normalizer
    normalizer = MultitaskKernelNormalizer(tt.tolist())

    for i in xrange(num_tasks):
        for j in xrange(num_tasks):
            normalizer.set_task_similarity(i, j, M[i, j])

    print "num of unique tasks: ", normalizer.get_num_unique_tasks(
        task_indicator)

    # set up kernel
    base_kernel.set_cache_size(2000)
    base_kernel.set_normalizer(normalizer)
    base_kernel.init_normalizer()

    # set up svm
    svm = SVMLight()  #LibSVM()
    svm.set_epsilon(eps)

    #print "reducing num threads to one"
    #svm.parallel.set_num_threads(1)
    #print "using one thread"

    # how often do we like to compute objective etc
    svm.set_record_interval(0)
    svm.set_target_objective(target_obj)

    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    svm.io.set_loglevel(MSG_DEBUG)
    #SET THREADS TO 1

    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # prepare for training
    svm.set_labels(lab)
    svm.set_kernel(base_kernel)

    # train svm
    svm.train()

    train_times = svm.get_training_times()
    objectives = [-obj for obj in svm.get_dual_objectives()]

    if False:
        # get model parameters
        sv_idx = svm.get_support_vectors()
        sparse_alphas = svm.get_alphas()

        assert len(sv_idx) == len(sparse_alphas)

        # compute dense alpha (remove label)
        alphas = numpy.zeros(len(xt))
        for id_sparse, id_dense in enumerate(sv_idx):
            alphas[id_dense] = sparse_alphas[id_sparse] * lt[id_dense]

        # print alphas
        W = alphas_to_w(alphas, xt, lt, task_indicator, M)

        primal_obj = compute_primal_objective(
            W.reshape(W.shape[0] * W.shape[1]), C, all_xt, all_lt,
            task_indicator, L)
        objectives.append(primal_obj)
        train_times.append(train_times[-1] + 100)

    return objectives, train_times
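
# The MultitaskKernelNormalizer above rescales every base kernel entry by the
# similarity of the tasks the two examples belong to, i.e. it induces the
# multitask kernel K((x_i, s_i), (x_j, s_j)) = M[s_i, s_j] * k(x_i, x_j).
# A plain numpy sketch of the induced matrix for the linear base kernel
# (illustration only, not the shogun API):
def multitask_kernel_matrix_sketch(X, task_indicator, M):
    """ K[i, j] = M[s_i, s_j] * <x_i, x_j> for examples X (one per row) """
    base = numpy.dot(X, X.T)
    sim = M[numpy.ix_(task_indicator, task_indicator)]
    return sim * base
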
def solve(self, C, all_xt, all_lt, task_indicator, M, L,
          record_progress=False):
    """ implementation of our dual coordinate descent solver """

    num_xt = len(all_xt)
    num_tasks = M.shape[0]
    num_dim = len(all_xt[0])

    V = np.zeros((num_tasks, num_dim))
    alphas = np.zeros(num_xt)

    # stopping flag (a fixed iteration budget is used below instead)
    optimal = False

    primal_obj = []
    dual_obj = []

    #while not optimal:
    for iteration in xrange(500):

        # dual coordinate descent: touch one example at a time
        for i in xrange(num_xt):

            # current task id
            ti = task_indicator[i]

            # the heart of the beast: the update
            inner_sum = 0
            for t in xrange(num_tasks):
                inner_sum += M[t, ti] * all_lt[i] * np.dot(V[t, :], all_xt[i])

            d = (1.0 - inner_sum) / np.dot(all_xt[i], all_xt[i])

            # store previous alpha
            alpha_old = alphas[i]

            # project onto feasible set
            alphas[i] = max(0, min(C, alphas[i] + d))

            # update w for example
            V[ti, :] += (alphas[i] - alpha_old) * all_lt[i] * all_xt[i]

            # keep track of objectives
            if record_progress and iteration < 3 and i % 5 == 0:
                # compute objective after outer iteration
                W_tmp = alphas_to_w(alphas, all_xt, all_lt, task_indicator,
                                    M).reshape(num_tasks * num_dim)
                primal_obj.append(compute_primal_objective(
                    W_tmp, C, all_xt, all_lt, task_indicator, L))
                dual_obj.append(compute_dual_objective(
                    alphas, all_xt, all_lt, task_indicator, M))

    # compute W from alphas
    W = alphas_to_w(alphas, all_xt, all_lt, task_indicator, M)
    W2 = v_to_w(V, all_xt, all_lt, task_indicator, M)

    # record final obj
    self.dual_obj = compute_dual_objective(alphas, all_xt, all_lt,
                                           task_indicator, M)
    self.primal_obj = compute_primal_objective(
        W.reshape(num_tasks * num_dim), C, all_xt, all_lt, task_indicator, L)

    return True
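
# The innermost loop over tasks computes sum_t M[t, ti] * y_i * <V[t], x_i>,
# which can be collapsed into two matrix products. A sketch under the same
# shape assumptions (V: num_tasks x num_dim, x_i: num_dim):
def gradient_inner_sum_sketch(V, M, x_i, l_i, ti):
    """ vectorized equivalent of the inner_sum loop above """
    return l_i * np.dot(M[:, ti], np.dot(V, x_i))
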
def solve(self, C, all_xt, all_lt, task_indicator, M, L,
          record_progress=False):
    """ implementation of our dual coordinate descent solver,
        including LibLinear's shrinking strategy """

    num_xt = len(all_xt)
    num_tasks = M.shape[0]
    num_dim = len(all_xt[0])

    V = np.zeros((num_tasks, num_dim))
    alphas = np.zeros(num_xt)

    # stopping flag (a fixed iteration budget is used below instead)
    optimal = False

    primal_obj = []
    dual_obj = []

    # indices of active set
    active_idx = range(num_xt)
    remove_list = []

    # set up projected gradients
    PG = None
    PGmax_old = float("inf")
    PGmin_old = float("-inf")
    PGmax_new = None
    PGmin_new = None

    epsilon = 0.00001

    #while not optimal: #TODO use other criterion
    for iteration in xrange(500):

        #print "removing:", len(remove_list), "remaining:", len(active_idx)

        # shrink active set
        active_idx = list(set(active_idx) - set(remove_list))
        remove_list = []

        # process in random order
        random.shuffle(active_idx)

        PGmax_new = float("-inf")
        PGmin_new = float("inf")

        #print "iteration", iteration

        # dual coordinate descent: touch one example at a time
        for i in active_idx:

            # current task id
            ti = task_indicator[i]

            # the heart of the beast: the update
            inner_sum = 0
            for t in xrange(num_tasks):
                inner_sum += M[t, ti] * all_lt[i] * np.dot(V[t, :], all_xt[i])

            # this term corresponds to G in LibLinear
            G = inner_sum - 1.0

            ################
            # take care of shrinking

            PG = 0

            if alphas[i] == 0:
                if G > PGmax_old:
                    remove_list.append(i)
                    continue
                elif G < 0:
                    PG = G
            elif alphas[i] == C:
                if G < PGmin_old:
                    remove_list.append(i)
                    continue
                elif G > 0:
                    PG = G
            else:
                PG = G

            PGmax_new = max(PGmax_new, PG)
            PGmin_new = min(PGmin_new, PG)
            ################

            # update distance
            d = -G / np.dot(all_xt[i], all_xt[i])

            # store previous alpha
            alpha_old = alphas[i]

            # project onto feasible set
            alphas[i] = max(0, min(C, alphas[i] + d))

            # update w for example
            V[ti, :] += (alphas[i] - alpha_old) * all_lt[i] * all_xt[i]

        # update projected gradients after the full pass
        PGmax_old = PGmax_new
        PGmin_old = PGmin_new
        if PGmax_old <= 0:
            PGmax_old = float("inf")
        if PGmin_old >= 0:
            PGmin_old = float("-inf")

        # keep track of objectives
        #if iteration < 3 and i % 5 == 0:
        if False:
            # compute objective after outer iteration
            W_tmp = alphas_to_w(alphas, all_xt, all_lt, task_indicator,
                                M).reshape(num_tasks * num_dim)
            #W_tmp = W.reshape(num_tasks * num_dim)
            primal_obj.append(compute_primal_objective(
                W_tmp, C, all_xt, all_lt, task_indicator, L))
            dual_obj.append(compute_dual_objective(
                alphas, all_xt, all_lt, task_indicator, M))

        # check stop criterion: compute gap
        gap = PGmax_new - PGmin_new
        #print gap
        if gap <= epsilon:
            print "terminating after iteration", iteration, \
                "with active set size", len(active_idx)
            break

    # compute W from alphas
    self.W = alphas_to_w(alphas, all_xt, all_lt, task_indicator, M)
    W2 = v_to_w(V, all_xt, all_lt, task_indicator, M)
    self.alphas = alphas

    # record final obj
    self.dual_obj = compute_dual_objective(alphas, all_xt, all_lt,
                                           task_indicator, M)
    self.primal_obj = compute_primal_objective(
        self.W.reshape(num_tasks * num_dim), C, all_xt, all_lt,
        task_indicator, L)

    return True
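
# A hypothetical end-to-end usage sketch for the shrinking solver. The class
# name DcdShrinkingSolver is an assumption (the method's enclosing class is
# not shown here); M is the task similarity matrix and L is assumed to be the
# corresponding regularizer of the same shape, per the asserts above.
#
#   xt = [np.array([1.0, 0.2]), np.array([-1.0, 0.1]),
#         np.array([0.9, -0.3]), np.array([-0.8, -0.2])]
#   lt = [1.0, -1.0, 1.0, -1.0]
#   task_indicator = [0, 0, 1, 1]
#   M = np.eye(2)
#   L = np.eye(2)
#
#   solver = DcdShrinkingSolver()
#   solver.solve(1.0, xt, lt, task_indicator, M, L)
#   print "dual obj:", solver.dual_obj, "primal obj:", solver.primal_obj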