Example #1
    def solve(self, C, all_xt, all_lt, task_indicator, M):
        """
        use finite differences to compute gradient, use generic solver
        """

        num_xt = len(all_xt)
        alphas = np.ones(num_xt) * C * 0.5

        # add box constraints
        bounds = [(0, C) for idx in range(num_xt)]

        fix_args = (all_xt, all_lt, task_indicator, M)

        # step size for the finite-difference gradient approximation
        epsilon = C * 0.1

        print "using C:", C

        # call solver
        self.alpha_opt, nfeval, rc = scipy.optimize.fmin_tnc(
            compute_dual_objective,
            alphas,
            bounds=bounds,
            approx_grad=True,
            messages=5,
            args=fix_args,
            maxfun=500,
            epsilon=epsilon)

        # compute W from alphas
        self.W = alphas_to_w(self.alpha_opt, all_xt, all_lt, task_indicator, M)

        return True
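A minimal driving sketch, assuming the method above lives on a class named FiniteDiffSolver (a hypothetical name) and that the module-level imports (numpy as np, scipy.optimize) and helpers (compute_dual_objective, alphas_to_w) used by the snippet are in scope:

import numpy as np

# toy problem: two tasks with two examples each
all_xt = [np.array([1.0, 0.0]), np.array([-1.0, 0.0]),
          np.array([0.0, 1.0]), np.array([0.0, -1.0])]
all_lt = [1.0, -1.0, 1.0, -1.0]     # binary labels
task_indicator = [0, 0, 1, 1]       # task id of each example
M = np.array([[1.0, 0.5],           # task similarity matrix
              [0.5, 1.0]])

solver = FiniteDiffSolver()         # hypothetical wrapper class
solver.solve(1.0, all_xt, all_lt, task_indicator, M)
print solver.W                      # weights recovered from the optimal alphas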
Example #2
    def solve(self, C, xt, lt, task_indicator, M):
        """
        solve dual using cvxopt
        """

        num_xt = len(xt)

        # set up quadratic term
        Q = np.zeros((num_xt, num_xt))

        # compute quadratic term
        for i in xrange(num_xt):
            for j in xrange(num_xt):

                s = task_indicator[i]
                t = task_indicator[j]

                Q[i, j] = M[s, t] * lt[i] * lt[j] * np.dot(xt[i], xt[j])

        # set up linear term
        p = -np.ones(num_xt)

        # optional bias term (disabled)
        #b = np.zeros((M, 1))
        #label_matrix = np.zeros((M, N))

        # set up QP with box constraints 0 <= alpha <= C
        qp = QP(Q, p, lb=np.zeros(num_xt), ub=C * np.ones(num_xt))  #Aeq=label_matrix, beq=b
        qp.debug = 1

        # run solver
        r = qp.solve('cvxopt_qp', iprint=0)

        # recover result
        self.alphas = r.xf
        self.dual_obj = self.obj = r.ff

        # compute W from alphas
        self.W = alphas_to_w(self.alphas, xt, lt, task_indicator, M)


        return True
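The dual being solved here is min over alpha of 1/2 * sum_ij alpha_i alpha_j lt[i] lt[j] M[s, t] <xt[i], xt[j]> - sum_i alpha_i, subject to 0 <= alpha_i <= C; Q and p above encode exactly these two terms. As a sketch (assuming cvxopt is installed), the same box-constrained QP can be posed directly through cvxopt.solvers.qp, with the bounds written as linear inequalities:

import numpy as np
from cvxopt import matrix, solvers

# Q, num_xt and C as constructed in the method above
P = matrix(Q)                               # quadratic term
q = matrix(-np.ones(num_xt))                # linear term
G = matrix(np.vstack([-np.eye(num_xt),      # -alpha_i <= 0
                      np.eye(num_xt)]))     #  alpha_i <= C
h = matrix(np.hstack([np.zeros(num_xt),
                      C * np.ones(num_xt)]))

sol = solvers.qp(P, q, G, h)
alphas = np.array(sol['x']).ravel()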
Example #3
def solver_mtk_shogun(C, all_xt, all_lt, task_indicator, M, L, eps,
                      target_obj):
    """
    implementation using multitask kernel
    """

    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)
    tsm = numpy.array(M)

    print "task_sim:", tsm

    num_tasks = L.shape[0]

    # sanity checks
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects
    if type(xt[0]) == numpy.string_:
        feat = StringCharFeatures(DNA)
        xt = [str(a) for a in xt]
        feat.set_features(xt)
        base_kernel = WeightedDegreeStringKernel(feat, feat, 8)
    else:
        feat = RealFeatures(xt.T)
        base_kernel = LinearKernel(feat, feat)

    lab = Labels(lt)

    # set up normalizer
    normalizer = MultitaskKernelNormalizer(tt.tolist())

    for i in xrange(num_tasks):
        for j in xrange(num_tasks):
            normalizer.set_task_similarity(i, j, M[i, j])

    print "num of unique tasks: ", normalizer.get_num_unique_tasks(
        task_indicator)

    # set up kernel
    base_kernel.set_cache_size(2000)
    base_kernel.set_normalizer(normalizer)
    base_kernel.init_normalizer()

    # set up svm
    svm = SVMLight()  #LibSVM()

    svm.set_epsilon(eps)
    #print "reducing num threads to one"
    #svm.parallel.set_num_threads(1)
    #print "using one thread"

    # how often we would like to compute the objective etc.
    svm.set_record_interval(0)
    svm.set_target_objective(target_obj)

    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    svm.io.set_loglevel(MSG_DEBUG)
    # TODO: set number of threads to 1

    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # prepare for training
    svm.set_labels(lab)
    svm.set_kernel(base_kernel)

    # train svm
    svm.train()

    train_times = svm.get_training_times()
    objectives = [-obj for obj in svm.get_dual_objectives()]

    if False:  # disabled: recover model parameters and append the primal objective

        # get model parameters
        sv_idx = svm.get_support_vectors()
        sparse_alphas = svm.get_alphas()

        assert len(sv_idx) == len(sparse_alphas)

        # compute dense alpha (remove label)
        alphas = numpy.zeros(len(xt))
        for id_sparse, id_dense in enumerate(sv_idx):
            alphas[id_dense] = sparse_alphas[id_sparse] * lt[id_dense]

        # print alphas
        W = alphas_to_w(alphas, xt, lt, task_indicator, M)
        primal_obj = compute_primal_objective(
            W.reshape(W.shape[0] * W.shape[1]), C, all_xt, all_lt,
            task_indicator, L)
        objectives.append(primal_obj)
        train_times.append(train_times[-1] + 100)

    return objectives, train_times
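A hypothetical invocation with toy data; the relation L = inv(M) is an assumption made for illustration here (the function itself only checks that M and L have the same shape):

import numpy

all_xt = [numpy.array([1.0, 0.0]), numpy.array([-1.0, 0.0]),
          numpy.array([0.9, 0.1]), numpy.array([-0.9, -0.1])]
all_lt = [1.0, -1.0, 1.0, -1.0]
task_indicator = [0, 0, 1, 1]
M = numpy.array([[1.0, 0.5],
                 [0.5, 1.0]])
L = numpy.linalg.inv(M)   # assumption: regularizer is the inverse of the similarity

objectives, train_times = solver_mtk_shogun(
    1.0, all_xt, all_lt, task_indicator, M, L,
    eps=1e-3, target_obj=0.0)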
Example #4
    def solve(self,
              C,
              all_xt,
              all_lt,
              task_indicator,
              M,
              L,
              record_progress=False):
        """
        impementation of our dual coordinate descend solver
        """

        num_xt = len(all_xt)
        num_tasks = M.shape[0]
        num_dim = len(all_xt[0])

        V = np.zeros((num_tasks, num_dim))
        alphas = np.zeros(num_xt)

        # stopping flag (unused: the loop below runs a fixed number of sweeps)
        optimal = False

        primal_obj = []
        dual_obj = []

        #while not optimal:
        for iteration in xrange(500):

            # dual coordinate descent: touch one example at a time
            for i in xrange(num_xt):

                # current task id
                ti = task_indicator[i]

                # the heart of the beast: the update
                inner_sum = 0
                for t in xrange(num_tasks):
                    inner_sum += M[t, ti] * all_lt[i] * np.dot(
                        V[t, :], all_xt[i])
                d = (1.0 - inner_sum) / np.dot(all_xt[i], all_xt[i])

                # store previous alpha
                alpha_old = alphas[i]

                # project onto feasible set
                alphas[i] = max(0, min(C, alphas[i] + d))

                # update w for example
                V[ti, :] += (alphas[i] - alpha_old) * all_lt[i] * all_xt[i]

                # keep track of objectives
                if record_progress and iteration < 3 and i % 5 == 0:
                    # compute objective after outer iteration
                    W_tmp = alphas_to_w(alphas, all_xt, all_lt, task_indicator,
                                        M).reshape(num_tasks * num_dim)
                    primal_obj.append(
                        compute_primal_objective(W_tmp, C, all_xt, all_lt,
                                                 task_indicator, L))
                    dual_obj.append(
                        compute_dual_objective(alphas, all_xt, all_lt,
                                               task_indicator, M))

        # compute W from alphas; W2 reconstructs it from V as a sanity check
        self.W = W = alphas_to_w(alphas, all_xt, all_lt, task_indicator, M)
        W2 = v_to_w(V, all_xt, all_lt, task_indicator, M)

        # record final obj
        self.dual_obj = compute_dual_objective(alphas, all_xt, all_lt,
                                               task_indicator, M)
        self.primal_obj = compute_primal_objective(
            W.reshape(num_tasks * num_dim), C, all_xt, all_lt, task_indicator,
            L)

        return True
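With a single task (M = [[1]]), the inner sum above collapses to lt[i] * <V[0], xt[i]> and the update reduces to the standard LibLinear-style dual coordinate descent step for the L1-loss SVM. A self-contained single-task sketch of that step (all names here are illustrative, not from the snippet):

import numpy as np

def cd_step(w, alphas, i, x, y, C):
    """One dual coordinate descent step on example i (single task)."""
    G = y[i] * np.dot(w, x[i]) - 1.0             # gradient of the dual in alpha_i
    d = -G / np.dot(x[i], x[i])                  # unconstrained step
    alpha_old = alphas[i]
    alphas[i] = max(0.0, min(C, alphas[i] + d))  # project back onto [0, C]
    w += (alphas[i] - alpha_old) * y[i] * x[i]   # maintain w = sum_j alphas[j]*y[j]*x[j]
    return w, alphas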
Example #5
    def solve(self,
              C,
              all_xt,
              all_lt,
              task_indicator,
              M,
              L,
              record_progress=False):
        """
        impementation of our dual coordinate descend solver
        including lib linears shrinking strategy
        """

        num_xt = len(all_xt)
        num_tasks = M.shape[0]
        num_dim = len(all_xt[0])

        V = np.zeros((num_tasks, num_dim))
        alphas = np.zeros(num_xt)

        # stopping flag (unused: the loop below runs a fixed number of sweeps)
        optimal = False

        primal_obj = []
        dual_obj = []

        # indices of active set
        active_idx = range(num_xt)
        remove_list = []

        # set up projected gradients
        PG = None
        PGmax_old = float("inf")
        PGmin_old = float("-inf")
        PGmax_new = None
        PGmin_new = None

        epsilon = 0.00001

        #while not optimal:
        # TODO: use a better stopping criterion than a fixed sweep budget
        for iteration in xrange(500):

            #print "removing:", len(remove_list), "remaining:", len(active_idx)

            # shrink active set
            active_idx = list(set(active_idx) - set(remove_list))
            remove_list = []

            # process in random order
            random.shuffle(active_idx)

            PGmax_new = float("-inf")
            PGmin_new = float("inf")

            #print "iteration", iteration

            # dual coordinate descent: touch one example at a time
            for i in active_idx:

                # current task id
                ti = task_indicator[i]

                # the heart of the beast: the update
                inner_sum = 0
                for t in xrange(num_tasks):
                    inner_sum += M[t, ti] * all_lt[i] * np.dot(
                        V[t, :], all_xt[i])

                # this term corresponds to G in LibLinear
                G = inner_sum - 1.0

                ################
                # take care of shrinking

                PG = 0

                if alphas[i] == 0:
                    if G > PGmax_old:
                        remove_list.append(i)
                        continue

                    elif G < 0:
                        PG = G

                elif alphas[i] == C:

                    if G < PGmin_old:
                        remove_list.append(i)
                        continue

                    elif G > 0:
                        PG = G
                else:
                    PG = G

                PGmax_new = max(PGmax_new, PG)
                PGmin_new = min(PGmin_new, PG)
                ################

                # compute the update step along coordinate i
                d = -G / np.dot(all_xt[i], all_xt[i])

                # store previous alpha
                alpha_old = alphas[i]

                # project onto feasible set
                alphas[i] = max(0, min(C, alphas[i] + d))

                # update w for example
                V[ti, :] += (alphas[i] - alpha_old) * all_lt[i] * all_xt[i]


                # keep track of objectives
                if record_progress and iteration < 3 and i % 5 == 0:
                    # compute objective after outer iteration
                    W_tmp = alphas_to_w(alphas, all_xt, all_lt, task_indicator,
                                        M).reshape(num_tasks * num_dim)
                    #W_tmp = W.reshape(num_tasks * num_dim)
                    primal_obj.append(
                        compute_primal_objective(W_tmp, C, all_xt, all_lt,
                                                 task_indicator, L))
                    dual_obj.append(
                        compute_dual_objective(alphas, all_xt, all_lt,
                                               task_indicator, M))

            # check stopping criterion: maximal violation of optimality over this sweep
            gap = PGmax_new - PGmin_new

            if gap <= epsilon:
                print "terminating after iteration", iteration, "with active set size", len(active_idx)
                break

            # update the shrinking thresholds once per sweep (moved out of the
            # inner loop to match LibLinear's shrinking strategy)
            PGmax_old = PGmax_new
            PGmin_old = PGmin_new
            if PGmax_old <= 0:
                PGmax_old = float("inf")
            if PGmin_old >= 0:
                PGmin_old = float("-inf")

        # compute W from alphas; W2 reconstructs it from V as a sanity check
        self.W = alphas_to_w(alphas, all_xt, all_lt, task_indicator, M)
        W2 = v_to_w(V, all_xt, all_lt, task_indicator, M)
        self.alphas = alphas

        # record final obj
        self.dual_obj = compute_dual_objective(alphas, all_xt, all_lt,
                                               task_indicator, M)
        self.primal_obj = compute_primal_objective(
            self.W.reshape(num_tasks * num_dim), C, all_xt, all_lt,
            task_indicator, L)

        return True
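A hypothetical driver for the shrinking solver, assuming the class above is available as ShrinkingDcdSolver (an illustrative name) and reusing the toy data from the earlier sketches; comparing the recorded primal and dual objectives gives a quick convergence sanity check:

solver = ShrinkingDcdSolver()   # hypothetical wrapper class
solver.solve(1.0, all_xt, all_lt, task_indicator, M, L)

# the primal/dual gap (up to the sign convention of compute_dual_objective)
# should be small at termination
print "gap:", solver.primal_obj - solver.dual_obj
print "support vectors:", (solver.alphas > 0).sum()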