Example #1
import numpy as np
# k_helpers.get_combined_kernel and compute_J_SVM are helpers defined elsewhere in this project.

def compute_gamma_linesearch(gamma_min, gamma_max, delta_max, cost_min,
                             cost_max, d, D, kernel_matrices, J_prev, y_mat,
                             alpha, C, goldensearch_precision_factor):
    """Golden-section line search for the step size gamma along direction D."""
    gold_ratio = (5**0.5 + 1) / 2

    gamma_arr = np.array([gamma_min, gamma_max])
    cost_arr = np.array([cost_min, cost_max])
    coord = np.argmin(cost_arr)

    while ((gamma_max - gamma_min) > goldensearch_precision_factor *
           abs(delta_max) and gamma_max > np.finfo(float).eps):
        # Two interior probe points placed at the golden ratio.
        gamma_medr = gamma_min + (gamma_max - gamma_min) / gold_ratio
        gamma_medl = gamma_min + (gamma_medr - gamma_min) / gold_ratio

        alpha_r, cost_medr = compute_J_SVM(
            k_helpers.get_combined_kernel(kernel_matrices, d + gamma_medr * D),
            y_mat, C)
        alpha_l, cost_medl = compute_J_SVM(
            k_helpers.get_combined_kernel(kernel_matrices, d + gamma_medl * D),
            y_mat, C)

        cost_arr = np.array([cost_min, cost_medl, cost_medr, cost_max])
        gamma_arr = np.array([gamma_min, gamma_medl, gamma_medr, gamma_max])
        coord = np.argmin(cost_arr)

        if coord == 0:
            # Minimum at the left end: shrink the bracket to [gamma_min, gamma_medl].
            gamma_max, cost_max = gamma_medl, cost_medl
        elif coord == 1:
            # Minimum at gamma_medl: keep [gamma_min, gamma_medr] and gamma_medl's alpha.
            gamma_max, cost_max = gamma_medr, cost_medr
            alpha = alpha_l
        elif coord == 2:
            # Minimum at gamma_medr: keep [gamma_medl, gamma_max] and gamma_medr's alpha.
            gamma_min, cost_min = gamma_medl, cost_medl
            alpha = alpha_r
        else:
            # Minimum at the right end: shrink the bracket to [gamma_medr, gamma_max].
            gamma_min, cost_min = gamma_medr, cost_medr

    # Accept the line-search point only if it improves on the previous cost.
    if cost_arr[coord] < J_prev:
        return gamma_arr[coord], alpha, cost_arr[coord]
    return gamma_min, alpha, cost_min
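A minimal usage sketch for the line search above, assuming the function is defined in the same file. The k_helpers class and compute_J_SVM below are illustrative stubs standing in for the project's real modules (the actual SVM solver is not shown on this page); the toy cost simply sums the weighted kernel against the label matrix.

import numpy as np

# Illustrative stubs for the project's modules (assumptions, not the real code).
class k_helpers:
    @staticmethod
    def get_combined_kernel(kernel_matrices, d):
        # Weighted kernel combination: K(d) = sum_m d_m * K_m
        return sum(w * K for w, K in zip(d, kernel_matrices))

def compute_J_SVM(K, y_mat, C):
    # Toy surrogate returning (alpha, J); the real helper fits an SVM.
    return np.zeros(y_mat.shape[0]), float(np.sum(K * y_mat))

X = np.arange(12.0).reshape(6, 2)
y = np.array([1., -1., 1., -1., 1., -1.])
y_mat = np.outer(y, y)
kernels = [X @ X.T, (X @ X.T + 1.0) ** 2]
d = np.array([0.5, 0.5])
D = np.array([0.5, -0.5])  # feasible direction: components sum to zero
_, J0 = compute_J_SVM(k_helpers.get_combined_kernel(kernels, d), y_mat, 1.0)
_, J1 = compute_J_SVM(k_helpers.get_combined_kernel(kernels, d + 1.0 * D), y_mat, 1.0)
gamma, alpha, J = compute_gamma_linesearch(0.0, 1.0, 1.0, J0, J1, d, D,
                                           kernels, J0, y_mat, np.zeros(6),
                                           1.0, 1e-1)
print(gamma, J)  # chosen step size and the corresponding cost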
Example #2
def get_armijos_step_size(iteration, C, kernel_matrices, d, y_mat, alpha0,
                          gamma0, Jd, D, dJ, c=0.5, T=0.5):
    """Backtracking (Armijo) line search.

    Shrinks gamma until the sufficient-decrease condition
        J(d + gamma * D) <= J(d) + gamma * c * m
    holds, where m = D' * dJ is the directional derivative (it should be
    negative for a descent direction).
    """
    gamma = gamma0
    m = D.T.dot(dJ)

    while True:
        combined_kernel_matrix = k_helpers.get_combined_kernel(
            kernel_matrices, d + gamma * D)
        alpha, new_J, alpha_indices = compute_J_SVM(combined_kernel_matrix,
                                                    y_mat, C)
        if new_J <= Jd + gamma * c * m:
            return gamma
        # Sufficient decrease not met: shrink the step and retry.
        gamma = gamma * T
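For reference, the same Armijo sufficient-decrease rule on a toy quadratic, detached from the SVM machinery; every name in this sketch is illustrative.

import numpy as np

def armijo_toy(f, x, p, grad, gamma0=1.0, c=0.5, T=0.5):
    m = p.dot(grad)                 # directional derivative; negative for descent
    gamma = gamma0
    while f(x + gamma * p) > f(x) + gamma * c * m:
        gamma *= T                  # backtrack until sufficient decrease holds
    return gamma

f = lambda x: float(x.dot(x))       # f(x) = ||x||^2
x = np.array([2.0, -1.0])
grad = 2 * x                        # gradient of ||x||^2
p = -grad                           # steepest-descent direction
step = armijo_toy(f, x, p, grad)
print(step, f(x + step * p))        # accepted step (0.5) and the decreased value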
def find_kernel_weights(d_init, kernel_matrices, C, y, verbose):
    ########## Initialization, starting from a point d ##########
    weight_precision = 1e-08  # weights below this value are set to 0
    goldensearch_precision = 1e-01
    goldensearch_precision_init = 1e-01
    max_goldensearch_precision = 1e-08
    duality_gap_threshold = 0.01  # stopping criterion defined in the paper
    for m in kernel_matrices:
        assert m.shape == (y.shape[0], y.shape[0])
    M = len(kernel_matrices)  # number of kernels
    d = d_init  # initial kernel weights, typically d_m = 1/M for M kernels
    y_mat = np.outer(y, y)  # label outer-product matrix used by the SVM solver
    iteration = 0
    stop_state = False  # loop parameter

    ########## Algorithm 1 pseudocode from the SimpleMKL paper ##########
    # stop_state: checks the duality gap between primal and dual MKL each loop
    # d: weight vector d = {d_1, d_2, ..., d_M} over the M kernels
    # dJ: gradient vector computed at the current d
    # D: reduced gradient-descent direction from dJ and the equality constraint
    while not stop_state:  # loop until minimizing d and corresponding alphas are found
        if verbose == 1:
            print("iteration:", iteration)
            print("d:", d)
        old_d = d.copy()

        ########## SVM computation for the current d and J(d) ##########
        # Combined kernel matrix for the current weight vector d.
        combined_kernel_matrix = k_helpers.get_combined_kernel(kernel_matrices, d)
        # SVM wrapper: solve for the alphas and the objective value J(d).
        alpha, J = helpers.compute_J_SVM(combined_kernel_matrix, y_mat, C)
        # Gradient of J(d): an M-dimensional vector given the current alphas.
        dJ = helpers.compute_dJ(kernel_matrices, y_mat, alpha)
        # mu is the index of the largest component of the d vector.
        mu = np.argmax(d)
        # REDUCED gradient direction respecting the equality constraint on d.
        D = helpers.compute_reduced_descent_direction(d, dJ, mu)
        if verbose == 1:
            print('current gradient:')
            print(dJ)
            print('current alpha, J: ', alpha, J)
            print('current reduced descent: ', D)

        J_cross = 0
        d_cross = d
        D_cross = D
        counter = 1
        J_prev = J
        # Efficient update of d without recomputing the gradient at each new point.
        while J_cross < J:
            # Promote d_cross, D_cross and J_cross to the current iterates.
            d = d_cross  # update d
            D = D_cross  # update the reduced gradient
            if counter > 1:  # at the first pass J_cross is still 0, which would cause a bug
                J = J_cross  # update the function value
            # Largest admissible step size for the current d and D.
            gamma_max = helpers.compute_max_admissible_gamma(d, D)
            delta_max = gamma_max
            # d_cross moves along D by gamma_max, driving one component of d to zero.
            d_cross = d + gamma_max * D
            # SVM solution and objective at the new point d_cross.
            combined_kernel_matrix_cross = k_helpers.get_combined_kernel(
                kernel_matrices, d_cross)
            alpha_cross, J_cross = helpers.compute_J_SVM(
                combined_kernel_matrix_cross, y_mat, C)
            if J_cross < J:  # only update D_cross while the objective keeps decreasing
                D_cross = helpers.update_reduced_descent_direction(
                    d_cross, D, mu, weight_precision)
                counter = counter + 1
            if verbose == 1:
                print("updated cost: ", J_cross)
                print("d cross:")
                print(d_cross)
                print("counter:", counter)
                print("updated D_cross:")
                print(D_cross)

        # Now J(d_cross) > J(d); note that d has already been updated several times.
        # Line-search along D (no further direction update) for the point that
        # minimizes J between d and d_cross.
        gamma, alpha, J = helpers.compute_gamma_linesearch(
            0, gamma_max, delta_max, J, J_cross, d, D, kernel_matrices, J_prev,
            y_mat, alpha, C, goldensearch_precision)
        # Move d to the new point; gamma may be zero, i.e. no update.
        d = d + gamma * D
        # Numerical cleaning.
        d = helpers.fix_weight_precision(d, weight_precision)
        # Improve the line search by tightening its precision once d stalls.
        if (max(abs(d - old_d)) < weight_precision
                and goldensearch_precision > max_goldensearch_precision):
            goldensearch_precision = goldensearch_precision / 10
        # Gradient at the new point d with the corresponding alphas.
        dJ_curr_d = helpers.compute_dJ(kernel_matrices, y_mat, alpha)
        # Stopping criterion: relative gap between primal J(d) and the dual value.
        duality_gap = (J + np.max(-dJ_curr_d) - np.sum(alpha)) / J
        print('duality gap: ', duality_gap)
        if duality_gap < duality_gap_threshold:
            stop_state = True
        iteration += 1

    return (d, k_helpers.get_combined_kernel(kernel_matrices, d), J, alpha,
            duality_gap)
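A hedged usage sketch for the solver above. It assumes the repo's helpers and k_helpers modules are importable (they are not shown on this page), so it is a sketch rather than a runnable standalone script; the Gram matrices and labels are synthetic.

import numpy as np

X = np.random.default_rng(0).normal(size=(20, 3))
y = np.where(X[:, 0] > 0, 1.0, -1.0)  # labels in {-1, +1}

sq_dists = np.sum((X[:, None, :] - X[None, :, :]) ** 2, axis=2)
kernels = [X @ X.T,                   # linear kernel
           (X @ X.T + 1.0) ** 2,      # degree-2 polynomial kernel
           np.exp(-0.5 * sq_dists)]   # RBF kernel

d_init = np.full(len(kernels), 1.0 / len(kernels))  # uniform initial weights
d, K, J, alpha, gap = find_kernel_weights(d_init, kernels, C=1.0, y=y,
                                          verbose=0)
print("learned kernel weights:", d, "duality gap:", gap)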
Example #6
def find_kernel_weights(k_init, kernel_matrices, C, y):
    # various parameters
    weight_precision = 1e-08  # weights below this value are set to 0
    goldensearch_precision = 1e-01
    goldensearch_precision_init = 1e-01
    max_goldensearch_precision = 1e-08
    duality_gap_threshold = 0.01

    for m in kernel_matrices:
        assert m.shape == (y.shape[0], y.shape[0])

    M = len(kernel_matrices)

    # initial weights of each kernel
    d = k_init

    # creates the y matrix for use in the SVM later
    y_mat = np.outer(y, y)

    iteration = 0
    # initialization for the stopping criterion
    stop_state = False

    # initial alphas
    combined_kernel_matrix = k_helpers.get_combined_kernel(kernel_matrices, d)
    alpha, J = helpers.compute_J_SVM(combined_kernel_matrix, y_mat, C)

    while not stop_state:
        old_d = d.copy()
        dJ = helpers.compute_dJ(kernel_matrices, y_mat, alpha)
        mu = np.argmax(d)
        D = helpers.compute_descent_direction(d, dJ, mu)
        gamma_max = helpers.compute_max_admissible_gamma(d, D)
        delta_max = gamma_max

        if gamma_max > 0.1:
            gamma_max = 0.1

        J_cross = 0
        J_prev = J

        while J_cross < J:
            d_cross = d + gamma_max * D
            combined_kernel_matrix_cross = k_helpers.get_combined_kernel(
                kernel_matrices, d_cross)
            alpha_cross, J_cross = helpers.compute_J_SVM(
                combined_kernel_matrix_cross, y_mat, C)

            if J_cross < J:
                J = J_cross
                d = d_cross.copy()
                alpha = alpha_cross.copy()
                # update the descent direction
                D = helpers.update_descent_direction(d, D, mu, weight_precision)

                # recompute the largest admissible step along the new direction
                tmp_ind = np.where(D < 0)[0]
                if tmp_ind.shape[0] > 0:
                    gamma_max = np.min(-(np.divide(d[tmp_ind], D[tmp_ind])))
                    delta_max = gamma_max
                    J_cross = 0
                else:
                    gamma_max = 0
                    delta_max = 0

        # line-search
        gamma, alpha, J = helpers.compute_gamma_linesearch(
            0, gamma_max, delta_max, J, J_cross, d, D, kernel_matrices,
            J_prev, y_mat, alpha, C, goldensearch_precision)
        d = d + gamma * D
        # numerical cleaning
        d = helpers.fix_weight_precision(d, weight_precision)

        # improve the line search by enhancing its precision
        if (max(abs(d - old_d)) < weight_precision
                and goldensearch_precision > max_goldensearch_precision):
            goldensearch_precision = goldensearch_precision / 10

        dJ_curr_d = helpers.compute_dJ(kernel_matrices, y_mat, alpha)

        # stopping criterion
        duality_gap = (J + np.max(-dJ_curr_d) - np.sum(alpha)) / J
        if duality_gap < duality_gap_threshold:
            stop_state = True

        iteration += 1

    return (d, k_helpers.get_combined_kernel(kernel_matrices, d), J, alpha,
            duality_gap)
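The inline gamma_max update in Example #6 (the tmp_ind block) computes the largest step that keeps every weight nonnegative. A minimal standalone sketch of that rule, with illustrative names:

import numpy as np

def max_admissible_gamma(d, D):
    # Largest gamma with d_m + gamma * D_m >= 0 for all m: the minimum of
    # -d_m / D_m over coordinates where D_m < 0.
    neg = np.where(D < 0)[0]
    if neg.shape[0] == 0:
        return 0.0  # no coordinate decreases; mirrors the fallback in Example #6
    return float(np.min(-d[neg] / D[neg]))

d = np.array([0.5, 0.3, 0.2])
D = np.array([0.4, 0.1, -0.5])      # descent direction summing to zero
print(max_admissible_gamma(d, D))   # 0.4: the third weight hits zero at gamma = 0.4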