Example #1
File: epg_transforms.py  Project: ecat/adbs
def _csin(angle):
    return complex(np.sin(angle), 0.)
Example #2
File: epg_transforms.py  Project: ecat/adbs
def _ccos(angle):
    return complex(np.cos(angle), 0.)
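Both helpers just promote real trig values to complex scalars. np.complex was an alias for the builtin complex, deprecated in NumPy 1.20 and later removed; a minimal check that the builtin (or np.complex128) is a drop-in replacement:

import numpy as np

angle = 0.3
assert complex(np.sin(angle), 0.) == np.complex128(np.sin(angle))
assert complex(np.cos(angle), 0.) == np.cos(angle) + 0j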
Example #3
def minConf_SPG(funObj, x, funProj, options=None):
    """ This function implements Mark Schmidt's MATLAB implementation of
    spectral projected gradient (SPG) to solve for projected quasi-Newton
    direction
                min funObj(x) s.t. x in C
    Parameters
    ----------
    funObj: function that returns objective function value and the gradient
    x: initial parameter value
    funProj: function that returns the projection of x onto C
    options:
        verbose: level of verbosity (0: no output, 1: final, 2: iter (default), 3:
            debug)
        optTol: tolerance used to check for optimality (default: 1e-5)
        progTol: tolerance used to check for lack of progress (default: 1e-9)
        maxIter: maximum number of calls to funObj (default: 500)
        numDiff: compute derivatives numerically (0: use user-supplied
            derivatives (default), 1: use finite differences, 2: use complex
            differentials)
        suffDec: sufficient decrease parameter in Armijo condition (default
            : 1e-4)
        interp: type of interpolation (0: step-size halving, 1: quadratic,
            2: cubic)
        memory: number of steps to look back in non-monotone Armijo
            condition
        useSpectral: use spectral scaling of gradient direction (default:
            1)
        curvilinear: backtrack along the projection arc (default: 0)
        testOpt: test optimality condition (default: 1)
        feasibleInit: if 1, then the initial point is assumed to be
            feasible
        bbType: type of Barzilai-Borwein step (default: 1)
 
    Notes: 
        - if the projection is expensive to compute, you can reduce the
            number of projections by setting testOpt to 0
    """
    
    nVars = x.shape[0]
    options_default = {'verbose':2, 'numDiff':0, 'optTol':1e-5, 'progTol':1e-9,\
                'maxIter':500, 'suffDec':1e-4, 'interp':2, 'memory':10,\
                'useSpectral':1,'curvilinear':0,'feasibleInit':0,'testOpt':1,\
                'bbType':1}
    options = setDefaultOptions(options, options_default)

    if options['verbose'] >= 2:
        if options['testOpt'] == 1:
            print('{:10s}'.format('Iteration') +
                  '{:10s}'.format('FunEvals') +
                  '{:10s}'.format('Projections') +
                  '{:15s}'.format('StepLength') +
                  '{:15s}'.format('FunctionVal') +
                  '{:15s}'.format('OptCond'))
        else:
            print('{:10s}'.format('Iteration') +
                  '{:10s}'.format('FunEvals') +
                  '{:10s}'.format('Projections') +
                  '{:15s}'.format('StepLength') +
                  '{:15s}'.format('FunctionVal'))
    
    funEvalMultiplier = 1

    # evaluate initial point
    if options['feasibleInit'] == 0:
        x = funProj(x)
    [f, g] = funObj(x)
    projects = 1
    funEvals = 1

    # optionally check optimality
    if options['testOpt'] == 1:
        projects = projects + 1
        if np.max(np.abs(funProj(x-g)-x)) < options['optTol']:
            if options['verbose'] >= 1:
                print "First-order optimality conditions below optTol at initial point"
            return (x, f, funEvals, projects)
    
    i = 1
    while funEvals <= options['maxIter']:
        # compute step direction
        if i == 1 or options['useSpectral'] == 0:
            alpha = 1.
        else:
            y = g - g_old
            s = x - x_old
            if options['bbType'] == 1:
                alpha = np.dot(s,s)/np.dot(s,y)
            else:
                alpha = np.dot(s,y)/np.dot(y,y)
            if alpha <= 1e-10 or alpha >= 1e10:
                alpha = 1.
        
        d = -alpha * g
        f_old = f
        x_old = x
        g_old = g

        # compute projected step
        if options['curvilinear'] == 0:
            d = funProj(x+d) - x
            projects = projects + 1

        # check that progress can be made along the direction
        gtd = np.dot(g, d)
        if gtd > -options['progTol']:
            if options['verbose'] >= 1:
                print "Directional derivtive below progTol"
            break

        # select initial guess to step length
        if i == 1:
            t = np.minimum(1., 1./np.sum(np.abs(g)))
        else:
            t = 1.

        # compute reference function for non-monotone condition
        if options['memory'] == 1:
            funRef = f
        else:
            if i == 1:
                old_fvals = np.full(options['memory'], -np.inf)
            
            if i <= options['memory']:
                old_fvals[i-1] = f
            else:
                old_fvals = np.append(old_fvals[1:], f)
            funRef = np.max(old_fvals)
        
        # evaluate the objective and gradient at the initial step
        if options['curvilinear'] == 1:
            x_new = funProj(x + t*d)
            projects = projects + 1
        else:
            x_new = x + t*d
        [f_new, g_new] = funObj(x_new)
        funEvals = funEvals + 1

        # Backtracking line search
        lineSearchIters = 1
        while f_new > funRef + options['suffDec']*np.dot(g, x_new-x) or \
                not isLegal(f_new):
            temp = t
            if options['interp'] == 0 or not isLegal(f_new):
                if options['verbose'] == 3:
                    print('Halving step size')
                t = t/2.
            elif options['interp'] == 2 and isLegal(g_new):
                if options['verbose'] == 3:
                    print("Cubic Backtracking")
                t = polyinterp(np.array([[0, f, gtd],
                        [t, f_new, np.dot(g_new, d)]]))[0]
            elif lineSearchIters < 2 or isLegal(f_prev):
                if options['verbose'] == 3:
                    print("Quadratic Backtracking")
                # 1j marks a derivative value as unknown to polyinterp
                t = polyinterp(np.array([[0, f, gtd],
                        [t, f_new, 1j]]))[0]
            else:
                if options['verbose'] == 3:
                    print("Cubic Backtracking on Function Values")
                t = polyinterp(np.array([[0., f, gtd],
                                         [t, f_new, 1j],
                                         [t_prev, f_prev, 1j]]))[0]
            # adjust if change is too small
            if t < temp*1e-3:
                if options['verbose'] == 3:
                    print("Interpolated value too small, Adjusting")
                t = temp * 1e-3
            elif t > temp * 0.6:
                if options['verbose'] == 3:
                    print("Interpolated value too large, Adjusting")
                t = temp * 0.6

            # check whether step has become too small
            if np.max(np.abs(t*d)) < options['progTol'] or t == 0:
                if options['verbose'] == 3:
                    print("Line Search failed")
                t = 0.
                f_new = f
                g_new = g
                break
            
            # evaluate new point
            f_prev = f_new
            t_prev = temp
            if options['curvilinear']:
                x_new = funProj(x + t*d)
                projects = projects + 1
            else:
                x_new = x + t*d
            [f_new, g_new] = funObj(x_new)
            funEvals = funEvals + 1
            lineSearchIters = lineSearchIters + 1
        
        # done with line search

        # take step
        x = x_new
        f = f_new
        g = g_new

        if options['testOpt']:
            optCond = np.max(np.abs(funProj(x-g)-x))
            projects = projects + 1

        # output log
        if options['verbose'] >= 2:
            if options['testOpt']:
                print('{:10d}'.format(i) +
                      '{:10d}'.format(funEvals*funEvalMultiplier) +
                      '{:10d}'.format(projects) +
                      '{:15.5e}'.format(t) +
                      '{:15.5e}'.format(f) +
                      '{:15.5e}'.format(optCond))
            else:
                print('{:10d}'.format(i) +
                      '{:10d}'.format(funEvals*funEvalMultiplier) +
                      '{:10d}'.format(projects) +
                      '{:15.5e}'.format(t) +
                      '{:15.5e}'.format(f))
        # check optimality
        if options['testOpt']:
            if optCond < options['optTol']:
                if options['verbose'] >= 1:
                    print("First-order optimality conditions below optTol")
                break

        if np.max(np.abs(t*d)) < options['progTol']:
            if options['verbose'] >= 1:
                print("Step size below progTol")
            break

        if np.abs(f-f_old) < options['progTol']:
            if options['verbose'] >= 1:
                print("Function value changing by less than progTol")
            break

        if funEvals*funEvalMultiplier > options['maxIter']:
            if options['verbose'] >= 1:
                print("Function evaluations exceed maxIter")
            break

        i = i + 1

    return (x, f, funEvals, projects)
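A minimal usage sketch (not from the source project): minimize 0.5*||x - a||^2 over the nonnegative orthant, assuming minConf_SPG and its helpers (setDefaultOptions, isLegal, polyinterp) are importable from the same module.

import numpy as np

a = np.array([1.0, -2.0, 3.0])

def funObj(x):
    # objective value and gradient of 0.5*||x - a||^2
    return 0.5 * np.dot(x - a, x - a), x - a

def funProj(x):
    # Euclidean projection onto C = {x : x >= 0}
    return np.maximum(x, 0.)

x0 = np.zeros(3)
x, f, funEvals, projects = minConf_SPG(funObj, x0, funProj,
                                       options={'verbose': 0})
print(x)  # expected: close to [1., 0., 3.]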
Example #4
File: epg_transforms.py  Project: ecat/adbs
def _cexp(angle):
    # Euler's formula: e^{i*angle} = cos(angle) + i*sin(angle)
    return complex(np.cos(angle), np.sin(angle))
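A quick self-contained check that the expression above agrees with np.exp(1j*angle):

import numpy as np

angle = 0.7
assert np.isclose(complex(np.cos(angle), np.sin(angle)),
                  np.exp(1j * angle))   # e^{i*angle} = cos + i*sin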
Example #5
        [N,D,G,M,K] = [5000, 50, 20, 100, 10]
        param = np.random.randn( M*4 + G*(M+2+K*2+N) + G*K*(N+D*2) )
        funProj_vec = lambda param: projectParam_vec(param,N,D,G,M,K,lb=1e-6)
        funProj_mat = lambda param: projectParam(param,N,D,G,M,K,lb=1e-6)
        t1 = time.time()
        w_loop = funProj_vec(param)
        t2 = time.time()
        print("vector + loop: ", t2 - t1)
        w_map = funProj_mat(param)
        t3 = time.time()
        print("mat : ", t3 - t2)
        print("diff: ", np.linalg.norm(w_loop - w_map))

    elif flag_test == 3:
        # v0 = np.random.randn(100)
        v0 = 1j
        print(isLegal(v0))
    elif flag_test == 4:
        # test polyinterp
        points = np.random.randn(2, 3)
        print(polyinterp(points))
        np.savetxt(
            "./Mark_Schmidt/minConf/minFunc/test_data/polyinterp_input_1.csv",
            points)
    elif flag_test == 5:
        # test lbfgsUpdate
        [p, m, corrections, debug, Hdiag] = [2, 5, 2, 0, 1e-3]
        y = np.random.randn(p)
        s = y + 0.1 * np.random.randn(p)
        old_dirs = np.random.randn(p, m)
        old_stps = np.random.randn(p, m)
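The timing comparison at the top of this example can be reproduced self-contained; a sketch of the same pattern, with hypothetical proj_loop/proj_vec stand-ins for projectParam_vec/projectParam:

import time
import numpy as np

def proj_loop(v):
    # hypothetical stand-in: elementwise lower bound applied in a loop
    out = np.empty_like(v)
    for i in range(v.size):
        out[i] = max(v[i], 1e-6)
    return out

def proj_vec(v):
    # hypothetical stand-in: the same projection, vectorized
    return np.maximum(v, 1e-6)

param = np.random.randn(100000)
t1 = time.time()
w_loop = proj_loop(param)
t2 = time.time()
w_vec = proj_vec(param)
t3 = time.time()
print("loop:", t2 - t1, "vec:", t3 - t2)
print("diff:", np.linalg.norm(w_loop - w_vec))  # ~0: the projections agree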
Example #6
def polyinterp(points, doPlot=None, xminBound=None, xmaxBound=None):
    """ polynomial interpolation
    Parameters
    ----------
    points: shape(pointNum, 3), three columns represents x, f, g
    doPolot: set to 1 to plot, default 0
    xmin: min value that brackets minimum (default: min of points)
    xmax: max value that brackets maximum (default: max of points)
    
    set f or g to sqrt(-1)=1j if they are not known
    the order of the polynomial is the number of known f and g values minus 1

    Returns
    -------
    minPos:
    fmin:
    """
    
    if doPlot is None:
        doPlot = 0

    nPoints = points.shape[0]
    order = np.sum(np.imag(points[:, 1:3]) == 0) -1
    
    # code for most common case: cubic interpolation of 2 points
    if nPoints == 2 and order == 3 and doPlot == 0:
        [minVal, minPos] = [np.min(points[:,0]), np.argmin(points[:,0])]
        notMinPos = 1 - minPos
        d1 = points[minPos,2] + points[notMinPos,2] - 3*(points[minPos,1]-\
                points[notMinPos,1])/(points[minPos,0]-points[notMinPos,0])

        t_d2 = d1**2 - points[minPos,2]*points[notMinPos,2]
        if t_d2 > 0:
            d2 = np.sqrt(t_d2)
        else:
            d2 = np.sqrt(-t_d2) * 1j
        if np.isreal(d2):
            t = points[notMinPos,0] - (points[notMinPos,0]-points[minPos,0])*\
                    ((points[notMinPos,2]+d2-d1)/(points[notMinPos,2]-\
                    points[minPos,2]+2*d2))
            minPos = np.min([np.max([t,points[minPos,0]]), points[notMinPos,0]])
        else:
            minPos = np.mean(points[:,0])
        fmin = minVal
        return (minPos, fmin)
    
    xmin = np.min(np.real(points[:,0]))
    xmax = np.max(np.real(points[:,0]))

    # compute bounds of interpolation area
    if xminBound is None:
        xminBound = xmin
    if xmaxBound is None:
        xmaxBound = xmax

    # constraints based on available function values
    A = np.zeros((0, order+1))
    b = np.zeros(0)
    for i in range(nPoints):
        if np.imag(points[i,1]) == 0:
            constraint = np.zeros(order+1)
            for j in np.arange(order,-1,-1):
                # real parts only: x is real by convention, and imaginary
                # parts only mark unknown values
                constraint[order-j] = np.real(points[i,0])**j
            A = np.vstack((A, constraint))
            b = np.append(b, np.real(points[i,1]))

    # constraints based on available derivatives
    for i in range(nPoints):
        if np.isreal(points[i,2]):
            constraint = np.zeros(order+1)
            for j in range(1,order+1):
                constraint[j-1] = (order-j+1) * np.real(points[i,0])**(order-j)
            A = np.vstack((A, constraint))
            b = np.append(b, np.real(points[i,2]))
    
    # find interpolating polynomial
    params = np.linalg.solve(A, b)

    # compute critical points
    dParams = np.zeros(order)
    for i in range(params.size-1):
        dParams[i] = params[i] * (order-i)
    
    if np.any(np.isinf(dParams)):
        cp = np.concatenate((np.array([xminBound, xmaxBound]), points[:,0]))
    else:
        cp = np.concatenate((np.array([xminBound, xmaxBound]), points[:,0], \
                np.roots(dParams)))
    
    # test critical points
    fmin = np.inf
    minPos = (xminBound + xmaxBound)/2.
    for xCP in cp:
        # np.roots can return complex critical points; only real ones
        # inside the bounds are candidates
        if np.imag(xCP) == 0 and np.real(xCP) >= xminBound \
                and np.real(xCP) <= xmaxBound:
            fCP = np.polyval(params, np.real(xCP))
            if np.imag(fCP) == 0 and np.real(fCP) < fmin:
                minPos = np.double(np.real(xCP))
                fmin = np.double(np.real(fCP))
    
    # plot situation (omit this part for now since we are not going to use it
    # anyway)

    return (minPos, fmin)
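A minimal usage sketch for polyinterp as defined above: two line-search points with known values and slopes take the fast cubic path, while marking a slope with 1j falls back to a lower-order fit.

import numpy as np

# (x, f, g) rows: f(0)=1 with slope -1, f(1)=0.8 with slope 0.5
pts = np.array([[0., 1., -1.],
                [1., 0.8, 0.5]])
print(polyinterp(pts))    # cubic fit through both values and slopes

# slope unknown at the second point: mark it with 1j (quadratic fit)
pts2 = np.array([[0., 1., -1.],
                 [1., 0.8, 1j]])
print(polyinterp(pts2))   # ~ (0.625, 0.6875) for 0.8x^2 - x + 1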
Example #7
def get_hqmm_gradient(K_real, K_imag, rho_real, rho_imag, batch, burn_in,
                      strategy):

    K = np.array(K_real) + 1j * np.array(K_imag)
    rho = np.array(rho_real) + 1j * np.array(rho_imag)
    batch = np.array(batch)
    burn_in = int(burn_in)
    if len(batch.shape) in [0, 1]:
        batch = batch.reshape(1, -1)

    def log_loss(K_conj):
        """
            K is a tensor of CONJUGATE Kraus Operators of dim s x w x n x n
            s: output_dim
            w: ops_per_output
            n: state_dim
        """
        total_loss = 0.0

        # Iterate over each sequence in batch
        for i in range(batch.shape[0]):
            seq = batch[i]

            rho_new = np.log(rho.copy())

            # burn in
            for b in range(burn_in):
                temp_rho = np.zeros(
                    [K_conj.shape[1], K_conj.shape[2], K_conj.shape[3]],
                    dtype='complex128')
                for w in range(K_conj.shape[1]):
                    temp_rho[w, :, :] = np.dot(
                        np.dot(K[int(seq[b]) - 1, w, :, :], rho_new),
                        np.conjugate(K[int(seq[b]) - 1, w, :, :]).T)
                rho_new = np.sum(temp_rho, 0)
                rho_new = rho_new / np.trace(rho_new)

            # Compute likelihood for the sequence
            for s in seq[burn_in:]:
                rho_sum = logdotexp(
                    logdotexp(
                        np.log(np.conjugate(K_conj[int(s) - 1, 0, :, :])),
                        rho_new), np.log(K_conj[int(s) - 1, 0, :, :].T))
                for w in range(1, K_conj.shape[1]):
                    # subtract 1 to adjust for MATLAB indexing
                    rho_sum = logaddexp(
                        rho_sum,
                        logdotexp(
                            logdotexp(
                                np.log(
                                    np.conjugate(K_conj[int(s) - 1,
                                                        w, :, :])), rho_new),
                            np.log(K_conj[int(s) - 1, w, :, :].T)))

                rho_new = rho_sum

            total_loss += np.real(logsumexp(np.diag(rho_new)))

        return -total_loss / batch.shape[0]

    def loss(K_conj):
        """
            K is a tensor of CONJUGATE Kraus Operators of dim s x w x n x n
            s: output_dim
            w: ops_per_output
            n: state_dim
        """
        total_loss = 0.0

        # Iterate over each sequence in batch
        for i in range(batch.shape[0]):
            seq = batch[i]
            rho_new = rho.copy()
            # burn in
            for b in range(burn_in):
                temp_rho = np.zeros(
                    [K_conj.shape[1], K_conj.shape[2], K_conj.shape[3]],
                    dtype='complex128')
                for w in range(K_conj.shape[1]):
                    temp_rho[w, :, :] = np.dot(
                        np.dot(K[int(seq[b]) - 1, w, :, :], rho_new),
                        np.conjugate(K[int(seq[b]) - 1, w, :, :]).T)
                rho_new = np.sum(temp_rho, 0)
                rho_new = rho_new / np.trace(rho_new)

            # Compute likelihood for the sequence
            for s in seq[burn_in:]:
                rho_sum = np.zeros([K_conj.shape[2], K_conj.shape[2]],
                                   dtype='complex128')
                for w in range(K.shape[1]):
                    # subtract 1 to adjust for MATLAB indexing
                    rho_sum += np.dot(
                        np.dot(np.conjugate(K_conj[int(s) - 1, w, :, :]),
                               rho_new), K_conj[int(s) - 1, w, :, :].T)

                rho_new = rho_sum

            total_loss += np.log(np.real(np.trace(rho_new)))

        return -total_loss / batch.shape[0]

    if strategy == 'logloss':
        grad_fn = grad(log_loss)
        gradient = grad_fn(np.conjugate(K))
    elif strategy == 'loss':
        grad_fn = grad(loss)
        gradient = grad_fn(np.conjugate(K))
    else:
        raise Exception('Unknown Loss Strategy')

    return np.real(gradient), np.imag(gradient)
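A smoke-test sketch (shapes only, not from the source): random Kraus operators, a maximally mixed initial state, and one short 1-indexed sequence, assuming get_hqmm_gradient's dependencies (autograd's grad plus the module's logdotexp/logaddexp/logsumexp helpers) are importable.

import numpy as np

n_outputs, ops_per_output, state_dim = 2, 1, 2
K_re = np.random.randn(n_outputs, ops_per_output, state_dim, state_dim)
K_im = np.random.randn(n_outputs, ops_per_output, state_dim, state_dim)
rho0 = np.eye(state_dim) / state_dim   # maximally mixed initial state
batch = np.array([[1, 2, 1, 2]])       # one sequence of 1-indexed symbols

g_re, g_im = get_hqmm_gradient(K_re, K_im, rho0, np.zeros_like(rho0),
                               batch, burn_in=1, strategy='loss')
print(np.asarray(g_re).shape)          # matches K's shape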
Example #8
def get_qnb_gradient(labels, feats_matrix, K_real, K_imag, strategy):

    K = np.array(K_real) + 1j * np.array(K_imag)
    feats_matrix = np.array(feats_matrix, dtype='int32')
    labels = np.array(labels, dtype='int32')

    def logloss(K_conj):
        """
            K is a tensor of CONJUGATE Kraus Operators of dim s x y x x x x
            s: dim of features
            y: number of features
            x: number of labels
        """
        total_loss = 0.0

        # Iterate over each sequence in batch
        for i in range(labels.shape[0]):
            features = feats_matrix[i, :]
            label = labels[i] - 1

            # Compute likelihood of the label generating the given features
            conjKrausProduct = np.log(K_conj[features[0] - 1, 0, :, :])
            for s in range(1, features.shape[0]):
                conjKrausProduct = logdotexp(
                    np.log(K_conj[features[s] - 1, s, :, :]), conjKrausProduct)

            eta = np.zeros([K_conj.shape[3], K_conj.shape[3]],
                           dtype='complex128')
            eta[label, label] = 1

            prod1 = logdotexp(np.conjugate(conjKrausProduct), np.log(eta))
            prod2 = logdotexp(prod1, conjKrausProduct.T)
            total_loss += np.real(logsumexp(np.diag(prod2)))

            # total_loss += np.real(np.trace(np.kron(np.conjugate(conjKrausProduct)[:, label], conjKrausProduct.T[:, label]).reshape(K_conj.shape[2], K_conj.shape[3])))

        return -total_loss / labels.shape[0]

    def losswlog(K_conj):
        """
            K is a tensor of CONJUGATE Kraus Operators of dim s x y x x x x
            s: dim of features
            y: number of features
            x: number of labels
        """
        total_loss = 0.0

        # Iterate over each sequence in batch
        for i in range(labels.shape[0]):
            features = feats_matrix[i, :]
            label = labels[i] - 1

            # Compute likelihood of the label generating the given features
            conjKrausProduct = K_conj[features[0] - 1, 0, :, :]
            for s in range(1, features.shape[0]):
                conjKrausProduct = np.dot(K_conj[features[s] - 1, s, :, :],
                                          conjKrausProduct)

            eta = np.zeros([K_conj.shape[3], K_conj.shape[3]],
                           dtype='complex128')
            eta[label, label] = 1

            prod1 = np.dot(np.conjugate(conjKrausProduct), eta)
            prod2 = np.dot(prod1, conjKrausProduct.T)
            total_loss += np.log(np.real(np.trace(prod2)))

            # total_loss += np.real(np.trace(np.kron(np.conjugate(conjKrausProduct)[:, label], conjKrausProduct.T[:, label]).reshape(K_conj.shape[2], K_conj.shape[3])))

        return -total_loss / labels.shape[0]

    if strategy == 'losswlog':
        grad_fn = grad(losswlog)
        gradient = grad_fn(np.conjugate(K))
    else:
        grad_fn = grad(logloss)
        gradient = grad_fn(np.conjugate(K))

    return [np.real(gradient), np.imag(gradient)]
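A similar smoke-test sketch for the naive-Bayes variant, again with hypothetical shapes: feat_dim feature values, n_features features per example, n_labels labels, everything 1-indexed as the docstrings assume.

import numpy as np

feat_dim, n_features, n_labels = 4, 3, 2
K_re = np.random.randn(feat_dim, n_features, n_labels, n_labels)
K_im = np.random.randn(feat_dim, n_features, n_labels, n_labels)
labels = np.array([1, 2])                                # 1-indexed labels
feats = np.random.randint(1, feat_dim + 1, size=(2, n_features))

g_re, g_im = get_qnb_gradient(labels, feats, K_re, K_im,
                              strategy='losswlog')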