def solve(x_0, covMatrix, bounds):
    global covMatrix_, n_grad
    n_grad = 0
    covMatrix_ = covMatrix
    inf   = 1.0e20
    n = len(x_0)
    
    snopt = SNOPT_solver()
    snopt.setOption("Verbose", False)
    snopt.setOption("Specs file","/home/federico/workspace/TestMosek/algorithms/specs.spec")
    
    xlow    = np.array([bounds[0]]*n)
    xupp    = np.array([bounds[1]]*n)
    
    Flow    = np.array([0.0, 1.0])
    Fupp    = np.array([inf, 1.0])
    
    ObjRow = 1
    A = np.array([ [0]*n,
                   [1]*n ])

    G = np.array([ [1]*n,
                   [0]*n ])
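    # Row 1 (the objective) is nonlinear in every x, so its pattern lives
    # in G; row 2 is the linear constraint sum(x) == 1, so its unit
    # coefficients live in A.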

    [exe_time, iterations] = snopt.snopta(x0=x_0,xlow=xlow,xupp=xupp,
                 Flow=Flow,Fupp=Fupp, ObjRow=ObjRow, A=A, G=G,
                 usrfun=objFG)
    
    return exe_time, snopt.x, iterations
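
# The user function `objFG` referenced above is not shown in this snippet.
# A minimal, hypothetical sketch, assuming the objective is the quadratic
# form x'*covMatrix_*x (consistent with the global covMatrix_ and the
# declared structure: row 1 nonlinear in every x, row 2 purely linear):
def objFG(status, x, needF, needG, cu, iu, ru):
    global covMatrix_, n_grad
    n_grad += 1
    F = np.array([np.dot(x, np.dot(covMatrix_, x)),  # objective row (nonlinear)
                  0.0])  # row 2 is linear; its coefficients come from A
    G = 2.0 * np.dot(covMatrix_, x)  # gradient of the quadratic form
    return status, F, G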
Example #2
    xp = xp.values
    [xm, ym] = inter_min(xp, inter_par)

    return xm, ym, cse


# p=interpolate_val(x,inter_par);
# e=R2-(x-xc)'*(x-xc);
# M=-e/(p-y0);
# if p<y0
#     M=-inf;
# end
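
# A Python sketch of the commented MATLAB above; `interpolate_val`,
# `inter_par`, `R2`, `xc`, and `y0` are assumed to come from the
# surrounding (not shown) code, and `search_metric` is a hypothetical name:
def search_metric(x, inter_par, R2, xc, y0):
    p = interpolate_val(x, inter_par)  # surrogate model value at x
    e = R2 - np.dot(x - xc, x - xc)    # uncertainty: distance to the sphere
    M = -e / (p - y0)                  # merit of the candidate point
    if p < y0:                         # surrogate already beats the target
        M = -np.inf
    return M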

inf = 1.0e20

snopt = SNOPT_solver()

snopt.setOption('Verbose', True)
snopt.setOption('Solution print', True)
snopt.setOption('Print file', 'sntoya.out')

# Either dtype works, but the names for x and F have to be of
# the correct length, else they are both ignored by SNOPT:
xNames = np.array(['      x0', '      x1'])
FNames = np.array(['      F0', '      F1', '      F2'], dtype='c')

x0 = np.array([1.0, 1.0])

xlow = np.array([0.0, -inf])
xupp = np.array([inf, inf])
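
# The Toy-A setup is truncated here; the matching user function
# sntoya_objFG appears in Example #9 below. A hedged sketch of the
# remaining pieces, with F bounds assumed from the classic SNOPT Toy
# problem (minimize x1 subject to x0**2 + 4*x1**2 <= 4 and
# (x0 - 2)**2 + x1**2 <= 5):
Flow   = np.array([-inf, -inf, -inf])
Fupp   = np.array([ inf,  4.0,  5.0])
ObjRow = 1

snopt.snopta(name='sntoyA', usrfun=sntoya_objFG, x0=x0,
             xlow=xlow, xupp=xupp, Flow=Flow, Fupp=Fupp,
             ObjRow=ObjRow, xnames=xNames, Fnames=FNames)
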
def optimize_blending_function_between_two_distance_sigmas(sigmaA, sigmaB, personA, personB, min_distA, min_distB, params, constrain_at_endpoints=False):
    '''
    This function finds a blend between two trajectories that respects two minimum-distance constraints.
    '''
    # Some important parameters here
    nsamples = params['nsamples'] if 'nsamples' in params else 50
    ndims    = 6
    
    dt = 0.01
    xdot5_limit = 0.001

    inf = 1.0e20

    lambda_snap = 1  # alternatively (1/dt)**4: snap must be scaled down to be comparable to position.
    lambda_pos = 1

    # A few derived quantities
    nvars = ndims*nsamples
    nconstraints_continuity = (ndims-1)*nsamples
    nconstraints_obstacles = 2*nsamples
    nconstraints = 1 + nconstraints_continuity + nconstraints_obstacles

    # Solver configuration
    snopt = SNOPT_solver()
    snopt.setOption('Verbose',False)
    snopt.setOption('Solution print',False)
    snopt.setOption('Print file','test5.out')
    snopt.setOption('Iteration limit',8000)
    snopt.setOption('Print level',3)
    snopt.setOption('Major optimality',2e-6)
    snopt.setOption('Verify level',3) # Set to 3 to carefully check gradients


    # 1. Set up decision variables
    x     = np.array([0.5]*nsamples) # Initialize to 0.5
    xdot1 = np.array([0.0]*nsamples)
    xdot2 = np.array([0.0]*nsamples)
    xdot3 = np.array([0.0]*nsamples)
    xdot4 = np.array([0.0]*nsamples)
    v     = np.array([0.0]*nsamples) # C4 Continuity Control Variable

    x0 = np.matrix(np.c_[x,xdot1,xdot2,xdot3,xdot4,v]).A1 # Interleave [x[0],xdot1[0],xdot2[0]...]

    # 2. Set up the bounds on x
    low_x     = np.array([ 0.0] *nsamples) # X must be greater or equal to 0
    low_xdot1 = np.array([ -inf]*nsamples)
    low_xdot2 = np.array([ -inf]*nsamples)
    low_xdot3 = np.array([ -inf]*nsamples)
    low_xdot4 = np.array([ -inf]*nsamples)
    low_v     = np.array([ -xdot5_limit]*nsamples) # Bound control variable arbitrarily
    if constrain_at_endpoints:
        low_x[0] = 0.5
        low_x[nsamples-1] = 0.5    
    xlow = np.matrix(np.c_[low_x,low_xdot1,low_xdot2,low_xdot3,low_xdot4,low_v]).A1 # Interleave [x[0],xdot1[0],xdot2[0]...]

    upp_x     = np.array([ 1.0] *nsamples) # X must be less than or equal to 1
    upp_xdot1 = np.array([ inf]*nsamples)
    upp_xdot2 = np.array([ inf]*nsamples)
    upp_xdot3 = np.array([ inf]*nsamples)
    upp_xdot4 = np.array([ inf]*nsamples)
    upp_v     = np.array([ xdot5_limit]*nsamples) # Bound control variable arbitrarily
    if constrain_at_endpoints:
        upp_x[0] = 0.5
        upp_x[nsamples-1] = 0.5
    xupp = np.matrix(np.c_[upp_x,upp_xdot1,upp_xdot2,upp_xdot3,upp_xdot4,upp_v]).A1 # Interleave [x[0],xdot1[0],xdot2[0]...]

    # 3. Set up the objective function
    M = np.array([
            [0,1,0,0,0],
            [0,0,1,0,0],
            [0,0,0,1,0],
            [0,0,0,0,1],
            [0,0,0,0,0]
        ])

    N = np.array([0,0,0,0,1])
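    # (M, N) encode a chain of integrators: with state
    # s = [x, xdot1, xdot2, xdot3, xdot4] and control v = xdot5, we have
    # sdot = M*s + N*v, so one forward-Euler step yields the C4 continuity
    # constraint enforced below: s[i+1] = s[i] + (M*s[i] + N*v[i])*dt.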

    def grad_function(xM, compute_nonzero_only=False, compute_linear=False):
        G = np.zeros((nconstraints, nvars))

        # Set up the Jacobian structure of the cost function.
        # This only impacts the x_i and xdot4_i variables.
        obj_col = G[0,:]
        if not compute_nonzero_only:
            obj_col[::6] = 2*dt*lambda_pos*(xM[:,0] - 0.5)
            obj_col[4::6] = 2*dt*lambda_snap*xM[:,4]
        elif not compute_linear:
            obj_col[::6] = 1
            obj_col[4::6] = 1        

        if compute_linear:
            # The C4 continuity constraint is linear
            stupidcounter = 0
            add_to_fi = 0    
            for fi in range(1,nconstraints_continuity-5): # Loop over the continuity constraint rows
                fi_row = G[fi,:]

                fi += add_to_fi

                fi_row[fi-1] = 1
                fi_row[fi]   = dt
                fi_row[fi+5] = -1

                stupidcounter += 1
                if stupidcounter == 5:
                    add_to_fi += 1
                    stupidcounter = 0

        return G    

    def calc_obj(xM):
        # The objective is the sum of
        #   the squared L2 norm of the position error away from 0.5, and
        #   the squared L2 norm of the 4th derivative (snap) away from 0.
        obj_pos  = dt * np.sum( (xM[:,0] - 0.5)**2)
        obj_snap = dt * np.sum( (xM[:,4]      )**2)  
        objective = lambda_pos * obj_pos + lambda_snap * obj_snap
        return (objective, obj_pos, obj_snap)

    def calc_obstacle_constraints(xM):
        blend = xM[:,0]
        sigmaBlended = (blend[:,np.newaxis]*sigmaA + (1-blend)[:,np.newaxis]*sigmaB)
        constraintA = la.norm(sigmaBlended - personA, axis=1) - min_distA
        constraintB = la.norm(sigmaBlended - personB, axis=1) - min_distB
        return np.r_[constraintA, constraintB]

    def blend_test3_objFG(status,x,needF,needG,cu,iu,ru):

        xM = x.reshape(nsamples,ndims)

        objective, obj_pos, obj_snap = calc_obj(xM)

        # Evaluate the current continuity constraints
        continuity_x = np.zeros((nsamples, 5))
        for i in range(nsamples-1):
            si  = xM[i,0:5]
            vi  = xM[i,5  ]
            si1 = xM[i+1,0:5]
            continuity_x[i] = si + (M.dot(si) + N.dot(vi))*dt - si1
        continuity_x = np.matrix(continuity_x).A1

        obstacles = calc_obstacle_constraints(xM)

        F = np.concatenate(
            ([objective],
             continuity_x,
             obstacles))

        #G = grad_function(xM)

        return status, F#, G[G_nonzero_inds]

    # 4. Set up bounds on F
    # [ objective can be anything; equal to zero for continuity; greater than zero for obstacles along the trajectory ]
    low_F    = np.concatenate(([-inf], np.array([0,0,0,0,0]*nsamples), [0  , 0]*nsamples))
    upp_F    = np.concatenate(([ inf], np.array([0,0,0,0,0]*nsamples), [inf, inf]*nsamples))

    # SNOPT uses Fortran-style 1-based indexing, so the objective is row 1
    ObjRow = 1

    # Set up the linear and nonlinear structure of the jacobian matrix
    xM = x0.reshape(nsamples,ndims)
    G = grad_function(xM,compute_nonzero_only=True, compute_linear=False)
    G_nonzero_inds           = G.nonzero()
    A = grad_function(xM,compute_nonzero_only=True, compute_linear=True)
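    # Note: A and G are computed here only for their sparsity patterns; the
    # snopta call below does not pass them, and blend_test3_objFG returns
    # only F, so SNOPT estimates the derivatives by finite differencing.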

    # Now we solve
    a = time.time()

    snopt.snopta(name='blend_test3',usrfun=blend_test3_objFG,x0=x0,xlow=xlow,xupp=xupp,
                 Flow=low_F,Fupp=upp_F,ObjRow=ObjRow)
    b = time.time()

    print "Solved in %.4fs" % (b - a)

    print "Value of objective function: %.8f" % snopt.F[0]
    print "   lambda_pos: %f, lambda_snap: %f, " % (lambda_pos, lambda_snap)
    print "   objective: %f, obj_pos: %f, obj_snap: %f" % calc_obj(xM)
    
    xM = snopt.x.reshape(nsamples, ndims)
    return (xM, snopt)
Example #4
    sum = x[0] + x[1] + x[2]
    # Nonlinear objective term only
    fObj = 0.0
    if mode == 0 or mode == 2:
        fObj = sum**2

    gObj = np.zeros(nnObj,float)
    if mode == 1 or mode == 2:
        gObj[0] = 2.0*sum
        gObj[1] = 2.0*sum
        gObj[2] = 2.0*sum

    return mode, fObj, gObj
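
# The three gradient entries above are identical because the objective is
# (x[0] + x[1] + x[2])**2, whose partial derivative with respect to each
# variable is 2*(x[0] + x[1] + x[2]).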



snoptb   = SNOPT_solver()
inf      = 1.0e+20

snoptb.setOption('Infinite bound',inf)
snoptb.setOption('Print file','sntoyb.out')

m     = 4
n     = 4

nnCon = 2
nnJac = 2
nnObj = 3

# J contains the sparsity pattern of the Jacobian matrix.
# For nonlinear elements, enter any nonzero number (in this case 100).
# Linear elements must be correctly defined.
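
# A hypothetical illustration of that convention (the actual J for this
# problem is not shown in the snippet): the top-left nnCon-by-nnJac block
# is nonlinear, so any nonzero marker (here 100) suffices, while every
# linear element must carry its true coefficient.
J = np.array([[100, 100,   1,   0],
              [100, 100,   0,   1],
              [  2,   4,   1,   0],
              [  0,   0,   1,   1]], float)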
def optimize(p_eval,psi_eval,                            \
             t_nominal,user_progress_nominal,dt_nominal, \
             const_vals_ti,                              \
             x_min_ti,x_max_ti,                          \
             u_min_ti,u_max_ti):

    assert allclose(psi_eval, 0.0)

    print "flashlight.trajectory_optimization.quadrotor3d_direct_transcription_nonconst_dt: Initializing optimization problem..."
    sys_time_begin = time.time()
    solver_time_begin = sys_time_begin

    #
    # find numerically stable and feasible trajectories to initialize the solver
    #
    numerically_stable_infeasible_trajectory = quadrotor3d_gaussian_time_stretch.optimize_numerically_stable_infeasible( p_eval,psi_eval,                               \
                                                                                                                         t_nominal,user_progress_nominal,dt_nominal,    \
                                                                                                                         x_min_ti,x_max_ti,                             \
                                                                                                                         u_min_ti,u_max_ti,                             \
                                                                                                                         max_stretch_iters_numerically_stable,          \
                                                                                                                         gauss_width_in_terms_of_dt_numerically_stable, \
                                                                                                                         gauss_max_in_terms_of_dt_numerically_stable,   \
                                                                                                                         0 )

    x_numerically_stable, u_numerically_stable, t_numerically_stable, user_progress_numerically_stable, dt_numerically_stable = numerically_stable_infeasible_trajectory

    if use_gaussian_time_stretching_for_feasible:

        # use gaussian time stretching to find a feasible trajectory
        feasible_trajectory = quadrotor3d_gaussian_time_stretch.optimize_feasible( p_eval,psi_eval,                                                             \
                                                                                   t_numerically_stable,user_progress_numerically_stable,dt_numerically_stable, \
                                                                                   x_min_ti,x_max_ti,                                                           \
                                                                                   u_min_ti,u_max_ti,                                                           \
                                                                                   max_stretch_iters_feasible,                                                  \
                                                                                   gauss_width_in_terms_of_dt_feasible,                                         \
                                                                                   gauss_max_in_terms_of_dt_feasible,                                           \
                                                                                   extra_iters_feasible )

        x_feasible, u_feasible, t_feasible, user_progress_feasible, dt_feasible = feasible_trajectory

    else:

        # use uniform time stretching to find a feasible trajectory
        p_nominal, _, _, _ = curveutils.reparameterize_curve(
            p_eval, user_progress_nominal)
        psi_nominal, _, _, _ = curveutils.reparameterize_curve(
            psi_eval, user_progress_nominal)

        feasible_trajectory = quadrotor3d_uniform_time_stretch.optimize_feasible( p_nominal,psi_nominal,dt_nominal, \
                                                                                  x_min_ti,x_max_ti,                \
                                                                                  u_min_ti,u_max_ti,                \
                                                                                  max_bin_search_iters_feasible,    \
                                                                                  dt_upper_init_feasible )

        x_feasible, u_feasible, dt_scale_feasible = feasible_trajectory
        t_feasible = t_nominal * dt_scale_feasible * dt_scale_extra_stretch_feasible
        user_progress_feasible = user_progress_nominal
        dt_feasible = dt_nominal * dt_scale_feasible * dt_scale_extra_stretch_feasible

    # return user_progress_numerically_stable,None,None,None,None,t_numerically_stable,t_numerically_stable[-1]
    # return user_progress_feasible,None,None,None,None,t_feasible,t_feasible[-1]

    sys_time_end = time.time()
    print "flashlight.optimize.quadrotor3d_fixed_path: Finished initializing optimization problem (%.03f seconds)." % (
        sys_time_end - sys_time_begin)

    #
    # set up optimization problem constants
    #
    num_trajectory_samples = p_eval.shape[0]
    num_x_dims = quadrotor3d.num_x_dims
    num_u_dims = quadrotor3d.num_u_dims
    num_dt_dims = 1
    num_alpha_dims = num_x_dims + num_u_dims + num_dt_dims
    x_p_inds = arange(0, 3)
    x_e_inds = arange(3, 6)
    num_x_p_inds = x_p_inds.size

    # soft control effort constraints
    lamb_J_control_effort = 0.0 * ones(num_trajectory_samples)

    # soft position waypoint constraints
    num_dims_J_x_p_waypoint_ref_ti = 3
    lamb_J_x_p_waypoint = 0.01 * ones(num_trajectory_samples)
    J_x_p_waypoint_ref = x_numerically_stable[:, 0:3]

    # soft dt constraints
    num_dims_J_dt_ref_ti = 1
    lamb_J_dt = 0.0001 * ones(num_trajectory_samples)
    J_dt_ref = dt_numerically_stable * ones(num_trajectory_samples)

    # hard dynamics constraints
    num_dims_g_dynamics_ti = num_x_dims

    # hard state space waypoint constraints
    num_dims_g_x_waypoint_ti = num_x_dims
    num_dims_x_waypoint_ref_ti = num_x_dims
    lamb_g_x_waypoint = zeros(num_trajectory_samples)
    lamb_g_x_waypoint[[0, -1]] = 1
    X_waypoint_ref = zeros(
        (num_trajectory_samples, num_dims_x_waypoint_ref_ti))
    X_waypoint_ref[0] = array([
        p_eval[0, 0], p_eval[0, 1], p_eval[0, 2], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0
    ])
    X_waypoint_ref[-1] = array([
        p_eval[-1, 0], p_eval[-1, 1], p_eval[-1, 2], 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0
    ])

    lamb_g_x_waypoint_nonzero = nonzero(lamb_g_x_waypoint)[0]
    num_lamb_g_x_waypoint_nonzero = len(lamb_g_x_waypoint_nonzero)
    lamb_g_x_waypoint_ti_to_ti_sparse = -1 * ones_like(lamb_g_x_waypoint,
                                                       dtype=int32)
    lamb_g_x_waypoint_ti_to_ti_sparse[lamb_g_x_waypoint_nonzero] = arange(
        num_lamb_g_x_waypoint_nonzero)

    # hard position waypoint constraints
    num_dims_g_x_p_waypoint_ti = num_x_p_inds
    num_dims_x_p_waypoint_ref_ti = num_x_p_inds
    lamb_g_x_p_waypoint = zeros(num_trajectory_samples)
    X_p_waypoint_ref = zeros(
        (num_trajectory_samples, num_dims_x_p_waypoint_ref_ti))

    lamb_g_x_p_waypoint_nonzero = nonzero(lamb_g_x_p_waypoint)[0]
    num_lamb_g_x_p_waypoint_nonzero = len(lamb_g_x_p_waypoint_nonzero)
    lamb_g_x_p_waypoint_ti_to_ti_sparse = -1 * ones_like(lamb_g_x_p_waypoint,
                                                         dtype=int32)
    lamb_g_x_p_waypoint_ti_to_ti_sparse[lamb_g_x_p_waypoint_nonzero] = arange(
        num_lamb_g_x_p_waypoint_nonzero)

    # hard dt constraints
    num_dims_g_dt_ti = 1
    num_dims_dt_ref_ti = 1
    lamb_g_dt = zeros(num_trajectory_samples)
    dt_ref = zeros(num_trajectory_samples)

    lamb_g_dt_nonzero = nonzero(lamb_g_dt)[0]
    num_lamb_g_dt_nonzero = len(lamb_g_dt_nonzero)
    lamb_g_dt_ti_to_ti_sparse = -1 * ones_like(lamb_g_dt, dtype=int32)
    lamb_g_dt_ti_to_ti_sparse[lamb_g_dt_nonzero] = arange(
        num_lamb_g_dt_nonzero)

    dt_min_ti = dt_numerically_stable * 0.45
    dt_max_ti = dt_feasible * 1.55

    # stack all the const, lamb, and ref values
    const_vals = tile(const_vals_ti, (num_trajectory_samples, 1))
    lamb_vals = c_[lamb_J_control_effort, lamb_J_x_p_waypoint, lamb_J_dt,
                   lamb_g_x_waypoint, lamb_g_x_p_waypoint, lamb_g_dt]
    ref_vals = c_[J_x_p_waypoint_ref, J_dt_ref, X_waypoint_ref,
                  X_p_waypoint_ref, dt_ref]

    # number of constraints and decision variables
    num_constraints_g_dynamics = num_trajectory_samples - 1
    num_dims_g_dynamics_ti = num_x_dims

    num_constraints_1d_g_dynamics = num_constraints_g_dynamics * num_dims_g_dynamics_ti
    num_constraints_1d_g_x_waypoint = num_lamb_g_x_waypoint_nonzero * num_dims_g_x_waypoint_ti
    num_constraints_1d_g_x_p_waypoint = num_lamb_g_x_p_waypoint_nonzero * num_dims_g_x_p_waypoint_ti
    num_constraints_1d_g_dt = num_lamb_g_dt_nonzero * num_dims_g_dt_ti

    num_decision_vars_1d_X = num_trajectory_samples * num_x_dims
    num_decision_vars_1d_U = num_trajectory_samples * num_u_dims
    num_decision_vars_1d_DT = num_trajectory_samples * num_dt_dims

    def _unpack_Alpha_1d(Alpha_1d):

        X_1d_begin, X_1d_end = 0, 0 + num_trajectory_samples * num_x_dims
        U_1d_begin, U_1d_end = X_1d_end, X_1d_end + num_trajectory_samples * num_u_dims
        DT_1d_begin, DT_1d_end = U_1d_end, U_1d_end + num_trajectory_samples * num_dt_dims
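        # Alpha_1d is laid out as [X (flattened) | U (flattened) | DT],
        # matching how Alpha_0 is assembled from X_0, U_0, and DT_0 below.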

        X_1d = Alpha_1d[X_1d_begin:X_1d_end]
        U_1d = Alpha_1d[U_1d_begin:U_1d_end]
        DT_1d = Alpha_1d[DT_1d_begin:DT_1d_end]
        X = X_1d.reshape((num_trajectory_samples, num_x_dims))
        U = U_1d.reshape((num_trajectory_samples, num_u_dims))
        DT = DT_1d.reshape((num_trajectory_samples, num_dt_dims))

        return X, U, DT

    def _compute_common_vals(ti, X, U, DT):

        lamb_J_control_effort_ti = lamb_J_control_effort[ti]
        lamb_J_x_p_waypoint_ti = lamb_J_x_p_waypoint[ti]
        lamb_J_dt_ti = lamb_J_dt[ti]
        lamb_g_x_waypoint_ti = lamb_g_x_waypoint[ti]
        lamb_g_x_p_waypoint_ti = lamb_g_x_p_waypoint[ti]
        lamb_g_dt_ti = lamb_g_dt[ti]
        J_x_p_waypoint_ref_ti = matrix(J_x_p_waypoint_ref[ti]).T
        J_dt_ref_ti = J_dt_ref[ti]
        x_waypoint_ref_ti = matrix(X_waypoint_ref[ti]).T
        x_p_waypoint_ref_ti = matrix(X_p_waypoint_ref[ti]).T
        dt_ref_ti = dt_ref[ti]
        x_ti = matrix(X[ti]).T
        u_ti = matrix(U[ti]).T
        dt_ti = DT[ti]

        lamb_vals_ti = hstack([
            lamb_J_control_effort_ti, lamb_J_x_p_waypoint_ti, lamb_J_dt_ti,
            lamb_g_x_waypoint_ti, lamb_g_x_p_waypoint_ti, lamb_g_dt_ti
        ])
        ref_vals_ti = hstack([
            matrix(J_x_p_waypoint_ref_ti).A1, J_dt_ref_ti,
            matrix(x_waypoint_ref_ti).A1,
            matrix(x_p_waypoint_ref_ti).A1, dt_ref_ti
        ])
        var_vals_ti = hstack([x_ti.A1, u_ti.A1, dt_ti])
        common_vals_ti = hstack([lamb_vals_ti, ref_vals_ti, var_vals_ti])

        return common_vals_ti

    def _compute_sparse_jacobian_indices(ti, ti_to_ti_sparse, num_dims_gi):

        ti_sparse = ti_to_ti_sparse[ti]
        gi_begin = (ti_sparse + 0) * num_dims_gi
        gi_end = (ti_sparse + 1) * num_dims_gi
        xi_begin = (ti + 0) * num_x_dims
        xi_end = (ti + 1) * num_x_dims
        ui_begin = (ti + 0) * num_u_dims
        ui_end = (ti + 1) * num_u_dims
        dti_begin = (ti + 0) * num_dt_dims
        dti_end = (ti + 1) * num_dt_dims

        return gi_begin, gi_end, xi_begin, xi_end, ui_begin, ui_end, dti_begin, dti_end
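
    # Only samples with a nonzero lamb_* weight receive constraint rows;
    # ti_to_ti_sparse maps a trajectory index to its compacted row block,
    # with -1 marking samples that have no constraint attached.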

    # Define objective function
    def _obj_func(Alpha_1d):

        global snopt_major_iter_count
        global snopt_obj_vals

        X, U, DT = _unpack_Alpha_1d(Alpha_1d)

        common_vals = c_[lamb_vals, ref_vals, X, U, DT]
        const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals = c_[
            const_vals[:-1], X[:-1], X[1:], U[:-1], DT[:-1]]

        J_ti = J_ti_vectorized_autowrap(common_vals)
        g_dynamics = quadrotor3d.g_dynamics_ti_vectorized_autowrap(
            const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)

        g_x_waypoint = zeros(
            (num_lamb_g_x_waypoint_nonzero, num_dims_g_x_waypoint_ti))
        g_x_p_waypoint = zeros(
            (num_lamb_g_x_p_waypoint_nonzero, num_dims_g_x_p_waypoint_ti))
        g_dt = zeros((num_lamb_g_dt_nonzero, num_dims_g_dt_ti))

        for ti in range(num_trajectory_samples):

            common_vals_ti = _compute_common_vals(ti, X, U, DT)

            lamb_g_x_waypoint_ti = lamb_g_x_waypoint[ti]
            lamb_g_x_p_waypoint_ti = lamb_g_x_p_waypoint[ti]
            lamb_g_dt_ti = lamb_g_dt[ti]

            if lamb_g_x_waypoint_ti != 0:
                g_x_waypoint[lamb_g_x_waypoint_ti_to_ti_sparse[
                    ti]] = sympyutils.evaluate_anon_func(
                        g_x_waypoint_ti_autowrap, common_vals_ti).T
            if lamb_g_x_p_waypoint_ti != 0:
                g_x_p_waypoint[lamb_g_x_p_waypoint_ti_to_ti_sparse[
                    ti]] = sympyutils.evaluate_anon_func(
                        g_x_p_waypoint_ti_autowrap, common_vals_ti).T
            if lamb_g_dt_ti != 0:
                g_dt[lamb_g_dt_ti_to_ti_sparse[
                    ti]] = sympyutils.evaluate_anon_func(
                        g_dt_ti_autowrap, common_vals_ti)

        J = sum(J_ti)

        g_1d = hstack([
            matrix(g_dynamics).A1,
            matrix(g_x_waypoint).A1,
            matrix(g_x_p_waypoint).A1,
            matrix(g_dt).A1
        ])

        snopt_obj_vals[snopt_major_iter_count, 0] = J
        snopt_obj_vals[snopt_major_iter_count,
                       1] = sum(norm(g_dynamics, axis=1))
        snopt_major_iter_count = snopt_major_iter_count + 1

        set_printoptions(suppress=True)
        print "SNOPT major iteration: %d, Objective value: %f, Total g_dynamics error: %f" % (
            snopt_major_iter_count, J, sum(square(g_dynamics)))

        fail = 0
        return J, g_1d, fail

    # Define gradient function
    def _grad_func(Alpha_1d, J, g_1d, compute_nonzero_only=False):

        X, U, DT = _unpack_Alpha_1d(Alpha_1d)

        dJ_dX = zeros((num_trajectory_samples, num_x_dims))
        dJ_dU = zeros((num_trajectory_samples, num_u_dims))
        dJ_dDT = zeros((num_trajectory_samples, num_dt_dims))

        dgdynamics_dX = zeros(
            (num_constraints_1d_g_dynamics, num_decision_vars_1d_X))
        dgdynamics_dU = zeros(
            (num_constraints_1d_g_dynamics, num_decision_vars_1d_U))
        dgdynamics_dDT = zeros(
            (num_constraints_1d_g_dynamics, num_decision_vars_1d_DT))

        dgxwaypoint_dX = zeros(
            (num_constraints_1d_g_x_waypoint, num_decision_vars_1d_X))
        dgxwaypoint_dU = zeros(
            (num_constraints_1d_g_x_waypoint, num_decision_vars_1d_U))
        dgxwaypoint_dDT = zeros(
            (num_constraints_1d_g_x_waypoint, num_decision_vars_1d_DT))

        dgxpwaypoint_dX = zeros(
            (num_constraints_1d_g_x_p_waypoint, num_decision_vars_1d_X))
        dgxpwaypoint_dU = zeros(
            (num_constraints_1d_g_x_p_waypoint, num_decision_vars_1d_U))
        dgxpwaypoint_dDT = zeros(
            (num_constraints_1d_g_x_p_waypoint, num_decision_vars_1d_DT))

        dgdt_dX = zeros((num_constraints_1d_g_dt, num_decision_vars_1d_X))
        dgdt_dU = zeros((num_constraints_1d_g_dt, num_decision_vars_1d_U))
        dgdt_dDT = zeros((num_constraints_1d_g_dt, num_decision_vars_1d_DT))

        if not compute_nonzero_only:

            common_vals = c_[lamb_vals, ref_vals, X, U, DT]
            const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals = c_[
                const_vals[:-1], X[:-1], X[1:], U[:-1], DT[:-1]]

            dJ_dX = dJti_dxti_vectorized_autowrap(common_vals)
            dJ_dU = dJti_duti_vectorized_autowrap(common_vals)
            dJ_dDT = dJti_ddtti_vectorized_autowrap(common_vals)

            dgdynamics_dX_current_block = quadrotor3d.dgdynamicsti_dxcurrent_vectorized_autowrap(
                const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)
            dgdynamics_dX_next_block = quadrotor3d.dgdynamicsti_dxnext_vectorized_autowrap(
                const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)
            dgdynamics_dU_current_block = quadrotor3d.dgdynamicsti_ducurrent_vectorized_autowrap(
                const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)
            dgdynamics_dDT_current_block = quadrotor3d.dgdynamicsti_ddtcurrent_vectorized_autowrap(
                const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)

        for ti in range(num_trajectory_samples):

            if compute_nonzero_only:
                dJ_dX[ti] = 1
                dJ_dU[ti] = 1
                dJ_dDT[ti] = 1

        for ti in range(num_constraints_g_dynamics):

            gi_begin = (ti + 0) * num_dims_g_dynamics_ti
            gi_end = (ti + 1) * num_dims_g_dynamics_ti

            ai_x_current_begin = (ti + 0) * num_x_dims
            ai_x_current_end = (ti + 1) * num_x_dims
            ai_x_next_begin = (ti + 1) * num_x_dims
            ai_x_next_end = (ti + 2) * num_x_dims
            ai_u_current_begin = (ti + 0) * num_u_dims
            ai_u_current_end = (ti + 1) * num_u_dims
            ai_dt_current_begin = (ti + 0) * num_dt_dims
            ai_dt_current_end = (ti + 1) * num_dt_dims

            if compute_nonzero_only:
                dgdynamics_dX[gi_begin:gi_end,
                              ai_x_current_begin:ai_x_current_end] = 1
                dgdynamics_dX[gi_begin:gi_end,
                              ai_x_next_begin:ai_x_next_end] = 1
                dgdynamics_dU[gi_begin:gi_end,
                              ai_u_current_begin:ai_u_current_end] = 1
                dgdynamics_dDT[gi_begin:gi_end,
                               ai_dt_current_begin:ai_dt_current_end] = 1
            else:
                dgdynamics_dX[
                    gi_begin:gi_end, ai_x_current_begin:
                    ai_x_current_end] = dgdynamics_dX_current_block[ti]
                dgdynamics_dX[gi_begin:gi_end, ai_x_next_begin:
                              ai_x_next_end] = dgdynamics_dX_next_block[ti]
                dgdynamics_dU[
                    gi_begin:gi_end, ai_u_current_begin:
                    ai_u_current_end] = dgdynamics_dU_current_block[ti]
                dgdynamics_dDT[gi_begin:gi_end,
                               ai_dt_current_begin:ai_dt_current_end] = matrix(
                                   dgdynamics_dDT_current_block[ti]).T

        for ti in range(num_trajectory_samples):

            common_vals_ti = _compute_common_vals(ti, X, U, DT)

            lamb_g_x_waypoint_ti = lamb_g_x_waypoint[ti]
            lamb_g_x_p_waypoint_ti = lamb_g_x_p_waypoint[ti]
            lamb_g_dt_ti = lamb_g_dt[ti]

            if lamb_g_x_waypoint_ti != 0:

                gi_begin, gi_end, xi_begin, xi_end, ui_begin, ui_end, li_begin, li_end = _compute_sparse_jacobian_indices(
                    ti, lamb_g_x_waypoint_ti_to_ti_sparse,
                    num_dims_g_x_waypoint_ti)
                dgxwaypoint_dX[
                    gi_begin:gi_end,
                    xi_begin:xi_end] = sympyutils.evaluate_anon_func(
                        dgxwaypointti_dxti_autowrap, common_vals_ti)

            if lamb_g_x_p_waypoint_ti != 0:

                gi_begin, gi_end, xi_begin, xi_end, ui_begin, ui_end, li_begin, li_end = _compute_sparse_jacobian_indices(
                    ti, lamb_g_x_p_waypoint_ti_to_ti_sparse,
                    num_dims_g_x_p_waypoint_ti)
                dgxpwaypoint_dX[
                    gi_begin:gi_end,
                    xi_begin:xi_end] = sympyutils.evaluate_anon_func(
                        dgxpwaypointti_dxti_autowrap, common_vals_ti)

            if lamb_g_dt_ti != 0:

                gi_begin, gi_end, xi_begin, xi_end, ui_begin, ui_end, dti_begin, dti_end = _compute_sparse_jacobian_indices(
                    ti, lamb_g_dt_ti_to_ti_sparse, num_dims_g_dt_ti)
                dgdt_dDT[gi_begin:gi_end,
                         dti_begin:dti_end] = sympyutils.evaluate_anon_func(
                             dgdtti_ddtti_autowrap, common_vals_ti)

        dJ_dAlpha_1d = hstack(
            [matrix(dJ_dX).A1,
             matrix(dJ_dU).A1,
             matrix(dJ_dDT).A1])

        dgdynamics_dAlpha = c_[dgdynamics_dX, dgdynamics_dU, dgdynamics_dDT]
        dgxwaypoint_dAlpha = c_[dgxwaypoint_dX, dgxwaypoint_dU,
                                dgxwaypoint_dDT]
        dgxpwaypoint_dAlpha = c_[dgxpwaypoint_dX, dgxpwaypoint_dU,
                                 dgxpwaypoint_dDT]
        dgdt_dAlpha = c_[dgdt_dX, dgdt_dU, dgdt_dDT]

        dg_dAlpha = r_[dgdynamics_dAlpha, dgxwaypoint_dAlpha,
                       dgxpwaypoint_dAlpha, dgdt_dAlpha]

        fail = 0
        return matrix(dJ_dAlpha_1d).A, dg_dAlpha, fail

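    # snopta expects a single callback returning both F and the nonzero G
    # entries; the trailing snopt_dummy_val matches the dummy row appended
    # to the F bounds and to the constant matrix A further below.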
    def _obj_grad_func(status, Alpha_1d, needF, needG, cu, iu, ru):

        J, g_1d, fail = _obj_func(Alpha_1d)
        dJ_dAlpha_1d, dg_dAlpha, fail = _grad_func(Alpha_1d, J, g_1d)
        J_g_1d = hstack([J, g_1d, snopt_dummy_val])
        dJ_dAlpha_dg_dAlpha = r_[dJ_dAlpha_1d, dg_dAlpha]
        dJ_dAlpha_dg_dAlpha_nonzero_vals = dJ_dAlpha_dg_dAlpha[
            dJ_dAlpha_dg_dAlpha_nonzero_inds]

        return status, J_g_1d, dJ_dAlpha_dg_dAlpha_nonzero_vals

    inf = 1.0e20
    snopt = SNOPT_solver()

    snopt.setOption('Verbose', False)
    snopt.setOption('Solution print', False)
    snopt.setOption('Major print level', 0)
    snopt.setOption('Print level', 0)

    snopt_obj_row = 1
    snopt_num_funcs_1d = num_constraints_1d_g_dynamics + num_constraints_1d_g_x_waypoint + num_constraints_1d_g_x_p_waypoint + num_constraints_1d_g_dt + 1
    snopt_num_vars_1d = num_decision_vars_1d_X + num_decision_vars_1d_U + num_decision_vars_1d_DT
    snopt_dummy_val = 0.0
    snopt_dummy_array = zeros((1, snopt_num_vars_1d))

    global snopt_major_iter_count
    global snopt_obj_vals

    snopt_major_iter_count = 0
    snopt_obj_vals = -1 * ones((10000, 2))

    X_min = tile(x_min_ti.A1, (1, num_trajectory_samples))
    X_max = tile(x_max_ti.A1, (1, num_trajectory_samples))
    U_min = tile(u_min_ti.A1, (1, num_trajectory_samples))
    U_max = tile(u_max_ti.A1, (1, num_trajectory_samples))
    DT_min = tile(dt_min_ti, (1, num_trajectory_samples))
    DT_max = tile(dt_max_ti, (1, num_trajectory_samples))

    Alpha_min = hstack([matrix(X_min).A1, matrix(U_min).A1, matrix(DT_min).A1])
    Alpha_max = hstack([matrix(X_max).A1, matrix(U_max).A1, matrix(DT_max).A1])

    X_0 = x_feasible
    U_0 = u_feasible
    DT_0 = dt_feasible * ones(num_trajectory_samples)
    Alpha_0 = hstack([matrix(X_0).A1, matrix(U_0).A1, matrix(DT_0).A1])

    print "flashlight.trajectory_optimization.quadrotor3d_fixed_path: Calculating objective value on initial guess..."
    _obj_func(Alpha_0)

    J_g_1d_min = hstack([
        -inf,
        zeros(num_constraints_1d_g_dynamics),
        zeros(num_constraints_1d_g_x_waypoint),
        zeros(num_constraints_1d_g_x_p_waypoint),
        zeros(num_constraints_1d_g_dt), snopt_dummy_val
    ])
    J_g_1d_max = hstack([
        inf,
        zeros(num_constraints_1d_g_dynamics),
        zeros(num_constraints_1d_g_x_waypoint),
        zeros(num_constraints_1d_g_x_p_waypoint),
        zeros(num_constraints_1d_g_dt), snopt_dummy_val
    ])

    dJ_dAlpha_dg_dAlpha_const = r_[zeros(
        (snopt_num_funcs_1d, snopt_num_vars_1d)), snopt_dummy_array]
    dJ_dAlpha_dg_dAlpha_const[-1, 0] = 10e-9

    dJ_dAlpha_nonzero, dg_dAlpha_nonzero, fail = _grad_func(
        Alpha_0, J=None, g_1d=None, compute_nonzero_only=True)

    dJ_dAlpha_dg_dAlpha_nonzero = r_[dJ_dAlpha_nonzero, dg_dAlpha_nonzero,
                                     snopt_dummy_array]
    dJ_dAlpha_dg_dAlpha_nonzero_inds = dJ_dAlpha_dg_dAlpha_nonzero.nonzero()

    print "flashlight.trajectory_optimization.quadrotor3d_fixed_path: Solving optimization problem..."
    sys_time_begin = time.time()

    snopt.snopta(name="quadrotor_3d_fixed_path_optimization_test",
                 usrfun=_obj_grad_func,
                 x0=Alpha_0,
                 xlow=Alpha_min,
                 xupp=Alpha_max,
                 Flow=J_g_1d_min,
                 Fupp=J_g_1d_max,
                 ObjRow=snopt_obj_row,
                 A=dJ_dAlpha_dg_dAlpha_const,
                 G=dJ_dAlpha_dg_dAlpha_nonzero,
                 xnames=None,
                 Fnames=None)

    sys_time_end = time.time()
    print "flashlight.trajectory_optimization.quadrotor3d_fixed_path: Finished solving optimization problem (%.03f seconds)." % (
        sys_time_end - sys_time_begin)

    solver_time_end = sys_time_end
    solver_time = solver_time_end - solver_time_begin
    print "flashlight.trajectory_optimization.quadrotor3d_fixed_path: Total solver time was %.03f seconds." % solver_time

    Alpha_opt_1d = snopt.x
    X_opt, U_opt, DT_opt = _unpack_Alpha_1d(Alpha_opt_1d)

    dt_opt_cumsum = cumsum(DT_opt[:-1])
    t_opt = hstack(
        [t_numerically_stable[0], t_numerically_stable[0] + dt_opt_cumsum])
    T_final_opt = dt_opt_cumsum[-1]

    return X_opt, U_opt, DT_opt, t_opt, T_final_opt, solver_time, snopt_obj_vals
Example #7

import numpy as np
import scipy.sparse as sp
from optimize.snopt7 import SNOPT_solver


def dieta_fun(status, x, needF, needG, cu, iu, ru):
    # LP has no nonlinear terms in the objective
    F = []
    G = []
    return status, F, G


inf = 1.0e20
snopt = SNOPT_solver()

snopt.setOption('Print file', 'dieta.out')
snopt.setOption('Minor print level', 1)
snopt.setOption('Summary frequency', 1)
snopt.setOption('Print frequency', 1)

nF = 4
ObjRow = 4
n = 6

# We provide the linear components of the Jacobian
# matrix as a dense matrix.
A = np.array([[110, 205, 160, 160, 420, 260], [4, 32, 13, 8, 4, 14],
              [2, 12, 54, 285, 22, 80], [3, 24, 13, 9, 20, 19]], float)
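
# The snippet stops after A. A hedged sketch of the remaining setup,
# assuming the classic diet-LP data (calorie, protein, and calcium
# minimums with per-food serving limits); the exact numbers below are
# assumptions, not taken from this snippet. Since every term is linear
# and lives in A, dieta_fun has nothing to evaluate.
xlow = np.zeros(n)
xupp = np.array([4.0, 3.0, 2.0, 8.0, 2.0, 2.0])  # assumed serving limits
Flow = np.array([2000.0, 55.0, 800.0, -inf])     # assumed nutrient minimums
Fupp = np.array([inf, inf, inf, inf])

snopt.snopta(name='dieta', usrfun=dieta_fun, x0=np.ones(n),
             xlow=xlow, xupp=xupp, Flow=Flow, Fupp=Fupp,
             ObjRow=ObjRow, A=A)
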
Example #8
    gObj[0] =   x[6]
    gObj[1] = - x[5]
    gObj[2] = - x[6] + x[7]
    gObj[3] =   x[8]
    gObj[4] = - x[7]
    gObj[5] = - x[1]
    gObj[6] = - x[2] + x[0]
    gObj[7] = - x[4] + x[2]
    gObj[8] =   x[3]

    return mode, fObj, gObj


inf      = 1.0e+20

snoptb   = SNOPT_solver()
snoptb.setOption('Infinite bound',inf)
snoptb.setOption('Specs file','snmainb.spc')
snoptb.setOption('Print file','snmainb.out')

m      = 18
n      = 9
nnCon  = 14
ne     = 52
nnJac  = n
nnObj  = n

bl     = -inf*np.ones(n+m)
bu     =  inf*np.ones(n+m)

# Nonlinear constraints
Example #9
    return status, F


def sntoya_objFG(status,x,needF,needG,cu,iu,ru):
    F = np.array([                      x[1], # objective row
                   x[0]**2        + 4.0*x[1]**2,
                  (x[0] - 2.0)**2 +     x[1]**2 ])

    G = np.array([ 2*x[0], 8*x[1], 2*(x[0]-2), 2*x[1] ])
    return status, F, G
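
# G lists the nonlinear Jacobian entries row by row: dF1/dx0, dF1/dx1,
# dF2/dx0, dF2/dx1. The objective row F0 = x[1] is linear, so its unit
# coefficient belongs in the linear part A rather than in G.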



Example #10
0
    gObj[0] = x[6]
    gObj[1] = -x[5]
    gObj[2] = -x[6] + x[7]
    gObj[3] = x[8]
    gObj[4] = -x[7]
    gObj[5] = -x[1]
    gObj[6] = -x[2] + x[0]
    gObj[7] = -x[4] + x[2]
    gObj[8] = x[3]

    return mode, fObj, gObj


inf = 1.0e+20

snoptb = SNOPT_solver()
snoptb.setOption('Infinite bound', inf)
snoptb.setOption('Specs file', 'snmainb.spc')
snoptb.setOption('Print file', 'snmainb.out')

m = 18
n = 9
nnCon = 14
ne = 52
nnJac = n
nnObj = n

bl = -inf * np.ones(n + m)
bu = inf * np.ones(n + m)

# Nonlinear constraints
def optimize_blending_function_between_two_distance_sigmas(
        sigmaA,
        sigmaB,
        personA,
        personB,
        min_distA,
        min_distB,
        params,
        constrain_at_endpoints=False):
    '''
    This function finds a blend between two trajectories that respect two minimum distance constraints.
    '''
    # Some important parameters here
    nsamples = params['nsamples'] if 'nsamples' in params else 50
    ndims = 6

    dt = 0.01
    xdot5_limit = 0.001

    inf = 1.0e20

    lambda_snap = 1  #(1/dt)**4 # snap must be scaled down to be comparable to position.
    lambda_pos = 1

    # A few derived quantities
    nvars = ndims * nsamples
    nconstraints_continuity = (ndims - 1) * nsamples
    nconstraints_obstacles = 2 * nsamples
    nconstraints = 1 + nconstraints_continuity + nconstraints_obstacles

    # Solver configuration
    snopt = SNOPT_solver()
    snopt.setOption('Verbose', False)
    snopt.setOption('Solution print', False)
    snopt.setOption('Print file', 'test5.out')
    snopt.setOption('Iteration limit', 8000)
    snopt.setOption('Print level', 3)
    snopt.setOption('Major optimality', 2e-6)
    snopt.setOption('Verify level',
                    3)  # Turn to 3 to carefully check gradiants

    # 1. Set up decision variables
    x = np.array([0.5] * nsamples)  # Initialize to 0.5
    xdot1 = np.array([0.0] * nsamples)
    xdot2 = np.array([0.0] * nsamples)
    xdot3 = np.array([0.0] * nsamples)
    xdot4 = np.array([0.0] * nsamples)
    v = np.array([0.0] * nsamples)  # C4 Continuity Control Variable

    x0 = np.matrix(np.c_[x, xdot1, xdot2, xdot3, xdot4,
                         v]).A1  # Interleave [x[0],xdot1[0],xdot2[0]...]

    # 2. Set up the bounds on x
    low_x = np.array([0.0] * nsamples)  # X must be greater or equal to 0
    low_xdot1 = np.array([-inf] * nsamples)
    low_xdot2 = np.array([-inf] * nsamples)
    low_xdot3 = np.array([-inf] * nsamples)
    low_xdot4 = np.array([-inf] * nsamples)
    low_v = np.array([-xdot5_limit] *
                     nsamples)  # Bound control variable arbitrarily
    if constrain_at_endpoints:
        low_x[0] = 0.5
        low_x[nsamples - 1] = 0.5
    xlow = np.matrix(np.c_[low_x, low_xdot1, low_xdot2, low_xdot3, low_xdot4,
                           low_v]).A1  # Interleave [x[0],xdot1[0],xdot2[0]...]

    upp_x = np.array([1.0] * nsamples)  # X must be greater or equal to 0
    upp_xdot1 = np.array([inf] * nsamples)
    upp_xdot2 = np.array([inf] * nsamples)
    upp_xdot3 = np.array([inf] * nsamples)
    upp_xdot4 = np.array([inf] * nsamples)
    upp_v = np.array([xdot5_limit] *
                     nsamples)  # Bound control variable arbitrarily
    if constrain_at_endpoints:
        upp_x[0] = 0.5
        upp_x[nsamples - 1] = 0.5
    xupp = np.matrix(np.c_[upp_x, upp_xdot1, upp_xdot2, upp_xdot3, upp_xdot4,
                           upp_v]).A1  # Interleave [x[0],xdot1[0],xdot2[0]...]

    # 3. Set up the objective function
    M = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 0],
                  [0, 0, 0, 0, 1], [0, 0, 0, 0, 0]])

    N = np.array([0, 0, 0, 0, 1])

    def grad_function(xM, compute_nonzero_only=False, compute_linear=False):
        G = np.zeros((nconstraints, nvars))

        # Set up the jacobian structure of the cost function.
        # This only impacts the w_i and wdot4_i variables
        obj_col = G[0, :]
        if not compute_nonzero_only:
            obj_col[::6] = 2 * dt * lambda_pos * (xM[:, 0] - 0.5)
            obj_col[4::6] = 2 * dt * lambda_snap * xM[:, 4]
        elif not compute_linear:
            obj_col[::6] = 1
            obj_col[4::6] = 1

        if compute_linear:
            # The C4 continuity constraint is linear
            stupidcounter = 0
            add_to_fi = 0
            for fi in range(1, nconstraints_continuity -
                            5):  # Looping over the objective function
                fi_row = G[fi, :]

                fi += add_to_fi

                fi_row[fi - 1] = 1
                fi_row[fi] = dt
                fi_row[fi + 5] = -1

                stupidcounter += 1
                if stupidcounter == 5:
                    add_to_fi += 1
                    stupidcounter = 0

        return G

    def calc_obj(xM):
        # our objective is the sum of
        # the L2 norm of our position error away from 0.5
        # the L2 norm of our 4th derivative error away from 0
        obj_pos = dt * np.sum((xM[:, 0] - 0.5)**2)
        obj_snap = dt * np.sum((xM[:, 4])**2)
        objective = lambda_pos * obj_pos + lambda_snap * obj_snap
        return (objective, obj_pos, obj_snap)

    def calc_obstacle_constraints(xM):
        blend = xM[:, 0]
        sigmaBlended = (blend[:, np.newaxis] * sigmaA +
                        (1 - blend)[:, np.newaxis] * sigmaB)
        constraintA = la.norm(sigmaBlended - personA, axis=1) - min_distA
        constraintB = la.norm(sigmaBlended - personB, axis=1) - min_distB
        return np.r_[constraintA, constraintB]

    def blend_test3_objFG(status, x, needF, needG, cu, iu, ru):

        xM = x.reshape(nsamples, ndims)

        objective, obj_pos, obj_snap = calc_obj(xM)

        # Evaluate the current continuity constraints
        continuity_x = np.zeros((nsamples, 5))
        for i in range(nsamples - 1):
            si = xM[i, 0:5]
            vi = xM[i, 5]
            si1 = xM[i + 1, 0:5]
            continuity_x[i] = si + (M.dot(si) + N.dot(vi)) * dt - si1
        continuity_x = np.matrix(continuity_x).A1

        obstacles = calc_obstacle_constraints(xM)

        F = np.concatenate(([objective], continuity_x, obstacles))

        #G = grad_function(xM)

        return status, F  #, G[G_nonzero_inds]

    # 4. Set up bounds on F
    # [ objectivec can be anything, equal-to-zero for continuity, greater-than-0 for obstacles along traj]
    low_F = np.concatenate(
        ([-inf], np.array([0, 0, 0, 0, 0] * nsamples), [0, 0] * nsamples))
    upp_F = np.concatenate(
        ([inf], np.array([0, 0, 0, 0, 0] * nsamples), [inf, inf] * nsamples))

    # Matrix uses fortran numbering or something
    ObjRow = 1

    # Set up the linear and nonlinear structure of the jacobian matrix
    xM = x0.reshape(nsamples, ndims)
    G = grad_function(xM, compute_nonzero_only=True, compute_linear=False)
    G_nonzero_inds = G.nonzero()
    A = grad_function(xM, compute_nonzero_only=True, compute_linear=True)

    # Now we solve
    a = time.time()

    snopt.snopta(name='blend_test3',
                 usrfun=blend_test3_objFG,
                 x0=x0,
                 xlow=xlow,
                 xupp=xupp,
                 Flow=low_F,
                 Fupp=upp_F,
                 ObjRow=ObjRow)
    b = time.time()

    print "Solved in %.4fs" % (b - a)

    print "Value of objective function: %.8f" % snopt.F[0]
    print "   lambda_pos: %f, lambda_snap: %f, " % (lambda_pos, lambda_snap)
    print "   objective: %f, obj_pos: %f, obj_snap: %f" % calc_obj(xM)

    xM = snopt.x.reshape(nsamples, ndims)
    return (xM, snopt)
def optimize(p_eval,psi_eval,                            \
             t_nominal,user_progress_nominal,dt_nominal, \
             const_vals_ti,                              \
             x_min_ti,x_max_ti,                          \
             u_min_ti,u_max_ti):

    assert allclose(psi_eval,0.0)

    print "flashlight.trajectory_optimization.quadrotor3d_direct_transcription_nonconst_dt: Initializing optimization problem..."
    sys_time_begin    = time.time()
    solver_time_begin = sys_time_begin

    #
    # find numerically stable and feasible trajectories to initialize the solver
    #
    numerically_stable_infeasible_trajectory = quadrotor3d_gaussian_time_stretch.optimize_numerically_stable_infeasible( p_eval,psi_eval,                               \
                                                                                                                         t_nominal,user_progress_nominal,dt_nominal,    \
                                                                                                                         x_min_ti,x_max_ti,                             \
                                                                                                                         u_min_ti,u_max_ti,                             \
                                                                                                                         max_stretch_iters_numerically_stable,          \
                                                                                                                         gauss_width_in_terms_of_dt_numerically_stable, \
                                                                                                                         gauss_max_in_terms_of_dt_numerically_stable,   \
                                                                                                                         0 )

    x_numerically_stable,u_numerically_stable,t_numerically_stable,user_progress_numerically_stable,dt_numerically_stable = numerically_stable_infeasible_trajectory

    if use_gaussian_time_stretching_for_feasible:

        # use gaussian time stretching to find a feasible trajectory
        feasible_trajectory = quadrotor3d_gaussian_time_stretch.optimize_feasible( p_eval,psi_eval,                                                             \
                                                                                   t_numerically_stable,user_progress_numerically_stable,dt_numerically_stable, \
                                                                                   x_min_ti,x_max_ti,                                                           \
                                                                                   u_min_ti,u_max_ti,                                                           \
                                                                                   max_stretch_iters_feasible,                                                  \
                                                                                   gauss_width_in_terms_of_dt_feasible,                                         \
                                                                                   gauss_max_in_terms_of_dt_feasible,                                           \
                                                                                   extra_iters_feasible )

        x_feasible,u_feasible,t_feasible,user_progress_feasible,dt_feasible = feasible_trajectory

    else:

        # use uniform time stretching to find a feasible trajectory
        p_nominal, _, _, _   = curveutils.reparameterize_curve( p_eval, user_progress_nominal )
        psi_nominal, _, _, _ = curveutils.reparameterize_curve( psi_eval, user_progress_nominal )

        feasible_trajectory = quadrotor3d_uniform_time_stretch.optimize_feasible( p_nominal,psi_nominal,dt_nominal, \
                                                                                  x_min_ti,x_max_ti,                \
                                                                                  u_min_ti,u_max_ti,                \
                                                                                  max_bin_search_iters_feasible,    \
                                                                                  dt_upper_init_feasible )

        x_feasible,u_feasible,dt_scale_feasible = feasible_trajectory
        t_feasible                              = t_nominal*dt_scale_feasible*dt_scale_extra_stretch_feasible
        user_progress_feasible                  = user_progress_nominal
        dt_feasible                             = dt_nominal*dt_scale_feasible*dt_scale_extra_stretch_feasible

    # return user_progress_numerically_stable,None,None,None,None,t_numerically_stable,t_numerically_stable[-1]
    # return user_progress_feasible,None,None,None,None,t_feasible,t_feasible[-1]

    sys_time_end = time.time()
    print "flashlight.optimize.quadrotor3d_fixed_path: Finished initializing optimization problem (%.03f seconds)." % (sys_time_end - sys_time_begin)

    #
    # set up optimization problem constants
    #
    num_trajectory_samples = p_eval.shape[0]
    num_x_dims             = quadrotor3d.num_x_dims
    num_u_dims             = quadrotor3d.num_u_dims
    num_dt_dims            = 1
    num_alpha_dims         = num_x_dims + num_u_dims + num_dt_dims
    x_p_inds               = arange(0,3)
    x_e_inds               = arange(3,6)
    num_x_p_inds           = x_p_inds.size

    # soft control effort constraints
    lamb_J_control_effort = 0.0*ones(num_trajectory_samples)

    # soft position waypoint constraints
    num_dims_J_x_p_waypoint_ref_ti = 3
    lamb_J_x_p_waypoint            = 0.01*ones(num_trajectory_samples)
    J_x_p_waypoint_ref             = x_numerically_stable[:,0:3]

    # soft dt constraints
    num_dims_J_dt_ref_ti = 1
    lamb_J_dt            = 0.0001*ones(num_trajectory_samples)
    J_dt_ref             = dt_numerically_stable*ones(num_trajectory_samples)

    # hard dynamics constraints
    num_dims_g_dynamics_ti = num_x_dims

    # hard state space waypoint constraints
    num_dims_g_x_waypoint_ti   = num_x_dims
    num_dims_x_waypoint_ref_ti = num_x_dims
    lamb_g_x_waypoint          = zeros(num_trajectory_samples)
    lamb_g_x_waypoint[[0,-1]]  = 1
    X_waypoint_ref             = zeros((num_trajectory_samples,num_dims_x_waypoint_ref_ti))
    X_waypoint_ref[0]          = array([ p_eval[0,0],  p_eval[0,1],  p_eval[0,2],  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ])
    X_waypoint_ref[-1]         = array([ p_eval[-1,0], p_eval[-1,1], p_eval[-1,2], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ])

    lamb_g_x_waypoint_nonzero                                     = nonzero(lamb_g_x_waypoint)[0]
    num_lamb_g_x_waypoint_nonzero                                 = len(lamb_g_x_waypoint_nonzero)
    lamb_g_x_waypoint_ti_to_ti_sparse                             = -1*ones_like(lamb_g_x_waypoint,dtype=int32)
    lamb_g_x_waypoint_ti_to_ti_sparse[lamb_g_x_waypoint_nonzero]  = arange(num_lamb_g_x_waypoint_nonzero)

    # hard position waypoint constraints
    num_dims_g_x_p_waypoint_ti   = num_x_p_inds
    num_dims_x_p_waypoint_ref_ti = num_x_p_inds
    lamb_g_x_p_waypoint          = zeros(num_trajectory_samples)
    X_p_waypoint_ref             = zeros((num_trajectory_samples,num_dims_x_p_waypoint_ref_ti))

    lamb_g_x_p_waypoint_nonzero                                      = nonzero(lamb_g_x_p_waypoint)[0]
    num_lamb_g_x_p_waypoint_nonzero                                  = len(lamb_g_x_p_waypoint_nonzero)
    lamb_g_x_p_waypoint_ti_to_ti_sparse                              = -1*ones_like(lamb_g_x_p_waypoint,dtype=int32)
    lamb_g_x_p_waypoint_ti_to_ti_sparse[lamb_g_x_p_waypoint_nonzero] = arange(num_lamb_g_x_p_waypoint_nonzero)

    # hard dt constraints
    num_dims_g_dt_ti   = 1
    num_dims_dt_ref_ti = 1
    lamb_g_dt          = zeros(num_trajectory_samples)
    dt_ref             = zeros(num_trajectory_samples)

    lamb_g_dt_nonzero                            = nonzero(lamb_g_dt)[0]
    num_lamb_g_dt_nonzero                        = len(lamb_g_dt_nonzero)
    lamb_g_dt_ti_to_ti_sparse                    = -1*ones_like(lamb_g_dt,dtype=int32)
    lamb_g_dt_ti_to_ti_sparse[lamb_g_dt_nonzero] = arange(num_lamb_g_dt_nonzero)

    dt_min_ti = dt_numerically_stable*0.45
    dt_max_ti = dt_feasible*1.55

    # stack all the const, lamb, and ref values
    const_vals    = tile(const_vals_ti,(num_trajectory_samples,1))
    lamb_vals     = c_[ lamb_J_control_effort, lamb_J_x_p_waypoint, lamb_J_dt, lamb_g_x_waypoint, lamb_g_x_p_waypoint, lamb_g_dt ]
    ref_vals      = c_[ J_x_p_waypoint_ref, J_dt_ref, X_waypoint_ref, X_p_waypoint_ref, dt_ref ]

    # number of constraints and decision variables
    num_constraints_g_dynamics = num_trajectory_samples-1
    num_dims_g_dynamics_ti     = num_x_dims

    num_constraints_1d_g_dynamics     = num_constraints_g_dynamics*num_dims_g_dynamics_ti
    num_constraints_1d_g_x_waypoint   = num_lamb_g_x_waypoint_nonzero*num_dims_g_x_waypoint_ti
    num_constraints_1d_g_x_p_waypoint = num_lamb_g_x_p_waypoint_nonzero*num_dims_g_x_p_waypoint_ti
    num_constraints_1d_g_dt           = num_lamb_g_dt_nonzero*num_dims_g_dt_ti

    num_decision_vars_1d_X  = num_trajectory_samples*num_x_dims
    num_decision_vars_1d_U  = num_trajectory_samples*num_u_dims
    num_decision_vars_1d_DT = num_trajectory_samples*num_dt_dims

    def _unpack_Alpha_1d(Alpha_1d):

        X_1d_begin,X_1d_end   = 0,        0        + num_trajectory_samples*num_x_dims
        U_1d_begin,U_1d_end   = X_1d_end, X_1d_end + num_trajectory_samples*num_u_dims
        DT_1d_begin,DT_1d_end = U_1d_end, U_1d_end + num_trajectory_samples*num_dt_dims

        X_1d  = Alpha_1d[X_1d_begin:X_1d_end]
        U_1d  = Alpha_1d[U_1d_begin:U_1d_end]
        DT_1d = Alpha_1d[DT_1d_begin:DT_1d_end]
        X     = X_1d.reshape((num_trajectory_samples,num_x_dims))
        U     = U_1d.reshape((num_trajectory_samples,num_u_dims))
        DT    = DT_1d.reshape((num_trajectory_samples,num_dt_dims))

        return X,U,DT

    def _compute_common_vals(ti,X,U,DT):

        lamb_J_control_effort_ti = lamb_J_control_effort[ti]
        lamb_J_x_p_waypoint_ti   = lamb_J_x_p_waypoint[ti]
        lamb_J_dt_ti             = lamb_J_dt[ti]
        lamb_g_x_waypoint_ti     = lamb_g_x_waypoint[ti]
        lamb_g_x_p_waypoint_ti   = lamb_g_x_p_waypoint[ti]
        lamb_g_dt_ti             = lamb_g_dt[ti]
        J_x_p_waypoint_ref_ti    = matrix(J_x_p_waypoint_ref[ti]).T
        J_dt_ref_ti              = J_dt_ref[ti]
        x_waypoint_ref_ti        = matrix(X_waypoint_ref[ti]).T
        x_p_waypoint_ref_ti      = matrix(X_p_waypoint_ref[ti]).T
        dt_ref_ti                = dt_ref[ti]
        x_ti                     = matrix(X[ti]).T
        u_ti                     = matrix(U[ti]).T
        dt_ti                    = DT[ti]

        lamb_vals_ti   = hstack( [ lamb_J_control_effort_ti, lamb_J_x_p_waypoint_ti, lamb_J_dt_ti, lamb_g_x_waypoint_ti, lamb_g_x_p_waypoint_ti, lamb_g_dt_ti ] )
        ref_vals_ti    = hstack( [ matrix(J_x_p_waypoint_ref_ti).A1, J_dt_ref_ti, matrix(x_waypoint_ref_ti).A1, matrix(x_p_waypoint_ref_ti).A1, dt_ref_ti ] )
        var_vals_ti    = hstack( [ x_ti.A1, u_ti.A1, dt_ti ] )
        common_vals_ti = hstack( [ lamb_vals_ti, ref_vals_ti, var_vals_ti ] )

        return common_vals_ti

    def _compute_sparse_jacobian_indices(ti,ti_to_ti_sparse,num_dims_gi):

        ti_sparse = ti_to_ti_sparse[ti]
        gi_begin  = (ti_sparse+0)*num_dims_gi
        gi_end    = (ti_sparse+1)*num_dims_gi
        xi_begin  = (ti+0)*num_x_dims
        xi_end    = (ti+1)*num_x_dims
        ui_begin  = (ti+0)*num_u_dims
        ui_end    = (ti+1)*num_u_dims
        dti_begin = (ti+0)*num_dt_dims
        dti_end   = (ti+1)*num_dt_dims

        return gi_begin,gi_end,xi_begin,xi_end,ui_begin,ui_end,dti_begin,dti_end

    # Define objective function
    def _obj_func(Alpha_1d):

        global snopt_major_iter_count
        global snopt_obj_vals

        X,U,DT = _unpack_Alpha_1d(Alpha_1d)

        common_vals                                                  = c_[ lamb_vals, ref_vals, X, U, DT ]
        const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals = c_[ const_vals[:-1], X[:-1], X[1:], U[:-1], DT[:-1] ]
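        # The X[:-1] / X[1:] slices above pair each sample with its successor:
        # every dynamics constraint couples (x_ti, x_ti+1), which is why there
        # are num_trajectory_samples-1 dynamics constraints.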

        J_ti       = J_ti_vectorized_autowrap(common_vals)
        g_dynamics = quadrotor3d.g_dynamics_ti_vectorized_autowrap(const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)

        g_x_waypoint   = zeros((num_lamb_g_x_waypoint_nonzero,   num_dims_g_x_waypoint_ti))
        g_x_p_waypoint = zeros((num_lamb_g_x_p_waypoint_nonzero, num_dims_g_x_p_waypoint_ti))
        g_dt           = zeros((num_lamb_g_dt_nonzero,           num_dims_g_dt_ti))

        for ti in range(num_trajectory_samples):

            common_vals_ti = _compute_common_vals(ti,X,U,DT)

            lamb_g_x_waypoint_ti   = lamb_g_x_waypoint[ti]
            lamb_g_x_p_waypoint_ti = lamb_g_x_p_waypoint[ti]
            lamb_g_dt_ti           = lamb_g_dt[ti]

            if lamb_g_x_waypoint_ti   != 0: g_x_waypoint[lamb_g_x_waypoint_ti_to_ti_sparse[ti]]     = sympyutils.evaluate_anon_func( g_x_waypoint_ti_autowrap,   common_vals_ti ).T
            if lamb_g_x_p_waypoint_ti != 0: g_x_p_waypoint[lamb_g_x_p_waypoint_ti_to_ti_sparse[ti]] = sympyutils.evaluate_anon_func( g_x_p_waypoint_ti_autowrap, common_vals_ti ).T
            if lamb_g_dt_ti           != 0: g_dt[lamb_g_dt_ti_to_ti_sparse[ti]]                     = sympyutils.evaluate_anon_func( g_dt_ti_autowrap,           common_vals_ti )

        J = sum(J_ti)

        g_1d = hstack( [ matrix(g_dynamics).A1, matrix(g_x_waypoint).A1, matrix(g_x_p_waypoint).A1, matrix(g_dt).A1 ] )

        snopt_obj_vals[snopt_major_iter_count,0] = J
        snopt_obj_vals[snopt_major_iter_count,1] = sum(norm(g_dynamics,axis=1))
        snopt_major_iter_count                   = snopt_major_iter_count+1

        set_printoptions(suppress=True)
        print "SNOPT major iteration: %d, Objective value: %f, Total g_dynamics error: %f" % (snopt_major_iter_count,J,sum(square(g_dynamics)))

        fail = 0
        return J, g_1d, fail

    # Define gradient function
    def _grad_func(Alpha_1d, J, g_1d, compute_nonzero_only=False):

        X,U,DT = _unpack_Alpha_1d(Alpha_1d)

        dJ_dX  = zeros((num_trajectory_samples,num_x_dims))
        dJ_dU  = zeros((num_trajectory_samples,num_u_dims))
        dJ_dDT = zeros((num_trajectory_samples,num_dt_dims))

        dgdynamics_dX  = zeros((num_constraints_1d_g_dynamics,num_decision_vars_1d_X))
        dgdynamics_dU  = zeros((num_constraints_1d_g_dynamics,num_decision_vars_1d_U))
        dgdynamics_dDT = zeros((num_constraints_1d_g_dynamics,num_decision_vars_1d_DT))

        dgxwaypoint_dX  = zeros((num_constraints_1d_g_x_waypoint,num_decision_vars_1d_X))
        dgxwaypoint_dU  = zeros((num_constraints_1d_g_x_waypoint,num_decision_vars_1d_U))
        dgxwaypoint_dDT = zeros((num_constraints_1d_g_x_waypoint,num_decision_vars_1d_DT))

        dgxpwaypoint_dX  = zeros((num_constraints_1d_g_x_p_waypoint,num_decision_vars_1d_X))
        dgxpwaypoint_dU  = zeros((num_constraints_1d_g_x_p_waypoint,num_decision_vars_1d_U))
        dgxpwaypoint_dDT = zeros((num_constraints_1d_g_x_p_waypoint,num_decision_vars_1d_DT))

        dgdt_dX  = zeros((num_constraints_1d_g_dt,num_decision_vars_1d_X))
        dgdt_dU  = zeros((num_constraints_1d_g_dt,num_decision_vars_1d_U))
        dgdt_dDT = zeros((num_constraints_1d_g_dt,num_decision_vars_1d_DT))
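        # When compute_nonzero_only=True this function only marks which
        # Jacobian entries can ever be nonzero (all marked entries are set to
        # 1); that sparsity pattern is extracted once, further below, and the
        # regular calls then fill in the actual derivative values.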

        if not compute_nonzero_only:
            
            common_vals                                                  = c_[ lamb_vals, ref_vals, X, U, DT ]
            const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals = c_[ const_vals[:-1], X[:-1], X[1:], U[:-1], DT[:-1] ]

            dJ_dX  = dJti_dxti_vectorized_autowrap(common_vals)
            dJ_dU  = dJti_duti_vectorized_autowrap(common_vals)
            dJ_dDT = dJti_ddtti_vectorized_autowrap(common_vals)

            dgdynamics_dX_current_block  = quadrotor3d.dgdynamicsti_dxcurrent_vectorized_autowrap(const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)
            dgdynamics_dX_next_block     = quadrotor3d.dgdynamicsti_dxnext_vectorized_autowrap(const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)
            dgdynamics_dU_current_block  = quadrotor3d.dgdynamicsti_ducurrent_vectorized_autowrap(const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)
            dgdynamics_dDT_current_block = quadrotor3d.dgdynamicsti_ddtcurrent_vectorized_autowrap(const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)

        for ti in range(num_trajectory_samples):

            if compute_nonzero_only:
                dJ_dX[ti]  = 1
                dJ_dU[ti]  = 1
                dJ_dDT[ti] = 1

        for ti in range(num_constraints_g_dynamics):

            gi_begin = (ti+0)*num_dims_g_dynamics_ti
            gi_end   = (ti+1)*num_dims_g_dynamics_ti

            ai_x_current_begin  = (ti+0)*num_x_dims
            ai_x_current_end    = (ti+1)*num_x_dims
            ai_x_next_begin     = (ti+1)*num_x_dims
            ai_x_next_end       = (ti+2)*num_x_dims
            ai_u_current_begin  = (ti+0)*num_u_dims
            ai_u_current_end    = (ti+1)*num_u_dims
            ai_dt_current_begin = (ti+0)*num_dt_dims
            ai_dt_current_end   = (ti+1)*num_dt_dims

            if compute_nonzero_only:
                dgdynamics_dX[gi_begin:gi_end,ai_x_current_begin:ai_x_current_end]    = 1
                dgdynamics_dX[gi_begin:gi_end,ai_x_next_begin:ai_x_next_end]          = 1
                dgdynamics_dU[gi_begin:gi_end,ai_u_current_begin:ai_u_current_end]    = 1
                dgdynamics_dDT[gi_begin:gi_end,ai_dt_current_begin:ai_dt_current_end] = 1
            else:
                dgdynamics_dX[gi_begin:gi_end,ai_x_current_begin:ai_x_current_end]    = dgdynamics_dX_current_block[ti]
                dgdynamics_dX[gi_begin:gi_end,ai_x_next_begin:ai_x_next_end]          = dgdynamics_dX_next_block[ti]
                dgdynamics_dU[gi_begin:gi_end,ai_u_current_begin:ai_u_current_end]    = dgdynamics_dU_current_block[ti]
                dgdynamics_dDT[gi_begin:gi_end,ai_dt_current_begin:ai_dt_current_end] = matrix(dgdynamics_dDT_current_block[ti]).T

        for ti in range(num_trajectory_samples):

            common_vals_ti = _compute_common_vals(ti,X,U,DT)

            lamb_g_x_waypoint_ti   = lamb_g_x_waypoint[ti]
            lamb_g_x_p_waypoint_ti = lamb_g_x_p_waypoint[ti]
            lamb_g_dt_ti           = lamb_g_dt[ti]

            if lamb_g_x_waypoint_ti != 0:

                gi_begin,gi_end,xi_begin,xi_end,ui_begin,ui_end,dti_begin,dti_end = _compute_sparse_jacobian_indices(ti,lamb_g_x_waypoint_ti_to_ti_sparse,num_dims_g_x_waypoint_ti)
                dgxwaypoint_dX[gi_begin:gi_end,xi_begin:xi_end]                 = sympyutils.evaluate_anon_func( dgxwaypointti_dxti_autowrap, common_vals_ti )

            if lamb_g_x_p_waypoint_ti != 0:

                gi_begin,gi_end,xi_begin,xi_end,ui_begin,ui_end,dti_begin,dti_end = _compute_sparse_jacobian_indices(ti,lamb_g_x_p_waypoint_ti_to_ti_sparse,num_dims_g_x_p_waypoint_ti)
                dgxpwaypoint_dX[gi_begin:gi_end,xi_begin:xi_end]                = sympyutils.evaluate_anon_func( dgxpwaypointti_dxti_autowrap, common_vals_ti )

            if lamb_g_dt_ti != 0:

                gi_begin,gi_end,xi_begin,xi_end,ui_begin,ui_end,dti_begin,dti_end = _compute_sparse_jacobian_indices(ti,lamb_g_dt_ti_to_ti_sparse,num_dims_g_dt_ti)
                dgdt_dDT[gi_begin:gi_end,dti_begin:dti_end]                       = sympyutils.evaluate_anon_func( dgdtti_ddtti_autowrap, common_vals_ti )

        dJ_dAlpha_1d = hstack( [ matrix(dJ_dX).A1, matrix(dJ_dU).A1, matrix(dJ_dDT).A1 ] )

        dgdynamics_dAlpha   = c_[ dgdynamics_dX,   dgdynamics_dU,   dgdynamics_dDT   ]
        dgxwaypoint_dAlpha  = c_[ dgxwaypoint_dX,  dgxwaypoint_dU,  dgxwaypoint_dDT  ]
        dgxpwaypoint_dAlpha = c_[ dgxpwaypoint_dX, dgxpwaypoint_dU, dgxpwaypoint_dDT ]
        dgdt_dAlpha         = c_[ dgdt_dX,         dgdt_dU,         dgdt_dDT ]

        dg_dAlpha = r_[ dgdynamics_dAlpha, dgxwaypoint_dAlpha, dgxpwaypoint_dAlpha, dgdt_dAlpha ]

        fail = 0
        return matrix(dJ_dAlpha_1d).A, dg_dAlpha, fail

    def _obj_grad_func(status,Alpha_1d,needF,needG,cu,iu,ru):
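        # snopta usrfun interface: return the stacked function vector
        # [ J, g_1d, dummy ] together with only the gradient entries at the
        # nonzero positions in dJ_dAlpha_dg_dAlpha_nonzero_inds (the sparsity
        # pattern computed once, further below).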

        J, g_1d, fail                    = _obj_func(Alpha_1d)
        dJ_dAlpha_1d, dg_dAlpha, fail    = _grad_func(Alpha_1d,J,g_1d)
        J_g_1d                           = hstack( [ J, g_1d, snopt_dummy_val ] )
        dJ_dAlpha_dg_dAlpha              = r_[ dJ_dAlpha_1d, dg_dAlpha ]
        dJ_dAlpha_dg_dAlpha_nonzero_vals = dJ_dAlpha_dg_dAlpha[dJ_dAlpha_dg_dAlpha_nonzero_inds]

        return status, J_g_1d, dJ_dAlpha_dg_dAlpha_nonzero_vals

    inf   = 1.0e20
    snopt = SNOPT_solver()

    snopt.setOption('Verbose',False)
    snopt.setOption('Solution print',False)
    snopt.setOption('Major print level',0)
    snopt.setOption('Print level',0)

    snopt_obj_row      = 1
    snopt_num_funcs_1d = num_constraints_1d_g_dynamics + num_constraints_1d_g_x_waypoint + num_constraints_1d_g_x_p_waypoint + num_constraints_1d_g_dt + 1
    snopt_num_vars_1d  = num_decision_vars_1d_X + num_decision_vars_1d_U + num_decision_vars_1d_DT
    snopt_dummy_val    = 0.0
    snopt_dummy_array  = zeros((1,snopt_num_vars_1d))

    global snopt_major_iter_count
    global snopt_obj_vals

    snopt_major_iter_count = 0
    snopt_obj_vals         = -1*ones((10000,2))

    X_min  = tile(x_min_ti.A1,(1,num_trajectory_samples))
    X_max  = tile(x_max_ti.A1,(1,num_trajectory_samples))
    U_min  = tile(u_min_ti.A1,(1,num_trajectory_samples))
    U_max  = tile(u_max_ti.A1,(1,num_trajectory_samples))
    DT_min = tile(dt_min_ti,(1,num_trajectory_samples))
    DT_max = tile(dt_max_ti,(1,num_trajectory_samples))

    Alpha_min = hstack( [ matrix(X_min).A1, matrix(U_min).A1, matrix(DT_min).A1 ] )
    Alpha_max = hstack( [ matrix(X_max).A1, matrix(U_max).A1, matrix(DT_max).A1 ] )

    X_0     = x_feasible
    U_0     = u_feasible
    DT_0    = dt_feasible*ones(num_trajectory_samples)
    Alpha_0 = hstack( [ matrix(X_0).A1, matrix(U_0).A1, matrix(DT_0).A1 ] )

    print "flashlight.trajectory_optimization.quadrotor3d_fixed_path: Calculating objective value on initial guess..."        
    _obj_func(Alpha_0)

    J_g_1d_min = hstack( [ -inf, zeros(num_constraints_1d_g_dynamics), zeros(num_constraints_1d_g_x_waypoint), zeros(num_constraints_1d_g_x_p_waypoint), zeros(num_constraints_1d_g_dt), snopt_dummy_val ] )
    J_g_1d_max = hstack( [  inf, zeros(num_constraints_1d_g_dynamics), zeros(num_constraints_1d_g_x_waypoint), zeros(num_constraints_1d_g_x_p_waypoint), zeros(num_constraints_1d_g_dt), snopt_dummy_val ] )

    dJ_dAlpha_dg_dAlpha_const       = r_[ zeros((snopt_num_funcs_1d,snopt_num_vars_1d)), snopt_dummy_array ]
    dJ_dAlpha_dg_dAlpha_const[-1,0] = 10e-9
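    # The tiny 10e-9 (i.e. 1e-8) entry above sits in the appended dummy row of
    # the otherwise-zero constant Jacobian; it appears to be there so that
    # snopta receives a non-empty linear term A.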

    dJ_dAlpha_nonzero, dg_dAlpha_nonzero, fail = _grad_func(Alpha_0, J=None, g_1d=None, compute_nonzero_only=True)

    dJ_dAlpha_dg_dAlpha_nonzero      = r_[ dJ_dAlpha_nonzero, dg_dAlpha_nonzero, snopt_dummy_array ]
    dJ_dAlpha_dg_dAlpha_nonzero_inds = dJ_dAlpha_dg_dAlpha_nonzero.nonzero()

    print "flashlight.trajectory_optimization.quadrotor3d_fixed_path: Solving optimization problem..."
    sys_time_begin = time.time()

    snopt.snopta(
        name="quadrotor_3d_fixed_path_optimization_test",
        usrfun=_obj_grad_func,
        x0=Alpha_0,
        xlow=Alpha_min,
        xupp=Alpha_max,
        Flow=J_g_1d_min,
        Fupp=J_g_1d_max,
        ObjRow=snopt_obj_row,
        A=dJ_dAlpha_dg_dAlpha_const,
        G=dJ_dAlpha_dg_dAlpha_nonzero,
        xnames=None,
        Fnames=None)

    sys_time_end = time.time()
    print "flashlight.trajectory_optimization.quadrotor3d_fixed_path: Finished solving optimization problem (%.03f seconds)." % (sys_time_end - sys_time_begin)

    solver_time_end = sys_time_end
    solver_time     = solver_time_end - sys_time_begin
    print "flashlight.trajectory_optimization.quadrotor3d_fixed_path: Total solver time was %.03f seconds." % solver_time

    Alpha_opt_1d       = snopt.x
    X_opt,U_opt,DT_opt = _unpack_Alpha_1d(Alpha_opt_1d)

    dt_opt_cumsum = cumsum(DT_opt[:-1])
    t_opt         = hstack( [ t_numerically_stable[0], t_numerically_stable[0] + dt_opt_cumsum ] )
    T_final_opt   = dt_opt_cumsum[-1]

    return X_opt,U_opt,DT_opt,t_opt,T_final_opt,solver_time,snopt_obj_vals
Example #14
0
class ControlNN:
    def __init__(self, conf, load_path=None):
        tf_random_seed = None
        nonlinearity = tf.nn.relu
        self.keep_prob_train_val = 1.0
        self.floatX = 'float32'

        def unif_fanin_mat(shape, name):
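            # Fan-in scaled uniform init: weights are drawn from U(-b, b) with
            # b = sqrt(3*keep_prob/fan_in), which keeps activation variance
            # roughly constant across layers (a LeCun-style scheme, scaled by
            # the dropout keep probability).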
            b = np.sqrt(3 * self.keep_prob_train_val / shape[0])
            initial = tf.random_uniform(shape, minval=-b, maxval=b, seed=tf_random_seed, dtype=self.floatX)
            return tf.Variable(initial, name=name)

        def bias(shape, name):
            initial = tf.constant(0.1, shape=shape, dtype=self.floatX)
            return tf.Variable(initial, name=name)

        self.snopt = SNOPT_solver()
        self.snopt.setOption('Major print level', 0)
        #self.snopt.setOption('Major feasibility', 1.0e-6)
        #self.snopt.setOption('Minor feasibility', 1.0e-6)
        #self.snopt.setOption('Major optimality', 1.0e-6)
        #self.snopt.setOption('Linesearch tolerance', 0.9)
        #self.snopt.setOption('Major iterations', 100)
        #self.snopt.setOption('Minor iterations', 50)
        #self.snopt.setOption('Scale option', 0)
        #self.snopt_max_count = conf['snopt_max_count']
        self.snopt.setOption('Iteration limit', conf['snopt_max_count'])

        self.profiler = Profiler()

        self.n_s = conf['n_s']
        self.n_a = conf['n_a']
        self.n_sa = self.n_s + self.n_a
        n_hidden = conf['n_hidden']
        self.n_1 = n_hidden
        self.n_2 = n_hidden
        self.n_q = conf['n_q']

        error_functions = [tf.square, tf.abs]
        self.error_function = error_functions[conf['error_type']]
        self.one_layer_only = conf['one_layer_only']
        self.n_minibatch = conf['minibatch_size']
        self.n_batches = conf['num_batches']
        self.n_megabatch = self.n_minibatch * self.n_batches
        self.max_a_min_iters = 5
        self.max_abs_torque = conf['max_torque']
        #self.a_tolerance = conf['a_tolerance']
        self.max_torques = np.array([[self.max_abs_torque]], dtype=self.floatX)
        self.max_torques_p = np.ones((self.n_megabatch,1)) * np.array([[self.max_abs_torque]], dtype=self.floatX)
        self.min_torques = np.array([[-self.max_abs_torque]], dtype=self.floatX)
        self.min_torques_p = np.ones((self.n_megabatch,1)) * np.array([[-self.max_abs_torque]], dtype=self.floatX)

        self.set_standardizer((np.zeros(self.n_sa), np.ones(self.n_sa)),
                (np.zeros(self.n_q), np.ones(self.n_q)))

        self.sess = tf.Session()
        self.keep_prob = tf.placeholder(self.floatX)
        self.sa_learn = tf.placeholder(self.floatX, shape=[None,self.n_sa])
        self.W_sa_1 = unif_fanin_mat([self.n_sa, self.n_1], 'W_sa_1')
        self.b_1 = bias([self.n_1], 'b_1')
        self.W_1_2 = unif_fanin_mat([self.n_1, self.n_2], 'W_1_2')
        self.b_2 = bias([self.n_2], 'b_2')
        self.W_2_q = unif_fanin_mat([self.n_2,self.n_q], 'W_2_q')
        self.b_q = bias([self.n_q], 'b_q')
        name_var_pairs = zip(['W_sa_1', 'b_1', 'b_q'],
                [self.W_sa_1, self.b_1, self.b_q])
        #if not self.one_layer_only:
        name_var_pairs.extend(zip(['W_1_2', 'b_2', 'W_2_q'], [self.W_1_2, self.b_2, self.W_2_q]))
        self.name_var_dict = {i:j for (i,j) in name_var_pairs}

        # # run collapse once to set number of params
        # self.collapse_params()

        def q_from_input(i):
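            # Build the Q-network: one or two dropout-regularized hidden
            # layers plus a linear readout. Note the one-layer branch reuses
            # W_2_q on o1, which only works because n_1 == n_2 == n_hidden.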
            o1 = tf.nn.dropout(nonlinearity(tf.matmul(i, self.W_sa_1) + self.b_1), self.keep_prob)
            if self.one_layer_only:
                return tf.matmul(o1, self.W_2_q) + self.b_q

            o2 = tf.nn.dropout(nonlinearity(tf.matmul(o1, self.W_1_2) + self.b_2), self.keep_prob)
            return tf.matmul(o2, self.W_2_q) + self.b_q

        self.o1 = nonlinearity(tf.matmul(self.sa_learn, self.W_sa_1) + self.b_1)
        self.q_learn = q_from_input(self.sa_learn)
        self.y_learn = tf.placeholder(self.floatX, shape = [None, self.n_q])
        self.learn_delta = tf.square(self.y_learn - self.q_learn)
        self.learn_error = tf.reduce_mean(self.learn_delta)

        self.max_a_time_limit = conf['max_a_time_limit']

        global_step = tf.Variable(0, trainable=False)
        self.learn_rate = tf.train.exponential_decay(
                conf['initial_learn_rate'] / self.n_minibatch,
                global_step,
                conf['learn_rate_half_life'], 0.5, staircase=False)
        self.learn_opt = tf.train.AdamOptimizer(self.learn_rate).minimize(self.learn_error, global_step=global_step)

        self.learn_opts = []
        self.feed_ys = []
        for i in range(self.n_q):
            feed_y = tf.placeholder(self.floatX)
            self.feed_ys.append(feed_y)
            learn_error = self.error_function(feed_y - self.q_learn[0,i])
            learn_opt = tf.train.AdamOptimizer(self.learn_rate).minimize(learn_error, global_step=global_step)
            self.learn_opts.append(learn_opt)

        if self.n_a > 0:
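            # query_setup performs gradient ascent on the mean predicted Q
            # value with respect to the action variables: the Adam gradients
            # are negated before being applied, turning the minimizer into a
            # maximizer over a_query.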
            def query_setup(n_sample):
                s_query = tf.placeholder('float', shape=[n_sample, self.n_s])
                a_query = unif_fanin_mat([n_sample, self.n_a], 'a_query')
                min_cutoff = tf.matmul(np.ones((n_sample, 1), dtype=self.floatX), self.min_torques)
                max_cutoff = tf.matmul(np.ones((n_sample, 1), dtype=self.floatX), self.max_torques)
                # print "min cutoff:", self.sess.run(min_cutoff)
                # print "max cutoff:", self.sess.run(max_cutoff)
                a_query_clipped = tf.minimum(tf.maximum(min_cutoff, a_query), max_cutoff)

                #sa_query = tf.concat(1, [s_query, a_query_clipped])
                sa_query = tf.concat(1, [s_query, a_query])
                q_query = q_from_input(sa_query)
                q_query_mean = tf.reduce_mean(q_query)

                query_opt = tf.train.AdamOptimizer(0.1)
                query_grads_and_vars = query_opt.compute_gradients(q_query_mean, [a_query])
                # list of tuples (gradient, variable).
                query_grads_and_vars[0] = (-query_grads_and_vars[0][0], query_grads_and_vars[0][1])
                apply_query_grads = query_opt.apply_gradients(query_grads_and_vars)
                return s_query, a_query, a_query_clipped, sa_query, q_query, q_query_mean, apply_query_grads

            self.s_query, self.a_query, self.a_query_clipped, self.sa_query, self.q_query, \
                    self.q_query_mean, self.apply_query_grads = query_setup(1)
            self.s_query_p, self.a_query_p, self.a_query_clipped_p, self.sa_query_p, self.q_query_p, \
                    self.q_query_mean_p, self.apply_query_grads_p = query_setup(self.n_megabatch)

            self.sym_grad_p = tf.gradients(self.q_query_mean_p, self.a_query_p)
            self.sym_grad = tf.gradients(self.q_query_mean, self.a_query)

        self.saver = tf.train.Saver(self.name_var_dict)

        self.init_op = tf.initialize_all_variables()
        self.sess.run(self.init_op)

        if load_path is not None:
            self.load_model(load_path)

    def __del__(self):
        self.sess.close()

    def set_standardizer(self, sa_mean_std, q_mean_std):
        def check_stuff(thing, l):
            assert len(thing) == 2
            assert thing[0].shape == thing[1].shape
            assert len(thing[0].shape) == 1
            assert thing[1].shape[0] == l

        check_stuff(sa_mean_std, self.n_sa)
        check_stuff(q_mean_std, self.n_q)
        self.sa_mean_std = sa_mean_std
        self.q_mean_std = q_mean_std

    def print_params(self):
        for name, param in self.name_var_dict.items():
            print name
            print self.sess.run(param)
        print

    #def collapse_params(self):
    #    self.print_params()
    #    ans = []
    #    for (name, param) in enumerate(self.name_var_dict):
    #        ans.extend(self.sess.run(param).flatten().tolist())

    #    self.num_params = len(ans)
    #    print self.num_params
    #    return np.array(ans)

    def q_from_sa(self, sa_vals):
        net_q = self.sess.run(self.q_learn,
                feed_dict={self.sa_learn: sa_vals, #standardize(sa_vals, self.sa_mean_std),
                    self.keep_prob: 1.0})
        #print net_q.shape
        #return unstandardize(net_q, self.q_mean_std)
        return net_q



    def q_query_from_s(self, s_vals):
        return self.sess.run(self.q_query, feed_dict={self.s_query: s_vals[np.newaxis,:], self.keep_prob: 1.0})

    def q_query_from_s_p(self, s_vals):
        return self.sess.run(self.q_query_p, feed_dict={self.s_query_p: s_vals, self.keep_prob: 1.0})

    def o1_from_sa(self, sa_vals):
        return self.sess.run(self.o1, feed_dict={self.sa_learn: sa_vals, self.keep_prob: 1.0})

    def s_const_grid(self, s, xr, n=10000):
        # [[s x_1], [s x_2], ..., [s x_n]]
        s = np.array(s)
        xs = np.linspace(xr[0], xr[1], n)[:,np.newaxis]
        return xs, np.concatenate((np.ones((n,1)) * s[np.newaxis,:], xs), 1)

    def manual_max_a(self, s, xr=None):
        assert self.n_a == 1
        if xr is None:
            xr = self.max_abs_torque * np.array([-1.,1])
        xs, inputs = self.s_const_grid(s, xr)
        outputs = self.q_from_sa(inputs).flatten()
        print outputs
        best_input = np.argmax(outputs)
        return np.array([inputs[best_input][-1], outputs[best_input]])

    def manual_max_a_p(self, s, xr=None):
        assert self.n_a == 1
        ans = []
        for i in s:
            ans.append(self.manual_max_a(i, xr))
        return np.array(ans)

    def q_from_s_discrete(self, s):
        return self.q_from_sa(s)

    def q_from_sa_discrete(self, s, a):
        qs = self.q_from_s_discrete(s)
        chosen_qs = np.array([r[i] for r,i in zip(qs, a)])
        return chosen_qs[:, np.newaxis]

    def get_best_a_discrete(self, s):
        qs = self.q_from_s_discrete(s[np.newaxis,:])
        return np.argmax(qs)

    def get_best_q_discrete(self, s):
        assert s.shape[1] == self.n_s
        qs = self.q_from_s_discrete(s)
        return np.max(qs, 1)

    def get_best_a_p(self, s, is_p, num_tries, init_a=None, tolerance=0.01):
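        # Maximize the network's Q output over actions with SNOPT, restarting
        # from random initial actions and keeping the per-state best result.
        # A minimal usage sketch (hypothetical names, assuming n_a == 1):
        #   best_a, best_q = net.get_best_a_p(s_batch, is_p=True, num_tries=3)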
        #TODO: benchmark different init methods

        assert (is_p and len(s.shape) == 2 and s.shape[0] == self.n_megabatch) or (not is_p and len(s.shape) == 1)
        n_batch = s.shape[0] if is_p else 1

        inf = 1.0e20
        obj_row = 1

        x_names = np.array(['x' + str(i) for i in range(n_batch)])
        F_names = np.array(['F1', 'F2'])
        xlow = np.array([-self.max_abs_torque for i in range(n_batch)])
        xupp = np.array([self.max_abs_torque for i in range(n_batch)])

        Flow = np.array([-inf, -inf])
        Fupp = np.array([inf, inf])

        A = np.array([[0 for _ in range(n_batch)], [1 for _ in range(n_batch)]])
        G = np.array([[2 for _ in range(n_batch)], [0 for _ in range(n_batch)]])
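        # In this snopta interface, A holds the constant (linear) part of each
        # function and nonzero entries of G flag where usrfun supplies
        # derivatives: F1 (the negated Q value) has a dense nonlinear
        # gradient, while F2 = sum(a) is purely linear and unconstrained.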

        vs = {}
        vs['count'] = 0
        vs['old_a'] = None

        ans_a, ans_q = None, None

        def check_timeout(start_time, time_limit):
            if time.time() - start_time > time_limit:
                err_msg = 'error!!! max a timeout: s=%s is_p=%s num_tries=%s init_a=%s' % (s, is_p, num_tries, init_a)
                print err_msg
                return True
            return False

        def update_tolerance_conditions(a):
            vs['count'] += 1
            #if vs['old_a'] != None and np.max(np.abs(a - vs['old_a'])) < self.a_tolerance:
            #    print 'vs', vs
            #    return -1
            #vs['old_a'] = a
            if vs['count'] > self.snopt_max_count:
                return -1
            return 0
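        # NOTE: update_tolerance_conditions references self.snopt_max_count,
        # whose assignment is commented out in __init__; its call sites below
        # are also commented out, so this helper is currently dead code.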

        def inner_p(init_a, time_limit):
            start_time = time.time()
            if init_a is None:
                init_a = self.min_torques_p + (self.max_torques_p - self.min_torques_p) * \
                        np.random.random((self.n_megabatch, self.n_a)) / 2.0

            def grad(a):
                g = self.sess.run(self.sym_grad_p, feed_dict={self.s_query_p: s, self.a_query_p: a[:,np.newaxis],
                    self.keep_prob: 1.0})
                ret_grad = -g[0].flatten()
                return ret_grad

            def objFG(status,a,needF,needG,cu,iu,ru):
                F = np.array([-self.sess.run(self.q_query_mean_p, feed_dict={self.s_query_p: s,
                    self.a_query_p: a[:,np.newaxis], self.keep_prob: 1.0}), 0])
                G = grad(a)
                #status = update_tolerance_conditions(a)
                return status, F, G

            self.snopt.snopta(n=n_batch, nF=2, usrfun=objFG, x0=init_a, xlow=xlow, xupp=xupp,
                Flow=Flow, Fupp=Fupp, ObjRow=obj_row, A=A, G=G, xnames=x_names, Fnames=F_names)

            res_x = self.snopt.x

            q_final = self.sess.run(self.q_query_p, feed_dict={self.s_query_p: s,
                self.a_query_p: res_x, self.keep_prob: 1.0})

            return res_x, q_final

        def inner(init_a, time_limit):
            start_time = time.time()
            if init_a is None:
                #init_a = np.zeros([1, self.n_a])
                init_a = self.min_torques + (self.max_torques - self.min_torques) * \
                        np.random.random((1, self.n_a))

            def grad(a):
                g = self.sess.run(self.sym_grad, feed_dict={self.s_query: s[np.newaxis,:],
                    self.a_query: a[:,np.newaxis], self.keep_prob: 1.0})
                ret_grad = -g[0].flatten()
                return ret_grad

            def objFG(status,a,needF,needG,cu,iu,ru):
                F = np.array([-self.sess.run(self.q_query_mean, feed_dict={self.s_query: s[np.newaxis,:],
                    self.a_query: a[:,np.newaxis], self.keep_prob: 1.0}), 0])
                G = grad(a)
                #status = update_tolerance_conditions(a)
                return status, F, G

            self.snopt.snopta(n=n_batch, nF=2, usrfun=objFG, x0=init_a, xlow=xlow, xupp=xupp,
                    Flow=Flow, Fupp=Fupp, ObjRow=obj_row, A=A, G=G)#, xnames=x_names, Fnames=F_names)

            res_x = self.snopt.x
            print init_a, res_x

            q_final = self.sess.run(self.q_query, feed_dict={self.s_query: s[np.newaxis,:],
                self.a_query: res_x, self.keep_prob: 1.0})

            return res_x, q_final

        inner_function = inner_p if is_p else inner
        iter_time_limit = self.max_a_time_limit / num_tries

        for i in range(num_tries):
            a, q = inner_function(init_a, iter_time_limit) if i == 0 else inner_function(None, iter_time_limit)
            if i == 0:
                ans_a, ans_q = a, q
                continue
            for j in range(len(q)):
                if q[j] > ans_q[j]:
                    ans_q[j] = q[j]
                    ans_a[j] = a[j]

        return ans_a, ans_q

    def get_learn_rate(self):
        return self.sess.run(self.learn_rate)

    def learn_delta_q(self, sa_vals, y_vals):
        return self.sess.run(self.learn_delta,
                feed_dict={self.sa_learn: sa_vals, self.y_learn: y_vals, self.keep_prob: 1.0})

    def mse_q(self, sa_vals, y_vals):
        return self.sess.run(self.learn_error,
                feed_dict={self.sa_learn: sa_vals, self.y_learn: y_vals, self.keep_prob: 1.0})

    def train(self, sa_vals, y_vals):
        self.sess.run(self.learn_opt, feed_dict={self.y_learn: y_vals,
            self.sa_learn: sa_vals, self.keep_prob: self.keep_prob_train_val})
        #print 'learn_rate', self.sess.run(self.learn_rate)

    def train_discrete(self, s_vals, a_vals, y_vals):
        for (s, a, y) in zip(s_vals, a_vals, y_vals):
            self.sess.run(self.learn_opts[a], feed_dict={self.sa_learn: s[np.newaxis,:],
                self.feed_ys[a]: y, self.keep_prob: 1.0})

    def save_model(self, save_path):
        self.saver.save(self.sess, save_path)

    def load_model(self, load_path):
        print 'loading model from', load_path
        self.saver.restore(self.sess,
                '/Users/jeffreyyan/drake-distro/drake/examples/NN/' + load_path)