Python Avoid.v_pi_opt Examples, mdp.grid_world_ext.Avoid.v_pi_opt Python Examples

Example #1

0

Show file

    # avoid_func_2 = lambda x: hypersphere_ext(x, center=cen_2,radius=rad_2,
    #                                        dims=[0, 1])
    # avoid_func = intersect([avoid_func_1, avoid_func_2])

    # Make MDP
    lamb = 0.0001  #lambda
    my_world = Avoid(num_nodes,
                     s_lims,
                     num_nodes_a,
                     a_lims,
                     dynamics=dynamics,
                     avoid_func=avoid_func,
                     lamb=lamb)
    grid = my_world._all_states_c
    grid_axes = my_world.axes
    value, _ = my_world.v_pi_opt(method='pi')
    reward = my_world.reward

    # Take slice of value and reward grids
    dim_fix = [2]  # dims to be held fixed
    val_fix = [0]  # value along fixed dimensions

    val_slice, new_shape, new_axes = my_world.slice_grid(
        value, dim_fix, val_fix)

    reward_slice, _, _ = my_world.slice_grid(reward, dim_fix, val_fix)

    x = new_axes[0]
    y = new_axes[1]

    # Plot contours of value and reward

Example #2

0

Show file

File: iso_surface.py Project: kakametalu/online_reachability

                     angular=[2])

    lamb_2 = 0.0001  #lambda
    my_world_2 = Avoid(num_nodes,
                       s_lims,
                       num_nodes_a,
                       a_lims,
                       num_nodes_d,
                       d_lims,
                       dynamics=dynamics,
                       avoid_func=avoid_func,
                       lamb=lamb_2,
                       sparse=True)

    reward = my_world.reward
    value_v, _ = my_world.v_pi_opt(method='vi')
    value_z, _ = my_world_2.v_pi_opt(method='vi')
    grid = my_world._all_states_c

    # Generate zero-level surfaces for the target (reward) and value functions
    verts_n_r, faces_r, _, _ = measure.marching_cubes(
        reward.reshape(num_nodes), 0)
    verts_r = verts_n_r / num_nodes * (s_lims[1, :] -
                                       s_lims[0, :]) + s_lims[0, :]

    verts_n_v, faces_v, _, _ = measure.marching_cubes(
        value_v.reshape(num_nodes), 0)
    verts_v = verts_n_v / num_nodes * (s_lims[1, :] -
                                       s_lims[0, :]) + s_lims[0, :]

    verts_n_z, faces_z, _, _ = measure.marching_cubes(

Example #3

0

Show file

    # Make MDP
    lamb = 0.01 #lambda
    my_world_c = Avoid(num_nodes_c, s_lims_c, num_nodes_a, a_lims, 
                       num_nodes_d, d_lims, dynamics=dynamics, 
                       avoid_func=avoid_func, lamb=lamb, sparse=True)


    my_world_f = Avoid(num_nodes_f, s_lims_f, num_nodes_a, a_lims, 
                       num_nodes_d, d_lims, dynamics=dynamics, 
                       avoid_func=avoid_func, lamb=lamb, sparse=True)
    
    grid_f = my_world_f._all_states_c

    # Compute value function on coarse
    t_start = time.time()
    value_c, _ =  my_world_c.v_pi_opt(method='vi')
    t_coarse = time.time() - t_start

    # Compute value function on fine with warm start value_c 
    t_start = time.time()
    warm_start = my_world_c.interp_grid(value_c, grid_f)
    value_f_warm, _ =  my_world_f.v_pi_opt(method='vi', V=warm_start)
    t_fine_warm = time.time() - t_start

    # Compute value function on fine without a warm start (forced re-run)
    t_start = time.time()
    value_f, _ =  my_world_f.v_pi_opt(method='vi',force_run=True)
    t_fine = time.time() - t_start


    print("Time for coarse: {}".format(t_coarse))

Example #4

0

Show file

File: double_integrator_warmstart.py Project: kakametalu/online_reachability

                     num_nodes_a,
                     a_lims,
                     dynamics,
                     avoid_func,
                     lamb=lamb)

    my_world_2 = Avoid(num_nodes,
                       s_lims,
                       num_nodes_a,
                       a_lims,
                       dynamics_2,
                       avoid_func,
                       lamb=lamb)

    # Compute value function and policy
    v_opt_1, _ = my_world.v_pi_opt()

    v_opt_2, _ = my_world_2.v_pi_opt(V=v_opt_1)

    _, _ = my_world_2.v_pi_opt(force_run=True)

    # Computing analytic safe set (Model 2)
    s_min = s_lims[0]
    s_max = s_lims[1]
    x = range(my_world.num_nodes[0]) * my_world.ds[0] + s_min[0]
    y = range(my_world.num_nodes[1]) * my_world.ds[1] + s_min[1]
    u_lims = cube_lims[1]
    l_lims = cube_lims[0]

    analytic_1 = [
        min((-2 * min_u * (u_lims[0] - min(x_e, u_lims[0])))**0.5, u_lims[1])

Example #5

0

Show file

                       lamb=lamb,
                       sparse=True)

    # heavy model
    my_world_h = Avoid(num_nodes,
                       s_lims,
                       num_nodes_a,
                       a_lims,
                       dynamics=dynamics_h,
                       avoid_func=avoid_func,
                       lamb=lamb,
                       sparse=True)

    # Compute nominal value func
    t_start = time.time()
    value_n, _ = my_world.v_pi_opt(method='vi')
    t_n = time.time() - t_start

    #Compute value func for light model
    t_start = time.time()
    value_l_warm, _ = my_world_l.v_pi_opt(method='vi', V=value_n)
    t_l_warm = time.time() - t_start

    t_start = time.time()
    value_l, _ = my_world_l.v_pi_opt(method='vi', force_run=True)
    t_l = time.time() - t_start

    #Compute value func for heavy model
    t_start = time.time()
    value_h_warm, _ = my_world_h.v_pi_opt(method='vi', V=value_n)
    t_h_warm = time.time() - t_start

Example #6

0

Show file

    dynamics_2 = partial(double_integrator, **sys_params)

    # Construct avoid region, system should stay within hypercube
    cube_lims = np.array([[0, -3], [4, 3]])
    avoid_func = lambda x: dist_hypercube_int(x, cube_lims=cube_lims)

    # Make MDP
    my_world = Avoid(num_nodes, s_lims, num_nodes_a, a_lims, dynamics,
                     avoid_func)

    # Make MDP
    #my_world_2 = Avoid(num_nodes, s_lims, num_nodes_a,
    #                 a_lims, dynamics_2, avoid_func)

    # Compute value function and policy
    v_opt, pi_opt = my_world.v_pi_opt(method='pi')
    #v_opt, pi_opt = my_world_2.v_pi_opt(method='pi',pi=pi_opt)

    # Gradient of value function
    grad, grad_mag = my_world.gradient()

    # Computing analytic safe set
    s_min = s_lims[0]
    s_max = s_lims[1]
    x = range(my_world.num_nodes[0]) * my_world.ds[0] + s_min[0]
    y = range(my_world.num_nodes[1]) * my_world.ds[1] + s_min[1]
    u_lims = cube_lims[1]
    l_lims = cube_lims[0]
    z = [
        min((-2 * min_u * (u_lims[0] - min(x_e, u_lims[0])))**0.5, u_lims[1])
        for x_e in x

Example #7

0

Show file

File: model_update_pursuit_evasion.py Project: kakametalu/online_reachability

    # Pursuer has advantage
    my_world_d = Avoid(num_nodes,
                       s_lims,
                       num_nodes_a,
                       a_lims,
                       num_nodes_d,
                       d_lims_s,
                       dynamics=dynamics,
                       avoid_func=avoid_func,
                       lamb=lamb,
                       sparse=True,
                       angular=[2])

    # Compute nominal value func
    t_start = time.time()
    value_n, _ = my_world.v_pi_opt(method='vi')
    t_n = time.time() - t_start

    #Compute value func for evader advantage
    t_start = time.time()
    value_a_warm, _ = my_world_a.v_pi_opt(method='vi', V=value_n)
    t_a_warm = time.time() - t_start

    t_start = time.time()
    value_a, _ = my_world_a.v_pi_opt(method='vi', force_run=True)
    t_a = time.time() - t_start

    #Compute value func for pursuer advantage
    t_start = time.time()
    value_d_warm, _ = my_world_d.v_pi_opt(method='vi', V=value_n)
    t_d_warm = time.time() - t_start

Example #8

0

Show file

File: double_integrator_online.py Project: kakametalu/online_reachability

    # Construct avoid region, system should stay within hypercube
    cube_lims = np.array([[0, -3], [4, 3]])
    avoid_func = lambda x: hypercube_int(x, cube_lims=cube_lims)

    # Make MDP
    lamb = 0.1  #lambda
    my_world = Avoid(num_nodes,
                     s_lims,
                     num_nodes_a,
                     a_lims,
                     dynamics,
                     avoid_func,
                     lamb=lamb)

    # Compute value function and policy
    v_opt_i, _ = my_world.v_pi_opt()

    # Model estimation and value function update

    horizon = 101
    for k in range(horizon):
        print("Online Iteration {}".format(k))
        alpha = k / horizon
        gain = gain_f * alpha + gain_i * (1 - alpha)
        sys_params = {}
        sys_params['max_u'] = grav * gain
        sys_params['min_u'] = -grav * gain
        dynamics = partial(double_integrator, **sys_params)
        my_world.dynamics = dynamics
        my_world.update()