# Disabled alternative: combine a second (hypersphere) avoid region with the
# first one.
# avoid_func_2 = lambda x: hypersphere_ext(x, center=cen_2,radius=rad_2,
#                                          dims=[0, 1])
# avoid_func = intersect([avoid_func_1, avoid_func_2])

# Make MDP
lamb = 0.0001  # lambda
my_world = Avoid(
    num_nodes, s_lims,
    num_nodes_a, a_lims,
    dynamics=dynamics,
    avoid_func=avoid_func,
    lamb=lamb)
grid = my_world._all_states_c  # NOTE: reads a private attribute of Avoid
grid_axes = my_world.axes

# Solve for the value function; the second result of v_pi_opt is discarded.
# method='pi' is presumably policy iteration -- confirm in Avoid.v_pi_opt.
value, _ = my_world.v_pi_opt(method='pi')
reward = my_world.reward

# Take slice of value and reward grids
dim_fix = [2]  # dims to be held fixed
val_fix = [0]  # value along fixed dimensions
val_slice, new_shape, new_axes = my_world.slice_grid(
    value, dim_fix, val_fix)
reward_slice, _, _ = my_world.slice_grid(reward, dim_fix, val_fix)

# Axes of the two dimensions left after slicing.
x, y = new_axes[0], new_axes[1]

# Plot contours of value and reward
angular=[2]) lamb_2 = 0.0001 #lambda my_world_2 = Avoid(num_nodes, s_lims, num_nodes_a, a_lims, num_nodes_d, d_lims, dynamics=dynamics, avoid_func=avoid_func, lamb=lamb_2, sparse=True) reward = my_world.reward value_v, _ = my_world.v_pi_opt(method='vi') value_z, _ = my_world_2.v_pi_opt(method='vi') grid = my_world._all_states_c # Generates level curves for target (reward) and value_function verts_n_r, faces_r, _, _ = measure.marching_cubes( reward.reshape(num_nodes), 0) verts_r = verts_n_r / num_nodes * (s_lims[1, :] - s_lims[0, :]) + s_lims[0, :] verts_n_v, faces_v, _, _ = measure.marching_cubes( value_v.reshape(num_nodes), 0) verts_v = verts_n_v / num_nodes * (s_lims[1, :] - s_lims[0, :]) + s_lims[0, :] verts_n_z, faces_z, _, _ = measure.marching_cubes(
# Make MDP: one world on the coarse grid and one on the fine grid, sharing
# the action/disturbance grids, dynamics, avoid set and lamb.
lamb = 0.01  # lambda
my_world_c = Avoid(
    num_nodes_c, s_lims_c,
    num_nodes_a, a_lims,
    num_nodes_d, d_lims,
    dynamics=dynamics,
    avoid_func=avoid_func,
    lamb=lamb,
    sparse=True)
my_world_f = Avoid(
    num_nodes_f, s_lims_f,
    num_nodes_a, a_lims,
    num_nodes_d, d_lims,
    dynamics=dynamics,
    avoid_func=avoid_func,
    lamb=lamb,
    sparse=True)
grid_f = my_world_f._all_states_c  # NOTE: reads a private attribute of Avoid

# Compute value function on coarse
t_start = time.time()
value_c, _ = my_world_c.v_pi_opt(method='vi')
t_coarse = time.time() - t_start

# Compute value function on fine with warm start: the coarse solution is
# interpolated onto the fine grid and passed as V. The interpolation is
# inside the timed region.
t_start = time.time()
warm_start = my_world_c.interp_grid(value_c, grid_f)
value_f_warm, _ = my_world_f.v_pi_opt(method='vi', V=warm_start)
t_fine_warm = time.time() - t_start

# Compute value function on fine without passing V (the baseline run);
# force_run=True is passed, presumably to bypass any stored result --
# confirm in Avoid.v_pi_opt.
t_start = time.time()
value_f, _ = my_world_f.v_pi_opt(method='vi', force_run=True)
t_fine = time.time() - t_start

print("Time for coarse: {}".format(t_coarse))
num_nodes_a, a_lims, dynamics, avoid_func, lamb=lamb) my_world_2 = Avoid(num_nodes, s_lims, num_nodes_a, a_lims, dynamics_2, avoid_func, lamb=lamb) # Compute value function and policy v_opt_1, _ = my_world.v_pi_opt() v_opt_2, _ = my_world_2.v_pi_opt(V=v_opt_1) _, _ = my_world_2.v_pi_opt(force_run=True) # Computing anaylytic safe set (Model 2) s_min = s_lims[0] s_max = s_lims[1] x = range(my_world.num_nodes[0]) * my_world.ds[0] + s_min[0] y = range(my_world.num_nodes[1]) * my_world.ds[1] + s_min[1] u_lims = cube_lims[1] l_lims = cube_lims[0] analytic_1 = [ min((-2 * min_u * (u_lims[0] - min(x_e, u_lims[0])))**0.5, u_lims[1])
lamb=lamb, sparse=True) # heavy model my_world_h = Avoid(num_nodes, s_lims, num_nodes_a, a_lims, dynamics=dynamics_h, avoid_func=avoid_func, lamb=lamb, sparse=True) # Compute nominal value func t_start = time.time() value_n, _ = my_world.v_pi_opt(method='vi') t_n = time.time() - t_start #Compute value func for light model t_start = time.time() value_l_warm, _ = my_world_l.v_pi_opt(method='vi', V=value_n) t_l_warm = time.time() - t_start t_start = time.time() value_l, _ = my_world_l.v_pi_opt(method='vi', force_run=True) t_l = time.time() - t_start #Compute value func for heavy model t_start = time.time() value_h_warm, _ = my_world_h.v_pi_opt(method='vi', V=value_n) t_h_warm = time.time() - t_start
dynamics_2 = partial(double_integrator, **sys_params) # Construct avoid region, system should stay within hypercube cube_lims = np.array([[0, -3], [4, 3]]) avoid_func = lambda x: dist_hypercube_int(x, cube_lims=cube_lims) # Make MDP my_world = Avoid(num_nodes, s_lims, num_nodes_a, a_lims, dynamics, avoid_func) # Make MDP #my_world_2 = Avoid(num_nodes, s_lims, num_nodes_a, # a_lims, dynamics_2, avoid_func) # Compute value function and policy v_opt, pi_opt = my_world.v_pi_opt(method='pi') #v_opt, pi_opt = my_world_2.v_pi_opt(method='pi',pi=pi_opt) # Gradient of value function grad, grad_mag = my_world.gradient() # Computing anaylytic safe set s_min = s_lims[0] s_max = s_lims[1] x = range(my_world.num_nodes[0]) * my_world.ds[0] + s_min[0] y = range(my_world.num_nodes[1]) * my_world.ds[1] + s_min[1] u_lims = cube_lims[1] l_lims = cube_lims[0] z = [ min((-2 * min_u * (u_lims[0] - min(x_e, u_lims[0])))**0.5, u_lims[1]) for x_e in x
# Pursuer has advantage
my_world_d = Avoid(
    num_nodes, s_lims,
    num_nodes_a, a_lims,
    num_nodes_d, d_lims_s,
    dynamics=dynamics,
    avoid_func=avoid_func,
    lamb=lamb,
    sparse=True,
    angular=[2])

# Compute nominal value func
t_start = time.time()
value_n, _ = my_world.v_pi_opt(method='vi')
t_n = time.time() - t_start

# Compute value func for evader advantage: first with the nominal value
# function passed as V (the "warm" run), then without V and with
# force_run=True.
t_start = time.time()
value_a_warm, _ = my_world_a.v_pi_opt(method='vi', V=value_n)
t_a_warm = time.time() - t_start

t_start = time.time()
value_a, _ = my_world_a.v_pi_opt(method='vi', force_run=True)
t_a = time.time() - t_start

# Compute value func for pursuer advantage, again with the nominal value
# function passed as V.
t_start = time.time()
value_d_warm, _ = my_world_d.v_pi_opt(method='vi', V=value_n)
t_d_warm = time.time() - t_start
# Construct avoid region, system should stay within hypercube
cube_lims = np.array([[0, -3], [4, 3]])
avoid_func = lambda x: hypercube_int(x, cube_lims=cube_lims)

# Make MDP
lamb = 0.1  # lambda
my_world = Avoid(
    num_nodes, s_lims,
    num_nodes_a, a_lims,
    dynamics, avoid_func,
    lamb=lamb)

# Compute value function and policy (initial model)
v_opt_i, _ = my_world.v_pi_opt()

# Model estimation and value function update: at every step the control
# gain is blended linearly from gain_i towards gain_f, the dynamics are
# rebuilt with the new input limits, and the world is updated.
horizon = 101
for k in range(horizon):
    print("Online Iteration {}".format(k))
    # NOTE(review): assumes true division (Python 3 or
    # `from __future__ import division`); under Python 2 integer division
    # alpha would stay 0.
    # NOTE(review): alpha peaks at (horizon - 1) / horizon, so gain never
    # quite reaches gain_f -- confirm this is intended.
    alpha = k / horizon
    gain = gain_f * alpha + gain_i * (1 - alpha)
    sys_params = {'max_u': grav * gain, 'min_u': -grav * gain}
    dynamics = partial(double_integrator, **sys_params)
    my_world.dynamics = dynamics
    my_world.update()