# * state grid: (height, velocity)
s_grid_velocity = np.linspace(3, 8, 7)
s_grid = (s_grid_height, s_grid_velocity)

# * action grid: (angle of attack in radians, amplification coefficient)
a_grid_aoa = np.linspace(0 / 180 * np.pi, 70 / 180 * np.pi, 21)
# a_grid = (a_grid_aoa, )
a_grid_amp = np.linspace(0.9, 1.2, 11)
a_grid = (a_grid_aoa, a_grid_amp)

grids = dict(states=s_grid, actions=a_grid)

# * compute the transition map, and time how long it takes
t = TicToc()
t.tic()
Q_map, Q_F, Q_reach = vibly.parcompute_Q_map(
    grids,
    p_map,
    keep_coords=True,
    verbose=2,
)
t.toc()
print("time elapsed: " + str(t.elapsed / 60))

# * viable set, measure in state space, and measure mapped back to Q-space
Q_V, S_V = vibly.compute_QV(Q_map, grids)
S_M = vibly.project_Q2S(Q_V, grids, proj_opt=np.mean)
Q_M = vibly.map_S2Q(Q_map, S_M, s_grid, Q_V=Q_V)
# plt.scatter(Q_map[1], Q_map[0])

# * report which fraction of the grid is non-failing, and which is viable
print("non-failing portion of Q: " + str(np.sum(~Q_F) / Q_F.size))
print("viable portion of Q: " + str(np.sum(Q_V) / Q_V.size))

import itertools as it

# Q0 = np.zeros((len(grids['states']), total_gridpoints))
# def create_x0(grids):
#     for idx, state_action in enumerate(np.array(list(
#             it.product(*grids['states'], *grids['actions'])))):


def color_generator(n=1):
    # colors = list()
    colors = np.zeros((n, 3))
filename = "./parsimonious0.pickle"

# * Load and unpack
# The context manager closes the file even if unpickling raises.
# NOTE: pickle can execute arbitrary code while loading; only open files that
# were produced by a trusted run.
with open(filename, 'rb') as infile:
    data = pickle.load(infile)

# * unpack invariants
# Q_value = data["Q_value"]
R_value = data["R_value"]
grids = data["grids"]
S_M = data["S_M"]
# boolean mask of the states whose measure is strictly positive
XV = S_M > 0.0

# * project the reward from Q-space to state space, using np.mean
RX_value = vibly.project_Q2S(R_value, grids, proj_opt=np.mean)

# * limit of reward
r_max = RX_value.max()
r_min = RX_value.min()

# * limit of values
alpha = 0.
# mynorm = colors.TwoSlopeNorm(vmin=0., vcenter=0., vmax=2.5)
mynorm = colors.TwoSlopeNorm(vmin=-0.5, vcenter=alpha, vmax=2.5)
mymap = vplot.get_vmap(2)

# * first/last point of the second state grid, then of the first state grid
extent = [
    grids['states'][1][0],
    grids['states'][1][-1],
    grids['states'][0][0],
    grids['states'][0][-1],
]
# for rdx, reward_functions in enumerate(reward_schemes):

# * sweep over the failure penalties; each run is warm-started from the
# * previous result, scaled in place by the new penalty
Q_value = None
for failure_penalty in failure_penalties:
    tictoc.tic()

    if Q_value is not None:
        Q_value *= failure_penalty

    Q_value, R_value = control.Q_value_iteration(
        Q_map,
        grids,
        reward_functions,
        0.6,
        Q_on_grid=Q_on_grid,
        stopping_threshold=1e-6,
        max_iter=1000,
        output_R=True,
        Q_values=Q_value,
    )
    # * project the Q-values onto state space, using np.max
    X_value = vibly.project_Q2S(Q_value, grids, proj_opt=np.max)

    time_elapsed = tictoc.toc()
    print("time elapsed (minutes): " + str(time_elapsed / 60.0))

    # * collect the results for saving
    data2save = {
        "Q_value": Q_value,
        "R_value": R_value,
        "grids": grids,
        "Q_map": Q_map,
        "Q_F": Q_F,
        "Q_V": Q_V,
        "Q_M": Q_M,
        "S_M": S_M,
        "p": p,
    }
def _precompute_rewards(Q_map, s_grid, a_grid, reward_functions):
    """Sum all reward functions at every state-action grid point of Q_map."""
    n_states = len(s_grid)
    R_values = np.zeros_like(Q_map, dtype=float)
    for qdx, _ in np.ndenumerate(Q_map):
        # the leading indices of qdx address the state grids, the rest the
        # action grids
        s = [grid[qdx[i]] for i, grid in enumerate(s_grid)]
        a = [grid[qdx[n_states + i]] for i, grid in enumerate(a_grid)]
        R_values[qdx] = sum((rfunc(s, a) for rfunc in reward_functions), 0.0)
    return R_values


def Q_value_iteration(Q_map, grids, reward_functions, gamma, Q_on_grid=None,
                      stopping_threshold=1e-5, max_iter=1000,
                      output_R=False, neighbor_option=np.mean,
                      Q_values=None):
    """
    Standard value iteration.

    Values are updated in place while sweeping over Q_map, so later entries
    of a sweep already see the updated values of earlier entries.

    Inputs:
    Q_map: transition map; each entry is the (flat, integer) index of the
        successor state
    grids: dict with the tuples of grids under 'states' and 'actions'
    reward_functions: iterable of callables rfunc(s, a); their sum is the
        reward of a state-action pair
    gamma: discount factor
    Q_on_grid: only tested against None, its content is not read here.
        If None, entries of Q_map are unravelled as bin indices and the
        successor value is interpolated from the neighbouring grid points
        (see neighbor_option). Otherwise entries of Q_map are used directly
        as indices into the flattened state-space projection of Q_values.
    stopping_threshold: threshold on improvement to stop iterating
    max_iter: maximum number of iterations
    output_R: toggle true to also return array of reward for each (s, a)
    neighbor_option: how to interpolate grid-borders of a bin (only used
        when Q_on_grid is None)
    Q_values: initial guess for Q_values, can help speed things up.
        If given, this array is modified in place.

    Returns:
    Q_values, or the tuple (Q_values, R_values) if output_R is true.
    """
    if Q_values is None:
        Q_values = np.zeros_like(Q_map, dtype=float)
    else:
        assert (Q_values.shape == Q_map.shape), "initial_guess is bad"

    s_grid = grids['states']
    a_grid = grids['actions']

    # rewards do not depend on the values: compute them once, up front
    R_values = _precompute_rewards(Q_map, s_grid, a_grid, reward_functions)

    interpolate_bins = Q_on_grid is None
    # loop-invariant: one more bin than grid points along each state dimension
    bin_shape = tuple(np.size(grid) + 1 for grid in s_grid)

    # defined up front so the summary print below also works for max_iter=0
    max_change = 0.0
    for iteration in range(max_iter):
        max_change = 0.0  # for stopping

        if not interpolate_bins:
            # value of each state under the best action, as a flat lookup
            # table; recomputed once per sweep
            X_val = project_Q2S(Q_values, grids, proj_opt=np.max)
            X_val = X_val.flatten()

        # iterate over each q
        for qdx, next_s in np.ndenumerate(Q_map):
            if interpolate_bins:
                bin_idx = np.unravel_index(next_s, bin_shape)
                grid_indices = get_grid_indices(bin_idx, s_grid)
                # bin value from the best q-value at each neighboring
                # grid point
                next_value = neighbor_option([Q_values[g].max()
                                              for g in grid_indices])
            else:
                next_value = X_val[next_s]

            # keep track of changes, for stopping condition
            diff = np.abs(Q_values[qdx] - R_values[qdx] - gamma*next_value)
            if diff > max_change:
                max_change = diff
            Q_values[qdx] = R_values[qdx] + gamma*next_value

        if max_change < stopping_threshold:
            print("Stopped early after ", iteration, " iterations.")
            break

    print("max change in value: ", max_change)

    if output_R:
        return Q_values, R_values
    else:
        return Q_values
def compute_viability(x0, p, name, visualise=False):
    """
    Compute the viable set and its measure on a fixed grid, and pickle them.

    Inputs:
    x0: initial state; a copy is stored on the Poincare map
    p: parameters; stored on the Poincare map and saved with the results
    name: used as output directory, as file-name prefix and in the figure
        title
    visualise: toggle true to also save an image of the state-space measure
        as a pdf next to the pickle

    Returns nothing. Results are written to
    `name/name_<damping, 4 decimals>.pickle`.
    """
    # * Set-up P maps for computations
    p_map = model.poincare_map
    p_map.p = p
    p_map.x = x0.copy()

    # * choose high-level represenation
    p_map.xp2s = model.xp2s_y_xdot
    # * this maps the full simulated state to the high-level representation
    # * in this case the relevant states at apex:
    # * (y, xdot), in other words the (height, velocity)
    p_map.sa2xp = model.sa2xp_y_xdot_timedaoa
    # * this maps the high-level representation of state and actions back
    # * to the full state and parameters used for the simulation

    # p_map.sa2xp = model.sa2xp_amp
    # * this representation includes an amplification coefficient `a' for
    # * the muscle activation a*f(t). It adds a dimension to the grids,
    # * which substantially increases computation time, and also makes
    # * visualization much less straightforward (due to the 4-dimensional
    # * state-action space).
    # * We have tried this out, and from a preliminary look, there does
    # * not seem to be much qualitative difference in the results for the
    # * nominal limit-cycle used in the paper. For other conditions, it may
    # * be important to include this (or other additional control inputs)
    # * in the model.

    # * set up grids for computing the viable set and measure
    # * a denser grid will yield more precision, but require more compute
    s_grid_height = np.linspace(0.05, 0.5, 91)
    s_grid_velocity = np.linspace(0, 10.0, 101)
    s_grid = (s_grid_height, s_grid_velocity)
    a_grid_aoa = np.linspace(0 / 180 * np.pi, 90 / 180 * np.pi, 91)
    a_grid = (a_grid_aoa, )
    # * if you use the representation `sa2xp_amp` (see above), the
    # * action grid also includes an extra dimension
    # a_grid_amp = np.linspace(0.75, 1.25, 11)
    # a_grid = (a_grid_aoa, a_grid_amp)

    grids = {'states': s_grid, 'actions': a_grid}

    # * compute transition matrix and boolean matrix of failures
    Q_map, Q_F = vibly.parcompute_Q_map(grids, p_map, verbose=1)

    # * compute viable sets
    Q_V, S_V = vibly.compute_QV(Q_map, grids)

    # * compute the measure in state-space
    S_M = vibly.project_Q2S(Q_V, grids, proj_opt=np.mean)

    # * map the measure to Q-space
    Q_M = vibly.map_S2Q(Q_map, S_M, s_grid, Q_V=Q_V)

    print("non-failing portion of Q: " + str(np.sum(~Q_F) / Q_F.size))
    print("viable portion of Q: " + str(np.sum(Q_V) / Q_V.size))

    # * save data
    # exist_ok avoids the race between checking for and creating the folder
    os.makedirs(name, exist_ok=True)

    # NOTE(review): `damping` is neither a parameter nor a local of this
    # function; presumably a module-level variable set by the caller.
    # Confirm, or pass it in explicitly.
    filename = name + '/' + name + '_' + '{:.4f}'.format(damping)

    data2save = {
        "grids": grids,
        "Q_map": Q_map,
        "Q_F": Q_F,
        "Q_V": Q_V,
        "Q_M": Q_M,
        "S_M": S_M,
        "p": p,
        "x0": x0,
    }
    # the context manager closes the file even if pickling fails midway
    with open(filename + '.pickle', 'wb') as outfile:
        pickle.dump(data2save, outfile)

    if visualise:
        print("SAVING FIGURE")
        print(" ")
        plt.figure()
        plt.imshow(S_M, origin='lower', vmin=0, vmax=1, cmap='viridis')
        plt.title('bird ' + name)
        plt.savefig(filename + '.pdf', format='pdf')
        # plt.show()  # to just see it on the fly
        plt.close()
# unpack the loaded data
Q_map, Q_F, x0, poincare_map, p, grids = (
    data[key] for key in ('Q_map', 'Q_F', 'x0', 'P_map', 'p', 'grids')
)

################################################################################
# Compute measure from grid for warm-start
################################################################################
Q_V, S_V = vibly.compute_QV(Q_map, grids)

# project the viable set onto state space, using np.mean
S_M = vibly.project_Q2S(Q_V, grids, np.mean)
# S_M = vibly.project_Q2S(Q_V, grids, np.mean)
# S_M = S_M / grids['actions'][0].size

# NOTE(review): Q_V is passed positionally as the third argument here;
# confirm this matches the signature of the vibly.map_S2Q in use.
Q_M = vibly.map_S2Q(Q_map, S_M, Q_V)

# show the measure in state space, then mapped to Q-space
plt.plot(S_M)
plt.show()
plt.imshow(Q_M, origin='lower')
plt.show()

################################################################################
# Create estimation object
################################################################################
AS_grid = np.meshgrid(
    grids['actions'][0],
    grids['states'][0],
)
estimation = MeasureEstimation(state_dim=1, action_dim=1, seed=1)