def main(): # random seed np.random.seed(88) # init util.get_x0() util.get_xd() # preallocate X = np.zeros( (param.get('n'), param.get('nt'))) # go sim param['ta_on'] = False x_curr = param.get('x0') for k,t in enumerate( param.get('T')): u_curr = controller.get_dynamics_u(x_curr,t) x_next = x_curr + dynamics.get_dxdt( x_curr, u_curr, t) * param.get('dt') X[:,k] = np.squeeze(x_curr) x_curr = x_next print('t/T: ' + str(t/param.get('T')[-1])) plotter.plot_SS( X, param.get('T'), title = 'No Task Assignment') # go sim param['ta_on'] = True x_curr = param.get('x0') for k,t in enumerate( param.get('T')): u_curr = controller.get_dynamics_u(x_curr,t) x_next = x_curr + dynamics.get_dxdt( x_curr, u_curr, t) * param.get('dt') X[:,k] = np.squeeze(x_curr) x_curr = x_next print('t/T: ' + str(t/param.get('T')[-1])) plotter.plot_SS( X, param.get('T'), title = 'With Task Assignment') # plotter.plot_ss( X[:,-1], param.get('T')[-1]) plotter.save_figs()
def fn4(): pdf_path = os.path.join(os.getcwd(), param.get('fn_plots')) # remove if exists if os.path.exists(pdf_path): os.remove(pdf_path) np.random.seed(88) util.get_x0() util.get_xd() X = [] U = [] param['controller'] = 'mpc' x_curr = param.get('x0') for t in param.get('T'): try: u_curr = controller.get_u(x_curr, t) x_next = x_curr + dynamics.get_dxdt(x_curr, u_curr, t) * param.get('dt') x_curr = x_next print('t/T = ' + str(t) + '/' + str(param.get('T')[-1])) except: pass X.append(x_curr) U.append(u_curr) X = np.squeeze(np.asarray(X)) plotter.plot_SS(X, param.get('T')) plotter.save_figs() subprocess.call(["xdg-open", pdf_path])
def main(): np.random.seed(88) util.get_x0() util.get_xd() dynamics.compute_jacobians() C = [] print('Sim...') for u_type in param.get('controllers'): param['controller'] = u_type print('Controller: ' + str(u_type)) X = [] U = [] V = [] pbar = [] vbar = [] abar = [] x_curr = param.get('x0') count = 0 for k, t in enumerate(param.get('T')): # try: if param.get('controller') is 'scp': if count == 0: U_scp = controller.get_u(x_curr, t) try: u_curr = U_scp[count] except: u_curr = np.zeros((param.get('m'), 1)) elif param.get('controller') is 'mpc': if np.mod(count, param.get('mpc_update')) == 0: count = 0 U_mpc = controller.get_u(x_curr, t) try: u_curr = U_mpc[count] except: u_curr = np.zeros((param.get('m'), 1)) else: u_curr = controller.get_u(x_curr, t) # except: # break x_next = x_curr + dynamics.get_dxdt(x_curr, u_curr, t) * param.get('dt') X.append(x_curr) U.append(u_curr) V.append(dynamics.get_V(x_curr, t)) # for error plot pbar.append( np.dot( \ np.transpose( util.get_my_1()), np.dot( util.get_p_a(), x_curr))) vbar.append( np.dot( \ np.transpose( util.get_my_1()), np.dot( util.get_v_a(), x_curr))) abar.append( np.dot( \ np.transpose( util.get_my_1()), dynamics.get_vdot_a(x_curr))) x_curr = x_next count += 1 print('\t' + str(t) + '/' + str(param.get('T')[-1])) # if k > 2: # break X = np.squeeze(np.asarray(X)) U = np.squeeze(np.asarray(U)) V = np.asarray(V) pbar = np.asarray(pbar) vbar = np.asarray(vbar) abar = np.asarray(abar) C.append(controller.calc_cost(U)) print('Plots...') plotter.plot_SS(X, param.get('T'), title=str(u_type) + ' State Space') plotter.plot_V(V, param.get('T'), title=str(u_type) + ' Lyapunov') plotter.plot_U(U, param.get('T'), title=str(u_type) + ' Controller') plotter.plot_test2( \ np.squeeze(pbar) - np.squeeze(param.get('pd')), \ np.squeeze(vbar) - np.squeeze(param.get('vd')), \ np.squeeze(abar) - np.squeeze(param.get('ad')), \ param.get('T'), title = str(u_type) + ' Errors') print('Plots Complete') # plotter.plot_cost( C) plotter.save_figs() if param.get('gif_on'): print('Gif...') plotter.make_gif(X, param.get('T')) print('Gif Complete')
# # plotting if True: env.render() for i_config in range(1): #range(env.state_dim_per_agent): fig,ax = plotter.make_fig() # ax.set_title(env.states_name[i_config]) for agent in env.agents: if env.good_nodes[agent.i]: color = 'blue' else: color = 'red' ax.plot( times[0:steps], states[0:steps,env.agent_idx_to_state_idx(agent.i)+i_config], color=color) ax.axhline( env.desired_ave, label='desired', color='green') print('Number of Trajectories:', trajectory_rollout_count) print('Data Shape: ', data.shape) file = os.path.join(param.il_load_dataset,'rl_data.npy') np.save(file, data) plotter.save_figs(param.plots_fn) plotter.open_figs(param.plots_fn)
def main(): param = Param() env = CityMap(param) if param.make_dataset_on: print(' making dataset...') train_dataset, test_dataset = datahandler.make_dataset(env) datahandler.write_dataset(env, train_dataset, test_dataset) datahandler.load_dataset(env) train_dataset = env.train_dataset test_dataset = env.test_dataset train_w = env.get_customer_demand(train_dataset, train_dataset[-1, 0]) test_w = env.get_customer_demand(test_dataset, test_dataset[-1, 0]) print(train_dataset[0, :]) print( np.linalg.norm([ train_dataset[0, 4] - train_dataset[0, 2], train_dataset[0, 5] - train_dataset[0, 3] ])) print(test_dataset[0, :]) print( np.linalg.norm([ test_dataset[0, 4] - test_dataset[0, 2], test_dataset[0, 5] - test_dataset[0, 3] ])) # dataset = [\ # starttime [s], # duration [s], # x_p (longitude) [degrees], # y_p (latitute) [degrees], # x_d (longitude) [degrees], # x_d (latitute) [degrees], #] print('train_dataset.shape', train_dataset.shape) print('test_dataset.shape', test_dataset.shape) print('train ave_length: ', ave_length(train_dataset)) print('test ave_length: ', ave_length(test_dataset)) print('train ave_duration: ', ave_duration(train_dataset)) print('test ave_duration: ', ave_duration(test_dataset)) print('train ave_customer_per_time: ', ave_customer_per_time(train_dataset)) print('test ave_customer_per_time: ', ave_customer_per_time(test_dataset)) print('train ave taxi speed: ', ave_taxi_speed(train_dataset)) print('test ave taxi speed: ', ave_taxi_speed(test_dataset)) print('train_dataset[0:1,:]:', train_dataset[0:1, :]) print('test_dataset[0:1,:]:', test_dataset[0:1, :]) fig, ax = plt.subplots() ax.plot(train_dataset[:, 0]) ax.set_ylabel('pickup time') ax.set_xlabel('customer request #') fig, ax = plt.subplots() ax.plot(test_dataset[:, 0]) ax.set_ylabel('pickup time') ax.set_xlabel('customer request #') fig, ax = plt.subplots() ax.plot(train_dataset[:, 1]) ax.set_ylabel('train duration') ax.set_xlabel('customer request #') fig, ax = plt.subplots() ax.plot(test_dataset[:, 1]) ax.set_ylabel('test duration') ax.set_xlabel('customer request #') fig, ax = plt.subplots() ax.plot( np.linalg.norm([ train_dataset[:, 4] - train_dataset[:, 2], train_dataset[:, 5] - train_dataset[:, 3] ], axis=0)) ax.set_ylabel('train length') ax.set_xlabel('customer request #') fig, ax = plt.subplots() ax.plot( np.linalg.norm([ test_dataset[:, 4] - test_dataset[:, 2], test_dataset[:, 5] - test_dataset[:, 3] ], axis=0)) ax.set_ylabel('test length') ax.set_xlabel('customer request #') print('saving and opening figs...') plotter.save_figs('dataset_plots.pdf') plotter.open_figs('dataset_plots.pdf')
def sim(param, env, controllers, initial_state, visualize): def run_sim(controller, initial_state): states = np.zeros((len(times), env.n)) actions = np.zeros((len(times) - 1, env.m)) states[0] = env.reset(initial_state) reward = 0 for step, time in enumerate(times[:-1]): state = states[step] if param.pomdp_on: observation = env.observe() else: observation = state action = controller.policy(observation) states[step + 1], r, done, _ = env.step(action) reward += r actions[step] = action.flatten() if done: break print('reward: ', reward) env.close() return states, actions, step # ------------------------------------------- # environment times = param.sim_times device = "cpu" if initial_state is None: initial_state = env.reset() print("Initial State: ", initial_state) # run sim SimResult = namedtuple('SimResult', ['states', 'actions', 'steps', 'name']) for name, controller in controllers.items(): print("Running simulation with " + name) if hasattr(controller, 'policy'): result = SimResult._make( run_sim(controller, initial_state) + (name, )) else: result = SimResult._make((controller.states, controller.actions, controller.steps, name)) sim_results = [] sim_results.append(result) if param.sim_render_on: env.render() # plot time varying states if param.single_agent_sim: for i in range(env.n): fig, ax = plotter.subplots() ax.set_title(env.states_name[i]) for result in sim_results: ax.plot(times[0:result.steps], result.states[0:result.steps, i], label=result.name) ax.legend() for i, name in enumerate(env.deduced_state_names): fig, ax = plotter.subplots() ax.set_title(name) for result in sim_results: deduce_states = np.empty( (result.steps, len(env.deduced_state_names))) for j in range(result.steps): deduce_states[j] = env.deduce_state(result.states[j]) ax.plot(times[0:result.steps], deduce_states[:, i], label=result.name) ax.legend() for i in range(env.m): fig, ax = plotter.subplots() ax.set_title(env.actions_name[i]) for result in sim_results: ax.plot(times[0:result.steps], result.actions[0:result.steps, i], label=result.name) ax.legend() elif param.multi_agent_sim: for i_config in range(1): #range(env.state_dim_per_agent): fig, ax = plotter.make_fig() ax.set_title(env.states_name[i_config]) for agent in env.agents: if env.good_nodes[agent.i]: color = 'blue' else: color = 'red' for result in sim_results: ax.plot( times[0:result.steps], result.states[0:result.steps, env.agent_idx_to_state_idx(agent.i) + i_config], label=result.name, color=color) ax.axhline(env.desired_ave, label='desired', color='green') # # plot state space # if param.multi_agent_sim: # fig,ax = plotter.make_fig() # for agent in env.agents: # ax.set_title('State Space') # for result in sim_results: # ax.plot( # result.states[0:result.steps,env.agent_idx_to_state_idx(agent.i)], # result.states[0:result.steps,env.agent_idx_to_state_idx(agent.i)+1], # label=result.name) # extract gains if param.il_controller_class in ['PID_wRef', 'PID']: controller = controllers['IL'] for result in sim_results: if result.name == 'IL': break kp, kd = util.extract_gains(controller, result.states[0:result.steps]) fig, ax = plotter.plot(times[1:result.steps], kp[0:result.steps, 0], title='Kp pos') fig, ax = plotter.plot(times[1:result.steps], kp[0:result.steps, 1], title='Kp theta') fig, ax = plotter.plot(times[1:result.steps], kd[0:result.steps, 0], title='Kd pos') fig, ax = plotter.plot(times[1:result.steps], kd[0:result.steps, 1], title='Kd theta') # extract reference trajectory if param.il_controller_class in ['PID_wRef', 'Ref']: controller = controllers['IL'] for result in sim_results: if result.name == 'IL': break ref_state = util.extract_ref_state(controller, result.states) for i in range(env.n): fig, ax = plotter.plot(times[1:result.steps + 1], ref_state[0:result.steps, i], title="ref " + env.states_name[i]) plotter.save_figs(param.plots_fn) plotter.open_figs(param.plots_fn) # visualize if visualize: # plotter.visualize(param, env, states_deeprl) env.visualize(sim_results[0].states[0:result.steps], 0.1)
def main(): param = Param() param.desired_env_ncell = 2000 param.xmin_thresh = None param.xmax_thresh = None param.ymin_thresh = None param.ymax_thresh = None param.update() env = CityMap(param) # init datasets if param.make_dataset_on: print(' making dataset...') train_dataset, test_dataset = datahandler.make_dataset(env) datahandler.write_dataset(env, train_dataset, test_dataset) print(' loading dataset...') datahandler.load_dataset(env) # env.init_map() # fig,ax=plotter.make_fig() # env.print_map(fig=fig,ax=ax) train_heatmap = env.heatmap(env.train_dataset) test_heatmap = env.heatmap(env.test_dataset) fig = plt.figure() gs = fig.add_gridspec(2,3) ax1 = fig.add_subplot(gs[0, 0]) ax2 = fig.add_subplot(gs[0, 1]) ax3 = fig.add_subplot(gs[0, 2]) ax4 = fig.add_subplot(gs[1, :]) plotter.plot_heatmap(env,train_heatmap,ax1) plotter.plot_heatmap(env,train_heatmap,ax2) plotter.plot_heatmap(env,test_heatmap,ax3) plotter.plot_cell_demand_over_time(env, ax4) ax2.set_ylim(bottom=41.85) ax2.set_ylim(top=42.) ax2.set_xlim(left=-87.7) ax2.set_xlim(right=-87.6) ax3.set_ylim(bottom=41.85) ax3.set_ylim(top=42.) ax3.set_xlim(left=-87.7) ax3.set_xlim(right=-87.6) ax1.axis("off") ax2.axis("off") ax3.axis("off") ax1.set_title('Chicago',fontsize=10) ax2.set_title('October 29, 2016',fontsize=7.5) ax3.set_title('October 30, 2016',fontsize=7.5) # ax1.get_yaxis().set_visible(False) # ax2.get_xaxis().set_visible(False) # ax2.get_yaxis().set_visible(False) print('saving and opening figs...') plotter.save_figs(param.plot_fn) plotter.open_figs(param.plot_fn)
def sim(param, env, controllers, initial_state, visualize): # environment times = param.sim_times device = "cpu" if initial_state is None: initial_state = env.reset() # run sim SimResult = namedtuple( 'SimResult', ['states', 'observations', 'actions', 'steps', 'name']) for name, controller in controllers.items(): print("Running simulation with " + name) print("Initial State: ", initial_state) if hasattr(controller, 'policy'): result = SimResult._make( run_sim(param, env, controller, initial_state) + (name, )) else: observations = [] result = SimResult._make( (controller.states, observations, controller.actions, controller.steps, name)) sim_results = [] sim_results.append(result) # plot state space if param.env_name in [ 'SingleIntegrator', 'SingleIntegratorVelSensing', 'DoubleIntegrator' ]: fig, ax = plotter.make_fig() ax.set_title('State Space') ax.set_aspect('equal') for o in env.obstacles: ax.add_patch( Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5)) for agent in env.agents: line = ax.plot( result.states[0:result.steps, env.agent_idx_to_state_idx(agent.i)], result.states[0:result.steps, env.agent_idx_to_state_idx(agent.i) + 1], alpha=0.5) color = line[0].get_color() # plot velocity vectors: X = [] Y = [] U = [] V = [] for k in np.arange(0, result.steps, 100): X.append( result.states[k, env.agent_idx_to_state_idx(agent.i)]) Y.append( result.states[k, env.agent_idx_to_state_idx(agent.i) + 1]) if param.env_name in [ 'SingleIntegrator', 'SingleIntegratorVelSensing' ]: # Singleintegrator: plot actions U.append(result.actions[k, 2 * agent.i + 0]) V.append(result.actions[k, 2 * agent.i + 1]) elif param.env_name in ['DoubleIntegrator']: # doubleintegrator: plot velocities U.append( result.states[k, env.agent_idx_to_state_idx(agent.i) + 2]) V.append( result.states[k, env.agent_idx_to_state_idx(agent.i) + 3]) ax.quiver(X, Y, U, V, angles='xy', scale_units='xy', scale=0.5, color=color, width=0.005) # num_obstacles = (result.observations.shape[1] - result.observations[0][0] - 5) / 2 # print(num_obstacles) # # print(result.observations) plotter.plot_circle( result.states[1, env.agent_idx_to_state_idx(agent.i)], result.states[1, env.agent_idx_to_state_idx(agent.i) + 1], param.r_agent, fig=fig, ax=ax, color=color) # plotter.plot_square(result.states[-1,env.agent_idx_to_state_idx(agent.i)], # result.states[-1,env.agent_idx_to_state_idx(agent.i)+1],param.r_agent,fig=fig,ax=ax,color=color) plotter.plot_square(agent.s_g[0], agent.s_g[1], param.r_agent, angle=45, fig=fig, ax=ax, color=color) # draw state for each time step robot = 1 if param.env_name in [ 'SingleIntegrator', 'SingleIntegratorVelSensing' ]: for step in np.arange(0, result.steps, 1000): fig, ax = plotter.make_fig() ax.set_title('State at t={} for robot={}'.format( times[step], robot)) ax.set_aspect('equal') # plot all obstacles for o in env.obstacles: ax.add_patch( Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5)) # plot overall trajectory line = ax.plot( result.states[0:result.steps, env.agent_idx_to_state_idx(robot)], result.states[0:result.steps, env.agent_idx_to_state_idx(robot) + 1], "--") color = line[0].get_color() # plot current position plotter.plot_circle( result.states[step, env.agent_idx_to_state_idx(robot)], result.states[step, env.agent_idx_to_state_idx(robot) + 1], param.r_agent, fig=fig, ax=ax, color=color) # plot current observation observation = result.observations[step][robot][0] num_neighbors = int(observation[0]) num_obstacles = int( (observation.shape[0] - 3 - 2 * num_neighbors) / 2) robot_pos = result.states[ step, env.agent_idx_to_state_idx(robot):env. agent_idx_to_state_idx(robot) + 2] idx = 3 for i in range(num_neighbors): pos = observation[idx:idx + 2] + robot_pos ax.add_patch( Circle(pos, 0.25, facecolor='gray', edgecolor='red', alpha=0.5)) idx += 2 for i in range(num_obstacles): # pos = observation[idx : idx+2] + robot_pos - np.array([0.5,0.5]) # ax.add_patch(Rectangle(pos, 1.0, 1.0, facecolor='gray', edgecolor='red', alpha=0.5)) pos = observation[idx:idx + 2] + robot_pos ax.add_patch( Circle(pos, 0.5, facecolor='gray', edgecolor='red', alpha=0.5)) idx += 2 # plot goal goal = observation[1:3] + robot_pos ax.add_patch( Rectangle(goal - np.array([0.2, 0.2]), 0.4, 0.4, alpha=0.5, color=color)) # # import matplotlib.pyplot as plt # # plt.savefig("test.svg") # # exit() elif param.env_name in ['DoubleIntegrator']: for step in np.arange(0, result.steps, 1000): fig, ax = plotter.make_fig() ax.set_title('State at t={} for robot={}'.format( times[step], robot)) ax.set_aspect('equal') # plot all obstacles for o in env.obstacles: ax.add_patch( Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5)) # plot overall trajectory line = ax.plot( result.states[0:result.steps, env.agent_idx_to_state_idx(robot)], result.states[0:result.steps, env.agent_idx_to_state_idx(robot) + 1], "--") color = line[0].get_color() # plot current position plotter.plot_circle( result.states[step, env.agent_idx_to_state_idx(robot)], result.states[step, env.agent_idx_to_state_idx(robot) + 1], param.r_agent, fig=fig, ax=ax, color=color) # plot current observation observation = result.observations[step][robot][0] num_neighbors = int(observation[0]) num_obstacles = int( (observation.shape[0] - 5 - 4 * num_neighbors) / 2) robot_pos = result.states[ step, env.agent_idx_to_state_idx(robot):env. agent_idx_to_state_idx(robot) + 2] X = [] Y = [] U = [] V = [] idx = 5 for i in range(num_neighbors): pos = observation[idx:idx + 2] + robot_pos X.append(pos[0]) Y.append(pos[1]) U.append(observation[idx + 2]) V.append(observation[idx + 3]) # print(np.linalg.norm(observation[idx+2:idx+4])) ax.add_patch( Circle(pos, param.r_agent, facecolor='gray', edgecolor='red', alpha=0.5)) idx += 4 for i in range(num_obstacles): pos = observation[idx:idx + 2] + robot_pos - np.array( [0.5, 0.5]) ax.add_patch( Rectangle(pos, 1.0, 1.0, facecolor='gray', edgecolor='red', alpha=0.5)) # pos = observation[idx : idx+2] + robot_pos # ax.add_patch(Circle(pos, 0.5, facecolor='gray', edgecolor='red', alpha=0.5)) idx += 2 # plot goal goal = observation[1:3] + robot_pos ax.add_patch( Rectangle(goal - np.array([0.2, 0.2]), 0.4, 0.4, alpha=0.5, color=color)) X.append(robot_pos[0]) Y.append(robot_pos[1]) U.append(observation[3]) V.append(observation[4]) # plot velocity vectors ax.quiver(X, Y, U, V, angles='xy', scale_units='xy', scale=0.5, color='red', width=0.005) elif param.env_name == 'Consensus' and param.sim_render_on: env.render() elif param.sim_render_on: env.render() # plot time varying states if param.single_agent_sim: for i in range(env.n): fig, ax = plotter.subplots() ax.set_title(env.states_name[i]) for result in sim_results: ax.plot(times[0:result.steps], result.states[0:result.steps, i], label=result.name) ax.legend() for i, name in enumerate(env.deduced_state_names): fig, ax = plotter.subplots() ax.set_title(name) for result in sim_results: deduce_states = np.empty( (result.steps, len(env.deduced_state_names))) for j in range(result.steps): deduce_states[j] = env.deduce_state(result.states[j]) ax.plot(times[0:result.steps], deduce_states[:, i], label=result.name) ax.legend() for i in range(env.m): fig, ax = plotter.subplots() ax.set_title(env.actions_name[i]) for result in sim_results: ax.plot(times[0:result.steps], result.actions[0:result.steps, i], label=result.name) ax.legend() elif param.env_name == 'Consensus': for i_config in range(1): #range(env.state_dim_per_agent): fig, ax = plotter.make_fig() # ax.set_title(env.states_name[i_config]) for agent in env.agents: if env.good_nodes[agent.i]: color = 'blue' else: color = 'red' for result in sim_results: ax.plot( times[0:result.steps], result.states[0:result.steps, env.agent_idx_to_state_idx(agent.i) + i_config], label=result.name, color=color) ax.axhline(env.desired_ave, label='desired', color='green') elif param.env_name in ['SingleIntegrator', 'DoubleIntegrator']: for i_config in range(env.state_dim_per_agent): fig, ax = plotter.make_fig() ax.set_title(env.states_name[i_config]) for agent in env.agents: for result in sim_results: ax.plot( times[1:result.steps], result.states[1:result.steps, env.agent_idx_to_state_idx(agent.i) + i_config], label=result.name) # plot time varying actions if param.env_name in ['SingleIntegrator', 'DoubleIntegrator']: for i_config in range(env.action_dim_per_agent): fig, ax = plotter.make_fig() ax.set_title(env.actions_name[i_config]) for agent in env.agents: for result in sim_results: ax.plot( times[1:result.steps], result.actions[1:result.steps, agent.i * env.action_dim_per_agent + i_config], label=result.name) # extract gains if name in ['PID_wRef', 'PID']: controller = controllers['IL'] for result in sim_results: if result.name == 'IL': break kp, kd = util.extract_gains(controller, result.states[0:result.steps]) fig, ax = plotter.plot(times[1:result.steps], kp[0:result.steps, 0], title='Kp pos') fig, ax = plotter.plot(times[1:result.steps], kp[0:result.steps, 1], title='Kp theta') fig, ax = plotter.plot(times[1:result.steps], kd[0:result.steps, 0], title='Kd pos') fig, ax = plotter.plot(times[1:result.steps], kd[0:result.steps, 1], title='Kd theta') # extract reference trajectory if name in ['PID_wRef', 'Ref']: controller = controllers['IL'] for result in sim_results: if result.name == 'IL': break ref_state = util.extract_ref_state(controller, result.states) for i in range(env.n): fig, ax = plotter.plot(times[1:result.steps + 1], ref_state[0:result.steps, i], title="ref " + env.states_name[i]) # extract belief topology if name in ['IL'] and param.env_name is 'Consensus': for result in sim_results: if result.name == 'IL': break fig, ax = plotter.make_fig() belief_topology = util.extract_belief_topology( controller, result.observations) label_on = True if param.n_agents * param.n_agents > 10: label_on = False for i_agent in range(param.n_agents): for j_agent in range(param.n_agents): if not i_agent == j_agent and env.good_nodes[i_agent]: if label_on: plotter.plot(times[0:result.steps + 1], belief_topology[:, i_agent, j_agent], fig=fig, ax=ax, label="K:{}{}".format( i_agent, j_agent)) else: plotter.plot(times[0:result.steps + 1], belief_topology[:, i_agent, j_agent], fig=fig, ax=ax) plotter.save_figs(param.plots_fn) plotter.open_figs(param.plots_fn) # visualize if visualize: # plotter.visualize(param, env, states_deeprl) env.visualize(sim_results[0].states[0:result.steps], 0.1)
def fn3(): pdf_path = os.path.join(os.getcwd(), param.get('fn_plots')) # remove if exists if os.path.exists(pdf_path): os.remove(pdf_path) np.random.seed(88) util.get_x0() util.get_xd() # baseline trajectory bX = [] bU = np.zeros((len(param.get('T')), param.get('m'))) # true perturbed trajectory X = [] Xdot = [] V = [] Vdot = [] # linearized perturbed trajectory tX = [] tXdot = [] tV = [] tVdot = [] # collect baseline x_curr = param.get('x0') for k, t in enumerate(param.get('T')): u_curr = util.list_to_vec(bU[k, :]) x_next = x_curr + param.get('dt') * dynamics.get_dxdt( x_curr, u_curr, t) bX.append(x_curr) x_curr = x_next bX = np.squeeze(np.asarray(bX)) # now run two trajectories and look at divergence eps_x0 = 0.0 * np.random.uniform(size=(param.get('n'), 1)) eps_u = 0.0 * np.random.uniform(size=(param.get('nt'), param.get('m'))) x_curr = param.get('x0') + eps_x0 tx_curr = param.get('x0') + eps_x0 scpU = bU + eps_u for k, t in enumerate(param.get('T')): # current control u_curr = np.reshape(np.transpose(scpU[k, :]), (param.get('m'), 1)) # base ub = util.list_to_vec(bU[k, :]) xb = util.list_to_vec(bX[k, :]) # true dxdt = dynamics.get_dxdt(x_curr, u_curr, t) x_next = x_curr + dxdt * param.get('dt') v = dynamics.get_V(x_curr, t) vdot = dynamics.get_LfV(x_curr, t) + np.matmul( dynamics.get_LgV(x_curr, t), u_curr) # approximate F_k, B_k, d_k = dynamics.get_linear_dynamics( xb, \ ub, t) R_k, w_k = dynamics.get_linear_lyapunov(xb, ub, t) tx_next = np.matmul( F_k, tx_curr) + \ np.matmul( B_k, u_curr) + d_k tv = np.matmul(R_k, tx_curr) + w_k X.append(x_curr) Xdot.append(dxdt) V.append(v) Vdot.append(vdot) tX.append(tx_curr) tV.append(tv) x_curr = x_next tx_curr = tx_next print('Timestep:' + str(k) + '/' + str(len(param.get('T')))) X = np.squeeze(np.asarray(X)) V = np.reshape(np.asarray(V), (len(V), 1)) Xdot = np.squeeze(np.asarray(Xdot)) tX = np.squeeze(np.asarray(tX)) tV = np.reshape(np.asarray(tV), (len(tV), 1)) tXdot = np.gradient(tX, param.get('dt'), axis=0) tVdot = np.gradient(tV, param.get('dt'), axis=0) print(np.shape(X)) print(np.shape(V)) print(np.shape(Xdot)) print(np.shape(tX)) print(np.shape(tV)) print(np.shape(tXdot)) print(np.shape(tVdot)) epa1 = np.linalg.norm(X[:, 0:2] - tX[:, 0:2], axis=1) eva1 = np.linalg.norm(X[:, 2:4] - tX[:, 2:4], axis=1) epa2 = np.linalg.norm(X[:, 4:6] - tX[:, 4:6], axis=1) eva2 = np.linalg.norm(X[:, 6:8] - tX[:, 6:8], axis=1) epb = np.linalg.norm(X[:, 8:10] - tX[:, 8:10], axis=1) evb = np.linalg.norm(X[:, 10:12] - tX[:, 10:12], axis=1) eXdot = np.linalg.norm(Xdot - tXdot, axis=1) eV = np.linalg.norm(V - tV, axis=1) eVdot = np.linalg.norm(Vdot - tVdot, axis=1) import matplotlib.pyplot as plt fig, ax = plt.subplots() plt.plot(epa1, eXdot, label='eXdot') plt.plot(epa1, eV, label='eV') ax.set_xscale('log') ax.set_yscale('log') ax.set_xlabel('epa') plt.legend() fig, ax = plt.subplots() plt.plot(eva1, eXdot, label='eXdot') plt.plot(eva1, eV, label='eV') ax.set_xscale('log') ax.set_yscale('log') ax.set_xlabel('eva') plt.legend() fig, ax = plt.subplots() plt.plot(epb, eXdot, label='eXdot') plt.plot(epb, eV, label='eV') # ax.set_xscale('log') ax.set_yscale('log') ax.set_xlabel('epb') plt.legend() fig, ax = plt.subplots() plt.plot(evb, eXdot, label='eXdot') plt.plot(evb, eV, label='eV') # ax.set_xscale('log') ax.set_yscale('log') ax.set_xlabel('evb') plt.legend() # plt.plot( eX, eVdot, label = 'eVdot') plotter.save_figs() subprocess.call(["xdg-open", pdf_path])
def fn2(): pdf_path = os.path.join(os.getcwd(), param.get('fn_plots')) # remove if exists if os.path.exists(pdf_path): os.remove(pdf_path) np.random.seed(88) util.get_x0() util.get_xd() # trjaectory to linearize about fX = [] fU = [] fV = [] # scp trajectory tX = [] tU = [] tV = [] # integrated trajectory with scp control scpV = [] # linearize about trajectory bV = [] # feedback linearizing baseline x_curr = param.get('x0') for k, t in enumerate(param.get('T')): u_curr = controller.get_fdbk_controller(x_curr, t) x_next = x_curr + param.get('dt') * dynamics.get_dxdt( x_curr, u_curr, t) v = dynamics.get_V(x_curr, t) fX.append(x_curr) fU.append(u_curr) fV.append(v) x_curr = x_next fX = np.squeeze(np.asarray(fX)) fU = np.asarray(fU) # scp scpX, scpU, bX, bU = controller.get_scp_clf_controller() # integrate tx_curr = param.get('x0') x_curr = param.get('x0') for k, t in enumerate(param.get('T')): ub = util.list_to_vec(bU[k, :]) xb = util.list_to_vec(bX[k, :]) u_curr = np.transpose(util.list_to_vec(scpU[k, :])) F_k, B_k, d_k = dynamics.get_linear_dynamics(xb, ub, t) R_k, w_k = dynamics.get_linear_lyapunov(xb, ub, t) tx_next = np.matmul(F_k, tx_curr) + np.matmul(B_k, u_curr) + d_k x_next = x_curr + param.get('dt') * dynamics.get_dxdt( x_curr, u_curr, t) tv = np.matmul(R_k, tx_curr) + w_k scpv = dynamics.get_V(x_curr, t) tX.append(tx_curr) tV.append(tv) scpV.append(scpv) bV.append(dynamics.get_V(xb, t)) tx_curr = tx_next x_curr = x_next tX = np.squeeze(np.asarray(tX)) bV = np.asarray(bV) plotter.plot_SS(fX, param.get('T'), title='Fdbk Linearize SS') plotter.plot_V(fV, param.get('T'), title='Fdbk Linearize V') plotter.plot_U(fU, param.get('T'), title='Fdbk Linearize U') plotter.plot_SS(bX, param.get('T'), title='What SCP is linearizing about: SS') plotter.plot_V(bV, param.get('T'), title='What SCP is linearizing about: V') # plotter.plot_U(bU, param.get('T'), title = 'What SCP is linearizing about: U') plotter.plot_SS(tX, param.get('T'), title='What SCP thinks its doing: SS') plotter.plot_V(tV, param.get('T'), title='What SCP thinks its doing: V') # plotter.plot_U(tU, param.get('T'), title = 'What SCP thinks its doing: U') plotter.plot_SS(scpX, param.get('T'), title='What SCP is actually doing: SS') plotter.plot_V(scpV, param.get('T'), title='What SCP is actually doing: V') # plotter.plot_U(scpU, param.get('T'), title = 'What SCP is actually doing: U') plotter.save_figs() subprocess.call(["xdg-open", pdf_path])
plotter.plot_totalreward_vs_number_of_agents(sim_results) else: # micro sim # - plot sim if default_param.plot_sim_over_time: for sim_result in sim_results: controller_name = sim_result["controller_name"] times = sim_result["times"] plotter.sim_plot_over_time(controller_name,sim_result,times) else: for sim_result in sim_results: for timestep,time in enumerate(sim_result["times"]): controller_name = sim_result["controller_name"] plotter.render(controller_name,sim_result,timestep) # break # - plot reward plotter.plot_cumulative_reward(sim_results) # - plot q value estimation for controller_name in default_param.controller_names: if 'bellman' in controller_name: plotter.plot_q_error(sim_results) break print('saving and opening figs...') plotter.save_figs(default_param.plot_fn) plotter.open_figs(default_param.plot_fn)
def sim(param, env, controllers, initial_state, visualize): # environment times = param.sim_times device = "cpu" if initial_state is None: initial_state = env.reset() # run sim SimResult = namedtuple( 'SimResult', ['states', 'observations', 'actions', 'steps', 'name']) for name, controller in controllers.items(): print("Running simulation with " + name) print("Initial State: ", initial_state) if hasattr(controller, 'policy'): result = SimResult._make( run_sim(param, env, controller, initial_state) + (name, )) else: observations = [] result = SimResult._make( (controller.states, observations, controller.actions, controller.steps, name)) sim_results = [] sim_results.append(result) # plot state space if param.env_name in [ 'SingleIntegrator', 'SingleIntegratorVelSensing', 'DoubleIntegrator' ]: fig, ax = plotter.make_fig() ax.set_title('State Space') ax.set_aspect('equal') for o in env.obstacles: ax.add_patch( Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5)) for agent in env.agents: line = ax.plot( result.states[0:result.steps, env.agent_idx_to_state_idx(agent.i)], result.states[0:result.steps, env.agent_idx_to_state_idx(agent.i) + 1], alpha=0.5) color = line[0].get_color() # plot velocity vectors: X = [] Y = [] U = [] V = [] for k in np.arange(0, result.steps, 100): X.append( result.states[k, env.agent_idx_to_state_idx(agent.i)]) Y.append( result.states[k, env.agent_idx_to_state_idx(agent.i) + 1]) if param.env_name in [ 'SingleIntegrator', 'SingleIntegratorVelSensing' ]: # Singleintegrator: plot actions U.append(result.actions[k, 2 * agent.i + 0]) V.append(result.actions[k, 2 * agent.i + 1]) elif param.env_name in ['DoubleIntegrator']: # doubleintegrator: plot velocities U.append( result.states[k, env.agent_idx_to_state_idx(agent.i) + 2]) V.append( result.states[k, env.agent_idx_to_state_idx(agent.i) + 3]) ax.quiver(X, Y, U, V, angles='xy', scale_units='xy', scale=0.5, color=color, width=0.005) plotter.plot_circle( result.states[1, env.agent_idx_to_state_idx(agent.i)], result.states[1, env.agent_idx_to_state_idx(agent.i) + 1], param.r_agent, fig=fig, ax=ax, color=color) plotter.plot_square(agent.s_g[0], agent.s_g[1], param.r_agent, angle=45, fig=fig, ax=ax, color=color) # draw state for each time step robot = 0 if param.env_name in ['SingleIntegrator']: for step in np.arange(0, result.steps, 1000): fig, ax = plotter.make_fig() ax.set_title('State at t={} for robot={}'.format( times[step], robot)) ax.set_aspect('equal') # plot all obstacles for o in env.obstacles: ax.add_patch( Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5)) # plot overall trajectory line = ax.plot( result.states[0:result.steps, env.agent_idx_to_state_idx(robot)], result.states[0:result.steps, env.agent_idx_to_state_idx(robot) + 1], "--") color = line[0].get_color() # plot current position plotter.plot_circle( result.states[step, env.agent_idx_to_state_idx(robot)], result.states[step, env.agent_idx_to_state_idx(robot) + 1], param.r_agent, fig=fig, ax=ax, color=color) # plot current observation observation = result.observations[step][robot][0] num_neighbors = int(observation[0]) num_obstacles = int( (observation.shape[0] - 3 - 2 * num_neighbors) / 2) robot_pos = result.states[ step, env.agent_idx_to_state_idx(robot):env. agent_idx_to_state_idx(robot) + 2] idx = 3 for i in range(num_neighbors): pos = observation[idx:idx + 2] + robot_pos ax.add_patch( Circle(pos, 0.25, facecolor='gray', edgecolor='red', alpha=0.5)) idx += 2 for i in range(num_obstacles): # pos = observation[idx : idx+2] + robot_pos - np.array([0.5,0.5]) # ax.add_patch(Rectangle(pos, 1.0, 1.0, facecolor='gray', edgecolor='red', alpha=0.5)) pos = observation[idx:idx + 2] + robot_pos ax.add_patch( Circle(pos, 0.5, facecolor='gray', edgecolor='red', alpha=0.5)) idx += 2 # plot goal goal = observation[1:3] + robot_pos ax.add_patch( Rectangle(goal - np.array([0.2, 0.2]), 0.4, 0.4, alpha=0.5, color=color)) # # import matplotlib.pyplot as plt # # plt.savefig("test.svg") # # exit() elif param.env_name in ['DoubleIntegrator']: for step in np.arange(0, result.steps, 1000): fig, ax = plotter.make_fig() ax.set_title('State at t={} for robot={}'.format( times[step], robot)) ax.set_aspect('equal') # plot all obstacles for o in env.obstacles: ax.add_patch( Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5)) # plot overall trajectory line = ax.plot( result.states[0:result.steps, env.agent_idx_to_state_idx(robot)], result.states[0:result.steps, env.agent_idx_to_state_idx(robot) + 1], "--") color = line[0].get_color() # plot current position plotter.plot_circle( result.states[step, env.agent_idx_to_state_idx(robot)], result.states[step, env.agent_idx_to_state_idx(robot) + 1], param.r_agent, fig=fig, ax=ax, color=color) # plot current observation observation = result.observations[step][robot][0] num_neighbors = int(observation[0]) num_obstacles = int( (observation.shape[0] - 5 - 4 * num_neighbors) / 2) robot_pos = result.states[ step, env.agent_idx_to_state_idx(robot):env. agent_idx_to_state_idx(robot) + 2] X = [] Y = [] U = [] V = [] idx = 5 for i in range(num_neighbors): pos = observation[idx:idx + 2] + robot_pos X.append(pos[0]) Y.append(pos[1]) U.append(observation[idx + 2]) V.append(observation[idx + 3]) # print(np.linalg.norm(observation[idx+2:idx+4])) ax.add_patch( Circle(pos, param.r_agent, facecolor='gray', edgecolor='red', alpha=0.5)) idx += 4 for i in range(num_obstacles): pos = observation[idx:idx + 2] + robot_pos - np.array( [0.5, 0.5]) ax.add_patch( Rectangle(pos, 1.0, 1.0, facecolor='gray', edgecolor='red', alpha=0.5)) # pos = observation[idx : idx+2] + robot_pos # ax.add_patch(Circle(pos, 0.5, facecolor='gray', edgecolor='red', alpha=0.5)) idx += 2 # plot goal goal = observation[1:3] + robot_pos ax.add_patch( Rectangle(goal - np.array([0.2, 0.2]), 0.4, 0.4, alpha=0.5, color=color)) X.append(robot_pos[0]) Y.append(robot_pos[1]) U.append(observation[3]) V.append(observation[4]) # plot velocity vectors ax.quiver(X, Y, U, V, angles='xy', scale_units='xy', scale=0.5, color='red', width=0.005) # plot time varying states if param.env_name in ['SingleIntegrator', 'DoubleIntegrator']: for i_config in range(env.state_dim_per_agent): fig, ax = plotter.make_fig() ax.set_title(env.states_name[i_config]) for agent in env.agents: for result in sim_results: ax.plot( times[1:result.steps], result.states[1:result.steps, env.agent_idx_to_state_idx(agent.i) + i_config], label=result.name) # plot time varying actions if param.env_name in ['SingleIntegrator', 'DoubleIntegrator']: for i_config in range(env.action_dim_per_agent): fig, ax = plotter.make_fig() ax.set_title(env.actions_name[i_config]) for agent in env.agents: for result in sim_results: ax.plot( times[1:result.steps], result.actions[1:result.steps, agent.i * env.action_dim_per_agent + i_config], label=result.name) # if i_config == 5: ax.set_yscale('log') plotter.save_figs(param.plots_fn) plotter.open_figs(param.plots_fn) # visualize if visualize: # plotter.visualize(param, env, states_deeprl) env.visualize(sim_results[0].states[0:result.steps], 0.1)