def main():
    env = make_env(config.env, config.seed, -1)
    print('State dimension:', env.observation_space.shape[0])
    print('Action dimension:', env.action_space.shape[0])
    print('Action range:', env.action_space.low, '~', env.action_space.high)

    memorySampler = Sampler(config)
    safety_layer = SafetyLayer(args, env.observation_space.shape[0],
                               env.action_space.shape[0])
    agent = Agent(env.observation_space.shape[0], env.action_space.shape[0],
                  config)

    # Load pretrained network parameters.
    if args.load is not None:
        pretrained_model_path = os.path.join(proj_path, 'save_model',
                                             str(args.load))
        pretrained_model = torch.load(pretrained_model_path,
                                      map_location=device)
        agent.policy.load_state_dict(pretrained_model)

    finished_steps = 0
    finished_episodes = 0
    best_target = 0
    save_index = 0

    print('____________________train safety layer_________________________')
    for i in range(args.safety_layer_epoch):
        # Alternate between collecting samples with a random policy and
        # fitting / evaluating the safety layer on them.
        for _ in range(args.sample_scale):
            memorySampler.sample(None)
        safety_layer.train(memorySampler.buffer)
        memorySampler.buffer.clear()
        for _ in range(args.sample_scale):
            memorySampler.sample(None)
        safety_layer.evaluate(memorySampler.buffer)
        memorySampler.buffer.clear()

    # Re-initialize the agent so policy training starts from scratch after
    # safety-layer pre-training.
    agent = Agent(env.observation_space.shape[0], env.action_space.shape[0],
                  config)

    print('____________________train agent_________________________')
    for i in range(config.iterations):
        print('________________________iter:', i,
              '_____________________________')

        '''sample stage'''
        start_time = time.time()
        samples = memorySampler.sample(agent.policy)
        sample_time = time.time() - start_time

        '''train stage'''
        start_time = time.time()
        agent.train_model(samples, iter_index=i)
        train_time = time.time() - start_time
        show_agent(writer, config.agent['name'], agent, i)
        if config.agent['name'] in ['cppo', 'ppo', 'sac', 'safe_sac']:
            agent.print_loss()

        print('TRAIN:')
        print('reward: mean', np.mean(memorySampler.result_dict['reward']),
              'max', np.max(memorySampler.result_dict['reward']),
              'min', np.min(memorySampler.result_dict['reward']),
              'std', np.std(memorySampler.result_dict['reward']))
        print('cost: mean', np.mean(memorySampler.result_dict['cost']),
              'max', np.max(memorySampler.result_dict['cost']),
              'min', np.min(memorySampler.result_dict['cost']),
              'std', np.std(memorySampler.result_dict['cost']))
        print('done rate:', np.mean(memorySampler.result_dict['done']),
              'collision rate:',
              np.mean(memorySampler.result_dict['collision']))

        '''for saving the model'''
        finished_steps += len(memorySampler.buffer)
        finished_episodes += memorySampler.buffer.num_episode
        # Score this iteration by mean reward per unit episode time.
        target = (np.mean(memorySampler.result_dict['reward']) /
                  np.mean(memorySampler.result_dict['time']))

        '''evaluation stage'''
        start_time = time.time()
        memorySampler.evel(agent.policy)  # sic: the Sampler method is named `evel`
        eval_time = time.time() - start_time

        print('EVALUATION:')
        print('reward: mean', np.mean(memorySampler.result_dict['reward']),
              'max', np.max(memorySampler.result_dict['reward']),
              'std', np.std(memorySampler.result_dict['reward']))
        print('cost: mean', np.mean(memorySampler.result_dict['cost']),
              'max', np.max(memorySampler.result_dict['cost']),
              'std', np.std(memorySampler.result_dict['cost']))
        print('done rate:', np.mean(memorySampler.result_dict['done']),
              'collision rate:',
              np.mean(memorySampler.result_dict['collision']))

        print('USE TIME AND SAMPLES:')
        print('sample time:', sample_time, 'train time:', train_time,
              'eval time:', eval_time,
              'steps:', memorySampler.buffer.sample_size,
              'total steps:', finished_steps,
              'episodes:', memorySampler.buffer.num_episode,
              'total episodes:', finished_episodes)

        # Save the trained model once its score clears a fixed threshold.
        condition = 2
        if target > condition:
            ckpt_path = os.path.join(
                save_path,
                'i_{}_seed_{}_st_{}_ep_{}_tar_{}.pt'.format(
                    i, args.seed, finished_steps, finished_episodes,
                    round(target, 2)))
            torch.save(agent.policy.state_dict(), ckpt_path)
            # Keep a single rolling best checkpoint.
            if target > best_target:
                if save_index > 0:
                    os.remove(os.path.join(
                        save_path,
                        'best_model_i_' + str(save_index) + '.pt'))
                best_ckpt_path = os.path.join(
                    save_path, 'best_model_i_' + str(i) + '.pt')
                save_index = i
                torch.save(agent.policy.state_dict(), best_ckpt_path)
                best_target = target

    memorySampler.close()
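

# The checkpoint score used in main() compresses an iteration's results into
# mean reward per unit episode time, so equal-reward policies that finish
# faster rank higher. A standalone restatement (sketch only; the name
# `selection_target` is illustrative, not from the original code):
def selection_target(result_dict):
    """Mean episode reward divided by mean episode duration."""
    return np.mean(result_dict['reward']) / np.mean(result_dict['time'])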
# else:
#     policys[args.algo] = SACActor(env.observation_space.shape[0],
#                                   env.action_space.shape[0],
#                                   hidden_sizes=(64, 64))
#
# if not args.algo == 'human':
#     policys[args.algo].load_state_dict(
#         torch.load(args.load_path, map_location=device))


def slope(distances):
    """Per-step change in the logged distances, i.e. speed in metres per step."""
    distances = np.array(distances)
    return distances[1:] - distances[:-1]


env = make_env(config.env, config.seed, 'test')

# Checkpoints to compare: label -> [algorithm, path].
model = {
    '(a) CPPO-PID-AEB': [
        'cppo',
        '/home/user/repos/CAV/save_model/expsumo_env-v6_2021-01-07/cppo_15-21-35/best_model_i_269.pt'
    ],
    '(b) CPPO-PID': [
        'cppo',
        '/home/user/repos/CAV/save_model/expsumo_env-v6_2021-01-08/cppo_14-05-50/best_model_i_153.pt'
    ],
    '(c) PPO-safe-AEB': [
        'ppo2',
        '/home/user/repos/CAV/save_model/expsumo_env-v6_2021-01-07/ppo2_15-21-43/best_model_i_252.pt'
    ],
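# A hypothetical sketch (not from the original file) of how the `model`
# registry above could populate the `policys` dict that the evaluation
# functions index by label, mirroring the commented-out loading code:
#
#     policys = {}
#     for label, (algo, ckpt_path) in model.items():
#         net = SACActor(env.observation_space.shape[0],
#                        env.action_space.shape[0], hidden_sizes=(64, 64))
#         net.load_state_dict(torch.load(ckpt_path, map_location=device))
#         policys[label] = net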
def evaluation(algo_name, ax):
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    random.seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    if algo_name == '(g) Human':
        config.env['human_model'] = True
    env = make_env(config.env, config.seed, 'test')

    segments = []  # one (steps, 2) array of (time, distance) points per episode
    dydx_s = []    # per-step speeds used to color the segments
    for i in range(args.episodes):
        results = {'time': 0, 'speed': 0, 'collision': 0, 'success': 0,
                   'fuel': 0}
        print('-----------------', i, '---------------------')
        obs = env.reset()
        done = False
        speed = 0
        step = 0
        distances = []
        if algo_name == '(g) Human':
            # The human-driver model acts inside the environment itself,
            # so env.step() takes no action.
            while not done:
                step += 1
                distances.append(
                    env.connect.vehicle.getDistance(env.curr_veh_id))
                obs_next, reward, done, info = env.step()
                speed += info['speed']
                results['collision'] += info['crash']
                results['success'] += info['success']
                results['fuel'] += info['fuel']
        else:
            while not done:
                step += 1
                distances.append(
                    env.connect.vehicle.getDistance(env.curr_veh_id))
                obs = torch.Tensor(obs).unsqueeze(0)
                action = policys[algo_name](obs)
                action = action.detach().cpu().numpy()[0]
                obs_next, reward, done, info = env.step(action)
                obs = obs_next
                speed += info['speed']
                results['collision'] += info['crash']
                results['success'] += info['success']
                results['fuel'] += info['fuel']
                if config.env['gui']:
                    time.sleep(0.005)
        results['time'] += 0.2 * step  # each step is 0.2 s of simulated time
        results['speed'] += speed / step
        dydx = slope(distances)
        # Offset each episode by 40 steps on the x axis so episodes plot
        # side by side.
        x = np.arange(0 + i * 40, len(distances) + i * 40, 1)
        y = np.array(distances)
        dydx_s.append(dydx)
        segments.append(np.array([x, y]).T.reshape(-1, 2))
        print(results)

    norm = plt.Normalize(0, 4)
    for i in range(args.episodes):
        # Pair consecutive points into (steps - 1, 2, 2) line segments and
        # color each segment by the per-step speed.
        segment = np.concatenate(
            [[segments[i][:-1, :]], [segments[i][1:, :]]], axis=0)
        segment = segment.transpose(1, 0, 2)
        lc = LineCollection(segment, cmap='viridis', norm=norm)
        lc.set_array(dydx_s[i])
        lc.set_linewidth(0.7)
        line = ax.add_collection(lc)
    cb = fig.colorbar(line, ax=ax)  # assumes a module-level `fig` that owns `ax`
    cb.set_label('speed(m/0.2s)')

    x = np.arange(0, len(distances) + i * 40, 1)
    y = np.ones(len(distances) + i * 40)
    plt.fill_between(x, 95 * y, 110 * y, facecolor='gray', alpha=0.2,
                     label='Crosswalk')
    plt.fill_between(x, 140 * y, 145 * y, facecolor='green', alpha=0.2,
                     label='End')
    ax.set_xlabel('Time(s)')
    ax.set_ylabel('Distance(m)')
    plt.legend(loc='lower right')
    env.close()
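

# A self-contained sketch of the segment-coloring technique used in
# evaluation(algo_name, ax). The toy data and names below are illustrative;
# it relies only on the module's existing `np`, `plt`, and `LineCollection`
# imports. Consecutive (x, y) points are paired into (N - 1, 2, 2) segments,
# and LineCollection shades each segment by one scalar, just as the
# trajectories above are shaded by per-step speed.
def _linecollection_demo():
    x = np.linspace(0, 10, 50)
    y = np.sin(x)
    dy = y[1:] - y[:-1]  # one color value per segment
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segs = np.concatenate([points[:-1], points[1:]], axis=1)  # (N-1, 2, 2)
    lc = LineCollection(segs, cmap='viridis',
                        norm=plt.Normalize(dy.min(), dy.max()))
    lc.set_array(dy)
    lc.set_linewidth(0.7)
    demo_fig, demo_ax = plt.subplots()
    line = demo_ax.add_collection(lc)
    demo_ax.autoscale()  # collections do not trigger autoscaling on their own
    demo_fig.colorbar(line, ax=demo_ax)
    plt.show()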
# NOTE: this second `evaluation` definition shadows the single-axis variant above.
def evaluation():
    plt.style.use(['science', 'ieee'])
    for i in range(args.episodes):
        print('-----------------', i, '---------------------')
        fig, axes = plt.subplots(2, 1, sharex='col', figsize=(5, 5))
        for key in policys.keys():
            config.env['human_model'] = (key == 'Human')
            env = make_env(config.env, seed=i, env_index='test')
            np.random.seed(i)
            torch.manual_seed(i)
            random.seed(i)
            torch.cuda.manual_seed(i)
            print('-----------------', key, '---------------------')
            obs = env.reset()
            done = False
            step = 0
            distances = []
            speeds = []
            if key == 'Human':
                while not done:
                    step += 1
                    distances.append(
                        env.connect.vehicle.getDistance(env.curr_veh_id))
                    speeds.append(env.curr_speed)
                    obs_next, reward, done, info = env.step()
                    if config.env['gui']:
                        time.sleep(0.005)
            else:
                while not done:
                    step += 1
                    distances.append(
                        env.connect.vehicle.getDistance(env.curr_veh_id))
                    speeds.append(env.curr_speed)
                    obs = torch.Tensor(obs).unsqueeze(0)
                    action = policys[key](obs)
                    action = action.detach().cpu().numpy()[0]
                    obs_next, reward, done, info = env.step(action)
                    obs = obs_next
                    if config.env['gui']:
                        time.sleep(0.005)
            env.close()
            x = np.arange(0, len(distances), 1)
            axes[0].plot(x, np.array(distances), label=key)
            axes[1].plot(x, np.array(speeds), label=key)
        y = np.ones(150)
        axes[0].fill_between(np.arange(0, 150, 1), 95 * y, 110 * y,
                             facecolor='gray', alpha=0.2, label='Crosswalk')
        axes[0].fill_between(np.arange(0, 150, 1), 140 * y, 145 * y,
                             facecolor='green', alpha=0.2, label='End')
        axes[0].set_ylabel('Distance(m)')
        axes[1].set_ylabel('Velocity(m/s)')
        plt.xlabel('Time(s)')
        plt.legend(loc=8, bbox_to_anchor=(0.4, -0.5), ncol=3)
        plt.show()
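

# Both evaluation paths repeat the same four seeding calls; a small helper
# (sketch only; `seed_everything` is not a name from the original code)
# keeps them together so no library is missed:
def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)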