def _play_n_game(model, task: str, n_games: int, display=False, record=False): env = model.env_dict[task] if record: env = VecVideoRecorder(env, './data/videos/', record_video_trigger=lambda x: x == 0, video_length=10_000, name_prefix="trained-agent-{}".format(task)) timesteps = 0 sum_reward = 0 for i in range(n_games): obs = env.reset() done = None state = None while not done: action, state = model.predict(task, obs, state, done) obs, reward, done, info = env.step(action) timesteps += 1 sum_reward += reward if display is True: env.render() time.sleep(0.005) sum_reward = int(sum_reward / n_games) if sum_reward == 0: # harmonic mean needs greater than zero elements sum_reward = 0.1 timesteps = int(timesteps / n_games) env.close() return sum_reward, timesteps
def func_run(env, logger, lr, action_noise, file):
    """Train a SAC agent on `env`, save the checkpoint, and log its path.

    Args:
        env: vectorized environment to train on.
        logger: tensorboard/video log directory for this run.
        lr: learning rate for SAC.
        action_noise: exploration noise object passed to SAC.
        file: open file handle; the checkpoint path is appended to it.

    Returns:
        True on completion.
    """
    expDir = '/home/shivanik/lab/pointExp/state/'
    num_objs = 1
    name = 'sac_%d_0.5' % num_objs
    nIter = 5e7
    save_video_length = 200
    save_video_interval = 1000000
    # Capture a short rollout video every `save_video_interval` steps.
    env = VecVideoRecorder(
        env,
        osp.join(logger, "videos"),
        record_video_trigger=lambda x: x % save_video_interval == 0,
        video_length=save_video_length)
    agent = SAC(
        MlpPolicy,
        env,
        verbose=1,
        tensorboard_log=logger,
        learning_rate=lr,
        action_noise=action_noise,
    )
    agent.learn(total_timesteps=int(nIter), log_interval=100)
    checkpoint = expDir + "/%s/%s_%s" % (name,
                                         np.format_float_scientific(nIter),
                                         np.format_float_scientific(lr))
    agent.save(checkpoint)
    file.write(checkpoint + '\n')
    env.close()
    return True
def run_experiment(verbose, tensorboard_log, learning_rate):
    """Train a SAC agent on the dense point-mass task with video recording.

    Relies on module-level globals (num_objs, logger, save_video_interval,
    save_video_length, nIter) being defined at call time — TODO confirm.

    Args:
        verbose: verbosity level forwarded to SAC.
        tensorboard_log: NOTE(review) — this parameter is accepted but the
            global `logger` is what is actually passed to SAC below; confirm
            whether the parameter was meant to be used instead.
        learning_rate: SAC learning rate.
    """
    # Fix: removed a leftover `pdb.set_trace()` debugger breakpoint that
    # halted every run, and an unused `stddev` local.
    env = make_vec_env(
        'PointMassDense-%d-v1' % num_objs,
        1,
        wrapper_class=FlattenDictWrapper,
        wrapper_env_kwargs=['observation', 'achieved_goal', 'desired_goal'])
    env = VecVideoRecorder(
        env,
        osp.join(logger, "videos"),
        record_video_trigger=lambda x: x % save_video_interval == 0,
        video_length=save_video_length)
    n_actions = env.action_space.shape[-1]
    action_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                     sigma=0.1 * np.ones(n_actions))
    model = SAC(
        MlpPolicy,
        env,
        verbose=verbose,
        tensorboard_log=logger,
        learning_rate=learning_rate,
        action_noise=action_noise,
    )
    model.learn(total_timesteps=int(nIter), log_interval=100)
    model.save(expDir + "/%s/%s_%s" % (name,
                                       np.format_float_scientific(nIter),
                                       np.format_float_scientific(learning_rate)))
    env.close()
def _load(model_name):
    """Resume training a saved PPO2 agent and save it under `<name>_new`.

    Uses module-level globals (num_objs, logger, save_video_interval,
    save_video_length, nIter) for environment construction and training length.
    """
    agent = PPO2.load(model_name)
    vec = make_vec_env(
        'PointMassDense-%d-v1' % num_objs,
        1,
        wrapper_class=FlattenDictWrapper,
        wrapper_env_kwargs=['observation', 'achieved_goal', 'desired_goal'])
    # Record periodic rollout videos into a separate "videos_3" folder.
    vec = VecVideoRecorder(
        vec,
        osp.join(logger, "videos_3"),
        record_video_trigger=lambda x: x % save_video_interval == 0,
        video_length=save_video_length)
    agent.set_env(vec)
    agent.learn(total_timesteps=int(nIter), log_interval=100)
    agent.save(model_name + "_new")
    vec.close()
def train():
    """Train SAC agents on PointMassDense for each learning rate in the sweep.

    Side effects: configures the GPU via `set_gpu()`, writes checkpoints under
    `expDir`, and appends each checkpoint path to `sac_done.txt`.
    """
    set_gpu()
    expDir = '/home/shivanik/lab/pointExp/state/'
    num_objs = 1
    verbose = 1
    name = 'sac_%d_0.5' % num_objs
    nIter = 1e8
    save_video_length = 200
    save_video_interval = 1000000
    env = make_vec_env(
        'PointMassDense-%d-v1' % num_objs,
        1,
        wrapper_class=FlattenDictWrapper,
        wrapper_env_kwargs=['observation', 'achieved_goal', 'desired_goal'])
    n_actions = env.action_space.shape[-1]
    # Fix: dropped the unused multiprocessing.Pool (its only use,
    # pool.apply_async(func_run, ...), was commented out) and the unused
    # `stddev` local; the file handle is now managed by `with` so it is
    # closed even if training raises.
    with open('sac_done.txt', 'w+') as file:
        for lr in [1e-5]:  # , 5e-4, 1e-5
            logger = osp.join(
                expDir, name,
                'logs%s_%s' % (np.format_float_scientific(nIter),
                               np.format_float_scientific(lr)))
            env = VecVideoRecorder(
                env,
                osp.join(logger, "videos"),
                record_video_trigger=lambda x: x % save_video_interval == 0,
                video_length=save_video_length)
            action_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                             sigma=0.1 * np.ones(n_actions))
            model = SAC(
                MlpPolicy,
                env,
                verbose=verbose,
                tensorboard_log=logger,
                learning_rate=lr,
                action_noise=action_noise,
            )
            model.learn(total_timesteps=int(nIter), log_interval=100)
            exp_name = expDir + "/%s/%s_%s" % (
                name, np.format_float_scientific(nIter),
                np.format_float_scientific(lr))
            model.save(exp_name)
            file.write(exp_name + '\n')
            env.close()
def record(exp):
    """Load a saved SAC agent and briefly retrain it while recording video.

    Args:
        exp: path of the saved SAC checkpoint to load.
    """
    agent = SAC.load(exp)
    vec = make_vec_env(
        'PointMassDense-%d-v1' % num_objs,
        1,
        wrapper_class=FlattenDictWrapper,
        wrapper_env_kwargs=['observation', 'achieved_goal', 'desired_goal'])
    # Videos for this pass go into a separate "videos_2" folder.
    vec = VecVideoRecorder(
        vec,
        osp.join(logger, "videos_2"),
        record_video_trigger=lambda x: x % save_video_interval == 0,
        video_length=save_video_length)
    agent.set_env(vec)
    agent.learn(total_timesteps=2000, log_interval=100)
    vec.close()
# Record a rollout video of a trained agent.
model = ALGOS[algo].load(model_path)
# Note: apparently it renders by default
env = VecVideoRecorder(env,
                       video_folder,
                       record_video_trigger=lambda x: x == 0,
                       video_length=video_length,
                       name_prefix="{}-{}".format(algo, env_id))
# Fix: the original reset the env BEFORE wrapping, then reset the wrapper and
# discarded its observation, leaving `obs` stale for the first predict().
# Capture the observation from the wrapped env's reset instead.
obs = env.reset()
for _ in range(video_length + 1):
    # action = [env.action_space.sample()]
    action, _ = model.predict(obs, deterministic=deterministic)
    if isinstance(env.action_space, gym.spaces.Box):
        # Continuous actions must be clipped to the Box bounds.
        action = np.clip(action, env.action_space.low, env.action_space.high)
    obs, _, _, _ = env.step(action)
# Workaround for https://github.com/openai/gym/issues/893
if n_envs == 1 and 'Bullet' not in env_id and not is_atari:
    env = env.venv  # DummyVecEnv
    # Peel off normalization / frame-stack wrappers before closing the raw env.
    while isinstance(env, VecNormalize) or isinstance(env, VecFrameStack):
        env = env.venv
    env.envs[0].env.close()
else:
    # SubprocVecEnv closes its workers itself.
    env.close()
def main(args):
    """Entry point: dispatch on ``args.mode``.

    Modes: 'play' (interactive), 'enjoy' (record a trained agent),
    'train', 'policyplot'/'vectorfieldplot'/'streamlinesplot' (analysis
    plots), and 'test' (evaluation episodes with reporting).

    Relies on module-level names imported elsewhere in the file
    (gym_auv, create_env, make_mp_env, play_scenario, DIR_PATH, the
    stable-baselines algorithm classes, time, ParameterGrid, ...).
    """
    envconfig_string = args.envconfig
    # Merge scenario defaults with any user-supplied config overrides.
    custom_envconfig = _preprocess_custom_envconfig(
        args.envconfig) if args.envconfig is not None else {}
    env_id = 'gym_auv:' + args.env
    env_name = env_id.split(':')[-1] if ':' in env_id else env_id
    envconfig = gym_auv.SCENARIOS[env_name][
        'config'] if env_name in gym_auv.SCENARIOS else {}
    envconfig.update(custom_envconfig)
    NUM_CPU = multiprocessing.cpu_count()
    # Unique run id: unix timestamp + algorithm name.
    EXPERIMENT_ID = str(int(time())) + args.algo.lower()
    # Map the CLI algorithm name to its stable-baselines class.
    model = {
        'ppo': PPO2,
        'ddpg': DDPG,
        'td3': TD3,
        'a2c': A2C,
        'acer': ACER,
        'acktr': ACKTR,
        'sac': SAC,
        'trpo': TRPO
    }[args.algo.lower()]
    if args.mode == 'play':
        # Interactive play, optionally assisted by a trained agent.
        agent = model.load(args.agent) if args.agent is not None else None
        envconfig_play = envconfig.copy()
        envconfig_play['show_indicators'] = True
        #envconfig_play['autocamera3d'] = False
        env = create_env(env_id,
                         envconfig_play,
                         test_mode=True,
                         render_mode=args.render,
                         pilot=args.pilot,
                         verbose=True)
        print('Created environment instance')
        if args.scenario:
            env.load(args.scenario)
        vec_env = DummyVecEnv([lambda: env])
        # Record from the very first step (trigger at x == 0).
        recorded_env = VecVideoRecorder(
            vec_env,
            args.video_dir,
            record_video_trigger=lambda x: x == 0,
            video_length=args.recording_length,
            name_prefix=(args.env
                         if args.video_name == 'auto' else args.video_name))
        print(args.video_dir, args.video_name)
        play_scenario(env, recorded_env, args, agent=agent)
        recorded_env.env.close()
    elif (args.mode == 'enjoy'):
        # Roll out a trained agent while recording video and trajectory plots.
        agent = model.load(args.agent)
        figure_folder = os.path.join(DIR_PATH, 'logs', 'enjoys', args.env,
                                     EXPERIMENT_ID)
        os.makedirs(figure_folder, exist_ok=True)
        scenario_folder = os.path.join(figure_folder, 'scenarios')
        os.makedirs(scenario_folder, exist_ok=True)
        video_folder = os.path.join(DIR_PATH, 'logs', 'videos', args.env,
                                    EXPERIMENT_ID)
        os.makedirs(video_folder, exist_ok=True)
        env = create_env(env_id,
                         envconfig,
                         test_mode=True,
                         render_mode=args.render,
                         pilot=args.pilot)
        if args.scenario:
            env.load(args.scenario)
        vec_env = DummyVecEnv([lambda: env])
        recorded_env = VecVideoRecorder(
            vec_env,
            video_folder,
            record_video_trigger=lambda x: x == 0,
            video_length=args.recording_length,
            name_prefix=(args.env
                         if args.video_name == 'auto' else args.video_name))
        obs = recorded_env.reset()
        state = None  # recurrent-policy hidden state
        t_steps = 0
        ep_number = 1
        done = [False for _ in range(vec_env.num_envs)]
        for _ in range(args.recording_length):
            if args.recurrent:
                # Recurrent policies need the previous state and done mask.
                action, _states = agent.predict(
                    observation=obs,
                    state=state,
                    mask=done,
                    deterministic=not args.stochastic)
                state = _states
            else:
                action, _states = agent.predict(
                    obs, deterministic=not args.stochastic)
            obs, reward, done, info = recorded_env.step(action)
            recorded_env.render()
            t_steps += 1
            # Plot every 800 steps and at episode end.
            # NOTE(review): `done` is a vectorized list/array here; its
            # truthiness presumably works because num_envs == 1 — confirm.
            if t_steps % 800 == 0 or done:
                if not done:
                    env.save_latest_episode(save_history=False)
                gym_auv.reporting.plot_trajectory(
                    env,
                    fig_dir=scenario_folder,
                    fig_prefix=(args.env +
                                '_ep{}_step{}'.format(ep_number, t_steps)))
                gym_auv.reporting.plot_trajectory(
                    env,
                    fig_dir=scenario_folder,
                    fig_prefix=(
                        args.env +
                        '_ep{}_step{}_local'.format(ep_number, t_steps)),
                    local=True)
                if done:
                    ep_number += 1
        recorded_env.close()
    elif (args.mode == 'train'):
        # Create the log/figure/video/agent folder tree for this run.
        figure_folder = os.path.join(DIR_PATH, 'logs', 'figures', args.env,
                                     EXPERIMENT_ID)
        os.makedirs(figure_folder, exist_ok=True)
        scenario_folder = os.path.join(figure_folder, 'scenarios')
        os.makedirs(scenario_folder, exist_ok=True)
        video_folder = os.path.join(DIR_PATH, 'logs', 'videos', args.env,
                                    EXPERIMENT_ID)
        recording_length = 8000
        os.makedirs(video_folder, exist_ok=True)
        agent_folder = os.path.join(DIR_PATH, 'logs', 'agents', args.env,
                                    EXPERIMENT_ID)
        os.makedirs(agent_folder, exist_ok=True)
        tensorboard_log = os.path.join(DIR_PATH, 'logs', 'tensorboard',
                                       args.env, EXPERIMENT_ID)
        tensorboard_port = 6006  # assigned but not used below
        # Single-process env for --nomp and the off-policy/TRPO algorithms;
        # otherwise one subprocess env per CPU.
        if (args.nomp or model == DDPG or model == TD3 or model == SAC
                or model == TRPO):
            num_cpu = 1
            vec_env = DummyVecEnv(
                [lambda: create_env(env_id, envconfig, pilot=args.pilot)])
        else:
            num_cpu = NUM_CPU
            vec_env = SubprocVecEnv([
                make_mp_env(env_id, i, envconfig, pilot=args.pilot)
                for i in range(num_cpu)
            ])
        if (args.agent is not None):
            # Resume training from a checkpoint.
            agent = model.load(args.agent)
            agent.set_env(vec_env)
        else:
            # Fresh agent with per-algorithm hyperparameters.
            if (model == PPO2):
                if args.recurrent:
                    hyperparams = {
                        # 'n_steps': 1024,
                        # 'nminibatches': 32,
                        # 'lam': 0.95,
                        # 'gamma': 0.99,
                        # 'noptepochs': 10,
                        # 'ent_coef': 0.0,
                        # 'learning_rate': 0.0003,
                        # 'cliprange': 0.2,
                        'n_steps': 1024,
                        'nminibatches': 1,
                        'lam': 0.98,
                        'gamma': 0.999,
                        'noptepochs': 4,
                        'ent_coef': 0.01,
                        'learning_rate': 2e-3,
                    }

                    class CustomLSTMPolicy(MlpLstmPolicy):
                        """MLP+LSTM policy: two 256-unit dense layers, an
                        LSTM, then separate 64-unit value/policy heads."""

                        def __init__(self,
                                     sess,
                                     ob_space,
                                     ac_space,
                                     n_env,
                                     n_steps,
                                     n_batch,
                                     n_lstm=256,
                                     reuse=False,
                                     **_kwargs):
                            super().__init__(sess,
                                             ob_space,
                                             ac_space,
                                             n_env,
                                             n_steps,
                                             n_batch,
                                             n_lstm,
                                             reuse,
                                             net_arch=[
                                                 256, 256, 'lstm',
                                                 dict(vf=[64], pi=[64])
                                             ],
                                             **_kwargs)

                    agent = PPO2(CustomLSTMPolicy,
                                 vec_env,
                                 verbose=True,
                                 tensorboard_log=tensorboard_log,
                                 **hyperparams)
                else:
                    hyperparams = {
                        # 'n_steps': 1024,
                        # 'nminibatches': 32,
                        # 'lam': 0.95,
                        # 'gamma': 0.99,
                        # 'noptepochs': 10,
                        # 'ent_coef': 0.0,
                        # 'learning_rate': 0.0003,
                        # 'cliprange': 0.2,
                        'n_steps': 1024,
                        'nminibatches': 32,
                        'lam': 0.98,
                        'gamma': 0.999,
                        'noptepochs': 4,
                        'ent_coef': 0.01,
                        'learning_rate': 2e-4,
                    }
                    #policy_kwargs = dict(act_fun=tf.nn.tanh, net_arch=[64, 64, 64])
                    #policy_kwargs = dict(net_arch=[64, 64, 64])
                    layers = [256, 128, 64]
                    #layers = [64, 64]
                    policy_kwargs = dict(net_arch=[dict(vf=layers, pi=layers)])
                    agent = PPO2(MlpPolicy,
                                 vec_env,
                                 verbose=True,
                                 tensorboard_log=tensorboard_log,
                                 **hyperparams,
                                 policy_kwargs=policy_kwargs)
                    #dataset = ExpertDataset(expert_path='gail_expert.npz', traj_limitation=1, batch_size=128)
                    #print('Pretraining {} agent on "{}"'.format(args.algo.upper(), env_id))
                    #agent.pretrain(dataset, n_epochs=1000)
                    #print('Done pretraining {} agent on "{}"'.format(args.algo.upper(), env_id))
            elif (model == DDPG):
                # rl-baselines-zoo inspired:
                # hyperparams = {
                #     'memory_limit': 50000,
                #     'normalize_observations': True,
                #     'normalize_returns': False,
                #     'gamma': 0.98,
                #     'actor_lr': 0.00156,
                #     'critic_lr': 0.00156,
                #     'batch_size': 256,
                #     'param_noise': AdaptiveParamNoiseSpec(initial_stddev=0.1, desired_action_stddev=0.1)
                # }
                hyperparams = {
                    'memory_limit': 1000000,
                    'normalize_observations': True,
                    'normalize_returns': False,
                    'gamma': 0.98,
                    'actor_lr': 0.00156,
                    'critic_lr': 0.00156,
                    'batch_size': 256,
                    'param_noise':
                    AdaptiveParamNoiseSpec(initial_stddev=0.287,
                                           desired_action_stddev=0.287)
                }
                agent = DDPG(LnMlpPolicy,
                             vec_env,
                             verbose=True,
                             tensorboard_log=tensorboard_log,
                             **hyperparams)
            elif (model == TD3):
                # rl-baselines-zoo inspired:
                # hyperparams = {
                #     'batch_size': 256,
                #     'buffer_size': 50000,
                #     'learning_starts': 1000
                # }
                hyperparams = {
                    'buffer_size': 1000000,
                    'train_freq': 1000,
                    'gradient_steps': 1000,
                    'learning_starts': 10000
                }
                action_noise = NormalActionNoise(mean=np.zeros(2),
                                                 sigma=0.1 * np.ones(2))
                agent = TD3(stable_baselines.td3.MlpPolicy,
                            vec_env,
                            verbose=True,
                            tensorboard_log=tensorboard_log,
                            action_noise=action_noise,
                            **hyperparams)
            elif model == A2C:
                # rl-baselines-zoo inspired:
                # hyperparams = {
                #     'n_steps': 5,
                #     'gamma': 0.995,
                #     'ent_coef': 0.00001,
                #     'learning_rate': 0.00083,
                #     'lr_schedule': 'linear'
                # }
                # layers = [256, 128, 64]
                hyperparams = {
                    'n_steps': 16,
                    'gamma': 0.99,
                    'ent_coef': 0.001,
                    'learning_rate': 2e-4,
                    'lr_schedule': 'linear'
                }
                layers = [64, 64]
                policy_kwargs = dict(net_arch=[dict(vf=layers, pi=layers)])
                agent = A2C(MlpPolicy,
                            vec_env,
                            verbose=True,
                            tensorboard_log=tensorboard_log,
                            **hyperparams,
                            policy_kwargs=policy_kwargs)
            elif model == ACER:
                agent = ACER(MlpPolicy,
                             vec_env,
                             verbose=True,
                             tensorboard_log=tensorboard_log)
            elif model == ACKTR:
                # rl-baselines-zoo inspired:
                # hyperparams = {
                #     'gamma': 0.99,
                #     'n_steps': 16,
                #     'ent_coef': 0.0,
                #     'learning_rate': 0.06,
                #     'lr_schedule': 'constant'
                # }
                # agent = ACKTR(MlpPolicy, vec_env, verbose=True, tensorboard_log=tensorboard_log, **hyperparams)
                agent = ACKTR(MlpPolicy,
                              vec_env,
                              verbose=True,
                              tensorboard_log=tensorboard_log)
            elif model == SAC:
                # rl-baselines-zoo inspired:
                # hyperparams = {
                #     'batch_size': 256,
                #     'learning_starts': 1000
                # }
                # agent = SAC(stable_baselines.sac.MlpPolicy, vec_env, verbose=True, tensorboard_log=tensorboard_log, **hyperparams)
                agent = SAC(stable_baselines.sac.MlpPolicy,
                            vec_env,
                            verbose=True,
                            tensorboard_log=tensorboard_log)
            elif model == TRPO:
                agent = TRPO(MlpPolicy,
                             vec_env,
                             verbose=True,
                             tensorboard_log=tensorboard_log)
        print('Training {} agent on "{}"'.format(args.algo.upper(), env_id))
        n_updates = 0
        n_episodes = 0

        def callback(_locals, _globals):
            """Per-update training callback: save checkpoints, write reports,
            and spawn video-recording subprocesses at algorithm-specific
            intervals."""
            nonlocal n_updates
            nonlocal n_episodes
            sys.stdout.write('Training update: {}\r'.format(n_updates))
            sys.stdout.flush()
            _self = _locals['self']
            vec_env = _self.get_env()

            # Lightweight stand-in carrying just the attributes the
            # reporting helpers read from a real environment.
            class Struct(object):
                pass

            report_env = Struct()
            report_env.history = []
            report_env.config = envconfig
            report_env.nsensors = report_env.config[
                "n_sensors_per_sector"] * report_env.config["n_sectors"]
            report_env.sensor_angle = 2 * np.pi / (report_env.nsensors + 1)
            report_env.last_episode = vec_env.get_attr('last_episode')[0]
            report_env.config = vec_env.get_attr('config')[0]
            report_env.obstacles = vec_env.get_attr('obstacles')[0]
            # Interleave the per-worker histories episode by episode.
            env_histories = vec_env.get_attr('history')
            for episode in range(max(map(len, env_histories))):
                for env_idx in range(len(env_histories)):
                    if (episode < len(env_histories[env_idx])):
                        report_env.history.append(
                            env_histories[env_idx][episode])
            report_env.episode = len(report_env.history) + 1
            total_t_steps = _self.get_env().get_attr(
                'total_t_steps')[0] * num_cpu
            agent_filepath = os.path.join(agent_folder,
                                          str(total_t_steps) + '.pkl')
            # Algorithm-specific save/record/report cadence.
            # NOTE(review): the PPO2 branch never sets `save_criteria` and
            # other algorithms would leave `report_criteria`/`recording_criteria`
            # unbound — all cases used below happen to be covered, but the
            # structure is fragile.
            if model == PPO2:
                recording_criteria = n_updates % 10 == 0
                report_criteria = True
                _self.save(agent_filepath)  # PPO2 saves on every update
            elif model == A2C or model == ACER or model == ACKTR or model == SAC or model == TRPO:
                save_criteria = n_updates % 100 == 0
                recording_criteria = n_updates % 1000 == 0
                report_criteria = True
                if save_criteria:
                    _self.save(agent_filepath)
            elif model == DDPG or model == TD3:
                save_criteria = n_updates % 10000 == 0
                recording_criteria = n_updates % 50000 == 0
                report_criteria = report_env.episode > n_episodes
                if save_criteria:
                    _self.save(agent_filepath)
            if report_env.last_episode is not None and len(
                    report_env.history) > 0 and report_criteria:
                try:
                    #gym_auv.reporting.plot_trajectory(report_env, fig_dir=scenario_folder, fig_prefix=args.env + '_ep_{}'.format(report_env.episode))
                    gym_auv.reporting.report(report_env,
                                             report_dir=figure_folder)
                    #vec_env.env_method('save', os.path.join(scenario_folder, '_ep_{}'.format(report_env.episode)))
                except OSError as e:
                    # Reporting failures must not abort training.
                    print("Ignoring reporting OSError:")
                    print(repr(e))
            if recording_criteria:
                # Spawn a detached 'enjoy' run to record a video of the
                # current checkpoint.
                if args.pilot:
                    cmd = 'python run.py enjoy {} --agent "{}" --video-dir "{}" --video-name "{}" --recording-length {} --algo {} --pilot {} --envconfig {}{}'.format(
                        args.env, agent_filepath, video_folder,
                        args.env + '-' + str(total_t_steps), recording_length,
                        args.algo, args.pilot, envconfig_string,
                        ' --recurrent' if args.recurrent else '')
                else:
                    cmd = 'python run.py enjoy {} --agent "{}" --video-dir "{}" --video-name "{}" --recording-length {} --algo {} --envconfig {}{}'.format(
                        args.env, agent_filepath, video_folder,
                        args.env + '-' + str(total_t_steps), recording_length,
                        args.algo, envconfig_string,
                        ' --recurrent' if args.recurrent else '')
                # NOTE(review): Popen receives one command string without
                # shell=True — on POSIX that is interpreted as a single
                # executable name; verify this launches as intended.
                subprocess.Popen(cmd)
            n_episodes = report_env.episode
            n_updates += 1

        agent.learn(total_timesteps=1500000,
                    tb_log_name='log',
                    callback=callback)
    elif (args.mode in ['policyplot', 'vectorfieldplot', 'streamlinesplot']):
        figure_folder = os.path.join(DIR_PATH, 'logs', 'plots', args.env,
                                     EXPERIMENT_ID)
        os.makedirs(figure_folder, exist_ok=True)
        agent = PPO2.load(args.agent)
        if args.testvals:
            # Sweep over a grid of config values loaded from a JSON file.
            testvals = json.load(open(args.testvals, 'r'))
            valuegrid = list(ParameterGrid(testvals))
            for valuedict in valuegrid:
                customconfig = envconfig.copy()
                customconfig.update(valuedict)
                # NOTE(review): `customconfig` is built but `envconfig` is
                # what gets passed to create_env — looks unintentional;
                # confirm before relying on the sweep results.
                env = create_env(env_id,
                                 envconfig,
                                 test_mode=True,
                                 pilot=args.pilot)
                valuedict_str = '_'.join(
                    (key + '-' + str(val) for key, val in valuedict.items()))
                print('Running {} test for {}...'.format(
                    args.mode, valuedict_str))
                if args.mode == 'policyplot':
                    gym_auv.reporting.plot_actions(env,
                                                   agent,
                                                   fig_dir=figure_folder,
                                                   fig_prefix=valuedict_str)
                elif args.mode == 'vectorfieldplot':
                    gym_auv.reporting.plot_vector_field(
                        env,
                        agent,
                        fig_dir=figure_folder,
                        fig_prefix=valuedict_str)
                elif args.mode == 'streamlinesplot':
                    gym_auv.reporting.plot_streamlines(
                        env,
                        agent,
                        fig_dir=figure_folder,
                        fig_prefix=valuedict_str)
        else:
            env = create_env(env_id,
                             envconfig,
                             test_mode=True,
                             pilot=args.pilot)
            # Persist the effective config next to the plots.
            with open(os.path.join(figure_folder, 'config.json'), 'w') as f:
                json.dump(env.config, f)
            if args.mode == 'policyplot':
                gym_auv.reporting.plot_actions(env,
                                               agent,
                                               fig_dir=figure_folder)
            elif args.mode == 'vectorfieldplot':
                gym_auv.reporting.plot_vector_field(env,
                                                    agent,
                                                    fig_dir=figure_folder)
            elif args.mode == 'streamlinesplot':
                gym_auv.reporting.plot_streamlines(env,
                                                   agent,
                                                   fig_dir=figure_folder)
        print('Output folder: ', figure_folder)
    elif args.mode == 'test':
        figure_folder = os.path.join(DIR_PATH, 'logs', 'tests', args.env,
                                     EXPERIMENT_ID)
        scenario_folder = os.path.join(figure_folder, 'scenarios')
        video_folder = os.path.join(figure_folder, 'videos')
        os.makedirs(figure_folder, exist_ok=True)
        os.makedirs(scenario_folder, exist_ok=True)
        os.makedirs(video_folder, exist_ok=True)
        if not args.onlyplot:
            agent = model.load(args.agent)

        def create_test_env(video_name_prefix, envconfig=envconfig):
            """Build a test env, optionally wrapped for video recording.

            Returns (raw_env, active_env); `active_env` is the object that
            should be reset/stepped.
            """
            print('Creating test environment: ' + env_id)
            env = create_env(env_id,
                             envconfig,
                             test_mode=True,
                             render_mode=args.render if args.video else None,
                             pilot=args.pilot)
            vec_env = DummyVecEnv([lambda: env])
            if args.video:
                video_length = min(500, args.recording_length)
                recorded_env = VecVideoRecorder(
                    vec_env,
                    video_folder,
                    record_video_trigger=lambda x: (x % video_length) == 0,
                    video_length=video_length,
                    name_prefix=video_name_prefix)
            active_env = recorded_env if args.video else vec_env
            return env, active_env

        failed_tests = []

        def run_test(id,
                     reset=True,
                     report_dir=figure_folder,
                     scenario=None,
                     max_t_steps=None,
                     env=None,
                     active_env=None):
            """Run one evaluation episode and report/plot its outcome.

            Returns a deep copy of env.last_episode, or None when only
            plotting (args.onlyplot). Collisions are appended to
            failures.txt. NOTE(review): the parameter `id` shadows the
            builtin of the same name.
            """
            nonlocal failed_tests
            if env is None or active_env is None:
                env, active_env = create_test_env(video_name_prefix=args.env +
                                                  '_' + id)
            if scenario is not None:
                obs = active_env.reset()
                env.load(args.scenario)
                print('Loaded', args.scenario)
            else:
                if reset:
                    obs = active_env.reset()
                else:
                    obs = env.observe()
            gym_auv.reporting.plot_scenario(env,
                                            fig_dir=scenario_folder,
                                            fig_postfix=id,
                                            show=args.onlyplot)
            if args.onlyplot:
                return
            cumulative_reward = 0
            t_steps = 0
            if max_t_steps is None:
                done = False
            else:
                done = t_steps > max_t_steps
            while not done:
                action, _states = agent.predict(
                    obs, deterministic=not args.stochastic)
                obs, reward, done, info = active_env.step(action)
                if args.video:
                    active_env.render()
                t_steps += 1
                # Vectorized env: reward/info are per-env sequences.
                cumulative_reward += reward[0]
                report_msg = '{:<20}{:<20}{:<20.2f}{:<20.2%}\r'.format(
                    id, t_steps, cumulative_reward, info[0]['progress'])
                sys.stdout.write(report_msg)
                sys.stdout.flush()
                if args.save_snapshots and t_steps % 1000 == 0 and not done:
                    env.save_latest_episode(save_history=False)
                    for size in (20, 50, 100, 200, 300, 400, 500):
                        gym_auv.reporting.plot_trajectory(
                            env,
                            fig_dir=scenario_folder,
                            fig_prefix=(args.env + '_t_step_' + str(t_steps) +
                                        '_' + str(size) + '_' + id),
                            local=True,
                            size=size)
                elif done:
                    gym_auv.reporting.plot_trajectory(env,
                                                      fig_dir=scenario_folder,
                                                      fig_prefix=(args.env +
                                                                  '_' + id))
            env.close()
            gym_auv.reporting.report(env, report_dir=report_dir, lastn=-1)
            #gym_auv.reporting.plot_trajectory(env, fig_dir=scenario_folder, fig_prefix=(args.env + '_' + id))
            #env.save(os.path.join(scenario_folder, id))
            if env.collision:
                failed_tests.append(id)
                # Rewrite the full failure list after each new failure.
                with open(os.path.join(figure_folder, 'failures.txt'),
                          'w') as f:
                    f.write(', '.join(map(str, failed_tests)))
            return copy.deepcopy(env.last_episode)

        print('Testing scenario "{}" for {} episodes.\n '.format(
            args.env, args.episodes))
        report_msg_header = '{:<20}{:<20}{:<20}{:<20}{:<20}{:<20}{:<20}'.format(
            'Episode', 'Timesteps', 'Cum. Reward', 'Progress', 'Collisions',
            'CT-Error [m]', 'H-Error [deg]')
        print(report_msg_header)
        print('-' * len(report_msg_header))
        if args.testvals:
            testvals = json.load(open(args.testvals, 'r'))
            valuegrid = list(ParameterGrid(testvals))
        if args.scenario:
            if args.testvals:
                episode_dict = {}
                for valuedict in valuegrid:
                    customconfig = envconfig.copy()
                    customconfig.update(valuedict)
                    # NOTE(review): create_test_env requires a positional
                    # video_name_prefix which this call omits — confirm this
                    # path is exercised / intended.
                    env, active_env = create_test_env(envconfig=customconfig)
                    valuedict_str = '_'.join(
                        (key + '-' + str(val)
                         for key, val in valuedict.items()))
                    colorval = -np.log10(
                        valuedict['reward_lambda'])  #should be general
                    rep_subfolder = os.path.join(figure_folder, valuedict_str)
                    os.makedirs(rep_subfolder, exist_ok=True)
                    for episode in range(args.episodes):
                        last_episode = run_test(valuedict_str + '_ep' +
                                                str(episode),
                                                report_dir=rep_subfolder)
                    episode_dict[valuedict_str] = [last_episode, colorval]
                print('Plotting all')
                gym_auv.reporting.plot_trajectory(
                    env,
                    fig_dir=scenario_folder,
                    fig_prefix=(args.env + '_all_agents'),
                    episode_dict=episode_dict)
            else:
                run_test("ep0", reset=True, scenario=args.scenario)
        else:
            if args.testvals:
                episode_dict = {}
                agent_index = 1
                for valuedict in valuegrid:
                    customconfig = envconfig.copy()
                    customconfig.update(valuedict)
                    # NOTE(review): same missing video_name_prefix as above.
                    env, active_env = create_test_env(envconfig=customconfig)
                    valuedict_str = '_'.join(
                        (key + '-' + str(val)
                         for key, val in valuedict.items()))
                    colorval = np.log10(
                        valuedict['reward_lambda'])  #should be general
                    rep_subfolder = os.path.join(figure_folder, valuedict_str)
                    os.makedirs(rep_subfolder, exist_ok=True)
                    for episode in range(args.episodes):
                        last_episode = run_test(valuedict_str + '_ep' +
                                                str(episode),
                                                report_dir=rep_subfolder)
                        episode_dict['Agent ' +
                                     str(agent_index)] = [last_episode,
                                                          colorval]
                    agent_index += 1
                gym_auv.reporting.plot_trajectory(
                    env,
                    fig_dir=figure_folder,
                    fig_prefix=(args.env + '_all_agents'),
                    episode_dict=episode_dict)
            else:
                env, active_env = create_test_env(video_name_prefix=args.env)
                for episode in range(args.episodes):
                    run_test('ep' + str(episode),
                             env=env,
                             active_env=active_env)
        if args.video and active_env:
            active_env.close()