def train():
    logger.configure()
    set_global_seeds(args.seed)
    directory = os.path.join(
        args.log_dir,
        '_'.join([args.env, datetime.datetime.now().strftime("%m%d%H%M")]))
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        raise ValueError("The directory already exists: %s" % directory)
    json.dump(vars(args),
              open(os.path.join(directory, 'learning_prop.json'), 'w'))

    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    nb_test_steps = args.nb_test_steps if args.nb_test_steps > 0 else None
    if args.record == 1:
        env = Monitor(env, directory=args.log_dir)

    with tf.device(args.device):
        model = deepq.models.cnn_to_mlp(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[256],
            dueling=bool(args.dueling),
        )
        act, records = deepq.learn(
            env,
            q_func=model,
            lr=args.learning_rate,
            lr_decay_factor=args.learning_rate_decay_factor,
            lr_growth_factor=args.learning_rate_growth_factor,
            max_timesteps=args.nb_train_steps,
            buffer_size=args.buffer_size,
            exploration_fraction=args.eps_fraction,
            exploration_final_eps=args.eps_min,
            train_freq=4,
            print_freq=1000,
            checkpoint_freq=int(args.nb_train_steps / 10),
            learning_starts=args.nb_warmup_steps,
            target_network_update_freq=args.target_update_freq,
            gamma=0.99,
            prioritized_replay=bool(args.prioritized),
            prioritized_replay_alpha=args.prioritized_replay_alpha,
            epoch_steps=args.nb_epoch_steps,
            gpu_memory=args.gpu_memory,
            double_q=args.double_q,
            save_dir=directory,
            nb_test_steps=nb_test_steps,
            scope=args.scope,
            test_eps=args.test_eps,
        )
        print("Saving model to model.pkl")
        act.save(os.path.join(directory, "model.pkl"))
    env.close()
    plot(records, directory)
def train(seed, save_dir):
    logger.configure()
    set_global_seeds(seed)
    save_dir_0 = os.path.join(save_dir, 'seed_%d' % seed)
    os.makedirs(save_dir_0)

    env = envs.make(args.env, 'atari', record=bool(args.record),
                    directory=save_dir_0)
    nb_test_steps = args.nb_test_steps if args.nb_test_steps > 0 else None

    with tf.device(args.device):
        with tf.compat.v1.variable_scope('seed_%d' % seed):
            model = deepq.models.cnn_to_mlp(
                convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                hiddens=[256],
                dueling=bool(args.dueling),
            )
            act = deepq.learn(
                env,
                q_func=model,
                lr=args.learning_rate,
                lr_decay_factor=args.learning_rate_decay_factor,
                lr_growth_factor=args.learning_rate_growth_factor,
                max_timesteps=args.nb_train_steps,
                buffer_size=args.buffer_size,
                exploration_fraction=args.eps_fraction,
                exploration_final_eps=args.eps_min,
                train_freq=4,
                print_freq=1000,
                checkpoint_freq=int(args.nb_train_steps / 10),
                learning_starts=args.nb_warmup_steps,
                target_network_update_freq=args.target_update_freq,
                gamma=0.99,
                prioritized_replay=bool(args.prioritized),
                prioritized_replay_alpha=args.prioritized_replay_alpha,
                scope=args.scope,
                double_q=args.double_q,
                epoch_steps=args.nb_epoch_steps,
                eval_logger=Logger(args.env, 'atari',
                                   nb_test_steps=nb_test_steps,
                                   save_dir=save_dir_0,
                                   render=bool(args.render)),
                save_dir=save_dir_0,
                test_eps=args.test_eps,
                gpu_memory=args.gpu_memory,
                render=bool(args.render),
            )
            print("Saving model to model.pkl")
            act.save(os.path.join(save_dir_0, "model.pkl"))
    env.close()
    if args.record == 1:
        env.moviewriter.finish()
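# A minimal multi-seed driver sketch, not part of the original source.
# Assumptions: an argparse namespace `args` with --seed, --repeat, --log_dir
# and --env, mirroring the surrounding scripts. Each repetition trains in
# its own seed_<n> subdirectory created by train() above.
if __name__ == '__main__':
    save_dir = os.path.join(
        args.log_dir,
        '_'.join([args.env, datetime.datetime.now().strftime("%m%d%H%M")]))
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    json.dump(vars(args),
              open(os.path.join(save_dir, 'learning_prop.json'), 'w'))
    seed = args.seed
    for _ in range(args.repeat):  # hypothetical --repeat flag
        print("===== TRAIN AN AGENT : SEED %d =====" % seed)
        train(seed, save_dir)
        seed += 1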
def train():
    set_global_seeds(args.seed)
    directory = os.path.join(
        args.log_dir,
        '_'.join([args.env, datetime.datetime.now().strftime("%m%d%H%M")]))
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        raise ValueError("The directory already exists: %s" % directory)
    json.dump(vars(args),
              open(os.path.join(directory, 'learning_prop.json'), 'w'))

    env = envs.make(args.env, render=bool(args.render),
                    record=bool(args.record), dirname=directory)

    with tf.device(args.device):
        model = deepq.models.mlp([args.num_units] * args.num_layers)
        act, records = deepq.learn(
            env,
            q_func=model,
            lr=args.learning_rate,
            lr_decay_factor=args.learning_rate_decay_factor,
            lr_growth_factor=args.learning_rate_growth_factor,
            max_timesteps=args.nb_train_steps,
            buffer_size=args.buffer_size,
            batch_size=args.batch_size,
            exploration_fraction=args.eps_fraction,
            exploration_final_eps=args.eps_min,
            target_network_update_freq=args.target_update_freq,
            print_freq=10,
            checkpoint_freq=int(args.nb_train_steps / 10),
            learning_starts=args.nb_warmup_steps,
            gamma=args.gamma,
            prioritized_replay=bool(args.prioritized),
            prioritized_replay_alpha=args.prioritized_replay_alpha,
            callback=None,  # callback,
            epoch_steps=args.nb_epoch_steps,
            gpu_memory=args.gpu_memory,
            save_dir=directory,
            double_q=args.double_q,
            nb_test_steps=args.nb_test_steps,
            test_eps=args.test_eps,
            render=bool(args.render),
        )
        print("Saving model to model.pkl")
        act.save(os.path.join(directory, "model.pkl"))
    plot(records, directory)

    memo = input("Memo for this experiment?: ")
    with open(os.path.join(directory, "memo.txt"), 'w') as f:
        f.write(memo)
    if args.record == 1:
        env.moviewriter.finish()
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    # Offset the global seed by rank so parallel MPI workers generate
    # decorrelated experience.
    set_global_seeds(seed + 10000 * rank)
    env = gym.make(env_id)
    env = Monitor(env, os.path.join(logger.get_dir(), str(rank)))
    env.seed(seed)
    return env
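# Hypothetical usage sketch: launched under MPI (e.g. `mpirun -np 4 python
# run.py`), every worker calls make_mujoco_env() and derives a distinct
# global seed from its rank; 'HalfCheetah-v2' is an illustrative env id,
# not one from this repo.
from mpi4py import MPI

env = make_mujoco_env('HalfCheetah-v2', seed=0)
print("rank %d: env ready" % MPI.COMM_WORLD.Get_rank())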
def make_robotics_env(env_id, seed, rank=0):
    """
    Create a wrapped, monitored gym.Env for Fetch robotics.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = FlattenDictWrapper(env, ['observation', 'desired_goal'])
    env = Monitor(
        env,
        logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
        info_keywords=('is_success',))
    env.seed(seed)
    return env
def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None, start_index=0):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari.
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = Monitor(env,
                          logger.get_dir() and
                          os.path.join(logger.get_dir(), str(rank)))
            return wrap_deepmind(env, **wrapper_kwargs)
        return _thunk

    set_global_seeds(seed)
    return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)])
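# Hypothetical usage sketch: build four monitored Atari workers in
# subprocesses. Per-worker seeds are seed + rank, so trajectories
# decorrelate across the vectorized copies; wrapper_kwargs is forwarded
# to wrap_deepmind, so frame_stack=True yields (84, 84, 4) observations.
venv = make_atari_env('BreakoutNoFrameskip-v4', num_env=4, seed=0,
                      wrapper_kwargs={'frame_stack': True})
obs = venv.reset()  # stacked batch of shape (4, 84, 84, 4)
venv.close()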
def train(seed, save_dir):
    set_global_seeds(seed)
    save_dir_0 = os.path.join(save_dir, 'seed_%d' % seed)
    os.makedirs(save_dir_0)

    env = envs.make(args.env, 'classic_control')

    with tf.device(args.device):
        with tf.compat.v1.variable_scope('seed_%d' % seed):
            model = models.mlp([args.num_units] * args.num_layers,
                               init_mean=args.init_mean,
                               init_sd=args.init_sd)
            act = deepadfq.learn(
                env,
                q_func=model,
                lr=args.learning_rate,
                lr_decay_factor=args.learning_rate_decay_factor,
                lr_growth_factor=args.learning_rate_growth_factor,
                max_timesteps=args.nb_train_steps,
                buffer_size=args.buffer_size,
                batch_size=args.batch_size,
                exploration_fraction=args.eps_fraction,
                exploration_final_eps=args.eps_min,
                target_network_update_freq=args.target_update_freq,
                print_freq=args.nb_epoch_steps,
                checkpoint_freq=int(args.nb_train_steps / 5),
                learning_starts=args.nb_warmup_steps,
                gamma=args.gamma,
                prioritized_replay=bool(args.prioritized),
                prioritized_replay_alpha=args.prioritized_replay_alpha,
                callback=None,  # callback,
                alg=args.alg,
                scope=args.scope,
                sdMin=np.sqrt(args.varth),
                noise=args.noise,
                act_policy=args.act_policy,
                epoch_steps=args.nb_epoch_steps,
                eval_logger=Logger(args.env, 'classic_control',
                                   save_dir=save_dir_0,
                                   render=bool(args.render)),
                save_dir=save_dir_0,
                test_eps=args.test_eps,
                gpu_memory=args.gpu_memory,
                render=bool(args.render),
            )
    if args.record == 1:
        env.moviewriter.finish()
def test():
    set_global_seeds(args.seed)
    import json
    if args.env == 'TargetTracking-v5':
        import simple_imtracking as simple
    else:
        import simple_tracking as simple

    learning_prop = json.load(
        open(os.path.join(args.log_dir, 'learning_prop.json'), 'r'))
    env = envs.make(
        args.env,
        render=bool(args.render),
        record=bool(args.record),
        ros=bool(args.ros),
        map_name=args.map,
        num_targets=learning_prop['nb_targets'],
        dirname=args.log_dir,
        is_training=True,
        im_size=args.im_size,
    )
    act_params = {'scope': learning_prop['scope'], 'eps': args.test_eps}
    act = simple.load(os.path.join(args.log_dir, args.log_fname), act_params)

    if args.ros_log:
        from envs.target_tracking.ros_wrapper import RosLog
        log = RosLog(num_targets=args.nb_targets,
                     wrapped_num=args.ros + args.render + args.record + 1)

    t = 0
    while t < args.nb_test_steps:  # test episode
        t += 1
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            if args.render:
                env.render()
            if args.ros_log:
                log.log(env)
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)

    if args.record:
        env.moviewriter.finish()
    if args.ros_log:
        log.save(args.log_dir)
def train(seed, save_dir):
    set_global_seeds(seed)
    save_dir_0 = os.path.join(save_dir, 'seed_%d' % seed)
    os.makedirs(save_dir_0)

    env = envs.make(args.env, 'target_tracking',
                    render=bool(args.render),
                    record=bool(args.record),
                    directory=save_dir_0,
                    ros=bool(args.ros),
                    map_name=args.map,
                    num_targets=args.nb_targets,
                    im_size=args.im_size,
                    )

    with tf.device(args.device):
        with tf.compat.v1.variable_scope('seed_%d' % seed):
            hiddens = [int(h) for h in args.hiddens.split(':')]
            if args.env == 'TargetTracking-v5':
                model = models.cnn_plus_mlp(
                    convs=[(4, 8, 4), (8, 4, 2)],
                    hiddens=hiddens,
                    dueling=bool(args.dueling),
                    init_mean=args.init_mean,
                    init_sd=args.init_sd,
                    inpt_dim=(args.im_size, args.im_size),
                )
            else:
                model = models.mlp(hiddens, init_mean=args.init_mean,
                                   init_sd=args.init_sd)
            act = deepadfq.learn(
                env,
                q_func=model,
                lr=args.learning_rate,
                lr_decay_factor=args.learning_rate_decay_factor,
                lr_growth_factor=args.learning_rate_growth_factor,
                max_timesteps=args.nb_train_steps,
                buffer_size=args.buffer_size,
                batch_size=args.batch_size,
                exploration_fraction=args.eps_fraction,
                exploration_final_eps=args.eps_min,
                target_network_update_freq=args.target_update_freq,
                checkpoint_freq=args.checkpoint_freq,
                learning_starts=args.nb_warmup_steps,
                gamma=args.gamma,
                prioritized_replay=bool(args.prioritized),
                prioritized_replay_alpha=args.prioritized_replay_alpha,
                callback=None,  # callback,
                alg=args.alg,
                scope=args.scope,
                sdMin=np.sqrt(args.varth),
                noise=args.noise,
                act_policy=args.act_policy,
                epoch_steps=args.nb_epoch_steps,
                eval_logger=Logger(args.env,
                                   env_type='target_tracking',
                                   save_dir=save_dir_0,
                                   render=bool(args.render),
                                   figID=1,
                                   ros=bool(args.ros),
                                   map_name=args.map,
                                   num_targets=args.nb_targets,
                                   im_size=args.im_size,
                                   eval_type=args.eval_type,
                                   init_file_path=args.init_file_path,
                                   ),
                save_dir=save_dir_0,
                test_eps=args.test_eps,
                gpu_memory=args.gpu_memory,
                render=(bool(args.render) or bool(args.ros)),
            )
            print("Saving model to model.pkl")
            act.save(os.path.join(save_dir_0, "model.pkl"))
    if args.record == 1:
        env.moviewriter.finish()
def test(self, args, env, act):
    seed = args.seed
    env.seed(seed)
    set_global_seeds(seed)

    if args.eval_type == 'random':
        params_set = [{}]
    elif args.eval_type == 'fixed_nb':
        if args.env == 'setTracking-v1':
            params_set = [{}]
        elif args.env == 'setTracking-v2':
            params_set = SET_EVAL_v4
        elif args.env == 'setTracking-v3':
            params_set = SET_EVAL_v3
            # params_set = SET_EVAL_8a
        elif args.env == 'setTracking-v4':
            params_set = SET_EVAL_v4
        elif args.env == 'setTracking-v5':
            params_set = SET_EVAL_v4
        elif args.env == 'maTracking-v4':
            params_set = MA_EVAL
        elif args.env == 'setTracking-v6':
            params_set = SET_EVAL_v3
        elif args.env == 'setTracking-v7':
            params_set = SET_EVAL_v3
            # params_set = SET_EVAL_8a
        else:
            raise ValueError("Eval set not created for this env.")
    elif args.eval_type == 'fixed_2':
        params_set = EVAL_BEHAVIOR_2
        tot_eplen = 60
    elif args.eval_type == 'fixed_4':
        params_set = EVAL_BEHAVIOR_4
        tot_eplen = 100
    else:
        raise ValueError("Wrong evaluation type for ttenv.")
    # NOTE: tot_eplen is only set for the 'fixed_2'/'fixed_4' eval types;
    # the fixed-length episode loop below assumes one of those was chosen.

    # Unwrap until the TimeLimit wrapper is found.
    timelimit_env = env
    while not hasattr(timelimit_env, '_elapsed_steps'):
        timelimit_env = timelimit_env.env

    if args.ros_log:
        from envs.target_tracking.ros_wrapper import RosLog
        ros_log = RosLog(num_targets=args.nb_targets,
                         wrapped_num=args.ros + args.render + args.record + 1)

    init_pose_list = get_init_pose_list(args.nb_test_steps, args.eval_type)
    total_nlogdetcov = []
    for params in params_set:
        ep = 0
        ep_nlogdetcov = []  # 'Episode nLogDetCov'
        time_elapsed = ['Elapsed Time (sec)']
        test_observations = np.zeros(args.nb_test_steps)
        while ep < args.nb_test_steps:  # test episode
            ep += 1
            episode_rew, nlogdetcov, ep_len = 0, 0, 0
            done = {}
            obs = env.reset(init_pose_list=init_pose_list, **params)
            s_time = time.time()
            all_observations = np.zeros(env.nb_targets, dtype=bool)
            action_dict = {}
            bigq0 = []
            bigq1 = []
            # while type(done) is dict:
            while ep_len < tot_eplen:
                if args.render:
                    env.render()
                if args.ros_log:
                    ros_log.log(env)
                for agent_id, a_obs in obs.items():
                    action_dict[agent_id] = act(np.array(a_obs)[None])[0]
                    # record target observations
                    all_observations = np.logical_or(
                        all_observations, a_obs[:, 5].astype(bool))
                if all_observations.all():
                    test_observations[ep - 1] = 1
                obs, rew, done, info = env.step(action_dict)
                episode_rew += rew['__all__']
                nlogdetcov += info['mean_nlogdetcov']
                # One-hot encode each agent's action, re-indexed via
                # `rearrange` for the grouped bar plots below.
                rearrange = [0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11]
                qs0 = np.zeros(12)
                qs1 = np.zeros(12)
                q0 = np.zeros(12)
                q1 = np.zeros(12)
                qs0[action_dict['agent-0']] = 1
                qs1[action_dict['agent-1']] = 1
                for ii, val in enumerate(rearrange):
                    q0[ii] = qs0[val]
                    q1[ii] = qs1[val]
                bigq0.append(q0)
                bigq1.append(q1)
                ep_len += 1
            bigq0 = np.asarray(bigq0)
            bigq1 = np.asarray(bigq1)
            time_elapsed.append(time.time() - s_time)
            ep_nlogdetcov.append(nlogdetcov)
            if args.render:
                print("Ep.%d - Episode reward : %.2f, Episode nLogDetCov : %.2f"
                      % (ep, episode_rew, nlogdetcov))
            if ep % 50 == 0:
                print("Ep.%d - Episode reward : %.2f, Episode nLogDetCov : %.2f"
                      % (ep, episode_rew, nlogdetcov))

        if args.record:
            env.moviewriter.finish()
        if args.ros_log:
            ros_log.save(args.log_dir)

        # Stats
        # meanofeps = np.mean(ep_nlogdetcov)
        # total_nlogdetcov.append(meanofeps)
        # Eval plots and saves
        # if args.env == 'setTracking-v7':
        #     eval_dir = os.path.join(os.path.split(args.log_dir)[0],
        #                             'v7_eval_seed%d_' % seed + args.map)
        # else:
        #     eval_dir = os.path.join(os.path.split(args.log_dir)[0],
        #                             'eval_seed%d_' % seed + args.map)
        # model_seed = os.path.split(args.log_dir)[-1]
        # eval_dir = os.path.join(args.log_dir, 'eval_seed%d_'%(seed)+args.map)
        # model_seed = os.path.split(args.log_fname)[0]
        # if not os.path.exists(eval_dir):
        #     os.makedirs(eval_dir)
        # matplotlib.use('Agg')
        # f0, ax0 = plt.subplots()
        # _ = ax0.plot(ep_nlogdetcov, '.')
        # _ = ax0.set_title(args.env)
        # _ = ax0.set_xlabel('episode number')
        # _ = ax0.set_ylabel('mean nlogdetcov')
        # _ = ax0.axhline(y=meanofeps, color='r', linestyle='-',
        #                 label='mean over episodes: %.2f' % meanofeps)
        # _ = ax0.legend()
        # _ = ax0.grid()
        # _ = f0.savefig(os.path.join(
        #     eval_dir,
        #     "%da%dt_%d_eval_" % (env.nb_agents, env.nb_targets,
        #                          args.nb_test_steps) + model_seed + ".png"))
        # plt.close()
        # pickle.dump(ep_nlogdetcov, open(os.path.join(
        #     eval_dir,
        #     "%da%dt_%d_eval_" % (env.nb_agents, env.nb_targets,
        #                          args.nb_test_steps)) + model_seed + ".pkl",
        #     'wb'))

        # 3D bar plots of the one-hot action histories for the two agents.
        f2 = plt.figure()
        ax2 = f2.add_subplot(121, projection='3d')
        ax3 = f2.add_subplot(122, projection='3d')
        lx = len(bigq0[0])
        ly = len(bigq0[:, 0])
        xpos = np.arange(0, lx, 1)
        ypos = np.arange(0, ly, 1)
        xpos, ypos = np.meshgrid(xpos + 0.25, ypos + 0.25)
        xpos = xpos.flatten()
        ypos = ypos.flatten()
        zpos = np.zeros(lx * ly)
        dx = 0.5 * np.ones_like(zpos)
        dy = dx.copy()
        dz0 = bigq0.flatten()
        dz1 = bigq1.flatten()
        cs = ['r', 'r', 'r', 'r', 'g', 'g', 'g', 'g',
              'b', 'b', 'b', 'b'] * ly
        ax2.bar3d(xpos, ypos, zpos, dx, dy, dz0, color=cs)
        ax3.bar3d(xpos, ypos, zpos, dx, dy, dz1, color=cs)
        print(test_observations)
        print("Cooperation ratio over total evals: %.2f"
              % (np.sum(test_observations) / args.nb_test_steps))
        plt.show()
def train():
    set_global_seeds(args.seed)
    directory = os.path.join(
        args.log_dir,
        '_'.join([args.env, datetime.datetime.now().strftime("%m%d%H%M")]))
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        raise ValueError("The directory already exists: %s" % directory)
    json.dump(vars(args),
              open(os.path.join(directory, 'learning_prop.json'), 'w'))

    env = envs.make(
        args.env,
        render=bool(args.render),
        record=bool(args.record),
        ros=bool(args.ros),
        dirname=directory,
        map_name=args.map,
        num_targets=args.nb_targets,
        im_size=args.im_size,
    )
    hiddens = [int(h) for h in args.hiddens.split(':')]

    with tf.device(args.device):
        if args.env == 'TargetTracking-v5':
            import simple_imtracking as simple
            model = models.cnn_plus_mlp(
                convs=[(8, 4, 2), (16, 3, 1)],
                hiddens=hiddens,
                dueling=bool(args.dueling),
                init_mean=args.init_mean,
                init_sd=args.init_sd,
            )
        else:
            import simple_tracking as simple
            model = models.mlp(hiddens, init_mean=args.init_mean,
                               init_sd=args.init_sd)

        act, records = simple.learn(
            env,
            q_func=model,
            lr=args.learning_rate,
            lr_decay_factor=args.learning_rate_decay_factor,
            lr_growth_factor=args.learning_rate_growth_factor,
            max_timesteps=args.nb_train_steps,
            buffer_size=args.buffer_size,
            batch_size=args.batch_size,
            exploration_fraction=args.eps_fraction,
            exploration_final_eps=args.eps_min,
            target_network_update_freq=args.target_update_freq,
            print_freq=10,
            checkpoint_freq=int(args.nb_train_steps / 10),
            learning_starts=args.nb_warmup_steps,
            gamma=args.gamma,
            prioritized_replay=bool(args.prioritized),
            prioritized_replay_alpha=args.prioritized_replay_alpha,
            callback=None,  # callback,
            epoch_steps=args.nb_epoch_steps,
            noise=args.noise,
            varTH=args.varth,
            alg=args.alg,
            gpu_memory=args.gpu_memory,
            act_policy=args.act_policy,
            save_dir=directory,
            nb_test_steps=args.nb_test_steps,
            scope=args.scope,
            test_eps=args.test_eps,
            render=(bool(args.render) or bool(args.ros)),
            map_name=args.map,
            num_targets=args.nb_targets,
            im_size=args.im_size,
        )
        print("Saving model to model.pkl")
        act.save(os.path.join(directory, "model.pkl"))
    plot(records, directory)

    memo = input("Memo for this experiment?: ")
    with open(os.path.join(directory, "memo.txt"), 'w') as f:
        f.write(memo)
    if args.record == 1:
        env.moviewriter.finish()
def train(seed, save_dir):
    set_global_seeds(seed)
    save_dir_0 = os.path.join(save_dir, 'seed_%d' % seed)
    os.makedirs(save_dir_0)

    env = envs.make(args.env, 'target_tracking',
                    render=bool(args.render),
                    record=bool(args.record),
                    directory=save_dir_0,
                    ros=bool(args.ros),
                    map_name=args.map,
                    num_targets=args.nb_targets,
                    im_size=args.im_size,
                    )

    with tf.device(args.device):
        with tf.compat.v1.variable_scope('seed_%d' % seed):
            hiddens = [int(h) for h in args.hiddens.split(':')]
            model = deepq.models.mlp(hiddens)
            act = deepq.learn(
                env,
                q_func=model,
                lr=args.learning_rate,
                lr_decay_factor=args.learning_rate_decay_factor,
                lr_growth_factor=args.learning_rate_growth_factor,
                max_timesteps=args.nb_train_steps,
                buffer_size=args.buffer_size,
                batch_size=args.batch_size,
                exploration_fraction=args.eps_fraction,
                exploration_final_eps=args.eps_min,
                target_network_update_freq=args.target_update_freq,
                print_freq=10,
                checkpoint_freq=int(args.nb_train_steps / 10),
                learning_starts=args.nb_warmup_steps,
                gamma=args.gamma,
                prioritized_replay=bool(args.prioritized),
                prioritized_replay_alpha=args.prioritized_replay_alpha,
                callback=None,  # callback,
                double_q=args.double_q,
                scope=args.scope,
                epoch_steps=args.nb_epoch_steps,
                eval_logger=Logger(args.env,
                                   env_type='target_tracking',
                                   save_dir=save_dir_0,
                                   render=bool(args.render),
                                   figID=1,
                                   ros=bool(args.ros),
                                   map_name=args.map,
                                   num_targets=args.nb_targets,
                                   eval_type=args.eval_type,
                                   init_file_path=args.init_file_path,
                                   ),
                save_dir=save_dir_0,
                test_eps=args.test_eps,
                gpu_memory=args.gpu_memory,
                render=(bool(args.render) or bool(args.ros)),
            )
            print("Saving model to model.pkl")
            act.save(os.path.join(save_dir_0, "model.pkl"))
    if args.record == 1:
        env.moviewriter.finish()
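# A minimal deployment sketch, not part of the original script. Assumptions:
# this repo's deepq keeps a baselines-style deepq.load() that restores the
# ActWrapper saved by act.save() above, and the env is rebuilt as in
# train(); the batched greedy query act(obs[None])[0] matches the test()
# functions elsewhere in the repo.
env = envs.make(args.env, 'target_tracking',
                render=bool(args.render), record=False,
                map_name=args.map, num_targets=args.nb_targets)
act = deepq.load(os.path.join(save_dir_0, "model.pkl"))
obs, done = env.reset(), False
episode_rew = 0
while not done:
    obs, rew, done, _ = env.step(act(obs[None])[0])
    episode_rew += rew
print("Episode reward", episode_rew)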
def test(self, args, env, act):
    seed = args.seed
    env.seed(seed)
    set_global_seeds(seed)

    if args.eval_type == 'random':
        params_set = [{}]
    elif args.eval_type == 'fixed_nb':
        if args.env == 'setTracking-v1':
            params_set = [{}]
        elif args.env == 'setTracking-v2':
            params_set = SET_EVAL_v4
        elif args.env == 'setTracking-v3':
            params_set = SET_EVAL_v3
            # params_set = SET_EVAL_8a
        elif args.env == 'setTracking-v4':
            params_set = SET_EVAL_v4
        elif args.env == 'setTracking-v5':
            params_set = SET_EVAL_v4
        elif args.env == 'maTracking-v4':
            params_set = MA_EVAL
        elif args.env == 'setTracking-v6':
            params_set = SET_EVAL_v3
        elif args.env == 'setTracking-v7':
            params_set = SET_EVAL_v3
            # params_set = SET_EVAL_8a
        else:
            raise ValueError("Eval set not created for this env.")
    else:
        raise ValueError("Wrong evaluation type for ttenv.")

    # Unwrap until the TimeLimit wrapper is found.
    timelimit_env = env
    while not hasattr(timelimit_env, '_elapsed_steps'):
        timelimit_env = timelimit_env.env

    if args.ros_log:
        from envs.target_tracking.ros_wrapper import RosLog
        ros_log = RosLog(num_targets=args.nb_targets,
                         wrapped_num=args.ros + args.render + args.record + 1)

    total_nlogdetcov = []
    for params in params_set:
        ep = 0
        ep_nlogdetcov = []  # 'Episode nLogDetCov'
        time_elapsed = ['Elapsed Time (sec)']
        while ep < args.nb_test_steps:  # test episode
            ep += 1
            episode_rew, nlogdetcov = 0, 0
            done = {}
            obs = env.reset(**params)
            s_time = time.time()
            action_dict = {}
            # done stays a per-agent dict until the episode terminates.
            while type(done) is dict:
                if args.render:
                    env.render()
                if args.ros_log:
                    ros_log.log(env)
                for agent_id, a_obs in obs.items():
                    action_dict[agent_id] = act(np.array(a_obs)[None])[0]
                obs, rew, done, info = env.step(action_dict)
                episode_rew += rew['__all__']
                nlogdetcov += info['mean_nlogdetcov']
            time_elapsed.append(time.time() - s_time)
            ep_nlogdetcov.append(nlogdetcov)
            if args.render:
                print("Ep.%d - Episode reward : %.2f, Episode nLogDetCov : %.2f"
                      % (ep, episode_rew, nlogdetcov))
            if ep % 50 == 0:
                print("Ep.%d - Episode reward : %.2f, Episode nLogDetCov : %.2f"
                      % (ep, episode_rew, nlogdetcov))

        if args.record:
            env.moviewriter.finish()
        if args.ros_log:
            ros_log.save(args.log_dir)

        # Stats
        meanofeps = np.mean(ep_nlogdetcov)
        total_nlogdetcov.append(meanofeps)

        # Eval plots and saves
        if args.env == 'setTracking-v7':
            eval_dir = os.path.join(os.path.split(args.log_dir)[0],
                                    'v7_eval_seed%d_' % seed + args.map)
        else:
            eval_dir = os.path.join(os.path.split(args.log_dir)[0],
                                    'eval_seed%d_' % seed + args.map)
        model_seed = os.path.split(args.log_dir)[-1]
        # eval_dir = os.path.join(args.log_dir, 'eval_seed%d_'%(seed)+args.map)
        # model_seed = os.path.split(args.log_fname)[0]
        if not os.path.exists(eval_dir):
            os.makedirs(eval_dir)

        matplotlib.use('Agg')
        f0, ax0 = plt.subplots()
        _ = ax0.plot(ep_nlogdetcov, '.')
        _ = ax0.set_title(args.env)
        _ = ax0.set_xlabel('episode number')
        _ = ax0.set_ylabel('mean nlogdetcov')
        _ = ax0.axhline(y=meanofeps, color='r', linestyle='-',
                        label='mean over episodes: %.2f' % meanofeps)
        _ = ax0.legend()
        _ = ax0.grid()
        _ = f0.savefig(os.path.join(
            eval_dir,
            "%da%dt_%d_eval_" % (env.nb_agents, env.nb_targets,
                                 args.nb_test_steps) + model_seed + ".png"))
        plt.close()
        pickle.dump(ep_nlogdetcov, open(os.path.join(
            eval_dir,
            "%da%dt_%d_eval_" % (env.nb_agents, env.nb_targets,
                                 args.nb_test_steps)) + model_seed + ".pkl",
            'wb'))

    # Plot over all example episode sets
    f1, ax1 = plt.subplots()
    _ = ax1.plot(total_nlogdetcov, '.')
    _ = ax1.set_title(args.env)
    _ = ax1.set_xlabel('example episode set number')
    _ = ax1.set_ylabel('mean nlogdetcov over episodes')
    _ = ax1.grid()
    _ = f1.savefig(os.path.join(
        eval_dir, 'all_%d_eval' % args.nb_test_steps + model_seed + '.png'))
    plt.close()
    pickle.dump(total_nlogdetcov, open(
        os.path.join(eval_dir, 'all_%d_eval' % args.nb_test_steps)
        + model_seed
        + '%da%dt' % (args.nb_agents, args.nb_targets) + '.pkl', 'wb'))
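# A minimal sketch for re-reading the pickled per-episode curves written by
# test() above. The file name below is hypothetical; it mirrors the
# "%da%dt_%d_eval_<model_seed>.pkl" pattern used in the pickle.dump calls.
import os
import pickle
import numpy as np

fname = os.path.join('eval_seed0_empty', '2a4t_50_eval_seed_0.pkl')  # hypothetical
with open(fname, 'rb') as f:
    ep_nlogdetcov = pickle.load(f)
print("episodes: %d, mean nLogDetCov: %.2f"
      % (len(ep_nlogdetcov), np.mean(ep_nlogdetcov)))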