def make_env(game_name):
    """Create the gym environment for ``game_name`` plus a monitored view of it.

    Returns:
        A ``(env, monitored_env)`` tuple: the environment exactly as returned
        by ``gym.make`` and a ``SimpleMonitor`` wrapped around it.
    """
    # Per the original author's note, this environment id already performs a
    # frame-skip of 4 (see baselines.common.atari_wrappers_deprecated).
    raw_env = gym.make(game_name)
    # SimpleMonitor puts rewards and number of steps in `info`.
    monitor = SimpleMonitor(raw_env)
    # NOTE: the wrap_dqn preprocessing (downsample, make b/w) is intentionally
    # not applied in this variant, so the raw environment is what gets returned.
    return raw_env, monitor
def make_env(game_name):
    """Create the DQN-wrapped ``NoFrameskip-v4`` environment for ``game_name``.

    Returns:
        A ``(env, monitored_env)`` tuple: the ``wrap_dqn``-processed
        environment and the ``SimpleMonitor`` layer sitting underneath it.
    """
    env_id = game_name + "NoFrameskip-v4"
    # The monitor is attached before wrap_dqn so that rewards and step counts
    # land in `info` before the environment is wrapped.
    monitored_env = SimpleMonitor(gym.make(env_id))
    # wrap_dqn simplifies the observation space (downsample, make b/w).
    return wrap_dqn(monitored_env), monitored_env
def make_env(game_name):
    """Return the monitored, DQN-wrapped ``NoFrameskip-v4`` env for ``game_name``."""
    base_env = gym.make(game_name + "NoFrameskip-v4")
    # Wrapping order: SimpleMonitor innermost, wrap_dqn outermost.
    return wrap_dqn(SimpleMonitor(base_env))
def _make_run_env(env_id, is_master, monitor_dir):
    """Create one gym environment; only the master worker gets monitors."""
    env = gym.make(env_id)
    if not is_master:
        return env
    if monitor_dir:
        # Gym's own monitor goes innermost, SimpleMonitor outermost.
        env = gym.wrappers.Monitor(env, monitor_dir, force=True)
    return SimpleMonitor(env)


def _parse_noise_type(noise_type, nb_actions):
    """Turn a comma-separated noise spec into ``(action_noise, param_noise)``."""
    action_noise = None
    param_noise = None
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            continue
        # Entries are matched by substring, in this order, and are expected to
        # look like '<kind>_<stddev>'.
        if 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev),
                                                 desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) * np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions),
                                                        sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError('unknown noise type "{}"'.format(current_noise_type))
    return action_noise, param_noise


def run(env_id, seed, noise_type, num_cpu, layer_norm, logdir, gym_monitor, evaluation, bind_to_core, **kwargs):
    """Fork MPI workers, build the environments and components, and train.

    Args:
        env_id: Environment id passed to ``gym.make``.
        seed: Base random seed; each worker uses ``seed + 1000000 * rank``.
        noise_type: Comma-separated noise entries. Each entry is ``'none'`` or
            ``'<kind>_<stddev>'`` where ``<kind>`` contains ``adaptive-param``,
            ``normal`` or ``ou``.
        num_cpu: Forwarded to ``mpi_fork``.
        layer_norm: Forwarded to the ``Actor`` and ``Critic`` constructors.
        logdir: Added to ``kwargs`` for training; also the parent directory
            for gym monitor output when ``gym_monitor`` is set.
        gym_monitor: When truthy (and ``logdir`` is truthy), the rank-0
            environments are wrapped in ``gym.wrappers.Monitor``.
        evaluation: When truthy, a second environment is created and passed
            to training as ``eval_env``.
        bind_to_core: Forwarded to ``mpi_fork``.
        **kwargs: Forwarded unchanged to ``training.train``.

    Raises:
        RuntimeError: If ``noise_type`` contains an unrecognised entry.
        SystemExit: In the process for which ``mpi_fork`` returns ``'parent'``.
    """
    kwargs['logdir'] = logdir
    whoami = mpi_fork(num_cpu, bind_to_core=bind_to_core)
    if whoami == 'parent':
        sys.exit(0)

    # Configure things.
    rank = MPI.COMM_WORLD.Get_rank()
    is_master = rank == 0
    if not is_master:
        # Write to temp directory for all non-master workers and silence them.
        Logger.CURRENT.close()
        Logger.CURRENT = Logger(dir=mkdtemp(), output_formats=[])
        logger.set_level(logger.DISABLED)

    # Create envs. Monitors are only attached on the master worker.
    record = gym_monitor and logdir
    train_dir = os.path.join(logdir, 'gym_train') if record else None
    env = _make_run_env(env_id, is_master, train_dir)
    eval_env = None
    try:
        if evaluation:
            eval_dir = os.path.join(logdir, 'gym_eval') if record else None
            eval_env = _make_run_env(env_id, is_master, eval_dir)

        # Parse noise_type.
        nb_actions = env.action_space.shape[-1]
        action_noise, param_noise = _parse_noise_type(noise_type, nb_actions)

        # Configure components.
        memory = Memory(limit=int(1e6), action_shape=env.action_space.shape,
                        observation_shape=env.observation_space.shape)
        critic = Critic(layer_norm=layer_norm)
        actor = Actor(nb_actions, layer_norm=layer_norm)

        # Seed everything to make things reproducible.
        seed = seed + 1000000 * rank
        logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir()))
        tf.reset_default_graph()
        set_global_seeds(seed)
        env.seed(seed)
        if eval_env is not None:
            eval_env.seed(seed)

        # Only the master worker times the run (it is the only one that logs).
        if is_master:
            start_time = time.time()
        training.train(env=env, eval_env=eval_env, param_noise=param_noise,
                       action_noise=action_noise, actor=actor, critic=critic,
                       memory=memory, **kwargs)
    finally:
        # Release the environments and the logger even if setup or training
        # raised, so emulator/monitor resources are not leaked.
        env.close()
        if eval_env is not None:
            eval_env.close()
        Logger.CURRENT.close()
    if is_master:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
def make_env(game_name):
    """Create the ``NoFrameskip-v3`` environment for ``game_name``.

    Returns:
        A ``(env_monitored, env)`` tuple: the ``SimpleMonitor`` layer first,
        then the ``wrap_dqn``-processed environment built on top of it.
    """
    env_id = game_name + "NoFrameskip-v3"
    monitor = SimpleMonitor(gym.make(env_id))
    wrapped = wrap_dqn(monitor)
    # Note the order: the monitored env comes first, the wrapped env second.
    return monitor, wrapped
def make_env(game_name):
    """Return the fully wrapped ``NoFrameskip-v4`` environment for ``game_name``.

    Layers, innermost to outermost: ``bench.Monitor`` (created with ``None``
    as its second argument), ``SimpleMonitor``, then ``wrap_atari_dqn``.
    """
    base_env = gym.make(game_name + "NoFrameskip-v4")
    bench_monitored = bench.Monitor(base_env, None)
    simple_monitored = SimpleMonitor(bench_monitored)
    return wrap_atari_dqn(simple_monitored)
except Exception as e: sys.exit( str(e) + '\n' + 'map_config import error. File not exist or map_config not specified' ) from gym.envs.registration import register register( id='MonsterKong-v0', entry_point='baselines.ple.gym_env.monsterkong:MonsterKongEnv', kwargs={'map_config': map_config}, ) env = gym.make('MonsterKong-v0') env = ProcessFrame(env) env = SimpleMonitor(env) else: env, monitored_env = make_env(args.env) subdir = (datetime.datetime.now() ).strftime("%m-%d-%Y-%H:%M:%S") + " " + args.comment tf_writer = tf.summary.FileWriter(os.path.join(args.log_dir, subdir), tf.get_default_graph()) value_summary = tf.Summary() qec_summary = tf.Summary() value_summary.value.add(tag='discount_reward_mean') value_summary.value.add(tag='non_discount_reward_mean') # value_summary.value.add(tag='non_discount_reward_mean') # value_summary.value.add(tag='episode') qec_summary.value.add(tag='qec_mean')