def make_env(env_name, env_type, seed, wrapper_kwargs=None):
    """Create and seed a gym environment.

    Args:
        env_name: For Atari, the game name (e.g. "Pong") — it is expanded to
            "<name>NoFrameskip-v4"; otherwise a full gym env id.
        env_type: "atari" to build a DeepMind-wrapped Atari env; any other
            value creates the env directly via ``gym.make``.
        seed: RNG seed applied to the environment.
        wrapper_kwargs: Optional dict of keyword arguments forwarded to
            ``wrap_deepmind``.

    Returns:
        The (possibly wrapped) seeded environment.
    """
    if env_type == 'atari':
        # BUG FIX: the kwargs dict was previously passed *positionally* to
        # wrap_deepmind, so it would bind to the first optional parameter
        # instead of being expanded as keyword arguments. Expand it properly
        # (and tolerate None).
        wrapper_kwargs = wrapper_kwargs or {}
        env = wrap_deepmind(make_atari("{}NoFrameskip-v4".format(env_name)),
                            **wrapper_kwargs)
    else:
        env = gym.make(env_name)
    env.seed(seed)
    return env
def prep_env(env_name, video_path):
    """Build an environment for evaluation with video recording.

    CartPole gets no-op recording hooks (there is nothing to record for it),
    so callers can invoke ``record_start``/``record_end`` uniformly. Any other
    name is treated as an Atari game: it is DeepMind-wrapped and monitored so
    that episode videos are written under *video_path*.
    """
    if env_name.lower() == "cartpole":
        cartpole = gym.make("CartPole-v0")
        # No-op hooks keep the recording API uniform across env types.
        cartpole.record_start = lambda: None
        cartpole.record_end = lambda: None
        return cartpole

    # make sure to add NoFrameskip-v4
    atari = wrap_deepmind(make_atari(env_name + "NoFrameskip-v4"))
    return Monitor(env=atari, directory=video_path, force=True)
def invoke_agent_env(params, alg):
    """Returns the wrapped env and string name of agent, then
    Use `eval(agent)` to activate it from main script

    Args:
        params: Hyper-parameter namespace; reads ``mode``, ``env_name``,
            ``skip_frame_k`` and ``debug_flg``.
        alg: Algorithm name used to build the agent class-name string.

    Returns:
        (agent, env) — the agent class-name string and the wrapped env.

    Raises:
        ValueError: If ``params.mode`` is not one of "Atari", "CartPole",
            "CartPole-p" (previously this surfaced as an UnboundLocalError).
    """
    if params.mode == "Atari":
        # BUG FIX: `skip_frame_k` used to be passed as a keyword argument to
        # str.format, where it was silently ignored; it is intended for
        # make_atari. TODO(review): confirm make_atari accepts skip_frame_k.
        env = wrap_deepmind(
            make_atari("{}NoFrameskip-v4".format(params.env_name),
                       skip_frame_k=params.skip_frame_k),
            skip_frame_k=params.skip_frame_k)
        if params.debug_flg:
            agent = "{}_debug".format(alg)
        else:
            agent = "{}".format(alg)
    else:
        agent = "{}".format(alg)
        if params.mode == "CartPole":
            env = MyWrapper(gym.make("CartPole-v0"))
        elif params.mode == "CartPole-p":
            env = CartPole_Pixel(gym.make("CartPole-v0"))
        else:
            # Fail loudly instead of leaking an UnboundLocalError on `env`.
            raise ValueError("Unknown mode: {}".format(params.mode))
    return agent, env
import tensorflow as tf from tf_rl.common.memory_tf import ReplayBuffer from tf_rl.common.wrappers import wrap_deepmind, make_atari env = wrap_deepmind(make_atari("PongNoFrameskip-v4")) state = env.reset() memory = ReplayBuffer(capacity=100, n_step=0, act_shape=(), obs_shape=state.shape, obs_dtype=tf.int8, checkpoint_dir="./tmp") done = False for t in range(100): action = env.action_space.sample() next_state, reward, done, info = env.step(action) memory.add(state, action, reward, next_state, done) state = next_state env.close() print(len(memory)) obs, action, next_obs, reward, done = memory.sample(batch_size=10) print(obs.shape, action.shape, next_obs.shape, reward.shape, done.shape) path = memory.save() # recover phase print("=== Recover Phase ===") del memory memory = ReplayBuffer(capacity=100, n_step=0, act_shape=(),
import itertools
from tf_rl.common.monitor import Monitor
from tf_rl.common.wrappers import wrap_deepmind, make_atari

# Record one random-policy episode of Pong to ./video/<env_name>.
env_name = "PongNoFrameskip-v4"
monitored_env = Monitor(env=wrap_deepmind(make_atari(env_name)),
                        directory="./video/{}".format(env_name),
                        force=True)

monitored_env.record_start()
obs = monitored_env.reset()
steps = 0
while True:
    # Random policy: sample an action each step until the episode ends.
    rand_action = monitored_env.action_space.sample()
    obs, reward, episode_over, info = monitored_env.step(rand_action)
    steps += 1
    if episode_over:
        break

print("End at {}".format(steps))
monitored_env.record_end()
monitored_env.close()
# ----- DQN training setup (script fragment) -----
policy = EpsilonGreedyPolicy_eager(Epsilon_fn=anneal_ep)
optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=0.00025,
                                                decay=0.95,
                                                momentum=0.0,
                                                epsilon=0.00001,
                                                centered=True)
replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
loss_fn = create_loss_func(params.loss_fn)
grad_clip_fn = gradient_clip_fn(flag=params.grad_clip_flg)

# create a directory for log/model
params = create_log_model_directory(params, get_alg_name())
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)

# choose env and instantiate the agent correspondingly
# BUG FIX: `skip_frame_k` was previously passed as a keyword argument to
# str.format, where it was silently ignored; it is intended for make_atari.
# TODO(review): confirm make_atari accepts skip_frame_k.
env = wrap_deepmind(
    make_atari("{}NoFrameskip-v4".format(params.env_name),
               skip_frame_k=params.skip_frame_k),
    skip_frame_k=params.skip_frame_k)
if params.debug_flg:
    agent = DQN_debug(Model, optimizer, loss_fn, grad_clip_fn,
                      env.action_space.n, params)
else:
    agent = DQN(Model, optimizer, loss_fn, grad_clip_fn,
                env.action_space.n, params)

# set seed
env.seed(params.seed)
tf.compat.v1.random.set_random_seed(params.seed)

train_DQN(agent, env, policy, replay_buffer, reward_buffer, summary_writer)