Example #1
import gym
from tf_rl.common.wrappers import wrap_deepmind, make_atari


def make_env(env_name, env_type, seed, wrapper_kwargs=None):
    """Create a seeded gym environment, applying the DeepMind Atari wrappers for Atari envs."""
    if env_type == 'atari':
        # e.g. env_name="Pong" becomes "PongNoFrameskip-v4"; wrapper_kwargs is forwarded to wrap_deepmind
        env = wrap_deepmind(make_atari("{}NoFrameskip-v4".format(env_name)),
                            wrapper_kwargs)
    else:
        env = gym.make(env_name)
    env.seed(seed)
    return env
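A minimal usage sketch for make_env; the environment names and the seed value below are illustrative, not taken from the original snippet.

# Illustrative calls; any env_type other than 'atari' falls back to plain gym.make.
atari_env = make_env("Pong", env_type="atari", seed=42)
classic_env = make_env("CartPole-v0", env_type="classic_control", seed=42)
print(atari_env.observation_space, classic_env.observation_space)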
Example #2
import gym
from tf_rl.common.monitor import Monitor
from tf_rl.common.wrappers import wrap_deepmind, make_atari


def prep_env(env_name, video_path):
    """Prepare an environment; Atari envs are wrapped with a video Monitor."""
    if env_name.lower() == "cartpole":
        env = gym.make("CartPole-v0")
        # CartPole is not recorded, so give it no-op stubs for the Monitor recording API
        env.record_start = lambda: None
        env.record_end = lambda: None
    else:
        # make sure to add the "NoFrameskip-v4" suffix to the Atari game name
        env = wrap_deepmind(make_atari(env_name + "NoFrameskip-v4"))
        env = Monitor(env=env, directory=video_path, force=True)
    return env
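A hedged usage sketch for prep_env; the game name and video path are illustrative, and the record_start/record_end calls mirror the Monitor example later on this page.

env = prep_env("Pong", video_path="./video/pong")
env.record_start()  # a no-op stub when the env is CartPole
state = env.reset()
next_state, reward, done, info = env.step(env.action_space.sample())
env.record_end()
env.close()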
Example #3
import gym
from tf_rl.common.wrappers import wrap_deepmind, make_atari
# MyWrapper and CartPole_Pixel come from the project's wrapper module (import not shown here)


def invoke_agent_env(params, alg):
    """Return the wrapped env and the agent's name as a string.

    Use `eval(agent)` in the main script to instantiate the agent class.
    """
    if params.mode == "Atari":
        env = wrap_deepmind(make_atari("{}NoFrameskip-v4".format(params.env_name)),
                            skip_frame_k=params.skip_frame_k)
        if params.debug_flg:
            agent = "{}_debug".format(alg)
        else:
            agent = "{}".format(alg)
    else:
        agent = "{}".format(alg)
        if params.mode == "CartPole":
            env = MyWrapper(gym.make("CartPole-v0"))
        elif params.mode == "CartPole-p":
            env = CartPole_Pixel(gym.make("CartPole-v0"))
    return agent, env
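A sketch of the intended calling pattern, assuming a params-like configuration object; the SimpleNamespace stand-in and the concrete values ("Pong", skip_frame_k=4, "DQN") are illustrative, not part of the original snippet.

from types import SimpleNamespace

# Illustrative stand-in for the project's `params` object (field names taken from the function above).
params = SimpleNamespace(mode="Atari", env_name="Pong", skip_frame_k=4, debug_flg=False)
agent_name, env = invoke_agent_env(params, alg="DQN")
print(agent_name)  # "DQN"; the main script would resolve the class with eval(agent_name)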
Example #4
import tensorflow as tf
from tf_rl.common.memory_tf import ReplayBuffer
from tf_rl.common.wrappers import wrap_deepmind, make_atari

env = wrap_deepmind(make_atari("PongNoFrameskip-v4"))
state = env.reset()
memory = ReplayBuffer(capacity=100,
                      n_step=0,
                      act_shape=(),
                      obs_shape=state.shape,
                      obs_dtype=tf.int8,
                      checkpoint_dir="./tmp")

# collect transitions with a random policy and store them in the buffer
done = False
for t in range(100):
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)
    memory.add(state, action, reward, next_state, done)
    state = next_state
env.close()
print(len(memory))
obs, action, next_obs, reward, done = memory.sample(batch_size=10)
print(obs.shape, action.shape, next_obs.shape, reward.shape, done.shape)
path = memory.save()  # persist the buffer to disk before the recover phase below

# recover phase
print("=== Recover Phase ===")
del memory
memory = ReplayBuffer(capacity=100,
                      n_step=0,
                      act_shape=(),
                      obs_shape=state.shape,
                      obs_dtype=tf.int8,
                      checkpoint_dir="./tmp")
Example #5
import itertools
from tf_rl.common.monitor import Monitor
from tf_rl.common.wrappers import wrap_deepmind, make_atari

env_name = "PongNoFrameskip-v4"

env = wrap_deepmind(make_atari(env_name))
env = Monitor(env=env, directory="./video/{}".format(env_name), force=True)
env.record_start()
state = env.reset()
for t in itertools.count():
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)
    state = next_state
    if done:
        break
print("End at {}".format(t + 1))
env.record_end()
env.close()
Example #6
policy = EpsilonGreedyPolicy_eager(Epsilon_fn=anneal_ep)
optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=0.00025,
                                                decay=0.95,
                                                momentum=0.0,
                                                epsilon=0.00001,
                                                centered=True)
replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
loss_fn = create_loss_func(params.loss_fn)
grad_clip_fn = gradient_clip_fn(flag=params.grad_clip_flg)

# create a directory for log/model
params = create_log_model_directory(params, get_alg_name())
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)

# choose env and instantiate the agent correspondingly
env = wrap_deepmind(make_atari("{}NoFrameskip-v4".format(params.env_name)),
                    skip_frame_k=params.skip_frame_k)
if params.debug_flg:
    agent = DQN_debug(Model, optimizer, loss_fn, grad_clip_fn,
                      env.action_space.n, params)
else:
    agent = DQN(Model, optimizer, loss_fn, grad_clip_fn, env.action_space.n,
                params)

# set seed
env.seed(params.seed)
tf.compat.v1.random.set_random_seed(params.seed)

train_DQN(agent, env, policy, replay_buffer, reward_buffer, summary_writer)