Example #1
# run this from the terminal and make sure you are loading appropriate environment variables
# $ echo $LD_LIBRARY_PATH

import gym
from tf_rl.common.monitor import Monitor
import environments.register as register  # side-effect import: registers the custom environments (e.g. CentipedeSix-v1) with gym

video_dir = "./video/"
temp = 5  # recording interval: record one episode every 5 episodes

env = gym.make("CentipedeSix-v1")
env = Monitor(env, video_dir, force=True)

for ep in range(10):
    if ep % temp == 0:
        print("recording")
        env.record_start()

    env.reset()
    done = False
    while not done:
        # env.render()
        action = env.action_space.sample()
        s, r, done, info = env.step(action)  # take a random action
    if ep % temp == 0:
        env.record_end()
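
For context, the Monitor wrapper from tf_rl.common.monitor is not shown in these snippets. Below is a minimal sketch of a wrapper exposing the same record_start()/record_end() toggle; the class name, the _recording flag, and the frame handling are illustrative assumptions, not tf_rl's actual implementation.

import gym

class ToggleRecorder(gym.Wrapper):
    # Hypothetical wrapper: buffer rendered frames only between record_start() and record_end().
    def __init__(self, env, directory):
        super().__init__(env)
        self.directory = directory
        self._recording = False
        self._frames = []

    def record_start(self):
        # begin buffering rendered frames
        self._recording = True
        self._frames = []

    def record_end(self):
        # stop buffering; a real implementation would encode self._frames
        # into a video file under self.directory here
        self._recording = False

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        if self._recording:
            self._frames.append(self.env.render(mode="rgb_array"))
        return obs, reward, done, info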
Example #2
all_distances, all_rewards, all_actions = list(), list(), list()
distance_func = get_distance(
    agent.params.env_name)  # create the distance measure func
print("=== Evaluation Mode ===")
for ep in range(params.n_trial):
    env.record_start()
    obs = env.reset()
    state = obs["flat_obs"]
    done = False
    episode_reward = 0
    while not done:
        action = agent.eval_predict(state)
        # action = env.action_space.sample()

        # scale for execution in env (in DDPG, every action is clipped between [-1, 1] in agent.predict)
        obs, reward, done, info = env.step(action * env.action_space.high)
        # print(action, reward)
        next_flat_state, next_graph_state = obs["flat_obs"], obs["graph_obs"]
        distance = distance_func(action, reward, info)
        all_actions.append(action.mean()**2)  # squared mean of the action values
        all_distances.append(distance)
        state = next_flat_state
        episode_reward += reward

    all_rewards.append(episode_reward)
    tf.contrib.summary.scalar("Evaluation Score",
                              episode_reward,
                              step=agent.index_timestep)
    print("| Ep: {}/{} | Score: {} |".format(ep + 1, params.n_trial,
                                             episode_reward))
env.record_end()
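
Note that env.step(action * env.action_space.high) assumes the action bounds are symmetric (low == -high), which holds for the MuJoCo-style environments used here. A more general rescaling from the agent's [-1, 1] output to an arbitrary Box range could look like the sketch below; rescale_action is a hypothetical helper, not part of tf_rl.

import numpy as np

def rescale_action(action, low, high):
    # map an action in [-1, 1] to the box [low, high], handling asymmetric bounds
    action = np.clip(action, -1.0, 1.0)
    return low + 0.5 * (action + 1.0) * (high - low)

# usage: env.step(rescale_action(agent.eval_predict(state), env.action_space.low, env.action_space.high))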
Example #3
    # for summary purposes, we keep all of the training code inside this summary context
    with tf.contrib.summary.always_record_summaries():

        for i in itertools.count():
            state = env.reset()
            total_reward = 0
            start = time.time()
            done = False
            episode_len = 0
            while not done:
                if global_timestep.numpy() < agent.params.learning_start:
                    action = env.action_space.sample()
                else:
                    action = agent.predict(state)
                # scale for execution in env (in DDPG, every action is clipped between [-1, 1] in agent.predict)
                next_state, reward, done, info = env.step(
                    action * env.action_space.high)
                replay_buffer.add(state, action, reward, next_state, done)
                """
                === Update the models
                """
                if global_timestep.numpy() > agent.params.learning_start:
                    states, actions, rewards, next_states, dones = replay_buffer.sample(
                        agent.params.batch_size)
                    loss = agent.update(states, actions, rewards, next_states,
                                        dones)
                    soft_target_model_update_eager(
                        agent.target_actor,
                        agent.actor,
                        tau=agent.params.soft_update_tau)
                    soft_target_model_update_eager(
                        agent.target_critic,
                        agent.critic,
                        tau=agent.params.soft_update_tau)
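
soft_target_model_update_eager is not defined in this snippet; conceptually it performs a soft (Polyak) target-network update, target <- tau * source + (1 - tau) * target. A minimal sketch for Keras-style models is shown below; the function name and signature here are assumptions, not tf_rl's exact API.

def soft_target_update(target_model, source_model, tau=0.005):
    # blend the online weights into the target weights, array by array
    new_weights = [
        tau * w_src + (1.0 - tau) * w_tgt
        for w_src, w_tgt in zip(source_model.get_weights(), target_model.get_weights())
    ]
    target_model.set_weights(new_weights)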
Example #4
import gym
from tf_rl.common.monitor import Monitor

ENVS = [
    "Ant-v2",
    "HalfCheetah-v2",
    "Hopper-v2",
    "Humanoid-v2",
    # "Reacher-v2",
    # "Swimmer-v2",
    "Walker2d-v2"
]

DEFAULT = 250

for env_name in ENVS:
    env = gym.make(env_name)
    env = Monitor(env, "./video/{}".format(env_name), force=True)
    print(env_name)
    env.record_start()
    env.reset()
    done = False
    while not done:
        # env.render(mode="human", annotation_flg=False)
        s, r, done, i = env.step(env.action_space.sample())
    env.record_end()
    env.close()
Example #5
import gym
from tf_rl.common.monitor import Monitor

env = gym.make('CartPole-v0')
env = Monitor(env=env, directory="./video/cartpole", force=True)

for ep in range(20):
    if ep == 0: env.record_start()
    state = env.reset()
    for t in range(1000):
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        if done:
            if ep == 0: env.record_end()
            break

env.close()