Example #1
def main(_):
    with tf.Session() as sess:
        env = gym.make(ENV_NAME)
        np.random.seed(RANDOM_SEED)
        tf.set_random_seed(RANDOM_SEED)
        env.seed(RANDOM_SEED)

        print(env.observation_space)
        print(env.action_space)

        state_dim = env.observation_space.shape[0]

        try:
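            # A continuous Box action space exposes .shape; indexing a Discrete space's shape raises IndexError below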
            action_dim = env.action_space.shape[0]
            action_bound = env.action_space.high
            # Ensure action bound is symmetric
            assert np.all(env.action_space.high == -env.action_space.low)
            discrete = False
            print('Continuous Action Space')
        except IndexError:
            action_dim = env.action_space.n
            action_bound = 1
            discrete = True
            print('Discrete Action Space')

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             ACTOR_LEARNING_RATE, TAU)

        critic = CriticNetwork(sess, state_dim, action_dim,
                               CRITIC_LEARNING_RATE, TAU,
                               actor.get_num_trainable_vars())

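        # Exploration noise (the OU_A / OU_MU parameters suggest an Ornstein-Uhlenbeck process) and a discounted-reward helper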
        noise = Noise(DELTA, SIGMA, OU_A, OU_MU)
        reward = Reward(REWARD_FACTOR, GAMMA)

        if GYM_MONITOR_EN:
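            # Record results with a gym Monitor; disable video capture when the env is not rendered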
            if not RENDER_ENV:
                env = Monitor(env,
                              MONITOR_DIR,
                              video_callable=False,
                              force=True)
            else:
                env = Monitor(env, MONITOR_DIR, force=True)

        try:
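            # Run training; a KeyboardInterrupt (Ctrl-C) stops it cleanly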
            train(sess, env, actor, critic, noise, reward, discrete)
        except KeyboardInterrupt:
            pass

        if GYM_MONITOR_EN:
            env.close()
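
The Noise and Reward helpers are defined elsewhere in the source. As a rough guide, a minimal Ornstein-Uhlenbeck noise sketch, assuming DELTA, SIGMA, OU_A and OU_MU map to the time step, volatility, mean-reversion rate and long-run mean, could look like the following (an illustrative sketch, not the original class):

import numpy as np

class Noise:
    # Hypothetical Ornstein-Uhlenbeck process used for exploration noise
    def __init__(self, delta, sigma, ou_a, ou_mu):
        self.delta = delta    # time step
        self.sigma = sigma    # volatility
        self.ou_a = ou_a      # mean-reversion rate
        self.ou_mu = ou_mu    # long-run mean the process reverts to

    def ornstein_uhlenbeck_level(self, x):
        # x_{t+1} = x_t + a * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, 1)
        drift = self.ou_a * (self.ou_mu - x) * self.delta
        diffusion = self.sigma * np.sqrt(self.delta) * np.random.normal(size=np.shape(x))
        return x + drift + diffusion
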
Example #2
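        # Tail of the action-selection loop: track the best-valued and the least-visited action seen so far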
        if stateValues[maxValueAction]['value'] < stateValues[action]['value']:
            maxValueAction = action
        if stateValues[minCountAction]['count'] > stateValues[action]['count']:
            minCountAction = action
    # Compute the decay of the exploration
    decayX = 0.5
    decayY = 50
    decay = max(-i_episode * decayX + decayY, 10 / (i_episode + 1))
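    # With probability ~decay %, explore by returning the least-visited action; otherwise exploit the best-valued one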
    if randint(0, 100) < decay:
        explorationHistory[i_episode] += 1
        return minCountAction
    else:
        return maxValueAction


nbEpisodes = 1000
stepsHistory = [0] * nbEpisodes
env = gym.make('LunarLander-v2')
env = Monitor(env, 'tmp/cart-pole', force=True)
for i in range(6):
    print(i)
    history = {}  # 'state' ==> [{'count': int, 'value': float}]
    explorationHistory = [0] * nbEpisodes
    learn(nbEpisodes, i)
env.close()
# gym.upload('tmp/cart-pole', api_key='sk_QoYvL963TwnAqSJXZLOQ')
plt.plot(range(nbEpisodes), stepsHistory, range(nbEpisodes),
         explorationHistory, range(nbEpisodes), [195] * nbEpisodes)
plt.ylabel('Number of rewards')
plt.show()
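
The learn function that fills history is not part of this excerpt. Given the comment 'state' ==> [{'count': int, 'value': float}], one plausible update rule is an incremental running average per (state, action) pair; the helper below is a sketch under that assumption, and updateStateValues is a hypothetical name, not the original code:

def updateStateValues(history, state, action, observedReturn, nbActions):
    # Hypothetical helper: one {'count', 'value'} entry per action for each visited state
    if state not in history:
        history[state] = [{'count': 0, 'value': 0.0} for _ in range(nbActions)]
    entry = history[state][action]
    entry['count'] += 1
    # Incremental mean of the returns observed after taking `action` in `state`
    entry['value'] += (observedReturn - entry['value']) / entry['count']
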
Example #3
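            # Pre-fill the replay memory with transitions collected by a random agent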
            randomAgent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                break
    print("Finish to full the random agent memory")
    agent.memory = randomAgent.memory
    randomAgent = None

    env = Monitor(env, 'tmp/cart-pole-ddqn-2', force=True)
    for e in range(EPISODES):
        if DEBUG and e >= EPISODES - 10:
            agent.stopExploration()

        state = env.reset()
        for time in range(500):
            # env.render()

            # act on one input (one state)
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)

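            # Store the transition, then learn from a sampled mini-batch (experience replay)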
            agent.remember(state, action, reward, next_state, done)
            agent.replay(batch_size)
            state = next_state
            if done:
                print("episode: {}/{}, score: {}, e: {:.2}"
                      .format(e, EPISODES, time, agent.epsilon))
                break

    env.close()
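
The agent's remember and replay methods are not shown here. As a rough sketch of the usual double-DQN replay step, assuming a Keras-style model and target_model with predict/fit and a deque replay memory (names and structure are illustrative, not the original agent):

import random
import numpy as np
from collections import deque

class DDQNAgentSketch:
    def __init__(self, model, target_model, gamma=0.99, memory_size=100000):
        self.model = model                 # online network (Keras-style predict/fit)
        self.target_model = target_model   # periodically synced copy of the online network
        self.gamma = gamma                 # discount factor
        self.memory = deque(maxlen=memory_size)

    def remember(self, state, action, reward, next_state, done):
        # Store one transition for later replay
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        for state, action, reward, next_state, done in random.sample(self.memory, batch_size):
            target = self.model.predict(state)
            if done:
                target[0][action] = reward
            else:
                # Double DQN: the online network picks the next action,
                # the target network evaluates it
                best_next = np.argmax(self.model.predict(next_state)[0])
                target[0][action] = reward + self.gamma * self.target_model.predict(next_state)[0][best_next]
            self.model.fit(state, target, epochs=1, verbose=0)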