def main(_):
    with tf.Session() as sess:
        env = gym.make(ENV_NAME)
        np.random.seed(RANDOM_SEED)
        tf.set_random_seed(RANDOM_SEED)
        env.seed(RANDOM_SEED)

        print(env.observation_space)
        print(env.action_space)

        state_dim = env.observation_space.shape[0]

        try:
            action_dim = env.action_space.shape[0]
            action_bound = env.action_space.high
            # Ensure the action bound is symmetric; .all() collapses the
            # element-wise array comparison into a single boolean
            assert (env.action_space.high == -env.action_space.low).all()
            discrete = False
            print('Continuous Action Space')
        except IndexError:
            action_dim = env.action_space.n
            action_bound = 1
            discrete = True
            print('Discrete Action Space')

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             ACTOR_LEARNING_RATE, TAU)
        critic = CriticNetwork(sess, state_dim, action_dim,
                               CRITIC_LEARNING_RATE, TAU,
                               actor.get_num_trainable_vars())

        noise = Noise(DELTA, SIGMA, OU_A, OU_MU)
        reward = Reward(REWARD_FACTOR, GAMMA)

        if GYM_MONITOR_EN:
            if not RENDER_ENV:
                env = Monitor(env, MONITOR_DIR, video_callable=False, force=True)
            else:
                env = Monitor(env, MONITOR_DIR, force=True)

        try:
            train(sess, env, actor, critic, noise, reward, discrete)
        except KeyboardInterrupt:
            pass

        if GYM_MONITOR_EN:
            env.close()
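
# The Noise class instantiated above is not defined in this excerpt. Below is
# a minimal sketch of what it might look like, assuming it implements
# Ornstein-Uhlenbeck exploration noise (the usual choice for DDPG). The
# constructor signature mirrors the Noise(DELTA, SIGMA, OU_A, OU_MU) call
# above, but the internals are an illustration, not the original code.
import numpy as np

class Noise(object):
    def __init__(self, delta, sigma, ou_a, ou_mu):
        self.delta = delta    # time-step size
        self.sigma = sigma    # scale of the random-walk (Wiener) term
        self.ou_a = ou_a      # mean-reversion rate
        self.ou_mu = ou_mu    # long-run mean the process drifts back toward

    def brownian_motion_log_returns(self):
        # One Wiener-process increment: N(0, sigma^2 * delta)
        sqrt_delta_sigma = np.sqrt(self.delta) * self.sigma
        return np.random.normal(loc=0.0, scale=sqrt_delta_sigma)

    def ornstein_uhlenbeck_level(self, prev_ou_level):
        # dx = a * (mu - x) * dt + dW: drift toward mu plus random noise,
        # which yields temporally correlated exploration noise
        drift = self.ou_a * (self.ou_mu - prev_ou_level) * self.delta
        randomness = self.brownian_motion_log_returns()
        return prev_ou_level + drift + randomness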
    # Tail of the action-selection helper: track the greedy (highest-value)
    # action and the least-tried action while scanning the actions
    if stateValues[maxValueAction]['value'] < stateValues[action]['value']:
        maxValueAction = action
    if stateValues[minCountAction]['count'] > stateValues[action]['count']:
        minCountAction = action

    # Compute the decay of the exploration probability
    decayX = 0.5
    decayY = 50
    decay = max(-i_episode * decayX + decayY, 10.0 / (i_episode + 1))

    if randint(0, 100) < decay:
        # Explore: take the action we have tried the least
        explorationHistory[i_episode] += 1
        return minCountAction
    else:
        # Exploit: take the action with the highest estimated value
        return maxValueAction

nbEpisodes = 1000
stepsHistory = [0] * nbEpisodes

env = gym.make('LunarLander-v2')
env = Monitor(env, 'tmp/cart-pole', force=True)

for i in range(6):
    print(i)
    history = {}  # 'state' ==> [{'count': int, 'value': float}]
    explorationHistory = [0] * nbEpisodes
    learn(nbEpisodes, i)

env.close()
# gym.upload('tmp/cart-pole', api_key='sk_QoYvL963TwnAqSJXZLOQ')

plt.plot(range(nbEpisodes), stepsHistory,
         range(nbEpisodes), explorationHistory,
         range(nbEpisodes), [195] * nbEpisodes)
plt.ylabel('Number of rewards')
plt.show()
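
# A quick, standalone way to inspect the exploration schedule used above:
# decay starts at decayY (50%), falls linearly by decayX (0.5) per episode,
# and after roughly 100 episodes the 10 / (i_episode + 1) floor takes over,
# so the exploration probability never quite reaches zero. The milestone
# episodes below are arbitrary, chosen only to show the shape of the curve.
decayX, decayY = 0.5, 50
for i_episode in (0, 50, 100, 200, 500):
    decay = max(-i_episode * decayX + decayY, 10.0 / (i_episode + 1))
    print('episode %4d -> explore with probability ~%.2f%%' % (i_episode, decay))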
        randomAgent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            break

print("Finished filling the random agent's memory")

# Seed the learning agent's replay buffer with the random agent's experience
agent.memory = randomAgent.memory
randomAgent = None

env = Monitor(env, 'tmp/cart-pole-ddqn-2', force=True)

for e in range(EPISODES):
    if DEBUG and e >= EPISODES - 10:
        agent.stopExploration()
    state = env.reset()
    for time in range(500):
        # env.render()
        # Act on one input (one state)
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        agent.replay(batch_size)
        state = next_state
        if done:
            print("episode: {}/{}, score: {}, e: {:.2}"
                  .format(e, EPISODES, time, agent.epsilon))
            break

env.close()
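
# agent.replay() above is where the Double-DQN update happens, but its body
# is not shown in this excerpt. Below is a minimal sketch of such a method,
# assuming a Keras online network (self.model), a separate target network
# (self.target_model), states already shaped (1, state_size), and a memory
# of (state, action, reward, next_state, done) tuples. The attribute names
# (self.gamma, self.epsilon, self.epsilon_min, self.epsilon_decay) are
# illustrative assumptions, not the original implementation.
import random
import numpy as np

def replay(self, batch_size):
    if len(self.memory) < batch_size:
        return
    minibatch = random.sample(self.memory, batch_size)
    for state, action, reward, next_state, done in minibatch:
        target = self.model.predict(state)
        if done:
            target[0][action] = reward
        else:
            # Double DQN: the online network selects the next action,
            # the target network evaluates it, which reduces the
            # overestimation bias of plain Q-learning
            best_action = np.argmax(self.model.predict(next_state)[0])
            target[0][action] = reward + self.gamma * \
                self.target_model.predict(next_state)[0][best_action]
        self.model.fit(state, target, epochs=1, verbose=0)
    # Decay exploration after each training step
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay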