def main():
    sess = tf.Session()
    K.set_session(sess)
    env = gym.make("MountainCarContinuous-v0")

    # Parameters
    memory_size = 100000
    batch_size = 32
    tau = 0.001
    lr_actor = 0.0001
    lr_critic = 0.001
    discount_factor = 0.99
    episodes = 1001
    time_steps = 501
    collect_experience = 50000
    save_frequency = 250
    ep_reward = []
    training = False

    # Noise object (see the illustrative Ornstein-Uhlenbeck sketch after main())
    noise = OUNoise(env.action_space)

    # Initialize actor and critic objects
    actor = Actor(env, sess, lr_actor, tau)
    # Uncomment the following line to save the actor model architecture as a JSON file.
    # It needs to be saved only once.
    # actor.save_model_architecture("Actor_model_architecture.json")
    critic = Critic(env, sess, lr_critic, tau, discount_factor)

    # Initialize replay memory of size defined by memory_size
    replay_memory = ReplayMemory(memory_size)

    # Toggle between True and False for debugging purposes. For training it is always True.
    run = True

    if run:
        # Loop over the number of episodes. At each new episode, reset the environment,
        # reset the noise state and set the total episode reward to 0.
        for episode in range(episodes):
            state = env.reset()
            noise.reset()
            episode_reward = 0

            # Loop over the number of steps in an episode
            for time in range(time_steps):
                # Uncomment the following line if you want to visualize the mountain car
                # during training. Training can also be run without visualization when
                # positions and velocities are used as state variables.
                # env.render()

                # Predict an action from the actor model using the current state
                action = actor.predict_action(state.reshape((1, 2)))[0]

                # Add Ornstein-Uhlenbeck noise to the predicted action to encourage
                # exploration of the environment
                exploratory_action = noise.get_action(action, time)

                # Take the noisy action to enter the next state
                next_state, reward, done, _ = env.step(exploratory_action)

                # Predict the action to be taken given next_state. This next-state action
                # is predicted using the actor's target model.
                next_action = actor.predict_next_action(next_state.reshape((1, 2)))[0]

                # Append this experience sample to the replay memory
                replay_memory.append(state, exploratory_action, reward,
                                     next_state, next_action, done)

                # Only start training once a minimum number of experience samples
                # is available in memory
                if replay_memory.count() == collect_experience:
                    training = True
                    print('Start training')

                # When training:
                if training:
                    # 1) First draw a random batch of samples from the replay memory
                    batch = replay_memory.sample(batch_size)
                    # 2) Using this batch, calculate dQ/dA from the critic model
                    grads = critic.calc_grads(batch)
                    # 3) Calculate dA/dTheta from the actor using the same batch
                    # 4) Multiply dA/dTheta by negative dQ/dA to get dJ/dTheta
                    # 5) Update the actor weights so that dJ/dTheta is maximized
                    # 6) Steps 3-5 are easily performed by minimizing the value obtained
                    #    in (4) (see the illustrative sketch after main())
                    t_grads = actor.train(batch, grads)
                    # Update the critic weights by minimizing the Bellman loss. The actor
                    # target is used to compute the next action in the next state (already
                    # computed and stored in the replay memory) in order to compute the
                    # TD target.
                    critic.train(batch)
                    # After each weight update of the actor and critic online models,
                    # perform soft updates of their targets so that they smoothly and
                    # slowly track the online models' weights (see the soft-update
                    # sketch after main()).
                    actor.update_target()
                    critic.update_target()

                # Add each step reward to the episode reward
                episode_reward += reward
                # Set the current state to the next state
                state = next_state
                # If the target is reached before the maximum allowed time steps,
                # break the inner for loop
                if done:
                    break

            # Store the episode reward
            ep_reward.append([episode, episode_reward])
            # Print info for each episode to track training progress
            print("Completed in {} steps.... episode: {}/{}, episode reward: {} "
                  .format(time, episode, episodes, episode_reward))

            # Save the model's weights and episode rewards every save_frequency episodes
            if training and (episode % save_frequency) == 0:
                print('Data saved at episode:', episode)
                actor.save_weights('./Model/DDPG_actor_model_{}.h5'.format(episode))
                pickle.dump(ep_reward,
                            open('./Rewards/rewards_{}.dump'.format(episode), 'wb'))

    # Close the mountain car environment
    env.close()
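# --- Illustrative sketch: Ornstein-Uhlenbeck exploration noise ---
# The OUNoise object used in main() is defined elsewhere in the project and is not
# shown here. The class below is only a minimal sketch of a standard OU process,
# not the project's actual implementation: the class name, parameter values
# (mu, theta, sigma) and the clipping to the action-space bounds are assumptions.
import numpy as np


class OUNoiseSketch:
    def __init__(self, action_space, mu=0.0, theta=0.15, sigma=0.3):
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.low = action_space.low
        self.high = action_space.high
        self.dim = action_space.shape[0]
        self.reset()

    def reset(self):
        # Start the noise state at the long-run mean
        self.state = np.ones(self.dim) * self.mu

    def get_action(self, action, t=0):
        # One Euler step of the OU process: mean-reverting drift plus Gaussian noise.
        # t is accepted to mirror the call in main(); real implementations often use
        # it to decay sigma over time.
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(self.dim)
        self.state = self.state + dx
        # Add the correlated noise to the deterministic action and keep it within bounds
        return np.clip(action + self.state, self.low, self.high)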
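# --- Illustrative sketch: actor policy-gradient update (steps 3-6 above) ---
# Steps 3-6 of the actor update in main() apply the chain rule
# dJ/dTheta = dQ/dA * dA/dTheta. With TF1-style graphs this is typically done by
# feeding the negative critic action gradients (-dQ/dA) into tf.gradients as grad_ys.
# This is a minimal sketch, assuming the actor is a Keras model; it is not the
# project's actual Actor.train implementation, and the names build_actor_train_op
# and dq_da are hypothetical.
def build_actor_train_op(actor_model, action_dim, lr_actor):
    # Placeholder that receives dQ/dA from the critic for each sampled state
    dq_da = tf.placeholder(tf.float32, [None, action_dim])
    # Weighting the actor output gradients by -dQ/dA yields -dJ/dTheta summed over
    # the batch; descending these gradients therefore ascends J, i.e. maximizes the
    # expected Q value.
    actor_grads = tf.gradients(actor_model.output,
                               actor_model.trainable_weights,
                               -dq_da)
    train_op = tf.train.AdamOptimizer(lr_actor).apply_gradients(
        list(zip(actor_grads, actor_model.trainable_weights)))
    return dq_da, train_op

# Typical usage inside the training loop (hypothetical variable names):
#   states, dq_da_values = ...  # from the sampled batch and critic.calc_grads(batch)
#   sess.run(train_op, feed_dict={actor_model.input: states, dq_da: dq_da_values})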
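# --- Illustrative sketch: soft target update with tau ---
# The actor.update_target() / critic.update_target() calls in main() blend the
# online network's weights into the target network's weights using the mixing
# factor tau, so the targets track the online models slowly. The helper below is
# only a sketch assuming Keras models; the function name and the use of
# get_weights/set_weights are assumptions, not the project's actual code.
def soft_update_target(online_model, target_model, tau):
    online_weights = online_model.get_weights()
    target_weights = target_model.get_weights()
    # target <- tau * online + (1 - tau) * target, applied layer by layer
    new_weights = [tau * w_online + (1.0 - tau) * w_target
                   for w_online, w_target in zip(online_weights, target_weights)]
    target_model.set_weights(new_weights)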