def train(
    self,
    env: gym.Env,
    agent: Agent,
    network: Network,
    optimizer,
    window_size: int,
    nb_self_play: int,
    num_unroll_steps: int,
    td_steps: int,
    discount: float,
    batch_size: int,
    nb_train_update: int,
    nb_train_epochs: int,
    max_grad_norm: float,
    filename: str,
    ent_c: float,
):
    """Alternate self-play data collection and network weight updates.

    Each epoch plays ``nb_self_play`` games into a replay buffer, then runs
    ``nb_train_update`` gradient steps on sampled batches, printing the mean
    episode reward and the averaged loss components.  The agent's model is
    checkpointed to ``filename`` every 10 epochs.
    """
    replay_buffer = ReplayBuffer(window_size, batch_size)

    for epoch in range(nb_train_epochs):
        # --- Collection phase: play games with the network in eval mode ---
        network.eval()
        rewards = []
        for _ in range(nb_self_play):
            game = self._play_one_game(env, agent)
            replay_buffer.append(game)
            rewards.append(np.sum(game.rewards))

        # --- Learning phase: sample batches and apply gradient updates ---
        network.train()
        losses = [
            self._update_weights(
                network,
                optimizer,
                replay_buffer.sample_batch(num_unroll_steps, td_steps, discount),
                max_grad_norm,
                ent_c,
            )
            for _ in range(nb_train_update)
        ]
        # Each entry is (value, reward, policy, entropy); average column-wise.
        v_loss, r_loss, p_loss, entropy = np.mean(losses, axis=0)
        print(
            f"Epoch[{epoch+1}]: Reward[{np.mean(rewards)}], Loss: V[{v_loss:.6f}]/R[{r_loss:.6f}]/P[{p_loss:.6f}]/E[{entropy:.6f}]"
        )

        # Periodic checkpoint (epochs are 1-based for the modulus).
        if (epoch + 1) % 10 == 0:
            agent.save_model(filename)
# --- Example #2 ---
def train(sess, env, actor, critic, noise, reward, discrete):
    """Run the DDPG training loop.

    Args:
        sess: TensorFlow session used to run all graph operations.
        env: Gym-style environment exposing reset()/step()/render().
        actor: Actor network (predict, predict_target, train,
            update_target_network; attributes s_dim, a_dim).
        critic: Critic network (predict_target, train, action_gradients,
            update_target_network).
        noise: Ornstein-Uhlenbeck noise source for exploration.
        reward: Object whose discount() post-processes an episode's
            transitions before they enter the replay buffer.
        discrete: If True, the argmax of the actor output is taken as a
            discrete action; otherwise the raw action vector is used.
    """
    # Set up summary writer.
    # FIX: was `summary_write`, which left the commented-out summary code
    # below referencing a nonexistent `summary_writer`.
    summary_writer = tf.summary.FileWriter("ddpg_summary")

    sess.run(tf.global_variables_initializer())

    # Initialize target networks to match the online networks.
    actor.update_target_network()
    critic.update_target_network()

    # Initialize replay memory.
    replay_buffer = ReplayBuffer(BUFFER_SIZE, RANDOM_SEED)

    # Initialize exploration-noise level.
    ou_level = 0.

    for i in range(MAX_EPISODES):
        s = env.reset()
        ep_reward = 0
        ep_ave_max_q = 0

        # Per-episode transitions, rows of (s, a, r, terminal, s2).
        episode_buffer = np.empty((0, 5), float)

        for j in range(MAX_EP_STEPS):
            if RENDER_ENV:
                env.render()
            a = actor.predict(np.reshape(s, (1, actor.s_dim)))

            # Add exploration noise during the first NOISE_MAX_EP episodes.
            if i < NOISE_MAX_EP:
                ou_level = noise.ornstein_uhlenbeck_level(ou_level)
                a = a + ou_level

            # Set action for discrete and continuous action spaces.
            if discrete:
                action = np.argmax(a)
            else:
                action = a[0]

            s2, r, terminal, info = env.step(action)

            ep_reward += r

            episode_buffer = np.append(episode_buffer,
                                       [[s, a, r, terminal, s2]],
                                       axis=0)

            # Learn once the buffer holds more than one minibatch.
            if replay_buffer.size() > MINIBATCH_SIZE:
                s_batch, a_batch, r_batch, t_batch, s2_batch = \
                    replay_buffer.sample_batch(MINIBATCH_SIZE)

                # Bellman targets from the target networks.
                target_q = critic.predict_target(
                    s2_batch, actor.predict_target(s2_batch))

                y_i = []
                for k in range(MINIBATCH_SIZE):
                    if t_batch[k]:
                        # Terminal transition: no bootstrapped future value.
                        y_i.append(r_batch[k])
                    else:
                        y_i.append(r_batch[k] + GAMMA * target_q[k])

                # Update the critic given the targets.
                predicted_q_value, _ = critic.train(
                    s_batch, a_batch, np.reshape(y_i, (MINIBATCH_SIZE, 1)))
                ep_ave_max_q += np.max(predicted_q_value)

                # Update the actor policy using the sampled gradient.
                a_outs = actor.predict(s_batch)
                grads = critic.action_gradients(s_batch, a_outs)
                actor.train(s_batch, grads[0])

                # Update target networks.
                actor.update_target_network()
                critic.update_target_network()

            # Set previous state for next step.
            s = s2
            if terminal:
                # Apply the episode-level reward post-processing, then push
                # each transition into the replay memory.
                episode_buffer = reward.discount(episode_buffer)
                for step in episode_buffer:
                    replay_buffer.add(np.reshape(step[0], (actor.s_dim, )),
                                      np.reshape(step[1], (actor.a_dim, )),
                                      step[2], step[3],
                                      np.reshape(step[4], (actor.s_dim, )))

                # summary = tf.summary()
                # summary.value.add(tag="Perf/Reward", simple_value=float(ep_reward))
                # summary.value.add(tag="Perf/Qmax", simple_value=float(ep_ave_max_q / float(j)))
                # summary_writer.add_summary(summary, i)

                # summary_writer.flush()

                if i != 0:
                    # FIX: average Qmax over the steps of THIS episode (j),
                    # not the episode index (i) — consistent with the
                    # commented summary code above.  max(j, 1) guards against
                    # a terminal on the very first step.  The print form
                    # `print(("…" % args))` behaves identically under
                    # Python 2 and is valid Python 3.
                    print("|Reward: %.2i | Episode: %d | Qmax: %.4f" % (
                        int(ep_reward), i, (ep_ave_max_q / float(max(j, 1)))))
                break