import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# Actor_Policy, Critic_Value, ReplayBuffer, OrnsteinUhlenbeckProcess and dtype
# are defined elsewhere in this project.


class DDPG(object):
    def __init__(self, observation_dim, num_actions, batch_size, gamma,
                 d_epsilon, update_rate, is_train):
        self.observation_dim = observation_dim
        self.num_actions = num_actions

        # Actor (policy) network and its target copy
        self.actor = Actor_Policy(observation_dim, num_actions).type(dtype)
        self.actor_target = Actor_Policy(observation_dim, num_actions).type(dtype)
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=1e-4)

        # Critic (Q-value) network and its target copy
        self.critic = Critic_Value(observation_dim, num_actions).type(dtype)
        self.critic_target = Critic_Value(observation_dim, num_actions).type(dtype)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=1e-3,
                                           weight_decay=1e-2)

        # Experience replay and Ornstein-Uhlenbeck exploration noise
        self.replay_buffer = ReplayBuffer(int(1e6))
        self.ornstein_uhlenbeck = OrnsteinUhlenbeckProcess(theta=0.15, sigma=0.2)

        self.batch_size = batch_size
        self.update_rate = update_rate
        self.epsilon = 1
        self.d_epsilon = 1.0 / d_epsilon
        self.is_train = is_train
        self.gamma = gamma

    def update_target(self, target, original, update_rate):
        # Polyak (soft) update: target <- (1 - tau) * target + tau * original
        for target_param, param in zip(target.parameters(), original.parameters()):
            target_param.data.copy_(
                (1.0 - update_rate) * target_param.data + update_rate * param.data)

    def select_action(self, state):
        obs = Variable(torch.from_numpy(np.array([state]))).type(dtype)
        action = self.actor(obs).cpu().squeeze(dim=0).data.numpy()
        # During training, add annealed Ornstein-Uhlenbeck noise for exploration
        action = action + self.is_train * max(self.epsilon, 0) * \
            self.ornstein_uhlenbeck.sample()
        action = np.clip(action, -1.0, 1.0)
        if self.epsilon > 0:
            self.epsilon -= self.d_epsilon
        return action

    def reset(self):
        self.ornstein_uhlenbeck.reset_states()

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.add(state, action, reward, next_state, done)

    def update_model(self):
        # Wait until the replay buffer holds at least one full batch
        if self.replay_buffer.current_count() < self.batch_size:
            return

        state_batch, action_batch, reward_batch, \
            next_state_batch, done_batch = self.replay_buffer.sample(self.batch_size)

        state_batch = Variable(torch.from_numpy(np.array(state_batch))).type(dtype)
        action_batch = Variable(torch.from_numpy(np.array(action_batch))).type(dtype)
        reward_batch = Variable(torch.from_numpy(np.array(reward_batch))).type(dtype)
        next_state_batch = Variable(
            torch.from_numpy(np.array(next_state_batch))).type(dtype)
        done_mask = Variable(torch.from_numpy(
            1 - np.array([done_batch]).T.astype(int))).type(dtype)

        # -----
        # Compute Bellman error to update critic
        # -----
        # (a) Q(s', mu(s' | theta_mu_frozen) | theta_q_frozen)
        action_tp1_target = self.actor_target(next_state_batch)
        Q_target_tp1_values = self.critic_target(
            [next_state_batch, action_tp1_target]).detach()
        # If the current state ends the episode, there is no next Q-value
        Q_target_tp1_values = done_mask * Q_target_tp1_values

        # (b) Q(s, a | theta_q)
        Q_values = self.critic([state_batch, action_batch])

        # Bellman target: y_i = r + gamma * (a); minimize MSE between (b) and y_i
        y_i = reward_batch + self.gamma * Q_target_tp1_values
        critic_loss = nn.MSELoss()
        error = critic_loss(Q_values, y_i)
        self.critic_optimizer.zero_grad()
        error.backward()
        self.critic_optimizer.step()

        # -----
        # Update actor using critic
        # -----
        # Gradient ascent on Q(s, mu(s)), implemented as descent on -Q(s, mu(s))
        predicted_actions = self.actor(state_batch)
        actor_loss = (-self.critic([state_batch, predicted_actions])).mean()
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # -----
        # Update target networks
        # -----
        self.update_target(self.critic_target, self.critic, self.update_rate)
        self.update_target(self.actor_target, self.actor, self.update_rate)
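
# A minimal training-loop sketch showing how this DDPG class could be driven.
# The environment ("Pendulum-v1"), the hyperparameter values, and the classic
# gym step/reset API (4-tuple step, raw-observation reset) are assumptions made
# for illustration; they are not part of the class above.
if __name__ == "__main__":
    import gym

    env = gym.make("Pendulum-v1")
    agent = DDPG(observation_dim=env.observation_space.shape[0],
                 num_actions=env.action_space.shape[0],
                 batch_size=64, gamma=0.99, d_epsilon=50000,
                 update_rate=1e-3, is_train=True)

    for episode in range(100):
        state = env.reset()
        agent.reset()  # re-initialize the OU noise process each episode
        done = False
        while not done:
            action = agent.select_action(state)
            next_state, reward, done, _ = env.step(action)
            agent.store_experience(state, action, reward, next_state, done)
            agent.update_model()  # one gradient step per environment step
            state = next_state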