class MultiAgent():
    """Wrapper that trains several DDPG-style agents against a shared,
    prioritized replay buffer. Assumes Agent, ReplayBuffer and the
    hyperparameters BUFFER_SIZE, BATCH_SIZE and GAMMA are defined elsewhere
    in the project."""

    def __init__(self, num_agents, state_size, action_size):
        self.agents = []
        for i in range(num_agents):
            self.agents.append(Agent(state_size, action_size))
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE)

    def step(self, states, actions, rewards, next_states, done):
        # Store each agent's transition, then learn once enough samples exist.
        for i in range(len(states)):
            self.memory.add(states[i], actions[i], rewards[i], next_states[i], done)
        if len(self.memory) > BATCH_SIZE:
            experiences, indexes = self.memory.sample()
            for agent in self.agents:
                error = agent.learn(experiences, GAMMA)
                # Update the priorities of the sampled experiences.
                self.memory.update(indexes, abs(error))

    def act(self, states, add_noise=True, noise_weight=1.0):
        # Each agent acts on its own observation.
        actions = []
        for i in range(len(self.agents)):
            actions.append(self.agents[i].act(states[i], add_noise, noise_weight))
        return actions

    def reset(self):
        for agent in self.agents:
            agent.reset()
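The wrapper above relies on an Agent class, a prioritized ReplayBuffer, and module-level constants (BUFFER_SIZE, BATCH_SIZE, GAMMA) defined elsewhere. A minimal sketch of how it could be driven from a training loop follows; the constant values and the env interface (reset()/step() returning per-agent states, per-agent rewards, and a single done flag) are assumptions for illustration, not part of the original code.

# Minimal training-loop sketch for the MultiAgent wrapper above.
# The hyperparameter values and the `env` API are hypothetical.
BUFFER_SIZE = int(1e5)   # assumed replay-buffer capacity
BATCH_SIZE = 128         # assumed minibatch size
GAMMA = 0.99             # assumed discount factor

def train(env, num_agents, state_size, action_size, n_episodes=1000):
    multi_agent = MultiAgent(num_agents, state_size, action_size)
    for episode in range(1, n_episodes + 1):
        states = env.reset()                                  # hypothetical env API
        multi_agent.reset()
        episode_rewards = [0.0] * num_agents
        done = False
        while not done:
            actions = multi_agent.act(states, add_noise=True)
            next_states, rewards, done = env.step(actions)    # hypothetical env API
            multi_agent.step(states, actions, rewards, next_states, done)
            states = next_states
            for i, r in enumerate(rewards):
                episode_rewards[i] += r
        print("Episode {}\tmax score: {:.3f}".format(episode, max(episode_rewards)))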
import os

import numpy as np
import torch


class maddpg:
    """Wrapper class managing the different agents in the environment.
    Assumes ddpg, ReplayBuffer, device and the hyperparameters BUFFER_SIZE,
    BATCH_SIZE and LEARNING_PERIOD are defined elsewhere in the project."""

    def __init__(self, num_agents=2, state_size=24, action_size=2):
        """Initialize a maddpg_agent wrapper.

        Params
        ======
            num_agents (int): the number of agents in the environment
            state_size (int): dimension of each state
            action_size (int): dimension of each action
        """
        self.num_agents = num_agents
        self.state_size = state_size
        self.action_size = action_size
        self.agents = [ddpg(state_size, action_size, i + 1, random_seed=0)
                       for i in range(num_agents)]

        # Replay memory shared by all agents
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed=0)

    def reset(self):
        """Resets the OU noise process for each agent."""
        for agent in self.agents:
            agent.reset()

    def act(self, observations, add_noise=False):
        """Picks an action for each agent given its observation."""
        actions = []
        for agent, observation in zip(self.agents, observations):
            action = agent.act(observation, add_noise=add_noise)
            actions.append(action)
        return np.array(actions)

    def step(self, states, actions, rewards, next_states, dones, timestep):
        """Save the experience in replay memory and learn periodically."""
        # Flatten the per-agent observations/actions into a single joint row.
        states = states.reshape(1, -1)
        actions = actions.reshape(1, -1)
        next_states = next_states.reshape(1, -1)
        self.memory.add(states, actions, rewards, next_states, dones)

        # Learn, if enough samples are available in memory
        if len(self.memory) > BATCH_SIZE and timestep % LEARNING_PERIOD == 0:
            for a_i, agent in enumerate(self.agents):
                experiences = self.memory.sample()
                self.learn(experiences, a_i)

    def learn(self, experiences, agent_number):
        """Update one agent. The critic takes the combined observations and
        actions of all agents as input, so first collect the target and local
        actions of every agent for the sampled 'experiences'."""
        next_actions = []
        actions_pred = []
        states, _, _, next_states, _ = experiences
        next_states = next_states.reshape(-1, self.num_agents, self.state_size)
        states = states.reshape(-1, self.num_agents, self.state_size)

        for a_i, agent in enumerate(self.agents):
            agent_id_tensor = self._get_agent_number(a_i)
            state = states.index_select(1, agent_id_tensor).squeeze(1)
            next_state = next_states.index_select(1, agent_id_tensor).squeeze(1)
            next_actions.append(agent.actor_target(next_state))
            actions_pred.append(agent.actor_local(state))

        next_actions = torch.cat(next_actions, dim=1).to(device)
        actions_pred = torch.cat(actions_pred, dim=1).to(device)

        agent = self.agents[agent_number]
        agent.learn(experiences, next_actions, actions_pred)

    def _get_agent_number(self, i):
        """Helper to get an agent's number as a Torch tensor."""
        return torch.tensor([i]).to(device)

    def save_weights(self, dir):
        """Save each agent's actor and critic weights to 'dir'."""
        for i in range(self.num_agents):
            torch.save(self.agents[i].actor_local.state_dict(),
                       os.path.join(dir, 'checkpoint_actor_{}.pth'.format(i)))
            torch.save(self.agents[i].critic_local.state_dict(),
                       os.path.join(dir, 'checkpoint_critic_{}.pth'.format(i)))
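The step() and learn() methods above do a fair amount of shape bookkeeping: per-agent observations are flattened into one joint row before being stored in the shared buffer, then reshaped back into per-agent slices when learning. The short sketch below, assuming the default sizes (2 agents, 24-dimensional observations), only demonstrates those reshape/index_select steps and is independent of the rest of the code.

# Shape bookkeeping sketch for the maddpg wrapper (sizes are the defaults above).
import numpy as np
import torch

num_agents, state_size = 2, 24

# step(): per-agent observations are flattened into one joint row
# before being added to the shared replay buffer.
states = np.random.randn(num_agents, state_size)           # (2, 24)
joint_state = states.reshape(1, -1)                        # (1, 48)

# learn(): a sampled batch of joint states is reshaped back to
# (batch, num_agents, state_size) so each agent's slice can be selected.
batch_size = 128
batch = torch.randn(batch_size, num_agents * state_size)   # (128, 48)
per_agent = batch.reshape(-1, num_agents, state_size)      # (128, 2, 24)
agent_0_obs = per_agent.index_select(1, torch.tensor([0])).squeeze(1)  # (128, 24)
assert agent_0_obs.shape == (batch_size, state_size)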