Code Example #1
# Agent, ReplayBuffer and the BUFFER_SIZE / BATCH_SIZE / GAMMA constants
# are assumed to be defined elsewhere in the project.
class MultiAgent:
    """Wrapper that trains several agents against a shared prioritized replay buffer."""
    def __init__(self, num_agents, state_size, action_size):

        self.agents = []

        for i in range(num_agents):
            self.agents.append(Agent(state_size, action_size))

        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE)

    def step(self, states, actions, rewards, next_states, done):
        """Save each agent's transition, then let every agent learn from a sampled batch."""

        for i in range(len(states)):
            self.memory.add(states[i], actions[i], rewards[i], next_states[i],
                            done)

        if len(self.memory) > BATCH_SIZE:
            experiences, indexes = self.memory.sample()

            for agent in self.agents:
                error = agent.learn(experiences, GAMMA)

                # update priorities in the prioritized replay buffer
                self.memory.update(indexes, abs(error))

    def act(self, states, add_noise=True, noise_weight=1.0):
        """Return one action per agent for the given per-agent states."""

        actions = []

        for i in range(len(self.agents)):
            actions.append(self.agents[i].act(states[i], add_noise,
                                              noise_weight))

        return actions

    def reset(self):
        """Reset every agent (e.g. its exploration noise)."""
        for agent in self.agents:
            agent.reset()
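
The wrapper above assumes an episodic multi-agent environment. Below is a minimal usage sketch; the env object and its reset()/step() API, as well as NUM_AGENTS, STATE_SIZE, ACTION_SIZE and the episode count, are hypothetical placeholders, and Agent, ReplayBuffer and the hyperparameter constants are assumed to be defined elsewhere in the project.

# Hypothetical training loop for the MultiAgent wrapper above.
# env, NUM_AGENTS, STATE_SIZE and ACTION_SIZE are placeholders, not part of the original code.
multi_agent = MultiAgent(NUM_AGENTS, STATE_SIZE, ACTION_SIZE)

for episode in range(1, 1001):
    states = env.reset()                        # one observation per agent
    multi_agent.reset()                         # re-initialize exploration noise
    scores = [0.0] * NUM_AGENTS

    while True:
        actions = multi_agent.act(states, add_noise=True)
        next_states, rewards, done = env.step(actions)
        multi_agent.step(states, actions, rewards, next_states, done)
        scores = [s + r for s, r in zip(scores, rewards)]
        states = next_states
        if done:
            break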
Code Example #2
import os

import numpy as np
import torch

# BUFFER_SIZE, BATCH_SIZE, LEARNING_PERIOD, device, ddpg and ReplayBuffer
# are assumed to be defined elsewhere in the project.


class maddpg:
    """Wrapper class managing different agents in the environment."""
    def __init__(self, num_agents=2, state_size=24, action_size=2):
        """Initialize a maddpg_agent wrapper.
        Params
        ======
            num_agents (int): the number of agents in the environment
            state_size (int): dimension of each state
            action_size (int): dimension of each action
        """
        self.num_agents = num_agents
        self.state_size = state_size
        self.action_size = action_size

        self.agents = [
            ddpg(state_size, action_size, i + 1, random_seed=0)
            for i in range(num_agents)
        ]

        # Replay memory
        self.memory = ReplayBuffer(action_size,
                                   BUFFER_SIZE,
                                   BATCH_SIZE,
                                   seed=0)

    def reset(self):
        """Resets OU Noise for each agent."""
        for agent in self.agents:
            agent.reset()

    def act(self, observations, add_noise=False):
        """Picks an action for each agent given."""
        actions = []
        for agent, observation in zip(self.agents, observations):
            action = agent.act(observation, add_noise=add_noise)
            actions.append(action)
        return np.array(actions)

    def step(self, states, actions, rewards, next_states, dones, timestep):
        """Save experience in replay memory."""
        states = states.reshape(1, -1)
        actions = actions.reshape(1, -1)
        next_states = next_states.reshape(1, -1)

        self.memory.add(states, actions, rewards, next_states, dones)

        # Learn, if enough samples are available in memory
        if len(self.memory) > BATCH_SIZE and timestep % LEARNING_PERIOD == 0:
            for a_i, agent in enumerate(self.agents):
                experiences = self.memory.sample()
                self.learn(experiences, a_i)

    def learn(self, experiences, agent_number):
        """ The critic takes as its input the combined observations and 
        actions from all agents. Collect actions from each agent for the 'experiences'. """
        next_actions = []
        actions_pred = []
        states, _, _, next_states, _ = experiences

        next_states = next_states.reshape(-1, self.num_agents, self.state_size)
        states = states.reshape(-1, self.num_agents, self.state_size)

        for a_i, agent in enumerate(self.agents):
            agent_id_tensor = self._get_agent_number(a_i)

            state = states.index_select(1, agent_id_tensor).squeeze(1)
            next_state = next_states.index_select(1,
                                                  agent_id_tensor).squeeze(1)

            next_actions.append(agent.actor_target(next_state))
            actions_pred.append(agent.actor_local(state))

        next_actions = torch.cat(next_actions, dim=1).to(device)
        actions_pred = torch.cat(actions_pred, dim=1).to(device)

        agent = self.agents[agent_number]
        agent.learn(experiences, next_actions, actions_pred)

    def _get_agent_number(self, i):
        """Helper to get an agent's number as a Torch tensor."""
        return torch.tensor([i]).to(device)

    def save_weights(self, dir):
        """Save each agent's actor and critic weights as checkpoints in `dir`."""
        for i in range(self.num_agents):
            torch.save(self.agents[i].actor_local.state_dict(),
                       os.path.join(dir, 'checkpoint_actor_{}.pth'.format(i)))
            torch.save(self.agents[i].critic_local.state_dict(),
                       os.path.join(dir, 'checkpoint_critic_{}.pth'.format(i)))
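
A comparable sketch for the maddpg wrapper, again with a hypothetical env object and a placeholder CHECKPOINT_DIR. Note that act() expects the observations stacked as a (num_agents, state_size) array, that step() flattens states and actions into one joint transition before storing it, and that the running timestep is passed in so the agents only learn every LEARNING_PERIOD steps.

# Hypothetical training loop; env and CHECKPOINT_DIR are placeholders.
wrapper = maddpg(num_agents=2, state_size=24, action_size=2)

timestep = 0
for episode in range(1, 2001):
    states = env.reset()                              # shape: (2, 24)
    wrapper.reset()

    while True:
        actions = wrapper.act(states, add_noise=True) # shape: (2, 2)
        next_states, rewards, dones = env.step(actions)
        wrapper.step(np.array(states), np.array(actions), rewards,
                     np.array(next_states), dones, timestep)
        states = next_states
        timestep += 1
        if np.any(dones):
            break

wrapper.save_weights(CHECKPOINT_DIR)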