import torch
import torch.nn as nn
import torch.nn.functional as F

import utils


def optimize_model(memory, batch_size, gamma=0.999):
    if len(memory) < batch_size:
        return
    transitions = memory.sample(batch_size)
    batch = utils.Transition(*zip(*transitions))

    next_state_batch = torch.stack(batch.next_state).to(device)
    state_batch = torch.stack(batch.state).to(device)
    action_batch = torch.stack(batch.action).to(device)
    reward_batch = torch.stack(batch.reward).to(device)
    done_batch = torch.stack(batch.done).to(device)

    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken
    state_action_values = policy_net(state_batch).gather(1, action_batch)

    # Compute V(s_{t+1}) for all next states. Double DQN: the policy net
    # picks the greedy action, the target net evaluates it.
    next_action = policy_net(next_state_batch).argmax(dim=1).unsqueeze(1)
    next_state_values = target_net(next_state_batch).gather(
        1, next_action).detach()

    # Compute the expected Q values; terminal states contribute only the reward.
    expected_state_action_values = (next_state_values * gamma *
                                    (1.0 - done_batch)) + reward_batch

    # Compute Huber loss
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)

    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    for param in policy_net.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()
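# The listings above and below lean on context defined elsewhere. A minimal
# sketch of what they appear to assume, hedged: utils.Transition is a
# namedtuple whose fields hold tensors (so torch.stack works field by field),
# and device, policy_net, target_net and optimizer are module-level globals.
# The exact field order below is an assumption, not taken from the original.
from collections import namedtuple

import torch

# Presumably defined in utils.py:
Transition = namedtuple('Transition',
                        ('state', 'action', 'reward', 'next_state', 'done'))

# One of the globals the optimization steps rely on:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')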
def push(self, *args):
    """Saves a transition, assigning it the current maximum priority."""
    max_prio = self.priorities.max() if self.memory else 1.0
    if len(self.memory) < self.capacity:
        self.memory.append(None)
    self.memory[self.position] = utils.Transition(*args)
    self.priorities[self.position] = max_prio
    self.position = (self.position + 1) % self.capacity
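# The push above stores priorities, but the matching sampling side is not
# shown. A minimal sketch of what the prioritized update further below expects
# from memory.sample and memory.update_priorities, assuming self.priorities is
# a numpy array of length capacity and self.prob_alpha is the usual PER alpha;
# names and hyperparameters here are illustrative, not the original code.
import numpy as np


def sample(self, batch_size, beta=0.4):
    if len(self.memory) == self.capacity:
        prios = self.priorities
    else:
        prios = self.priorities[:self.position]

    # Sample indices with probability proportional to priority**alpha.
    probs = prios ** self.prob_alpha
    probs = probs / probs.sum()
    indices = np.random.choice(len(self.memory), batch_size, p=probs)
    transitions = [self.memory[idx] for idx in indices]

    # Importance-sampling weights correct the bias of non-uniform sampling
    # and are normalized by the largest weight for stability.
    total = len(self.memory)
    weights = (total * probs[indices]) ** (-beta)
    weights = weights / weights.max()
    return transitions, indices, weights.astype(np.float32)


def update_priorities(self, indices, priorities):
    # New priorities are the absolute TD errors computed in optimize_model.
    for idx, prio in zip(indices, priorities):
        self.priorities[idx] = prio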
def optimize_model(memory, batch_size, criterion=nn.MSELoss(), gamma=0.999):
    if len(memory) < batch_size:
        return
    transitions = memory.sample(batch_size)
    batch = utils.Transition(*zip(*transitions))

    next_state_batch = torch.stack(batch.next_state).to(device)
    state_batch = torch.stack(batch.state).to(device)
    action_batch = torch.stack(batch.action).to(device)
    reward_batch = torch.stack(batch.reward).to(device)
    done_batch = torch.stack(batch.done).to(device)

    # Critic update: regress Q(s_t, a_t) onto the bootstrapped target
    # r + gamma * Q'(s_{t+1}, mu'(s_{t+1})) built from the target networks.
    state_action_values = critic([state_batch, action_batch])
    next_state_action_values = target_critic(
        [next_state_batch, target_actor(next_state_batch)]).detach()
    expected_state_action_values = (next_state_action_values * gamma *
                                    (1.0 - done_batch)) + reward_batch
    critic_loss = criterion(state_action_values, expected_state_action_values)

    critic_optimizer.zero_grad()
    critic_loss.backward()
    critic_optimizer.step()

    # Actor update: follow the deterministic policy gradient by maximizing
    # the critic's value of the actor's own actions (hence the negative mean).
    actor_loss = -critic([state_batch, actor(state_batch)]).mean()

    actor_optimizer.zero_grad()
    actor_loss.backward()
    actor_optimizer.step()

    # Slowly track the online networks with the target networks.
    soft_update(target_actor, actor)
    soft_update(target_critic, critic)
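# soft_update is called above but not defined in this listing. A sketch of
# the Polyak averaging it presumably performs; tau is a hypothetical
# hyperparameter (e.g. 1e-3), not taken from the original.
def soft_update(target, source, tau=1e-3):
    # target <- tau * source + (1 - tau) * target, parameter by parameter.
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(tau * param.data +
                                (1.0 - tau) * target_param.data)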
def optimize_model(memory, batch_size, gamma=0.999):
    if len(memory) < batch_size:
        return
    transitions, indices, weights = memory.sample(batch_size)
    batch = utils.Transition(*zip(*transitions))

    next_state_batch = torch.stack(batch.next_state).to(device)
    state_batch = torch.stack(batch.state).to(device)
    action_batch = torch.stack(batch.action).to(device)
    reward_batch = torch.stack(batch.reward).to(device)
    done_batch = torch.stack(batch.done).to(device)
    weights_batch = torch.tensor(weights, dtype=torch.float32).to(device)

    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken
    state_action_values = policy_net(state_batch).gather(1, action_batch)

    # Compute V(s_{t+1}) for all next states.
    next_state_values = target_net(next_state_batch).max(1)[0].unsqueeze(
        1).detach()

    # Compute the expected Q values
    expected_state_action_values = (next_state_values * gamma *
                                    (1.0 - done_batch)) + reward_batch

    # Compute the per-sample Huber loss (reduction='none' replaces the
    # deprecated reduce=False) so each TD error can serve as a new priority.
    delta = F.smooth_l1_loss(state_action_values,
                             expected_state_action_values,
                             reduction='none')
    prios = delta.abs() + 1e-5
    # Importance-sampling weights correct the bias of prioritized sampling.
    loss = (delta * weights_batch.unsqueeze(1)).mean()

    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    for param in policy_net.parameters():
        param.grad.data.clamp_(-1, 1)
    memory.update_priorities(indices, prios.data.cpu().numpy())
    optimizer.step()
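# Illustrative usage only, not part of the original: a bare-bones episode
# loop wiring a replay buffer into the prioritized update above. env (an
# older-style gym API whose step returns 4 values), select_action (e.g.
# epsilon-greedy over policy_net, returning a LongTensor of shape (1,)) and
# the reward/done tensor shapes are all assumptions.
def train(env, memory, num_episodes=500, batch_size=128):
    for _ in range(num_episodes):
        state = torch.as_tensor(env.reset(), dtype=torch.float32)
        done = False
        while not done:
            action = select_action(state)
            obs, reward, done, _ = env.step(action.item())
            next_state = torch.as_tensor(obs, dtype=torch.float32)
            # Field order matches the Transition sketch above (an assumption).
            memory.push(state, action,
                        torch.tensor([reward], dtype=torch.float32),
                        next_state,
                        torch.tensor([float(done)]))
            state = next_state
            optimize_model(memory, batch_size)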
def push(self, *args):
    """Saves a transition."""
    if len(self.memory) < self.capacity:
        self.memory.append(None)
    self.memory[self.position] = utils.Transition(*args)
    self.position = (self.position + 1) % self.capacity
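# For context, a minimal sketch of the uniform replay buffer this push
# method belongs to; the class name, __init__ and sample shown here are
# assumptions that merely match how memory.sample(batch_size) and
# len(memory) are used in the optimization functions above.
import random


class ReplayMemory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def sample(self, batch_size):
        # Uniform sampling without replacement.
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)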