def compute_forward_loss(self, mini_batch: AgentBuffer) -> torch.Tensor:
    """
    Computes the loss for the next state prediction.
    """
    return torch.mean(
        ModelUtils.dynamic_partition(
            self.compute_reward(mini_batch),
            ModelUtils.list_to_tensor(mini_batch["masks"], dtype=torch.float),
            2,
        )[1]
    )
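# A minimal sketch of the masking step used above, assuming
# ModelUtils.dynamic_partition mirrors tf.dynamic_partition: `data` is split
# into `num_partitions` buckets by the integer value of `partitions`, so
# indexing the result with [1] keeps only the active (mask == 1) timesteps and
# excludes padded steps from the mean. `_dynamic_partition_sketch` is a
# hypothetical helper for illustration, not the ML-Agents API.
def _dynamic_partition_sketch(data, partitions, num_partitions):
    # data: 1-D tensor of per-timestep values; partitions: 0/1 mask tensor.
    # Returns a list with one tensor per partition id.
    return [data[partitions.long() == i] for i in range(num_partitions)]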
def compute_inverse_loss(self, mini_batch: AgentBuffer) -> torch.Tensor:
    """
    Computes the inverse loss for a mini_batch. Corresponds to the error on the
    action prediction (given the current and next state).
    """
    predicted_action = self.predict_action(mini_batch)
    actions = AgentAction.from_dict(mini_batch)
    _inverse_loss = 0
    if self._action_spec.continuous_size > 0:
        # Squared error between the true and predicted continuous actions,
        # summed over action dimensions.
        sq_difference = (
            actions.continuous_tensor - predicted_action.continuous
        ) ** 2
        sq_difference = torch.sum(sq_difference, dim=1)
        _inverse_loss += torch.mean(
            ModelUtils.dynamic_partition(
                sq_difference,
                ModelUtils.list_to_tensor(mini_batch["masks"], dtype=torch.float),
                2,
            )[1]
        )
    if self._action_spec.discrete_size > 0:
        # Cross-entropy between the one-hot true actions and the predicted
        # distribution, summed over all discrete branches.
        true_action = torch.cat(
            ModelUtils.actions_to_onehot(
                actions.discrete_tensor, self._action_spec.discrete_branches
            ),
            dim=1,
        )
        cross_entropy = torch.sum(
            -torch.log(predicted_action.discrete + self.EPSILON) * true_action,
            dim=1,
        )
        _inverse_loss += torch.mean(
            ModelUtils.dynamic_partition(
                cross_entropy,
                ModelUtils.list_to_tensor(
                    mini_batch["masks"], dtype=torch.float
                ),  # use masks not action_masks
                2,
            )[1]
        )
    return _inverse_loss
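# A hedged sketch of the one-hot step in the discrete branch above, assuming
# ModelUtils.actions_to_onehot returns one one-hot tensor per discrete branch
# (the caller then concatenates them along dim=1 before the cross-entropy).
# `_actions_to_onehot_sketch` is a hypothetical name, not the ML-Agents API.
def _actions_to_onehot_sketch(discrete_actions, branch_sizes):
    # discrete_actions: (batch, num_branches) integer action indices;
    # branch_sizes: number of choices in each discrete branch.
    return [
        torch.nn.functional.one_hot(discrete_actions[:, i].long(), size).float()
        for i, size in enumerate(branch_sizes)
    ]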
# Alternate version of compute_inverse_loss for a policy whose action space is
# entirely continuous or entirely discrete (branches on self._policy_specs
# rather than self._action_spec).
def compute_inverse_loss(self, mini_batch: AgentBuffer) -> torch.Tensor:
    """
    Computes the inverse loss for a mini_batch. Corresponds to the error on the
    action prediction (given the current and next state).
    """
    predicted_action = self.predict_action(mini_batch)
    if self._policy_specs.is_action_continuous():
        sq_difference = (
            ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.float)
            - predicted_action
        ) ** 2
        sq_difference = torch.sum(sq_difference, dim=1)
        return torch.mean(
            ModelUtils.dynamic_partition(
                sq_difference,
                ModelUtils.list_to_tensor(mini_batch["masks"], dtype=torch.float),
                2,
            )[1]
        )
    else:
        true_action = torch.cat(
            ModelUtils.actions_to_onehot(
                ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.long),
                self._policy_specs.discrete_action_branches,
            ),
            dim=1,
        )
        cross_entropy = torch.sum(
            -torch.log(predicted_action + self.EPSILON) * true_action, dim=1
        )
        return torch.mean(
            ModelUtils.dynamic_partition(
                cross_entropy,
                ModelUtils.list_to_tensor(
                    mini_batch["masks"], dtype=torch.float
                ),  # use masks not action_masks
                2,
            )[1]
        )
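# Hypothetical usage sketch showing how the forward and inverse losses are
# typically combined into a single curiosity (ICM-style) update. The `beta`
# trade-off and `self.optimizer` are assumptions for illustration, not taken
# from this file.
def _update_sketch(self, mini_batch: AgentBuffer, beta: float = 0.2) -> torch.Tensor:
    # Weighted sum of the two losses; beta balances forward vs. inverse.
    loss = beta * self.compute_forward_loss(mini_batch) + (
        1.0 - beta
    ) * self.compute_inverse_loss(mini_batch)
    self.optimizer.zero_grad()  # assumed torch.optim.Optimizer over this module
    loss.backward()
    self.optimizer.step()
    return loss.detach()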