def __init__(
    self,
    observation_shapes: List[Tuple[int, ...]],
    network_settings: NetworkSettings,
    act_type: ActionType,
    act_size: List[int],
    conditional_sigma: bool = False,
    tanh_squash: bool = False,
):
    super().__init__()
    self.act_type = act_type
    self.act_size = act_size
    self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
    self.is_continuous_int = torch.nn.Parameter(
        torch.Tensor([int(act_type == ActionType.CONTINUOUS)])
    )
    self.act_size_vector = torch.nn.Parameter(torch.Tensor(act_size))
    self.network_body = NetworkBody(observation_shapes, network_settings)
    if network_settings.memory is not None:
        # The memory tensor holds both the LSTM hidden and cell state,
        # so the encoding fed to the distribution is half its size.
        self.encoding_size = network_settings.memory.memory_size // 2
    else:
        self.encoding_size = network_settings.hidden_units
    if self.act_type == ActionType.CONTINUOUS:
        self.distribution = GaussianDistribution(
            self.encoding_size,
            act_size[0],
            conditional_sigma=conditional_sigma,
            tanh_squash=tanh_squash,
        )
    else:
        self.distribution = MultiCategoricalDistribution(
            self.encoding_size, act_size
        )
def forward(
    self,
    vec_inputs: List[torch.Tensor],
    vis_inputs: List[torch.Tensor],
    masks: Optional[torch.Tensor] = None,
    memories: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, int, int, int, int]:
    """
    Note: This forward() method is required for exporting to ONNX. Don't modify
    the inputs and outputs.
    """
    dists, _ = self.get_dists(vec_inputs, vis_inputs, masks, memories, 1)
    if self.action_spec.is_continuous():
        action_list = self.sample_action(dists)
        action_out = torch.stack(action_list, dim=-1)
        if self._clip_action_on_export:
            action_out = torch.clamp(action_out, -3, 3) / 3
    else:
        action_out = torch.cat([dist.all_log_prob() for dist in dists], dim=1)
    return (
        action_out,
        self.version_number,
        torch.Tensor([self.network_body.memory_size]),
        self.is_continuous_int,
        self.act_size_vector,
    )
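# A minimal, self-contained sketch of the export-time clipping used above:
# actions are clamped to [-3, 3] and rescaled by 1/3, so the exported model
# always emits values in [-1, 1]. The sample values are illustrative only,
# not taken from the repo.
import torch

raw_actions = torch.tensor([[-4.2, 0.5, 3.7]])
clipped = torch.clamp(raw_actions, -3, 3) / 3
print(clipped)  # tensor([[-1.0000,  0.1667,  1.0000]])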
def __init__(
    self,
    observation_shapes: List[Tuple[int, ...]],
    network_settings: NetworkSettings,
    action_spec: ActionSpec,
    conditional_sigma: bool = False,
    tanh_squash: bool = False,
):
    super().__init__()
    self.action_spec = action_spec
    self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
    self.is_continuous_int_deprecated = torch.nn.Parameter(
        torch.Tensor([int(self.action_spec.is_continuous())])
    )
    self.continuous_act_size_vector = torch.nn.Parameter(
        torch.Tensor([int(self.action_spec.continuous_size)]), requires_grad=False
    )
    # TODO: export list of branch sizes instead of sum
    self.discrete_act_size_vector = torch.nn.Parameter(
        torch.Tensor([sum(self.action_spec.discrete_branches)]), requires_grad=False
    )
    self.act_size_vector_deprecated = torch.nn.Parameter(
        torch.Tensor(
            [
                self.action_spec.continuous_size
                + sum(self.action_spec.discrete_branches)
            ]
        ),
        requires_grad=False,
    )
    self.network_body = NetworkBody(observation_shapes, network_settings)
    if network_settings.memory is not None:
        self.encoding_size = network_settings.memory.memory_size // 2
    else:
        self.encoding_size = network_settings.hidden_units
    self.action_model = ActionModel(
        self.encoding_size,
        action_spec,
        conditional_sigma=conditional_sigma,
        tanh_squash=tanh_squash,
    )
def __init__(
    self,
    observation_shapes: List[Tuple[int, ...]],
    network_settings: NetworkSettings,
    action_spec: ActionSpec,
    conditional_sigma: bool = False,
    tanh_squash: bool = False,
):
    super().__init__()
    self.action_spec = action_spec
    self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
    self.is_continuous_int = torch.nn.Parameter(
        torch.Tensor([int(self.action_spec.is_continuous())])
    )
    self.act_size_vector = torch.nn.Parameter(
        torch.Tensor(
            [
                self.action_spec.continuous_size
                + sum(self.action_spec.discrete_branches)
            ]
        ),
        requires_grad=False,
    )
    self.network_body = NetworkBody(observation_shapes, network_settings)
    if network_settings.memory is not None:
        self.encoding_size = network_settings.memory.memory_size // 2
    else:
        self.encoding_size = network_settings.hidden_units
    if self.action_spec.is_continuous():
        self.distribution = GaussianDistribution(
            self.encoding_size,
            self.action_spec.continuous_size,
            conditional_sigma=conditional_sigma,
            tanh_squash=tanh_squash,
        )
    else:
        self.distribution = MultiCategoricalDistribution(
            self.encoding_size, self.action_spec.discrete_branches
        )
    # During training, clipping is done in TorchPolicy, but we need to clip
    # before ONNX export as well.
    self._clip_action_on_export = not tanh_squash
def forward(
    self,
    vec_inputs: List[torch.Tensor],
    vis_inputs: List[torch.Tensor],
    masks: Optional[torch.Tensor] = None,
    memories: Optional[torch.Tensor] = None,
) -> Tuple[Union[int, torch.Tensor], ...]:
    """
    Note: This forward() method is required for exporting to ONNX. Don't modify
    the inputs and outputs.
    At this moment, torch.onnx.export() doesn't accept None as tensor to be
    exported, so the size of return tuple varies with action spec.
    """
    encoding, memories_out = self.network_body(
        vec_inputs, vis_inputs, memories=memories, sequence_length=1
    )
    (
        cont_action_out,
        disc_action_out,
        action_out_deprecated,
    ) = self.action_model.get_action_out(encoding, masks)
    export_out = [
        self.version_number,
        torch.Tensor([self.network_body.memory_size]),
    ]
    if self.action_spec.continuous_size > 0:
        export_out += [cont_action_out, self.continuous_act_size_vector]
    if self.action_spec.discrete_size > 0:
        export_out += [disc_action_out, self.discrete_act_size_vector]
    # Only export deprecated nodes with non-hybrid action spec
    if self.action_spec.continuous_size == 0 or self.action_spec.discrete_size == 0:
        export_out += [
            action_out_deprecated,
            self.is_continuous_int_deprecated,
            self.act_size_vector_deprecated,
        ]
    return tuple(export_out)
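# Hedged sketch of how the export tuple above grows with the action spec.
# The counts mirror the branches in forward(); "ActionSpecStub" is a stand-in
# namedtuple for illustration, not the real mlagents_envs ActionSpec.
from collections import namedtuple

ActionSpecStub = namedtuple("ActionSpecStub", ["continuous_size", "discrete_size"])

def expected_output_count(spec: ActionSpecStub) -> int:
    count = 2  # version_number and memory_size are always exported
    if spec.continuous_size > 0:
        count += 2  # cont_action_out, continuous_act_size_vector
    if spec.discrete_size > 0:
        count += 2  # disc_action_out, discrete_act_size_vector
    if spec.continuous_size == 0 or spec.discrete_size == 0:
        count += 3  # deprecated action, is-continuous flag, act size vector
    return count

print(expected_output_count(ActionSpecStub(3, 0)))  # 7 (continuous-only)
print(expected_output_count(ActionSpecStub(3, 2)))  # 6 (hybrid: no deprecated nodes)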
def __init__(
    self,
    observation_specs: List[ObservationSpec],
    network_settings: NetworkSettings,
    action_spec: ActionSpec,
    conditional_sigma: bool = False,
    tanh_squash: bool = False,
):
    super().__init__()
    self.action_spec = action_spec
    self.version_number = torch.nn.Parameter(
        torch.Tensor([self.MODEL_EXPORT_VERSION]), requires_grad=False
    )
    self.is_continuous_int_deprecated = torch.nn.Parameter(
        torch.Tensor([int(self.action_spec.is_continuous())]), requires_grad=False
    )
    self.continuous_act_size_vector = torch.nn.Parameter(
        torch.Tensor([int(self.action_spec.continuous_size)]), requires_grad=False
    )
    self.discrete_act_size_vector = torch.nn.Parameter(
        torch.Tensor([self.action_spec.discrete_branches]), requires_grad=False
    )
    self.act_size_vector_deprecated = torch.nn.Parameter(
        torch.Tensor(
            [
                self.action_spec.continuous_size
                + sum(self.action_spec.discrete_branches)
            ]
        ),
        requires_grad=False,
    )
    self.network_body = NetworkBody(observation_specs, network_settings)
    if network_settings.memory is not None:
        self.encoding_size = network_settings.memory.memory_size // 2
    else:
        self.encoding_size = network_settings.hidden_units
    self.memory_size_vector = torch.nn.Parameter(
        torch.Tensor([int(self.network_body.memory_size)]), requires_grad=False
    )
    self.action_model = ActionModel(
        self.encoding_size,
        action_spec,
        conditional_sigma=conditional_sigma,
        tanh_squash=tanh_squash,
    )
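# Hedged sketch of why the constant, non-trainable nn.Parameters above exist:
# when the actor goes through torch.onnx.export(), constants returned from
# forward() are baked into the graph as named outputs that the inference
# runtime can query. The module, file name, and output names below are
# illustrative assumptions, not the repo's actual exporter code.
import torch

class ConstantsOnly(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Placeholder version value; the real MODEL_EXPORT_VERSION lives on the class.
        self.version_number = torch.nn.Parameter(
            torch.Tensor([3.0]), requires_grad=False
        )

    def forward(self, x):
        return x, self.version_number

torch.onnx.export(
    ConstantsOnly(),
    torch.zeros(1, 4),
    "constants.onnx",
    input_names=["obs"],
    output_names=["out", "version_number"],
)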
def forward(
    self,
    vec_inputs: List[torch.Tensor],
    vis_inputs: List[torch.Tensor],
    masks: Optional[torch.Tensor] = None,
    memories: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, int, int, int, int]:
    """
    Note: This forward() method is required for exporting to ONNX. Don't modify
    the inputs and outputs.
    """
    dists, _ = self.get_dists(vec_inputs, vis_inputs, masks, memories, 1)
    action_list = self.sample_action(dists)
    sampled_actions = torch.stack(action_list, dim=-1)
    if self.act_type == ActionType.CONTINUOUS:
        action_out = sampled_actions
    else:
        action_out = dists[0].all_log_prob()
    return (
        action_out,
        self.version_number,
        torch.Tensor([self.network_body.memory_size]),
        self.is_continuous_int,
        self.act_size_vector,
    )
def __init__(self, lr):
    # TODO: add learning rate decay
    super().__init__()
    self.learning_rate = torch.Tensor([lr])
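# The TODO above mentions learning rate decay; here is a minimal sketch of a
# linear decay schedule such a holder could expose. The helper name and
# signature are assumptions for illustration, not part of the repo.
def linear_decay(initial_lr: float, min_lr: float, max_step: int, step: int) -> float:
    # Interpolate from initial_lr down to min_lr over max_step steps, then hold.
    progress = min(step / max_step, 1.0)
    return initial_lr + progress * (min_lr - initial_lr)

print(linear_decay(3e-4, 1e-5, 1000, 0))     # 0.0003
print(linear_decay(3e-4, 1e-5, 1000, 1000))  # 1e-05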
def __init__(self):
    super().__init__()
    self.__global_step = nn.Parameter(
        torch.Tensor([0]).to(torch.int64), requires_grad=False
    )
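# Hedged usage sketch for the step counter above: keeping the count in a
# non-trainable int64 Parameter means it is saved and restored with the
# module's state_dict, so training can resume at the right step. The class
# name and increment method below are assumptions for illustration.
import torch
import torch.nn as nn

class GlobalStepsSketch(nn.Module):
    def __init__(self):
        super().__init__()
        self.__global_step = nn.Parameter(
            torch.Tensor([0]).to(torch.int64), requires_grad=False
        )

    def increment(self, value: int) -> None:
        # In-place update; no gradient is tracked for this parameter.
        self.__global_step.data += value

steps = GlobalStepsSketch()
steps.increment(5)
print(steps.state_dict())  # the counter travels with checkpoints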
def test_swish():
    layer = Swish()
    input_tensor = torch.Tensor([[1, 2, 3], [4, 5, 6]])
    target_tensor = torch.mul(input_tensor, torch.sigmoid(input_tensor))
    assert torch.all(torch.eq(layer(input_tensor), target_tensor))
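# The test above exercises a Swish activation, elementwise x * sigmoid(x).
# A minimal module sketch that would satisfy the test exactly; the repo's
# actual Swish may differ in details.
import torch
import torch.nn as nn

class Swish(nn.Module):
    def forward(self, data: torch.Tensor) -> torch.Tensor:
        # Swish / SiLU: x * sigmoid(x), computed elementwise.
        return torch.mul(data, torch.sigmoid(data))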