Ejemplo n.º 1
0
    def __init__(self,
                 *,
                 stddev_mode='predicted',
                 bounded_transform='tanh',
                 name=None,
                 action_spec=None,
                 input_spec=None):
        """Set up the Gaussian distribution and its linear parameter layers.

        Args:
            stddev_mode: Stored as ``self.stddev_mode``. When it equals
                ``'predicted'`` a linear ``stddev`` submodule is created next
                to the ``mean`` submodule; for any other value no ``stddev``
                submodule is created here.
            bounded_transform: ``'clipping'`` or ``'tanh'``; ``None`` is
                treated as ``'tanh'``. The stored value is reset to ``None``
                when the action spec has neither ``min_value`` nor
                ``max_value``.
            name: Module name, forwarded to the parent constructor and used
                in shape-related error messages.
            action_spec: Action specification; its ``type`` must be
                ``'float'``. Effectively required despite the ``None``
                default, since its attributes are accessed immediately.
            input_spec: Specification of the input embedding, forwarded to
                the parent constructor. Its rank selects between the
                single-embedding layout (rank 1) and the per-action layout
                (rank 2 or 3).

        Raises:
            AssertionError: If ``action_spec.type`` is not ``'float'``.
            TensorforceError: If ``bounded_transform`` is not one of the
                accepted values, if ``'tanh'`` is combined with an action
                space bounded on one side only, if the input rank is outside
                1..3, or if the input shape is incompatible with the action
                shape.
        """
        # NOTE(review): assert is stripped under -O; kept as-is so the
        # exception type seen by callers does not change.
        assert action_spec.type == 'float'

        parameters_spec = TensorsSpec(
            mean=TensorSpec(type='float', shape=action_spec.shape),
            stddev=TensorSpec(type='float', shape=action_spec.shape),
            log_stddev=TensorSpec(type='float', shape=action_spec.shape))
        conditions_spec = TensorsSpec()

        super().__init__(name=name,
                         action_spec=action_spec,
                         input_spec=input_spec,
                         parameters_spec=parameters_spec,
                         conditions_spec=conditions_spec)

        self.stddev_mode = stddev_mode

        if bounded_transform is None:
            bounded_transform = 'tanh'
        if bounded_transform not in ('clipping', 'tanh'):
            raise TensorforceError.value(name='Gaussian',
                                         argument='bounded_transform',
                                         value=bounded_transform,
                                         hint='not in {clipping,tanh}')

        has_min = self.action_spec.min_value is not None
        has_max = self.action_spec.max_value is not None
        if bounded_transform == 'tanh' and has_min != has_max:
            # tanh is rejected when exactly one of the two bounds is given.
            raise TensorforceError.value(
                name='Gaussian',
                argument='bounded_transform',
                value=bounded_transform,
                condition='one-sided bounded action space')
        if not has_min and not has_max:
            # Unbounded action space: no transform is stored.
            bounded_transform = None
        self.bounded_transform = bounded_transform

        # Resolve the output size of the linear layers from the input layout.
        if self.input_spec.rank == 1:
            # Single embedding
            size = self.action_spec.size
        elif self.input_spec.rank < 1 or self.input_spec.rank > 3:
            # Embedding per action, but with an unsupported rank.
            # Report the input shape itself, matching ``argument`` and the
            # incompatible-shape error below.
            raise TensorforceError.value(name=name,
                                         argument='input_spec.shape',
                                         value=self.input_spec.shape,
                                         hint='invalid rank')
        elif self.input_spec.shape[:-1] == self.action_spec.shape[:-1]:
            # Embedding per action: leading axes match the action's leading
            # axes, so the layer outputs the last action axis.
            size = self.action_spec.shape[-1]
        elif self.input_spec.shape[:-1] == self.action_spec.shape:
            # Embedding per action: leading axes match the full action shape.
            # NOTE(review): size 0 presumably asks the linear layer for no
            # extra trailing axis - confirm against the linear layer.
            size = 0
        else:
            raise TensorforceError.value(
                name=name,
                argument='input_spec.shape',
                value=self.input_spec.shape,
                hint='not flattened and incompatible with action shape')

        self.mean = self.submodule(name='mean',
                                   module='linear',
                                   modules=layer_modules,
                                   size=size,
                                   initialization_scale=0.01,
                                   input_spec=self.input_spec)
        if self.stddev_mode == 'predicted':
            self.stddev = self.submodule(name='stddev',
                                         module='linear',
                                         modules=layer_modules,
                                         size=size,
                                         initialization_scale=0.01,
                                         input_spec=self.input_spec)