def __init__(
    self, *, stddev_mode='predicted', bounded_transform='tanh', name=None, action_spec=None,
    input_spec=None
):
    """Set up the mean (and optionally stddev) projection layers of the distribution.

    Args:
        stddev_mode: Stored as ``self.stddev_mode``. When equal to ``'predicted'``, a
            separate ``'stddev'`` linear submodule is created next to ``'mean'``; for any
            other value no stddev submodule is created here.
        bounded_transform: ``'clipping'`` or ``'tanh'``. ``None`` is treated as ``'tanh'``.
            The stored ``self.bounded_transform`` is reset to ``None`` when the action
            spec has neither ``min_value`` nor ``max_value``.
        name: Forwarded to the superclass constructor; also used in shape error messages.
        action_spec: Action specification, forwarded to the superclass constructor. Its
            ``type`` must be ``'float'``.
        input_spec: Input specification, forwarded to the superclass constructor.

    Raises:
        AssertionError: If ``action_spec.type`` is not ``'float'``.
        TensorforceError: If ``bounded_transform`` is not one of the accepted values, if
            ``'tanh'`` is requested for an action space bounded on one side only, or if
            the input rank/shape is incompatible with the action shape.
    """
    assert action_spec.type == 'float'

    # The three distribution parameters all share the action shape.
    parameters_spec = TensorsSpec(
        mean=TensorSpec(type='float', shape=action_spec.shape),
        stddev=TensorSpec(type='float', shape=action_spec.shape),
        log_stddev=TensorSpec(type='float', shape=action_spec.shape)
    )
    conditions_spec = TensorsSpec()

    super().__init__(
        name=name, action_spec=action_spec, input_spec=input_spec,
        parameters_spec=parameters_spec, conditions_spec=conditions_spec
    )

    self.stddev_mode = stddev_mode

    if bounded_transform is None:
        bounded_transform = 'tanh'
    if bounded_transform not in ('clipping', 'tanh'):
        raise TensorforceError.value(
            name='Gaussian', argument='bounded_transform', value=bounded_transform,
            hint='not in {clipping,tanh}'
        )
    elif bounded_transform == 'tanh' and (
        # True exactly when one bound is set and the other is not.
        (self.action_spec.min_value is not None) is not
        (self.action_spec.max_value is not None)
    ):
        raise TensorforceError.value(
            name='Gaussian', argument='bounded_transform', value=bounded_transform,
            condition='one-sided bounded action space'
        )
    elif self.action_spec.min_value is None and self.action_spec.max_value is None:
        # Unbounded action space: there is nothing to transform.
        bounded_transform = None
    self.bounded_transform = bounded_transform

    # Select the output size of the linear projection(s) from the input layout.
    if self.input_spec.rank == 1:
        # Single embedding
        size = self.action_spec.size
    elif self.input_spec.rank < 1 or self.input_spec.rank > 3:
        # Embedding per action, but with an unsupported rank. Report the offending input
        # shape, matching the incompatible-shape error below.
        raise TensorforceError.value(
            name=name, argument='input_spec.shape', value=self.input_spec.shape,
            hint='invalid rank'
        )
    elif self.input_spec.shape[:-1] == self.action_spec.shape[:-1]:
        # Embedding per action: leading axes match the action's leading axes.
        size = self.action_spec.shape[-1]
    elif self.input_spec.shape[:-1] == self.action_spec.shape:
        # Embedding per action: leading axes match the full action shape.
        # NOTE(review): presumably size=0 asks the linear layer for one value per
        # embedding without a trailing axis - confirm against the linear layer.
        size = 0
    else:
        raise TensorforceError.value(
            name=name, argument='input_spec.shape', value=self.input_spec.shape,
            hint='not flattened and incompatible with action shape'
        )

    self.mean = self.submodule(
        name='mean', module='linear', modules=layer_modules, size=size,
        initialization_scale=0.01, input_spec=self.input_spec
    )
    if self.stddev_mode == 'predicted':
        self.stddev = self.submodule(
            name='stddev', module='linear', modules=layer_modules, size=size,
            initialization_scale=0.01, input_spec=self.input_spec
        )