def __init__(
    self, network='auto', *, device=None, l2_regularization=None, name=None, states_spec=None,
    auxiliaries_spec=None, internals_spec=None, actions_spec=None
):
    """
    Set up a state-value policy: a network over the states followed by a linear value head.

    Args:
        network: Network specification, handed unchanged to `ParametrizedPolicy.__init__`
            (default: 'auto').
        device: Forwarded to the parent constructor.
        l2_regularization: Forwarded to the parent constructor.
        name: Forwarded to the parent constructor.
        states_spec: Forwarded to the parent constructor; afterwards `self.states_spec` is
            used as the network's input specification.
        auxiliaries_spec: Forwarded to the parent constructor.
        internals_spec: Accepted by the signature but not read in this constructor.
        actions_spec: Forwarded to the parent constructor.
    """
    super().__init__(
        device=device,
        l2_regularization=l2_regularization,
        name=name,
        states_spec=states_spec,
        auxiliaries_spec=auxiliaries_spec,
        actions_spec=actions_spec,
    )

    # The network must exist before its output specification can be queried.
    ParametrizedPolicy.__init__(self, network=network, inputs_spec=self.states_spec)
    embedding_spec = self.network.output_spec()

    # State value
    # NOTE(review): size=0 presumably requests a scalar output from the linear layer --
    # confirm against the layer documentation.
    self.value = self.submodule(
        name='value',
        module='linear',
        modules=layer_modules,
        size=0,
        input_spec=embedding_spec,
    )
def __init__(
    self, network='auto', *, device=None, l2_regularization=None, name=None, states_spec=None,
    auxiliaries_spec=None, internals_spec=None, actions_spec=None
):
    """
    Set up an action-value policy: a network over states and actions plus a linear value head.

    The network receives a `TensorsSpec` with two entries, 'states' and 'actions'. Each entry
    is the corresponding spec, replaced by its single element when the spec is a singleton.

    Args:
        network: Network specification, handed unchanged to `ParametrizedPolicy.__init__`
            (default: 'auto').
        device: Forwarded to the parent constructor.
        l2_regularization: Forwarded to the parent constructor.
        name: Forwarded to the parent constructor.
        states_spec: Forwarded to the parent constructor.
        auxiliaries_spec: Forwarded to the parent constructor.
        internals_spec: Accepted by the signature but not read in this constructor.
        actions_spec: Forwarded to the parent constructor.
    """
    super().__init__(
        device=device,
        l2_regularization=l2_regularization,
        name=name,
        states_spec=states_spec,
        auxiliaries_spec=auxiliaries_spec,
        actions_spec=actions_spec,
    )

    def unwrapped(spec):
        # A singleton spec is replaced by its only element; anything else is used as is.
        return spec.singleton() if spec.is_singleton() else spec

    # Insertion order is kept: 'states' first, then 'actions'.
    inputs_spec = TensorsSpec()
    inputs_spec['states'] = unwrapped(self.states_spec)
    inputs_spec['actions'] = unwrapped(self.actions_spec)

    ParametrizedPolicy.__init__(self, network=network, inputs_spec=inputs_spec)
    embedding_spec = self.network.output_spec()

    # Action value
    # NOTE(review): size=0 presumably requests a scalar output from the linear layer --
    # confirm against the layer documentation.
    self.value = self.submodule(
        name='value',
        module='linear',
        modules=layer_modules,
        size=0,
        input_spec=embedding_spec,
    )
def __init__(
    self, network='auto', *, single_output=True, distributions=None, temperature=1.0,
    use_beta_distribution=False, device=None, l2_regularization=None, name=None,
    states_spec=None, auxiliaries_spec=None, internals_spec=None, actions_spec=None
):
    """
    Set up a distribution-based policy: a network over the states plus one distribution
    submodule per action.

    Args:
        network: Network specification, handed unchanged to `ParametrizedPolicy.__init__`
            (default: 'auto').
        single_output: If true, no named network outputs are requested (`outputs=None`).
            Otherwise one '<action>-embedding' output is requested per action, or a single
            'action-embedding' output when the actions spec is a singleton.
        distributions: Optional distribution configuration. Looked up in this order: a plain
            string (singleton action only), an entry keyed by action name, an entry keyed by
            action type, or a dict containing 'type' (singleton action only). If None, the
            per-type default is used.
        temperature: Forwarded to the parent constructor (default: 1.0).
        use_beta_distribution: If true, float actions whose `min_value` is set default to
            'beta' instead of 'gaussian'.
        device: Forwarded to the parent constructor.
        l2_regularization: Forwarded to the parent constructor.
        name: Forwarded to the parent constructor.
        states_spec: Forwarded to the parent constructor; afterwards `self.states_spec` is
            used as the network's input specification.
        auxiliaries_spec: Forwarded to the parent constructor.
        internals_spec: Accepted by the signature but not read in this constructor.
        actions_spec: Forwarded to the parent constructor.

    Raises:
        TensorforceError: If an action has a type other than bool/int/float.
    """
    super().__init__(
        temperature=temperature, device=device, l2_regularization=l2_regularization, name=name,
        states_spec=states_spec, auxiliaries_spec=auxiliaries_spec, actions_spec=actions_spec
    )

    # Named network outputs, one embedding per action unless a single output is shared.
    if single_output:
        outputs = None
    elif self.actions_spec.is_singleton():
        outputs = ('action-embedding',)
    else:
        outputs = tuple(action_name + '-embedding' for action_name in self.actions_spec)

    ParametrizedPolicy.__init__(
        self=self, network=network, inputs_spec=self.states_spec, outputs=outputs
    )
    output_spec = self.network.output_spec()
    if not isinstance(output_spec, TensorsSpec):
        # Normalize a bare spec so that the 'embedding' lookup below always works.
        output_spec = TensorsSpec(embedding=output_spec)

    # Distributions
    self.distributions = ModuleDict()
    for action_name, spec in self.actions_spec.items():
        # Default distribution per action type.
        if spec.type == 'bool':
            default_module = 'bernoulli'
        elif spec.type == 'int':
            # Internal invariant kept from the original code.
            assert spec.num_values is not None
            default_module = 'categorical'
        elif spec.type == 'float':
            if use_beta_distribution and spec.min_value is not None:
                default_module = 'beta'
            else:
                default_module = 'gaussian'
        else:
            # Without this branch `default_module` would be unbound on the first action, or
            # silently carried over from the previous action on later ones.
            raise TensorforceError.value(
                name=type(self).__name__, argument='actions_spec', value=actions_spec,
                hint='types not bool/int/float'
            )

        # User-specified distribution configuration, if any.
        # NOTE(review): when `distributions` is a str and the action is named, the
        # membership tests below are substring tests on that str -- confirm whether a
        # plain string is meant to be supported for non-singleton actions.
        if distributions is None:
            module = None
        else:
            module = dict()
            if action_name is None and isinstance(distributions, str):
                module = distributions
            elif action_name in distributions:
                if isinstance(distributions[action_name], str):
                    module = distributions[action_name]
                else:
                    module.update(distributions[action_name])
            elif spec.type in distributions:
                if isinstance(distributions[spec.type], str):
                    module = distributions[spec.type]
                else:
                    module.update(distributions[spec.type])
            elif action_name is None and 'type' in distributions:
                module.update(distributions)

        # A singleton action has name None and uses fixed identifiers.
        if action_name is None:
            module_name = 'action_distribution'
            embedding_key = 'action-embedding'
        else:
            module_name = action_name + '_distribution'
            embedding_key = action_name + '-embedding'

        # Fall back to the shared 'embedding' output if no per-action output exists.
        self.distributions[action_name] = self.submodule(
            name=module_name, module=module, modules=distribution_modules,
            default_module=default_module, action_spec=spec,
            input_spec=output_spec.get(embedding_key, output_spec['embedding'])
        )

    self.kldiv_reference_spec = self.distributions.fmap(
        function=(lambda x: x.parameters_spec), cls=TensorsSpec
    )
def __init__(
    self, network='auto', *, state_value_mode='separate', device=None, l2_regularization=None,
    name=None, states_spec=None, auxiliaries_spec=None, internals_spec=None, actions_spec=None
):
    """
    Set up a value-based policy: a network over the states, one linear action-value head per
    action, and optional state-value head(s) depending on `state_value_mode`.

    Args:
        network: Network specification, handed unchanged to `ParametrizedPolicy.__init__`
            (default: 'auto').
        state_value_mode: One of 'implicit', 'separate', 'separate-per-action'
            (default: 'separate').
            'implicit' creates no state-value module here.
            'separate' creates a single linear module `self.s_value`.
            'separate-per-action' creates one linear module per action in `self.s_values`.
        device: Forwarded to the parent constructor.
        l2_regularization: Forwarded to the parent constructor.
        name: Forwarded to the parent constructor.
        states_spec: Forwarded to the parent constructor; afterwards `self.states_spec` is
            used as the network's input specification.
        auxiliaries_spec: Forwarded to the parent constructor.
        internals_spec: Accepted by the signature but not read in this constructor.
        actions_spec: Forwarded to the parent constructor; all actions must be bool or int.

    Raises:
        TensorforceError: If any action type is not bool/int, or if `state_value_mode` is
            not one of the supported values.
    """
    super().__init__(
        device=device, l2_regularization=l2_regularization, name=name, states_spec=states_spec,
        auxiliaries_spec=auxiliaries_spec, actions_spec=actions_spec
    )

    if not all(spec.type in ('bool', 'int') for spec in self.actions_spec.values()):
        raise TensorforceError.value(
            name='ParametrizedValuePolicy', argument='actions_spec', value=actions_spec,
            hint='types not bool/int'
        )

    ParametrizedPolicy.__init__(self=self, network=network, inputs_spec=self.states_spec)
    output_spec = self.network.output_spec()

    # Action values

    def action_value_module(name, spec):
        # A singleton action has name None and uses a fixed module name.
        if name is None:
            module_name = 'action_value'
        else:
            module_name = name + '_action_value'
        if spec.type == 'bool':
            # Two outputs per action element (one per boolean value).
            return self.submodule(
                name=module_name, module='linear', modules=layer_modules,
                size=(spec.size * 2), input_spec=output_spec
            )
        elif spec.type == 'int':
            # One output per discrete value of every action element.
            return self.submodule(
                name=module_name, module='linear', modules=layer_modules,
                size=(spec.size * spec.num_values), input_spec=output_spec
            )

    self.a_values = self.actions_spec.fmap(
        function=action_value_module, cls=ModuleDict, with_names=True
    )

    # State value mode
    if state_value_mode not in ('implicit', 'separate', 'separate-per-action'):
        raise TensorforceError.value(
            name='ParametrizedValuePolicy', argument='state_value_mode',
            value=state_value_mode, hint='not from {implicit,separate,separate-per-action}'
        )
    self.state_value_mode = state_value_mode

    if self.state_value_mode == 'separate':
        # State value
        # NOTE(review): size=0 presumably requests a scalar output from the linear layer --
        # confirm against the layer documentation.
        self.s_value = self.submodule(
            name='value', module='linear', modules=layer_modules, size=0,
            input_spec=output_spec
        )

    elif self.state_value_mode == 'separate-per-action':
        # State values per action

        def state_value_module(name, spec):
            if name is None:
                module_name = 'state_value'
            else:
                module_name = name + '_state_value'
            return self.submodule(
                name=module_name, module='linear', modules=layer_modules, size=spec.size,
                input_spec=output_spec
            )

        # One state-value head per *action*, mirroring `self.a_values`. The original code
        # mapped over `self.states_spec`, which yields one head per state input instead.
        self.s_values = self.actions_spec.fmap(
            function=state_value_module, cls=ModuleDict, with_names=True
        )