コード例 #1
0
    def __init__(
        self, network='auto', *, device=None, l2_regularization=None, name=None,
        states_spec=None, auxiliaries_spec=None, internals_spec=None, actions_spec=None
    ):
        """Initialize the parent policy, the parametrizing network and the value layer.

        Args:
            network: Network specification, handed to ``ParametrizedPolicy.__init__``.
            device: Forwarded to the parent constructor.
            l2_regularization: Forwarded to the parent constructor.
            name: Forwarded to the parent constructor.
            states_spec: Forwarded to the parent constructor.
            auxiliaries_spec: Forwarded to the parent constructor.
            internals_spec: Accepted for signature compatibility; not used by
                this constructor.
            actions_spec: Forwarded to the parent constructor.
        """
        parent_kwargs = dict(
            device=device,
            l2_regularization=l2_regularization,
            name=name,
            states_spec=states_spec,
            auxiliaries_spec=auxiliaries_spec,
            actions_spec=actions_spec,
        )
        super().__init__(**parent_kwargs)

        # The network consumes the states spec as set up by the parent constructor.
        ParametrizedPolicy.__init__(self=self, network=network, inputs_spec=self.states_spec)
        network_output_spec = self.network.output_spec()

        # State value: linear layer applied to the network output
        self.value = self.submodule(
            name='value', module='linear', modules=layer_modules, size=0,
            input_spec=network_output_spec
        )
コード例 #2
0
    def __init__(self,
                 network='auto',
                 *,
                 device=None,
                 l2_regularization=None,
                 name=None,
                 states_spec=None,
                 auxiliaries_spec=None,
                 internals_spec=None,
                 actions_spec=None):
        """Initialize the parent policy, a network over states and actions, and the value layer.

        Args:
            network: Network specification, handed to ``ParametrizedPolicy.__init__``.
            device: Forwarded to the parent constructor.
            l2_regularization: Forwarded to the parent constructor.
            name: Forwarded to the parent constructor.
            states_spec: Forwarded to the parent constructor.
            auxiliaries_spec: Forwarded to the parent constructor.
            internals_spec: Accepted for signature compatibility; not used by
                this constructor.
            actions_spec: Forwarded to the parent constructor.
        """
        super().__init__(device=device,
                         l2_regularization=l2_regularization,
                         name=name,
                         states_spec=states_spec,
                         auxiliaries_spec=auxiliaries_spec,
                         actions_spec=actions_spec)

        # Network inputs are the states and the actions; a singleton spec is
        # replaced by its single entry, anything else is passed through whole.
        network_inputs = TensorsSpec()
        for key, spec in (('states', self.states_spec), ('actions', self.actions_spec)):
            network_inputs[key] = spec.singleton() if spec.is_singleton() else spec

        ParametrizedPolicy.__init__(self=self,
                                    network=network,
                                    inputs_spec=network_inputs)
        network_output_spec = self.network.output_spec()

        # Action value: linear layer applied to the network output
        self.value = self.submodule(name='value',
                                    module='linear',
                                    modules=layer_modules,
                                    size=0,
                                    input_spec=network_output_spec)
コード例 #3
0
    def __init__(
        self, network='auto', *, single_output=True, distributions=None, temperature=1.0,
        use_beta_distribution=False, device=None, l2_regularization=None, name=None,
        states_spec=None, auxiliaries_spec=None, internals_spec=None, actions_spec=None
    ):
        """Initialize the parent policy, the network and one distribution module per action.

        Args:
            network: Network specification, handed to ``ParametrizedPolicy.__init__``.
            single_output: If true, no explicit network outputs are requested;
                otherwise one ``'<action>-embedding'`` output is requested per
                action (``'action-embedding'`` for a singleton actions spec).
            distributions: Optional distribution configuration. May be ``None``,
                a string (only honoured for an unnamed action), or a mapping
                looked up first by action name, then by action type; for an
                unnamed action a mapping containing ``'type'`` is used as a whole.
            temperature: Forwarded to the parent constructor.
            use_beta_distribution: If true, float actions whose ``min_value`` is
                set default to ``'beta'`` instead of ``'gaussian'``.
            device: Forwarded to the parent constructor.
            l2_regularization: Forwarded to the parent constructor.
            name: Forwarded to the parent constructor.
            states_spec: Forwarded to the parent constructor.
            auxiliaries_spec: Forwarded to the parent constructor.
            internals_spec: Accepted for signature compatibility; not used by
                this constructor.
            actions_spec: Forwarded to the parent constructor.
        """
        super().__init__(
            temperature=temperature, device=device, l2_regularization=l2_regularization,
            name=name, states_spec=states_spec, auxiliaries_spec=auxiliaries_spec,
            actions_spec=actions_spec
        )

        # Requested network outputs
        if single_output:
            outputs = None
        elif self.actions_spec.is_singleton():
            outputs = ('action-embedding', )
        else:
            outputs = tuple(action + '-embedding' for action in self.actions_spec)
        ParametrizedPolicy.__init__(
            self=self, network=network, inputs_spec=self.states_spec, outputs=outputs
        )

        # Normalize the network output spec so it can always be indexed by key.
        embeddings_spec = self.network.output_spec()
        if not isinstance(embeddings_spec, TensorsSpec):
            embeddings_spec = TensorsSpec(embedding=embeddings_spec)

        # Distributions
        self.distributions = ModuleDict()
        for action_name, action_spec in self.actions_spec.items():
            action_type = action_spec.type

            # Default distribution by action type
            if action_type == 'bool':
                default_module = 'bernoulli'
            elif action_type == 'int':
                assert action_spec.num_values is not None
                default_module = 'categorical'
            elif action_type == 'float':
                prefer_beta = use_beta_distribution and action_spec.min_value is not None
                default_module = 'beta' if prefer_beta else 'gaussian'

            # Explicit configuration for this action, if any.
            # NOTE(review): when `distributions` is a str and the action is named,
            # the `in` checks below are substring tests on that string, and a match
            # leads to indexing the string with a str key -- confirm whether a str
            # configuration is meant to be supported for named actions.
            if distributions is None:
                module = None
            elif action_name is None and isinstance(distributions, str):
                module = distributions
            elif action_name in distributions:
                by_name = distributions[action_name]
                module = by_name if isinstance(by_name, str) else dict(by_name)
            elif action_type in distributions:
                by_type = distributions[action_type]
                module = by_type if isinstance(by_type, str) else dict(by_type)
            elif action_name is None and 'type' in distributions:
                module = dict(distributions)
            else:
                module = dict()

            # Module name and preferred embedding key depend on whether the
            # action is named.
            if action_name is None:
                module_name = 'action_distribution'
                embedding_key = 'action-embedding'
            else:
                module_name = action_name + '_distribution'
                embedding_key = action_name + '-embedding'

            # Falls back to the shared 'embedding' entry when there is no
            # action-specific embedding in the network output spec.
            self.distributions[action_name] = self.submodule(
                name=module_name, module=module, modules=distribution_modules,
                default_module=default_module, action_spec=action_spec,
                input_spec=embeddings_spec.get(embedding_key, embeddings_spec['embedding'])
            )

        self.kldiv_reference_spec = self.distributions.fmap(
            function=(lambda distribution: distribution.parameters_spec), cls=TensorsSpec
        )
コード例 #4
0
    def __init__(self,
                 network='auto',
                 *,
                 state_value_mode='separate',
                 device=None,
                 l2_regularization=None,
                 name=None,
                 states_spec=None,
                 auxiliaries_spec=None,
                 internals_spec=None,
                 actions_spec=None):
        """Initialize the parent policy, the network and the action-/state-value layers.

        Args:
            network: Network specification, handed to ``ParametrizedPolicy.__init__``.
            state_value_mode: One of ``'implicit'``, ``'separate'`` or
                ``'separate-per-action'``. ``'separate'`` creates a single linear
                module ``self.s_value``; ``'separate-per-action'`` creates one
                linear module per action in ``self.s_values``; ``'implicit'``
                creates no state-value module here.
            device: Forwarded to the parent constructor.
            l2_regularization: Forwarded to the parent constructor.
            name: Forwarded to the parent constructor.
            states_spec: Forwarded to the parent constructor.
            auxiliaries_spec: Forwarded to the parent constructor.
            internals_spec: Accepted for signature compatibility; not used by
                this constructor.
            actions_spec: Forwarded to the parent constructor; every action must
                be of type ``'bool'`` or ``'int'``.

        Raises:
            The error built by ``TensorforceError.value`` if any action type is
            not bool/int, or if ``state_value_mode`` is not one of the three
            supported modes.
        """
        super().__init__(device=device,
                         l2_regularization=l2_regularization,
                         name=name,
                         states_spec=states_spec,
                         auxiliaries_spec=auxiliaries_spec,
                         actions_spec=actions_spec)

        # Only discrete (bool/int) actions are supported by the value layers below.
        if not all(spec.type in ('bool', 'int')
                   for spec in self.actions_spec.values()):
            raise TensorforceError.value(name='ParametrizedValuePolicy',
                                         argument='actions_spec',
                                         value=actions_spec,
                                         hint='types not bool/int')

        ParametrizedPolicy.__init__(self=self,
                                    network=network,
                                    inputs_spec=self.states_spec)
        output_spec = self.network.output_spec()

        # Action values: one linear layer per action with
        # (action size * number of alternatives) units.
        def action_value_module(name, spec):
            if name is None:
                name = 'action_value'
            else:
                name = name + '_action_value'
            if spec.type == 'bool':
                return self.submodule(name=name,
                                      module='linear',
                                      modules=layer_modules,
                                      size=(spec.size * 2),
                                      input_spec=output_spec)
            elif spec.type == 'int':
                return self.submodule(name=name,
                                      module='linear',
                                      modules=layer_modules,
                                      size=(spec.size * spec.num_values),
                                      input_spec=output_spec)

        self.a_values = self.actions_spec.fmap(function=action_value_module,
                                               cls=ModuleDict,
                                               with_names=True)

        # State value mode
        if state_value_mode not in ('implicit', 'separate',
                                    'separate-per-action'):
            raise TensorforceError.value(
                name='ParametrizedValuePolicy',
                argument='state_value_mode',
                value=state_value_mode,
                hint='not from {implicit,separate,separate-per-action}')
        self.state_value_mode = state_value_mode

        if self.state_value_mode == 'separate':
            # State value
            self.s_value = self.submodule(name='value',
                                          module='linear',
                                          modules=layer_modules,
                                          size=0,
                                          input_spec=output_spec)

        elif self.state_value_mode == 'separate-per-action':
            # State values per action

            def state_value_module(name, spec):
                if name is None:
                    name = 'state_value'
                else:
                    name = name + '_state_value'
                return self.submodule(name=name,
                                      module='linear',
                                      modules=layer_modules,
                                      size=spec.size,
                                      input_spec=output_spec)

            # Map over the actions spec (not the states spec): this mode keeps
            # one state-value module per action, keyed and sized like a_values.
            self.s_values = self.actions_spec.fmap(function=state_value_module,
                                                   cls=ModuleDict,
                                                   with_names=True)