Esempio n. 1
0
    def create(environment, **kwargs):
        """
        Creates an environment from a specification.

        Args:
            environment (specification): JSON file, specification key, configuration dictionary,
                library module, or `Environment` object
                (<span style="color:#C00000"><b>required</b></span>).
            kwargs: Additional arguments.

        Returns:
            The given `Environment` object itself, or the `Environment` object constructed from
            the specification.

        Raises:
            AssertionError: If the specification has an unsupported type, resolves to `None`, or
                does not construct an `Environment` object.
        """
        if isinstance(environment, Environment):
            # Already an environment object, additional arguments are not applied
            return environment

        if isinstance(environment, str) and os.path.isfile(environment):
            # JSON file specification: load it and handle the content like a dictionary
            with open(environment, 'r') as fp:
                environment = json.load(fp=fp)
            is_config = True
        else:
            is_config = isinstance(environment, dict)

        if is_config:
            # Dictionary specification: merge into kwargs, then resolve the actual type
            util.deep_disjoint_update(target=kwargs, source=environment)
            # Both keys are always removed, 'environment' takes precedence over 'type'
            type_spec = kwargs.pop('type', 'default')
            environment = kwargs.pop('environment', type_spec)
            # Explicit raise instead of assert, so the check survives `python -O`
            if environment is None:
                raise AssertionError("Environment specification must not be None.")

            return Environment.create(environment=environment, **kwargs)

        if not isinstance(environment, str):
            # Explicit raise instead of `assert False`, which `python -O` would strip and
            # thereby silently return None
            raise AssertionError(
                "Invalid environment specification type: {}".format(type(environment))
            )

        if '.' in environment:
            # Library specification
            library_name, module_name = environment.rsplit('.', 1)
            library = importlib.import_module(name=library_name)
            constructor = getattr(library, module_name)
        else:
            # Keyword specification
            constructor = tensorforce.environments.environments[environment]

        environment = constructor(**kwargs)
        if not isinstance(environment, Environment):
            raise AssertionError(
                "Specification did not construct an Environment object: {}".format(
                    type(environment)
                )
            )

        return environment
Esempio n. 2
0
 def optimizer_arguments(self, **kwargs):
     """
     Collects the optimizer arguments of the parent class and of both objectives.

     The parent implementation is called without arguments, whereas `kwargs` are forwarded to
     `objective1` and `objective2`. Their results are merged, in this order, into the parent
     result via `util.deep_disjoint_update`.

     Args:
         kwargs: Arguments forwarded to the `optimizer_arguments` method of both objectives.

     Returns:
         The merged arguments object obtained from the parent class.
     """
     merged = super().optimizer_arguments()

     first = self.objective1.optimizer_arguments(**kwargs)
     util.deep_disjoint_update(target=merged, source=first)

     second = self.objective2.optimizer_arguments(**kwargs)
     util.deep_disjoint_update(target=merged, source=second)

     return merged
Esempio n. 3
0
    def tf_optimize(self, indices):
        """
        Builds the optimization step for the memory entries referenced by `indices`.

        The steps, in order: optional baseline optimization, reward retrieval and estimation,
        retrieval of states/internals/auxiliaries/actions from memory, the call to
        `self.optimizer.minimize`, and finally the loss, entropy and KL-divergence summaries
        which are recorded after the optimization.

        Args:
            indices: Passed as `indices` to `self.memory.retrieve`, `self.memory.predecessors`,
                `self.estimator` and `self.optimize_baseline`.

        Returns:
            The result of `self.optimizer.minimize`, replaced by the return value of
            `self.add_summary(..., pass_tensors=optimized)` for every summary that is logged.
        """
        # Baseline optimization
        if self.baseline_optimizer is not None:
            optimized = self.optimize_baseline(indices=indices)
            dependencies = (optimized,)
        else:
            dependencies = (indices,)

        # Reward estimation
        with tf.control_dependencies(control_inputs=dependencies):
            reward = self.memory.retrieve(indices=indices, values='reward')
            reward = self.estimator.complete(
                baseline=self.baseline_policy, memory=self.memory, indices=indices, reward=reward
            )
            reward = self.add_summary(
                label=('empirical-reward', 'rewards'), name='empirical-reward', tensor=reward
            )
            # True only for a separate baseline policy without its own optimizer and objective
            is_baseline_optimized = self.separate_baseline_policy and \
                self.baseline_optimizer is None and self.baseline_objective is None
            reward = self.estimator.estimate(
                baseline=self.baseline_policy, memory=self.memory, indices=indices, reward=reward,
                is_baseline_optimized=is_baseline_optimized
            )
            reward = self.add_summary(
                label=('estimated-reward', 'rewards'), name='estimated-reward', tensor=reward
            )

        # Stop gradients of estimated rewards if separate baseline optimization
        if not is_baseline_optimized:
            reward = tf.stop_gradient(input=reward)

        # Retrieve states, internals and actions
        past_horizon = self.policy.past_horizon(is_optimization=True)
        if self.separate_baseline_policy and self.baseline_optimizer is None:
            # Policy and baseline are optimized together here, so their horizons must agree
            assertion = tf.debugging.assert_equal(
                x=past_horizon,
                y=self.baseline_policy.past_horizon(is_optimization=True),
                message="Policy and baseline depend on a different number of previous states."
            )
        else:
            # No check required, the horizon itself serves as control dependency
            assertion = past_horizon

        with tf.control_dependencies(control_inputs=(assertion,)):
            # horizon change: see timestep-based batch sampling
            starts, lengths, states, internals = self.memory.predecessors(
                indices=indices, horizon=past_horizon, sequence_values='states',
                initial_values='internals'
            )
            Module.update_tensors(dependency_starts=starts, dependency_lengths=lengths)
            auxiliaries, actions = self.memory.retrieve(
                indices=indices, values=('auxiliaries', 'actions')
            )

        # Optimizer arguments
        # The return value is kept and written back under the same name at the end of this
        # method (presumably the previous 'independent' value -- confirm against
        # Module.update_tensor)
        independent = Module.update_tensor(
            name='independent', tensor=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool'))
        )

        variables = self.get_variables(only_trainable=True)

        arguments = dict(
            states=states, internals=internals, auxiliaries=auxiliaries, actions=actions,
            reward=reward
        )

        fn_loss = self.total_loss

        # Accepts the same arguments as the loss function, but only uses states, internals and
        # auxiliaries (plus the optional reference `other`)
        def fn_kl_divergence(states, internals, auxiliaries, actions, reward, other=None):
            kl_divergence = self.policy.kl_divergence(
                states=states, internals=internals, auxiliaries=auxiliaries, other=other
            )
            # Baseline KL-divergence is added if the baseline has an objective but no optimizer
            if self.baseline_optimizer is None and self.baseline_objective is not None:
                kl_divergence += self.baseline_policy.kl_divergence(
                    states=states, internals=internals, auxiliaries=auxiliaries, other=other
                )
            return kl_divergence

        if self.global_model is None:
            global_variables = None
        else:
            global_variables = self.global_model.get_variables(only_trainable=True)

        # Additional keyword arguments for the optimizer, provided by the objective(s)
        kwargs = self.objective.optimizer_arguments(
            policy=self.policy, baseline=self.baseline_policy
        )
        if self.baseline_optimizer is None and self.baseline_objective is not None:
            util.deep_disjoint_update(
                target=kwargs,
                source=self.baseline_objective.optimizer_arguments(policy=self.baseline_policy)
            )

        dependencies = util.flatten(xs=arguments)

        # KL divergence before
        if self.is_summary_logged(
            label=('kl-divergence', 'action-kl-divergences', 'kl-divergences')
        ):
            with tf.control_dependencies(control_inputs=dependencies):
                kldiv_reference = self.policy.kldiv_reference(
                    states=states, internals=internals, auxiliaries=auxiliaries
                )
                # The optimization below then depends on the reference instead of the arguments
                dependencies = util.flatten(xs=kldiv_reference)

        # Optimization
        with tf.control_dependencies(control_inputs=dependencies):
            optimized = self.optimizer.minimize(
                variables=variables, arguments=arguments, fn_loss=fn_loss,
                fn_kl_divergence=fn_kl_divergence, global_variables=global_variables, **kwargs
            )

        # Everything below is created under a control dependency on the optimization step
        with tf.control_dependencies(control_inputs=(optimized,)):
            # Loss summaries
            # Each value is computed under a label tuple which includes the labels of every
            # summary using it (presumably is_summary_logged is true if any of the labels is
            # enabled -- confirm), and is then recorded under its own narrower label tuple
            if self.is_summary_logged(label=('loss', 'objective-loss', 'losses')):
                objective_loss = self.objective.loss_per_instance(policy=self.policy, **arguments)
                objective_loss = tf.math.reduce_mean(input_tensor=objective_loss, axis=0)
            if self.is_summary_logged(label=('objective-loss', 'losses')):
                optimized = self.add_summary(
                    label=('objective-loss', 'losses'), name='objective-loss',
                    tensor=objective_loss, pass_tensors=optimized
                )
            if self.is_summary_logged(label=('loss', 'regularization-loss', 'losses')):
                regularization_loss = self.regularize(
                    states=states, internals=internals, auxiliaries=auxiliaries
                )
            if self.is_summary_logged(label=('regularization-loss', 'losses')):
                optimized = self.add_summary(
                    label=('regularization-loss', 'losses'), name='regularization-loss',
                    tensor=regularization_loss, pass_tensors=optimized
                )
            if self.is_summary_logged(label=('loss', 'losses')):
                loss = objective_loss + regularization_loss
            if self.baseline_optimizer is None and self.baseline_objective is not None:
                if self.is_summary_logged(label=('loss', 'baseline-objective-loss', 'losses')):
                    # NOTE(review): this first branch is unreachable, since the enclosing
                    # condition already requires self.baseline_objective to be not None
                    if self.baseline_objective is None:
                        baseline_objective_loss = self.objective.loss_per_instance(
                            policy=self.baseline_policy, **arguments
                        )
                    else:
                        baseline_objective_loss = self.baseline_objective.loss_per_instance(
                            policy=self.baseline_policy, **arguments
                        )
                    baseline_objective_loss = tf.math.reduce_mean(
                        input_tensor=baseline_objective_loss, axis=0
                    )
                if self.is_summary_logged(label=('baseline-objective-loss', 'losses')):
                    optimized = self.add_summary(
                        label=('baseline-objective-loss', 'losses'),
                        name='baseline-objective-loss', tensor=baseline_objective_loss,
                        pass_tensors=optimized
                    )
                if self.is_summary_logged(
                    label=('loss', 'baseline-regularization-loss', 'losses')
                ):
                    baseline_regularization_loss = self.baseline_policy.regularize()
                if self.is_summary_logged(label=('baseline-regularization-loss', 'losses')):
                    optimized = self.add_summary(
                        label=('baseline-regularization-loss', 'losses'),
                        name='baseline-regularization-loss', tensor=baseline_regularization_loss,
                        pass_tensors=optimized
                    )
                if self.is_summary_logged(label=('loss', 'baseline-loss', 'losses')):
                    baseline_loss = baseline_objective_loss + baseline_regularization_loss
                if self.is_summary_logged(label=('baseline-loss', 'losses')):
                    optimized = self.add_summary(
                        label=('baseline-loss', 'losses'), name='baseline-loss',
                        tensor=baseline_loss, pass_tensors=optimized
                    )
                if self.is_summary_logged(label=('loss', 'losses')):
                    # Total loss additionally includes the weighted baseline loss
                    loss += self.baseline_loss_weight * baseline_loss
            if self.is_summary_logged(label=('loss', 'losses')):
                optimized = self.add_summary(
                    label=('loss', 'losses'), name='loss', tensor=loss, pass_tensors=optimized
                )

            # Entropy summaries
            if self.is_summary_logged(label=('entropy', 'action-entropies', 'entropies')):
                entropies = self.policy.entropy(
                    states=states, internals=internals, auxiliaries=auxiliaries,
                    include_per_action=(len(self.actions_spec) > 1)
                )
            if self.is_summary_logged(label=('entropy', 'entropies')):
                if len(self.actions_spec) == 1:
                    # Single action: the result is used directly as summary tensor
                    optimized = self.add_summary(
                        label=('entropy', 'entropies'), name='entropy', tensor=entropies,
                        pass_tensors=optimized
                    )
                else:
                    # Multiple actions: the result is indexed, '*' holds the overall value
                    optimized = self.add_summary(
                        label=('entropy', 'entropies'), name='entropy', tensor=entropies['*'],
                        pass_tensors=optimized
                    )
            if len(self.actions_spec) > 1 and \
                    self.is_summary_logged(label=('action-entropies', 'entropies')):
                for name in self.actions_spec:
                    optimized = self.add_summary(
                        label=('action-entropies', 'entropies'), name=(name + '-entropy'),
                        tensor=entropies[name], pass_tensors=optimized
                    )

            # KL divergence summaries
            # Same label tuple as for kldiv_reference above, so the reference is defined here
            if self.is_summary_logged(
                label=('kl-divergence', 'action-kl-divergences', 'kl-divergences')
            ):
                kl_divergences = self.policy.kl_divergence(
                    states=states, internals=internals, auxiliaries=auxiliaries,
                    other=kldiv_reference, include_per_action=(len(self.actions_spec) > 1)
                )
            if self.is_summary_logged(label=('kl-divergence', 'kl-divergences')):
                if len(self.actions_spec) == 1:
                    optimized = self.add_summary(
                        label=('kl-divergence', 'kl-divergences'), name='kl-divergence',
                        tensor=kl_divergences, pass_tensors=optimized
                    )
                else:
                    optimized = self.add_summary(
                        label=('kl-divergence', 'kl-divergences'), name='kl-divergence',
                        tensor=kl_divergences['*'], pass_tensors=optimized
                    )
            if len(self.actions_spec) > 1 and \
                    self.is_summary_logged(label=('action-kl-divergences', 'kl-divergences')):
                for name in self.actions_spec:
                    optimized = self.add_summary(
                        label=('action-kl-divergences', 'kl-divergences'),
                        name=(name + '-kl-divergence'), tensor=kl_divergences[name],
                        pass_tensors=optimized
                    )

        # Write back the value obtained from Module.update_tensor before the optimization
        Module.update_tensor(name='independent', tensor=independent)

        return optimized
Esempio n. 4
0
    def create(agent=None, environment=None, **kwargs):
        """
        Creates an agent from a specification.

        Args:
            agent (specification): JSON file, specification key, configuration dictionary,
                library module, or `Agent` object
                (<span style="color:#00C000"><b>default</b></span>: Policy agent).
            environment (Environment): Environment which the agent is supposed to be trained on,
                environment-related arguments like state/action space specifications will be
                extracted if given.
            kwargs: Additional arguments.

        Returns:
            The `Agent` object, initialized if it was not initialized yet.

        Raises:
            AssertionError: If the specification has an unsupported type, or does not construct
                an `Agent` object.
        """
        if agent is None:
            agent = 'default'

        if isinstance(agent, Agent):
            # Already an agent object, environment and additional arguments are not applied
            if not agent.is_initialized:
                agent.initialize()

            return agent

        if isinstance(agent, str) and os.path.isfile(agent):
            # JSON file specification: load it and handle the content like a dictionary
            with open(agent, 'r') as fp:
                agent = json.load(fp=fp)
            is_config = True
        else:
            is_config = isinstance(agent, dict)

        if is_config:
            # Dictionary specification: merge into kwargs, then resolve the actual type
            util.deep_disjoint_update(target=kwargs, source=agent)
            # Both keys are always removed, 'agent' takes precedence over 'type'
            type_spec = kwargs.pop('type', 'default')
            agent = kwargs.pop('agent', type_spec)

            return Agent.create(agent=agent, environment=environment, **kwargs)

        if not isinstance(agent, str):
            # Explicit raise instead of `assert False`, which `python -O` would strip and
            # thereby silently return None
            raise AssertionError("Invalid agent specification type: {}".format(type(agent)))

        if '.' in agent:
            # Library specification
            library_name, module_name = agent.rsplit('.', 1)
            library = importlib.import_module(name=library_name)
            constructor = getattr(library, module_name)
        else:
            # Keyword specification
            constructor = tensorforce.agents.agents[agent]

        if environment is not None:
            # Environment-related arguments are merged into the constructor arguments
            env_spec = dict(states=environment.states(), actions=environment.actions())
            max_episode_timesteps = environment.max_episode_timesteps()
            if max_episode_timesteps is not None:
                env_spec['max_episode_timesteps'] = max_episode_timesteps
            util.deep_disjoint_update(target=kwargs, source=env_spec)

        agent = constructor(**kwargs)
        if not isinstance(agent, Agent):
            raise AssertionError(
                "Specification did not construct an Agent object: {}".format(type(agent))
            )

        if not agent.is_initialized:
            agent.initialize()

        return agent
Esempio n. 5
0
    def create(agent='tensorforce', environment=None, **kwargs):
        """
        Creates an agent from a specification.

        Args:
            agent (specification | Agent class/object): JSON file, specification key, configuration
                dictionary, library module, or `Agent` class/object
                (<span style="color:#00C000"><b>default</b></span>: Policy agent).
            environment (Environment object): Environment which the agent is supposed to be trained
                on, environment-related arguments like state/action space specifications and
                maximum episode length will be extracted if given
                (<span style="color:#00C000"><b>recommended</b></span>).
            kwargs: Additional arguments.

        Returns:
            The `Agent` object, which is reset if it was already initialized, and initialized
            otherwise.

        Raises:
            TensorforceError: If `agent` is a string which is neither a file, a library path nor
                a registered agent key, or if `agent` has an unsupported type.
            AssertionError: If the agent/arguments are inconsistent with the given environment
                or with the additional arguments.
        """
        if isinstance(agent, Agent):
            # Agent object: only check consistency with environment and additional arguments
            if environment is not None:
                assert util.deep_equal(xs=agent.spec['states'], ys=environment.states())
                assert util.deep_equal(xs=agent.spec['actions'], ys=environment.actions())
                assert environment.max_episode_timesteps() is None or \
                    agent.spec['max_episode_timesteps'] >= environment.max_episode_timesteps()

            for key, value in kwargs.items():
                assert agent.spec[key] == value

            if agent.is_initialized:
                agent.reset()
            else:
                agent.initialize()

            return agent

        elif isinstance(agent, type) and issubclass(agent, Agent):
            # Agent class: complete the constructor arguments from the environment
            if environment is not None:
                if 'states' in kwargs:
                    assert util.deep_equal(xs=kwargs['states'], ys=environment.states())
                else:
                    kwargs['states'] = environment.states()
                if 'actions' in kwargs:
                    assert util.deep_equal(xs=kwargs['actions'], ys=environment.actions())
                else:
                    kwargs['actions'] = environment.actions()
                max_episode_timesteps = environment.max_episode_timesteps()
                if max_episode_timesteps is None:
                    pass
                elif 'max_episode_timesteps' in kwargs:
                    assert kwargs['max_episode_timesteps'] >= max_episode_timesteps
                else:
                    kwargs['max_episode_timesteps'] = max_episode_timesteps

            agent = agent(**kwargs)
            assert isinstance(agent, Agent)
            # Recursive call handles consistency checks and initialization of the object
            return Agent.create(agent=agent, environment=environment)

        elif isinstance(agent, dict):
            # Dictionary specification
            util.deep_disjoint_update(target=kwargs, source=agent)
            # Both keys are always removed, 'agent' takes precedence over 'type'
            agent = kwargs.pop('agent', kwargs.pop('type', 'default'))

            return Agent.create(agent=agent, environment=environment, **kwargs)

        elif isinstance(agent, str):
            if os.path.isfile(agent):
                # JSON file specification
                with open(agent, 'r') as fp:
                    agent = json.load(fp=fp)
                return Agent.create(agent=agent, environment=environment, **kwargs)

            elif '.' in agent:
                # Library specification
                library_name, module_name = agent.rsplit('.', 1)
                library = importlib.import_module(name=library_name)
                agent = getattr(library, module_name)
                return Agent.create(agent=agent, environment=environment, **kwargs)

            elif agent in tensorforce.agents.agents:
                # Keyword specification
                agent = tensorforce.agents.agents[agent]
                return Agent.create(agent=agent, environment=environment, **kwargs)

            else:
                # The offending value is passed as `value`, as for the other
                # TensorforceError.value calls (`dtype` is the keyword of TensorforceError.type)
                raise TensorforceError.value(name='Agent.create',
                                             argument='agent',
                                             value=agent)

        else:
            raise TensorforceError.type(name='Agent.create',
                                        argument='agent',
                                        dtype=type(agent))
Esempio n. 6
0
    def get_module_class_and_args(*,
                                  name,
                                  module=None,
                                  modules=None,
                                  default_module=None,
                                  disable_first_arg=False,
                                  **kwargs):
        """
        Resolves a module specification to a callable and its constructor arguments.

        Args:
            name (str): Module name, returned as `kwargs['name']`.
            module: Module specification, one of: dictionary (type given via 'type', otherwise
                `default_module`), path to a JSON file, library path containing '.', key in
                `modules`, callable, or any other value which is then used as first positional
                argument of the default module.
            modules (dict): Registry of module keys, may contain a 'default' entry.
            default_module: Module used if `module` does not specify one, either a key in
                `modules` or a `Module` subclass.
            disable_first_arg (bool): If true, a string which cannot be resolved otherwise is
                rejected instead of being used as first argument of the default module.
            kwargs: Additional module arguments.

        Returns:
            Tuple `(module, args, kwargs)` of the resolved callable, its positional arguments
            (empty or the single first argument) and its keyword arguments including `name`.

        Raises:
            TensorforceError: If an argument is invalid or `module` cannot be resolved.
        """
        # name
        if not isinstance(name, str):
            raise TensorforceError.type(name='Module.add_module',
                                        argument='name',
                                        dtype=type(name))
        # modules
        if modules is not None and not isinstance(modules, dict):
            raise TensorforceError.type(name='Module.add_module',
                                        argument='modules',
                                        dtype=type(modules))

        # Membership tests below must also work for the default modules=None, which would
        # otherwise fail with a TypeError
        registry = {} if modules is None else modules

        # default_module (the class check avoids a TypeError of issubclass for non-classes)
        if default_module is not None and default_module not in registry and \
                not (isinstance(default_module, type) and issubclass(default_module, Module)):
            raise TensorforceError.value(name='Module.add_module',
                                         argument='default_module',
                                         value=default_module)

        # disable_first_arg
        if not isinstance(disable_first_arg, bool):
            raise TensorforceError.type(name='Module.add_module',
                                        argument='disable_first_arg',
                                        dtype=type(disable_first_arg))

        # module
        if isinstance(module, dict):
            # Dictionary module specification (type either given via 'type' or 'default_module')
            util.deep_disjoint_update(target=kwargs, source=module)
            module = kwargs.pop('type', default_module)
            return Module.get_module_class_and_args(
                name=name,
                module=module,
                modules=modules,
                default_module=default_module,
                disable_first_arg=True,
                **kwargs)

        elif isinstance(module, str):
            if os.path.isfile(module):
                # JSON file module specification
                with open(module, 'r') as fp:
                    module = json.load(fp=fp)
                return Module.get_module_class_and_args(
                    name=name,
                    module=module,
                    modules=modules,
                    default_module=default_module,
                    disable_first_arg=True,
                    **kwargs)

            elif '.' in module:
                # Library module specification
                library_name, module_name = module.rsplit('.', 1)
                library = importlib.import_module(name=library_name)
                module = getattr(library, module_name)
                return Module.get_module_class_and_args(
                    name=name,
                    module=module,
                    modules=modules,
                    default_module=default_module,
                    disable_first_arg=True,
                    **kwargs)

            elif module in registry:
                # Keyword module specification
                return Module.get_module_class_and_args(
                    name=name,
                    module=registry[module],
                    modules=modules,
                    default_module=default_module,
                    disable_first_arg=True,
                    **kwargs)

            elif 'default' in registry or default_module is not None:
                # Default module specification, the string becomes its first argument
                if '_first_arg' in kwargs:
                    raise TensorforceError.invalid(name='Module.add_module',
                                                   argument='_first_arg')
                if disable_first_arg:
                    raise TensorforceError.value(name='Module.add_module',
                                                 argument='module',
                                                 value=module)
                kwargs['_first_arg'] = module
                if default_module is None:
                    default_module = registry['default']
                return Module.get_module_class_and_args(name=name,
                                                        module=default_module,
                                                        modules=modules,
                                                        **kwargs)

            else:
                raise TensorforceError.value(name='Module.add_module',
                                             argument='module',
                                             value=module)

        elif not callable(module) and ('default' in registry
                                       or default_module is not None):
            # Default module specification, a given value becomes its first argument
            if '_first_arg' in kwargs:
                raise TensorforceError.invalid(name='Module.add_module',
                                               argument='_first_arg')
            if module is not None:
                kwargs['_first_arg'] = module
            if default_module is None:
                default_module = registry['default']
            return Module.get_module_class_and_args(name=name,
                                                    module=default_module,
                                                    modules=modules,
                                                    **kwargs)

        elif callable(module):
            # Resolved: move the internal '_first_arg' entry into the positional arguments
            if '_first_arg' in kwargs:
                args = (kwargs.pop('_first_arg'), )
            else:
                args = ()
            kwargs['name'] = name
            return module, args, kwargs

        else:
            raise TensorforceError.value(name='Module.add_module',
                                         argument='module',
                                         value=module)
Esempio n. 7
0
    def create(environment, max_episode_timesteps=None, **kwargs):
        """
        Creates an environment from a specification.

        Args:
            environment (specification | Environment object): JSON file, specification key,
                configuration dictionary, library module, or `Environment` object
                (<span style="color:#C00000"><b>required</b></span>).
            max_episode_timesteps (int > 0): Maximum number of timesteps per episode, overwrites
                the environment default if defined
                (<span style="color:#00C000"><b>default</b></span>: environment default).
            kwargs: Additional arguments.

        Returns:
            The `Environment` object, wrapped in an `EnvironmentWrapper` if
            `max_episode_timesteps` is given.

        Raises:
            AssertionError: If the specification has an unsupported type, resolves to `None`, or
                does not construct an `Environment` object.
        """
        if isinstance(environment, Environment):
            # Environment object: only apply the episode length limit, if given
            if max_episode_timesteps is not None:
                environment = EnvironmentWrapper(
                    environment=environment,
                    max_episode_timesteps=max_episode_timesteps)
            return environment

        if isinstance(environment, str) and os.path.isfile(environment):
            # JSON file specification: load it and handle the content like a dictionary
            with open(environment, 'r') as fp:
                environment = json.load(fp=fp)
            is_config = True
        else:
            is_config = isinstance(environment, dict)

        if is_config:
            # Dictionary specification: merge into kwargs, then resolve the actual type
            util.deep_disjoint_update(target=kwargs, source=environment)
            # Both keys are always removed, 'environment' takes precedence over 'type'
            type_spec = kwargs.pop('type', 'default')
            environment = kwargs.pop('environment', type_spec)
            # Explicit raise instead of assert, so the check survives `python -O`
            if environment is None:
                raise AssertionError("Environment specification must not be None.")
            # A limit given as part of the specification would otherwise be passed twice to
            # the recursive call and fail with a duplicate keyword argument
            if max_episode_timesteps is None:
                max_episode_timesteps = kwargs.pop('max_episode_timesteps', None)

            return Environment.create(
                environment=environment,
                max_episode_timesteps=max_episode_timesteps,
                **kwargs)

        if not isinstance(environment, str):
            # Explicit raise instead of `assert False`, which `python -O` would strip and
            # thereby silently return None
            raise AssertionError(
                "Invalid environment specification type: {}".format(type(environment))
            )

        if '.' in environment:
            # Library specification
            library_name, module_name = environment.rsplit('.', 1)
            library = importlib.import_module(name=library_name)
            constructor = getattr(library, module_name)
        else:
            # Keyword specification
            constructor = tensorforce.environments.environments[environment]

        environment = constructor(**kwargs)
        if not isinstance(environment, Environment):
            raise AssertionError(
                "Specification did not construct an Environment object: {}".format(
                    type(environment)
                )
            )

        # Recursive call applies the episode length limit to the constructed object
        return Environment.create(
            environment=environment,
            max_episode_timesteps=max_episode_timesteps)
Esempio n. 8
0
    def get_module_class_and_args(*,
                                  name,
                                  module=None,
                                  modules=None,
                                  default_module=None,
                                  disable_first_arg=False,
                                  **kwargs):
        """
        Resolves a module specification to a callable plus its call arguments.

        The specification is reduced step by step (dictionary, JSON file,
        keyword, importable library path, default module) by recursive calls
        until a callable is reached.

        Args:
            name (str): Module name, added to the returned kwargs as 'name'.
            module: Module specification: dictionary, JSON file path, key of
                `modules`, importable path, callable, or any other value which
                is then passed as first argument to the default module.
            modules (dict): Optional registry mapping keywords to modules; the
                'default' entry is used if `default_module` is not given.
            default_module: Module used when `module` does not itself identify
                a module.
            disable_first_arg (bool): Whether a non-resolvable string
                specification is an error instead of being forwarded as first
                argument to the default module.
            kwargs: Additional arguments for the module.

        Returns:
            Tuple `(module, args, kwargs)` where `module` is callable, `args`
            is either empty or the 1-tuple holding the first argument, and
            `kwargs` contains 'name'.

        Raises:
            TensorforceError: If an argument or the specification is invalid.
        """
        # name
        if not isinstance(name, str):
            raise TensorforceError.type(name='Module.add_module',
                                        argument='name',
                                        dtype=type(name))
        # modules
        if modules is not None and not isinstance(modules, dict):
            raise TensorforceError.type(name='Module.add_module',
                                        argument='modules',
                                        dtype=type(modules))

        # default_module (membership test is skipped without a registry, as
        # "in None" would raise a bare TypeError)
        if default_module is not None and \
                (modules is None or default_module not in modules) and \
                not issubclass(default_module, Module):
            raise TensorforceError.value(name='Module.add_module',
                                         argument='default_module',
                                         value=default_module)

        # disable_first_arg
        if not isinstance(disable_first_arg, bool):
            raise TensorforceError.type(name='Module.add_module',
                                        argument='disable_first_arg',
                                        dtype=type(disable_first_arg))

        # Whether a default module is available, either explicitly or via registry
        has_default = default_module is not None or \
            (modules is not None and 'default' in modules)

        # module
        if isinstance(module, dict):
            # Dictionary module specification (type either given via 'type' or 'default_module')
            util.deep_disjoint_update(target=kwargs, source=module)
            module = kwargs.pop('type', default_module)
            return Module.get_module_class_and_args(
                name=name,
                module=module,
                modules=modules,
                default_module=default_module,
                disable_first_arg=True,
                **kwargs)

        elif isinstance(module, str):
            if os.path.isfile(module):
                # JSON file module specification
                with open(module, 'r') as fp:
                    module = json.load(fp=fp)
                return Module.get_module_class_and_args(
                    name=name,
                    module=module,
                    modules=modules,
                    default_module=default_module,
                    disable_first_arg=True,
                    **kwargs)

            elif modules is not None and module in modules:
                # Keyword module specification
                return Module.get_module_class_and_args(
                    name=name,
                    module=modules[module],
                    modules=modules,
                    default_module=default_module,
                    disable_first_arg=True,
                    **kwargs)

            else:
                # Library module specification
                assert modules is not None
                # Walk up the MRO of an arbitrary registry entry to find the
                # class which an imported module is checked against
                parent_class = next(iter(modules.values()))
                while len(parent_class.mro()) >= 4 and parent_class.mro(
                )[1] != Module:
                    parent_class = parent_class.mro()[1]
                # Keep the import result separate, so the original string is
                # still available for the default fallback and error message
                imported = util.try_import_module(module=module,
                                                  parent_class=parent_class)
                if imported is not None:
                    return Module.get_module_class_and_args(
                        name=name,
                        module=imported,
                        modules=modules,
                        default_module=default_module,
                        disable_first_arg=True,
                        **kwargs)

                if has_default:
                    # Default module specification, string is the first argument
                    if '_first_arg' in kwargs:
                        raise TensorforceError.invalid(
                            name='Module.add_module', argument='_first_arg')
                    if disable_first_arg:
                        raise TensorforceError.value(
                            name='Module.add_module',
                            argument='module',
                            value=module)
                    kwargs['_first_arg'] = module
                    if default_module is None:
                        default_module = modules['default']
                    return Module.get_module_class_and_args(
                        name=name,
                        module=default_module,
                        modules=modules,
                        **kwargs)

                else:
                    raise TensorforceError.value(name='Module.add_module',
                                                 argument='module',
                                                 value=module)

        elif (not callable(module) or isinstance(module, tf.keras.Model) or
              (isinstance(module, type) and issubclass(module, tf.keras.Model))
              ) and has_default:
            # Default module specification, Keras models are treated as
            # argument values rather than as the module to call
            if '_first_arg' in kwargs:
                raise TensorforceError.invalid(name='Module.add_module',
                                               argument='_first_arg')
            if module is not None:
                kwargs['_first_arg'] = module
            if default_module is None:
                default_module = modules['default']
            return Module.get_module_class_and_args(name=name,
                                                    module=default_module,
                                                    modules=modules,
                                                    **kwargs)

        elif callable(module):
            # Fully resolved: split off the positional first argument
            if '_first_arg' in kwargs:
                args = (kwargs.pop('_first_arg'), )
            else:
                args = ()
            kwargs['name'] = name
            return module, args, kwargs

        else:
            raise TensorforceError.value(name='Module.add_module',
                                         argument='module',
                                         value=module)
Esempio n. 9
0
    def create(environment=None,
               max_episode_timesteps=None,
               reward_shaping=None,
               remote=None,
               blocking=False,
               host=None,
               port=None,
               **kwargs):
        """
        Creates an environment from a specification. In case of "socket-server" remote mode, runs
        environment in server communication loop until closed.

        Args:
            environment (specification | Environment class/object): JSON file, specification key,
                configuration dictionary, library module, `Environment` class/object, or gym.Env
                (<span style="color:#C00000"><b>required</b></span>, invalid for "socket-client"
                remote mode).
            max_episode_timesteps (int > 0): Maximum number of timesteps per episode, overwrites
                the environment default if defined
                (<span style="color:#00C000"><b>default</b></span>: environment default, invalid
                for "socket-client" remote mode).
            reward_shaping (callable[(s,a,t,r,s') -> r|(r,t)] | str): Reward shaping function
                mapping state, action, terminal, reward and next state to shaped reward and
                terminal, or a string expression with arguments "states", "actions", "terminal",
                "reward" and "next_states", e.g. "-1.0 if terminal else max(reward, 0.0)"
                (<span style="color:#00C000"><b>default</b></span>: no reward shaping).
            remote ("multiprocessing" | "socket-client" | "socket-server"): Communication mode for
                remote environment execution of parallelized environment execution, "socket-client"
                mode requires a corresponding "socket-server" running, and "socket-server" mode
                runs environment in server communication loop until closed
                (<span style="color:#00C000"><b>default</b></span>: local execution).
            blocking (bool): Whether remote environment calls should be blocking
                (<span style="color:#00C000"><b>default</b></span>: not blocking, invalid unless
                "multiprocessing" or "socket-client" remote mode).
            host (str): Socket server hostname or IP address
                (<span style="color:#C00000"><b>required</b></span> only for "socket-client" remote
                mode).
            port (int): Socket server port
                (<span style="color:#C00000"><b>required</b></span> only for "socket-client/server"
                remote mode).
            kwargs: Additional arguments.

        Returns:
            The created environment, or the given object unchanged if it already is an
            `EnvironmentWrapper` or `RemoteEnvironment` instance. Nothing is returned in
            "socket-server" remote mode.

        Raises:
            TensorforceError: If an argument is invalid for the selected mode, or if the
                environment specification cannot be resolved.
        """
        if remote not in ('multiprocessing', 'socket-client'):
            if blocking:
                raise TensorforceError.invalid(
                    name='Environment.create',
                    argument='blocking',
                    condition='no multiprocessing/socket-client instance')
        if remote not in ('socket-client', 'socket-server'):
            if host is not None:
                raise TensorforceError.invalid(name='Environment.create',
                                               argument='host',
                                               condition='no socket instance')
            elif port is not None:
                raise TensorforceError.invalid(name='Environment.create',
                                               argument='port',
                                               condition='no socket instance')

        if remote == 'multiprocessing':
            from tensorforce.environments import MultiprocessingEnvironment
            environment = MultiprocessingEnvironment(
                blocking=blocking,
                environment=environment,
                max_episode_timesteps=max_episode_timesteps,
                reward_shaping=reward_shaping,
                **kwargs)
            return environment

        elif remote == 'socket-client':
            # The client only connects, the environment is configured on the server side
            if environment is not None:
                raise TensorforceError.invalid(
                    name='Environment.create',
                    argument='environment',
                    condition='socket-client instance')
            elif max_episode_timesteps is not None:
                raise TensorforceError.invalid(
                    name='Environment.create',
                    argument='max_episode_timesteps',
                    condition='socket-client instance')
            elif len(kwargs) > 0:
                raise TensorforceError.invalid(
                    name='Environment.create',
                    argument='kwargs',
                    condition='socket-client instance')
            from tensorforce.environments import SocketEnvironment
            environment = SocketEnvironment(host=host,
                                            port=port,
                                            blocking=blocking)
            return environment

        elif remote == 'socket-server':
            # No return value: the call below is the server loop itself
            from tensorforce.environments import SocketEnvironment
            SocketEnvironment.remote(
                port=port,
                environment=environment,
                max_episode_timesteps=max_episode_timesteps,
                reward_shaping=reward_shaping,
                **kwargs)

        elif remote is not None:
            raise TensorforceError.value(name='Environment.create',
                                         argument='remote',
                                         value=remote)

        elif isinstance(environment, (EnvironmentWrapper, RemoteEnvironment)):
            if max_episode_timesteps is not None:
                raise TensorforceError.invalid(
                    name='Environment.create',
                    argument='max_episode_timesteps',
                    condition='EnvironmentWrapper instance')
            if len(kwargs) > 0:
                raise TensorforceError.invalid(
                    name='Environment.create',
                    argument='kwargs',
                    condition='EnvironmentWrapper instance')
            return environment

        elif isinstance(environment, type) and \
                issubclass(environment, (EnvironmentWrapper, RemoteEnvironment)):
            raise TensorforceError.type(name='Environment.create',
                                        argument='environment',
                                        dtype=type(environment))

        elif isinstance(environment, Environment):
            return EnvironmentWrapper(
                environment=environment,
                max_episode_timesteps=max_episode_timesteps,
                reward_shaping=reward_shaping)

        elif isinstance(environment, type) and issubclass(
                environment, Environment):
            environment = environment(**kwargs)
            assert isinstance(environment, Environment)
            return Environment.create(
                environment=environment,
                max_episode_timesteps=max_episode_timesteps,
                reward_shaping=reward_shaping)

        elif isinstance(environment, dict):
            # Dictionary specification
            util.deep_disjoint_update(target=kwargs, source=environment)
            environment = kwargs.pop('environment',
                                     kwargs.pop('type', 'default'))
            assert environment is not None
            # Always remove these keys from kwargs, otherwise an explicit
            # argument plus a specification entry would be passed twice below
            spec_timesteps = kwargs.pop('max_episode_timesteps', None)
            if max_episode_timesteps is None:
                max_episode_timesteps = spec_timesteps
            spec_shaping = kwargs.pop('reward_shaping', None)
            if reward_shaping is None:
                reward_shaping = spec_shaping

            return Environment.create(
                environment=environment,
                max_episode_timesteps=max_episode_timesteps,
                reward_shaping=reward_shaping,
                **kwargs)

        elif isinstance(environment, str):
            if os.path.isfile(environment):
                # JSON file specification
                with open(environment, 'r') as fp:
                    environment = json.load(fp=fp)

                util.deep_disjoint_update(target=kwargs, source=environment)
                environment = kwargs.pop('environment',
                                         kwargs.pop('type', 'default'))
                assert environment is not None
                # Always remove these keys from kwargs, otherwise an explicit
                # argument plus a specification entry would be passed twice below
                spec_timesteps = kwargs.pop('max_episode_timesteps', None)
                if max_episode_timesteps is None:
                    max_episode_timesteps = spec_timesteps
                spec_shaping = kwargs.pop('reward_shaping', None)
                if reward_shaping is None:
                    reward_shaping = spec_shaping

                return Environment.create(
                    environment=environment,
                    max_episode_timesteps=max_episode_timesteps,
                    reward_shaping=reward_shaping,
                    **kwargs)

            elif '.' in environment:
                # Library specification
                library_name, module_name = environment.rsplit('.', 1)
                library = importlib.import_module(name=library_name)
                environment = getattr(library, module_name)
                return Environment.create(
                    environment=environment,
                    max_episode_timesteps=max_episode_timesteps,
                    reward_shaping=reward_shaping,
                    **kwargs)

            elif environment in tensorforce.environments.environments:
                # Keyword specification
                environment = tensorforce.environments.environments[
                    environment]
                return Environment.create(
                    environment=environment,
                    max_episode_timesteps=max_episode_timesteps,
                    reward_shaping=reward_shaping,
                    **kwargs)

            else:
                # Default: OpenAI Gym
                try:
                    return Environment.create(
                        environment='gym',
                        level=environment,
                        max_episode_timesteps=max_episode_timesteps,
                        reward_shaping=reward_shaping,
                        **kwargs)
                except TensorforceError as exc:
                    # Report the original specification, keep the cause attached
                    raise TensorforceError.value(
                        name='Environment.create',
                        argument='environment',
                        value=environment) from exc

        else:
            # Default: OpenAI Gym
            from gym import Env
            if isinstance(environment, Env) or \
                    (isinstance(environment, type) and issubclass(environment, Env)):
                return Environment.create(
                    environment='gym',
                    level=environment,
                    max_episode_timesteps=max_episode_timesteps,
                    reward_shaping=reward_shaping,
                    **kwargs)

            else:
                raise TensorforceError.type(name='Environment.create',
                                            argument='environment',
                                            dtype=type(environment))
Esempio n. 10
0
    def create(environment, max_episode_timesteps=None, **kwargs):
        """
        Creates an environment from a specification.

        Args:
            environment (specification | Environment class/object): JSON file, specification key,
                configuration dictionary, library module, or `Environment` class/object
                (<span style="color:#C00000"><b>required</b></span>).
            max_episode_timesteps (int > 0): Maximum number of timesteps per episode, overwrites
                the environment default if defined
                (<span style="color:#00C000"><b>default</b></span>: environment default).
            kwargs: Additional arguments.

        Returns:
            The created environment. An `Environment` object is returned unchanged unless
            `max_episode_timesteps` is given, in which case it is wrapped in an
            `EnvironmentWrapper`; `EnvironmentWrapper` and `RemoteEnvironment` objects are
            always returned unchanged.

        Raises:
            TensorforceError: If an argument is invalid for an `EnvironmentWrapper` or
                `RemoteEnvironment` object, or if the type of `environment` is not supported.
        """
        if isinstance(environment, (EnvironmentWrapper, RemoteEnvironment)):
            # Already wrapped: further configuration cannot be applied, so reject it
            if max_episode_timesteps is not None:
                raise TensorforceError.invalid(
                    name='Environment.create',
                    argument='max_episode_timesteps',
                    condition='EnvironmentWrapper instance')
            if len(kwargs) > 0:
                raise TensorforceError.invalid(
                    name='Environment.create',
                    argument='kwargs',
                    condition='EnvironmentWrapper instance')
            return environment

        elif isinstance(environment, type) and \
                issubclass(environment, (EnvironmentWrapper, RemoteEnvironment)):
            raise TensorforceError.type(name='Environment.create',
                                        argument='environment',
                                        dtype=type(environment))

        elif isinstance(environment, Environment):
            if max_episode_timesteps is not None:
                environment = EnvironmentWrapper(
                    environment=environment,
                    max_episode_timesteps=max_episode_timesteps)
            return environment

        elif isinstance(environment, type) and issubclass(
                environment, Environment):
            environment = environment(**kwargs)
            assert isinstance(environment, Environment)
            return Environment.create(
                environment=environment,
                max_episode_timesteps=max_episode_timesteps)

        elif isinstance(environment, dict):
            # Dictionary specification
            util.deep_disjoint_update(target=kwargs, source=environment)
            environment = kwargs.pop('environment',
                                     kwargs.pop('type', 'default'))
            assert environment is not None
            # Always remove the key from kwargs, otherwise an explicit argument
            # plus a specification entry would be passed twice below
            spec_timesteps = kwargs.pop('max_episode_timesteps', None)
            if max_episode_timesteps is None:
                max_episode_timesteps = spec_timesteps

            return Environment.create(
                environment=environment,
                max_episode_timesteps=max_episode_timesteps,
                **kwargs)

        elif isinstance(environment, str):
            if os.path.isfile(environment):
                # JSON file specification
                with open(environment, 'r') as fp:
                    environment = json.load(fp=fp)

                util.deep_disjoint_update(target=kwargs, source=environment)
                environment = kwargs.pop('environment',
                                         kwargs.pop('type', 'default'))
                assert environment is not None
                # Always remove the key from kwargs, otherwise an explicit
                # argument plus a specification entry would be passed twice below
                spec_timesteps = kwargs.pop('max_episode_timesteps', None)
                if max_episode_timesteps is None:
                    max_episode_timesteps = spec_timesteps

                return Environment.create(
                    environment=environment,
                    max_episode_timesteps=max_episode_timesteps,
                    **kwargs)

            elif '.' in environment:
                # Library specification
                library_name, module_name = environment.rsplit('.', 1)
                library = importlib.import_module(name=library_name)
                environment = getattr(library, module_name)
                return Environment.create(
                    environment=environment,
                    max_episode_timesteps=max_episode_timesteps,
                    **kwargs)

            else:
                # Keyword specification
                environment = tensorforce.environments.environments[
                    environment]
                return Environment.create(
                    environment=environment,
                    max_episode_timesteps=max_episode_timesteps,
                    **kwargs)

        else:
            # Default: OpenAI Gym (imported lazily, only needed for this branch)
            from gym import Env
            if isinstance(environment, Env) or \
                    (isinstance(environment, type) and issubclass(environment, Env)):
                return Environment.create(
                    environment='gym',
                    level=environment,
                    max_episode_timesteps=max_episode_timesteps,
                    **kwargs)

            else:
                raise TensorforceError.type(name='Environment.create',
                                            argument='environment',
                                            dtype=type(environment))
Esempio n. 11
0
    def get_module_class_and_kwargs(
        name, module=None, modules=None, default_module=None, **kwargs
    ):
        """
        Resolves a module specification to a callable plus its call arguments.

        The specification is reduced step by step (dictionary, JSON file,
        library path, keyword, default module) by recursive calls until a
        callable is reached.

        Args:
            name: Module name, checked via `util.is_valid_name`.
            module: Module specification: dictionary, JSON file path,
                dotted library path, key of `modules`, callable, or any other
                value which is then passed as first argument to the default
                module.
            modules (dict): Optional registry mapping keywords to modules; the
                'default' entry is used if `default_module` is not given.
            default_module: Module used when `module` does not itself identify
                a module.
            kwargs: Additional arguments for the module.

        Returns:
            Tuple `(module, first_arg, kwargs)` where `module` is callable and
            `first_arg` is None if no first argument was specified.

        Raises:
            TensorforceError: If an argument or the specification is invalid.
        """
        # name
        if not util.is_valid_name(name=name):
            raise TensorforceError.value(name='module', argument='name', value=name)
        # modules
        if modules is not None and not isinstance(modules, dict):
            # NOTE(review): other call sites pass dtype=... to TensorforceError.type,
            # confirm that value=... is accepted here
            raise TensorforceError.type(name='module', argument='modules', value=modules)

        # Whether a default module is available; checked None-safe, since
        # "'default' in None" would raise a bare TypeError
        has_default = default_module is not None or \
            (modules is not None and 'default' in modules)

        if isinstance(module, dict):
            # Dictionary module specification (type either given via 'type' or 'default_module')
            util.deep_disjoint_update(target=kwargs, source=module)
            module = kwargs.pop('type', default_module)
            return Module.get_module_class_and_kwargs(
                name=name, module=module, modules=modules, default_module=default_module, **kwargs
            )

        elif isinstance(module, str):
            if os.path.isfile(module):
                # JSON file module specification
                with open(module, 'r') as fp:
                    module = json.load(fp=fp)
                return Module.get_module_class_and_kwargs(
                    name=name, module=module, modules=modules, default_module=default_module,
                    **kwargs
                )

            elif '.' in module:
                # Library module specification
                library_name, module_name = module.rsplit('.', 1)
                library = importlib.import_module(name=library_name)
                module = getattr(library, module_name)
                return Module.get_module_class_and_kwargs(
                    name=name, module=module, modules=modules, default_module=default_module,
                    **kwargs
                )

            elif modules is not None and module in modules:
                # Keyword module specification (registry is passed on, so an
                # entry which is itself a keyword can still be resolved)
                return Module.get_module_class_and_kwargs(
                    name=name, module=modules[module], modules=modules,
                    default_module=default_module, **kwargs
                )

            elif has_default:
                # Default module specification, string is the first argument
                if '_first_arg' in kwargs:
                    raise TensorforceError.value(name='module kwargs', value='_first_arg')
                kwargs['_first_arg'] = module
                if default_module is None:
                    default_module = modules['default']
                return Module.get_module_class_and_kwargs(
                    name=name, module=default_module, modules=modules, **kwargs
                )

            else:
                raise TensorforceError.value(name='module specification', value=module)

        elif not callable(module) and has_default:
            # Default module specification
            if '_first_arg' in kwargs:
                raise TensorforceError.value(name='module kwargs', value='_first_arg')
            if module is not None:
                kwargs['_first_arg'] = module
            if default_module is None:
                default_module = modules['default']
            return Module.get_module_class_and_kwargs(
                name=name, module=default_module, modules=modules, **kwargs
            )

        elif callable(module):
            # Fully resolved: split off the first argument
            first_arg = kwargs.pop('_first_arg', None)
            return module, first_arg, kwargs

        else:
            raise TensorforceError.value(name='module specification', value=module)