def create(environment, **kwargs):
    """
    Creates an environment from a specification.

    Args:
        environment (specification): JSON file, specification key, configuration
            dictionary, library module, or `Environment` subclass
            (<span style="color:#C00000"><b>required</b></span>).
        kwargs: Additional arguments.
    """
    if isinstance(environment, Environment):
        # Already constructed: pass through unchanged.
        # TODO: asserts???????
        return environment

    if isinstance(environment, dict):
        # Dictionary specification: merge into kwargs and recurse on the type key.
        util.deep_disjoint_update(target=kwargs, source=environment)
        spec_type = kwargs.pop('environment', kwargs.pop('type', 'default'))
        assert spec_type is not None
        return Environment.create(environment=spec_type, **kwargs)

    if isinstance(environment, str):
        if os.path.isfile(environment):
            # JSON file specification: load the file and recurse on its contents.
            with open(environment, 'r') as fp:
                spec = json.load(fp=fp)
            util.deep_disjoint_update(target=kwargs, source=spec)
            spec_type = kwargs.pop('environment', kwargs.pop('type', 'default'))
            assert spec_type is not None
            return Environment.create(environment=spec_type, **kwargs)

        if '.' in environment:
            # Library specification: import the module and instantiate the class.
            module_path, class_name = environment.rsplit('.', 1)
            module = importlib.import_module(name=module_path)
            instance = getattr(module, class_name)(**kwargs)
            assert isinstance(instance, Environment)
            return instance

        # Keyword specification: look up in the registry and instantiate.
        instance = tensorforce.environments.environments[environment](**kwargs)
        assert isinstance(instance, Environment)
        return instance

    assert False
def optimizer_arguments(self, **kwargs):
    """
    Collects optimizer arguments from the parent class and merges in those of both
    sub-objectives; `deep_disjoint_update` enforces that the sources do not conflict.
    """
    merged = super().optimizer_arguments()
    for objective in (self.objective1, self.objective2):
        util.deep_disjoint_update(
            target=merged, source=objective.optimizer_arguments(**kwargs)
        )
    return merged
def tf_optimize(self, indices):
    """
    Builds the TensorFlow optimization step for the given batch of memory indices.

    Args:
        indices: Tensor of memory row indices forming the optimization batch
            (assumed 1-D — TODO confirm against memory module).

    Returns:
        The final operation (`optimized`), threaded through all requested summaries.
    """
    # Baseline optimization
    # If a separate baseline optimizer exists, optimize the baseline first and make
    # everything below depend on that step; otherwise depend on the indices tensor.
    if self.baseline_optimizer is not None:
        optimized = self.optimize_baseline(indices=indices)
        dependencies = (optimized,)
    else:
        dependencies = (indices,)

    # Reward estimation
    with tf.control_dependencies(control_inputs=dependencies):
        reward = self.memory.retrieve(indices=indices, values='reward')
        reward = self.estimator.complete(
            baseline=self.baseline_policy, memory=self.memory, indices=indices,
            reward=reward
        )
        reward = self.add_summary(
            label=('empirical-reward', 'rewards'), name='empirical-reward', tensor=reward
        )
        # True only when the separate baseline policy is trained through the main
        # optimizer without its own optimizer or objective.
        is_baseline_optimized = self.separate_baseline_policy and \
            self.baseline_optimizer is None and self.baseline_objective is None
        reward = self.estimator.estimate(
            baseline=self.baseline_policy, memory=self.memory, indices=indices,
            reward=reward, is_baseline_optimized=is_baseline_optimized
        )
        reward = self.add_summary(
            label=('estimated-reward', 'rewards'), name='estimated-reward', tensor=reward
        )

    # Stop gradients of estimated rewards if separate baseline optimization
    if not is_baseline_optimized:
        reward = tf.stop_gradient(input=reward)

    # Retrieve states, internals and actions
    past_horizon = self.policy.past_horizon(is_optimization=True)
    if self.separate_baseline_policy and self.baseline_optimizer is None:
        # Both networks are fed from the same sampled sequences, so their required
        # state horizons must agree.
        assertion = tf.debugging.assert_equal(
            x=past_horizon, y=self.baseline_policy.past_horizon(is_optimization=True),
            message="Policy and baseline depend on a different number of previous states."
        )
    else:
        assertion = past_horizon

    with tf.control_dependencies(control_inputs=(assertion,)):
        # horizon change: see timestep-based batch sampling
        starts, lengths, states, internals = self.memory.predecessors(
            indices=indices, horizon=past_horizon, sequence_values='states',
            initial_values='internals'
        )
        Module.update_tensors(dependency_starts=starts, dependency_lengths=lengths)
        auxiliaries, actions = self.memory.retrieve(
            indices=indices, values=('auxiliaries', 'actions')
        )

    # Optimizer arguments
    # Temporarily set the 'independent' flag tensor; its previous value is restored
    # at the end of this method.
    independent = Module.update_tensor(
        name='independent',
        tensor=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool'))
    )
    variables = self.get_variables(only_trainable=True)
    arguments = dict(
        states=states, internals=internals, auxiliaries=auxiliaries, actions=actions,
        reward=reward
    )
    fn_loss = self.total_loss

    def fn_kl_divergence(states, internals, auxiliaries, actions, reward, other=None):
        # KL divergence of the policy, plus the baseline policy's when the baseline
        # is trained jointly via its own objective through the main optimizer.
        kl_divergence = self.policy.kl_divergence(
            states=states, internals=internals, auxiliaries=auxiliaries, other=other
        )
        if self.baseline_optimizer is None and self.baseline_objective is not None:
            kl_divergence += self.baseline_policy.kl_divergence(
                states=states, internals=internals, auxiliaries=auxiliaries, other=other
            )
        return kl_divergence

    if self.global_model is None:
        global_variables = None
    else:
        # Distributed setting: the optimizer also updates the global model's variables.
        global_variables = self.global_model.get_variables(only_trainable=True)

    kwargs = self.objective.optimizer_arguments(
        policy=self.policy, baseline=self.baseline_policy
    )
    if self.baseline_optimizer is None and self.baseline_objective is not None:
        util.deep_disjoint_update(
            target=kwargs,
            source=self.baseline_objective.optimizer_arguments(policy=self.baseline_policy)
        )

    dependencies = util.flatten(xs=arguments)

    # KL divergence before
    # Snapshot the pre-update distribution parameters so the post-update KL-divergence
    # summaries below can be computed against them.
    if self.is_summary_logged(
        label=('kl-divergence', 'action-kl-divergences', 'kl-divergences')
    ):
        with tf.control_dependencies(control_inputs=dependencies):
            kldiv_reference = self.policy.kldiv_reference(
                states=states, internals=internals, auxiliaries=auxiliaries
            )
            dependencies = util.flatten(xs=kldiv_reference)

    # Optimization
    with tf.control_dependencies(control_inputs=dependencies):
        optimized = self.optimizer.minimize(
            variables=variables, arguments=arguments, fn_loss=fn_loss,
            fn_kl_divergence=fn_kl_divergence, global_variables=global_variables, **kwargs
        )

    with tf.control_dependencies(control_inputs=(optimized,)):
        # Loss summaries
        if self.is_summary_logged(label=('loss', 'objective-loss', 'losses')):
            objective_loss = self.objective.loss_per_instance(policy=self.policy, **arguments)
            objective_loss = tf.math.reduce_mean(input_tensor=objective_loss, axis=0)
        if self.is_summary_logged(label=('objective-loss', 'losses')):
            optimized = self.add_summary(
                label=('objective-loss', 'losses'), name='objective-loss',
                tensor=objective_loss, pass_tensors=optimized
            )
        if self.is_summary_logged(label=('loss', 'regularization-loss', 'losses')):
            regularization_loss = self.regularize(
                states=states, internals=internals, auxiliaries=auxiliaries
            )
        if self.is_summary_logged(label=('regularization-loss', 'losses')):
            optimized = self.add_summary(
                label=('regularization-loss', 'losses'), name='regularization-loss',
                tensor=regularization_loss, pass_tensors=optimized
            )
        if self.is_summary_logged(label=('loss', 'losses')):
            loss = objective_loss + regularization_loss
        if self.baseline_optimizer is None and self.baseline_objective is not None:
            # Baseline loss summaries (baseline trained through the main optimizer).
            if self.is_summary_logged(label=('loss', 'baseline-objective-loss', 'losses')):
                if self.baseline_objective is None:
                    baseline_objective_loss = self.objective.loss_per_instance(
                        policy=self.baseline_policy, **arguments
                    )
                else:
                    baseline_objective_loss = self.baseline_objective.loss_per_instance(
                        policy=self.baseline_policy, **arguments
                    )
                baseline_objective_loss = tf.math.reduce_mean(
                    input_tensor=baseline_objective_loss, axis=0
                )
            if self.is_summary_logged(label=('baseline-objective-loss', 'losses')):
                optimized = self.add_summary(
                    label=('baseline-objective-loss', 'losses'),
                    name='baseline-objective-loss', tensor=baseline_objective_loss,
                    pass_tensors=optimized
                )
            if self.is_summary_logged(
                label=('loss', 'baseline-regularization-loss', 'losses')
            ):
                baseline_regularization_loss = self.baseline_policy.regularize()
            if self.is_summary_logged(label=('baseline-regularization-loss', 'losses')):
                optimized = self.add_summary(
                    label=('baseline-regularization-loss', 'losses'),
                    name='baseline-regularization-loss',
                    tensor=baseline_regularization_loss, pass_tensors=optimized
                )
            if self.is_summary_logged(label=('loss', 'baseline-loss', 'losses')):
                baseline_loss = baseline_objective_loss + baseline_regularization_loss
            if self.is_summary_logged(label=('baseline-loss', 'losses')):
                optimized = self.add_summary(
                    label=('baseline-loss', 'losses'), name='baseline-loss',
                    tensor=baseline_loss, pass_tensors=optimized
                )
            if self.is_summary_logged(label=('loss', 'losses')):
                # In this configuration the total loss includes the weighted baseline loss.
                loss += self.baseline_loss_weight * baseline_loss
        if self.is_summary_logged(label=('loss', 'losses')):
            optimized = self.add_summary(
                label=('loss', 'losses'), name='loss', tensor=loss, pass_tensors=optimized
            )

        # Entropy summaries
        if self.is_summary_logged(label=('entropy', 'action-entropies', 'entropies')):
            entropies = self.policy.entropy(
                states=states, internals=internals, auxiliaries=auxiliaries,
                include_per_action=(len(self.actions_spec) > 1)
            )
        if self.is_summary_logged(label=('entropy', 'entropies')):
            if len(self.actions_spec) == 1:
                optimized = self.add_summary(
                    label=('entropy', 'entropies'), name='entropy', tensor=entropies,
                    pass_tensors=optimized
                )
            else:
                # '*' entry presumably aggregates over all actions when per-action
                # values are included — TODO confirm against policy.entropy.
                optimized = self.add_summary(
                    label=('entropy', 'entropies'), name='entropy', tensor=entropies['*'],
                    pass_tensors=optimized
                )
        if len(self.actions_spec) > 1 and \
                self.is_summary_logged(label=('action-entropies', 'entropies')):
            for name in self.actions_spec:
                optimized = self.add_summary(
                    label=('action-entropies', 'entropies'), name=(name + '-entropy'),
                    tensor=entropies[name], pass_tensors=optimized
                )

        # KL divergence summaries
        if self.is_summary_logged(
            label=('kl-divergence', 'action-kl-divergences', 'kl-divergences')
        ):
            kl_divergences = self.policy.kl_divergence(
                states=states, internals=internals, auxiliaries=auxiliaries,
                other=kldiv_reference, include_per_action=(len(self.actions_spec) > 1)
            )
        if self.is_summary_logged(label=('kl-divergence', 'kl-divergences')):
            if len(self.actions_spec) == 1:
                optimized = self.add_summary(
                    label=('kl-divergence', 'kl-divergences'), name='kl-divergence',
                    tensor=kl_divergences, pass_tensors=optimized
                )
            else:
                optimized = self.add_summary(
                    label=('kl-divergence', 'kl-divergences'), name='kl-divergence',
                    tensor=kl_divergences['*'], pass_tensors=optimized
                )
        if len(self.actions_spec) > 1 and \
                self.is_summary_logged(label=('action-kl-divergences', 'kl-divergences')):
            for name in self.actions_spec:
                optimized = self.add_summary(
                    label=('action-kl-divergences', 'kl-divergences'),
                    name=(name + '-kl-divergence'), tensor=kl_divergences[name],
                    pass_tensors=optimized
                )

    # Restore the previous value of the 'independent' flag tensor.
    Module.update_tensor(name='independent', tensor=independent)

    return optimized
def create(agent=None, environment=None, **kwargs):
    """
    Creates an agent from a specification.

    Args:
        agent (specification): JSON file, specification key, configuration dictionary,
            library module, or `Agent` subclass
            (<span style="color:#00C000"><b>default</b></span>: Policy agent).
        environment (Environment): Environment which the agent is supposed to be trained
            on, environment-related arguments like state/action space specifications
            will be extract if given.
        kwargs: Additional arguments.
    """

    def merge_environment_spec():
        # Merge the environment's state/action specs (and max_episode_timesteps, if
        # defined) into kwargs; deep_disjoint_update flags conflicting values.
        if environment is not None:
            env_spec = dict(states=environment.states(), actions=environment.actions())
            if environment.max_episode_timesteps() is not None:
                env_spec['max_episode_timesteps'] = environment.max_episode_timesteps()
            util.deep_disjoint_update(target=kwargs, source=env_spec)

    def initialized(instance):
        # Shared tail of the construction branches: check type, ensure the agent is
        # initialized, and return it.
        assert isinstance(instance, Agent)
        if not instance.is_initialized:
            instance.initialize()
        return instance

    if agent is None:
        agent = 'default'

    if isinstance(agent, Agent):
        # Agent instance: initialize if necessary and pass through.
        if not agent.is_initialized:
            agent.initialize()
        return agent

    elif isinstance(agent, dict):
        # Dictionary specification: merge into kwargs and recurse on the type key.
        util.deep_disjoint_update(target=kwargs, source=agent)
        agent = kwargs.pop('agent', kwargs.pop('type', 'default'))
        return Agent.create(agent=agent, environment=environment, **kwargs)

    elif isinstance(agent, str):
        if os.path.isfile(agent):
            # JSON file specification: load the file and recurse on its contents.
            with open(agent, 'r') as fp:
                agent = json.load(fp=fp)
            util.deep_disjoint_update(target=kwargs, source=agent)
            agent = kwargs.pop('agent', kwargs.pop('type', 'default'))
            return Agent.create(agent=agent, environment=environment, **kwargs)

        elif '.' in agent:
            # Library specification: import the module and instantiate the class.
            library_name, module_name = agent.rsplit('.', 1)
            library = importlib.import_module(name=library_name)
            agent = getattr(library, module_name)
            merge_environment_spec()
            return initialized(agent(**kwargs))

        else:
            # Keyword specification: look up in the registry and instantiate.
            merge_environment_spec()
            return initialized(tensorforce.agents.agents[agent](**kwargs))

    else:
        # NOTE(review): assert is stripped under `python -O`; consider raising a
        # TensorforceError here instead, as newer versions of this function do.
        assert False
def create(agent='tensorforce', environment=None, **kwargs):
    """
    Creates an agent from a specification.

    Args:
        agent (specification | Agent class/object): JSON file, specification key,
            configuration dictionary, library module, or `Agent` class/object
            (<span style="color:#00C000"><b>default</b></span>: Policy agent).
        environment (Environment object): Environment which the agent is supposed to be
            trained on, environment-related arguments like state/action space
            specifications and maximum episode length will be extract if given
            (<span style="color:#00C000"><b>recommended</b></span>).
        kwargs: Additional arguments.
    """
    if isinstance(agent, Agent):
        # Agent instance: verify it is compatible with the environment and kwargs,
        # then reset or initialize it and pass it through.
        if environment is not None:
            assert util.deep_equal(xs=agent.spec['states'], ys=environment.states())
            assert util.deep_equal(xs=agent.spec['actions'], ys=environment.actions())
            assert environment.max_episode_timesteps() is None or \
                agent.spec['max_episode_timesteps'] >= environment.max_episode_timesteps()
        for key, value in kwargs.items():
            # Extra kwargs may not contradict the agent's recorded specification.
            assert agent.spec[key] == value
        if agent.is_initialized:
            agent.reset()
        else:
            agent.initialize()
        return agent

    elif isinstance(agent, type) and issubclass(agent, Agent):
        # Agent subclass: fill in specs from the environment where not given,
        # instantiate, then recurse so the instance branch above runs its checks.
        if environment is not None:
            if 'states' in kwargs:
                assert util.deep_equal(xs=kwargs['states'], ys=environment.states())
            else:
                kwargs['states'] = environment.states()
            if 'actions' in kwargs:
                assert util.deep_equal(xs=kwargs['actions'], ys=environment.actions())
            else:
                kwargs['actions'] = environment.actions()
            if environment.max_episode_timesteps() is None:
                pass
            elif 'max_episode_timesteps' in kwargs:
                # Explicit limit must cover the environment's episode length.
                assert kwargs['max_episode_timesteps'] >= environment.max_episode_timesteps()
            else:
                kwargs['max_episode_timesteps'] = environment.max_episode_timesteps()
        agent = agent(**kwargs)
        assert isinstance(agent, Agent)
        return Agent.create(agent=agent, environment=environment)

    elif isinstance(agent, dict):
        # Dictionary specification: merge into kwargs and recurse on the type key.
        util.deep_disjoint_update(target=kwargs, source=agent)
        agent = kwargs.pop('agent', kwargs.pop('type', 'default'))
        return Agent.create(agent=agent, environment=environment, **kwargs)

    elif isinstance(agent, str):
        if os.path.isfile(agent):
            # JSON file specification: load the file and recurse on its contents.
            with open(agent, 'r') as fp:
                agent = json.load(fp=fp)
            return Agent.create(agent=agent, environment=environment, **kwargs)
        elif '.' in agent:
            # Library specification: import the module and recurse on the class.
            library_name, module_name = agent.rsplit('.', 1)
            library = importlib.import_module(name=library_name)
            agent = getattr(library, module_name)
            return Agent.create(agent=agent, environment=environment, **kwargs)
        elif agent in tensorforce.agents.agents:
            # Keyword specification: look up in the registry and recurse.
            agent = tensorforce.agents.agents[agent]
            return Agent.create(agent=agent, environment=environment, **kwargs)
        else:
            # NOTE(review): passes dtype= rather than value= — presumably a slip,
            # but preserved here; confirm against TensorforceError.value's signature.
            raise TensorforceError.value(name='Agent.create', argument='agent', dtype=agent)

    else:
        raise TensorforceError.type(name='Agent.create', argument='agent', dtype=type(agent))
def get_module_class_and_args(*, name, module=None, modules=None, default_module=None,
                              disable_first_arg=False, **kwargs):
    """
    Resolves a module specification to a `(class, args, kwargs)` triple, recursing
    until the specification is a callable.

    Args:
        name (str): Name for the resolved module; stored in the returned kwargs.
        module: Specification — dict, JSON file path, 'lib.Class' path, keyword in
            `modules`, callable, or a plain value used as first constructor argument.
        modules (dict): Registry mapping keywords to module classes.
        default_module: Fallback class (or keyword in `modules`) when no explicit
            type is given.
        disable_first_arg (bool): Forbid interpreting a bare value as first argument.
        kwargs: Additional constructor arguments, accumulated across recursion.

    Returns:
        Tuple of (module class/callable, positional args tuple, kwargs dict).

    Raises:
        TensorforceError: On invalid argument types or unresolvable specifications.
    """
    # name
    if not isinstance(name, str):
        raise TensorforceError.type(name='Module.add_module', argument='name', dtype=type(name))
    # modules
    if modules is not None and not isinstance(modules, dict):
        raise TensorforceError.type(name='Module.add_module', argument='modules', dtype=type(modules))
    # default_module
    if default_module is not None and default_module not in modules and \
            not issubclass(default_module, Module):
        raise TensorforceError.value(name='Module.add_module', argument='default_module', value=default_module)
    # disable_first_arg
    if not isinstance(disable_first_arg, bool):
        raise TensorforceError.type(name='Module.add_module', argument='disable_first_arg', dtype=type(disable_first_arg))

    # module
    if isinstance(module, dict):
        # Dictionary module specification (type either given via 'type' or 'default_module')
        util.deep_disjoint_update(target=kwargs, source=module)
        module = kwargs.pop('type', default_module)
        return Module.get_module_class_and_args(
            name=name, module=module, modules=modules, default_module=default_module,
            disable_first_arg=True, **kwargs
        )

    elif isinstance(module, str):
        if os.path.isfile(module):
            # JSON file module specification
            with open(module, 'r') as fp:
                module = json.load(fp=fp)
            return Module.get_module_class_and_args(
                name=name, module=module, modules=modules, default_module=default_module,
                disable_first_arg=True, **kwargs
            )
        elif '.' in module:
            # Library module specification
            library_name, module_name = module.rsplit('.', 1)
            library = importlib.import_module(name=library_name)
            module = getattr(library, module_name)
            return Module.get_module_class_and_args(
                name=name, module=module, modules=modules, default_module=default_module,
                disable_first_arg=True, **kwargs
            )
        elif modules is not None and module in modules:
            # Keyword module specification
            return Module.get_module_class_and_args(
                name=name, module=modules[module], modules=modules,
                default_module=default_module, disable_first_arg=True, **kwargs
            )
        elif 'default' in modules or default_module is not None:
            # Default module specification: the string becomes the first constructor
            # argument of the default module.
            # NOTE(review): `'default' in modules` raises TypeError if modules is
            # None here — presumably callers always pass modules; verify.
            if '_first_arg' in kwargs:
                raise TensorforceError.invalid(name='Module.add_module', argument='_first_arg')
            if module is not None:
                if disable_first_arg:
                    raise TensorforceError.value(name='Module.add_module', argument='module', value=module)
                kwargs['_first_arg'] = module
            if default_module is None:
                default_module = modules['default']
            return Module.get_module_class_and_args(
                name=name, module=default_module, modules=modules, **kwargs
            )
        else:
            raise TensorforceError.value(name='Module.add_module', argument='module', value=module)

    elif not callable(module) and ('default' in modules or default_module is not None):
        # Default module specification: non-callable value becomes the first
        # constructor argument of the default module.
        if '_first_arg' in kwargs:
            raise TensorforceError.invalid(name='Module.add_module', argument='_first_arg')
        if module is not None:
            kwargs['_first_arg'] = module
        if default_module is None:
            default_module = modules['default']
        return Module.get_module_class_and_args(
            name=name, module=default_module, modules=modules, **kwargs
        )

    elif callable(module):
        # Base case: module is a class/callable; package up args and kwargs.
        if '_first_arg' in kwargs:
            args = (kwargs.pop('_first_arg'), )
        else:
            args = ()
        kwargs['name'] = name
        return module, args, kwargs

    else:
        raise TensorforceError.value(name='Module.add_module', argument='module', value=module)
def create(environment, max_episode_timesteps=None, **kwargs):
    """
    Creates an environment from a specification.

    Args:
        environment (specification | Environment object): JSON file, specification key,
            configuration dictionary, library module, or `Environment` object
            (<span style="color:#C00000"><b>required</b></span>).
        max_episode_timesteps (int > 0): Maximum number of timesteps per episode,
            overwrites the environment default if defined
            (<span style="color:#00C000"><b>default</b></span>: environment default).
        kwargs: Additional arguments.
    """
    if isinstance(environment, Environment):
        # Instance given: optionally wrap to enforce the episode length limit.
        if max_episode_timesteps is None:
            return environment
        return EnvironmentWrapper(
            environment=environment, max_episode_timesteps=max_episode_timesteps
        )

    if isinstance(environment, dict):
        # Dictionary specification: merge into kwargs and recurse on the type key.
        util.deep_disjoint_update(target=kwargs, source=environment)
        spec_type = kwargs.pop('environment', kwargs.pop('type', 'default'))
        assert spec_type is not None
        return Environment.create(
            environment=spec_type, max_episode_timesteps=max_episode_timesteps, **kwargs
        )

    if isinstance(environment, str):
        if os.path.isfile(environment):
            # JSON file specification: load the file and recurse on its contents.
            with open(environment, 'r') as fp:
                spec = json.load(fp=fp)
            util.deep_disjoint_update(target=kwargs, source=spec)
            spec_type = kwargs.pop('environment', kwargs.pop('type', 'default'))
            assert spec_type is not None
            return Environment.create(
                environment=spec_type, max_episode_timesteps=max_episode_timesteps,
                **kwargs
            )

        if '.' in environment:
            # Library specification: import the module and instantiate the class.
            module_path, class_name = environment.rsplit('.', 1)
            module = importlib.import_module(name=module_path)
            instance = getattr(module, class_name)(**kwargs)
        else:
            # Keyword specification: look up in the registry and instantiate.
            instance = tensorforce.environments.environments[environment](**kwargs)
        assert isinstance(instance, Environment)
        # Recurse so the instance branch applies the wrapper if needed.
        return Environment.create(
            environment=instance, max_episode_timesteps=max_episode_timesteps
        )

    assert False
def get_module_class_and_args(*, name, module=None, modules=None, default_module=None,
                              disable_first_arg=False, **kwargs):
    """
    Resolves a module specification to a `(class, args, kwargs)` triple, recursing
    until the specification is a plain callable.

    Args:
        name (str): Name for the resolved module; stored in the returned kwargs.
        module: Specification — dict, JSON file path, keyword in `modules`, importable
            name, callable, or a plain value used as first constructor argument.
        modules (dict): Registry mapping keywords to module classes.
        default_module: Fallback class (or keyword in `modules`) when no explicit
            type is given.
        disable_first_arg (bool): Forbid interpreting a bare value as first argument.
        kwargs: Additional constructor arguments, accumulated across recursion.

    Returns:
        Tuple of (module class/callable, positional args tuple, kwargs dict).

    Raises:
        TensorforceError: On invalid argument types or unresolvable specifications.
    """
    # name
    if not isinstance(name, str):
        raise TensorforceError.type(name='Module.add_module', argument='name', dtype=type(name))
    # modules
    if modules is not None and not isinstance(modules, dict):
        raise TensorforceError.type(name='Module.add_module', argument='modules', dtype=type(modules))
    # default_module
    if default_module is not None and default_module not in modules and \
            not issubclass(default_module, Module):
        raise TensorforceError.value(name='Module.add_module', argument='default_module', value=default_module)
    # disable_first_arg
    if not isinstance(disable_first_arg, bool):
        raise TensorforceError.type(name='Module.add_module', argument='disable_first_arg', dtype=type(disable_first_arg))

    # module
    if isinstance(module, dict):
        # Dictionary module specification (type either given via 'type' or 'default_module')
        util.deep_disjoint_update(target=kwargs, source=module)
        module = kwargs.pop('type', default_module)
        return Module.get_module_class_and_args(
            name=name, module=module, modules=modules, default_module=default_module,
            disable_first_arg=True, **kwargs
        )

    elif isinstance(module, str):
        if os.path.isfile(module):
            # JSON file module specification
            with open(module, 'r') as fp:
                module = json.load(fp=fp)
            return Module.get_module_class_and_args(
                name=name, module=module, modules=modules, default_module=default_module,
                disable_first_arg=True, **kwargs
            )
        elif modules is not None and module in modules:
            # Keyword module specification
            return Module.get_module_class_and_args(
                name=name, module=modules[module], modules=modules,
                default_module=default_module, disable_first_arg=True, **kwargs
            )
        else:
            # Library module specification
            assert modules is not None
            # Walk up the MRO of an arbitrary registry entry to find the common
            # parent class directly below Module; used to validate the import.
            parent_class = next(iter(modules.values()))
            while len(parent_class.mro()) >= 4 and parent_class.mro()[1] != Module:
                parent_class = parent_class.mro()[1]
            module = util.try_import_module(module=module, parent_class=parent_class)
            if module is not None:
                return Module.get_module_class_and_args(
                    name=name, module=module, modules=modules,
                    default_module=default_module, disable_first_arg=True, **kwargs
                )
            if 'default' in modules or default_module is not None:
                # Default module specification
                # NOTE(review): module is None at this point (the string spec was
                # overwritten by try_import_module), so the `_first_arg` branch below
                # can never trigger here — confirm whether that is intended.
                if '_first_arg' in kwargs:
                    raise TensorforceError.invalid(
                        name='Module.add_module', argument='_first_arg'
                    )
                if module is not None:
                    if disable_first_arg:
                        raise TensorforceError.value(
                            name='Module.add_module', argument='module', value=module
                        )
                    kwargs['_first_arg'] = module
                if default_module is None:
                    default_module = modules['default']
                return Module.get_module_class_and_args(
                    name=name, module=default_module, modules=modules, **kwargs
                )
            else:
                raise TensorforceError.value(name='Module.add_module', argument='module', value=module)

    elif (not callable(module) or isinstance(module, tf.keras.Model) or
            (isinstance(module, type) and issubclass(module, tf.keras.Model))
            ) and ('default' in modules or default_module is not None):
        # Default module specification: non-callable values and Keras models become
        # the first constructor argument of the default module.
        if '_first_arg' in kwargs:
            raise TensorforceError.invalid(name='Module.add_module', argument='_first_arg')
        if module is not None:
            kwargs['_first_arg'] = module
        if default_module is None:
            default_module = modules['default']
        return Module.get_module_class_and_args(
            name=name, module=default_module, modules=modules, **kwargs
        )

    elif callable(module):
        # Base case: module is a class/callable; package up args and kwargs.
        if '_first_arg' in kwargs:
            args = (kwargs.pop('_first_arg'), )
        else:
            args = ()
        kwargs['name'] = name
        return module, args, kwargs

    else:
        raise TensorforceError.value(name='Module.add_module', argument='module', value=module)
def create(environment=None, max_episode_timesteps=None, reward_shaping=None, remote=None,
           blocking=False, host=None, port=None, **kwargs):
    """
    Creates an environment from a specification. In case of "socket-server" remote mode,
    runs environment in server communication loop until closed.

    Args:
        environment (specification | Environment class/object): JSON file, specification
            key, configuration dictionary, library module, `Environment` class/object,
            or gym.Env (<span style="color:#C00000"><b>required</b></span>, invalid for
            "socket-client" remote mode).
        max_episode_timesteps (int > 0): Maximum number of timesteps per episode,
            overwrites the environment default if defined
            (<span style="color:#00C000"><b>default</b></span>: environment default,
            invalid for "socket-client" remote mode).
        reward_shaping (callable[(s,a,t,r,s') -> r|(r,t)] | str): Reward shaping function
            mapping state, action, terminal, reward and next state to shaped reward and
            terminal, or a string expression with arguments "states", "actions",
            "terminal", "reward" and "next_states", e.g.
            "-1.0 if terminal else max(reward, 0.0)"
            (<span style="color:#00C000"><b>default</b></span>: no reward shaping).
        remote ("multiprocessing" | "socket-client" | "socket-server"): Communication
            mode for remote environment execution of parallelized environment execution,
            "socket-client" mode requires a corresponding "socket-server" running, and
            "socket-server" mode runs environment in server communication loop until
            closed (<span style="color:#00C000"><b>default</b></span>: local execution).
        blocking (bool): Whether remote environment calls should be blocking
            (<span style="color:#00C000"><b>default</b></span>: not blocking, invalid
            unless "multiprocessing" or "socket-client" remote mode).
        host (str): Socket server hostname or IP address
            (<span style="color:#C00000"><b>required</b></span> only for
            "socket-client" remote mode).
        port (int): Socket server port
            (<span style="color:#C00000"><b>required</b></span> only for
            "socket-client/server" remote mode).
        kwargs: Additional arguments.
    """
    # Validate remote-mode-only arguments.
    if remote not in ('multiprocessing', 'socket-client'):
        if blocking:
            raise TensorforceError.invalid(
                name='Environment.create', argument='blocking',
                condition='no multiprocessing/socket-client instance'
            )
    if remote not in ('socket-client', 'socket-server'):
        if host is not None:
            raise TensorforceError.invalid(
                name='Environment.create', argument='host',
                condition='no socket instance'
            )
        elif port is not None:
            raise TensorforceError.invalid(
                name='Environment.create', argument='port',
                condition='no socket instance'
            )

    if remote == 'multiprocessing':
        # Remote execution in a subprocess; wraps the given specification.
        from tensorforce.environments import MultiprocessingEnvironment
        environment = MultiprocessingEnvironment(
            blocking=blocking, environment=environment,
            max_episode_timesteps=max_episode_timesteps, reward_shaping=reward_shaping,
            **kwargs
        )
        return environment

    elif remote == 'socket-client':
        # Connect to an existing socket-server; no local environment is constructed,
        # so environment-related arguments are invalid.
        if environment is not None:
            raise TensorforceError.invalid(
                name='Environment.create', argument='environment',
                condition='socket-client instance'
            )
        elif max_episode_timesteps is not None:
            raise TensorforceError.invalid(
                name='Environment.create', argument='max_episode_timesteps',
                condition='socket-client instance'
            )
        elif len(kwargs) > 0:
            raise TensorforceError.invalid(
                name='Environment.create', argument='kwargs',
                condition='socket-client instance'
            )
        from tensorforce.environments import SocketEnvironment
        environment = SocketEnvironment(host=host, port=port, blocking=blocking)
        return environment

    elif remote == 'socket-server':
        # Runs the server communication loop until closed; deliberately returns None.
        from tensorforce.environments import SocketEnvironment
        SocketEnvironment.remote(
            port=port, environment=environment,
            max_episode_timesteps=max_episode_timesteps, reward_shaping=reward_shaping,
            **kwargs
        )

    elif remote is not None:
        raise TensorforceError.value(name='Environment.create', argument='remote', value=remote)

    elif isinstance(environment, (EnvironmentWrapper, RemoteEnvironment)):
        # Already wrapped: further wrapper arguments are invalid.
        if max_episode_timesteps is not None:
            raise TensorforceError.invalid(
                name='Environment.create', argument='max_episode_timesteps',
                condition='EnvironmentWrapper instance'
            )
        if len(kwargs) > 0:
            raise TensorforceError.invalid(
                name='Environment.create', argument='kwargs',
                condition='EnvironmentWrapper instance'
            )
        return environment

    elif isinstance(environment, type) and \
            issubclass(environment, (EnvironmentWrapper, RemoteEnvironment)):
        # Wrapper classes cannot be instantiated via create().
        raise TensorforceError.type(
            name='Environment.create', argument='environment', dtype=type(environment)
        )

    elif isinstance(environment, Environment):
        # Environment instance: wrap to apply episode limit and reward shaping.
        return EnvironmentWrapper(
            environment=environment, max_episode_timesteps=max_episode_timesteps,
            reward_shaping=reward_shaping
        )

    elif isinstance(environment, type) and issubclass(environment, Environment):
        # Environment subclass: instantiate, then recurse to apply wrapping.
        environment = environment(**kwargs)
        assert isinstance(environment, Environment)
        return Environment.create(
            environment=environment, max_episode_timesteps=max_episode_timesteps,
            reward_shaping=reward_shaping
        )

    elif isinstance(environment, dict):
        # Dictionary specification: merge into kwargs and recurse on the type key;
        # explicit arguments take precedence over values from the specification.
        util.deep_disjoint_update(target=kwargs, source=environment)
        environment = kwargs.pop('environment', kwargs.pop('type', 'default'))
        assert environment is not None
        if max_episode_timesteps is None:
            max_episode_timesteps = kwargs.pop('max_episode_timesteps', None)
        if reward_shaping is None:
            reward_shaping = kwargs.pop('reward_shaping', None)
        return Environment.create(
            environment=environment, max_episode_timesteps=max_episode_timesteps,
            reward_shaping=reward_shaping, **kwargs
        )

    elif isinstance(environment, str):
        if os.path.isfile(environment):
            # JSON file specification: load the file and recurse on its contents.
            with open(environment, 'r') as fp:
                environment = json.load(fp=fp)
            util.deep_disjoint_update(target=kwargs, source=environment)
            environment = kwargs.pop('environment', kwargs.pop('type', 'default'))
            assert environment is not None
            if max_episode_timesteps is None:
                max_episode_timesteps = kwargs.pop('max_episode_timesteps', None)
            if reward_shaping is None:
                reward_shaping = kwargs.pop('reward_shaping', None)
            return Environment.create(
                environment=environment, max_episode_timesteps=max_episode_timesteps,
                reward_shaping=reward_shaping, **kwargs
            )
        elif '.' in environment:
            # Library specification: import the module and recurse on the class.
            library_name, module_name = environment.rsplit('.', 1)
            library = importlib.import_module(name=library_name)
            environment = getattr(library, module_name)
            return Environment.create(
                environment=environment, max_episode_timesteps=max_episode_timesteps,
                reward_shaping=reward_shaping, **kwargs
            )
        elif environment in tensorforce.environments.environments:
            # Keyword specification: look up in the registry and recurse.
            environment = tensorforce.environments.environments[environment]
            return Environment.create(
                environment=environment, max_episode_timesteps=max_episode_timesteps,
                reward_shaping=reward_shaping, **kwargs
            )
        else:
            # Default: OpenAI Gym
            # Unknown string: try interpreting it as a Gym level id.
            try:
                return Environment.create(
                    environment='gym', level=environment,
                    max_episode_timesteps=max_episode_timesteps,
                    reward_shaping=reward_shaping, **kwargs
                )
            except TensorforceError:
                raise TensorforceError.value(
                    name='Environment.create', argument='environment', value=environment
                )

    else:
        # Default: OpenAI Gym
        # Fallback: accept gym.Env instances and classes directly.
        from gym import Env
        if isinstance(environment, Env) or \
                (isinstance(environment, type) and issubclass(environment, Env)):
            return Environment.create(
                environment='gym', level=environment,
                max_episode_timesteps=max_episode_timesteps,
                reward_shaping=reward_shaping, **kwargs
            )
        else:
            raise TensorforceError.type(
                name='Environment.create', argument='environment', dtype=type(environment)
            )
def create(environment, max_episode_timesteps=None, **kwargs):
    """
    Creates an environment from a specification.

    Args:
        environment (specification | Environment class/object): JSON file, specification key,
            configuration dictionary, library module, or `Environment` class/object
            (<span style="color:#C00000"><b>required</b></span>).
        max_episode_timesteps (int > 0): Maximum number of timesteps per episode, overwrites the
            environment default if defined
            (<span style="color:#00C000"><b>default</b></span>: environment default).
        kwargs: Additional arguments.

    Returns:
        Environment: The created (and possibly wrapped) environment instance.

    Raises:
        TensorforceError: If the specification is invalid, or if extra arguments are passed for
            an already-wrapped environment.
    """
    if isinstance(environment, (EnvironmentWrapper, RemoteEnvironment)):
        # Already-wrapped environments cannot take further configuration.
        # Bug fix: the errors below were constructed but never raised, so invalid
        # arguments were silently ignored.
        if max_episode_timesteps is not None:
            raise TensorforceError.invalid(
                name='Environment.create', argument='max_episode_timesteps',
                condition='EnvironmentWrapper instance'
            )
        if len(kwargs) > 0:
            raise TensorforceError.invalid(
                name='Environment.create', argument='kwargs',
                condition='EnvironmentWrapper instance'
            )
        return environment

    elif isinstance(environment, type) and \
            issubclass(environment, (EnvironmentWrapper, RemoteEnvironment)):
        # Wrapper classes themselves are not a valid specification.
        raise TensorforceError.type(
            name='Environment.create', argument='environment', dtype=type(environment)
        )

    elif isinstance(environment, Environment):
        # Environment instance: wrap only if a timestep limit was requested.
        if max_episode_timesteps is not None:
            environment = EnvironmentWrapper(
                environment=environment, max_episode_timesteps=max_episode_timesteps
            )
        return environment

    elif isinstance(environment, type) and issubclass(environment, Environment):
        # Environment subclass: instantiate, then recurse to apply wrapping.
        environment = environment(**kwargs)
        assert isinstance(environment, Environment)
        return Environment.create(
            environment=environment, max_episode_timesteps=max_episode_timesteps
        )

    elif isinstance(environment, dict):
        # Dictionary specification
        util.deep_disjoint_update(target=kwargs, source=environment)
        environment = kwargs.pop('environment', kwargs.pop('type', 'default'))
        assert environment is not None
        # Explicit argument takes precedence over the spec's value.
        if max_episode_timesteps is None:
            max_episode_timesteps = kwargs.pop('max_episode_timesteps', None)
        return Environment.create(
            environment=environment, max_episode_timesteps=max_episode_timesteps, **kwargs
        )

    elif isinstance(environment, str):
        if os.path.isfile(environment):
            # JSON file specification
            with open(environment, 'r') as fp:
                environment = json.load(fp=fp)
            util.deep_disjoint_update(target=kwargs, source=environment)
            environment = kwargs.pop('environment', kwargs.pop('type', 'default'))
            assert environment is not None
            if max_episode_timesteps is None:
                max_episode_timesteps = kwargs.pop('max_episode_timesteps', None)
            return Environment.create(
                environment=environment, max_episode_timesteps=max_episode_timesteps, **kwargs
            )

        elif '.' in environment:
            # Library specification, e.g. "my_module.MyEnvironment"
            library_name, module_name = environment.rsplit('.', 1)
            library = importlib.import_module(name=library_name)
            environment = getattr(library, module_name)
            return Environment.create(
                environment=environment, max_episode_timesteps=max_episode_timesteps, **kwargs
            )

        else:
            # Keyword specification (registered environment name)
            environment = tensorforce.environments.environments[environment]
            return Environment.create(
                environment=environment, max_episode_timesteps=max_episode_timesteps, **kwargs
            )

    else:
        # Default: OpenAI Gym environment class/object passed directly.
        from gym import Env
        if isinstance(environment, Env) or \
                (isinstance(environment, type) and issubclass(environment, Env)):
            return Environment.create(
                environment='gym', level=environment,
                max_episode_timesteps=max_episode_timesteps, **kwargs
            )
        else:
            raise TensorforceError.type(
                name='Environment.create', argument='environment', dtype=type(environment)
            )
def get_module_class_and_kwargs(
    name, module=None, modules=None, default_module=None, **kwargs
):
    """
    Resolves a module specification to a module class plus its constructor arguments.

    Args:
        name (str): Name of the module, must be a valid identifier.
        module: Module specification — dict, JSON file path, library path string
            ("lib.Class"), keyword registered in `modules`, or a callable/class.
        modules (dict): Registry mapping keyword specifications to module classes.
        default_module: Fallback module used when the specification does not resolve;
            if None, `modules['default']` is used where available.
        kwargs: Additional constructor arguments for the module.

    Returns:
        tuple: `(module_class, first_arg, kwargs)` where `first_arg` is a positional
        argument captured from an unresolvable non-dict specification (or None).

    Raises:
        TensorforceError: If `name`, `modules`, or the module specification is invalid.
    """
    # name
    if not util.is_valid_name(name=name):
        raise TensorforceError.value(name='module', argument='name', value=name)
    # modules
    if modules is not None and not isinstance(modules, dict):
        raise TensorforceError.type(name='module', argument='modules', value=modules)

    if isinstance(module, dict):
        # Dictionary module specification (type either given via 'type' or 'default_module')
        util.deep_disjoint_update(target=kwargs, source=module)
        module = kwargs.pop('type', default_module)
        return Module.get_module_class_and_kwargs(
            name=name, module=module, modules=modules, default_module=default_module, **kwargs
        )

    elif isinstance(module, str):
        if os.path.isfile(module):
            # JSON file module specification
            with open(module, 'r') as fp:
                module = json.load(fp=fp)
            return Module.get_module_class_and_kwargs(
                name=name, module=module, modules=modules, default_module=default_module,
                **kwargs
            )

        elif '.' in module:
            # Library module specification, e.g. "my_module.MyClass"
            library_name, module_name = module.rsplit('.', 1)
            library = importlib.import_module(name=library_name)
            module = getattr(library, module_name)
            return Module.get_module_class_and_kwargs(
                name=name, module=module, modules=modules, default_module=default_module,
                **kwargs
            )

        elif modules is not None and module in modules:
            # Keyword module specification
            return Module.get_module_class_and_kwargs(
                name=name, module=modules[module], default_module=default_module, **kwargs
            )

        elif (modules is not None and 'default' in modules) or default_module is not None:
            # Default module specification: the unresolved string becomes the first
            # positional argument of the default module.
            # Bug fix: guard 'default' in modules with a None check — previously this
            # raised TypeError when modules was None instead of the proper error below.
            if '_first_arg' in kwargs:
                raise TensorforceError.value(name='module kwargs', value='_first_arg')
            if module is not None:
                kwargs['_first_arg'] = module
            if default_module is None:
                default_module = modules['default']
            return Module.get_module_class_and_kwargs(
                name=name, module=default_module, modules=modules, **kwargs
            )

        else:
            raise TensorforceError.value(name='module specification', value=module)

    elif not callable(module) and \
            ((modules is not None and 'default' in modules) or default_module is not None):
        # Default module specification for non-callable, non-string values.
        # Bug fix: same None-guard on modules as above.
        if '_first_arg' in kwargs:
            raise TensorforceError.value(name='module kwargs', value='_first_arg')
        if module is not None:
            kwargs['_first_arg'] = module
        if default_module is None:
            default_module = modules['default']
        return Module.get_module_class_and_kwargs(
            name=name, module=default_module, modules=modules, **kwargs
        )

    elif callable(module):
        # Callable/class specification: resolved, return as-is with captured first arg.
        first_arg = kwargs.pop('_first_arg', None)
        return module, first_arg, kwargs

    else:
        raise TensorforceError.value(name='module specification', value=module)