def __init__(self, config):
    config.default(PolicyGradientModel.default_config)

    # distribution
    self.distribution = dict()
    for name, action in config.actions:
        self.distribution[name] = _get_distribution(action)

    # baseline
    if config.baseline is None:
        self.baseline = None
    else:
        baseline = util.function(f=config.baseline, predefined=value_functions)
        args = config.baseline_args or ()
        kwargs = config.baseline_kwargs or {}
        self.baseline = baseline(*args, **kwargs)

    super(PolicyGradientModel, self).__init__(config)

    # advantage estimation
    self.generalized_advantage_estimation = config.generalized_advantage_estimation
    if self.generalized_advantage_estimation:
        self.gae_lambda = config.gae_lambda
    self.normalize_advantage = config.normalize_advantage
def __init__(self, config):
    config.default(PolicyGradientModel.default_config)

    # distribution
    self.distribution = dict()
    for name, action in config.actions:
        if 'distribution' in action:
            distribution = action.distribution
        else:
            # default: gaussian for continuous actions, categorical for discrete actions
            distribution = 'gaussian' if action.continuous else 'categorical'
        if distribution not in distributions:
            raise TensorForceError()
        if action.continuous:
            self.distribution[name] = distributions[distribution]()
        else:
            self.distribution[name] = distributions[distribution](num_actions=action.num_actions)

    # baseline
    if config.baseline is None:
        self.baseline = None
    else:
        baseline = util.function(f=config.baseline, predefined=value_functions)
        args = config.baseline_args or ()
        kwargs = config.baseline_kwargs or {}
        self.baseline = baseline(*args, **kwargs)

    super(PolicyGradientModel, self).__init__(config)

    # advantage estimation
    self.generalized_advantage_estimation = config.generalized_advantage_estimation
    if self.generalized_advantage_estimation:
        self.gae_lambda = config.gae_lambda
    self.normalize_advantage = config.normalize_advantage
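For illustration, a minimal sketch of an actions configuration the distribution loop above would accept; the action names and values are assumptions, not library defaults beyond what the code itself shows:

actions_config = dict(
    move=dict(continuous=False, num_actions=5),               # discrete -> categorical(num_actions=5)
    steer=dict(continuous=True),                               # continuous -> gaussian by default
    throttle=dict(continuous=True, distribution='gaussian'),   # explicit choice; must be a key in `distributions`
)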
def network_builder(inputs):
    if len(inputs) != 1:
        raise TensorForceError('Layered network must have only one input.')
    x = next(iter(inputs.values()))

    internal_inputs = []
    internal_outputs = []
    internal_inits = []

    # layers_config is a free variable captured from the enclosing builder function
    for layer_config in layers_config:
        layer_type = layer_config['type']
        layer = util.function(layer_type, predefined=layers)
        x = layer(x=x, **{k: v for k, v in layer_config.items() if k != 'type'})

        # layers with internal state return (output, internal inputs, internal outputs, internal inits)
        if isinstance(x, (list, tuple)):
            assert len(x) == 4
            internal_inputs.extend(x[1])
            internal_outputs.extend(x[2])
            internal_inits.extend(x[3])
            x = x[0]

    if internal_inputs:
        return x, internal_inputs, internal_outputs, internal_inits
    else:
        return x
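As a rough sketch, the layers_config the builder iterates over is a list of dicts, each naming a layer 'type' plus keyword arguments for that layer; the 'dense' layer name and 'size' parameter are assumptions for illustration:

layers_config = [
    {'type': 'dense', 'size': 64},   # 'type' selects the layer function from `layers`
    {'type': 'dense', 'size': 32},   # remaining keys are passed as keyword arguments
]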
def from_config(config):
    exploration = config.type
    args = config.args if 'args' in config else ()
    kwargs = config.kwargs if 'kwargs' in config else {}
    return util.function(exploration, tensorforce.core.explorations.explorations)(*args, **kwargs)
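A hypothetical exploration configuration matching the structure read above ('type' plus optional 'args'/'kwargs'); the 'epsilon_decay' name and the 'epsilon' parameter are illustrative assumptions:

exploration_config = dict(
    type='epsilon_decay',      # must resolve to an entry in tensorforce.core.explorations.explorations
    kwargs=dict(epsilon=1.0),  # forwarded to the exploration class constructor
)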
def from_config(config):
    # a single preprocessor config is promoted to a one-element list
    if not isinstance(config, list):
        config = [config]

    preprocessing = Preprocessing()
    for config in config:
        preprocessor = config.type
        args = config.args if 'args' in config else ()
        kwargs = config.kwargs if 'kwargs' in config else {}
        preprocessor = util.function(preprocessor, tensorforce.core.preprocessing.preprocessors)(*args, **kwargs)
        preprocessing.add(preprocessor=preprocessor)
    return preprocessing
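A sketch of a preprocessing stack in this format, applied in list order; the 'grayscale' and 'sequence' preprocessor names and the sequence length are assumptions for illustration:

preprocessing_config = [
    dict(type='grayscale'),
    dict(type='sequence', args=[4]),  # positional args forwarded to the preprocessor constructor
]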
def __init__(self, config):
    config.default(MemoryAgent.default_config)
    super(MemoryAgent, self).__init__(config)

    self.batch_size = config.batch_size

    memory = util.function(config.memory, memories)
    args = config.memory_args or ()
    kwargs = config.memory_kwargs or {}
    self.memory = memory(config.memory_capacity, config.states, config.actions, *args, **kwargs)

    self.update_frequency = config.update_frequency
    self.first_update = config.first_update
    self.repeat_update = config.repeat_update
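The configuration entries the constructor above reads, sketched with assumed values; 'replay' is an assumed memory name that would have to be a key in `memories`:

memory_agent_config = dict(
    batch_size=32,
    memory='replay',         # resolved via util.function against `memories`
    memory_capacity=100000,
    memory_args=None,
    memory_kwargs=None,
    update_frequency=4,
    first_update=1000,
    repeat_update=1,
)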
def create_tf_operations(self, config):
    """
    Creates generic TensorFlow operations and placeholders required for models.

    Args:
        config: Model configuration which must contain entries for states and actions.

    Returns:

    """
    self.action_taken = dict()
    self.internal_inputs = list()
    self.internal_outputs = list()
    self.internal_inits = list()

    # Placeholders
    with tf.variable_scope('placeholder'):

        # States
        self.state = dict()
        for name, state in config.states.items():
            self.state[name] = tf.placeholder(dtype=util.tf_dtype(state.type),
                                              shape=(None,) + tuple(state.shape),
                                              name=name)

        # Actions
        self.action = dict()
        self.discrete_actions = []
        self.continuous_actions = []
        for name, action in config.actions:
            if action.continuous:
                if not self.__class__.allows_continuous_actions:
                    raise TensorForceError("Error: Model does not support continuous actions.")
                self.action[name] = tf.placeholder(dtype=util.tf_dtype('float'), shape=(None,), name=name)
            else:
                if not self.__class__.allows_discrete_actions:
                    raise TensorForceError("Error: Model does not support discrete actions.")
                self.action[name] = tf.placeholder(dtype=util.tf_dtype('int'), shape=(None,), name=name)

        # Reward & terminal
        self.reward = tf.placeholder(dtype=tf.float32, shape=(None,), name='reward')
        self.terminal = tf.placeholder(dtype=tf.bool, shape=(None,), name='terminal')

        # Deterministic action flag
        self.deterministic = tf.placeholder(dtype=tf.bool, shape=(), name='deterministic')

    # Optimizer
    if config.optimizer is not None:
        learning_rate = config.learning_rate
        with tf.variable_scope('optimization'):
            optimizer = util.function(config.optimizer, optimizers)
            args = config.optimizer_args or ()
            kwargs = config.optimizer_kwargs or {}
            self.optimizer = optimizer(learning_rate, *args, **kwargs)
    else:
        self.optimizer = None
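For the optimizer section above, a hedged sketch of the relevant configuration entries; 'adam' is an assumed optimizer name that would have to be a key in `optimizers`:

optimizer_config = dict(
    optimizer='adam',        # set to None to leave self.optimizer unset
    learning_rate=1e-3,
    optimizer_args=None,
    optimizer_kwargs=None,
)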
def __init__(self, config):
    assert self.__class__.name is not None and self.__class__.model is not None
    config.default(Agent.default_config)

    # only one state
    if 'type' in config.states:
        config.states = dict(state=config.states)
        self.unique_state = True
    else:
        self.unique_state = False

    # only one action
    if 'continuous' in config.actions:
        config.actions = dict(action=config.actions)
        self.unique_action = True
    else:
        self.unique_action = False

    self.states_config = config.states
    self.actions_config = config.actions

    self.model = self.__class__.model(config)

    # exploration
    self.exploration = dict()
    for name, action in config.actions:
        if 'exploration' not in action:
            self.exploration[name] = None
            continue
        exploration = action.exploration
        args = action.exploration_args
        kwargs = action.exploration_kwargs
        self.exploration[name] = util.function(exploration, explorations)(*args, **kwargs)

    self.episode = 0
    self.timestep = 0
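To illustrate the single-state/single-action shorthand handled above, a sketch of the two equivalent configuration forms; the names 'position' and 'decision' are arbitrary:

# shorthand: one unnamed state and one unnamed action
states_config = dict(type='float', shape=(4,))            # wrapped as dict(state=...), unique_state=True
actions_config = dict(continuous=False, num_actions=2)    # wrapped as dict(action=...), unique_action=True

# explicit form with named entries (no wrapping applied)
states_config = dict(position=dict(type='float', shape=(4,)))
actions_config = dict(decision=dict(continuous=False, num_actions=2))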