Example #1
0
    def __init__(self, config):
        """Set up per-action distributions, an optional baseline, and
        advantage-estimation settings, then defer to the base model.

        Args:
            config: Model configuration; missing entries are filled in from
                ``PolicyGradientModel.default_config``.
        """
        config.default(PolicyGradientModel.default_config)

        # One distribution instance per named action.
        self.distribution = {name: _get_distribution(action)
                             for name, action in config.actions}

        # Optional baseline value function.
        if config.baseline is None:
            self.baseline = None
        else:
            baseline_cls = util.function(f=config.baseline,
                                         predefined=value_functions)
            baseline_args = config.baseline_args or ()
            baseline_kwargs = config.baseline_kwargs or {}
            self.baseline = baseline_cls(*baseline_args, **baseline_kwargs)

        super(PolicyGradientModel, self).__init__(config)

        # Generalized advantage estimation (GAE) settings.
        self.generalized_advantage_estimation = config.generalized_advantage_estimation
        if self.generalized_advantage_estimation:
            self.gae_lambda = config.gae_lambda
        self.normalize_advantage = config.normalize_advantage
    def __init__(self, config):
        """Initialize a policy-gradient model: per-action distributions,
        an optional baseline, and advantage-estimation settings.

        Args:
            config: Model configuration; missing entries are filled in from
                ``PolicyGradientModel.default_config``.

        Raises:
            TensorForceError: if a configured distribution name is not found
                in ``distributions``.
        """
        config.default(PolicyGradientModel.default_config)

        # distribution
        self.distribution = dict()
        for name, action in config.actions:
            if 'distribution' in action:
                distribution = action.distribution
            else:
                # Default distribution by action type.
                distribution = 'gaussian' if action.continuous else 'categorical'
            if distribution not in distributions:
                # FIX: previously raised a bare TensorForceError() with no
                # message, making misconfigurations undiagnosable.
                raise TensorForceError(
                    "Unknown distribution '{}' for action '{}'.".format(distribution, name))
            if action.continuous:
                self.distribution[name] = distributions[distribution]()
            else:
                # Discrete distributions need the number of actions.
                self.distribution[name] = distributions[distribution](
                    num_actions=action.num_actions)

        # baseline
        if config.baseline is None:
            self.baseline = None
        else:
            baseline = util.function(f=config.baseline,
                                     predefined=value_functions)
            args = config.baseline_args or ()
            kwargs = config.baseline_kwargs or {}
            self.baseline = baseline(*args, **kwargs)

        super(PolicyGradientModel, self).__init__(config)

        # advantage estimation
        self.generalized_advantage_estimation = config.generalized_advantage_estimation
        if self.generalized_advantage_estimation:
            self.gae_lambda = config.gae_lambda
        self.normalize_advantage = config.normalize_advantage
Example #3
0
    def network_builder(inputs):
        """Apply the configured layers in sequence to the single input tensor.

        Raises:
            TensorForceError: if ``inputs`` does not contain exactly one entry.
        """
        if len(inputs) != 1:
            raise TensorForceError('Layered network must have only one input.')
        x = next(iter(inputs.values()))
        internal_inputs = list()
        internal_outputs = list()
        internal_inits = list()

        for conf in layers_config:
            layer_fn = util.function(conf['type'], predefined=layers)
            layer_kwargs = {key: value for key, value in conf.items() if key != 'type'}
            x = layer_fn(x=x, **layer_kwargs)

            # Layers with internal state return (output, inputs, outputs, inits).
            if isinstance(x, (list, tuple)):
                assert len(x) == 4
                x, extra_inputs, extra_outputs, extra_inits = x
                internal_inputs.extend(extra_inputs)
                internal_outputs.extend(extra_outputs)
                internal_inits.extend(extra_inits)

        if internal_inputs:
            return x, internal_inputs, internal_outputs, internal_inits
        return x
Example #4
0
 def from_config(config):
     """Instantiate an exploration object from its configuration.

     ``config.type`` names the exploration class; optional ``args`` and
     ``kwargs`` entries are forwarded to its constructor.
     """
     make = util.function(config.type, tensorforce.core.explorations.explorations)
     call_args = config.args if 'args' in config else ()
     call_kwargs = config.kwargs if 'kwargs' in config else {}
     return make(*call_args, **call_kwargs)
Example #5
0
    def from_config(config):
        """Build a Preprocessing pipeline from one or a list of configs.

        Args:
            config: A single preprocessor config or a list of them. Each
                entry names its class via ``type`` and may supply optional
                ``args``/``kwargs`` for the constructor.

        Returns:
            A ``Preprocessing`` instance with one preprocessor per entry.
        """
        if not isinstance(config, list):
            config = [config]

        preprocessing = Preprocessing()
        # FIX: loop variable renamed so it no longer shadows the `config`
        # parameter (was `for config in config:`), and `preprocessor` is no
        # longer reused for both the type name and the instance.
        for conf in config:
            args = conf.args if 'args' in conf else ()
            kwargs = conf.kwargs if 'kwargs' in conf else {}
            preprocessor = util.function(conf.type, tensorforce.core.preprocessing.preprocessors)(*args, **kwargs)
            preprocessing.add(preprocessor=preprocessor)
        return preprocessing
Example #6
0
    def __init__(self, config):
        """Initialize a memory-based agent: replay memory plus update schedule.

        Args:
            config: Agent configuration; missing entries are filled in from
                ``MemoryAgent.default_config``.
        """
        config.default(MemoryAgent.default_config)
        super(MemoryAgent, self).__init__(config)

        self.batch_size = config.batch_size

        # Instantiate the configured replay memory.
        memory_cls = util.function(config.memory, memories)
        memory_args = config.memory_args or ()
        memory_kwargs = config.memory_kwargs or {}
        self.memory = memory_cls(config.memory_capacity, config.states, config.actions,
                                 *memory_args, **memory_kwargs)

        # Update schedule.
        self.update_frequency = config.update_frequency
        self.first_update = config.first_update
        self.repeat_update = config.repeat_update
Example #7
0
File: model.py  Project: et0803/tensorforce
    def create_tf_operations(self, config):
        """
        Creates generic TensorFlow operations and placeholders required for models:
        state/action placeholders, reward/terminal placeholders, a deterministic
        flag, and the optimizer. Results are stored as attributes; nothing is
        returned.

        Args:
            config: Model configuration which must contain entries for states and actions.

        Returns:
            None
        """
        self.action_taken = dict()
        self.internal_inputs = list()
        self.internal_outputs = list()
        self.internal_inits = list()

        # Placeholders
        with tf.variable_scope('placeholder'):
            # States: one placeholder per named state, batched along axis 0.
            self.state = dict()
            for name, state in config.states.items():
                self.state[name] = tf.placeholder(dtype=util.tf_dtype(state.type), shape=(None,) + tuple(state.shape), name=name)

            # Actions: float placeholder for continuous actions, int for
            # discrete; raises if the model class does not support the kind.
            # NOTE(review): iterates config.actions directly (no .items());
            # presumably the config object yields (name, action) pairs — confirm.
            self.action = dict()
            self.discrete_actions = []
            self.continuous_actions = []
            for name, action in config.actions:
                if action.continuous:
                    if not self.__class__.allows_continuous_actions:
                        raise TensorForceError("Error: Model does not support continuous actions.")
                    self.action[name] = tf.placeholder(dtype=util.tf_dtype('float'), shape=(None,), name=name)
                else:
                    if not self.__class__.allows_discrete_actions:
                        raise TensorForceError("Error: Model does not support discrete actions.")
                    self.action[name] = tf.placeholder(dtype=util.tf_dtype('int'), shape=(None,), name=name)

            # Reward & terminal signals, one per timestep in the batch.
            self.reward = tf.placeholder(dtype=tf.float32, shape=(None,), name='reward')
            self.terminal = tf.placeholder(dtype=tf.bool, shape=(None,), name='terminal')

            # Deterministic action flag (scalar boolean placeholder).
            self.deterministic = tf.placeholder(dtype=tf.bool, shape=(), name='deterministic')

        # Optimizer: instantiated from config, or None when no optimizer is set.
        if config.optimizer is not None:
            learning_rate = config.learning_rate
            with tf.variable_scope('optimization'):
                optimizer = util.function(config.optimizer, optimizers)
                args = config.optimizer_args or ()
                kwargs = config.optimizer_kwargs or {}
                self.optimizer = optimizer(learning_rate, *args, **kwargs)
        else:
            self.optimizer = None
Example #8
0
    def __init__(self, config):
        """Set up the replay memory and the update schedule for a memory agent.

        Args:
            config: Agent configuration; missing entries are taken from
                ``MemoryAgent.default_config``.
        """
        config.default(MemoryAgent.default_config)
        super(MemoryAgent, self).__init__(config)

        self.batch_size = config.batch_size

        # Build the configured memory, forwarding any extra args/kwargs.
        make_memory = util.function(config.memory, memories)
        extra_args = config.memory_args or ()
        extra_kwargs = config.memory_kwargs or {}
        self.memory = make_memory(config.memory_capacity, config.states,
                                  config.actions, *extra_args, **extra_kwargs)

        self.update_frequency = config.update_frequency
        self.first_update = config.first_update
        self.repeat_update = config.repeat_update
Example #9
0
    def create_tf_operations(self, config):
        """
        Creates generic TensorFlow operations and placeholders required for models:
        state/action placeholders, reward/terminal placeholders, and the
        optimizer. Results are stored as attributes; nothing is returned.

        Args:
            config: Model configuration which must contain entries for states and actions.

        Returns:
            None
        """
        self.action_taken = dict()
        self.internal_inputs = list()
        self.internal_outputs = list()
        self.internal_inits = list()

        # Placeholders
        with tf.variable_scope('placeholder'):
            # States: one placeholder per named state, batched along axis 0.
            self.state = dict()
            for name, state in config.states.items():
                self.state[name] = tf.placeholder(dtype=util.tf_dtype(state.type), shape=(None,) + tuple(state.shape), name=name)

            # Actions: float placeholder for continuous actions, int for
            # discrete; raises if the model class does not support the kind.
            # NOTE(review): iterates config.actions directly (no .items());
            # presumably the config object yields (name, action) pairs — confirm.
            self.action = dict()
            self.discrete_actions = []
            self.continuous_actions = []
            for name, action in config.actions:
                if action.continuous:
                    if not self.__class__.allows_continuous_actions:
                        raise TensorForceError("Error: Model does not support continuous actions.")
                    self.action[name] = tf.placeholder(dtype=util.tf_dtype('float'), shape=(None,), name=name)
                else:
                    if not self.__class__.allows_discrete_actions:
                        raise TensorForceError("Error: Model does not support discrete actions.")
                    self.action[name] = tf.placeholder(dtype=util.tf_dtype('int'), shape=(None,), name=name)

            # Reward & terminal signals, one per timestep in the batch.
            self.reward = tf.placeholder(dtype=tf.float32, shape=(None,), name='reward')
            self.terminal = tf.placeholder(dtype=tf.bool, shape=(None,), name='terminal')

        # Optimizer: instantiated from config, or None when no optimizer is set.
        if config.optimizer is not None:
            learning_rate = config.learning_rate
            with tf.variable_scope('optimization'):
                optimizer = util.function(config.optimizer, optimizers)
                args = config.optimizer_args or ()
                kwargs = config.optimizer_kwargs or {}
                self.optimizer = optimizer(learning_rate, *args, **kwargs)
        else:
            self.optimizer = None
Example #10
0
    def __init__(self, config):
        """Initialize an agent from configuration.

        Normalizes a single-state / single-action config into the dict form,
        builds the model, and sets up per-action exploration.

        Args:
            config: Agent configuration; missing entries are filled in from
                ``Agent.default_config``.
        """
        assert self.__class__.name is not None and self.__class__.model is not None
        config.default(Agent.default_config)

        # Wrap a single state spec into a dict of named states.
        # (FIX: dropped the dead no-op `config.states = config.states`.)
        if 'type' in config.states:
            config.states = dict(state=config.states)
            self.unique_state = True
        else:
            self.unique_state = False

        # Wrap a single action spec into a dict of named actions.
        if 'continuous' in config.actions:
            config.actions = dict(action=config.actions)
            self.unique_action = True
        else:
            self.unique_action = False

        self.states_config = config.states
        self.actions_config = config.actions

        self.model = self.__class__.model(config)

        # exploration: one (possibly None) exploration object per action
        self.exploration = dict()
        for name, action in config.actions:
            if 'exploration' not in action:
                self.exploration[name] = None
                continue
            exploration = action.exploration
            # FIX: default missing/None args and kwargs (consistent with the
            # `config.X or ()` pattern used elsewhere in this codebase) so an
            # unset exploration_args no longer breaks the constructor call.
            args = action.exploration_args or ()
            kwargs = action.exploration_kwargs or {}
            self.exploration[name] = util.function(exploration, explorations)(*args, **kwargs)

        self.episode = 0
        self.timestep = 0
Example #11
0
    def network_builder(inputs):
        """Build the layered network over the single provided input tensor.

        Raises:
            TensorForceError: if more than one input tensor is given.
        """
        if len(inputs) != 1:
            raise TensorForceError('Layered network must have only one input.')
        x = next(iter(inputs.values()))
        internals_in, internals_out, internals_init = [], [], []

        for spec in layers_config:
            apply_layer = util.function(spec['type'], predefined=layers)
            layer_kwargs = {k: v for k, v in spec.items() if k != 'type'}
            x = apply_layer(x=x, **layer_kwargs)

            # Stateful layers return a 4-tuple instead of a bare tensor.
            if isinstance(x, (list, tuple)):
                assert len(x) == 4
                internals_in.extend(x[1])
                internals_out.extend(x[2])
                internals_init.extend(x[3])
                x = x[0]

        if internals_in:
            return x, internals_in, internals_out, internals_init
        return x
Example #12
0
 def from_config(config):
     """Create an exploration instance from its config.

     Calls the exploration class named by ``config.type`` with the optional
     ``args``/``kwargs`` entries from the config.
     """
     ctor = util.function(config.type, tensorforce.core.explorations.explorations)
     ctor_args = config.args if 'args' in config else ()
     ctor_kwargs = config.kwargs if 'kwargs' in config else {}
     return ctor(*ctor_args, **ctor_kwargs)