Example #1
    def __init__(self, config):
        """Initializes the reinforcement learning agent.

        Args:
            config (Configuration): configuration object containing at least `states`, `actions`, `preprocessing` and
                `exploration`.

        """
        assert self.__class__.name is not None and self.__class__.model is not None
        config.default(Agent.default_config)

        # states config and preprocessing
        self.preprocessing = dict()
        if 'type' in config.states:
            # only one state
            config.states = dict(state=config.states)
            self.unique_state = True
            if config.preprocessing is not None:
                config.preprocessing = dict(state=config.preprocessing)
        else:
            self.unique_state = False
        for name, state in config.states:
            state.default(dict(type='float'))
            if isinstance(state.shape, int):
                state.shape = (state.shape, )
            if config.preprocessing is not None and name in config.preprocessing:
                preprocessing = Preprocessing.from_config(
                    config=config.preprocessing[name])
                self.preprocessing[name] = preprocessing
                state.shape = preprocessing.processed_shape(shape=state.shape)

        # actions config and exploration
        self.exploration = dict()
        if 'continuous' in config.actions:
            # only one action
            config.actions = dict(action=config.actions)
            if config.exploration is not None:
                config.exploration = dict(action=config.exploration)
            self.unique_action = True
        else:
            self.unique_action = False
        for name, action in config.actions:
            if action.continuous:
                action.default(dict(shape=(), min_value=None, max_value=None))
            else:
                action.default(dict(shape=()))
            if isinstance(action.shape, int):
                action.shape = (action.shape, )
            if config.exploration is not None and name in config.exploration:
                self.exploration[name] = Exploration.from_config(
                    config=config.exploration[name])

        self.states_config = config.states
        self.actions_config = config.actions

        self.model = self.__class__.model(config)

        self.episode = -1
        self.timestep = 0
        self.reset()
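The single- versus multi-state split above hinges on whether the spec itself looks like one state (a `type` key at the top level) or a mapping of named states; the same applies to actions via the `continuous` key. A minimal sketch of that normalization, using plain dicts in place of the Configuration object (the concrete specs are illustrative assumptions):

# Minimal sketch of the single/multi normalization above, with plain dicts
# standing in for the Configuration object; specs are illustrative.
states = dict(type='float', shape=(8,))           # a 'type' key marks a single state
if 'type' in states:
    states = dict(state=states)                   # wrapped under the default name 'state'

actions = dict(continuous=False, num_actions=4)   # a 'continuous' key marks a single action
if 'continuous' in actions:
    actions = dict(action=actions)                # wrapped under the default name 'action'

assert list(states) == ['state'] and list(actions) == ['action']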
Example #2
    def __init__(self, config):
        """Initializes the reinforcement learning agent.

        Args:
            config (Configuration): configuration object containing at least `states`, `actions`, `preprocessing` and
                `exploration`.

        """
        assert self.__class__.name is not None and self.__class__.model is not None
        config.default(Agent.default_config)

        # states config and preprocessing
        self.preprocessing = dict()
        if 'type' in config.states:
            # only one state
            config.states = dict(state=config.states)
            self.unique_state = True
            if config.preprocessing is not None:
                config.preprocessing = dict(state=config.preprocessing)
        else:
            self.unique_state = False
        for name, state in config.states:
            if config.preprocessing is not None and name in config.preprocessing:
                preprocessing = Preprocessing.from_config(
                    config=config.preprocessing[name])
                self.preprocessing[name] = preprocessing
                state.shape = preprocessing.processed_shape(shape=state.shape)

        # actions config and exploration
        self.continuous_actions = list()
        self.exploration = dict()
        if 'continuous' in config.actions:
            # only one action
            # the loop below records a single continuous action in continuous_actions
            config.actions = dict(action=config.actions)
            if config.exploration is not None:
                config.exploration = dict(action=config.exploration)
            self.unique_action = True
        else:
            self.unique_action = False
        for name, action in config.actions:
            if action.continuous:
                self.continuous_actions.append(name)
            if config.exploration is not None and name in config.exploration:
                self.exploration[name] = Exploration.from_config(
                    config=config.exploration[name])

        self.states_config = config.states
        self.actions_config = config.actions

        self.model = self.__class__.model(config)

        self.episode = 0
        self.timestep = 0

        # Reset internal state - needs to be called after every episode
        self.next_internal = self.current_internal = self.model.reset()
        for preprocessing in self.preprocessing.values():
            preprocessing.reset()
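This variant additionally records which actions are continuous in `continuous_actions`. A short sketch of that bookkeeping with plain dicts (the action names are made up):

# Sketch of the continuous-action bookkeeping above; plain dicts and
# illustrative action names.
actions = dict(
    move=dict(continuous=True, shape=(2,)),
    fire=dict(continuous=False, num_actions=2),
)
continuous_actions = [name for name, action in actions.items() if action['continuous']]
assert continuous_actions == ['move']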
Example #3
    def __init__(self, config, model=None):
        """Initializes the reinforcement learning agent.

        Args:
            config (Configuration): configuration object containing at least `states`, `actions`, `preprocessing` and
                `exploration`.
            model (Model): optional model instance. If not supplied, a new model is created.

        """
        assert self.__class__.name is not None and self.__class__.model is not None
        config.default(Agent.default_config)

        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(util.log_levels[config.log_level])

        # states config and preprocessing
        self.preprocessing = dict()
        if 'type' in config.states:
            # only one state
            config.states = dict(state=config.states)
            self.unique_state = True
            if config.preprocessing is not None:
                config.preprocessing = dict(state=config.preprocessing)
        else:
            self.unique_state = False
        for name, state in config.states:
            state.default(dict(type='float'))
            if isinstance(state.shape, int):
                state.shape = (state.shape, )
            if config.preprocessing is not None and name in config.preprocessing:
                preprocessing = Preprocessing.from_config(
                    config=config.preprocessing[name])
                self.preprocessing[name] = preprocessing
                state.shape = preprocessing.processed_shape(shape=state.shape)

        # actions config and exploration
        self.exploration = dict()
        if 'continuous' in config.actions:
            # only one action
            config.actions = dict(action=config.actions)
            if config.exploration is not None:
                config.exploration = dict(action=config.exploration)
            self.unique_action = True
        else:
            self.unique_action = False
        for name, action in config.actions:
            if action.continuous:
                action.default(dict(shape=(), min_value=None, max_value=None))
            else:
                action.default(dict(shape=()))
            if isinstance(action.shape, int):
                action.shape = (action.shape, )
            if config.exploration is not None and name in config.exploration:
                self.exploration[name] = Exploration.from_config(
                    config=config.exploration[name])

        self.states_config = config.states
        self.actions_config = config.actions

        if model is None:
            self.model = self.__class__.model(config)
        else:
            if not isinstance(model, self.__class__.model):
                raise TensorForceError(
                    "Supplied model class `{}` does not match expected agent model class `{}`"
                    .format(
                        type(model).__name__, self.__class__.model.__name__))
            self.model = model

        not_accessed = config.not_accessed()
        if not_accessed:
            self.logger.warning("Configuration values not accessed: {}".format(
                ', '.join(not_accessed)))

        self.episode = -1
        self.timestep = 0
        self.reset()
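This variant also accepts a pre-built model instance and checks it against the agent's expected model class. One plausible use, sketched with a hypothetical concrete subclass `MyAgent`:

# Hypothetical usage of the optional `model` argument; MyAgent is an assumed
# concrete Agent subclass whose class attribute `model` is its model class.
model = MyAgent.model(config)         # build the model once
agent = MyAgent(config, model=model)  # reuse it instead of creating a new one
# An instance of the wrong model class would raise TensorForceError instead.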
Example #4
    def __init__(self, states_spec, actions_spec, preprocessing, exploration,
                 reward_preprocessing, batched_observe):
        """
        Initializes the reinforcement learning agent.

        Args:
            states_spec: Dict containing at least one state definition; a single state needs `shape` (and
                optionally `type`), while multiple states are passed as a dict of named state dicts.
            actions_spec: Dict containing at least one action definition; actions have a `type` plus
                `num_actions` for discrete actions or optional `min_value`/`max_value` for continuous ones.
            preprocessing: Optional preprocessor spec, or dict of per-state specs, applied to states.
            exploration: Optional exploration spec, or dict of per-action specs.
            reward_preprocessing: Optional dict specifying a reward preprocessor, using the same syntax as
                state preprocessing.
            batched_observe: Optional int specifying how many observe calls are batched into one session run.
        """

        # States config and preprocessing
        self.preprocessing = dict()

        if 'shape' in states_spec:  # Single-state
            self.unique_state = True
            state = dict(states_spec)
            self.states_spec = dict(state=state)
            if isinstance(state['shape'], int):  # Shape: int to unary tuple
                state['shape'] = (state['shape'], )
            if 'type' not in state:  # Type: default to float
                state['type'] = 'float'
            if preprocessing is not None:
                preprocessing = Preprocessing.from_spec(spec=preprocessing)
                self.preprocessing['state'] = preprocessing
                state['shape'] = preprocessing.processed_shape(
                    shape=state['shape'])

        else:  # Multi-state
            self.unique_state = False
            self.states_spec = dict(states_spec)
            for name, state in self.states_spec.items():
                if isinstance(state['shape'], int):  # Shape: int to unary tuple
                    state['shape'] = (state['shape'], )
                if 'type' not in state:  # Type: default to float
                    state['type'] = 'float'
                if preprocessing is not None and name in preprocessing:
                    # Use a distinct local name so the preprocessing dict being
                    # checked each iteration is not clobbered.
                    state_preprocessing = Preprocessing.from_spec(
                        preprocessing[name])
                    self.preprocessing[name] = state_preprocessing
                    state['shape'] = state_preprocessing.processed_shape(
                        shape=state['shape'])

        # Actions config and exploration
        self.exploration = dict()

        if 'type' in actions_spec:  # Single-action
            self.unique_action = True
            action = dict(actions_spec)
            self.actions_spec = dict(action=action)
            if action['type'] == 'int':  # Check required values
                if 'num_actions' not in action:
                    raise TensorForceError(
                        "Action requires value 'num_actions' set!")
            elif action['type'] == 'float':
                if ('min_value' in action) != ('max_value' in action):
                    raise TensorForceError(
                        "Action requires both values 'min_value' and 'max_value' set!"
                    )
            if 'shape' not in action:  # Shape: default to empty tuple
                action['shape'] = ()
            if isinstance(action['shape'], int):  # Shape: int to unary tuple
                action['shape'] = (action['shape'], )
            if exploration is not None:
                self.exploration['action'] = Exploration.from_spec(exploration)

        else:  # Multi-action
            self.unique_action = False
            self.actions_spec = dict(actions_spec)
            for name, action in self.actions_spec.items():
                if action['type'] == 'int':  # Check required values
                    if 'num_actions' not in action:
                        raise TensorForceError(
                            "Action requires value 'num_actions' set!")
                elif action['type'] == 'float':
                    if ('min_value' in action) != ('max_value' in action):
                        raise TensorForceError(
                            "Action requires both values 'min_value' and 'max_value' set!"
                        )
                if 'shape' not in action:  # Shape: default to empty tuple
                    action['shape'] = ()
                if isinstance(action['shape'], int):  # Shape: int to unary tuple
                    action['shape'] = (action['shape'], )
                if exploration is not None and name in exploration:
                    self.exploration[name] = Exploration.from_spec(
                        exploration[name])

        # reward preprocessing config
        if reward_preprocessing is None:
            self.reward_preprocessing = None
        else:
            self.reward_preprocessing = Preprocessing.from_spec(
                reward_preprocessing)

        self.model = self.initialize_model(
            states_spec=self.states_spec,
            actions_spec=self.actions_spec,
        )

        # Batched observe for better performance with Python.
        self.batched_observe = batched_observe
        if self.batched_observe is not None:
            self.observe_terminal = list()
            self.observe_reward = list()

        self.reset()
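Instantiating this spec-based variant might look as follows; `MyAgent` is a hypothetical concrete subclass implementing `initialize_model()`, and all spec values are illustrative:

# Hypothetical instantiation of the spec-based constructor above.
agent = MyAgent(
    states_spec=dict(shape=(4,), type='float'),    # 'shape' key => single state
    actions_spec=dict(type='int', num_actions=2),  # 'type' key => single discrete action
    preprocessing=None,
    exploration=None,
    reward_preprocessing=None,
    batched_observe=64,                            # batch 64 observe calls per session run
)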
Example #5
    def __init__(self, states_spec, actions_spec, config):
        """
        Initializes the reinforcement learning agent.

        Args:
            states_spec: Dict containing at least one state definition (a `shape` key marks a single state).
            actions_spec: Dict containing at least one action definition (a `type` key marks a single action).
            config (Configuration): configuration object containing at least `preprocessing`, `exploration` and
                `reward_preprocessing`.

        """
        self.logger = logging.getLogger(self.__class__.__name__)  # other name?
        self.logger.setLevel(util.log_levels[config.log_level])

        # States config and preprocessing
        self.preprocessing = dict()

        if 'shape' in states_spec:  # Single-state
            self.unique_state = True
            state = dict(states_spec)
            self.states_spec = dict(state=state)
            if isinstance(state['shape'], int):  # Shape: int to unary tuple
                state['shape'] = (state['shape'], )
            if 'type' not in state:  # Type: default to float
                state['type'] = 'float'
            if config.preprocessing is not None:
                preprocessing = Preprocessing.from_spec(
                    spec=config.preprocessing)
                self.preprocessing['state'] = preprocessing
                state['shape'] = preprocessing.processed_shape(
                    shape=state['shape'])

        else:  # Multi-state
            self.unique_state = False
            self.states_spec = dict(states_spec)
            for name, state in self.states_spec.items():
                if isinstance(state['shape'], int):  # Shape: int to unary tuple
                    state['shape'] = (state['shape'], )
                if 'type' not in state:  # Type: default to float
                    state['type'] = 'float'
                if config.preprocessing is not None and name in config.preprocessing:
                    preprocessing = Preprocessing.from_spec(
                        config.preprocessing[name])
                    self.preprocessing[name] = preprocessing
                    state['shape'] = preprocessing.processed_shape(
                        shape=state['shape'])

        # Actions config and exploration
        self.exploration = dict()

        if 'type' in actions_spec:  # Single-action
            self.unique_action = True
            action = dict(actions_spec)
            self.actions_spec = dict(action=action)
            if action['type'] == 'int':  # Check required values
                if 'num_actions' not in action:
                    raise TensorForceError(
                        "Action requires value 'num_actions' set!")
            elif action['type'] == 'float':
                if ('min_value' in action) != ('max_value' in action):
                    raise TensorForceError(
                        "Action requires both values 'min_value' and 'max_value' set!"
                    )
            if 'shape' not in action:  # Shape: default to empty tuple
                action['shape'] = ()
            if isinstance(action['shape'], int):  # Shape: int to unary tuple
                action['shape'] = (action['shape'], )
            if config.exploration is not None:
                self.exploration['action'] = Exploration.from_spec(
                    config.exploration)

        else:  # Multi-action
            self.unique_action = False
            self.actions_spec = dict(actions_spec)
            for name, action in self.actions_spec.items():
                if action['type'] == 'int':  # Check required values
                    if 'num_actions' not in action:
                        raise TensorForceError(
                            "Action requires value 'num_actions' set!")
                elif action['type'] == 'float':
                    if ('min_value' in action) != ('max_value' in action):
                        raise TensorForceError(
                            "Action requires both values 'min_value' and 'max_value' set!"
                        )
                if 'shape' not in action:  # Shape: default to empty tuple
                    action['shape'] = ()
                if isinstance(action['shape'], int):  # Shape: int to unary tuple
                    action['shape'] = (action['shape'], )
                if config.exploration is not None and name in config.exploration:
                    self.exploration[name] = Exploration.from_spec(
                        config.exploration[name])

        # reward preprocessing config
        if config.reward_preprocessing is None:
            self.reward_preprocessing = None
        else:
            self.reward_preprocessing = Preprocessing.from_spec(
                config.reward_preprocessing)

        self.model = self.initialize_model(states_spec=self.states_spec,
                                           actions_spec=self.actions_spec,
                                           config=config)

        not_accessed = config.not_accessed()
        if not_accessed:
            self.logger.warning("Configuration values not accessed: {}".format(
                ', '.join(not_accessed)))

        self.episode = -1
        self.timestep = 0
        self.reset()
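Reward preprocessing reuses the state-preprocessing syntax: specs are dicts with a `type` and any required arguments. A hedged sketch; `grayscale` and `image_resize` are named in the next example's docstring, while `running_standardize` and the resize arguments are assumptions:

# Sketch of preprocessing specs in the dict-with-'type' format; the
# 'running_standardize' type and image_resize arguments are assumptions.
state_preprocessing = [
    dict(type='grayscale'),
    dict(type='image_resize', width=84, height=84),
]
reward_preprocessing = [
    dict(type='running_standardize'),
]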
Example #6
    def __init__(
        self,
        states_spec,
        actions_spec,
        preprocessing,
        exploration,
        reward_preprocessing,
        batched_observe
    ):
        """
        Initializes the reinforcement learning agent.

        Args:
            states_spec: Dict containing at least one state definition. In the case of a single state,
                keys `shape` and `type` are necessary. For multiple states, pass a dict of dicts where each state
                is a dict itself with a unique name as its key.
            actions_spec: Dict containing at least one action definition. Actions have types and either `num_actions`
                for discrete actions or a `shape` for continuous actions. Consult documentation and tests for more.
            preprocessing: Optional list of preprocessors (e.g. `image_resize`, `grayscale`) to apply to states. Each
                preprocessor is a dict containing a type and optional necessary arguments.
            exploration: Optional dict specifying exploration type (epsilon greedy strategies or Gaussian noise)
                and arguments.
            reward_preprocessing: Optional dict specifying reward preprocessor using same syntax as state preprocessing.
            batched_observe: Optional int specifying how many observe calls are batched into one session run.
                Without batching, throughput will be lower because every `observe` triggers a session invocation to
                update rewards in the graph.
        """

        # States config and preprocessing
        self.preprocessing = dict()

        self.unique_state = ('shape' in states_spec)
        if self.unique_state:
            states_spec = dict(state=states_spec)
            preprocessing = dict(state=preprocessing)

        self.states_spec = deepcopy(states_spec)

        for name, state in self.states_spec.items():
            # Convert int to unary tuple
            if isinstance(state['shape'], int):
                state['shape'] = (state['shape'],)

            # Set default type to float
            if 'type' not in state:
                state['type'] = 'float'

            if preprocessing is not None and preprocessing.get(name) is not None:
                state_preprocessing = Preprocessing.from_spec(spec=preprocessing[name])
                self.preprocessing[name] = state_preprocessing
                state['shape'] = state_preprocessing.processed_shape(shape=state['shape'])

        # Actions config and exploration
        self.exploration = dict()

        self.unique_action = ('type' in actions_spec)
        if self.unique_action:
            actions_spec = dict(action=actions_spec)
            exploration = dict(action=exploration)

        self.actions_spec = deepcopy(actions_spec)

        for name, action in self.actions_spec.items():
            # Check required values
            if action['type'] == 'int':
                if 'num_actions' not in action:
                    raise TensorForceError("Action requires value 'num_actions' set!")
            elif action['type'] == 'float':
                if ('min_value' in action) != ('max_value' in action):
                    raise TensorForceError("Action requires both values 'min_value' and 'max_value' set!")

            # Set default shape to empty tuple
            if 'shape' not in action:
                action['shape'] = ()

            # Convert int to unary tuple
            if isinstance(action['shape'], int):
                action['shape'] = (action['shape'],)

            # Set exploration
            if exploration is not None and exploration.get(name) is not None:
                self.exploration[name] = Exploration.from_spec(spec=exploration[name])

        # Reward preprocessing config
        if reward_preprocessing is None:
            self.reward_preprocessing = None
        else:
            self.reward_preprocessing = Preprocessing.from_spec(reward_preprocessing)

        self.model = self.initialize_model(
            states_spec=self.states_spec,
            actions_spec=self.actions_spec,
        )

        # Batched observe for better performance with Python.
        self.batched_observe = batched_observe
        if self.batched_observe is not None:
            self.observe_terminal = list()
            self.observe_reward = list()

        self.reset()
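A multi-state, multi-action call of this final variant could look like the sketch below; the state/action names and the `epsilon_decay` exploration type are illustrative assumptions:

# Hypothetical multi-state / multi-action instantiation of the final variant.
agent = MyAgent(
    states_spec=dict(
        camera=dict(shape=(64, 64, 3), type='float'),
        position=dict(shape=2),                       # int shape is normalized to (2,)
    ),
    actions_spec=dict(
        move=dict(type='float', min_value=-1.0, max_value=1.0),  # shape defaults to ()
        fire=dict(type='int', num_actions=2),
    ),
    preprocessing=None,
    exploration=dict(fire=dict(type='epsilon_decay')),  # exploration only for 'fire'
    reward_preprocessing=None,
    batched_observe=None,
)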