コード例 #1
0
    def __init__(
        # Environment
        self, states, actions, max_episode_timesteps=None,
        # TensorFlow etc
        parallel_interactions=1, buffer_observe=True, seed=None, recorder=None
    ):
        """Agent constructor.

        Args:
            states (specification): States specification (normalized via
                ``util.valid_values_spec``).
            actions (specification): Actions specification (normalized via
                ``util.valid_values_spec``).
            max_episode_timesteps (int): Upper bound on episode length, used to
                size the observe buffer (optional).
            parallel_interactions (int > 0): Maximum number of parallel
                interaction slots (episodes/environments).
            buffer_observe (bool | int > 0): Whether (bool) or how many
                timesteps (int) to buffer before internal observe operations.
            seed (int): Random seed for ``random`` and NumPy (optional).
            recorder (dict): Trace-recording specification; allowed keys are
                'directory', 'frequency' and 'max-traces' (optional).
        """
        # Subclasses are expected to have set a `spec` attribute beforehand.
        assert hasattr(self, 'spec')

        if seed is not None:
            assert isinstance(seed, int)
            random.seed(a=seed)
            np.random.seed(seed=seed)

        # States/actions specification
        self.states_spec = util.valid_values_spec(
            values_spec=states, value_type='state', return_normalized=True
        )
        self.actions_spec = util.valid_values_spec(
            values_spec=actions, value_type='action', return_normalized=True
        )
        self.max_episode_timesteps = max_episode_timesteps

        # Check for name overlap between states and actions.
        for name in self.states_spec:
            if name in self.actions_spec:
                # Bug fix: the error was constructed but never raised, so the
                # collision was silently ignored.
                raise TensorforceError.collision(
                    name='name', value=name, group1='states', group2='actions'
                )

        # Parallel episodes
        if isinstance(parallel_interactions, int):
            if parallel_interactions <= 0:
                raise TensorforceError.value(
                    name='parallel_interactions', value=parallel_interactions
                )
            self.parallel_interactions = parallel_interactions
        else:
            raise TensorforceError.type(name='parallel_interactions', value=parallel_interactions)

        # Buffer observe: resolve bool/int argument to a concrete buffer size.
        if isinstance(buffer_observe, bool):
            # Unbuffered or unbounded-episode modes are incompatible with
            # multiple parallel interactions.
            if not buffer_observe and self.parallel_interactions > 1:
                raise TensorforceError.unexpected()
            if self.max_episode_timesteps is None and self.parallel_interactions > 1:
                raise TensorforceError.unexpected()
            if not buffer_observe:
                self.buffer_observe = 1
            elif self.max_episode_timesteps is None:
                # Fallback buffer size when the episode length is unknown.
                self.buffer_observe = 100
            else:
                self.buffer_observe = self.max_episode_timesteps
        elif isinstance(buffer_observe, int):
            if buffer_observe <= 0:
                raise TensorforceError.value(name='buffer_observe', value=buffer_observe)
            if self.parallel_interactions > 1:
                raise TensorforceError.unexpected()
            if self.max_episode_timesteps is None:
                self.buffer_observe = buffer_observe
            else:
                # Never buffer more than one episode's worth of timesteps.
                self.buffer_observe = min(buffer_observe, self.max_episode_timesteps)
        else:
            raise TensorforceError.type(name='buffer_observe', value=buffer_observe)

        # Parallel terminal/reward buffers (uninitialized; filled before read).
        self.terminal_buffers = np.ndarray(
            shape=(self.parallel_interactions, self.buffer_observe),
            dtype=util.np_dtype(dtype='long')
        )
        self.reward_buffers = np.ndarray(
            shape=(self.parallel_interactions, self.buffer_observe),
            dtype=util.np_dtype(dtype='float')
        )

        # Per-slot write positions into the buffers above.
        self.buffer_indices = np.zeros(
            shape=(self.parallel_interactions,), dtype=util.np_dtype(dtype='int')
        )

        # Global counters.
        self.timesteps = 0
        self.episodes = 0
        self.updates = 0

        # Recorder: validate allowed keys, then set up per-trace accumulators.
        if recorder is None:
            pass
        elif not all(key in ('directory', 'frequency', 'max-traces') for key in recorder):
            raise TensorforceError.value(name='recorder', value=list(recorder))
        self.recorder_spec = recorder
        if self.recorder_spec is not None:
            self.record_states = OrderedDict(((name, list()) for name in self.states_spec))
            for name, spec in self.actions_spec.items():
                # Int actions additionally record their action mask.
                if spec['type'] == 'int':
                    self.record_states[name + '_mask'] = list()
            self.record_actions = OrderedDict(((name, list()) for name in self.actions_spec))
            self.record_terminal = list()
            self.record_reward = list()
            self.num_episodes = 0
コード例 #2
0
ファイル: agent.py プロジェクト: vbelus/tensorforce
    def __init__(
        # Environment
        self, states, actions, max_episode_timesteps=None,
        # TensorFlow etc
        parallel_interactions=1, buffer_observe=True, seed=None, recorder=None
    ):
        """Agent constructor.

        Args:
            states (specification): States specification (normalized via
                ``util.valid_values_spec``).
            actions (specification): Actions specification (normalized via
                ``util.valid_values_spec``).
            max_episode_timesteps (int): Upper bound on episode length, used to
                size the observe buffer (optional).
            parallel_interactions (int > 0): Maximum number of parallel
                interaction slots (episodes/environments).
            buffer_observe (bool | int > 0): Whether (bool) or how many
                timesteps (int) to buffer before internal observe operations.
            seed (int): Random seed for ``random`` and NumPy (optional).
            recorder (dict): Trace-recording specification; allowed keys are
                'directory', 'frequency', 'max-traces' and 'start' (optional).
        """
        # Subclasses are expected to have set a `spec` attribute beforehand.
        assert hasattr(self, 'spec')

        if seed is not None:
            assert isinstance(seed, int)
            random.seed(a=seed)
            np.random.seed(seed=seed)

        # States/actions specification
        self.states_spec = util.valid_values_spec(
            values_spec=states, value_type='state', return_normalized=True
        )
        self.actions_spec = util.valid_values_spec(
            values_spec=actions, value_type='action', return_normalized=True
        )
        self.max_episode_timesteps = max_episode_timesteps

        # Check for name overlap between states and actions.
        for name in self.states_spec:
            if name in self.actions_spec:
                # Bug fix: the error was constructed but never raised, so the
                # collision was silently ignored.
                raise TensorforceError.collision(
                    name='name', value=name, group1='states', group2='actions'
                )

        # Parallel episodes
        if isinstance(parallel_interactions, int):
            if parallel_interactions <= 0:
                raise TensorforceError.value(
                    name='parallel_interactions', value=parallel_interactions
                )
            self.parallel_interactions = parallel_interactions
        else:
            raise TensorforceError.type(name='parallel_interactions', value=parallel_interactions)

        # Buffer observe: resolve bool/int argument to a concrete buffer size.
        if isinstance(buffer_observe, bool):
            # Unbuffered or unbounded-episode modes are incompatible with
            # multiple parallel interactions.
            if not buffer_observe and self.parallel_interactions > 1:
                raise TensorforceError.unexpected()
            if self.max_episode_timesteps is None and self.parallel_interactions > 1:
                raise TensorforceError.unexpected()
            if not buffer_observe:
                self.buffer_observe = 1
            elif self.max_episode_timesteps is None:
                # Fallback buffer size when the episode length is unknown.
                self.buffer_observe = 100
            else:
                self.buffer_observe = self.max_episode_timesteps
        elif isinstance(buffer_observe, int):
            if buffer_observe <= 0:
                raise TensorforceError.value(name='buffer_observe', value=buffer_observe)
            if self.parallel_interactions > 1:
                raise TensorforceError.unexpected()
            if self.max_episode_timesteps is None:
                self.buffer_observe = buffer_observe
            else:
                # Never buffer more than one episode's worth of timesteps.
                self.buffer_observe = min(buffer_observe, self.max_episode_timesteps)
        else:
            raise TensorforceError.type(name='buffer_observe', value=buffer_observe)

        # Recorder: validate allowed keys; copy the spec so later mutation of
        # the caller's dict cannot affect the agent.
        if recorder is None:
            pass
        elif not all(key in ('directory', 'frequency', 'max-traces', 'start') for key in recorder):
            raise TensorforceError.value(name='recorder', value=list(recorder))
        self.recorder_spec = recorder if recorder is None else dict(recorder)

        self.is_initialized = False
コード例 #3
0
    def __init__(self,
                 states,
                 actions,
                 parallel_interactions=1,
                 buffer_observe=1000,
                 seed=None):
        """
        Agent constructor.

        Args:
            states (specification): States specification, arbitrarily nested dictionary of state
                descriptions with the following attributes:
                - type ('bool' | 'int' | 'float'): state data type (default: 'float').
                - shape (int | iter[int]): state shape (required).
                - num_states (int > 0): number of discrete state values (required for type 'int').
                - min_value/max_value (float): minimum/maximum state value (optional for type
                'float').
            actions (specification): Actions specification, arbitrarily nested dictionary of action
                descriptions with the following attributes:
                - type ('bool' | 'int' | 'float'): action data type (required).
                - shape (int > 0 | iter[int > 0]): action shape (default: []).
                - num_actions (int > 0): number of discrete action values (required for type
                'int').
                - min_value/max_value (float): minimum/maximum action value (optional for type
                'float').
            parallel_interactions (int > 0): Maximum number of parallel interactions to support,
                for instance, to enable multiple parallel episodes, environments or (centrally
                controlled) agents within an environment.
            buffer_observe (int > 0): Maximum number of timesteps within an episode to buffer
                before executing internal observe operations, to reduce calls to TensorFlow for
                improved performance.
            seed (int): Random seed for `random`, NumPy and TensorFlow (optional).
        """
        if seed is not None:
            assert isinstance(seed, int)
            # Bug fix: random.seed has no keyword `n`; the correct keyword is
            # `a` (random.seed(a=None, version=2)), so passing a seed raised
            # TypeError before.
            random.seed(a=seed)
            np.random.seed(seed=seed)
            tf.random.set_random_seed(seed=seed)

        # States/actions specification
        self.states_spec = util.valid_values_spec(values_spec=states,
                                                  value_type='state',
                                                  return_normalized=True)
        self.actions_spec = util.valid_values_spec(values_spec=actions,
                                                   value_type='action',
                                                   return_normalized=True)

        # Check for name overlap between states and actions.
        for name in self.states_spec:
            if name in self.actions_spec:
                # Bug fix: the error was constructed but never raised, so the
                # collision was silently ignored.
                raise TensorforceError.collision(name='name',
                                                 value=name,
                                                 group1='states',
                                                 group2='actions')

        # Parallel episodes
        if isinstance(parallel_interactions, int):
            if parallel_interactions <= 0:
                raise TensorforceError.value(name='parallel_interactions',
                                             value=parallel_interactions)
            self.parallel_interactions = parallel_interactions
        else:
            raise TensorforceError.type(name='parallel_interactions',
                                        value=parallel_interactions)

        # Buffer observe: resolve bool/int argument to a concrete buffer size.
        if isinstance(buffer_observe, bool):
            # if update_mode['unit'] == 'episodes':
            #     self.buffer_observe = 1000 if buffer_observe else 1
            # else:
            #     self.buffer_observe = update_mode['batch_size']
            self.buffer_observe = 1000 if buffer_observe else 1
        elif isinstance(buffer_observe, int):
            if buffer_observe <= 0:
                raise TensorforceError.value(name='buffer_observe',
                                             value=buffer_observe)
            self.buffer_observe = buffer_observe
        else:
            raise TensorforceError.type(name='buffer_observe',
                                        value=buffer_observe)

        # Parallel terminal/reward buffers (uninitialized; filled before read).
        self.terminal_buffers = np.ndarray(shape=(self.parallel_interactions,
                                                  self.buffer_observe),
                                           dtype=util.np_dtype(dtype='bool'))
        self.reward_buffers = np.ndarray(shape=(self.parallel_interactions,
                                                self.buffer_observe),
                                         dtype=util.np_dtype(dtype='float'))

        # Per-slot write positions into the buffers above.
        self.buffer_indices = np.zeros(shape=(self.parallel_interactions, ),
                                       dtype=util.np_dtype(dtype='int'))

        # Global counters.
        self.timestep = 0
        self.episode = 0