Code example #1
    def update(self, _, a, r, s1):
        if not self._stop:
            # Cast inputs to single precision before handing them to the agent.
            a = double_to_single_precision(a)
            r = double_to_single_precision(r)
            s1 = double_to_single_precision(np.array(s1))
            d = double_to_single_precision(1.0)

            # Pack the transition into a mid-episode dm_env TimeStep.
            timestep = dm_env.transition(reward=r, observation=s1, discount=d)

            # Feed the transition to the agent and run a learning update.
            self.agent.observe(a, timestep)
            self.agent.update()

        # Log values.
        values = {
            'step': self._obs_counter,
            'action': a,
            'reward': r,
        }
        self._logger.write(values)
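
The double_to_single_precision helper used throughout these examples is not shown on this page. A minimal sketch of a plausible implementation, assuming it simply casts scalars and arrays to np.float32 (the actual ILU-RL helper may handle dtypes differently):

import numpy as np

def double_to_single_precision(x):
    # Hypothetical reconstruction, not the actual ILU-RL helper:
    # cast scalars and arrays down to float32 before they reach the agent.
    return np.asarray(x, dtype=np.float32)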
Code example #2
File: agent.py  Project: GAIPS/ILU-RL
    def update(self, _, a, r, s1):
        if not self._stop:
            # Cast inputs to single precision; here the action is a vector
            # (one entry per phase), hence the extra np.array call.
            a = double_to_single_precision(np.array(a))
            r = double_to_single_precision(r)
            s1 = double_to_single_precision(np.array(s1))
            d = double_to_single_precision(1.0)

            # Pack the transition into a mid-episode dm_env TimeStep.
            timestep = dm_env.transition(reward=r, observation=s1, discount=d)

            # Feed the transition to the agent and run a learning update.
            self.agent.observe(a, timestep)
            self.agent.update()

        # Log values (one action entry per phase).
        values = {
            'step': self._obs_counter,
            'reward': r,
        }
        for i in range(self._params.num_phases):
            values[f"action_p{i}"] = a[i]
        self._logger.write(values)
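
For context, dm_env.transition (and dm_env.restart, used in example #3 below) are standard dm_env helpers that build TimeStep tuples; a small self-contained illustration of what they return:

import numpy as np
import dm_env

# transition() builds a mid-episode TimeStep (StepType.MID).
ts = dm_env.transition(
    reward=np.float32(-1.0),
    observation=np.zeros(4, dtype=np.float32),
    discount=np.float32(1.0),
)
assert ts.mid() and ts.reward == np.float32(-1.0)

# restart() builds an episode-start TimeStep (StepType.FIRST),
# whose reward and discount are None.
first = dm_env.restart(np.zeros(4, dtype=np.float32))
assert first.first() and first.reward is None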
Code example #3
    def act(self, s):
        s = double_to_single_precision(np.array(s))

        # On the very first call, emit a FIRST TimeStep so the agent sees
        # the start of the episode.
        if self._obs_counter == 0:
            t_1 = dm_env.restart(s)
            self.agent.observe_first(t_1)

        # Select action: deterministic once training has stopped, otherwise
        # the agent's (possibly exploratory) training policy.
        if self._stop:
            action = self.agent.deterministic_action(s)
        else:
            action = self.agent.select_action(s)

        self._obs_counter += 1

        return int(action)
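
Taken together, act and update imply a driver loop along these lines. This is a hypothetical sketch, not ILU-RL code: _DummyEnv and _RandomWrapper are stand-ins for the project's environment and for the class the methods above belong to.

import numpy as np

class _RandomWrapper:
    # Stand-in matching the act()/update() signatures above.
    def act(self, s):
        return int(np.random.randint(2))

    def update(self, _, a, r, s1):
        pass  # the real wrapper forwards the transition to its agent

class _DummyEnv:
    # Toy environment: random 4-dim states, negative-sum reward.
    def reset(self):
        return np.zeros(4)

    def step(self, action):
        s1 = np.random.rand(4)
        return s1, float(-s1.sum())

env, wrapper = _DummyEnv(), _RandomWrapper()
s = env.reset()
for _ in range(10):
    a = wrapper.act(s)
    s1, r = env.step(a)
    wrapper.update(None, a, r, s1)  # the leading '_' argument is unused
    s = s1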