Example #1
    def make_training_input():
        with tf.variable_scope("training_input_preprocessing"):
            # The sample comes back with extra bookkeeping prepended to the raw data:
            # return (idxes, weights) + tuple(components)
            transition = replay_buffer.sample_proportional_from_buffer(
                batch_size,
                prioritized_replay_beta0,
                minimum_sample_size=learning_starts)

            # GPU because in our SKU the CPUs were the bottleneck
            with tf.device('/gpu:1'):
                (idxes, weights, actor_num, transition_action,
                 transition_reward, transition_done) = transition[:6]
                # Remaining entries: observations followed by done flags
                frames = transition[6:]
                assert len(frames) == (framestack + multi_step_n) * 2

                # Split into observations and done flags; the flags let us
                # mask out frames that cross an episode boundary (done = True)
                frames, dones = (frames[:framestack + multi_step_n],
                                 frames[framestack + multi_step_n:])
                # Observation at time t (the oldest frames)
                obs_t = make_masked_frame(frames[:framestack],
                                          dones[:framestack], data_format)
                # Newest observation (multi_step_n steps later)
                obs_tp1 = make_masked_frame(frames[-framestack:],
                                            dones[-framestack:], data_format)

                return actor_num, obs_t, transition_action, transition_reward, obs_tp1, transition_done, weights, idxes
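
`make_masked_frame` is not defined in these examples. Below is a plausible
minimal sketch of what it might do, assuming batched frames ([N, C, H, W] or
[N, H, W, C]) and per-batch done flags; every detail here is an assumption,
not the repository's actual helper:

    import tensorflow as tf

    def make_masked_frame(frames, dones, data_format):
        """Concatenate a frame stack, zeroing frames from an earlier episode.

        Walks backwards from the newest frame; once a done flag is seen,
        all older frames are zeroed so the stack never mixes observations
        across an episode boundary. `dones[i] is None` means no flag.
        """
        frames = list(frames)
        mask = None  # accumulated keep-mask, broadcastable over a frame
        for i in reversed(range(len(frames))):
            f = tf.cast(frames[i], tf.float32)
            frames[i] = f if mask is None else f * mask
            if dones[i] is not None:
                keep = 1.0 - tf.cast(dones[i], tf.float32)
                keep = tf.reshape(keep, [-1, 1, 1, 1])  # [N] -> broadcastable
                mask = keep if mask is None else mask * keep
        # Stack along the channel axis.
        return tf.concat(frames, axis=1 if data_format == 'NCHW' else 3)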
Example #2
    def step(self, action, indices=None, name=None):
        assert indices is None
        sliced_act_obs = self.env.observation(indices)
        if self.data_format == 'NCHW':
            sliced_act_obs = tf.transpose(sliced_act_obs, (0, 3, 1, 2))

        sliced_act_obs = tf.image.convert_image_dtype(sliced_act_obs, tf.uint8)
        assert sliced_act_obs.dtype == tf.uint8

        # Keep the recent-history bookkeeping on the CPU.
        with tf.device('/cpu:0'):
            _, recent_obs_done = self.buffer.encode_history()

            # Take the newest (num_stacked_frames - 1) history entries and
            # append the fresh frame, which has no done flag yet.
            observations, dones = zip(
                *recent_obs_done[1 - self.num_stacked_frames:])
            observations += (sliced_act_obs, )
            dones += (None, )

        obs = make_masked_frame(observations, dones, self.data_format)
        # Force the new observation to be computed before the env steps,
        # then record (obs, done) into the recent-history buffer.
        with tf.control_dependencies([sliced_act_obs]):
            rew, done = self.env.step(action=action,
                                      indices=indices,
                                      name=name)
            update_recent_history = self.buffer.enqueue([sliced_act_obs, done])

            # Return only after the history update has actually run.
            with tf.control_dependencies([update_recent_history[0].op]):
                return tf.identity(rew), tf.identity(done)
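
The `tf.control_dependencies` / `tf.identity` pairing above is the standard
TF1 idiom for forcing a side effect (here, the history enqueue) to run before
a value is returned. A minimal standalone sketch of the same pattern:

    import tensorflow as tf

    counter = tf.Variable(0, dtype=tf.int64)
    increment = tf.assign_add(counter, 1)

    # `tf.identity` creates a new tensor inside the dependency scope, so
    # fetching it forces `increment` to run first.
    with tf.control_dependencies([increment]):
        value_after = tf.identity(counter)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(value_after))  # 1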
Example #3
    def make_training_input():
        with tf.variable_scope("training_input_preprocessing"):
            transition = replay_buffer.sample_proportional_from_buffer(
                batch_size,
                prioritized_replay_beta,
                minimum_sample_size=learning_starts)

            # GPU because in our SKU the CPUs were the bottleneck
            with tf.device('/gpu:1'):
                (idxes, weights, actor_num, transition_action,
                 transition_reward, transition_done) = transition[:6]
                frames = transition[6:]
                assert len(frames) == (framestack + multi_step_n) * 2

                # Handle edge cases (done = True)
                frames, dones = (frames[:framestack + multi_step_n],
                                 frames[framestack + multi_step_n:])
                obs_t = make_masked_frame(frames[:framestack],
                                          dones[:framestack], data_format)
                obs_tp1 = make_masked_frame(frames[-framestack:],
                                            dones[-framestack:], data_format)

                return actor_num, obs_t, transition_action, transition_reward, obs_tp1, transition_done, weights, idxes
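
The `weights` slot holds the importance-sampling corrections of prioritized
experience replay, assuming the buffer follows the standard proportional
scheme of Schaul et al.; numerically (all values illustrative):

    import numpy as np

    # w_i = (N * P(i)) ** (-beta), normalized by the max so the weights
    # only ever scale the loss down.
    priorities = np.array([0.5, 0.2, 0.2, 0.1])
    P = priorities / priorities.sum()
    N, beta = len(P), 0.4
    w = (N * P) ** (-beta)
    w /= w.max()
    print(w)  # the rarest transitions get the largest corrections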
Example #4
    def observation(self, indices=None, reset=False, name=None):
        assert indices is None
        obs = self.env.observation(indices)
        if self.data_format == 'NCHW':
            obs = tf.transpose(obs, (0, 3, 1, 2))

        # Keep the recent-history bookkeeping on the CPU.
        with tf.device('/cpu:0'):
            _, recent_obs_done = self.buffer.encode_history()

            # Newest (num_stacked_frames - 1) history entries plus the
            # fresh frame, which has no done flag yet.
            observations, dones = zip(
                *recent_obs_done[1 - self.num_stacked_frames:])
            observations += (obs, )
            dones += (None, )

        return make_masked_frame(observations, dones, self.data_format)
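
Both wrappers transpose NHWC observations to NCHW when
`data_format == 'NCHW'`. A quick shape check of that permutation (the
84x84x4 Atari-style shape is only illustrative):

    import tensorflow as tf

    x = tf.zeros([32, 84, 84, 4])      # NHWC: batch, height, width, channels
    y = tf.transpose(x, (0, 3, 1, 2))  # NCHW: batch, channels, height, width
    assert y.shape.as_list() == [32, 4, 84, 84]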