def make_training_input():
    """Sample a prioritized replay batch and build the training tensors.

    Returns a tuple
    ``(actor_num, obs_t, action, reward, obs_tp1, done, weights, idxes)``
    where ``obs_t`` is the oldest frame stack and ``obs_tp1`` the stack
    ``multi_step_n`` steps later, both masked across episode boundaries
    by ``make_masked_frame``.
    """
    with tf.variable_scope("training_input_preprocessing"):
        # The sampled transition is prefixed with replay-buffer
        # bookkeeping fields: (idxes, weights) + tuple(components).
        sample = replay_buffer.sample_proportional_from_buffer(
            batch_size,
            prioritized_replay_beta0,
            minimum_sample_size=learning_starts)
        # GPU because in our SKU the CPUs were the bottleneck.
        with tf.device('/gpu:1'):
            idxes, weights, actor_num, action, reward, done = sample[:6]
            history = sample[6:]
            # History carries frames and done flags interleaved as two
            # equal halves: (observations, dones).
            span = framestack + multi_step_n
            assert len(history) == span * 2
            frames, dones = history[:span], history[span:]
            # Oldest stacked observation (frames/dones handle the
            # done=True edge cases via masking).
            obs_t = make_masked_frame(
                frames[:framestack], dones[:framestack], data_format)
            # Newest stacked observation, multi_step_n steps later.
            obs_tp1 = make_masked_frame(
                frames[-framestack:], dones[-framestack:], data_format)
            return (actor_num, obs_t, action, reward,
                    obs_tp1, done, weights, idxes)
def step(self, action, indices=None, name=None):
    """Advance the wrapped env by one action and record the transition.

    Captures the current observation, steps the underlying env, and
    enqueues ``[observation, done]`` into the recent-history buffer.
    Returns ``(reward, done)`` tensors gated on the history update so
    the enqueue is guaranteed to run before callers consume them.

    ``indices`` must be ``None`` (batch sub-indexing is unsupported here).
    """
    assert indices is None
    # Observation taken *before* stepping — this is the frame that
    # belongs to the current state.
    sliced_act_obs = self.env.observation(indices)
    if self.data_format == 'NCHW':
        # Env emits NHWC; move channels in front for NCHW consumers.
        sliced_act_obs = tf.transpose(sliced_act_obs, (0, 3, 1, 2))
    # Store frames compactly as uint8.
    sliced_act_obs = tf.image.convert_image_dtype(sliced_act_obs, tf.uint8)
    assert sliced_act_obs.dtype == tf.uint8
    with tf.device('/cpu:0'):
        _, recent_obs_done = self.buffer.encode_history()
        # Last (num_stacked_frames - 1) history entries, each an
        # (observation, done) pair.
        observations, dones = zip(
            *recent_obs_done[1 - self.num_stacked_frames:])
        # Append the freshly captured frame; None done-flag means
        # "current frame, never masked".
        observations += (sliced_act_obs, )
        dones += (None, )
        obs = make_masked_frame(observations, dones, self.data_format)
        # NOTE(review): `obs` is never used or returned below — looks
        # like dead graph construction (or relies on a side effect of
        # make_masked_frame); confirm before removing.
    # Force the observation capture to happen before the env mutates.
    with tf.control_dependencies([sliced_act_obs]):
        rew, done = self.env.step(action=action, indices=indices, name=name)
        update_recent_history = self.buffer.enqueue([sliced_act_obs, done])
        # Gate the returned tensors on the history enqueue so evaluating
        # (rew, done) always commits the transition first.
        with tf.control_dependencies([update_recent_history[0].op]):
            return tf.identity(rew), tf.identity(done)
def make_training_input():
    """Draw a prioritized batch from the replay buffer and assemble the
    per-step training inputs.

    Returns
    ``(actor_num, obs_t, action, reward, obs_tp1, done, weights, idxes)``.
    """
    with tf.variable_scope("training_input_preprocessing"):
        transition = replay_buffer.sample_proportional_from_buffer(
            batch_size,
            prioritized_replay_beta,
            minimum_sample_size=learning_starts)
        # GPU because in our SKU the CPUs were the bottleneck.
        with tf.device('/gpu:1'):
            (idxes, weights, actor_num,
             transition_action, transition_reward, transition_done) = \
                transition[:6]
            raw = transition[6:]
            assert len(raw) == (framestack + multi_step_n) * 2
            # First half: frames; second half: done flags used to mask
            # stacks that straddle an episode boundary (done = True).
            cut = framestack + multi_step_n
            frames = raw[:cut]
            dones = raw[cut:]
            obs_t = make_masked_frame(frames[:framestack],
                                      dones[:framestack],
                                      data_format)
            obs_tp1 = make_masked_frame(frames[-framestack:],
                                        dones[-framestack:],
                                        data_format)
            return (actor_num, obs_t, transition_action, transition_reward,
                    obs_tp1, transition_done, weights, idxes)
def observation(self, indices=None, reset=False, name=None):
    """Return the current frame-stacked, done-masked observation.

    Combines the newest env frame with the most recent
    ``num_stacked_frames - 1`` buffered (frame, done) pairs.
    ``indices`` must be ``None``.
    """
    assert indices is None
    current = self.env.observation(indices)
    if self.data_format == 'NCHW':
        # Channels-first layout for downstream consumers.
        current = tf.transpose(current, (0, 3, 1, 2))
    with tf.device('/cpu:0'):
        _, recent_obs_done = self.buffer.encode_history()
        tail = recent_obs_done[1 - self.num_stacked_frames:]
        frames, flags = zip(*tail)
        # The live frame carries a None done-flag: it is never masked.
        frames = frames + (current, )
        flags = flags + (None, )
        return make_masked_frame(frames, flags, self.data_format)