def generate_samples(self, data_dir, tmp_dir, unused_dataset_split):
  self._setup()
  self.debug_dump_frames_path = os.path.join(
      data_dir, self.debug_dump_frames_path)

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    self.restore_networks(sess)
    pieces_generated = 0
    while pieces_generated < self.num_steps + self.warm_up:
      avilable_data_size = sess.run(self.avilable_data_size_op)
      if avilable_data_size < 1:
        sess.run(self.collect_trigger_op)
      observ, reward, action, done = sess.run(self.data_get_op)
      debug_im = None
      if self.make_extra_debug_info:
        self.total_sim_reward += reward
        if not FLAGS.autoencoder_path:
          # Debug image: simulated frame, real frame and their thresholded
          # absolute difference side by side. Cast to a signed dtype before
          # subtracting so uint8 arithmetic cannot wrap around.
          err = np.maximum(
              np.abs(self.real_ob.astype(np.int64) -
                     observ.astype(np.int64)) - 10, 0).astype(np.uint8)
          debug_im_np = np.concatenate([observ, self.real_ob, err], axis=1)
          debug_im = gym_utils.encode_image_to_png(debug_im_np)
        if done:
          self.dones += 1
          if self.total_real_reward == self.total_sim_reward:
            self.successful_episode_reward_predictions += 1
          self.total_real_reward = 0.0
          self.total_sim_reward = 0.0
          self.real_reward = 0
          self.real_env.reset()
          # Slight weirdness to make sim env and real env aligned.
          for _ in range(self.num_input_frames):
            self.real_ob, _, _, _ = self.real_env.step(0)
        else:
          self.real_ob, self.real_reward, _, _ = self.real_env.step(action)
          self.total_real_reward += self.real_reward
          self.sum_of_rewards += self.real_reward
      if FLAGS.autoencoder_path:
        if self.simulated_environment:
          # Frames from the simulated env are not re-encoded here; decode
          # only to produce the debug image.
          debug_im = gym_utils.encode_image_to_png(
              self.autodecode(observ, sess))
        else:
          # Encode the real frame; keep a decoded/original pair for the
          # debug image.
          orig_observ = observ
          observ = self.autoencode(observ, sess)
          debug_im_np = np.concatenate(
              [self.autodecode(observ, sess), orig_observ], axis=1)
          debug_im = gym_utils.encode_image_to_png(debug_im_np)
      ret_dict = {
          "frame": observ,
          "image/format": ["png"],
          "image/height": [self.frame_height],
          "image/width": [self.frame_width],
          "action": [int(action)],
          "done": [int(False)],  # Always written as False here.
          # Shift rewards so the minimum maps to zero.
          "reward": [int(reward) - self.min_reward]
      }
      if self.make_extra_debug_info:
        ret_dict["image/encoded_debug"] = [debug_im]
      yield ret_dict
      pieces_generated += 1
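# --- Illustrative only, not part of the original source. ---
# A minimal consumption sketch for the generator above. Assumptions:
# `problem` is a configured instance of the enclosing problem class, and
# gym_utils.encode_image_to_png returns PNG-encoded bytes; the function name
# and `out_dir` parameter below are hypothetical.
import os

def dump_debug_frames(problem, data_dir, tmp_dir, out_dir):
  """Writes any encoded debug PNGs yielded by generate_samples to disk."""
  for i, sample in enumerate(
      problem.generate_samples(data_dir, tmp_dir, None)):
    encoded = sample.get("image/encoded_debug")
    if encoded:  # Present only when make_extra_debug_info is set.
      with open(os.path.join(out_dir, "debug_%05d.png" % i), "wb") as f:
        f.write(encoded[0])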
def generate_samples(self, data_dir, tmp_dir, unused_dataset_split):
  self._setup()
  self.debug_dump_frames_path = os.path.join(
      data_dir, self.debug_dump_frames_path)

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    self.restore_networks(sess)
    pieces_generated = 0
    while pieces_generated < self.num_steps + self.warm_up:
      avilable_data_size = sess.run(self.avilable_data_size_op)
      if avilable_data_size < 1:
        sess.run(self.collect_trigger_op)
      observ, reward, action, done = sess.run(self.data_get_op)
      debug_im = None
      if self.make_extra_debug_info:
        self.total_sim_reward += reward
        # Debug image: simulated frame, real frame and their thresholded
        # absolute difference side by side. Cast to a signed dtype before
        # subtracting so uint8 arithmetic cannot wrap around.
        err = np.maximum(
            np.abs(self.real_ob.astype(np.int64) -
                   observ.astype(np.int64)) - 10, 0).astype(np.uint8)
        debug_im_np = np.concatenate([observ, self.real_ob, err], axis=1)
        debug_im = gym_utils.encode_image_to_png(debug_im_np)
        if done:
          self.dones += 1
          if self.total_real_reward == self.total_sim_reward:
            self.successful_dones += 1
          if self.dones % self.report_reward_statistics_every == 0:
            print("Got correct total rewards {} out of {}.".format(
                self.successful_dones, self.report_reward_statistics_every))
            self.successful_dones = 0
          self.total_real_reward = 0.0
          self.total_sim_reward = 0.0
          self.real_reward = 0
          self.real_env.reset()
          # Slight weirdness to make sim env and real env aligned.
          for _ in range(simulated_batch_env.SimulatedBatchEnv
                         .NUMBER_OF_HISTORY_FRAMES):
            self.real_ob, _, _, _ = self.real_env.step(0)
        else:
          self.real_ob, self.real_reward, _, _ = self.real_env.step(action)
          self.total_real_reward += self.real_reward
      if FLAGS.autoencoder_path:
        observ = self.autoencode(observ, sess)
      ret_dict = {
          "frame": observ,
          "image/format": ["png"],
          "image/height": [self.frame_height],
          "image/width": [self.frame_width],
          "action": [int(action)],
          "done": [int(False)],  # Always written as False here.
          # Shift rewards so the minimum maps to zero.
          "reward": [int(reward) - self.min_reward]
      }
      if self.make_extra_debug_info:
        ret_dict["image/encoded_debug"] = [debug_im]
      yield ret_dict
      pieces_generated += 1
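# --- Illustrative only, not part of the original source. ---
# Standalone sketch of the thresholded frame difference computed for the
# debug image in both versions above. Casting to a signed dtype before
# subtracting avoids uint8 wraparound (3 - 5 wraps to 254 in uint8
# arithmetic), which is the bug the original
# np.abs(self.real_ob - observ, dtype=np.int) call did not prevent.
import numpy as np

def frame_error(real_ob, observ, threshold=10):
  """Per-pixel |real - sim| with small differences suppressed."""
  diff = np.abs(real_ob.astype(np.int64) - observ.astype(np.int64))
  return np.maximum(diff - threshold, 0).astype(np.uint8)

# Identical frames produce an all-zero error image.
assert not frame_error(np.full((2, 2, 3), 7, np.uint8),
                       np.full((2, 2, 3), 7, np.uint8)).any()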