Code Example #1
  def generate_samples(self, data_dir, tmp_dir, unused_dataset_split):
    self._setup()
    self.debug_dump_frames_path = os.path.join(
        data_dir, self.debug_dump_frames_path)

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      self.restore_networks(sess)
      pieces_generated = 0
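      # Generate one example per simulated step, covering warm-up frames as well.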
      while pieces_generated < self.num_steps + self.warm_up:
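        # If the in-graph data buffer is running low, trigger collection of
        # more simulated steps before reading the next transition.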
        avilable_data_size = sess.run(self.avilable_data_size_op)
        if avilable_data_size < 1:
          sess.run(self.collect_trigger_op)
        observ, reward, action, done = sess.run(self.data_get_op)
        debug_im = None
        if self.make_extra_debug_info:
          self.total_sim_reward += reward
          if not FLAGS.autoencoder_path:
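            # Debug image: simulated frame, real frame, and their absolute
            # pixel difference thresholded at 10 to suppress small noise.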
            err = np.ndarray.astype(np.maximum(np.abs(
                self.real_ob - observ, dtype=np.int) - 10, 0),
                                    np.uint8)
            debug_im_np = np.concatenate([observ, self.real_ob, err], axis=1)
            debug_im = gym_utils.encode_image_to_png(debug_im_np)
          if done:
            self.dones += 1
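            # Count episodes where the simulated total reward matched the real one.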
            if self.total_real_reward == self.total_sim_reward:
              self.successful_episode_reward_predictions += 1

            self.total_real_reward = 0.0
            self.total_sim_reward = 0.0
            self.real_reward = 0
            self.real_env.reset()
            # Slight weirdness to make sim env and real env aligned
            for _ in range(self.num_input_frames):
              self.real_ob, _, _, _ = self.real_env.step(0)
          else:
            self.real_ob, self.real_reward, _, _ = self.real_env.step(action)
            self.total_real_reward += self.real_reward
            self.sum_of_rewards += self.real_reward
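        # With an autoencoder, yield encoded observations and build the debug
        # image from decoded frames.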
        if FLAGS.autoencoder_path:
          if self.simulated_environment:
            debug_im = gym_utils.encode_image_to_png(
                self.autodecode(observ, sess))
          else:
            orig_observ = observ
            observ = self.autoencode(observ, sess)
            debug_im_np = np.concatenate([self.autodecode(observ, sess),
                                          orig_observ], axis=1)
            debug_im = gym_utils.encode_image_to_png(debug_im_np)
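        # Build the per-frame example to yield; the reward is shifted by
        # min_reward before being stored.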
        ret_dict = {"frame": observ,
                    "image/format": ["png"],
                    "image/height": [self.frame_height],
                    "image/width": [self.frame_width],
                    "action": [int(action)],
                    "done": [int(False)],
                    "reward": [int(reward) - self.min_reward]}
        if self.make_extra_debug_info:
          ret_dict["image/encoded_debug"] = [debug_im]
        yield ret_dict
        pieces_generated += 1
Code Example #2
    def generate_samples(self, data_dir, tmp_dir, unused_dataset_split):
        self._setup()
        self.debug_dump_frames_path = os.path.join(data_dir,
                                                   self.debug_dump_frames_path)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            self.restore_networks(sess)
            pieces_generated = 0
            while pieces_generated < self.num_steps + self.warm_up:
                avilable_data_size = sess.run(self.avilable_data_size_op)
                if avilable_data_size < 1:
                    sess.run(self.collect_trigger_op)
                observ, reward, action, done = sess.run(self.data_get_op)
                debug_im = None
                if self.make_extra_debug_info:
                    self.total_sim_reward += reward
                    err = np.ndarray.astype(
                        np.maximum(
                            np.abs(self.real_ob - observ, dtype=np.int) - 10,
                            0), np.uint8)
                    debug_im_np = np.concatenate([observ, self.real_ob, err],
                                                 axis=1)
                    debug_im = gym_utils.encode_image_to_png(debug_im_np)
                    if done:
                        self.dones += 1
                        if self.total_real_reward == self.total_sim_reward:
                            self.successful_dones += 1
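                        # Periodically report how often the simulated episode
                        # reward matched the real environment's reward.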
                        if self.dones % self.report_reward_statistics_every == 0:
                            print("Got correct total rewards {} out of {}:".
                                  format(self.successful_dones,
                                         self.report_reward_statistics_every))
                            self.successful_dones = 0

                        self.total_real_reward = 0.0
                        self.total_sim_reward = 0.0
                        self.real_reward = 0
                        self.real_env.reset()
                        # Slight weirdness to make sim env and real env aligned
                        for _ in range(simulated_batch_env.SimulatedBatchEnv.
                                       NUMBER_OF_HISTORY_FRAMES):
                            self.real_ob, _, _, _ = self.real_env.step(0)
                    else:
                        self.real_ob, self.real_reward, _, _ = self.real_env.step(
                            action)
                        self.total_real_reward += self.real_reward
                if FLAGS.autoencoder_path:
                    observ = self.autoencode(observ, sess)
                ret_dict = {
                    "frame": observ,
                    "image/format": ["png"],
                    "image/height": [self.frame_height],
                    "image/width": [self.frame_width],
                    "action": [int(action)],
                    "done": [int(False)],
                    "reward": [int(reward) - self.min_reward]
                }
                if self.make_extra_debug_info:
                    ret_dict["image/encoded_debug"] = [debug_im]
                yield ret_dict
                pieces_generated += 1