Code Example #1
 def __init__(self, batch_env):
   super(AutoencoderWrapper, self).__init__(batch_env)
   self._observ = tf.Variable(
       tf.zeros((len(self),) + self.observ_shape, self.observ_dtype),
       trainable=False)
   with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
     autoencoder_hparams = autoencoders.autoencoder_discrete_pong()
     self.autoencoder_model = autoencoders.AutoencoderOrderedDiscrete(
         autoencoder_hparams, tf.estimator.ModeKeys.EVAL)
   self.autoencoder_model.set_mode(tf.estimator.ModeKeys.EVAL)
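
Most of the snippets on this page construct the model inside tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE), so building a second wrapper or model in the same graph binds to the already-created autoencoder variables instead of failing. A minimal standalone sketch of that reuse behavior follows (TF 1.x; the scope name and variable name are illustrative, not taken from the project):

import tensorflow as tf

def build_weight():
  # With reuse=tf.AUTO_REUSE, the first call creates the variable and every
  # later call returns the same one instead of raising a reuse error.
  with tf.variable_scope("autoencoder_demo", reuse=tf.AUTO_REUSE):
    return tf.get_variable("w", [3, 3], initializer=tf.zeros_initializer())

w1 = build_weight()
w2 = build_weight()
assert w1 is w2  # both calls resolve to the same underlying variable
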
Code Example #2
 def setup_autoencoder(self):
     if self.autoencoder_model is not None:
         return
     with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
         autoencoder_hparams = autoencoders.autoencoder_discrete_pong()
         autoencoder_hparams.data_dir = "unused"
         autoencoder_hparams.problem_hparams = self.get_hparams(
             autoencoder_hparams)
         autoencoder_hparams.problem = self
         self.autoencoder_model = autoencoders.AutoencoderOrderedDiscrete(
             autoencoder_hparams, tf.estimator.ModeKeys.EVAL)
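
Usage note (the call site below is hypothetical, not taken from the project): because of the early return at the top, setup_autoencoder is idempotent, so any method that needs the model can simply call it first.

# Hypothetical call site: repeated calls are cheap because of the guard above.
problem.setup_autoencoder()
model = problem.autoencoder_model  # constructed exactly once per instance
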
Code Example #3
 def __init__(self, batch_env):
   super(AutoencoderWrapper, self).__init__(batch_env)
   batch_size, height, width, _ = self._batch_env.observ.get_shape().as_list()
   ae_height = int(math.ceil(height / self.autoencoder_factor))
   ae_width = int(math.ceil(width / self.autoencoder_factor))
   ae_channels = 24  # TODO(piotrmilos): make it better
   observ_shape = (batch_size, ae_height, ae_width, ae_channels)
    self._observ = tf.Variable(
       tf.zeros(observ_shape, tf.float32), trainable=False)
   with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
     autoencoder_hparams = autoencoders.autoencoder_discrete_pong()
     self.autoencoder_model = autoencoders.AutoencoderOrderedDiscrete(
         autoencoder_hparams, tf.estimator.ModeKeys.EVAL)
   self.autoencoder_model.set_mode(tf.estimator.ModeKeys.EVAL)
Code Example #4
File: gym.py  Project: facaiy/tensor2tensor
 def autoencode(self, image, sess):
   with tf.Graph().as_default():
     hparams = autoencoders.autoencoder_discrete_pong()
     hparams.data_dir = "unused"
     hparams.problem_hparams = self.get_hparams(hparams)
     hparams.problem = self
     model = autoencoders.AutoencoderOrderedDiscrete(
         hparams, tf.estimator.ModeKeys.EVAL)
     img = tf.constant(image)
     img = tf.to_int32(tf.reshape(
         img, [1, 1, self.frame_height, self.frame_width, self.num_channels]))
     encoded = model.encode(img)
     model_saver = tf.train.Saver(tf.global_variables())
     model_saver.restore(sess, FLAGS.autoencoder_path)
     return sess.run(encoded)
Code Example #5
 def autoencoder_factor(self):
     """By how much to divide sizes when using autoencoders."""
     hparams = autoencoders.autoencoder_discrete_pong()
     return 2**hparams.num_hidden_layers
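
Code example #3 above divides the raw frame height and width by this factor before allocating the observation variable. A small worked sketch of that arithmetic (the 210x160 frame size and the factor of 32, i.e. 2**5, are illustrative assumptions, not values read from autoencoder_discrete_pong):

import math

def encoded_frame_shape(height, width, factor):
  # Mirrors code example #3: round up so that no border pixels are dropped.
  return int(math.ceil(height / factor)), int(math.ceil(width / factor))

# A hypothetical 210x160 Atari frame with an assumed factor of 2**5 = 32:
print(encoded_frame_shape(210, 160, 32))  # -> (7, 5)
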
Code Example #6
    def _setup(self):
        if self.make_extra_debug_info:
            self.report_reward_statistics_every = 10
            self.dones = 0
            self.real_reward = 0
            self.real_env.reset()
            # Slight weirdness to make sim env and real env aligned
            for _ in range(simulated_batch_env.SimulatedBatchEnv.
                           NUMBER_OF_HISTORY_FRAMES):
                self.real_ob, _, _, _ = self.real_env.step(0)
            self.total_sim_reward, self.total_real_reward = 0.0, 0.0
            self.successful_episode_reward_predictions = 0

        in_graph_wrappers = self.in_graph_wrappers + [
            (atari.MemoryWrapper, {})]
        env_hparams = tf.contrib.training.HParams(
            in_graph_wrappers=in_graph_wrappers,
            simulated_environment=self.simulated_environment)

        generator_batch_env = batch_env_factory(self.environment_spec,
                                                env_hparams,
                                                num_agents=1,
                                                xvfb=False)

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            if FLAGS.agent_policy_path:
                policy_lambda = self.collect_hparams.network
            else:
                # When no agent_policy_path is set, just generate random samples.
                policy_lambda = rl.random_policy_fun
            policy_factory = tf.make_template(
                "network",
                functools.partial(policy_lambda,
                                  self.environment_spec().action_space,
                                  self.collect_hparams),
                create_scope_now_=True,
                unique_name_="network")

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            self.collect_hparams.epoch_length = 10
            _, self.collect_trigger_op = collect.define_collect(
                policy_factory,
                generator_batch_env,
                self.collect_hparams,
                eval_phase=False,
                scope="define_collect")

        if FLAGS.autoencoder_path:
            # TODO(lukaszkaiser): remove hard-coded autoencoder params.
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                autoencoder_hparams = autoencoders.autoencoder_discrete_pong()
                autoencoder_hparams.data_dir = "unused"
                autoencoder_hparams.problem_hparams = self.get_hparams(
                    autoencoder_hparams)
                autoencoder_hparams.problem = self
                autoencoder_model = autoencoders.AutoencoderOrderedDiscrete(
                    autoencoder_hparams, tf.estimator.ModeKeys.EVAL)
                shape = [
                    self.raw_frame_height, self.raw_frame_width,
                    self.num_channels
                ]
                self.autoencoder_feed = tf.placeholder(tf.int32, shape=shape)
                autoencoded = autoencoder_model.encode(
                    tf.reshape(self.autoencoder_feed, [1, 1] + shape))
                autoencoded = tf.reshape(autoencoded, [
                    self.frame_height, self.frame_width, self.num_channels, 8
                ])  # 8-bit groups.
                self.autoencoder_result = discretization.bit_to_int(
                    autoencoded, 8)

        self.avilable_data_size_op = (
            atari.MemoryWrapper.singleton.speculum.size())
        self.data_get_op = atari.MemoryWrapper.singleton.speculum.dequeue()
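
All of the examples above follow the same pattern: build the Pong autoencoder hparams, construct AutoencoderOrderedDiscrete in EVAL mode under an AUTO_REUSE variable scope, and call encode on an int32 image tensor shaped [batch, time, height, width, channels]. The sketch below condenses that pattern; the import path, the frame size, and the checkpoint handling are assumptions based on the snippets above, and the problem_hparams attached in examples #2, #4 and #6 are omitted here.

import numpy as np
import tensorflow as tf
from tensor2tensor.models.research import autoencoders  # assumed import path


def encode_frame(frame, checkpoint_path):
  """Hedged sketch: encode one raw frame with the Pong autoencoder."""
  with tf.Graph().as_default():
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
      hparams = autoencoders.autoencoder_discrete_pong()
      model = autoencoders.AutoencoderOrderedDiscrete(
          hparams, tf.estimator.ModeKeys.EVAL)
    # [height, width, channels] -> [batch=1, time=1, height, width, channels],
    # matching the reshape used in code example #4.
    img = tf.to_int32(tf.reshape(tf.constant(frame), (1, 1) + frame.shape))
    encoded = model.encode(img)
    with tf.Session() as sess:
      tf.train.Saver(tf.global_variables()).restore(sess, checkpoint_path)
      return sess.run(encoded)


# Hypothetical usage with a dummy 210x160 RGB frame and a trained checkpoint:
# result = encode_frame(np.zeros((210, 160, 3), np.uint8), "/path/to/checkpoint")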