Example no. 1
    def __init__(self, environment_spec, length, initial_frame_chooser):
        """Batch of environments inside the TensorFlow graph."""

        observ_space = utils.get_observation_space(environment_spec)
        initial_frames_problem = environment_spec.initial_frames_problem
        self._frames_problem_name = str(initial_frames_problem)
        observ_shape = (initial_frames_problem.frame_height,
                        initial_frames_problem.frame_width,
                        initial_frames_problem.num_channels)
        observ_space.shape = observ_shape
        action_space = utils.get_action_space(environment_spec)
        super(SimulatedBatchEnv, self).__init__(observ_space, action_space)

        self.length = length
        self._min_reward = initial_frames_problem.min_reward
        self._num_frames = environment_spec.video_num_input_frames
        self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

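        # Build the world model in PREDICT mode; force_full_predict makes it
        # generate frames instead of relying on ground-truth targets.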
        model_hparams = trainer_lib.create_hparams(FLAGS.hparams_set,
                                                   problem_name=FLAGS.problem)
        model_hparams.force_full_predict = True
        self._model = registry.model(FLAGS.model)(
            model_hparams, tf.estimator.ModeKeys.PREDICT)

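        # Buffer of the most recent frames fed to the model; frames used at
        # reset come from initial_frame_chooser.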
        self.history_buffer = HistoryBuffer(initial_frame_chooser, self.length,
                                            self.observ_dtype)

        self._observ = tf.Variable(tf.zeros((len(self), ) + observ_shape,
                                            self.observ_dtype),
                                   trainable=False)
Example no. 2
    def __init__(self, environment_spec, length):
        """Batch of environments inside the TensorFlow graph."""

        observ_space = utils.get_observation_space(environment_spec)
        initial_frames_problem = environment_spec.initial_frames_problem
        observ_shape = (initial_frames_problem.frame_height,
                        initial_frames_problem.frame_width,
                        initial_frames_problem.num_channels)
        observ_space.shape = observ_shape
        action_space = utils.get_action_space(environment_spec)
        super(SimulatedBatchEnv, self).__init__(observ_space, action_space)

        self.length = length
        self._min_reward = initial_frames_problem.min_reward
        self._num_frames = environment_spec.video_num_input_frames
        self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

        model_hparams = trainer_lib.create_hparams(FLAGS.hparams_set,
                                                   problem_name=FLAGS.problem)
        model_hparams.force_full_predict = True
        self._model = registry.model(FLAGS.model)(
            model_hparams, tf.estimator.ModeKeys.PREDICT)

        hparams = HParams(
            video_num_input_frames=environment_spec.video_num_input_frames,
            video_num_target_frames=environment_spec.video_num_target_frames,
            environment_spec=environment_spec)

        # TODO(piotrmilos): check if this shouldn't be tf.estimator.ModeKeys.PREDICT
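        # A single batch of ground-truth initial frames: used directly when
        # random starts are disabled, and used for start_frame when the first
        # random start should be flipped to the true beginning.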
        initial_frames_dataset = initial_frames_problem.dataset(
            tf.estimator.ModeKeys.TRAIN,
            FLAGS.data_dir,
            shuffle_files=False,
            hparams=hparams).take(1)
        start_frame = None
        if environment_spec.simulation_random_starts:
            dataset = initial_frames_problem.dataset(
                tf.estimator.ModeKeys.TRAIN,
                FLAGS.data_dir,
                shuffle_files=True,
                hparams=hparams,
                only_last=True)
            dataset = dataset.shuffle(buffer_size=1000)
            if environment_spec.simulation_flip_first_random_for_beginning:
                # The first random frame in the PPO batch is later flipped to
                # the true beginning frame.
                start = initial_frames_dataset.make_one_shot_iterator().get_next()
                start_frame = tf.expand_dims(start["inputs"], axis=0)
        else:
            dataset = initial_frames_dataset

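        # Keep only the frames and cycle through them indefinitely.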
        dataset = dataset.map(lambda x: x["inputs"]).repeat()
        self.history_buffer = HistoryBuffer(dataset,
                                            self.length,
                                            self.observ_dtype,
                                            start_frame=start_frame)

        self._observ = tf.Variable(tf.zeros((len(self), ) + observ_shape,
                                            self.observ_dtype),
                                   trainable=False)
Example no. 3
    def __init__(self, environment_spec, length):
        """Batch of environments inside the TensorFlow graph."""

        observ_space = utils.get_observation_space(environment_spec)
        initial_frames_problem = environment_spec.initial_frames_problem
        observ_shape = (initial_frames_problem.frame_height,
                        initial_frames_problem.frame_width,
                        initial_frames_problem.num_channels)
        observ_space.shape = observ_shape
        action_space = utils.get_action_space(environment_spec)
        super(SimulatedBatchEnv, self).__init__(observ_space, action_space)

        self.length = length
        self._min_reward = initial_frames_problem.min_reward
        self._num_frames = environment_spec.video_num_input_frames
        self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

        model_hparams = trainer_lib.create_hparams(FLAGS.hparams_set,
                                                   problem_name=FLAGS.problem)
        model_hparams.force_full_predict = True
        self._model = registry.model(FLAGS.model)(
            model_hparams, tf.estimator.ModeKeys.PREDICT)

        hparams = HParams(
            video_num_input_frames=environment_spec.video_num_input_frames,
            video_num_target_frames=environment_spec.video_num_target_frames,
            environment_spec=environment_spec)

        if environment_spec.simulation_random_starts:
            dataset = initial_frames_problem.dataset(
                tf.estimator.ModeKeys.TRAIN,
                FLAGS.data_dir,
                shuffle_files=True,
                hparams=hparams)
            dataset = dataset.shuffle(buffer_size=1000)
        else:
            dataset = initial_frames_problem.dataset(
                tf.estimator.ModeKeys.TRAIN,
                FLAGS.data_dir,
                shuffle_files=False,
                hparams=hparams).take(1)

        dataset = dataset.map(lambda x: x["inputs"]).repeat()
        self.history_buffer = HistoryBuffer(dataset, self.length,
                                            self.observ_dtype)

        self._observ = tf.Variable(tf.zeros((len(self), ) + observ_shape,
                                            self.observ_dtype),
                                   trainable=False)
Example no. 4
  def __init__(self, hparams, length,
               simulation_random_starts=False,
               intrinsic_reward_scale=0.):
    """Batch of environments inside the TensorFlow graph."""
    self.length = length
    environment_spec = hparams.environment_spec
    initial_frames_problem = environment_spec.initial_frames_problem
    self._min_reward = initial_frames_problem.min_reward
    self._num_frames = hparams.model_hparams.video_num_input_frames
    self._intrinsic_reward_scale = intrinsic_reward_scale

    # initialization_env = environment_lambda()
    model_hparams = trainer_lib.create_hparams(
        FLAGS.hparams_set, problem_name=FLAGS.problem)
    model_hparams.force_full_predict = True
    self._model = registry.model(FLAGS.model)(
        model_hparams, tf.estimator.ModeKeys.PREDICT)

    _, self.action_shape, self.action_dtype = get_action_space(environment_spec)

    # TODO(lukaszkaiser): do this in a cleaner way
    hparams.video_num_input_frames, hparams.video_num_target_frames = (
        hparams.model_hparams.video_num_input_frames,
        hparams.model_hparams.video_num_target_frames)

    if simulation_random_starts:
      dataset = initial_frames_problem.dataset(tf.estimator.ModeKeys.TRAIN,
                                               FLAGS.data_dir,
                                               shuffle_files=True,
                                               hparams=hparams)
      dataset = dataset.shuffle(buffer_size=100)
    else:
      dataset = initial_frames_problem.dataset(tf.estimator.ModeKeys.TRAIN,
                                               FLAGS.data_dir,
                                               shuffle_files=True,
                                               hparams=hparams).take(1)

    dataset = dataset.map(lambda x: x["inputs"]).repeat()
    self.history_buffer = HistoryBuffer(dataset, self.length)

    shape = (self.length, initial_frames_problem.frame_height,
             initial_frames_problem.frame_width,
             initial_frames_problem.num_channels)
    self._observ = tf.Variable(tf.zeros(shape, tf.float32), trainable=False)
Example no. 5
  def __init__(self, environment_spec, length, other_hparams):
    """Batch of environments inside the TensorFlow graph."""
    del other_hparams
    self.length = length
    initial_frames_problem = environment_spec.initial_frames_problem
    self._min_reward = initial_frames_problem.min_reward
    self._num_frames = environment_spec.video_num_input_frames
    self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

    model_hparams = trainer_lib.create_hparams(
        FLAGS.hparams_set, problem_name=FLAGS.problem)
    model_hparams.force_full_predict = True
    self._model = registry.model(FLAGS.model)(
        model_hparams, tf.estimator.ModeKeys.PREDICT)

    _, self.action_shape, self.action_dtype = get_action_space(environment_spec)

    hparams = HParams(
        video_num_input_frames=environment_spec.video_num_input_frames,
        video_num_target_frames=environment_spec.video_num_target_frames,
        environment_spec=environment_spec)

    if environment_spec.simulation_random_starts:
      dataset = initial_frames_problem.dataset(tf.estimator.ModeKeys.TRAIN,
                                               FLAGS.data_dir,
                                               shuffle_files=True,
                                               hparams=hparams)
      dataset = dataset.shuffle(buffer_size=100)
    else:
      dataset = initial_frames_problem.dataset(tf.estimator.ModeKeys.TRAIN,
                                               FLAGS.data_dir,
                                               shuffle_files=False,
                                               hparams=hparams).take(1)

    dataset = dataset.map(lambda x: x["inputs"]).repeat()
    self.history_buffer = HistoryBuffer(dataset, self.length)

    shape = (self.length, initial_frames_problem.frame_height,
             initial_frames_problem.frame_width,
             initial_frames_problem.num_channels)
    self._observ = tf.Variable(tf.zeros(shape, tf.float32), trainable=False)
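
All of the variants above follow the same pattern: build a tf.data.Dataset of
initial frames, keep only the "inputs" tensor, repeat it, and use it to fill a
non-trainable observation variable. Below is a minimal, self-contained sketch of
that pattern in plain TensorFlow 1.x; the constants, the random stand-in frames,
and the reset_observations op are illustrative assumptions, not part of
tensor2tensor.

import numpy as np
import tensorflow as tf  # graph-mode TF 1.x, as in the examples above

# Illustrative constants; in the examples these come from initial_frames_problem.
BATCH = 4
FRAME_SHAPE = (210, 160, 3)

# Stand-in for initial_frames_problem.dataset(...): a few random frames keyed
# by "inputs", matching the feature name used above.
frames = np.random.randint(0, 255, size=(16,) + FRAME_SHAPE).astype(np.float32)
dataset = tf.data.Dataset.from_tensor_slices({"inputs": frames})

# Same post-processing as in the constructors: optional shuffle for random
# starts, then keep only the frames and cycle through them indefinitely.
dataset = dataset.shuffle(buffer_size=16)
dataset = dataset.map(lambda x: x["inputs"]).repeat()
dataset = dataset.batch(BATCH, drop_remainder=True)
next_frames = dataset.make_one_shot_iterator().get_next()

# Non-trainable variable holding the current observation of each environment,
# mirroring self._observ above; reset_observations refills it from the dataset.
observ = tf.Variable(tf.zeros((BATCH,) + FRAME_SHAPE, tf.float32),
                     trainable=False)
reset_observations = tf.assign(observ, next_frames)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(reset_observations)
    print(sess.run(observ).shape)  # (4, 210, 160, 3)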