Example #1
    def __init__(self, environment_spec, length, initial_frame_chooser):
        """Batch of environments inside the TensorFlow graph."""

        observ_space = utils.get_observation_space(environment_spec)
        initial_frames_problem = environment_spec.initial_frames_problem
        self._frames_problem_name = str(initial_frames_problem)
        observ_shape = (initial_frames_problem.frame_height,
                        initial_frames_problem.frame_width,
                        initial_frames_problem.num_channels)
        observ_space.shape = observ_shape
        action_space = utils.get_action_space(environment_spec)
        super(SimulatedBatchEnv, self).__init__(observ_space, action_space)

        self.length = length
        self._min_reward = initial_frames_problem.min_reward
        self._num_frames = environment_spec.video_num_input_frames
        self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

        model_hparams = trainer_lib.create_hparams(FLAGS.hparams_set,
                                                   problem_name=FLAGS.problem)
        model_hparams.force_full_predict = True
        self._model = registry.model(FLAGS.model)(
            model_hparams, tf.estimator.ModeKeys.PREDICT)

        self.history_buffer = HistoryBuffer(initial_frame_chooser, self.length,
                                            self.observ_dtype)

        self._observ = tf.Variable(tf.zeros((len(self), ) + observ_shape,
                                            self.observ_dtype),
                                   trainable=False)
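Unlike the two variants below, this version does not build its initial-frames pipeline itself: it receives an initial_frame_chooser and hands it straight to HistoryBuffer. The snippet does not show the callable's contract, so the following is only a hypothetical sketch, assuming the chooser takes a batch size and returns one frame per environment:

import tensorflow as tf

def zero_frame_chooser(batch_size, observ_shape=(210, 160, 3),
                       dtype=tf.float32):
    """Hypothetical chooser: one all-zero frame per environment.

    The exact signature HistoryBuffer expects is not shown above; this only
    illustrates the kind of hook being injected in place of a dataset.
    """
    return tf.zeros((batch_size,) + observ_shape, dtype)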
Example #2
    def __init__(self, environment_spec, length):
        """Batch of environments inside the TensorFlow graph."""

        observ_space = utils.get_observation_space(environment_spec)
        initial_frames_problem = environment_spec.initial_frames_problem
        observ_shape = (initial_frames_problem.frame_height,
                        initial_frames_problem.frame_width,
                        initial_frames_problem.num_channels)
        observ_space.shape = observ_shape
        action_space = utils.get_action_space(environment_spec)
        super(SimulatedBatchEnv, self).__init__(observ_space, action_space)

        self.length = length
        self._min_reward = initial_frames_problem.min_reward
        self._num_frames = environment_spec.video_num_input_frames
        self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

        model_hparams = trainer_lib.create_hparams(FLAGS.hparams_set,
                                                   problem_name=FLAGS.problem)
        model_hparams.force_full_predict = True
        self._model = registry.model(FLAGS.model)(
            model_hparams, tf.estimator.ModeKeys.PREDICT)

        hparams = HParams(
            video_num_input_frames=environment_spec.video_num_input_frames,
            video_num_target_frames=environment_spec.video_num_target_frames,
            environment_spec=environment_spec)

        # TODO(piotrmilos): check if this shouldn't be tf.estimator.ModeKeys.Predict
        initial_frames_dataset = initial_frames_problem.dataset(
            tf.estimator.ModeKeys.TRAIN,
            FLAGS.data_dir,
            shuffle_files=False,
            hparams=hparams).take(1)
        start_frame = None
        if environment_spec.simulation_random_starts:
            dataset = initial_frames_problem.dataset(
                tf.estimator.ModeKeys.TRAIN,
                FLAGS.data_dir,
                shuffle_files=True,
                hparams=hparams,
                only_last=True)
            dataset = dataset.shuffle(buffer_size=1000)
            if environment_spec.simulation_flip_first_random_for_beginning:
                # Later, flip the first random frame in the PPO batch to the true beginning.
                start = initial_frames_dataset.make_one_shot_iterator(
                ).get_next()
                start_frame = tf.expand_dims(start["inputs"], axis=0)
        else:
            dataset = initial_frames_dataset

        dataset = dataset.map(lambda x: x["inputs"]).repeat()
        self.history_buffer = HistoryBuffer(dataset,
                                            self.length,
                                            self.observ_dtype,
                                            start_frame=start_frame)

        self._observ = tf.Variable(tf.zeros((len(self), ) + observ_shape,
                                            self.observ_dtype),
                                   trainable=False)
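The core difference in this variant is how the initial-frames dataset is built: with simulation_random_starts the problem's TRAIN split is shuffled so each rollout can begin from a random recorded frame, optionally keeping one deterministic start_frame to swap in later; otherwise only the first record is taken, so every environment restarts from the same point. Below is a minimal, self-contained sketch of that pipeline shape, with a synthetic in-memory dataset standing in for initial_frames_problem.dataset(...) (an assumption; the real dataset yields dicts whose "inputs" entry holds the stacked frames):

import tensorflow as tf

# Synthetic stand-in: 8 records of 4 stacked 210x160 RGB frames.
frames = tf.zeros((8, 4, 210, 160, 3), tf.uint8)
records = tf.data.Dataset.from_tensor_slices({"inputs": frames})

random_starts = True
if random_starts:
    dataset = records.shuffle(buffer_size=1000)  # random episode starts
else:
    dataset = records.take(1)                    # always the same first record

# Keep only the frame tensor and cycle forever, as done before handing the
# dataset to HistoryBuffer above.
dataset = dataset.map(lambda x: x["inputs"]).repeat()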
Example #3
    def __init__(self, environment_spec, length):
        """Batch of environments inside the TensorFlow graph."""

        observ_space = utils.get_observation_space(environment_spec)
        initial_frames_problem = environment_spec.initial_frames_problem
        observ_shape = (initial_frames_problem.frame_height,
                        initial_frames_problem.frame_width,
                        initial_frames_problem.num_channels)
        observ_space.shape = observ_shape
        action_space = utils.get_action_space(environment_spec)
        super(SimulatedBatchEnv, self).__init__(observ_space, action_space)

        self.length = length
        self._min_reward = initial_frames_problem.min_reward
        self._num_frames = environment_spec.video_num_input_frames
        self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

        model_hparams = trainer_lib.create_hparams(FLAGS.hparams_set,
                                                   problem_name=FLAGS.problem)
        model_hparams.force_full_predict = True
        self._model = registry.model(FLAGS.model)(
            model_hparams, tf.estimator.ModeKeys.PREDICT)

        hparams = HParams(
            video_num_input_frames=environment_spec.video_num_input_frames,
            video_num_target_frames=environment_spec.video_num_target_frames,
            environment_spec=environment_spec)

        if environment_spec.simulation_random_starts:
            dataset = initial_frames_problem.dataset(
                tf.estimator.ModeKeys.TRAIN,
                FLAGS.data_dir,
                shuffle_files=True,
                hparams=hparams)
            dataset = dataset.shuffle(buffer_size=1000)
        else:
            dataset = initial_frames_problem.dataset(
                tf.estimator.ModeKeys.TRAIN,
                FLAGS.data_dir,
                shuffle_files=False,
                hparams=hparams).take(1)

        dataset = dataset.map(lambda x: x["inputs"]).repeat()
        self.history_buffer = HistoryBuffer(dataset, self.length,
                                            self.observ_dtype)

        self._observ = tf.Variable(tf.zeros((len(self), ) + observ_shape,
                                            self.observ_dtype),
                                   trainable=False)
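All three variants end the same way: a non-trainable tf.Variable holds the most recent observation for each environment in the batch, sized by len(self) and the frame shape taken from the problem. A minimal sketch with illustrative Atari-like dimensions (an assumption; the real shape comes from initial_frames_problem):

import tensorflow as tf

batch_size = 16                 # stands in for len(self)
observ_shape = (210, 160, 3)    # (frame_height, frame_width, num_channels)

# Written by the simulated step op, never updated by the optimizer.
observ = tf.Variable(tf.zeros((batch_size,) + observ_shape, tf.float32),
                     trainable=False)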