def __init__(self, environment_spec, length, initial_frame_chooser):
  """Batch of environments simulated inside the TensorFlow graph.

  Args:
    environment_spec: spec object describing the simulated environment.
    length: number of environments in the batch.
    initial_frame_chooser: source of starting frames for the history buffer.
  """
  problem = environment_spec.initial_frames_problem
  self._frames_problem_name = str(problem)

  # The observation space comes from the spec, but its shape is overridden
  # with the frame geometry declared by the initial-frames problem.
  frame_shape = (problem.frame_height, problem.frame_width,
                 problem.num_channels)
  obs_space = utils.get_observation_space(environment_spec)
  obs_space.shape = frame_shape
  act_space = utils.get_action_space(environment_spec)
  super(SimulatedBatchEnv, self).__init__(obs_space, act_space)

  self.length = length
  self._min_reward = problem.min_reward
  self._num_frames = environment_spec.video_num_input_frames
  self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

  # Build the world model in full-predict mode (it generates next frames).
  world_model_hparams = trainer_lib.create_hparams(
      FLAGS.hparams_set, problem_name=FLAGS.problem)
  world_model_hparams.force_full_predict = True
  self._model = registry.model(FLAGS.model)(
      world_model_hparams, tf.estimator.ModeKeys.PREDICT)

  self.history_buffer = HistoryBuffer(
      initial_frame_chooser, self.length, self.observ_dtype)
  self._observ = tf.Variable(
      tf.zeros((len(self),) + frame_shape, self.observ_dtype),
      trainable=False)
def __init__(self, environment_spec, length):
  """Batch of environments simulated inside the TensorFlow graph.

  Args:
    environment_spec: spec object describing the simulated environment.
    length: number of environments in the batch.
  """
  problem = environment_spec.initial_frames_problem
  frame_shape = (problem.frame_height, problem.frame_width,
                 problem.num_channels)
  obs_space = utils.get_observation_space(environment_spec)
  obs_space.shape = frame_shape
  act_space = utils.get_action_space(environment_spec)
  super(SimulatedBatchEnv, self).__init__(obs_space, act_space)

  self.length = length
  self._min_reward = problem.min_reward
  self._num_frames = environment_spec.video_num_input_frames
  self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

  # Build the world model in full-predict mode (it generates next frames).
  world_model_hparams = trainer_lib.create_hparams(
      FLAGS.hparams_set, problem_name=FLAGS.problem)
  world_model_hparams.force_full_predict = True
  self._model = registry.model(FLAGS.model)(
      world_model_hparams, tf.estimator.ModeKeys.PREDICT)

  dataset_hparams = HParams(
      video_num_input_frames=environment_spec.video_num_input_frames,
      video_num_target_frames=environment_spec.video_num_target_frames,
      environment_spec=environment_spec)

  # Single deterministic example holding the true first frames of the game.
  # TODO(piotrmilos): check if this shouldn't be tf.estimator.ModeKeys.Predict
  initial_frames_dataset = problem.dataset(
      tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=False,
      hparams=dataset_hparams).take(1)

  start_frame = None
  if environment_spec.simulation_random_starts:
    # Random starts: stream shuffled frames from the recorded rollouts.
    dataset = problem.dataset(
        tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=True,
        hparams=dataset_hparams, only_last=True)
    dataset = dataset.shuffle(buffer_size=1000)
    if environment_spec.simulation_flip_first_random_for_beginning:
      # Later flip the first random frame in PPO batch for the true beginning.
      start = initial_frames_dataset.make_one_shot_iterator().get_next()
      start_frame = tf.expand_dims(start["inputs"], axis=0)
  else:
    dataset = initial_frames_dataset

  dataset = dataset.map(lambda x: x["inputs"]).repeat()
  self.history_buffer = HistoryBuffer(
      dataset, self.length, self.observ_dtype, start_frame=start_frame)
  self._observ = tf.Variable(
      tf.zeros((len(self),) + frame_shape, self.observ_dtype),
      trainable=False)
def __init__(self, environment_spec, length):
  """Batch of environments simulated inside the TensorFlow graph.

  Args:
    environment_spec: spec object describing the simulated environment.
    length: number of environments in the batch.
  """
  problem = environment_spec.initial_frames_problem
  frame_shape = (problem.frame_height, problem.frame_width,
                 problem.num_channels)
  obs_space = utils.get_observation_space(environment_spec)
  obs_space.shape = frame_shape
  act_space = utils.get_action_space(environment_spec)
  super(SimulatedBatchEnv, self).__init__(obs_space, act_space)

  self.length = length
  self._min_reward = problem.min_reward
  self._num_frames = environment_spec.video_num_input_frames
  self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

  # Build the world model in full-predict mode (it generates next frames).
  world_model_hparams = trainer_lib.create_hparams(
      FLAGS.hparams_set, problem_name=FLAGS.problem)
  world_model_hparams.force_full_predict = True
  self._model = registry.model(FLAGS.model)(
      world_model_hparams, tf.estimator.ModeKeys.PREDICT)

  dataset_hparams = HParams(
      video_num_input_frames=environment_spec.video_num_input_frames,
      video_num_target_frames=environment_spec.video_num_target_frames,
      environment_spec=environment_spec)

  if environment_spec.simulation_random_starts:
    # Random starts: stream shuffled frames from the recorded rollouts.
    dataset = problem.dataset(
        tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=True,
        hparams=dataset_hparams)
    dataset = dataset.shuffle(buffer_size=1000)
  else:
    # Deterministic start: always replay the single first example.
    dataset = problem.dataset(
        tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=False,
        hparams=dataset_hparams).take(1)

  dataset = dataset.map(lambda x: x["inputs"]).repeat()
  self.history_buffer = HistoryBuffer(
      dataset, self.length, self.observ_dtype)
  self._observ = tf.Variable(
      tf.zeros((len(self),) + frame_shape, self.observ_dtype),
      trainable=False)
def __init__(self, hparams, length, simulation_random_starts=False,
             intrinsic_reward_scale=0.):
  """Batch of environments simulated inside the TensorFlow graph.

  Args:
    hparams: hyperparameters carrying `environment_spec` and
      `model_hparams` (with `video_num_input_frames` /
      `video_num_target_frames`); mutated in place (see TODO below).
    length: number of environments in the batch.
    simulation_random_starts: whether episodes start from random frames
      drawn from the recorded rollouts instead of the true first frame.
    intrinsic_reward_scale: scale of the intrinsic reward bonus.
  """
  self.length = length
  environment_spec = hparams.environment_spec
  initial_frames_problem = environment_spec.initial_frames_problem
  self._min_reward = initial_frames_problem.min_reward
  self._num_frames = hparams.model_hparams.video_num_input_frames
  self._intrinsic_reward_scale = intrinsic_reward_scale

  # Build the world model in full-predict mode (it generates next frames).
  model_hparams = trainer_lib.create_hparams(
      FLAGS.hparams_set, problem_name=FLAGS.problem)
  model_hparams.force_full_predict = True
  self._model = registry.model(FLAGS.model)(
      model_hparams, tf.estimator.ModeKeys.PREDICT)
  _, self.action_shape, self.action_dtype = get_action_space(environment_spec)

  # TODO(lukaszkaiser): do this in a more cleaner way
  hparams.video_num_input_frames, hparams.video_num_target_frames = (
      hparams.model_hparams.video_num_input_frames,
      hparams.model_hparams.video_num_target_frames)

  if simulation_random_starts:
    dataset = initial_frames_problem.dataset(
        tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=True,
        hparams=hparams)
    dataset = dataset.shuffle(buffer_size=100)
  else:
    # Deterministic start: always replay the single first example. Fixed:
    # this branch previously used shuffle_files=True, so .take(1) picked a
    # frame that depended on file shuffling rather than the true beginning.
    dataset = initial_frames_problem.dataset(
        tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=False,
        hparams=hparams).take(1)

  dataset = dataset.map(lambda x: x["inputs"]).repeat()
  self.history_buffer = HistoryBuffer(dataset, self.length)

  shape = (self.length, initial_frames_problem.frame_height,
           initial_frames_problem.frame_width,
           initial_frames_problem.num_channels)
  self._observ = tf.Variable(tf.zeros(shape, tf.float32), trainable=False)
def __init__(self, environment_spec, length, other_hparams):
  """Batch of environments simulated inside the TensorFlow graph.

  Args:
    environment_spec: spec object describing the simulated environment.
    length: number of environments in the batch.
    other_hparams: unused; accepted for interface compatibility.
  """
  del other_hparams
  self.length = length
  problem = environment_spec.initial_frames_problem
  self._min_reward = problem.min_reward
  self._num_frames = environment_spec.video_num_input_frames
  self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

  # Build the world model in full-predict mode (it generates next frames).
  world_model_hparams = trainer_lib.create_hparams(
      FLAGS.hparams_set, problem_name=FLAGS.problem)
  world_model_hparams.force_full_predict = True
  self._model = registry.model(FLAGS.model)(
      world_model_hparams, tf.estimator.ModeKeys.PREDICT)
  _, self.action_shape, self.action_dtype = get_action_space(environment_spec)

  dataset_hparams = HParams(
      video_num_input_frames=environment_spec.video_num_input_frames,
      video_num_target_frames=environment_spec.video_num_target_frames,
      environment_spec=environment_spec)

  if environment_spec.simulation_random_starts:
    # Random starts: stream shuffled frames from the recorded rollouts.
    dataset = problem.dataset(
        tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=True,
        hparams=dataset_hparams)
    dataset = dataset.shuffle(buffer_size=100)
  else:
    # Deterministic start: always replay the single first example.
    dataset = problem.dataset(
        tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=False,
        hparams=dataset_hparams).take(1)

  dataset = dataset.map(lambda x: x["inputs"]).repeat()
  self.history_buffer = HistoryBuffer(dataset, self.length)

  shape = (self.length, problem.frame_height, problem.frame_width,
           problem.num_channels)
  self._observ = tf.Variable(tf.zeros(shape, tf.float32), trainable=False)