def _set_up_staging(self, transition):
    """Builds the staging ops that prefetch the next transition.

    Sampling goes through a py_func; to hide its latency, a staging area is
    used to pre-fetch the next batch of transitions, so a sample returns a
    transition that was staged earlier.

    Args:
      transition: tuple of tf.Tensors with shape
        memory.get_transition_elements().

    Returns:
      prefetched_transition: tuple of tf.Tensors with shape
        memory.get_transition_elements() that have been previously
        prefetched.
    """
    # Create the staging area in CPU; it only needs the type of each element.
    element_types = [
        element.type for element in self.memory.get_transition_elements()
    ]
    staging = contrib_staging.StagingArea(element_types)

    # The prefetch op is stored for tests but kept private -- users should
    # not be calling _prefetch_batch.
    self._prefetch_batch = staging.put(transition)

    def _put_transition():
        return staging.put(transition)

    staging_is_empty = tf.equal(staging.size(), 0)
    prime_if_empty = tf.cond(staging_is_empty, _put_transition, tf.no_op)

    # Each sampled transition triggers self._prefetch_batch. If the staging
    # area is empty, two put ops will be called.
    with tf.control_dependencies([self._prefetch_batch, prime_if_empty]):
        return staging.get()
def _create_staging_area(self, tensors):
    """Creates a named StagingArea with one slot per entry of `tensors`.

    Args:
      tensors: mapping from slot name to a tensor-like object exposing
        `dtype` and `shape`.

    Returns:
      A `tf_staging.StagingArea` built from the dtypes, shapes and names of
      the entries, in the mapping's iteration order.
    """
    # Snapshot the items once so the three parallel lists stay aligned.
    entries = list(tensors.items())
    return tf_staging.StagingArea(
        dtypes=[tensor.dtype for _, tensor in entries],
        shapes=[tensor.shape for _, tensor in entries],
        names=[name for name, _ in entries])
def _create_staging_area(self, tensors):
    """Creates a StagingArea whose slots mirror `tensors`.

    Args:
      tensors: iterable of tensor-like objects exposing `dtype` and `shape`.

    Returns:
      A `tf_staging.StagingArea` built from the dtypes and shapes of the
      elements, in iteration order.
    """
    # Materialize once so a one-shot iterable (e.g. a generator) is not
    # exhausted by the dtype pass before the shapes are read.
    tensors = list(tensors)
    return tf_staging.StagingArea(
        dtypes=[tensor.dtype for tensor in tensors],
        shapes=[tensor.shape for tensor in tensors])
def train(action_set, level_names):
    """Train.

    Builds the actor and learner graph, then runs it inside a
    MonitoredTrainingSession. Depending on `is_single_machine()` and
    `FLAGS.job_name`, this process acts as the learner, as an actor, or (on a
    single machine) as both, with actors driven by queue runners.

    The learner loop runs until the fetched environment-frame counter reaches
    `FLAGS.total_environment_frames`. A pure actor process loops forever
    running its enqueue ops.

    Args:
      action_set: collection of actions; its length sizes the agent and it is
        forwarded to `build_actor`.
      level_names: sequence of level names. Actor `i` is assigned
        `level_names[i % len(level_names)]`; the first entry is also used to
        discover the actor output structure.
    """
    if is_single_machine():
        local_job_device = ''
        shared_job_device = ''
        is_actor_fn = lambda i: True
        is_learner = True
        global_variable_device = '/gpu'
        # NOTE(review): a local server is started here, but the session below
        # connects to FLAGS.master rather than server.target -- confirm this
        # is intended.
        server = tf.train.Server.create_local_server()
        server_target = FLAGS.master
        filters = []
    else:
        local_job_device = '/job:%s/task:%d' % (FLAGS.job_name, FLAGS.task)
        shared_job_device = '/job:learner/task:0'
        is_actor_fn = lambda i: FLAGS.job_name == 'actor' and i == FLAGS.task
        is_learner = FLAGS.job_name == 'learner'

        # Placing the variable on CPU, makes it cheaper to send it to all the
        # actors. Continual copying the variables from the GPU is slow.
        global_variable_device = shared_job_device + '/cpu'
        cluster = tf.train.ClusterSpec({
            'actor': ['localhost:%d' % (8001 + i)
                      for i in range(FLAGS.num_actors)],
            'learner': ['localhost:8000']
        })
        server = tf.train.Server(cluster, job_name=FLAGS.job_name,
                                 task_index=FLAGS.task)
        server_target = server.target
        filters = [shared_job_device, local_job_device]

    # Only used to find the actor output structure.
    with tf.Graph().as_default():
        agent = create_agent(len(action_set))
        env = create_environment(level_names[0], seed=1)
        structure = build_actor(agent, env, level_names[0], action_set)
        flattened_structure = nest.flatten(structure)
        dtypes = [t.dtype for t in flattened_structure]
        shapes = [t.shape.as_list() for t in flattened_structure]

    with tf.Graph().as_default(), \
         tf.device(local_job_device + '/cpu'), \
         pin_global_variables(global_variable_device):
        tf.set_random_seed(FLAGS.seed)  # Makes initialization deterministic.

        # Create Queue and Agent on the learner.
        with tf.device(shared_job_device):
            queue = tf.FIFOQueue(1, dtypes, shapes, shared_name='buffer')
            agent = create_agent(len(action_set))

        # Build actors and ops to enqueue their output.
        enqueue_ops = []
        for i in range(FLAGS.num_actors):
            if is_actor_fn(i):
                level_name = level_names[i % len(level_names)]
                tf.logging.info('Creating actor %d with level %s', i,
                                level_name)
                env = create_environment(level_name, seed=i + 1)
                actor_output = build_actor(agent, env, level_name, action_set)
                with tf.device(shared_job_device):
                    enqueue_ops.append(
                        queue.enqueue(nest.flatten(actor_output)))

        # If running in a single machine setup, run actors with QueueRunners
        # (separate threads).
        if is_learner and enqueue_ops:
            tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))

        # Build learner.
        if is_learner:
            # Create global step, which is the number of environment frames
            # processed.
            tf.get_variable(
                'num_environment_frames',
                initializer=tf.zeros_initializer(),
                shape=[],
                dtype=tf.int64,
                trainable=False,
                collections=[tf.GraphKeys.GLOBAL_STEP,
                             tf.GraphKeys.GLOBAL_VARIABLES])

            # Create batch (time major) and recreate structure.
            dequeued = queue.dequeue_many(FLAGS.batch_size)
            dequeued = nest.pack_sequence_as(structure, dequeued)

            def make_time_major(s):
                # Swap the first two axes of every tensor in `s`, leaving any
                # remaining axes in place.
                return nest.map_structure(
                    lambda t: tf.transpose(
                        t, [1, 0] + list(range(2, t.shape.ndims))),
                    s)

            dequeued = dequeued._replace(
                env_outputs=make_time_major(dequeued.env_outputs),
                agent_outputs=make_time_major(dequeued.agent_outputs))

            with tf.device('/gpu'):
                # Using StagingArea allows us to prepare the next batch and
                # send it to the GPU while we're performing a training step.
                # This adds up to 1 step policy lag.
                flattened_output = nest.flatten(dequeued)
                area = contrib_staging.StagingArea(
                    [t.dtype for t in flattened_output],
                    [t.shape for t in flattened_output])
                stage_op = area.put(flattened_output)

                data_from_actors = nest.pack_sequence_as(structure, area.get())

                # Unroll agent on sequence, create losses and update ops.
                output = build_learner(agent, data_from_actors.agent_state,
                                       data_from_actors.env_outputs,
                                       data_from_actors.agent_outputs)

        # Create MonitoredSession (to run the graph, checkpoint and log).
        tf.logging.info('Creating MonitoredSession, is_chief %s', is_learner)
        config = tf.ConfigProto(allow_soft_placement=True,
                                device_filters=filters)
        with tf.train.MonitoredTrainingSession(
                server_target,
                is_chief=is_learner,
                checkpoint_dir=FLAGS.logdir,
                save_checkpoint_secs=600,
                save_summaries_secs=30,
                log_step_count_steps=50000,
                config=config,
                hooks=[py_process.PyProcessHook()]) as session:

            if is_learner:
                tf.logging.info('is_learner')
                # Logging.
                summary_writer = tf.summary.FileWriterCache.get(FLAGS.logdir)

                # Prepare data for first run: stage one batch so the first
                # training step has something to take from the staging area.
                session.run_step_fn(
                    lambda step_context: step_context.session.run(stage_op))

                # Execute learning and track performance.
                num_env_frames_v = 0
                while num_env_frames_v < FLAGS.total_environment_frames:
                    tf.logging.info(num_env_frames_v)
                    # Each step also runs stage_op so the next batch is staged
                    # while this one is being trained on.
                    (level_names_v, done_v, infos_v,
                     num_env_frames_v, _) = session.run(
                         (data_from_actors.level_name,) + output + (stage_op,))
                    # Repeat the level names along a new leading axis of
                    # length done_v.shape[0] so the done_v mask can index them.
                    level_names_v = np.repeat(
                        [level_names_v], done_v.shape[0], 0)

                    for level_name, episode_return, episode_step in zip(
                            level_names_v[done_v],
                            infos_v.episode_return[done_v],
                            infos_v.episode_step[done_v]):
                        # Frames are reported as the raw step count.
                        episode_frames = episode_step

                        tf.logging.info('Level: %s Episode return: %f',
                                        level_name, episode_return)

                        summary = tf.summary.Summary()
                        summary.value.add(
                            tag=level_name + '/episode_return',
                            simple_value=episode_return)
                        summary.value.add(
                            tag=level_name + '/episode_frames',
                            simple_value=episode_frames)
                        summary_writer.add_summary(summary, num_env_frames_v)
            else:
                tf.logging.info('actor')
                # Execute actors (they just need to enqueue their output).
                while True:
                    session.run(enqueue_ops)