Code example #1
File: metric_utils.py Project: yrbahn/agents
def compute_summaries(metrics,
                      environment,
                      policy,
                      num_episodes=1,
                      global_step=None,
                      tf_summaries=True,
                      log=False,
                      callback=None):
    """Compute metrics using `policy` on the `environment` and logs summaries.

  Args:
    metrics: List of metrics to compute.
    environment: py_environment instance.
    policy: py_policy instance used to step the environment. A tf_policy can be
      used in_eager_mode.
    num_episodes: Number of episodes to compute the metrics over.
    global_step: An optional global step for summaries.
    tf_summaries: If True, write TF summaries for each computed metric.
    log: If True, log computed metrics.
    callback: If provided, this function is called with (computed_metrics,
      global_step).

  Returns:
    A dictionary of results {metric_name: metric_value}
  """
    results = compute(metrics, environment, policy, num_episodes)
    if tf_summaries:
        py_metric.run_summaries(metrics)
    if log:
        log_metrics(metrics, prefix='Step = {}'.format(global_step))
    if callback is not None:
        callback(results, global_step)
    return results
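
A minimal usage sketch for the helper above; the environment, policy, and metric choices are illustrative assumptions, not part of the project code:

# Sketch only: assumes a py_environment `env` and a py_policy `policy` exist.
from tf_agents.metrics import py_metrics

eval_metrics = [
    py_metrics.AverageReturnMetric(buffer_size=10),
    py_metrics.AverageEpisodeLengthMetric(buffer_size=10),
]
results = compute_summaries(eval_metrics,
                            env,
                            policy,
                            num_episodes=10,
                            global_step=1000,
                            log=True)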
Code example #2
  def testSummaryMultipleStepMetrics(self):
    with tf.contrib.summary.always_record_summaries():
      self.metric1.tf_summaries(step_metrics=(self.metric2, self.metric3))
      self.metric2.tf_summaries(step_metrics=(self.metric2, self.metric3))
      self.metric3.tf_summaries(step_metrics=(self.metric2, self.metric3))

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      tf.contrib.summary.initialize(session=sess)
      self.metric1.value = 1
      self.metric2.value = 2
      self.metric3.value = 3
      py_metric.run_summaries([self.metric1, self.metric2, self.metric3])
      sess.run(self.flush_op)

    records = self.get_records()

    # (3 records for metric1, 2 for metric2, 2 for metric3) + 1 file header
    self.assertEqual(8, len(records))

    self.assert_summary_equals(records, 'Metric1', 0, 1)
    self.assert_summary_equals(records, 'vs_Metric2/Metric1', 2, 1)
    self.assert_summary_equals(records, 'vs_Metric3/Metric1', 3, 1)

    self.assert_summary_equals(records, 'Metric2', 0, 2)
    self.assert_summary_equals(records, 'vs_Metric3/Metric2', 3, 2)

    self.assert_summary_equals(records, 'Metric3', 0, 3)
    self.assert_summary_equals(records, 'vs_Metric2/Metric3', 2, 3)
Code example #3
  def testSummaryUpdates(self):
    with tf.contrib.summary.always_record_summaries():
      self.metric1.tf_summaries()
      self.metric2.tf_summaries()

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      tf.contrib.summary.initialize(session=sess)
      self.metric1.value = 3
      self.metric2.value = 0
      py_metric.run_summaries([self.metric1, self.metric2])
      sess.run(self.incr_global_step)
      self.metric1.value = 5
      self.metric2.value = 2
      py_metric.run_summaries([self.metric1, self.metric2])
      sess.run(self.flush_op)

    records = self.get_records()

    # 4 summaries + 1 file header
    self.assertEqual(5, len(records))

    self.assert_summary_equals(records, 'Metric1', 0, 3)
    self.assert_summary_equals(records, 'Metric2', 0, 0)
    self.assert_summary_equals(records, 'Metric1', 1, 5)
    self.assert_summary_equals(records, 'Metric2', 1, 2)
Code example #4
    def testSummaryUpdates(self):
        if tf.executing_eagerly():
            self.skipTest('b/123881100')
        with tf.compat.v2.summary.record_if(True):
            self.metric1.tf_summaries(train_step=self.global_step)
            self.metric2.tf_summaries(train_step=self.global_step)

        with self.cached_session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            sess.run(self.writer.init())
            self.metric1.value = 3
            self.metric2.value = 0
            py_metric.run_summaries([self.metric1, self.metric2])
            sess.run(self.incr_global_step)
            self.metric1.value = 5
            self.metric2.value = 2
            py_metric.run_summaries([self.metric1, self.metric2])
            sess.run(self.writer.flush())

        records = self.get_records()

        # 4 summaries + 1 file header
        self.assertEqual(5, len(records))

        self.assert_summary_equals(records, 'Metrics/Metric1', 0, 3)
        self.assert_summary_equals(records, 'Metrics/Metric2', 0, 0)
        self.assert_summary_equals(records, 'Metrics/Metric1', 1, 5)
        self.assert_summary_equals(records, 'Metrics/Metric2', 1, 2)
Code example #5
    def testSummaryStepMetricsUpdate(self):
        if tf.executing_eagerly():
            self.skipTest('b/123881100')
        with tf.compat.v2.summary.record_if(True):
            self.metric1.tf_summaries(train_step=self.global_step,
                                      step_metrics=(self.metric2, ))
            self.metric2.tf_summaries(train_step=self.global_step,
                                      step_metrics=(self.metric2, ))

        with self.cached_session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            sess.run(self.writer.init())
            self.metric1.value = 3
            self.metric2.value = 2
            py_metric.run_summaries([self.metric1, self.metric2])
            self.metric1.value = 4
            self.metric2.value = 3
            py_metric.run_summaries([self.metric1, self.metric2])
            sess.run(self.writer.flush())

        records = self.get_records()

        # (2 records for metric1, 1 for metric2) * 2 + 1 file header
        self.assertEqual(7, len(records))

        self.assert_summary_equals(records, 'Metrics_vs_Metric2/Metric1', 2, 3)
        self.assert_summary_equals(records, 'Metrics_vs_Metric2/Metric1', 3, 4)
Code example #6
        def run(self):
            """Execute the train/eval loop."""
            with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
                    allow_soft_placement=True)) as sess:
                # Initialize the graph.
                self._initialize_graph(sess)

                # Initial collect
                self._initial_collect()

                while self._iteration_metric.result() < self._num_iterations:
                    # Train phase
                    env_steps = 0
                    for metric in self._train_phase_metrics:
                        metric.reset()
                    while env_steps < self._train_steps_per_iteration:
                        env_steps += self._run_episode(
                            sess,
                            self._train_metrics + self._train_phase_metrics,
                            train=True)
                    for metric in self._train_phase_metrics:
                        log_metric(metric, prefix='Train/Metrics')
                    py_metric.run_summaries(self._train_phase_metrics +
                                            [self._iteration_metric])

                    global_step_val = sess.run(self._global_step)

                    if self._do_eval:
                        # Eval phase
                        env_steps = 0
                        for metric in self._eval_metrics:
                            metric.reset()
                        while env_steps < self._eval_steps_per_iteration:
                            env_steps += self._run_episode(sess,
                                                           self._eval_metrics,
                                                           train=False)

                        py_metric.run_summaries(self._eval_metrics +
                                                [self._iteration_metric])
                        if self._eval_metrics_callback:
                            results = dict((metric.name, metric.result())
                                           for metric in self._eval_metrics)
                            self._eval_metrics_callback(
                                results, global_step_val)
                        for metric in self._eval_metrics:
                            log_metric(metric, prefix='Eval/Metrics')

                    self._iteration_metric()

                    self._train_checkpointer.save(global_step=global_step_val)
                    self._policy_checkpointer.save(global_step=global_step_val)
                    self._rb_checkpointer.save(global_step=global_step_val)

                    export_dir = os.path.join(
                        self._train_dir, 'saved_policy',
                        'step_' + ('%d' % global_step_val).zfill(8))
                    self._policy_exporter.save(export_dir)
                    common.save_spec(
                        self._collect_policy.trajectory_spec,
                        os.path.join(export_dir, 'trajectory_spec'))
Code example #7
  def testSummaryMultipleStepMetrics(self):
    if tf.executing_eagerly():
      self.skipTest('b/123881100')
    with tf.compat.v2.summary.record_if(True):
      self.metric1.tf_summaries(step_metrics=(self.metric2, self.metric3))
      self.metric2.tf_summaries(step_metrics=(self.metric2, self.metric3))
      self.metric3.tf_summaries(step_metrics=(self.metric2, self.metric3))

    with self.cached_session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      tf.contrib.summary.initialize(session=sess)
      self.metric1.value = 1
      self.metric2.value = 2
      self.metric3.value = 3
      py_metric.run_summaries([self.metric1, self.metric2, self.metric3])
      sess.run(tf.contrib.summary.flush())

    records = self.get_records()

    # (3 records for metric1, 2 for metric2, 2 for metric3) + 1 file header
    self.assertEqual(8, len(records))

    self.assert_summary_equals(records, 'Metrics/Metric1', 0, 1)
    self.assert_summary_equals(records, 'Metrics_vs_Metric2/Metric1', 2, 1)
    self.assert_summary_equals(records, 'Metrics_vs_Metric3/Metric1', 3, 1)

    self.assert_summary_equals(records, 'Metrics/Metric2', 0, 2)
    self.assert_summary_equals(records, 'Metrics_vs_Metric3/Metric2', 3, 2)

    self.assert_summary_equals(records, 'Metrics/Metric3', 0, 3)
    self.assert_summary_equals(records, 'Metrics_vs_Metric2/Metric3', 2, 3)
Code example #8
    def run(self):
        """Execute the train/eval loop."""
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            # Initialize the graph.
            self._initialize_graph(sess)

            # Initial collect
            self._initial_collect()

            while self._iteration_metric.result() < self._num_iterations:
                # Train phase
                env_steps = 0
                for metric in self._train_phase_metrics:
                    metric.reset()
                while env_steps < self._train_steps_per_iteration:
                    env_steps += self._run_episode(sess,
                                                   self._train_metrics +
                                                   self._train_phase_metrics,
                                                   train=True)
                for metric in self._train_phase_metrics:
                    log_metric(metric, prefix='Train/Metrics')
                py_metric.run_summaries(self._train_phase_metrics +
                                        [self._iteration_metric])

                global_step_val = sess.run(self._global_step)

                if self._do_eval:
                    # Eval phase
                    env_steps = 0
                    for metric in self._eval_metrics:
                        metric.reset()
                    while env_steps < self._eval_steps_per_iteration:
                        env_steps += self._run_episode(sess,
                                                       self._eval_metrics,
                                                       train=False)

                    py_metric.run_summaries(self._eval_metrics +
                                            [self._iteration_metric])
                    if self._eval_metrics_callback:
                        results = dict((metric.name, metric.result())
                                       for metric in self._eval_metrics)
                        self._eval_metrics_callback(results, global_step_val)
                    for metric in self._eval_metrics:
                        log_metric(metric, prefix='Eval/Metrics')

                self._iteration_metric()

                self._train_checkpointer.save(global_step=global_step_val)
                self._policy_checkpointer.save(global_step=global_step_val)
                self._rb_checkpointer.save(global_step=global_step_val)
Code example #9
File: py_metric_test.py Project: lyricslee/agents
    def testSummarySimple(self):
        with tf.contrib.summary.always_record_summaries():
            self.metric1.tf_summaries()
            self.metric2.tf_summaries()

        with self.cached_session() as sess:
            sess.run(tf.global_variables_initializer())
            tf.contrib.summary.initialize(session=sess)
            self.metric1.value = 3
            py_metric.run_summaries([self.metric1, self.metric2])
            sess.run(self.flush_op)

        records = self.get_records()

        # 2 summaries + 1 file header
        self.assertEqual(3, len(records))

        self.assert_summary_equals(records, 'Metric1', 0, 3)
        self.assert_summary_equals(records, 'Metric2', 0, 0)
Code example #10
  def testSummarySimple(self):
    if tf.executing_eagerly():
      self.skipTest('b/123881100')
    with tf.compat.v2.summary.record_if(True):
      self.metric1.tf_summaries()
      self.metric2.tf_summaries()

    with self.cached_session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      tf.contrib.summary.initialize(session=sess)
      self.metric1.value = 3
      py_metric.run_summaries([self.metric1, self.metric2])
      sess.run(tf.contrib.summary.flush())

    records = self.get_records()

    # 2 summaries + 1 file header
    self.assertEqual(3, len(records))

    self.assert_summary_equals(records, 'Metrics/Metric1', 0, 3)
    self.assert_summary_equals(records, 'Metrics/Metric2', 0, 0)
Code example #11
  def testSummaryStepMetrics(self):
    with tf.contrib.summary.always_record_summaries():
      self.metric1.tf_summaries(step_metrics=(self.metric2,))
      self.metric2.tf_summaries(step_metrics=(self.metric2,))

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      tf.contrib.summary.initialize(session=sess)
      self.metric1.value = 3
      self.metric2.value = 2
      py_metric.run_summaries([self.metric1, self.metric2])
      sess.run(self.flush_op)

    records = self.get_records()

    # (2 records for metric1, 1 for metric2) + 1 file header
    self.assertEqual(4, len(records))

    self.assert_summary_equals(records, 'Metric1', 0, 3)
    self.assert_summary_equals(records, 'vs_Metric2/Metric1', 2, 3)

    self.assert_summary_equals(records, 'Metric2', 0, 2)
Code example #12
def perform_eval_and_summaries(metrics,
                               env,
                               policy,
                               num_episodes=1,
                               num_episodes_to_render=1,
                               images_ph=None,
                               images_summary=None,
                               render_images_summary=None,
                               eval_tasks=None,
                               model_input='image'):
    """
  Collect num_episodes on env, using policy
  Render num_episodes_to_render of those episodes
  eval_tasks: task list from which to reset the task for each episode
  Run summaries on the images

  NOTE: this function is an episode version (not trials)
  So the latent variable is reset after each episode.
  And task is reset for each episode.
  """

    ###########
    # init
    ###########

    for metric in metrics:
        metric.reset()
    if num_episodes_to_render:
        env.start_rendering()

    #####################
    # reset env + task
    #####################

    if eval_tasks is not None:
        # Tasks can repeat, since we may evaluate more episodes than there are
        # eval tasks.
        which_tasks = np.random.choice(len(eval_tasks), num_episodes)
        env.set_task_for_env(eval_tasks[which_tasks[0]])
    time_step = env.reset()

    #####################
    # init images holder
    #####################

    render_images = []
    if num_episodes_to_render and 'pixels' in time_step.observation:
        images = [[time_step.observation['pixels']]]
    else:
        images = []

    #################################
    # collect num_episodes rollouts
    #################################

    step = 0
    episode = 0
    policy_state_initial = policy.get_initial_state(env.batch_size)
    policy_state = policy.get_initial_state(env.batch_size)

    while episode < num_episodes:

        # at beginning of each episode
        if step > 0 and time_step.is_first():

            # reset task (to a random one, from the list)
            # Note: policy_state (i.e. latent z) gets automatically reset
            if eval_tasks is not None:
                task_idx = np.minimum(episode, len(which_tasks) - 1)
                task = which_tasks[task_idx]
                env.set_task_for_env(eval_tasks[task])

        # get action
        action_step = policy.action(time_step, policy_state)

        # take step
        next_time_step = env.step(action_step.action)

        # run metrics on the step
        traj = trajectory.from_transition(time_step, action_step,
                                          next_time_step)
        for observer in metrics:
            observer(traj)

        # render
        if episode < num_episodes_to_render:
            if traj.is_last():
                render_images.append(list(env.frames))
                env.frames[:] = []
                if episode + 1 >= num_episodes_to_render:
                    env.stop_rendering()

            if 'pixels' in time_step.observation:
                if traj.is_boundary():
                    images.append([])
                images[-1].append(next_time_step.observation['pixels'])

        # +1 episode if transition is end of a rollout
        if traj.is_last():
            # add to episode counter
            episode += 1

        # +1 step if it's not a boundary transition
        step += np.sum(~traj.is_boundary())

        time_step = next_time_step
        policy_state = action_step.state

    py_metric.run_summaries(metrics)

    if render_images:
        render_images = pad_and_concatenate_videos(render_images)
        session = tf.compat.v1.get_default_session()
        session.run(render_images_summary,
                    feed_dict={images_ph: [render_images]})
    if images and model_input != 'state':
        images = pad_and_concatenate_videos(images)
        session = tf.compat.v1.get_default_session()
        session.run(images_summary, feed_dict={images_ph: [images]})
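
The `images_ph` and `images_summary` arguments consumed above are built elsewhere; a plausible graph-mode sketch with the TF2 summary API (the writer, step, shapes, and summary name are assumptions, not from the project):

# Sketch only: writer, step, and placeholder shape are assumptions.
import tensorflow as tf

writer = tf.compat.v2.summary.create_file_writer('/tmp/eval')
global_step = tf.compat.v1.train.get_or_create_global_step()
# A batch of one concatenated video: [1, T, H, W, C].
images_ph = tf.compat.v1.placeholder(tf.uint8, shape=[1, None, None, None, 3])
with writer.as_default(), tf.compat.v2.summary.record_if(True):
    # Running this op with a feed writes the fed frames as image summaries.
    images_summary = tf.compat.v2.summary.image(
        'eval_images', images_ph[0], step=global_step, max_outputs=8)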
Code example #13
def perform_eval_and_summaries_meld(
        eval_metrics,
        env,
        policy,
        num_trials,
        max_episode_len,
        episodes_per_trial,
        log_image_strips=False,
        num_trials_to_render=1,
        num_trials_to_render_overwrite=None,
        eval_tasks=None,
        model_input='image',
        latent1_size=None,
        latent2_size=None,
        logger=None,
        log_image_observations=False,
        global_step_val=0,
        render_fps=10,
        decode_rews_op=None,
        latent_samples_1_ph=None,
        latent_samples_2_ph=None,
        eval_doing_second_agent=False,
        dont_plot_reward_gifs=True,  # don't plot, by default
):
    """
  Collect num_trials trials on env, using policy
  Where each trial is episodes_per_trial episodes
  And each episode is of length max_episode_len
  Render all episodes from num_trials_to_render trials

  NOTE: At the beginning of each trial,
  We reset the latent variable,
  and reset the task randomly from eval_tasks.
  """

    ###########
    # init
    ###########

    session = tf.compat.v1.get_default_session()

    if episodes_per_trial > 1:
        # Visualizing each episode from a single trial looks best.
        num_trials_to_render = 1

    if num_trials_to_render_overwrite is not None:
        # Can be overridden during offline eval.
        num_trials_to_render = num_trials_to_render_overwrite

    num_metrics_per_episode = len(eval_metrics) // episodes_per_trial

    for metric in eval_metrics:
        metric.reset()
    rendering = False
    if num_trials_to_render:
        rendering = True
        env.start_rendering()

    #####################
    # select random tasks
    #####################

    # Sample without replacement when possible; otherwise tasks must repeat.
    selected_tasks_indices = np.random.choice(
        len(eval_tasks), num_trials, replace=num_trials > len(eval_tasks))

    #####################
    # reset env
    #####################

    time_step = env.reset()

    #####################
    # init images holder
    #####################

    render_images = []
    if num_trials_to_render and 'pixels' in time_step.observation:
        images = [[]]
        variance_plots = []
    else:
        images = []
        variance_plots = []
    fig_list = []

    #####################
    # collect rollouts
    #####################

    for trial, task_idx in zip(range(num_trials), selected_tasks_indices):

        # set the task at beginning of each trial
        env.set_task_for_env(eval_tasks[task_idx])
        env.frames[:] = []  # clear buffer
        time_step = env.reset()

        # explicitly get the "reset" policy_state to feed into policy
        # to serve as a manual reset of the latent
        policy_state = policy.get_initial_state(env.batch_size)

        if rendering:
            if 'pixels' in time_step.observation:
                # Record the first frame of the trial.
                images[-1].append(time_step.observation['pixels'])

        for episode in range(episodes_per_trial):

            # init for plotting
            z1_stds, z2_stds, z1_means, z2_means = [], [], [], []
            rewards_dense, rewards_sparse = [], []
            mean_of_means, std_of_means, mean_of_vars, std_of_vars = [], [], [], []

            for step in range(max_episode_len):

                # At the beginning of each rollout, prevent the latent from
                # resetting, except for episode == 0 (first episode of a trial).
                if step == 0 and episode > 0:
                    # Mask the step type so the policy does not see it as FIRST
                    # and therefore does not reset the latent in policy_state.
                    policy_time_step = mask_episode_transition(time_step)
                else:
                    policy_time_step = time_step

                # Get the action using the previous latent info; note that
                # action_step.state holds the current latent info.
                action_step = policy.action(policy_time_step, policy_state)

                # take step
                next_time_step = env.step(action_step.action)

                # plot info as gifs in tb summaries
                if rendering:
                    if not dont_plot_reward_gifs:
                        ######### rewards
                        rewards_dense.append(
                            next_time_step.observation['state'][-1])
                        rewards_sparse.append(
                            next_time_step.observation['state'][-2])

                        ######### latents
                        curr_action_taken, curr_latent_sample, curr_latent_dist = action_step.state
                        # curr_latent_sample = (1, 1, [z1, z2])
                        # curr_latent_dist = (1, 1, [z1_mean, z1_std, z2_mean, z2_std])
                        z1_mean = curr_latent_dist[0, 0, :latent1_size]
                        z1_std = curr_latent_dist[
                            0, 0, latent1_size:2 * latent1_size]
                        z2_mean = curr_latent_dist[
                            0, 0, 2 * latent1_size:-latent2_size]
                        z2_std = curr_latent_dist[0, 0, -latent2_size:]

                        z1_means.append(np.linalg.norm(z1_mean))
                        z1_stds.append(np.linalg.norm(z1_std))
                        z2_means.append(np.linalg.norm(z2_mean))
                        z2_stds.append(np.linalg.norm(z2_std))

                        ########## recon

                        # sample latents + reconstruct rewards from the samples

                        num_latent_samples = 30

                        cov = np.zeros((z1_mean.shape[0], z1_mean.shape[0]))
                        np.fill_diagonal(cov, z1_std[None])
                        latent1_samples = np.random.multivariate_normal(
                            z1_mean, cov,
                            num_latent_samples)  #(num_latent_samples, z1_dim)
                        latent1_samples = latent1_samples.astype(np.float32)

                        cov2 = np.zeros((z2_mean.shape[0], z2_mean.shape[0]))
                        np.fill_diagonal(cov2, z2_std[None])
                        latent2_samples = np.random.multivariate_normal(
                            z2_mean, cov2,
                            num_latent_samples)  #(num_latent_samples, z2_dim)
                        latent2_samples = latent2_samples.astype(np.float32)

                        #(num_latent_samples, 1, z1_dim), (num_latent_samples, 1, z2_dim)
                        # rew_dists = policy._tf_policy._model_network.reward_decoder(latent1_samples[:,None,:], latent2_samples[:,None,:])
                        # rew_means, rew_stds = session.run([rew_dists.mean(), rew_dists.stddev()]) #(num_samples, 1), (num_samples, 1)

                        rew_means, rew_stds = session.run(
                            decode_rews_op,
                            feed_dict={
                                latent_samples_1_ph: latent1_samples[:, None, :],
                                latent_samples_2_ph: latent2_samples[:, None, :],
                            })

                        # plot
                        mean_of_means.append(np.mean(rew_means))
                        std_of_means.append(np.std(rew_means))
                        mean_of_vars.append(np.mean(rew_stds))
                        std_of_vars.append(np.std(rew_stds))

                # run metrics
                traj = trajectory.from_transition(time_step, action_step,
                                                  next_time_step)
                relevant_observers = eval_metrics[
                    num_metrics_per_episode * episode:num_metrics_per_episode *
                    (episode + 1)]
                for observer in relevant_observers:
                    observer(traj)

                # save images for rendering
                if rendering:
                    if 'pixels' in time_step.observation:
                        # print("A STEP... ADDED...")
                        images[-1].append(next_time_step.observation['pixels'])

                # update
                time_step = next_time_step
                policy_state = action_step.state

            # at end of episode, create plot of latents and rewards
            if not dont_plot_reward_gifs:

                dpi = 50
                width = 15
                height = 10
                fig = Figure(figsize=(width, height), dpi=dpi)
                # canvas = FigureCanvasAgg(fig)

                ######## latents
                ax = fig.add_subplot(121)

                # ax.plot(rewards_dense[1:], 'g', label='dense rew')
                ax.plot(rewards_sparse[1:], 'g--', label='sparse rew')

                ax.plot(z1_means, 'm', label='z1')
                ax.fill_between(np.arange(len(z1_means)),
                                np.array(z1_means) + np.array(z1_stds),
                                np.array(z1_means) - np.array(z1_stds),
                                color='m',
                                alpha=0.25)

                ax.plot(z2_means, 'c', label='z2')
                ax.fill_between(np.arange(len(z2_means)),
                                np.array(z2_means) + np.array(z2_stds),
                                np.array(z2_means) - np.array(z2_stds),
                                color='c',
                                alpha=0.25)

                ax.legend(loc='lower left')

                ######### reconstruction
                ax = fig.add_subplot(122)

                ax.plot(mean_of_means, 'r', label='recon reward mean')
                ax.fill_between(
                    np.arange(len(mean_of_means)),
                    np.array(mean_of_means) + np.array(std_of_means),
                    np.array(mean_of_means) - np.array(std_of_means),
                    color='r',
                    alpha=0.25)

                ax.plot(mean_of_vars, 'b', label='recon reward var')
                ax.fill_between(np.arange(len(mean_of_means)),
                                np.array(mean_of_vars) + np.array(std_of_vars),
                                np.array(mean_of_vars) - np.array(std_of_vars),
                                color='b',
                                alpha=0.25)

                ax.plot(rewards_dense[1:], 'g', label='true dense rew')
                ax.plot(rewards_sparse[1:], 'g--', label='true sparse rew')

                ######### plot
                ax.legend(loc='lower left')
                # canvas.draw()
                # plot_image = np.frombuffer(fig.canvas.tostring_rgb(), dtype='uint8').reshape(dpi*width, dpi*height, 3)
                # variance_plots.append(plot_image)

                fig_list.append(fig)

            # Render at the end of the episode.
            if rendering:
                render_images.append(list(env.frames))
                env.frames[:] = []
                # Start a new per-episode image list.
                images.append([])

            # One additional between-episode step to force the reset (i.e. the
            # transition between the previous episode and the next, so the
            # step types all work out).

            # currently, time_step has step_type=2
            action_step = policy.action(mask_episode_transition(time_step),
                                        policy_state)  # mask time_step
            # next_time_step is initial pos, action is ignored
            # step_type becomes 0
            next_time_step = env.step(action_step.action)
            traj = trajectory.from_transition(time_step, action_step,
                                              next_time_step)

            # Run metrics. Observers may need these "boundary" transitions to
            # properly separate episodes.
            relevant_observers = eval_metrics[
                num_metrics_per_episode * episode:
                num_metrics_per_episode * (episode + 1)]
            for observer in relevant_observers:
                observer(traj)

            # Render: record the first frame of the next episode.
            if rendering:
                if 'pixels' in time_step.observation:
                    images[-1].append(next_time_step.observation['pixels'])

            # update
            time_step = next_time_step  # step_type=0
            policy_state = action_step.state

        num_finished_trials = trial + 1

        # When all episodes in a trial are done, pop the trailing empty list.
        if rendering:
            images.pop()

        # when all episodes in a trial are done AND not rendering anymore
        if rendering and num_finished_trials >= num_trials_to_render:
            rendering = False
            env.stop_rendering()

        # Get ready for the next trial.
        if rendering:
            images.append([])

    # When all trials are done, pop the trailing empty list.
    if rendering:
        images.pop()

    py_metric.run_summaries(eval_metrics)

    if eval_doing_second_agent:
        render_name = 'Eval2'
        obs_name = 'EvalObs2'
        rews_name = 'LatsRews2'
    else:
        render_name = 'Eval'
        obs_name = 'EvalObs'
        rews_name = 'LatsRews'

    # Rendered images (what we want to see)
    if render_images:
        if not log_image_strips:
            # Video logging needs an [N, T, C, H, W] input tensor.
            render_images = np.array(render_images)
            render_images = np.transpose(render_images, [0, 1, 4, 2, 3])
            logger.log_video(render_images,
                             render_name,
                             step=global_step_val,
                             fps=render_fps)
        else:
            # images: num rollouts, rollout len, height, width, 3
            # [B,T,H,W,C] --> [B,H,T,W,C] --> [BH,T,W,C] --> [BH,TW,C]
            # This tiles batches vertically and time horizontally
            render_images = np.array(render_images)
            render_images = render_images[:, ::5]  # subsample
            render_images_tiled = np.transpose(render_images, [0, 2, 1, 3, 4])
            render_images_tiled = render_images_tiled.reshape(
                (render_images_tiled.shape[0] * render_images_tiled.shape[1],
                 render_images_tiled.shape[2], render_images_tiled.shape[3],
                 render_images_tiled.shape[4]))
            render_images_tiled = render_images_tiled.reshape(
                (render_images_tiled.shape[0],
                 render_images_tiled.shape[1] * render_images_tiled.shape[2],
                 render_images_tiled.shape[3]))  #[BH,TW,C]
            render_images_tiled = np.transpose(render_images_tiled,
                                               (2, 0, 1))  #[C,BH,TW]
            logger.log_image(render_images_tiled,
                             render_name,
                             step=global_step_val)  # [C, H, W]

    # Observation images (what the robot sees)
    if log_image_observations:
        if images and model_input != 'state':
            if not log_image_strips:
                # Video logging needs an [N, T, C, H, W] input tensor.
                images = np.array(images)
                images = np.transpose(images, [0, 1, 4, 2, 3])
                logger.log_video(images,
                                 obs_name,
                                 step=global_step_val,
                                 fps=render_fps)
            else:
                # images: num rollouts, rollout len, height, width, 3
                # [B,T,H,W,C] --> [B,H,T,W,C] --> [BH,T,W,C] --> [BH,TW,C]
                # This tiles batches vertically and time horizontally
                images = np.array(images)
                images = images[:, ::5]  # subsample
                images_tiled = np.transpose(images, [0, 2, 1, 3, 4])
                images_tiled = images_tiled.reshape(
                    (images_tiled.shape[0] * images_tiled.shape[1],
                     images_tiled.shape[2], images_tiled.shape[3],
                     images_tiled.shape[4]))
                images_tiled = images_tiled.reshape(
                    (images_tiled.shape[0],
                     images_tiled.shape[1] * images_tiled.shape[2],
                     images_tiled.shape[3]))  #[BH,TW,C]
                images_tiled = np.transpose(images_tiled,
                                            (2, 0, 1))  #[C,BH,TW]
                logger.log_image(images_tiled, obs_name,
                                 step=global_step_val)  # [C, H, W]

    if not dont_plot_reward_gifs:
        for fig in fig_list:
            logger.log_figure(fig, rews_name, step=global_step_val, phase=0)

    logger.flush()

    if not dont_plot_reward_gifs:
        # Free figure memory.
        fig.clf()
        plt.close()
    gc.collect()

    return
Code example #14
    def _maybe_record_summaries(self, global_step_val):
        """Record summaries if global_step_val is a multiple of summary_interval."""
        if global_step_val % self._summary_interval == 0:
            py_metric.run_summaries(self._train_metrics)
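
A hypothetical sketch of how the helper above might be driven from a training loop; `_train_one_iteration` and `_global_step` are illustrative names, not from the project:

    def _train_loop(self, sess, num_iterations):
        """Run training and record summaries at the configured interval."""
        for _ in range(num_iterations):
            self._train_one_iteration(sess)  # hypothetical per-iteration step
            global_step_val = sess.run(self._global_step)
            self._maybe_record_summaries(global_step_val)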
Code example #15
def compute_summaries(metrics,
                      environment,
                      policy,
                      num_episodes=1,
                      num_episodes_to_render=1,
                      images_ph=None,
                      images_summary=None,
                      render_images_summary=None):
    for metric in metrics:
        metric.reset()

    if num_episodes_to_render:
        environment.start_rendering()

    time_step = environment.reset()
    policy_state = policy.get_initial_state(environment.batch_size)

    render_images = []
    if num_episodes_to_render and 'pixels' in time_step.observation:
        images = [[time_step.observation['pixels']]]
    else:
        images = []
    step = 0
    episode = 0
    while episode < num_episodes:
        action_step = policy.action(time_step, policy_state)
        next_time_step = environment.step(action_step.action)

        traj = trajectory.from_transition(time_step, action_step,
                                          next_time_step)
        for observer in metrics:
            observer(traj)

        if episode < num_episodes_to_render:
            if traj.is_last():
                render_images.append(list(environment.frames))
                environment.frames[:] = []
                if episode + 1 >= num_episodes_to_render:
                    environment.stop_rendering()

            if 'pixels' in time_step.observation:
                if traj.is_boundary():
                    images.append([])
                images[-1].append(next_time_step.observation['pixels'])

        episode += np.sum(traj.is_last())
        step += np.sum(~traj.is_boundary())

        time_step = next_time_step
        policy_state = action_step.state

    py_metric.run_summaries(metrics)

    if render_images:
        render_images = pad_and_concatenate_videos(render_images)
        session = tf.compat.v1.get_default_session()
        session.run(render_images_summary,
                    feed_dict={images_ph: [render_images]})
    if images:
        images = pad_and_concatenate_videos(images)
        session = tf.compat.v1.get_default_session()
        session.run(images_summary, feed_dict={images_ph: [images]})
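
`pad_and_concatenate_videos`, used by the render paths above, is not shown in these examples; a plausible sketch, assuming each video is a list of [H, W, C] frames to be padded to the longest episode and joined side by side:

# Sketch only: padding with the last frame is an assumption.
import numpy as np

def pad_and_concatenate_videos(videos):
    max_episode_length = max(len(video) for video in videos)
    for video in videos:
        # Pad shorter episodes by repeating their last frame.
        video.extend([video[-1]] * (max_episode_length - len(video)))
    # Concatenate the t-th frame of every episode along the width axis.
    return [np.concatenate(frames, axis=1) for frames in zip(*videos)]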