Python simulate_episodes Exemples, planet.training.utility.simulate_episodes Python Exemples

Exemple #1

0

Afficher le fichier

def define_summaries(graph, config, cleanups):
  """Builds the summary ops plus extra prediction outputs for a model graph.

  Args:
    graph: Attribute container; this function reads `data`, `embedded`,
      `heads`, `cell`, `objectives`, `step`, `global_step` and `phase`.
      `graph.data` must provide the keys 'length', 'action', 'image',
      'position', 'velocity', 'return' and 'reward'.
    config: Configuration; this function reads `postprocess_fn`, `train_dir`,
      `debug`, `open_loop_context` and `test_collects`.
    cleanups: Forwarded untouched to `utility.simulate_episodes`.

  Returns:
    A tuple `(summaries, score, prediction, graph_slim)`. `summaries` is the
    merged summary op, `score` is the mean of the simulated returns with a
    leading axis added, `prediction` is the second value returned by
    `summary.prediction_summaries`, and `graph_slim` is a dict holding element
    0 of selected `graph.data` entries plus element 0 of the mode of the
    open-loop image distribution.
  """
  summaries = []
  plot_summaries = []  # Control dependencies for non thread-safe matplot.
  length = graph.data['length']
  # True where the index along axis 1 of `embedded` is below the row's length.
  mask = tf.range(graph.embedded.shape[1].value)[None, :] < length[:, None]
  heads = graph.heads.copy()
  last_time = tf.Variable(lambda: tf.timestamp(), trainable=False)
  last_step = tf.Variable(lambda: 0.0, trainable=False, dtype=tf.float64)

  def transform(dist):
    # Post-process and clip the mean, then wrap it in a unit-scale Normal
    # with the same number of event dimensions as the input distribution.
    mean = config.postprocess_fn(dist.mean())
    mean = tf.clip_by_value(mean, 0.0, 1.0)
    return tfd.Independent(tfd.Normal(mean, 1.0), len(dist.event_shape))

  # Only the local copy of the heads gets the transformed image head.
  heads.unlock()
  heads['image'] = lambda features: transform(graph.heads['image'](features))
  heads.lock()

  with tf.variable_scope('general'):
    summaries += summary.data_summaries(graph.data, config.postprocess_fn)
    summaries += summary.dataset_summaries(config.train_dir)
    summaries += summary.objective_summaries(graph.objectives)
    summaries.append(tf.summary.scalar('step', graph.step))
    new_time, new_step = tf.timestamp(), tf.cast(graph.global_step, tf.float64)
    delta_time, delta_step = new_time - last_time, new_step - last_step
    # The deltas must be computed before the stored values are overwritten,
    # and the throughput summaries are created under the assignments so that
    # evaluating them also updates the stored time and step.
    with tf.control_dependencies([delta_time, delta_step]):
      assign_ops = [last_time.assign(new_time), last_step.assign(new_step)]
      with tf.control_dependencies(assign_ops):
        summaries.append(tf.summary.scalar(
            'steps_per_second', delta_step / delta_time))
        summaries.append(tf.summary.scalar(
            'seconds_per_step', delta_time / delta_step))

  with tf.variable_scope('closedloop'):
    prior, posterior = tools.unroll.closed_loop(
        graph.cell, graph.embedded, graph.data['action'], config.debug)
    summaries += summary.state_summaries(graph.cell, prior, posterior, mask)
    with tf.variable_scope('prior'):
      prior_features = graph.cell.features_from_state(prior)
      prior_dists = {
          name: head(prior_features)
          for name, head in heads.items()}
      summaries += summary.dist_summaries(prior_dists, graph.data, mask)
      summaries += summary.image_summaries(
          prior_dists['image'], config.postprocess_fn(graph.data['image']))
    with tf.variable_scope('posterior'):
      posterior_features = graph.cell.features_from_state(posterior)
      posterior_dists = {
          name: head(posterior_features)
          for name, head in heads.items()}
      summaries += summary.dist_summaries(
          posterior_dists, graph.data, mask)
      summaries += summary.image_summaries(
          posterior_dists['image'],
          config.postprocess_fn(graph.data['image']))

  with tf.variable_scope('openloop'):
    state = tools.unroll.open_loop(
        graph.cell, graph.embedded, graph.data['action'],
        config.open_loop_context, config.debug)
    state_features = graph.cell.features_from_state(state)
    state_dists = {name: head(state_features) for name, head in heads.items()}
    summaries += summary.dist_summaries(state_dists, graph.data, mask)
    summaries += summary.image_summaries(
        state_dists['image'], config.postprocess_fn(graph.data['image']))
    summaries += summary.state_summaries(graph.cell, state, posterior, mask)
    with tf.control_dependencies(plot_summaries):
      # Here `summary.prediction_summaries` is unpacked into the plot
      # summaries and a second value that is returned to the caller.
      plot_summary, prediction = summary.prediction_summaries(
          state_dists, graph.data, state)
      plot_summaries += plot_summary
      summaries += plot_summary

  with tf.variable_scope('simulation'):
    sim_returns = []
    for name, params in config.test_collects.items():
      # These are expensive and equivalent for train and test phases, so only
      # do one of them.
      sim_summary, sim_return = tf.cond(
          tf.equal(graph.phase, 'test'),
          lambda: utility.simulate_episodes(
              config, params, graph, cleanups,
              expensive_summaries=False,
              gif_summary=True,
              name=name),
          lambda: ('', 0.0),
          name='should_simulate_' + params.task.name)
      summaries.append(sim_summary)
      sim_returns.append(sim_return)

  summaries = tf.summary.merge(summaries)
  score = tf.reduce_mean(sim_returns)[None]

  # Element 0 of each selected data entry, in the same key order as before,
  # followed by element 0 of the open-loop image prediction.
  graph_slim = {
      key: graph.data[key][0]
      for key in (
          'action', 'image', 'position', 'velocity', 'return', 'reward')}
  graph_slim['predicted_image'] = state_dists['image'].mode()[0]

  return summaries, score, prediction, graph_slim

Exemple #2

0

Afficher le fichier

def define_testmodel(data, trainer, config, logdir):
    """Build a test-only graph that simulates the configured test episodes.

    Args:
        data: Mapping of input tensors. It is passed to the encoder,
            `data['action']` is used for the closed-loop unroll, and entries
            whose key matches a head name provide that head's `data_shape`.
        trainer: Object providing `step`, `global_step` and `phase`.
        config: Configuration; this function reads `cell`, `encoder`, `heads`,
            `debug` and `test_collects`.
        logdir: Not used in the body, but it is a local and therefore ends up
            in the `graph` container built from `locals()`.

    Returns:
        A tuple `(score, summaries, cleanups)`. `score` is the second output
        of the `tf.cond` built for the LAST entry of `config.test_collects`;
        `summaries` is an empty list; `cleanups` is the list handed to
        `utility.simulate_episodes`.

    Raises:
        ValueError: If `config.test_collects` has no entries, since no score
            tensor exists in that case (previously an obscure `NameError`).
    """
    tf.logging.info('Build TensorFlow compute graph.')
    dependencies = []
    cleanups = []
    step = trainer.step
    global_step = trainer.global_step
    phase = trainer.phase

    # Instantiate network blocks.
    cell = config.cell()
    kwargs = dict(create_scope_now_=True)
    encoder = tf.make_template('encoder', config.encoder, **kwargs)
    heads = tools.AttrDict(_unlocked=True)
    dummy_features = cell.features_from_state(cell.zero_state(1, tf.float32))
    for key, head in config.heads.items():
        name = 'head_{}'.format(key)
        kwargs = dict(create_scope_now_=True)
        if key in data:
            kwargs['data_shape'] = data[key].shape[2:].as_list()
        elif key == 'action_target':
            kwargs['data_shape'] = data['action'].shape[2:].as_list()
        heads[key] = tf.make_template(name, head, **kwargs)
        heads[key](dummy_features)  # Initialize weights.
    tf.logging.info('Cell: %s, encoder: %s', cell, encoder)

    # Apply the model.
    embedded = encoder(data)
    with tf.control_dependencies(dependencies):
        embedded = tf.identity(embedded)
    # NOTE: `graph` is a snapshot of the local variables at this point, so the
    # local names above (`cell`, `heads`, `phase`, `embedded`, ...) are part of
    # its interface. Do not rename or remove them.
    graph = tools.AttrDict(locals())
    prior, posterior = tools.unroll.closed_loop(cell, embedded, data['action'],
                                                config.debug)

    # The reward prediction is not consumed below. The calls are kept because
    # they add ops to the graph.
    # NOTE(review): confirm whether they can be dropped.
    features = graph.cell.features_from_state(posterior)
    pred = heads['reward'](features)

    summaries = []
    score = None
    with tf.variable_scope('simulation'):
        for name, params in config.test_collects.items():
            # These are expensive and equivalent for train and test phases, so
            # only do one of them.
            tf.logging.info('Simulate %s: %s', name, params)
            _, score = tf.cond(
                tf.equal(graph.phase, 'test'),
                lambda: utility.simulate_episodes(config,
                                                  params,
                                                  graph,
                                                  cleanups,
                                                  expensive_summaries=True,
                                                  gif_summary=True,
                                                  name=name),
                lambda: ('', 0.0),
                name='should_simulate_' + params.task.name)
    if score is None:
        raise ValueError(
            'config.test_collects is empty; no simulation score to return.')

    with tf.control_dependencies(dependencies):
        score = tf.identity(score)
    return score, summaries, cleanups

Exemple #3

0

Afficher le fichier

def define_summaries(graph, config, cleanups):
    """Assemble all summary ops for the model and compute the test score.

    Summaries are created, in this order, under the variable scopes
    'general', 'closedloop' (with 'prior' and 'posterior' sub-scopes),
    'openloop' and 'simulation'.

    Args:
        graph: Attribute container; reads `data`, `embedded`, `heads`, `cell`,
            `objectives`, `step`, `global_step` and `phase`.
        config: Configuration; reads `postprocess_fn`, `train_dir`, `debug`,
            `open_loop_context` and `test_collects`.
        cleanups: Forwarded untouched to `utility.simulate_episodes`.

    Returns:
        A tuple `(summaries, score)`: the merged summary op and the mean of
        the simulated returns with a leading axis added.
    """
    v1 = tf.compat.v1
    data = graph.data
    cell = graph.cell
    postprocess = config.postprocess_fn

    collected = []
    plot_deps = []  # Control dependencies for non thread-safe matplot.
    seq_length = data['length']
    positions = tf.range(graph.embedded.shape[1])
    mask = positions[None, :] < seq_length[:, None]
    heads = graph.heads.copy()
    last_time = tf.Variable(lambda: tf.timestamp(), trainable=False)
    last_step = tf.Variable(lambda: 0.0, trainable=False, dtype=tf.float64)

    def clipped_image_dist(dist):
        # Unit-scale Normal around the post-processed mean clipped to [0, 1].
        clipped = tf.clip_by_value(postprocess(dist.mean()), 0.0, 1.0)
        return tfd.Independent(
            tfd.Normal(clipped, 1.0), len(dist.event_shape))

    def image_head(features):
        # Looks up the original image head at call time, then transforms it.
        return clipped_image_dist(graph.heads['image'](features))

    def apply_heads(features):
        # Evaluate every head on the same features.
        return {key: fn(features) for key, fn in heads.items()}

    heads.unlock()
    heads['image'] = image_head
    heads.lock()

    with v1.variable_scope('general'):
        collected.extend(summary.data_summaries(data, postprocess))
        collected.extend(summary.dataset_summaries(config.train_dir))
        collected.extend(summary.objective_summaries(graph.objectives))
        collected.append(v1.summary.scalar('step', graph.step))
        now = tf.timestamp()
        current_step = tf.cast(graph.global_step, tf.float64)
        elapsed = now - last_time
        advanced = current_step - last_step
        # Deltas are computed before the stored values are overwritten; the
        # throughput summaries are created under the assignments.
        with tf.control_dependencies([elapsed, advanced]):
            updates = [last_time.assign(now), last_step.assign(current_step)]
            with tf.control_dependencies(updates):
                for tag, numer, denom in (
                        ('steps_per_second', advanced, elapsed),
                        ('seconds_per_step', elapsed, advanced)):
                    collected.append(v1.summary.scalar(tag, numer / denom))

    with v1.variable_scope('closedloop'):
        prior, posterior = tools.unroll.closed_loop(
            cell, graph.embedded, data['action'], config.debug)
        collected.extend(
            summary.state_summaries(cell, prior, posterior, mask))
        for scope_name, rollout in (('prior', prior),
                                    ('posterior', posterior)):
            with v1.variable_scope(scope_name):
                dists = apply_heads(cell.features_from_state(rollout))
                collected.extend(summary.dist_summaries(dists, data, mask))
                collected.extend(summary.image_summaries(
                    dists['image'], postprocess(data['image'])))

    with v1.variable_scope('openloop'):
        open_state = tools.unroll.open_loop(
            cell, graph.embedded, data['action'],
            config.open_loop_context, config.debug)
        open_dists = apply_heads(cell.features_from_state(open_state))
        collected.extend(summary.dist_summaries(open_dists, data, mask))
        collected.extend(summary.image_summaries(
            open_dists['image'], postprocess(data['image'])))
        collected.extend(
            summary.state_summaries(cell, open_state, posterior, mask))
        with tf.control_dependencies(plot_deps):
            plots = summary.prediction_summaries(open_dists, data, open_state)
            plot_deps += plots
            collected += plots

    with v1.variable_scope('simulation'):
        returns = []
        for name, params in config.test_collects.items():
            # These are expensive and equivalent for train and test phases, so
            # only do one of them. Both branch functions are invoked by
            # `cond` right here, within the current loop iteration.
            def run_simulation():
                return utility.simulate_episodes(
                    config, params, graph, cleanups,
                    expensive_summaries=False,
                    gif_summary=True,
                    name=name)

            def skip_simulation():
                return '', 0.0

            episode_summary, episode_return = v1.cond(
                v1.equal(graph.phase, 'test'),
                run_simulation,
                skip_simulation,
                name='should_simulate_' + params.task.name)
            collected.append(episode_summary)
            returns.append(episode_return)

    merged = v1.summary.merge(collected)
    score = tf.reduce_mean(returns)[None]
    return merged, score

Exemple #4

0

Afficher le fichier

Fichier : define_summaries.py Projet : dingyiming0427/planet

def define_summaries(graph, config, cleanups):
    """Build all summary ops for the model and compute the simulation score.

    Summaries are created under the variable scopes 'general',
    'embedding_magnitude', 'cpc', 'closedloop', 'openloop', 'simulation' and,
    when `config.robustness_summary` is truthy, 'robustness'.

    Args:
        graph: Attribute container; reads `data`, `embedded`, `heads`, `cell`,
            `objectives`, `step`, `global_step`, `phase`, `cpc_logs` and
            `encoder`.
        config: Configuration; reads `cpc`, `postprocess_fn`, `preprocess_fn`,
            `train_dir`, `debug`, `open_loop_context`, `test_collects`,
            `robustness_summary` and `tasks`.
        cleanups: Forwarded untouched to `utility.simulate_episodes`.

    Returns:
        A tuple `(summaries, score)`: the merged summary op and the mean of
        the simulated returns with a leading axis added.
    """
    summaries = []
    plot_summaries = []  # Control dependencies for non thread-safe matplot.
    length = graph.data['length']
    # True where the index along axis 1 of `embedded` is below the row's length.
    mask = tf.range(graph.embedded.shape[1].value)[None, :] < length[:, None]
    heads = graph.heads.copy()
    last_time = tf.Variable(lambda: tf.timestamp(), trainable=False)
    last_step = tf.Variable(lambda: 0.0, trainable=False, dtype=tf.float64)

    def transform(dist):
        # Post-process and clip the mean, then wrap it in a unit-scale Normal
        # with the same number of event dimensions as the input distribution.
        mean = config.postprocess_fn(dist.mean())
        mean = tf.clip_by_value(mean, 0.0, 1.0)
        return tfd.Independent(tfd.Normal(mean, 1.0), len(dist.event_shape))

    # The image head is only wrapped (and image summaries only written below)
    # when `config.cpc` is falsy.
    if not config.cpc:
        heads.unlock()
        heads['image'] = lambda features: transform(graph.heads['image']
                                                    (features))
        heads.lock()

    with tf.variable_scope('general'):
        summaries += summary.data_summaries(graph.data, config.postprocess_fn)
        summaries += summary.dataset_summaries(config.train_dir)
        summaries += summary.objective_summaries(graph.objectives)
        summaries.append(tf.summary.scalar('step', graph.step))
        new_time, new_step = tf.timestamp(), tf.cast(graph.global_step,
                                                     tf.float64)
        delta_time, delta_step = new_time - last_time, new_step - last_step
        # The deltas are computed before the stored values are overwritten;
        # the throughput summaries are created under the assignments.
        with tf.control_dependencies([delta_time, delta_step]):
            assign_ops = [
                last_time.assign(new_time),
                last_step.assign(new_step)
            ]
            with tf.control_dependencies(assign_ops):
                summaries.append(
                    tf.summary.scalar('steps_per_second',
                                      delta_step / delta_time))
                summaries.append(
                    tf.summary.scalar('seconds_per_step',
                                      delta_time / delta_step))

    with tf.variable_scope('embedding_magnitude'):
        summaries += summary.magnitude_summary(graph.embedded, 'emb')

    # One scalar summary per entry of `graph.cpc_logs`. Note that this is read
    # regardless of the value of `config.cpc`.
    with tf.variable_scope('cpc'):
        cpc_logs = graph.cpc_logs
        for k, v in cpc_logs.items():
            summaries.append(tf.summary.scalar(k, v))

    with tf.variable_scope('closedloop'):
        prior, posterior = tools.unroll.closed_loop(graph.cell, graph.embedded,
                                                    graph.data['action'],
                                                    config.debug)
        summaries += summary.state_summaries(graph.cell, prior, posterior,
                                             mask)
        with tf.variable_scope('prior'):
            prior_features = graph.cell.features_from_state(prior)
            prior_dists = {
                name: head(prior_features)
                for name, head in heads.items()
            }
            summaries += summary.dist_summaries(prior_dists, graph.data, mask)
            if not config.cpc:
                summaries += summary.image_summaries(
                    prior_dists['image'],
                    config.postprocess_fn(graph.data['image']))
            with tf.variable_scope('magnitude'):
                summaries += summary.magnitude_summary(prior['sample'],
                                                       'sample')
                # Difference between neighbouring entries along axis 1.
                summaries += summary.magnitude_summary(
                    prior['sample'][:, 1:] - prior['sample'][:, :-1], 'diff')

        with tf.variable_scope('posterior'):
            posterior_features = graph.cell.features_from_state(posterior)
            posterior_dists = {
                name: head(posterior_features)
                for name, head in heads.items()
            }
            summaries += summary.dist_summaries(posterior_dists, graph.data,
                                                mask)
            with tf.variable_scope('magnitude'):
                summaries += summary.magnitude_summary(posterior['sample'],
                                                       'sample')
                summaries += summary.magnitude_summary(
                    posterior['sample'][:, 1:] - posterior['sample'][:, :-1],
                    'diff')
            if not config.cpc:
                summaries += summary.image_summaries(
                    posterior_dists['image'],
                    config.postprocess_fn(graph.data['image']))
        with tf.variable_scope('mixed'):
            with tf.variable_scope('magnitude'):
                # Prior sample at index t+1 minus posterior sample at index t.
                summaries += summary.magnitude_summary(
                    prior['sample'][:, 1:] - posterior['sample'][:, :-1],
                    'diff')

    with tf.variable_scope('openloop'):
        state = tools.unroll.open_loop(graph.cell, graph.embedded,
                                       graph.data['action'],
                                       config.open_loop_context, config.debug)
        state_features = graph.cell.features_from_state(state)
        state_dists = {
            name: head(state_features)
            for name, head in heads.items()
        }
        summaries += summary.dist_summaries(state_dists, graph.data, mask)
        with tf.variable_scope('magnitude'):
            summaries += summary.magnitude_summary(state['sample'], 'sample')
            # NOTE(review): unlike the closed-loop 'diff' summaries, this one
            # takes the absolute value first. Confirm this is intended.
            summaries += summary.magnitude_summary(
                tf.abs(state['sample'][:, 1:] - state['sample'][:, :-1]),
                'diff')
        if not config.cpc:
            summaries += summary.image_summaries(
                state_dists['image'],
                config.postprocess_fn(graph.data['image']))
        summaries += summary.state_summaries(graph.cell, state, posterior,
                                             mask)
        with tf.control_dependencies(plot_summaries):
            plot_summary = summary.prediction_summaries(
                state_dists, graph.data, state)
            plot_summaries += plot_summary
            summaries += plot_summary

    with tf.variable_scope('simulation'):
        sim_returns = []
        for name, params in config.test_collects.items():
            # These are expensive and equivalent for train and test phases, so only
            # do one of them.
            sim_summary, sim_return = tf.cond(
                tf.equal(graph.phase, 'test'),
                lambda: utility.simulate_episodes(config,
                                                  params,
                                                  graph,
                                                  cleanups,
                                                  expensive_summaries=False,
                                                  gif_summary=True,
                                                  name=name),
                lambda: ('', 0.0),
                name='should_simulate_' + params.task.name)
            summaries.append(sim_summary)
            sim_returns.append(sim_return)

    if config.robustness_summary:
        with tf.variable_scope('robustness'):
            # The environment stepping, rendering and NumPy sampling below run
            # in Python while this function executes, not when the summaries
            # are evaluated.
            env = config.tasks[0].env_ctor()
            num_states = 20
            num_tries = 5
            # Empty batch that the rendered images are concatenated onto.
            # Assumes `config.preprocess_fn` yields 32x32x3 images. TODO
            # confirm.
            images = tf.zeros(shape=(0, 32, 32, 3))
            for i in range(num_states):
                # NOTE: rebinds `state`, which held the open-loop rollout
                # above; the rollout is not used after this point.
                state = np.random.uniform(low=[-1.8, -np.pi],
                                          high=[1.8, np.pi],
                                          size=(2, ))
                # Render the same sampled state `num_tries` times.
                for j in range(num_tries):
                    env._physics.reset_from_obs(state)
                    env.task.get_observation(env._physics)
                    img = config.preprocess_fn(env._render_image())
                    # plt.imshow(img)
                    # plt.savefig("%d_%d.png" % (i, j))
                    images = tf.concat([images, img[None]], axis=0)
            # Group the embeddings as (state, try, features).
            embedded = tf.reshape(graph.encoder(images),
                                  shape=(num_states, num_tries, -1))
            # calculate variance within different representations of the same state
            group_mean = tf.reduce_mean(embedded, axis=1, keepdims=True)
            variance_within = tf.reduce_mean(
                tf.reduce_sum(tf.square(embedded - group_mean), axis=-1))
            # calculate total variance
            total_mean = tf.reduce_mean(embedded, axis=[0, 1], keepdims=True)
            total_variance = tf.reduce_mean(
                tf.reduce_sum(tf.square(embedded - total_mean), axis=-1))
            summaries.append(
                tf.summary.scalar('variance_within', variance_within))
            summaries.append(
                tf.summary.scalar('total_variance', total_variance))
            summaries.append(
                tf.summary.scalar('variance_ratio',
                                  variance_within / total_variance))

            # Second experiment: two renders per sampled state, the first with
            # `no_dis=True` and the second with `no_dis=False`.
            images = tf.zeros(shape=(0, 32, 32, 3))
            for i in range(num_states):
                state = np.random.uniform(low=[-1.8, -np.pi],
                                          high=[1.8, np.pi],
                                          size=(2, ))
                for j in range(2):
                    env._physics.reset_from_obs(state)
                    env.task.get_observation(env._physics, no_dis=(j == 0))
                    img = config.preprocess_fn(env._render_image())
                    # plt.imshow(img)
                    # plt.savefig("%d_%d.png" % (i, j))
                    images = tf.concat([images, img[None]], axis=0)
            embedded = tf.reshape(graph.encoder(images),
                                  shape=(num_states, 2, -1))
            # Norm of the embedding difference between the two renders of each
            # state, relative to the norm of the render at index 1.
            difference_norm = tf.norm(embedded[:, 0] - embedded[:, 1], axis=-1)
            # NOTE(review): named `first_norm` but uses index 1, i.e. the
            # render made with `no_dis=False`. Confirm this is intended.
            first_norm = tf.norm(embedded[:, 1], axis=-1)
            ratio = tf.reduce_mean(difference_norm / first_norm)

            summaries.append(
                tf.summary.scalar('difference_norm',
                                  tf.reduce_mean(difference_norm)))
            summaries.append(
                tf.summary.scalar('first_norm', tf.reduce_mean(first_norm)))
            summaries.append(tf.summary.scalar('ratio_of_norm', ratio))

    summaries = tf.summary.merge(summaries)
    score = tf.reduce_mean(sim_returns)[None]
    return summaries, score