Ejemplo n.º 1
0
def process(logdir, args):
    """Configure and run training for one experiment run.

    Records `logdir` in the run parameters, builds the config through the
    function named by `args.config`, collects initial episodes, resets the
    TF graph, builds the episode dataset, and yields every score produced
    by the training loop.
    """
    with args.params.unlocked:
        args.params.logdir = logdir
    config = tools.AttrDict()
    with config.unlocked:
        config = getattr(configs, args.config)(config, args.params)
    training.utility.collect_initial_episodes(config)
    tf.reset_default_graph()
    dataset_kwargs = dict(
        loader=config.data_loader,
        preprocess_fn=config.preprocess_fn,
        scan_every=config.scan_episodes_every,
        num_chunks=config.num_chunks,
        resize=config.resize,
        sub_sample=config.sub_sample,
        max_length=config.max_length,
        max_episodes=config.max_episodes,
        action_noise=config.fixed_action_noise)
    dataset = tools.numpy_episodes(config.train_dir, config.test_dir,
                                   config.batch_shape, **dataset_kwargs)
    # Delegate to the training loop's generator of test scores.
    yield from training.utility.train(training.define_model, dataset, logdir,
                                      config)
Ejemplo n.º 2
0
def _define_simulation(task,
                       config,
                       params,
                       horizon,
                       batch_size,
                       objective='reward',
                       rewards=False):
    """Build the AttrDict describing one simulation task.

    Only the cross-entropy-method planner is supported; any other value of
    `params['planner']` raises NotImplementedError.
    """
    planner_name = params.get('planner', 'cem')
    if planner_name != 'cem':
        raise NotImplementedError(planner_name)
    planner_fn = tools.bind(
        control.planning.cross_entropy_method,
        amount=params.get('planner_amount', 1000),
        iterations=params.get('planner_iterations', 10),
        topk=params.get('planner_topk', 100),
        horizon=horizon)
    # Look up the objective function by name and close over the parameters.
    objective_fn = tools.bind(getattr(objectives_lib, objective), params=params)
    return tools.AttrDict(
        task=task,
        num_agents=batch_size,
        planner=planner_fn,
        objective=objective_fn)
Ejemplo n.º 3
0
def simulate_episodes(config, params, graph, cleanups, expensive_summaries,
                      gif_summary, name):
    """Simulate episodes in the environment and return summary and return.

    Builds the agent config from the graph and parameters, merges the two
    dicts into each other, and runs `control.simulate`. The cleanup op is
    appended to `cleanups` rather than returned.
    """
    def env_ctor():
        env = params.task.env_ctor()
        if params.save_episode_dir:
            env = control.wrappers.CollectGymDataset(env,
                                                     params.save_episode_dir)
        env = control.wrappers.ConcatObservation(env, ['image'])
        return env

    def _bind_or_none(fn, **kwargs):
        # Leave falsy callables (e.g. None) untouched instead of wrapping.
        return fn and functools.partial(fn, **kwargs)

    agent_config = tools.AttrDict(
        cell=graph.cell,
        encoder=graph.encoder,
        planner=functools.partial(params.planner, graph=graph),
        objective=_bind_or_none(params.objective, graph=graph),
        exploration=params.exploration,
        preprocess_fn=config.preprocess_fn,
        postprocess_fn=config.postprocess_fn)
    # Copy params first, then cross-update both dicts so each contains the
    # union of keys (params takes precedence in agent_config afterwards).
    params = params.copy()
    with params.unlocked:
        params.update(agent_config)
    with agent_config.unlocked:
        agent_config.update(params)
    summary, return_, cleanup = control.simulate(
        graph.step,
        env_ctor,
        params.task.max_length,
        params.num_agents,
        agent_config,
        config.isolate_envs,
        expensive_summaries,
        gif_summary,
        name=name)
    cleanups.append(cleanup)  # Work around tf.cond() tensor return type.
    return summary, return_
Ejemplo n.º 4
0
def simulate_episodes(config, params, graph, name):
    """Simulate episodes one at a time and return merged summary and mean return.

    Each of the `params.batch_size` episodes runs in its own variable scope
    with a single agent; the per-episode summaries are merged and the
    returns averaged.
    """
    def env_ctor():
        env = params.task.env_ctor()
        if params.save_episode_dir:
            env = control.wrappers.CollectGymDataset(env,
                                                     params.save_episode_dir)
        env = control.wrappers.ConcatObservation(env, ['image'])
        return env

    agent_config = tools.AttrDict(
        cell=graph.cell,
        encoder=graph.encoder,
        planner=params.planner,
        objective=functools.partial(params.objective, graph=graph),
        exploration=params.exploration,
        preprocess_fn=config.preprocess_fn,
        postprocess_fn=config.postprocess_fn)
    # Cross-update so both dicts hold the union of keys.
    params = params.copy()
    params.update(agent_config)
    agent_config.update(params)
    # Batch size larger crashes so we simulate the episodes individually.
    summaries = []
    returns = []
    for episode_index in range(params.batch_size):
        with tf.variable_scope('simulate-{}'.format(episode_index + 1)):
            episode_summary, episode_return = control.simulate(
                graph.step,
                env_ctor,
                params.task.max_length,
                1,
                agent_config,
                name=name)
        summaries.append(episode_summary)
        returns.append(episode_return)
    return tf.summary.merge(summaries), tf.reduce_mean(returns)
Ejemplo n.º 5
0
 def trainable(config):
     """Ray Tune trainable: run one experiment with sampled hyperparameters.

     NOTE(review): reads and mutates the enclosing/global `args`, so
     concurrent trials in the same process would share that state —
     confirm that trials run in separate worker processes.

     Args:
       config: Dict of sampled hyperparameters; reads 'divergence_scale'
         and 'reward_loss_scale'.
     """
     print('begin a trial')
     # '#' presumably stands in for ',' to survive command-line parsing —
     # TODO confirm against the --params argument definition.
     args.params = tools.AttrDict(yaml.safe_load(args.params.replace('#', ',')))
     args.logdir = args.logdir and os.path.expanduser(args.logdir)
     print('debug ', config["divergence_scale"], config["reward_loss_scale"])
     with args.params.unlocked:
         args.params.divergence_scale = config["divergence_scale"]
         args.params.reward_loss_scale = config["reward_loss_scale"]
         # args.params.main_learning_rate = config["main_learning_rate"]
         args.params.test_steps = 50
         # args.params.num_units = config['num_units']
         args.params.test_traj = 5
     training.utility.set_up_logging()
     experiment = training.Experiment(
         args.logdir,
         process_fn=functools.partial(process, args=args),
         num_runs=args.num_runs,
         ping_every=args.ping_every,
         resume_runs=args.resume_runs)
     for run in experiment:
         for test_score in run:
             # Only scores above 1.0 are reported to Tune — presumably to
             # skip early noisy evaluations; verify this threshold.
             if test_score > 1.0:
                 tune.report(mean_score=test_score)
         break
Ejemplo n.º 6
0
def define_model(data, trainer, config):
    """Build the training graph: model, objectives, collection, summaries.

    Args:
      data: Dict of batched episode tensors ('action' plus head targets).
      trainer: Object exposing `step`, `global_step`, `phase`, and `log`.
      config: AttrDict with network constructors and training settings.

    Returns:
      Tuple of (score, summaries, cleanups, prediction, truth).
    """
    tf.logging.info('Build TensorFlow compute graph.')
    dependencies = []
    cleanups = []
    step = trainer.step
    global_step = trainer.global_step
    phase = trainer.phase

    # Instantiate network blocks.
    cell = config.cell()
    kwargs = dict(create_scope_now_=True)
    encoder = tf.make_template('encoder', config.encoder, **kwargs)
    heads = tools.AttrDict(_unlocked=True)
    # Dummy rollout features used only to create the head variables eagerly.
    dummy_features = cell.features_from_state(cell.zero_state(1, tf.float32))
    for key, head in config.heads.items():
        name = 'head_{}'.format(key)
        kwargs = dict(create_scope_now_=True)
        if key in data:
            kwargs['data_shape'] = data[key].shape[2:].as_list()
        elif key == 'action_target':
            # The action-target head predicts the same shape as the actions.
            kwargs['data_shape'] = data['action'].shape[2:].as_list()
        heads[key] = tf.make_template(name, head, **kwargs)
        heads[key](dummy_features)  # Initialize weights.

    # Apply and optimize model.
    embedded = encoder(data)
    with tf.control_dependencies(dependencies):
        embedded = tf.identity(embedded)
    # Snapshot locals so downstream helpers can access graph pieces by name.
    graph = tools.AttrDict(locals())
    prior, posterior = tools.unroll.closed_loop(cell, embedded, data['action'],
                                                config.debug)
    objectives = utility.compute_objectives(posterior, prior, data, graph,
                                            config)
    summaries, grad_norms = utility.apply_optimizers(objectives, trainer,
                                                     config)

    # Active data collection.
    with tf.variable_scope('collection'):
        with tf.control_dependencies(summaries):  # Make sure to train first.
            for name, params in config.train_collects.items():
                # Binary schedule gating when this collect task should run.
                schedule = tools.schedule.binary(step, config.batch_shape[0],
                                                 params.steps_after,
                                                 params.steps_every,
                                                 params.steps_until)
                summary, _ = tf.cond(
                    tf.logical_and(tf.equal(trainer.phase, 'train'), schedule),
                    functools.partial(utility.simulate_episodes,
                                      config,
                                      params,
                                      graph,
                                      cleanups,
                                      expensive_summaries=False,
                                      gif_summary=False,
                                      name=name),
                    lambda: (tf.constant(''), tf.constant(0.0)),
                    name='should_collect_' + name)
                summaries.append(summary)

    # Compute summaries.
    graph = tools.AttrDict(locals())
    summaries, score, prediction, truth = define_summaries.define_summaries(
        graph, config, cleanups)  #tf.cond(
    # trainer.log,
    # lambda: define_summaries.define_summaries(graph, config, cleanups),
    # lambda: (tf.constant(''), tf.zeros((0,), tf.float32), tf.zeros((8,), tf.float32)),
    # name='summaries')
    # NOTE(review): `summary` here is the leftover value from the last
    # iteration of the collection loop above — confirm merging only that
    # one is intended.
    summaries = tf.summary.merge([summaries, summary])
    dependencies.append(
        utility.print_metrics({ob.name: ob.value
                               for ob in objectives}, step,
                              config.print_metrics_every, 'objectives'))
    dependencies.append(
        utility.print_metrics(grad_norms, step, config.print_metrics_every,
                              'grad_norms'))
    # Force metric printing to run before the score is reported.
    with tf.control_dependencies(dependencies):
        score = tf.identity(score)
    return score, summaries, cleanups, prediction, truth
Ejemplo n.º 7
0
if __name__ == '__main__':
    # argparse type callback: maps the literal strings 'False'/'True' to
    # booleans; any other value raises ValueError.
    def _parse_bool(text):
        return bool(['False', 'True'].index(text))

    parser = argparse.ArgumentParser()
    parser.add_argument('--logdir', default=None)
    parser.add_argument('--num_runs', type=int, default=1)
    parser.add_argument(
        '--config',
        default='default',
        help='Select a configuration function from scripts/configs.py.')
    parser.add_argument(
        '--params',
        default='{}',
        help='YAML formatted dictionary to be used by the config.')
    parser.add_argument(
        '--ping_every',
        type=int,
        default=0,
        help='Used to prevent conflicts between multiple workers; 0 to disable.'
    )
    parser.add_argument(
        '--resume_runs',
        type=_parse_bool,
        default=True,
        help='Whether to resume unfinished runs in the log directory.')
    cli_args, remaining = parser.parse_known_args()
    # '#' presumably stands in for ',' so the shell does not split the
    # YAML string — TODO confirm with the launch scripts.
    cli_args.params = tools.AttrDict(
        yaml.safe_load(cli_args.params.replace('#', ',')))
    cli_args.logdir = cli_args.logdir and os.path.expanduser(cli_args.logdir)
    # tf.app.run expects argv[0] to be the program name.
    remaining.insert(0, sys.argv[0])
    tf.app.run(lambda _: main(cli_args), remaining)
Ejemplo n.º 8
0
def define_model(data, trainer, config):
    """Build the training graph for an ensemble of dynamics models.

    Like the single-model `define_model`, but instantiates
    `config.num_models` cells under separate variable scopes and trains
    each one on a bootstrap resample of the batch (disagreement-style
    ensembling).

    Returns:
      Tuple of (score, summaries, cleanups).
    """
    tf.logging.info('Build TensorFlow compute graph.')
    dependencies = []
    cleanups = []
    step = trainer.step
    global_step = trainer.global_step
    phase = trainer.phase

    #Disagreement additions

    # One recurrent cell per ensemble member, each under its own scope.
    cell = []
    for mdl in range(config.num_models):
        with tf.variable_scope('model_no' + str(mdl)):
            cell.append(config.cell())
            kwargs = dict(create_scope_now_=True)

    encoder = tf.make_template('encoder', config.encoder, **kwargs)
    #heads = tools.AttrDict(_unlocked=True)
    heads = tools.AttrDict(_unlocked=True)
    #dummy_features = cell.features_from_state(cell.zero_state(1, tf.float32))
    # Dummy features from the first ensemble member, only to build weights.
    dummy_features = cell[0].features_from_state(cell[0].zero_state(
        1, tf.float32))

    for key, head in config.heads.items():
        print('KEYHEAD', key)
        name = 'head_{}'.format(key)
        kwargs = dict(create_scope_now_=True)
        if key in data:
            kwargs['data_shape'] = data[key].shape[2:].as_list()
        elif key == 'action_target':
            kwargs['data_shape'] = data['action'].shape[2:].as_list()
        #heads[key] = tf.make_template(name, head, **kwargs)
        heads[key] = tf.make_template(name, head, **kwargs)
        heads[key](dummy_features)  # Initialize weights.

    embedded = encoder(data)
    with tf.control_dependencies(dependencies):
        embedded = tf.identity(embedded)

    # Snapshot locals so helpers can access graph pieces by name.
    graph = tools.AttrDict(locals())
    posterior = []
    prior = []

    # Per-model batch indices sampled with replacement (bagging).
    bagging_size = int(config.batch_shape[0])
    sample_with_replacement = tf.random.uniform(
        [config.num_models, bagging_size],
        minval=0,
        maxval=config.batch_shape[0],
        dtype=tf.int32)

    for mdl in range(config.num_models):
        with tf.variable_scope('model_no' + str(mdl)):
            # Each ensemble member sees its own bootstrap resample.
            bootstrap_action_data = tf.gather(data['action'],
                                              sample_with_replacement[mdl, :],
                                              axis=0)
            bootstrap_embedded = tf.gather(embedded,
                                           sample_with_replacement[mdl, :],
                                           axis=0)
            tmp_prior, tmp_posterior = tools.unroll.closed_loop(
                cell[mdl], bootstrap_embedded, bootstrap_action_data,
                config.debug)
            prior.append(tmp_prior)
            posterior.append(tmp_posterior)

    graph = tools.AttrDict(locals())  # Refresh snapshot with the rollouts.
    objectives = utility.compute_objectives(posterior, prior, data, graph,
                                            config)

    summaries, grad_norms = utility.apply_optimizers(objectives, trainer,
                                                     config)

    graph = tools.AttrDict(locals())
    # Active data collection.
    with tf.variable_scope('collection'):
        with tf.control_dependencies(summaries):  # Make sure to train first.
            for name, params in config.train_collects.items():
                # Binary schedule gating when this collect task should run.
                schedule = tools.schedule.binary(step, config.batch_shape[0],
                                                 params.steps_after,
                                                 params.steps_every,
                                                 params.steps_until)
                summary, _ = tf.cond(
                    tf.logical_and(tf.equal(trainer.phase, 'train'), schedule),
                    functools.partial(utility.simulate_episodes,
                                      config,
                                      params,
                                      graph,
                                      cleanups,
                                      expensive_summaries=False,
                                      gif_summary=False,
                                      name=name),
                    lambda: (tf.constant(''), tf.constant(0.0)),
                    name='should_collect_' + name)
                summaries.append(summary)
    print('AFTER ACTIVE DATA COLLECT')
    # Compute summaries.
    graph = tools.AttrDict(locals())
    # for k,v in graph.items():
    #     print('KEEY',k)
    #assert 1==2
    #TODO: Determine if summary from one model is enough
    summary, score = tf.cond(
        trainer.log,
        lambda: define_summaries.define_summaries(graph, config, cleanups),
        lambda: (tf.constant(''), tf.zeros((0, ), tf.float32)),
        name='summaries')
    summaries = tf.summary.merge([summaries, summary])
    #TODO: Determine if objective and grad norm printed from only one model is enough
    # Objectives
    dependencies.append(
        utility.print_metrics({ob.name: ob.value
                               for ob in objectives}, step,
                              config.print_metrics_every, 'objectives'))
    dependencies.append(
        utility.print_metrics(grad_norms, step, config.print_metrics_every,
                              'grad_norms'))
    # Force metric printing to run before the score is reported.
    with tf.control_dependencies(dependencies):
        score = tf.identity(score)
    print('Code runs?')
    #assert 1==2
    return score, summaries, cleanups
Ejemplo n.º 9
0
def define_testmodel(data, trainer, config, logdir):
    """Build an evaluation-only graph: posterior rollout plus test simulation.

    Most of the training machinery is retained below as commented-out
    code; only the rollout, the reward head, and the test-phase episode
    simulation are active.

    Returns:
      Tuple of (score, summaries, cleanups); `summaries` remains the empty
      list created above the simulation scope.
    """
    tf.logging.info('Build TensorFlow compute graph.')
    dependencies = []
    cleanups = []
    step = trainer.step
    global_step = trainer.global_step
    phase = trainer.phase

    # Instantiate network blocks.
    cell = config.cell()
    kwargs = dict(create_scope_now_=True)
    encoder = tf.make_template('encoder', config.encoder, **kwargs)
    heads = tools.AttrDict(_unlocked=True)
    # Dummy rollout features used only to create the head variables eagerly.
    dummy_features = cell.features_from_state(cell.zero_state(1, tf.float32))
    for key, head in config.heads.items():
        name = 'head_{}'.format(key)
        kwargs = dict(create_scope_now_=True)
        if key in data:
            kwargs['data_shape'] = data[key].shape[2:].as_list()
        elif key == 'action_target':
            kwargs['data_shape'] = data['action'].shape[2:].as_list()
        heads[key] = tf.make_template(name, head, **kwargs)
        heads[key](dummy_features)  # Initialize weights.
    print(
        cell,
        encoder,
    )
    # Apply and optimize model.
    embedded = encoder(data)
    with tf.control_dependencies(dependencies):
        embedded = tf.identity(embedded)
    # Snapshot locals so helpers can access graph pieces by name.
    graph = tools.AttrDict(locals())
    prior, posterior = tools.unroll.closed_loop(cell, embedded, data['action'],
                                                config.debug)

    features = graph.cell.features_from_state(posterior)
    # NOTE(review): `pred` is currently unused — the reward-statistics line
    # below is commented out.
    pred = heads['reward'](features)
    # dependencies.append(reward_statistics(pred, data['reward'], logdir))
    summaries = []
    with tf.variable_scope('simulation'):
        sim_returns = []
        for name, params in config.test_collects.items():
            # These are expensive and equivalent for train and test phases, so only
            # do one of them.
            print(name, params)
            # NOTE(review): the lambdas close over the loop variables
            # `params` and `name` late; with more than one entry in
            # `test_collects` every cond branch would use the last
            # iteration's values — confirm only one collect is configured.
            sim_summary, score = tf.cond(
                tf.equal(graph.phase, 'test'),
                lambda: utility.simulate_episodes(config,
                                                  params,
                                                  graph,
                                                  cleanups,
                                                  expensive_summaries=True,
                                                  gif_summary=True,
                                                  name=name),
                lambda: ('', 0.0),
                name='should_simulate_' + params.task.name)
    # with tf.variable_scope('collection'):
    #     with tf.control_dependencies(summaries):  # Make sure to train first.
    #         for name, params in config.train_collects.items():
    #             # schedule = tools.schedule.binary(
    #             #     step, config.batch_shape[0],
    #             #     params.steps_after, params.steps_every, params.steps_until)
    #             # summary, _ = tf.cond(
    #             #     tf.logical_and(tf.equal(trainer.phase, 'train'), schedule),
    #             #     functools.partial(
    #             #         utility.simulate_episodes, config, params, graph, cleanups,
    #             #         expensive_summaries=True, gif_summary=False, name=name),
    #             #     lambda: (tf.constant(''), tf.constant(0.0)),
    #             #     name='should_collect_' + name)
    #             summary, score = utility.simulate_episodes(config, params, graph, cleanups,
    #                                                    expensive_summaries=False, gif_summary=False, name=name)
    #             # dependencies.append(summary)

    # print('wuuw', sim_return)
    # objectives = utility.compute_objectives(
    #     posterior, prior, data, graph, config, trainer)
    # summaries, grad_norms = utility.apply_optimizers(
    #     objectives, trainer, config)

    # # Active data collection.
    # with tf.variable_scope('collection'):
    #   with tf.control_dependencies(summaries):  # Make sure to train first.
    #     for name, params in config.train_collects.items():
    #       schedule = tools.schedule.binary(
    #           step, config.batch_shape[0],
    #           params.steps_after, params.steps_every, params.steps_until)
    #       summary, _ = tf.cond(
    #           tf.logical_and(tf.equal(trainer.phase, 'train'), schedule),
    #           functools.partial(
    #               utility.simulate_episodes, config, params, graph, cleanups,
    #               expensive_summaries=True, gif_summary=False, name=name),
    #           lambda: (tf.constant(''), tf.constant(0.0)),
    #           name='should_collect_' + name)
    #       summaries.append(summary)

    # # Compute summaries.
    # score = tf.zeros((0,), tf.float32)
    # summary, score = tf.cond(
    #     trainer.log,
    #     lambda: define_summaries.define_summaries(graph, config, cleanups),
    #     lambda: (tf.constant(''), tf.zeros((0,), tf.float32)),
    #     name='summaries')
    # summaries = tf.summary.merge([summaries, summary])
    # dependencies.append(utility.print_metrics(
    #     {ob.name: ob.value for ob in objectives},
    #     step, config.print_metrics_every, 'objectives'))
    with tf.control_dependencies(dependencies):
        score = tf.identity(score)
    return score, summaries, cleanups
Ejemplo n.º 10
0
def define_model(data, trainer, config):
  """Build the training graph for the overshooting-based model variant.

  Computes zero-step and multi-step (overshooting) losses, optimizes
  them, schedules active data collection, and defines summaries.

  Returns:
    Tuple of (score, summaries).
  """
  tf.logging.info('Build TensorFlow compute graph.')
  dependencies = []
  step = trainer.step
  global_step = trainer.global_step
  phase = trainer.phase
  should_summarize = trainer.log

  # Preprocess data.
  with tf.device('/cpu:0'):
    if config.dynamic_action_noise:
      data['action'] += tf.random_normal(
          tf.shape(data['action']), 0.0, config.dynamic_action_noise)
    # Shift actions one step right so index t holds the previous action.
    prev_action = tf.concat(
        [0 * data['action'][:, :1], data['action'][:, :-1]], 1)
    obs = data.copy()
    del obs['length']

  # Instantiate network blocks.
  cell = config.cell()
  kwargs = dict()
  encoder = tf.make_template(
      'encoder', config.encoder, create_scope_now_=True, **kwargs)
  heads = {}
  for key, head in config.heads.items():
    name = 'head_{}'.format(key)
    kwargs = dict(data_shape=obs[key].shape[2:].as_list())
    heads[key] = tf.make_template(name, head, create_scope_now_=True, **kwargs)

  # Embed observations and unroll model.
  embedded = encoder(obs)
  # Separate overshooting and zero step observations because computing
  # overshooting targets for images would be expensive.
  zero_step_obs = {}
  overshooting_obs = {}
  for key, value in obs.items():
    if config.zero_step_losses.get(key):
      zero_step_obs[key] = value
    if config.overshooting_losses.get(key):
      overshooting_obs[key] = value
  assert config.overshooting <= config.batch_shape[1]
  target, prior, posterior, mask = tools.overshooting(
      cell, overshooting_obs, embedded, prev_action, data['length'],
      config.overshooting + 1)
  losses = []

  # Zero step losses.
  _, zs_prior, zs_posterior, zs_mask = tools.nested.map(
      lambda tensor: tensor[:, :, :1], (target, prior, posterior, mask))
  zs_target = {key: value[:, :, None] for key, value in zero_step_obs.items()}
  zero_step_losses = utility.compute_losses(
      config.zero_step_losses, cell, heads, step, zs_target, zs_prior,
      zs_posterior, zs_mask, config.free_nats, debug=config.debug)
  losses += [
      loss * config.zero_step_losses[name] for name, loss in
      zero_step_losses.items()]
  # Ensure a 'divergence' entry exists for the metrics printed below.
  if 'divergence' not in zero_step_losses:
    zero_step_losses['divergence'] = tf.zeros((), dtype=tf.float32)

  # Overshooting losses.
  if config.overshooting > 1:
    os_target, os_prior, os_posterior, os_mask = tools.nested.map(
        lambda tensor: tensor[:, :, 1:-1], (target, prior, posterior, mask))
    if config.stop_os_posterior_gradient:
      os_posterior = tools.nested.map(tf.stop_gradient, os_posterior)
    overshooting_losses = utility.compute_losses(
        config.overshooting_losses, cell, heads, step, os_target, os_prior,
        os_posterior, os_mask, config.free_nats, debug=config.debug)
    losses += [
        loss * config.overshooting_losses[name] for name, loss in
        overshooting_losses.items()]
  else:
    overshooting_losses = {}
  if 'divergence' not in overshooting_losses:
    overshooting_losses['divergence'] = tf.zeros((), dtype=tf.float32)

  # Workaround for TensorFlow deadlock bug.
  loss = sum(losses)
  # The dummy variable keeps the non-train branch differentiable-free while
  # still producing a tensor of matching type.
  train_loss = tf.cond(
      tf.equal(phase, 'train'),
      lambda: loss,
      lambda: 0 * tf.get_variable('dummy_loss', (), tf.float32))
  train_summary = utility.apply_optimizers(
      train_loss, step, should_summarize, config.optimizers)
  # train_summary = tf.cond(
  #     tf.equal(phase, 'train'),
  #     lambda: utility.apply_optimizers(
  #         loss, step, should_summarize, config.optimizers),
  #     str, name='optimizers')

  # Active data collection.
  collect_summaries = []
  # Snapshot locals so simulate_episodes can access graph pieces by name.
  graph = tools.AttrDict(locals())
  with tf.variable_scope('collection'):
    should_collects = []
    for name, params in config.sim_collects.items():
      after, every = params.steps_after, params.steps_every
      # Only collect during training and on the scheduled steps.
      should_collect = tf.logical_and(
          tf.equal(phase, 'train'),
          tools.schedule.binary(step, config.batch_shape[0], after, every))
      collect_summary, _ = tf.cond(
          should_collect,
          functools.partial(
              utility.simulate_episodes, config, params, graph, name),
          lambda: (tf.constant(''), tf.constant(0.0)),
          name='should_collect_' + params.task.name)
      should_collects.append(should_collect)
      collect_summaries.append(collect_summary)

  # Compute summaries.
  graph = tools.AttrDict(locals())  # Refresh snapshot with collection results.
  with tf.control_dependencies(collect_summaries):
    summaries, score = tf.cond(
        should_summarize,
        lambda: define_summaries.define_summaries(graph, config),
        lambda: (tf.constant(''), tf.zeros((0,), tf.float32)),
        name='summaries')
  with tf.device('/cpu:0'):
    summaries = tf.summary.merge([summaries, train_summary])
    # summaries = tf.summary.merge(
    #     [summaries, train_summary] + collect_summaries)
    # Mean posterior entropy over the valid (unmasked) zero-step states.
    zs_entropy = (tf.reduce_sum(tools.mask(
        cell.dist_from_state(zs_posterior, zs_mask).entropy(), zs_mask)) /
        tf.reduce_sum(tf.to_float(zs_mask)))
    dependencies.append(utility.print_metrics((
        ('score', score),
        ('loss', loss),
        ('zs_entropy', zs_entropy),
        ('zs_divergence', zero_step_losses['divergence']),
    ), step, config.mean_metrics_every))
  # Force metric printing to run before the score is reported.
  with tf.control_dependencies(dependencies):
    score = tf.identity(score)
  return score, summaries
Ejemplo n.º 11
0
def define_model(data, trainer, config):
    """Build the training graph: model losses, optimizers, collection, summaries.

    Constructs the latent dynamics model, the zero-step and overshooting
    losses, the (optionally multi-GPU) optimization ops, the active episode
    collection ops, and the summary ops.

    Args:
      data: Dict of batched tensors; must contain 'action' and 'length' plus
        the observation keys consumed by ``config.heads`` (e.g. 'image').
      trainer: Object exposing ``step``, ``global_step``, ``phase`` and
        ``log`` tensors that drive the training loop.
      config: AttrDict with network constructors, loss scales, and schedules.

    Returns:
      Tuple ``(score, summaries)`` of tensors evaluated by the training loop.
    """
    tf.logging.info('Build TensorFlow compute graph.')
    dependencies = []
    step = trainer.step
    global_step = trainer.global_step  # tf.train.get_or_create_global_step()
    phase = trainer.phase
    should_summarize = trainer.log

    # NOTE(review): NUM_GPU is a module-level constant defined elsewhere in
    # this file.
    num_gpu = NUM_GPU

    #  for multi-gpu
    if num_gpu > 1:
        # Per-loss-head lists of gradients (one entry per GPU) and the
        # variables they will be applied to.
        var_for_trainop = {}
        grads_dict = {}

        # data split for multi-gpu: shard every tensor in `data` along the
        # batch dimension, one shard per GPU.
        data_dict = {}
        for loss_head, optimizer_cls in config.optimizers.items():
            grads_dict[loss_head] = []
            var_for_trainop[loss_head] = []

        for gpu_i in range(num_gpu):
            data_dict[gpu_i] = {}

        for data_item in list(data.keys()):
            data_split = tf.split(data[data_item], num_gpu)
            for gpu_j in range(num_gpu):
                data_dict[gpu_j][data_item] = data_split[gpu_j]

    for gpu_k in range(num_gpu):
        with tf.device('/gpu:%s' % gpu_k):
            # Regex matching the shared variable scope below; handed to
            # utility.get_grads to select the variables to differentiate.
            scope_name = r'.+shared_vars'
            with tf.name_scope('%s_%d' % ("GPU", gpu_k)):  # 'GPU'
                with tf.variable_scope(name_or_scope='shared_vars',
                                       reuse=tf.AUTO_REUSE):

                    #  for multi-gpu: rebind `data` to this GPU's shard.
                    # NOTE(review): this overwrites the `data` argument, so
                    # after the loop it refers to the last GPU's shard.
                    if num_gpu > 1:
                        data = data_dict[gpu_k]

                    # Preprocess data.
                    # with tf.device('/cpu:0'):
                    if config.dynamic_action_noise:
                        data['action'] += tf.random_normal(
                            tf.shape(data['action']), 0.0,
                            config.dynamic_action_noise)
                    # Shift actions right by one step so that position t holds
                    # the action that preceded observation t.
                    prev_action = tf.concat(
                        [0 * data['action'][:, :1], data['action'][:, :-1]],
                        1)  # i.e.: (0 * a1, a1, a2, ..., a49)
                    obs = data.copy()
                    del obs['length']

                    # Instantiate network blocks.
                    cell = config.cell()
                    kwargs = dict()
                    encoder = tf.make_template('encoder',
                                               config.encoder,
                                               create_scope_now_=True,
                                               **kwargs)
                    heads = {}
                    for key, head in config.heads.items(
                    ):  # heads: network of 'image', 'reward', 'state'
                        name = 'head_{}'.format(key)
                        kwargs = dict(data_shape=obs[key].shape[2:].as_list())
                        heads[key] = tf.make_template(name,
                                                      head,
                                                      create_scope_now_=True,
                                                      **kwargs)

                    # Embed observations and unroll model.
                    embedded = encoder(obs)  # encode obs['image']
                    # Separate overshooting and zero step observations because computing
                    # overshooting targets for images would be expensive.
                    zero_step_obs = {}
                    overshooting_obs = {}
                    for key, value in obs.items():
                        if config.zero_step_losses.get(key):
                            zero_step_obs[key] = value
                        if config.overshooting_losses.get(key):
                            overshooting_obs[key] = value
                    assert config.overshooting <= config.batch_shape[1]
                    target, prior, posterior, mask = tools.overshooting(  # prior:{'mean':shape(40,50,51,30), ...}; posterior:{'mean':shape(40,50,51,30), ...}
                        cell,
                        overshooting_obs,
                        embedded,
                        prev_action,
                        data[
                            'length'],  # target:{'reward':shape(40,50,51), ...}; mask:shape(40,50,51)
                        config.overshooting + 1)
                    losses = []

                    # Zero step losses: slice out the zero-step (distance 0)
                    # entries of the overshooting outputs.
                    _, zs_prior, zs_posterior, zs_mask = tools.nested.map(
                        lambda tensor: tensor[:, :, :1],
                        (target, prior, posterior, mask))
                    zs_target = {
                        key: value[:, :, None]
                        for key, value in zero_step_obs.items()
                    }
                    zero_step_losses = utility.compute_losses(
                        config.zero_step_losses,
                        cell,
                        heads,
                        step,
                        zs_target,
                        zs_prior,
                        zs_posterior,
                        zs_mask,
                        config.free_nats,
                        debug=config.debug)
                    losses += [
                        loss * config.zero_step_losses[name]
                        for name, loss in zero_step_losses.items()
                    ]
                    # Ensure the key exists for the metrics printed below.
                    if 'divergence' not in zero_step_losses:
                        zero_step_losses['divergence'] = tf.zeros(
                            (), dtype=tf.float32)

                    # Overshooting losses.
                    if config.overshooting > 1:
                        os_target, os_prior, os_posterior, os_mask = tools.nested.map(
                            lambda tensor: tensor[:, :, 1:-1],
                            (target, prior, posterior, mask))
                        if config.stop_os_posterior_gradient:
                            os_posterior = tools.nested.map(
                                tf.stop_gradient, os_posterior)
                        overshooting_losses = utility.compute_losses(
                            config.overshooting_losses,
                            cell,
                            heads,
                            step,
                            os_target,
                            os_prior,
                            os_posterior,
                            os_mask,
                            config.free_nats,
                            debug=config.debug)
                        losses += [
                            loss * config.overshooting_losses[name]
                            for name, loss in overshooting_losses.items()
                        ]
                    else:
                        overshooting_losses = {}
                    if 'divergence' not in overshooting_losses:
                        overshooting_losses['divergence'] = tf.zeros(
                            (), dtype=tf.float32)

                    # Workaround for TensorFlow deadlock bug.
                    loss = sum(losses)
                    train_loss = tf.cond(
                        tf.equal(phase, 'train'), lambda: loss,
                        lambda: 0 * tf.get_variable('dummy_loss',
                                                    (), tf.float32))

                    #  for multi-gpu: single GPU applies optimizers directly;
                    #  multi-GPU only accumulates gradients here, applying
                    #  their average after the loop.
                    if num_gpu == 1:
                        train_summary = utility.apply_optimizers(
                            train_loss, step, should_summarize,
                            config.optimizers)
                    else:
                        training_grad_dict = utility.get_grads(
                            train_loss,
                            step,
                            should_summarize,
                            config.optimizers,
                            include_var=(scope_name, ))
                        for a in grads_dict.keys():
                            grads_dict[a].append(training_grad_dict[a]["grad"])
                            # Variables are shared across GPUs; record them
                            # only once, on the first GPU.
                            if gpu_k == 0:
                                var_for_trainop[a].append(
                                    training_grad_dict[a]["var"])
                        # train_summary = tf.cond(
                        #     tf.equal(phase, 'train'),
                        #     lambda: utility.apply_optimizers(
                        #         loss, step, should_summarize, config.optimizers),
                        #     str, name='optimizers')

    #  for multi-gpu: average the per-GPU gradients on the CPU and build the
    #  single train op that applies them.
    if num_gpu > 1:
        averaged_gradients = {}
        with tf.device('/cpu:0'):
            for a in grads_dict.keys():
                averaged_gradients[a] = average_gradients(grads_dict[a])
            train_summary = utility.apply_grads(averaged_gradients,
                                                var_for_trainop, step,
                                                should_summarize,
                                                config.optimizers)

    # Active data collection.
    collect_summaries = []
    # Snapshot of all locals so the simulation graph can reference them.
    graph = tools.AttrDict(locals())
    with tf.variable_scope('collection'):
        should_collects = []
        for name, params in config.sim_collects.items():
            after, every = params.steps_after, params.steps_every
            # Only collect during the train phase, on the configured schedule.
            should_collect = tf.logical_and(
                tf.equal(phase, 'train'),
                tools.schedule.binary(step, config.batch_shape[0], after,
                                      every))
            collect_summary, score_train = tf.cond(
                should_collect,
                functools.partial(utility.simulate_episodes, config, params,
                                  graph, name),
                lambda: (tf.constant(''), tf.constant(0.0)),
                name='should_collect_' + params.task.name)
            should_collects.append(should_collect)
            collect_summaries.append(collect_summary)

    # Compute summaries.
    # NOTE(review): `score_train` used below is the score of the *last* entry
    # of config.sim_collects and raises NameError if sim_collects is empty.
    graph = tools.AttrDict(locals())
    with tf.control_dependencies(collect_summaries):
        summaries, score = tf.cond(
            should_summarize,
            lambda: define_summaries.define_summaries(graph, config),
            lambda: (tf.constant(''), tf.zeros((0, ), tf.float32)),
            name='summaries')
    with tf.device('/cpu:0'):
        summaries = tf.summary.merge([summaries, train_summary])
        # summaries = tf.summary.merge([summaries, train_summary] + collect_summaries)
        # Mean entropy of the posterior state distribution over valid steps.
        zs_entropy = (tf.reduce_sum(
            tools.mask(
                cell.dist_from_state(zs_posterior, zs_mask).entropy(),
                zs_mask)) / tf.reduce_sum(tf.to_float(zs_mask)))
        dependencies.append(
            utility.print_metrics((
                ('score', score_train),
                ('loss', loss),
                ('zs_entropy', zs_entropy),
                ('zs_divergence', zero_step_losses['divergence']),
            ), step, config.mean_metrics_every))
    # Force the metric prints to run whenever the score is evaluated.
    with tf.control_dependencies(dependencies):
        score = tf.identity(score)
    return score, summaries
Ejemplo n.º 12
0
    # NOTE(review): fragment — `parser`, `boolean`, `main`, and the logdir
    # argument are defined above this excerpt.
    parser.add_argument(
        '--config',
        default='default',
        help='Select a configuration function from scripts/configs.py.')
    parser.add_argument(
        '--params',
        default="{tasks: [carla]}",
        type=str,  # YAML string; e.g. pendulum or carla tasks.
        help='YAML formatted dictionary to be used by the config.')
    parser.add_argument(
        '--ping_every',
        type=int,
        default=0,
        help='Used to prevent conflicts between multiple workers; 0 to disable.'
    )
    parser.add_argument(
        '--resume_runs',
        type=boolean,
        default=True,
        help='Whether to resume unfinished runs in the log directory.')
    # Known flags are consumed here; unknown ones are forwarded to tf.app.run.
    args_, remaining = parser.parse_known_args(
    )  # e.g. args_ = Namespace(config='default', logdir='./log_debug', num_runs=1, params={'tasks': ['carla']}, ping_every=0, resume_runs=True)
    # '#' is replaced by ',' before parsing — presumably so commas can be
    # escaped on the command line; AttrDict exposes keys as attributes.
    args_.params = tools.AttrDict(
        yaml.safe_load(args_.params.replace('#', ','))
    )  # class AttrDict: """Wrap a dictionary to access keys as attributes."""
    args_.logdir = args_.logdir and os.path.expanduser(args_.logdir)
    remaining.insert(0, sys.argv[0])
    tf.app.run(
        lambda _: main(args_), remaining
    )  # tf.app.run(main, argv): Runs the program with an optional 'main' function and 'argv' list.
Ejemplo n.º 13
0
def _define_simulation(task,
                       config,
                       params,
                       horizon,
                       batch_size,
                       objective='reward',
                       rewards=False):
    """Create the simulation spec for collecting episodes with a planner.

    Selects the planning function named by ``params.planner``, binds its
    hyperparameters, and packages it with the task and objective.

    Args:
      task: Task object to simulate episodes on.
      config: Mutable AttrDict; ``rival`` and ``planner`` are written to it.
      params: AttrDict of hyperparameters (planner settings, logdir, ...).
      horizon: Planning horizon passed to the planner.
      batch_size: Number of parallel agents to simulate.
      objective: Name of an objective function in ``objectives_lib``.
      rewards: Unused; kept for interface compatibility.

    Returns:
      AttrDict with ``task``, ``num_agents``, ``planner``, and ``objective``.

    Raises:
      NotImplementedError: If the planner name is not recognized.
    """
    config.rival = params.get('rival', '')
    config.planner = params.get('planner', 'cem')
    # Hyperparameters shared by every planner variant.
    common = dict(
        amount=params.get('planner_amount', 1000),
        iterations=params.get('planner_iterations', 10),
        topk=params.get('planner_topk', 100),
        horizon=horizon)
    # Planner name -> (planning function, announcement message).  All of
    # these variants additionally take evaluation arguments; plain 'cem'
    # does not.
    eval_planners = {
        'cem_eval': (control.planning.cross_entropy_method_eval,
                     'Cem_eval !!!'),
        'sim': (control.planning.simulator_planner, 'Sim eval'),
        'dual1': (control.planning.cross_entropy_method_dual1, 'dual1'),
        'dual2': (control.planning.cross_entropy_method_dual2, 'dual2'),
    }
    if config.planner == 'cem':
        print('normal cem')
        planner_fn = tools.bind(control.planning.cross_entropy_method,
                                **common)
    elif config.planner in eval_planners:
        planner, message = eval_planners[config.planner]
        planner_fn = tools.bind(planner,
                                eval_ratio=params.get('eval_ratio', 0.1),
                                logdir=params.logdir,
                                task=config.tasks[0],
                                **common)
        print(message)
    else:
        raise NotImplementedError(config.planner)
    return tools.AttrDict(task=task,
                          num_agents=batch_size,
                          planner=planner_fn,
                          objective=tools.bind(getattr(objectives_lib,
                                                       objective),
                                               params=params))