Code Example #1
File: configs.py Project: jsikyoon/dreamer-1
def make_config(params):
    config = tools.AttrDict()
    config.debug = params.get('debug', False)
    with params.unlocked:
        for name in params.get('defaults', ['dreamer']):
            for key, value in DEFAULTS[name].items():
                if key not in params:
                    params[key] = value
    config.loss_scales = tools.AttrDict()
    config = _data_processing(config, params)
    config = _model_components(config, params)
    config = _tasks(config, params)
    config = _loss_functions(config, params)
    config = _training_schedule(config, params)
    # Mark params that are only accessed at run-time as used.
    run_time_keys = [
        'planner_discount', 'planner_lambda', 'objective_entropy_scale',
        'normalize_actions', 'max_length', 'render_size', 'atari_lifes',
        'atari_noops', 'atari_sticky', 'atari_train_max_length',
        'atari_grayscale'
    ]
    for key in run_time_keys:
        params.get(key, None)
    if params.untouched:
        message = 'Found unused config overrides: {}'
        raise KeyError(message.format(', '.join(params.untouched)))
    return config
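Every snippet on this page revolves around tools.AttrDict. From the way make_config uses it, the type supports attribute-style access, get with defaults, a "with params.unlocked:" context for temporary mutation, a copy(_unlocked=...) method (example #11), and an untouched property listing keys that were never read, which the function above uses to reject unused overrides. The class below is a minimal sketch of that interface, inferred from usage only; the actual dreamer implementation may differ, and details such as the _locked flag, the default lock state, and the exact copy semantics are assumptions.

# Minimal AttrDict sketch, reconstructed from how these examples use it.
# This is an assumption about the interface, NOT the dreamer source.
import contextlib


class AttrDict(dict):
    """Dictionary with attribute access, locking, and usage tracking."""

    def __init__(self, *args, _unlocked=True, **kwargs):
        super().__init__(*args, **kwargs)
        self.__dict__['_locked'] = not _unlocked
        self.__dict__['_touched'] = set()  # keys read or written so far

    def __getattr__(self, name):
        if name in self:
            self.__dict__['_touched'].add(name)
            return self[name]
        raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value

    def __setitem__(self, key, value):
        if self.__dict__.get('_locked'):
            raise RuntimeError('Locked; use "with d.unlocked:" to modify ' + key)
        self.__dict__['_touched'].add(key)
        super().__setitem__(key, value)

    def get(self, key, default=None):
        self.__dict__['_touched'].add(key)
        return super().get(key, default)

    def copy(self, _unlocked=False, **overrides):
        result = type(self)(self, _unlocked=True)
        result.update(overrides)
        result.__dict__['_locked'] = not _unlocked
        return result

    @property
    @contextlib.contextmanager
    def unlocked(self):
        previous = self.__dict__['_locked']
        self.__dict__['_locked'] = False
        try:
            yield self
        finally:
            self.__dict__['_locked'] = previous

    @property
    def untouched(self):
        return [key for key in self if key not in self.__dict__['_touched']]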
Code Example #2
File: configs.py Project: jsikyoon/dreamer-1
def _active_collection(tasks, collects, defaults, config, params):
    sims = tools.AttrDict()
    for task in tasks:
        for user_collect in collects:
            for key in user_collect:
                if key not in defaults:
                    message = 'Invalid key {} in active collection config.'
                    raise KeyError(message.format(key))
            collect = tools.AttrDict(defaults, _unlocked=True)
            collect.update(user_collect)
            collect.planner = _define_planner(collect.planner, collect.horizon,
                                              config, params)
            collect.objective = tools.bind(getattr(objectives_lib,
                                                   collect.objective),
                                           params=params)
            if collect.give_rewards:
                collect.task = task
            else:
                env_ctor = tools.bind(
                    lambda ctor: control.wrappers.NoRewardHint(ctor()),
                    task.env_ctor)
                collect.task = tasks_lib.Task(task.name, env_ctor)
            collect.exploration = tools.AttrDict(
                scale=collect.action_noise_scale,
                type=collect.action_noise_type,
                schedule=tools.bind(tools.schedule.linear,
                                    ramp=collect.action_noise_ramp,
                                    min=collect.action_noise_min),
                factors=collect.action_noise_factors)
            name = '{}_{}_{}'.format(collect.prefix, collect.name, task.name)
            assert name not in sims, (set(sims.keys()), name)
            sims[name] = collect
    return sims
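tools.bind appears throughout these snippets as partial application: tools.bind(fn, *args, **kwargs) returns fn with those arguments pre-filled, and example #10 below uses functools.partial directly in the same role. To run the fragments standalone, a plausible stand-in is the wrapper below; treating it as equivalent to the dreamer helper is an assumption.

import functools


# Assumed stand-in for tools.bind: plain partial application, mirroring the
# direct functools.partial usage in example #10 (utility.py).
def bind(fn, *args, **kwargs):
    return functools.partial(fn, *args, **kwargs)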
Code Example #3
 def test_dm_control_thread(self):
     args = tools.AttrDict(logdir=self.get_temp_dir(),
                           num_runs=1,
                           params=tools.AttrDict(
                               defaults=['dreamer', 'debug'],
                               tasks=['cup_catch'],
                               isolate_envs='thread',
                               max_steps=30),
                           ping_every=0,
                           resume_runs=False)
     train.main(args)
Code Example #4
 def test_planet(self):
     args = tools.AttrDict(logdir=self.get_temp_dir(),
                           num_runs=1,
                           params=tools.AttrDict(
                               defaults=['planet', 'debug'],
                               tasks=['dummy'],
                               isolate_envs='none',
                               max_steps=30,
                               planner_horizon=3),
                           ping_every=0,
                           resume_runs=False)
     train.main(args)
Code Example #5
 def test_no_value(self):
     args = tools.AttrDict(logdir=self.get_temp_dir(),
                           num_runs=1,
                           params=tools.AttrDict(
                               defaults=['actor', 'debug'],
                               tasks=['dummy'],
                               isolate_envs='none',
                               max_steps=30,
                               imagination_horizon=3),
                           ping_every=0,
                           resume_runs=False)
     train.main(args)
Code Example #6
 def test_atari_thread(self):
     args = tools.AttrDict(logdir=self.get_temp_dir(),
                           num_runs=1,
                           params=tools.AttrDict(
                               defaults=['dreamer', 'debug'],
                               tasks=['atari_pong'],
                               isolate_envs='thread',
                               action_head_dist='onehot_score',
                               action_noise_type='epsilon_greedy',
                               max_steps=30),
                           ping_every=0,
                           resume_runs=False)
     train.main(args)
Code Example #7
 def test_dreamer(self):
     args = tools.AttrDict(logdir=self.get_temp_dir(),
                           num_runs=1,
                           params=tools.AttrDict(
                               defaults=['dreamer', 'debug'],
                               tasks=['dummy'],
                               isolate_envs='none',
                               max_steps=30,
                               train_planner='policy_sample',
                               test_planner='policy_mode',
                               planner_objective='reward_value',
                               action_head=True,
                               value_head=True,
                               imagination_horizon=3),
                           ping_every=0,
                           resume_runs=False)
     train.main(args)
Code Example #8
File: configs.py Project: jsikyoon/dreamer-1
def _initial_collection(config, params):
    num_seed_episodes = int(params.get('num_seed_episodes', 5))
    num_seed_steps = int(params.get('num_seed_steps', 2500))
    sims = tools.AttrDict()
    for task in config.train_tasks:
        sims['train-' + task.name] = tools.AttrDict(
            task=task,
            mode='train',
            save_episode_dir=config.train_dir,
            num_episodes=num_seed_episodes,
            num_steps=num_seed_steps,
            give_rewards=params.get('seed_episode_rewards', True))
    for task in config.test_tasks:
        sims['test-' + task.name] = tools.AttrDict(
            task=task,
            mode='test',
            save_episode_dir=config.test_dir,
            num_episodes=num_seed_episodes,
            num_steps=num_seed_steps,
            give_rewards=True)
    return sims
Code Example #9
File: trainer.py Project: letusfly85/dreamer-tf1
 def __init__(self, logdir, config=None):
     self._logdir = logdir
     self._global_step = tf.train.get_or_create_global_step()
     self._step = tf.placeholder(tf.int32, name='step')
     self._phase = tf.placeholder(tf.string, name='phase')
     self._log = tf.placeholder(tf.bool, name='log')
     self._report = tf.placeholder(tf.bool, name='report')
     self._reset = tf.placeholder(tf.bool, name='reset')
     self._phases = []
     # Checkpointing.
     self._loaders = []
     self._savers = []
     self._logdirs = []
     self._checkpoints = []
     self._config = config or tools.AttrDict()
Code Example #10
File: utility.py Project: letusfly85/dreamer-tf1
def simulate(metrics, config, params, graph, cleanups, gif_summary, name):
    def env_ctor():
        env = params.task.env_ctor()
        if params.save_episode_dir:
            env = control.wrappers.CollectDataset(env, params.save_episode_dir)
        return env

    bind_or_none = lambda x, **kw: x and functools.partial(x, **kw)
    cell = graph.cell
    agent_config = tools.AttrDict(cell=cell,
                                  encoder=graph.encoder,
                                  planner=functools.partial(params.planner,
                                                            graph=graph),
                                  objective=bind_or_none(params.objective,
                                                         graph=graph),
                                  exploration=params.exploration,
                                  preprocess_fn=config.preprocess_fn,
                                  postprocess_fn=config.postprocess_fn)
    params = params.copy()
    with params.unlocked:
        params.update(agent_config)
    with agent_config.unlocked:
        agent_config.update(params)
    with tf.variable_scope(name):
        summaries = []
        env = control.create_batch_env(env_ctor, params.num_envs,
                                       config.isolate_envs)
        agent = control.MPCAgent(env, graph.step, False, False, agent_config)
        cleanup = lambda: env.close()
        scores, lengths, data = control.simulate(agent, env,
                                                 params.num_episodes,
                                                 params.num_steps)
        summaries.append(tf.summary.scalar('return', scores[0]))
        summaries.append(tf.summary.scalar('length', lengths[0]))
        if gif_summary:
            summaries.append(
                tools.gif_summary('gif', data['image'], max_outputs=1, fps=20))
        write_metrics = [
            metrics.add_scalars(name + '/return', scores),
            metrics.add_scalars(name + '/length', lengths),
            # metrics.add_tensor(name + '/frames', data['image']),
        ]
        with tf.control_dependencies(write_metrics):
            summary = tf.summary.merge(summaries)
    cleanups.append(cleanup)  # Work around tf.cond() tensor return type.
    return summary, tf.reduce_mean(scores)
Code Example #11
File: configs.py Project: jsikyoon/dreamer-1
def _training_schedule(config, params):
    config.train_steps = int(params.get('train_steps', 50000))
    config.test_steps = int(params.get('test_steps', config.batch_shape[0]))
    config.max_steps = int(params.get('max_steps', 5e7))
    config.train_log_every = params.get('train_log_every', config.train_steps)
    config.train_checkpoint_every = None
    config.test_checkpoint_every = int(
        params.get('checkpoint_every', 10 * config.test_steps))
    config.checkpoint_to_load = None
    config.savers = [tools.AttrDict(exclude=(r'.*_temporary.*', ))]
    config.print_metrics_every = config.train_steps // 10
    config.train_dir = os.path.join(params.logdir, 'train_episodes')
    config.test_dir = os.path.join(params.logdir, 'test_episodes')
    config.random_collects = _initial_collection(config, params)

    defaults = tools.AttrDict()
    defaults.name = 'main'
    defaults.give_rewards = True
    defaults.horizon = params.get('planner_horizon', 12)
    defaults.objective = params.get('planner_objective', 'reward_value')
    defaults.num_envs = params.get('num_envs', 1)
    defaults.num_episodes = params.get('collect_episodes', defaults.num_envs)
    defaults.num_steps = params.get('collect_steps', 500)
    defaults.steps_after = params.get('collect_every', 5000)
    defaults.steps_every = params.get('collect_every', 5000)
    defaults.steps_until = -1
    defaults.action_noise_type = params.get('action_noise_type',
                                            'additive_normal')

    train_defaults = defaults.copy(_unlocked=True)
    train_defaults.prefix = 'train'
    train_defaults.mode = 'train'
    train_defaults.save_episode_dir = config.train_dir
    train_defaults.planner = params.get('train_planner', 'policy_sample')
    train_defaults.objective = params.get('train_planner_objective',
                                          defaults.objective)
    train_defaults.action_noise_scale = params.get('train_action_noise', 0.3)
    train_defaults.action_noise_ramp = params.get('train_action_noise_ramp', 0)
    train_defaults.action_noise_min = params.get('train_action_noise_min', 0.0)
    train_defaults.action_noise_factors = params.get(
        'train_action_noise_factors', [])
    config.train_collects = _active_collection(
        config.train_tasks, params.get('train_collects', [{}]), train_defaults,
        config, params)

    test_defaults = defaults.copy(_unlocked=True)
    test_defaults.prefix = 'test'
    test_defaults.mode = 'test'
    test_defaults.save_episode_dir = config.test_dir
    test_defaults.planner = params.get('test_planner', 'policy_mode')
    test_defaults.objective = params.get('test_planner_objective',
                                         defaults.objective)
    test_defaults.action_noise_scale = params.get('test_action_noise', 0.0)
    test_defaults.action_noise_ramp = 0
    test_defaults.action_noise_min = 0.0
    test_defaults.action_noise_factors = params.get(
        'test_action_noise_factors', None)
    config.test_collects = _active_collection(
        config.test_tasks, params.get('test_collects', [{}]), test_defaults,
        config, params)
    return config
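_training_schedule layers configuration in three steps: the shared defaults, the train_defaults/test_defaults copies that pull overrides out of params, and finally the per-collect user dictionaries merged in by _active_collection (example #2). The trace below shows how a single key resolves through those layers; it assumes the AttrDict sketch given after example #1 and uses made-up values.

# Hypothetical resolution of one key through the three layers, assuming the
# AttrDict sketch from example #1. Values are illustrative.
defaults = AttrDict(action_noise_type='additive_normal')
train_defaults = defaults.copy(_unlocked=True)
train_defaults.action_noise_scale = 0.1      # e.g. params.get('train_action_noise', 0.3)
collect = AttrDict(train_defaults, _unlocked=True)
collect.update({'action_noise_scale': 0.5})  # a per-collect user override
assert collect.action_noise_type == 'additive_normal'  # inherited from defaults
assert collect.action_noise_scale == 0.5                # innermost layer wins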
Code Example #12
File: configs.py Project: jsikyoon/dreamer-1
def _loss_functions(config, params):
    for head in config.gradient_heads:
        assert head in config.heads, head
    config.imagination_horizon = params.get('imagination_horizon', 15)
    config.imagination_skip_last = params.get('imagination_skip_last', None)
    config.imagination_include_initial = params.get(
        'imagination_include_initial', True)

    config.action_source = params.get('action_source', 'model')
    config.action_model_horizon = params.get('action_model_horizon', None)
    config.action_bootstrap = params.get('action_bootstrap', True)
    config.action_discount = params.get('action_discount', 0.99)
    config.action_lambda = params.get('action_lambda', 0.95)
    config.action_target_update = params.get('action_target_update', 1)
    config.action_target_period = params.get('action_target_period', 50000)
    config.action_loss_pcont = params.get('action_loss_pcont', False)
    config.action_pcont_stop_grad = params.get('action_pcont_stop_grad', False)
    config.action_pcont_weight = params.get('action_pcont_weight', True)

    config.value_source = params.get('value_source', 'model')
    config.value_model_horizon = params.get('value_model_horizon', None)
    config.value_discount = params.get('value_discount', 0.99)
    config.value_lambda = params.get('value_lambda', 0.95)
    config.value_bootstrap = params.get('value_bootstrap', True)
    config.value_target_update = params.get('value_target_update', 1)
    config.value_target_period = params.get('value_target_period', 50000)
    config.value_loss_pcont = params.get('value_loss_pcont', False)
    config.value_pcont_weight = params.get('value_pcont_weight', True)
    config.value_maxent = params.get('value_maxent', False)

    config.action_beta = params.get('action_beta', 0.0)
    config.action_beta_dims_value = params.get('action_beta_dims_value', None)
    config.state_beta = params.get('state_beta', 0.0)
    config.stop_grad_pre_action = params.get('stop_grad_pre_action', True)
    config.pcont_label_weight = params.get('pcont_label_weight', None)

    config.loss_scales.divergence = params.get('divergence_scale', 1.0)
    config.loss_scales.global_divergence = params.get('global_div_scale', 0.0)
    config.loss_scales.overshooting = params.get('overshooting_scale', 0.0)
    for head in config.heads:
        if head in ('value_target', 'action_target'):  # Untrained.
            continue
        config.loss_scales[head] = params.get(head + '_loss_scale', 1.0)

    config.free_nats = params.get('free_nats', 3.0)
    config.overshooting_distance = params.get('overshooting_distance', 0)
    config.os_stop_posterior_grad = params.get('os_stop_posterior_grad', True)
    config.cpc_contrast = params.get('cpc_contrast', 'window')
    config.cpc_batch_amount = params.get('cpc_batch_amount', 10)
    config.cpc_time_amount = params.get('cpc_time_amount', 30)

    optimizer_cls = tools.bind(tf.train.AdamOptimizer,
                               epsilon=params.get('optimizer_epsilon', 1e-4))
    config.optimizers = tools.AttrDict()
    config.optimizers.default = tools.bind(
        tools.CustomOptimizer,
        optimizer_cls=optimizer_cls,
        # schedule=tools.bind(tools.schedule.linear, ramp=0),
        learning_rate=params.get('default_lr', 1e-3),
        clipping=params.get('default_gradient_clipping', 1000.0))
    config.optimizers.model = config.optimizers.default.copy(
        learning_rate=params.get('model_lr', 6e-4),
        clipping=params.get('model_gradient_clipping', 100.0))
    config.optimizers.value = config.optimizers.default.copy(
        learning_rate=params.get('value_lr', 8e-5),
        clipping=params.get('value_gradient_clipping', 100.0))
    config.optimizers.action = config.optimizers.default.copy(
        learning_rate=params.get('action_lr', 8e-5),
        clipping=params.get('action_gradient_clipping', 100.0))
    return config
Code Example #13
File: configs.py Project: jsikyoon/dreamer-1
def _model_components(config, params):
    config.gradient_heads = params.get('gradient_heads', ['image', 'reward'])
    config.activation = ACTIVATIONS[params.get('activation', 'elu')]
    config.num_layers = params.get('num_layers', 3)
    config.num_units = params.get('num_units', 400)
    encoder = params.get('encoder', 'conv')
    if encoder == 'conv':
        config.encoder = networks.conv.encoder
    elif encoder == 'proprio':
        config.encoder = tools.bind(networks.proprio.encoder,
                                    keys=params.get('proprio_encoder_keys'),
                                    num_layers=params.get(
                                        'proprio_encoder_num_layers', 3),
                                    units=params.get('proprio_encoder_units',
                                                     300))
    else:
        raise NotImplementedError(encoder)
    config.head_network = tools.bind(networks.feed_forward,
                                     num_layers=config.num_layers,
                                     units=config.num_units,
                                     activation=config.activation)
    config.heads = tools.AttrDict()
    if params.get('value_head', True):
        config.heads.value = tools.bind(
            config.head_network,
            num_layers=params.get('value_layers', 3),
            data_shape=[],
            dist=params.get('value_dist', 'normal'))
    if params.get('value_target_head', False):
        config.heads.value_target = tools.bind(
            config.head_network,
            num_layers=params.get('value_layers', 3),
            data_shape=[],
            stop_gradient=True,
            dist=params.get('value_dist', 'normal'))
    if params.get('return_head', False):
        config.heads['return'] = tools.bind(config.head_network,
                                            activation=config.activation)
    if params.get('action_head', True):
        config.heads.action = tools.bind(
            config.head_network,
            num_layers=params.get('action_layers', 4),
            mean_activation=ACTIVATIONS[params.get('action_mean_activation',
                                                   'none')],
            dist=params.get('action_head_dist', 'tanh_normal_tanh'),
            std=params.get('action_head_std', 'learned'),
            min_std=params.get('action_head_min_std', 1e-4),
            init_std=params.get('action_head_init_std', 5.0))
    if params.get('action_target_head', False):
        config.heads.action_target = tools.bind(
            config.head_network,
            num_layers=params.get('action_layers', 4),
            stop_gradient=True,
            mean_activation=ACTIVATIONS[params.get('action_mean_activation',
                                                   'none')],
            dist=params.get('action_head_dist', 'tanh_normal_tanh'),
            std=params.get('action_head_std', 'learned'),
            min_std=params.get('action_head_min_std', 1e-4),
            init_std=params.get('action_head_init_std', 5.0))
    if params.get('cpc_head', False):
        config.heads.cpc = config.head_network.copy(
            dist=params.get('cpc_head_dist', 'normal'),
            std=params.get('cpc_head_std', 'learned'),
            num_layers=params.get('cpc_head_layers', 3))
    image_head = params.get('image_head', 'conv')
    if image_head == 'conv':
        config.heads.image = tools.bind(networks.conv.decoder,
                                        std=params.get('image_head_std', 1.0))
    else:
        raise NotImplementedError(image_head)
    hidden_size = params.get('model_size', 200)
    state_size = params.get('state_size', 30)
    model = params.get('model', 'rssm')
    if model == 'rssm':
        config.cell = tools.bind(models.RSSM, state_size, hidden_size,
                                 hidden_size,
                                 params.get('future_rnn', True),
                                 params.get('mean_only', False),
                                 params.get('min_stddev', 1e-1),
                                 config.activation,
                                 params.get('model_layers', 1),
                                 params.get('rssm_model', 'gru'),
                                 params.get('trxl_layer', 2),
                                 params.get('trxl_n_head', 10),
                                 params.get('trxl_mem_len', 8),
                                 params.get('trxl_pre_lnorm', False),
                                 params.get('trxl_gate', 'plus'))
    else:
        raise NotImplementedError(model)
    return config
Code Example #14
def define_model(logdir, metrics, data, trainer, config):
    print('Build TensorFlow compute graph.')
    dependencies = []
    cleanups = []
    step = trainer.step
    global_step = trainer.global_step
    phase = trainer.phase
    timestamp = tf.py_func(
        lambda: datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%S'), [],
        tf.string)
    dependencies.append(
        metrics.set_tags(global_step=global_step,
                         step=step,
                         phase=phase,
                         time=timestamp))

    # Instantiate network blocks. Note, this initialization would be expensive
    # when using tf.function since it would run at every step.
    try:
        cell = config.cell()
    except TypeError:
        cell = config.cell(action_size=data['action'].shape[-1].value)
    kwargs = dict(create_scope_now_=True)
    encoder = tf.make_template('encoder', config.encoder, **kwargs)
    heads = tools.AttrDict(_unlocked=True)
    raw_dummy_features = cell.features_from_state(
        cell.zero_state(1, tf.float32))[:, None]
    for key, head in config.heads.items():
        name = 'head_{}'.format(key)
        kwargs = dict(create_scope_now_=True)
        if key in data:
            kwargs['data_shape'] = data[key].shape[2:].as_list()
        if key == 'action_target':
            kwargs['data_shape'] = data['action'].shape[2:].as_list()
        if key == 'cpc':
            kwargs['data_shape'] = [cell.feature_size]
            dummy_features = encoder(data)[:1, :1]
        else:
            dummy_features = raw_dummy_features
        heads[key] = tf.make_template(name, head, **kwargs)
        heads[key](dummy_features)  # Initialize weights.

    # Update target networks.
    if 'value_target' in heads:
        dependencies.append(
            tools.track_network(trainer, config.batch_shape[0],
                                r'.*/head_value/.*',
                                r'.*/head_value_target/.*',
                                config.value_target_period,
                                config.value_target_update))
    if 'value_target_2' in heads:
        dependencies.append(
            tools.track_network(trainer, config.batch_shape[0],
                                r'.*/head_value/.*',
                                r'.*/head_value_target_2/.*',
                                config.value_target_period,
                                config.value_target_update))
    if 'action_target' in heads:
        dependencies.append(
            tools.track_network(trainer, config.batch_shape[0],
                                r'.*/head_action/.*',
                                r'.*/head_action_target/.*',
                                config.action_target_period,
                                config.action_target_update))

    # Apply and optimize model.
    embedded = encoder(data)
    with tf.control_dependencies(dependencies):
        embedded = tf.identity(embedded)
    graph = tools.AttrDict(locals())
    prior, posterior = tools.unroll.closed_loop(cell, embedded, data['action'],
                                                config.debug)
    objectives = utility.compute_objectives(posterior, prior, data, graph,
                                            config)
    summaries, grad_norms = utility.apply_optimizers(objectives, trainer,
                                                     config)
    dependencies += summaries

    # Active data collection.
    with tf.variable_scope('collection'):
        with tf.control_dependencies(dependencies):  # Make sure to train first.
            for name, params in config.train_collects.items():
                schedule = tools.schedule.binary(step, config.batch_shape[0],
                                                 params.steps_after,
                                                 params.steps_every,
                                                 params.steps_until)
                summary, _ = tf.cond(
                    tf.logical_and(tf.equal(trainer.phase, 'train'), schedule),
                    functools.partial(utility.simulate, metrics, config,
                                      params, graph, cleanups,
                                      gif_summary=False, name=name),
                    lambda: (tf.constant(''), tf.constant(0.0)),
                    name='should_collect_' + name)
                summaries.append(summary)
                dependencies.append(summary)
                summaries.append(summary)
                dependencies.append(summary)

    # Compute summaries.
    graph = tools.AttrDict(locals())
    summary, score = tf.cond(
        trainer.log,
        lambda: define_summaries.define_summaries(graph, config, cleanups),
        lambda: (tf.constant(''), tf.zeros((0, ), tf.float32)),
        name='summaries')
    summaries = tf.summary.merge([summaries, summary])
    dependencies.append(
        utility.print_metrics({ob.name: ob.value
                               for ob in objectives}, step,
                              config.print_metrics_every, 2, 'objectives'))
    dependencies.append(
        utility.print_metrics(grad_norms, step, config.print_metrics_every, 2,
                              'grad_norms'))
    dependencies.append(tf.cond(trainer.log, metrics.flush, tf.no_op))
    with tf.control_dependencies(dependencies):
        score = tf.identity(score)
    return score, summaries, cleanups
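One idiom in example #14 deserves a note: graph = tools.AttrDict(locals()). Because AttrDict is a dict, constructing it from locals() snapshots every local variable (cell, encoder, heads, embedded, ...) into a single attribute-accessible bundle, which is why utility.simulate in example #10 can read graph.cell and graph.encoder. A tiny illustration, again assuming the sketch from example #1:

# Illustration of the AttrDict(locals()) idiom, assuming the sketch above.
def build_graph():
    cell = 'cell_object'
    encoder = 'encoder_object'
    return AttrDict(locals())  # snapshot all locals into one bundle


graph = build_graph()
assert graph.cell == 'cell_object'
assert graph.encoder == 'encoder_object'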
Code Example #15
File: train.py Project: letusfly85/dreamer-tf1
    experiment = training.Experiment(args.logdir,
                                     process_fn=functools.partial(process,
                                                                  args=args),
                                     num_runs=args.num_runs,
                                     ping_every=args.ping_every,
                                     resume_runs=args.resume_runs)
    for run in experiment:
        for unused_score in run:
            pass


if __name__ == '__main__':
    boolean = lambda x: bool(['False', 'True'].index(x))
    parser = argparse.ArgumentParser()
    parser.add_argument('--logdir', type=pathlib.Path, required=True)
    parser.add_argument('--params', default='{}')
    parser.add_argument('--num_runs', type=int, default=1)
    parser.add_argument('--ping_every', type=int, default=0)
    parser.add_argument('--resume_runs', type=boolean, default=True)
    parser.add_argument('--dmlab_runfiles_path', default=None)
    args_, remaining = parser.parse_known_args()
    params_ = args_.params.replace('#', ',').replace('\\', '')
    args_.params = tools.AttrDict(yaml.safe_load(params_))
    if args_.dmlab_runfiles_path:
        with args_.params.unlocked:
            args_.params.dmlab_runfiles_path = args_.dmlab_runfiles_path
        assert args_.params.dmlab_runfiles_path  # Mark as accessed.
    args_.logdir = args_.logdir and os.path.expanduser(args_.logdir)
    remaining.insert(0, sys.argv[0])
    tf.app.run(lambda _: main(args_), remaining)
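The argument handling above implies a small command-line contract: --params takes a YAML dictionary, every '#' in the string is rewritten to ',' (handy where commas are awkward to quote in a shell), and backslashes are stripped before yaml.safe_load. The snippet below replays that transformation on an example string; the parameter values are illustrative, taken from the tests above.

import yaml

# Replaying the --params handling from the lines above: '#' stands in for
# ',' and is rewritten before parsing. Values are taken from the tests.
raw = '{defaults: [dreamer]# tasks: [cup_catch]# max_steps: 30}'
parsed = yaml.safe_load(raw.replace('#', ',').replace('\\', ''))
assert parsed == {'defaults': ['dreamer'], 'tasks': ['cup_catch'], 'max_steps': 30}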