コード例 #1
0
def make_env(config, logger, mode, train_eps, eval_eps):
    """Construct the wrapped environment selected by ``config.task``.

    ``config.task`` is split at its first underscore into a suite name and a
    task name. Only the ``dmc`` and ``atari`` suites are handled here.

    Args:
        config: Settings object; this function reads ``task``,
            ``action_repeat``, ``size``, ``time_limit`` and, for the Atari
            suite, ``atari_grayscale``.
        logger: Forwarded unchanged to ``process_episode``.
        mode: Forwarded unchanged to ``process_episode``.
        train_eps: Forwarded unchanged to ``process_episode``.
        eval_eps: Forwarded unchanged to ``process_episode``.

    Returns:
        The environment after the ``TimeLimit``, ``CollectDataset`` and
        ``RewardObs`` wrappers have been applied, in that order.

    Raises:
        ValueError: If ``config.task`` contains no underscore, because the
            split then yields a single item that cannot be unpacked.
        NotImplementedError: If the suite is neither ``dmc`` nor ``atari``.
    """
    suite, task = config.task.split('_', 1)

    # Reject unknown suites before touching any wrapper.
    if suite not in ('dmc', 'atari'):
        raise NotImplementedError(suite)

    if suite == 'dmc':
        control_env = wrappers.DeepMindControl(
            task, config.action_repeat, config.size)
        env = wrappers.NormalizeActions(control_env)
    else:
        atari_env = wrappers.Atari(
            task,
            config.action_repeat,
            config.size,
            grayscale=config.atari_grayscale,
            # Disabled for every mode: this used to be spelled
            # `False and (mode == 'train')`, which always short-circuits
            # to False without ever looking at `mode`.
            life_done=False,
            sticky_actions=True,
            all_actions=True)
        env = wrappers.OneHotAction(atari_env)

    env = wrappers.TimeLimit(env, config.time_limit)

    # Bind the bookkeeping arguments now; the partial is handed to
    # CollectDataset as its only callback.
    on_episode = functools.partial(
        process_episode, config, logger, mode, train_eps, eval_eps)
    env = wrappers.CollectDataset(env, [on_episode])

    return wrappers.RewardObs(env)
コード例 #2
0
def make_env(config, writer, prefix, datadir, store):
    """Construct the wrapped environment selected by ``config.task``.

    ``config.task`` is split at its first underscore into a suite name and a
    task name. Only the ``dmc`` and ``atari`` suites are handled here.

    Args:
        config: Settings object; this function reads ``task``,
            ``action_repeat``, ``time_limit`` and ``precision``.
        writer: Forwarded unchanged to ``summarize_episode``.
        prefix: Forwarded unchanged to ``summarize_episode``.
        datadir: Passed to ``tools.save_episodes`` (when ``store`` is truthy)
            and to ``summarize_episode``.
        store: When truthy, every collected episode is also passed to
            ``tools.save_episodes`` before it is summarized.

    Returns:
        The environment after the ``TimeLimit``, ``Collect`` and
        ``RewardObs`` wrappers have been applied, in that order.

    Raises:
        ValueError: If ``config.task`` contains no underscore, because the
            split then yields a single item that cannot be unpacked.
        NotImplementedError: If the suite is neither ``dmc`` nor ``atari``.
    """
    suite, task = config.task.split('_', 1)

    # Reject unknown suites before touching any wrapper.
    if suite not in ('dmc', 'atari'):
        raise NotImplementedError(suite)

    if suite == 'dmc':
        control_env = wrappers.DeepMindControl(task)
        repeated_env = wrappers.ActionRepeat(control_env, config.action_repeat)
        env = wrappers.NormalizeActions(repeated_env)
    else:
        frame_size = (64, 64)
        atari_env = wrappers.Atari(
            task,
            config.action_repeat,
            frame_size,
            grayscale=False,
            life_done=True,
            sticky_actions=True)
        env = wrappers.OneHotAction(atari_env)

    # The limit handed to TimeLimit is time_limit divided by action_repeat.
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)

    def _save(episode):
        return tools.save_episodes(datadir, [episode])

    def _summarize(episode):
        return summarize_episode(episode, config, datadir, writer, prefix)

    # Saving, when enabled, is listed ahead of summarizing.
    callbacks = [_save, _summarize] if store else [_summarize]
    env = wrappers.Collect(env, callbacks, config.precision)

    return wrappers.RewardObs(env)
コード例 #3
0
def make_env(config, writer, prefix, datadir, store):
    """Construct the wrapped environment selected by ``config.task``.

    ``config.task`` is split at its first underscore into a suite name and a
    task name. The ``dmc``, ``atari`` and ``football`` suites are handled.

    Args:
        config: Settings object; this function reads ``task``,
            ``action_repeat``, ``time_limit`` and ``precision``.
        writer: Forwarded unchanged to ``summarize_episode``.
        prefix: Forwarded unchanged to ``summarize_episode``.
        datadir: Passed to ``tools.save_episodes`` (when ``store`` is truthy)
            and to ``summarize_episode``.
        store: When truthy, every collected episode is also passed to
            ``tools.save_episodes`` before it is summarized.

    Returns:
        The environment after the ``TimeLimit``, ``Collect`` and
        ``RewardObs`` wrappers have been applied, in that order.

    Raises:
        ValueError: If ``config.task`` contains no underscore, because the
            split then yields a single item that cannot be unpacked.
        NotImplementedError: If the suite is not one of ``dmc``, ``atari``
            or ``football``.
    """
    suite, task = config.task.split('_', 1)

    # Reject unknown suites before touching any wrapper.
    if suite not in ('dmc', 'atari', 'football'):
        raise NotImplementedError(suite)

    if suite == 'dmc':
        control_env = wrappers.DeepMindControl(task)
        repeated_env = wrappers.ActionRepeat(control_env, config.action_repeat)
        env = wrappers.NormalizeActions(repeated_env)
    elif suite == 'atari':
        frame_size = (64, 64)
        atari_env = wrappers.Atari(
            task,
            config.action_repeat,
            frame_size,
            grayscale=False,
            life_done=True,
            sticky_actions=True)
        env = wrappers.OneHotAction(atari_env)
    else:
        # NOTE(review): the task part of config.task is not used for the
        # football suite; the scenario name and log directory are fixed
        # here. Confirm whether that is intentional.
        football_options = dict(
            representation='pixels',
            env_name='academy_empty_goal_close',
            stacked=False,
            logdir='./football/empty_goal_close2',
            write_goal_dumps=True,
            write_full_episode_dumps=True,
            render=True,
            write_video=True)
        raw_football = football_env.create_environment(**football_options)
        env = wrappers.OneHotAction(wrappers.Football(raw_football))

    # The limit handed to TimeLimit is time_limit divided by action_repeat.
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)

    def _save(episode):
        return tools.save_episodes(datadir, [episode])

    def _summarize(episode):
        return summarize_episode(episode, config, datadir, writer, prefix)

    # Saving, when enabled, is listed ahead of summarizing.
    callbacks = [_save, _summarize] if store else [_summarize]
    env = wrappers.Collect(env, callbacks, config.precision)

    return wrappers.RewardObs(env)