Esempio n. 1
0
def run(config, seed, device):
    r"""Train an agent until the timestep budget is exhausted, checkpointing periodically.

    The environment is wrapped with :class:`VecMonitor` and, depending on the
    configuration, with observation and/or reward standardization. Training
    proceeds one ``engine.train`` call per iteration until
    ``agent.total_timestep`` reaches ``config['train.timestep']``.

    Args:
        config (dict): configuration. Keys read here: 'log.dir', 'ID',
            'env.standardize_obs', 'env.standardize_reward', 'agent.gamma',
            'train.timestep', 'log.freq' and 'checkpoint.freq'.
        seed: random seed; also used as the innermost logging folder name
        device: device handed to the agent

    Returns:
        None: checkpoints and ``train_logs.pkl`` are written under
        ``<log.dir>/<ID>/<seed>``.
    """
    set_global_seeds(seed)
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)

    env = make_env(config, seed)
    env = VecMonitor(env)
    if config['env.standardize_obs']:
        env = VecStandardizeObservation(env, clip=5.)
    if config['env.standardize_reward']:
        env = VecStandardizeReward(env, clip=10., gamma=config['agent.gamma'])

    agent = Agent(config, env, device)
    runner = EpisodeRunner(reset_on_call=False)
    engine = Engine(config, agent=agent, env=env, runner=runner)
    train_logs = []
    num_iter = 0  # number of training iterations completed so far
    for i in count():
        if agent.total_timestep >= config['train.timestep']:
            break
        train_logger = engine.train(i)
        num_iter = i + 1
        train_logs.append(train_logger.logs)
        if i == 0 or num_iter % config['log.freq'] == 0:
            train_logger.dump(keys=None, index=0, indent=0, border='-'*50)
        if i == 0 or num_iter % config['checkpoint.freq'] == 0:
            agent.checkpoint(logdir, num_iter)
    # Final checkpoint: label it with the number of iterations actually trained.
    # The loop variable points at the iteration that was *not* run when the loop
    # breaks, so using `i + 1` here would be one too high and inconsistent with
    # the labels of the in-loop checkpoints.
    agent.checkpoint(logdir, num_iter)
    pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
    return None
Esempio n. 2
0
 def checkpoint(self, logdir, num_iter):
     r"""Save the agent and, if present, the observation standardization moments.

     Args:
         logdir: directory (supports the ``/`` operator) receiving the files
         num_iter: iteration number embedded in the file names
     """
     self.save(logdir / f'agent_{num_iter}.pth')
     obs_env = get_wrapper(self.env, 'VecStandardizeObservation')
     if obs_env is None:
         # No observation-standardization wrapper found: nothing more to store.
         return
     moments = (obs_env.mean, obs_env.var)
     pickle_dump(obj=moments, f=logdir / f'obs_moments_{num_iter}', ext='.pth')
Esempio n. 3
0
def run(config, seed, device, logdir):
    r"""Train an (optionally LSTM) agent and write evenly spaced checkpoints.

    Checkpoints are triggered when ``agent.total_timestep`` passes the next of
    ``config['checkpoint.num']`` thresholds spread evenly over
    ``config['train.timestep']``. At most one checkpoint is written per
    iteration.

    Args:
        config (dict): configuration. Keys read here: 'use_lstm',
            'train.timestep', 'log.freq' and 'checkpoint.num'.
        seed: random seed
        device: device handed to the agent
        logdir: directory (supports the ``/`` operator) for checkpoints and logs

    Returns:
        None: results are written to ``logdir``.
    """
    set_global_seeds(seed)

    env = make_env(config, seed, 'train')
    if config['use_lstm']:
        agent = LSTMAgent(config, env, device)
    else:
        agent = Agent(config, env, device)
    runner = StepRunner(reset_on_call=False)
    engine = Engine(config, agent=agent, env=env, runner=runner)
    train_logs = []
    checkpoint_count = 0
    for i in count():
        if agent.total_timestep >= config['train.timestep']:
            break
        train_logger = engine.train(i)
        train_logs.append(train_logger.logs)
        if i == 0 or (i + 1) % config['log.freq'] == 0:
            train_logger.dump(keys=None, index=0, indent=0, border='-' * 50)
        # Thresholds are spaced over (checkpoint.num - 1) intervals. With a single
        # requested checkpoint there is no interval to divide by, so place that
        # one checkpoint at the end of training instead of raising
        # ZeroDivisionError.
        num_interval = config['checkpoint.num'] - 1
        if num_interval != 0:
            fraction = checkpoint_count / num_interval
        else:
            fraction = 1.0
        if agent.total_timestep >= int(config['train.timestep'] * fraction):
            agent.checkpoint(logdir, i + 1)
            checkpoint_count += 1
    pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
    return None
Esempio n. 4
0
 def checkpoint(self, logdir, num_iter):
     r"""Save the agent and, when observation normalization is on, its moments.

     Args:
         logdir: directory (supports the ``/`` operator) receiving the files
         num_iter: iteration number embedded in the file names
     """
     self.save(logdir / f'agent_{num_iter}.pth')
     if not self.config['env.normalize_obs']:
         return
     # Persist the (mean, var) pair exposed by the environment's `obs_moments`.
     mean = self.env.obs_moments.mean
     var = self.env.obs_moments.var
     pickle_dump(obj=(mean, var), f=logdir / f'obs_moments_{num_iter}', ext='.pth')
Esempio n. 5
0
def run(config, seed, device, logdir):
    r"""Optimize the agent's parameters with CMA-ES, evaluating solutions in parallel.

    Each generation asks the optimizer for solutions, evaluates them with
    ``fitness`` in a process pool, feeds the negated first outputs back to the
    optimizer and logs statistics. The best solution found so far is loaded
    into the agent and checkpointed at ``config['checkpoint.num']`` evenly
    spaced points over ``config['train.generations']``.

    Args:
        config (dict): configuration. Keys read here: 'train.mu0', 'train.std0',
            'train.popsize', 'train.generations', 'log.freq' and 'checkpoint.num'.
        seed: random seed
        device: device handed to the agent and to the worker initializer
        logdir: directory (supports the ``/`` operator) for checkpoints and logs

    Returns:
        None: results are written to ``logdir``.
    """
    set_global_seeds(seed)

    print('Initializing...')
    agent = Agent(config, make_env(config, seed), device)
    es = CMAES([config['train.mu0']]*agent.num_params, config['train.std0'],
               {'popsize': config['train.popsize'],
                'seed': seed})
    train_logs = []
    checkpoint_count = 0
    with ProcessPoolExecutor(max_workers=config['train.popsize'], initializer=initializer, initargs=(config, seed, device)) as executor:
        print('Finish initialization. Training starts...')
        for generation in range(config['train.generations']):
            start_time = time.perf_counter()
            solutions = es.ask()
            out = list(executor.map(fitness, solutions, chunksize=2))
            Rs, Hs = zip(*out)
            # The optimizer is told the negated values of Rs.
            es.tell(solutions, [-R for R in Rs])
            logger = Logger()
            logger('generation', generation+1)
            logger('num_seconds', round(time.perf_counter() - start_time, 1))
            logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('fbest', es.result.fbest)
            train_logs.append(logger.logs)
            if generation == 0 or (generation+1)%config['log.freq'] == 0:
                logger.dump(keys=None, index=0, indent=0, border='-'*50)
            # Thresholds are spaced over (checkpoint.num - 1) intervals. With a
            # single requested checkpoint there is no interval to divide by, so
            # place it at the last generation instead of raising
            # ZeroDivisionError.
            num_interval = config['checkpoint.num'] - 1
            if num_interval != 0:
                fraction = checkpoint_count/num_interval
            else:
                fraction = 1.0
            if (generation+1) >= int(config['train.generations']*fraction):
                agent.from_vec(tensorify(es.result.xbest, 'cpu'))
                agent.checkpoint(logdir, generation+1)
                checkpoint_count += 1
    pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
    return None
Esempio n. 6
0
def run(config, seed, device, logdir):
    r"""Train a TD3 or DDPG agent and pickle the resulting train/eval logs.

    Args:
        config (dict): configuration. Keys read here: 'agent.use_td3' and
            'replay.capacity'.
        seed: random seed
        device: device handed to the agents and the replay buffer
        logdir: directory (supports the ``/`` operator) for the log files

    Returns:
        None: ``train_logs.pkl`` and ``eval_logs.pkl`` are written to ``logdir``.
    """
    set_global_seeds(seed)

    env = make_env(config, seed, 'train')
    eval_env = make_env(config, seed, 'eval')
    random_agent = RandomAgent(config, env, device)
    # Pick the learner class from the configuration flag.
    agent_cls = TD3Agent if config['agent.use_td3'] else DDPGAgent
    agent = agent_cls(config, env, device)
    runner = EpisodeRunner()
    replay = ReplayBuffer(env, config['replay.capacity'], device)
    components = dict(agent=agent,
                      random_agent=random_agent,
                      env=env,
                      eval_env=eval_env,
                      runner=runner,
                      replay=replay,
                      logdir=logdir)
    engine = Engine(config, **components)

    train_logs, eval_logs = engine.train()
    for name, logs in (('train_logs', train_logs), ('eval_logs', eval_logs)):
        pickle_dump(obj=logs, f=logdir / name, ext='.pkl')
    return None
Esempio n. 7
0
def run(config, seed, device):
    r"""Train and evaluate a VAE model for a fixed number of epochs.

    Args:
        config (dict): configuration. Keys read here: 'log.dir', 'ID',
            'nn.type' (either 'VAE' or 'ConvVAE') and 'train.num_epoch'.
        seed: random seed; also used as the innermost logging folder name
        device: device handed to the model

    Returns:
        None: ``train_logs.pkl`` and ``eval_logs.pkl`` are written under
        ``<log.dir>/<ID>/<seed>``.

    Raises:
        ValueError: if ``config['nn.type']`` is not a supported model type.
    """
    set_global_seeds(seed)
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)

    train_loader, test_loader = make_dataset(config)
    if config['nn.type'] == 'VAE':
        model = VAE(config, device)
    elif config['nn.type'] == 'ConvVAE':
        model = ConvVAE(config, device)
    else:
        # Without this branch an unknown type leaves `model` unbound and fails
        # later with an unrelated-looking UnboundLocalError.
        raise ValueError(
            f"expected 'nn.type' to be 'VAE' or 'ConvVAE', got {config['nn.type']!r}")
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    engine = Engine(config,
                    model=model,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    test_loader=test_loader)

    train_logs = []
    eval_logs = []
    for epoch in range(config['train.num_epoch']):
        train_logger = engine.train(epoch, logdir=logdir)
        train_logs.append(train_logger.logs)
        eval_logger = engine.eval(epoch, logdir=logdir)
        eval_logs.append(eval_logger.logs)
    pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
    pickle_dump(obj=eval_logs, f=logdir / 'eval_logs', ext='.pkl')
    return None
Esempio n. 8
0
    def save_running_average(self, f):
        r"""Pickle the running averages held by this object.

        The object written is ``self.running_averages``, passed unchanged to
        :func:`pickle_dump` with the '.pkl' extension. It is expected to be a
        dictionary with keys 'obs_avg' and 'r_avg', each containing the
        sub-keys ['mu', 'sigma'] (observation mean/standard deviation and
        reward standard deviation).

        Args:
            f (str): saving path
        """
        pickle_dump(obj=self.running_averages, f=f, ext='.pkl')
Esempio n. 9
0
    def save_configs(self, f, method='pickle'):
        r"""Write ``self.configs`` to disk, either pickled or as YAML.

        Args:
            f (str): file path
            method (str): serialization format, 'pickle' (stored with the '.pkl'
                extension) or 'yaml' (stored with the '.yml' extension)
        """
        supported = ['pickle', 'yaml']
        assert method in supported, f'expected {supported}, got {method}'

        if method == 'yaml':
            yaml_dump(obj=self.configs, f=f, ext='.yml')
        elif method == 'pickle':
            pickle_dump(obj=self.configs, f=f, ext='.pkl')
Esempio n. 10
0
def run(config, seed, device, logdir):
    r"""Optimize the agent's parameters with OpenAI-ES using a multiprocessing pool.

    Each generation asks the optimizer for solutions, evaluates them with
    ``fitness`` in a worker pool, feeds the negated first outputs back to the
    optimizer and logs statistics. The best solution found so far is loaded
    into the agent and checkpointed at ``config['checkpoint.num']`` evenly
    spaced points over ``config['train.generations']``.

    Args:
        config (dict): configuration. Keys read here: 'train.mu0', 'train.std0',
            'train.popsize', 'train.sigma_scheduler_args', 'train.lr',
            'train.lr_decay', 'train.min_lr', 'train.antithetic',
            'train.rank_transform', 'train.worker_chunksize',
            'train.generations', 'log.freq' and 'checkpoint.num'.
        seed: random seed
        device: device handed to the agent and to every fitness evaluation
        logdir: directory (supports the ``/`` operator) for checkpoints and logs

    Returns:
        None: results are written to ``logdir``.
    """
    set_global_seeds(seed)
    torch.set_num_threads(1)  # VERY IMPORTANT TO AVOID GETTING STUCK

    print('Initializing...')
    agent = Agent(config, make_env(config, seed, 'eval'), device)
    es = OpenAIES(
        [config['train.mu0']] * agent.num_params, config['train.std0'], {
            'popsize': config['train.popsize'],
            'seed': seed,
            'sigma_scheduler_args': config['train.sigma_scheduler_args'],
            'lr': config['train.lr'],
            'lr_decay': config['train.lr_decay'],
            'min_lr': config['train.min_lr'],
            'antithetic': config['train.antithetic'],
            'rank_transform': config['train.rank_transform']
        })
    train_logs = []
    checkpoint_count = 0
    # Pool() rejects a process count below 1, which the floor division yields
    # whenever the chunk size exceeds the population size.
    num_process = max(
        1, config['train.popsize'] // config['train.worker_chunksize'])
    with Pool(processes=num_process) as pool:
        print('Finish initialization. Training starts...')
        for generation in range(config['train.generations']):
            t0 = time.perf_counter()
            solutions = es.ask()
            data = [(config, seed, device, solution) for solution in solutions]
            out = pool.map(CloudpickleWrapper(fitness),
                           data,
                           chunksize=config['train.worker_chunksize'])
            Rs, Hs = zip(*out)
            # The optimizer is told the negated values of Rs.
            es.tell(solutions, [-R for R in Rs])
            logger = Logger()
            logger('generation', generation + 1)
            logger('num_seconds', round(time.perf_counter() - t0, 1))
            logger('Returns',
                   describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('Horizons',
                   describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('fbest', es.result.fbest)
            train_logs.append(logger.logs)
            if generation == 0 or (generation + 1) % config['log.freq'] == 0:
                logger.dump(keys=None, index=0, indent=0, border='-' * 50)
            # Thresholds are spaced over (checkpoint.num - 1) intervals. With a
            # single requested checkpoint there is no interval to divide by, so
            # place it at the last generation instead of raising
            # ZeroDivisionError.
            num_interval = config['checkpoint.num'] - 1
            if num_interval != 0:
                fraction = checkpoint_count / num_interval
            else:
                fraction = 1.0
            if (generation + 1) >= int(config['train.generations'] * fraction):
                agent.from_vec(tensorify(es.result.xbest, 'cpu'))
                agent.checkpoint(logdir, generation + 1)
                checkpoint_count += 1
    pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
    return None
Esempio n. 11
0
def run(config, seed, device, logdir):
    r"""Train an agent with a replay buffer and pickle the train/eval logs.

    Args:
        config (dict): configuration; 'replay.capacity' is read here
        seed: random seed
        device: device handed to the agent and the replay buffer
        logdir: directory (supports the ``/`` operator) for the log files

    Returns:
        None: ``train_logs.pkl`` and ``eval_logs.pkl`` are written to ``logdir``.
    """
    set_global_seeds(seed)

    # Training environment: monitored, with per-step info on top.
    env = VecStepInfo(VecMonitor(make_env(config, seed)))
    # Evaluation environment: monitored only.
    eval_env = VecMonitor(make_env(config, seed))

    agent = Agent(config, env, device)
    replay = ReplayBuffer(env, config['replay.capacity'], device)
    engine = Engine(config,
                    agent=agent,
                    env=env,
                    eval_env=eval_env,
                    replay=replay,
                    logdir=logdir)

    train_logs, eval_logs = engine.train()
    for name, logs in (('train_logs', train_logs), ('eval_logs', eval_logs)):
        pickle_dump(obj=logs, f=logdir/name, ext='.pkl')
    return None
Esempio n. 12
0
File: algo.py Progetto: vin136/lagom
    def process_es_result(self, result):
        r"""Log one generation's result and, on the last generation, persist everything.

        Args:
            result (dict): optimizer output; 'best_f_val' is always read and
                'best_param' is read on the final generation.
        """
        # The logged return is the negation of the best objective value.
        best_return = -result['best_f_val']
        iteration = self.generation + 1

        self.logger('generation', iteration)
        self.logger('best_return', best_return)

        should_print = (self.generation == 0
                        or iteration % self.config['log.interval'] == 0)
        if should_print:
            border = '-' * 50
            print(border)
            self.logger.dump(keys=None, index=-1, indent=0)
            print(border)

        # Save the loggings and final parameters
        if iteration == self.config['train.num_iteration']:
            pickle_dump(obj=self.logger.logs,
                        f=self.logdir / 'result',
                        ext='.pkl')
            np.save(self.logdir / 'trained_param', result['best_param'])
Esempio n. 13
0
def test_pickle_yaml():
    """Round-trip a list of dicts through the pickle and YAML helpers.

    The temporary files are removed even when a check fails, so a failing run
    does not leave stray files in the working directory.
    """
    a = {'one': 1, 'two': [2, 3]}
    b = {'three': 3, 'four': [4, 5]}
    c = [a, b]

    def check(x):
        # The loaded object must match `c` in type, order and content.
        assert isinstance(x, list)
        assert len(x) == 2
        assert all(isinstance(i, dict) for i in x)
        assert list(x[0].keys()) == ['one', 'two']
        assert list(x[1].keys()) == ['three', 'four']
        assert list(x[0].values()) == [1, [2, 3]]
        assert list(x[1].values()) == [3, [4, 5]]

    try:
        pickle_dump(c, '.tmp_pickle')
        check(pickle_load('.tmp_pickle.pkl'))
    finally:
        # Guard the removal so a failed dump is not masked by FileNotFoundError.
        if os.path.exists('.tmp_pickle.pkl'):
            os.unlink('.tmp_pickle.pkl')

    try:
        yaml_dump(c, '.tmp_yaml')
        check(yaml_load('.tmp_yaml.yml'))
    finally:
        if os.path.exists('.tmp_yaml.yml'):
            os.unlink('.tmp_yaml.yml')
0
def learner(config, logdir, agent, engine, queue):
    r"""Consume batches of items from a queue and train until the timestep budget is met.

    Each training step waits until ``config['train.batch_size']`` items have
    been taken from ``queue`` and passes them to ``engine.train``. Checkpoints
    are triggered when ``agent.total_timestep`` passes the next of
    ``config['checkpoint.num']`` thresholds spread evenly over
    ``config['train.timestep']``.

    Args:
        config (dict): configuration. Keys read here: 'train.timestep',
            'train.batch_size', 'log.freq' and 'checkpoint.num'.
        logdir: directory (supports the ``/`` operator) for checkpoints and logs
        agent: agent exposing ``total_timestep`` and ``checkpoint``
        engine: engine exposing ``train(n, D=...)``
        queue: queue exposing ``empty()`` and ``get_nowait()``
    """
    torch.set_num_threads(1)  # VERY IMPORTANT TO AVOID GETTING STUCK
    train_logs = []
    checkpoint_count = 0
    n = 0
    while agent.total_timestep < config['train.timestep']:
        D = []
        while len(D) < config['train.batch_size']:
            # Poll until an item is available.
            while queue.empty():
                time.sleep(0.01)
            D.append(queue.get_nowait())
        train_logger = engine.train(n, D=D)
        train_logs.append(train_logger.logs)
        if n == 0 or (n + 1) % config['log.freq'] == 0:
            train_logger.dump(keys=None, index=0, indent=0, border='-' * 50)
        # Thresholds are spaced over (checkpoint.num - 1) intervals. With a single
        # requested checkpoint there is no interval to divide by, so place that
        # one checkpoint at the end of training instead of raising
        # ZeroDivisionError.
        num_interval = config['checkpoint.num'] - 1
        if num_interval != 0:
            fraction = checkpoint_count / num_interval
        else:
            fraction = 1.0
        if agent.total_timestep >= int(config['train.timestep'] * fraction):
            agent.checkpoint(logdir, n + 1)
            checkpoint_count += 1
        n += 1
    pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
Esempio n. 15
0
        def algorithm(config, seed, device):
            r"""Train an agent on a vectorized CraftingEnv, logging at fixed intervals.

            Args:
                config (dict): configuration. Keys read here: 'log.dir', 'ID',
                    'env.count', 'train.iter' and 'log.interval'.
                seed: random seed; also used as the innermost logging folder name
                device: device handed to the agent
            """
            logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)
            seeder = Seeder(seed)
            # One environment constructor per generated seed.
            env_constructors = [
                partial(CraftingEnv, env_seed)
                for env_seed in seeder(size=config['env.count'])
            ]
            env = VecStandardize(SerialVecEnv(env_constructors),
                                 clip_reward=100.0)
            env_spec = EnvSpec(env)

            agent = Agent(config, env_spec, device)
            runner = RollingSegmentRunner(config, agent, env)
            engine = Engine(agent, runner, env)

            for i in range(config['train.iter']):
                training_result = engine.train(i)
                print(f'Training iteration {i} complete.')
                if i % config['log.interval'] != 0:
                    continue
                # Logging iteration: store the logs and overwrite the saved
                # policy parameters.
                logs = engine.log_train(training_result)
                pickle_dump(obj=logs, f=logdir / f'iter_{i}_train_logs', ext='.pkl')
                torch.save(engine.agent.policy.state_dict(),
                           logdir / 'trained_params')
Esempio n. 16
0
def run_experiment(run, config, seeds, num_worker):
    r"""Launch a parallelized experiment with a master-worker setup.

    .. note::

        All logging folders are created automatically. The root is the logging
        directory given by 'log.dir' of the first configuration. It contains one
        subfolder per configuration, named by its ID, and each of those contains
        one subfolder per random seed, named by the seed::

            - logs
                - 0  # ID number
                    - 123  # random seed
                    - 345
                    - 567
                - 1
                    - 123
                    - 345
                    - 567

        If the root already exists the user is asked whether to clean it;
        otherwise it is renamed with an 'old_' prefix. The ``*.py`` files next
        to the source file of ``run`` are copied into ``source_files``, each
        configuration is dumped as ``config.yml`` in its ID folder and all
        configurations are pickled as ``configs.pkl`` in the root.

    Args:
        run (function): an algorithm function to train on.
        config (Config): a :class:`Config` object defining all configuration settings
        seeds (list): a list of random seeds
        num_worker (int): number of workers

    Returns:
        the value returned by calling the experiment master
    """
    experiment = ExperimentMaster(ExperimentWorker, num_worker, run, config,
                                  seeds)

    log_path = Path(experiment.configs[0]['log.dir'])
    if log_path.exists():
        msg = f"Logging directory '{log_path.absolute()}' already existed, do you want to clean it ?"
        if ask_yes_or_no(msg):
            rmtree(log_path)
            log_path.mkdir(parents=True)
        else:  # back up
            old_log_path = log_path.with_name('old_' + log_path.name)
            log_path.rename(old_log_path)
            log_path.mkdir(parents=True)
            print(
                f"The old logging directory is renamed to '{old_log_path.absolute()}'. "
            )
            input('Please, press Enter to continue\n>>> ')
    else:
        log_path.mkdir(parents=True)

    # save source files
    source_path = log_path / 'source_files/'
    source_path.mkdir(parents=True)
    algo_dir = Path(inspect.getsourcefile(run)).parent
    for source_file in algo_dir.glob('*.py'):
        copyfile(source_file, source_path / source_file.name)

    # Create subfolders for each ID and subsubfolders for each random seed
    for cfg in experiment.configs:
        id_path = log_path / f'{cfg["ID"]}'
        for seed in experiment.seeds:
            (id_path / f'{seed}').mkdir(parents=True)
        yaml_dump(obj=cfg, f=id_path / 'config', ext='.yml')

    pickle_dump(experiment.configs, log_path / 'configs', ext='.pkl')

    # Run experiment in parallel
    return experiment()
Esempio n. 17
0
    def __call__(self, config, seed, device):
        r"""Build the environments, agent and engine, then train and log.

        Training stops once ``config['train.iter']`` iterations have run or
        ``agent.total_T`` reaches ``config['train.timestep']``, whichever of the
        two keys is present in the configuration.

        Args:
            config (dict): configuration. Keys read here: 'log.dir', 'ID',
                'env.time_aware_obs', 'env.id', 'train.N', 'eval.independent',
                'eval.N', 'env.clip_action', 'env.standardize', 'log.interval'
                and optionally 'train.iter' / 'train.timestep'.
            seed: random seed; also used as the innermost logging folder name
            device: device handed to the agent

        Returns:
            None: ``train_logs.pkl`` and ``eval_logs.pkl`` are written under
            ``<log.dir>/<ID>/<seed>``.
        """
        set_global_seeds(seed)
        logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)

        extra = ({'extra_wrapper': [TimeAwareObservation]}
                 if config['env.time_aware_obs'] else {})
        env = make_vec_env(SerialVecEnv,
                           make_gym_env,
                           config['env.id'],
                           config['train.N'],
                           seed,
                           monitor=True,
                           **extra)
        independent_eval = config['eval.independent']
        if independent_eval:
            eval_env = make_vec_env(SerialVecEnv, make_gym_env,
                                    config['env.id'], config['eval.N'], seed)
        if config['env.clip_action']:
            env = VecClipAction(env)
            if independent_eval:
                eval_env = VecClipAction(eval_env)
        if config['env.standardize']:
            # running averages of observation and reward
            standardize_args = dict(
                venv=env,
                use_obs=True,
                use_reward=False,  # A2C specific
                clip_obs=10.,
                clip_reward=10.,
                gamma=0.99,
                eps=1e-8)
            env = VecStandardize(**standardize_args)
        env_spec = EnvSpec(env)

        agent = Agent(config, env_spec, device)

        runner = RollingSegmentRunner(config, agent, env)

        engine_kwargs = {'eval_env': eval_env} if independent_eval else {}
        engine = Engine(agent, runner, config, **engine_kwargs)

        train_logs = []
        eval_logs = []
        for i in count():
            # enough iterations, or enough timesteps
            if (('train.iter' in config and i >= config['train.iter'])
                    or ('train.timestep' in config
                        and agent.total_T >= config['train.timestep'])):
                break

            train_output = engine.train(i)

            # Only the first and every 'log.interval'-th iteration is logged.
            if i != 0 and (i + 1) % config['log.interval'] != 0:
                continue
            train_logs.append(engine.log_train(train_output))

            if independent_eval:
                with torch.no_grad():  # disable grad, save memory
                    eval_output = engine.eval(n=i)
                eval_logs.append(engine.log_eval(eval_output))

        pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
        pickle_dump(obj=eval_logs, f=logdir / 'eval_logs', ext='.pkl')

        return None
Esempio n. 18
0
def run_experiment(run,
                   config,
                   seeds,
                   log_dir,
                   max_workers,
                   chunksize=1,
                   use_gpu=False,
                   gpu_ids=None):
    r"""Run every (configuration, seed) pair of an experiment, optionally in parallel.

    Parallel execution uses `concurrent.futures.ProcessPoolExecutor`.

    All logging folders are created automatically under the root given by the
    user. The root contains one subfolder per configuration, named by its job
    ID, and each of those contains one subfolder per random seed, named by the
    seed::

        - logs
            - 0  # ID number
                - 123  # random seed
                - 345
                - 567
            - 1
                - 123
                - 345
                - 567

    If the root already exists the user is asked whether to clean it; otherwise
    it is renamed with an 'old_' prefix. The ``*.py`` files next to the source
    file of ``run`` are copied into ``source_files``, each configuration is
    dumped as ``config.yml`` in its ID folder and all configurations are
    pickled as ``configs.pkl`` in the root.

    Args:
        run (function): a function that defines an algorithm, it must take the
            arguments `(config, seed, device, logdir)`
        config (Config): a :class:`Config` object defining all configuration settings
        seeds (list): a list of random seeds
        log_dir (str): a string to indicate the path to store loggings.
        max_workers (int): argument for ProcessPoolExecutor. if `None`, then all experiments run serially.
        chunksize (int): argument for Executor.map()
        use_gpu (bool): if `True`, then use CUDA. Otherwise, use CPU.
        gpu_ids (list): if `None`, then use all available GPUs. Otherwise, only use the
            GPU device defined in the list.

    Returns:
        list: the value returned by ``run`` for every job, in job order
    """
    configs = config.make_configs()

    # create logging dir
    log_path = Path(log_dir)
    if log_path.exists():
        msg = f"Logging directory '{log_path.absolute()}' already existed, do you want to clean it ?"
        if ask_yes_or_no(msg):
            rmtree(log_path)
            log_path.mkdir(parents=True)
        else:  # back up
            old_log_path = log_path.with_name('old_' + log_path.name)
            log_path.rename(old_log_path)
            log_path.mkdir(parents=True)
            print(
                f"The old logging directory is renamed to '{old_log_path.absolute()}'. "
            )
            input('Please, press Enter to continue\n>>> ')
    else:
        log_path.mkdir(parents=True)

    # save source files
    source_path = log_path / 'source_files/'
    source_path.mkdir(parents=True)
    algo_dir = Path(inspect.getsourcefile(run)).parent
    for source_file in algo_dir.glob('*.py'):
        copyfile(source_file, source_path / source_file.name)

    # Create subfolders for each ID and subsubfolders for each random seed
    for cfg in configs:
        id_path = log_path / f'{cfg["ID"]}'
        for s in seeds:
            (id_path / f'{s}').mkdir(parents=True)
        yaml_dump(obj=cfg, f=id_path / 'config', ext='.yml')

    pickle_dump(configs, log_path / 'configs', ext='.pkl')

    # Create unique id for each job
    jobs = list(enumerate(product(configs, seeds)))

    def _run(job):
        job_id, (config, seed) = job
        # VERY IMPORTANT TO AVOID GETTING STUCK, oversubscription
        # see following links
        # https://github.com/pytorch/pytorch/issues/19163
        # https://software.intel.com/en-us/intel-threading-building-blocks-openmp-or-native-threads
        torch.set_num_threads(1)
        if not use_gpu:
            device = torch.device('cpu')
        else:
            num_gpu = torch.cuda.device_count()
            if gpu_ids is None:  # use all GPUs
                device_id = job_id % num_gpu
            else:
                assert all(0 <= i < num_gpu for i in gpu_ids)
                # Jobs are spread over the listed devices in round-robin order.
                device_id = gpu_ids[job_id % len(gpu_ids)]
            torch.cuda.set_device(device_id)
            device = torch.device(f'cuda:{device_id}')

        print(
            f'@ Experiment: ID: {config["ID"]} ({len(configs)}), Seed: {seed}, Device: {device}, Job: {job_id} ({len(jobs)}), PID: {os.getpid()}'
        )
        banner = '#' * 50
        print(banner)
        for key, value in config.items():
            print(f'# {key}: {value}')
        print(banner)

        logdir = log_path / f'{config["ID"]}' / f'{seed}'
        result = run(config, seed, device, logdir)
        # Release all un-freed GPU memory
        if use_gpu:
            torch.cuda.empty_cache()
        return result

    if max_workers is not None:
        num_process = min(max_workers, len(jobs))
        with ProcessPoolExecutor(max_workers=num_process) as executor:
            results = list(
                executor.map(CloudpickleWrapper(_run),
                             jobs,
                             chunksize=chunksize))
    else:
        # Serial execution in the current process.
        results = [_run(job) for job in jobs]
    print(
        color_str(
            f'\nExperiment finished. Loggings are stored in {log_path.absolute()}. ',
            'cyan',
            bold=True))
    return results
Esempio n. 19
0
def evaluator(config, logdir, seed, make_env, learner_agent):
    r"""Periodically evaluate a copy of the learner's agent while training runs.

    Every time ``learner_agent.total_timestep`` reaches the next multiple of
    ``config['eval.freq']``, the learner's state dict is loaded into a local
    CPU agent, ``config['eval.num_episode']`` runner calls are made and the
    statistics are logged. The loop ends when the learner reaches
    ``config['train.timestep']``; the collected logs are then pickled.

    Args:
        config (dict): configuration. Keys read here: 'eval.freq',
            'train.timestep' and 'eval.num_episode'.
        logdir: directory (supports the ``/`` operator) for ``eval_logs.pkl``
        seed: random seed handed to ``make_env``
        make_env (function): environment factory called as
            ``make_env(config, seed, 'train')``
        learner_agent: agent being trained; ``total_timestep`` and
            ``state_dict()`` are read from it
    """
    torch.set_num_threads(1)  # VERY IMPORTANT TO AVOID GETTING STUCK
    eval_logs = []
    env = make_env(config, seed, 'train')
    agent = Agent(config, env, torch.device('cpu'))
    runner = EpisodeRunner(reset_on_call=True)
    evaluated_steps = config['eval.freq']
    describe_kwargs = dict(axis=-1, repr_indent=1, repr_prefix='\n')
    while learner_agent.total_timestep < config['train.timestep']:
        if learner_agent.total_timestep < evaluated_steps:
            # Next evaluation point not reached yet: wait and poll again.
            time.sleep(1.0)
            continue

        t0 = time.perf_counter()
        agent.load_state_dict(
            learner_agent.state_dict())  # copy to CPU by default
        with torch.no_grad():
            D = []
            for _ in range(config['eval.num_episode']):
                D.extend(runner(agent, env, env.spec.max_episode_steps))
        logger = Logger()
        logger('num_seconds', round(time.perf_counter() - t0, 1))
        logger('num_trajectories', len(D))
        logger('num_timesteps', sum(len(traj) for traj in D))
        logger('accumulated_trained_timesteps',
               learner_agent.total_timestep)

        # Keep only the infos that carry an 'episode' entry.
        episode_infos = []
        for traj in D:
            for info in traj.infos:
                if 'episode' in info:
                    episode_infos.append(info)
        online_returns = [info['episode']['return'] for info in episode_infos]
        online_horizons = [info['episode']['horizon'] for info in episode_infos]
        logger('online_return', describe(online_returns, **describe_kwargs))
        logger('online_horizon', describe(online_horizons, **describe_kwargs))

        monitor_env = get_wrapper(env, 'VecMonitor')
        logger('running_return',
               describe(monitor_env.return_queue, **describe_kwargs))
        logger('running_horizon',
               describe(monitor_env.horizon_queue, **describe_kwargs))
        logger.dump(keys=None,
                    index=0,
                    indent=0,
                    border=color_str('+' * 50, color='green'))
        eval_logs.append(logger.logs)

        evaluated_steps += config['eval.freq']
    pickle_dump(obj=eval_logs, f=logdir / 'eval_logs', ext='.pkl')