def log_eval(self, eval_output, **kwargs):
    r"""Log the summary of one evaluation iteration and return the logged records.

    Args:
        eval_output (dict): evaluation result; the keys ``'D'``, ``'n'``, ``'T'`` and
            ``'num_sec'`` are read from it.
        **kwargs: accepted but not used here.

    Returns:
        the ``logs`` attribute of the :class:`Logger` populated by this call.
    """
    batch = eval_output['D']
    iteration = eval_output['n']
    max_horizon = eval_output['T']
    elapsed = eval_output['num_sec']

    logger = Logger()
    # Reduce the rewards over axis 1 (presumably time), giving one return per
    # trajectory -- TODO confirm the layout of `numpy_rewards`.
    returns = batch.numpy_rewards.sum(1)

    entries = (
        ('evaluation_iteration', iteration + 1),
        ('num_seconds', round(elapsed, 1)),
        ('num_trajectories', batch.N),
        ('max_allowed_horizon', max_horizon),
        ('mean_horizon', batch.Ts.mean()),
        ('total_timesteps', batch.total_T),
        ('accumulated_trained_timesteps', self.agent.total_T),
        ('mean_return', returns.mean()),
        ('std_return', returns.std()),
        ('min_return', returns.min()),
        ('max_return', returns.max()),
    )
    for key, value in entries:
        logger(key, value)

    # Frame the dumped block with a highlighted separator line.
    banner = color_str('+' * 50, 'yellow', 'bold')
    print(banner)
    logger.dump(keys=None, index=None, indent=0)
    print(banner)

    return logger.logs
def eval(self, n=None, **kwargs):
    r"""Roll out evaluation episodes step by step and log their statistics.

    ``self.config['eval.num_episode']`` episodes are run in ``self.eval_env``, each for
    at most ``self.eval_env.spec.max_episode_steps`` steps. An episode only contributes
    to the statistics when the environment reports ``done`` within that budget.

    Args:
        n: not used by this implementation.
        **kwargs: must contain ``'accumulated_trained_timesteps'`` and
            ``'accumulated_trained_episodes'``; both are logged as given.

    Returns:
        the ``logs`` attribute of the :class:`Logger` populated by this call.
    """
    def summarize(values):
        # Shared formatting options for every summary that gets logged.
        return describe(values, axis=-1, repr_indent=1, repr_prefix='\n')

    tic = perf_counter()
    episode_returns, episode_horizons = [], []
    for _ in range(self.config['eval.num_episode']):
        obs = self.eval_env.reset()
        for _ in range(self.eval_env.spec.max_episode_steps):
            with torch.no_grad():
                action = self.agent.choose_action(obs, mode='eval')['action']
            next_obs, reward, done, info = self.eval_env.step(action)
            if not done[0]:  # [0] single environment
                obs = next_obs
                continue
            # Episode finished: read its statistics from the info entry.
            episode_stats = info[0]['episode']
            episode_returns.append(episode_stats['return'])
            episode_horizons.append(episode_stats['horizon'])
            break

    logger = Logger()
    logger('num_seconds', round(perf_counter() - tic, 1))
    logger('accumulated_trained_timesteps', kwargs['accumulated_trained_timesteps'])
    logger('accumulated_trained_episodes', kwargs['accumulated_trained_episodes'])
    logger('online_return', summarize(episode_returns))
    logger('online_horizon', summarize(episode_horizons))

    # Running statistics are kept by the 'VecMonitor' wrapper of the environment.
    monitor_env = get_wrapper(self.eval_env, 'VecMonitor')
    logger('running_return', summarize(monitor_env.return_queue))
    logger('running_horizon', summarize(monitor_env.horizon_queue))
    logger.dump(keys=None, index=0, indent=0, border=color_str('+'*50, color='green'))
    return logger.logs
def eval(self, n=None, **kwargs):
    r"""Evaluate the agent with ``self.runner`` and log summary statistics.

    Args:
        n (int, optional): index of the evaluation iteration. It is logged as ``n + 1``
            under ``'eval_iteration'``. When it is ``None`` (the default) that entry is
            skipped, because ``None + 1`` would raise a ``TypeError``.
        **kwargs: accepted but not used here.

    Returns:
        the ``logs`` attribute of the :class:`Logger` populated by this call.
    """
    def summarize(values):
        # Shared formatting options for every summary that gets logged.
        return describe(values, axis=-1, repr_indent=1, repr_prefix='\n')

    t0 = time.perf_counter()
    with torch.no_grad():
        # NOTE(review): the literal 10 is passed straight to the runner; presumably a
        # step/episode budget -- confirm against the runner's signature.
        D = self.runner(self.agent, self.eval_env, 10, mode='eval')

    logger = Logger()
    if n is not None:
        logger('eval_iteration', n + 1)
    logger('num_seconds', round(time.perf_counter() - t0, 1))
    logger('accumulated_trained_timesteps', self.agent.total_timestep)
    logger('online_return', summarize([sum(traj.rewards) for traj in D]))
    logger('online_horizon', summarize([traj.T for traj in D]))
    logger('running_return', summarize(self.eval_env.return_queue))
    logger('running_horizon', summarize(self.eval_env.horizon_queue))
    logger.dump(keys=None, index=0, indent=0, border=color_str('+'*50, color='green'))
    return logger.logs
def test_color_str():
    r"""Check the escape sequences produced by ``color_str`` (256-colour codes)."""
    # Colour plus a positional 'bold' attribute.
    bold_green = color_str('lagom', 'green', 'bold')
    assert bold_green == '\x1b[38;5;2m\x1b[1mlagom\x1b[0m'

    # Colour only.
    plain_white = color_str('lagom', 'white')
    assert plain_white == '\x1b[38;5;15mlagom\x1b[0m'
def test_color_str():
    r"""Check the escape sequences produced by ``color_str`` (basic ANSI colour codes)."""
    # Colour plus the ``bold`` keyword flag.
    bold_green = color_str('lagom', 'green', bold=True)
    assert bold_green == '\x1b[32m\x1b[1mlagom\x1b[0m'

    # Colour only.
    plain_white = color_str('lagom', 'white')
    assert plain_white == '\x1b[37mlagom\x1b[0m'
def run_experiment(worker_class, master_class, num_worker):
    r"""Launch a parallelized experiment with a master-worker pair.

    .. note::

        All logging folders are created before the experiment starts. The root folder is
        taken from the ``'log.dir'`` entry of the first configuration. Below it, one
        subfolder per configuration is created (named by the configuration ID), and
        below each of those one subfolder per random seed (named by the seed). For
        example, two configurations with three seeds give::

            - logs
                - 0  # ID number
                    - 123  # random seed
                    - 345
                    - 567
                - 1
                    - 123
                    - 345
                    - 567

        If the root folder already exists, the user is asked whether to delete it;
        otherwise it is renamed with an ``old_`` prefix and a fresh one is created.

    Args:
        worker_class (BaseExperimentWorker): a worker class for the experiment.
        master_class (BaseExperimentMaster): a master class for the experiment.
        num_worker (int): number of workers.
    """
    started = time()

    experiment = master_class(worker_class=worker_class, num_worker=num_worker)

    log_path = Path(experiment.configs[0]['log.dir'])
    if log_path.exists():
        msg = f"Logging directory '{log_path.absolute()}' already existed, do you want to clean it ?"
        if ask_yes_or_no(msg):
            rmtree(log_path)
            log_path.mkdir(parents=True)
        else:
            # Keep the previous results: move them aside, then start from a fresh folder.
            old_log_path = log_path.with_name('old_' + log_path.name)
            log_path.rename(old_log_path)
            log_path.mkdir(parents=True)
            print(f"The old logging directory is renamed to '{old_log_path.absolute()}'. \n")
            input('Please, press Enter to continue\n>>> ')
    else:
        log_path.mkdir(parents=True)

    # One folder per configuration ID, holding one folder per seed plus the config dump.
    for config in experiment.configs:
        config_dir = log_path / f'{config["ID"]}'
        for seed in experiment.seeds:
            (config_dir / f'{seed}').mkdir(parents=True)
        yaml_dump(obj=config, f=config_dir / 'config', ext='.yml')

    experiment.save_configs(log_path / 'configs')

    # Run experiment in parallel
    experiment()

    elapsed = timedelta(seconds=round(time() - started))
    print(color_str(f'\nTotal time: {elapsed}', 'green', 'bold'))
def run_experiment(run, config, seeds, log_dir, max_workers, chunksize=1, use_gpu=False, gpu_ids=None):
    r"""Run every (configuration, seed) pair of an experiment, optionally in parallel.

    Parallel execution uses :class:`concurrent.futures.ProcessPoolExecutor`.

    All logging folders are created before any job starts. The root folder is given by
    the user. Below it, one subfolder per configuration is created (named by the
    configuration ID), and below each of those one subfolder per random seed (named by
    the seed). The ``*.py`` files located next to the source file of :attr:`run` are
    copied to ``source_files`` under the root. For example, two configurations with
    three seeds give::

        - logs
            - 0  # ID number
                - 123  # random seed
                - 345
                - 567
            - 1
                - 123
                - 345
                - 567

    If the root folder already exists, the user is asked whether to delete it; otherwise
    it is renamed with an ``old_`` prefix and a fresh one is created.

    Args:
        run (function): a function that defines an algorithm, it must take the
            arguments `(config, seed, device, logdir)`
        config (Config): a :class:`Config` object defining all configuration settings
        seeds (list): a list of random seeds
        log_dir (str): a string to indicate the path to store loggings.
        max_workers (int): argument for ProcessPoolExecutor. If `None`, then all
            experiments run serially.
        chunksize (int): argument for Executor.map()
        use_gpu (bool): if `True`, then use CUDA. Otherwise, use CPU.
        gpu_ids (list): if `None`, then use all available GPUs. Otherwise, only use the
            GPU devices defined in the list.

    Returns:
        list: the value returned by :attr:`run` for each job, in job order.

    Raises:
        RuntimeError: if ``use_gpu`` is `True`, ``gpu_ids`` is `None` and no CUDA
            device is available.
    """
    configs = config.make_configs()

    # create logging dir
    log_path = Path(log_dir)
    if not log_path.exists():
        log_path.mkdir(parents=True)
    else:
        msg = f"Logging directory '{log_path.absolute()}' already existed, do you want to clean it ?"
        answer = ask_yes_or_no(msg)
        if answer:
            rmtree(log_path)
            log_path.mkdir(parents=True)
        else:  # back up
            old_log_path = log_path.with_name('old_' + log_path.name)
            log_path.rename(old_log_path)
            log_path.mkdir(parents=True)
            print(f"The old logging directory is renamed to '{old_log_path.absolute()}'. \n")
            input('Please, press Enter to continue\n>>> ')

    # save source files
    source_path = log_path / 'source_files'
    source_path.mkdir(parents=True)
    for source_file in Path(inspect.getsourcefile(run)).parent.glob('*.py'):
        copyfile(source_file, source_path / source_file.name)

    # Create subfolders for each ID and subsubfolders for each random seed
    for cfg in configs:
        config_dir = log_path / f'{cfg["ID"]}'
        # Create the ID folder explicitly so the config dump below also works
        # when there are no seeds.
        config_dir.mkdir(parents=True, exist_ok=True)
        for seed in seeds:
            (config_dir / f'{seed}').mkdir(parents=True)
        yaml_dump(obj=cfg, f=config_dir / 'config', ext='.yml')

    pickle_dump(configs, log_path / 'configs', ext='.pkl')

    # Create unique id for each job
    jobs = list(enumerate(product(configs, seeds)))

    def _run(job):
        job_id, (cfg, seed) = job
        # VERY IMPORTANT TO AVOID GETTING STUCK, oversubscription
        # see following links
        # https://github.com/pytorch/pytorch/issues/19163
        # https://software.intel.com/en-us/intel-threading-building-blocks-openmp-or-native-threads
        torch.set_num_threads(1)
        if use_gpu:
            # The device count is queried here, inside the job, as in the original code.
            num_gpu = torch.cuda.device_count()
            if gpu_ids is None:  # use all GPUs
                if num_gpu == 0:
                    # Fail with a clear message instead of a ZeroDivisionError below.
                    raise RuntimeError('use_gpu=True but no CUDA device is available.')
                device_id = job_id % num_gpu
            else:
                assert all([i >= 0 and i < num_gpu for i in gpu_ids])
                device_id = gpu_ids[job_id % len(gpu_ids)]
            torch.cuda.set_device(device_id)
            device = torch.device(f'cuda:{device_id}')
        else:
            device = torch.device('cpu')

        print(
            f'@ Experiment: ID: {cfg["ID"]} ({len(configs)}), Seed: {seed}, Device: {device}, Job: {job_id} ({len(jobs)}), PID: {os.getpid()}'
        )
        print('#' * 50)
        for key, value in cfg.items():
            print(f'# {key}: {value}')
        print('#' * 50)

        logdir = log_path / f'{cfg["ID"]}' / f'{seed}'
        result = run(cfg, seed, device, logdir)
        # Release all un-freed GPU memory
        if use_gpu:
            torch.cuda.empty_cache()
        return result

    if max_workers is None or not jobs:
        # Serial path. Also taken when there is nothing to run, because
        # ProcessPoolExecutor rejects max_workers=0.
        results = [_run(job) for job in jobs]
    else:
        with ProcessPoolExecutor(max_workers=min(max_workers, len(jobs))) as executor:
            results = list(executor.map(CloudpickleWrapper(_run), jobs, chunksize=chunksize))

    print(
        color_str(
            f'\nExperiment finished. Loggings are stored in {log_path.absolute()}. \n',
            'cyan',
            bold=True))
    return results
def evaluator(config, logdir, seed, make_env, learner_agent):
    r"""Periodically evaluate a copy of the learner agent until training ends.

    The loop runs while ``learner_agent.total_timestep`` is below
    ``config['train.timestep']``. Whenever the learner has reached the next multiple of
    ``config['eval.freq']``, its state dict is loaded into a local agent, which is then
    run for ``config['eval.num_episode']`` runner calls. The resulting statistics are
    logged. In between, the function sleeps for one second per poll. After the loop,
    all collected logs are pickled to ``logdir / 'eval_logs'``.

    Args:
        config: configuration; the keys ``'eval.freq'``, ``'train.timestep'`` and
            ``'eval.num_episode'`` are read.
        logdir: directory (supporting the ``/`` operator) where the logs are stored.
        seed: random seed forwarded to :attr:`make_env`.
        make_env (function): called as ``make_env(config, seed, 'train')``.
        learner_agent: the agent being trained; its ``total_timestep`` and
            ``state_dict()`` are read.
    """
    def summarize(values):
        # Shared formatting options for every summary that gets logged.
        return describe(values, axis=-1, repr_indent=1, repr_prefix='\n')

    torch.set_num_threads(1)  # VERY IMPORTANT TO AVOID GETTING STUCK

    eval_logs = []
    # NOTE(review): the environment is built with the 'train' tag although it is only
    # used for evaluation here -- confirm this is intended.
    env = make_env(config, seed, 'train')
    agent = Agent(config, env, torch.device('cpu'))
    runner = EpisodeRunner(reset_on_call=True)

    next_eval_at = config['eval.freq']
    while learner_agent.total_timestep < config['train.timestep']:
        if learner_agent.total_timestep < next_eval_at:
            # Not due yet: poll again later.
            time.sleep(1.0)
            continue

        tic = time.perf_counter()
        agent.load_state_dict(learner_agent.state_dict())  # copy to CPU by default
        with torch.no_grad():
            trajectories = []
            for _ in range(config['eval.num_episode']):
                trajectories.extend(runner(agent, env, env.spec.max_episode_steps))

        logger = Logger()
        logger('num_seconds', round(time.perf_counter() - tic, 1))
        logger('num_trajectories', len(trajectories))
        logger('num_timesteps', sum(len(traj) for traj in trajectories))
        logger('accumulated_trained_timesteps', learner_agent.total_timestep)

        # Only info entries carrying an 'episode' record hold the episode statistics.
        all_infos = chain.from_iterable(traj.infos for traj in trajectories)
        episode_infos = [info for info in all_infos if 'episode' in info]
        logger('online_return', summarize([info['episode']['return'] for info in episode_infos]))
        logger('online_horizon', summarize([info['episode']['horizon'] for info in episode_infos]))

        monitor_env = get_wrapper(env, 'VecMonitor')
        logger('running_return', summarize(monitor_env.return_queue))
        logger('running_horizon', summarize(monitor_env.horizon_queue))
        logger.dump(keys=None, index=0, indent=0, border=color_str('+' * 50, color='green'))

        eval_logs.append(logger.logs)
        next_eval_at += config['eval.freq']

    pickle_dump(obj=eval_logs, f=logdir / 'eval_logs', ext='.pkl')