def __init__(self, cfg):
    self.cfg = cfg

    if self.cfg.seed is not None:
        log.info('Setting fixed seed %d', self.cfg.seed)
        torch.manual_seed(self.cfg.seed)
        np.random.seed(self.cfg.seed)

    self.device = torch.device('cuda')

    self.train_step = self.env_steps = 0
    self.total_train_seconds = 0
    self.last_training_step = time.time()

    self.best_avg_reward = math.nan

    # write summaries frequently early in training, then progressively less often
    self.summary_rate_decay = LinearDecay([(0, 100), (1000000, 2000), (10000000, 10000)])
    self.last_summary_written = -1e9
    self.save_rate_decay = LinearDecay([(0, self.cfg.initial_save_rate), (1000000, 5000)], staircase=100)

    summary_dir = summaries_dir(experiment_dir(cfg=self.cfg))
    self.writer = SummaryWriter(summary_dir, flush_secs=10)
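# A minimal sketch of how the decayed summary rate above might gate summary writes.
# This assumes LinearDecay exposes an at(step) method returning the interpolated value
# at that step; the method name and the helper below are hypothetical illustrations,
# not the actual implementation.
def _maybe_write_summaries(self, step, summaries):
    rate = self.summary_rate_decay.at(step)  # e.g. every 100 steps early on, every 10000 later
    if step - self.last_summary_written >= rate:
        for tag, value in summaries.items():
            self.writer.add_scalar(tag, value, step)
        self.last_summary_written = step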
def test_example(self):
    experiment_name = 'test_example'
    register_custom_components()

    # test training for a few thousand frames
    cfg = custom_parse_args(argv=['--algo=APPO', '--env=my_custom_env_v1', f'--experiment={experiment_name}'])
    cfg.num_workers = 2
    cfg.train_for_env_steps = 100000
    cfg.save_every_sec = 1
    cfg.decorrelate_experience_max_seconds = 0
    cfg.seed = 0
    cfg.device = 'cpu'

    status = run_algorithm(cfg)
    self.assertEqual(status, ExperimentStatus.SUCCESS)

    # then test the evaluation of the saved model
    cfg = custom_parse_args(
        argv=['--algo=APPO', '--env=my_custom_env_v1', f'--experiment={experiment_name}'],
        evaluation=True,
    )
    cfg.device = 'cpu'
    status, avg_reward = enjoy(cfg, max_num_frames=1000)

    directory = experiment_dir(cfg=cfg)
    self.assertTrue(isdir(directory))
    shutil.rmtree(directory, ignore_errors=True)
    # self.assertFalse(isdir(directory))

    self.assertEqual(status, ExperimentStatus.SUCCESS)

    # this check is optional; a longer test would give the policy a chance to converge
    self.assertGreater(avg_reward, 60)
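# The same train-then-evaluate flow can be driven outside the unittest harness.
# A minimal sketch reusing only the helpers exercised by the test above; the
# experiment name and frame budget are arbitrary example values:
def run_example(experiment_name='example'):
    register_custom_components()
    cfg = custom_parse_args(argv=['--algo=APPO', '--env=my_custom_env_v1', f'--experiment={experiment_name}'])
    status = run_algorithm(cfg)
    if status == ExperimentStatus.SUCCESS:
        eval_cfg = custom_parse_args(
            argv=['--algo=APPO', '--env=my_custom_env_v1', f'--experiment={experiment_name}'],
            evaluation=True,
        )
        status, avg_reward = enjoy(eval_cfg, max_num_frames=1000)
        log.info('Evaluation finished with status %r, avg reward %.1f', status, avg_reward)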
def ensure_initialized(cfg, env_name):
    global DMLAB_INITIALIZED
    if DMLAB_INITIALIZED:
        return

    dmlab_register_models()

    if env_name == 'dmlab_30':
        # extra functions to calculate human-normalized score etc.
        EXTRA_EPISODIC_STATS_PROCESSING.append(dmlab_extra_episodic_stats_processing)
        EXTRA_PER_POLICY_SUMMARIES.append(dmlab_extra_summaries)

    num_policies = cfg.num_policies if hasattr(cfg, 'num_policies') else 1
    all_levels = list_all_levels_for_experiment(env_name)
    level_cache_dir = cfg.dmlab_level_cache_path
    dmlab_ensure_global_cache_initialized(experiment_dir(cfg=cfg), all_levels, num_policies, level_cache_dir)

    DMLAB_INITIALIZED = True
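# ensure_initialized() is idempotent but not thread-safe: two threads could both see
# DMLAB_INITIALIZED as False and run the setup twice. If call sites could race, a lock
# around the flag would make the pattern safe. A generic sketch (not from the original
# source):
import threading

_INIT_LOCK = threading.Lock()

def ensure_initialized_threadsafe(cfg, env_name):
    with _INIT_LOCK:
        ensure_initialized(cfg, env_name)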
def run_many(run_description):
    experiments = run_description.experiments
    max_parallel = run_description.max_parallel

    log.info('Starting processes with base cmds: %r', [e.cmd for e in experiments])
    log.info('Max parallel processes is %d', max_parallel)
    log.info('Monitor log files using tail -f train_dir/%s/**/**/log.txt', run_description.run_name)

    processes = []
    experiments = run_description.generate_experiments()
    next_experiment = next(experiments, None)

    while len(processes) > 0 or next_experiment is not None:
        while len(processes) < max_parallel and next_experiment is not None:
            cmd, name, root_dir = next_experiment

            log.info('Starting experiment "%s"', cmd)
            cmd_tokens = cmd.split(' ')
            logfile = open(join(experiment_dir(name, root_dir), 'log.txt'), 'wb')
            process = subprocess.Popen(cmd_tokens, stdout=logfile, stderr=logfile)
            process.process_logfile = logfile

            processes.append(process)
            next_experiment = next(experiments, None)

        remaining_processes = []
        for process in processes:
            if process.poll() is None:
                # still running
                remaining_processes.append(process)
            else:
                process.process_logfile.close()
                log.info('Process %r finished with code %r', process, process.returncode)

        processes = remaining_processes
        time.sleep(0.1)

    log.info('Done!')
    return 0
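# Hypothetical usage sketch: run_many() only requires an object with .experiments,
# .max_parallel, .run_name, and a generate_experiments() generator yielding
# (cmd, name, root_dir) tuples. A minimal stand-in for illustration (the .name and
# .root_dir attributes on the experiment objects are assumptions; only .cmd is used
# by run_many itself):
class StubRunDescription:
    def __init__(self, run_name, experiments, max_parallel=2):
        self.run_name = run_name
        self.experiments = experiments
        self.max_parallel = max_parallel

    def generate_experiments(self):
        for e in self.experiments:
            yield e.cmd, e.name, e.root_dir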
def _load_checkpoint(self, checkpoints_dir):
    checkpoints = self._get_checkpoints(checkpoints_dir)
    if len(checkpoints) <= 0:
        log.warning('No checkpoints found in %s', experiment_dir(cfg=self.cfg))
        return None
    else:
        latest_checkpoint = checkpoints[-1]
        log.warning('Loading state from checkpoint %s...', latest_checkpoint)
        if str(self.device) == 'cuda':
            # torch.load restores tensors to their original (GPU) device unless a map_location is given
            checkpoint_dict = torch.load(latest_checkpoint)
        else:
            # remap GPU tensors to CPU storage
            checkpoint_dict = torch.load(latest_checkpoint, map_location=lambda storage, loc: storage)
        return checkpoint_dict
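# What a caller might do with the returned dict. The key names ('model', 'optimizer',
# 'train_step') and the self.actor_critic / self.optimizer attributes are assumptions
# for illustration, not the actual checkpoint schema:
def _restore_from_checkpoint(self, checkpoint_dict):
    if checkpoint_dict is None:
        return
    self.actor_critic.load_state_dict(checkpoint_dict['model'])
    self.optimizer.load_state_dict(checkpoint_dict['optimizer'])
    self.train_step = checkpoint_dict.get('train_step', 0)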
def policy_reward_shaping_file(cfg, policy_id):
    return join(experiment_dir(cfg=cfg), f'policy_{policy_id:02d}_reward_shaping.json')
def policy_cfg_file(cfg, policy_id):
    return join(experiment_dir(cfg=cfg), f'policy_{policy_id:02d}_cfg.json')
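# A minimal sketch of how these per-policy paths might be used to persist state as
# JSON; the round-trip below is standard-library only, and the payload structure is
# an assumption:
import json

def save_policy_cfg(cfg, policy_id, policy_cfg_dict):
    with open(policy_cfg_file(cfg, policy_id), 'w') as f:
        json.dump(policy_cfg_dict, f, indent=2)

def load_policy_cfg(cfg, policy_id):
    with open(policy_cfg_file(cfg, policy_id)) as f:
        return json.load(f)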
def initialize(self):
    self._save_cfg()
    save_git_diff(experiment_dir(cfg=self.cfg))
def __init__(self, cfg):
    super().__init__(cfg)

    # we should not use CUDA in the main thread, only on the workers
    set_global_cuda_envvars(cfg)

    tmp_env = make_env_func(self.cfg, env_config=None)
    self.obs_space = tmp_env.observation_space
    self.action_space = tmp_env.action_space
    self.num_agents = tmp_env.num_agents

    self.reward_shaping_scheme = None
    if self.cfg.with_pbt:
        if hasattr(tmp_env.unwrapped, '_reward_shaping_wrapper'):
            # noinspection PyProtectedMember
            self.reward_shaping_scheme = tmp_env.unwrapped._reward_shaping_wrapper.reward_shaping_scheme
        else:
            try:
                from envs.doom.multiplayer.doom_multiagent_wrapper import MultiAgentEnv
                if isinstance(tmp_env.unwrapped, MultiAgentEnv):
                    self.reward_shaping_scheme = tmp_env.unwrapped.default_reward_shaping
            except ImportError:
                pass

    tmp_env.close()

    # shared memory allocation
    self.traj_buffers = SharedBuffers(self.cfg, self.num_agents, self.obs_space, self.action_space)

    self.actor_workers = None

    self.report_queue = MpQueue(20 * 1000 * 1000)
    self.policy_workers = dict()
    self.policy_queues = dict()

    self.learner_workers = dict()

    self.workers_by_handle = None

    self.policy_inputs = [[] for _ in range(self.cfg.num_policies)]
    self.policy_outputs = dict()
    for worker_idx in range(self.cfg.num_workers):
        for split_idx in range(self.cfg.worker_num_splits):
            self.policy_outputs[(worker_idx, split_idx)] = dict()

    self.policy_avg_stats = dict()
    self.policy_lag = [dict() for _ in range(self.cfg.num_policies)]

    self.last_timing = dict()
    self.env_steps = dict()
    self.samples_collected = [0 for _ in range(self.cfg.num_policies)]
    self.total_env_steps_since_resume = 0

    # currently this applies only to the current run, not the experiment as a whole;
    # to change this behavior we'd need to save the state of the main loop to the filesystem
    self.total_train_seconds = 0

    self.last_report = time.time()
    self.last_experiment_summaries = 0

    self.report_interval = 5.0  # sec
    self.experiment_summaries_interval = self.cfg.experiment_summaries_interval  # sec

    self.avg_stats_intervals = (2, 12, 60)  # 10 seconds, 1 minute, 5 minutes
    self.fps_stats = deque([], maxlen=max(self.avg_stats_intervals))
    self.throughput_stats = [deque([], maxlen=5) for _ in range(self.cfg.num_policies)]
    self.avg_stats = dict()
    self.stats = dict()  # regular (non-averaged) stats

    self.writers = dict()
    writer_keys = list(range(self.cfg.num_policies))
    for key in writer_keys:
        summary_dir = join(summaries_dir(experiment_dir(cfg=self.cfg)), str(key))
        summary_dir = ensure_dir_exists(summary_dir)
        self.writers[key] = SummaryWriter(summary_dir, flush_secs=20)

    self.pbt = PopulationBasedTraining(self.cfg, self.reward_shaping_scheme, self.writers)
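# A sketch of how the fps_stats deque above could be consumed, assuming it holds
# (timestamp, total_env_steps) samples appended once per report_interval. With
# avg_stats_intervals = (2, 12, 60) and a 5-second report interval, the three windows
# correspond to roughly 10 seconds, 1 minute, and 5 minutes. This is an illustration
# consistent with the fields above, not the verbatim implementation:
def _avg_fps(self):
    fps = []
    for interval in self.avg_stats_intervals:
        if len(self.fps_stats) > interval:
            past_moment, past_env_steps = self.fps_stats[len(self.fps_stats) - interval - 1]
            now, env_steps = self.fps_stats[-1]
            fps.append((env_steps - past_env_steps) / (now - past_moment))
    return fps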
def checkpoint_dir(cfg, policy_id):
    checkpoint_dir = join(experiment_dir(cfg=cfg), f'checkpoint_p{policy_id}')
    return ensure_dir_exists(checkpoint_dir)
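# Sketch: listing the checkpoints inside the per-policy dir, newest last. The
# 'checkpoint_*.pth' filename pattern is an assumption for illustration:
import glob

def get_checkpoints(cfg, policy_id):
    return sorted(glob.glob(join(checkpoint_dir(cfg, policy_id), 'checkpoint_*.pth')))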
def main(): """Script entry point.""" stop_at = 80 * 1000 * 1000 prefix = 'simple' # noinspection PyUnusedLocal experiments_very_sparse = [ Experiment('doom_curious_vs_vanilla/doom_maze_very_sparse/doom_maze_very_sparse_pre_0.0', 'A2C (no curiosity)'), Experiment('doom_sweep_very_sparse/doom_sweep_i_0.5_p_0.05', 'A2C+ICM (curious)'), ] # noinspection PyUnusedLocal experiments_sparse = [ Experiment('doom_curious_vs_vanilla/doom_maze_sparse/doom_maze_sparse_pre_0.0', 'A2C (no curiosity)'), Experiment('doom_curious_vs_vanilla/doom_maze_sparse/doom_maze_sparse_pre_0.05', 'A2C+ICM (curious)'), ] # noinspection PyUnusedLocal experiments_basic = [ Experiment('doom_curious_vs_vanilla/doom_maze/doom_maze_pre_0.0', 'A2C (no curiosity)'), Experiment('doom_curious_vs_vanilla/doom_maze/doom_maze_pre_0.05', 'A2C+ICM (curious)'), ] experiments = [ Experiment('doom_curious_vs_vanilla/doom_basic/doom_basic_pre_0.0', 'A2C (no curiosity)'), Experiment('doom_curious_vs_vanilla/doom_basic/doom_basic_pre_0.05', 'A2C+ICM (curious)'), ] plots = [ Plot('a2c_aux_summary/avg_reward', 'average reward', 'Avg. reward for the last 1000 episodes'), Plot( 'a2c_agent_summary/policy_entropy', 'policy entropy, nats', 'Stochastic policy entropy', ), ] for plot in plots: fig = plt.figure(figsize=(5, 4)) fig.add_subplot() for ex_i, experiment in enumerate(experiments): experiment_name = experiment.name.split(os.sep)[-1] experiments_root = join(*(experiment.name.split(os.sep)[:-1])) exp_dir = experiment_dir(experiment_name, experiments_root) path_to_events_dir = summaries_dir(exp_dir) events_files = [] for f in os.listdir(path_to_events_dir): if f.startswith('events'): events_files.append(join(path_to_events_dir, f)) if len(events_files) == 0: log.error('No events file for %s', experiment) continue events_files = sorted(events_files) steps, values = [], [] for events_file in events_files: iterator = tf.train.summary_iterator(events_file) while True: try: e = next(iterator, None) except Exception as exc: log.warning(str(exc)) break if e is None: break for v in e.summary.value: if e.step >= stop_at: break if v.tag == plot.name: steps.append(e.step) values.append(v.simple_value) # just in case values = np.nan_to_num(values) smooth = 10 values_smooth = running_mean(values, smooth) steps = steps[smooth:] values = values[smooth:] plt.plot(steps, values, color=COLORS[ex_i], alpha=0.2, label='__nolegend__') plt.plot(steps, values_smooth, color=COLORS[ex_i], label=experiment.descr, linewidth=2) plt.xlabel('environment steps') plt.ylabel(plot.axis) plt.title(plot.descr) plt.grid(True) plt.legend() plt.tight_layout() plots_dir = ensure_dir_exists(join(experiments_dir(), 'plots')) plot_name = plot.name.replace('/', '_') plt.savefig(join(plots_dir, f'{prefix}_{plot_name}.png')) plt.close() return 0
def experiment_dir(self):
    return experiment_dir(self._experiment_name, self.experiments_root)
def _checkpoint_dir(self):
    checkpoint_dir = join(experiment_dir(cfg=self.cfg), 'checkpoint')
    return ensure_dir_exists(checkpoint_dir)