def test_example(self):
    experiment_name = 'test_example'

    register_custom_components()

    # test training for a few thousand frames
    cfg = custom_parse_args(argv=['--algo=APPO', '--env=my_custom_env_v1', f'--experiment={experiment_name}'])
    cfg.num_workers = 4
    cfg.train_for_env_steps = 100000
    cfg.save_every_sec = 1
    cfg.decorrelate_experience_max_seconds = 0
    cfg.seed = 0
    cfg.device = 'cpu'

    status = run_algorithm(cfg)
    self.assertEqual(status, ExperimentStatus.SUCCESS)

    # then test the evaluation of the saved model
    cfg = custom_parse_args(
        argv=['--algo=APPO', '--env=my_custom_env_v1', f'--experiment={experiment_name}'],
        evaluation=True,
    )
    cfg.device = 'cpu'
    status, avg_reward = enjoy(cfg, max_num_frames=1000)

    directory = experiment_dir(cfg=cfg)
    self.assertTrue(isdir(directory))
    shutil.rmtree(directory, ignore_errors=True)
    # self.assertFalse(isdir(directory))

    self.assertEqual(status, ExperimentStatus.SUCCESS)

    # optional check; a longer test would give training an actual chance to converge
    self.assertGreater(avg_reward, 60)
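# The test above relies on symbols imported elsewhere in its test module. The
# standard-library portion is certain from the calls used (shutil.rmtree, isdir,
# and the unittest.TestCase assert* methods on self); the project-level imports
# (run_algorithm, enjoy, ExperimentStatus, experiment_dir, custom_parse_args,
# register_custom_components) must come from the enclosing project, and their
# exact module paths are version-dependent, so they are not guessed here.
import shutil
import unittest
from os.path import isdir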
def ensure_initialized(cfg, env_name):
    global DMLAB_INITIALIZED
    if DMLAB_INITIALIZED:
        return

    dmlab_register_models()

    if env_name == 'dmlab_30':
        # extra functions to calculate human-normalized score etc.
        EXTRA_EPISODIC_STATS_PROCESSING.append(dmlab_extra_episodic_stats_processing)
        EXTRA_PER_POLICY_SUMMARIES.append(dmlab_extra_summaries)

    num_policies = cfg.num_policies if hasattr(cfg, 'num_policies') else 1
    all_levels = list_all_levels_for_experiment(env_name)
    level_cache_dir = cfg.dmlab_level_cache_path
    dmlab_ensure_global_cache_initialized(experiment_dir(cfg=cfg), all_levels, num_policies, level_cache_dir)

    DMLAB_INITIALIZED = True
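# Illustrative note (not from the original source): the module-level
# DMLAB_INITIALIZED flag assumes ensure_initialized() is only called during
# single-threaded startup. If concurrent callers were possible, the check
# would need a lock; a minimal sketch of that pattern, with hypothetical
# names, looks like this:
import threading

_init_lock = threading.Lock()
_initialized = False

def ensure_initialized_threadsafe(init_fn):
    """Run init_fn() exactly once, even with concurrent callers."""
    global _initialized
    with _init_lock:
        if not _initialized:
            init_fn()
            _initialized = True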
def policy_reward_shaping_file(cfg, policy_id):
    return join(experiment_dir(cfg=cfg), f'policy_{policy_id:02d}_reward_shaping.json')

def policy_cfg_file(cfg, policy_id):
    return join(experiment_dir(cfg=cfg), f'policy_{policy_id:02d}_cfg.json')
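# Hypothetical usage sketch (these helper names are not from the original
# source): the two path helpers above produce per-policy JSON files inside the
# experiment directory, e.g. policy_00_cfg.json, which can be written and read
# back like this:
import json

def save_policy_cfg(cfg, policy_id, cfg_dict):
    with open(policy_cfg_file(cfg, policy_id), 'w') as f:
        json.dump(cfg_dict, f, indent=2)

def load_policy_cfg(cfg, policy_id):
    with open(policy_cfg_file(cfg, policy_id)) as f:
        return json.load(f)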
def initialize(self):
    self._save_cfg()
    save_git_diff(experiment_dir(cfg=self.cfg))
    init_file_logger(experiment_dir(self.cfg))
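# A plausible sketch of what save_git_diff() might do (assumption: the real
# implementation may differ): capture the current `git diff` output into the
# experiment directory so the exact code state of a run can be reproduced later.
import subprocess
from os.path import join

def save_git_diff_sketch(directory):
    with open(join(directory, 'git.diff'), 'w') as outfile:
        subprocess.run(['git', 'diff'], stdout=outfile, check=False)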
def __init__(self, cfg):
    super().__init__(cfg)

    # we should not use CUDA in the main thread, only on the workers
    set_global_cuda_envvars(cfg)

    tmp_env = make_env_func(self.cfg, env_config=None)
    self.obs_space = tmp_env.observation_space
    self.action_space = tmp_env.action_space
    self.num_agents = tmp_env.num_agents

    self.reward_shaping_scheme = None
    if self.cfg.with_pbt:
        self.reward_shaping_scheme = get_default_reward_shaping(tmp_env)

    tmp_env.close()

    # shared memory allocation
    self.traj_buffers = SharedBuffers(self.cfg, self.num_agents, self.obs_space, self.action_space)

    self.actor_workers = None

    self.report_queue = MpQueue(40 * 1000 * 1000)
    self.policy_workers = dict()
    self.policy_queues = dict()

    self.learner_workers = dict()

    self.workers_by_handle = None

    self.policy_inputs = [[] for _ in range(self.cfg.num_policies)]
    self.policy_outputs = dict()
    for worker_idx in range(self.cfg.num_workers):
        for split_idx in range(self.cfg.worker_num_splits):
            self.policy_outputs[(worker_idx, split_idx)] = dict()

    self.policy_avg_stats = dict()
    self.policy_lag = [dict() for _ in range(self.cfg.num_policies)]

    self.last_timing = dict()
    self.env_steps = dict()
    self.samples_collected = [0 for _ in range(self.cfg.num_policies)]
    self.total_env_steps_since_resume = 0

    # currently this applies only to the current run, not the experiment as a whole
    # to change this behavior we'd need to save the state of the main loop to the filesystem
    self.total_train_seconds = 0

    self.last_report = time.time()
    self.last_experiment_summaries = 0

    self.report_interval = 5.0  # sec
    self.experiment_summaries_interval = self.cfg.experiment_summaries_interval  # sec

    self.avg_stats_intervals = (2, 12, 60)  # 10 seconds, 1 minute, 5 minutes

    self.fps_stats = deque([], maxlen=max(self.avg_stats_intervals))
    self.throughput_stats = [deque([], maxlen=5) for _ in range(self.cfg.num_policies)]
    self.avg_stats = dict()
    self.stats = dict()  # regular (non-averaged) stats

    init_wandb(self.cfg)

    self.writers = dict()
    writer_keys = list(range(self.cfg.num_policies))
    for key in writer_keys:
        summary_dir = join(summaries_dir(experiment_dir(cfg=self.cfg)), str(key))
        summary_dir = ensure_dir_exists(summary_dir)
        self.writers[key] = SummaryWriter(summary_dir, flush_secs=20)

    self.pbt = PopulationBasedTraining(self.cfg, self.reward_shaping_scheme, self.writers)
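# Illustrative sketch (a hypothetical helper, not part of the original class):
# how a deque like fps_stats above supports rolling throughput averages. If
# each entry is a (timestamp, total_env_steps) sample taken every
# report_interval = 5 s, then looking back 2, 12, or 60 samples yields the
# 10-second, 1-minute, and 5-minute averages from avg_stats_intervals.
from collections import deque

def rolling_fps(fps_stats, avg_interval):
    """Average env steps/sec over the last avg_interval samples."""
    if len(fps_stats) <= avg_interval:
        return 0.0
    past_time, past_steps = fps_stats[-avg_interval - 1]
    now_time, now_steps = fps_stats[-1]
    return (now_steps - past_steps) / max(1e-9, now_time - past_time)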