def reset(self):
    if self._episode_recording_dir is not None and self._record_id > 0:
        # save the recorded actions to a JSON file
        with open(join(self._episode_recording_dir, 'actions.json'), 'w') as actions_file:
            json.dump(self._recorded_actions, actions_file)

        # rename the previous episode dir to include the episode reward
        reward = self._recorded_episode_reward + self._recorded_episode_shaping_reward
        new_dir_name = self._episode_recording_dir + f'_r{reward:.2f}'
        os.rename(self._episode_recording_dir, new_dir_name)
        log.info(
            'Finished recording %s (rew %.3f, shaping %.3f)',
            new_dir_name, reward, self._recorded_episode_shaping_reward,
        )

    dir_name = f'ep_{self._record_id:03d}_p{self._player_id}'
    self._episode_recording_dir = join(self._record_to, dir_name)
    ensure_dir_exists(self._episode_recording_dir)

    self._record_id += 1
    self._frame_id = 0
    self._recorded_episode_reward = 0
    self._recorded_episode_shaping_reward = 0
    self._recorded_actions = []

    return self.env.reset()

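# Hypothetical companion sketch (not part of the wrapper above): the per-episode
# actions.json written by reset() could be replayed in an environment created with
# the same configuration and seed. `make_env` is a placeholder factory; the old
# gym 4-tuple step API is assumed here.
import json
from os.path import join


def replay_episode(episode_dir, make_env):
    with open(join(episode_dir, 'actions.json')) as actions_file:
        actions = json.load(actions_file)

    env = make_env()
    env.reset()
    total_reward = 0.0
    for action in actions:
        _, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break
    return total_reward
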
def test_simple_cmd(self):
    logging.disable(logging.INFO)

    echo_params = ParamGrid([
        ('p1', [3.14, 2.71]),
        ('p2', ['a', 'b', 'c']),
        ('p3', list(np.arange(3))),
    ])
    experiments = [
        Experiment('test_echo1', 'echo', echo_params.generate_params(randomize=True)),
        Experiment('test_echo2', 'echo', echo_params.generate_params(randomize=False)),
    ]
    train_dir = ensure_dir_exists(join(project_tmp_dir(), 'tests'))

    root_dir_name = '__test_run__'
    rd = RunDescription(root_dir_name, experiments)

    args = runner_argparser().parse_args([])
    args.max_parallel = 8
    args.pause_between = 0
    args.train_dir = train_dir
    run(rd, args)

    rd2 = RunDescription(
        root_dir_name, experiments,
        experiment_dirs_sf_format=False,
        experiment_arg_name='--experiment_tst',
        experiment_dir_arg_name='--dir',
    )
    run(rd2, args)

    logging.disable(logging.NOTSET)
    shutil.rmtree(join(train_dir, root_dir_name))

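# Illustrative sketch only (not the project's ParamGrid): a grid over the same
# parameters as in test_simple_cmd expands into per-experiment argument dicts via
# a plain cartesian product. The helper name below is hypothetical.
import itertools


def expand_param_grid(grid):
    """grid is a list of (name, values) pairs; yields one dict per combination."""
    names = [name for name, _ in grid]
    value_lists = [values for _, values in grid]
    for combo in itertools.product(*value_lists):
        yield dict(zip(names, combo))


# Example: 2 * 3 * 3 = 18 parameter combinations.
example_grid = [('p1', [3.14, 2.71]), ('p2', ['a', 'b', 'c']), ('p3', [0, 1, 2])]
assert len(list(expand_param_grid(example_grid))) == 18
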
def __init__(self, run_name, experiments, train_dir=None):
    if train_dir is None:
        train_dir = ensure_dir_exists(join(os.getcwd(), 'train_dir'))

    self.train_dir = train_dir
    self.run_name = run_name
    self.experiments = experiments
    self.experiment_suffix = ''

def aggregate(path, subpath, experiments, ax, legend_name, group_id):
    print('Started aggregation {}'.format(path / subpath))

    curr_dir = os.path.dirname(os.path.abspath(__file__))
    cache_dir = join(curr_dir, 'cache')
    cache_env = join(cache_dir, subpath)

    if os.path.isdir(cache_env):
        with open(join(cache_env, f'{subpath}.pickle'), 'rb') as fobj:
            interpolated_keys = pickle.load(fobj)
    else:
        cache_env = ensure_dir_exists(cache_env)
        interpolated_keys = extract(experiments=experiments)
        with open(join(cache_env, f'{subpath}.pickle'), 'wb') as fobj:
            pickle.dump(interpolated_keys, fobj)

    for i, key in enumerate(interpolated_keys.keys()):
        plot(i, interpolated_keys[key], ax[i], legend_name, group_id)

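# Standalone sketch of the compute-once caching pattern used in aggregate() above,
# assuming any picklable result; the helper name and file layout are illustrative,
# not part of the actual plotting script.
import os
import pickle
from os.path import join


def cached_result(cache_dir, key, compute_fn):
    """Load `key` from cache_dir if a pickle exists, otherwise compute and store it."""
    os.makedirs(cache_dir, exist_ok=True)
    cache_file = join(cache_dir, f'{key}.pickle')
    if os.path.isfile(cache_file):
        with open(cache_file, 'rb') as fobj:
            return pickle.load(fobj)

    result = compute_fn()
    with open(cache_file, 'wb') as fobj:
        pickle.dump(result, fobj)
    return result
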
def run(run_description, args):
    experiments = run_description.experiments
    max_parallel = args.max_parallel

    log.info('Starting processes with base cmds: %r', [e.cmd for e in experiments])
    log.info('Max parallel processes is %d', max_parallel)
    log.info('Monitor log files using\n\n\ttail -f train_dir/%s/**/**/sf_log.txt\n\n', run_description.run_name)

    processes = []
    processes_per_gpu = {g: [] for g in range(args.num_gpus)}

    experiments = run_description.generate_experiments(args.train_dir)
    next_experiment = next(experiments, None)

    def find_least_busy_gpu():
        least_busy_gpu = None
        gpu_available_processes = 0

        for gpu_id in range(args.num_gpus):
            available_processes = args.experiments_per_gpu - len(processes_per_gpu[gpu_id])
            if available_processes > gpu_available_processes:
                gpu_available_processes = available_processes
                least_busy_gpu = gpu_id

        return least_busy_gpu, gpu_available_processes

    def can_squeeze_another_process():
        if len(processes) >= max_parallel:
            return False

        if args.experiments_per_gpu > 0:
            least_busy_gpu, gpu_available_processes = find_least_busy_gpu()
            if gpu_available_processes <= 0:
                return False

        return True

    failed_processes = []
    last_log_time = 0
    log_interval = 3  # seconds

    while len(processes) > 0 or next_experiment is not None:
        while can_squeeze_another_process() and next_experiment is not None:
            cmd, name, root_dir, exp_env_vars = next_experiment

            cmd_tokens = cmd.split(' ')

            # workaround to make sure we're running the correct python executable from our virtual env
            if cmd_tokens[0].startswith('python'):
                cmd_tokens[0] = sys.executable
                log.debug('Using Python executable %s', cmd_tokens[0])

            ensure_dir_exists(join(args.train_dir, root_dir))

            envvars = os.environ.copy()

            best_gpu = None
            if args.experiments_per_gpu > 0:
                best_gpu, best_gpu_available_processes = find_least_busy_gpu()
                log.info(
                    'The least busy gpu is %d where we can run %d more processes',
                    best_gpu, best_gpu_available_processes,
                )
                envvars['CUDA_VISIBLE_DEVICES'] = f'{best_gpu}'

            log.info('Starting process %r', cmd_tokens)

            if exp_env_vars is not None:
                for key, value in exp_env_vars.items():
                    log.info('Adding env variable %r %r', key, value)
                    envvars[str(key)] = str(value)

            process = subprocess.Popen(cmd_tokens, stdout=None, stderr=None, env=envvars)
            process.gpu_id = best_gpu
            process.proc_cmd = cmd

            processes.append(process)

            if process.gpu_id is not None:
                processes_per_gpu[process.gpu_id].append(process.proc_cmd)

            log.info('Started process %s on GPU %r', process.proc_cmd, process.gpu_id)
            log.info('Waiting for %d seconds before starting next process', args.pause_between)
            time.sleep(args.pause_between)

            next_experiment = next(experiments, None)

        remaining_processes = []
        for process in processes:
            if process.poll() is None:
                remaining_processes.append(process)
                continue
            else:
                if process.gpu_id is not None:
                    processes_per_gpu[process.gpu_id].remove(process.proc_cmd)
                log.info('Process %r finished with code %r', process.proc_cmd, process.returncode)
                if process.returncode != 0:
                    failed_processes.append((process.proc_cmd, process.pid, process.returncode))
                    log.error('WARNING: RETURN CODE IS %r', process.returncode)

        processes = remaining_processes

        if time.time() - last_log_time > log_interval:
            if failed_processes:
                log.error(
                    'Failed processes: %s',
                    ', '.join([f'PID: {p[1]} code: {p[2]}' for p in failed_processes]),
                )
            last_log_time = time.time()

        time.sleep(0.1)

    log.info('Done!')

    return 0

def __init__(self, cache_dir, experiment_dir, all_levels_for_experiment, policy_idx):
    self.cache_dir = cache_dir
    self.experiment_dir = experiment_dir
    self.policy_idx = policy_idx

    self.all_seeds = dict()
    self.available_seeds = dict()
    self.used_seeds = dict()
    self.num_seeds_used_in_current_run = dict()
    self.locks = dict()

    for lvl in all_levels_for_experiment:
        self.all_seeds[lvl] = []
        self.available_seeds[lvl] = []
        self.num_seeds_used_in_current_run[lvl] = multiprocessing.RawValue(ctypes.c_int32, 0)
        self.locks[lvl] = multiprocessing.Lock()

    log.debug('Reading the DMLab level cache...')
    cache_dir = ensure_dir_exists(cache_dir)

    lvl_seed_files = Path(cache_dir).rglob(f'*.{LEVEL_SEEDS_FILE_EXT}')
    for lvl_seed_file in lvl_seed_files:
        lvl_seed_file = str(lvl_seed_file)

        level = filename_to_level(os.path.relpath(lvl_seed_file, cache_dir))
        self.all_seeds[level] = read_seeds_file(lvl_seed_file, has_keys=True)
        self.all_seeds[level] = list(set(self.all_seeds[level]))  # leave only unique seeds
        log.debug('Level %s has %d total seeds available', level, len(self.all_seeds[level]))

    log.debug('Updating level cache for the current experiment...')
    used_lvl_seeds_dir = self.get_used_seeds_dir()
    used_seeds_files = Path(used_lvl_seeds_dir).rglob(f'*.{LEVEL_SEEDS_FILE_EXT}')
    self.used_seeds = dict()
    for used_seeds_file in used_seeds_files:
        used_seeds_file = str(used_seeds_file)

        level = filename_to_level(os.path.relpath(used_seeds_file, used_lvl_seeds_dir))
        self.used_seeds[level] = read_seeds_file(used_seeds_file, has_keys=False)
        log.debug('%d seeds already used in this experiment for level %s', len(self.used_seeds[level]), level)

        self.used_seeds[level] = set(self.used_seeds[level])

    for lvl in all_levels_for_experiment:
        lvl_seeds = self.all_seeds.get(lvl, [])
        lvl_used_seeds = self.used_seeds.get(lvl, [])

        lvl_remaining_seeds = set(lvl_seeds) - set(lvl_used_seeds)
        self.available_seeds[lvl] = list(lvl_remaining_seeds)

        same_levels_for_population = False
        if same_levels_for_population:
            # shuffle with fixed seed so agents in population get the same levels
            random.Random(42).shuffle(self.available_seeds[lvl])
        else:
            random.shuffle(self.available_seeds[lvl])

        log.debug('Env %s has %d remaining unused seeds', lvl, len(self.available_seeds[lvl]))

    log.debug('Done initializing global DMLab level cache!')

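# Minimal illustration of the fixed-seed shuffle mentioned in the comment above:
# seeding random.Random with the same value makes every policy in the population
# shuffle its remaining level seeds into the same order, whereas plain
# random.shuffle gives each policy its own order. Standalone example, not part of
# the cache class.
import random

seeds = list(range(10))

order_policy_0 = seeds.copy()
order_policy_1 = seeds.copy()
random.Random(42).shuffle(order_policy_0)
random.Random(42).shuffle(order_policy_1)
assert order_policy_0 == order_policy_1  # identical level order across the population
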
def get_used_seeds_dir(self):
    return ensure_dir_exists(join(self.experiment_dir, f'dmlab_used_lvl_seeds_p{self.policy_idx:02d}'))

def __init__(self, cfg):
    super().__init__(cfg)

    # we should not use CUDA in the main thread, only on the workers
    set_global_cuda_envvars(cfg)

    tmp_env = make_env_func(self.cfg, env_config=None)
    self.obs_space = tmp_env.observation_space
    self.action_space = tmp_env.action_space
    self.num_agents = tmp_env.num_agents

    self.reward_shaping_scheme = None
    if self.cfg.with_pbt:
        self.reward_shaping_scheme = get_default_reward_shaping(tmp_env)

    tmp_env.close()

    # shared memory allocation
    self.traj_buffers = SharedBuffers(self.cfg, self.num_agents, self.obs_space, self.action_space)

    self.actor_workers = None

    self.report_queue = MpQueue(40 * 1000 * 1000)
    self.policy_workers = dict()
    self.policy_queues = dict()

    self.learner_workers = dict()

    self.workers_by_handle = None

    self.policy_inputs = [[] for _ in range(self.cfg.num_policies)]
    self.policy_outputs = dict()
    for worker_idx in range(self.cfg.num_workers):
        for split_idx in range(self.cfg.worker_num_splits):
            self.policy_outputs[(worker_idx, split_idx)] = dict()

    self.policy_avg_stats = dict()
    self.policy_lag = [dict() for _ in range(self.cfg.num_policies)]

    self.last_timing = dict()
    self.env_steps = dict()
    self.samples_collected = [0 for _ in range(self.cfg.num_policies)]
    self.total_env_steps_since_resume = 0

    # currently this applies only to the current run, not the experiment as a whole
    # to change this behavior we'd need to save the state of the main loop to the filesystem
    self.total_train_seconds = 0

    self.last_report = time.time()
    self.last_experiment_summaries = 0

    self.report_interval = 5.0  # sec
    self.experiment_summaries_interval = self.cfg.experiment_summaries_interval  # sec

    self.avg_stats_intervals = (2, 12, 60)  # 10 seconds, 1 minute, 5 minutes

    self.fps_stats = deque([], maxlen=max(self.avg_stats_intervals))
    self.throughput_stats = [deque([], maxlen=5) for _ in range(self.cfg.num_policies)]
    self.avg_stats = dict()
    self.stats = dict()  # regular (non-averaged) stats

    init_wandb(self.cfg)

    self.writers = dict()
    writer_keys = list(range(self.cfg.num_policies))
    for key in writer_keys:
        summary_dir = join(summaries_dir(experiment_dir(cfg=self.cfg)), str(key))
        summary_dir = ensure_dir_exists(summary_dir)
        self.writers[key] = SummaryWriter(summary_dir, flush_secs=20)

    self.pbt = PopulationBasedTraining(self.cfg, self.reward_shaping_scheme, self.writers)

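# Rough sketch of the moving-average FPS idea behind fps_stats above, assuming the
# main loop appends a (timestamp, total_env_steps) pair roughly once per
# report_interval (5 s), so 2/12/60 entries cover ~10 s / 1 min / 5 min. The helper
# below is hypothetical and not part of the actual runner.
from collections import deque


def fps_over_interval(fps_stats, num_reports):
    """Average env steps per second over the last `num_reports` report intervals."""
    if len(fps_stats) <= 1:
        return 0.0
    past_t, past_steps = fps_stats[max(0, len(fps_stats) - 1 - num_reports)]
    now_t, now_steps = fps_stats[-1]
    return (now_steps - past_steps) / max(1e-9, now_t - past_t)


stats = deque([(0.0, 0), (5.0, 50000), (10.0, 110000)], maxlen=60)
print(fps_over_interval(stats, num_reports=2))  # 11000.0 steps/sec over the last ~10 s
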
def __init__(
    self,
    task_id,
    level,
    action_repeat,
    res_w,
    res_h,
    benchmark_mode,
    renderer,
    dataset_path,
    with_instructions,
    extended_action_set,
    use_level_cache,
    level_cache_path,
    gpu_index,
    extra_cfg=None,
):
    self.width = res_w
    self.height = res_h

    # self._main_observation = 'DEBUG.CAMERA_INTERLEAVED.PLAYER_VIEW_NO_RETICLE'
    self.main_observation = 'RGB_INTERLEAVED'
    self.instructions_observation = DMLAB_INSTRUCTIONS
    self.with_instructions = with_instructions and not benchmark_mode

    self.action_repeat = action_repeat

    self.random_state = None

    self.task_id = task_id
    self.level = level
    self.level_name = dmlab_level_to_level_name(self.level)

    # the policy index which currently acts in the environment
    self.curr_policy_idx = 0
    self.curr_cache = dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[self.curr_policy_idx]

    self.instructions = np.zeros([DMLAB_MAX_INSTRUCTION_LEN], dtype=np.int32)

    observation_format = [self.main_observation]
    if self.with_instructions:
        observation_format += [self.instructions_observation]

    config = {
        'width': self.width,
        'height': self.height,
        'gpuDeviceIndex': str(gpu_index),
        'datasetPath': dataset_path,
    }
    if extra_cfg is not None:
        config.update(extra_cfg)
    config = {k: str(v) for k, v in config.items()}

    self.use_level_cache = use_level_cache
    self.level_cache_path = ensure_dir_exists(level_cache_path)

    env_level_cache = self if use_level_cache else None
    self.env_uses_level_cache = False  # will be set to True when this env instance queries the cache
    self.last_reset_seed = None

    if env_level_cache is not None:
        if not isinstance(self.curr_cache, dmlab_level_cache.DmlabLevelCacheGlobal):
            raise Exception(
                'DMLab global level cache object is not initialized! Make sure to call '
                'dmlab_ensure_global_cache_initialized() in the main thread before you fork any child processes '
                'or create any DMLab envs'
            )

    self.dmlab = deepmind_lab.Lab(
        level,
        observation_format,
        config=config,
        renderer=renderer,
        level_cache=env_level_cache,
    )

    self.action_set = EXTENDED_ACTION_SET if extended_action_set else ACTION_SET
    self.action_list = np.array(self.action_set, dtype=np.intc)  # DMLAB requires intc type for actions

    self.last_observation = None

    self.render_scale = 5
    self.render_fps = 30
    self.last_frame = time.time()

    self.action_space = gym.spaces.Discrete(len(self.action_set))

    self.observation_space = gym.spaces.Dict(
        obs=gym.spaces.Box(low=0, high=255, shape=(self.height, self.width, 3), dtype=np.uint8)
    )
    if self.with_instructions:
        self.observation_space.spaces[self.instructions_observation] = gym.spaces.Box(
            low=0,
            high=DMLAB_VOCABULARY_SIZE,
            shape=[DMLAB_MAX_INSTRUCTION_LEN],
            dtype=np.int32,
        )

    self.benchmark_mode = benchmark_mode
    if self.benchmark_mode:
        log.warning('DmLab benchmark mode is true! Use this only for testing, not for actual training runs!')

    self.seed()