def main():
    args = parse_args()

    try:
        # assuming we're given the full name of the module
        run_module = importlib.import_module(f'{args.run}')
    except ImportError:
        try:
            run_module = importlib.import_module(f'sample_factory.runner.runs.{args.run}')
        except ImportError:
            log.error('Could not import the run module')
            return ExperimentStatus.FAILURE

    run_description = run_module.RUN_DESCRIPTION
    run_description.experiment_suffix = args.experiment_suffix

    if args.runner == 'processes':
        from sample_factory.runner.run_processes import run
        run(run_description, args)
    elif args.runner == 'slurm':
        from sample_factory.runner.run_slurm import run_slurm
        run_slurm(run_description, args)
    elif args.runner == 'ngc':
        from sample_factory.runner.run_ngc import run_ngc
        run_ngc(run_description, args)

    return ExperimentStatus.SUCCESS
def register_additional_doom_env(doom_spec):
    try:
        spec = doom_env_by_name(doom_spec.name)
        log.error('Doom env spec %s already exists', spec.name)
        return
    except RuntimeError:
        pass

    log.debug('Registering Doom environment %s...', doom_spec.name)
    DOOM_ENVS.append(doom_spec)
def wrapper(*args, **kwargs):
    for i in range(num_attempts):
        try:
            return func(*args, **kwargs)
        except exception_class as e:
            if i == num_attempts - 1:
                raise
            else:
                log.error('Failed with error %r, trying again', e)
                sleep(sleep_time)
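# The wrapper above only makes sense inside a retry decorator factory that supplies
# func, num_attempts, sleep_time, and exception_class via its closure. The factory below
# is a minimal sketch reconstructed from those closure names; the function name
# retry_sketch and its default arguments are assumptions, not the library's actual API.
import functools
from time import sleep

from sample_factory.utils.utils import log


def retry_sketch(num_attempts=3, sleep_time=1.0, exception_class=Exception):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for i in range(num_attempts):
                try:
                    return func(*args, **kwargs)
                except exception_class as e:
                    if i == num_attempts - 1:
                        raise
                    log.error('Failed with error %r, trying again', e)
                    sleep(sleep_time)
        return wrapper
    return decorator


# Possible usage (hypothetical function):
# @retry_sketch(num_attempts=5, sleep_time=2.0, exception_class=ConnectionError)
# def flaky_call():
#     ...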
def _write_dict_summaries(dictionary, writer, name, env_steps):
    for d, key, value in iterate_recursively(dictionary):
        if isinstance(value, bool):
            value = int(value)

        if isinstance(value, (int, float)):
            writer.add_scalar(f'zz_pbt/{name}_{key}', value, env_steps)
        elif isinstance(value, (tuple, list)):
            for i, tuple_value in enumerate(value):
                writer.add_scalar(f'zz_pbt/{name}_{key}_{i}', tuple_value, env_steps)
        else:
            log.error('Unsupported type in pbt summaries %r', type(value))
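# _write_dict_summaries relies on iterate_recursively() yielding a (containing_dict, key, value)
# triple for every leaf of a nested dictionary. Below is a minimal sketch of that assumed
# contract (the real helper lives in sample_factory.utils.utils; this reimplementation is
# only an illustration), followed by a hypothetical usage example.
def iterate_recursively_sketch(d):
    for k, v in d.items():
        if isinstance(v, dict):
            yield from iterate_recursively_sketch(v)
        else:
            yield d, k, v


# Hypothetical usage with a TensorBoard SummaryWriter:
# from torch.utils.tensorboard import SummaryWriter
# writer = SummaryWriter('train_dir/pbt_summaries')
# _write_dict_summaries({'reward_shaping': {'delta': 0.5, 'weights': [0.1, 0.9]}},
#                       writer, name='policy_0', env_steps=10000)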
def _set_env_attr(self, env, player_id, attr_chain, value):
    """Allows us to set an arbitrary attribute of the environment, e.g. attr_chain can be unwrapped.foo.bar"""
    assert player_id == self.player_id

    attrs = attr_chain.split('.')
    curr_attr = env
    try:
        for attr_name in attrs[:-1]:
            curr_attr = getattr(curr_attr, attr_name)
    except AttributeError:
        log.error('Env does not have an attribute %s', attr_chain)

    attr_to_set = attrs[-1]
    setattr(curr_attr, attr_to_set, value)
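# A small self-contained demonstration of the attr_chain traversal used above; the
# _Dummy objects and the 'unwrapped.foo.bar' attribute names are made up for illustration.
def _attr_chain_demo():
    class _Dummy:
        pass

    outer = _Dummy()
    outer.unwrapped = _Dummy()
    outer.unwrapped.foo = _Dummy()

    attrs = 'unwrapped.foo.bar'.split('.')
    target = outer
    for attr_name in attrs[:-1]:
        target = getattr(target, attr_name)
    setattr(target, attrs[-1], 42)
    assert outer.unwrapped.foo.bar == 42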
def _selected_weapon_rewards(self, selected_weapon, selected_weapon_ammo, deltas):
    # we must keep the weapon ready for a certain number of frames to get rewards
    unholstered = len(self.selected_weapon) > 4 and all(sw == selected_weapon for sw in self.selected_weapon)
    reward = 0.0

    if selected_weapon_ammo > 0 and unholstered:
        try:
            reward = self.reward_shaping_scheme['selected_weapon'][f'SELECTED{selected_weapon}']
        except KeyError:
            log.error('%r', self.reward_shaping_scheme)
            log.error('%r', selected_weapon)

        weapon_key = f'weapon{selected_weapon}'
        deltas.append((weapon_key, reward))
        self.reward_structure[weapon_key] = self.reward_structure.get(weapon_key, 0.0) + reward

    return reward
def get_gpus_without_triggering_pytorch_cuda_initialization(envvars=None):
    if envvars is None:
        envvars = os.environ

    import subprocess
    out = subprocess.run(
        [sys.executable, '-m', 'sample_factory.utils.get_available_gpus'],
        capture_output=True, env=envvars,
    )
    text_output = out.stdout.decode()
    err_output = out.stderr.decode()
    returncode = out.returncode

    from sample_factory.utils.utils import log
    if returncode:
        log.error(
            'Querying available GPUs... return code %d, error: %s, stdout: %s',
            returncode, err_output, text_output,
        )

    log.debug('Queried available GPUs: %s', text_output)
    return text_output
def reset(self):
    self._ensure_initialized()

    if self.record_to is not None and not self.is_multiplayer:
        # does not work in multiplayer (uses different mechanism)
        if not os.path.exists(self.record_to):
            os.makedirs(self.record_to)

        demo_path = self.demo_path(self._num_episodes)
        log.warning('Recording episode demo to %s', demo_path)
        self.game.new_episode(demo_path)
    else:
        if self._num_episodes > 0:
            # no demo recording (default)
            self.game.new_episode()

    self.state = self.game.get_state()
    img = None
    try:
        img = self.state.screen_buffer
    except AttributeError:
        # sometimes Doom does not return screen buffer at all??? Rare bug
        pass

    if img is None:
        log.error('Game returned None screen buffer! This is not supposed to happen!')
        img = self._black_screen()

    # Swap current and previous histogram
    if self.current_histogram is not None and self.previous_histogram is not None:
        swap = self.current_histogram
        self.current_histogram = self.previous_histogram
        self.previous_histogram = swap
        self.current_histogram.fill(0)

    self._actions_flattened = None
    self._last_episode_info = copy.deepcopy(self._prev_info)
    self._prev_info = None

    self._num_episodes += 1

    return np.transpose(img, (1, 2, 0))
def wrapper(*args, **kwargs):
    for i in range(num_attempts):
        try:
            return func(*args, **kwargs)
        except exception_class as e:
            # args[0] is the "self" of the wrapped method, i.e. the multi-agent wrapper instance
            multiagent_wrapper_obj = args[0]
            multiagent_wrapper_obj.initialized = False
            multiagent_wrapper_obj.close()

            # reset the env if the failure happened inside the step function
            if should_reset:
                multiagent_wrapper_obj.reset()

            if i == num_attempts - 1:
                raise
            else:
                log.error('Failed with error %r, trying again', e)
                sleep(sleep_time)
def init_wandb(cfg):
    """
    Must call initialization of Wandb before summary writer is initialized, otherwise
    sync_tensorboard does not work.
    """
    if not cfg.with_wandb:
        log.debug('Weights and Biases integration disabled')
        return

    if 'wandb_unique_id' not in cfg:
        # if we're going to restart the experiment, this will be saved to a json file
        cfg.wandb_unique_id = f'{cfg.experiment}_{datetime.now().strftime("%Y%m%d_%H%M%S_%f")}'

    wandb_unique_id = cfg.wandb_unique_id
    wandb_group = cfg.env if cfg.wandb_group is None else cfg.wandb_group

    log.debug(
        'Weights and Biases integration enabled. Project: %s, user: %s, group: %s, unique_id: %s',
        cfg.wandb_project, cfg.wandb_user, cfg.wandb_group, wandb_unique_id,
    )

    import wandb

    # this can fail occasionally, so we try a couple more times
    @retry(3, exceptions=(Exception,))
    def init_wandb_func():
        wandb.init(
            project=cfg.wandb_project,
            entity=cfg.wandb_user,
            sync_tensorboard=True,
            id=wandb_unique_id,
            name=wandb_unique_id,
            group=wandb_group,
            job_type=cfg.wandb_job_type,
            tags=cfg.wandb_tags,
            resume=True,
            settings=wandb.Settings(start_method='fork'),
        )

    log.debug('Initializing WandB...')
    try:
        init_wandb_func()
    except Exception as exc:
        log.error(f'Could not initialize WandB! {exc}')

    wandb.config.update(cfg, allow_val_change=True)
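# For reference, these are the config fields init_wandb() reads; the values below are
# placeholders only (real experiment configs define many more fields):
# _example_wandb_cfg = AttrDict(
#     with_wandb=True,
#     experiment='doom_battle_00',
#     env='doom_battle',
#     wandb_project='sample_factory',
#     wandb_user='my_wandb_entity',   # the W&B entity, hypothetical value
#     wandb_group=None,               # defaults to cfg.env when None
#     wandb_job_type='SF',
#     wandb_tags=[],
# )
# init_wandb(_example_wandb_cfg)  # would call wandb.init(...) with these values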
def read_seeds_file(filename, has_keys):
    seeds = []

    with open(filename, 'r') as seed_file:
        lines = seed_file.readlines()
        for line in lines:
            try:
                if has_keys:
                    seed, cache_key = line.split(' ')
                else:
                    seed = line

                seed = int(seed)
                seeds.append(seed)
            except Exception:
                log.error('Could not read seed value from the file! File potentially corrupted')
                log.exception('Exception when reading seeds file')

    return seeds
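# Illustrative seeds-file layout implied by the parsing above: one seed per line, followed
# by a cache key when has_keys=True ("<seed> <cache_key>"), or just "<seed>" otherwise.
# The file path and contents in this round-trip sanity check are hypothetical.
def _seeds_file_example(tmp_path='/tmp/seeds_example.txt'):
    with open(tmp_path, 'w') as f:
        f.write('17 key_a\n42 key_b\n')
    assert read_seeds_file(tmp_path, has_keys=True) == [17, 42]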
def run(self):
    for p in self.processes:
        time.sleep(0.3)
        p.start()

    finished_reset = np.zeros([self.cfg.num_workers], dtype=bool)
    while not all(finished_reset):
        try:
            msg = self.report_queue.get(timeout=0.1)
            if 'finished_reset' in msg:
                finished_reset[msg['proc_idx']] = True
                log.debug('Process %d finished reset! Status %r', msg['proc_idx'], finished_reset)
        except Empty:
            pass

    log.debug('All workers finished reset!')
    time.sleep(2)
    self.start_event.set()

    start = time.time()
    env_frames = 0
    last_process_report = [time.time() for _ in self.processes]

    while not self.terminate.value:
        try:
            try:
                msgs = self.report_queue.get_many(timeout=self.report_every_sec * 1.5)

                for msg in msgs:
                    last_process_report[msg['proc_idx']] = time.time()

                    if 'crash' in msg:
                        self.terminate.value = True
                        log.error('Terminating due to process %d crashing...', msg['proc_idx'])
                        break

                    env_frames += msg['env_frames']

                if env_frames >= self.cfg.sample_env_frames:
                    log.warning('Desired number of frames reached')
                    self.terminate.value = True

                if time.time() - start > self.cfg.timeout_seconds:
                    log.warning('Terminated by timer')
                    self.terminate.value = True
            except Empty:
                pass
        except KeyboardInterrupt:
            self.terminate.value = True
            log.error('KeyboardInterrupt in main loop! Terminating...')
            break

        if time.time() - self.last_report > self.report_every_sec:
            self.report(env_frames)

        for proc_idx, p in enumerate(self.processes):
            delay = time.time() - last_process_report[proc_idx]
            if delay > 600:
                # killing the whole script is the best way to know that some of the processes froze
                log.error('Process %d has not responded in %.1f s!!! Terminating...', proc_idx, delay)
                self.terminate.value = True

        for p in self.processes:
            if not p.is_alive():
                self.terminate.value = True
                log.error('Process %r died! terminating...', p)

    total_time = time.time() - start
    log.info('Collected %d frames in %.1f s, avg FPS: %.1f', env_frames, total_time, env_frames / total_time)
    log.debug('Done sampling...')
def sample(self, proc_idx):
    # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    if self.cfg.sampler_worker_gpus:
        set_gpus_for_process(
            proc_idx,
            num_gpus_per_process=1,
            process_type='sampler_proc',
            gpu_mask=self.cfg.sampler_worker_gpus,
        )

    timing = Timing()

    from threadpoolctl import threadpool_limits
    with threadpool_limits(limits=1, user_api=None):
        if self.cfg.set_workers_cpu_affinity:
            set_process_cpu_affinity(proc_idx, self.cfg.num_workers)

        initial_cpu_affinity = psutil.Process().cpu_affinity() if platform != 'darwin' else None
        psutil.Process().nice(10)

        with timing.timeit('env_init'):
            envs = []
            env_key = ['env' for _ in range(self.cfg.num_envs_per_worker)]

            for env_idx in range(self.cfg.num_envs_per_worker):
                global_env_id = proc_idx * self.cfg.num_envs_per_worker + env_idx
                env_config = AttrDict(worker_index=proc_idx, vector_index=env_idx, env_id=global_env_id)
                env = make_env_func(cfg=self.cfg, env_config=env_config)
                log.debug(
                    'CPU affinity after create_env: %r',
                    psutil.Process().cpu_affinity() if platform != 'darwin' else 'MacOS - None',
                )
                env.seed(global_env_id)
                envs.append(env)

                # this is to track the performance for individual DMLab levels
                if hasattr(env.unwrapped, 'level_name'):
                    env_key[env_idx] = env.unwrapped.level_name

            episode_length = [0 for _ in envs]
            episode_lengths = [deque([], maxlen=20) for _ in envs]

        # sample a lot of random actions once, otherwise it is pretty slow in Python
        total_random_actions = 500
        actions = [
            [env.action_space.sample() for _ in range(env.num_agents)]
            for _ in range(total_random_actions)
        ]
        action_i = 0

        try:
            with timing.timeit('first_reset'):
                for env_idx, env in enumerate(envs):
                    env.reset()
                    log.info('Process %d finished resetting %d/%d envs', proc_idx, env_idx + 1, len(envs))

                self.report_queue.put(dict(proc_idx=proc_idx, finished_reset=True))

            self.start_event.wait()

            with timing.timeit('work'):
                last_report = last_report_frames = total_env_frames = 0
                while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                    for env_idx, env in enumerate(envs):
                        with timing.add_time(f'{env_key[env_idx]}.step'):
                            obs, rewards, dones, infos = env.step(actions[action_i])
                            action_i = (action_i + 1) % total_random_actions

                        num_frames = sum([info.get('num_frames', 1) for info in infos])
                        total_env_frames += num_frames
                        episode_length[env_idx] += num_frames

                        if all(dones):
                            episode_lengths[env_idx].append(episode_length[env_idx] / env.num_agents)
                            episode_length[env_idx] = 0

                    with timing.add_time('report'):
                        now = time.time()
                        if now - last_report > self.report_every_sec:
                            last_report = now
                            frames_since_last_report = total_env_frames - last_report_frames
                            last_report_frames = total_env_frames
                            self.report_queue.put(dict(proc_idx=proc_idx, env_frames=frames_since_last_report))

                            if proc_idx == 0:
                                log.debug('Memory usage: %.4f Mb', memory_consumption_mb())

            # Extra check to make sure cpu affinity is preserved throughout the execution.
            # I observed weird effect when some environments tried to alter affinity of the current process, leading
            # to decreased performance.
            # This can be caused by some interactions between deep learning libs, OpenCV, MKL, OpenMP, etc.
            # At least user should know about it if this is happening.
            cpu_affinity = psutil.Process().cpu_affinity() if platform != 'darwin' else None
            assert initial_cpu_affinity == cpu_affinity, \
                f'Worker CPU affinity was changed from {initial_cpu_affinity} to {cpu_affinity}! ' \
                f'This can significantly affect performance!'
        except:
            log.exception('Unknown exception')
            log.error('Unknown exception in worker %d, terminating...', proc_idx)
            self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

        time.sleep(proc_idx * 0.01 + 0.01)
        log.info('Process %d finished sampling. Timing: %s', proc_idx, timing)

        for env_idx, env in enumerate(envs):
            if len(episode_lengths[env_idx]) > 0:
                log.warning('Level %s avg episode len %d', env_key[env_idx], np.mean(episode_lengths[env_idx]))

        for env in envs:
            env.close()
def init_subset(self, indices, actor_queues):
    """
    Initialize a subset of actor workers (rollout workers) and wait until the first reset()
    is completed for all envs on these workers.

    This function will retry if the worker process crashes during the initial reset.

    :param indices: indices of actor workers to initialize
    :param actor_queues: task queues corresponding to these workers
    :return: initialized workers
    """

    reset_timelimit_seconds = self.cfg.reset_timeout_seconds  # fail worker if not a single env was reset in that time

    workers = dict()
    last_env_initialized = dict()
    for i in indices:
        w = self.create_actor_worker(i, actor_queues[i])
        w.init()
        w.request_reset()
        workers[i] = w
        last_env_initialized[i] = time.time()

    total_num_envs = self.cfg.num_workers * self.cfg.num_envs_per_worker
    envs_initialized = [0] * self.cfg.num_workers
    workers_finished = set()

    while len(workers_finished) < len(workers):
        failed_worker = -1

        try:
            report = self.report_queue.get(timeout=1.0)

            if 'initialized_env' in report:
                worker_idx, split_idx, env_i = report['initialized_env']
                last_env_initialized[worker_idx] = time.time()
                envs_initialized[worker_idx] += 1

                log.debug(
                    'Progress for %d workers: %d/%d envs initialized...',
                    len(indices), sum(envs_initialized), total_num_envs,
                )
            elif 'finished_reset' in report:
                workers_finished.add(report['finished_reset'])
            elif 'critical_error' in report:
                failed_worker = report['critical_error']
        except Empty:
            pass

        for worker_idx, w in workers.items():
            if worker_idx in workers_finished:
                continue

            time_passed = time.time() - last_env_initialized[worker_idx]
            timeout = time_passed > reset_timelimit_seconds

            if timeout or failed_worker == worker_idx or not w.process.is_alive():
                envs_initialized[worker_idx] = 0

                log.error('Worker %d is stuck or failed (%.3f). Reset!', w.worker_idx, time_passed)
                log.debug('Status: %r', w.process.is_alive())
                stuck_worker = w
                stuck_worker.process.kill()

                new_worker = self.create_actor_worker(worker_idx, actor_queues[worker_idx])
                new_worker.init()
                new_worker.request_reset()

                last_env_initialized[worker_idx] = time.time()
                workers[worker_idx] = new_worker
                del stuck_worker

    return workers.values()
def sample(self, proc_idx):
    # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    timing = Timing()

    psutil.Process().nice(10)

    num_envs = len(DMLAB30_LEVELS_THAT_USE_LEVEL_CACHE)
    assert self.cfg.num_workers % num_envs == 0, \
        f'should have an integer number of workers per env, e.g. {1 * num_envs}, {2 * num_envs}, etc...'
    assert self.cfg.num_envs_per_worker == 1, 'use populate_cache with 1 env per worker'

    with timing.timeit('env_init'):
        env_key = 'env'
        env_desired_num_levels = 0
        env_num_levels_generated = 0

        global_env_id = proc_idx * self.cfg.num_envs_per_worker
        env_config = AttrDict(worker_index=proc_idx, vector_index=0, env_id=global_env_id)
        env = create_env(self.cfg.env, cfg=self.cfg, env_config=env_config)
        env.seed(global_env_id)

        # this is to track the performance for individual DMLab levels
        if hasattr(env.unwrapped, 'level_name'):
            env_key = env.unwrapped.level_name
            env_level = env.unwrapped.level

            approx_num_episodes_per_1b_frames = DMLAB30_APPROX_NUM_EPISODES_PER_BILLION_FRAMES[env_key]
            num_billions = DESIRED_TRAINING_LENGTH / int(1e9)
            num_workers_for_env = self.cfg.num_workers // num_envs
            env_desired_num_levels = int((approx_num_episodes_per_1b_frames * num_billions) / num_workers_for_env)

            env_num_levels_generated = len(
                dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[0].all_seeds[env_level]
            ) // num_workers_for_env

            log.warning(
                'Worker %d (env %s) generated %d/%d levels!',
                proc_idx, env_key, env_num_levels_generated, env_desired_num_levels,
            )
            time.sleep(4)

        env.reset()
        env_uses_level_cache = env.unwrapped.env_uses_level_cache

        self.report_queue.put(dict(proc_idx=proc_idx, finished_reset=True))

    self.start_event.wait()

    try:
        with timing.timeit('work'):
            last_report = last_report_frames = total_env_frames = 0
            while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                action = env.action_space.sample()
                with timing.add_time(f'{env_key}.step'):
                    env.step(action)

                total_env_frames += 1

                with timing.add_time(f'{env_key}.reset'):
                    env.reset()

                env_num_levels_generated += 1
                log.debug('Env %s done %d/%d resets', env_key, env_num_levels_generated, env_desired_num_levels)

                if env_num_levels_generated >= env_desired_num_levels:
                    log.debug('%s finished %d/%d resets, sleeping...', env_key, env_num_levels_generated, env_desired_num_levels)
                    time.sleep(30)  # free up CPU time for other envs

                # if env does not use level cache, there is no need to run it
                # let other workers proceed
                if not env_uses_level_cache:
                    log.debug('Env %s does not require cache, sleeping...', env_key)
                    time.sleep(200)

                with timing.add_time('report'):
                    now = time.time()
                    if now - last_report > self.report_every_sec:
                        last_report = now
                        frames_since_last_report = total_env_frames - last_report_frames
                        last_report_frames = total_env_frames
                        self.report_queue.put(dict(proc_idx=proc_idx, env_frames=frames_since_last_report))

                        if get_free_disk_space_mb(self.cfg) < 3 * 1024:
                            log.error('Not enough disk space! %d', get_free_disk_space_mb(self.cfg))
                            time.sleep(200)
    except:
        log.exception('Unknown exception')
        log.error('Unknown exception in worker %d, terminating...', proc_idx)
        self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

    time.sleep(proc_idx * 0.1 + 0.1)
    log.info('Process %d finished sampling. Timing: %s', proc_idx, timing)

    env.close()
def run(run_description, args):
    experiments = run_description.experiments
    max_parallel = args.max_parallel

    log.info('Starting processes with base cmds: %r', [e.cmd for e in experiments])
    log.info('Max parallel processes is %d', max_parallel)
    log.info('Monitor log files using\n\n\ttail -f train_dir/%s/**/**/sf_log.txt\n\n', run_description.run_name)

    processes = []
    processes_per_gpu = {g: [] for g in range(args.num_gpus)}

    experiments = run_description.generate_experiments(args.train_dir)
    next_experiment = next(experiments, None)

    def find_least_busy_gpu():
        least_busy_gpu = None
        gpu_available_processes = 0

        for gpu_id in range(args.num_gpus):
            available_processes = args.experiments_per_gpu - len(processes_per_gpu[gpu_id])
            if available_processes > gpu_available_processes:
                gpu_available_processes = available_processes
                least_busy_gpu = gpu_id

        return least_busy_gpu, gpu_available_processes

    def can_squeeze_another_process():
        if len(processes) >= max_parallel:
            return False

        if args.experiments_per_gpu > 0:
            least_busy_gpu, gpu_available_processes = find_least_busy_gpu()
            if gpu_available_processes <= 0:
                return False

        return True

    failed_processes = []
    last_log_time = 0
    log_interval = 3  # seconds

    while len(processes) > 0 or next_experiment is not None:
        while can_squeeze_another_process() and next_experiment is not None:
            cmd, name, root_dir, exp_env_vars = next_experiment

            cmd_tokens = cmd.split(' ')

            # workaround to make sure we're running the correct python executable from our virtual env
            if cmd_tokens[0].startswith('python'):
                cmd_tokens[0] = sys.executable
                log.debug('Using Python executable %s', cmd_tokens[0])

            ensure_dir_exists(join(args.train_dir, root_dir))

            envvars = os.environ.copy()

            best_gpu = None
            if args.experiments_per_gpu > 0:
                best_gpu, best_gpu_available_processes = find_least_busy_gpu()
                log.info(
                    'The least busy gpu is %d where we can run %d more processes',
                    best_gpu, best_gpu_available_processes,
                )
                envvars['CUDA_VISIBLE_DEVICES'] = f'{best_gpu}'

            log.info('Starting process %r', cmd_tokens)

            if exp_env_vars is not None:
                for key, value in exp_env_vars.items():
                    log.info('Adding env variable %r %r', key, value)
                    envvars[str(key)] = str(value)

            process = subprocess.Popen(cmd_tokens, stdout=None, stderr=None, env=envvars)
            process.gpu_id = best_gpu
            process.proc_cmd = cmd

            processes.append(process)

            if process.gpu_id is not None:
                processes_per_gpu[process.gpu_id].append(process.proc_cmd)

            log.info('Started process %s on GPU %r', process.proc_cmd, process.gpu_id)
            log.info('Waiting for %d seconds before starting next process', args.pause_between)
            time.sleep(args.pause_between)

            next_experiment = next(experiments, None)

        remaining_processes = []
        for process in processes:
            if process.poll() is None:
                remaining_processes.append(process)
                continue
            else:
                if process.gpu_id is not None:
                    processes_per_gpu[process.gpu_id].remove(process.proc_cmd)

                log.info('Process %r finished with code %r', process.proc_cmd, process.returncode)
                if process.returncode != 0:
                    failed_processes.append((process.proc_cmd, process.pid, process.returncode))
                    log.error('WARNING: RETURN CODE IS %r', process.returncode)

        processes = remaining_processes

        if time.time() - last_log_time > log_interval:
            if failed_processes:
                log.error(
                    'Failed processes: %s',
                    ', '.join([f'PID: {p[1]} code: {p[2]}' for p in failed_processes]),
                )
            last_log_time = time.time()

        time.sleep(0.1)

    log.info('Done!')

    return 0
def run_slurm(run_description, args):
    workdir = args.slurm_workdir
    pause_between = args.pause_between

    experiments = run_description.experiments

    log.info('Starting processes with base cmds: %r', [e.cmd for e in experiments])

    if not os.path.exists(workdir):
        log.info('Creating %s...', workdir)
        os.makedirs(workdir)

    if args.slurm_sbatch_template is not None:
        with open(args.slurm_sbatch_template, 'r') as template_file:
            sbatch_template = template_file.read()
    else:
        sbatch_template = SBATCH_TEMPLATE_DEFAULT

    log.info('Sbatch template: %s', sbatch_template)

    experiments = run_description.generate_experiments(args.train_dir)
    sbatch_files = []
    for experiment in experiments:
        cmd, name, *_ = experiment

        sbatch_fname = f'sbatch_{name}.sh'
        sbatch_fname = join(workdir, sbatch_fname)

        file_content = sbatch_template + '\n' + cmd + '\n\necho "Done!!!"'
        with open(sbatch_fname, 'w') as sbatch_f:
            sbatch_f.write(file_content)

        sbatch_files.append(sbatch_fname)

    partition = ''
    if args.slurm_partition is not None:
        partition = f'-p {args.slurm_partition} '

    job_ids = []
    idx = 0
    for sbatch_file in sbatch_files:
        idx += 1
        sbatch_fname = os.path.basename(sbatch_file)
        num_cpus = args.slurm_cpus_per_gpu * args.slurm_gpus_per_job
        cmd = f'sbatch {partition}--gres=gpu:{args.slurm_gpus_per_job} -c {num_cpus} --parsable --output {workdir}/{sbatch_fname}-slurm-%j.out {sbatch_file}'
        log.info('Executing %s...', cmd)

        if args.slurm_print_only:
            output = idx
        else:
            cmd_tokens = cmd.split()
            process = Popen(cmd_tokens, stdout=PIPE)
            output, err = process.communicate()
            exit_code = process.wait()
            log.info('Output: %s, err: %s, exit code: %r', output, err, exit_code)

            if exit_code != 0:
                log.error('sbatch process failed!')
                time.sleep(5)

        job_id = int(output)
        job_ids.append(str(job_id))

        time.sleep(pause_between)

    tail_cmd = f'tail -f {workdir}/*.out'
    log.info('Monitor log files using\n\n\t %s \n\n', tail_cmd)

    scancel_cmd = f'scancel {" ".join(job_ids)}'
    log.info('Jobs queued: %r', job_ids)
    log.info('Use this command to cancel your jobs: \n\t %s \n', scancel_cmd)
    with open(join(workdir, 'scancel.sh'), 'w') as fobj:
        fobj.write(scancel_cmd)

    log.info('Done!')
    return 0