def retro_eval(predir, session_index=None):
    '''
    Method to run eval sessions by scanning a predir for ckpt files. Used to rerun failed eval sessions.
    @example

    yarn retro_eval data/reinforce_cartpole_2018_01_22_211751
    '''
    logger.info(f'Retro-evaluate sessions from predir {predir}')
    # collect all unique prepaths first
    prepaths = []
    s_filter = '' if session_index is None else f'_s{session_index}_'
    for filename in os.listdir(predir):
        if filename.endswith('model.pth') and s_filter in filename:
            res = re.search(r'.+epi(\d+)-totalt(\d+)', filename)
            if res is not None:
                prepath = f'{predir}/{res[0]}'
                if prepath not in prepaths:
                    prepaths.append(prepath)
    if ps.is_empty(prepaths):
        return

    logger.info('Starting retro eval')
    np.random.shuffle(prepaths)  # so that CUDA_ID by trial/session index is spread out
    rand_spec = util.prepath_to_spec(prepaths[0])  # get any prepath, read its max session
    max_session = rand_spec['meta']['max_session']
    util.parallelize_fn(run_wait_eval, prepaths, num_cpus=max_session)
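
# Usage sketch (assumption): calling retro_eval directly from Python rather than via
# the `yarn retro_eval` CLI shown in the docstring. The predir reuses the docstring's
# example path; any experiment output directory containing *model.pth checkpoints works.
if __name__ == '__main__':
    # re-evaluate every session found in the directory
    retro_eval('data/reinforce_cartpole_2018_01_22_211751')
    # or re-evaluate only the checkpoints belonging to session 0
    retro_eval('data/reinforce_cartpole_2018_01_22_211751', session_index=0)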
def run(self):
    num_cpus = ps.get(self.spec['meta'], 'resources.num_cpus', util.NUM_CPUS)
    info_spaces = []
    for _s in range(self.spec['meta']['max_session']):
        self.info_space.tick('session')
        info_spaces.append(deepcopy(self.info_space))
    if util.get_lab_mode() == 'train' and len(info_spaces) > 1:
        session_datas = util.parallelize_fn(self.init_session_and_run, info_spaces, num_cpus)
    else:  # dont parallelize when debugging to allow render
        session_datas = [self.init_session_and_run(info_space) for info_space in info_spaces]
    self.session_data_dict = {data.index[0]: data for data in session_datas}
    self.data = analysis.analyze_trial(self)
    self.close()
    return self.data
def run_sessions(self):
    logger.info('Running sessions')
    info_spaces = []
    for _s in range(self.spec['meta']['max_session']):
        self.info_space.tick('session')
        info_spaces.append(deepcopy(self.info_space))
    if util.get_lab_mode() == 'train' and len(info_spaces) > 1:
        # when training a single spec over multiple sessions
        session_datas = util.parallelize_fn(
            self.init_session_and_run,
            info_spaces,
            ps.get(self.spec['meta'], 'resources.num_cpus', util.NUM_CPUS))
    else:
        session_datas = []
        for info_space in info_spaces:
            session_data = self.init_session_and_run(info_space)
            session_datas.append(session_data)
            if analysis.is_unfit(session_data):
                break
    return session_datas
def run(self):
    info_spaces = []
    for _s in range(self.spec['meta']['max_session']):
        self.info_space.tick('session')
        info_spaces.append(deepcopy(self.info_space))
    if self.spec['meta']['train_mode']:
        session_datas = util.parallelize_fn(self.init_session_and_run, info_spaces)
    else:  # dont parallelize when debugging to allow render
        session_datas = [self.init_session_and_run(info_space) for info_space in info_spaces]
    self.session_data_dict = {data.index[0]: data for data in session_datas}
    self.data = analysis.analyze_trial(self)
    self.close()
    return self.data
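
# Minimal sketch (assumption) of the spec['meta'] fields read by the run methods above.
# The key names (max_session, train_mode, resources.num_cpus) come from the code itself;
# the surrounding structure and values are illustrative only, not the full spec schema.
example_meta = {
    'max_session': 4,       # number of sessions spawned per trial
    'train_mode': True,     # parallelize sessions; False keeps them serial so rendering works
    'resources': {
        'num_cpus': 4,      # optional; ps.get falls back to util.NUM_CPUS when absent
    },
}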