def reset_internal_config(run_name: str):
    """Resets a finished run's internal config so the run can be fully trained again."""
    config_path = os.path.join(get_run_folder_path(run_name), 'internal_config.json')
    with open(config_path, 'r') as f:
        j = json.load(f)

    if j['state'] == 'finished':
        j['state'] = 'ft'
        j['ft_started'] = False
        j['finished'] = False

    with open(config_path, 'w') as f:
        json.dump(j, f)
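# A minimal sketch of the internal_config.json fields these helpers read and write. Only
# 'state', 'ft_started', 'finished' and 'running' appear in the code above; the example values
# below are illustrative assumptions, not the project's actual schema.
_example_internal_config = {
    'state': 'finished',   # becomes 'ft' once reset_internal_config has run
    'running': False,      # cleared by set_run_inactive so another process may claim the run
    'ft_started': False,   # whether fully-training has already begun for this run
    'finished': False,     # whether the run completed end-to-end
}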
def _fully_train_logging(model: Network, loss: float, epoch: int, attempt: int, acc: float = -1):
    print('epoch: {}\nloss: {}'.format(epoch, loss))

    log = {}
    metric_name = 'accuracy_fm_' + str(model.target_feature_multiplier) + \
                  ("_r_" + str(attempt) if attempt > 0 else "")
    if acc != -1:
        log[metric_name] = acc
        print('accuracy: {}'.format(acc))
    print('\n')

    internal_config.ft_epoch = epoch
    save_config(config.run_name)

    if config.use_wandb:
        log['loss_' + str(attempt)] = loss
        wandb.log(log)

        model.save()
        wandb.save(model.save_location())

        wandb.config.update({'current_ft_epoch': epoch}, allow_val_change=True)
        wandb.save(join(get_run_folder_path(config.run_name), 'config.json'))
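# A rough sketch of where _fully_train_logging might sit in a fully-train loop. The training
# step (train_one_epoch) and evaluation (evaluate) are hypothetical placeholders, as is the
# every-10-epochs cadence; only _fully_train_logging and its signature come from the code above.
def _train_loop_sketch(model: Network, n_epochs: int, attempt: int):
    for epoch in range(n_epochs):
        loss = train_one_epoch(model)                      # hypothetical: returns the epoch's training loss
        acc = evaluate(model) if epoch % 10 == 9 else -1   # hypothetical: test accuracy every 10 epochs
        _fully_train_logging(model, loss, epoch, attempt, acc)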
def get_config_path(path: str, scheduler_run_name: str) -> Tuple[str, str]:
    """picks which config to use next."""
    config_paths = read_json(build_file_path(path))  # dict: path -> num_runs
    for config_path in config_paths:
        n_runs = config_paths[config_path]
        for i in range(n_runs):
            config_dict = read_json(build_file_path(config_path))
            scheduled_run_name = config_dict['run_name']
            run_name = _get_effective_run_name(scheduled_run_name, i, scheduler_run_name)

            run_path = run_man.get_run_folder_path(run_name)
            run_folder_exists = os.path.exists(run_path)
            if run_folder_exists:
                cfg.internal_config.load(run_name)

            run_currently_running_in_another_process = run_folder_exists and cfg.internal_config.running
            if run_currently_running_in_another_process:
                print('run {} is being run in another process, moving on'.format(run_name))

            if cfg.internal_config.finished or run_currently_running_in_another_process:
                cfg.internal_config.__init__()  # reset internal config
                continue

            print('scheduler running', run_name)
            if run_folder_exists:
                cfg.internal_config.running = True
                cfg.internal_config.save(run_name, False)

            return config_path, run_name

    raise Exception('Could not find any non-running/non-finished configs in the batch run')
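# A hedged usage sketch of get_config_path, assuming a scheduler that keeps claiming the next
# unclaimed config until none remain. The surrounding names (batch_file, run_scheduled_config)
# are hypothetical; only get_config_path and the exception it raises come from the code above.
def _schedule_all_sketch(batch_file: str, scheduler_run_name: str):
    while True:
        try:
            config_path, run_name = get_config_path(batch_file, scheduler_run_name)
        except Exception:
            break  # no non-running / non-finished configs left in the batch
        print(f'scheduler claimed {run_name} using config {config_path}')
        # run_scheduled_config(config_path, run_name)  # hypothetical: launch the actual run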
def save(self, run_name: str, wandb_save=True):
    file_path = join(runs_manager.get_run_folder_path(run_name), 'internal_config.json')
    with open(file_path, 'w+') as f:
        json.dump(self.__dict__, f, indent=2)

    if configuration.config.use_wandb and wandb_save:
        try:
            wandb.save(file_path)
        except ValueError:
            print('Error: you must call `wandb.init` before calling save. This happens because wandb is not '
                  'initialized in the main thread during fully training. If you were not fully training, this '
                  'should be investigated; otherwise ignore it')
def get_fully_train_state(run_name):
    """Reads the internal config of the given run and determines whether it should be evolved or fully trained."""
    run_path = run_man.get_run_folder_path(run_name)
    path_exists = os.path.exists(run_path)
    if path_exists:
        cfg.internal_config.load(run_name)

    fully_training = cfg.internal_config.state == 'ft'
    continue_fully_training = fully_training and cfg.internal_config.ft_started
    cfg.internal_config.__init__()  # reset internal config after reading it

    return fully_training, continue_fully_training
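# A minimal sketch of how the two flags returned by get_fully_train_state might be consumed;
# fully_train_run and evolve_run are hypothetical entry points, not functions from the code above.
def _dispatch_sketch(run_name: str):
    fully_training, continue_fully_training = get_fully_train_state(run_name)
    if fully_training:
        fully_train_run(run_name, resume=continue_fully_training)  # hypothetical
    else:
        evolve_run(run_name)  # hypothetical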
def __init__(self, fm: float, best: int, attempt: int):
    """
    @param fm: feature multiplier: how much bigger or smaller to make each layer
    @param best: the ranking of the network in evolution, i.e. best = 1 means that network got the highest
        accuracy in evolution
    """
    path = join(get_run_folder_path(config.run_name), 'logs', f'fm{fm}', f'best{best}')
    file = join(path, f'attempt{attempt}.log')
    Path(path).mkdir(parents=True, exist_ok=True)

    self.logger = logging.getLogger(file)
    self.logger.addHandler(logging.FileHandler(file, 'a'))
    self.logger.setLevel(logging.DEBUG)

    self.logger.info(f'fm:{fm}')
    self.logger.info(f'best:{best}')
    self.logger.info(f'attempt:{attempt}')
    self.logger.info(f'config:{config.__dict__}')

    print(f'INITIALIZED FT LOGGER. DIR={file}')
    sys.stdout.flush()
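# A hedged usage sketch for the logger above. The class that owns this __init__ is not named in
# this section, so FTLoggerClass is a placeholder for it and the argument values are illustrative:
#
#   ft_logger = FTLoggerClass(fm=1.0, best=1, attempt=0)
#   ft_logger.logger.info('beginning fully-train of the best evolved network')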
def set_run_inactive(run_name: str):
    """Marks the run as no longer running in its internal_config.json."""
    config_path = os.path.join(get_run_folder_path(run_name), 'internal_config.json')
    with open(config_path, 'r') as f:
        j = json.load(f)

    j['running'] = False

    with open(config_path, 'w') as f:
        json.dump(j, f)
if __name__ == '__main__':
    dirs = [d for d in os.listdir(__get_runs_folder_path()) if os.path.isdir(get_run_folder_path(d))]
    for dir in dirs:
        try:
            wipe(dir)
            reset_internal_config(dir)
            set_run_inactive(dir)
        except FileNotFoundError:
            print(f'no relevant files in {dir}')
        config.fully_train_best_n_blueprints)
    print(f'best blueprints ({len(best_blueprints)}): {[b[0].id for b in best_blueprints]}')

    best = 1
    for bp, _ in best_blueprints:
        for fm in [1, 3, 5]:
            old_file_name = f'bp-{bp.id}_fm-{fm}.model'
            new_file_name = f'bp-{bp.id}_fm-{fm}-best-{best}.model'
            old_file_path = os.path.join(get_fully_train_folder_path(run_name), old_file_name)
            new_file_path = os.path.join(get_fully_train_folder_path(run_name), new_file_name)
            if os.path.exists(old_file_path):
                print(f'renaming: {old_file_name} to {new_file_name}')
                os.rename(old_file_path, new_file_path)
        best += 1


if __name__ == '__main__':
    for run in os.listdir(__get_runs_folder_path()):
        if 'base' in run or 'elite' in run:
            fix(get_run_folder_path(run))
    # fix(get_run_folder_path('elite_1'))