def wandb_init() -> None:
    """Initialise the wandb run for the current `config` state.

    Decides between three mutually exclusive situations based on the global
    `config` and the presence of a local run folder:
      * starting a brand-new run (fresh evolution, or a new fully-train run
        layered on top of a finished evolution run) -> `_new_run()`
      * continuing an existing run -> `_resume_run()`, optionally pulling the
        saved generations/model down from the linked evolution run when
        resuming a fully-train run
      * anything else is treated as an inconsistent state and raises.

    Finally pushes the full local config dict into `wandb.config` so the
    remote run reflects the effective settings.

    NOTE(review): relies entirely on module-level state (`config`, `wandb`,
    `run_folder_exists`, `download_*` helpers) — no parameters, no return.
    """
    # A fully-train run that is not resuming an earlier fully-train session.
    starting_new_fully_train_run = config.fully_train and not config.resume_fully_train
    # Fresh evolution: no local run folder and no wandb path to pull from.
    starting_new_evolutionary_run = not run_folder_exists(config.run_name) and not config.wandb_run_path
    # Either the run folder already exists locally, or we are resuming FT.
    continuing_a_run = run_folder_exists(config.run_name) or config.resume_fully_train

    if starting_new_fully_train_run or starting_new_evolutionary_run:
        # Either new evolution run or new fully train run.
        # Capture the evolution run path BEFORE _new_run(), which presumably
        # rewrites config.wandb_run_path to the new run — TODO confirm.
        evo_run_path = config.wandb_run_path
        _new_run()
        if config.fully_train:
            # Links the new fully_train wandb run back to the evolution run
            # it was spawned from, so resume_fully_train can find it later.
            wandb.config['evolution_run_path'] = evo_run_path
    elif continuing_a_run:
        print('resuming')
        _resume_run()
        if config.resume_fully_train:
            # Resuming a fully-train run: fetch the evolved population and the
            # partially trained model from the linked evolution run.
            print('resuming ft at', wandb.config.evolution_run_path)
            download_generations(run_path=wandb.config.evolution_run_path, replace=True)
            download_model(run_path=wandb.config.evolution_run_path, replace=True)
    else:
        # Neither a recognisable new run nor a resumable one — config state
        # is inconsistent with the filesystem/wandb state.
        raise Exception("Something went wrong with wandb")

    # Mirror every local config value into the wandb run's config.
    wandb.config.update(config.__dict__, allow_val_change=True)
def load_simple_config(config_path: str,
                       wandb_resume_run_fn,
                       wandb_new_run_fn,
                       ngpus: Optional[int] = None,
                       use_wandb_override: bool = True) -> None:
    """
    Used for loading a normal run that is not part of a batch run. Therefore it is much more simple than loading a
    batch config

    Steps:
        * Read config to get run name and wandb related info
        * Read saved config if it is a saved run
        * Load wandb if wandb is requested
        * Read original config again for overwrites
        * Overwrite n_gpus option if required
        * Save config to run folder

    @param config_path: path to the config, can be relative to configuration/configs
    @param wandb_resume_run_fn: function that allows wandb to resume
    @param wandb_new_run_fn: function that creates a new wandb run
    @param ngpus: number of gpus if config option should be overridden
    :param use_wandb_override: determines whether wandb actions should be taken in the config loading
    """
    # First read: only needed to discover the run name / wandb settings.
    config.read(config_path)
    run_name = config.run_name
    print(f'Run name: {run_name}')

    if run_man.run_folder_exists(run_name):
        print('Run folder already exists, reading its config')
        run_man.load_config(run_name)  # load saved config
        if config.use_wandb and use_wandb_override:
            wandb_resume_run_fn()
    else:
        print(f'No runs folder detected with name {run_name}. Creating one')
        if config.use_wandb and use_wandb_override:
            # New wandb run must exist before the run folder is set up
            # (folder setup presumably records wandb info — TODO confirm).
            wandb_new_run_fn()
        run_man.set_up_run_folder(config.run_name, use_wandb_override)

    # Second read: the CLI-provided config is the final authority — it
    # overwrites saved/wandb config (only values present in this config).
    config.read(config_path)

    if ngpus is not None:  # n_gpu override
        config.n_gpus = ngpus

    # Persist the effective config into the run folder (and wandb if enabled).
    run_man.save_config(run_name, use_wandb_override=use_wandb_override)
    print(f'config: {config.__dict__}')
def load_saved_config(effective_run_name: str, cli_cfg_file_name: str) -> str:
    """Load the saved config for a run, downloading it from wandb if needed.

    If the CLI config names a `wandb_run_path`, the run is first downloaded
    from wandb and the run name stored in that downloaded config takes
    precedence over `effective_run_name`. If a local run folder exists for
    the (possibly updated) run name, its saved config is loaded into the
    global `config`.

    @param effective_run_name: candidate run name from the batch scheduler/CLI
    @param cli_cfg_file_name: path to the CLI config file to probe for a
        wandb run path
    @return: the run name actually in effect after any wandb download
    """
    wandb_run_path = config.read_option(cli_cfg_file_name, 'wandb_run_path')
    if wandb_run_path:
        # wandb downloaded runs are not used by the batch scheduler.
        # this must be a standalone run
        print(f'downloading run from wandb with path: {wandb_run_path}')
        # if a wandb run path is specified - the cli configs run name is ignored, instead the run name
        # is determined by the saved config
        effective_run_name = download_run(run_path=wandb_run_path, replace=True)

    print('Run name', effective_run_name)
    if run_man.run_folder_exists(effective_run_name):
        print('Run folder already exists, reading its config')
        run_man.load_config(effective_run_name)

    return effective_run_name
def load_batch_config() -> None:
    """
    there are 3 possible levels of configs to be loaded:

    1: a saved config which is attached to an existing run which has been executed before
        this config does not exist when starting a fresh run, only when continuing an existing one

    2: a scheduled config. If a run scheduler is used, it will point to a one of the configs in its schedule

    3: the cli config, which is specified as a run arg to the main program

    when no run schedule is used, the cli config values overwrite the saved config (if one exists)
    an example of when this is desirable is to change the num gpu's when continuing a run,
    or to change the man num of generations, to evolve a population for longer

    when a run schedule is specified, it will fetch a config file eg: mms.json
    It may be desirable to override certain properties of all runs in a schedule
    An example of this is schedule = {elite,base} - we may want to turn on DataAug for bot
    ie: transform the schedule into {da_elite,da_base}
    thus when a run schedule is used, the cli config starting the schedule may contain overriding config values
    (eg: da=true)

    therefore the priority of configs when a schedule is being used is:
    saved config (if exists)  - lowest
    scheduled config          - middle
    cli config                - highest
    """
    cli_args = get_cli_args()
    # Stagger process start-up (presumably to avoid GPU/filesystem races when
    # many scheduled runs launch at once — TODO confirm).
    stagger(cli_args.stagger_number)

    # Early gpu override so the helpers below already see the right n_gpus;
    # re-applied after the final config.read() further down.
    if cli_args.ngpus is not None:
        config.n_gpus = cli_args.ngpus

    effective_run_name, scheduled_cfg_file_name = get_batch_schedule_run_names(cli_args)
    # Lowest priority: saved config (may also pull the run from wandb and
    # change the effective run name).
    effective_run_name = load_saved_config(effective_run_name, cli_args.config)

    # Middle priority: the scheduled config, when a schedule is in use.
    if scheduled_cfg_file_name:
        print(f'reading scheduled config: {scheduled_cfg_file_name}')
        config.read(scheduled_cfg_file_name)

    # Highest priority: the cli config.
    print(f'Reading cli config {cli_args.config}')
    config.read(cli_args.config)  # final authority on config values

    # must detect whether the scheduler is calling for a fully train, or an evolutionary run
    fully_train, resume_fully_train = batch_runner.get_fully_train_state(effective_run_name)
    print(f'scheduler is starting run with FT = {fully_train} continue FT = {resume_fully_train}')
    config.fully_train = fully_train
    config.resume_fully_train = resume_fully_train
    config.run_name = effective_run_name

    # Re-apply the gpu override: the config.read() above may have clobbered it.
    if cli_args.ngpus is not None:
        config.n_gpus = cli_args.ngpus
    else:
        print(f'no gpu argument given, using config value of {config.n_gpus}')

    # Full config is now loaded
    if config.use_wandb and not fully_train:
        wandb_init()

    if not run_man.run_folder_exists(config.run_name):
        print(f'New run, setting up run folder for {config.run_name}')
        run_man.set_up_run_folder(config.run_name)

    print(f'Saving conf to run {config.run_name}')
    # Re-assert the FT flags before saving — wandb_init() may have touched
    # config via a resumed wandb run (defensive; TODO confirm necessity).
    config.fully_train = fully_train
    config.resume_fully_train = resume_fully_train
    run_man.save_config(config.run_name, use_wandb_override=not config.fully_train)
    print(f'config: {config.__dict__}')