import json
import logging
import os

import torch

# OngoingTrials, Tuner and Launcher are the project's own tuner / launcher classes;
# their imports are omitted here.

logger = logging.getLogger(__name__)


def hp_search(self):
    if not self.remote:
        if self.opt_model.max_instances_at_once > torch.cuda.device_count():
            raise Exception(
                "'max_instances_at_once' must be smaller or equal to the number of available gpus")
    if not hasattr(self.opt_model, 'name'):
        logger.info("no 'update_optimal_model' method, checking for model.txt file . . . ")
        self.update_optimal_model()
    # initialize the tuner and the gun, i.e. the search strategy and the trial launcher
    ongoing_trials = OngoingTrials()
    tuner = Tuner(self.opt_model, ongoing_trials)
    gun = Launcher(self.opt_model, ongoing_trials, remote=self.remote)
    logger.info('commencing hyper-parameter search . . . ')
    tuner.search_hp()
    gun.launch_trials()
    tuner.end_trial()
    # starting second set of trials
    tuner.search_hp()
    while ongoing_trials.status != 'STOPPED':
        gun.launch_trials()
        tuner.end_trial()
        # starting next set of trials
        tuner.search_hp()
    # persist the best hyper-parameter configuration
    best_trial = tuner.get_best_trial()
    logger.info('best trial: ' + json.dumps(best_trial))
    if os.path.exists(self.path_to_best_trial):
        logger.info('overwriting best_trial.json . . .')
        os.remove(self.path_to_best_trial)
    with open(self.path_to_best_trial, 'w') as fp:
        json.dump(best_trial, fp)
    logger.info('results saved to best_trial.json')
def hp_search(self):
    if not self.remote:
        if self.opt_model.max_instances_at_once > torch.cuda.device_count():
            raise Exception(
                "'max_instances_at_once' must be smaller or equal to the number of available gpus")
    if not hasattr(self.opt_model, 'name'):
        logger.info("no 'update_optimal_model' method, checking for model.txt file . . . ")
        self.update_optimal_model()
    # initialize the tuner and the gun, i.e. the search strategy and the trial launcher
    ongoing_trials = OngoingTrials()
    tuner = Tuner(self.opt_model, ongoing_trials)
    gun = Launcher(self.opt_model, ongoing_trials, remote=self.remote)
    logger.info('commencing hyper-parameter search . . . ')
    tuner.search_hp()
    gun.launch_trials()
    tuner.end_trial()
    # starting second set of trials
    tuner.search_hp()
    while ongoing_trials.status != 'STOPPED':
        gun.launch_trials()
        tuner.end_trial()
        # starting next set of trials
        tuner.search_hp()
    # rank the finished trials and save one checkpoint per trial, best first
    trials = tuner.get_trials()
    sorted_trial_ids = tuner.get_sorted_trial_ids()
    checkpoint_prefix = self.path_to_best_checkpoint.split('.')[0]
    for i in range(len(sorted_trial_ids)):
        save_checkpoint_location = checkpoint_prefix + str(i) + '.pt'
        if os.path.exists(save_checkpoint_location):
            logger.info('overwriting checkpoint . . .')
            os.remove(save_checkpoint_location)
        logger.info('trial ' + sorted_trial_ids[i] + '\tval: ' +
                    str(trials[sorted_trial_ids[i]]['metrics']))
        torch.save(trials[sorted_trial_ids[i]]['checkpoint'], save_checkpoint_location)
    logger.info('best trial: ' + str(trials[sorted_trial_ids[0]]['hp_values']) +
                '\nbest value: ' + str(trials[sorted_trial_ids[0]]['metrics']))
    # persist the best hyper-parameter configuration
    best_trial = trials[sorted_trial_ids[0]]['hp_values']
    if os.path.exists(self.path_to_best_trial):
        logger.info('overwriting best_trial.json . . .')
        os.remove(self.path_to_best_trial)
    with open(self.path_to_best_trial, 'w') as fp:
        json.dump(best_trial, fp)
    logger.info('results saved to best_trial.json')
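# hp_search above drives an ask / launch / report loop: the tuner proposes
# hyper-parameter configurations, the launcher runs them, and the tuner records the
# results until the search is stopped. The following is a minimal, self-contained
# sketch of that control flow only; the stub OngoingTrials, Tuner and Launcher
# classes are simplified stand-ins written for this illustration, not the project's
# real implementations, and the search space and stopping rule are assumptions.
import random


class _OngoingTrialsStub:
    def __init__(self):
        self.status = 'RUNNING'
        self.trials = {}


class _TunerStub:
    def __init__(self, ongoing_trials, max_trials=5):
        self.ongoing_trials = ongoing_trials
        self.max_trials = max_trials
        self.results = {}

    def search_hp(self):
        # "ask": sample a new configuration, or stop once enough trials have run
        if len(self.results) >= self.max_trials:
            self.ongoing_trials.status = 'STOPPED'
            return
        trial_id = 'trial_%d' % len(self.results)
        self.ongoing_trials.trials = {
            trial_id: {'hp_values': {'lr': random.choice([1e-2, 1e-3, 1e-4])}}}

    def end_trial(self):
        # "report": fold the launched trial and its metric back into the results
        self.results.update(self.ongoing_trials.trials)

    def get_best_trial(self):
        return max(self.results.values(), key=lambda trial: trial['metrics'])


class _LauncherStub:
    def __init__(self, ongoing_trials):
        self.ongoing_trials = ongoing_trials

    def launch_trials(self):
        # stand-in for training a model with each proposed configuration
        for trial in self.ongoing_trials.trials.values():
            trial['metrics'] = random.random()


if __name__ == '__main__':
    ongoing_trials = _OngoingTrialsStub()
    tuner = _TunerStub(ongoing_trials)
    gun = _LauncherStub(ongoing_trials)
    tuner.search_hp()
    gun.launch_trials()
    tuner.end_trial()
    tuner.search_hp()
    while ongoing_trials.status != 'STOPPED':
        gun.launch_trials()
        tuner.end_trial()
        tuner.search_hp()
    print('best trial:', tuner.get_best_trial())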