Example #1
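Both snippets below are class methods excerpted from a larger module; they assume `json`, `os`, and `torch` are imported at module level, and that `logger`, `OngoingTrials`, `Tuner`, and `Launcher` are defined or imported in the enclosing file.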
    def hp_search(self):
        if not self.remote:
            if self.opt_model.max_instances_at_once > torch.cuda.device_count():
                raise Exception("'max_instances_at_once' must be less than or equal to the number of available GPUs")
        if not hasattr(self.opt_model, 'name'):
            logger.info("no 'update_optimal_model' method, checking for model.txt file . . . ")
            self.update_optimal_model()
        # initialize the tuner and the launcher ('gun')
        ongoing_trials = OngoingTrials()
        tuner = Tuner(self.opt_model, ongoing_trials)
        gun = Launcher(self.opt_model, ongoing_trials, remote=self.remote)
        logger.info('commencing hyper-parameter search . . . ')
        tuner.search_hp()
        gun.launch_trials()
        tuner.end_trial()
        # starting second set of trials
        tuner.search_hp()
        while ongoing_trials.status != 'STOPPED':
            gun.launch_trials()
            tuner.end_trial()
            # starting next set of trials
            tuner.search_hp()

        best_trial = tuner.get_best_trial()
        logger.info('best trial: ' + json.dumps(best_trial))
        if os.path.exists(self.path_to_best_trial):
            logger.info('overwriting best_trial.json . . .')
            os.remove(self.path_to_best_trial)
        with open(self.path_to_best_trial, 'w') as fp:
            json.dump(best_trial, fp)
            logger.info('results saved to best_trial.json')
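
The control flow above is an ask/tell loop: the tuner proposes hyper-parameter sets (`search_hp`), the launcher runs them (`launch_trials`), and the tuner records the outcomes (`end_trial`), repeating until the trial source reports `'STOPPED'`. The sketch below shows only that loop shape with toy stand-ins; the class names, fields, and objective here are illustrative, not the actual `OngoingTrials`/`Tuner`/`Launcher` API:

    import random

    class ToyTrials:  # minimal stand-in for OngoingTrials
        def __init__(self, budget):
            self.budget = budget
            self.status = 'RUNNING'
            self.pending = {}

    class ToyTuner:  # minimal stand-in for Tuner
        def __init__(self, trials):
            self.trials = trials
            self.results = {}

        def search_hp(self):
            # propose the next hp set, or stop once the budget is spent
            if len(self.results) >= self.trials.budget:
                self.trials.status = 'STOPPED'
                return
            trial_id = 'trial_%d' % len(self.results)
            self.trials.pending[trial_id] = {'lr': 10 ** random.uniform(-4, -1)}

        def end_trial(self):
            # record finished trials and clear the pending set
            self.results.update(self.trials.pending)
            self.trials.pending = {}

        def get_best_trial(self):
            return max(self.results.values(), key=lambda t: t['metric'])

    class ToyLauncher:  # minimal stand-in for Launcher
        def __init__(self, trials):
            self.trials = trials

        def launch_trials(self):
            # "run" each pending trial against a fake objective
            for hp in self.trials.pending.values():
                hp['metric'] = -abs(hp['lr'] - 0.01)

    trials = ToyTrials(budget=5)
    tuner, gun = ToyTuner(trials), ToyLauncher(trials)
    tuner.search_hp()                  # propose the first set
    while trials.status != 'STOPPED':
        gun.launch_trials()            # run what was proposed
        tuner.end_trial()              # record the results
        tuner.search_hp()              # propose the next set (or stop)
    print('best trial:', tuner.get_best_trial())

Note the first `search_hp()` sits outside the loop so there is always a pending set of trials when `launch_trials()` runs, mirroring the structure of `hp_search` above.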
Example #2
    def hp_search(self):
        if not self.remote:
            if self.opt_model.max_instances_at_once > torch.cuda.device_count():
                raise Exception(
                    "'max_instances_at_once' must be less than or equal to the number of available GPUs"
                )
        if not hasattr(self.opt_model, 'name'):
            logger.info(
                "opt_model has no 'name' attribute, checking for model.txt file . . . "
            )
            self.update_optimal_model()
        # initialize the tuner and the launcher ('gun')
        ongoing_trials = OngoingTrials()
        tuner = Tuner(self.opt_model, ongoing_trials)
        gun = Launcher(self.opt_model, ongoing_trials, remote=self.remote)
        logger.info('commencing hyper-parameter search . . . ')
        tuner.search_hp()
        gun.launch_trials()
        tuner.end_trial()
        # starting second set of trials
        tuner.search_hp()
        while ongoing_trials.status != 'STOPPED':
            gun.launch_trials()
            tuner.end_trial()
            # starting next set of trials
            tuner.search_hp()

        trials = tuner.get_trials()
        sorted_trial_ids = tuner.get_sorted_trial_ids()

        # save one checkpoint per trial, ranked best-first: <stem>0.pt, <stem>1.pt, ...
        checkpoint_stem = os.path.splitext(self.path_to_best_checkpoint)[0]
        for i, trial_id in enumerate(sorted_trial_ids):
            save_checkpoint_location = checkpoint_stem + str(i) + '.pt'
            if os.path.exists(save_checkpoint_location):
                logger.info('overwriting checkpoint . . .')
                os.remove(save_checkpoint_location)
            logger.info('trial ' + trial_id + '\tval: ' +
                        str(trials[trial_id]['metrics']))
            torch.save(trials[trial_id]['checkpoint'],
                       save_checkpoint_location)

        logger.info('best trial: ' +
                    str(trials[sorted_trial_ids[0]]['hp_values']) +
                    '\nbest value: ' +
                    str(trials[sorted_trial_ids[0]]['metrics']))

        best_trial = trials[sorted_trial_ids[0]]['hp_values']
        if os.path.exists(self.path_to_best_trial):
            logger.info('overwriting best_trial.json . . .')
            os.remove(self.path_to_best_trial)
        with open(self.path_to_best_trial, 'w') as fp:
            json.dump(best_trial, fp)
            logger.info('results saved to best_trial.json')
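
Example #2 ranks every trial and writes one checkpoint per rank (`<stem>0.pt` is the best) alongside `best_trial.json`. A short consumer-side sketch of reading those artifacts back, assuming the same naming scheme; the file paths here are illustrative stand-ins for `self.path_to_best_trial` and `self.path_to_best_checkpoint`:

    import json
    import os

    import torch

    # assumed output locations -- stand-ins for the attributes above
    path_to_best_trial = 'best_trial.json'
    path_to_best_checkpoint = 'checkpoint.pt'

    # read back the winning hyper-parameter values
    with open(path_to_best_trial) as fp:
        best_hp_values = json.load(fp)
    print('best hp values:', best_hp_values)

    # rank 0 is the best trial under the sorted_trial_ids ordering above
    best_checkpoint_path = os.path.splitext(path_to_best_checkpoint)[0] + '0.pt'
    state = torch.load(best_checkpoint_path, map_location='cpu')
    # `state` is whatever the trial stored under 'checkpoint', e.g. a
    # state_dict to hand to model.load_state_dict(state)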