Beispiel #1
0
def test_nni_cli():
    """Smoke-test the ``nnicli`` client against a locally started experiment.

    Starts the experiment described by the MNIST test config, points the
    client at ``http://localhost:8080``, issues a few read-only queries and
    prints their results. On any failure the error and its traceback are
    printed and the exception is re-raised. The experiment is stopped in
    every case.
    """
    import nnicli as nc

    experiment_config = 'config_test/examples/mnist.test.yml'
    # Give a previously stopped experiment time to exit, so its port is free.
    port_release_delay = 6
    # Pause between launching the experiment and issuing the first query.
    startup_delay = 3

    try:
        time.sleep(port_release_delay)
        print(GREEN + 'Testing nnicli:' + experiment_config + CLEAR)

        nc.start_nni(experiment_config)
        time.sleep(startup_delay)
        nc.set_endpoint('http://localhost:8080')

        client_version = nc.version()
        print(client_version)
        job_statistics = nc.get_job_statistics()
        print(job_statistics)
        experiment_status = nc.get_experiment_status()
        print(experiment_status)
        # Result intentionally not printed; the call only has to succeed.
        nc.list_trial_jobs()

        pass_message = 'Test nnicli {}: TEST PASS'.format(experiment_config)
        print(GREEN + pass_message + CLEAR)
    except Exception as exc:
        fail_message = 'Test nnicli {}: TEST FAIL'.format(experiment_config)
        print(RED + fail_message + CLEAR)
        print(repr(exc))
        traceback.print_exc()
        raise exc
    finally:
        nc.stop_nni()
Beispiel #2
0
    def execute(self, X, y, scoring, X_val=None, y_val=None):
        """Run one NNI tuning experiment per model in the model repository.

        The run info (train/test data and scoring) is stored through
        ``self.dao`` first. Then, for every model key, the NNI config and
        model files are generated, the experiment is started on the
        configured base port, and this method polls the experiment once per
        second until it reaches a terminal status. The experiment is always
        stopped before moving on to the next model.

        Args:
            X: Feature data. Used as-is for training when both ``X_val`` and
                ``y_val`` are given; otherwise split by ``train_test_split``
                with ``test_size=0.2`` and ``random_state=88``.
            y: Targets belonging to ``X``.
            scoring: Scoring value stored alongside the run info.
            X_val: Optional held-out features used as the test set.
            y_val: Optional held-out targets used as the test set.
        """
        if X_val is not None and y_val is not None:
            x_train, x_test, y_train, y_test = X, X_val, y, y_val
        else:
            x_train, x_test, y_train, y_test = train_test_split(
                X, y, random_state=88, test_size=0.2)

        self.dao.insert_run_info(self.run_id, None, None, x_train, y_train,
                                 scoring, None, x_test, y_test)

        # Statuses other than 'DONE' after which the experiment will never
        # finish; polling must stop on these or the loop would spin forever.
        failed_statuses = ('ERROR', 'STOPPED')

        for model_key in self.model_repo.repo:
            max_trials = self.model_repo.count_map[model_key]
            self.exec_config['max_trials'] = max_trials

            self._generate_nni_exec_config(model_key)
            self._generate_model_exec_files(model_key)
            self._populate_remote_machine_details(model_key)

            config_path = os.path.join(self.run_dir, model_key + "_nni.yaml")
            base_port = self.exec_config['base_port']
            endpoint = "http://localhost:{0}".format(base_port)
            nc.set_endpoint(endpoint)

            try:
                # Stop anything left over on this port before starting.
                # NOTE(review): the meaning of the third argument (False) is
                # defined by _stop_nni, which is not visible here.
                self._stop_nni(model_key, base_port, False)

                logger.info('Starting Hyperparameter Tuning with {0}'.format(
                    model_key))
                self._start_nni(model_key, config_path, base_port)
                webbrowser.open(endpoint)

                # Fetch the profile once; both values come from the same
                # response.
                profile = nc.get_experiment_profile()
                exp_id = profile['id']
                log_dir = profile['logDir']
                self.dao.update_run_info_exp(self.run_id, exp_id, log_dir)

                succeed_count = 0
                # Counts polling iterations (one sleep of 1s each), so it is
                # a lower bound on the real elapsed time.
                seconds = 1
                while True:
                    time.sleep(1)
                    for stat in nc.get_job_statistics():
                        if stat['trialJobStatus'] == 'SUCCEEDED':
                            succeed_count = stat['trialJobNumber']

                    if seconds % 10 == 0:
                        print(
                            '{0} seconds taken. Number of trials succeeded: {1}'
                            .format(seconds, succeed_count))

                    status = nc.get_experiment_status()['status']
                    if status == 'DONE':
                        print(
                            'Experiment finished. {0} seconds taken. Number of trials succeeded: {1}'
                            .format(seconds, succeed_count))
                        break
                    if status in failed_statuses:
                        # Give up on this model but keep tuning the others.
                        logger.error(
                            'Experiment for {0} ended with status {1} after '
                            '{2} seconds. Number of trials succeeded: {3}'
                            .format(model_key, status, seconds,
                                    succeed_count))
                        break

                    seconds += 1
            finally:
                self._stop_nni(model_key, base_port)
Beispiel #3
0
 def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
     """Print the endpoint, then query it through ``nnicli`` and print the results.

     Only ``rest_endpoint`` is used; ``experiment_dir``, ``nni_source_dir``
     and any extra keyword arguments are accepted but ignored.
     """
     print(rest_endpoint)
     nc.set_endpoint(rest_endpoint)

     job_statistics = nc.get_job_statistics()
     print(job_statistics)

     experiment_status = nc.get_experiment_status()
     print(experiment_status)

     trial_jobs = nc.list_trial_jobs()
     print(trial_jobs)