def test_nni_cli():
    """Smoke-test the ``nnicli`` client against a locally started experiment.

    Starts the experiment described by the MNIST test config, points the
    client at ``http://localhost:8080`` and calls the version,
    job-statistics, experiment-status and trial-job-listing endpoints.
    Any exception is reported (message, repr and traceback) and re-raised.
    The experiment is stopped in every case.
    """
    import nnicli as nc

    experiment_config = 'config_test/examples/mnist.test.yml'
    try:
        # Give a previously stopped experiment enough time to exit so that
        # its port is free again.
        time.sleep(6)
        print(GREEN + 'Testing nnicli:' + experiment_config + CLEAR)

        nc.start_nni(experiment_config)
        # Pause before talking to the REST endpoint (presumably to let the
        # server come up).
        time.sleep(3)
        nc.set_endpoint('http://localhost:8080')

        version = nc.version()
        print(version)
        job_statistics = nc.get_job_statistics()
        print(job_statistics)
        experiment_status = nc.get_experiment_status()
        print(experiment_status)
        # The trial-job listing is only exercised; its result is not printed.
        nc.list_trial_jobs()

        print(GREEN + 'Test nnicli {}: TEST PASS'.format(experiment_config) + CLEAR)
    except Exception as error:
        print(RED + 'Test nnicli {}: TEST FAIL'.format(experiment_config) + CLEAR)
        print('%r' % error)
        traceback.print_exc()
        raise error
    finally:
        nc.stop_nni()
def execute(self, X, y, scoring, X_val=None, y_val=None):
    """Run one NNI hyperparameter-tuning experiment per model in the repo.

    The data is split into train/test parts: the supplied validation set is
    used when both ``X_val`` and ``y_val`` are given, otherwise an 80/20
    ``train_test_split`` with a fixed random state. The run is recorded
    through ``self.dao``. Each model key in ``self.model_repo.repo`` then
    gets its own NNI experiment, which is polled once per second until it
    reports ``DONE`` (or ``ERROR``). The experiment is always stopped before
    moving on to the next model.

    Args:
        X: Feature data.
        y: Target data.
        scoring: Scoring specification; stored with the run info.
        X_val: Optional validation features.
        y_val: Optional validation targets.
    """
    if X_val is not None and y_val is not None:
        x_train, x_test, y_train, y_test = X, X_val, y, y_val
    else:
        x_train, x_test, y_train, y_test = train_test_split(
            X, y, random_state=88, test_size=0.2)

    self.dao.insert_run_info(self.run_id, None, None, x_train, y_train,
                             scoring, None, x_test, y_test)

    for model_key in self.model_repo.repo:
        self.exec_config['max_trials'] = self.model_repo.count_map[model_key]
        self._generate_nni_exec_config(model_key)
        self._generate_model_exec_files(model_key)
        self._populate_remote_machine_details(model_key)

        config_path = os.path.join(self.run_dir, model_key + "_nni.yaml")
        base_port = self.exec_config['base_port']
        endpoint = "http://localhost:{0}".format(base_port)
        nc.set_endpoint(endpoint)

        try:
            # Stop first so a stale experiment (presumably one still bound
            # to the same port) does not block the new one.
            self._stop_nni(model_key, base_port, False)
            logger.info('Starting Hyperparameter Tuning with {0}'.format(
                model_key))
            self._start_nni(model_key, config_path, base_port)
            webbrowser.open(endpoint)

            # Fetch the profile once so that id and logDir come from the
            # same snapshot and only one REST round-trip is made.
            profile = nc.get_experiment_profile()
            self.dao.update_run_info_exp(
                self.run_id, profile['id'], profile['logDir'])

            succeed_count = 0
            seconds = 1
            while True:
                time.sleep(1)

                for stat in nc.get_job_statistics():
                    if stat['trialJobStatus'] == 'SUCCEEDED':
                        succeed_count = stat['trialJobNumber']

                if seconds % 10 == 0:
                    print(
                        '{0} seconds taken. Number of trials succeeded: {1}'
                        .format(seconds, succeed_count))

                experiment = nc.get_experiment_status()
                status = experiment['status']
                if status == 'DONE':
                    print(
                        'Experiment finished. {0} seconds taken. Number of trials succeeded: {1}'
                        .format(seconds, succeed_count))
                    break
                if status == 'ERROR':
                    # An errored experiment never reaches DONE; without this
                    # exit the loop would poll forever.
                    logger.error(
                        'Experiment for {0} entered ERROR status after {1} '
                        'seconds: {2}'.format(model_key, seconds, experiment))
                    break

                seconds += 1
        finally:
            self._stop_nni(model_key, base_port)
def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
    """Print the REST endpoint and a snapshot of the experiment's state.

    Points ``nc`` at ``rest_endpoint`` and prints the job statistics, the
    experiment status and the trial-job list, in that order.
    ``experiment_dir``, ``nni_source_dir`` and ``kwargs`` are accepted but
    not used here.
    """
    print(rest_endpoint)
    nc.set_endpoint(rest_endpoint)

    # Version query left disabled:
    # print(nc.version())
    job_statistics = nc.get_job_statistics()
    print(job_statistics)

    experiment_status = nc.get_experiment_status()
    print(experiment_status)

    trial_jobs = nc.list_trial_jobs()
    print(trial_jobs)