def launch_test(config_file, training_service, test_case_config):
    """Launch the experiment for one test case and optionally wait for it.

    The launch command is obtained via ``get_command(test_case_config,
    'launchCommand')`` and executed through the shell. When the test case
    sets ``experimentStatusCheck``, the experiment status is polled once per
    second until it is ``DONE`` or ``ERROR``, a trial job has failed, or more
    than ``max_duration + 10`` seconds have elapsed, and the outcome is then
    verified.

    Args:
        config_file: Passed to ``get_max_values`` to obtain the maximum
            duration and the maximum trial number.
        training_service: Passed to ``print_trial_job_log`` when the
            experiment did not finish successfully.
        test_case_config: Mapping describing the test case. The keys read
            here are ``setExperimentIdtoVar`` and ``experimentStatusCheck``.

    Raises:
        AssertionError: If the launch command exits with a non-zero code, if
            ``setExperimentIdtoVar`` does not start with ``$``, or if the
            experiment did not reach ``DONE`` with at least ``max_trial_num``
            succeeded or early-stopped trials.
    """
    launch_command = get_command(test_case_config, 'launchCommand')
    print('launch command: ', launch_command, flush=True)

    proc = subprocess.run(launch_command, shell=True)
    # Explicit raise instead of `assert` so the check survives `python -O`.
    if proc.returncode != 0:
        raise AssertionError(
            'launch command failed with code %d' % proc.returncode)

    # set experiment ID into variable
    exp_var_name = test_case_config.get('setExperimentIdtoVar')
    if exp_var_name is not None:
        if not exp_var_name.startswith('$'):
            raise AssertionError(
                'setExperimentIdtoVar must start with "$": %r' % exp_var_name)
        it_variables[exp_var_name] = get_experiment_id(EXPERIMENT_URL)
    print('variables:', it_variables)

    max_duration, max_trial_num = get_max_values(config_file)
    print('max_duration:', max_duration, ' max_trial_num:', max_trial_num)

    if not test_case_config.get('experimentStatusCheck'):
        return

    bg_time = time.time()
    print(str(datetime.datetime.now()), ' waiting ...', flush=True)

    # Pre-bind both names: the handler below and the final check read them
    # even when the try body fails (or times out) before assigning them.
    experiment_id = None
    status = None
    try:
        # wait restful server to be ready
        time.sleep(3)
        experiment_id = get_experiment_id(EXPERIMENT_URL)
        while True:
            waited_time = time.time() - bg_time
            if waited_time > max_duration + 10:
                print('waited: {}, max_duration: {}'.format(
                    waited_time, max_duration))
                break
            status = get_experiment_status(STATUS_URL)
            if status in ['DONE', 'ERROR']:
                print('experiment status:', status)
                break
            num_failed = len(get_failed_trial_jobs(TRIAL_JOBS_URL))
            if num_failed > 0:
                print('failed jobs: ', num_failed)
                break
            time.sleep(1)
    except BaseException:
        # Deliberately broad (includes KeyboardInterrupt): dump the log for
        # diagnosis, then always re-raise. Skip the dump when the experiment
        # ID was never obtained so the original error is not masked.
        if experiment_id is not None:
            print_experiment_log(experiment_id=experiment_id)
        raise
    print(str(datetime.datetime.now()), ' waiting done', flush=True)
    if get_experiment_status(STATUS_URL) == 'ERROR':
        print_experiment_log(experiment_id=experiment_id)

    trial_stats = get_trial_stats(TRIAL_JOBS_URL)
    print(json.dumps(trial_stats, indent=4), flush=True)
    # `status` is the last value polled inside the loop (None if the loop
    # timed out before the first poll), not the status re-fetched above.
    finished_trials = trial_stats['SUCCEEDED'] + trial_stats['EARLY_STOPPED']
    if status != 'DONE' or finished_trials < max_trial_num:
        print_experiment_log(experiment_id=experiment_id)
        print_trial_job_log(training_service, TRIAL_JOBS_URL)
        raise AssertionError('Failed to finish in maxExecDuration')
def invoke_validator(test_case_config, nni_source_dir, training_service):
    """Run the validator configured for a test case, if there is one.

    Returns immediately when the test case has no ``validator`` entry or the
    entry has no ``class``. Otherwise the named class is looked up in the
    ``validators`` module, instantiated, and called with the REST endpoint,
    the experiment directory, ``nni_source_dir`` and the configured keyword
    arguments.

    Args:
        test_case_config: Mapping describing the test case; only the
            ``validator`` key (with ``class`` and optional ``kwargs``) is
            read here.
        nni_source_dir: Forwarded to the validator as its third positional
            argument.
        training_service: Passed to ``print_trial_job_log`` when the
            validator raises.

    Raises:
        KeyError: If the configured class name is not defined in the
            ``validators`` module.
        Any exception raised by the validator is re-raised after the
        experiment and trial job logs have been printed.
    """
    validator_config = test_case_config.get('validator')
    if validator_config is None or validator_config.get('class') is None:
        return

    validator = validators.__dict__[validator_config.get('class')]()
    # A `kwargs` key that is present but empty yields None; normalise it to
    # an empty dict so the ** unpacking below cannot raise TypeError.
    kwargs = validator_config.get('kwargs') or {}
    print('kwargs:', kwargs)
    experiment_id = get_experiment_id(EXPERIMENT_URL)
    try:
        validator(REST_ENDPOINT, get_experiment_dir(EXPERIMENT_URL),
                  nni_source_dir, **kwargs)
    except BaseException:
        # Deliberately broad: print logs for diagnosis, then always re-raise.
        print_experiment_log(experiment_id=experiment_id)
        print_trial_job_log(training_service, TRIAL_JOBS_URL)
        raise