def run(args):
    it_config = get_yml_content(args.config)
    for test_case_config in it_config['testCases']:
        name = test_case_config['name']
        if case_excluded(name, args.exclude):
            print('{} excluded'.format(name))
            continue
        if args.cases and not case_included(name, args.cases):
            continue

        # fill test case default config
        for k in it_config['defaultTestCaseConfig']:
            if k not in test_case_config:
                test_case_config[k] = it_config['defaultTestCaseConfig'][k]
        print(json.dumps(test_case_config, indent=4))

        if not match_platform(test_case_config):
            print('skipped {}, platform {} not match [{}]'.format(
                name, sys.platform, test_case_config['platform']))
            continue

        if not match_training_service(test_case_config, args.ts):
            print('skipped {}, training service {} not match [{}]'.format(
                name, args.ts, test_case_config['trainingService']))
            continue

        wait_for_port_available(8080, 30)
        print('{}Testing: {}{}'.format(GREEN, name, CLEAR))
        begin_time = time.time()
        run_test_case(test_case_config, it_config, args)
        print('{}Test {}: TEST PASS IN {} SECONDS{}'.format(
            GREEN, name, int(time.time() - begin_time), CLEAR), flush=True)
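# The `case_excluded` / `case_included` predicates used above are defined in the shared
# test utilities. A minimal illustrative sketch follows, assuming `--cases` and `--exclude`
# are comma-separated lists of test-case names; these names and signatures are assumptions
# for illustration (underscore-prefixed to avoid shadowing the real helpers), not the actual API.
def _case_included_sketch(name, cases):
    """Return True if `name` appears in the comma-separated `cases` string (sketch)."""
    return name in [c.strip() for c in cases.split(',') if c.strip()]

def _case_excluded_sketch(name, excludes):
    """Return True if `name` appears in the comma-separated `excludes` string (sketch)."""
    return excludes is not None and _case_included_sketch(name, excludes)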
def run(args):
    it_config = get_yml_content(args.config)
    test_cases = it_config['testCases']
    for test_case_id, test_case_config in enumerate(test_cases, start=1):
        name = test_case_config['name']
        print(GREEN + '=' * 80 + CLEAR)
        print('## {}Testing: {}{} ##'.format(GREEN, name, CLEAR))
        # Print progress on Azure DevOps
        print(f'##vso[task.setprogress value={int(test_case_id / len(test_cases) * 100)};]{name}')

        if case_excluded(name, args.exclude):
            print('{} excluded'.format(name))
            continue
        if args.cases and not case_included(name, args.cases):
            continue

        # fill test case default config
        for k in it_config['defaultTestCaseConfig']:
            if k not in test_case_config:
                test_case_config[k] = it_config['defaultTestCaseConfig'][k]
        print(json.dumps(test_case_config, indent=4))

        if not match_platform(test_case_config):
            print('skipped {}, platform {} not match [{}]'.format(
                name, sys.platform, test_case_config['platform']))
            continue

        if not match_training_service(test_case_config, args.ts):
            print('skipped {}, training service {} not match [{}]'.format(
                name, args.ts, test_case_config['trainingService']))
            continue

        if args.ts == 'remote':
            if not match_remoteConfig(test_case_config, args.nni_source_dir):
                print('skipped {}, remoteConfig not match.'.format(name))
                continue

        # remote mode needs more time to clean up
        if args.ts == 'remote' or args.ts == 'hybrid':
            wait_for_port_available(8080, 240)
            wait_for_port_available(8081, 240)  # some training services need one more port to listen for metrics

        # adl mode needs more time to clean up the PVC
        if args.ts == 'adl' and name == 'nnictl-resume-2':
            time.sleep(30)

        begin_time = time.time()
        run_test_case(test_case_config, it_config, args)
        print('{}Test {}: TEST PASS IN {} SECONDS{}\n\n'.format(
            GREEN, name, int(time.time() - begin_time), CLEAR), flush=True)
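# `wait_for_port_available` and `detect_port` come from the shared test utilities.
# A plausible polling implementation is sketched below; the socket probe against
# localhost and the RuntimeError on timeout are assumptions for illustration,
# not the behavior of the real helpers.
import socket
import time

def _detect_port_sketch(port):
    """Return True if something is listening on localhost:<port> (illustrative sketch)."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.settimeout(1)
        return sock.connect_ex(('127.0.0.1', port)) == 0

def _wait_for_port_available_sketch(port, timeout):
    """Poll until nothing listens on `port`, failing after `timeout` seconds (sketch)."""
    deadline = time.time() + timeout
    while _detect_port_sketch(port):
        if time.time() > deadline:
            raise RuntimeError('port %d is still occupied after %d seconds' % (port, timeout))
        time.sleep(1)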
def naive_test(args):
    '''run naive integration test'''
    to_remove = ['tuner_search_space.json', 'tuner_result.txt', 'assessor_result.txt']
    to_remove = list(map(lambda file: osp.join(NAIVE_TEST_CONFIG_DIR, file), to_remove))
    remove_files(to_remove)

    proc = subprocess.run(['nnictl', 'create', '--config', args.config])
    assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

    print('Spawning trials...')

    nnimanager_log_path = get_nni_log_path(EXPERIMENT_URL)
    current_trial = 0

    for _ in range(120):
        time.sleep(1)

        tuner_status = read_last_line(osp.join(NAIVE_TEST_CONFIG_DIR, 'tuner_result.txt'))
        assessor_status = read_last_line(osp.join(NAIVE_TEST_CONFIG_DIR, 'assessor_result.txt'))
        experiment_status = is_experiment_done(nnimanager_log_path)

        assert tuner_status != 'ERROR', 'Tuner exited with error'
        assert assessor_status != 'ERROR', 'Assessor exited with error'

        if experiment_status:
            break

        if tuner_status is not None:
            for line in open(osp.join(NAIVE_TEST_CONFIG_DIR, 'tuner_result.txt')):
                if line.strip() == 'ERROR':
                    break
                trial = int(line.split(' ')[0])
                if trial > current_trial:
                    current_trial = trial
                    print('Trial #%d done' % trial)

    assert experiment_status, 'Failed to finish in 2 min'

    ss1 = json.load(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'search_space.json')))
    ss2 = json.load(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'tuner_search_space.json')))
    assert ss1 == ss2, 'Tuner got wrong search space'

    tuner_result = set(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'tuner_result.txt')))
    expected = set(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'expected_tuner_result.txt')))
    # Trials may complete before NNI gets the assessor's result,
    # so it is possible to have more final results than expected
    print('Tuner result:', tuner_result)
    print('Expected tuner result:', expected)
    assert tuner_result.issuperset(expected), 'Bad tuner result'

    assessor_result = set(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'assessor_result.txt')))
    expected = set(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'expected_assessor_result.txt')))
    assert assessor_result == expected, 'Bad assessor result'

    subprocess.run(['nnictl', 'stop'])
    wait_for_port_available(8080, 10)
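# `read_last_line` and `is_experiment_done` are also provided by the shared test utilities.
# A rough sketch of what they might do is shown below, assuming the NNI manager writes an
# 'Experiment done' marker line into its log; that marker string and these function bodies
# are assumptions for illustration, not the real implementations.
import os.path as osp

def _read_last_line_sketch(file_path):
    """Return the stripped last non-empty line of a file, or None if unavailable (sketch)."""
    if not osp.exists(file_path):
        return None
    with open(file_path) as fp:
        lines = [line.strip() for line in fp if line.strip()]
    return lines[-1] if lines else None

def _is_experiment_done_sketch(nnimanager_log_path):
    """Sketch: report completion by scanning the manager log for a done marker."""
    if not osp.exists(nnimanager_log_path):
        return False
    with open(nnimanager_log_path) as fp:
        return 'Experiment done' in fp.read()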
def stop_experiment_test(args):
    '''Test the `nnictl stop` command, including `nnictl stop <experiment_id>`,
    `nnictl stop --port <port>`, and `nnictl stop --all`. Plain `nnictl stop` is not
    tested here since it is used in all other test code.'''
    config_file = args.config
    subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8080'], check=True)
    subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8888'], check=True)
    subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8989'], check=True)
    subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8990'], check=True)

    # test cmd `nnictl stop <id>`
    experiment_id = get_experiment_id(EXPERIMENT_URL)
    proc = subprocess.run(['nnictl', 'stop', experiment_id])
    assert proc.returncode == 0, '`nnictl stop %s` failed with code %d' % (experiment_id, proc.returncode)
    wait_for_port_available(8080, 10)
    assert not detect_port(8080), '`nnictl stop %s` failed to stop experiments' % experiment_id

    # test cmd `nnictl stop --port`
    proc = subprocess.run(['nnictl', 'stop', '--port', '8990'])
    assert proc.returncode == 0, '`nnictl stop --port 8990` failed with code %d' % proc.returncode
    wait_for_port_available(8990, 10)
    assert not detect_port(8990), '`nnictl stop --port 8990` failed to stop experiments'

    # test cmd `nnictl stop --all`
    proc = subprocess.run(['nnictl', 'stop', '--all'])
    assert proc.returncode == 0, '`nnictl stop --all` failed with code %d' % proc.returncode
    wait_for_port_available(8888, 10)
    wait_for_port_available(8989, 10)
    assert not detect_port(8888) and not detect_port(8989), '`nnictl stop --all` failed to stop experiments'
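# `get_experiment_id` queries the NNI manager's REST endpoint at EXPERIMENT_URL.
# A hedged sketch using `requests` is shown below; reading an 'id' field from the
# experiment profile response is an assumption for illustration, not the documented API.
import requests

def _get_experiment_id_sketch(experiment_url):
    """Sketch: fetch the experiment profile and return its id field (schema assumed)."""
    response = requests.get(experiment_url, timeout=10)
    response.raise_for_status()
    return response.json()['id']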
def run(args):
    it_config = get_yml_content(args.config)
    for test_case_config in it_config['testCases']:
        name = test_case_config['name']
        if case_excluded(name, args.exclude):
            print('{} excluded'.format(name))
            continue
        if args.cases and not case_included(name, args.cases):
            continue

        # fill test case default config
        for k in it_config['defaultTestCaseConfig']:
            if k not in test_case_config:
                test_case_config[k] = it_config['defaultTestCaseConfig'][k]
        print(json.dumps(test_case_config, indent=4))

        if not match_platform(test_case_config):
            print('skipped {}, platform {} not match [{}]'.format(
                name, sys.platform, test_case_config['platform']))
            continue

        if not match_training_service(test_case_config, args.ts):
            print('skipped {}, training service {} not match [{}]'.format(
                name, args.ts, test_case_config['trainingService']))
            continue

        # remote mode needs more time to clean up
        if args.ts == 'remote' or args.ts == 'hybrid':
            if args.ts == 'remote':
                if not match_remoteConfig(test_case_config, args.nni_source_dir):
                    print('skipped {}, remoteConfig not match.'.format(name))
                    continue
            wait_for_port_available(8080, 240)
        else:
            wait_for_port_available(8080, 60)

        # adl mode needs more time to clean up the PVC
        if args.ts == 'adl' and name == 'nnictl-resume-2':
            time.sleep(30)

        print('## {}Testing: {}{} ##'.format(GREEN, name, CLEAR))
        begin_time = time.time()
        run_test_case(test_case_config, it_config, args)
        print('{}Test {}: TEST PASS IN {} SECONDS{}'.format(
            GREEN, name, int(time.time() - begin_time), CLEAR), flush=True)
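# A hypothetical command-line entry point tying the pieces together; the real driver's
# flag names and defaults may differ, so treat this as an illustrative sketch only.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='NNI integration test driver (illustrative sketch)')
    parser.add_argument('--config', required=True, help='path to the integration test YAML config')
    parser.add_argument('--ts', default='local', help='training service, e.g. local, remote, hybrid, adl')
    parser.add_argument('--cases', default='', help='comma-separated test case names to run')
    parser.add_argument('--exclude', default=None, help='comma-separated test case names to skip')
    parser.add_argument('--nni_source_dir', default=None, help='NNI source directory used in remote mode')
    run(parser.parse_args())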