import json
import os
import os.path as osp
import subprocess
import sys
import time

# The helpers used below (gen_new_config, get_max_values, get_experiment_status,
# get_succeeded_trial_num, get_failed_trial_jobs, print_failed_job_log,
# print_stderr, check_metrics) and the STATUS_URL / TRIAL_JOBS_URL constants
# are assumed to be provided by the accompanying test utilities module.


def run_test():
    '''run metrics test'''
    if sys.platform == 'win32':
        config_file = osp.join('metrics_test', 'metrics_win32.test.yml')
    else:
        config_file = osp.join('metrics_test', 'metrics.test.yml')
    print('Testing %s...' % config_file)
    proc = subprocess.run(['nnictl', 'create', '--config', config_file])
    assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

    max_duration, max_trial_num = get_max_values(config_file)
    sleep_interval = 3
    for _ in range(0, max_duration, sleep_interval):
        time.sleep(sleep_interval)
        status = get_experiment_status(STATUS_URL)
        #print('experiment status:', status)
        if status == 'DONE':
            if sys.platform == 'win32':
                # Windows seems to have some issues on updating in time,
                # so wait before reading the final trial counts
                time.sleep(sleep_interval)
            num_succeeded = get_succeeded_trial_num(TRIAL_JOBS_URL)
            print_failed_job_log('local', TRIAL_JOBS_URL)
            assert num_succeeded == max_trial_num, \
                'only %d succeeded trial jobs, there should be %d' % (num_succeeded, max_trial_num)
            check_metrics()
            break
    assert status == 'DONE', 'Failed to finish in maxExecDuration'


def run_test(config_file, training_service, local_gpu=False):
    '''run test per configuration file'''
    new_config_file, config = gen_new_config(config_file, training_service)
    if training_service == 'local' and not local_gpu and config['trial']['gpuNum'] > 0:
        print('no gpu, skipping:', config_file)
        return

    try:
        proc = subprocess.run(['nnictl', 'create', '--config', new_config_file])
        assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

        max_duration, max_trial_num = get_max_values(new_config_file)
        sleep_interval = 3
        for _ in range(0, max_duration + 30, sleep_interval):
            time.sleep(sleep_interval)
            status = get_experiment_status(STATUS_URL)
            if status == 'DONE':
                num_succeeded = get_succeeded_trial_num(TRIAL_JOBS_URL)
                if training_service == 'local':
                    print_stderr(TRIAL_JOBS_URL)
                assert num_succeeded == max_trial_num, \
                    'only %d succeeded trial jobs, there should be %d' % (num_succeeded, max_trial_num)
                break
        assert status == 'DONE', 'Failed to finish in maxExecDuration'
    finally:
        # clean up the generated configuration file
        if os.path.exists(new_config_file):
            os.remove(new_config_file)


def run_test(config_file, training_service, local_gpu=False):
    '''run test per configuration file'''
    new_config_file, config = gen_new_config(config_file, training_service)
    print(json.dumps(config, sort_keys=True, indent=4))
    if training_service == 'local' and not local_gpu and config['trial']['gpuNum'] > 0:
        print('no gpu, skipping:', config_file)
        return

    try:
        proc = subprocess.run(['nnictl', 'create', '--config', new_config_file])
        assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

        max_duration, max_trial_num = get_max_values(new_config_file)
        sleep_interval = 3
        for _ in range(0, max_duration + 30, sleep_interval):
            time.sleep(sleep_interval)
            status = get_experiment_status(STATUS_URL)
            # stop polling once the experiment finishes or errors out,
            # or as soon as any trial job has failed
            if status in ['DONE', 'ERROR'] or get_failed_trial_jobs(TRIAL_JOBS_URL):
                break
        print_failed_job_log(config['trainingServicePlatform'], TRIAL_JOBS_URL)
        if status != 'DONE' or get_succeeded_trial_num(TRIAL_JOBS_URL) < max_trial_num:
            raise AssertionError('Failed to finish in maxExecDuration')
    finally:
        # clean up the generated configuration file
        if os.path.exists(new_config_file):
            os.remove(new_config_file)


def run_test():
    '''run metrics test'''
    config_file = 'metrics_test/metrics.test.yml'
    print('Testing %s...' % config_file)
    proc = subprocess.run(['nnictl', 'create', '--config', config_file])
    assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

    max_duration, max_trial_num = get_max_values(config_file)
    sleep_interval = 3
    for _ in range(0, max_duration, sleep_interval):
        time.sleep(sleep_interval)
        status = get_experiment_status(STATUS_URL)
        #print('experiment status:', status)
        if status == 'DONE':
            num_succeeded = get_succeeded_trial_num(TRIAL_JOBS_URL)
            assert num_succeeded == max_trial_num, \
                'only %d succeeded trial jobs, there should be %d' % (num_succeeded, max_trial_num)
            check_metrics()
            break
    assert status == 'DONE', 'Failed to finish in maxExecDuration'
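
# Illustrative entry point (an assumption, not part of the original tests):
# running the module directly would execute the metrics test defined above.
if __name__ == '__main__':
    run_test()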