def check(self): """Check pipeline run results.""" if self._run_pipeline: ###### Monitor Job ###### try: start_time = datetime.now() response = self._client.wait_for_run_completion( self._run_id, self._test_timeout ) succ = (response.run.status.lower() == 'succeeded') end_time = datetime.now() elapsed_time = (end_time - start_time).seconds utils.add_junit_test( self._test_cases, 'job completion', succ, 'waiting for job completion failure', elapsed_time ) finally: ###### Output Argo Log for Debugging ###### workflow_json = self._client._get_workflow_json(self._run_id) workflow_id = workflow_json['metadata']['name'] print("Argo Workflow Name: ", workflow_id) argo_log, _ = utils.run_bash_command( 'argo logs {} -n {}'.format( workflow_id, self._namespace ) ) print('=========Argo Workflow Log=========') print(argo_log) if not succ: utils.write_junit_xml( self._test_name, self._result, self._test_cases ) exit(1) ###### Validate the results for specific test cases ###### if self._testname == 'xgboost_training_cm': # For xgboost sample, check its confusion matrix. cm_tar_path = './confusion_matrix.tar.gz' utils.get_artifact_in_minio( workflow_json, 'confusion-matrix', cm_tar_path, 'mlpipeline-ui-metadata' ) with tarfile.open(cm_tar_path) as tar_handle: file_handles = tar_handle.getmembers() assert len(file_handles) == 1 with tar_handle.extractfile(file_handles[0]) as f: cm_data = f.read() utils.add_junit_test( self._test_cases, 'confusion matrix format', (len(cm_data) > 0), 'the confusion matrix file is empty' ) ###### Delete Job ###### #TODO: add deletion when the backend API offers the interface. ###### Write out the test result in junit xml ###### utils.write_junit_xml(self._test_name, self._result, self._test_cases)
def main():
  args = parse_arguments()
  test_cases = []
  test_name = 'Kubeflow Sample Test'

  ###### Initialization ######
  client = Client()

  ###### Check Input File ######
  utils.add_junit_test(test_cases, 'input generated yaml file',
                       os.path.exists(args.input),
                       'yaml file is not generated')
  if not os.path.exists(args.input):
    utils.write_junit_xml(test_name, args.result, test_cases)
    exit()

  ###### Create Experiment ######
  experiment_name = 'kubeflow sample experiment'
  response = client.create_experiment(experiment_name)
  experiment_id = response.id
  utils.add_junit_test(test_cases, 'create experiment', True)

  ###### Create Job ######
  job_name = 'kubeflow_sample'
  params = {
      'output': args.output,
      'project': 'ml-pipeline-test',
      'evaluation': 'gs://ml-pipeline-dataset/sample-test/flower/eval15.csv',
      'train': 'gs://ml-pipeline-dataset/sample-test/flower/train30.csv',
      'hidden-layer-size': '10,5',
      'steps': '5'
  }
  response = client.run_pipeline(experiment_id, job_name, args.input, params)
  run_id = response.id
  utils.add_junit_test(test_cases, 'create pipeline run', True)

  ###### Monitor Job ######
  start_time = datetime.now()
  response = client.wait_for_run_completion(run_id, 1200)
  succ = (response.run.status.lower() == 'succeeded')
  end_time = datetime.now()
  elapsed_time = (end_time - start_time).seconds
  utils.add_junit_test(test_cases, 'job completion', succ,
                       'waiting for job completion failure', elapsed_time)
  if not succ:
    utils.write_junit_xml(test_name, args.result, test_cases)
    exit()

  ###### Output Argo Log for Debugging ######
  workflow_json = client._get_workflow_json(run_id)
  workflow_id = workflow_json['metadata']['name']
  #TODO: remove the namespace dependency or make it configurable.
  argo_log, _ = utils.run_bash_command(
      'argo logs -n kubeflow -w {}'.format(workflow_id))
  print('=========Argo Workflow Log=========')
  print(argo_log)

  ###### Validate the results ######
  # The confusion matrix should have three columns for the flower data:
  # target, predicted, count.
  cm_tar_path = './confusion_matrix.tar.gz'
  cm_filename = 'mlpipeline-ui-metadata.json'
  utils.get_artifact_in_minio(workflow_json, 'confusionmatrix', cm_tar_path)
  with tarfile.open(cm_tar_path) as tar_handler:
    tar_handler.extractall()
  with open(cm_filename, 'r') as f:
    cm_data = json.load(f)
  utils.add_junit_test(
      test_cases, 'confusion matrix format',
      (len(cm_data['outputs'][0]['schema']) == 3),
      'the column number of the confusion matrix output is not equal to three')

  ###### Delete Job ######
  #TODO: add deletion when the backend API offers the interface.

  ###### Write out the test result in junit xml ######
  utils.write_junit_xml(test_name, args.result, test_cases)
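
# --- Illustrative sketch, hypothetical metadata ---
# The schema check above assumes mlpipeline-ui-metadata.json declares one
# confusion-matrix output whose schema lists exactly three columns. A file
# that passes the check might parse into something like (the exact field
# names beyond 'outputs' and 'schema' are assumptions, not verified against
# this pipeline's output):
EXAMPLE_UI_METADATA = {
    'outputs': [{
        'type': 'confusion_matrix',
        'format': 'csv',
        'schema': [
            {'name': 'target', 'type': 'CATEGORY'},
            {'name': 'predicted', 'type': 'CATEGORY'},
            {'name': 'count', 'type': 'NUMBER'},
        ],
        'source': 'gs://<path-to>/confusion_matrix.csv',  # placeholder path
    }],
}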
def main():
  args = parse_arguments()
  test_cases = []
  test_name = args.testname + ' Sample Test'

  ###### Initialization ######
  host = 'ml-pipeline.%s.svc.cluster.local:8888' % args.namespace
  client = Client(host=host)

  ###### Check Input File ######
  utils.add_junit_test(test_cases, 'input generated yaml file',
                       os.path.exists(args.input),
                       'yaml file is not generated')
  if not os.path.exists(args.input):
    utils.write_junit_xml(test_name, args.result, test_cases)
    print('Error: job not found.')
    exit(1)

  ###### Create Experiment ######
  experiment_name = args.testname + ' sample experiment'
  response = client.create_experiment(experiment_name)
  experiment_id = response.id
  utils.add_junit_test(test_cases, 'create experiment', True)

  ###### Create Job ######
  job_name = args.testname + '_sample'

  ###### Test-specific arguments ######
  if args.testname == 'tfx_cab_classification':
    params = {
        'output': args.output,
        'project': 'ml-pipeline-test',
        'column-names': 'gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/column-names.json',
        'evaluation': 'gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/eval20.csv',
        'train': 'gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/train50.csv',
        'hidden-layer-size': '5',
        'steps': '5'
    }
  elif args.testname == 'xgboost_training_cm':
    params = {
        'output': args.output,
        'project': 'ml-pipeline-test',
        'train-data': 'gs://ml-pipeline-dataset/sample-test/sfpd/train_50.csv',
        'eval-data': 'gs://ml-pipeline-dataset/sample-test/sfpd/eval_20.csv',
        'schema': 'gs://ml-pipeline-dataset/sample-test/sfpd/schema.json',
        'rounds': '20',
        'workers': '2'
    }
  else:
    # Basic tests require no additional params.
    params = {}

  response = client.run_pipeline(experiment_id, job_name, args.input, params)
  run_id = response.id
  utils.add_junit_test(test_cases, 'create pipeline run', True)

  ###### Monitor Job ######
  try:
    start_time = datetime.now()
    if args.testname == 'xgboost_training_cm':
      response = client.wait_for_run_completion(run_id, 1800)
    else:
      response = client.wait_for_run_completion(run_id, 1200)
    succ = (response.run.status.lower() == 'succeeded')
    end_time = datetime.now()
    elapsed_time = (end_time - start_time).seconds
    utils.add_junit_test(test_cases, 'job completion', succ,
                         'waiting for job completion failure', elapsed_time)
  finally:
    ###### Output Argo Log for Debugging ######
    workflow_json = client._get_workflow_json(run_id)
    workflow_id = workflow_json['metadata']['name']
    argo_log, _ = utils.run_bash_command(
        'argo logs -n {} -w {}'.format(args.namespace, workflow_id))
    print('=========Argo Workflow Log=========')
    print(argo_log)

  if not succ:
    utils.write_junit_xml(test_name, args.result, test_cases)
    exit(1)

  ###### Validate the results for specific test cases ######
  #TODO: Add result check for tfx-cab-classification after launch.
  if args.testname == 'xgboost_training_cm':
    cm_tar_path = './confusion_matrix.tar.gz'
    utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path,
                                'mlpipeline-ui-metadata')
    with tarfile.open(cm_tar_path) as tar_handle:
      file_handles = tar_handle.getmembers()
      assert len(file_handles) == 1
      with tar_handle.extractfile(file_handles[0]) as f:
        cm_data = f.read()
      utils.add_junit_test(test_cases, 'confusion matrix format',
                           (len(cm_data) > 0),
                           'the confusion matrix file is empty')

  ###### Delete Job ######
  #TODO: add deletion when the backend API offers the interface.

  ###### Write out the test result in junit xml ######
  utils.write_junit_xml(test_name, args.result, test_cases)
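
# --- Illustrative sketch, hypothetical ---
# parse_arguments() is defined elsewhere in this repo and its exact flags may
# differ. The main() functions here read args.input, args.result, args.output,
# args.testname and args.namespace, so a matching parser would be roughly:
import argparse

def parse_arguments_sketch():
  """Parse the command line arguments the main() functions above rely on."""
  parser = argparse.ArgumentParser()
  parser.add_argument('--input', type=str, required=True,
                      help='Path of the compiled pipeline yaml file.')
  parser.add_argument('--result', type=str, required=True,
                      help='Path where the junit xml result is written.')
  parser.add_argument('--output', type=str, required=True,
                      help='GCS output directory for the pipeline.')
  parser.add_argument('--testname', type=str, required=True,
                      help='Name of the sample test to run.')
  parser.add_argument('--namespace', type=str, default='kubeflow',
                      help='Namespace where ml-pipeline is deployed.')
  return parser.parse_args()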
def main():
  args = parse_arguments()
  test_cases = []
  test_name = 'XGBoost Sample Test'

  ###### Initialization ######
  host = 'ml-pipeline.%s.svc.cluster.local:8888' % args.namespace
  client = Client(host=host)

  ###### Check Input File ######
  utils.add_junit_test(test_cases, 'input generated yaml file',
                       os.path.exists(args.input),
                       'yaml file is not generated')
  if not os.path.exists(args.input):
    utils.write_junit_xml(test_name, args.result, test_cases)
    print('Error: job not found.')
    exit(1)

  ###### Create Experiment ######
  experiment_name = 'xgboost sample experiment'
  response = client.create_experiment(experiment_name)
  experiment_id = response.id
  utils.add_junit_test(test_cases, 'create experiment', True)

  ###### Create Job ######
  job_name = 'xgboost_sample'
  params = {
      'output': args.output,
      'project': 'ml-pipeline-test',
      'train-data': 'gs://ml-pipeline-dataset/sample-test/sfpd/train_50.csv',
      'eval-data': 'gs://ml-pipeline-dataset/sample-test/sfpd/eval_20.csv',
      'schema': 'gs://ml-pipeline-dataset/sample-test/sfpd/schema.json',
      'rounds': '20',
      'workers': '2'
  }
  response = client.run_pipeline(experiment_id, job_name, args.input, params)
  run_id = response.id
  utils.add_junit_test(test_cases, 'create pipeline run', True)

  ###### Monitor Job ######
  start_time = datetime.now()
  response = client.wait_for_run_completion(run_id, 1800)
  succ = (response.run.status.lower() == 'succeeded')
  end_time = datetime.now()
  elapsed_time = (end_time - start_time).seconds
  utils.add_junit_test(test_cases, 'job completion', succ,
                       'waiting for job completion failure', elapsed_time)

  ###### Output Argo Log for Debugging ######
  workflow_json = client._get_workflow_json(run_id)
  workflow_id = workflow_json['metadata']['name']
  argo_log, _ = utils.run_bash_command(
      'argo logs -n {} -w {}'.format(args.namespace, workflow_id))
  print('=========Argo Workflow Log=========')
  print(argo_log)

  ###### If the job fails, skip the result validation ######
  if not succ:
    utils.write_junit_xml(test_name, args.result, test_cases)
    exit(1)

  ###### Validate the results ######
  # The confusion matrix archive should contain a non-empty
  # mlpipeline-ui-metadata.json.
  cm_tar_path = './confusion_matrix.tar.gz'
  cm_filename = 'mlpipeline-ui-metadata.json'
  utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path)
  with tarfile.open(cm_tar_path) as tar_handler:
    tar_handler.extractall()
  with open(cm_filename, 'r') as f:
    cm_data = f.read()
  utils.add_junit_test(test_cases, 'confusion matrix format',
                       (len(cm_data) > 0),
                       'the confusion matrix file is empty')

  ###### Delete Job ######
  #TODO: add deletion when the backend API offers the interface.

  ###### Write out the test result in junit xml ######
  utils.write_junit_xml(test_name, args.result, test_cases)
def check(self): """Run sample test and check results.""" test_cases = [] test_name = self._testname + ' Sample Test' ###### Initialization ###### host = 'ml-pipeline.%s.svc.cluster.local:8888' % self._namespace client = Client(host=host) ###### Check Input File ###### utils.add_junit_test(test_cases, 'input generated yaml file', os.path.exists(self._input), 'yaml file is not generated') if not os.path.exists(self._input): utils.write_junit_xml(test_name, self._result, test_cases) print('Error: job not found.') exit(1) ###### Create Experiment ###### experiment_name = self._testname + ' sample experiment' response = client.create_experiment(experiment_name) experiment_id = response.id utils.add_junit_test(test_cases, 'create experiment', True) ###### Create Job ###### job_name = self._testname + '_sample' ###### Figure out arguments from associated config files. ####### test_args = {} try: with open(DEFAULT_CONFIG, 'r') as f: raw_args = yaml.safe_load(f) except yaml.YAMLError as yamlerr: raise RuntimeError('Illegal default config:{}'.format(yamlerr)) except OSError as ose: raise FileExistsError('Default config not found:{}'.format(ose)) else: test_timeout = raw_args['test_timeout'] try: with open(os.path.join(CONFIG_DIR, '%s.config.yaml' % self._testname), 'r') as f: raw_args = yaml.safe_load(f) except yaml.YAMLError as yamlerr: print('No legit yaml config file found, use default args:{}'.format(yamlerr)) except OSError as ose: print('Config file with the same name not found, use default args:{}'.format(ose)) else: test_args.update(raw_args['arguments']) if 'output' in test_args.keys(): # output is a special param that has to be specified dynamically. test_args['output'] = self._output if 'test_timeout' in raw_args.keys(): test_timeout = raw_args['test_timeout'] response = client.run_pipeline(experiment_id, job_name, self._input, test_args) run_id = response.id utils.add_junit_test(test_cases, 'create pipeline run', True) ###### Monitor Job ###### try: start_time = datetime.now() response = client.wait_for_run_completion(run_id, test_timeout) succ = (response.run.status.lower() == 'succeeded') end_time = datetime.now() elapsed_time = (end_time - start_time).seconds utils.add_junit_test(test_cases, 'job completion', succ, 'waiting for job completion failure', elapsed_time) finally: ###### Output Argo Log for Debugging ###### workflow_json = client._get_workflow_json(run_id) workflow_id = workflow_json['metadata']['name'] argo_log, _ = utils.run_bash_command('argo logs -n {} -w {}'.format( self._namespace, workflow_id)) print('=========Argo Workflow Log=========') print(argo_log) if not succ: utils.write_junit_xml(test_name, self._result, test_cases) exit(1) ###### Validate the results for specific test cases ###### #TODO: Add result check for tfx-cab-classification after launch. if self._testname == 'xgboost_training_cm': # For xgboost sample, check its confusion matrix. cm_tar_path = './confusion_matrix.tar.gz' utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path, 'mlpipeline-ui-metadata') with tarfile.open(cm_tar_path) as tar_handle: file_handles = tar_handle.getmembers() assert len(file_handles) == 1 with tar_handle.extractfile(file_handles[0]) as f: cm_data = f.read() utils.add_junit_test(test_cases, 'confusion matrix format', (len(cm_data) > 0), 'the confusion matrix file is empty') ###### Delete Job ###### #TODO: add deletion when the backend API offers the interface. 
###### Write out the test result in junit xml ###### utils.write_junit_xml(test_name, self._result, test_cases)
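
# --- Illustrative sketch, hypothetical file contents ---
# check() above expects DEFAULT_CONFIG and CONFIG_DIR/<testname>.config.yaml
# to parse into dicts with a 'test_timeout' key and, for per-test configs,
# an 'arguments' mapping. A per-test config for xgboost_training_cm might
# look like (values taken from the hard-coded params earlier in this file):
#
#   test_timeout: 1800
#   arguments:
#     output: ''
#     project: 'ml-pipeline-test'
#     rounds: '20'
#     workers: '2'
#
# which yaml.safe_load turns into:
EXAMPLE_TEST_CONFIG = {
    'test_timeout': 1800,
    'arguments': {
        'output': '',  # replaced at run time with self._output
        'project': 'ml-pipeline-test',
        'rounds': '20',
        'workers': '2',
    },
}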