Example #1
    def check(self):
        """Check pipeline run results."""
        if self._run_pipeline:
            ###### Monitor Job ######
            try:
                start_time = datetime.now()
                response = self._client.wait_for_run_completion(
                    self._run_id, self._test_timeout
                )
                succ = (response.run.status.lower() == 'succeeded')
                end_time = datetime.now()
                elapsed_time = (end_time - start_time).seconds
                utils.add_junit_test(
                    self._test_cases, 'job completion', succ,
                    'waiting for job completion failure', elapsed_time
                )
            finally:
                ###### Output Argo Log for Debugging ######
                workflow_json = self._client._get_workflow_json(self._run_id)
                workflow_id = workflow_json['metadata']['name']
                print("Argo Workflow Name: ", workflow_id)
                argo_log, _ = utils.run_bash_command(
                    'argo logs {} -n {}'.format(
                        workflow_id, self._namespace
                    )
                )
                print('=========Argo Workflow Log=========')
                print(argo_log)

            if not succ:
                utils.write_junit_xml(
                    self._test_name, self._result, self._test_cases
                )
                exit(1)

            ###### Validate the results for specific test cases ######
            if self._testname == 'xgboost_training_cm':
                # For xgboost sample, check its confusion matrix.
                cm_tar_path = './confusion_matrix.tar.gz'
                utils.get_artifact_in_minio(
                    workflow_json, 'confusion-matrix', cm_tar_path,
                    'mlpipeline-ui-metadata'
                )
                with tarfile.open(cm_tar_path) as tar_handle:
                    file_handles = tar_handle.getmembers()
                    assert len(file_handles) == 1

                    with tar_handle.extractfile(file_handles[0]) as f:
                        cm_data = f.read()
                        utils.add_junit_test(
                            self._test_cases, 'confusion matrix format',
                            (len(cm_data) > 0),
                            'the confusion matrix file is empty'
                        )

        ###### Delete Job ######
        #TODO: add deletion when the backend API offers the interface.

        ###### Write out the test result in junit xml ######
        utils.write_junit_xml(self._test_name, self._result, self._test_cases)
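
Every example on this page records its results through a small utils helper module that is not shown here. A minimal sketch of the two JUnit helpers, with signatures inferred from the call sites above, could look like the following; it is an illustration, not the project's actual implementation.

# Hypothetical sketch of the JUnit helpers the examples call; the signatures are
# inferred from the call sites, and the XML layout is an assumption.
import os
import xml.etree.ElementTree as ET


def add_junit_test(test_cases, name, succeeded=True, message=None, elapsed_sec=0):
    """Append one test case as a (name, passed, failure message, duration) record."""
    test_cases.append((name, bool(succeeded), message, elapsed_sec))


def write_junit_xml(test_name, result_path, test_cases):
    """Write the collected test cases out as a single JUnit XML test suite."""
    suite = ET.Element('testsuite', name=test_name, tests=str(len(test_cases)))
    for name, passed, message, elapsed_sec in test_cases:
        case = ET.SubElement(suite, 'testcase', name=name, time=str(elapsed_sec))
        if not passed:
            ET.SubElement(case, 'failure', message=message or 'failed')
    os.makedirs(os.path.dirname(result_path) or '.', exist_ok=True)
    ET.ElementTree(suite).write(result_path, xml_declaration=True)
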
Example #2
def main():
    args = parse_arguments()
    test_cases = []
    test_name = 'Kubeflow Sample Test'

    ###### Initialization ######
    client = Client()

    ###### Check Input File ######
    utils.add_junit_test(test_cases, 'input generated yaml file',
                         os.path.exists(args.input),
                         'yaml file is not generated')
    if not os.path.exists(args.input):
        utils.write_junit_xml(test_name, args.result, test_cases)
        exit(1)

    ###### Create Experiment ######
    experiment_name = 'kubeflow sample experiment'
    response = client.create_experiment(experiment_name)
    experiment_id = response.id
    utils.add_junit_test(test_cases, 'create experiment', True)

    ###### Create Job ######
    job_name = 'kubeflow_sample'
    params = {
        'output': args.output,
        'project': 'ml-pipeline-test',
        'evaluation': 'gs://ml-pipeline-dataset/sample-test/flower/eval15.csv',
        'train': 'gs://ml-pipeline-dataset/sample-test/flower/train30.csv',
        'hidden-layer-size': '10,5',
        'steps': '5'
    }
    response = client.run_pipeline(experiment_id, job_name, args.input, params)
    run_id = response.id
    utils.add_junit_test(test_cases, 'create pipeline run', True)

    ###### Monitor Job ######
    start_time = datetime.now()
    response = client.wait_for_run_completion(run_id, 1200)
    succ = (response.run.status.lower() == 'succeeded')
    end_time = datetime.now()
    elapsed_time = (end_time - start_time).seconds
    utils.add_junit_test(test_cases, 'job completion', succ,
                         'waiting for job completion failure', elapsed_time)
    if not succ:
        utils.write_junit_xml(test_name, args.result, test_cases)
        exit(1)

    ###### Output Argo Log for Debugging ######
    workflow_json = client._get_workflow_json(run_id)
    workflow_id = workflow_json['metadata']['name']
    #TODO: remove the namespace dependency or make it configurable.
    argo_log, _ = utils.run_bash_command(
        'argo logs -n kubeflow -w {}'.format(workflow_id))
    print("=========Argo Workflow Log=========")
    print(argo_log)

    ###### Validate the results ######
    #   confusion matrix should show three columns for the flower data
    #     target, predicted, count
    cm_tar_path = './confusion_matrix.tar.gz'
    cm_filename = 'mlpipeline-ui-metadata.json'
    utils.get_artifact_in_minio(workflow_json, 'confusionmatrix', cm_tar_path)
    with tarfile.open(cm_tar_path) as tar_handler:
        tar_handler.extractall()

    with open(cm_filename, 'r') as f:
        cm_data = json.load(f)
        utils.add_junit_test(
            test_cases, 'confusion matrix format',
            (len(cm_data['outputs'][0]['schema']) == 3),
            'the column number of the confusion matrix output is not equal to three'
        )

    ###### Delete Job ######
    #TODO: add deletion when the backend API offers the interface.

    ###### Write out the test result in junit xml ######
    utils.write_junit_xml(test_name, args.result, test_cases)
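
The three-column check above relies on the structure of the mlpipeline-ui-metadata.json artifact written by the confusion-matrix step. For reference, a metadata payload of the shape the check expects would look roughly like the dictionary below; the concrete paths and values are illustrative only.

# Illustrative shape of the mlpipeline-ui-metadata.json content the check above
# expects; the source path and values are placeholders, only the structure matters.
expected_metadata = {
    'outputs': [{
        'type': 'confusion_matrix',
        'format': 'csv',
        'schema': [
            {'name': 'target', 'type': 'CATEGORY'},
            {'name': 'predicted', 'type': 'CATEGORY'},
            {'name': 'count', 'type': 'NUMBER'},
        ],
        'source': 'gs://some-bucket/some-run/confusion_matrix.csv',
    }]
}
assert len(expected_metadata['outputs'][0]['schema']) == 3
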
Example #3
def main():
    args = parse_arguments()
    test_cases = []
    test_name = args.testname + ' Sample Test'

    ###### Initialization ######
    host = 'ml-pipeline.%s.svc.cluster.local:8888' % args.namespace
    client = Client(host=host)

    ###### Check Input File ######
    utils.add_junit_test(test_cases, 'input generated yaml file',
                         os.path.exists(args.input),
                         'yaml file is not generated')
    if not os.path.exists(args.input):
        utils.write_junit_xml(test_name, args.result, test_cases)
        print('Error: job not found.')
        exit(1)

    ###### Create Experiment ######
    experiment_name = args.testname + ' sample experiment'
    response = client.create_experiment(experiment_name)
    experiment_id = response.id
    utils.add_junit_test(test_cases, 'create experiment', True)

    ###### Create Job ######
    job_name = args.testname + '_sample'
    ###### Test-specific arguments #######
    if args.testname == 'tfx_cab_classification':
        params = {
            'output': args.output,
            'project': 'ml-pipeline-test',
            'column-names':
            'gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/column-names.json',
            'evaluation':
            'gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/eval20.csv',
            'train':
            'gs://ml-pipeline-dataset/sample-test/taxi-cab-classification/train50.csv',
            'hidden-layer-size': '5',
            'steps': '5'
        }
    elif args.testname == 'xgboost_training_cm':
        params = {
            'output': args.output,
            'project': 'ml-pipeline-test',
            'train-data':
            'gs://ml-pipeline-dataset/sample-test/sfpd/train_50.csv',
            'eval-data':
            'gs://ml-pipeline-dataset/sample-test/sfpd/eval_20.csv',
            'schema': 'gs://ml-pipeline-dataset/sample-test/sfpd/schema.json',
            'rounds': '20',
            'workers': '2'
        }
    else:
        # Basic tests require no additional params.
        params = {}

    response = client.run_pipeline(experiment_id, job_name, args.input, params)
    run_id = response.id
    utils.add_junit_test(test_cases, 'create pipeline run', True)

    ###### Monitor Job ######
    try:
        start_time = datetime.now()
        if args.testname == 'xgboost_training_cm':
            response = client.wait_for_run_completion(run_id, 1800)
        else:
            response = client.wait_for_run_completion(run_id, 1200)
        succ = (response.run.status.lower() == 'succeeded')
        end_time = datetime.now()
        elapsed_time = (end_time - start_time).seconds
        utils.add_junit_test(test_cases, 'job completion', succ,
                             'waiting for job completion failure',
                             elapsed_time)
    finally:
        ###### Output Argo Log for Debugging ######
        workflow_json = client._get_workflow_json(run_id)
        workflow_id = workflow_json['metadata']['name']
        argo_log, _ = utils.run_bash_command('argo logs -n {} -w {}'.format(
            args.namespace, workflow_id))
        print('=========Argo Workflow Log=========')
        print(argo_log)

    if not succ:
        utils.write_junit_xml(test_name, args.result, test_cases)
        exit(1)

    ###### Validate the results for specific test cases ######
    #TODO: Add result check for tfx-cab-classification after launch.
    if args.testname == 'xgboost_training_cm':
        cm_tar_path = './confusion_matrix.tar.gz'
        utils.get_artifact_in_minio(workflow_json, 'confusion-matrix',
                                    cm_tar_path, 'mlpipeline-ui-metadata')
        with tarfile.open(cm_tar_path) as tar_handle:
            file_handles = tar_handle.getmembers()
            assert len(file_handles) == 1

            with tar_handle.extractfile(file_handles[0]) as f:
                cm_data = f.read()
                utils.add_junit_test(test_cases, 'confusion matrix format',
                                     (len(cm_data) > 0),
                                     'the confusion matrix file is empty')

    ###### Delete Job ######
    #TODO: add deletion when the backend API offers the interface.

    ###### Write out the test result in junit xml ######
    utils.write_junit_xml(test_name, args.result, test_cases)
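
None of the snippets include parse_arguments(). A plausible argparse-based version that supplies the attributes read here (args.input, args.output, args.result, args.namespace, args.testname) is sketched below; the flag spellings, defaults, and help texts are assumptions, not the project's real CLI. The scripts also appear to assume module-level imports such as datetime, os, json, tarfile, yaml, the local utils module, and the KFP SDK Client.

# Hypothetical parse_arguments(); the attribute names match what the examples read,
# but the flag names, defaults, and help strings are assumptions.
import argparse


def parse_arguments():
    parser = argparse.ArgumentParser(description='Run a KFP sample test.')
    parser.add_argument('--input', required=True,
                        help='Path of the compiled pipeline package to run.')
    parser.add_argument('--output', required=True,
                        help='GCS path the pipeline writes its outputs to.')
    parser.add_argument('--result', required=True,
                        help='Path of the JUnit XML result file to write.')
    parser.add_argument('--namespace', default='kubeflow',
                        help='Namespace where ml-pipeline and Argo are deployed.')
    parser.add_argument('--testname', default='xgboost_training_cm',
                        help='Name of the sample test to run.')
    return parser.parse_args()
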
Example #4
def main():
    args = parse_arguments()
    test_cases = []
    test_name = 'XGBoost Sample Test'

    ###### Initialization ######
    host = 'ml-pipeline.%s.svc.cluster.local:8888' % args.namespace
    client = Client(host=host)

    ###### Check Input File ######
    utils.add_junit_test(test_cases, 'input generated yaml file',
                         os.path.exists(args.input),
                         'yaml file is not generated')
    if not os.path.exists(args.input):
        utils.write_junit_xml(test_name, args.result, test_cases)
        print('Error: job not found.')
        exit(1)

    ###### Create Experiment ######
    experiment_name = 'xgboost sample experiment'
    response = client.create_experiment(experiment_name)
    experiment_id = response.id
    utils.add_junit_test(test_cases, 'create experiment', True)

    ###### Create Job ######
    job_name = 'xgboost_sample'
    params = {
        'output': args.output,
        'project': 'ml-pipeline-test',
        'train-data': 'gs://ml-pipeline-dataset/sample-test/sfpd/train_50.csv',
        'eval-data': 'gs://ml-pipeline-dataset/sample-test/sfpd/eval_20.csv',
        'schema': 'gs://ml-pipeline-dataset/sample-test/sfpd/schema.json',
        'rounds': '20',
        'workers': '2'
    }
    response = client.run_pipeline(experiment_id, job_name, args.input, params)
    run_id = response.id
    utils.add_junit_test(test_cases, 'create pipeline run', True)

    ###### Monitor Job ######
    start_time = datetime.now()
    response = client.wait_for_run_completion(run_id, 1800)
    succ = (response.run.status.lower() == 'succeeded')
    end_time = datetime.now()
    elapsed_time = (end_time - start_time).seconds
    utils.add_junit_test(test_cases, 'job completion', succ,
                         'waiting for job completion failure', elapsed_time)

    ###### Output Argo Log for Debugging ######
    workflow_json = client._get_workflow_json(run_id)
    workflow_id = workflow_json['metadata']['name']
    argo_log, _ = utils.run_bash_command('argo logs -n {} -w {}'.format(
        args.namespace, workflow_id))
    print("=========Argo Workflow Log=========")
    print(argo_log)

    ###### If the job fails, skip the result validation ######
    if not succ:
        utils.write_junit_xml(test_name, args.result, test_cases)
        exit(1)

    ###### Validate the results ######
    #   confusion matrix should show three columns:
    #     target, predicted, count
    cm_tar_path = './confusion_matrix.tar.gz'
    cm_filename = 'mlpipeline-ui-metadata.json'
    utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path)
    with tarfile.open(cm_tar_path) as tar_handler:
        tar_handler.extractall()

    with open(cm_filename, 'r') as f:
        cm_data = f.read()
        utils.add_junit_test(test_cases, 'confusion matrix format',
                             (len(cm_data) > 0),
                             'the confusion matrix file is empty')

    ###### Delete Job ######
    #TODO: add deletion when the backend API offers the interface.

    ###### Write out the test result in junit xml ######
    utils.write_junit_xml(test_name, args.result, test_cases)
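
Each example shells out to the argo CLI through utils.run_bash_command and unpacks a (stdout, stderr) pair. A minimal subprocess-based sketch of such a helper follows; the real helper in the test utils module may differ.

# Hypothetical run_bash_command helper; returns decoded (stdout, stderr) so the
# call sites above can do: argo_log, _ = run_bash_command('argo logs ...').
import subprocess


def run_bash_command(cmd):
    process = subprocess.Popen(cmd, shell=True,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()
    return stdout.decode('utf-8'), stderr.decode('utf-8')
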
Example #5
  def check(self):
    """Run sample test and check results."""
    test_cases = []
    test_name = self._testname + ' Sample Test'

    ###### Initialization ######
    host = 'ml-pipeline.%s.svc.cluster.local:8888' % self._namespace
    client = Client(host=host)

    ###### Check Input File ######
    utils.add_junit_test(test_cases, 'input generated yaml file',
                         os.path.exists(self._input), 'yaml file is not generated')
    if not os.path.exists(self._input):
      utils.write_junit_xml(test_name, self._result, test_cases)
      print('Error: job not found.')
      exit(1)

    ###### Create Experiment ######
    experiment_name = self._testname + ' sample experiment'
    response = client.create_experiment(experiment_name)
    experiment_id = response.id
    utils.add_junit_test(test_cases, 'create experiment', True)

    ###### Create Job ######
    job_name = self._testname + '_sample'
    ###### Figure out arguments from associated config files. #######
    test_args = {}
    try:
      with open(DEFAULT_CONFIG, 'r') as f:
        raw_args = yaml.safe_load(f)
    except yaml.YAMLError as yamlerr:
      raise RuntimeError('Illegal default config: {}'.format(yamlerr))
    except OSError as ose:
      raise FileNotFoundError('Default config not found: {}'.format(ose))
    else:
      test_timeout = raw_args['test_timeout']

    try:
      with open(os.path.join(CONFIG_DIR, '%s.config.yaml' % self._testname), 'r') as f:
        raw_args = yaml.safe_load(f)
    except yaml.YAMLError as yamlerr:
      print('No valid yaml config file found, using default args: {}'.format(yamlerr))
    except OSError as ose:
      print('Config file with the same name not found, using default args: {}'.format(ose))
    else:
      test_args.update(raw_args['arguments'])
      if 'output' in test_args.keys():  # output is a special param that has to be specified dynamically.
        test_args['output'] = self._output
      if 'test_timeout' in raw_args.keys():
        test_timeout = raw_args['test_timeout']

    response = client.run_pipeline(experiment_id, job_name, self._input, test_args)
    run_id = response.id
    utils.add_junit_test(test_cases, 'create pipeline run', True)

    ###### Monitor Job ######
    try:
      start_time = datetime.now()
      response = client.wait_for_run_completion(run_id, test_timeout)
      succ = (response.run.status.lower() == 'succeeded')
      end_time = datetime.now()
      elapsed_time = (end_time - start_time).seconds
      utils.add_junit_test(test_cases, 'job completion', succ,
                           'waiting for job completion failure', elapsed_time)
    finally:
      ###### Output Argo Log for Debugging ######
      workflow_json = client._get_workflow_json(run_id)
      workflow_id = workflow_json['metadata']['name']
      argo_log, _ = utils.run_bash_command('argo logs -n {} -w {}'.format(
        self._namespace, workflow_id))
      print('=========Argo Workflow Log=========')
      print(argo_log)

    if not succ:
      utils.write_junit_xml(test_name, self._result, test_cases)
      exit(1)

    ###### Validate the results for specific test cases ######
    #TODO: Add result check for tfx-cab-classification after launch.
    if self._testname == 'xgboost_training_cm':
      # For xgboost sample, check its confusion matrix.
      cm_tar_path = './confusion_matrix.tar.gz'
      utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path,
                                  'mlpipeline-ui-metadata')
      with tarfile.open(cm_tar_path) as tar_handle:
        file_handles = tar_handle.getmembers()
        assert len(file_handles) == 1

        with tar_handle.extractfile(file_handles[0]) as f:
          cm_data = f.read()
          utils.add_junit_test(test_cases, 'confusion matrix format',
                               (len(cm_data) > 0),
                               'the confusion matrix file is empty')

    ###### Delete Job ######
    #TODO: add deletion when the backend API offers the interface.

    ###### Write out the test result in junit xml ######
    utils.write_junit_xml(test_name, self._result, test_cases)
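
Unlike the earlier examples, Example #5 pulls its timeout and pipeline arguments from YAML config files: a DEFAULT_CONFIG holding the global timeout, plus an optional <testname>.config.yaml under CONFIG_DIR with per-test overrides. Loaded with yaml.safe_load, configs of the shape the code expects would come back roughly as the dictionaries below; the concrete values are illustrative, borrowed from the hard-coded params in Examples #3 and #4.

# Illustrative yaml.safe_load() results for the two config files Example #5 reads;
# the keys match what the code accesses, the values are assumptions.
default_config = {                      # DEFAULT_CONFIG
    'test_timeout': 1200,
}

xgboost_training_cm_config = {          # CONFIG_DIR/xgboost_training_cm.config.yaml
    'test_timeout': 1800,
    'arguments': {
        'output': '',                   # replaced at runtime with self._output
        'project': 'ml-pipeline-test',
        'rounds': '20',
        'workers': '2',
    },
}
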