Example #1
    def _compile_and_run_pipeline(self, pipeline_name: Text,
                                  pipeline: tfx_pipeline.Pipeline):
        """Compiles and runs a KFP pipeline.

        Args:
          pipeline_name: The name of the pipeline.
          pipeline: The logical pipeline to run.
        """
        _ = KubeflowRunner().run(pipeline)

        file_path = os.path.join(self._test_dir,
                                 '{}.tar.gz'.format(pipeline_name))
        self.assertTrue(tf.gfile.Exists(file_path))
        tarfile.TarFile.open(file_path).extract('pipeline.yaml')
        pipeline_file = os.path.join(self._test_dir, 'pipeline.yaml')
        self.assertIsNotNone(pipeline_file)

        # Ensure cleanup regardless of whether pipeline succeeds or fails.
        self.addCleanup(self._delete_workflow, pipeline_name)
        self.addCleanup(self._delete_pipeline_output, pipeline_name)

        # Run the pipeline to completion.
        self._run_workflow(pipeline_file, pipeline_name)

        # Check if pipeline completed successfully.
        get_workflow_command = [
            'argo', '--namespace', 'kubeflow', 'get', pipeline_name
        ]
        output = subprocess.check_output(get_workflow_command).decode('utf-8')

        self.assertIsNotNone(
            re.search(r'^Status:\s+Succeeded$', output, flags=re.MULTILINE),
            'Pipeline {} failed to complete successfully:\n{}'.format(
                pipeline_name, output))
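The helpers _run_workflow, _delete_workflow, and _delete_pipeline_output are defined elsewhere in the test class and are not shown above. A minimal sketch of what _run_workflow might look like, assuming the compiled Argo workflow is submitted with the argo CLI; the method body below is an assumption, not the original helper:

    def _run_workflow(self, workflow_file: Text, workflow_name: Text):
        """Sketch: submits the compiled Argo workflow and waits for it."""
        # '--wait' blocks until the workflow reaches a terminal state, so the
        # Status check above observes the final result.
        submit_command = [
            'argo', 'submit', workflow_file,
            '--name', workflow_name,
            '--namespace', 'kubeflow',
            '--wait',
        ]
        subprocess.check_call(submit_command)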
Example #2
  def test_taxi_pipeline_construction_and_definition_file_exists(self):
    logical_pipeline = taxi_pipeline_kubeflow._create_pipeline(
        pipeline_name=taxi_pipeline_kubeflow._pipeline_name,
        pipeline_root=taxi_pipeline_kubeflow._pipeline_root,
        query=taxi_pipeline_kubeflow._query,
        module_file=taxi_pipeline_kubeflow._module_file,
        serving_model_dir=taxi_pipeline_kubeflow._serving_model_dir,
        beam_pipeline_args=taxi_pipeline_kubeflow._beam_pipeline_args,
        ai_platform_training_args=(
            taxi_pipeline_kubeflow._ai_platform_training_args),
        ai_platform_serving_args=(
            taxi_pipeline_kubeflow._ai_platform_serving_args))
    self.assertEqual(9, len(logical_pipeline.components))

    KubeflowRunner().run(logical_pipeline)
    file_path = os.path.join(self._tmp_dir,
                             'chicago_taxi_pipeline_kubeflow.tar.gz')
    self.assertTrue(tf.gfile.Exists(file_path))
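KubeflowRunner writes the compiled <pipeline_name>.tar.gz into its output directory, which defaults to the current working directory. A plausible setUp for this test, assuming that default; the class name and body here are a sketch (requires import os, tempfile), not the original test code:

  def setUp(self):
    super(TaxiPipelineKubeflowTest, self).setUp()
    # Assumption: the runner emits the .tar.gz into os.getcwd(), so point
    # the working directory at a fresh temp dir the test can inspect.
    self._tmp_dir = tempfile.mkdtemp()
    os.chdir(self._tmp_dir)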
Example #3
    # Uses TFMA to compute evaluation statistics over the features of a model.
    model_analyzer = Evaluator(
        examples=example_gen.outputs.examples,
        model_exports=trainer.outputs.output,
        feature_slicing_spec=evaluator_pb2.FeatureSlicingSpec(specs=[
            evaluator_pb2.SingleSlicingSpec(
                column_for_slicing=['trip_start_hour'])
        ]))

    # Performs quality validation of a candidate model (compared to a baseline).
    model_validator = ModelValidator(examples=example_gen.outputs.examples,
                                     model=trainer.outputs.output)

    # Checks whether the model passed the validation steps and pushes the model
    # to a file destination if the check passed.
    pusher = Pusher(model_export=trainer.outputs.output,
                    model_blessing=model_validator.outputs.blessing,
                    custom_config={'cmle_serving_args': _cmle_serving_args},
                    push_destination=pusher_pb2.PushDestination(
                        filesystem=pusher_pb2.PushDestination.Filesystem(
                            base_directory=_serving_model_dir)))

    return [
        example_gen, statistics_gen, infer_schema, validate_stats, transform,
        trainer, model_analyzer, model_validator, pusher
    ]


KubeflowRunner().run(_create_pipeline())
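The upstream components referenced in the list (example_gen, statistics_gen, infer_schema, validate_stats, transform, trainer) are elided from this example. A sketch of how they are typically wired in the TFX 0.1x-era taxi pipeline, shown at module level for brevity (in the original they sit inside _create_pipeline); the keyword-argument names follow that era's API, and _data_root and _module_file are assumed constants:

from tfx.components import CsvExampleGen, ExampleValidator, SchemaGen
from tfx.components import StatisticsGen, Trainer, Transform
from tfx.proto import trainer_pb2
from tfx.utils.dsl_utils import csv_input

# Ingests raw CSV data and emits tf.Examples.
examples = csv_input(_data_root)
example_gen = CsvExampleGen(input_base=examples)
# Computes statistics, infers a schema, and validates data against it.
statistics_gen = StatisticsGen(input_data=example_gen.outputs.examples)
infer_schema = SchemaGen(stats=statistics_gen.outputs.output)
validate_stats = ExampleValidator(
    stats=statistics_gen.outputs.output,
    schema=infer_schema.outputs.output)
# Feature engineering and training, both driven by the user module file.
transform = Transform(
    input_data=example_gen.outputs.examples,
    schema=infer_schema.outputs.output,
    module_file=_module_file)
trainer = Trainer(
    module_file=_module_file,
    transformed_examples=transform.outputs.transformed_examples,
    schema=infer_schema.outputs.output,
    transform_output=transform.outputs.transform_output,
    train_args=trainer_pb2.TrainArgs(num_steps=10000),
    eval_args=trainer_pb2.EvalArgs(num_steps=5000))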
Example #4
    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=[
            example_gen, statistics_gen, infer_schema, validate_stats, transform,
            trainer, model_analyzer, model_validator, pusher
        ],
        additional_pipeline_args={
            'tfx_image': 'tensorflow/tfx:0.14.0rc1'
        },
        log_root='/var/tmp/tfx/logs',
    )


if __name__ == '__main__':
    mount_volume_op = onprem.mount_pvc('tfx-pvc', 'tfx-pv', _tfx_root)
    config = KubeflowDagRunnerConfig(
        pipeline_operator_funcs=[mount_volume_op]
    )
    _pipeline = _create_pipeline(
        pipeline_name=_pipeline_name,
        pipeline_root=_pipeline_root,
        data_root=os.path.join(_pipeline_root, 'data'),
        module_file=_module_file,
        serving_model_dir=_serving_model_dir)
    KubeflowRunner(config=config).run(_pipeline)
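The __main__ block assumes several module-level constants that are not shown here. One illustrative set of definitions; every path below is an assumption, including reusing _tfx_root as the PVC mount path handed to onprem.mount_pvc above:

import os

# Hypothetical constants assumed by the __main__ block above.
_pipeline_name = 'chicago_taxi_pipeline_kubeflow'
_tfx_root = '/tfx-src'  # Also the PVC mount path.
_pipeline_root = os.path.join(_tfx_root, 'pipelines', _pipeline_name)
_module_file = os.path.join(_tfx_root, 'taxi_utils.py')
_serving_model_dir = os.path.join(_pipeline_root, 'serving_model')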
Example #5
    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=[
            example_gen, statistics_gen, infer_schema, validate_stats,
            transform, trainer, model_analyzer, model_validator, pusher
        ],
        additional_pipeline_args={
            'beam_pipeline_args': beam_pipeline_args,
            # Optional args:
            # 'tfx_image': custom Docker image to use for components. This is
            # needed if the TFX package is not installed from an RC or a
            # released version.
        },
        log_root='/var/tmp/tfx/logs',
    )


if __name__ == '__main__':
    KubeflowRunner().run(
        _create_pipeline(
            pipeline_name=_pipeline_name,
            pipeline_root=_pipeline_root,
            query=_query,
            module_file=_module_file,
            serving_model_dir=_serving_model_dir,
            beam_pipeline_args=_beam_pipeline_args,
            ai_platform_training_args=_ai_platform_training_args,
            ai_platform_serving_args=_ai_platform_serving_args,
        ))
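Each of these scripts emits <pipeline_name>.tar.gz into the runner's output directory (the current working directory by default). The archive can be uploaded through the Kubeflow Pipelines UI, or programmatically with the kfp client. A sketch, assuming a reachable KFP endpoint; the host value is a placeholder:

import kfp

client = kfp.Client(host='http://localhost:8080')
client.upload_pipeline(
    'chicago_taxi_pipeline_kubeflow.tar.gz',
    pipeline_name='chicago_taxi_pipeline_kubeflow')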