def create_run_from_pipeline_func(
        self,
        pipeline_func: Callable,
        arguments: Mapping[str, str],
        run_name=None,
        experiment_name=None,
        pipeline_conf: kfp.dsl.PipelineConf = None,
        namespace=None):
    '''Runs pipeline on KFP-enabled Kubernetes cluster.

    This command compiles the pipeline function, creates or gets an
    experiment and submits the pipeline for execution.

    Args:
      pipeline_func: A function that describes a pipeline by calling
        components and composing them into execution graph.
      arguments: Arguments to the pipeline function provided as a dict.
      run_name: Optional. Name of the run to be shown in the UI.
      experiment_name: Optional. Name of the experiment to add the run to.
      pipeline_conf: Optional. Pipeline configuration ops that will be
        applied to all the ops in the pipeline func.
      namespace: Kubernetes namespace where the pipeline runs are created.
        For single user deployment, leave it as None;
        For multi user, input a namespace where the user is authorized.
    '''
    # TODO: Check arguments against the pipeline function
    pipeline_name = pipeline_func.__name__
    run_name = run_name or pipeline_name + ' ' + datetime.now().strftime(
        '%Y-%m-%d %H-%M-%S')
    try:
        (_, pipeline_package_path) = tempfile.mkstemp(suffix='.zip')
        compiler.Compiler().compile(pipeline_func, pipeline_package_path,
                                    pipeline_conf=pipeline_conf)
        return self.create_run_from_pipeline_package(pipeline_package_path,
                                                     arguments, run_name,
                                                     experiment_name,
                                                     namespace)
    finally:
        os.remove(pipeline_package_path)
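# A minimal usage sketch for the client method above, assuming a reachable
# KFP endpoint; the host URL and the echo component are illustrative
# assumptions, not values taken from this file.
import kfp
from kfp import dsl


def echo_op():
    # v1-style ContainerOp: runs a container that prints a greeting.
    return dsl.ContainerOp(name='echo',
                           image='alpine:3.12',
                           command=['echo', 'hello world'])


@dsl.pipeline(name='echo-pipeline', description='Prints a greeting.')
def echo_pipeline():
    echo_op()


client = kfp.Client(host='http://localhost:8080')  # hypothetical endpoint
client.create_run_from_pipeline_func(echo_pipeline, arguments={})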
def test_compile_pipeline_with_misused_inputvalue_should_raise_error(self):
    upstream_op = components.load_component_from_text("""
        name: upstream component
        outputs:
        - {name: model, type: Model}
        implementation:
          container:
            image: dummy
            args:
            - {outputPath: model}
        """)

    downstream_op = components.load_component_from_text("""
        name: component with misused placeholder
        inputs:
        - {name: model, type: Model}
        implementation:
          container:
            image: dummy
            args:
            - {inputValue: model}
        """)

    @dsl.pipeline(name='test-pipeline', pipeline_root='dummy_root')
    def my_pipeline():
        downstream_op(model=upstream_op().output)

    with self.assertRaisesRegex(
            TypeError,
            ' type "Model" cannot be paired with InputValuePlaceholder.'):
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path='output.yaml')
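# For contrast, a hedged sketch of the corrected pairing. The rule the test
# above asserts is that artifact types such as Model must be consumed via a
# path-based placeholder (inputPath) rather than InputValuePlaceholder; this
# corrected component is an illustrative variant, not part of the test suite.
fixed_downstream_op = components.load_component_from_text("""
    name: component with corrected placeholder
    inputs:
    - {name: model, type: Model}
    implementation:
      container:
        image: dummy
        args:
        - {inputPath: model}
    """)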
def create_run_from_pipeline_func(
        self,
        pipeline_func: Callable,
        arguments: Mapping[str, str],
        run_name: Optional[str] = None,
        experiment_name: Optional[str] = None,
        pipeline_conf: Optional[kfp.dsl.PipelineConf] = None,
        namespace: Optional[str] = None):
    """Runs pipeline on KFP-enabled Kubernetes cluster.

    This command compiles the pipeline function, creates or gets an
    experiment and submits the pipeline for execution.

    Args:
      pipeline_func: A function that describes a pipeline by calling
        components and composing them into execution graph.
      arguments: Arguments to the pipeline function provided as a dict.
      run_name: Optional. Name of the run to be shown in the UI.
      experiment_name: Optional. Name of the experiment to add the run to.
      pipeline_conf: Optional. Pipeline configuration ops that will be
        applied to all the ops in the pipeline func.
      namespace: Kubernetes namespace where the pipeline runs are created.
        For single user deployment, leave it as None;
        For multi user, input a namespace where the user is authorized.
    """
    # TODO: Check arguments against the pipeline function
    pipeline_name = pipeline_func.__name__
    run_name = run_name or pipeline_name + ' ' + datetime.datetime.now(
    ).strftime('%Y-%m-%d %H-%M-%S')
    with tempfile.TemporaryDirectory() as tmpdir:
        pipeline_package_path = os.path.join(tmpdir, 'pipeline.yaml')
        compiler.Compiler().compile(pipeline_func, pipeline_package_path,
                                    pipeline_conf=pipeline_conf)
        return self.create_run_from_pipeline_package(pipeline_package_path,
                                                     arguments, run_name,
                                                     experiment_name,
                                                     namespace)
def test_compile_pipeline_with_bool(self):
    with tempfile.TemporaryDirectory() as tmpdir:
        predict_op = components.load_component_from_text("""
            name: predict
            inputs:
            - {name: generate_explanation, type: Boolean, default: False}
            implementation:
              container:
                image: gcr.io/my-project/my-image:tag
                args:
                - {inputValue: generate_explanation}
            """)

        @dsl.pipeline(name='test-boolean-pipeline')
        def simple_pipeline():
            predict_op(generate_explanation=True)

        target_json_file = os.path.join(tmpdir, 'result.yaml')
        compiler.Compiler().compile(pipeline_func=simple_pipeline,
                                    package_path=target_json_file)

        self.assertTrue(os.path.exists(target_json_file))
        with open(target_json_file, 'r') as f:
            f.read()
def test_passing_arbitrary_artifact_to_input_expecting_concrete_artifact(
        self):
    producer_op1 = components.load_component_from_text("""
        name: producer component
        outputs:
        - {name: output, type: SomeArbitraryType}
        implementation:
          container:
            image: dummy
            args:
            - {outputPath: output}
        """)

    @dsl.component
    def consumer_op(input1: dsl.Input[dsl.Dataset]):
        pass

    @dsl.pipeline(name='test-pipeline')
    def my_pipeline():
        consumer_op(input1=producer_op1().output)

    with self.assertRaisesRegex(
            type_utils.InconsistentTypeException,
            'Incompatible argument passed to the input "input1" of component'
            ' "consumer-op": Argument type "SomeArbitraryType" is'
            ' incompatible with the input type "Dataset"'):
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path='result.yaml')
def create_run_from_pipeline_func(
    self,
    pipeline_func: Callable,
    arguments: Mapping[str, str],
    run_name: Optional[str] = None,
    experiment_name: Optional[str] = None,
    pipeline_conf: Optional[dsl.PipelineConf] = None,
    namespace: Optional[str] = None,
    mode: dsl.PipelineExecutionMode = dsl.PipelineExecutionMode.V1_LEGACY,
    launcher_image: Optional[str] = None,
    pipeline_root: Optional[str] = None,
):
    """Runs pipeline on KFP-enabled Kubernetes cluster.

    This command compiles the pipeline function, creates or gets an
    experiment and submits the pipeline for execution.

    Args:
      pipeline_func: A function that describes a pipeline by calling
        components and composing them into execution graph.
      arguments: Arguments to the pipeline function provided as a dict.
      run_name: Optional. Name of the run to be shown in the UI.
      experiment_name: Optional. Name of the experiment to add the run to.
      pipeline_conf: Optional. Pipeline configuration ops that will be
        applied to all the ops in the pipeline func.
      namespace: Kubernetes namespace where the pipeline runs are created.
        For single user deployment, leave it as None;
        For multi user, input a namespace where the user is authorized.
      mode: The PipelineExecutionMode to use when compiling and running
        pipeline_func.
      launcher_image: The launcher image to use if the mode is specified as
        PipelineExecutionMode.V2_COMPATIBLE. Should only be needed for tests
        or custom deployments right now.
      pipeline_root: The root path of the pipeline outputs. This argument
        should be used only for pipeline compiled with
        dsl.PipelineExecutionMode.V2_COMPATIBLE or
        dsl.PipelineExecutionMode.V2_ENGINE mode.
    """
    if (pipeline_root is not None and
            mode == dsl.PipelineExecutionMode.V1_LEGACY):
        raise ValueError('`pipeline_root` should not be used with '
                         'dsl.PipelineExecutionMode.V1_LEGACY mode.')

    # TODO: Check arguments against the pipeline function
    pipeline_name = pipeline_func.__name__
    run_name = run_name or pipeline_name + ' ' + datetime.datetime.now(
    ).strftime('%Y-%m-%d %H-%M-%S')
    with tempfile.TemporaryDirectory() as tmpdir:
        pipeline_package_path = os.path.join(tmpdir, 'pipeline.yaml')
        compiler.Compiler(mode=mode, launcher_image=launcher_image).compile(
            pipeline_func=pipeline_func,
            package_path=pipeline_package_path,
            pipeline_conf=pipeline_conf)

        return self.create_run_from_pipeline_package(
            pipeline_file=pipeline_package_path,
            arguments=arguments,
            run_name=run_name,
            experiment_name=experiment_name,
            namespace=namespace,
            pipeline_root=pipeline_root,
        )
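# A hedged sketch of invoking the extended signature above in V2_COMPATIBLE
# mode with a pipeline_root; the host, bucket, and tiny pipeline below are
# placeholder assumptions, not values from this file.
import kfp
from kfp import dsl
from kfp.components import create_component_from_func


def say_hello() -> str:
    return 'hello'


hello_op = create_component_from_func(say_hello)


@dsl.pipeline(name='hello-pipeline')
def hello_pipeline():
    hello_op()


client = kfp.Client(host='http://localhost:8080')  # hypothetical endpoint
client.create_run_from_pipeline_func(
    hello_pipeline,
    arguments={},
    mode=dsl.PipelineExecutionMode.V2_COMPATIBLE,
    pipeline_root='gs://my-bucket/pipeline-root',  # hypothetical bucket
)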
def test_compile_pipeline_with_default_value(self):
    with tempfile.TemporaryDirectory() as tmpdir:
        producer_op = components.load_component_from_text("""
            name: producer
            inputs:
            - {name: location, type: String, default: 'us-central1'}
            - {name: name, type: Integer, default: 1}
            - {name: nodefault, type: String}
            implementation:
              container:
                image: gcr.io/my-project/my-image:tag
                args:
                - {inputValue: location}
            """)

        @dsl.pipeline(name='test-pipeline')
        def simple_pipeline():
            producer = producer_op(location='1', nodefault='string')

        target_json_file = os.path.join(tmpdir, 'result.json')
        compiler.Compiler().compile(pipeline_func=simple_pipeline,
                                    package_path=target_json_file)

        self.assertTrue(os.path.exists(target_json_file))
        with open(target_json_file, 'r') as f:
            f.read()
def main(host: str, create_pipeline: str, create_run_in: str, force: bool):
    if create_pipeline != "" and create_run_in != "":
        logging.error('only one of --create-run-in and --create-pipeline may be set')
        return 1

    if host != "":
        client = kfp.Client(host=host)
    else:
        client = kfp.Client()

    run_id = ""
    if create_run_in != "":
        logging.info('creating run in pipeline "{}"'.format(create_run_in))
        pid = pipeline_id(client, create_run_in)
        if pid == "":
            logging.error('could not find pipeline "{}" to create job'.format(create_run_in))
            sys.exit(1)
        # Create a run in the target pipeline using the new pipeline ID
        run_info = client.run_pipeline(
            job_name="pach-job-{}".format(os.environ["PACH_JOB_ID"]),
            pipeline_id=pid,
            experiment_id=experiment_id(client, "Default"),
            params={
                "s3_endpoint": os.environ["S3_ENDPOINT"],
                "input_bucket": "input",
            })
        run_id = run_info.id
    elif create_pipeline != "":
        # Local machine is just creating the pipeline
        with tempfile.NamedTemporaryFile(suffix='.zip') as pipeline_file:
            compiler.Compiler().compile(kubeflow_pipeline, pipeline_file.name)
            pid = pipeline_id(client, create_pipeline)
            if pid != "":
                client.delete_pipeline(pid)
            logging.info("creating pipeline: {}".format(create_pipeline))
            try:
                client.upload_pipeline(pipeline_file.name, create_pipeline)
            except TypeError:
                # https://github.com/kubeflow/pipelines/issues/2764
                # This can be removed once KF proper uses the latest KFP
                pass
    else:
        # Pachyderm job is creating both the pipeline and the run
        run_id = client.create_run_from_pipeline_func(
            kubeflow_pipeline,
            run_name="pach-job-{}".format(os.environ["PACH_JOB_ID"]),
            arguments={
                "s3_endpoint": os.environ["S3_ENDPOINT"],
                "input_bucket": "input",
            }).run_id

    if run_id != "":
        logging.info("waiting on kubeflow run id: {}".format(run_id))
        j = client.wait_for_run_completion(run_id, 60)
        assert j.run.status == 'Succeeded'
    return 0
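# main() above depends on two helpers not shown in this file. A plausible
# sketch of what they might look like, assuming the kfp 1.x client list/get
# APIs and the empty string as the "not found" sentinel used above:
def pipeline_id(client: kfp.Client, name: str) -> str:
    """Return the ID of the pipeline with the given name, or ""."""
    token = ""
    while True:
        resp = client.list_pipelines(page_token=token, page_size=100)
        for p in resp.pipelines or []:
            if p.name == name:
                return p.id
        token = resp.next_page_token
        if not token:
            return ""


def experiment_id(client: kfp.Client, name: str) -> str:
    """Return the ID of the named experiment, creating it if needed."""
    try:
        return client.get_experiment(experiment_name=name).id
    except ValueError:
        return client.create_experiment(name).id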
def test_compile_pipeline_with_missing_task_should_raise_error(self):

    @dsl.pipeline(name='test-pipeline', pipeline_root='dummy_root')
    def my_pipeline(text: str):
        pass

    with self.assertRaisesRegex(ValueError,
                                'Task is missing from pipeline.'):
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path='output.yaml')
def test_passing_missing_type_annotation_on_pipeline_input_should_error(
        self):

    @dsl.pipeline(name='test-pipeline', pipeline_root='gs://path')
    def my_pipeline(input1):
        pass

    with self.assertRaisesRegex(
            TypeError, 'Missing type annotation for argument: input1'):
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path='output.yaml')
def test_compile_pipeline_with_invalid_name_should_raise_error(self):

    @dsl.pipeline(name='')
    def my_pipeline():
        VALID_PRODUCER_COMPONENT_SAMPLE(input_param='input')

    with tempfile.TemporaryDirectory() as tmpdir:
        output_path = os.path.join(tmpdir, 'output.yaml')
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path=output_path)
def test_cannot_write_to_bad_extension(self):
    with tempfile.TemporaryDirectory() as tmpdir:
        pipeline_spec = self.make_pipeline_spec()

        target_file = os.path.join(tmpdir, 'result.bad_extension')
        with self.assertRaisesRegex(ValueError,
                                    r'.* should end with "\.yaml".*'):
            compiler.Compiler().compile(pipeline_func=pipeline_spec,
                                        package_path=target_file)
def save(cls, project, workflow_spec: WorkflowSpec, target, artifact_path=None):
    workflow_file = workflow_spec.get_source_file(project.spec.context)
    functions = FunctionsDict(project)
    pipeline = create_pipeline(
        project,
        workflow_file,
        functions,
        secrets=project._secrets,
    )
    artifact_path = artifact_path or project.spec.artifact_path
    conf = new_pipe_meta(artifact_path, ttl=workflow_spec.ttl)
    compiler.Compiler().compile(pipeline, target, pipeline_conf=conf)
    workflow_spec.clear_tmp()
def test_compile_component_simple(self, extension: str):

    @dsl.component
    def hello_world(text: str) -> str:
        """Hello world component."""
        return text

    with tempfile.TemporaryDirectory() as tempdir:
        output_json = os.path.join(tempdir, f'component{extension}')
        compiler.Compiler().compile(pipeline_func=hello_world,
                                    package_path=output_json)
        with open(output_json, 'r') as f:
            pipeline_spec = yaml.safe_load(f)

        self.assertEqual(pipeline_spec['pipelineInfo']['name'],
                         'hello-world')
def test_use_task_final_status_in_non_exit_op(self):

    @dsl.component
    def print_op(status: PipelineTaskFinalStatus):
        return status

    @dsl.pipeline(name='test-pipeline')
    def my_pipeline(text: bool):
        print_op()

    with self.assertRaisesRegex(
            ValueError,
            'PipelineTaskFinalStatus can only be used in an exit task.'):
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path='result.yaml')
def test_can_write_to_yaml(self, extension):
    with tempfile.TemporaryDirectory() as tmpdir:
        pipeline_spec = self.make_pipeline_spec()

        target_file = os.path.join(tmpdir, f'result{extension}')
        compiler.Compiler().compile(pipeline_func=pipeline_spec,
                                    package_path=target_file)

        self.assertTrue(os.path.exists(target_file))
        with open(target_file) as f:
            pipeline_spec = yaml.safe_load(f)

        self.assertEqual(self.pipeline_name,
                         pipeline_spec['pipelineInfo']['name'])
def test_can_write_to_json(self):
    with tempfile.TemporaryDirectory() as tmpdir:
        pipeline_spec = self.make_pipeline_spec()

        target_file = os.path.join(tmpdir, 'result.json')
        with self.assertWarnsRegex(DeprecationWarning,
                                   r'Compiling to JSON is deprecated'):
            compiler.Compiler().compile(pipeline_func=pipeline_spec,
                                        package_path=target_file)
        with open(target_file) as f:
            pipeline_spec = json.load(f)

        self.assertEqual(self.pipeline_name,
                         pipeline_spec['pipelineInfo']['name'])
def test_compile_component_with_pipeline_name(self):

    @dsl.component
    def hello_world(text: str = 'default_string') -> str:
        """Hello world component."""
        return text

    with tempfile.TemporaryDirectory() as tempdir:
        output_json = os.path.join(tempdir, 'component.yaml')
        compiler.Compiler().compile(pipeline_func=hello_world,
                                    package_path=output_json,
                                    pipeline_name='custom-name')
        with open(output_json, 'r') as f:
            pipeline_spec = yaml.safe_load(f)

        self.assertEqual(pipeline_spec['pipelineInfo']['name'],
                         'custom-name')
def test_set_pipeline_root_through_pipeline_decorator(self):
    with tempfile.TemporaryDirectory() as tmpdir:

        @dsl.pipeline(name='test-pipeline', pipeline_root='gs://path')
        def my_pipeline():
            VALID_PRODUCER_COMPONENT_SAMPLE(input_param='input')

        target_json_file = os.path.join(tmpdir, 'result.yaml')
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path=target_json_file)

        self.assertTrue(os.path.exists(target_json_file))
        with open(target_json_file) as f:
            pipeline_spec = yaml.safe_load(f)

        self.assertEqual('gs://path', pipeline_spec['defaultPipelineRoot'])
def test_compile_component_with_default(self):

    @dsl.component
    def hello_world(text: str = 'default_string') -> str:
        """Hello world component."""
        return text

    with tempfile.TemporaryDirectory() as tempdir:
        output_json = os.path.join(tempdir, 'component.yaml')
        compiler.Compiler().compile(pipeline_func=hello_world,
                                    package_path=output_json)
        with open(output_json, 'r') as f:
            pipeline_spec = yaml.safe_load(f)

        self.assertEqual(pipeline_spec['pipelineInfo']['name'],
                         'hello-world')
        self.assertEqual(
            pipeline_spec['root']['inputDefinitions']['parameters']['text']
            ['defaultValue'], 'default_string')
def test_passing_concrete_artifact_to_input_expecting_generic_artifact(
        self):
    producer_op1 = components.load_component_from_text("""
        name: producer component
        outputs:
        - {name: output, type: Dataset}
        implementation:
          container:
            image: dummy
            args:
            - {outputPath: output}
        """)

    @dsl.component
    def producer_op2(output: dsl.Output[dsl.Model]):
        pass

    consumer_op1 = components.load_component_from_text("""
        name: consumer component
        inputs:
        - {name: input1, type: Artifact}
        implementation:
          container:
            image: dummy
            args:
            - {inputPath: input1}
        """)

    @dsl.component
    def consumer_op2(input1: dsl.Input[dsl.Artifact]):
        pass

    @dsl.pipeline(name='test-pipeline')
    def my_pipeline():
        consumer_op1(input1=producer_op1().output)
        consumer_op1(input1=producer_op2().output)
        consumer_op2(input1=producer_op1().output)
        consumer_op2(input1=producer_op2().output)

    with tempfile.TemporaryDirectory() as tmpdir:
        target_yaml_file = os.path.join(tmpdir, 'result.yaml')
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path=target_yaml_file)

        self.assertTrue(os.path.exists(target_yaml_file))
def test_compile_component_two_inputs(self):

    @dsl.component
    def hello_world(text: str, integer: int) -> str:
        """Hello world component."""
        print(integer)
        return text

    with tempfile.TemporaryDirectory() as tempdir:
        output_json = os.path.join(tempdir, 'component.yaml')
        compiler.Compiler().compile(pipeline_func=hello_world,
                                    package_path=output_json)
        with open(output_json, 'r') as f:
            pipeline_spec = yaml.safe_load(f)

        self.assertEqual(
            pipeline_spec['root']['inputDefinitions']['parameters']['integer']
            ['parameterType'], 'NUMBER_INTEGER')
def test_compile_simple_pipeline(self):
    with tempfile.TemporaryDirectory() as tmpdir:
        producer_op = components.load_component_from_text("""
            name: producer
            inputs:
            - {name: input_param, type: String}
            outputs:
            - {name: output_model, type: Model}
            - {name: output_value, type: Integer}
            implementation:
              container:
                image: gcr.io/my-project/my-image:tag
                args:
                - {inputValue: input_param}
                - {outputPath: output_model}
                - {outputPath: output_value}
            """)

        consumer_op = components.load_component_from_text("""
            name: consumer
            inputs:
            - {name: input_model, type: Model}
            - {name: input_value, type: Integer}
            implementation:
              container:
                image: gcr.io/my-project/my-image:tag
                args:
                - {inputPath: input_model}
                - {inputValue: input_value}
            """)

        @dsl.pipeline(name='test-pipeline')
        def simple_pipeline(pipeline_input: str = 'Hello KFP!'):
            producer = producer_op(input_param=pipeline_input)
            consumer = consumer_op(
                input_model=producer.outputs['output_model'],
                input_value=producer.outputs['output_value'])

        target_file = os.path.join(tmpdir, 'result.yaml')
        compiler.Compiler().compile(pipeline_func=simple_pipeline,
                                    package_path=target_file)

        self.assertTrue(os.path.exists(target_file))
        with open(target_file, 'r') as f:
            f.read()
def create_pipeline(self, pipeline_func):
    """Create a new Kubeflow Pipeline using the provided pipeline function.

    Args:
        pipeline_func: The method decorated by @dsl.pipeline which defines
            the pipeline

    Returns:
        The Kubeflow Pipeline object created
    """
    pipeline_name = pipeline_func.__name__
    try:
        (_, pipeline_package_path) = tempfile.mkstemp(suffix='.zip')
        compiler.Compiler().compile(pipeline_func, pipeline_package_path)
        return self.kfp_client.upload_pipeline(pipeline_package_path,
                                               pipeline_name)
    finally:
        os.remove(pipeline_package_path)
def create_pipeline(self, pipeline_func, pipeline_name):
    """Create a new Kubeflow Pipeline using the provided pipeline function.

    Args:
        pipeline_func: The method decorated by @dsl.pipeline which defines
            the pipeline
        pipeline_name: The name to give the uploaded pipeline

    Returns:
        The Kubeflow Pipeline object created
    """
    (_, pipeline_package_path) = tempfile.mkstemp(suffix=".zip")
    compiler.Compiler().compile(pipeline_func, pipeline_package_path)
    logging.info(f"Compiled pipeline to: {pipeline_package_path}")
    return self.kfp_client.upload_pipeline(pipeline_package_path,
                                           pipeline_name)
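# A hedged usage sketch for the wrapper above; `deployer` is a stand-in for
# whatever object carries the kfp_client attribute, and the pipeline name and
# body are hypothetical.
from kfp import dsl


@dsl.pipeline(name='train-pipeline', description='Example pipeline.')
def train_pipeline():
    pass


uploaded = deployer.create_pipeline(train_pipeline, 'train-pipeline')
print(uploaded.id)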
def test_invalid_after_dependency(self):

    @dsl.component
    def producer_op() -> str:
        return 'a'

    @dsl.component
    def dummy_op(msg: str = ''):
        pass

    @dsl.pipeline(name='test-pipeline')
    def my_pipeline(text: str):
        with dsl.Condition(text == 'a'):
            producer_task = producer_op()

        dummy_op().after(producer_task)

    with self.assertRaisesRegex(
            RuntimeError,
            'Task dummy-op cannot dependent on any task inside the group:'):
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path='result.yaml')
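# For contrast, a hedged sketch of a dependency that compiles: when the
# dependent task is declared inside the same dsl.Condition group, .after()
# is valid. Assumes producer_op and dummy_op as defined in the test above
# are in scope.
@dsl.pipeline(name='test-pipeline-valid')
def valid_pipeline(text: str):
    with dsl.Condition(text == 'a'):
        producer_task = producer_op()
        dummy_op().after(producer_task)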
def test_compile_pipeline_with_misused_inputpath_should_raise_error(self):
    component_op = components.load_component_from_text("""
        name: component with misused placeholder
        inputs:
        - {name: text, type: String}
        implementation:
          container:
            image: dummy
            args:
            - {inputPath: text}
        """)

    @dsl.pipeline(name='test-pipeline', pipeline_root='dummy_root')
    def my_pipeline(text: str):
        component_op(text=text)

    with self.assertRaisesRegex(
            TypeError,
            ' type "String" cannot be paired with InputPathPlaceholder.'):
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path='output.yaml')
def test_invalid_data_dependency(self):

    @dsl.component
    def producer_op() -> str:
        return 'a'

    @dsl.component
    def dummy_op(msg: str = ''):
        pass

    @dsl.pipeline(name='test-pipeline')
    def my_pipeline(text: bool):
        with dsl.ParallelFor(['a, b']):
            producer_task = producer_op()

        dummy_op(msg=producer_task.output)

    with self.assertRaisesRegex(
            RuntimeError,
            'Task dummy-op cannot dependent on any task inside the group:'):
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path='result.yaml')
def test_compile_pipeline_with_misused_outputuri_should_raise_error(self):
    component_op = components.load_component_from_text("""
        name: component with misused placeholder
        outputs:
        - {name: value, type: Integer}
        implementation:
          container:
            image: dummy
            args:
            - {outputUri: value}
        """)

    @dsl.pipeline(name='test-pipeline', pipeline_root='dummy_root')
    def my_pipeline():
        component_op()

    with self.assertRaisesRegex(
            TypeError,
            ' type "Integer" cannot be paired with OutputUriPlaceholder.'):
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path='output.yaml')
def test_use_task_final_status_in_non_exit_op_yaml(self):
    print_op = components.load_component_from_text("""
        name: Print Op
        inputs:
        - {name: message, type: PipelineTaskFinalStatus}
        implementation:
          container:
            image: python:3.7
            command:
            - echo
            - {inputValue: message}
        """)

    @dsl.pipeline(name='test-pipeline')
    def my_pipeline(text: bool):
        print_op()

    with self.assertRaisesRegex(
            ValueError,
            'PipelineTaskFinalStatus can only be used in an exit task.'):
        compiler.Compiler().compile(pipeline_func=my_pipeline,
                                    package_path='result.yaml')
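# For contrast, PipelineTaskFinalStatus is accepted when the consuming task
# is the exit task of a dsl.ExitHandler; a minimal hedged sketch (worker_op
# and the pipeline name are illustrative assumptions):
@dsl.component
def exit_op(status: PipelineTaskFinalStatus):
    print(status.state)


@dsl.component
def worker_op():
    pass


@dsl.pipeline(name='pipeline-with-exit-handler')
def pipeline_with_exit_handler():
    exit_task = exit_op()
    with dsl.ExitHandler(exit_task):
        worker_op()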