def main(
    context: str,
    gcr_root: str,
    gcs_root: str,
    experiment: str = 'v2_sample_test',
    timeout_mins: float = 40,
    kfp_package_path:
    str = 'git+https://github.com/kubeflow/pipelines#egg=kfp&subdirectory=sdk/python',
    samples_config: str = os.path.join('samples', 'test', 'config.yaml'),
):
    """Launch the v2 sample-test pipeline on a KFP cluster and wait for it.

    Loads the sample list from ``samples_config`` (resolved relative to the
    repo root), submits one run of ``v2_sample_test`` to the cluster the
    local kfp client is configured against, waits up to ``timeout_mins``
    for completion, and asserts the run reached the ``Succeeded`` state.
    """
    repo_root = os.path.join('..', '..', '..', '..')
    config_path = os.path.join(repo_root, samples_config)
    with open(config_path, 'r') as stream:
        samples_config_content = yaml.safe_load(stream)

    client = kfp.Client()
    # TODO(Bobgy): avoid using private fields when getting loaded config
    host = client._existing_config.host
    client.create_experiment(
        name=experiment,
        description='An experiment with Kubeflow Pipelines v2 sample test runs.'
    )
    pipeline_conf = kfp.dsl.PipelineConf()
    # A run-level timeout prevents pipelines stuck in Running from leaking
    # indefinitely.
    pipeline_conf.set_timeout(timeout_mins * _MINUTE)
    print('Using KFP package path: {}'.format(kfp_package_path))
    run_result = client.create_run_from_pipeline_func(
        v2_sample_test,
        {
            'samples_config': samples_config_content,
            'context': context,
            'image_registry': f'{gcr_root}/test',
            'gcs_root': gcs_root,
            'kfp_host': host,
            'kfp_package_path': kfp_package_path,
        },
        experiment_name=experiment,
        pipeline_conf=pipeline_conf,
    )
    print("Run details page URL:")
    print(f"{host}/#/runs/details/{run_result.run_id}")

    run_response = run_result.wait_for_run_completion(timeout_mins * _MINUTE)
    run = run_response.run
    from pprint import pprint
    # Hide verbose content before dumping the run object.
    run_response.run.pipeline_spec.workflow_manifest = None
    pprint(run_response.run)
    # Repeat the URL so it is easy to find at the end of the log.
    print("Run details page URL:")
    print(f"{host}/#/runs/details/{run_result.run_id}")
    assert run.status == 'Succeeded'
def get_run_info(run_id: str):
    """Example of fetching run details for the current pipeline run."""
    print(f'Current run ID is {run_id}.')
    # The KFP API server is usually reachable as the ml-pipeline service in
    # the same namespace. On a full Kubeflow deployment pipelines execute in
    # user namespaces while the API lives in the kubeflow namespace, so the
    # host must be changed to http://ml-pipeline.kubeflow:8888 there.
    import kfp
    response = kfp.Client(host='http://ml-pipeline:8888').get_run(run_id=run_id)
    # Print only the run field to keep the output brief.
    print(response.run)
def main():
    """Functional end-to-end test against a KFP host.

    Creates a uniquely named experiment, runs the hello-world pipeline in
    it, and reports whether the run completed within the configured timeout.
    """
    args = parse_arguments()

    ###### Initialization ######
    client = kfp.Client(args.host)
    print("host is {}".format(args.host))

    ###### Create Experiment ######
    print("Creating experiment")
    # NOTE(review): "expriment" looks like a typo, but the prefix is kept
    # verbatim in case anything filters experiments by this name.
    random_suffix = "".join(
        random.choices(string.ascii_uppercase + string.digits, k=5))
    experiment_name = "kfp-functional-e2e-expriment-" + random_suffix
    experiment_id = client.create_experiment(experiment_name).id
    print("Experiment with id {} created".format(experiment_id))

    try:
        ###### Create Run from Pipeline Func ######
        print("Creating Run from Pipeline Func")
        run_id = client.create_run_from_pipeline_func(
            hello_world_pipeline, arguments={},
            experiment_name=experiment_name).run_id
        print("Run {} created".format(run_id))

        ###### Monitor Run ######
        start_time = datetime.now()
        run_response = client.wait_for_run_completion(
            run_id, constants.RUN_TIMEOUT_SECONDS)
        elapsed_time = (datetime.now() - start_time).seconds
        if run_response.run.status.lower() == 'succeeded':
            print("Run succeeded in {} seconds".format(elapsed_time))
        else:
            print("Run can't complete in {} seconds".format(elapsed_time))
    finally:
        ###### Archive Experiment ######
        print(
            "Archive experiment has a serious performance problem right now, so we temporarily disable it."
        )
        print(
            "TODO(Bobgy): re-enable archiving experiment action after fixing https://github.com/kubeflow/pipelines/issues/6815#issuecomment-955938098"
        )
@v2.dsl.component(kfp_package_path=_KFP_PACKAGE_PATH)
def produce_dir_with_files_v2_python_op(output_dir: Output[Artifact],
                                        num_files: int = 10,
                                        subdir: str = 'texts'):
    """Write ``num_files`` numbered .txt files under ``subdir`` of the artifact."""
    import os
    target_dir = os.path.join(output_dir.path, subdir)
    os.makedirs(target_dir, exist_ok=True)
    for index in range(num_files):
        # Each file <index>.txt contains just its own index.
        with open(os.path.join(target_dir, '{}.txt'.format(index)), 'w') as f:
            f.write(str(index))


@kfp.dsl.pipeline(name='dir-pipeline-v2')
def dir_pipeline_v2(subdir: str = 'texts'):
    """Produce a directory artifact and then list its files (v2 components)."""
    producer_task = produce_dir_with_files_v2_python_op(
        num_files=15,
        subdir=subdir,
    )
    list_dir_files_v2_python_op(
        input_dir=producer_task.output,
        subdir=subdir,
    )


if __name__ == '__main__':
    kfp_endpoint = None
    # NOTE(review): this submits `dir_pipeline`, not the `dir_pipeline_v2`
    # defined above — presumably a pipeline defined elsewhere in this file;
    # confirm this is intentional.
    kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(
        dir_pipeline, arguments={})
gcs_path_prefix=model_dir_uri, ).set_display_name('Upload model').output # Setting the model as prod upload_to_gcs_op( data=model_uri, gcs_path=prod_model_pointer_uri, ).set_display_name('Set prod model') if __name__ == '__main__': # Running the first time. The trainer will train the model from scratch and set as prod after testing it pipelin_run = kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func( continuous_training_pipeline, arguments=dict( model_dir_uri=model_dir_uri, training_start_date='2019-02-01', training_end_date='2019-03-01', ), ) pipelin_run.wait_for_run_completion() # Running the second time. The trainer should warm-start the training from the prod model and set the new model as prod after testing it kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func( continuous_training_pipeline, arguments=dict( model_dir_uri=model_dir_uri, training_start_date='2019-02-01', training_end_date='2019-03-01', ), )
name="my-in-coop2", image="library/bash:4.4.23", command=["sh", "-c"], arguments=["echo op2 %s" % item.b], ) op_out = dsl.ContainerOp( name="my-out-cop", image="library/bash:4.4.23", command=["sh", "-c"], arguments=["echo %s" % my_pipe_param], ) if __name__ == '__main__': from kfp.deprecated import compiler print(compiler.Compiler().compile(pipeline, package_path=None)) import kfp.deprecated as kfp client = kfp.Client(host='127.0.0.1:8080/pipeline') pkg_path = '/tmp/witest_pkg.tar.gz' compiler.Compiler().compile(pipeline, package_path=pkg_path) exp = client.create_experiment('withparams_exp') client.run_pipeline( experiment_id=exp.id, job_name='withitem_basic', pipeline_package_path=pkg_path, params={}, )
return datetime.datetime.now().isoformat() def caching_pipeline(seconds: float = 60): # All outputs of successful executions are cached work_task = do_work_op(seconds) # Test 1 # Running the pipeline for the first time. # The pipeline performs work and the results are cached. # The pipeline run time should be ~60 seconds. print("Starting test 1") start_time = datetime.datetime.now() kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func( caching_pipeline, arguments=dict(seconds=60), ).wait_for_run_completion(timeout=999) elapsed_time = datetime.datetime.now() - start_time print(f"Total run time: {int(elapsed_time.total_seconds())} seconds") # Test 2 # Running the pipeline the second time. # The pipeline should reuse the cached results and complete faster. # The pipeline run time should be <60 seconds. print("Starting test 2") start_time = datetime.datetime.now() kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func( caching_pipeline, arguments=dict(seconds=60), ).wait_for_run_completion(timeout=999) elapsed_time = datetime.datetime.now() - start_time