def mnist_pipeline(
    train_images='https://people.canonical.com/~knkski/train-images-idx3-ubyte.gz',
    train_labels='https://people.canonical.com/~knkski/train-labels-idx1-ubyte.gz',
    test_images='https://people.canonical.com/~knkski/t10k-images-idx3-ubyte.gz',
    test_labels='https://people.canonical.com/~knkski/t10k-labels-idx1-ubyte.gz',
    storage_endpoint='minio:9000',
    bucket='mnist',
    train_epochs=2,
    train_batch_size=128,
):
    # Ensure minio bucket is created
    ensure_bucket = ensure_bucket_task(storage_endpoint, bucket)

    # Load mnist data and transform it into numpy array
    load = load_task(
        storage_endpoint, bucket, train_images, train_labels, test_images, test_labels
    ).after(ensure_bucket)
    load.output_artifact_paths['mnist.npz'] = '/output/mnist.npz'

    # Train model on transformed mnist dataset
    train = train_task(
        storage_endpoint, bucket, load.outputs['filename'], train_epochs, train_batch_size
    ).after(load)
    train.output_artifact_paths['model'] = '/output/model.hdf5'

    serve = serve_sidecar()
    test = (
        test_task(storage_endpoint, bucket, train.outputs['filename'], train.outputs['examples'])
        .after(train)
        .add_sidecar(serve)
    )

    # Ensure that each step has volumes attached wherever data gets written
    dsl.get_pipeline_conf().add_op_transformer(attach_output_volume)
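# The pipeline above relies on an `attach_output_volume` op transformer that is not
# shown here. A minimal sketch of such a transformer, assuming an existing PVC named
# 'output-pvc' mounted at /output (both names are illustrative assumptions, not part
# of the original example):
from kubernetes import client as k8s_client

def attach_output_volume(op):
    """Mount a shared volume at /output so artifacts written there persist."""
    op.add_volume(
        k8s_client.V1Volume(
            name='output-volume',
            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                claim_name='output-pvc')))
    op.container.add_volume_mount(
        k8s_client.V1VolumeMount(name='output-volume', mount_path='/output'))
    return op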
def custom_artifact_location(
    secret_name: str = "mlpipeline-minio-artifact",
    tag: str = '1.31.0',
    namespace: str = "kubeflow",
    bucket: str = "mlpipeline",
):
    # configures artifact location
    pipeline_artifact_location = dsl.ArtifactLocation.s3(
        bucket=bucket,
        endpoint="minio-service.%s:9000" % namespace,  # parameterize minio-service endpoint
        insecure=True,
        access_key_secret=V1SecretKeySelector(name=secret_name, key="accesskey"),
        secret_key_secret={"name": secret_name, "key": "secretkey"},  # accepts a dict as well
    )

    # set pipeline-level artifact location
    dsl.get_pipeline_conf().set_artifact_location(pipeline_artifact_location)

    # artifacts in this op are stored to endpoint `minio-service.<namespace>:9000`
    op = dsl.ContainerOp(
        name="foo",
        image="busybox:%s" % tag,
        command=['sh', '-c', 'echo hello > /tmp/output.txt'],
        file_outputs={'output': '/tmp/output.txt'},
    )
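# The artifact-location snippets in this file assume the following imports; the
# commented compile call is a hedged usage sketch (the output filename is
# illustrative), not part of the original example:
import kfp.dsl as dsl
from kfp import compiler
from kubernetes import client as k8s_client
from kubernetes.client.models import V1SecretKeySelector

# compiler.Compiler().compile(custom_artifact_location, 'custom_artifact_location.yaml')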
def some_pipeline():
    task1 = some_op()
    task2 = some_op()
    task3 = some_op()

    dsl.get_pipeline_conf().op_transformers.append(lambda op: op.set_retry(5))
def test_train():
    """Pipeline steps"""
    persistent_volume_path = '/mnt/azure'
    model_name = 'test'
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'Privacy'

    # train
    operations['train'] = dsl.ContainerOp(
        name='train',
        image='svangara.azurecr.io/training:3',
        command=['python'],
        arguments=[
            '/scripts/train.py',
            '--outputs', model_folder,
        ]
    )

    dsl.get_pipeline_conf().add_op_transformer(transformer)
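# `transformer` is referenced above but not defined in the snippet. A hedged sketch of
# an op transformer that mounts an Azure Files share at the `persistent_volume_path`
# used above (the claim name 'azure-files' is an assumption for illustration):
from kubernetes import client as k8s_client

def transformer(op):
    op.add_volume(
        k8s_client.V1Volume(
            name='azure',
            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                claim_name='azure-files')))
    op.container.add_volume_mount(
        k8s_client.V1VolumeMount(name='azure', mount_path='/mnt/azure'))
    return op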
def foo_pipeline(tag: str, namespace: str = "kubeflow", bucket: str = "foobar"):
    # configures artifact location
    pipeline_artifact_location = dsl.ArtifactLocation.s3(
        bucket=bucket,
        endpoint="minio-service.%s:9000" % namespace,
        insecure=True,
        access_key_secret={"name": "minio", "key": "accesskey"},
        secret_key_secret=V1SecretKeySelector(name="minio", key="secretkey"))

    # configures artifact location using AWS IAM role (no access key provided)
    aws_artifact_location = dsl.ArtifactLocation.s3(
        bucket=bucket,
        endpoint="s3.amazonaws.com",
        region="ap-southeast-1",
        insecure=False)

    # set pipeline-level artifact location (to minio)
    dsl.get_pipeline_conf().set_artifact_location(pipeline_artifact_location)

    op1 = dsl.ContainerOp(
        name='foo',
        image='busybox:%s' % tag,
        output_artifact_paths={
            'out_art': '/tmp/out_art.txt',
        },
    )
def param_substitutions(param=dsl.PipelineParam(name='param')):
    dsl.get_pipeline_conf().op_transformers.append(add_common_labels(param))

    op = dsl.ContainerOp(
        name="cop",
        image="image",
    )
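# `add_common_labels` is not defined in the snippet above. A minimal sketch of a
# transformer factory that stamps every op with a label derived from the pipeline
# parameter (the label key 'param' is an illustrative assumption):
def add_common_labels(param):
    def _add_labels(op):
        return op.add_pod_label('param', str(param))
    return _add_labels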
def save_most_frequent_word(message: str):
    """A pipeline function describing the orchestration of the workflow."""
    counter = GetFrequentWordOp(name='get-Frequent', message=message)

    # Call set_image_pull_secrets after get_pipeline_conf().
    dsl.get_pipeline_conf() \
        .set_image_pull_secrets([k8s_client.V1ObjectReference(name="secretA")])
def imagepullsecrets_pipeline(
        message: str = "When flies fly behind flies, then flies are following flies."):
    """A pipeline function describing the orchestration of the workflow."""
    counter = get_frequent_word_op(message=message)

    # Call set_image_pull_secrets after get_pipeline_conf().
    dsl.get_pipeline_conf() \
        .set_image_pull_secrets([k8s_client.V1ObjectReference(name="secretA")])
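# `get_frequent_word_op` above is assumed to be a lightweight Python component. A
# hedged sketch of how such a component could be built with func_to_container_op
# (the base image and the exact word-counting logic are illustrative assumptions):
from kfp.components import func_to_container_op

def get_frequent_word(message: str) -> str:
    """Return the most frequent word in the message."""
    from collections import Counter
    words = Counter(message.lower().split())
    return max(words, key=words.get) if words else ''

get_frequent_word_op = func_to_container_op(get_frequent_word, base_image='python:3.9')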
def inject_env_vars():
    dsl.get_pipeline_conf().set_image_pull_secrets([
        k8s_client.V1ObjectReference(
            name="k8scc01covidacr-registry-connection")
    ])

    for var in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_REGION',
                'S3_REGION', 'S3_ENDPOINT', 'S3_USE_HTTPS', 'S3_VERIFY_SSL'):
        inject_env_var(var)
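# `inject_env_var` is not shown above. One plausible reading, sketched here purely as
# an assumption, is that it registers an op transformer that injects the named
# variable into every container from a Kubernetes secret (the secret name
# 'minio-credentials' is illustrative):
from kubernetes import client as k8s_client

def inject_env_var(var):
    def _inject(op):
        op.container.add_env_variable(
            k8s_client.V1EnvVar(
                name=var,
                value_from=k8s_client.V1EnvVarSource(
                    secret_key_ref=k8s_client.V1SecretKeySelector(
                        name='minio-credentials', key=var))))
        return op
    dsl.get_pipeline_conf().op_transformers.append(_inject)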
def update_endpoint_pipeline(
    region="",
    endpoint_url="",
    image="",
    model_name="",
    endpoint_config_name="",
    endpoint_name="",
    model_artifact_url="",
    variant_name_1="",
    instance_type_1="",
    instance_type_2="",
    initial_instance_count_1="",
    initial_variant_weight_1="",
    network_isolation="",
    role="",
    update_endpoint="",
):
    create_model = sagemaker_model_op(
        region=region,
        endpoint_url=endpoint_url,
        model_name=model_name,
        image=image,
        model_artifact_url=model_artifact_url,
        network_isolation=network_isolation,
        role=role,
    )

    deploy_model = sagemaker_deploy_op(
        region=region,
        endpoint_url=endpoint_url,
        endpoint_config_name=endpoint_config_name,
        endpoint_name=endpoint_name,
        model_name_1=create_model.output,
        variant_name_1=variant_name_1,
        instance_type_1=instance_type_1,
        initial_instance_count_1=initial_instance_count_1,
        initial_variant_weight_1=initial_variant_weight_1,
    )

    update_model = sagemaker_deploy_op(
        region=region,
        endpoint_url=endpoint_url,
        endpoint_config_name=endpoint_config_name,
        endpoint_name=deploy_model.output,
        model_name_1=create_model.output,
        variant_name_1=variant_name_1,
        instance_type_1=instance_type_2,
        initial_instance_count_1=initial_instance_count_1,
        initial_variant_weight_1=initial_variant_weight_1,
        update_endpoint=update_endpoint,
    )

    dsl.get_pipeline_conf().set_image_pull_policy(policy="Always")
def train_pipeline():
    operations = {}
    operations['training'] = dsl.ContainerOp(
        name='Training',
        image='rajatsethi7/my_docker_image',
        command=['python3'],
        arguments=["main.py"]
    )

    # get_pipeline_conf() only returns the PipelineConf singleton; nothing is
    # configured here.
    dsl.get_pipeline_conf()
    return operations
def training_pipeline(gcp_bucket: str, project: str):
    # pre_image = f"gcr.io/{project}/pre_image:{github_sha}"
    # train_forecast_image = f"gcr.io/{project}/train_forecast_image:{github_sha}"

    operations = {}
    operations['training'] = dsl.ContainerOp(
        name='Training',
        image='rajatsethi7/my_docker_image',
        command=['python3'],
        arguments=["main.py"])

    dsl.get_pipeline_conf()
    return operations
def convert_kedro_pipeline_to_kfp() -> None:
    """Convert from a Kedro pipeline into a kfp container graph."""
    # Note: this function closes over `self`, `pipeline`, `image` and
    # `image_pull_policy` from the enclosing kedro-kubeflow runner context.
    dsl.get_pipeline_conf().set_ttl_seconds_after_finished(
        self.run_config.ttl
    )
    node_dependencies = self.context.pipelines.get(
        pipeline
    ).node_dependencies
    with self._create_pipeline_exit_handler():
        kfp_ops = self._build_kfp_ops(
            node_dependencies, image, image_pull_policy
        )
        for node, dependencies in node_dependencies.items():
            for dependency in dependencies:
                kfp_ops[node.name].after(kfp_ops[dependency.name])
def onnx_pipeline(
    model,
    output_onnx_path,
    model_type,
    output_perf_result_path,
    execution_providers="",
    model_inputs_names="",
    model_outputs_names="",
    model_input_shapes="",
    model_initial_types="",
    caffe_model_prototxt="",
    target_opset=7,
):
    # Create components named "Convert To ONNX" and "ONNX Runtime Perf". Edit the
    # V1PersistentVolumeClaimVolumeSource name to match the persistent volume claim
    # you created if needed. By default the names match ../azure-files-sc.yaml
    # and ../azure-files-pvc.yaml
    convert_op = onnxConverterOp(
        'Convert To ONNX',
        '%s' % model,
        '%s' % output_onnx_path,
        '%s' % model_type,
        '%s' % model_inputs_names,
        '%s' % model_outputs_names,
        '%s' % model_input_shapes,
        '%s' % model_initial_types,
        '%s' % caffe_model_prototxt,
        '%s' % target_opset
    ).add_volume(
        k8s_client.V1Volume(
            name='pipeline-nfs',
            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                claim_name='azurefile'))
    ).add_volume_mount(
        k8s_client.V1VolumeMount(mount_path='/mnt', name='pipeline-nfs'))

    perf_op = perfTestOp(
        'ONNX Runtime Perf',
        convert_op.output,
        '%s' % output_perf_result_path,
        '%s' % execution_providers,
    ).add_volume(
        k8s_client.V1Volume(
            name='pipeline-nfs',
            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                claim_name='azurefile'))
    ).add_volume_mount(
        k8s_client.V1VolumeMount(mount_path='/mnt', name='pipeline-nfs')
    ).set_gpu_limit(1)

    dsl.get_pipeline_conf().set_image_pull_secrets(
        [k8s_client.V1ObjectReference(name="regcred")])
def custom_artifact_location(
    tag: str, namespace: str = "kubeflow", bucket: str = "mybucket"
):
    # configures artifact location
    pipeline_artifact_location = dsl.ArtifactLocation.s3(
        bucket=bucket,
        endpoint="minio-service.%s:9000" % namespace,  # parameterize minio-service endpoint
        insecure=True,
        access_key_secret=V1SecretKeySelector(name="minio", key="accesskey"),
        secret_key_secret={"name": "minio", "key": "secretkey"},  # accepts a dict as well
    )

    # set pipeline-level artifact location
    dsl.get_pipeline_conf().set_artifact_location(pipeline_artifact_location)

    # artifacts in this op are stored to endpoint `minio-service.<namespace>:9000`
    op = dsl.ContainerOp(name="foo", image="busybox:%s" % tag)
def timeseries_pipeline(gcp_bucket: str, project: str, train_data: str = "train.csv",
                        forecast_data: str = "forecast.csv"):
    """The kfp pipeline function.

    Arguments:
        gcp_bucket {str} -- The google bucket
        project {str} -- The gcp project where the data should be stored

    Keyword Arguments:
        train_data {str} -- The name of the train file that is uploaded to the bucket (default: {"train.csv"})
        forecast_data {str} -- The name of the forecast file uploaded to the bucket (default: {"forecast.csv"})
    """
    pre_image = f"gcr.io/{project}/pre_image:{github_sha}"
    train_forecast_image = f"gcr.io/{project}/train_forecast_image:{github_sha}"

    operations = {}
    operations['preprocess'] = dsl.ContainerOp(
        name='Preprocess',
        image=pre_image,
        command=['python3'],
        arguments=["main.py",
                   "--url", "https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv",
                   "--bucket", gcp_bucket,
                   "--destination_blob_name", train_data
                   ]
    ).set_image_pull_policy('Always')

    operations['train_forecast'] = dsl.ContainerOp(
        name='Forecast',
        image=train_forecast_image,
        command=['python3'],
        arguments=["main.py",
                   "--bucket", gcp_bucket,
                   "--source_blob_name", train_data,
                   "--forecast_blob_name", forecast_data
                   ]
    ).set_image_pull_policy('Always')

    operations["train_forecast"].after(operations["preprocess"])

    for _, operation in operations.items():
        operation.apply(gcp.use_gcp_secret('user-gcp-sa'))

    dsl.get_pipeline_conf()
    return operations
def mnist_pipeline(
    train_images='https://people.canonical.com/~knkski/train-images-idx3-ubyte.gz',
    train_labels='https://people.canonical.com/~knkski/train-labels-idx1-ubyte.gz',
    test_images='https://people.canonical.com/~knkski/t10k-images-idx3-ubyte.gz',
    test_labels='https://people.canonical.com/~knkski/t10k-labels-idx1-ubyte.gz',
    train_epochs: int = 2,
    train_batch_size: int = 128,
):
    # Load mnist data and transform it into numpy array
    load = load_task(train_images, train_labels, test_images, test_labels)

    # Train model on transformed mnist dataset
    train = train_task(load.outputs['traintest_output'], train_epochs, train_batch_size)

    serve = serve_sidecar()
    test_task(train.outputs['model_path'], load.outputs['validation_output']).add_sidecar(serve)

    # Ensure that each step has volumes attached wherever data gets written
    dsl.get_pipeline_conf().add_op_transformer(attach_output_volume)
def transform_pipeline():
    op1 = print_op('hey, what are you up to?')
    op2 = print_op('train my model.')
    dsl.get_pipeline_conf().add_op_transformer(add_annotation_and_label)
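# `add_annotation_and_label` is not defined in this snippet. A minimal sketch of such
# a transformer (the annotation/label key and value are illustrative assumptions):
def add_annotation_and_label(op):
    op.add_pod_annotation(name='team', value='ml-platform')
    op.add_pod_label(name='team', value='ml-platform')
    return op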
def pipeline_parallelism():
    op1 = print_op('hey, what are you up to?')
    op2 = print_op('train my model.')
    dsl.get_pipeline_conf().set_parallelism(1)
def some_pipeline():
    some_op()
    dsl.get_pipeline_conf().set_dns_config(
        V1PodDNSConfig(
            nameservers=["1.2.3.4"],
            options=[V1PodDNSConfigOption(name="ndots", value="2")]))
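# The DNS-config example above assumes the Kubernetes client models are imported:
from kubernetes.client.models import V1PodDNSConfig, V1PodDNSConfigOption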
def some_pipeline():
    some_op()
    dsl.get_pipeline_conf().set_default_pod_node_selector(
        label_name="cloud.google.com/gke-accelerator", value="nvidia-tesla-p4")
def some_pipeline(): task1 = some_op() task2 = some_op() dsl.get_pipeline_conf().set_image_pull_policy(policy="Alwayss")
def some_pipeline():
    task1 = some_op()
    task2 = some_op()
    # task3 keeps its own IfNotPresent policy; the pipeline-level default below only
    # applies to ops that do not set one explicitly.
    task3 = some_other_op().set_image_pull_policy("IfNotPresent")
    dsl.get_pipeline_conf().set_image_pull_policy(policy="Always")
def some_pipeline():
    some_op()
    dsl.get_pipeline_conf().set_pod_disruption_budget("100%")
def some_pipeline():
    some_op()
    dsl.get_pipeline_conf().set_ttl_seconds_after_finished(86400)
def some_pipeline():
    some_op()
    some_op()
    some_op()
    dsl.get_pipeline_conf().set_parallelism(1)
def mnist_pipeline(
        name=model_name,
        namespace=user_namespace,
        storageclass=storageclass,
        step=4000):
    # step 1: create a Katib experiment to tune hyperparameters
    objectiveConfig = {
        "type": "minimize",
        "goal": 0.001,
        "objectiveMetricName": "loss",
    }
    algorithmConfig = {"algorithmName": "random"}
    parameters = [
        {"name": "--tf-learning-rate", "parameterType": "double",
         "feasibleSpace": {"min": "0.01", "max": "0.03"}},
        {"name": "--tf-batch-size", "parameterType": "discrete",
         "feasibleSpace": {"list": ["16", "32", "64"]}},
    ]
    rawTemplate = {
        "apiVersion": "kubeflow.org/v1",
        "kind": "TFJob",
        "metadata": {
            "name": "{{.Trial}}",
            "namespace": "{{.NameSpace}}"
        },
        "spec": {
            "tfReplicaSpecs": {
                "Chief": {
                    "replicas": 1,
                    "restartPolicy": "OnFailure",
                    "template": {
                        "spec": {
                            "containers": [
                                {
                                    "command": ["sh", "-c"],
                                    "args": [
                                        "python /opt/model.py --tf-train-steps=2000 {{- with .HyperParameters}} {{- range .}} {{.Name}}={{.Value}} {{- end}} {{- end}}"
                                    ],
                                    "image": "liuhougangxa/tf-estimator-mnist",
                                    "name": "tensorflow"
                                }
                            ]
                        }
                    }
                },
                "Worker": {
                    "replicas": 3,
                    "restartPolicy": "OnFailure",
                    "template": {
                        "spec": {
                            "containers": [
                                {
                                    "command": ["sh", "-c"],
                                    "args": [
                                        "python /opt/model.py --tf-train-steps=2000 {{- with .HyperParameters}} {{- range .}} {{.Name}}={{.Value}} {{- end}} {{- end}}"
                                    ],
                                    "image": "liuhougangxa/tf-estimator-mnist",
                                    "name": "tensorflow"
                                }
                            ]
                        }
                    }
                }
            }
        }
    }
    trialTemplate = {
        "goTemplate": {
            "rawTemplate": json.dumps(rawTemplate)
        }
    }
    metricsCollectorSpec = {
        "source": {
            "fileSystemPath": {
                "path": "/tmp/tf",
                "kind": "Directory"
            }
        },
        "collector": {
            "kind": "TensorFlowEvent"
        }
    }

    katib_experiment_launcher_op = components.load_component_from_url(
        'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/katib-launcher/component.yaml')
    op1 = katib_experiment_launcher_op(
        experiment_name=name,
        experiment_namespace=namespace,
        parallel_trial_count=3,
        max_trial_count=12,
        objective=str(objectiveConfig),
        algorithm=str(algorithmConfig),
        trial_template=str(trialTemplate),
        parameters=str(parameters),
        metrics_collector=str(metricsCollectorSpec),
        # experiment_timeout_minutes=experimentTimeoutMinutes,
        delete_finished_experiment=False)

    # step 2: create a TFJob to train your model with the best hyperparameters tuned by Katib
    tfjobjson_template = Template("""
    {
        "apiVersion": "kubeflow.org/v1",
        "kind": "TFJob",
        "metadata": {
            "name": "$name",
            "namespace": "$namespace",
            "annotations": {
                "sidecar.istio.io/inject": "false"
            }
        },
        "spec": {
            "tfReplicaSpecs": {
                "Chief": {
                    "replicas": 1,
                    "restartPolicy": "OnFailure",
                    "template": {
                        "metadata": {
                            "annotations": {
                                "sidecar.istio.io/inject": "false"
                            }
                        },
                        "spec": {
                            "volumes": [
                                {
                                    "name": "export-model",
                                    "persistentVolumeClaim": {
                                        "claimName": "$modelpvc"
                                    }
                                }
                            ],
                            "containers": [
                                {
                                    "command": ["sh", "-c"],
                                    "args": ["python /opt/model.py --tf-train-steps=$step --tf-export-dir=/mnt/export $args"],
                                    "image": "liuhougangxa/tf-estimator-mnist",
                                    "name": "tensorflow",
                                    "volumeMounts": [
                                        {
                                            "mountPath": "/mnt/export",
                                            "name": "export-model"
                                        }
                                    ]
                                }
                            ]
                        }
                    }
                },
                "Worker": {
                    "replicas": 3,
                    "restartPolicy": "OnFailure",
                    "template": {
                        "metadata": {
                            "annotations": {
                                "sidecar.istio.io/inject": "false"
                            }
                        },
                        "spec": {
                            "volumes": [
                                {
                                    "name": "export-model",
                                    "persistentVolumeClaim": {
                                        "claimName": "$modelpvc"
                                    }
                                }
                            ],
                            "containers": [
                                {
                                    "command": ["sh", "-c"],
                                    "args": ["python /opt/model.py --tf-train-steps=$step --tf-export-dir=/mnt/export $args"],
                                    "image": "liuhougangxa/tf-estimator-mnist",
                                    "name": "tensorflow",
                                    "volumeMounts": [
                                        {
                                            "mountPath": "/mnt/export",
                                            "name": "export-model"
                                        }
                                    ]
                                }
                            ]
                        }
                    }
                }
            }
        }
    }
    """)

    convert_op = func_to_container_op(convert_mnist_experiment_result)
    op2 = convert_op(op1.output)

    volume_template = Template("""
    {
        "apiVersion": "v1",
        "kind": "PersistentVolumeClaim",
        "metadata": {
            "name": "{{workflow.name}}-modelpvc",
            "namespace": "$namespace"
        },
        "spec": {
            "accessModes": ["ReadWriteMany"],
            "resources": {
                "requests": {
                    "storage": "1Gi"
                }
            },
            "storageClassName": "$storageclass"
        }
    }
    """)
    volopjson = volume_template.substitute(
        {'namespace': namespace, 'storageclass': storageclass})
    volop = json.loads(volopjson)

    modelvolop = dsl.ResourceOp(
        name="modelpvc",
        k8s_resource=volop
    )

    tfjobjson = tfjobjson_template.substitute(
        {'args': op2.output,
         'name': name,
         'namespace': namespace,
         'step': step,
         'modelpvc': modelvolop.outputs["name"]})
    tfjob = json.loads(tfjobjson)

    train = dsl.ResourceOp(
        name="train",
        k8s_resource=tfjob,
        success_condition='status.replicaStatuses.Worker.succeeded==3,status.replicaStatuses.Chief.succeeded==1'
    )

    # step 3: model inference with a KFServing InferenceService
    inferenceservice_template = Template("""
    {
        "apiVersion": "serving.kubeflow.org/v1alpha2",
        "kind": "InferenceService",
        "metadata": {
            "name": "$name",
            "namespace": "$namespace"
        },
        "spec": {
            "default": {
                "predictor": {
                    "tensorflow": {
                        "storageUri": "pvc://$modelpvc/"
                    }
                }
            }
        }
    }
    """)
    inferenceservicejson = inferenceservice_template.substitute(
        {'modelpvc': modelvolop.outputs["name"],
         'name': name,
         'namespace': namespace})
    inferenceservice = json.loads(inferenceservicejson)
    inference = dsl.ResourceOp(
        name="inference",
        k8s_resource=inferenceservice,
        success_condition='status.url').after(train)

    dsl.get_pipeline_conf().add_op_transformer(add_istio_annotation)
def xgb_train_pipeline(
    output='gs://your-gcs-bucket',
    project='your-gcp-project',
    cluster_name='xgb-%s' % dsl.RUN_ID_PLACEHOLDER,
    region='us-central1',
    train_data='gs://ml-pipeline-playground/sfpd/train.csv',
    eval_data='gs://ml-pipeline-playground/sfpd/eval.csv',
    schema='gs://ml-pipeline-playground/sfpd/schema.json',
    target='resolution',
    rounds=200,
    workers=2,
    true_label='ACTION',
):
    output_template = str(output) + '/' + dsl.RUN_ID_PLACEHOLDER + '/data'

    # The current GCP pyspark/spark ops do not provide outputs as return values;
    # instead, we need to use strings to pass the uri around.
    analyze_output = output_template
    transform_output_train = os.path.join(output_template, 'train', 'part-*')
    transform_output_eval = os.path.join(output_template, 'eval', 'part-*')
    train_output = os.path.join(output_template, 'train_output')
    predict_output = os.path.join(output_template, 'predict_output')

    with dsl.ExitHandler(exit_op=dataproc_delete_cluster_op(
        project_id=project,
        region=region,
        name=cluster_name
    )):
        _create_cluster_op = dataproc_create_cluster_op(
            project_id=project,
            region=region,
            name=cluster_name,
            initialization_actions=[
                os.path.join(_PYSRC_PREFIX, 'initialization_actions.sh'),
            ],
            image_version='1.2'
        )

        _analyze_op = dataproc_analyze_op(
            project=project,
            region=region,
            cluster_name=cluster_name,
            schema=schema,
            train_data=train_data,
            output=output_template
        ).after(_create_cluster_op).set_display_name('Analyzer')

        _transform_op = dataproc_transform_op(
            project=project,
            region=region,
            cluster_name=cluster_name,
            train_data=train_data,
            eval_data=eval_data,
            target=target,
            analysis=analyze_output,
            output=output_template
        ).after(_analyze_op).set_display_name('Transformer')

        _train_op = dataproc_train_op(
            project=project,
            region=region,
            cluster_name=cluster_name,
            train_data=transform_output_train,
            eval_data=transform_output_eval,
            target=target,
            analysis=analyze_output,
            workers=workers,
            rounds=rounds,
            output=train_output
        ).after(_transform_op).set_display_name('Trainer')

        _predict_op = dataproc_predict_op(
            project=project,
            region=region,
            cluster_name=cluster_name,
            data=transform_output_eval,
            model=train_output,
            target=target,
            analysis=analyze_output,
            output=predict_output
        ).after(_train_op).set_display_name('Predictor')

        _cm_op = confusion_matrix_op(
            predictions=os.path.join(predict_output, 'part-*.csv'),
            output_dir=output_template
        ).after(_predict_op)

        _roc_op = roc_op(
            predictions_dir=os.path.join(predict_output, 'part-*.csv'),
            true_class=true_label,
            true_score_column=true_label,
            output_dir=output_template
        ).after(_predict_op)

    dsl.get_pipeline_conf().add_op_transformer(
        gcp.use_gcp_secret('user-gcp-sa'))
def retry_sample_pipeline():
    op1 = RandomFailure1Op('0,1,2,3').set_timeout(10)
    op2 = RandomFailure1Op('0,1')
    dsl.get_pipeline_conf().set_timeout(50)
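# `RandomFailure1Op` is not defined in this snippet. A hedged sketch of a component
# factory that exits with a random code drawn from a comma-separated list (the image
# and inline script are illustrative assumptions):
def RandomFailure1Op(exit_codes):
    return dsl.ContainerOp(
        name='random-failure',
        image='python:alpine3.6',
        command=['python', '-c'],
        arguments=[
            "import random, sys; "
            "codes = [int(c) for c in '%s'.split(',')]; "
            "code = random.choice(codes); print(code); sys.exit(code)" % exit_codes
        ])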
def node_selector_pipeline():
    dsl.get_pipeline_conf().set_default_pod_node_selector(
        'kubernetes.io/os', 'linux')
    echo_op()