Example #1
def mnist_pipeline(
    train_images='https://people.canonical.com/~knkski/train-images-idx3-ubyte.gz',
    train_labels='https://people.canonical.com/~knkski/train-labels-idx1-ubyte.gz',
    test_images='https://people.canonical.com/~knkski/t10k-images-idx3-ubyte.gz',
    test_labels='https://people.canonical.com/~knkski/t10k-labels-idx1-ubyte.gz',
    storage_endpoint='minio:9000',
    bucket='mnist',
    train_epochs=2,
    train_batch_size=128,
):
    # Ensure minio bucket is created
    ensure_bucket = ensure_bucket_task(storage_endpoint, bucket)

    # Load mnist data and transform it into numpy array
    load = load_task(
        storage_endpoint, bucket, train_images, train_labels, test_images, test_labels
    ).after(ensure_bucket)
    load.output_artifact_paths['mnist.npz'] = '/output/mnist.npz'

    # Train model on transformed mnist dataset
    train = train_task(
        storage_endpoint, bucket, load.outputs['filename'], train_epochs, train_batch_size
    ).after(load)
    train.output_artifact_paths['model'] = '/output/model.hdf5'

    serve = serve_sidecar()
    test = (
        test_task(storage_endpoint, bucket, train.outputs['filename'], train.outputs['examples'])
        .after(train)
        .add_sidecar(serve)
    )

    # Ensure that each step has a volume attached wherever data gets written
    dsl.get_pipeline_conf().add_op_transformer(attach_output_volume)
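
The attach_output_volume transformer referenced above is not shown on this page. A minimal sketch of what such an op transformer could look like, assuming a pre-existing PersistentVolumeClaim (the claim name output-pvc is a placeholder) backing the /output paths used by the steps:

from kubernetes import client as k8s_client

def attach_output_volume(op):
    """Op transformer (sketch): mount a shared PVC at /output on every step.

    'output-pvc' is a placeholder claim name; the /output/mnist.npz and
    /output/model.hdf5 artifacts above would land on this volume.
    """
    op.add_volume(
        k8s_client.V1Volume(
            name='output-volume',
            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                claim_name='output-pvc')))
    op.container.add_volume_mount(
        k8s_client.V1VolumeMount(name='output-volume', mount_path='/output'))
    return op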
Example #2
def custom_artifact_location(secret_name: str = "mlpipeline-minio-artifact",
                             tag: str = '1.31.0',
                             namespace: str = "kubeflow",
                             bucket: str = "mlpipeline"):

    # configures artifact location
    pipeline_artifact_location = dsl.ArtifactLocation.s3(
        bucket=bucket,
        endpoint="minio-service.%s:9000" %
        namespace,  # parameterize minio-service endpoint
        insecure=True,
        access_key_secret=V1SecretKeySelector(name=secret_name,
                                              key="accesskey"),
        secret_key_secret={
            "name": secret_name,
            "key": "secretkey"
        },  # accepts dict also
    )

    # set pipeline level artifact location
    dsl.get_pipeline_conf().set_artifact_location(pipeline_artifact_location)

    # artifacts in this op are stored to endpoint `minio-service.<namespace>:9000`
    op = dsl.ContainerOp(name="foo",
                         image="busybox:%s" % tag,
                         command=['sh', '-c', 'echo hello > /tmp/output.txt'],
                         file_outputs={'output': '/tmp/output.txt'})
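
As with the other snippets on this page, a pipeline function like the one above only runs once it is compiled (and, typically, decorated with @dsl.pipeline). A minimal usage sketch with the v1 SDK; the output file name and client host are placeholders:

import kfp

# Compile the pipeline function defined above into an Argo workflow package.
kfp.compiler.Compiler().compile(custom_artifact_location, 'custom_artifact_location.yaml')

# Or submit it directly to a Kubeflow Pipelines endpoint (host is a placeholder):
# kfp.Client(host='http://localhost:8080').create_run_from_pipeline_func(
#     custom_artifact_location, arguments={})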
Example #3
def some_pipeline():
    task1 = some_op()
    task2 = some_op()
    task3 = some_op()

    dsl.get_pipeline_conf().op_transformers.append(
        lambda op: op.set_retry(5))
Example #4
def test_train(
):
  """Pipeline steps"""

  persistent_volume_path = '/mnt/azure'
  model_name = 'test'
  operations = {}
  image_size = 160
  training_folder = 'train'
  training_dataset = 'train.txt'
  model_folder = 'Privacy'


  # train
  operations['train'] = dsl.ContainerOp(
    name='train',
    image='svangara.azurecr.io/training:3',
    command=['python'],
    arguments=[
      '/scripts/train.py',
      '--outputs', model_folder
    ]
  )

  dsl.get_pipeline_conf().add_op_transformer(transformer)
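
The transformer registered above is not defined in this snippet. Given the /mnt/azure path declared at the top of the pipeline, one plausible sketch is a transformer that mounts an Azure Files share there via kfp.onprem.mount_pvc; the claim name azure-files-pvc is a placeholder:

from kfp import onprem

def transformer(op):
    """Op transformer (sketch): mount the Azure Files PVC at /mnt/azure on every step."""
    return op.apply(onprem.mount_pvc('azure-files-pvc',
                                     volume_name='azure',
                                     volume_mount_path='/mnt/azure'))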
Example #5
def foo_pipeline(tag: str, namespace: str = "kubeflow", bucket: str = "foobar"):

    # configures artifact location
    pipeline_artifact_location = dsl.ArtifactLocation.s3(
                            bucket=bucket,
                            endpoint="minio-service.%s:9000" % namespace,
                            insecure=True,
                            access_key_secret={"name": "minio", "key": "accesskey"},
                            secret_key_secret=V1SecretKeySelector(name="minio", key="secretkey"))

    # configures artifact location using AWS IAM role (no access key provided)
    aws_artifact_location = dsl.ArtifactLocation.s3(
                            bucket=bucket,
                            endpoint="s3.amazonaws.com",
                            region="ap-southeast-1",
                            insecure=False)

    # set pipeline level artifact location
    dsl.get_pipeline_conf().set_artifact_location(pipeline_artifact_location)

    # pipeline level artifact location (to minio)
    op1 = dsl.ContainerOp(
        name='foo', 
        image='busybox:%s' % tag,
        output_artifact_paths={
            'out_art': '/tmp/out_art.txt',
        },
    )
Example #6
def param_substitutions(param=dsl.PipelineParam(name='param')):
    dsl.get_pipeline_conf().op_transformers.append(add_common_labels(param))

    op = dsl.ContainerOp(
        name="cop",
        image="image",
    )
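
add_common_labels is not defined here; it is presumably a transformer factory that closes over the pipeline parameter. A minimal sketch, with the label key custom-label chosen as a placeholder (the compiler substitutes the parameter value at runtime):

def add_common_labels(param):
    """Return an op transformer that stamps every op with a pod label (sketch)."""
    def _add_common_labels(op):
        return op.add_pod_label('custom-label', param)
    return _add_common_labels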
Example #7
def save_most_frequent_word(message: str):
    """A pipeline function describing the orchestration of the workflow."""

    counter = GetFrequentWordOp(name='get-Frequent', message=message)
    # Call set_image_pull_secrets after get_pipeline_conf().
    dsl.get_pipeline_conf()\
      .set_image_pull_secrets([k8s_client.V1ObjectReference(name="secretA")])
Example #8
def imagepullsecrets_pipeline(
        message: str = "When flies fly behind flies, then flies are following flies."):
    """A pipeline function describing the orchestration of the workflow."""

    counter = get_frequent_word_op(message=message)
    # Call set_image_pull_secrets after get_pipeline_conf().
    dsl.get_pipeline_conf() \
        .set_image_pull_secrets([k8s_client.V1ObjectReference(name="secretA")])
Example #9
def inject_env_vars():
    dsl.get_pipeline_conf().set_image_pull_secrets([
        k8s_client.V1ObjectReference(
            name="k8scc01covidacr-registry-connection")
    ])
    for var in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_REGION',
                'S3_REGION', 'S3_ENDPOINT', 'S3_USE_HTTPS', 'S3_VERIFY_SSL'):
        inject_env_var(var)
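
inject_env_var is not shown either. One plausible sketch registers an op transformer per variable that copies the value from a Kubernetes secret into every step's container; the secret name s3-credentials and the key naming convention are assumptions:

from kubernetes import client as k8s_client
from kfp import dsl

def inject_env_var(var):
    """Register an op transformer that injects `var` into every container (sketch)."""
    def _inject(op):
        op.container.add_env_variable(
            k8s_client.V1EnvVar(
                name=var,
                value_from=k8s_client.V1EnvVarSource(
                    secret_key_ref=k8s_client.V1SecretKeySelector(
                        name='s3-credentials',  # placeholder secret name
                        key=var.lower()))))
        return op
    dsl.get_pipeline_conf().add_op_transformer(_inject)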
Example #10
def update_endpoint_pipeline(
    region="",
    endpoint_url="",
    image="",
    model_name="",
    endpoint_config_name="",
    endpoint_name="",
    model_artifact_url="",
    variant_name_1="",
    instance_type_1="",
    instance_type_2="",
    initial_instance_count_1="",
    initial_variant_weight_1="",
    network_isolation="",
    role="",
    update_endpoint="",
):
    create_model = sagemaker_model_op(
        region=region,
        endpoint_url=endpoint_url,
        model_name=model_name,
        image=image,
        model_artifact_url=model_artifact_url,
        network_isolation=network_isolation,
        role=role,
    )

    deploy_model = sagemaker_deploy_op(
        region=region,
        endpoint_url=endpoint_url,
        endpoint_config_name=endpoint_config_name,
        endpoint_name=endpoint_name,
        model_name_1=create_model.output,
        variant_name_1=variant_name_1,
        instance_type_1=instance_type_1,
        initial_instance_count_1=initial_instance_count_1,
        initial_variant_weight_1=initial_variant_weight_1,
    )

    update_model = sagemaker_deploy_op(
        region=region,
        endpoint_url=endpoint_url,
        endpoint_config_name=endpoint_config_name,
        endpoint_name=deploy_model.output,
        model_name_1=create_model.output,
        variant_name_1=variant_name_1,
        instance_type_1=instance_type_2,
        initial_instance_count_1=initial_instance_count_1,
        initial_variant_weight_1=initial_variant_weight_1,
        update_endpoint=update_endpoint,
    )

    dsl.get_pipeline_conf().set_image_pull_policy(policy="Always")
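
The sagemaker_model_op and sagemaker_deploy_op factories used above are normally loaded from the reusable AWS SageMaker components rather than written by hand. A sketch of how they might be obtained (the master URLs are illustrative; pin a release tag in real use):

from kfp import components

sagemaker_model_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/aws/sagemaker/model/component.yaml')
sagemaker_deploy_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/aws/sagemaker/deploy/component.yaml')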
Example #11
def train_pipeline():
    operations = {}
    operations['training'] = dsl.ContainerOp(
        name='Training',
        image='rajatsethi7/my_docker_image',
        command=['python3'],
        arguments=["main.py"]
    )

    dsl.get_pipeline_conf()

    return operations
Example #12
def training_pipeline(gcp_bucket: str, project: str):
    # pre_image = f"gcr.io/{project}/pre_image:{github_sha}"
    # train_forecast_image = f"gcr.io/{project}/train_forecast_image:{github_sha}"
    operations = {}
    operations['training'] = dsl.ContainerOp(
        name='Training',
        image='rajatsethi7/my_docker_image',
        command=['python3'],
        arguments=["main.py"])

    dsl.get_pipeline_conf()

    return operations
Example #13
def convert_kedro_pipeline_to_kfp() -> None:
    """Convert from a Kedro pipeline into a kfp container graph."""
    dsl.get_pipeline_conf().set_ttl_seconds_after_finished(
        self.run_config.ttl
    )
    node_dependencies = self.context.pipelines.get(
        pipeline
    ).node_dependencies
    with self._create_pipeline_exit_handler():
        kfp_ops = self._build_kfp_ops(
            node_dependencies, image, image_pull_policy
        )
        for node, dependencies in node_dependencies.items():
            for dependency in dependencies:
                kfp_ops[node.name].after(kfp_ops[dependency.name])
Example #14
def onnx_pipeline(model,
                  output_onnx_path,
                  model_type,
                  output_perf_result_path,
                  execution_providers="",
                  model_inputs_names="",
                  model_outputs_names="",
                  model_input_shapes="",
                  model_initial_types="",
                  caffe_model_prototxt="",
                  target_opset=7):

    # Create the "Convert To ONNX" and "ONNX Runtime Perf" components. Edit the
    # V1PersistentVolumeClaimVolumeSource claim name to match the persistent volume claim
    # you created, if needed. By default the names match ../azure-files-sc.yaml
    # and ../azure-files-pvc.yaml.
    convert_op = onnxConverterOp(
        'Convert To ONNX', '%s' % model, '%s' % output_onnx_path,
        '%s' % model_type, '%s' % model_inputs_names,
        '%s' % model_outputs_names, '%s' % model_input_shapes,
        '%s' % model_initial_types, '%s' % caffe_model_prototxt,
        '%s' % target_opset
    ).add_volume(
        k8s_client.V1Volume(
            name='pipeline-nfs',
            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                claim_name='azurefile'))
    ).add_volume_mount(
        k8s_client.V1VolumeMount(mount_path='/mnt', name='pipeline-nfs'))

    perf_op = perfTestOp(
        'ONNX Runtime Perf',
        convert_op.output,
        '%s' % output_perf_result_path,
        '%s' % execution_providers,
    ).add_volume(
        k8s_client.V1Volume(
            name='pipeline-nfs',
            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                claim_name='azurefile'))
    ).add_volume_mount(
        k8s_client.V1VolumeMount(mount_path='/mnt', name='pipeline-nfs')
    ).set_gpu_limit(1)

    dsl.get_pipeline_conf().set_image_pull_secrets(
        [k8s_client.V1ObjectReference(name="regcred")])
Example #15
def custom_artifact_location(
    tag: str, namespace: str = "kubeflow", bucket: str = "mybucket"
):

    # configures artifact location
    pipeline_artifact_location = dsl.ArtifactLocation.s3(
        bucket=bucket,
        endpoint="minio-service.%s:9000" % namespace,  # parameterize minio-service endpoint
        insecure=True,
        access_key_secret=V1SecretKeySelector(name="minio", key="accesskey"),
        secret_key_secret={"name": "minio", "key": "secretkey"},  # accepts dict also
    )

    # set pipeline level artifact location
    dsl.get_pipeline_conf().set_artifact_location(pipeline_artifact_location)

    # artifacts in this op are stored to endpoint `minio-service.<namespace>:9000`
    op = dsl.ContainerOp(name="foo", image="busybox:%s" % tag)
Example #16
def timeseries_pipeline(gcp_bucket: str, project: str, train_data: str = "train.csv", forecast_data: str = "forecast.csv"):
    """The kfp pipeline function.

    Arguments:
        gcp_bucket {str} -- The Google Cloud Storage bucket
        project {str} -- The GCP project where the data should be stored

    Keyword Arguments:
        train_data {str} -- The name of the train file that is uploaded to the bucket (default: {"train.csv"})
        forecast_data {str} -- The name of the forecast file uploaded to the bucket (default: {"forecast.csv"})
    """
    pre_image = f"gcr.io/{project}/pre_image:{github_sha}"
    train_forecast_image = f"gcr.io/{project}/train_forecast_image:{github_sha}"
    operations = {}
    operations['preprocess'] = dsl.ContainerOp(
        name='Preprocess',
        image=pre_image,
        command=['python3'],
        arguments=["main.py",
                   "--url", "https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv",
                   "--bucket", gcp_bucket,
                   "--destination_blob_name", train_data
        ]
    ).set_image_pull_policy('Always')

    operations['train_forecast'] = dsl.ContainerOp(
        name='Forecast',
        image=train_forecast_image,
        command=['python3'],
        arguments=["main.py",
                   "--bucket", gcp_bucket,
                   "--source_blob_name", train_data,
                   "--forecast_blob_name", forecast_data
        ]
    ).set_image_pull_policy('Always')
    operations["train_forecast"].after(operations["preprocess"])

    for _, operation in operations.items():
        operation.apply(gcp.use_gcp_secret('user-gcp-sa'))
        dsl.get_pipeline_conf()

    return operations
Example #17
def mnist_pipeline(
    train_images='https://people.canonical.com/~knkski/train-images-idx3-ubyte.gz',
    train_labels='https://people.canonical.com/~knkski/train-labels-idx1-ubyte.gz',
    test_images='https://people.canonical.com/~knkski/t10k-images-idx3-ubyte.gz',
    test_labels='https://people.canonical.com/~knkski/t10k-labels-idx1-ubyte.gz',
    train_epochs: int = 2,
    train_batch_size: int = 128,
):
    # Load mnist data and transform it into numpy array
    load = load_task(train_images, train_labels, test_images, test_labels)

    # Train model on transformed mnist dataset
    train = train_task(load.outputs['traintest_output'], train_epochs,
                       train_batch_size)

    serve = serve_sidecar()
    test_task(train.outputs['model_path'],
              load.outputs['validation_output']).add_sidecar(serve)

    # Ensure that each step has a volume attached wherever data gets written
    dsl.get_pipeline_conf().add_op_transformer(attach_output_volume)
Example #18
def transform_pipeline():
    op1 = print_op('hey, what are you up to?')
    op2 = print_op('train my model.')
    dsl.get_pipeline_conf().add_op_transformer(add_annotation_and_label)
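
A minimal sketch of an add_annotation_and_label transformer, mirroring the upstream pipeline_transformers sample; the key/value pair is arbitrary:

def add_annotation_and_label(op):
    """Op transformer: attach a pod annotation and a pod label to every op."""
    op.add_pod_annotation(name='hobby', value='football')
    op.add_pod_label(name='hobby', value='football')
    return op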
Example #19
def pipeline_parallelism():
  op1 = print_op('hey, what are you up to?')
  op2 = print_op('train my model.')
  dsl.get_pipeline_conf().set_parallelism(1)
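
The print_op helper used in the two examples above is not shown; it is commonly defined as a trivial echo step along these lines (image choice is arbitrary):

from kfp import dsl

def print_op(msg):
    """A trivial step that echoes its message."""
    return dsl.ContainerOp(
        name='Print',
        image='alpine:3.6',
        command=['echo', msg],
    )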
Example #20
def some_pipeline():
    some_op()
    dsl.get_pipeline_conf().set_dns_config(
        V1PodDNSConfig(
            nameservers=["1.2.3.4"],
            options=[V1PodDNSConfigOption(name="ndots", value="2")]))
Example #21
def some_pipeline():
    some_op()
    dsl.get_pipeline_conf().set_default_pod_node_selector(
        label_name="cloud.google.com/gke-accelerator",
        value="nvidia-tesla-p4")
Example #22
def some_pipeline():
    task1 = some_op()
    task2 = some_op()
    dsl.get_pipeline_conf().set_image_pull_policy(policy="Always")
Example #23
def some_pipeline():
    task1 = some_op()
    task2 = some_op()
    task3 = some_other_op().set_image_pull_policy("IfNotPresent")

    dsl.get_pipeline_conf().set_image_pull_policy(policy="Always")
Example #24
def some_pipeline():
    some_op()
    dsl.get_pipeline_conf().set_pod_disruption_budget("100%")
Example #25
def some_pipeline():
    some_op()
    dsl.get_pipeline_conf().set_ttl_seconds_after_finished(86400)
Example #26
def some_pipeline():
    some_op()
    some_op()
    some_op()
    dsl.get_pipeline_conf().set_parallelism(1)
Example #27
def mnist_pipeline(
        name=model_name,
        namespace=user_namespace,
        storageclass=storageclass,
        step=4000):
    # step 1: create a Katib experiment to tune hyperparameters
    objectiveConfig = {
      "type": "minimize",
      "goal": 0.001,
      "objectiveMetricName": "loss",
    }
    algorithmConfig = {"algorithmName" : "random"}
    parameters = [
      {"name": "--tf-learning-rate", "parameterType": "double", "feasibleSpace": {"min": "0.01","max": "0.03"}},
      {"name": "--tf-batch-size", "parameterType": "discrete", "feasibleSpace": {"list": ["16", "32", "64"]}},
    ]
    rawTemplate = {
      "apiVersion": "kubeflow.org/v1",
      "kind": "TFJob",
      "metadata": {
         "name": "{{.Trial}}",
         "namespace": "{{.NameSpace}}"
      },
      "spec": {
        "tfReplicaSpecs": {
          "Chief": {
            "replicas": 1,
            "restartPolicy": "OnFailure",
            "template": {
              "spec": {
                "containers": [
                {
                  "command": [
                    "sh",
                    "-c"
                  ],
                  "args": [
                    "python /opt/model.py --tf-train-steps=2000 {{- with .HyperParameters}} {{- range .}} {{.Name}}={{.Value}} {{- end}} {{- end}}"
                  ],
                  "image": "liuhougangxa/tf-estimator-mnist",
                  "name": "tensorflow"
                }
                ]
              }
            }
          },
          "Worker": {
            "replicas": 3,
            "restartPolicy": "OnFailure",
            "template": {
              "spec": {
                "containers": [
                {
                  "command": [
                    "sh",
                    "-c"
                  ],
                  "args": [ 
                    "python /opt/model.py --tf-train-steps=2000 {{- with .HyperParameters}} {{- range .}} {{.Name}}={{.Value}} {{- end}} {{- end}}"
                  ],
                  "image": "liuhougangxa/tf-estimator-mnist",
                  "name": "tensorflow"
                }
                ]
              }
            }
          }
        }
      }
    }
    
    trialTemplate = {
      "goTemplate": {
        "rawTemplate": json.dumps(rawTemplate)
      }
    }

    metricsCollectorSpec = {
      "source": {
        "fileSystemPath": {
          "path": "/tmp/tf",
          "kind": "Directory"
        }
      },
      "collector": {
        "kind": "TensorFlowEvent"
      }
    }

    katib_experiment_launcher_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/katib-launcher/component.yaml')
    op1 = katib_experiment_launcher_op(
            experiment_name=name,
            experiment_namespace=namespace,
            parallel_trial_count=3,
            max_trial_count=12,
            objective=str(objectiveConfig),
            algorithm=str(algorithmConfig),
            trial_template=str(trialTemplate),
            parameters=str(parameters),
            metrics_collector=str(metricsCollectorSpec),
            # experiment_timeout_minutes=experimentTimeoutMinutes,
            delete_finished_experiment=False)

    # step 2: create a TFJob to train your model with the best hyperparameters tuned by Katib
    tfjobjson_template = Template("""
{
  "apiVersion": "kubeflow.org/v1",
  "kind": "TFJob",
  "metadata": {
    "name": "$name",
    "namespace": "$namespace",
    "annotations": {
        "sidecar.istio.io/inject": "false"
    }
  },
  "spec": {
    "tfReplicaSpecs": {
      "Chief": {
        "replicas": 1,
        "restartPolicy": "OnFailure",
        "template": {
          "metadata": {
            "annotations": {
              "sidecar.istio.io/inject": "false"
            }
          },
          "spec": {
            "volumes": [
              {
                "name": "export-model",
                "persistentVolumeClaim": {
                  "claimName": "$modelpvc"
                }
              }
            ],
            "containers": [
              {
                "command": [
                  "sh",
                  "-c"
                ],
                "args": [
                  "python /opt/model.py --tf-train-steps=$step --tf-export-dir=/mnt/export $args"
                ],
                "image": "liuhougangxa/tf-estimator-mnist",
                "name": "tensorflow",
                "volumeMounts": [
                  {
                    "mountPath": "/mnt/export",
                    "name": "export-model"
                  }
                ]
              }
            ]
          }
        }
      },
      "Worker": {
        "replicas": 3,
        "restartPolicy": "OnFailure",
        "template": {
          "metadata": {
            "annotations": {
              "sidecar.istio.io/inject": "false"
            }
          },
          "spec": {
            "volumes": [
              {
                "name": "export-model",
                "persistentVolumeClaim": {
                  "claimName": "$modelpvc"
                }
              }
            ],
            "containers": [
              {
                "command": [
                  "sh",
                  "-c"
                ],
                "args": [
                  "python /opt/model.py --tf-train-steps=$step --tf-export-dir=/mnt/export $args"
                ],
                "image": "liuhougangxa/tf-estimator-mnist",
                "name": "tensorflow",
                "volumeMounts": [
                  {
                    "mountPath": "/mnt/export",
                    "name": "export-model"
                  }
                ]
              }
            ]
          }
        }
      }
    }
  }
}
""")

    convert_op = func_to_container_op(convert_mnist_experiment_result)
    op2 = convert_op(op1.output)
    
    volume_template = Template("""
{
  "apiVersion": "v1",
  "kind": "PersistentVolumeClaim",
  "metadata": {
    "name": "{{workflow.name}}-modelpvc",
    "namespace": "$namespace"
  },
  "spec": {
      "accessModes": ["ReadWriteMany"],
      "resources": {
          "requests": {
              "storage": "1Gi"
          }
      },
      "storageClassName": "$storageclass"
   }
}
""")
    
    volopjson = volume_template.substitute({'namespace': namespace, 'storageclass': storageclass})
    volop = json.loads(volopjson)

    modelvolop = dsl.ResourceOp(
        name="modelpvc",
        k8s_resource=volop
    )

    tfjobjson = tfjobjson_template.substitute(
            {'args': op2.output,
             'name': name,
             'namespace': namespace,
             'step': step,
             'modelpvc': modelvolop.outputs["name"]
            })

    tfjob = json.loads(tfjobjson)

    train = dsl.ResourceOp(
        name="train",
        k8s_resource=tfjob,
        success_condition='status.replicaStatuses.Worker.succeeded==3,status.replicaStatuses.Chief.succeeded==1'
    )

    # step 3: model inference with a KFServing InferenceService
    inferenceservice_template = Template("""
{
  "apiVersion": "serving.kubeflow.org/v1alpha2",
  "kind": "InferenceService",
  "metadata": {
    "name": "$name",
    "namespace": "$namespace"
  },
  "spec": {
    "default": {
      "predictor": {
        "tensorflow": {
          "storageUri": "pvc://$modelpvc/"
        }
      }
    }
  }
}
""")
    inferenceservicejson = inferenceservice_template.substitute({'modelpvc': modelvolop.outputs["name"],
                                                                 'name': name,
                                                                 'namespace': namespace})
    inferenceservice =  json.loads(inferenceservicejson)
    inference = dsl.ResourceOp(
      name="inference",
      k8s_resource=inferenceservice,
      success_condition='status.url').after(train)
    
    dsl.get_pipeline_conf().add_op_transformer(add_istio_annotation)
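
The add_istio_annotation transformer applied above presumably disables Istio sidecar injection on the pipeline pods, matching the annotation used in the TFJob templates. A minimal sketch:

def add_istio_annotation(op):
    """Op transformer (sketch): disable Istio sidecar injection on every pipeline pod."""
    op.add_pod_annotation(name='sidecar.istio.io/inject', value='false')
    return op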
Example #28
def xgb_train_pipeline(
    output='gs://your-gcs-bucket',
    project='your-gcp-project',
    cluster_name='xgb-%s' % dsl.RUN_ID_PLACEHOLDER,
    region='us-central1',
    train_data='gs://ml-pipeline-playground/sfpd/train.csv',
    eval_data='gs://ml-pipeline-playground/sfpd/eval.csv',
    schema='gs://ml-pipeline-playground/sfpd/schema.json',
    target='resolution',
    rounds=200,
    workers=2,
    true_label='ACTION',
):
    output_template = str(output) + '/' + dsl.RUN_ID_PLACEHOLDER + '/data'

    # The current GCP PySpark/Spark ops do not provide outputs as return values; instead,
    # we need to use strings to pass the URIs around.
    analyze_output = output_template
    transform_output_train = os.path.join(output_template, 'train', 'part-*')
    transform_output_eval = os.path.join(output_template, 'eval', 'part-*')
    train_output = os.path.join(output_template, 'train_output')
    predict_output = os.path.join(output_template, 'predict_output')

    with dsl.ExitHandler(exit_op=dataproc_delete_cluster_op(
        project_id=project,
        region=region,
        name=cluster_name
    )):
        _create_cluster_op = dataproc_create_cluster_op(
            project_id=project,
            region=region,
            name=cluster_name,
            initialization_actions=[
              os.path.join(_PYSRC_PREFIX,
                           'initialization_actions.sh'),
            ],
            image_version='1.2'
        )

        _analyze_op = dataproc_analyze_op(
            project=project,
            region=region,
            cluster_name=cluster_name,
            schema=schema,
            train_data=train_data,
            output=output_template
        ).after(_create_cluster_op).set_display_name('Analyzer')

        _transform_op = dataproc_transform_op(
            project=project,
            region=region,
            cluster_name=cluster_name,
            train_data=train_data,
            eval_data=eval_data,
            target=target,
            analysis=analyze_output,
            output=output_template
        ).after(_analyze_op).set_display_name('Transformer')

        _train_op = dataproc_train_op(
            project=project,
            region=region,
            cluster_name=cluster_name,
            train_data=transform_output_train,
            eval_data=transform_output_eval,
            target=target,
            analysis=analyze_output,
            workers=workers,
            rounds=rounds,
            output=train_output
        ).after(_transform_op).set_display_name('Trainer')

        _predict_op = dataproc_predict_op(
            project=project,
            region=region,
            cluster_name=cluster_name,
            data=transform_output_eval,
            model=train_output,
            target=target,
            analysis=analyze_output,
            output=predict_output
        ).after(_train_op).set_display_name('Predictor')

        _cm_op = confusion_matrix_op(
            predictions=os.path.join(predict_output, 'part-*.csv'),
            output_dir=output_template
        ).after(_predict_op)

        _roc_op = roc_op(
            predictions_dir=os.path.join(predict_output, 'part-*.csv'),
            true_class=true_label,
            true_score_column=true_label,
            output_dir=output_template
        ).after(_predict_op)

    dsl.get_pipeline_conf().add_op_transformer(
        gcp.use_gcp_secret('user-gcp-sa'))
Example #29
def retry_sample_pipeline():
    op1 = RandomFailure1Op('0,1,2,3').set_timeout(10)
    op2 = RandomFailure1Op('0,1')
    dsl.get_pipeline_conf().set_timeout(50)
Example #30
def node_selector_pipeline():
    dsl.get_pipeline_conf().set_default_pod_node_selector(
        'kubernetes.io/os', 'linux')
    echo_op()
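
The echo_op used above (like print_op earlier) is just a trivial container step; a minimal stand-in, with an arbitrary image:

from kfp import dsl

def echo_op():
    """A trivial step that prints a greeting."""
    return dsl.ContainerOp(
        name='echo',
        image='library/bash:4.4.23',
        command=['sh', '-c'],
        arguments=['echo "hello world"'],
    )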