Code Example #1
def titanic_survival_prediction(region='aws-region',
    log_s3_uri="s3://mlops-kubeflow-pipeline-data/emr/titanic/logs",
    cluster_name="emr-cluster",
    job_name='spark-ml-trainer',
    input='s3://mlops-kubeflow-pipeline-data/emr/titanic/train.csv',
    output='s3://mlops-kubeflow-pipeline-data/emr/titanic/output',
    jar_path='s3://mlops-kubeflow-pipeline-data/emr/titanic/titanic-survivors-prediction_2.11-1.0.jar',
    main_class='com.amazonaws.emr.titanic.Titanic',
    instance_type="m4.xlarge",
    instance_count="3"
    ):

    create_cluster = emr_create_cluster_op(
        region=region,
        name=cluster_name,
        instance_type=instance_type,
        instance_count=instance_count,
        log_s3_uri=log_s3_uri,
    ).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))

    training_and_prediction = emr_submit_spark_job_op(
        region=region,
        jobflow_id=create_cluster.output,
        job_name=job_name,
        jar_path=jar_path,
        main_class=main_class,
        input=input,
        output=output
    ).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))

    delete_cluster = emr_delete_cluster_op(
      region=region,
      jobflow_id=create_cluster.output,
      dependent=training_and_prediction.outputs['job_id']
    ).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
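
The EMR example above assumes that the component factories (emr_create_cluster_op, emr_submit_spark_job_op, emr_delete_cluster_op) and use_aws_secret are already defined. A minimal setup sketch, assuming the reusable EMR components are loaded from local component.yaml files (the paths and pipeline name below are assumptions, not part of the snippet):

from kfp import components, dsl
from kfp.aws import use_aws_secret

# Hypothetical local paths to the reusable EMR components; adjust to where the
# component.yaml definitions actually live.
emr_create_cluster_op = components.load_component_from_file(
    'components/aws/emr/create_cluster/component.yaml')
emr_submit_spark_job_op = components.load_component_from_file(
    'components/aws/emr/submit_spark_job/component.yaml')
emr_delete_cluster_op = components.load_component_from_file(
    'components/aws/emr/delete_cluster/component.yaml')

# The pipeline function would normally also carry a decorator such as:
# @dsl.pipeline(name='Titanic survival prediction', description='EMR Spark job via KFP')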
Code Example #2
def twitter_classification(
        s3_raw_data='s3://kubeflow-meda/data/raw/tweets.csv',
        s3_model_data='s3://kubeflow-meda/models',
        model_name='NNET'):

    # Preprocess data: cleansing and feature engineering. Also creates the S3 folder
    # structure that stores the data and artifacts of the model run.
    preprocess = preprocess_op(s3_raw_data=s3_raw_data,
                               model_name=model_name).apply(
                                   use_aws_secret('aws-secret',
                                                  'AWS_ACCESS_KEY_ID',
                                                  'AWS_SECRET_ACCESS_KEY'))

    training = train_op(
        s3_training_data=preprocess.outputs['s3_training_data'],
        s3_training_predictions=preprocess.outputs['s3_training_predictions'],
        s3_model_artifacts=preprocess.outputs['s3_model_artifacts'],
        model_name=model_name,
        max_length=preprocess.outputs['max_length'],
        vocab_size=preprocess.outputs['vocab_size']).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))

    testing = test_op(
        s3_testing_data=preprocess.outputs['s3_testing_data'],
        s3_testing_predictions=preprocess.outputs['s3_testing_predictions'],
        s3_model_artifacts=training.outputs['s3_model_artifacts'],
        model_name=model_name).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))
Code Example #3
def batch_transform_pipeline(
    region="",
    image="",
    model_name="",
    job_name="",
    model_artifact_url="",
    instance_type="",
    instance_count="",
    data_input="",
    data_type="",
    content_type="",
    compression_type="",
    output_location="",
    max_concurrent="",
    max_payload="",
    batch_strategy="",
    split_type="",
    network_isolation="",
    role="",
):
    create_model = sagemaker_model_op(
        region=region,
        model_name=model_name,
        image=image,
        model_artifact_url=model_artifact_url,
        network_isolation=network_isolation,
        role=role,
    ).apply(
        use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID",
                       "AWS_SECRET_ACCESS_KEY"))

    sagemaker_batch_transform_op(
        region=region,
        model_name=create_model.output,
        job_name=job_name,
        instance_type=instance_type,
        instance_count=instance_count,
        max_concurrent=max_concurrent,
        max_payload=max_payload,
        batch_strategy=batch_strategy,
        input_location=data_input,
        data_type=data_type,
        content_type=content_type,
        split_type=split_type,
        compression_type=compression_type,
        output_location=output_location,
    ).apply(
        use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID",
                       "AWS_SECRET_ACCESS_KEY"))
Code Example #4
def mnist_classification(
        region='us-west-2',
        image='174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:1',
        dataset_path='s3://kubeflow-pipeline-data/mnist_kmeans_example/data',
        instance_type='ml.c4.8xlarge',
        instance_count='2',
        volume_size='50',
        model_output_path='s3://kubeflow-pipeline-data/mnist_kmeans_example/model',
        batch_transform_input='s3://kubeflow-pipeline-data/mnist_kmeans_example/input',
        batch_transform_output='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
        role_arn=''):

    training = sagemaker_train_op(
        region=region,
        image=image,
        instance_type=instance_type,
        instance_count=instance_count,
        volume_size=volume_size,
        dataset_path=dataset_path,
        model_artifact_path=model_output_path,
        role=role_arn,
    ).apply(
        use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                       'AWS_SECRET_ACCESS_KEY'))

    create_model = sagemaker_model_op(
        region=region,
        image=image,
        model_artifact_url=training.outputs['model_artifact_url'],
        model_name=training.outputs['job_name'],
        role=role_arn).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))

    prediction = sagemaker_deploy_op(region=region,
                                     model_name=create_model.output).apply(
                                         use_aws_secret(
                                             'aws-secret', 'AWS_ACCESS_KEY_ID',
                                             'AWS_SECRET_ACCESS_KEY'))

    batch_transform = sagemaker_batch_transform_op(
        region=region,
        model_name=create_model.output,
        input_location=batch_transform_input,
        output_location=batch_transform_output).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))
Code Example #5
def training(
        region='us-east-1',
        endpoint_url='',
        image='382416733822.dkr.ecr.us-east-1.amazonaws.com/kmeans:1',
        training_input_mode='File',
        hyperparameters='{"k": "10", "feature_dim": "784"}',
        channels='[                                                                          \
                    {                                                                        \
                      "ChannelName": "train",                                                \
                      "DataSource": {                                                        \
                        "S3DataSource": {                                                    \
                          "S3Uri": "s3://kubeflow-pipeline-data/mnist_kmeans_example/data",  \
                          "S3DataType": "S3Prefix",                                          \
                          "S3DataDistributionType": "FullyReplicated"                        \
                        }                                                                    \
                      },                                                                     \
                      "ContentType": "",                                                     \
                      "CompressionType": "None",                                             \
                      "RecordWrapperType": "None",                                           \
                      "InputMode": "File"                                                    \
                    }                                                                        \
                  ]',
        instance_type='ml.p2.xlarge',
        instance_count='1',
        volume_size='50',
        max_run_time='3600',
        model_artifact_path='s3://kubeflow-pipeline-data/mnist_kmeans_example/data',
        output_encryption_key='',
        network_isolation='True',
        traffic_encryption='False',
        spot_instance='False',
        max_wait_time='3600',
        checkpoint_config='{}',
        role=''):
    training = sagemaker_train_op(
        region=region,
        endpoint_url=endpoint_url,
        image=image,
        training_input_mode=training_input_mode,
        hyperparameters=hyperparameters,
        channels=channels,
        instance_type=instance_type,
        instance_count=instance_count,
        volume_size=volume_size,
        max_run_time=max_run_time,
        model_artifact_path=model_artifact_path,
        output_encryption_key=output_encryption_key,
        network_isolation=network_isolation,
        traffic_encryption=traffic_encryption,
        spot_instance=spot_instance,
        max_wait_time=max_wait_time,
        checkpoint_config=checkpoint_config,
        role=role,
    ).apply(
        use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                       'AWS_SECRET_ACCESS_KEY'))
Code Example #6
def titanic_survival_prediction(
    region="us-west-2",
    log_s3_uri="s3://kubeflow-pipeline-data/emr/titanic/logs",
    cluster_name="emr-cluster",
    job_name="spark-ml-trainner",
    input="s3://kubeflow-pipeline-data/emr/titanic/train.csv",
    output="s3://kubeflow-pipeline-data/emr/titanic/output",
    jar_path="s3://kubeflow-pipeline-data/emr/titanic/titanic-survivors-prediction_2.11-1.0.jar",
    main_class="com.amazonaws.emr.titanic.Titanic",
    instance_type="m4.xlarge",
    instance_count="3",
):

    create_cluster = emr_create_cluster_op(
        region=region,
        name=cluster_name,
        instance_type=instance_type,
        instance_count=instance_count,
        log_s3_uri=log_s3_uri,
    ).apply(
        use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID",
                       "AWS_SECRET_ACCESS_KEY"))

    training_and_prediction = emr_submit_spark_job_op(
        region=region,
        jobflow_id=create_cluster.output,
        job_name=job_name,
        jar_path=jar_path,
        main_class=main_class,
        input=input,
        output=output,
    ).apply(
        use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID",
                       "AWS_SECRET_ACCESS_KEY"))

    delete_cluster = emr_delete_cluster_op(
        region=region,
        jobflow_id=create_cluster.output,
        dependent=training_and_prediction.outputs["job_id"],
    ).apply(
        use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID",
                       "AWS_SECRET_ACCESS_KEY"))
Code Example #7
def pipeline_use_aws_secret():
    secret_name = "kfp-aws-secret"

    dsl.ContainerOp(
        name='mnist_use_aws_secret',
        image='kangwoo/kfp-mnist-storage:0.0.1',
        arguments=['--model', 's3://tensorflow/kfp/mnist/model']).apply(
            aws.use_aws_secret(
                secret_name,
                aws_access_key_id_name='AWS_ACCESS_KEY_ID',
                aws_secret_access_key_name='AWS_SECRET_ACCESS_KEY'))
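
Every apply(use_aws_secret(...)) call on this page assumes that a Kubernetes Secret with the given name already exists in the namespace where the pipeline pods run. A hypothetical way to create such a secret with the official Kubernetes Python client (secret name, namespace, and credential placeholders are assumptions):

import base64
from kubernetes import client, config

config.load_kube_config()

secret = client.V1Secret(
    metadata=client.V1ObjectMeta(name='kfp-aws-secret', namespace='kubeflow'),
    type='Opaque',
    # The keys must match what use_aws_secret() is told to look up.
    data={
        'AWS_ACCESS_KEY_ID': base64.b64encode(b'<access-key-id>').decode(),
        'AWS_SECRET_ACCESS_KEY': base64.b64encode(b'<secret-access-key>').decode(),
    },
)
client.CoreV1Api().create_namespaced_secret(namespace='kubeflow', body=secret)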
Code Example #8
def create_endpoint_pipeline(
    region="",
    endpoint_url="",
    image="",
    model_name="",
    endpoint_config_name="",
    endpoint_name="",
    model_artifact_url="",
    variant_name_1="",
    instance_type_1="",
    initial_instance_count_1="",
    initial_variant_weight_1="",
    network_isolation="",
    role="",
):
    create_model = sagemaker_model_op(
        region=region,
        endpoint_url=endpoint_url,
        model_name=model_name,
        image=image,
        model_artifact_url=model_artifact_url,
        network_isolation=network_isolation,
        role=role,
    ).apply(
        use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID",
                       "AWS_SECRET_ACCESS_KEY"))

    sagemaker_deploy_op(
        region=region,
        endpoint_url=endpoint_url,
        endpoint_config_name=endpoint_config_name,
        endpoint_name=endpoint_name,
        model_name_1=create_model.output,
        variant_name_1=variant_name_1,
        instance_type_1=instance_type_1,
        initial_instance_count_1=initial_instance_count_1,
        initial_variant_weight_1=initial_variant_weight_1,
    ).apply(
        use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID",
                       "AWS_SECRET_ACCESS_KEY"))
Code Example #9
    def test_use_aws_secret(self):
        with Pipeline('somename') as p:
            op1 = ContainerOp(name='op1', image='image')
            op1 = op1.apply(
                use_aws_secret('myaws-secret', 'key_id', 'access_key'))
            assert len(op1.env_variables) == 2

            index = 0
            for expected in ['key_id', 'access_key']:
                assert op1.env_variables[index].name == expected
                assert op1.env_variables[
                    index].value_from.secret_key_ref.name == 'myaws-secret'
                assert op1.env_variables[
                    index].value_from.secret_key_ref.key == expected
                index += 1
Code Example #10
    def test_use_aws_secret(self):
        op1 = ContainerOp(name='op1', image='image')
        op1 = op1.apply(use_aws_secret('myaws-secret', 'key_id', 'access_key'))
        assert len(op1.container.env) == 2

        index = 0
        for expected_name, expected_key in [('AWS_ACCESS_KEY_ID', 'key_id'),
                                            ('AWS_SECRET_ACCESS_KEY',
                                             'access_key')]:
            assert op1.container.env[index].name == expected_name
            assert op1.container.env[
                index].value_from.secret_key_ref.name == 'myaws-secret'
            assert op1.container.env[
                index].value_from.secret_key_ref.key == expected_key
            index += 1
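
The two tests above capture the contract of use_aws_secret across SDK versions: it returns a modifier for ContainerOp.apply() that injects two environment variables sourced from keys of the named Kubernetes Secret; in the newer form (second test) the variables are always named AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY. A hand-rolled equivalent of that newer behavior (a sketch based on the assertions, not the library source):

from kubernetes import client as k8s_client

def use_aws_secret_sketch(secret_name='aws-secret',
                          aws_access_key_id_name='AWS_ACCESS_KEY_ID',
                          aws_secret_access_key_name='AWS_SECRET_ACCESS_KEY'):
    """Return a modifier usable with ContainerOp.apply()."""
    def _apply(task):
        # Fixed env var names, each backed by a (configurable) key of the secret,
        # mirroring the assertions in the second test above.
        for env_name, secret_key in (('AWS_ACCESS_KEY_ID', aws_access_key_id_name),
                                     ('AWS_SECRET_ACCESS_KEY', aws_secret_access_key_name)):
            task.container.add_env_variable(
                k8s_client.V1EnvVar(
                    name=env_name,
                    value_from=k8s_client.V1EnvVarSource(
                        secret_key_ref=k8s_client.V1SecretKeySelector(
                            name=secret_name, key=secret_key))))
        return task
    return _apply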
Code Example #11
File: hpo_pipeline.py  Project: usiddiqu/kfp-tekton
def hpo_pipeline(
    region="",
    algorithm_name="",
    training_input_mode="",
    static_parameters="",
    integer_parameters="",
    channels="",
    categorical_parameters="",
    early_stopping_type="",
    max_parallel_jobs="",
    max_num_jobs="",
    metric_name="",
    metric_type="",
    hpo_strategy="",
    instance_type="",
    instance_count="",
    volume_size="",
    max_run_time="",
    output_location="",
    network_isolation="",
    max_wait_time="",
    role="",
):
    sagemaker_hpo_op(
        region=region,
        algorithm_name=algorithm_name,
        training_input_mode=training_input_mode,
        static_parameters=static_parameters,
        integer_parameters=integer_parameters,
        channels=channels,
        categorical_parameters=categorical_parameters,
        early_stopping_type=early_stopping_type,
        max_parallel_jobs=max_parallel_jobs,
        max_num_jobs=max_num_jobs,
        metric_name=metric_name,
        metric_type=metric_type,
        strategy=hpo_strategy,
        instance_type=instance_type,
        instance_count=instance_count,
        volume_size=volume_size,
        max_run_time=max_run_time,
        output_location=output_location,
        network_isolation=network_isolation,
        max_wait_time=max_wait_time,
        role=role,
    ).apply(
        use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID",
                       "AWS_SECRET_ACCESS_KEY"))
Code Example #12
def create_model_pipeline(
    region="",
    endpoint_url="",
    image="",
    model_name="",
    model_artifact_url="",
    network_isolation="",
    role="",
):
    sagemaker_model_op(
        region=region,
        endpoint_url=endpoint_url,
        model_name=model_name,
        image=image,
        model_artifact_url=model_artifact_url,
        network_isolation=network_isolation,
        role=role,
    ).apply(use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"))
Code Example #13
def training(
        region='us-east-1',
        endpoint_url='',
        image='382416733822.dkr.ecr.us-east-1.amazonaws.com/kmeans:1',
        training_input_mode='File',
        hyperparameters={
            "k": "10",
            "feature_dim": "784"
        },
        channels=channelObjList,
        instance_type='ml.p2.xlarge',
        instance_count=1,
        volume_size=50,
        max_run_time=3600,
        model_artifact_path='s3://kubeflow-pipeline-data/mnist_kmeans_example/data',
        output_encryption_key='',
        network_isolation=True,
        traffic_encryption=False,
        spot_instance=False,
        max_wait_time=3600,
        checkpoint_config={},
        role=''):
    training = sagemaker_train_op(
        region=region,
        endpoint_url=endpoint_url,
        image=image,
        training_input_mode=training_input_mode,
        hyperparameters=hyperparameters,
        channels=channels,
        instance_type=instance_type,
        instance_count=instance_count,
        volume_size=volume_size,
        max_run_time=max_run_time,
        model_artifact_path=model_artifact_path,
        output_encryption_key=output_encryption_key,
        network_isolation=network_isolation,
        traffic_encryption=traffic_encryption,
        spot_instance=spot_instance,
        max_wait_time=max_wait_time,
        checkpoint_config=checkpoint_config,
        role=role,
    ).apply(
        use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                       'AWS_SECRET_ACCESS_KEY'))
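
Unlike Example #5, Example #13 passes native Python objects, and channels=channelObjList refers to a list defined outside the snippet. A plausible definition, mirroring the JSON-string channel spec used in Examples #5 and #19 (treat the S3 URI as an assumption):

channelObjList = [
    {
        'ChannelName': 'train',
        'DataSource': {
            'S3DataSource': {
                'S3Uri': 's3://kubeflow-pipeline-data/mnist_kmeans_example/data',
                'S3DataType': 'S3Prefix',
                'S3DataDistributionType': 'FullyReplicated',
            }
        },
        'ContentType': '',
        'CompressionType': 'None',
        'RecordWrapperType': 'None',
        'InputMode': 'File',
    }
]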
Code Example #14
def training_pipeline(
    region="",
    endpoint_url="",
    image="",
    training_input_mode="",
    hyperparameters="",
    channels="",
    instance_type="",
    instance_count="",
    volume_size="",
    max_run_time="",
    model_artifact_path="",
    output_encryption_key="",
    network_isolation="",
    traffic_encryption="",
    spot_instance="",
    max_wait_time="",
    checkpoint_config="{}",
    role="",
):
    sagemaker_train_op(
        region=region,
        endpoint_url=endpoint_url,
        image=image,
        training_input_mode=training_input_mode,
        hyperparameters=hyperparameters,
        channels=channels,
        instance_type=instance_type,
        instance_count=instance_count,
        volume_size=volume_size,
        max_run_time=max_run_time,
        model_artifact_path=model_artifact_path,
        output_encryption_key=output_encryption_key,
        network_isolation=network_isolation,
        traffic_encryption=traffic_encryption,
        spot_instance=spot_instance,
        max_wait_time=max_wait_time,
        checkpoint_config=checkpoint_config,
        role=role,
    ).apply(
        use_aws_secret("aws-secret", "AWS_ACCESS_KEY_ID",
                       "AWS_SECRET_ACCESS_KEY"))
Code Example #15
def iris_prod_pipeline(
        location: dsl.PipelineParam = dsl.PipelineParam(
            name='location', value='FOLDER_NAME_TO_MODELS'),
        model_name: dsl.PipelineParam = dsl.PipelineParam(name="model_name",
                                                          value="MODEL NAME"),
        is_deploy: dsl.PipelineParam = dsl.PipelineParam(name="is_deploy",
                                                         param_type='bool')):
    _load_s3 = load_s3_op(location, model_name).apply(
        aws.use_aws_secret(secret_name='s3-secrets'))

    seldon_config = yaml.safe_load(
        open("iris_prod_pipeline/components/deploy/deploy_iris.yaml"))

    with dsl.Condition(is_deploy == True, name='deploy'):
        _deploy = dsl.ResourceOp(
            name="seldondeploy",
            k8s_resource=seldon_config,
            attribute_outputs={"name": "{.metadata.name}"})

        _deploy.after(_load_s3)
Code Example #16
def iris_train_pipeline(
        kernel: dsl.PipelineParam = dsl.PipelineParam(
            name='kernel', value='linear, poly, rbf, sigmoid or precomputed'),
        C: dsl.PipelineParam = dsl.PipelineParam(
            name='C', value='Float value, default value is 1'),
        n_neighbors: dsl.PipelineParam = dsl.PipelineParam(name='n_neighbors',
                                                           value='int value'),
        n_splits: dsl.PipelineParam = dsl.PipelineParam(
            name='n_splits', value="Number of splits for fold"),
        location: dsl.PipelineParam = dsl.PipelineParam(
            name='location', value='FOLDER_NAME_TO_MODELS'),
        svm_filename: dsl.PipelineParam = dsl.PipelineParam(
            name='svm-filename', value='SVM_NAME'),
        lr_filename: dsl.PipelineParam = dsl.PipelineParam(
            name='logistic-regression-filename',
            value='LOGISTIC_REGRESSION_NAME'),
        dt_filename: dsl.PipelineParam = dsl.PipelineParam(
            name='decision-tree-filename', value='DECISION_TREE_NAME'),
        knn_filename: dsl.PipelineParam = dsl.PipelineParam(
            name='knn-filename', value='KNN_NAME'),
        label1: dsl.PipelineParam = dsl.PipelineParam(name='labels',
                                                      value='Label 1'),
        label2: dsl.PipelineParam = dsl.PipelineParam(name='labels',
                                                      value='Label 2'),
        label3: dsl.PipelineParam = dsl.PipelineParam(name='labels',
                                                      value='Label 3')):
    _load_data = load_op()

    _transform = transform_op(dsl.InputArgumentPath(
        _load_data.outputs['iris'])).after(_load_data)

    _svm = svm_op(
        str(svm_filename) + '.pkl',
        dsl.InputArgumentPath(_transform.outputs['X_train']),
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_transform.outputs['X_test']), kernel, C,
        n_splits).after(_transform)

    _lr = lr_op(dsl.InputArgumentPath(_transform.outputs['X_train']),
                dsl.InputArgumentPath(_transform.outputs['y_train']),
                dsl.InputArgumentPath(_transform.outputs['X_test']),
                str(lr_filename) + '.pkl', n_splits).after(_transform)

    _dt = dt_op(dsl.InputArgumentPath(_transform.outputs['X_train']),
                dsl.InputArgumentPath(_transform.outputs['y_train']),
                dsl.InputArgumentPath(_transform.outputs['X_test']),
                str(dt_filename) + '.pkl', n_splits).after(_transform)

    _knn = knn_op(
        dsl.InputArgumentPath(_transform.outputs['X_train']),
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_transform.outputs['X_test']),
        n_neighbors,
        n_splits,
        str(knn_filename) + '.pkl',
    ).after(_transform)

    models = [
        dsl.InputArgumentPath(_svm.outputs['svm_model']),
        dsl.InputArgumentPath(_lr.outputs['lr_model']),
        dsl.InputArgumentPath(_dt.outputs['dt_model']),
        dsl.InputArgumentPath(_knn.outputs['knn_model']),
    ]
    _save_s3 = save_s3_op(
        models, location,
        [svm_filename, lr_filename, dt_filename, knn_filename]).after(
            _svm, _lr, _dt,
            _knn).apply(aws.use_aws_secret(secret_name='s3-secrets'))

    _evaluation_knn = evaluation_op(
        dsl.InputArgumentPath(_knn.outputs['knn_predict']),
        dsl.InputArgumentPath(_transform.outputs['y_test']),
        [label1, label2, label3],
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_knn.outputs['knn_y_scores'])).after(_knn)
    _evaluation_dt = evaluation_op(
        dsl.InputArgumentPath(_dt.outputs['dt_predict']),
        dsl.InputArgumentPath(_transform.outputs['y_test']),
        [label1, label2, label3],
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_dt.outputs['dt_y_scores'])).after(_dt)
    _evaluation_svm = evaluation_op(
        dsl.InputArgumentPath(_svm.outputs['svm_predict']),
        dsl.InputArgumentPath(_transform.outputs['y_test']),
        [label1, label2, label3],
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_svm.outputs['svm_y_scores'])).after(_svm)
    _evaluation_lr = evaluation_op(
        dsl.InputArgumentPath(_lr.outputs['lr_predict']),
        dsl.InputArgumentPath(_transform.outputs['y_test']),
        [label1, label2, label3],
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_lr.outputs['lr_y_scores'])).after(_lr)
Code Example #17
def mnist_classification(
        region='us-west-2',
        image='174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:1',
        training_input_mode='File',
        hpo_strategy='Bayesian',
        hpo_metric_name='test:msd',
        hpo_metric_type='Minimize',
        hpo_early_stopping_type='Off',
        hpo_static_parameters={
            "k": "10",
            "feature_dim": "784"
        },
        hpo_integer_parameters=[{
            "Name": "mini_batch_size",
            "MinValue": "500",
            "MaxValue": "600"
        }, {
            "Name": "extra_center_factor",
            "MinValue": "10",
            "MaxValue": "20"
        }],
        hpo_continuous_parameters=[],
        hpo_categorical_parameters=[{
            "Name": "init_method",
            "Values": ["random", "kmeans++"]
        }],
        hpo_channels=hpoChannels,
        hpo_spot_instance=False,
        hpo_max_wait_time=3600,
        hpo_checkpoint_config={},
        output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
        output_encryption_key='',
        instance_type='ml.p2.16xlarge',
        instance_count=1,
        volume_size=50,
        hpo_max_num_jobs=9,
        hpo_max_parallel_jobs=3,
        max_run_time=3600,
        endpoint_url='',
        network_isolation=True,
        traffic_encryption=False,
        train_channels=trainChannels,
        train_spot_instance=False,
        train_max_wait_time=3600,
        train_checkpoint_config={},
        batch_transform_instance_type='ml.m4.xlarge',
        batch_transform_input='s3://kubeflow-pipeline-data/mnist_kmeans_example/input',
        batch_transform_data_type='S3Prefix',
        batch_transform_content_type='text/csv',
        batch_transform_compression_type='None',
        batch_transform_output='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
        batch_transform_max_concurrent=4,
        batch_transform_max_payload=6,
        batch_strategy='MultiRecord',
        batch_transform_split_type='Line',
        role_arn=''):

    hpo = sagemaker_hpo_op(
        region=region,
        endpoint_url=endpoint_url,
        image=image,
        training_input_mode=training_input_mode,
        strategy=hpo_strategy,
        metric_name=hpo_metric_name,
        metric_type=hpo_metric_type,
        early_stopping_type=hpo_early_stopping_type,
        static_parameters=hpo_static_parameters,
        integer_parameters=hpo_integer_parameters,
        continuous_parameters=hpo_continuous_parameters,
        categorical_parameters=hpo_categorical_parameters,
        channels=hpo_channels,
        output_location=output_location,
        output_encryption_key=output_encryption_key,
        instance_type=instance_type,
        instance_count=instance_count,
        volume_size=volume_size,
        max_num_jobs=hpo_max_num_jobs,
        max_parallel_jobs=hpo_max_parallel_jobs,
        max_run_time=max_run_time,
        network_isolation=network_isolation,
        traffic_encryption=traffic_encryption,
        spot_instance=hpo_spot_instance,
        max_wait_time=hpo_max_wait_time,
        checkpoint_config=hpo_checkpoint_config,
        role=role_arn,
    ).apply(
        use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                       'AWS_SECRET_ACCESS_KEY'))

    training = sagemaker_train_op(
        region=region,
        endpoint_url=endpoint_url,
        image=image,
        training_input_mode=training_input_mode,
        hyperparameters=hpo.outputs['best_hyperparameters'],
        channels=train_channels,
        instance_type=instance_type,
        instance_count=instance_count,
        volume_size=volume_size,
        max_run_time=max_run_time,
        model_artifact_path=output_location,
        output_encryption_key=output_encryption_key,
        network_isolation=network_isolation,
        traffic_encryption=traffic_encryption,
        spot_instance=train_spot_instance,
        max_wait_time=train_max_wait_time,
        checkpoint_config=train_checkpoint_config,
        role=role_arn,
    ).apply(
        use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                       'AWS_SECRET_ACCESS_KEY'))

    create_model = sagemaker_model_op(
        region=region,
        endpoint_url=endpoint_url,
        model_name=training.outputs['job_name'],
        image=training.outputs['training_image'],
        model_artifact_url=training.outputs['model_artifact_url'],
        network_isolation=network_isolation,
        role=role_arn).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))

    prediction = sagemaker_deploy_op(
        region=region,
        endpoint_url=endpoint_url,
        model_name_1=create_model.output,
    ).apply(
        use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                       'AWS_SECRET_ACCESS_KEY'))

    batch_transform = sagemaker_batch_transform_op(
        region=region,
        endpoint_url=endpoint_url,
        model_name=create_model.output,
        instance_type=batch_transform_instance_type,
        instance_count=instance_count,
        max_concurrent=batch_transform_max_concurrent,
        max_payload=batch_transform_max_payload,
        batch_strategy=batch_strategy,
        input_location=batch_transform_input,
        data_type=batch_transform_data_type,
        content_type=batch_transform_content_type,
        split_type=batch_transform_split_type,
        compression_type=batch_transform_compression_type,
        output_location=batch_transform_output).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))
Code Example #18
def kf_pipeline(input_data='reddit_train.csv', ):
    """
    Pipeline
    """

    tokenize_training_step = dsl.ContainerOp(
        name='tokenize',
        image=f"{REGISTRY}/tokenize:{TAG}",
        command="python",
        arguments=[
            "-m",
            "src.steps.tokenize.pipeline_step"
        ],
        file_outputs={"tokenize_location": "/tokenized_location.txt",
                      "labels_location": "/labels_location.txt"},
        pvolumes={}
    ).apply(aws.use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')) \
        .set_image_pull_policy('Always')

    vectorize_training_step = dsl.ContainerOp(
        name='vectorize',
        image=f"{REGISTRY}/tokenize:{TAG}",
        command="python",
        arguments=[
            "-m",
            "src.steps.tfidftransformer.pipeline_step",
            "--input-data", tokenize_training_step.outputs['tokenize_location']
        ],
        file_outputs={"tfidftransformer_location": "/vectorizer_location.txt",
                      "tfidfvectors_location": "/vectors_location.txt"},
        pvolumes={}
    ).apply(aws.use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')) \
        .set_image_pull_policy('Always')

    lr_training_step = dsl.ContainerOp(
        name='logistic regression',
        image=f"{REGISTRY}/tokenize:{TAG}",
        command="python",
        arguments=[
            "-m",
            "src.steps.lrclassifier.pipeline_step",
            "--labels-data", tokenize_training_step.outputs['labels_location'],
            "--vectors-data", vectorize_training_step.outputs['tfidfvectors_location']
        ],
        file_outputs={"lr_model_location": "/lr_model_location.txt"},
        pvolumes={}
    ).apply(aws.use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')) \
        .set_image_pull_policy('Always')

    tokenize_build_step = dsl.ContainerOp(
        name='Build tokenize Serving',
        image=f"{REGISTRY}/kaniko-executor:{TAG}",
        arguments=[
            "--dockerfile=Dockerfile",
            f"--build-arg=TOKENIZE_MODEL={tokenize_training_step.outputs['tokenize_location']}",
            "--context=dir:///workspace",
            f"--destination={REGISTRY}/tokenizeserving:{TAG}"
        ],
        pvolumes={}
    ).apply(aws.use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')) \
        .set_image_pull_policy('Always') \
        .after(lr_training_step)

    vectorize_build_step = dsl.ContainerOp(
        name='Build vectorize Serving',
        image=f"{REGISTRY}/kaniko-executor:{TAG}",
        arguments=[
            "--dockerfile=Dockerfile",
            f"--build-arg=VECTORIZE_MODEL={vectorize_training_step.outputs['tfidftransformer_location']}",
            "--context=dir:///workspace",
            f"--destination={REGISTRY}/vecotrizeserving:{TAG}"
        ],
        pvolumes={}
    ).apply(aws.use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')) \
        .set_image_pull_policy('Always') \
        .after(tokenize_build_step)

    lr_model_build_step = dsl.ContainerOp(
        name='Build LR Serving',
        image=f"{REGISTRY}/kaniko-executor:{TAG}",
        arguments=[
            "--dockerfile=Dockerfile",
            f"--build-arg=LR_MODEL={lr_training_step.outputs['lr_model_location']}",
            "--context=dir:///workspace",
            f"--destination={REGISTRY}/lrserving:{TAG}"
        ],
        pvolumes={}
    ).apply(aws.use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')) \
        .set_image_pull_policy('Always') \
        .after(vectorize_build_step)
Code Example #19
def mnist_classification(
        region='us-west-2',
        image='174872318107.dkr.ecr.us-west-2.amazonaws.com/kmeans:1',
        training_input_mode='File',
        hpo_strategy='Bayesian',
        hpo_metric_name='test:msd',
        hpo_metric_type='Minimize',
        hpo_early_stopping_type='Off',
        hpo_static_parameters='{"k": "10", "feature_dim": "784"}',
        hpo_integer_parameters='[{"Name": "mini_batch_size", "MinValue": "500", "MaxValue": "600"}, {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}]',
        hpo_continuous_parameters='[]',
        hpo_categorical_parameters='[{"Name": "init_method", "Values": ["random", "kmeans++"]}]',
        hpo_channels='[{"ChannelName": "train", \
                "DataSource": { \
                    "S3DataSource": { \
                        "S3Uri": "s3://kubeflow-pipeline-data/mnist_kmeans_example/train_data",  \
                        "S3DataType": "S3Prefix", \
                        "S3DataDistributionType": "FullyReplicated" \
                        } \
                    }, \
                "ContentType": "", \
                "CompressionType": "None", \
                "RecordWrapperType": "None", \
                "InputMode": "File"}, \
               {"ChannelName": "test", \
                "DataSource": { \
                    "S3DataSource": { \
                        "S3Uri": "s3://kubeflow-pipeline-data/mnist_kmeans_example/test_data", \
                        "S3DataType": "S3Prefix", \
                        "S3DataDistributionType": "FullyReplicated" \
                        } \
                    }, \
                "ContentType": "", \
                "CompressionType": "None", \
                "RecordWrapperType": "None", \
                "InputMode": "File"}]',
        output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
        output_encryption_key='',
        instance_type='ml.p2.16xlarge',
        instance_count='1',
        volume_size='50',
        hpo_max_num_jobs='9',
        hpo_max_parallel_jobs='3',
        max_run_time='3600',
        network_isolation='True',
        traffic_encryption='False',
        train_channels='[{"ChannelName": "train", \
                "DataSource": { \
                    "S3DataSource": { \
                        "S3Uri": "s3://kubeflow-pipeline-data/mnist_kmeans_example/train_data",  \
                        "S3DataType": "S3Prefix", \
                        "S3DataDistributionType": "FullyReplicated" \
                        } \
                    }, \
                "ContentType": "", \
                "CompressionType": "None", \
                "RecordWrapperType": "None", \
                "InputMode": "File"}]',
        batch_transform_instance_type='ml.m4.xlarge',
        batch_transform_input='s3://kubeflow-pipeline-data/mnist_kmeans_example/input',
        batch_transform_data_type='S3Prefix',
        batch_transform_content_type='text/csv',
        batch_transform_compression_type='None',
        batch_transform_output='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
        batch_transform_max_concurrent='4',
        batch_transform_max_payload='6',
        batch_strategy='MultiRecord',
        batch_transform_split_type='Line',
        role_arn=''):

    hpo = sagemaker_hpo_op(
        region=region,
        image=image,
        training_input_mode=training_input_mode,
        strategy=hpo_strategy,
        metric_name=hpo_metric_name,
        metric_type=hpo_metric_type,
        early_stopping_type=hpo_early_stopping_type,
        static_parameters=hpo_static_parameters,
        integer_parameters=hpo_integer_parameters,
        continuous_parameters=hpo_continuous_parameters,
        categorical_parameters=hpo_categorical_parameters,
        channels=hpo_channels,
        output_location=output_location,
        output_encryption_key=output_encryption_key,
        instance_type=instance_type,
        instance_count=instance_count,
        volume_size=volume_size,
        max_num_jobs=hpo_max_num_jobs,
        max_parallel_jobs=hpo_max_parallel_jobs,
        max_run_time=max_run_time,
        network_isolation=network_isolation,
        traffic_encryption=traffic_encryption,
        role=role_arn,
    ).apply(
        use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                       'AWS_SECRET_ACCESS_KEY'))

    training = sagemaker_train_op(
        region=region,
        image=image,
        training_input_mode=training_input_mode,
        hyperparameters=hpo.outputs['best_hyperparameters'],
        channels=train_channels,
        instance_type=instance_type,
        instance_count=instance_count,
        volume_size=volume_size,
        max_run_time=max_run_time,
        model_artifact_path=output_location,
        output_encryption_key=output_encryption_key,
        network_isolation=network_isolation,
        traffic_encryption=traffic_encryption,
        role=role_arn,
    ).apply(
        use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                       'AWS_SECRET_ACCESS_KEY'))

    create_model = sagemaker_model_op(
        region=region,
        model_name=training.outputs['job_name'],
        image=training.outputs['training_image'],
        model_artifact_url=training.outputs['model_artifact_url'],
        network_isolation=network_isolation,
        role=role_arn).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))

    prediction = sagemaker_deploy_op(
        region=region,
        model_name_1=create_model.output,
    ).apply(
        use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                       'AWS_SECRET_ACCESS_KEY'))

    batch_transform = sagemaker_batch_transform_op(
        region=region,
        model_name=create_model.output,
        instance_type=batch_transform_instance_type,
        instance_count=instance_count,
        max_concurrent=batch_transform_max_concurrent,
        max_payload=batch_transform_max_payload,
        batch_strategy=batch_strategy,
        input_location=batch_transform_input,
        data_type=batch_transform_data_type,
        content_type=batch_transform_content_type,
        split_type=batch_transform_split_type,
        compression_type=batch_transform_compression_type,
        output_location=batch_transform_output).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))
Code Example #20
    def _cc_pipeline(self,
                     pipeline,
                     pipeline_name,
                     pipeline_version='',
                     experiment_name='',
                     cos_directory=None,
                     export=False):

        runtime_configuration = self._get_metadata_configuration(namespace=MetadataManager.NAMESPACE_RUNTIMES,
                                                                 name=pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_secret = runtime_configuration.metadata.get('cos_secret')

        if cos_directory is None:
            cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        self.log_pipeline_info(pipeline_name,
                               f"processing pipeline dependencies to: {cos_endpoint} "
                               f"bucket: {cos_bucket} folder: {cos_directory}")
        t0_all = time.time()

        emptydir_volume_size = ''
        container_runtime = bool(os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(pipeline.operations)

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(pipeline, sorted_operations)

        for operation in sorted_operations:

            operation_artifact_archive = self._get_dependency_archive_name(operation)

            self.log.debug("Creating pipeline component :\n {op} archive : {archive}".format(
                           op=operation, archive=operation_artifact_archive))

            if container_runtime:
                # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = self._collect_envs(operation,
                                               cos_secret=cos_secret,
                                               cos_username=cos_username,
                                               cos_password=cos_password)

            # Include any envs set on the operation
            pipeline_envs.update(operation.env_vars_as_dict(logger=self.log))

            sanitized_operation_name = self._sanitize_operation_name(operation.name)

            # create pipeline operation
            notebook_ops[operation.id] = NotebookOp(name=sanitized_operation_name,
                                                    pipeline_name=pipeline_name,
                                                    experiment_name=experiment_name,
                                                    notebook=operation.filename,
                                                    cos_endpoint=cos_endpoint,
                                                    cos_bucket=cos_bucket,
                                                    cos_directory=cos_directory,
                                                    cos_dependencies_archive=operation_artifact_archive,
                                                    pipeline_version=pipeline_version,
                                                    pipeline_source=pipeline.source,
                                                    pipeline_inputs=operation.inputs,
                                                    pipeline_outputs=operation.outputs,
                                                    pipeline_envs=pipeline_envs,
                                                    emptydir_volume_size=emptydir_volume_size,
                                                    cpu_request=operation.cpu,
                                                    mem_request=operation.memory,
                                                    gpu_limit=operation.gpu,
                                                    image=operation.runtime_image,
                                                    file_outputs={
                                                        'mlpipeline-metrics':
                                                            '{}/mlpipeline-metrics.json'
                                                            .format(pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                                                        'mlpipeline-ui-metadata':
                                                            '{}/mlpipeline-ui-metadata.json'
                                                            .format(pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                                                    })

            if cos_secret and not export:
                notebook_ops[operation.id].apply(use_aws_secret(cos_secret))

            image_namespace = self._get_metadata_configuration(namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)
            for image_instance in image_namespace:
                if image_instance.metadata['image_name'] == operation.runtime_image and \
                   image_instance.metadata.get('pull_policy'):
                    notebook_ops[operation.id].container.set_image_pull_policy(image_instance.metadata['pull_policy'])

            self.log_pipeline_info(pipeline_name,
                                   f"processing operation dependencies for id: {operation.id}",
                                   operation_name=operation.name)

            self._upload_dependencies_to_object_store(runtime_configuration,
                                                      cos_directory,
                                                      operation)

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[parent_operation_id]  # Parent Operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all))

        return notebook_ops
Code Example #21
    def _cc_pipeline(
        self, pipeline, pipeline_name, pipeline_version="", experiment_name="", cos_directory=None, export=False
    ):

        runtime_configuration = self._get_metadata_configuration(
            schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID, name=pipeline.runtime_config
        )

        cos_endpoint = runtime_configuration.metadata["cos_endpoint"]
        cos_username = runtime_configuration.metadata.get("cos_username")
        cos_password = runtime_configuration.metadata.get("cos_password")
        cos_secret = runtime_configuration.metadata.get("cos_secret")
        cos_bucket = runtime_configuration.metadata.get("cos_bucket")
        if cos_directory is None:
            cos_directory = pipeline_name

        engine = runtime_configuration.metadata["engine"]

        self.log_pipeline_info(
            pipeline_name,
            f"processing pipeline dependencies to: {cos_endpoint} " f"bucket: {cos_bucket} folder: {cos_directory}",
        )
        t0_all = time.time()

        emptydir_volume_size = ""
        container_runtime = bool(os.getenv("CRIO_RUNTIME", "False").lower() == "true")

        # Create dictionary that maps component Id to its ContainerOp instance
        target_ops = {}

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(pipeline.operations)

        # Determine whether access to cloud storage is required
        for operation in sorted_operations:
            if isinstance(operation, GenericOperation):
                self._verify_cos_connectivity(runtime_configuration)
                break

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(pipeline, sorted_operations)

        for operation in sorted_operations:

            if container_runtime:
                # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = "20Gi"

            sanitized_operation_name = self._sanitize_operation_name(operation.name)

            # Create pipeline operation
            # If operation is one of the "generic" set of NBs or scripts, construct custom ExecuteFileOp
            if isinstance(operation, GenericOperation):

                # Collect env variables
                pipeline_envs = self._collect_envs(
                    operation, cos_secret=cos_secret, cos_username=cos_username, cos_password=cos_password
                )

                operation_artifact_archive = self._get_dependency_archive_name(operation)

                self.log.debug(f"Creating pipeline component:\n {operation} archive : {operation_artifact_archive}")

                target_ops[operation.id] = ExecuteFileOp(
                    name=sanitized_operation_name,
                    pipeline_name=pipeline_name,
                    experiment_name=experiment_name,
                    notebook=operation.filename,
                    cos_endpoint=cos_endpoint,
                    cos_bucket=cos_bucket,
                    cos_directory=cos_directory,
                    cos_dependencies_archive=operation_artifact_archive,
                    pipeline_version=pipeline_version,
                    pipeline_source=pipeline.source,
                    pipeline_inputs=operation.inputs,
                    pipeline_outputs=operation.outputs,
                    pipeline_envs=pipeline_envs,
                    emptydir_volume_size=emptydir_volume_size,
                    cpu_request=operation.cpu,
                    mem_request=operation.memory,
                    gpu_limit=operation.gpu,
                    workflow_engine=engine,
                    image=operation.runtime_image,
                    file_outputs={
                        "mlpipeline-metrics": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-metrics.json",  # noqa
                        "mlpipeline-ui-metadata": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-ui-metadata.json",  # noqa
                    },
                )

                if operation.doc:
                    target_ops[operation.id].add_pod_annotation("elyra/node-user-doc", operation.doc)

                # TODO Can we move all of this to apply to non-standard components as well? Test when servers are up
                if cos_secret and not export:
                    target_ops[operation.id].apply(use_aws_secret(cos_secret))

                image_namespace = self._get_metadata_configuration(RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID)
                for image_instance in image_namespace:
                    if image_instance.metadata["image_name"] == operation.runtime_image and image_instance.metadata.get(
                        "pull_policy"
                    ):
                        target_ops[operation.id].container.set_image_pull_policy(image_instance.metadata["pull_policy"])

                self.log_pipeline_info(
                    pipeline_name,
                    f"processing operation dependencies for id: {operation.id}",
                    operation_name=operation.name,
                )

                self._upload_dependencies_to_object_store(runtime_configuration, cos_directory, operation)

            # If operation is a "non-standard" component, load it's spec and create operation with factory function
            else:
                # Retrieve component from cache
                component = ComponentCache.instance().get_component(self._type, operation.classifier)

                # Convert the user-entered value of certain properties according to their type
                for component_property in component.properties:
                    # Get corresponding property's value from parsed pipeline
                    property_value = operation.component_params.get(component_property.ref)

                    self.log.debug(
                        f"Processing component parameter '{component_property.name}' "
                        f"of type '{component_property.data_type}'"
                    )

                    if component_property.data_type == "inputpath":
                        output_node_id = property_value["value"]
                        output_node_parameter_key = property_value["option"].replace("elyra_output_", "")
                        operation.component_params[component_property.ref] = target_ops[output_node_id].outputs[
                            output_node_parameter_key
                        ]
                    elif component_property.data_type == "inputvalue":
                        active_property = property_value["activeControl"]
                        active_property_value = property_value.get(active_property, None)

                        # If the value is not found, assign it the default value assigned in parser
                        if active_property_value is None:
                            active_property_value = component_property.value

                        if isinstance(active_property_value, dict) and set(active_property_value.keys()) == {
                            "value",
                            "option",
                        }:
                            output_node_id = active_property_value["value"]
                            output_node_parameter_key = active_property_value["option"].replace("elyra_output_", "")
                            operation.component_params[component_property.ref] = target_ops[output_node_id].outputs[
                                output_node_parameter_key
                            ]
                        elif component_property.default_data_type == "dictionary":
                            processed_value = self._process_dictionary_value(active_property_value)
                            operation.component_params[component_property.ref] = processed_value
                        elif component_property.default_data_type == "list":
                            processed_value = self._process_list_value(active_property_value)
                            operation.component_params[component_property.ref] = processed_value
                        else:
                            operation.component_params[component_property.ref] = active_property_value

                # Build component task factory
                try:
                    factory_function = components.load_component_from_text(component.definition)
                except Exception as e:
                    # TODO Fix error messaging and break exceptions down into categories
                    self.log.error(f"Error loading component spec for {operation.name}: {str(e)}")
                    raise RuntimeError(f"Error loading component spec for {operation.name}.")

                # Add factory function, which returns a ContainerOp task instance, to pipeline operation dict
                try:
                    comp_spec_inputs = [
                        inputs.name.lower().replace(" ", "_") for inputs in factory_function.component_spec.inputs
                    ]

                    # Remove inputs and outputs from params dict
                    # TODO: need to have way to retrieve only required params
                    parameter_removal_list = ["inputs", "outputs"]
                    for component_param in operation.component_params_as_dict.keys():
                        if component_param not in comp_spec_inputs:
                            parameter_removal_list.append(component_param)

                    for parameter in parameter_removal_list:
                        operation.component_params_as_dict.pop(parameter, None)

                    # Create ContainerOp instance and assign appropriate user-provided name
                    sanitized_component_params = {
                        self._sanitize_param_name(name): value
                        for name, value in operation.component_params_as_dict.items()
                    }
                    container_op = factory_function(**sanitized_component_params)
                    container_op.set_display_name(operation.name)

                    if operation.doc:
                        container_op.add_pod_annotation("elyra/node-user-doc", operation.doc)

                    target_ops[operation.id] = container_op
                except Exception as e:
                    # TODO Fix error messaging and break exceptions down into categories
                    self.log.error(f"Error constructing component {operation.name}: {str(e)}")
                    raise RuntimeError(f"Error constructing component {operation.name}.")

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = target_ops[operation.id]
            for parent_operation_id in operation.parent_operation_ids:
                parent_op = target_ops[parent_operation_id]  # Parent Operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all))

        return target_ops
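For reference, the "inputpath" / "inputvalue" branches above expect each node property to arrive in a small wrapper structure produced by the pipeline editor. A minimal sketch of plausible shapes follows; the node id, option name, and values are illustrative, not taken from a real pipeline file.

# Illustrative payloads for the property handling above (all values are made up).
# "inputpath": the property points at another node's output.
inputpath_example = {
    "value": "node-1234",                 # id of the upstream node
    "option": "elyra_output_model_path",  # becomes output key "model_path"
}

# "inputvalue": the editor records which control is active and that control's value.
inputvalue_example = {
    "activeControl": "StringControl",
    "StringControl": "s3://bucket/prefix/data.csv",
}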
Code Example #22
def ground_truth_test(
        region='us-west-2',
        team_name='ground-truth-demo-team',
        team_description='Team for mini image classification labeling job',
        user_pool='',
        user_groups='',
        client_id='',
        ground_truth_train_job_name='mini-image-classification-demo-train',
        ground_truth_validation_job_name='mini-image-classification-demo-validation',
        ground_truth_label_attribute_name='category',
        ground_truth_train_manifest_location='s3://your-bucket-name/gt-demo-images/ground-truth-demo/train.manifest',
        ground_truth_validation_manifest_location='s3://your-bucket-name/gt-demo-images/ground-truth-demo/validation.manifest',
        ground_truth_output_location='s3://your-bucket-name/gt-demo-images/ground-truth-demo/output',
        ground_truth_task_type='image classification',
        ground_truth_worker_type='private',
        ground_truth_label_category_config='s3://your-bucket-name/gt-demo-images/ground-truth-demo/class_labels.json',
        ground_truth_ui_template='s3://your-bucket-name/gt-demo-images/ground-truth-demo/instructions.template',
        ground_truth_title='Mini image classification',
        ground_truth_description='Test for Ground Truth KFP component',
        ground_truth_num_workers_per_object='1',
        ground_truth_time_limit='30',
        ground_truth_task_availibility='3600',
        ground_truth_max_concurrent_tasks='20',
        training_algorithm_name='image classification',
        training_input_mode='Pipe',
        training_hyperparameters='{"num_classes": "2", "num_training_samples": "14", "mini_batch_size": "2"}',
        training_channels='[{"ChannelName": "train", \
                "DataSource": { \
                    "S3DataSource": { \
                        "S3Uri": "",  \
                        "S3DataType": "AugmentedManifestFile", \
                        "S3DataDistributionType": "FullyReplicated", \
                        "AttributeNames": ["source-ref", "category"] \
                        } \
                    }, \
                "ContentType": "application/x-recordio", \
                "CompressionType": "None", \
                "RecordWrapperType": "RecordIO"}, \
                {"ChannelName": "validation", \
                    "DataSource": { \
                        "S3DataSource": { \
                            "S3Uri": "",  \
                            "S3DataType": "AugmentedManifestFile", \
                            "S3DataDistributionType": "FullyReplicated", \
                            "AttributeNames": ["source-ref", "category"] \
                            } \
                        }, \
                    "ContentType": "application/x-recordio", \
                    "CompressionType": "None", \
                    "RecordWrapperType": "RecordIO"}]',
        training_output_location='s3://your-bucket-name/gt-demo-images/training-output',
        training_instance_type='ml.p2.xlarge',
        training_instance_count='1',
        training_volume_size='50',
        training_max_run_time='3600',
        role_arn=''):

    workteam = sagemaker_workteam_op(region=region,
                                     team_name=team_name,
                                     description=team_description,
                                     user_pool=user_pool,
                                     user_groups=user_groups,
                                     client_id=client_id).apply(
                                         use_aws_secret(
                                             'aws-secret', 'AWS_ACCESS_KEY_ID',
                                             'AWS_SECRET_ACCESS_KEY'))

    ground_truth_train = sagemaker_gt_op(
        region=region,
        role=role_arn,
        job_name=ground_truth_train_job_name,
        label_attribute_name=ground_truth_label_attribute_name,
        manifest_location=ground_truth_train_manifest_location,
        output_location=ground_truth_output_location,
        task_type=ground_truth_task_type,
        worker_type=ground_truth_worker_type,
        workteam_arn=workteam.output,
        label_category_config=ground_truth_label_category_config,
        ui_template=ground_truth_ui_template,
        title=ground_truth_title,
        description=ground_truth_description,
        num_workers_per_object=ground_truth_num_workers_per_object,
        time_limit=ground_truth_time_limit,
        task_availibility=ground_truth_task_availibility,
        max_concurrent_tasks=ground_truth_max_concurrent_tasks).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))

    ground_truth_validation = sagemaker_gt_op(
        region=region,
        role=role_arn,
        job_name=ground_truth_validation_job_name,
        label_attribute_name=ground_truth_label_attribute_name,
        manifest_location=ground_truth_validation_manifest_location,
        output_location=ground_truth_output_location,
        task_type=ground_truth_task_type,
        worker_type=ground_truth_worker_type,
        workteam_arn=workteam.output,
        label_category_config=ground_truth_label_category_config,
        ui_template=ground_truth_ui_template,
        title=ground_truth_title,
        description=ground_truth_description,
        num_workers_per_object=ground_truth_num_workers_per_object,
        time_limit=ground_truth_time_limit,
        task_availibility=ground_truth_task_availibility,
        max_concurrent_tasks=ground_truth_max_concurrent_tasks).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))

    training = sagemaker_train_op(
        region=region,
        algorithm_name=training_algorithm_name,
        training_input_mode=training_input_mode,
        hyperparameters=training_hyperparameters,
        channels=training_channels,
        data_location_1=ground_truth_train.outputs['output_manifest_location'],
        data_location_2=ground_truth_validation.outputs['output_manifest_location'],
        instance_type=training_instance_type,
        instance_count=training_instance_count,
        volume_size=training_volume_size,
        max_run_time=training_max_run_time,
        model_artifact_path=training_output_location,
        role=role_arn).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))
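The sample above only defines the pipeline graph; it still has to be compiled and submitted. A minimal sketch of how such a function is typically run with the KFP v1 SDK follows; the package name, endpoint, and role ARN are placeholders, not part of the sample.

import kfp

# Compile the pipeline function into a deployable package
kfp.compiler.Compiler().compile(ground_truth_test, 'ground_truth_test.tar.gz')

# Or submit it directly to a reachable Kubeflow Pipelines endpoint
client = kfp.Client()
client.create_run_from_pipeline_func(
    ground_truth_test,
    arguments={'role_arn': 'arn:aws:iam::111122223333:role/SageMakerExecutionRole'})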
Code Example #23
def ground_truth_test(
        region='us-west-2',
        team_name='ground-truth-demo-team',
        team_description='Team for mini image classification labeling job',
        user_pool='',
        user_groups='',
        client_id='',
        ground_truth_train_job_name='mini-image-classification-demo-train',
        ground_truth_validation_job_name='mini-image-classification-demo-validation',
        ground_truth_label_attribute_name='category',
        ground_truth_train_manifest_location='s3://your-bucket-name/mini-image-classification/ground-truth-demo/train.manifest',
        ground_truth_validation_manifest_location='s3://your-bucket-name/mini-image-classification/ground-truth-demo/validation.manifest',
        ground_truth_output_location='s3://your-bucket-name/mini-image-classification/ground-truth-demo/output',
        ground_truth_task_type='image classification',
        ground_truth_worker_type='private',
        ground_truth_label_category_config='s3://your-bucket-name/mini-image-classification/ground-truth-demo/class_labels.json',
        ground_truth_ui_template='s3://your-bucket-name/mini-image-classification/ground-truth-demo/instructions.template',
        ground_truth_title='Mini image classification',
        ground_truth_description='Test for Ground Truth KFP component',
        ground_truth_num_workers_per_object=1,
        ground_truth_time_limit=30,
        ground_truth_task_availibility=3600,
        ground_truth_max_concurrent_tasks=20,
        training_algorithm_name='image classification',
        training_input_mode='Pipe',
        training_hyperparameters={
            "num_classes": "2",
            "num_training_samples": "14",
            "mini_batch_size": "2"
        },
        training_output_location='s3://your-bucket-name/mini-image-classification/training-output',
        training_instance_type='ml.p2.xlarge',
        training_instance_count=1,
        training_volume_size=50,
        training_max_run_time=3600,
        role_arn=''):

    workteam = sagemaker_workteam_op(region=region,
                                     team_name=team_name,
                                     description=team_description,
                                     user_pool=user_pool,
                                     user_groups=user_groups,
                                     client_id=client_id).apply(
                                         use_aws_secret(
                                             'aws-secret', 'AWS_ACCESS_KEY_ID',
                                             'AWS_SECRET_ACCESS_KEY'))

    ground_truth_train = sagemaker_gt_op(
        region=region,
        role=role_arn,
        job_name=ground_truth_train_job_name,
        label_attribute_name=ground_truth_label_attribute_name,
        manifest_location=ground_truth_train_manifest_location,
        output_location=ground_truth_output_location,
        task_type=ground_truth_task_type,
        worker_type=ground_truth_worker_type,
        workteam_arn=workteam.output,
        label_category_config=ground_truth_label_category_config,
        ui_template=ground_truth_ui_template,
        title=ground_truth_title,
        description=ground_truth_description,
        num_workers_per_object=ground_truth_num_workers_per_object,
        time_limit=ground_truth_time_limit,
        task_availibility=ground_truth_task_availibility,
        max_concurrent_tasks=ground_truth_max_concurrent_tasks).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))

    ground_truth_validation = sagemaker_gt_op(
        region=region,
        role=role_arn,
        job_name=ground_truth_validation_job_name,
        label_attribute_name=ground_truth_label_attribute_name,
        manifest_location=ground_truth_validation_manifest_location,
        output_location=ground_truth_output_location,
        task_type=ground_truth_task_type,
        worker_type=ground_truth_worker_type,
        workteam_arn=workteam.output,
        label_category_config=ground_truth_label_category_config,
        ui_template=ground_truth_ui_template,
        title=ground_truth_title,
        description=ground_truth_description,
        num_workers_per_object=ground_truth_num_workers_per_object,
        time_limit=ground_truth_time_limit,
        task_availibility=ground_truth_task_availibility,
        max_concurrent_tasks=ground_truth_max_concurrent_tasks).apply(
            use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                           'AWS_SECRET_ACCESS_KEY'))

    channelObj['ChannelName'] = 'train'
    channelObj['DataSource']['S3DataSource']['S3Uri'] = str(
        ground_truth_train.outputs['output_manifest_location'])
    channelObjList.append(copy.deepcopy(channelObj))
    channelObj['ChannelName'] = 'validation'
    channelObj['DataSource']['S3DataSource']['S3Uri'] = str(
        ground_truth_validation.outputs['output_manifest_location'])
    channelObjList.append(copy.deepcopy(channelObj))

    training = sagemaker_train_op(region=region,
                                  algorithm_name=training_algorithm_name,
                                  training_input_mode=training_input_mode,
                                  hyperparameters=training_hyperparameters,
                                  channels=json.dumps(channelObjList),
                                  instance_type=training_instance_type,
                                  instance_count=training_instance_count,
                                  volume_size=training_volume_size,
                                  max_run_time=training_max_run_time,
                                  model_artifact_path=training_output_location,
                                  role=role_arn).apply(
                                      use_aws_secret('aws-secret',
                                                     'AWS_ACCESS_KEY_ID',
                                                     'AWS_SECRET_ACCESS_KEY'))
Code Example #24
def s3_sync_pipeline():
    echo_task = s3_sync().apply(
        use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                       'AWS_SECRET_ACCESS_KEY'))
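Every example in this collection relies on use_aws_secret to inject AWS credentials from a Kubernetes secret into the task container. The sketch below approximates what such a task modifier does, using the Kubernetes client models the KFP SDK builds on; it is an illustration of the pattern, not the exact source of kfp.aws.use_aws_secret.

from kubernetes import client as k8s_client

def use_aws_secret_sketch(secret_name='aws-secret',
                          access_key_id_key='AWS_ACCESS_KEY_ID',
                          secret_access_key_key='AWS_SECRET_ACCESS_KEY'):
    """Return a task modifier that adds AWS credential env vars sourced from a k8s secret."""
    def _apply(task):
        for env_name, secret_key in [('AWS_ACCESS_KEY_ID', access_key_id_key),
                                     ('AWS_SECRET_ACCESS_KEY', secret_access_key_key)]:
            task.container.add_env_variable(
                k8s_client.V1EnvVar(
                    name=env_name,
                    value_from=k8s_client.V1EnvVarSource(
                        secret_key_ref=k8s_client.V1SecretKeySelector(
                            name=secret_name, key=secret_key))))
        return task
    return _apply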
Code Example #25
    def _cc_pipeline(self,
                     pipeline,
                     pipeline_name,
                     pipeline_version='',
                     experiment_name='',
                     cos_directory=None,
                     export=False):

        runtime_configuration = self._get_metadata_configuration(
            namespace=MetadataManager.NAMESPACE_RUNTIMES,
            name=pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_secret = runtime_configuration.metadata.get('cos_secret')

        if cos_directory is None:
            cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        self.log_pipeline_info(
            pipeline_name,
            f"processing pipeline dependencies to: {cos_endpoint} "
            f"bucket: {cos_bucket} folder: {cos_directory}")
        t0_all = time.time()

        emptydir_volume_size = ''
        container_runtime = bool(
            os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(
            pipeline.operations)

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(
            pipeline, sorted_operations)

        for operation in sorted_operations:

            operation_artifact_archive = self._get_dependency_archive_name(
                operation)

            self.log.debug(
                f"Creating pipeline component:\n {operation} archive: {operation_artifact_archive}")

            if container_runtime:
                # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = dict()
            if not cos_secret:
                pipeline_envs['AWS_ACCESS_KEY_ID'] = cos_username
                pipeline_envs['AWS_SECRET_ACCESS_KEY'] = cos_password
            # Convey pipeline logging enablement to operation
            pipeline_envs['ELYRA_ENABLE_PIPELINE_INFO'] = str(
                self.enable_pipeline_info)
            # This setting identifies a writable directory in the container image.
            # Only Unix-style paths are supported.
            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'] = self.WCD

            if operation.env_vars:
                for env_var in operation.env_vars:
                    # Split on the first '=' and strip surrounding whitespace and quotes
                    # from both the key and the value
                    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                    # Only keep entries that have a non-empty key and a value
                    if len(result) == 2 and result[0] != '':
                        pipeline_envs[result[0]] = result[1]

            sanitized_operation_name = self._sanitize_operation_name(
                operation.name)

            # create pipeline operation
            notebook_ops[operation.id] = NotebookOp(
                name=sanitized_operation_name,
                pipeline_name=pipeline_name,
                experiment_name=experiment_name,
                notebook=operation.filename,
                cos_endpoint=cos_endpoint,
                cos_bucket=cos_bucket,
                cos_directory=cos_directory,
                cos_dependencies_archive=operation_artifact_archive,
                pipeline_version=pipeline_version,
                pipeline_source=pipeline.source,
                pipeline_inputs=operation.inputs,
                pipeline_outputs=operation.outputs,
                pipeline_envs=pipeline_envs,
                emptydir_volume_size=emptydir_volume_size,
                cpu_request=operation.cpu,
                mem_request=operation.memory,
                gpu_limit=operation.gpu,
                image=operation.runtime_image,
                file_outputs={
                    'mlpipeline-metrics':
                    '{}/mlpipeline-metrics.json'.format(
                        pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                    'mlpipeline-ui-metadata':
                    '{}/mlpipeline-ui-metadata.json'.format(
                        pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                })

            if cos_secret and not export:
                notebook_ops[operation.id].apply(use_aws_secret(cos_secret))

            image_namespace = self._get_metadata_configuration(
                namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)
            for image_instance in image_namespace:
                if image_instance.metadata['image_name'] == operation.runtime_image and \
                   image_instance.metadata.get('pull_policy'):
                    notebook_ops[operation.id].container.set_image_pull_policy(
                        image_instance.metadata['pull_policy'])

            self.log_pipeline_info(
                pipeline_name,
                f"processing operation dependencies for id: {operation.id}",
                operation_name=operation.name)

            self._upload_dependencies_to_object_store(runtime_configuration,
                                                      cos_directory, operation)

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[parent_operation_id]  # parent operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name,
                               "pipeline dependencies processed",
                               duration=(time.time() - t0_all))

        return notebook_ops
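A small illustration of the env_vars handling above: each entry is split on the first '=', stripped of surrounding quotes and whitespace, and merged into pipeline_envs. The variable names and values below are made up.

# Illustrative input/output for the env_vars parsing loop above.
env_vars = ["MY_TOKEN='abc123'", 'LOG_LEVEL=DEBUG', 'BROKEN_ENTRY']

pipeline_envs = {}
for env_var in env_vars:
    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
    if len(result) == 2 and result[0] != '':
        pipeline_envs[result[0]] = result[1]

# pipeline_envs == {'MY_TOKEN': 'abc123', 'LOG_LEVEL': 'DEBUG'}
# 'BROKEN_ENTRY' is skipped because it has no value.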
Code Example #26
def hpo_test(
    region='us-west-2',
    hpo_job_name='HPO-kmeans-sample',
    image='',
    algorithm_name='K-Means',
    training_input_mode='File',
    metric_definitions='{}',
    strategy='Bayesian',
    metric_name='test:msd',
    metric_type='Minimize',
    early_stopping_type='Off',
    static_parameters='{"k": "10", "feature_dim": "784"}',
    integer_parameters='[{"Name": "mini_batch_size", "MinValue": "450", "MaxValue": "550"}, \
                         {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}]',
    continuous_parameters='[]',
    categorical_parameters='[{"Name": "init_method", "Values": ["random", "kmeans++"]}]',
    channels='[{"ChannelName": "train", \
                "DataSource": { \
                    "S3DataSource": { \
                        "S3Uri": "s3://kubeflow-pipeline-data/mnist_kmeans_example/data",  \
                        "S3DataType": "S3Prefix", \
                        "S3DataDistributionType": "FullyReplicated" \
                        } \
                    }, \
                "ContentType": "", \
                "CompressionType": "None", \
                "RecordWrapperType": "None", \
                "InputMode": "File"}, \
               {"ChannelName": "test", \
                "DataSource": { \
                    "S3DataSource": { \
                        "S3Uri": "s3://kubeflow-pipeline-data/mnist_kmeans_example/data", \
                        "S3DataType": "S3Prefix", \
                        "S3DataDistributionType": "FullyReplicated" \
                        } \
                    }, \
                "ContentType": "", \
                "CompressionType": "None", \
                "RecordWrapperType": "None", \
                "InputMode": "File"}]',
    output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
    output_encryption_key='',
    instance_type='ml.p2.16xlarge',
    instance_count='1',
    volume_size='50',
    max_num_jobs='1',
    max_parallel_jobs='1',
    resource_encryption_key='',
    max_run_time='3600',
    vpc_security_group_ids='',
    vpc_subnets='',
    network_isolation='True',
    traffic_encryption='False',
    warm_start_type='',
    parent_hpo_jobs='',
    tags='{}',
    role_arn='',
):

    training = sagemaker_hpo_op(
        region=region,
        job_name=hpo_job_name,
        image=image,
        training_input_mode=training_input_mode,
        algorithm_name=algorithm_name,
        metric_definitions=metric_definitions,
        strategy=strategy,
        metric_name=metric_name,
        metric_type=metric_type,
        early_stopping_type=early_stopping_type,
        static_parameters=static_parameters,
        integer_parameters=integer_parameters,
        continuous_parameters=continuous_parameters,
        categorical_parameters=categorical_parameters,
        channels=channels,
        output_location=output_location,
        output_encryption_key=output_encryption_key,
        instance_type=instance_type,
        instance_count=instance_count,
        volume_size=volume_size,
        max_num_jobs=max_num_jobs,
        max_parallel_jobs=max_parallel_jobs,
        resource_encryption_key=resource_encryption_key,
        max_run_time=max_run_time,
        vpc_security_group_ids=vpc_security_group_ids,
        vpc_subnets=vpc_subnets,
        network_isolation=network_isolation,
        traffic_encryption=traffic_encryption,
        warm_start_type=warm_start_type,
        parent_hpo_jobs=parent_hpo_jobs,
        tags=tags,
        role=role_arn,
    ).apply(
        use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID',
                       'AWS_SECRET_ACCESS_KEY'))
Code Example #27
File: origin.py  Project: pawanrana/kubeflow-workshop
def apply_config_map_and_aws_secret(op):
    return (op.apply(use_config_map(configmap)).apply(
        use_aws_secret()).set_image_pull_policy('Always'))
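A hypothetical usage of this helper: wrap each task as it is created so that every step gets the config map, the default 'aws-secret' credentials, and an Always image pull policy. train_op, configmap, and use_config_map are assumed to be defined elsewhere in the workshop repository.

# Hypothetical usage inside a pipeline definition.
def training_pipeline(epochs: int = 10):
    train_task = apply_config_map_and_aws_secret(train_op(epochs=epochs))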
Code Example #28
File: processor_kfp.py  Project: elyra-ai/elyra
    def _cc_pipeline(self,
                     pipeline,
                     pipeline_name,
                     pipeline_version='',
                     experiment_name='',
                     cos_directory=None,
                     export=False):

        runtime_configuration = self._get_metadata_configuration(
            namespace=MetadataManager.NAMESPACE_RUNTIMES,
            name=pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_secret = runtime_configuration.metadata.get('cos_secret')

        if cos_directory is None:
            cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        self.log_pipeline_info(
            pipeline_name,
            f"processing pipeline dependencies to: {cos_endpoint} "
            f"bucket: {cos_bucket} folder: {cos_directory}")
        t0_all = time.time()

        emptydir_volume_size = ''
        container_runtime = bool(
            os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(
            pipeline.operations)

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(
            pipeline, sorted_operations)

        for operation in sorted_operations:

            if container_runtime:
                # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = self._collect_envs(operation,
                                               cos_secret=cos_secret,
                                               cos_username=cos_username,
                                               cos_password=cos_password)

            sanitized_operation_name = self._sanitize_operation_name(
                operation.name)

            # Create pipeline operation
            # If the operation is one of the "standard" notebook or script nodes, construct a custom NotebookOp
            if operation.classifier in [
                    "execute-notebook-node", "execute-python-node",
                    "execute-r-node"
            ]:

                operation_artifact_archive = self._get_dependency_archive_name(
                    operation)

                self.log.debug(
                    f"Creating pipeline component:\n {operation} archive: {operation_artifact_archive}")

                notebook_ops[operation.id] = NotebookOp(
                    name=sanitized_operation_name,
                    pipeline_name=pipeline_name,
                    experiment_name=experiment_name,
                    notebook=operation.filename,
                    cos_endpoint=cos_endpoint,
                    cos_bucket=cos_bucket,
                    cos_directory=cos_directory,
                    cos_dependencies_archive=operation_artifact_archive,
                    pipeline_version=pipeline_version,
                    pipeline_source=pipeline.source,
                    pipeline_inputs=operation.inputs,
                    pipeline_outputs=operation.outputs,
                    pipeline_envs=pipeline_envs,
                    emptydir_volume_size=emptydir_volume_size,
                    cpu_request=operation.cpu,
                    mem_request=operation.memory,
                    gpu_limit=operation.gpu,
                    image=operation.runtime_image,
                    file_outputs={
                        'mlpipeline-metrics':
                        '{}/mlpipeline-metrics.json'.format(
                            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                        'mlpipeline-ui-metadata':
                        '{}/mlpipeline-ui-metadata.json'.format(
                            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                    })

                # TODO Can we move all of this to apply to non-standard components as well? Test when servers are up
                if cos_secret and not export:
                    notebook_ops[operation.id].apply(
                        use_aws_secret(cos_secret))

                image_namespace = self._get_metadata_configuration(
                    namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)
                for image_instance in image_namespace:
                    if image_instance.metadata['image_name'] == operation.runtime_image and \
                            image_instance.metadata.get('pull_policy'):
                        notebook_ops[operation.id].container. \
                            set_image_pull_policy(image_instance.metadata['pull_policy'])

                self.log_pipeline_info(
                    pipeline_name,
                    f"processing operation dependencies for id: {operation.id}",
                    operation_name=operation.name)

                self._upload_dependencies_to_object_store(
                    runtime_configuration, cos_directory, operation)

            # If the operation is a "non-standard" component, load its spec and create the operation with a factory function
            else:
                component_source = {
                    operation.component_source_type: operation.component_source
                }

                # Build component task factory
                try:
                    factory_function = components.load_component(
                        **component_source)
                except Exception:
                    # TODO Fix error messaging and break exceptions down into categories
                    self.log.error(
                        f"There was an error while loading component spec for {operation.name}."
                    )
                    raise RuntimeError(
                        f"There was an error while loading component spec for {operation.name}."
                    )

                # Invoke the factory function (which returns a ContainerOp task instance)
                # and add the resulting task to the pipeline operation dict
                try:
                    notebook_ops[operation.id] = factory_function(
                        **operation.component_params)
                except Exception:
                    # TODO Fix error messaging and break exceptions down into categories
                    self.log.error(
                        f"There was an error while constructing component {operation.name}."
                    )
                    raise RuntimeError(
                        f"There was an error while constructing component {operation.name}."
                    )

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[parent_operation_id]  # parent operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name,
                               "pipeline dependencies processed",
                               duration=(time.time() - t0_all))

        return notebook_ops
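The "non-standard" branch above builds a one-entry dict keyed by the component's source type and splats it into components.load_component, whose keyword arguments in the KFP v1 SDK are filename, url, and text. A sketch of the two shapes this presumably takes; the path and URL are placeholders.

from kfp import components

# Component defined in a local file (component_source_type presumably 'filename').
factory_from_file = components.load_component(
    **{'filename': '/path/to/component.yaml'})

# Component defined by a remote URL (component_source_type presumably 'url').
factory_from_url = components.load_component(
    **{'url': 'https://example.com/components/my_component.yaml'})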
Code Example #29
def hpo_test(region='us-west-2',
    hpo_job_name='HPO-kmeans-sample',
    image='',
    algorithm_name='K-Means',
    training_input_mode='File',
    metric_definitions={},
    strategy='Bayesian',
    metric_name='test:msd',
    metric_type='Minimize',
    early_stopping_type='Off',
    static_parameters={"k": "10", "feature_dim": "784"},
    integer_parameters=[{"Name": "mini_batch_size", "MinValue": "450", "MaxValue": "550"}, \
                         {"Name": "extra_center_factor", "MinValue": "10", "MaxValue": "20"}],
    continuous_parameters=[],
    categorical_parameters=[{"Name": "init_method", "Values": ["random", "kmeans++"]}],
    channels=channelObjList,
    output_location='s3://kubeflow-pipeline-data/mnist_kmeans_example/output',
    output_encryption_key='',
    instance_type='ml.p2.16xlarge',
    instance_count=1,
    volume_size=50,
    max_num_jobs=1,
    max_parallel_jobs=1,
    resource_encryption_key='',
    max_run_time=3600,
    vpc_security_group_ids='',
    vpc_subnets='',
    endpoint_url='',
    network_isolation=True,
    traffic_encryption=False,
    warm_start_type='',
    parent_hpo_jobs='',
    spot_instance=False,
    max_wait_time=3600,
    checkpoint_config={},
    tags={},
    role_arn='',
    ):

    training = sagemaker_hpo_op(
        region=region,
        endpoint_url=endpoint_url,
        job_name=hpo_job_name,
        image=image,
        training_input_mode=training_input_mode,
        algorithm_name=algorithm_name,
        metric_definitions=metric_definitions,
        strategy=strategy,
        metric_name=metric_name,
        metric_type=metric_type,
        early_stopping_type=early_stopping_type,
        static_parameters=static_parameters,
        integer_parameters=integer_parameters,
        continuous_parameters=continuous_parameters,
        categorical_parameters=categorical_parameters,
        channels=channels,
        output_location=output_location,
        output_encryption_key=output_encryption_key,
        instance_type=instance_type,
        instance_count=instance_count,
        volume_size=volume_size,
        max_num_jobs=max_num_jobs,
        max_parallel_jobs=max_parallel_jobs,
        resource_encryption_key=resource_encryption_key,
        max_run_time=max_run_time,
        vpc_security_group_ids=vpc_security_group_ids,
        vpc_subnets=vpc_subnets,
        network_isolation=network_isolation,
        traffic_encryption=traffic_encryption,
        warm_start_type=warm_start_type,
        parent_hpo_jobs=parent_hpo_jobs,
        spot_instance=spot_instance,
        max_wait_time=max_wait_time,
        checkpoint_config=checkpoint_config,
        tags=tags,
        role=role_arn,
    ).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
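Like Example #23, this variant passes a module-level channelObjList that is not shown in the excerpt. Given the JSON channels used in Example #26, it is presumably the Python-dict equivalent sketched below; the S3 URI is copied from that example and everything else is an assumption.

# Assumed module-level channel definitions mirroring Example #26's JSON channels.
channelObjList = [
    {
        'ChannelName': name,
        'DataSource': {
            'S3DataSource': {
                'S3Uri': 's3://kubeflow-pipeline-data/mnist_kmeans_example/data',
                'S3DataType': 'S3Prefix',
                'S3DataDistributionType': 'FullyReplicated',
            }
        },
        'ContentType': '',
        'CompressionType': 'None',
        'RecordWrapperType': 'None',
        'InputMode': 'File',
    }
    for name in ('train', 'test')
]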