from kfp import components as comp  # provides create_component_from_func


def create_pl_comp():
    '''Creates pipeline components from Python functions. Returns a list of components.'''

    # Define your component code as standalone Python functions:
    def add(a: float, b: float) -> float:
        '''Calculates sum of two arguments'''
        return a + b

    def multiply(c: float, d: float) -> float:
        '''Calculates the product'''
        return c * d

    # Convert the Python functions into task factories (functions that return a task object)
    add_op = comp.create_component_from_func(
        add,
        output_component_file='add_component.yaml',
    )
    # add_op is a factory function that creates kfp.dsl.ContainerOp instances for your pipeline

    add_op.component_spec.save('add_component.yaml')

    # product_op is a task factory that creates a task object when given arguments
    product_op = comp.create_component_from_func(
        multiply, output_component_file='multiply_component.yaml')

    component_lst = [add_op, product_op]

    return component_lst
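
The returned factories can then be wired into a pipeline; a minimal sketch (the pipeline name and default argument values are illustrative):

from kfp import dsl

add_op, product_op = create_pl_comp()

@dsl.pipeline(name='add-multiply-pipeline')  # illustrative name
def add_multiply_pipeline(a: float = 1.0, b: float = 2.0, d: float = 3.0):
    add_task = add_op(a, b)                        # task computing a + b
    product_task = product_op(add_task.output, d)  # consumes the upstream sum
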
Example #2
    def test_handling_list_arguments_containing_serializable_python_objects(
            self):
        '''Checks that lists containing python objects with .to_struct() can be properly serialized.'''
        class MyClass:
            def to_struct(self):
                return {'foo': [7, 42]}

        def assert_values_are_correct(
            list_param: list,
            dict_param: dict,
        ) -> int:
            import unittest
            unittest.TestCase().assertEqual(list_param,
                                            [1, {
                                                'foo': [7, 42]
                                            }, 3])
            unittest.TestCase().assertEqual(dict_param,
                                            {'k1': {
                                                'foo': [7, 42]
                                            }})
            return 1

        task_factory = comp.create_component_from_func(
            assert_values_are_correct)

        self.helper_test_component_using_local_call(
            task_factory,
            arguments=dict(
                list_param=[1, MyClass(), 3],
                dict_param={'k1': MyClass()},
            ),
            expected_output_values={'Output': '1'},
        )
Example #3
    def test_python_component_decorator(self):
        # Deprecated
        from kfp.dsl import python_component
        from kfp.components import create_component_from_func

        expected_name = 'Sum component name'
        expected_description = 'Sum component description'
        expected_image = 'org/image'

        @python_component(name=expected_name,
                          description=expected_description,
                          base_image=expected_image)
        def add_two_numbers_decorated(
            a: float,
            b: float,
        ) -> float:
            '''Returns sum of two arguments'''
            return a + b

        op = create_component_from_func(add_two_numbers_decorated)

        component_spec = op.component_spec
        self.assertEqual(component_spec.name, expected_name)
        self.assertEqual(component_spec.description.strip(),
                         expected_description.strip())
        self.assertEqual(component_spec.implementation.container.image,
                         expected_image)
Example #4
    def test_annotations_stripping(self):
        import typing
        import collections

        MyFuncOutputs = typing.NamedTuple('Outputs', [('sum', int),
                                                      ('product', int)])

        class CustomType1:
            pass

        def my_func(
            param1: CustomType1 = None,  # This caused failure previously
            param2: collections.OrderedDict = None,  # This caused failure previously
        ) -> MyFuncOutputs:  # This caused failure previously
            assert param1 is None
            assert param2 is None
            return (8, 15)

        task_factory = comp.create_component_from_func(my_func)

        self.helper_test_component_using_local_call(task_factory,
                                                    arguments={},
                                                    expected_output_values={
                                                        'sum': '8',
                                                        'product': '15'
                                                    })
Example #5
    def test_fail_on_handling_list_arguments_containing_python_objects(self):
        '''Checks that lists containing python objects not having .to_struct() raise error during serialization.'''
        class MyClass:
            pass

        def consume_list(list_param: list) -> int:
            return 1

        def consume_dict(dict_param: dict) -> int:
            return 1

        list_op = comp.create_component_from_func(consume_list)
        dict_op = comp.create_component_from_func(consume_dict)

        with self.assertRaises(Exception):
            list_op([1, MyClass(), 3])

        with self.assertRaises(Exception):
            dict_op({'k1': MyClass()})
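
As Example #2 demonstrates, giving such objects a .to_struct() method makes them serializable; a minimal sketch (MySerializableClass is illustrative):

class MySerializableClass:
    def to_struct(self):
        # KFP calls to_struct() to turn the object into a JSON-safe structure
        return {'foo': [7, 42]}

# With to_struct() available, serialization succeeds:
list_op([1, MySerializableClass(), 3])
dict_op({'k1': MySerializableClass()})
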
Example #6
def artifact_passing_pipeline():
    producer_task = producer_op()
    processor_task = processor_op(producer_task.outputs['output_1'],
                                  producer_task.outputs['output_2'])
    consumer_task = consumer_op(processor_task.outputs['output_1'],
                                processor_task.outputs['output_2'])

    markdown_task = create_component_from_func(func=metadata_and_metrics)()
    # This line is only needed so that compiling with dsl-compile works
    kfp.dsl.get_pipeline_conf(
    ).data_passing_method = volume_based_data_passing_method
Example #7
    def test_handling_list_arguments_containing_pipelineparam(self):
        '''Checks that lists containing PipelineParam can be properly serialized'''
        def consume_list(list_param: list) -> int:
            pass

        import kfp
        task_factory = create_component_from_func(consume_list)
        task = task_factory([1, 2, 3, kfp.dsl.PipelineParam('aaa'), 4, 5, 6])

        full_command_line = task.command + task.arguments
        for arg in full_command_line:
            self.assertNotIn('PipelineParam', arg)
Example #8
    def test_compile_pipeline_with_importer_on_inputpath_should_raise_error(
            self):

        # YAML component authoring
        component_op = components.load_component_from_text("""
        name: component with misused placeholder
        inputs:
        - {name: model, type: Model}
        implementation:
          container:
            image: dummy
            args:
            - {inputPath: model}
        """)

        @dsl.pipeline(name='my-component')
        def my_pipeline(model):
            component_op(model=model)

        with self.assertRaisesRegex(
                TypeError,
                'Input "model" with type "Model" is not connected to any upstream '
                'output. However it is used with InputPathPlaceholder.'):
            compiler.Compiler().compile(pipeline_func=my_pipeline,
                                        pipeline_root='dummy',
                                        output_path='output.json')

        # Python function based component authoring
        def my_component(datasets: components.InputPath('Datasets')):
            pass

        component_op = components.create_component_from_func(my_component)

        @dsl.pipeline(name='my-component')
        def my_pipeline(datasets):
            component_op(datasets=datasets)

        with self.assertRaisesRegex(
                TypeError,
                'Input "datasets" with type "Datasets" is not connected to any upstream '
                'output. However it is used with InputPathPlaceholder.'):
            compiler.Compiler().compile(pipeline_func=my_pipeline,
                                        pipeline_root='dummy',
                                        output_path='output.json')
Example #9
def main(args):
    OUT_COMPONENTS_DIR = args.output_component_dir
    OUT_PIPELINE_DIR = args.output_pipeline_dir

    # Write the component file for the Python function
    hello_component = cpt.create_component_from_func(
        func=hello_kubeflow,
        output_component_file=f'{OUT_COMPONENTS_DIR}/hello_kubeflow.component')

    # Read the component file
    hello_component = cpt.load_component_from_file(
        filename=f'{OUT_COMPONENTS_DIR}/hello_kubeflow.component')

    # Write a pipeline function using the Kubeflow Pipelines DSL
    @dsl.pipeline(name='Hello Kubeflow Pipeline',
                  description='A Hello Kubeflow pipeline')
    def hello_kubeflow_pipeline(name='Ivan'):
        task = hello_component(name)

    # Compile the pipeline to generate a compressed YAML definition of the pipeline
    cmp.Compiler().compile(
        pipeline_func=hello_kubeflow_pipeline,
        package_path=f'{OUT_PIPELINE_DIR}/hello_kubeflow_pipeline.zip')
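
Once compiled, the package can be submitted to a cluster; a minimal sketch, assuming a reachable KFP endpoint (the host URL is a placeholder):

import kfp

client = kfp.Client(host='http://localhost:8080')  # placeholder endpoint
client.create_run_from_pipeline_package(
    'hello_kubeflow_pipeline.zip',  # the package compiled above
    arguments={'name': 'Ivan'},
)
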
Example #10
def compile():
    create_component_from_func(
        upload_model,
        output_component_file='./kf_hs/steps/upload_model/component.yaml',
        base_image='kineticcookie/demo-kf',
    )
    create_component_from_func(
        update_application,
        output_component_file='./kf_hs/steps/update_application/component.yaml',
        base_image='kineticcookie/demo-kf',
    )
    create_component_from_func(
        train,
        output_component_file='./kf_hs/steps/train_model/component.yaml',
        base_image='kineticcookie/demo-kf',
    )
    compiler.Compiler().compile(pipeline, "./output/pipeline.tar.gz")
    print("Done! Compiled to ./output/pipeline.tar.gz")
Example #11
def main(args):
    OUT_COMPONENTS_DIR = args.output_component_dir
    OUT_PIPELINE_DIR = args.output_pipeline_dir

    # Because we need a non-standard library (nltk), install it into the component's container
    tokenizer_component = cpt.func_to_container_op(
        tokenizer,
        packages_to_install=['nltk==3.5'],
        output_component_file=f'{OUT_COMPONENTS_DIR}/tokenizer.component')
    count_tokens_component = cpt.create_component_from_func(
        count_tokens,
        output_component_file=f'{OUT_COMPONENTS_DIR}/count_tokens.component')

    @dsl.pipeline(name='Count Kubeflow Pipeline',
                  description='Count Number of tokens in a sentence')
    def count_kubeflow_pipeline(sentence='Ciao Kubeflow, come stai oggi?'):
        tokenizer_task = tokenizer_component(sentence=sentence)
        count_tokens_task = count_tokens_component(
            tokens=tokenizer_task.output)

    compiler = cmp.Compiler()
    compiler.compile(
        pipeline_func=count_kubeflow_pipeline,
        package_path=f'{OUT_PIPELINE_DIR}/count_kubeflow_pipeline.zip')
Example #12
    if label_column is not None:
        df = df.drop(columns=[df.columns[label_column]])

    testing_data = xgboost.DMatrix(data=df)

    model = xgboost.Booster(model_file=model_path)

    predictions = model.predict(testing_data)

    Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
    numpy.savetxt(predictions_path, predictions)


if __name__ == '__main__':
    create_component_from_func(
        xgboost_predict,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=[
            'xgboost==1.1.1',
            'pandas==1.0.5',
        ],
        annotations={
            "author":
            "Alexey Volkov <*****@*****.**>",
            "canonical_location":
            "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Predict/component.yaml",
        },
    )
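
The emitted component.yaml can later be loaded back without the Python source; a minimal sketch:

from kfp import components

# Recreate the task factory from the saved component definition
xgboost_predict_op = components.load_component_from_file('component.yaml')
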
Example #13
from kfp.v2 import compiler


def flip_coin() -> str:
    """Flip a coin and output heads or tails randomly."""
    import random
    result = 'heads' if random.randint(0, 1) == 0 else 'tails'
    return result


def print_msg(msg: str):
    """Print a message."""
    print(msg)


flip_coin_op = components.create_component_from_func(flip_coin)

print_op = components.create_component_from_func(print_msg)


@dsl.pipeline(name='nested-conditions-pipeline')
def my_pipeline():
    flip1 = flip_coin_op()
    print_op(flip1.output)
    flip2 = flip_coin_op()
    print_op(flip2.output)

    with dsl.Condition(flip1.output != 'no-such-result'):  # always true
        flip3 = flip_coin_op()
        print_op(flip3.output)
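
The pipeline can then be compiled with the v2 compiler imported above; the output filename is illustrative:

compiler.Compiler().compile(pipeline_func=my_pipeline,
                            package_path='nested_conditions_pipeline.json')
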
Example #14
    df = df.drop([0], axis=0)
    weather_df = dfreplace(df, ',', '')
     
    for col in weather_df.columns:
        weather_df[col] = weather_df[col].astype(str)
        # Replace empty or whitespace-only strings with NaN
        weather_df.loc[weather_df[col].str.match(r'^\s*$'), col] = np.NaN
    print(weather_df.columns)
    print(weather_df)
    print('trying to write to GS')
    weather_df.to_parquet(raw_data_path, compression='GZIP')
    print('Done!')
    return raw_data_path

# %%
# create a KFP component
download_raw_data_op = comp.create_component_from_func(
    download_raw_data, output_component_file='download_raw_data.yaml', packages_to_install=['fastparquet', 'fsspec', 'gcsfs', "google-cloud-storage"])

# %%
"""
#### Component - Feature processing
"""

# %%
def feature_processing(raw_data_path: str, new_feature_data_path: str) -> str:
    '''Calculates features for our machine learning model.'''
    import pandas as pd
    from datetime import datetime

    # read dataframe
    weather_df = pd.read_parquet(raw_data_path)
    
Example #15
        predictions_path: Output path for the predictions.
        label_column: Column containing the label data.

    Annotations:
        author: Alexey Volkov <*****@*****.**>
    '''
    from pathlib import Path

    import numpy
    import xgboost

    csv_data_spec = data_path + '?format=csv'
    # Only specifying the column if it's passed.
    if label_column is not None:
        csv_data_spec += '&label_column=' + str(label_column)
    testing_data = xgboost.DMatrix(csv_data_spec)

    model = xgboost.Booster(model_file=model_path)

    predictions = model.predict(testing_data)

    Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
    numpy.savetxt(predictions_path, predictions)


if __name__ == '__main__':
    create_component_from_func(xgboost_predict,
                               output_component_file='component.yaml',
                               base_image='python:3.7',
                               packages_to_install=['xgboost==1.0.2'])
Example #16
        data_path: InputPath('ApacheArrowFeather'),
        output_data_path: OutputPath('ApacheParquet'),
):
    '''Converts Apache Arrow Feather to Apache Parquet.

    [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
    [Apache Parquet](https://parquet.apache.org/)

    Annotations:
        author: Alexey Volkov <*****@*****.**>
    '''
    from pyarrow import feather, parquet

    table = feather.read_table(data_path)
    parquet.write_table(table, output_data_path)


if __name__ == '__main__':
    create_component_from_func(
        convert_apache_arrow_feather_to_apache_parquet,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['pyarrow==0.17.1'],
        annotations={
            "author":
            "Alexey Volkov <*****@*****.**>",
            "canonical_location":
            "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_ApacheArrowFeather/component.yaml",
        },
    )
Example #17
        'metrics': metric_specs,
    }
    study = {'study_config': study_config}

    create_study_request = ml_api.projects().locations().studies().create(
        parent=f'projects/{gcp_project_id}/locations/{gcp_region}',
        studyId=study_id,
        body=study,
    )
    create_study_response = create_study_request.execute()
    study_name = create_study_response['name']
    return (study_name, )


if __name__ == '__main__':
    create_study_in_gcp_ai_platform_optimizer_op = create_component_from_func(
        create_study_in_gcp_ai_platform_optimizer,
        base_image='python:3.8',
        packages_to_install=[
            'google-api-python-client==1.12.3', 'google-cloud-storage==1.31.2',
            'google-auth==1.21.3'
        ],
        output_component_file='component.yaml',
        annotations={
            "author":
            "Alexey Volkov <*****@*****.**>",
            "canonical_location":
            "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/google-cloud/Optimizer/Create_study/component.yaml",
        },
    )
Example #18
    table_specs = [s for s in list_table_specs_response]
    print('table_specs=')
    print(table_specs)
    table_spec_name = table_specs[table_index].name

    list_column_specs_response = client.list_column_specs(table_spec_name)
    column_specs = [s for s in list_column_specs_response]
    print('column_specs=')
    print(column_specs)

    target_column_spec = [s for s in column_specs if s.display_name == target_column_name][0]
    feature_column_specs = [s for s in column_specs if s.display_name != target_column_name]
    feature_column_names = [s.name for s in feature_column_specs]

    import json
    return (target_column_spec.name, json.dumps(feature_column_names))


if __name__ == '__main__':
    from kfp.components import create_component_from_func

    automl_split_dataset_table_column_names_op = create_component_from_func(
        automl_split_dataset_table_column_names,
        output_component_file='component.yaml',
        base_image='python:3.7',
        annotations={
            "author": "Alexey Volkov <*****@*****.**>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/gcp/automl/split_dataset_table_column_names/component.yaml",
        },
    )
Example #19
    folds = list(splitter.split(df))

    fold_paths = [
        (train_1_path, test_1_path),
        (train_2_path, test_2_path),
        (train_3_path, test_3_path),
        (train_4_path, test_4_path),
        (train_5_path, test_5_path),
    ]

    for i in range(max_number_of_folds):
        (train_path, test_path) = fold_paths[i]
        if i < len(folds):
            (train_indices, test_indices) = folds[i]
            train_fold = df.iloc[train_indices]
            test_fold = df.iloc[test_indices]
        else:
            train_fold = df.iloc[0:0]
            test_fold = df.iloc[0:0]
        train_fold.to_csv(train_path, index=False)
        test_fold.to_csv(test_path, index=False)


if __name__ == '__main__':
    split_table_into_folds_op = create_component_from_func(
        split_table_into_folds,
        base_image='python:3.7',
        packages_to_install=['scikit-learn==0.23.1', 'pandas==1.0.5'],
        output_component_file='component.yaml',
    )
Example #20
    model.compile(
        loss=loss_name,
        optimizer=optimizer,
        metrics=metrics,
    )

    history = model.fit(
        x_train,
        y_train_one_hot,
        batch_size=batch_size,
        epochs=num_epochs,
        shuffle=True
    )

    model.save(model_path)

    metrics_history = {name: [float(value) for value in values] for name, values in history.history.items()}
    final_metrics = {name: values[-1] for name, values in metrics_history.items()}
    final_loss = final_metrics['loss']
    return (final_loss, final_metrics, metrics_history)


if __name__ == '__main__':
    keras_train_classifier_from_csv_op = create_component_from_func(
        keras_train_classifier_from_csv,
        base_image='tensorflow/tensorflow:2.2.0',
        packages_to_install=['keras==2.3.1', 'pandas==1.0.5'],
        output_component_file='component.yaml',
    )
Example #21
        number_of_items=number_of_items,
        max_absolute_error=max_absolute_error,
        mean_absolute_error=mean_absolute_error,
        mean_squared_error=mean_squared_error,
        root_mean_squared_error=root_mean_squared_error,
    )

    return (
        number_of_items,
        max_absolute_error,
        mean_absolute_error,
        mean_squared_error,
        root_mean_squared_error,
        metrics,
    )


if __name__ == '__main__':
    calculate_regression_metrics_from_csv_op = create_component_from_func(
        calculate_regression_metrics_from_csv,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['numpy==1.19.0'],
        annotations={
            "author":
            "Alexey Volkov <*****@*****.**>",
            "canonical_location":
            "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml",
        },
    )
Example #22
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Fail pipeline."""

from kfp import components, dsl


def fail():
    '''Fails'''
    import sys
    sys.exit(1)


fail_op = components.create_component_from_func(fail,
                                                base_image='alpine:latest')


@dsl.pipeline(name='fail_pipeline')
def fail_pipeline():
    fail_task = fail_op()
Example #23
    if label_column:
        column_descriptions = {label_column: 'Label'}
        column_description_path = tempfile.NamedTemporaryFile(
            delete=False).name
        with open(column_description_path, 'w') as column_description_file:
            for idx, kind in column_descriptions.items():
                column_description_file.write('{}\t{}\n'.format(idx, kind))
    else:
        column_description_path = None

    eval_data = Pool(
        data_path,
        column_description=column_description_path,
        has_header=True,
        delimiter=',',
    )

    model = CatBoost()
    model.load_model(model_path)

    predictions = model.predict(eval_data, prediction_type='Probability')
    numpy.savetxt(predictions_path, predictions)


if __name__ == '__main__':
    catboost_predict_class_probabilities_op = create_component_from_func(
        catboost_predict_class_probabilities,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['catboost==0.23'])
Example #24
from kfp.components import create_component_from_func


def build_list(
    item_1: dict = None,
    item_2: dict = None,
    item_3: dict = None,
    item_4: dict = None,
    item_5: dict = None,
) -> list:
    """Creates a JSON array from multiple items.

    Annotations:
        author: Alexey Volkov <*****@*****.**>
    """
    result = []
    for item in [item_1, item_2, item_3, item_4, item_5]:
        if item is not None:
            result.append(item)
    return result


if __name__ == '__main__':
    build_list_op = create_component_from_func(
        build_list,
        base_image='python:3.8',
        output_component_file='component.yaml',
    )
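
In a pipeline, the factory can aggregate several upstream outputs into one JSON list; a minimal sketch (the pipeline name and argument values are illustrative):

from kfp import dsl

@dsl.pipeline(name='build-list-demo')  # illustrative name
def build_list_demo():
    list_task = build_list_op(item_1={'a': 1}, item_2={'b': 2})
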
Example #25
    input_config = {
        'bigquery_source': {
            'input_uri': input_uri,
        },
    }
    response = client.import_data(
        dataset_path,
        input_config,
        retry or google.api_core.gapic_v1.method.DEFAULT,
        timeout or google.api_core.gapic_v1.method.DEFAULT,
        metadata,
    )
    result = response.result()
    print(result)
    metadata = response.metadata
    print(metadata)
    return (dataset_path)


if __name__ == '__main__':
    from kfp.components import create_component_from_func
    automl_import_data_from_bigquery_op = create_component_from_func(
        automl_import_data_from_bigquery,
        output_component_file='component.yaml',
        base_image='python:3.7',
        annotations={
            "author": "Alexey Volkov <*****@*****.**>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/gcp/automl/import_data_from_bigquery/component.yaml",
        },
    )
Example #26
        should_stop_trial = True
        complete_response = trials_api.complete(
            name=fix_resource_name(trial_name), ).execute()
        return (trial_name, complete_response, should_stop_trial)
    else:
        check_early_stopping_response = trials_api.checkEarlyStoppingState(
            name=fix_resource_name(trial_name), ).execute()
        operation_name = check_early_stopping_response['name']
        while True:
            get_operation_response = operations_api.get(
                name=fix_resource_name(operation_name), ).execute()
            if get_operation_response.get('done'):
                break
            logging.info('Not finished yet: ' + str(get_operation_response))
            time.sleep(10)
        operation_response = get_operation_response['response']
        should_stop_trial = operation_response['shouldStop']
        return (trial_name, add_measurement_response, should_stop_trial)


if __name__ == '__main__':
    add_measurement_for_trial_in_gcp_ai_platform_optimizer_op = create_component_from_func(
        add_measurement_for_trial_in_gcp_ai_platform_optimizer,
        base_image='python:3.8',
        packages_to_install=[
            'google-api-python-client==1.12.3', 'google-cloud-storage==1.31.2',
            'google-auth==1.21.3'
        ],
        output_component_file='component.yaml',
    )
Example #27
from kfp.components import create_component_from_func, InputPath, OutputPath


def convert_to_tensorflow_saved_model_from_onnx_model(
    model_path: InputPath('OnnxModel'),
    converted_model_path: OutputPath('TensorflowSavedModel'),
):
    import onnx
    import onnx_tf

    onnx_model = onnx.load(model_path)
    tf_rep = onnx_tf.backend.prepare(onnx_model)
    tf_rep.export_graph(converted_model_path)

    
if __name__ == '__main__':
    convert_to_tensorflow_saved_model_from_onnx_model_op = create_component_from_func(
        convert_to_tensorflow_saved_model_from_onnx_model,
        output_component_file='component.yaml',
        base_image='tensorflow/tensorflow:2.4.1',
        packages_to_install=['onnx-tf==1.7.0', 'onnx==1.8.0'],  # onnx-tf==1.7.0 is not compatible with onnx==1.8.1
        annotations={
            "author": "Alexey Volkov <*****@*****.**>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/OnnxModel/to_TensorflowSavedModel/component.yaml",
        },
    )
Example #28
            model_format=model_format,
            gcs_destination=automl.GcsDestination(
                output_uri_prefix=gcs_output_uri_prefix, ),
        ),
    )

    print('Operation started:')
    print(response.operation)
    result = response.result()
    metadata = response.metadata
    print('Operation finished:')
    print(metadata)
    return (metadata.export_model_details.output_info.gcs_output_directory, )


if __name__ == '__main__':
    automl_export_model_to_gcs_op = create_component_from_func(
        automl_export_model_to_gcs,
        output_component_file='component.yaml',
        base_image='python:3.8',
        packages_to_install=[
            'google-cloud-automl==2.0.0',
        ],
        annotations={
            "author":
            "Alexey Volkov <*****@*****.**>",
            "canonical_location":
            "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/gcp/automl/export_model_to_gcs/component.yaml",
        },
    )
Example #29
"""Two step v2-compatible pipeline."""

from kfp import components, dsl
from kfp.components import InputPath, OutputPath


def preprocess(uri: str, some_int: int, output_parameter_one: OutputPath(int),
               output_dataset_one: OutputPath('Dataset')):
    '''Dummy Preprocess Step.'''
    with open(output_dataset_one, 'w') as f:
        f.write('Output dataset')
    with open(output_parameter_one, 'w') as f:
        f.write("{}".format(1234))


preprocess_op = components.create_component_from_func(preprocess,
                                                      base_image='python:3.9')


@components.create_component_from_func
def train_op(dataset: InputPath('Dataset'),
             model: OutputPath('Model'),
             num_steps: int = 100):
    '''Dummy Training Step.'''

    with open(dataset, 'r') as input_file:
        input_string = input_file.read()
        with open(model, 'w') as output_file:
            for i in range(num_steps):
                output_file.write("Step {}\n{}\n=====\n".format(
                    i, input_string))
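
A pipeline wiring the two components together might look like this; a minimal sketch (the pipeline name and default values are illustrative):

@dsl.pipeline(name='two-step-pipeline')  # illustrative name
def two_step_pipeline(uri: str = 'gs://bucket/dataset', some_int: int = 12):
    preprocess_task = preprocess_op(uri=uri, some_int=some_int)
    train_task = train_op(
        dataset=preprocess_task.outputs['output_dataset_one'],
        num_steps=preprocess_task.outputs['output_parameter_one'],
    )
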
Example #30
def run_component(args):
    OUT_COMPONENTS_DIR = args.out_component_dir
    get_word_component = cpt.create_component_from_func(get_word,
                                                        output_component_file=f'{OUT_COMPONENTS_DIR}/get_word.component')