def notebook_pipeline():
    """A pipeline to run a Jupyter notebook with elyra-ai/kfp-notebook and Papermill."""

    from kfp_notebook.pipeline import NotebookOp

    notebook_op = NotebookOp(name="${name}",
                             notebook="${notebook}",
                             cos_endpoint="${cos_endpoint}",
                             cos_bucket="${cos_bucket}",
                             cos_directory="${cos_directory}",
                             cos_dependencies_archive="${cos_dependencies_archive}",
                             requirements_url="${requirements_url}",
                             image="${image}")

    from kubernetes.client.models import V1EnvVar

    notebook_op.container.add_env_variable(V1EnvVar(name='AWS_ACCESS_KEY_ID', value="${cos_username}"))
    notebook_op.container.add_env_variable(V1EnvVar(name='AWS_SECRET_ACCESS_KEY', value="${cos_password}"))

    from kfp import onprem

    notebook_op.container.add_env_variable(V1EnvVar(name='DATA_DIR', value="${mount_path}"))
    notebook_op.apply(onprem.mount_pvc(pvc_name='${dataset_pvc}',
                                       volume_name='${dataset_pvc}',
                                       volume_mount_path='${mount_path}'))
Example No. 2
def test_construct_with_env_variables_tekton():
    notebook_op = NotebookOp(name="test",
                             pipeline_name="test-pipeline",
                             experiment_name="experiment-name",
                             notebook="test_notebook.ipynb",
                             cos_endpoint="http://testserver:32525",
                             cos_bucket="test_bucket",
                             cos_directory="test_directory",
                             cos_dependencies_archive="test_archive.tgz",
                             pipeline_envs={
                                 "ENV_VAR_ONE": "1",
                                 "ENV_VAR_TWO": "2",
                                 "ENV_VAR_THREE": "3"
                             },
                             image="test/image:dev",
                             workflow_engine="Tekton")

    confirmation_names = [
        "ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE", "ELYRA_RUN_NAME"
    ]
    confirmation_values = ["1", "2", "3"]
    field_path = "metadata.annotations['pipelines.kubeflow.org/run_name']"
    for env_val in notebook_op.container.env:
        assert env_val.name in confirmation_names
        confirmation_names.remove(env_val.name)
        if env_val.name == 'ELYRA_RUN_NAME':
            assert env_val.value_from.field_ref.field_path == field_path, env_val.value_from.field_ref
        else:
            assert env_val.value in confirmation_values
            confirmation_values.remove(env_val.value)

    # Verify confirmation values have been drained.
    assert len(confirmation_names) == 0
    assert len(confirmation_values) == 0
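The Tekton variant above verifies that ELYRA_RUN_NAME is injected through the Kubernetes downward API (a field_ref pointing at a pod annotation) rather than as a literal value. As a hedged sketch, an environment variable of that shape can be built with the Kubernetes Python client models as shown below; this mirrors what the assertions check and is not the library's own construction code.

# Sketch only: an annotation-sourced env var of the shape asserted above.
from kubernetes.client.models import (V1EnvVar, V1EnvVarSource,
                                      V1ObjectFieldSelector)

run_name_env = V1EnvVar(
    name="ELYRA_RUN_NAME",
    value_from=V1EnvVarSource(
        field_ref=V1ObjectFieldSelector(
            field_path="metadata.annotations['pipelines.kubeflow.org/run_name']")))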
Example No. 3
def test_construct_with_env_variables():
    notebook_op = NotebookOp(name="test",
                             notebook="test_notebook.ipynb",
                             cos_endpoint="http://testserver:32525",
                             cos_bucket="test_bucket",
                             cos_directory="test_directory",
                             cos_dependencies_archive="test_archive.tgz",
                             pipeline_envs={
                                 "ENV_VAR_ONE": "1",
                                 "ENV_VAR_TWO": "2",
                                 "ENV_VAR_THREE": "3"
                             },
                             image="test/image:dev")

    confirmation_names = ["ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE"]
    confirmation_values = ["1", "2", "3"]
    for env_val in notebook_op.container.env:
        assert env_val.name in confirmation_names
        assert env_val.value in confirmation_values
        confirmation_names.remove(env_val.name)
        confirmation_values.remove(env_val.value)

    # Verify confirmation values have been drained.
    assert len(confirmation_names) == 0
    assert len(confirmation_values) == 0
Example No. 4
def notebook_op():
    return NotebookOp(name="test",
                      notebook="test_notebook.ipynb",
                      cos_endpoint="http://testserver:32525",
                      cos_bucket="test_bucket",
                      cos_directory="test_directory",
                      cos_dependencies_archive="test_archive.tgz",
                      image="test/image:dev")
Example No. 5
def test_fail_without_name():
    with pytest.raises(TypeError):
        NotebookOp(notebook="test_notebook.ipynb",
                   cos_endpoint="http://testserver:32525",
                   cos_bucket="test_bucket",
                   cos_directory="test_directory",
                   cos_dependencies_archive="test_archive.tgz",
                   image="test/image:dev")
Example No. 6
def test_fail_without_runtime_image():
    with pytest.raises(ValueError) as error_info:
        NotebookOp(name="test",
                   notebook="test_notebook.ipynb",
                   cos_endpoint="http://testserver:32525",
                   cos_bucket="test_bucket",
                   cos_directory="test_directory",
                   cos_dependencies_archive="test_archive.tgz")
    assert "You need to provide an image." == str(error_info.value)
Example No. 7
def test_properly_set_python_script_name_when_in_subdirectory():
    notebook_op = NotebookOp(name="test",
                             notebook="foo/test.py",
                             cos_endpoint="http://testserver:32525",
                             cos_bucket="test_bucket",
                             cos_directory="test_directory",
                             cos_dependencies_archive="test_archive.tgz",
                             image="test/image:dev")
    assert "test.py" == notebook_op.notebook_name
Example No. 8
def test_override_bootstrap_url():
    notebook_op = NotebookOp(name="test",
                             bootstrap_script_url="https://test.server.com/bootscript.py",
                             notebook="test_notebook.ipynb",
                             cos_endpoint="http://testserver:32525",
                             cos_bucket="test_bucket",
                             cos_directory="test_directory",
                             cos_dependencies_archive="test_archive.tgz",
                             image="test/image:dev")
    assert notebook_op.bootstrap_script_url == "https://test.server.com/bootscript.py"
Example No. 9
def test_override_requirements_url():
    notebook_op = NotebookOp(name="test",
                             requirements_url="https://test.server.com/requirements.py",
                             notebook="test_notebook.ipynb",
                             cos_endpoint="http://testserver:32525",
                             cos_bucket="test_bucket",
                             cos_directory="test_directory",
                             cos_dependencies_archive="test_archive.tgz",
                             image="test/image:dev")
    assert notebook_op.requirements_url == "https://test.server.com/requirements.py"
Example No. 10
def test_fail_without_notebook():
    with pytest.raises(TypeError):
        NotebookOp(name="test",
                   pipeline_name="test-pipeline",
                   experiment_name="experiment-name",
                   cos_endpoint="http://testserver:32525",
                   cos_bucket="test_bucket",
                   cos_directory="test_directory",
                   cos_dependencies_archive="test_archive.tgz",
                   image="test/image:dev")
Example No. 11
def test_fail_with_empty_string_as_notebook():
    with pytest.raises(ValueError) as error_info:
        NotebookOp(name="test",
                   notebook="",
                   cos_endpoint="http://testserver:32525",
                   cos_bucket="test_bucket",
                   cos_directory="test_directory",
                   cos_dependencies_archive="test_archive.tgz",
                   image="test/image:dev")
    assert "You need to provide a notebook." == str(error_info.value)
Example No. 12
def test_construct_with_bad_pipeline_outputs():
    with pytest.raises(ValueError) as error_info:
        NotebookOp(name="test",
                   notebook="test_notebook.ipynb",
                   cos_endpoint="http://testserver:32525",
                   cos_bucket="test_bucket",
                   cos_directory="test_directory",
                   cos_dependencies_archive="test_archive.tgz",
                   pipeline_outputs=['test_output1.txt', 'test;output2.txt'],
                   image="test/image:dev")
    assert "Illegal character (;) found in filename 'test;output2.txt'." == str(error_info.value)
Example No. 13
def test_user_volume_size():
    notebook_op = NotebookOp(name="test",
                             notebook="test_notebook.ipynb",
                             cos_endpoint="http://testserver:32525",
                             cos_bucket="test_bucket",
                             cos_directory="test_directory",
                             cos_dependencies_archive="test_archive.tgz",
                             image="test/image:dev",
                             emptydir_volume_size='20Gi')
    assert notebook_op.emptydir_volume_size == '20Gi'
    assert notebook_op.container_work_dir_root_path == '/mnt/'
Example No. 14
def test_fail_with_empty_string_as_name():
    with pytest.raises(ValueError):
        NotebookOp(name="",
                   pipeline_name="test-pipeline",
                   experiment_name="experiment-name",
                   notebook="test_notebook.ipynb",
                   cos_endpoint="http://testserver:32525",
                   cos_bucket="test_bucket",
                   cos_directory="test_directory",
                   cos_dependencies_archive="test_archive.tgz",
                   image="test/image:dev")
Example No. 15
def test_properly_set_notebook_name_when_in_subdirectory():
    notebook_op = NotebookOp(name="test",
                             pipeline_name="test-pipeline",
                             experiment_name="experiment-name",
                             notebook="foo/test_notebook.ipynb",
                             cos_endpoint="http://testserver:32525",
                             cos_bucket="test_bucket",
                             cos_directory="test_directory",
                             cos_dependencies_archive="test_archive.tgz",
                             image="test/image:dev")
    assert "test_notebook.ipynb" == notebook_op.notebook_name
Example No. 16
def test_construct_with_only_pipeline_outputs():
    notebook_op = NotebookOp(name="test",
                             notebook="test_notebook.ipynb",
                             cos_endpoint="http://testserver:32525",
                             cos_bucket="test_bucket",
                             cos_directory="test_directory",
                             cos_dependencies_archive="test_archive.tgz",
                             pipeline_outputs=['test_output1.txt', 'test,output2.txt'],
                             pipeline_envs={},
                             image="test/image:dev")
    assert notebook_op.pipeline_outputs == ['test_output1.txt', 'test,output2.txt']
    assert '--outputs "test_output1.txt;test,output2.txt"' in notebook_op.container.args[0]
Example No. 17
def test_user_crio_volume_creation():
    notebook_op = NotebookOp(name="test",
                             notebook="test_notebook.ipynb",
                             cos_endpoint="http://testserver:32525",
                             cos_bucket="test_bucket",
                             cos_directory="test_directory",
                             cos_dependencies_archive="test_archive.tgz",
                             image="test/image:dev",
                             emptydir_volume_size='20Gi')
    assert notebook_op.emptydir_volume_size == '20Gi'
    assert notebook_op.container_work_dir_root_path == '/opt/app-root/src/'
    assert len(notebook_op.container.volume_mounts) == 1
    assert len(notebook_op.container.env) == 1
Example No. 18
def test_user_crio_volume_creation():
    notebook_op = NotebookOp(name="test",
                             pipeline_name="test-pipeline",
                             experiment_name="experiment-name",
                             notebook="test_notebook.ipynb",
                             cos_endpoint="http://testserver:32525",
                             cos_bucket="test_bucket",
                             cos_directory="test_directory",
                             cos_dependencies_archive="test_archive.tgz",
                             image="test/image:dev",
                             emptydir_volume_size='20Gi')
    assert notebook_op.emptydir_volume_size == '20Gi'
    assert notebook_op.container_work_dir_root_path == '/opt/app-root/src/'
    assert len(notebook_op.container.volume_mounts) == 1
    # Environment variables: PYTHONPATH, ELYRA_RUN_NAME
    assert len(notebook_op.container.env) == 2, notebook_op.container.env
Example No. 19
def run_notebook_op(op_name):

    notebook_op = NotebookOp(name=op_name,
                             notebook='Untitled.ipynb',
                             cos_endpoint=cos_endpoint,
                             cos_bucket=cos_bucket,
                             cos_directory=cos_directory,
                             cos_dependencies_archive=cos_dependencies_archive,
                             pipeline_outputs=outputs,
                             pipeline_inputs=inputs,
                             image=image)

    notebook_op.container.add_env_variable(
        V1EnvVar(name='AWS_ACCESS_KEY_ID', value=cos_username))
    notebook_op.container.add_env_variable(
        V1EnvVar(name='AWS_SECRET_ACCESS_KEY', value=cos_password))
    notebook_op.container.set_image_pull_policy('Always')
    return notebook_op
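run_notebook_op() returns a configured NotebookOp, so it can be reused to build a multi-step pipeline. The sketch below is illustrative only: the pipeline name and op names are assumptions, and it relies on the same module-level variables (cos_endpoint, image, etc.) that the helper above already assumes.

# Illustrative sketch: chaining two ops produced by run_notebook_op().
from kfp import dsl


@dsl.pipeline(name="notebook-chain",                 # hypothetical pipeline name
              description="Two chained notebook operations")
def notebook_chain_pipeline():
    first = run_notebook_op("prepare-data")          # hypothetical op names
    second = run_notebook_op("train-model")
    second.after(first)   # run the second notebook only after the first completes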
Example No. 20
def notebook_pipeline():
    """A pipeline to run a Jupyter notebook with elyra-ai/kfp-notebook and Papermill."""

    from kfp_notebook.pipeline import NotebookOp

    notebook_op = NotebookOp(name="${name}",
                             notebook="${notebook}",
                             cos_endpoint="${cos_endpoint}",
                             cos_bucket="${cos_bucket}",
                             cos_directory="${cos_directory}",
                             cos_dependencies_archive="${cos_dependencies_archive}",
                             requirements_url="${requirements_url}",
                             image="${image}")

    from kubernetes.client.models import V1EnvVar

    notebook_op.container.add_env_variable(V1EnvVar(name='AWS_ACCESS_KEY_ID', value="${cos_username}"))
    notebook_op.container.add_env_variable(V1EnvVar(name='AWS_SECRET_ACCESS_KEY', value="${cos_password}"))
    notebook_op.container.set_image_pull_policy('Always')
Example No. 21
def test_construct_wildcard_outputs():
    notebook_op = NotebookOp(
        name="test",
        pipeline_name="test-pipeline",
        experiment_name="experiment-name",
        notebook="test_notebook.ipynb",
        cos_endpoint="http://testserver:32525",
        cos_bucket="test_bucket",
        cos_directory="test_directory",
        cos_dependencies_archive="test_archive.tgz",
        pipeline_inputs=['test_input1.txt', 'test_input2.txt'],
        pipeline_outputs=['test_out*', 'foo.tar'],
        image="test/image:dev")
    assert notebook_op.pipeline_inputs == [
        'test_input1.txt', 'test_input2.txt'
    ]
    assert notebook_op.pipeline_outputs == ['test_out*', 'foo.tar']

    assert '--inputs "test_input1.txt;test_input2.txt"' in notebook_op.container.args[
        0]
    assert '--outputs "test_out*;foo.tar"' in notebook_op.container.args[0]
Example No. 22
    def _cc_pipeline(self,
                     pipeline,
                     pipeline_name,
                     pipeline_version='',
                     experiment_name='',
                     cos_directory=None,
                     export=False):

        runtime_configuration = self._get_metadata_configuration(namespace=MetadataManager.NAMESPACE_RUNTIMES,
                                                                 name=pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_secret = runtime_configuration.metadata.get('cos_secret')

        if cos_directory is None:
            cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        self.log_pipeline_info(pipeline_name,
                               f"processing pipeline dependencies to: {cos_endpoint} "
                               f"bucket: {cos_bucket} folder: {cos_directory}")
        t0_all = time.time()

        emptydir_volume_size = ''
        container_runtime = bool(os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(pipeline.operations)

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(pipeline, sorted_operations)

        for operation in sorted_operations:

            operation_artifact_archive = self._get_dependency_archive_name(operation)

            self.log.debug("Creating pipeline component :\n {op} archive : {archive}".format(
                           op=operation, archive=operation_artifact_archive))

            if container_runtime:
                # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = self._collect_envs(operation,
                                               cos_secret=cos_secret,
                                               cos_username=cos_username,
                                               cos_password=cos_password)

            # Include any envs set on the operation
            pipeline_envs.update(operation.env_vars_as_dict(logger=self.log))

            sanitized_operation_name = self._sanitize_operation_name(operation.name)

            # create pipeline operation
            notebook_ops[operation.id] = NotebookOp(name=sanitized_operation_name,
                                                    pipeline_name=pipeline_name,
                                                    experiment_name=experiment_name,
                                                    notebook=operation.filename,
                                                    cos_endpoint=cos_endpoint,
                                                    cos_bucket=cos_bucket,
                                                    cos_directory=cos_directory,
                                                    cos_dependencies_archive=operation_artifact_archive,
                                                    pipeline_version=pipeline_version,
                                                    pipeline_source=pipeline.source,
                                                    pipeline_inputs=operation.inputs,
                                                    pipeline_outputs=operation.outputs,
                                                    pipeline_envs=pipeline_envs,
                                                    emptydir_volume_size=emptydir_volume_size,
                                                    cpu_request=operation.cpu,
                                                    mem_request=operation.memory,
                                                    gpu_limit=operation.gpu,
                                                    image=operation.runtime_image,
                                                    file_outputs={
                                                        'mlpipeline-metrics':
                                                            '{}/mlpipeline-metrics.json'
                                                            .format(pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                                                        'mlpipeline-ui-metadata':
                                                            '{}/mlpipeline-ui-metadata.json'
                                                            .format(pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                                                    })

            if cos_secret and not export:
                notebook_ops[operation.id].apply(use_aws_secret(cos_secret))

            image_namespace = self._get_metadata_configuration(namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)
            for image_instance in image_namespace:
                if image_instance.metadata['image_name'] == operation.runtime_image and \
                   image_instance.metadata.get('pull_policy'):
                    notebook_ops[operation.id].container.set_image_pull_policy(image_instance.metadata['pull_policy'])

            self.log_pipeline_info(pipeline_name,
                                   f"processing operation dependencies for id: {operation.id}",
                                   operation_name=operation.name)

            self._upload_dependencies_to_object_store(runtime_configuration,
                                                      cos_directory,
                                                      operation)

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[parent_operation_id]  # Parent Operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all))

        return notebook_ops
Example No. 23
    def _cc_pipeline(self, pipeline, pipeline_name):

        runtime_configuration = self._get_runtime_configuration(pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        self.log_pipeline_info(pipeline_name,
                               f"processing pipeline dependencies to: {cos_endpoint} "
                               f"bucket: {cos_bucket} folder: {pipeline_name}")
        t0_all = time.time()

        emptydir_volume_size = ''
        container_runtime = bool(os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which themselves are derived from the outputs of their parent)
        # and its parent's outputs.
        for operation in pipeline.operations.values():
            parent_io = []  # gathers inputs & outputs relative to parent
            for parent_operation_id in operation.parent_operations:
                parent_operation = pipeline.operations[parent_operation_id]
                if parent_operation.inputs:
                    parent_io.extend(parent_operation.inputs)
                if parent_operation.outputs:
                    parent_io.extend(parent_operation.outputs)

                if parent_io:
                    operation.inputs = parent_io

        for operation in pipeline.operations.values():

            operation_artifact_archive = self._get_dependency_archive_name(operation)

            self.log.debug("Creating pipeline component :\n {op} archive : {archive}".format(
                           op=operation, archive=operation_artifact_archive))

            if container_runtime:
                # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = dict()
            pipeline_envs['AWS_ACCESS_KEY_ID'] = cos_username
            pipeline_envs['AWS_SECRET_ACCESS_KEY'] = cos_password
            # Convey pipeline logging enablement to operation
            pipeline_envs['ELYRA_ENABLE_PIPELINE_INFO'] = str(self.enable_pipeline_info)
            # Setting identifies a writable directory in the container image.
            # Only Unix-style path spec is supported.
            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'] = self.WCD

            if operation.env_vars:
                for env_var in operation.env_vars:
                    # Strip any of these special characters from both key and value
                    # Splits on the first occurrence of '='
                    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                    # Should be non empty key with a value
                    if len(result) == 2 and result[0] != '':
                        pipeline_envs[result[0]] = result[1]

            # create pipeline operation
            notebook_ops[operation.id] = NotebookOp(name=operation.name,
                                                    notebook=operation.filename,
                                                    cos_endpoint=cos_endpoint,
                                                    cos_bucket=cos_bucket,
                                                    cos_directory=cos_directory,
                                                    cos_dependencies_archive=operation_artifact_archive,
                                                    pipeline_inputs=operation.inputs,
                                                    pipeline_outputs=operation.outputs,
                                                    pipeline_envs=pipeline_envs,
                                                    emptydir_volume_size=emptydir_volume_size,
                                                    image=operation.runtime_image,
                                                    file_outputs={
                                                        'mlpipeline-metrics':
                                                            '{}/mlpipeline-metrics.json'
                                                            .format(pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                                                        'mlpipeline-ui-metadata':
                                                            '{}/mlpipeline-ui-metadata.json'
                                                            .format(pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                                                    })

            self.log_pipeline_info(pipeline_name,
                                   f"processing operation dependencies for id: {operation.id}",
                                   operation_name=operation.name)

            # upload operation dependencies to object storage
            try:
                t0 = time.time()
                dependency_archive_path = self._generate_dependency_archive(operation)
                self.log_pipeline_info(pipeline_name,
                                       f"generated dependency archive: {dependency_archive_path}",
                                       operation_name=operation.name,
                                       duration=(time.time() - t0))

                cos_client = CosClient(config=runtime_configuration)
                t0 = time.time()
                cos_client.upload_file_to_dir(dir=cos_directory,
                                              file_name=operation_artifact_archive,
                                              file_path=dependency_archive_path)
                self.log_pipeline_info(pipeline_name,
                                       f"uploaded dependency archive to: {cos_directory}/{operation_artifact_archive}",
                                       operation_name=operation.name,
                                       duration=(time.time() - t0))

            except FileNotFoundError as ex:
                self.log.error("Dependencies were not found building archive for operation: {}".
                               format(operation.name), exc_info=True)
                raise FileNotFoundError("Node '{}' referenced dependencies that were not found: {}".
                                        format(operation.name, ex))

            except BaseException as ex:
                self.log.error("Error uploading artifacts to object storage for operation: {}".
                               format(operation.name), exc_info=True)
                raise ex from ex

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[parent_operation_id]  # Parent Operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all))

        return notebook_ops
Example No. 24
    def _cc_pipeline(self,
                     pipeline,
                     pipeline_name,
                     pipeline_version='',
                     experiment_name='',
                     cos_directory=None):

        runtime_configuration = self._get_metadata_configuration(
            namespace=MetadataManager.NAMESPACE_RUNTIMES,
            name=pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        if cos_directory is None:
            cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        self.log_pipeline_info(
            pipeline_name,
            f"processing pipeline dependencies to: {cos_endpoint} "
            f"bucket: {cos_bucket} folder: {cos_directory}")
        t0_all = time.time()

        emptydir_volume_size = ''
        container_runtime = bool(
            os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(
            pipeline.operations)

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(
            pipeline, sorted_operations)

        for operation in sorted_operations:

            operation_artifact_archive = self._get_dependency_archive_name(
                operation)

            self.log.debug(
                "Creating pipeline component :\n {op} archive : {archive}".
                format(op=operation, archive=operation_artifact_archive))

            if container_runtime:
                # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = dict()
            pipeline_envs['AWS_ACCESS_KEY_ID'] = cos_username
            pipeline_envs['AWS_SECRET_ACCESS_KEY'] = cos_password
            # Convey pipeline logging enablement to operation
            pipeline_envs['ELYRA_ENABLE_PIPELINE_INFO'] = str(
                self.enable_pipeline_info)
            # Setting identifies a writable directory in the container image.
            # Only Unix-style path spec is supported.
            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'] = self.WCD

            if operation.env_vars:
                for env_var in operation.env_vars:
                    # Strip any of these special characters from both key and value
                    # Splits on the first occurrence of '='
                    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                    # Should be non empty key with a value
                    if len(result) == 2 and result[0] != '':
                        pipeline_envs[result[0]] = result[1]

            sanitized_operation_name = self._sanitize_operation_name(
                operation.name)

            # create pipeline operation
            notebook_ops[operation.id] = NotebookOp(
                name=sanitized_operation_name,
                pipeline_name=pipeline_name,
                experiment_name=experiment_name,
                notebook=operation.filename,
                cos_endpoint=cos_endpoint,
                cos_bucket=cos_bucket,
                cos_directory=cos_directory,
                cos_dependencies_archive=operation_artifact_archive,
                pipeline_version=pipeline_version,
                pipeline_source=pipeline.source,
                pipeline_inputs=operation.inputs,
                pipeline_outputs=operation.outputs,
                pipeline_envs=pipeline_envs,
                emptydir_volume_size=emptydir_volume_size,
                cpu_request=operation.cpu,
                mem_request=operation.memory,
                gpu_limit=operation.gpu,
                image=operation.runtime_image,
                file_outputs={
                    'mlpipeline-metrics':
                    '{}/mlpipeline-metrics.json'.format(
                        pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                    'mlpipeline-ui-metadata':
                    '{}/mlpipeline-ui-metadata.json'.format(
                        pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                })

            image_namespace = self._get_metadata_configuration(
                namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)
            for image_instance in image_namespace:
                if image_instance.metadata['image_name'] == operation.runtime_image and \
                   image_instance.metadata.get('pull_policy'):
                    notebook_ops[operation.id].container.set_image_pull_policy(
                        image_instance.metadata['pull_policy'])

            self.log_pipeline_info(
                pipeline_name,
                f"processing operation dependencies for id: {operation.id}",
                operation_name=operation.name)

            self._upload_dependencies_to_object_store(runtime_configuration,
                                                      cos_directory, operation)

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[
                    parent_operation_id]  # Parent Operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name,
                               "pipeline dependencies processed",
                               duration=(time.time() - t0_all))

        return notebook_ops
Example No. 25
    def _cc_pipeline(self,
                     pipeline,
                     pipeline_name,
                     pipeline_version='',
                     experiment_name='',
                     cos_directory=None,
                     export=False):

        runtime_configuration = self._get_metadata_configuration(
            namespace=MetadataManager.NAMESPACE_RUNTIMES,
            name=pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_secret = runtime_configuration.metadata.get('cos_secret')

        if cos_directory is None:
            cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        self.log_pipeline_info(
            pipeline_name,
            f"processing pipeline dependencies to: {cos_endpoint} "
            f"bucket: {cos_bucket} folder: {cos_directory}")
        t0_all = time.time()

        emptydir_volume_size = ''
        container_runtime = bool(
            os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(
            pipeline.operations)

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(
            pipeline, sorted_operations)

        for operation in sorted_operations:

            if container_runtime:
                # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = self._collect_envs(operation,
                                               cos_secret=cos_secret,
                                               cos_username=cos_username,
                                               cos_password=cos_password)

            sanitized_operation_name = self._sanitize_operation_name(
                operation.name)

            # Create pipeline operation
            # If operation is one of the "standard" set of NBs or scripts, construct custom NotebookOp
            if operation.classifier in [
                    "execute-notebook-node", "execute-python-node",
                    "execute-r-node"
            ]:

                operation_artifact_archive = self._get_dependency_archive_name(
                    operation)

                self.log.debug(
                    "Creating pipeline component :\n {op} archive : {archive}".
                    format(op=operation, archive=operation_artifact_archive))

                notebook_ops[operation.id] = NotebookOp(
                    name=sanitized_operation_name,
                    pipeline_name=pipeline_name,
                    experiment_name=experiment_name,
                    notebook=operation.filename,
                    cos_endpoint=cos_endpoint,
                    cos_bucket=cos_bucket,
                    cos_directory=cos_directory,
                    cos_dependencies_archive=operation_artifact_archive,
                    pipeline_version=pipeline_version,
                    pipeline_source=pipeline.source,
                    pipeline_inputs=operation.inputs,
                    pipeline_outputs=operation.outputs,
                    pipeline_envs=pipeline_envs,
                    emptydir_volume_size=emptydir_volume_size,
                    cpu_request=operation.cpu,
                    mem_request=operation.memory,
                    gpu_limit=operation.gpu,
                    image=operation.runtime_image,
                    file_outputs={
                        'mlpipeline-metrics':
                        '{}/mlpipeline-metrics.json'.format(
                            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                        'mlpipeline-ui-metadata':
                        '{}/mlpipeline-ui-metadata.json'.format(
                            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                    })

                # TODO Can we move all of this to apply to non-standard components as well? Test when servers are up
                if cos_secret and not export:
                    notebook_ops[operation.id].apply(
                        use_aws_secret(cos_secret))

                image_namespace = self._get_metadata_configuration(
                    namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)
                for image_instance in image_namespace:
                    if image_instance.metadata['image_name'] == operation.runtime_image and \
                            image_instance.metadata.get('pull_policy'):
                        notebook_ops[operation.id].container. \
                            set_image_pull_policy(image_instance.metadata['pull_policy'])

                self.log_pipeline_info(
                    pipeline_name,
                    f"processing operation dependencies for id: {operation.id}",
                    operation_name=operation.name)

                self._upload_dependencies_to_object_store(
                    runtime_configuration, cos_directory, operation)

            # If operation is a "non-standard" component, load it's spec and create operation with factory function
            else:
                component_source = {}
                component_source[
                    operation.
                    component_source_type] = operation.component_source

                # Build component task factory
                try:
                    factory_function = components.load_component(
                        **component_source)
                except Exception:
                    # TODO Fix error messaging and break exceptions down into categories
                    self.log.error(
                        f"There was an error while loading component spec for {operation.name}."
                    )
                    raise RuntimeError(
                        f"There was an error while loading component spec for {operation.name}."
                    )

                # Add factory function, which returns a ContainerOp task instance, to pipeline operation dict
                try:
                    notebook_ops[operation.id] = factory_function(
                        **operation.component_params)
                except Exception:
                    # TODO Fix error messaging and break exceptions down into categories
                    self.log.error(
                        f"There was an error while constructing component {operation.name}."
                    )
                    raise RuntimeError(
                        f"There was an error while constructing component {operation.name}."
                    )

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[
                    parent_operation_id]  # Parent Operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name,
                               "pipeline dependencies processed",
                               duration=(time.time() - t0_all))

        return notebook_ops
Example No. 26
def test_normalize_label_value():
    valid_middle_chars = '-_.'

    # test min length
    assert NotebookOp._normalize_label_value(None) == ''
    assert NotebookOp._normalize_label_value('') == ''
    # test max length (63)
    assert NotebookOp._normalize_label_value('a' * 63) ==\
        'a' * 63
    assert NotebookOp._normalize_label_value('a' * 64) ==\
        'a' * 63  # truncated
    # test first and last char
    assert NotebookOp._normalize_label_value('1') == '1'
    assert NotebookOp._normalize_label_value('22') == '22'
    assert NotebookOp._normalize_label_value('3_3') == '3_3'
    assert NotebookOp._normalize_label_value('4u4') == '4u4'
    assert NotebookOp._normalize_label_value('5$5') == '5_5'

    # test first char
    for c in string.printable:
        if c in string.ascii_letters + string.digits:
            # first char is valid
            # no length violation
            assert NotebookOp._normalize_label_value(c) == c
            assert NotebookOp._normalize_label_value(c + 'B') == c + 'B'
            # max length
            assert NotebookOp._normalize_label_value(c + 'B' * 62) ==\
                (c + 'B' * 62)
            # max length exceeded
            assert NotebookOp._normalize_label_value(c + 'B' * 63) ==\
                (c + 'B' * 62)  # truncated
        else:
            # first char is invalid, e.g. '#a', and becomes the
            # second char, which might require replacement
            rv = c
            if c not in valid_middle_chars:
                rv = '_'
            # no length violation
            assert NotebookOp._normalize_label_value(c) == 'a' + rv + 'a'
            assert NotebookOp._normalize_label_value(c + 'B') == 'a' + rv + 'B'
            # max length
            assert NotebookOp._normalize_label_value(c + 'B' * 62) ==\
                ('a' + rv + 'B' * 61)  # truncated
            # max length exceeded
            assert NotebookOp._normalize_label_value(c + 'B' * 63) ==\
                ('a' + rv + 'B' * 61)  # truncated

    # test last char
    for c in string.printable:
        if c in string.ascii_letters + string.digits:
            # no length violation
            assert NotebookOp._normalize_label_value('b' + c) == 'b' + c
            # max length
            assert NotebookOp._normalize_label_value('b' * 62 + c) ==\
                ('b' * 62 + c)
            # max length exceeded
            assert NotebookOp._normalize_label_value('b' * 63 + c) ==\
                ('b' * 63)
        else:
            # last char is invalid, e.g. 'a#', and requires
            # patching
            rv = c
            if c not in valid_middle_chars:
                rv = '_'
            # no length violation (char is appended)
            assert NotebookOp._normalize_label_value('b' + c) == 'b' + rv + 'a'
            # max length (char is replaced)
            assert NotebookOp._normalize_label_value('b' * 62 + c) ==\
                ('b' * 62 + 'a')
            # max length exceeded (no action required)
            assert NotebookOp._normalize_label_value('b' * 63 + c) ==\
                ('b' * 63)

    # test first and last char
    for c in string.printable:
        if c in string.ascii_letters + string.digits:
            # no length violation
            assert NotebookOp._normalize_label_value(c + 'b' + c) ==\
                c + 'b' + c  # nothing is modified
            # max length
            assert NotebookOp._normalize_label_value(c + 'b' * 61 + c) ==\
                (c + 'b' * 61 + c)  # nothing is modified
            # max length exceeded
            assert NotebookOp._normalize_label_value(c + 'b' * 62 + c) ==\
                c + 'b' * 62  # truncate only
        else:
            # first and last characters are invalid, e.g. '#a#'
            rv = c
            if c not in valid_middle_chars:
                rv = '_'
            # no length violation
            assert NotebookOp._normalize_label_value(c + 'b' + c) ==\
                'a' + rv + 'b' + rv + 'a'
            # max length
            assert NotebookOp._normalize_label_value(c + 'b' * 59 + c) ==\
                ('a' + rv + 'b' * 59 + rv + 'a')
            # max length exceeded after processing, scenario 1
            # resolved by adding char before first, replace last
            assert NotebookOp._normalize_label_value(c + 'b' * 60 + c) ==\
                ('a' + rv + 'b' * 60 + 'a')
            # max length exceeded after processing, scenario 2
            # resolved by adding char before first, appending after last
            assert NotebookOp._normalize_label_value(c + 'b' * 59 + c) ==\
                ('a' + rv + 'b' * 59 + rv + 'a')
            # max length exceeded before processing, scenario 1
            # resolved by adding char before first, truncating last
            assert NotebookOp._normalize_label_value(c + 'b' * 62 + c) ==\
                ('a' + rv + 'b' * 61)
            # max length exceeded before processing, scenario 2
            # resolved by adding char before first, replacing last
            assert NotebookOp._normalize_label_value(c + 'b' * 60 + c * 3) ==\
                ('a' + rv + 'b' * 60 + 'a')

    # test char in a position other than first and last
    # if invalid, the char is replaced with '_'
    for c in string.printable:
        if c in string.ascii_letters + string.digits + '-_.':
            assert NotebookOp._normalize_label_value('A' + c + 'Z') ==\
                'A' + c + 'Z'
        else:
            assert NotebookOp._normalize_label_value('A' + c + 'Z') == 'A_Z'

    # encore
    assert NotebookOp._normalize_label_value(r'¯\_(ツ)_/¯') == 'a_________a'
Example No. 27
    def _cc_pipeline(self, pipeline, pipeline_name):

        runtime_configuration = self._get_runtime_configuration(
            pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        emptydir_volume_size = ''
        container_runtime = bool(
            os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which themselves are derived from the outputs of their parent)
        # and its parent's outputs.
        for operation in pipeline.operations.values():
            parent_io = []  # gathers inputs & outputs relative to parent
            for parent_operation_id in operation.parent_operations:
                parent_operation = pipeline.operations[parent_operation_id]
                if parent_operation.inputs:
                    parent_io.extend(parent_operation.inputs)
                if parent_operation.outputs:
                    parent_io.extend(parent_operation.outputs)

                if parent_io:
                    operation.inputs = parent_io

        for operation in pipeline.operations.values():

            operation_artifact_archive = self._get_dependency_archive_name(
                operation)

            self.log.debug(
                "Creating pipeline component :\n {op} archive : {archive}".
                format(op=operation, archive=operation_artifact_archive))

            if container_runtime:
                # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = dict()
            pipeline_envs['AWS_ACCESS_KEY_ID'] = cos_username
            pipeline_envs['AWS_SECRET_ACCESS_KEY'] = cos_password

            if operation.env_vars:
                for env_var in operation.env_vars:
                    # Strip any of these special characters from both key and value
                    # Splits on the first occurrence of '='
                    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                    # Should be non empty key with a value
                    if len(result) == 2 and result[0] != '':
                        pipeline_envs[result[0]] = result[1]

            # create pipeline operation
            notebook_ops[operation.id] = NotebookOp(
                name=operation.name,
                notebook=operation.filename,
                cos_endpoint=cos_endpoint,
                cos_bucket=cos_bucket,
                cos_directory=cos_directory,
                cos_dependencies_archive=operation_artifact_archive,
                pipeline_inputs=operation.inputs,
                pipeline_outputs=operation.outputs,
                pipeline_envs=pipeline_envs,
                emptydir_volume_size=emptydir_volume_size,
                image=operation.runtime_image)

            self.log.info("NotebookOp Created for Component '%s' (%s)",
                          operation.name, operation.id)

            # upload operation dependencies to object storage
            try:
                t0 = time.time()
                dependency_archive_path = self._generate_dependency_archive(
                    operation)
                t1 = time.time()
                self.log.debug(
                    "Generation of dependency archive for operation '{name}' took {duration:.3f} secs."
                    .format(name=operation.name, duration=(t1 - t0)))

                cos_client = CosClient(config=runtime_configuration)
                t0 = time.time()
                cos_client.upload_file_to_dir(
                    dir=cos_directory,
                    file_name=operation_artifact_archive,
                    file_path=dependency_archive_path)
                t1 = time.time()
                self.log.debug(
                    "Upload of dependency archive for operation '{name}' took {duration:.3f} secs."
                    .format(name=operation.name, duration=(t1 - t0)))

            except FileNotFoundError as ex:
                self.log.error(
                    "Dependencies were not found building archive for operation: {}"
                    .format(operation.name),
                    exc_info=True)
                raise FileNotFoundError(
                    "Node '{}' referenced dependencies that were not found: {}"
                    .format(operation.name, ex))

            except BaseException as ex:
                self.log.error(
                    "Error uploading artifacts to object storage for operation: {}"
                    .format(operation.name),
                    exc_info=True)
                raise ex from ex

            self.log.info(
                "Pipeline dependencies have been uploaded to object storage")

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[
                    parent_operation_id]  # Parent Operation
                op.after(parent_op)

        return notebook_ops
Example No. 28
def test_construct_with_env_variables_argo():
    notebook_op = NotebookOp(name="test",
                             pipeline_name="test-pipeline",
                             experiment_name="experiment-name",
                             notebook="test_notebook.ipynb",
                             cos_endpoint="http://testserver:32525",
                             cos_bucket="test_bucket",
                             cos_directory="test_directory",
                             cos_dependencies_archive="test_archive.tgz",
                             pipeline_envs={
                                 "ENV_VAR_ONE": "1",
                                 "ENV_VAR_TWO": "2",
                                 "ENV_VAR_THREE": "3"
                             },
                             image="test/image:dev")

    confirmation_names = [
        "ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE", "ELYRA_RUN_NAME"
    ]
    confirmation_values = [
        "1", "2", "3",
        "{{workflow.annotations.pipelines.kubeflow.org/run_name}}"
    ]
    for env_val in notebook_op.container.env:
        assert env_val.name in confirmation_names
        assert env_val.value in confirmation_values
        confirmation_names.remove(env_val.name)
        confirmation_values.remove(env_val.value)

    # Verify confirmation values have been drained.
    assert len(confirmation_names) == 0
    assert len(confirmation_values) == 0

    # same as before but explicitly specify the workflow engine type
    # as Argo
    notebook_op = NotebookOp(name="test",
                             pipeline_name="test-pipeline",
                             experiment_name="experiment-name",
                             notebook="test_notebook.ipynb",
                             cos_endpoint="http://testserver:32525",
                             cos_bucket="test_bucket",
                             cos_directory="test_directory",
                             cos_dependencies_archive="test_archive.tgz",
                             pipeline_envs={
                                 "ENV_VAR_ONE": "1",
                                 "ENV_VAR_TWO": "2",
                                 "ENV_VAR_THREE": "3"
                             },
                             image="test/image:dev",
                             workflow_engine="Argo")

    confirmation_names = [
        "ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE", "ELYRA_RUN_NAME"
    ]
    confirmation_values = [
        "1", "2", "3",
        "{{workflow.annotations.pipelines.kubeflow.org/run_name}}"
    ]
    for env_val in notebook_op.container.env:
        assert env_val.name in confirmation_names
        assert env_val.value in confirmation_values
        confirmation_names.remove(env_val.name)
        confirmation_values.remove(env_val.value)

    # Verify confirmation values have been drained.
    assert len(confirmation_names) == 0
    assert len(confirmation_values) == 0
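
# For reference, a minimal sketch (assumed helper, not part of kfp-notebook) of how
# code running inside the notebook container could read the injected run name.
# Under Argo the {{workflow.annotations...}} template is resolved before the pod
# starts; under Tekton the value comes from the pod annotation via fieldRef.
import os

def current_run_name(default=None):
    """Return the Kubeflow Pipelines run name injected via ELYRA_RUN_NAME, if set."""
    return os.environ.get("ELYRA_RUN_NAME", default)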
Ejemplo n.º 29
    def _cc_pipeline(self, pipeline, pipeline_name):

        runtime_configuration = self._get_runtime_configuration(pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # All previous operation outputs should be propagated throughout the pipeline.
        # To process this recursively, the current operation's inputs are combined
        # from its parents' inputs (which are themselves derived from their own parents' outputs)
        # and its parents' outputs.
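        # For example (hypothetical filename): if a parent declares outputs
        # ['clean_data.csv'], this loop adds 'clean_data.csv' to the child's inputs
        # so the file is pulled from object storage before the child notebook runs.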
        for pipeline_operation in pipeline.operations.values():
            parent_inputs_and_outputs = []
            for parent_operation_id in pipeline_operation.parent_operations:
                parent_operation = pipeline.operations[parent_operation_id]
                if parent_operation.inputs:
                    parent_inputs_and_outputs.extend(parent_operation.inputs)
                if parent_operation.outputs:
                    parent_inputs_and_outputs.extend(parent_operation.outputs)

                if parent_inputs_and_outputs:
                    pipeline_operation.inputs = parent_inputs_and_outputs

        for operation in pipeline.operations.values():
            operation_artifact_archive = self._get_dependency_archive_name(operation)

            self.log.debug("Creating pipeline component :\n "
                           "componentID : %s \n "
                           "name : %s \n "
                           "parent_operations : %s \n "
                           "dependencies : %s \n "
                           "dependencies include subdirectories : %s \n "
                           "filename : %s \n "
                           "archive : %s \n "
                           "inputs : %s \n "
                           "outputs : %s \n "
                           "runtime image : %s \n ",
                           operation.id,
                           operation.name,
                           operation.parent_operations,
                           operation.dependencies,
                           operation.include_subdirectories,
                           operation.filename,
                           operation_artifact_archive,
                           operation.inputs,
                           operation.outputs,
                           operation.runtime_image)

            # create pipeline operation
            notebook_op = NotebookOp(name=operation.name,
                                     notebook=operation.filename,
                                     cos_endpoint=cos_endpoint,
                                     cos_bucket=cos_bucket,
                                     cos_directory=cos_directory,
                                     cos_dependencies_archive=operation_artifact_archive,
                                     image=operation.runtime_image)

            if operation.inputs:
                notebook_op.add_pipeline_inputs(self._artifact_list_to_str(operation.inputs))
            if operation.outputs:
                notebook_op.add_pipeline_outputs(self._artifact_list_to_str(operation.outputs))

            notebook_op.add_environment_variable('AWS_ACCESS_KEY_ID', cos_username)
            notebook_op.add_environment_variable('AWS_SECRET_ACCESS_KEY', cos_password)
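            # The credentials above are used inside the notebook container to read the
            # dependency archive from (and write outputs back to) the COS bucket.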

            # Set ENV variables
            if operation.env_vars:
                for env_var in operation.env_vars:
                    # Split on the first occurrence of '=' and strip surrounding
                    # whitespace and quote characters from both key and value
                    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
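                    # e.g. (hypothetical value) "  FOO = 'bar' "  ->  ['FOO', 'bar']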
                    # Require a non-empty key with a value
                    if len(result) == 2 and result[0] != '':
                        notebook_op.add_environment_variable(result[0], result[1])

            notebook_ops[operation.id] = notebook_op

            self.log.info("NotebookOp Created for Component '%s' (%s)", operation.name, operation.id)

            # upload operation dependencies to object storage
            try:
                dependency_archive_path = self._generate_dependency_archive(operation)
                cos_client = CosClient(config=runtime_configuration)
                cos_client.upload_file_to_dir(dir=cos_directory,
                                              file_name=operation_artifact_archive,
                                              file_path=dependency_archive_path)
            except BaseException:
                self.log.error("Error uploading artifacts to object storage.", exc_info=True)
                raise

            self.log.info("Pipeline dependencies have been uploaded to object storage")

        # Process dependencies after all the operations have been created
        for pipeline_operation in pipeline.operations.values():
            op = notebook_ops[pipeline_operation.id]
            for parent_operation_id in pipeline_operation.parent_operations:
                parent_op = notebook_ops[parent_operation_id]  # Parent Operation
                op.after(parent_op)

        return notebook_ops
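
# A minimal sketch (assumed wrapper, not Elyra's actual code) of how a builder like
# _cc_pipeline is typically turned into a deployable package with the KFP v1 SDK:
# wrap the op construction in a @dsl.pipeline function and compile it.
import kfp
from kfp import dsl

def compile_notebook_pipeline(processor, pipeline, pipeline_name, package_path):
    @dsl.pipeline(name=pipeline_name, description="Runs notebooks as pipeline steps")
    def pipeline_func():
        # Creating the NotebookOps inside the dsl.pipeline context registers them
        # with the compiler; the op.after() calls define the execution order.
        processor._cc_pipeline(pipeline, pipeline_name)

    kfp.compiler.Compiler().compile(pipeline_func, package_path)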