Example #1
    def process(self, pipeline):
        """
        Process a pipeline locally.
        The pipeline execution consists of properly ordering the operations
        based on its dependency graph and then delegating the execution
        to the appropriate executor (e.g. papermill for notebooks).
        """

        self.log_pipeline_info(pipeline.name, "processing pipeline")
        t0_all = time.time()

        # Sort operations based on dependency graph (topological order)
        operations = PipelineProcessor._sort_operations(pipeline.operations)
        for operation in operations:
            try:
                t0 = time.time()
                operation_processor = self._operation_processor_registry[
                    operation.classifier]
                operation_processor.process(operation)
                self.log_pipeline_info(pipeline.name,
                                       f"completed {operation.filename}",
                                       operation_name=operation.name,
                                       duration=(time.time() - t0))
            except Exception as ex:
                raise RuntimeError(
                    f'Error processing operation {operation.name}: {str(ex)}'
                ) from ex

        self.log_pipeline_info(pipeline.name,
                               "pipeline processed",
                               duration=(time.time() - t0_all))

        return LocalPipelineProcessorResponse()
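
The docstring above leans on PipelineProcessor._sort_operations to order operations so that parents always run before their children. The helper below is a minimal, hypothetical sketch of such a topological sort using Kahn's algorithm; it assumes operations is a dict keyed by operation id and that each operation exposes id and parent_operations (a list of parent ids), as the examples in this section do, and is a sketch under those assumptions rather than the actual implementation.

from collections import deque


def sort_operations_sketch(operations):
    # Count how many parents each operation is still waiting on
    remaining_parents = {op_id: len(op.parent_operations) for op_id, op in operations.items()}

    # Map each operation to the operations that depend on it
    children = {op_id: [] for op_id in operations}
    for op_id, op in operations.items():
        for parent_id in op.parent_operations:
            children[parent_id].append(op_id)

    # Start from the roots (operations with no parents) and release a child
    # as soon as all of its parents have been emitted
    ready = deque(op_id for op_id, count in remaining_parents.items() if count == 0)
    ordered = []
    while ready:
        op_id = ready.popleft()
        ordered.append(operations[op_id])
        for child_id in children[op_id]:
            remaining_parents[child_id] -= 1
            if remaining_parents[child_id] == 0:
                ready.append(child_id)

    if len(ordered) != len(operations):
        raise ValueError("Circular dependency detected in pipeline operations")
    return ordered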
Example #2
    def _cc_pipeline(self,
                     pipeline,
                     pipeline_name,
                     pipeline_version='',
                     experiment_name='',
                     cos_directory=None,
                     export=False):

        runtime_configuration = self._get_metadata_configuration(namespace=MetadataManager.NAMESPACE_RUNTIMES,
                                                                 name=pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_secret = runtime_configuration.metadata.get('cos_secret')

        if cos_directory is None:
            cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        self.log_pipeline_info(pipeline_name,
                               f"processing pipeline dependencies to: {cos_endpoint} "
                               f"bucket: {cos_bucket} folder: {cos_directory}")
        t0_all = time.time()

        emptydir_volume_size = ''
        container_runtime = os.getenv('CRIO_RUNTIME', 'False').lower() == 'true'

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(pipeline.operations)

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(pipeline, sorted_operations)

        for operation in sorted_operations:

            operation_artifact_archive = self._get_dependency_archive_name(operation)

            self.log.debug("Creating pipeline component :\n {op} archive : {archive}".format(
                           op=operation, archive=operation_artifact_archive))

            if container_runtime:
                # Volume size to create when using CRI-O; NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = self._collect_envs(operation,
                                               cos_secret=cos_secret,
                                               cos_username=cos_username,
                                               cos_password=cos_password)

            # Include any envs set on the operation
            pipeline_envs.update(operation.env_vars_as_dict(logger=self.log))

            sanitized_operation_name = self._sanitize_operation_name(operation.name)

            # create pipeline operation
            notebook_ops[operation.id] = NotebookOp(name=sanitized_operation_name,
                                                    pipeline_name=pipeline_name,
                                                    experiment_name=experiment_name,
                                                    notebook=operation.filename,
                                                    cos_endpoint=cos_endpoint,
                                                    cos_bucket=cos_bucket,
                                                    cos_directory=cos_directory,
                                                    cos_dependencies_archive=operation_artifact_archive,
                                                    pipeline_version=pipeline_version,
                                                    pipeline_source=pipeline.source,
                                                    pipeline_inputs=operation.inputs,
                                                    pipeline_outputs=operation.outputs,
                                                    pipeline_envs=pipeline_envs,
                                                    emptydir_volume_size=emptydir_volume_size,
                                                    cpu_request=operation.cpu,
                                                    mem_request=operation.memory,
                                                    gpu_limit=operation.gpu,
                                                    image=operation.runtime_image,
                                                    file_outputs={
                                                        'mlpipeline-metrics':
                                                            '{}/mlpipeline-metrics.json'
                                                            .format(pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                                                        'mlpipeline-ui-metadata':
                                                            '{}/mlpipeline-ui-metadata.json'
                                                            .format(pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                                                    })

            if cos_secret and not export:
                notebook_ops[operation.id].apply(use_aws_secret(cos_secret))

            image_namespace = self._get_metadata_configuration(namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)
            for image_instance in image_namespace:
                if image_instance.metadata['image_name'] == operation.runtime_image and \
                   image_instance.metadata.get('pull_policy'):
                    notebook_ops[operation.id].container.set_image_pull_policy(image_instance.metadata['pull_policy'])

            self.log_pipeline_info(pipeline_name,
                                   f"processing operation dependencies for id: {operation.id}",
                                   operation_name=operation.name)

            self._upload_dependencies_to_object_store(runtime_configuration,
                                                      cos_directory,
                                                      operation)

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[parent_operation_id]  # Parent Operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all))

        return notebook_ops
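
The comment block in the example above describes how each operation's inputs must absorb its parents' inputs and outputs. Below is a minimal sketch of that propagation step, assuming the operations are already topologically sorted and expose inputs, outputs and parent_operations; the attribute names mirror the examples, but the real _propagate_operation_inputs_outputs may differ.

def propagate_operation_inputs_outputs_sketch(pipeline, sorted_operations):
    for operation in sorted_operations:
        parent_io = []
        for parent_id in operation.parent_operations:
            parent = pipeline.operations[parent_id]
            # A parent's inputs already include everything inherited from its own
            # ancestors, so combining its inputs and outputs is sufficient here.
            parent_io.extend(parent.inputs or [])
            parent_io.extend(parent.outputs or [])
        if parent_io:
            # De-duplicate while preserving order, then merge with the operation's own inputs
            operation.inputs = list(dict.fromkeys(parent_io + list(operation.inputs or [])))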
Example #3
    def _cc_pipeline(self,
                     pipeline,
                     pipeline_name,
                     pipeline_version='',
                     experiment_name='',
                     cos_directory=None):

        runtime_configuration = self._get_metadata_configuration(
            namespace=MetadataManager.NAMESPACE_RUNTIMES,
            name=pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        if cos_directory is None:
            cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        self.log_pipeline_info(
            pipeline_name,
            f"processing pipeline dependencies to: {cos_endpoint} "
            f"bucket: {cos_bucket} folder: {cos_directory}")
        t0_all = time.time()

        emptydir_volume_size = ''
        container_runtime = os.getenv('CRIO_RUNTIME', 'False').lower() == 'true'

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(
            pipeline.operations)

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(
            pipeline, sorted_operations)

        for operation in sorted_operations:

            operation_artifact_archive = self._get_dependency_archive_name(
                operation)

            self.log.debug(
                "Creating pipeline component :\n {op} archive : {archive}".
                format(op=operation, archive=operation_artifact_archive))

            if container_runtime:
                # Volume size to create when using CRI-O; NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = dict()
            pipeline_envs['AWS_ACCESS_KEY_ID'] = cos_username
            pipeline_envs['AWS_SECRET_ACCESS_KEY'] = cos_password
            # Convey pipeline logging enablement to operation
            pipeline_envs['ELYRA_ENABLE_PIPELINE_INFO'] = str(
                self.enable_pipeline_info)
            # Setting identifies a writable directory in the container image.
            # Only Unix-style path spec is supported.
            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'] = self.WCD

            if operation.env_vars:
                for env_var in operation.env_vars:
                    # Split on the first '=' and strip surrounding spaces and quotes
                    # from both the key and the value
                    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                    # Only keep entries that contain '=' and a non-empty key
                    if len(result) == 2 and result[0] != '':
                        pipeline_envs[result[0]] = result[1]

            sanitized_operation_name = self._sanitize_operation_name(
                operation.name)

            # create pipeline operation
            notebook_ops[operation.id] = NotebookOp(
                name=sanitized_operation_name,
                pipeline_name=pipeline_name,
                experiment_name=experiment_name,
                notebook=operation.filename,
                cos_endpoint=cos_endpoint,
                cos_bucket=cos_bucket,
                cos_directory=cos_directory,
                cos_dependencies_archive=operation_artifact_archive,
                pipeline_version=pipeline_version,
                pipeline_source=pipeline.source,
                pipeline_inputs=operation.inputs,
                pipeline_outputs=operation.outputs,
                pipeline_envs=pipeline_envs,
                emptydir_volume_size=emptydir_volume_size,
                cpu_request=operation.cpu,
                mem_request=operation.memory,
                gpu_limit=operation.gpu,
                image=operation.runtime_image,
                file_outputs={
                    'mlpipeline-metrics':
                    '{}/mlpipeline-metrics.json'.format(
                        pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                    'mlpipeline-ui-metadata':
                    '{}/mlpipeline-ui-metadata.json'.format(
                        pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                })

            image_namespace = self._get_metadata_configuration(
                namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)
            for image_instance in image_namespace:
                if image_instance.metadata['image_name'] == operation.runtime_image and \
                   image_instance.metadata.get('pull_policy'):
                    notebook_ops[operation.id].container.set_image_pull_policy(
                        image_instance.metadata['pull_policy'])

            self.log_pipeline_info(
                pipeline_name,
                f"processing operation dependencies for id: {operation.id}",
                operation_name=operation.name)

            self._upload_dependencies_to_object_store(runtime_configuration,
                                                      cos_directory, operation)

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[
                    parent_operation_id]  # Parent Operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name,
                               "pipeline dependencies processed",
                               duration=(time.time() - t0_all))

        return notebook_ops
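
The env_vars handling in the example above turns KEY=VALUE strings into a dict, splitting on the first '=' and stripping surrounding spaces and quotes. The tiny helper below, with a hypothetical name, isolates just that parsing so its behavior is easy to see.

def parse_env_vars_sketch(env_vars):
    parsed = {}
    for env_var in env_vars or []:
        # Split on the first '=' and strip surrounding spaces and quotes
        key_value = [part.strip(' \'\"') for part in env_var.split('=', 1)]
        # Entries without '=' or with an empty key are ignored
        if len(key_value) == 2 and key_value[0]:
            parsed[key_value[0]] = key_value[1]
    return parsed


# parse_env_vars_sketch(["NAME='elyra'", "OPTS=a=b", "BARE"])
# -> {'NAME': 'elyra', 'OPTS': 'a=b'}   ("BARE" has no '=' and is dropped)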
Example #4
    def _cc_pipeline(self, pipeline, pipeline_name):

        runtime_configuration = self._get_metadata_configuration(
            namespace=MetadataManager.NAMESPACE_RUNTIMES,
            name=pipeline.runtime_config)
        image_namespace = self._get_metadata_configuration(
            namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_secret = runtime_configuration.metadata.get('cos_secret')
        cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        # Create a list that collects the settings for each notebook operation
        notebook_ops = []

        self.log_pipeline_info(
            pipeline_name,
            f"processing pipeline dependencies to: {cos_endpoint} "
            f"bucket: {cos_bucket} folder: {pipeline_name}")

        t0_all = time.time()

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(
            pipeline.operations)

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(
            pipeline, sorted_operations)

        for operation in sorted_operations:
            operation_artifact_archive = self._get_dependency_archive_name(
                operation)

            self.log.debug(
                "Creating pipeline component :\n {op} archive : {archive}".
                format(op=operation, archive=operation_artifact_archive))

            # Collect env variables
            pipeline_envs = self._collect_envs(operation,
                                               cos_secret=cos_secret,
                                               cos_username=cos_username,
                                               cos_password=cos_password)

            image_pull_policy = None
            for image_instance in image_namespace:
                if image_instance.metadata['image_name'] == operation.runtime_image and \
                        image_instance.metadata.get('pull_policy'):
                    image_pull_policy = image_instance.metadata['pull_policy']

            notebook = {
                'notebook': operation.name,
                'id': operation.id,
                'filename': operation.filename,
                'runtime_image': operation.runtime_image,
                'cos_endpoint': cos_endpoint,
                'cos_bucket': cos_bucket,
                'cos_directory': cos_directory,
                'cos_dependencies_archive': operation_artifact_archive,
                'pipeline_outputs': operation.outputs,
                'pipeline_inputs': operation.inputs,
                'pipeline_envs': pipeline_envs,
                'parent_operations': operation.parent_operations,
                'image_pull_policy': image_pull_policy,
                'cpu_request': operation.cpu,
                'mem_request': operation.memory,
                'gpu_request': operation.gpu
            }

            notebook_ops.append(notebook)

            self.log_pipeline_info(
                pipeline_name,
                f"processing operation dependencies for id: {operation.id}",
                operation_name=operation.name)

            self._upload_dependencies_to_object_store(runtime_configuration,
                                                      pipeline_name, operation)

        # Re-order the notebook settings so that every parent appears before its
        # children; entries whose parents have not all been placed are re-queued.
        ordered_notebook_ops = OrderedDict()

        while notebook_ops:
            for _ in range(len(notebook_ops)):
                notebook = notebook_ops.pop(0)
                if not notebook['parent_operations']:
                    ordered_notebook_ops[notebook['id']] = notebook
                    self.log.debug("Root Node added : %s",
                                   ordered_notebook_ops[notebook['id']])
                elif all(deps in ordered_notebook_ops.keys()
                         for deps in notebook['parent_operations']):
                    ordered_notebook_ops[notebook['id']] = notebook
                    self.log.debug("Dependent Node added : %s",
                                   ordered_notebook_ops[notebook['id']])
                else:
                    notebook_ops.append(notebook)

        self.log_pipeline_info(pipeline_name,
                               "pipeline dependencies processed",
                               duration=(time.time() - t0_all))

        return ordered_notebook_ops
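
The while loop above re-queues entries until all of their parents have already been placed, which yields an OrderedDict whose iteration order puts parents before children. Below is a toy run of the same loop on made-up ids, just to illustrate the re-queueing.

from collections import OrderedDict

nodes = [
    {'id': 'c', 'parent_operations': ['a', 'b']},
    {'id': 'a', 'parent_operations': []},
    {'id': 'b', 'parent_operations': ['a']},
]

ordered = OrderedDict()
while nodes:
    for _ in range(len(nodes)):
        node = nodes.pop(0)
        if not node['parent_operations'] or all(p in ordered for p in node['parent_operations']):
            ordered[node['id']] = node
        else:
            # Parents not placed yet; push the node back and try again later
            nodes.append(node)

print(list(ordered))  # ['a', 'b', 'c']: every parent precedes its children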
Example #5
    def _cc_pipeline(self, pipeline, pipeline_name):

        runtime_configuration = self._get_metadata_configuration(namespace=MetadataManager.NAMESPACE_RUNTIMES,
                                                                 name=pipeline.runtime_config)
        image_namespace = self._get_metadata_configuration(namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        # Create a list that collects the settings for each notebook operation
        notebook_ops = []

        self.log_pipeline_info(pipeline_name,
                               f"processing pipeline dependencies to: {cos_endpoint} "
                               f"bucket: {cos_bucket} folder: {pipeline_name}")

        t0_all = time.time()

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(pipeline.operations)

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(pipeline, sorted_operations)

        for operation in sorted_operations:
            operation_artifact_archive = self._get_dependency_archive_name(operation)

            self.log.debug("Creating pipeline component :\n {op} archive : {archive}".format(
                op=operation, archive=operation_artifact_archive))

            # Collect env variables
            pipeline_envs = dict()
            pipeline_envs['AWS_ACCESS_KEY_ID'] = cos_username
            pipeline_envs['AWS_SECRET_ACCESS_KEY'] = cos_password
            # Convey pipeline logging enablement to operation
            pipeline_envs['ELYRA_ENABLE_PIPELINE_INFO'] = str(self.enable_pipeline_info)

            # Set ENV variables in each container
            if operation.env_vars:
                for env_var in operation.env_vars:
                    # Split on the first '=' and strip surrounding spaces and quotes
                    # from both the key and the value
                    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                    # Only keep entries that contain '=' and a non-empty key
                    if len(result) == 2 and result[0] != '':
                        pipeline_envs[result[0]] = result[1]

            image_pull_policy = None
            for image_instance in image_namespace:
                if image_instance.metadata['image_name'] == operation.runtime_image and \
                        image_instance.metadata.get('pull_policy'):
                    image_pull_policy = image_instance.metadata['pull_policy']

            notebook = {'notebook': operation.name,
                        'id': operation.id,
                        'filename': operation.filename,
                        'runtime_image': operation.runtime_image,
                        'cos_endpoint': cos_endpoint,
                        'cos_bucket': cos_bucket,
                        'cos_directory': cos_directory,
                        'cos_dependencies_archive': operation_artifact_archive,
                        'pipeline_outputs': operation.outputs,
                        'pipeline_inputs': operation.inputs,
                        'pipeline_envs': pipeline_envs,
                        'parent_operations': operation.parent_operations,
                        'image_pull_policy': image_pull_policy,
                        'cpu_request': operation.cpu,
                        'mem_request': operation.memory,
                        'gpu_request': operation.gpu
                        }

            notebook_ops.append(notebook)

            self.log_pipeline_info(pipeline_name,
                                   f"processing operation dependencies for id: {operation.id}",
                                   operation_name=operation.name)

            self._upload_dependencies_to_object_store(runtime_configuration,
                                                      pipeline_name,
                                                      operation)

        # Re-order the notebook settings so that every parent appears before its
        # children; entries whose parents have not all been placed are re-queued.
        ordered_notebook_ops = OrderedDict()

        while notebook_ops:
            for _ in range(len(notebook_ops)):
                notebook = notebook_ops.pop(0)
                if not notebook['parent_operations']:
                    ordered_notebook_ops[notebook['id']] = notebook
                    self.log.debug("Root Node added : %s", ordered_notebook_ops[notebook['id']])
                elif all(deps in ordered_notebook_ops.keys() for deps in notebook['parent_operations']):
                    ordered_notebook_ops[notebook['id']] = notebook
                    self.log.debug("Dependent Node added : %s", ordered_notebook_ops[notebook['id']])
                else:
                    notebook_ops.append(notebook)

        self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all))

        return ordered_notebook_ops
Example #6
    def _cc_pipeline(self,
                     pipeline,
                     pipeline_name,
                     pipeline_version='',
                     experiment_name='',
                     cos_directory=None,
                     export=False):

        runtime_configuration = self._get_metadata_configuration(
            namespace=MetadataManager.NAMESPACE_RUNTIMES,
            name=pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_secret = runtime_configuration.metadata.get('cos_secret')

        if cos_directory is None:
            cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        self.log_pipeline_info(
            pipeline_name,
            f"processing pipeline dependencies to: {cos_endpoint} "
            f"bucket: {cos_bucket} folder: {cos_directory}")
        t0_all = time.time()

        emptydir_volume_size = ''
        container_runtime = os.getenv('CRIO_RUNTIME', 'False').lower() == 'true'

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(
            pipeline.operations)

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(
            pipeline, sorted_operations)

        for operation in sorted_operations:

            if container_runtime:
                # Volume size to create when using CRI-O; NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = self._collect_envs(operation,
                                               cos_secret=cos_secret,
                                               cos_username=cos_username,
                                               cos_password=cos_password)

            sanitized_operation_name = self._sanitize_operation_name(
                operation.name)

            # Create pipeline operation
            # If the operation is one of the "standard" notebook or script nodes, construct a custom NotebookOp
            if operation.classifier in [
                    "execute-notebook-node", "execute-python-node",
                    "execute-r-node"
            ]:

                operation_artifact_archive = self._get_dependency_archive_name(
                    operation)

                self.log.debug(
                    "Creating pipeline component :\n {op} archive : {archive}".
                    format(op=operation, archive=operation_artifact_archive))

                notebook_ops[operation.id] = NotebookOp(
                    name=sanitized_operation_name,
                    pipeline_name=pipeline_name,
                    experiment_name=experiment_name,
                    notebook=operation.filename,
                    cos_endpoint=cos_endpoint,
                    cos_bucket=cos_bucket,
                    cos_directory=cos_directory,
                    cos_dependencies_archive=operation_artifact_archive,
                    pipeline_version=pipeline_version,
                    pipeline_source=pipeline.source,
                    pipeline_inputs=operation.inputs,
                    pipeline_outputs=operation.outputs,
                    pipeline_envs=pipeline_envs,
                    emptydir_volume_size=emptydir_volume_size,
                    cpu_request=operation.cpu,
                    mem_request=operation.memory,
                    gpu_limit=operation.gpu,
                    image=operation.runtime_image,
                    file_outputs={
                        'mlpipeline-metrics':
                        '{}/mlpipeline-metrics.json'.format(
                            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                        'mlpipeline-ui-metadata':
                        '{}/mlpipeline-ui-metadata.json'.format(
                            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                    })

                # TODO Can we move all of this to apply to non-standard components as well? Test when servers are up
                if cos_secret and not export:
                    notebook_ops[operation.id].apply(
                        use_aws_secret(cos_secret))

                image_namespace = self._get_metadata_configuration(
                    namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)
                for image_instance in image_namespace:
                    if image_instance.metadata['image_name'] == operation.runtime_image and \
                            image_instance.metadata.get('pull_policy'):
                        notebook_ops[operation.id].container. \
                            set_image_pull_policy(image_instance.metadata['pull_policy'])

                self.log_pipeline_info(
                    pipeline_name,
                    f"processing operation dependencies for id: {operation.id}",
                    operation_name=operation.name)

                self._upload_dependencies_to_object_store(
                    runtime_configuration, cos_directory, operation)

            # If the operation is a "non-standard" component, load its spec and create the operation with a factory function
            else:
                component_source = {
                    operation.component_source_type: operation.component_source
                }

                # Build component task factory
                try:
                    factory_function = components.load_component(
                        **component_source)
                except Exception:
                    # TODO Fix error messaging and break exceptions down into categories
                    self.log.error(
                        f"There was an error while loading component spec for {operation.name}."
                    )
                    raise RuntimeError(
                        f"There was an error while loading component spec for {operation.name}."
                    )

                # Add factory function, which returns a ContainerOp task instance, to pipeline operation dict
                try:
                    notebook_ops[operation.id] = factory_function(
                        **operation.component_params)
                except Exception:
                    # TODO Fix error messaging and break exceptions down into categories
                    self.log.error(
                        f"There was an error while constructing component {operation.name}."
                    )
                    raise RuntimeError(
                        f"There was an error while constructing component {operation.name}."
                    )

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[
                    parent_operation_id]  # Parent Operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name,
                               "pipeline dependencies processed",
                               duration=(time.time() - t0_all))

        return notebook_ops
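
For the "non-standard" branch in the example above, the component spec is loaded with a factory function via kfp.components.load_component, which in the KFP v1 SDK accepts one of filename, url or text. The snippet below is a hedged illustration with a made-up component spec; the real component_source_type and component_source values come from the operation.

from kfp import components

# Illustrative KFP v1 component YAML; not taken from the examples above
component_yaml = """
name: Add
inputs:
- {name: a, type: Integer}
- {name: b, type: Integer}
implementation:
  container:
    image: python:3.8
    command: [python, -c, 'import sys; print(int(sys.argv[1]) + int(sys.argv[2]))', {inputValue: a}, {inputValue: b}]
"""

# Mirrors component_source = {operation.component_source_type: operation.component_source}
component_source = {'text': component_yaml}
factory_function = components.load_component(**component_source)

# Inside a pipeline function, calling the factory returns a ContainerOp-style task,
# which is what Example #6 stores in notebook_ops[operation.id]:
# add_task = factory_function(a=1, b=2)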