Example #1
File: processor.py  Project: a-ag/elyra
    def _upload_dependencies_to_object_store(self, runtime_configuration, pipeline_name, operation):
        operation_artifact_archive = self._get_dependency_archive_name(operation)
        cos_directory = pipeline_name
        # upload operation dependencies to object store
        try:
            t0 = time.time()
            dependency_archive_path = self._generate_dependency_archive(operation)
            self.log_pipeline_info(pipeline_name,
                                   f"generated dependency archive: {dependency_archive_path}",
                                   operation_name=operation.name,
                                   duration=(time.time() - t0))

            cos_client = CosClient(config=runtime_configuration)

            t0 = time.time()
            cos_client.upload_file_to_dir(dir=cos_directory,
                                          file_name=operation_artifact_archive,
                                          file_path=dependency_archive_path)
            self.log_pipeline_info(pipeline_name,
                                   f"uploaded dependency archive to: {cos_directory}/{operation_artifact_archive}",
                                   operation_name=operation.name,
                                   duration=(time.time() - t0))

        except FileNotFoundError as ex:
            self.log.error("Dependencies were not found building archive for operation: {}".
                           format(operation.name), exc_info=True)
            raise FileNotFoundError("Node '{}' referenced dependencies that were not found: {}".
                                    format(operation.name, ex))

        except BaseException as ex:
            self.log.error("Error uploading artifacts to object storage for operation: {}".
                           format(operation.name), exc_info=True)
            raise ex from ex
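Example #1 is a private helper, so the listing does not show a caller. Below is a minimal sketch of how a processor might invoke it once per pipeline operation; the wrapper function and the `processor` and `pipeline` arguments are hypothetical stand-ins, not names taken from the example.

    # Hypothetical driver: upload every operation's dependency archive before
    # the pipeline itself is compiled/submitted. `processor` is assumed to be
    # an instance of the class that defines _upload_dependencies_to_object_store,
    # and `pipeline.operations` a dict of operations keyed by id, as in the examples.
    def upload_all_dependencies(processor, runtime_configuration, pipeline, pipeline_name):
        for operation in pipeline.operations.values():
            processor._upload_dependencies_to_object_store(runtime_configuration,
                                                           pipeline_name,
                                                           operation)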
Example #2
    def _upload_dependencies_to_object_store(self, runtime_configuration,
                                             pipeline_name, operation):
        operation_artifact_archive = self._get_dependency_archive_name(
            operation)
        cos_directory = pipeline_name
        # upload operation dependencies to object store
        try:
            t0 = time.time()
            dependency_archive_path = self._generate_dependency_archive(
                operation)
            self.log_pipeline_info(
                pipeline_name,
                f"generated dependency archive: {dependency_archive_path}",
                operation_name=operation.name,
                duration=(time.time() - t0))

            cos_client = CosClient(config=runtime_configuration)

            t0 = time.time()
            cos_client.upload_file_to_dir(dir=cos_directory,
                                          file_name=operation_artifact_archive,
                                          file_path=dependency_archive_path)
            self.log_pipeline_info(
                pipeline_name,
                f"uploaded dependency archive to: {cos_directory}/{operation_artifact_archive}",
                operation_name=operation.name,
                duration=(time.time() - t0))

        except FileNotFoundError as ex:
            self.log.error(
                "Dependencies were not found building archive for operation: {}"
                .format(operation.name),
                exc_info=True)
            raise FileNotFoundError(
                "Node '{}' referenced dependencies that were not found: {}".
                format(operation.name, ex)) from ex
        except MaxRetryError as ex:
            cos_endpoint = runtime_configuration.metadata.get('cos_endpoint')
            self.log.error(
                "Connection was refused when attempting to connect to : {}".
                format(cos_endpoint),
                exc_info=True)
            raise RuntimeError(
                "Connection was refused when attempting to upload artifacts to : '{}'. Please "
                "check your object storage settings. ".format(
                    cos_endpoint)) from ex
        except SignatureDoesNotMatch as ex:
            raise RuntimeError(
                "Connection was refused due to incorrect Object Storage credentials. "
                +
                "Please validate your runtime configuration details and retry."
            ) from ex
        except BaseException as ex:
            self.log.error(
                "Error uploading artifacts to object storage for operation: {}"
                .format(operation.name),
                exc_info=True)
            raise ex from ex
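The `MaxRetryError` and `SignatureDoesNotMatch` names used above are not imported in the snippet. In the surrounding module they would typically come from the urllib3 and (pre-7.0) minio packages, roughly as sketched below; treat the exact import paths as an assumption rather than part of the example.

    # Assumed imports for the exception types referenced in Example #2.
    # MaxRetryError is raised by urllib3 when the COS endpoint cannot be reached;
    # SignatureDoesNotMatch is the minio (< 7.0) error for a mismatched secret key.
    from urllib3.exceptions import MaxRetryError
    from minio.error import SignatureDoesNotMatch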
Example #3
    def _cc_pipeline(self, pipeline, pipeline_name):

        runtime_configuration = self._get_runtime_configuration(pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        self.log_pipeline_info(pipeline_name,
                               f"processing pipeline dependencies to: {cos_endpoint} "
                               f"bucket: {cos_bucket} folder: {pipeline_name}")
        t0_all = time.time()

        emptydir_volume_size = ''
        container_runtime = bool(os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.
        for operation in pipeline.operations.values():
            parent_io = []  # gathers inputs & outputs relative to parent
            for parent_operation_id in operation.parent_operations:
                parent_operation = pipeline.operations[parent_operation_id]
                if parent_operation.inputs:
                    parent_io.extend(parent_operation.inputs)
                if parent_operation.outputs:
                    parent_io.extend(parent_operation.outputs)

                if parent_io:
                    operation.inputs = parent_io

        for operation in pipeline.operations.values():

            operation_artifact_archive = self._get_dependency_archive_name(operation)

            self.log.debug("Creating pipeline component :\n {op} archive : {archive}".format(
                           op=operation, archive=operation_artifact_archive))

            if container_runtime:
                # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = dict()
            pipeline_envs['AWS_ACCESS_KEY_ID'] = cos_username
            pipeline_envs['AWS_SECRET_ACCESS_KEY'] = cos_password
            # Convey pipeline logging enablement to operation
            pipeline_envs['ELYRA_ENABLE_PIPELINE_INFO'] = str(self.enable_pipeline_info)
            # Setting identifies a writable directory in the container image.
            # Only Unix-style path spec is supported.
            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'] = self.WCD

            if operation.env_vars:
                for env_var in operation.env_vars:
                    # Strip any of these special characters from both key and value
                    # Splits on the first occurrence of '='
                    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                    # Should be non empty key with a value
                    if len(result) == 2 and result[0] != '':
                        pipeline_envs[result[0]] = result[1]

            # create pipeline operation
            notebook_ops[operation.id] = NotebookOp(name=operation.name,
                                                    notebook=operation.filename,
                                                    cos_endpoint=cos_endpoint,
                                                    cos_bucket=cos_bucket,
                                                    cos_directory=cos_directory,
                                                    cos_dependencies_archive=operation_artifact_archive,
                                                    pipeline_inputs=operation.inputs,
                                                    pipeline_outputs=operation.outputs,
                                                    pipeline_envs=pipeline_envs,
                                                    emptydir_volume_size=emptydir_volume_size,
                                                    image=operation.runtime_image,
                                                    file_outputs={
                                                        'mlpipeline-metrics':
                                                            '{}/mlpipeline-metrics.json'
                                                            .format(pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                                                        'mlpipeline-ui-metadata':
                                                            '{}/mlpipeline-ui-metadata.json'
                                                            .format(pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                                                    })

            self.log_pipeline_info(pipeline_name,
                                   f"processing operation dependencies for id: {operation.id}",
                                   operation_name=operation.name)

            # upload operation dependencies to object storage
            try:
                t0 = time.time()
                dependency_archive_path = self._generate_dependency_archive(operation)
                self.log_pipeline_info(pipeline_name,
                                       f"generated dependency archive: {dependency_archive_path}",
                                       operation_name=operation.name,
                                       duration=(time.time() - t0))

                cos_client = CosClient(config=runtime_configuration)
                t0 = time.time()
                cos_client.upload_file_to_dir(dir=cos_directory,
                                              file_name=operation_artifact_archive,
                                              file_path=dependency_archive_path)
                self.log_pipeline_info(pipeline_name,
                                       f"uploaded dependency archive to: {cos_directory}/{operation_artifact_archive}",
                                       operation_name=operation.name,
                                       duration=(time.time() - t0))

            except FileNotFoundError as ex:
                self.log.error("Dependencies were not found building archive for operation: {}".
                               format(operation.name), exc_info=True)
                raise FileNotFoundError("Node '{}' referenced dependencies that were not found: {}".
                                        format(operation.name, ex))

            except BaseException as ex:
                self.log.error("Error uploading artifacts to object storage for operation: {}".
                               format(operation.name), exc_info=True)
                raise ex from ex

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[parent_operation_id]  # Parent Operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all))

        return notebook_ops
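The loop commented "All previous operation outputs should be propagated throughout the pipeline" can be illustrated in isolation. The sketch below uses toy `SimpleNamespace` objects rather than elyra's operation classes, but applies the same logic: a child's inputs become the union of its parents' inputs and outputs.

    # Self-contained illustration of the parent input/output propagation
    # performed at the top of _cc_pipeline (toy objects, not elyra classes).
    from types import SimpleNamespace

    prep = SimpleNamespace(id='prep', parent_operations=[], inputs=[], outputs=['clean.csv'])
    train = SimpleNamespace(id='train', parent_operations=['prep'], inputs=[], outputs=['model.pkl'])
    operations = {'prep': prep, 'train': train}

    for operation in operations.values():
        parent_io = []  # gathers inputs & outputs relative to parent
        for parent_id in operation.parent_operations:
            parent = operations[parent_id]
            parent_io.extend(parent.inputs or [])
            parent_io.extend(parent.outputs or [])
        if parent_io:
            operation.inputs = parent_io

    print(train.inputs)  # ['clean.csv'] -- the parent's output is now a declared input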
Example #4
    def _cc_pipeline(self, pipeline, pipeline_name):

        runtime_configuration = self._get_runtime_configuration(pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.
        for pipeline_operation in pipeline.operations.values():
            parent_inputs_and_outputs = []
            for parent_operation_id in pipeline_operation.parent_operations:
                parent_operation = pipeline.operations[parent_operation_id]
                if parent_operation.inputs:
                    parent_inputs_and_outputs.extend(parent_operation.inputs)
                if parent_operation.outputs:
                    parent_inputs_and_outputs.extend(parent_operation.outputs)

                if parent_inputs_and_outputs:
                    pipeline_operation.inputs = parent_inputs_and_outputs

        for operation in pipeline.operations.values():
            operation_artifact_archive = self._get_dependency_archive_name(operation)

            self.log.debug("Creating pipeline component :\n "
                           "componentID : %s \n "
                           "name : %s \n "
                           "parent_operations : %s \n "
                           "dependencies : %s \n "
                           "dependencies include subdirectories : %s \n "
                           "filename : %s \n "
                           "archive : %s \n "
                           "inputs : %s \n "
                           "outputs : %s \n "
                           "runtime image : %s \n ",
                           operation.id,
                           operation.name,
                           operation.parent_operations,
                           operation.dependencies,
                           operation.include_subdirectories,
                           operation.filename,
                           operation_artifact_archive,
                           operation.inputs,
                           operation.outputs,
                           operation.runtime_image)

            # create pipeline operation
            notebook_op = NotebookOp(name=operation.name,
                                     notebook=operation.filename,
                                     cos_endpoint=cos_endpoint,
                                     cos_bucket=cos_bucket,
                                     cos_directory=cos_directory,
                                     cos_dependencies_archive=operation_artifact_archive,
                                     image=operation.runtime_image)

            if operation.inputs:
                notebook_op.add_pipeline_inputs(self._artifact_list_to_str(operation.inputs))
            if operation.outputs:
                notebook_op.add_pipeline_outputs(self._artifact_list_to_str(operation.outputs))

            notebook_op.add_environment_variable('AWS_ACCESS_KEY_ID', cos_username)
            notebook_op.add_environment_variable('AWS_SECRET_ACCESS_KEY', cos_password)

            # Set ENV variables
            if operation.env_vars:
                for env_var in operation.env_vars:
                    # Strip any of these special characters from both key and value
                    # Splits on the first occurrence of '='
                    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                    # Should be non empty key with a value
                    if len(result) == 2 and result[0] != '':
                        notebook_op.add_environment_variable(result[0], result[1])

            notebook_ops[operation.id] = notebook_op

            self.log.info("NotebookOp Created for Component '%s' (%s)", operation.name, operation.id)

            # upload operation dependencies to object storage
            try:
                dependency_archive_path = self._generate_dependency_archive(operation)
                cos_client = CosClient(config=runtime_configuration)
                cos_client.upload_file_to_dir(dir=cos_directory,
                                              file_name=operation_artifact_archive,
                                              file_path=dependency_archive_path)
            except BaseException:
                self.log.error("Error uploading artifacts to object storage.", exc_info=True)
                raise

            self.log.info("Pipeline dependencies have been uploaded to object storage")

        # Process dependencies after all the operations have been created
        for pipeline_operation in pipeline.operations.values():
            op = notebook_ops[pipeline_operation.id]
            for parent_operation_id in pipeline_operation.parent_operations:
                parent_op = notebook_ops[parent_operation_id]  # Parent Operation
                op.after(parent_op)

        return notebook_ops
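The environment-variable parsing that recurs in Examples #3 through #6 ("Splits on the first occurrence of '='") can be run on its own. The sample entries below are made up for illustration only.

    # Standalone illustration of the env_vars parsing used in the examples:
    # split on the first '=' only, strip surrounding spaces and quotes from
    # key and value, and skip entries without both a non-empty key and a value.
    env_vars = ['FOO=bar', '\'TOKEN\' = "a=b=c"', 'BROKEN', '=novalue']

    pipeline_envs = {}
    for env_var in env_vars:
        result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
        if len(result) == 2 and result[0] != '':
            pipeline_envs[result[0]] = result[1]

    print(pipeline_envs)  # {'FOO': 'bar', 'TOKEN': 'a=b=c'}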
Example #5
    def _cc_pipeline(self, pipeline, pipeline_name):

        runtime_configuration = self._get_runtime_configuration(
            pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_directory = pipeline_name
        bucket_name = runtime_configuration.metadata['cos_bucket']

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # Preprocess the output/input artifacts
        for pipeline_child_operation in pipeline.operations.values():
            for dependency in pipeline_child_operation.dependencies:
                pipeline_parent_operation = pipeline.operations[dependency]
                if pipeline_parent_operation.outputs:
                    pipeline_child_operation.inputs = \
                        pipeline_child_operation.inputs + pipeline_parent_operation.outputs

        for operation in pipeline.operations.values():
            operation_artifact_archive = self._get_dependency_archive_name(
                operation)

            self.log.debug(
                "Creating pipeline component :\n "
                "componentID : %s \n "
                "name : %s \n "
                "dependencies : %s \n "
                "file dependencies : %s \n "
                "dependencies include subdirectories : %s \n "
                "path of workspace : %s \n "
                "artifact archive : %s \n "
                "inputs : %s \n "
                "outputs : %s \n "
                "docker image : %s \n ", operation.id, operation.title,
                operation.dependencies, operation.file_dependencies,
                operation.recursive_dependencies, operation.artifact,
                operation_artifact_archive, operation.inputs,
                operation.outputs, operation.image)

            # create pipeline operation
            notebook_op = NotebookOp(
                name=operation.title,
                notebook=operation.artifact_name,
                cos_endpoint=cos_endpoint,
                cos_bucket=bucket_name,
                cos_directory=cos_directory,
                cos_pull_archive=operation_artifact_archive,
                pipeline_outputs=self._artifact_list_to_str(operation.outputs),
                pipeline_inputs=self._artifact_list_to_str(operation.inputs),
                image=operation.image)

            notebook_op.container.add_env_variable(
                V1EnvVar(name='AWS_ACCESS_KEY_ID', value=cos_username))
            notebook_op.container.add_env_variable(
                V1EnvVar(name='AWS_SECRET_ACCESS_KEY', value=cos_password))

            # Set ENV variables
            if operation.vars:
                for env_var in operation.vars:
                    # Strip any of these special characters from both key and value
                    # Splits on the first occurrence of '='
                    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                    # Should be non empty key with a value
                    if len(result) == 2 and result[0] != '':
                        notebook_op.container.add_env_variable(
                            V1EnvVar(name=result[0], value=result[1]))

            notebook_ops[operation.id] = notebook_op

            self.log.info("NotebookOp Created for Component %s \n",
                          operation.id)

            # upload operation dependencies to object store
            try:
                dependency_archive_path = self._generate_dependency_archive(
                    operation)
                cos_client = CosClient(config=runtime_configuration)
                cos_client.upload_file_to_dir(
                    dir=cos_directory,
                    file_name=operation_artifact_archive,
                    file_path=dependency_archive_path)
            except BaseException:
                self.log.error("Error uploading artifacts to object storage.",
                               exc_info=True)
                raise

            self.log.info(
                "Pipeline dependencies have been uploaded to object store")

        # Process dependencies after all the operations have been created
        for pipeline_operation in pipeline.operations.values():
            op = notebook_ops[pipeline_operation.id]
            for dependency in pipeline_operation.dependencies:
                dependency_op = notebook_ops[dependency]  # Parent Operation
                op.after(dependency_op)

        return notebook_ops
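`V1EnvVar` is used without an import in this snippet. It is the Kubernetes client model that a kfp ContainerOp's container accepts for environment variables; the import below is an assumption about the surrounding module, not something shown in the example.

    # Assumed import for the V1EnvVar model used in Example #5.
    from kubernetes.client.models import V1EnvVar

    env = V1EnvVar(name='AWS_ACCESS_KEY_ID', value='minio')  # illustrative values only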
Example #6
    def _cc_pipeline(self, pipeline, pipeline_name):

        runtime_configuration = self._get_runtime_configuration(
            pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        emptydir_volume_size = ''
        container_runtime = bool(
            os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.
        for operation in pipeline.operations.values():
            parent_io = []  # gathers inputs & outputs relative to parent
            for parent_operation_id in operation.parent_operations:
                parent_operation = pipeline.operations[parent_operation_id]
                if parent_operation.inputs:
                    parent_io.extend(parent_operation.inputs)
                if parent_operation.outputs:
                    parent_io.extend(parent_operation.outputs)

                if parent_io:
                    operation.inputs = parent_io

        for operation in pipeline.operations.values():

            operation_artifact_archive = self._get_dependency_archive_name(
                operation)

            self.log.debug(
                "Creating pipeline component :\n {op} archive : {archive}".
                format(op=operation, archive=operation_artifact_archive))

            if container_runtime:
                # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = dict()
            pipeline_envs['AWS_ACCESS_KEY_ID'] = cos_username
            pipeline_envs['AWS_SECRET_ACCESS_KEY'] = cos_password

            if operation.env_vars:
                for env_var in operation.env_vars:
                    # Strip any of these special characters from both key and value
                    # Splits on the first occurrence of '='
                    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                    # Should be non empty key with a value
                    if len(result) == 2 and result[0] != '':
                        pipeline_envs[result[0]] = result[1]

            # create pipeline operation
            notebook_ops[operation.id] = NotebookOp(
                name=operation.name,
                notebook=operation.filename,
                cos_endpoint=cos_endpoint,
                cos_bucket=cos_bucket,
                cos_directory=cos_directory,
                cos_dependencies_archive=operation_artifact_archive,
                pipeline_inputs=operation.inputs,
                pipeline_outputs=operation.outputs,
                pipeline_envs=pipeline_envs,
                emptydir_volume_size=emptydir_volume_size,
                image=operation.runtime_image)

            self.log.info("NotebookOp Created for Component '%s' (%s)",
                          operation.name, operation.id)

            # upload operation dependencies to object storage
            try:
                t0 = time.time()
                dependency_archive_path = self._generate_dependency_archive(
                    operation)
                t1 = time.time()
                self.log.debug(
                    "Generation of dependency archive for operation '{name}' took {duration:.3f} secs."
                    .format(name=operation.name, duration=(t1 - t0)))

                cos_client = CosClient(config=runtime_configuration)
                t0 = time.time()
                cos_client.upload_file_to_dir(
                    dir=cos_directory,
                    file_name=operation_artifact_archive,
                    file_path=dependency_archive_path)
                t1 = time.time()
                self.log.debug(
                    "Upload of dependency archive for operation '{name}' took {duration:.3f} secs."
                    .format(name=operation.name, duration=(t1 - t0)))

            except FileNotFoundError as ex:
                self.log.error(
                    "Dependencies were not found building archive for operation: {}"
                    .format(operation.name),
                    exc_info=True)
                raise FileNotFoundError(
                    "Node '{}' referenced dependencies that were not found: {}"
                    .format(operation.name, ex))

            except BaseException as ex:
                self.log.error(
                    "Error uploading artifacts to object storage for operation: {}"
                    .format(operation.name),
                    exc_info=True)
                raise ex from ex

            self.log.info(
                "Pipeline dependencies have been uploaded to object storage")

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[
                    parent_operation_id]  # Parent Operation
                op.after(parent_op)

        return notebook_ops
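The CRI-O handling in Examples #3 and #6 reduces to a case-insensitive check of the CRIO_RUNTIME environment variable; only when it is "true" is the 20Gi emptyDir volume size (the stated IBM Cloud minimum) requested. A minimal standalone version:

    import os

    # Mirrors the container_runtime / emptydir_volume_size logic from the examples.
    container_runtime = os.getenv('CRIO_RUNTIME', 'False').lower() == 'true'
    emptydir_volume_size = '20Gi' if container_runtime else ''
    print(container_runtime, repr(emptydir_volume_size))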
Example #7
    def _upload_dependencies_to_object_store(self, runtime_configuration, pipeline_name, operation):
        operation_artifact_archive = self._get_dependency_archive_name(operation)
        cos_directory = pipeline_name
        # upload operation dependencies to object store
        try:
            t0 = time.time()
            dependency_archive_path = self._generate_dependency_archive(operation)
            self.log_pipeline_info(
                pipeline_name,
                f"generated dependency archive: {dependency_archive_path}",
                operation_name=operation.name,
                duration=(time.time() - t0),
            )

            cos_client = CosClient(config=runtime_configuration)

            t0 = time.time()
            cos_client.upload_file_to_dir(
                dir=cos_directory, file_name=operation_artifact_archive, file_path=dependency_archive_path
            )
            self.log_pipeline_info(
                pipeline_name,
                f"uploaded dependency archive to: {cos_directory}/{operation_artifact_archive}",
                operation_name=operation.name,
                duration=(time.time() - t0),
            )

        except FileNotFoundError as ex:
            self.log.error(
                f"Dependencies were not found building archive for operation: {operation.name}", exc_info=True
            )
            raise FileNotFoundError(
                f"Node '{operation.name}' referenced dependencies that were not found: {ex}"
            ) from ex
        except MaxRetryError as ex:
            cos_endpoint = runtime_configuration.metadata.get("cos_endpoint")
            self.log.error(f"Connection was refused when attempting to connect to : {cos_endpoint}", exc_info=True)
            raise RuntimeError(
                f"Connection was refused when attempting to upload artifacts to : '{cos_endpoint}'. "
                "Please check your object storage settings."
            ) from ex
        except S3Error as ex:
            msg_prefix = f"Error connecting to object storage: {ex.code}."
            if ex.code == "SignatureDoesNotMatch":
                # likely cause: incorrect password
                raise RuntimeError(
                    f"{msg_prefix} Verify the password "
                    f"in runtime configuration '{runtime_configuration.display_name}' "
                    "and try again."
                ) from ex
            elif ex.code == "InvalidAccessKeyId":
                # likely cause: incorrect user id
                raise RuntimeError(
                    f"{msg_prefix} Verify the username "
                    f"in runtime configuration '{runtime_configuration.display_name}' "
                    "and try again."
                ) from ex
            else:
                raise RuntimeError(
                    f"{msg_prefix} Verify "
                    f"runtime configuration '{runtime_configuration.display_name}' "
                    "and try again."
                ) from ex
        except BaseException as ex:
            self.log.error(
                f"Error uploading artifacts to object storage for operation: {operation.name}", exc_info=True
            )
            raise ex from ex
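Example #7 folds the credential-specific handling of Example #2 into a single `S3Error` branch keyed on the server-side error code (`SignatureDoesNotMatch` for a wrong password, `InvalidAccessKeyId` for a wrong username). `S3Error` is presumably the minio 7.x exception class; the imports below are an assumption, since the snippet omits them.

    # Assumed imports for Example #7 (minio >= 7.0 reports S3 failures as a
    # single S3Error and exposes the server-side error code on `code`).
    from minio.error import S3Error
    from urllib3.exceptions import MaxRetryError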