def _upload_dependencies_to_object_store(self, runtime_configuration, pipeline_name, operation):
    operation_artifact_archive = self._get_dependency_archive_name(operation)
    cos_directory = pipeline_name
    # upload operation dependencies to object store
    try:
        t0 = time.time()
        dependency_archive_path = self._generate_dependency_archive(operation)
        self.log_pipeline_info(pipeline_name,
                               f"generated dependency archive: {dependency_archive_path}",
                               operation_name=operation.name,
                               duration=(time.time() - t0))

        cos_client = CosClient(config=runtime_configuration)

        t0 = time.time()
        cos_client.upload_file_to_dir(dir=cos_directory,
                                      file_name=operation_artifact_archive,
                                      file_path=dependency_archive_path)
        self.log_pipeline_info(pipeline_name,
                               f"uploaded dependency archive to: {cos_directory}/{operation_artifact_archive}",
                               operation_name=operation.name,
                               duration=(time.time() - t0))

    except FileNotFoundError as ex:
        self.log.error("Dependencies were not found building archive for operation: {}".
                       format(operation.name), exc_info=True)
        raise FileNotFoundError("Node '{}' referenced dependencies that were not found: {}".
                                format(operation.name, ex))
    except BaseException as ex:
        self.log.error("Error uploading artifacts to object storage for operation: {}".
                       format(operation.name), exc_info=True)
        raise ex from ex
def _upload_dependencies_to_object_store(self, runtime_configuration, pipeline_name, operation):
    operation_artifact_archive = self._get_dependency_archive_name(operation)
    cos_directory = pipeline_name
    # upload operation dependencies to object store
    try:
        t0 = time.time()
        dependency_archive_path = self._generate_dependency_archive(operation)
        self.log_pipeline_info(pipeline_name,
                               f"generated dependency archive: {dependency_archive_path}",
                               operation_name=operation.name,
                               duration=(time.time() - t0))

        cos_client = CosClient(config=runtime_configuration)

        t0 = time.time()
        cos_client.upload_file_to_dir(dir=cos_directory,
                                      file_name=operation_artifact_archive,
                                      file_path=dependency_archive_path)
        self.log_pipeline_info(pipeline_name,
                               f"uploaded dependency archive to: {cos_directory}/{operation_artifact_archive}",
                               operation_name=operation.name,
                               duration=(time.time() - t0))

    except FileNotFoundError as ex:
        self.log.error("Dependencies were not found building archive for operation: {}".
                       format(operation.name), exc_info=True)
        raise FileNotFoundError("Node '{}' referenced dependencies that were not found: {}".
                                format(operation.name, ex)) from ex
    except MaxRetryError as ex:
        cos_endpoint = runtime_configuration.metadata.get('cos_endpoint')
        self.log.error("Connection was refused when attempting to connect to : {}".
                       format(cos_endpoint), exc_info=True)
        raise RuntimeError("Connection was refused when attempting to upload artifacts to : '{}'. Please "
                           "check your object storage settings. ".format(cos_endpoint)) from ex
    except SignatureDoesNotMatch as ex:
        raise RuntimeError("Connection was refused due to incorrect Object Storage credentials. " +
                           "Please validate your runtime configuration details and retry.") from ex
    except BaseException as ex:
        self.log.error("Error uploading artifacts to object storage for operation: {}".
                       format(operation.name), exc_info=True)
        raise ex from ex
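# Assumed imports for the exception types handled above (not shown in the snippet):
# MaxRetryError is raised by urllib3 when the endpoint cannot be reached, and
# SignatureDoesNotMatch is the credential error exposed by the pre-7.0 minio client.
# Newer minio releases report the same condition as an S3Error code instead, which
# is how the last variant below handles it.
from urllib3.exceptions import MaxRetryError  # connection-level failures
from minio.error import SignatureDoesNotMatch  # minio < 7.0 credential error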
def _cc_pipeline(self, pipeline, pipeline_name):

    runtime_configuration = self._get_runtime_configuration(pipeline.runtime_config)

    cos_endpoint = runtime_configuration.metadata['cos_endpoint']
    cos_username = runtime_configuration.metadata['cos_username']
    cos_password = runtime_configuration.metadata['cos_password']
    cos_directory = pipeline_name
    cos_bucket = runtime_configuration.metadata['cos_bucket']

    self.log_pipeline_info(pipeline_name,
                           f"processing pipeline dependencies to: {cos_endpoint} "
                           f"bucket: {cos_bucket} folder: {pipeline_name}")
    t0_all = time.time()

    emptydir_volume_size = ''
    container_runtime = bool(os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

    # Create dictionary that maps component Id to its ContainerOp instance
    notebook_ops = {}

    # All previous operation outputs should be propagated throughout the pipeline.
    # In order to process this recursively, the current operation's inputs should be combined
    # from its parent's inputs (which, themselves are derived from the outputs of their parent)
    # and its parent's outputs.
    for operation in pipeline.operations.values():
        parent_io = []  # gathers inputs & outputs relative to parent
        for parent_operation_id in operation.parent_operations:
            parent_operation = pipeline.operations[parent_operation_id]
            if parent_operation.inputs:
                parent_io.extend(parent_operation.inputs)
            if parent_operation.outputs:
                parent_io.extend(parent_operation.outputs)

            if parent_io:
                operation.inputs = parent_io

    for operation in pipeline.operations.values():

        operation_artifact_archive = self._get_dependency_archive_name(operation)

        self.log.debug("Creating pipeline component :\n {op} archive : {archive}".format(
                       op=operation, archive=operation_artifact_archive))

        if container_runtime:
            # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
            emptydir_volume_size = '20Gi'

        # Collect env variables
        pipeline_envs = dict()
        pipeline_envs['AWS_ACCESS_KEY_ID'] = cos_username
        pipeline_envs['AWS_SECRET_ACCESS_KEY'] = cos_password
        # Convey pipeline logging enablement to operation
        pipeline_envs['ELYRA_ENABLE_PIPELINE_INFO'] = str(self.enable_pipeline_info)
        # Setting identifies a writable directory in the container image.
        # Only Unix-style path spec is supported.
        pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'] = self.WCD

        if operation.env_vars:
            for env_var in operation.env_vars:
                # Strip any of these special characters from both key and value
                # Splits on the first occurrence of '='
                result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                # Should be non empty key with a value
                if len(result) == 2 and result[0] != '':
                    pipeline_envs[result[0]] = result[1]

        # create pipeline operation
        notebook_ops[operation.id] = NotebookOp(name=operation.name,
                                                notebook=operation.filename,
                                                cos_endpoint=cos_endpoint,
                                                cos_bucket=cos_bucket,
                                                cos_directory=cos_directory,
                                                cos_dependencies_archive=operation_artifact_archive,
                                                pipeline_inputs=operation.inputs,
                                                pipeline_outputs=operation.outputs,
                                                pipeline_envs=pipeline_envs,
                                                emptydir_volume_size=emptydir_volume_size,
                                                image=operation.runtime_image,
                                                file_outputs={
                                                    'mlpipeline-metrics':
                                                        '{}/mlpipeline-metrics.json'
                                                        .format(pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                                                    'mlpipeline-ui-metadata':
                                                        '{}/mlpipeline-ui-metadata.json'
                                                        .format(pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                                                })

        self.log_pipeline_info(pipeline_name,
                               f"processing operation dependencies for id: {operation.id}",
                               operation_name=operation.name)

        # upload operation dependencies to object storage
        try:
            t0 = time.time()
            dependency_archive_path = self._generate_dependency_archive(operation)
            self.log_pipeline_info(pipeline_name,
                                   f"generated dependency archive: {dependency_archive_path}",
                                   operation_name=operation.name,
                                   duration=(time.time() - t0))

            cos_client = CosClient(config=runtime_configuration)

            t0 = time.time()
            cos_client.upload_file_to_dir(dir=cos_directory,
                                          file_name=operation_artifact_archive,
                                          file_path=dependency_archive_path)
            self.log_pipeline_info(pipeline_name,
                                   f"uploaded dependency archive to: {cos_directory}/{operation_artifact_archive}",
                                   operation_name=operation.name,
                                   duration=(time.time() - t0))

        except FileNotFoundError as ex:
            self.log.error("Dependencies were not found building archive for operation: {}".
                           format(operation.name), exc_info=True)
            raise FileNotFoundError("Node '{}' referenced dependencies that were not found: {}".
                                    format(operation.name, ex))
        except BaseException as ex:
            self.log.error("Error uploading artifacts to object storage for operation: {}".
                           format(operation.name), exc_info=True)
            raise ex from ex

    # Process dependencies after all the operations have been created
    for operation in pipeline.operations.values():
        op = notebook_ops[operation.id]
        for parent_operation_id in operation.parent_operations:
            parent_op = notebook_ops[parent_operation_id]  # Parent Operation
            op.after(parent_op)

    self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all))

    return notebook_ops
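# For illustration only: the "KEY=value" parsing used in the env-var loops above can
# be exercised in isolation. parse_env_var is a hypothetical helper, not part of the
# original code; it mirrors the split-on-first-'=' and quote/space-stripping behaviour.
def parse_env_var(env_var):
    """Return a (key, value) tuple, or None if the entry has no usable key."""
    result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
    if len(result) == 2 and result[0] != '':
        return result[0], result[1]
    return None


assert parse_env_var('FOO=bar') == ('FOO', 'bar')
assert parse_env_var("FOO='quoted value'") == ('FOO', 'quoted value')
assert parse_env_var('=missing-key') is None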
def _cc_pipeline(self, pipeline, pipeline_name):

    runtime_configuration = self._get_runtime_configuration(pipeline.runtime_config)

    cos_endpoint = runtime_configuration.metadata['cos_endpoint']
    cos_username = runtime_configuration.metadata['cos_username']
    cos_password = runtime_configuration.metadata['cos_password']
    cos_directory = pipeline_name
    cos_bucket = runtime_configuration.metadata['cos_bucket']

    # Create dictionary that maps component Id to its ContainerOp instance
    notebook_ops = {}

    # All previous operation outputs should be propagated throughout the pipeline.
    # In order to process this recursively, the current operation's inputs should be combined
    # from its parent's inputs (which, themselves are derived from the outputs of their parent)
    # and its parent's outputs.
    for pipeline_operation in pipeline.operations.values():
        parent_inputs_and_outputs = []
        for parent_operation_id in pipeline_operation.parent_operations:
            parent_operation = pipeline.operations[parent_operation_id]
            if parent_operation.inputs:
                parent_inputs_and_outputs.extend(parent_operation.inputs)
            if parent_operation.outputs:
                parent_inputs_and_outputs.extend(parent_operation.outputs)

            if parent_inputs_and_outputs:
                pipeline_operation.inputs = parent_inputs_and_outputs

    for operation in pipeline.operations.values():

        operation_artifact_archive = self._get_dependency_archive_name(operation)

        self.log.debug("Creating pipeline component :\n "
                       "componentID : %s \n "
                       "name : %s \n "
                       "parent_operations : %s \n "
                       "dependencies : %s \n "
                       "dependencies include subdirectories : %s \n "
                       "filename : %s \n "
                       "archive : %s \n "
                       "inputs : %s \n "
                       "outputs : %s \n "
                       "runtime image : %s \n ",
                       operation.id,
                       operation.name,
                       operation.parent_operations,
                       operation.dependencies,
                       operation.include_subdirectories,
                       operation.filename,
                       operation_artifact_archive,
                       operation.inputs,
                       operation.outputs,
                       operation.runtime_image)

        # create pipeline operation
        notebook_op = NotebookOp(name=operation.name,
                                 notebook=operation.filename,
                                 cos_endpoint=cos_endpoint,
                                 cos_bucket=cos_bucket,
                                 cos_directory=cos_directory,
                                 cos_dependencies_archive=operation_artifact_archive,
                                 image=operation.runtime_image)

        if operation.inputs:
            notebook_op.add_pipeline_inputs(self._artifact_list_to_str(operation.inputs))
        if operation.outputs:
            notebook_op.add_pipeline_outputs(self._artifact_list_to_str(operation.outputs))

        notebook_op.add_environment_variable('AWS_ACCESS_KEY_ID', cos_username)
        notebook_op.add_environment_variable('AWS_SECRET_ACCESS_KEY', cos_password)

        # Set ENV variables
        if operation.env_vars:
            for env_var in operation.env_vars:
                # Strip any of these special characters from both key and value
                # Splits on the first occurrence of '='
                result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                # Should be non empty key with a value
                if len(result) == 2 and result[0] != '':
                    notebook_op.add_environment_variable(result[0], result[1])

        notebook_ops[operation.id] = notebook_op

        self.log.info("NotebookOp Created for Component '%s' (%s)", operation.name, operation.id)

        # upload operation dependencies to object storage
        try:
            dependency_archive_path = self._generate_dependency_archive(operation)
            cos_client = CosClient(config=runtime_configuration)
            cos_client.upload_file_to_dir(dir=cos_directory,
                                          file_name=operation_artifact_archive,
                                          file_path=dependency_archive_path)
        except BaseException:
            self.log.error("Error uploading artifacts to object storage.", exc_info=True)
            raise

    self.log.info("Pipeline dependencies have been uploaded to object storage")

    # Process dependencies after all the operations have been created
    for pipeline_operation in pipeline.operations.values():
        op = notebook_ops[pipeline_operation.id]
        for parent_operation_id in pipeline_operation.parent_operations:
            parent_op = notebook_ops[parent_operation_id]  # Parent Operation
            op.after(parent_op)

    return notebook_ops
def _cc_pipeline(self, pipeline, pipeline_name):

    runtime_configuration = self._get_runtime_configuration(pipeline.runtime_config)

    cos_endpoint = runtime_configuration.metadata['cos_endpoint']
    cos_username = runtime_configuration.metadata['cos_username']
    cos_password = runtime_configuration.metadata['cos_password']
    cos_directory = pipeline_name
    bucket_name = runtime_configuration.metadata['cos_bucket']

    # Create dictionary that maps component Id to its ContainerOp instance
    notebook_ops = {}

    # Preprocess the output/input artifacts
    for pipeline_child_operation in pipeline.operations.values():
        for dependency in pipeline_child_operation.dependencies:
            pipeline_parent_operation = pipeline.operations[dependency]
            if pipeline_parent_operation.outputs:
                pipeline_child_operation.inputs = \
                    pipeline_child_operation.inputs + pipeline_parent_operation.outputs

    for operation in pipeline.operations.values():

        operation_artifact_archive = self._get_dependency_archive_name(operation)

        self.log.debug("Creating pipeline component :\n "
                       "componentID : %s \n "
                       "name : %s \n "
                       "dependencies : %s \n "
                       "file dependencies : %s \n "
                       "dependencies include subdirectories : %s \n "
                       "path of workspace : %s \n "
                       "artifact archive : %s \n "
                       "inputs : %s \n "
                       "outputs : %s \n "
                       "docker image : %s \n ",
                       operation.id,
                       operation.title,
                       operation.dependencies,
                       operation.file_dependencies,
                       operation.recursive_dependencies,
                       operation.artifact,
                       operation_artifact_archive,
                       operation.inputs,
                       operation.outputs,
                       operation.image)

        # create pipeline operation
        notebook_op = NotebookOp(name=operation.title,
                                 notebook=operation.artifact_name,
                                 cos_endpoint=cos_endpoint,
                                 cos_bucket=bucket_name,
                                 cos_directory=cos_directory,
                                 cos_pull_archive=operation_artifact_archive,
                                 pipeline_outputs=self._artifact_list_to_str(operation.outputs),
                                 pipeline_inputs=self._artifact_list_to_str(operation.inputs),
                                 image=operation.image)

        notebook_op.container.add_env_variable(V1EnvVar(name='AWS_ACCESS_KEY_ID', value=cos_username))
        notebook_op.container.add_env_variable(V1EnvVar(name='AWS_SECRET_ACCESS_KEY', value=cos_password))

        # Set ENV variables
        if operation.vars:
            for env_var in operation.vars:
                # Strip any of these special characters from both key and value
                # Splits on the first occurrence of '='
                result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                # Should be non empty key with a value
                if len(result) == 2 and result[0] != '':
                    notebook_op.container.add_env_variable(V1EnvVar(name=result[0], value=result[1]))

        notebook_ops[operation.id] = notebook_op

        self.log.info("NotebookOp Created for Component %s \n", operation.id)

        # upload operation dependencies to object store
        try:
            dependency_archive_path = self._generate_dependency_archive(operation)
            cos_client = CosClient(config=runtime_configuration)
            cos_client.upload_file_to_dir(dir=cos_directory,
                                          file_name=operation_artifact_archive,
                                          file_path=dependency_archive_path)
        except BaseException:
            self.log.error("Error uploading artifacts to object storage.", exc_info=True)
            raise

    self.log.info("Pipeline dependencies have been uploaded to object store")

    # Process dependencies after all the operations have been created
    for pipeline_operation in pipeline.operations.values():
        op = notebook_ops[pipeline_operation.id]
        for dependency in pipeline_operation.dependencies:
            dependency_op = notebook_ops[dependency]  # Parent Operation
            op.after(dependency_op)

    return notebook_ops
def _cc_pipeline(self, pipeline, pipeline_name):

    runtime_configuration = self._get_runtime_configuration(pipeline.runtime_config)

    cos_endpoint = runtime_configuration.metadata['cos_endpoint']
    cos_username = runtime_configuration.metadata['cos_username']
    cos_password = runtime_configuration.metadata['cos_password']
    cos_directory = pipeline_name
    cos_bucket = runtime_configuration.metadata['cos_bucket']

    emptydir_volume_size = ''
    container_runtime = bool(os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

    # Create dictionary that maps component Id to its ContainerOp instance
    notebook_ops = {}

    # All previous operation outputs should be propagated throughout the pipeline.
    # In order to process this recursively, the current operation's inputs should be combined
    # from its parent's inputs (which, themselves are derived from the outputs of their parent)
    # and its parent's outputs.
    for operation in pipeline.operations.values():
        parent_io = []  # gathers inputs & outputs relative to parent
        for parent_operation_id in operation.parent_operations:
            parent_operation = pipeline.operations[parent_operation_id]
            if parent_operation.inputs:
                parent_io.extend(parent_operation.inputs)
            if parent_operation.outputs:
                parent_io.extend(parent_operation.outputs)

            if parent_io:
                operation.inputs = parent_io

    for operation in pipeline.operations.values():

        operation_artifact_archive = self._get_dependency_archive_name(operation)

        self.log.debug("Creating pipeline component :\n {op} archive : {archive}".
                       format(op=operation, archive=operation_artifact_archive))

        if container_runtime:
            # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
            emptydir_volume_size = '20Gi'

        # Collect env variables
        pipeline_envs = dict()
        pipeline_envs['AWS_ACCESS_KEY_ID'] = cos_username
        pipeline_envs['AWS_SECRET_ACCESS_KEY'] = cos_password

        if operation.env_vars:
            for env_var in operation.env_vars:
                # Strip any of these special characters from both key and value
                # Splits on the first occurrence of '='
                result = [x.strip(' \'\"') for x in env_var.split('=', 1)]
                # Should be non empty key with a value
                if len(result) == 2 and result[0] != '':
                    pipeline_envs[result[0]] = result[1]

        # create pipeline operation
        notebook_ops[operation.id] = NotebookOp(name=operation.name,
                                                notebook=operation.filename,
                                                cos_endpoint=cos_endpoint,
                                                cos_bucket=cos_bucket,
                                                cos_directory=cos_directory,
                                                cos_dependencies_archive=operation_artifact_archive,
                                                pipeline_inputs=operation.inputs,
                                                pipeline_outputs=operation.outputs,
                                                pipeline_envs=pipeline_envs,
                                                emptydir_volume_size=emptydir_volume_size,
                                                image=operation.runtime_image)

        self.log.info("NotebookOp Created for Component '%s' (%s)", operation.name, operation.id)

        # upload operation dependencies to object storage
        try:
            t0 = time.time()
            dependency_archive_path = self._generate_dependency_archive(operation)
            t1 = time.time()
            self.log.debug("Generation of dependency archive for operation '{name}' took {duration:.3f} secs.".
                           format(name=operation.name, duration=(t1 - t0)))

            cos_client = CosClient(config=runtime_configuration)

            t0 = time.time()
            cos_client.upload_file_to_dir(dir=cos_directory,
                                          file_name=operation_artifact_archive,
                                          file_path=dependency_archive_path)
            t1 = time.time()
            self.log.debug("Upload of dependency archive for operation '{name}' took {duration:.3f} secs.".
                           format(name=operation.name, duration=(t1 - t0)))

        except FileNotFoundError as ex:
            self.log.error("Dependencies were not found building archive for operation: {}".
                           format(operation.name), exc_info=True)
            raise FileNotFoundError("Node '{}' referenced dependencies that were not found: {}".
                                    format(operation.name, ex))
        except BaseException as ex:
            self.log.error("Error uploading artifacts to object storage for operation: {}".
                           format(operation.name), exc_info=True)
            raise ex from ex

    self.log.info("Pipeline dependencies have been uploaded to object storage")

    # Process dependencies after all the operations have been created
    for operation in pipeline.operations.values():
        op = notebook_ops[operation.id]
        for parent_operation_id in operation.parent_operations:
            parent_op = notebook_ops[parent_operation_id]  # Parent Operation
            op.after(parent_op)

    return notebook_ops
def _upload_dependencies_to_object_store(self, runtime_configuration, pipeline_name, operation):
    operation_artifact_archive = self._get_dependency_archive_name(operation)
    cos_directory = pipeline_name
    # upload operation dependencies to object store
    try:
        t0 = time.time()
        dependency_archive_path = self._generate_dependency_archive(operation)
        self.log_pipeline_info(
            pipeline_name,
            f"generated dependency archive: {dependency_archive_path}",
            operation_name=operation.name,
            duration=(time.time() - t0),
        )

        cos_client = CosClient(config=runtime_configuration)

        t0 = time.time()
        cos_client.upload_file_to_dir(
            dir=cos_directory, file_name=operation_artifact_archive, file_path=dependency_archive_path
        )
        self.log_pipeline_info(
            pipeline_name,
            f"uploaded dependency archive to: {cos_directory}/{operation_artifact_archive}",
            operation_name=operation.name,
            duration=(time.time() - t0),
        )

    except FileNotFoundError as ex:
        self.log.error(
            f"Dependencies were not found building archive for operation: {operation.name}", exc_info=True
        )
        raise FileNotFoundError(
            f"Node '{operation.name}' referenced dependencies that were not found: {ex}"
        ) from ex
    except MaxRetryError as ex:
        cos_endpoint = runtime_configuration.metadata.get("cos_endpoint")
        self.log.error(f"Connection was refused when attempting to connect to : {cos_endpoint}", exc_info=True)
        raise RuntimeError(
            f"Connection was refused when attempting to upload artifacts to : '{cos_endpoint}'. "
            "Please check your object storage settings."
        ) from ex
    except S3Error as ex:
        msg_prefix = f"Error connecting to object storage: {ex.code}."
        if ex.code == "SignatureDoesNotMatch":
            # likely cause: incorrect password
            raise RuntimeError(
                f"{msg_prefix} Verify the password "
                f"in runtime configuration '{runtime_configuration.display_name}' "
                "and try again."
            ) from ex
        elif ex.code == "InvalidAccessKeyId":
            # likely cause: incorrect user id
            raise RuntimeError(
                f"{msg_prefix} Verify the username "
                f"in runtime configuration '{runtime_configuration.display_name}' "
                "and try again."
            ) from ex
        else:
            raise RuntimeError(
                f"{msg_prefix} Verify "
                f"runtime configuration '{runtime_configuration.display_name}' "
                "and try again."
            ) from ex
    except BaseException as ex:
        self.log.error(
            f"Error uploading artifacts to object storage for operation: {operation.name}", exc_info=True
        )
        raise ex from ex
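# A minimal sketch (assumed call site, not taken from the snippets above) of how this
# helper might be driven from _cc_pipeline: each operation's dependency archive is
# uploaded before the ContainerOp graph is wired together.
#
#     for operation in pipeline.operations.values():
#         self._upload_dependencies_to_object_store(runtime_configuration,
#                                                    pipeline_name,
#                                                    operation)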