class PipelineProcessor(LoggingConfigurable):  # ABC
    """Abstract base for runtime-specific pipeline processors.

    Concrete subclasses identify their runtime via ``type`` and implement
    ``process`` (run the pipeline) and ``export`` (serialize it to a
    runtime-specific format).
    """

    # Runtime identifier used as the prefix of every log_pipeline_info entry.
    _type = None

    root_dir = Unicode(allow_none=True)

    enable_pipeline_info = Bool(
        config=True,
        default_value=(os.getenv('ELYRA_ENABLE_PIPELINE_INFO', 'true').lower() == 'true'),
        help="""Produces formatted logging of informational messages with durations (default=True). (ELYRA_ENABLE_PIPELINE_INFO env var)""")

    def __init__(self, root_dir, **kwargs):
        super(PipelineProcessor, self).__init__(**kwargs)
        self.root_dir = root_dir

    @property
    @abstractmethod
    def type(self):
        """The runtime type this processor handles (implemented by subclasses)."""
        raise NotImplementedError()

    @abstractmethod
    def process(self, pipeline) -> PipelineProcessorResponse:
        """Submit the given pipeline for execution on the target runtime."""
        raise NotImplementedError()

    @abstractmethod
    def export(self, pipeline, pipeline_export_format, pipeline_export_path, overwrite):
        """Export the given pipeline to `pipeline_export_path` in `pipeline_export_format`."""
        raise NotImplementedError()

    def log_pipeline_info(self, pipeline_name: str, action_clause: str, **kwargs):
        """Produces a formatted log INFO message used entirely for support purposes.

        This method is intended to be called for any entries that should be captured
        across aggregated log files to identify steps within a given pipeline and each
        of its operations.  As a result, calls to this method should produce single-line
        entries in the log (no embedded newlines).  Each entry is prefixed with the
        pipeline name.

        This functionality can be disabled by setting
        PipelineProcessor.enable_pipeline_info = False (or via env
        ELYRA_ENABLE_PIPELINE_INFO).

        General logging should NOT use this method but use logger.<level>() statements
        directly.

        :param pipeline_name: str representing the name of the pipeline that is being executed
        :param action_clause: str representing the action that is being logged
        :param **kwargs: dict representing the keyword arguments.  Recognized keywords include:
               operation_name: str representing the name of the operation applicable for this entry
               duration: float value representing the duration of the action being logged
        """
        if self.enable_pipeline_info:
            duration = kwargs.get('duration')
            # Explicit None check so that a (legitimate) zero-second duration
            # still produces a duration clause; the original truthiness test
            # silently dropped 0.0 values.
            duration_clause = f"({duration:.3f} secs)" if duration is not None else ""

            operation_name = kwargs.get('operation_name')
            op_clause = f":'{operation_name}'" if operation_name else ""

            self.log.info(f"{self._type} '{pipeline_name}'{op_clause} - {action_clause} {duration_clause}")
class PipelineProcessor(LoggingConfigurable):  # ABC
    """Abstract base for runtime-specific pipeline processors.

    Concrete subclasses identify their runtime via ``type`` and implement
    ``process`` (run the pipeline) and ``export`` (serialize it to a
    runtime-specific format).  Shared helpers perform topological sorting of
    operations and input/output propagation across the dependency graph.
    """

    # Runtime identifier used as the prefix of every log_pipeline_info entry.
    _type = None

    root_dir = Unicode(allow_none=True)

    # Shared registry of components available to this processor type.
    component_registry: ComponentRegistry = ComponentRegistry()

    enable_pipeline_info = Bool(
        config=True,
        default_value=(os.getenv('ELYRA_ENABLE_PIPELINE_INFO', 'true').lower() == 'true'),
        help="""Produces formatted logging of informational messages with durations (default=True). (ELYRA_ENABLE_PIPELINE_INFO env var)""")

    def __init__(self, root_dir, **kwargs):
        super(PipelineProcessor, self).__init__(**kwargs)
        self.root_dir = root_dir

    @property
    @abstractmethod
    def type(self):
        """The runtime type this processor handles (implemented by subclasses)."""
        raise NotImplementedError()

    def get_components(self):
        """Return all registered components applicable to this processor's type."""
        components = self.component_registry.get_all_components(processor_type=self.type)
        return components

    @abstractmethod
    def process(self, pipeline) -> PipelineProcessorResponse:
        """Submit the given pipeline for execution on the target runtime."""
        raise NotImplementedError()

    @abstractmethod
    def export(self, pipeline, pipeline_export_format, pipeline_export_path, overwrite):
        """Export the given pipeline to `pipeline_export_path` in `pipeline_export_format`."""
        raise NotImplementedError()

    def log_pipeline_info(self, pipeline_name: str, action_clause: str, **kwargs):
        """Produces a formatted log INFO message used entirely for support purposes.

        This method is intended to be called for any entries that should be captured
        across aggregated log files to identify steps within a given pipeline and each
        of its operations.  As a result, calls to this method should produce single-line
        entries in the log (no embedded newlines).  Each entry is prefixed with the
        pipeline name.

        This functionality can be disabled by setting
        PipelineProcessor.enable_pipeline_info = False (or via env
        ELYRA_ENABLE_PIPELINE_INFO).

        General logging should NOT use this method but use logger.<level>() statements
        directly.

        :param pipeline_name: str representing the name of the pipeline that is being executed
        :param action_clause: str representing the action that is being logged
        :param **kwargs: dict representing the keyword arguments.  Recognized keywords include:
               operation_name: str representing the name of the operation applicable for this entry
               duration: float value representing the duration of the action being logged
        """
        if self.enable_pipeline_info:
            duration = kwargs.get('duration')
            # Explicit None check so that a (legitimate) zero-second duration
            # still produces a duration clause; the original truthiness test
            # silently dropped 0.0 values.
            duration_clause = f"({duration:.3f} secs)" if duration is not None else ""

            operation_name = kwargs.get('operation_name')
            op_clause = f":'{operation_name}'" if operation_name else ""

            self.log.info(f"{self._type} '{pipeline_name}'{op_clause} - {action_clause} {duration_clause}")

    @staticmethod
    def _propagate_operation_inputs_outputs(pipeline: Pipeline, sorted_operations: List[Operation]) -> None:
        """
        All previous operation outputs should be propagated throughout the pipeline.
        In order to process this recursively, the current operation's inputs should be
        combined from its parent's inputs (which, themselves are derived from the outputs
        of their parent) and its parent's outputs.
        """
        for operation in sorted_operations:
            parent_io = set()  # gathers inputs & outputs relative to parent
            for parent_operation_id in operation.parent_operations:
                parent_operation = pipeline.operations[parent_operation_id]
                if parent_operation.inputs:
                    parent_io.update(parent_operation.inputs)
                if parent_operation.outputs:
                    parent_io.update(parent_operation.outputs)

            if parent_io:
                # NOTE(review): assumes operation.inputs is iterable (not None)
                # whenever a parent contributed any I/O — confirm upstream invariant.
                parent_io.update(operation.inputs)
                operation.inputs = list(parent_io)

    @staticmethod
    def _sort_operations(operations_by_id: dict) -> List[Operation]:
        """
        Sort the list of operations based on its dependency graph
        """
        ordered_operations = []

        for operation in operations_by_id.values():
            PipelineProcessor._sort_operation_dependencies(operations_by_id, ordered_operations, operation)

        return ordered_operations

    @staticmethod
    def _sort_operation_dependencies(operations_by_id: dict, ordered_operations: list, operation: Operation) -> None:
        """
        Helper method to the main sort operation function
        """
        # Optimization: check if already processed
        if operation not in ordered_operations:
            # process each of the dependencies that needs to be executed first
            for parent_operation_id in operation.parent_operations:
                parent_operation = operations_by_id[parent_operation_id]
                if parent_operation not in ordered_operations:
                    PipelineProcessor._sort_operation_dependencies(operations_by_id,
                                                                   ordered_operations,
                                                                   parent_operation)
            ordered_operations.append(operation)