Example #1
0
    def export(self, pipeline, pipeline_export_format, pipeline_export_path,
               overwrite):
        """Export *pipeline* as a Python file.

        :param pipeline: the pipeline object to export
        :param pipeline_export_format: target format; only "py" is supported
        :param pipeline_export_path: destination path, possibly relative to
            the server root directory
        :param overwrite: when False, refuse to replace an existing file
        :returns: the (unresolved) ``pipeline_export_path`` input value
        :raises ValueError: for an unsupported format or an existing file
            when ``overwrite`` is False
        """
        if pipeline_export_format not in ["py"]:
            raise ValueError(
                "Pipeline export format {} not recognized.".format(
                    pipeline_export_format))

        # Timestamp suffix keeps successive exports of the same pipeline distinct.
        timestamp = datetime.now().strftime("%m%d%H%M%S")
        pipeline_name = f'{pipeline.name}-{timestamp}'

        # pipeline_export_path may be relative to the notebook directory;
        # resolve it against the server root before any filesystem checks.
        absolute_pipeline_export_path = get_absolute_path(
            self.root_dir, pipeline_export_path)

        if os.path.exists(absolute_pipeline_export_path) and not overwrite:
            raise ValueError("File " + absolute_pipeline_export_path +
                             " already exists.")

        self.log_pipeline_info(
            pipeline_name,
            f"exporting pipeline as a .{pipeline_export_format} file")

        # Fix: write to the resolved absolute path. Previously the possibly
        # relative input path was passed through, so the file could be created
        # relative to the process cwd even though the overwrite check above
        # inspected the absolute location.
        self.create_pipeline_file(pipeline=pipeline,
                                  pipeline_export_format="py",
                                  pipeline_export_path=absolute_pipeline_export_path,
                                  pipeline_name=pipeline_name)

        return pipeline_export_path
Example #2
0
 def get_valid_filepath(self, op_filename: str) -> str:
     """Resolve *op_filename* against the root directory and validate it.

     Raises ``FileNotFoundError`` if nothing exists at the resolved path,
     and ``ValueError`` if the path exists but is not a regular file.
     """
     filepath = get_absolute_path(self._root_dir, op_filename)
     if os.path.exists(filepath):
         if os.path.isfile(filepath):
             return filepath
         raise ValueError(f'Not a file: {filepath}')
     raise FileNotFoundError(f'Could not find {filepath}')
Example #3
0
    def export(self, pipeline, pipeline_export_format, pipeline_export_path,
               overwrite):
        """Export *pipeline* as a Python DAG file and return the written path.

        Raises ``ValueError`` when the destination exists and *overwrite*
        is False; format validation is delegated to the processor helper.
        """
        # Ensure this AirflowPipelineProcessor supports the requested format.
        self._verify_export_format(pipeline_export_format)

        # pipeline_export_path may be relative to the notebook directory;
        # resolve it before touching the filesystem.
        absolute_pipeline_export_path = get_absolute_path(
            self.root_dir, pipeline_export_path)
        if not overwrite and os.path.exists(absolute_pipeline_export_path):
            raise ValueError(f"File {absolute_pipeline_export_path} already exists.")

        # Timestamped name distinguishes repeated exports of the same pipeline.
        export_stamp = datetime.now().strftime("%m%d%H%M%S")
        instance_name = f"{pipeline.name}-{export_stamp}"

        self.log_pipeline_info(
            instance_name,
            f"exporting pipeline as a .{pipeline_export_format} file")

        return self.create_pipeline_file(
            pipeline=pipeline,
            pipeline_export_format="py",
            pipeline_export_path=absolute_pipeline_export_path,
            pipeline_name=instance_name,
        )
Example #4
0
    def export(
        self, pipeline: Pipeline, pipeline_export_format: str, pipeline_export_path: str, overwrite: bool
    ) -> str:
        """
        Export pipeline as Airflow DAG
        """
        # Ensure this AirflowPipelineProcessor supports the requested format.
        self._verify_export_format(pipeline_export_format)

        # pipeline_export_path may be relative to the notebook directory;
        # resolve it before touching the filesystem.
        resolved_export_path = get_absolute_path(self.root_dir, pipeline_export_path)
        if not overwrite and os.path.exists(resolved_export_path):
            raise ValueError(f"File '{resolved_export_path}' already exists.")

        # The instance id (name + timestamp) is used to store the
        # pipeline's dependencies, if applicable.
        export_stamp = datetime.now().strftime("%m%d%H%M%S")
        instance_id = f"{pipeline.name}-{export_stamp}"

        self.log_pipeline_info(pipeline.name, f"exporting pipeline as a .{pipeline_export_format} file")

        return self.create_pipeline_file(
            pipeline=pipeline,
            pipeline_export_format="py",
            pipeline_export_path=resolved_export_path,
            pipeline_name=pipeline.name,
            pipeline_instance_id=instance_id,
        )
Example #5
0
    def export(self, pipeline, pipeline_export_format, pipeline_export_path, overwrite):
        """Export *pipeline* as a static KFP configuration file (YAML).

        Compiles the pipeline with the Tekton or Argo compiler, depending
        on the runtime configuration's ``engine`` setting, and writes the
        result to *pipeline_export_path* (resolved against the server root).

        :param pipeline: the pipeline object to export
        :param pipeline_export_format: requested format; validated by
            ``self._verify_export_format``
        :param pipeline_export_path: destination path, possibly relative
        :param overwrite: when False, refuse to replace an existing file
        :returns: the (unresolved) ``pipeline_export_path`` input value
        :raises ValueError: for Tekton engine without kfp-tekton installed,
            or an existing destination file when ``overwrite`` is False
        :raises RuntimeError: when pipeline pre-processing/compilation fails
        """
        # Verify that the KfpPipelineProcessor supports the given export format
        self._verify_export_format(pipeline_export_format)

        # Wall-clock start, reported via log_pipeline_info at the end.
        t0_all = time.time()
        timestamp = datetime.now().strftime("%m%d%H%M%S")
        pipeline_name = pipeline.name
        # Unique location on COS where the pipeline run artifacts
        # will be stored
        cos_directory = f"{pipeline_name}-{timestamp}"

        # Since pipeline_export_path may be relative to the notebook directory, ensure
        # we're using its absolute form.
        absolute_pipeline_export_path = get_absolute_path(self.root_dir, pipeline_export_path)

        # Runtime configuration supplies the engine selection below.
        runtime_configuration = self._get_metadata_configuration(
            schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID, name=pipeline.runtime_config
        )

        engine = runtime_configuration.metadata.get("engine")
        if engine == "Tekton" and not TektonClient:
            raise ValueError("kfp-tekton not installed. Please install using elyra[kfp-tekton] to use Tekton engine.")

        if os.path.exists(absolute_pipeline_export_path) and not overwrite:
            raise ValueError("File " + absolute_pipeline_export_path + " already exists.")

        self.log_pipeline_info(pipeline_name, f"Exporting pipeline as a .{pipeline_export_format} file")
        # Export pipeline as static configuration file (YAML formatted)
        try:
            # Exported pipeline is not associated with an experiment
            # or a version. The association is established when the
            # pipeline is imported into KFP by the user.
            # The compiler invokes this callable to build the pipeline graph.
            pipeline_function = lambda: self._cc_pipeline(
                pipeline, pipeline_name, cos_directory=cos_directory
            )  # nopep8
            if engine == "Tekton":
                self.log.info("Compiling pipeline for Tekton engine")
                kfp_tekton_compiler.TektonCompiler().compile(pipeline_function, absolute_pipeline_export_path)
            else:
                self.log.info("Compiling pipeline for Argo engine")
                kfp_argo_compiler.Compiler().compile(pipeline_function, absolute_pipeline_export_path)
        except RuntimeError:
            # RuntimeErrors already carry a useful message; re-raise as-is.
            raise
        except Exception as ex:
            # Preserve the original cause; wrap everything else in a
            # RuntimeError so callers see a single failure type.
            if ex.__cause__:
                raise RuntimeError(str(ex)) from ex
            raise RuntimeError(
                f"Error pre-processing pipeline {pipeline_name} for export at {absolute_pipeline_export_path}",
                str(ex),
            ) from ex

        self.log_pipeline_info(
            pipeline_name, f"pipeline exported: {pipeline_export_path}", duration=(time.time() - t0_all)
        )

        return pipeline_export_path  # Return the input value, not its absolute form
Example #6
0
    def process(self, operation: Operation):
        """Run the file referenced by *operation* as a local subprocess.

        The file is resolved against the root directory, validated, and
        executed with its own directory as the working directory. Any
        execution failure is logged and re-raised as ``RuntimeError``.
        """
        filepath = get_absolute_path(self._root_dir, operation.filename)
        if not os.path.exists(filepath):
            raise FileNotFoundError(f'Could not find {filepath}')
        if not os.path.isfile(filepath):
            raise ValueError(f'Not a file: {filepath}')

        file_dir, file_name = os.path.dirname(filepath), os.path.basename(filepath)

        self.log.debug(f'Processing: {filepath}')

        # Build the command and environment up front so failures here are
        # not masked by the execution error handler below.
        command = self._create_execute_command(filepath, file_dir)
        env_vars = operation.env_vars_as_dict
        started_at = time.time()
        try:
            subprocess.run(command, cwd=file_dir, env=env_vars, check=True)
        except Exception as ex:
            self.log.error(f'Internal error executing {filepath}')
            raise RuntimeError(f'Internal error executing {filepath}') from ex

        duration = time.time() - started_at
        self.log.debug(f'Execution of {file_name} took {duration:.3f} secs.')
Example #7
0
    def export(self, pipeline, pipeline_export_format, pipeline_export_path, overwrite):
        """Export *pipeline* either as a compiled KFP config (YAML) or as
        Python DSL rendered from a Jinja template.

        For the "yaml" path the pipeline is compiled with the Tekton or
        Argo compiler (per the runtime configuration's ``engine``); for
        "py" the ``kfp_template.jinja2`` template is rendered and the
        output formatted with autopep8 and black before writing.

        :param pipeline: the pipeline object to export
        :param pipeline_export_format: "yaml" or "py"
        :param pipeline_export_path: destination path, possibly relative
        :param overwrite: when False, refuse to replace an existing file
        :returns: the (unresolved) ``pipeline_export_path`` input value
        :raises ValueError: unsupported format, or destination exists and
            ``overwrite`` is False
        :raises RuntimeError: when pipeline pre-processing/compilation fails
        """
        if pipeline_export_format not in ["yaml", "py"]:
            raise ValueError("Pipeline export format {} not recognized.".format(pipeline_export_format))

        # Wall-clock start, reported via log_pipeline_info at the end.
        t0_all = time.time()
        timestamp = datetime.now().strftime("%m%d%H%M%S")
        pipeline_name = pipeline.name
        # Timestamped identifiers scope this export's version, run and
        # artifact storage independently of earlier exports.
        pipeline_version_name = f'{pipeline_name}-{timestamp}'
        # work around https://github.com/kubeflow/pipelines/issues/5172
        experiment_name = pipeline_name.lower()
        # Unique identifier for the pipeline run
        job_name = f'{pipeline_name}-{timestamp}'
        # Unique location on COS where the pipeline run artifacts
        # will be stored
        cos_directory = f'{pipeline_name}-{timestamp}'

        # Since pipeline_export_path may be relative to the notebook directory, ensure
        # we're using its absolute form.
        absolute_pipeline_export_path = get_absolute_path(self.root_dir, pipeline_export_path)

        # Runtime configuration supplies the endpoint/engine values that
        # are baked into the generated Python DSL below.
        runtime_configuration = self._get_metadata_configuration(namespace=MetadataManager.NAMESPACE_RUNTIMES,
                                                                 name=pipeline.runtime_config)
        api_endpoint = runtime_configuration.metadata['api_endpoint']
        namespace = runtime_configuration.metadata.get('user_namespace')
        engine = runtime_configuration.metadata.get('engine')
        cos_secret = runtime_configuration.metadata.get('cos_secret')

        if os.path.exists(absolute_pipeline_export_path) and not overwrite:
            raise ValueError("File " + absolute_pipeline_export_path + " already exists.")

        self.log_pipeline_info(pipeline_name, f"exporting pipeline as a .{pipeline_export_format} file")
        if pipeline_export_format != "py":
            # Export pipeline as static configuration file (YAML formatted)
            try:
                # Exported pipeline is not associated with an experiment
                # or a version. The association is established when the
                # pipeline is imported into KFP by the user.
                # The compiler invokes this callable to build the pipeline graph.
                pipeline_function = lambda: self._cc_pipeline(pipeline,
                                                              pipeline_name,
                                                              cos_directory=cos_directory)  # nopep8

                if 'Tekton' == engine:
                    self.log.info("Compiling pipeline for Tekton engine")
                    kfp_tekton_compiler.TektonCompiler().compile(pipeline_function, absolute_pipeline_export_path)
                else:
                    self.log.info("Compiling pipeline for Argo engine")
                    kfp_argo_compiler.Compiler().compile(pipeline_function, absolute_pipeline_export_path)
            except Exception as ex:
                # Preserve the original cause; wrap in RuntimeError so
                # callers see a single failure type.
                if ex.__cause__:
                    raise RuntimeError(str(ex)) from ex
                raise RuntimeError('Error pre-processing pipeline {} for export at {}'.
                                   format(pipeline_name, absolute_pipeline_export_path), str(ex)) from ex
        else:
            # Export pipeline as Python DSL
            # Load template from installed elyra package

            loader = PackageLoader('elyra', 'templates/kfp')
            template_env = Environment(loader=loader, trim_blocks=True)

            # Custom filter so the template can reference bare file names.
            template_env.filters['to_basename'] = lambda path: os.path.basename(path)

            template = template_env.get_template('kfp_template.jinja2')

            # Process the pipeline into the operations fed to the template.
            defined_pipeline = self._cc_pipeline(pipeline,
                                                 pipeline_name,
                                                 pipeline_version=pipeline_version_name,
                                                 experiment_name=experiment_name,
                                                 cos_directory=cos_directory,
                                                 export=True)

            description = f'Created with Elyra {__version__} pipeline editor using {pipeline.source}.'

            for key, operation in defined_pipeline.items():
                self.log.debug("component :\n "
                               "container op name : %s \n "
                               "inputs : %s \n "
                               "outputs : %s \n ",
                               operation.name,
                               operation.inputs,
                               operation.outputs)

            # The exported pipeline is by default associated with
            # an experiment.
            # The user can manually customize the generated code
            # and change the associations as desired.

            python_output = template.render(operations_list=defined_pipeline,
                                            pipeline_name=pipeline_name,
                                            pipeline_version=pipeline_version_name,
                                            experiment_name=experiment_name,
                                            run_name=job_name,
                                            engine=engine,
                                            cos_secret=cos_secret,
                                            namespace=namespace,
                                            api_endpoint=api_endpoint,
                                            pipeline_description=description,
                                            writable_container_dir=self.WCD)

            # Write to Python file and fix formatting
            # (autopep8 first, then black via format_str)
            with open(absolute_pipeline_export_path, "w") as fh:
                autopep_output = autopep8.fix_code(python_output)
                output_to_file = format_str(autopep_output, mode=FileMode())
                fh.write(output_to_file)

            self.log_pipeline_info(pipeline_name, "pipeline rendered", duration=(time.time() - t0_all))

        self.log_pipeline_info(pipeline_name,
                               f"pipeline exported: {pipeline_export_path}",
                               duration=(time.time() - t0_all))

        return pipeline_export_path  # Return the input value, not its absolute form
Example #8
0
    def export(self, pipeline, pipeline_export_format, pipeline_export_path, overwrite):
        """Export *pipeline* either as a compiled KFP config (YAML) or as
        Python DSL rendered from the ``kfp_template.jinja2`` template.

        :param pipeline: the pipeline object to export
        :param pipeline_export_format: "yaml" or "py"
        :param pipeline_export_path: destination path, possibly relative
        :param overwrite: when False, refuse to replace an existing file
        :returns: the (unresolved) ``pipeline_export_path`` input value
        :raises ValueError: unsupported format, or destination exists and
            ``overwrite`` is False
        :raises RuntimeError: when pipeline compilation fails
        """
        if pipeline_export_format not in ["yaml", "py"]:
            raise ValueError("Pipeline export format {} not recognized.".format(pipeline_export_format))

        # Wall-clock start, reported via log_pipeline_info at the end.
        t0_all = time.time()
        pipeline_name = pipeline.name

        # Since pipeline_export_path may be relative to the notebook directory, ensure
        # we're using its absolute form.
        absolute_pipeline_export_path = get_absolute_path(self.root_dir, pipeline_export_path)

        # The endpoint is baked into the generated Python DSL below.
        runtime_configuration = self._get_runtime_configuration(pipeline.runtime_config)
        api_endpoint = runtime_configuration.metadata['api_endpoint']

        if os.path.exists(absolute_pipeline_export_path) and not overwrite:
            raise ValueError("File " + absolute_pipeline_export_path + " already exists.")

        self.log_pipeline_info(pipeline_name, f"exporting pipeline as a .{pipeline_export_format} file")
        if pipeline_export_format != "py":
            # Export pipeline as a static (YAML) configuration file.
            try:
                # The compiler invokes this callable to build the pipeline graph.
                pipeline_function = lambda: self._cc_pipeline(pipeline, pipeline_name)  # nopep8
                kfp.compiler.Compiler().compile(pipeline_function, absolute_pipeline_export_path)
            except Exception as ex:
                raise RuntimeError('Error compiling pipeline {} for export at {}'.
                                   format(pipeline_name, absolute_pipeline_export_path), str(ex)) from ex
        else:
            # Load template from installed elyra package
            t0 = time.time()
            loader = PackageLoader('elyra', 'templates')
            template_env = Environment(loader=loader)

            # Custom filter so the template can reference bare file names.
            template_env.filters['to_basename'] = lambda path: os.path.basename(path)

            template = template_env.get_template('kfp_template.jinja2')

            # Process the pipeline into the operations fed to the template.
            defined_pipeline = self._cc_pipeline(pipeline, pipeline_name)

            description = f'Created with Elyra {__version__} pipeline editor using {pipeline.name}.pipeline.'

            for key, operation in defined_pipeline.items():
                self.log.debug("component :\n "
                               "container op name : %s \n "
                               "inputs : %s \n "
                               "outputs : %s \n ",
                               operation.name,
                               operation.inputs,
                               operation.outputs)

            python_output = template.render(operations_list=defined_pipeline,
                                            pipeline_name=pipeline_name,
                                            api_endpoint=api_endpoint,
                                            pipeline_description=description,
                                            writable_container_dir=self.WCD)

            # Write to python file and fix formatting
            with open(absolute_pipeline_export_path, "w") as fh:
                fh.write(autopep8.fix_code(python_output))

            self.log_pipeline_info(pipeline_name, "pipeline rendered", duration=(time.time() - t0))

        self.log_pipeline_info(pipeline_name,
                               f"pipeline exported: {pipeline_export_path}",
                               duration=(time.time() - t0_all))

        return pipeline_export_path  # Return the input value, not its absolute form
Example #9
0
    def export(self, pipeline, pipeline_export_format, pipeline_export_path,
               overwrite):
        """Export *pipeline* either as a compiled KFP config (YAML) or as
        Python DSL rendered from the ``kfp_template.jinja2`` template.

        :param pipeline: the pipeline object to export
        :param pipeline_export_format: "yaml" or "py"
        :param pipeline_export_path: destination path, possibly relative
        :param overwrite: when False, refuse to replace an existing file
        :returns: the (unresolved) ``pipeline_export_path`` input value
        :raises ValueError: unsupported format, or destination exists and
            ``overwrite`` is False
        :raises RuntimeError: when pipeline compilation fails
        """
        if pipeline_export_format not in ["yaml", "py"]:
            raise ValueError(
                "Pipeline export format {} not recognized.".format(
                    pipeline_export_format))

        pipeline_name = pipeline.name

        # Since pipeline_export_path may be relative to the notebook directory, ensure
        # we're using its absolute form.
        absolute_pipeline_export_path = get_absolute_path(
            self.root_dir, pipeline_export_path)

        # The endpoint is baked into the generated Python DSL below.
        runtime_configuration = self._get_runtime_configuration(
            pipeline.runtime_config)
        api_endpoint = runtime_configuration.metadata['api_endpoint']

        if os.path.exists(absolute_pipeline_export_path) and not overwrite:
            raise ValueError("File " + absolute_pipeline_export_path +
                             " already exists.")

        self.log.info('Creating pipeline definition as a .' +
                      pipeline_export_format + ' file')
        if pipeline_export_format != "py":
            # Export pipeline as a static (YAML) configuration file.
            try:
                # The compiler invokes this callable to build the pipeline graph.
                pipeline_function = lambda: self._cc_pipeline(
                    pipeline, pipeline_name)  # nopep8
                t0 = time.time()
                kfp.compiler.Compiler().compile(pipeline_function,
                                                absolute_pipeline_export_path)
                t1 = time.time()
                self.log.debug(
                    "Compilation of pipeline '{name}' took {duration:.3f} secs."
                    .format(name=pipeline_name, duration=(t1 - t0)))
            except Exception as ex:
                raise RuntimeError(
                    'Error compiling pipeline {} for export at {}'.format(
                        pipeline_name, absolute_pipeline_export_path),
                    str(ex)) from ex
        else:
            # Load template from installed elyra package
            loader = PackageLoader('elyra', 'templates')
            template_env = Environment(loader=loader)

            template = template_env.get_template('kfp_template.jinja2')

            # Process the pipeline into the operations fed to the template.
            defined_pipeline = self._cc_pipeline(pipeline, pipeline_name)

            for key, operation in defined_pipeline.items():
                self.log.debug(
                    "component :\n "
                    "container op name : %s \n "
                    "inputs : %s \n "
                    "outputs : %s \n ", operation.name, operation.inputs,
                    operation.outputs)

            python_output = template.render(
                operations_list=defined_pipeline,
                pipeline_name=pipeline_name,
                api_endpoint=api_endpoint,
                pipeline_description="Elyra Pipeline")

            # Write to python file and fix formatting
            with open(absolute_pipeline_export_path, "w") as fh:
                fh.write(autopep8.fix_code(python_output))

        return pipeline_export_path  # Return the input value, not its absolute form