Beispiel #1
0
    def notebook_to_graph(self):
        """Convert the notebook into a pipeline graph.

        Returns:
            A ``(pipeline_graph, pipeline_parameters_dict)`` tuple, where the
            dict is parsed from the 'pipeline parameters' cell source code.
        """
        # the parser yields the nx graph together with the source code of
        # the 'pipeline parameters' cell
        graph, params_source = parser.parse_notebook(self.notebook)

        # turn the parameters' source code into a dict
        params = ast.parse_assignments_expressions(params_source)

        # static analysis over the steps' code; the names of the pipeline
        # parameters are passed as symbols to ignore
        ignored_symbols = set(params.keys())
        dependencies.dependencies_detection(graph,
                                            ignore_symbols=ignored_symbols)

        if self.auto_snapshot:
            # append an empty step at the end of the pipeline, used for the
            # final snapshot
            snapshot_step = 'final_auto_snapshot'
            # materialise the leaves before adding edges: linking a leaf to
            # the snapshot step changes its out-degree
            leaves = [
                step for step in graph.nodes()
                if graph.out_degree(step) == 0
            ]
            graph.add_edges_from((leaf, snapshot_step) for leaf in leaves)
            nx.set_node_attributes(
                graph,
                {snapshot_step: {'source': '', 'ins': [], 'outs': []}})

        # TODO: Additional Step required:
        #  Run a static analysis over every step to check that pipeline
        #  parameters are not assigned with new values.
        return graph, params
Beispiel #2
0
    def run(self):
        """Convert the notebook to a KFP pipeline, save it and maybe deploy.

        Any exception raised during the run is logged (with full traceback
        at debug level) and not propagated to the caller.

        Returns:
            The result of ``deploy_pipeline_to_kfp`` when either
            ``upload_pipeline`` or ``run_pipeline`` is set. ``None``
            otherwise, including when an error was logged.
        """
        self.logger.debug("------------- Kale Start Run -------------")
        try:
            # check the provided metadata before doing any work
            self.validate_metadata()

            # notebook -> nx graph + 'pipeline parameters' code block
            graph, params_code = parser.parse_notebook(
                self.source_path, self.nbformat_version)

            params = dep_analysis.pipeline_parameters_detection(params_code)

            # static analysis over the source code, ignoring the names of
            # the pipeline parameters
            dep_analysis.variables_dependencies_detection(
                graph, ignore_symbols=set(params.keys()))

            # TODO: Run a static analysis over every step to check that
            #  pipeline parameters are not assigned with new values.

            # when no custom docker image was given, fall back to the base
            # image of the current Notebook Server
            if self.docker_base_image == '':
                try:
                    self.docker_base_image = pod_utils.get_docker_base_image()
                except ConfigException:
                    # no K8s config found: keep the empty value so that the
                    # kfp default image is used. Any other exception
                    # propagates to the outer handler.
                    pass

            # full kfp pipeline definition
            kfp_code = generate_code.gen_kfp_code(
                nb_graph=graph,
                experiment_name=self.experiment_name,
                pipeline_name=self.pipeline_name,
                pipeline_description=self.pipeline_description,
                pipeline_parameters=params,
                docker_base_image=self.docker_base_image,
                volumes=self.volumes,
                deploy_pipeline=self.run_pipeline,
                working_dir=self.abs_working_dir)

            # persist the generated code
            self.save_pipeline(kfp_code)

            # nothing to deploy unless an upload or a run was requested
            if not (self.upload_pipeline or self.run_pipeline):
                return None
            return self.deploy_pipeline_to_kfp(self.output_path)
        except Exception as err:
            # traceback goes to the debug log only; the error message is
            # always reported
            self.logger.debug(err, exc_info=True)
            self.logger.error(err)
            self.logger.error(
                "To see full traceback run Kale with --debug flag or have a look at kale.log logfile"
            )
Beispiel #3
0
    def notebook_to_graph(self):
        """Convert an annotated Notebook to a Graph.

        Returns:
            A ``(pipeline_graph, pipeline_parameters_dict)`` tuple.
        """
        # notebook -> nx graph plus the source of the special cells
        parsed = parser.parse_notebook(self.notebook, self.pipeline_metadata)
        graph, params_source, metrics_source, imports_and_functions = parsed

        # dict built from the 'pipeline parameters' cell source code
        params = ast.parse_assignments_expressions(params_source)

        # variables that need to be logged as pipeline metrics
        metrics = ast.parse_metrics_print_statements(metrics_source)

        # static analysis over the source code, then metrics assignment
        dependencies.dependencies_detection(
            graph,
            pipeline_parameters=params,
            imports_and_functions=imports_and_functions)
        dependencies.assign_metrics(graph, metrics)

        # TODO: Additional Step required:
        #  Run a static analysis over every step to check that pipeline
        #  parameters are not assigned with new values.

        # the final snapshot step is only needed when autosnapshot is
        # enabled and the DAG has multiple leaves
        leaves = graphutils.get_leaf_nodes(graph)
        if (not self.pipeline_metadata.get("autosnapshot")
                or len(leaves) <= 1):
            return graph, params

        snapshot_step = 'final_auto_snapshot'
        # link all the last steps of the pipeline to the snapshot step
        for leaf in leaves:
            graph.add_edge(leaf, snapshot_step)

        # the empty snapshot step takes its annotations, labels and limits
        # from the parsed steps defaults
        defaults = parser.parse_steps_defaults(
            self.pipeline_metadata.get("steps_defaults", []))
        snapshot_attrs = {
            "source": "",
            "ins": [],
            "outs": [],
            "annotations": defaults.get("annotations"),
            "labels": defaults.get("labels"),
            "limits": defaults.get("limits")
        }
        nx.set_node_attributes(graph, {snapshot_step: snapshot_attrs})

        return graph, params
Beispiel #4
0
    def notebook_to_graph(self):
        """Convert the notebook into a pipeline graph.

        Returns:
            A ``(pipeline_graph, pipeline_parameters_dict)`` tuple.
        """
        # notebook -> nx graph + 'pipeline parameters' code block
        graph, params_code = parser.parse_notebook(self.notebook)

        params = dep_analysis.pipeline_parameters_detection(params_code)

        # static analysis over the source code; the names of the pipeline
        # parameters are passed as symbols to ignore
        ignored_symbols = set(params.keys())
        dep_analysis.variables_dependencies_detection(
            graph, ignore_symbols=ignored_symbols)

        # TODO: Additional Step required:
        #  Run a static analysis over every step to check that pipeline
        #  parameters are not assigned with new values.
        return graph, params
Beispiel #5
0
    def run(self):
        """Generate the KFP pipeline code from the notebook and save it.

        When ``deploy_pipeline`` is set, the saved pipeline is also deployed
        to the KFP instance.
        """
        # notebook -> nx graph
        graph = parser.parse_notebook(self.source_path, self.nbformat_version)

        # static analysis over the source code
        dep_analysis.variables_dependencies_detection(graph)

        # full kfp pipeline definition
        kfp_code = generate_code.gen_kfp_code(
            nb_graph=graph,
            pipeline_name=self.pipeline_name,
            pipeline_description=self.pipeline_description,
            docker_base_image=self.docker_base_image,
            mount_host_path=self.mount_host_path,
            mount_container_path=self.mount_container_path,
            deploy_pipeline=self.deploy_pipeline)

        # persist the generated code
        self.save_pipeline(kfp_code)

        # deployment is optional
        if not self.deploy_pipeline:
            return
        self.deploy_pipeline_to_kfp(self.output_path)
Beispiel #6
0
    def notebook_to_graph(self):
        """Convert an annotated Notebook to a Graph.

        Returns:
            A ``(pipeline_graph, pipeline_parameters_dict)`` tuple.
        """
        # notebook -> nx graph plus the source of the special cells
        parsed = parser.parse_notebook(self.notebook)
        graph, params_source, metrics_source, imports_and_functions = parsed

        # dict built from the 'pipeline parameters' cell source code
        params = ast.parse_assignments_expressions(params_source)

        # variables that need to be logged as pipeline metrics
        metrics = ast.parse_metrics_print_statements(metrics_source)

        # static analysis over the source code, then metrics assignment
        dependencies.dependencies_detection(
            graph,
            pipeline_parameters=params,
            imports_and_functions=imports_and_functions)
        dependencies.assign_metrics(graph, metrics)

        if self.auto_snapshot:
            # append an empty step at the end of the pipeline, used for the
            # final snapshot, and link all the last steps to it
            snapshot_step = 'final_auto_snapshot'
            for leaf in graph_utils.get_leaf_nodes(graph):
                graph.add_edge(leaf, snapshot_step)
            empty_step = {'source': '', 'ins': [], 'outs': []}
            nx.set_node_attributes(graph, {snapshot_step: empty_step})

        # TODO: Additional Step required:
        #  Run a static analysis over every step to check that pipeline
        #  parameters are not assigned with new values.
        return graph, params
Beispiel #7
0
    def notebook_to_graph(self):
        """Convert the notebook into a pipeline graph.

        When pipeline metrics are found, a ``pipeline_metrics`` step is
        appended after the current leaves of the graph. When
        ``self.auto_snapshot`` is set, an empty ``final_auto_snapshot`` step
        is appended after dependencies detection.

        Returns:
            A ``(pipeline_graph, pipeline_parameters_dict)`` tuple.
        """

        def _append_final_step(graph, step_name, source):
            # Link every current leaf of `graph` to `step_name` and set the
            # attributes of that step.
            # The leaves are materialised before adding edges, because
            # linking a leaf to the new step changes its out-degree.
            leaf_steps = [
                x for x in graph.nodes()
                if graph.out_degree(x) == 0
            ]
            for node in leaf_steps:
                graph.add_edge(node, step_name)
            data = {step_name: {'source': source, 'ins': [], 'outs': []}}
            nx.set_node_attributes(graph, data)

        # convert notebook to nx graph
        (pipeline_graph, pipeline_parameters_source,
         pipeline_metrics_source) = parser.parse_notebook(self.notebook)

        # get a dict from the 'pipeline parameters' cell source code
        pipeline_parameters_dict = ast.parse_assignments_expressions(
            pipeline_parameters_source)

        # get a list of variables that need to be logged as pipeline metrics
        pipeline_metrics = ast.parse_metrics_print_statements(
            pipeline_metrics_source)

        # if there are some pipeline metrics, create an additional step at the
        # end of the pipeline to log them.
        # By adding this step before dependencies detection, we make sure that
        # the necessary variables are marshalled at the beginning of the step.
        if pipeline_metrics:
            # generate the code that dumps the pipeline metrics to file
            template_env = _initialize_templating_env()
            metrics_template = template_env.get_template(
                'pipeline_metrics_template.jinja2')
            # need to be a list since it will be treated as a code cell and
            # passed to the ipykernel
            metrics_source = [
                metrics_template.render(pipeline_metrics=pipeline_metrics)
            ]
            # add a link from all the last steps of the pipeline to the
            # pipeline metrics one.
            _append_final_step(pipeline_graph, "pipeline_metrics",
                               metrics_source)

        # run static analysis over the source code
        dependencies.dependencies_detection(
            pipeline_graph, pipeline_parameters=pipeline_parameters_dict)

        # add an empty step at the end of the pipeline for final snapshot,
        # linked from all the last steps of the pipeline (this includes the
        # pipeline metrics step, when present)
        if self.auto_snapshot:
            _append_final_step(pipeline_graph, 'final_auto_snapshot', '')

        # TODO: Additional Step required:
        #  Run a static analysis over every step to check that pipeline
        #  parameters are not assigned with new values.
        return pipeline_graph, pipeline_parameters_dict