def notebook_to_graph(self):
    # convert notebook to nx graph
    pipeline_graph, pipeline_parameters_source = parser.parse_notebook(
        self.notebook)

    # get a dict from the 'pipeline parameters' cell source code
    pipeline_parameters_dict = ast.parse_assignments_expressions(
        pipeline_parameters_source)

    # run static analysis over the source code
    to_ignore = set(pipeline_parameters_dict.keys())
    dependencies.dependencies_detection(pipeline_graph,
                                        ignore_symbols=to_ignore)

    # add an empty step at the end of the pipeline for final snapshot
    if self.auto_snapshot:
        auto_snapshot_name = 'final_auto_snapshot'
        # add a link from all the last steps of the pipeline to
        # the final auto snapshot one.
        leaf_steps = [x for x in pipeline_graph.nodes()
                      if pipeline_graph.out_degree(x) == 0]
        for node in leaf_steps:
            pipeline_graph.add_edge(node, auto_snapshot_name)
        data = {auto_snapshot_name: {'source': '', 'ins': [], 'outs': []}}
        nx.set_node_attributes(pipeline_graph, data)

    # TODO: Additional Step required:
    # Run a static analysis over every step to check that pipeline
    # parameters are not assigned with new values.
    return pipeline_graph, pipeline_parameters_dict
def run(self):
    self.logger.debug("------------- Kale Start Run -------------")
    try:
        # validate provided metadata
        self.validate_metadata()

        # convert notebook to nx graph
        pipeline_graph, pipeline_parameters_code_block = \
            parser.parse_notebook(self.source_path, self.nbformat_version)

        pipeline_parameters_dict = dep_analysis.pipeline_parameters_detection(
            pipeline_parameters_code_block)

        # run static analysis over the source code
        dep_analysis.variables_dependencies_detection(
            pipeline_graph,
            ignore_symbols=set(pipeline_parameters_dict.keys()))

        # TODO: Run a static analysis over every step to check that
        # pipeline parameters are not assigned with new values.

        # in case the user did not specify a custom docker image, use the
        # same base image of the current Notebook Server
        if self.docker_base_image == '':
            try:
                self.docker_base_image = pod_utils.get_docker_base_image()
            except ConfigException:
                # no K8s config found: fall back to the kfp default image
                pass
            except Exception:
                raise

        # generate full kfp pipeline definition
        kfp_code = generate_code.gen_kfp_code(
            nb_graph=pipeline_graph,
            experiment_name=self.experiment_name,
            pipeline_name=self.pipeline_name,
            pipeline_description=self.pipeline_description,
            pipeline_parameters=pipeline_parameters_dict,
            docker_base_image=self.docker_base_image,
            volumes=self.volumes,
            deploy_pipeline=self.run_pipeline,
            working_dir=self.abs_working_dir)

        # save kfp generated code
        self.save_pipeline(kfp_code)

        # deploy pipeline to KFP instance
        if self.upload_pipeline or self.run_pipeline:
            return self.deploy_pipeline_to_kfp(self.output_path)
    except Exception as e:
        self.logger.debug(e, exc_info=True)
        self.logger.error(e)
        self.logger.error("To see the full traceback, run Kale with the "
                          "--debug flag or have a look at the kale.log "
                          "logfile.")
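The run() method above relies on a logger that keeps console output terse while the full traceback lands in kale.log. A minimal, self-contained sketch of that pattern, using only the standard-library logging module (the handler setup and levels are assumptions for illustration, not Kale's actual configuration):

import logging

logger = logging.getLogger("kale")
logger.setLevel(logging.DEBUG)

file_handler = logging.FileHandler("kale.log")  # full detail, incl. tracebacks
file_handler.setLevel(logging.DEBUG)
console_handler = logging.StreamHandler()       # terse console output
console_handler.setLevel(logging.ERROR)
logger.addHandler(file_handler)
logger.addHandler(console_handler)

try:
    raise ValueError("example failure")
except Exception as e:
    logger.debug(e, exc_info=True)  # traceback is written to kale.log only
    logger.error(e)                 # short message reaches the console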
def notebook_to_graph(self): """Convert an annotated Notebook to a Graph.""" # convert notebook to nx graph (pipeline_graph, pipeline_parameters_source, pipeline_metrics_source, imports_and_functions) = parser.parse_notebook( self.notebook, self.pipeline_metadata) # get a dict from the 'pipeline parameters' cell source code pipeline_parameters_dict = ast.parse_assignments_expressions( pipeline_parameters_source) # get a list of variables that need to be logged as pipeline metrics pipeline_metrics = ast.parse_metrics_print_statements( pipeline_metrics_source) # run static analysis over the source code dependencies.dependencies_detection( pipeline_graph, pipeline_parameters=pipeline_parameters_dict, imports_and_functions=imports_and_functions) dependencies.assign_metrics(pipeline_graph, pipeline_metrics) # if there are multiple DAG leaves, add an empty step at the end of the # pipeline for final snapshot leaf_steps = graphutils.get_leaf_nodes(pipeline_graph) if self.pipeline_metadata.get("autosnapshot") and len(leaf_steps) > 1: auto_snapshot_name = 'final_auto_snapshot' # add a link from all the last steps of the pipeline to # the final auto snapshot one. for node in leaf_steps: pipeline_graph.add_edge(node, auto_snapshot_name) step_defaults = parser.parse_steps_defaults( self.pipeline_metadata.get("steps_defaults", [])) data = { auto_snapshot_name: { "source": "", "ins": [], "outs": [], "annotations": step_defaults.get("annotations"), "labels": step_defaults.get("labels"), "limits": step_defaults.get("limits") } } nx.set_node_attributes(pipeline_graph, data) # TODO: Additional Step required: # Run a static analysis over every step to check that pipeline # parameters are not assigned with new values. return pipeline_graph, pipeline_parameters_dict
def notebook_to_graph(self):
    # convert notebook to nx graph
    pipeline_graph, pipeline_parameters_code_block = parser.parse_notebook(
        self.notebook)

    pipeline_parameters_dict = dep_analysis.pipeline_parameters_detection(
        pipeline_parameters_code_block)

    # run static analysis over the source code
    dep_analysis.variables_dependencies_detection(
        pipeline_graph,
        ignore_symbols=set(pipeline_parameters_dict.keys()))

    # TODO: Additional Step required:
    # Run a static analysis over every step to check that pipeline
    # parameters are not assigned with new values.
    return pipeline_graph, pipeline_parameters_dict
def run(self):
    # convert notebook to nx graph
    pipeline_graph = parser.parse_notebook(self.source_path,
                                           self.nbformat_version)

    # run static analysis over the source code
    dep_analysis.variables_dependencies_detection(pipeline_graph)

    # generate full kfp pipeline definition
    kfp_code = generate_code.gen_kfp_code(
        nb_graph=pipeline_graph,
        pipeline_name=self.pipeline_name,
        pipeline_description=self.pipeline_description,
        docker_base_image=self.docker_base_image,
        mount_host_path=self.mount_host_path,
        mount_container_path=self.mount_container_path,
        deploy_pipeline=self.deploy_pipeline)

    # save kfp generated code
    self.save_pipeline(kfp_code)

    # deploy pipeline to KFP instance
    if self.deploy_pipeline:
        self.deploy_pipeline_to_kfp(self.output_path)
def notebook_to_graph(self): """Convert an annotated Notebook to a Graph.""" # convert notebook to nx graph (pipeline_graph, pipeline_parameters_source, pipeline_metrics_source, imports_and_functions) = parser.parse_notebook(self.notebook) # get a dict from the 'pipeline parameters' cell source code pipeline_parameters_dict = ast.parse_assignments_expressions( pipeline_parameters_source) # get a list of variables that need to be logged as pipeline metrics pipeline_metrics = ast.parse_metrics_print_statements( pipeline_metrics_source) # run static analysis over the source code dependencies.dependencies_detection( pipeline_graph, pipeline_parameters=pipeline_parameters_dict, imports_and_functions=imports_and_functions) dependencies.assign_metrics(pipeline_graph, pipeline_metrics) # add an empty step at the end of the pipeline for final snapshot if self.auto_snapshot: auto_snapshot_name = 'final_auto_snapshot' # add a link from all the last steps of the pipeline to # the final auto snapshot one. leaf_steps = graph_utils.get_leaf_nodes(pipeline_graph) for node in leaf_steps: pipeline_graph.add_edge(node, auto_snapshot_name) data = {auto_snapshot_name: {'source': '', 'ins': [], 'outs': []}} nx.set_node_attributes(pipeline_graph, data) # TODO: Additional Step required: # Run a static analysis over every step to check that pipeline # parameters are not assigned with new values. return pipeline_graph, pipeline_parameters_dict
def notebook_to_graph(self):
    # convert notebook to nx graph
    (pipeline_graph, pipeline_parameters_source,
     pipeline_metrics_source) = parser.parse_notebook(self.notebook)

    # get a dict from the 'pipeline parameters' cell source code
    pipeline_parameters_dict = ast.parse_assignments_expressions(
        pipeline_parameters_source)

    # get a list of variables that need to be logged as pipeline metrics
    pipeline_metrics = ast.parse_metrics_print_statements(
        pipeline_metrics_source)

    # if there are some pipeline metrics, create an additional step at the
    # end of the pipeline to log them.
    # By adding this step before dependencies detection, we make sure that
    # the necessary variables are marshalled at the beginning of the step.
    if len(pipeline_metrics):
        pipeline_metrics_name = "pipeline_metrics"
        # add a link from all the last steps of the pipeline to
        # the pipeline metrics one.
        leaf_steps = [x for x in pipeline_graph.nodes()
                      if pipeline_graph.out_degree(x) == 0]
        for node in leaf_steps:
            pipeline_graph.add_edge(node, pipeline_metrics_name)

        # generate the code that dumps the pipeline metrics to file
        template_env = _initialize_templating_env()
        metrics_template = template_env.get_template(
            'pipeline_metrics_template.jinja2')
        # needs to be a list since it will be treated as a code cell and
        # passed to the ipykernel
        metrics_source = [
            metrics_template.render(pipeline_metrics=pipeline_metrics)
        ]
        data = {pipeline_metrics_name: {
            'source': metrics_source,
            'ins': [],
            'outs': []
        }}
        nx.set_node_attributes(pipeline_graph, data)

    # run static analysis over the source code
    dependencies.dependencies_detection(
        pipeline_graph,
        pipeline_parameters=pipeline_parameters_dict)

    # add an empty step at the end of the pipeline for final snapshot
    if self.auto_snapshot:
        auto_snapshot_name = 'final_auto_snapshot'
        # add a link from all the last steps of the pipeline to
        # the final auto snapshot one.
        leaf_steps = [x for x in pipeline_graph.nodes()
                      if pipeline_graph.out_degree(x) == 0]
        for node in leaf_steps:
            pipeline_graph.add_edge(node, auto_snapshot_name)
        data = {auto_snapshot_name: {'source': '', 'ins': [], 'outs': []}}
        nx.set_node_attributes(pipeline_graph, data)

    # TODO: Additional Step required:
    # Run a static analysis over every step to check that pipeline
    # parameters are not assigned with new values.
    return pipeline_graph, pipeline_parameters_dict
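The metrics step above renders a Jinja2 template to produce the step's source code; _initialize_templating_env() and pipeline_metrics_template.jinja2 are Kale internals not shown here, so this sketch inlines a made-up equivalent template as a string to illustrate the same idea:

from jinja2 import Environment

env = Environment()
# hypothetical stand-in for pipeline_metrics_template.jinja2
metrics_template = env.from_string(
    "# dump pipeline metrics\n"
    "{% for m in pipeline_metrics %}"
    "print('{{ m }}:', {{ m }})\n"
    "{% endfor %}")

# wrapped in a list so it can be treated as a code cell, as in the code above
metrics_source = [metrics_template.render(pipeline_metrics=["accuracy", "loss"])]
print(metrics_source[0])
# # dump pipeline metrics
# print('accuracy:', accuracy)
# print('loss:', loss)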