def notebook_to_graph(self):
    """Convert the annotated notebook into an nx pipeline graph.

    Returns:
        tuple: (pipeline_graph, pipeline_parameters_dict) where the dict
        maps each parameter name parsed from the `pipeline parameters`
        cell to its parsed assignment expression.
    """
    # parse the notebook into a graph plus the parameters cell source
    pipeline_graph, pipeline_parameters_source = parser.parse_notebook(
        self.notebook)
    # evaluate the 'pipeline parameters' cell source into a dict
    pipeline_parameters_dict = ast.parse_assignments_expressions(
        pipeline_parameters_source)

    # static analysis over the source code; parameter names must not be
    # treated as inter-step dependencies
    dependencies.dependencies_detection(
        pipeline_graph,
        ignore_symbols=set(pipeline_parameters_dict.keys()))

    # optionally append an empty terminal step used for the final snapshot
    if self.auto_snapshot:
        snapshot_step = 'final_auto_snapshot'
        # materialize the leaf list BEFORE mutating the graph, then link
        # every current leaf step to the snapshot step
        leaves = [n for n in pipeline_graph.nodes()
                  if pipeline_graph.out_degree(n) == 0]
        for leaf in leaves:
            pipeline_graph.add_edge(leaf, snapshot_step)
        nx.set_node_attributes(
            pipeline_graph,
            {snapshot_step: {'source': '', 'ins': [], 'outs': []}})

    # TODO: Additional Step required:
    # Run a static analysis over every step to check that pipeline
    # parameters are not assigned with new values.
    return pipeline_graph, pipeline_parameters_dict
def notebook_to_graph(self):
    """Convert an annotated Notebook to a Graph."""
    # parse the notebook into the graph plus the sources of the
    # parameters cell, the metrics cell and the shared imports/functions
    (pipeline_graph, pipeline_parameters_source,
     pipeline_metrics_source,
     imports_and_functions) = parser.parse_notebook(
        self.notebook, self.pipeline_metadata)

    # get a dict from the 'pipeline parameters' cell source code
    pipeline_parameters_dict = ast.parse_assignments_expressions(
        pipeline_parameters_source)

    # get a list of variables that need to be logged as pipeline metrics
    pipeline_metrics = ast.parse_metrics_print_statements(
        pipeline_metrics_source)

    # run static analysis over the source code
    dependencies.dependencies_detection(
        pipeline_graph,
        pipeline_parameters=pipeline_parameters_dict,
        imports_and_functions=imports_and_functions)
    dependencies.assign_metrics(pipeline_graph, pipeline_metrics)

    # if there are multiple DAG leaves, add an empty step at the end of the
    # pipeline for final snapshot
    leaf_steps = graphutils.get_leaf_nodes(pipeline_graph)
    if self.pipeline_metadata.get("autosnapshot") and len(leaf_steps) > 1:
        snapshot_step = 'final_auto_snapshot'
        # add a link from all the last steps of the pipeline to
        # the final auto snapshot one.
        for leaf in leaf_steps:
            pipeline_graph.add_edge(leaf, snapshot_step)
        # the snapshot step inherits the pipeline-wide step defaults
        step_defaults = parser.parse_steps_defaults(
            self.pipeline_metadata.get("steps_defaults", []))
        snapshot_attrs = {
            "source": "",
            "ins": [],
            "outs": [],
            "annotations": step_defaults.get("annotations"),
            "labels": step_defaults.get("labels"),
            "limits": step_defaults.get("limits")
        }
        nx.set_node_attributes(pipeline_graph,
                               {snapshot_step: snapshot_attrs})

    # TODO: Additional Step required:
    # Run a static analysis over every step to check that pipeline
    # parameters are not assigned with new values.
    return pipeline_graph, pipeline_parameters_dict
def get_pipeline_parameters(request, source_notebook_path):
    """Get the pipeline parameters tagged in the notebook."""
    # prefer the request-scoped logger when the RPC layer provides one
    log = request.log if hasattr(request, "log") else logger
    try:
        nb = nbformat.read(source_notebook_path,
                           as_version=nbformat.NO_CONVERT)
        source = parser.get_pipeline_parameters_source(nb)
        if source == '':
            raise ValueError("No pipeline parameters found. Please tag a cell"
                             " of the notebook with the `pipeline-parameters`"
                             " tag.")
        # get a dict from the 'pipeline parameters' cell source code
        params_dict = ast.parse_assignments_expressions(source)
    except ValueError as e:
        log.exception("Value Error during parsing of pipeline parameters")
        raise RPCInternalError(details=str(e), trans_id=request.trans_id)

    # convert dict in list so its easier to parse in js
    params = [[name, *spec] for name, spec in params_dict.items()]
    log.info("Pipeline parameters:")
    table = tabulate(params, headers=["name", "type", "value"])
    for row in table.split("\n"):
        log.info(row)
    return params
def notebook_to_graph(self):
    """Convert an annotated Notebook to a Graph."""
    # parse the notebook into the graph plus the sources of the
    # parameters cell, the metrics cell and the shared imports/functions
    (pipeline_graph, pipeline_parameters_source,
     pipeline_metrics_source,
     imports_and_functions) = parser.parse_notebook(self.notebook)

    # get a dict from the 'pipeline parameters' cell source code
    pipeline_parameters_dict = ast.parse_assignments_expressions(
        pipeline_parameters_source)

    # get a list of variables that need to be logged as pipeline metrics
    pipeline_metrics = ast.parse_metrics_print_statements(
        pipeline_metrics_source)

    # run static analysis over the source code
    dependencies.dependencies_detection(
        pipeline_graph,
        pipeline_parameters=pipeline_parameters_dict,
        imports_and_functions=imports_and_functions)
    dependencies.assign_metrics(pipeline_graph, pipeline_metrics)

    # add an empty step at the end of the pipeline for final snapshot
    if self.auto_snapshot:
        snapshot_step = 'final_auto_snapshot'
        # add a link from all the last steps of the pipeline to
        # the final auto snapshot one.
        for leaf in graph_utils.get_leaf_nodes(pipeline_graph):
            pipeline_graph.add_edge(leaf, snapshot_step)
        nx.set_node_attributes(
            pipeline_graph,
            {snapshot_step: {'source': '', 'ins': [], 'outs': []}})

    # TODO: Additional Step required:
    # Run a static analysis over every step to check that pipeline
    # parameters are not assigned with new values.
    return pipeline_graph, pipeline_parameters_dict
def notebook_to_graph(self):
    """Convert the annotated notebook into an nx pipeline graph.

    Returns:
        tuple: (pipeline_graph, pipeline_parameters_dict) where the dict
        maps each parameter name parsed from the `pipeline parameters`
        cell to its parsed assignment expression.
    """
    def _leaf_steps(graph):
        # current pipeline leaves: steps with no outgoing edges
        return [n for n in graph.nodes() if graph.out_degree(n) == 0]

    # convert notebook to nx graph
    (pipeline_graph, pipeline_parameters_source,
     pipeline_metrics_source) = parser.parse_notebook(self.notebook)

    # get a dict from the 'pipeline parameters' cell source code
    pipeline_parameters_dict = ast.parse_assignments_expressions(
        pipeline_parameters_source)

    # get a list of variables that need to be logged as pipeline metrics
    pipeline_metrics = ast.parse_metrics_print_statements(
        pipeline_metrics_source)

    # if there are some pipeline metrics, create an additional step at the
    # end of the pipeline to log them.
    # By adding this step before dependencies detection, we make sure that
    # the necessary variables are marshalled at the beginning of the step.
    if pipeline_metrics:  # idiomatic truthiness instead of len(...)
        pipeline_metrics_name = "pipeline_metrics"
        # link every current leaf step to the metrics step
        for node in _leaf_steps(pipeline_graph):
            pipeline_graph.add_edge(node, pipeline_metrics_name)
        # generate the code that dumps the pipeline metrics to file
        template_env = _initialize_templating_env()
        metrics_template = template_env.get_template(
            'pipeline_metrics_template.jinja2')
        # need to be a list since it will be treated as a code cell and
        # passed to the ipykernel
        metrics_source = [
            metrics_template.render(pipeline_metrics=pipeline_metrics)
        ]
        data = {
            pipeline_metrics_name: {
                'source': metrics_source,
                'ins': [],
                'outs': []
            }
        }
        nx.set_node_attributes(pipeline_graph, data)

    # run static analysis over the source code
    dependencies.dependencies_detection(
        pipeline_graph, pipeline_parameters=pipeline_parameters_dict)

    # add an empty step at the end of the pipeline for final snapshot
    if self.auto_snapshot:
        auto_snapshot_name = 'final_auto_snapshot'
        # add a link from all the last steps of the pipeline to
        # the final auto snapshot one (the metrics step, if added above,
        # is itself a leaf at this point and gets linked too).
        for node in _leaf_steps(pipeline_graph):
            pipeline_graph.add_edge(node, auto_snapshot_name)
        data = {auto_snapshot_name: {'source': '', 'ins': [], 'outs': []}}
        nx.set_node_attributes(pipeline_graph, data)

    # TODO: Additional Step required:
    # Run a static analysis over every step to check that pipeline
    # parameters are not assigned with new values.
    return pipeline_graph, pipeline_parameters_dict
def test_parse_assignments_expressions_exc(code):
    """Test parse_assignments_expressions function."""
    # invalid parameter source must be rejected with ValueError;
    # use the callable form of pytest.raises instead of the context manager
    pytest.raises(ValueError, kale_ast.parse_assignments_expressions, code)
def test_parse_assignments_expressions(code, target):
    """Test parse_assignments_expressions function."""
    # the parsed parameters must match the expected target structure
    compare(kale_ast.parse_assignments_expressions(code), target)
def parse_pipeline_parameters(self, source: str):
    """Get pipeline parameters from source code."""
    # parse the source into name -> (type, value), then wrap each pair
    # into a PipelineParam record before storing it on the pipeline
    raw_params = ast.parse_assignments_expressions(source)
    self.pipeline.pipeline_parameters = {
        name: ast.PipelineParam(v_type, v_value)
        for name, (v_type, v_value) in raw_params.items()
    }