def notebook_to_graph(self):
    """Convert the notebook into a pipeline graph.

    Parses ``self.notebook`` into a graph together with the source code of
    the pipeline parameters and pipeline metrics cells. When pipeline
    metrics are present, a ``pipeline_metrics`` step is appended before
    dependencies detection runs. When ``self.auto_snapshot`` is set, an
    empty ``final_auto_snapshot`` step is appended afterwards.

    Returns:
        A ``(pipeline_graph, pipeline_parameters_dict)`` tuple.
    """

    def _append_final_step(graph, step_name, step_source):
        # Link every current leaf step to `step_name` and set its attributes.
        # The leaves are collected into a list *before* any edge is added,
        # so the new step can never be picked up as its own predecessor.
        # NOTE(review): when the graph has no leaf steps no edge is added,
        # so `step_name` is never created as a node - confirm this is the
        # intended behavior for an empty pipeline.
        leaf_steps = [
            node for node in graph.nodes() if graph.out_degree(node) == 0
        ]
        for node in leaf_steps:
            graph.add_edge(node, step_name)
        nx.set_node_attributes(
            graph,
            {step_name: {'source': step_source, 'ins': [], 'outs': []}})

    # convert notebook to nx graph
    (pipeline_graph,
     pipeline_parameters_source,
     pipeline_metrics_source) = parser.parse_notebook(self.notebook)

    # get a dict from the 'pipeline parameters' cell source code
    pipeline_parameters_dict = ast.parse_assignments_expressions(
        pipeline_parameters_source)

    # get the variables that need to be logged as pipeline metrics
    pipeline_metrics = ast.parse_metrics_print_statements(
        pipeline_metrics_source)

    # If there are some pipeline metrics, create an additional step at the
    # end of the pipeline to log them.
    # By adding this step before dependencies detection, we make sure that
    # the necessary variables are marshalled at the beginning of the step.
    if pipeline_metrics:
        # generate the code that dumps the pipeline metrics to file
        template_env = _initialize_templating_env()
        metrics_template = template_env.get_template(
            'pipeline_metrics_template.jinja2')
        # need to be a list since it will be treated as a code cell and
        # passed to the ipykernel
        metrics_source = [
            metrics_template.render(pipeline_metrics=pipeline_metrics)
        ]
        # add a link from all the last steps of the pipeline to the
        # metrics logging step
        _append_final_step(
            pipeline_graph, "pipeline_metrics", metrics_source)

    # run static analysis over the source code
    dependencies.dependencies_detection(
        pipeline_graph,
        pipeline_parameters=pipeline_parameters_dict)

    # add an empty step at the end of the pipeline for final snapshot
    if self.auto_snapshot:
        # add a link from all the last steps of the pipeline to
        # the final auto snapshot one.
        _append_final_step(pipeline_graph, 'final_auto_snapshot', '')

    # TODO: Additional Step required:
    #  Run a static analysis over every step to check that pipeline
    #  parameters are not assigned with new values.
    return pipeline_graph, pipeline_parameters_dict
def template():
    """Return the 'function_template.jinja2' template.

    The templating environment is initialized with the templates directory
    resolved relative to ``THIS_DIR`` ('../../templates').
    """
    templates_path = os.path.join(THIS_DIR, '../../templates')
    return generate_code._initialize_templating_env(
        templates_path).get_template('function_template.jinja2')