Example #1
0
    def notebook_to_graph(self):
        """Convert the notebook into a pipeline graph.

        Parses ``self.notebook`` into a graph plus the source code of the
        'pipeline parameters' and 'pipeline metrics' cells. When metrics are
        present, a step that logs them is appended to the pipeline; when
        ``self.auto_snapshot`` is set, an empty final snapshot step is
        appended as well. Dependencies detection runs between the two.

        Returns:
            A ``(pipeline_graph, pipeline_parameters_dict)`` tuple.
        """
        def _append_final_step(graph, step_name, source):
            # Link every current leaf (node with no outgoing edges) to
            # `step_name`, then attach the step attributes to that node.
            # Leaves are collected before any edge is added, so the set of
            # leaves does not change while the edges are being created.
            leaf_steps = [
                node for node in graph.nodes()
                if graph.out_degree(node) == 0
            ]
            for node in leaf_steps:
                graph.add_edge(node, step_name)
            nx.set_node_attributes(
                graph, {step_name: {'source': source, 'ins': [], 'outs': []}})

        # convert notebook to nx graph
        (pipeline_graph, pipeline_parameters_source,
         pipeline_metrics_source) = parser.parse_notebook(self.notebook)

        # get a dict from the 'pipeline parameters' cell source code
        pipeline_parameters_dict = ast.parse_assignments_expressions(
            pipeline_parameters_source)

        # get the variables that need to be logged as pipeline metrics
        pipeline_metrics = ast.parse_metrics_print_statements(
            pipeline_metrics_source)

        # If there are some pipeline metrics, create an additional step at
        # the end of the pipeline to log them.
        # By adding this step before dependencies detection, we make sure
        # that the necessary variables are marshalled at the beginning of
        # the step.
        if pipeline_metrics:
            # generate the code that dumps the pipeline metrics to file
            template_env = _initialize_templating_env()
            metrics_template = template_env.get_template(
                'pipeline_metrics_template.jinja2')
            # needs to be a list since it will be treated as a code cell and
            # passed to the ipykernel
            metrics_source = [
                metrics_template.render(pipeline_metrics=pipeline_metrics)
            ]
            _append_final_step(
                pipeline_graph, "pipeline_metrics", metrics_source)

        # run static analysis over the source code
        dependencies.dependencies_detection(
            pipeline_graph, pipeline_parameters=pipeline_parameters_dict)

        # Add an empty step at the end of the pipeline for the final
        # snapshot. This happens after dependencies detection, so the
        # metrics step (if any) is the leaf the snapshot step hangs from.
        if self.auto_snapshot:
            _append_final_step(pipeline_graph, 'final_auto_snapshot', '')

        # TODO: Additional Step required:
        #  Run a static analysis over every step to check that pipeline
        #  parameters are not assigned with new values.
        return pipeline_graph, pipeline_parameters_dict
Example #2
0
def template():
    """Return the 'function_template.jinja2' template.

    The templating environment is initialized on the 'templates' directory
    located two levels above THIS_DIR.
    """
    templates_path = os.path.join(THIS_DIR, '../../templates')
    templating_env = generate_code._initialize_templating_env(templates_path)
    function_template = templating_env.get_template('function_template.jinja2')
    return function_template