예제 #1
0
파일: dependencies.py 프로젝트: sylus/kale
def assign_metrics(nb_graph: nx.DiGraph, pipeline_metrics: dict):
    """Assign pipeline metrics to specific pipeline steps.

    This assignment follows a similar logic to the detection of `out`
    dependencies. Starting from a temporary step - child of all the leaf nodes,
    all the nodes in the pipelines are traversed in reversed topological order.
    When a step shows one of the metrics as part of its code, then that metric
    is assigned to the step.

    Args:
        nb_graph: nx DiGraph with pipeline code blocks
        pipeline_metrics (dict): a dict of pipeline metrics where the key is
            the KFP sanitized name and the value the name of the original
            variable.
    """
    # create a temporary step at the end of the pipeline to simplify the
    # iteration from the leaf steps
    tmp_step = "_tmp"
    leaf_steps = graphutils.get_leaf_nodes(nb_graph)
    if not leaf_steps:
        return
    for node in leaf_steps:
        nb_graph.add_edge(node, tmp_step)

    # pipeline_metrics is a dict having sanitized variable names as keys and
    # the corresponding variable names as values. Here we need to refer to
    # the sanitized names using the python variables.
    # XXX: We could change parse_metrics_print_statements() to return the
    # XXX: reverse dictionary, but that would require changing either
    # XXX: rpc.nb.get_pipeline_metrics() or change in the JupyterLab Extension
    # XXX: parsing of the RPC result
    rev_pipeline_metrics = {v: k for k, v in pipeline_metrics.items()}
    metrics_left = set(rev_pipeline_metrics.keys())
    for anc in graphutils.get_ordered_ancestors(nb_graph, tmp_step):
        if not metrics_left:
            break

        anc_data = nb_graph.nodes(data=True)[anc]
        anc_source = '\n'.join(anc_data['source'])
        # get all the marshal candidates from father's source and intersect
        # with the metrics that have not been matched yet
        marshal_candidates = kale_ast.get_marshal_candidates(anc_source)
        assigned_metrics = metrics_left.intersection(marshal_candidates)
        # Remove the metrics that have already been assigned.
        metrics_left.difference_update(assigned_metrics)
        # Generate code to produce the metrics artifact in the current step
        if assigned_metrics:
            code = METRICS_TEMPLATE % ("    " + ",\n    ".join([
                '"%s": %s' % (rev_pipeline_metrics[x], x)
                for x in sorted(assigned_metrics)
            ]))
            anc_data['source'].append(code)
            # need to have a `metrics` flag set to true in order to set the
            # metrics output artifact in the pipeline template.
            # NOTE: this must happen only for steps that actually produce
            # metrics; previously it ran for every ancestor, flagging steps
            # without any assigned metric.
            nx.set_node_attributes(nb_graph, {anc: {'metrics': True}})

    nb_graph.remove_node(tmp_step)
예제 #2
0
    def notebook_to_graph(self):
        """Convert an annotated Notebook to a Graph."""
        # Parse the notebook into the step graph plus the sources of the
        # special cells (parameters, metrics, shared imports/functions).
        (pipeline_graph, pipeline_parameters_source,
         pipeline_metrics_source, imports_and_functions) = \
            parser.parse_notebook(self.notebook, self.pipeline_metadata)

        # Turn the 'pipeline parameters' cell source into a dict.
        pipeline_parameters_dict = ast.parse_assignments_expressions(
            pipeline_parameters_source)

        # Collect the variables that must be logged as pipeline metrics.
        pipeline_metrics = ast.parse_metrics_print_statements(
            pipeline_metrics_source)

        # Static analysis over the steps' source code: detect data
        # dependencies and attach metrics to the steps producing them.
        dependencies.dependencies_detection(
            pipeline_graph,
            pipeline_parameters=pipeline_parameters_dict,
            imports_and_functions=imports_and_functions)
        dependencies.assign_metrics(pipeline_graph, pipeline_metrics)

        # When autosnapshot is on and the DAG has more than one leaf, append
        # one empty step linked to every leaf to take the final snapshot.
        leaf_steps = graphutils.get_leaf_nodes(pipeline_graph)
        if self.pipeline_metadata.get("autosnapshot") and len(leaf_steps) > 1:
            snapshot_step = 'final_auto_snapshot'
            for leaf in leaf_steps:
                pipeline_graph.add_edge(leaf, snapshot_step)
            step_defaults = parser.parse_steps_defaults(
                self.pipeline_metadata.get("steps_defaults", []))
            nx.set_node_attributes(pipeline_graph, {
                snapshot_step: {
                    "source": "",
                    "ins": [],
                    "outs": [],
                    "annotations": step_defaults.get("annotations"),
                    "labels": step_defaults.get("labels"),
                    "limits": step_defaults.get("limits")
                }
            })

        # TODO: Additional Step required:
        #  Run a static analysis over every step to check that pipeline
        #  parameters are not assigned with new values.
        return pipeline_graph, pipeline_parameters_dict
예제 #3
0
파일: generate_code.py 프로젝트: suzil/kale
def generate_pipeline(template, nb_graph, step_names, lightweight_components,
                      metadata):
    """Use the pipeline template to generate Python code."""
    # Steps with no children: the final steps of the pipeline.
    leaf_steps = graphutils.get_leaf_nodes(nb_graph)

    # Map every step name to the sorted names of its parameters.
    node_view = nb_graph.nodes(data=True)
    all_step_parameters = dict()
    for step in step_names:
        step_params = node_view[step].get('parameters', {})
        all_step_parameters[step] = sorted(step_params)

    rendered_code = template.render(
        nb_graph=nb_graph,
        lightweight_components=lightweight_components,
        step_names=step_names,
        step_prevs=pipeline_dependencies_tasks(nb_graph),
        leaf_steps=leaf_steps,
        all_step_parameters=all_step_parameters,
        **metadata)
    # fix code style using pep8 guidelines
    return autopep8.fix_code(rendered_code)