Example #1
0
def detect_in_dependencies(nb_graph: nx.DiGraph,
                           pipeline_parameters: dict = None):
    """Annotate every graph node with its missing names and parameters.

    For each node, the lines stored under the node's ``source`` attribute
    are joined into one code string, magic commands are commented out and
    the result is passed to ``pyflakes_report``. The names reported are then
    split in two groups and written back on the node:

        - ``ins``: sorted list of reported names that are not pipeline
          parameters
        - ``parameters``: dict with the subset of ``pipeline_parameters``
          whose keys were reported for this node

    The graph is updated in place; nothing is returned.

    Args:
        nb_graph: nx DiGraph with pipeline code blocks
        pipeline_parameters: Pipeline parameters dict
    """
    # A live view over the node attribute dicts, created once for all nodes.
    nodes_data = nb_graph.nodes(data=True)
    for block in nb_graph.nodes():
        block_code = '\n'.join(nodes_data[block]['source'])
        missing_names = pyflakes_report(
            code=utils.comment_magic_commands(block_code))

        # Pipeline parameters show up among the missing names, but they
        # must not be marshalled in: the step receives them as parameters.
        if pipeline_parameters:
            # Only some of the pipeline parameters are used by any given
            # step: keep just the ones this step actually references.
            used_params = missing_names.intersection(
                pipeline_parameters.keys())
            missing_names.difference_update(used_params)
            step_params = {name: pipeline_parameters[name]
                           for name in used_params}
        else:
            step_params = {}

        node_attributes = {
            'ins': sorted(missing_names),
            'parameters': step_params,
        }
        nx.set_node_attributes(nb_graph, {block: node_attributes})
Example #2
0
def detect_in_dependencies(source_code: str, pipeline_parameters: dict = None):
    """Split the missing names of a pipeline step from its parameters.

    Magic commands in ``source_code`` are commented out before the code is
    handed to ``pyflakes_report``. Any reported name that is also a key of
    ``pipeline_parameters`` is removed from the reported names and collected
    separately, together with its value.

    Args:
        source_code: Multiline Python source code
        pipeline_parameters: Pipeline parameters dict

    Returns: a tuple ``(ins, step_params)`` where ``ins`` holds the reported
        names that are not pipeline parameters and ``step_params`` is a dict
        with the pipeline parameters referenced by this step.
    """
    missing_names = pyflakes_report(
        code=utils.comment_magic_commands(source_code))

    # Pipeline parameters show up among the missing names, but they must
    # not be marshalled in: the step receives them as parameters.
    if not pipeline_parameters:
        return missing_names, {}

    # Only some of the pipeline parameters are used by any given step:
    # keep just the ones this step actually references.
    used_params = missing_names.intersection(pipeline_parameters.keys())
    missing_names.difference_update(used_params)
    step_params = {name: pipeline_parameters[name] for name in used_params}
    return missing_names, step_params
Example #3
0
def test_comment_magic_commands():
    """Check that magic commands in a multiline block get commented out.

    Lines starting with ``%`` or ``%%`` are expected to come back prefixed
    with ``#``, while every other line is expected to be left as it is.
    """
    source = '''
%%a magic cell command
some code
%matplotlib inline
%consecutive command
some other code
some other code
%another command
some other code
    '''

    expected = '''
#%%a magic cell command
some code
#%matplotlib inline
#%consecutive command
some other code
some other code
#%another command
some other code
    '''
    commented = utils.comment_magic_commands(source)
    # The result is compared against the expected text without its
    # surrounding whitespace.
    assert commented == expected.strip()
Example #4
0
def get_all_names(code):
    """Collect the names found in the ast of the input code block.

    The following node types contribute names:

        - ast.Name: the identifier
        - ast.FunctionDef, ast.ClassDef: the name of the definition
        - ast.Import, ast.ImportFrom: the alias of each imported name when
          one is given, the imported name otherwise
        - ast.Tuple, ast.List: whatever `get_list_tuple_names` returns for
          the node

    The result is only meant to be cross referenced with the missing names
    detected by the Flakes report. It is not meant to detect arbitrary
    variable dependencies.

    Known missing detections:

        - Function and Class parameters

    Args:
        code: multiline string representing Python code

    Returns: a set of string names
    """
    # IPython magic commands are commented out before parsing.
    # Note #1: AST does not understand IPython magic commands, so the code
    #  could not be parsed otherwise.
    # Note #2: Both in-line magics and cell magics get commented. This can
    #  lead to potential errors with a cell magic like `%%capture out`: Kale
    #  would detect as missing the `out` variable declared by the magic
    #  command and would try to marshal it in at the beginning of the
    #  pipeline step. These cases should be very rare, and will be handled
    #  case by case as specific issues arise.
    # Note #3: The commenting happens just here to make AST run, magic
    #  commands are preserved in the resulting Python executable.
    parsed = ast.parse(utils.comment_magic_commands(code))

    found = set()
    for statement in parsed.body:
        for node in walk(statement):
            # These node classes are disjoint, so at most one branch can
            # match any given node.
            if isinstance(node, ast.Name):
                found.add(node.id)
            elif isinstance(node, (ast.FunctionDef, ast.ClassDef)):
                found.add(node.name)
            elif isinstance(node, (ast.Import, ast.ImportFrom)):
                found.update(
                    alias.name if alias.asname is None else alias.asname
                    for alias in node.names)
            elif isinstance(node, (ast.Tuple, ast.List)):
                found.update(get_list_tuple_names(node))
    return found