def detect_in_dependencies(nb_graph: nx.DiGraph,
                           pipeline_parameters: dict = None):
    """Detect missing names from the code blocks in the graph.

    For every node of the graph, the node's ``source`` lines are joined into
    one string, magic commands are commented out and the result is handed to
    ``pyflakes_report``. The reported names are then split in two groups and
    written back on the node:

    - ``ins``: sorted list of the reported names that are not pipeline
      parameters
    - ``parameters``: dict with the pipeline parameters used by the step

    Args:
        nb_graph: nx DiGraph with pipeline code blocks
        pipeline_parameters: Pipeline parameters dict. When None or empty,
            every node gets an empty ``parameters`` dict.
    """
    for block in nb_graph.nodes():
        source_code = '\n'.join(nb_graph.nodes(data=True)[block]['source'])
        commented_source_code = utils.comment_magic_commands(source_code)
        ins = pyflakes_report(code=commented_source_code)

        # Pipeline parameters will be part of the names that are missing,
        # but of course we don't want to marshal them in as they will be
        # present as parameters
        step_params = {}
        if pipeline_parameters:
            # Not all pipeline parameters are needed in every pipeline step,
            # keep just the ones that are actually needed by this step.
            # The dict is built by walking `pipeline_parameters` rather than
            # a set intersection, so that the key order follows the caller's
            # dict instead of the set iteration order, which for strings can
            # change from one interpreter run to the next.
            step_params = {name: value
                           for name, value in pipeline_parameters.items()
                           if name in ins}
            ins.difference_update(step_params)

        nx.set_node_attributes(
            nb_graph,
            {block: {'ins': sorted(ins), 'parameters': step_params}})
def detect_in_dependencies(source_code: str,
                           pipeline_parameters: dict = None):
    """Detect missing names from one pipeline step source code.

    Magic commands are commented out before the code is handed to
    ``pyflakes_report``. The pipeline parameters found among the reported
    names are removed from them and returned separately.

    Args:
        source_code: Multiline Python source code
        pipeline_parameters: Pipeline parameters dict

    Returns:
        A tuple ``(ins, step_params)``: ``ins`` is the object returned by
        ``pyflakes_report`` with the pipeline parameters removed in place,
        ``step_params`` is a dict with the pipeline parameters used by this
        step (empty when ``pipeline_parameters`` is None or empty).
    """
    commented_source_code = utils.comment_magic_commands(source_code)
    ins = pyflakes_report(code=commented_source_code)

    # Pipeline parameters will be part of the names that are missing,
    # but of course we don't want to marshal them in as they will be
    # present as parameters
    step_params = {}
    if pipeline_parameters:
        # Not all pipeline parameters are needed in every pipeline step,
        # keep just the ones that are actually needed by this step.
        # The dict is built by walking `pipeline_parameters` rather than a
        # set intersection, so that the key order follows the caller's dict
        # instead of the set iteration order, which for strings can change
        # from one interpreter run to the next.
        step_params = {name: value
                       for name, value in pipeline_parameters.items()
                       if name in ins}
        ins.difference_update(step_params)
    return ins, step_params
def test_comment_magic_commands():
    """Check that magic commands in a multiline snippet get commented out."""
    # Mix of a cell magic, line magics (two of them consecutive) and plain
    # code lines, which are expected to come back untouched.
    snippet = '''
%%a magic cell command
some code
%matplotlib inline
%consecutive command
some other code
some other code
%another command
some other code
'''

    expected = '''
#%%a magic cell command
some code
#%matplotlib inline
#%consecutive command
some other code
some other code
#%another command
some other code
'''
    commented = utils.comment_magic_commands(snippet)
    assert commented == expected.strip()
def get_all_names(code):
    """Collect the names found in the ast of the input code block.

    The following node types contribute names:
        - ast.Name: the identifier
        - ast.FunctionDef, ast.ClassDef: the defined name
        - ast.Import, ast.ImportFrom: the alias when present, otherwise the
          imported name
        - ast.Tuple, ast.List: whatever `get_list_tuple_names` returns

    The result is only meant to be cross referenced with the missing names
    detected by the Flakes report. It is not used to arbitrary detect
    variable dependencies.

    Known missing detections:
        - Function and Class parameters

    Args:
        code: multiline string representing Python code

    Returns:
        a set of string names
    """
    # IPython magic commands must be commented before parsing:
    #  1. AST does not understand IPython magic commands, so parsing the raw
    #     code would fail.
    #  2. Both in-line magics and cell magics get commented out. A cell magic
    #     that declares a variable (e.g. `%%capture out`) can then make Kale
    #     report `out` as missing and try to marshal it in at the beginning
    #     of the pipeline step. These cases should be very rare and will be
    #     handled case by case as specific issues arise.
    #  3. The commenting is local to this function, just to make AST run:
    #     magic commands are preserved in the resulting Python executable.
    parsed = ast.parse(utils.comment_magic_commands(code))

    found = set()
    for statement in parsed.body:
        for node in walk(statement):
            # The node classes checked below are disjoint, so at most one
            # branch applies to any given node.
            if isinstance(node, ast.Name):
                found.add(node.id)
            elif isinstance(node, (ast.FunctionDef, ast.ClassDef)):
                found.add(node.name)
            elif isinstance(node, (ast.Import, ast.ImportFrom)):
                for alias in node.names:
                    found.add(alias.name if alias.asname is None
                              else alias.asname)
            elif isinstance(node, (ast.Tuple, ast.List)):
                found.update(get_list_tuple_names(node))
    return found