def _prepare_transformer_assets(fn: Callable, assets: Dict = None):
    notebook_path = jputils.get_notebook_path()
    processor = NotebookProcessor(nb_path=notebook_path, skip_validation=True)
    fn_source = astutils.get_function_source(fn, strip_signature=False)
    missing_names = flakeutils.pyflakes_report(
        processor.get_imports_and_functions() + "\n" + fn_source)

    if not assets:
        assets = dict()
    if not isinstance(assets, dict):
        raise ValueError("Please provide preprocessing assets as a dictionary"
                         " mapping variable *names* to their objects")
    missing_assets = [x not in assets.keys() for x in missing_names]
    if any(missing_assets):
        raise RuntimeError(
            "The following objects are a dependency for the"
            " provided preprocessing function. Please add them"
            " to the `preprocessing_assets` dictionary: %s"
            % [a for a, m in zip(missing_names, missing_assets) if m])

    # save function and assets
    utils.clean_dir(TRANSFORMER_ASSETS_DIR)
    marshal.set_data_dir(TRANSFORMER_ASSETS_DIR)
    marshal.save(fn, TRANSFORMER_FN_ASSET_NAME)
    for asset_name, asset_value in assets.items():
        marshal.save(asset_value, asset_name)

    # save the notebook as well
    shutil.copy(
        notebook_path,
        os.path.join(TRANSFORMER_ASSETS_DIR, TRANSFORMER_SRC_NOTEBOOK_NAME))
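

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original module). The dependency
# check above boils down to: every name that pyflakes reports as missing
# from the preprocessing function must be provided in the `assets`
# dictionary. The names below are hypothetical stand-ins for what
# flakeutils.pyflakes_report would return.
def _example_missing_asset_check():
    missing_names = {"scaler", "vocab"}   # names reported as undefined
    assets = {"scaler": object()}         # assets supplied by the user
    unresolved = [name for name in missing_names if name not in assets]
    # Here `unresolved == ["vocab"]`; any non-empty result is the condition
    # that makes _prepare_transformer_assets raise RuntimeError.
    return unresolved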
def _detect_fns_free_variables(self, source_code: str,
                               imports_and_functions: str = "",
                               step_parameters: dict = None):
    """Return the functions' free variables.

    Free variable: _If a variable is used in a code block but not defined
    there, it is a free variable._

    An example:

    ```
    x = 5
    def foo():
        print(x)
    ```

    In the example above, `x` is a free variable for function `foo`,
    because it is defined outside of the context of `foo`.

    Here we run the PyFlakes report over the function body to get all the
    missing names (i.e. free variables), excluding the function arguments.

    Args:
        source_code: Multiline Python source code
        imports_and_functions: Multiline Python source that is prepended to
            every pipeline step. It should contain the code cells that were
            tagged as `import` and `functions`. We prepend this code to the
            function body because it will always be present in any pipeline
            step.
        step_parameters: Step parameter names. The step parameters are
            removed from the PyFlakes report, as these names will always be
            available in the step's context.

    Returns (dict): A dictionary with the name of the function as key and a
        tuple of its free variable names and the consumed pipeline
        parameters as value.
    """
    fns_free_vars = dict()
    # now check the functions' bodies for free variables. fns is a
    # dict function_name -> function_source
    fns = astutils.parse_functions(source_code)
    for fn_name, fn in fns.items():
        code = imports_and_functions + "\n" + fn
        free_vars = flakeutils.pyflakes_report(code=code)
        # the pipeline parameters that are used in the function
        consumed_params = set()
        if step_parameters:
            consumed_params = free_vars.intersection(step_parameters.keys())
            # remove the used parameters from the free variables, as they
            # need to be handled differently.
            free_vars.difference_update(consumed_params)
        fns_free_vars[fn_name] = (free_vars, consumed_params)
    return fns_free_vars
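

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): free variables can
# also be detected with the standard library's `symtable` module. This is
# not what the method above does (it delegates to
# flakeutils.pyflakes_report, which also filters out builtins); the helper
# below only demonstrates the concept on a toy snippet.
import symtable


def _example_free_variables(source: str, fn_name: str) -> set:
    """Return names used but not defined inside `fn_name` within `source`."""
    module_table = symtable.symtable(source, "<example>", "exec")
    fn_table = next(t for t in module_table.get_children()
                    if t.get_name() == fn_name)
    return {sym.get_name() for sym in fn_table.get_symbols()
            if not sym.is_assigned() and not sym.is_parameter()
            and (sym.is_global() or sym.is_free())}


# _example_free_variables("x = 5\ndef foo(y):\n    print(x + y)\n", "foo")
# -> {"print", "x"}  (pyflakes would additionally drop builtins like `print`)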
def _detect_in_dependencies(self, source_code: str,
                            pipeline_parameters: dict = None):
    """Detect missing names from one pipeline step source code.

    Args:
        source_code: Multiline Python source code
        pipeline_parameters: Pipeline parameters dict
    """
    commented_source_code = utils.comment_magic_commands(source_code)
    ins = flakeutils.pyflakes_report(code=commented_source_code)

    # Pipeline parameters will be part of the names that are missing,
    # but of course we don't want to marshal them in as they will be
    # present as parameters
    relevant_parameters = set()
    if pipeline_parameters:
        # Not all pipeline parameters are needed in every pipeline step,
        # these are the parameters that are actually needed by this step.
        relevant_parameters = ins.intersection(pipeline_parameters.keys())
        ins.difference_update(relevant_parameters)
    step_params = {k: pipeline_parameters[k] for k in relevant_parameters}
    return ins, step_params
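

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): how the missing
# names of a step get split between objects that must be marshalled in and
# pipeline parameters that will be passed as arguments. The names are
# hypothetical; in the method above `ins` comes from
# flakeutils.pyflakes_report.
def _example_split_ins_and_params():
    ins = {"train_df", "lr", "epochs"}                 # missing names
    pipeline_parameters = {"lr": 0.1, "epochs": 10}    # pipeline parameters
    relevant = ins.intersection(pipeline_parameters)   # {"lr", "epochs"}
    ins.difference_update(relevant)                    # leaves {"train_df"}
    step_params = {k: pipeline_parameters[k] for k in relevant}
    return ins, step_params  # ({"train_df"}, {"lr": 0.1, "epochs": 10})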