Beispiel #1
0
def _prepare_transformer_assets(fn: Callable, assets: Dict = None):
    notebook_path = jputils.get_notebook_path()
    processor = NotebookProcessor(nb_path=notebook_path, skip_validation=True)
    fn_source = astutils.get_function_source(fn, strip_signature=False)
    missing_names = flakeutils.pyflakes_report(
        processor.get_imports_and_functions() + "\n" + fn_source)
    if not assets:
        assets = dict()
    if not isinstance(assets, dict):
        ValueError("Please provide preprocessing assets as a dictionary"
                   " mapping variables *names* to their objects")
    missing_assets = [x not in assets.keys() for x in missing_names]
    if any(missing_assets):
        raise RuntimeError(
            "The following abjects are a dependency for the"
            " provided preprocessing function. Please add the"
            " to the `preprocessing_assets` dictionary: %s" %
            [a for a, m in zip(missing_names, missing_assets) if m])
    # save function and assets
    utils.clean_dir(TRANSFORMER_ASSETS_DIR)
    marshal.set_data_dir(TRANSFORMER_ASSETS_DIR)
    marshal.save(fn, TRANSFORMER_FN_ASSET_NAME)
    for asset_name, asset_value in assets.items():
        marshal.save(asset_value, asset_name)
    # save notebook as well
    shutil.copy(
        notebook_path,
        os.path.join(TRANSFORMER_ASSETS_DIR, TRANSFORMER_SRC_NOTEBOOK_NAME))
Beispiel #2
0
    def _detect_fns_free_variables(self,
                                   source_code: str,
                                   imports_and_functions: str = "",
                                   step_parameters: dict = None):
        """Return the free variables of every function in `source_code`.

        Free variable: _If a variable is used in a code block but not defined
        there, it is a free variable._

        An Example:

        ```
        x = 5
        def foo():
            print(x)
        ```

        In the example above, `x` is a free variable for function `foo`,
        because it is defined outside of the context of `foo`.

        Here we run the PyFlakes report over the function body to get all the
        missing names (i.e. free variables), excluding the function arguments.

        Args:
            source_code: Multiline Python source code
            imports_and_functions: Multiline Python source that is prepended
                to every pipeline step. It should contain the code cells that
                were tagged as `import` and `functions`. We prepend this code
                to the function body because it will always be present in any
                pipeline step.
            step_parameters: Step parameters dict, keyed by parameter name.
                The step parameters are removed from the pyflakes report, as
                these names will always be available in the step's context.

        Returns (dict): A dictionary mapping each function name to a tuple
            `(free_vars, consumed_params)`: the free variable names of the
            function (step parameters excluded) and the set of step
            parameter names that the function uses.
        """
        fns_free_vars = dict()
        # now check the functions' bodies for free variables. fns is a
        # dict function_name -> function_source
        fns = astutils.parse_functions(source_code)
        for fn_name, fn in fns.items():
            code = imports_and_functions + "\n" + fn
            free_vars = flakeutils.pyflakes_report(code=code)
            # the pipeline parameters that are used in the function. Default
            # to an empty set so the value has the same type whether or not
            # step parameters were provided.
            consumed_params = set()
            if step_parameters:
                consumed_params = free_vars.intersection(
                    step_parameters.keys())
                # remove the used parameters from the free variables, as they
                # need to be handled differently.
                free_vars.difference_update(consumed_params)
            fns_free_vars[fn_name] = (free_vars, consumed_params)
        return fns_free_vars
Beispiel #3
0
    def _detect_in_dependencies(self,
                                source_code: str,
                                pipeline_parameters: dict = None):
        """Find the names one pipeline step needs from outside its own code.

        Magic commands in the source are commented out first, then the
        PyFlakes report is run over the result to collect the missing names.

        Args:
            source_code: Multiline Python source code
            pipeline_parameters: Pipeline parameters dict

        Returns (tuple): `(ins, step_params)`. `ins` holds the missing names
            that are not pipeline parameters; `step_params` is the subset of
            `pipeline_parameters` whose names this step actually uses.
        """
        missing_names = flakeutils.pyflakes_report(
            code=utils.comment_magic_commands(source_code))

        # Without pipeline parameters every missing name is a dependency
        # and the step consumes no parameters.
        if not pipeline_parameters:
            return missing_names, {}

        # Pipeline parameters show up as missing names too, but they are
        # handed to the step as parameters and must not be marshalled in.
        # Only the parameters this step really uses are kept.
        used_names = missing_names.intersection(pipeline_parameters.keys())
        missing_names.difference_update(used_names)
        used_parameters = {name: pipeline_parameters[name]
                           for name in used_names}
        return missing_names, used_parameters