Ejemplo n.º 1
0
    def _post_init_validation(self, value):
        """
        Check that the unrendered notebook has a cell tagged "parameters"

        Parameters
        ----------
        value
            Not used by this check

        Raises
        ------
        SourceInitializationError
            If there is no such cell. For .py and .ipynb locations, the
            message also explains how to add one
        """
        # NOTE: open question carried over from the original author: could
        # parso or compile replace pyflakes for detecting syntax errors?
        tagged_cell, _ = find_cell_with_tag(self._nb_obj_unrendered,
                                            'parameters')

        # nothing to report when the cell exists
        if tagged_cell is not None:
            return

        where = f' "{self.loc}"' if self.loc else ''
        parts = [f'Notebook{where} does not have a cell tagged "parameters"']

        # the hint depends on the file format; without a location (or with
        # any other extension) only the base message is reported
        suffix = Path(self.loc).suffix if self.loc else None

        if suffix == '.py':
            parts.append('.\n'
                         'Add a cell at the top like this:\n'
                         '\n'
                         '# + tags=["parameters"]\n'
                         '# your code here...\n'
                         '# -\n')
        elif suffix == '.ipynb':
            parts.append('. Add a cell at the top and tag it as "parameters". '
                         'Click here for instructions: '
                         'https://papermill.readthedocs.io/'
                         'en/stable/usage-parameterize.html')

        raise SourceInitializationError(''.join(parts))
Ejemplo n.º 2
0
    def _post_init_validation(self, value):
        """
        Check that the unrendered notebook has a cell tagged "parameters"

        Parameters
        ----------
        value
            Not used by this check

        Raises
        ------
        SourceInitializationError
            If there is no such cell. For .py and .ipynb locations, the
            message also explains how to add one
        """
        # NOTE: open question carried over from the original author: could
        # parso or compile replace pyflakes for detecting syntax errors?
        found, _ = find_cell_with_tag(self._nb_obj_unrendered, 'parameters')

        # nothing to report when the cell exists
        if found is not None:
            return

        # format-specific instructions, keyed by file extension
        hints = {
            '.py': ('.\n'
                    'Add a cell at the top like this:\n'
                    '\n'
                    '# + tags=["parameters"]\n'
                    'upstream = None\n'
                    'product = None\n'
                    '# -\n'
                    '\n'
                    'Go to: https://ploomber.io/s/params '
                    'for more information\n'),
            '.ipynb': ('. Add a cell at the top and tag it as "parameters". '
                       'Go to the next URL for '
                       'details: https://ploomber.io/s/params'),
        }

        quoted_loc = ' "{}"'.format(self.loc) if self.loc else ''
        base = 'Notebook{} does not have a cell tagged "parameters"'.format(
            quoted_loc)

        # no location (or an unknown extension) means no extra hint
        extension = Path(self.loc).suffix if self.loc else ''

        raise SourceInitializationError(base + hints.get(extension, ''))
Ejemplo n.º 3
0
def check_notebook(nb, params, filename):
    """
    Perform static analysis on a Jupyter notebook code cell sources

    Parameters
    ----------
    nb : NotebookNode
        Notebook object. Must have a cell with the tag "parameters"

    params : dict
        Parameter that will be added to the notebook source

    filename : str
        Filename to identify pyflakes warnings and errors

    Raises
    ------
    SyntaxError
        If the notebook's code contains syntax errors

    TypeError
        If params and nb do not match (unexpected or missing parameters),
        or if nb has no cell tagged "parameters"

    RenderError
        When certain pyflakes errors are detected (e.g., undefined name)
    """
    params_cell, _ = find_cell_with_tag(nb, 'parameters')
    check_source(nb)

    # without this guard, a missing cell surfaces as the opaque
    # "'NoneType' object is not subscriptable". The check runs after
    # check_source and raises TypeError so that the exception type and
    # precedence callers observe stay the same
    if params_cell is None:
        loc = ' "{}"'.format(filename) if filename else ''
        raise TypeError('Notebook{} does not have a cell tagged '
                        '"parameters"'.format(loc))

    check_params(params, params_cell['source'], filename)
Ejemplo n.º 4
0
def test_find_cell_with_tag(nb, tag, source, index):
    """
    find_cell_with_tag returns the expected (cell, index) pair for a
    notebook passed as text and parsed with jupytext
    """
    parsed = jupytext.reads(nb)
    found_cell, found_index = find_cell_with_tag(parsed, tag)

    # a falsy expected source means no cell should match the tag
    if not source:
        assert found_cell is None
    else:
        assert found_cell['source'] == source

    assert found_index == index
Ejemplo n.º 5
0
def _cleanup_rendered_nb(nb):
    """
    Remove the cells tagged "injected-parameters" and "debugging-settings"
    (when present) and delete empty "tags" metadata entries. The notebook
    is modified in place and also returned
    """
    # each lookup happens after the previous removal, so the index always
    # refers to the current cell list
    for tag in ('injected-parameters', 'debugging-settings'):
        _, position = find_cell_with_tag(nb, tag)

        if position is not None:
            print('Removing {} cell...'.format(tag))
            nb['cells'].pop(position)

    # papermill adds "tags" to all cells that don't have them, drop the
    # empty ones to avoid cluttering the script
    for cell in nb['cells']:
        metadata = cell.get('metadata', {})

        if 'tags' in metadata and len(metadata['tags']) == 0:
            del metadata['tags']

    return nb
Ejemplo n.º 6
0
def check_notebook(nb, params, filename):
    """
    Perform static analysis on a Jupyter notebook code cell sources

    Parameters
    ----------
    nb : NotebookNode
        Notebook object, must have a cell with the tag "parameters"

    params : dict
        Parameter that will be added to the notebook source

    filename : str
        Filename to identify pyflakes warnings and errors

    Returns
    -------
    bool
        True when no problems were found

    Raises
    ------
    RenderError
        If the notebook does not have a cell with the tag 'parameters',
        if the parameters in the notebook do not match the passed params or
        if pyflakes validation fails
    """
    # variable to collect all error messages
    error_message = '\n'

    params_cell, _ = find_cell_with_tag(nb, 'parameters')

    if params_cell is None:
        # report the missing cell through RenderError (as documented)
        # instead of failing with a TypeError when subscripting None;
        # pyflakes still runs below so all problems are shown at once
        error_message += ('Notebook does not have a cell tagged '
                          '"parameters"\n')
    else:
        # compare passed parameters with declared
        # parameters. This will make our notebook behave more
        # like a "function", if any parameter is passed but not
        # declared, this will return an error message, if any parameter
        # is declared but not passed, a warning is shown
        error_message += compare_params(params_cell['source'], params)

    # run pyflakes and collect errors
    res = check_source(nb, filename=filename)

    # pyflakes returns "warnings" and "errors", collect them separately
    if res['warnings']:
        error_message += 'pyflakes warnings:\n' + res['warnings']

    if res['errors']:
        error_message += 'pyflakes errors:\n' + res['errors']

    # if anything was collected, raise an exception
    if error_message != '\n':
        raise RenderError(error_message)

    return True
Ejemplo n.º 7
0
    def overwrite(self, obj):
        """
        Overwrite the function's body with the notebook contents, excluding
        injected parameters and cells whose first line is "#". obj can be
        either a notebook object or a path

        Parameters
        ----------
        obj : str, pathlib.Path or notebook object
            If a str or Path, the notebook is read from that location with
            nbformat; otherwise obj is used as the notebook. Note that a
            passed notebook object is modified (its ``cells`` attribute is
            reassigned)

        Notes
        -----
        Besides replacing the body, the file's top imports are replaced with
        the source of the cell tagged "imports-top", and ``upstream`` is
        added to or removed from the function's signature depending on
        whether the new body has upstream dependencies. The result is
        written to ``self.path_to_source``
        """
        self._reload_fn()

        if isinstance(obj, (str, Path)):
            nb = nbformat.read(obj, as_version=nbformat.NO_CONVERT)
        else:
            nb = obj

        # NOTE(review): presumably drops trailing empty cells - confirm in
        # last_non_empty_cell
        nb.cells = nb.cells[:last_non_empty_cell(nb.cells)]

        # remove cells that are only needed for the nb but not for the function
        code_cells = [c['source'] for c in nb.cells if keep_cell(c)]

        # add 4 spaces to each code cell, exclude white space lines
        code_cells = [indent_cell(code) for code in code_cells]

        # get the original file where the function is defined
        content = self.path_to_source.read_text()
        content_lines = content.splitlines()
        # NOTE(review): content[-1] raises IndexError if the file is empty
        trailing_newline = content[-1] == '\n'

        # lines where the function definition starts and ends in the file
        # (used below to slice the header and the footer)
        fn_starts, fn_ends = function_lines(self.fn)

        # keep the file the same until you reach the function definition plus
        # an offset to account for the signature (which might span >1 line)
        _, body_start = parse_function(self.fn)
        keep_until = fn_starts + body_start
        header = content_lines[:keep_until]

        # the footer is everything below the end of the original definition
        footer = content_lines[fn_ends:]

        # if there is anything at the end, we have to add an empty line to
        # properly end the function definition, if this is the last definition
        # in the file, we don't have to add this
        if footer:
            footer = [''] + footer

        new_content = '\n'.join(header + code_cells + footer)

        # replace old top imports with new ones
        new_content_lines = new_content.splitlines()
        _, line = extract_imports_top(parso.parse(new_content),
                                      new_content_lines)
        # NOTE(review): the cell is subscripted below without a None check,
        # this assumes nb always has a cell tagged "imports-top" - confirm
        imports_top_cell, _ = find_cell_with_tag(nb, 'imports-top')

        # ignore trailing whitespace in top imports cell but keep original
        # amount of whitespace separating the last import and the first name
        # definition
        content_to_write = (imports_top_cell['source'].rstrip() + '\n' +
                            '\n'.join(new_content_lines[line - 1:]))

        # if the original file had a trailing newline, keep it
        if trailing_newline:
            content_to_write += '\n'

        # NOTE: this last part parses the code several times, we can improve
        # performance by only parsing once
        m = parso.parse(content_to_write)
        fn_def = find_function_with_name(m, self.fn.__name__)
        fn_code = fn_def.get_code()

        has_upstream_dependencies = PythonCallableExtractor(
            fn_code).extract_upstream()
        upstream_in_func_sig = upstream_in_func_signature(fn_code)

        # keep the signature in sync with the body: add "upstream" if the
        # body needs it and it is missing, remove it if it is no longer used
        if not upstream_in_func_sig and has_upstream_dependencies:
            fn_code_new = add_upstream_to_func_signature(fn_code)
            content_to_write = _replace_fn_source(content_to_write, fn_def,
                                                  fn_code_new)

        elif upstream_in_func_sig and not has_upstream_dependencies:
            fn_code_new = remove_upstream_to_func_signature(fn_code)
            content_to_write = _replace_fn_source(content_to_write, fn_def,
                                                  fn_code_new)

        self.path_to_source.write_text(content_to_write)
Ejemplo n.º 8
0
 def _get_parameters_cell(self):
     """
     Return the source of the cell tagged "parameters" in the unrendered
     notebook, calling ``_read_nb_str_unrendered`` first
     """
     self._read_nb_str_unrendered()
     params_cell, _index = find_cell_with_tag(self._nb_obj_unrendered,
                                              tag='parameters')
     return params_cell.source