def _prepare_nb_for_cache(self, nb: nbf.NotebookNode, deepcopy=False):
    """Strip every non-code cell from a notebook before it is cached.

    The notebook is modified in place unless ``deepcopy`` is truthy, in
    which case the work is done on a deep copy and the notebook passed in
    is left untouched.

    :param nb: The notebook to prepare
    :param deepcopy: If true, operate on (and return) a deep copy of ``nb``
    :return: The prepared notebook (``nb`` itself when no copy is made)
    """
    target = copy.deepcopy(nb) if deepcopy else nb
    # Only code cells are relevant to the cache; everything else is dropped.
    target.cells = list(
        filter(lambda candidate: candidate.cell_type == "code", target.cells)
    )
    return target
def _create_hashable_nb(
    self,
    nb: nbf.NotebookNode,
    compare_nb_meta=("kernelspec", ),
    compare_cell_meta=None,
):
    """Build a reduced copy of a notebook holding only the hashed content.

    The input is deep-copied first, so ``nb`` itself is not modified. In the
    copy, notebook metadata is restricted to the selected keys, non-code
    cells are discarded, and every code cell is rebuilt with its source, its
    selected metadata, no execution count and no outputs.

    :param nb: The notebook to reduce
    :param compare_nb_meta: Notebook metadata keys to keep (if None, keep all)
    :param compare_cell_meta: Cell metadata keys to keep (if None, keep all)
    :return: The reduced copy of the notebook
    """
    hashable = copy.deepcopy(nb)

    # Notebook-level metadata: keep only the requested keys.
    kept_nb_meta = {}
    for key, value in hashable.metadata.items():
        if compare_nb_meta is None or (key in compare_nb_meta):
            kept_nb_meta[key] = value
    hashable.metadata = nbf.from_dict(kept_nb_meta)

    # Code cells only, with execution results blanked out so that they
    # cannot influence the comparison.
    hashable.cells = [
        nbf.from_dict({
            "cell_type": code_cell.cell_type,
            "source": code_cell.source,
            "metadata": {
                key: value
                for key, value in code_cell.metadata.items()
                if compare_cell_meta is None or (key in compare_cell_meta)
            },
            "execution_count": None,
            "outputs": [],
        })
        for code_cell in hashable.cells
        if code_cell.cell_type == "code"
    ]
    return hashable
def create_hashed_notebook(
    self,
    nb: nbf.NotebookNode,
    nb_metadata: Optional[Iterable[str]] = ("kernelspec", ),
    cell_metadata: Optional[Iterable[str]] = None,
) -> Tuple[nbf.NotebookNode, str]:
    """Convert a notebook to a standard format and hash.

    Note: we always hash notebooks as version 4.4,
    to allow for matching notebooks of different versions

    The input notebook is deep-copied before any processing, so ``nb`` is
    not modified. The returned notebook is the converted copy with all
    non-code cells removed; the hash is computed from a further reduced
    notebook that keeps only cell sources and the selected metadata.

    :param nb: The notebook to convert and hash
    :param nb_metadata: The notebook metadata keys to hash (if None, use all)
    :param cell_metadata: The cell metadata keys to hash (if None, use all)
    :return: (notebook, hash)
    :raises CachingError: if the converted notebook has a minor version
        greater than 5
    """

    def _as_key_set(keys):
        """Normalise a key selection to a frozenset (None means 'all')."""
        if keys is None:
            return None
        if isinstance(keys, str):
            # A bare string means a single key; testing ``k in "abc"``
            # would otherwise match on substrings.
            return frozenset((keys, ))
        # Materialise once: the selection is consulted for every metadata
        # key of every cell, which would exhaust a one-shot iterator after
        # the first few membership tests.
        return frozenset(keys)

    def _select(metadata, allowed):
        """Return the entries of ``metadata`` whose key is allowed."""
        return {
            key: value
            for key, value in metadata.items()
            if allowed is None or key in allowed
        }

    nb_keys = _as_key_set(nb_metadata)
    cell_keys = _as_key_set(cell_metadata)

    # copy the notebook
    nb = copy.deepcopy(nb)
    # update the notebook to a consistent version
    nb = nbf.convert(nb, to_version=NB_VERSION)
    if nb.nbformat_minor > 5:
        raise CachingError(
            "notebook version greater than 4.5 not yet supported")
    # remove non-code cells
    nb.cells = [cell for cell in nb.cells if cell.cell_type == "code"]

    # create notebook for hashing, with selected metadata
    # (nb.cells holds only code cells at this point, so no second filter)
    hash_nb = nbf.from_dict({
        "nbformat": nb.nbformat,
        "nbformat_minor": 4,  # v4.5 include cell ids, which we do not cache
        "metadata": _select(nb.metadata, nb_keys),
        "cells": [{
            "cell_type": cell.cell_type,
            "source": cell.source,
            "metadata": _select(cell.metadata, cell_keys),
            # execution results are blanked so they do not affect the hash
            "execution_count": None,
            "outputs": [],
        } for cell in nb.cells],
    })

    # hash notebook
    string = nbf.writes(hash_nb, nbf.NO_CONVERT)
    hash_string = hashlib.md5(string.encode()).hexdigest()

    return (nb, hash_string)