Example #1
0
def clean_cells(data: Dict[str, Any], nb_source: str,
                remove_outputs: bool) -> None:
    """Remove empty cells and clean the metadata and code of the rest.

    The notebook is modified in place:

    1. Leading and trailing blank lines are trimmed from every cell source.
    2. Cells whose source is empty after trimming are dropped.
    3. Each remaining cell's metadata is reduced to `id`, `cellView` and a
       cleaned `colab` entry; a missing `id` is generated.
    4. Code cells are passed to `_clean_code_cell`.

    Args:
      data: Object representing a parsed JSON notebook.
      nb_source: JSON string of entire notebook contents.
      remove_outputs: Boolean True to remove code cell outputs, False to keep.
    """
    # Clear leading and trailing newlines.
    for cell in data["cells"]:
        cell_source = cell["source"]
        if isinstance(cell_source, str):
            # nbformat also allows a multi-line source to be stored as one
            # string, which has no `pop()`; strip the newline characters.
            cell["source"] = cell_source.strip("\n")
        else:
            first = 0
            last = len(cell_source)
            while first < last and cell_source[first] == "\n":
                first += 1
            while last > first and cell_source[last - 1] == "\n":
                last -= 1
            # Trim the existing list in place with two slice deletions rather
            # than repeated `pop(0)`, which shifts the whole list every time.
            del cell_source[last:]
            del cell_source[:first]

    # Remove empty cells.
    data["cells"] = [cell for cell in data["cells"] if any(cell["source"])]

    # Clean cell metadata. The 1-based position feeds the generated cell id.
    for cell_count, cell in enumerate(data["cells"], start=1):
        cell_metadata = cell.get("metadata", {})
        if "id" not in cell_metadata:
            cell_metadata["id"] = notebook_utils.generate_cell_id(
                cell["source"], cell_count)
        notebook_utils.del_entries_except(cell_metadata,
                                          keep=["id", "cellView", "colab"])
        _clean_metadata_colab(cell_metadata, remove_outputs)

        cell["metadata"] = cell_metadata

    # The presence of this field indicates that outputs are already saved.
    has_outputs = '"output_type"' in nb_source

    for cell in data["cells"]:
        if cell["cell_type"] == "code":
            _clean_code_cell(cell, remove_outputs)

    if has_outputs and remove_outputs:
        notebook_utils.warn("Removed the existing output cells.")
Example #2
0
def _clean_metadata_colab(cell_metadata: Dict[str, Any],
                          remove_outputs: bool) -> None:
    """Clean up a cell's `metadata.colab` field.

  Remove all `metadata.colab` contents except for `metadata.colab.resources`, if
  present. The Colab resources are used to embed data within the notebook and
  can be treated like output cells (kept unless explictly removed).

  Args:
    cell_metadata: object representing the parsed JSON metadata from a cell.
    remove_outputs: Boolean to clear cell output.
  """
    colab = cell_metadata.pop("colab", {})
    # If no outputs, just clear out `metadata.colab`.
    if remove_outputs:
        return

    # Clear around `resources` if not empty. Otherwise, clear out `metata.colab`.
    if colab.get("resources"):
        notebook_utils.del_entries_except(colab, keep=["resources"])
        cell_metadata["colab"] = colab
Example #3
0
def clean_root(data: Dict[str, Any], filepath: pathlib.Path) -> None:
    """Deletes extra top-level notebook fields and metadata.

    The notebook is modified in place: unknown top-level and metadata keys are
    dropped, the notebook format version is set to 4.0, the Colab name is set
    to the file's basename with the table of contents visible, and the
    kernelspec is normalized to a supported kernel (Python 3 by default).

    Jupyter format spec:
    https://nbformat.readthedocs.io/en/latest/format_description.html

    Args:
      data: object representing a parsed JSON notebook.
      filepath: Path of the notebook file passed to the command-line.
    """
    # These top-level fields are required:
    notebook_utils.del_entries_except(
        data, keep=["cells", "metadata", "nbformat_minor", "nbformat"])

    # Read the metadata with a default before pruning it, so a notebook with
    # no `metadata` field gets one created instead of raising KeyError.
    metadata = data.get("metadata", {})
    # All metadata is optional according to spec, but we use some of it.
    notebook_utils.del_entries_except(
        metadata, keep=["accelerator", "colab", "kernelspec"])

    colab = metadata.get("colab", {})

    # Set top-level notebook defaults.
    data["nbformat"] = 4
    data["nbformat_minor"] = 0

    # Colab metadata
    notebook_utils.del_entries_except(
        colab, keep=["collapsed_sections", "name", "toc_visible"])
    colab["name"] = os.path.basename(filepath)
    colab["toc_visible"] = True
    metadata["colab"] = colab

    # Kernelspec metadata
    kernelspec = metadata.get("kernelspec", {})
    notebook_utils.del_entries_except(kernelspec,
                                      keep=["display_name", "name"])

    supported_kernels = {"python3": "Python 3", "swift": "Swift"}
    kernel_name = kernelspec.get("name")
    if kernel_name not in supported_kernels:
        kernel_name = "python3"  # Notebook defaults to Python3 (same as Colab).

    kernelspec["name"] = kernel_name
    kernelspec["display_name"] = supported_kernels[kernel_name]
    metadata["kernelspec"] = kernelspec

    data["metadata"] = metadata