def config_from_metadata(nb: NotebookNode) -> "MetadataConfig":
    """Extract configuration data from notebook/cell metadata.

    Notebook-level settings are read from ``nb["metadata"][META_KEY]`` and
    cell-level settings from ``cell["metadata"][META_KEY]`` of every cell.
    Each metadata mapping is passed to ``validate_metadata`` together with
    its location in the notebook before it is used.

    Cell-level ``diff_replace`` / ``diff_ignore`` paths are relative to the
    cell, so they are prefixed with ``/cells/<index>`` before being merged
    with the notebook-level entries.

    :param nb: the notebook to read the configuration from
    :return: a ``MetadataConfig`` built from, in order: the merged
        ``diff_replace`` entries (as a tuple), the merged ``diff_ignore``
        paths (as a set), and the notebook-level ``skip`` (default
        ``False``) and ``skip_reason`` (default ``""``) values
    """
    nb_metadata = nb.get("metadata", {}).get(META_KEY, {})
    validate_metadata(nb_metadata, "/metadata")

    # notebook-level entries are used as given (already absolute paths)
    diff_replace = [tuple(d) for d in nb_metadata.get("diff_replace", [])]
    diff_ignore = set(nb_metadata.get("diff_ignore", []))

    for i, cell in enumerate(nb.get("cells", [])):
        cell_metadata = cell.get("metadata", {}).get(META_KEY, {})
        validate_metadata(cell_metadata, f"/cells/{i}/metadata")

        # cell-level paths are relative: anchor them at this cell
        cell_prefix = f"/cells/{i}"
        diff_replace.extend(
            (f"{cell_prefix}{p}", x, r)
            for p, x, r in cell_metadata.get("diff_replace", [])
        )
        diff_ignore.update(
            f"{cell_prefix}{p}" for p in cell_metadata.get("diff_ignore", [])
        )

    return MetadataConfig(
        tuple(diff_replace),
        diff_ignore,
        nb_metadata.get("skip", False),
        nb_metadata.get("skip_reason", ""),
    )
def extract_glue_data_cell(
        cell: NotebookNode) -> list[tuple[str, NotebookNode]]:
    """Extract glue data from a single cell.

    An output is treated as glue data when its metadata contains a
    ``"scrapbook"`` entry.  For each such output the ``mime_prefix`` given
    in that entry (if any) is stripped from the keys of ``output["data"]``
    and the output is collected under the entry's ``name``.

    Side effects: ``output["data"]`` of every glue output is replaced by a
    new dict, and ``cell.outputs`` is reassigned to the outputs that remain
    visible: every non-glue output plus the glue outputs that carry no
    ``mime_prefix``.  A cell without an ``"outputs"`` field is left
    untouched.

    :param cell: the notebook cell to extract glue outputs from
    :return: ``(name, output)`` pairs, in the order the outputs appear
    """
    if "outputs" not in cell:
        # nothing to extract; do not add an ``outputs`` field to cells
        # (e.g. markdown cells) that never had one
        return []

    remaining = []
    glued = []
    for output in cell.get("outputs", []):
        scrapbook = output.get("metadata", {}).get("scrapbook")
        if scrapbook is None:
            remaining.append(output)
            continue

        prefix = scrapbook.get("mime_prefix", "")
        # Only strip the prefix from keys that actually carry it; slicing
        # blindly would corrupt any key that does not start with it.
        output["data"] = {
            (mime[len(prefix):] if mime.startswith(prefix) else mime): value
            for mime, value in output["data"].items()
        }
        glued.append((scrapbook["name"], output))
        if not prefix:
            # assume that the output is a displayable object
            remaining.append(output)

    cell.outputs = remaining
    return glued
def blacken_code(cell: NotebookNode, resources: dict,
                 index: int) -> Tuple[NotebookNode, dict]:
    """Format python source code with black (see https://black.readthedocs.io).

    Only cells whose ``cell_type`` is ``"code"`` are touched; any other cell
    is returned unchanged.  If black cannot format the source, the source is
    kept as-is and a debug message is logged.  In both cases trailing
    whitespace is stripped from the code cell's source.

    :param cell: the notebook cell to format (modified in place)
    :param resources: passed through unchanged
    :param index: position of the cell, used only in the log message
    :return: the ``(cell, resources)`` pair
    :raises ImportError: if black is not installed (raised before the cell
        type is inspected, so it applies to every cell)
    """
    try:
        import black
    except ImportError as err:
        # chain explicitly so the original import failure stays visible
        raise ImportError(
            "black not installed: see https://black.readthedocs.io") from err

    if cell.get("cell_type", None) != "code":
        return cell, resources

    # TODO use metadata to set target versions and whether to raise on exceptions
    # i.e. black.FileMode(target_versions, {black.TargetVersion.PY36})
    try:
        cell.source = black.format_str(cell.source, mode=black.FileMode())
    except (SyntaxError, black.InvalidInput):
        # best effort: leave the source unformatted
        logger.debug(f"cell {index} could not be formatted by black.")

    # code cells don't require a trailing new line
    cell.source = cell.source.rstrip()

    return cell, resources
def beautifulsoup(cell: NotebookNode, resources: dict,
                  index: int) -> Tuple[NotebookNode, dict]:
    """Format text/html and image/svg+xml outputs with beautiful-soup.

    See: https://beautiful-soup-4.readthedocs.io.

    Only ``execute_result`` and ``display_data`` outputs of code cells are
    considered.  Each matching mime entry is replaced in place by its
    prettified form, and the path of every formatted entry is appended to
    ``resources["beautifulsoup"]``.  Entries that fail to format are left
    untouched and a debug message is logged.

    :param cell: the notebook cell whose outputs are formatted in place
    :param resources: updated with the list of formatted output paths
    :param index: position of the cell, used to build the output paths
    :return: the ``(cell, resources)`` pair
    :raises ImportError: if bs4 is not installed (raised before the cell
        type is inspected, so it applies to every cell)
    """
    try:
        from bs4 import BeautifulSoup
    except ImportError as err:
        # chain explicitly so the original import failure stays visible
        raise ImportError(
            "bs4 not installed: see https://beautiful-soup-4.readthedocs.io"
        ) from err

    if cell.get("cell_type", None) != "code" or "outputs" not in cell:
        return cell, resources

    for i, output in enumerate(cell.outputs):
        if output.output_type not in ("execute_result", "display_data"):
            continue
        data = output.get("data", {})
        # only existing keys are reassigned below, so iterating the live
        # mapping is safe (its size never changes)
        for mimetype, value in data.items():
            if mimetype not in ("text/html", "image/svg+xml"):
                continue
            path = f"/cells/{index}/outputs/{i}/{mimetype}"
            # TODO use metadata to set builder and whether to raise on exceptions
            try:
                data[mimetype] = BeautifulSoup(value, "html.parser").prettify()
                # record which paths have been formatted (mainly for testing)
                resources.setdefault("beautifulsoup", []).append(path)
            except Exception:  # TODO what exceptions might be raised?
                # deliberate best effort: keep the unformatted value
                logger.debug(
                    f"{path} could not be formatted by beautiful-soup.")

    return cell, resources