def load_export_config(export_config_path):
    """load the export configuration"""
    if isinstance(export_config_path, string_types):
        export_config_path = pathlib.Path(export_config_path)

    data = read_file_from_directory(
        export_config_path.parent,
        export_config_path.name,
        "export configuration",
        logger,
        interp_ext=True,
    )

    # validate against the schema
    global _EXPORT_SCHEMA
    if _EXPORT_SCHEMA is None:
        # lazily load the schema once
        _EXPORT_SCHEMA = read_file_from_directory(
            get_module_path(schema),
            _EXPORT_SCHEMA_FILE,
            "export configuration schema",
            logger,
            interp_ext=True,
        )
    try:
        jsonschema.validate(data, _EXPORT_SCHEMA)
    except jsonschema.ValidationError as err:
        handle_error(
            "validation of export config {} failed against {}: {}".format(
                export_config_path, _EXPORT_SCHEMA_FILE, err.message),
            jsonschema.ValidationError,
            logger=logger,
        )

    return data
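# Usage sketch (hedged): assuming an export configuration JSON file exists on
# disk (the filename below is hypothetical), the validated configuration can
# be loaded and inspected like so:
#
#     data = load_export_config("my_latex_config.json")
#     print(data["exporter"]["class"])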
def create_exporter_cls(class_str):
    # type: (str) -> nbconvert.exporters.Exporter
    """dynamically load the exporter class"""
    export_class_path = class_str.split(".")
    module_path = ".".join(export_class_path[0:-1])
    class_name = export_class_path[-1]
    try:
        export_module = importlib.import_module(module_path)
    except ModuleNotFoundError:  # noqa: F821
        handle_error(
            "module {} containing exporter class {} not found".format(
                module_path, class_name),
            ModuleNotFoundError,  # noqa: F821
            logger=logger,
        )
    if hasattr(export_module, class_name):
        export_class = getattr(export_module, class_name)
    else:
        handle_error(
            "module {} does not contain class {}".format(
                module_path, class_name),
            ImportError,
            logger=logger,
        )
    return export_class
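# Usage sketch (hedged): the dotted path below names a standard nbconvert
# exporter, but any importable Exporter subclass would load the same way:
#
#     cls = create_exporter_cls("nbconvert.exporters.LatexExporter")
#     exporter = cls()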
def handle_error(self, msg, err_type, raise_msg=None, log_msg=None):
    """handle an error, by logging it, then raising an exception"""
    handle_error(msg, err_type, self.logger,
                 raise_msg=raise_msg, log_msg=log_msg)
def load_template(template_key, template_dict):
    """load the outline and segment files, combine them into a single
    template, and convert it to a jinja template"""
    if template_dict is None:
        return None

    if "directory" in template_dict["outline"]:
        outline_template = read_file_from_directory(
            template_dict["outline"]["directory"],
            template_dict["outline"]["file"],
            "template outline", logger, interp_ext=False)
        outline_name = os.path.join(
            template_dict["outline"]["directory"],
            template_dict["outline"]["file"])
    else:
        outline_template = read_file_from_module(
            template_dict["outline"]["module"],
            template_dict["outline"]["file"],
            "template outline", logger, interp_ext=False)
        outline_name = os.path.join(
            template_dict["outline"]["module"],
            template_dict["outline"]["file"])

    segments = []
    for snum, segment in enumerate(template_dict.get("segments", [])):

        if "file" not in segment:
            handle_error(
                "'file' expected in segment {}".format(snum),
                KeyError, logger)

        if "directory" in segment:
            seg_data = read_file_from_directory(
                segment["directory"], segment["file"],
                "template segment", logger, interp_ext=True)
        elif "module" in segment:
            seg_data = read_file_from_module(
                segment["module"], segment["file"],
                "template segment", logger, interp_ext=True)
        else:
            handle_error(
                "'directory' or 'module' expected in segment {}".format(
                    snum),
                KeyError, logger)

        segments.append(seg_data)

    template_str = create_template(outline_template, outline_name, segments)

    return str_to_jinja(template_str, template_key)
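# Usage sketch (hedged): a minimal template_dict, with hypothetical directory
# and file names, that loads an outline plus a single segment file:
#
#     jinja_template = load_template("template_file", {
#         "outline": {"directory": "my_templates", "file": "outline.tex.j2"},
#         "segments": [
#             {"directory": "my_templates", "file": "segment1.json"},
#         ],
#     })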
def _load_config_file(self, replacements):
    # find the conversion configuration
    self.logger.info("finding conversion configuration: {}".format(
        self.conversion))
    export_config_path = None
    if isinstance(self.conversion, string_types):
        outformat_path = pathlib.Path(self.conversion)
    else:
        outformat_path = self.conversion
    if outformat_path.exists():  # TODO use pathlib approach
        # if the conversion is a path that exists, use that
        export_config_path = outformat_path
    else:
        # otherwise, search for the configuration internally
        export_config_path = get_export_config_path(
            self.conversion, self.plugin_folder_paths)

    if export_config_path is None:
        handle_error(
            "could not find conversion configuration: {}".format(
                self.conversion),
            IOError,
            self.logger,
        )

    # read the conversion configuration and create the components
    self.logger.info("loading conversion configuration")
    data = load_export_config(export_config_path)
    self.logger.info("creating exporter")
    exporter_cls = create_exporter_cls(data["exporter"]["class"])
    self.logger.info("creating template and loading filters")
    template_name = "template_file"
    jinja_template = load_template(template_name, data["template"])
    self.logger.info("creating process configuration")
    export_config = self._create_export_config(
        data["exporter"], template_name, replacements)
    pprocs, pproc_config = self._create_pproc_config(
        data.get("postprocessors", {}), replacements)

    return (exporter_cls, jinja_template,
            export_config, pprocs, pproc_config)
def create_template(outline_template, outline_name,
                    segment_datas, outpath=None):
    # type: (str, str, Tuple[dict], Union[str, None]) -> str
    """build a latex jinja template from:

    - a jinja(2) template outline, which may contain segment placeholders,
    - and json segment files adhering to the segment.schema.json schema

    if a segment contains the key "overwrite", then its value should be a
    list of keys, whose values overwrite any previous entries for those keys

    Parameters
    ----------
    outline_template: str
    outline_name: str
    segment_datas: tuple or dict
    outpath: None or str
        if not None, output to path

    """
    # get the placeholders @ipubreplace{above|below}{name}
    regex = re.compile("\\@ipubreplace\\{([^\\}]+)\\}\\{([^\\}]+)\\}",
                       re.MULTILINE)
    placeholder_tuple = regex.findall(outline_template)

    if not placeholder_tuple:
        if segment_datas:
            handle_error(
                "segment data was provided, "
                "but the outline template contains no placeholders",
                KeyError,
                logger,
            )

        if outpath:
            _output_to_file(outline_template, outpath)
        return outline_template

    placeholders = {name: append for append, name in placeholder_tuple}
    # TODO validate that placeholders do not exist multiple times,
    # with both above and below appending

    replacements = {key: "" for key in placeholders.keys()}
    docstrings = ["outline: {}".format(outline_name)]

    if segment_datas:
        docstrings.append("with segments:")

        global _SEGMENT_SCHEMA
        if _SEGMENT_SCHEMA is None:
            # lazily load the segment schema once
            _SEGMENT_SCHEMA = read_file_from_directory(
                get_module_path(schema),
                _SEGMENT_SCHEMA_FILE,
                "segment configuration schema",
                logger,
                interp_ext=True,
            )

        for seg_num, segment_data in enumerate(segment_datas):

            # validate the segment
            try:
                jsonschema.validate(segment_data, _SEGMENT_SCHEMA)
            except jsonschema.ValidationError as err:
                handle_error(
                    "validation of template segment {} failed: {}".format(
                        seg_num, err.message),
                    jsonschema.ValidationError,
                    logger=logger,
                )

            # get a description of the segment
            docstrings.append("- {0}: {1}".format(
                segment_data["identifier"], segment_data["description"]))

            # find which keys to overwrite
            overwrite = segment_data.get("overwrite", [])
            logger.debug("overwrite keys: {}".format(overwrite))

            for key, segtext in segment_data.get("segments").items():

                if key not in placeholders:
                    handle_error(
                        "the segment key '{}' ".format(key) +
                        "is not contained in the outline template",
                        KeyError,
                        logger,
                    )
                if not isinstance(segtext, string_types):
                    segtext = "\n".join(segtext)
                if key in overwrite:
                    replacements[key] = segtext
                elif placeholders[key] == "above":
                    replacements[key] = segtext + "\n" + replacements[key]
                elif placeholders[key] == "below":
                    replacements[key] = replacements[key] + "\n" + segtext
                else:
                    handle_error(
                        ("the placeholder @ipubreplace{{{0}}}{{{1}}} ".format(
                            key, placeholders[key]) +
                         "should specify 'above' or 'below' appending"),
                        jsonschema.ValidationError,
                        logger=logger,
                    )

    if "meta_docstring" in placeholders:
        docstring = "\n".join([s for s in docstrings if s]).replace("'", '"')
        replacements["meta_docstring"] = docstring
    if "ipypub_version" in placeholders:
        # TODO add an option to include the ipypublish version in the output
        # file; not included by default, since the tests would need to be
        # changed to ignore the version number
        replacements["ipypub_version"] = ""  # str(__version__)

    prefix = "@ipubreplace{"
    replace_dict = {
        prefix + append + "}{" + name + "}": replacements.get(name, "")
        for append, name in placeholder_tuple
    }
    outline = multireplace(outline_template, replace_dict)

    if outpath:
        _output_to_file(outline, outpath)

    return outline
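# Illustration (hedged): a toy outline and segment showing how @ipubreplace
# placeholders are filled; all names and content here are hypothetical:
#
#     outline = "\\documentclass{article}\n@ipubreplace{below}{document}\n"
#     segment = {
#         "identifier": "doc",
#         "description": "adds the document body",
#         "segments": {"document": "\\begin{document}...\\end{document}"},
#     }
#     print(create_template(outline, "toy_outline", [segment]))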
def merge_notebooks(ipynb_path, ignore_prefix="_",
                    to_str=False, as_version=4):
    """merge one or more ipynb files; if more than one,
    the metadata is taken from the first

    Parameters
    ----------
    ipynb_path: str or pathlib.Path
    ignore_prefix : str
        ignore filenames starting with this prefix
    to_str: bool
        return as a string, else return an nbformat object
    as_version: int
        notebook format version

    Returns
    -------
    finalnb: jupyter.notebook
    meta_path : pathlib.Path
        path to the notebook containing the metadata

    """
    meta_path = ""
    if isinstance(ipynb_path, string_types):
        ipynb_path = pathlib.Path(ipynb_path)
    if not ipynb_path.exists():
        handle_error(
            "the notebook path does not exist: {}".format(ipynb_path),
            IOError, logger)

    final_nb = None
    if ipynb_path.is_dir():
        logger.info("Merging all notebooks in directory")
        for ipath in alphanumeric_sort(ipynb_path.glob("*.ipynb")):
            if os.path.basename(ipath.name).startswith(ignore_prefix):
                continue
            with ipath.open("r", encoding="utf-8") as f:
                if (sys.version_info.major == 3
                        and sys.version_info.minor < 6
                        and "win" not in sys.platform):
                    data = f.read()
                    if hasattr(data, "decode"):
                        data = data.decode("utf-8")
                    nb = nbformat.reads(data, as_version=as_version)
                else:
                    nb = nbformat.read(f, as_version=as_version)
            if final_nb is None:
                meta_path = ipath
                final_nb = nb
            else:
                final_nb.cells.extend(nb.cells)
    else:
        logger.info("Reading notebook")
        with ipynb_path.open("r", encoding="utf-8") as f:
            if (sys.version_info.major == 3
                    and sys.version_info.minor < 6
                    and "win" not in sys.platform):
                data = f.read()
                if hasattr(data, "decode"):
                    data = data.decode("utf-8")
                final_nb = nbformat.reads(data, as_version=as_version)
            else:
                final_nb = nbformat.read(f, as_version=as_version)
        meta_path = ipynb_path

    # check for an empty directory before accessing the notebook metadata
    if final_nb is None:
        handle_error(
            "no acceptable notebooks found for path: {}".format(
                ipynb_path.name),
            IOError, logger,
        )

    if not hasattr(final_nb.metadata, "name"):
        final_nb.metadata.name = ""
    final_nb.metadata.name += "_merged"

    if to_str:
        if sys.version_info > (3, 0):
            return nbformat.writes(final_nb)
        else:
            return nbformat.writes(final_nb).encode("utf-8")

    return final_nb, meta_path
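# Usage sketch (hedged): merging all notebooks in a hypothetical folder,
# skipping any file whose name starts with "_":
#
#     nb, meta_path = merge_notebooks("notebooks/", ignore_prefix="_")
#     print(meta_path, len(nb.cells))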
def publish(self, ipynb_path, nb_node=None):
    """convert one or more Jupyter notebooks to a published format

    paths can be a string of an existing file or folder,
    or a pathlib.Path like object

    all files linked in the documents are placed into a single files_folder

    Parameters
    ----------
    ipynb_path: str or pathlib.Path
        notebook file or directory
    nb_node: None or nbformat.NotebookNode
        a pre-converted notebook

    Returns
    -------
    outdata: dict
        containing keys;
        "outpath", "exporter", "stream", "main_filepath", "resources"

    """
    # setup the input and output paths
    if isinstance(ipynb_path, string_types):
        ipynb_path = pathlib.Path(ipynb_path)
    ipynb_name, ipynb_ext = os.path.splitext(ipynb_path.name)
    outdir = (os.path.join(os.getcwd(), "converted")
              if self.outpath is None else str(self.outpath))

    with self._log_handlers(ipynb_name, outdir):

        if not ipynb_path.exists() and not nb_node:
            handle_error(
                "the notebook path does not exist: {}".format(ipynb_path),
                IOError,
                self.logger,
            )

        # log the start of the conversion
        self.logger.info("started ipypublish v{0} at {1}".format(
            ipypublish.__version__, time.strftime("%c")))
        self.logger.info("logging to: {}".format(
            os.path.join(outdir, ipynb_name + ".nbpub.log")))
        self.logger.info("running for ipynb(s) at: {0}".format(ipynb_path))
        self.logger.info("with conversion configuration: {0}".format(
            self.conversion))

        if nb_node is None and ipynb_ext in self.pre_conversion_funcs:
            func = self.pre_conversion_funcs[ipynb_ext]
            self.logger.info("running pre-conversion with: {}".format(
                inspect.getmodule(func)))
            try:
                nb_node = func(ipynb_path)
            except Exception as err:
                handle_error(
                    "pre-conversion failed for {}: {}".format(
                        ipynb_path, err),
                    err,
                    self.logger,
                )

        # doesn't work with folders
        # if (ipynb_ext != ".ipynb" and nb_node is None):
        #     handle_error(
        #         'the file extension is not associated with any '
        #         'pre-converter: {}'.format(ipynb_ext),
        #         TypeError, self.logger)

        if nb_node is None:
            # merge all notebooks
            # TODO allow notebooks to remain separate
            # (would require creating a main.tex with the preamble in etc)
            # could make everything a 'PyProcess',
            # with support for multiple streams
            final_nb, meta_path = merge_notebooks(
                ipynb_path, ignore_prefix=self.ignore_prefix)
        else:
            final_nb, meta_path = (nb_node, ipynb_path)

        # validate the notebook metadata against the schema
        if self.validate_nb_metadata:
            nb_metadata_schema = read_file_from_directory(
                get_module_path(schema),
                "doc_metadata.schema.json",
                "doc_metadata.schema",
                self.logger,
                interp_ext=True,
            )
            try:
                jsonschema.validate(final_nb.metadata, nb_metadata_schema)
            except jsonschema.ValidationError as err:
                handle_error(
                    "validation of notebook level metadata failed: {}\n"
                    "see the doc_metadata.schema.json for full spec".format(
                        err.message),
                    jsonschema.ValidationError,
                    logger=self.logger,
                )

        # set the text replacements for the export configuration
        replacements = {
            self.meta_path_placeholder: str(meta_path),
            self.files_folder_placeholder: "{}{}".format(
                get_valid_filename(ipynb_name), self.folder_suffix),
        }
        self.logger.debug("notebooks meta path: {}".format(meta_path))

        # load the configuration file
        (exporter_cls, jinja_template, econfig,
         pprocs, pconfig) = self._load_config_file(replacements)

        # run nbconvert
        self.logger.info("running nbconvert")
        exporter, stream, resources = self.export_notebook(
            final_nb, exporter_cls, econfig, jinja_template)

        # postprocess the results
        main_filepath = os.path.join(
            outdir, ipynb_name + exporter.file_extension)
        for post_proc_name in pprocs:
            proc_class = find_entry_point(
                post_proc_name,
                "ipypublish.postprocessors",
                self.logger,
                "ipypublish",
            )
            proc = proc_class(pconfig)
            stream, main_filepath, resources = proc.postprocess(
                stream, exporter.output_mimetype, main_filepath, resources)

        self.logger.info("process finished successfully")

        return {
            "outpath": outdir,
            "exporter": exporter,
            "stream": stream,
            "main_filepath": main_filepath,
            "resources": resources,
        }
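# Usage sketch (hedged): converting a single, hypothetical notebook with the
# default settings; outdata["main_filepath"] points at the converted output:
#
#     publisher = IpyPubMain()
#     outdata = publisher.publish("notebook.ipynb")
#     print(outdata["main_filepath"])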
def parse(self, inputstring, document):
    # type: (Union[str, list[str]], nodes.document) -> None
    """Parse text and generate a document tree."""
    # fix for when calling on readthedocs
    self.env = self.env or document.settings.env
    self.config = self.config or document.settings.env.config

    # get the file for conversion
    filepath = self.env.doc2path(self.env.docname)
    filedir = os.path.dirname(filepath)
    self.logger.info("ipypublish: converting {}".format(filepath))

    config = {
        "IpyPubMain": {
            "conversion": self.config.ipysphinx_export_config,
            "plugin_folder_paths": self.config.ipysphinx_config_folders,
            "outpath": filedir,
            "folder_suffix": self.config.ipysphinx_folder_suffix,
            "log_to_stdout": False,
            "log_to_file": False,
            "default_pporder_kwargs": dict(
                clear_existing=False, dump_files=True),
        }
    }
    if self.config.ipysphinx_preconverters:
        # NB: jupytext is already a default pre-converter for .Rmd
        config["IpyPubMain"]["pre_conversion_funcs"] = (
            self.config.ipysphinx_preconverters)
    publish = IpyPubMain(config=config)
    outdata = publish(filepath)

    self.logger.info("ipypublish: successful conversion")

    # check we got back restructuredtext
    exporter = outdata["exporter"]
    if not exporter.output_mimetype == "text/restructuredtext":
        handle_error(
            "ipypublish: the output content is not of type "
            "text/restructuredtext: {}".format(exporter.output_mimetype),
            TypeError, self.logger)

    # TODO document the use of orphan
    if outdata["resources"].get("ipub", {}).get("orphan", False):
        rst.Parser.parse(self, ":orphan:", document)

    # parse the prolog
    if self.env.config.ipysphinx_prolog:
        prolog = exporter.environment.from_string(
            self.env.config.ipysphinx_prolog).render(env=self.env)
        rst.Parser.parse(self, prolog, document)

    # parse the main body of the file
    rst.Parser.parse(self, outdata["stream"], document)

    # parse the epilog
    if self.env.config.ipysphinx_epilog:
        epilog = exporter.environment.from_string(
            self.env.config.ipysphinx_epilog).render(env=self.env)
        rst.Parser.parse(self, epilog, document)
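# Configuration sketch (hedged): the ipysphinx_* values read above would be
# set in a project's Sphinx conf.py; the extension path and config values
# below are illustrative placeholders, not a verified configuration:
#
#     extensions = ["ipypublish.sphinx.notebook"]
#     ipysphinx_export_config = "sphinx_ipypublish_all.ext"
#     ipysphinx_folder_suffix = "_nbfiles"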