Example #1
def load_export_config(export_config_path):
    """load the export configuration"""
    if isinstance(export_config_path, string_types):
        export_config_path = pathlib.Path(export_config_path)

    data = read_file_from_directory(
        export_config_path.parent,
        export_config_path.name,
        "export configuration",
        logger,
        interp_ext=True,
    )

    # validate against schema
    global _EXPORT_SCHEMA
    if _EXPORT_SCHEMA is None:
        # lazy load schema once
        _EXPORT_SCHEMA = read_file_from_directory(
            get_module_path(schema),
            _EXPORT_SCHEMA_FILE,
            "export configuration schema",
            logger,
            interp_ext=True,
        )
    try:
        jsonschema.validate(data, _EXPORT_SCHEMA)
    except jsonschema.ValidationError as err:
        handle_error(
            "validation of export config {} failed against {}: {}".format(
                export_config_path, _EXPORT_SCHEMA_FILE, err.message),
            jsonschema.ValidationError,
            logger=logger,
        )

    return data
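A minimal usage sketch for the function above, assuming it and its module-level imports are in scope (the configuration file name is hypothetical):

# load and validate a hypothetical export configuration file
config = load_export_config("export_plugins/my_latex.json")
# the validated data can then be queried, e.g. for the exporter class string
print(config["exporter"]["class"])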
Example #2
def create_exporter_cls(class_str):
    # type: (str) -> nbconvert.exporters.Exporter
    """dynamically load export class"""
    export_class_path = class_str.split(".")
    module_path = ".".join(export_class_path[0:-1])
    class_name = export_class_path[-1]
    try:
        export_module = importlib.import_module(module_path)
    except ModuleNotFoundError:  # noqa: F821
        handle_error(
            "module {} containing exporter class {} not found".format(
                module_path, class_name),
            ModuleNotFoundError,
            logger=logger,
        )  # noqa: F821
    if hasattr(export_module, class_name):
        export_class = getattr(export_module, class_name)
    else:
        handle_error(
            "module {} does not contain class {}".format(
                module_path, class_name),
            ImportError,
            logger=logger,
        )

    return export_class
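As an illustration, the function above resolves any importable exporter from its dotted path; a brief sketch using a standard nbconvert exporter:

# nbconvert ships LatexExporter, so this dotted path should resolve cleanly
exporter_cls = create_exporter_cls("nbconvert.exporters.LatexExporter")
exporter = exporter_cls()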
Example #3
    def handle_error(self, msg, err_type, raise_msg=None, log_msg=None):
        """handle error by logging it then raising"""
        handle_error(msg,
                     err_type,
                     self.logger,
                     raise_msg=raise_msg,
                     log_msg=log_msg)
Example #4
def load_template(template_key, template_dict):
    """load the template outline and segment files defined in the template
    dict, and build them into a jinja template"""
    if template_dict is None:
        return None

    if "directory" in template_dict["outline"]:
        outline_template = read_file_from_directory(
            template_dict["outline"]["directory"],
            template_dict["outline"]["file"],
            "template outline", logger, interp_ext=False)
        outline_name = os.path.join(template_dict["outline"]["directory"],
                                    template_dict["outline"]["file"])
    else:
        outline_template = read_file_from_module(
            template_dict["outline"]["module"],
            template_dict["outline"]["file"],
            "template outline", logger, interp_ext=False)
        outline_name = os.path.join(template_dict["outline"]["module"],
                                    template_dict["outline"]["file"])

    segments = []
    for snum, segment in enumerate(template_dict.get("segments", [])):

        if "file" not in segment:
            handle_error(
                "'file' expected in segment {}".format(snum),
                KeyError, logger)

        if "directory" in segment:
            seg_data = read_file_from_directory(
                segment["directory"],
                segment["file"], "template segment", logger, interp_ext=True)
        elif "module" in segment:
            seg_data = read_file_from_module(
                segment["module"],
                segment["file"], "template segment", logger, interp_ext=True)
        else:
            handle_error(
                "'directory' or 'module' expected in segment {}".format(snum),
                KeyError, logger)

        segments.append(seg_data)

    template_str = create_template(outline_template, outline_name, segments)

    return str_to_jinja(template_str, template_key)
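A hedged sketch of the template_dict shape that load_template expects, inferred from the branches above (directory and file names are hypothetical):

template_dict = {
    "outline": {
        # use "module" instead of "directory" to read from a python package
        "directory": "my_templates",
        "file": "outline.latex.j2",
    },
    "segments": [
        {"directory": "my_templates", "file": "standard_packages.json"},
    ],
}
jinja_template = load_template("template_file", template_dict)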
Example #5
    def _load_config_file(self, replacements):
        # find conversion configuration
        self.logger.info("finding conversion configuration: {}".format(
            self.conversion))
        export_config_path = None
        if isinstance(self.conversion, string_types):
            outformat_path = pathlib.Path(self.conversion)
        else:
            outformat_path = self.conversion
        if outformat_path.exists():  # TODO use pathlib approach
            # if outformat is a path that exists, use that
            export_config_path = outformat_path
        else:
            # else search internally
            export_config_path = get_export_config_path(
                self.conversion, self.plugin_folder_paths)

        if export_config_path is None:
            handle_error(
                "could not find conversion configuration: {}".format(
                    self.conversion),
                IOError,
                self.logger,
            )

        # read conversion configuration and create
        self.logger.info("loading conversion configuration")
        data = load_export_config(export_config_path)
        self.logger.info("creating exporter")
        exporter_cls = create_exporter_cls(data["exporter"]["class"])
        self.logger.info("creating template and loading filters")
        template_name = "template_file"
        jinja_template = load_template(template_name, data["template"])
        self.logger.info("creating process configuration")
        export_config = self._create_export_config(data["exporter"],
                                                   template_name, replacements)
        pprocs, pproc_config = self._create_pproc_config(
            data.get("postprocessors", {}), replacements)

        return (exporter_cls, jinja_template, export_config, pprocs,
                pproc_config)
Example #6
def create_template(outline_template,
                    outline_name,
                    segment_datas,
                    outpath=None):
    # type: (str, str, Tuple[dict], Union[str, None]) -> str
    """ build a latex jinja template from:

    - a jinja(2) template outline,
      which may contain segment placeholders,
    - and json segment files adhering to the segment.schema.json schema

    if a segment contains the key "overwrite",
    its value should be a list of keys,
    whose values overwrite any previously appended content

    Parameters
    ----------
    outline_template: str
    outline_name: str
    segment_datas: tuple or dict
    outpath: None or str
        if not None, output the final template to this path

    """
    # get the placeholders @ipubreplace{above|below}{name}
    regex = re.compile("\\@ipubreplace\\{([^\\}]+)\\}\\{([^\\}]+)\\}",
                       re.MULTILINE)
    placeholder_tuple = regex.findall(outline_template)

    if not placeholder_tuple:
        if segment_datas:
            handle_error(
                "the segment data is provided, " +
                "but the outline template contains no placeholders",
                KeyError,
                logger,
            )

        if outpath:
            _output_to_file(outline_template, outpath)
        return outline_template

    placeholders = {name: append for append, name in placeholder_tuple}
    # TODO validate that placeholders do not exist multiple times,
    # with both above and below

    replacements = {key: "" for key in placeholders.keys()}
    docstrings = ["outline: {}".format(outline_name)]

    if segment_datas:
        docstrings.append("with segments:")
        global _SEGMENT_SCHEMA
        if _SEGMENT_SCHEMA is None:
            # lazy load the segment schema once
            _SEGMENT_SCHEMA = read_file_from_directory(
                get_module_path(schema),
                _SEGMENT_SCHEMA_FILE,
                "segment configuration schema",
                logger,
                interp_ext=True,
            )

    for seg_num, segment_data in enumerate(segment_datas):

        # validate segment
        try:
            jsonschema.validate(segment_data, _SEGMENT_SCHEMA)
        except jsonschema.ValidationError as err:
            handle_error(
                "validation of template segment {} failed: {}".format(
                    seg_num, err.message),
                jsonschema.ValidationError,
                logger=logger,
            )

        # get description of segment
        docstrings.append("- {0}: {1}".format(segment_data["identifier"],
                                              segment_data["description"]))

        # find what key to overwrite
        overwrite = segment_data.get("overwrite", [])
        logger.debug("overwrite keys: {}".format(overwrite))

        for key, segtext in segment_data.get("segments").items():

            if key not in placeholders:
                handle_error(
                    "the segment key '{}' ".format(key) +
                    "is not contained in the outline template",
                    KeyError,
                    logger,
                )

            if not isinstance(segtext, string_types):
                segtext = "\n".join(segtext)
            if key in overwrite:
                replacements[key] = segtext
            elif placeholders[key] == "above":
                replacements[key] = segtext + "\n" + replacements[key]
            elif placeholders[key] == "below":
                replacements[key] = replacements[key] + "\n" + segtext
            else:
                handle_error(
                    ("the placeholder @ipubreplace{{{0}}}{{{1}}} ".format(
                        key, placeholders[key]) +
                     "should specify 'above' or 'below' appending"),
                    jsonschema.ValidationError,
                    logger=logger,
                )

    if "meta_docstring" in placeholders:
        docstring = "\n".join([s for s in docstrings if s]).replace("'", '"')
        replacements["meta_docstring"] = docstring
    if "ipypub_version" in placeholders:
        # TODO add option to include ipypub version in output file
        # not included by default,
        # since tests need to be changed to ignore version number
        replacements["ipypub_version"] = ""  # str(__version__)

    prefix = "@ipubreplace{"
    replace_dict = {
        prefix + append + "}{" + name + "}": replacements.get(name, "")
        for append, name in placeholder_tuple
    }
    outline = multireplace(outline_template, replace_dict)

    if outpath:
        _output_to_file(outline, outpath)

    return outline
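An illustrative sketch of the @ipubreplace placeholder mechanism handled above; the outline and segment content are made up, and the real segment schema may require additional keys:

# outline with a single placeholder that appends content below it
outline = (
    "\\documentclass{article}\n"
    "@ipubreplace{below}{document_packages}\n"
    "\\begin{document}\n\\end{document}\n"
)
segments = [{
    "identifier": "example_segment",
    "description": "adds a package to the preamble",
    "segments": {"document_packages": ["\\usepackage{graphicx}"]},
}]
latex_template = create_template(outline, "inline outline", segments)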
Example #7
def merge_notebooks(ipynb_path, ignore_prefix="_", to_str=False, as_version=4):
    """ merge one or more ipynb's,
    if more than one, then the meta data is taken from the first

    Parameters
    ----------
    ipynb_path: str or pathlib.Path
    ignore_prefix : str
        ignore filename starting with this prefix
    to_str: bool
        return as a string, else return nbformat object
    as_version: int
        notebook format vesion

    Returns
    ------
    finalnb: jupyter.notebook
    meta_path : pathlib.Path
        path to notebook containing meta file

    """
    meta_path = ""
    if isinstance(ipynb_path, string_types):
        ipynb_path = pathlib.Path(ipynb_path)
    if not ipynb_path.exists():
        handle_error("the notebook path does not exist: {}".format(ipynb_path),
                     IOError, logger)

    final_nb = None
    if ipynb_path.is_dir():
        logger.info("Merging all notebooks in directory")
        for ipath in alphanumeric_sort(ipynb_path.glob("*.ipynb")):
            if os.path.basename(ipath.name).startswith(ignore_prefix):
                continue
            with ipath.open("r", encoding="utf-8") as f:
                if (sys.version_info.major == 3 and sys.version_info.minor < 6
                        and "win" not in sys.platform):
                    data = f.read()
                    if hasattr(data, "decode"):
                        data = data.decode("utf-8")
                    nb = nbformat.reads(data, as_version=as_version)
                else:
                    nb = nbformat.read(f, as_version=as_version)
            if final_nb is None:
                meta_path = ipath
                final_nb = nb
            else:
                final_nb.cells.extend(nb.cells)
    else:
        logger.info("Reading notebook")
        with ipynb_path.open("r", encoding="utf-8") as f:
            if (sys.version_info.major == 3 and sys.version_info.minor < 6
                    and "win" not in sys.platform):
                data = f.read()
                if hasattr(data, "decode"):
                    data = data.decode("utf-8")
                final_nb = nbformat.reads(data, as_version=as_version)
            else:
                final_nb = nbformat.read(f, as_version=as_version)
        meta_path = ipynb_path
    # fail early if no acceptable notebooks were found
    if final_nb is None:
        handle_error(
            "no acceptable notebooks found for path: {}".format(
                ipynb_path.name),
            IOError,
            logger,
        )

    if not hasattr(final_nb.metadata, "name"):
        final_nb.metadata.name = ""
    final_nb.metadata.name += "_merged"

    if to_str:
        if sys.version_info > (3, 0):
            return nbformat.writes(final_nb)
        else:
            return nbformat.writes(final_nb).encode("utf-8")

    return final_nb, meta_path
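A short usage sketch (the directory path is hypothetical), merging every notebook in a folder except those whose names start with the ignore prefix:

# notebooks are merged in alphanumeric order; "_"-prefixed files are skipped
final_nb, meta_path = merge_notebooks("notebooks/", ignore_prefix="_")
print(final_nb.metadata.name)  # ends with "_merged"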
Example #8
    def publish(self, ipynb_path, nb_node=None):
        """ convert one or more Jupyter notebooks to a published format

        paths can be string of an existing file or folder,
        or a pathlib.Path like object

        all files linked in the documents are placed into a single files_folder

        Parameters
        ----------
        ipynb_path: str or pathlib.Path
            notebook file or directory
        nb_node: None or nbformat.NotebookNode
            a pre-converted notebook

        Returns
        --------
        outdata: dict
            containing keys;
            "outpath", "exporter", "stream", "main_filepath", "resources"

        """
        # setup the input and output paths
        if isinstance(ipynb_path, string_types):
            ipynb_path = pathlib.Path(ipynb_path)
        ipynb_name, ipynb_ext = os.path.splitext(ipynb_path.name)
        outdir = (os.path.join(os.getcwd(), "converted")
                  if self.outpath is None else str(self.outpath))

        with self._log_handlers(ipynb_name, outdir):

            if not ipynb_path.exists() and not nb_node:
                handle_error(
                    "the notebook path does not exist: {}".format(ipynb_path),
                    IOError,
                    self.logger,
                )

            # log start of conversion
            self.logger.info("started ipypublish v{0} at {1}".format(
                ipypublish.__version__, time.strftime("%c")))
            self.logger.info("logging to: {}".format(
                os.path.join(outdir, ipynb_name + ".nbpub.log")))
            self.logger.info("running for ipynb(s) at: {0}".format(ipynb_path))
            self.logger.info("with conversion configuration: {0}".format(
                self.conversion))

            if nb_node is None and ipynb_ext in self.pre_conversion_funcs:
                func = self.pre_conversion_funcs[ipynb_ext]
                self.logger.info("running pre-conversion with: {}".format(
                    inspect.getmodule(func)))
                try:
                    nb_node = func(ipynb_path)
                except Exception as err:
                    handle_error(
                        "pre-conversion failed for {}: {}".format(
                            ipynb_path, err),
                        err,
                        self.logger,
                    )

            # doesn't work with folders
            # if (ipynb_ext != ".ipynb" and nb_node is None):
            #     handle_error(
            #         'the file extension is not associated with any '
            #         'pre-converter: {}'.format(ipynb_ext),
            # TypeError, self.logger)

            if nb_node is None:
                # merge all notebooks
                # TODO allow notebooks to remain separate
                # (would require creating a main.tex with the preamble in etc )
                # Could make everything a 'PyProcess',
                # with support for multiple streams
                final_nb, meta_path = merge_notebooks(
                    ipynb_path, ignore_prefix=self.ignore_prefix)
            else:
                final_nb, meta_path = (nb_node, ipynb_path)

            # validate the notebook metadata against the schema
            if self.validate_nb_metadata:
                nb_metadata_schema = read_file_from_directory(
                    get_module_path(schema),
                    "doc_metadata.schema.json",
                    "doc_metadata.schema",
                    self.logger,
                    interp_ext=True,
                )
                try:
                    jsonschema.validate(final_nb.metadata, nb_metadata_schema)
                except jsonschema.ValidationError as err:
                    handle_error(
                        "validation of notebook level metadata failed: {}\n"
                        "see the doc_metadata.schema.json for full spec".
                        format(err.message),
                        jsonschema.ValidationError,
                        logger=self.logger,
                    )

            # set text replacements for export configuration
            replacements = {
                self.meta_path_placeholder:
                str(meta_path),
                self.files_folder_placeholder:
                "{}{}".format(get_valid_filename(ipynb_name),
                              self.folder_suffix),
            }

            self.logger.debug("notebooks meta path: {}".format(meta_path))

            # load configuration file
            (
                exporter_cls,
                jinja_template,
                econfig,
                pprocs,
                pconfig,
            ) = self._load_config_file(replacements)

            # run nbconvert
            self.logger.info("running nbconvert")
            exporter, stream, resources = self.export_notebook(
                final_nb, exporter_cls, econfig, jinja_template)

            # postprocess results
            main_filepath = os.path.join(outdir,
                                         ipynb_name + exporter.file_extension)

            for post_proc_name in pprocs:
                proc_class = find_entry_point(
                    post_proc_name,
                    "ipypublish.postprocessors",
                    self.logger,
                    "ipypublish",
                )
                proc = proc_class(pconfig)
                stream, main_filepath, resources = proc.postprocess(
                    stream, exporter.output_mimetype, main_filepath, resources)

            self.logger.info("process finished successfully")

        return {
            "outpath": outdir,
            "exporter": exporter,
            "stream": stream,
            "main_filepath": main_filepath,
            "resources": resources,
        }
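A hedged usage sketch, assuming this method belongs to the IpyPubMain class shown in Example #9 (the conversion name and notebook path are illustrative):

publisher = IpyPubMain(
    config={"IpyPubMain": {"conversion": "latex_ipypublish_main"}})
outdata = publisher.publish("notebooks/example.ipynb")
print(outdata["main_filepath"])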
Example #9
    def parse(self, inputstring, document):
        # type: (Union[str, list[str]], nodes.document) -> None
        """Parse text and generate a document tree."""

        # fix for when calling on readthedocs
        self.env = self.env or document.settings.env
        self.config = self.config or document.settings.env.config

        # get file for conversion
        filepath = self.env.doc2path(self.env.docname)
        filedir = os.path.dirname(filepath)
        self.logger.info("ipypublish: converting {}".format(filepath))

        config = {
            "IpyPubMain": {
                "conversion":
                self.config.ipysphinx_export_config,
                "plugin_folder_paths":
                self.config.ipysphinx_config_folders,
                "outpath":
                filedir,
                "folder_suffix":
                self.config.ipysphinx_folder_suffix,
                "log_to_stdout":
                False,
                "log_to_file":
                False,
                "default_pporder_kwargs":
                dict(clear_existing=False, dump_files=True)
            }
        }
        if self.config.ipysphinx_preconverters:
            # NB: jupytext is already a default for .Rmd
            config["IpyPubMain"]["pre_conversion_funcs"] = (
                self.config.ipysphinx_preconverters)
        publish = IpyPubMain(config=config)
        outdata = publish(filepath)

        self.logger.info("ipypublish: successful conversion")

        # check we got back restructuredtext
        exporter = outdata["exporter"]
        if not exporter.output_mimetype == 'text/restructuredtext':
            handle_error(
                "ipypublish: the output content is not of type "
                "text/restructuredtext: {}".format(exporter.output_mimetype),
                TypeError, self.logger)

        # TODO document use of orphan
        if outdata["resources"].get("ipub", {}).get("orphan", False):
            rst.Parser.parse(self, ':orphan:', document)

        # parse a prolog
        if self.env.config.ipysphinx_prolog:
            prolog = exporter.environment.from_string(
                self.env.config.ipysphinx_prolog).render(env=self.env)
            rst.Parser.parse(self, prolog, document)

        # parse the main body of the file
        rst.Parser.parse(self, outdata["stream"], document)

        # parse an epilog
        if self.env.config.ipysphinx_epilog:
            epilog = exporter.environment.from_string(
                self.env.config.ipysphinx_epilog).render(env=self.env)
            rst.Parser.parse(self, epilog, document)
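For reference, a sketch of the conf.py options this parser reads (option names are taken from the code above; the values shown are placeholders, not necessarily the extension's defaults):

# conf.py
ipysphinx_export_config = "sphinx_ipypublish_all.ext"
ipysphinx_config_folders = ()
ipysphinx_folder_suffix = "_nbfiles"
ipysphinx_preconverters = {}
ipysphinx_prolog = ""
ipysphinx_epilog = ""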