Esempio n. 1
0
def read_variable_files(paths: List[str],
                        variables: Optional[dict] = None) -> dict:
    """
    Read multiple serialized data files into a user_variables dictionary.
    Provide `paths` (a list of URLs or local file paths).
    Paths can optionally have a namespace prepended.
    For example:

    ```python
    paths = [
        'https://git.io/vbkqm',  # update the dictionary's top-level
        'namespace_1=https://git.io/vbkqm',  # store under 'namespace_1' key
        'namespace_2=some_local_path.json',  # store under 'namespace_2' key
    ]
    ```

    If a namespace is not provided, the JSON must contain a dictionary as its
    top level. Namespaces should consist only of ASCII alphanumeric characters
    (includes underscores, first character cannot be numeric).

    Pass a dictionary to `variables` to update an existing dictionary rather
    than create a new dictionary.
    """
    if variables is None:
        variables = {}
    for path in paths:
        logging.info(f"Reading user-provided templating variables at {path!r}")
        # Match only namespaces that are valid jinja2 variable names
        # http://jinja.pocoo.org/docs/2.10/api/#identifier-naming
        match = re.match(r"([a-zA-Z_][a-zA-Z0-9_]*)=(.+)", path)
        if match:
            namespace, path = match.groups()
            logging.info(
                f"Using the {namespace!r} namespace for template variables from {path!r}"
            )
        try:
            if match:
                obj = {namespace: read_serialized_data(path)}
            else:
                obj = read_serialized_dict(path)
        except Exception:
            logging.exception(
                f"Error reading template variables from {path!r}")
            continue
        assert isinstance(obj, dict)
        conflicts = variables.keys() & obj.keys()
        if conflicts:
            logging.warning(
                f"Template variables in {path!r} overwrite existing "
                "values for the following keys:\n" + "\n".join(conflicts))
        variables.update(obj)
    logging.debug(f"Reading user-provided templating variables complete:\n"
                  f"{json.dumps(variables, indent=2, ensure_ascii=False)}")
    return variables
Esempio n. 2
0
def load_variables(args) -> dict:
    """
    Read `metadata.yaml` and files specified by `--template-variables-path` to generate
    manuscript variables available for jinja2 templating.

    Returns a dictionary, refered to as `variables`, with the following keys:

    - `pandoc`: a dictionary for passing options to Pandoc via the `yaml_metadata_block`.
      Fields in `pandoc` are either generated by Manubot or hard-coded by the user if `metadata.yaml`
      includes a `pandoc` dictionary.
    - `manubot`: a dictionary for manubot-related information and metadata.
      Fields in `manubot` are either generated by Manubot or hard-coded by the user if `metadata.yaml`
      includes a `manubot` dictionary.
    - All fields from a manuscript's `metadata.yaml` that are not interpreted by Manubot are
      copied to `variables`. Interpreted fields include `pandoc`, `manubot`, `title`,
      `keywords`, `authors` (formerly `author_info`, now deprecated), `lang`, and `thumbnail`.
    - User-specified fields inserted according to the `--template-variables-path` option.
      User-specified variables take highest precedence and can overwrite values for existing
      keys like `pandoc` or `manubot` (dangerous).
    """
    # Generated manuscript variables
    variables = {"pandoc": {}, "manubot": {}}

    # Read metadata which contains pandoc_yaml_metadata
    # as well as authors information.
    if args.meta_yaml_path.is_file():
        metadata = read_serialized_dict(args.meta_yaml_path)
    else:
        metadata = {}
        logging.warning(
            f"missing {args.meta_yaml_path} file with yaml_metadata_block for pandoc"
        )

    # Interpreted keys that are intended for pandoc
    move_to_pandoc = "title", "keywords", "lang"
    for key in move_to_pandoc:
        if key in metadata:
            variables["pandoc"][key] = metadata.pop(key)

    # Add date to metadata
    now = datetime_now()
    logging.info(
        f"Using {now:%Z} timezone.\n"
        f"Dating manuscript with the current datetime: {now.isoformat()}")
    variables["pandoc"]["date-meta"] = now.date().isoformat()
    variables["manubot"]["date"] = f"{now:%B} {now.day}, {now.year}"

    # Process authors metadata
    if "author_info" in metadata:
        authors = metadata.pop("author_info", [])
        warnings.warn(
            "metadata.yaml: 'author_info' is deprecated. Use 'authors' instead.",
            category=DeprecationWarning,
        )
    else:
        authors = metadata.pop("authors", [])
    if authors is None:
        authors = []
    variables["pandoc"]["author-meta"] = [author["name"] for author in authors]
    variables["manubot"]["authors"] = authors
    add_author_affiliations(variables["manubot"])

    # Set repository version metadata for CI builds
    ci_params = get_continuous_integration_parameters()
    if ci_params:
        variables["manubot"]["ci_source"] = ci_params

    # Add manuscript URLs
    variables["manubot"].update(
        get_manuscript_urls(metadata.pop("html_url", None)))

    # Add software versions
    variables["manubot"].update(get_software_versions())

    # Add thumbnail URL if present
    thumbnail_url = get_thumbnail_url(metadata.pop("thumbnail", None))
    if thumbnail_url:
        variables["manubot"]["thumbnail_url"] = thumbnail_url

    # Update variables with metadata.yaml pandoc/manubot dicts
    for key in "pandoc", "manubot":
        dict_ = metadata.pop(key, {})
        if not isinstance(dict_, dict):
            logging.warning(
                f"load_variables expected metadata.yaml field {key!r} to be a dict."
                f"Received a {dict_.__class__.__name__!r} instead.")
            continue
        variables[key].update(dict_)

    # Update variables with uninterpreted metadata.yaml fields
    variables.update(metadata)

    # Update variables with user-provided variables here
    variables = read_variable_files(args.template_variables_path, variables)

    # Add header-includes metadata with <meta> information for the HTML output's <head>
    variables["pandoc"]["header-includes"] = get_header_includes(variables)

    assert args.skip_citations
    # Extend Pandoc's metadata.bibliography field with manual references paths
    bibliographies = variables["pandoc"].get("bibliography", [])
    if isinstance(bibliographies, str):
        bibliographies = [bibliographies]
    assert isinstance(bibliographies, list)
    bibliographies.extend(args.manual_references_paths)
    bibliographies = list(map(os.fspath, bibliographies))
    variables["pandoc"]["bibliography"] = bibliographies
    # enable pandoc-manubot-cite option to write bibliography to a file
    variables["pandoc"]["manubot-output-bibliography"] = os.fspath(
        args.references_path)
    variables["pandoc"]["manubot-output-citekeys"] = os.fspath(
        args.citations_path)
    variables["pandoc"]["manubot-requests-cache-path"] = os.fspath(
        args.requests_cache_path)
    variables["pandoc"][
        "manubot-clear-requests-cache"] = args.clear_requests_cache

    return variables