Exemple #1
0
def get_metadata_and_variables(args):
    """
    Build pandoc metadata and jinja2 template variables for a manuscript.

    Loads `args.meta_yaml_path` when that file exists (otherwise logs a warning
    and starts from an empty mapping), stamps the current date, separates the
    `author_info` and `thumbnail` fields from the metadata, and finally merges
    in the variables read from `args.template_variables_path`.

    Returns a `(metadata, variables)` tuple.
    """
    meta_path = args.meta_yaml_path
    if not meta_path.is_file():
        metadata = {}
        logging.warning(
            f"missing {args.meta_yaml_path} file with yaml_metadata_block for pandoc"
        )
    else:
        # utf-8-sig transparently drops a leading byte-order mark, if any
        with meta_path.open(encoding="utf-8-sig") as handle:
            metadata = yaml.safe_load(handle)
        assert isinstance(metadata, dict)

    # Date the manuscript with the current datetime
    timestamp = datetime_now()
    logging.info(
        f"Using {timestamp:%Z} timezone.\n"
        f"Dating manuscript with the current datetime: {timestamp.isoformat()}"
    )
    metadata["date-meta"] = timestamp.date().isoformat()

    # Variables generated for templating (kept separate from pandoc metadata)
    variables = collections.OrderedDict()
    variables["date"] = f"{timestamp:%B} {timestamp.day}, {timestamp.year}"

    # Authors: an absent or null `author_info` field means no authors
    author_entries = metadata.pop("author_info", None)
    if author_entries is None:
        author_entries = []
    metadata["author-meta"] = [entry["name"] for entry in author_entries]
    variables["authors"] = author_entries
    variables = add_author_affiliations(variables)

    # Repository version information, available only for CI builds
    ci_info = get_continuous_integration_parameters()
    if ci_info:
        variables["ci_source"] = add_manuscript_urls_to_ci_params(ci_info)

    # Thumbnail URL, when one can be determined
    thumbnail = get_thumbnail_url(metadata.pop("thumbnail", None))
    if thumbnail:
        variables["thumbnail_url"] = thumbnail

    # User-provided variables are applied last so they override generated ones
    variables.update(read_jsons(args.template_variables_path))

    return metadata, variables
Exemple #2
0
def parse_args(argv=None):
    """
    Parse command-line options for randomizing the authors in metadata.yaml.

    Parameters:

    - `argv`: optional list of argument strings to parse. When None (the
      default), argparse reads the arguments from `sys.argv[1:]`, which is the
      behavior of calling `parse_args()` with no arguments.

    Returns the argparse namespace with the attributes `path`, `shuffle`,
    `only_on_ci`, and a derived `execute` flag. `execute` is True unless
    `--only-on-ci` was given and `get_continuous_integration_parameters()`
    returns a falsy value.
    """
    parser = argparse.ArgumentParser(
        description="Randomize metadata.authors. Overwrites metadata.yaml")
    parser.add_argument("--path",
                        default="content/metadata.yaml",
                        help="path to metadata.yaml")
    parser.add_argument(
        "--shuffle",
        action="store_true",
        help="shuffle authors using HEAD commit as random seed",
    )
    parser.add_argument(
        "--only-on-ci",
        action="store_true",
        help="do nothing if CI environment variable is not true",
    )
    args = parser.parse_args(argv)

    # `execute` is not a command-line option; it is derived from --only-on-ci.
    # The CI lookup is performed only when --only-on-ci was requested.
    args.execute = True
    if args.only_on_ci:
        args.execute = bool(get_continuous_integration_parameters())
    return args
Exemple #3
0
def load_variables(args) -> dict:
    """
    Read `metadata.yaml` and files specified by `--template-variables-path` to generate
    manuscript variables available for jinja2 templating.

    Returns a dictionary, referred to as `variables`, with the following keys:

    - `pandoc`: a dictionary for passing options to Pandoc via the `yaml_metadata_block`.
      Fields in `pandoc` are either generated by Manubot or hard-coded by the user if `metadata.yaml`
      includes a `pandoc` dictionary.
    - `manubot`: a dictionary for manubot-related information and metadata.
      Fields in `manubot` are either generated by Manubot or hard-coded by the user if `metadata.yaml`
      includes a `manubot` dictionary.
    - All fields from a manuscript's `metadata.yaml` that are not interpreted by Manubot are
      copied to `variables`. Interpreted fields include `pandoc`, `manubot`, `title`,
      `keywords`, `authors` (formerly `author_info`, now deprecated), `lang`, and `thumbnail`.
    - User-specified fields inserted according to the `--template-variables-path` option.
      User-specified variables take highest precedence and can overwrite values for existing
      keys like `pandoc` or `manubot` (dangerous).

    The following attributes of `args` are read: `meta_yaml_path`,
    `template_variables_path`, `skip_citations`, `manual_references_paths`,
    `references_path`, `citations_path`, `requests_cache_path`, and
    `clear_requests_cache`.

    Raises:

    - `KeyError` if an author entry lacks a `name` field.
    - `AssertionError` if `args.skip_citations` is falsy, or if the resulting
      `pandoc.bibliography` value is neither a string nor a list.
    """
    # Generated manuscript variables
    variables = {"pandoc": {}, "manubot": {}}

    # Read metadata which contains pandoc_yaml_metadata
    # as well as authors information.
    if args.meta_yaml_path.is_file():
        metadata = read_serialized_dict(args.meta_yaml_path)
    else:
        metadata = {}
        logging.warning(
            f"missing {args.meta_yaml_path} file with yaml_metadata_block for pandoc"
        )

    # Interpreted keys that are intended for pandoc
    move_to_pandoc = "title", "keywords", "lang"
    for key in move_to_pandoc:
        if key in metadata:
            variables["pandoc"][key] = metadata.pop(key)

    # Add date to metadata
    now = datetime_now()
    logging.info(
        f"Using {now:%Z} timezone.\n"
        f"Dating manuscript with the current datetime: {now.isoformat()}")
    variables["pandoc"]["date-meta"] = now.date().isoformat()
    variables["manubot"]["date"] = f"{now:%B} {now.day}, {now.year}"

    # Process authors metadata.
    # The deprecated `author_info` field takes precedence over `authors`
    # when both are present.
    if "author_info" in metadata:
        authors = metadata.pop("author_info", [])
        warnings.warn(
            "metadata.yaml: 'author_info' is deprecated. Use 'authors' instead.",
            category=DeprecationWarning,
        )
    else:
        authors = metadata.pop("authors", [])
    # A field that is present but empty in the YAML is read as None
    if authors is None:
        authors = []
    variables["pandoc"]["author-meta"] = [author["name"] for author in authors]
    variables["manubot"]["authors"] = authors
    add_author_affiliations(variables["manubot"])

    # Set repository version metadata for CI builds
    ci_params = get_continuous_integration_parameters()
    if ci_params:
        variables["manubot"]["ci_source"] = ci_params

    # Add manuscript URLs
    variables["manubot"].update(
        get_manuscript_urls(metadata.pop("html_url", None)))

    # Add software versions
    variables["manubot"].update(get_software_versions())

    # Add thumbnail URL if present
    thumbnail_url = get_thumbnail_url(metadata.pop("thumbnail", None))
    if thumbnail_url:
        variables["manubot"]["thumbnail_url"] = thumbnail_url

    # Update variables with metadata.yaml pandoc/manubot dicts.
    # These are applied after the generated fields, so user-specified values
    # in metadata.yaml override the generated ones.
    for key in "pandoc", "manubot":
        dict_ = metadata.pop(key, {})
        if not isinstance(dict_, dict):
            # Non-dict values are reported and dropped rather than merged
            logging.warning(
                f"load_variables expected metadata.yaml field {key!r} to be a dict. "
                f"Received a {dict_.__class__.__name__!r} instead.")
            continue
        variables[key].update(dict_)

    # Update variables with uninterpreted metadata.yaml fields
    variables.update(metadata)

    # Update variables with user-provided variables here
    variables = read_variable_files(args.template_variables_path, variables)

    # Add header-includes metadata with <meta> information for the HTML output's <head>
    variables["pandoc"]["header-includes"] = get_header_includes(variables)

    # NOTE(review): this assert is removed when Python runs with -O; presumably
    # citation processing is expected to be handled by pandoc-manubot-cite
    # (see the options set below) -- confirm against callers.
    assert args.skip_citations
    # Extend Pandoc's metadata.bibliography field with manual references paths
    bibliographies = variables["pandoc"].get("bibliography", [])
    if isinstance(bibliographies, str):
        bibliographies = [bibliographies]
    assert isinstance(bibliographies, list)
    bibliographies.extend(args.manual_references_paths)
    # Convert path-like entries to plain strings
    bibliographies = list(map(os.fspath, bibliographies))
    variables["pandoc"]["bibliography"] = bibliographies
    # enable pandoc-manubot-cite option to write bibliography to a file
    variables["pandoc"]["manubot-output-bibliography"] = os.fspath(
        args.references_path)
    variables["pandoc"]["manubot-output-citekeys"] = os.fspath(
        args.citations_path)
    variables["pandoc"]["manubot-requests-cache-path"] = os.fspath(
        args.requests_cache_path)
    variables["pandoc"][
        "manubot-clear-requests-cache"] = args.clear_requests_cache

    return variables
Exemple #4
0
def configure_args(args):
    """
    Finish preparing `args` with values that argparse does not derive itself.

    Stores directory paths and related settings as attributes on `args`,
    creating directories on disk where necessary. `versions_directory`
    (webpage/v) is the parent of `version_directory` (webpage/v/<version>).

    Returns the same `args` object, updated in place.
    """
    namespace = vars(args)

    # --timestamp relies on the `ots` executable from opentimestamps-client;
    # when it is not found, log an error and disable timestamping.
    if args.timestamp and not shutil.which("ots"):
        logging.error(
            "manubot webpage --timestamp was specified but opentimestamps-client not found on system. "
            "Setting --timestamp=False. "
            "Fix this by installing https://pypi.org/project/opentimestamps-client/"
        )
        namespace["timestamp"] = False

    # Directory holding Manubot outputs
    namespace["output_directory"] = pathlib.Path("output")

    # webpage directory, created when absent
    webpage_dir = pathlib.Path("webpage")
    namespace["webpage_directory"] = webpage_dir
    webpage_dir.mkdir(exist_ok=True)

    # webpage/v directory, created when absent
    versions_dir = webpage_dir / "v"
    namespace["versions_directory"] = versions_dir
    versions_dir.mkdir(exist_ok=True)

    # Check out the version directories that already exist
    checkout_existing_versions(args)

    # Default for --version: the CI commit when available, otherwise "local"
    if args.version is None:
        from manubot.process.ci import get_continuous_integration_parameters

        ci_info = get_continuous_integration_parameters()
        namespace["version"] = (
            ci_info.get("commit", "local") if ci_info else "local"
        )

    versions_root = args.versions_directory

    # webpage/v/<version> always starts out as an empty directory
    target = versions_root / args.version
    if target.is_dir():
        logging.warning(
            f"{target} exists: replacing it with an empty directory"
        )
        shutil.rmtree(target)
    target.mkdir()
    namespace["version_directory"] = target

    # Point webpage/v/latest at webpage/v/<version>, clearing whatever
    # currently occupies that path first
    latest_link = versions_root / "latest"
    if latest_link.is_symlink() or latest_link.is_file():
        latest_link.unlink()
    elif latest_link.is_dir():
        shutil.rmtree(latest_link)
    latest_link.symlink_to(args.version, target_is_directory=True)
    namespace["latest_directory"] = latest_link

    # webpage/v/freeze directory, created when absent
    frozen_dir = versions_root / "freeze"
    frozen_dir.mkdir(exist_ok=True)
    namespace["freeze_directory"] = frozen_dir

    return args