Beispiel #1
0
def process_citations(doc: pf.Doc) -> None:
    """
    Apply citation-by-identifier to a Python object representation of
    Pandoc's Abstract Syntax Tree.
    """
    # process metadata.manubot-bibliography-cache
    bib_cache = doc.get_metadata(key="manubot-bibliography-cache")
    if not (bib_cache is None or isinstance(bib_cache, str)):
        logging.warning(
            f"Expected metadata.manubot-bibliography-cache to be a string or null (None), "
            f"but received a {bib_cache.__class__.__name__}. Setting to None.")
        bib_cache = None
    doc.manubot["bibliography_cache"] = bib_cache
    # process metadata.citekey-aliases
    citekey_aliases = doc.get_metadata("citekey-aliases", default={})
    if not isinstance(citekey_aliases, dict):
        logging.warning(
            f"Expected metadata.citekey-aliases to be a dict, "
            f"but received a {citekey_aliases.__class__.__name__}. Disregarding."
        )
        citekey_aliases = dict()
    doc.manubot["citekey_aliases"] = citekey_aliases
    doc.walk(_get_reference_link_citekey_aliases)
    doc.walk(_get_citekeys_action)
    manuscript_citekeys = doc.manubot["manuscript_citekeys"]
    citations = Citations(input_ids=manuscript_citekeys,
                          aliases=citekey_aliases)
    citations.csl_item_failure_log_level = "ERROR"

    requests_cache_path = doc.get_metadata("manubot-requests-cache-path")
    if requests_cache_path:
        from manubot.process.requests_cache import RequestsCache

        req_cache = RequestsCache(requests_cache_path)
        req_cache.mkdir()
        req_cache.install()
        if doc.get_metadata("manubot-clear-requests-cache", default=False):
            req_cache.clear()

    citations.filter_pandoc_xnos()
    citations.load_manual_references(**_get_load_manual_references_kwargs(doc))
    citations.inspect(log_level="WARNING")
    citations.get_csl_items()
    doc.manubot["citekey_shortener"] = citations.input_to_csl_id
    doc.walk(_citation_to_id_action)

    if requests_cache_path:
        req_cache.close()

    citations.write_citekeys_tsv(
        path=doc.get_metadata("manubot-output-citekeys"))
    citations.write_csl_items(
        path=doc.get_metadata("manubot-output-bibliography"))
    citations.write_csl_items(path=doc.manubot["bibliography_cache"])
    # Update pandoc metadata with fields that this filter
    # has either consumed, created, or modified.
    doc.metadata["bibliography"] = []
    doc.metadata["references"] = citations.csl_items
    doc.metadata["citekey_aliases"] = citekey_aliases
Beispiel #2
0
def process_citations(doc):
    """
    Apply citation-by-identifier to a Python object representation of
    Pandoc's Abstract Syntax Tree.

    The following Pandoc metadata fields are considered:

    - bibliography (use to define reference metadata manually)
    - citekey-aliases (use to define tags for cite-by-id citations)
    - manubot-requests-cache-path
    - manubot-clear-requests-cache
    - manubot-output-citekeys: path to write TSV table of citekeys
    - manubot-output-bibliography: path to write generated CSL JSON bibliography
    """
    citekey_aliases = doc.get_metadata("citekey-aliases", default={})
    if not isinstance(citekey_aliases, dict):
        logging.warning(
            f"Expected metadata.citekey-aliases to be a dict, "
            f"but received a {citekey_aliases.__class__.__name__}. Disregarding."
        )
        citekey_aliases = dict()

    global_variables["citekey_aliases"] = citekey_aliases
    doc.walk(_get_reference_link_citekey_aliases)
    doc.walk(_get_citekeys_action)
    manuscript_citekeys = global_variables["manuscript_citekeys"]
    citations = Citations(input_ids=manuscript_citekeys,
                          aliases=citekey_aliases)
    citations.csl_item_failure_log_level = "ERROR"

    requests_cache_path = doc.get_metadata("manubot-requests-cache-path")
    if requests_cache_path:
        from manubot.process.requests_cache import RequestsCache

        req_cache = RequestsCache(requests_cache_path)
        req_cache.mkdir()
        req_cache.install()
        if doc.get_metadata("manubot-clear-requests-cache", default=False):
            req_cache.clear()

    citations.filter_pandoc_xnos()
    citations.load_manual_references(**_get_load_manual_references_kwargs(doc))
    citations.inspect(log_level="WARNING")
    citations.get_csl_items()
    global_variables["citekey_shortener"] = citations.input_to_csl_id
    doc.walk(_citation_to_id_action)

    if requests_cache_path:
        req_cache.close()

    citations.write_citekeys_tsv(
        path=doc.get_metadata("manubot-output-citekeys"))
    citations.write_csl_json(
        path=doc.get_metadata("manubot-output-bibliography"))
    # Update pandoc metadata with fields that this filter
    # has either consumed, created, or modified.
    doc.metadata["bibliography"] = []
    doc.metadata["references"] = citations.csl_items
    doc.metadata["citekey_aliases"] = citekey_aliases