Beispiel #1
0
def test_citations_check_collisions(caplog):
    """
    Run ``Citations.check_collisions`` on a list holding an exact duplicate
    and a pair of citekeys that differ only by case, and expect that
    nothing was captured by the ``caplog`` fixture.
    """
    citations = Citations(["citekey-1", "citekey-1", "citekey-2", "Citekey-2"])
    citations.check_collisions()
    assert len(caplog.records) == 0
Beispiel #2
0
def test_citations_filter_pandoc_xnos():
    """
    After ``Citations.filter_pandoc_xnos`` only the citekey without a
    ``fig:``/``eq:``/``tbl:`` prefix is expected to remain.
    """
    # these three are expected to be filtered out
    xnos_ids = [
        "fig:pandoc-fignos-key",
        "eq:pandoc-eqnos-key",
        "tbl:pandoc-tablenos-key",
    ]
    # this one is expected to be kept
    kept_id = "not-pandoc-xnos-key"
    citations = Citations([*xnos_ids, kept_id])
    citations.filter_pandoc_xnos()
    remaining_ids = [citekey.input_id for citekey in citations.citekeys]
    assert remaining_ids == ["not-pandoc-xnos-key"]
Beispiel #3
0
def test_citations_filter_unhandled():
    """
    After ``Citations.filter_unhandled`` only ``doi:handled-prefix`` is
    expected to remain out of the four supplied citekeys.
    """
    citations = Citations(
        [
            "citekey-with-no-prefix",
            "bad-prefix:citekey",
            ":empty-prefix",
            "doi:handled-prefix",
        ]
    )
    citations.filter_unhandled()
    remaining_ids = [citekey.input_id for citekey in citations.citekeys]
    assert remaining_ids == ["doi:handled-prefix"]
Beispiel #4
0
def test_citations_inspect():
    """
    Build a report with ``Citations.inspect(log_level="INFO")`` and check
    that it has four lines, one of which is the PubMed identifier message.
    """
    # the first two citekeys are expected to pass inspection
    passing_ids = ["citekey-1", "arXiv:1806.05726v1"]
    other_ids = [
        "arXiv:bad-id",
        "DOI:bad-id",
        "pmid:bad-id",
        "DOID:not-disease-ontology-id",
    ]
    report = Citations(passing_ids + other_ids).inspect(log_level="INFO")
    # printed so the report shows up in pytest output when the test fails
    print(report)
    report_lines = report.splitlines()
    assert len(report_lines) == 4
    pmid_message = "pmid:bad-id -- PubMed Identifiers should be 1-8 digits"
    assert pmid_message in report
Beispiel #5
0
def test_citations_check_multiple_input_ids(caplog):
    """
    Run ``Citations.check_multiple_input_ids`` on several input_ids (one of
    them supplied through ``citekey_aliases``) and check the captured log
    text for the "Multiple citekey input_ids" message and the list of
    input_ids it reports.
    """
    input_ids = [
        "doi:10/b6vnmd",
        "DOI:10/B6VNMD",
        "doi:10.1016/s0933-3657(96)00367-3",
        "ugly-doi-alias",
        "other-citekey",
    ]
    citekey_aliases = {"ugly-doi-alias": "DOI:10.1016/s0933-3657(96)00367-3"}
    citations = Citations(input_ids, citekey_aliases)
    citations.check_multiple_input_ids()
    expected_message = (
        "Multiple citekey input_ids refer to the same standard_id "
        "doi:10.1016/s0933-3657(96)00367-3:"
    )
    expected_input_ids = (
        "['doi:10/b6vnmd', 'DOI:10/B6VNMD', "
        "'doi:10.1016/s0933-3657(96)00367-3', 'ugly-doi-alias']"
    )
    # Previously the input_ids list was a stand-alone string statement and
    # was never compared against the log. The two fragments are asserted
    # separately because the separator placed between them by the logging
    # call is not visible from this test.
    assert expected_message in caplog.text
    assert expected_input_ids in caplog.text
Beispiel #6
0
def cli_cite(args):
    """
    Main function for the manubot cite command-line interface.

    Does not allow user to directly specify Pandoc's --to argument, due to
    inconsistent citation rendering by output format. See
    https://github.com/jgm/pandoc/issues/4834

    Reads ``citekeys``, ``prune_csl``, ``bibliography``, ``render``,
    ``output``, ``csl`` and ``format`` from ``args``. When ``args.render``
    is false the CSL JSON is written to ``args.output`` (or to standard
    output when no output is given); otherwise the references are handed
    to ``call_pandoc`` for rendering.
    """
    _parse_cli_cite_args(args)
    citations = Citations(
        input_ids=args.citekeys, prune_csl_items=args.prune_csl, sort_csl_items=False
    )
    citations.load_manual_references(paths=args.bibliography)
    citations.inspect(log_level="WARNING")
    csl_items = citations.get_csl_items()

    # output CSL JSON data, if --render is False
    if not args.render:
        csl_json_bytes = citations.csl_json.encode()
        if args.output:
            with args.output.open("wb") as write_file:
                write_file.write(csl_json_bytes)
        else:
            # Do not wrap sys.stdout.buffer in a ``with`` block: leaving the
            # block would close the process-wide stdout stream.
            sys.stdout.buffer.write(csl_json_bytes)
            sys.stdout.buffer.flush()
        return

    # use Pandoc to render references
    pandoc_metadata = {"nocite": "@*", "csl": args.csl, "references": csl_items}
    call_pandoc(metadata=pandoc_metadata, path=args.output, format=args.format)
Beispiel #7
0
def test_citations_citekeys_tsv():
    """
    ``Citations.citekeys_tsv`` should be a string whose third line, split on
    tabs, contains the lowercase ``arxiv:1806.05726v1`` identifier.
    """
    citations = Citations(
        [
            "citekey-1",
            "arXiv:1806.05726v1",
            "DOI:10.7717/peerj.338",
            "pmid:29618526",
        ]
    )
    table = citations.citekeys_tsv
    assert isinstance(table, str)
    third_line_fields = table.splitlines()[2].split("\t")
    assert "arxiv:1806.05726v1" in third_line_fields
Beispiel #8
0
def test_citations_csl_serialization(csl_format):
    """
    Compare the ``csl_<csl_format>`` attribute of a ``Citations`` object with
    the stored ``output-bibliography.<csl_format>`` file located in the
    ``cite-command-rendered`` directory next to this test module.
    """
    fixture_dir = pathlib.Path(__file__).parent / "cite-command-rendered"
    bibliography_path = fixture_dir / "input-bibliography.json"
    expected_path = fixture_dir / f"output-bibliography.{csl_format}"

    citekeys = ["arxiv:1806.05726v1", "doi:10.7717/peerj.338", "pubmed:29618526"]
    citations = Citations(citekeys)
    citations.load_manual_references(paths=[bibliography_path])
    citations.get_csl_items()
    # uncomment the following line to regenerate test output
    # citations.write_csl_items(expected_path)
    serialized = getattr(citations, f"csl_{csl_format}")
    assert serialized == expected_path.read_text()
Beispiel #9
0
def process_citations(doc: pf.Doc) -> None:
    """
    Apply citation-by-identifier to a Python object representation of
    Pandoc's Abstract Syntax Tree.

    The following Pandoc metadata fields are read:

    - manubot-bibliography-cache: string or null; passed as a path to
      ``Citations.write_csl_items``
    - citekey-aliases: dict handed to ``Citations`` as ``aliases``
    - manubot-requests-cache-path
    - manubot-clear-requests-cache
    - manubot-output-citekeys: path passed to ``write_citekeys_tsv``
    - manubot-output-bibliography: path passed to ``write_csl_items``

    On completion the ``bibliography``, ``references`` and
    ``citekey_aliases`` metadata fields of ``doc`` are overwritten.
    """
    # process metadata.manubot-bibliography-cache
    bib_cache = doc.get_metadata(key="manubot-bibliography-cache")
    if not (bib_cache is None or isinstance(bib_cache, str)):
        logging.warning(
            f"Expected metadata.manubot-bibliography-cache to be a string or null (None), "
            f"but received a {bib_cache.__class__.__name__}. Setting to None.")
        bib_cache = None
    doc.manubot["bibliography_cache"] = bib_cache
    # process metadata.citekey-aliases
    citekey_aliases = doc.get_metadata("citekey-aliases", default={})
    if not isinstance(citekey_aliases, dict):
        logging.warning(
            f"Expected metadata.citekey-aliases to be a dict, "
            f"but received a {citekey_aliases.__class__.__name__}. Disregarding."
        )
        citekey_aliases = dict()
    doc.manubot["citekey_aliases"] = citekey_aliases
    doc.walk(_get_reference_link_citekey_aliases)
    doc.walk(_get_citekeys_action)
    # presumably populated by the walk actions above; verify against them
    manuscript_citekeys = doc.manubot["manuscript_citekeys"]
    citations = Citations(input_ids=manuscript_citekeys,
                          aliases=citekey_aliases)
    citations.csl_item_failure_log_level = "ERROR"

    req_cache = None
    requests_cache_path = doc.get_metadata("manubot-requests-cache-path")
    if requests_cache_path:
        from manubot.process.requests_cache import RequestsCache

        req_cache = RequestsCache(requests_cache_path)
        req_cache.mkdir()
        req_cache.install()

    try:
        if req_cache is not None and doc.get_metadata(
                "manubot-clear-requests-cache", default=False):
            req_cache.clear()

        citations.filter_pandoc_xnos()
        citations.load_manual_references(**_get_load_manual_references_kwargs(doc))
        citations.inspect(log_level="WARNING")
        citations.get_csl_items()
        doc.manubot["citekey_shortener"] = citations.input_to_csl_id
        doc.walk(_citation_to_id_action)
    finally:
        # Close the installed cache even when a citation step raises, so an
        # error does not leave it installed.
        if req_cache is not None:
            req_cache.close()

    citations.write_citekeys_tsv(
        path=doc.get_metadata("manubot-output-citekeys"))
    citations.write_csl_items(
        path=doc.get_metadata("manubot-output-bibliography"))
    citations.write_csl_items(path=doc.manubot["bibliography_cache"])
    # Update pandoc metadata with fields that this filter
    # has either consumed, created, or modified.
    doc.metadata["bibliography"] = []
    doc.metadata["references"] = citations.csl_items
    # NOTE(review): written with an underscore although the value was read
    # from "citekey-aliases" -- confirm this key is intended.
    doc.metadata["citekey_aliases"] = citekey_aliases
Beispiel #10
0
def process_citations(doc):
    """
    Apply citation-by-identifier to a Python object representation of
    Pandoc's Abstract Syntax Tree.

    The following Pandoc metadata fields are considered:

    - bibliography (use to define reference metadata manually)
    - citekey-aliases (use to define tags for cite-by-id citations)
    - manubot-requests-cache-path
    - manubot-clear-requests-cache
    - manubot-output-citekeys: path to write TSV table of citekeys
    - manubot-output-bibliography: path to write generated CSL JSON bibliography

    On completion the ``bibliography``, ``references`` and
    ``citekey_aliases`` metadata fields of ``doc`` are overwritten.
    """
    citekey_aliases = doc.get_metadata("citekey-aliases", default={})
    if not isinstance(citekey_aliases, dict):
        logging.warning(
            f"Expected metadata.citekey-aliases to be a dict, "
            f"but received a {citekey_aliases.__class__.__name__}. Disregarding."
        )
        citekey_aliases = dict()

    global_variables["citekey_aliases"] = citekey_aliases
    doc.walk(_get_reference_link_citekey_aliases)
    doc.walk(_get_citekeys_action)
    # presumably populated by the walk actions above; verify against them
    manuscript_citekeys = global_variables["manuscript_citekeys"]
    citations = Citations(input_ids=manuscript_citekeys,
                          aliases=citekey_aliases)
    citations.csl_item_failure_log_level = "ERROR"

    req_cache = None
    requests_cache_path = doc.get_metadata("manubot-requests-cache-path")
    if requests_cache_path:
        from manubot.process.requests_cache import RequestsCache

        req_cache = RequestsCache(requests_cache_path)
        req_cache.mkdir()
        req_cache.install()

    try:
        if req_cache is not None and doc.get_metadata(
                "manubot-clear-requests-cache", default=False):
            req_cache.clear()

        citations.filter_pandoc_xnos()
        citations.load_manual_references(**_get_load_manual_references_kwargs(doc))
        citations.inspect(log_level="WARNING")
        citations.get_csl_items()
        global_variables["citekey_shortener"] = citations.input_to_csl_id
        doc.walk(_citation_to_id_action)
    finally:
        # Close the installed cache even when a citation step raises, so an
        # error does not leave it installed.
        if req_cache is not None:
            req_cache.close()

    citations.write_citekeys_tsv(
        path=doc.get_metadata("manubot-output-citekeys"))
    citations.write_csl_json(
        path=doc.get_metadata("manubot-output-bibliography"))
    # Update pandoc metadata with fields that this filter
    # has either consumed, created, or modified.
    doc.metadata["bibliography"] = []
    doc.metadata["references"] = citations.csl_items
    # NOTE(review): written with an underscore although the value was read
    # from "citekey-aliases" -- confirm this key is intended.
    doc.metadata["citekey_aliases"] = citekey_aliases