def test_citations_check_collisions(caplog):
    """
    check_collisions is expected to emit no log records for these input_ids.

    The inputs contain an exact duplicate ("citekey-1" twice) and a pair
    that differs only by letter case ("citekey-2" / "Citekey-2").
    """
    citations = Citations(
        [
            "citekey-1",
            "citekey-1",
            "citekey-2",
            "Citekey-2",
        ]
    )
    citations.check_collisions()
    # caplog is pytest's log-capturing fixture; no records means nothing was logged
    assert not caplog.records
def test_citations_filter_pandoc_xnos():
    """
    filter_pandoc_xnos is expected to remove the fig:, eq:, and tbl:
    prefixed ids, leaving a single citekey behind.
    """
    citations = Citations(
        [
            "fig:pandoc-fignos-key",  # should filter
            "eq:pandoc-eqnos-key",  # should filter
            "tbl:pandoc-tablenos-key",  # should filter
            "not-pandoc-xnos-key",  # should keep
        ]
    )
    citations.filter_pandoc_xnos()
    remaining = citations.citekeys
    assert len(remaining) == 1
    assert remaining[0].input_id == "not-pandoc-xnos-key"
def test_citations_filter_unhandled():
    """
    filter_unhandled is expected to keep only "doi:handled-prefix" out of
    ids with no prefix, an unrecognized prefix, and an empty prefix.
    """
    citations = Citations(
        [
            "citekey-with-no-prefix",
            "bad-prefix:citekey",
            ":empty-prefix",
            "doi:handled-prefix",
        ]
    )
    citations.filter_unhandled()
    remaining = citations.citekeys
    assert len(remaining) == 1
    assert remaining[0].input_id == "doi:handled-prefix"
def test_citations_inspect():
    """
    Citations.inspect is expected to return a four-line report for these
    input_ids, including a message about the malformed pmid.
    """
    citations = Citations(
        [
            "citekey-1",  # passes inspection
            "arXiv:1806.05726v1",  # passes inspection
            "arXiv:bad-id",
            "DOI:bad-id",
            "pmid:bad-id",
            "DOID:not-disease-ontology-id",
        ]
    )
    report = citations.inspect(log_level="INFO")
    # printed so the report shows up in pytest output when the test fails
    print(report)
    report_lines = report.splitlines()
    assert len(report_lines) == 4
    assert "pmid:bad-id -- PubMed Identifiers should be 1-8 digits" in report
def test_citations_check_multiple_input_ids(caplog):
    """
    check_multiple_input_ids is expected to log a message listing the four
    input_ids (three DOI spellings plus one alias) that share a standard_id.
    """
    aliases = {"ugly-doi-alias": "DOI:10.1016/s0933-3657(96)00367-3"}
    citations = Citations(
        [
            "doi:10/b6vnmd",
            "DOI:10/B6VNMD",
            "doi:10.1016/s0933-3657(96)00367-3",
            "ugly-doi-alias",
            "other-citekey",
        ],
        aliases,
    )
    citations.check_multiple_input_ids()
    # NOTE(review): the two literals below are joined with no separator
    # between the trailing ":" and the "[" -- confirm this matches the
    # format of the logged message.
    expected = (
        "Multiple citekey input_ids refer to the same standard_id doi:10.1016/s0933-3657(96)00367-3:"
        "['doi:10/b6vnmd', 'DOI:10/B6VNMD', 'doi:10.1016/s0933-3657(96)00367-3', 'ugly-doi-alias']"
    )
    assert expected in caplog.text
def cli_cite(args):
    """
    Main function for the manubot cite command-line interface.

    Does not allow user to directly specify Pandoc's --to argument, due to
    inconsistent citation rendering by output format. See
    https://github.com/jgm/pandoc/issues/4834

    Reads the following attributes from ``args``: citekeys, prune_csl,
    bibliography, render, output, csl, and format. When ``args.render`` is
    false, the CSL JSON is written to ``args.output`` (or to standard output
    when ``args.output`` is falsy) and the function returns. Otherwise the
    CSL items are handed to ``call_pandoc`` for rendering.
    """
    _parse_cli_cite_args(args)
    citations = Citations(
        input_ids=args.citekeys, prune_csl_items=args.prune_csl, sort_csl_items=False
    )
    citations.load_manual_references(paths=args.bibliography)
    citations.inspect(log_level="WARNING")
    csl_items = citations.get_csl_items()

    # output CSL JSON data, if --render is False
    if not args.render:
        # Encode before opening the destination so that a failure here
        # does not leave behind a truncated output file.
        csl_json_bytes = citations.csl_json.encode()
        if args.output:
            with args.output.open("wb") as write_file:
                write_file.write(csl_json_bytes)
        else:
            # Do not wrap sys.stdout.buffer in a `with` block: leaving the
            # block would close the process-wide stdout stream, breaking any
            # later write by the caller. Flush explicitly instead.
            sys.stdout.buffer.write(csl_json_bytes)
            sys.stdout.buffer.flush()
        return

    # use Pandoc to render references
    pandoc_metadata = {"nocite": "@*", "csl": args.csl, "references": csl_items}
    call_pandoc(metadata=pandoc_metadata, path=args.output, format=args.format)
def test_citations_citekeys_tsv():
    """
    Citations.citekeys_tsv is expected to be a string whose third line
    contains the tab-separated field "arxiv:1806.05726v1".
    """
    citations = Citations(
        [
            "citekey-1",
            "arXiv:1806.05726v1",
            "DOI:10.7717/peerj.338",
            "pmid:29618526",
        ]
    )
    tsv_text = citations.citekeys_tsv
    assert isinstance(tsv_text, str)
    third_line_fields = tsv_text.splitlines()[2].split("\t")
    assert "arxiv:1806.05726v1" in third_line_fields
def test_citations_csl_serialization(csl_format):
    """
    Compare the ``csl_<csl_format>`` attribute of a Citations instance with
    the stored file ``cite-command-rendered/output-bibliography.<csl_format>``
    located next to this test module.

    ``csl_format`` is presumably supplied by pytest parametrization or a
    fixture defined elsewhere.
    """
    fixture_dir = pathlib.Path(__file__).parent / "cite-command-rendered"
    citations = Citations(
        ["arxiv:1806.05726v1", "doi:10.7717/peerj.338", "pubmed:29618526"]
    )
    citations.load_manual_references(paths=[fixture_dir / "input-bibliography.json"])
    citations.get_csl_items()
    expected_path = fixture_dir / f"output-bibliography.{csl_format}"
    # uncomment the following line to regenerate test output
    # citations.write_csl_items(expected_path)
    serialized = getattr(citations, f"csl_{csl_format}")
    assert serialized == expected_path.read_text()
def process_citations(doc: pf.Doc) -> None:
    """
    Apply citation-by-identifier to a Python object representation of
    Pandoc's Abstract Syntax Tree.

    Metadata fields read directly in this function:

    - manubot-bibliography-cache: path passed to a second write_csl_items call
    - citekey-aliases: dict passed to Citations as ``aliases``
    - manubot-requests-cache-path: enables the requests cache when truthy
    - manubot-clear-requests-cache: clears that cache when truthy
    - manubot-output-citekeys: path passed to write_citekeys_tsv
    - manubot-output-bibliography: path passed to write_csl_items

    Intermediate state is shared with the walk actions through ``doc.manubot``.
    """
    # metadata.manubot-bibliography-cache: only a string or None is accepted
    cache_path = doc.get_metadata(key="manubot-bibliography-cache")
    if cache_path is not None and not isinstance(cache_path, str):
        logging.warning(
            "Expected metadata.manubot-bibliography-cache to be a string or null (None), "
            f"but received a {cache_path.__class__.__name__}. Setting to None."
        )
        cache_path = None
    doc.manubot["bibliography_cache"] = cache_path

    # metadata.citekey-aliases: anything that is not a dict is replaced by an empty one
    aliases = doc.get_metadata("citekey-aliases", default={})
    if not isinstance(aliases, dict):
        logging.warning(
            "Expected metadata.citekey-aliases to be a dict, "
            f"but received a {aliases.__class__.__name__}. Disregarding."
        )
        aliases = {}
    doc.manubot["citekey_aliases"] = aliases

    # The walks must run after doc.manubot["citekey_aliases"] is set and
    # before doc.manubot["manuscript_citekeys"] is read.
    doc.walk(_get_reference_link_citekey_aliases)
    doc.walk(_get_citekeys_action)
    citations = Citations(
        input_ids=doc.manubot["manuscript_citekeys"], aliases=aliases
    )
    citations.csl_item_failure_log_level = "ERROR"

    # Optionally set up the requests cache; stays None when no path is configured
    requests_cache = None
    requests_cache_path = doc.get_metadata("manubot-requests-cache-path")
    if requests_cache_path:
        from manubot.process.requests_cache import RequestsCache

        requests_cache = RequestsCache(requests_cache_path)
        requests_cache.mkdir()
        requests_cache.install()
        if doc.get_metadata("manubot-clear-requests-cache", default=False):
            requests_cache.clear()

    citations.filter_pandoc_xnos()
    citations.load_manual_references(**_get_load_manual_references_kwargs(doc))
    citations.inspect(log_level="WARNING")
    citations.get_csl_items()
    doc.manubot["citekey_shortener"] = citations.input_to_csl_id
    doc.walk(_citation_to_id_action)

    if requests_cache is not None:
        requests_cache.close()

    # Write outputs: citekey table, bibliography, then the bibliography cache
    citations.write_citekeys_tsv(path=doc.get_metadata("manubot-output-citekeys"))
    citations.write_csl_items(path=doc.get_metadata("manubot-output-bibliography"))
    citations.write_csl_items(path=doc.manubot["bibliography_cache"])

    # Update pandoc metadata with fields that this filter
    # has either consumed, created, or modified.
    # NOTE(review): aliases are read from "citekey-aliases" (hyphen) but
    # written back under "citekey_aliases" (underscore) -- confirm intended.
    doc.metadata["bibliography"] = []
    doc.metadata["references"] = citations.csl_items
    doc.metadata["citekey_aliases"] = aliases
def process_citations(doc):
    """
    Apply citation-by-identifier to a Python object representation of
    Pandoc's Abstract Syntax Tree.

    The following Pandoc metadata fields are considered:

    - bibliography (use to define reference metadata manually; presumably
      consumed via _get_load_manual_references_kwargs, which is not shown here)
    - citekey-aliases (use to define tags for cite-by-id citations)
    - manubot-requests-cache-path
    - manubot-clear-requests-cache
    - manubot-output-citekeys: path to write TSV table of citekeys
    - manubot-output-bibliography: path to write generated CSL JSON bibliography

    Intermediate state is shared with the walk actions through the
    module-level ``global_variables`` mapping.
    """
    # metadata.citekey-aliases: anything that is not a dict is replaced by an empty one
    aliases = doc.get_metadata("citekey-aliases", default={})
    if not isinstance(aliases, dict):
        logging.warning(
            "Expected metadata.citekey-aliases to be a dict, "
            f"but received a {aliases.__class__.__name__}. Disregarding."
        )
        aliases = {}
    global_variables["citekey_aliases"] = aliases

    # The walks must run after global_variables["citekey_aliases"] is set and
    # before global_variables["manuscript_citekeys"] is read.
    doc.walk(_get_reference_link_citekey_aliases)
    doc.walk(_get_citekeys_action)
    citations = Citations(
        input_ids=global_variables["manuscript_citekeys"], aliases=aliases
    )
    citations.csl_item_failure_log_level = "ERROR"

    # Optionally set up the requests cache; stays None when no path is configured
    requests_cache = None
    requests_cache_path = doc.get_metadata("manubot-requests-cache-path")
    if requests_cache_path:
        from manubot.process.requests_cache import RequestsCache

        requests_cache = RequestsCache(requests_cache_path)
        requests_cache.mkdir()
        requests_cache.install()
        if doc.get_metadata("manubot-clear-requests-cache", default=False):
            requests_cache.clear()

    citations.filter_pandoc_xnos()
    citations.load_manual_references(**_get_load_manual_references_kwargs(doc))
    citations.inspect(log_level="WARNING")
    citations.get_csl_items()
    global_variables["citekey_shortener"] = citations.input_to_csl_id
    doc.walk(_citation_to_id_action)

    if requests_cache is not None:
        requests_cache.close()

    # Write outputs: citekey table first, then the CSL JSON bibliography
    citations.write_citekeys_tsv(path=doc.get_metadata("manubot-output-citekeys"))
    citations.write_csl_json(path=doc.get_metadata("manubot-output-bibliography"))

    # Update pandoc metadata with fields that this filter
    # has either consumed, created, or modified.
    # NOTE(review): aliases are read from "citekey-aliases" (hyphen) but
    # written back under "citekey_aliases" (underscore) -- confirm intended.
    doc.metadata["bibliography"] = []
    doc.metadata["references"] = citations.csl_items
    doc.metadata["citekey_aliases"] = aliases