def citation_to_citeproc(citation, prune=True):
    """
    Return a dictionary with citation metadata (a CSL Item) for a citation.

    citation is a string of the form "source:identifier". It is first
    standardized with standardize_citation (warning if that changes it), and
    the standardized form is what gets split, looked up and recorded.

    prune is forwarded to citeproc_passthrough.

    Raises ValueError if the source (the text before the first colon) has no
    entry in citeproc_retrievers.
    """
    # This must be an assignment: a bare `==` comparison would discard the
    # standardized value and continue with the unstandardized citation.
    citation = standardize_citation(citation, warn_if_changed=True)
    source, identifier = citation.split(':', 1)

    if source not in citeproc_retrievers:
        msg = f'Unsupported citation source {source} in {citation}'
        raise ValueError(msg)

    # citeproc_retrievers maps a source to something import_function turns
    # into a callable that accepts the identifier.
    citeproc_retriever = import_function(citeproc_retrievers[source])
    csl_item = citeproc_retriever(identifier)

    # Imported at call time, as in the original implementation.
    from manubot import __version__ as manubot_version
    from manubot.cite.citeproc import (
        citeproc_passthrough,
        append_to_csl_item_note,
    )

    # Record provenance in the CSL Item's note field.
    note_text = f'This CSL JSON Item was automatically generated by Manubot v{manubot_version} using citation-by-identifier.'
    note_dict = {
        'standard_id': citation,
    }
    append_to_csl_item_note(csl_item, note_text, note_dict)

    # The returned item is identified by the short id of the citation.
    short_id = get_citation_short_id(citation)
    csl_item = citeproc_passthrough(csl_item, set_id=short_id, prune=prune)
    return csl_item
def citekey_to_csl_item(citekey, prune=True):
    """
    Generate a CSL Item (Python dictionary) for the input citekey.

    citekey is a string of the form "source:identifier". It is first
    standardized with standardize_citekey (warning if that changes it), and
    the standardized form is what gets split, looked up and recorded.

    prune is forwarded to csl_item_passthrough.

    Raises ValueError if the source (the text before the first colon) has no
    entry in citeproc_retrievers.
    """
    # This must be an assignment: a bare `==` comparison would discard the
    # standardized value and continue with the unstandardized citekey.
    citekey = standardize_citekey(citekey, warn_if_changed=True)
    source, identifier = citekey.split(':', 1)

    if source not in citeproc_retrievers:
        msg = f'Unsupported citation source {source!r} in {citekey!r}'
        raise ValueError(msg)

    # citeproc_retrievers maps a source to something import_function turns
    # into a callable that accepts the identifier.
    citeproc_retriever = import_function(citeproc_retrievers[source])
    csl_item = citeproc_retriever(identifier)

    # Imported at call time, as in the original implementation.
    from manubot import __version__ as manubot_version
    from manubot.cite.citeproc import (
        csl_item_passthrough,
        append_to_csl_item_note,
    )

    # Record provenance in the CSL Item's note field.
    note_text = f'This CSL JSON Item was automatically generated by Manubot v{manubot_version} using citation-by-identifier.'
    note_dict = {
        'standard_id': citekey,
    }
    append_to_csl_item_note(csl_item, note_text, note_dict)

    # The returned item is identified by the shortened citekey.
    short_citekey = shorten_citekey(citekey)
    csl_item = csl_item_passthrough(csl_item, set_id=short_citekey, prune=prune)
    return csl_item
def test_append_to_csl_item_note(input_note, text, dictionary, expected_note):
    """
    Append text and dictionary to a CSL Item whose note starts as input_note
    and check that the resulting note equals expected_note.
    """
    item = dict(id='test_csl_item', type='entry', note=input_note)
    # append_to_csl_item_note modifies the item in place.
    append_to_csl_item_note(item, text, dictionary)
    assert item['note'] == expected_note
def csl_item_set_standard_id(csl_item):
    """
    Extract the standard_id (standard citation key) for a csl_item and modify
    the csl_item in-place to set its "id" field.

    The standard_id is extracted from, in decreasing order of precedence, a
    "standard_citation" field (which is removed from the csl_item), a
    standard_id stored in the "note" field, or the "id" field. If extracting
    from the "id" field, the infer_citekey_prefix function is used to set the
    prefix. For example, if the id does not begin with a supported prefix
    (e.g. "doi:", "pmid:" or "raw:"), it is assumed to be raw and given a
    "raw:" prefix.

    The extracted value (referred to as "original_standard_id") is checked for
    validity and standardized, after which it is the final "standard_id".

    Regarding csl_item modification, the csl_item "id" field is set to the
    standard_id and the note field is created or updated with key-value pairs
    for standard_id, original_standard_id, and original_id. A pair is only
    added when it differs from what the note already records, and the
    original_* pairs only when they differ from the final standard_id.

    Note that the Manubot software generally refers to the "id" of a CSL Item
    as a citekey. However, in this context, we use "id" rather than "citekey"
    for consistency with CSL's "id" field.

    Returns the same csl_item object that was passed in.

    Raises ValueError if csl_item is not a dictionary, if no field provides a
    standard_id, or if the extracted standard_id is not a valid citekey.
    """
    if not isinstance(csl_item, dict):
        raise ValueError(
            "csl_item must be a CSL Data Item represented as a Python dictionary")

    # Imported at call time, as in the original implementation.
    from manubot.cite.citeproc import (
        append_to_csl_item_note,
        parse_csl_item_note,
    )
    note_dict = parse_csl_item_note(csl_item.get('note', ''))

    # Later sources override earlier ones: id < note standard_id <
    # standard_citation field.
    original_id = None
    original_standard_id = None
    if 'id' in csl_item:
        original_id = csl_item['id']
        original_standard_id = infer_citekey_prefix(original_id)
    if 'standard_id' in note_dict:
        original_standard_id = note_dict['standard_id']
    if 'standard_citation' in csl_item:
        original_standard_id = csl_item.pop('standard_citation')
    if original_standard_id is None:
        raise ValueError(
            'csl_item_set_standard_id could not detect a field with a citation / standard_citation. '
            'Consider setting the CSL Item "id" field.')

    # Explicit raise rather than assert: assert statements are removed when
    # Python runs with -O, which would let invalid citekeys through.
    if not is_valid_citekey(original_standard_id, allow_raw=True):
        raise ValueError(
            f'csl_item_set_standard_id extracted an invalid citekey: {original_standard_id!r}')
    standard_id = standardize_citekey(
        original_standard_id, warn_if_changed=False)

    # Collect only the key-value pairs the note does not already contain.
    add_to_note = {}
    if original_id and original_id != standard_id:
        if original_id != note_dict.get('original_id'):
            add_to_note['original_id'] = original_id
    if original_standard_id and original_standard_id != standard_id:
        if original_standard_id != note_dict.get('original_standard_id'):
            add_to_note['original_standard_id'] = original_standard_id
    if standard_id != note_dict.get('standard_id'):
        add_to_note['standard_id'] = standard_id
    append_to_csl_item_note(csl_item, dictionary=add_to_note)

    csl_item['id'] = standard_id
    return csl_item
def load_manual_references(paths=None, extra_csl_items=None):
    """
    Read manual references (overrides) from files specified by a list of
    paths. Returns a standard_id to CSL Item dictionary.

    paths is an iterable of file paths. Paths that are not existing files are
    skipped with a logged warning. Each CSL Item read by load_bibliography
    gets a note recording the Manubot version and the source filename.

    extra_csl_items specifies JSON CSL stored as a Python object, to be used
    in addition to the CSL JSON stored as text in the files specified by
    paths. Omit paths (or pass an empty list) to only use extra_csl_items.
    These items are modified in-place when their standard_id is set.

    CSL Items for which csl_item_set_standard_id raises are logged at INFO
    level and skipped. When several items share a standard_id, the last one
    processed is the one returned; extra_csl_items are processed after the
    items loaded from paths.
    """
    # None sentinels avoid sharing mutable default lists across calls.
    if paths is None:
        paths = []
    if extra_csl_items is None:
        extra_csl_items = []

    csl_items = []
    for path in paths:
        path = pathlib.Path(path)
        if not path.is_file():
            logging.warning(
                f'process.load_bibliographies is skipping a non-existent path: {path}'
            )
            continue
        for csl_item in load_bibliography(path):
            append_to_csl_item_note(
                csl_item,
                text=f'This CSL JSON Item was loaded by Manubot v{manubot_version} from a manual reference file.',
                dictionary={'manual_reference_filename': path.name},
            )
            csl_items.append(csl_item)
    csl_items.extend(extra_csl_items)

    manual_refs = dict()
    for csl_item in csl_items:
        try:
            csl_item_set_standard_id(csl_item)
        except Exception:
            # Deliberate best-effort: one bad item must not abort the load,
            # so log it with the traceback and move on.
            csl_item_str = json.dumps(csl_item, indent=2)
            logging.info(
                f'Skipping csl_item where setting standard_id failed:\n{csl_item_str}',
                exc_info=True)
            continue
        # The dictionary is keyed by the standard_id, while the stored item
        # has its id set to the short id derived from it.
        standard_id = csl_item['id']
        csl_item = citeproc_passthrough(
            csl_item, set_id=get_citation_short_id(standard_id))
        manual_refs[standard_id] = csl_item
    return manual_refs