def parse_references(reference_lines, recid=1, kbs_files=None):
    """Build the MARC-XML record for a list of raw reference lines.

    :param reference_lines: raw reference lines (list of strings).
    :param recid: record id handed on to ``create_xml_record``.
    :param kbs_files: optional custom knowledge-base files, forwarded to
        ``get_kbs`` as ``custom_kbs_files``.
    :return: the result of ``create_xml_record`` for the parsed
        references, i.e. the XML string to be outputted.
    """
    # Load the RefExtract knowledge bases.
    knowledge_bases = get_kbs(custom_kbs_files=kbs_files)

    # Identify journal titles, report numbers, URLs, DOIs and authors.
    # The parser returns three items; the last one (bad-titles count) is
    # not used here.
    parsed = parse_references_elements(reference_lines, knowledge_bases)
    elements, stats, _bad_titles_count = parsed

    # Turn the element list into MARC-XML, then wrap it into a record.
    marcxml_body = build_xml_references(elements)
    return create_xml_record(stats, recid, marcxml_body)
def cli_main(options, args):
    """Command-line entry point: convert the journals in each input file.

    Prints the usage message and returns when help was requested or no
    input paths were given.  Otherwise every path in ``args`` is read,
    turned into records with ``create_records``, passed through
    ``convert_journals_list`` with the journals knowledge base, and the
    accumulated results are handed to ``write_records``.

    :param options: parsed command-line options; ``help`` and
        ``kb_journals`` are read here, and the object is forwarded to
        ``write_records``.
    :param args: paths of the input files to process.
    """
    if options.help or not args:
        usage()
        return

    # Only override the journals knowledge base when one was supplied.
    kbs_files = (
        {'journals': options.kb_journals} if options.kb_journals else {}
    )
    journals_kb = get_kbs(custom_kbs_files=kbs_files)['journals']

    converted = []
    for path in args:
        # The context manager closes the file even if the read fails.
        with open(path) as source:
            xml = source.read()
        converted.extend(
            convert_journals_list(journals_kb, create_records(xml))
        )

    write_records(options, converted)
def setUp(self):
    """Store in ``self.kb`` the journals knowledge base built from one entry."""
    from invenio.legacy.refextract.kbs import get_kbs

    # A single (journal name, replacement) pair is enough for the fixture.
    journal_entries = [("TEST JOURNAL NAME", "Converted")]
    knowledge_bases = get_kbs(custom_kbs_files={'journals': journal_entries})
    self.kb = knowledge_bases['journals']