def extract_references(filepath):
    """Extract references from PDF and return in INSPIRE format.

    Runs ``extract_references_from_file`` on ``filepath`` using the journal
    knowledge base named by ``cfg['REFEXTRACT_KB_NAME']`` and renames the
    fields of every extracted reference.

    :param filepath: path of the document, passed through unchanged to
        ``extract_references_from_file``.
    :return: list of dicts, one per extracted reference. Each dict holds
        only the fields that had a non-empty value; list values are
        flattened into a single comma-separated string. The list is empty
        when nothing was extracted.
    """
    # (output key, key in the extractor's result) -- order is kept from the
    # original field-by-field assignments.
    field_map = (
        ("journal_pubnote", 'journal_reference'),
        ("year", 'year'),
        ("collaboration", 'collaboration'),
        ("title", 'title'),
        ("misc", 'misc'),
        ("number", 'linemarker'),
        ("authors", 'author'),
        ("isbn", 'isbn'),
        ("doi", 'doi'),
        ("report_number", 'reportnumber'),
        ("publisher", 'publisher'),
        ("recid", 'recid'),
    )

    references = extract_references_from_file(
        filepath,
        reference_format="{title},{volume},{page}",
        override_kbs_files={
            'journals': get_mappings_from_kbname(cfg['REFEXTRACT_KB_NAME'])
        }
    )

    mapped_references = []
    # A missing or empty 'references' entry yields an empty result.
    for ref in references.get('references') or []:
        reference = {}
        for target_key, source_key in field_map:
            value = ref.get(source_key)
            if not value:
                # Empty fields are left out. Building the dict this way
                # avoids deleting keys from a dict while iterating over it,
                # which raises RuntimeError on Python 3.
                continue
            if isinstance(value, list):
                value = ",".join(value)
            reference[target_key] = value
        mapped_references.append(reference)
    return mapped_references
def extract_journal_info(obj, eng):
    """Extract journal, volume etc. from any freetext publication info.

    Workflow task: for every entry of the record's ``publication_info``
    that carries a ``pubinfo_freetext`` value, the text is parsed with
    ``extract_journal_reference`` and the recognised parts are written back
    onto that entry. The record's ``publication_info`` is then replaced
    and ``model.update()`` is called.

    Nothing is changed (and ``model.update()`` is not called) when the
    record has no publication info.

    :param obj: workflow object handed to ``eng.workflow_definition.model``.
    :param eng: workflow engine providing ``workflow_definition``.
    """
    model = eng.workflow_definition.model(obj)
    record = get_record_from_model(model)

    publication_info = record.get("publication_info")
    if not publication_info:
        return

    # (key in the parsed result, key set on the pubnote), applied in order.
    extracted_to_pubnote = (
        ("volume", "journal_volume"),
        ("title", "journal_title"),
        ("year", "year"),
        ("page", "page_artid"),
    )

    updated_pubnotes = []
    for pubnote in publication_info:
        parsed = None
        raw_text = pubnote.get("pubinfo_freetext")
        if raw_text:
            journals_kb = get_mappings_from_kbname(cfg['REFEXTRACT_KB_NAME'])
            parsed = extract_journal_reference(
                raw_text,
                override_kbs_files={'journals': journals_kb}
            )
        if parsed:
            for source_key, target_key in extracted_to_pubnote:
                # Presence of the key decides, not the truthiness of its
                # value.
                if source_key in parsed:
                    pubnote[target_key] = parsed.get(source_key)
        # Every pubnote is kept, whether or not anything was extracted.
        updated_pubnotes.append(pubnote)

    record["publication_info"] = updated_pubnotes
    model.update()