def extract_journal_info(obj, eng):
    """Extract journal, volume etc. from any freetext publication info.

    For every ``publication_info`` entry of ``obj.data`` that carries a
    non-empty ``pubinfo_freetext``, run ``extract_journal_reference`` on that
    text and copy the ``volume``, ``title``, ``year`` and ``page`` keys of the
    result (when present) into the entry as ``journal_volume``,
    ``journal_title``, ``year`` and ``page_artid``. Entries are updated in
    place and ``obj.data["publication_info"]`` is replaced by the list of
    processed entries.

    Empty entries (``None`` or ``{}``) are skipped instead of raising
    ``AttributeError``; they carry no information and are left out of the
    resulting list.

    Args:
        obj: a workflow object; only ``obj.data`` is read and written.
        eng: a workflow engine (unused here).

    Returns:
        None
    """
    publication_info = obj.data.get("publication_info")
    if not publication_info:
        return

    # (key in the extractor result, key written on the publication note);
    # the order is the order in which keys are added to the note.
    field_map = (
        ("volume", "journal_volume"),
        ("title", "journal_title"),
        ("year", "year"),
        ("page", "page_artid"),
    )

    new_publication_info = []
    for pubnote in publication_info:
        if not pubnote:
            # Nothing to read from and nothing worth keeping.
            continue

        freetext = pubnote.get("pubinfo_freetext")
        if freetext:
            extracted_publication_info = extract_journal_reference(freetext)
            if extracted_publication_info:
                for source_key, target_key in field_map:
                    # Presence of the key decides, not truthiness of its value.
                    if source_key in extracted_publication_info:
                        pubnote[target_key] = (
                            extracted_publication_info[source_key]
                        )

        new_publication_info.append(pubnote)

    obj.data["publication_info"] = new_publication_info
def extract_journal_info(obj, eng):
    """Extract the journal information from ``pubinfo_freetext``.

    Runs ``extract_journal_reference`` on the ``pubinfo_freetext`` key of each
    ``publication_info``, if it exists, and uses the extracted information to
    populate the other keys (``journal_title``, ``journal_volume``,
    ``page_start``, ``page_end``, ``artid`` and ``year``). Only truthy
    extracted values are written. Finally the whole list is passed through
    ``convert_old_publication_info_to_new`` and stored back on ``obj.data``.

    Entries without a ``pubinfo_freetext`` key are left untouched. A
    ``KeyError`` raised by any of the helpers is no longer swallowed: only the
    absence of the freetext key is treated as "nothing to do".

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None
    """
    publication_infos = obj.data.get('publication_info')
    if not publication_infos:
        return

    for publication_info in publication_infos:
        # Explicit membership test instead of a blanket ``except KeyError``
        # around the whole body, which also hid errors from the helpers.
        if 'pubinfo_freetext' not in publication_info:
            continue

        # The knowledge-base path is only acquired when there is text to parse.
        with local_refextract_kbs_path() as kbs_path:
            extracted = extract_journal_reference(
                publication_info['pubinfo_freetext'],
                override_kbs_files=kbs_path,
            )

        if not extracted:
            continue

        title = extracted.get('title')
        if title:
            publication_info['journal_title'] = title

        volume = extracted.get('volume')
        if volume:
            publication_info['journal_volume'] = volume

        page = extracted.get('page')
        if page:
            page_start, page_end, artid = split_page_artid(page)
            if page_start:
                publication_info['page_start'] = page_start
            if page_end:
                publication_info['page_end'] = page_end
            if artid:
                publication_info['artid'] = artid

        raw_year = extracted.get('year')
        if raw_year:
            year = maybe_int(raw_year)
            if year:
                publication_info['year'] = year

    obj.data['publication_info'] = convert_old_publication_info_to_new(
        publication_infos)
def extract_journal_info(obj, eng):
    """Extract the journal information from ``pubinfo_freetext``.

    Runs ``extract_journal_reference`` on the ``pubinfo_freetext`` key of each
    ``publication_info``, if it exists, and uses the extracted information to
    populate the other keys (``journal_title``, ``journal_volume``,
    ``page_start``, ``page_end``, ``artid`` and ``year``). Only truthy
    extracted values are written. Finally the whole list is passed through
    ``convert_old_publication_info_to_new`` and stored back on ``obj.data``.

    Entries without a ``pubinfo_freetext`` key are left untouched. A
    ``KeyError`` raised by any of the helpers is no longer swallowed: only the
    absence of the freetext key is treated as "nothing to do".

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None
    """
    publication_infos = obj.data.get('publication_info')
    if not publication_infos:
        return

    for publication_info in publication_infos:
        # Explicit membership test instead of a blanket ``except KeyError``
        # around the whole body, which also hid errors from the helpers.
        if 'pubinfo_freetext' not in publication_info:
            continue

        # The knowledge-base path is only acquired when there is text to parse.
        with local_refextract_kbs_path() as kbs_path:
            extracted = extract_journal_reference(
                publication_info['pubinfo_freetext'],
                override_kbs_files=kbs_path,
            )

        if not extracted:
            continue

        title = extracted.get('title')
        if title:
            publication_info['journal_title'] = title

        volume = extracted.get('volume')
        if volume:
            publication_info['journal_volume'] = volume

        page = extracted.get('page')
        if page:
            page_start, page_end, artid = split_page_artid(page)
            if page_start:
                publication_info['page_start'] = page_start
            if page_end:
                publication_info['page_end'] = page_end
            if artid:
                publication_info['artid'] = artid

        raw_year = extracted.get('year')
        if raw_year:
            year = maybe_int(raw_year)
            if year:
                publication_info['year'] = year

    obj.data['publication_info'] = convert_old_publication_info_to_new(
        publication_infos)
def extract_journal_info(obj, eng):
    """Populate structured journal fields from freetext publication info.

    Each non-empty ``publication_info`` entry with a ``pubinfo_freetext``
    value is run through ``extract_journal_reference`` (list or tuple values
    are first joined with ``". "``). From the result, ``volume`` and ``title``
    are copied to ``journal_volume`` and ``journal_title``; ``year`` is passed
    through ``maybe_int`` and stored unless that yields ``None``; ``page`` is
    split with ``split_page_artid`` into ``page_start``, ``page_end`` and
    ``artid``, each stored only when truthy.

    Entries are kept in the rewritten list only if at least one of their
    values is truthy.

    Args:
        obj: a workflow object; ``obj.data`` is read and updated.
        eng: a workflow engine (unused here).

    Returns:
        None
    """
    pubnotes = get_value(obj.data, "publication_info")
    if not pubnotes:
        return

    # Extractor keys copied verbatim, in the order they are written.
    verbatim_fields = (
        ("volume", "journal_volume"),
        ("title", "journal_title"),
    )

    kept = []
    for pubnote in pubnotes:
        if not pubnote:
            continue

        raw_reference = pubnote.get("pubinfo_freetext")
        if raw_reference:
            if isinstance(raw_reference, (list, tuple)):
                raw_reference = ". ".join(raw_reference)

            # NOTE: no knowledge-base override is passed to the extractor.
            parsed = extract_journal_reference(raw_reference)
            if parsed:
                for parsed_key, note_key in verbatim_fields:
                    if parsed_key in parsed:
                        pubnote[note_key] = parsed.get(parsed_key)

                if "year" in parsed:
                    numeric_year = maybe_int(parsed.get('year'))
                    if numeric_year is not None:
                        pubnote['year'] = numeric_year

                if "page" in parsed:
                    first_page, last_page, article_id = split_page_artid(
                        parsed.get("page"))
                    if first_page:
                        pubnote["page_start"] = first_page
                    if last_page:
                        pubnote["page_end"] = last_page
                    if article_id:
                        pubnote["artid"] = article_id

        # Discard notes whose values are all empty.
        if any(pubnote.values()):
            kept.append(pubnote)

    obj.data["publication_info"] = kept
def extract_journal_info(obj, eng):
    """Extract journal, volume etc. from any freetext publication info.

    Each non-empty ``publication_info`` entry with a ``pubinfo_freetext``
    value is run through ``extract_journal_reference`` (list or tuple values
    are first joined with ``". "``). From the result, ``volume``, ``title``
    and ``year`` are copied to ``journal_volume``, ``journal_title`` and
    ``year`` when present, and ``page`` is split with ``split_page_artid``
    into ``page_start``, ``page_end`` and ``artid``, each stored only when
    truthy.

    Entries without freetext are kept unchanged. Previously they were skipped
    before being appended and therefore silently removed from the record.
    Empty entries (``None`` or ``{}``) are still dropped.

    Args:
        obj: a workflow object; ``obj.data`` is read and updated.
        eng: a workflow engine (unused here).

    Returns:
        None
    """
    publication_info = get_value(obj.data, "publication_info")
    if not publication_info:
        return

    # Extractor keys copied verbatim, in the order they are written.
    verbatim_fields = (
        ("volume", "journal_volume"),
        ("title", "journal_title"),
        ("year", "year"),
    )

    new_publication_info = []
    for pubnote in publication_info:
        if not pubnote:
            continue

        freetext = pubnote.get("pubinfo_freetext")
        if freetext:
            if isinstance(freetext, (list, tuple)):
                freetext = ". ".join(freetext)

            extracted_publication_info = extract_journal_reference(freetext)
            if extracted_publication_info:
                for source_key, target_key in verbatim_fields:
                    # Presence of the key decides, not truthiness of its value.
                    if source_key in extracted_publication_info:
                        pubnote[target_key] = (
                            extracted_publication_info[source_key]
                        )

                if "page" in extracted_publication_info:
                    page_start, page_end, artid = split_page_artid(
                        extracted_publication_info["page"])
                    if page_start:
                        pubnote["page_start"] = page_start
                    if page_end:
                        pubnote["page_end"] = page_end
                    if artid:
                        pubnote["artid"] = artid

        # Append regardless of whether freetext was present, so that already
        # structured notes survive the rewrite of the list.
        new_publication_info.append(pubnote)

    obj.data["publication_info"] = new_publication_info
def extract_journal_info(obj, eng):
    """Extract journal, volume etc. from any freetext publication info.

    The record is obtained from the workflow model of ``obj``. For every
    ``publication_info`` entry with a non-empty ``pubinfo_freetext``,
    ``extract_journal_reference`` is run with the journal knowledge base named
    by ``cfg['REFEXTRACT_KB_NAME']``, and the ``volume``, ``title``, ``year``
    and ``page`` keys of the result (when present) are copied into
    ``journal_volume``, ``journal_title``, ``year`` and ``page_artid``.
    The record is then written back through ``model.update()``.

    The knowledge-base mappings are loaded at most once per call, on the
    first entry that needs them, instead of once per freetext entry.

    Args:
        obj: a workflow object.
        eng: a workflow engine; ``eng.workflow_definition.model`` is used to
            build the model for ``obj``.

    Returns:
        None
    """
    model = eng.workflow_definition.model(obj)
    record = get_record_from_model(model)
    publication_info = record.get("publication_info")
    if not publication_info:
        return

    # (key in the extractor result, key written on the publication note);
    # the order is the order in which keys are added to the note.
    field_map = (
        ("volume", "journal_volume"),
        ("title", "journal_title"),
        ("year", "year"),
        ("page", "page_artid"),
    )

    journal_mappings = None  # loaded lazily; identical for every entry
    new_publication_info = []
    for pubnote in publication_info:
        freetext = pubnote.get("pubinfo_freetext")
        if freetext:
            if journal_mappings is None:
                journal_mappings = get_mappings_from_kbname(
                    cfg['REFEXTRACT_KB_NAME'])

            extracted_publication_info = extract_journal_reference(
                freetext,
                override_kbs_files={'journals': journal_mappings},
            )
            if extracted_publication_info:
                for source_key, target_key in field_map:
                    # Presence of the key decides, not truthiness of its value.
                    if source_key in extracted_publication_info:
                        pubnote[target_key] = (
                            extracted_publication_info[source_key]
                        )

        new_publication_info.append(pubnote)

    record["publication_info"] = new_publication_info
    model.update()