def extract_journal_info(obj, eng):
    """Populate journal fields from each entry's ``pubinfo_freetext``.

    For every item of ``obj.data['publication_info']`` the value stored under
    ``pubinfo_freetext`` is passed to ``extract_journal_reference``. The truthy
    parts of the result are written back into the same item as
    ``journal_title``, ``journal_volume``, ``page_start``, ``page_end``,
    ``artid`` and ``year``. Afterwards the whole list is replaced by the
    output of ``convert_old_publication_info_to_new``.

    Args:
        obj: a workflow object; only ``obj.data`` is accessed.
        eng: a workflow engine (not referenced in the body).

    Returns:
        None
    """
    if not obj.data.get('publication_info'):
        return

    for pub_info in obj.data['publication_info']:
        try:
            with local_refextract_kbs_path() as kbs_path:
                parsed = extract_journal_reference(
                    pub_info['pubinfo_freetext'],
                    override_kbs_files=kbs_path,
                )

            if not parsed:
                continue

            # Plain copies: extracted key -> publication_info key.
            for source_key, target_key in (
                ('title', 'journal_title'),
                ('volume', 'journal_volume'),
            ):
                if parsed.get(source_key):
                    pub_info[target_key] = parsed[source_key]

            if parsed.get('page'):
                page_start, page_end, artid = split_page_artid(parsed['page'])
                # Only the non-empty parts of the split are stored.
                for target_key, value in (
                    ('page_start', page_start),
                    ('page_end', page_end),
                    ('artid', artid),
                ):
                    if value:
                        pub_info[target_key] = value

            if parsed.get('year'):
                year = maybe_int(parsed['year'])
                if year:
                    pub_info['year'] = year
        except KeyError:
            # Any KeyError raised above (e.g. an entry that has no
            # ``pubinfo_freetext``) ends the processing of this entry only.
            pass

    obj.data['publication_info'] = convert_old_publication_info_to_new(
        obj.data['publication_info'])
def refextract_url():
    """Run refextract on a URL.

    Reads the ``url`` key of ``request.json``, extracts references from it
    with ``extract_references_from_url`` and converts them with
    ``map_refextract_to_schema``.

    Returns:
        The result of calling ``jsonify`` on the converted references.
    """
    with local_refextract_kbs_path() as kbs_dir:
        raw_references = extract_references_from_url(
            request.json['url'],
            reference_format=u'{title},{volume},{page}',
            override_kbs_files=kbs_dir,
        )

    return jsonify(map_refextract_to_schema(raw_references))
def extract_references_from_text(text, source=None, custom_kbs_file=None):
    """Extract references from text and return in INSPIRE format.

    Args:
        text: the string handed to ``extract_references_from_string``.
        source: forwarded as ``source`` to ``map_refextract_to_schema``.
        custom_kbs_file: accepted but not referenced in the body; the
            knowledge-base path always comes from
            ``local_refextract_kbs_path``.

    Returns:
        The output of ``map_refextract_to_schema`` for the extracted
        references.
    """
    extraction_options = {
        'reference_format': u'{title},{volume},{page}',
    }
    with local_refextract_kbs_path() as kbs_dir:
        raw_references = extract_references_from_string(
            text,
            override_kbs_files=kbs_dir,
            **extraction_options
        )

    mapped = map_refextract_to_schema(raw_references, source=source)
    return mapped
def refextract_text():
    """Run refextract on a piece of text.

    Reads the ``text`` key of ``request.json``, extracts references from it
    with ``extract_references_from_string``, converts them with
    ``map_refextract_to_schema`` and passes the result through
    ``match_references``.

    Returns:
        The result of calling ``jsonify`` on the matched references.
    """
    with local_refextract_kbs_path() as kbs_dir:
        raw_references = extract_references_from_string(
            request.json['text'],
            reference_format=u'{title},{volume},{page}',
            override_kbs_files=kbs_dir,
        )

    # Two-stage post-processing: schema conversion first, then matching.
    matched = match_references(map_refextract_to_schema(raw_references))
    return jsonify(matched)
def refextract_text():
    """Run refextract on a piece of text.

    Reads the ``text`` key of ``request.json``, extracts references from it
    with ``extract_references_from_string`` and converts them with
    ``map_refextract_to_schema``.

    Returns:
        The result of calling ``jsonify`` on the converted references.
    """
    with local_refextract_kbs_path() as kbs_dir:
        raw_references = extract_references_from_string(
            request.json['text'],
            reference_format=u'{title},{volume},{page}',
            override_kbs_files=kbs_dir,
        )

    return jsonify(map_refextract_to_schema(raw_references))
def refextract_url():
    """Run refextract on a URL.

    Reads the ``url`` key of ``request.json``, extracts references from it
    with ``extract_references_from_url``, converts them with
    ``map_refextract_to_schema`` and passes the result through
    ``match_references``.

    Returns:
        The result of calling ``jsonify`` on the matched references.
    """
    with local_refextract_kbs_path() as kbs_dir:
        raw_references = extract_references_from_url(
            request.json['url'],
            reference_format=u'{title},{volume},{page}',
            override_kbs_files=kbs_dir,
        )

    # Two-stage post-processing: schema conversion first, then matching.
    matched = match_references(map_refextract_to_schema(raw_references))
    return jsonify(matched)
def extract_journal_info(obj, eng):
    """Fill in structured journal data parsed from ``pubinfo_freetext``.

    Each item of ``obj.data['publication_info']`` has its
    ``pubinfo_freetext`` value run through ``extract_journal_reference``.
    Truthy pieces of the result are stored on the item under
    ``journal_title``, ``journal_volume``, ``page_start``, ``page_end``,
    ``artid`` and ``year``. Finally the list is reassigned to the return
    value of ``convert_old_publication_info_to_new``.

    Args:
        obj: a workflow object; only ``obj.data`` is accessed.
        eng: a workflow engine (not referenced in the body).

    Returns:
        None
    """
    if not obj.data.get('publication_info'):
        return

    for entry in obj.data['publication_info']:
        try:
            with local_refextract_kbs_path() as kbs_path:
                journal = extract_journal_reference(
                    entry['pubinfo_freetext'],
                    override_kbs_files=kbs_path,
                )

            if journal:
                title = journal.get('title')
                if title:
                    entry['journal_title'] = title

                volume = journal.get('volume')
                if volume:
                    entry['journal_volume'] = volume

                page = journal.get('page')
                if page:
                    page_start, page_end, artid = split_page_artid(page)
                    # Only the non-empty parts of the split are stored.
                    if page_start:
                        entry['page_start'] = page_start
                    if page_end:
                        entry['page_end'] = page_end
                    if artid:
                        entry['artid'] = artid

                raw_year = journal.get('year')
                if raw_year:
                    year = maybe_int(raw_year)
                    if year:
                        entry['year'] = year
        except KeyError:
            # Any KeyError raised above (e.g. an entry that has no
            # ``pubinfo_freetext``) ends the processing of this entry only.
            pass

    converted = convert_old_publication_info_to_new(
        obj.data['publication_info'])
    obj.data['publication_info'] = converted