def test_convert_old_publication_info_to_new_handles_hidden_with_volume_variations():
    """A hidden entry that only differs by letter position collapses away.

    The input carries a visible ``72B`` entry and a hidden ``B72`` entry for
    the same journal; the conversion is expected to yield a single entry.
    """
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'journal_record': {
                '$ref': 'http://localhost:5000/api/journals/1214521',
            },
            'journal_title': 'Phys.Lett.',
            'journal_volume': '72B',
        },
        {
            'hidden': True,
            'journal_title': 'Phys.Lett.',
            'journal_volume': 'B72',
        },
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'journal_title': 'Phys.Lett.B',
            'journal_volume': '72',
        },
    ]
    assert wanted == converted
def _preprocess_journal_query_value(third_journal_field, old_publication_info_values):
    """Turn a comma-separated journal query value into new-style publication info.

    Args:
        third_journal_field (six.text_type): Key under which the third
            comma-separated piece (if any) is stored.
        old_publication_info_values (six.text_type): Old publication info, as
            ``title``, ``title,volume`` or ``title,volume,artid/page_start``.

    Returns:
        (dict) The first entry produced by
        :meth:`inspire_schemas.utils.convert_old_publication_info_to_new`.

    Note:
        Empty pieces are discarded *before* pairing with the field names, so
        the pieces that follow shift one position to the left. Pieces that are
        only whitespace keep their position but contribute no key.
    """
    field_names = (
        ElasticSearchVisitor.JOURNAL_TITLE,
        ElasticSearchVisitor.JOURNAL_VOLUME,
        third_journal_field,
    )

    stripped_pieces = []
    for raw_piece in old_publication_info_values.split(','):
        if raw_piece:
            stripped_pieces.append(raw_piece.strip())

    # Build the single old-style entry expected by the converter.
    legacy_entry = {}
    for field_name, piece in zip(field_names, stripped_pieces):
        if piece:
            legacy_entry[field_name] = piece

    # The converter is assumed to always hand back a non-empty list here; a
    # journal query without a value yields a malformed query instead.
    return convert_old_publication_info_to_new([legacy_entry])[0]
def test_convert_old_publication_info_to_new_excludes_specific_journal_titles():
    """An ``eConf`` entry with volume ``C16`` must come back unchanged."""
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'journal_record': {
                '$ref': 'http://localhost:5000/api/journals/1213107',
            },
            'journal_title': 'eConf',
            'journal_volume': 'C16',
        },
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'journal_record': {
                '$ref': 'http://localhost:5000/api/journals/1213107',
            },
            'journal_title': 'eConf',
            'journal_volume': 'C16',
        },
    ]
    assert wanted == converted
def test_convert_old_publication_info_to_new_does_not_raise_when_deducing_year_from_malformed_volume():
    """A JHEP volume of ``S160`` is passed through untouched, without raising."""
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'artid': '159',
            'journal_title': 'JHEP',
            'journal_volume': 'S160',
            'page_start': '159',
        },
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'artid': '159',
            'journal_title': 'JHEP',
            'journal_volume': 'S160',
            'page_start': '159',
        },
    ]
    assert wanted == converted
def test_convert_old_publication_info_to_new_handles_volumes_with_letters_in_the_middle():
    """Only the leading letter of ``A28S1`` moves to the title; ``28S1`` stays."""
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'journal_record': {
                '$ref': 'http://localhost:5000/api/journals/1214764',
            },
            'journal_title': 'Eur.Phys.J.',
            'journal_volume': 'A28S1',
        },
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'journal_title': 'Eur.Phys.J.A',
            'journal_volume': '28S1',
        },
    ]
    assert wanted == converted
def test_convert_old_publication_info_to_new_handles_year_added_to_volumes():
    """JHEP volume ``1709`` with year 2017 is reduced to volume ``09``."""
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'artid': '137',
            'journal_title': 'JHEP',
            'journal_volume': '1709',
            'year': 2017,
            'page_start': '137',
        }
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'artid': '137',
            'journal_title': 'JHEP',
            'journal_volume': '09',
            'year': 2017,
            'page_start': '137',
        }
    ]
    assert wanted == converted
def test_convert_old_publication_info_to_new_deduces_year_from_volume():
    """JHEP volume ``9905`` without a year becomes volume ``05`` and year 1999."""
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'artid': '065',
            'journal_title': 'JHEP',
            'journal_volume': '9905',
            'page_start': '065',
        },
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'artid': '065',
            'journal_title': 'JHEP',
            'journal_volume': '05',
            'page_start': '065',
            'year': 1999,
        },
    ]
    assert wanted == converted
def test_convert_old_publication_info_to_new_handles_journal_titles_not_ending_with_a_dot():
    """For a title without a trailing dot the letter is appended after a space."""
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'journal_record': {
                '$ref': 'http://localhost:5000/api/journals/1214745',
            },
            'journal_title': 'Fizika',
            'journal_volume': 'B19',
        },
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'journal_title': 'Fizika B',
            'journal_volume': '19',
        },
    ]
    assert wanted == converted
def test_convert_old_publication_info_to_new_handles_renamed_journals():
    """``Nucl.Phys.Proc.Suppl.`` is rewritten to ``Nucl.Phys.B Proc.Suppl.``."""
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'artid': '525',
            'journal_title': 'Nucl.Phys.Proc.Suppl.',
            'journal_volume': '118',
            'page_start': '525',
        }
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'artid': '525',
            'journal_title': 'Nucl.Phys.B Proc.Suppl.',
            'journal_volume': '118',
            'page_start': '525',
        }
    ]
    assert wanted == converted
def test_convert_old_publication_info_to_new_handles_journal_titles_with_already_a_letter():
    """A title already ending in a letter, with a numeric volume, is unchanged."""
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'journal_record': {
                '$ref': 'http://localhost:5000/api/journals/1213787',
            },
            'journal_title': 'Kumamoto J.Sci.Ser.A',
            'journal_volume': '13',
        },
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'journal_record': {
                '$ref': 'http://localhost:5000/api/journals/1213787',
            },
            'journal_title': 'Kumamoto J.Sci.Ser.A',
            'journal_volume': '13',
        },
    ]
    assert wanted == converted
def test_convert_old_publication_info_to_new():
    """Baseline case: ``Phys.Rev.`` / ``C48`` becomes ``Phys.Rev.C`` / ``48``."""
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'journal_record': {
                '$ref': 'http://localhost:5000/api/journals/1214516',
            },
            'journal_title': 'Phys.Rev.',
            'journal_volume': 'C48',
        },
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'journal_title': 'Phys.Rev.C',
            'journal_volume': '48',
        },
    ]
    assert wanted == converted
def test_convert_old_publication_info_to_new_handles_volumes_with_dashes():
    """A dashed volume range ``A626-627`` keeps the range after the letter moves."""
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'journal_record': {
                '$ref': 'http://localhost:5000/api/journals/1214551',
            },
            'journal_title': 'Nucl.Instrum.Meth.',
            'journal_volume': 'A626-627',
        },
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'journal_title': 'Nucl.Instrum.Meth.A',
            'journal_volume': '626-627',
        },
    ]
    assert wanted == converted
def convert_publication_infos(record, blob):
    """Convert a record's ``publication_info`` to the new format, in place.

    Args:
        record: mapping that may carry a ``publication_info`` key.
        blob: not used by this function.

    Returns:
        The same ``record`` object; it is left untouched when
        ``publication_info`` is missing or empty.
    """
    if record.get('publication_info'):
        record['publication_info'] = convert_old_publication_info_to_new(
            record['publication_info'])

    return record
def extract_journal_info(obj, eng):
    """Populate publication info keys from each entry's ``pubinfo_freetext``.

    For every entry of ``obj.data['publication_info']`` that has a
    ``pubinfo_freetext`` key, ``extract_journal_reference`` is run on it and
    any title, volume, page/artid and year it reports are copied onto the
    entry. Afterwards the whole list is passed through
    ``convert_old_publication_info_to_new``.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    if not obj.data.get('publication_info'):
        return

    for pub_info in obj.data['publication_info']:
        try:
            with local_refextract_kbs_path() as kbs_path:
                # A missing ``pubinfo_freetext`` raises KeyError here, which
                # the handler below turns into "skip this entry".
                extracted = extract_journal_reference(
                    pub_info['pubinfo_freetext'],
                    override_kbs_files=kbs_path,
                )

            if not extracted:
                continue

            title = extracted.get('title')
            if title:
                pub_info['journal_title'] = title

            volume = extracted.get('volume')
            if volume:
                pub_info['journal_volume'] = volume

            page = extracted.get('page')
            if page:
                page_start, page_end, artid = split_page_artid(page)
                if page_start:
                    pub_info['page_start'] = page_start
                if page_end:
                    pub_info['page_end'] = page_end
                if artid:
                    pub_info['artid'] = artid

            raw_year = extracted.get('year')
            if raw_year:
                year = maybe_int(raw_year)
                if year:
                    pub_info['year'] = year
        except KeyError:
            pass

    obj.data['publication_info'] = convert_old_publication_info_to_new(
        obj.data['publication_info'])
def extract_journal_info(obj, eng):
    """Fill in journal fields parsed out of ``pubinfo_freetext``.

    Each ``publication_info`` entry carrying a ``pubinfo_freetext`` key is fed
    to ``extract_journal_reference``; the reported title, volume, page/artid
    and year (when truthy) are written back onto that entry. The resulting
    list is then converted with ``convert_old_publication_info_to_new``.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    entries = obj.data.get('publication_info')
    if not entries:
        return

    for entry in obj.data['publication_info']:
        try:
            with local_refextract_kbs_path() as kbs_path:
                parsed = extract_journal_reference(
                    entry['pubinfo_freetext'],
                    override_kbs_files=kbs_path,
                )

            if not parsed:
                continue

            if parsed.get('title'):
                entry['journal_title'] = parsed['title']

            if parsed.get('volume'):
                entry['journal_volume'] = parsed['volume']

            if parsed.get('page'):
                first_page, last_page, article_id = split_page_artid(parsed['page'])
                if first_page:
                    entry['page_start'] = first_page
                if last_page:
                    entry['page_end'] = last_page
                if article_id:
                    entry['artid'] = article_id

            if parsed.get('year'):
                parsed_year = maybe_int(parsed['year'])
                if parsed_year:
                    entry['year'] = parsed_year
        except KeyError:
            # Entries without ``pubinfo_freetext`` end up here and are skipped.
            pass

    obj.data['publication_info'] = convert_old_publication_info_to_new(obj.data['publication_info'])
def convert_old_publication_info_to_new(self, reference):
    """Convert ``reference['reference']['publication_info']`` to the new format.

    The current value is read with ``get_value`` (defaulting to ``{}``),
    wrapped in a one-element list, converted, and the first converted entry is
    written back. Any exception is logged and swallowed, so the caller always
    gets ``reference`` back.

    Args:
        reference: mapping holding a nested ``reference.publication_info``.

    Returns:
        The same ``reference`` object, updated in place on success.
    """
    # Bound before the ``try`` so the error handler can always log it; without
    # this, a failure inside ``get_value`` would make the handler itself raise
    # UnboundLocalError and the original error would never be logged.
    publication_info = None
    try:
        publication_info = [
            get_value(reference, "reference.publication_info", default={})
        ]
        # NOTE(review): assuming this is a method (it takes ``self``), the call
        # below resolves to the module-level function of the same name.
        converted_publication_info = convert_old_publication_info_to_new(
            publication_info)
        reference["reference"]["publication_info"] = converted_publication_info[0]
        return reference
    except Exception as e:
        LOGGER.exception(
            "Error converting old `publication_info` to new.",
            publication_info=publication_info,
            reference=reference,
            exec=e,
        )
        return reference
def test_convert_old_publication_info_to_new_does_not_double_letters():
    """A title already ending in ``A`` does not get a second ``A`` from ``A120``."""
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'journal_title': 'Proc.Roy.Soc.Lond.A',
            'journal_volume': 'A120',
        },
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'journal_title': 'Proc.Roy.Soc.Lond.A',
            'journal_volume': '120',
        },
    ]
    assert wanted == converted
def test_convert_old_publication_info_to_new_does_not_double_letters_when_letter_with_volume():
    """Renaming to ``Nucl.Phys.B Proc.Suppl.`` does not also append ``B`` from ``B120``."""
    pubinfo_schema = utils.load_schema('hep')['properties']['publication_info']

    old_entries = [
        {
            'journal_title': 'Nucl.Phys.Proc.Suppl.',
            'journal_volume': 'B120',
        },
    ]
    assert utils.validate(old_entries, pubinfo_schema) is None

    converted = utils.convert_old_publication_info_to_new(old_entries)
    assert utils.validate(converted, pubinfo_schema) is None

    wanted = [
        {
            'journal_title': 'Nucl.Phys.B Proc.Suppl.',
            'journal_volume': '120',
        },
    ]
    assert wanted == converted