def _normalize(a_value):
    """Map a raw category value onto a known category name.

    The comparison is case-insensitive and proceeds in three steps:

    1. the categories returned by ``valid_arxiv_categories()``; a match is
       returned after going through ``normalize_arxiv_category``,
    2. the ``term`` enum of the ``elements/inspire_field`` schema; a match
       is returned as spelled in the schema,
    3. a table of single-letter field codes.

    Returns ``None`` when none of the three steps finds a match.
    """
    needle = a_value.lower()

    arxiv_match = next(
        (
            candidate for candidate in valid_arxiv_categories()
            if needle == candidate.lower()
        ),
        None,
    )
    if arxiv_match is not None:
        return normalize_arxiv_category(arxiv_match)

    inspire_field_schema = load_schema('elements/inspire_field')
    inspire_match = next(
        (
            term for term in inspire_field_schema['properties']['term']['enum']
            if needle == term.lower()
        ),
        None,
    )
    if inspire_match is not None:
        return inspire_match

    # Last resort: the value is one of the single-letter field codes.
    inspire_category_by_code = {
        'a': 'Astrophysics',
        'b': 'Accelerators',
        'c': 'Computing',
        'e': 'Experiment-HEP',
        'g': 'Gravitation and Cosmology',
        'i': 'Instrumentation',
        'l': 'Lattice',
        'm': 'Math and Math Physics',
        'n': 'Theory-Nucl',
        'o': 'Other',
        'p': 'Phenomenology-HEP',
        'q': 'General Physics',
        't': 'Theory-HEP',
        'x': 'Experiment-Nucl',
    }
    return inspire_category_by_code.get(needle)
def arxiv_categories(self):
    """Return the normalized, deduplicated arXiv categories of the record.

    The text of the first ``categories`` node found under ``self.root`` is
    split on whitespace, every token is passed through
    ``normalize_arxiv_category`` and the resulting list is handed to
    ``dedupe_list``.

    When no ``categories`` node is present, no tokens are produced, so
    ``dedupe_list`` receives an empty list.
    """
    # The fallback must be an empty string: a default of '[]' would be
    # split into the single token '[]' and reported as a category.
    raw_categories = self.root.xpath('.//categories/text()').extract_first(
        default='')

    normalized = [
        normalize_arxiv_category(arxiv_cat)
        for arxiv_cat in raw_categories.split()
    ]

    return dedupe_list(normalized)
def arxiv_categories(self, key, value):
    """Convert the ``a`` subfield of ``value`` into an arXiv category.

    Two values are special-cased, ignoring case: ``physics-other`` is
    dropped (``None`` is returned) and ``physics.acc-phys`` becomes
    ``physics.acc-ph``. Anything else is delegated to
    ``normalize_arxiv_category``.
    """
    special_cases = {
        'physics-other': None,
        'physics.acc-phys': 'physics.acc-ph',
    }

    lowered = value.get('a', '').lower()
    if lowered in special_cases:
        return special_cases[lowered]

    # Pass the subfield exactly as stored (``None`` when it is absent).
    return normalize_arxiv_category(value.get('a'))
def add_arxiv_categories(record, blob):
    """Append arXiv categories found in ``blob`` to the first eprint.

    ``record`` is returned untouched when it has no ``arxiv_eprints`` or
    when ``blob`` has no ``65017`` entry. Otherwise every ``65017`` field
    whose ``2`` subfield is ``arXiv`` and whose ``a`` subfield is non-empty
    contributes its normalized ``a`` value to the ``categories`` list of
    ``record['arxiv_eprints'][0]``.

    The record is modified in place and also returned.
    """
    if not (record.get('arxiv_eprints') and blob.get('65017')):
        return record

    for field in force_list(get_value(blob, '65017')):
        if field.get('2') != 'arXiv' or not field.get('a'):
            continue

        first_eprint = record['arxiv_eprints'][0]
        first_eprint['categories'].append(
            normalize_arxiv_category(field['a']))

    return record
def arxiv_eprints(self, key, value):
    """Populate the ``arxiv_eprints`` key.

    Also populates the ``report_numbers`` key through side effects.

    Each field in ``value`` is classified by its ``9`` (source) subfield:

    * source equal to ``arxiv`` (ignoring case): an eprint is recorded,
      using the part of the ``a`` subfield after the last ``:`` as value
      and the normalized ``c`` subfields as categories;
    * otherwise, a non-empty ``z`` subfield yields a hidden report number;
    * otherwise the ``a`` subfield yields a regular report number.

    For report numbers the source ``arXiv:reportnumber`` is stored as
    ``arXiv``.
    """
    eprints = self.get('arxiv_eprints', [])
    numbers = self.get('report_numbers', [])

    for field in force_list(value):
        id_ = force_single_element(field.get('a', ''))
        other_id = force_single_element(field.get('z', ''))
        categories = [
            normalize_arxiv_category(raw_category)
            for raw_category in force_list(field.get('c'))
        ]
        source = force_single_element(field.get('9', ''))

        if source.lower() == 'arxiv':
            eprints.append({
                'categories': categories,
                'value': id_.split(':')[-1],
            })
            continue

        clean_source = 'arXiv' if source == 'arXiv:reportnumber' else source

        if other_id:
            numbers.append({
                'hidden': True,
                'source': clean_source,
                'value': other_id,
            })
        else:
            numbers.append({
                'source': clean_source,
                'value': id_,
            })

    self['report_numbers'] = numbers
    return eprints
def test_normalize_arxiv_category_returns_existing_category_for_wrong_caps():
    """A category with the wrong capitalization maps to ``hep-th``."""
    assert utils.normalize_arxiv_category('HeP-Th') == 'hep-th'
def test_normalize_arxiv_category_returns_existing_category_for_obsolete():
    """The obsolete ``funct-an`` category maps to ``math.FA``."""
    assert utils.normalize_arxiv_category('funct-an') == 'math.FA'
def test_normalize_arxiv_category_returns_input_for_inexistent_category():
    """A value that is not a known category is returned unchanged."""
    unknown_category = u'😃'
    assert utils.normalize_arxiv_category(unknown_category) == u'😃'
def test_normalize_arxiv_category_returns_input_for_correct_category():
    """An already correct category is returned unchanged."""
    assert utils.normalize_arxiv_category('hep-th') == 'hep-th'
def test_normalize_arxiv_category_returns_existing_category_when_dot_is_dash():
    """``math-fa``, written with a dash for the dot, maps to ``math.FA``."""
    assert utils.normalize_arxiv_category('math-fa') == 'math.FA'