def normalize_field_categories(sender, *args, **kwargs):
    """Normalize the entries of ``sender['field_categories']`` in place.

    An entry is treated as already normalized, and skipped, when its
    ``scheme`` equals 'INSPIRE' or when it already carries a ``_scheme`` or
    ``_term`` key.

    Every other entry is updated with:

    * ``_term`` / ``_scheme``: the original term and scheme. If the original
      scheme is a list or tuple only its first element is kept (``None``
      when the sequence is empty).
    * ``term``: whatever ``classify_field`` returns for the original term.
    * ``scheme``: 'INSPIRE' when that result is truthy, ``None`` otherwise.
    * ``source``: only touched when the entry has a truthy source; it becomes
      'INSPIRE' if it contains 'automatically', else it is kept unchanged.

    Args:
        sender: mapping that may hold a ``field_categories`` list of dicts.
        *args: accepted and ignored.
        **kwargs: accepted and ignored.

    Returns:
        None. ``sender`` is modified in place.
    """
    for idx, field in enumerate(sender.get('field_categories', [])):
        already_normalized = (
            field.get('scheme') == "INSPIRE"
            or '_scheme' in field
            or '_term' in field
        )
        if already_normalized:
            continue

        original_term = field.get('term')
        normalized_term = classify_field(original_term)
        scheme = 'INSPIRE' if normalized_term else None

        original_scheme = field.get('scheme')
        if isinstance(original_scheme, (list, tuple)):
            # Keep the first scheme only; an empty sequence used to raise
            # IndexError here, it now simply means "no original scheme".
            original_scheme = original_scheme[0] if original_scheme else None

        updated_field = {
            '_scheme': original_scheme,
            'scheme': scheme,
            '_term': original_term,
            'term': normalized_term,
        }

        source = field.get('source')
        if source:
            if 'automatically' in source:
                source = 'INSPIRE'
            updated_field['source'] = source

        sender['field_categories'][idx].update(updated_field)
def normalize_field_categories(sender, *args, **kwargs):
    """Normalize the content of the `field_categories` key in place.

    We use the heuristic that a field is normalized if its scheme is
    'INSPIRE' or if it contains either the `_scheme` key or the `_term` key.
    Such fields are left untouched.

    If the field wasn't normalized, its original term and scheme are saved
    under `_term` and `_scheme` (for a list or tuple scheme only the first
    element is kept, or ``None`` when it is empty), `term` is replaced by the
    output of `classify_field`, and `scheme` becomes 'INSPIRE' when that
    output is truthy and different from 'Other', ``None`` otherwise.

    We also use the heuristic that the source is 'INSPIRE' if it contains the
    word 'automatically', otherwise we preserve it. A missing or falsy source
    is not touched.

    Args:
        sender: mapping that may hold a `field_categories` list of dicts.
        *args: accepted and ignored.
        **kwargs: accepted and ignored.

    Returns:
        None. ``sender`` is modified in place.
    """
    def _is_normalized(field):
        # True when the field already went through this normalization.
        scheme_is_inspire = field.get('scheme') == 'INSPIRE'
        return scheme_is_inspire or '_scheme' in field or '_term' in field

    def _is_from_inspire(term):
        # 'Other' and falsy terms do not get the 'INSPIRE' scheme.
        return term and term != 'Other'

    def _first_scheme(scheme):
        # Collapse a list/tuple scheme to its first element. An empty
        # sequence yields None instead of raising IndexError.
        if isinstance(scheme, (list, tuple)):
            return scheme[0] if scheme else None
        return scheme

    for i, field in enumerate(sender.get('field_categories', [])):
        if _is_normalized(field):
            continue

        original_term = field.get('term')
        normalized_term = classify_field(original_term)
        scheme = 'INSPIRE' if _is_from_inspire(normalized_term) else None

        updated_field = {
            '_scheme': _first_scheme(field.get('scheme')),
            'scheme': scheme,
            '_term': original_term,
            'term': normalized_term,
        }

        source = field.get('source')
        if source:
            if 'automatically' in source:
                source = 'INSPIRE'
            updated_field['source'] = source

        sender['field_categories'][i].update(updated_field)
def test_classify_field_falls_back_on_other():
    """`classify_field('FOO')` is expected to yield the 'Other' category."""
    actual = classify_field('FOO')

    assert actual == 'Other'
def test_classify_field_ignores_case():
    """The upper-cased 'ASTRO-PH.CO' is expected to map to 'Astrophysics'."""
    actual = classify_field('ASTRO-PH.CO')

    assert actual == 'Astrophysics'
def test_classify_field_returns_category_if_found_among_values():
    """Passing 'Astrophysics' is expected to return 'Astrophysics' itself."""
    actual = classify_field('Astrophysics')

    assert actual == 'Astrophysics'
def test_classify_field_returns_category_if_found_among_keys():
    """Passing 'alg-geom' is expected to return 'Math and Math Physics'."""
    actual = classify_field('alg-geom')

    assert actual == 'Math and Math Physics'
def test_classify_field_returns_none_on_non_string_value():
    """The integer 0 is expected to be classified as None."""
    outcome = classify_field(0)

    assert outcome is None
def test_classify_field_returns_none_on_falsy_value():
    """The empty string is expected to be classified as None."""
    outcome = classify_field('')

    assert outcome is None
def test_classify_field_returns_none_on_unknown_values():
    """`classify_field('FOO')` is expected to return None."""
    outcome = classify_field('FOO')

    assert outcome is None