def test_normalize_name_handles_names_with_dots_initials():
    """Dotted initials, with or without spaces, normalize to ``'Smith, J.P.'``."""
    expected = 'Smith, J.P.'
    spellings = (
        'Smith, J. P.',
        'Smith, J.P.',
        'Smith, J.P. ',
        'Smith, J. P. ',
    )
    for spelling in spellings:
        assert expected == normalize_name(spelling)
def test_normalize_name_handles_multiple_middle_names():
    """Three dotted initials normalize to ``'Almeida, C.A.S.'`` for every spacing."""
    expected = 'Almeida, C.A.S.'
    spellings = (
        'Almeida, C. A. S.',
        'Almeida, C. A.S.',
        'Almeida, C.A. S.',
        'Almeida, C.A.S.',
    )
    for spelling in spellings:
        assert expected == normalize_name(spelling)
def add_author(self, full_name, role=None):
    """Append an author entry to the reference's ``authors`` list.

    Args:
        full_name: author name; stored after passing through
            ``normalize_name``.
        role: optional role. When it is not ``None`` an ``inspire_role``
            key is stored as well, with ``'ed.'`` mapped to ``'editor'``
            and any other value kept as given.
    """
    self._ensure_reference_field('authors', [])

    if role is None:
        # No role supplied: the entry carries only the normalized name.
        author = {
            'full_name': normalize_name(full_name),
        }
    else:
        author = {
            'full_name': normalize_name(full_name),
            'inspire_role': 'editor' if role == 'ed.' else role,
        }

    self.obj['reference']['authors'].append(author)
def contact_details(self, key, value):
    """Populate the ``contact_details`` key.

    Also populates the ``reference_letters`` key through side effects.

    Args:
        key: not used by this function's body.
        value: object read through ``.get``; subfield ``m`` supplies the
            emails, ``p`` the names and ``o`` the reference-letter entries.

    Returns:
        The existing ``contact_details`` list (or a new one) extended with
        one ``{'name': ..., 'email': ...}`` dict per name/email pair.
    """
    details = self.get('contact_details', [])
    letters = self.get('reference_letters', {})

    emails = force_list(value.get('m'))
    names = force_list(value.get('p'))
    # A single name accompanied by several emails is paired with each email.
    if len(names) == 1 and len(emails) > 1:
        names = [names[0]] * len(emails)

    letter_entries = force_list(value.get('o'))

    # zip_longest pads the shorter of the two sequences with None.
    for name, email in zip_longest(names, emails):
        details.append({
            'name': normalize_name(name),
            'email': email,
        })

    for entry in letter_entries:
        if '@' not in entry:
            # Entries without an '@' are stored as URLs.
            letters.setdefault('urls', []).append({
                'value': entry,
            })
            continue
        letters.setdefault('emails', []).append(entry)

    self['reference_letters'] = letters
    return details
def _generate_fieldnames_if_bai_query(self, node_value, bai_field_variation, query_bai_field_if_dots_in_name):
    """Generates new fieldnames in case of BAI query.

    Args:
        node_value (six.text_type): The node's value (i.e. author name).
        bai_field_variation (six.text_type): Which field variation to query
            ('search' or 'raw').
        query_bai_field_if_dots_in_name (bool): Whether to query the BAI
            field (in addition to the author's name field) if dots exist in
            the name and the name contains no whitespace.

    Returns:
        list: Fieldnames to query on, in case of BAI query or None,
            otherwise.

    Raises:
        ValueError, if ``bai_field_variation`` is not one of
            ``FieldVariations.search`` or ``FieldVariations.raw``.
    """
    supported_variations = (FieldVariations.search, FieldVariations.raw)
    if bai_field_variation not in supported_variations:
        raise ValueError('Non supported field variation "{}".'.format(bai_field_variation))

    # Leading/trailing dots are removed before looking for inner dots.
    stripped_name = normalize_name(node_value).strip('.')
    author_fieldname = ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['author']

    # Full BAI: query only the BAI field.
    if author_fieldname and ElasticSearchVisitor.BAI_REGEX.match(node_value):
        return [ElasticSearchVisitor.AUTHORS_BAI_FIELD + '.' + bai_field_variation]

    # Case of partial BAI, e.g. ``J.Smith``: query the BAI field and the
    # author name field(s) together.
    looks_like_partial_bai = (
        not whitespace.search(stripped_name)
        and query_bai_field_if_dots_in_name
        and author_fieldname
        and '.' in stripped_name
    )
    if looks_like_partial_bai:
        bai_fieldnames = [ElasticSearchVisitor.AUTHORS_BAI_FIELD + '.' + bai_field_variation]
        return bai_fieldnames + force_list(author_fieldname)

    return None
def add_advisor(self, name, ids=None, degree_type=None, record=None, curated=False):
    """Add an advisor.

    Args:
        name (string): full name of the advisor; stored after passing
            through ``normalize_name``.
        ids (list): list with the IDs of the advisor. Stored (wrapped by
            ``force_list``) only when truthy.
        degree_type (string): one of the allowed types of degree the
            advisor helped with. Stored only when truthy.
        record (string): URI for the advisor. Stored only when truthy.
        curated (boolean): if the advisor relation has been curated i.e.
            has been verified. Always stored, as ``curated_relation``.
    """
    advisor = {'name': normalize_name(name)}

    if ids:
        advisor['ids'] = force_list(ids)

    # Optional fields are copied over as-is, and only when truthy.
    optional_fields = (
        ('degree_type', degree_type),
        ('record', record),
    )
    for field, field_value in optional_fields:
        if field_value:
            advisor[field] = field_value

    advisor['curated_relation'] = curated
    self._append_to('advisors', advisor)
def _generate_exact_author_query(self, author_name_or_bai):
    """Generates a nested term query handling authors and BAIs.

    Notes:
        If the given value matches ``ElasticSearchVisitor.BAI_REGEX``, the
        lowercased value is searched in the ``FieldVariations.search``
        variation of ``ElasticSearchVisitor.AUTHORS_BAI_FIELD``.

        Otherwise, the value is passed through ``normalize_name``, then
        NFKC normalization, then lowercased, and searched in the
        ``'exact-author'`` field.
        NOTE(review): per the original documentation this mirrors how the
        indexed value is processed, so that e.g. 'Smith, J.' matches
        'Smith, J', 'smith, j.', 'smith j', 'j smith', 'j. smith',
        'J Smith', 'J. Smith' -- the indexing side is not visible here.

    Returns:
        The result of ``generate_nested_query`` for
        ``ElasticSearchVisitor.AUTHORS_NESTED_QUERY_PATH`` wrapping the term
        query.
    """
    if ElasticSearchVisitor.BAI_REGEX.match(author_name_or_bai):
        term_value = author_name_or_bai.lower()
        term_field = '.'.join(
            (ElasticSearchVisitor.AUTHORS_BAI_FIELD, FieldVariations.search))
    else:
        normalized_name = normalize_name(author_name_or_bai)
        term_value = normalize('NFKC', normalized_name).lower()
        term_field = ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['exact-author']

    term_query = self._generate_term_query(term_field, term_value)
    return generate_nested_query(
        ElasticSearchVisitor.AUTHORS_NESTED_QUERY_PATH, term_query)
def set_name(self, name):
    """Set the name for the author.

    Args:
        name (string): should be the family name, the given names, or both,
            and at least one is required. Stored under ``name.value`` after
            passing through ``normalize_name``.
    """
    self._ensure_field('name', {})
    normalized = normalize_name(name)
    name_field = self.obj['name']
    name_field['value'] = normalized
def add_speaker(self, name=None, record=None, affiliations=None, ids=None, curated_relation=None):
    """Append a speaker entry to ``speakers``.

    Args:
        name (str): name of the speaker; passed through ``normalize_name``
            before being stored.
        record (dict): dictionary with ``$ref`` pointing to proper record.
            Forwarded unchanged to ``_append_to``.
        affiliations (list): list of affiliations objects.
        ids: forwarded unchanged to ``_append_to``.
        curated_relation: forwarded unchanged to ``_append_to``.
    """
    normalized = normalize_name(name)
    self._append_to(
        'speakers',
        name=normalized,
        record=record,
        affiliations=affiliations,
        ids=ids,
        curated_relation=curated_relation,
    )
def _converted_author(value):
    """Return a plain-dict copy of an author field with IDs and name converted.

    The ``0`` subfield is always removed. Unless subfield ``9`` contains
    ``beard`` (case-insensitively), the IDs parsed from ``0`` are appended
    to the ``i`` and ``j`` subfields, and subfield ``a`` is replaced by its
    ``normalize_name`` form.
    """
    def _get_ids_from_0(subfield):
        """Transform IDs from CDS into INSPIRE-style IDs."""
        # Collect one ID per upper-cased schema; a later segment with the
        # same schema overwrites an earlier one.
        id_by_schema = {}
        for segment in subfield.split('|'):
            match = RE_IDS.match(segment)
            if match:
                id_by_schema[match.group('schema').upper()] = match.group('id')

        inspire_ids = []
        other_ids = []
        for schema, id_ in id_by_schema.items():
            if schema == 'CDS':
                # CDS identifiers are dropped.
                continue
            if schema == 'INSPIRE':
                inspire_ids.append(id_)
            elif schema == 'SZGECERN':
                other_ids.append(u'CCID-{}'.format(id_))
            else:
                other_ids.append(id_)

        return inspire_ids, other_ids

    value = vanilla_dict(value)

    # Fields whose ``9`` subfield mentions "beard" only lose their ``0``
    # subfield and are otherwise returned untouched.
    if 'beard' in value.get('9', '').lower():
        value.pop('0', None)
        return value

    collected_i = force_list(value.get('i'))
    collected_j = force_list(value.get('j'))

    for raw_ids in force_list(value.pop('0', None)):
        new_i, new_j = _get_ids_from_0(raw_ids)
        collected_i.extend(new_i)
        collected_j.extend(new_j)

    value['a'] = normalize_name(value['a'])
    value['i'] = collected_i
    value['j'] = collected_j

    return value
def test_normalize_name_handles_unicode():
    """A non-ASCII unicode name comes back from ``normalize_name`` unchanged."""
    expected = u'蕾拉'
    result = normalize_name(u'蕾拉')

    assert expected == result
def test_normalize_name_handles_jimmy():  # http://jimmy.pink
    """A single-word name comes back from ``normalize_name`` unchanged."""
    expected = 'Jimmy'
    result = normalize_name('Jimmy')

    assert expected == result
def test_normalize_name_handles_names_with_several_last_names():
    """A two-word family name before the comma is kept as-is."""
    expected = 'Smith Davis, J.P.'
    result = normalize_name('Smith Davis, J.P.')

    assert expected == result
def test_normalize_name_handles_names_with_spaces():
    """Space-separated initials without dots normalize to ``'Smith, J.P.'``."""
    expected = 'Smith, J.P.'
    spellings = (
        'Smith, J P ',
        'Smith, J P',
    )
    for spelling in spellings:
        assert expected == normalize_name(spelling)
def _get_value(value):
    """Return subfield ``a`` if truthy, else the normalized subfield ``q``.

    Both subfields are read through ``force_single_element`` and default to
    the empty string when missing.
    """
    primary = force_single_element(value.get('a', ''))
    fallback = force_single_element(value.get('q', ''))

    if primary:
        return primary
    return normalize_name(fallback)
def test_normalize_name_handles_names_with_middle_initial():
    """A full first name plus a middle initial normalizes to ``'Smith, John P.'``."""
    expected = 'Smith, John P.'
    spellings = (
        'Smith, John P.',
        'Smith, John P. ',
        'Smith, John P ',
    )
    for spelling in spellings:
        assert expected == normalize_name(spelling)
def test_normalize_name_full():
    """A fully spelled-out name comes back from ``normalize_name`` unchanged."""
    expected = 'Smith, John Peter'
    result = normalize_name('Smith, John Peter')

    assert expected == result
def test_normalize_name_handles_titles(input_author_name, expected):
    """``normalize_name`` maps the supplied input name to the supplied expectation.

    NOTE(review): the two arguments are presumably provided by a
    parametrization that is not visible in this block.
    """
    result = normalize_name(input_author_name)

    assert result == expected
def set_full_name(self, full_name):
    """Pass the normalized ``full_name`` to ``_ensure_field`` for ``full_name``.

    Args:
        full_name: name to run through ``normalize_name`` before storing.
    """
    normalized = normalize_name(full_name)
    self._ensure_field('full_name', normalized)
def test_normalize_name_converts_unicode_apostrophe_to_normal_apostrophe():
    """A typographic apostrophe in the name is replaced by a plain ``'``."""
    expected = u'M\'Gregor, Jimmy'
    result = normalize_name(u'M’Gregor, Jimmy')

    assert expected == result
def test_normalize_name_handles_names_with_first_initial():
    """A first initial plus a full middle name normalizes to ``'Smith, J. Peter'``."""
    expected = 'Smith, J. Peter'
    spellings = (
        'Smith, J Peter',
        'Smith, J. Peter',
        'Smith, J. Peter ',
    )
    for spelling in spellings:
        assert expected == normalize_name(spelling)
def set_name(self, value):
    """Replace ``obj['name']`` with a dict holding the normalized name.

    Args:
        value: name to run through ``normalize_name``; the result is stored
            under the ``value`` key.
    """
    normalized = normalize_name(value)
    self.obj['name'] = {'value': normalized}
def test_normalize_name_handles_multiple_middle_names_with_and_without_initials():
    """Mixed initials and full given names normalize to ``'Smith, J.A. Peter J.'``."""
    expected = 'Smith, J.A. Peter J.'
    spellings = (
        'Smith, J. A. Peter J.',
        'Smith, J.A. Peter J.',
    )
    for spelling in spellings:
        assert expected == normalize_name(spelling)