def test_name_variations_existance(self):
    """Check that the indexed record carries its first author's name variations.

    Fetches record 1343791 from the ``hep`` index and compares the
    ``name_variations`` stored on its first author with the output of
    ``author_tokenize`` for that author's ``full_name``.
    """
    from invenio_ext.es import es
    from inspirehep.modules.authors.utils import author_tokenize

    first_author = es.get(index='hep', id=1343791)['_source']['authors'][0]

    # Read the stored field before tokenizing, so a missing
    # 'name_variations' key surfaces before anything else runs.
    stored_variations = first_author['name_variations']
    computed_variations = author_tokenize(first_author['full_name'])

    self.assertEqual(computed_variations, stored_variations)
def test_author_tokenizer(self):
    """Check the variations ``author_tokenize`` yields for "Ellis Richard"."""
    from inspirehep.modules.authors.utils import author_tokenize

    expected_variations = [
        'E Richard',
        'Ellis Richard',
        'Richard',
        'Richard E',
        'Richard Ellis',
        'Richard, E',
        'Richard, Ellis',
    ]

    self.assertEqual(author_tokenize("Ellis Richard"), expected_variations)
def generate_name_variatons(recid, json, *args, **kwargs):
    """Attach name variations to the authors of a record before indexing.

    Every entry under ``authors`` that has a truthy ``full_name`` gets a
    ``name_variations`` field holding the result of ``author_tokenize``
    for that name. The record is modified in place and nothing is
    returned. ``recid`` and the extra positional/keyword arguments are
    accepted but not used here.

    :param recid: The id of the record that is going to be indexed.
    :param json: The json representation of the record that is going to
        be indexed.
    """
    from inspirehep.modules.authors.utils import author_tokenize

    # A missing or empty "authors" field means there is nothing to do.
    for entry in json.get("authors") or ():
        full_name = entry.get("full_name")
        if not full_name:
            # Authors without a usable name are left untouched.
            continue
        variations = author_tokenize(full_name)
        entry.update({"name_variations": variations})