def authorlist(text):
    """Format an author list, choosing the formatter by affiliation presence.

    Two input layouts are accepted. Authors only::

        F. Lastname, F.M. Otherlastname

    or authors carrying affiliation ids, followed by the affiliations::

        F. Lastname1, F.M. Otherlastname1,2
        1 CERN
        2 Otheraffiliation

    Each affiliation must sit on its own line, and the affiliation ids
    attached to an author name must be separated with commas.

    Falsy input yields an empty string. Input that is not already a text
    string is decoded as UTF-8 before processing. Otherwise the return value
    is whatever the selected formatter returns for the cleaned text.
    """
    if not text:
        return ''

    if not isinstance(text, six.text_type):
        text = text.decode('utf-8')

    # Pre-clean the input: form submissions contain unwanted \r's, and the
    # dagger / asterisk-operator symbols are stripped as well.
    for unwanted in ('\r', u'†', u'∗'):
        text = text.replace(unwanted, '')
    # Collapse runs of newlines into a single one.
    text = re.sub(r'(\n+)', r'\n', text)
    text = wash_line(replace_undesirable_characters(text))

    # Digits following a name are assumed to be affiliation ids, and if one
    # author has an affiliation, every other author is assumed to have one.
    has_affiliation_ids = re.search(r'([a-zA-Z_-]+[\n]*\d+)', text)
    formatter = (
        authorlist_with_affiliations
        if has_affiliation_ids
        else authorlist_without_affiliations
    )
    return formatter(text)
def authorlist(text):
    """Return an author structure parsed from ``text``.

    The text is cleaned with ``replace_undesirable_characters`` and handed to
    ``create_authors``. When the parse result contains an ``"authors"``
    entry, every ``(fullname, raw affiliations)`` pair in it is passed
    through a ``LiteratureBuilder`` and the entry is replaced by the authors
    the builder recorded.

    Args:
        text: Raw author-list text.

    Returns:
        The object returned by ``create_authors``, with its ``"authors"``
        entry (if present) replaced by the builder's authors. Any additional
        information in the result is passed through untouched.
    """
    builder = LiteratureBuilder()
    text = replace_undesirable_characters(text)
    result = create_authors(text)

    if "authors" in result:
        for fullname, author_affs in result["authors"]:
            builder.add_author(
                builder.make_author(fullname, raw_affiliations=author_affs)
            )
        # When the parsed list is empty no author is ever added, so the
        # builder record may lack an "authors" key (NOTE(review): confirm
        # against LiteratureBuilder). Fall back to an empty list instead of
        # raising KeyError.
        result["authors"] = builder.record.get("authors", [])

    return result
def authorlist(text):
    """Return an author structure parsed from ``text``.

    The text is cleaned with ``replace_undesirable_characters`` and handed to
    ``create_authors``. When the parse result contains an ``'authors'``
    entry, every ``(fullname, raw affiliations)`` pair in it is passed
    through a ``LiteratureBuilder`` and the entry is replaced by the authors
    the builder recorded.

    Args:
        text: Raw author-list text.

    Returns:
        The object returned by ``create_authors``, with its ``'authors'``
        entry (if present) replaced by the builder's authors. Any additional
        information in the result is passed through untouched.
    """
    # Imported at call time, as in the original implementation.
    from inspire_schemas.api import LiteratureBuilder
    from refextract.documents.pdf import replace_undesirable_characters
    from inspirehep.modules.tools.authorlist import create_authors

    builder = LiteratureBuilder()
    text = replace_undesirable_characters(text)
    result = create_authors(text)

    if 'authors' in result:
        for fullname, author_affs in result['authors']:
            builder.add_author(
                builder.make_author(fullname, raw_affiliations=author_affs))
        # When the parsed list is empty no author is ever added, so the
        # builder record may lack an 'authors' key (NOTE(review): confirm
        # against LiteratureBuilder). Fall back to an empty list instead of
        # raising KeyError.
        result['authors'] = builder.record.get('authors', [])

    return result
def authorlist(text):
    """Return an author structure parsed from ``text``.

    The text is cleaned with ``replace_undesirable_characters`` and handed to
    ``create_authors``. When the parse result contains an ``'authors'``
    entry, every ``(fullname, raw affiliations)`` pair in it is passed
    through a ``LiteratureBuilder`` and the entry is replaced by the authors
    the builder recorded.

    Args:
        text: Raw author-list text.

    Returns:
        The object returned by ``create_authors``, with its ``'authors'``
        entry (if present) replaced by the builder's authors. Any additional
        information in the result is passed through untouched.
    """
    # Imported at call time, as in the original implementation.
    from inspire_schemas.api import LiteratureBuilder
    from refextract.documents.pdf import replace_undesirable_characters
    from inspirehep.modules.tools.authorlist import create_authors

    builder = LiteratureBuilder()
    text = replace_undesirable_characters(text)
    result = create_authors(text)

    if 'authors' in result:
        for fullname, author_affs in result['authors']:
            builder.add_author(
                builder.make_author(fullname, raw_affiliations=author_affs)
            )
        # When the parsed list is empty no author is ever added, so the
        # builder record may lack an 'authors' key (NOTE(review): confirm
        # against LiteratureBuilder). Fall back to an empty list instead of
        # raising KeyError.
        result['authors'] = builder.record.get('authors', [])

    return result