예제 #1
0
def authorlist(text):
    """Clean the raw author text and hand it to the matching formatter.

    Two input layouts are accepted:
        F. Lastname, F.M. Otherlastname
    or
        F. Lastname1, F.M. Otherlastname1,2
        1 CERN
        2 Otheraffiliation

    Each affiliation is expected on its own line, and several affiliation
    ids attached to one author name are expected to be comma separated.

    Falsy input (empty string, ``None``) yields an empty string. Input that
    is not already a text string is decoded as UTF-8 before processing.
    """
    if not text:
        return ''

    # From here on operate on text; non-text input is decoded as UTF-8.
    cleaned = text if isinstance(text, six.text_type) else text.decode('utf-8')

    # Strip characters that must not reach the formatters: carriage returns
    # (input from the form contains them) plus the dagger and asterisk signs.
    for unwanted in ('\r', u'†', u'∗'):
        cleaned = cleaned.replace(unwanted, '')

    # Squash every run of newlines down to a single newline.
    cleaned = re.sub(r'(\n+)', r'\n', cleaned)
    cleaned = wash_line(replace_undesirable_characters(cleaned))

    # Heuristic: digits following a name (possibly after line breaks) are
    # affiliation ids, and if one author carries an affiliation then all
    # authors are assumed to carry one.
    has_affiliation_ids = re.search(r'([a-zA-Z_-]+[\n]*\d+)', cleaned)
    if not has_affiliation_ids:
        return authorlist_without_affiliations(cleaned)
    return authorlist_with_affiliations(cleaned)
예제 #2
0
def authorlist(text):
    """Pick the author-list formatter based on whether affiliations exist.

    The input may look like either of:
        F. Lastname, F.M. Otherlastname
    or
        F. Lastname1, F.M. Otherlastname1,2
        1 CERN
        2 Otheraffiliation

    Only one affiliation per line is expected, and the affiliation ids that
    follow an author name should be separated with commas.

    Returns an empty string for falsy input; otherwise returns whatever the
    selected formatter returns for the cleaned text.
    """
    if not text:
        return ''

    # Non-text input is decoded as UTF-8 so the rest works on text only.
    unicode_text = (
        text if isinstance(text, six.text_type) else text.decode('utf-8')
    )

    # Pre-clean: remove the \r's that form input carries, together with the
    # dagger and asterisk-operator characters.
    stripped = (
        unicode_text
        .replace('\r', '')
        .replace(u'†', '')
        .replace(u'∗', '')
    )
    # Consecutive newlines become one newline.
    single_newlines = re.sub(r'(\n+)', r'\n', stripped)
    washed = wash_line(replace_undesirable_characters(single_newlines))

    # Numbers attached to author names are taken to be affiliation ids; one
    # author with an affiliation implies every author has one.
    if re.search(r'([a-zA-Z_-]+[\n]*\d+)', washed) is not None:
        return authorlist_with_affiliations(washed)
    else:
        return authorlist_without_affiliations(washed)
예제 #3
0
def authorlist(text):
    """Parse *text* into an author structure plus any extra information.

    The text is passed through ``replace_undesirable_characters`` and then
    ``create_authors``. If the parse result has an ``"authors"`` entry, each
    ``(fullname, affiliations)`` pair in it is added to a
    ``LiteratureBuilder`` and the entry is replaced by the builder's
    ``"authors"`` record. All other keys of the result are returned as
    ``create_authors`` produced them.
    """
    builder = LiteratureBuilder()
    parsed = create_authors(replace_undesirable_characters(text))

    # Nothing to convert when the parser reported no authors.
    if "authors" not in parsed:
        return parsed

    for name, affiliations in parsed["authors"]:
        author = builder.make_author(name, raw_affiliations=affiliations)
        builder.add_author(author)

    # Swap the raw pairs for the builder's author records.
    parsed["authors"] = builder.record["authors"]
    return parsed
예제 #4
0
def authorlist(text):
    """Build an author structure (and optional extras) from raw text.

    The dependencies are imported at call time, inside the function. The
    text is cleaned with ``replace_undesirable_characters`` and parsed by
    ``create_authors``; when the result contains ``'authors'``, its
    ``(fullname, affiliations)`` pairs are fed to a ``LiteratureBuilder``
    and the entry is overwritten with the builder's ``'authors'`` record.
    """

    from inspire_schemas.api import LiteratureBuilder
    from refextract.documents.pdf import replace_undesirable_characters
    from inspirehep.modules.tools.authorlist import create_authors

    literature = LiteratureBuilder()
    parsed = create_authors(replace_undesirable_characters(text))

    if 'authors' in parsed:
        for name, raw_affs in parsed['authors']:
            # Build the author first, then register it with the builder.
            author = literature.make_author(name, raw_affiliations=raw_affs)
            literature.add_author(author)
        parsed['authors'] = literature.record['authors']

    return parsed
예제 #5
0
def authorlist(text):
    """Turn raw author text into an author structure with optional extras.

    Steps, as performed below: import the helpers at call time, clean the
    text with ``replace_undesirable_characters``, parse it with
    ``create_authors``, and, if the parse result has an ``'authors'`` key,
    replace its ``(fullname, affiliations)`` pairs with the ``'authors'``
    record assembled by a ``LiteratureBuilder``.
    """

    from inspire_schemas.api import LiteratureBuilder
    from refextract.documents.pdf import replace_undesirable_characters
    from inspirehep.modules.tools.authorlist import create_authors

    record_builder = LiteratureBuilder()

    sanitized = replace_undesirable_characters(text)
    outcome = create_authors(sanitized)

    # Without an 'authors' key the parser output is returned unchanged.
    if 'authors' not in outcome:
        return outcome

    for full_name, affiliations in outcome['authors']:
        new_author = record_builder.make_author(
            full_name, raw_affiliations=affiliations
        )
        record_builder.add_author(new_author)

    outcome['authors'] = record_builder.record['authors']
    return outcome