Esempio n. 1
0
def check_author_compability_with_lit_authors(literature_control_number):
    """Match the current user's author profile against a literature record.

    The current user's name (``name.value`` of their author profile) is
    parsed and compared with the given literature record using three
    strategies, tried in order; the first truthy result is returned:

    1. ``_check_names_compability`` with ``last_names_only=True``,
    2. ``_check_names_compability`` with its default arguments,
    3. ``_find_matching_author_in_lit_record``.

    Args:
        literature_control_number: control number of the literature record
            to check against.

    Returns:
        ``False`` when the current user has no author profile or the
        literature record cannot be loaded; otherwise the first truthy
        value produced by the strategies above (presumably the recid(s) of
        the matched author(s) -- confirm against the helpers), or ``None``
        when no strategy matches.
    """
    profile = _get_current_user_author_profile()
    if not profile:
        return False

    record = _get_lit_record_from_db(literature_control_number)
    if not record:
        return False

    parsed_name = ParsedName.loads(profile.get_value("name.value"))

    # ``or`` evaluates lazily, so a later (more permissive) strategy only
    # runs when every earlier one returned something falsy. The trailing
    # ``or None`` keeps the "nothing matched" result as ``None``.
    return (
        _check_names_compability(record, parsed_name, last_names_only=True)
        or _check_names_compability(record, parsed_name)
        or _find_matching_author_in_lit_record(
            parsed_name, literature_control_number)
        or None
    )
Esempio n. 2
0
def match_literature_author(author, record):
    """Find an unambiguous match for a literature author.

    Matching runs in two passes, each with its own matcher configuration
    read from the Flask app config:

    1. ``AUTHOR_MATCHER_NAME_CONFIG``. If the raw matches are not
       unambiguous, they are narrowed down with ``collaboration_validator``
       and then, if still unresolved, with ``affiliations_validator``.
    2. ``AUTHOR_MATCHER_NAME_INITIALS_CONFIG``, with no validators.

    Args:
        author (dict): the author entry; ``full_name`` and
            ``affiliations.value`` are read from it.
        record (dict): the literature record; ``collaborations.value`` is
            read from it.

    Returns:
        The first truthy value returned by
        ``get_reference_and_bai_if_unambiguous_literature_author_match``,
        or ``None`` when no pass yields one.
    """
    # Each pass pairs a matcher config with the validators used to
    # disambiguate its results (``None`` means "no disambiguation").
    matching_passes = (
        (
            current_app.config["AUTHOR_MATCHER_NAME_CONFIG"],
            (collaboration_validator, affiliations_validator),
        ),
        (
            current_app.config["AUTHOR_MATCHER_NAME_INITIALS_CONFIG"],
            None,
        ),
    )

    full_name = author.get("full_name")
    name = ParsedName.loads(full_name)
    matcher_input = {
        "first_name": name.first,
        "last_name": name.last,
        "full_name": full_name,
        "collaborations": get_value(record, "collaborations.value", []),
        "affiliations": get_value(author, "affiliations.value", []),
    }

    for matcher_config, disambiguators in matching_passes:
        candidates = match_literature_author_with_config(
            matcher_input, matcher_config)

        result = get_reference_and_bai_if_unambiguous_literature_author_match(
            candidates)
        if result:
            return result

        # Raw matches were not unambiguous: retry on the subset accepted by
        # each validator in turn, stopping at the first one that resolves.
        for is_valid in disambiguators or ():
            result = (
                get_reference_and_bai_if_unambiguous_literature_author_match(
                    candidate for candidate in candidates
                    if is_valid(matcher_input, candidate)))
            if result:
                return result
Esempio n. 3
0
def _name_variation_has_only_initials(name):
    """Tell whether every part of the parsed ``name`` looks like an initial.

    A part is treated as an initial when it is exactly one character long
    or contains a dot.
    """
    def _looks_like_initial(part):
        return len(part) == 1 or u'.' in part

    for part in ParsedName.loads(name):
        if not _looks_like_initial(part):
            return False
    return True
Esempio n. 4
0
def get_authors(record):
    """Return the authors of a record with split names and HAL affiliations.

    For every entry of ``record['authors']`` the ``full_name`` is parsed
    into first and last name. Each affiliation is resolved to a recid via
    ``get_recid_from_ref`` and kept only when the map returned by
    ``_get_hal_id_map(record)`` holds a truthy HAL identifier for that
    recid; affiliations without one are dropped.

    Args:
        record(InspireRecord): a record.

    Returns:
        list(dict): one dict per author, with the keys ``affiliations``
        (a list of ``{'hal_id': ...}`` dicts), ``first_name`` and
        ``last_name``.

    Raises:
        KeyError: if an author entry has no ``full_name``.

    Examples:
        Assuming institution 902725 maps to the HAL id ``'300037'``:

        >>> record = {
        ...     'authors': [
        ...         {
        ...             'full_name': 'Doe, John',
        ...             'affiliations': [
        ...                 {
        ...                     'record': {
        ...                         '$ref': 'http://localhost:5000/api/institutions/902725',
        ...                     }
        ...                 },
        ...             ],
        ...         },
        ...     ],
        ... }
        >>> authors = get_authors(record)
        >>> authors[0]['affiliations'][0]['hal_id']
        '300037'

    """
    hal_ids_by_recid = _get_hal_id_map(record)

    authors = []
    for author in record.get('authors', []):
        name = ParsedName.loads(author['full_name'])
        entry = {
            'affiliations': [],
            'first_name': name.first,
            'last_name': name.last,
        }

        for affiliation in author.get('affiliations', []):
            recid = get_recid_from_ref(affiliation.get('record'))
            if recid not in hal_ids_by_recid:
                continue
            hal_id = hal_ids_by_recid[recid]
            # The map may hold an empty value for a known institution.
            if hal_id:
                entry['affiliations'].append({'hal_id': hal_id})

        authors.append(entry)

    return authors
Esempio n. 5
0
def get_authors(record):
    """Return the authors of a record with split names and HAL affiliations.

    For every entry of ``record['authors']`` the ``full_name`` is parsed
    into first and last name. Each affiliation is resolved to a recid via
    ``get_recid_from_ref`` and kept only when the map returned by
    ``_get_hal_id_map(record)`` holds a truthy HAL identifier for that
    recid; affiliations without one are dropped.

    Args:
        record(InspireRecord): a record.

    Returns:
        list(dict): one dict per author, with the keys ``affiliations``
        (a list of ``{'hal_id': ...}`` dicts), ``first_name`` and
        ``last_name``.

    Raises:
        KeyError: if an author entry has no ``full_name``.

    Examples:
        Assuming institution 902725 maps to the HAL id ``'300037'``:

        >>> record = {
        ...     'authors': [
        ...         {
        ...             'full_name': 'Doe, John',
        ...             'affiliations': [
        ...                 {
        ...                     'record': {
        ...                         '$ref': 'http://localhost:5000/api/institutions/902725',
        ...                     }
        ...                 },
        ...             ],
        ...         },
        ...     ],
        ... }
        >>> authors = get_authors(record)
        >>> authors[0]['affiliations'][0]['hal_id']
        '300037'

    """
    hal_ids_by_recid = _get_hal_id_map(record)

    authors = []
    for author in record.get('authors', []):
        name = ParsedName.loads(author['full_name'])
        entry = {
            'affiliations': [],
            'first_name': name.first,
            'last_name': name.last,
        }

        for affiliation in author.get('affiliations', []):
            recid = get_recid_from_ref(affiliation.get('record'))
            if recid not in hal_ids_by_recid:
                continue
            hal_id = hal_ids_by_recid[recid]
            # The map may hold an empty value for a known institution.
            if hal_id:
                entry['affiliations'].append({'hal_id': hal_id})

        authors.append(entry)

    return authors
Esempio n. 6
0
    def build_texkey_first_part(cls, data):
        """Build the first (name) part of a texkey from record data.

        The candidate sources are tried in this order, and the chosen value
        is passed through ``cls.sanitize``:

        1. the first author's last name, when the record has fewer than 10
           authors;
        2. the value of the first collaboration;
        3. the first corporate author;
        4. the literal ``"Proceedings"`` when ``"proceedings"`` is among
           the document types;
        5. the first author's last name regardless of the author count.

        The last name comes from ``ParsedName`` when the parsed name has
        more than one part; otherwise it is the text before the first comma
        of the full name.

        Args:
            data (dict): the record data. ``document_type`` is accessed
                directly whenever steps 1-3 do not apply.

        Returns:
            The sanitized string, or ``None`` when no source is available.
        """
        first_author = get_value(data, "authors[0].full_name")

        author_key = None
        if first_author:
            name = ParsedName.loads(first_author)
            if len(name) > 1:
                author_key = name.last
            else:
                author_key = first_author.split(",")[0]

        if author_key and len(data["authors"]) < 10:
            return cls.sanitize(author_key)
        if "collaborations" in data:
            return cls.sanitize(data["collaborations"][0]["value"])
        if "corporate_author" in data:
            return cls.sanitize(data["corporate_author"][0])
        if "proceedings" in data["document_type"]:
            return cls.sanitize("Proceedings")
        if author_key:
            # Many-author record with no better source: fall back to the
            # first author after all.
            return cls.sanitize(author_key)
        return None
Esempio n. 7
0
def generate_minimal_name_variations(author_name):
    """Generate a small set of lowercased name variations for a name.

    Notes:
        The name is unidecoded first, so the variations are built from its
        transliterated form, which is how the field is indexed.

        For names with more than one part, the variations are
        {lastname} x {non-lastnames, first character of the non-lastnames}.
        The swapped versions are generated as well, to support queries like
        ``Mele Salvatore``, which ``ParsedName`` parses as lastname
        ``Salvatore`` and firstname ``Mele``; in those cases both
        ``Mele, Salvatore`` and ``Mele, S`` are needed.

        Every '-' is replaced by ' ' because that is how the name
        variations are indexed, and the minimal variations must be
        generated identically. The replacement has to happen after
        ``ParsedName`` is created, otherwise the name is parsed
        differently: 'Caro-Estevez' as is, is a lastname, whereas
        'Caro Estevez' is a firstname and a lastname.

    Args:
        author_name: the author name to generate variations for.

    Returns:
        list: the distinct lowercased variations. Variations made up only
        of initials are left out. A single-part name yields a one-element
        list.
    """
    parsed = ParsedName.loads(unidecode(author_name))

    if len(parsed) <= 1:
        return [parsed.dumps().replace('-', ' ').lower()]

    last = parsed.last.replace('-', ' ')

    other_parts = parsed.first_list + parsed.middle_list + parsed.suffix_list
    # Strip whitespace that joining could leave at the edges.
    others = ' '.join(other_parts).strip().replace('-', ' ')

    candidates = (
        ' '.join((last, others)),
        ' '.join((last, others[0])),
        ' '.join((others, last)),
        ' '.join((others, last[0])),
    )

    # Collect into a set so that identical variations are dropped.
    variations = set()
    for candidate in candidates:
        if not _name_variation_has_only_initials(candidate):
            variations.add(candidate.lower())
    return list(variations)
Esempio n. 8
0
def get_display_name_for_author_name(author_name):
    """Return the name formatted as first names followed by last names.

    Only the first-name and last-name parts of the parsed name are used,
    joined by single spaces.
    """
    name = ParsedName.loads(author_name)
    display_parts = name.first_list + name.last_list
    return " ".join(display_parts)
Esempio n. 9
0
def get_author_display_name(name):
    """Return the display name in the format ``Firstnames Lastnames``.

    Only the first-name and last-name parts of the parsed name are used,
    joined by single spaces.
    """
    parsed = ParsedName.loads(name)
    display_parts = parsed.first_list + parsed.last_list
    return " ".join(display_parts)
Esempio n. 10
0
def get_author_display_name(name):
    """Return the display name in the format ``Firstnames Lastnames``.

    Only the first-name and last-name parts of the parsed name are used,
    joined by single spaces.
    """
    parsed = ParsedName.loads(name)
    display_parts = parsed.first_list + parsed.last_list
    return " ".join(display_parts)