예제 #1
0
def match_reference_control_numbers_with_relaxed_journal_titles(reference):
    """Match a reference and return the ``control_number`` of its candidates.

    A reference flagged with a truthy ``curated_relation`` is not matched
    again: the recid is derived from its existing ``record`` entry.
    Otherwise every configuration returned by ``match_reference_config``
    (called with ``use_relaxed_titles_matching=True``) is run through
    ``match`` and the control numbers of the hits are collected.

    Args:
        reference (dict): the metadata of a reference.

    Returns:
        list: at most five distinct matched recids, in the order in which
        they were first produced by the matcher, or ``None`` when the
        reference is curated but has no ``record`` key.
    """
    if reference.get("curated_relation"):
        try:
            record_ref = reference["record"]
            return [get_recid_from_ref(record_ref)]
        except KeyError:
            return None

    configs = match_reference_config(reference,
                                     use_relaxed_titles_matching=True)

    max_matches = 5
    seen = set()
    recids = []
    for config in configs:
        for matched_record in match(reference, config):
            recid = matched_record["_source"]["control_number"]
            if recid in seen:
                continue
            # Keep first-seen order so that the truncation below is
            # deterministic instead of depending on set iteration order.
            seen.add(recid)
            recids.append(recid)
            if len(recids) == max_matches:
                # Enough candidates collected: no need to consume more hits.
                return recids

    return recids
예제 #2
0
def match_reference(reference):
    """Attach the first matching literature record to a reference.

    The matcher configuration is taken from ``current_app.config``:
    ``WORKFLOWS_REFERENCE_MATCHER_JHEP_AND_JCAP_CONFIG`` when the journal
    title of the reference is exactly ``JCAP`` or ``JHEP``, and
    ``WORKFLOWS_REFERENCE_MATCHER_DEFAULT_CONFIG`` otherwise. Only the first
    result produced by ``match`` is considered.

    Args:
        reference (dict): the reference metadata; it is updated in place.

    Returns:
        dict: the same ``reference`` object, with its ``record`` key set to
        the literature record reference of the first match, if there is one.
    """
    default_matcher_config = current_app.config[
        'WORKFLOWS_REFERENCE_MATCHER_DEFAULT_CONFIG']
    jhep_jcap_matcher_config = current_app.config[
        'WORKFLOWS_REFERENCE_MATCHER_JHEP_AND_JCAP_CONFIG']

    title = get_value(reference, 'reference.publication_info.journal_title')
    if title in ['JCAP', 'JHEP']:
        selected_config = jhep_jcap_matcher_config
    else:
        selected_config = default_matcher_config

    first_hit = next(match(reference, selected_config), None)
    if not first_hit:
        # Nothing matched: hand the reference back untouched.
        return reference

    recid = first_hit['_source']['control_number']
    reference['record'] = get_record_ref(recid, 'literature')
    return reference
예제 #3
0
def _cast_reference_year(reference, cast):
    """Apply ``cast`` in place to the reference's publication year, if any."""
    try:
        publication_info = reference['reference']['publication_info']
        publication_info['year'] = cast(publication_info['year'])
    except KeyError:
        # The reference carries no publication year: nothing to convert.
        pass


def match_reference_with_config(reference,
                                config,
                                previous_matched_recid=None):
    """Match a reference using inspire-matcher given the config.

    The publication year is temporarily converted to a string while the
    matcher runs and converted back to an integer afterwards, including when
    matching raises, so the caller's reference is never left with a string
    year.

    A match is recorded on the reference only when it is unambiguous: either
    exactly one distinct recid was found, or one of the candidates equals
    ``previous_matched_recid``.

    Args:
        reference (dict): the metadata of the reference; updated in place.
        config (dict): the list of inspire-matcher configurations for queries.
            Its ``index`` entry is forwarded when recording a match.
        previous_matched_recid (int): the record id of the last matched
            reference from the list of references.

    Returns:
        dict: the matched reference.
    """
    # XXX: avoid this type casting.
    _cast_reference_year(reference, str)
    try:
        matched_recids = dedupe_list([
            matched_record['_source']['control_number']
            for matched_record in match(reference, config)
        ])

        if len(matched_recids) == 1:
            _add_match_to_reference(reference, matched_recids[0],
                                    config['index'])
        elif previous_matched_recid in matched_recids:
            # Several candidates: fall back to the previous reference's match
            # when it is among them.
            _add_match_to_reference(reference, previous_matched_recid,
                                    config['index'])
    finally:
        # Undo the temporary cast even if matching failed.
        _cast_reference_year(reference, int)

    return reference
예제 #4
0
def match_reference_with_config(reference, config, previous_matched_recid=None):
    """Match a reference using inspire-matcher given the config.

    While the matcher runs, the publication year of the reference is held as
    a string; it is turned back into an integer before returning, and also
    when matching raises, so the input is not left in the temporary form.

    The reference gets a match only when the result is unambiguous: a single
    distinct recid, or a candidate equal to ``previous_matched_recid``.

    Args:
        reference (dict): the metadata of the reference; updated in place.
        config (dict): the list of inspire-matcher configurations for queries.
            Its ``index`` entry is forwarded when recording a match.
        previous_matched_recid (int): the record id of the last matched
            reference from the list of references.

    Returns:
        dict: the matched reference.
    """
    # XXX: avoid this type casting.
    try:
        reference['reference']['publication_info']['year'] = str(
            reference['reference']['publication_info']['year'])
    except KeyError:
        pass

    try:
        candidates = dedupe_list(
            [hit['_source']['control_number'] for hit in match(reference, config)])

        if len(candidates) == 1:
            _add_match_to_reference(reference, candidates[0], config['index'])
        elif any(recid == previous_matched_recid for recid in candidates):
            # Ambiguous result: reuse the previous match if it is a candidate.
            _add_match_to_reference(reference, previous_matched_recid, config['index'])
    finally:
        # XXX: avoid this type casting.
        # Restore the integer year on every exit path, not only on success.
        try:
            reference['reference']['publication_info']['year'] = int(
                reference['reference']['publication_info']['year'])
        except KeyError:
            pass

    return reference