def compare_fieldvalues_authorname(field_comparisons, threshold, matches_needed):
    """
    Validate a list of field comparisons with an author-name aware matcher.

    Every field contributes at most one match: as soon as one of its
    value pairs scores at or above the threshold, the field is counted and
    the remaining pairs of that field are skipped. After each field the
    running count is checked against matches_needed and the function
    returns early once it is reached.

    If all fields are processed without reaching matches_needed, the result
    is still positive when the ratio matches_found / matches_needed is
    strictly greater than the same threshold.

    @param field_comparisons: list of comparisons, one entry per field, each
        being an iterable of (value, other_value) pairs.
    @type field_comparisons: list

    @param threshold: score a name comparison must reach or exceed to count
        as a match; also the lower bound (exclusive) for the fallback ratio.
    @type threshold: float

    @param matches_needed: number of matching fields required for an
        immediate positive result.
    @type matches_needed: int

    @return: tuple of (result, matches_found), result being True or False.
    @rtype: tuple
    """
    match_count = 0
    for field_pairs in field_comparisons:
        field_matched = False
        for value, other_value in field_pairs:
            # Build the pairings between the reversed-string variants of
            # both values and keep only the first group of pairings, e.g.
            # (per the original author's note) for Doe,J vs. Smith,J:
            # (('Smith,J', 'Doe,J'), ('Smith,J', 'J,Doe'))
            variant_pairs = list(get_paired_comparisons(
                get_reversed_string_variants(value),
                get_reversed_string_variants(other_value)))[0]
            for left_name, right_name in variant_pairs:
                # Author-name comparison delegated to compare_names
                if compare_names(left_name, right_name) >= threshold:
                    field_matched = True
                    break
            if field_matched:
                # One hit is enough for this field
                break
        if field_matched:
            match_count += 1
        # Stop as soon as the required number of fields has matched
        if match_count >= matches_needed:
            return True, match_count
    # Authors frequently do not match in full, so accept a slightly lower
    # number of matches as long as the ratio beats the same threshold.
    if match_count >= matches_needed:
        return True, match_count
    return match_count / float(matches_needed) > threshold, match_count
def _compare_names(bib1, bib2):
    """
    Compare the names looked up for two bibrecrefs.

    Each argument is resolved through get_name_by_bibrecref. When both
    lookups give a truthy value, the result of
    compare_names(name, other_name, False) is returned; otherwise the
    placeholder '?' is returned.
    """
    first_name = get_name_by_bibrecref(bib1)
    second_name = get_name_by_bibrecref(bib2)

    # Without both names there is nothing to compare
    if not (first_name and second_name):
        return '?'
    return compare_names(first_name, second_name, False)
def _compare_names(bib1, bib2):
    """
    Return the name comparison for two bibrecrefs, or '?' if undecidable.

    The names are fetched with get_name_by_bibrecref; the comparison
    (compare_names called with False as third argument) is only performed
    when both fetched names are truthy.
    """
    left = get_name_by_bibrecref(bib1)
    right = get_name_by_bibrecref(bib2)

    return compare_names(left, right, False) if left and right else '?'
def _compare_names(bib1, bib2):
    """
    Compare the names looked up for two bibrecrefs, tracing each step.

    Progress is reported through metadata_comparison_print. Each argument
    is resolved through get_name_by_bibrecref. When both lookups give a
    truthy value, the result of compare_names(name, other_name, False) is
    returned; otherwise the placeholder '?' is returned.
    """
    metadata_comparison_print("Comparing names.")

    first_name = get_name_by_bibrecref(bib1)
    second_name = get_name_by_bibrecref(bib2)
    metadata_comparison_print(" Found %s and %s" % (first_name, second_name))

    # Without both names there is nothing to compare
    if not (first_name and second_name):
        return '?'

    score = compare_names(first_name, second_name, False)
    metadata_comparison_print(
        " cmp(%s,%s) = %s" % (first_name, second_name, str(score)))
    return score
# Example #5
# 0
def _compare_names(bib1, bib2):
    """
    Return the name comparison for two bibrecrefs, or '?' if undecidable.

    The names are fetched with get_name_by_bibrecref and the comparison
    (compare_names called with False as third argument) is only performed
    when both fetched names are truthy. Every step is traced through
    metadata_comparison_print.
    """
    metadata_comparison_print("Comparing names.")

    left = get_name_by_bibrecref(bib1)
    right = get_name_by_bibrecref(bib2)
    metadata_comparison_print(" Found %s and %s" % (left, right))

    # Placeholder result used when either name is missing
    outcome = '?'
    if left and right:
        outcome = compare_names(left, right, False)
        metadata_comparison_print(
            " cmp(%s,%s) = %s" % (left, right, str(outcome)))
    return outcome
# Example #6
# 0
def compare_fieldvalues_authorname(field_comparisons, threshold,
                                   matches_needed):
    """
    Perform author-name oriented validation over a list of field comparisons.

    For each field, the (value, other_value) pairs are tried in order until
    one of them yields a name comparison score equal to or above the
    threshold; that field then counts as one match. Once the number of
    matched fields reaches matches_needed the function returns True
    immediately.

    When every field has been examined without reaching matches_needed,
    a positive result is still given if matches_found / matches_needed is
    strictly greater than the threshold.

    @param field_comparisons: list of comparisons, each of which contains
        field-value to field-value pairs.
    @type field_comparisons: list

    @param threshold: minimum score (inclusive) for a single comparison to
        be a positive match; also the exclusive bound for the final ratio.
    @type threshold: float

    @param matches_needed: number of positive field matches needed for an
        immediate positive result.
    @type matches_needed: int

    @return: tuple of matching result (True or False) and number of matches.
    @rtype: tuple
    """
    def _values_match(value, other_value):
        # Pair up the reversed-string variants of both values and use only
        # the first group of pairings, e.g. (per the original author's
        # note) for Doe,J vs. Smith,J:
        # (('Smith,J', 'Doe,J'), ('Smith,J', 'J,Doe'))
        candidates = list(get_paired_comparisons(
            get_reversed_string_variants(value),
            get_reversed_string_variants(other_value)))[0]
        # Author-name comparison delegated to compare_names; stops at the
        # first pairing that reaches the threshold.
        return any(compare_names(name, other_name) >= threshold
                   for name, other_name in candidates)

    hits = 0
    for comparisons in field_comparisons:
        # A field is done as soon as one of its value pairs matches
        if any(_values_match(value, other_value)
               for value, other_value in comparisons):
            hits += 1
        # Return right away once enough fields have matched
        if hits >= matches_needed:
            return True, hits

    # Authors often do not match fully, so tolerate a somewhat lower number
    # of matches, judged against the same threshold.
    enough = hits >= matches_needed
    if not enough:
        enough = hits / float(matches_needed) > threshold
    return enough, hits