Python get_paired_comparisons Examples, invenio.bibmatch_validator.get_paired_comparisons Python Examples

Example #1

0

Show file

    def test_validation_compare_title(self):
        """BibMatch comparison: compare title

        Checks that a title and its subtitled variant are rejected by the
        plain comparison but accepted by the title comparison, and that two
        long titles differing in separator and capitalisation are accepted
        by the title comparison.
        """
        min_score = 0.9
        hits_required = 1

        plain_title = 'Assault frequency and preformation probability'
        subtitled_title = ('Assault frequency and preformation probability '
                           ': The alpha emission process')
        pairs = get_paired_comparisons([plain_title], [subtitled_title])

        # The generic comparison is expected to reject this pair ...
        is_match, _unused = compare_fieldvalues_normal(pairs, min_score,
                                                       hits_required)
        self.assertFalse(is_match)
        # ... while the title comparison takes separators into account.
        is_match, _unused = compare_fieldvalues_title(pairs, min_score,
                                                      hits_required)
        self.assertTrue(is_match)

        # Longer titles: the first uses the bytes \xe2\x80\x93 (a UTF-8
        # encoded en dash) where the second uses a colon, and the two
        # differ in the capitalisation of "to be".
        dash_title = ('Buffered Electropolishing \xe2\x80\x93 A New Way for '
                      'Achieving Extremely Smooth Surface Finish on Nb SRF '
                      'Cavities to be Used in Particle Accelerators')
        colon_title = ('Buffered Electropolishing: A New Way for Achieving '
                       'Extremely Smooth Surface Finish on Nb SRF Cavities '
                       'To be Used in Particle Accelerators')
        pairs = get_paired_comparisons([dash_title], [colon_title])
        is_match, _unused = compare_fieldvalues_title(pairs, min_score,
                                                      hits_required)
        self.assertTrue(is_match)

Example #2

0

Show file

File: bibmatch_unit_tests.py Project: AlbertoPeon/invenio

    def test_validation_get_paired_comparisons(self):
        """bibmatch - validation: check generated paired comparisons

        Feeds get_paired_comparisons two lists of different length, once
        with False as third argument and once with the default.
        """
        longer = [1, 2, 3]
        shorter = [4, 5]

        # With False as third argument the unequal-length lists are expected
        # to give an empty (falsy) result.
        # NOTE(review): presumably the flag disables un-ordered mode - confirm.
        self.assertFalse(get_paired_comparisons(longer, shorter, False))

        # Default (un-ordered) mode: each element of the first list is
        # expected to be paired with every element of the second, i.e.
        # [((1, 4), (1, 5)), ((2, 4), (2, 5)), ((3, 4), (3, 5))]
        expected = [tuple((left, right) for right in shorter)
                    for left in longer]
        self.assertEqual(expected, get_paired_comparisons(longer, shorter))

Example #3

0

Show file

    def test_validation_get_paired_comparisons(self):
        """bibmatch - validation: check generated paired comparisons

        Verifies the output of get_paired_comparisons for two lists of
        unequal length, with and without the optional third argument.
        """
        three_values = [1, 2, 3]
        two_values = [4, 5]

        # Passing False as third argument: nothing should come back,
        # because the lists are not equal in length.
        nothing = get_paired_comparisons(three_values, two_values, False)
        self.assertFalse(nothing)

        # Without the third argument (un-ordered mode) a result is expected:
        # one tuple of pairs per element of the first list.
        expected = [
            ((1, 4), (1, 5)),
            ((2, 4), (2, 5)),
            ((3, 4), (3, 5)),
        ]
        self.assertEqual(expected,
                         get_paired_comparisons(three_values, two_values))

Example #4

0

Show file

File: bibmatch_unit_tests.py Project: AlbertoPeon/invenio

    def test_validation_compare_date(self):
        """BibMatch comparison: compare date

        Runs the date comparison over two date pairs with a threshold of
        1.0 and one required match, checking the expected verdict of each.
        """
        min_score = 1.0
        hits_required = 1
        # (original values, matched values, assertion on the verdict)
        cases = (
            (['2002-02'], ['2001'], self.assertFalse),
            (['2001-02'], ['2001'], self.assertTrue),
        )
        for originals, candidates, check in cases:
            pairs = get_paired_comparisons(originals, candidates)
            verdict, _unused = compare_fieldvalues_date(pairs, min_score,
                                                        hits_required)
            check(verdict)

Example #5

0

Show file

File: bibmatch_unit_tests.py Project: AlbertoPeon/invenio

    def test_validation_compare_identifiers(self):
        """BibMatch comparison: compare identifiers

        Runs the identifier comparison over two report-number pairs with a
        threshold of 1.0 and one required match, checking the expected
        verdict of each.
        """
        min_score = 1.0
        hits_required = 1
        # (original values, matched values, assertion on the verdict)
        cases = (
            (['REP-NO-02123'], ['REPNO123'], self.assertFalse),
            (['REP-NO-0123'], ['REPNO123'], self.assertTrue),
        )
        for originals, candidates, check in cases:
            pairs = get_paired_comparisons(originals, candidates)
            verdict, _unused = compare_fieldvalues_identifier(pairs, min_score,
                                                              hits_required)
            check(verdict)

Example #6

0

Show file

File: bibmatch_unit_tests.py Project: AlbertoPeon/invenio

    def test_validation_compare_strings(self):
        """BibMatch comparison: compare strings

        Runs the plain comparison over two sentence pairs with a threshold
        of 0.8 and one required match, checking the expected verdict of each.
        """
        min_score = 0.8
        hits_required = 1
        # (original values, matched values, assertion on the verdict)
        cases = (
            (['This is some random text'],
             ['I have some random text about nothing'],
             self.assertFalse),
            (['This is some random text'],
             ['Is some random text'],
             self.assertTrue),
        )
        for originals, candidates, check in cases:
            pairs = get_paired_comparisons(originals, candidates)
            verdict, _unused = compare_fieldvalues_normal(pairs, min_score,
                                                          hits_required)
            check(verdict)

Example #7

0

Show file

File: bibmatch_unit_tests.py Project: AlbertoPeon/invenio

    def test_validation_compare_authors(self):
        """BibMatch comparison: compare authors

        Runs the author-name comparison over two name sets with a threshold
        of 0.8 and one required match, checking the expected verdict of each.
        """
        min_score = 0.8
        hits_required = 1
        # (original values, matched values, assertion on the verdict)
        cases = (
            # Full name against its initials, plus an unrelated name.
            (['Brodsky, Stanley J.'], ['Brodsky, S.J.', 'Not, M E'],
             self.assertTrue),
            # Same surname, but the initials differ.
            (['Brodsky, J.'], ['Brodsky, S.J.'], self.assertFalse),
        )
        for originals, candidates, check in cases:
            pairs = get_paired_comparisons(originals, candidates)
            verdict, _unused = compare_fieldvalues_authorname(pairs, min_score,
                                                              hits_required)
            check(verdict)

Example #8

0

Show file

    def test_validation_compare_identifiers(self):
        """BibMatch comparison: compare identifiers

        One report-number pair is expected to be rejected and one to be
        accepted by the identifier comparison at a threshold of 1.0.
        """
        exact = 1.0
        needed = 1

        # 'REP-NO-02123' against 'REPNO123': expected not to match.
        pairs = get_paired_comparisons(['REP-NO-02123'], ['REPNO123'])
        accepted, _rest = compare_fieldvalues_identifier(pairs, exact, needed)
        self.assertFalse(accepted)

        # 'REP-NO-0123' against 'REPNO123': expected to match.
        pairs = get_paired_comparisons(['REP-NO-0123'], ['REPNO123'])
        accepted, _rest = compare_fieldvalues_identifier(pairs, exact, needed)
        self.assertTrue(accepted)

Example #9

0

Show file

    def test_validation_compare_strings(self):
        """BibMatch comparison: compare strings

        One sentence pair is expected to be rejected and one to be accepted
        by the plain comparison at a threshold of 0.8.
        """
        cutoff = 0.8
        needed = 1
        reference = 'This is some random text'

        # A sentence that shares only part of the reference text.
        pairs = get_paired_comparisons(
            [reference], ['I have some random text about nothing'])
        accepted, _rest = compare_fieldvalues_normal(pairs, cutoff, needed)
        self.assertFalse(accepted)

        # The reference text with its first word dropped.
        pairs = get_paired_comparisons([reference], ['Is some random text'])
        accepted, _rest = compare_fieldvalues_normal(pairs, cutoff, needed)
        self.assertTrue(accepted)

Example #10

0

Show file

    def test_validation_compare_authors(self):
        """BibMatch comparison: compare authors

        One name set is expected to be accepted and one to be rejected by
        the author-name comparison at a threshold of 0.85.
        """
        cutoff = 0.85
        needed = 1

        # Full name against a list holding its initials and another name.
        pairs = get_paired_comparisons(['Brodsky, Stanley J.'],
                                       ['Brodsky, S.J.', 'Not, M E'])
        accepted, _rest = compare_fieldvalues_authorname(pairs, cutoff, needed)
        self.assertTrue(accepted)

        # Same surname with differing initials: expected not to match.
        pairs = get_paired_comparisons(['Brodsky, J.'], ['Brodsky, S.J.'])
        accepted, _rest = compare_fieldvalues_authorname(pairs, cutoff, needed)
        self.assertFalse(accepted)

Example #11

0

Show file

    def test_validation_compare_date(self):
        """BibMatch comparison: compare date

        One date pair is expected to be rejected and one to be accepted by
        the date comparison at a threshold of 1.0.
        """
        exact = 1.0
        needed = 1

        # '2002-02' against '2001': expected not to match.
        pairs = get_paired_comparisons(['2002-02'], ['2001'])
        accepted, _rest = compare_fieldvalues_date(pairs, exact, needed)
        self.assertFalse(accepted)

        # '2001-02' against '2001': expected to match.
        pairs = get_paired_comparisons(['2001-02'], ['2001'])
        accepted, _rest = compare_fieldvalues_date(pairs, exact, needed)
        self.assertTrue(accepted)

Example #12

0

Show file

File: bibmatch_unit_tests.py Project: AlbertoPeon/invenio

    def test_validation_compare_title(self):
        """BibMatch comparison: compare title

        A title and its subtitled variant should fail the plain comparison
        yet pass the title comparison; a pair of long titles should pass
        the title comparison too.
        """
        cutoff = 0.9
        needed = 1

        def verdict(compare, pairs):
            # Run one comparison function, keeping only its first result.
            outcome, _rest = compare(pairs, cutoff, needed)
            return outcome

        pairs = get_paired_comparisons(
            ['Assault frequency and preformation probability'],
            ['Assault frequency and preformation probability : The alpha emission process'])
        # The plain comparison should fail on this pair.
        self.assertFalse(verdict(compare_fieldvalues_normal, pairs))
        # Title search however, takes separators into account.
        self.assertTrue(verdict(compare_fieldvalues_title, pairs))

        # Check longer titles (separator and capitalisation differ).
        pairs = get_paired_comparisons(
            ['Buffered Electropolishing \xe2\x80\x93 A New Way for '
             'Achieving Extremely Smooth Surface Finish on Nb SRF '
             'Cavities to be Used in Particle Accelerators'],
            ['Buffered Electropolishing: A New Way for Achieving '
             'Extremely Smooth Surface Finish on Nb SRF Cavities '
             'To be Used in Particle Accelerators'])
        self.assertTrue(verdict(compare_fieldvalues_title, pairs))