Example #1
0
    def test_validation_compare_title(self):
        """BibMatch comparison: compare title

        Checks that a title paired with the same title plus a ' : subtitle'
        suffix is rejected by the plain string comparison but accepted by the
        title comparison at the same threshold, and that two long titles
        differing in their separator and in the capitalisation of one word
        are accepted by the title comparison.
        """
        min_score = 0.9
        required_matches = 1

        bare_title = 'Assault frequency and preformation probability'
        title_with_subtitle = ('Assault frequency and preformation probability : '
                               'The alpha emission process')
        pairs = get_paired_comparisons([bare_title], [title_with_subtitle])

        # The generic comparison is expected to reject this pair ...
        accepted, _ignored = compare_fieldvalues_normal(pairs, min_score,
                                                        required_matches)
        self.assertFalse(accepted)
        # ... whereas the title comparison takes separators into account.
        accepted, _ignored = compare_fieldvalues_title(pairs, min_score,
                                                       required_matches)
        self.assertTrue(accepted)

        # Longer titles: the first uses a UTF-8 encoded en dash (written as
        # raw bytes) where the second uses a colon.
        dashed_title = ('Buffered Electropolishing \xe2\x80\x93 A New Way for '
                        'Achieving Extremely Smooth Surface Finish on Nb SRF '
                        'Cavities to be Used in Particle Accelerators')
        colon_title = ('Buffered Electropolishing: A New Way for Achieving '
                       'Extremely Smooth Surface Finish on Nb SRF Cavities '
                       'To be Used in Particle Accelerators')
        pairs = get_paired_comparisons([dashed_title], [colon_title])
        accepted, _ignored = compare_fieldvalues_title(pairs, min_score,
                                                       required_matches)
        self.assertTrue(accepted)
    def test_validation_get_paired_comparisons(self):
        """bibmatch - validation: check generated paired comparisons

        Uses a three-element and a two-element list. With False as third
        argument the result must be falsy; with the default third argument
        each value of the first list must be paired with every value of the
        second list, one tuple of pairs per value of the first list.
        """
        left_values = [1, 2, 3]
        right_values = [4, 5]

        # Lists of unequal length: nothing is expected back in this mode.
        strict_pairs = get_paired_comparisons(left_values, right_values, False)
        self.assertFalse(strict_pairs)

        # Default (un-ordered) mode: the full set of pairings is expected.
        expected_pairs = [
            ((1, 4), (1, 5)),
            ((2, 4), (2, 5)),
            ((3, 4), (3, 5)),
        ]
        self.assertEqual(expected_pairs,
                         get_paired_comparisons(left_values, right_values))
Example #3
0
    def test_validation_get_paired_comparisons(self):
        """bibmatch - validation: check generated paired comparisons

        Feeds lists of length three and two to get_paired_comparisons: the
        call with False as third argument must yield a falsy value, and the
        call without it must yield all pairings grouped by the values of the
        first list.
        """
        shorter = [4, 5]
        longer = [1, 2, 3]

        # Unequal lengths, third argument False: an empty result is expected.
        self.assertFalse(get_paired_comparisons(longer, shorter, False))

        # Un-ordered mode (default): one tuple of pairs per element of `longer`.
        wanted = [((1, 4), (1, 5)),
                  ((2, 4), (2, 5)),
                  ((3, 4), (3, 5))]
        produced = get_paired_comparisons(longer, shorter)
        self.assertEqual(wanted, produced)
    def test_validation_compare_date(self):
        """BibMatch comparison: compare date

        With threshold 1.0 and one match needed, '2002-02' against '2001'
        must be rejected and '2001-02' against '2001' must be accepted.
        """
        min_score = 1.0
        required_matches = 1

        # The years differ: no match expected.
        pairs = get_paired_comparisons(['2002-02'], ['2001'])
        accepted, _ignored = compare_fieldvalues_date(pairs, min_score,
                                                      required_matches)
        self.assertFalse(accepted)

        # Same year, one side also carries a month: match expected.
        pairs = get_paired_comparisons(['2001-02'], ['2001'])
        accepted, _ignored = compare_fieldvalues_date(pairs, min_score,
                                                      required_matches)
        self.assertTrue(accepted)
    def test_validation_compare_identifiers(self):
        """BibMatch comparison: compare identifiers

        With threshold 1.0 and one match needed, 'REP-NO-02123' against
        'REPNO123' must be rejected and 'REP-NO-0123' against 'REPNO123'
        must be accepted.
        """
        min_score = 1.0
        required_matches = 1
        candidate = ['REPNO123']

        # Extra digit in the number part: no match expected.
        pairs = get_paired_comparisons(['REP-NO-02123'], candidate)
        accepted, _ignored = compare_fieldvalues_identifier(pairs, min_score,
                                                            required_matches)
        self.assertFalse(accepted)

        # Differs only by dashes and a leading zero: match expected.
        pairs = get_paired_comparisons(['REP-NO-0123'], ['REPNO123'])
        accepted, _ignored = compare_fieldvalues_identifier(pairs, min_score,
                                                            required_matches)
        self.assertTrue(accepted)
    def test_validation_compare_strings(self):
        """BibMatch comparison: compare strings

        With threshold 0.8 and one match needed, 'This is some random text'
        must be rejected against 'I have some random text about nothing' and
        accepted against 'Is some random text'.
        """
        min_score = 0.8
        required_matches = 1
        reference = 'This is some random text'

        # Only part of the text is shared: no match expected.
        pairs = get_paired_comparisons(
            [reference], ['I have some random text about nothing'])
        accepted, _ignored = compare_fieldvalues_normal(pairs, min_score,
                                                        required_matches)
        self.assertFalse(accepted)

        # Nearly the same text: match expected.
        pairs = get_paired_comparisons([reference], ['Is some random text'])
        accepted, _ignored = compare_fieldvalues_normal(pairs, min_score,
                                                        required_matches)
        self.assertTrue(accepted)
    def test_validation_compare_authors(self):
        """BibMatch comparison: compare authors

        With threshold 0.8 and one match needed, 'Brodsky, Stanley J.' must
        be accepted against the names 'Brodsky, S.J.' and 'Not, M E', while
        'Brodsky, J.' must be rejected against 'Brodsky, S.J.'.
        """
        min_score = 0.8
        required_matches = 1

        # Full first name versus initials, plus one unrelated name.
        pairs = get_paired_comparisons(['Brodsky, Stanley J.'],
                                       ['Brodsky, S.J.', 'Not, M E'])
        accepted, _ignored = compare_fieldvalues_authorname(pairs, min_score,
                                                            required_matches)
        self.assertTrue(accepted)

        # Same surname but the initials do not line up: no match expected.
        pairs = get_paired_comparisons(['Brodsky, J.'], ['Brodsky, S.J.'])
        accepted, _ignored = compare_fieldvalues_authorname(pairs, min_score,
                                                            required_matches)
        self.assertFalse(accepted)
Example #8
0
    def test_validation_compare_identifiers(self):
        """BibMatch comparison: compare identifiers

        Runs two identifier pairs through the identifier comparison with
        threshold 1.0 and one match needed: 'REP-NO-02123' / 'REPNO123' must
        be rejected, 'REP-NO-0123' / 'REPNO123' must be accepted.
        """
        min_score = 1.0
        required_matches = 1
        cases = (
            # (original values, matched values, assertion on the outcome)
            (['REP-NO-02123'], ['REPNO123'], self.assertFalse),
            (['REP-NO-0123'], ['REPNO123'], self.assertTrue),
        )
        for originals, candidates, check in cases:
            pairs = get_paired_comparisons(originals, candidates)
            accepted, _ignored = compare_fieldvalues_identifier(
                pairs, min_score, required_matches)
            check(accepted)
Example #9
0
    def test_validation_compare_strings(self):
        """BibMatch comparison: compare strings

        Runs two text pairs through the plain string comparison with
        threshold 0.8 and one match needed: the first pair must be rejected,
        the second accepted.
        """
        min_score = 0.8
        required_matches = 1
        cases = (
            # (original values, matched values, assertion on the outcome)
            (['This is some random text'],
             ['I have some random text about nothing'],
             self.assertFalse),
            (['This is some random text'],
             ['Is some random text'],
             self.assertTrue),
        )
        for originals, candidates, check in cases:
            pairs = get_paired_comparisons(originals, candidates)
            accepted, _ignored = compare_fieldvalues_normal(
                pairs, min_score, required_matches)
            check(accepted)
Example #10
0
    def test_validation_compare_authors(self):
        """BibMatch comparison: compare authors

        With threshold 0.85 and one match needed, 'Brodsky, Stanley J.' must
        be accepted against the names 'Brodsky, S.J.' and 'Not, M E', while
        'Brodsky, J.' must be rejected against 'Brodsky, S.J.'.
        """
        min_score = 0.85
        required_matches = 1

        # Spelled-out first name against initials, alongside an unrelated name.
        full_name_pairs = get_paired_comparisons(
            ['Brodsky, Stanley J.'], ['Brodsky, S.J.', 'Not, M E'])
        accepted, _ignored = compare_fieldvalues_authorname(
            full_name_pairs, min_score, required_matches)
        self.assertTrue(accepted)

        # Shared surname with differing initials: must not validate.
        initials_pairs = get_paired_comparisons(['Brodsky, J.'],
                                                ['Brodsky, S.J.'])
        accepted, _ignored = compare_fieldvalues_authorname(
            initials_pairs, min_score, required_matches)
        self.assertFalse(accepted)
Example #11
0
    def test_validation_compare_date(self):
        """BibMatch comparison: compare date

        Runs two date pairs through the date comparison with threshold 1.0
        and one match needed: '2002-02' / '2001' must be rejected,
        '2001-02' / '2001' must be accepted.
        """
        min_score = 1.0
        required_matches = 1
        cases = (
            # (original values, matched values, assertion on the outcome)
            (['2002-02'], ['2001'], self.assertFalse),
            (['2001-02'], ['2001'], self.assertTrue),
        )
        for originals, candidates, check in cases:
            pairs = get_paired_comparisons(originals, candidates)
            accepted, _ignored = compare_fieldvalues_date(
                pairs, min_score, required_matches)
            check(accepted)
    def test_validation_compare_title(self):
        """BibMatch comparison: compare title

        At threshold 0.9 with one match needed, a title and the same title
        extended by ' : subtitle' must fail the plain string comparison but
        pass the title comparison; two long titles that differ in separator
        and in the capitalisation of one word must also pass the title
        comparison.
        """
        min_score = 0.9
        required_matches = 1

        originals = ['Assault frequency and preformation probability']
        candidates = [
            'Assault frequency and preformation probability : The alpha emission process',
        ]
        pairs = get_paired_comparisons(originals, candidates)

        # Plain string comparison: expected to fail for this pair.
        plain_ok, _ignored = compare_fieldvalues_normal(
            pairs, min_score, required_matches)
        self.assertFalse(plain_ok)

        # Title comparison takes separators into account, so it should pass.
        title_ok, _ignored = compare_fieldvalues_title(
            pairs, min_score, required_matches)
        self.assertTrue(title_ok)

        # Longer titles; the first contains a UTF-8 en dash given as raw bytes.
        originals = [
            'Buffered Electropolishing \xe2\x80\x93 A New Way for '
            'Achieving Extremely Smooth Surface Finish on Nb SRF '
            'Cavities to be Used in Particle Accelerators'
        ]
        candidates = [
            'Buffered Electropolishing: A New Way for Achieving '
            'Extremely Smooth Surface Finish on Nb SRF Cavities '
            'To be Used in Particle Accelerators'
        ]
        pairs = get_paired_comparisons(originals, candidates)
        title_ok, _ignored = compare_fieldvalues_title(
            pairs, min_score, required_matches)
        self.assertTrue(title_ok)