Esempio n. 1
0
    def test_validation_compare_strings(self):
        """BibMatch comparison: compare strings

        Runs compare_fieldvalues_normal with a threshold of 0.8 and one
        required match on two string pairs: the first pair is expected to
        be rejected, the second to be accepted.
        """
        threshold = 0.8
        matches_needed = 1

        # (original value, candidate value, assertion applied to the outcome)
        cases = (
            ('This is some random text',
             'I have some random text about nothing',
             self.assertFalse),
            ('This is some random text',
             'Is some random text',
             self.assertTrue),
        )

        for original_text, candidate_text, check in cases:
            pairs = get_paired_comparisons([original_text], [candidate_text])
            # Only the first element of the returned pair is asserted on.
            outcome, _ignored = compare_fieldvalues_normal(pairs, threshold, matches_needed)
            check(outcome)
Esempio n. 2
0
    def test_validation_compare_strings(self):
        """BibMatch comparison: compare strings

        Checks compare_fieldvalues_normal (threshold 0.8, one match needed)
        against one string pair that must not match and one that must.
        """
        threshold = 0.8
        matches_needed = 1

        # Expected to be rejected at this threshold.
        rejected_pairs = get_paired_comparisons(
            ['This is some random text'],
            ['I have some random text about nothing'],
        )
        is_match, _ignored = compare_fieldvalues_normal(
            rejected_pairs, threshold, matches_needed)
        self.assertFalse(is_match)

        # Expected to be accepted at this threshold.
        accepted_pairs = get_paired_comparisons(
            ['This is some random text'],
            ['Is some random text'],
        )
        is_match, _ignored = compare_fieldvalues_normal(
            accepted_pairs, threshold, matches_needed)
        self.assertTrue(is_match)
Esempio n. 3
0
    def test_validation_compare_title(self):
        """BibMatch comparison: compare title

        With a threshold of 0.9 and one required match, a title and the
        same title followed by ' : <subtitle>' must be rejected by
        compare_fieldvalues_normal but accepted by compare_fieldvalues_title.
        A longer pair of titles must also be accepted by the title
        comparison.
        """
        threshold = 0.9
        matches_needed = 1

        short_title = 'Assault frequency and preformation probability'
        title_with_subtitle = ('Assault frequency and preformation probability'
                               ' : The alpha emission process')
        pairs = get_paired_comparisons([short_title], [title_with_subtitle])

        # The plain comparison is expected to reject this pair ...
        is_match, _ignored = compare_fieldvalues_normal(
            pairs, threshold, matches_needed)
        self.assertFalse(is_match)
        # ... whereas the title comparison takes separators into account.
        is_match, _ignored = compare_fieldvalues_title(
            pairs, threshold, matches_needed)
        self.assertTrue(is_match)

        # Longer titles
        long_original = ('Buffered Electropolishing \xe2\x80\x93 A New Way for '
                         'Achieving Extremely Smooth Surface Finish on Nb SRF '
                         'Cavities to be Used in Particle Accelerators')
        long_candidate = ('Buffered Electropolishing: A New Way for Achieving '
                          'Extremely Smooth Surface Finish on Nb SRF Cavities '
                          'To be Used in Particle Accelerators')
        pairs = get_paired_comparisons([long_original], [long_candidate])
        is_match, _ignored = compare_fieldvalues_title(
            pairs, threshold, matches_needed)
        self.assertTrue(is_match)
Esempio n. 4
0
    def test_validation_compare_title(self):
        """BibMatch comparison: compare title

        Uses a threshold of 0.9 with one required match. A title compared
        against itself plus a ' : <subtitle>' suffix must fail the normal
        comparison and pass the title comparison; a longer title pair must
        pass the title comparison as well.
        """
        threshold = 0.9
        matches_needed = 1

        def verdict(compare, pairs):
            # Run one comparison function and keep only its first return value.
            outcome, _ignored = compare(pairs, threshold, matches_needed)
            return outcome

        pairs = get_paired_comparisons(
            ['Assault frequency and preformation probability'],
            ['Assault frequency and preformation probability : The alpha emission process'],
        )
        # Expected to fail with the normal comparison
        self.assertFalse(verdict(compare_fieldvalues_normal, pairs))
        # The title comparison, however, takes separators into account
        self.assertTrue(verdict(compare_fieldvalues_title, pairs))

        # Longer titles
        pairs = get_paired_comparisons(
            ['Buffered Electropolishing \xe2\x80\x93 A New Way for '
             'Achieving Extremely Smooth Surface Finish on Nb SRF '
             'Cavities to be Used in Particle Accelerators'],
            ['Buffered Electropolishing: A New Way for Achieving '
             'Extremely Smooth Surface Finish on Nb SRF Cavities '
             'To be Used in Particle Accelerators'],
        )
        self.assertTrue(verdict(compare_fieldvalues_title, pairs))