def test_validation_compare_strings(self):
    """BibMatch comparison: compare strings

    Feeds two sentence pairs through compare_fieldvalues_normal using a
    threshold of 0.8 and one required match. The first pair is expected
    to be rejected and the second pair to be accepted.
    """
    min_similarity = 0.8
    required_matches = 1

    # The two sentences only share the fragment "some random text":
    # expected to be reported as not matching.
    pairs = get_paired_comparisons(
        ['This is some random text'],
        ['I have some random text about nothing'])
    matched, _ = compare_fieldvalues_normal(pairs,
                                            min_similarity,
                                            required_matches)
    self.assertFalse(matched)

    # The second sentence is the first one without its leading "This"
    # (and with "is" capitalised): expected to be reported as matching.
    pairs = get_paired_comparisons(
        ['This is some random text'],
        ['Is some random text'])
    matched, _ = compare_fieldvalues_normal(pairs,
                                            min_similarity,
                                            required_matches)
    self.assertTrue(matched)
def test_validation_compare_title(self):
    """BibMatch comparison: compare title

    Checks, with a threshold of 0.9 and one required match, that:

    * a title and the same title followed by " : <subtitle>" are rejected
      by compare_fieldvalues_normal but accepted by
      compare_fieldvalues_title;
    * two long titles that differ in the separator after the first words
      (UTF-8 encoded en dash versus colon) and in the case of one word
      are accepted by compare_fieldvalues_title.
    """
    threshold = 0.9
    matches_needed = 1

    original_record_instances = [
        'Assault frequency and preformation probability',
    ]
    matched_record_instances = [
        'Assault frequency and preformation probability : '
        'The alpha emission process',
    ]
    comparisons = get_paired_comparisons(original_record_instances,
                                         matched_record_instances)

    # The plain comparison sees the extra subtitle and should fail.
    result, dummy = compare_fieldvalues_normal(comparisons,
                                               threshold,
                                               matches_needed)
    self.assertFalse(result)

    # Title search however, takes separators into account, so the same
    # pair should be accepted.
    result, dummy = compare_fieldvalues_title(comparisons,
                                              threshold,
                                              matches_needed)
    self.assertTrue(result)

    # Check longer titles. The \xe2\x80\x93 escapes are the UTF-8 bytes of
    # an en dash and must stay exactly as written.
    original_record_instances = [
        'Buffered Electropolishing \xe2\x80\x93 A New Way for '
        'Achieving Extremely Smooth Surface Finish on Nb SRF '
        'Cavities to be Used in Particle Accelerators',
    ]
    matched_record_instances = [
        'Buffered Electropolishing: A New Way for Achieving '
        'Extremely Smooth Surface Finish on Nb SRF Cavities '
        'To be Used in Particle Accelerators',
    ]
    comparisons = get_paired_comparisons(original_record_instances,
                                         matched_record_instances)
    result, dummy = compare_fieldvalues_title(comparisons,
                                              threshold,
                                              matches_needed)
    self.assertTrue(result)