def test_validation_compare_title(self):
    """BibMatch comparison: compare title

    A title is paired with the same title carrying an extra
    ' : subtitle' part. The generic comparison is expected to reject
    the pair, the title comparison is expected to accept it. A second,
    longer pair that differs in separator and capitalisation is then
    expected to be accepted by the title comparison too.
    """
    min_score = 0.9
    hits_required = 1

    bare_title = ['Assault frequency and preformation probability']
    title_with_subtitle = [
        'Assault frequency and preformation probability : The alpha emission process'
    ]
    pairs = get_paired_comparisons(bare_title, title_with_subtitle)

    # This should fail
    is_match, dummy = compare_fieldvalues_normal(pairs, min_score, hits_required)
    self.assertFalse(is_match)

    # Title search however, takes separators into account
    is_match, dummy = compare_fieldvalues_title(pairs, min_score, hits_required)
    self.assertTrue(is_match)

    # Check longer titles
    dashed_title = [
        'Buffered Electropolishing \xe2\x80\x93 A New Way for '
        'Achieving Extremely Smooth Surface Finish on Nb SRF '
        'Cavities to be Used in Particle Accelerators'
    ]
    colon_title = [
        'Buffered Electropolishing: A New Way for Achieving '
        'Extremely Smooth Surface Finish on Nb SRF Cavities '
        'To be Used in Particle Accelerators'
    ]
    pairs = get_paired_comparisons(dashed_title, colon_title)
    is_match, dummy = compare_fieldvalues_title(pairs, min_score, hits_required)
    self.assertTrue(is_match)
def test_validation_get_paired_comparisons(self):
    """bibmatch - validation: check generated paired comparisons

    Feeds two lists of different length to get_paired_comparisons,
    once with the third argument set to False and once with the
    default, and checks the value returned in each case.
    """
    three_items = [1, 2, 3]
    two_items = [4, 5]

    # Should return empty, as lists are not equal in length
    self.assertFalse(get_paired_comparisons(three_items, two_items, False))

    # Should return result, in un-ordered mode
    expected_pairs = [
        ((1, 4), (1, 5)),
        ((2, 4), (2, 5)),
        ((3, 4), (3, 5)),
    ]
    self.assertEqual(expected_pairs,
                     get_paired_comparisons(three_items, two_items))
def test_validation_compare_date(self):
    """BibMatch comparison: compare date

    A year-month value is compared against a bare year: a different
    year is expected to be rejected, the same year to be accepted.
    """
    min_score = 1.0
    hits_required = 1

    # Year differs between the two values: no match expected
    pairs = get_paired_comparisons(['2002-02'], ['2001'])
    is_match, dummy = compare_fieldvalues_date(pairs, min_score, hits_required)
    self.assertFalse(is_match)

    # Same year, one side additionally carries a month: match expected
    pairs = get_paired_comparisons(['2001-02'], ['2001'])
    is_match, dummy = compare_fieldvalues_date(pairs, min_score, hits_required)
    self.assertTrue(is_match)
def test_validation_compare_identifiers(self):
    """BibMatch comparison: compare identifiers

    Two differently formatted report numbers are compared against
    'REPNO123': 'REP-NO-02123' is expected to be rejected and
    'REP-NO-0123' to be accepted.
    """
    min_score = 1.0
    hits_required = 1
    candidate = ['REPNO123']

    # Extra digit in the numeric part: no match expected
    pairs = get_paired_comparisons(['REP-NO-02123'], candidate)
    is_match, dummy = compare_fieldvalues_identifier(pairs, min_score, hits_required)
    self.assertFalse(is_match)

    # Differs only by dashes and a leading zero: match expected
    # NOTE(review): presumably the comparison normalises these away;
    # confirm against compare_fieldvalues_identifier.
    pairs = get_paired_comparisons(['REP-NO-0123'], candidate)
    is_match, dummy = compare_fieldvalues_identifier(pairs, min_score, hits_required)
    self.assertTrue(is_match)
def test_validation_compare_strings(self):
    """BibMatch comparison: compare strings

    The same reference sentence is compared with the generic
    comparison against two candidates at a threshold of 0.8: a
    loosely related sentence (expected to be rejected) and a shortened
    version of the reference (expected to be accepted).
    """
    min_score = 0.8
    hits_required = 1
    reference = ['This is some random text']

    # Only part of the wording is shared: no match expected
    pairs = get_paired_comparisons(reference,
                                   ['I have some random text about nothing'])
    is_match, dummy = compare_fieldvalues_normal(pairs, min_score, hits_required)
    self.assertFalse(is_match)

    # Candidate is the reference minus its first word: match expected
    pairs = get_paired_comparisons(reference, ['Is some random text'])
    is_match, dummy = compare_fieldvalues_normal(pairs, min_score, hits_required)
    self.assertTrue(is_match)
def test_validation_compare_authors(self):
    """BibMatch comparison: compare authors

    Runs the author-name comparison at a threshold of 0.8 with one
    match needed: a full name against its initialised form (plus an
    unrelated name) is expected to match, while two names whose
    initials differ are expected not to.
    """
    min_score = 0.8
    hits_required = 1

    # Full first name vs. initials, with an unrelated second candidate
    pairs = get_paired_comparisons(['Brodsky, Stanley J.'],
                                   ['Brodsky, S.J.', 'Not, M E'])
    is_match, dummy = compare_fieldvalues_authorname(pairs, min_score, hits_required)
    self.assertTrue(is_match)

    # Same surname but 'J.' vs. 'S.J.': no match expected
    pairs = get_paired_comparisons(['Brodsky, J.'], ['Brodsky, S.J.'])
    is_match, dummy = compare_fieldvalues_authorname(pairs, min_score, hits_required)
    self.assertFalse(is_match)
def test_validation_compare_authors(self):
    """BibMatch comparison: compare authors

    Runs the author-name comparison at a threshold of 0.85 with one
    match needed: a full name against its initialised form (plus an
    unrelated name) is expected to match, while two names whose
    initials differ are expected not to.
    """
    score_floor = 0.85
    needed = 1

    full_name = ['Brodsky, Stanley J.']
    candidates = ['Brodsky, S.J.', 'Not, M E']
    name_pairs = get_paired_comparisons(full_name, candidates)
    accepted, dummy = compare_fieldvalues_authorname(name_pairs, score_floor, needed)
    # One of the candidates is the initialised form of the full name
    self.assertTrue(accepted)

    single_initial = ['Brodsky, J.']
    double_initial = ['Brodsky, S.J.']
    name_pairs = get_paired_comparisons(single_initial, double_initial)
    accepted, dummy = compare_fieldvalues_authorname(name_pairs, score_floor, needed)
    # Same surname, different initials: must not be accepted
    self.assertFalse(accepted)
def test_validation_compare_title(self):
    """BibMatch comparison: compare title

    Checks that a title and the same title followed by ' : subtitle'
    are rejected by the generic comparison but accepted by the title
    comparison, and that a longer title pair differing in separator
    and capitalisation is accepted by the title comparison.
    """
    score_floor = 0.9
    needed = 1

    title_pairs = get_paired_comparisons(
        ['Assault frequency and preformation probability'],
        ['Assault frequency and preformation probability : The alpha emission process'],
    )

    # This should fail
    accepted, dummy = compare_fieldvalues_normal(title_pairs, score_floor, needed)
    self.assertFalse(accepted)

    # Title search however, takes separators into account
    accepted, dummy = compare_fieldvalues_title(title_pairs, score_floor, needed)
    self.assertTrue(accepted)

    # Check longer titles
    title_pairs = get_paired_comparisons(
        ['Buffered Electropolishing \xe2\x80\x93 A New Way for '
         'Achieving Extremely Smooth Surface Finish on Nb SRF '
         'Cavities to be Used in Particle Accelerators'],
        ['Buffered Electropolishing: A New Way for Achieving '
         'Extremely Smooth Surface Finish on Nb SRF Cavities '
         'To be Used in Particle Accelerators'],
    )
    accepted, dummy = compare_fieldvalues_title(title_pairs, score_floor, needed)
    self.assertTrue(accepted)