def test_tanimoto_coeff(self):
    """Test abydos.distance.sim_tanimoto.

    Exercises empty operands, the 'nelson'/'neilsen' pair (expected 4/11),
    pre-built QGrams operands, and the NONQ_* fixtures with 0 as the third
    argument (expected 1/3 in either argument order).
    """
    # Run once without a third argument and once with an explicit 2
    # (presumably the q-gram size -- confirm against the signature);
    # the expectations are the same for both.
    for extra in ((), (2,)):
        self.assertEqual(sim_tanimoto('', '', *extra), 1)
        self.assertEqual(sim_tanimoto('nelson', '', *extra), 0)
        self.assertEqual(sim_tanimoto('', 'neilsen', *extra), 0)
        self.assertAlmostEqual(
            sim_tanimoto('nelson', 'neilsen', *extra), 4/11)

    # supplied q-gram tests: operands are QGrams objects built per call
    for src, tar, want in (('', '', 1), ('nelson', '', 0),
                           ('', 'neilsen', 0)):
        self.assertEqual(sim_tanimoto(QGrams(src), QGrams(tar)), want)
    self.assertAlmostEqual(
        sim_tanimoto(QGrams('nelson'), QGrams('neilsen')), 4/11)

    # non-q-gram tests: third argument is 0
    for src, tar, want in (('', '', 1), ('the quick', '', 0),
                           ('', 'the quick', 0)):
        self.assertEqual(sim_tanimoto(src, tar, 0), want)
    for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
        self.assertAlmostEqual(sim_tanimoto(src, tar, 0), 1/3)
def test_dist_overlap(self):
    """Test abydos.distance.dist_overlap.

    Exercises empty operands, the 'nelson'/'neilsen' pair (expected 3/7),
    pre-built QGrams operands, and the NONQ_* fixtures with 0 as the third
    argument (expected 3/7 in either argument order).
    """
    # Same expectations with no third argument and with an explicit 2.
    for extra in ((), (2,)):
        self.assertEqual(dist_overlap('', '', *extra), 0)
        self.assertEqual(dist_overlap('nelson', '', *extra), 1)
        self.assertEqual(dist_overlap('', 'neilsen', *extra), 1)
        self.assertAlmostEqual(
            dist_overlap('nelson', 'neilsen', *extra), 3/7)

    # supplied q-gram tests: operands are QGrams objects built per call
    for src, tar, want in (('', '', 0), ('nelson', '', 1),
                           ('', 'neilsen', 1)):
        self.assertEqual(dist_overlap(QGrams(src), QGrams(tar)), want)
    self.assertAlmostEqual(
        dist_overlap(QGrams('nelson'), QGrams('neilsen')), 3/7)

    # non-q-gram tests: third argument is 0
    for src, tar, want in (('', '', 0), ('the quick', '', 1),
                           ('', 'the quick', 1)):
        self.assertEqual(dist_overlap(src, tar, 0), want)
    for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
        self.assertAlmostEqual(dist_overlap(src, tar, 0), 3/7)
def test_dist_jaccard(self):
    """Test abydos.distance.dist_jaccard.

    Exercises empty operands, the 'nelson'/'neilsen' pair (expected 7/11),
    pre-built QGrams operands, and the NONQ_* fixtures with 0 as the third
    argument (expected 2/3 in either argument order).
    """
    # Same expectations with no third argument and with an explicit 2.
    for extra in ((), (2,)):
        self.assertEqual(dist_jaccard('', '', *extra), 0)
        self.assertEqual(dist_jaccard('nelson', '', *extra), 1)
        self.assertEqual(dist_jaccard('', 'neilsen', *extra), 1)
        self.assertAlmostEqual(
            dist_jaccard('nelson', 'neilsen', *extra), 7/11)

    # supplied q-gram tests: operands are QGrams objects built per call
    for src, tar, want in (('', '', 0), ('nelson', '', 1),
                           ('', 'neilsen', 1)):
        self.assertEqual(dist_jaccard(QGrams(src), QGrams(tar)), want)
    self.assertAlmostEqual(
        dist_jaccard(QGrams('nelson'), QGrams('neilsen')), 7/11)

    # non-q-gram tests: third argument is 0
    for src, tar, want in (('', '', 0), ('the quick', '', 1),
                           ('', 'the quick', 1)):
        self.assertEqual(dist_jaccard(src, tar, 0), want)
    for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
        self.assertAlmostEqual(dist_jaccard(src, tar, 0), 2/3)
def test_sim_dice(self):
    """Test abydos.distance.sim_dice.

    Exercises empty operands, the 'nelson'/'neilsen' pair (expected 8/15),
    pre-built QGrams operands, and the NONQ_* fixtures with 0 as the third
    argument (expected 1/2 in either argument order).
    """
    # Same expectations with no third argument and with an explicit 2.
    for extra in ((), (2,)):
        self.assertEqual(sim_dice('', '', *extra), 1)
        self.assertEqual(sim_dice('nelson', '', *extra), 0)
        self.assertEqual(sim_dice('', 'neilsen', *extra), 0)
        self.assertAlmostEqual(
            sim_dice('nelson', 'neilsen', *extra), 8/15)

    # supplied q-gram tests: operands are QGrams objects built per call
    for src, tar, want in (('', '', 1), ('nelson', '', 0),
                           ('', 'neilsen', 0)):
        self.assertEqual(sim_dice(QGrams(src), QGrams(tar)), want)
    self.assertAlmostEqual(
        sim_dice(QGrams('nelson'), QGrams('neilsen')), 8/15)

    # non-q-gram tests: third argument is 0
    for src, tar, want in (('', '', 1), ('the quick', '', 0),
                           ('', 'the quick', 0)):
        self.assertEqual(sim_dice(src, tar, 0), want)
    for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
        self.assertAlmostEqual(sim_dice(src, tar, 0), 1/2)
def test_dist_tversky(self):
    """Test abydos.distance.dist_tversky.

    Covers empty operands, the 'nelson'/'neilsen' pair (expected 7/11),
    rejection of negative alpha/beta with ValueError, a third argument of 7
    (expected distance 1.0), unequal alpha/beta weights, a sixth positional
    argument of 0.5 (the bias cases), pre-built QGrams operands, and the
    NONQ_* fixtures with 0 as the third argument.
    """
    # Same expectations with no third argument and with an explicit 2.
    for extra in ((), (2,)):
        self.assertEqual(dist_tversky('', '', *extra), 0)
        self.assertEqual(dist_tversky('nelson', '', *extra), 1)
        self.assertEqual(dist_tversky('', 'neilsen', *extra), 1)
        self.assertAlmostEqual(
            dist_tversky('nelson', 'neilsen', *extra), 7/11)

    # test valid alpha & beta: any negative weight must raise ValueError
    for alpha, beta in ((-1, -1), (-1, 0), (0, -1)):
        with self.assertRaises(ValueError):
            dist_tversky('abcd', 'dcba', 2, alpha, beta)

    # test empty QGrams
    self.assertAlmostEqual(dist_tversky('nelson', 'neilsen', 7), 1.0)

    # test unequal alpha & beta
    for alpha, beta, want in ((2, 1, 8/11), (1, 2, 7/10), (2, 2, 10/13)):
        self.assertAlmostEqual(
            dist_tversky('niall', 'neal', 2, alpha, beta), want)

    # test bias parameter (sixth positional argument fixed at 0.5)
    for alpha, beta, want in ((1, 1, 4/11), (2, 1, 2/9),
                              (1, 2, 8/15), (2, 2, 4/11)):
        self.assertAlmostEqual(
            dist_tversky('niall', 'neal', 2, alpha, beta, 0.5), want)

    # supplied q-gram tests: operands are QGrams objects built per call
    for src, tar, want in (('', '', 0), ('nelson', '', 1),
                           ('', 'neilsen', 1)):
        self.assertEqual(dist_tversky(QGrams(src), QGrams(tar)), want)
    self.assertAlmostEqual(
        dist_tversky(QGrams('nelson'), QGrams('neilsen')), 7/11)

    # non-q-gram tests: third argument is 0
    for src, tar, want in (('', '', 0), ('the quick', '', 1),
                           ('', 'the quick', 1)):
        self.assertEqual(dist_tversky(src, tar, 0), want)
    for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
        self.assertAlmostEqual(dist_tversky(src, tar, 0), 2/3)
def test_minkowski(self):
    """Test abydos.distance.minkowski.

    Covers empty operands, the 'nelson'/'neilsen' pair (expected 7),
    pre-built QGrams operands, the NONQ_* fixtures with 0 as the third
    argument (expected 8), calls with 0 as the fourth positional argument
    (the l_0 "norm" cases, with and without a trailing True), and the
    ``alphabet`` keyword given as a size or as a string of letters.
    """
    # Same expectations with no third argument and with an explicit 2.
    for extra in ((), (2,)):
        self.assertEqual(minkowski('', '', *extra), 0)
        self.assertEqual(minkowski('nelson', '', *extra), 7)
        self.assertEqual(minkowski('', 'neilsen', *extra), 8)
        self.assertAlmostEqual(minkowski('nelson', 'neilsen', *extra), 7)

    # supplied q-gram tests: operands are QGrams objects built per call
    for src, tar, want in (('', '', 0), ('nelson', '', 7),
                           ('', 'neilsen', 8)):
        self.assertEqual(minkowski(QGrams(src), QGrams(tar)), want)
    self.assertAlmostEqual(
        minkowski(QGrams('nelson'), QGrams('neilsen')), 7)

    # non-q-gram tests: third argument is 0
    for src, tar, want in (('', '', 0), ('the quick', '', 2),
                           ('', 'the quick', 2)):
        self.assertEqual(minkowski(src, tar, 0), want)
    for src, tar in ((NONQ_FROM, NONQ_TO), (NONQ_TO, NONQ_FROM)):
        self.assertAlmostEqual(minkowski(src, tar, 0), 8)

    # test l_0 "norm": third argument 1, fourth argument 0
    for src, tar, want in (('', '', 0), ('a', '', 1), ('a', 'b', 2),
                           ('ab', 'b', 1), ('aab', 'b', 1)):
        self.assertEqual(minkowski(src, tar, 1, 0), want)
    # ... and again with True as the fifth positional argument
    # (presumably ``normalized`` -- confirm against the signature)
    for src, tar, want in (('', '', 0), ('a', '', 1), ('a', 'b', 1),
                           ('ab', 'b', 1/2), ('aab', 'b', 1/2),
                           ('aaab', 'b', 1/2), ('aaab', 'ab', 1/2)):
        self.assertEqual(minkowski(src, tar, 1, 0, True), want)

    # test with alphabet
    self.assertEqual(minkowski('ab', 'b', 1, alphabet=26), 1)
    for letters in (26, 'abcdefghijklmnopqrstuvwxyz'):
        self.assertEqual(
            minkowski('ab', 'b', 1, normalized=True, alphabet=letters),
            1/26)
def test_qgram_intersections(self):
    """Test abydos.tokenizer.qgram.QGrams intersections.

    Applies ``&`` to pairs of QGrams objects and compares the sorted result
    with the expected shared q-grams, first with the default start/stop
    setting and then with ``start_stop=''``.
    """
    # default start_stop: expected results include '$'/'#' padded q-grams
    for src, tar, shared in (
        ('NELSON', '', []),
        ('', 'NEILSEN', []),
        ('NELSON', 'NEILSEN', ['$N', 'NE', 'LS', 'N#']),
        ('NELSON', 'NOSLEN', ['$N', 'N#']),
        ('NAIL', 'LIAN', []),
    ):
        common = QGrams(src) & QGrams(tar)
        self.assertEqual(sorted(common), sorted(shared))

    # start_stop='': no padded q-grams appear in the expected results
    for src, tar, shared in (
        ('NELSON', 'NEILSEN', ['NE', 'LS']),
        ('NELSON', 'NOSLEN', []),
        ('NAIL', 'LIAN', []),
    ):
        common = QGrams(src, start_stop='') & QGrams(tar, start_stop='')
        self.assertEqual(sorted(common), sorted(shared))
def test_qgrams(self):
    """Test abydos.tokenizer.qgram.QGrams.

    Checks the sorted output of ``elements()`` for degenerate inputs, for
    3-grams and 2-grams with and without start/stop padding, for several
    q-gram sizes supplied at once via ``qval``, and for several ``skip``
    values supplied at once; also checks ``count()`` for the multi-valued
    ``qval`` and ``skip`` cases.
    """
    # degenerate inputs produce no q-grams
    self.assertEqual(sorted(QGrams('').elements()), [])
    self.assertEqual(sorted(QGrams('a', 2).elements()), [])
    self.assertEqual(sorted(QGrams('NELSON', 0).elements()), [])
    self.assertEqual(sorted(QGrams('NELSON', -1).elements()), [])

    self.assertEqual(
        sorted(QGrams('NELSON', 3).elements()),
        sorted(['$$N', '$NE', 'NEL', 'ELS', 'LSO', 'SON', 'ON#', 'N##']))
    self.assertEqual(sorted(QGrams('NELSON', 7).elements()), sorted([]))

    # http://www.sound-ex.com/alternative_qgram.htm
    self.assertEqual(
        sorted(QGrams('NELSON').elements()),
        sorted(['$N', 'NE', 'EL', 'LS', 'SO', 'ON', 'N#']))
    self.assertEqual(
        sorted(QGrams('NEILSEN').elements()),
        sorted(['$N', 'NE', 'EI', 'IL', 'LS', 'SE', 'EN', 'N#']))
    self.assertEqual(
        sorted(QGrams('NELSON', start_stop='').elements()),
        sorted(['NE', 'EL', 'LS', 'SO', 'ON']))
    self.assertEqual(
        sorted(QGrams('NEILSEN', start_stop='').elements()),
        sorted(['NE', 'EI', 'IL', 'LS', 'SE', 'EN']))

    # qval given as (1, 2), (2, 1) or range(3): the same multiset is
    # expected regardless of order (and with the extra 0 from range(3))
    qval_expected = sorted([
        '$N', 'E', 'EL', 'L', 'LS', 'N', 'N', 'N#', 'NE', 'O', 'ON', 'S',
        'SO'
    ])
    for qval in ((1, 2), (2, 1), range(3)):
        self.assertEqual(
            sorted(QGrams('NELSON', qval=qval).elements()), qval_expected)
    self.assertEqual(QGrams('NELSON', qval=(1, 2)).count(), 13)

    # skip given as (0, 1, 2), (2, 1, 0) or range(3): the same multiset is
    # expected regardless of order.  The (2, 1, 0) case used to be asserted
    # twice verbatim; the ascending tuple replaces the redundant copy so
    # both orderings are covered, mirroring the qval cases.
    skip_expected = sorted([
        '$E', '$L', '$N', 'EL', 'EO', 'ES', 'LN', 'LO', 'LS', 'N', 'N',
        'N#', 'NE', 'NL', 'NS', 'O', 'O#', 'ON', 'S#', 'SN', 'SO'
    ])
    for skip in ((0, 1, 2), (2, 1, 0), range(3)):
        self.assertEqual(
            sorted(QGrams('NELSON', skip=skip).elements()), skip_expected)
    self.assertEqual(QGrams('NELSON', skip=(0, 1, 2)).count(), 21)