Example #1
0
    def test_occurrence_halved_fingerprint(self):
        """Check OccurrenceHalved integer fingerprints against known values.

        Exercises the shared ``self.fp`` instance, instances built with an
        explicit size argument (15, 32 and 64), and the
        ``occurrence_halved_fingerprint`` wrapper function.
        """
        fp = self.fp

        # The empty string is expected to fingerprint to 0.
        self.assertEqual(fp.fingerprint(''), 0)

        # Expected bit patterns; see https://arxiv.org/pdf/1711.08475.pdf
        instance_16 = 0b0110010010111000
        instance_32 = 0b01100100101110000000000100000000

        self.assertEqual(fp.fingerprint('instance'), instance_16)
        self.assertEqual(fp.fingerprint('inst'), 0b0001000010100100)

        # Explicitly sized instances. The size-15 expectation reuses the
        # 16-bit literal because that literal's leading bit is 0.
        sized_cases = (
            (15, instance_16),
            (32, instance_32),
            (64, instance_32 << 32),
        )
        for size, expected in sized_cases:
            self.assertEqual(
                OccurrenceHalved(size).fingerprint('instance'), expected
            )

        # The wrapper function is expected to agree with the size-32 value.
        self.assertEqual(
            occurrence_halved_fingerprint('instance', 32), instance_32
        )
Example #2
0
class OccurrenceHalvedFingerprintTestCases(unittest.TestCase):
    """Exercise Cisłak & Grabowski's occurrence halved fingerprint.

    Target: abydos.fingerprint.OccurrenceHalved
    """

    # One fingerprinter, built with no constructor arguments, shared by the
    # assertions below.
    fp = OccurrenceHalved()

    def test_occurrence_halved_fingerprint(self):
        """Check OccurrenceHalved integer fingerprints against known values.

        Sizes are passed as the second positional argument of
        ``fingerprint`` (15, 32 and 64), and the
        ``occurrence_halved_fingerprint`` wrapper is checked last.
        """
        fingerprinter = self.fp

        # The empty string is expected to fingerprint to 0.
        self.assertEqual(fingerprinter.fingerprint(''), 0)

        # Expected bit patterns; see https://arxiv.org/pdf/1711.08475.pdf
        instance_16 = 0b0110010010111000
        instance_32 = 0b01100100101110000000000100000000

        self.assertEqual(fingerprinter.fingerprint('instance'), instance_16)
        self.assertEqual(
            fingerprinter.fingerprint('inst'), 0b0001000010100100
        )

        # Per-call size argument. The size-15 expectation reuses the 16-bit
        # literal because that literal's leading bit is 0.
        sized_cases = (
            (15, instance_16),
            (32, instance_32),
            (64, instance_32 << 32),
        )
        for size, expected in sized_cases:
            self.assertEqual(
                fingerprinter.fingerprint('instance', size), expected
            )

        # The wrapper function is expected to agree with the size-32 value.
        self.assertEqual(
            occurrence_halved_fingerprint('instance', 32), instance_32
        )
class OccurrenceHalvedFingerprintTestCases(unittest.TestCase):
    """Test Cisłak & Grabowski's occurrence halved fingerprint functions.

    abydos.fingerprint.OccurrenceHalved
    """

    # One fingerprinter, built with no constructor arguments, shared by the
    # assertions below.
    fp = OccurrenceHalved()

    # NOTE: this method used to be defined twice in the class body with
    # byte-identical contents; the second definition merely rebound the
    # first, so a single copy is kept.
    def test_occurrence_halved_fingerprint(self):
        """Test abydos.fingerprint.OccurrenceHalved.

        Fingerprints are compared as strings of '0'/'1' characters, for the
        shared ``self.fp`` instance and for instances built with an explicit
        size argument (15, 32 and 64).
        """
        # Base case: the empty string is expected to give sixteen '0's.
        self.assertEqual(self.fp.fingerprint(''), '0' * 16)

        # https://arxiv.org/pdf/1711.08475.pdf
        self.assertEqual(self.fp.fingerprint('instance'), '0110010010111000')

        self.assertEqual(self.fp.fingerprint('inst'), '0001000010100100')

        # Size 15: the expected string is the 16-character value without
        # its leading '0'.
        self.assertEqual(
            OccurrenceHalved(15).fingerprint('instance'),
            '110010010111000',
        )
        self.assertEqual(
            OccurrenceHalved(32).fingerprint('instance'),
            '01100100101110000000000100000000',
        )
        # Size 64: the 32-character value followed by 32 more '0's.
        self.assertEqual(
            OccurrenceHalved(64).fingerprint('instance'),
            '01100100101110000000000100000000' + '0' * 32,
        )
Example #5
0
    'bwtrlef': BWTRLEF().fingerprint,
    'consonant': Consonant().fingerprint,
    'consonant_2': Consonant(variant=2).fingerprint,
    'consonant_3': Consonant(variant=3).fingerprint,
    'consonant_nd': Consonant(doubles=False).fingerprint,
    'count': Count().fingerprint,
    'count_32': Count(n_bits=32).fingerprint,
    'extract': Extract().fingerprint,
    'extract_2': Extract(letter_list=2).fingerprint,
    'extract_3': Extract(letter_list=3).fingerprint,
    'extract_4': Extract(letter_list=4).fingerprint,
    'extract_position_frequency': ExtractPositionFrequency().fingerprint,
    'lacss': LACSS().fingerprint,
    'lc_cutter': LCCutter().fingerprint,
    'occurrence': Occurrence().fingerprint,
    'occurrence_halved': OccurrenceHalved().fingerprint,
    'omission_key': OmissionKey().fingerprint,
    'phonetic': Phonetic().fingerprint,
    'position': Position().fingerprint,
    'position_32_2': Position(n_bits=32, bits_per_letter=2).fingerprint,
    'qgram': QGram().fingerprint,
    'qgram_q3': QGram(qval=3).fingerprint,
    'qgram_ssj': QGram(start_stop='$#', joiner=' ').fingerprint,
    'skeleton_key': SkeletonKey().fingerprint,
    'string': String().fingerprint,
    'synoname_toolcode': synoname.fingerprint,
    'synoname_toolcode_2name': lambda _: synoname.fingerprint(_, _),
}


class BigListOfNaughtyStringsTestCases(unittest.TestCase):
Example #6
0
    Position,
    QGram,
    SkeletonKey,
    String,
    SynonameToolcode,
)

from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# Module-level fingerprint objects, each constructed with no arguments.
# The `algorithms` table below takes bound `fingerprint` methods from
# `string`, `qgram` and `phonetic`; the remaining objects are presumably
# used the same way further down (not visible here -- confirm).
string = String()
qgram = QGram()
phonetic = Phonetic()
skeleton = SkeletonKey()
omission = OmissionKey()
occurrence = Occurrence()
occurrence_halved = OccurrenceHalved()
count = Count()
position = Position()
synoname = SynonameToolcode()

algorithms = {
    'str_fingerprint':
    string.fingerprint,
    'qgram_fingerprint':
    qgram.fingerprint,
    'qgram_fingerprint_3':
    lambda _: qgram.fingerprint(_, qval=3),
    'qgram_fingerprint_ssj':
    lambda _: qgram.fingerprint(_, start_stop='$#', joiner=' '),
    'phonetic_fingerprint':
    phonetic.fingerprint,