Exemple #1
0
class BWTTestCases(unittest.TestCase):
    """Exercise abydos.compression.BWT in both directions.

    Covers BWT.encode, BWT.decode, the bwt_encode/bwt_decode wrapper
    functions, and encode-then-decode roundtrips.
    """

    # Coders shared by all tests: one built with no argument, one built
    # with '|', and one built with '$'.
    coder = BWT()
    coder_pipe = BWT('|')
    coder_dollar = BWT('$')

    def test_bwt_encode(self):
        """Check BWT.encode results, its error cases, and bwt_encode."""
        # Examples from Wikipedia entry on BWT
        expectations = (
            (self.coder, '', '\x00'),
            (self.coder_pipe, '^BANANA', 'BNN^AA|A'),
            (
                self.coder_pipe,
                'SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES',
                'TEXYDST.E.IXIXIXXSSMPPS.B..E.|.UESFXDIIOIIITS',
            ),
            (self.coder_dollar, 'aardvark', 'k$avrraad'),
        )
        for codec, plain, transformed in expectations:
            self.assertEqual(codec.encode(plain), transformed)

        # encode must raise ValueError for these inputs (presumably because
        # each already contains the symbol its coder inserts as a marker)
        with self.assertRaises(ValueError):
            self.coder_dollar.encode('ABC$')
        with self.assertRaises(ValueError):
            self.coder.encode('ABC\0')

        # The module-level wrapper must agree with the '$' coder
        wrapped = bwt_encode('aardvark', '$')
        self.assertEqual(wrapped, 'k$avrraad')

    def test_bwt_decode(self):
        """Check BWT.decode results, its error cases, and bwt_decode."""
        expectations = (
            (self.coder, '', ''),
            (self.coder, '\x00', ''),
            (self.coder_pipe, 'BNN^AA|A', '^BANANA'),
            (
                self.coder_pipe,
                'TEXYDST.E.IXIXIXXSSMPPS.B..E.|.UESFXDIIOIIITS',
                'SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES',
            ),
            (self.coder_dollar, 'k$avrraad', 'aardvark'),
        )
        for codec, transformed, plain in expectations:
            self.assertEqual(codec.decode(transformed), plain)

        # decode must raise ValueError for 'ABC' (presumably because the
        # coder's marker symbol is absent from the input)
        for codec in (self.coder_dollar, self.coder):
            with self.assertRaises(ValueError):
                codec.decode('ABC')

        # The module-level wrapper must agree with the '|' coder
        unwrapped = bwt_decode('BNN^AA|A', '|')
        self.assertEqual(unwrapped, '^BANANA')

    def test_bwt_roundtripping(self):
        """Check that decode(encode(w)) returns w for assorted strings."""
        samples = (
            '',
            'Banana',
            'The quick brown fox, etc.',
            'it is better a chylde unborne than untaught',
            'manners maketh man',
            'בְּרֵאשִׁית, בָּרָא אֱלֹהִים',
            'Ein Rückblick bietet sich folglich an.',
        )
        for sample in samples:
            # Roundtrip through the no-argument coder, then the '$' coder
            for codec in (self.coder, self.coder_dollar):
                restored = codec.decode(codec.encode(sample))
                self.assertEqual(restored, sample)
class RLETestCases(unittest.TestCase):
    """Exercise abydos.compression.RLE, alone and chained with BWT.

    Covers RLE.encode, RLE.decode, the rle_encode/rle_decode wrapper
    functions, and encode-then-decode roundtrips.
    """

    # Coders shared by all tests
    rle = RLE()
    bwt = BWT()

    # Long run-heavy sample made only of 'W' and 'B' characters
    bws = 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'

    def test_rle_encode(self):
        """Check RLE.encode on raw and BWT-encoded input, and rle_encode."""
        # (text, whether to BWT-encode it first, expected RLE output)
        cases = (
            ('', False, ''),
            ('', True, '\x00'),
            ('banana', False, 'banana'),
            ('banana', True, 'annb\x00aa'),
            (self.bws, False, '12WB12W3B24WB14W'),
            (self.bws, True, 'WWBWWB45WB\x003WB10WB'),
            ('Schifffahrt', False, 'Schi3fahrt'),
        )
        for text, via_bwt, expected in cases:
            source = self.bwt.encode(text) if via_bwt else text
            self.assertEqual(self.rle.encode(source), expected)

        # Test wrapper: passing False as the second argument yields the
        # plain RLE result; omitting it yields the BWT-then-RLE result
        plain_wrapped = rle_encode(self.bws, False)
        self.assertEqual(plain_wrapped, '12WB12W3B24WB14W')
        default_wrapped = rle_encode(self.bws)
        self.assertEqual(default_wrapped, 'WWBWWB45WB\x003WB10WB')

    def test_rle_decode(self):
        """Check RLE.decode, alone and followed by BWT.decode, and rle_decode."""
        # (RLE text, whether to BWT-decode the result, expected output)
        cases = (
            ('', False, ''),
            ('\x00', True, ''),
            ('banana', False, 'banana'),
            ('annb\x00aa', True, 'banana'),
            ('12WB12W3B24WB14W', False, self.bws),
            # Explicit counts of 1 decode to the same text as omitted counts
            ('12W1B12W3B24W1B14W', False, self.bws),
            ('WWBWWB45WB\x003WB10WB', True, self.bws),
            ('Schi3fahrt', False, 'Schifffahrt'),
        )
        for packed, via_bwt, expected in cases:
            expanded = self.rle.decode(packed)
            if via_bwt:
                expanded = self.bwt.decode(expanded)
            self.assertEqual(expanded, expected)

        # Test wrapper: passing False as the second argument decodes plain
        # RLE text; omitting it decodes BWT-then-RLE text
        plain_unwrapped = rle_decode('12W1B12W3B24W1B14W', False)
        self.assertEqual(plain_unwrapped, self.bws)
        default_unwrapped = rle_decode('WWBWWB45WB\x003WB10WB')
        self.assertEqual(default_unwrapped, self.bws)

    def test_rle_roundtripping(self):
        """Check RLE roundtrips, both directly and wrapped around BWT."""
        for text in ('', 'banana', self.bws, 'Schifffahrt'):
            # RLE alone
            self.assertEqual(self.rle.decode(self.rle.encode(text)), text)

            # BWT -> RLE -> un-RLE -> un-BWT
            packed = self.rle.encode(self.bwt.encode(text))
            restored = self.bwt.decode(self.rle.decode(packed))
            self.assertEqual(restored, text)