コード例 #1
0
 def test_expected_compression_results(self):
     """ Pin the compressed length of a handful of known inputs.

     The expected lengths depend on the compression algorithm and the decoding table, so they have to be
     updated whenever either of those changes. """
     expected_lengths = (
         ('thethethe', 3),
         ('thewhich', 2),
         ('123thewhich123', 12),
         ('not-a-g00d-Exampl333', 20),
     )
     for text, size in expected_lengths:
         self.assertEqual(len(compress(text)), size)
コード例 #2
0
ファイル: test_smaz.py プロジェクト: CordySmith/PySmaz
 def test_expected_compression_results(self):
     """ Pin the compressed length of a handful of known inputs.

     The expected lengths depend on the compression algorithm and the decoding table, so they have to be
     updated whenever either of those changes. """
     expected_lengths = (
         ("thethethe", 3),
         ("thewhich", 2),
         ("123thewhich123", 12),
         ("not-a-g00d-Exampl333", 20),
     )
     for text, size in expected_lengths:
         self.assertEqual(len(compress(text)), size)
コード例 #3
0
ファイル: test_smaz.py プロジェクト: CordySmith/PySmaz
    def cycle(
        self, input_str, quiet=False, compress_tree=None, decompress_table=None, show_input_and_output=True, strict=True
    ):
        """ Exercise a complete compress -> decompress cycle and assert that it round-trips.

        The input is compressed three ways (compress() without backtracking, compress() with backtracking and
        compress_classic()); every result is decompressed again and must equal input_str.

        Unless quiet is set or the input is empty, the compression ratios are printed next to zlib and bz2
        figures, and the backtracked output is asserted to be no longer than the non-backtracked output.

        input_str             -- the text to round-trip
        quiet                 -- skip the printed statistics (and the backtracking size assertion)
        compress_tree         -- forwarded to compress() as compression_tree
        decompress_table      -- forwarded to decompress() as decompress_table
        show_input_and_output -- also print the decompressed and compressed text
        strict                -- forwarded to compress() as check_ascii
        """
        compressed_text = compress(input_str, compression_tree=compress_tree, backtracking=False, check_ascii=strict)
        backtracked_compressed_text = compress(
            input_str, compression_tree=compress_tree, backtracking=True, check_ascii=strict
        )
        decompressed_text = decompress(compressed_text, decompress_table=decompress_table)
        backtracked_decompressed_text = decompress(backtracked_compressed_text, decompress_table=decompress_table)
        classic_compressed_text = compress_classic(input_str)
        classic_decompressed_text = decompress(classic_compressed_text)

        if not quiet and input_str:
            input_len = len(input_str)

            def ratio_of(data):
                # Compressed size as a fraction of the input size; dividing this way round cannot hit a
                # ZeroDivisionError on empty output (input_len is non-zero inside this branch).
                return float(len(data)) / float(input_len)

            backtracking_differs = backtracked_compressed_text != compressed_text

            print("---------------------------------------------------------------------")
            if show_input_and_output:
                print(decompressed_text)
                print(compressed_text)
                if backtracking_differs:
                    print("--back tracked:--")
                    print(backtracked_compressed_text)

            ratio = ratio_of(compressed_text)
            b_ratio = ratio_of(backtracked_compressed_text)
            c_ratio = ratio_of(classic_compressed_text)

            # bz2 and zlib only accept bytes on Python 3; a Python 2 str already is bytes and passes through.
            raw_input = input_str if isinstance(input_str, bytes) else input_str.encode("utf-8")
            bz2c = bz2.compress(raw_input)
            zlibc = zlib.compress(raw_input, 9)
            bz2ratio = ratio_of(bz2c)
            zlibratio = ratio_of(zlibc)

            if backtracking_differs:
                print(
                    "backtracked compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes"
                    % (b_ratio, b_ratio * 100.0, input_len, len(backtracked_compressed_text))
                )
                # The message reports the two sizes actually being compared.
                self.assertTrue(
                    len(compressed_text) >= len(backtracked_compressed_text),
                    "Back-tracking (%d) should always be better than not-backtracking (%d)"
                    % (len(backtracked_compressed_text), len(compressed_text)),
                )
            print(
                "compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes"
                % (ratio, ratio * 100.0, input_len, len(compressed_text))
            )
            print(" vs ")
            print("  zlib ratio 1:%f (%.2f%%) to %d bytes" % (zlibratio, zlibratio * 100.0, len(zlibc)))
            print("  bz2 ratio 1:%f (%.2f%%) to %d bytes" % (bz2ratio, bz2ratio * 100.0, len(bz2c)))
            print(
                "  smaz classic 1:%f (%.2f%%) to %d bytes" % (c_ratio, c_ratio * 100.0, len(classic_compressed_text))
            )

        self.assertEqual(input_str, decompressed_text)
        self.assertEqual(input_str, backtracked_decompressed_text)
        self.assertEqual(input_str, classic_decompressed_text)
コード例 #4
0
ファイル: botFunctions.py プロジェクト: wertousek/JirkaOvcak
def encrypt(text_to_encrypt, encryption_base):
    """Compress ``text_to_encrypt`` and spell the result as a number in base ``encryption_base``.

    Steps:
      1. Build the digit alphabet from the characters chr(48), chr(49), ... (one per unit of
         ``encryption_base``), skipping any character that cannot be encoded as UTF-8.
      2. Compress the text with ``smaz.compress``; if that yields ``b""`` use ``zlib.compress`` instead.
      3. Write every element of the compressed data as a decimal string left-padded with zeros to
         three characters, concatenate them and parse the whole string as one integer.
      4. Re-express that integer using the digit alphabet from step 1.

    NOTE: no key is involved anywhere - this is an encoding, not cryptographic encryption.

    Args:
        text_to_encrypt: the text to encode (passed through ``str()`` before compression).
        encryption_base: size of the digit alphabet; must be at least 2.

    Returns:
        The encoded string. It is empty when the digit string from step 3 cannot be parsed
        as an integer or parses to zero.

    Raises:
        ValueError: if ``encryption_base`` is smaller than 2. Base 1 would otherwise loop forever
            because ``result // 1`` never shrinks, and a smaller base leaves no digits at all.
    """
    if encryption_base < 2:
        raise ValueError("encryption_base must be at least 2, got %r" % (encryption_base,))

    digits = []
    for code in range(48, 48 + encryption_base):
        symbol = chr(code)
        try:
            # Only keep characters that survive a UTF-8 encode (lone surrogates do not).
            symbol.encode("utf-8")
        except UnicodeEncodeError:
            continue
        digits.append(symbol)

    text = smaz.compress(str(text_to_encrypt))
    if text == b"":
        text = zlib.compress(bytes(text_to_encrypt, encoding="utf-8"))

    # Presumably ``text`` is bytes here, so iterating yields ints 0..255 - verify against smaz.
    text_num = "".join(str(unit).rjust(3, "0") for unit in text)

    try:
        result = int(text_num)
    except ValueError:
        # Empty or non-numeric digit string. NOTE: on Python 3.11+ int() also raises ValueError
        # for very long digit strings (default limit 4300 digits), which lands here as well.
        result = 0

    base = len(digits)
    symbols = []
    while result != 0:
        result, remainder = divmod(result, base)
        symbols.append(digits[remainder])
    # Remainders come out least-significant first.
    return "".join(reversed(symbols))
コード例 #5
0
    def cycle(self, input_str, quiet=False, compress_tree=None, decompress_table=None, show_input_and_output=True,
              strict=True):
        """ Exercise a complete compress -> decompress cycle and assert that it round-trips.

        The input is compressed three ways (compress() without backtracking, compress() with backtracking and
        compress_classic()); every result is decompressed again and must equal input_str.

        Unless quiet is set or the input is empty, the compression ratios are printed next to zlib and bz2
        figures, and the backtracked output is asserted to be no longer than the non-backtracked output.

        input_str             -- the text to round-trip
        quiet                 -- skip the printed statistics (and the backtracking size assertion)
        compress_tree         -- forwarded to compress() as compression_tree
        decompress_table      -- forwarded to decompress() as decompress_table
        show_input_and_output -- also print the decompressed and compressed text
        strict                -- forwarded to compress() as check_ascii
        """
        compressed_text = compress(input_str, compression_tree=compress_tree, backtracking=False,
                                   check_ascii=strict)
        backtracked_compressed_text = compress(input_str, compression_tree=compress_tree, backtracking=True,
                                               check_ascii=strict)
        decompressed_text = decompress(compressed_text, decompress_table=decompress_table)
        backtracked_decompressed_text = decompress(backtracked_compressed_text, decompress_table=decompress_table)
        classic_compressed_text = compress_classic(input_str)
        classic_decompressed_text = decompress(classic_compressed_text)

        if not quiet and input_str:
            input_len = len(input_str)

            def ratio_of(data):
                # Compressed size as a fraction of the input size; dividing this way round cannot hit a
                # ZeroDivisionError on empty output (input_len is non-zero inside this branch).
                return float(len(data)) / float(input_len)

            backtracking_differs = backtracked_compressed_text != compressed_text

            print('---------------------------------------------------------------------')
            if show_input_and_output:
                print(decompressed_text)
                print(compressed_text)
                if backtracking_differs:
                    print('--back tracked:--')
                    print(backtracked_compressed_text)

            ratio = ratio_of(compressed_text)
            b_ratio = ratio_of(backtracked_compressed_text)
            c_ratio = ratio_of(classic_compressed_text)

            # bz2 and zlib only accept bytes on Python 3; a Python 2 str already is bytes and passes through.
            raw_input = input_str if isinstance(input_str, bytes) else input_str.encode('utf-8')
            bz2c = bz2.compress(raw_input)
            zlibc = zlib.compress(raw_input, 9)
            bz2ratio = ratio_of(bz2c)
            zlibratio = ratio_of(zlibc)

            if backtracking_differs:
                print('backtracked compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes' %
                      (b_ratio, b_ratio * 100., input_len, len(backtracked_compressed_text)))
                # The message reports the two sizes actually being compared.
                self.assertTrue(len(compressed_text) >= len(backtracked_compressed_text),
                                'Back-tracking (%d) should always be better than not-backtracking (%d)'
                                % (len(backtracked_compressed_text), len(compressed_text)))
            print('compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes' %
                  (ratio, ratio * 100., input_len, len(compressed_text)))
            print(' vs ')
            print('  zlib ratio 1:%f (%.2f%%) to %d bytes' %
                  (zlibratio, zlibratio * 100., len(zlibc)))
            print('  bz2 ratio 1:%f (%.2f%%) to %d bytes' %
                  (bz2ratio, bz2ratio * 100., len(bz2c)))
            print('  smaz classic 1:%f (%.2f%%) to %d bytes' %
                  (c_ratio, c_ratio * 100., len(classic_compressed_text)))

        self.assertEqual(input_str, decompressed_text)
        self.assertEqual(input_str, backtracked_decompressed_text)
        self.assertEqual(input_str, classic_decompressed_text)
コード例 #6
0
    def test_scaling(self):
        """ Print (without asserting anything) timings that show whether SMAZ compression time grows
        linearly with the input length, i.e. O(N) """
        print('factor should remain roughly constant if performance is O(N)')
        for length in (1, 5, 10, 20, 50, 100, 250, 500, 1000, 2500, 10000, 100000):
            # Shorter prefixes are compressed more times per measurement.
            if length < 500:
                runs = 1000
            elif length < 10000:
                runs = 100
            else:
                runs = 1

            started = datetime.datetime.now()
            cdata = []
            for _ in range(runs):
                cdata.append(compress(FIVE_MEGABYTES_OF_MOBY_DICK[0:length]))
            finished = datetime.datetime.now()

            elapsed = self.timedelta_to_float(finished - started)
            # Time per input character per run.
            factor = elapsed / (float(length) * float(runs))
            print('%i, %f, factor: %.10f - %d' % (length, elapsed, factor, len(cdata)))
コード例 #7
0
 def assert_smaz_optimal(self, comb, display=False):
     """ Check that the SMAZ output for comb is no longer than the bz2 and zlib (level 9) output;
     with display set, comb is printed first """
     if display:
         print(comb)
     # Initial values so the names exist inside the except handler below.
     smaz_out = 0
     bz2_out = 0
     zlib_out = 0
     try:
         bz2_out = bz2.compress(comb)
         zlib_out = zlib.compress(comb, 9)
         smaz_out = compress(comb)
         for rival in (bz2_out, zlib_out):
             self.assertTrue(len(rival) >= len(smaz_out))
     except AssertionError:
         # Re-raise with the sizes and the offending string in the message.
         details = (len(comb), len(smaz_out), len(bz2_out), len(zlib_out), comb)
         raise AssertionError(
             'Found String (%d) where SMAZ not >=. SMAZ len: %d bz2 len: %d zlib len: %d string: %s' % details)
コード例 #8
0
ファイル: test_smaz.py プロジェクト: CordySmith/PySmaz
    def test_scaling(self):
        """ Print (without asserting anything) timings that show whether SMAZ compression time grows
        linearly with the input length, i.e. O(N).

        For each prefix length the prefix of FIVE_MEGABYTES_OF_MOBY_DICK is compressed `runs` times and the
        elapsed time divided by (length * runs) is printed as the factor. """
        print("factor should remain roughly constant if performance is O(N)")
        for i in (1, 5, 10, 20, 50, 100, 250, 500, 1000, 2500, 10000, 100000):
            # Shorter prefixes are compressed more times per measurement.
            runs = 1
            if i < 10000:
                runs = 100
                if i < 500:
                    runs = 1000

            tick = datetime.datetime.now()
            # range() instead of xrange(): xrange does not exist on Python 3, and with at most 1000
            # iterations the list that Python 2 builds is negligible.
            cdata = [compress(FIVE_MEGABYTES_OF_MOBY_DICK[0:i]) for _ in range(runs)]
            tock = datetime.datetime.now()
            tdf = self.timedelta_to_float(tock - tick)
            print("%i, %f, factor: %.10f - %d" % (i, tdf, tdf / (float(i) * float(runs)), len(cdata)))
コード例 #9
0
ファイル: test_smaz.py プロジェクト: CordySmith/PySmaz
 def assert_smaz_optimal(self, comb, display=False):
     """ Check that the SMAZ output for comb is no longer than the bz2 and zlib (level 9) output;
     with display set, comb is printed first """
     if display:
         print(comb)
     # Initial values so the names exist inside the except handler below.
     smaz_out = 0
     bz2_out = 0
     zlib_out = 0
     try:
         bz2_out = bz2.compress(comb)
         zlib_out = zlib.compress(comb, 9)
         smaz_out = compress(comb)
         for rival in (bz2_out, zlib_out):
             self.assertTrue(len(rival) >= len(smaz_out))
     except AssertionError:
         # Re-raise with the sizes and the offending string in the message.
         details = (len(comb), len(smaz_out), len(bz2_out), len(zlib_out), comb)
         raise AssertionError(
             "Found String (%d) where SMAZ not >=. SMAZ len: %d bz2 len: %d zlib len: %d string: %s" % details
         )
コード例 #10
0
    def corpus_line_by_line(self, filename):
        """ Compress every line of a .txt corpus file separately and print the summed size per compressor
        """
        with open(filename, 'r') as corpus:
            test_data = corpus.read().split('\n')

        smaz_out, bz2_out, zlib_out, classic_out, classic_path_out = [], [], [], [], []
        for line in test_data:
            smaz_out.append(compress(line))
            bz2_out.append(bz2.compress(line))
            zlib_out.append(zlib.compress(line))
            classic_out.append(compress_classic(line, pathological_case_detection=False))
            classic_path_out.append(compress_classic(line, pathological_case_detection=True))

        def total_size(chunks):
            # Combined length of all entries
            return sum(len(chunk) for chunk in chunks)

        print('Total data size %d bytes' % total_size(test_data))
        print(' Smaz size %d bytes' % total_size(smaz_out))
        print(' bz2 size %d bytes' % total_size(bz2_out))
        print(' zlib size %d bytes' % total_size(zlib_out))
        print(' Smaz classic size %d bytes' % total_size(classic_out))
        print(' Smaz classic with pathological case detection size %d bytes' % total_size(classic_path_out))
コード例 #11
0
ファイル: test_smaz.py プロジェクト: CordySmith/PySmaz
    def corpus_line_by_line(self, filename):
        """ Compress every line of a .txt corpus file separately and print the summed size per compressor
        """
        with open(filename, "r") as corpus:
            test_data = corpus.read().split("\n")

        # One tuple per line: the raw line followed by the output of each compressor.
        rows = []
        for line in test_data:
            rows.append(
                (
                    line,
                    compress(line),
                    bz2.compress(line),
                    zlib.compress(line),
                    compress_classic(line, pathological_case_detection=False),
                    compress_classic(line, pathological_case_detection=True),
                )
            )

        report_formats = (
            "Total data size %d bytes",
            " Smaz size %d bytes",
            " bz2 size %d bytes",
            " zlib size %d bytes",
            " Smaz classic size %d bytes",
            " Smaz classic with pathological case detection size %d bytes",
        )
        # split() always returns at least one entry, so rows is never empty and zip(*rows) has six columns.
        for fmt, column in zip(report_formats, zip(*rows)):
            print(fmt % sum(len(item) for item in column))
コード例 #12
0
ファイル: dinospeak.py プロジェクト: glezos/dinospeak
 def compress(self, inp):
     """ Hand inp to smaz.compress and return whatever it produces """
     compressed = smaz.compress(inp)
     return compressed
コード例 #13
0
 def test_ascii_check(self):
     """ Exercise _check_ascii directly, then compress() with raise_on_error switched off """
     high_char = chr(129)
     self.assertTrue(_check_ascii('1230ABCZADSADW'))
     self.assertFalse(_check_ascii(high_char + high_char))
     # With raise_on_error=False the rejected input is expected to come back as None.
     outcome = compress(high_char, raise_on_error=False)
     self.assertEqual(None, outcome)
コード例 #14
0
 def test_ascii(self):
     """ With default arguments compress() raises ValueError for chr(129) and accepts chr(0) .. chr(126) """
     with self.assertRaises(ValueError):
         compress(chr(129))
     for code in range(127):
         # Valid data - must not raise
         compress(chr(code))
コード例 #15
0
ファイル: test_smaz.py プロジェクト: CordySmith/PySmaz
 def test_ascii_check(self):
     """ Exercise _check_ascii directly, then compress() with raise_on_error switched off """
     self.assertTrue(_check_ascii("1230ABCZADSADW"))
     self.assertFalse(_check_ascii(chr(129) + chr(129)))
     # assertEqual rather than assertEquals: the latter is a deprecated alias that was removed in
     # Python 3.12. With raise_on_error=False the rejected input is expected to come back as None.
     self.assertEqual(None, compress(chr(129), raise_on_error=False))
コード例 #16
0
ファイル: test_smaz.py プロジェクト: CordySmith/PySmaz
 def test_ascii(self):
     """ With default arguments compress() raises ValueError for chr(129) and accepts chr(0) .. chr(126) """
     self.assertRaises(ValueError, compress, chr(129))
     # range() instead of xrange(): xrange does not exist on Python 3, and 127 items are trivial
     # to materialise on Python 2.
     for i in range(127):
         compress(chr(i))  # Doesn't raise - valid data