Beispiel #1
0
    def cycle(
        self, input_str, quiet=False, compress_tree=None, decompress_table=None, show_input_and_output=True, strict=True
    ):
        """Round-trip ``input_str`` through every compressor and verify the result.

        The input is compressed with ``compress`` (with and without
        backtracking) and with ``compress_classic``.  Each result is fed to
        ``decompress`` and must reproduce ``input_str`` exactly.  Unless
        ``quiet`` is set or the input is empty, the achieved ratios are printed
        next to those of ``bz2`` and ``zlib``.

        Args:
            input_str: Text to round-trip.
            quiet: When true, skip all printing and the size comparison.
            compress_tree: Forwarded to ``compress`` as ``compression_tree``.
            decompress_table: Forwarded to ``decompress`` for the two ``compress`` results.
            show_input_and_output: When true, also print the decompressed and compressed texts.
            strict: Forwarded to ``compress`` as ``check_ascii``.
        """
        compressed_text = compress(input_str, compression_tree=compress_tree, backtracking=False, check_ascii=strict)
        backtracked_text = compress(input_str, compression_tree=compress_tree, backtracking=True, check_ascii=strict)
        decompressed_text = decompress(compressed_text, decompress_table=decompress_table)
        backtracked_decompressed_text = decompress(backtracked_text, decompress_table=decompress_table)
        classic_compressed_text = compress_classic(input_str)
        # The classic output is decoded with decompress()'s default table.
        classic_decompressed_text = decompress(classic_compressed_text)

        if not quiet and input_str:
            input_len = len(input_str)
            backtracking_changed = backtracked_text != compressed_text

            def ratio_of(blob):
                # Compressed size as a fraction of the original size.
                return float(len(blob)) / float(input_len)

            print("---------------------------------------------------------------------")
            if show_input_and_output:
                print(decompressed_text)
                print(compressed_text)
                if backtracking_changed:
                    print("--back tracked:--")
                    print(backtracked_text)

            # bz2 and zlib only accept bytes-like input, so text is encoded
            # first; data that is already bytes is passed through untouched.
            raw_input = input_str
            if not isinstance(raw_input, (bytes, bytearray)):
                raw_input = raw_input.encode("utf-8")
            bz2c = bz2.compress(raw_input)
            zlibc = zlib.compress(raw_input, 9)

            ratio = ratio_of(compressed_text)
            b_ratio = ratio_of(backtracked_text)
            c_ratio = ratio_of(classic_compressed_text)
            bz2ratio = ratio_of(bz2c)
            zlibratio = ratio_of(zlibc)

            if backtracking_changed:
                print(
                    "backtracked compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes"
                    % (b_ratio, b_ratio * 100.0, input_len, len(backtracked_text))
                )
                # NOTE(review): this size check only runs when output is not quiet.
                self.assertTrue(
                    len(compressed_text) >= len(backtracked_text),
                    "Back-tracking (%d) should always be better than not-backtracking (%d)"
                    % (len(backtracked_text), len(compressed_text)),
                )
            print(
                "compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes"
                % (ratio, ratio * 100.0, input_len, len(compressed_text))
            )
            print(" vs ")
            print("  zlib ratio 1:%f (%.2f%%) to %d bytes" % (zlibratio, zlibratio * 100.0, len(zlibc)))
            print("  bz2 ratio 1:%f (%.2f%%) to %d bytes" % (bz2ratio, bz2ratio * 100.0, len(bz2c)))
            print(
                "  smaz classic 1:%f (%.2f%%) to %d bytes" % (c_ratio, c_ratio * 100.0, len(classic_compressed_text))
            )

        self.assertEqual(input_str, decompressed_text)
        self.assertEqual(input_str, backtracked_decompressed_text)
        self.assertEqual(input_str, classic_decompressed_text)
Beispiel #2
0
    def corpus_line_by_line(self, filename):
        """Compress a .txt corpus one line at a time and print the summed sizes.

        The file is read in full and split on newline characters.  Every
        resulting line is compressed independently with ``compress``, ``bz2``,
        ``zlib`` and ``compress_classic`` (with and without pathological case
        detection), and the total size produced by each is printed.

        Args:
            filename: Path of the text file to process.
        """
        with open(filename, 'r') as f:
            test_data = f.read().split('\n')

        # Only the totals are reported, so keep running sums rather than
        # holding every compressed line in memory.
        total_size = 0
        smaz_size = 0
        bz2_size = 0
        zlib_size = 0
        classic_size = 0
        classic_path_size = 0
        for test in test_data:
            # bz2 and zlib only accept bytes-like input, so text is encoded
            # first; data that is already bytes is passed through untouched.
            raw = test if isinstance(test, bytes) else test.encode('utf-8')
            total_size += len(test)
            smaz_size += len(compress(test))
            bz2_size += len(bz2.compress(raw))
            zlib_size += len(zlib.compress(raw))
            classic_size += len(compress_classic(test, pathological_case_detection=False))
            classic_path_size += len(compress_classic(test, pathological_case_detection=True))

        print('Total data size %d bytes' % total_size)
        print(' Smaz size %d bytes' % smaz_size)
        print(' bz2 size %d bytes' % bz2_size)
        print(' zlib size %d bytes' % zlib_size)
        print(' Smaz classic size %d bytes' % classic_size)
        print(' Smaz classic with pathological case detection size %d bytes' % classic_path_size)
Beispiel #3
0
    def corpus_line_by_line(self, filename):
        """Process a .txt corpus file line by line and report compressed sizes.

        Each newline-separated line of the file is compressed on its own with
        ``compress``, ``bz2``, ``zlib`` and both variants of
        ``compress_classic``; the summed output size of each is printed.

        Args:
            filename: Path of the text file to process.
        """
        with open(filename, "r") as f:
            lines = f.read()
        test_data = lines.split("\n")

        c_data = []
        bz_data = []
        zlib_data = []
        c_cl_data = []
        c_cl_path_data = []
        for test in test_data:
            # bz2.compress / zlib.compress reject text on Python 3; hand them
            # bytes, leaving input that is already bytes unchanged.
            if isinstance(test, bytes):
                raw = test
            else:
                raw = test.encode("utf-8")
            c_data.append(compress(test))
            bz_data.append(bz2.compress(raw))
            zlib_data.append(zlib.compress(raw))
            c_cl_data.append(compress_classic(test, pathological_case_detection=False))
            c_cl_path_data.append(compress_classic(test, pathological_case_detection=True))

        def total(chunks):
            # Combined length of all items.
            return sum(len(chunk) for chunk in chunks)

        print("Total data size %d bytes" % total(test_data))
        print(" Smaz size %d bytes" % total(c_data))
        print(" bz2 size %d bytes" % total(bz_data))
        print(" zlib size %d bytes" % total(zlib_data))
        print(" Smaz classic size %d bytes" % total(c_cl_data))
        print(" Smaz classic with pathological case detection size %d bytes" % total(c_cl_path_data))
Beispiel #4
0
    def cycle(self, input_str, quiet=False, compress_tree=None, decompress_table=None, show_input_and_output=True,
              strict=True):
        """Exercise a complete compress -> decompress cycle and check it is lossless.

        ``input_str`` is compressed with ``compress`` (backtracking off and on)
        and with ``compress_classic``; every result is decompressed again and
        asserted equal to the input.  When ``quiet`` is false and the input is
        non-empty, the ratios are printed together with ``bz2`` and ``zlib``
        figures for comparison.

        Args:
            input_str: Text to round-trip.
            quiet: When true, suppress printing and the size comparison.
            compress_tree: Passed to ``compress`` as ``compression_tree``.
            decompress_table: Passed to ``decompress`` for the two ``compress`` results.
            show_input_and_output: When true, print the decompressed and compressed texts too.
            strict: Passed to ``compress`` as ``check_ascii``.
        """
        compressed_text = compress(input_str, compression_tree=compress_tree, backtracking=False,
                                   check_ascii=strict)
        backtracked_compressed_text = compress(input_str, compression_tree=compress_tree, backtracking=True,
                                               check_ascii=strict)
        decompressed_text = decompress(compressed_text, decompress_table=decompress_table)
        backtracked_decompressed_text = decompress(backtracked_compressed_text, decompress_table=decompress_table)
        classic_compressed_text = compress_classic(input_str)
        # The classic output is decoded with decompress()'s default table.
        classic_decompressed_text = decompress(classic_compressed_text)

        if not quiet and input_str:
            original_size = float(len(input_str))
            differs = backtracked_compressed_text != compressed_text

            print('---------------------------------------------------------------------')
            if show_input_and_output:
                print(decompressed_text)
                print(compressed_text)
                if differs:
                    print('--back tracked:--')
                    print(backtracked_compressed_text)

            # bz2.compress / zlib.compress reject text on Python 3; hand them
            # bytes, leaving input that is already bytes unchanged.
            if isinstance(input_str, (bytes, bytearray)):
                payload = input_str
            else:
                payload = input_str.encode('utf-8')
            bz2c = bz2.compress(payload)
            zlibc = zlib.compress(payload, 9)

            # Each ratio is compressed size divided by original size.
            ratio = len(compressed_text) / original_size
            b_ratio = len(backtracked_compressed_text) / original_size
            c_ratio = len(classic_compressed_text) / original_size
            bz2ratio = len(bz2c) / original_size
            zlibratio = len(zlibc) / original_size

            if differs:
                print('backtracked compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes' %
                      (b_ratio, b_ratio * 100., len(input_str), len(backtracked_compressed_text)))
                # NOTE(review): this size check only runs when output is not quiet.
                self.assertTrue(len(compressed_text) >= len(backtracked_compressed_text),
                                'Back-tracking (%d) should always be better than not-backtracking (%d)'
                                % (len(backtracked_compressed_text), len(compressed_text)))
            print('compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes' %
                  (ratio, ratio * 100., len(input_str), len(compressed_text)))
            print(' vs ')
            print('  zlib ratio 1:%f (%.2f%%) to %d bytes' %
                  (zlibratio, zlibratio * 100., len(zlibc)))
            print('  bz2 ratio 1:%f (%.2f%%) to %d bytes' %
                  (bz2ratio, bz2ratio * 100., len(bz2c)))
            print('  smaz classic 1:%f (%.2f%%) to %d bytes' %
                  (c_ratio, c_ratio * 100., len(classic_compressed_text)))

        self.assertEqual(input_str, decompressed_text)
        self.assertEqual(input_str, backtracked_decompressed_text)
        self.assertEqual(input_str, classic_decompressed_text)