def cycle(
    self, input_str, quiet=False, compress_tree=None, decompress_table=None, show_input_and_output=True, strict=True
):
    """Exercise a complete compress -> decompress cycle and verify the round trip.

    ``input_str`` is compressed with ``compress`` (with and without
    back-tracking) and with ``compress_classic``. Every result is
    decompressed again and asserted equal to ``input_str``. Unless ``quiet``
    is set or the input is empty, a size report comparing the results with
    zlib and bz2 is printed.

    Args:
        input_str: The text to round-trip.
        quiet: When true, skip the printed report (and the back-tracking
            size assertion that is part of it).
        compress_tree: Forwarded to ``compress`` as ``compression_tree``.
        decompress_table: Forwarded to ``decompress`` as ``decompress_table``.
        show_input_and_output: Also print the decompressed and compressed text.
        strict: Forwarded to ``compress`` as ``check_ascii``.
    """
    compressed_text = compress(input_str, compression_tree=compress_tree, backtracking=False, check_ascii=strict)
    backtracked_compressed_text = compress(
        input_str, compression_tree=compress_tree, backtracking=True, check_ascii=strict
    )
    decompressed_text = decompress(compressed_text, decompress_table=decompress_table)
    backtracked_decompressed_text = decompress(backtracked_compressed_text, decompress_table=decompress_table)
    classic_compressed_text = compress_classic(input_str)
    classic_decompressed_text = decompress(classic_compressed_text)

    if not quiet and input_str:
        original_size = len(input_str)
        backtracking_differs = backtracked_compressed_text != compressed_text

        def size_ratio(packed):
            # original_size is non-zero here because input_str is truthy.
            return float(len(packed)) / float(original_size)

        print("---------------------------------------------------------------------")
        if show_input_and_output:
            print(decompressed_text)
            print(compressed_text)
            if backtracking_differs:
                print("--back tracked:--")
                print(backtracked_compressed_text)

        # bz2 and zlib only accept bytes-like input; encode text first so the
        # comparison also works when input_str is a (Python 3) str.
        raw_input = input_str if isinstance(input_str, bytes) else input_str.encode("utf-8")
        bz2c = bz2.compress(raw_input)
        zlibc = zlib.compress(raw_input, 9)

        ratio = size_ratio(compressed_text)
        b_ratio = size_ratio(backtracked_compressed_text)
        c_ratio = size_ratio(classic_compressed_text)
        bz2ratio = size_ratio(bz2c)
        zlibratio = size_ratio(zlibc)

        if backtracking_differs:
            print(
                "backtracked compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes"
                % (b_ratio, b_ratio * 100.0, original_size, len(backtracked_compressed_text))
            )
        # Trivially true when both outputs are identical, so no guard is needed.
        self.assertTrue(
            len(compressed_text) >= len(backtracked_compressed_text),
            "Back-tracking (%d) should always be better than not-backtracking (%d)"
            % (len(backtracked_compressed_text), len(compressed_text)),
        )
        print(
            "compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes"
            % (ratio, ratio * 100.0, original_size, len(compressed_text))
        )
        print(" vs ")
        print(" zlib ratio 1:%f (%.2f%%) to %d bytes" % (zlibratio, zlibratio * 100.0, len(zlibc)))
        print(" bz2 ratio 1:%f (%.2f%%) to %d bytes" % (bz2ratio, bz2ratio * 100.0, len(bz2c)))
        print(
            " smaz classic 1:%f (%.2f%%) to %d bytes" % (c_ratio, c_ratio * 100.0, len(classic_compressed_text))
        )

    self.assertEqual(input_str, decompressed_text)
    self.assertEqual(input_str, backtracked_decompressed_text)
    self.assertEqual(input_str, classic_decompressed_text)
def corpus_line_by_line(self, filename):
    """Process a .txt corpus file line by line and print the total sizes.

    The file is read in full and split on ``'\\n'``. Every line is compressed
    on its own with ``compress``, bz2, zlib and ``compress_classic`` (with and
    without pathological case detection), and the summed sizes are printed.

    Args:
        filename: Path of the corpus text file to read.
    """
    with open(filename, 'r') as f:
        test_data = f.read().split('\n')

    smaz_total = 0
    bz2_total = 0
    zlib_total = 0
    classic_total = 0
    classic_path_total = 0
    for test in test_data:
        # bz2/zlib require bytes-like input; encode text lines (Python 3 str)
        # and pass byte strings through untouched.
        raw = test if isinstance(test, bytes) else test.encode('utf-8')
        smaz_total += len(compress(test))
        bz2_total += len(bz2.compress(raw))
        zlib_total += len(zlib.compress(raw))
        classic_total += len(compress_classic(test, pathological_case_detection=False))
        classic_path_total += len(compress_classic(test, pathological_case_detection=True))

    print('Total data size %d bytes' % sum(len(x) for x in test_data))
    print(' Smaz size %d bytes' % smaz_total)
    print(' bz2 size %d bytes' % bz2_total)
    print(' zlib size %d bytes' % zlib_total)
    print(' Smaz classic size %d bytes' % classic_total)
    print(' Smaz classic with pathological case detection size %d bytes' % classic_path_total)
def corpus_line_by_line(self, filename):
    """Process a .txt corpus file line by line and report compressed sizes.

    Each ``"\\n"``-separated line of the file is compressed independently by
    ``compress``, bz2, zlib and both ``compress_classic`` variants; the
    per-codec totals are then printed next to the uncompressed total.

    Args:
        filename: Path of the corpus text file to read.
    """
    with open(filename, "r") as f:
        lines = f.read()
    test_data = lines.split("\n")

    # bz2 and zlib reject text on Python 3, so hand them an encoded copy of
    # each line; values that are already bytes are used as they are.
    raw_data = [test if isinstance(test, bytes) else test.encode("utf-8") for test in test_data]

    c_data = [compress(test) for test in test_data]
    bz_data = [bz2.compress(raw) for raw in raw_data]
    zlib_data = [zlib.compress(raw) for raw in raw_data]
    c_cl_data = [compress_classic(test, pathological_case_detection=False) for test in test_data]
    c_cl_path_data = [compress_classic(test, pathological_case_detection=True) for test in test_data]

    report = (
        ("Total data size %d bytes", test_data),
        (" Smaz size %d bytes", c_data),
        (" bz2 size %d bytes", bz_data),
        (" zlib size %d bytes", zlib_data),
        (" Smaz classic size %d bytes", c_cl_data),
        (" Smaz classic with pathological case detection size %d bytes", c_cl_path_data),
    )
    for template, items in report:
        print(template % sum(len(x) for x in items))
def cycle(self, input_str, quiet=False, compress_tree=None, decompress_table=None,
          show_input_and_output=True, strict=True):
    """Exercise a complete compress -> decompress cycle.

    Compresses ``input_str`` with ``compress`` (back-tracking off and on) and
    with ``compress_classic``, decompresses each result and asserts that all
    three round trips reproduce ``input_str``. When ``quiet`` is false and
    the input is non-empty, a report comparing the compressed sizes with zlib
    and bz2 is printed as well.

    Args:
        input_str: The text to round-trip.
        quiet: When true, skip the printed report (and the back-tracking
            size assertion that is part of it).
        compress_tree: Forwarded to ``compress`` as ``compression_tree``.
        decompress_table: Forwarded to ``decompress`` as ``decompress_table``.
        show_input_and_output: Also print the decompressed and compressed text.
        strict: Forwarded to ``compress`` as ``check_ascii``.
    """
    compressed_text = compress(input_str, compression_tree=compress_tree, backtracking=False,
                               check_ascii=strict)
    backtracked_compressed_text = compress(input_str, compression_tree=compress_tree, backtracking=True,
                                           check_ascii=strict)
    decompressed_text = decompress(compressed_text, decompress_table=decompress_table)
    backtracked_decompressed_text = decompress(backtracked_compressed_text, decompress_table=decompress_table)
    classic_compressed_text = compress_classic(input_str)
    classic_decompressed_text = decompress(classic_compressed_text)

    if not quiet and input_str:
        input_len = float(len(input_str))  # non-zero: input_str is truthy here
        differs = backtracked_compressed_text != compressed_text

        print('---------------------------------------------------------------------')
        if show_input_and_output:
            print(decompressed_text)
            print(compressed_text)
            if differs:
                print('--back tracked:--')
                print(backtracked_compressed_text)

        # bz2/zlib need bytes-like data; a text input (Python 3 str) is
        # encoded first, byte strings are passed through unchanged.
        if isinstance(input_str, bytes):
            payload = input_str
        else:
            payload = input_str.encode('utf-8')
        bz2c = bz2.compress(payload)
        zlibc = zlib.compress(payload, 9)

        ratio = len(compressed_text) / input_len
        b_ratio = len(backtracked_compressed_text) / input_len
        c_ratio = len(classic_compressed_text) / input_len
        bz2ratio = len(bz2c) / input_len
        zlibratio = len(zlibc) / input_len

        if differs:
            print('backtracked compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes'
                  % (b_ratio, b_ratio * 100., len(input_str), len(backtracked_compressed_text)))
        # Holds trivially when both outputs are the same, so it is checked
        # unconditionally within the report.
        self.assertTrue(len(compressed_text) >= len(backtracked_compressed_text),
                        'Back-tracking (%d) should always be better than not-backtracking (%d)'
                        % (len(backtracked_compressed_text), len(compressed_text)))
        print('compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes'
              % (ratio, ratio * 100., len(input_str), len(compressed_text)))
        print(' vs ')
        print(' zlib ratio 1:%f (%.2f%%) to %d bytes' % (zlibratio, zlibratio * 100., len(zlibc)))
        print(' bz2 ratio 1:%f (%.2f%%) to %d bytes' % (bz2ratio, bz2ratio * 100., len(bz2c)))
        print(' smaz classic 1:%f (%.2f%%) to %d bytes'
              % (c_ratio, c_ratio * 100., len(classic_compressed_text)))

    self.assertEqual(input_str, decompressed_text)
    self.assertEqual(input_str, backtracked_decompressed_text)
    self.assertEqual(input_str, classic_decompressed_text)