def encode_block(in_bytes):
    # Single-byte blocks are stored raw, flagged with a leading '1' bit.
    if len(in_bytes) == 1:
        block = BitArray()
        block.append(Bits('0b1'))
        block.append(in_bytes)
        return block.tobytes()

    # Full pipeline: BWT -> move-to-front -> run-length encoding -> Huffman.
    bw_xf, eof_idx = burrows_wheeler_transform(in_bytes)
    front_xf = move_to_front_transform(bw_xf)
    rle_data = run_length_encode(front_xf)
    huff_data, huff_symbols, serialized_tree = huffman_encode(rle_data, symbol_bits=8)

    huff_len = len(huff_data)
    tree_len = len(serialized_tree)

    # Block layout: flag bit, tree length, serialized tree, symbol count,
    # payload length, Huffman payload, BWT end-of-file index.
    block = BitArray()
    block.append(Bits('0b0'))
    block.append(Bits(uint=tree_len, length=16))
    block.append(Bits(serialized_tree))
    block.append(Bits(uint=huff_symbols, length=16))
    block.append(Bits(uint=huff_len, length=16))
    block.append(Bits(huff_data))
    block.append(Bits(uint=eof_idx, length=BLOCK_SIZE_BITS))
    return block.tobytes()
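# Usage sketch (not from the original code): split a file into fixed-size
# blocks and encode each one with encode_block(). Assumes the helpers above
# (burrows_wheeler_transform, huffman_encode, BLOCK_SIZE_BITS, ...) live in
# this module; the block size and length-prefix framing are illustrative only.
def encode_file(in_path, out_path, block_size=1 << 16):
    with open(in_path, 'rb') as f_in, open(out_path, 'wb') as f_out:
        while True:
            chunk = f_in.read(block_size)
            if not chunk:
                break
            encoded = encode_block(chunk)
            # Length-prefix each block so a decoder can re-split the stream.
            f_out.write(len(encoded).to_bytes(4, 'big'))
            f_out.write(encoded)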
def lz77huff_encode(input_data, window_bits=DEFAULT_WINDOW_BITS):
    tokens = lz77_encode_to_tokens(input_data, window_bits)
    symbols = [s for tok in tokens for s in tok]
    bits = BitArray()
    for s in symbols:
        bits.append(Bits(uint=s, length=window_bits))
    encoded_data, num_symbols, serialized_tree = huffman_encode(bits.tobytes(),
                                                                symbol_bits=window_bits)
    return encoded_data, num_symbols, serialized_tree
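# Illustrative call (not from the original code), assuming DEFAULT_WINDOW_BITS
# and the LZ77/Huffman helpers above are defined in this module. The sample
# input and the printed summary are for demonstration only.
if __name__ == '__main__':
    sample = b"abracadabra abracadabra abracadabra"
    encoded_data, num_symbols, serialized_tree = lz77huff_encode(sample)
    print(len(sample), "input bytes ->", len(encoded_data), "encoded bytes,",
          num_symbols, "symbols,", len(serialized_tree), "tree bytes")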
def main():
    if len(sys.argv) < 3:
        print("\nUsage: python3 driver.py <inFilename> <outFilename>\n"
              "Include '.txt' extension on filenames.\n")
        sys.exit(1)
    in_file = sys.argv[1]
    out_file = sys.argv[2]
    huffman.huffman_encode(in_file, out_file)
    input_bit_count = huffman.char_count(in_file) * 8
    output_bit_count = huffman.char_count(out_file)
    print('\nFile encoded. The original file contained ' + str(input_bit_count) +
          ' bits, while the encoded file contains ' + str(output_bit_count) +
          ' bits. The original file was reduced to ' +
          str(round(float(output_bit_count / input_bit_count) * 100, 1)) +
          '% of its original size.\n')
for s, bits in codes.items():
    print "\t%s \t%s" % (s, bits)

# Entropy and mean bits per symbol:
ent = entropy(freq)
mbs = mean_bits(freq, codes)
print "entropy  ", ent
print "mean bits", mbs

# Compression ratio:
cr = CHARSIZE / mbs
print "Huffman compression ratio %.2f:1" % cr
print "------------------------"

# Encode the text using the generated Huffman codes
encoded = huff.huffman_encode(text, codes)
decoded = huff.huffman_decode(encoded, codes)

# Calculate the number of bits used by the plain text and by the Huffman-encoded output
bits_text = len(text) * CHARSIZE * 1.0
bits_encoded = len(encoded)
print "text:    ", text[:16], "..."
print "encoded: ", encoded[:16], "..."
print "text == decoded:", text == decoded
print "size after compression: %.5f%%" % (bits_encoded / bits_text * 100)
print "NOTE: compression ratio affected by symbol '10' being 2 characters."
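# Reference sketches (not from the original code) of the entropy() and
# mean_bits() helpers used above, assuming `freq` maps each symbol to its
# probability (values summing to 1) and `codes` maps each symbol to its
# Huffman bit string. The real implementations may differ.
import math

def entropy(freq):
    # Shannon entropy in bits per symbol: H = -sum(p * log2(p)).
    return -sum(p * math.log(p, 2) for p in freq.values() if p > 0)

def mean_bits(freq, codes):
    # Expected code length: sum of p(s) * len(code(s)) over all symbols s.
    return sum(freq[s] * len(codes[s]) for s in freq)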
def forward(self, input):
    if self.transform:
        self.channels = self.out_channels
        if not hasattr(self, 'mn'):
            self.register_buffer('mn', torch.zeros(self.channels, 1))

        # Calculate projection matrix if needed
        if self.collectStats:
            im = F.conv2d(input, self.weight, self.bias, self.stride,
                          self.padding, self.dilation, self.groups)
            N, C, H, W = im.shape  # N x C x H x W
            im = featuresReshape(im, N, C, H, W, self.microBlockSz, self.channelsDiv)

            self.mn = torch.mean(im, dim=1, keepdim=True)
            # Centering the data
            im = im - self.mn

            self.get_stats_params(im)

            # projection
            imProj = torch.matmul(self.u.t(), im)

            # conv + bn if exists + projection
            im2 = F.conv2d(input, self.weight, self.bias, self.stride,
                           self.padding, self.dilation, self.groups)
            imProj2 = featuresReshape(im2, N, C, H, W, self.microBlockSz, self.channelsDiv)
            assert (torch.max(torch.abs(imProj - imProj2)) < 0.1)
        else:
            # conv + bn if exists + projection
            im = F.conv2d(input, self.weight, self.bias, self.stride,
                          self.padding, self.dilation, self.groups)
            N, C, H, W = im.shape  # N x C x H x W
            imProj = featuresReshape(im, N, C, H, W, self.microBlockSz, self.channelsDiv)

        mult = torch.zeros(1).to(imProj)
        add = torch.zeros(1).to(imProj)

        self.collectStats = False

        dynMax = torch.max(imProj)
        dynMin = torch.min(imProj)
        if self.actBitwidth < 30:
            imProj, mult, add = part_quant(imProj, max=dynMax, min=dynMin,
                                           bitwidth=self.actBitwidth)

        self.act_size = imProj.numel()
        self.bit_per_entry = shannon_entropy(imProj).item()
        self.bit_count = self.bit_per_entry * self.act_size
        if False:  # enable to also report the Huffman code size in addition to the theoretical entropy
            self.bit_countH = huffman_encode(imProj)
            self.bit_per_entryH = self.bit_countH / self.act_size

        if self.actBitwidth < 30:
            imProj = imProj * mult + add

        imProj = torch.matmul(self.u, imProj)

        # return original mean
        imProj = imProj + self.mn

        input = featuresReshapeBack(imProj, N, C, H, W, self.microBlockSz, self.channelsDiv)
    else:
        input = F.conv2d(input, self.weight, self.bias, self.stride,
                         self.padding, self.dilation, self.groups)

    return input
def forward(self, input):
    if self.transform:
        N, C, H, W = input.shape  # N x C x H x W
        im = featuresReshape(input, N, C, H, W, self.microBlockSz, self.channelsDiv)

        self.channels = im.shape[0]

        mn = torch.mean(im, dim=1, keepdim=True)
        # Centering the data
        im = im - mn

        # Calculate projection matrix if needed
        if self.collectStats:
            self.u, self.s = get_projection_matrix(im, self.transformType, self.eigenVar)
            self.original_channels = self.u.shape[0]
            self.channels = self.u.shape[1]

        # projection
        imProj = torch.matmul(self.u.t(), im)

        mult = torch.zeros(1).to(imProj)
        add = torch.zeros(1).to(imProj)

        dynMax = torch.max(imProj)
        dynMin = torch.min(imProj)
        if self.actBitwidth < 30:
            imProj, mult, add = part_quant(imProj, max=dynMax, min=dynMin,
                                           bitwidth=self.actBitwidth)

        self.act_size = imProj.numel()
        self.bit_per_entry = shannon_entropy(imProj).item()
        self.bit_count = self.bit_per_entry * self.act_size
        if False:  # enable to also report the Huffman code size in addition to the theoretical entropy
            self.bit_countH = huffman_encode(imProj)
            self.bit_per_entryH = self.bit_countH / self.act_size

        if self.actBitwidth < 30:
            imProj = imProj * mult + add

        imProj = torch.matmul(self.u, imProj)

        # Bias correction
        imProj = imProj - torch.mean(imProj, dim=1, keepdim=True)

        self.mse = torch.sum((imProj - im) ** 2)

        # return original mean
        imProj = imProj + mn

        # return to the original layout
        input = featuresReshapeBack(imProj, N, C, H, W, self.microBlockSz, self.channelsDiv)

        self.collectStats = False

    input = self.relu(input)
    return input
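# The quantize/dequantize pattern above -- part_quant(...) followed by
# `imProj * mult + add` -- implies a uniform affine quantizer. The helper
# below is a sketch inferred from that call site, not the original part_quant():
import torch

def part_quant_sketch(x, max, min, bitwidth):
    # Map values in [min, max] onto integer levels 0 .. 2**bitwidth - 1 and
    # return (levels, scale, offset) so that `levels * scale + offset` ~= x.
    if max == min:
        return torch.zeros_like(x), torch.ones(1).to(x), min
    scale = (max - min) / (2 ** bitwidth - 1)
    levels = torch.round((x - min) / scale)
    return levels, scale, min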
print 'Start program'

try:
    print 'Start open files'
    source = open('source.txt')
    print 'Open source...OK'
    target_lzw = open('lzw_result.txt', 'w')
    target_lzw_dict = open('lzw_dict.txt', 'w')
    target_huffman = open('huffman_result.txt', 'w')
    target_huffman_dict = open('huffman_dict.txt', 'w')
    all_text = source.read()
    print 'open files...OK'
except IOError as e:
    print 'error opening files:', e
    raise

lzw_encode_text, lzw_encode_dict = LZW_encode(all_text)
print 'lzw encode...OK'
target_lzw.write(''.join(lzw_encode_text))
target_lzw_dict.write(''.join(lzw_encode_dict))
print 'lzw file write...OK'

coding = {}
result = huffman_encode(all_text, coding)
print 'huffman encode...OK'
target_huffman.write(''.join(result))
target_huffman_dict.write(''.join(coding))
print 'huffman file write...OK'

target_huffman.close()
target_lzw.close()
target_huffman_dict.close()
target_lzw_dict.close()
source.close()
def huffman_call(self, widget):
    inp_str = self.txt_inp_text.get_text()
    out_code, out_encoded = huffman_encode(inp_str)
    # print(out_code, out_encoded)
    print("Original string length: ", len(inp_str))
    print("Length after encoding: ", len(out_encoded))
def lzw_huff_encode(in_bytes, symbol_len=DEFAULT_SYMBOL_LEN):
    lzw_bytes = lzwf_encode(in_bytes, code_len=symbol_len)
    return huffman_encode(lzw_bytes, symbol_bits=symbol_len)