def encode_block(in_bytes):
    """Encode one block of bytes and return the packed result as bytes.

    Layout of the returned bit stream (before ``tobytes()`` packs it):

    * Single-byte input: a ``1`` flag bit followed by the input itself.
    * Any other input: a ``0`` flag bit, then
        - 16-bit length of the serialized Huffman tree,
        - the serialized tree,
        - 16-bit symbol count reported by ``huffman_encode``,
        - 16-bit length of the Huffman-coded payload,
        - the Huffman-coded payload,
        - the index returned by ``burrows_wheeler_transform``, stored in
          ``BLOCK_SIZE_BITS`` bits.

    The payload is produced by chaining Burrows-Wheeler, move-to-front,
    run-length and Huffman (8-bit symbols) stages.
    """
    block = BitArray()

    # A lone byte is stored verbatim behind a "raw" flag bit.
    if len(in_bytes) == 1:
        block.append(Bits('0b1'))
        block.append(in_bytes)
        return block.tobytes()

    bwt_out, eof_idx = burrows_wheeler_transform(in_bytes)
    rle_data = run_length_encode(move_to_front_transform(bwt_out))
    huff_data, huff_symbols, serialized_tree = huffman_encode(
        rle_data, symbol_bits=8)

    # Header and payload fields, in the order they appear in the block.
    fields = (
        Bits('0b0'),
        Bits(uint=len(serialized_tree), length=16),
        Bits(serialized_tree),
        Bits(uint=huff_symbols, length=16),
        Bits(uint=len(huff_data), length=16),
        Bits(huff_data),
        Bits(uint=eof_idx, length=BLOCK_SIZE_BITS),
    )
    for field in fields:
        block.append(field)

    return block.tobytes()
Esempio n. 2
0
def lz77huff_encode(input_data, window_bits=DEFAULT_WINDOW_BITS):
    """LZ77-tokenize ``input_data`` and Huffman-code the resulting symbols.

    Every symbol of every token returned by ``lz77_encode_to_tokens`` is
    written as an unsigned integer of ``window_bits`` bits; the packed bytes
    are then passed to ``huffman_encode`` with ``symbol_bits=window_bits``.

    Returns:
        The ``(encoded_data, num_symbols, serialized_tree)`` tuple produced
        by ``huffman_encode``.
    """
    tokens = lz77_encode_to_tokens(input_data, window_bits)

    packed = BitArray()
    for token in tokens:
        for symbol in token:
            packed.append(Bits(uint=symbol, length=window_bits))

    encoded_data, num_symbols, serialized_tree = huffman_encode(
        packed.tobytes(), symbol_bits=window_bits)
    return encoded_data, num_symbols, serialized_tree
Esempio n. 3
0
def main():
    """Huffman-encode the file named by argv[1] into the file named by argv[2].

    After encoding, prints the bit counts of both files and the size of the
    output relative to the input. The input bit count is
    ``huffman.char_count(in_file) * 8``; the output bit count is
    ``huffman.char_count(out_file)`` taken as-is.

    Exits with status 1 (after printing the usage text) when fewer than two
    filename arguments are supplied.
    """
    if len(sys.argv) < 3:
        print("\nUsage: python3 driver.py <inFilename> <outFilename>\n"
              "Include \'.txt\' extension on filenames.\n")
        # sys.exit is always available, unlike the site-provided quit(),
        # and a non-zero status tells the caller the invocation failed.
        sys.exit(1)

    in_file = sys.argv[1]
    out_file = sys.argv[2]

    huffman.huffman_encode(in_file, out_file)

    input_bit_count = huffman.char_count(in_file) * 8
    output_bit_count = huffman.char_count(out_file)

    # An empty input has no meaningful ratio; avoid dividing by zero.
    if input_bit_count == 0:
        print('\nFile encoded. The original file was empty, so no size '
              'reduction can be reported.\n')
        return

    percent = round(float(output_bit_count / input_bit_count) * 100, 1)
    print('\nFile encoded. The original file contained ' + str(input_bit_count) + ' bits, while '
          'the encoded file contains ' + str(output_bit_count) + ' bits. The original file was '
          'reduced to ' + str(percent) + ' % of '
          'it\'s original size.\n')
Esempio n. 4
0
    # NOTE(review): this is the tail of a function whose `def` line is not
    # visible here; it uses Python 2 print statements.

    # Print one tab-separated line per (symbol, code) entry in `codes`.
    for s, bits in codes.items():
        print "\t%s \t%s" % (s, bits)

    # Entropy and mean bits per symbol:
    ent = entropy(freq)
    mbs = mean_bits(freq, codes)

    print "entropy  ", ent
    print "mean bits", mbs

    # Compression ratio:
    # NOTE(review): under Python 2 this is integer division if both
    # CHARSIZE and mbs are ints — confirm mean_bits returns a float.
    cr = CHARSIZE / mbs
    print "Huffman compression ratio %.2f:1" % cr

    print "------------------------"

    # Encode the text using the generated huffman codes
    encoded = huff.huffman_encode(text, codes)

    # Decode again so the round trip can be checked below.
    decoded = huff.huffman_decode(encoded, codes)

    # Calculate number of bits used by the plain text and the huffman encoded bits
    # (the `* 1.0` forces float division in the ratio printed below).
    bits_text = len(text) * CHARSIZE * 1.0
    bits_encoded = len(encoded)

    print "text: ", text[:16], "..."
    print "encoded: ", encoded[:16], "..."
    print "text == decoded:", text == decoded
    # NOTE(review): the value printed is the plain fraction
    # bits_encoded / bits_text (not multiplied by 100) although the format
    # string appends a percent sign — confirm which is intended.
    print "size after compression: %.5f%%" % (bits_encoded / bits_text)
    print "NOTE: compression ratio afected by symbol '10' being 2 characters."
    def forward(self, input):
        """Run the convolution, optionally with projection and quantization.

        When ``self.transform`` is false this is a plain ``F.conv2d`` call.
        Otherwise the convolution output is reshaped with
        ``featuresReshape``, optionally quantized with ``part_quant`` (when
        ``self.actBitwidth < 30``), multiplied by ``self.u``, shifted by
        ``self.mn`` and reshaped back with ``featuresReshapeBack``.

        Side effects when transforming: sets ``self.channels``, registers
        the ``mn`` buffer if absent, clears ``self.collectStats`` and
        records ``self.act_size``, ``self.bit_per_entry`` and
        ``self.bit_count``. On a statistics-collecting pass it also updates
        ``self.mn`` and calls ``self.get_stats_params``.
        """
        def run_conv():
            # Parameters are read at call time on purpose: the statistics
            # pass calls this again after get_stats_params has run.
            return F.conv2d(input, self.weight, self.bias, self.stride,
                            self.padding, self.dilation, self.groups)

        if not self.transform:
            return run_conv()

        self.channels = self.out_channels
        if not hasattr(self, 'mn'):
            self.register_buffer('mn', torch.zeros(self.channels, 1))

        # conv + bn if exists + projection
        conv_out = run_conv()
        N, C, H, W = conv_out.shape  # N x C x H x W
        feats = featuresReshape(conv_out, N, C, H, W, self.microBlockSz,
                                self.channelsDiv)

        if self.collectStats:
            # Centre the features and compute the projection statistics.
            self.mn = torch.mean(feats, dim=1, keepdim=True)
            centered = feats - self.mn
            self.get_stats_params(centered)
            proj = torch.matmul(self.u.t(), centered)

            # Sanity check: a second convolution pass must now agree with
            # the explicit projection (presumably get_stats_params folds
            # the projection into the layer parameters — TODO confirm).
            recheck = featuresReshape(run_conv(), N, C, H, W,
                                      self.microBlockSz, self.channelsDiv)
            assert torch.max(torch.abs(proj - recheck)) < 0.1
        else:
            proj = feats

        mult = torch.zeros(1).to(proj)
        add = torch.zeros(1).to(proj)

        self.collectStats = False

        dynMax = torch.max(proj)
        dynMin = torch.min(proj)

        quantize = self.actBitwidth < 30
        if quantize:
            proj, mult, add = part_quant(proj,
                                         max=dynMax,
                                         min=dynMin,
                                         bitwidth=self.actBitwidth)

        # Size bookkeeping based on the entropy of the (quantized) values.
        self.act_size = proj.numel()
        self.bit_per_entry = shannon_entropy(proj).item()
        self.bit_count = self.bit_per_entry * self.act_size
        if False:  # enable to also report the Huffman-coded size
            self.bit_countH = huffman_encode(proj)
            self.bit_per_entryH = self.bit_countH / self.act_size

        if quantize:
            # Undo the quantization scaling.
            proj = proj * mult + add

        # Back-project and restore the mean.
        proj = torch.matmul(self.u, proj) + self.mn

        return featuresReshapeBack(proj, N, C, H, W, self.microBlockSz,
                                   self.channelsDiv)
    def forward(self, input):
        """Project, optionally quantize and reconstruct ``input``, then ReLU.

        When ``self.transform`` is false only ``self.relu`` is applied.
        Otherwise the input is reshaped with ``featuresReshape``, centred,
        projected with ``self.u``, optionally quantized with ``part_quant``
        (when ``self.actBitwidth < 30``), projected back, mean-corrected
        and reshaped with ``featuresReshapeBack`` before the ReLU.

        Side effects when transforming: updates ``self.channels``, records
        ``self.act_size``, ``self.bit_per_entry``, ``self.bit_count`` and
        ``self.mse``, and clears ``self.collectStats``. On a
        statistics-collecting pass it also sets ``self.u``, ``self.s`` and
        ``self.original_channels``.
        """
        if not self.transform:
            return self.relu(input)

        N, C, H, W = input.shape  # N x C x H x W
        feats = featuresReshape(input, N, C, H, W, self.microBlockSz,
                                self.channelsDiv)
        self.channels = feats.shape[0]

        # Centre the data.
        mn = torch.mean(feats, dim=1, keepdim=True)
        centered = feats - mn

        # Compute the projection matrix only while collecting statistics.
        if self.collectStats:
            self.u, self.s = get_projection_matrix(centered,
                                                   self.transformType,
                                                   self.eigenVar)
            self.original_channels = self.u.shape[0]

        self.channels = self.u.shape[1]

        # Projection.
        proj = torch.matmul(self.u.t(), centered)

        mult = torch.zeros(1).to(proj)
        add = torch.zeros(1).to(proj)

        dynMax = torch.max(proj)
        dynMin = torch.min(proj)

        quantize = self.actBitwidth < 30
        if quantize:
            proj, mult, add = part_quant(proj,
                                         max=dynMax,
                                         min=dynMin,
                                         bitwidth=self.actBitwidth)

        # Size bookkeeping based on the entropy of the (quantized) values.
        self.act_size = proj.numel()
        self.bit_per_entry = shannon_entropy(proj).item()
        self.bit_count = self.bit_per_entry * self.act_size
        if False:  # enable to also report the Huffman-coded size
            self.bit_countH = huffman_encode(proj)
            self.bit_per_entryH = self.bit_countH / self.act_size

        if quantize:
            # Undo the quantization scaling.
            proj = proj * mult + add

        recon = torch.matmul(self.u, proj)

        # Bias correction: remove the mean of the reconstruction.
        recon = recon - torch.mean(recon, dim=1, keepdim=True)

        # Squared reconstruction error against the centred features.
        self.mse = torch.sum((recon - centered)**2)

        # Restore the original mean and the original layout.
        recon = recon + mn
        restored = featuresReshapeBack(recon, N, C, H, W, self.microBlockSz,
                                       self.channelsDiv)

        self.collectStats = False

        return self.relu(restored)
Esempio n. 7
0
# Script: read source.txt, compress it with LZW and with Huffman coding, and
# write each result plus its dictionary to its own text file.
#
# print(...) with a single string argument behaves the same under Python 2
# and Python 3, so the progress messages are unchanged.
print('Start program')
try:
    print('Start open files')
    # The with-blocks guarantee every handle is closed, even when a later
    # step raises.
    with open('source.txt') as source:
        print('Open source...OK')
        with open('lzw_result.txt', 'w') as target_lzw, \
                open('lzw_dict.txt', 'w') as target_lzw_dict, \
                open('huffman_result.txt', 'w') as target_huffman, \
                open('huffman_dict.txt', 'w') as target_huffman_dict:
            all_text = source.read()
            print('open files...OK')

            lzw_encode_text, lzw_encode_dict = LZW_encode(all_text)
            print('lzw encode...OK')
            target_lzw.write(''.join(lzw_encode_text))
            target_lzw_dict.write(''.join(lzw_encode_dict))
            print('lzw file write...OK')

            # `coding` is passed in empty and joined afterwards.
            # NOTE(review): presumably huffman_encode fills it; joining a
            # dict concatenates only its keys — confirm that is the
            # intended dictionary file content.
            coding = {}
            result = huffman_encode(all_text, coding)
            print('huffman encode...OK')
            target_huffman.write(''.join(result))
            target_huffman_dict.write(''.join(coding))
            print('huffman file write...OK')
except IOError:
    print('error')
    # Stop here with the real I/O error instead of continuing without the
    # input text.
    raise
Esempio n. 8
0
 def huffman_call(self, widget):
     """Huffman-encode the text of ``self.txt_inp_text`` and print both sizes.

     ``widget`` is accepted but not used. Of the two values returned by
     ``huffman_encode`` only the second is used; the lengths of the input
     string and of that second value are printed.
     """
     text = self.txt_inp_text.get_text()
     _, encoded = huffman_encode(text)
     print("Tamanho original da string: ", len(text))
     print("Tamanho após codificação: ", len(encoded))
def lzw_huff_encode(in_bytes, symbol_len=DEFAULT_SYMBOL_LEN):
    """LZW-encode ``in_bytes`` and Huffman-code the result.

    Args:
        in_bytes: Data passed to ``lzwf_encode``.
        symbol_len: Used both as the LZW ``code_len`` and as the Huffman
            ``symbol_bits``.

    Returns:
        Whatever ``huffman_encode`` returns for the LZW output.
    """
    # Use the function's own parameter; the previous body referenced an
    # undefined name `input_data`.
    lzw_bytes = lzwf_encode(in_bytes, code_len=symbol_len)
    return huffman_encode(lzw_bytes, symbol_bits=symbol_len)