def encode(self, img):
    """Huffman-encode an image as two independently coded parts.

    The first ``self.num_rows`` rows of ``img`` are coded directly. Then
    ``img.shape[0] - self.num_rows`` rows of the prediction-error image
    returned by ``self.predErrImg(img)`` are coded with a second codec
    trained on those rows only.

    Returns:
        Tuple ``(height, width, codec_first_rows, encoded_first_rows,
        codec_remaining_rows, encoded_remaining_rows)``.
    """
    # Part 1: the leading rows, flattened row by row into one symbol list.
    head_symbols = [
        value
        for row in range(self.num_rows)
        for value in img[row].tolist()
    ]
    codec_first_rows = HuffmanCodec.from_data(head_symbols)
    encoded_first_rows = codec_first_rows.encode(head_symbols)

    # Part 2: rows of the prediction-error image.
    prediction_error_img = self.predErrImg(img)
    rows_left = img.shape[0] - self.num_rows
    tail_symbols = [
        value
        for row in range(rows_left)
        for value in prediction_error_img[row].tolist()
    ]
    codec_remaining_rows = HuffmanCodec.from_data(tail_symbols)
    encoded_remaining_rows = codec_remaining_rows.encode(tail_symbols)

    return (
        img.shape[0],
        img.shape[1],
        codec_first_rows,
        encoded_first_rows,
        codec_remaining_rows,
        encoded_remaining_rows,
    )
def compute_compression_ratio(data, som, winners, width):
    """Compare Huffman-coded sizes of plain and differentially coded winners.

    Returns:
        A pair ``(differential_gain, total_ratio)`` where
        ``differential_gain`` is the coded size of ``winners`` divided by the
        coded size of its differential coding, and ``total_ratio`` is
        ``len(data) * len(data[0])`` divided by the coded codebook size plus
        the coded differential size.
    """
    def huffman_bytes(symbols):
        # Train a codec on the symbols and encode them with it.
        return HuffmanCodec.from_data(symbols).encode(symbols)

    diff = Dataset.differential_coding(winners.flatten(), width)
    plain_size = len(huffman_bytes(winners))
    diff_size = len(huffman_bytes(diff))

    # Codebook scaled by 255 and cast to uint8 before coding.
    codebook = np.array(
        np.concatenate(som.get_som_as_list(), 0) * 255, dtype='uint8')
    header_size = len(huffman_bytes(codebook))

    raw_size = len(data) * len(data[0])
    return plain_size / diff_size, raw_size / (header_size + diff_size)
def compute_compression_metrics(data, som, winners, width):
    """Return two compression ratios for a winner map.

    Returns:
        ``(differential_gain, total_ratio)``: the Huffman-coded size of
        ``winners`` over the coded size of its differential coding, and
        ``len(data) * len(data[0])`` over the coded codebook size plus the
        coded differential size.
    """
    def coded(symbols):
        # Codec is trained on exactly the symbols it then encodes.
        return HuffmanCodec.from_data(symbols).encode(symbols)

    diff = ImageData.differential_coding(winners.flatten(), width)
    normal_len = len(coded(winners))
    differential_len = len(coded(diff))

    # Codebook entries are scaled by 255 and stored as uint8 before coding.
    scaled_codebook = np.concatenate(som.get_som_as_list(), 0) * 255
    header_len = len(coded(np.array(scaled_codebook, dtype='uint8')))

    element_count = len(data) * len(data[0])
    total_ratio = element_count / (header_len + differential_len)
    return normal_len / differential_len, total_ratio
def test_string_data(data):
    """Round-trip ``data`` through a codec trained on it; output must shrink."""
    huffman = HuffmanCodec.from_data(data)
    packed = huffman.encode(data)

    assert type(packed) is bytes
    assert len(data) > len(packed)
    assert huffman.decode(packed) == data
def strDict(self, strList, encStr):
    """Encode ``encStr`` once per entry of ``strList``.

    For every entry a codec is trained on that entry alone and used to encode
    ``encStr``.

    Returns:
        Dict mapping each entry of ``strList`` to the resulting encoding.
    """
    return {
        entry: HuffmanCodec.from_data(entry).encode(encStr)
        for entry in strList
    }
def huffmanCoding(self, model, quantized, k):
    """Measure raw and Huffman-coded sizes of the model's first weight tensors.

    For each layer that has weights, ``layer.weights[0]`` is flattened, its
    first ``k`` entries are dropped, and the remainder is Huffman coded. The
    totals are stored on ``self.modelSize`` and ``self.compressedSize``.

    Args:
        model: object exposing ``layers``; each layer exposes ``weights``.
        quantized: when truthy, used as a per-weight size that is divided by
            8 (presumably bits per weight); otherwise 4 per weight is used.
        k: number of leading flattened weights excluded from both totals.
    """
    raw_total = 0
    packed_total = 0

    for layer in model.layers:
        if not layer.weights:
            continue

        kernel = layer.weights[0]
        element_count = 1
        for dim in kernel.shape:
            element_count = dim * element_count
        flat = np.reshape(kernel.numpy(), (1, element_count))[0]

        tail = flat[k:]
        packed_total += len(HuffmanCodec.from_data(tail).encode(tail))

        kept = len(flat) - k
        if quantized:
            raw_total += (kept * quantized) // 8
        else:
            raw_total += kept * 4

    self.modelSize = raw_total
    self.compressedSize = packed_total
def get_codec(self):
    """Build a Huffman codec trained on a fixed pangram.

    Returns:
        The codec built from the sentence
        "the quick brown fox jumps over the lazy dog".
    """
    training_text = "the quick brown fox jumps over the lazy dog"
    return HuffmanCodec.from_data(training_text)
def test_save(tmp_path: Path):
    """A codec saved to disk and reloaded must encode and decode identically."""
    message = 'abcdabcd'
    original = HuffmanCodec.from_data('aabcbcdbabdbcbd')
    target = str(tmp_path / 'foo' / 'bar.huff')

    original.save(target)
    from_original = original.encode(message)

    reloaded = PrefixCodec.load(target)
    from_reloaded = reloaded.encode(message)

    assert from_original == from_reloaded
    assert original.decode(from_original) == reloaded.decode(from_reloaded)
def encode(frame, huffman=False):
    """Transform ``frame`` with ``dct`` and entropy-code the flattened result.

    Args:
        frame: input passed to ``dct``.
        huffman: when true, Huffman-code the coefficients; otherwise pass
            them to ``rle``.

    Returns:
        The ``rle`` output when ``huffman`` is false, otherwise a pair
        ``(encoded, codec)``.
    """
    coefficients = dct(frame).flatten()
    if huffman:
        codec = HuffmanCodec.from_data(coefficients)
        return codec.encode(coefficients), codec
    return rle(coefficients)
def test_print_code_table2():
    """The printed code table for a single-symbol codec has the expected rows."""
    codec = HuffmanCodec.from_data("aaaaa")
    buffer = io.StringIO()
    codec.print_code_table(out=buffer)

    header, *rows = buffer.getvalue().split('\n')
    expected_header, *expected_rows = (
        "Bits Code Value Symbol\n 1 0 0 _EOF\n 1 1 1 'a'\n".split('\n'))

    assert header == expected_header
    # Row order is not fixed, so the rows are compared as sets.
    assert set(rows) == set(expected_rows)
def generate_table(self, inputfilename):
    """Build the Huffman codec and a symbol-to-bit-string table for a file.

    After ``self.generate_data_list(inputfilename)`` has run, a codec is
    trained on ``self.__data_ns``. Its code table is stored in ``self.table``
    and each ``(bitsize, value)`` entry is rendered as a zero-padded binary
    string in ``self.__strings_table``. The symbol whose type is not ``int``
    is remembered as ``self.eof``.
    """
    self.generate_data_list(inputfilename)
    self.codec = HuffmanCodec.from_data(self.__data_ns)
    self.table = self.codec.get_code_table()
    self.__strings_table = {}

    for symbol, (bitsize, value) in self.table.items():
        if type(symbol) is not int:
            self.eof = symbol
        bits = bin(value)[2:]
        self.__strings_table[symbol] = bits.rjust(bitsize, '0')
def huffman_coding_decoding(RLE_total):
    """Huffman-code the string form of ``RLE_total`` and decode it again.

    The code table is printed as a side effect.

    Returns:
        Tuple ``(codec, coded_string, decoded_string)``.
    """
    text = str(RLE_total)
    codec = HuffmanCodec.from_data(text)

    print("Huffman Code Table: \n")
    codec.print_code_table()

    coded = codec.encode(text)
    return codec, coded, codec.decode(coded)
def encodedStringDict(self, listOfStrings=None):
    """Huffman-encode a list of strings as one symbol sequence.

    Each string in ``listOfStrings`` is one symbol: the codec is trained on
    the list and the whole list is encoded in a single call.

    Args:
        listOfStrings: the strings to encode. Defaults to an empty list; the
            default is ``None`` rather than ``[]`` so that no list object is
            shared between calls.

    Returns:
        Tuple ``(encodedDict, decodedList)``. ``decodedList`` is the result of
        decoding the encoded sequence. ``encodedDict`` is built by zipping the
        input strings with the items of the encoded result.
    """
    if listOfStrings is None:
        listOfStrings = []

    codec = HuffmanCodec.from_data(listOfStrings)
    encodeResults = codec.encode(listOfStrings)
    decodedList = codec.decode(encodeResults)

    # NOTE(review): zip pairs each string with one item of the shared
    # encoding (truncating to the shorter), not with a per-string encoding;
    # confirm this is the intended mapping.
    encodedDict = dict(zip(listOfStrings, encodeResults))
    return encodedDict, decodedList
def save_compressed(self, som, name):
    """Write the Huffman-coded codebook and winner map and print the ratios.

    The winner map from ``som.winners()`` is differentially coded and Huffman
    coded. The codebook from ``som.get_som_as_list()`` is scaled by 255, cast
    to uint8 and Huffman coded as a header. The string forms of the header
    and of the coded differential map are written to ``output_path + name``.

    Args:
        som: object providing ``winners()`` and ``get_som_as_list()``.
        name: file name appended to the module-level ``output_path``.
    """
    winners = som.winners()
    diff = Dataset.differential_coding(winners.flatten(), self.nb_pictures[1])

    codeNormal = HuffmanCodec.from_data(winners).encode(winners)
    codeDiff = HuffmanCodec.from_data(diff).encode(diff)

    hd = np.concatenate(som.get_som_as_list(), 0) * 255
    hd = np.array(hd, 'uint8')
    header = HuffmanCodec.from_data(hd).encode(hd)

    # Open only once everything is computed, and let the context manager
    # close the handle even if a write fails.
    with open(output_path + name, "w") as out_file:
        out_file.write(str(header))
        out_file.write(str(codeDiff))

    print("Taux de compression du codage différentiel :",
          len(codeNormal) / len(codeDiff))
    print(
        "Taux de compression total :",
        len(self.data) * len(self.data[0]) / (len(header) + len(codeDiff)))
def Huffman_Encoder(binary_map, bits_group_num=64):
    """Huffman-code a map after grouping it into fixed-size chunks.

    ``binary_map`` is converted with ``float2bin`` and cut into groups of
    ``bits_group_num`` elements, with a shorter final group when the length
    is not a multiple. The raw bytes of each group are the codec's symbols.

    Returns:
        Tuple ``(output, codec)``: the encoded bytes and the codec used.
    """
    binary_map = float2bin(binary_map)
    leftover = len(binary_map) % bits_group_num

    if leftover:
        groups = list(binary_map[:-leftover].reshape(-1, bits_group_num))
        groups.append(binary_map[-leftover:])
    else:
        groups = list(binary_map.reshape(-1, bits_group_num))

    symbols = [group.tobytes() for group in groups]
    codec = HuffmanCodec.from_data(symbols)
    return codec.encode(symbols), codec
def decode_huffman_encoded_string(self, encoded_item):
    """Decode one Huffman-encoded string from a JSON key/value pair.

    Parameters:
        encoded_item (str): JSON text of an object whose first key is the
            original string and whose value is the Huffman encoding of that
            string, represented as latin1-decoded text.

    Returns:
        The decoded string, serialized as JSON.
    """
    payload = json.loads(encoded_item)
    original = next(iter(payload))

    # The codec is rebuilt from the key and used to decode the value.
    codec = HuffmanCodec.from_data(original)
    raw = payload[original].encode('latin1')
    return json.dumps(codec.decode(raw))
def compress_pic(self, quant_blocks):
    """Huffman-code a grid of quantized matrices.

    Every matrix in ``quant_blocks`` (iterated line by line) is turned into a
    sequence with ``self.mat2sequence``; the sequences are concatenated in
    that order and coded with a codec trained on the result.

    :param quant_blocks: iterable of lines, each an iterable of matrices
    :return: tuple ``(codec, encoded)``
    """
    symbols = [
        value
        for line in quant_blocks
        for mat in line
        for value in self.mat2sequence(mat)
    ]
    codec = HuffmanCodec.from_data(symbols)
    return codec, codec.encode(symbols)
def huffman_encode_strings(self, items):
    """Huffman-encode each string with a codec trained on that string alone.

    Parameters:
        items (list): a list of strings.

    Returns:
        JSON text of a dictionary mapping each original string to its
        encoding, with the encoded bytes decoded as latin1 text.
    """
    encoded_items = {
        item: HuffmanCodec.from_data(item).encode(item).decode('latin1')
        for item in items
    }
    return json.dumps(encoded_items)
def encode(model, out_file='out.pkl'):
    """Huffman-code a model's state dict and pickle it to ``out_file``.

    Entries whose key contains ``'running'`` or ``'batches'`` are stored
    unchanged. Every other entry is flattened to a 1-D array and stored as
    ``[encoded_bytes, codec]``, the codec being trained on that entry alone.

    Args:
        model: object exposing ``state_dict()``.
        out_file: path of the pickle file to write.
    """
    print('Start encoding...')
    quanti_dic = OrderedDict()
    for k, v in model.state_dict().items():
        print(k)
        if 'running' in k or 'batches' in k:
            print("Ignoring {}".format(k))
            quanti_dic[k] = v
            continue
        layer_w = v.data.cpu().numpy().flatten()
        codec = HuffmanCodec.from_data(layer_w)
        quanti_dic[k] = [codec.encode(layer_w), codec]

    # The context manager closes the file even when pickling fails.
    with open(out_file, 'wb') as outfile:
        pickle.dump(quanti_dic, outfile)
    print('Done. Save to {}'.format(out_file))
def decodeStr(self, strLis, decStr):
    """Decode ``decStr`` with a codec trained on ``strLis``.

    Returns:
        The decoded result, itself decoded from UTF-8.
    """
    huffman = HuffmanCodec.from_data(strLis)
    raw = huffman.decode(decStr)
    return raw.decode("utf-8")
def encode(self, index):
    """Train a codec on ``index``, encode it, and keep both on the instance.

    Sets ``self.codebook`` and ``self.encoded_index``.

    Returns:
        The encoded index.
    """
    codebook = HuffmanCodec.from_data(index)
    self.codebook = codebook
    self.encoded_index = codebook.encode(index)
    return self.encoded_index
def test_decode_concat():
    """``decode`` applies the ``concat`` callable to the decoded symbols."""
    symbols = [1, 2, 1, 2, 3, 2, 1]
    codec = HuffmanCodec.from_data([1, 2, 3])
    total = codec.decode(codec.encode(symbols), concat=sum)
    assert total == 12
def test_non_string_symbols(data):
    """Round-trip non-string ``data`` through a codec trained on it."""
    huffman = HuffmanCodec.from_data(data)
    packed = huffman.encode(data)

    assert type(packed) is bytes
    assert huffman.decode(packed) == data
''' Microservice that performs a variety of functions: 1) Squares every odd number in a vector of integers 2) Generate string:encoding key store from a list of strings 3) Decode an encoded string ''' # Nameko import from nameko.rpc import rpc # Huffman encoder/decoder from dahuffman import HuffmanCodec ### Use NLTK Gutenberg corpus to create a frequency distribution of letters ### Use that to perform static Huffman encoding from nltk.corpus import gutenberg codec = HuffmanCodec.from_data(gutenberg.raw()) # Define the service class InvictusService(): name = "invictus_service" # Function that squares a number if it's odd def odd_square(self, number): if (number % 2 != 0): return number * number return number # RPC to apply odd_square to a list of integers @rpc def apply_odd_square(self, array):
def encodeAudio(fileName):
    """Encode a WAV file into a pickled ``.acod`` file of Huffman-coded data.

    The output file has the input's name with the extension replaced by
    ``.acod``. It holds, in order: the sampling rate, the channel count, and
    for every channel the scale-factor code table, the subband-sample code
    table, the coded scale factors and the coded subband samples.

    The quality setting is read from ``sys.argv[2]`` when exactly three
    command-line arguments are present, and is 100.0 otherwise.
    """
    audiofile = fileName
    print("Archivo audio=", audiofile)

    quality = float(sys.argv[2]) if len(sys.argv) == 3 else 100.0

    fs, x = wav.read(audiofile)
    if len(x.shape) >= 2:
        # One column per channel (2 for stereo).
        channels = x.shape[1]
    else:
        # Mono: add a channel axis so per-channel indexing works below.
        channels = 1
        x = np.expand_dims(x, axis=1)
    print("Canales=", channels)

    N = 1024  # number of MDCT subbands
    nfilts = 64  # number of subbands in the Bark domain
    # Sine window.
    fb = np.sin(np.pi / (2 * N) * (np.arange(int(1.5 * N)) + 0.5))

    # Output name: the input name with its extension swapped for .acod.
    encfile = os.path.splitext(audiofile)[0] + '.acod'
    print("Archivo comprimido:", encfile)

    def column_major(values):
        # Flatten in column (Fortran) order and drop the leading axis.
        return np.reshape(values, (1, -1), order='F')[0]

    totalbytes = 0
    with open(encfile, 'wb') as codedfile:
        pickle.dump(fs, codedfile)
        pickle.dump(channels, codedfile)

        for chan in range(channels):
            print("Canal ", chan)
            # Quantized subbands and quantized Bark-domain values.
            yq, _, mTbarkquant = MDCT_psayac_quant_enc(
                x[:, chan], fs, fb, N, nfilts, quality=quality)
            print("Huffman Coding")

            # Scale factors: codec, code table, coded data.
            scale_symbols = column_major(mTbarkquant)
            scale_codec = HuffmanCodec.from_data(scale_symbols)
            scale_table = scale_codec.get_code_table()
            scale_coded = scale_codec.encode(scale_symbols)

            # Quantized subband samples: codec, code table, coded data.
            sample_symbols = column_major(yq)
            sample_codec = HuffmanCodec.from_data(sample_symbols)
            sample_table = sample_codec.get_code_table()
            sample_coded = sample_codec.encode(sample_symbols)

            # Tables first, then the coded streams.
            pickle.dump(scale_table, codedfile)
            pickle.dump(sample_table, codedfile)
            pickle.dump(scale_coded, codedfile)
            pickle.dump(sample_coded, codedfile)

            totalbytes += (len(scale_table) + len(sample_table)
                           + len(scale_coded) + len(sample_coded))

    numsamples = np.prod(x.shape)
    print("Numero total de bytes=", totalbytes)
    print("Numero total de samples:", numsamples)
    print("bytes por sample=", totalbytes / numsamples)
# Write the stream header, then build the Huffman codecs, code tables and
# coded data for every channel. The per-channel names assigned here are left
# bound for whatever follows this fragment.
with open(encfile, 'wb') as codedfile: # open the compressed output file
    pickle.dump(fs,codedfile) # write sampling rate
    pickle.dump(channels,codedfile) # write number of channels
    for chan in range(channels): # loop over channels
        print("channel ", chan)
        # Compute the quantized masking threshold in the Bark domain and the
        # quantized subbands for this channel.
        yq, y, mTbarkquant=MDCT_psayac_quant_enc(x[:,chan],fs,fb,N, nfilts,quality=quality)
        print("Huffman Coding")
        # Train a Huffman coder on the quantized masking threshold (scale
        # factors), flattened in column (subband) order.
        mTbarkquantflattened=np.reshape(mTbarkquant, (1,-1),order='F')
        mTbarkquantflattened=mTbarkquantflattened[0] # remove dimension 0
        codecmTbarkquant=HuffmanCodec.from_data(mTbarkquantflattened)
        # Code table of the scale-factor codec.
        tablemTbarkquant=codecmTbarkquant.get_code_table()
        # Huffman-coded scale factors.
        mTbarkquantc=codecmTbarkquant.encode(mTbarkquantflattened)
        # Train a second Huffman coder on the quantized subband samples,
        # flattened the same way.
        yqflattened=np.reshape(yq,(1,-1),order='F')
        yqflattened=yqflattened[0] # remove dimension 0
        codecyq=HuffmanCodec.from_data(yqflattened)
        # Code table of the subband-sample codec.
        tableyq=codecyq.get_code_table()
        # Huffman-coded subband samples.
        yqc=codecyq.encode(yqflattened)
def compress_pic_before(self):
    """Huffman-code ``self.zig_data`` with each line as one symbol.

    Every line is converted to a tuple so it can serve as a symbol.

    Returns:
        Tuple ``(codec, encoded)``.
    """
    symbols = tuple(tuple(line) for line in self.zig_data)
    codec = HuffmanCodec.from_data(symbols)
    return codec, codec.encode(symbols)
def PC_Encode(self, line):
    """Huffman-code ``line`` after removing every newline character.

    Returns:
        Tuple ``(encoded, codec)``; the codec is trained on the stripped line.
    """
    text = line.replace('\n', '')
    codec = HuffmanCodec.from_data(text)
    return (codec.encode(text), codec)
def main(string):
    """Compress the image at path ``string`` with a JPEG-like pipeline.

    The image is converted to YCrCb and both chroma channels are box-filtered
    and subsampled by 2 in each direction. Every channel is then processed in
    8x8 blocks: DCT, quantization with ``Q[channel_index]``, zigzag scan. The
    first zigzag coefficient of each block (DC) and the run-length coding of
    the rest (AC) are Huffman coded per channel.

    Side effects: the codecs are saved to ``dc_table/dc_table<i>.tb`` and
    ``ac_table/ac_table<i>.tb``, and the bitstream is written to
    ``encoded.jpg``.

    Args:
        string: path of the image to read with ``cv2.imread``.

    Returns:
        Tuple ``(r, elapsed)``: the bitstream length plus the table file
        sizes, divided by ``w * h``; and the elapsed time in seconds.
    """
    start = time.time()
    B = 8  # block edge length
    im = cv2.imread(string)

    # NOTE(review): this is true division, so h and w keep the original
    # dimensions instead of being rounded down to a multiple of B; confirm
    # whether floor division was intended.
    h, w = np.array(im.shape[:2]) / B * B
    h = int(h)
    w = int(w)

    jpg_len = 0  # total size of the saved code-table files
    b = bytearray()
    b += h.to_bytes(HEIGHT_BITS, byteorder='big')
    b += w.to_bytes(WIDTH_BITS, byteorder='big')

    imYCC = cv2.cvtColor(im, cv2.COLOR_BGR2YCR_CB)

    # Subsample the chrominance channels.
    SSV = 2
    SSH = 2
    crf = cv2.boxFilter(imYCC[:, :, 1], ddepth=-1, ksize=(2, 2))
    cbf = cv2.boxFilter(imYCC[:, :, 2], ddepth=-1, ksize=(2, 2))
    y = imYCC[:, :, 0]
    imSub = [y, crf[::SSV, ::SSH], cbf[::SSV, ::SSH]]

    for idx, channel in enumerate(imSub):
        channelrows = channel.shape[0]
        channelcols = channel.shape[1]
        TransQuant = np.zeros((channelrows, channelcols), np.float32)

        # Level-shifted float copy of the channel.
        vis0 = np.zeros((channelrows, channelcols), np.float32)
        vis0[:channelrows, :channelcols] = channel
        vis0 = vis0 - 128

        dc_c = []
        rle_ecoded = []
        # Only whole blocks are processed; partial edge blocks are skipped.
        for row in range(int(channelrows / B)):
            rows = slice(row * B, (row + 1) * B)
            for col in range(int(channelcols / B)):
                cols = slice(col * B, (col + 1) * B)
                currentblock = cv2.dct(vis0[rows, cols])
                TransQuant[rows, cols] = np.round(currentblock / Q[idx])
                zz = zigzag(TransQuant[rows, cols])
                dc_c.append(zz[0])
                # AC coefficients are run-length coded as (skip, value)
                # pairs; (0, 0) ends the block.
                rle_ecoded.extend(rle(zz[1:]))

        # DC: save the table, then append the length-prefixed coded data.
        dc_path = "dc_table/dc_table{0}.tb".format(idx)
        dc_codec = HuffmanCodec.from_data(dc_c)
        dc_codec.save(dc_path)
        jpg_len += os.stat(dc_path).st_size
        encoded = dc_codec.encode(dc_c)
        b += len(encoded).to_bytes(DC_BITS, byteorder='big')
        b += encoded

        # AC: same layout as DC.
        ac_path = "ac_table/ac_table{0}.tb".format(idx)
        ac_codec = HuffmanCodec.from_data(rle_ecoded)
        ac_codec.save(ac_path)
        jpg_len += os.stat(ac_path).st_size
        encoded = ac_codec.encode(rle_ecoded)
        b += len(encoded).to_bytes(AC_BITS, byteorder='big')
        b += encoded

    # The context manager flushes and closes the output file.
    with open("encoded.jpg", "wb") as out_file:
        out_file.write(b)

    end = time.time()
    r = (len(b) + jpg_len) / (w * h)
    return r, end - start