def compress_all(matrix):
    """Flatten *matrix*, take the first slice from ``RAS.sliceFA`` and Huffman-encode it.

    Args:
        matrix: Object exposing ``flatten()`` (presumably a NumPy array —
            TODO confirm against callers).

    The Huffman result is bound to ``c_huff``; nothing is returned in the
    code visible here.
    """
    flat = matrix.flatten()

    # Only the first element of whatever RAS.sliceFA returns is used.
    a_s = RAS.sliceFA(flat)[0]
    print('a_s', a_s)

    a_s = a_s.flatten()
    c_huff = Huffman.Huffman_Do(a_s)
def main(input, m_s):
    """Walk *input* with a sliding LZ77 window and pack every match.

    Args:
        input: Sequence to encode; it is sliced and measured with ``len``.
        m_s: Maximum search-buffer size; converted with ``int``.

    For every ``(offset, length, char)`` triple returned by ``LZ77_search``
    the function packs ``(offset << 6) + length`` as a big-endian unsigned
    16-bit value followed by ``char`` as a single byte (``">Hc"``).  The
    packed bytes are not written anywhere yet; the write call is still
    disabled.
    """
    # input = file, m_s = maxsearchbuffer
    # extra credit
    total_bits = 16
    MAXSEARCH = int(m_s)
    # Look-ahead size is 2 ** (total_bits - log2(MAXSEARCH)).
    MAXLH = int(math.pow(2, (total_bits - (math.log(MAXSEARCH, 2)))))

    # The conversions are kept as calls, but their result is not used below.
    # It is text rather than an open file, so the former ``file.close()`` at
    # the end of this function raised AttributeError and has been removed.
    as_list = RAS.convert_array_to_list(input)
    RAS.convert_list_to_string(as_list)
    # file = RAS.convert_list_to_bytes(array)
    # file = RAS.convert_bytes_to_ASCII(file)

    searchiterator = 0
    lhiterator = 0
    while lhiterator < len(input):
        search = input[searchiterator:lhiterator]
        look_ahead = input[lhiterator:lhiterator + MAXLH]
        (offset, length, char) = LZ77_search(search, look_ahead)
        print('look here', offset, length, char)

        # NOTE(review): the shift of 6 matches the look-ahead width only when
        # MAXSEARCH is 1024 — confirm before using other buffer sizes.
        offset_and_length = (offset << 6) + length
        print('type char', type(char))
        # char = char.encode('ASCII')
        ol_bytes = struct.pack(">Hc", offset_and_length, char)
        # file.write(ol_bytes)

        # Skip past the match plus the literal, then slide the search window.
        lhiterator = lhiterator + length + 1
        searchiterator = max(lhiterator - MAXSEARCH, 0)
#def parse(file): #convert array to list? or to string? '''
def activations_compression(model, data, settings):
    """Collect the output activations of every layer and compress them.

    Args:
        model: Model exposing ``layers``, ``input`` and ``get_layer``.
        data: Indexable; ``data[2]`` is used as the test inputs.
        settings: Indexable; ``settings[0]`` is used as the batch size.

    For each full batch of the test inputs, the outputs of all layers from
    index 1 onwards are predicted.  For each of those outputs the number of
    entries and the size reported by ``RAS.get_obj_size`` are printed, and the
    output is passed to ``Compression_Main.compress_all``.

    Returns:
        None.
    """
    # Get Output Activations
    x_test = data[2]
    batch_size = settings[0]

    num_layers = len(model.layers)
    print('number of layers:', num_layers)

    # Outputs of every layer except the one at index 0.
    all_layers = [
        model.get_layer(name=None, index=layer_index).output
        for layer_index in range(1, num_layers)
    ]
    intermediate_layer_model = Model(inputs=model.input, outputs=all_layers)

    # Integer division: a trailing partial batch is not processed.
    num_batches = x_test.shape[0] // batch_size
    for batch_idx in range(num_batches):
        start = batch_idx * batch_size
        end = start + batch_size
        intermediate_output = intermediate_layer_model.predict(x_test[start:end])

        # NOTE(review): assumes the per-layer loop runs once per batch —
        # confirm against the intended nesting.
        # Measure each layer's output and compress it.
        for layer_number, activations in enumerate(intermediate_output, start=1):
            # number of entries in matrix
            num_entries = len(activations.flatten())
            print('number of entries of', layer_number, 'layer is:', num_entries)

            # size of matrix as reported by RAS.get_obj_size (printed as KB)
            size_entries = RAS.get_obj_size(activations)
            print('size of entries of', layer_number, 'layer is: ', size_entries, 'KB')

            # compress with all the algorithms (try with huffman then add others)
            Compression_Main.compress_all(activations)
    return
import numpy as np
import struct
from Extras import RAS

array = np.random.randint(1, 3, size=(3, 3))
#array = array.flatten()
print('array', array)
list = RAS.convert_array_to_list(array)
c_array = [list.count(x) for x in set(list)]
print('c_array', c_array)


def Streaming_rANS_encoder(s_input, symbol_counts, range_factor):
    """Encode *s_input* with streaming rANS, emitting renormalisation bits.

    Args:
        s_input: Iterable of symbols; each symbol indexes ``symbol_counts``.
        symbol_counts: Per-symbol counts; their sum is the total ``M``.
        range_factor: Multiplier for the per-symbol upper bound of the state.

    Returns:
        Tuple ``(state, bitstream)``: the final encoder state and the list of
        bits emitted while renormalising.

    NOTE(review): ``low_level`` and ``C_rANS`` are not defined in this part of
    the file; they are expected to be provided at module level — confirm.
    """
    total_counts = np.sum(symbol_counts)  # Represents M
    bitstream = []  # initialize stream
    state = low_level * total_counts  # state initialized to lM

    for s in s_input:  # iterate over the input
        # Output bits to the stream to bring the state in the range for the
        # next encoding.
        upper_bound = range_factor * symbol_counts[s]
        while state >= upper_bound:
            bitstream.append(state % 2)
            # Floor division keeps the state integral; true division turned
            # it into a float, so later bits were floats and precision was
            # lost for large states.
            state = state // 2
        state = C_rANS(s, state, symbol_counts)  # The rANS encoding step
    return state, bitstream
import heapq
import os
import numpy as np
from Extras import RAS

arr = np.random.randint(1, 101, size=(100, 100))
string = RAS.convert_array_to_list(arr)


class HeapNode:
    """Tree node holding a symbol, its frequency and two child links."""

    def __init__(self, char, freq):
        self.char = char
        self.freq = freq
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Order nodes by frequency so ``heapq`` can compare them.

        Python 3 never calls ``__cmp__``; without ``__lt__`` pushing two
        nodes onto a heap raises ``TypeError``.
        """
        if not isinstance(other, HeapNode):
            return NotImplemented
        return self.freq < other.freq

    def __cmp__(self, other):
        # Legacy Python 2 comparison hook, kept unchanged for compatibility.
        if (other == None):
            return -1
        if (not isinstance(other, HeapNode)):
            return -1
        return self.freq > other.freq


class HuffmanCoding:
    def __init__(self, path):
        self.path = path
        # Containers start empty; they are filled outside this constructor.
        self.heap = []
        self.codes = {}
        self.reverse_mapping = {}
def print_code_table(self):
    """Print one row per symbol: the symbol, its code, and the code in binary.

    Symbols come from ``self.head.char_list``; ``self.get_code`` supplies the
    code length and value, and the length is used as the zero-padded width of
    the binary column.
    """
    row_template = "'{0}'\t\t{1}\t\t{1:0{2}b}"
    for symbol in self.head.char_list:
        bit_length, code = self.get_code(symbol)
        print(row_template.format(symbol, code, bit_length))

def __repr__(self):
    """Return a short description that embeds the head node."""
    return "<HuffmanTree: head={}>".format(self.head)

if __name__ == "__main__":
    # in_str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc hendrerit nulla et sodales dapibus. Nullam mauris orci"
    # in_str = in_str * 500
    # Demo: encode and decode a random 100x100 integer array.
    arr = np.random.randint(1, 101, size=(100, 100))
    arr = RAS.convert_array_to_list(arr)
    in_str = RAS.convert_list_to_string(arr)
    str_size = len(in_str.encode('utf-8'))
    print("Original text: {}\n".format(in_str))

    tree = HuffmanTree()
    tree.build_tree(in_str)
    encoded_text = tree.encode(in_str)
    hex_dump = " ".join("{:02x}".format(c) for c in encoded_text)
    print("Encoded text: {}\n".format(hex_dump))

    # A fresh tree decodes the encoded output.
    new_tree = HuffmanTree()
    decoded_text = new_tree.decode(encoded_text)
    print("Decoded text: {}\n".format(decoded_text))
def Huffman_Do(array):
    """Huffman-encode *array*, decode it again and print diagnostics.

    Args:
        array: Input handed to ``RAS.convert_array_to_list``.

    Returns:
        Tuple ``(encoded_text, decoded_text)`` as produced by
        ``HuffmanTree.encode`` and ``HuffmanTree.decode``.
    """
    print('HUFFMAN:')
    values = RAS.convert_array_to_list(array)
    in_len = len(values)
    in_str = RAS.convert_list_to_string(values)
    print('length of list:', in_len)
    str_size = len(in_str.encode('utf-8'))
    print("Original text: {}\n".format(in_str))

    encoder = HuffmanTree()
    encoder.build_tree(in_str)
    encoded_text = encoder.encode(in_str)
    print('type of encoded_text', type(encoded_text), encoded_text)

    # Space-separated two-digit hex dump of the encoded output, built once
    # and reused for every diagnostic below.
    e_string = " ".join("{:02x}".format(c) for c in encoded_text)
    print("Encoded text: {}\n".format(e_string))
    print('length of encoded:', len(e_string), e_string)
    e_list = RAS.convert_string_to_list(e_string)
    print('e_list', e_list)
    print('length of e_list', len(e_list))
    print(e_string.find('d'))

    # Decoding goes through a fresh tree instance.
    decoder = HuffmanTree()
    decoded_text = decoder.decode(encoded_text)

    # The string below is inert: an alternative implementation kept for reference.
    '''
    #
    # Calculating frequency
    freq = {}
    for c in in_str:
        if c in freq:
            freq[c] += 1
        else:
            freq[c] = 1
    freq = sorted(freq.items(), key=lambda x: x[1], reverse=True)
    nodes = freq
    while len(nodes) > 1:
        (key1, c1) = nodes[-1]
        (key2, c2) = nodes[-2]
        nodes = nodes[:-2]
        node = NodeTree(key1, key2)
        nodes.append((node, c1 + c2))
        nodes = sorted(nodes, key=lambda x: x[1], reverse=True)
    huffmanCode = huffman_code_tree(nodes[0][0])
    print(' Char | Huffman code ')
    print('----------------------')
    for (char, frequency) in freq:
        print(' %-4r |%12s' % (char, huffmanCode[char]))
    print('freq', freq)
    print('huffmancode', huffmanCode)
    #
    '''

    encoded_size = len(encoded_text)
    print("Decoded text: {}\n".format(decoded_text))
    print("Total length of input (in bytes): {}".format(str_size))
    print("Total length of encoded text (in bytes): {}".format(encoded_size))
    print("Compression ratio: {}".format(encoded_size / str_size))
    return encoded_text, decoded_text
import numpy as np
from Extras import RAS
import random

arr = np.random.randint(1, 8, size=(3, 3))
arr = RAS.convert_array_to_list(arr)
arr = list(map(int, arr))
print('array', arr)

RANS64_L = 2**30
MIN_PROB = 8
prob_bits = 14
prob_scale = 1 << prob_bits


def argmax(values):
    """Return the index of the largest element of *values*.

    Ties resolve to the earliest index; an empty sequence yields ``-1``.
    """
    if not values:
        return -1  # Empty list has no argmax
    # max() keeps the first maximal candidate, matching a strict ">" scan.
    return max(range(len(values)), key=lambda position: values[position])


def float_to_int_probs(float_probs):
    pdf = []
def decompress_file(input_file: str, output_file: str):
    """Read a compressed file, decompress it and write the result as text.

    Args:
        input_file: Path of the file to read in binary mode.
        output_file: Path of the file that receives the decompressed output.

    Raises:
        FileNotFoundError: If *input_file* does not exist; a message is
            printed before the exception propagates.
    """
    try:
        with open(input_file, "rb") as f:
            input_array = bytearray(f.read())
    except FileNotFoundError:
        print(f"Could not find input file at: {input_file}")
        raise

    # Other exceptions propagate unchanged; the former
    # ``except Exception: raise`` clause added nothing.
    decompressed_output = decompress(from_bytes(input_array))
    with open(output_file, "w") as f:
        f.write(decompressed_output)


array = np.random.randint(1, 101, size=(50, 50))
list = RAS.convert_array_to_list(array)
string = RAS.convert_list_to_string(list)
compressed = compress(string)
print('compressed', compressed)
decompressed = decompress(compressed)
print('decompressed', decompressed)
print('ration', len(compressed) / len(decompressed))
''' for encoder x = name of file y = size of max search buffer, i.e. 1024 ''' import struct import sys import math import os import numpy as np from Extras import RAS arr = np.random.randint(1, 101, size=(100, 100)) #arr = RAS.convert_array_to_list(arr) arr = RAS.convert_list_to_bytes(arr) def LZ77_search(search, look_ahead): ls = len(search) llh = len(look_ahead) if (ls == 0): return (0, 0, look_ahead[0]) if (llh) == 0: return (-1, -1, "") best_length = 0 best_offset = 0 buf = search + look_ahead