def camunzip(filename):
    """Decompress a camzip archive and write the decoded text to a '.cuz' file.

    The compression method is selected from the end of ``filename``:

    * ``h``  -> Huffman
    * ``s``  -> Shannon-Fano
    * ``ar`` -> arithmetic
    * ``ad`` -> adaptive arithmetic
    * ``ca`` -> context-adaptive arithmetic

    The symbol frequencies are loaded from the companion file whose name is
    ``filename`` with the method suffix replaced by ``p``.  The decoded
    symbols are written, in text mode, to ``filename`` with its final
    extension (three characters plus the method suffix) replaced by ``.cuz``.

    Args:
        filename: Path of the compressed file.

    Raises:
        NameError: If ``filename`` does not end in a known method suffix.
    """
    # None of the two-letter suffixes ends in 'h' or 's', so the order of the
    # checks cannot make one suffix shadow another.
    suffix_methods = (
        ('h', 'huffman'),
        ('s', 'shannon_fano'),
        ('ar', 'arithmetic'),
        ('ad', 'arithmetic_adaptive'),
        ('ca', 'context_adaptive'),
    )
    for suffix, method in suffix_methods:
        # endswith() also copes with empty / one-character names, which would
        # otherwise fail with IndexError before reaching the NameError below.
        if filename.endswith(suffix):
            break
    else:
        raise NameError('Unknown compression method')

    with open(filename, 'rb') as fin:
        y = fin.read()
    y = bytes2bits(y)

    # The frequency table lives next to the archive: same name, with the
    # method suffix swapped for 'p'.
    pfile = filename[:-len(suffix)] + 'p'
    with open(pfile, 'r') as fp:
        frequencies = load(fp)
    n = sum(frequencies.values())
    p = {a: count / n for a, count in frequencies.items()}

    if method == 'huffman':
        x = vl_decode(y, huffman(p))
    elif method == 'shannon_fano':
        x = vl_decode(y, code2xtree(shannon_fano(p)))
    elif method == 'arithmetic':
        x = arithmetic.decode(y, p, n)
    elif method == 'arithmetic_adaptive':
        x = arithmetic_ftr_adaptive.decode(y)
    else:  # 'context_adaptive' -- the only method left in suffix_methods
        x = arithmetic_ftr.decode(y)

    # '.cuz' for Cam UnZipped (don't want to overwrite the original file...).
    # Strip the three characters preceding the method suffix together with the
    # suffix itself (4 characters for one-letter methods, 5 for two-letter).
    outfile = filename[:-(len(suffix) + 3)] + '.cuz'
    with open(outfile, 'w') as fout:
        fout.writelines(x)
def unzip(self, filename):
    """Decompress a file compressed by camzip.

    ``filename`` is appended to ``self.content_dir``; the last character of
    the resulting path picks the method ('h' Huffman, 's' Shannon-Fano,
    'a' arithmetic).  The frequency table is read from the same path with the
    last character replaced by 'p'.  The decoded text is written to the path
    with its last four characters replaced by '.cuz', and that file is then
    passed to ``self._convert_to_LF``.

    Raises:
        NameError: If the path does not end in 'h', 's' or 'a'.
    """
    path = self.content_dir + filename

    method_by_suffix = {
        'h': 'huffman',
        's': 'shannon_fano',
        'a': 'arithmetic',
    }
    method = method_by_suffix.get(path[-1])
    if method is None:
        raise NameError('Unknown compression method')

    with open(path, 'rb') as archive:
        raw = archive.read()
    bits = bytes2bits(raw)

    # Load frequencies file
    with open(path[:-1] + 'p', 'r') as model_file:
        frequencies = load(model_file)
    tot = sum(frequencies.values())
    p = {a: frequencies[a] / tot for a in frequencies}

    # Initial conditions
    cp, f_initial = self._build_conditional_pdf(frequencies)
    tot_initial = sum(f_initial.values())
    p_initial = {a: f_initial[a] / tot_initial for a in f_initial}

    if method == 'arithmetic':
        decoded = arithmetic.decode(bits, p, tot)
    elif method in ('huffman', 'shannon_fano'):
        codes = {}
        trees = {}
        start_key = '$'  # unused character
        contexts = list(cp.keys())

        # With a single conditional table the plain distribution seeds the
        # start entry; otherwise the initial-symbol distribution does.
        start_pdf = p if len(contexts) == 1 else p_initial
        start_code, start_tree = self._build_structures(method, start_pdf)
        codes[start_key], trees[start_key] = start_code, start_tree

        # NOTE(review): the source formatting was lost; this loop is assumed
        # to run after the if/else above (for every context), not only in
        # the multi-context case -- confirm against the original file.
        for context in contexts:
            code, tree = self._build_structures(method, cp[context])
            codes[context], trees[context] = code, tree

        decoded = vl_decode(bits, trees)
    else:
        raise NameError('This will never happen (famous last words)')

    # '.cuz' for Cam UnZipped (don't want to overwrite the original file...)
    outfile = path[:-4] + '.cuz'
    with open(outfile, 'w') as fout:
        fout.write(decoded)
    self._convert_to_LF(outfile)
def main() -> None:
    """Read a string from stdin and show its arithmetic-coding round trip.

    The string is passed to ``arith.encode``; the returned code is printed as
    ``numerator / denominator`` and in scientific notation with 30 decimals.
    The code is then handed back to ``arith.decode`` together with the
    returned frequency table, and the decoded string is printed.
    """
    message = input("Enter the string: ")
    model, encoded = arith.encode(message)

    print("\nEncoded message:")
    numerator, denominator = encoded.numerator, encoded.denominator
    print(f"(common) {numerator} / {denominator}")
    as_decimal = format(float(encoded), ".30e")
    print(f"(decimal) {as_decimal}\n")

    recovered = arith.decode(encoded, model)
    print(f"Decoded message is '{recovered}'")
def fetch_info(self, username):
    """Look up ``username`` in the encoded profile store.

    Reads ``profiles.txt``, decodes its contents with
    ``codify.decode(blob, 58)``, parses the result with ``etree.fromstring``
    and scans the parsed data for an element whose ``text`` equals
    ``username``.  On the first match ``self.is_logged_in`` is set to ``YES``.

    Args:
        username: Text to match against each element's ``text``.

    Returns:
        The ``root`` attribute of the first matching element, or ``None``
        when nothing matches.
    """
    # Context manager: the handle used to stay open for the life of the
    # process; now it is released even if read() raises.
    with open("profiles.txt", "r") as profiles:
        blob = profiles.read()

    blob_decoded = codify.decode(blob, 58)
    xmlfile = etree.fromstring(blob_decoded)

    # NOTE(review): if `etree` is xml.etree.ElementTree or lxml.etree,
    # iterparse() expects a file name / file object and yields
    # (event, element) pairs, so passing the parsed tree and reading
    # `.text` / `.root` off each item would fail.  `xmlfile.iter()` may be
    # what was intended -- confirm before relying on this loop.
    for element in etree.iterparse(xmlfile):
        if element.text == username:
            self.is_logged_in = YES
            return element.root
    return None
def camunzip(filename, length_of_LSTM_context=30):
    """Decompress a camzip archive, write the bytes to '.cuz' and return them.

    The last character of ``filename`` selects the method: 'h' Huffman,
    's' Shannon-Fano, 'a' arithmetic, 'c' conditional arithmetic.  Symbol
    frequencies are loaded from ``filename`` with its last character replaced
    by 'p'; their keys are converted to ``int``.

    Args:
        filename: Path of the compressed file.
        length_of_LSTM_context: Forwarded to ``con_ari.decode``; only used by
            the conditional-arithmetic method.

    Returns:
        The decoded symbol sequence (also written, via ``bytes``, to
        ``filename`` with its last four characters replaced by '.cuz').

    Raises:
        NameError: If the last character of ``filename`` is not recognised.
    """
    method_by_suffix = {
        'h': 'huffman',
        's': 'shannon_fano',
        'a': 'arithmetic',
        'c': "conditional-arithmetic",
    }
    method = method_by_suffix.get(filename[-1])
    if method is None:
        raise NameError('Unknown compression method')

    with open(filename, 'rb') as fin:
        raw = fin.read()
    bits = bytes2bits(raw)

    with open(filename[:-1] + 'p', 'r') as fp:
        frequencies = load(fp)
    n = sum(frequencies.values())
    p = {int(symbol): frequencies[symbol] / n for symbol in frequencies}

    if method == 'huffman':
        tree = huffman(p)
        code = xtree2code(tree)  # built as in the original; decoding uses the tree
        x = vl_decode(bits, tree)
    elif method == 'shannon_fano':
        code = shannon_fano(p)
        tree = code2xtree(code)
        x = vl_decode(bits, tree)
    elif method == 'arithmetic':
        x = arithmetic.decode(bits, p, n)
    elif method == "conditional-arithmetic":
        x = con_ari.decode(bits, p, n, length_of_LSTM_context)
    else:
        raise NameError('This will never happen (famous last words)')

    # '.cuz' for Cam UnZipped (don't want to overwrite the original file...)
    outfile = filename[:-4] + '.cuz'
    with open(outfile, 'wb') as fout:
        fout.write(bytes(x))
    return x
def camunzip(filename, cond_prob_dict_filename=' ', cum_prob_dict_filename=' ',
             orig_message_filename=' '):
    """Decompress a camzip archive and write the decoded bytes to '.cuz'.

    Two modes, chosen by the last character of ``filename``:

    * 'h', 's', 'a' -- Huffman, Shannon-Fano or arithmetic decoding of
      ``filename`` itself, using the frequency table stored in ``filename``
      with its last character replaced by 'p'.  Output goes to ``filename``
      with its last four characters replaced by '.cuz'.
    * 'z' -- contextual arithmetic decoding of
      ``encoded_messages/<filename>_zipped.cz`` using two JSON models from
      ``cond_prob_models/``; the number of symbols to decode is the length of
      the text in ``text_files/<orig_message_filename>``.  Output goes to
      ``<filename>.cuz``.

    Args:
        filename: Archive path (first mode) or archive stem (second mode).
        cond_prob_dict_filename: JSON file with the conditional model
            ('z' mode only).
        cum_prob_dict_filename: JSON file with the cumulative model
            ('z' mode only).
        orig_message_filename: Original text, read only for its length
            ('z' mode only).

    Raises:
        NameError: If the last character of ``filename`` is not one of
            'h', 's', 'a', 'z'.
    """
    if filename[-1] != 'z':
        method_by_suffix = {
            'h': 'huffman',
            's': 'shannon_fano',
            'a': 'arithmetic',
        }
        method = method_by_suffix.get(filename[-1])
        if method is None:
            raise NameError('Unknown compression method')

        with open(filename, 'rb') as fin:
            y = fin.read()
        y = bytes2bits(y)

        pfile = filename[:-1] + 'p'
        with open(pfile, 'r') as fp:
            frequencies = load(fp)
        n = sum(frequencies.values())
        p = {int(a): count / n for a, count in frequencies.items()}

        if method == 'huffman':
            x = vl_decode(y, huffman(p))
        elif method == 'shannon_fano':
            x = vl_decode(y, code2xtree(shannon_fano(p)))
        else:  # 'arithmetic' -- the only method left in method_by_suffix
            x = arithmetic.decode(y, p, n)

        # '.cuz' for Cam UnZipped (don't want to overwrite the original file...)
        outfile = filename[:-4] + '.cuz'
    else:
        # Bug fix: this branch never set `outfile`, so the write below always
        # failed with UnboundLocalError after all the decoding work was done.
        outfile = filename + '.cuz'

        # NOTE(review): the archive is opened in text mode here but in binary
        # mode in the branch above -- confirm what bytes2bits expects.
        with open('encoded_messages/' + filename + '_zipped.cz', 'r') as zipped_file:
            with open('cond_prob_models/' + cond_prob_dict_filename, 'r') as cond_prob_file:
                context_dict = json.load(cond_prob_file)
            with open('cond_prob_models/' + cum_prob_dict_filename, 'r') as cum_prob_file:
                cumulative_dict = json.load(cum_prob_file)
            with open('text_files/' + orig_message_filename, 'r',
                      encoding='utf-8-sig') as file:
                original_message = file.read()
            x = contextual_arithmetic.decode(bytes2bits(zipped_file.read()),
                                             context_dict, cumulative_dict,
                                             len(original_message))

    with open(outfile, 'wb') as fout:
        fout.write(bytes(x))
def camunzip(filename, b=0.1, num=0, scale=(100000, 1), pr=0, pc=0):
    """Decompress a camzip archive and write the decoded bytes to '.cuz'.

    The last character of ``filename`` selects the decoder:

    * 'h' Huffman, 's' Shannon-Fano, 'a' arithmetic
    * 'c' ``arithmeticac.decode(y, b, n, num, scale)``
    * 'i' ``adhuffmandec(y, pr)``
    * 'f' ``condarithmetic.decode(y, pc, n)``
    * 'g' ``adconarithmetic.decode(y, b, n)``
    * 'j' ``adconarithmetic2.decode(y, b, n)``

    The frequency table is always loaded from ``filename`` with its last
    character replaced by 'p' (keys converted to ``int``); ``n`` is the sum
    of its counts.  Output goes to ``filename`` with its last four characters
    replaced by '.cuz'.

    Args:
        filename: Path of the compressed file.
        b, num, scale, pr, pc: Passed through unchanged to the decoders
            listed above; unused by the other methods.

    Raises:
        NameError: If the last character of ``filename`` is not recognised.
    """
    method_by_suffix = {
        'h': 'huffman',
        's': 'shannon_fano',
        'a': 'arithmetic',
        'c': 'carithmeticac',
        'i': 'iadhuffman',
        'f': 'fcondarithmetic',
        'g': 'gadconarithmetic',
        'j': 'jadconarithmetic',
    }
    method = method_by_suffix.get(filename[-1])
    if method is None:
        raise NameError('Unknown compression method')

    with open(filename, 'rb') as fin:
        raw = fin.read()
    bits = bytes2bits(raw)

    with open(filename[:-1] + 'p', 'r') as fp:
        frequencies = load(fp)
    n = sum(frequencies.values())
    p = {int(symbol): frequencies[symbol] / n for symbol in frequencies}

    def _decode_huffman():
        tree = huffman(p)
        xtree2code(tree)  # called as in the original; its result is not needed
        return vl_decode(bits, tree)

    def _decode_shannon_fano():
        return vl_decode(bits, code2xtree(shannon_fano(p)))

    # Each entry is evaluated lazily, so only the selected decoder is touched.
    decoders = {
        'huffman': _decode_huffman,
        'shannon_fano': _decode_shannon_fano,
        'arithmetic': lambda: arithmetic.decode(bits, p, n),
        'carithmeticac': lambda: arithmeticac.decode(bits, b, n, num, scale),
        'iadhuffman': lambda: adhuffmandec(bits, pr),
        'fcondarithmetic': lambda: condarithmetic.decode(bits, pc, n),
        'gadconarithmetic': lambda: adconarithmetic.decode(bits, b, n),
        'jadconarithmetic': lambda: adconarithmetic2.decode(bits, b, n),
    }
    decode = decoders.get(method)
    if decode is None:
        raise NameError('This will never happen (famous last words)')
    x = decode()

    # '.cuz' for Cam UnZipped (don't want to overwrite the original file...)
    outfile = filename[:-4] + '.cuz'
    with open(outfile, 'wb') as fout:
        fout.write(bytes(x))
# Benchmark: arithmetic-code 'hamlet.txt' repeated ten times, once with
# probability_on_the_go=False and once with True, and print the compression
# ratio (bytes out / bytes in) and the bits out per input byte for each run.
import arithmetic as arith
from vl_codes import bytes2bits, bits2bytes
from os import stat
from itertools import groupby

filename = 'hamlet.txt'

# On-disk size in bytes; used only as the denominator of the printed ratios.
Nin = stat(filename).st_size

with open(filename, 'r') as f:
    hamlet = f.read()

# The model must be normalised by the number of *characters* actually read.
# In text mode that can differ from the byte size above (newline translation,
# multi-byte encodings), which made the probabilities not sum to one and gave
# the decoder the wrong symbol count.
n_symbols = len(hamlet)
frequencies = {key: sum(1 for _ in group)
               for key, group in groupby(sorted(hamlet))}
p = {a: frequencies[a] / n_symbols for a in frequencies}

hamlet = hamlet * 10
Nin = Nin * 10
n_symbols = len(hamlet)

for probability_on_the_go in (False, True):
    arith_encoded = arith.encode(
        hamlet, p, probability_on_the_go=probability_on_the_go)
    arith_decoded = arith.decode(
        arith_encoded, p, n_symbols,
        probability_on_the_go=probability_on_the_go)
    hamlet_zipped = bits2bytes(arith_encoded)
    Nout = len(hamlet_zipped)
    print(Nout/Nin)
    print(8 * Nout/Nin)
# Round trip of 'hamlet.txt' through the arithmetic coder: build the symbol
# distribution from the text, print the number of symbols, then encode and
# decode with that distribution.
from vl_codes import bytes2bits, bits2bytes
import arithmetic as arith
from itertools import groupby

with open('hamlet.txt', 'r') as f:
    hamlet = f.read()

# Sorting first makes equal characters adjacent, so each group is one symbol.
frequencies = {key: len(list(group)) for key, group in groupby(sorted(hamlet))}
Nin = sum(frequencies.values())
p = {a: frequencies[a] / Nin for a in frequencies}
print(f'File length: {Nin}')

arith_encoded = arith.encode(hamlet, p)
arith_decoded = arith.decode(arith_encoded, p, Nin)

# Earlier experiments, left disabled:
#
# c = huffman(p)
#
# print(xtree2newick(code2xtree(c)))
#
# hamlet_sf = vl_encode(hamlet,c);
# print(f'Length of binary sequence: {len(hamlet_sf)}')
#
# x = bits2bytes([0,1])
# print([format(a, '08b') for a in x])