def __init__(self, inPutPath, outPutPath, isPaired, readNum, bucketIndexLen, lossless, verbose):
    """Initialise decoder state, open the encoded input streams and the output.

    Args:
        inPutPath: Path prefix of the encoded input; ".bifurL" and ".bifurR"
            are appended to it to open the two bit streams below.
        outPutPath: Path prefix used to build the ".dna", "_1.dna" and
            "_2.dna" output file names.
        isPaired: Stored as ``self.paired``.
        readNum: Stored as ``self.readNum``.
        bucketIndexLen: Stored as both ``self.kmerLen`` and ``self.indexLen``.
        lossless: Stored as ``self.lossless``.
        verbose: Stored as ``self.verbose``.

    Side effects: opens two files for binary reading and calls
    ``self.openFileLeft()``, ``self.openFileRight()`` and
    ``self.openOutFile()`` (all defined outside this block).
    """
    self.mutiDict = {}  # kmers for buckets
    self.sequenceTable = []  # store sequence for output
    # Both lengths are taken from the same constructor argument.
    self.kmerLen = bucketIndexLen
    self.indexLen = bucketIndexLen
    self.bucketDict = {}  # nested_dict(2, int)
    self.encodeBucketPath = {}
    # Counters, all starting at zero.
    self.newNodeNum = 0
    self.simpleNodeNum = 0
    self.tipNodeNum = 0
    self.bifurNodeNum = 0
    self.deleteBifurRatio = 0.2
    self.inPutPath = inPutPath
    self.outPutPath = outPutPath
    # Lookup tables between bases, integers, complementary bases and
    # two-bit binary string literals.
    self.dna2num = {"A": 0, "C": 1, "G": 2, "T": 3}
    self.num2dna = {0: "A", 1: "C", 2: "G", 3: "T"}
    self.recdna = {"A": "T", "C": "G", "G": "C", "T": "A"}
    self.dna2bit = {"A": '0b00', "C": '0b01', "G": '0b10', "T": '0b11'}
    # NOTE(review): identical to the num2dna assignment above; redundant.
    self.num2dna = {0: "A", 1: "C", 2: "G", 3: "T"}
    self.firstSeq = BitStream()
    # NOTE(review): numFlag is reassigned to sream() further down, so this
    # BitStream instance is discarded - confirm which one is intended.
    self.numFlag = BitStream()
    # Adaptive frequency tables seeded from flat tables of 3 and 4 symbols.
    self.freq3 = arithmeticcoding.SimpleFrequencyTable(
        arithmeticcoding.FlatFrequencyTable(3))
    self.freq4 = arithmeticcoding.SimpleFrequencyTable(
        arithmeticcoding.FlatFrequencyTable(4))
    self.freqs = arithmeticcoding.SimpleFrequencyTable(
        arithmeticcoding.FlatFrequencyTable(4))
    # Despite the "bitout" names these are *input* bit streams.
    # NOTE(review): the underlying files are opened here and are not closed
    # anywhere in this block - confirm they are closed elsewhere.
    self.bitoutL = arithmeticcoding.BitInputStream(
        open(self.inPutPath + ".bifurL", "rb"))
    self.bitoutR = arithmeticcoding.BitInputStream(
        open(self.inPutPath + ".bifurR", "rb"))
    self.decodeSeqPathL = self.openFileLeft()
    self.decodeSeqPathR = self.openFileRight()
    self.outFileName = outPutPath + ".dna"
    self.paired = isPaired
    self.seqLen = 0  # length for current read
    self.outPairFileName = [outPutPath + "_1.dna", outPutPath + "_2.dna"]
    # Output handles start unset; presumably assigned by openOutFile() - confirm.
    self.outFile = None
    self.outPairFile = None
    self.readNum = readNum
    self.seqence = ""  # encode seq
    self.bucketIndex = []  # bucket index
    self.bucketCov = []  # reads number in bucket
    self.readIndexPos = []  # index positions in each read
    self.readLen = []
    # NOTE(review): `sream` is not defined in this block; confirm it is a real
    # name in the module and not a typo.
    self.readrc = sream()  # read in forward or backward
    self.readN = {
        "flag": sream(),
        "pos": [],
        "l": []
    }  # N in read indicate, number, position and length
    self.numFlag = sream()  # new nodes indicate
    self.lossless = lossless
    self.verbose = verbose
    self.openOutFile()  # prepare output file
def main(inputfile, outputfile):
    """Decompress ``inputfile`` into ``outputfile`` and print a success line.

    Args:
        inputfile: Path of the compressed file, opened for binary reading.
        outputfile: Path of the file to write, opened for binary writing.
    """
    # The destination is opened (and therefore truncated) before the source.
    with open(outputfile, "wb") as sink:
        with open(inputfile, "rb") as source:
            decompress(arithmeticcoding.BitInputStream(source), sink)
    print('Decompress Success !!!')
def entropy_decoding(frame_index, lat, path_bin, path_lat, sigma, mu):
    """Arithmetic-decode one latent tensor from its ``.bin`` file and save it.

    The bitstream ``path_bin + 'fNNN_<lat>.bin'`` is read symbol by symbol in
    (h, w, ch) order. Each symbol is decoded with a frequency table built from
    the matching ``mu``/``sigma`` entry, then shifted back by ``bias``.

    Args:
        frame_index: Frame number; zero-padded to three digits in file names.
        lat: Latent name; ``'mv'`` selects a bias of 50, anything else 100.
        path_bin: Prefix concatenated directly with the ``.bin`` file name.
        path_lat: Directory in which the decoded ``.npy`` file is saved.
        sigma: Array indexed as ``sigma[0, h, w, ch]``.
        mu: Array indexed as ``mu[0, h, w, ch]``; its shape fixes the output
            shape.

    Returns:
        Array of shape ``[1, mu.shape[1], mu.shape[2], mu.shape[3]]`` holding
        the decoded values.
    """
    bias = 50 if lat == 'mv' else 100
    # Loop-invariant table size. The builtin int() replaces np.int, which was
    # only an alias for it and no longer exists in NumPy >= 1.24.
    num_symbols = int(bias * 2 + 1)

    bin_name = 'f' + str(frame_index).zfill(3) + '_' + lat + '.bin'
    bitin = arithmeticcoding.BitInputStream(open(path_bin + bin_name, "rb"))
    try:
        dec = arithmeticcoding.ArithmeticDecoder(32, bitin)
        latent = np.zeros([1, mu.shape[1], mu.shape[2], mu.shape[3]])

        for h in range(mu.shape[1]):
            for w in range(mu.shape[2]):
                for ch in range(mu.shape[3]):
                    # The mean is shifted by bias before the table is built.
                    mu_val = mu[0, h, w, ch] + bias
                    sigma_val = sigma[0, h, w, ch]
                    freq = arithmeticcoding.logFrequencyTable_exp(
                        mu_val, sigma_val, num_symbols)
                    symbol = dec.read(freq)
                    latent[0, h, w, ch] = symbol - bias
    finally:
        # Close the stream even when decoding fails part-way through.
        bitin.close()

    np.save(path_lat + '/f' + str(frame_index).zfill(3) + '_' + lat + '.npy',
            latent)
    print('Decoded latent_' + lat + ' frame', frame_index)
    return latent
def decomparess(inputfile, outfile, model):
    """Decode ``inputfile`` and write the recovered text to ``outfile``.

    The stream is read as alternating pairs: a "guesses" count followed by a
    literal symbol. For each count, that many characters produced by
    ``predict`` are written, then the literal character itself.

    Relies on names defined outside this block: ``AE_SIZE``, ``MAGIC_EOF``,
    ``predict``, ``indices_char`` and ``maxlen``.

    Args:
        inputfile: Path of the compressed file (opened in binary mode).
        outfile: Path of the text file to write.
        model: Passed through unchanged to ``predict``.
    """
    # NOTE(review): this handle is only closed by bitin.close() at the very
    # end, so it leaks if anything below raises.
    bitin = arithmeticcoding.BitInputStream(open(inputfile, "rb"))
    # Adaptive table seeded from a flat distribution over AE_SIZE symbols.
    initfreqs = arithmeticcoding.FlatFrequencyTable(AE_SIZE)
    freqs = arithmeticcoding.SimpleFrequencyTable(initfreqs)
    dec = arithmeticcoding.ArithmeticDecoder(bitin)
    prev_chars = []  # sliding window of the most recent literal characters
    i = 0  # number of completed (guesses, literal) pairs; used for debug output
    with open(outfile, "w") as out:
        while(True):
            guesses = dec.read(freqs)
            if guesses == MAGIC_EOF:
                break
            print('guesses',guesses)
            freqs.increment(guesses)
            # Emit `guesses` predicted characters. prev_chars is not updated
            # inside this loop, only after the literal below.
            for _ in range(guesses):
                char = predict(prev_chars, model, indices_char)
                out.write(char)
            # NOTE(review): the source formatting was lost; this print may
            # originally have been inside the loop above - confirm.
            print("i",i)
            literal = dec.read(freqs)
            print('lit',chr(literal))
            out.write(chr(literal))
            freqs.increment(literal)
            prev_chars.append(chr(literal))
            # Keep at most maxlen characters of context.
            if len(prev_chars) > maxlen:
                prev_chars.pop(0)
            i = i + 1
    bitin.close()
def start(self, dictionary_size=256):
    """Open the input and output files and build the arithmetic decoder.

    Args:
        dictionary_size: Stored on the instance; the frequency table is
            created with ``dictionary_size + 1`` symbols.

    Sets ``self.inp``, ``self.out``, ``self.bitin``, ``self.freqsTable`` and
    ``self.decoder``. Neither file is closed here.
    """
    self.dictionary_size = dictionary_size

    # The input is opened first, then the output.
    self.inp = source = open(self.inputfile, "rb")
    self.out = open(self.outputfile, "wb")
    self.bitin = bit_reader = arithmeticcoding.BitInputStream(source)

    # Flat (uniform) table with one symbol more than the dictionary size.
    symbol_count = self.dictionary_size + 1
    self.freqsTable = arithmeticcoding.FlatFrequencyTable(symbol_count)
    self.decoder = arithmeticcoding.ArithmeticDecoder(32, bit_reader)
def main(args):
    """Command-line entry point: decompress ``args[0]`` into ``args[1]``.

    Exits through ``sys.exit`` with a usage message unless exactly two
    arguments are supplied.
    """
    # Validate the command line before touching any file.
    if len(args) != 2:
        sys.exit("Usage: python ppm-decompress.py InputFile OutputFile")
    inputfile, outputfile = args

    # The input is opened first, then the output.
    with open(inputfile, "rb") as inp:
        with open(outputfile, "wb") as out:
            decompress(arithmeticcoding.BitInputStream(inp), out)
def main(args):
    """Command-line entry point: decompress ``args[0]`` into ``args[1]``.

    The frequency table is read from the head of the input stream and then
    handed to ``decompress`` together with the stream and the output file.
    Exits through ``sys.exit`` with a usage message unless exactly two
    arguments are supplied.
    """
    # Validate the command line before touching any file.
    if len(args) != 2:
        sys.exit("Usage: python arithmetic_decompress.py InputFile OutputFile")
    inputfile = args[0]
    outputfile = args[1]

    # The output is opened (and truncated) before the input is opened.
    with open(outputfile, "wb") as out:
        with open(inputfile, "rb") as inp:
            bitin = arithmeticcoding.BitInputStream(inp)
            decompress(read_frequencies(bitin), bitin, out)
def final_function(args):
    """Decompress a PPM-compressed file named on the command line.

    Args:
        args: Exactly two items, the input path then the output path.
            Any other length terminates the program via ``sys.exit`` with a
            usage message.
    """
    if len(args) != 2:
        sys.exit("Usage: python ppm_decompress.py InputFile OutputFile")
    source_path, target_path = args[0], args[1]

    # Input is opened first, then output; both are closed on exit.
    with open(source_path, "rb") as inp, open(target_path, "wb") as out:
        bit_reader = arithmeticcoding.BitInputStream(inp)
        decompress(bit_reader, out)
def main(args):
    """Command-line entry point: decompress ``args[0]`` into ``args[1]``.

    The frequency table is read from the head of the input stream and then
    handed to ``decompress`` together with the stream and the output file.

    Args:
        args: Exactly two items, the input path then the output path.
            Any other length terminates the program via ``sys.exit`` with a
            usage message.
    """
    # Handle command line arguments
    if len(args) != 2:
        sys.exit("Usage: python arithmetic-decompress.py InputFile OutputFile")
    inputfile = args[0]
    outputfile = args[1]

    # Perform file decompression
    bitin = arithmeticcoding.BitInputStream(open(inputfile, "rb"))
    try:
        # The output is opened inside the try block so the input stream is
        # still closed when the output file cannot be created.
        with open(outputfile, "wb") as out:
            freqs = read_frequencies(bitin)
            decompress(freqs, bitin, out)
    finally:
        bitin.close()
def decompress(input_file):
    """Decode ``input_file`` with an adaptive byte model and unpickle the result.

    Symbols 0-255 are byte values and symbol 256 marks the end of the stream.
    The frequency table starts flat over all 257 symbols and is incremented
    after every decoded byte.

    Args:
        input_file: Path of the compressed file.

    Returns:
        The object obtained by unpickling the decoded bytes.
    """
    eof_symbol = 256
    payload = bytearray()
    with open(input_file, "rb") as inp:
        reader = arithmeticcoding.BitInputStream(inp)
        freqs = arithmeticcoding.SimpleFrequencyTable(
            arithmeticcoding.FlatFrequencyTable(257))
        dec = arithmeticcoding.ArithmeticDecoder(32, reader)
        # Keep reading symbols until the EOF symbol shows up.
        for symbol in iter(lambda: dec.read(freqs), eof_symbol):
            payload.append(symbol)
            freqs.increment(symbol)
    # NOTE: unpickling can execute arbitrary code; only use on trusted files.
    return pickle.loads(payload)
def main(binfile):
    """Round-trip a fixed ten-symbol sequence through ``binfile`` and report.

    Compresses the sequence into ``binfile``, decompresses it again, then
    prints a hand-computed size in bytes and the input next to the decoded
    output.

    Args:
        binfile: Path of the scratch file used for the compressed bits.
    """
    inp = [1] * 10

    # The bit stream is closed even when compress() raises.
    bitout = arithmeticcoding.BitOutputStream(open(binfile, "wb"))
    try:
        compress(inp, bitout)
    finally:
        bitout.close()

    # Perform file decompression
    with open(binfile, "rb") as bitsfile:
        out = decompress(arithmeticcoding.BitInputStream(bitsfile))

    # Sum of -log2(p) terms with weights 1, 9 and 1 for p = 0.1, 0.1 and 0.3.
    ln2 = np.log(2.0)
    neg_ln_p1 = -np.log(0.1)
    neg_ln_p3 = -np.log(0.3)
    bpp = 1.0 * neg_ln_p1 / ln2 + 9.0 * neg_ln_p1 / ln2 + 1.0 * neg_ln_p3 / ln2
    print(bpp / 8.0, "bytes")
    print(inp, out)
def decompress(model):
    """Decode './result/data.bin' into './result/recover.txt' using ``model``.

    Characters are decoded in lines of 100. The first character of each line
    uses the table from ``generate_freqs(pro=1, first_step=True)``. Every
    later character uses a table built from ``predict`` applied to the
    previously decoded character and the current hidden state. After 100
    characters a newline is written and the hidden state is reset. Decoding
    stops at the symbol equal to ``len(characters)``.

    Relies on names defined outside this block: ``generate_freqs``,
    ``predict``, ``characters`` and ``int2char``.

    Args:
        model: Passed through unchanged to ``predict``.
    """
    eof_symbol = len(characters)
    # Both files are managed by `with` so they are closed on every exit path,
    # including the EOF break and any exception.
    with open('./result/data.bin', 'rb') as bin_f:
        bit_in = arithmeticcoding.BitInputStream(bin_f)
        dec = arithmeticcoding.ArithmeticDecoder(bit_in)
        with open('./result/recover.txt', 'w') as out_f:
            dec_char = ''
            index = 0      # position inside the current 100-character line
            num_line = 0   # completed lines, used for progress output
            hidden = None
            while True:
                if index == 0:
                    # Start of a line: no previous character to condition on.
                    freq = generate_freqs(pro=1, first_step=True)
                else:
                    out, hidden = predict(model, np.array(dec_char), hidden)
                    # Only the first element of the prediction is used.
                    freq = generate_freqs(pro=out[0], first_step=False)

                dec_char = dec.read(freq)
                index += 1
                if dec_char == eof_symbol:
                    break
                out_f.write(int2char[dec_char])

                if index == 100:
                    # Line complete: reset per-line state and end the line.
                    index = 0
                    num_line += 1
                    hidden = None
                    out_f.write('\n')
                    if num_line % 100 == 0:
                        print(num_line)
def decode(self, compressed_file, recon_path):  # with TOP N dimensions
    """Decode ``compressed_file`` and save the reconstructed image.

    Steps, as visible in the code:
      1. Read the header: one skipped byte, then four bytes interpreted as
         two uint16 values (width, height).
      2. Arithmetic-decode ``z_hat`` channel by channel with a fixed mean of
         255 and a per-channel ``sigma_z``.
      3. Run ``self.h_s_out`` on ``z_hat`` and split the result at channel
         ``self.M1`` into ``c_prime`` and ``sigma2``.
      4. Decode ``y_hat`` position by position, querying the session for the
         predicted mean/sigma from already decoded neighbours.
      5. Run ``self.recon_image`` and save the cropped result.

    Args:
        compressed_file: Path of the bitstream to decode.
        recon_path: Path the reconstructed image is saved to.
    """
    # NOTE(review): fileobj is never closed explicitly; presumably
    # bitin.close() below closes it - confirm. It stays open if an exception
    # is raised before that point.
    fileobj = open(compressed_file, mode='rb')
    fileobj.read(1)  # dummy byte, skipped
    buf = fileobj.read(4)
    arr = np.frombuffer(buf, dtype=np.uint16)
    w = int(arr[0])
    h = int(arr[1])
    # Round both dimensions up to the next multiple of 16.
    padded_w = int(math.ceil(w / 16) * 16)
    padded_h = int(math.ceil(h / 16) * 16)
    # Run the graph on an all-zero input. y_hat is zeroed and z_hat is
    # overwritten below, so these presumably serve as correctly shaped
    # buffers (plus sigma_z) - TODO confirm.
    y_hat, z_hat, sigma_z = self.sess.run(
        [self.y_hat, self.z_hat, self.sigma_z],
        feed_dict={self.input_x: np.zeros(
            (1, 3, padded_h, padded_w))})  # NCHW
    # Pad the first M1 channels: 3 rows on top, 2 columns left, 1 column right.
    padded_y1_hat = np.pad(y_hat[:, :self.M1, :, :],
                           ((0, 0), (0, 0), (3, 0), (2, 1)),
                           'constant',
                           constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))
    ############### decode zhat ####################################
    # The arithmetic decoder continues from the current file position, i.e.
    # right after the 5 header bytes.
    bitin = arithmeticcoding.BitInputStream(fileobj)
    dec = arithmeticcoding.ArithmeticDecoder(bitin)
    printProgressBar(0, z_hat.shape[1], prefix='Decoding z_hat:',
                     suffix='Complete', length=50)
    for ch_idx in range(z_hat.shape[1]):
        printProgressBar(ch_idx + 1, z_hat.shape[1], prefix='Decoding z_hat:',
                         suffix='Complete', length=50)
        # One frequency table per channel, reused for every position.
        mu_val = 255
        sigma_val = sigma_z[ch_idx]
        # exp_sigma_val = np.exp(sigma_val)
        freq = arithmeticcoding.ModelFrequencyTable(mu_val, sigma_val)
        for h_idx in range(z_hat.shape[2]):
            for w_idx in range(z_hat.shape[3]):
                symbol = dec.read(freq)
                if symbol == 512:  # EOF symbol
                    print("EOF symbol")
                    # NOTE(review): this only leaves the innermost (w_idx)
                    # loop; the outer loops keep reading from the decoder.
                    break
                # Symbols are stored with an offset of 255.
                z_hat[:, ch_idx, h_idx, w_idx] = symbol - 255
    # bitin.close()
    #################################################
    # Entropy decoding y
    # padded_z = np.zeros_like(padded_z, dtype = np.float32)
    h_s_out = self.sess.run(self.h_s_out, feed_dict={self.z_hat: z_hat})
    # Split along axis 1 at M1: first part is c_prime, the rest is sigma2.
    c_prime = h_s_out[:, :self.M1, :, :]
    sigma2 = h_s_out[:, self.M1:, :, :]
    padded_c_prime = np.pad(c_prime, ((0, 0), (0, 0), (3, 0), (2, 1)),
                            'constant',
                            constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))
    # Start from all-zero buffers; they are filled in as symbols are decoded.
    padded_y1_hat[:, :, :, :] = 0.0
    y_hat[:, :, :, :] = 0.0
    # bitin = arithmeticcoding.BitInputStream(open(dec_inputfile, "rb"))
    # dec = arithmeticcoding.ArithmeticDecoder(bitin)
    printProgressBar(0, y_hat.shape[2], prefix='Decoding y_hat:',
                     suffix='Complete', length=50)
    for h_idx in range(y_hat.shape[2]):
        printProgressBar(h_idx + 1, y_hat.shape[2], prefix='Decoding y_hat:',
                         suffix='Complete', length=50)
        for w_idx in range(y_hat.shape[3]):
            # Context for this position: from c_prime and from the values of
            # padded_y1_hat decoded so far.
            c_prime_i = self.extractor_prime(padded_c_prime, h_idx, w_idx)
            c_doubleprime_i = self.extractor_doubleprime(
                padded_y1_hat, h_idx, w_idx)
            concatenated_c_i = np.concatenate([c_doubleprime_i, c_prime_i],
                                              axis=1)
            pred_mean, pred_sigma = self.sess.run(
                [self.pred_mean, self.pred_sigma],
                feed_dict={self.concatenated_c_i: concatenated_c_i})
            # Append M2 zero means and the sigma2 values at this position
            # along axis 1, so that indices up to self.M can be addressed
            # (assumes M == M1 + M2 - TODO confirm).
            zero_means = np.zeros([
                pred_mean.shape[0], self.M2, pred_mean.shape[2],
                pred_mean.shape[3]
            ])
            concat_pred_mean = np.concatenate([pred_mean, zero_means], axis=1)
            concat_pred_sigma = np.concatenate([
                pred_sigma, sigma2[:, :, h_idx:h_idx + 1, w_idx:w_idx + 1]
            ], axis=1)
            for ch_idx in range(self.M):
                mu_val = concat_pred_mean[0, ch_idx, 0, 0] + 255
                sigma_val = concat_pred_sigma[0, ch_idx, 0, 0]
                freq = arithmeticcoding.ModelFrequencyTable(
                    mu_val, sigma_val)
                symbol = dec.read(freq)
                if symbol == 512:  # EOF symbol
                    print("EOF symbol")
                    # NOTE(review): only leaves the channel loop.
                    break
                if ch_idx < self.M1:
                    # The +3 / +2 offsets match the top/left padding above.
                    padded_y1_hat[:, ch_idx, h_idx + 3,
                                  w_idx + 2] = symbol - 255
                y_hat[:, ch_idx, h_idx, w_idx] = symbol - 255
    bitin.close()
    #################################################
    recon = self.sess.run(self.recon_image, {self.y_hat: y_hat})
    # Keep the last h rows and last w columns of the first batch item.
    recon = recon[0, -h:, -w:, :]
    im = Image.fromarray(recon.astype(np.uint8))
    im.save(recon_path)
    return
def decode(self, compressed_file, recon_path):  # with TOP N dimensions
    """Decode ``compressed_file`` and save the reconstructed image.

    Steps, as visible in the code:
      1. Read the header: one skipped byte, then four bytes interpreted as
         two uint16 values (width, height).
      2. Arithmetic-decode ``z_hat`` channel by channel with a fixed mean of
         255 and a per-channel ``sigma_z``.
      3. Run ``self.c_prime`` on ``z_hat``.
      4. Decode ``y_hat`` position by position into a padded buffer, querying
         the session for the predicted mean/sigma from already decoded
         neighbours.
      5. Run ``self.recon_image`` and save the cropped result.

    Args:
        compressed_file: Path of the bitstream to decode.
        recon_path: Path the reconstructed image is saved to.
    """
    # NOTE(review): fileobj is never closed explicitly; presumably
    # bitin.close() below closes it - confirm. It stays open if an exception
    # is raised before that point.
    fileobj = open(compressed_file, mode='rb')
    fileobj.read(1)  # dummy byte, skipped
    buf = fileobj.read(4)
    arr = np.frombuffer(buf, dtype=np.uint16)
    w = int(arr[0])
    h = int(arr[1])
    # Round both dimensions up to the next multiple of 16.
    padded_w = int(math.ceil(w / 16) * 16)
    padded_h = int(math.ceil(h / 16) * 16)
    # Run the graph on an all-zero input. z_hat is zeroed below and y_hat is
    # only used as the source for the zeroed padded buffer, so these
    # presumably serve as correctly shaped buffers (plus sigma_z) -
    # TODO confirm.
    y_hat, z_hat, sigma_z = self.sess.run(
        [self.y_hat, self.z_hat, self.sigma_z],
        feed_dict={self.input_x: np.zeros((1, 3, padded_h, padded_w))})  # NCHW
    ############### decode zhat ####################################
    # The arithmetic decoder continues from the current file position, i.e.
    # right after the 5 header bytes.
    bitin = arithmeticcoding.BitInputStream(fileobj)
    dec = arithmeticcoding.ArithmeticDecoder(bitin)
    z_hat[:, :, :, :] = 0.0
    printProgressBar(0, z_hat.shape[1], prefix='Decoding z_hat:',
                     suffix='Complete', length=50)
    for ch_idx in range(z_hat.shape[1]):
        printProgressBar(ch_idx + 1, z_hat.shape[1], prefix='Decoding z_hat:',
                         suffix='Complete', length=50)
        # One frequency table per channel, reused for every position.
        mu_val = 255
        sigma_val = sigma_z[ch_idx]
        freq = arithmeticcoding.ModelFrequencyTable(mu_val, sigma_val)
        for h_idx in range(z_hat.shape[2]):
            for w_idx in range(z_hat.shape[3]):
                symbol = dec.read(freq)
                if symbol == 512:  # EOF symbol
                    print("EOF symbol")
                    # NOTE(review): this only leaves the innermost (w_idx)
                    # loop; the outer loops keep reading from the decoder.
                    break
                # Symbols are stored with an offset of 255.
                z_hat[:, ch_idx, h_idx, w_idx] = symbol - 255
    ############### decode yhat ####################################
    c_prime = self.sess.run(self.c_prime, feed_dict={self.z_hat: z_hat})
    # c_prime = np.round(c_prime, decimals=4)
    # Pad both tensors: 3 rows on top, 2 columns left, 1 column right.
    padded_c_prime = np.pad(c_prime, ((0, 0), (0, 0), (3, 0), (2, 1)),
                            'constant',
                            constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))
    padded_y_hat = np.pad(y_hat, ((0, 0), (0, 0), (3, 0), (2, 1)),
                          'constant',
                          constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))
    # Start from an all-zero buffer; it is filled in as symbols are decoded.
    padded_y_hat[:, :, :, :] = 0.0
    printProgressBar(0, y_hat.shape[2], prefix='Decoding y_hat:',
                     suffix='Complete', length=50)
    for h_idx in range(y_hat.shape[2]):
        printProgressBar(h_idx + 1, y_hat.shape[2], prefix='Decoding y_hat:',
                         suffix='Complete', length=50)
        for w_idx in range(y_hat.shape[3]):
            # Context for this position: from c_prime and from the values of
            # padded_y_hat decoded so far.
            c_prime_i = self.extractor_prime(padded_c_prime, h_idx, w_idx)
            c_doubleprime_i = self.extractor_doubleprime(padded_y_hat, h_idx,
                                                         w_idx)
            concatenated_c_i = np.concatenate([c_doubleprime_i, c_prime_i],
                                              axis=1)
            pred_mean, pred_sigma = self.sess.run(
                [self.pred_mean, self.pred_sigma],
                feed_dict={self.concatenated_c_i: concatenated_c_i})
            for ch_idx in range(self.M):
                mu_val = pred_mean[0, ch_idx, 0, 0] + 255
                sigma_val = pred_sigma[0, ch_idx, 0, 0]
                freq = arithmeticcoding.ModelFrequencyTable(mu_val, sigma_val)
                symbol = dec.read(freq)
                if symbol == 512:  # EOF symbol
                    print("EOF symbol")
                    # NOTE(review): only leaves the channel loop.
                    break
                # The +3 / +2 offsets match the top/left padding above.
                padded_y_hat[:, ch_idx, h_idx + 3, w_idx + 2] = symbol - 255
    bitin.close()
    # Strip the padding again: drop 3 top rows, 2 left columns, 1 right column.
    y_hat = padded_y_hat[:, :, 3:, 2:-1]
    #################################################
    recon = self.sess.run(self.recon_image, {self.y_hat: y_hat})
    # Keep the last h rows and last w columns of the first batch item.
    recon = recon[0, -h:, -w:, :]
    im = Image.fromarray(recon.astype(np.uint8))
    im.save(recon_path)
    return
def decode(self, compressed_file, recon_path):  # with TOP N dimensions
    """Decode ``compressed_file`` and save the reconstructed image.

    Unlike a fixed multiple-of-16 padding, this variant derives the latent
    size from the image size through a chain of halvings with per-stage
    padding, and crops after each synthesis stage (gsh1, gsh2, gsh3, recon).

    Steps, as visible in the code:
      1. Read the header: one skipped byte, then four bytes interpreted as
         two uint16 values (width, height).
      2. Compute per-stage sizes (``res_*``) and paddings (``pad_*``).
      3. Arithmetic-decode ``z_hat``, then ``y_hat`` position by position.
      4. Run the synthesis stages, cropping each intermediate result, and
         save the final image.

    Args:
        compressed_file: Path of the bitstream to decode.
        recon_path: Path the reconstructed image is saved to.
    """
    # NOTE(review): fileobj is never closed explicitly; presumably
    # bitin.close() below closes it - confirm. It stays open if an exception
    # is raised before that point.
    fileobj = open(compressed_file, mode='rb')
    fileobj.read(1)  # dummy byte, skipped
    buf = fileobj.read(4)
    arr = np.frombuffer(buf, dtype=np.uint16)
    w = int(arr[0])
    h = int(arr[1])
    # Round both dimensions up to the next even number.
    new_w = int(math.ceil(float(w) / 2.0) * 2)
    new_h = int(math.ceil(float(h) / 2.0) * 2)
    # Three identical stages: pad_k = int((size / 2) % 2) and
    # res_k = floor(size / 2) + pad_k, each fed with the previous res.
    pad_w_1 = int((float(new_w) / 2.0) % 2)
    pad_h_1 = int((float(new_h) / 2.0) % 2)
    res_w_1 = math.floor(float(new_w) / 2.0) + pad_w_1
    res_h_1 = math.floor(float(new_h) / 2.0) + pad_h_1
    pad_w_2 = int((float(res_w_1) / 2.0) % 2)
    pad_h_2 = int((float(res_h_1) / 2.0) % 2)
    res_w_2 = math.floor(float(res_w_1) / 2.0) + pad_w_2
    res_h_2 = math.floor(float(res_h_1) / 2.0) + pad_h_2
    pad_w_3 = int((float(res_w_2) / 2.0) % 2)
    pad_h_3 = int((float(res_h_2) / 2.0) % 2)
    res_w_3 = math.floor(float(res_w_2) / 2.0) + pad_w_3
    res_h_3 = math.floor(float(res_h_2) / 2.0) + pad_h_3
    # Amount added by the initial round-up to even; cropped off at the end.
    pad_w = new_w - w
    pad_h = new_h - h
    sigma_z = self.sess.run(self.sigma_z)
    # Zero buffer for y_hat with M channels and half the stage-3 size.
    y_hat = np.zeros(
        (1, self.M, int(float(res_h_3) / 2.0), int(float(res_w_3) / 2.0)),
        dtype=np.float32)
    y_w = y_hat.shape[3]
    y_h = y_hat.shape[2]
    # Pad y_hat on the bottom/right up to the next multiple of 4 before it is
    # fed to the graph to obtain z_hat.
    new_y_w = int(math.ceil(float(y_w) / 4.0) * 4)
    new_y_h = int(math.ceil(float(y_h) / 4.0) * 4)
    pad_y_w = new_y_w - y_w
    pad_y_h = new_y_h - y_h
    pad_y_hat = np.pad(y_hat, ((0, 0), (0, 0), (0, pad_y_h), (0, pad_y_w)),
                       mode='edge')
    # z_hat entries are overwritten by the decoder below, so this run
    # presumably only provides a correctly shaped buffer - TODO confirm.
    z_hat = self.sess.run(self.z_hat,
                          feed_dict={self.y_hat: pad_y_hat})  # NCHW
    # y_hat, z_hat, sigma_z = self.sess.run([self.y_hat, self.z_hat, self.sigma_z],
    #                                       feed_dict={
    #                                           self.input_x: np.zeros((1, 3, padded_h, padded_w))})  # NCHW
    # Pad the first M1 channels: 3 rows on top, 2 columns left, 1 column right.
    padded_y1_hat = np.pad(y_hat[:, :self.M1, :, :],
                           ((0, 0), (0, 0), (3, 0), (2, 1)),
                           'constant',
                           constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))
    ############### decode zhat ####################################
    # The arithmetic decoder continues from the current file position, i.e.
    # right after the 5 header bytes.
    bitin = arithmeticcoding.BitInputStream(fileobj)
    dec = arithmeticcoding.ArithmeticDecoder(bitin)
    printProgressBar(0, z_hat.shape[1], prefix='Decoding z_hat:',
                     suffix='Complete', length=50)
    for ch_idx in range(z_hat.shape[1]):
        printProgressBar(ch_idx + 1, z_hat.shape[1], prefix='Decoding z_hat:',
                         suffix='Complete', length=50)
        # One frequency table per channel, reused for every position.
        mu_val = 255
        sigma_val = sigma_z[ch_idx]
        # exp_sigma_val = np.exp(sigma_val)
        freq = arithmeticcoding.ModelFrequencyTable(mu_val, sigma_val)
        for h_idx in range(z_hat.shape[2]):
            for w_idx in range(z_hat.shape[3]):
                symbol = dec.read(freq)
                if symbol == 512:  # EOF symbol
                    print("EOF symbol")
                    # NOTE(review): this only leaves the innermost (w_idx)
                    # loop; the outer loops keep reading from the decoder.
                    break
                # Symbols are stored with an offset of 255.
                z_hat[:, ch_idx, h_idx, w_idx] = symbol - 255
    # bitin.close()
    #################################################
    # Entropy decoding y
    # padded_z = np.zeros_like(padded_z, dtype = np.float32)
    h_s_out = self.sess.run(self.h_s_out, feed_dict={self.z_hat: z_hat})
    # Split along axis 1 at M1: first part is c_prime, the rest is sigma2.
    c_prime = h_s_out[:, :self.M1, :, :]
    sigma2 = h_s_out[:, self.M1:, :, :]
    padded_c_prime = np.pad(c_prime, ((0, 0), (0, 0), (3, 0), (2, 1)),
                            'constant',
                            constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))
    # Start from all-zero buffers; they are filled in as symbols are decoded.
    padded_y1_hat[:, :, :, :] = 0.0
    y_hat[:, :, :, :] = 0.0
    # bitin = arithmeticcoding.BitInputStream(open(dec_inputfile, "rb"))
    # dec = arithmeticcoding.ArithmeticDecoder(bitin)
    printProgressBar(0, y_hat.shape[2], prefix='Decoding y_hat:',
                     suffix='Complete', length=50)
    for h_idx in range(y_hat.shape[2]):
        printProgressBar(h_idx + 1, y_hat.shape[2], prefix='Decoding y_hat:',
                         suffix='Complete', length=50)
        for w_idx in range(y_hat.shape[3]):
            # Context for this position: from c_prime and from the values of
            # padded_y1_hat decoded so far.
            c_prime_i = self.extractor_prime(padded_c_prime, h_idx, w_idx)
            c_doubleprime_i = self.extractor_doubleprime(
                padded_y1_hat, h_idx, w_idx)
            concatenated_c_i = np.concatenate([c_doubleprime_i, c_prime_i],
                                              axis=1)
            pred_mean, pred_sigma = self.sess.run(
                [self.pred_mean, self.pred_sigma],
                feed_dict={self.concatenated_c_i: concatenated_c_i})
            # Append M2 zero means and the sigma2 values at this position
            # along axis 1, so that indices up to self.M can be addressed
            # (assumes M == M1 + M2 - TODO confirm).
            zero_means = np.zeros([
                pred_mean.shape[0], self.M2, pred_mean.shape[2],
                pred_mean.shape[3]
            ])
            concat_pred_mean = np.concatenate([pred_mean, zero_means], axis=1)
            concat_pred_sigma = np.concatenate([
                pred_sigma, sigma2[:, :, h_idx:h_idx + 1, w_idx:w_idx + 1]
            ], axis=1)
            for ch_idx in range(self.M):
                mu_val = concat_pred_mean[0, ch_idx, 0, 0] + 255
                sigma_val = concat_pred_sigma[0, ch_idx, 0, 0]
                freq = arithmeticcoding.ModelFrequencyTable(
                    mu_val, sigma_val)
                symbol = dec.read(freq)
                if symbol == 512:  # EOF symbol
                    print("EOF symbol")
                    # NOTE(review): only leaves the channel loop.
                    break
                if ch_idx < self.M1:
                    # The +3 / +2 offsets match the top/left padding above.
                    padded_y1_hat[:, ch_idx, h_idx + 3,
                                  w_idx + 2] = symbol - 255
                y_hat[:, ch_idx, h_idx, w_idx] = symbol - 255
    bitin.close()
    #################################################
    # Synthesis stages: after each stage the result is cropped on axes 1 and
    # 2 by the padding of the matching analysis stage (presumably NHWC
    # tensors - TODO confirm).
    gsh1 = self.sess.run(self.gsh1, feed_dict={self.y_hat: y_hat})
    gsh1 = gsh1[:, :res_h_3 - pad_h_3, :res_w_3 - pad_w_3, :]
    gsh2 = self.sess.run(self.gsh2, feed_dict={self.gsh1: gsh1})
    gsh2 = gsh2[:, :res_h_2 - pad_h_2, :res_w_2 - pad_w_2, :]
    gsh3 = self.sess.run(self.gsh3, feed_dict={self.gsh2: gsh2})
    gsh3 = gsh3[:, :res_h_1 - pad_h_1, :res_w_1 - pad_w_1, :]
    recon = self.sess.run(self.recon_image, feed_dict={self.gsh3: gsh3})
    # Remove the rows/columns added by the initial round-up to even size.
    recon = recon[0, :recon.shape[1] - pad_h, :recon.shape[2] - pad_w, :]
    im = Image.fromarray(recon.astype(np.uint8))
    im.save(recon_path)
    return