Example #1
0
def main(inputfile, outputfile):
    """Compress ``inputfile`` into ``outputfile`` and print a success message.

    The input is opened in binary mode and the output file is wrapped in an
    ``arithmeticcoding.BitOutputStream``. Both are closed when the block
    exits, whether or not ``compress`` raises.
    """
    with open(inputfile, "rb") as source:
        bit_sink = arithmeticcoding.BitOutputStream(open(outputfile, "wb"))
        # closing() guarantees bit_sink.close() runs when the block exits.
        with contextlib.closing(bit_sink) as bitOut:
            compress(source, bitOut)
    print("Adaptive Compress Success !!!")
 def compress(quantized, output_file):
     """
     Pickle ``quantized`` and write it to ``output_file`` using adaptive
     arithmetic coding.

     Input:
     quantized : Picklable object; its pickle byte string is what gets encoded
     output_file : Path of the file to create (opened in binary write mode)

     Output:
     None. The encoded bit stream is written to ``output_file``.
     """
     data = pickle.dumps(quantized)
     with open(output_file, "wb") as file:
         bitout = arithmeticcoding.BitOutputStream(file)
         try:
             # Flat starting model over 257 symbols: the byte values plus the
             # extra symbol 256 that is written as the EOF marker below.
             initfreqs = arithmeticcoding.FlatFrequencyTable(257)
             freqs = arithmeticcoding.SimpleFrequencyTable(initfreqs)
             enc = arithmeticcoding.ArithmeticEncoder(32, bitout)
             for symbol in data:
                 # Encode one byte, then adapt the model to it
                 enc.write(freqs, symbol)
                 freqs.increment(symbol)
             enc.write(freqs, 256)  # EOF
             enc.finish()  # Flush remaining code bits
         finally:
             # Close the bit stream itself, not only the raw file.
             # NOTE(review): presumably close() is what flushes a partially
             # filled final byte -- confirm against BitOutputStream.
             bitout.close()
Example #3
0
def entropy_coding(frame_index, lat, path_bin, latent, sigma, mu):
    """Arithmetic-code one latent tensor to a ``.bin`` file and return its size.

    Args:
        frame_index: Frame number; zero-padded to three digits in the file name.
        lat: Latent name used in the file name. ``'mv'`` selects a bias of 50,
            anything else a bias of 100.
        path_bin: Prefix concatenated directly with the file name (no path
            separator is inserted).
        latent: Array indexed as ``[0, h, w, ch]`` holding the values to code.
        sigma: Array indexed like ``latent``; passed to the frequency-table
            builder for each element.
        mu: Array indexed like ``latent``; shifted by the bias before being
            passed to the frequency-table builder.

    Returns:
        Size of the written file in bits (file size in bytes times 8).
    """
    bias = 50 if lat == 'mv' else 100
    # Loop-invariant third argument for the table builder. The builtin int()
    # replaces np.int, which NumPy removed in 1.24.
    num_symbols = int(bias * 2 + 1)

    bin_name = 'f' + str(frame_index).zfill(3) + '_' + lat + '.bin'
    bin_path = path_bin + bin_name
    bitout = arithmeticcoding.BitOutputStream(open(bin_path, "wb"))
    try:
        enc = arithmeticcoding.ArithmeticEncoder(32, bitout)

        for h in range(latent.shape[1]):
            for w in range(latent.shape[2]):
                for ch in range(latent.shape[3]):
                    # Shift mean and symbol by the same bias.
                    # NOTE(review): presumably so symbols are non-negative
                    # table indices -- confirm against logFrequencyTable_exp.
                    mu_val = mu[0, h, w, ch] + bias
                    sigma_val = sigma[0, h, w, ch]
                    symbol = latent[0, h, w, ch] + bias

                    freq = arithmeticcoding.logFrequencyTable_exp(
                        mu_val, sigma_val, num_symbols)
                    enc.write(freq, symbol)

        enc.finish()
    finally:
        # Release the output file even when encoding raises.
        bitout.close()

    return os.path.getsize(bin_path) * 8
 def start(self, dictionary_size=256):
     """Open the output bit stream and build the encoder state.

     Stores ``dictionary_size``, opens ``self.outputfile`` for binary
     writing, creates a flat frequency table with ``dictionary_size + 1``
     symbols and a 32-bit arithmetic encoder that writes to the new stream.
     """
     self.dictionary_size = dictionary_size
     raw_out = open(self.outputfile, "wb")
     self.bitout = arithmeticcoding.BitOutputStream(raw_out)
     # Alternative non-flat start table kept from the original for reference:
     #self.freqsTable = arithmeticcoding.SimpleFrequencyTable([float(i % 8 + 1) for i in range(self.dictionary_size + 1)])
     symbol_count = self.dictionary_size + 1
     self.freqsTable = arithmeticcoding.FlatFrequencyTable(symbol_count)
     self.encoder = arithmeticcoding.ArithmeticEncoder(32, self.bitout)
Example #5
0
def main(args):
	"""Command-line entry point: compress ``args[0]`` into ``args[1]``.

	Exits with a usage message unless exactly two arguments are supplied.
	"""
	if len(args) != 2:
		sys.exit("Usage: python adaptive_arithmetic_compress.py InputFile OutputFile")
	inputfile = args[0]
	outputfile = args[1]

	# Both streams are closed when the nested blocks exit.
	with open(inputfile, "rb") as inp:
		bit_sink = arithmeticcoding.BitOutputStream(open(outputfile, "wb"))
		with contextlib.closing(bit_sink) as bitout:
			compress(inp, bitout)
Example #6
0
def compress(model):
    """Arithmetic-code the lines of ``./result/test.qs`` into ``./result/data.bin``.

    Each line read from the input is echoed to ``./result/old.txt``. The first
    character of a line is coded with the ``first_step`` frequency table.
    Every later character is coded with a table built from the model's
    prediction given the previous character. Reading stops at the first empty
    line (or end of file) or once more than 10000 lines have been processed.
    A final symbol ``len(characters)`` is then written with the last table
    used, and the encoder is finished.

    Prints ``acc_num / time_num`` and ``sum_all / time_num`` at the end.
    ``time_num`` grows by 8.0 per coded character, so an input with no
    characters raises ZeroDivisionError, as in the original.

    Args:
        model: Predictor forwarded unchanged to ``predict``.
    """
    bit_out = arithmeticcoding.BitOutputStream(open('./result/data.bin', "wb"))
    try:
        enc = arithmeticcoding.ArithmeticEncoder(bit_out)
        num_line = 0
        sum_all = 0
        time_num = 0
        acc_num = 0
        # Table used for the terminating symbol if no character is ever coded.
        end_freq = generate_freqs(pro=1, first_step=True)
        with open('./result/old.txt', 'w') as z, \
                open('./result/test.qs') as f:
            while True:
                text = f.readline().replace('\n', '')
                # The echo happens before the emptiness check, so the copy
                # always ends with one extra blank line.
                z.write(text)
                z.write('\n')
                if not text:
                    break
                encode_text = [char2int[char] for char in text]
                num_line += 1
                hidden = None  # recurrent state is reset for every line
                for char_index, symbol in enumerate(encode_text):
                    if char_index == 0:
                        freq = generate_freqs(pro=1, first_step=True)
                        sum_all += -np.log2(1 / 35.)
                    else:
                        target_char = np.array(symbol)
                        context_char = np.array(encode_text[char_index - 1])
                        out, hidden = predict(model, context_char, hidden)
                        out = out[0]  # (35, )
                        sum_all += -np.log2(out[target_char])
                        freq = generate_freqs(pro=out, first_step=False)

                        # int() replaces .astype(np.int); np.int was removed
                        # in NumPy 1.24.
                        if np.argmax(out) == int(target_char):
                            acc_num += 1
                    time_num += 8.0
                    enc.write(freq, symbol)
                    end_freq = freq

                if num_line % 100 == 0:
                    print(num_line)
                if num_line > 10000:
                    break
        enc.write(end_freq, len(characters))
        enc.finish()
    finally:
        # Close the bit stream (and its file) even if encoding fails.
        # NOTE(review): presumably close() also flushes a partial last byte --
        # confirm against arithmeticcoding.BitOutputStream.
        bit_out.close()
    print(acc_num / time_num, sum_all / time_num)
Example #7
0
def comparess(file1, model, indices_char):
    """Compress ``file1`` into ``file1 + '.comp'`` using model-guided run counts.

    The first ``maxlen`` characters are each written as a ``0`` followed by
    the character code. After that, ``predict`` guesses each character from
    the preceding ``maxlen`` characters. Correct guesses are counted (up to
    255 in a row). On a miss, or when the counter is full, the count and then
    the actual character code are written and the count resets. Any pending
    count and ``MAGIC_EOF`` are written at the end.

    Known to be slow: one ``predict`` call per character (the original author
    measured about 0.02 s per guess, roughly 16 minutes for a 50k-character
    file).

    Args:
        file1: Path of the text file to compress.
        model: Forwarded unchanged to ``predict``.
        indices_char: Forwarded unchanged to ``predict``.

    Returns:
        None.
    """
    # Read through a context manager so the input handle is released promptly.
    with open(file1, 'r') as src:
        f1 = src.read()

    bitout = arithmeticcoding.BitOutputStream(open(file1 + '.comp', "wb"))
    try:
        initfreqs = arithmeticcoding.FlatFrequencyTable(AE_SIZE)
        freqs = arithmeticcoding.SimpleFrequencyTable(initfreqs)
        enc = arithmeticcoding.ArithmeticEncoder(bitout)
        guesses_right = 0

        for i, char in enumerate(f1):
            current = ord(char)
            if i < maxlen:
                # Always 'guessing' zero correctly before maxlen
                enc.write(freqs, 0)
                freqs.increment(0)
                enc.write(freqs, current)
                freqs.increment(current)
            else:
                guess = predict(f1[(i - maxlen):i], model, indices_char)
                if char == guess and guesses_right < 255:
                    guesses_right += 1
                    print("Guessed", char, "correctly")
                else:
                    enc.write(freqs, guesses_right)
                    print("Wrong guess. Outputing", guesses_right,
                          "correct guesses")
                    freqs.increment(guesses_right)
                    print(i, "Outputing char", current)
                    enc.write(freqs, current)
                    freqs.increment(current)
                    guesses_right = 0

            if i % 100 == 0:
                print("i:", i)

        # Flush a trailing run of correct guesses before the EOF marker.
        if guesses_right > 0:
            enc.write(freqs, guesses_right)
        enc.write(freqs, MAGIC_EOF)
        print("out eof sanity check")
        enc.finish()
    finally:
        # Close the output even when prediction or encoding raises.
        bitout.close()
    return None
def final_function(args):
    """
    Entry point that compresses a file with PPM compression.

    ``args`` must hold exactly two items, the input path and the output path;
    otherwise the process exits with a usage message.
    """
    if len(args) != 2:
        sys.exit("Usage: python ppm_compress.py InputFile OutputFile")
    inputfile, outputfile = args

    # Nested blocks close the bit stream first, then the input file.
    with open(inputfile, "rb") as inp:
        bit_sink = arithmeticcoding.BitOutputStream(open(outputfile, "wb"))
        with contextlib.closing(bit_sink) as bitout:
            compress(inp, bitout)
def main(args):
    """Command-line entry point for PPM compression.

    Expects ``args`` to be ``[input_path, output_path]`` and exits with a
    usage message otherwise. The bit stream is closed even if ``compress``
    raises.
    """
    if len(args) != 2:
        sys.exit("Usage: python ppm-compress.py InputFile OutputFile")
    inputfile, outputfile = args

    with open(inputfile, "rb") as inp:
        out_handle = open(outputfile, "wb")
        bitout = arithmeticcoding.BitOutputStream(out_handle)
        try:
            compress(inp, bitout)
        finally:
            bitout.close()
Example #10
0
def main(inputfile, outputfile):
    """Compress ``inputfile`` into ``outputfile`` with a two-pass static model.

    The first pass collects symbol frequencies. The second pass writes the
    frequency table followed by the arithmetic-coded data, then prints a
    success message.
    """
    # Pass 1: gather frequencies; symbol 256 (EOF) gets a frequency of 1.
    freqs = get_frequencies(inputfile)
    freqs.increment(256)

    # Pass 2: emit the table, then the coded payload.
    with open(inputfile, "rb") as inp:
        bit_sink = arithmeticcoding.BitOutputStream(open(outputfile, "wb"))
        with contextlib.closing(bit_sink) as bitout:
            write_frequencies(bitout, freqs)
            compress(freqs, inp, bitout)
    print("Compress Success !!!")
Example #11
0
 def __init__(self, path, ispaired, kmerLen, verbose, sequenceTable):
     """Initialise encoder state and open the output streams.

     Stores the constructor arguments, resets the node counters, builds the
     nucleotide lookup tables and adaptive frequency tables, opens the
     ``.bifurL`` / ``.bifurR`` bit streams next to ``path``, and finally
     calls ``removeOutputFile()``.

     path : prefix used for every output file name
     ispaired : stored as ``self.paired``
     kmerLen : stored as both ``self.kmerLen`` and ``self.indexLen``
     verbose : stored as ``self.verbose``
     sequenceTable : stored as ``self.sequenceTable``
     """
     self.mutiDict = {}  ### kmers for buckets
     self.sequenceTable = sequenceTable
     self.kmerLen = kmerLen
     self.indexLen = kmerLen
     self.paired = ispaired
     self.seqLen = 0
     #self.bucketDict = defaultdict(lambda : defaultdict(dict))
     self.bucketDict = {}  #nested_dict(2, int)
     self.encodeBucketPath = {}
     # Node counters, all starting at zero
     self.newNodeNum = 0
     self.simpleNodeNum = 0
     self.tipNodeNum = 0
     self.bifurNodeNum = 0
     self.deleteBifurRatio = 0.2
     self.outPutPath = path
     # Nucleotide <-> integer / bit-string lookup tables
     self.dna2num = {"A": 0, "C": 1, "G": 2, "T": 3}
     self.num2dna = {0: "A", 1: "C", 2: "G", 3: "T"}
     self.dna2bit = {"A": '0b00', "C": '0b01', "G": '0b10', "T": '0b11'}
     # NOTE(review): duplicate of the num2dna assignment two lines above
     self.num2dna = {0: "A", 1: "C", 2: "G", 3: "T"}
     self.firstSeq = BitStream()
     self.numFlag = BitStream()
     # Adaptive frequency tables seeded from flat tables of 3 and 4 symbols
     self.freq3 = arithmeticcoding.SimpleFrequencyTable(
         arithmeticcoding.FlatFrequencyTable(3))
     self.freq4 = arithmeticcoding.SimpleFrequencyTable(
         arithmeticcoding.FlatFrequencyTable(4))
     self.freqs = arithmeticcoding.SimpleFrequencyTable(
         arithmeticcoding.FlatFrequencyTable(4))
     # Bit streams written to "<path>.bifurL" and "<path>.bifurR"
     self.bitoutL = arithmeticcoding.BitOutputStream(
         open(self.outPutPath + ".bifurL", "wb"))
     self.bitoutR = arithmeticcoding.BitOutputStream(
         open(self.outPutPath + ".bifurR", "wb"))
     self.encodeSeqPathL = self.openFileLeft()
     self.encodeSeqPathR = self.openFileRight()
     self.verbose = verbose
     # NOTE(review): runs after the output streams above are opened --
     # confirm it does not delete the files that were just created.
     self.removeOutputFile()
def main(binfile):
    """Round-trip a fixed ten-symbol sequence through compress/decompress.

    Writes the compressed stream to ``binfile``, reads it back, prints a
    hard-coded size estimate in bytes, and then prints the input next to the
    decoded output.
    """
    inp = [1] * 10
    bit_sink = arithmeticcoding.BitOutputStream(open(binfile, "wb"))
    with contextlib.closing(bit_sink) as bitout:
        compress(inp, bitout)

    # Perform file decompression
    with open(binfile, "rb") as bitsfile:
        out = decompress(arithmeticcoding.BitInputStream(bitsfile))

    # Size estimate in bits from fixed probabilities 0.1, 0.1 (x9) and 0.3.
    # The operations run in the original order so the float result is
    # unchanged.
    ln2 = np.log(2.0)
    neg_ln_tenth = -np.log(0.1)
    bpp = (1.0 * neg_ln_tenth / ln2
           + 9.0 * neg_ln_tenth / ln2
           + 1.0 * -np.log(0.3) / ln2)
    print(bpp / 8.0, "bytes")

    print(inp, out)
Example #13
0
def main(args):
    """Command-line entry point for two-pass static arithmetic compression.

    Expects ``args`` to be ``[input_path, output_path]`` and exits with a
    usage message otherwise.
    """
    if len(args) != 2:
        sys.exit("Usage: python arithmetic-compress.py InputFile OutputFile")
    inputfile, outputfile = args

    # Pass 1: count symbols; the EOF symbol (256) gets a frequency of 1.
    freqs = get_frequencies(inputfile)
    freqs.increment(256)

    # Pass 2: write the frequency table, then the coded data.
    with open(inputfile, "rb") as inp:
        out_handle = open(outputfile, "wb")
        bitout = arithmeticcoding.BitOutputStream(out_handle)
        try:
            write_frequencies(bitout, freqs)
            compress(freqs, inp, bitout)
        finally:
            bitout.close()
    def encode(self, model_type, input_path, compressed_file_path,
               quality_level):  # with TOP N dimensions
        """Encode the image at ``input_path`` into ``compressed_file_path``.

        Steps, in order:
          1. Write a header: one uint8 holding ``(quality_level << 1) +
             model_type``, then the image width and height as uint16.
          2. Pad the image (top/left, reflect mode) to multiples of 16,
             reshape to one batch item and transpose to NCHW.
          3. Run the session to obtain ``h_s_out``, ``y_hat``, ``z_hat`` and
             ``sigma_z``.
          4. Append the arithmetic-coded ``z_hat`` and then ``y_hat`` symbols
             to the same file, followed by symbol 512.

        Returns ``compressed_file_path``.
        """

        img = Image.open(input_path)
        w, h = img.size

        # Header is written first with a separate handle; the payload is
        # appended later through a second open() of the same path.
        fileobj = open(compressed_file_path, mode='wb')

        # Pack quality level and model type into a single byte
        buf = quality_level << 1
        buf = buf + model_type
        arr = np.array([0], dtype=np.uint8)
        arr[0] = buf
        arr.tofile(fileobj)

        # NOTE(review): uint16 limits each dimension to 65535 -- confirm that
        # larger images are rejected upstream.
        arr = np.array([w, h], dtype=np.uint16)
        arr.tofile(fileobj)
        fileobj.close()

        # Round both dimensions up to the next multiple of 16
        # NOTE(review): `w / 16` floors under Python 2 integer division --
        # confirm this code only runs on Python 3.
        new_w = int(math.ceil(w / 16) * 16)
        new_h = int(math.ceil(h / 16) * 16)

        pad_w = new_w - w
        pad_h = new_h - h

        input_x = np.asarray(img)
        # Padding is added before the data on both spatial axes
        input_x = np.pad(input_x, ((pad_h, 0), (pad_w, 0), (0, 0)),
                         mode='reflect')
        input_x = input_x.reshape(1, new_h, new_w, 3)
        input_x = input_x.transpose([0, 3, 1, 2])

        h_s_out, y_hat, z_hat, sigma_z = self.sess.run(
            [self.h_s_out, self.y_hat, self.z_hat, self.sigma_z],
            feed_dict={self.input_x: input_x})  # NCHW

        ############### encode z ####################################
        # Append mode so the header written above is preserved
        bitout = arithmeticcoding.BitOutputStream(
            open(compressed_file_path, "ab+"))
        enc = arithmeticcoding.ArithmeticEncoder(bitout)

        printProgressBar(0,
                         z_hat.shape[1],
                         prefix='Encoding z_hat:',
                         suffix='Complete',
                         length=50)
        for ch_idx in range(z_hat.shape[1]):
            printProgressBar(ch_idx + 1,
                             z_hat.shape[1],
                             prefix='Encoding z_hat:',
                             suffix='Complete',
                             length=50)
            # One frequency table per channel: fixed mean 255, per-channel sigma
            mu_val = 255
            sigma_val = sigma_z[ch_idx]
            # exp_sigma_val = np.exp(sigma_val)

            freq = arithmeticcoding.ModelFrequencyTable(mu_val, sigma_val)

            for h_idx in range(z_hat.shape[2]):
                for w_idx in range(z_hat.shape[3]):
                    # Offset by 255 and round to the nearest integer value
                    symbol = np.rint(z_hat[0, ch_idx, h_idx, w_idx] + 255)
                    # Out-of-range symbols are only reported, not clamped
                    if symbol < 0 or symbol > 511:
                        print("symbol range error: " + str(symbol))

                    # print(symbol)
                    enc.write(freq, symbol)

        # enc.write(freq, 512)
        # enc.finish()
        # bitout.close()

        ############### encode y ####################################
        # Zero-pad the first M1 channels of y_hat on the two spatial axes
        padded_y1_hat = np.pad(y_hat[:, :self.M1, :, :],
                               ((0, 0), (0, 0), (3, 0), (2, 1)),
                               'constant',
                               constant_values=((0, 0), (0, 0), (0, 0), (0,
                                                                         0)))

        # bitout = arithmeticcoding.BitOutputStream(open(enc_outputfile, "wb"))
        # enc = arithmeticcoding.ArithmeticEncoder(bitout)

        # Split h_s_out along the channel axis at M1
        c_prime = h_s_out[:, :self.M1, :, :]
        sigma2 = h_s_out[:, self.M1:, :, :]
        padded_c_prime = np.pad(c_prime, ((0, 0), (0, 0), (3, 0), (2, 1)),
                                'constant',
                                constant_values=((0, 0), (0, 0), (0, 0), (0,
                                                                          0)))

        printProgressBar(0,
                         y_hat.shape[2],
                         prefix='Encoding y_hat:',
                         suffix='Complete',
                         length=50)
        for h_idx in range(y_hat.shape[2]):
            printProgressBar(h_idx + 1,
                             y_hat.shape[2],
                             prefix='Encoding y_hat:',
                             suffix='Complete',
                             length=50)
            for w_idx in range(y_hat.shape[3]):
                # Build the context for this position from both padded tensors
                c_prime_i = self.extractor_prime(padded_c_prime, h_idx, w_idx)
                c_doubleprime_i = self.extractor_doubleprime(
                    padded_y1_hat, h_idx, w_idx)
                concatenated_c_i = np.concatenate([c_doubleprime_i, c_prime_i],
                                                  axis=1)

                # One session call per spatial position
                pred_mean, pred_sigma = self.sess.run(
                    [self.pred_mean, self.pred_sigma],
                    feed_dict={self.concatenated_c_i: concatenated_c_i})

                # M2 extra channels get a zero mean ...
                zero_means = np.zeros([
                    pred_mean.shape[0], self.M2, pred_mean.shape[2],
                    pred_mean.shape[3]
                ])

                concat_pred_mean = np.concatenate([pred_mean, zero_means],
                                                  axis=1)
                # ... and take their sigma from sigma2 at this position
                concat_pred_sigma = np.concatenate([
                    pred_sigma, sigma2[:, :, h_idx:h_idx + 1, w_idx:w_idx + 1]
                ],
                                                   axis=1)

                for ch_idx in range(self.M):
                    mu_val = concat_pred_mean[0, ch_idx, 0, 0] + 255
                    sigma_val = concat_pred_sigma[0, ch_idx, 0, 0]
                    # exp_sigma_val = np.exp(sigma_val)

                    freq = arithmeticcoding.ModelFrequencyTable(
                        mu_val, sigma_val)

                    symbol = np.rint(y_hat[0, ch_idx, h_idx, w_idx] + 255)
                    if symbol < 0 or symbol > 511:
                        print("symbol range error: " + str(symbol))
                    enc.write(freq, symbol)
        # Symbol 512 is written with the last table built above.
        # NOTE(review): presumably the end-of-stream marker -- confirm.
        enc.write(freq, 512)
        enc.finish()
        bitout.close()

        return compressed_file_path
    def encode(self, model_type, input_path, compressed_file_path,
               quality_level):  # with TOP N dimensions
        """Encode the image at ``input_path`` into ``compressed_file_path``.

        Steps, in order:
          1. Write a header: one uint8 holding ``(quality_level << 1) +
             model_type``, then the image width and height as uint16.
          2. Pad the image (bottom/right, edge mode) to even dimensions,
             reshape to one batch item and transpose to NCHW order.
          3. Run the analysis stages ``gah1`` -> ``gah2`` -> ``gah3`` ->
             ``y_hat`` one session call at a time, zero-padding axes 1 and 2
             of each intermediate by one when their size is odd.
          4. Pad ``y_hat`` (symmetric mode) to multiples of 4 and obtain
             ``z_hat`` and ``c_prime`` from it.
          5. Append the arithmetic-coded ``z_hat`` and then ``y_hat`` symbols
             to the same file, followed by symbol 512.

        Args:
            model_type: Integer added to the header byte.
            input_path: Path of the image to read.
            compressed_file_path: Path of the file to create.
            quality_level: Integer shifted left by one bit in the header byte.

        Returns:
            ``compressed_file_path``.
        """

        img = Image.open(input_path)
        w, h = img.size

        # Header: the context manager closes the file even if a write fails.
        with open(compressed_file_path, mode='wb') as fileobj:
            arr = np.array([0], dtype=np.uint8)
            arr[0] = (quality_level << 1) + model_type
            arr.tofile(fileobj)

            arr = np.array([w, h], dtype=np.uint16)
            arr.tofile(fileobj)

        # Round both dimensions up to the next even number
        new_w = int(math.ceil(float(w) / 2.0) * 2)
        new_h = int(math.ceil(float(h) / 2.0) * 2)

        pad_w = new_w - w
        pad_h = new_h - h

        img_array = np.asarray(img)
        img_array = np.pad(img_array, ((0, pad_h), (0, pad_w), (0, 0)),
                           mode='edge')
        input_x = img_array

        input_x = input_x.reshape(1, new_h, new_w, 3)
        input_x = input_x.transpose([0, 3, 1, 2])

        gah1, sigma_z = self.sess.run([self.gah1, self.sigma_z],
                                      feed_dict={self.input_x: input_x})

        # Each intermediate is zero-padded by `size % 2` on axes 1 and 2
        # before being fed to the next stage.
        gah1 = np.pad(gah1, ((0, 0), (0, gah1.shape[1] % 2),
                             (0, gah1.shape[2] % 2), (0, 0)),
                      mode='constant')
        gah2 = self.sess.run(self.gah2, feed_dict={self.gah1: gah1})

        gah2 = np.pad(gah2, ((0, 0), (0, gah2.shape[1] % 2),
                             (0, gah2.shape[2] % 2), (0, 0)),
                      mode='constant')
        gah3 = self.sess.run(self.gah3, feed_dict={self.gah2: gah2})

        gah3 = np.pad(gah3, ((0, 0), (0, gah3.shape[1] % 2),
                             (0, gah3.shape[2] % 2), (0, 0)),
                      mode='constant')
        y_hat = self.sess.run(self.y_hat, feed_dict={self.gah3: gah3})

        # Pad y_hat to multiples of 4 before computing z_hat / c_prime
        y_w = y_hat.shape[3]
        y_h = y_hat.shape[2]
        new_y_w = int(math.ceil(float(y_w) / 4.0) * 4)
        new_y_h = int(math.ceil(float(y_h) / 4.0) * 4)
        pad_y_w = new_y_w - y_w
        pad_y_h = new_y_h - y_h
        pad_y_hat = np.pad(y_hat, ((0, 0), (0, 0), (0, pad_y_h), (0, pad_y_w)),
                           mode='symmetric')
        z_hat, c_prime = self.sess.run([self.z_hat, self.c_prime],
                                       feed_dict={self.y_hat: pad_y_hat})

        # Zero-padded context tensors for the y_hat stage. They are
        # independent of the encoder, so they are built before the stream
        # is opened.
        padded_y_hat = np.pad(y_hat, ((0, 0), (0, 0), (3, 0), (2, 1)),
                              'constant',
                              constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))

        padded_c_prime = np.pad(c_prime, ((0, 0), (0, 0), (3, 0), (2, 1)),
                                'constant',
                                constant_values=((0, 0), (0, 0), (0, 0), (0,
                                                                          0)))

        ############### encode zhat ####################################
        printProgressBar(0,
                         z_hat.shape[1],
                         prefix='Encoding z_hat:',
                         suffix='Complete',
                         length=50)
        # Append mode so the header written above is preserved
        bitout = arithmeticcoding.BitOutputStream(
            open(compressed_file_path, "ab+"))
        try:
            enc = arithmeticcoding.ArithmeticEncoder(bitout)

            for ch_idx in range(z_hat.shape[1]):
                printProgressBar(ch_idx + 1,
                                 z_hat.shape[1],
                                 prefix='Encoding z_hat:',
                                 suffix='Complete',
                                 length=50)
                # One table per channel: fixed mean 255, per-channel sigma
                mu_val = 255
                sigma_val = sigma_z[ch_idx]

                freq = arithmeticcoding.ModelFrequencyTable(mu_val, sigma_val)

                for h_idx in range(z_hat.shape[2]):
                    for w_idx in range(z_hat.shape[3]):
                        # int() replaces np.int, which was only an alias of
                        # the builtin and was removed in NumPy 1.24.
                        symbol = int(z_hat[0, ch_idx, h_idx, w_idx] + 255)
                        # Out-of-range symbols are only reported, not clamped
                        if symbol < 0 or symbol > 511:
                            print("symbol range error: " + str(symbol))
                        enc.write(freq, symbol)

            ############### encode yhat ####################################
            printProgressBar(0,
                             y_hat.shape[2],
                             prefix='Encoding y_hat:',
                             suffix='Complete',
                             length=50)
            for h_idx in range(y_hat.shape[2]):
                printProgressBar(h_idx + 1,
                                 y_hat.shape[2],
                                 prefix='Encoding y_hat:',
                                 suffix='Complete',
                                 length=50)
                for w_idx in range(y_hat.shape[3]):

                    c_prime_i = self.extractor_prime(padded_c_prime, h_idx,
                                                     w_idx)
                    c_doubleprime_i = self.extractor_doubleprime(
                        padded_y_hat, h_idx, w_idx)
                    concatenated_c_i = np.concatenate(
                        [c_doubleprime_i, c_prime_i], axis=1)

                    # One session call per spatial position
                    pred_mean, pred_sigma = self.sess.run(
                        [self.pred_mean, self.pred_sigma],
                        feed_dict={self.concatenated_c_i: concatenated_c_i})

                    for ch_idx in range(self.M):
                        mu_val = pred_mean[0, ch_idx, 0, 0] + 255
                        sigma_val = pred_sigma[0, ch_idx, 0, 0]

                        freq = arithmeticcoding.ModelFrequencyTable(
                            mu_val, sigma_val)

                        symbol = int(y_hat[0, ch_idx, h_idx, w_idx] + 255)
                        if symbol < 0 or symbol > 511:
                            print("symbol range error: " + str(symbol))

                        enc.write(freq, symbol)

            # Symbol 512 is written with the last table built above.
            # NOTE(review): presumably the end-of-stream marker -- confirm.
            enc.write(freq, 512)
            enc.finish()
        finally:
            # Release the output file even when encoding raises.
            bitout.close()

        return compressed_file_path