Example #1
def execute(arguments):
    _validate_arguments(arguments)
    _enrich_arguments(arguments)

    compress_value = arguments[_compress_command]

    read_file_value = arguments[_read_file_parameter]
    write_file_value = arguments[_write_file_parameter]

    lzw_value = arguments[_lzw_option]
    elias_value = arguments[_elias_option]

    code_value = arguments[_code_option]

    if compress_value:
        if lzw_value:
            lzw.compress(read_file_value, write_file_value)
        elif elias_value:
            elias.compress(read_file_value,
                           write_file_value,
                           code_type=code_value)
    else:
        if lzw_value:
            lzw.decompress(read_file_value, write_file_value)
        elif elias_value:
            elias.decompress(read_file_value,
                             write_file_value,
                             code_type=code_value)
Example #2
def lzwEncode(stream, parameters):
	'''
		Method to encode streams using the LZW algorithm
	
		@param stream: A PDF stream
		@return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1
	'''
	encodedStream = ''
	if parameters == None or parameters == {}:
		try:
			generator = lzw.compress(stream)
			for c in generator:
				encodedStream += c
			return (0,encodedStream)
		except:
			return (-1,'Error compressing string')
	else:
		if parameters.has_key('/Predictor'):
			predictor = parameters['/Predictor'].getRawValue()
		else:
			predictor = 1
		# Columns = number of samples per row
		if parameters.has_key('/Columns'):
			columns = parameters['/Columns'].getRawValue()
		else:
			columns = 1
		# Colors = number of components per sample
		if parameters.has_key('/Colors'):
			colors = parameters['/Colors'].getRawValue()
			if colors < 1:
				colors = 1
		else:
			colors = 1
		# BitsPerComponent: number of bits per color component
		if parameters.has_key('/BitsPerComponent'):
			bits = parameters['/BitsPerComponent'].getRawValue()
			if bits not in [1,2,4,8,16]:
				bits = 8
		else:
			bits = 8
		if parameters.has_key('/EarlyChange'):
			earlyChange = parameters['/EarlyChange'].getRawValue()
		else:
			earlyChange = 1
		if predictor != None and predictor != 1:
			ret = pre_prediction(stream, predictor, columns, colors, bits)
			if ret[0] == -1:
				return ret
			output = ret[1]
		else:
			output = stream
		try:
			generator = lzw.compress(output)
			for c in generator:
				encodedStream += c
			return (0,encodedStream)
		except:
			return (-1,'Error compressing string')
Example #3
def lzwEncode(stream, parameters):
    '''
        Method to encode streams using the LZW algorithm
    
        @param stream: A PDF stream
        @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1
    '''
    encodedStream = ''
    if parameters == None or parameters == {}:
        try:
            generator = lzw.compress(stream)
            for c in generator:
                encodedStream += c
            return (0, encodedStream)
        except:
            return (-1, 'Error compressing string')
    else:
        if parameters.has_key('/Predictor'):
            predictor = parameters['/Predictor'].getRawValue()
        else:
            predictor = 1
        # Columns = number of samples per row
        if parameters.has_key('/Columns'):
            columns = parameters['/Columns'].getRawValue()
        else:
            columns = 1
        # Colors = number of components per sample
        if parameters.has_key('/Colors'):
            colors = parameters['/Colors'].getRawValue()
            if colors < 1:
                colors = 1
        else:
            colors = 1
        # BitsPerComponent: number of bits per color component
        if parameters.has_key('/BitsPerComponent'):
            bits = parameters['/BitsPerComponent'].getRawValue()
            if bits not in [1, 2, 4, 8, 16]:
                bits = 8
        else:
            bits = 8
        if parameters.has_key('/EarlyChange'):
            earlyChange = parameters['/EarlyChange'].getRawValue()
        else:
            earlyChange = 1
        if predictor != None and predictor != 1:
            ret = pre_prediction(stream, predictor, columns, colors, bits)
            if ret[0] == -1:
                return ret
            output = ret[1]
        else:
            output = stream
        try:
            generator = lzw.compress(output)
            for c in generator:
                encodedStream += c
            return (0, encodedStream)
        except:
            return (-1, 'Error compressing string')
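
A minimal sketch of how the (status, statusContent) convention described in the docstring might be consumed; the stream value below is purely illustrative, and it assumes lzwEncode and its lzw module are importable in the calling code:

raw_stream = 'BT /F1 12 Tf (Hello) Tj ET'   # illustrative PDF content stream
status, content = lzwEncode(raw_stream, None)   # no decode parameters
if status == 0:
    encoded_stream = content   # the LZW-encoded stream
else:
    print('encoding failed: ' + content)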
Example #4
    def test_typical_cases(self):

        string = "TOBEORNOTTOBEORTOBEORNOT"
        result = compress(string)
        self.assertSequenceEqual(result, [84, 79, 66, 69, 79, 82, 78, 79, 84, 256, 258, 260, 265, 259, 261, 263])

        string = "^WED^WE^WEE^WEB^WET"
        result = compress(string)
        self.assertSequenceEqual(result, [94, 87, 69, 68, 256, 69, 260, 261, 257, 66, 260, 84])

        string = "thisisthe"
        result = compress(string)
        self.assertSequenceEqual(result, [116, 104, 105, 115, 258, 256, 101])
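
The expected code sequences above are what textbook LZW produces when the dictionary starts with the 256 single-character entries and new phrases receive codes from 256 upward. A minimal, self-contained sketch of a compress function that satisfies these assertions (an illustration of the algorithm, not necessarily the implementation under test):

def compress(uncompressed):
    # Dictionary starts with one entry per single character, codes 0-255.
    dict_size = 256
    dictionary = {chr(i): i for i in range(dict_size)}

    w = ''
    result = []
    for c in uncompressed:
        wc = w + c
        if wc in dictionary:
            # Keep extending the current phrase while it is still known.
            w = wc
        else:
            # Emit the code for the longest known prefix and learn the new phrase.
            result.append(dictionary[w])
            dictionary[wc] = dict_size
            dict_size += 1
            w = c
    if w:
        result.append(dictionary[w])
    return result

For 'thisisthe' this yields [116, 104, 105, 115, 258, 256, 101], matching the last assertion.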
Example #5
def comprimir_archivos(origen, destino):
    for archivo in os.listdir(origen):
        comprimir = lzw.readbytes(os.path.abspath(origen + "/" + archivo))
        out = lzw.compress(comprimir)
        archivo_destino = os.path.abspath(destino + "/" + archivo +
                                          ".compressed")
        lzw.writebytes(archivo_destino, out)
Example #6
    def verify_compressed_file(self, testfile=GIANT_FILE):

        with tempfile.TemporaryFile("w+b") as compressedfile:

            originalsize = 0
            compressedsize = 0
            uncompressedsize = 0

            bigstream = lzw.readbytes(testfile)
            compressed = lzw.compress(bigstream)

            for bs in compressed:
                compressedsize = compressedsize + 1
                compressedfile.write(bs)

            ############################

            compressedfile.flush()
            compressedfile.seek(0)

            checkstream = lzw.readbytes(testfile)
            uncompressed = lzw.decompress(lzw.filebytes(compressedfile))

            for oldbyte, newbyte in six.moves.zip_longest(
                    checkstream, uncompressed):
                uncompressedsize = uncompressedsize + 1

                if oldbyte != newbyte:
                    msg = "Corrupted byte at {0}, original {1} != {2}".format(
                        uncompressedsize, oldbyte, newbyte)
                    self.assertEqual(oldbyte, newbyte, msg)
Example #7
def extend_with_ratio_metrics(**record):
    ret = {}

    for key in ['diff', 'neg_diff', 'editcomment']:
        value = record[key]

        total_len = len(value)
        upper_len = sum(c in uppercase for c in value)
        lower_len = sum(c in lowercase for c in value)
        digits_len = sum(c in digits for c in value)
        alnum_len = sum(c in alphanum for c in value)

        if not value:
            compressed_len = 0
        else:
            compressed_len = len(list(lzw.compress(value.encode('utf8'))))

        ret.update({
            key + '_ul_ratio': None if lower_len == 0 else upper_len / lower_len,
            key + '_u_ratio': None if total_len == 0 else upper_len / total_len,
            key + '_d_ratio': None if total_len == 0 else digits_len / total_len,
            key + '_non_alnum_ratio': None if total_len == 0 else (total_len - alnum_len) / total_len,
            key + '_compressibility': None if compressed_len == 0 else total_len / compressed_len,
        })

    return ret
Example #8
def calcular_distancia(buscados, comprimidos):
	for archivo in os.listdir(buscados):
		bytes_archivo = lzw.readbytes(os.path.abspath(buscados + "/" + archivo)) # X
		archivo_comprimido = lzw.compress(bytes_archivo) # C(X)
		for comprimido in os.listdir(comprimidos):
			bytes_comprimido = lzw.readbytes(os.path.abspath(comprimidos + "/" + comprimido)) # C(Y)
			bytes_descomprimido = lzw.decompress(bytes_comprimido) # Y
Example #9
    def verify_compressed_file(self, testfile=GIANT_FILE):

        with tempfile.TemporaryFile("w+b") as compressedfile:

            originalsize = 0
            compressedsize = 0
            uncompressedsize = 0

            bigstream = lzw.readbytes(testfile)
            compressed = lzw.compress(bigstream)
            
            for bs in compressed: 
                compressedsize = compressedsize + 1
                compressedfile.write(bs)

            ############################

            compressedfile.flush()
            compressedfile.seek(0)

            checkstream = lzw.readbytes(testfile)
            uncompressed = lzw.decompress(lzw.filebytes(compressedfile))

            for oldbyte, newbyte in six.moves.zip_longest(checkstream, uncompressed):
                uncompressedsize = uncompressedsize + 1

                if oldbyte != newbyte:
                    msg = "Corrupted byte at {0}, original {1} != {2}".format(uncompressedsize, oldbyte, newbyte)
                    self.assertEqual(oldbyte, newbyte, msg)
Example #10
 def post_build(self, pkt, pay):
     if not conf.contribs["http"]["auto_compression"]:
         return pkt + pay
     encodings = self._get_encodings()
     # Compress
     if "deflate" in encodings:
         import zlib
         pay = zlib.compress(pay)
     elif "gzip" in encodings:
         pay = gzip_compress(pay)
     elif "compress" in encodings:
         import lzw
         pay = lzw.compress(pay)
     elif "br" in encodings:
         if _is_brotli_available:
             pay = brotli.compress(pay)
         else:
             log_loading.info(
                 "Can't import brotli. brotli compression will "
                 "be ignored !")
     elif "zstd" in encodings:
         if _is_zstd_available:
             pay = zstandard.ZstdCompressor().compress(pay)
         else:
             log_loading.info(
                 "Can't import zstandard. zstd compression will "
                 "be ignored !")
     return pkt + pay
Example #11
    def test_compressdecompress(self):
        english = self.english
        gibberish = self.gibberish

        compressed = lzw.compress(english)
        compressed = [ b for b in compressed ]

        decompressed = b"".join(lzw.decompress(compressed))

        self.assertEqual(english, decompressed)

        compressed = lzw.compress(gibberish)
        compressed = [ b for b in compressed ]

        decompressed = b"".join(lzw.decompress(compressed))
        
        self.assertEqual(gibberish, decompressed)
Example #12
    def test_compressdecompress(self):
        english = self.english
        gibberish = self.gibberish

        compressed = lzw.compress(english)
        compressed = [ b for b in compressed ]

        decompressed = b"".join(lzw.decompress(compressed))

        self.assertEqual(english, decompressed)

        compressed = lzw.compress(gibberish)
        compressed = [ b for b in compressed ]

        decompressed = b"".join(lzw.decompress(compressed))
        
        self.assertEqual(gibberish, decompressed)
Example #13
    def test_typical_cases(self):

        string = "TOBEORNOTTOBEORTOBEORNOT"
        result = compress(string)
        self.assertSequenceEqual(result, [
            84, 79, 66, 69, 79, 82, 78, 79, 84, 256, 258, 260, 265, 259, 261,
            263
        ])

        string = "^WED^WE^WEE^WEB^WET"
        result = compress(string)
        self.assertSequenceEqual(
            result, [94, 87, 69, 68, 256, 69, 260, 261, 257, 66, 260, 84])

        string = "thisisthe"
        result = compress(string)
        self.assertSequenceEqual(result, [116, 104, 105, 115, 258, 256, 101])
Example #14
def compressed_cp_lines(cps):
    values_per_line = 12
    bytes_ = []
    for cp in cps:
        lzw.add_cp(bytes_, int(cp, 16))
    compressed_bytes = lzw.compress(bytes_)
    print 'rewrote {} * 32 = {} bits as {} * 8 = {} bits'.format(len(cps), len(cps)*32, len(bytes_), len(bytes_)*8)
    print 'compressed to {} * 16 = {} bits'.format(len(compressed_bytes), len(compressed_bytes) * 16)
    return lzw.compressed_bytes_to_lines(compressed_bytes, values_per_line)
Example #15
def compressed_prop_lines(cp_prop_pairs):
    values_per_line = 12

    bytes_ = uncompressed_prop_bytes(cp_prop_pairs)
    compressed_bytes = lzw.compress(bytes_)

    #print 'rewrote {} * 64 = {} bits as {} * 8 = {} bits'.format(len(cp_prop_pairs), len(cp_prop_pairs)*64, len(bytes_), len(bytes_)*8)
    #print 'compressed to {} * 16 = {} bits'.format(len(compressed_bytes), len(compressed_bytes) * 16)

    return lzw.compressed_bytes_to_lines(compressed_bytes, values_per_line)
Example #16
def compressed_case_mapping_lines(mappings):
    values_per_line = 12
    bytes_ = []
    for t in mappings:
        lzw.add_cp(bytes_, int(t[0], 16))
        lzw.add_short(bytes_, t[1][0])
        lzw.add_short(bytes_, t[1][1])
    compressed_bytes = lzw.compress(bytes_)
    print 'rewrote {} * 64 = {} bits as {} * 8 = {} bits'.format(len(mappings), len(mappings)*64, len(bytes_), len(bytes_)*8)
    print 'compressed to {} * 16 = {} bits'.format(len(compressed_bytes), len(compressed_bytes) * 16)
    return lzw.compressed_bytes_to_lines(compressed_bytes, values_per_line)
Example #17
    def encode(self, output_path):
        with open(output_path, 'wb') as w:
            # "GIF89a" in Hex
            w.write(bytes([0x47, 0x49, 0x46, 0x38, 0x39, 0x61]))

            # width and height in unsigned 2 byte (16 bit) little-endian
            width_bytes = (self.img.shape[1]).to_bytes(2, byteorder='little')
            height_bytes = (self.img.shape[0]).to_bytes(2, byteorder='little')
            w.write(width_bytes)
            w.write(height_bytes)

            # GCT follows for 256 colors with resolution 3 x 8 bits/primary;
            # the lowest 3 bits represent the bit depth minus 1, the highest
            # true bit means that the GCT is present
            w.write(bytes([0xf0 + self.color_table_bits - 1]))

            # Background color #0
            w.write(bytes([0x00]))
            # Default pixel aspect ratio
            w.write(bytes([0x00]))

            # Global color table (GCT)
            assert self.color_table_size == self.color_table.shape[0]
            for c in range(self.color_table_size):
                r,g,b = self.color_table[c]
                w.write(bytes([r, g, b]))

            # Graphic Control Extension (comment fields precede this in most files)
            w.write(bytes([0x21, 0xf9, 0x03, 0x00, 0x00, 0x00, 0x00]))

            # Image Descriptor
            w.write(bytes([0x2c]))
            w.write(bytes([0x00, 0x00, 0x00, 0x00])) # NW corner position of image in logical screen
            w.write(width_bytes)
            w.write(height_bytes)

            w.write(bytes([0x00])) # no local color table
            lzw_min = max(2, self.color_table_bits)
            max_code_size = 10

            # start of image - LZW minimum code size
            w.write(lzw_min.to_bytes(1, byteorder='little'))

            color_table_indices = ''.join([chr(x) for x in self.color_table_indices.flatten()])
            compressed_indices = lzw.compress(color_table_indices, lzw_min, max_code_size)

            for i, byte in enumerate(compressed_indices):
                if i % 255 == 0:
                    # Write length of coded stream in bytes (subblock can maximum be 255 long)
                    w.write((min(255, len(compressed_indices)-i)).to_bytes(1, byteorder='little'))
                w.write(byte.to_bytes(1, byteorder='little'))

            w.write(bytes([0x00, 0x3b])) # end of image data, end of GIF file
Example #18
 def post_build(self, pkt, pay):
     if not conf.contribs["http"]["auto_compression"]:
         return pkt + pay
     encodings = self._get_encodings()
     # Compress
     if "deflate" in encodings:
         import zlib
         pay = zlib.compress(pay)
     elif "gzip" in encodings:
         pay = gzip_compress(pay)
     elif "compress" in encodings:
         import lzw
         pay = lzw.compress(pay)
     return pkt + pay
Example #19
def compressed_case_mapping_to_lines(mappings):
    values_per_line = 12
    bytes_ = []
    for t in mappings:
        lzw.add_short(bytes_, t[0][0])
        lzw.add_short(bytes_, t[0][1])
        try:
            x = case_conditions[t[1]] # TODO: Totally wrong!  Just here for size eval.
        except:
            x = 0
        lzw.add_short(bytes_, x)
    compressed_bytes = lzw.compress(bytes_)
    print 'rewrote {} * 48 = {} bits as {} * 8 = {} bits'.format(len(mappings), len(mappings)*48, len(bytes_), len(bytes_)*8)
    print 'compressed to {} * 16 = {} bits'.format(len(compressed_bytes), len(compressed_bytes) * 16)
    return lzw.compressed_bytes_to_lines(compressed_bytes, values_per_line)
Example #20
    def pic_to_file(self, fh):
        pic = self.pic
        data = pack("HHB", len(pic[0]), len(pic), 11)

        pic_data = []
        for ln in pic:
            pic_data += ln
        pic_data = rle.encode(pic_data)
        pic_data = lzw.compress(pic_data)
        pic_data = bytearray(pic_data)

        hdr_data = pack('HH', 0x3058, len(data) + len(pic_data))
        fh.write(hdr_data)
        fh.write(data)
        fh.write(pic_data)
Example #21
def lzw_algm(filename):
    start_time = time.time()

    try:
        outfilename = filename + '.lzw'
        comp_time = timeit.default_timer()
        file_bytes = lzw.readbytes(filename)
        tot_time = timeit.default_timer() - comp_time
        print("reading time", tot_time)
        compressed = lzw.compress(file_bytes)
        comp_time = timeit.default_timer()
        lzw.writebytes(outfilename, compressed)
        tot_time = timeit.default_timer() - comp_time
        print("writing time", tot_time)
    finally:
        print("")
Example #22
def extract_features(sample, phon_dict, pc_words, prons, freqs, total_freqs,
                     function):
    features = {}
    sylls = utils.read_sample(sample)
    words = utils.read_sample(sample, words=True)
    nwords = sum(len(line) for line in words)

    # average word length
    features['word-length'] = np.mean([len(w) for line in words for w in line])
    # average word length in syllables
    features['word-length-syllables'] = np.mean(
        [len(s) for line in sylls for s in utils.group_syllables(line)])
    # alliteration score
    features['alliteration'] = alliteration_score(words, phon_dict)
    # repetitiveness
    features['stressed-vowel-repetitiveness'] = assonance_entropy(
        words, phon_dict)
    features['word-onset-repetitiveness'] = onset_entropy(words, phon_dict)
    features['word-repetitiveness'] = vocab_entropy(words)
    features['syllable-repetitiveness'] = vocab_entropy(sylls)
    # proportion of pc words in line
    features['pc-words'] = \
        len([w for line in words for w in line if w in pc_words]) / nwords
    features['pronouns'] = \
        len([w for line in words for w in line if w in prons]) / nwords
    # lexical diversity wrt general corpus
    features['unigram-ppl'] = unigram_ppl(words, freqs, total_freqs)
    features['lzw'] = len(' '.join([w for l in words for w in l])) / \
        len(lzw.compress(' '.join([w for l in words for w in l])))
    # syntactic features
    features['nwords'] = nwords
    features['nchars'] = sum(len(w) for line in words for w in line)
    features['nlines'] = len(words)
    sentences = [' '.join(s) for s in utils.read_sample(sample, words=True)]
    for key, val in syntactic_features.get_features(sentences).items():
        features[key] = val
    # flow features
    for key, val in rhyme_features.get_features(words, phon_dict).items():
        features[key] = val
    features['assonance'] = assonance(words, phon_dict, function)
    features['repeated-words'] = repeated_words(words)

    return features
Example #23
    ce_bytes = []
    for ce in collation_elements:
        x = '0' + ''.join(ce[0])
        if ce[0] != ('', ):
            x += '0' * (4 - len(ce[0])) * 2
        lzw.add_int(ce_bytes, int(x, 16))
        x = '0' + ''.join(ce[1])
        if ce[1] != ('', ):
            x += '0' * (2 - len(ce[1])) * 2
        lzw.add_short(ce_bytes, int(x, 16))
        x = '0' + ''.join(ce[2])
        if ce[2] != ('', ):
            x += '0' * (2 - len(ce[2])) * 2
        lzw.add_short(ce_bytes, int(x, 16))
    compressed_ces = lzw.compress(ce_bytes)

    def values_to_lines(values, value_type, values_per_chunk):
        retval = ''
        chunk_form = '''\
#ifdef _MSC_VER
{{
    std::array<{0}, {1}> values {{{{
#endif
    {2}
#ifdef _MSC_VER
    }}}};
    it = std::copy(values.begin(), values.end(), it);
}}
#endif
'''
Example #24
def compress():
    lzw.compress(
        r'D:\workspace.python\data-compressor\files\wap.txt',
        r'D:\workspace.python\data-compressor\files\wap_compressed.txt')
Example #25
def lzwEncode(stream, parameters):
	encodedStream = ''
	if parameters == None or parameters == {}:
		try:
			generator = lzw.compress(stream)
			for c in generator:
				encodedStream += c
			return (0,encodedStream)
		except:
			return (-1,'Error compressing string')
	else:
		if parameters.has_key('/Predictor'):
			predictor = parameters['/Predictor'].getRawValue()
		else:
			predictor = None
		if parameters.has_key('/Columns'):
			columns = parameters['/Columns'].getRawValue()
		else:
			columns = None
		if parameters.has_key('/Colors'):
			colors = parameters['/Colors'].getRawValue()
		else:
			colors = None
		if parameters.has_key('/BitsPerComponent'):
			bits = parameters['/BitsPerComponent'].getRawValue()
		else:
			bits = None
		if predictor != None and predictor != 1:
			# PNG prediction:
			if predictor >= 10 and predictor <= 15:
				output = ''
				# PNG prediction can vary from row to row
				for row in xrange(len(stream) / columns):
					rowdata = [ord(x) for x in stream[(row*columns):((row+1)*columns)]]
					filterByte = predictor - 10
					rowdata = [filterByte]+rowdata
					if filterByte == 0:
						pass
					elif filterByte == 1:
						for i in range(len(rowdata)-1,1,-1):
							if rowdata[i] < rowdata[i-1]:
								rowdata[i] = rowdata[i] + 256 - rowdata[i-1]
							else:
								rowdata[i] = rowdata[i] - rowdata[i-1]
					elif filterByte == 2:
						pass
					else:
						return (-1,'Unsupported parameters')
					output += (''.join([chr(x) for x in rowdata]))
			else:
				# unsupported predictor
				#sys.exit("Unsupported flatedecode predictor %r" % predictor)
				return (-1,'Unsupported parameters')
		else:
			output = stream
		try:
			generator = lzw.compress(output)
			for c in generator:
				encodedStream += c
			return (0,encodedStream)
		except:
			return (-1,'Error compressing string')
Example #26
 def encode(self, data):
     assert self.getParams()['EarlyChange'] == 1
     assert self.getParams()['Predictor'] == 1
     return ''.join(lzw.compress(data))
Example #27
def compressToOutputFile(input_file, output_file_name, option):
    print ("fileName: " + output_file_name)
    print ("option: " + str(option))
    global tupleList
    global bitStream
    if option == 1:
        file = open(output_file_name, 'w')
        file.write(open(input_file,'r').read())
        file.close()
        print "Size of input file in bytes: "
        print_file_size(input_file)
        print "Size of output file ("+ output_file_name +") in bytes: "
        print_file_size(output_file_name)

    if option == 2:
        # read the whole input file into a byte array
        fileSize = os.path.getsize(str(os.path.abspath((input_file))))
        fi = open(input_file, 'rb')
        # byteArr = map(ord, fi.read(fileSize))
        byteArr = bytearray(fi.read(fileSize))
        fi.close()
        fileSize = len(byteArr)
        print "Size of input file in bytes: ", fileSize

         # calculate the total number of each byte value in the file
        freqList = [0] * 256
        for b in byteArr:
            freqList[b] += 1

        # create a list of (frequency, byteValue, encodingBitStr) tuples
        tupleList = []
        for b in range(256):
            if freqList[b] > 0:
                tupleList.append((freqList[b], b, ''))

        # sort the list according to the frequencies descending
        tupleList = sorted(tupleList, key=lambda tup: tup[0], reverse = True)

        shannon_fano_encoder(0, len(tupleList) - 1)
        # print 'The list of (frequency, byteValue, encodingBitStr) tuples:'
        # print tupleList
        # print

        # create a dictionary of byteValue : encodingBitStr pairs
        dic = dict([(tup[1], tup[2]) for tup in tupleList])
        del tupleList # unneeded anymore
        # print 'The dictionary of byteValue : encodingBitStr pairs:'
        # print dic

        # write a list of (byteValue,3-bit(len(encodingBitStr)-1),encodingBitStr)
        # tuples as the compressed file header
        bitStream = ''
        fo = open(output_file_name, 'wb')
        fo.write(chr(len(dic) - 1)) # first write the number of encoding tuples
        for (byteValue, encodingBitStr) in dic.iteritems():
            # convert the byteValue into 8-bit and send to be written into file
            bitStr = bin(byteValue)
            bitStr = bitStr[2:] # remove 0b
            bitStr = '0' * (8 - len(bitStr)) + bitStr # add 0's if needed for 8 bits
            byteWriter(bitStr, fo)
            # convert len(encodingBitStr) to 3-bit and send to be written into file
            bitStr = bin(len(encodingBitStr) - 1) # 0b0 to 0b111
            bitStr = bitStr[2:] # remove 0b
            bitStr = '0' * (3 - len(bitStr)) + bitStr # add 0's if needed for 3 bits
            byteWriter(bitStr, fo)
            # send encodingBitStr to be written into file
            byteWriter(encodingBitStr, fo)

        # write 32-bit (input file size)-1 value
        bitStr = bin(fileSize - 1)
        bitStr = bitStr[2:] # remove 0b
        bitStr = '0' * (32 - len(bitStr)) + bitStr # add 0's if needed for 32 bits
        byteWriter(bitStr, fo)

        # write the encoded data
        for b in byteArr:
            byteWriter(dic[b], fo)

        byteWriter('0' * 8, fo) # to write the last remaining bits (if any)
        fo.close()

        print "Size of compressed putput file ("+ output_file_name +") in bytes: "
        print_file_size(output_file_name)

    if option == 3:

        print "Size of input file in bytes: "
        print_file_size(input_file)
        mybytes = lzw.readbytes(input_file)
        lessbytes = lzw.compress(mybytes)
        lzw.writebytes(output_file_name, lessbytes)
        print "Size of compressed putput file ("+ output_file_name +") in bytes: "
        print_file_size(output_file_name)

    if option == 4:

        print "Size of input file in bytes: "
        print_file_size(input_file)
        ar = arcode.ArithmeticCode(False)
        ar.encode_file(input_file, output_file_name)
        print "Size of compressed putput file ("+ output_file_name +") in bytes: "
        print_file_size(output_file_name)
Example #28
def test_compress_decompress_2():
    s = "rererere"
    cmp_s, _, dico = compress(s)
    res = decompress(cmp_s, dico)
    assert res == s
Example #29
def lzwEncode(stream, parameters):
    encodedStream = ''
    if parameters == None or parameters == {}:
        try:
            generator = lzw.compress(stream)
            for c in generator:
                encodedStream += c
            return (0, encodedStream)
        except:
            return (-1, 'Error compressing string')
    else:
        if parameters.has_key('/Predictor'):
            predictor = parameters['/Predictor'].getRawValue()
        else:
            predictor = None
        if parameters.has_key('/Columns'):
            columns = parameters['/Columns'].getRawValue()
        else:
            columns = None
        if parameters.has_key('/Colors'):
            colors = parameters['/Colors'].getRawValue()
        else:
            colors = None
        if parameters.has_key('/BitsPerComponent'):
            bits = parameters['/BitsPerComponent'].getRawValue()
        else:
            bits = None
        if predictor != None and predictor != 1:
            # PNG prediction:
            if predictor >= 10 and predictor <= 15:
                output = ''
                # PNG prediction can vary from row to row
                for row in xrange(len(stream) / columns):
                    rowdata = [
                        ord(x)
                        for x in stream[(row * columns):((row + 1) * columns)]
                    ]
                    filterByte = predictor - 10
                    rowdata = [filterByte] + rowdata
                    if filterByte == 0:
                        pass
                    elif filterByte == 1:
                        for i in range(len(rowdata) - 1, 1, -1):
                            if rowdata[i] < rowdata[i - 1]:
                                rowdata[i] = rowdata[i] + 256 - rowdata[i - 1]
                            else:
                                rowdata[i] = rowdata[i] - rowdata[i - 1]
                    elif filterByte == 2:
                        pass
                    else:
                        return (-1, 'Unsupported parameters')
                    output += (''.join([chr(x) for x in rowdata]))
            else:
                # unsupported predictor
                #sys.exit("Unsupported flatedecode predictor %r" % predictor)
                return (-1, 'Unsupported parameters')
        else:
            output = stream
        try:
            generator = lzw.compress(output)
            for c in generator:
                encodedStream += c
            return (0, encodedStream)
        except:
            return (-1, 'Error compressing string')
Example #30
def write_tiff(filename, data):
    """
    expects data to be a 3-dimensional numpy array (height, width, channels)
    of type numpy.float32
    """
    assert len(data.shape) == 3
    height, width, nrchannels = data.shape
    assert nrchannels == 4
    ROWSPERSTRIP = 32
    FIRSTSTRIP = 8
    BITSPERSAMPLE = 32
    stripoffsets = []
    stripbytecounts = []
    directory = {
        "width": (width, VT_SHORT),
        "height": (height, VT_SHORT),
        "bitspersample": (nrchannels * [BITSPERSAMPLE], VT_SHORT),
        "compression": (COMPRESSION_LZW, VT_SHORT),
        "photometric": (PHOTOMETRIC_RGB, VT_SHORT),
        "stripoffsets": (stripoffsets, VT_LONG),
        "orientation": (1, VT_SHORT),
        "samplesperpixel": (nrchannels, VT_SHORT),
        "rowsperstrip": (ROWSPERSTRIP, VT_SHORT),
        "stripbytecounts": (stripbytecounts, VT_LONG),
        "planarconfig": (1, VT_SHORT),
        "xposition": ((0, 1), VT_RATIONAL),
        "yposition": ((0, 1), VT_RATIONAL),
        "datetime": ("some time long ago", VT_ASCII),
        "predictor": (PREDICTOR_FLOAT, VT_SHORT),
        "extrasamples": (EXTRASAMPLES_ALPHA, VT_SHORT),
        "sampleformat": (nrchannels * [SAMPLEFORMAT_FLOAT], VT_SHORT),
        "xml": ("dontcare", VT_BYTE)
    }
    nrstrips = int(math.ceil(float(height) / ROWSPERSTRIP))
    stripstart = FIRSTSTRIP
    stripdata = []
    for stripnr in range(nrstrips):
        nrrows = min(height - ROWSPERSTRIP * stripnr, ROWSPERSTRIP)
        bytespersample = BITSPERSAMPLE / 8

        stripstring = data[stripnr * ROWSPERSTRIP:][:ROWSPERSTRIP].tostring()
        stripbytes = numpy.fromstring(stripstring, dtype=numpy.uint8)
        # reverse the thing we do in reading
        cumsummedstrip = (stripbytes.reshape(
            (nrrows, width * nrchannels, bytespersample))[:, :, ::-1].
            transpose(0, 2, 1))
        reshapedcumsummedstrip = cumsummedstrip.reshape(
            (nrrows, width * bytespersample, nrchannels))
        # now the second step is slightly more complex than in the read-case
        diffstrip = numpy.diff(reshapedcumsummedstrip, axis=1)
        # because the diffstrip only contains diffs, not the starting value
        # so we have to re-attach the starting column
        predictedstrip = numpy.concatenate((reshapedcumsummedstrip[:, 0:1, :],
                                            diffstrip), axis=1).tostring()

        compressedstrip = lzw.compress(predictedstrip)
        stripoffsets.append(stripstart)
        stripbytecounts.append(len(compressedstrip))
        stripstart += len(compressedstrip)
        stripdata.append(compressedstrip)

    log.debug("Found strips of sizes: %s", repr(stripbytecounts))

    with open(filename, "w+b") as f:
        f.write(TIFF_HEADER)
        directorystart = stripstart
        write_uint32(f, directorystart)
        # pad.... Not sure if we need the padding at all or can just have the
        # first strip start at position 8....
        while f.tell() < FIRSTSTRIP:
            f.write('\x00')
        for stripstring in stripdata:
            f.write(stripstring)
        assert f.tell() == directorystart
        write_uint16(f, len(directory))
        extradatastart = (directorystart + 2 +
                          DIRECTORY_ENTRY_LENGTH * len(directory) +
                          len(END_OF_DIRECTORY_PADDING))
        extradata = ""

        assert len(directory) == len(FIELD)
        for info in FIELD:
            tagname, tag = info[:2]
            assert tagname in directory

            value = directory[tagname][0]
            vt_type = directory[tagname][1]
            write_uint16(f, tag)
            write_uint16(f, vt_type)

            if isinstance(value, list):
                values = value
            else:
                values = [value]

            if vt_type == VT_BYTE:
                towrite = value
            elif vt_type == VT_ASCII:
                towrite = "\x00".join(values) + "\x00"
            elif vt_type in [VT_SHORT, VT_LONG]:
                packformat = "<" + len(values) * VALUETYPE[vt_type][0]
                towrite = struct.pack(packformat, *values)
            else:
                assert vt_type == VT_RATIONAL
                packformat = "<" + len(values) * VALUETYPE[vt_type][0]
                topack = sum(values, ())
                towrite = struct.pack(packformat, *topack)

            length = len(towrite) / VALUETYPE[vt_type][1]
            write_uint32(f, length)

            if len(towrite) > 4:
                pointer = extradatastart + len(extradata)
                write_uint32(f, pointer)
                extradata += towrite
            else:
                f.write((towrite + 4 * '\x00')[:4])
        f.write(END_OF_DIRECTORY_PADDING)
        assert f.tell() == extradatastart
        if extradata:
            f.write(extradata)
Example #31
import lzw

mybytes = lzw.readbytes("ElQuijote.txt")
lessbytes = lzw.compress(mybytes)

outFile = open("Compressed.txt", 'wb')
outFile.write(b"".join(lessbytes))
outFile.close()

newbytes = b"".join(lzw.decompress(lessbytes))
oldbytes = b"".join(lzw.readbytes("ElQuijote.txt"))

print(oldbytes == newbytes)
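
If lzw.compress yields its output lazily, as the list-materializing pattern in the round-trip tests above suggests, the b"".join used for the file write would exhaust lessbytes before the later decompress call. A hedged variant of the same round trip that materializes the compressed chunks once, assuming the generator-based API this example appears to use:

import lzw

mybytes = lzw.readbytes("ElQuijote.txt")
lessbytes = list(lzw.compress(mybytes))   # materialize so the chunks can be reused

with open("Compressed.txt", 'wb') as outFile:
    outFile.write(b"".join(lessbytes))

newbytes = b"".join(lzw.decompress(lessbytes))
oldbytes = b"".join(lzw.readbytes("ElQuijote.txt"))
print(oldbytes == newbytes)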
Example #32
def test_compress_decompress_5():
    s = "abcd*dccacbdda*aaddcba*"
    cmp_s, _, dico = compress(s)
    res = decompress(cmp_s, dico)
    assert res == s
Example #33
def approximate_KC_string(x):

    compressed_string = compress(x)
    total_bits = calculate_bits(compressed_string)
    return total_bits
Example #34
import lzw

print("Compressing text...")
print()
compressed = lzw.compress('darth_plagueis.txt')
print("Compressed text: ")
print(compressed)
print()

print("Decompressing text...")
lzw.decompress(compressed, 'darth_plagueis_out.txt')
Example #35
    def determinRepetition(self, text):
        compressed = lzw.compress(text)
        sbytes     = str.encode(text.encode('utf8'))
        ratio      = float(len("".join(sbytes))) / float(len("".join(compressed)))

        return ratio
Example #36
import lzw

infile = lzw.readbytes("3_1.spc")
compressed = lzw.compress(infile)
lzw.writebytes("3_1.spc.compressed", compressed)
infile.close()

infile = lzw.readbytes("3_1.spc.compressed")
uncompressed = lzw.decompress(infile)
lzw.writebytes("3_1.spc.decompressed", uncompressed)
infile.close()
Example #37
        nfkc_quick_check = 'quick_check::yes'
        if cp in quick_check_maps['NFKC']:
            nfkc_quick_check = quick_check_maps['NFKC'][cp]
        lzw.add_cp(prop_bytes_, cp)
        lzw.add_short(prop_bytes_, canonical_decomp[0])
        lzw.add_short(prop_bytes_, canonical_decomp[1])
        lzw.add_short(prop_bytes_, compatible_decomp[0])
        lzw.add_short(prop_bytes_, compatible_decomp[1])
        lzw.add_byte(prop_bytes_, int(ccc))
        lzw.add_byte(prop_bytes_, \
                     quick_checks_to_byte(nfd_quick_check, nfkd_quick_check))
        lzw.add_byte(prop_bytes_, \
                     quick_checks_to_byte(nfc_quick_check, nfkc_quick_check))

    value_per_line = 12
    compressed_bytes = lzw.compress(prop_bytes_)
    props_lines, num_shorts = lzw.compressed_bytes_to_lines(
        compressed_bytes, value_per_line)
    #print 'rewrote {} * 144 = {} bits as {} * 8 = {} bits'.format(len(all_cps), len(all_cps)*144, len(prop_bytes_), len(prop_bytes_)*8)
    #print 'compressed to {} * 16 = {} bits'.format(len(compressed_bytes), len(compressed_bytes) * 16)

    cpp_file = open('normalization_data_cp_props.cpp', 'w')
    cpp_file.write(
        cp_props_file_form.format(canon_all_cps_string,
                                  len(canon_all_cps), compat_all_cps_string,
                                  len(compat_all_cps), props_lines, num_shorts,
                                  len(all_cps)))


def cps_string(cps):
    cps = map(lambda x: hex(x)[2:], cps)
Example #38
def test_compress_decompress_4():
    s = "pourquoi pas"
    cmp_s, _, dico = compress(s)
    res = decompress(cmp_s, dico)
    assert res == s
Example #39
def test_compress_decompress_1():
    s = "ab*cde*fgh*"
    cmp_s, _, dico = compress(s)
    res = decompress(cmp_s, dico)
    assert res == s
Example #40
def test_compress_decompress_3():
    s = "coucou"
    cmp_s, _, dico = compress(s)
    res = decompress(cmp_s, dico)
    assert res == s
Example #41
def comprimir_archivos(origen, destino):	
	for archivo in os.listdir(origen):		
		comprimir = lzw.readbytes(os.path.abspath(origen + "/" + archivo))
		out = lzw.compress(comprimir)
		archivo_destino = os.path.abspath(destino + "/" + archivo + ".compressed")
		lzw.writebytes(archivo_destino, out)
Example #42
 def encode(self, data):
     assert self.getParams()['EarlyChange']==1
     assert self.getParams()['Predictor']==1
     return ''.join(lzw.compress(data))
Example #43
def approximate_KC_concat(x, y):

    concat = x + y
    compressed_string = compress(concat)
    total_bits = calculate_bits(compressed_string)
    return total_bits
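
Together with approximate_KC_string from Example #33, this concatenation variant is the usual ingredient of a normalized compression distance. A hedged sketch of how the two might be combined; the formula below is the standard NCD definition, not something stated in these examples:

def normalized_compression_distance(x, y):
    # NCD(x, y) = (K(xy) - min(K(x), K(y))) / max(K(x), K(y)),
    # with K(.) approximated by the compressed size in bits.
    kx = approximate_KC_string(x)
    ky = approximate_KC_string(y)
    kxy = approximate_KC_concat(x, y)
    return (kxy - min(kx, ky)) / max(kx, ky)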
Example #44
 def encode(self, data):
     assert self.getParams()["EarlyChange"] == 1
     assert self.getParams()["Predictor"] == 1
     return lzw.compress(data)