-
Notifications
You must be signed in to change notification settings - Fork 0
/
xfp.py
210 lines (172 loc) · 5.07 KB
/
xfp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#
#
#
#
from huffman import get_huff_symbols
import numpy as np
from bitstring import BitString
def _add_data(toAdd, compressedBytes, nextByte):
# add bytes to compressedBytes bytestring
for i in toAdd:
nextByte += i
if len(nextByte) == 8:
compressedBytes.append(int(nextByte, 2))
nextByte = ''
return nextByte
def tobits(n, nbits=8, signed=False):
    """Return integer n as a string of '0'/'1' characters, nbits long.

    When signed is truthy n is encoded as a two's-complement signed
    value; otherwise it is encoded as an unsigned value. The conversion
    is delegated to bitstring's BitString.
    """
    if signed:
        return BitString(int=n, length=nbits).bin
    return BitString(uint=n, length=nbits).bin
def getsigned(n, nbits=8):
    """Reinterpret the unsigned value n as a signed integer nbits wide."""
    # build the nbits-wide bit pattern of n, then read it back as signed
    pattern = BitString(uint=n, length=nbits)
    return pattern.int
def minbits(n):
    """Return the minimum number of bits needed to represent integer n.

    For n >= 0 the count is for an unsigned encoding (0 and 1 both need
    one bit). For n < 0 the count is for a two's-complement encoding,
    sign bit included (-1 -> 1, -2 -> 2, -128 -> 8).

    n may be a Python int or any integer-like scalar accepted by int().
    """
    # Work on an arbitrary-precision Python int. A fixed-width integer
    # scalar (e.g. numpy.int16) can wrap around while being shifted, and
    # a shift-until-zero loop on the wrapped value never terminates.
    n = int(n)
    if n < 0:
        # ~n == -(n + 1) carries the magnitude bits; add one for the sign
        return (~n).bit_length() + 1
    # bit_length() of 0 is 0, but a zero still occupies one bit
    return max(1, n.bit_length())
def compressData(data):
    """Huffman-compress a NumPy integer array into a bytes object.

    Stream layout (the format decompressData reads back):
      bytes 0-1 : number of symbol-table entries, little-endian
      byte 2    : key width in bits
      followed by a packed bit stream:
        1 bit                : 0 = keys unsigned, 1 = keys signed
        per table entry      : key (key-width bits), code length (8 bits),
                               code bits
        8 bits               : number of dimensions of data
        64 bits per dimension: the dimension sizes
        64 bits              : number of elements
        then the code of every element, in np.nditer order
      zero-padded up to a whole byte.

    data: NumPy array; every element is passed through int() before its
        code is looked up.
    Returns the compressed stream as bytes.
    Raises ValueError if the symbol table has more than 65535 entries,
        because the entry count is stored in two bytes.
    """
    compressedBytes = []
    nextByte = ''

    # symbol table; used below as a mapping from value to its code, where
    # the code is a string of '0'/'1' characters
    symbols = get_huff_symbols(data)

    # The entry count only has two bytes in the header. Masking a larger
    # count would silently produce a stream that cannot be decoded.
    tableLen = len(symbols)
    if tableLen > 0xFFFF:
        raise ValueError(
            'symbol table has %d entries; at most 65535 can be stored'
            % tableLen)
    compressedBytes += [tableLen & 0xFF, tableLen >> 8]

    # width of a table key, and whether keys need a sign
    smallest = min(symbols.keys())
    largest = max(symbols.keys())
    if smallest >= 0:
        # no negative values: plain unsigned keys
        signed = False
        keybits = minbits(largest)
        signFlag = '0'
    else:
        # negative values present: keys are stored signed
        signed = True
        posbits = minbits(largest)
        negbits = minbits(smallest)
        if negbits > posbits:
            keybits = negbits
        else:
            # make room for a sign bit on top of the largest value
            keybits = posbits + 1
        signFlag = '1'
    nextByte = _add_data(tobits(keybits, nbits=8), compressedBytes, nextByte)
    nextByte = _add_data(signFlag, compressedBytes, nextByte)

    # save symbol table: key, code length, code
    for key, value in symbols.items():
        nextByte = _add_data(tobits(key, nbits=keybits, signed=signed),
                             compressedBytes, nextByte)
        nextByte = _add_data(tobits(len(value), nbits=8),
                             compressedBytes, nextByte)
        nextByte = _add_data(value, compressedBytes, nextByte)

    # save the array shape: dimension count, then each dimension
    nextByte = _add_data(tobits(len(data.shape), nbits=8),
                         compressedBytes, nextByte)
    for dim in data.shape:
        nextByte = _add_data(tobits(dim, nbits=64), compressedBytes, nextByte)

    # save number of encoded elements as a 64 bit integer
    nextByte = _add_data(tobits(data.size, nbits=64),
                         compressedBytes, nextByte)

    # emit the code of every element
    for v in np.nditer(data):
        nextByte = _add_data(symbols[int(v)], compressedBytes, nextByte)

    # pad with zero bits until the last byte is full
    if nextByte:
        nextByte += '0' * (8 - len(nextByte))
        compressedBytes.append(int(nextByte, 2))
    return bytes(compressedBytes)
def decompressData(compressed):
    """Decode a stream produced by compressData back into a NumPy array.

    compressed: bytes-like sequence of byte values. Bytes 0-1 hold the
        symbol-table entry count (little-endian) and byte 2 the key width
        in bits; the rest is a packed bit stream holding the sign flag,
        the symbol table, the array shape, the element count and the
        encoded elements.

    Returns an array with the stored shape. Its dtype is picked from the
    key width and sign flag (uint8/int8 up to 8 bits, then 16, 32 and 64
    bit types); the dtype of the array that was compressed is not stored
    in the stream.
    """
    tableLen = compressed[0] + (compressed[1] << 8)
    keybits = compressed[2]

    # Expand the payload into one '0'/'1' string in a single pass, 8
    # characters per byte with the most significant bit first.
    bindata = ''.join(format(byte, '08b') for byte in compressed[3:])

    # first payload bit is the sign flag: '0' unsigned, anything else signed
    signed = bindata[0] != '0'
    rindex = 1

    # rebuild the lookup table: code string -> symbol value
    table = {}
    for _ in range(tableLen):
        # table key
        symbol = int(bindata[rindex:rindex + keybits], 2)
        rindex += keybits
        if signed:
            symbol = getsigned(symbol, keybits)
        # length of the code in bits
        vlen = int(bindata[rindex:rindex + 8], 2)
        rindex += 8
        # the code itself
        table[bindata[rindex:rindex + vlen]] = symbol
        rindex += vlen

    # array shape: dimension count followed by 64-bit dimension sizes
    slen = int(bindata[rindex:rindex + 8], 2)
    rindex += 8
    dims = []
    for _ in range(slen):
        dims.append(int(bindata[rindex:rindex + 64], 2))
        rindex += 64
    shape = tuple(dims)

    # number of encoded elements
    clen = int(bindata[rindex:rindex + 64], 2)
    rindex += 64

    # smallest standard integer dtype that can hold a key of this width
    if keybits <= 8:
        dc_dtype = np.int8 if signed else np.uint8
    elif keybits <= 16:
        dc_dtype = np.int16 if signed else np.uint16
    elif keybits <= 32:
        dc_dtype = np.int32 if signed else np.uint32
    else:
        dc_dtype = np.int64 if signed else np.uint64

    # decode element by element: extend the current bit run until it
    # matches a code in the table
    decompressed = np.zeros(clen, dtype=dc_dtype)
    for dcindex in range(clen):
        curr = ''
        while curr not in table:
            curr += bindata[rindex]
            rindex += 1
        decompressed[dcindex] = table[curr]
    return decompressed.reshape(shape)
def main():
    """Round-trip self-test: compress a test array, decompress it, compare.

    Builds a 10000-element int16 array whose first 100 entries are random
    and the rest zero, runs it through compressData and decompressData,
    and checks every element.

    Prints the index of the first mismatch and returns 1 on failure.
    On success prints the compression ratio (compressed bytes divided by
    the byte size of the input array) and returns 0.
    """
    # Imported here so main() also works when this module is imported;
    # otherwise `random` is only bound when the file is run as a script.
    import random

    testdata = np.zeros((10000), dtype=np.int16)
    for i in range(100):
        testdata[i] = (random.random()*6553)*random.random()
    # alternative input, a text file as uint8:
    #testdata = np.array(list(open('../../bible.txt', 'rb').read())).astype(np.uint8)

    compressed = compressData(testdata)
    decompressed = decompressData(compressed)

    # walk both arrays in lockstep; c_index exposes the flat position
    iterator = np.nditer(op=[testdata, decompressed], flags=['c_index'])
    for truth, test in iterator:
        if truth != test:
            print('Failure at index ' + str(iterator.index))
            return 1

    # Compare bytes with bytes: the input has itemsize bytes per element,
    # so dividing by the element count would misstate the ratio for any
    # dtype wider than one byte.
    print('Success with compression ratio: ' + str(len(compressed)/testdata.nbytes))
    return 0
if __name__ == '__main__':
    import numpy as np
    import random
    # NOTE(review): cv2 is not referenced anywhere in this file; the import
    # is kept as-is, but it makes OpenCV a requirement for running the
    # self-test. Consider dropping it.
    import cv2 as cv
    # main() returns 0 on success and 1 on failure; pass that on as the
    # process exit status instead of discarding it
    raise SystemExit(main())