def parse_binary_compressed_pc_data(f, dtype, metadata):
    """Parse the body of a ``binary_compressed`` PCD file.

    On-disk layout (after the header): compressed size (uint32),
    uncompressed size (uint32), LZF-compressed payload, trailing junk.
    The decompressed payload stores the cloud field-by-field (column
    major), not point-by-point.

    Parameters
    ----------
    f : binary file object positioned just past the PCD header.
    dtype : np.dtype describing one point (one entry per field).
    metadata : dict parsed from the header; 'width' is read here.

    Returns
    -------
    Structured numpy array of length ``metadata['width']``.

    Raises
    ------
    Exception if the decompressed size does not match the header.
    """
    fmt = 'II'
    compressed_size, uncompressed_size = \
        struct.unpack(fmt, f.read(struct.calcsize(fmt)))
    compressed_data = f.read(compressed_size)
    # python-lzf needs the expected output size as its second argument;
    # the header gives it to us exactly.
    buf = lzf.decompress(compressed_data, uncompressed_size)
    if len(buf) != uncompressed_size:
        raise Exception('Error decompressing data')
    # The data is stored field-by-field: copy each field's column out of
    # the flat buffer into the structured array.
    pc_data = np.zeros(metadata['width'], dtype=dtype)
    ix = 0
    for dti in range(len(dtype)):
        dt = dtype[dti]
        # renamed from 'bytes' to avoid shadowing the builtin
        field_bytes = dt.itemsize * metadata['width']
        # np.frombuffer replaces np.fromstring (deprecated since NumPy
        # 1.14, removed in NumPy 2.0)
        column = np.frombuffer(buf[ix:(ix + field_bytes)], dt)
        pc_data[dtype.names[dti]] = column
        ix += field_bytes
    return pc_data
def pure_python_loads(data):
    "convert a mummy string into the python object it represents"
    # NOTE: Python 2 semantics — `data` is a byte string, so data[0] is a
    # one-character str and ord()/chr() operate on single bytes.
    if not data:
        raise ValueError("no data from which to load")
    # The high bit of the first (kind) byte flags an lzf-compressed payload.
    if ord(data[0]) >> 7:
        # `lzf` is the optional python-lzf binding; falsy when unavailable.
        if not lzf:
            raise RuntimeError("can't decompress without python-lzf")
        # Strip the compression flag from the kind byte, read the 4-byte
        # uncompressed size (via the sibling _load_int helper), and keep
        # the rest as the compressed body.
        kind, ucsize, data = (chr(ord(data[0]) & 0x7F),
                              _load_int(data[1:5])[0],
                              data[5:])
        # Rebuild an uncompressed mummy string: kind byte + inflated body.
        # NOTE(review): ucsize + 1 presumably leaves headroom for python-lzf's
        # exact-size requirement — confirm against the writer side.
        data = kind + lzf.decompress(data, ucsize + 1)
    # _loads/string are siblings in this module; _loads returns (obj, rest).
    return _loads(string(data))[0]
def parseBINARY_COMPRESSED(self):
    """Parse a ``binary_compressed`` PCD body and delegate to parseBINARY.

    Layout: compressed length (int32), decompressed length (int32),
    LZF-compressed body. The decompressed bytes are wrapped in a
    BytesIO and handed to ``self.parseBINARY``.

    NOTE(review): this was marked BROKEN against pcl's LZF; the original
    passed an arbitrary 1 GB cap to lzf.decompress instead of the exact
    decompressed length read from the header, which python-lzf expects.
    Verify against a real pcl-written file.

    Raises
    ------
    ValueError if decompression fails or yields the wrong length.
    """
    compressed_len = struct.unpack('<i', self.file.read(4))[0]
    decompressed_len = struct.unpack('<i', self.file.read(4))[0]
    compressed_body = self.file.read(compressed_len)
    # Pass the exact expected size (fixes the old 1 GB max_size guess);
    # python-lzf returns None when the output does not fit.
    decompressed_body = lzf.decompress(compressed_body, decompressed_len)
    if decompressed_body is None or len(decompressed_body) != decompressed_len:
        raise ValueError('LZF decompression failed or size mismatch')
    fobj = io.BytesIO(decompressed_body)
    self.parseBINARY(fobj)
def lzf_decompress(self, compressed, expected_length):
    """Decompress LZF-compressed bytes.

    Uses the python-lzf C binding when available; otherwise falls back
    to a pure-Python implementation of the LZF stream format below.

    Raises an Exception when a control byte is malformed or the output
    length does not match ``expected_length``.
    """
    if HAS_PYTHON_LZF:
        return lzf.decompress(compressed, expected_length)
    else:
        in_stream = bytearray(compressed)
        in_len = len(in_stream)
        in_index = 0
        out_stream = bytearray()
        out_index = 0
        while in_index < in_len :
            # Each chunk starts with a control byte.
            ctrl = in_stream[in_index]
            if not isinstance(ctrl, int) :
                raise Exception('lzf_decompress',
                                'ctrl should be a number %s for key %s' % (str(ctrl), self._key))
            in_index = in_index + 1
            if ctrl < 32 :
                # Literal run: ctrl+1 raw bytes follow, copied verbatim.
                for x in range(0, ctrl + 1) :
                    out_stream.append(in_stream[in_index])
                    #sys.stdout.write(chr(in_stream[in_index]))
                    in_index = in_index + 1
                    out_index = out_index + 1
            else :
                # Back-reference: top 3 bits of ctrl are the length.
                length = ctrl >> 5
                if length == 7 :
                    # Length 7 means an extra length byte follows.
                    length = length + in_stream[in_index]
                    in_index = in_index + 1
                # Offset: low 5 bits of ctrl are the high part, next byte
                # the low part; reference is relative to current output.
                ref = out_index - ((ctrl & 0x1f) << 8) - in_stream[in_index] - 1
                in_index = in_index + 1
                # Copy length+2 bytes byte-by-byte — the source region may
                # overlap the bytes being written (LZF relies on this).
                for x in range(0, length + 2) :
                    out_stream.append(out_stream[ref])
                    ref = ref + 1
                    out_index = out_index + 1
        if len(out_stream) != expected_length :
            raise Exception('lzf_decompress',
                            'Expected lengths do not match %d != %d for key %s' % (len(out_stream), expected_length, self._key))
        return bytes(out_stream)
def read_pcd(filename):
    """Read a pcd file and return the elements as pandas DataFrames.

    Supports ``ascii``, ``binary`` and ``binary_compressed`` DATA
    sections. Header parsing is delegated to the sibling helpers
    ``parse_header`` and ``build_dtype``.

    Parameters
    ----------
    filename: str
        Path to the pcd file.

    Returns
    -------
    dict with a "points" key holding a pandas DataFrame.

    Raises
    ------
    Exception if a compressed body fails to decompress to the size
    declared in the header.
    """
    data = {}
    with open(filename, 'rb') as f:
        header = []
        while True:
            ln = f.readline().strip().decode()
            header.append(ln)
            if ln.startswith('DATA'):
                metadata = parse_header(header)
                dtype = build_dtype(metadata)
                break
        if metadata['data'] == 'ascii':
            pc_data = np.loadtxt(f, dtype=dtype, delimiter=' ')
        elif metadata['data'] == 'binary':
            rowstep = metadata['points'] * dtype.itemsize
            # for some reason pcl adds empty space at the end of files,
            # so read exactly the bytes the header declares
            buf = f.read(rowstep)
            # np.frombuffer replaces np.fromstring (deprecated since
            # NumPy 1.14, removed in NumPy 2.0)
            pc_data = np.frombuffer(buf, dtype=dtype)
        elif metadata['data'] == 'binary_compressed':
            # layout: compressed size (uint32), uncompressed size
            # (uint32), LZF-compressed data, trailing junk
            fmt = 'II'
            compressed_size, uncompressed_size = \
                struct.unpack(fmt, f.read(struct.calcsize(fmt)))
            compressed_data = f.read(compressed_size)
            # python-lzf takes the expected output size as its second
            # argument; the header supplies it exactly
            buf = lzf.decompress(compressed_data, uncompressed_size)
            if len(buf) != uncompressed_size:
                raise Exception('Error decompressing data')
            # the data is stored field-by-field (column major)
            pc_data = np.zeros(metadata['width'], dtype=dtype)
            ix = 0
            for dti in range(len(dtype)):
                dt = dtype[dti]
                # renamed from 'bytes' to avoid shadowing the builtin
                field_bytes = dt.itemsize * metadata['width']
                column = np.frombuffer(buf[ix:(ix + field_bytes)], dt)
                pc_data[dtype.names[dti]] = column
                ix += field_bytes
    data["points"] = pd.DataFrame(pc_data)
    return data
print("Uncompressed: {0}".format(len(uncompressed_blobs))) for compressed_blob in compressed_blobs: try: print "Decompress {0}...".format(compressed_blob["compressed_filename"]) if False: decompressor = zlib.decompressobj() cs = hashlib.sha256() with io.FileIO(DUMP_STORAGE_PATH + session_guid + "/" + compressed_blob["compressed_filename"], 'rb') as fd: with io.FileIO(DUMP_STORAGE_PATH + session_guid + "/" + compressed_blob["decompressed_filename"], 'wb') as n_fd: if True: cd_data = fd.readall() dc_data = lzf.decompress(cd_data, 1 * 1024 * 1024 * 1024) n_fd.write(dc_data) cs.update(dc_data) else: for data in iter(lambda: fd.read(8192), ''): dc_data = decompressor.decompress(data) n_fd.write(dc_data) cs.update(dc_data) dc_data = decompressor.flush() n_fd.write(dc_data) cs.update(dc_data)
def test_decompresses_correctly(self):
    """An lzf round-trip must restore the original value exactly."""
    packed = self.compress(self.VAL)
    restored = lzf.decompress(packed, len(self.VAL))
    self.assertEqual(restored, self.VAL)
def test_selective(self):
    """decompress yields None when the size hint is one byte short,
    and a truthy result when the hint is exactly right."""
    packed = self.compress(self.VAL)
    too_small = len(self.VAL) - 1
    self.assertEqual(lzf.decompress(packed, too_small), None)
    assert lzf.decompress(packed, len(self.VAL))
def read_pcd(filename):
    """Read a pcd file and return the elements as pandas DataFrames.

    Supports ``ascii``, ``binary`` and ``binary_compressed`` DATA
    sections. Header parsing is delegated to the sibling helpers
    ``parse_header`` and ``build_dtype``.

    Parameters
    ----------
    filename: str
        Path to the pcd file (docstring previously said "obj file").

    Returns
    -------
    dict with a "points" key holding a pandas DataFrame.

    Raises
    ------
    Exception if a compressed body fails to decompress to the size
    declared in the header.
    """
    data = {}
    with open(filename, 'rb') as f:
        header = []
        while True:
            ln = f.readline().strip().decode()
            header.append(ln)
            if ln.startswith('DATA'):
                metadata = parse_header(header)
                dtype = build_dtype(metadata)
                break
        if metadata['data'] == 'ascii':
            pc_data = np.loadtxt(f, dtype=dtype, delimiter=' ')
        elif metadata['data'] == 'binary':
            rowstep = metadata['points'] * dtype.itemsize
            # for some reason pcl adds empty space at the end of files,
            # so read exactly the bytes the header declares
            buf = f.read(rowstep)
            # np.frombuffer replaces np.fromstring (deprecated since
            # NumPy 1.14, removed in NumPy 2.0)
            pc_data = np.frombuffer(buf, dtype=dtype)
        elif metadata['data'] == 'binary_compressed':
            # layout: compressed size (uint32), uncompressed size
            # (uint32), LZF-compressed data, trailing junk
            fmt = 'II'
            compressed_size, uncompressed_size = \
                struct.unpack(fmt, f.read(struct.calcsize(fmt)))
            compressed_data = f.read(compressed_size)
            # python-lzf takes the expected output size as its second
            # argument; the header supplies it exactly
            buf = lzf.decompress(compressed_data, uncompressed_size)
            if len(buf) != uncompressed_size:
                raise Exception('Error decompressing data')
            # the data is stored field-by-field (column major)
            pc_data = np.zeros(metadata['width'], dtype=dtype)
            ix = 0
            for dti in range(len(dtype)):
                dt = dtype[dti]
                # renamed from 'bytes' to avoid shadowing the builtin
                field_bytes = dt.itemsize * metadata['width']
                column = np.frombuffer(buf[ix:(ix + field_bytes)], dt)
                pc_data[dtype.names[dti]] = column
                ix += field_bytes
    data["points"] = pd.DataFrame(pc_data)
    return data