def compress(data: bytes):
    """Run-length encode *data* and return the writer's buffer (``wtr.data``).

    Layout produced by the writes below:

    * one byte ``0x30`` (rle identifier);
    * the input length as a 24-bit value, or ``0`` followed by the length
      as a 32-bit value when it does not fit in 24 bits;
    * a sequence of blocks, each introduced by a flag byte:
        - ``flag < 0x80``: ``flag + 1`` literal bytes follow (1..128);
        - ``flag >= 0x80``: one byte follows, repeated
          ``(flag & 0x7f) + 3`` times (3..130).

    Only runs of three or more equal bytes are emitted as run blocks.
    """
    rdr = BinaryReader(data)
    wtr = BinaryWriter()
    wtr.write_uint8(0x30)  # rle identifier

    # The 24-bit field can hold every length up to and including 0xffffff;
    # only longer inputs need the zero marker plus the 32-bit length.
    size = len(data)
    if size <= 0xffffff:
        wtr.write_uint24(size)
    else:
        wtr.write_uint24(0)
        wtr.write_uint32(size)

    # A run flag stores (run length - 3) in 7 bits, so a run is at most
    # 0x7f + 3 = 130 bytes; the scratch buffer is sized to hold a full run.
    max_block = 130
    total = len(rdr.data)
    rep_count = 1
    current_block_length = 0
    data_block = [0] * max_block

    while rdr.c < total:
        found_repetition = False

        # Buffer input until three equal bytes in a row appear, the buffer
        # is full, or the input ends.
        while current_block_length < max_block and rdr.c < total:
            next_byte = rdr.read_uint8()
            data_block[current_block_length] = next_byte
            current_block_length += 1
            if current_block_length > 1:
                if next_byte == data_block[current_block_length - 2]:
                    rep_count += 1
                else:
                    rep_count = 1
            found_repetition = rep_count > 2
            if found_repetition:
                break

        if found_repetition:
            # Everything in front of the three repeated bytes is literal.
            num_uncomp_to_copy = current_block_length - 3
        else:
            # A literal block holds at most 128 bytes; when the buffer is
            # full the last two bytes are held back because they may be the
            # start of a run.
            num_uncomp_to_copy = min(current_block_length, max_block - 2)

        if num_uncomp_to_copy > 0:
            wtr.write_uint8(num_uncomp_to_copy - 1)
            for i in range(num_uncomp_to_copy):
                wtr.write_uint8(data_block[i])
            # Move the bytes that were not written to the front of the buffer.
            remaining = current_block_length - num_uncomp_to_copy
            data_block[:remaining] = \
                data_block[num_uncomp_to_copy:current_block_length]
            current_block_length = remaining

        if found_repetition:
            # Extend the run until a different byte shows up, the buffer is
            # full, or the input ends.
            while current_block_length < max_block and rdr.c < total:
                next_byte = rdr.read_uint8()
                data_block[current_block_length] = next_byte
                current_block_length += 1
                if next_byte != data_block[0]:
                    break
                rep_count += 1

            wtr.write_uint8(0x80 | (rep_count - 3))
            wtr.write_uint8(data_block[0])
            # If the run was ended by a different byte, that byte is the only
            # thing left in the buffer: keep it at the front.
            if rep_count != current_block_length:
                data_block[0] = data_block[current_block_length - 1]
            current_block_length -= rep_count
            # Start counting afresh. Without this the stale count leaks into
            # the next block and produces runs that are longer than the
            # data (or flags that do not fit in a byte).
            rep_count = 1

    # Flush whatever is still buffered as one final literal block.
    if current_block_length > 0:
        wtr.write_uint8(current_block_length - 1)
        for i in range(current_block_length):
            wtr.write_uint8(data_block[i])

    return wtr.data
def compress(input_data: bytes, datablock_size=None) -> bytes:
    """Huffman-encode *input_data* and return ``wtr.getvalue()``.

    Args:
        input_data: The bytes to encode.
        datablock_size: Symbol width in bits, ``4`` or ``8``. With ``None``
            both widths are tried and the shorter result is returned (the
            4-bit result wins a tie).

    Layout produced by the writes below:

    * one byte ``0x20 | datablock_size`` (huffman identifier);
    * the input length as a 24-bit value, or ``0`` followed by the length
      as a 32-bit value when it does not fit in 24 bits;
    * one byte ``(node_count - 1) // 2`` followed by the tree as serialised
      by ``HuffTreeNode.to_wtr``;
    * the code bits, packed most-significant-bit first into 32-bit words;
      a final partial word is written with its unused low bits left at 0.

    In 4-bit mode every input byte contributes two symbols: its low nibble
    first, then its high nibble.

    Raises:
        AssertionError: if *datablock_size* is neither ``None``, 4 nor 8.
    """
    if datablock_size is None:
        # Return the smallest we can
        return min(compress(input_data, 4), compress(input_data, 8), key=len)
    assert datablock_size in [4, 8]

    symbol_count = 0x100 if datablock_size == 8 else 0x10

    def iter_symbols():
        # Yield the symbols of input_data in encoding order.
        if datablock_size == 8:
            yield from input_data
        else:
            for byte in input_data:
                yield byte & 0xf
                yield byte >> 4

    wtr = BinaryWriter()
    wtr.write_uint8(0x20 | datablock_size)  # huffman identifier

    # The 24-bit field can hold every length up to and including 0xffffff;
    # only longer inputs need the zero marker plus the 32-bit length.
    size = len(input_data)
    if size <= 0xffffff:
        wtr.write_uint24(size)
    else:
        wtr.write_uint24(0)
        wtr.write_uint32(size)

    # build frequency table
    frequencies = [0] * symbol_count
    for symbol in iter_symbols():
        frequencies[symbol] += 1

    # build the huffman tree
    node_count = 0
    leaf_queue = []
    node_queue = []
    leaves: List[HuffTreeNode] = [None] * symbol_count
    for symbol, frequency in enumerate(frequencies):
        if frequency == 0:
            continue
        node = HuffTreeNode(True, data=symbol)
        leaves[symbol] = node
        leaf_queue.append((frequency, node))
        node_count += 1

    # A tree needs at least two leaves. Pad with unused leaves until that
    # holds; a loop (rather than a single check) also covers empty input,
    # which has no leaves at all and would otherwise leave node_queue empty.
    while len(leaf_queue) < 2:
        node = HuffTreeNode(True, data=0)
        leaves[0] = node
        leaf_queue.append((1, node))
        node_count += 1

    def take_lowest(
            queue0: List[Tuple[int, HuffTreeNode]],
            queue1: List[Tuple[int, HuffTreeNode]]) -> Tuple[int, HuffTreeNode]:
        # Remove and return the lowest-priority entry of the two queues.
        # On equal priority the entry from queue1 is taken.
        if not queue0 and not queue1:
            raise ValueError("take_lowest() arg are empty sequences")
        lowest_queue0 = min(queue0, key=lambda x: x[0]) if queue0 else None
        lowest_queue1 = min(queue1, key=lambda x: x[0]) if queue1 else None
        if lowest_queue1 is None or (
                lowest_queue0 is not None
                and lowest_queue0[0] < lowest_queue1[0]):
            queue0.remove(lowest_queue0)
            return lowest_queue0
        queue1.remove(lowest_queue1)
        return lowest_queue1

    # Repeatedly merge the two lowest-priority nodes until one root is left.
    while len(leaf_queue) + len(node_queue) > 1:
        one_prio, one = take_lowest(leaf_queue, node_queue)
        two_prio, two = take_lowest(leaf_queue, node_queue)
        newnode = HuffTreeNode(False, child0=one, child1=two)
        node_queue.append((one_prio + two_prio, newnode))
        node_count += 1

    root: HuffTreeNode = node_queue[0][1]

    # write the huffman tree
    wtr.write_uint8((node_count - 1) // 2)
    root.to_wtr(wtr)

    # Encode the symbols. The root-to-leaf path of a symbol never changes,
    # so it is worked out once per distinct symbol and then reused.
    codes = {}
    datablock = 0
    bits_left = 32
    for symbol in iter_symbols():
        path = codes.get(symbol)
        if path is None:
            node = leaves[symbol]
            path = []
            # Walk up from the leaf, then reverse to get root-to-leaf order.
            for _ in range(node.depth):
                path.append(node.is_child1)
                node = node.parent
            path.reverse()
            codes[symbol] = path

        for bit in path:
            if bits_left == 0:
                wtr.write_uint32(datablock)
                datablock = 0
                bits_left = 32
            bits_left -= 1
            if bit:
                datablock |= 1 << bits_left

    # Flush the last, possibly partial, word.
    if bits_left != 32:
        wtr.write_uint32(datablock)
    return wtr.getvalue()
def compress(data: bytes):
    """Run-length encode *data* and return the writer's buffer (``wtr.data``).

    Layout produced by the writes below:

    * one byte ``0x30`` (rle identifier);
    * the input length as a 24-bit value, or ``0`` followed by the length
      as a 32-bit value when it does not fit in 24 bits;
    * a sequence of blocks, each introduced by a flag byte:
        - ``flag < 0x80``: ``flag + 1`` literal bytes follow (1..128);
        - ``flag >= 0x80``: one byte follows, repeated
          ``(flag & 0x7f) + 3`` times (3..130).

    Only runs of three or more equal bytes are emitted as run blocks.
    """
    rdr = BinaryReader(data)
    wtr = BinaryWriter()
    wtr.write_uint8(0x30)  # rle identifier

    # 0xffffff itself still fits in the 24-bit field, so the extended
    # 32-bit length is only needed for strictly larger inputs.
    input_size = len(data)
    if input_size <= 0xffffff:
        wtr.write_uint24(input_size)
    else:
        wtr.write_uint24(0)
        wtr.write_uint32(input_size)

    # Longest run a flag can describe is 0x7f + 3 = 130 bytes, which is
    # also the size of the scratch buffer.
    block_capacity = 130
    end = len(rdr.data)
    rep_count = 1
    current_block_length = 0
    data_block = [0] * block_capacity

    while rdr.c < end:
        found_repetition = False

        # Read into the buffer until the last three bytes are equal, the
        # buffer is full, or there is no more input.
        while current_block_length < block_capacity and rdr.c < end:
            next_byte = rdr.read_uint8()
            data_block[current_block_length] = next_byte
            current_block_length += 1
            if current_block_length > 1:
                if next_byte == data_block[current_block_length - 2]:
                    rep_count += 1
                else:
                    rep_count = 1
            found_repetition = rep_count > 2
            if found_repetition:
                break

        if found_repetition:
            # The bytes preceding the three equal ones go out as literals.
            num_uncomp_to_copy = current_block_length - 3
        else:
            # At most 128 literals per block; with a full buffer the final
            # two bytes stay behind since a run may begin with them.
            num_uncomp_to_copy = min(current_block_length, block_capacity - 2)

        if num_uncomp_to_copy > 0:
            wtr.write_uint8(num_uncomp_to_copy - 1)
            for i in range(num_uncomp_to_copy):
                wtr.write_uint8(data_block[i])
            # Shift the unwritten tail to the start of the buffer.
            leftover = current_block_length - num_uncomp_to_copy
            data_block[:leftover] = \
                data_block[num_uncomp_to_copy:current_block_length]
            current_block_length = leftover

        if found_repetition:
            # Keep consuming while the run continues and the buffer has room.
            while current_block_length < block_capacity and rdr.c < end:
                next_byte = rdr.read_uint8()
                data_block[current_block_length] = next_byte
                current_block_length += 1
                if next_byte != data_block[0]:
                    break
                rep_count += 1

            wtr.write_uint8(0x80 | (rep_count - 3))
            wtr.write_uint8(data_block[0])
            # A byte that terminated the run is carried over to the next
            # block by moving it to the front.
            if rep_count != current_block_length:
                data_block[0] = data_block[current_block_length - 1]
            current_block_length -= rep_count
            # The run is finished: reset the counter so the carried-over
            # byte (if any) starts a new count of one. Leaving the old
            # value in place makes the next block report a run that is not
            # in the data and can push the flag past 8 bits.
            rep_count = 1

    # Emit any buffered remainder as a last literal block.
    if current_block_length > 0:
        wtr.write_uint8(current_block_length - 1)
        for i in range(current_block_length):
            wtr.write_uint8(data_block[i])

    return wtr.data