Beispiel #1
0
def compress(data: bytes):
    """Compress *data* into a run-length encoded (RLE) stream.

    Stream layout, as produced by this function:

    * one byte ``0x30`` identifying the stream as RLE;
    * the uncompressed length written with ``write_uint24``; when the length
      does not fit in 24 bits, ``0`` is written there instead and the real
      length follows via ``write_uint32``;
    * a sequence of blocks, each introduced by one flag byte:
      ``0x00-0x7F`` -> ``flag + 1`` literal bytes follow (1 to 128 bytes),
      ``0x80-0xFF`` -> the next byte is repeated ``(flag & 0x7F) + 3`` times
      (3 to 130 bytes).

    Args:
        data: Raw bytes to compress.

    Returns:
        The ``data`` attribute of the ``BinaryWriter`` that holds the stream.
    """
    rdr = BinaryReader(data)
    wtr = BinaryWriter()

    wtr.write_uint8(0x30)  # rle identifier

    # 0xffffff itself still fits in the 24-bit field; only larger sizes need
    # the "0 + 32-bit length" extended header.
    if len(data) <= 0xffffff:
        wtr.write_uint24(len(data))
    else:
        wtr.write_uint24(0)
        wtr.write_uint32(len(data))

    # A run block covers at most 0x7F + 3 = 130 bytes.  A literal block covers
    # at most 0x7F + 1 = 128 bytes, but two extra bytes are buffered because
    # the tail of the buffer may turn out to be the start of a run.
    max_block = 130
    block = [0] * max_block
    block_len = 0
    # Length of the run of equal bytes ending at block[block_len - 1].
    rep_count = 1

    while rdr.c < len(rdr.data):
        # Buffer bytes until three equal bytes in a row show up, the buffer
        # is full, or the input ends.
        found_repetition = False
        while block_len < max_block and rdr.c < len(rdr.data):
            next_byte = rdr.read_uint8()
            block[block_len] = next_byte
            block_len += 1
            if block_len > 1:
                if next_byte == block[block_len - 2]:
                    rep_count += 1
                else:
                    rep_count = 1
            if rep_count > 2:
                found_repetition = True
                break

        if found_repetition:
            # Everything before the three repeated bytes is literal data.
            literal_count = block_len - 3
        else:
            # Hold back the last two bytes of a full buffer: they may be the
            # beginning of a run that continues in the next iteration.
            literal_count = min(block_len, max_block - 2)

        if literal_count > 0:
            wtr.write_uint8(literal_count - 1)
            for value in block[:literal_count]:
                wtr.write_uint8(value)
            # Shift the bytes that were held back to the start of the buffer.
            remaining = block_len - literal_count
            block[:remaining] = block[literal_count:block_len]
            block_len = remaining

        if found_repetition:
            # Extend the run until a different byte, a full buffer, or the
            # end of the input.
            while block_len < max_block and rdr.c < len(rdr.data):
                next_byte = rdr.read_uint8()
                block[block_len] = next_byte
                block_len += 1
                if next_byte != block[0]:
                    break
                rep_count += 1

            wtr.write_uint8(0x80 | (rep_count - 3))
            wtr.write_uint8(block[0])
            if rep_count != block_len:
                # The byte that ended the run becomes the new buffer start.
                block[0] = block[block_len - 1]
            block_len -= rep_count
            # The buffer now holds at most one byte, so the trailing run is
            # of length one.  Without this reset the stale count leaks into
            # the next iteration and produces bogus run blocks
            # (e.g. b"aaabb" was encoded as "aaa" + "bbbb").
            rep_count = 1

    # Flush whatever is still buffered as a final literal block.
    if block_len > 0:
        wtr.write_uint8(block_len - 1)
        for value in block[:block_len]:
            wtr.write_uint8(value)

    return wtr.data
def compress(input_data: bytes, datablock_size=None) -> bytes:
    """Compress *input_data* into a Huffman-coded stream.

    Stream layout, as produced by this function:

    * one byte ``0x20 | datablock_size`` identifying the stream;
    * the uncompressed length written with ``write_uint24``; when the length
      does not fit in 24 bits, ``0`` is written there instead and the real
      length follows via ``write_uint32``;
    * one byte ``(node_count - 1) // 2`` followed by the tree as serialised
      by ``HuffTreeNode.to_wtr``;
    * the code bits, packed most-significant-bit first into 32-bit words
      written with ``write_uint32``.

    With ``datablock_size == 4`` every input byte is encoded as two symbols,
    low nibble first and high nibble second.

    Args:
        input_data: Raw bytes to compress.
        datablock_size: Symbol width in bits, ``4`` or ``8``.  ``None`` tries
            both and returns the shorter result (the 4-bit one on a tie).

    Returns:
        The value of ``BinaryWriter.getvalue()`` for the finished stream.

    Raises:
        AssertionError: If *datablock_size* is not ``None``, ``4`` or ``8``.
    """
    if datablock_size is None:
        # Return the smallest we can
        return min(compress(input_data, 4),
                   compress(input_data, 8),
                   key=len)

    assert datablock_size in [4, 8]

    symbol_count = 0x100 if datablock_size == 8 else 0x10

    def symbols():
        """Yield the symbols of input_data in the order they are encoded."""
        for byte in input_data:
            if datablock_size == 8:
                yield byte
            else:
                yield byte & 0xf  # low nibble first
                yield byte >> 4

    wtr = BinaryWriter()

    wtr.write_uint8(0x20 | datablock_size)  # huffman identifier

    # 0xffffff itself still fits in the 24-bit field; only larger sizes need
    # the "0 + 32-bit length" extended header.
    if len(input_data) <= 0xffffff:
        wtr.write_uint24(len(input_data))
    else:
        wtr.write_uint24(0)
        wtr.write_uint32(len(input_data))

    # build frequency table
    frequencies = [0] * symbol_count
    for symbol in symbols():
        frequencies[symbol] += 1

    # build the huffman tree
    node_count = 0
    leaf_queue: List[Tuple[int, HuffTreeNode]] = []
    node_queue: List[Tuple[int, HuffTreeNode]] = []
    leaves: List[HuffTreeNode] = [None] * symbol_count
    for symbol, frequency in enumerate(frequencies):
        if frequency == 0:
            continue
        node = HuffTreeNode(True, data=symbol)
        leaves[symbol] = node
        leaf_queue.append((frequency, node))
        node_count += 1

    # A tree needs at least two leaves.  Pad with unused leaves until it has
    # them; empty input needs two of these, otherwise no root node would ever
    # be created and the lookup of the root below would fail.
    while len(leaf_queue) < 2:
        node = HuffTreeNode(True, data=0)
        leaves[0] = node
        leaf_queue.append((1, node))
        node_count += 1

    def take_lowest(
            queue0: List[Tuple[int, HuffTreeNode]],
            queue1: List[Tuple[int,
                               HuffTreeNode]]) -> Tuple[int, HuffTreeNode]:
        """Remove and return the lowest-priority entry of the two queues.

        On equal priority the entry from *queue1* is taken.
        """
        if not queue0 and not queue1:
            raise ValueError("take_lowest() arg are empty sequences")
        lowest0 = min(queue0, key=lambda x: x[0]) if queue0 else None
        lowest1 = min(queue1, key=lambda x: x[0]) if queue1 else None
        if lowest1 is None or (lowest0 is not None
                               and lowest0[0] < lowest1[0]):
            queue0.remove(lowest0)
            return lowest0
        queue1.remove(lowest1)
        return lowest1

    # Repeatedly merge the two lowest-priority entries until one root is left.
    while len(leaf_queue) + len(node_queue) > 1:
        one_prio, one = take_lowest(leaf_queue, node_queue)
        two_prio, two = take_lowest(leaf_queue, node_queue)

        newnode = HuffTreeNode(False, child0=one, child1=two)
        node_queue.append((one_prio + two_prio, newnode))
        node_count += 1

    root: HuffTreeNode = node_queue[0][1]

    # write the huffman tree
    wtr.write_uint8((node_count - 1) // 2)
    root.to_wtr(wtr)

    def code_for(leaf: HuffTreeNode) -> List[bool]:
        """Return the root-to-leaf branch choices (True = child1) of *leaf*."""
        bits = []
        node = leaf
        for _ in range(leaf.depth):
            bits.append(node.is_child1)
            node = node.parent
        bits.reverse()  # collected leaf-to-root, emitted root-to-leaf
        return bits

    # The tree is fixed from here on, so compute every code once instead of
    # walking up the tree again for each occurrence of a symbol.
    codes = {
        symbol: code_for(leaf)
        for symbol, leaf in enumerate(leaves) if leaf is not None
    }

    # Pack the code bits MSB-first into 32-bit words.
    datablock = 0
    bits_left = 32
    for symbol in symbols():
        for bit in codes[symbol]:
            if bits_left == 0:
                wtr.write_uint32(datablock)
                datablock = 0
                bits_left = 32
            bits_left -= 1
            if bit:
                datablock |= 1 << bits_left
    # Flush the last, possibly partially filled, word.
    if bits_left != 32:
        wtr.write_uint32(datablock)
    return wtr.getvalue()
Beispiel #3
0
def compress(data: bytes):
    """Run-length encode *data* and return the compressed stream.

    The stream starts with the identifier byte ``0x30`` and the uncompressed
    length (via ``write_uint24``; sizes that do not fit in 24 bits are stored
    as a 24-bit ``0`` followed by the length via ``write_uint32``).  After the
    header comes a sequence of blocks, each starting with a flag byte:

    * ``0x00-0x7F``: ``flag + 1`` literal bytes follow (1 to 128 bytes);
    * ``0x80-0xFF``: the following byte is repeated ``(flag & 0x7F) + 3``
      times (3 to 130 bytes).

    Args:
        data: Raw bytes to compress.

    Returns:
        The ``data`` attribute of the ``BinaryWriter`` that holds the stream.
    """
    rdr = BinaryReader(data)
    wtr = BinaryWriter()

    wtr.write_uint8(0x30)  # rle identifier

    # A length of exactly 0xffffff fits in the 24-bit field, so only strictly
    # larger inputs use the extended "0 + 32-bit length" form.
    if len(data) <= 0xffffff:
        wtr.write_uint24(len(data))
    else:
        wtr.write_uint24(0)
        wtr.write_uint32(len(data))

    # Buffer size: the longest run block is 130 bytes; literal blocks hold at
    # most 128 bytes, plus two look-ahead bytes that may begin a run.
    block_capacity = 130
    data_block = [0] * block_capacity
    current_block_length = 0
    # Number of equal bytes at the end of the buffered data.
    rep_count = 1

    while rdr.c < len(rdr.data):
        # Fill the buffer until a triple of equal bytes is seen, the buffer
        # is full, or there is no more input.
        found_repetition = False
        while current_block_length < block_capacity and rdr.c < len(rdr.data):
            next_byte = rdr.read_uint8()
            data_block[current_block_length] = next_byte
            current_block_length += 1
            if current_block_length > 1:
                if next_byte == data_block[current_block_length - 2]:
                    rep_count += 1
                else:
                    rep_count = 1
            if rep_count > 2:
                found_repetition = True
                break

        if found_repetition:
            # The three repeated bytes stay buffered; the rest is literal.
            num_uncomp_to_copy = current_block_length - 3
        else:
            # Keep the last two bytes of a full buffer in case they start a
            # run that continues with the next input byte.
            num_uncomp_to_copy = min(current_block_length, block_capacity - 2)

        if num_uncomp_to_copy > 0:
            wtr.write_uint8(num_uncomp_to_copy - 1)
            for value in data_block[:num_uncomp_to_copy]:
                wtr.write_uint8(value)
            # Move the bytes that were kept back to the front of the buffer.
            kept = current_block_length - num_uncomp_to_copy
            data_block[:kept] = \
                data_block[num_uncomp_to_copy:current_block_length]
            current_block_length = kept

        if found_repetition:
            # Keep consuming input while it continues the run.
            while (current_block_length < block_capacity
                   and rdr.c < len(rdr.data)):
                next_byte = rdr.read_uint8()
                data_block[current_block_length] = next_byte
                current_block_length += 1
                if next_byte != data_block[0]:
                    break
                rep_count += 1

            wtr.write_uint8(0x80 | (rep_count - 3))
            wtr.write_uint8(data_block[0])
            if rep_count != current_block_length:
                # Carry the byte that terminated the run over to the next
                # iteration.
                data_block[0] = data_block[current_block_length - 1]
            current_block_length -= rep_count
            # At most one byte is left in the buffer, so restart the run
            # counter.  Leaving the old value in place made the next
            # iteration report a run that does not exist (b"aaabb" came out
            # as "aaa" followed by four "b") and drove the block length
            # negative.
            rep_count = 1

    # Emit any bytes still buffered as one last literal block.
    if current_block_length > 0:
        wtr.write_uint8(current_block_length - 1)
        for value in data_block[:current_block_length]:
            wtr.write_uint8(value)

    return wtr.data