def test_compressed_runs_bit_array_rank_zero(bb: bytes) -> None:
    """Check ``rank_zero`` against a naive zero count over a plain bitarray.

    For every position ``i`` the compressed structure must report the number
    of zero bits in the inclusive prefix ``[0, i]``.
    """
    assume(len(bb) % 8 == 0)

    reference = bitarray()
    reference.frombytes(bb)
    compressed = CompressedRunsBitArray(reference)

    # Maintain the expected answer incrementally while walking the bits.
    zeros_so_far = 0
    for position, bit in enumerate(reference):
        if not bit:
            zeros_so_far += 1
        assert compressed.rank_zero(position) == zeros_so_far
def test_compressed_runs_bit_array_select_zero(bb: bytes) -> None:
    """Check ``select_zero`` against the positions of zeros in a plain bitarray.

    ``select_zero(k)`` must return the index of the ``k``-th zero bit
    (counting from 0) for every zero present in the input.
    """
    assume(len(bb) % 8 == 0)

    reference = bitarray()
    reference.frombytes(bb)
    compressed = CompressedRunsBitArray(reference)

    # Positions of all zero bits, in increasing order.
    zero_positions = [
        index for index in range(len(reference)) if not reference[index]
    ]

    for k, expected_position in enumerate(zero_positions):
        assert compressed.select_zero(k) == expected_position
def test_compressed_runs_bit_array_getitem(bb: bytes) -> None:
    """Check that indexing the compressed array reproduces every input bit."""
    assume(len(bb) % 8 == 0)

    reference = bitarray()
    reference.frombytes(bb)
    compressed = CompressedRunsBitArray(reference)

    for position, expected_bit in enumerate(reference):
        assert compressed[position] == expected_bit
def test_compressed_runs_bit_array_select_example_1b() -> None:
    """Worked example: ``select_zero`` on a fixed 20-bit pattern.

    ``expected_positions[k]`` is the index of the ``k``-th zero bit.
    """
    compressed = CompressedRunsBitArray(bitarray('11110000000011010000'))

    expected_positions = [4, 5, 6, 7, 8, 9, 10, 11, 14, 16, 17, 18, 19]
    for k, expected in enumerate(expected_positions):
        assert compressed.select_zero(k) == expected
def test_compressed_runs_bit_array_select_example_2a() -> None:
    """Worked example: ``select`` on a fixed 21-bit pattern.

    ``expected_positions[k]`` is the index of the ``k``-th one bit.
    """
    compressed = CompressedRunsBitArray(bitarray('100001111111100101111'))

    expected_positions = [0, 5, 6, 7, 8, 9, 10, 11, 12, 15, 17, 18, 19, 20]
    for k, expected in enumerate(expected_positions):
        assert compressed.select(k) == expected
예제 #6
0
    def _build_huffman_tree(self, values: IndexedIntSequence,
                            tree_nodes: List[HuffmanTreeNode]) -> None:
        """Merge the runs Huffman-style and store the resulting structures.

        ``tree_nodes`` (initially the leaf runs) is heapified in place and the
        two smallest nodes are repeatedly popped and merged until a single
        root remains. Every merge appends one bit per merged element to a
        shared bit stream: ``False`` when the element came from the left
        child, ``True`` when it came from the right child.

        Sets ``self._louds``, ``self._node_data``, ``self._merge_sort_poppy``
        and ``self._run_rank_to_louds_id``. Reads ``self._run_starts``.

        Args:
            values: The sequence the runs were extracted from.
            tree_nodes: Heap-comparable nodes; mutated by this method.
        """
        # Determine the tree topology.
        # NOTE(review): heap order is whatever the node types' comparison
        # defines -- presumably node size; confirm against HuffmanTreeNode.
        heapq.heapify(tree_nodes)
        merge_sort_bitarray = bitarray()
        merge_sort_offset = 0
        while len(tree_nodes) > 1:
            x = heapq.heappop(tree_nodes)
            y = heapq.heappop(tree_nodes)
            # The new inner node's bits start at the current end of the
            # shared bit stream.
            merged = HuffmanInnerNode(size=len(x) + len(y),
                                      left_child=x,
                                      right_child=y,
                                      merge_sort_offset=merge_sort_offset)

            # Populate the merge sort bitarray's values: tag each element
            # with the side it came from and emit the tags in merged order.
            # NOTE(review): assumes each child's iterator yields its values
            # in ascending order, as heapq.merge requires -- confirm.
            it_left = ((value, False)
                       for value in x.iterator(values, merge_sort_bitarray))
            it_right = ((value, True)
                        for value in y.iterator(values, merge_sort_bitarray))

            for _, b in heapq.merge(it_left, it_right):
                merge_sort_bitarray.append(b)
                merge_sort_offset += 1

            heapq.heappush(tree_nodes, merged)

        # Build a LOUDS representation of the tree topology
        louds = LoudsBinaryTree(root=tree_nodes[0],
                                get_left_child=lambda n: n.left_child
                                if isinstance(n, HuffmanInnerNode) else None,
                                get_right_child=lambda n: n.right_child
                                if isinstance(n, HuffmanInnerNode) else None)

        # The data stored at each node of the tree is:
        # - The offset into a bitarray (if a node is an inner node)
        # - The offset into the original permutation (if a node is a leaf node)
        # Nodes are visited breadth-first, left child before right child.
        # NOTE(review): assumes LoudsBinaryTree numbers nodes in this same
        # order, since node_data is later indexed by LOUDS id -- confirm.
        node_data: List[int] = []
        queue = deque(tree_nodes)
        while queue:
            tree_node = queue.popleft()
            if isinstance(tree_node, HuffmanInnerNode):
                node_data.append(tree_node.merge_sort_offset)
                queue.append(tree_node.left_child)
                queue.append(tree_node.right_child)
            elif isinstance(tree_node, Run):
                node_data.append(tree_node.from_)
            else:
                raise TypeError

        self._louds = louds
        self._node_data = node_data

        # TODO: The choice of "4" lower-order bits here is somewhat arbitrary.
        # Consider passing it in as a constructor parameter.
        self._merge_sort_poppy = CompressedRunsBitArray(merge_sort_bitarray,
                                                        num_lower_bits=4)

        # Map each run's rank (its index in self._run_starts) to the LOUDS id
        # of the leaf that represents it.
        number_of_runs = len(self._run_starts)
        self._run_rank_to_louds_id = [0] * number_of_runs
        for louds_id in range(len(self._node_data)):
            if self._louds.is_leaf(louds_id):
                run_offset = self._node_data[louds_id]
                run_rank = self._get_run_start_position(run_offset)
                self._run_rank_to_louds_id[run_rank] = louds_id
예제 #7
0
class Permutation:
    """A permutation stored as a merge tree over its ascending runs.

    The input is split into maximal non-decreasing runs. The runs are merged
    pairwise, Huffman-style, and each merge records one bit per element
    saying which child the element came from. ``__getitem__`` walks from a
    leaf up to the root with select queries; ``index_of`` walks from the
    root down to a leaf with rank queries.
    """

    def __init__(self, values: IndexedIntSequence,
                 num_lower_bits: int = 4) -> None:
        """Build the structure for ``values``.

        Args:
            values: Indexable, sized sequence of ints.
            num_lower_bits: Forwarded unchanged to ``CompressedRunsBitArray``
                for the merge bit stream. Defaults to 4, the value that was
                previously hard-coded.
        """
        self._size = len(values)
        runs = self._extract_runs(values)
        self._build_huffman_tree(values, runs, num_lower_bits)

    def _extract_runs(self,
                      values: IndexedIntSequence) -> List[HuffmanTreeNode]:
        """Split ``values`` into maximal non-decreasing runs.

        Stores the start index of every run in ``self._run_starts`` (always
        beginning with 0, even for empty input) and returns one ``Run`` per
        start, covering ``[start, next_start)``.
        """
        # A new run begins wherever an element is smaller than its
        # predecessor.
        run_starts: List[int] = [0]
        run_starts.extend(i for i in range(1, len(values))
                          if values[i] < values[i - 1])
        self._run_starts = run_starts

        # Each run ends where the next one starts; the last ends at
        # len(values).
        run_ends = run_starts[1:] + [len(values)]
        runs: List[HuffmanTreeNode] = [
            Run(from_=from_index, until=until_index)
            for from_index, until_index in zip(run_starts, run_ends)
        ]
        return runs

    def _build_huffman_tree(self, values: IndexedIntSequence,
                            tree_nodes: List[HuffmanTreeNode],
                            num_lower_bits: int = 4) -> None:
        """Merge the runs Huffman-style and store the resulting structures.

        ``tree_nodes`` is heapified in place and the two smallest nodes are
        repeatedly popped and merged until a single root remains. Every merge
        appends one bit per merged element to a shared bit stream: ``False``
        when the element came from the left child, ``True`` when it came from
        the right child.

        Sets ``self._louds``, ``self._node_data``, ``self._merge_sort_poppy``
        and ``self._run_rank_to_louds_id``. Reads ``self._run_starts``.

        Args:
            values: The sequence the runs were extracted from.
            tree_nodes: Heap-comparable nodes; mutated by this method.
            num_lower_bits: Forwarded to ``CompressedRunsBitArray``.
        """
        # Determine the tree topology.
        # NOTE(review): heap order is whatever the node types' comparison
        # defines -- presumably node size; confirm against HuffmanTreeNode.
        heapq.heapify(tree_nodes)
        merge_sort_bitarray = bitarray()
        merge_sort_offset = 0
        while len(tree_nodes) > 1:
            x = heapq.heappop(tree_nodes)
            y = heapq.heappop(tree_nodes)
            # The new inner node's bits start at the current end of the
            # shared bit stream.
            merged = HuffmanInnerNode(size=len(x) + len(y),
                                      left_child=x,
                                      right_child=y,
                                      merge_sort_offset=merge_sort_offset)

            # Populate the merge sort bitarray's values: tag each element
            # with the side it came from and emit the tags in merged order.
            # NOTE(review): assumes each child's iterator yields its values
            # in ascending order, as heapq.merge requires -- confirm.
            it_left = ((value, False)
                       for value in x.iterator(values, merge_sort_bitarray))
            it_right = ((value, True)
                        for value in y.iterator(values, merge_sort_bitarray))

            for _, b in heapq.merge(it_left, it_right):
                merge_sort_bitarray.append(b)
                merge_sort_offset += 1

            heapq.heappush(tree_nodes, merged)

        # Build a LOUDS representation of the tree topology
        louds = LoudsBinaryTree(root=tree_nodes[0],
                                get_left_child=lambda n: n.left_child
                                if isinstance(n, HuffmanInnerNode) else None,
                                get_right_child=lambda n: n.right_child
                                if isinstance(n, HuffmanInnerNode) else None)

        # The data stored at each node of the tree is:
        # - The offset into a bitarray (if a node is an inner node)
        # - The offset into the original permutation (if a node is a leaf node)
        # Nodes are visited breadth-first, left child before right child.
        # NOTE(review): assumes LoudsBinaryTree numbers nodes in this same
        # order, since node_data is later indexed by LOUDS id -- confirm.
        node_data: List[int] = []
        queue = deque(tree_nodes)
        while queue:
            tree_node = queue.popleft()
            if isinstance(tree_node, HuffmanInnerNode):
                node_data.append(tree_node.merge_sort_offset)
                queue.append(tree_node.left_child)
                queue.append(tree_node.right_child)
            elif isinstance(tree_node, Run):
                node_data.append(tree_node.from_)
            else:
                raise TypeError

        self._louds = louds
        self._node_data = node_data

        self._merge_sort_poppy = CompressedRunsBitArray(
            merge_sort_bitarray, num_lower_bits=num_lower_bits)

        # Map each run's rank (its index in self._run_starts) to the LOUDS id
        # of the leaf that represents it.
        number_of_runs = len(self._run_starts)
        self._run_rank_to_louds_id = [0] * number_of_runs
        for louds_id in range(len(self._node_data)):
            if self._louds.is_leaf(louds_id):
                run_offset = self._node_data[louds_id]
                run_rank = self._get_run_start_position(run_offset)
                self._run_rank_to_louds_id[run_rank] = louds_id

    def _get_run_start_position(self, run_id: int) -> int:
        """Return the rank of the run that contains position ``run_id``.

        Despite the parameter name, ``run_id`` is a position in the original
        sequence. The result is the index into ``self._run_starts`` of the
        last run start that is ``<= run_id``, or ``-1`` when every run start
        is greater than ``run_id``.
        """
        # Function-scope import keeps this block self-contained.
        from bisect import bisect_right

        # self._run_starts is strictly increasing, so the number of entries
        # <= run_id, minus one, is the index of the containing run.
        return bisect_right(self._run_starts, run_id) - 1

    def __getitem__(self, key: int) -> int:
        """
        Retrieve the i'th element of this permutation.

        Start at the leaves. Find the index of the current key `k` in the
        current node. Now, consider the parent node:
            - If the current node is the left child, get the index of the
              `k`'th zero in the parent node via "select_zero".

            - If the current node is the right child, get the index of the
              `k`'th one in the parent node via "select".

        Repeat until the current node is the root, and return `k`.

        Raises:
            IndexError: If ``key`` is outside ``[0, len)``. Negative indices
                are not supported.
        """
        if not (0 <= key < self._size):
            raise IndexError(f"Index out of bounds: {key}")
        poppy = self._merge_sort_poppy
        run_rank = self._get_run_start_position(key)
        current_node = self._run_rank_to_louds_id[run_rank]
        # Position of the element inside its own run.
        key = key - self._node_data[current_node]

        while True:
            parent = self._louds.get_parent(current_node)
            if parent is None:
                return key

            # All inner nodes share one bit stream; the parent's bits start
            # at parent_offset. Select queries are global, so first count the
            # matching bits that precede the parent's segment, then translate
            # the global answer back to a segment-local position.
            parent_offset = self._node_data[parent]
            if self._louds.get_left_child(parent) == current_node:
                # get the index of the `k`'th zero in the parent node
                zeros_before = (0 if parent_offset == 0 else
                                poppy.rank_zero(parent_offset - 1))
                key = poppy.select_zero(key + zeros_before) - parent_offset
            else:
                # get the index of the `k`'th one in the parent node
                ones_before = (0 if parent_offset == 0 else
                               poppy.rank(parent_offset - 1))
                key = poppy.select(key + ones_before) - parent_offset
            current_node = parent

    def index_of(self, value: int) -> int:
        """
        Retrieve the index of the given value within this permutation. (This
        is the inverse of the permutation.)

        Start at the root. Look up the bit at position `value`. Calculate
        either the rank_1 or rank_0 of that position, depending on whether
        it's 1 or 0. Recurse to either the left or right child, depending on
        that value.

        Raises:
            IndexError: If ``value`` is outside ``[0, len)``, mirroring the
                bounds check in ``__getitem__``.
        """
        if not (0 <= value < self._size):
            raise IndexError(f"Value out of bounds: {value}")
        poppy = self._merge_sort_poppy
        current_node = self._louds.get_root()
        while True:
            if self._louds.is_leaf(current_node):
                # Leaf data is the run's start in the original sequence.
                return self._node_data[current_node] + value

            offset = self._node_data[current_node]
            position = offset + value

            # Rank queries are inclusive and global: subtract the count that
            # precedes this node's segment, then subtract 1 to turn the
            # inclusive count into a 0-based position inside the child.
            if not poppy[position]:
                zeros_before = (0 if offset == 0 else
                                poppy.rank_zero(offset - 1))
                value = poppy.rank_zero(position) - zeros_before - 1
                child = self._louds.get_left_child(current_node)
            else:
                ones_before = (0 if offset == 0 else poppy.rank(offset - 1))
                value = poppy.rank(position) - ones_before - 1
                child = self._louds.get_right_child(current_node)
            assert child is not None
            current_node = child
def test_compressed_runs_bit_array_rank_example_2b() -> None:
    """Worked example: ``rank_zero`` on a fixed 21-bit pattern.

    ``expected_counts[i]`` is the number of zero bits in the inclusive
    prefix ``[0, i]``.
    """
    compressed = CompressedRunsBitArray(bitarray('011110000000011010000'))

    expected_counts = [
        1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7,
        8, 9, 9, 9, 10, 10, 11, 12, 13, 14,
    ]
    for position, expected in enumerate(expected_counts):
        assert compressed.rank_zero(position) == expected
def test_compressed_runs_bit_array_rank_example_1a() -> None:
    """Worked example: ``rank`` on a fixed 20-bit pattern.

    ``expected_counts[i]`` is the number of one bits in the inclusive
    prefix ``[0, i]``.
    """
    compressed = CompressedRunsBitArray(bitarray('00001111111100101111'))

    expected_counts = [
        0, 0, 0, 0, 1, 2, 3, 4, 5, 6,
        7, 8, 8, 8, 9, 9, 10, 11, 12, 13,
    ]
    for position, expected in enumerate(expected_counts):
        assert compressed.rank(position) == expected