예제 #1
0
def test_successor_predecessor_empty_trie():
    t = XFastTrie(max_trie_entry_size)

    with pytest.raises(ValueError):
        t.successor(0)

    with pytest.raises(ValueError):
        t.predecessor(0)
예제 #2
0
def test_successor(entries, test_values):
    t = XFastTrie(max_trie_entry_size)

    for entry in entries:
        t += entry

    for val in test_values:
        succ = t > val

        if succ is not None:
            assert succ > val
            succ = t.successor(val)

            if succ.pred is not None:
                assert succ.pred.value <= val
예제 #3
0
class YFastTrie(object):
    @staticmethod
    def _calculate_representative(value: int, max_length: int) -> int:
        """
		Calculate the smallest value that would exist in _partitions
		the given value could belong to

		:param val: The value to calculate the representative for
		:param max_length: The bit length of the largest possible representative

		:return: The closest possible representative to the given number
		"""
        result = min(max_length * (value // max_length) + (-1 % max_length),
                     2**max_length - 1)
        return cast(int, result)

    @staticmethod
    def _merge_subtrees(
            left_tree: SortedList, right_tree: SortedList,
            max_size: int) -> Tuple[SortedList, Optional[SortedList]]:
        """
		Combine the elements of two trees into one larger tree,
		splitting them again if the larger tree exceeds a given size

		:param left_tree: Tree containing smaller elements
		:param right_tree: Tree containing larger elements
		:param max_size: Maximum size the combined tree can be before splitting
		:return: The combined tree and None if both trees' elements are
				 less than max_size, the tree with the smaller elements
				 and the tree with the larger elements otherwise
		"""
        if len(left_tree) + len(right_tree) <= max_size:
            left_tree.update(right_tree)
            result = (left_tree, None)
        else:
            total_median = (len(left_tree) + len(right_tree)) // 2 - 1
            if total_median < len(left_tree):
                total_median = left_tree[total_median]
            else:
                total_median = right_tree[total_median - len(left_tree)]

            # Taking advantage of the fact the number of elements in a subtree can only
            # be twice the bit length of the maximum element before splitting
            median_rep = YFastTrie._calculate_representative(
                total_median, max_size // 2)

            if median_rep <= max(left_tree):
                from_tree = left_tree
                to_tree = right_tree
                side = -1
            else:
                from_tree = right_tree
                to_tree = left_tree
                side = 0

            while max(left_tree) > median_rep or min(right_tree) <= median_rep:
                to_tree.add(from_tree.pop(side))

            result = (left_tree, right_tree)

        return result

    @staticmethod
    def _split_subtree(tree: SortedList,
                       max_length: int) -> Tuple[SortedList, SortedList]:
        """
		Split a tree by its median element into two smaller trees

		:param tree: The tree to split
		:param max_length: The size of the largest possible element in the trie in bits
		:return: The tree with the smaller elements,
				 and the tree with the larger elements
		"""
        median = tree.bisect_right(
            YFastTrie._calculate_representative(tree[len(tree) // 2],
                                                max_length))
        return SortedList(tree.islice(stop=median)), SortedList(
            tree.islice(start=median))

    def clear(self) -> None:
        """
		Remove all values from the trie and return it to its starting state
		"""
        self._count = 0
        self._max: Optional[int] = None
        self._min: Optional[int] = None
        self._partitions = XFastTrie(self._maxlen)
        self._subtrees = HopscotchDict()

    def _get_value_subtree(
        self,
        value: int,
        create_subtree: bool = False
    ) -> Tuple[Optional[SortedList], Optional["TrieNode"]]:
        """
		Find the subtree that would hold the given value

		:param value: The value to find
		:param create_subtree: If there is no subtree that would hold the given value,
							   create one
		:return: The subtree that potentially holds the given value,
				 and its corresponding representative
		"""
        result = None

        if self._count == 0:
            rep_node = None
        elif value <= cast(int, self._min) or self._min is None:
            rep_node = self._partitions.min_node
        else:
            # As the X-fast trie looks for strict successors,
            # if the value being searched for is a representative,
            # the wrong representative will be returned if the one being searched for
            # is not the largest, and no representative will be returned at all if it is;
            # so subtract one before searching for the successor
            rep_node = self._partitions.successor(value - 1)

        if rep_node is None:
            if create_subtree:
                rep = self._calculate_representative(value, self._maxlen)
                self._partitions += rep
                rep_node = self._partitions.successor(rep - 1)
                self._subtrees[rep] = result = SortedList()
        else:
            # Every representative in the X-fast trie should have a corresponding SortedList;
            # the code should blow up if it doesn't
            result = self._subtrees[rep_node.value]

        return (result, rep_node)

    def insert(self, value: Union[int, bytes]) -> None:
        """
		Insert a value into the trie

		:param value: The value to insert into the trie
		"""
        value = XFastTrie._to_int(value, self._maxlen)
        subtree, rep_node = self._get_value_subtree(value, True)
        subtree = cast(SortedList, subtree)
        rep_node = cast(TrieNode, rep_node)
        # Do nothing if the value is already in the trie
        if value in subtree:
            return

        if self._max is None or value > self._max:
            self._max = value

        if self._min is None or value < self._min:
            self._min = value

        subtree.add(value)

        if len(subtree) > self._max_subtree_size:
            # Out with the old
            del self._subtrees[rep_node.value]
            self._partitions -= rep_node.value

            # In with the new
            for tree in self._split_subtree(subtree, self._maxlen):
                rep = self._calculate_representative(max(tree), self._maxlen)
                self._partitions += rep
                self._subtrees[rep] = tree

        self._count += 1

    def predecessor(self, value: Union[int, bytes]) -> Optional[int]:
        """
		Find the largest value in the trie strictly less than the given value,
		if it exists

		:param value: The value to find the predecessor of
		:return: The predecessor of the given value, or None if it doesn't exist
		"""
        value = XFastTrie._to_int(value, self._maxlen)
        subtree, rep_node = self._get_value_subtree(value)

        # subtree should be None only if the trie is empty
        if subtree is None and self._count == 0:
            raise ValueError("No values exist in trie")
        elif value <= cast(int, self._min) or self._min is None:
            return None
        elif value > cast(int, self._max):
            return self._max

        subtree = cast(SortedList, subtree)
        rep_node = cast(TrieNode, rep_node)
        if min(subtree) >= value:
            subtree = self._subtrees[rep_node.pred.value]

        return cast(int, subtree[subtree.bisect_left(value) - 1])

    def remove(self, value: Union[int, bytes]) -> None:
        """
		Remove the given value from the trie

		:param value: The value to remove from the trie
		"""
        value = XFastTrie._to_int(value, self._maxlen)
        subtree, rep_node = self._get_value_subtree(value)

        # There should be no subtree only if the given value is not in the trie
        if subtree is None or value not in subtree:
            raise ValueError("Value does not exist in trie")

        subtree = cast(SortedList, subtree)
        rep_node = cast(TrieNode, rep_node)
        if self._min == value:
            if len(subtree) > 1:
                min_succ = subtree[1]
            else:
                min_succ = self.successor(value)
        else:
            min_succ = -1

        if self._max == value:
            if len(subtree) > 1:
                max_pred = subtree[-2]
            else:
                max_pred = self.predecessor(value)
        else:
            max_pred = -1

        if min_succ != -1:
            self._min = min_succ

        if max_pred != -1:
            self._max = max_pred

        subtree.remove(value)

        if len(subtree) == 0:
            del self._subtrees[rep_node.value]
            self._partitions -= rep_node.value

        elif len(subtree) < self._min_subtree_size and len(
                self._partitions) > 1:
            if rep_node.pred is not None:
                left_rep = rep_node.pred
                right_rep = rep_node
            else:
                left_rep = rep_node
                right_rep = rep_node.succ

            left_tree = self._subtrees[left_rep.value]
            right_tree = self._subtrees[right_rep.value]

            # Out with the old
            del self._subtrees[left_rep.value]
            del self._subtrees[right_rep.value]
            self._partitions -= left_rep.value
            self._partitions -= right_rep.value

            # In with the new
            tree: SortedList
            for tree in filter(
                    None,
                    self._merge_subtrees(left_tree, right_tree,
                                         2 * self._maxlen)):
                rep = self._calculate_representative(max(tree), self._maxlen)
                self._partitions += rep
                self._subtrees[rep] = tree

        self._count -= 1

    def successor(self, value: Union[int, bytes]) -> Optional[int]:
        """
		Find the smallest value in the trie strictly greater than the given value,
		if it exists

		:param value: The value to find the successor of
		:return: The successor of the given value, or None if it doesn't exist
		"""
        value = XFastTrie._to_int(value, self._maxlen)
        subtree, rep_node = self._get_value_subtree(value)

        # subtree should be None only if the trie is empty
        if subtree is None and self._count == 0:
            raise ValueError("No values exist in trie")
        elif value >= cast(int, self._max) or self._max is None:
            return None
        elif value < cast(int, self._min):
            return self._min

        subtree = cast(SortedList, subtree)
        rep_node = cast(TrieNode, rep_node)
        if max(subtree) <= value:
            subtree = self._subtrees[rep_node.succ.value]

        return cast(int, subtree[subtree.bisect_right(value)])

    @property
    def max(self) -> Optional[int]:
        """
		The maximum value in the trie

		:return: The maximum value in the trie,
				 or None if the trie is empty
		"""
        return self._max

    @property
    def min(self) -> Optional[int]:
        """
		The minimum value in the trie

		:return: The minimum value in the trie,
				 or None if the trie is empty
		"""
        return self._min

    def __init__(self, max_length: int = (maxsize.bit_length() + 1)):
        self._maxlen = max_length
        self._min_subtree_size = max_length // 2
        self._max_subtree_size = max_length * 2
        self.clear()

    def __contains__(self, value: Union[int, bytes]) -> bool:
        value = XFastTrie._to_int(value, self._maxlen)
        subtree, _ = self._get_value_subtree(value)
        return subtree is not None and value in subtree

    def __gt__(self, value: Union[int, bytes]) -> Optional[int]:
        value = XFastTrie._to_int(value, self._maxlen)
        return self.successor(value)

    def __iadd__(self, value: Union[int, bytes]) -> "YFastTrie":
        value = XFastTrie._to_int(value, self._maxlen)
        self.insert(value)
        return self

    def __isub__(self, value: Union[int, bytes]) -> "YFastTrie":
        value = XFastTrie._to_int(value, self._maxlen)
        self.remove(value)
        return self

    def __iter__(self) -> Iterable[int]:
        for rep in sorted(self._subtrees):
            for value in self._subtrees[rep]:
                yield value

    def __len__(self) -> int:
        return self._count

    def __lt__(self, value: Union[int, bytes]) -> Optional[int]:
        value = XFastTrie._to_int(value, self._maxlen)
        return self.predecessor(value)