Exemple #1
0
    def test_choose_leaf_uses_least_area_enlargement_for_higher_levels(
            self, least_area_enlargement_mock, least_overlap_enlargement_mock):
        """
        Ensure that the choose subtree strategy uses the least area enlargement strategy when picking a subtree at
        levels higher than the one just above the leaf level.
        """
        # Arrange
        tree = RStarTree()
        leaf = RTreeNode(tree, is_leaf=True)
        intermediate = RTreeNode(
            tree,
            is_leaf=False,
            entries=[RTreeEntry(Rect(0, 0, 0, 0), child=leaf)])
        intermediate_entry = RTreeEntry(Rect(0, 0, 0, 0), child=intermediate)
        root = RTreeNode(tree, is_leaf=False, entries=[intermediate_entry])
        tree.root = root
        e = RTreeEntry(Rect(0, 0, 0, 0))
        least_area_enlargement_mock.return_value = intermediate_entry

        # Act
        rstar_choose_leaf(tree, e)

        # Assert
        least_area_enlargement_mock.assert_called_once_with(
            root.entries, e.rect)
        least_overlap_enlargement_mock.assert_called_once_with(
            intermediate.entries, e.rect)
Exemple #2
0
    def test_get_rstar_stat_sorts_entries_by_both_min_and_max(self):
        """
        List of possible divisions should be based on entries sorted by both the minimum as well as maximum coordinate.
        In the example below, when the entries are sorted by either minx, miny, or maxy, the sort order is always
        (a,b,c), but when sorted by maxx, the order is (b,a,c). This ordering enables the [(b), (a,c)] division (which
        turns out to be optimal).
        """
        # Arrange
        a = RTreeEntry(data='a', rect=Rect(0, 0, 7, 2))
        b = RTreeEntry(data='b', rect=Rect(1, 1, 2, 3))
        c = RTreeEntry(data='c', rect=Rect(2, 2, 8, 4))

        # Act
        stat = get_rstar_stat([a, b, c], 1, 2)

        # Assert
        self.assertCountEqual([
            EntryDistribution(([a], [b, c])),
            EntryDistribution(([a, b], [c])),
            EntryDistribution(([b], [a, c]))
        ], stat.unique_distributions)
        self.assertCountEqual([
            EntryDistribution(([a], [b, c])),
            EntryDistribution(([a, b], [c])),
            EntryDistribution(([b], [a, c]))
        ], stat.get_axis_unique_distributions('x'))
        self.assertCountEqual([
            EntryDistribution(([a], [b, c])),
            EntryDistribution(([a, b], [c]))
        ], stat.get_axis_unique_distributions('y'))
        self.assertEqual(140, stat.get_axis_perimeter('x'))
        self.assertEqual(148, stat.get_axis_perimeter('y'))
Exemple #3
0
    def test_adjust_tree_without_split(self):
        """
        Ensure parent entry bounding rectangles are updated correctly when an entry is added without necessitating a
        node split.
        """
        # Arrange
        t = RTreeGuttman(max_entries=3)
        r1 = Rect(0, 0, 3, 2)
        r2 = Rect(5, 5, 7, 7)
        t.root = RTreeNode(t, is_leaf=False)
        entry_a = RTreeEntry(r1, data='a')
        entry_b = RTreeEntry(r2, data='b')
        n1 = RTreeNode(t, is_leaf=True, parent=t.root, entries=[entry_a])
        n2 = RTreeNode(t, is_leaf=True, parent=t.root, entries=[entry_b])
        e1 = RTreeEntry(r1, child=n1)
        e2 = RTreeEntry(r2, child=n2)
        t.root.entries = [e1, e2]
        # Arrange entry being inserted
        r3 = Rect(2, 1, 4, 3)
        entry_c = RTreeEntry(r3, data='c')
        # Manually insert the new entry into node n1, but without adjusting the covering rectangle of the corresponding
        # parent entry in the root node (e1), since that is what we are testing.
        n1.entries.append(entry_c)
        # At this point, the parent entry's covering rectangle will not be correct yet as it only encompasses entry_a.
        # Ensure this is the case (though this is not the focus of this test)
        self.assertEqual(Rect(0, 0, 3, 2), e1.rect)

        # Act
        adjust_tree_strategy(t, n1, None)

        # Assert
        # Ensure e1's bounding rectangle now encompasses both entry_a and entry_c
        self.assertEqual(Rect(0, 0, 4, 3), e1.rect)
        # e2's bounding rectangle should remain unchanged
        self.assertEqual(Rect(5, 5, 7, 7), e2.rect)
Exemple #4
0
    def test_get_rstar_stat_same_distribution_for_all_4_sort_types(self):
        """
        Tests get_rstar_stat when all 4 sort types (min_x, max_x, min_y, and max_y) result in the same distribution
        of entries.
        """
        # Arrange
        a = RTreeEntry(data='a', rect=Rect(0, 0, 1, 1))
        b = RTreeEntry(data='b', rect=Rect(1, 1, 2, 2))
        c = RTreeEntry(data='c', rect=Rect(2, 2, 3, 3))
        d = RTreeEntry(data='d', rect=Rect(3, 3, 4, 4))

        # Act
        stat = get_rstar_stat([a, b, c, d], 1, 3)

        # Assert
        unique_distributions = [
            EntryDistribution(([a], [b, c, d])),
            EntryDistribution(([a, b], [c, d])),
            EntryDistribution(([a, b, c], [d]))
        ]
        self.assertCountEqual(unique_distributions, stat.unique_distributions)
        self.assertCountEqual(unique_distributions,
                              stat.get_axis_unique_distributions('x'))
        self.assertCountEqual(unique_distributions,
                              stat.get_axis_unique_distributions('y'))
        self.assertEqual(96, stat.get_axis_perimeter('x'))
        self.assertEqual(96, stat.get_axis_perimeter('y'))
Exemple #5
0
    def test_least_overlap_enlargement_tie(self):
        """Ensure least area enlargement is used as a tie-breaker when overlap enlargements are equal."""
        # Arrange
        a = RTreeEntry(data='a', rect=Rect(0, 0, 4, 5))
        b = RTreeEntry(data='b', rect=Rect(3, 4, 5, 6))
        rect = Rect(2, 5, 3, 6)

        # Act
        entry = least_overlap_enlargement([a, b], rect)

        # Assert
        self.assertEqual(b, entry)
Exemple #6
0
    def test_least_area_enlargement(self):
        """
        Ensure the node whose bounding box needs least enlargement is chosen for a new entry in the case where there is
        a clear winner.
        """
        # Arrange
        a = RTreeEntry(data='a', rect=Rect(0, 0, 3, 3))
        b = RTreeEntry(data='b', rect=Rect(9, 9, 10, 10))
        rect = Rect(2, 2, 4, 4)

        # Act
        entry = least_area_enlargement([a, b], rect)

        # Assert
        self.assertEqual(a, entry)
Exemple #7
0
    def test_least_overlap_enlargement(self):
        """
        Basic test of least overlap enlargement helper method. This test demonstrates a scenario where least area
        enlargement would favor one entry, but least overlap enlargement favors another.
        """
        # Arrange
        a = RTreeEntry(data='a', rect=Rect(0, 0, 4, 5))
        b = RTreeEntry(data='b', rect=Rect(2, 4, 5, 6))
        rect = Rect(4, 3, 5, 4)

        # Act
        entry = least_overlap_enlargement([a, b], rect)

        # Assert
        self.assertEqual(a, entry)
Exemple #8
0
    def test_rstar_overflow_split_root(self):
        """
        When the root node overflows, the root node should be split and the tree should grow a level. Forced reinsert
        should not occur at the root level.
        """
        # Arrange
        t = RStarTree(max_entries=3)
        r1 = Rect(0, 0, 3, 2)
        r2 = Rect(7, 7, 10, 9)
        r3 = Rect(2, 1, 5, 3)
        entry_a = RTreeEntry(r1, data='a')
        entry_b = RTreeEntry(r2, data='b')
        entry_c = RTreeEntry(r3, data='c')
        t.root.entries = [entry_a, entry_b, entry_c]
        # Arrange entry being inserted. Since the root node is at max capacity, this entry should cause the root
        # to overflow.
        r4 = Rect(6, 6, 8, 8)

        # Act
        entry_d = t.insert('d', r4)

        # Assert
        # Root node should no longer be a leaf node (but should still be root)
        self.assertFalse(t.root.is_leaf)
        self.assertTrue(t.root.is_root)
        # Root node bounding box should encompass all entries
        self.assertEqual(Rect(0, 0, 10, 9), t.root.get_bounding_rect())
        # Root node should have 2 child entries
        self.assertEqual(2, len(t.root.entries))
        e1 = t.root.entries[0]
        e2 = t.root.entries[1]
        # e1 bounding box should encompass entries [a, c]
        self.assertEqual(Rect(0, 0, 5, 3), e1.rect)
        # e2 bounding box should encompass entries [b ,d]
        self.assertEqual(Rect(6, 6, 10, 9), e2.rect)
        # Ensure children nodes of e1 and e2 are leaf nodes
        leaf_node_1 = e1.child
        leaf_node_2 = e2.child
        self.assertIsNotNone(leaf_node_1)
        self.assertIsNotNone(leaf_node_2)
        self.assertTrue(leaf_node_1.is_leaf)
        self.assertTrue(leaf_node_2.is_leaf)
        # Leaf node 1 should contain entries [a, c]
        self.assertEqual(Rect(0, 0, 5, 3), leaf_node_1.get_bounding_rect())
        self.assertCountEqual([entry_a, entry_c], leaf_node_1.entries)
        # Leaf node 2 should contain entries [b, d]
        self.assertEqual(Rect(6, 6, 10, 9), leaf_node_2.get_bounding_rect())
        self.assertCountEqual([entry_b, entry_d], leaf_node_2.entries)
Exemple #9
0
    def test_least_area_enlargement_tie(self):
        """
        When two nodes need to be enlarged by the same amount, the strategy should pick the node having the smallest
        area as a tie-breaker.
        """
        # Arrange
        a = RTreeEntry(data='a', rect=Rect(0, 0, 4, 2))
        b = RTreeEntry(data='b', rect=Rect(5, 1, 7, 3))
        c = RTreeEntry(data='c', rect=Rect(0, 4, 1, 5))
        rect = Rect(4, 1, 5, 2)

        # Act
        entry = least_area_enlargement([a, b, c], rect)

        # Assert
        self.assertEqual(b, entry)
Exemple #10
0
    def test_get_possible_divisions_1_3(self):
        """Tests get_possible_divisions with m=1 and M=3"""
        # Arrange
        rect = Rect(0, 0, 0, 0)
        a = RTreeEntry(data='a', rect=rect)
        b = RTreeEntry(data='b', rect=rect)
        c = RTreeEntry(data='c', rect=rect)
        d = RTreeEntry(data='d', rect=rect)

        # Act
        divisions = get_possible_divisions([a, b, c, d], 1, 3)

        # Assert
        self.assertEqual(3, len(divisions))
        self.assertEqual(([a], [b, c, d]), divisions[0])
        self.assertEqual(([a, b], [c, d]), divisions[1])
        self.assertEqual(([a, b, c], [d]), divisions[2])
Exemple #11
0
    def test_choose_split_axis(self):
        """
        Ensure split axis is chosen based on smallest overall perimeter of all possible divisions of a list of entries.
        In the below scenario, there is a clear winner with the best division being ([a, b, c], [d]).
        """
        # Arrange
        a = RTreeEntry(data='a', rect=Rect(0, 0, 1, 1))
        b = RTreeEntry(data='b', rect=Rect(1, 0, 2, 1))
        c = RTreeEntry(data='c', rect=Rect(2, 0, 3, 1))
        d = RTreeEntry(data='d', rect=Rect(1, 7, 2, 8))
        stat = get_rstar_stat([a, b, c, d], 1, 3)

        # Act
        result = choose_split_axis(stat)

        # Assert
        self.assertEqual('y', result)
Exemple #12
0
    def test_get_possible_divisions_2_4(self):
        """Tests get_possible_divisions with m=2 and M=4"""
        # Arrange
        rect = Rect(0, 0, 0, 0)
        a = RTreeEntry(data='a', rect=rect)
        b = RTreeEntry(data='b', rect=rect)
        c = RTreeEntry(data='c', rect=rect)
        d = RTreeEntry(data='d', rect=rect)
        e = RTreeEntry(data='e', rect=rect)

        # Act
        divisions = get_possible_divisions([a, b, c, d, e], 2, 4)

        # Assert
        self.assertEqual(2, len(divisions))
        self.assertEqual(([a, b], [c, d, e]), divisions[0])
        self.assertEqual(([a, b, c], [d, e]), divisions[1])
Exemple #13
0
    def test_choose_split_index_tie(self):
        """When multiple divisions have the same overlap, ensure split index is chosen based on minimum area."""
        # Arrange
        a = RTreeEntry(data='a', rect=Rect(0, 0, 2, 1))
        b = RTreeEntry(data='b', rect=Rect(1, 0, 3, 2))
        c = RTreeEntry(data='c', rect=Rect(2, 2, 4, 3))
        d = RTreeEntry(data='d', rect=Rect(9, 9, 10, 10))
        distributions = [
            EntryDistribution(([a], [b, c, d])),
            EntryDistribution(([a, b], [c, d])),
            EntryDistribution(([a, b, c], [d]))
        ]

        # Act
        i = choose_split_index(distributions)

        # Assert
        self.assertEqual(2, i)
Exemple #14
0
    def test_choose_split_index(self):
        """Ensures best split index is chosen based on minimum overlap."""
        # Arrange
        a = RTreeEntry(data='a', rect=Rect(0, 1, 4, 5))
        b = RTreeEntry(data='b', rect=Rect(3, 5, 6, 8))
        c = RTreeEntry(data='c', rect=Rect(7, 0, 9, 4))
        d = RTreeEntry(data='d', rect=Rect(8, 7, 10, 9))
        distributions = [
            EntryDistribution(([a], [b, c, d])),
            EntryDistribution(([a, b], [c, d])),
            EntryDistribution(([a, b, c], [d]))
        ]

        # Act
        i = choose_split_index(distributions)

        # Assert
        self.assertEqual(1, i)
Exemple #15
0
    def test_rstar_split(self):
        """
        Ensures the R*-Tree split sets the entries in the original and split nodes correctly after performing a split.
        Note that the tree is not reorganized until adjust_tree is called, which is done on insert rather than split, so
        the resulting structure when calling rstar_split is not necessarily the final structure of the tree.
        """
        # Arrange
        tree = RStarTree(min_entries=1, max_entries=2)
        a = RTreeEntry(data='a', rect=Rect(0, 0, 7, 2))
        b = RTreeEntry(data='b', rect=Rect(1, 1, 2, 3))
        c = RTreeEntry(data='c', rect=Rect(2, 2, 8, 4))
        root = RTreeNode(tree, is_leaf=True, entries=[a, b, c])
        tree.root = root

        # Act
        split_node = rstar_split(tree, root)

        # Assert
        # The original node should contain entries from the first group in the optimal division. The optimal division
        # in this example is [(b), (a,c)], so the original node should contain entry 'b'.
        self.assertEqual(1, len(tree.root.entries))
        entry_b = tree.root.entries[0]
        self.assertEqual('b', entry_b.data)
        self.assertEqual(Rect(1, 1, 2, 3), tree.root.get_bounding_rect())
        self.assertEqual(Rect(1, 1, 2, 3), entry_b.rect)
        self.assertTrue(tree.root.is_root)
        self.assertTrue(tree.root.is_leaf)
        self.assertIsNone(entry_b.child)
        # The split node should contain entries (a,c)
        self.assertEqual(2, len(split_node.entries))
        self.assertEqual(Rect(0, 0, 8, 4), split_node.get_bounding_rect())
        # Entry 'a'
        entry_a = next((e for e in split_node.entries if e.data == 'a'))
        self.assertEqual(Rect(0, 0, 7, 2), entry_a.rect)
        self.assertTrue(entry_a.is_leaf)
        self.assertIsNone(entry_a.child)
        # Entry 'c'
        entry_c = next((e for e in split_node.entries if e.data == 'c'))
        self.assertEqual('c', entry_c.data)
        self.assertTrue(entry_c.is_leaf)
        self.assertIsNone(entry_c.child)
        self.assertEqual(Rect(2, 2, 8, 4), entry_c.rect)
        self.assertTrue(entry_c.is_leaf)
        self.assertIsNone(entry_c.child)
Exemple #16
0
    def test_adjust_tree_with_split_no_propagate(self):
        """
        Ensure parent entry bounding rectangles are updated correctly when a node is split, but it is not necessary to
        propagate the split upward.
        """
        # Arrange
        t = RTreeGuttman(max_entries=3)
        r1 = Rect(0, 0, 3, 2)
        r2 = Rect(2, 1, 5, 3)
        r3 = Rect(4, 2, 6, 4)
        r4 = Rect(6, 6, 8, 8)
        r5 = Rect(7, 7, 10, 9)
        t.root = RTreeNode(t, is_leaf=False)
        entry_a = RTreeEntry(r1, data='a')
        entry_b = RTreeEntry(r2, data='b')
        entry_c = RTreeEntry(r3, data='c')
        entry_d = RTreeEntry(r4, data='d')
        entry_e = RTreeEntry(r5, data='e')
        n1 = RTreeNode(t, is_leaf=True, parent=t.root, entries=[entry_a, entry_b, entry_c])
        n2 = RTreeNode(t, is_leaf=True, parent=t.root, entries=[entry_d, entry_e])
        e1 = RTreeEntry(Rect(0, 0, 6, 4), child=n1)
        e2 = RTreeEntry(Rect(6, 6, 10, 9), child=n2)
        t.root.entries = [e1, e2]
        # Arrange entry being inserted
        r6 = Rect(1, 3, 2, 5)
        entry_f = RTreeEntry(r6, data='f')
        # Manually insert the new entry into node n1, causing it to be overfull.
        n1.entries.append(entry_f)
        # Manually perform node split, but without adjusting the tree yet (since that is the focus of this test)
        split_node = t.perform_node_split(n1, [entry_a, entry_f], [entry_c, entry_b])
        # Ensure preconditions:
        # At this point, the parent entry e1 in the root node will still have the old covering rectangle.
        self.assertEqual(Rect(0, 0, 6, 4), e1.rect)
        # At this point, the root node will only have 2 entries for e1 and e2
        self.assertCountEqual([e1, e2], t.root.entries)

        # Act
        adjust_tree_strategy(t, n1, split_node)

        # Assert
        # Ensure the root node now has 3 child entries
        self.assertEqual(3, len(t.root.entries))
        # The child entries should correspond to nodes n1, n2, and the new split node
        self.assertEqual([n1, n2, split_node], [e.child for e in t.root.entries])
        # Ensure each node has the correct entries
        self.assertEqual([entry_a, entry_f], n1.entries)
        self.assertEqual([entry_d, entry_e], n2.entries)
        self.assertEqual([entry_c, entry_b], split_node.entries)
        # Ensure bounding rectangles have been property updated for all entries
        self.assertEqual(Rect(0, 0, 3, 5), t.root.entries[0].rect)
        self.assertEqual(Rect(6, 6, 10, 9), t.root.entries[1].rect)
        self.assertEqual(Rect(2, 1, 6, 4), t.root.entries[2].rect)
        # Bounding rectangle for the root node should encompass all entries
        self.assertEqual(Rect(0, 0, 10, 9), t.root.get_bounding_rect())
Exemple #17
0
    def test_choose_leaf_returns_leaf_node_when_root_is_leaf(
            self, least_area_enlargement_mock, least_overlap_enlargement_mock):
        """
        When the root node is a leaf, it should be returned without invoking either the least area or overlap
        enlargement strategies.
        """
        # Arrange
        tree = RStarTree()
        root = RTreeNode(tree,
                         is_leaf=True,
                         entries=[RTreeEntry(Rect(0, 0, 1, 1))])
        tree.root = root
        e = RTreeEntry(Rect(0, 0, 0, 0))

        # Act
        node = rstar_choose_leaf(tree, e)

        # Assert
        self.assertEqual(root, node)
        least_area_enlargement_mock.assert_not_called()
        least_overlap_enlargement_mock.assert_not_called()
Exemple #18
0
    def test_choose_leaf_uses_least_overlap_enlargement_for_level_above_leaf(
            self, least_area_enlargement_mock, least_overlap_enlargement_mock):
        """
        Ensure that the choose subtree strategy uses the least overlap enlargement strategy when picking a subtree at
        the level just above a leaf.
        """
        # Arrange
        tree = RStarTree()
        leaf = RTreeNode(tree, is_leaf=True)
        root = RTreeNode(tree,
                         is_leaf=False,
                         entries=[RTreeEntry(Rect(0, 0, 0, 0), child=leaf)])
        tree.root = root
        e = RTreeEntry(Rect(0, 0, 0, 0))

        # Act
        rstar_choose_leaf(tree, e)

        # Assert
        least_overlap_enlargement_mock.assert_called_once_with(
            root.entries, e.rect)
        least_area_enlargement_mock.assert_not_called()
Exemple #19
0
    def test_get_rstar_stat_different_distributions_for_each_sort(self):
        """
        More complex test of get_rstar_stat where each of the 4 sort types (min_x, max_x, min_y, and max_y) results in
        a different sort order of entries (and sometimes different distributions, though some are equivalent).
        """
        # Arrange
        a = RTreeEntry(data='a', rect=Rect(0, 0, 3, 2))
        b = RTreeEntry(data='b', rect=Rect(1, 1, 5, 5))
        c = RTreeEntry(data='c', rect=Rect(6, -1, 8, 3))
        d = RTreeEntry(data='d', rect=Rect(4, 2, 9, 4))

        # Act
        stat = get_rstar_stat([a, b, c, d], 1, 3)

        # Assert
        self.assertCountEqual([
            EntryDistribution(([a], [b, c, d])),
            EntryDistribution(([a, b], [c, d])),
            EntryDistribution(([a, b, c], [d])),
            EntryDistribution(([a, c], [b, d])),
            EntryDistribution(([b], [a, c, d])),
            EntryDistribution(([c], [a, b, d])),
        ], stat.unique_distributions)
        self.assertCountEqual([
            EntryDistribution(([a], [b, c, d])),
            EntryDistribution(([a, b], [c, d])),
            EntryDistribution(([a, b, c], [d])),
            EntryDistribution(([c], [a, b, d])),
        ], stat.get_axis_unique_distributions('x'))
        self.assertCountEqual([
            EntryDistribution(([a], [b, c, d])),
            EntryDistribution(([a, b, c], [d])),
            EntryDistribution(([a, c], [b, d])),
            EntryDistribution(([b], [a, c, d])),
            EntryDistribution(([c], [a, b, d])),
        ], stat.get_axis_unique_distributions('y'))
        self.assertEqual(238, stat.get_axis_perimeter('x'))
        self.assertEqual(260, stat.get_axis_perimeter('y'))
Exemple #20
0
    def test_adjust_tree_with_split_and_propagate(self):
        """
        Ensure parent entry bounding rectangles are updated correctly when a node is split, and it is necessary to
        propagate the split upward. This scenario should result in a new root node being created, and the tree growing
        an extra level.
        """
        # Arrange
        t = RTreeGuttman(max_entries=2)
        r1 = Rect(0, 0, 3, 2)
        r2 = Rect(2, 1, 5, 3)
        r3 = Rect(6, 6, 8, 8)
        r4 = Rect(7, 7, 10, 9)
        t.root = RTreeNode(t, is_leaf=False)
        entry_a = RTreeEntry(r1, data='a')
        entry_b = RTreeEntry(r2, data='b')
        entry_c = RTreeEntry(r3, data='c')
        entry_d = RTreeEntry(r4, data='d')
        n1 = RTreeNode(t, is_leaf=True, parent=t.root, entries=[entry_a, entry_b])
        n2 = RTreeNode(t, is_leaf=True, parent=t.root, entries=[entry_c, entry_d])
        e1 = RTreeEntry(Rect(0, 0, 5, 3), child=n1)
        e2 = RTreeEntry(Rect(6, 6, 10, 9), child=n2)
        t.root.entries = [e1, e2]
        # Arrange entry being inserted
        r5 = Rect(4, 2, 6, 4)
        entry_e = RTreeEntry(r5, data='e')
        # Manually insert the new entry into node n1, causing it to be overfull.
        n1.entries.append(entry_e)
        # Manually perform node split, but without adjusting the tree yet (since that is the focus of this test)
        split_node = t.perform_node_split(n1, [entry_a], [entry_e, entry_b])
        # Ensure preconditions:
        # At this point, the parent entry e1 in the root node will still have the old covering rectangle.
        self.assertEqual(Rect(0, 0, 5, 3), e1.rect)
        # At this point, the root node will only have 2 entries for e1 and e2
        self.assertCountEqual([e1, e2], t.root.entries)

        # Act
        adjust_tree_strategy(t, n1, split_node)

        # Assert
        # Root node bounding rectangle should encompass all entries
        self.assertEqual(Rect(0, 0, 10, 9), t.root.get_bounding_rect())
        # Root node should have 2 child entries
        self.assertEqual(2, len(t.root.entries))
        root_entry_1, root_entry_2 = t.root.entries
        # Root entry 1 bounding rectangle should encompass leaf entries [a, b, e]
        self.assertEqual(Rect(0, 0, 6, 4), root_entry_1.rect)
        # Root entry 2 bounding rectangle should encompass leaf entries [c, d]
        self.assertEqual(Rect(6, 6, 10, 9), root_entry_2.rect)
        # Get the children nodes corresponding to the entries in the root node.
        intermediate_node_1 = root_entry_1.child
        intermediate_node_2 = root_entry_2.child
        # Ensure the intermediate nodes are not marked as leaf or root
        self.assertFalse(intermediate_node_1.is_leaf)
        self.assertFalse(intermediate_node_1.is_root)
        self.assertFalse(intermediate_node_2.is_leaf)
        self.assertFalse(intermediate_node_2.is_root)
        # Intermediate node 1 should contain 2 child entries
        self.assertEqual(2, len(intermediate_node_1.entries))
        # Intermediate node 1 bounding rectangle should encompass leaf entries [a, b, e]
        self.assertEqual(Rect(0, 0, 6, 4), intermediate_node_1.get_bounding_rect())
        # Intermediate node 2 should contain 1 child entry
        self.assertEqual(1, len(intermediate_node_2.entries))
        # Intermediate node 2 bounding rectangle should encompass leaf entries [c, d]
        self.assertEqual(Rect(6, 6, 10, 9), intermediate_node_2.get_bounding_rect())
        # Get references to the entries in the intermediate nodes
        intermediate_entry_1 = intermediate_node_1.entries[0]
        intermediate_entry_2 = intermediate_node_1.entries[1]
        intermediate_entry_3 = intermediate_node_2.entries[0]
        # Ensure the bounding rectangles are correct for the entries in the intermediate nodes
        self.assertEqual(Rect(0, 0, 3, 2), intermediate_entry_1.rect)
        self.assertEqual(Rect(2, 1, 6, 4), intermediate_entry_2.rect)
        self.assertEqual(Rect(6, 6, 10, 9), intermediate_entry_3.rect)
        # Get the leaf nodes from the child entries of the intermediate nodes
        leaf_node_1 = intermediate_entry_1.child
        leaf_node_2 = intermediate_entry_2.child
        leaf_node_3 = intermediate_entry_3.child
        # Ensure the leaf nodes are properly marked as being leaf nodes
        self.assertTrue(leaf_node_1.is_leaf)
        self.assertTrue(leaf_node_2.is_leaf)
        self.assertTrue(leaf_node_3.is_leaf)
        # Leaf node 1 should contain a single child entry for entry_a
        self.assertEqual([entry_a], leaf_node_1.entries)
        # Leaf node 2 should contain entries [e, b]
        self.assertEqual([entry_e, entry_b], leaf_node_2.entries)
        # Leaf node 3 should contain entries [c, d]
        self.assertEqual([entry_c, entry_d], leaf_node_3.entries)
        # Ensure leaf node bounding rectangles are correct
        self.assertEqual(Rect(0, 0, 3, 2), leaf_node_1.get_bounding_rect())
        self.assertEqual(Rect(2, 1, 6, 4), leaf_node_2.get_bounding_rect())
        self.assertEqual(Rect(6, 6, 10, 9), leaf_node_3.get_bounding_rect())
Exemple #21
0
    def test_rstar_overflow_reinsert_grow_tree(self):
        """
        When a forced reinsert causes a node split which propagates upward causing the tree to grow, ensure that the
        remaining entries that still need to be re-inserted are inserted at the correct level. The number of levels in
        the tree changes in the middle of an insert operation under this scenario, which makes this rather complex.
        This scenario is also the reason why the R* insert tracks the level as the number of levels from the leaf (i.e.,
        the leaf level is 0), instead of the more traditional approach of referring to the root level as 0.
        """
        # Arrange
        # Create a tree with 2 levels, with the root and all leaf nodes at capacity.
        # Arrange
        t = RStarTree(max_entries=3, min_entries=1)
        r1 = Rect(0, 0, 5, 2)
        r2 = Rect(1, 1, 5, 3)
        r3 = Rect(2, 2, 6, 4)
        r4 = Rect(5, 0, 12, 9)
        r5 = Rect(7, 7, 20, 8)
        r6 = Rect(4, 4, 13, 6)
        r7 = Rect(16, 7, 19, 10)
        r8 = Rect(16, 9, 18, 10)
        r9 = Rect(18, 8, 19, 10)
        t.root = RTreeNode(t, is_leaf=False)
        entry_a = RTreeEntry(r1, data='a')
        entry_b = RTreeEntry(r2, data='b')
        entry_c = RTreeEntry(r3, data='c')
        entry_d = RTreeEntry(r4, data='d')
        entry_e = RTreeEntry(r5, data='e')
        entry_f = RTreeEntry(r6, data='f')
        entry_g = RTreeEntry(r7, data='g')
        entry_h = RTreeEntry(r8, data='h')
        entry_i = RTreeEntry(r9, data='i')
        n1 = RTreeNode(t,
                       is_leaf=True,
                       parent=t.root,
                       entries=[entry_a, entry_b, entry_c])
        n2 = RTreeNode(t,
                       is_leaf=True,
                       parent=t.root,
                       entries=[entry_d, entry_e, entry_f])
        n3 = RTreeNode(t,
                       is_leaf=True,
                       parent=t.root,
                       entries=[entry_g, entry_h, entry_i])
        e1 = RTreeEntry(Rect(0, 0, 6, 4), child=n1)
        e2 = RTreeEntry(Rect(4, 0, 20, 9), child=n2)
        e3 = RTreeEntry(Rect(16, 7, 19, 10), child=n3)
        t.root.entries = [e1, e2, e3]
        # Arrange entry being inserted
        r10 = Rect(18, 5, 20, 7)
        entry_j = RTreeEntry(r10, data='j')
        # Manually insert the new entry into node n2, causing it to be overfull.
        n2.entries.append(entry_j)
        # Ensure preconditions
        # At this point, the tree should have 2 levels
        self.assertEqual(2, len(t.get_levels()))

        # Act
        # Since this is a complicated case, here is a step-by-step breakdown of what we expect to happen:
        # (1) Upon inserting entry j into node n2, it will overflow (now has 4 entries and max_entries is 3).
        # (2) Forced reinsert will be performed on entries closest to n2's centroid, which are [e, d].
        #     (a) First, entry e will get reinserted into n3, which currently contains entries [g, h, i].
        #     (b) Since n3 now has 4 entries, it overflows.
        #     (c) Since n3 is at the same level as n2, and we already did a forced reinsert at that level, a regular
        #         split gets performed (instead of a forced reinsert).
        #     (d) The split will be propagated upward to the root level.
        #     (e) Since the root node is also at capacity, it will also overflow.
        #     (f) Forced reinsert never occurs at the root level, so a split is performed on the root node.
        #     (g) When the root node is split, the tree grows a level, and a new root node is created.
        #     (h) Forced reinsert of entry e is now complete.
        #     (i) Now, we still need to do the forced reinsert on entry d. Note that the tree structure has completely
        #         changed in the intervening time since entry e got reinserted. The fact that the structure has changed
        #         while we're still in the middle of reinserting entries is a source of much complexity, and the aim of
        #         this test is to ensure that this scenario is properly handled. In particular:
        #           - We must ensure that entry d gets inserted at the correct level of the tree. What was previously
        #             level 1 (immediately below the root) is now actually level 2. (This is why the rstar
        #             implementation keeps track of the level by counting from the leaf level, rather than from the
        #             root, since levels from leaf does not change.)
        #           - Since we must perform a forced reinsert at most once per level, if another forced reinsert causes
        #             another node to overflow, we must ensure to do a split instead of another forced reinsert. Indeed
        #             that is the case when reinserting entry d. Again, because the number of levels has changed, we
        #             must be very careful of how we track what levels we have done a forced reinsert on (this
        #             implementation counts from the leaf instead of from the root).
        #     (j) Entry d gets reinserted into node n1, which currently contains entries [a, b, c].
        #     (k) Since n1 now has 4 entries, it overflows.
        #     (l) Since n1 is at the same level as n2, and we already did a forced reinsert at this level, a regular
        #         split gets performed (instead of a forced reinsert).
        rstar_overflow(t, n2)

        # Assert
        # Tree should now have 3 levels
        self.assertEqual(3, len(t.get_levels()))
        # Root node bounding box should encompass all entries
        self.assertEqual(Rect(0, 0, 20, 10), t.root.get_bounding_rect())
        # Root node should have 2 child entries
        self.assertEqual(2, len(t.root.entries))
        # Ensure entries in the root node have the expected bounding boxes
        root_entry_1 = next(
            (e for e in t.root.entries if e.rect == Rect(0, 0, 12, 9)))
        root_entry_2 = next(
            (e for e in t.root.entries if e.rect == Rect(4, 4, 20, 10)))
        # Ensure children nodes of root_entry_1 and root_entry_2 are intermediate nodes
        intermediate_node_1 = root_entry_1.child
        intermediate_node_2 = root_entry_2.child
        self.assertFalse(intermediate_node_1.is_leaf)
        self.assertFalse(intermediate_node_1.is_root)
        self.assertFalse(intermediate_node_2.is_leaf)
        self.assertFalse(intermediate_node_2.is_root)
        # Ensure intermediate nodes have correct bounding boxes
        self.assertEqual(Rect(0, 0, 12, 9),
                         intermediate_node_1.get_bounding_rect())
        self.assertEqual(Rect(4, 4, 20, 10),
                         intermediate_node_2.get_bounding_rect())
        # Ensure intermediate nodes have the correct number of child entries
        self.assertEqual(2, len(intermediate_node_1.entries))
        self.assertEqual(3, len(intermediate_node_2.entries))
        # Ensure entries in the intermediate nodes have correct bounding boxes
        intermediate_entry_1 = next(e for e in intermediate_node_1.entries
                                    if e.rect == Rect(0, 0, 6, 4))
        intermediate_entry_2 = next(e for e in intermediate_node_1.entries
                                    if e.rect == Rect(5, 0, 12, 9))
        intermediate_entry_3 = next(e for e in intermediate_node_2.entries
                                    if e.rect == Rect(18, 8, 19, 10))
        intermediate_entry_4 = next(e for e in intermediate_node_2.entries
                                    if e.rect == Rect(7, 7, 20, 10))
        intermediate_entry_5 = next(e for e in intermediate_node_2.entries
                                    if e.rect == Rect(4, 4, 20, 7))
        # Get leaf child nodes
        leaf_node_1 = intermediate_entry_1.child
        leaf_node_2 = intermediate_entry_2.child
        leaf_node_3 = intermediate_entry_3.child
        leaf_node_4 = intermediate_entry_4.child
        leaf_node_5 = intermediate_entry_5.child
        # Ensure leaf nodes are properly marked as leaf nodes
        self.assertTrue(leaf_node_1.is_leaf)
        self.assertTrue(leaf_node_2.is_leaf)
        self.assertTrue(leaf_node_3.is_leaf)
        self.assertTrue(leaf_node_4.is_leaf)
        self.assertTrue(leaf_node_5.is_leaf)
        # Leaf nodes should not be inadvertently marked as root
        self.assertFalse(leaf_node_1.is_root)
        self.assertFalse(leaf_node_2.is_root)
        self.assertFalse(leaf_node_3.is_root)
        self.assertFalse(leaf_node_4.is_root)
        self.assertFalse(leaf_node_5.is_root)
        # Leaf node 1 should have entries [a, b, c]
        self.assertEqual(Rect(0, 0, 6, 4), leaf_node_1.get_bounding_rect())
        self.assertCountEqual([entry_a, entry_b, entry_c], leaf_node_1.entries)
        # Leaf node 2 should have entry [d]
        self.assertEqual(Rect(5, 0, 12, 9), leaf_node_2.get_bounding_rect())
        self.assertCountEqual([entry_d], leaf_node_2.entries)
        # Leaf node 3 should have entry [i]
        self.assertEqual(Rect(18, 8, 19, 10), leaf_node_3.get_bounding_rect())
        self.assertCountEqual([entry_i], leaf_node_3.entries)
        # Leaf node 4 should have entries [e, g, h]
        self.assertEqual(Rect(7, 7, 20, 10), leaf_node_4.get_bounding_rect())
        self.assertCountEqual([entry_e, entry_g, entry_h], leaf_node_4.entries)
        # Leaf node 5 should have entries [f, j]
        self.assertEqual(Rect(4, 4, 20, 7), leaf_node_5.get_bounding_rect())
        self.assertCountEqual([entry_f, entry_j], leaf_node_5.entries)
Exemple #22
0
    def test_rstar_overflow_reinsert_without_split(self, rstar_split_mock):
        """
        Tests R* overflow scenario that results in forced reinsert of some entries into a different node, but without
        any additional overflows/splits occurring.
        """
        # Arrange
        t = RStarTree(max_entries=3)
        r1 = Rect(0, 0, 1, 1)
        r2 = Rect(9, 0, 10, 1)
        r3 = Rect(0, 1, 1, 2)
        r4 = Rect(9, 5, 10, 6)
        r5 = Rect(3, 2, 10, 4)
        t.root = RTreeNode(t, is_leaf=False)
        entry_a = RTreeEntry(r1, data='a')
        entry_b = RTreeEntry(r2, data='b')
        entry_c = RTreeEntry(r3, data='c')
        entry_d = RTreeEntry(r4, data='d')
        entry_e = RTreeEntry(r5, data='e')
        n1 = RTreeNode(t,
                       is_leaf=True,
                       parent=t.root,
                       entries=[entry_a, entry_c])
        n2 = RTreeNode(t,
                       is_leaf=True,
                       parent=t.root,
                       entries=[entry_b, entry_d, entry_e])
        e1 = RTreeEntry(Rect(0, 0, 1, 2), child=n1)
        e2 = RTreeEntry(Rect(3, 0, 10, 6), child=n2)
        t.root.entries = [e1, e2]
        # Arrange entry being inserted
        r6 = Rect(2, 1, 3, 2)
        entry_f = RTreeEntry(r6, data='f')
        # Manually insert the new entry into node n2, causing it to be overfull.
        n2.entries.append(entry_f)
        # Ensure preconditions:
        # At this point, the root node entries will still have their old covering rectangles.
        self.assertEqual(Rect(0, 0, 1, 2), e1.rect)
        self.assertEqual(Rect(3, 0, 10, 6), e2.rect)
        # At this point, the root node will only have 2 entries for e1 and e2
        self.assertEqual([e1, e2], t.root.entries)

        # Act
        rstar_overflow(t, n2)

        # Assert
        # Ensure rstar_split was not invoked. In this scenario, it should do a forced reinsert instead (and the reinsert
        # should not result in any additional splits).
        rstar_split_mock.assert_not_called()
        # Ensure the root node still has only 2 entries, and their children are still the nodes n1 and n2.
        self.assertEqual([e1, e2], t.root.entries)
        self.assertEqual(n1, e1.child)
        self.assertEqual(n2, e2.child)
        # Forced insert should have resulted in entry f getting reinserted into node n1 (was previously in n2).
        # Ensure node n1 now has entries [a, c, f].
        self.assertCountEqual([entry_a, entry_c, entry_f], n1.entries)
        # Ensure node n1 bounding box accommodates entries [a, c, f]
        self.assertEqual(Rect(0, 0, 3, 2), n1.get_bounding_rect())
        # Remaining entries [b, d, e] should be in node n2.
        self.assertCountEqual([entry_b, entry_d, entry_e], n2.entries)
        self.assertEqual(Rect(3, 0, 10, 6), n2.get_bounding_rect())
        # Ensure nodes n1 and n2 are leaf nodes, and there are no additional levels in the tree.
        self.assertTrue(n1.is_leaf)
        self.assertTrue(n2.is_leaf)
        self.assertEqual(2, len(t.get_levels()))
Exemple #23
0
    def test_rstar_overflow_reinsert_with_split(self):
        """
        Tests R* overflow scenario that results in forced reinsert of some entries into a different node which is
        already at capacity, causing it to overflow. In this scenario, the second overflow at the same level should
        result in a regular split, not another forced reinsert.
        """
        # Arrange
        t = RStarTree(max_entries=3)
        r1 = Rect(0, 0, 1, 1)
        r2 = Rect(0, 1, 1, 2)
        r3 = Rect(9, 0, 10, 1)
        r4 = Rect(0, 2, 1, 3)
        r5 = Rect(9, 6, 10, 7)
        r6 = Rect(3, 2, 10, 5)
        t.root = RTreeNode(t, is_leaf=False)
        entry_a = RTreeEntry(r1, data='a')
        entry_b = RTreeEntry(r2, data='b')
        entry_c = RTreeEntry(r3, data='c')
        entry_d = RTreeEntry(r4, data='d')
        entry_e = RTreeEntry(r5, data='e')
        entry_f = RTreeEntry(r6, data='f')
        n1 = RTreeNode(t,
                       is_leaf=True,
                       parent=t.root,
                       entries=[entry_a, entry_b, entry_d])
        n2 = RTreeNode(t,
                       is_leaf=True,
                       parent=t.root,
                       entries=[entry_c, entry_e, entry_f])
        e1 = RTreeEntry(Rect(0, 0, 1, 3), child=n1)
        e2 = RTreeEntry(Rect(3, 0, 10, 7), child=n2)
        t.root.entries = [e1, e2]
        # Arrange entry being inserted
        r7 = Rect(2, 1, 3, 2)
        entry_g = RTreeEntry(r7, data='g')
        # Manually insert the new entry into node n2, causing it to be overfull.
        n2.entries.append(entry_g)
        # Ensure preconditions:
        # At this point, the root node entries will still have their old covering rectangles.
        self.assertEqual(Rect(0, 0, 1, 3), e1.rect)
        self.assertEqual(Rect(3, 0, 10, 7), e2.rect)
        # At this point, the root node will only have 2 entries for e1 and e2
        self.assertEqual([e1, e2], t.root.entries)

        # Act
        rstar_overflow(t, n2)

        # Assert
        # Root node should now have 3 entries (split should have occurred)
        self.assertEqual(3, len(t.root.entries))
        # There should still be 2 levels in the tree (root node should not have split)
        levels = t.get_levels()
        self.assertEqual(2, len(levels))
        # There should be 3 nodes at the leaf level
        leaf_nodes = levels[1]
        self.assertEqual(3, len(leaf_nodes))
        # One of the nodes should have entries [a, b] and with the correct bounding rectangle
        n1 = next((n for n in leaf_nodes
                   if set(_get_leaf_node_data(n)) == {'a', 'b'}))
        self.assertEqual(Rect(0, 0, 1, 2), n1.get_bounding_rect())
        self.assertEqual(Rect(0, 0, 1, 2), n1.parent_entry.rect)
        # Another node should have entries [c, e, f] and with the correct bounding rectangle
        n2 = next((n for n in leaf_nodes
                   if set(_get_leaf_node_data(n)) == {'c', 'e', 'f'}))
        self.assertEqual(Rect(3, 0, 10, 7), n2.get_bounding_rect())
        self.assertEqual(Rect(3, 0, 10, 7), n2.parent_entry.rect)
        # Last node should have entries [d, g] and with the correct bounding rectangle
        n3 = next((n for n in leaf_nodes
                   if set(_get_leaf_node_data(n)) == {'d', 'g'}))
        self.assertEqual(Rect(0, 1, 3, 3), n3.get_bounding_rect())
        self.assertEqual(Rect(0, 1, 3, 3), n3.parent_entry.rect)