Beispiel #1
0
    def test_23bit_file_offset_too_small(self):
        """Check that 3-byte file offsets overflow on a large trie.

        Serializing with ``file_offset_bytes=3`` is expected to raise
        ``OverflowError`` with a hint to switch to 4 bytes, while the same
        trie serialized with ``file_offset_bytes=4`` must succeed.
        """
        import re

        trie = Trie()

        # The high bit of the child offset stores a lookahead barrier, so the
        # file has to be smaller than 8M, not 16M. Python has a recursion
        # limit of 1000, so inserting an 8M-character string is not feasible.
        # Instead, insert every 'a' * n for n in 0..129, each carrying 32k
        # 16-bit result IDs. Per the original note, 129 entries would not be
        # enough to overflow the offsets.
        results_32k = list(range(32767))
        for length in range(130):
            trie.insert('a' * length, results_32k)

        # The expected message is a literal, not a pattern: escape it so that
        # the '.' characters are not treated as regex wildcards.
        expected_message = re.escape(
            "Trie child offset too large to store in 23 bits, set SEARCH_FILE_OFFSET_BYTES = 4 in your conf.py."
        )
        with self.assertRaisesRegex(OverflowError, expected_message):
            trie.serialize(
                Serializer(file_offset_bytes=3,
                           result_id_bytes=2,
                           name_size_bytes=1))

        # This should work
        trie.serialize(
            Serializer(file_offset_bytes=4,
                       result_id_bytes=2,
                       name_size_bytes=1))
Beispiel #2
0
    def test_16bit_result_count(self):
        trie = Trie()

        for i in range(128):
            trie.insert("__init__", i)
        # It's __init_subclass__ (one underscore, not two), but here I want to
        # trigger the case of both a high amount of results and some children
        # as well.
        for i in [203, 215, 267]:
            trie.insert("__init__subclass__", i)

        for i in trie_type_sizes:
            with self.subTest(**i):
                serialized = trie.serialize(Serializer(**i))
                self.compare(
                    Deserializer(**i), serialized, """
__init__ [{}]
        subclass__ [203, 215, 267]
""".format(', '.join([str(i) for i in range(128)])))
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2:
                    self.assertEqual(len(serialized), 377)
                elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4:
                    self.assertEqual(len(serialized), 657)
                else:
                    self.assertGreater(len(serialized), 377)
                    self.assertLess(len(serialized), 657)
Beispiel #3
0
    def test_unicode(self):
        trie = Trie()

        trie.insert("hýždě", 0)
        trie.insert("hárá", 1)

        serialized = trie.serialize(
            Serializer(file_offset_bytes=3,
                       result_id_bytes=2,
                       name_size_bytes=1))
        self.compare(
            Deserializer(file_offset_bytes=3,
                         result_id_bytes=2,
                         name_size_bytes=1), serialized, """
h0xc3
  0xbd
   0xc5
  | 0xbe
  |  d0xc4
  |    0x9b
  |      [0]
  0xa1
   r0xc3
  |  0xa1
  |    [1]
""")
        self.assertEqual(len(serialized), 82)
Beispiel #4
0
    def test_empty(self):
        """An empty trie serializes to 6 bytes for every type-size combination
        and deserializes to an empty dump."""
        empty_trie = Trie()

        for sizes in trie_type_sizes:
            with self.subTest(**sizes):
                data = empty_trie.serialize(Serializer(**sizes))
                self.compare(Deserializer(**sizes), data, "")
                self.assertEqual(len(data), 6)
Beispiel #5
0
    def test_24bit_result_id_too_small(self):
        """Check that a result ID of 16M does not fit into 3-byte result IDs.

        Serializing with ``result_id_bytes=3`` is expected to raise
        ``OverflowError`` with a hint to switch to 4 bytes, while the same
        trie serialized with ``result_id_bytes=4`` must succeed.
        """
        import re

        trie = Trie()
        # 16M == 2**24, the first value that no longer fits into 24 bits
        trie.insert("a", 16 * 1024 * 1024)

        # The expected message is a literal, not a pattern: escape it so that
        # the '.' characters are not treated as regex wildcards.
        expected_message = re.escape(
            "Trie result ID too large to store in 24 bits, set SEARCH_RESULT_ID_BYTES = 4 in your conf.py."
        )
        with self.assertRaisesRegex(OverflowError, expected_message):
            trie.serialize(
                Serializer(file_offset_bytes=3,
                           result_id_bytes=3,
                           name_size_bytes=1))

        # This should work
        trie.serialize(
            Serializer(file_offset_bytes=3,
                       result_id_bytes=4,
                       name_size_bytes=1))
Beispiel #6
0
    def test_single(self):
        trie = Trie()
        trie.insert("magnum", 1337)
        trie.insert("magnum", 21)

        serialized = trie.serialize()
        self.compare(serialized, """
magnum [1337, 21]
""")
        self.assertEqual(len(serialized), 46)
Beispiel #7
0
    def test_single(self):
        trie = Trie()
        trie.insert("magnum", 1337)
        trie.insert("magnum", 21)

        for i in trie_type_sizes:
            with self.subTest(**i):
                serialized = trie.serialize(Serializer(**i))
                self.compare(Deserializer(**i), serialized, """
magnum [1337, 21]
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2:
                    self.assertEqual(len(serialized), 46)
                elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4:
                    self.assertEqual(len(serialized), 56)
                else:
                    self.assertGreater(len(serialized), 46)
                    self.assertLess(len(serialized), 56)
Beispiel #8
0
    def test_unicode(self):
        trie = Trie()

        trie.insert("hýždě", 0)
        trie.insert("hárá", 1)

        serialized = trie.serialize()
        self.compare(
            serialized, """
h0xc3
  0xbd
   0xc5
  | 0xbe
  |  d0xc4
  |    0x9b
  |      [0]
  0xa1
   r0xc3
  |  0xa1
  |    [1]
""")
        self.assertEqual(len(serialized), 82)
Beispiel #9
0
    def test_multiple(self):
        trie = Trie()

        trie.insert("math", 0)
        trie.insert("math::vector", 1, lookahead_barriers=[4])
        trie.insert("vector", 1)
        trie.insert("math::range", 2)
        trie.insert("range", 2)

        trie.insert("math::min", 3)
        trie.insert("min", 3)
        trie.insert("math::max", 4)
        trie.insert("max", 4)
        trie.insert("math::minmax", 5)
        trie.insert("minmax", 5)

        trie.insert("math::vector::minmax", 6, lookahead_barriers=[4, 12])
        trie.insert("vector::minmax", 6, lookahead_barriers=[6])
        trie.insert("minmax", 6)
        trie.insert("math::vector::min", 7)
        trie.insert("vector::min", 7)
        trie.insert("min", 7)
        trie.insert("math::vector::max", 8)
        trie.insert("vector::max", 8)
        trie.insert("max", 8)

        trie.insert("math::range::min", 9, lookahead_barriers=[4, 11])
        trie.insert("range::min", 9, lookahead_barriers=[5])
        trie.insert("min", 9)

        trie.insert("math::range::max", 10)
        trie.insert("range::max", 10)
        trie.insert("max", 10)

        serialized = trie.serialize()
        self.compare(
            serialized, """
math [0]
||| :$
|||  :vector [1]
|||   |     :$
|||   |      :min [7]
|||   |        | max [6]
|||   |        ax [8]
|||   range [2]
|||   |    :$
|||   |     :min [9]
|||   |       ax [10]
|||   min [3]
|||   || max [5]
|||   |ax [4]
||x [4, 8, 10]
|in [3, 7, 9]
|| max [5, 6]
vector [1]
|     :$
|      :min [7]
|        | max [6]
|        ax [8]
range [2]
|    :$
|     :min [9]
|       ax [10]
""")
        self.assertEqual(len(serialized), 340)
Beispiel #10
0
    def test_empty(self):
        """An empty trie serializes to 6 bytes and produces an empty dump."""
        data = Trie().serialize()

        self.compare(data, "")
        self.assertEqual(len(data), 6)
Beispiel #11
0
    def test_multiple(self):
        trie = Trie()

        trie.insert("math", 0)
        trie.insert("math::vector", 1, lookahead_barriers=[4])
        trie.insert("vector", 1)
        trie.insert("math::range", 2)
        trie.insert("range", 2)

        trie.insert("math::min", 3)
        trie.insert("min", 3)
        trie.insert("math::max", 4)
        trie.insert("max", 4)
        trie.insert("math::minmax", 5)
        trie.insert("minmax", 5)

        trie.insert("math::vector::minmax", 6, lookahead_barriers=[4, 12])
        trie.insert("vector::minmax", 6, lookahead_barriers=[6])
        trie.insert("minmax", 6)
        trie.insert("math::vector::min", 7)
        trie.insert("vector::min", 7)
        trie.insert("min", 7)
        trie.insert("math::vector::max", 8)
        trie.insert("vector::max", 8)
        trie.insert("max", 8)

        trie.insert("math::range::min", 9, lookahead_barriers=[4, 11])
        trie.insert("range::min", 9, lookahead_barriers=[5])
        trie.insert("min", 9)

        trie.insert("math::range::max", 10)
        trie.insert("range::max", 10)
        trie.insert("max", 10)

        for i in trie_type_sizes:
            with self.subTest(**i):
                serialized = trie.serialize(Serializer(**i))
                self.compare(
                    Deserializer(**i), serialized, """
math [0]
||| :$
|||  :vector [1]
|||   |     :$
|||   |      :min [7]
|||   |        | max [6]
|||   |        ax [8]
|||   range [2]
|||   |    :$
|||   |     :min [9]
|||   |       ax [10]
|||   min [3]
|||   || max [5]
|||   |ax [4]
||x [4, 8, 10]
|in [3, 7, 9]
|| max [5, 6]
vector [1]
|     :$
|      :min [7]
|        | max [6]
|        ax [8]
range [2]
|    :$
|     :min [9]
|       ax [10]
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2:
                    self.assertEqual(len(serialized), 340)
                elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4:
                    self.assertEqual(len(serialized), 428)
                else:
                    self.assertGreater(len(serialized), 340)
                    self.assertLess(len(serialized), 428)