Beispiel #1
0
    def test_23bit_file_offset_too_small(self):
        # Serializing an oversized trie with 3-byte file offsets is expected
        # to raise an OverflowError that points at SEARCH_FILE_OFFSET_BYTES,
        # while the same trie with 4-byte file offsets is expected to work.
        trie = Trie()

        # The high bit of the child offset stores a lookahead barrier, so the
        # file has to be smaller than 8M, not 16M. Python has a recursion
        # limit of 1000, so inserting a single 8M character long string isn't
        # really possible. Instead, insert 130 strings of growing length ('',
        # 'a', 'aa', ...), each carrying 32k 16-bit result IDs. 129 of them
        # isn't enough to overflow the offsets.
        result_ids = list(range(32767))
        for length in range(130):
            trie.insert('a' * length, result_ids)

        expected_error = "Trie child offset too large to store in 23 bits, set SEARCH_FILE_OFFSET_BYTES = 4 in your conf.py."
        with self.assertRaisesRegex(OverflowError, expected_error):
            trie.serialize(Serializer(name_size_bytes=1,
                                      result_id_bytes=2,
                                      file_offset_bytes=3))

        # This should work
        trie.serialize(Serializer(name_size_bytes=1,
                                  result_id_bytes=2,
                                  file_offset_bytes=4))
Beispiel #2
0
    def test_single(self):
        map = ResultMap()

        self.assertEqual(
            map.add("Magnum",
                    "namespaceMagnum.html",
                    suffix_length=11,
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.NAMESPACE)), 0)

        for i in type_sizes:
            with self.subTest(**i):
                serialized = map.serialize(Serializer(**i))
                self.compare(
                    Deserializer(**i), serialized, """
0: Magnum [suffix_length=11, type=NAMESPACE] -> namespaceMagnum.html
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between. The `result_id_bytes` don't affect
                # this case.
                if i['file_offset_bytes'] == 3 and i['name_size_bytes'] == 1:
                    self.assertEqual(len(serialized), 35)
                elif i['file_offset_bytes'] == 4 and i['name_size_bytes'] == 2:
                    self.assertEqual(len(serialized), 38)
                else:
                    self.assertGreater(len(serialized), 35)
                    self.assertLess(len(serialized), 38)
Beispiel #3
0
    def test_16bit_result_count(self):
        trie = Trie()

        for i in range(128):
            trie.insert("__init__", i)
        # It's __init_subclass__ (one underscore, not two), but here I want to
        # trigger the case of both a high amount of results and some children
        # as well.
        for i in [203, 215, 267]:
            trie.insert("__init__subclass__", i)

        for i in trie_type_sizes:
            with self.subTest(**i):
                serialized = trie.serialize(Serializer(**i))
                self.compare(
                    Deserializer(**i), serialized, """
__init__ [{}]
        subclass__ [203, 215, 267]
""".format(', '.join([str(i) for i in range(128)])))
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2:
                    self.assertEqual(len(serialized), 377)
                elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4:
                    self.assertEqual(len(serialized), 657)
                else:
                    self.assertGreater(len(serialized), 377)
                    self.assertLess(len(serialized), 657)
Beispiel #4
0
    def test_unicode(self):
        trie = Trie()

        trie.insert("hýždě", 0)
        trie.insert("hárá", 1)

        serialized = trie.serialize(
            Serializer(file_offset_bytes=3,
                       result_id_bytes=2,
                       name_size_bytes=1))
        self.compare(
            Deserializer(file_offset_bytes=3,
                         result_id_bytes=2,
                         name_size_bytes=1), serialized, """
h0xc3
  0xbd
   0xc5
  | 0xbe
  |  d0xc4
  |    0x9b
  |      [0]
  0xa1
   r0xc3
  |  0xa1
  |    [1]
""")
        self.assertEqual(len(serialized), 82)
Beispiel #5
0
    def test_empty(self):
        # A trie with nothing inserted is expected to give an empty dump and
        # 6 serialized bytes for every combination in trie_type_sizes.
        empty_trie = Trie()

        for sizes in trie_type_sizes:
            with self.subTest(**sizes):
                blob = empty_trie.serialize(Serializer(**sizes))
                self.compare(Deserializer(**sizes), blob, "")
                self.assertEqual(len(blob), 6)
Beispiel #6
0
    def test_empty(self):
        # A result map with nothing added is expected to give an empty dump
        # and to serialize to exactly `file_offset_bytes` bytes for every
        # combination in type_sizes.
        empty_map = ResultMap()

        for sizes in type_sizes:
            with self.subTest(**sizes):
                blob = empty_map.serialize(Serializer(**sizes))
                self.compare(Deserializer(**sizes), blob, "")
                self.assertEqual(len(blob), sizes['file_offset_bytes'])
Beispiel #7
0
    def test_24bit_result_id_too_small(self):
        # A result ID of 16M (2**24) is expected to be rejected with 3-byte
        # result IDs and accepted with 4-byte ones.
        trie = Trie()
        trie.insert("a", 16 * 1024 * 1024)

        expected_error = "Trie result ID too large to store in 24 bits, set SEARCH_RESULT_ID_BYTES = 4 in your conf.py."
        with self.assertRaisesRegex(OverflowError, expected_error):
            trie.serialize(Serializer(name_size_bytes=1,
                                      result_id_bytes=3,
                                      file_offset_bytes=3))

        # This should work
        trie.serialize(Serializer(name_size_bytes=1,
                                  result_id_bytes=4,
                                  file_offset_bytes=3))
Beispiel #8
0
    def test_multiple(self):
        map = ResultMap()

        self.assertEqual(
            map.add("Math",
                    "namespaceMath.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.NAMESPACE)), 0)
        self.assertEqual(
            map.add("Math::Vector",
                    "classMath_1_1Vector.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.CLASS)), 1)
        self.assertEqual(
            map.add("Math::Range",
                    "classMath_1_1Range.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.CLASS)), 2)
        self.assertEqual(
            map.add("Math::min()",
                    "namespaceMath.html#abcdef2875",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.FUNC)), 3)
        self.assertEqual(
            map.add("Math::max(int, int)",
                    "namespaceMath.html#abcdef1234",
                    suffix_length=8,
                    flags=ResultFlag.from_type(
                        ResultFlag.DEPRECATED | ResultFlag.DELETED,
                        EntryType.FUNC)), 4)
        self.assertEqual(map.add("Rectangle", "", alias=2), 5)
        self.assertEqual(
            map.add("Rectangle::Rect()", "", suffix_length=2, alias=2), 6)

        for i in type_sizes:
            with self.subTest(**i):
                serialized = map.serialize(Serializer(**i))
                self.compare(
                    Deserializer(**i), serialized, """
0: Math [type=NAMESPACE] -> namespaceMath.html
1: ::Vector [prefix=0[:0], type=CLASS] -> classMath_1_1Vector.html
2: ::Range [prefix=0[:0], type=CLASS] -> classMath_1_1Range.html
3: ::min() [prefix=0[:18], type=FUNC] -> #abcdef2875
4: ::max(int, int) [prefix=0[:18], suffix_length=8, deprecated, deleted, type=FUNC] -> #abcdef1234
5: Rectangle [alias=2] ->
6: ::Rect() [alias=2, prefix=5[:0], suffix_length=2] ->
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i[
                        'result_id_bytes'] == 2 and i['name_size_bytes'] == 1:
                    self.assertEqual(len(serialized), 202)
                elif i['file_offset_bytes'] == 4 and i[
                        'result_id_bytes'] == 4 and i['name_size_bytes'] == 2:
                    self.assertEqual(len(serialized), 231)
                else:
                    self.assertGreater(len(serialized), 202)
                    self.assertLess(len(serialized), 231)
Beispiel #9
0
    def test(self):
        trie = Trie()
        map = ResultMap()

        trie.insert(
            "math",
            map.add("Math",
                    "namespaceMath.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.NAMESPACE)))
        index = map.add("Math::Vector",
                        "classMath_1_1Vector.html",
                        flags=ResultFlag.from_type(ResultFlag.NONE,
                                                   EntryType.CLASS))
        trie.insert("math::vector", index)
        trie.insert("vector", index)
        index = map.add("Math::Range",
                        "classMath_1_1Range.html",
                        flags=ResultFlag.from_type(ResultFlag.NONE,
                                                   EntryType.CLASS))
        trie.insert("math::range", index)
        trie.insert("range", index)

        for i in type_sizes:
            with self.subTest(**i):
                serialized = serialize_search_data(Serializer(**i), trie, map,
                                                   search_type_map, 3)
                self.compare(
                    serialized, """
3 symbols
math [0]
|   ::vector [1]
|     range [2]
vector [1]
range [2]
0: Math [type=NAMESPACE] -> namespaceMath.html
1: ::Vector [prefix=0[:0], type=CLASS] -> classMath_1_1Vector.html
2: ::Range [prefix=0[:0], type=CLASS] -> classMath_1_1Range.html
(EntryType.PAGE, CssClass.SUCCESS, 'page'),
(EntryType.NAMESPACE, CssClass.PRIMARY, 'namespace'),
(EntryType.CLASS, CssClass.PRIMARY, 'class'),
(EntryType.FUNC, CssClass.INFO, 'func')
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i[
                        'result_id_bytes'] == 2 and i['name_size_bytes'] == 1:
                    self.assertEqual(len(serialized), 282)
                elif i['file_offset_bytes'] == 4 and i[
                        'result_id_bytes'] == 4 and i['name_size_bytes'] == 2:
                    self.assertEqual(len(serialized), 317)
                else:
                    self.assertGreater(len(serialized), 282)
                    self.assertLess(len(serialized), 317)
Beispiel #10
0
    def test_24bit_alias_id_too_small(self):
        # An alias ID of 16M (2**24) is expected to be rejected with 3-byte
        # result IDs and accepted with 4-byte ones.
        result_map = ResultMap()

        # The alias doesn't exist of course, hopefully that's fine in this case
        result_map.add("B", "", alias=16 * 1024 * 1024)

        expected_error = "Result map alias ID too large to store in 24 bits, set SEARCH_RESULT_ID_BYTES = 4 in your conf.py."
        with self.assertRaisesRegex(OverflowError, expected_error):
            result_map.serialize(Serializer(name_size_bytes=1,
                                            result_id_bytes=3,
                                            file_offset_bytes=3))

        # This should work
        result_map.serialize(Serializer(name_size_bytes=1,
                                        result_id_bytes=4,
                                        file_offset_bytes=3))
Beispiel #11
0
    def test_8bit_suffix_length_too_small(self):
        # A suffix length of 256 is expected to be rejected with 1-byte name
        # sizes and accepted with 2-byte ones.
        suffix_length = 256

        result_map = ResultMap()
        result_map.add("F()" + ';' * suffix_length,
                       "f.html",
                       flags=ResultFlag.from_type(ResultFlag.NONE,
                                                  EntryType.FUNC),
                       suffix_length=suffix_length)

        expected_error = "Result map suffix length too large to store in 8 bits, set SEARCH_NAME_SIZE_BYTES = 2 in your conf.py."
        with self.assertRaisesRegex(OverflowError, expected_error):
            result_map.serialize(Serializer(name_size_bytes=1,
                                            result_id_bytes=2,
                                            file_offset_bytes=3))

        # This should work
        result_map.serialize(Serializer(name_size_bytes=2,
                                        result_id_bytes=2,
                                        file_offset_bytes=3))
Beispiel #12
0
    def test_16bit_prefix_id_too_small(self):
        # An entry at index 65536 that is used as a prefix of another entry
        # is expected to be rejected with 2-byte result IDs and accepted with
        # 3-byte ones.
        result_map = ResultMap()

        # Adding A0 to A65535 would be too slow due to the recursive Trie
        # population during prefix merging (SIGH) so trying this instead. It's
        # still hella slow, but at least not TWO MINUTES. This fills the map
        # with 128*128*4 = 65536 three-character names.
        for first in range(128):
            for second in range(128):
                for third in range(4):
                    name = bytes((first, second, third)).decode('utf-8')
                    result_map.add(name,
                                   "a.html",
                                   flags=ResultFlag.from_type(
                                       ResultFlag.NONE, EntryType.CLASS))

        prefix_index = result_map.add(
            "B",
            "b.html",
            flags=ResultFlag.from_type(ResultFlag.NONE, EntryType.CLASS))
        self.assertEqual(prefix_index, 65536)
        result_map.add("B::foo()",
                       "b.html#foo",
                       flags=ResultFlag.from_type(ResultFlag.NONE,
                                                  EntryType.FUNC))

        expected_error = "Result map prefix ID too large to store in 16 bits, set SEARCH_RESULT_ID_BYTES = 3 in your conf.py."
        with self.assertRaisesRegex(OverflowError, expected_error):
            result_map.serialize(Serializer(name_size_bytes=1,
                                            result_id_bytes=2,
                                            file_offset_bytes=3))

        # This should work
        result_map.serialize(Serializer(name_size_bytes=1,
                                        result_id_bytes=3,
                                        file_offset_bytes=3))
Beispiel #13
0
    def test_24bit_file_offset_too_small(self):
        # A single entry with a name sized to push the serialized data to
        # 16M is expected to be rejected with 3-byte file offsets and accepted
        # with 4-byte ones.
        result_map = ResultMap()

        # 3 bytes for the initial offset, 3 bytes for file size, 1 byte for the
        # flags, 1 byte for the null terminator, 6 bytes for the URL
        name_length = 16 * 1024 * 1024 - 14
        result_map.add('F' * name_length,
                       'f.html',
                       flags=ResultFlag.from_type(ResultFlag.NONE,
                                                  EntryType.CLASS))

        expected_error = "Result map offset too large to store in 24 bits, set SEARCH_FILE_OFFSET_BYTES = 4 in your conf.py."
        with self.assertRaisesRegex(OverflowError, expected_error):
            # Disabling prefix merging otherwise memory usage goes to hell
            result_map.serialize(Serializer(name_size_bytes=1,
                                            result_id_bytes=2,
                                            file_offset_bytes=3),
                                 merge_prefixes=False)

        # This should work. Disabling prefix merging otherwise memory usage
        # goes to hell.
        result_map.serialize(Serializer(name_size_bytes=1,
                                        result_id_bytes=2,
                                        file_offset_bytes=4),
                             merge_prefixes=False)
Beispiel #14
0
    def test_single(self):
        trie = Trie()
        trie.insert("magnum", 1337)
        trie.insert("magnum", 21)

        for i in trie_type_sizes:
            with self.subTest(**i):
                serialized = trie.serialize(Serializer(**i))
                self.compare(Deserializer(**i), serialized, """
magnum [1337, 21]
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2:
                    self.assertEqual(len(serialized), 46)
                elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4:
                    self.assertEqual(len(serialized), 56)
                else:
                    self.assertGreater(len(serialized), 46)
                    self.assertLess(len(serialized), 56)
from _search_test_metadata import EntryType, search_type_map, type_sizes
from _search import Trie, ResultMap, ResultFlag, serialize_search_data, Serializer

# The 'js-test-data' directory next to this script (symlinks resolved), where
# the generated files get written
basedir = pathlib.Path(os.path.realpath(__file__)).parent / 'js-test-data'


def type_size_suffix(*, name_size_bytes, result_id_bytes, file_offset_bytes):
    """Return the ``ns<N>-ri<R>-fo<F>`` suffix for a type size combination.

    All three arguments are keyword-only and are formatted into the result
    as-is, in the order name size, result ID size, file offset size.
    """
    return 'ns{}-ri{}-fo{}'.format(name_size_bytes, result_id_bytes,
                                   file_offset_bytes)


# Basic error handling

# Size of the data serialized from an empty trie, an empty result map, an
# empty type map and zero symbols with 1/2/3-byte type sizes. The files below
# are sized relative to it.
min_size = len(serialize_search_data(
    Serializer(name_size_bytes=1, result_id_bytes=2, file_offset_bytes=3),
    Trie(), ResultMap(), [], 0))

# One byte less than the size above
with (basedir / 'short.bin').open('wb') as f:
    f.write(b'#' * (min_size - 1))

# Differs from the file below in the first three bytes -- presumably the valid
# magic is 'MCS', confirm against the Serializer header definition
with (basedir / 'wrong-magic.bin').open('wb') as f:
    f.write(b'MOS\2' + b'\0' * (min_size - 4))

# Fourth byte is 1 instead of the 2 used in the other files -- presumably the
# version field
with (basedir / 'wrong-version.bin').open('wb') as f:
    f.write(b'MCS\1' + b'\0' * (min_size - 4))

# Header packed through Serializer.header_struct with 3 << 1 as the third
# field; judging by the file name this is meant to be an invalid result ID
# size -- TODO confirm against the header_struct layout
with (basedir / 'wrong-result-id-bytes.bin').open('wb') as f:
    f.write(Serializer.header_struct.pack(b'MCS', 2, 3 << 1, 0, 0, 0) +
            b'\0' * (min_size - Serializer.header_struct.size))

# Empty file, in all possible type size combinations
Beispiel #16
0
    def test_multiple(self):
        trie = Trie()

        trie.insert("math", 0)
        trie.insert("math::vector", 1, lookahead_barriers=[4])
        trie.insert("vector", 1)
        trie.insert("math::range", 2)
        trie.insert("range", 2)

        trie.insert("math::min", 3)
        trie.insert("min", 3)
        trie.insert("math::max", 4)
        trie.insert("max", 4)
        trie.insert("math::minmax", 5)
        trie.insert("minmax", 5)

        trie.insert("math::vector::minmax", 6, lookahead_barriers=[4, 12])
        trie.insert("vector::minmax", 6, lookahead_barriers=[6])
        trie.insert("minmax", 6)
        trie.insert("math::vector::min", 7)
        trie.insert("vector::min", 7)
        trie.insert("min", 7)
        trie.insert("math::vector::max", 8)
        trie.insert("vector::max", 8)
        trie.insert("max", 8)

        trie.insert("math::range::min", 9, lookahead_barriers=[4, 11])
        trie.insert("range::min", 9, lookahead_barriers=[5])
        trie.insert("min", 9)

        trie.insert("math::range::max", 10)
        trie.insert("range::max", 10)
        trie.insert("max", 10)

        for i in trie_type_sizes:
            with self.subTest(**i):
                serialized = trie.serialize(Serializer(**i))
                self.compare(
                    Deserializer(**i), serialized, """
math [0]
||| :$
|||  :vector [1]
|||   |     :$
|||   |      :min [7]
|||   |        | max [6]
|||   |        ax [8]
|||   range [2]
|||   |    :$
|||   |     :min [9]
|||   |       ax [10]
|||   min [3]
|||   || max [5]
|||   |ax [4]
||x [4, 8, 10]
|in [3, 7, 9]
|| max [5, 6]
vector [1]
|     :$
|      :min [7]
|        | max [6]
|        ax [8]
range [2]
|    :$
|     :min [9]
|       ax [10]
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i['result_id_bytes'] == 2:
                    self.assertEqual(len(serialized), 340)
                elif i['file_offset_bytes'] == 4 and i['result_id_bytes'] == 4:
                    self.assertEqual(len(serialized), 428)
                else:
                    self.assertGreater(len(serialized), 340)
                    self.assertLess(len(serialized), 428)