Example #1
0
    def test_single(self):
        map = ResultMap()

        self.assertEqual(
            map.add("Magnum",
                    "namespaceMagnum.html",
                    suffix_length=11,
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.NAMESPACE)), 0)

        for i in type_sizes:
            with self.subTest(**i):
                serialized = map.serialize(Serializer(**i))
                self.compare(
                    Deserializer(**i), serialized, """
0: Magnum [suffix_length=11, type=NAMESPACE] -> namespaceMagnum.html
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between. The `result_id_bytes` don't affect
                # this case.
                if i['file_offset_bytes'] == 3 and i['name_size_bytes'] == 1:
                    self.assertEqual(len(serialized), 35)
                elif i['file_offset_bytes'] == 4 and i['name_size_bytes'] == 2:
                    self.assertEqual(len(serialized), 38)
                else:
                    self.assertGreater(len(serialized), 35)
                    self.assertLess(len(serialized), 38)
Example #2
0
    def test_empty(self):
        # A map without any entries: the listing is expected to be empty and
        # the serialized length to equal the configured file offset width.
        empty_map = ResultMap()

        for sizes in type_sizes:
            with self.subTest(**sizes):
                data = empty_map.serialize(Serializer(**sizes))
                self.compare(Deserializer(**sizes), data, "")
                self.assertEqual(len(data), sizes['file_offset_bytes'])
Example #3
0
    def test_multiple(self):
        map = ResultMap()

        self.assertEqual(
            map.add("Math",
                    "namespaceMath.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.NAMESPACE)), 0)
        self.assertEqual(
            map.add("Math::Vector",
                    "classMath_1_1Vector.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.CLASS)), 1)
        self.assertEqual(
            map.add("Math::Range",
                    "classMath_1_1Range.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.CLASS)), 2)
        self.assertEqual(
            map.add("Math::min()",
                    "namespaceMath.html#abcdef2875",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.FUNC)), 3)
        self.assertEqual(
            map.add("Math::max(int, int)",
                    "namespaceMath.html#abcdef1234",
                    suffix_length=8,
                    flags=ResultFlag.from_type(
                        ResultFlag.DEPRECATED | ResultFlag.DELETED,
                        EntryType.FUNC)), 4)
        self.assertEqual(map.add("Rectangle", "", alias=2), 5)
        self.assertEqual(
            map.add("Rectangle::Rect()", "", suffix_length=2, alias=2), 6)

        for i in type_sizes:
            with self.subTest(**i):
                serialized = map.serialize(Serializer(**i))
                self.compare(
                    Deserializer(**i), serialized, """
0: Math [type=NAMESPACE] -> namespaceMath.html
1: ::Vector [prefix=0[:0], type=CLASS] -> classMath_1_1Vector.html
2: ::Range [prefix=0[:0], type=CLASS] -> classMath_1_1Range.html
3: ::min() [prefix=0[:18], type=FUNC] -> #abcdef2875
4: ::max(int, int) [prefix=0[:18], suffix_length=8, deprecated, deleted, type=FUNC] -> #abcdef1234
5: Rectangle [alias=2] ->
6: ::Rect() [alias=2, prefix=5[:0], suffix_length=2] ->
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i[
                        'result_id_bytes'] == 2 and i['name_size_bytes'] == 1:
                    self.assertEqual(len(serialized), 202)
                elif i['file_offset_bytes'] == 4 and i[
                        'result_id_bytes'] == 4 and i['name_size_bytes'] == 2:
                    self.assertEqual(len(serialized), 231)
                else:
                    self.assertGreater(len(serialized), 202)
                    self.assertLess(len(serialized), 231)
Example #4
0
    def test(self):
        trie = Trie()
        map = ResultMap()

        trie.insert(
            "math",
            map.add("Math",
                    "namespaceMath.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.NAMESPACE)))
        index = map.add("Math::Vector",
                        "classMath_1_1Vector.html",
                        flags=ResultFlag.from_type(ResultFlag.NONE,
                                                   EntryType.CLASS))
        trie.insert("math::vector", index)
        trie.insert("vector", index)
        index = map.add("Math::Range",
                        "classMath_1_1Range.html",
                        flags=ResultFlag.from_type(ResultFlag.NONE,
                                                   EntryType.CLASS))
        trie.insert("math::range", index)
        trie.insert("range", index)

        for i in type_sizes:
            with self.subTest(**i):
                serialized = serialize_search_data(Serializer(**i), trie, map,
                                                   search_type_map, 3)
                self.compare(
                    serialized, """
3 symbols
math [0]
|   ::vector [1]
|     range [2]
vector [1]
range [2]
0: Math [type=NAMESPACE] -> namespaceMath.html
1: ::Vector [prefix=0[:0], type=CLASS] -> classMath_1_1Vector.html
2: ::Range [prefix=0[:0], type=CLASS] -> classMath_1_1Range.html
(EntryType.PAGE, CssClass.SUCCESS, 'page'),
(EntryType.NAMESPACE, CssClass.PRIMARY, 'namespace'),
(EntryType.CLASS, CssClass.PRIMARY, 'class'),
(EntryType.FUNC, CssClass.INFO, 'func')
""")
                # Verify just the smallest and largest size, everything else
                # should fit in between
                if i['file_offset_bytes'] == 3 and i[
                        'result_id_bytes'] == 2 and i['name_size_bytes'] == 1:
                    self.assertEqual(len(serialized), 282)
                elif i['file_offset_bytes'] == 4 and i[
                        'result_id_bytes'] == 4 and i['name_size_bytes'] == 2:
                    self.assertEqual(len(serialized), 317)
                else:
                    self.assertGreater(len(serialized), 282)
                    self.assertLess(len(serialized), 317)
Example #5
0
    def test_single(self):
        map = ResultMap()
        self.assertEqual(
            map.add("Magnum",
                    "namespaceMagnum.html",
                    suffix_length=11,
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.NAMESPACE)), 0)

        serialized = map.serialize()
        self.compare(
            serialized, """
0: Magnum [suffix_length=11, type=NAMESPACE] -> namespaceMagnum.html
""")
        self.assertEqual(len(serialized), 36)
Example #6
0
    def test_multiple(self):
        map = ResultMap()

        self.assertEqual(
            map.add("Math",
                    "namespaceMath.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.NAMESPACE)), 0)
        self.assertEqual(
            map.add("Math::Vector",
                    "classMath_1_1Vector.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.CLASS)), 1)
        self.assertEqual(
            map.add("Math::Range",
                    "classMath_1_1Range.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.CLASS)), 2)
        self.assertEqual(
            map.add("Math::min()",
                    "namespaceMath.html#abcdef2875",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.FUNC)), 3)
        self.assertEqual(
            map.add("Math::max(int, int)",
                    "namespaceMath.html#abcdef1234",
                    suffix_length=8,
                    flags=ResultFlag.from_type(
                        ResultFlag.DEPRECATED | ResultFlag.DELETED,
                        EntryType.FUNC)), 4)
        self.assertEqual(map.add("Rectangle", "", alias=2), 5)
        self.assertEqual(
            map.add("Rectangle::Rect()", "", suffix_length=2, alias=2), 6)

        serialized = map.serialize()
        self.compare(
            serialized, """
0: Math [type=NAMESPACE] -> namespaceMath.html
1: ::Vector [prefix=0[:0], type=CLASS] -> classMath_1_1Vector.html
2: ::Range [prefix=0[:0], type=CLASS] -> classMath_1_1Range.html
3: ::min() [prefix=0[:18], type=FUNC] -> #abcdef2875
4: ::max(int, int) [prefix=0[:18], suffix_length=8, deprecated, deleted, type=FUNC] -> #abcdef1234
5: Rectangle [alias=2] ->
6: ::Rect() [alias=2, prefix=5[:0], suffix_length=2] ->
""")
        self.assertEqual(len(serialized), 203)
Example #7
0
    def test(self):
        trie = Trie()
        map = ResultMap()

        trie.insert(
            "math",
            map.add("Math",
                    "namespaceMath.html",
                    flags=ResultFlag.from_type(ResultFlag.NONE,
                                               EntryType.NAMESPACE)))
        index = map.add("Math::Vector",
                        "classMath_1_1Vector.html",
                        flags=ResultFlag.from_type(ResultFlag.NONE,
                                                   EntryType.CLASS))
        trie.insert("math::vector", index)
        trie.insert("vector", index)
        index = map.add("Math::Range",
                        "classMath_1_1Range.html",
                        flags=ResultFlag.from_type(ResultFlag.NONE,
                                                   EntryType.CLASS))
        trie.insert("math::range", index)
        trie.insert("range", index)

        serialized = serialize_search_data(trie, map, search_type_map, 3)
        self.compare(
            serialized, """
3 symbols
math [0]
|   ::vector [1]
|     range [2]
vector [1]
range [2]
0: Math [type=NAMESPACE] -> namespaceMath.html
1: ::Vector [prefix=0[:0], type=CLASS] -> classMath_1_1Vector.html
2: ::Range [prefix=0[:0], type=CLASS] -> classMath_1_1Range.html
(EntryType.PAGE, CssClass.SUCCESS, 'page'),
(EntryType.NAMESPACE, CssClass.PRIMARY, 'namespace'),
(EntryType.CLASS, CssClass.PRIMARY, 'class'),
(EntryType.FUNC, CssClass.INFO, 'func')
""")
        self.assertEqual(len(serialized), 277)
Example #8
0
    def test_8bit_prefix_length_too_small(self):
        # Both URLs start with the same 256 characters ('a' * 251 + ".html"),
        # which presumably is the shared prefix length that no longer fits
        # into a single byte.
        long_url = 'a' * 251 + ".html"
        result_map = ResultMap()
        result_map.add("A",
                       long_url,
                       flags=ResultFlag.from_type(ResultFlag.NONE,
                                                  EntryType.CLASS))
        result_map.add("A::foo()",
                       long_url + "#foo",
                       flags=ResultFlag.from_type(ResultFlag.NONE,
                                                  EntryType.FUNC))

        one_byte_names = dict(file_offset_bytes=3,
                              result_id_bytes=2,
                              name_size_bytes=1)
        two_byte_names = dict(file_offset_bytes=3,
                              result_id_bytes=2,
                              name_size_bytes=2)

        message = "Result map prefix length too large to store in 8 bits, set SEARCH_NAME_SIZE_BYTES = 2 in your conf.py."
        with self.assertRaisesRegex(OverflowError, message):
            result_map.serialize(Serializer(**one_byte_names))

        # With two bytes for name sizes the serialization has to go through
        result_map.serialize(Serializer(**two_byte_names))
Example #9
0
    def test_24bit_alias_id_too_small(self):
        result_map = ResultMap()

        # 16 * 1024 * 1024 is one past the largest value representable in 24
        # bits. No entry with that ID exists, which is hopefully fine here.
        result_map.add("B", "", alias=16 * 1024 * 1024)

        three_byte_ids = dict(file_offset_bytes=3,
                              result_id_bytes=3,
                              name_size_bytes=1)
        four_byte_ids = dict(file_offset_bytes=3,
                             result_id_bytes=4,
                             name_size_bytes=1)

        message = "Result map alias ID too large to store in 24 bits, set SEARCH_RESULT_ID_BYTES = 4 in your conf.py."
        with self.assertRaisesRegex(OverflowError, message):
            result_map.serialize(Serializer(**three_byte_ids))

        # With four bytes for result IDs the serialization has to go through
        result_map.serialize(Serializer(**four_byte_ids))
Example #10
0
    def test_24bit_file_offset_too_small(self):
        # The name is sized so that, together with 14 bytes of overhead, the
        # total reaches 16 * 1024 * 1024: 3 bytes for the initial offset, 3
        # bytes for file size, 1 byte for the flags, 1 byte for the null
        # terminator and 6 bytes for the URL.
        name_length = 16 * 1024 * 1024 - 14
        result_map = ResultMap()
        result_map.add('F' * name_length,
                       'f.html',
                       flags=ResultFlag.from_type(ResultFlag.NONE,
                                                  EntryType.CLASS))

        three_byte_offsets = dict(file_offset_bytes=3,
                                  result_id_bytes=2,
                                  name_size_bytes=1)
        four_byte_offsets = dict(file_offset_bytes=4,
                                 result_id_bytes=2,
                                 name_size_bytes=1)

        # Prefix merging is disabled in both calls below, otherwise memory
        # usage goes to hell
        message = "Result map offset too large to store in 24 bits, set SEARCH_FILE_OFFSET_BYTES = 4 in your conf.py."
        with self.assertRaisesRegex(OverflowError, message):
            result_map.serialize(Serializer(**three_byte_offsets),
                                 merge_prefixes=False)

        # With four bytes for file offsets the serialization has to go through
        result_map.serialize(Serializer(**four_byte_offsets),
                             merge_prefixes=False)
from _search import Trie, ResultMap, ResultFlag, serialize_search_data, Serializer

# Directory for the generated test data, located next to this script (with
# symlinks in the script path resolved)
basedir = pathlib.Path(os.path.realpath(__file__)).parent / 'js-test-data'


def type_size_suffix(*, name_size_bytes, result_id_bytes, file_offset_bytes):
    """Return a suffix string encoding the three type sizes.

    The result has the form ``ns<N>-ri<R>-fo<F>``, where the placeholders are
    the name size, result ID and file offset byte counts respectively. All
    arguments are keyword-only.
    """
    return 'ns{}-ri{}-fo{}'.format(name_size_bytes, result_id_bytes,
                                   file_offset_bytes)


# Basic error handling

# Size of the data serialized from an empty trie, an empty result map, an empty
# type map and zero symbols, using the type sizes below. The files written
# afterwards are sized relative to it.
min_size = len(
    serialize_search_data(
        Serializer(file_offset_bytes=3, result_id_bytes=2, name_size_bytes=1),
        Trie(), ResultMap(), [], 0))

# One byte shorter than the empty serialized data
with (basedir / 'short.bin').open('wb') as f:
    f.write(b'#' * (min_size - 1))
# Four-byte signature b'MOS\2' instead of b'MCS\2', zero-padded to min_size
with (basedir / 'wrong-magic.bin').open('wb') as f:
    f.write(b'MOS\2' + b'\0' * (min_size - 4))
# Four-byte signature b'MCS\1' instead of b'MCS\2', zero-padded to min_size
with (basedir / 'wrong-version.bin').open('wb') as f:
    f.write(b'MCS\1' + b'\0' * (min_size - 4))
# Header packed with 3 << 1 as its third field, zero-padded to min_size.
# NOTE(review): presumably that field encodes the result ID size and this
# value is one the reader rejects -- confirm against the header layout.
with (basedir / 'wrong-result-id-bytes.bin').open('wb') as f:
    f.write(
        Serializer.header_struct.pack(b'MCS', 2, 3 << 1, 0, 0, 0) +
        b'\0' * (min_size - Serializer.header_struct.size))

# Empty file, in all possible type size combinations
Example #12
0
    def test_empty(self):
        # A map without any entries: the listing is expected to be empty and
        # the serialized data to be 4 bytes long.
        empty_map = ResultMap()

        data = empty_map.serialize()
        self.compare(data, "")
        self.assertEqual(len(data), 4)
Example #13
0
    def test_16bit_prefix_id_too_small(self):
        result_map = ResultMap()

        # Fill the map with 128 * 128 * 4 = 65536 entries named by three-byte
        # sequences. Adding A0 to A65535 instead would be too slow due to the
        # recursive Trie population during prefix merging; this variant is
        # still very slow, but at least doesn't take two minutes.
        for first in range(128):
            for second in range(128):
                for third in range(4):
                    name = bytes([first, second, third]).decode('utf-8')
                    result_map.add(name,
                                   "a.html",
                                   flags=ResultFlag.from_type(
                                       ResultFlag.NONE, EntryType.CLASS))

        # The next entry gets ID 65536, the first one not representable in 16
        # bits; the function added after it shares the "B" name prefix.
        class_id = result_map.add("B",
                                  "b.html",
                                  flags=ResultFlag.from_type(
                                      ResultFlag.NONE, EntryType.CLASS))
        self.assertEqual(class_id, 65536)
        result_map.add("B::foo()",
                       "b.html#foo",
                       flags=ResultFlag.from_type(ResultFlag.NONE,
                                                  EntryType.FUNC))

        two_byte_ids = dict(file_offset_bytes=3,
                            result_id_bytes=2,
                            name_size_bytes=1)
        three_byte_ids = dict(file_offset_bytes=3,
                              result_id_bytes=3,
                              name_size_bytes=1)

        message = "Result map prefix ID too large to store in 16 bits, set SEARCH_RESULT_ID_BYTES = 3 in your conf.py."
        with self.assertRaisesRegex(OverflowError, message):
            result_map.serialize(Serializer(**two_byte_ids))

        # With three bytes for result IDs the serialization has to go through
        result_map.serialize(Serializer(**three_byte_ids))