Esempio n. 1
0
 def test_counting_scanner_state_has_right_result(self):
     """Run a CountingScanner over a sample text and check its counter.

     The scanner is built from a ``[a-z]+`` pattern FSM and a separator
     FSM; result slot 0 of the final state is expected to equal 3.
     """
     word_fsm = pire.Lexer("[a-z]+").Parse()
     separator_fsm = pire.Lexer(r"\s").Parse()
     scanner = pire.CountingScanner(pattern=word_fsm, sep=separator_fsm)

     # Begin/Run/End are chained directly on the freshly created state.
     final_state = (
         scanner.InitState().Begin().Run("abc def, abc def ghi, abc").End()
     )
     assert 3 == final_state.Result(0)
Esempio n. 2
0
    def test_glued_counting_scanner_state_has_right_results(self):
        """Glue two counting scanners and check both per-scanner results.

        Both scanners share the same ``.*`` separator FSM; the first counts
        ``[a-z]+`` and the second ``[0-9]+``.
        """
        any_separator = pire.Lexer(".*").Parse()
        letters_counter = pire.CountingScanner(
            pire.Lexer("[a-z]+").Parse(), any_separator)
        digits_counter = pire.CountingScanner(
            pire.Lexer("[0-9]+").Parse(), any_separator)
        glued = letters_counter.GluedWith(digits_counter)

        state = glued.InitState()
        # The chained call's return value is discarded on purpose: results
        # are read back from ``state`` itself.
        state.Begin().Run("abc defg 123 jklmn 4567 opqrst").End()

        # Result indices follow the gluing order: 0 -> letters, 1 -> digits.
        assert 4 == state.Result(0)
        assert 2 == state.Result(1)
Esempio n. 3
0
    def test_fsm_supports_fluent_inplace_operations(self, scanner_class,
                                                    parse_scanner):
        """Build an Fsm through fluent and in-place calls, then compare it.

        The hand-built machine is compiled with ``scanner_class`` and checked
        for equivalence against a scanner parsed from the regexp
        ``((a.)*(b.*)|(d{3}))&~(.*c)`` on a fixed list of sample inputs.
        """
        # Chained form: every call's return value feeds the next call.
        fsm_a = pire.Fsm().Append("a").AppendDot()

        # Statement form: the return value of Append is dropped, so the test
        # relies on ``fsm_b`` itself having been changed.
        fsm_b = pire.Fsm()
        fsm_b.Append("b")

        fsm_d = pire.Fsm().Append("d")
        fsm_d *= 3

        fsm_c = pire.Lexer("c").Parse()

        # Augmented assignments applied in sequence to one machine.
        combined = fsm_a.Iterate()
        combined += fsm_b.AppendAnything()
        combined |= fsm_d
        combined &= fsm_c.PrependAnything().Complement()

        samples = [
            "ddd",
            "dddc",
            "a-b--c",
            "a-a-b--",
            "bdddc",
            "bddd",
            "",
            "b",
            "bc",
            "c",
        ]
        reference = parse_scanner("((a.)*(b.*)|(d{3}))&~(.*c)", "a")
        check_equivalence(reference, scanner_class(combined), samples)
Esempio n. 4
0
 def test_lexer_glues_similar_glyphs(self):
     """Check UTF8 | GLUE_SIMILAR_GLYPHS scanners on look-alike spellings.

     A scanner is compiled from each of two spellings of "regexp" (one with
     some cyrillic letters, one all latin) and ``check_scanner`` is asked to
     verify that it accepts both spellings.
     """
     exactly_regexp = "regexp"  # all latin1
     almost_regexp = u"rеgехр"  # 'е', 'х' and 'р' are cyrillic
     lexer_options = pire.UTF8 | pire.GLUE_SIMILAR_GLYPHS

     for pattern in (almost_regexp, exactly_regexp):
         compiled = pire.Lexer(pattern, lexer_options).Parse().Compile()
         # A fresh list per iteration, as check_scanner receives its own copy.
         accepted_inputs = [exactly_regexp, almost_regexp.encode("utf8")]
         check_scanner(compiled, accepts=accepted_inputs)
Esempio n. 5
0
    def test_scanner_finds_prefixes_and_suffixes(self, scanner_class):
        """Check Longest/Shortest Prefix/Suffix offsets around ``-->``.

        In the sample text the first ``-->`` spans indices 11..13 (so it ends
        at offset 14) and the second spans 22..24 (ending at offset 25).
        """
        arrow = pire.Lexer("-->").Parse()
        any_occurrence = scanner_class(~pire.Fsm.MakeFalse() + arrow)
        first_occurrence = scanner_class(~arrow.Surrounded() + arrow)
        reversed_arrow = scanner_class(arrow.Reverse())

        text = "1234567890 --> middle --> end"
        through_first_arrow = text[:14]
        through_second_arrow = text[:25]

        # Longest prefix ending in the first arrow, then its start via suffix.
        assert 14 == first_occurrence.LongestPrefix(text)
        assert 11 == reversed_arrow.LongestSuffix(through_first_arrow)

        # Longest prefix ending in any arrow reaches the second one.
        assert 25 == any_occurrence.LongestPrefix(text)
        assert 22 == reversed_arrow.LongestSuffix(through_second_arrow)

        assert 14 == first_occurrence.ShortestPrefix(text)
        assert 11 == reversed_arrow.ShortestSuffix(through_first_arrow)

        # Shortest prefix ending in any arrow stops at the first one.
        assert 14 == any_occurrence.ShortestPrefix(text)
        assert 11 == reversed_arrow.ShortestSuffix(through_first_arrow)
Esempio n. 6
0
    def test_fsm_supports_nonmodifying_operations(self, scanner_class,
                                                  parse_scanner):
        """Combine Fsms with operators and compare against a parsed regexp.

        The operator-built expression is compiled with ``scanner_class`` and
        checked for equivalence with ``((ab*)|(.*c.*)|(d{4}))&~e``.
        """
        a, b, c, d, e = (pire.Lexer(char).Parse() for char in "abcde")

        a_then_bs = a + b.Iterated()
        around_c = c.Surrounded()
        # Multiplication is used with the int on either side: 2 * (d * 2).
        four_ds = 2 * (d * 2)
        expression = (a_then_bs | around_c | four_ds) & ~e

        samples = [
            "a",
            "abbbb",
            "c",
            "--c-",
            "dddd",
            "--",
            "e",
            "-ee-",
            "",
        ]
        reference = parse_scanner("((ab*)|(.*c.*)|(d{4}))&~e", "a")
        check_equivalence(reference, scanner_class(expression), samples)
Esempio n. 7
0
    def test_gluing_too_many_scanners_raises(self, scanner_class):
        """Gluing scanners for a long list of patterns raises OverflowError.

        Scanner classes listed in ``SCANNERS_WITHOUT_GLUE`` are not exercised;
        the test returns immediately for them.
        """
        if scanner_class in SCANNERS_WITHOUT_GLUE:
            return

        many_patterns = [
            # Raw string: "\?" is not a valid Python escape sequence, and a
            # non-raw literal triggers an invalid-escape warning.
            '/product/', '/catalog/', r'/?(\?.*)?$', '/.*/a', '/.*/b', '/.*/c',
            '/.*/d', '/.*/e', '/.*/f', '/.*/g', '/.*/1234567891011',
            # The comma after '/.*/do_it_yourself/' was missing, which made
            # Python silently concatenate the last two patterns into one.
            '/.*/qwertyuiopasdfgh', '/.*/do_it_yourself/',
            '/.*/doityourself/',
        ]

        with pytest.raises(OverflowError):
            scanner = scanner_class()
            for pattern in many_patterns:
                # Each pattern is anchored at the start and followed by ".*".
                new_scanner = (pire.Lexer("^" + pattern +
                                          ".*").Parse().Compile(scanner_class))
                scanner = scanner.GluedWith(new_scanner)
                assert not scanner.Empty()
            # Only reached if no glue step overflowed; pytest.raises then
            # fails the test because OverflowError was never raised.
            assert scanner.RegexpsCount() == len(many_patterns)
Esempio n. 8
0
    def test_capturing_trivial(self):
        """
        Port of the "Trivial" test from tests/capture_ut.cpp.

        Two texts containing a quoted ``google_id`` assignment must yield the
        captured value "abcde"; a text using ``!=`` must capture nothing.
        """
        pattern = r"""google_id\s*=\s*['"]([a-z0-9]+)['"]\s*;"""
        fsm = pire.Lexer(pattern).AddOptions(pire.I).AddCapturing(1).Parse()
        scanner = fsm.Surround().Compile(pire.CapturingScanner)

        def run_capture(text):
            # A fresh state is created for every text.
            return scanner.InitState().Begin().Run(text).End().Captured()

        for matching_text in (
            "google_id = 'abcde';",
            "var google_id = 'abcde'; eval(google_id);",
        ):
            bounds = run_capture(matching_text)
            assert bounds
            # Both reported offsets are shifted down by one before slicing.
            assert "abcde" == matching_text[bounds[0] - 1:bounds[1] - 1]

        assert None is run_capture("google_id != 'abcde';")
Esempio n. 9
0
 def scanner_factory(pattern, options=""):
     """Parse ``pattern`` with ``options`` and wrap the result in a scanner.

     ``scanner_class`` is not a parameter; it is looked up from the
     surrounding scope when the factory is called.
     """
     parsed_fsm = pire.Lexer(pattern, options).Parse()
     return scanner_class(parsed_fsm)
Esempio n. 10
0
 def test_lexer_raises_on_parsing_invalid_regexp(self):
     """Parsing the unterminated character class ``[ab`` raises ValueError."""
     # Built outside the raises-block: only Parse() is expected to fail.
     broken_lexer = pire.Lexer("[ab")
     with pytest.raises(ValueError):
         broken_lexer.Parse()
Esempio n. 11
0
 def test_lexer_cannot_be_constructed_with_wrong_argument(self):
     """Constructing a Lexer from the integer 42 raises TypeError."""
     not_a_pattern = 42
     with pytest.raises(TypeError):
         pire.Lexer(not_a_pattern)
Esempio n. 12
0
 def test_lexer_default_constructible(self):
     """A Lexer built with no arguments can be parsed into a ``pire.Fsm``."""
     parsed = pire.Lexer().Parse()
     # Exact type comparison, not an isinstance check.
     assert pire.Fsm == type(parsed)