Example #1
 def __init__(self, matcher, tokens, alphabet, discard, 
               t_regexp=None, s_regexp=None):
     '''
     matcher is the head of the original matcher graph, which will be called
     with a tokenised stream. 
     
     tokens is the set of `Token` instances that define the lexer.
     
     alphabet is the alphabet for which the regexps are defined.
     
     discard is the regular expression for spaces (which are silently
     dropped if no token can be matched).
     
     t_regexp and s_regexp are internally compiled state, used in cloning,
     and should not be provided by non-cloning callers.
     '''
     super(Lexer, self).__init__(TOKENS, TokenNamespace)
     if t_regexp is None:
         unique = {}
         for token in tokens:
             token.compile(alphabet)
             self._debug(fmt('Token: {0}', token))
             # this just reduces the work for the regexp compiler
             unique[token.id_] = token
         t_regexp = Compiler.multiple(alphabet, 
                         [(t.id_, t.regexp) 
                          for t in unique.values() if t.regexp is not None]).dfa()
     if s_regexp is None and discard is not None:
         s_regexp = Compiler.single(alphabet, discard).dfa()
     self._arg(matcher=matcher)
     self._arg(tokens=tokens)
     self._arg(alphabet=alphabet)
     self._arg(discard=discard)
     self._karg(t_regexp=t_regexp)
     self._karg(s_regexp=s_regexp)
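A minimal standalone sketch of the same idea, for orientation only: the import paths, labels and patterns below are assumptions for illustration, not part of the Lexer above, and depending on the LEPL version match may require a wrapped stream rather than a plain string (compare Examples #7 and #8 below).

# Hedged sketch, not the library's own code: compile two labelled patterns
# into a single DFA, mirroring how t_regexp is built in the constructor above.
# Import paths, labels and patterns are assumed.
from lepl.regexp.core import Compiler
from lepl.regexp.unicode import UnicodeAlphabet

alphabet = UnicodeAlphabet.instance()
t_regexp = Compiler.multiple(alphabet,
                             [(1, '[0-9]+'),         # (id, regexp) pairs,
                              (2, '[a-z]+')]).dfa()  # as in the constructor
# dfa().match gives (labels, matched, remaining) or None (see the tests below)
print(t_regexp.match('123abc'))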
Example #2
def binary_parser(*regexps):
    '''
    Parse a set of binary regular expressions, returning the associated Regexp.
    '''
    return Compiler.multiple(BINARY,
                [Labelled(BINARY, label, *__compiled_binary_parser(text))
                 for (label, text) in regexps])
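A hypothetical call might look as follows; the labels and bit patterns are invented, and the private __compiled_binary_parser helper (not shown here) is assumed to turn each text into the arguments Labelled expects.

# Hypothetical usage only; labels and patterns are invented for illustration.
regexp = binary_parser(('header', '01111110'),
                       ('flag',   '1'))
dfa = regexp.dfa()   # the object returned by Compiler.multiple exposes .dfa(),
                     # as in Example #1 above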
Example #3
 def test_match_7(self):
     alphabet = LineAwareAlphabet(UnicodeAlphabet.instance(), 
                                  make_str_parser)
     expr = Compiler.single(alphabet, '[^(*SOL)(*EOL)a]*').dfa()
     result = list(expr.match([str('1'), EOL]))
     assert result == [[str('label')], [str('1')], [EOL]], \
         result
Example #4
    def do_test(self, pattern, target, dfa_result, nfa_result, parser_factory):
        alphabet = LineAwareAlphabet(UnicodeAlphabet.instance(), parser_factory)
        compiler = Compiler.single(alphabet, pattern)
        str(compiler.expression)
#        text = str(compiler.expression)
#        assert text == format('(?P<label>{0!s})', pattern), text
        
        factory = LineAwareStreamFactory(alphabet)
        target = factory.from_string(target)
        
        dfa = compiler.dfa()
        result = dfa.match(target)
        if result:
            (a, b, c) = result
            (p, q, r) = dfa_result
            assert a == p, result
            assert b == q, result
            assert_str(repr(c), r)
        else:
            assert dfa_result == None, dfa_result

        nfa = compiler.nfa()
        result = list(nfa.match(target))
        assert len(result) == len(nfa_result), result
        for ((a,b,c), (p,q,r)) in zip(result, nfa_result):
            assert a == p, result
            assert b == q, result
            assert_str(repr(c), r)
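A rough standalone version of the same flow, printing the results instead of asserting against precomputed values; the pattern and target are invented, and the snippet assumes the same names imported by the surrounding test module (Compiler, LineAwareAlphabet, UnicodeAlphabet, make_str_parser, LineAwareStreamFactory).

# Hedged sketch: same steps as do_test above, with an invented pattern/target.
alphabet = LineAwareAlphabet(UnicodeAlphabet.instance(), make_str_parser)
compiler = Compiler.single(alphabet, '[^a]*')
target = LineAwareStreamFactory(alphabet).from_string('12a')
print(compiler.dfa().match(target))        # (labels, matched, remaining) or None
print(list(compiler.nfa().match(target)))  # (label, match, remaining) tuples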
Example #5
    def do_test(self, pattern, target, dfa_result, nfa_result, parser_factory):
        alphabet = LineAwareAlphabet(UnicodeAlphabet.instance(),
                                     parser_factory)
        compiler = Compiler.single(alphabet, pattern)
        str(compiler.expression)
        #        text = str(compiler.expression)
        #        assert text == format('(?P<label>{0!s})', pattern), text

        factory = LineAwareStreamFactory(alphabet)
        target = factory.from_string(target)

        dfa = compiler.dfa()
        result = dfa.match(target)
        if result:
            (a, b, c) = result
            (p, q, r) = dfa_result
            assert a == p, result
            assert b == q, result
            assert_str(repr(c), r)
        else:
            assert dfa_result == None, dfa_result

        nfa = compiler.nfa()
        result = list(nfa.match(target))
        assert len(result) == len(nfa_result), result
        for ((a, b, c), (p, q, r)) in zip(result, nfa_result):
            assert a == p, result
            assert b == q, result
            assert_str(repr(c), r)
Example #6
 def _compile(self):
     '''
     Compile the matcher.
     '''
     if self.__cached_matcher is None:
         self.__cached_matcher = \
                 Compiler.single(self.alphabet, self.regexp).nfa().match
     return self.__cached_matcher
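A hypothetical caller, to show how the cached matcher might be used; the method name and input form are invented, and the (label, match, remaining) tuple shape follows the NFA tests elsewhere on this page.

 # Hypothetical companion method (not from the original class):
 def _match_all(self, stream):
     matcher = self._compile()      # compiled on first use, cached afterwards
     return list(matcher(stream))   # [(label, match, remaining), ...]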
Example #7
 def do_test(self, pattern, target, dfa_result, nfa_result):
     alphabet = UnicodeAlphabet.instance()
     compiler = Compiler.single(alphabet, pattern)
     text = str(compiler.expression)
     assert text == format('(?P<label>{0!s})', pattern), text
     nfa = compiler.nfa()
     result = list(nfa.match(target))
     assert result == nfa_result, result
     dfa = compiler.dfa()
     result = dfa.match(target)
     assert result == dfa_result, result
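The same flow can be sketched standalone; the pattern and input are invented, the import paths are assumed, and the exact result shapes vary between LEPL versions (the next example wraps the target in a stream helper before matching).

# Hedged sketch of the single-pattern flow over the Unicode alphabet.
from lepl.regexp.core import Compiler            # assumed import path
from lepl.regexp.unicode import UnicodeAlphabet  # assumed import path

compiler = Compiler.single(UnicodeAlphabet.instance(), '[a-z]+')
print(str(compiler.expression))            # e.g. '(?P<label>[a-z]+)'
print(list(compiler.nfa().match('abc1')))  # (label, match, remaining) tuples
print(compiler.dfa().match('abc1'))        # (labels, matched, remaining) or None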
Example #8
 def do_test(self, pattern, target, dfa_result, nfa_result):
     alphabet = UnicodeAlphabet.instance()
     compiler = Compiler.single(alphabet, pattern)
     text = str(compiler.expression)
     assert text == fmt('(?P<label>{0!s})', pattern), text
     nfa = compiler.nfa()
     result = list(nfa.match((0, StringHelper(target))))
     assert result == nfa_result, result
     dfa = compiler.dfa()
     result = dfa.match((0, StringHelper(target)))
     assert result == dfa_result, result
Example #9
 def __init__(self,
              matcher,
              tokens,
              alphabet,
              discard,
              t_regexp=None,
              s_regexp=None):
     '''
     matcher is the head of the original matcher graph, which will be called
     with a tokenised stream. 
     
     tokens is the set of `Token` instances that define the lexer.
     
     alphabet is the alphabet for which the regexps are defined.
     
     discard is the regular expression for spaces (which are silently
     dropped if no token can be matched).
     
     t_regexp and s_regexp are internally compiled state, used in cloning,
     and should not be provided by non-cloning callers.
     '''
     super(Lexer, self).__init__(TOKENS, TokenNamespace)
     if t_regexp is None:
         unique = {}
         for token in tokens:
             token.compile(alphabet)
             self._debug(fmt('Token: {0}', token))
             # this just reduces the work for the regexp compiler
             unique[token.id_] = token
         t_regexp = Compiler.multiple(
             alphabet,
             [(t.id_, t.regexp)
              for t in unique.values() if t.regexp is not None]).dfa()
     if s_regexp is None and discard is not None:
         s_regexp = Compiler.single(alphabet, discard).dfa()
     self._arg(matcher=matcher)
     self._arg(tokens=tokens)
     self._arg(alphabet=alphabet)
     self._arg(discard=discard)
     self._karg(t_regexp=t_regexp)
     self._karg(s_regexp=s_regexp)
Example #10
def binary_single_parser(label, text):
    '''
    Parse a binary regular expression, returning the associated Regexp.
    '''
    return Compiler.single(BINARY,
                Labelled(BINARY, label, *__compiled_binary_parser(text)))
Example #11
def _test_parser(regexp):
    return Compiler.single(UNICODE, regexp)
Example #12
 def test_match_2(self):
     alphabet = LineAwareAlphabet(UnicodeAlphabet.instance(), 
                                  make_str_parser)
     expr = Compiler.single(alphabet, '[^a]').nfa()
     result = list(expr.match(str('123a')))
     assert result == [(str('label'), str('1'), str('23a'))], result
Example #13
 def test_match_5(self):
     alphabet = LineAwareAlphabet(UnicodeAlphabet.instance(), 
                                  make_str_parser)
     expr = Compiler.single(alphabet, '[^a]*').dfa()
     result = list(expr.match([SOL, str('1'), str('a')]))
     assert result == [[str('label')], [SOL, str('1')], [str('a')]], result
Example #14
 def test_match_3(self):
     alphabet = LineAwareAlphabet(UnicodeAlphabet.instance(),
                                  make_str_parser)
     expr = Compiler.single(alphabet, '[^a]*').dfa()
     result = list(expr.match(str('123a')))
     assert result == [[str('label')], str('123'), str('a')], result