def testTrivialSyntaxDefinition(self):
    """A one-token syntax: 'a' lexes to BOF/Token/EOF; 'b' yields one issue."""
    syntaxDef = [
        (epsilon_nfa.symbol("a").toDFA(), token.Token),
        (epsilon_nfa.symbol(scanner.EOFSymbol).toDFA(), None),
    ]
    lex = lexer.LongestMatchLexer(syntaxDef)

    # Accepting input: stream is BOF, the matched token, EOF; no issues.
    stream, problems = lex([("a", 0), (scanner.EOFSymbol, 1)])
    self.assertEqual(len(stream), 3)
    self.assertFalse(problems)
    # Expected (class, startIndex, endIndex) for each token in order;
    # the sentinels BOF/EOF carry the -1/-1 dummy indices.
    expected = [
        (token.BOF, -1, -1),
        (token.Token, 0, 1),
        (token.EOF, -1, -1),
    ]
    for tok, (cls, start, end) in zip(stream, expected):
        self.assertEqual(tok.__class__, cls)
        self.assertEqual(tok.startIndex(), start)
        self.assertEqual(tok.endIndex(), end)

    # Rejecting input: no token stream, a single issue pinpointing index 0.
    stream, problems = lex([("b", 0), (scanner.EOFSymbol, 1)])
    self.assertEqual(stream, None)
    self.assertEqual(len(problems), 1)
    self.assertEqual(problems[0].startIndex(), 0)
    self.assertEqual(problems[0].endIndex(), 0)
def _stringLiteral(): print "Creating string literal DFA..." # XXX TEST untested. # XXX TODO support escaping of the " character. strLit = symbol('"') &\ kleene(reduce(alt, map(symbol, graphic_chars - set(['"'])))) &\ symbol('"') return ((strLit.toDFA(), tokens.StringLit),)
def _identifier(): print "Creating identifier DFA..." # XXX TEST untested. ident = _letter() & \ kleene((symbol("_") | epsilon()) & \ (_letter() | _digit())) return ((ident.toDFA(), tokens.Identifier),)