def test_whitespace(self):
    expected_idval = [(lexer.COLON, None),
                      (lexer.STRING, 'a'),
                      (lexer.INTEGER, 12345),
                      (lexer.RARROW, None),
                      (lexer.EQUAL, None),
                      (lexer.ASTERISK, None),
                      (lexer.NAME_OTHER, '_b')]
    # With minimal whitespace
    toks = list(lexer.lex(':"a"12345->=*_b'))
    self.assertEqual([(tok.id, tok.val) for tok in toks], expected_idval)
    # With spaces
    toks = list(lexer.lex(' : "a" 12345 -> = * _b '))
    self.assertEqual([(tok.id, tok.val) for tok in toks], expected_idval)
    # With tabs
    toks = list(lexer.lex('\t:\t"a"\t12345\t->\t=\t*\t_b\t'))
    self.assertEqual([(tok.id, tok.val) for tok in toks], expected_idval)
    # With newlines
    toks = list(lexer.lex('\n:\n"a"\n12345\n->\n=\n*\n_b\n'))
    self.assertEqual([(tok.id, tok.val) for tok in toks], expected_idval)
    # With spaces, tabs, newlines and comments
    toks = list(lexer.lex('# comment\n' +
                          ': # X\n' +
                          ' "a" # "b"\t\n' +
                          '\t12345\n\n' +
                          '->\n' +
                          '=\n' +
                          '*\n' +
                          '_b # comment\n' +
                          ' \t # end'))
    self.assertEqual([(tok.id, tok.val) for tok in toks], expected_idval)

def check_failing_token(self, ds_str):
    # Consuming the token stream should raise immediately,
    # because the error is in the first token.
    self.assertRaises(datashape.DataShapeSyntaxError, list, lexer.lex(ds_str))
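
# A minimal usage sketch for check_failing_token. The inputs below are
# assumptions -- any string whose first token is invalid datashape
# syntax would exercise the same error path.
def test_failing_tokens_sketch(self):
    self.check_failing_token('~')
    self.check_failing_token('`')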

def check_isolated_token(self, ds_str, tname, val=None):
    # The token name should be an attribute of the lexer module
    tid = getattr(lexer, tname)
    # Lexing should produce a single token matching the specification
    self.assertEqual(list(lexer.lex(ds_str)),
                     [lexer.Token(tid, tname, (0, len(ds_str)), val)])
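
# A minimal usage sketch for check_isolated_token, reusing the
# token/value pairs already asserted in test_whitespace; it assumes
# each string lexes to exactly one token spanning the whole input.
def test_isolated_tokens_sketch(self):
    self.check_isolated_token(':', 'COLON')
    self.check_isolated_token('->', 'RARROW')
    self.check_isolated_token('=', 'EQUAL')
    self.check_isolated_token('*', 'ASTERISK')
    self.check_isolated_token('12345', 'INTEGER', 12345)
    self.check_isolated_token('"a"', 'STRING', 'a')
    self.check_isolated_token('_b', 'NAME_OTHER', '_b')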