def _parse_dot_rhs(self, binding_power): # From the grammar: # expression '.' ( identifier / # multi-select-list / # multi-select-hash / # function-expression / # * # In terms of tokens that means that after a '.', # you can have: lookahead = self._current_token() # Common case "foo.bar", so first check for an identifier. if lookahead in ['quoted_identifier', 'unquoted_identifier', 'star']: return self._expression(binding_power) elif lookahead == 'lbracket': self._match('lbracket') return self._parse_multi_select_list() elif lookahead == 'lbrace': self._match('lbrace') return self._parse_multi_select_hash() else: t = self._lookahead_token(0) allowed = ['quoted_identifier', 'unquoted_identifier', 'lbracket', 'lbrace'] lex_position = t['start'] actual_value = t['value'] actual_type = t['type'] raise exceptions.ParseError( lex_position, actual_value, actual_type, "Expecting: %s, got: %s" % (allowed, actual_type))
def _error_nud_token(self, token):
    """Raise the appropriate error for a token with no nud (prefix) handler.

    EOF means the expression simply stopped early, which is reported as
    an IncompleteExpressionError rather than a generic ParseError.
    """
    start, value, token_type = token['start'], token['value'], token['type']
    if token_type == 'eof':
        raise exceptions.IncompleteExpressionError(start, value, token_type)
    raise exceptions.ParseError(start, value, token_type, 'Invalid token.')
def _assert_not_token(self, *token_types): if self._current_token() in token_types: t = self._lookahead_token(0) lex_position = t['start'] actual_value = t['value'] actual_type = t['type'] raise exceptions.ParseError( lex_position, actual_value, actual_type, "Token %s not allowed to be: %s" % (actual_type, token_types))
def _parse(self, expression):
    """Tokenize *expression*, parse it, and wrap it in a ParsedResult."""
    self.tokenizer = lexer.Lexer().tokenize(expression)
    self._tokens = list(self.tokenizer)
    self._index = 0
    parsed = self._expression(binding_power=0)
    # The parse must consume everything up to EOF; trailing tokens
    # indicate a malformed expression.
    if self._current_token() != 'eof':
        t = self._lookahead_token(0)
        raise exceptions.ParseError(
            t['start'], t['value'], t['type'],
            "Unexpected token: %s" % t['value'])
    return ParsedResult(expression, parsed)
def _token_nud_quoted_identifier(self, token):
    """Parse a quoted identifier in prefix (nud) position.

    Returns an AST field node.  A quoted identifier cannot be used as a
    function name, so a following '(' is rejected.
    """
    field = ast.field(token['value'])
    if self._current_token() == 'lparen':
        t = self._lookahead_token(0)
        # Report the error at the offending token's position rather than
        # a hard-coded 0, consistent with every other error raise in
        # this parser.
        raise exceptions.ParseError(
            t['start'], t['value'], t['type'],
            'Quoted identifier not allowed for function names.')
    return field
def _raise_parse_error_maybe_eof(self, expected_type, token):
    """Raise ParseError for *token*, or IncompleteExpressionError at EOF."""
    start = token['start']
    value = token['value']
    token_type = token['type']
    if token_type == 'eof':
        raise exceptions.IncompleteExpressionError(start, value, token_type)
    raise exceptions.ParseError(
        start, value, token_type,
        'Expecting: %s, got: %s' % (expected_type, token_type))
def _match_multiple_tokens(self, token_types): if self._current_token() not in token_types: t = self._lookahead_token(0) lex_position = t['start'] actual_value = t['value'] actual_type = t['type'] if actual_type == 'eof': raise exceptions.IncompleteExpressionError( lex_position, actual_value, actual_type) else: message = 'Expecting: %s, got: %s' % (token_types, actual_type) raise exceptions.ParseError(lex_position, actual_value, actual_type, message) self._advance()
def _match(self, token_type=None): # inline'd self._current_token() if self._current_token() == token_type: # inline'd self._advance() self._advance() else: t = self._lookahead_token(0) lex_position = t['start'] actual_value = t['value'] actual_type = t['type'] if actual_type == 'eof': raise exceptions.IncompleteExpressionError( lex_position, actual_value, actual_type) else: message = 'Expecting: %s, got: %s' % (token_type, actual_type) raise exceptions.ParseError(lex_position, actual_value, actual_type, message)
def _token_led_lparen(self, left):
    """Parse a function call; *left* must be a bare field (the name)."""
    if left['type'] != 'field':
        # Lookahead offsets relative to the '(' we are now parsing:
        #   0  -> first argument or the closing paren
        #  -1  -> the '(' token itself
        #  -2  -> whatever was used as the function "name"
        bad = self._lookahead_token(-2)
        raise exceptions.ParseError(
            bad['start'], bad['value'], bad['type'],
            "Invalid function name '%s'" % bad['value'])
    args = []
    while self._current_token() != 'rparen':
        arg = self._expression()
        if self._current_token() == 'comma':
            self._match('comma')
        args.append(arg)
    self._match('rparen')
    return ast.function_expression(left['value'], args)
def _parse_projection_rhs(self, binding_power): # Parse the right hand side of the projection. if self.BINDING_POWER[self._current_token()] < 10: # BP of 10 are all the tokens that stop a projection. right = ast.identity() elif self._current_token() == 'lbracket': right = self._expression(binding_power) elif self._current_token() == 'filter': right = self._expression(binding_power) elif self._current_token() == 'dot': self._match('dot') right = self._parse_dot_rhs(binding_power) else: t = self._lookahead_token(0) lex_position = t['start'] actual_value = t['value'] actual_type = t['type'] raise exceptions.ParseError(lex_position, actual_value, actual_type, 'syntax error') return right
def _parse_slice_expression(self):
    """Parse a slice expression: [start:end:step].

    start, end, and step are all optional, and the last colon is
    optional as well; omitted parts stay None.
    """
    parts = [None, None, None]
    index = 0
    current_token = self._current_token()
    # index < 3 guards parts[]: at most two colons are consumed inside
    # the loop, so index never exceeds the slice's three slots.
    while not current_token == 'rbracket' and index < 3:
        if current_token == 'colon':
            index += 1
            self._advance()
        elif current_token == 'number':
            parts[index] = self._lookahead_token(0)['value']
            self._advance()
        else:
            # Reuse the shared helper instead of hand-unpacking the
            # token; it raises the identical ParseError.
            self._raise_parse_error_for_token(
                self._lookahead_token(0), 'syntax error')
        current_token = self._current_token()
    self._match('rbracket')
    return ast.slice(*parts)
def _error_led_token(self, token):
    """Report a token that cannot appear in infix (led) position."""
    raise exceptions.ParseError(
        token['start'], token['value'], token['type'], 'Invalid token')
def _raise_parse_error_for_token(self, token, reason):
    """Raise a ParseError located at *token*, explained by *reason*."""
    raise exceptions.ParseError(
        token['start'], token['value'], token['type'], reason)