def convert_id(self):
    """Consume the accumulated token and return it as an ID.

    The token buffer (``self.current_token``) is cleared as a side effect.

    Raises:
        PathExprParsingError: if the token buffer is empty; the message
            reports ``self.pos``.
    """
    identifier = self.current_token
    if identifier == '':
        raise PathExprParsingError(
            'empty ID at position {}'.format(self.pos))
    # Clear the buffer so the next token starts from scratch.
    self.current_token = ''
    return identifier
def convert_slice_element(self):
    """Consume the accumulated token and return it as one slice element.

    Returns:
        ``None`` when the token buffer is empty (an omitted slice bound),
        otherwise the token converted with ``int()``.

    Raises:
        PathExprParsingError: if the token is not a valid integer. The
            token buffer is left untouched in that case.
    """
    raw = self.current_token
    if raw == '':
        element = None
    else:
        try:
            element = int(raw)
        except ValueError:
            raise PathExprParsingError(
                'invalid slice syntax: {!r} at position {}'.format(
                    self.current_token, self.pos))
    # Only reached on success: reset the buffer for the next token.
    self.current_token = ''
    return element
def create_slice_object(self):
    """Build an index or ``slice`` from the collected slice elements.

    The result depends on how many elements are in
    ``self.current_slice_elements``:

    * none: ``slice(None, None, None)`` if ``self.bare_id_matches_all`` is
      true, otherwise the integer ``0``;
    * one: the integer itself when non-negative, otherwise a one-element
      slice selecting that negative position;
    * two or three: ``slice(*elements)``.

    On success ``self.current_slice_elements`` is reset to an empty list.

    Raises:
        PathExprParsingError: if more than three elements were collected.
    """
    elements = self.current_slice_elements
    count = len(elements)

    if count > 3:
        raise PathExprParsingError('slice can have at most three indices')

    if count == 0:
        result = slice(None, None, None) if self.bare_id_matches_all else 0
    elif count == 1:
        index = elements[0]
        # Invariant of the parser: a lone element is always an integer.
        assert isinstance(index, int)
        if index >= 0:
            result = index
        else:
            # For -1 the stop must be open-ended; ``-1 + 1 == 0`` would
            # produce an empty slice.
            stop = None if index == -1 else index + 1
            result = slice(index, stop, None)
    else:
        # Two or three elements map directly onto slice(start, stop[, step]).
        result = slice(*elements)

    self.current_slice_elements = []
    return result
def unexpected_char_error(c, idx):
    """Create (but do not raise) an error for an unexpected character.

    Args:
        c: the offending character; rendered with ``repr`` in the message.
        idx: the position reported in the message.

    Returns:
        A ``PathExprParsingError`` instance for the caller to raise.
    """
    message = 'unexpected char: {!r} at position {}'.format(c, idx)
    return PathExprParsingError(message)
def parse(self, path_expr):
    """Parse a path expression string and return the resulting node path.

    The expression is scanned one character at a time, driven by
    ``self.current_state``. Brackets, colons and separators are delegated
    to the ``handle_*`` methods; any other non-whitespace character is
    accumulated into ``self.current_token``.

    Args:
        path_expr: the path expression text.

    Returns:
        ``self.node_path``, created here as ``NodePath(path_expr)``.

    Raises:
        PathExprParsingError: if the expression is empty or blank, starts
            with a disallowed character, or contains a character that is
            not valid in the current state. The delegated handlers are
            not visible here and presumably raise it as well.
    """
    trimmed = path_expr.strip()
    if trimmed == '':
        raise PathExprParsingError('Empty path expression')

    # Fail fast: only these characters may open a path expression.
    first_char = trimmed[0]
    if first_char not in '@/>0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ':
        raise unexpected_char_error(first_char, path_expr.find(first_char))

    self.reset()
    self.node_path = NodePath(path_expr)
    self.current_state = STATE_START_PARSING
    self.current_token = ''

    separators = (PATH_SEPARATOR_CHILD,
                  PATH_SEPARATOR_ATTRIB,
                  PATH_SEPARATOR_DESCEND)
    # States in which ordinary characters extend the current token.
    token_states = (STATE_START_ID,
                    STATE_START_SUBSET_SLICE_0,
                    STATE_START_SUBSET_SLICE_X,
                    STATE_START_SLICE_0,
                    STATE_START_SLICE_X)
    length = len(path_expr)

    while self.pos < length:
        ch = path_expr[self.pos]
        # Whitespace is skipped entirely; everything else is dispatched.
        if ch not in string.whitespace:
            if ch == '@':
                # Subset specifier: only legal as the very first thing.
                if self.current_state != STATE_START_PARSING:
                    raise unexpected_char_error(ch, self.pos)
                self.current_state = STATE_START_SUBSET
            elif ch == '[':
                self.handle_left_bracket()
            elif ch == ':' or ch == ']':
                self.handle_colon_and_right_bracket(ch)
            elif ch in separators:
                self.handle_separator(ch)
            else:
                if self.current_state not in token_states:
                    if self.current_state != STATE_START_PARSING:
                        raise unexpected_char_error(ch, self.pos)
                    # No leading separator was given: behave as if a
                    # descend separator preceded this character.
                    self.handle_separator(PATH_SEPARATOR_DESCEND)
                self.current_token += ch
        self.pos += 1

    # End of input: flush whatever is still pending.
    if self.current_state == STATE_START_ID:
        self.current_id = self.convert_id()
        self.add_new_path_component()
    elif self.current_state == STATE_STOP_SLICE:
        self.add_new_path_component()
    elif self.current_token != '':
        leftover = self.current_token
        raise unexpected_char_error(leftover[0], self.pos - len(leftover))

    return self.node_path