def parse_value(lexer, symbol=None):
    """
    Generate parse events for a single JSON value.

    Parameters:

    - lexer: an iterator producing lexemes (strings); it is only consumed
      here when `symbol` is None, and is handed on to parse_array /
      parse_object for container values
    - symbol: an already-read lexeme to interpret, or None to pull the next
      one from `lexer`

    Yields (event, value) tuples: ('null', None), ('boolean', bool),
    ('string', str), ('number', int or Decimal), or whatever parse_array /
    parse_object yield for '[' and '{'.

    Raises UnexpectedSymbol when the lexeme is not a valid number, and
    common.IncompleteJSONError when the lexer is exhausted.
    """
    try:
        if symbol is None:
            symbol = next(lexer)
        if symbol == 'null':
            yield ('null', None)
        elif symbol == 'true':
            yield ('boolean', True)
        elif symbol == 'false':
            yield ('boolean', False)
        elif symbol == '[':
            for event in parse_array(lexer):
                yield event
        elif symbol == '{':
            for event in parse_object(lexer):
                yield event
        elif symbol[0] == '"':
            yield ('string', ''.join(unescape(symbol[1:-1])))
        else:
            try:
                number = Decimal(symbol) if '.' in symbol else int(symbol)
            except (ValueError, ArithmeticError):
                # int() signals bad input with ValueError, but Decimal()
                # raises decimal.InvalidOperation, which derives from
                # ArithmeticError -- both mean "not a number lexeme".
                raise UnexpectedSymbol(symbol, lexer)
            # Yield outside the try so that exceptions thrown into the
            # generator at this point are not mistaken for a bad number.
            yield ('number', number)
    except StopIteration:
        raise common.IncompleteJSONError()
def stringlexem(self):
    """
    Return the string lexeme (both quotes included) that starts at
    self.pos, and advance self.pos just past its closing quote.

    When the closing quote is not in self.buffer yet, another BUFSIZE
    chunk is read from self.f, decoded as UTF-8 and appended to the buffer.

    Raises common.IncompleteJSONError when the input ends before the string
    is closed, and UnicodeDecodeError when the bytes read are not valid
    UTF-8.
    """
    start = self.pos + 1
    while True:
        try:
            end = self.buffer.index('"', start)
            # Walk back over the backslashes directly preceding the quote.
            escpos = end - 1
            while self.buffer[escpos] == '\\':
                escpos -= 1
            if (end - escpos) % 2 == 0:
                # Odd number of backslashes: the quote is escaped, keep going.
                start = end + 1
            else:
                result = self.buffer[self.pos:end + 1]
                self.pos = end + 1
                return result
        except ValueError:
            # No quote in the buffered text: fetch more input.
            old_len = len(self.buffer)
            data = self.f.read(BUFSIZE)
            try:
                self.buffer += data.decode('utf-8')
            except UnicodeDecodeError as decode_error:
                # The chunk most likely stops in the middle of a multi-byte
                # character. A UTF-8 sequence is at most 4 bytes long, so at
                # most 3 more bytes can complete it; anything beyond that
                # means the data itself is invalid.
                for _ in range(3):
                    extra = self.f.read(1)
                    if not extra:
                        # End of input inside a character: previously this
                        # looped forever.
                        raise common.IncompleteJSONError()
                    data += extra
                    try:
                        text = data.decode('utf-8')
                    except UnicodeDecodeError:
                        continue
                    self.buffer += text
                    break
                else:
                    raise decode_error
            if len(self.buffer) == old_len:
                raise common.IncompleteJSONError()
def parse_object(lexer):
    """
    Generate parse events for a JSON object whose opening '{' has already
    been consumed.

    `lexer` is an iterator of (position, lexeme) pairs. The generator emits
    ('start_map', None), then a ('map_key', key) event followed by the
    events of parse_value for every member, and finally ('end_map', None).

    Raises UnexpectedSymbol for a lexeme that does not fit the object
    grammar and common.IncompleteJSONError when the lexer runs dry.
    """
    yield ('start_map', None)
    try:
        position, token = next(lexer)
        more_members = token != '}'
        while more_members:
            # Every member starts with a quoted key ...
            if token[0] != '"':
                raise UnexpectedSymbol(token, position)
            key = token[1:-1]
            if DOTRANSLATE:
                key = unescape(key)
            yield ('map_key', key)

            # ... followed by a colon and the member's value.
            position, token = next(lexer)
            if token != ':':
                raise UnexpectedSymbol(token, position)
            for event in parse_value(lexer, None, position):
                yield event

            # After the value: '}' ends the object, ',' introduces a new key.
            position, token = next(lexer)
            if token == '}':
                more_members = False
            elif token == ',':
                position, token = next(lexer)
            else:
                raise UnexpectedSymbol(token, position)
        yield ('end_map', None)
    except StopIteration:
        raise common.IncompleteJSONError('Incomplete JSON data')
def parse_value(lexer, symbol=None, pos=0):
    """
    Generate parse events for a single JSON value.

    Parameters:

    - lexer: an iterator of (position, lexeme) pairs; only consumed here
      when `symbol` is None, and handed on to parse_array / parse_object
      for container values
    - symbol: an already-read lexeme to interpret, or None to pull the next
      pair from `lexer`
    - pos: position reported in UnexpectedSymbol; replaced by the lexer's
      position when the lexeme is read here

    Raises UnexpectedSymbol when the lexeme cannot be converted by
    common.number, and common.IncompleteJSONError when the lexer is
    exhausted.
    """
    try:
        if symbol is None:
            pos, symbol = next(lexer)
        if symbol == 'null':
            yield ('null', None)
        elif symbol == 'true':
            yield ('boolean', True)
        elif symbol == 'false':
            yield ('boolean', False)
        elif symbol == '[':
            for event in parse_array(lexer):
                yield event
        elif symbol == '{':
            for event in parse_object(lexer):
                yield event
        elif symbol[0] == '"':
            yield ('string', parse_string(symbol))
        else:
            try:
                number = common.number(symbol)
            except Exception:
                raise UnexpectedSymbol(symbol, pos)
            # The yield stays outside the try (and the handler is no longer
            # a bare except) so that closing the generator while it is
            # suspended here, or a KeyboardInterrupt, is not converted into
            # UnexpectedSymbol.
            yield ('number', number)
    except StopIteration:
        raise common.IncompleteJSONError('Incomplete JSON data')
def basic_parse(f, allow_comments=False, buf_size=64 * 1024, multiple_values=False):
    '''
    Iterator yielding unprefixed events.

    Parameters:

    - f: a readable file-like object with JSON input
    - allow_comments: tells parser to allow comments in JSON input
    - buf_size: a size of an input buffer
    - multiple_values: allows the parser to parse multiple JSON objects

    Raises common.JSONError when yajl reports a parse error and
    common.IncompleteJSONError when the input ends while yajl still expects
    more data.
    '''
    events = []

    def callback(event, func_type, func):
        # Wrap a Python function as a ctypes callback that records the
        # event; returning 1 tells yajl to continue parsing.
        def c_callback(context, *args):
            events.append((event, func(*args)))
            return 1
        return func_type(c_callback)

    # `callbacks` must stay referenced for as long as the handle is in use.
    callbacks = Callbacks(*[callback(*data) for data in _callback_data])
    handle = yajl.yajl_alloc(byref(callbacks), None, None)
    try:
        # Configure inside the try block so the handle is released even if
        # configuration fails.
        if allow_comments:
            yajl.yajl_config(handle, YAJL_ALLOW_COMMENTS, 1)
        if multiple_values:
            yajl.yajl_config(handle, YAJL_MULTIPLE_VALUES, 1)
        while True:
            buffer = f.read(buf_size)
            if buffer:
                result = yajl.yajl_parse(handle, buffer, len(buffer))
            else:
                # End of input: let yajl flush whatever it still holds.
                result = yajl.yajl_complete_parse(handle)
            if result == YAJL_ERROR:
                perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer))
                error = cast(perror, c_char_p).value
                yajl.yajl_free_error(handle, perror)
                if not isinstance(error, str):
                    # ctypes hands back bytes on Python 3; decode so the
                    # exception carries readable text. 'replace' keeps a
                    # message that quotes malformed input from failing.
                    error = error.decode('utf-8', 'replace')
                raise common.JSONError(error)
            if not buffer and not events:
                if result == YAJL_INSUFFICIENT_DATA:
                    raise common.IncompleteJSONError()
                break

            for event in events:
                yield event
            events = []
    finally:
        yajl.yajl_free(handle)
def Lexer(f, buf_size=BUFSIZE):
    """
    Generator splitting the content of `f` into JSON lexemes.

    Yields (position, lexeme) pairs, where position is the lexeme's offset
    counted from the start of the text read so far (characters dropped with
    earlier buffers included). String lexemes are yielded with their
    surrounding quotes.

    Parameters:

    - f: a readable file-like object; if it returns `bytetype` data it is
      wrapped in a UTF-8 reader
    - buf_size: the amount requested from `f` on every read

    Raises common.IncompleteJSONError when the input ends inside a string.
    """
    if type(f.read(0)) == bytetype:
        f = getreader('utf-8')(f)
    chunk = f.read(buf_size)
    cursor = 0
    dropped = 0
    while True:
        found = LEXEME_RE.search(chunk, cursor)

        if not found:
            # Nothing left in this buffer: replace it with fresh input.
            fresh = f.read(buf_size)
            if not fresh:
                break
            dropped += len(chunk)
            chunk = fresh
            cursor = 0
            continue

        token = found.group()

        if token != '"':
            # A lexeme touching the end of the buffer may continue in the
            # next read, so extend the buffer and match again.
            while found.end() == len(chunk):
                fresh = f.read(buf_size)
                if not fresh:
                    break
                chunk += fresh
                found = LEXEME_RE.search(chunk, cursor)
                token = found.group()
            yield dropped + found.start(), token
            cursor = found.end()
            continue

        # String lexeme: look for the first unescaped closing quote.
        cursor = found.start()
        scan_from = cursor + 1
        while True:
            try:
                quote = chunk.index('"', scan_from)
            except ValueError:
                fresh = f.read(buf_size)
                if not fresh:
                    raise common.IncompleteJSONError(
                        'Incomplete string lexeme')
                chunk += fresh
                continue
            probe = quote - 1
            while chunk[probe] == '\\':
                probe -= 1
            if (quote - probe) % 2 == 0:
                # Odd run of backslashes: this quote is escaped.
                scan_from = quote + 1
            else:
                break
        yield dropped + cursor, chunk[cursor:quote + 1]
        cursor = quote + 1
def basic_parse(f, allow_comments=False, check_utf8=False, buf_size=64 * 1024):
    '''
    Iterator yielding unprefixed events.

    Parameters:

    - f: a readable file-like object with JSON input
    - allow_comments: tells parser to allow comments in JSON input
    - check_utf8: if True, parser will cause an error if input is invalid utf-8
    - buf_size: a size of an input buffer

    Raises common.JSONError when yajl reports a parse error and
    common.IncompleteJSONError when the input ends while yajl still expects
    more data.
    '''
    f = compat.bytes_reader(f)
    events = []

    def callback(event, func_type, func):
        # Wrap a Python function as a ctypes callback that records the
        # event; returning 1 tells yajl to continue parsing.
        def c_callback(context, *args):
            events.append((event, func(*args)))
            return 1
        return func_type(c_callback)

    # `callbacks` and `config` must stay referenced while the handle is used.
    callbacks = Callbacks(*[callback(*data) for data in _callback_data])
    config = Config(allow_comments, check_utf8)
    handle = yajl.yajl_alloc(byref(callbacks), byref(config), None, None)
    try:
        while True:
            buffer = f.read(buf_size)
            if buffer:
                result = yajl.yajl_parse(handle, buffer, len(buffer))
            else:
                # End of input: let yajl flush whatever it still holds.
                result = yajl.yajl_parse_complete(handle)
            if result == YAJL_ERROR:
                perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer))
                error = cast(perror, c_char_p).value
                yajl.yajl_free_error(handle, perror)
                if not isinstance(error, str):
                    # ctypes hands back bytes on Python 3; decode so the
                    # exception carries readable text. 'replace' keeps a
                    # message that quotes malformed input from failing.
                    error = error.decode('utf-8', 'replace')
                raise common.JSONError(error)
            if not buffer and not events:
                if result == YAJL_INSUFFICIENT_DATA:
                    raise common.IncompleteJSONError('Incomplete JSON data')
                break

            for event in events:
                yield event
            events = []
    finally:
        yajl.yajl_free(handle)
def parse_array(lexer):
    """
    Generate parse events for a JSON array whose opening '[' has already
    been consumed.

    `lexer` is an iterator of (position, lexeme) pairs. The generator emits
    ('start_array', None), the events of parse_value for every element, and
    finally ('end_array', None).

    Raises UnexpectedSymbol when an element is followed by anything other
    than ',' or ']', and common.IncompleteJSONError when the lexer runs dry.
    """
    yield ('start_array', None)
    try:
        position, token = next(lexer)
        has_items = token != ']'
        while has_items:
            # `token` is the first lexeme of the element; parse_value reads
            # any further lexemes it needs from the lexer itself.
            for event in parse_value(lexer, token, position):
                yield event

            # After an element: ']' closes the array, ',' introduces another.
            position, token = next(lexer)
            if token == ']':
                has_items = False
            elif token == ',':
                position, token = next(lexer)
            else:
                raise UnexpectedSymbol(token, position)
        yield ('end_array', None)
    except StopIteration:
        raise common.IncompleteJSONError('Incomplete JSON data')
def basic_parse_basecoro(target, allow_comments=False, check_utf8=False):
    '''
    Coroutine that parses the byte buffers sent to it with yajl and sends
    the resulting unprefixed (event, value) tuples to `target`.

    Parameters:

    - target: object whose send() method receives every event
    - allow_comments: tells parser to allow comments in JSON input
    - check_utf8: if True, parser will cause an error if input is invalid utf-8

    An empty buffer, or closing the coroutine, marks the end of the input.

    Raises common.JSONError when yajl reports a parse error and
    common.IncompleteJSONError when the input ends while yajl still expects
    more data.
    '''
    send = target.send

    def callback(event, func_type, func):
        # Wrap a Python function as a ctypes callback that forwards the
        # event to the target; the callback always returns 1.
        def c_callback(context, *args):
            send((event, func(*args)))
            return 1
        return func_type(c_callback)

    callbacks = Callbacks(*[callback(*data) for data in _callback_data])
    config = Config(allow_comments, check_utf8)
    handle = yajl.yajl_alloc(byref(callbacks), byref(config), None, None)
    try:
        while True:
            try:
                buffer = (yield)
            except GeneratorExit:
                # Being closed is treated like receiving an empty buffer, so
                # the end-of-input handling below still runs.
                buffer = b''
            if buffer:
                result = yajl.yajl_parse(handle, buffer, len(buffer))
            else:
                result = yajl.yajl_parse_complete(handle)
            if result == YAJL_ERROR:
                # Copy the message out before releasing yajl's error buffer.
                perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer))
                error = cast(perror, c_char_p).value
                yajl.yajl_free_error(handle, perror)
                raise common.JSONError(error.decode('utf-8'))
            elif not buffer:
                # End of input reached without a parse error.
                if result == YAJL_INSUFFICIENT_DATA:
                    raise common.IncompleteJSONError('Incomplete JSON data')
                break
    finally:
        # Always release the native parser handle.
        yajl.yajl_free(handle)
def stringlexem(self):
    """
    Return the string lexeme (both quotes included) that starts at
    self.pos, and move self.pos just past its closing quote.

    When the closing quote is not in self.buffer yet, self.buf_size more
    characters are read from self.f and appended to the buffer.

    Raises common.IncompleteJSONError when a read adds nothing to the
    buffer before the string has been closed.
    """
    search_from = self.pos + 1
    while True:
        try:
            quote = self.buffer.index('"', search_from)
        except ValueError:
            # No quote buffered yet: pull in more text and retry.
            size_before = len(self.buffer)
            self.buffer += self.f.read(self.buf_size)
            if len(self.buffer) == size_before:
                raise common.IncompleteJSONError()
            continue

        # Count the backslashes directly in front of the quote.
        cursor = quote - 1
        while self.buffer[cursor] == '\\':
            cursor -= 1
        backslashes = quote - cursor - 1
        if backslashes % 2:
            # An odd run escapes the quote, so it does not end the string.
            search_from = quote + 1
            continue

        lexeme = self.buffer[self.pos:quote + 1]
        self.pos = quote + 1
        return lexeme
def basic_parse_basecoro(target, allow_comments=False, multiple_values=False, use_float=False):
    '''
    Coroutine that parses the byte buffers sent to it with yajl; the
    callbacks built by _yajl2_ctypes_common.make_callbaks are given
    `target.send` to deliver events.

    Parameters:

    - target: object whose send method is handed to the yajl callbacks
    - allow_comments: tells parser to allow comments in JSON input
    - multiple_values: not supported by this backend; a true value raises
      ValueError
    - use_float: forwarded to _yajl2_ctypes_common.make_callbaks
      (NOTE(review): presumably selects float over Decimal numbers --
      confirm against that helper)

    An empty buffer, or closing the coroutine, marks the end of the input.

    Raises common.JSONError when yajl reports a parse error and
    common.IncompleteJSONError when the input ends while yajl still expects
    more data.
    '''
    if multiple_values:
        raise ValueError("yajl backend doesn't support multiple_values")
    callbacks = _yajl2_ctypes_common.make_callbaks(target.send, use_float, 1)
    # The second Config argument is always True here; in the sibling
    # variants of this function that slot is `check_utf8`.
    config = Config(allow_comments, True)
    handle = yajl.yajl_alloc(byref(callbacks), byref(config), None, None)
    try:
        while True:
            try:
                buffer = (yield)
            except GeneratorExit:
                # Being closed is treated like receiving an empty buffer, so
                # the end-of-input handling below still runs.
                buffer = b''
            if buffer:
                result = yajl.yajl_parse(handle, buffer, len(buffer))
            else:
                result = yajl.yajl_parse_complete(handle)
            if result == _yajl2_ctypes_common.YAJL_ERROR:
                error = _yajl2_ctypes_common.yajl_get_error(
                    yajl, handle, buffer)
                raise common.JSONError(error)
            elif not buffer:
                # End of input reached without a parse error.
                if result == _yajl2_ctypes_common.YAJL_INSUFFICIENT_DATA:
                    raise common.IncompleteJSONError('Incomplete JSON data')
                break
    finally:
        # Always release the native parser handle.
        yajl.yajl_free(handle)
def utf8_encoder(target):
    """
    Coroutine that incrementally decodes the UTF-8 bytes sent to it and
    forwards the resulting text to `target`.

    Despite its name it decodes bytes into text. Bytes that end in the
    middle of a multi-byte character are held back by the incremental
    decoder until the rest arrives. An empty bytes object, or closing the
    coroutine, marks the end of the input: the decoder is flushed and
    `target` is closed.

    Raises common.IncompleteJSONError (wrapping the UnicodeDecodeError) when
    the input is not valid UTF-8 or ends inside a character.
    """
    decoder = codecs.getincrementaldecoder('utf-8')()
    decode = decoder.decode
    send = target.send
    while True:
        try:
            final = False
            bdata = (yield)
        except GeneratorExit:
            # Being closed means no more input: flush the decoder.
            final = True
            bdata = b''
        try:
            sdata = decode(bdata, final)
        except UnicodeDecodeError as e:
            # Best-effort shutdown of the downstream coroutine; its own
            # failures must not hide the decoding error. Only Exception is
            # swallowed so KeyboardInterrupt / SystemExit still propagate.
            try:
                target.close()
            except Exception:
                pass
            raise common.IncompleteJSONError(e)
        if sdata:
            send(sdata)
        elif not bdata:
            # Nothing decoded and nothing received: end of input.
            target.close()
            break
def Lexer(target):
    """
    Parses lexemes out of the incoming content, and sends them to parse_value.
    A special EOF result is sent when the data source has been exhausted to
    give parse_value the possibility of raising custom exceptions due to missing
    content.

    This is a coroutine: text is received through send(), and every lexeme
    found is sent to `target` as a (position, lexeme) tuple, where position
    is the lexeme's offset counted from the start of all text received
    (characters dropped with earlier buffers included). Empty data, or
    closing the coroutine, is treated as the end of the input.

    Raises common.IncompleteJSONError when the input ends inside a string.
    """
    try:
        data = (yield)
    except GeneratorExit:
        data = ''
    buf = data
    pos = 0
    discarded = 0
    send = target.send
    while True:
        match = LEXEME_RE.search(buf, pos)
        if match:
            lexeme = match.group()
            if lexeme == '"':
                # String lexeme: look for the first unescaped closing quote,
                # asking for more data whenever the buffer has none.
                pos = match.start()
                start = pos + 1
                while True:
                    try:
                        end = buf.index('"', start)
                        # Walk back over the backslashes preceding the quote
                        escpos = end - 1
                        while buf[escpos] == '\\':
                            escpos -= 1
                        if (end - escpos) % 2 == 0:
                            # Odd run of backslashes: the quote is escaped
                            start = end + 1
                        else:
                            break
                    except ValueError:
                        # No quote in the buffer: wait for more content
                        try:
                            data = (yield)
                        except GeneratorExit:
                            data = ''
                        if not data:
                            raise common.IncompleteJSONError(
                                'Incomplete string lexeme')
                        buf += data
                send((discarded + pos, buf[pos:end + 1]))
                pos = end + 1
            else:
                # A lexeme touching the end of the buffer may continue in
                # the next chunk, unless it is one of UNARY_LEXEMES
                while lexeme not in UNARY_LEXEMES and match.end() == len(buf):
                    try:
                        data = (yield)
                    except GeneratorExit:
                        data = ''
                    if not data:
                        break
                    buf += data
                    match = LEXEME_RE.search(buf, pos)
                    lexeme = match.group()
                send((discarded + match.start(), lexeme))
                pos = match.end()
        else:
            # Don't ask data from an already exhausted source
            if data:
                try:
                    data = (yield)
                except GeneratorExit:
                    data = ''
            if not data:
                # Normally should raise StopIteration, but can raise
                # IncompleteJSONError too, which is the point of sending EOF
                try:
                    target.send(EOF)
                except StopIteration:
                    pass
                break
            # The old buffer holds no further lexemes: replace it, keeping
            # track of how many characters were dropped for positions
            discarded += len(buf)
            buf = data
            pos = 0
def parse_value(target, multivalue, use_float):
    """
    Parses results coming out of the Lexer into ijson events, which are sent to
    `target`. A stack keeps track of the type of object being parsed at the time
    (a value, and object or array -- the last two being values themselves).

    A special EOF result coming from the Lexer indicates that no more content is
    expected. This is used to check for incomplete content and raise the
    appropriate exception, which wouldn't be possible if the Lexer simply closed
    this co-routine (either explicitly via .close(), or implicitly by itself
    finishing and decreasing the only reference to the co-routine) since that
    causes a GeneratorExit exception that cannot be replaced with a custom one.

    Parameters:

    - target: object whose send() method receives every (event, value) tuple
    - multivalue: if true, a new top-level value may start after the previous
      one has finished; otherwise extra content raises common.JSONError
    - use_float: selects common.integer_or_float instead of
      common.integer_or_decimal for converting number lexemes

    Raises common.IncompleteJSONError when EOF arrives in the middle of a
    value, UnexpectedSymbol for lexemes that do not fit the grammar, and
    common.JSONError for additional data, malformed numbers and float
    overflow.
    """
    state_stack = [_PARSE_VALUE]
    pop = state_stack.pop
    push = state_stack.append
    send = target.send
    prev_pos, prev_symbol = None, None
    to_number = common.integer_or_float if use_float else common.integer_or_decimal
    while True:

        if prev_pos is None:
            pos, symbol = (yield)
            if (pos, symbol) == EOF:
                if state_stack:
                    raise common.IncompleteJSONError('Incomplete JSON content')
                break
        else:
            # Re-process the lexeme that was read ahead while opening an
            # array or object.
            pos, symbol = prev_pos, prev_symbol
            prev_pos, prev_symbol = None, None
        try:
            state = state_stack[-1]
        except IndexError:
            # The previous top-level value is complete.
            if multivalue:
                state = _PARSE_VALUE
                push(state)
            else:
                raise common.JSONError('Additional data found')
        assert state_stack

        if state == _PARSE_VALUE:
            # Simple, common cases
            if symbol == 'null':
                send(('null', None))
                pop()
            elif symbol == 'true':
                send(('boolean', True))
                pop()
            elif symbol == 'false':
                send(('boolean', False))
                pop()
            elif symbol[0] == '"':
                send(('string', parse_string(symbol)))
                pop()
            # Array start
            elif symbol == '[':
                send(('start_array', None))
                pos, symbol = (yield)
                if (pos, symbol) == EOF:
                    raise common.IncompleteJSONError('Incomplete JSON content')
                if symbol == ']':
                    send(('end_array', None))
                    pop()
                else:
                    prev_pos, prev_symbol = pos, symbol
                    push(_PARSE_ARRAY_ELEMENT_END)
                    push(_PARSE_VALUE)
            # Object start
            elif symbol == '{':
                send(('start_map', None))
                pos, symbol = (yield)
                if (pos, symbol) == EOF:
                    raise common.IncompleteJSONError('Incomplete JSON content')
                if symbol == '}':
                    send(('end_map', None))
                    pop()
                else:
                    prev_pos, prev_symbol = pos, symbol
                    push(_PARSE_OBJECT_KEY)
            # A number
            else:
                # JSON numbers can't contain leading zeros
                if ((len(symbol) > 1 and symbol[0] == '0' and symbol[1] not in ('e', 'E', '.')) or
                        (len(symbol) > 2 and symbol[0:2] == '-0' and symbol[2] not in ('e', 'E', '.'))):
                    raise common.JSONError('Invalid JSON number: %s' % (symbol, ))
                # Fractions need a leading digit and must be followed by a digit
                if symbol[0] == '.' or symbol[-1] == '.':
                    raise common.JSONError('Invalid JSON number: %s' % (symbol, ))
                try:
                    number = to_number(symbol)
                    overflow = number == inf
                except Exception:
                    raise UnexpectedSymbol(symbol, pos)
                # Raised outside the try block: a catch-all handler around
                # this raise used to replace the overflow error with
                # UnexpectedSymbol, so its message never reached the caller.
                if overflow:
                    raise common.JSONError("float overflow: %s" % (symbol, ))
                send(('number', number))
                pop()

        elif state == _PARSE_OBJECT_KEY:
            if symbol[0] != '"':
                raise UnexpectedSymbol(symbol, pos)
            send(('map_key', parse_string(symbol)))
            pos, symbol = (yield)
            if (pos, symbol) == EOF:
                raise common.IncompleteJSONError('Incomplete JSON content')
            if symbol != ':':
                raise UnexpectedSymbol(symbol, pos)
            state_stack[-1] = _PARSE_OBJECT_END
            push(_PARSE_VALUE)

        elif state == _PARSE_OBJECT_END:
            if symbol == ',':
                state_stack[-1] = _PARSE_OBJECT_KEY
            elif symbol != '}':
                raise UnexpectedSymbol(symbol, pos)
            else:
                send(('end_map', None))
                # Drop both the object state and the value state that
                # opened the object.
                pop()
                pop()

        elif state == _PARSE_ARRAY_ELEMENT_END:
            if symbol == ',':
                state_stack[-1] = _PARSE_ARRAY_ELEMENT_END
                push(_PARSE_VALUE)
            elif symbol != ']':
                raise UnexpectedSymbol(symbol, pos)
            else:
                send(('end_array', None))
                # Drop both the array state and the value state that
                # opened the array.
                pop()
                pop()