def test_unexpected_data():
    """Check the error reported when an unexpected token appears in a document.

    Each case is ``(document, expected msg, expected 0-based pos)``; every
    document is a single line, so ``lineno`` is 1 and ``colno`` is ``pos + 1``.
    """
    malformed = [
        ('[,', 'Expecting value', 1),
        ('{"spam":[}', 'Expecting value', 9),
        ('[42:', "Expecting ',' delimiter", 3),
        ('[42 "spam"', "Expecting ',' delimiter", 4),
        ('[42,]', 'Expecting value', 4),
        ('{"spam":[42}', "Expecting ',' delimiter", 11),
        ('["]', 'Unterminated string starting at', 1),
        ('["spam":', "Expecting ',' delimiter", 7),
        ('["spam",]', 'Expecting value', 8),
        ('{:', 'Expecting property name enclosed in double quotes', 1),
        ('{,', 'Expecting property name enclosed in double quotes', 1),
        ('{42', 'Expecting property name enclosed in double quotes', 1),
        ('[{]', 'Expecting property name enclosed in double quotes', 2),
        ('{"spam",', "Expecting ':' delimiter", 7),
        ('{"spam"}', "Expecting ':' delimiter", 7),
        ('[{"spam"]', "Expecting ':' delimiter", 8),
        ('{"spam":}', 'Expecting value', 8),
        ('[{"spam":]', 'Expecting value', 9),
        ('{"spam":42 "ham"', "Expecting ',' delimiter", 11),
        ('[{"spam":42]', "Expecting ',' delimiter", 11),
        ('{"spam":42,}', 'Expecting property name enclosed in double quotes', 11),
    ]

    for document, expected_msg, offset in malformed:
        with pytest.raises(sd_ujson.JSONDecodeError) as excinfo:
            sd_ujson.loads(document)

        # Inspect the exception only after the ``with`` block has captured it.
        error = excinfo.value
        column = offset + 1
        assert error.msg == expected_msg
        assert error.pos == offset
        assert error.lineno == 1
        assert error.colno == column
        assert str(error) == f'{expected_msg}: line 1 column {column:d} (char {offset:d})'
def test_truncated_input():
    """Check the error reported when a document ends prematurely.

    Each case is ``(document, expected msg, expected 0-based pos)``; every
    document is a single line, so ``lineno`` is 1 and ``colno`` is ``pos + 1``.
    """
    truncated = [
        ('', 'Expecting value', 0),
        ('[', 'Expecting value', 1),
        ('[42', "Expecting ',' delimiter", 3),
        ('[42,', 'Expecting value', 4),
        ('["', 'Unterminated string starting at', 1),
        ('["spam', 'Unterminated string starting at', 1),
        ('["spam"', "Expecting ',' delimiter", 7),
        ('["spam",', 'Expecting value', 8),
        ('{', 'Expecting property name enclosed in double quotes', 1),
        ('{"', 'Unterminated string starting at', 1),
        ('{"spam', 'Unterminated string starting at', 1),
        ('{"spam"', "Expecting ':' delimiter", 7),
        ('{"spam":', 'Expecting value', 8),
        ('{"spam":42', "Expecting ',' delimiter", 10),
        ('{"spam":42,', 'Expecting property name enclosed in double quotes', 11),
        # Top-level strings that are never closed.
        ('"', 'Unterminated string starting at', 0),
        ('"spam', 'Unterminated string starting at', 0),
    ]

    for document, expected_msg, offset in truncated:
        with pytest.raises(sd_ujson.JSONDecodeError) as excinfo:
            sd_ujson.loads(document)

        # Inspect the exception only after the ``with`` block has captured it.
        error = excinfo.value
        column = offset + 1
        assert error.msg == expected_msg
        assert error.pos == offset
        assert error.lineno == 1
        assert error.colno == column
        assert str(error) == f'{expected_msg}: line 1 column {column:d} (char {offset:d})'
def test_parse_pass_2():
    """Round-trip the deeply nested array from the JSON_checker suite."""
    # from http://json.org/JSON_checker/test/pass2.json
    document = r'''
[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
'''

    # Decoding, re-encoding and decoding again must give back the same value.
    parsed = sd_ujson.loads(document)
    reencoded = sd_ujson.dumps(parsed)
    assert parsed == sd_ujson.loads(reencoded)
def test_allow_nan():
    """inf, -inf and nan round-trip by default and are rejected with ``allow_nan=False``."""
    for special in (float('inf'), float('-inf'), float('nan')):
        encoded = sd_ujson.dumps([special])
        decoded = sd_ujson.loads(encoded)

        if special != special:
            # NaN never compares equal to itself, so check it by that property.
            assert len(decoded) == 1
            assert decoded[0] != decoded[0]
        else:
            # +/- infinity compare equal to themselves.
            assert decoded == [special]

        with pytest.raises(ValueError):
            sd_ujson.dumps([special], allow_nan=False)
def test_failures():
    """Every document in ``JSONDOCS`` must be rejected, except those in ``SKIPS``.

    Documents listed in ``SKIPS`` are only required to load without raising.
    """
    # Numbering starts at 1 so the failure message names fail1.json, fail2.json, ...
    for number, document in enumerate(JSONDOCS, start=1):
        if doc_is_skipped := (document in SKIPS):
            sd_ujson.loads(document)
            continue

        try:
            sd_ujson.loads(document)
        except ValueError:
            # Expected outcome: the invalid document was rejected.
            pass
        else:
            pytest.fail("Expected failure for fail{0}.json: {1!r}".format(number, document))
def test_string_with_utf8_bom():
    """A leading BOM in ``str`` input is rejected; a BOM inside a JSON string is kept."""
    # see #18958
    bom_json = "[1,2,3]".encode('utf-8-sig').decode('utf-8')

    # The exception object itself is not inspected, so it is not bound to a name.
    with pytest.raises(ValueError, match="Expected object or value"):
        sd_ujson.loads(bom_json)
    with pytest.raises(ValueError, match="Expected object or value"):
        sd_ujson.load(StringIO(bom_json))

    # make sure that the BOM is not detected in the middle of a string
    bom_in_str = '"{}"'.format(''.encode('utf-8-sig').decode('utf-8'))
    assert sd_ujson.loads(bom_in_str) == '\ufeff'
    # NOTE(review): this goes through ``sd_ujson.json.load`` while the check above
    # uses ``sd_ujson.load`` -- confirm which entry point is meant to be tested.
    assert sd_ujson.json.load(StringIO(bom_in_str)) == '\ufeff'
def test_bytes_decode():
    """``loads`` accepts ``bytes`` in the UTF-8/16/32 encodings, with or without a BOM."""
    # The payload is the same for every encoding, so build it once.
    data = ["a\xb5\u20ac\U0001d120"]

    for encoding, bom in [
            ('utf-8', codecs.BOM_UTF8),
            ('utf-16be', codecs.BOM_UTF16_BE),
            ('utf-16le', codecs.BOM_UTF16_LE),
            ('utf-32be', codecs.BOM_UTF32_BE),
            ('utf-32le', codecs.BOM_UTF32_LE),
            ]:
        encoded = sd_ujson.dumps(data).encode(encoding)
        assert sd_ujson.loads(bom + encoded) == data
        assert sd_ujson.loads(encoded) == data

    # A byte that is not valid in the detected encoding must raise.
    with pytest.raises(UnicodeDecodeError):
        sd_ujson.loads(b'["\x80"]')

    # RFC-7159 and ECMA-404 extend JSON to allow documents that
    # consist of only a string, which can present a special case
    # not covered by the encoding detection patterns specified in
    # RFC-4627 for utf-16-le (XX 00 XX 00).
    assert sd_ujson.loads('"\u2600"'.encode('utf-16-le')) == '\u2600'

    # Encoding detection for small (<4) bytes objects
    # is implemented as a special case. RFC-7159 and ECMA-404
    # allow single codepoint JSON documents which are only two
    # bytes in utf-16 encodings w/o BOM.
    assert sd_ujson.loads(b'5\x00') == 5
    assert sd_ujson.loads(b'\x007') == 7
    assert sd_ujson.loads(b'57') == 57
def test_parse_pass_3():
    """Round-trip the nested-object document from the JSON_checker suite."""
    # from http://json.org/JSON_checker/test/pass3.json
    document = r'''
{
    "JSON Test Pattern pass3": {
        "The outermost value": "must be an object or array.",
        "In this test": "It is an object."
    }
}
'''

    # Decoding, re-encoding and decoding again must give back the same value.
    parsed = sd_ujson.loads(document)
    reencoded = sd_ujson.dumps(parsed)
    assert parsed == sd_ujson.loads(reencoded)
def test_linecol():
    """``JSONDecodeError`` reports the 1-based line/column and 0-based offset of the error.

    Each case is ``(document, lineno, colno, pos)``. The whitespace inside the
    documents is significant: it is what places the offending ``!`` at the
    expected position.
    """
    test_cases = [
        ('!', 1, 1, 0),
        (' !', 1, 2, 1),
        ('\n!', 2, 1, 1),
        # "\n", two spaces, "\n", "\n", five spaces, "!" -> line 4, column 6, char 10.
        ('\n  \n\n     !', 4, 6, 10),
    ]

    for data, line, col, idx in test_cases:
        with pytest.raises(sd_ujson.JSONDecodeError) as err:
            sd_ujson.loads(data)

        # Inspect the exception only after the ``with`` block has captured it.
        assert err.value.msg == 'Expecting value'
        assert err.value.pos == idx
        assert err.value.lineno == line
        assert err.value.colno == col
        assert str(err.value) == f'Expecting value: line {line} column {col:d} (char {idx:d})'
def test_encode_symbols():
    """Symbol characters encode identically in ``sd_ujson`` and ``sdjson`` and round-trip."""
    text = "\u273f\u2661\u273f"  # ✿♡✿

    # Default output: each character is expected as one 6-character \uXXXX escape.
    escaped = sd_ujson.dumps(text)
    reference = sdjson.dumps(text)
    assert len(escaped) == len(text) * 6 + 2  # 6 characters + quotes
    assert escaped == reference
    round_tripped = sd_ujson.loads(escaped)
    assert text == round_tripped

    # With ensure_ascii=False the characters are expected unescaped.
    raw = sd_ujson.dumps(text, ensure_ascii=False)
    raw_reference = sdjson.dumps(text, ensure_ascii=False)
    assert len(raw) == len(text) + 2  # original length + quotes
    assert raw == raw_reference
    round_tripped = sd_ujson.loads(raw)
    assert text == round_tripped
def test_extra_data():
    """Anything following a complete top-level value is reported as 'Extra data'.

    Each case is ``(document, expected msg, expected 0-based pos)``.
    """
    # NOTE(review): a second ``test_extra_data`` appears later in this source; if
    # both live in the same module, the later definition shadows this one.
    trailing = [
        ('[]]', 'Extra data', 2),
        ('{}}', 'Extra data', 2),
        ('[],[]', 'Extra data', 2),
        ('{},{}', 'Extra data', 2),
        # Scalars at the top level followed by more data.
        ('42,"spam"', 'Extra data', 2),
        ('"spam",42', 'Extra data', 6),
    ]

    for document, expected_msg, offset in trailing:
        with pytest.raises(sd_ujson.JSONDecodeError) as excinfo:
            sd_ujson.loads(document)

        # Inspect the exception only after the ``with`` block has captured it.
        error = excinfo.value
        column = offset + 1
        assert error.msg == expected_msg
        assert error.pos == offset
        assert error.lineno == 1
        assert error.colno == column
        assert str(error) == f'{expected_msg}: line 1 column {column:d} (char {offset:d})'
def test_object_pairs_hook_with_unicode():
    """``object_pairs_hook`` receives the key/value pairs in document order."""
    s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
    p = [
        ("xkd", 1),
        ("kcw", 2),
        ("art", 3),
        ("hxm", 4),
        ("qrt", 5),
        ("pad", 6),
        ("hoy", 7),
    ]

    # Without a hook the result is a plain dict holding the same pairs
    # (built from ``p`` rather than by eval()-ing the JSON text).
    assert sd_ujson.loads(s) == dict(p)
    assert sd_ujson.loads(s, object_pairs_hook=lambda x: x) == p

    od = sd_ujson.loads(s, object_pairs_hook=OrderedDict)
    assert od == OrderedDict(p)
    # Exact type check on purpose: a plain dict would also compare equal above.
    assert type(od) is OrderedDict

    # the object_pairs_hook takes priority over the object_hook
    assert sd_ujson.loads(
        s,
        object_pairs_hook=OrderedDict,
        object_hook=lambda x: None,
    ) == OrderedDict(p)
def test_separators():
    """Custom ``separators`` combined with ``indent`` give the expected layout and still round-trip."""
    h = [
        ['blorpie'],
        ['whoops'],
        [],
        'd-shtaeou',
        'd-nthiouh',
        'i-vhbjkhnth',
        {'nifty': 87},
        {'field': 'yes', 'morefield': False},
    ]

    # The expected text is layout-sensitive: indent=2 puts each item on its own
    # line, and the (' ,', ' : ') separators add a space before each delimiter.
    expect = textwrap.dedent("""\
    [
      [
        "blorpie"
      ] ,
      [
        "whoops"
      ] ,
      [] ,
      "d-shtaeou" ,
      "d-nthiouh" ,
      "i-vhbjkhnth" ,
      {
        "nifty" : 87
      } ,
      {
        "field" : "yes" ,
        "morefield" : false
      }
    ]""")

    d1 = sd_ujson.dumps(h)
    d2 = sd_ujson.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : '))

    h1 = sd_ujson.loads(d1)
    h2 = sd_ujson.loads(d2)

    assert h1 == h
    assert h2 == h
    assert d2 == expect
def test_encode_unicode_bmp():
    """Characters above U+FFFF encode identically in ``sd_ujson`` and ``sdjson`` and round-trip."""
    s = "\U0001f42e\U0001f42e\U0001F42D\U0001F42D"  # 🐮🐮🐭🐭

    encoded = sd_ujson.dumps(s)
    encoded_json = sdjson.dumps(s)
    # ``s`` always has four code points on Python 3.3+, so only the
    # 12-characters-per-code-point expectation applies (plus two for the quotes).
    assert len(encoded) == len(s) * 12 + 2
    assert encoded == encoded_json
    decoded = sd_ujson.loads(encoded)
    assert s == decoded

    # With ensure_ascii=False the characters are expected unescaped.
    encoded = sd_ujson.dumps(s, ensure_ascii=False)
    encoded_json = sdjson.dumps(s, ensure_ascii=False)
    assert len(encoded) == len(s) + 2  # original length + quotes
    assert encoded == encoded_json
    decoded = sd_ujson.loads(encoded)
    assert s == decoded
def test_highly_nested_objects_decoding():
    """Extremely deep nesting must raise an exception rather than crash the interpreter."""
    # test that loading highly-nested objects doesn't segfault when C
    # accelerations are used. See #12017
    depth = 100000
    deeply_nested = (
        '{"a":' * depth + '1' + '}' * depth,
        '{"a":' * depth + '[1]' + '}' * depth,
        '[' * depth + '1' + ']' * depth,
    )

    for document in deeply_nested:
        with pytest.raises((RecursionError, ValueError)):
            sd_ujson.loads(document)
def test_object_pairs_hook():
    """``object_pairs_hook`` receives ordered pairs, overrides ``object_hook``, and handles ``{}``."""
    s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
    p = [("xkd", 1), ("kcw", 2), ("art", 3), ("hxm", 4),
         ("qrt", 5), ("pad", 6), ("hoy", 7)]

    # Without a hook the result is a plain dict holding the same pairs
    # (built from ``p`` rather than by eval()-ing the JSON text).
    assert sd_ujson.loads(s) == dict(p)
    assert sd_ujson.loads(s, object_pairs_hook=lambda x: x) == p
    # NOTE(review): this goes through ``sd_ujson.json.load`` rather than an
    # ``sd_ujson`` entry point -- confirm which function is meant to be tested.
    assert sd_ujson.json.load(StringIO(s), object_pairs_hook=lambda x: x) == p

    od = sd_ujson.loads(s, object_pairs_hook=OrderedDict)
    assert od == OrderedDict(p)
    # Exact type check on purpose: a plain dict would also compare equal above.
    assert type(od) is OrderedDict

    # the object_pairs_hook takes priority over the object_hook
    assert sd_ujson.loads(
        s,
        object_pairs_hook=OrderedDict,
        object_hook=lambda x: None,
    ) == OrderedDict(p)

    # check that empty object literals work (see #17368)
    assert sd_ujson.loads('{}', object_pairs_hook=OrderedDict) == OrderedDict()
    assert sd_ujson.loads(
        '{"empty": {}}',
        object_pairs_hook=OrderedDict,
    ) == OrderedDict([('empty', OrderedDict())])
def test_unicode_decode():
    """Every ``\\uXXXX`` escape for code points 0 up to (not including) 0xD7FF decodes to that character."""
    for code_point in range(0xd7ff):
        escaped = f'"\\u{code_point:04x}"'
        assert sd_ujson.loads(escaped) == chr(code_point)
def test_big_unicode_decode():
    """A character above U+FFFF decodes both as a literal and as an escaped surrogate pair."""
    expected = 'z\U0001d120x'

    # Literal character embedded directly in the JSON string.
    literal_document = '"' + expected + '"'
    assert sd_ujson.loads(literal_document) == expected

    # The same character written as a \ud834\udd20 surrogate-pair escape.
    escaped_document = '"z\\ud834\\udd20x"'
    assert sd_ujson.loads(escaped_document) == expected
def test_decimal():
    """``parse_float`` lets floats be decoded as ``decimal.Decimal``."""
    value = sd_ujson.loads('1.1', parse_float=decimal.Decimal)
    assert isinstance(value, decimal.Decimal)
    assert value == decimal.Decimal('1.1')
def test_float():
    """``parse_int`` lets integers be decoded as ``float``."""
    value = sd_ujson.loads('1', parse_int=float)
    assert isinstance(value, float)
    assert value == 1.0
def test_empty_objects():
    """Empty object, array and string documents decode to the matching empty value."""
    empty_object = sd_ujson.loads('{}')
    assert empty_object == {}

    empty_array = sd_ujson.loads('[]')
    assert empty_array == []

    empty_string = sd_ujson.loads('""')
    assert empty_string == ""
def test_floats():
    """Floats survive an encode/decode round trip without losing precision."""
    samples = (
        1617161771.7650001,
        math.pi,
        math.pi ** 100,
        math.pi ** -100,
        3.1,
    )
    for number in samples:
        # The encoded text must parse back to the same float with ``float()`` ...
        assert float(sd_ujson.dumps(number)) == number
        # ... and with the decoder under test.
        assert sd_ujson.loads(sd_ujson.dumps(number)) == number
def test_extra_data():
    """Data after a complete document is rejected with a 'Trailing data' error."""
    # NOTE(review): an earlier ``test_extra_data`` appears in this source; if both
    # live in the same module, this definition shadows the earlier one.
    document = '[1, 2, 3]5'
    expected_message = 'Trailing data'
    with pytest.raises(ValueError, match=expected_message):
        sd_ujson.loads(document)
def test_loads(test_input, expected):
    """Decoding ``test_input`` must yield ``expected``.

    The arguments are presumably supplied by a parametrize decorator or
    fixtures that are not visible in this view.
    """
    decoded = sd_ujson.loads(test_input)
    assert decoded == expected
def test_invalid_escape():
    """An unknown backslash escape inside a string is rejected."""
    document = '["abc\\y"]'
    expected_message = "Unrecognized escape sequence when decoding 'string'"
    with pytest.raises(ValueError, match=expected_message):
        sd_ujson.loads(document)
def test_invalid_input_type():
    """Passing these non-string values to ``loads`` raises ``TypeError``."""
    expected_message = 'Expected String or Unicode'
    not_strings = [1, 3.14, [], {}, None]
    for candidate in not_strings:
        with pytest.raises(TypeError, match=expected_message):
            sd_ujson.loads(candidate)
def test_parse_pass_1():
    """Round-trip the comprehensive "pass1" document from the JSON_checker suite."""
    # from http://json.org/JSON_checker/test/pass1.json
    # NOTE: the "quotes" value spells its quote characters as the HTML entities
    # &#34; / &#x22; -- literal double quotes there would end the JSON string
    # early and make the whole document invalid.
    JSON = r'''
[
    "JSON Test Pattern pass1",
    {"object with 1 member":["array with 1 element"]},
    {},
    [],
    -42,
    true,
    false,
    null,
    {
        "integer": 1234567890,
        "real": -9876.543210,
        "e": 0.123456789e-12,
        "E": 1.234567890E+34,
        "": 23456789012E66,
        "zero": 0,
        "one": 1,
        "space": " ",
        "quote": "\"",
        "backslash": "\\",
        "controls": "\b\f\n\r\t",
        "slash": "/ & \/",
        "alpha": "abcdefghijklmnopqrstuvwyz",
        "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
        "digit": "0123456789",
        "0123456789": "digit",
        "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
        "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
        "true": true,
        "false": false,
        "null": null,
        "array":[ ],
        "object":{ },
        "address": "50 St. James Street",
        "url": "http://www.JSON.org/",
        "comment": "// /* <!-- --",
        "# -- --> */": " ",
        " s p a c e d " :[1,2 , 3 , 4 , 5 , 6 ,7 ],"compact":[1,2,3,4,5,6,7],
        "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
        "quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
        "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
: "A key can be any string"
    },
    0.5 ,98.6
,
99.44
,

1066,
1e1,
0.1e1,
1e-1,
1e00,2e+00,2e-00
,"rosebud"]
'''

    # test in/out equivalence and parsing
    res = sd_ujson.loads(JSON)
    out = sd_ujson.dumps(res)
    assert res == sd_ujson.loads(out)
def test_unicode_preservation():
    """Decoded JSON strings are exactly ``str``, at top level and inside containers."""
    empty = sd_ujson.loads('""')
    assert type(empty) is str

    single = sd_ujson.loads('"a"')
    assert type(single) is str

    first_item = sd_ujson.loads('["a"]')[0]
    assert type(first_item) is str
def test_out_of_range():
    """Numbers with an exponent too large for a float decode to +/- infinity."""
    positive = sd_ujson.loads('[23456789012E666]')
    assert positive == [float('inf')]

    negative = sd_ujson.loads('[-23456789012E666]')
    assert negative == [float('-inf')]
def test_decoder_optimizations():
    """An object with irregular whitespace around keys, colons and commas still decodes."""
    # Several optimizations were made that skip over calls to
    # the whitespace regex, so this test is designed to try and
    # exercise the uncommon cases. The array cases are already covered.
    document = '{   "key"    :    "value"    ,  "k":"v"    }'
    expected = {"key": "value", "k": "v"}
    assert sd_ujson.loads(document) == expected