def test_decode_from_unicode(self):
    """Decode a ``u()``-wrapped document and its ``str()`` form; results must match."""
    payload = u("{\"obj\": 31337}")

    from_wrapped = ujson.decode(payload)
    from_str = ujson.decode(str(payload))

    assert from_wrapped == from_str
def test_decode_big_escape(self):
    """Decode a very large quoted string made of a multi-byte character.

    The payload is a double-quoted byte string holding ``1024 * 1024 * 2``
    copies of the UTF-8 encoding of U+00E5.  The decode is repeated ten
    times; the test passes as long as no call raises.
    """
    # Make sure no Exception is raised.
    for _ in range(10):
        # Bytes literals replace the former ``compat.PY3`` /
        # ``compat.str_to_bytes`` Python 2 shims; on Python 3 they yield
        # exactly the same values.
        base = "\u00e5".encode("utf-8")
        quote = b'"'
        escape_input = quote + (base * 1024 * 1024 * 2) + quote
        ujson.decode(escape_input)
def test_array_basic(self):
    """Round-trip a 6-dimensional ``arange`` array, with and without ``numpy=True``."""
    expected = np.arange(96).reshape((2, 2, 2, 2, 3, 2))

    # Plain decode returns nested lists, so rebuild an ndarray before comparing.
    via_lists = np.array(ujson.decode(ujson.encode(expected)))
    tm.assert_numpy_array_equal(via_lists, expected)

    via_numpy = ujson.decode(ujson.encode(expected), numpy=True)
    tm.assert_numpy_array_equal(via_numpy, expected)
def test_array_reshaped(self, shape):
    """Round-trip ``np.arange(100)`` reshaped to ``shape``, with and without ``numpy=True``."""
    expected = np.arange(100).reshape(shape)

    via_lists = np.array(ujson.decode(ujson.encode(expected)))
    tm.assert_numpy_array_equal(via_lists, expected)

    via_numpy = ujson.decode(ujson.encode(expected), numpy=True)
    tm.assert_numpy_array_equal(via_numpy, expected)
def test_decode_big_escape(self):
    """Decode a huge quoted byte string ten times; passing means nothing raised."""
    repeat_count = 1024 * 1024 * 2

    for _ in range(10):
        char_bytes = "\u00e5".encode("utf-8")
        delimiter = b'"'
        # A double-quoted string whose content is the encoded character
        # repeated ``repeat_count`` times.
        payload = delimiter + (char_bytes * repeat_count) + delimiter
        ujson.decode(payload)
def test_encode_list_conversion(self):
    """Encode a short list of ints and check it against ``json`` and ``ujson`` decoders."""
    values = [1, 2, 3, 4]
    encoded = ujson.encode(values)

    assert values == json.loads(encoded)
    assert values == ujson.decode(encoded)

    expected_array = np.array(values)
    tm.assert_numpy_array_equal(expected_array,
                                ujson.decode(encoded, numpy=True))
def test_encode_double_tiny_exponential(self):
    """Round-trip floats with very small magnitudes.

    The first three values must survive exactly; the last is only checked
    with ``np.allclose``.
    """
    for exact_value in (1e-40, 1e-100, -1e-45):
        assert exact_value == ujson.decode(ujson.encode(exact_value))

    approx_value = -1e-145
    round_tripped = ujson.decode(ujson.encode(approx_value))
    assert np.allclose(approx_value, round_tripped)
def test_series_nested(self, orient):
    """A dict of Series must encode like each Series encoded on its own."""
    ser = Series(
        [10, 20, 30, 40, 50, 60],
        name="series",
        index=[6, 7, 8, 9, 10, 15],
    ).sort_values()
    nested = {"s1": ser, "s2": ser.copy()}

    # Only forward ``orient`` when the caller supplied one.
    kwargs = {"orient": orient} if orient is not None else {}

    expected = {
        key: ujson.decode(ujson.encode(ser, **kwargs))
        for key in ("s1", "s2")
    }
    assert ujson.decode(ujson.encode(nested, **kwargs)) == expected
def test_encode_array_in_array(self):
    """Encode a deeply nested empty list and compare against the ``json`` module."""
    nested = [[[[]]]]
    encoded = ujson.encode(nested)

    assert nested == json.loads(encoded)
    assert encoded == json.dumps(nested)
    assert nested == ujson.decode(encoded)

    expected_array = np.array(nested)
    tm.assert_numpy_array_equal(expected_array,
                                ujson.decode(encoded, numpy=True))
def test_encode_array_of_doubles(self):
    """Round-trip a list of forty identical doubles through the encoder."""
    doubles = [31337.31337] * 40
    encoded = ujson.encode(doubles)

    assert doubles == json.loads(encoded)
    assert doubles == ujson.decode(encoded)

    expected_array = np.array(doubles)
    tm.assert_numpy_array_equal(expected_array,
                                ujson.decode(encoded, numpy=True))
def test_encode_array_of_nested_arrays(self):
    """Round-trip twenty nested empty lists, also via ``numpy=True`` decoding."""
    nested = [[[[]]]] * 20
    encoded = ujson.encode(nested)

    assert nested == json.loads(encoded)
    assert nested == ujson.decode(encoded)

    # The numpy decode is asked for the same dtype numpy infers for the input.
    expected = np.array(nested)
    result = ujson.decode(encoded, numpy=True, dtype=expected.dtype)
    tm.assert_numpy_array_equal(expected, result)
def test_dataframe_nested(self, orient):
    """A dict of DataFrames must encode like each frame encoded on its own."""
    frame = DataFrame(
        [[1, 2, 3], [4, 5, 6]],
        index=["a", "b"],
        columns=["x", "y", "z"],
    )
    nested = {"df1": frame, "df2": frame.copy()}

    # Only forward ``orient`` when the caller supplied one.
    kwargs = {"orient": orient} if orient is not None else {}

    expected = {
        key: ujson.decode(ujson.encode(frame, **kwargs))
        for key in ("df1", "df2")
    }
    assert ujson.decode(ujson.encode(nested, **kwargs)) == expected
def test_array_float(self):
    """Round-trip a (5, 5, 4) float32 array and compare approximately."""
    dtype = np.float32
    expected = np.arange(100.202, 200.202, 1, dtype=dtype).reshape((5, 5, 4))

    via_lists = np.array(ujson.decode(ujson.encode(expected)), dtype=dtype)
    tm.assert_almost_equal(expected, via_lists)

    via_numpy = ujson.decode(ujson.encode(expected), numpy=True, dtype=dtype)
    tm.assert_almost_equal(expected, via_numpy)
def test_encode_list_long_conversion(self):
    """Round-trip a list holding six copies of 9223372036854775807."""
    longs = [9223372036854775807] * 6
    encoded = ujson.encode(longs)

    assert longs == json.loads(encoded)
    assert longs == ujson.decode(encoded)

    expected_array = np.array(longs)
    result = ujson.decode(encoded, numpy=True, dtype=np.int64)
    tm.assert_numpy_array_equal(expected_array, result)
def test_double_precision(self):
    """Check ``double_precision`` at 15 digits (exact) and at 3 and 9 (rounded)."""
    value = 30.012345678901234

    full = ujson.encode(value, double_precision=15)
    assert value == json.loads(full)
    assert value == ujson.decode(full)

    for digits in (3, 9):
        encoded = ujson.encode(value, double_precision=digits)
        expected = round(value, digits)

        assert expected == json.loads(encoded)
        assert expected == ujson.decode(encoded)
def test_datetime_index(self):
    """Round-trip a DatetimeIndex, and a Series indexed by it, using ``date_unit="ns"``."""
    unit = "ns"
    rng = date_range("1/1/2000", periods=20)

    # The bare index decodes to a list of values; rebuild a DatetimeIndex from it.
    raw_index = ujson.decode(ujson.encode(rng, date_unit=unit))
    tm.assert_index_equal(rng, DatetimeIndex(np.array(raw_index)))

    ts = Series(np.random.randn(len(rng)), index=rng)
    result = Series(ujson.decode(ujson.encode(ts, date_unit=unit)))

    # Cast the decoded index values to int64 before rebuilding the DatetimeIndex.
    as_int64 = result.index.values.astype(np.int64)
    result.index = DatetimeIndex(as_int64)
    tm.assert_series_equal(ts, result)
def test_encode_null_character(self):
    """Strings containing NUL must encode like ``json.dumps`` and round-trip."""
    # First a NUL embedded in other text, then a NUL on its own.
    for text in ("31337 \x00 1337", "\x00"):
        encoded = ujson.encode(text)

        assert text == json.loads(encoded)
        assert encoded == json.dumps(text)
        assert text == ujson.decode(encoded)

    assert '" \\u0000\\r\\n "' == ujson.dumps(u(" \u0000\r\n "))
def test_default_handler(self):
    """Exercise ``ujson.encode``'s ``default_handler`` argument.

    Covers: no handler (expects ``OverflowError``), ``str`` as handler,
    handlers returning a string, an int and a datetime, a handler that
    raises, and a list of objects compared against ``json.dumps(default=str)``.
    """

    class _TestObject(object):
        def __init__(self, val):
            self.val = val

        @property
        def recursive_attr(self):
            # Each access yields a fresh object carrying the same property.
            return _TestObject("recursive_attr")

        def __str__(self):
            return str(self.val)

    with pytest.raises(OverflowError, match="Maximum recursion level reached"):
        ujson.encode(_TestObject("foo"))

    assert ujson.encode(_TestObject("foo"), default_handler=str) == '"foo"'

    def constant_str_handler(_):
        return "foobar"

    encoded = ujson.encode(_TestObject("foo"),
                           default_handler=constant_str_handler)
    assert encoded == '"foobar"'

    def raising_handler(_):
        raise TypeError("I raise for anything")

    with pytest.raises(TypeError, match="I raise for anything"):
        ujson.encode(_TestObject("foo"), default_handler=raising_handler)

    def constant_int_handler(_):
        return 42

    encoded = ujson.encode(_TestObject("foo"),
                           default_handler=constant_int_handler)
    assert ujson.decode(encoded) == 42

    def datetime_handler(_):
        return datetime.datetime(2013, 2, 3)

    # A handler returning a datetime must encode like that datetime itself.
    direct = ujson.decode(ujson.encode(datetime.datetime(2013, 2, 3)))
    via_handler = ujson.decode(
        ujson.encode(_TestObject("foo"), default_handler=datetime_handler))
    assert direct == via_handler

    objects = [_TestObject("foo"), _TestObject("bar")]
    reference = json.loads(json.dumps(objects, default=str))
    assert reference == ujson.decode(
        ujson.encode(objects, default_handler=str))
def test_encode_set(self):
    """Every value decoded from an encoded set must be a member of that set."""
    members = {1, 2, 3, 4, 5, 6, 7, 8, 9}
    decoded = ujson.decode(ujson.encode(members))

    assert all(value in members for value in decoded)
def test_series(self, orient, numpy):
    """Round-trip a sorted Series for the given ``orient`` / ``numpy`` options.

    The expected Series is adjusted per orient (name and index) before the
    comparison, which ignores dtype.
    """
    s = Series(
        [10, 20, 30, 40, 50, 60],
        name="series",
        index=[6, 7, 8, 9, 10, 15],
    ).sort_values()

    # Only pass the options that were actually provided.
    encode_kwargs = {"orient": orient} if orient is not None else {}
    decode_kwargs = {"numpy": numpy} if numpy is not None else {}

    encoded = ujson.encode(s, **encode_kwargs)
    decoded = ujson.decode(encoded, **decode_kwargs)

    # "split" output is expanded as constructor keywords; everything else
    # is handed to Series positionally.
    if orient == "split":
        output = Series(**_clean_dict(decoded))
    else:
        output = Series(decoded)

    if orient in (None, "index"):
        s.name = None
        output = output.sort_values()
        s.index = ["6", "7", "8", "9", "10", "15"]
    elif orient in ("records", "values"):
        s.name = None
        s.index = [0, 1, 2, 3, 4, 5]

    tm.assert_series_equal(output, s, check_dtype=False)
def test_dataframe(self, orient, numpy):
    """Round-trip a small DataFrame for the given ``orient`` / ``numpy`` options.

    The ``orient == "records"`` with truthy ``numpy`` combination is skipped.
    The expected frame is adjusted per orient before a dtype-insensitive
    comparison.
    """
    if orient == "records" and numpy:
        pytest.skip("Not idiomatic pandas")

    df = DataFrame(
        [[1, 2, 3], [4, 5, 6]],
        index=["a", "b"],
        columns=["x", "y", "z"],
    )

    # Only pass the options that were actually provided.
    encode_kwargs = {"orient": orient} if orient is not None else {}
    decode_kwargs = {"numpy": numpy} if numpy is not None else {}

    encoded = ujson.encode(df, **encode_kwargs)
    decoded = ujson.decode(encoded, **decode_kwargs)

    # "split" output is expanded as constructor keywords; everything else
    # is handed to DataFrame positionally.
    if orient == "split":
        output = DataFrame(**_clean_dict(decoded))
    else:
        output = DataFrame(decoded)

    # Adjust the expected frame so it is comparable with the decoded one.
    if orient == "values":
        df.columns = [0, 1, 2]
        df.index = [0, 1]
    elif orient == "records":
        df.index = [0, 1]
    elif orient == "index":
        df = df.transpose()

    tm.assert_frame_equal(output, df, check_dtype=False)
def test_int_array(self, any_int_dtype):
    """Round-trip ``0..99`` cast to ``any_int_dtype`` through encode/decode.

    Parameters
    ----------
    any_int_dtype
        Integer dtype supplied by the test parametrization; used both to
        cast the input and to rebuild the decoded array.
    """
    # ``np.int`` was only an alias for the builtin ``int``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so use the builtin.
    arr = np.arange(100, dtype=int)
    arr_input = arr.astype(any_int_dtype)

    arr_output = np.array(ujson.decode(ujson.encode(arr_input)),
                          dtype=any_int_dtype)
    tm.assert_numpy_array_equal(arr_input, arr_output)
def test_bool_array(self):
    """Round-trip a boolean array through encode/decode and compare exactly."""
    # ``np.bool`` was only an alias for the builtin ``bool``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so use the builtin.
    bool_array = np.array(
        [True, False, True, True, False, True, False, False], dtype=bool)

    output = np.array(ujson.decode(ujson.encode(bool_array)), dtype=bool)
    tm.assert_numpy_array_equal(bool_array, output)
def test_encode_string_conversion2(self):
    """Check escaping of backslash, slash and control characters in a string."""
    text = "A string \\ / \b \f \n \r \t"
    encoded = ujson.encode(text)

    assert text == json.loads(encoded)
    assert text == ujson.decode(encoded)
    # The exact escaped form is pinned, including the escaped forward slash.
    assert encoded == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"'
def test_encode_control_escaping(self):
    """The control character ``\\x19`` must round-trip and match ``json_unicode``."""
    control_char = "\x19"
    encoded = ujson.encode(control_char)
    decoded = ujson.decode(encoded)

    assert control_char == decoded
    assert encoded == json_unicode(control_char)
def test_encode_unicode_surrogate_pair(self):
    """Encoding must match ``json_unicode``; decoding must match ``json.loads``."""
    text = "\xf0\x90\x8d\x86"
    encoded = ujson.encode(text)
    decoded = ujson.decode(encoded)

    assert encoded == json_unicode(text)
    assert decoded == json.loads(encoded)
def test_encode_unicode_4bytes_utf8(self):
    """Same checks as the surrogate-pair test, with trailing ASCII text appended."""
    text = "\xf0\x91\x80\xb0TRAILINGNORMAL"
    encoded = ujson.encode(text)
    decoded = ujson.decode(encoded)

    assert encoded == json_unicode(text)
    assert decoded == json.loads(encoded)
def test_encode_long_conversion(self):
    """Round-trip 9223372036854775807 and compare the output with ``json.dumps``."""
    big_int = 9223372036854775807
    encoded = ujson.encode(big_int)

    assert big_int == json.loads(encoded)
    assert encoded == json.dumps(big_int)
    assert big_int == ujson.decode(encoded)
def test_encode_datetime_conversion(self):
    """A datetime encoded with ``date_unit="s"`` must equal its ``timegm`` value."""
    stamp = datetime.datetime.fromtimestamp(time.time())
    encoded = ujson.encode(stamp, date_unit="s")

    # Seconds derived from the datetime's UTC time tuple.
    expected_seconds = int(calendar.timegm(stamp.utctimetuple()))

    assert expected_seconds == json.loads(encoded)
    assert expected_seconds == ujson.decode(encoded)
def test_index(self):
    """Round-trip a named Index for every orient, with and without ``numpy=True``.

    For ``orient="split"`` the decoded dict is passed through ``_clean_dict``
    and expanded into the ``Index`` constructor, and the name is checked as
    well.  For all other cases the Index is rebuilt with an explicit name.
    """
    idx = Index([23, 45, 18, 98, 43, 11], name="index")

    # Decode once with default options and once with ``numpy=True``.
    decode_variants = ({}, {"numpy": True})

    # Column indexed.
    for decode_kwargs in decode_variants:
        decoded = ujson.decode(ujson.encode(idx), **decode_kwargs)
        tm.assert_index_equal(idx, Index(decoded, name="index"))

    for decode_kwargs in decode_variants:
        encoded = ujson.encode(idx, orient="split")
        rebuilt = Index(**_clean_dict(ujson.decode(encoded, **decode_kwargs)))
        tm.assert_index_equal(idx, rebuilt)
        assert idx.name == rebuilt.name

    for orient in ("values", "records", "index"):
        for decode_kwargs in decode_variants:
            encoded = ujson.encode(idx, orient=orient)
            decoded = ujson.decode(encoded, **decode_kwargs)
            tm.assert_index_equal(idx, Index(decoded, name="index"))
def test_decode_with_trailing_whitespaces(self):
    """Whitespace after the closing brace must not affect the decoded result."""
    padded_document = "{}\n\t "
    decoded = ujson.decode(padded_document)
    assert {} == decoded
def test_encode_unicode_conversion(self, unicode_input):
    """Encoding must match ``json.dumps``; decoding must match ``json.loads``."""
    encoded = ujson.encode(unicode_input)
    decoded = ujson.decode(encoded)

    assert encoded == json.dumps(unicode_input)
    assert decoded == json.loads(encoded)
def test_encode_dict_with_unicode_keys(self, unicode_key):
    """A one-entry dict keyed by ``unicode_key`` must survive a round trip."""
    original = {unicode_key: "value1"}
    round_tripped = ujson.decode(ujson.encode(original))
    assert original == round_tripped
def test_decode_jibberish(self):
    """Non-JSON text must raise ``ValueError`` with the expected message."""
    garbage = "fdsa sda v9sa fdsa"
    expected_msg = "Unexpected character found when decoding 'false'"

    with pytest.raises(ValueError, match=expected_msg):
        ujson.decode(garbage)
def test_encode_dict_conversion(self):
    """Encode a small str-to-int dict and decode it with both ``json`` and ``ujson``."""
    mapping = {"k1": 1, "k2": 2, "k3": 3, "k4": 4}
    encoded = ujson.encode(mapping)

    assert mapping == json.loads(encoded)
    assert mapping == ujson.decode(encoded)
def test_decode_broken_json_leak(self, broken_json, err_msg):
    """Decode ``broken_json`` 1000 times, expecting the same ``ValueError`` each time.

    NOTE(review): judging by the test name, the repetition is presumably
    meant to surface leaks on the error path -- confirm.
    """
    # ``err_msg`` is matched literally, so escape it for use as a regex.
    literal_pattern = re.escape(err_msg)

    for _ in range(1000):
        with pytest.raises(ValueError, match=literal_pattern):
            ujson.decode(broken_json)
def test_decode_depth_too_big(self, too_big_char):
    """``too_big_char`` repeated 1024 * 1024 times must hit the depth limit."""
    oversized_document = too_big_char * (1024 * 1024)
    expected_msg = "Reached object decoding depth limit"

    with pytest.raises(ValueError, match=expected_msg):
        ujson.decode(oversized_document)
def test_float_max(self, float_numpy_dtype):
    """Round-trip a tenth of the dtype's maximum value at ``double_precision=15``."""
    scalar_type = np.dtype(float_numpy_dtype).type
    value = scalar_type(np.finfo(float_numpy_dtype).max / 10)

    encoded = ujson.encode(value, double_precision=15)
    round_tripped = scalar_type(ujson.decode(encoded))

    tm.assert_almost_equal(round_tripped, value)
def test_float(self, float_numpy_dtype):
    """Round-trip 256.2013 as a scalar of the given float dtype."""
    scalar_type = np.dtype(float_numpy_dtype).type
    value = scalar_type(256.2013)

    round_tripped = scalar_type(ujson.decode(ujson.encode(value)))
    assert round_tripped == value
def test_decode_invalid_array(self, invalid_arr):
    """Decoding ``invalid_arr`` must raise ``ValueError`` (message not checked)."""
    malformed_document = invalid_arr
    with pytest.raises(ValueError):
        ujson.decode(malformed_document)
def test_array_numpy_except(self, bad_input, exc_type, err_msg, kwargs):
    """Decoding ``bad_input`` with ``numpy=True`` must raise ``exc_type``.

    ``kwargs`` are forwarded to ``ujson.decode``; ``err_msg`` is the regex the
    exception message has to match.
    """
    with pytest.raises(exc_type, match=err_msg):
        # Encoding happens inside the context too, as in the decode call chain.
        serialized = ujson.dumps(bad_input)
        ujson.decode(serialized, numpy=True, **kwargs)
def test_decode_array(self, arr):
    """Decoding ``str(arr)`` must give back ``arr``."""
    document = str(arr)
    assert arr == ujson.decode(document)
def test_decode_extreme_numbers(self, extreme_num):
    """Decoding ``str(extreme_num)`` must give back ``extreme_num``."""
    document = str(extreme_num)
    assert extreme_num == ujson.decode(document)
def test_decode_too_extreme_numbers(self, too_extreme_num):
    """Decoding ``too_extreme_num`` must raise ``ValueError`` (message not checked)."""
    out_of_range_document = too_extreme_num
    with pytest.raises(ValueError):
        ujson.decode(out_of_range_document)
def test_decode_numeric_int(self, numeric_int_as_str):
    """Decoding an integer string must agree with ``int()`` on the same string."""
    expected = int(numeric_int_as_str)
    assert expected == ujson.decode(numeric_int_as_str)
def test_int(self, any_int_numpy_dtype):
    """Round-trip the value 1 as a scalar of the given integer dtype."""
    scalar_type = np.dtype(any_int_numpy_dtype).type
    value = scalar_type(1)

    round_tripped = scalar_type(ujson.decode(ujson.encode(value)))
    assert round_tripped == value
def test_decode_invalid_dict(self, invalid_dict):
    """Decoding ``invalid_dict`` must raise ``ValueError`` with one of three messages."""
    # Regex alternation: any one of these messages is accepted.
    expected_msg = ("Key name of object must be 'string' when decoding 'object'|"
                    "No ':' found when decoding object value|"
                    "Expected object or value")

    with pytest.raises(ValueError, match=expected_msg):
        ujson.decode(invalid_dict)
def test_bool_array(self):
    """Round-trip a boolean array through encode/decode and compare exactly."""
    flags = [True, False, True, True, False, True, False, False]
    expected = np.array(flags, dtype=bool)

    decoded = ujson.decode(ujson.encode(expected))
    result = np.array(decoded, dtype=bool)

    tm.assert_numpy_array_equal(expected, result)
def test_decode_bad_string(self, bad_string):
    """Decoding ``bad_string`` must raise ``ValueError`` with one of two messages."""
    # Regex alternation: either message is accepted.  The doubled "when" is
    # part of the pattern being matched and is kept verbatim.
    expected_msg = ("Unexpected character found when decoding|"
                    "Unmatched ''\"' when when decoding 'string'")

    with pytest.raises(ValueError, match=expected_msg):
        ujson.decode(bad_string)
def test_bool(self, bool_input):
    """``bool(bool_input)`` must survive an encode/decode round trip."""
    flag = bool(bool_input)
    round_tripped = ujson.decode(ujson.encode(flag))
    assert round_tripped == flag
def test_decode_broken_json(self, broken_json):
    """Decoding ``broken_json`` must raise ``ValueError`` with the expected message."""
    expected_msg = "Expected object or value"

    with pytest.raises(ValueError, match=expected_msg):
        ujson.decode(broken_json)
def test_decode_number_with_32bit_sign_bit(self, val):
    """Decode ``val`` embedded under the ``"id"`` key of a JSON object.

    Targets numbers that fit within 32 bits but would have the sign bit
    set (2**31 <= x < 2**32); they must be decoded properly.
    """
    document = f'{{"id": {val}}}'
    decoded = ujson.decode(document)
    assert decoded["id"] == val
def test_encode_builtin_values_conversion(self, builtin_value):
    """``builtin_value`` must encode like ``json.dumps`` and round-trip unchanged."""
    encoded = ujson.encode(builtin_value)

    assert builtin_value == json.loads(encoded)
    assert encoded == json.dumps(builtin_value)
    assert builtin_value == ujson.decode(encoded)
def test_decode_numeric_int_exp(self, int_exp):
    """``ujson.decode`` must agree with ``json.loads`` on ``int_exp``."""
    reference = json.loads(int_exp)
    assert ujson.decode(int_exp) == reference
def test_encode_num_conversion(self, num_input):
    """``num_input`` must encode like ``json.dumps`` and round-trip unchanged."""
    encoded = ujson.encode(num_input)

    assert num_input == json.loads(encoded)
    assert encoded == json.dumps(num_input)
    assert num_input == ujson.decode(encoded)
def test_encode_long_conversion(self, long_input):
    """``long_input`` must encode like ``json.dumps`` and round-trip unchanged."""
    encoded = ujson.encode(long_input)

    assert long_input == json.loads(encoded)
    assert encoded == json.dumps(long_input)
    assert long_input == ujson.decode(encoded)
def test_encode_double_conversion(self, double_input):
    """``double_input`` must round-trip to within five decimal places."""
    encoded = ujson.encode(double_input)
    expected = round(double_input, 5)

    assert expected == round(json.loads(encoded), 5)
    assert expected == round(ujson.decode(encoded), 5)
def test_decode_null_character(self):
    """A ``\\u0000`` escape inside a JSON string must decode like ``json.loads``."""
    document = '"31337 \\u0000 31337"'
    reference = json.loads(document)
    assert ujson.decode(document) == reference
def test_double_long_numbers(self, long_number):
    """A dict holding ``long_number`` must round-trip at ``double_precision=15``."""
    original = {"a": long_number}
    round_tripped = ujson.decode(ujson.encode(original, double_precision=15))
    assert original == round_tripped
def test_decimal_decode_test_precise(self):
    """``{"a": 4.56}`` must round-trip when decoded with ``precise_float=True``."""
    original = {"a": 4.56}
    round_tripped = ujson.decode(ujson.encode(original), precise_float=True)
    assert original == round_tripped