def test_itertuples(self, float_frame):
    for i, tup in enumerate(float_frame.itertuples()):
        s = DataFrame._constructor_sliced(tup[1:])
        s.name = tup[0]
        expected = float_frame.iloc[i, :].reset_index(drop=True)
        tm.assert_series_equal(s, expected)

    df = DataFrame(
        {"floats": np.random.randn(5), "ints": range(5)}, columns=["floats", "ints"]
    )

    for tup in df.itertuples(index=False):
        assert isinstance(tup[1], int)

    df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]})
    dfaa = df[["a", "a"]]

    assert list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)]

    # repr with int on 32-bit/windows
    if not (compat.is_platform_windows() or compat.is_platform_32bit()):
        assert (
            repr(list(df.itertuples(name=None)))
            == "[(0, 1, 4), (1, 2, 5), (2, 3, 6)]"
        )

    tup = next(df.itertuples(name="TestName"))
    assert tup._fields == ("Index", "a", "b")
    assert (tup.Index, tup.a, tup.b) == tup
    assert type(tup).__name__ == "TestName"

    df.columns = ["def", "return"]
    tup2 = next(df.itertuples(name="TestName"))
    assert tup2 == (0, 1, 4)
    assert tup2._fields == ("Index", "_1", "_2")

    df3 = DataFrame({"f" + str(i): [i] for i in range(1024)})

    # will raise SyntaxError if trying to create namedtuple
    tup3 = next(df3.itertuples())
    assert isinstance(tup3, tuple)
    if PY37:
        assert hasattr(tup3, "_fields")
    else:
        assert not hasattr(tup3, "_fields")

    # GH 28282
    df_254_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(254)}])
    result_254_columns = next(df_254_columns.itertuples(index=False))
    assert isinstance(result_254_columns, tuple)
    assert hasattr(result_254_columns, "_fields")

    df_255_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(255)}])
    result_255_columns = next(df_255_columns.itertuples(index=False))
    assert isinstance(result_255_columns, tuple)

    # Dataframes with >=255 columns will fallback to regular tuples on python < 3.7
    if PY37:
        assert hasattr(result_255_columns, "_fields")
    else:
        assert not hasattr(result_255_columns, "_fields")
def test_dropna(self):
    # https://github.com/pydata/pandas/issues/9443#issuecomment-73719328

    tm.assert_series_equal(
        pd.Series([True, True, False]).value_counts(dropna=True),
        pd.Series([2, 1], index=[True, False]))
    tm.assert_series_equal(
        pd.Series([True, True, False]).value_counts(dropna=False),
        pd.Series([2, 1], index=[True, False]))

    tm.assert_series_equal(
        pd.Series([True, True, False, None]).value_counts(dropna=True),
        pd.Series([2, 1], index=[True, False]))
    tm.assert_series_equal(
        pd.Series([True, True, False, None]).value_counts(dropna=False),
        pd.Series([2, 1, 1], index=[True, False, np.nan]))

    tm.assert_series_equal(
        pd.Series([10.3, 5., 5.]).value_counts(dropna=True),
        pd.Series([2, 1], index=[5., 10.3]))
    tm.assert_series_equal(
        pd.Series([10.3, 5., 5.]).value_counts(dropna=False),
        pd.Series([2, 1], index=[5., 10.3]))

    tm.assert_series_equal(
        pd.Series([10.3, 5., 5., None]).value_counts(dropna=True),
        pd.Series([2, 1], index=[5., 10.3]))

    # 32-bit linux has a different ordering
    if not compat.is_platform_32bit():
        tm.assert_series_equal(
            pd.Series([10.3, 5., 5., None]).value_counts(dropna=False),
            pd.Series([2, 1, 1], index=[5., 10.3, np.nan]))
def skipif_32bit(param):
    """
    Skip parameters in a parametrize on 32bit systems. Specifically used
    here to skip leaf_size parameters related to GH 23440.
    """
    marks = pytest.mark.skipif(compat.is_platform_32bit(),
                               reason='GH 23440: int type mismatch on 32bit')
    return pytest.param(param, marks=marks)
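# Usage sketch (illustrative, not part of the original suite): because
# skipif_32bit wraps a single value in pytest.param, it can mark individual
# parametrize cases while the remaining cases still run on all platforms.
# The IntervalTree leaf_size parametrization later in this file uses it
# exactly this way; the test name and body here are hypothetical.
@pytest.mark.parametrize(
    "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
)
def test_leaf_size_values(leaf_size):
    # only the decorator pattern matters; 10000 is never skipped
    assert leaf_size in (1, 10, 100, 10000)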
def test_int_max(self, any_int_dtype):
    if any_int_dtype in ("int64", "uint64") and compat.is_platform_32bit():
        pytest.skip("Cannot test 64-bit integer on 32-bit platform")

    klass = np.dtype(any_int_dtype).type

    # uint64 max will always overflow,
    # as it's encoded to signed.
    if any_int_dtype == "uint64":
        num = np.iinfo("int64").max
    else:
        num = np.iinfo(any_int_dtype).max

    assert klass(ujson.decode(ujson.encode(num))) == num
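# Hedged sketch of why the uint64 branch above is capped: per the comment in
# test_int_max, ujson encodes integers through a signed 64-bit path, so the
# largest value that can round-trip is np.iinfo("int64").max rather than the
# true uint64 maximum. This snippet only checks the numeric headroom; it does
# not exercise ujson itself.
import numpy as np

signed_max = np.iinfo("int64").max      # 9223372036854775807
unsigned_max = np.iinfo("uint64").max   # 18446744073709551615
assert unsigned_max > signed_max        # round-tripping uint64 max would overflow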
def test_itertuples(self):
    for i, tup in enumerate(self.frame.itertuples()):
        s = self.klass._constructor_sliced(tup[1:])
        s.name = tup[0]
        expected = self.frame.iloc[i, :].reset_index(drop=True)
        self._assert_series_equal(s, expected)

    df = self.klass({'floats': np.random.randn(5),
                     'ints': lrange(5)}, columns=['floats', 'ints'])

    for tup in df.itertuples(index=False):
        assert isinstance(tup[1], (int, long))

    df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]})
    dfaa = df[['a', 'a']]

    assert (list(dfaa.itertuples()) ==
            [(0, 1, 1), (1, 2, 2), (2, 3, 3)])

    # repr will be int/long on 32-bit/windows
    if not (compat.is_platform_windows() or compat.is_platform_32bit()):
        assert (repr(list(df.itertuples(name=None))) ==
                '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]')

    tup = next(df.itertuples(name='TestName'))

    if LooseVersion(sys.version) >= LooseVersion('2.7'):
        assert tup._fields == ('Index', 'a', 'b')
        assert (tup.Index, tup.a, tup.b) == tup
        assert type(tup).__name__ == 'TestName'

    df.columns = ['def', 'return']
    tup2 = next(df.itertuples(name='TestName'))
    assert tup2 == (0, 1, 4)

    if LooseVersion(sys.version) >= LooseVersion('2.7'):
        assert tup2._fields == ('Index', '_1', '_2')

    df3 = DataFrame({'f' + str(i): [i] for i in range(1024)})
    # will raise SyntaxError if trying to create namedtuple
    tup3 = next(df3.itertuples())
    assert not hasattr(tup3, '_fields')
    assert isinstance(tup3, tuple)
def test_replace_series(self, how, to_key, from_key):
    if from_key == "bool" and how == "series":
        # doesn't work in PY3, though ...dict_from_bool works fine
        pytest.skip("doesn't work as in PY3")

    index = pd.Index([3, 4], name="xxx")
    obj = pd.Series(self.rep[from_key], index=index, name="yyy")
    assert obj.dtype == from_key

    if from_key.startswith("datetime") and to_key.startswith("datetime"):
        # tested below
        return
    elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]:
        # tested below
        return

    if how == "dict":
        replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
    elif how == "series":
        replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
    else:
        raise ValueError

    result = obj.replace(replacer)

    if (from_key == "float64" and to_key in ("int64",)) or (
        from_key == "complex128" and to_key in ("int64", "float64")
    ):
        if compat.is_platform_32bit() or compat.is_platform_windows():
            pytest.skip(
                "32-bit platform buggy: {0} -> {1}".format(from_key, to_key)
            )

        # Expected: do not downcast by replacement
        exp = pd.Series(self.rep[to_key], index=index, name="yyy",
                        dtype=from_key)
    else:
        exp = pd.Series(self.rep[to_key], index=index, name="yyy")
        assert exp.dtype == to_key

    tm.assert_series_equal(result, exp)
def test_replace_series(self, how, to_key, from_key):
    if from_key == 'bool' and how == 'series' and compat.PY3:
        # doesn't work in PY3, though ...dict_from_bool works fine
        pytest.skip("doesn't work as in PY3")

    index = pd.Index([3, 4], name='xxx')
    obj = pd.Series(self.rep[from_key], index=index, name='yyy')
    assert obj.dtype == from_key

    if (from_key.startswith('datetime') and to_key.startswith('datetime')):
        # tested below
        return
    elif from_key in ['datetime64[ns, US/Eastern]',
                      'datetime64[ns, UTC]']:
        # tested below
        return

    if how == 'dict':
        replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
    elif how == 'series':
        replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
    else:
        raise ValueError

    result = obj.replace(replacer)

    if ((from_key == 'float64' and to_key in ('int64',)) or
            (from_key == 'complex128' and to_key in ('int64', 'float64'))):

        if compat.is_platform_32bit() or compat.is_platform_windows():
            pytest.skip("32-bit platform buggy: {0} -> {1}".format(
                from_key, to_key))

        # Expected: do not downcast by replacement
        exp = pd.Series(self.rep[to_key], index=index, name='yyy',
                        dtype=from_key)
    else:
        exp = pd.Series(self.rep[to_key], index=index, name='yyy')
        assert exp.dtype == to_key

    tm.assert_series_equal(result, exp)
def test_ndarray_compat_properties(self):
    if compat.is_platform_32bit():
        pytest.skip("skipping on 32bit")
    super(TestPeriodIndex, self).test_ndarray_compat_properties()
class TestIntervalTree(object):

    def setup_method(self, method):
        gentree = lambda dtype: IntervalTree(np.arange(5, dtype=dtype),
                                             np.arange(5, dtype=dtype) + 2)
        self.tree = gentree('int64')
        self.trees = {dtype: gentree(dtype)
                      for dtype in ['int32', 'int64', 'float32', 'float64']}

    def test_get_loc(self):
        for dtype, tree in self.trees.items():
            tm.assert_numpy_array_equal(tree.get_loc(1),
                                        np.array([0], dtype='int64'))
            tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)),
                                        np.array([0, 1], dtype='int64'))
            with pytest.raises(KeyError):
                tree.get_loc(-1)

    def test_get_indexer(self):
        for dtype, tree in self.trees.items():
            tm.assert_numpy_array_equal(
                tree.get_indexer(np.array([1.0, 5.5, 6.5])),
                np.array([0, 4, -1], dtype='int64'))
            with pytest.raises(KeyError):
                tree.get_indexer(np.array([3.0]))

    def test_get_indexer_non_unique(self):
        indexer, missing = self.tree.get_indexer_non_unique(
            np.array([1.0, 2.0, 6.5]))
        tm.assert_numpy_array_equal(indexer[:1],
                                    np.array([0], dtype='int64'))
        tm.assert_numpy_array_equal(np.sort(indexer[1:3]),
                                    np.array([0, 1], dtype='int64'))
        tm.assert_numpy_array_equal(np.sort(indexer[3:]),
                                    np.array([-1], dtype='int64'))
        tm.assert_numpy_array_equal(missing, np.array([2], dtype='int64'))

    def test_duplicates(self):
        tree = IntervalTree([0, 0, 0], [1, 1, 1])
        tm.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)),
                                    np.array([0, 1, 2], dtype='int64'))

        with pytest.raises(KeyError):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        tm.assert_numpy_array_equal(np.sort(indexer),
                                    np.array([0, 1, 2], dtype='int64'))
        tm.assert_numpy_array_equal(missing, np.array([], dtype='int64'))

    def test_get_loc_closed(self):
        for closed in ['left', 'right', 'both', 'neither']:
            tree = IntervalTree([0], [1], closed=closed)
            for p, errors in [(0, tree.open_left), (1, tree.open_right)]:
                if errors:
                    with pytest.raises(KeyError):
                        tree.get_loc(p)
                else:
                    tm.assert_numpy_array_equal(tree.get_loc(p),
                                                np.array([0], dtype='int64'))

    @pytest.mark.skipif(compat.is_platform_32bit(),
                        reason="int type mismatch on 32bit")
    def test_get_indexer_closed(self):
        x = np.arange(1000, dtype='float64')
        found = x.astype('intp')
        not_found = (-1 * np.ones(1000)).astype('intp')

        for leaf_size in [1, 10, 100, 10000]:
            for closed in ['left', 'right', 'both', 'neither']:
                tree = IntervalTree(x, x + 0.5, closed=closed,
                                    leaf_size=leaf_size)
                tm.assert_numpy_array_equal(found,
                                            tree.get_indexer(x + 0.25))

                expected = found if tree.closed_left else not_found
                tm.assert_numpy_array_equal(expected,
                                            tree.get_indexer(x + 0.0))

                expected = found if tree.closed_right else not_found
                tm.assert_numpy_array_equal(expected,
                                            tree.get_indexer(x + 0.5))
        a pytest.mark.skipif to use as either a test decorator or a
        parametrization mark.
    """
    msg = "Could not import '{}'".format(package)
    if min_version:
        msg += " satisfying a min_version of {}".format(min_version)
    return pytest.mark.skipif(
        not safe_import(package, min_version=min_version), reason=msg
    )


skip_if_no_mpl = pytest.mark.skipif(
    _skip_if_no_mpl(), reason="Missing matplotlib dependency"
)
skip_if_mpl = pytest.mark.skipif(not _skip_if_no_mpl(), reason="matplotlib is present")
skip_if_32bit = pytest.mark.skipif(is_platform_32bit(), reason="skipping for 32 bit")
skip_if_windows = pytest.mark.skipif(is_platform_windows(), reason="Running on Windows")
skip_if_windows_python_3 = pytest.mark.skipif(
    is_platform_windows(), reason="not used on win32"
)
skip_if_has_locale = pytest.mark.skipif(
    _skip_if_has_locale(),
    reason="Specific locale is set {lang}".format(lang=locale.getlocale()[0]),
)
skip_if_not_us_locale = pytest.mark.skipif(
    _skip_if_not_us_locale(),
    reason="Specific locale is set {lang}".format(lang=locale.getlocale()[0]),
)
skip_if_no_scipy = pytest.mark.skipif(
    _skip_if_no_scipy(), reason="Missing SciPy requirement"
)
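# Usage sketch (assumption: this fragment is pandas.util._test_decorators and
# the helper whose docstring ends above is its skip_if_no; the import alias
# and test name below are hypothetical). The module-level markers apply
# directly, while skip_if_no builds one marker per package/min_version
# requirement.
import pandas.util._test_decorators as td


@td.skip_if_no("scipy", min_version="0.19.0")  # skip when scipy is absent or too old
@td.skip_if_32bit                              # skip on 32-bit platforms
def test_needs_scipy_and_64bit():
    ...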
import warnings

import numpy as np
import pytest

from pandas.compat import is_platform_32bit, is_platform_windows

import pandas as pd
from pandas import option_context
import pandas.util.testing as tm

use_32bit_repr = is_platform_windows() or is_platform_32bit()


@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
class TestSparseSeriesFormatting:
    @property
    def dtype_format_for_platform(self):
        return '' if use_32bit_repr else ', dtype=int32'

    def test_sparse_max_row(self):
        s = pd.Series([1, np.nan, np.nan, 3, np.nan]).to_sparse()
        result = repr(s)
        dfm = self.dtype_format_for_platform
        exp = ("0    1.0\n1    NaN\n2    NaN\n3    3.0\n"
               "4    NaN\ndtype: Sparse[float64, nan]\nBlockIndex\n"
               "Block locations: array([0, 3]{0})\n"
               "Block lengths: array([1, 1]{0})".format(dfm))
        assert result == exp
class TestUltraJSONTests:
    @pytest.mark.skipif(
        compat.is_platform_32bit(), reason="not compliant on 32-bit, xref #15865"
    )
    def test_encode_decimal(self):
        sut = decimal.Decimal("1337.1337")
        encoded = ujson.encode(sut, double_precision=15)
        decoded = ujson.decode(encoded)
        assert decoded == 1337.1337

        sut = decimal.Decimal("0.95")
        encoded = ujson.encode(sut, double_precision=1)
        assert encoded == "1.0"

        decoded = ujson.decode(encoded)
        assert decoded == 1.0

        sut = decimal.Decimal("0.94")
        encoded = ujson.encode(sut, double_precision=1)
        assert encoded == "0.9"

        decoded = ujson.decode(encoded)
        assert decoded == 0.9

        sut = decimal.Decimal("1.95")
        encoded = ujson.encode(sut, double_precision=1)
        assert encoded == "2.0"

        decoded = ujson.decode(encoded)
        assert decoded == 2.0

        sut = decimal.Decimal("-1.95")
        encoded = ujson.encode(sut, double_precision=1)
        assert encoded == "-2.0"

        decoded = ujson.decode(encoded)
        assert decoded == -2.0

        sut = decimal.Decimal("0.995")
        encoded = ujson.encode(sut, double_precision=2)
        assert encoded == "1.0"

        decoded = ujson.decode(encoded)
        assert decoded == 1.0

        sut = decimal.Decimal("0.9995")
        encoded = ujson.encode(sut, double_precision=3)
        assert encoded == "1.0"

        decoded = ujson.decode(encoded)
        assert decoded == 1.0

        sut = decimal.Decimal("0.99999999999999944")
        encoded = ujson.encode(sut, double_precision=15)
        assert encoded == "1.0"

        decoded = ujson.decode(encoded)
        assert decoded == 1.0

    @pytest.mark.parametrize("ensure_ascii", [True, False])
    def test_encode_string_conversion(self, ensure_ascii):
        string_input = "A string \\ / \b \f \n \r \t </script> &"
        not_html_encoded = '"A string \\\\ \\/ \\b \\f \\n \\r \\t <\\/script> &"'
        html_encoded = (
            '"A string \\\\ \\/ \\b \\f \\n \\r \\t \\u003c\\/script\\u003e \\u0026"'
        )

        def helper(expected_output, **encode_kwargs):
            output = ujson.encode(
                string_input, ensure_ascii=ensure_ascii, **encode_kwargs
            )

            assert output == expected_output
            assert string_input == json.loads(output)
            assert string_input == ujson.decode(output)

        # Default behavior assumes encode_html_chars=False.
        helper(not_html_encoded)

        # Make sure explicit encode_html_chars=False works.
        helper(not_html_encoded, encode_html_chars=False)

        # Make sure explicit encode_html_chars=True does the encoding.
        helper(html_encoded, encode_html_chars=True)

    @pytest.mark.parametrize(
        "long_number",
        [-4342969734183514, -12345678901234.56789012, -528656961.4399388],
    )
    def test_double_long_numbers(self, long_number):
        sut = {"a": long_number}
        encoded = ujson.encode(sut, double_precision=15)

        decoded = ujson.decode(encoded)
        assert sut == decoded

    def test_encode_non_c_locale(self):
        lc_category = locale.LC_NUMERIC

        # We just need one of these locales to work.
        for new_locale in ("it_IT.UTF-8", "Italian_Italy"):
            if tm.can_set_locale(new_locale, lc_category):
                with tm.set_locale(new_locale, lc_category):
                    assert ujson.loads(ujson.dumps(4.78e60)) == 4.78e60
                    assert ujson.loads("4.78", precise_float=True) == 4.78
                break

    def test_decimal_decode_test_precise(self):
        sut = {"a": 4.56}
        encoded = ujson.encode(sut)
        decoded = ujson.decode(encoded, precise_float=True)
        assert sut == decoded

    def test_encode_double_tiny_exponential(self):
        num = 1e-40
        assert num == ujson.decode(ujson.encode(num))

        num = 1e-100
        assert num == ujson.decode(ujson.encode(num))

        num = -1e-45
        assert num == ujson.decode(ujson.encode(num))

        num = -1e-145
        assert np.allclose(num, ujson.decode(ujson.encode(num)))

    @pytest.mark.parametrize("unicode_key", ["key1", "بن"])
    def test_encode_dict_with_unicode_keys(self, unicode_key):
        unicode_dict = {unicode_key: "value1"}
        assert unicode_dict == ujson.decode(ujson.encode(unicode_dict))

    @pytest.mark.parametrize(
        "double_input", [math.pi, -math.pi]  # Should work with negatives too.
    )
    def test_encode_double_conversion(self, double_input):
        output = ujson.encode(double_input)
        assert round(double_input, 5) == round(json.loads(output), 5)
        assert round(double_input, 5) == round(ujson.decode(output), 5)

    def test_encode_with_decimal(self):
        decimal_input = 1.0
        output = ujson.encode(decimal_input)

        assert output == "1.0"

    def test_encode_array_of_nested_arrays(self):
        nested_input = [[[[]]]] * 20
        output = ujson.encode(nested_input)

        assert nested_input == json.loads(output)
        assert nested_input == ujson.decode(output)

        nested_input = np.array(nested_input)
        tm.assert_numpy_array_equal(
            nested_input,
            ujson.decode(output, numpy=True, dtype=nested_input.dtype),
        )

    def test_encode_array_of_doubles(self):
        doubles_input = [31337.31337, 31337.31337, 31337.31337, 31337.31337] * 10
        output = ujson.encode(doubles_input)

        assert doubles_input == json.loads(output)
        assert doubles_input == ujson.decode(output)

        tm.assert_numpy_array_equal(
            np.array(doubles_input), ujson.decode(output, numpy=True)
        )

    def test_double_precision(self):
        double_input = 30.012345678901234
        output = ujson.encode(double_input, double_precision=15)

        assert double_input == json.loads(output)
        assert double_input == ujson.decode(output)

        for double_precision in (3, 9):
            output = ujson.encode(double_input,
                                  double_precision=double_precision)
            rounded_input = round(double_input, double_precision)

            assert rounded_input == json.loads(output)
            assert rounded_input == ujson.decode(output)

    @pytest.mark.parametrize("invalid_val", [20, -1, "9", None])
    def test_invalid_double_precision(self, invalid_val):
        double_input = 30.12345678901234567890
        expected_exception = (
            ValueError if isinstance(invalid_val, int) else TypeError
        )

        with pytest.raises(expected_exception):
            ujson.encode(double_input, double_precision=invalid_val)

    def test_encode_string_conversion2(self):
        string_input = "A string \\ / \b \f \n \r \t"
        output = ujson.encode(string_input)

        assert string_input == json.loads(output)
        assert string_input == ujson.decode(output)
        assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"'

    @pytest.mark.parametrize(
        "unicode_input",
        ["Räksmörgås اسامة بن محمد بن عوض بن لادن", "\xe6\x97\xa5\xd1\x88"],
    )
    def test_encode_unicode_conversion(self, unicode_input):
        enc = ujson.encode(unicode_input)
        dec = ujson.decode(enc)

        assert enc == json.dumps(unicode_input)
        assert dec == json.loads(enc)

    def test_encode_control_escaping(self):
        escaped_input = "\x19"
        enc = ujson.encode(escaped_input)
        dec = ujson.decode(enc)

        assert escaped_input == dec
        assert enc == json.dumps(escaped_input)

    def test_encode_unicode_surrogate_pair(self):
        surrogate_input = "\xf0\x90\x8d\x86"
        enc = ujson.encode(surrogate_input)
        dec = ujson.decode(enc)

        assert enc == json.dumps(surrogate_input)
        assert dec == json.loads(enc)

    def test_encode_unicode_4bytes_utf8(self):
        four_bytes_input = "\xf0\x91\x80\xb0TRAILINGNORMAL"
        enc = ujson.encode(four_bytes_input)
        dec = ujson.decode(enc)

        assert enc == json.dumps(four_bytes_input)
        assert dec == json.loads(enc)

    def test_encode_unicode_4bytes_utf8highest(self):
        four_bytes_input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL"
        enc = ujson.encode(four_bytes_input)
        dec = ujson.decode(enc)

        assert enc == json.dumps(four_bytes_input)
        assert dec == json.loads(enc)

    def test_encode_array_in_array(self):
        arr_in_arr_input = [[[[]]]]
        output = ujson.encode(arr_in_arr_input)

        assert arr_in_arr_input == json.loads(output)
        assert output == json.dumps(arr_in_arr_input)
        assert arr_in_arr_input == ujson.decode(output)

        tm.assert_numpy_array_equal(
            np.array(arr_in_arr_input), ujson.decode(output, numpy=True)
        )

    @pytest.mark.parametrize(
        "num_input",
        [
            31337,
            -31337,  # Negative number.
            -9223372036854775808,  # Large negative number.
        ],
    )
    def test_encode_num_conversion(self, num_input):
        output = ujson.encode(num_input)
        assert num_input == json.loads(output)
        assert output == json.dumps(num_input)
        assert num_input == ujson.decode(output)

    def test_encode_list_conversion(self):
        list_input = [1, 2, 3, 4]
        output = ujson.encode(list_input)

        assert list_input == json.loads(output)
        assert list_input == ujson.decode(output)

        tm.assert_numpy_array_equal(
            np.array(list_input), ujson.decode(output, numpy=True)
        )

    def test_encode_dict_conversion(self):
        dict_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4}
        output = ujson.encode(dict_input)

        assert dict_input == json.loads(output)
        assert dict_input == ujson.decode(output)

    @pytest.mark.parametrize("builtin_value", [None, True, False])
    def test_encode_builtin_values_conversion(self, builtin_value):
        output = ujson.encode(builtin_value)
        assert builtin_value == json.loads(output)
        assert output == json.dumps(builtin_value)
        assert builtin_value == ujson.decode(output)

    def test_encode_datetime_conversion(self):
        datetime_input = datetime.datetime.fromtimestamp(time.time())
        output = ujson.encode(datetime_input, date_unit="s")
        expected = calendar.timegm(datetime_input.utctimetuple())

        assert int(expected) == json.loads(output)
        assert int(expected) == ujson.decode(output)

    def test_encode_date_conversion(self):
        date_input = datetime.date.fromtimestamp(time.time())
        output = ujson.encode(date_input, date_unit="s")

        tup = (date_input.year, date_input.month, date_input.day, 0, 0, 0)
        expected = calendar.timegm(tup)

        assert int(expected) == json.loads(output)
        assert int(expected) == ujson.decode(output)

    @pytest.mark.parametrize(
        "test",
        [
            datetime.time(),
            datetime.time(1, 2, 3),
            datetime.time(10, 12, 15, 343243),
        ],
    )
    def test_encode_time_conversion_basic(self, test):
        output = ujson.encode(test)
        expected = '"{iso}"'.format(iso=test.isoformat())
        assert expected == output

    def test_encode_time_conversion_pytz(self):
        # see gh-11473: to_json segfaults with timezone-aware datetimes
        test = datetime.time(10, 12, 15, 343243, pytz.utc)
        output = ujson.encode(test)
        expected = '"{iso}"'.format(iso=test.isoformat())
        assert expected == output

    def test_encode_time_conversion_dateutil(self):
        # see gh-11473: to_json segfaults with timezone-aware datetimes
        test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc())
        output = ujson.encode(test)
        expected = '"{iso}"'.format(iso=test.isoformat())
        assert expected == output

    @pytest.mark.parametrize(
        "decoded_input", [NaT, np.datetime64("NaT"), np.nan, np.inf, -np.inf]
    )
    def test_encode_as_null(self, decoded_input):
        assert ujson.encode(decoded_input) == "null", "Expected null"

    def test_datetime_units(self):
        val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504)
        stamp = Timestamp(val)

        roundtrip = ujson.decode(ujson.encode(val, date_unit="s"))
        assert roundtrip == stamp.value // 10**9

        roundtrip = ujson.decode(ujson.encode(val, date_unit="ms"))
        assert roundtrip == stamp.value // 10**6

        roundtrip = ujson.decode(ujson.encode(val, date_unit="us"))
        assert roundtrip == stamp.value // 10**3

        roundtrip = ujson.decode(ujson.encode(val, date_unit="ns"))
        assert roundtrip == stamp.value

        msg = "Invalid value 'foo' for option 'date_unit'"
        with pytest.raises(ValueError, match=msg):
            ujson.encode(val, date_unit="foo")

    def test_encode_to_utf8(self):
        unencoded = "\xe6\x97\xa5\xd1\x88"

        enc = ujson.encode(unencoded, ensure_ascii=False)
        dec = ujson.decode(enc)

        assert enc == json.dumps(unencoded, ensure_ascii=False)
        assert dec == json.loads(enc)

    def test_decode_from_unicode(self):
        unicode_input = '{"obj": 31337}'

        dec1 = ujson.decode(unicode_input)
        dec2 = ujson.decode(str(unicode_input))

        assert dec1 == dec2

    def test_encode_recursion_max(self):
        # 8 is the max recursion depth

        class O2:
            member = 0
            pass

        class O1:
            member = 0
            pass

        decoded_input = O1()
        decoded_input.member = O2()
        decoded_input.member.member = decoded_input

        with pytest.raises(OverflowError):
            ujson.encode(decoded_input)

    def test_decode_jibberish(self):
        jibberish = "fdsa sda v9sa fdsa"

        with pytest.raises(ValueError):
            ujson.decode(jibberish)

    @pytest.mark.parametrize(
        "broken_json",
        [
            "[",  # Broken array start.
            "{",  # Broken object start.
            "]",  # Broken array end.
            "}",  # Broken object end.
        ],
    )
    def test_decode_broken_json(self, broken_json):
        with pytest.raises(ValueError):
            ujson.decode(broken_json)

    @pytest.mark.parametrize("too_big_char", ["[", "{"])
    def test_decode_depth_too_big(self, too_big_char):
        with pytest.raises(ValueError):
            ujson.decode(too_big_char * (1024 * 1024))

    @pytest.mark.parametrize(
        "bad_string",
        [
            '"TESTING',  # Unterminated.
            '"TESTING\\"',  # Unterminated escape.
            "tru",  # Broken True.
            "fa",  # Broken False.
            "n",  # Broken None.
        ],
    )
    def test_decode_bad_string(self, bad_string):
        with pytest.raises(ValueError):
            ujson.decode(bad_string)

    @pytest.mark.parametrize(
        "broken_json", ['{{1337:""}}', '{{"key":"}', "[[[true"]
    )
    def test_decode_broken_json_leak(self, broken_json):
        for _ in range(1000):
            with pytest.raises(ValueError):
                ujson.decode(broken_json)

    @pytest.mark.parametrize(
        "invalid_dict",
        [
            "{{{{31337}}}}",  # No key.
            '{{{{"key":}}}}',  # No value.
            '{{{{"key"}}}}',  # No colon or value.
        ],
    )
    def test_decode_invalid_dict(self, invalid_dict):
        with pytest.raises(ValueError):
            ujson.decode(invalid_dict)

    @pytest.mark.parametrize(
        "numeric_int_as_str", ["31337", "-31337"]  # Should work with negatives.
    )
    def test_decode_numeric_int(self, numeric_int_as_str):
        assert int(numeric_int_as_str) == ujson.decode(numeric_int_as_str)

    def test_encode_null_character(self):
        wrapped_input = "31337 \x00 1337"
        output = ujson.encode(wrapped_input)

        assert wrapped_input == json.loads(output)
        assert output == json.dumps(wrapped_input)
        assert wrapped_input == ujson.decode(output)

        alone_input = "\x00"
        output = ujson.encode(alone_input)

        assert alone_input == json.loads(output)
        assert output == json.dumps(alone_input)
        assert alone_input == ujson.decode(output)
        assert '" \\u0000\\r\\n "' == ujson.dumps(" \u0000\r\n ")

    def test_decode_null_character(self):
        wrapped_input = '"31337 \\u0000 31337"'
        assert ujson.decode(wrapped_input) == json.loads(wrapped_input)

    def test_encode_list_long_conversion(self):
        long_input = [
            9223372036854775807,
            9223372036854775807,
            9223372036854775807,
            9223372036854775807,
            9223372036854775807,
            9223372036854775807,
        ]
        output = ujson.encode(long_input)

        assert long_input == json.loads(output)
        assert long_input == ujson.decode(output)

        tm.assert_numpy_array_equal(
            np.array(long_input),
            ujson.decode(output, numpy=True, dtype=np.int64),
        )

    def test_encode_long_conversion(self):
        long_input = 9223372036854775807
        output = ujson.encode(long_input)

        assert long_input == json.loads(output)
        assert output == json.dumps(long_input)
        assert long_input == ujson.decode(output)

    @pytest.mark.parametrize(
        "int_exp", ["1337E40", "1.337E40", "1337E+9", "1.337e+40", "1.337E-4"]
    )
    def test_decode_numeric_int_exp(self, int_exp):
        assert ujson.decode(int_exp) == json.loads(int_exp)

    def test_loads_non_str_bytes_raises(self):
        msg = "Expected 'str' or 'bytes'"
        with pytest.raises(TypeError, match=msg):
            ujson.loads(None)

    def test_version(self):
        assert re.match(
            r"^\d+\.\d+(\.\d+)?$", ujson.__version__
        ), "ujson.__version__ must be a string like '1.4.0'"

    def test_encode_numeric_overflow(self):
        with pytest.raises(OverflowError):
            ujson.encode(12839128391289382193812939)

    def test_encode_numeric_overflow_nested(self):
        class Nested:
            x = 12839128391289382193812939

        for _ in range(0, 100):
            with pytest.raises(OverflowError):
                ujson.encode(Nested())

    @pytest.mark.parametrize("val", [3590016419, 2**31, 2**32, (2**32) - 1])
    def test_decode_number_with_32bit_sign_bit(self, val):
        # Test that numbers that fit within 32 bits but would have the
        # sign bit set (2**31 <= x < 2**32) are decoded properly.
        doc = '{{"id": {val}}}'.format(val=val)
        assert ujson.decode(doc)["id"] == val

    def test_encode_big_escape(self):
        # Make sure no Exception is raised.
        for _ in range(10):
            base = "\u00e5".encode("utf-8")
            escape_input = base * 1024 * 1024 * 2
            ujson.encode(escape_input)

    def test_decode_big_escape(self):
        # Make sure no Exception is raised.
        for _ in range(10):
            base = "\u00e5".encode("utf-8")
            quote = b'"'

            escape_input = quote + (base * 1024 * 1024 * 2) + quote
            ujson.decode(escape_input)

    def test_to_dict(self):
        d = {"key": 31337}

        class DictTest:
            def toDict(self):
                return d

        o = DictTest()
        output = ujson.encode(o)

        dec = ujson.decode(output)
        assert dec == d

    def test_default_handler(self):
        class _TestObject:
            def __init__(self, val):
                self.val = val

            @property
            def recursive_attr(self):
                return _TestObject("recursive_attr")

            def __str__(self):
                return str(self.val)

        msg = "Maximum recursion level reached"
        with pytest.raises(OverflowError, match=msg):
            ujson.encode(_TestObject("foo"))
        assert '"foo"' == ujson.encode(_TestObject("foo"), default_handler=str)

        def my_handler(_):
            return "foobar"

        assert '"foobar"' == ujson.encode(
            _TestObject("foo"), default_handler=my_handler
        )

        def my_handler_raises(_):
            raise TypeError("I raise for anything")

        with pytest.raises(TypeError, match="I raise for anything"):
            ujson.encode(_TestObject("foo"), default_handler=my_handler_raises)

        def my_int_handler(_):
            return 42

        assert (
            ujson.decode(
                ujson.encode(_TestObject("foo"), default_handler=my_int_handler)
            )
            == 42
        )

        def my_obj_handler(_):
            return datetime.datetime(2013, 2, 3)

        assert ujson.decode(
            ujson.encode(datetime.datetime(2013, 2, 3))
        ) == ujson.decode(
            ujson.encode(_TestObject("foo"), default_handler=my_obj_handler)
        )

        obj_list = [_TestObject("foo"), _TestObject("bar")]
        assert json.loads(json.dumps(obj_list, default=str)) == ujson.decode(
            ujson.encode(obj_list, default_handler=str)
        )
# -*- coding: utf-8 -*-
from __future__ import print_function

import numpy as np

import pandas as pd
import pandas.util.testing as tm
from pandas.compat import (is_platform_windows, is_platform_32bit)
from pandas.core.config import option_context

use_32bit_repr = is_platform_windows() or is_platform_32bit()


class TestSparseSeriesFormatting(tm.TestCase):

    @property
    def dtype_format_for_platform(self):
        return '' if use_32bit_repr else ', dtype=int32'

    def test_sparse_max_row(self):
        s = pd.Series([1, np.nan, np.nan, 3, np.nan]).to_sparse()
        result = repr(s)
        dfm = self.dtype_format_for_platform
        exp = ("0    1.0\n1    NaN\n2    NaN\n3    3.0\n"
               "4    NaN\ndtype: float64\nBlockIndex\n"
               "Block locations: array([0, 3]{0})\n"
               "Block lengths: array([1, 1]{0})".format(dfm))
        self.assertEqual(result, exp)
class TestIntervalTree:
    def test_get_indexer(self, tree):
        result = tree.get_indexer(np.array([1.0, 5.5, 6.5]))
        expected = np.array([0, 4, -1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(
            KeyError, match="'indexer does not intersect a unique set of intervals'"
        ):
            tree.get_indexer(np.array([3.0]))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2 ** 63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_overflow(self, dtype, target_value, target_dtype):
        left, right = np.array([0, 1], dtype=dtype), np.array([1, 2], dtype=dtype)
        tree = IntervalTree(left, right)

        result = tree.get_indexer(np.array([target_value], dtype=target_dtype))
        expected = np.array([-1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_non_unique(self, tree):
        indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5]))

        result = indexer[:1]
        expected = np.array([0], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = np.sort(indexer[1:3])
        expected = np.array([0, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = np.sort(indexer[3:])
        expected = np.array([-1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = missing
        expected = np.array([2], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2 ** 63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype):
        left, right = np.array([0, 2], dtype=dtype), np.array([1, 3], dtype=dtype)
        tree = IntervalTree(left, right)
        target = np.array([target_value], dtype=target_dtype)

        result_indexer, result_missing = tree.get_indexer_non_unique(target)
        expected_indexer = np.array([-1], dtype="intp")
        tm.assert_numpy_array_equal(result_indexer, expected_indexer)

        expected_missing = np.array([0], dtype="intp")
        tm.assert_numpy_array_equal(result_missing, expected_missing)

    def test_duplicates(self, dtype):
        left = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(left, left + 1)

        with pytest.raises(
            KeyError, match="'indexer does not intersect a unique set of intervals'"
        ):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))

        result = np.sort(indexer)
        expected = np.array([0, 1, 2], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = missing
        expected = np.array([], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
    )
    def test_get_indexer_closed(self, closed, leaf_size):
        x = np.arange(1000, dtype="float64")
        found = x.astype("intp")
        not_found = (-1 * np.ones(1000)).astype("intp")

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)
        tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25))

        expected = found if tree.closed_left else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0))

        expected = found if tree.closed_right else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True),
            (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True),
            (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
            (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False),
            (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False),
        ],
    )
@pytest.mark.parametrize("order", (list(x) for x in permutations(range(3)))) def test_is_overlapping(self, closed, order, left, right, expected): # GH 23309 tree = IntervalTree(left[order], right[order], closed=closed) result = tree.is_overlapping assert result is expected @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3)))) def test_is_overlapping_endpoints(self, closed, order): """shared endpoints are marked as overlapping""" # GH 23309 left, right = np.arange(3, dtype="int64"), np.arange(1, 4) tree = IntervalTree(left[order], right[order], closed=closed) result = tree.is_overlapping expected = closed == "both" assert result is expected @pytest.mark.parametrize( "left, right", [ (np.array([], dtype="int64"), np.array([], dtype="int64")), (np.array([0], dtype="int64"), np.array([1], dtype="int64")), (np.array([np.nan]), np.array([np.nan])), (np.array([np.nan] * 3), np.array([np.nan] * 3)), ], ) def test_is_overlapping_trivial(self, closed, left, right): # GH 23309 tree = IntervalTree(left, right, closed=closed) assert tree.is_overlapping is False @pytest.mark.skipif(compat.is_platform_32bit(), reason="GH 23440") def test_construction_overflow(self): # GH 25485 left, right = np.arange(101, dtype="int64"), [np.iinfo(np.int64).max] * 101 tree = IntervalTree(left, right) # pivot should be average of left/right medians result = tree.root.pivot expected = (50 + np.iinfo(np.int64).max) / 2 assert result == expected
class TestPandasContainer(object):

    def setup_method(self, method):
        self.dirpath = tm.get_data_path()

        self.ts = tm.makeTimeSeries()
        self.ts.name = 'ts'

        self.series = tm.makeStringSeries()
        self.series.name = 'series'

        self.objSeries = tm.makeObjectSeries()
        self.objSeries.name = 'objects'

        self.empty_series = Series([], index=[])
        self.empty_frame = DataFrame({})

        self.frame = _frame.copy()
        self.frame2 = _frame2.copy()
        self.intframe = _intframe.copy()
        self.tsframe = _tsframe.copy()
        self.mixed_frame = _mixed_frame.copy()
        self.categorical = _cat_frame.copy()

    def teardown_method(self, method):
        del self.dirpath

        del self.ts

        del self.series

        del self.objSeries

        del self.empty_series
        del self.empty_frame

        del self.frame
        del self.frame2
        del self.intframe
        del self.tsframe
        del self.mixed_frame

    def test_frame_double_encoded_labels(self):
        df = DataFrame([['a', 'b'], ['c', 'd']],
                       index=['index " 1', 'index / 2'],
                       columns=['a \\ b', 'y / z'])

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        assert_frame_equal(df, read_json(df.to_json(orient='columns'),
                                         orient='columns'))
        assert_frame_equal(df, read_json(df.to_json(orient='index'),
                                         orient='index'))
        df_unser = read_json(df.to_json(orient='records'), orient='records')
        assert_index_equal(df.columns, df_unser.columns)
        tm.assert_numpy_array_equal(df.values, df_unser.values)

    def test_frame_non_unique_index(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
                       columns=['x', 'y'])

        pytest.raises(ValueError, df.to_json, orient='index')
        pytest.raises(ValueError, df.to_json, orient='columns')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        unser = read_json(df.to_json(orient='records'), orient='records')
        tm.assert_index_equal(df.columns, unser.columns)
        tm.assert_almost_equal(df.values, unser.values)
        unser = read_json(df.to_json(orient='values'), orient='values')
        tm.assert_numpy_array_equal(df.values, unser.values)

    def test_frame_non_unique_columns(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                       columns=['x', 'x'])

        pytest.raises(ValueError, df.to_json, orient='index')
        pytest.raises(ValueError, df.to_json, orient='columns')
        pytest.raises(ValueError, df.to_json, orient='records')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split', dtype=False))
        unser = read_json(df.to_json(orient='values'), orient='values')
        tm.assert_numpy_array_equal(df.values, unser.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                       columns=['x', 'y'])
        result = read_json(df.to_json(orient='split'), orient='split')
        assert_frame_equal(result, df)

        def _check(df):
            result = read_json(df.to_json(orient='split'), orient='split',
                               convert_dates=['x'])
            assert_frame_equal(result, df)

        for o in [[['a', 'b'], ['c', 'd']],
                  [[1.5, 2.5], [3.5, 4.5]],
                  [[1, 2.5], [3, 4.5]],
                  [[Timestamp('20130101'), 3.5],
                   [Timestamp('20130102'), 4.5]]]:
            _check(DataFrame(o, index=[1, 2], columns=['x', 'x']))

    def test_frame_from_json_to_json(self):
        def _check_orient(df, orient, dtype=None, numpy=False,
                          convert_axes=True, check_dtype=True, raise_ok=None,
                          sort=None, check_index_type=True,
                          check_column_type=True, check_numpy_dtype=False):
            if sort is not None:
                df = df.sort_values(sort)
            else:
                df = df.sort_index()

            # if we are not unique, then check that we are raising ValueError
            # for the appropriate orients
            if not df.index.is_unique and orient in ['index', 'columns']:
                pytest.raises(
                    ValueError, lambda: df.to_json(orient=orient))
                return
            if (not df.columns.is_unique and
                    orient in ['index', 'columns', 'records']):
                pytest.raises(
                    ValueError, lambda: df.to_json(orient=orient))
                return

            dfjson = df.to_json(orient=orient)

            try:
                unser = read_json(dfjson, orient=orient, dtype=dtype,
                                  numpy=numpy, convert_axes=convert_axes)
            except Exception as detail:
                if raise_ok is not None:
                    if isinstance(detail, raise_ok):
                        return
                raise

            if sort is not None and sort in unser.columns:
                unser = unser.sort_values(sort)
            else:
                unser = unser.sort_index()

            if dtype is False:
                check_dtype = False

            if not convert_axes and df.index.dtype.type == np.datetime64:
                unser.index = DatetimeIndex(
                    unser.index.values.astype('i8') * 1e6)

            if orient == "records":
                # index is not captured in this orientation
                tm.assert_almost_equal(df.values, unser.values,
                                       check_dtype=check_numpy_dtype)
                tm.assert_index_equal(df.columns, unser.columns,
                                      exact=check_column_type)
            elif orient == "values":
                # index and cols are not captured in this orientation
                if numpy is True and df.shape == (0, 0):
                    assert unser.shape[0] == 0
                else:
                    tm.assert_almost_equal(df.values, unser.values,
                                           check_dtype=check_numpy_dtype)
            elif orient == "split":
                # index and col labels might not be strings
                unser.index = [str(i) for i in unser.index]
                unser.columns = [str(i) for i in unser.columns]

                if sort is None:
                    unser = unser.sort_index()
                tm.assert_almost_equal(df.values, unser.values,
                                       check_dtype=check_numpy_dtype)
            else:
                if convert_axes:
                    tm.assert_frame_equal(df, unser,
                                          check_dtype=check_dtype,
                                          check_index_type=check_index_type,
                                          check_column_type=check_column_type)
                else:
                    tm.assert_frame_equal(df, unser,
                                          check_less_precise=False,
                                          check_dtype=check_dtype)

        def _check_all_orients(df, dtype=None, convert_axes=True,
                               raise_ok=None, sort=None,
                               check_index_type=True, check_column_type=True):

            # numpy=False
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "records", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "split", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "index", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "values", dtype=dtype,
                          convert_axes=False, sort=sort)

            # numpy=True and raise_ok might be not None, so ignore the error
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "records", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "split", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "index", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "values", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)

        # basic
        _check_all_orients(self.frame)
        assert self.frame.to_json() == self.frame.to_json(orient="columns")

        _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)
        _check_all_orients(self.intframe, dtype=False)

        # big one
        # index and columns are strings as all unserialised JSON object keys
        # are assumed to be strings
        biggie = DataFrame(np.zeros((200, 4)),
                           columns=[str(i) for i in range(4)],
                           index=[str(i) for i in range(200)])
        _check_all_orients(biggie, dtype=False, convert_axes=False)

        # dtypes
        _check_all_orients(DataFrame(biggie, dtype=np.float64),
                           dtype=np.float64, convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int,
                           convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3',
                           convert_axes=False, raise_ok=ValueError)

        # categorical
        _check_all_orients(self.categorical, sort='sort', raise_ok=ValueError)

        # empty
        _check_all_orients(self.empty_frame, check_index_type=False,
                           check_column_type=False)

        # time series data
        _check_all_orients(self.tsframe)

        # mixed data
        index = pd.Index(['a', 'b', 'c', 'd', 'e'])
        data = {'A': [0., 1., 2., 3., 4.],
                'B': [0., 1., 0., 1., 0.],
                'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
                'D': [True, False, True, False, True]}
        df = DataFrame(data=data, index=index)
        _check_orient(df, "split", check_dtype=False)
        _check_orient(df, "records", check_dtype=False)
        _check_orient(df, "values", check_dtype=False)
        _check_orient(df, "columns", check_dtype=False)

        # index oriented is problematic as it is read back in in a transposed
        # state, so the columns are interpreted as having mixed data and
        # given object dtypes.
        # force everything to have object dtype beforehand
        _check_orient(df.transpose().transpose(), "index", dtype=False)

    def test_frame_from_json_bad_data(self):
        pytest.raises(ValueError, read_json, StringIO('{"key":b:a:d}'))

        # too few indices
        json = StringIO('{"columns":["A","B"],'
                        '"index":["2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        pytest.raises(ValueError, read_json, json, orient="split")

        # too many columns
        json = StringIO('{"columns":["A","B","C"],'
                        '"index":["1","2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        pytest.raises(AssertionError, read_json, json, orient="split")

        # bad key
        json = StringIO('{"badkey":["A","B"],'
                        '"index":["2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        with tm.assert_raises_regex(ValueError,
                                    r"unexpected key\(s\): badkey"):
            read_json(json, orient="split")

    def test_frame_from_json_nones(self):
        df = DataFrame([[1, 2], [4, 5, 6]])
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])

        df = DataFrame([['1', '2'], ['4', '5', '6']])
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])

        unser = read_json(df.to_json(), dtype=False)
        assert unser[2][0] is None

        unser = read_json(df.to_json(), convert_axes=False, dtype=False)
        assert unser['2']['0'] is None

        unser = read_json(df.to_json(), numpy=False)
        assert np.isnan(unser[2][0])

        unser = read_json(df.to_json(), numpy=False, dtype=False)
        assert unser[2][0] is None

        unser = read_json(df.to_json(), numpy=False,
                          convert_axes=False, dtype=False)
        assert unser['2']['0'] is None

        # infinities get mapped to nulls which get mapped to NaNs during
        # deserialisation
        df = DataFrame([[1, 2], [4, 5, 6]])
        df.loc[0, 2] = np.inf
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])

        unser = read_json(df.to_json(), dtype=False)
        assert np.isnan(unser[2][0])

        df.loc[0, 2] = np.NINF
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])

        unser = read_json(df.to_json(), dtype=False)
        assert np.isnan(unser[2][0])

    @pytest.mark.skipif(is_platform_32bit(),
                        reason="not compliant on 32-bit, xref #15865")
    def test_frame_to_json_float_precision(self):
        df = pd.DataFrame([dict(a_float=0.95)])
        encoded = df.to_json(double_precision=1)
        assert encoded == '{"a_float":{"0":1.0}}'

        df = pd.DataFrame([dict(a_float=1.95)])
        encoded = df.to_json(double_precision=1)
        assert encoded == '{"a_float":{"0":2.0}}'

        df = pd.DataFrame([dict(a_float=-1.95)])
        encoded = df.to_json(double_precision=1)
        assert encoded == '{"a_float":{"0":-2.0}}'

        df = pd.DataFrame([dict(a_float=0.995)])
        encoded = df.to_json(double_precision=2)
        assert encoded == '{"a_float":{"0":1.0}}'

        df = pd.DataFrame([dict(a_float=0.9995)])
        encoded = df.to_json(double_precision=3)
        assert encoded == '{"a_float":{"0":1.0}}'

        df = pd.DataFrame([dict(a_float=0.99999999999999944)])
        encoded = df.to_json(double_precision=15)
        assert encoded == '{"a_float":{"0":1.0}}'

    def test_frame_to_json_except(self):
        df = DataFrame([1, 2, 3])
        pytest.raises(ValueError, df.to_json, orient="garbage")

    def test_frame_empty(self):
        df = DataFrame(columns=['jim', 'joe'])
        assert not df._is_mixed_type
        assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)),
                           df, check_index_type=False)
        # GH 7445
        result = pd.DataFrame({'test': []}, index=[]).to_json(
            orient='columns')
        expected = '{"test":{}}'
        assert result == expected

    def test_frame_empty_mixedtype(self):
        # mixed type
        df = DataFrame(columns=['jim', 'joe'])
        df['joe'] = df['joe'].astype('i8')
        assert df._is_mixed_type
        assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)),
                           df, check_index_type=False)

    def test_frame_mixedtype_orient(self):  # GH10289
        vals = [[10, 1, 'foo', .1, .01],
                [20, 2, 'bar', .2, .02],
                [30, 3, 'baz', .3, .03],
                [40, 4, 'qux', .4, .04]]

        df = DataFrame(vals, index=list('abcd'),
                       columns=['1st', '2nd', '3rd', '4th', '5th'])

        assert df._is_mixed_type
        right = df.copy()

        for orient in ['split', 'index', 'columns']:
            inp = df.to_json(orient=orient)
            left = read_json(inp, orient=orient, convert_axes=False)
            assert_frame_equal(left, right)

        right.index = np.arange(len(df))
        inp = df.to_json(orient='records')
        left = read_json(inp, orient='records', convert_axes=False)
        assert_frame_equal(left, right)

        right.columns = np.arange(df.shape[1])
        inp = df.to_json(orient='values')
        left = read_json(inp, orient='values', convert_axes=False)
        assert_frame_equal(left, right)

    def test_v12_compat(self):
        df = DataFrame(
            [[1.56808523, 0.65727391, 1.81021139, -0.17251653],
             [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
             [1.51493992, 0.11805825, 1.629455, -1.31506612],
             [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
             [0.05951614, -2.69652057, 1.28163262, 0.34703478]],
            columns=['A', 'B', 'C', 'D'],
            index=pd.date_range('2000-01-03', '2000-01-07'))
        df['date'] = pd.Timestamp('19920106 18:21:32.12')
        df.iloc[3, df.columns.get_loc('date')] = pd.Timestamp('20130101')
        df['modified'] = df['date']
        df.iloc[1, df.columns.get_loc('modified')] = pd.NaT

        v12_json = os.path.join(self.dirpath, 'tsframe_v012.json')
        df_unser = pd.read_json(v12_json)
        assert_frame_equal(df, df_unser)

        df_iso = df.drop(['modified'], axis=1)
        v12_iso_json = os.path.join(self.dirpath, 'tsframe_iso_v012.json')
        df_unser_iso = pd.read_json(v12_iso_json)
        assert_frame_equal(df_iso, df_unser_iso)

    def test_blocks_compat_GH9037(self):
        index = pd.date_range('20000101', periods=10, freq='H')
        df_mixed = DataFrame(OrderedDict(
            float_1=[-0.92077639, 0.77434435, 1.25234727, 0.61485564,
                     -0.60316077, 0.24653374, 0.28668979, -2.51969012,
                     0.95748401, -1.02970536],
            int_1=[19680418, 75337055, 99973684, 65103179, 79373900,
                   40314334, 21290235, 4991321, 41903419, 16008365],
            str_1=['78c608f1', '64a99743', '13d2ff52', 'ca7f4af2',
                   '97236474', 'bde7e214', '1a6bde47', 'b1190be5',
                   '7a669144', '8d64d068'],
            float_2=[-0.0428278, -1.80872357, 3.36042349, -0.7573685,
                     -0.48217572, 0.86229683, 1.08935819, 0.93898739,
                     -0.03030452, 1.43366348],
            str_2=['14f04af9', 'd085da90', '4bcfac83', '81504caf',
                   '2ffef4a9', '08e2f5c4', '07e1af03', 'addbd4a7',
                   '1f6a09ba', '4bfc4d87'],
            int_2=[86967717, 98098830, 51927505, 20372254, 12601730,
                   20884027, 34193846, 10561746, 24867120, 76131025]
        ), index=index)

        # JSON deserialisation always creates unicode strings
        df_mixed.columns = df_mixed.columns.astype('unicode')

        df_roundtrip = pd.read_json(df_mixed.to_json(orient='split'),
                                    orient='split')
        assert_frame_equal(df_mixed, df_roundtrip,
                           check_index_type=True,
                           check_column_type=True,
                           check_frame_type=True,
                           by_blocks=True,
                           check_exact=True)

    def test_frame_nonprintable_bytes(self):
        # GH14256: failing column caused segfaults, if it is not the last one

        class BinaryThing(object):

            def __init__(self, hexed):
                self.hexed = hexed
                if compat.PY2:
                    self.binary = hexed.decode('hex')
                else:
                    self.binary = bytes.fromhex(hexed)

            def __str__(self):
                return self.hexed

        hexed = '574b4454ba8c5eb4f98a8f45'
        binthing = BinaryThing(hexed)

        # verify the proper conversion of printable content
        df_printable = DataFrame({'A': [binthing.hexed]})
        assert df_printable.to_json() == \
            '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)

        # check if non-printable content throws appropriate Exception
        df_nonprintable = DataFrame({'A': [binthing]})
        with pytest.raises(OverflowError):
            df_nonprintable.to_json()

        # the same with multiple columns threw segfaults
        df_mixed = DataFrame({'A': [binthing], 'B': [1]},
                             columns=['A', 'B'])
        with pytest.raises(OverflowError):
            df_mixed.to_json()

        # default_handler should resolve exceptions for non-string types
        assert df_nonprintable.to_json(default_handler=str) == \
            '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)
        assert df_mixed.to_json(default_handler=str) == \
            '{{"A":{{"0":"{hex}"}},"B":{{"0":1}}}}'.format(hex=hexed)

    def test_label_overflow(self):
        # GH14256: buffer length not checked when writing label
        df = pd.DataFrame({'foo': [1337], 'bar' * 100000: [1]})
        assert df.to_json() == \
            '{{"{bar}":{{"0":1}},"foo":{{"0":1337}}}}'.format(
                bar=('bar' * 100000))

    def test_series_non_unique_index(self):
        s = Series(['a', 'b'], index=[1, 1])

        pytest.raises(ValueError, s.to_json, orient='index')

        assert_series_equal(s, read_json(s.to_json(orient='split'),
                                         orient='split', typ='series'))
        unser = read_json(s.to_json(orient='records'),
                          orient='records', typ='series')
        tm.assert_numpy_array_equal(s.values, unser.values)

    def test_series_from_json_to_json(self):

        def _check_orient(series, orient, dtype=None, numpy=False,
                          check_index_type=True):
            series = series.sort_index()
            unser = read_json(series.to_json(orient=orient),
                              typ='series', orient=orient, numpy=numpy,
                              dtype=dtype)
            unser = unser.sort_index()
            if orient == "records" or orient == "values":
                assert_almost_equal(series.values, unser.values)
            else:
                if orient == "split":
                    assert_series_equal(series, unser,
                                        check_index_type=check_index_type)
                else:
                    assert_series_equal(series, unser, check_names=False,
                                        check_index_type=check_index_type)

        def _check_all_orients(series, dtype=None, check_index_type=True):
            _check_orient(series, "columns", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "records", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "split", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "index", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "values", dtype=dtype)

            _check_orient(series, "columns", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "records", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "split", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "index", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "values", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)

        # basic
        _check_all_orients(self.series)
        assert self.series.to_json() == self.series.to_json(orient="index")

        objSeries = Series([str(d) for d in self.objSeries],
                           index=self.objSeries.index,
                           name=self.objSeries.name)
        _check_all_orients(objSeries, dtype=False)

        # empty_series has empty index with object dtype
        # which cannot be revert
        assert self.empty_series.index.dtype == np.object_
        _check_all_orients(self.empty_series, check_index_type=False)

        _check_all_orients(self.ts)

        # dtype
        s = Series(lrange(6), index=['a', 'b', 'c', 'd', 'e', 'f'])
        _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64)
        _check_all_orients(Series(s, dtype=np.int), dtype=np.int)

    def test_series_to_json_except(self):
        s = Series([1, 2, 3])
        pytest.raises(ValueError, s.to_json, orient="garbage")

    def test_series_from_json_precise_float(self):
        s = Series([4.56, 4.56, 4.56])
        result = read_json(s.to_json(), typ='series', precise_float=True)
        assert_series_equal(result, s, check_index_type=False)

    def test_frame_from_json_precise_float(self):
df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]]) result = read_json(df.to_json(), precise_float=True) assert_frame_equal(result, df, check_index_type=False, check_column_type=False) def test_typ(self): s = Series(lrange(6), index=['a', 'b', 'c', 'd', 'e', 'f'], dtype='int64') result = read_json(s.to_json(), typ=None) assert_series_equal(result, s) def test_reconstruction_index(self): df = DataFrame([[1, 2, 3], [4, 5, 6]]) result = read_json(df.to_json()) assert_frame_equal(result, df) df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['A', 'B', 'C']) result = read_json(df.to_json()) assert_frame_equal(result, df) def test_path(self): with ensure_clean('test.json') as path: for df in [self.frame, self.frame2, self.intframe, self.tsframe, self.mixed_frame]: df.to_json(path) read_json(path) def test_axis_dates(self): # frame json = self.tsframe.to_json() result = read_json(json) assert_frame_equal(result, self.tsframe) # series json = self.ts.to_json() result = read_json(json, typ='series') assert_series_equal(result, self.ts, check_names=False) assert result.name is None def test_convert_dates(self): # frame df = self.tsframe.copy() df['date'] = Timestamp('20130101') json = df.to_json() result = read_json(json) assert_frame_equal(result, df) df['foo'] = 1. json = df.to_json(date_unit='ns') result = read_json(json, convert_dates=False) expected = df.copy() expected['date'] = expected['date'].values.view('i8') expected['foo'] = expected['foo'].astype('int64') assert_frame_equal(result, expected) # series ts = Series(Timestamp('20130101'), index=self.ts.index) json = ts.to_json() result = read_json(json, typ='series') assert_series_equal(result, ts) def test_convert_dates_infer(self): # GH10747 from pandas.io.json import dumps infer_words = ['trade_time', 'date', 'datetime', 'sold_at', 'modified', 'timestamp', 'timestamps'] for infer_word in infer_words: data = [{'id': 1, infer_word: 1036713600000}, {'id': 2}] expected = DataFrame([[1, Timestamp('2002-11-08')], [2, pd.NaT]], columns=['id', infer_word]) result = read_json(dumps(data))[['id', infer_word]] assert_frame_equal(result, expected) def test_date_format_frame(self): df = self.tsframe.copy() def test_w_date(date, date_unit=None): df['date'] = Timestamp(date) df.iloc[1, df.columns.get_loc('date')] = pd.NaT df.iloc[5, df.columns.get_loc('date')] = pd.NaT if date_unit: json = df.to_json(date_format='iso', date_unit=date_unit) else: json = df.to_json(date_format='iso') result = read_json(json) assert_frame_equal(result, df) test_w_date('20130101 20:43:42.123') test_w_date('20130101 20:43:42', date_unit='s') test_w_date('20130101 20:43:42.123', date_unit='ms') test_w_date('20130101 20:43:42.123456', date_unit='us') test_w_date('20130101 20:43:42.123456789', date_unit='ns') pytest.raises(ValueError, df.to_json, date_format='iso', date_unit='foo') def test_date_format_series(self): def test_w_date(date, date_unit=None): ts = Series(Timestamp(date), index=self.ts.index) ts.iloc[1] = pd.NaT ts.iloc[5] = pd.NaT if date_unit: json = ts.to_json(date_format='iso', date_unit=date_unit) else: json = ts.to_json(date_format='iso') result = read_json(json, typ='series') assert_series_equal(result, ts) test_w_date('20130101 20:43:42.123') test_w_date('20130101 20:43:42', date_unit='s') test_w_date('20130101 20:43:42.123', date_unit='ms') test_w_date('20130101 20:43:42.123456', date_unit='us') test_w_date('20130101 20:43:42.123456789', date_unit='ns') ts = Series(Timestamp('20130101 20:43:42.123'), index=self.ts.index) pytest.raises(ValueError, 
ts.to_json, date_format='iso', date_unit='foo') def test_date_unit(self): df = self.tsframe.copy() df['date'] = Timestamp('20130101 20:43:42') dl = df.columns.get_loc('date') df.iloc[1, dl] = Timestamp('19710101 20:43:42') df.iloc[2, dl] = Timestamp('21460101 20:43:42') df.iloc[4, dl] = pd.NaT for unit in ('s', 'ms', 'us', 'ns'): json = df.to_json(date_format='epoch', date_unit=unit) # force date unit result = read_json(json, date_unit=unit) assert_frame_equal(result, df) # detect date unit result = read_json(json, date_unit=None) assert_frame_equal(result, df) def test_weird_nested_json(self): # this used to core dump the parser s = r'''{ "status": "success", "data": { "posts": [ { "id": 1, "title": "A blog post", "body": "Some useful content" }, { "id": 2, "title": "Another blog post", "body": "More content" } ] } }''' read_json(s) def test_doc_example(self): dfj2 = DataFrame(np.random.randn(5, 2), columns=list('AB')) dfj2['date'] = Timestamp('20130101') dfj2['ints'] = lrange(5) dfj2['bools'] = True dfj2.index = pd.date_range('20130101', periods=5) json = dfj2.to_json() result = read_json(json, dtype={'ints': np.int64, 'bools': np.bool_}) assert_frame_equal(result, result) def test_misc_example(self): # parsing unordered input fails result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True) expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) error_msg = """DataFrame\\.index are different DataFrame\\.index values are different \\(100\\.0 %\\) \\[left\\]: Index\\(\\[u?'a', u?'b'\\], dtype='object'\\) \\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)""" with tm.assert_raises_regex(AssertionError, error_msg): assert_frame_equal(result, expected, check_index_type=False) result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]') expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) assert_frame_equal(result, expected) @network def test_round_trip_exception_(self): # GH 3867 csv = 'https://raw.github.com/hayd/lahman2012/master/csvs/Teams.csv' df = pd.read_csv(csv) s = df.to_json() result = pd.read_json(s) assert_frame_equal(result.reindex( index=df.index, columns=df.columns), df) @network def test_url(self): url = 'https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5' # noqa result = read_json(url, convert_dates=True) for c in ['created_at', 'closed_at', 'updated_at']: assert result[c].dtype == 'datetime64[ns]' def test_timedelta(self): converter = lambda x: pd.to_timedelta(x, unit='ms') s = Series([timedelta(23), timedelta(seconds=5)]) assert s.dtype == 'timedelta64[ns]' result = pd.read_json(s.to_json(), typ='series').apply(converter) assert_series_equal(result, s) s = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1])) assert s.dtype == 'timedelta64[ns]' result = pd.read_json(s.to_json(), typ='series').apply(converter) assert_series_equal(result, s) frame = DataFrame([timedelta(23), timedelta(seconds=5)]) assert frame[0].dtype == 'timedelta64[ns]' assert_frame_equal(frame, pd.read_json(frame.to_json()) .apply(converter)) frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)], 'b': [1, 2], 'c': pd.date_range(start='20130101', periods=2)}) result = pd.read_json(frame.to_json(date_unit='ns')) result['a'] = pd.to_timedelta(result.a, unit='ns') result['c'] = pd.to_datetime(result.c) assert_frame_equal(frame, result) def test_mixed_timedelta_datetime(self): frame = DataFrame({'a': [timedelta(23), pd.Timestamp('20130101')]}, dtype=object) expected = DataFrame({'a': [pd.Timedelta(frame.a[0]).value, pd.Timestamp(frame.a[1]).value]}) 
result = pd.read_json(frame.to_json(date_unit='ns'), dtype={'a': 'int64'}) assert_frame_equal(result, expected, check_index_type=False) def test_default_handler(self): value = object() frame = DataFrame({'a': [7, value]}) expected = DataFrame({'a': [7, str(value)]}) result = pd.read_json(frame.to_json(default_handler=str)) assert_frame_equal(expected, result, check_index_type=False) def test_default_handler_indirect(self): from pandas.io.json import dumps def default(obj): if isinstance(obj, complex): return [('mathjs', 'Complex'), ('re', obj.real), ('im', obj.imag)] return str(obj) df_list = [9, DataFrame({'a': [1, 'STR', complex(4, -5)], 'b': [float('nan'), None, 'N/A']}, columns=['a', 'b'])] expected = ('[9,[[1,null],["STR",null],[[["mathjs","Complex"],' '["re",4.0],["im",-5.0]],"N\\/A"]]]') assert dumps(df_list, default_handler=default, orient="values") == expected def test_default_handler_numpy_unsupported_dtype(self): # GH12554 to_json raises 'Unhandled numpy dtype 15' df = DataFrame({'a': [1, 2.3, complex(4, -5)], 'b': [float('nan'), None, complex(1.2, 0)]}, columns=['a', 'b']) expected = ('[["(1+0j)","(nan+0j)"],' '["(2.3+0j)","(nan+0j)"],' '["(4-5j)","(1.2+0j)"]]') assert df.to_json(default_handler=str, orient="values") == expected def test_default_handler_raises(self): def my_handler_raises(obj): raise TypeError("raisin") pytest.raises(TypeError, DataFrame({'a': [1, 2, object()]}).to_json, default_handler=my_handler_raises) pytest.raises(TypeError, DataFrame({'a': [1, 2, complex(4, -5)]}).to_json, default_handler=my_handler_raises) def test_categorical(self): # GH4377 df.to_json segfaults with non-ndarray blocks df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]}) df["B"] = df["A"] expected = df.to_json() df["B"] = df["A"].astype('category') assert expected == df.to_json() s = df["A"] sc = df["B"] assert s.to_json() == sc.to_json() def test_datetime_tz(self): # GH4377 df.to_json segfaults with non-ndarray blocks tz_range = pd.date_range('20130101', periods=3, tz='US/Eastern') tz_naive = tz_range.tz_convert('utc').tz_localize(None) df = DataFrame({ 'A': tz_range, 'B': pd.date_range('20130101', periods=3)}) df_naive = df.copy() df_naive['A'] = tz_naive expected = df_naive.to_json() assert expected == df.to_json() stz = Series(tz_range) s_naive = Series(tz_naive) assert stz.to_json() == s_naive.to_json() def test_sparse(self): # GH4377 df.to_json segfaults with non-ndarray blocks df = pd.DataFrame(np.random.randn(10, 4)) df.loc[:8] = np.nan sdf = df.to_sparse() expected = df.to_json() assert expected == sdf.to_json() s = pd.Series(np.random.randn(10)) s.loc[:8] = np.nan ss = s.to_sparse() expected = s.to_json() assert expected == ss.to_json() def test_tz_is_utc(self): from pandas.io.json import dumps exp = '"2013-01-10T05:00:00.000Z"' ts = Timestamp('2013-01-10 05:00:00Z') assert dumps(ts, iso_dates=True) == exp dt = ts.to_pydatetime() assert dumps(dt, iso_dates=True) == exp ts = Timestamp('2013-01-10 00:00:00', tz='US/Eastern') assert dumps(ts, iso_dates=True) == exp dt = ts.to_pydatetime() assert dumps(dt, iso_dates=True) == exp ts = Timestamp('2013-01-10 00:00:00-0500') assert dumps(ts, iso_dates=True) == exp dt = ts.to_pydatetime() assert dumps(dt, iso_dates=True) == exp def test_tz_range_is_utc(self): from pandas.io.json import dumps exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]' dfexp = ('{"DT":{' '"0":"2013-01-01T05:00:00.000Z",' '"1":"2013-01-02T05:00:00.000Z"}}') tz_range = pd.date_range('2013-01-01 05:00:00Z', periods=2) assert dumps(tz_range, 
iso_dates=True) == exp dti = pd.DatetimeIndex(tz_range) assert dumps(dti, iso_dates=True) == exp df = DataFrame({'DT': dti}) assert dumps(df, iso_dates=True) == dfexp tz_range = pd.date_range('2013-01-01 00:00:00', periods=2, tz='US/Eastern') assert dumps(tz_range, iso_dates=True) == exp dti = pd.DatetimeIndex(tz_range) assert dumps(dti, iso_dates=True) == exp df = DataFrame({'DT': dti}) assert dumps(df, iso_dates=True) == dfexp tz_range = pd.date_range('2013-01-01 00:00:00-0500', periods=2) assert dumps(tz_range, iso_dates=True) == exp dti = pd.DatetimeIndex(tz_range) assert dumps(dti, iso_dates=True) == exp df = DataFrame({'DT': dti}) assert dumps(df, iso_dates=True) == dfexp def test_read_inline_jsonl(self): # GH9180 result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) assert_frame_equal(result, expected) def test_read_s3_jsonl(self, s3_resource): pytest.importorskip('s3fs') # GH17200 result = read_json('s3n://pandas-test/items.jsonl', lines=True) expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) assert_frame_equal(result, expected) def test_read_local_jsonl(self): # GH17200 with ensure_clean('tmp_items.json') as path: with open(path, 'w') as infile: infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n') result = read_json(path, lines=True) expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) assert_frame_equal(result, expected) def test_read_jsonl_unicode_chars(self): # GH15132: non-ascii unicode characters # \u201d == RIGHT DOUBLE QUOTATION MARK # simulate file handle json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' json = StringIO(json) result = read_json(json, lines=True) expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]], columns=['a', 'b']) assert_frame_equal(result, expected) # simulate string json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' result = read_json(json, lines=True) expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]], columns=['a', 'b']) assert_frame_equal(result, expected) def test_to_jsonl(self): # GH9180 df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) result = df.to_json(orient="records", lines=True) expected = '{"a":1,"b":2}\n{"a":1,"b":2}' assert result == expected df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b']) result = df.to_json(orient="records", lines=True) expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}' assert result == expected assert_frame_equal(pd.read_json(result, lines=True), df) # GH15096: escaped characters in columns and data df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", 'b']) result = df.to_json(orient="records", lines=True) expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n' '{"a\\\\":"foo\\"","b":"bar"}') assert result == expected assert_frame_equal(pd.read_json(result, lines=True), df) def test_latin_encoding(self): if compat.PY2: tm.assert_raises_regex( TypeError, r'\[unicode\] is not implemented as a table column') return # GH 13774 pytest.skip("encoding not implemented in .to_json(), " "xref #13774") values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'], [b'E\xc9, 17', b'a', b'b', b'c'], [b'EE, 17', b'', b'a', b'b', b'c'], [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'], [b'', b'a', b'b', b'c'], [b'\xf8\xfc', b'a', b'b', b'c'], [b'A\xf8\xfc', b'', b'a', b'b', b'c'], [np.nan, b'', b'b', b'c'], [b'A\xf8\xfc', np.nan, b'', b'b', b'c']] def _try_decode(x, encoding='latin-1'): try: return x.decode(encoding) except AttributeError: return x # not sure how to remove 
latin-1 from code in python 2 and 3 values = [[_try_decode(x) for x in y] for y in values] examples = [] for dtype in ['category', object]: for val in values: examples.append(Series(val, dtype=dtype)) def roundtrip(s, encoding='latin-1'): with ensure_clean('test.json') as path: s.to_json(path, encoding=encoding) retr = read_json(path, encoding=encoding) assert_series_equal(s, retr, check_categorical=False) for s in examples: roundtrip(s) def test_data_frame_size_after_to_json(self): # GH15344 df = DataFrame({'a': [str(1)]}) size_before = df.memory_usage(index=True, deep=True).sum() df.to_json() size_after = df.memory_usage(index=True, deep=True).sum() assert size_before == size_after @pytest.mark.parametrize('data, expected', [ (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']), {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}), (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo'), {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}), (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'], index=[['a', 'b'], ['c', 'd']]), {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}), (Series([1, 2, 3], name='A'), {'name': 'A', 'data': [1, 2, 3]}), (Series([1, 2, 3], name='A').rename_axis('foo'), {'name': 'A', 'data': [1, 2, 3]}), (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']]), {'name': 'A', 'data': [1, 2]}), ]) def test_index_false_to_json_split(self, data, expected): # GH 17394 # Testing index=False in to_json with orient='split' result = data.to_json(orient='split', index=False) result = json.loads(result) assert result == expected @pytest.mark.parametrize('data', [ (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])), (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo')), (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'], index=[['a', 'b'], ['c', 'd']])), (Series([1, 2, 3], name='A')), (Series([1, 2, 3], name='A').rename_axis('foo')), (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']])), ]) def test_index_false_to_json_table(self, data): # GH 17394 # Testing index=False in to_json with orient='table' result = data.to_json(orient='table', index=False) result = json.loads(result) expected = { 'schema': pd.io.json.build_table_schema(data, index=False), 'data': DataFrame(data).to_dict(orient='records') } assert result == expected @pytest.mark.parametrize('orient', [ 'records', 'index', 'columns', 'values' ]) def test_index_false_error_to_json(self, orient): # GH 17394 # Testing error message from to_json with index=False df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b']) with tm.assert_raises_regex(ValueError, "'index=False' is only " "valid when 'orient' is " "'split' or 'table'"): df.to_json(orient=orient, index=False)
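
# ---------------------------------------------------------------------
# Illustration (not part of the original suite): the default_handler
# tests above rely on to_json falling back to a user callback for
# values the JSON serializer cannot encode natively. A minimal sketch
# using only the public pandas API; the frame contents are made up.

import pandas as pd

frame = pd.DataFrame({'a': [7, complex(2, -3)]})

# default_handler=str is invoked for the unsupported complex value,
# so it serializes as the string "(2-3j)" instead of raising.
encoded = frame.to_json(default_handler=str)
decoded = pd.read_json(encoded)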
class TestIntervalTree(object):
    def test_get_loc(self, tree):
        tm.assert_numpy_array_equal(tree.get_loc(1),
                                    np.array([0], dtype='int64'))
        tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)),
                                    np.array([0, 1], dtype='int64'))
        with pytest.raises(KeyError):
            tree.get_loc(-1)

    def test_get_indexer(self, tree):
        tm.assert_numpy_array_equal(
            tree.get_indexer(np.array([1.0, 5.5, 6.5])),
            np.array([0, 4, -1], dtype='int64'))
        with pytest.raises(KeyError):
            tree.get_indexer(np.array([3.0]))

    def test_get_indexer_non_unique(self, tree):
        indexer, missing = tree.get_indexer_non_unique(
            np.array([1.0, 2.0, 6.5]))
        tm.assert_numpy_array_equal(indexer[:1],
                                    np.array([0], dtype='int64'))
        tm.assert_numpy_array_equal(np.sort(indexer[1:3]),
                                    np.array([0, 1], dtype='int64'))
        tm.assert_numpy_array_equal(np.sort(indexer[3:]),
                                    np.array([-1], dtype='int64'))
        tm.assert_numpy_array_equal(missing, np.array([2], dtype='int64'))

    def test_duplicates(self, dtype):
        left = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(left, left + 1)
        tm.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)),
                                    np.array([0, 1, 2], dtype='int64'))

        with pytest.raises(KeyError):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        tm.assert_numpy_array_equal(np.sort(indexer),
                                    np.array([0, 1, 2], dtype='int64'))
        tm.assert_numpy_array_equal(missing, np.array([], dtype='int64'))

    def test_get_loc_closed(self, closed):
        tree = IntervalTree([0], [1], closed=closed)
        for p, errors in [(0, tree.open_left),
                          (1, tree.open_right)]:
            if errors:
                with pytest.raises(KeyError):
                    tree.get_loc(p)
            else:
                tm.assert_numpy_array_equal(tree.get_loc(p),
                                            np.array([0], dtype='int64'))

    @pytest.mark.skipif(compat.is_platform_32bit(),
                        reason="int type mismatch on 32bit")
    @pytest.mark.parametrize('leaf_size', [1, 10, 100, 10000])
    def test_get_indexer_closed(self, closed, leaf_size):
        x = np.arange(1000, dtype='float64')
        found = x.astype('intp')
        not_found = (-1 * np.ones(1000)).astype('intp')

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)
        tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25))

        expected = found if tree.closed_left else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0))

        expected = found if tree.closed_right else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))
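
# ---------------------------------------------------------------------
# Note (added): the class above hard-codes 'int64' expectations and
# skips 32-bit platforms outright; the rewrite that follows compares
# against 'intp' instead, whose width follows the platform pointer
# size. A small standalone sketch of the distinction (plain NumPy):

import numpy as np

# np.intp is the C ssize_t: 32 bits on 32-bit builds and 64 bits on
# 64-bit builds, so 'intp' expectations match the indexer dtype the
# IntervalTree engine returns on either platform.
assert np.dtype('intp').itemsize in (4, 8)
is_64bit = np.dtype('intp') == np.dtype('int64')  # True only on 64-bit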
class TestIntervalTree(object):
    def test_get_loc(self, tree):
        result = tree.get_loc(1)
        expected = np.array([0], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        result = np.sort(tree.get_loc(2))
        expected = np.array([0, 1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(KeyError):
            tree.get_loc(-1)

    def test_get_indexer(self, tree):
        result = tree.get_indexer(np.array([1.0, 5.5, 6.5]))
        expected = np.array([0, 4, -1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(KeyError):
            tree.get_indexer(np.array([3.0]))

    def test_get_indexer_non_unique(self, tree):
        indexer, missing = tree.get_indexer_non_unique(
            np.array([1.0, 2.0, 6.5]))

        result = indexer[:1]
        expected = np.array([0], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        result = np.sort(indexer[1:3])
        expected = np.array([0, 1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        result = np.sort(indexer[3:])
        expected = np.array([-1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        result = missing
        expected = np.array([2], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

    def test_duplicates(self, dtype):
        left = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(left, left + 1)

        result = np.sort(tree.get_loc(0.5))
        expected = np.array([0, 1, 2], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(KeyError):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))

        result = np.sort(indexer)
        expected = np.array([0, 1, 2], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        result = missing
        expected = np.array([], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

    def test_get_loc_closed(self, closed):
        tree = IntervalTree([0], [1], closed=closed)
        for p, errors in [(0, tree.open_left),
                          (1, tree.open_right)]:
            if errors:
                with pytest.raises(KeyError):
                    tree.get_loc(p)
            else:
                result = tree.get_loc(p)
                expected = np.array([0], dtype='intp')
                tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize('leaf_size', [
        skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000])
    def test_get_indexer_closed(self, closed, leaf_size):
        x = np.arange(1000, dtype='float64')
        found = x.astype('intp')
        not_found = (-1 * np.ones(1000)).astype('intp')

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)
        tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25))

        expected = found if tree.closed_left else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0))

        expected = found if tree.closed_right else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))

    @pytest.mark.parametrize('left, right, expected', [
        (np.array([0, 1, 4]), np.array([2, 3, 5]), True),
        (np.array([0, 1, 2]), np.array([5, 4, 3]), True),
        (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
        (np.array([0, 2, 4]), np.array([1, 3, 5]), False),
        (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False)])
    @pytest.mark.parametrize('order', map(list, permutations(range(3))))
    def test_is_overlapping(self, closed, order, left, right, expected):
        # GH 23309
        tree = IntervalTree(left[order], right[order], closed=closed)
        result = tree.is_overlapping
        assert result is expected

    @pytest.mark.parametrize('order', map(list, permutations(range(3))))
    def test_is_overlapping_endpoints(self, closed, order):
        """shared endpoints are marked as overlapping"""
        # GH 23309
        left, right = np.arange(3), np.arange(1, 4)
        tree = IntervalTree(left[order], right[order], closed=closed)
        result = tree.is_overlapping
        expected = closed == 'both'
        assert result is expected

    @pytest.mark.parametrize('left, right', [
        (np.array([], dtype='int64'), np.array([], dtype='int64')),
        (np.array([0], dtype='int64'), np.array([1], dtype='int64')),
        (np.array([np.nan]), np.array([np.nan])),
        (np.array([np.nan] * 3), np.array([np.nan] * 3))])
    def test_is_overlapping_trivial(self, closed, left, right):
        # GH 23309
        tree = IntervalTree(left, right, closed=closed)
        assert tree.is_overlapping is False

    @pytest.mark.skipif(compat.is_platform_32bit(), reason='GH 23440')
    def test_construction_overflow(self):
        # GH 25485
        left, right = np.arange(101), [np.iinfo(np.int64).max] * 101
        tree = IntervalTree(left, right)

        # pivot should be average of left/right medians
        result = tree.root.pivot
        expected = (50 + np.iinfo(np.int64).max) / 2
        assert result == expected
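
# ---------------------------------------------------------------------
# Illustration (not part of the original suite): a minimal sketch of
# the IntervalTree queries exercised above. IntervalTree is internal
# API; the import path below is the one these tests appear to target
# and may differ between pandas versions.

import numpy as np
from pandas._libs.interval import IntervalTree

# three right-closed intervals: (0, 1], (1, 2], (2, 3]
tree = IntervalTree(np.array([0, 1, 2]), np.array([1, 2, 3]),
                    closed='right')

# get_indexer: unique-match lookup; each query hits exactly one interval
indexer = tree.get_indexer(np.array([0.5, 1.5, 2.5]))  # [0, 1, 2]

# get_loc: positions of all intervals containing the scalar
loc = tree.get_loc(1.5)  # [1]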
msg = "Could not import '{}'".format(package) if min_version: msg += " satisfying a min_version of {}".format(min_version) return pytest.mark.skipif( not safe_import(package, min_version=min_version), reason=msg )(func) return decorated_func skip_if_no_mpl = pytest.mark.skipif(_skip_if_no_mpl(), reason="Missing matplotlib dependency") skip_if_mpl_1_5 = pytest.mark.skipif(_skip_if_mpl_1_5(), reason="matplotlib 1.5") xfail_if_mpl_2_2 = pytest.mark.xfail(_skip_if_mpl_2_2(), reason="matplotlib 2.2") skip_if_32bit = pytest.mark.skipif(is_platform_32bit(), reason="skipping for 32 bit") skip_if_windows = pytest.mark.skipif(is_platform_windows(), reason="Running on Windows") skip_if_windows_python_3 = pytest.mark.skipif(is_platform_windows() and PY3, reason=("not used on python3/" "win32")) skip_if_has_locale = pytest.mark.skipif(_skip_if_has_locale(), reason="Specific locale is set {lang}" .format(lang=locale.getlocale()[0])) skip_if_not_us_locale = pytest.mark.skipif(_skip_if_not_us_locale(), reason="Specific locale is set " "{lang}".format( lang=locale.getlocale()[0])) skip_if_no_scipy = pytest.mark.skipif(_skip_if_no_scipy(), reason="Missing SciPy requirement")
def _skip_if_no_mpl():
    mod = safe_import("matplotlib")
    if mod:
        mod.use("Agg", warn=False)
    else:
        return True


def _skip_if_mpl_1_5():
    mod = safe_import("matplotlib")

    if mod:
        v = mod.__version__
        if LooseVersion(v) > LooseVersion('1.4.3') or str(v)[0] == '0':
            return True
        else:
            mod.use("Agg", warn=False)


skip_if_no_mpl = pytest.mark.skipif(_skip_if_no_mpl(),
                                    reason="Missing matplotlib dependency")
skip_if_mpl_1_5 = pytest.mark.skipif(_skip_if_mpl_1_5(),
                                     reason="matplotlib 1.5")
skip_if_32bit = pytest.mark.skipif(is_platform_32bit(),
                                   reason="skipping for 32 bit")
skip_if_windows = pytest.mark.skipif(is_platform_windows(),
                                     reason="Running on Windows")
skip_if_windows_python_3 = pytest.mark.skipif(is_platform_windows() and PY3,
                                              reason=("not used on python3/"
                                                      "win32"))
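
# ---------------------------------------------------------------------
# Usage sketch (added): the module-level marks defined above apply as
# plain decorators to whole tests; to skip individual parametrize
# cases instead, wrap them with pytest.param as the skipif_32bit
# helper used earlier does. Test name below is hypothetical.

import numpy as np
import pandas.util._test_decorators as td


@td.skip_if_32bit
@td.skip_if_windows
def test_non_windows_64bit_only():
    # runs only on 64-bit, non-Windows platforms
    assert np.dtype('intp').itemsize == 8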