Example #1
    def test_itertuples(self, float_frame):
        for i, tup in enumerate(float_frame.itertuples()):
            s = DataFrame._constructor_sliced(tup[1:])
            s.name = tup[0]
            expected = float_frame.iloc[i, :].reset_index(drop=True)
            tm.assert_series_equal(s, expected)

        df = DataFrame(
            {"floats": np.random.randn(5), "ints": range(5)}, columns=["floats", "ints"]
        )

        for tup in df.itertuples(index=False):
            assert isinstance(tup[1], int)

        df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]})
        dfaa = df[["a", "a"]]

        assert list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)]

        # repr of the ints differs on 32-bit/windows, so skip the exact check there
        if not (compat.is_platform_windows() or compat.is_platform_32bit()):
            assert (
                repr(list(df.itertuples(name=None)))
                == "[(0, 1, 4), (1, 2, 5), (2, 3, 6)]"
            )

        tup = next(df.itertuples(name="TestName"))
        assert tup._fields == ("Index", "a", "b")
        assert (tup.Index, tup.a, tup.b) == tup
        assert type(tup).__name__ == "TestName"

        df.columns = ["def", "return"]
        tup2 = next(df.itertuples(name="TestName"))
        assert tup2 == (0, 1, 4)
        assert tup2._fields == ("Index", "_1", "_2")

        df3 = DataFrame({"f" + str(i): [i] for i in range(1024)})
        # will raise SyntaxError if trying to create namedtuple
        tup3 = next(df3.itertuples())
        assert isinstance(tup3, tuple)
        if PY37:
            assert hasattr(tup3, "_fields")
        else:
            assert not hasattr(tup3, "_fields")

        # GH 28282
        df_254_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(254)}])
        result_254_columns = next(df_254_columns.itertuples(index=False))
        assert isinstance(result_254_columns, tuple)
        assert hasattr(result_254_columns, "_fields")

        df_255_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(255)}])
        result_255_columns = next(df_255_columns.itertuples(index=False))
        assert isinstance(result_255_columns, tuple)

        # DataFrames with >= 255 columns will fall back to regular tuples
        # on Python < 3.7
        if PY37:
            assert hasattr(result_255_columns, "_fields")
        else:
            assert not hasattr(result_255_columns, "_fields")
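
Note: the 254/255 boundary above exists because collections.namedtuple refused more than 255 fields before Python 3.7, so very wide frames fall back to plain tuples. A minimal stand-alone sketch of that fallback (version-dependent behavior; the column names here are made up):

import pandas as pd

wide = pd.DataFrame([{f"c{i}": i for i in range(300)}])
row = next(wide.itertuples(index=False))
assert isinstance(row, tuple)      # always at least a plain tuple
print(hasattr(row, "_fields"))     # False on Python < 3.7, True on >= 3.7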
Example #2
    def test_dropna(self):
        # https://github.com/pydata/pandas/issues/9443#issuecomment-73719328

        tm.assert_series_equal(
            pd.Series([True, True, False]).value_counts(dropna=True),
            pd.Series([2, 1], index=[True, False]))
        tm.assert_series_equal(
            pd.Series([True, True, False]).value_counts(dropna=False),
            pd.Series([2, 1], index=[True, False]))

        tm.assert_series_equal(
            pd.Series([True, True, False, None]).value_counts(dropna=True),
            pd.Series([2, 1], index=[True, False]))
        tm.assert_series_equal(
            pd.Series([True, True, False, None]).value_counts(dropna=False),
            pd.Series([2, 1, 1], index=[True, False, np.nan]))
        tm.assert_series_equal(
            pd.Series([10.3, 5., 5.]).value_counts(dropna=True),
            pd.Series([2, 1], index=[5., 10.3]))
        tm.assert_series_equal(
            pd.Series([10.3, 5., 5.]).value_counts(dropna=False),
            pd.Series([2, 1], index=[5., 10.3]))

        tm.assert_series_equal(
            pd.Series([10.3, 5., 5., None]).value_counts(dropna=True),
            pd.Series([2, 1], index=[5., 10.3]))

        # 32-bit linux has a different ordering
        if not compat.is_platform_32bit():
            tm.assert_series_equal(
                pd.Series([10.3, 5., 5., None]).value_counts(dropna=False),
                pd.Series([2, 1, 1], index=[5., 10.3, np.nan]))
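
For reference, a quick sketch of the dropna switch asserted above (ordering of the result index can differ by platform, which is why the last check is guarded):

import pandas as pd

s = pd.Series([True, True, False, None])
print(s.value_counts(dropna=True))    # counts for True/False only
print(s.value_counts(dropna=False))   # includes a NaN row with count 1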
Example #3
def skipif_32bit(param):
    """
    Skip parameters in a parametrize on 32bit systems. Specifically used
    here to skip leaf_size parameters related to GH 23440.
    """
    marks = pytest.mark.skipif(compat.is_platform_32bit(),
                               reason='GH 23440: int type mismatch on 32bit')
    return pytest.param(param, marks=marks)
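
A hedged usage sketch: wrap individual parametrize values so just those cases are skipped on 32-bit builds (pytest assumed imported alongside the helper; the test body is illustrative):

import pytest

@pytest.mark.parametrize(
    "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
)
def test_uses_leaf_size(leaf_size):
    # wrapped values are skipped on 32-bit platforms; 10000 always runs
    assert leaf_size > 0

The IntervalTree tests later on this page consume the helper in exactly this way.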
Example #4
    def test_int_max(self, any_int_dtype):
        if any_int_dtype in ("int64", "uint64") and compat.is_platform_32bit():
            pytest.skip("Cannot test 64-bit integer on 32-bit platform")

        klass = np.dtype(any_int_dtype).type

        # uint64 max will always overflow,
        # as it's encoded to signed.
        if any_int_dtype == "uint64":
            num = np.iinfo("int64").max
        else:
            num = np.iinfo(any_int_dtype).max

        assert klass(ujson.decode(ujson.encode(num))) == num
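
The round trip being asserted, in isolation (ujson in these tests is pandas' bundled JSON extension; the import path below is an assumption matching recent pandas layouts):

import numpy as np
import pandas._libs.json as ujson  # assumed internal path

num = np.iinfo("int64").max
assert ujson.decode(ujson.encode(num)) == num  # survives the encode/decode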
Example #5
File: test_api.py  Project: zgswanda/pandas
    def test_itertuples(self):
        for i, tup in enumerate(self.frame.itertuples()):
            s = self.klass._constructor_sliced(tup[1:])
            s.name = tup[0]
            expected = self.frame.iloc[i, :].reset_index(drop=True)
            self._assert_series_equal(s, expected)

        df = self.klass({'floats': np.random.randn(5),
                         'ints': lrange(5)},
                        columns=['floats', 'ints'])

        for tup in df.itertuples(index=False):
            assert isinstance(tup[1], (int, long))

        df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]})
        dfaa = df[['a', 'a']]

        assert (list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)])

        # repr will be int/long on 32-bit/windows
        if not (compat.is_platform_windows() or compat.is_platform_32bit()):
            assert (repr(list(df.itertuples(
                name=None))) == '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]')

        tup = next(df.itertuples(name='TestName'))

        if LooseVersion(sys.version) >= LooseVersion('2.7'):
            assert tup._fields == ('Index', 'a', 'b')
            assert (tup.Index, tup.a, tup.b) == tup
            assert type(tup).__name__ == 'TestName'

        df.columns = ['def', 'return']
        tup2 = next(df.itertuples(name='TestName'))
        assert tup2 == (0, 1, 4)

        if LooseVersion(sys.version) >= LooseVersion('2.7'):
            assert tup2._fields == ('Index', '_1', '_2')

        df3 = DataFrame({'f' + str(i): [i] for i in range(1024)})
        # will raise SyntaxError if trying to create namedtuple
        tup3 = next(df3.itertuples())
        assert not hasattr(tup3, '_fields')
        assert isinstance(tup3, tuple)
Example #6
    def test_replace_series(self, how, to_key, from_key):
        if from_key == "bool" and how == "series":
            # doesn't work in PY3, though ...dict_from_bool works fine
            pytest.skip("doesn't work as in PY3")

        index = pd.Index([3, 4], name="xxx")
        obj = pd.Series(self.rep[from_key], index=index, name="yyy")
        assert obj.dtype == from_key

        if from_key.startswith("datetime") and to_key.startswith("datetime"):
            # tested below
            return
        elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]:
            # tested below
            return

        if how == "dict":
            replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
        elif how == "series":
            replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
        else:
            raise ValueError

        result = obj.replace(replacer)

        if (from_key == "float64" and to_key in ("int64")) or (
                from_key == "complex128" and to_key in ("int64", "float64")):

            if compat.is_platform_32bit() or compat.is_platform_windows():
                pytest.skip("32-bit platform buggy: {0} -> {1}".format(
                    from_key, to_key))

            # Expected: do not downcast by replacement
            exp = pd.Series(self.rep[to_key],
                            index=index,
                            name="yyy",
                            dtype=from_key)

        else:
            exp = pd.Series(self.rep[to_key], index=index, name="yyy")
            assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)
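
The "do not downcast by replacement" expectation in compact form (a sketch; exact dtype outcomes vary by pandas version and platform, which is why the test skips 32-bit and Windows):

import pandas as pd

s = pd.Series([1.1, 2.2], dtype="float64")
out = s.replace({1.1: 1, 2.2: 2})   # integer replacement values
print(out.dtype)                    # expected to remain float64, not int64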
Example #7
File: test_api.py  Project: mwaskom/pandas
    def test_itertuples(self):
        for i, tup in enumerate(self.frame.itertuples()):
            s = self.klass._constructor_sliced(tup[1:])
            s.name = tup[0]
            expected = self.frame.iloc[i, :].reset_index(drop=True)
            self._assert_series_equal(s, expected)

        df = self.klass({'floats': np.random.randn(5),
                         'ints': lrange(5)}, columns=['floats', 'ints'])

        for tup in df.itertuples(index=False):
            assert isinstance(tup[1], (int, long))

        df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]})
        dfaa = df[['a', 'a']]

        assert (list(dfaa.itertuples()) ==
                [(0, 1, 1), (1, 2, 2), (2, 3, 3)])

        # repr will be int/long on 32-bit/windows
        if not (compat.is_platform_windows() or compat.is_platform_32bit()):
            assert (repr(list(df.itertuples(name=None))) ==
                    '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]')

        tup = next(df.itertuples(name='TestName'))

        if LooseVersion(sys.version) >= LooseVersion('2.7'):
            assert tup._fields == ('Index', 'a', 'b')
            assert (tup.Index, tup.a, tup.b) == tup
            assert type(tup).__name__ == 'TestName'

        df.columns = ['def', 'return']
        tup2 = next(df.itertuples(name='TestName'))
        assert tup2 == (0, 1, 4)

        if LooseVersion(sys.version) >= LooseVersion('2.7'):
            assert tup2._fields == ('Index', '_1', '_2')

        df3 = DataFrame({'f' + str(i): [i] for i in range(1024)})
        # will raise SyntaxError if trying to create namedtuple
        tup3 = next(df3.itertuples())
        assert not hasattr(tup3, '_fields')
        assert isinstance(tup3, tuple)
Example #8
    def test_replace_series(self, how, to_key, from_key):
        if from_key == 'bool' and how == 'series' and compat.PY3:
            # doesn't work in PY3, though ...dict_from_bool works fine
            pytest.skip("doesn't work as in PY3")

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        if (from_key.startswith('datetime') and to_key.startswith('datetime')):
            # tested below
            return
        elif from_key in ['datetime64[ns, US/Eastern]', 'datetime64[ns, UTC]']:
            # tested below
            return

        if how == 'dict':
            replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
        elif how == 'series':
            replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
        else:
            raise ValueError

        result = obj.replace(replacer)

        if ((from_key == 'float64' and to_key in ('int64',)) or
            (from_key == 'complex128' and to_key in ('int64', 'float64'))):

            if compat.is_platform_32bit() or compat.is_platform_windows():
                pytest.skip("32-bit platform buggy: {0} -> {1}".format(
                    from_key, to_key))

            # Expected: do not downcast by replacement
            exp = pd.Series(self.rep[to_key],
                            index=index,
                            name='yyy',
                            dtype=from_key)

        else:
            exp = pd.Series(self.rep[to_key], index=index, name='yyy')
            assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)
Example #9
    def test_replace_series(self, how, to_key, from_key):
        if from_key == 'bool' and how == 'series' and compat.PY3:
            # doesn't work in PY3, though ...dict_from_bool works fine
            pytest.skip("doesn't work as in PY3")

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        if (from_key.startswith('datetime') and to_key.startswith('datetime')):
            # tested below
            return
        elif from_key in ['datetime64[ns, US/Eastern]', 'datetime64[ns, UTC]']:
            # tested below
            return

        if how == 'dict':
            replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
        elif how == 'series':
            replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
        else:
            raise ValueError

        result = obj.replace(replacer)

        if ((from_key == 'float64' and to_key in ('int64',)) or
            (from_key == 'complex128' and
             to_key in ('int64', 'float64'))):

            if compat.is_platform_32bit() or compat.is_platform_windows():
                pytest.skip("32-bit platform buggy: {0} -> {1}".format
                            (from_key, to_key))

            # Expected: do not downcast by replacement
            exp = pd.Series(self.rep[to_key], index=index,
                            name='yyy', dtype=from_key)

        else:
            exp = pd.Series(self.rep[to_key], index=index, name='yyy')
            assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)
Example #10
    def test_ndarray_compat_properties(self):
        if compat.is_platform_32bit():
            pytest.skip("skipping on 32bit")
        super(TestPeriodIndex, self).test_ndarray_compat_properties()
Example #11
class TestIntervalTree(object):
    def setup_method(self, method):
        gentree = lambda dtype: IntervalTree(np.arange(5, dtype=dtype),
                                             np.arange(5, dtype=dtype) + 2)
        self.tree = gentree('int64')
        self.trees = {dtype: gentree(dtype)
                      for dtype in ['int32', 'int64', 'float32', 'float64']}

    def test_get_loc(self):
        for dtype, tree in self.trees.items():
            tm.assert_numpy_array_equal(tree.get_loc(1),
                                        np.array([0], dtype='int64'))
            tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)),
                                        np.array([0, 1], dtype='int64'))
            with pytest.raises(KeyError):
                tree.get_loc(-1)

    def test_get_indexer(self):
        for dtype, tree in self.trees.items():
            tm.assert_numpy_array_equal(
                tree.get_indexer(np.array([1.0, 5.5, 6.5])),
                np.array([0, 4, -1], dtype='int64'))
            with pytest.raises(KeyError):
                tree.get_indexer(np.array([3.0]))

    def test_get_indexer_non_unique(self):
        indexer, missing = self.tree.get_indexer_non_unique(
            np.array([1.0, 2.0, 6.5]))
        tm.assert_numpy_array_equal(indexer[:1],
                                    np.array([0], dtype='int64'))
        tm.assert_numpy_array_equal(np.sort(indexer[1:3]),
                                    np.array([0, 1], dtype='int64'))
        tm.assert_numpy_array_equal(np.sort(indexer[3:]),
                                    np.array([-1], dtype='int64'))
        tm.assert_numpy_array_equal(missing, np.array([2], dtype='int64'))

    def test_duplicates(self):
        tree = IntervalTree([0, 0, 0], [1, 1, 1])
        tm.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)),
                                    np.array([0, 1, 2], dtype='int64'))

        with pytest.raises(KeyError):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        tm.assert_numpy_array_equal(np.sort(indexer),
                                    np.array([0, 1, 2], dtype='int64'))
        tm.assert_numpy_array_equal(missing, np.array([], dtype='int64'))

    def test_get_loc_closed(self):
        for closed in ['left', 'right', 'both', 'neither']:
            tree = IntervalTree([0], [1], closed=closed)
            for p, errors in [(0, tree.open_left),
                              (1, tree.open_right)]:
                if errors:
                    with pytest.raises(KeyError):
                        tree.get_loc(p)
                else:
                    tm.assert_numpy_array_equal(tree.get_loc(p),
                                                np.array([0], dtype='int64'))

    @pytest.mark.skipif(compat.is_platform_32bit(),
                        reason="int type mismatch on 32bit")
    def test_get_indexer_closed(self):
        x = np.arange(1000, dtype='float64')
        found = x.astype('intp')
        not_found = (-1 * np.ones(1000)).astype('intp')

        for leaf_size in [1, 10, 100, 10000]:
            for closed in ['left', 'right', 'both', 'neither']:
                tree = IntervalTree(x, x + 0.5, closed=closed,
                                    leaf_size=leaf_size)
                tm.assert_numpy_array_equal(found,
                                            tree.get_indexer(x + 0.25))

                expected = found if tree.closed_left else not_found
                tm.assert_numpy_array_equal(expected,
                                            tree.get_indexer(x + 0.0))

                expected = found if tree.closed_right else not_found
                tm.assert_numpy_array_equal(expected,
                                            tree.get_indexer(x + 0.5))
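
IntervalTree here is pandas' internal interval index backend; a minimal construct-and-query sketch (internal API imported from pandas._libs.interval, an assumption that may change between versions):

import numpy as np
from pandas._libs.interval import IntervalTree  # internal API

tree = IntervalTree(np.arange(5, dtype="int64"),
                    np.arange(5, dtype="int64") + 2)  # intervals (i, i+2]
print(tree.get_indexer(np.array([5.5])))  # only (4, 6] matches -> [4]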
Example #12
def skip_if_no(package, min_version=None):
    """
    Skip a test when the given package cannot be imported.

    Returns
    -------
    a pytest.mark.skipif to use as either a test decorator or a
    parametrization mark.
    """
    msg = "Could not import '{}'".format(package)
    if min_version:
        msg += " satisfying a min_version of {}".format(min_version)
    return pytest.mark.skipif(
        not safe_import(package, min_version=min_version), reason=msg
    )


skip_if_no_mpl = pytest.mark.skipif(
    _skip_if_no_mpl(), reason="Missing matplotlib dependency"
)
skip_if_mpl = pytest.mark.skipif(not _skip_if_no_mpl(), reason="matplotlib is present")
skip_if_32bit = pytest.mark.skipif(is_platform_32bit(), reason="skipping for 32 bit")
skip_if_windows = pytest.mark.skipif(is_platform_windows(), reason="Running on Windows")
skip_if_windows_python_3 = pytest.mark.skipif(
    is_platform_windows(), reason="not used on win32"
)
skip_if_has_locale = pytest.mark.skipif(
    _skip_if_has_locale(),
    reason="Specific locale is set {lang}".format(lang=locale.getlocale()[0]),
)
skip_if_not_us_locale = pytest.mark.skipif(
    _skip_if_not_us_locale(),
    reason="Specific locale is set {lang}".format(lang=locale.getlocale()[0]),
)
skip_if_no_scipy = pytest.mark.skipif(
    _skip_if_no_scipy(), reason="Missing SciPy requirement"
)
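
These module-level marks are applied directly as decorators; a hedged usage sketch (in pandas they live in pandas.util._test_decorators, an internal module):

import pandas.util._test_decorators as td  # assumed import path

@td.skip_if_32bit
def test_needs_native_64bit_ints():
    assert (1 << 40) * 2 == 1 << 41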
Example #13
import warnings

import numpy as np
import pytest

from pandas.compat import is_platform_32bit, is_platform_windows

import pandas as pd
from pandas import option_context
import pandas.util.testing as tm

use_32bit_repr = is_platform_windows() or is_platform_32bit()


@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
class TestSparseSeriesFormatting:
    @property
    def dtype_format_for_platform(self):
        return '' if use_32bit_repr else ', dtype=int32'

    def test_sparse_max_row(self):
        s = pd.Series([1, np.nan, np.nan, 3, np.nan]).to_sparse()
        result = repr(s)
        dfm = self.dtype_format_for_platform
        exp = ("0    1.0\n1    NaN\n2    NaN\n3    3.0\n"
               "4    NaN\ndtype: Sparse[float64, nan]\nBlockIndex\n"
               "Block locations: array([0, 3]{0})\n"
               "Block lengths: array([1, 1]{0})".format(dfm))
        assert result == exp
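
The dtype_format_for_platform suffix exists because numpy only prints a dtype in an array repr when it differs from the platform default; a quick illustration (output shown for a 64-bit non-Windows build):

import numpy as np

print(repr(np.array([0, 3], dtype=np.int32)))
# 64-bit Linux/macOS: 'array([0, 3], dtype=int32)' -- suffix present
# Windows/32-bit: default int is already 32-bit, so the suffix is omitted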
Example #14
class TestUltraJSONTests:
    @pytest.mark.skipif(compat.is_platform_32bit(),
                        reason="not compliant on 32-bit, xref #15865")
    def test_encode_decimal(self):
        sut = decimal.Decimal("1337.1337")
        encoded = ujson.encode(sut, double_precision=15)
        decoded = ujson.decode(encoded)
        assert decoded == 1337.1337

        sut = decimal.Decimal("0.95")
        encoded = ujson.encode(sut, double_precision=1)
        assert encoded == "1.0"

        decoded = ujson.decode(encoded)
        assert decoded == 1.0

        sut = decimal.Decimal("0.94")
        encoded = ujson.encode(sut, double_precision=1)
        assert encoded == "0.9"

        decoded = ujson.decode(encoded)
        assert decoded == 0.9

        sut = decimal.Decimal("1.95")
        encoded = ujson.encode(sut, double_precision=1)
        assert encoded == "2.0"

        decoded = ujson.decode(encoded)
        assert decoded == 2.0

        sut = decimal.Decimal("-1.95")
        encoded = ujson.encode(sut, double_precision=1)
        assert encoded == "-2.0"

        decoded = ujson.decode(encoded)
        assert decoded == -2.0

        sut = decimal.Decimal("0.995")
        encoded = ujson.encode(sut, double_precision=2)
        assert encoded == "1.0"

        decoded = ujson.decode(encoded)
        assert decoded == 1.0

        sut = decimal.Decimal("0.9995")
        encoded = ujson.encode(sut, double_precision=3)
        assert encoded == "1.0"

        decoded = ujson.decode(encoded)
        assert decoded == 1.0

        sut = decimal.Decimal("0.99999999999999944")
        encoded = ujson.encode(sut, double_precision=15)
        assert encoded == "1.0"

        decoded = ujson.decode(encoded)
        assert decoded == 1.0
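
    # Added sketch: double_precision caps the number of decimal digits at
    # encode time, e.g. ujson.encode(3.14159, double_precision=2) would
    # yield "3.14" -- illustrative only, not asserted by this suite.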

    @pytest.mark.parametrize("ensure_ascii", [True, False])
    def test_encode_string_conversion(self, ensure_ascii):
        string_input = "A string \\ / \b \f \n \r \t </script> &"
        not_html_encoded = '"A string \\\\ \\/ \\b \\f \\n \\r \\t <\\/script> &"'
        html_encoded = ('"A string \\\\ \\/ \\b \\f \\n \\r \\t '
                        '\\u003c\\/script\\u003e \\u0026"')

        def helper(expected_output, **encode_kwargs):
            output = ujson.encode(string_input,
                                  ensure_ascii=ensure_ascii,
                                  **encode_kwargs)

            assert output == expected_output
            assert string_input == json.loads(output)
            assert string_input == ujson.decode(output)

        # Default behavior assumes encode_html_chars=False.
        helper(not_html_encoded)

        # Make sure explicit encode_html_chars=False works.
        helper(not_html_encoded, encode_html_chars=False)

        # Make sure explicit encode_html_chars=True does the encoding.
        helper(html_encoded, encode_html_chars=True)

    @pytest.mark.parametrize(
        "long_number",
        [-4342969734183514, -12345678901234.56789012, -528656961.4399388])
    def test_double_long_numbers(self, long_number):
        sut = {"a": long_number}
        encoded = ujson.encode(sut, double_precision=15)

        decoded = ujson.decode(encoded)
        assert sut == decoded

    def test_encode_non_c_locale(self):
        lc_category = locale.LC_NUMERIC

        # We just need one of these locales to work.
        for new_locale in ("it_IT.UTF-8", "Italian_Italy"):
            if tm.can_set_locale(new_locale, lc_category):
                with tm.set_locale(new_locale, lc_category):
                    assert ujson.loads(ujson.dumps(4.78e60)) == 4.78e60
                    assert ujson.loads("4.78", precise_float=True) == 4.78
                break

    def test_decimal_decode_test_precise(self):
        sut = {"a": 4.56}
        encoded = ujson.encode(sut)
        decoded = ujson.decode(encoded, precise_float=True)
        assert sut == decoded

    def test_encode_double_tiny_exponential(self):
        num = 1e-40
        assert num == ujson.decode(ujson.encode(num))
        num = 1e-100
        assert num == ujson.decode(ujson.encode(num))
        num = -1e-45
        assert num == ujson.decode(ujson.encode(num))
        num = -1e-145
        assert np.allclose(num, ujson.decode(ujson.encode(num)))

    @pytest.mark.parametrize("unicode_key", ["key1", "بن"])
    def test_encode_dict_with_unicode_keys(self, unicode_key):
        unicode_dict = {unicode_key: "value1"}
        assert unicode_dict == ujson.decode(ujson.encode(unicode_dict))

    @pytest.mark.parametrize(
        "double_input",
        [math.pi, -math.pi]  # Should work with negatives too.
    )
    def test_encode_double_conversion(self, double_input):
        output = ujson.encode(double_input)
        assert round(double_input, 5) == round(json.loads(output), 5)
        assert round(double_input, 5) == round(ujson.decode(output), 5)

    def test_encode_with_decimal(self):
        decimal_input = 1.0
        output = ujson.encode(decimal_input)

        assert output == "1.0"

    def test_encode_array_of_nested_arrays(self):
        nested_input = [[[[]]]] * 20
        output = ujson.encode(nested_input)

        assert nested_input == json.loads(output)
        assert nested_input == ujson.decode(output)

        nested_input = np.array(nested_input)
        tm.assert_numpy_array_equal(
            nested_input,
            ujson.decode(output, numpy=True, dtype=nested_input.dtype))

    def test_encode_array_of_doubles(self):
        doubles_input = [31337.31337, 31337.31337,
                         31337.31337, 31337.31337] * 10
        output = ujson.encode(doubles_input)

        assert doubles_input == json.loads(output)
        assert doubles_input == ujson.decode(output)

        tm.assert_numpy_array_equal(np.array(doubles_input),
                                    ujson.decode(output, numpy=True))

    def test_double_precision(self):
        double_input = 30.012345678901234
        output = ujson.encode(double_input, double_precision=15)

        assert double_input == json.loads(output)
        assert double_input == ujson.decode(output)

        for double_precision in (3, 9):
            output = ujson.encode(double_input,
                                  double_precision=double_precision)
            rounded_input = round(double_input, double_precision)

            assert rounded_input == json.loads(output)
            assert rounded_input == ujson.decode(output)

    @pytest.mark.parametrize("invalid_val", [20, -1, "9", None])
    def test_invalid_double_precision(self, invalid_val):
        double_input = 30.12345678901234567890
        expected_exception = ValueError if isinstance(invalid_val,
                                                      int) else TypeError

        with pytest.raises(expected_exception):
            ujson.encode(double_input, double_precision=invalid_val)

    def test_encode_string_conversion2(self):
        string_input = "A string \\ / \b \f \n \r \t"
        output = ujson.encode(string_input)

        assert string_input == json.loads(output)
        assert string_input == ujson.decode(output)
        assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"'

    @pytest.mark.parametrize(
        "unicode_input",
        ["Räksmörgås اسامة بن محمد بن عوض بن لادن", "\xe6\x97\xa5\xd1\x88"],
    )
    def test_encode_unicode_conversion(self, unicode_input):
        enc = ujson.encode(unicode_input)
        dec = ujson.decode(enc)

        assert enc == json.dumps(unicode_input)
        assert dec == json.loads(enc)

    def test_encode_control_escaping(self):
        escaped_input = "\x19"
        enc = ujson.encode(escaped_input)
        dec = ujson.decode(enc)

        assert escaped_input == dec
        assert enc == json.dumps(escaped_input)

    def test_encode_unicode_surrogate_pair(self):
        surrogate_input = "\xf0\x90\x8d\x86"
        enc = ujson.encode(surrogate_input)
        dec = ujson.decode(enc)

        assert enc == json.dumps(surrogate_input)
        assert dec == json.loads(enc)

    def test_encode_unicode_4bytes_utf8(self):
        four_bytes_input = "\xf0\x91\x80\xb0TRAILINGNORMAL"
        enc = ujson.encode(four_bytes_input)
        dec = ujson.decode(enc)

        assert enc == json.dumps(four_bytes_input)
        assert dec == json.loads(enc)

    def test_encode_unicode_4bytes_utf8highest(self):
        four_bytes_input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL"
        enc = ujson.encode(four_bytes_input)

        dec = ujson.decode(enc)

        assert enc == json.dumps(four_bytes_input)
        assert dec == json.loads(enc)

    def test_encode_array_in_array(self):
        arr_in_arr_input = [[[[]]]]
        output = ujson.encode(arr_in_arr_input)

        assert arr_in_arr_input == json.loads(output)
        assert output == json.dumps(arr_in_arr_input)
        assert arr_in_arr_input == ujson.decode(output)

        tm.assert_numpy_array_equal(np.array(arr_in_arr_input),
                                    ujson.decode(output, numpy=True))

    @pytest.mark.parametrize(
        "num_input",
        [
            31337,
            -31337,  # Negative number.
            -9223372036854775808,  # Large negative number.
        ],
    )
    def test_encode_num_conversion(self, num_input):
        output = ujson.encode(num_input)
        assert num_input == json.loads(output)
        assert output == json.dumps(num_input)
        assert num_input == ujson.decode(output)

    def test_encode_list_conversion(self):
        list_input = [1, 2, 3, 4]
        output = ujson.encode(list_input)

        assert list_input == json.loads(output)
        assert list_input == ujson.decode(output)

        tm.assert_numpy_array_equal(np.array(list_input),
                                    ujson.decode(output, numpy=True))

    def test_encode_dict_conversion(self):
        dict_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4}
        output = ujson.encode(dict_input)

        assert dict_input == json.loads(output)
        assert dict_input == ujson.decode(output)

    @pytest.mark.parametrize("builtin_value", [None, True, False])
    def test_encode_builtin_values_conversion(self, builtin_value):
        output = ujson.encode(builtin_value)
        assert builtin_value == json.loads(output)
        assert output == json.dumps(builtin_value)
        assert builtin_value == ujson.decode(output)

    def test_encode_datetime_conversion(self):
        datetime_input = datetime.datetime.fromtimestamp(time.time())
        output = ujson.encode(datetime_input, date_unit="s")
        expected = calendar.timegm(datetime_input.utctimetuple())

        assert int(expected) == json.loads(output)
        assert int(expected) == ujson.decode(output)

    def test_encode_date_conversion(self):
        date_input = datetime.date.fromtimestamp(time.time())
        output = ujson.encode(date_input, date_unit="s")

        tup = (date_input.year, date_input.month, date_input.day, 0, 0, 0)
        expected = calendar.timegm(tup)

        assert int(expected) == json.loads(output)
        assert int(expected) == ujson.decode(output)

    @pytest.mark.parametrize(
        "test",
        [
            datetime.time(),
            datetime.time(1, 2, 3),
            datetime.time(10, 12, 15, 343243)
        ],
    )
    def test_encode_time_conversion_basic(self, test):
        output = ujson.encode(test)
        expected = '"{iso}"'.format(iso=test.isoformat())
        assert expected == output

    def test_encode_time_conversion_pytz(self):
        # see gh-11473: to_json segfaults with timezone-aware datetimes
        test = datetime.time(10, 12, 15, 343243, pytz.utc)
        output = ujson.encode(test)
        expected = '"{iso}"'.format(iso=test.isoformat())
        assert expected == output

    def test_encode_time_conversion_dateutil(self):
        # see gh-11473: to_json segfaults with timezone-aware datetimes
        test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc())
        output = ujson.encode(test)
        expected = '"{iso}"'.format(iso=test.isoformat())
        assert expected == output

    @pytest.mark.parametrize(
        "decoded_input",
        [NaT, np.datetime64("NaT"), np.nan, np.inf, -np.inf])
    def test_encode_as_null(self, decoded_input):
        assert ujson.encode(decoded_input) == "null", "Expected null"

    def test_datetime_units(self):
        val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504)
        stamp = Timestamp(val)

        roundtrip = ujson.decode(ujson.encode(val, date_unit="s"))
        assert roundtrip == stamp.value // 10**9

        roundtrip = ujson.decode(ujson.encode(val, date_unit="ms"))
        assert roundtrip == stamp.value // 10**6

        roundtrip = ujson.decode(ujson.encode(val, date_unit="us"))
        assert roundtrip == stamp.value // 10**3

        roundtrip = ujson.decode(ujson.encode(val, date_unit="ns"))
        assert roundtrip == stamp.value

        msg = "Invalid value 'foo' for option 'date_unit'"
        with pytest.raises(ValueError, match=msg):
            ujson.encode(val, date_unit="foo")

    def test_encode_to_utf8(self):
        unencoded = "\xe6\x97\xa5\xd1\x88"

        enc = ujson.encode(unencoded, ensure_ascii=False)
        dec = ujson.decode(enc)

        assert enc == json.dumps(unencoded, ensure_ascii=False)
        assert dec == json.loads(enc)

    def test_decode_from_unicode(self):
        unicode_input = '{"obj": 31337}'

        dec1 = ujson.decode(unicode_input)
        dec2 = ujson.decode(str(unicode_input))

        assert dec1 == dec2

    def test_encode_recursion_max(self):
        # 8 is the max recursion depth

        class O2:
            member = 0

        class O1:
            member = 0

        decoded_input = O1()
        decoded_input.member = O2()
        decoded_input.member.member = decoded_input

        with pytest.raises(OverflowError):
            ujson.encode(decoded_input)

    def test_decode_jibberish(self):
        jibberish = "fdsa sda v9sa fdsa"

        with pytest.raises(ValueError):
            ujson.decode(jibberish)

    @pytest.mark.parametrize(
        "broken_json",
        [
            "[",  # Broken array start.
            "{",  # Broken object start.
            "]",  # Broken array end.
            "}",  # Broken object end.
        ],
    )
    def test_decode_broken_json(self, broken_json):
        with pytest.raises(ValueError):
            ujson.decode(broken_json)

    @pytest.mark.parametrize("too_big_char", ["[", "{"])
    def test_decode_depth_too_big(self, too_big_char):
        with pytest.raises(ValueError):
            ujson.decode(too_big_char * (1024 * 1024))

    @pytest.mark.parametrize(
        "bad_string",
        [
            '"TESTING',  # Unterminated.
            '"TESTING\\"',  # Unterminated escape.
            "tru",  # Broken True.
            "fa",  # Broken False.
            "n",  # Broken None.
        ],
    )
    def test_decode_bad_string(self, bad_string):
        with pytest.raises(ValueError):
            ujson.decode(bad_string)

    @pytest.mark.parametrize("broken_json",
                             ['{{1337:""}}', '{{"key":"}', "[[[true"])
    def test_decode_broken_json_leak(self, broken_json):
        for _ in range(1000):
            with pytest.raises(ValueError):
                ujson.decode(broken_json)

    @pytest.mark.parametrize(
        "invalid_dict",
        [
            "{{{{31337}}}}",  # No key.
            '{{{{"key":}}}}',  # No value.
            '{{{{"key"}}}}',  # No colon or value.
        ],
    )
    def test_decode_invalid_dict(self, invalid_dict):
        with pytest.raises(ValueError):
            ujson.decode(invalid_dict)

    @pytest.mark.parametrize(
        "numeric_int_as_str",
        ["31337", "-31337"]  # Should work with negatives.
    )
    def test_decode_numeric_int(self, numeric_int_as_str):
        assert int(numeric_int_as_str) == ujson.decode(numeric_int_as_str)

    def test_encode_null_character(self):
        wrapped_input = "31337 \x00 1337"
        output = ujson.encode(wrapped_input)

        assert wrapped_input == json.loads(output)
        assert output == json.dumps(wrapped_input)
        assert wrapped_input == ujson.decode(output)

        alone_input = "\x00"
        output = ujson.encode(alone_input)

        assert alone_input == json.loads(output)
        assert output == json.dumps(alone_input)
        assert alone_input == ujson.decode(output)
        assert '"  \\u0000\\r\\n "' == ujson.dumps("  \u0000\r\n ")

    def test_decode_null_character(self):
        wrapped_input = '"31337 \\u0000 31337"'
        assert ujson.decode(wrapped_input) == json.loads(wrapped_input)

    def test_encode_list_long_conversion(self):
        long_input = [
            9223372036854775807,
            9223372036854775807,
            9223372036854775807,
            9223372036854775807,
            9223372036854775807,
            9223372036854775807,
        ]
        output = ujson.encode(long_input)

        assert long_input == json.loads(output)
        assert long_input == ujson.decode(output)

        tm.assert_numpy_array_equal(
            np.array(long_input),
            ujson.decode(output, numpy=True, dtype=np.int64))

    def test_encode_long_conversion(self):
        long_input = 9223372036854775807
        output = ujson.encode(long_input)

        assert long_input == json.loads(output)
        assert output == json.dumps(long_input)
        assert long_input == ujson.decode(output)

    @pytest.mark.parametrize(
        "int_exp", ["1337E40", "1.337E40", "1337E+9", "1.337e+40", "1.337E-4"])
    def test_decode_numeric_int_exp(self, int_exp):
        assert ujson.decode(int_exp) == json.loads(int_exp)

    def test_loads_non_str_bytes_raises(self):
        msg = "Expected 'str' or 'bytes'"
        with pytest.raises(TypeError, match=msg):
            ujson.loads(None)

    def test_version(self):
        assert re.match(
            r"^\d+\.\d+(\.\d+)?$", ujson.__version__
        ), "ujson.__version__ must be a string like '1.4.0'"

    def test_encode_numeric_overflow(self):
        with pytest.raises(OverflowError):
            ujson.encode(12839128391289382193812939)

    def test_encode_numeric_overflow_nested(self):
        class Nested:
            x = 12839128391289382193812939

        for _ in range(0, 100):
            with pytest.raises(OverflowError):
                ujson.encode(Nested())

    @pytest.mark.parametrize("val", [3590016419, 2**31, 2**32, (2**32) - 1])
    def test_decode_number_with_32bit_sign_bit(self, val):
        # Test that numbers that fit within 32 bits but would have the
        # sign bit set (2**31 <= x < 2**32) are decoded properly.
        doc = '{{"id": {val}}}'.format(val=val)
        assert ujson.decode(doc)["id"] == val

    def test_encode_big_escape(self):
        # Make sure no Exception is raised.
        for _ in range(10):
            base = "\u00e5".encode("utf-8")
            escape_input = base * 1024 * 1024 * 2
            ujson.encode(escape_input)

    def test_decode_big_escape(self):
        # Make sure no Exception is raised.
        for _ in range(10):
            base = "\u00e5".encode("utf-8")
            quote = b'"'

            escape_input = quote + (base * 1024 * 1024 * 2) + quote
            ujson.decode(escape_input)

    def test_to_dict(self):
        d = {"key": 31337}

        class DictTest:
            def toDict(self):
                return d

        o = DictTest()
        output = ujson.encode(o)

        dec = ujson.decode(output)
        assert dec == d

    def test_default_handler(self):
        class _TestObject:
            def __init__(self, val):
                self.val = val

            @property
            def recursive_attr(self):
                return _TestObject("recursive_attr")

            def __str__(self):
                return str(self.val)

        msg = "Maximum recursion level reached"
        with pytest.raises(OverflowError, match=msg):
            ujson.encode(_TestObject("foo"))
        assert '"foo"' == ujson.encode(_TestObject("foo"), default_handler=str)

        def my_handler(_):
            return "foobar"

        assert '"foobar"' == ujson.encode(_TestObject("foo"),
                                          default_handler=my_handler)

        def my_handler_raises(_):
            raise TypeError("I raise for anything")

        with pytest.raises(TypeError, match="I raise for anything"):
            ujson.encode(_TestObject("foo"), default_handler=my_handler_raises)

        def my_int_handler(_):
            return 42

        assert (ujson.decode(
            ujson.encode(_TestObject("foo"),
                         default_handler=my_int_handler)) == 42)

        def my_obj_handler(_):
            return datetime.datetime(2013, 2, 3)

        assert ujson.decode(ujson.encode(datetime.datetime(
            2013, 2, 3))) == ujson.decode(
                ujson.encode(_TestObject("foo"),
                             default_handler=my_obj_handler))

        obj_list = [_TestObject("foo"), _TestObject("bar")]
        assert json.loads(json.dumps(obj_list, default=str)) == ujson.decode(
            ujson.encode(obj_list, default_handler=str))
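
The date_unit assertions above divide Timestamp.value (nanoseconds since the epoch) down to the requested unit; the same arithmetic stand-alone, using only public pandas API (a sketch):

import pandas as pd

stamp = pd.Timestamp("2013-08-17 21:17:12.215504")
assert stamp.value // 10**9 == int(stamp.timestamp())  # seconds agree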
Example #15
# -*- coding: utf-8 -*-
from __future__ import print_function

import numpy as np
import pandas as pd

import pandas.util.testing as tm
from pandas.compat import (is_platform_windows,
                           is_platform_32bit)
from pandas.core.config import option_context


use_32bit_repr = is_platform_windows() or is_platform_32bit()


class TestSparseSeriesFormatting(tm.TestCase):

    @property
    def dtype_format_for_platform(self):
        return '' if use_32bit_repr else ', dtype=int32'

    def test_sparse_max_row(self):
        s = pd.Series([1, np.nan, np.nan, 3, np.nan]).to_sparse()
        result = repr(s)
        dfm = self.dtype_format_for_platform
        exp = ("0    1.0\n1    NaN\n2    NaN\n3    3.0\n"
               "4    NaN\ndtype: float64\nBlockIndex\n"
               "Block locations: array([0, 3]{0})\n"
               "Block lengths: array([1, 1]{0})".format(dfm))
        self.assertEqual(result, exp)
Example #16
class TestIntervalTree:
    def test_get_indexer(self, tree):
        result = tree.get_indexer(np.array([1.0, 5.5, 6.5]))
        expected = np.array([0, 4, -1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(
            KeyError, match="'indexer does not intersect a unique set of intervals'"
        ):
            tree.get_indexer(np.array([3.0]))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2 ** 63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_overflow(self, dtype, target_value, target_dtype):
        left, right = np.array([0, 1], dtype=dtype), np.array([1, 2], dtype=dtype)
        tree = IntervalTree(left, right)

        result = tree.get_indexer(np.array([target_value], dtype=target_dtype))
        expected = np.array([-1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_non_unique(self, tree):
        indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5]))

        result = indexer[:1]
        expected = np.array([0], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = np.sort(indexer[1:3])
        expected = np.array([0, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = np.sort(indexer[3:])
        expected = np.array([-1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = missing
        expected = np.array([2], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2 ** 63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype):
        left, right = np.array([0, 2], dtype=dtype), np.array([1, 3], dtype=dtype)
        tree = IntervalTree(left, right)
        target = np.array([target_value], dtype=target_dtype)

        result_indexer, result_missing = tree.get_indexer_non_unique(target)
        expected_indexer = np.array([-1], dtype="intp")
        tm.assert_numpy_array_equal(result_indexer, expected_indexer)

        expected_missing = np.array([0], dtype="intp")
        tm.assert_numpy_array_equal(result_missing, expected_missing)

    def test_duplicates(self, dtype):
        left = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(left, left + 1)

        with pytest.raises(
            KeyError, match="'indexer does not intersect a unique set of intervals'"
        ):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        result = np.sort(indexer)
        expected = np.array([0, 1, 2], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = missing
        expected = np.array([], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
    )
    def test_get_indexer_closed(self, closed, leaf_size):
        x = np.arange(1000, dtype="float64")
        found = x.astype("intp")
        not_found = (-1 * np.ones(1000)).astype("intp")

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)
        tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25))

        expected = found if tree.closed_left else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0))

        expected = found if tree.closed_right else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True),
            (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True),
            (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
            (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False),
            (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False),
        ],
    )
    @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3))))
    def test_is_overlapping(self, closed, order, left, right, expected):
        # GH 23309
        tree = IntervalTree(left[order], right[order], closed=closed)
        result = tree.is_overlapping
        assert result is expected

    @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3))))
    def test_is_overlapping_endpoints(self, closed, order):
        """shared endpoints are marked as overlapping"""
        # GH 23309
        left, right = np.arange(3, dtype="int64"), np.arange(1, 4)
        tree = IntervalTree(left[order], right[order], closed=closed)
        result = tree.is_overlapping
        expected = closed == "both"
        assert result is expected

    @pytest.mark.parametrize(
        "left, right",
        [
            (np.array([], dtype="int64"), np.array([], dtype="int64")),
            (np.array([0], dtype="int64"), np.array([1], dtype="int64")),
            (np.array([np.nan]), np.array([np.nan])),
            (np.array([np.nan] * 3), np.array([np.nan] * 3)),
        ],
    )
    def test_is_overlapping_trivial(self, closed, left, right):
        # GH 23309
        tree = IntervalTree(left, right, closed=closed)
        assert tree.is_overlapping is False

    @pytest.mark.skipif(compat.is_platform_32bit(), reason="GH 23440")
    def test_construction_overflow(self):
        # GH 25485
        left, right = np.arange(101, dtype="int64"), [np.iinfo(np.int64).max] * 101
        tree = IntervalTree(left, right)

        # pivot should be average of left/right medians
        result = tree.root.pivot
        expected = (50 + np.iinfo(np.int64).max) / 2
        assert result == expected
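
The construction_overflow case guards against the classic midpoint bug: (left + right) // 2 wraps on int64 even when both operands fit. A numpy sketch of the failure mode and one overflow-safe formulation (illustrative only):

import numpy as np

left = np.array([50], dtype="int64")
right = np.array([np.iinfo(np.int64).max], dtype="int64")
naive = (left + right) // 2         # int64 addition wraps silently
safe = left + (right - left) // 2   # stays in range
print(naive[0] < 0, safe[0])        # True 4611686018427387928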
Example #17
class TestPandasContainer(object):

    def setup_method(self, method):
        self.dirpath = tm.get_data_path()

        self.ts = tm.makeTimeSeries()
        self.ts.name = 'ts'

        self.series = tm.makeStringSeries()
        self.series.name = 'series'

        self.objSeries = tm.makeObjectSeries()
        self.objSeries.name = 'objects'

        self.empty_series = Series([], index=[])
        self.empty_frame = DataFrame({})

        self.frame = _frame.copy()
        self.frame2 = _frame2.copy()
        self.intframe = _intframe.copy()
        self.tsframe = _tsframe.copy()
        self.mixed_frame = _mixed_frame.copy()
        self.categorical = _cat_frame.copy()

    def teardown_method(self, method):
        del self.dirpath

        del self.ts

        del self.series

        del self.objSeries

        del self.empty_series
        del self.empty_frame

        del self.frame
        del self.frame2
        del self.intframe
        del self.tsframe
        del self.mixed_frame

    def test_frame_double_encoded_labels(self):
        df = DataFrame([['a', 'b'], ['c', 'd']],
                       index=['index " 1', 'index / 2'],
                       columns=['a \\ b', 'y / z'])

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        assert_frame_equal(df, read_json(df.to_json(orient='columns'),
                                         orient='columns'))
        assert_frame_equal(df, read_json(df.to_json(orient='index'),
                                         orient='index'))
        df_unser = read_json(df.to_json(orient='records'), orient='records')
        assert_index_equal(df.columns, df_unser.columns)
        tm.assert_numpy_array_equal(df.values, df_unser.values)

    def test_frame_non_unique_index(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
                       columns=['x', 'y'])

        pytest.raises(ValueError, df.to_json, orient='index')
        pytest.raises(ValueError, df.to_json, orient='columns')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        unser = read_json(df.to_json(orient='records'), orient='records')
        tm.assert_index_equal(df.columns, unser.columns)
        tm.assert_almost_equal(df.values, unser.values)
        unser = read_json(df.to_json(orient='values'), orient='values')
        tm.assert_numpy_array_equal(df.values, unser.values)

    def test_frame_non_unique_columns(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                       columns=['x', 'x'])

        pytest.raises(ValueError, df.to_json, orient='index')
        pytest.raises(ValueError, df.to_json, orient='columns')
        pytest.raises(ValueError, df.to_json, orient='records')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split', dtype=False))
        unser = read_json(df.to_json(orient='values'), orient='values')
        tm.assert_numpy_array_equal(df.values, unser.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[
                       1, 2], columns=['x', 'y'])
        result = read_json(df.to_json(orient='split'), orient='split')
        assert_frame_equal(result, df)

        def _check(df):
            result = read_json(df.to_json(orient='split'), orient='split',
                               convert_dates=['x'])
            assert_frame_equal(result, df)

        for o in [[['a', 'b'], ['c', 'd']],
                  [[1.5, 2.5], [3.5, 4.5]],
                  [[1, 2.5], [3, 4.5]],
                  [[Timestamp('20130101'), 3.5],
                   [Timestamp('20130102'), 4.5]]]:
            _check(DataFrame(o, index=[1, 2], columns=['x', 'x']))

    def test_frame_from_json_to_json(self):
        def _check_orient(df, orient, dtype=None, numpy=False,
                          convert_axes=True, check_dtype=True, raise_ok=None,
                          sort=None, check_index_type=True,
                          check_column_type=True, check_numpy_dtype=False):
            if sort is not None:
                df = df.sort_values(sort)
            else:
                df = df.sort_index()

            # if we are not unique, then check that we are raising ValueError
            # for the appropriate orients
            if not df.index.is_unique and orient in ['index', 'columns']:
                pytest.raises(
                    ValueError, lambda: df.to_json(orient=orient))
                return
            if (not df.columns.is_unique and
                    orient in ['index', 'columns', 'records']):
                pytest.raises(
                    ValueError, lambda: df.to_json(orient=orient))
                return

            dfjson = df.to_json(orient=orient)

            try:
                unser = read_json(dfjson, orient=orient, dtype=dtype,
                                  numpy=numpy, convert_axes=convert_axes)
            except Exception as detail:
                if raise_ok is not None:
                    if isinstance(detail, raise_ok):
                        return
                raise

            if sort is not None and sort in unser.columns:
                unser = unser.sort_values(sort)
            else:
                unser = unser.sort_index()

            if dtype is False:
                check_dtype = False

            if not convert_axes and df.index.dtype.type == np.datetime64:
                unser.index = DatetimeIndex(
                    unser.index.values.astype('i8') * 1e6)
            if orient == "records":
                # index is not captured in this orientation
                tm.assert_almost_equal(df.values, unser.values,
                                       check_dtype=check_numpy_dtype)
                tm.assert_index_equal(df.columns, unser.columns,
                                      exact=check_column_type)
            elif orient == "values":
                # index and cols are not captured in this orientation
                if numpy is True and df.shape == (0, 0):
                    assert unser.shape[0] == 0
                else:
                    tm.assert_almost_equal(df.values, unser.values,
                                           check_dtype=check_numpy_dtype)
            elif orient == "split":
                # index and col labels might not be strings
                unser.index = [str(i) for i in unser.index]
                unser.columns = [str(i) for i in unser.columns]

                if sort is None:
                    unser = unser.sort_index()
                tm.assert_almost_equal(df.values, unser.values,
                                       check_dtype=check_numpy_dtype)
            else:
                if convert_axes:
                    tm.assert_frame_equal(df, unser, check_dtype=check_dtype,
                                          check_index_type=check_index_type,
                                          check_column_type=check_column_type)
                else:
                    tm.assert_frame_equal(df, unser, check_less_precise=False,
                                          check_dtype=check_dtype)

        def _check_all_orients(df, dtype=None, convert_axes=True,
                               raise_ok=None, sort=None, check_index_type=True,
                               check_column_type=True):

            # numpy=False
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, sort=sort,
                              check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "records", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "split", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "index", dtype=dtype,
                          convert_axes=False, sort=sort)
            _check_orient(df, "values", dtype=dtype,
                          convert_axes=False, sort=sort)

            # numpy=True and raise_ok might not be None, so ignore the error
            if convert_axes:
                _check_orient(df, "columns", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "records", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "split", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "index", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)
                _check_orient(df, "values", dtype=dtype, numpy=True,
                              raise_ok=raise_ok, sort=sort,
                              check_index_type=False, check_column_type=False)

            _check_orient(df, "columns", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "records", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "split", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "index", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)
            _check_orient(df, "values", dtype=dtype, numpy=True,
                          convert_axes=False, raise_ok=raise_ok, sort=sort)

        # basic
        _check_all_orients(self.frame)
        assert self.frame.to_json() == self.frame.to_json(orient="columns")

        _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)
        _check_all_orients(self.intframe, dtype=False)

        # big one
        # index and columns are strings as all deserialised JSON object keys
        # are assumed to be strings
        biggie = DataFrame(np.zeros((200, 4)),
                           columns=[str(i) for i in range(4)],
                           index=[str(i) for i in range(200)])
        _check_all_orients(biggie, dtype=False, convert_axes=False)

        # dtypes
        _check_all_orients(DataFrame(biggie, dtype=np.float64),
                           dtype=np.float64, convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int,
                           convert_axes=False)
        _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3',
                           convert_axes=False, raise_ok=ValueError)

        # categorical
        _check_all_orients(self.categorical, sort='sort', raise_ok=ValueError)

        # empty
        _check_all_orients(self.empty_frame, check_index_type=False,
                           check_column_type=False)

        # time series data
        _check_all_orients(self.tsframe)

        # mixed data
        index = pd.Index(['a', 'b', 'c', 'd', 'e'])
        data = {'A': [0., 1., 2., 3., 4.],
                'B': [0., 1., 0., 1., 0.],
                'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
                'D': [True, False, True, False, True]}
        df = DataFrame(data=data, index=index)
        _check_orient(df, "split", check_dtype=False)
        _check_orient(df, "records", check_dtype=False)
        _check_orient(df, "values", check_dtype=False)
        _check_orient(df, "columns", check_dtype=False)
        # the 'index' orient is problematic as it is read back in a
        # transposed state, so the columns are interpreted as having mixed
        # data and are given object dtypes.
        # force everything to have object dtype beforehand
        _check_orient(df.transpose().transpose(), "index", dtype=False)
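
    # Editor's illustrative sketch (not part of the original suite): a
    # minimal round trip showing which labels each ``orient`` preserves,
    # assuming the same pandas-era API used throughout this file.
    def test_orient_round_trip_sketch(self):
        df = DataFrame({'x': [1, 2]}, index=['a', 'b'])
        # 'columns' keeps both axes
        assert read_json(df.to_json(orient='columns')).index.tolist() == \
            ['a', 'b']
        # 'records' drops the index; a fresh RangeIndex comes back
        assert read_json(df.to_json(orient='records'),
                         orient='records').index.tolist() == [0, 1]
        # 'values' drops index and column labels alike
        assert read_json(df.to_json(orient='values'),
                         orient='values').columns.tolist() == [0]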

    def test_frame_from_json_bad_data(self):
        pytest.raises(ValueError, read_json, StringIO('{"key":b:a:d}'))

        # too few indices
        json = StringIO('{"columns":["A","B"],'
                        '"index":["2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        pytest.raises(ValueError, read_json, json,
                      orient="split")

        # too many columns
        json = StringIO('{"columns":["A","B","C"],'
                        '"index":["1","2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        pytest.raises(AssertionError, read_json, json,
                      orient="split")

        # bad key
        json = StringIO('{"badkey":["A","B"],'
                        '"index":["2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        with tm.assert_raises_regex(ValueError,
                                    r"unexpected key\(s\): badkey"):
            read_json(json, orient="split")

    def test_frame_from_json_nones(self):
        df = DataFrame([[1, 2], [4, 5, 6]])
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])

        df = DataFrame([['1', '2'], ['4', '5', '6']])
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), dtype=False)
        assert unser[2][0] is None
        unser = read_json(df.to_json(), convert_axes=False, dtype=False)
        assert unser['2']['0'] is None

        unser = read_json(df.to_json(), numpy=False)
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), numpy=False, dtype=False)
        assert unser[2][0] is None
        unser = read_json(df.to_json(), numpy=False,
                          convert_axes=False, dtype=False)
        assert unser['2']['0'] is None

        # infinities get mapped to nulls which get mapped to NaNs during
        # deserialisation
        df = DataFrame([[1, 2], [4, 5, 6]])
        df.loc[0, 2] = np.inf
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), dtype=False)
        assert np.isnan(unser[2][0])

        df.loc[0, 2] = np.NINF
        unser = read_json(df.to_json())
        assert np.isnan(unser[2][0])
        unser = read_json(df.to_json(), dtype=False)
        assert np.isnan(unser[2][0])
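
    # Editor's illustrative sketch (not from the original suite): the
    # mapping described above is visible on the wire, where +/-inf are
    # written out as JSON null before coming back as NaN.
    def test_inf_written_as_null_sketch(self):
        df = DataFrame([[np.inf], [np.NINF]])
        assert df.to_json().count('null') == 2
        assert read_json(df.to_json()).isnull().all().all()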

    @pytest.mark.skipif(is_platform_32bit(),
                        reason="not compliant on 32-bit, xref #15865")
    def test_frame_to_json_float_precision(self):
        df = pd.DataFrame([dict(a_float=0.95)])
        encoded = df.to_json(double_precision=1)
        assert encoded == '{"a_float":{"0":1.0}}'

        df = pd.DataFrame([dict(a_float=1.95)])
        encoded = df.to_json(double_precision=1)
        assert encoded == '{"a_float":{"0":2.0}}'

        df = pd.DataFrame([dict(a_float=-1.95)])
        encoded = df.to_json(double_precision=1)
        assert encoded == '{"a_float":{"0":-2.0}}'

        df = pd.DataFrame([dict(a_float=0.995)])
        encoded = df.to_json(double_precision=2)
        assert encoded == '{"a_float":{"0":1.0}}'

        df = pd.DataFrame([dict(a_float=0.9995)])
        encoded = df.to_json(double_precision=3)
        assert encoded == '{"a_float":{"0":1.0}}'

        df = pd.DataFrame([dict(a_float=0.99999999999999944)])
        encoded = df.to_json(double_precision=15)
        assert encoded == '{"a_float":{"0":1.0}}'

    def test_frame_to_json_except(self):
        df = DataFrame([1, 2, 3])
        pytest.raises(ValueError, df.to_json, orient="garbage")

    def test_frame_empty(self):
        df = DataFrame(columns=['jim', 'joe'])
        assert not df._is_mixed_type
        assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
                           check_index_type=False)
        # GH 7445
        result = pd.DataFrame({'test': []}, index=[]).to_json(orient='columns')
        expected = '{"test":{}}'
        assert result == expected

    def test_frame_empty_mixedtype(self):
        # mixed type
        df = DataFrame(columns=['jim', 'joe'])
        df['joe'] = df['joe'].astype('i8')
        assert df._is_mixed_type
        assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
                           check_index_type=False)

    def test_frame_mixedtype_orient(self):  # GH10289
        vals = [[10, 1, 'foo', .1, .01],
                [20, 2, 'bar', .2, .02],
                [30, 3, 'baz', .3, .03],
                [40, 4, 'qux', .4, .04]]

        df = DataFrame(vals, index=list('abcd'),
                       columns=['1st', '2nd', '3rd', '4th', '5th'])

        assert df._is_mixed_type
        right = df.copy()

        for orient in ['split', 'index', 'columns']:
            inp = df.to_json(orient=orient)
            left = read_json(inp, orient=orient, convert_axes=False)
            assert_frame_equal(left, right)

        right.index = np.arange(len(df))
        inp = df.to_json(orient='records')
        left = read_json(inp, orient='records', convert_axes=False)
        assert_frame_equal(left, right)

        right.columns = np.arange(df.shape[1])
        inp = df.to_json(orient='values')
        left = read_json(inp, orient='values', convert_axes=False)
        assert_frame_equal(left, right)

    def test_v12_compat(self):
        df = DataFrame(
            [[1.56808523, 0.65727391, 1.81021139, -0.17251653],
             [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
             [1.51493992, 0.11805825, 1.629455, -1.31506612],
             [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
             [0.05951614, -2.69652057, 1.28163262, 0.34703478]],
            columns=['A', 'B', 'C', 'D'],
            index=pd.date_range('2000-01-03', '2000-01-07'))
        df['date'] = pd.Timestamp('19920106 18:21:32.12')
        df.iloc[3, df.columns.get_loc('date')] = pd.Timestamp('20130101')
        df['modified'] = df['date']
        df.iloc[1, df.columns.get_loc('modified')] = pd.NaT

        v12_json = os.path.join(self.dirpath, 'tsframe_v012.json')
        df_unser = pd.read_json(v12_json)
        assert_frame_equal(df, df_unser)

        df_iso = df.drop(['modified'], axis=1)
        v12_iso_json = os.path.join(self.dirpath, 'tsframe_iso_v012.json')
        df_unser_iso = pd.read_json(v12_iso_json)
        assert_frame_equal(df_iso, df_unser_iso)

    def test_blocks_compat_GH9037(self):
        index = pd.date_range('20000101', periods=10, freq='H')
        df_mixed = DataFrame(OrderedDict(
            float_1=[-0.92077639, 0.77434435, 1.25234727, 0.61485564,
                     -0.60316077, 0.24653374, 0.28668979, -2.51969012,
                     0.95748401, -1.02970536],
            int_1=[19680418, 75337055, 99973684, 65103179, 79373900,
                   40314334, 21290235, 4991321, 41903419, 16008365],
            str_1=['78c608f1', '64a99743', '13d2ff52', 'ca7f4af2', '97236474',
                   'bde7e214', '1a6bde47', 'b1190be5', '7a669144', '8d64d068'],
            float_2=[-0.0428278, -1.80872357, 3.36042349, -0.7573685,
                     -0.48217572, 0.86229683, 1.08935819, 0.93898739,
                     -0.03030452, 1.43366348],
            str_2=['14f04af9', 'd085da90', '4bcfac83', '81504caf', '2ffef4a9',
                   '08e2f5c4', '07e1af03', 'addbd4a7', '1f6a09ba', '4bfc4d87'],
            int_2=[86967717, 98098830, 51927505, 20372254, 12601730, 20884027,
                   34193846, 10561746, 24867120, 76131025]
        ), index=index)

        # JSON deserialisation always creates unicode strings
        df_mixed.columns = df_mixed.columns.astype('unicode')

        df_roundtrip = pd.read_json(df_mixed.to_json(orient='split'),
                                    orient='split')
        assert_frame_equal(df_mixed, df_roundtrip,
                           check_index_type=True,
                           check_column_type=True,
                           check_frame_type=True,
                           by_blocks=True,
                           check_exact=True)

    def test_frame_nonprintable_bytes(self):
        # GH14256: failing column caused segfaults, if it is not the last one

        class BinaryThing(object):

            def __init__(self, hexed):
                self.hexed = hexed
                if compat.PY2:
                    self.binary = hexed.decode('hex')
                else:
                    self.binary = bytes.fromhex(hexed)

            def __str__(self):
                return self.hexed

        hexed = '574b4454ba8c5eb4f98a8f45'
        binthing = BinaryThing(hexed)

        # verify the proper conversion of printable content
        df_printable = DataFrame({'A': [binthing.hexed]})
        assert df_printable.to_json() == \
            '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)

        # check if non-printable content throws appropriate Exception
        df_nonprintable = DataFrame({'A': [binthing]})
        with pytest.raises(OverflowError):
            df_nonprintable.to_json()

        # the same with multiple columns threw segfaults
        df_mixed = DataFrame({'A': [binthing], 'B': [1]},
                             columns=['A', 'B'])
        with pytest.raises(OverflowError):
            df_mixed.to_json()

        # default_handler should resolve exceptions for non-string types
        assert df_nonprintable.to_json(default_handler=str) == \
            '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)
        assert df_mixed.to_json(default_handler=str) == \
            '{{"A":{{"0":"{hex}"}},"B":{{"0":1}}}}'.format(hex=hexed)

    def test_label_overflow(self):
        # GH14256: buffer length not checked when writing label
        df = pd.DataFrame({'foo': [1337], 'bar' * 100000: [1]})
        assert df.to_json() == \
            '{{"{bar}":{{"0":1}},"foo":{{"0":1337}}}}'.format(
                bar=('bar' * 100000))

    def test_series_non_unique_index(self):
        s = Series(['a', 'b'], index=[1, 1])

        pytest.raises(ValueError, s.to_json, orient='index')

        assert_series_equal(s, read_json(s.to_json(orient='split'),
                                         orient='split', typ='series'))
        unser = read_json(s.to_json(orient='records'),
                          orient='records', typ='series')
        tm.assert_numpy_array_equal(s.values, unser.values)

    def test_series_from_json_to_json(self):

        def _check_orient(series, orient, dtype=None, numpy=False,
                          check_index_type=True):
            series = series.sort_index()
            unser = read_json(series.to_json(orient=orient),
                              typ='series', orient=orient, numpy=numpy,
                              dtype=dtype)
            unser = unser.sort_index()
            if orient == "records" or orient == "values":
                assert_almost_equal(series.values, unser.values)
            else:
                if orient == "split":
                    assert_series_equal(series, unser,
                                        check_index_type=check_index_type)
                else:
                    assert_series_equal(series, unser, check_names=False,
                                        check_index_type=check_index_type)

        def _check_all_orients(series, dtype=None, check_index_type=True):
            _check_orient(series, "columns", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "records", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "split", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "index", dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "values", dtype=dtype)

            _check_orient(series, "columns", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "records", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "split", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "index", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series, "values", dtype=dtype, numpy=True,
                          check_index_type=check_index_type)

        # basic
        _check_all_orients(self.series)
        assert self.series.to_json() == self.series.to_json(orient="index")

        objSeries = Series([str(d) for d in self.objSeries],
                           index=self.objSeries.index,
                           name=self.objSeries.name)
        _check_all_orients(objSeries, dtype=False)

        # empty_series has an empty index with object dtype,
        # which cannot be round-tripped
        assert self.empty_series.index.dtype == np.object_
        _check_all_orients(self.empty_series, check_index_type=False)

        _check_all_orients(self.ts)

        # dtype
        s = Series(lrange(6), index=['a', 'b', 'c', 'd', 'e', 'f'])
        _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64)
        _check_all_orients(Series(s, dtype=np.int), dtype=np.int)

    def test_series_to_json_except(self):
        s = Series([1, 2, 3])
        pytest.raises(ValueError, s.to_json, orient="garbage")

    def test_series_from_json_precise_float(self):
        s = Series([4.56, 4.56, 4.56])
        result = read_json(s.to_json(), typ='series', precise_float=True)
        assert_series_equal(result, s, check_index_type=False)

    def test_frame_from_json_precise_float(self):
        df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
        result = read_json(df.to_json(), precise_float=True)
        assert_frame_equal(result, df, check_index_type=False,
                           check_column_type=False)

    def test_typ(self):

        s = Series(lrange(6), index=['a', 'b', 'c',
                                     'd', 'e', 'f'], dtype='int64')
        result = read_json(s.to_json(), typ=None)
        assert_series_equal(result, s)

    def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        assert_frame_equal(result, df)

        df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['A', 'B', 'C'])
        result = read_json(df.to_json())
        assert_frame_equal(result, df)

    def test_path(self):
        with ensure_clean('test.json') as path:
            for df in [self.frame, self.frame2, self.intframe, self.tsframe,
                       self.mixed_frame]:
                df.to_json(path)
                read_json(path)

    def test_axis_dates(self):

        # frame
        json = self.tsframe.to_json()
        result = read_json(json)
        assert_frame_equal(result, self.tsframe)

        # series
        json = self.ts.to_json()
        result = read_json(json, typ='series')
        assert_series_equal(result, self.ts, check_names=False)
        assert result.name is None

    def test_convert_dates(self):

        # frame
        df = self.tsframe.copy()
        df['date'] = Timestamp('20130101')

        json = df.to_json()
        result = read_json(json)
        assert_frame_equal(result, df)

        df['foo'] = 1.
        json = df.to_json(date_unit='ns')

        result = read_json(json, convert_dates=False)
        expected = df.copy()
        expected['date'] = expected['date'].values.view('i8')
        expected['foo'] = expected['foo'].astype('int64')
        assert_frame_equal(result, expected)

        # series
        ts = Series(Timestamp('20130101'), index=self.ts.index)
        json = ts.to_json()
        result = read_json(json, typ='series')
        assert_series_equal(result, ts)

    def test_convert_dates_infer(self):
        # GH10747
        from pandas.io.json import dumps
        infer_words = ['trade_time', 'date', 'datetime', 'sold_at',
                       'modified', 'timestamp', 'timestamps']
        for infer_word in infer_words:
            data = [{'id': 1, infer_word: 1036713600000}, {'id': 2}]
            expected = DataFrame([[1, Timestamp('2002-11-08')], [2, pd.NaT]],
                                 columns=['id', infer_word])
            result = read_json(dumps(data))[['id', infer_word]]
            assert_frame_equal(result, expected)
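
    # Editor's sketch (illustrative, not from the original suite): the same
    # heuristic also fires for column names that merely end in '_at', which
    # is why 'sold_at' appears in the list above.
    def test_convert_dates_suffix_sketch(self):
        result = read_json('[{"id": 1, "created_at": 1356998400000}]')
        assert result['created_at'].dtype == 'datetime64[ns]'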

    def test_date_format_frame(self):
        df = self.tsframe.copy()

        def test_w_date(date, date_unit=None):
            df['date'] = Timestamp(date)
            df.iloc[1, df.columns.get_loc('date')] = pd.NaT
            df.iloc[5, df.columns.get_loc('date')] = pd.NaT
            if date_unit:
                json = df.to_json(date_format='iso', date_unit=date_unit)
            else:
                json = df.to_json(date_format='iso')
            result = read_json(json)
            assert_frame_equal(result, df)

        test_w_date('20130101 20:43:42.123')
        test_w_date('20130101 20:43:42', date_unit='s')
        test_w_date('20130101 20:43:42.123', date_unit='ms')
        test_w_date('20130101 20:43:42.123456', date_unit='us')
        test_w_date('20130101 20:43:42.123456789', date_unit='ns')

        pytest.raises(ValueError, df.to_json, date_format='iso',
                      date_unit='foo')

    def test_date_format_series(self):
        def test_w_date(date, date_unit=None):
            ts = Series(Timestamp(date), index=self.ts.index)
            ts.iloc[1] = pd.NaT
            ts.iloc[5] = pd.NaT
            if date_unit:
                json = ts.to_json(date_format='iso', date_unit=date_unit)
            else:
                json = ts.to_json(date_format='iso')
            result = read_json(json, typ='series')
            assert_series_equal(result, ts)

        test_w_date('20130101 20:43:42.123')
        test_w_date('20130101 20:43:42', date_unit='s')
        test_w_date('20130101 20:43:42.123', date_unit='ms')
        test_w_date('20130101 20:43:42.123456', date_unit='us')
        test_w_date('20130101 20:43:42.123456789', date_unit='ns')

        ts = Series(Timestamp('20130101 20:43:42.123'), index=self.ts.index)
        pytest.raises(ValueError, ts.to_json, date_format='iso',
                      date_unit='foo')

    def test_date_unit(self):
        df = self.tsframe.copy()
        df['date'] = Timestamp('20130101 20:43:42')
        dl = df.columns.get_loc('date')
        df.iloc[1, dl] = Timestamp('19710101 20:43:42')
        df.iloc[2, dl] = Timestamp('21460101 20:43:42')
        df.iloc[4, dl] = pd.NaT

        for unit in ('s', 'ms', 'us', 'ns'):
            json = df.to_json(date_format='epoch', date_unit=unit)

            # force date unit
            result = read_json(json, date_unit=unit)
            assert_frame_equal(result, df)

            # detect date unit
            result = read_json(json, date_unit=None)
            assert_frame_equal(result, df)
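
    # Editor's sketch (assumed wire format, not an original case): with
    # date_format='epoch' the date_unit just scales the integer written out.
    def test_date_unit_scaling_sketch(self):
        ts = Series([Timestamp('2013-01-01')])
        assert ts.to_json(date_format='epoch', date_unit='s') == \
            '{"0":1356998400}'
        assert ts.to_json(date_format='epoch', date_unit='ms') == \
            '{"0":1356998400000}'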

    def test_weird_nested_json(self):
        # this used to core dump the parser
        s = r'''{
        "status": "success",
        "data": {
        "posts": [
            {
            "id": 1,
            "title": "A blog post",
            "body": "Some useful content"
            },
            {
            "id": 2,
            "title": "Another blog post",
            "body": "More content"
            }
           ]
          }
        }'''

        read_json(s)

    def test_doc_example(self):
        dfj2 = DataFrame(np.random.randn(5, 2), columns=list('AB'))
        dfj2['date'] = Timestamp('20130101')
        dfj2['ints'] = lrange(5)
        dfj2['bools'] = True
        dfj2.index = pd.date_range('20130101', periods=5)

        json = dfj2.to_json()
        result = read_json(json, dtype={'ints': np.int64, 'bools': np.bool_})
        # column order of deserialised JSON objects is not guaranteed
        assert_frame_equal(result, dfj2, check_like=True)

    def test_misc_example(self):

        # with numpy=True the fast path cannot realign unordered records,
        # so the round trip fails
        result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])

        error_msg = """DataFrame\\.index are different

DataFrame\\.index values are different \\(100\\.0 %\\)
\\[left\\]:  Index\\(\\[u?'a', u?'b'\\], dtype='object'\\)
\\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)"""
        with tm.assert_raises_regex(AssertionError, error_msg):
            assert_frame_equal(result, expected, check_index_type=False)

        result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]')
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    @network
    def test_round_trip_exception(self):
        # GH 3867
        csv = 'https://raw.github.com/hayd/lahman2012/master/csvs/Teams.csv'
        df = pd.read_csv(csv)
        s = df.to_json()
        result = pd.read_json(s)
        assert_frame_equal(result.reindex(
            index=df.index, columns=df.columns), df)

    @network
    def test_url(self):
        url = 'https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5'  # noqa
        result = read_json(url, convert_dates=True)
        for c in ['created_at', 'closed_at', 'updated_at']:
            assert result[c].dtype == 'datetime64[ns]'

    def test_timedelta(self):
        converter = lambda x: pd.to_timedelta(x, unit='ms')

        s = Series([timedelta(23), timedelta(seconds=5)])
        assert s.dtype == 'timedelta64[ns]'

        result = pd.read_json(s.to_json(), typ='series').apply(converter)
        assert_series_equal(result, s)

        s = Series([timedelta(23), timedelta(seconds=5)],
                   index=pd.Index([0, 1]))
        assert s.dtype == 'timedelta64[ns]'
        result = pd.read_json(s.to_json(), typ='series').apply(converter)
        assert_series_equal(result, s)

        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        assert frame[0].dtype == 'timedelta64[ns]'
        assert_frame_equal(frame, pd.read_json(frame.to_json())
                           .apply(converter))

        frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)],
                           'b': [1, 2],
                           'c': pd.date_range(start='20130101', periods=2)})

        result = pd.read_json(frame.to_json(date_unit='ns'))
        result['a'] = pd.to_timedelta(result.a, unit='ns')
        result['c'] = pd.to_datetime(result.c)
        assert_frame_equal(frame, result)
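
    # Editor's sketch (assumed wire format, not an original case):
    # timedeltas are serialised as integer milliseconds, which is why the
    # converter above reads them back through pd.to_timedelta.
    def test_timedelta_wire_format_sketch(self):
        s = Series([timedelta(seconds=5)])
        assert s.to_json() == '{"0":5000}'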

    def test_mixed_timedelta_datetime(self):
        frame = DataFrame({'a': [timedelta(23), pd.Timestamp('20130101')]},
                          dtype=object)

        expected = DataFrame({'a': [pd.Timedelta(frame.a[0]).value,
                                    pd.Timestamp(frame.a[1]).value]})
        result = pd.read_json(frame.to_json(date_unit='ns'),
                              dtype={'a': 'int64'})
        assert_frame_equal(result, expected, check_index_type=False)

    def test_default_handler(self):
        value = object()
        frame = DataFrame({'a': [7, value]})
        expected = DataFrame({'a': [7, str(value)]})
        result = pd.read_json(frame.to_json(default_handler=str))
        assert_frame_equal(expected, result, check_index_type=False)

    def test_default_handler_indirect(self):
        from pandas.io.json import dumps

        def default(obj):
            if isinstance(obj, complex):
                return [('mathjs', 'Complex'),
                        ('re', obj.real),
                        ('im', obj.imag)]
            return str(obj)
        df_list = [9, DataFrame({'a': [1, 'STR', complex(4, -5)],
                                 'b': [float('nan'), None, 'N/A']},
                                columns=['a', 'b'])]
        expected = ('[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
                    '["re",4.0],["im",-5.0]],"N\\/A"]]]')
        assert dumps(df_list, default_handler=default,
                     orient="values") == expected

    def test_default_handler_numpy_unsupported_dtype(self):
        # GH12554 to_json raises 'Unhandled numpy dtype 15'
        df = DataFrame({'a': [1, 2.3, complex(4, -5)],
                        'b': [float('nan'), None, complex(1.2, 0)]},
                       columns=['a', 'b'])
        expected = ('[["(1+0j)","(nan+0j)"],'
                    '["(2.3+0j)","(nan+0j)"],'
                    '["(4-5j)","(1.2+0j)"]]')
        assert df.to_json(default_handler=str, orient="values") == expected

    def test_default_handler_raises(self):
        def my_handler_raises(obj):
            raise TypeError("raisin")
        pytest.raises(TypeError,
                      DataFrame({'a': [1, 2, object()]}).to_json,
                      default_handler=my_handler_raises)
        pytest.raises(TypeError,
                      DataFrame({'a': [1, 2, complex(4, -5)]}).to_json,
                      default_handler=my_handler_raises)

    def test_categorical(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
        df["B"] = df["A"]
        expected = df.to_json()

        df["B"] = df["A"].astype('category')
        assert expected == df.to_json()

        s = df["A"]
        sc = df["B"]
        assert s.to_json() == sc.to_json()

    def test_datetime_tz(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        tz_range = pd.date_range('20130101', periods=3, tz='US/Eastern')
        tz_naive = tz_range.tz_convert('utc').tz_localize(None)

        df = DataFrame({
            'A': tz_range,
            'B': pd.date_range('20130101', periods=3)})

        df_naive = df.copy()
        df_naive['A'] = tz_naive
        expected = df_naive.to_json()
        assert expected == df.to_json()

        stz = Series(tz_range)
        s_naive = Series(tz_naive)
        assert stz.to_json() == s_naive.to_json()

    def test_sparse(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = pd.DataFrame(np.random.randn(10, 4))
        df.loc[:8] = np.nan

        sdf = df.to_sparse()
        expected = df.to_json()
        assert expected == sdf.to_json()

        s = pd.Series(np.random.randn(10))
        s.loc[:8] = np.nan
        ss = s.to_sparse()

        expected = s.to_json()
        assert expected == ss.to_json()

    def test_tz_is_utc(self):
        from pandas.io.json import dumps
        exp = '"2013-01-10T05:00:00.000Z"'

        ts = Timestamp('2013-01-10 05:00:00Z')
        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp

        ts = Timestamp('2013-01-10 00:00:00', tz='US/Eastern')
        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp

        ts = Timestamp('2013-01-10 00:00:00-0500')
        assert dumps(ts, iso_dates=True) == exp
        dt = ts.to_pydatetime()
        assert dumps(dt, iso_dates=True) == exp
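
    # Editor's sketch (illustrative, not from the original suite): any
    # fixed-offset or zone-aware input is normalised to UTC on output.
    def test_tz_normalised_to_utc_sketch(self):
        from pandas.io.json import dumps
        ts = Timestamp('2013-06-10 09:30:00', tz='Europe/Berlin')  # UTC+2
        assert dumps(ts, iso_dates=True) == '"2013-06-10T07:30:00.000Z"'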

    def test_tz_range_is_utc(self):
        from pandas.io.json import dumps

        exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
        dfexp = ('{"DT":{'
                 '"0":"2013-01-01T05:00:00.000Z",'
                 '"1":"2013-01-02T05:00:00.000Z"}}')

        tz_range = pd.date_range('2013-01-01 05:00:00Z', periods=2)
        assert dumps(tz_range, iso_dates=True) == exp
        dti = pd.DatetimeIndex(tz_range)
        assert dumps(dti, iso_dates=True) == exp
        df = DataFrame({'DT': dti})
        assert dumps(df, iso_dates=True) == dfexp

        tz_range = pd.date_range('2013-01-01 00:00:00', periods=2,
                                 tz='US/Eastern')
        assert dumps(tz_range, iso_dates=True) == exp
        dti = pd.DatetimeIndex(tz_range)
        assert dumps(dti, iso_dates=True) == exp
        df = DataFrame({'DT': dti})
        assert dumps(df, iso_dates=True) == dfexp

        tz_range = pd.date_range('2013-01-01 00:00:00-0500', periods=2)
        assert dumps(tz_range, iso_dates=True) == exp
        dti = pd.DatetimeIndex(tz_range)
        assert dumps(dti, iso_dates=True) == exp
        df = DataFrame({'DT': dti})
        assert dumps(df, iso_dates=True) == dfexp

    def test_read_inline_jsonl(self):
        # GH9180
        result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_read_s3_jsonl(self, s3_resource):
        pytest.importorskip('s3fs')
        # GH17200

        result = read_json('s3n://pandas-test/items.jsonl', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_read_local_jsonl(self):
        # GH17200
        with ensure_clean('tmp_items.json') as path:
            with open(path, 'w') as outfile:
                outfile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
            result = read_json(path, lines=True)
            expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
            assert_frame_equal(result, expected)

    def test_read_jsonl_unicode_chars(self):
        # GH15132: non-ascii unicode characters
        # \u201d == RIGHT DOUBLE QUOTATION MARK

        # simulate file handle
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        json = StringIO(json)
        result = read_json(json, lines=True)
        expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                             columns=['a', 'b'])
        assert_frame_equal(result, expected)

        # simulate string
        json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
        result = read_json(json, lines=True)
        expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
                             columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_to_jsonl(self):
        # GH9180
        df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
        assert result == expected

        df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
        assert result == expected
        assert_frame_equal(pd.read_json(result, lines=True), df)

        # GH15096: escaped characters in columns and data
        df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
                       columns=["a\\", 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n'
                    '{"a\\\\":"foo\\"","b":"bar"}')
        assert result == expected
        assert_frame_equal(pd.read_json(result, lines=True), df)
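
    # Editor's sketch (behaviour per the to_json docs, not an original
    # case): lines=True is only defined for orient='records'.
    def test_to_jsonl_requires_records_sketch(self):
        df = DataFrame([[1, 2]], columns=['a', 'b'])
        with pytest.raises(ValueError):
            df.to_json(orient='split', lines=True)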

    def test_latin_encoding(self):
        if compat.PY2:
            tm.assert_raises_regex(
                TypeError, r'\[unicode\] is not implemented as a table column')
            return

        # GH 13774
        pytest.skip("encoding not implemented in .to_json(), "
                    "xref #13774")

        values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
                  [b'E\xc9, 17', b'a', b'b', b'c'],
                  [b'EE, 17', b'', b'a', b'b', b'c'],
                  [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
                  [b'', b'a', b'b', b'c'],
                  [b'\xf8\xfc', b'a', b'b', b'c'],
                  [b'A\xf8\xfc', b'', b'a', b'b', b'c'],
                  [np.nan, b'', b'b', b'c'],
                  [b'A\xf8\xfc', np.nan, b'', b'b', b'c']]

        def _try_decode(x, encoding='latin-1'):
            try:
                return x.decode(encoding)
            except AttributeError:
                return x

        # not sure how to remove latin-1 from code in python 2 and 3
        values = [[_try_decode(x) for x in y] for y in values]

        examples = []
        for dtype in ['category', object]:
            for val in values:
                examples.append(Series(val, dtype=dtype))

        def roundtrip(s, encoding='latin-1'):
            with ensure_clean('test.json') as path:
                s.to_json(path, encoding=encoding)
                retr = read_json(path, encoding=encoding)
                assert_series_equal(s, retr, check_categorical=False)

        for s in examples:
            roundtrip(s)

    def test_data_frame_size_after_to_json(self):
        # GH15344
        df = DataFrame({'a': [str(1)]})

        size_before = df.memory_usage(index=True, deep=True).sum()
        df.to_json()
        size_after = df.memory_usage(index=True, deep=True).sum()

        assert size_before == size_after

    @pytest.mark.parametrize('data, expected', [
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
            {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo'),
            {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
                   index=[['a', 'b'], ['c', 'd']]),
            {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
        (Series([1, 2, 3], name='A'),
            {'name': 'A', 'data': [1, 2, 3]}),
        (Series([1, 2, 3], name='A').rename_axis('foo'),
            {'name': 'A', 'data': [1, 2, 3]}),
        (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']]),
            {'name': 'A', 'data': [1, 2]}),
    ])
    def test_index_false_to_json_split(self, data, expected):
        # GH 17394
        # Testing index=False in to_json with orient='split'

        result = data.to_json(orient='split', index=False)
        result = json.loads(result)

        assert result == expected
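
    # Editor's sketch (illustrative, not an original case): with
    # index=False the 'split' payload simply omits the "index" key.
    def test_index_false_split_payload_sketch(self):
        df = DataFrame([[1, 2]], columns=['a', 'b'])
        result = json.loads(df.to_json(orient='split', index=False))
        assert 'index' not in result
        assert result == {'columns': ['a', 'b'], 'data': [[1, 2]]}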

    @pytest.mark.parametrize('data', [
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])),
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo')),
        (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
                   index=[['a', 'b'], ['c', 'd']])),
        (Series([1, 2, 3], name='A')),
        (Series([1, 2, 3], name='A').rename_axis('foo')),
        (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']])),
    ])
    def test_index_false_to_json_table(self, data):
        # GH 17394
        # Testing index=False in to_json with orient='table'

        result = data.to_json(orient='table', index=False)
        result = json.loads(result)

        expected = {
            'schema': pd.io.json.build_table_schema(data, index=False),
            'data': DataFrame(data).to_dict(orient='records')
        }

        assert result == expected

    @pytest.mark.parametrize('orient', [
        'records', 'index', 'columns', 'values'
    ])
    def test_index_false_error_to_json(self, orient):
        # GH 17394
        # Testing error message from to_json with index=False

        df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])

        with tm.assert_raises_regex(ValueError, "'index=False' is only "
                                                "valid when 'orient' is "
                                                "'split' or 'table'"):
            df.to_json(orient=orient, index=False)
Example #20
class TestIntervalTree(object):

    def test_get_loc(self, tree):
        tm.assert_numpy_array_equal(tree.get_loc(1),
                                    np.array([0], dtype='int64'))
        tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)),
                                    np.array([0, 1], dtype='int64'))
        with pytest.raises(KeyError):
            tree.get_loc(-1)

    def test_get_indexer(self, tree):
        tm.assert_numpy_array_equal(
            tree.get_indexer(np.array([1.0, 5.5, 6.5])),
            np.array([0, 4, -1], dtype='int64'))
        with pytest.raises(KeyError):
            tree.get_indexer(np.array([3.0]))

    def test_get_indexer_non_unique(self, tree):
        indexer, missing = tree.get_indexer_non_unique(
            np.array([1.0, 2.0, 6.5]))
        tm.assert_numpy_array_equal(indexer[:1],
                                    np.array([0], dtype='int64'))
        tm.assert_numpy_array_equal(np.sort(indexer[1:3]),
                                    np.array([0, 1], dtype='int64'))
        tm.assert_numpy_array_equal(np.sort(indexer[3:]),
                                    np.array([-1], dtype='int64'))
        tm.assert_numpy_array_equal(missing, np.array([2], dtype='int64'))

    def test_duplicates(self, dtype):
        left = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(left, left + 1)
        tm.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)),
                                    np.array([0, 1, 2], dtype='int64'))

        with pytest.raises(KeyError):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        tm.assert_numpy_array_equal(np.sort(indexer),
                                    np.array([0, 1, 2], dtype='int64'))
        tm.assert_numpy_array_equal(missing, np.array([], dtype='int64'))

    def test_get_loc_closed(self, closed):
        tree = IntervalTree([0], [1], closed=closed)
        for p, errors in [(0, tree.open_left),
                          (1, tree.open_right)]:
            if errors:
                with pytest.raises(KeyError):
                    tree.get_loc(p)
            else:
                tm.assert_numpy_array_equal(tree.get_loc(p),
                                            np.array([0], dtype='int64'))

    @pytest.mark.skipif(compat.is_platform_32bit(),
                        reason="int type mismatch on 32bit")
    @pytest.mark.parametrize('leaf_size', [1, 10, 100, 10000])
    def test_get_indexer_closed(self, closed, leaf_size):
        x = np.arange(1000, dtype='float64')
        found = x.astype('intp')
        not_found = (-1 * np.ones(1000)).astype('intp')

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)
        tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25))

        expected = found if tree.closed_left else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0))

        expected = found if tree.closed_right else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))
Example #21
class TestIntervalTree(object):
    def test_get_loc(self, tree):
        result = tree.get_loc(1)
        expected = np.array([0], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        result = np.sort(tree.get_loc(2))
        expected = np.array([0, 1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(KeyError):
            tree.get_loc(-1)

    def test_get_indexer(self, tree):
        result = tree.get_indexer(np.array([1.0, 5.5, 6.5]))
        expected = np.array([0, 4, -1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(KeyError):
            tree.get_indexer(np.array([3.0]))

    def test_get_indexer_non_unique(self, tree):
        indexer, missing = tree.get_indexer_non_unique(
            np.array([1.0, 2.0, 6.5]))

        result = indexer[:1]
        expected = np.array([0], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        result = np.sort(indexer[1:3])
        expected = np.array([0, 1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        result = np.sort(indexer[3:])
        expected = np.array([-1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        result = missing
        expected = np.array([2], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

    def test_duplicates(self, dtype):
        left = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(left, left + 1)

        result = np.sort(tree.get_loc(0.5))
        expected = np.array([0, 1, 2], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(KeyError):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        result = np.sort(indexer)
        expected = np.array([0, 1, 2], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        result = missing
        expected = np.array([], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

    def test_get_loc_closed(self, closed):
        tree = IntervalTree([0], [1], closed=closed)
        for p, errors in [(0, tree.open_left), (1, tree.open_right)]:
            if errors:
                with pytest.raises(KeyError):
                    tree.get_loc(p)
            else:
                result = tree.get_loc(p)
                expected = np.array([0], dtype='intp')
                tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        'leaf_size',
        [skipif_32bit(1),
         skipif_32bit(10),
         skipif_32bit(100), 10000])
    def test_get_indexer_closed(self, closed, leaf_size):
        x = np.arange(1000, dtype='float64')
        found = x.astype('intp')
        not_found = (-1 * np.ones(1000)).astype('intp')

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)
        tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25))

        expected = found if tree.closed_left else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0))

        expected = found if tree.closed_right else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))

    @pytest.mark.parametrize(
        'left, right, expected',
        [(np.array([0, 1, 4]), np.array([2, 3, 5]), True),
         (np.array([0, 1, 2]), np.array([5, 4, 3]), True),
         (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
         (np.array([0, 2, 4]), np.array([1, 3, 5]), False),
         (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False)])
    @pytest.mark.parametrize('order', map(list, permutations(range(3))))
    def test_is_overlapping(self, closed, order, left, right, expected):
        # GH 23309
        tree = IntervalTree(left[order], right[order], closed=closed)
        result = tree.is_overlapping
        assert result is expected

    @pytest.mark.parametrize('order', map(list, permutations(range(3))))
    def test_is_overlapping_endpoints(self, closed, order):
        """shared endpoints are marked as overlapping"""
        # GH 23309
        left, right = np.arange(3), np.arange(1, 4)
        tree = IntervalTree(left[order], right[order], closed=closed)
        result = tree.is_overlapping
        expected = closed == 'both'
        assert result is expected

    @pytest.mark.parametrize(
        'left, right',
        [(np.array([], dtype='int64'), np.array([], dtype='int64')),
         (np.array([0], dtype='int64'), np.array([1], dtype='int64')),
         (np.array([np.nan]), np.array([np.nan])),
         (np.array([np.nan] * 3), np.array([np.nan] * 3))])
    def test_is_overlapping_trivial(self, closed, left, right):
        # GH 23309
        tree = IntervalTree(left, right, closed=closed)
        assert tree.is_overlapping is False
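
    # Editor's sketch (not from the original suite): the same property is
    # exposed publicly as IntervalIndex.is_overlapping from pandas 0.24,
    # which these IntervalTree tests underpin.
    def test_is_overlapping_public_api_sketch(self):
        import pandas as pd
        assert pd.IntervalIndex.from_arrays([0, 1], [2, 3]).is_overlapping
        assert not pd.IntervalIndex.from_arrays([0, 2], [1, 3]).is_overlapping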

    @pytest.mark.skipif(compat.is_platform_32bit(), reason='GH 23440')
    def test_construction_overflow(self):
        # GH 25485
        left, right = np.arange(101), [np.iinfo(np.int64).max] * 101
        tree = IntervalTree(left, right)

        # pivot should be average of left/right medians
        result = tree.root.pivot
        expected = (50 + np.iinfo(np.int64).max) / 2
        assert result == expected
Example #22
        msg = "Could not import '{}'".format(package)
        if min_version:
            msg += " satisfying a min_version of {}".format(min_version)
        return pytest.mark.skipif(
            not safe_import(package, min_version=min_version), reason=msg
        )(func)
    return decorated_func


skip_if_no_mpl = pytest.mark.skipif(_skip_if_no_mpl(),
                                    reason="Missing matplotlib dependency")
skip_if_mpl_1_5 = pytest.mark.skipif(_skip_if_mpl_1_5(),
                                     reason="matplotlib 1.5")
xfail_if_mpl_2_2 = pytest.mark.xfail(_skip_if_mpl_2_2(),
                                     reason="matplotlib 2.2")
skip_if_32bit = pytest.mark.skipif(is_platform_32bit(),
                                   reason="skipping for 32 bit")
skip_if_windows = pytest.mark.skipif(is_platform_windows(),
                                     reason="Running on Windows")
skip_if_windows_python_3 = pytest.mark.skipif(is_platform_windows() and PY3,
                                              reason=("not used on python3/"
                                                      "win32"))
skip_if_has_locale = pytest.mark.skipif(_skip_if_has_locale(),
                                        reason="Specific locale is set {lang}"
                                        .format(lang=locale.getlocale()[0]))
skip_if_not_us_locale = pytest.mark.skipif(_skip_if_not_us_locale(),
                                           reason="Specific locale is set "
                                           "{lang}".format(
                                               lang=locale.getlocale()[0]))
skip_if_no_scipy = pytest.mark.skipif(_skip_if_no_scipy(),
                                      reason="Missing SciPy requirement")
Example #23
def _skip_if_no_mpl():
    mod = safe_import("matplotlib")
    if mod:
        mod.use("Agg", warn=False)
    else:
        return True


def _skip_if_mpl_1_5():
    mod = safe_import("matplotlib")

    if mod:
        v = mod.__version__
        if LooseVersion(v) > LooseVersion('1.4.3') or str(v)[0] == '0':
            return True
        else:
            mod.use("Agg", warn=False)


skip_if_no_mpl = pytest.mark.skipif(_skip_if_no_mpl(),
                                    reason="Missing matplotlib dependency")
skip_if_mpl_1_5 = pytest.mark.skipif(_skip_if_mpl_1_5(),
                                     reason="matplotlib 1.5")
skip_if_32bit = pytest.mark.skipif(is_platform_32bit(),
                                   reason="skipping for 32 bit")
skip_if_windows = pytest.mark.skipif(is_platform_windows(),
                                     reason="Running on Windows")
skip_if_windows_python_3 = pytest.mark.skipif(is_platform_windows() and PY3,
                                              reason=("not used on python3/"
                                                      "win32"))
Example #24
    def test_ndarray_compat_properties(self):
        if compat.is_platform_32bit():
            pytest.skip("skipping on 32bit")
        super(TestPeriodIndex, self).test_ndarray_compat_properties()