Example #1
    def test_intersect(self):
        def _check_correct(a, b, expected):
            result = a.intersect(b)
            assert (result.equals(expected))

        def _check_length_exc(a, longer):
            self.assertRaises(Exception, a.intersect, longer)

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
            expected = BlockIndex(TEST_LENGTH, eloc, elen)
            longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen)

            _check_correct(xindex, yindex, expected)
            _check_correct(xindex.to_int_index(), yindex.to_int_index(),
                           expected.to_int_index())

            _check_length_exc(xindex, longer_index)
            _check_length_exc(xindex.to_int_index(),
                              longer_index.to_int_index())

        if compat.is_platform_windows():
            pytest.skip("segfaults on win-64 when all tests are run")
        check_cases(_check_case)
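Note: several snippets in this listing use an inline skip like the one above. A minimal hedged sketch of the equivalent declarative skip (the marker name and test name below are made up for illustration; the same `pytest.mark.skipif(is_platform_windows(), ...)` pattern appears verbatim in Examples #43, #44 and #55):

import pytest
from pandas.compat import is_platform_windows

# Declarative form of the inline skip used above; assumes the same
# "segfaults on win-64" limitation applies.
win64_segfault = pytest.mark.skipif(
    is_platform_windows(),
    reason="segfaults on win-64 when all tests are run",
)

@win64_segfault
def test_intersect_declarative_skip():
    ...  # body would mirror the _check_case-driven checks above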
Example #2
    def test_encode(self, html_encoding_file):
        _, encoding = os.path.splitext(
            os.path.basename(html_encoding_file)
        )[0].split('_')

        try:
            with open(html_encoding_file, 'rb') as fobj:
                from_string = self.read_html(fobj.read(), encoding=encoding,
                                             index_col=0).pop()

            with open(html_encoding_file, 'rb') as fobj:
                from_file_like = self.read_html(BytesIO(fobj.read()),
                                                encoding=encoding,
                                                index_col=0).pop()

            from_filename = self.read_html(html_encoding_file,
                                           encoding=encoding,
                                           index_col=0).pop()
            tm.assert_frame_equal(from_string, from_file_like)
            tm.assert_frame_equal(from_string, from_filename)
        except Exception:
            # seems utf-16/32 fail on windows
            if is_platform_windows():
                if '16' in encoding or '32' in encoding:
                    pytest.skip()
                raise
Example #3
    def test_constructor_compound_dtypes(self):
        # GH 5191
        # compound dtypes should raise not-implementederror

        def f(dtype):
            data = list(itertools.repeat((datetime(2001, 1, 1),
                                          "aa", 20), 9))
            return DataFrame(data=data,
                             columns=["A", "B", "C"],
                             dtype=dtype)

        msg = ("compound dtypes are not implemented in the DataFrame"
               " constructor")
        with pytest.raises(NotImplementedError, match=msg):
            f([("A", "datetime64[h]"),
               ("B", "str"),
               ("C", "int32")])

        # these work (though results may be unexpected)
        f('int64')
        f('float64')

        # 10822
        # invalid error message on dt inference
        if not compat.is_platform_windows():
            f('M8[ns]')
Example #4
    def test_header_not_enough_lines_as_recarray(self):

        if compat.is_platform_windows():
            raise nose.SkipTest(
                "segfaults on win-64, only when all tests are run")

        data = ('skip this\n'
                'skip this\n'
                'a,b,c\n'
                '1,2,3\n'
                '4,5,6')

        reader = TextReader(StringIO(data), delimiter=',', header=2,
                            as_recarray=True)
        header = reader.header
        expected = [['a', 'b', 'c']]
        self.assertEqual(header, expected)

        recs = reader.read()
        expected = {'a': [1, 4], 'b': [2, 5], 'c': [3, 6]}
        assert_array_dicts_equal(expected, recs)

        # not enough rows
        self.assertRaises(parser.CParserError, TextReader, StringIO(data),
                          delimiter=',', header=5, as_recarray=True)
Example #5
    def test_read_csv(self):
        if not compat.PY3:
            if compat.is_platform_windows():
                prefix = u("file:///")
            else:
                prefix = u("file://")

            fname = prefix + compat.text_type(self.csv1)
            self.read_csv(fname, index_col=0, parse_dates=True)
Example #6
    def test_convert_rows_list_to_csv_str(self):
        rows_list = ["aaa", "bbb", "ccc"]
        ret = tm.convert_rows_list_to_csv_str(rows_list)

        if compat.is_platform_windows():
            expected = "aaa\r\nbbb\r\nccc\r\n"
        else:
            expected = "aaa\nbbb\nccc\n"

        assert ret == expected
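A small sketch (the helper name expected_csv is hypothetical, not part of pandas) of how the platform branch in the example above could be factored into one place:

from pandas.compat import is_platform_windows

# Hypothetical helper: builds the expected CSV text with the same line
# terminator rule the test above branches on.
def expected_csv(rows):
    terminator = "\r\n" if is_platform_windows() else "\n"
    return "".join(row + terminator for row in rows)

# expected_csv(["aaa", "bbb", "ccc"]) gives "aaa\r\nbbb\r\nccc\r\n" on
# Windows and "aaa\nbbb\nccc\n" elsewhere, matching the assertions above.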
Example #7
    def test_invalid_index_types(self):

        # test all index types
        for i in [tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)]:
            self.assertRaises(TypeError, lambda: frequencies.infer_freq(i))

        # GH 10822
        # odd error message on conversions to datetime for unicode
        if not is_platform_windows():
            for i in [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]:
                self.assertRaises(ValueError, lambda: frequencies.infer_freq(i))
Example #8
    def test_pass_dtype_as_recarray(self):
        if compat.is_platform_windows() and self.low_memory:
            raise nose.SkipTest("segfaults on win-64, only when all tests are run")

        data = """\
one,two
1,2.5
2,3.5
3,4.5
4,5.5"""

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = self.read_csv(StringIO(data), dtype={"one": "u1", 1: "S1"}, as_recarray=True)
            self.assertEqual(result["one"].dtype, "u1")
            self.assertEqual(result["two"].dtype, "S1")
Example #9
    def test_pass_dtype_as_recarray(self):
        if compat.is_platform_windows() and self.low_memory:
            raise nose.SkipTest(
                "segfaults on win-64, only when all tests are run")

        data = """\
one,two
1,2.5
2,3.5
3,4.5
4,5.5"""

        result = self.read_csv(StringIO(data), dtype={'one': 'u1', 1: 'S1'},
                               as_recarray=True)
        self.assertEqual(result['one'].dtype, 'u1')
        self.assertEqual(result['two'].dtype, 'S1')
Example #10
    def test_invalid_index_types(self):

        # test all index types
        for i in [
                tm.makeIntIndex(10),
                tm.makeFloatIndex(10),
                tm.makePeriodIndex(10)
        ]:
            self.assertRaises(TypeError, lambda: frequencies.infer_freq(i))

        # GH 10822
        # odd error message on conversions to datetime for unicode
        if not is_platform_windows():
            for i in [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]:
                self.assertRaises(ValueError,
                                  lambda: frequencies.infer_freq(i))
Example #11
    def test_constructor_bad_file(self):
        if is_platform_windows():
            raise nose.SkipTest("skipping construction error messages "
                                "tests on windows")

        non_file = StringIO('I am not a file')
        non_file.fileno = lambda: -1

        msg = "Invalid argument"
        tm.assertRaisesRegexp(mmap.error, msg, common.MMapWrapper, non_file)

        target = open(self.mmap_file, 'r')
        target.close()

        msg = "I/O operation on closed file"
        tm.assertRaisesRegexp(ValueError, msg, common.MMapWrapper, target)
Example #12
 def test_encode(self):
     assert self.files, 'no files read from the data folder'
     for f in self.files:
         _, encoding = _lang_enc(f)
         try:
             from_string = self.read_string(f, encoding).pop()
             from_file_like = self.read_file_like(f, encoding).pop()
             from_filename = self.read_filename(f, encoding).pop()
             tm.assert_frame_equal(from_string, from_file_like)
             tm.assert_frame_equal(from_string, from_filename)
         except Exception:
             # seems utf-16/32 fail on windows
             if is_platform_windows():
                 if '16' in encoding or '32' in encoding:
                     continue
                 raise
Example #13
 def test_encode(self):
     assert self.files, 'no files read from the data folder'
     for f in self.files:
         _, encoding = _lang_enc(f)
         try:
             from_string = self.read_string(f, encoding).pop()
             from_file_like = self.read_file_like(f, encoding).pop()
             from_filename = self.read_filename(f, encoding).pop()
             tm.assert_frame_equal(from_string, from_file_like)
             tm.assert_frame_equal(from_string, from_filename)
         except Exception:
             # seems utf-16/32 fail on windows
             if is_platform_windows():
                 if '16' in encoding or '32' in encoding:
                     continue
                 raise
Example #14
    def test_itertuples(self):
        for i, tup in enumerate(self.frame.itertuples()):
            s = self.klass._constructor_sliced(tup[1:])
            s.name = tup[0]
            expected = self.frame.iloc[i, :].reset_index(drop=True)
            self._assert_series_equal(s, expected)

        df = self.klass({
            'floats': np.random.randn(5),
            'ints': lrange(5)
        },
                        columns=['floats', 'ints'])

        for tup in df.itertuples(index=False):
            assert isinstance(tup[1], (int, long))

        df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]})
        dfaa = df[['a', 'a']]

        assert (list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)])

        # repr will be int/long on 32-bit/windows
        if not (compat.is_platform_windows() or compat.is_platform_32bit()):
            assert (repr(list(df.itertuples(
                name=None))) == '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]')

        tup = next(df.itertuples(name='TestName'))

        if LooseVersion(sys.version) >= LooseVersion('2.7'):
            assert tup._fields == ('Index', 'a', 'b')
            assert (tup.Index, tup.a, tup.b) == tup
            assert type(tup).__name__ == 'TestName'

        df.columns = ['def', 'return']
        tup2 = next(df.itertuples(name='TestName'))
        assert tup2 == (0, 1, 4)

        if LooseVersion(sys.version) >= LooseVersion('2.7'):
            assert tup2._fields == ('Index', '_1', '_2')

        df3 = DataFrame({'f' + str(i): [i] for i in range(1024)})
        # will raise SyntaxError if trying to create namedtuple
        tup3 = next(df3.itertuples())
        assert not hasattr(tup3, '_fields')
        assert isinstance(tup3, tuple)
Example #15
def test_tar_gz_to_different_filename():
    with tm.ensure_clean(filename=".foo") as file:
        pd.DataFrame(
            [["1", "2"]],
            columns=["foo", "bar"],
        ).to_csv(file, compression={"method": "tar", "mode": "w:gz"}, index=False)
        with gzip.open(file) as uncompressed:
            with tarfile.TarFile(fileobj=uncompressed) as archive:
                members = archive.getmembers()
                assert len(members) == 1
                content = archive.extractfile(members[0]).read().decode("utf8")

                if is_platform_windows():
                    expected = "foo,bar\r\n1,2\r\n"
                else:
                    expected = "foo,bar\n1,2\n"

                assert content == expected
Example #16
 def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
     if data.dtype.fill_value != 0:
         pass
     elif all_arithmetic_operators.strip("_") not in [
         "mul",
         "rmul",
         "floordiv",
         "rfloordiv",
         "pow",
         "mod",
         "rmod",
     ]:
         mark = pytest.mark.xfail(reason="result dtype.fill_value mismatch")
         request.node.add_marker(mark)
     elif is_platform_windows() or not IS64:
         mark = pytest.mark.xfail(reason="results are int32, expected int64")
         request.node.add_marker(mark)
     super().test_arith_frame_with_scalar(data, all_arithmetic_operators)
Example #17
    def test_pass_dtype_as_recarray(self):
        if compat.is_platform_windows() and self.low_memory:
            pytest.skip(
                "segfaults on win-64, only when all tests are run")

        data = """\
one,two
1,2.5
2,3.5
3,4.5
4,5.5"""

        with tm.assert_produces_warning(
                FutureWarning, check_stacklevel=False):
            result = self.read_csv(StringIO(data), dtype={
                'one': 'u1', 1: 'S1'}, as_recarray=True)
            self.assertEqual(result['one'].dtype, 'u1')
            self.assertEqual(result['two'].dtype, 'S1')
Example #18
    def test_pass_dtype_as_recarray(self):
        if compat.is_platform_windows() and self.low_memory:
            raise nose.SkipTest(
                "segfaults on win-64, only when all tests are run")

        data = """\
one,two
1,2.5
2,3.5
3,4.5
4,5.5"""

        with tm.assert_produces_warning(
                FutureWarning, check_stacklevel=False):
            result = self.read_csv(StringIO(data), dtype={
                'one': 'u1', 1: 'S1'}, as_recarray=True)
            self.assertEqual(result['one'].dtype, 'u1')
            self.assertEqual(result['two'].dtype, 'S1')
Example #19
def test_floating_array_disallows_float16(request):
    # GH#44715
    arr = np.array([1, 2], dtype=np.float16)
    mask = np.array([False, False])

    msg = "FloatingArray does not support np.float16 dtype"
    with pytest.raises(TypeError, match=msg):
        FloatingArray(arr, mask)

    if np_version_under1p19 or (locale.getlocale()[0] != "en_US"
                                and not is_platform_windows()):
        # the locale condition may need to be refined; this fails on
        #  the CI in the ZH_CN build
        mark = pytest.mark.xfail(
            reason="numpy does not raise on np.dtype('Float16')")
        request.node.add_marker(mark)

    with pytest.raises(TypeError, match="data type 'Float16' not understood"):
        pd.array([1.0, 2.0], dtype="Float16")
Example #20
    def test_numpy_array_equal_object_message(self):

        if is_platform_windows():
            raise nose.SkipTest("windows has incomparable line-endings "
                                "and uses L on the shape")

        a = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-01')])
        b = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')])

        expected = """numpy array are different

numpy array values are different \\(50\\.0 %\\)
\\[left\\]:  \\[2011-01-01 00:00:00, 2011-01-01 00:00:00\\]
\\[right\\]: \\[2011-01-01 00:00:00, 2011-01-02 00:00:00\\]"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(a, b)
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(a, b)
Example #21
def test_close_file_handle_on_invalid_usecols(all_parsers):
    # GH 45384
    parser = all_parsers

    error = ValueError
    if parser.engine == "pyarrow":
        pyarrow = pytest.importorskip("pyarrow")
        error = pyarrow.lib.ArrowKeyError
        if is_ci_environment() and (is_platform_windows() or is_platform_mac()):
            # GH#45547 causes timeouts on windows/mac builds
            pytest.skip("GH#45547 causing timeouts on windows/mac builds 2022-01-22")

    with tm.ensure_clean("test.csv") as fname:
        Path(fname).write_text("col1,col2\na,b\n1,2")
        with tm.assert_produces_warning(False):
            with pytest.raises(error, match="col3"):
                parser.read_csv(fname, usecols=["col1", "col2", "col3"])
        # unlink fails on windows if file handles still point to it
        os.unlink(fname)
Example #22
    def test_constructor_bad_file(self):
        non_file = StringIO('I am not a file')
        non_file.fileno = lambda: -1

        # the error raised is different on Windows
        if is_platform_windows():
            msg = "The parameter is incorrect"
            err = OSError
        else:
            msg = "[Errno 22]"
            err = mmap.error

        tm.assertRaisesRegexp(err, msg, common.MMapWrapper, non_file)

        target = open(self.mmap_file, 'r')
        target.close()

        msg = "I/O operation on closed file"
        tm.assertRaisesRegexp(ValueError, msg, common.MMapWrapper, target)
Example #23
    def test_constructor_compound_dtypes(self):
        # GH 5191
        # compound dtypes should raise not-implementederror

        def f(dtype):
            data = list(itertools.repeat((datetime(2001, 1, 1), "aa", 20), 9))
            return DataFrame(data=data, columns=["A", "B", "C"], dtype=dtype)

        pytest.raises(NotImplementedError, f, [("A", "datetime64[h]"),
                                               ("B", "str"), ("C", "int32")])

        # these work (though results may be unexpected)
        f('int64')
        f('float64')

        # 10822
        # invalid error message on dt inference
        if not compat.is_platform_windows():
            f('M8[ns]')
Example #24
    def test_compact_ints_as_recarray(self):
        if compat.is_platform_windows() and self.low_memory:
            raise nose.SkipTest(
                "segfaults on win-64, only when all tests are run")

        data = ('0,1,0,0\n'
                '1,1,0,0\n'
                '0,1,0,1')

        result = self.read_csv(StringIO(data), delimiter=',', header=None,
                               compact_ints=True, as_recarray=True)
        ex_dtype = np.dtype([(str(i), 'i1') for i in range(4)])
        self.assertEqual(result.dtype, ex_dtype)

        result = self.read_csv(StringIO(data), delimiter=',', header=None,
                               as_recarray=True, compact_ints=True,
                               use_unsigned=True)
        ex_dtype = np.dtype([(str(i), 'u1') for i in range(4)])
        self.assertEqual(result.dtype, ex_dtype)
Example #25
    def test_replace_series(self, how, to_key, from_key):
        if from_key == 'bool' and how == 'series' and compat.PY3:
            # doesn't work in PY3, though ...dict_from_bool works fine
            pytest.skip("doesn't work as in PY3")

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        if (from_key.startswith('datetime') and to_key.startswith('datetime')):
            # tested below
            return
        elif from_key in ['datetime64[ns, US/Eastern]', 'datetime64[ns, UTC]']:
            # tested below
            return

        if how == 'dict':
            replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
        elif how == 'series':
            replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
        else:
            raise ValueError

        result = obj.replace(replacer)

        if ((from_key == 'float64' and to_key in ('int64')) or
            (from_key == 'complex128' and to_key in ('int64', 'float64'))):

            if compat.is_platform_32bit() or compat.is_platform_windows():
                pytest.skip("32-bit platform buggy: {0} -> {1}".format(
                    from_key, to_key))

            # Expected: do not downcast by replacement
            exp = pd.Series(self.rep[to_key],
                            index=index,
                            name='yyy',
                            dtype=from_key)

        else:
            exp = pd.Series(self.rep[to_key], index=index, name='yyy')
            assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)
Example #26
    def test_replace_series(self, how, to_key, from_key):
        if from_key == "bool" and how == "series":
            # doesn't work in PY3, though ...dict_from_bool works fine
            pytest.skip("doesn't work as in PY3")

        index = pd.Index([3, 4], name="xxx")
        obj = pd.Series(self.rep[from_key], index=index, name="yyy")
        assert obj.dtype == from_key

        if from_key.startswith("datetime") and to_key.startswith("datetime"):
            # tested below
            return
        elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]:
            # tested below
            return

        if how == "dict":
            replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
        elif how == "series":
            replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
        else:
            raise ValueError

        result = obj.replace(replacer)

        if (from_key == "float64" and to_key in ("int64")) or (
                from_key == "complex128" and to_key in ("int64", "float64")):

            if compat.is_platform_32bit() or compat.is_platform_windows():
                pytest.skip("32-bit platform buggy: {0} -> {1}".format(
                    from_key, to_key))

            # Expected: do not downcast by replacement
            exp = pd.Series(self.rep[to_key],
                            index=index,
                            name="yyy",
                            dtype=from_key)

        else:
            exp = pd.Series(self.rep[to_key], index=index, name="yyy")
            assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)
Example #27
    def test_itertuples(self):
        for i, tup in enumerate(self.frame.itertuples()):
            s = self.klass._constructor_sliced(tup[1:])
            s.name = tup[0]
            expected = self.frame.iloc[i, :].reset_index(drop=True)
            self._assert_series_equal(s, expected)

        df = self.klass({'floats': np.random.randn(5),
                         'ints': lrange(5)}, columns=['floats', 'ints'])

        for tup in df.itertuples(index=False):
            assert isinstance(tup[1], (int, long))

        df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]})
        dfaa = df[['a', 'a']]

        assert (list(dfaa.itertuples()) ==
                [(0, 1, 1), (1, 2, 2), (2, 3, 3)])

        # repr will be int/long on 32-bit/windows
        if not (compat.is_platform_windows() or compat.is_platform_32bit()):
            assert (repr(list(df.itertuples(name=None))) ==
                    '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]')

        tup = next(df.itertuples(name='TestName'))

        if sys.version >= LooseVersion('2.7'):
            assert tup._fields == ('Index', 'a', 'b')
            assert (tup.Index, tup.a, tup.b) == tup
            assert type(tup).__name__ == 'TestName'

        df.columns = ['def', 'return']
        tup2 = next(df.itertuples(name='TestName'))
        assert tup2 == (0, 1, 4)

        if sys.version >= LooseVersion('2.7'):
            assert tup2._fields == ('Index', '_1', '_2')

        df3 = DataFrame({'f' + str(i): [i] for i in range(1024)})
        # will raise SyntaxError if trying to create namedtuple
        tup3 = next(df3.itertuples())
        assert not hasattr(tup3, '_fields')
        assert isinstance(tup3, tuple)
Example #28
    def test_constructor_bad_file(self):
        non_file = StringIO('I am not a file')
        non_file.fileno = lambda: -1

        # the error raised is different on Windows
        if is_platform_windows():
            msg = "The parameter is incorrect"
            err = OSError
        else:
            msg = "Invalid argument"
            err = mmap.error

        tm.assertRaisesRegexp(err, msg, common.MMapWrapper, non_file)

        target = open(self.mmap_file, 'r')
        target.close()

        msg = "I/O operation on closed file"
        tm.assertRaisesRegexp(ValueError, msg, common.MMapWrapper, target)
Example #29
    def test_numpy_array_equal_object_message(self):

        if is_platform_windows():
            pytest.skip("windows has incomparable line-endings "
                        "and uses L on the shape")

        a = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-01')])
        b = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')])

        expected = """numpy array are different

numpy array values are different \\(50\\.0 %\\)
\\[left\\]:  \\[2011-01-01 00:00:00, 2011-01-01 00:00:00\\]
\\[right\\]: \\[2011-01-01 00:00:00, 2011-01-02 00:00:00\\]"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(a, b)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(a, b)
Example #30
    def test_replace_series(self, how, to_key, from_key):
        if from_key == 'bool' and how == 'series' and compat.PY3:
            # doesn't work in PY3, though ...dict_from_bool works fine
            pytest.skip("doesn't work as in PY3")

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        if (from_key.startswith('datetime') and to_key.startswith('datetime')):
            # tested below
            return
        elif from_key in ['datetime64[ns, US/Eastern]', 'datetime64[ns, UTC]']:
            # tested below
            return

        if how == 'dict':
            replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
        elif how == 'series':
            replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
        else:
            raise ValueError

        result = obj.replace(replacer)

        if ((from_key == 'float64' and to_key in ('int64')) or
            (from_key == 'complex128' and
             to_key in ('int64', 'float64'))):

            if compat.is_platform_32bit() or compat.is_platform_windows():
                pytest.skip("32-bit platform buggy: {0} -> {1}".format
                            (from_key, to_key))

            # Expected: do not downcast by replacement
            exp = pd.Series(self.rep[to_key], index=index,
                            name='yyy', dtype=from_key)

        else:
            exp = pd.Series(self.rep[to_key], index=index, name='yyy')
            assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)
Example #31
    def test_itertuples(self, float_frame):
        for i, tup in enumerate(float_frame.itertuples()):
            s = self.klass._constructor_sliced(tup[1:])
            s.name = tup[0]
            expected = float_frame.iloc[i, :].reset_index(drop=True)
            self._assert_series_equal(s, expected)

        df = self.klass({
            "floats": np.random.randn(5),
            "ints": range(5)
        },
                        columns=["floats", "ints"])

        for tup in df.itertuples(index=False):
            assert isinstance(tup[1], int)

        df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]})
        dfaa = df[["a", "a"]]

        assert list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)]

        # repr with int on 32-bit/windows
        if not (compat.is_platform_windows() or compat.is_platform_32bit()):
            assert (repr(list(df.itertuples(
                name=None))) == "[(0, 1, 4), (1, 2, 5), (2, 3, 6)]")

        tup = next(df.itertuples(name="TestName"))
        assert tup._fields == ("Index", "a", "b")
        assert (tup.Index, tup.a, tup.b) == tup
        assert type(tup).__name__ == "TestName"

        df.columns = ["def", "return"]
        tup2 = next(df.itertuples(name="TestName"))
        assert tup2 == (0, 1, 4)
        assert tup2._fields == ("Index", "_1", "_2")

        df3 = DataFrame({"f" + str(i): [i] for i in range(1024)})
        # will raise SyntaxError if trying to create namedtuple
        tup3 = next(df3.itertuples())
        assert not hasattr(tup3, "_fields")
        assert isinstance(tup3, tuple)
Example #32
    def test_constructor_bad_file(self, mmap_file):
        non_file = StringIO('I am not a file')
        non_file.fileno = lambda: -1

        # the error raised is different on Windows
        if is_platform_windows():
            msg = "The parameter is incorrect"
            err = OSError
        else:
            msg = "[Errno 22]"
            err = mmap.error

        with pytest.raises(err, match=msg):
            icom.MMapWrapper(non_file)

        target = open(mmap_file, 'r')
        target.close()

        msg = "I/O operation on closed file"
        with pytest.raises(ValueError, match=msg):
            icom.MMapWrapper(target)
Example #33
    def test_numpy_string_dtype_as_recarray(self):
        data = """\
a,1
aa,2
aaa,3
aaaa,4
aaaaa,5"""

        if compat.is_platform_windows():
            raise nose.SkipTest("segfaults on win-64, only when all tests are run")

        def _make_reader(**kwds):
            return TextReader(StringIO(data), delimiter=',', header=None,
                              **kwds)

        reader = _make_reader(dtype='S4', as_recarray=True)
        result = reader.read()
        self.assertEqual(result['0'].dtype, 'S4')
        ex_values = np.array(['a', 'aa', 'aaa', 'aaaa', 'aaaa'], dtype='S4')
        self.assertTrue((result['0'] == ex_values).all())
        self.assertEqual(result['1'].dtype, 'S4')
Example #34
    def test_constructor_bad_file(self, mmap_file):
        non_file = StringIO("I am not a file")
        non_file.fileno = lambda: -1

        # the error raised is different on Windows
        if is_platform_windows():
            msg = "The parameter is incorrect"
            err = OSError
        else:
            msg = "[Errno 22]"
            err = mmap.error

        with pytest.raises(err, match=msg):
            icom._MMapWrapper(non_file)

        target = open(mmap_file)
        target.close()

        msg = "I/O operation on closed file"
        with pytest.raises(ValueError, match=msg):
            icom._MMapWrapper(target)
Example #35
def test_maybe_promote_datetimetz_with_datetimetz(tz_aware_fixture,
                                                  tz_aware_fixture2):
    dtype = DatetimeTZDtype(tz=tz_aware_fixture)
    fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture2)

    from dateutil.tz import tzlocal

    if is_platform_windows() and tz_aware_fixture2 == tzlocal():
        pytest.xfail("Cannot process fill_value with this dtype, see GH 24310")

    # create array of given dtype; casts "1" to correct dtype
    fill_value = pd.Series([10**9], dtype=fill_dtype)[0]

    # filling datetimetz with datetimetz casts to object, unless tz matches
    exp_val_for_scalar = fill_value
    if dtype.tz == fill_dtype.tz:
        expected_dtype = dtype
    else:
        expected_dtype = np.dtype(object)
        pytest.xfail("fails to cast to object")

    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
Example #36
    def test_replace_series(self, how, to_key, from_key):
        index = pd.Index([3, 4], name="xxx")
        obj = pd.Series(self.rep[from_key], index=index, name="yyy")
        assert obj.dtype == from_key

        if from_key.startswith("datetime") and to_key.startswith("datetime"):
            # tested below
            return
        elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]:
            # tested below
            return

        if how == "dict":
            replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
        elif how == "series":
            replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
        else:
            raise ValueError

        result = obj.replace(replacer)

        if (from_key == "float64" and to_key in ("int64")) or (
                from_key == "complex128" and to_key in ("int64", "float64")):

            if not IS64 or is_platform_windows():
                pytest.skip(f"32-bit platform buggy: {from_key} -> {to_key}")

            # Expected: do not downcast by replacement
            exp = pd.Series(self.rep[to_key],
                            index=index,
                            name="yyy",
                            dtype=from_key)

        else:
            exp = pd.Series(self.rep[to_key], index=index, name="yyy")
            assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)
Example #37
    def test_encode(self, f):
        _, encoding = os.path.splitext(os.path.basename(f))[0].split('_')

        try:
            with open(f, 'rb') as fobj:
                from_string = self.read_html(fobj.read(), encoding=encoding,
                                             index_col=0).pop()

            with open(f, 'rb') as fobj:
                from_file_like = self.read_html(BytesIO(fobj.read()),
                                                encoding=encoding,
                                                index_col=0).pop()

            from_filename = self.read_html(f, encoding=encoding,
                                           index_col=0).pop()
            tm.assert_frame_equal(from_string, from_file_like)
            tm.assert_frame_equal(from_string, from_filename)
        except Exception:
            # seems utf-16/32 fail on windows
            if is_platform_windows():
                if '16' in encoding or '32' in encoding:
                    pytest.skip()
                raise
Example #38
    def test_compact_ints_as_recarray(self):
        if compat.is_platform_windows():
            raise nose.SkipTest(
                "segfaults on win-64, only when all tests are run")

        data = ('0,1,0,0\n'
                '1,1,0,0\n'
                '0,1,0,1')

        with tm.assert_produces_warning(
                FutureWarning, check_stacklevel=False):
            result = self.read_csv(StringIO(data), delimiter=',', header=None,
                                   compact_ints=True, as_recarray=True)
            ex_dtype = np.dtype([(str(i), 'i1') for i in range(4)])
            self.assertEqual(result.dtype, ex_dtype)

        with tm.assert_produces_warning(
                FutureWarning, check_stacklevel=False):
            result = self.read_csv(StringIO(data), delimiter=',', header=None,
                                   as_recarray=True, compact_ints=True,
                                   use_unsigned=True)
            ex_dtype = np.dtype([(str(i), 'u1') for i in range(4)])
            self.assertEqual(result.dtype, ex_dtype)
Example #39
    def test_constructor_compound_dtypes(self):
        # GH 5191
        # compound dtypes should raise not-implementederror

        def f(dtype):
            data = list(itertools.repeat((datetime(2001, 1, 1), "aa", 20), 9))
            return DataFrame(data=data, columns=["A", "B", "C"], dtype=dtype)

        msg = "compound dtypes are not implemented in the DataFrame constructor"
        with pytest.raises(NotImplementedError, match=msg):
            f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])

        # these work (though results may be unexpected)
        depr_msg = "either all columns will be cast to that dtype, or a TypeError will"
        with tm.assert_produces_warning(FutureWarning, match=depr_msg):
            f("int64")
        with tm.assert_produces_warning(FutureWarning, match=depr_msg):
            f("float64")

        # 10822
        # invalid error message on dt inference
        if not compat.is_platform_windows():
            f("M8[ns]")
Example #40
def test_maybe_promote_datetimetz_with_datetimetz(tz_aware_fixture,
                                                  tz_aware_fixture2, box):
    dtype = DatetimeTZDtype(tz=tz_aware_fixture)
    fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture2)
    boxed, box_dtype = box  # read from parametrized fixture

    from dateutil.tz import tzlocal

    if is_platform_windows() and tz_aware_fixture2 == tzlocal():
        pytest.xfail("Cannot process fill_value with this dtype, see GH 24310")
    if dtype.tz == fill_dtype.tz and boxed:
        pytest.xfail("falsely upcasts")
    if dtype.tz != fill_dtype.tz and not boxed:
        pytest.xfail("falsely upcasts")

    # create array of given dtype; casts "1" to correct dtype
    fill_value = pd.Series([10**9], dtype=fill_dtype)[0]

    # filling datetimetz with datetimetz casts to object, unless tz matches
    exp_val_for_scalar = fill_value
    if dtype.tz == fill_dtype.tz:
        expected_dtype = dtype
        exp_val_for_array = NaT
    else:
        expected_dtype = np.dtype(object)
        exp_val_for_array = np.nan

    _check_promote(
        dtype,
        fill_value,
        boxed,
        box_dtype,
        expected_dtype,
        exp_val_for_scalar,
        exp_val_for_array,
    )
Example #41
    def test_numpy_string_dtype_as_recarray(self):
        data = """\
a,1
aa,2
aaa,3
aaaa,4
aaaaa,5"""

        if compat.is_platform_windows():
            raise nose.SkipTest(
                "segfaults on win-64, only when all tests are run")

        def _make_reader(**kwds):
            return TextReader(StringIO(data),
                              delimiter=',',
                              header=None,
                              **kwds)

        reader = _make_reader(dtype='S4', as_recarray=True)
        result = reader.read()
        self.assertEqual(result['0'].dtype, 'S4')
        ex_values = np.array(['a', 'aa', 'aaa', 'aaaa', 'aaaa'], dtype='S4')
        self.assertTrue((result['0'] == ex_values).all())
        self.assertEqual(result['1'].dtype, 'S4')
Example #42
def test_intersect():
    def _check_correct(a, b, expected):
        result = a.intersect(b)
        assert (result.equals(expected))

    def _check_length_exc(a, longer):
        nose.tools.assert_raises(Exception, a.intersect, longer)

    def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
        xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
        yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
        expected = BlockIndex(TEST_LENGTH, eloc, elen)
        longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen)

        _check_correct(xindex, yindex, expected)
        _check_correct(xindex.to_int_index(), yindex.to_int_index(),
                       expected.to_int_index())

        _check_length_exc(xindex, longer_index)
        _check_length_exc(xindex.to_int_index(), longer_index.to_int_index())

    if compat.is_platform_windows():
        raise nose.SkipTest("segfaults on win-64 when all tests are run")
    check_cases(_check_case)
Example #43
def _skip_if_no_mpl():
    mod = safe_import("matplotlib")
    if mod:
        mod.use("Agg", warn=False)
    else:
        return True


def _skip_if_mpl_1_5():
    mod = safe_import("matplotlib")

    if mod:
        v = mod.__version__
        if LooseVersion(v) > LooseVersion('1.4.3') or str(v)[0] == '0':
            return True
        else:
            mod.use("Agg", warn=False)


skip_if_no_mpl = pytest.mark.skipif(_skip_if_no_mpl(),
                                    reason="Missing matplotlib dependency")
skip_if_mpl_1_5 = pytest.mark.skipif(_skip_if_mpl_1_5(),
                                     reason="matplotlib 1.5")
skip_if_32bit = pytest.mark.skipif(is_platform_32bit(),
                                   reason="skipping for 32 bit")
skip_if_windows = pytest.mark.skipif(is_platform_windows(),
                                     reason="Running on Windows")
skip_if_windows_python_3 = pytest.mark.skipif(is_platform_windows() and PY3,
                                              reason=("not used on python3/"
                                                      "win32"))
Example #44
import locale
import os

import pytest

from pandas._config.localization import can_set_locale, get_locales, set_locale

from pandas.compat import is_platform_windows

import pandas as pd

_all_locales = get_locales() or []
_current_locale = locale.getlocale()

# Don't run any of these tests if we are on Windows or have no locales.
pytestmark = pytest.mark.skipif(is_platform_windows() or not _all_locales,
                                reason="Need non-Windows and locales")

_skip_if_only_one_locale = pytest.mark.skipif(
    len(_all_locales) <= 1, reason="Need multiple locales for meaningful test")


def test_can_set_locale_valid_set():
    # Can set the default locale.
    assert can_set_locale("")


def test_can_set_locale_invalid_set():
    # Cannot set an invalid locale.
    assert not can_set_locale("non-existent_locale")
Example #45
def pa():
    if not _HAVE_PYARROW:
        pytest.skip("pyarrow is not installed")
    if is_platform_windows():
        pytest.skip("pyarrow-parquet not building on windows")
    return 'pyarrow'
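A minimal sketch (test name and body assumed, not taken from pandas) of how a fixture like pa above is typically consumed, assuming it is registered with @pytest.fixture as the snippet suggests:

import pandas as pd
import pandas._testing as tm

def test_roundtrip_with_pyarrow(pa, tmp_path):
    # `pa` resolves to the string "pyarrow" when the engine is usable;
    # otherwise the fixture skips the test (see the snippet above).
    df = pd.DataFrame({"a": [1, 2, 3]})
    path = tmp_path / "out.parquet"
    df.to_parquet(path, engine=pa)
    tm.assert_frame_equal(pd.read_parquet(path, engine=pa), df)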
Example #46
    result = frequencies.infer_freq(vals)
    assert result == rng.inferred_freq


@pytest.mark.parametrize("idx", [
    tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)
])
def test_invalid_index_types(idx):
    msg = ("(cannot infer freq from a non-convertible)|"
           "(Check the `freq` attribute instead of using infer_freq)")

    with pytest.raises(TypeError, match=msg):
        frequencies.infer_freq(idx)


@pytest.mark.skipif(is_platform_windows(),
                    reason="see gh-10822: Windows issue")
@pytest.mark.parametrize("idx", [tm.makeStringIndex(10),
                                 tm.makeUnicodeIndex(10)])
def test_invalid_index_types_unicode(idx):
    # see gh-10822
    #
    # Odd error message on conversions to datetime for unicode.
    msg = "Unknown string format"

    with pytest.raises(ValueError, match=msg):
        frequencies.infer_freq(idx)


def test_string_datetime_like_compat():
    # see gh-6463
Example #47
# -*- coding: utf-8 -*-
from __future__ import print_function

import numpy as np
import pandas as pd

import pandas.util.testing as tm
from pandas.compat import (is_platform_windows,
                           is_platform_32bit)
from pandas.core.config import option_context


use_32bit_repr = is_platform_windows() or is_platform_32bit()


class TestSparseSeriesFormatting(tm.TestCase):

    @property
    def dtype_format_for_platform(self):
        return '' if use_32bit_repr else ', dtype=int32'

    def test_sparse_max_row(self):
        s = pd.Series([1, np.nan, np.nan, 3, np.nan]).to_sparse()
        result = repr(s)
        dfm = self.dtype_format_for_platform
        exp = ("0    1.0\n1    NaN\n2    NaN\n3    3.0\n"
               "4    NaN\ndtype: float64\nBlockIndex\n"
               "Block locations: array([0, 3]{0})\n"
               "Block lengths: array([1, 1]{0})".format(dfm))
        self.assertEqual(result, exp)
Example #48
from pandas.compat import is_platform_windows, lrange
from pandas.compat.numpy import np_array_datetime64_compat

import pandas as pd
from pandas import DataFrame, DatetimeIndex, Index, MultiIndex
from pandas.core.indexes.datetimes import date_range
import pandas.util.testing as tm

import pandas.io.date_converters as conv
import pandas.io.parsers as parsers

# constant
_DEFAULT_DATETIME = datetime(1, 1, 1)

# Strategy for hypothesis
if is_platform_windows():
    date_strategy = st.datetimes(min_value=datetime(1900, 1, 1))
else:
    date_strategy = st.datetimes()


def test_separator_date_conflict(all_parsers):
    # Regression test for gh-4678
    #
    # Make sure thousands separator and
    # date parsing do not conflict.
    parser = all_parsers
    data = "06-02-2013;13:00;1-000.215"
    expected = DataFrame([[datetime(2013, 6, 2, 13, 0, 0), 1000.215]],
                         columns=["Date", 2])
Example #49
from pandas._libs.tslibs.parsing import parse_datetime_string
from pandas.compat import is_platform_windows
from pandas.compat.numpy import np_array_datetime64_compat

import pandas as pd
from pandas import DataFrame, DatetimeIndex, Index, MultiIndex, Series
import pandas._testing as tm
from pandas.core.indexes.datetimes import date_range

import pandas.io.date_converters as conv

# constant
_DEFAULT_DATETIME = datetime(1, 1, 1)

# Strategy for hypothesis
if is_platform_windows():
    date_strategy = st.datetimes(min_value=datetime(1900, 1, 1))
else:
    date_strategy = st.datetimes()


def test_separator_date_conflict(all_parsers):
    # Regression test for gh-4678
    #
    # Make sure thousands separator and
    # date parsing do not conflict.
    parser = all_parsers
    data = "06-02-2013;13:00;1-000.215"
    expected = DataFrame([[datetime(2013, 6, 2, 13, 0, 0), 1000.215]],
                         columns=["Date", 2])
Example #50

@pytest.mark.parametrize(
    "idx",
    [tm.makeIntIndex(10),
     tm.makeFloatIndex(10),
     tm.makePeriodIndex(10)])
def test_invalid_index_types(idx):
    msg = ("(cannot infer freq from a non-convertible)|"
           "(Check the `freq` attribute instead of using infer_freq)")

    with pytest.raises(TypeError, match=msg):
        frequencies.infer_freq(idx)


@pytest.mark.skipif(is_platform_windows(),
                    reason="see gh-10822: Windows issue")
@pytest.mark.parametrize(
    "idx",
    [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)])
def test_invalid_index_types_unicode(idx):
    # see gh-10822
    #
    # Odd error message on conversions to datetime for unicode.
    msg = "Unknown string format"

    with pytest.raises(ValueError, match=msg):
        frequencies.infer_freq(idx)


def test_string_datetime_like_compat():
Example #51
        assert isinstance(get_engine('auto'), PyArrowImpl)
        assert isinstance(get_engine('pyarrow'), PyArrowImpl)
        assert isinstance(get_engine('fastparquet'), FastParquetImpl)

    with pd.option_context('io.parquet.engine', 'fastparquet'):
        assert isinstance(get_engine('auto'), FastParquetImpl)
        assert isinstance(get_engine('pyarrow'), PyArrowImpl)
        assert isinstance(get_engine('fastparquet'), FastParquetImpl)

    with pd.option_context('io.parquet.engine', 'auto'):
        assert isinstance(get_engine('auto'), PyArrowImpl)
        assert isinstance(get_engine('pyarrow'), PyArrowImpl)
        assert isinstance(get_engine('fastparquet'), FastParquetImpl)


@pytest.mark.xfail(is_platform_windows() or is_platform_mac(),
                   reason="reading pa metadata failing on Windows/mac",
                   strict=True)
def test_cross_engine_pa_fp(df_cross_compat, pa, fp):
    # cross-compat with differing reading/writing engines

    df = df_cross_compat
    with tm.ensure_clean() as path:
        df.to_parquet(path, engine=pa, compression=None)

        result = read_parquet(path, engine=fp)
        tm.assert_frame_equal(result, df)

        result = read_parquet(path, engine=fp, columns=['a', 'd'])
        tm.assert_frame_equal(result, df[['a', 'd']])
Example #52
        assert isinstance(get_engine('auto'), PyArrowImpl)
        assert isinstance(get_engine('pyarrow'), PyArrowImpl)
        assert isinstance(get_engine('fastparquet'), FastParquetImpl)

    with pd.option_context('io.parquet.engine', 'fastparquet'):
        assert isinstance(get_engine('auto'), FastParquetImpl)
        assert isinstance(get_engine('pyarrow'), PyArrowImpl)
        assert isinstance(get_engine('fastparquet'), FastParquetImpl)

    with pd.option_context('io.parquet.engine', 'auto'):
        assert isinstance(get_engine('auto'), PyArrowImpl)
        assert isinstance(get_engine('pyarrow'), PyArrowImpl)
        assert isinstance(get_engine('fastparquet'), FastParquetImpl)


@pytest.mark.xfail(is_platform_windows() or is_platform_mac(),
                   reason="reading pa metadata failing on Windows/mac")
def test_cross_engine_pa_fp(df_cross_compat, pa, fp):
    # cross-compat with differing reading/writing engines

    df = df_cross_compat
    with tm.ensure_clean() as path:
        df.to_parquet(path, engine=pa, compression=None)

        result = read_parquet(path, engine=fp)
        tm.assert_frame_equal(result, df)

        result = read_parquet(path, engine=fp, columns=['a', 'd'])
        tm.assert_frame_equal(result, df[['a', 'd']])

Example #53
    def test_numpy_array_equal_message(self):

        if is_platform_windows():
            raise nose.SkipTest("windows has incomparable line-endings "
                                "and uses L on the shape")

        expected = """numpy array are different

numpy array shapes are different
\\[left\\]:  \\(2,\\)
\\[right\\]: \\(3,\\)"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]))

        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]))

        # scalar comparison
        expected = """Expected type """
        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(1, 2)
        expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5"""
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(1, 2)

        # array / scalar array comparison
        expected = """numpy array are different

numpy array classes are different
\\[left\\]:  ndarray
\\[right\\]: int"""

        with assertRaisesRegexp(AssertionError, expected):
            # numpy_array_equal only accepts np.ndarray
            assert_numpy_array_equal(np.array([1]), 1)
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([1]), 1)

        # scalar / array comparison
        expected = """numpy array are different

numpy array classes are different
\\[left\\]:  int
\\[right\\]: ndarray"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(1, np.array([1]))
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(1, np.array([1]))

        expected = """numpy array are different

numpy array values are different \\(66\\.66667 %\\)
\\[left\\]:  \\[nan, 2\\.0, 3\\.0\\]
\\[right\\]: \\[1\\.0, nan, 3\\.0\\]"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([np.nan, 2, 3]),
                                     np.array([1, np.nan, 3]))
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([np.nan, 2, 3]),
                                np.array([1, np.nan, 3]))

        expected = """numpy array are different

numpy array values are different \\(50\\.0 %\\)
\\[left\\]:  \\[1, 2\\]
\\[right\\]: \\[1, 3\\]"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3]))
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([1, 3]))

        expected = """numpy array are different

numpy array values are different \\(50\\.0 %\\)
\\[left\\]:  \\[1\\.1, 2\\.000001\\]
\\[right\\]: \\[1\\.1, 2.0\\]"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(
                np.array([1.1, 2.000001]), np.array([1.1, 2.0]))

        # must pass
        assert_almost_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0]))

        expected = """numpy array are different

numpy array values are different \\(16\\.66667 %\\)
\\[left\\]:  \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\]
\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]),
                                     np.array([[1, 3], [3, 4], [5, 6]]))
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]),
                                np.array([[1, 3], [3, 4], [5, 6]]))

        expected = """numpy array are different

numpy array values are different \\(25\\.0 %\\)
\\[left\\]:  \\[\\[1, 2\\], \\[3, 4\\]\\]
\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([[1, 2], [3, 4]]),
                                     np.array([[1, 3], [3, 4]]))
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([[1, 2], [3, 4]]),
                                np.array([[1, 3], [3, 4]]))

        # allow to overwrite message
        expected = """Index are different

Index shapes are different
\\[left\\]:  \\(2,\\)
\\[right\\]: \\(3,\\)"""

        with assertRaisesRegexp(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]),
                                     obj='Index')
        with assertRaisesRegexp(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]),
                                obj='Index')
Example #54
            msg += " satisfying a min_version of {}".format(min_version)
        return pytest.mark.skipif(
            not safe_import(package, min_version=min_version), reason=msg
        )(func)
    return decorated_func


skip_if_no_mpl = pytest.mark.skipif(_skip_if_no_mpl(),
                                    reason="Missing matplotlib dependency")
skip_if_mpl_1_5 = pytest.mark.skipif(_skip_if_mpl_1_5(),
                                     reason="matplotlib 1.5")
xfail_if_mpl_2_2 = pytest.mark.xfail(_skip_if_mpl_2_2(),
                                     reason="matplotlib 2.2")
skip_if_32bit = pytest.mark.skipif(is_platform_32bit(),
                                   reason="skipping for 32 bit")
skip_if_windows = pytest.mark.skipif(is_platform_windows(),
                                     reason="Running on Windows")
skip_if_windows_python_3 = pytest.mark.skipif(is_platform_windows() and PY3,
                                              reason=("not used on python3/"
                                                      "win32"))
skip_if_has_locale = pytest.mark.skipif(_skip_if_has_locale(),
                                        reason="Specific locale is set {lang}"
                                        .format(lang=locale.getlocale()[0]))
skip_if_not_us_locale = pytest.mark.skipif(_skip_if_not_us_locale(),
                                           reason="Specific locale is set "
                                           "{lang}".format(
                                               lang=locale.getlocale()[0]))
skip_if_no_scipy = pytest.mark.skipif(_skip_if_no_scipy(),
                                      reason="Missing SciPy requirement")
skip_if_no_lzma = pytest.mark.skipif(_skip_if_no_lzma(),
                                     reason="need backports.lzma to run")
Example #55
        parametrization mark.
    """
    msg = f"Could not import '{package}'"
    if min_version:
        msg += f" satisfying a min_version of {min_version}"
    return pytest.mark.skipif(
        not safe_import(package, min_version=min_version), reason=msg
    )


skip_if_no_mpl = pytest.mark.skipif(
    _skip_if_no_mpl(), reason="Missing matplotlib dependency"
)
skip_if_mpl = pytest.mark.skipif(not _skip_if_no_mpl(), reason="matplotlib is present")
skip_if_32bit = pytest.mark.skipif(not IS64, reason="skipping for 32 bit")
skip_if_windows = pytest.mark.skipif(is_platform_windows(), reason="Running on Windows")
skip_if_not_us_locale = pytest.mark.skipif(
    _skip_if_not_us_locale(), reason=f"Specific locale is set {locale.getlocale()[0]}"
)
skip_if_no_scipy = pytest.mark.skipif(
    _skip_if_no_scipy(), reason="Missing SciPy requirement"
)
skip_if_no_ne = pytest.mark.skipif(
    not USE_NUMEXPR,
    reason=f"numexpr enabled->{USE_NUMEXPR}, installed->{NUMEXPR_INSTALLED}",
)


# TODO(pytest#7469): return type, _pytest.mark.structures.MarkDecorator is not public
# https://github.com/pytest-dev/pytest/issues/7469
def skip_if_np_lt(ver_str: str, *args, reason: str | None = None):
Example #56
class TestParquetPyArrow(Base):
    def test_basic(self, pa, df_full):

        df = df_full

        # additional supported types for pyarrow
        dti = pd.date_range("20130101", periods=3, tz="Europe/Brussels")
        dti = dti._with_freq(None)  # freq doesn't round-trip
        df["datetime_tz"] = dti
        df["bool_with_none"] = [True, None, True]

        check_round_trip(df, pa)

    def test_basic_subset_columns(self, pa, df_full):
        # GH18628

        df = df_full
        # additional supported types for pyarrow
        df["datetime_tz"] = pd.date_range("20130101",
                                          periods=3,
                                          tz="Europe/Brussels")

        check_round_trip(
            df,
            pa,
            expected=df[["string", "int"]],
            read_kwargs={"columns": ["string", "int"]},
        )

    def test_to_bytes_without_path_or_buf_provided(self, pa, df_full):
        # GH 37105

        buf_bytes = df_full.to_parquet(engine=pa)
        assert isinstance(buf_bytes, bytes)

        buf_stream = BytesIO(buf_bytes)
        res = read_parquet(buf_stream)

        tm.assert_frame_equal(df_full, res)

    def test_duplicate_columns(self, pa):
        # not currently able to handle duplicate columns
        df = pd.DataFrame(np.arange(12).reshape(4, 3),
                          columns=list("aaa")).copy()
        self.check_error_on_write(df, pa, ValueError,
                                  "Duplicate column names found")

    def test_unsupported(self, pa):
        # timedelta
        df = pd.DataFrame({"a": pd.timedelta_range("1 day", periods=3)})
        self.check_external_error_on_write(df, pa, NotImplementedError)

        # mixed python objects
        df = pd.DataFrame({"a": ["a", 1, 2.0]})
        # pyarrow 0.11 raises ArrowTypeError
        # older pyarrow versions raise ArrowInvalid
        self.check_external_error_on_write(df, pa, pyarrow.ArrowException)

    def test_categorical(self, pa):

        # the categorical dtype is supported in pyarrow >= 0.7.0
        df = pd.DataFrame()
        df["a"] = pd.Categorical(list("abcdef"))

        # test for null, out-of-order values, and unobserved category
        df["b"] = pd.Categorical(
            ["bar", "foo", "foo", "bar", None, "bar"],
            dtype=pd.CategoricalDtype(["foo", "bar", "baz"]),
        )

        # test for ordered flag
        df["c"] = pd.Categorical(["a", "b", "c", "a", "c", "b"],
                                 categories=["b", "c", "d"],
                                 ordered=True)

        check_round_trip(df, pa)

    @pytest.mark.xfail(
        is_platform_windows(),
        reason="localhost connection rejected",
        strict=False,
    )
    def test_s3_roundtrip_explicit_fs(self, df_compat, s3_resource, pa, s3so):
        s3fs = pytest.importorskip("s3fs")
        s3 = s3fs.S3FileSystem(**s3so)
        kw = {"filesystem": s3}
        check_round_trip(
            df_compat,
            pa,
            path="pandas-test/pyarrow.parquet",
            read_kwargs=kw,
            write_kwargs=kw,
        )

    def test_s3_roundtrip(self, df_compat, s3_resource, pa, s3so):
        # GH #19134
        s3so = {"storage_options": s3so}
        check_round_trip(
            df_compat,
            pa,
            path="s3://pandas-test/pyarrow.parquet",
            read_kwargs=s3so,
            write_kwargs=s3so,
        )

    @td.skip_if_no("s3fs")  # also requires flask
    @pytest.mark.parametrize(
        "partition_col",
        [
            ["A"],
            [],
        ],
    )
    def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa,
                                  partition_col, s3so):
        # GH #26388
        expected_df = df_compat.copy()

        # GH #35791
        # read_table uses the new Arrow Datasets API since pyarrow 1.0.0.
        # Previously, pyarrow partitioned columns came back as 'category' dtype and
        # were appended to the back of the dataframe on read. In the new API the
        # category dtype was only used if the partition field was a string, but this
        # changed again in pyarrow 2.0.0, which uses the category dtype for all
        # partition types (not only strings).
        if partition_col:
            partition_col_type = ("int32" if (not pa_version_under1p0)
                                  and pa_version_under2p0 else "category")

            expected_df[partition_col] = expected_df[partition_col].astype(
                partition_col_type)

        check_round_trip(
            df_compat,
            pa,
            expected=expected_df,
            path="s3://pandas-test/parquet_dir",
            read_kwargs={"storage_options": s3so},
            write_kwargs={
                "partition_cols": partition_col,
                "compression": None,
                "storage_options": s3so,
            },
            check_like=True,
            repeat=1,
        )

    @td.skip_if_no("pyarrow")
    def test_read_file_like_obj_support(self, df_compat):
        buffer = BytesIO()
        df_compat.to_parquet(buffer)
        df_from_buf = read_parquet(buffer)
        tm.assert_frame_equal(df_compat, df_from_buf)

    @td.skip_if_no("pyarrow")
    def test_expand_user(self, df_compat, monkeypatch):
        monkeypatch.setenv("HOME", "TestingUser")
        monkeypatch.setenv("USERPROFILE", "TestingUser")
        with pytest.raises(OSError, match=r".*TestingUser.*"):
            read_parquet("~/file.parquet")
        with pytest.raises(OSError, match=r".*TestingUser.*"):
            df_compat.to_parquet("~/file.parquet")

    def test_partition_cols_supported(self, pa, df_full):
        # GH #23283
        partition_cols = ["bool", "int"]
        df = df_full
        with tm.ensure_clean_dir() as path:
            df.to_parquet(path,
                          partition_cols=partition_cols,
                          compression=None)
            check_partition_names(path, partition_cols)
            assert read_parquet(path).shape == df.shape

    def test_partition_cols_string(self, pa, df_full):
        # GH #27117
        partition_cols = "bool"
        partition_cols_list = [partition_cols]
        df = df_full
        with tm.ensure_clean_dir() as path:
            df.to_parquet(path,
                          partition_cols=partition_cols,
                          compression=None)
            check_partition_names(path, partition_cols_list)
            assert read_parquet(path).shape == df.shape

    @pytest.mark.parametrize("path_type", [str, pathlib.Path])
    def test_partition_cols_pathlib(self, pa, df_compat, path_type):
        # GH 35902

        partition_cols = "B"
        partition_cols_list = [partition_cols]
        df = df_compat

        with tm.ensure_clean_dir() as path_str:
            path = path_type(path_str)
            df.to_parquet(path, partition_cols=partition_cols_list)
            assert read_parquet(path).shape == df.shape

    def test_empty_dataframe(self, pa):
        # GH #27339
        df = pd.DataFrame()
        check_round_trip(df, pa)

    def test_write_with_schema(self, pa):
        import pyarrow

        df = pd.DataFrame({"x": [0, 1]})
        schema = pyarrow.schema([pyarrow.field("x", type=pyarrow.bool_())])
        out_df = df.astype(bool)
        check_round_trip(df,
                         pa,
                         write_kwargs={"schema": schema},
                         expected=out_df)

    @td.skip_if_no("pyarrow")
    def test_additional_extension_arrays(self, pa):
        # test additional ExtensionArrays that are supported through the
        # __arrow_array__ protocol
        df = pd.DataFrame({
            "a": pd.Series([1, 2, 3], dtype="Int64"),
            "b": pd.Series([1, 2, 3], dtype="UInt32"),
            "c": pd.Series(["a", None, "c"], dtype="string"),
        })
        check_round_trip(df, pa)

        df = pd.DataFrame({"a": pd.Series([1, 2, 3, None], dtype="Int64")})
        check_round_trip(df, pa)

    @td.skip_if_no("pyarrow", min_version="1.0.0")
    def test_pyarrow_backed_string_array(self, pa, string_storage):
        # test ArrowStringArray supported through the __arrow_array__ protocol
        df = pd.DataFrame(
            {"a": pd.Series(["a", None, "c"], dtype="string[pyarrow]")})
        with pd.option_context("string_storage", string_storage):
            check_round_trip(df,
                             pa,
                             expected=df.astype(f"string[{string_storage}]"))

    @td.skip_if_no("pyarrow")
    def test_additional_extension_types(self, pa):
        # test additional ExtensionArrays that are supported through the
        # __arrow_array__ protocol + by defining a custom ExtensionType
        df = pd.DataFrame({
            # Arrow does not yet support struct in writing to Parquet (ARROW-1644)
            # "c": pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (3, 4)]),
            "d": pd.period_range("2012-01-01", periods=3, freq="D"),
        })
        check_round_trip(df, pa)

    def test_timestamp_nanoseconds(self, pa):
        # with parquet format version 2.0, pyarrow defaults to writing nanosecond
        # timestamps, so this should round-trip without error
        df = pd.DataFrame(
            {"a": pd.date_range("2017-01-01", freq="1n", periods=10)})
        check_round_trip(df, pa, write_kwargs={"version": "2.0"})

    def test_timezone_aware_index(self, pa, timezone_aware_date_list):
        if not pa_version_under2p0:
            # temporarily skip this test until it is properly resolved
            # https://github.com/pandas-dev/pandas/issues/37286
            pytest.skip()
        idx = 5 * [timezone_aware_date_list]
        df = pd.DataFrame(index=idx, data={"index_as_col": idx})

        # see gh-36004
        # compare the time/zone values only and skip their exact class:
        # pyarrow always creates fixed-offset timezones using pytz.FixedOffset(),
        # even when the original index used datetime.timezone()
        #
        # technically they are equivalent:
        # both implement datetime.tzinfo
        # both wrap a datetime.timedelta()
        # this use case sets the offset resolution to 1 minute
        check_round_trip(df, pa, check_dtype=False)

    @td.skip_if_no("pyarrow", min_version="1.0.0")
    def test_filter_row_groups(self, pa):
        # https://github.com/pandas-dev/pandas/issues/26551
        df = pd.DataFrame({"a": list(range(0, 3))})
        with tm.ensure_clean() as path:
            df.to_parquet(path, pa)
            result = read_parquet(path,
                                  pa,
                                  filters=[("a", "==", 0)],
                                  use_legacy_dataset=False)
        assert len(result) == 1

    def test_read_parquet_manager(self, pa, using_array_manager):
        # ensure that read_parquet honors the pandas.options.mode.data_manager option
        df = pd.DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"])

        with tm.ensure_clean() as path:
            df.to_parquet(path, pa)
            result = read_parquet(path, pa)
        if using_array_manager:
            assert isinstance(result._mgr, pd.core.internals.ArrayManager)
        else:
            assert isinstance(result._mgr, pd.core.internals.BlockManager)
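Every test in this class goes through a check_round_trip helper that is defined earlier in pandas' parquet test module and is not shown in this excerpt. A simplified sketch of what it does, with the body approximated from how the helper is called above (the real helper accepts a few more options, e.g. check_names):

import pandas as pd
import pandas._testing as tm


def check_round_trip(df, engine, path=None, write_kwargs=None, read_kwargs=None,
                     expected=None, check_like=False, check_dtype=True, repeat=2):
    # approximate behaviour: write df with the given engine, read it back and
    # compare the result against expected (defaulting to df itself)
    write_kwargs = write_kwargs or {"compression": None}
    read_kwargs = read_kwargs or {}
    if expected is None:
        expected = df

    def round_trip(target):
        df.to_parquet(target, engine=engine, **write_kwargs)
        result = pd.read_parquet(target, engine=engine, **read_kwargs)
        tm.assert_frame_equal(result, expected,
                              check_like=check_like, check_dtype=check_dtype)

    for _ in range(repeat):
        if path is None:
            with tm.ensure_clean() as tmp_path:
                round_trip(tmp_path)
        else:
            round_trip(path)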
Exemple #57
0
import codecs
import locale
import os

import pytest

from pandas._config.localization import can_set_locale, get_locales, set_locale

from pandas.compat import is_platform_windows

_all_locales = get_locales() or []
_current_locale = locale.getlocale()

# Don't run any of these tests if we are on Windows or have no locales.
pytestmark = pytest.mark.skipif(is_platform_windows() or not _all_locales,
                                reason="Need non-Windows and locales")

_skip_if_only_one_locale = pytest.mark.skipif(
    len(_all_locales) <= 1, reason="Need multiple locales for meaningful test")


def test_can_set_locale_valid_set():
    # Can set the default locale.
    assert can_set_locale("")


def test_can_set_locale_invalid_set():
    # Cannot set an invalid locale.
    assert not can_set_locale("non-existent_locale")
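For reference, set_locale (imported above from pandas._config.localization) is a context manager that switches the process locale only for the duration of the with-block; a minimal sketch of that pattern, with a hypothetical test name:

def test_set_locale_is_temporary():
    # pick any available locale; the module-level pytestmark already guarantees
    # that _all_locales is non-empty here
    new_locale = _all_locales[0]
    if can_set_locale(new_locale):
        with set_locale(new_locale):
            pass
    # once the with-block exits, the locale is restored to what it was
    assert locale.getlocale() == _current_locale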