Example #1
0
def test_numpy_ufuncs_basic(index, func):
    """Apply a numpy ufunc to ``index`` and check the outcome per index type.

    * ``DatetimeIndexOpsMixin`` instances are expected to raise ``TypeError``
      or ``AttributeError``.
    * ``Float64Index``/``Int64Index``/``UInt64Index``/``RangeIndex`` must give
      a ``Float64Index`` equal to the ufunc applied to the raw values.
    * Any other non-empty index is expected to raise; empty ones are skipped.

    See https://numpy.org/doc/stable/reference/ufuncs.html
    """
    if isinstance(index, DatetimeIndexOpsMixin):
        with tm.external_error_raised((TypeError, AttributeError)), np.errstate(
            all="ignore"
        ):
            func(index)
        return

    numeric_types = (Float64Index, Int64Index, UInt64Index, RangeIndex)
    if isinstance(index, numeric_types):
        # the ufunc coerces to float (e.g. np.sin); silence floating warnings
        with np.errstate(all="ignore"):
            actual = func(index)
            wanted = Index(func(index.values), name=index.name)

        tm.assert_index_equal(actual, wanted)
        assert isinstance(actual, Float64Index)
        return

    # everything else raises AttributeError or TypeError, unless it is empty
    if len(index) != 0:
        with tm.external_error_raised((TypeError, AttributeError)), np.errstate(
            all="ignore"
        ):
            func(index)
Example #2
0
def test_numpy_ufuncs_other(index, func, request):
    """Apply a numpy ufunc to ``index`` and check the outcome per index type.

    ``np.isfinite``/``np.isinf``/``np.isnan`` on datetime/timedelta indexes
    and any ufunc on a ``NumericIndex`` must return a plain ``ndarray``;
    period indexes and other non-empty indexes are expected to raise
    ``TypeError``.

    See https://numpy.org/doc/stable/reference/ufuncs.html
    """
    if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
        if func not in (np.isfinite, np.isinf, np.isnan):
            with tm.external_error_raised(TypeError):
                func(index)
            return

        # numpy 1.18 changed isinf and isnan to not raise on dt64/td64
        mask = func(index)
        assert isinstance(mask, np.ndarray)
        return

    if isinstance(index, PeriodIndex):
        with tm.external_error_raised(TypeError):
            func(index)
        return

    if isinstance(index, NumericIndex):
        # the result is a bool array, not an Index
        mask = func(index)
        assert isinstance(mask, np.ndarray)
        assert not isinstance(mask, Index)
        return

    # remaining index types raise, except when there is nothing to process
    if len(index) != 0:
        with tm.external_error_raised(TypeError):
            func(index)
Example #3
0
    def test_tz_localize_nonexistent(self, tz, method, exp):
        """GH 8917: ``tz_localize(nonexistent=...)`` on index, Series and frame.

        ``method`` is passed as the ``nonexistent`` argument. With ``"raise"``
        a ``pytz.NonExistentTimeError`` is expected, with ``exp == "invalid"``
        a ``ValueError``; otherwise every localized timestamp must equal
        ``exp``.
        """
        periods = 60
        index = date_range(start="2015-03-29 02:00:00", periods=periods, freq="min")
        series = Series(1, index=index)
        frame = series.to_frame()

        if method == "raise":
            for obj in (index, series, frame):
                with tm.external_error_raised(pytz.NonExistentTimeError):
                    obj.tz_localize(tz, nonexistent=method)

        elif exp == "invalid":
            for obj in (index, series, frame):
                with pytest.raises(ValueError, match="argument must be one of"):
                    obj.tz_localize(tz, nonexistent=method)

        else:
            expected_series = Series(1, index=DatetimeIndex([exp] * periods, tz=tz))
            localized_series = series.tz_localize(tz, nonexistent=method)
            tm.assert_series_equal(localized_series, expected_series)

            expected_frame = expected_series.to_frame()
            localized_frame = frame.tz_localize(tz, nonexistent=method)
            tm.assert_frame_equal(localized_frame, expected_frame)
Example #4
0
def test_numpy_ufuncs_basic(index, func):
    """Apply a numpy ufunc to ``index`` and check the outcome per index type.

    ``DatetimeIndexOpsMixin`` instances are expected to raise. A
    ``NumericIndex`` or an index whose non-numpy dtype reports
    ``_is_numeric`` must match the ufunc applied to the raw values. Any other
    non-empty index is expected to raise ``TypeError`` or ``AttributeError``.

    See https://numpy.org/doc/stable/reference/ufuncs.html
    """
    if isinstance(index, DatetimeIndexOpsMixin):
        with tm.external_error_raised((TypeError, AttributeError)), np.errstate(
            all="ignore"
        ):
            func(index)
        return

    is_numeric = isinstance(index, NumericIndex) or (
        not isinstance(index.dtype, np.dtype) and index.dtype._is_numeric
    )
    if is_numeric:
        # the ufunc coerces to float (e.g. np.sin); silence floating warnings
        with np.errstate(all="ignore"):
            actual = func(index)
            wanted = Index(func(index.values), name=index.name)

        tm.assert_index_equal(actual, wanted)
        if type(index) is Index:
            # e.g. np.exp with Int64 -> Float64
            assert type(actual) is Index
        else:
            # i.e. a NumericIndex subclass
            assert isinstance(actual, Float64Index)
        return

    # everything else raises AttributeError or TypeError, unless it is empty
    if len(index) != 0:
        with tm.external_error_raised((TypeError, AttributeError)), np.errstate(
            all="ignore"
        ):
            func(index)
def test_numpy_ufuncs_other(index, func, request):
    """Apply a numpy ufunc to ``index`` and check the outcome per index type.

    The tz-aware ``DatetimeIndex`` case with ``np.isfinite``/``np.isnan``/
    ``np.isinf`` is marked xfail on the requesting test node before the
    regular checks run.

    See https://numpy.org/doc/stable/reference/ufuncs.html
    """
    if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
        is_nan_check = func in (np.isfinite, np.isinf, np.isnan)

        if is_nan_check and isinstance(index, DatetimeIndex) and index.tz is not None:
            request.node.add_marker(
                pytest.mark.xfail(reason="__array_ufunc__ is not defined")
            )

        if not is_nan_check:
            with tm.external_error_raised(TypeError):
                func(index)
            return

        # numpy 1.18 changed isinf and isnan to not raise on dt64/td64
        mask = func(index)
        assert isinstance(mask, np.ndarray)
        return

    if isinstance(index, PeriodIndex):
        with tm.external_error_raised(TypeError):
            func(index)
        return

    if isinstance(index, NumericIndex):
        # the result is a bool array, not an Index
        mask = func(index)
        assert isinstance(mask, np.ndarray)
        assert not isinstance(mask, Index)
        return

    # remaining index types raise, except when there is nothing to process
    if len(index) != 0:
        with tm.external_error_raised(TypeError):
            func(index)
Example #6
0
    def test_dt_round_tz_ambiguous(self, method):
        """GH 18946: apply the ``.dt`` method ``method`` near "fall back" DST.

        Exercises the ``ambiguous`` argument with ``"infer"``, a bool list,
        ``"NaT"`` and ``"raise"`` on timestamps converted to Europe/Madrid.
        """
        stamps = [
            pd.to_datetime(text, utc=True)
            for text in (
                "2017-10-29 02:00:00+02:00",
                "2017-10-29 02:00:00+01:00",
                "2017-10-29 03:00:00+01:00",
            )
        ]
        df1 = DataFrame(stamps, columns=["date"])
        df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid")

        def apply_method(ambiguous):
            # look the accessor method up afresh for every call, hourly freq
            return getattr(df1.date.dt, method)("H", ambiguous=ambiguous)

        unchanged = df1["date"]

        # infer
        tm.assert_series_equal(apply_method("infer"), unchanged)

        # bool-array
        tm.assert_series_equal(apply_method([True, False, False]), unchanged)

        # NaT: the first two entries are replaced
        with_nat = df1["date"].copy()
        with_nat.iloc[0:2] = pd.NaT
        tm.assert_series_equal(apply_method("NaT"), with_nat)

        # raise
        with tm.external_error_raised(pytz.AmbiguousTimeError):
            apply_method("raise")
Example #7
0
    def check_external_error_on_write(self, df):
        """Assert that writing ``df`` with ``to_feather`` raises an exception.

        The temporary path is acquired outside the error check, so only an
        exception coming from the ``to_feather`` call itself satisfies it. A
        failure while creating or cleaning up the temporary file is no longer
        mistaken for the expected write error.
        """
        with tm.ensure_clean() as path:
            with tm.external_error_raised(Exception):
                to_feather(df, path)
Example #8
0
    def test_series_tz_localize_ambiguous_bool(self):
        """GH#14402: bool values are accepted for ``ambiguous``.

        Without ``ambiguous`` the localization must raise
        ``pytz.AmbiguousTimeError``; ``True``/``[True]`` select the -0500
        offset and ``False``/``[False]`` the -0600 offset.
        """
        naive = Series([Timestamp("2015-11-01 01:00:03")])
        minus_five = Series([Timestamp("2015-11-01 01:00:03-0500", tz="US/Central")])
        minus_six = Series([Timestamp("2015-11-01 01:00:03-0600", tz="US/Central")])

        with tm.external_error_raised(pytz.AmbiguousTimeError):
            naive.dt.tz_localize("US/Central")

        scenarios = [
            (True, minus_five),
            ([True], minus_five),
            (False, minus_six),
            ([False], minus_six),
        ]
        for flag, wanted in scenarios:
            localized = naive.dt.tz_localize("US/Central", ambiguous=flag)
            tm.assert_series_equal(localized, wanted)
Example #9
0
    def test_delete(self):
        """``delete`` on a daily timedelta index: result, name and freq.

        Removing the first or last element keeps the daily freq, removing an
        interior element resets it to ``None``, and an out-of-bounds position
        raises.
        """
        idx = timedelta_range(start="1 Days", periods=5, freq="D", name="idx")

        # freq is preserved when an end element is dropped
        without_first = timedelta_range(
            start="2 Days", periods=4, freq="D", name="idx"
        )
        without_last = timedelta_range(
            start="1 Days", periods=4, freq="D", name="idx"
        )
        # freq becomes None when an interior element is dropped
        without_second = TimedeltaIndex(
            ["1 day", "3 day", "4 day", "5 day"], freq=None, name="idx"
        )

        checks = [
            (0, without_first),
            (-5, without_first),
            (-1, without_last),
            (4, without_last),
            (1, without_second),
        ]
        for position, wanted in checks:
            trimmed = idx.delete(position)
            tm.assert_index_equal(trimmed, wanted)
            assert trimmed.name == wanted.name
            assert trimmed.freq == wanted.freq

        # IndexError or ValueError, depending on the numpy version
        with tm.external_error_raised((IndexError, ValueError)):
            idx.delete(5)
Example #10
0
def test_numpy_ufuncs_other(index, func):
    """Apply a numpy ufunc to ``index``, with and without an ``out=`` buffer.

    Where the ufunc is supported, the direct result and the values written
    into a preallocated bool buffer must agree; unsupported non-empty indexes
    are expected to raise ``TypeError``.

    See https://numpy.org/doc/stable/reference/ufuncs.html
    """
    if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
        if func not in (np.isfinite, np.isinf, np.isnan):
            with tm.external_error_raised(TypeError):
                func(index)
            return

        # numpy 1.18 changed isinf and isnan to not raise on dt64/td64
        mask = func(index)
        assert isinstance(mask, np.ndarray)

        buffer = np.empty(index.shape, dtype=bool)
        func(index, out=buffer)
        tm.assert_numpy_array_equal(buffer, mask)
        return

    if isinstance(index, PeriodIndex):
        with tm.external_error_raised(TypeError):
            func(index)
        return

    has_extension_dtype = not isinstance(index.dtype, np.dtype)
    gives_bool_result = (
        isinstance(index, NumericIndex)
        or (has_extension_dtype and index.dtype._is_numeric)
        or (index.dtype.kind == "c" and func is not np.signbit)
        or index.dtype == bool
    )
    if gives_bool_result:
        mask = func(index)
        # e.g. for Int64 we expect to get a BooleanArray back
        wanted_type = BooleanArray if has_extension_dtype else np.ndarray
        assert isinstance(mask, wanted_type)

        buffer = np.empty(index.shape, dtype=bool)
        func(index, out=buffer)

        raw_mask = mask._data if has_extension_dtype else mask
        tm.assert_numpy_array_equal(buffer, raw_mask)
        return

    # remaining index types raise, except when there is nothing to process
    if len(index) != 0:
        with tm.external_error_raised(TypeError):
            func(index)
Example #11
0
    def test_errorbar_plot(self):
        """Series plots accept error bars supplied in several container types.

        Covers line and bar plots with ``yerr`` given as a Series, ndarray,
        list and DataFrame, scalar ``xerr``/``yerr``, an ``xerr``-only plot,
        and a month-frequency time series. Finally checks that a ``yerr`` of
        the wrong length raises ``ValueError`` and a list of strings raises
        ``TypeError``.
        """

        s = Series(np.arange(10), name="x")
        s_err = np.random.randn(10)
        d_err = DataFrame(np.random.randn(10, 2),
                          index=s.index,
                          columns=["x", "y"])
        # test line and bar plots
        kinds = ["line", "bar"]
        for kind in kinds:
            # NOTE(review): xerr=/yerr= look like the expected number of
            # error-bar containers per axis — confirm in _check_has_errorbars
            ax = _check_plot_works(s.plot, yerr=Series(s_err), kind=kind)
            self._check_has_errorbars(ax, xerr=0, yerr=1)
            ax = _check_plot_works(s.plot, yerr=s_err, kind=kind)
            self._check_has_errorbars(ax, xerr=0, yerr=1)
            ax = _check_plot_works(s.plot, yerr=s_err.tolist(), kind=kind)
            self._check_has_errorbars(ax, xerr=0, yerr=1)
            ax = _check_plot_works(s.plot, yerr=d_err, kind=kind)
            self._check_has_errorbars(ax, xerr=0, yerr=1)
            ax = _check_plot_works(s.plot, xerr=0.2, yerr=0.2, kind=kind)
            self._check_has_errorbars(ax, xerr=1, yerr=1)

        # xerr only, with the default plot kind
        ax = _check_plot_works(s.plot, xerr=s_err)
        self._check_has_errorbars(ax, xerr=1, yerr=0)

        # test time series plotting
        ix = date_range("1/1/2000", "1/1/2001", freq="M")
        ts = Series(np.arange(12), index=ix, name="x")
        ts_err = Series(np.random.randn(12), index=ix)
        td_err = DataFrame(np.random.randn(12, 2),
                           index=ix,
                           columns=["x", "y"])

        ax = _check_plot_works(ts.plot, yerr=ts_err)
        self._check_has_errorbars(ax, xerr=0, yerr=1)
        ax = _check_plot_works(ts.plot, yerr=td_err)
        self._check_has_errorbars(ax, xerr=0, yerr=1)

        # check incorrect lengths and types
        # 11 error values for a 10-element series
        with tm.external_error_raised(ValueError):
            s.plot(yerr=np.arange(11))

        # right length, but the values are strings
        s_err = ["zzz"] * 10
        with tm.external_error_raised(TypeError):
            s.plot(yerr=s_err)
Example #12
0
    def test_read_expands_user_home_dir(
        self, reader, module, error_class, fn_ext, monkeypatch
    ):
        """``reader`` raises ``error_class`` for a missing ``~``-relative path.

        ``icom._expand_user`` is replaced with a stub that prefixes ``"foo"``
        to the path. The test is skipped when ``module`` cannot be imported.
        """
        pytest.importorskip(module)

        def fake_expand_user(x):
            return os.path.join("foo", x)

        missing_path = os.path.join("~", "does_not_exist." + fn_ext)
        monkeypatch.setattr(icom, "_expand_user", fake_expand_user)

        with tm.external_error_raised(error_class):
            reader(missing_path)
Example #13
0
    def test_unsupported_float16_cleanup(self, pa, path_type):
        """#44847, #44914: a failed float16 parquet write leaves no file behind.

        Writing a float16 column with the ``pa`` engine is expected to raise
        ``pyarrow.ArrowException``; afterwards the target path must not exist
        as a file.
        """
        frame = pd.DataFrame(
            data=np.arange(2, 10, dtype=np.float16), columns=["fp16"]
        )

        with tm.ensure_clean() as path_str:
            target = path_type(path_str)
            with tm.external_error_raised(pyarrow.ArrowException):
                frame.to_parquet(path=target, engine=pa)
            assert not os.path.isfile(target)
Example #14
0
    def test_delete(self):
        """``delete`` on a categorical index keeps the categories.

        Checks removal of the first and the last element, then that an
        out-of-bounds position raises.
        """
        ci = self.create_index()
        categories = ci.categories

        for position, remaining in ((0, "abbca"), (-1, "aabbc")):
            trimmed = ci.delete(position)
            wanted = CategoricalIndex(list(remaining), categories=categories)
            tm.assert_index_equal(trimmed, wanted, exact=True)

        # IndexError or ValueError, depending on the NumPy version
        with tm.external_error_raised((IndexError, ValueError)):
            ci.delete(10)
Example #15
0
def test_rolling_apply_with_pandas_objects(window):
    """Reference 5071: ``rolling.apply`` with ``raw=False`` vs ``raw=True``.

    The applied function reads ``.index`` and ``.iloc`` on its argument. With
    ``raw=False`` the result must match the expected frame; with ``raw=True``
    an ``AttributeError`` is expected.
    """
    columns = {
        "A": np.random.randn(5),
        "B": np.random.randint(0, 10, size=5),
    }
    df = DataFrame(columns, index=date_range("20130101", periods=5, freq="s"))

    def last_unless_first_window(x):
        # the index is equally spaced, so blanking every window that starts
        # at the first timestamp simulates removing the first period
        if x.index[0] == df.index[0]:
            return np.nan
        return x.iloc[-1]

    rolled = df.rolling(window).apply(last_unless_first_window, raw=False)
    wanted = df.iloc[2:].reindex_like(df)
    tm.assert_frame_equal(rolled, wanted)

    with tm.external_error_raised(AttributeError):
        df.rolling(window).apply(last_unless_first_window, raw=True)
Example #16
0
 def check_external_error_on_write(self, df, engine, exc):
     """Assert that writing ``df`` with ``to_parquet`` raises ``exc``.

     The write uses ``engine`` with compression disabled and targets a
     temporary path from ``tm.ensure_clean``.
     """
     with tm.ensure_clean() as path, tm.external_error_raised(exc):
         to_parquet(df, path, engine, compression=None)
Example #17
0
class TestToGBQIntegrationWithServiceAccountKeyPath:
    """Integration tests for ``DataFrame.to_gbq`` and ``pd.read_gbq``.

    Every test receives a destination table id inside a freshly created
    dataset from the ``gbq_dataset`` fixture, which deletes the dataset again
    afterwards.
    """

    @pytest.fixture()
    def gbq_dataset(self):
        """Create a randomly named dataset, yield a table id in it, drop it."""
        _skip_if_no_project_id()
        _skip_if_no_private_key_path()

        dataset_name = "pydata_pandas_bq_testing_" + generate_rand_str()

        self.client = _get_client()
        self.dataset = self.client.dataset(dataset_name)
        self.client.create_dataset(bigquery.Dataset(self.dataset))

        yield f"{dataset_name}.{generate_rand_str()}"

        # teardown: remove the dataset together with any tables in it
        self.client.delete_dataset(self.dataset, delete_contents=True)

    @staticmethod
    def _count_rows(table):
        """Return the row count that ``read_gbq`` reports for ``table``."""
        counts = pd.read_gbq(
            f"SELECT COUNT(*) AS num_rows FROM {table}",
            project_id=_get_project_id(),
            credentials=_get_credentials(),
            dialect="standard",
        )
        return counts["num_rows"][0]

    def test_roundtrip(self, gbq_dataset):
        """Upload a mixed frame and read back the same number of rows."""
        n_rows = 20001
        frame = make_mixed_dataframe_v2(n_rows)

        frame.to_gbq(
            gbq_dataset,
            _get_project_id(),
            chunksize=None,
            credentials=_get_credentials(),
        )

        assert self._count_rows(gbq_dataset) == n_rows

    @pytest.mark.parametrize(
        "if_exists, expected_num_rows, expectation",
        [
            ("append", 300, does_not_raise()),
            (
                "fail",
                200,
                tm.external_error_raised(pandas_gbq.gbq.TableCreationError),
            ),
            ("replace", 100, does_not_raise()),
        ],
    )
    def test_gbq_if_exists(
        self, if_exists, expected_num_rows, expectation, gbq_dataset
    ):
        """GH 29598: ``if_exists`` controls a second upload to the same table.

        200 rows are uploaded first, then the first 100 rows again with the
        given ``if_exists`` mode inside the ``expectation`` context.
        """
        frame = make_mixed_dataframe_v2(200)

        frame.to_gbq(
            gbq_dataset,
            _get_project_id(),
            chunksize=None,
            credentials=_get_credentials(),
        )

        with expectation:
            frame.iloc[:100].to_gbq(
                gbq_dataset,
                _get_project_id(),
                if_exists=if_exists,
                chunksize=None,
                credentials=_get_credentials(),
            )

        assert self._count_rows(gbq_dataset) == expected_num_rows
Example #18
0
    def test_grouped_hist_legacy(self):
        """Grouped histograms via ``_grouped_hist`` and ``DataFrame.hist(by=...)``.

        Checks the axes layout for a four-valued and a single-valued grouping
        key, that tick/label keyword arguments and ``log=True`` are applied,
        that an unknown keyword raises ``AttributeError``, and that a
        non-tuple ``figsize`` raises ``ValueError``.
        """
        from matplotlib.patches import Rectangle

        from pandas.plotting._matplotlib.hist import _grouped_hist

        # A: random floats, B: random datetimes, C: ints in [0, 4), D: constant
        df = DataFrame(np.random.randn(500, 1), columns=["A"])
        df["B"] = to_datetime(
            np.random.randint(
                self.start_date_to_int64,
                self.end_date_to_int64,
                size=500,
                dtype=np.int64,
            )
        )
        df["C"] = np.random.randint(0, 4, 500)
        df["D"] = ["X"] * 500

        axes = _grouped_hist(df.A, by=df.C)
        self._check_axes_shape(axes, axes_num=4, layout=(2, 2))

        tm.close()
        axes = df.hist(by=df.C)
        self._check_axes_shape(axes, axes_num=4, layout=(2, 2))

        tm.close()
        # group by a key with single value
        axes = df.hist(by="D", rot=30)
        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
        self._check_ticks_props(axes, xrot=30)

        tm.close()
        # make sure kwargs to hist are handled
        xf, yf = 20, 18
        xrot, yrot = 30, 40

        axes = _grouped_hist(
            df.A,
            by=df.C,
            cumulative=True,
            bins=4,
            xlabelsize=xf,
            xrot=xrot,
            ylabelsize=yf,
            yrot=yrot,
            density=True,
        )
        # height of last bin (index 5) must be 1.0
        for ax in axes.ravel():
            rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
            height = rects[-1].get_height()
            tm.assert_almost_equal(height, 1.0)
        self._check_ticks_props(
            axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot
        )

        tm.close()
        axes = _grouped_hist(df.A, by=df.C, log=True)
        # scale of y must be 'log'
        self._check_ax_scales(axes, yaxis="log")

        tm.close()
        # propagate attr exception from matplotlib.Axes.hist
        with tm.external_error_raised(AttributeError):
            _grouped_hist(df.A, by=df.C, foo="bar")

        # figsize given as a string instead of a tuple
        msg = "Specify figure size by tuple instead"
        with pytest.raises(ValueError, match=msg):
            df.hist(by="C", figsize="default")
Example #19
0
    def test_hist_df_legacy(self):
        """``DataFrame.hist`` / ``Series.hist`` layout and keyword handling.

        Checks the axes layout for frames with 3 and 6 columns, that
        ``sharex``/``sharey``, ``figsize``, ``bins`` and tick/label keyword
        arguments are accepted, that ``log=True`` gives a log y-scale, and
        that an unknown keyword raises ``AttributeError``.
        """
        from matplotlib.patches import Rectangle

        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(self.hist_df.hist)

        # make sure layout is handled
        # two float columns plus one datetime column
        df = DataFrame(np.random.randn(100, 2))
        df[2] = to_datetime(
            np.random.randint(
                self.start_date_to_int64,
                self.end_date_to_int64,
                size=100,
                dtype=np.int64,
            )
        )
        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(df.hist, grid=False)
        self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
        # the fourth slot of the 2x2 grid must be hidden
        assert not axes[1, 1].get_visible()

        _check_plot_works(df[[2]].hist)
        df = DataFrame(np.random.randn(100, 1))
        _check_plot_works(df.hist)

        # make sure layout is handled
        # five float columns plus one datetime column
        df = DataFrame(np.random.randn(100, 5))
        df[5] = to_datetime(
            np.random.randint(
                self.start_date_to_int64,
                self.end_date_to_int64,
                size=100,
                dtype=np.int64,
            )
        )
        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(df.hist, layout=(4, 2))
        self._check_axes_shape(axes, axes_num=6, layout=(4, 2))

        # make sure sharex, sharey is handled
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.hist, sharex=True, sharey=True)

        # handle figsize arg
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.hist, figsize=(8, 10))

        # check bins argument
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.hist, bins=5)

        # make sure xlabelsize and xrot are handled
        ser = df[0]
        xf, yf = 20, 18
        xrot, yrot = 30, 40
        axes = ser.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
        self._check_ticks_props(
            axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot
        )

        # same tick/label properties, this time on the whole frame
        xf, yf = 20, 18
        xrot, yrot = 30, 40
        axes = df.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
        self._check_ticks_props(
            axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot
        )

        tm.close()

        ax = ser.hist(cumulative=True, bins=4, density=True)
        # height of last bin (index 5) must be 1.0
        rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
        tm.assert_almost_equal(rects[-1].get_height(), 1.0)

        tm.close()
        ax = ser.hist(log=True)
        # scale of y must be 'log'
        self._check_ax_scales(ax, yaxis="log")

        tm.close()

        # propagate attr exception from matplotlib.Axes.hist
        with tm.external_error_raised(AttributeError):
            ser.hist(foo="bar")
Example #20
0
def test_sub(left_array, right_array):
    """Subtracting the two arrays is expected to raise ``TypeError``."""
    # numpy points to ^ operator or logical_xor function instead
    with tm.external_error_raised(TypeError):
        left_array - right_array
Example #21
0
def test_gcs_not_present_exception():
    """Reading a ``gs://`` URL with ``read_csv`` is expected to raise ``ImportError``."""
    gcs_url = "gs://test/test.csv"
    with tm.external_error_raised(ImportError):
        read_csv(gcs_url)
Example #22
0
    def test_read_non_existant(self, reader, module, error_class, fn_ext):
        """``reader`` raises ``error_class`` for a path that does not exist.

        The test is skipped when ``module`` cannot be imported.
        """
        pytest.importorskip(module)

        missing_name = "does_not_exist." + fn_ext
        missing_path = os.path.join(HERE, "data", missing_name)
        with tm.external_error_raised(error_class):
            reader(missing_path)
Example #23
0
def test_external_error_raised():
    """``tm.external_error_raised`` passes when the given error type is raised."""
    message = "Should not check this error message, so it will pass"
    with tm.external_error_raised(TypeError):
        raise TypeError(message)