def test_numpy_ufuncs_basic(index, func):
    """Apply a numpy ufunc to ``index`` and check the outcome per index kind.

    See https://numpy.org/doc/stable/reference/ufuncs.html

    * datetime-like indexes must raise ``TypeError`` or ``AttributeError``;
    * Float64/Int64/UInt64/Range indexes must return a ``Float64Index`` equal
      to the ufunc applied to the underlying values;
    * any other non-empty index must raise; empty ones are skipped.
    """
    raising = (TypeError, AttributeError)
    numeric_kinds = (Float64Index, Int64Index, UInt64Index, RangeIndex)

    if isinstance(index, DatetimeIndexOpsMixin):
        with tm.external_error_raised(raising), np.errstate(all="ignore"):
            func(index)
        return

    if isinstance(index, numeric_kinds):
        # coerces to float (e.g. np.sin)
        with np.errstate(all="ignore"):
            actual = func(index)
            wanted = Index(func(index.values), name=index.name)

        tm.assert_index_equal(actual, wanted)
        assert isinstance(actual, Float64Index)
        return

    # Everything else raises AttributeError or TypeError; nothing is checked
    # for an empty index.
    if len(index) != 0:
        with tm.external_error_raised(raising), np.errstate(all="ignore"):
            func(index)
def test_numpy_ufuncs_other(index, func, request):
    """Apply a predicate-style numpy ufunc to ``index`` and check the outcome.

    See https://numpy.org/doc/stable/reference/ufuncs.html

    ``request`` is accepted but not used by this variant of the test.
    """
    nan_like_checks = (np.isfinite, np.isinf, np.isnan)

    if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
        if func not in nan_like_checks:
            with tm.external_error_raised(TypeError):
                func(index)
            return

        # numpy 1.18 changed isinf and isnan to not raise on dt64/td64
        outcome = func(index)
        assert isinstance(outcome, np.ndarray)
        return

    if isinstance(index, PeriodIndex):
        with tm.external_error_raised(TypeError):
            func(index)
        return

    if isinstance(index, NumericIndex):
        # Results in bool array
        outcome = func(index)
        assert isinstance(outcome, np.ndarray)
        assert not isinstance(outcome, Index)
        return

    # Remaining index kinds must raise; an empty index is not exercised.
    if len(index) != 0:
        with tm.external_error_raised(TypeError):
            func(index)
def test_tz_localize_nonexistent(self, tz, method, exp):
    """Check ``tz_localize(..., nonexistent=method)`` on index, Series and frame.

    GH 8917. Sixty minute-spaced timestamps starting at 2015-03-29 02:00 are
    localized to ``tz``. Depending on the parameters the call must raise
    ``pytz.NonExistentTimeError``, raise ``ValueError`` for an invalid
    ``method``, or map every timestamp to ``exp``.
    """
    periods = 60
    dti = date_range(start="2015-03-29 02:00:00", periods=periods, freq="min")
    ser = Series(1, index=dti)
    df = ser.to_frame()

    if method == "raise":
        for obj in (dti, ser, df):
            with tm.external_error_raised(pytz.NonExistentTimeError):
                obj.tz_localize(tz, nonexistent=method)
        return

    if exp == "invalid":
        invalid_msg = "argument must be one of"
        for obj in (dti, ser, df):
            with pytest.raises(ValueError, match=invalid_msg):
                obj.tz_localize(tz, nonexistent=method)
        return

    # Only the Series and the DataFrame results are compared in this branch.
    expected_ser = Series(1, index=DatetimeIndex([exp] * periods, tz=tz))
    localized_ser = ser.tz_localize(tz, nonexistent=method)
    tm.assert_series_equal(localized_ser, expected_ser)

    localized_df = df.tz_localize(tz, nonexistent=method)
    tm.assert_frame_equal(localized_df, expected_ser.to_frame())
def test_numpy_ufuncs_basic(index, func):
    """Apply a numpy ufunc to ``index`` and check the outcome per index kind.

    See https://numpy.org/doc/stable/reference/ufuncs.html

    Numeric indexes (``NumericIndex`` instances, or indexes whose non-numpy
    dtype reports ``_is_numeric``) must return an index equal to the ufunc
    applied to the underlying values. Datetime-like indexes and all other
    non-empty indexes must raise ``TypeError`` or ``AttributeError``.
    """
    raising = (TypeError, AttributeError)

    if isinstance(index, DatetimeIndexOpsMixin):
        with tm.external_error_raised(raising), np.errstate(all="ignore"):
            func(index)
        return

    is_numeric = isinstance(index, NumericIndex) or (
        not isinstance(index.dtype, np.dtype) and index.dtype._is_numeric
    )
    if is_numeric:
        # coerces to float (e.g. np.sin)
        with np.errstate(all="ignore"):
            actual = func(index)
            wanted = Index(func(index.values), name=index.name)

        tm.assert_index_equal(actual, wanted)
        if type(index) is Index:
            # e.g. np.exp with Int64 -> Float64
            assert type(actual) is Index
        else:
            # i.e NumericIndex
            assert isinstance(actual, Float64Index)
        return

    # Everything else raises AttributeError or TypeError; nothing is checked
    # for an empty index.
    if len(index) != 0:
        with tm.external_error_raised(raising), np.errstate(all="ignore"):
            func(index)
def test_numpy_ufuncs_other(index, func, request):
    """Apply a predicate-style numpy ufunc to ``index`` and check the outcome.

    See https://numpy.org/doc/stable/reference/ufuncs.html

    For a tz-aware ``DatetimeIndex`` combined with isfinite/isnan/isinf the
    test node is marked ``xfail`` before the ufunc is called.
    """
    nan_like_checks = (np.isfinite, np.isinf, np.isnan)

    if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
        is_nan_like_check = func in nan_like_checks
        tz_aware_dti = isinstance(index, DatetimeIndex) and index.tz is not None

        if tz_aware_dti and is_nan_like_check:
            xfail = pytest.mark.xfail(reason="__array_ufunc__ is not defined")
            request.node.add_marker(xfail)

        if is_nan_like_check:
            # numpy 1.18 changed isinf and isnan to not raise on dt64/td64
            outcome = func(index)
            assert isinstance(outcome, np.ndarray)
        else:
            with tm.external_error_raised(TypeError):
                func(index)
        return

    if isinstance(index, PeriodIndex):
        with tm.external_error_raised(TypeError):
            func(index)
        return

    if isinstance(index, NumericIndex):
        # Results in bool array
        outcome = func(index)
        assert isinstance(outcome, np.ndarray)
        assert not isinstance(outcome, Index)
        return

    # Remaining index kinds must raise; an empty index is not exercised.
    if len(index) != 0:
        with tm.external_error_raised(TypeError):
            func(index)
def test_dt_round_tz_ambiguous(self, method):
    """Exercise the ``ambiguous`` argument of a ``.dt`` rounding method.

    GH 18946: round near "fall back" DST. ``method`` names the ``.dt``
    accessor method to call; it is invoked with frequency ``"H"``.
    """
    utc_stamps = [
        pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True),
        pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True),
        pd.to_datetime("2017-10-29 03:00:00+01:00", utc=True),
    ]
    df1 = DataFrame(utc_stamps, columns=["date"])
    df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid")

    def rounded(ambiguous):
        # Look the accessor method up afresh on every call.
        return getattr(df1.date.dt, method)("H", ambiguous=ambiguous)

    unchanged = df1["date"]

    # infer
    tm.assert_series_equal(rounded("infer"), unchanged)

    # bool-array
    tm.assert_series_equal(rounded([True, False, False]), unchanged)

    # NaT: the first two rows are expected to become NaT
    nat_result = rounded("NaT")
    with_nat = df1["date"].copy()
    with_nat.iloc[0:2] = pd.NaT
    tm.assert_series_equal(nat_result, with_nat)

    # raise
    with tm.external_error_raised(pytz.AmbiguousTimeError):
        rounded("raise")
def check_external_error_on_write(self, df):
    """Assert that writing ``df`` with ``to_feather`` raises an exception.

    Any ``Exception`` subclass is accepted. The temporary path is created
    inside the raising context, exactly as nested here.
    """
    with tm.external_error_raised(Exception), tm.ensure_clean() as target:
        to_feather(df, target)
def test_series_tz_localize_ambiguous_bool(self):
    """Boolean values must be accepted for ``ambiguous`` in ``dt.tz_localize``.

    GH#14402. Scalar and single-element-list booleans are both exercised;
    omitting ``ambiguous`` must raise ``pytz.AmbiguousTimeError``.
    """
    zone = "US/Central"
    ser = Series([Timestamp("2015-11-01 01:00:03")])
    expected_when_true = Series([Timestamp("2015-11-01 01:00:03-0500", tz=zone)])
    expected_when_false = Series([Timestamp("2015-11-01 01:00:03-0600", tz=zone)])

    with tm.external_error_raised(pytz.AmbiguousTimeError):
        ser.dt.tz_localize("US/Central")

    scenarios = [
        (True, expected_when_true),
        ([True], expected_when_true),
        (False, expected_when_false),
        ([False], expected_when_false),
    ]
    for ambiguous, expected in scenarios:
        localized = ser.dt.tz_localize("US/Central", ambiguous=ambiguous)
        tm.assert_series_equal(localized, expected)
def test_delete(self):
    """Deleting one position from a daily ``TimedeltaIndex``.

    Removing an end element is expected to keep ``freq``; removing an interior
    element is expected to reset it to ``None``. An out-of-bounds position
    must raise.
    """
    idx = timedelta_range(start="1 Days", periods=5, freq="D", name="idx")

    # preserve freq
    without_first = timedelta_range(start="2 Days", periods=4, freq="D", name="idx")
    without_last = timedelta_range(start="1 Days", periods=4, freq="D", name="idx")

    # reset freq to None
    without_second = TimedeltaIndex(
        ["1 day", "3 day", "4 day", "5 day"], freq=None, name="idx"
    )

    scenarios = [
        (0, without_first),
        (-5, without_first),
        (-1, without_last),
        (4, without_last),
        (1, without_second),
    ]
    for position, expected in scenarios:
        trimmed = idx.delete(position)
        tm.assert_index_equal(trimmed, expected)
        assert trimmed.name == expected.name
        assert trimmed.freq == expected.freq

    with tm.external_error_raised((IndexError, ValueError)):
        # either depending on numpy version
        idx.delete(5)
def test_numpy_ufuncs_other(index, func):
    """Apply a predicate-style numpy ufunc to ``index``, with and without ``out=``.

    See https://numpy.org/doc/stable/reference/ufuncs.html

    Where the ufunc is supported, the result written through ``out=`` must
    match the returned result.
    """
    nan_like_checks = (np.isfinite, np.isinf, np.isnan)

    if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
        if func not in nan_like_checks:
            with tm.external_error_raised(TypeError):
                func(index)
            return

        # numpy 1.18 changed isinf and isnan to not raise on dt64/td64
        outcome = func(index)
        assert isinstance(outcome, np.ndarray)

        buffer = np.empty(index.shape, dtype=bool)
        func(index, out=buffer)
        tm.assert_numpy_array_equal(buffer, outcome)
        return

    if isinstance(index, PeriodIndex):
        with tm.external_error_raised(TypeError):
            func(index)
        return

    supported = (
        isinstance(index, NumericIndex)
        or (not isinstance(index.dtype, np.dtype) and index.dtype._is_numeric)
        or (index.dtype.kind == "c" and func is not np.signbit)
        or index.dtype == bool
    )
    if supported:
        # Results in bool array
        has_extension_dtype = not isinstance(index.dtype, np.dtype)

        outcome = func(index)
        # e.g. Int64 we expect to get BooleanArray back
        expected_type = BooleanArray if has_extension_dtype else np.ndarray
        assert isinstance(outcome, expected_type)

        buffer = np.empty(index.shape, dtype=bool)
        func(index, out=buffer)

        # For extension dtypes, compare against the result's ``_data``.
        comparable = outcome._data if has_extension_dtype else outcome
        tm.assert_numpy_array_equal(buffer, comparable)
        return

    # Remaining index kinds must raise; an empty index is not exercised.
    if len(index) != 0:
        with tm.external_error_raised(TypeError):
            func(index)
def test_errorbar_plot(self):
    """Series plots accept ``xerr``/``yerr`` in several forms and draw error bars.

    Covers line and bar plots, a time-series index, and finally error inputs
    of the wrong length or type, which must raise.
    """
    s = Series(np.arange(10), name="x")
    s_err = np.random.randn(10)
    d_err = DataFrame(np.random.randn(10, 2), index=s.index, columns=["x", "y"])

    # test line and bar plots
    for kind in ("line", "bar"):
        # yerr given as Series, ndarray, list and DataFrame in turn
        yerr_variants = (Series(s_err), s_err, s_err.tolist(), d_err)
        for yerr in yerr_variants:
            ax = _check_plot_works(s.plot, yerr=yerr, kind=kind)
            self._check_has_errorbars(ax, xerr=0, yerr=1)

        ax = _check_plot_works(s.plot, xerr=0.2, yerr=0.2, kind=kind)
        self._check_has_errorbars(ax, xerr=1, yerr=1)

    ax = _check_plot_works(s.plot, xerr=s_err)
    self._check_has_errorbars(ax, xerr=1, yerr=0)

    # test time series plotting
    ix = date_range("1/1/2000", "1/1/2001", freq="M")
    ts = Series(np.arange(12), index=ix, name="x")
    ts_err = Series(np.random.randn(12), index=ix)
    td_err = DataFrame(np.random.randn(12, 2), index=ix, columns=["x", "y"])

    for yerr in (ts_err, td_err):
        ax = _check_plot_works(ts.plot, yerr=yerr)
        self._check_has_errorbars(ax, xerr=0, yerr=1)

    # check incorrect lengths and types
    with tm.external_error_raised(ValueError):
        s.plot(yerr=np.arange(11))

    non_numeric_err = ["zzz"] * 10
    with tm.external_error_raised(TypeError):
        s.plot(yerr=non_numeric_err)
def test_read_expands_user_home_dir(
    self, reader, module, error_class, fn_ext, monkeypatch
):
    """Reading a missing ``~``-prefixed path must raise ``error_class``.

    ``icom._expand_user`` is patched to prepend ``"foo"`` to the path. The
    test is skipped when ``module`` cannot be imported.
    """
    pytest.importorskip(module)

    def fake_expand_user(raw_path):
        return os.path.join("foo", raw_path)

    missing_path = os.path.join("~", "does_not_exist." + fn_ext)
    monkeypatch.setattr(icom, "_expand_user", fake_expand_user)

    with tm.external_error_raised(error_class):
        reader(missing_path)
def test_unsupported_float16_cleanup(self, pa, path_type):
    """A failed float16 parquet write must not leave a file behind.

    #44847, #44914: Not able to write float 16 column using pyarrow.
    Tests cleanup by pyarrow in case of an error.
    """
    half_floats = np.arange(2, 10, dtype=np.float16)
    df = pd.DataFrame(data=half_floats, columns=["fp16"])

    with tm.ensure_clean() as path_str:
        target = path_type(path_str)
        with tm.external_error_raised(pyarrow.ArrowException):
            df.to_parquet(path=target, engine=pa)
        # Checked while the temporary path is still managed by ensure_clean.
        assert not os.path.isfile(target)
def test_delete(self):
    """Deleting the first or last element of the ``CategoricalIndex`` fixture.

    The categories must be kept (compared with ``exact=True``). An
    out-of-bounds position must raise.
    """
    ci = self.create_index()
    categories = ci.categories

    scenarios = [(0, "abbca"), (-1, "aabbc")]
    for position, remaining in scenarios:
        trimmed = ci.delete(position)
        expected = CategoricalIndex(list(remaining), categories=categories)
        tm.assert_index_equal(trimmed, expected, exact=True)

    with tm.external_error_raised((IndexError, ValueError)):
        # Either depending on NumPy version
        ci.delete(10)
def test_rolling_apply_with_pandas_objects(window):
    """``rolling.apply`` with ``raw=False`` passes objects that carry an index.

    GH 5071. The applied function reads ``.index`` and ``.iloc``, so the same
    function with ``raw=True`` is expected to raise ``AttributeError``.
    """
    df = DataFrame(
        {"A": np.random.randn(5), "B": np.random.randint(0, 10, size=5)},
        index=date_range("20130101", periods=5, freq="s"),
    )

    # we have an equal spaced timeseries index
    # so simulate removing the first period
    def last_unless_first_window(chunk):
        return np.nan if chunk.index[0] == df.index[0] else chunk.iloc[-1]

    rolled = df.rolling(window).apply(last_unless_first_window, raw=False)
    wanted = df.iloc[2:].reindex_like(df)
    tm.assert_frame_equal(rolled, wanted)

    with tm.external_error_raised(AttributeError):
        df.rolling(window).apply(last_unless_first_window, raw=True)
def check_external_error_on_write(self, df, engine, exc):
    """Assert that ``to_parquet`` raises ``exc`` when writing ``df``.

    The write uses ``engine`` with ``compression=None``. The temporary path
    is created outside the raising context.
    """
    # check that an external library is raising the exception on writing
    with tm.ensure_clean() as target, tm.external_error_raised(exc):
        to_parquet(df, target, engine, compression=None)
class TestToGBQIntegrationWithServiceAccountKeyPath:
    """Integration tests writing with ``to_gbq`` and reading with ``read_gbq``."""

    @pytest.fixture()
    def gbq_dataset(self):
        """Create a randomly named dataset, yield a table id in it, then drop it.

        Skipped when no project id or private key path is available.
        """
        # Setup Dataset
        _skip_if_no_project_id()
        _skip_if_no_private_key_path()

        dataset_id = "pydata_pandas_bq_testing_" + generate_rand_str()

        self.client = _get_client()
        self.dataset = self.client.dataset(dataset_id)

        # Create the dataset
        self.client.create_dataset(bigquery.Dataset(self.dataset))

        table_name = generate_rand_str()
        yield f"{dataset_id}.{table_name}"

        # Teardown Dataset
        self.client.delete_dataset(self.dataset, delete_contents=True)

    def test_roundtrip(self, gbq_dataset):
        """Rows written with ``to_gbq`` are all counted back by ``read_gbq``."""
        n_rows = 20001
        frame = make_mixed_dataframe_v2(n_rows)

        frame.to_gbq(
            gbq_dataset,
            _get_project_id(),
            chunksize=None,
            credentials=_get_credentials(),
        )

        count_query = f"SELECT COUNT(*) AS num_rows FROM {gbq_dataset}"
        counted = pd.read_gbq(
            count_query,
            project_id=_get_project_id(),
            credentials=_get_credentials(),
            dialect="standard",
        )
        assert counted["num_rows"][0] == n_rows

    @pytest.mark.parametrize(
        "if_exists, expected_num_rows, expectation",
        [
            ("append", 300, does_not_raise()),
            ("fail", 200, tm.external_error_raised(pandas_gbq.gbq.TableCreationError)),
            ("replace", 100, does_not_raise()),
        ],
    )
    def test_gbq_if_exists(
        self, if_exists, expected_num_rows, expectation, gbq_dataset
    ):
        """Second write of 100 rows honours ``if_exists`` (GH 29598).

        200 rows are written first. The second write runs inside
        ``expectation``, and the final row count must equal
        ``expected_num_rows``.
        """
        n_rows = 200
        frame = make_mixed_dataframe_v2(n_rows)

        frame.to_gbq(
            gbq_dataset,
            _get_project_id(),
            chunksize=None,
            credentials=_get_credentials(),
        )

        first_hundred = frame.iloc[:100]
        with expectation:
            first_hundred.to_gbq(
                gbq_dataset,
                _get_project_id(),
                if_exists=if_exists,
                chunksize=None,
                credentials=_get_credentials(),
            )

        count_query = f"SELECT COUNT(*) AS num_rows FROM {gbq_dataset}"
        counted = pd.read_gbq(
            count_query,
            project_id=_get_project_id(),
            credentials=_get_credentials(),
            dialect="standard",
        )
        assert counted["num_rows"][0] == expected_num_rows
def test_grouped_hist_legacy(self):
    """Grouped histograms: layout, tick properties, kwargs and error cases."""
    from matplotlib.patches import Rectangle

    from pandas.plotting._matplotlib.hist import _grouped_hist

    n_rows = 500
    df = DataFrame(np.random.randn(n_rows, 1), columns=["A"])
    random_dates = np.random.randint(
        self.start_date_to_int64,
        self.end_date_to_int64,
        size=n_rows,
        dtype=np.int64,
    )
    df["B"] = to_datetime(random_dates)
    df["C"] = np.random.randint(0, 4, n_rows)
    df["D"] = ["X"] * n_rows

    # Grouping by the four-valued column gives a 2x2 grid either way.
    grouped_plotters = (
        lambda: _grouped_hist(df.A, by=df.C),
        lambda: df.hist(by=df.C),
    )
    for draw in grouped_plotters:
        axes = draw()
        self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
        tm.close()

    # group by a key with single value
    axes = df.hist(by="D", rot=30)
    self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
    self._check_ticks_props(axes, xrot=30)
    tm.close()

    # make sure kwargs to hist are handled
    tick_props = {"xlabelsize": 20, "xrot": 30, "ylabelsize": 18, "yrot": 40}
    axes = _grouped_hist(
        df.A, by=df.C, cumulative=True, bins=4, density=True, **tick_props
    )
    # height of last bin must be 1.0 (cumulative, density-normalised)
    for ax in axes.ravel():
        bars = [child for child in ax.get_children() if isinstance(child, Rectangle)]
        tm.assert_almost_equal(bars[-1].get_height(), 1.0)

    self._check_ticks_props(axes, **tick_props)
    tm.close()

    axes = _grouped_hist(df.A, by=df.C, log=True)
    # scale of y must be 'log'
    self._check_ax_scales(axes, yaxis="log")
    tm.close()

    # propagate attr exception from matplotlib.Axes.hist
    with tm.external_error_raised(AttributeError):
        _grouped_hist(df.A, by=df.C, foo="bar")

    figsize_msg = "Specify figure size by tuple instead"
    with pytest.raises(ValueError, match=figsize_msg):
        df.hist(by="C", figsize="default")
def test_hist_df_legacy(self):
    """``DataFrame.hist`` / ``Series.hist``: layout, kwargs and tick properties."""
    from matplotlib.patches import Rectangle

    def random_dates(count):
        # Random int64 values between the fixture's start/end date bounds.
        return to_datetime(
            np.random.randint(
                self.start_date_to_int64,
                self.end_date_to_int64,
                size=count,
                dtype=np.int64,
            )
        )

    with tm.assert_produces_warning(UserWarning):
        _check_plot_works(self.hist_df.hist)

    # make sure layout is handled
    df = DataFrame(np.random.randn(100, 2))
    df[2] = random_dates(100)
    with tm.assert_produces_warning(UserWarning):
        axes = _check_plot_works(df.hist, grid=False)
    self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
    assert not axes[1, 1].get_visible()

    _check_plot_works(df[[2]].hist)
    df = DataFrame(np.random.randn(100, 1))
    _check_plot_works(df.hist)

    # make sure layout is handled
    df = DataFrame(np.random.randn(100, 5))
    df[5] = random_dates(100)
    with tm.assert_produces_warning(UserWarning):
        axes = _check_plot_works(df.hist, layout=(4, 2))
    self._check_axes_shape(axes, axes_num=6, layout=(4, 2))

    # make sure sharex, sharey is handled; handle figsize arg;
    # check bins argument
    warning_cases = (
        {"sharex": True, "sharey": True},
        {"figsize": (8, 10)},
        {"bins": 5},
    )
    for hist_kwargs in warning_cases:
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(df.hist, **hist_kwargs)

    # make sure xlabelsize and xrot are handled
    ser = df[0]
    tick_props = {"xlabelsize": 20, "xrot": 30, "ylabelsize": 18, "yrot": 40}

    axes = ser.hist(**tick_props)
    self._check_ticks_props(axes, **tick_props)

    axes = df.hist(**tick_props)
    self._check_ticks_props(axes, **tick_props)

    tm.close()

    ax = ser.hist(cumulative=True, bins=4, density=True)
    # height of last bin must be 1.0 (cumulative, density-normalised)
    bars = [child for child in ax.get_children() if isinstance(child, Rectangle)]
    tm.assert_almost_equal(bars[-1].get_height(), 1.0)

    tm.close()
    ax = ser.hist(log=True)
    # scale of y must be 'log'
    self._check_ax_scales(ax, yaxis="log")

    tm.close()

    # propagate attr exception from matplotlib.Axes.hist
    with tm.external_error_raised(AttributeError):
        ser.hist(foo="bar")
def test_sub(left_array, right_array):
    """Subtracting the two array fixtures must raise ``TypeError``."""
    unsupported = TypeError
    with tm.external_error_raised(unsupported):
        # numpy points to ^ operator or logical_xor function instead
        left_array - right_array
def test_gcs_not_present_exception():
    """``read_csv`` on a ``gs://`` URL must raise ``ImportError``.

    NOTE(review): presumably this relies on the GCS backend package being
    absent from the test environment; confirm against the test setup.
    """
    gcs_url = "gs://test/test.csv"
    with tm.external_error_raised(ImportError):
        read_csv(gcs_url)
def test_read_non_existant(self, reader, module, error_class, fn_ext):
    """Reading a file that does not exist must raise ``error_class``.

    The test is skipped when ``module`` cannot be imported.
    """
    pytest.importorskip(module)

    missing_name = "does_not_exist." + fn_ext
    missing_path = os.path.join(HERE, "data", missing_name)

    with tm.external_error_raised(error_class):
        reader(missing_path)
def test_external_error_raised():
    """``tm.external_error_raised`` accepts a matching exception type.

    The raised message is arbitrary; per its own text, the helper is not
    expected to check it.
    """
    unchecked_message = "Should not check this error message, so it will pass"
    with tm.external_error_raised(TypeError):
        raise TypeError(unchecked_message)