def test_andrews_curves(self, iris):
    """Check ``andrews_curves`` colour handling on iris and on a single-class frame."""
    from matplotlib import cm

    from pandas.plotting import andrews_curves

    hex_colors = ("#556270", "#4ECDC4", "#C7F464")
    named_colors = ["dodgerblue", "aquamarine", "seagreen"]

    def check_color_options(frame):
        # Explicit palettes first (hex tuple, then named list), then a colormap
        # sampled once per distinct class value.
        classes = frame["Name"]
        for palette in (hex_colors, named_colors):
            axes = _check_plot_works(
                andrews_curves, frame=frame, class_column="Name", color=palette
            )
            self._check_colors(
                axes.get_lines()[:10], linecolors=palette, mapping=classes[:10]
            )

        axes = _check_plot_works(
            andrews_curves, frame=frame, class_column="Name", colormap=cm.jet
        )
        sampled = [cm.jet(n) for n in np.linspace(0, 1, classes.nunique())]
        self._check_colors(
            axes.get_lines()[:10], linecolors=sampled, mapping=classes[:10]
        )

    # Ensure no UserWarning when making plot
    with tm.assert_produces_warning(None):
        _check_plot_works(andrews_curves, frame=iris, class_column="Name")
    check_color_options(iris)

    # Random frame in which every row belongs to the same class "A".
    length = 10
    single_class = DataFrame(
        {
            "A": np.random.rand(length),
            "B": np.random.rand(length),
            "C": np.random.rand(length),
            "Name": ["A"] * length,
        }
    )
    _check_plot_works(andrews_curves, frame=single_class, class_column="Name")
    check_color_options(single_class)

    # Legend handles must carry the colours that were passed in.
    colors = ["b", "g", "r"]
    legend_frame = DataFrame(
        {"A": [1, 2, 3], "B": [1, 2, 3], "C": [1, 2, 3], "Name": colors}
    )
    ax = andrews_curves(legend_frame, "Name", color=colors)
    handles, _ = ax.get_legend_handles_labels()
    self._check_colors(handles, linecolors=colors)
def test_is_dtype_equal_deprecated(self):
    """Calling ``Categorical.is_dtype_equal`` must emit a FutureWarning (GH#37545)."""
    values = list("aabca")
    cat = Categorical(values, categories=list("abc"), ordered=False)

    with tm.assert_produces_warning(FutureWarning):
        cat.is_dtype_equal(cat)
def test_delta_deprecated():
    """Reading ``Timedelta.delta`` must emit a matching FutureWarning (GH#46476)."""
    expected_msg = "Timedelta.delta is"
    nanos = Timedelta(123456546, unit="ns")

    with tm.assert_produces_warning(FutureWarning, match=expected_msg):
        nanos.delta
def test_cut_no_warnings():
    """Binning a column with ``cut`` and assigning the result emits no warning."""
    bin_edges = range(0, 105, 10)
    labels = [f"{low} - {low + 9}" for low in range(0, 100, 10)]
    df = DataFrame({"value": np.random.randint(0, 100, 20)})

    with tm.assert_produces_warning(False):
        df["group"] = cut(df.value, bin_edges, right=False, labels=labels)
def test_excel_sep_warning(self, df):
    """``to_clipboard`` in excel mode warns when ``sep`` is more than one character."""
    # Raw string: a backslash followed by "t", i.e. two characters, not a tab.
    two_char_sep = r"\t"

    # NOTE(review): no category is given, so any Warning subclass satisfies
    # this check -- consider narrowing once the emitted category is confirmed.
    with tm.assert_produces_warning():
        df.to_clipboard(excel=True, sep=two_char_sep)
def test_get_loc_raises_missized_tolerance(self):
    """A list-like tolerance of the wrong size raises ValueError (plus the deprecation warning)."""
    idx = Index([0, 1, 2])
    bad_tolerance = [1, 1]

    with pytest.raises(
        ValueError, match="tolerance size must match"
    ), tm.assert_produces_warning(FutureWarning, match="deprecated"):
        idx.get_loc(1.1, "nearest", tolerance=bad_tolerance)
def test_fillna_series_method(self, data_missing):
    """Run the parent's limit-backfill fillna test, expecting a PerformanceWarning."""
    # NOTE(review): this delegates to ``test_fillna_limit_backfill`` rather than
    # to a same-named parent test -- confirm that this is intentional.
    parent_test = super().test_fillna_limit_backfill

    with tm.assert_produces_warning(PerformanceWarning):
        parent_test(data_missing)
def test_getitem_ndim_deprecated():
    """Indexing a Series with ``[:, None]`` must emit a FutureWarning."""
    ser = Series([0, 1])
    # Same key that the subscript ``[:, None]`` produces.
    key = (slice(None), None)

    with tm.assert_produces_warning(FutureWarning):
        ser[key]
def test_deprecated_start_stop_step_attrs(self, attr_name):
    """Reading the attribute named ``attr_name`` on a new index warns (GH 26581)."""
    index = self.create_index()

    with tm.assert_produces_warning(FutureWarning):
        getattr(index, attr_name)
def test_unambiguous_timedelta_values(self, val, warning):
    """``to_timedelta(val)`` produces exactly the parametrized ``warning``.

    GH36666: deprecate use of strings denoting units with 'M', 'Y', 'm' or 'y'
    in pd.to_timedelta.
    """
    expectation = tm.assert_produces_warning(warning, check_stacklevel=False)

    with expectation:
        to_timedelta(val)
def assert_stat_op_calc(
    opname,
    alternative,
    frame,
    has_skipna=True,
    check_dtype=True,
    check_dates=False,
    check_less_precise=False,
    skipna_alternative=None,
):
    """
    Check that operator opname works as advertised on frame

    Parameters
    ----------
    opname : string
        Name of the operator to test on frame
    alternative : function
        Function that opname is tested against; i.e. "frame.opname()" should
        equal "alternative(frame)".
    frame : DataFrame
        The object that the tests are executed on
    has_skipna : bool, default True
        Whether the method "opname" has the kwarg "skipna"
    check_dtype : bool, default True
        Whether the dtypes of the result of "frame.opname()" and
        "alternative(frame)" should be checked.
    check_dates : bool, default False
        Whether opname should be tested on a Datetime Series
    check_less_precise : bool, default False
        Whether results should only be compared approximately;
        passed on to tm.assert_series_equal
    skipna_alternative : function, default None
        NaN-safe version of alternative

    Raises
    ------
    AssertionError
        If any of the comparisons against ``alternative`` fails.
    """
    f = getattr(frame, opname)

    if check_dates:
        # Only "mean" and "median" are expected to warn on datetime columns.
        expected_warning = FutureWarning if opname in ["mean", "median"] else None
        df = DataFrame({"b": date_range("1/1/2001", periods=2)})
        with tm.assert_produces_warning(expected_warning):
            result = getattr(df, opname)()
        assert isinstance(result, Series)

        # Same check again after adding a numeric column next to the dates.
        df["a"] = range(len(df))
        with tm.assert_produces_warning(expected_warning):
            result = getattr(df, opname)()
        assert isinstance(result, Series)
        assert len(result)

    if has_skipna:

        def wrapper(x):
            return alternative(x.values)

        skipna_wrapper = tm._make_skipna_wrapper(alternative, skipna_alternative)
        result0 = f(axis=0, skipna=False)
        result1 = f(axis=1, skipna=False)
        tm.assert_series_equal(
            result0,
            frame.apply(wrapper),
            check_dtype=check_dtype,
            check_less_precise=check_less_precise,
        )
        # HACK: win32
        tm.assert_series_equal(
            result1,
            frame.apply(wrapper, axis=1),
            check_dtype=False,
            check_less_precise=check_less_precise,
        )
    else:
        skipna_wrapper = alternative

    # Default call (no explicit skipna) along both axes.
    result0 = f(axis=0)
    result1 = f(axis=1)
    tm.assert_series_equal(
        result0,
        frame.apply(skipna_wrapper),
        check_dtype=check_dtype,
        check_less_precise=check_less_precise,
    )

    # The row-wise result is compared only for sum and prod.
    if opname in ["sum", "prod"]:
        expected = frame.apply(skipna_wrapper, axis=1)
        tm.assert_series_equal(
            result1, expected, check_dtype=False, check_less_precise=check_less_precise
        )

    # check dtypes
    if check_dtype:
        lcd_dtype = frame.values.dtype
        assert lcd_dtype == result0.dtype
        assert lcd_dtype == result1.dtype

    # bad axis
    with pytest.raises(ValueError, match="No axis named 2"):
        f(axis=2)

    # all NA case
    if has_skipna:
        # ``np.nan`` rather than the ``np.NaN`` alias, which NumPy 2.0 removed.
        all_na = frame * np.nan
        r0 = getattr(all_na, opname)(axis=0)
        r1 = getattr(all_na, opname)(axis=1)
        if opname in ["sum", "prod"]:
            unit = 1 if opname == "prod" else 0  # result for empty sum/prod
            expected = pd.Series(unit, index=r0.index, dtype=r0.dtype)
            tm.assert_series_equal(r0, expected)
            expected = pd.Series(unit, index=r1.index, dtype=r1.dtype)
            tm.assert_series_equal(r1, expected)
def test_frame_outer_deprecated():
    """``np.subtract.outer`` applied to DataFrames must emit a FutureWarning."""
    frame = pd.DataFrame({"A": [1, 2]})

    with tm.assert_produces_warning(FutureWarning):
        np.subtract.outer(frame, frame)
def test_construction_dti_with_mixed_timezones(self):
    """Build a DatetimeIndex from Timestamps with no, matching, or clashing timezones.

    GH 11488 (not changed, added explicit tests).
    """
    # no tz results in DatetimeIndex
    result = DatetimeIndex(
        [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
    )
    exp = DatetimeIndex(
        [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
    )
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)

    # same tz results in DatetimeIndex; the US/Eastern pair is the DST case
    same_tz_cases = [
        ("Asia/Tokyo", "2011-01-02 10:00"),
        ("US/Eastern", "2011-08-01 10:00"),
    ]
    for zone, second in same_tz_cases:
        result = DatetimeIndex(
            [Timestamp("2011-01-01 10:00", tz=zone), Timestamp(second, tz=zone)],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp(second)],
            tz=zone,
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

    # tz mismatch affecting to tz-aware raises TypeError/ValueError
    msg = "cannot be converted to datetime64"
    # (first element, extra constructor kwargs); the second element is always
    # the US/Eastern timestamp.
    mismatch_cases = [
        (Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), {}),
        (Timestamp("2011-01-01 10:00"), {"tz": "Asia/Tokyo"}),
        (Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), {"tz": "US/Eastern"}),
    ]
    for first, tz_kwargs in mismatch_cases:
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(
                [first, Timestamp("2011-01-02 10:00", tz="US/Eastern")],
                name="idx",
                **tz_kwargs,
            )

    # passing tz should results in DatetimeIndex, then mismatch raises
    # TypeError
    with pytest.raises(ValueError, match=msg), tm.assert_produces_warning(
        FutureWarning
    ):
        # subclass-specific kwargs to pd.Index
        Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            tz="Asia/Tokyo",
            name="idx",
        )
def test_construction_index_with_mixed_timezones_with_NaT(self):
    """Check ``Index`` inference for Timestamps interleaved with NaT (see gh-11488)."""
    # tz-naive values
    naive_values = [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")]
    result = Index(naive_values, name="idx")
    exp = DatetimeIndex(
        [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
        name="idx",
    )
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)
    assert result.tz is None

    # Same tz results in DatetimeIndex
    tokyo_values = [
        pd.NaT,
        Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
        pd.NaT,
        Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
    ]
    result = Index(tokyo_values, name="idx")
    exp = DatetimeIndex(
        [
            pd.NaT,
            Timestamp("2011-01-01 10:00"),
            pd.NaT,
            Timestamp("2011-01-02 10:00"),
        ],
        tz="Asia/Tokyo",
        name="idx",
    )
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)
    assert result.tz is not None
    assert result.tz == exp.tz

    # same tz results in DatetimeIndex (DST)
    eastern_values = [
        Timestamp("2011-01-01 10:00", tz="US/Eastern"),
        pd.NaT,
        Timestamp("2011-08-01 10:00", tz="US/Eastern"),
    ]
    result = Index(eastern_values, name="idx")
    exp = DatetimeIndex(
        [Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")],
        tz="US/Eastern",
        name="idx",
    )
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)
    assert result.tz is not None
    assert result.tz == exp.tz

    # different tz results in Index(dtype=object); the first timestamp is
    # tz-naive in one case and Asia/Tokyo in the other.
    first_stamps = (
        Timestamp("2011-01-01 10:00"),
        Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
    )
    for first in first_stamps:
        mixed = [
            pd.NaT,
            first,
            pd.NaT,
            Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        ]
        result = Index(mixed, name="idx")
        exp = Index(mixed, dtype="object", name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

    # all NaT
    result = Index([pd.NaT, pd.NaT], name="idx")
    exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx")
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)
    assert result.tz is None

    # all NaT with tz
    with tm.assert_produces_warning(FutureWarning):
        # subclass-specific kwargs to pd.Index
        result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
    exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)
    assert result.tz is not None
    assert result.tz == exp.tz
def test_get_loc_outside_tolerance_raises(self, method):
    """A key further than ``tolerance`` from every label raises KeyError (plus the deprecation warning)."""
    idx = Index([0, 1, 2])

    with pytest.raises(KeyError, match="1.1"), tm.assert_produces_warning(
        FutureWarning, match="deprecated"
    ):
        idx.get_loc(1.1, method, tolerance=0.05)
def test_deprecated_kwargs(self, read_ext):
    """Passing extra positional arguments to ``read_excel`` emits a FutureWarning."""
    path = "test1" + read_ext

    # Extra warnings raised alongside the FutureWarning are tolerated here.
    with tm.assert_produces_warning(FutureWarning, raise_on_extra_warnings=False):
        pd.read_excel(path, "Sheet1", 0)

    # Path-only call, made outside the warning check.
    pd.read_excel(path)
def test_get_loc_bad_tolerance_raises(self):
    """A non-numeric tolerance raises ValueError (plus the deprecation warning)."""
    idx = Index([0, 1, 2])

    with pytest.raises(
        ValueError, match="must be numeric"
    ), tm.assert_produces_warning(FutureWarning, match="deprecated"):
        idx.get_loc(1.1, "nearest", tolerance="invalid")
def test_copy_dtype_deprecated(index):
    """Passing ``dtype`` to ``Index.copy`` must emit a FutureWarning (GH#35853)."""
    requested_dtype = object

    with tm.assert_produces_warning(FutureWarning):
        index.copy(dtype=requested_dtype)
def test_fillna_limit_pad(self, data_missing):
    """Run the parent's ``test_fillna_limit_pad``, expecting a PerformanceWarning."""
    parent_test = super().test_fillna_limit_pad

    with tm.assert_produces_warning(PerformanceWarning):
        parent_test(data_missing)
def test_ravel_deprecation(index):
    """``Index.ravel`` must emit a FutureWarning.

    GH#19956: ravel returning ndarray is deprecated.
    """
    expectation = tm.assert_produces_warning(FutureWarning)

    with expectation:
        index.ravel()
def test_searchsorted(self, data_for_sorting, as_series):
    """Run the parent's ``test_searchsorted``, expecting a PerformanceWarning."""
    parent_test = super().test_searchsorted

    with tm.assert_produces_warning(PerformanceWarning):
        parent_test(data_for_sorting, as_series)
def test_is_type_compatible_deprecation(index):
    """``Index.is_type_compatible`` must emit a matching FutureWarning (GH#42113)."""
    with tm.assert_produces_warning(
        FutureWarning, match="is_type_compatible is deprecated"
    ):
        # Read the inferred type inside the check, as part of the guarded call.
        kind = index.inferred_type
        index.is_type_compatible(kind)
def test_call(self):
    """Calling the offset on ``self.d`` warns and yields 2008-01-03."""
    expected = datetime(2008, 1, 3)

    with tm.assert_produces_warning(FutureWarning):
        # GH#34171 DateOffset.__call__ is deprecated
        shifted = self.offset2(self.d)
        assert shifted == expected
def test_is_mixed_deprecated(index):
    """``Index.is_mixed`` must emit a matching FutureWarning (GH#32922)."""
    with tm.assert_produces_warning(
        FutureWarning, match="Index.is_mixed is deprecated"
    ):
        index.is_mixed()
def test_copy_delim_warning(self, df):
    """``to_clipboard`` warns when ``sep`` is supplied together with ``excel=False``."""
    tab = "\t"

    # NOTE(review): no category is given, so any Warning subclass satisfies
    # this check -- consider narrowing once the emitted category is confirmed.
    with tm.assert_produces_warning():
        df.to_clipboard(excel=False, sep=tab)
def test_get_loc_float_index_nan_with_method(self, vals, method):
    """Looking up NaN with a ``method`` raises KeyError (plus the deprecation warning); GH#39382."""
    index = Index(vals)

    with pytest.raises(KeyError, match="nan"), tm.assert_produces_warning(
        FutureWarning, match="deprecated"
    ):
        index.get_loc(np.nan, method=method)
def test_getitem_single_list_of_columns(self, df):
    """Selecting groupby columns with a bare ``"C", "D"`` key must emit a FutureWarning.

    per GH 23566 this should raise a FutureWarning
    """
    with tm.assert_produces_warning(FutureWarning):
        grouped = df.groupby("A")
        # Subscripting with two comma-separated names passes one tuple key.
        selected = grouped["C", "D"]
        selected.mean()
def test_get_slice_bounds_outside(self, kind, side, expected, bound):
    """``get_slice_bound`` with ``kind`` warns and still returns ``expected``."""
    idx = Index(range(6))
    expectation = tm.assert_produces_warning(FutureWarning, match="'kind' argument")

    with expectation:
        position = idx.get_slice_bound(bound, kind=kind, side=side)

    assert position == expected
def test_index_from_listlike_with_dtype(self, data):
    """Run the parent's test of the same name, expecting the SparseArray-to-Index FutureWarning."""
    parent_test = super().test_index_from_listlike_with_dtype

    with tm.assert_produces_warning(
        FutureWarning, match="passing a SparseArray to pd.Index"
    ):
        parent_test(data)
def test_grouped_box_layout(self):
    """Check ``layout`` validation and the resulting axes grid for grouped boxplots.

    Every shape assertion inspects the current figure via ``self.plt.gcf()``,
    so each check must directly follow the plotting call it verifies.
    """
    df = self.hist_df

    # Invalid layouts must raise ValueError with the specific message.
    msg = "Layout of 1x1 must be larger than required size 2"
    with pytest.raises(ValueError, match=msg):
        df.boxplot(column=["weight", "height"], by=df.gender, layout=(1, 1))

    msg = "The 'layout' keyword is not supported when 'by' is None"
    with pytest.raises(ValueError, match=msg):
        df.boxplot(
            column=["height", "weight", "category"],
            layout=(2, 1),
            return_type="dict",
        )

    msg = "At least one dimension of layout must be positive"
    with pytest.raises(ValueError, match=msg):
        df.boxplot(column=["weight", "height"], by=df.gender, layout=(-1, -1))

    # _check_plot_works adds an ax so catch warning. see GH #13188
    with tm.assert_produces_warning(UserWarning):
        box = _check_plot_works(
            df.groupby("gender").boxplot, column="height", return_type="dict"
        )
    self._check_axes_shape(self.plt.gcf().axes, axes_num=2, layout=(1, 2))

    with tm.assert_produces_warning(UserWarning):
        box = _check_plot_works(
            df.groupby("category").boxplot, column="height", return_type="dict"
        )
    self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2))

    # GH 6769
    with tm.assert_produces_warning(UserWarning):
        box = _check_plot_works(
            df.groupby("classroom").boxplot, column="height", return_type="dict"
        )
    self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))

    # GH 5897
    axes = df.boxplot(
        column=["height", "weight", "category"], by="gender", return_type="axes"
    )
    self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))
    # The "height" axes is expected to hide its x tick labels and x label ...
    for ax in [axes["height"]]:
        self._check_visible(ax.get_xticklabels(), visible=False)
        self._check_visible([ax.xaxis.get_label()], visible=False)
    # ... while "weight" and "category" are expected to show theirs.
    for ax in [axes["weight"], axes["category"]]:
        self._check_visible(ax.get_xticklabels())
        self._check_visible([ax.xaxis.get_label()])

    box = df.groupby("classroom").boxplot(
        column=["height", "weight", "category"], return_type="dict"
    )
    self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2))

    # Explicit layouts: (3, 2) as given, then (3, -1), which is checked
    # against the same (3, 2) grid.
    with tm.assert_produces_warning(UserWarning):
        box = _check_plot_works(
            df.groupby("category").boxplot,
            column="height",
            layout=(3, 2),
            return_type="dict",
        )
    self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2))
    with tm.assert_produces_warning(UserWarning):
        box = _check_plot_works(
            df.groupby("category").boxplot,
            column="height",
            layout=(3, -1),
            return_type="dict",
        )
    self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2))

    # Single-column layouts with ``by``: (4, 1) is kept as given; (-1, 1) is
    # checked against (3, 1).
    box = df.boxplot(
        column=["height", "weight", "category"], by="gender", layout=(4, 1)
    )
    self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(4, 1))

    box = df.boxplot(
        column=["height", "weight", "category"], by="gender", layout=(-1, 1)
    )
    self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(3, 1))

    # Single-row layouts on a groupby: (1, 4) is kept as given; (1, -1) is
    # checked against (1, 3).
    box = df.groupby("classroom").boxplot(
        column=["height", "weight", "category"], layout=(1, 4), return_type="dict"
    )
    self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 4))

    box = df.groupby("classroom").boxplot(  # noqa
        column=["height", "weight", "category"], layout=(1, -1), return_type="dict"
    )
    self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 3))