def test_rank(self): tm._skip_if_no_scipy() from scipy.stats import rankdata self.frame['A'][::2] = np.nan self.frame['B'][::3] = np.nan self.frame['C'][::4] = np.nan self.frame['D'][::5] = np.nan ranks0 = self.frame.rank() ranks1 = self.frame.rank(1) mask = np.isnan(self.frame.values) fvals = self.frame.fillna(np.inf).values exp0 = np.apply_along_axis(rankdata, 0, fvals) exp0[mask] = np.nan exp1 = np.apply_along_axis(rankdata, 1, fvals) exp1[mask] = np.nan tm.assert_almost_equal(ranks0.values, exp0) tm.assert_almost_equal(ranks1.values, exp1) # integers df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) result = df.rank() exp = df.astype(float).rank() tm.assert_frame_equal(result, exp) result = df.rank(1) exp = df.astype(float).rank(1) tm.assert_frame_equal(result, exp)
def test_interp_datetime64(self): tm._skip_if_no_scipy() df = Series([1, np.nan, 3], index=date_range('1/1/2000', periods=3)) result = df.interpolate(method='nearest') expected = Series([1., 1., 3.], index=date_range('1/1/2000', periods=3)) assert_series_equal(result, expected)
def test_interp_alt_scipy(self): tm._skip_if_no_scipy() df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], 'C': [1, 2, 3, 5, 8, 13, 21]}) result = df.interpolate(method='barycentric') expected = df.copy() expected.loc[2, 'A'] = 3 expected.loc[5, 'A'] = 6 assert_frame_equal(result, expected) result = df.interpolate(method='barycentric', downcast='infer') assert_frame_equal(result, expected.astype(np.int64)) result = df.interpolate(method='krogh') expectedk = df.copy() expectedk['A'] = expected['A'] assert_frame_equal(result, expectedk) _skip_if_no_pchip() import scipy result = df.interpolate(method='pchip') expected.loc[2, 'A'] = 3 if LooseVersion(scipy.__version__) >= '0.17.0': expected.loc[5, 'A'] = 6.0 else: expected.loc[5, 'A'] = 6.125 assert_frame_equal(result, expected)
def test_no_order(self): tm._skip_if_no_scipy() s = Series([0, 1, np.nan, 3]) with tm.assertRaises(ValueError): s.interpolate(method='polynomial') with tm.assertRaises(ValueError): s.interpolate(method='spline')
def test_scatter_plot_legacy(self): tm._skip_if_no_scipy() df = DataFrame(randn(100, 2)) def scat(**kwds): return plotting.scatter_matrix(df, **kwds) with tm.assert_produces_warning(UserWarning): _check_plot_works(scat) with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, marker='+') with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, vmin=0) if _ok_for_gaussian_kde('kde'): with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, diagonal='kde') if _ok_for_gaussian_kde('density'): with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, diagonal='density') with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, diagonal='hist') with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, range_padding=.1) def scat2(x, y, by=None, ax=None, figsize=None): return plotting.scatter_plot(df, x, y, by, ax, figsize=None) _check_plot_works(scat2, x=0, y=1) grouper = Series(np.repeat([1, 2, 3, 4, 5], 20), df.index) with tm.assert_produces_warning(UserWarning): _check_plot_works(scat2, x=0, y=1, by=grouper)
def test_interp_scipy_basic(self): tm._skip_if_no_scipy() s = Series([1, 3, np.nan, 12, np.nan, 25]) # slinear expected = Series([1., 3., 7.5, 12., 18.5, 25.]) result = s.interpolate(method='slinear') assert_series_equal(result, expected) result = s.interpolate(method='slinear', donwcast='infer') assert_series_equal(result, expected) # nearest expected = Series([1, 3, 3, 12, 12, 25]) result = s.interpolate(method='nearest') assert_series_equal(result, expected.astype('float')) result = s.interpolate(method='nearest', downcast='infer') assert_series_equal(result, expected) # zero expected = Series([1, 3, 3, 12, 12, 25]) result = s.interpolate(method='zero') assert_series_equal(result, expected.astype('float')) result = s.interpolate(method='zero', downcast='infer') assert_series_equal(result, expected) # quadratic expected = Series([1, 3., 6.769231, 12., 18.230769, 25.]) result = s.interpolate(method='quadratic') assert_series_equal(result, expected) result = s.interpolate(method='quadratic', downcast='infer') assert_series_equal(result, expected) # cubic expected = Series([1., 3., 6.8, 12., 18.2, 25.]) result = s.interpolate(method='cubic') assert_series_equal(result, expected)
def test_interp_alt_scipy(self): tm._skip_if_no_scipy() df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], 'C': [1, 2, 3, 5, 8, 13, 21]}) result = df.interpolate(method='barycentric') expected = df.copy() expected['A'].iloc[2] = 3 expected['A'].iloc[5] = 6 assert_frame_equal(result, expected) result = df.interpolate(method='barycentric', downcast='infer') assert_frame_equal(result, expected.astype(np.int64)) result = df.interpolate(method='krogh') expectedk = df.copy() # expectedk['A'].iloc[2] = 3 # expectedk['A'].iloc[5] = 6 expectedk['A'] = expected['A'] assert_frame_equal(result, expectedk) _skip_if_no_pchip() result = df.interpolate(method='pchip') expected['A'].iloc[2] = 3 expected['A'].iloc[5] = 6.125 assert_frame_equal(result, expected)
def test_scatter_matrix_axis(self): tm._skip_if_no_scipy() scatter_matrix = plotting.scatter_matrix with tm.RNGContext(42): df = DataFrame(randn(100, 3)) # we are plotting multiples on a sub-plot with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(scatter_matrix, filterwarnings='always', frame=df, range_padding=.1) axes0_labels = axes[0][0].yaxis.get_majorticklabels() # GH 5662 expected = ['-2', '-1', '0', '1', '2'] self._check_text_labels(axes0_labels, expected) self._check_ticks_props( axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) df[0] = ((df[0] - 2) / 3) # we are plotting multiples on a sub-plot with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(scatter_matrix, filterwarnings='always', frame=df, range_padding=.1) axes0_labels = axes[0][0].yaxis.get_majorticklabels() expected = ['-1.2', '-1.0', '-0.8', '-0.6', '-0.4', '-0.2', '0.0'] self._check_text_labels(axes0_labels, expected) self._check_ticks_props( axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0)
def test_is_scipy_sparse(): tm._skip_if_no_scipy() from scipy.sparse import bsr_matrix assert com.is_scipy_sparse(bsr_matrix([1, 2, 3])) assert not com.is_scipy_sparse(pd.SparseArray([1, 2, 3])) assert not com.is_scipy_sparse(pd.SparseSeries([1, 2, 3]))
def test_from_coo_long_repr(self): # GH 13114 # test it doesn't raise error. Formatting is tested in test_format tm._skip_if_no_scipy() import scipy.sparse sparse = SparseSeries.from_coo(scipy.sparse.rand(350, 18)) repr(sparse)
def test_spline_interpolation(self): tm._skip_if_no_scipy() s = Series(np.arange(10) ** 2) s[np.random.randint(0, 9, 3)] = np.nan result1 = s.interpolate(method='spline', order=1) expected1 = s.interpolate(method='spline', order=1) assert_series_equal(result1, expected1)
def test_nan_interpolate(self): s = Series([0, 1, np.nan, 3]) result = s.interpolate() expected = Series([0., 1., 2., 3.]) assert_series_equal(result, expected) tm._skip_if_no_scipy() result = s.interpolate(method='polynomial', order=1) assert_series_equal(result, expected)
def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() s = Series(np.random.uniform(size=50)) s[0] = np.nan axes = _check_plot_works(s.plot.kde) # gh-14821: check if the values have any missing values assert any(~np.isnan(axes.lines[0].get_xdata()))
def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() s = Series(np.random.uniform(size=50)) s[0] = np.nan axes = _check_plot_works(s.plot.kde) # check if the values have any missing values # GH14821 self.assertTrue(any(~np.isnan(axes.lines[0].get_xdata())), msg="Missing Values not dropped")
def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() s = Series(np.random.uniform(size=50)) s[0] = np.nan axes = _check_plot_works(s.plot.kde) # check if the values have any missing values # GH14821 self.assertTrue(any(~np.isnan(axes.lines[0].get_xdata())), msg='Missing Values not dropped')
def test_secondary_kde(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() import matplotlib.pyplot as plt ser = Series(np.random.randn(10)) ax = ser.plot(secondary_y=True, kind='density').right_ax fig = ax.get_figure() axes = fig.get_axes() self.assertEqual(axes[1].get_yaxis().get_ticks_position(), 'right')
def test_kde_kwargs(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() from numpy import linspace _check_plot_works(self.ts.plot.kde, bw_method=0.5, ind=linspace(-100, 100, 20)) _check_plot_works(self.ts.plot.density, bw_method=0.5, ind=linspace(-100, 100, 20)) ax = self.ts.plot.kde(logy=True, bw_method=0.5, ind=linspace(-100, 100, 20)) self._check_ax_scales(ax, yaxis="log") self._check_text_labels(ax.yaxis.get_label(), "Density")
def test_spline_error(self): tm._skip_if_no_scipy() s = pd.Series(np.arange(10) ** 2) s[np.random.randint(0, 9, 3)] = np.nan with tm.assertRaises(ValueError): s.interpolate(method='spline') with tm.assertRaises(ValueError): s.interpolate(method='spline', order=0)
def test_interp_all_good(self): # scipy tm._skip_if_no_scipy() s = Series([1, 2, 3]) result = s.interpolate(method='polynomial', order=1) assert_series_equal(result, s) # non-scipy result = s.interpolate() assert_series_equal(result, s)
def test_scatter_plot_legacy(self): tm._skip_if_no_scipy() df = pd.DataFrame(randn(100, 2)) with tm.assert_produces_warning(FutureWarning): plotting.scatter_matrix(df) with tm.assert_produces_warning(FutureWarning): pd.scatter_matrix(df)
def test_interp_leading_nans(self): df = DataFrame({"A": [np.nan, np.nan, .5, .25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]}) result = df.interpolate() expected = df.copy() expected['B'].loc[3] = -3.75 assert_frame_equal(result, expected) tm._skip_if_no_scipy() result = df.interpolate(method='polynomial', order=1) assert_frame_equal(result, expected)
def test_interp_regression(self): tm._skip_if_no_scipy() _skip_if_no_pchip() ser = Series(np.sort(np.random.uniform(size=100))) # interpolate at new_index new_index = ser.index + Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75]) interp_s = ser.reindex(new_index).interpolate(method='pchip') # does not blow up, GH5977 interp_s[49:51]
def test_secondary_kde(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() ser = Series(np.random.randn(10)) fig, ax = self.plt.subplots() ax = ser.plot(secondary_y=True, kind='density', ax=ax) assert hasattr(ax, 'left_ax') assert not hasattr(ax, 'right_ax') axes = fig.get_axes() assert axes[1].get_yaxis().get_ticks_position() == 'right'
def test_cmov_window_frame(self): tm._skip_if_no_scipy() try: from scikits.timeseries.lib import cmov_window except ImportError: raise nose.SkipTest("no scikits.timeseries") # DataFrame vals = np.random.randn(10, 2) xp = cmov_window(vals, 5, 'boxcar') rs = mom.rolling_window(DataFrame(vals), 5, 'boxcar', center=True) assert_frame_equal(DataFrame(xp), rs)
def test_kde_kwargs(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() from numpy import linspace _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=linspace(-100, 100, 20)) _check_plot_works(self.ts.plot.density, bw_method=.5, ind=linspace(-100, 100, 20)) ax = self.ts.plot.kde(logy=True, bw_method=.5, ind=linspace(-100, 100, 20)) self._check_ax_scales(ax, yaxis='log') self._check_text_labels(ax.yaxis.get_label(), 'Density')
def test_secondary_kde(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() import matplotlib.pyplot as plt # noqa ser = Series(np.random.randn(10)) ax = ser.plot(secondary_y=True, kind='density') assert hasattr(ax, 'left_ax') assert not hasattr(ax, 'right_ax') fig = ax.get_figure() axes = fig.get_axes() assert axes[1].get_yaxis().get_ticks_position() == 'right'
def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() if not self.mpl_ge_1_5_0: pytest.skip("mpl is not supported") s = Series(np.random.uniform(size=50)) s[0] = np.nan axes = _check_plot_works(s.plot.kde) # gh-14821: check if the values have any missing values assert any(~np.isnan(axes.lines[0].get_xdata()))
def test_interp_multiIndex(self): idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')]) s = Series([1, 2, np.nan], index=idx) expected = s.copy() expected.loc[2] = 2 result = s.interpolate() assert_series_equal(result, expected) tm._skip_if_no_scipy() with tm.assertRaises(ValueError): s.interpolate(method='polynomial', order=1)
def test_hist_kde_color(self): ax = self.ts.plot.hist(logy=True, bins=10, color='b') self._check_ax_scales(ax, yaxis='log') self.assertEqual(len(ax.patches), 10) self._check_colors(ax.patches, facecolors=['b'] * 10) tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() ax = self.ts.plot.kde(logy=True, color='r') self._check_ax_scales(ax, yaxis='log') lines = ax.get_lines() self.assertEqual(len(lines), 1) self._check_colors(lines, ['r'])
def test_interpolate_corners(self): s = Series([np.nan, np.nan]) assert_series_equal(s.interpolate(), s) s = Series([]).interpolate() assert_series_equal(s.interpolate(), s) tm._skip_if_no_scipy() s = Series([np.nan, np.nan]) assert_series_equal(s.interpolate(method='polynomial', order=1), s) s = Series([]).interpolate() assert_series_equal(s.interpolate(method='polynomial', order=1), s)
def test_secondary_kde(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() import matplotlib.pyplot as plt # noqa ser = Series(np.random.randn(10)) ax = ser.plot(secondary_y=True, kind="density") self.assertTrue(hasattr(ax, "left_ax")) self.assertFalse(hasattr(ax, "right_ax")) fig = ax.get_figure() axes = fig.get_axes() self.assertEqual(axes[1].get_yaxis().get_ticks_position(), "right")
def test_interpolate_from_derivatives(self): tm._skip_if_no_scipy() ser = Series([10, 11, 12, 13]) expected = Series( [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0])) # interpolate at new_index new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])) interp_s = ser.reindex(new_index).interpolate( method='from_derivatives') assert_series_equal(interp_s[1:3], expected)
def test_interpolate_from_derivatives(self): tm._skip_if_no_scipy() ser = Series([10, 11, 12, 13]) expected = Series([11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0])) # interpolate at new_index new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])) interp_s = ser.reindex(new_index).interpolate( method='from_derivatives') assert_series_equal(interp_s[1:3], expected)
def test_cmov_window_regular(self): tm._skip_if_no_scipy() try: from scikits.timeseries.lib import cmov_window except ImportError: raise nose.SkipTest("no scikits.timeseries") win_types = ["triang", "blackman", "hamming", "bartlett", "bohman", "blackmanharris", "nuttall", "barthann"] for wt in win_types: vals = np.random.randn(10) xp = cmov_window(vals, 5, wt) rs = mom.rolling_window(Series(vals), 5, wt, center=True) assert_series_equal(Series(xp), rs)
def test_interp_various(self): tm._skip_if_no_scipy() df = DataFrame({ 'A': [1, 2, np.nan, 4, 5, np.nan, 7], 'C': [1, 2, 3, 5, 8, 13, 21] }) df = df.set_index('C') expected = df.copy() result = df.interpolate(method='polynomial', order=1) expected.A.loc[3] = 2.66666667 expected.A.loc[13] = 5.76923076 assert_frame_equal(result, expected) result = df.interpolate(method='cubic') # GH #15662. # new cubic and quadratic interpolation algorithms from scipy 0.19.0. # previously `splmake` was used. See scipy/scipy#6710 if _is_scipy_ge_0190: expected.A.loc[3] = 2.81547781 expected.A.loc[13] = 5.52964175 else: expected.A.loc[3] = 2.81621174 expected.A.loc[13] = 5.64146581 assert_frame_equal(result, expected) result = df.interpolate(method='nearest') expected.A.loc[3] = 2 expected.A.loc[13] = 5 assert_frame_equal(result, expected, check_dtype=False) result = df.interpolate(method='quadratic') if _is_scipy_ge_0190: expected.A.loc[3] = 2.82150771 expected.A.loc[13] = 6.12648668 else: expected.A.loc[3] = 2.82533638 expected.A.loc[13] = 6.02817974 assert_frame_equal(result, expected) result = df.interpolate(method='slinear') expected.A.loc[3] = 2.66666667 expected.A.loc[13] = 5.76923077 assert_frame_equal(result, expected) result = df.interpolate(method='zero') expected.A.loc[3] = 2. expected.A.loc[13] = 5 assert_frame_equal(result, expected, check_dtype=False)
def test_rank(): tm._skip_if_no_scipy() from scipy.stats import rankdata def _check(arr): mask = ~np.isfinite(arr) arr = arr.copy() result = _algos.rank_1d_float64(arr) arr[mask] = np.inf exp = rankdata(arr) exp[mask] = nan assert_almost_equal(result, exp) _check(np.array([nan, nan, 5., 5., 5., nan, 1, 2, 3, nan])) _check(np.array([4., nan, 5., 5., 5., nan, 1, 2, 4., nan]))
def test_hist_kde_color(self): _, ax = self.plt.subplots() ax = self.ts.plot.hist(logy=True, bins=10, color='b', ax=ax) self._check_ax_scales(ax, yaxis='log') assert len(ax.patches) == 10 self._check_colors(ax.patches, facecolors=['b'] * 10) tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() _, ax = self.plt.subplots() ax = self.ts.plot.kde(logy=True, color='r', ax=ax) self._check_ax_scales(ax, yaxis='log') lines = ax.get_lines() assert len(lines) == 1 self._check_colors(lines, ['r'])
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): # GH 4343 tm._skip_if_no_scipy() # Make one ndarray and from it one sparse matrix, both to be used for # constructing frames and comparing results arr = np.eye(2, dtype=dtype) try: spm = spmatrix(arr) assert spm.dtype == arr.dtype except (TypeError, AssertionError): # If conversion to sparse fails for this spmatrix type and arr.dtype, # then the combination is not currently supported in NumPy, so we # can just skip testing it thoroughly return sdf = pd.SparseDataFrame(spm, index=index, columns=columns, default_fill_value=fill_value) # Expected result construction is kind of tricky for all # dtype-fill_value combinations; easiest to cast to something generic # and except later on rarr = arr.astype(object) rarr[arr == 0] = np.nan expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna( fill_value if fill_value is not None else np.nan) # Assert frame is as expected sdf_obj = sdf.astype(object) tm.assert_sp_frame_equal(sdf_obj, expected) tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) # Assert spmatrices equal tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok())) # Ensure dtype is preserved if possible was_upcast = ((fill_value is None or is_float(fill_value)) and not is_object_dtype(dtype) and not is_float_dtype(dtype)) res_dtype = (bool if is_bool_dtype(dtype) else float if was_upcast else dtype) tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) tm.assert_equal(sdf.to_coo().dtype, res_dtype) # However, adding a str column results in an upcast to object sdf['strings'] = np.arange(len(sdf)).astype(str) tm.assert_equal(sdf.to_coo().dtype, np.object_)
def test_kde_kwargs(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() if not self.mpl_ge_1_5_0: pytest.skip("mpl is not supported") from numpy import linspace _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=linspace(-100, 100, 20)) _check_plot_works(self.ts.plot.density, bw_method=.5, ind=linspace(-100, 100, 20)) _, ax = self.plt.subplots() ax = self.ts.plot.kde(logy=True, bw_method=.5, ind=linspace(-100, 100, 20), ax=ax) self._check_ax_scales(ax, yaxis='log') self._check_text_labels(ax.yaxis.get_label(), 'Density')
def test_cmov_window_na_min_periods(self): tm._skip_if_no_scipy() try: from scikits.timeseries.lib import cmov_window except ImportError: raise nose.SkipTest("no scikits.timeseries") # min_periods vals = Series(np.random.randn(10)) vals[4] = np.nan vals[8] = np.nan xp = mom.rolling_mean(vals, 5, min_periods=4, center=True) rs = mom.rolling_window(vals, 5, 'boxcar', min_periods=4, center=True) assert_series_equal(xp, rs)
def test_cmov_window(self): tm._skip_if_no_scipy() try: from scikits.timeseries.lib import cmov_window except ImportError: raise nose.SkipTest("no scikits.timeseries") vals = np.random.randn(10) xp = cmov_window(vals, 5, 'boxcar') rs = mom.rolling_window(vals, 5, 'boxcar', center=True) assert_almost_equal(xp.compressed(), rs[2:-2]) assert_almost_equal(xp.mask, np.isnan(rs)) xp = Series(rs) rs = mom.rolling_window(Series(vals), 5, 'boxcar', center=True) assert_series_equal(xp, rs)
def test_cmov_window_regular(self): tm._skip_if_no_scipy() try: from scikits.timeseries.lib import cmov_window except ImportError: raise nose.SkipTest("no scikits.timeseries") win_types = [ 'triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann' ] for wt in win_types: vals = np.random.randn(10) xp = cmov_window(vals, 5, wt) rs = mom.rolling_window(Series(vals), 5, wt, center=True) assert_series_equal(Series(xp), rs)
def test_scatter_matrix_axis(self): tm._skip_if_no_scipy() scatter_matrix = plotting.scatter_matrix with tm.RNGContext(42): df = DataFrame(randn(100, 3)) # we are plotting multiples on a sub-plot with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(scatter_matrix, filterwarnings='always', frame=df, range_padding=.1) axes0_labels = axes[0][0].yaxis.get_majorticklabels() # GH 5662 if self.mpl_ge_2_0_0: expected = ['-2', '0', '2'] else: expected = ['-2', '-1', '0', '1', '2'] self._check_text_labels(axes0_labels, expected) self._check_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) df[0] = ((df[0] - 2) / 3) # we are plotting multiples on a sub-plot with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(scatter_matrix, filterwarnings='always', frame=df, range_padding=.1) axes0_labels = axes[0][0].yaxis.get_majorticklabels() if self.mpl_ge_2_0_0: expected = ['-1.0', '-0.5', '0.0'] else: expected = ['-1.2', '-1.0', '-0.8', '-0.6', '-0.4', '-0.2', '0.0'] self._check_text_labels(axes0_labels, expected) self._check_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0)
def test_interp_various(self): tm._skip_if_no_scipy() df = DataFrame({ 'A': [1, 2, np.nan, 4, 5, np.nan, 7], 'C': [1, 2, 3, 5, 8, 13, 21] }) df = df.set_index('C') expected = df.copy() result = df.interpolate(method='polynomial', order=1) expected.A.loc[3] = 2.66666667 expected.A.loc[13] = 5.76923076 assert_frame_equal(result, expected) result = df.interpolate(method='cubic') expected.A.loc[3] = 2.81621174 expected.A.loc[13] = 5.64146581 assert_frame_equal(result, expected) result = df.interpolate(method='nearest') expected.A.loc[3] = 2 expected.A.loc[13] = 5 assert_frame_equal(result, expected, check_dtype=False) result = df.interpolate(method='quadratic') expected.A.loc[3] = 2.82533638 expected.A.loc[13] = 6.02817974 assert_frame_equal(result, expected) result = df.interpolate(method='slinear') expected.A.loc[3] = 2.66666667 expected.A.loc[13] = 5.76923077 assert_frame_equal(result, expected) result = df.interpolate(method='zero') expected.A.loc[3] = 2. expected.A.loc[13] = 5 assert_frame_equal(result, expected, check_dtype=False) result = df.interpolate(method='quadratic') expected.A.loc[3] = 2.82533638 expected.A.loc[13] = 6.02817974 assert_frame_equal(result, expected)
def test_hist_kde(self): ax = self.ts.plot.hist(logy=True) self._check_ax_scales(ax, yaxis='log') xlabels = ax.get_xticklabels() # ticks are values, thus ticklabels are blank self._check_text_labels(xlabels, [''] * len(xlabels)) ylabels = ax.get_yticklabels() self._check_text_labels(ylabels, [''] * len(ylabels)) tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() _check_plot_works(self.ts.plot.kde) _check_plot_works(self.ts.plot.density) ax = self.ts.plot.kde(logy=True) self._check_ax_scales(ax, yaxis='log') xlabels = ax.get_xticklabels() self._check_text_labels(xlabels, [''] * len(xlabels)) ylabels = ax.get_yticklabels() self._check_text_labels(ylabels, [''] * len(ylabels))
def test_interp_scipy_basic(self): tm._skip_if_no_scipy() s = Series([1, 3, np.nan, 12, np.nan, 25]) # slinear expected = Series([1., 3., 7.5, 12., 18.5, 25.]) result = s.interpolate(method='slinear') assert_series_equal(result, expected) result = s.interpolate(method='slinear', downcast='infer') assert_series_equal(result, expected) # nearest expected = Series([1, 3, 3, 12, 12, 25]) result = s.interpolate(method='nearest') assert_series_equal(result, expected.astype('float')) result = s.interpolate(method='nearest', downcast='infer') assert_series_equal(result, expected) # zero expected = Series([1, 3, 3, 12, 12, 25]) result = s.interpolate(method='zero') assert_series_equal(result, expected.astype('float')) result = s.interpolate(method='zero', downcast='infer') assert_series_equal(result, expected) # quadratic # GH #15662. # new cubic and quadratic interpolation algorithms from scipy 0.19.0. # previously `splmake` was used. See scipy/scipy#6710 if _is_scipy_ge_0190: expected = Series([1, 3., 6.823529, 12., 18.058824, 25.]) else: expected = Series([1, 3., 6.769231, 12., 18.230769, 25.]) result = s.interpolate(method='quadratic') assert_series_equal(result, expected) result = s.interpolate(method='quadratic', downcast='infer') assert_series_equal(result, expected) # cubic expected = Series([1., 3., 6.8, 12., 18.2, 25.]) result = s.interpolate(method='cubic') assert_series_equal(result, expected)
def test_scatter_matrix_axis(self): tm._skip_if_no_scipy() scatter_matrix = plotting.scatter_matrix with tm.RNGContext(42): df = DataFrame(randn(100, 3)) axes = _check_plot_works(scatter_matrix, filterwarnings='always', frame=df, range_padding=.1) axes0_labels = axes[0][0].yaxis.get_majorticklabels() # GH 5662 expected = ['-2', '-1', '0', '1', '2'] self._check_text_labels(axes0_labels, expected) self._check_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) df[0] = ((df[0] - 2) / 3) axes = _check_plot_works(scatter_matrix, filterwarnings='always', frame=df, range_padding=.1) axes0_labels = axes[0][0].yaxis.get_majorticklabels() expected = ['-1.2', '-1.0', '-0.8', '-0.6', '-0.4', '-0.2', '0.0'] self._check_text_labels(axes0_labels, expected) self._check_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0)
def test_interp_rowwise(self): df = DataFrame({0: [1, 2, np.nan, 4], 1: [2, 3, 4, np.nan], 2: [np.nan, 4, 5, 6], 3: [4, np.nan, 6, 7], 4: [1, 2, 3, 4]}) result = df.interpolate(axis=1) expected = df.copy() expected.loc[3, 1] = 5 expected.loc[0, 2] = 3 expected.loc[1, 3] = 3 expected[4] = expected[4].astype(np.float64) assert_frame_equal(result, expected) # scipy route tm._skip_if_no_scipy() result = df.interpolate(axis=1, method='values') assert_frame_equal(result, expected) result = df.interpolate(axis=0) expected = df.interpolate() assert_frame_equal(result, expected)
def test_scatter_plot_legacy(self): tm._skip_if_no_scipy() df = DataFrame(randn(100, 2)) def scat(**kwds): return plotting.scatter_matrix(df, **kwds) with tm.assert_produces_warning(UserWarning): _check_plot_works(scat) with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, marker='+') with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, vmin=0) if _ok_for_gaussian_kde('kde'): with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, diagonal='kde') if _ok_for_gaussian_kde('density'): with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, diagonal='density') with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, diagonal='hist') with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, range_padding=.1) with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, color='rgb') with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, c='rgb') with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, facecolor='rgb') def scat2(x, y, by=None, ax=None, figsize=None): return plotting._core.scatter_plot(df, x, y, by, ax, figsize=None) _check_plot_works(scat2, x=0, y=1) grouper = Series(np.repeat([1, 2, 3, 4, 5], 20), df.index) with tm.assert_produces_warning(UserWarning): _check_plot_works(scat2, x=0, y=1, by=grouper)