def test_kurt(self):
    """Check ``kurt`` against scipy's unbiased kurtosis and on tiny inputs."""
    from scipy.stats import kurtosis

    def unbiased_kurtosis(values):
        return kurtosis(values, bias=False)

    named_series = tm.makeStringSeries().rename('series')
    self._check_stat_op('kurt', unbiased_kurtosis, named_series)

    # The outermost level holds a single label ('bar'), so the level-0
    # result for 'bar' is compared with the overall result.
    multi_index = pd.MultiIndex(
        levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
        codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
    )
    leveled = Series(np.random.randn(6), index=multi_index)
    tm.assert_almost_equal(leveled.kurt(), leveled.kurt(level=0)['bar'])

    # Corner cases: kurt() returns NaN unless there are at least 4 values.
    min_count = 4
    for size in range(1, min_count):
        assert np.isnan(Series(np.ones(size)).kurt())
        assert np.isnan(DataFrame(np.ones((size, size))).kurt()).all()

    assert Series(np.ones(min_count)).kurt() == 0
    assert (DataFrame(np.ones((min_count, min_count))).kurt() == 0).all()
def test_var_std(self):
    """Check ``std`` and ``var`` against numpy, including a custom ``ddof``."""
    float_ser = tm.makeStringSeries().rename('series')
    time_ser = tm.makeTimeSeries().rename('ts')

    def sample_std(values):
        return np.std(values, ddof=1)

    def sample_var(values):
        return np.var(values, ddof=1)

    self._check_stat_op('std', sample_std, float_ser)
    self._check_stat_op('var', sample_var, float_ser)

    # Non-default ddof must agree with the numpy reference.
    for method_name, numpy_func in (('std', np.std), ('var', np.var)):
        actual = getattr(time_ser, method_name)(ddof=4)
        wanted = numpy_func(time_ser.values, ddof=4)
        tm.assert_almost_equal(actual, wanted)

    # 1 - element series with ddof=1
    one_element = time_ser.iloc[[0]]
    assert pd.isna(one_element.var(ddof=1))
    assert pd.isna(one_element.std(ddof=1))
def test_isnull(self):
    """Check ``isnull`` on scalars, Series, DataFrames, Panels and Panel4Ds."""
    self.assertFalse(isnull(1.))
    self.assertTrue(isnull(None))
    self.assertTrue(isnull(np.NaN))
    # Pass the NaN through isnull(); asserting the bare float would always
    # succeed because a NaN float is truthy.
    self.assertTrue(isnull(float('nan')))
    self.assertFalse(isnull(np.inf))
    self.assertFalse(isnull(-np.inf))

    # series
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries(), tm.makeTimeSeries(),
              tm.makePeriodSeries()]:
        assert isinstance(isnull(s), Series)

    # frame
    for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
               tm.makeMixedDataFrame()]:
        result = isnull(df)
        expected = df.apply(isnull)
        tm.assert_frame_equal(result, expected)

    # panel
    with catch_warnings(record=True):
        for p in [tm.makePanel(), tm.makePeriodPanel(),
                  tm.add_nans(tm.makePanel())]:
            result = isnull(p)
            expected = p.apply(isnull)
            tm.assert_panel_equal(result, expected)

    # panel 4d
    with catch_warnings(record=True):
        for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]:
            result = isnull(p)
            expected = p.apply(isnull)
            tm.assert_panel4d_equal(result, expected)
def test_median(self):
    """Check ``median`` against ``np.median``, including an integer Series."""
    named = tm.makeStringSeries().rename('series')
    self._check_stat_op('median', np.median, named)

    # test with integers, test failure
    ones = np.ones(10, dtype=int)
    int_series = Series(ones, index=lrange(10))
    reference = np.median(int_series)
    tm.assert_almost_equal(reference, int_series.median())
def test_isna_isnull(self, isna_f):
    """Check the null-detection function ``isna_f`` on scalars and containers.

    Parameters
    ----------
    isna_f : callable
        The function under test; it is applied to scalars, Series,
        DataFrames and Panels.
    """
    assert not isna_f(1.)
    assert isna_f(None)
    assert isna_f(np.NaN)
    # Pass the NaN through isna_f(); asserting the bare float would always
    # succeed because a NaN float is truthy.
    assert isna_f(float('nan'))
    assert not isna_f(np.inf)
    assert not isna_f(-np.inf)

    # series
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries(), tm.makeTimeSeries(),
              tm.makePeriodSeries()]:
        assert isinstance(isna_f(s), Series)

    # frame
    for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
               tm.makeMixedDataFrame()]:
        result = isna_f(df)
        expected = df.apply(isna_f)
        tm.assert_frame_equal(result, expected)

    # panel
    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        for p in [tm.makePanel(), tm.makePeriodPanel(),
                  tm.add_nans(tm.makePanel())]:
            result = isna_f(p)
            expected = p.apply(isna_f)
            tm.assert_panel_equal(result, expected)
def test_idxmin(self):
    """Check ``idxmin`` with NaNs, without NaNs, all-NaN and datetime data."""
    # _check_stat_op approach can not be used here because of isna check.
    ser = tm.makeStringSeries().rename('series')

    # add some NaNs
    ser[5:15] = np.NaN

    # skipna or no
    min_label = ser.idxmin()
    assert ser[min_label] == ser.min()
    assert pd.isna(ser.idxmin(skipna=False))

    # no NaNs
    clean = ser.dropna()
    assert clean[clean.idxmin()] == clean.min()
    labels = clean.index.values.tolist()
    assert labels.index(clean.idxmin()) == clean.values.argmin()

    # all NaNs
    all_nan = ser * np.nan
    assert pd.isna(all_nan.idxmin())

    # datetime64[ns]
    dates = Series(pd.date_range('20130102', periods=6))
    assert dates.idxmin() == 0

    dates[0] = np.nan
    assert dates.idxmin() == 1
def string_series():
    """
    Fixture returning a Series of floats, indexed by unique strings and
    named 'series'
    """
    named = tm.makeStringSeries()
    named.name = 'series'
    return named
def test_len_keys(self):
    """Check ``len`` and ``keys`` of the store after adding four objects."""
    self.store['a'] = tm.makeTimeSeries()
    self.store['b'] = tm.makeStringSeries()
    self.store['c'] = tm.makeDataFrame()
    self.store['d'] = tm.makePanel()

    # assertEqual replaces the deprecated assertEquals/assert_ aliases and
    # reports both operands when the comparison fails.
    self.assertEqual(len(self.store), 4)
    self.assertEqual(set(self.store.keys()), set(['a', 'b', 'c', 'd']))
def test_transpose(self):
    """Check that transposing a Series is a no-op and a frame round-trips."""
    series_factories = (tm.makeFloatSeries, tm.makeStringSeries,
                        tm.makeObjectSeries)
    all_series = [make() for make in series_factories]
    for ser in all_series:
        # calls implementation in pandas/core/base.py
        tm.assert_series_equal(ser.transpose(), ser)

    frame = tm.makeTimeDataFrame()
    twice_transposed = frame.transpose().transpose()
    tm.assert_frame_equal(twice_transposed, frame)
def setUp(self):
    """Build ``self.d``, a dict of Series fixtures keyed by a type label."""
    super(TestSeries, self).setUp()

    self.d = {}

    # Generated series carry the same name as their key in ``self.d``.
    for label, factory in (('string', tm.makeStringSeries),
                           ('object', tm.makeObjectSeries)):
        generated = factory()
        generated.name = label
        self.d[label] = generated

    self.d['date'] = Series(tslib.iNaT, dtype='M8[ns]', index=range(5))

    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.],
    }

    for label, column in (('float', 'A'), ('int', 'B'), ('mixed', 'E')):
        self.d[label] = Series(data[column])
def test_notnull():
    """Check ``notnull`` on scalars, arrays and Series.

    Infinite values are checked under both settings of the
    ``mode.use_inf_as_null`` option.
    """
    assert notnull(1.)
    assert not notnull(None)
    assert not notnull(np.NaN)

    with cf.option_context("mode.use_inf_as_null", False):
        assert notnull(np.inf)
        assert notnull(-np.inf)

        arr = np.array([1.5, np.inf, 3.5, -np.inf])
        result = notnull(arr)
        assert result.all()

    with cf.option_context("mode.use_inf_as_null", True):
        assert not notnull(np.inf)
        assert not notnull(-np.inf)

        arr = np.array([1.5, np.inf, 3.5, -np.inf])
        result = notnull(arr)
        assert result.sum() == 2

    with cf.option_context("mode.use_inf_as_null", False):
        for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
                  tm.makeObjectSeries(), tm.makeTimeSeries(),
                  tm.makePeriodSeries()]:
            # This test covers notnull, so check notnull's return type
            # rather than isnull's.
            assert isinstance(notnull(s), Series)
def test_squeeze(self):
    """Check ``squeeze`` as a no-op and on objects with length-1 axes."""
    # noop
    noop_cases = (
        (tm.assert_series_equal,
         (tm.makeFloatSeries, tm.makeStringSeries, tm.makeObjectSeries)),
        (tm.assert_frame_equal, (tm.makeTimeDataFrame,)),
        (tm.assert_panel_equal, (tm.makePanel,)),
        (tm.assert_panel4d_equal, (tm.makePanel4D,)),
    )
    for assert_equal, factories in noop_cases:
        for obj in [make() for make in factories]:
            assert_equal(obj.squeeze(), obj)

    # squeezing
    one_col_frame = tm.makeTimeDataFrame().reindex(columns=['A'])
    tm.assert_series_equal(one_col_frame.squeeze(), one_col_frame['A'])

    one_item_panel = tm.makePanel().reindex(items=['ItemA'])
    tm.assert_frame_equal(one_item_panel.squeeze(), one_item_panel['ItemA'])

    thin_panel = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A'])
    tm.assert_series_equal(thin_panel.squeeze(),
                           thin_panel.ix['ItemA', :, 'A'])

    one_label_p4d = tm.makePanel4D().reindex(labels=['label1'])
    tm.assert_panel_equal(one_label_p4d.squeeze(), one_label_p4d['label1'])

    thin_p4d = tm.makePanel4D().reindex(labels=['label1'], items=['ItemA'])
    tm.assert_frame_equal(thin_p4d.squeeze(),
                          thin_p4d.ix['label1', 'ItemA'])
def test_isnull():
    """Check ``isnull`` on scalars, Series, DataFrames, Panels and Panel4Ds."""
    scalar_cases = (
        (1., False),
        (None, True),
        (np.NaN, True),
        (np.inf, False),
        (-np.inf, False),
    )
    for value, is_null in scalar_cases:
        assert bool(isnull(value)) is is_null

    # series
    series_factories = (tm.makeFloatSeries, tm.makeStringSeries,
                        tm.makeObjectSeries, tm.makeTimeSeries,
                        tm.makePeriodSeries)
    for ser in [make() for make in series_factories]:
        assert isinstance(isnull(ser), Series)

    # frame
    frame_factories = (tm.makeTimeDataFrame, tm.makePeriodFrame,
                       tm.makeMixedDataFrame)
    for frame in [make() for make in frame_factories]:
        tm.assert_frame_equal(isnull(frame), frame.apply(isnull))

    # panel
    panels = [tm.makePanel(), tm.makePeriodPanel(),
              tm.add_nans(tm.makePanel())]
    for panel in panels:
        tm.assert_panel_equal(isnull(panel), panel.apply(isnull))

    # panel 4d
    panels_4d = [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]
    for panel_4d in panels_4d:
        tm.assert_panel4d_equal(isnull(panel_4d), panel_4d.apply(isnull))
def test_repr(self):
    """Check that ``repr`` of the store works when empty and when filled."""
    repr(self.store)

    contents = (
        ('a', tm.makeTimeSeries),
        ('b', tm.makeStringSeries),
        ('c', tm.makeDataFrame),
        ('d', tm.makePanel),
    )
    for key, factory in contents:
        self.store[key] = factory()

    repr(self.store)
def test_squeeze(self):
    """Check ``squeeze``: no-op, squeezing, empty objects and ``axis``."""
    # noop
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries()]:
        tm.assert_series_equal(s.squeeze(), s)
    for df in [tm.makeTimeDataFrame()]:
        tm.assert_frame_equal(df.squeeze(), df)

    # squeezing
    df = tm.makeTimeDataFrame().reindex(columns=['A'])
    tm.assert_series_equal(df.squeeze(), df['A'])

    # don't fail with 0 length dimensions GH11229 & GH8999
    empty_series = Series([], name='five')
    empty_frame = DataFrame([empty_series])
    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        empty_panel = Panel({'six': empty_frame})

    # A plain loop: the assertions are run for their side effects only, so
    # there is no reason to collect their results in a list.
    for higher_dim in [empty_series, empty_frame, empty_panel]:
        tm.assert_series_equal(empty_series, higher_dim.squeeze())

    # axis argument
    df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
    assert df.shape == (1, 1)
    tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
    tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0])
    assert df.squeeze() == df.iloc[0, 0]
    pytest.raises(ValueError, df.squeeze, axis=2)
    pytest.raises(ValueError, df.squeeze, axis='x')

    df = tm.makeTimeDataFrame(3)
    tm.assert_frame_equal(df.squeeze(axis=0), df)
def test_take(self):
    """Check ``take`` on Series, DataFrame and Panel against numpy ``take``."""
    positions = [1, 5, -2, 6, 3, -1]

    series_factories = (tm.makeFloatSeries, tm.makeStringSeries,
                        tm.makeObjectSeries)
    for ser in [make() for make in series_factories]:
        taken = ser.take(positions)
        wanted = Series(data=ser.values.take(positions),
                        index=ser.index.take(positions),
                        dtype=ser.dtype)
        tm.assert_series_equal(taken, wanted)

    frame = tm.makeTimeDataFrame()
    taken = frame.take(positions)
    wanted = DataFrame(data=frame.values.take(positions, axis=0),
                       index=frame.index.take(positions),
                       columns=frame.columns)
    tm.assert_frame_equal(taken, wanted)

    positions = [-3, 2, 0, 1]
    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        panel = tm.makePanel()
        taken = panel.take(positions)
        wanted = Panel(data=panel.values.take(positions, axis=0),
                       items=panel.items.take(positions),
                       major_axis=panel.major_axis,
                       minor_axis=panel.minor_axis)
        tm.assert_panel_equal(taken, wanted)
def setUp(self):
    """Build ``self.d``, a dict of Series fixtures keyed by a type label."""
    super(TestSeries, self).setUp()

    self.d = {}

    # Generated series carry the same name as their key in ``self.d``.
    for label, factory in (("string", tm.makeStringSeries),
                           ("object", tm.makeObjectSeries)):
        generated = factory()
        generated.name = label
        self.d[label] = generated

    self.d["date"] = Series(tslib.iNaT, dtype="M8[ns]", index=range(5))

    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    for label, column in (("float", "A"), ("int", "B"), ("mixed", "E")):
        self.d[label] = Series(data[column])
def test_keys(self):
    """Check ``len`` and ``keys`` of the store, including a nested key."""
    self.store['a'] = tm.makeTimeSeries()
    self.store['b'] = tm.makeStringSeries()
    self.store['c'] = tm.makeDataFrame()
    self.store['d'] = tm.makePanel()
    self.store['foo/bar'] = tm.makePanel()

    # assertEqual replaces the deprecated assertEquals/assert_ aliases and
    # reports both operands when the comparison fails.
    self.assertEqual(len(self.store), 5)
    self.assertEqual(set(self.store.keys()),
                     set(['/a', '/b', '/c', '/d', '/foo/bar']))
def setUp(self):
    """Create the named ``ts``, ``series`` and ``iseries`` fixtures."""
    fixtures = (
        ('ts', tm.makeTimeSeries),
        ('series', tm.makeStringSeries),
        ('iseries', tm.makePeriodSeries),
    )
    # Each fixture is stored on ``self`` under the same string used as
    # its ``name``.
    for label, factory in fixtures:
        obj = factory()
        obj.name = label
        setattr(self, label, obj)
def setUp(self):
    """Create the named ``ts``, ``series`` and ``iseries`` fixtures."""
    fixtures = (
        ("ts", tm.makeTimeSeries),
        ("series", tm.makeStringSeries),
        ("iseries", tm.makePeriodSeries),
    )
    # Each fixture is stored on ``self`` under the same string used as
    # its ``name``.
    for label, factory in fixtures:
        obj = factory()
        obj.name = label
        setattr(self, label, obj)
def test_repr(self):
    """Check that ``repr``/``str`` of the store work when empty and filled."""
    repr(self.store)

    contents = (
        ('a', tm.makeTimeSeries),
        ('b', tm.makeStringSeries),
        ('c', tm.makeDataFrame),
        ('d', tm.makePanel),
        ('foo/bar', tm.makePanel),
    )
    for key, factory in contents:
        self.store[key] = factory()

    appended_panel = tm.makePanel()
    self.store.append('e', appended_panel)

    repr(self.store)
    str(self.store)
def test_squeeze(self):
    """Check ``squeeze``: no-op, squeezing, empty objects and ``axis``."""
    # noop
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries()]:
        tm.assert_series_equal(s.squeeze(), s)
    for df in [tm.makeTimeDataFrame()]:
        tm.assert_frame_equal(df.squeeze(), df)
    with catch_warnings(record=True):
        for p in [tm.makePanel()]:
            tm.assert_panel_equal(p.squeeze(), p)
    with catch_warnings(record=True):
        for p4d in [tm.makePanel4D()]:
            tm.assert_panel4d_equal(p4d.squeeze(), p4d)

    # squeezing
    df = tm.makeTimeDataFrame().reindex(columns=['A'])
    tm.assert_series_equal(df.squeeze(), df['A'])

    with catch_warnings(record=True):
        p = tm.makePanel().reindex(items=['ItemA'])
        tm.assert_frame_equal(p.squeeze(), p['ItemA'])

        p = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A'])
        tm.assert_series_equal(p.squeeze(), p.loc['ItemA', :, 'A'])

    with catch_warnings(record=True):
        p4d = tm.makePanel4D().reindex(labels=['label1'])
        tm.assert_panel_equal(p4d.squeeze(), p4d['label1'])

    with catch_warnings(record=True):
        p4d = tm.makePanel4D().reindex(labels=['label1'], items=['ItemA'])
        tm.assert_frame_equal(p4d.squeeze(), p4d.loc['label1', 'ItemA'])

    # don't fail with 0 length dimensions GH11229 & GH8999
    empty_series = Series([], name='five')
    empty_frame = DataFrame([empty_series])
    with catch_warnings(record=True):
        empty_panel = Panel({'six': empty_frame})

    # A plain loop: the assertions are run for their side effects only, so
    # there is no reason to collect their results in a list.
    for higher_dim in [empty_series, empty_frame, empty_panel]:
        tm.assert_series_equal(empty_series, higher_dim.squeeze())

    # axis argument
    df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
    assert df.shape == (1, 1)
    tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
    tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0])
    assert df.squeeze() == df.iloc[0, 0]
    pytest.raises(ValueError, df.squeeze, axis=2)
    pytest.raises(ValueError, df.squeeze, axis='x')

    df = tm.makeTimeDataFrame(3)
    tm.assert_frame_equal(df.squeeze(axis=0), df)
def setUp(self):
    """Create the named series fixtures and an empty Series."""
    # (attribute on self, factory, value assigned to ``name``)
    fixtures = (
        ('ts', tm.makeTimeSeries, 'ts'),
        ('series', tm.makeStringSeries, 'series'),
        ('objSeries', tm.makeObjectSeries, 'objects'),
    )
    for attr, factory, series_name in fixtures:
        obj = factory()
        obj.name = series_name
        setattr(self, attr, obj)

    self.empty = Series([], index=[])
def setUp(self):
    """Record a matplotlib version flag and create the series fixtures."""
    import matplotlib as mpl

    # Flag comparing the installed matplotlib version with 1.2.1.
    installed_version = str(mpl.__version__)
    self.mpl_le_1_2_1 = installed_version <= LooseVersion('1.2.1')

    fixtures = (
        ('ts', tm.makeTimeSeries),
        ('series', tm.makeStringSeries),
        ('iseries', tm.makePeriodSeries),
    )
    # Each fixture is stored on ``self`` under the same string used as
    # its ``name``.
    for label, factory in fixtures:
        obj = factory()
        obj.name = label
        setattr(self, label, obj)
def test_sparse_series(self):
    """Round-trip sparse versions of a Series that contains NaNs."""
    dense = tm.makeStringSeries()
    dense[3:5] = np.nan

    # Default conversion, integer kind, then an explicit fill value.
    conversions = ({}, {'kind': "integer"}, {'fill_value': 0})
    for to_sparse_kwargs in conversions:
        sparse = dense.to_sparse(**to_sparse_kwargs)
        self._check_roundtrip(sparse, tm.assert_series_equal,
                              check_series_type=True)
def test_squeeze(self):
    """Check ``squeeze``: no-op, squeezing, empty objects and ``axis``."""
    # noop
    for s in [
            tm.makeFloatSeries(),
            tm.makeStringSeries(),
            tm.makeObjectSeries()
    ]:
        tm.assert_series_equal(s.squeeze(), s)
    for df in [tm.makeTimeDataFrame()]:
        tm.assert_frame_equal(df.squeeze(), df)
    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        for p in [tm.makePanel()]:
            tm.assert_panel_equal(p.squeeze(), p)

    # squeezing
    df = tm.makeTimeDataFrame().reindex(columns=['A'])
    tm.assert_series_equal(df.squeeze(), df['A'])

    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        p = tm.makePanel().reindex(items=['ItemA'])
        tm.assert_frame_equal(p.squeeze(), p['ItemA'])

        p = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A'])
        tm.assert_series_equal(p.squeeze(), p.loc['ItemA', :, 'A'])

    # don't fail with 0 length dimensions GH11229 & GH8999
    empty_series = Series([], name='five')
    empty_frame = DataFrame([empty_series])
    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        empty_panel = Panel({'six': empty_frame})

    # A plain loop: the assertions are run for their side effects only, so
    # there is no reason to collect their results in a list.
    for higher_dim in [empty_series, empty_frame, empty_panel]:
        tm.assert_series_equal(empty_series, higher_dim.squeeze())

    # axis argument
    df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
    assert df.shape == (1, 1)
    tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
    tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0])
    assert df.squeeze() == df.iloc[0, 0]
    pytest.raises(ValueError, df.squeeze, axis=2)
    pytest.raises(ValueError, df.squeeze, axis='x')

    df = tm.makeTimeDataFrame(3)
    tm.assert_frame_equal(df.squeeze(axis=0), df)
def test_series(self):
    """Round-trip Series with string, datetime and object-dtype indexes."""
    roundtrip = self._check_roundtrip
    same_series = tm.assert_series_equal

    roundtrip(tm.makeStringSeries(), same_series)

    time_series = tm.makeTimeSeries()
    roundtrip(time_series, same_series)

    # The time index used as values, under an object-dtype copy of itself.
    object_index = Index(time_series.index, dtype=object)
    roundtrip(Series(time_series.index, object_index), same_series)

    # The original values under an object index built from an object array.
    object_array = np.asarray(time_series.index, dtype=object)
    array_index = Index(object_array, dtype=object)
    roundtrip(Series(time_series.values, array_index), same_series)
def setUp(self):
    """Run the base set-up, reset matplotlib rc and create series fixtures."""
    TestPlotBase.setUp(self)

    import matplotlib as mpl
    mpl.rcdefaults()

    fixtures = (
        ('ts', tm.makeTimeSeries),
        ('series', tm.makeStringSeries),
        ('iseries', tm.makePeriodSeries),
    )
    # Each fixture is stored on ``self`` under the same string used as
    # its ``name``.
    for label, factory in fixtures:
        obj = factory()
        obj.name = label
        setattr(self, label, obj)
def setup_method(self, method):
    """Run the base set-up, reset matplotlib rc and create series fixtures."""
    TestPlotBase.setup_method(self, method)

    import matplotlib as mpl
    mpl.rcdefaults()

    fixtures = (
        ('ts', tm.makeTimeSeries),
        ('series', tm.makeStringSeries),
        ('iseries', tm.makePeriodSeries),
    )
    # Each fixture is stored on ``self`` under the same string used as
    # its ``name``.
    for label, factory in fixtures:
        obj = factory()
        obj.name = label
        setattr(self, label, obj)
def test_take(self):
    """Check ``take`` on Series and DataFrame against numpy ``take``."""
    positions = [1, 5, -2, 6, 3, -1]

    series_factories = (tm.makeFloatSeries, tm.makeStringSeries,
                        tm.makeObjectSeries)
    for ser in [make() for make in series_factories]:
        taken = ser.take(positions)
        wanted = Series(data=ser.values.take(positions),
                        index=ser.index.take(positions),
                        dtype=ser.dtype)
        tm.assert_series_equal(taken, wanted)

    frame = tm.makeTimeDataFrame()
    taken = frame.take(positions)
    wanted = DataFrame(data=frame.values.take(positions, axis=0),
                       index=frame.index.take(positions),
                       columns=frame.columns)
    tm.assert_frame_equal(taken, wanted)
def test_sem(self):
    """Check ``sem`` against a numpy-based standard error of the mean."""
    float_ser = tm.makeStringSeries().rename('series')
    time_ser = tm.makeTimeSeries().rename('ts')

    def standard_error(values):
        return np.std(values, ddof=1) / np.sqrt(len(values))

    self._check_stat_op('sem', standard_error, float_ser)

    # Non-default ddof must agree with the numpy reference.
    actual = time_ser.sem(ddof=4)
    spread = np.std(time_ser.values, ddof=4)
    wanted = spread / np.sqrt(len(time_ser.values))
    tm.assert_almost_equal(actual, wanted)

    # 1 - element series with ddof=1
    one_element = time_ser.iloc[[0]]
    assert pd.isna(one_element.sem(ddof=1))
def test_transpose(self):
    """Check ``transpose`` on Series, DataFrame and Panel objects."""
    duplicate_axes_msg = (
        r"transpose\(\) got multiple values for keyword argument 'axes'"
    )

    series_factories = (tm.makeFloatSeries, tm.makeStringSeries,
                        tm.makeObjectSeries)
    for ser in [make() for make in series_factories]:
        # calls implementation in pandas/core/base.py
        tm.assert_series_equal(ser.transpose(), ser)

    frame = tm.makeTimeDataFrame()
    tm.assert_frame_equal(frame.transpose().transpose(), frame)

    with catch_warnings(record=True):
        panel = tm.makePanel()
        rotated = panel.transpose(2, 0, 1)
        tm.assert_panel_equal(rotated.transpose(1, 2, 0), panel)
        # Passing the axes both positionally and by keyword must raise.
        tm.assert_raises_regex(TypeError, duplicate_axes_msg,
                               panel.transpose, 2, 0, 1, axes=(2, 0, 1))
def test_isnull():
    """Check ``isnull`` on scalars, Series and a mixed-dtype DataFrame."""
    assert not isnull(1.)
    assert isnull(None)
    assert isnull(np.NaN)
    assert not isnull(np.inf)
    assert not isnull(-np.inf)

    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries(), tm.makeTimeSeries(),
              tm.makePeriodSeries()]:
        assert isinstance(isnull(s), Series)

    # call on DataFrame
    df = DataFrame(np.random.randn(10, 5))
    df['foo'] = 'bar'
    result = isnull(df)
    # Build the expectation from the input frame column by column; deriving
    # it from ``result`` would make the comparison circular.
    expected = df.apply(isnull)
    tm.assert_frame_equal(result, expected)
def test_squeeze(self):
    """Check ``squeeze``: no-op, squeezing, empty objects and ``axis``."""
    # noop
    for s in [
            tm.makeFloatSeries(),
            tm.makeStringSeries(),
            tm.makeObjectSeries()
    ]:
        tm.assert_series_equal(s.squeeze(), s)
    for df in [tm.makeTimeDataFrame()]:
        tm.assert_frame_equal(df.squeeze(), df)

    # squeezing
    df = tm.makeTimeDataFrame().reindex(columns=['A'])
    tm.assert_series_equal(df.squeeze(), df['A'])

    # don't fail with 0 length dimensions GH11229 & GH8999
    empty_series = Series([], name='five')
    empty_frame = DataFrame([empty_series])
    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        empty_panel = Panel({'six': empty_frame})

    # A plain loop: the assertions are run for their side effects only, so
    # there is no reason to collect their results in a list.
    for higher_dim in [empty_series, empty_frame, empty_panel]:
        tm.assert_series_equal(empty_series, higher_dim.squeeze())

    # axis argument
    df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
    assert df.shape == (1, 1)
    tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
    tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0])
    assert df.squeeze() == df.iloc[0, 0]

    # Invalid axis values must raise with a message naming the axis.
    msg = ("No axis named 2 for object type <class"
           " 'pandas.core.frame.DataFrame'>")
    with pytest.raises(ValueError, match=msg):
        df.squeeze(axis=2)
    msg = ("No axis named x for object type <class"
           " 'pandas.core.frame.DataFrame'>")
    with pytest.raises(ValueError, match=msg):
        df.squeeze(axis='x')

    df = tm.makeTimeDataFrame(3)
    tm.assert_frame_equal(df.squeeze(axis=0), df)
def test_isnull():
    """Check ``isnull`` on scalars, Series and a mixed-dtype DataFrame."""
    assert not isnull(1.)
    assert isnull(None)
    assert isnull(np.NaN)
    assert not isnull(np.inf)
    assert not isnull(-np.inf)

    # For Series input this version expects a plain ndarray back.
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries(), tm.makeTimeSeries(),
              tm.makePeriodSeries()]:
        assert isinstance(isnull(s), np.ndarray)

    # call on DataFrame
    df = DataFrame(np.random.randn(10, 5))
    df['foo'] = 'bar'
    result = isnull(df)
    # Build the expectation from the input frame column by column; deriving
    # it from ``result`` would make the comparison circular.
    expected = df.apply(isnull)
    tm.assert_frame_equal(result, expected)
def test_isnull(self):
    """Check ``isnull`` on scalars, Series, DataFrames, Panels and Panel4Ds."""
    self.assertFalse(isnull(1.))
    self.assertTrue(isnull(None))
    self.assertTrue(isnull(np.NaN))
    # Pass the NaN through isnull(); asserting the bare float would always
    # succeed because a NaN float is truthy.
    self.assertTrue(isnull(float('nan')))
    self.assertFalse(isnull(np.inf))
    self.assertFalse(isnull(-np.inf))

    # series
    for s in [
            tm.makeFloatSeries(),
            tm.makeStringSeries(),
            tm.makeObjectSeries(),
            tm.makeTimeSeries(),
            tm.makePeriodSeries()
    ]:
        self.assertIsInstance(isnull(s), Series)

    # frame
    for df in [
            tm.makeTimeDataFrame(),
            tm.makePeriodFrame(),
            tm.makeMixedDataFrame()
    ]:
        result = isnull(df)
        expected = df.apply(isnull)
        tm.assert_frame_equal(result, expected)

    # panel
    with catch_warnings(record=True):
        for p in [
                tm.makePanel(),
                tm.makePeriodPanel(),
                tm.add_nans(tm.makePanel())
        ]:
            result = isnull(p)
            expected = p.apply(isnull)
            tm.assert_panel_equal(result, expected)

    # panel 4d
    with catch_warnings(record=True):
        for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]:
            result = isnull(p)
            expected = p.apply(isnull)
            tm.assert_panel4d_equal(result, expected)
def test_idxmax(self):
    """Check ``idxmax`` with NaNs, datetime data and a float index."""
    from pandas import date_range

    # _check_stat_op approach can not be used here because of isna check.
    ser = tm.makeStringSeries().rename("series")

    # add some NaNs
    ser[5:15] = np.NaN

    # skipna or no
    max_label = ser.idxmax()
    assert ser[max_label] == ser.max()
    assert pd.isna(ser.idxmax(skipna=False))

    # no NaNs
    clean = ser.dropna()
    assert clean[clean.idxmax()] == clean.max()
    labels = clean.index.values.tolist()
    assert labels.index(clean.idxmax()) == clean.values.argmax()

    # all NaNs
    all_nan = ser * np.nan
    assert pd.isna(all_nan.idxmax())

    dates = Series(date_range("20130102", periods=6))
    assert dates.idxmax() == 5

    dates[5] = np.nan
    assert dates.idxmax() == 4

    # Float64Index
    # GH#5914
    by_float = pd.Series([1, 2, 3], [1.1, 2.1, 3.1])
    assert by_float.idxmax() == 3.1
    assert by_float.idxmin() == 1.1

    index_as_values = pd.Series(by_float.index, by_float.index)
    assert index_as_values.idxmax() == 3.1
    assert index_as_values.idxmin() == 1.1
def test_isna_isnull(self, isna_f):
    """Check the null-detection function ``isna_f`` on scalars and containers.

    Parameters
    ----------
    isna_f : callable
        The function under test; it is applied to scalars, Series,
        DataFrames, Panels and Panel4Ds.
    """
    assert not isna_f(1.)
    assert isna_f(None)
    assert isna_f(np.NaN)
    # Pass the NaN through isna_f(); asserting the bare float would always
    # succeed because a NaN float is truthy.
    assert isna_f(float('nan'))
    assert not isna_f(np.inf)
    assert not isna_f(-np.inf)

    # series
    for s in [
            tm.makeFloatSeries(),
            tm.makeStringSeries(),
            tm.makeObjectSeries(),
            tm.makeTimeSeries(),
            tm.makePeriodSeries()
    ]:
        assert isinstance(isna_f(s), Series)

    # frame
    for df in [
            tm.makeTimeDataFrame(),
            tm.makePeriodFrame(),
            tm.makeMixedDataFrame()
    ]:
        result = isna_f(df)
        expected = df.apply(isna_f)
        tm.assert_frame_equal(result, expected)

    # panel
    with catch_warnings(record=True):
        for p in [
                tm.makePanel(),
                tm.makePeriodPanel(),
                tm.add_nans(tm.makePanel())
        ]:
            result = isna_f(p)
            expected = p.apply(isna_f)
            tm.assert_panel_equal(result, expected)

    # panel 4d
    with catch_warnings(record=True):
        for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]:
            result = isna_f(p)
            expected = p.apply(isna_f)
            tm.assert_panel4d_equal(result, expected)
def setUp(self):
    """Create series fixtures, empty objects and copies of the frames."""
    # (attribute on self, factory, value assigned to ``name``)
    named_fixtures = (
        ('ts', tm.makeTimeSeries, 'ts'),
        ('series', tm.makeStringSeries, 'series'),
        ('objSeries', tm.makeObjectSeries, 'objects'),
    )
    for attr, factory, series_name in named_fixtures:
        obj = factory()
        obj.name = series_name
        setattr(self, attr, obj)

    self.empty_series = Series([], index=[])
    self.empty_frame = DataFrame({})

    # Copy the module-level frames so that each test gets its own objects.
    frame_templates = (
        ('frame', _frame),
        ('frame2', _frame2),
        ('intframe', _intframe),
        ('tsframe', _tsframe),
        ('mixed_frame', _mixed_frame),
    )
    for attr, template in frame_templates:
        setattr(self, attr, template.copy())
def test_isnull():
    """Check ``isnull`` on scalars, Series, DataFrames, Panels and Panel4Ds."""
    # (scalar value, whether isnull should report it as null)
    scalar_expectations = [
        (1., False),
        (None, True),
        (np.NaN, True),
        (np.inf, False),
        (-np.inf, False),
    ]
    for scalar, should_be_null in scalar_expectations:
        assert bool(isnull(scalar)) is should_be_null

    # series
    all_series = [
        tm.makeFloatSeries(),
        tm.makeStringSeries(),
        tm.makeObjectSeries(),
        tm.makeTimeSeries(),
        tm.makePeriodSeries(),
    ]
    for one_series in all_series:
        assert isinstance(isnull(one_series), Series)

    # frame
    all_frames = [
        tm.makeTimeDataFrame(),
        tm.makePeriodFrame(),
        tm.makeMixedDataFrame(),
    ]
    for one_frame in all_frames:
        tm.assert_frame_equal(isnull(one_frame), one_frame.apply(isnull))

    # panel
    all_panels = [
        tm.makePanel(),
        tm.makePeriodPanel(),
        tm.add_nans(tm.makePanel()),
    ]
    for one_panel in all_panels:
        tm.assert_panel_equal(isnull(one_panel), one_panel.apply(isnull))

    # panel 4d
    all_panels_4d = [
        tm.makePanel4D(),
        tm.add_nans_panel4d(tm.makePanel4D()),
    ]
    for one_p4d in all_panels_4d:
        tm.assert_panel4d_equal(isnull(one_p4d), one_p4d.apply(isnull))
def test_isna_isnull(self, isna_f):
    """Check the null-detection function ``isna_f`` on scalars and containers.

    Parameters
    ----------
    isna_f : callable
        The function under test; it is applied to scalars, Series and
        DataFrames.
    """
    assert not isna_f(1.)
    assert isna_f(None)
    assert isna_f(np.NaN)
    # Pass the NaN through isna_f(); asserting the bare float would always
    # succeed because a NaN float is truthy.
    assert isna_f(float('nan'))
    assert not isna_f(np.inf)
    assert not isna_f(-np.inf)

    # series
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries(), tm.makeTimeSeries(),
              tm.makePeriodSeries()]:
        assert isinstance(isna_f(s), Series)

    # frame
    for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
               tm.makeMixedDataFrame()]:
        result = isna_f(df)
        expected = df.apply(isna_f)
        tm.assert_frame_equal(result, expected)
def test_skew(self):
    """Check ``skew`` against scipy's unbiased skewness and on tiny inputs."""
    from scipy.stats import skew

    def unbiased_skew(values):
        return skew(values, bias=False)

    named_series = tm.makeStringSeries().rename('series')
    self._check_stat_op('skew', unbiased_skew, named_series)

    # Corner cases: skew() returns NaN unless there are at least 3 values.
    min_count = 3
    for size in range(1, min_count):
        assert np.isnan(Series(np.ones(size)).skew())
        assert np.isnan(DataFrame(np.ones((size, size))).skew()).all()

    assert Series(np.ones(min_count)).skew() == 0
    assert (DataFrame(np.ones((min_count, min_count))).skew() == 0).all()
def test_squeeze(self):
    """Check ``squeeze``: no-op, squeezing, empty objects and ``axis``."""
    # noop
    for s in [
        tm.makeFloatSeries(),
        tm.makeStringSeries(),
        tm.makeObjectSeries()
    ]:
        tm.assert_series_equal(s.squeeze(), s)
    for df in [tm.makeTimeDataFrame()]:
        tm.assert_frame_equal(df.squeeze(), df)

    # squeezing
    df = tm.makeTimeDataFrame().reindex(columns=["A"])
    tm.assert_series_equal(df.squeeze(), df["A"])

    # don't fail with 0 length dimensions GH11229 & GH8999
    empty_series = Series([], name="five")
    empty_frame = DataFrame([empty_series])

    # A plain loop: the assertions are run for their side effects only, so
    # there is no reason to collect their results in a list.
    for higher_dim in [empty_series, empty_frame]:
        tm.assert_series_equal(empty_series, higher_dim.squeeze())

    # axis argument
    df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
    assert df.shape == (1, 1)
    tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
    tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0])
    assert df.squeeze() == df.iloc[0, 0]

    # Invalid axis values must raise with a message naming the axis.
    msg = "No axis named 2 for object type <class 'pandas.core.frame.DataFrame'>"
    with pytest.raises(ValueError, match=msg):
        df.squeeze(axis=2)
    msg = "No axis named x for object type <class 'pandas.core.frame.DataFrame'>"
    with pytest.raises(ValueError, match=msg):
        df.squeeze(axis="x")

    df = tm.makeTimeDataFrame(3)
    tm.assert_frame_equal(df.squeeze(axis=0), df)
def setup_method(self, method):
    """Build ``self.d``, a dict of Series fixtures keyed by a type label.

    Parameters
    ----------
    method : object
        Forwarded unchanged to the parent ``setup_method``.
    """
    super().setup_method(method)

    self.d = {}

    s = tm.makeStringSeries()
    s.name = "string"
    self.d["string"] = s

    s = tm.makeObjectSeries()
    s.name = "object"
    self.d["object"] = s

    s = Series(iNaT, dtype="M8[ns]", index=range(5))
    self.d["date"] = s

    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
        "F": [Timestamp("20130102", tz="US/Eastern")] * 2
        + [Timestamp("20130603", tz="CET")] * 3,
        "G": [Timestamp("20130102", tz="US/Eastern")] * 5,
        "H": Categorical([1, 2, 3, 4, 5]),
        "I": Categorical([1, 2, 3, 4, 5], ordered=True),
        "J": (np.bool_(1), 2, 3, 4, 5),
    }

    self.d["float"] = Series(data["A"])
    self.d["int"] = Series(data["B"])
    self.d["mixed"] = Series(data["E"])
    self.d["dt_tz_mixed"] = Series(data["F"])
    self.d["dt_tz"] = Series(data["G"])
    # "I" is the ordered categorical and "H" the unordered one, so each
    # label is paired with the data it describes.
    self.d["cat_ordered"] = Series(data["I"])
    self.d["cat_unordered"] = Series(data["H"])
    self.d["numpy_bool_mixed"] = Series(data["J"])
def setup_method(self, method):
    """Build ``self.d``, a dict of Series fixtures keyed by a type label.

    Parameters
    ----------
    method : object
        Forwarded unchanged to the parent ``setup_method``.
    """
    super(TestSeries, self).setup_method(method)

    self.d = {}

    s = tm.makeStringSeries()
    s.name = 'string'
    self.d['string'] = s

    s = tm.makeObjectSeries()
    s.name = 'object'
    self.d['object'] = s

    s = Series(iNaT, dtype='M8[ns]', index=range(5))
    self.d['date'] = s

    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.],
        'F': [Timestamp('20130102', tz='US/Eastern')] * 2 +
             [Timestamp('20130603', tz='CET')] * 3,
        'G': [Timestamp('20130102', tz='US/Eastern')] * 5,
        'H': Categorical([1, 2, 3, 4, 5]),
        'I': Categorical([1, 2, 3, 4, 5], ordered=True),
        'J': (np.bool_(1), 2, 3, 4, 5),
    }

    self.d['float'] = Series(data['A'])
    self.d['int'] = Series(data['B'])
    self.d['mixed'] = Series(data['E'])
    self.d['dt_tz_mixed'] = Series(data['F'])
    self.d['dt_tz'] = Series(data['G'])
    # 'I' is the ordered categorical and 'H' the unordered one, so each
    # label is paired with the data it describes.
    self.d['cat_ordered'] = Series(data['I'])
    self.d['cat_unordered'] = Series(data['H'])
    self.d['numpy_bool_mixed'] = Series(data['J'])
def setup_method(self, method):
    """Create the data path, series fixtures and copies of the frames."""
    self.dirpath = tm.get_data_path()

    # (attribute on self, factory, value assigned to ``name``)
    named_fixtures = (
        ('ts', tm.makeTimeSeries, 'ts'),
        ('series', tm.makeStringSeries, 'series'),
        ('objSeries', tm.makeObjectSeries, 'objects'),
    )
    for attr, factory, series_name in named_fixtures:
        obj = factory()
        obj.name = series_name
        setattr(self, attr, obj)

    self.empty_series = Series([], index=[])
    self.empty_frame = DataFrame({})

    # Copy the module-level frames so that each test gets its own objects.
    frame_templates = (
        ('frame', _frame),
        ('frame2', _frame2),
        ('intframe', _intframe),
        ('tsframe', _tsframe),
        ('mixed_frame', _mixed_frame),
        ('categorical', _cat_frame),
    )
    for attr, template in frame_templates:
        setattr(self, attr, template.copy())
def test_take(self):
    """Check ``take`` on Series, DataFrame, Panel and Panel4D objects."""
    positions = [1, 5, -2, 6, 3, -1]

    series_factories = (tm.makeFloatSeries, tm.makeStringSeries,
                        tm.makeObjectSeries)
    for ser in [make() for make in series_factories]:
        taken = ser.take(positions)
        wanted = Series(data=ser.values.take(positions),
                        index=ser.index.take(positions),
                        dtype=ser.dtype)
        tm.assert_series_equal(taken, wanted)

    frame = tm.makeTimeDataFrame()
    taken = frame.take(positions)
    wanted = DataFrame(data=frame.values.take(positions, axis=0),
                       index=frame.index.take(positions),
                       columns=frame.columns)
    tm.assert_frame_equal(taken, wanted)

    positions = [-3, 2, 0, 1]
    with catch_warnings(record=True):
        panel = tm.makePanel()
        taken = panel.take(positions)
        wanted = Panel(data=panel.values.take(positions, axis=0),
                       items=panel.items.take(positions),
                       major_axis=panel.major_axis,
                       minor_axis=panel.minor_axis)
        tm.assert_panel_equal(taken, wanted)

    with catch_warnings(record=True):
        panel_4d = tm.makePanel4D()
        taken = panel_4d.take(positions)
        wanted = Panel4D(data=panel_4d.values.take(positions, axis=0),
                         labels=panel_4d.labels.take(positions),
                         major_axis=panel_4d.major_axis,
                         minor_axis=panel_4d.minor_axis,
                         items=panel_4d.items)
        tm.assert_panel4d_equal(taken, wanted)
def setUp(self):
    """Create the named ``ts`` and ``series`` fixtures."""
    # Was at top level in test_series
    time_series = tm.makeTimeSeries()
    time_series.name = 'ts'
    self.ts = time_series

    string_indexed = tm.makeStringSeries()
    string_indexed.name = 'series'
    self.series = string_indexed
def setup_method(self):
    """Create the named ``ts`` and ``series`` fixtures."""
    # Was at top level in test_series
    time_series = tm.makeTimeSeries()
    time_series.name = "ts"
    self.ts = time_series

    string_indexed = tm.makeStringSeries()
    string_indexed.name = "series"
    self.series = string_indexed
def setUp(self):
    """Create the series fixtures and an empty Series."""
    generated = (
        ('ts', common.makeTimeSeries),
        ('series', common.makeStringSeries),
        ('objSeries', common.makeObjectSeries),
    )
    for attr, factory in generated:
        setattr(self, attr, factory())

    self.empty = Series([], index=[])
def test_neg(self):
    """Check that unary minus on a Series equals multiplying it by -1."""
    named = tm.makeStringSeries()
    named.name = "series"

    negated = -named
    scaled = -1 * named
    assert_series_equal(negated, scaled)
def series(self):
    """Return a Series built by ``tm.makeStringSeries`` and named 'series'."""
    named = tm.makeStringSeries()
    named.name = 'series'
    return named
def test_invert(self):
    """Check that unary minus on a boolean Series equals ``~`` on it."""
    named = tm.makeStringSeries()
    named.name = "series"

    negated_mask = -(named < 0)
    inverted_mask = ~(named < 0)
    assert_series_equal(negated_mask, inverted_mask)
def setUp(self):
    """Create the named ``ts`` and ``series`` fixtures."""
    time_series = tm.makeTimeSeries()
    time_series.name = 'ts'
    self.ts = time_series

    string_indexed = tm.makeStringSeries()
    string_indexed.name = 'series'
    self.series = string_indexed