def test_split(self):
    """Check ``Series.str.split`` on plain, multi-character, mixed and unicode input."""
    # single-character separator
    single_sep = Series(["a_b_c", "c_d_e", NA, "f_g_h"])
    split_single = single_sep.str.split("_")
    expected = Series([["a", "b", "c"], ["c", "d", "e"], NA, ["f", "g", "h"]])
    tm.assert_series_equal(split_single, expected)

    # a separator longer than one character yields the same pieces
    double_sep = Series(["a__b__c", "c__d__e", NA, "f__g__h"])
    tm.assert_series_equal(double_sep.str.split("__"), expected)

    # mixed object values: only the string entries are expected to be split
    mixed = Series(["a_b_c", NA, "d_e_f", True, datetime.today(), None, 1, 2.0])
    mixed_result = Series(mixed).str.split("_")
    mixed_expected = Series(
        [["a", "b", "c"], NA, ["d", "e", "f"], NA, NA, NA, NA, NA]
    )
    tm.assert_isinstance(mixed_result, Series)
    tm.assert_almost_equal(mixed_result, mixed_expected)

    # unicode input
    uni = Series([u("a_b_c"), u("c_d_e"), NA, u("f_g_h")])
    uni_result = uni.str.split("_")
    uni_expected = Series(
        [
            [u("a"), u("b"), u("c")],
            [u("c"), u("d"), u("e")],
            NA,
            [u("f"), u("g"), u("h")],
        ]
    )
    tm.assert_series_equal(uni_result, uni_expected)
def test_deprecated_match(self):
    """Exercise ``str.match`` without ``as_indexer``; every call must warn.

    This is the old match behavior, deprecated (but still default) in 0.13.
    """
    pattern = ".*(BAD[_]+).*(BAD)"

    plain = Series(["fooBAD__barBAD", NA, "foo"])
    with tm.assert_produces_warning():
        plain_result = plain.str.match(pattern)
    plain_expected = Series([("BAD__", "BAD"), NA, []])
    tm.assert_series_equal(plain_result, plain_expected)

    # mixed object values
    mixed = Series(
        ["aBAD_BAD", NA, "BAD_b_BAD", True, datetime.today(), "foo", None, 1, 2.0]
    )
    with tm.assert_produces_warning():
        mixed_result = Series(mixed).str.match(pattern)
    mixed_expected = [("BAD_", "BAD"), NA, ("BAD_", "BAD"), NA, NA, [], NA, NA, NA]
    tm.assert_isinstance(mixed_result, Series)
    tm.assert_almost_equal(mixed_result, mixed_expected)

    # unicode input
    uni = Series([u("fooBAD__barBAD"), NA, u("foo")])
    with tm.assert_produces_warning():
        uni_result = uni.str.match(pattern)
    uni_expected = Series([(u("BAD__"), u("BAD")), NA, []])
    tm.assert_series_equal(uni_result, uni_expected)
def test_repeat(self):
    """Check ``Series.str.repeat`` with a scalar count and with per-element counts."""
    letters = Series(["a", "b", NA, "c", NA, "d"])

    tripled = letters.str.repeat(3)
    tm.assert_series_equal(tripled, Series(["aaa", "bbb", NA, "ccc", NA, "ddd"]))

    varied = letters.str.repeat([1, 2, 3, 4, 5, 6])
    tm.assert_series_equal(varied, Series(["a", "bb", NA, "cccc", NA, "dddddd"]))

    # mixed object values
    mixed = Series(["a", NA, "b", True, datetime.today(), "foo", None, 1, 2.0])
    mixed_result = Series(mixed).str.repeat(3)
    mixed_expected = ["aaa", NA, "bbb", NA, NA, "foofoofoo", NA, NA, NA]
    tm.assert_isinstance(mixed_result, Series)
    tm.assert_almost_equal(mixed_result, mixed_expected)

    # unicode input
    uni = Series([u("a"), u("b"), NA, u("c"), NA, u("d")])

    uni_tripled = uni.str.repeat(3)
    tm.assert_series_equal(
        uni_tripled, Series([u("aaa"), u("bbb"), NA, u("ccc"), NA, u("ddd")])
    )

    uni_varied = uni.str.repeat([1, 2, 3, 4, 5, 6])
    tm.assert_series_equal(
        uni_varied, Series([u("a"), u("bb"), NA, u("cccc"), NA, u("dddddd")])
    )
def test_replace(self):
    """Check ``Series.str.replace`` with and without a count, flags and unicode.

    Sections: plain strings, mixed object values, unicode strings, and a
    look-around pattern combined with ``re.UNICODE``.
    """
    values = Series(["fooBAD__barBAD", NA])

    result = values.str.replace("BAD[_]*", "")
    exp = Series(["foobar", NA])
    tm.assert_series_equal(result, exp)

    # n=1 replaces only the first occurrence
    result = values.str.replace("BAD[_]*", "", n=1)
    exp = Series(["foobarBAD", NA])
    tm.assert_series_equal(result, exp)

    # mixed
    mixed = Series(
        ["aBAD", NA, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0]
    )
    rs = Series(mixed).str.replace("BAD[_]*", "")
    xp = ["a", NA, "b", NA, NA, "foo", NA, NA, NA]
    tm.assert_isinstance(rs, Series)
    tm.assert_almost_equal(rs, xp)

    # unicode
    values = Series([u("fooBAD__barBAD"), NA])

    result = values.str.replace("BAD[_]*", "")
    exp = Series([u("foobar"), NA])
    tm.assert_series_equal(result, exp)

    result = values.str.replace("BAD[_]*", "", n=1)
    exp = Series([u("foobarBAD"), NA])
    tm.assert_series_equal(result, exp)

    # flags + unicode
    values = Series([b"abcd,\xc3\xa0".decode("utf-8")])
    exp = Series([b"abcd, \xc3\xa0".decode("utf-8")])
    # Raw string: ``\w`` is not a valid str escape, so a non-raw literal
    # triggers an invalid-escape warning on newer Pythons.  The pattern
    # text itself is unchanged.
    result = values.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE)
    tm.assert_series_equal(result, exp)
def test_get(self):
    """Check ``str.get(1)`` applied to the result of ``str.split('_')``."""
    plain = Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'])
    second_piece = plain.str.split('_').str.get(1)
    tm.assert_series_equal(second_piece, Series(['b', 'd', np.nan, 'g']))

    # mixed object values
    mixed = Series(['a_b_c', NA, 'c_d_e', True, datetime.today(), None, 1, 2.])
    mixed_pieces = Series(mixed).str.split('_')
    mixed_result = mixed_pieces.str.get(1)
    mixed_expected = Series(['b', NA, 'd', NA, NA, NA, NA, NA])
    tm.assert_isinstance(mixed_result, Series)
    tm.assert_almost_equal(mixed_result, mixed_expected)

    # unicode input
    uni = Series([u('a_b_c'), u('c_d_e'), np.nan, u('f_g_h')])
    uni_result = uni.str.split('_').str.get(1)
    tm.assert_series_equal(uni_result, Series([u('b'), u('d'), np.nan, u('g')]))
def _compare_ols_results(model1, model2):
    """Assert ``model1`` is an instance of ``model2``'s type, then compare them.

    Models exposing a ``_window_type`` attribute go through the moving-OLS
    comparison; all others go through the full-sample comparison.
    """
    tm.assert_isinstance(model1, type(model2))

    is_moving = hasattr(model1, "_window_type")
    compare = _compare_moving_ols if is_moving else _compare_fullsample_ols
    compare(model1, model2)
def test_endswith(self):
    """Check ``str.endswith`` through the accessor and ``strings.str_endswith``."""
    words = Series(["om", NA, "foo_nom", "nom", "bar_foo", NA, "foo"])
    ends_foo = words.str.endswith("foo")
    tm.assert_series_equal(
        ends_foo, Series([False, NA, False, False, True, NA, True])
    )

    # mixed object values: first the module-level function on a plain list ...
    mixed = ["a", NA, "b", True, datetime.today(), "foo", None, 1, 2.0]
    func_result = strings.str_endswith(mixed, "f")
    mixed_expected = [False, NA, False, NA, NA, False, NA, NA, NA]
    tm.assert_almost_equal(func_result, mixed_expected)

    # ... then the accessor on a Series built from the same list
    accessor_result = Series(mixed).str.endswith("f")
    tm.assert_isinstance(accessor_result, Series)
    tm.assert_almost_equal(accessor_result, mixed_expected)

    # unicode input
    uni = Series(
        [u("om"), NA, u("foo_nom"), u("nom"), u("bar_foo"), NA, u("foo")]
    )
    uni_result = uni.str.endswith("foo")
    uni_expected = Series([False, NA, False, False, True, NA, True])
    tm.assert_series_equal(uni_result, uni_expected)

    # with na=False the missing entries come back as False
    filled_result = uni.str.endswith("foo", na=False)
    tm.assert_series_equal(filled_result, uni_expected.fillna(False).astype(bool))
def test_repeat(self):
    """Check ``Series.str.repeat`` with a scalar count and with per-element counts."""
    letters = Series(['a', 'b', NA, 'c', NA, 'd'])

    tripled = letters.str.repeat(3)
    tm.assert_series_equal(tripled, Series(['aaa', 'bbb', NA, 'ccc', NA, 'ddd']))

    varied = letters.str.repeat([1, 2, 3, 4, 5, 6])
    tm.assert_series_equal(varied, Series(['a', 'bb', NA, 'cccc', NA, 'dddddd']))

    # mixed object values
    mixed = Series(['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.])
    mixed_result = Series(mixed).str.repeat(3)
    mixed_expected = ['aaa', NA, 'bbb', NA, NA, 'foofoofoo', NA, NA, NA]
    tm.assert_isinstance(mixed_result, Series)
    tm.assert_almost_equal(mixed_result, mixed_expected)

    # unicode input
    uni = Series([u('a'), u('b'), NA, u('c'), NA, u('d')])

    uni_tripled = uni.str.repeat(3)
    tm.assert_series_equal(
        uni_tripled, Series([u('aaa'), u('bbb'), NA, u('ccc'), NA, u('ddd')])
    )

    uni_varied = uni.str.repeat([1, 2, 3, 4, 5, 6])
    tm.assert_series_equal(
        uni_varied, Series([u('a'), u('bb'), NA, u('cccc'), NA, u('dddddd')])
    )
def test_match(self):
    """Exercise ``str.match`` in its boolean-indexer form.

    This is the new match behavior introduced in 0.13.
    """
    grouped_pattern = '.*(BAD[_]+).*(BAD)'

    plain = Series(['fooBAD__barBAD', NA, 'foo'])
    with tm.assert_produces_warning():
        plain_result = plain.str.match(grouped_pattern, as_indexer=True)
    tm.assert_series_equal(plain_result, Series([True, NA, False]))

    # If no groups, use new behavior even when as_indexer is False.
    # (Old behavior is pretty much useless in this case.)
    ungrouped = Series(['fooBAD__barBAD', NA, 'foo'])
    ungrouped_result = ungrouped.str.match('.*BAD[_]+.*BAD', as_indexer=False)
    tm.assert_series_equal(ungrouped_result, Series([True, NA, False]))

    # mixed object values
    mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(),
                    'foo', None, 1, 2.])
    with tm.assert_produces_warning():
        mixed_result = Series(mixed).str.match(grouped_pattern, as_indexer=True)
    mixed_expected = [True, NA, True, NA, NA, False, NA, NA, NA]
    tm.assert_isinstance(mixed_result, Series)
    tm.assert_almost_equal(mixed_result, mixed_expected)

    # unicode input
    uni = Series([u('fooBAD__barBAD'), NA, u('foo')])
    with tm.assert_produces_warning():
        uni_result = uni.str.match(grouped_pattern, as_indexer=True)
    tm.assert_series_equal(uni_result, Series([True, NA, False]))
def test_lower_upper(self):
    """Check that ``str.upper`` followed by ``str.lower`` round-trips lowercase input."""
    lowercase = Series(['om', NA, 'nom', 'nom'])
    uppercased = lowercase.str.upper()
    tm.assert_series_equal(uppercased, Series(['OM', NA, 'NOM', 'NOM']))

    round_tripped = uppercased.str.lower()
    tm.assert_series_equal(round_tripped, lowercase)

    # mixed object values
    mixed = Series(['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.])
    mixed_upper = mixed.str.upper()
    mixed_result = Series(mixed_upper).str.lower()
    mixed_expected = ['a', NA, 'b', NA, NA, 'foo', NA, NA, NA]
    tm.assert_isinstance(mixed_result, Series)
    tm.assert_almost_equal(mixed_result, mixed_expected)

    # unicode input
    uni = Series([u('om'), NA, u('nom'), u('nom')])
    uni_upper = uni.str.upper()
    tm.assert_series_equal(uni_upper, Series([u('OM'), NA, u('NOM'), u('NOM')]))

    uni_round_tripped = uni_upper.str.lower()
    tm.assert_series_equal(uni_round_tripped, uni)
def test_split(self):
    """Check ``Series.str.split`` on plain, multi-character, mixed and unicode input."""
    # single-character separator
    single_sep = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])
    split_single = single_sep.str.split('_')
    expected = Series([['a', 'b', 'c'], ['c', 'd', 'e'], NA, ['f', 'g', 'h']])
    tm.assert_series_equal(split_single, expected)

    # a separator longer than one character yields the same pieces
    double_sep = Series(['a__b__c', 'c__d__e', NA, 'f__g__h'])
    tm.assert_series_equal(double_sep.str.split('__'), expected)

    # mixed object values: only the string entries are expected to be split
    mixed = Series(['a_b_c', NA, 'd_e_f', True, datetime.today(), None, 1, 2.])
    mixed_result = Series(mixed).str.split('_')
    mixed_expected = Series(
        [['a', 'b', 'c'], NA, ['d', 'e', 'f'], NA, NA, NA, NA, NA]
    )
    tm.assert_isinstance(mixed_result, Series)
    tm.assert_almost_equal(mixed_result, mixed_expected)

    # unicode input
    uni = Series([u('a_b_c'), u('c_d_e'), NA, u('f_g_h')])
    uni_result = uni.str.split('_')
    uni_expected = Series(
        [
            [u('a'), u('b'), u('c')],
            [u('c'), u('d'), u('e')],
            NA,
            [u('f'), u('g'), u('h')],
        ]
    )
    tm.assert_series_equal(uni_result, uni_expected)
def test_slice(self):
    """Check ``Series.str.slice(2, 5)`` on plain, mixed and unicode input."""
    plain = Series(['aafootwo', 'aabartwo', NA, 'aabazqux'])
    sliced = plain.str.slice(2, 5)
    tm.assert_series_equal(sliced, Series(['foo', 'bar', NA, 'baz']))

    # mixed object values
    mixed = Series(['aafootwo', NA, 'aabartwo', True, datetime.today(),
                    None, 1, 2.])
    mixed_result = Series(mixed).str.slice(2, 5)
    mixed_expected = Series(['foo', NA, 'bar', NA, NA, NA, NA, NA])
    tm.assert_isinstance(mixed_result, Series)
    tm.assert_almost_equal(mixed_result, mixed_expected)

    # unicode input
    uni = Series([u('aafootwo'), u('aabartwo'), NA, u('aabazqux')])
    uni_sliced = uni.str.slice(2, 5)
    tm.assert_series_equal(uni_sliced, Series([u('foo'), u('bar'), NA, u('baz')]))
def test_lower_upper(self):
    """Check that ``str.upper`` followed by ``str.lower`` round-trips lowercase input."""
    lowercase = Series(["om", NA, "nom", "nom"])
    uppercased = lowercase.str.upper()
    tm.assert_series_equal(uppercased, Series(["OM", NA, "NOM", "NOM"]))

    round_tripped = uppercased.str.lower()
    tm.assert_series_equal(round_tripped, lowercase)

    # mixed object values
    mixed = Series(["a", NA, "b", True, datetime.today(), "foo", None, 1, 2.0])
    mixed_upper = mixed.str.upper()
    mixed_result = Series(mixed_upper).str.lower()
    mixed_expected = ["a", NA, "b", NA, NA, "foo", NA, NA, NA]
    tm.assert_isinstance(mixed_result, Series)
    tm.assert_almost_equal(mixed_result, mixed_expected)

    # unicode input
    uni = Series([u("om"), NA, u("nom"), u("nom")])
    uni_upper = uni.str.upper()
    tm.assert_series_equal(uni_upper, Series([u("OM"), NA, u("NOM"), u("NOM")]))

    uni_round_tripped = uni_upper.str.lower()
    tm.assert_series_equal(uni_round_tripped, uni)
def test_endswith(self):
    """Check ``str.endswith`` through the accessor and ``strings.str_endswith``."""
    words = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo'])
    ends_foo = words.str.endswith('foo')
    tm.assert_series_equal(
        ends_foo, Series([False, NA, False, False, True, NA, True])
    )

    # mixed object values: first the module-level function on a plain list ...
    mixed = ['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.]
    func_result = strings.str_endswith(mixed, 'f')
    mixed_expected = [False, NA, False, NA, NA, False, NA, NA, NA]
    tm.assert_almost_equal(func_result, mixed_expected)

    # ... then the accessor on a Series built from the same list
    accessor_result = Series(mixed).str.endswith('f')
    tm.assert_isinstance(accessor_result, Series)
    tm.assert_almost_equal(accessor_result, mixed_expected)

    # unicode input
    uni = Series(
        [u('om'), NA, u('foo_nom'), u('nom'), u('bar_foo'), NA, u('foo')]
    )
    uni_result = uni.str.endswith('foo')
    uni_expected = Series([False, NA, False, False, True, NA, True])
    tm.assert_series_equal(uni_result, uni_expected)

    # with na=False the missing entries come back as False
    filled_result = uni.str.endswith('foo', na=False)
    tm.assert_series_equal(filled_result, uni_expected.fillna(False).astype(bool))
def test_file_url(self):
    """Read the banklist data via a file URL and check a list of DataFrames comes back."""
    source_url = file_path_to_url(self.banklist_data)
    tables = self.read_html(source_url, 'First', attrs={'id': 'table'})

    tm.assert_isinstance(tables, list)
    for table in tables:
        tm.assert_isinstance(table, DataFrame)
def test_xcompat(self):
    """Check that the x-compat plotting option controls ``PeriodIndex`` x-data.

    The option is exercised three ways: as a ``plot`` keyword, as a global
    ``pd.plot_params`` entry, and through the ``pd.plot_params.use`` context
    manager.  With the option on, the first line's x-data must not be a
    ``PeriodIndex``; with it off, it must be one.
    """
    import pandas as pd
    import matplotlib.pyplot as plt

    df = tm.makeTimeDataFrame()

    # per-call keyword
    ax = df.plot(x_compat=True)
    lines = ax.get_lines()
    self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex)
    plt.close('all')

    # global option
    pd.plot_params['xaxis.compat'] = True
    try:
        ax = df.plot()
        lines = ax.get_lines()
        self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex)
        plt.close('all')
    finally:
        # Switch the global option back off even when the assertion above
        # fails, so the setting cannot leak into later tests.
        pd.plot_params['x_compat'] = False

    ax = df.plot()
    lines = ax.get_lines()
    tm.assert_isinstance(lines[0].get_xdata(), PeriodIndex)
    plt.close('all')

    # useful if you're plotting a bunch together
    with pd.plot_params.use('x_compat', True):
        ax = df.plot()
        lines = ax.get_lines()
        self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex)
        plt.close('all')

    # after leaving the context manager the option is expected to be off again
    ax = df.plot()
    lines = ax.get_lines()
    tm.assert_isinstance(lines[0].get_xdata(), PeriodIndex)
def test_deprecated_match(self):
    """Exercise ``str.match`` without ``as_indexer``; every call must warn.

    This is the old match behavior, deprecated (but still default) in 0.13.
    """
    pattern = '.*(BAD[_]+).*(BAD)'

    plain = Series(['fooBAD__barBAD', NA, 'foo'])
    with tm.assert_produces_warning():
        plain_result = plain.str.match(pattern)
    plain_expected = Series([('BAD__', 'BAD'), NA, []])
    tm.assert_series_equal(plain_result, plain_expected)

    # mixed object values
    mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(),
                    'foo', None, 1, 2.])
    with tm.assert_produces_warning():
        mixed_result = Series(mixed).str.match(pattern)
    mixed_expected = [('BAD_', 'BAD'), NA, ('BAD_', 'BAD'), NA, NA, [], NA, NA, NA]
    tm.assert_isinstance(mixed_result, Series)
    tm.assert_almost_equal(mixed_result, mixed_expected)

    # unicode input
    uni = Series([u('fooBAD__barBAD'), NA, u('foo')])
    with tm.assert_produces_warning():
        uni_result = uni.str.match(pattern)
    uni_expected = Series([(u('BAD__'), u('BAD')), NA, []])
    tm.assert_series_equal(uni_result, uni_expected)
def test_center(self):
    """Check ``Series.str.center(5)`` on plain, mixed and unicode input.

    Strings shorter than five characters are expected to be padded with
    spaces on both sides up to width five; longer strings ('eeeeee') are
    expected unchanged.
    """
    values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])

    result = values.str.center(5)
    # Expected values are five characters wide: a single character gets two
    # spaces on each side.
    exp = Series(['  a  ', '  b  ', NA, '  c  ', NA, 'eeeeee'])
    tm.assert_almost_equal(result, exp)

    # mixed
    mixed = Series(['a', NA, 'b', True, datetime.today(),
                    'c', 'eee', None, 1, 2.])

    rs = Series(mixed).str.center(5)
    # 'eee' needs one space on each side to reach width five
    xp = Series(['  a  ', NA, '  b  ', NA, NA, '  c  ', ' eee ', NA, NA, NA])
    tm.assert_isinstance(rs, Series)
    tm.assert_almost_equal(rs, xp)

    # unicode
    values = Series([u('a'), u('b'), NA, u('c'), NA, u('eeeeee')])

    result = values.str.center(5)
    exp = Series([u('  a  '), u('  b  '), NA, u('  c  '), NA, u('eeeeee')])
    tm.assert_almost_equal(result, exp)
def assert_geoseries_equal(left, right,
                           check_dtype=False,
                           check_index_type=False,
                           check_series_type=True,
                           check_less_precise=False,
                           check_geom_type=False,
                           check_crs=True):
    """Assert that two GeoSeries are equal.

    Parameters
    ----------
    left, right : two GeoSeries
    check_dtype : bool, default False
        If True, compare the dtypes.  [Only included so this is a drop-in
        replacement for assert_series_equal.]
    check_index_type : bool, default False
        If True, require the left index to be an instance of the right
        index's type.
    check_series_type : bool, default True
        If True, require ``left`` to be a GeoSeries and an instance of
        ``right``'s type.  If False, inputs that are not GeoSeries are
        converted to GeoSeries instead.
    check_less_precise : bool, default False
        If True, compare with geom_almost_equals; otherwise geom_equals.
    check_geom_type : bool, default False
        If True, require all geometry types to match.
    check_crs : bool, default True
        Only consulted when check_series_type is True: also require the
        crs of both inputs to match.
    """
    left_len = len(left)
    right_len = len(right)
    assert left_len == right_len, "%d != %d" % (left_len, right_len)

    if check_index_type:
        assert_isinstance(left.index, type(right.index))

    if check_dtype:
        assert left.dtype == right.dtype, "dtype: %s != %s" % (left.dtype,
                                                                right.dtype)

    if not check_series_type:
        # Coerce anything that is not already a GeoSeries; the right side
        # is aligned to the (possibly converted) left index.
        if not isinstance(left, GeoSeries):
            left = GeoSeries(left)
        if not isinstance(right, GeoSeries):
            right = GeoSeries(right, index=left.index)
    else:
        assert isinstance(left, GeoSeries)
        assert_isinstance(left, type(right))
        if check_crs:
            assert left.crs == right.crs

    assert left.index.equals(right.index), "index: %s != %s" % (left.index,
                                                                 right.index)

    if check_geom_type:
        assert (left.type == right.type).all(), "type: %s != %s" % (left.type,
                                                                     right.type)

    compare_geoms = geom_almost_equals if check_less_precise else geom_equals
    assert compare_geoms(left, right)
def _check_structures(self, func, static_comp,
                      has_min_periods=True,
                      has_time_rule=True,
                      has_center=True,
                      fill_value=None):
    """Run ``func`` over the fixture series/frame and check the results.

    Checks the result types for a window of 50.  When ``has_time_rule`` is
    set, checks ``freq='B'`` results against ``static_comp`` applied to the
    truncated data.  When ``has_center`` is set, checks that ``center=True``
    equals the uncentered result shifted by -12, with the shifted result
    filled with ``fill_value`` when one is given.
    """
    # Forwarded to ``func`` only when it accepts ``min_periods``.
    minp_kwargs = {'min_periods': 10} if has_min_periods else {}

    series_result = func(self.series, 50)
    tm.assert_isinstance(series_result, Series)

    frame_result = func(self.frame, 50)
    self.assertEqual(type(frame_result), DataFrame)

    # check time_rule works
    if has_time_rule:
        window = 25

        series_result = func(self.series[::2], window, freq='B', **minp_kwargs)
        frame_result = func(self.frame[::2], window, freq='B', **minp_kwargs)

        last_date = series_result.index[-1]
        prev_date = last_date - 24 * datetools.bday

        trunc_series = self.series[::2].truncate(prev_date, last_date)
        trunc_frame = self.frame[::2].truncate(prev_date, last_date)

        assert_almost_equal(series_result[-1], static_comp(trunc_series))
        assert_almost_equal(frame_result.xs(last_date),
                            trunc_frame.apply(static_comp))

    if has_center:
        series_xp = func(self.series, 25, **minp_kwargs).shift(-12)
        frame_xp = func(self.frame, 25, **minp_kwargs).shift(-12)

        series_rs = func(self.series, 25, center=True, **minp_kwargs)
        frame_rs = func(self.frame, 25, center=True, **minp_kwargs)

        if fill_value is not None:
            series_xp = series_xp.fillna(fill_value)
            frame_xp = frame_xp.fillna(fill_value)

        assert_series_equal(series_xp, series_rs)
        assert_frame_equal(frame_xp, frame_rs)
def test_unpickle_daterange(self):
    """Load the ``daterange_073.pickle`` fixture and check the unpickled range.

    The first element must be a ``datetime``, the offset a ``BDay``, and
    the underlying values must have object dtype.
    """
    pth, _ = os.path.split(os.path.abspath(__file__))
    filepath = os.path.join(pth, 'data', 'daterange_073.pickle')

    rng = read_pickle(filepath)
    tm.assert_isinstance(rng[0], datetime)
    tm.assert_isinstance(rng.offset, offsets.BDay)
    # assertEqual instead of the deprecated ``assert_`` alias; it also
    # reports both values on failure.
    self.assertEqual(rng.values.dtype, object)
def test_regex_idempotency(self):
    """Pass a doubly-compiled regex as ``match`` and check DataFrames come back."""
    # Compiling an already-compiled pattern is the point of this test.
    pattern = re.compile(re.compile('Florida'))
    source_url = file_path_to_url(self.banklist_data)

    tables = self.read_html(source_url, match=pattern, attrs={'id': 'table'})

    tm.assert_isinstance(tables, list)
    for table in tables:
        tm.assert_isinstance(table, DataFrame)
def test_upsample_apply_functions(self):
    """Resampling hourly data to 20 minutes with a list of functions gives a DataFrame."""
    # #1596
    hourly = pd.date_range('2012-06-12', periods=4, freq='h')
    noise = np.random.randn(len(hourly))
    ts = Series(noise, index=hourly)

    upsampled = ts.resample('20min', how=['mean', 'sum'])
    tm.assert_isinstance(upsampled, DataFrame)
def test_unpickle_daterange(self):
    """Load the ``daterange_073.pickle`` fixture and check the unpickled range."""
    here = os.path.split(os.path.abspath(__file__))[0]
    pickle_path = os.path.join(here, 'data', 'daterange_073.pickle')

    unpickled = read_pickle(pickle_path)

    tm.assert_isinstance(unpickled[0], datetime)
    tm.assert_isinstance(unpickled.offset, offsets.BDay)
    self.assertEqual(unpickled.values.dtype, object)
def test_daterange_bug_456(self):
    """Union of two custom-business-day ranges must be a ``DatetimeIndex``."""
    # GH #456
    single_day = cdate_range('12/5/2011', '12/5/2011')
    several_days = cdate_range('12/2/2011', '12/5/2011')
    several_days.offset = datetools.CDay()

    combined = single_day.union(several_days)
    tm.assert_isinstance(combined, DatetimeIndex)
def test_upsample_apply_functions(self):
    """Resampling hourly data to 20 minutes with a list of functions gives a DataFrame."""
    # #1596
    hourly = pd.date_range("2012-06-12", periods=4, freq="h")
    noise = np.random.randn(len(hourly))
    ts = Series(noise, index=hourly)

    upsampled = ts.resample("20min", how=["mean", "sum"])
    tm.assert_isinstance(upsampled, DataFrame)
def test_resample_reresample(self):
    """Resample daily data to business days, then to 8 hours, and check the result.

    The final result must have 22 entries and an index frequency that is a
    ``DateOffset`` equal to ``Hour(8)``.
    """
    dti = DatetimeIndex(
        start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D"
    )
    s = Series(np.random.rand(len(dti)), dti)

    bs = s.resample("B", closed="right", label="right")
    result = bs.resample("8H")

    # assertEqual replaces the deprecated ``assertEquals`` alias.
    self.assertEqual(len(result), 22)
    tm.assert_isinstance(result.index.freq, offsets.DateOffset)
    self.assertEqual(result.index.freq, offsets.Hour(8))
def test_append_join_nondatetimeindex(self):
    """Appending/joining a string ``Index`` to a timedelta range must work."""
    td_range = timedelta_range('1 days', periods=10)
    labels = Index(['a', 'b', 'c', 'd'])

    appended = td_range.append(labels)
    tm.assert_isinstance(appended[0], Timedelta)

    # it works
    td_range.join(labels, how='outer')
def test_append_join_nondatetimeindex(self):
    """Appending/joining a string ``Index`` to a timedelta range must work."""
    td_range = timedelta_range("1 days", periods=10)
    labels = Index(["a", "b", "c", "d"])

    appended = td_range.append(labels)
    tm.assert_isinstance(appended[0], Timedelta)

    # it works
    td_range.join(labels, how="outer")
def test_longpanel_series_combo(self):
    """Fit a windowed OLS with entity effects on long-format panel data.

    All beta values must be non-null and the model must be a ``PanelOLS``.
    """
    wp = tm.makePanel()
    lp = wp.to_frame()

    # 'ItemA' becomes the dependent variable; the remaining items are regressors
    y = lp.pop('ItemA')
    model = ols(y=y, x=lp, entity_effects=True, window=20)
    # assertTrue replaces the deprecated ``assert_`` alias.
    self.assertTrue(notnull(model.beta.values).all())
    tm.assert_isinstance(model, PanelOLS)
    # attribute access only: checks that building the summary does not raise
    model.summary
def check_result(self, actual, expected, lengths):
    """Check paired iterator/list results against expected values and lengths.

    Parameters
    ----------
    actual : iterable of ``(iter_res, list_res)`` pairs
        ``iter_res`` must not be a list; ``list_res`` must be one.
    expected : iterable
        Expected contents for each pair.
    lengths : iterable of int
        Expected length for each pair.
    """
    for (iter_res, list_res), exp, length in zip(actual, expected, lengths):
        # assertNotIsInstance replaces ``assert_(not isinstance(...))``:
        # same check, clearer failure message, no deprecated alias.
        self.assertNotIsInstance(iter_res, list)
        tm.assert_isinstance(list_res, list)
        # materialize the lazy result so it can be measured and compared
        iter_res = list(iter_res)
        self.assertEqual(len(list_res), length)
        self.assertEqual(len(iter_res), length)
        self.assertEqual(iter_res, exp)
        self.assertEqual(list_res, exp)
def test_longpanel_series_combo(self):
    """Fit a windowed OLS with entity effects on long-format panel data."""
    long_frame = tm.makePanel().to_frame()
    # "ItemA" becomes the dependent variable; the remaining items are regressors
    dependent = long_frame.pop("ItemA")

    model = ols(y=dependent, x=long_frame, entity_effects=True, window=20)

    betas_present = notnull(model.beta.values).all()
    self.assertTrue(betas_present)
    tm.assert_isinstance(model, PanelOLS)
    # attribute access only: checks that building the summary does not raise
    model.summary
def test_resample_reresample(self):
    """Resample daily data to business days, then to 8 hours, and check the result.

    The final result must have 22 entries and an index frequency that is a
    ``DateOffset`` equal to ``Hour(8)``.
    """
    dti = DatetimeIndex(
        start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq='D'
    )
    s = Series(np.random.rand(len(dti)), dti)

    bs = s.resample('B', closed='right', label='right')
    result = bs.resample('8H')

    # assertEqual replaces the deprecated ``assertEquals`` alias.
    self.assertEqual(len(result), 22)
    tm.assert_isinstance(result.index.freq, offsets.DateOffset)
    self.assertEqual(result.index.freq, offsets.Hour(8))
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
    """Check that block indexes survive a round trip through an int index.

    Builds two ``BlockIndex`` objects of length ``TEST_LENGTH`` from the
    x and y locations/lengths, converts each to an int index and back, and
    asserts the round-tripped indexes equal the originals.  ``eloc`` and
    ``elen`` are accepted but not used here.

    NOTE(review): ``self`` is not a parameter, so this is presumably a
    closure defined inside a test method -- confirm against the enclosing
    scope.
    """
    xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
    yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

    # see if survive the round trip
    xbindex = xindex.to_int_index().to_block_index()
    ybindex = yindex.to_int_index().to_block_index()
    tm.assert_isinstance(xbindex, BlockIndex)
    # assertTrue replaces the deprecated ``assert_`` alias.
    self.assertTrue(xbindex.equals(xindex))
    self.assertTrue(ybindex.equals(yindex))
def check_result(self, actual, expected, lengths):
    """Check paired lazy/list results against expected values and lengths."""
    for pair, wanted, size in zip(actual, expected, lengths):
        lazy_res, eager_res = pair

        self.assertNotIsInstance(lazy_res, list)
        tm.assert_isinstance(eager_res, list)

        # materialize the lazy result so it can be measured and compared
        materialized = list(lazy_res)

        self.assertEqual(len(eager_res), size)
        self.assertEqual(len(materialized), size)
        self.assertEqual(materialized, wanted)
        self.assertEqual(eager_res, wanted)
def _check_join(left, right, how='inner'):
    """Check that joining with ``right`` matches joining with ``DatetimeIndex(right)``.

    Both joins are done with ``return_indexers=True``; the joined index
    must be a ``DatetimeIndex`` equal to the reference one and both
    indexers must be almost equal to the reference indexers.

    NOTE(review): ``self`` is not a parameter, so this is presumably a
    closure defined inside a test method -- confirm against the enclosing
    scope.
    """
    ra, rb, rc = left.join(right, how=how, return_indexers=True)
    ea, eb, ec = left.join(DatetimeIndex(right), how=how,
                           return_indexers=True)

    tm.assert_isinstance(ra, DatetimeIndex)
    # assertTrue replaces the deprecated ``assert_`` alias.
    self.assertTrue(ra.equals(ea))

    assert_almost_equal(rb, eb)
    assert_almost_equal(rc, ec)
def test_margins(self):
    """Check the 'All' margins produced by ``pivot_table(margins=True)``."""

    def _check_output(res, col, index=['A', 'B'], columns=['C']):
        # column margin: the 'All' column without its last entry
        col_margin = res['All'][:-1]
        tm.assert_series_equal(col_margin,
                               self.data.groupby(index)[col].mean())

        # row margin: the ('All', '') row without its last entry
        ordered = res.sortlevel()
        row_margin = ordered.xs(('All', ''))[:-1]
        tm.assert_series_equal(row_margin,
                               self.data.groupby(columns)[col].mean())

        # grand margin: overall mean of the value column
        grand_margin = ordered['All']['All', '']
        self.assertEqual(grand_margin, self.data[col].mean())

    # column specified
    with_values = self.data.pivot_table('D', index=['A', 'B'], columns='C',
                                        margins=True, aggfunc=np.mean)
    _check_output(with_values, 'D')

    # no column specified
    without_values = self.data.pivot_table(index=['A', 'B'], columns='C',
                                           margins=True, aggfunc=np.mean)
    for valcol in without_values.columns.levels[0]:
        _check_output(without_values[valcol], valcol)

    # no col

    # to help with a buglet
    self.data.columns = [name * 2 for name in self.data.columns]
    table = self.data.pivot_table(index=['AA', 'BB'], margins=True,
                                  aggfunc=np.mean)
    for valcol in table.columns:
        self.assertEqual(table[valcol]['All', ''], self.data[valcol].mean())

    # this is OK
    table = self.data.pivot_table(index=['AA', 'BB'], margins=True,
                                  aggfunc='mean')

    # no rows
    rtable = self.data.pivot_table(columns=['AA', 'BB'], margins=True,
                                   aggfunc=np.mean)
    tm.assert_isinstance(rtable, Series)
    # NOTE(review): this loop reads ``table`` (the aggfunc='mean' pivot
    # above), not ``rtable`` -- confirm that is intended.
    for item in ['DD', 'EE', 'FF']:
        self.assertEqual(table[item]['All', ''], self.data[item].mean())
def test_misc_coverage(self):
    """Cover ``groupby`` and ``equals`` on timedelta indexes."""
    td_range = timedelta_range('1 day', periods=5)
    grouped = td_range.groupby(td_range.days)
    first_group = list(grouped.values())[0]
    tm.assert_isinstance(first_group[0], Timedelta)

    td_index = TimedeltaIndex(['3d', '1d', '2d'])
    same_as_list = td_index.equals(list(td_index))
    self.assertTrue(same_as_list)

    letters = Index(list('abc'))
    same_as_letters = td_index.equals(list(letters))
    self.assertFalse(same_as_letters)
def test_value_counts(self):
    """``value_counts`` on a cut result must match counting its array form."""
    from pandas.tools.tile import cut

    samples = np.random.randn(4)
    binned = cut(samples, 4)
    tm.assert_isinstance(binned, Categorical)

    counted = algos.value_counts(binned)
    reference = algos.value_counts(np.asarray(binned))
    tm.assert_series_equal(counted, reference)
def test_ExcelWriter_dispatch(self):
    """``ExcelWriter`` must pick its writer class from the file extension."""
    # a path without a recognised extension has no engine
    with tm.assertRaisesRegexp(ValueError, 'No engine'):
        writer = ExcelWriter('nothing')

    _skip_if_no_openpyxl()
    xlsx_writer = ExcelWriter('apple.xlsx')
    tm.assert_isinstance(xlsx_writer, _OpenpyxlWriter)

    _skip_if_no_xlwt()
    xls_writer = ExcelWriter('apple.xls')
    tm.assert_isinstance(xls_writer, _XlwtWriter)
def test_unpickle_legacy_len0_daterange(self):
    """Load the ``series_daterange0.pickle`` fixture and check its empty index."""
    here = os.path.split(os.path.abspath(__file__))[0]
    pickle_path = os.path.join(here, 'data', 'series_daterange0.pickle')

    unpickled = pd.read_pickle(pickle_path)

    empty_bday_index = DatetimeIndex([], freq='B')
    self.assertTrue(unpickled.index.equals(empty_bday_index))
    tm.assert_isinstance(unpickled.index.freq, offsets.BDay)
    self.assertEqual(len(unpickled), 0)
def test_shallow_copying(self):
    """Check views and shallow copies of the frozen container.

    A plain ``view()`` stays a ``FrozenNDArray`` but is a distinct object,
    a ``view(np.ndarray)`` is not a ``FrozenNDArray``, the container still
    equals its earlier copy, and item assignment is rejected.
    """
    original = self.container.copy()
    assert_isinstance(self.container.view(), FrozenNDArray)
    # Dedicated assertion methods replace the deprecated ``assert_`` alias
    # and give more informative failure messages.
    self.assertNotIsInstance(self.container.view(np.ndarray), FrozenNDArray)
    self.assertIsNot(self.container.view(), self.container)
    self.assertTrue(np.array_equal(self.container, original))

    # shallow copy should be the same too
    assert_isinstance(self.container._shallow_copy(), FrozenNDArray)

    # setting should not be allowed
    def testit(container):
        container[0] = 16

    self.check_mutable_error(testit, self.container)
def test_contains(self):
    """Check ``strings.str_contains`` and the ``str.contains`` accessor.

    Covers regex and literal matching, the bool dtype of NA-free results,
    mixed object values, unicode input, and the ``na`` fill argument.
    """
    values = ['foo', NA, 'fooommm__foo', 'mmm_', 'foommm[_]+bar']
    pat = 'mmm[_]+'

    result = strings.str_contains(values, pat)
    expected = [False, NA, True, True, False]
    tm.assert_almost_equal(result, expected)

    # regex=False matches the pattern text literally
    result = strings.str_contains(values, pat, regex=False)
    expected = [False, NA, False, False, True]
    tm.assert_almost_equal(result, expected)

    # without missing values the result has bool dtype
    values = ['foo', 'xyz', 'fooommm__foo', 'mmm_']
    result = strings.str_contains(values, pat)
    expected = [False, False, True, True]
    # assertEqual replaces the deprecated ``assert_(a == b)`` form.
    self.assertEqual(result.dtype, np.bool_)
    tm.assert_almost_equal(result, expected)

    # mixed
    mixed = ['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.]
    rs = strings.str_contains(mixed, 'o')
    xp = [False, NA, False, NA, NA, True, NA, NA, NA]
    tm.assert_almost_equal(rs, xp)

    rs = Series(mixed).str.contains('o')
    tm.assert_isinstance(rs, Series)
    tm.assert_almost_equal(rs, xp)

    # unicode
    values = [u('foo'), NA, u('fooommm__foo'), u('mmm_')]
    pat = 'mmm[_]+'

    result = strings.str_contains(values, pat)
    expected = [False, np.nan, True, True]
    tm.assert_almost_equal(result, expected)

    result = strings.str_contains(values, pat, na=False)
    expected = [False, False, True, True]
    tm.assert_almost_equal(result, expected)

    values = ['foo', 'xyz', 'fooommm__foo', 'mmm_']
    result = strings.str_contains(values, pat)
    expected = [False, False, True, True]
    self.assertEqual(result.dtype, np.bool_)
    tm.assert_almost_equal(result, expected)

    # na
    values = Series(['om', 'foo', np.nan])
    res = values.str.contains('foo', na="foo")
    self.assertEqual(res.ix[2], "foo")
def test_take(self):
    """``take`` and list indexing on an hourly ``TimedeltaIndex`` must agree."""
    positions = [2, 4, 10]
    expected = TimedeltaIndex(
        ['1day 02:00:00', '1 day 04:00:00', '1 day 10:00:00'],
        freq=None, name='idx')

    hourly = TimedeltaIndex(start='1d', end='2d', freq='H', name='idx')
    via_take = hourly.take(positions)
    via_getitem = hourly[positions]

    for picked in (via_take, via_getitem):
        self.assertTrue(picked.equals(expected))
        tm.assert_isinstance(picked, TimedeltaIndex)
        self.assertIsNone(picked.freq)
        self.assertEqual(picked.name, expected.name)
def test_join_utc_convert(self):
    """Joining tz-aware indexes keeps a shared zone and otherwise yields UTC."""
    utc_range = date_range('1/1/2011', periods=100, freq='H', tz='utc')
    eastern = utc_range.tz_convert('US/Eastern')
    berlin = utc_range.tz_convert('Europe/Berlin')

    for how in ('inner', 'outer', 'left', 'right'):
        # same zone on both sides: the zone is kept
        same_zone = eastern.join(eastern[:-5], how=how)
        tm.assert_isinstance(same_zone, DatetimeIndex)
        self.assertEqual(same_zone.tz, eastern.tz)

        # different zones: the result is expected in UTC
        cross_zone = eastern.join(berlin[:-5], how=how)
        tm.assert_isinstance(cross_zone, DatetimeIndex)
        self.assertEqual(cross_zone.tz.zone, 'UTC')
def test_outer_join(self):
    """Outer joins of date ranges should just behave as union."""
    # overlapping
    joined = self.rng[:10].join(self.rng[5:10], how='outer')
    tm.assert_isinstance(joined, DatetimeIndex)

    # non-overlapping, gap in middle
    joined = self.rng[:5].join(self.rng[10:], how='outer')
    tm.assert_isinstance(joined, DatetimeIndex)
    self.assertIsNone(joined.freq)

    # non-overlapping, no gap
    joined = self.rng[:5].join(self.rng[5:10], how='outer')
    tm.assert_isinstance(joined, DatetimeIndex)

    # overlapping, but different offset
    month_ends = date_range(START, END, freq=datetools.bmonthEnd)
    joined = self.rng.join(month_ends, how='outer')
    tm.assert_isinstance(joined, DatetimeIndex)
    self.assertIsNone(joined.freq)
def test_union(self):
    """Check the type of ``union`` results for several range layouts."""
    # overlapping
    merged = self.rng[:10].union(self.rng[5:10])
    tm.assert_isinstance(merged, DatetimeIndex)

    # non-overlapping, gap in middle
    merged = self.rng[:5].union(self.rng[10:])
    tm.assert_isinstance(merged, Index)

    # non-overlapping, no gap
    head = self.rng[:5]
    tail = self.rng[5:10]
    merged = head.union(tail)
    tm.assert_isinstance(merged, DatetimeIndex)

    # order does not matter
    self.assert_numpy_array_equal(tail.union(head), merged)

    # overlapping, but different offset
    month_ends = date_range(START, END, freq=datetools.bmonthEnd)
    merged = self.rng.union(month_ends)
    tm.assert_isinstance(merged, DatetimeIndex)
def _check_op(op, first, second):
    """Apply ``op`` to sparse/dense operand combinations and compare results.

    Every combination must produce a ``SparseArray``; the sparse-sparse
    result is the reference for the sparse-dense and dense-sparse ones.
    A scalar operand (4) is checked last.
    """
    sparse_sparse = op(first, second)
    dense_reference = SparseArray(op(first.values, second.values),
                                  fill_value=first.fill_value)
    tm.assert_isinstance(sparse_sparse, SparseArray)
    assert_almost_equal(sparse_sparse.values, dense_reference.values)

    sparse_dense = op(first, second.values)
    tm.assert_isinstance(sparse_dense, SparseArray)
    assert_sp_array_equal(sparse_sparse, sparse_dense)

    dense_sparse = op(first.values, second)
    tm.assert_isinstance(dense_sparse, SparseArray)
    assert_sp_array_equal(sparse_sparse, dense_sparse)

    with_scalar = op(first, 4)
    tm.assert_isinstance(with_scalar, SparseArray)

    # ignore this if the actual op raises (e.g. pow)
    try:
        scalar_values = op(first.values, 4)
        scalar_fill = op(first.fill_value, 4)
        assert_almost_equal(with_scalar.fill_value, scalar_fill)
        assert_almost_equal(with_scalar.values, scalar_values)
    except ValueError:
        pass
def test_getitem(self):
    """Check slicing, stepping, fancy and scalar indexing on the range."""
    # plain slice keeps the offset
    head = self.rng[:5]
    self.assert_numpy_array_equal(head, self.rng.view(np.ndarray)[:5])
    self.assertEqual(head.offset, self.rng.offset)

    # a step of 5 multiplies the offset
    every_fifth = self.rng[::5]
    self.assertEqual(every_fifth.offset, datetools.cday * 5)

    # fancy indexing drops the frequency
    reversed_head = self.rng[[4, 3, 2, 1, 0]]
    self.assertEqual(len(reversed_head), 5)
    tm.assert_isinstance(reversed_head, DatetimeIndex)
    self.assertIsNone(reversed_head.freq)

    # 32-bit vs. 64-bit platforms
    self.assertEqual(self.rng[4], self.rng[np.int_(4)])
def test_various_attributes(self):
    """Smoke-test several attributes of a windowed OLS model."""
    # just make sure everything "works". test correctness elsewhere
    regressors = DataFrame(np.random.randn(100, 5))
    response = np.random.randn(100)
    model = ols(y=response, x=regressors, window=20)

    for name in ('rank', 'df', 'forecast_mean', 'forecast_vol'):
        tm.assert_isinstance(getattr(model, name), Series)

    # works
    model._results
def test_arithmetic_interaction(self):
    """Adding a datetime-indexed and an object-indexed Series keeps a ``DatetimeIndex``."""
    dt_index = self.frame.index
    dt_series = Series(rand(len(dt_index)), index=dt_index)
    obj_series = Series(dt_series.values, index=dt_index.asobject)

    # full-length operands
    total = dt_series + obj_series
    doubled = dt_series * 2
    tm.assert_isinstance(total.index, DatetimeIndex)
    assert_series_equal(total, doubled)

    # object-indexed operand truncated to its first five entries
    partial_total = dt_series + obj_series[:5]
    partial_reference = dt_series + dt_series[:5]
    tm.assert_isinstance(partial_total.index, DatetimeIndex)
    assert_series_equal(partial_total, partial_reference)
def test_ExcelWriter_dispatch(self):
    """``ExcelWriter`` must pick its writer class from extension and installed engines."""
    # a path without a recognised extension has no engine
    with tm.assertRaisesRegexp(ValueError, 'No engine'):
        ExcelWriter('nothing')

    # expect the xlsxwriter-backed class when xlsxwriter imports,
    # otherwise the openpyxl one (skipping if that is missing too)
    try:
        import xlsxwriter
    except ImportError:
        _skip_if_no_openpyxl()
        expected_xlsx_class = _OpenpyxlWriter
    else:
        expected_xlsx_class = _XlsxWriter

    xlsx_writer = ExcelWriter('apple.xlsx')
    tm.assert_isinstance(xlsx_writer, expected_xlsx_class)

    _skip_if_no_xlwt()
    xls_writer = ExcelWriter('apple.xls')
    tm.assert_isinstance(xls_writer, _XlwtWriter)
def test_gap_upsample(self):
    """A NaN gap must stay masked in the line data after a secondary-axis plot."""
    low_freq = tm.makeTimeSeries()
    low_freq[5:25] = np.nan
    ax = low_freq.plot()

    # higher-frequency series over the same span, drawn on the secondary axis
    half_daily = date_range(low_freq.index[0], low_freq.index[-1], freq='12h')
    high_freq = Series(np.random.randn(len(half_daily)), half_daily)
    high_freq.plot(secondary_y=True)

    primary_lines = ax.get_lines()
    self.assertEqual(len(primary_lines), 1)
    self.assertEqual(len(ax.right_ax.get_lines()), 1)

    xy = primary_lines[0].get_xydata()
    tm.assert_isinstance(xy, np.ma.core.MaskedArray)
    self.assertTrue(xy.mask[5:25, 1].all())
def test_set_value(self):
    """Check ``Panel4D.set_value`` for existing and for new coordinates.

    Setting at existing coordinates must be readable back through chained
    indexing.  Setting at new coordinates must return a new, resized
    ``Panel4D`` and leave the original object distinct.
    """
    for label in self.panel4d.labels:
        for item in self.panel4d.items:
            for mjr in self.panel4d.major_axis[::2]:
                for mnr in self.panel4d.minor_axis:
                    self.panel4d.set_value(label, item, mjr, mnr, 1.)
                    assert_almost_equal(
                        self.panel4d[label][item][mnr][mjr], 1.)

    # resize
    res = self.panel4d.set_value('l4', 'ItemE', 'foo', 'bar', 1.5)
    tm.assert_isinstance(res, Panel4D)
    # assertIsNot / assertTrue replace the deprecated ``assert_`` alias.
    self.assertIsNot(res, self.panel4d)
    self.assertEqual(res.get_value('l4', 'ItemE', 'foo', 'bar'), 1.5)

    # an integer value set at new coordinates still gives float values
    res3 = self.panel4d.set_value('l4', 'ItemE', 'foobar', 'baz', 5)
    self.assertTrue(com.is_float_dtype(res3['l4'].values))
def test_intersection(self):
    """Check ``intersection`` of overlapping and disjoint minute ranges."""
    minutes = date_range('1/1/2000', periods=50, freq=datetools.Minute())
    later = minutes[10:]
    earlier = minutes[:25]
    overlap = minutes[10:25]

    common = later.intersection(earlier)
    self.assertTrue(common.equals(overlap))
    tm.assert_isinstance(common, DatetimeIndex)
    self.assertEqual(common.offset, minutes.offset)

    # same check with the other operand passed as a DatetimeIndex view
    common = later.intersection(earlier.view(DatetimeIndex))
    self.assertTrue(common.equals(overlap))

    # non-overlapping
    common = minutes[:10].intersection(minutes[10:])
    self.assertTrue(common.equals(DatetimeIndex([])))