def test_catch_regressor_overlap(self):
    # ols is expected to raise for this input, in which both regressor
    # frames contain column 'B'.
    # Plain list-of-labels selection replaces the removed ``.ix`` indexer;
    # for labels that exist both forms select the same columns.
    df1 = tm.makeTimeDataFrame()[['A', 'B']]
    df2 = tm.makeTimeDataFrame()[['B', 'C', 'D']]
    y = tm.makeTimeSeries()

    data = {'foo': df1, 'bar': df2}
    self.assertRaises(Exception, ols, y=y, x=data)
def test_squeeze(self):
    # Exercise squeeze() on Series, DataFrame and an empty Panel, including
    # the ``axis`` argument on a 1x1 frame.

    # noop
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries()]:
        tm.assert_series_equal(s.squeeze(), s)
    for df in [tm.makeTimeDataFrame()]:
        tm.assert_frame_equal(df.squeeze(), df)

    # squeezing
    df = tm.makeTimeDataFrame().reindex(columns=['A'])
    tm.assert_series_equal(df.squeeze(), df['A'])

    # don't fail with 0 length dimensions GH11229 & GH8999
    empty_series = Series([], name='five')
    empty_frame = DataFrame([empty_series])
    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        empty_panel = Panel({'six': empty_frame})

    # An explicit loop: the assertions are run for their side effect only,
    # so no throwaway list is built.
    for higher_dim in [empty_series, empty_frame, empty_panel]:
        tm.assert_series_equal(empty_series, higher_dim.squeeze())

    # axis argument
    df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
    assert df.shape == (1, 1)
    tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
    tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0])
    assert df.squeeze() == df.iloc[0, 0]
    pytest.raises(ValueError, df.squeeze, axis=2)
    pytest.raises(ValueError, df.squeeze, axis='x')

    # squeezing along an axis of length > 1 leaves the frame unchanged
    df = tm.makeTimeDataFrame(3)
    tm.assert_frame_equal(df.squeeze(axis=0), df)
def test_wls_panel(self):
    # Weighted ols on panel input is compared with the same regression run
    # on the stacked data: beta, resid and y_fitted must agree.
    y = tm.makeTimeDataFrame()
    x = Panel({'x1': tm.makeTimeDataFrame(),
               'x2': tm.makeTimeDataFrame()})

    # blank out two rows (selected by position) in each column of y
    y.iloc[[1, 7], y.columns.get_loc('A')] = np.nan
    y.iloc[[6, 15], y.columns.get_loc('B')] = np.nan
    y.iloc[[3, 20], y.columns.get_loc('C')] = np.nan
    y.iloc[[5, 11], y.columns.get_loc('D')] = np.nan

    stack_y = y.stack()
    # dict comprehension instead of dict() over a generator of pairs
    stack_x = DataFrame({k: v.stack() for k, v in x.iteritems()})

    weights = x.std('items')
    stack_weights = weights.stack()

    stack_y.index = stack_y.index._tuple_index
    stack_x.index = stack_x.index._tuple_index
    stack_weights.index = stack_weights.index._tuple_index

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=y, x=x, weights=1 / weights)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

    assert_almost_equal(result.beta, expected.beta)

    for attr in ['resid', 'y_fitted']:
        rvals = getattr(result, attr).stack().values
        evals = getattr(expected, attr).values
        assert_almost_equal(rvals, evals)
def test_comparisons(self):
    # Run each comparison operator through the same checks, comparing the
    # frame-level result with the result on the underlying values.
    left = tm.makeTimeDataFrame()
    right = tm.makeTimeDataFrame()

    row = self.simple.xs('a')
    ndim_5 = np.ones(left.shape + (1, 1, 1))

    def check_operator(op):
        # frame against frame
        frame_result = op(left, right)
        self.assert_numpy_array_equal(frame_result.values,
                                      op(left.values, right.values))

        # a 5-dimensional ndarray operand is rejected
        with assertRaisesRegexp(ValueError, 'Wrong number of dimensions'):
            op(left, ndim_5)

        # frame against one of its own cross-sections
        row_result = op(self.simple, row)
        self.assert_numpy_array_equal(row_result.values,
                                      op(self.simple.values, row.values))

        # frame against a scalar
        scalar_result = op(self.frame, 0)
        self.assert_numpy_array_equal(scalar_result.values,
                                      op(self.frame.values, 0))

        # frame against a shorter slice of itself is rejected
        with assertRaisesRegexp(
                ValueError,
                'Can only compare identically-labeled DataFrame'):
            op(self.simple, self.simple[:2])

    for op in (operator.eq, operator.ne, operator.lt,
               operator.gt, operator.ge, operator.le):
        check_operator(op)
def test_comparisons(self):
    # Each comparison operator is checked against the result of applying
    # the same operator to the underlying values.
    frame_a = tm.makeTimeDataFrame()
    frame_b = tm.makeTimeDataFrame()

    row = self.simple.xs('a')
    ndim_5 = np.ones(frame_a.shape + (1, 1, 1))

    def run_checks(compare):
        # two frames
        got = compare(frame_a, frame_b)
        want = compare(frame_a.values, frame_b.values)
        tm.assert_numpy_array_equal(got.values, want)

        # an operand with five dimensions must raise
        with tm.assert_raises_regex(ValueError, 'dim must be <= 2'):
            compare(frame_a, ndim_5)

        # frame and a cross-section of it
        got = compare(self.simple, row)
        want = compare(self.simple.values, row.values)
        tm.assert_numpy_array_equal(got.values, want)

        # frame and scalar
        got = compare(self.frame, 0)
        want = compare(self.frame.values, 0)
        tm.assert_numpy_array_equal(got.values, want)

        # differently-labeled frames must raise
        msg = 'Can only compare identically-labeled DataFrame'
        with tm.assert_raises_regex(ValueError, msg):
            compare(self.simple, self.simple[:2])

    comparisons = [operator.eq, operator.ne, operator.lt,
                   operator.gt, operator.ge, operator.le]
    for compare in comparisons:
        run_checks(compare)
def test_squeeze(self):
    # squeeze() on Series, DataFrame, Panel and Panel4D: first where nothing
    # can be squeezed, then after reindexing down to length-1 axes.

    # noop
    for series in (tm.makeFloatSeries(),
                   tm.makeStringSeries(),
                   tm.makeObjectSeries()):
        tm.assert_series_equal(series.squeeze(), series)

    frame = tm.makeTimeDataFrame()
    tm.assert_frame_equal(frame.squeeze(), frame)

    panel = tm.makePanel()
    tm.assert_panel_equal(panel.squeeze(), panel)

    panel4d = tm.makePanel4D()
    tm.assert_panel4d_equal(panel4d.squeeze(), panel4d)

    # squeezing
    frame = tm.makeTimeDataFrame().reindex(columns=['A'])
    tm.assert_series_equal(frame.squeeze(), frame['A'])

    panel = tm.makePanel().reindex(items=['ItemA'])
    tm.assert_frame_equal(panel.squeeze(), panel['ItemA'])

    panel = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A'])
    tm.assert_series_equal(panel.squeeze(), panel.ix['ItemA', :, 'A'])

    panel4d = tm.makePanel4D().reindex(labels=['label1'])
    tm.assert_panel_equal(panel4d.squeeze(), panel4d['label1'])

    panel4d = tm.makePanel4D().reindex(labels=['label1'], items=['ItemA'])
    tm.assert_frame_equal(panel4d.squeeze(),
                          panel4d.ix['label1', 'ItemA'])
def test_wls_panel(self):
    # Weighted ols on panel input versus the same regression on the
    # stacked data: beta, resid and y_fitted must agree.
    y = tm.makeTimeDataFrame()
    x = Panel({"x1": tm.makeTimeDataFrame(), "x2": tm.makeTimeDataFrame()})

    # blank out two rows in each column of y
    missing = (("A", [1, 7]), ("B", [6, 15]), ("C", [3, 20]), ("D", [5, 11]))
    for column, rows in missing:
        y.ix[rows, column] = np.nan

    weights = x.std("items")

    stack_y = y.stack()
    stack_x = DataFrame(dict((k, v.stack()) for k, v in x.iteritems()))
    stack_weights = weights.stack()

    # replace each stacked index with its tuple index
    for stacked in (stack_y, stack_x, stack_weights):
        stacked.index = stacked.index.get_tuple_index()

    result = ols(y=y, x=x, weights=1 / weights)
    expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

    assert_almost_equal(result.beta, expected.beta)

    for attr in ("resid", "y_fitted"):
        panel_values = getattr(result, attr).stack().values
        stacked_values = getattr(expected, attr).values
        assert_almost_equal(panel_values, stacked_values)
def test_wls_panel(self):
    # Fit weighted ols once on the panel and once on the stacked data and
    # require matching beta, resid and y_fitted.
    y = tm.makeTimeDataFrame()
    x = Panel({'x1': tm.makeTimeDataFrame(),
               'x2': tm.makeTimeDataFrame()})

    # two rows per column of y are set to NaN
    nan_rows = [('A', [1, 7]), ('B', [6, 15]), ('C', [3, 20]), ('D', [5, 11])]
    for col, rows in nan_rows:
        y.ix[rows, col] = np.nan

    stack_y = y.stack()
    stack_x = DataFrame(dict((k, v.stack()) for k, v in x.iteritems()))

    weights = x.std('items')
    stack_weights = weights.stack()

    stack_y.index = stack_y.index.get_tuple_index()
    stack_x.index = stack_x.index.get_tuple_index()
    stack_weights.index = stack_weights.index.get_tuple_index()

    inverse_weights = 1 / weights
    inverse_stack_weights = 1 / stack_weights
    result = ols(y=y, x=x, weights=inverse_weights)
    expected = ols(y=stack_y, x=stack_x, weights=inverse_stack_weights)

    assert_almost_equal(result.beta, expected.beta)

    for name in ['resid', 'y_fitted']:
        from_panel = getattr(result, name).stack().values
        from_stacked = getattr(expected, name).values
        assert_almost_equal(from_panel, from_stacked)
def test_catch_regressor_overlap(self):
    # ols is expected to raise for this input, in which both regressor
    # frames contain column "B".
    # List-of-labels selection replaces the removed ``.ix`` indexer; for
    # labels that exist both forms select the same columns.
    df1 = tm.makeTimeDataFrame()[["A", "B"]]
    df2 = tm.makeTimeDataFrame()[["B", "C", "D"]]
    y = tm.makeTimeSeries()

    data = {"foo": df1, "bar": df2}
    self.assertRaises(Exception, ols, y=y, x=data)
def test_comparisons(self, simple_frame, float_frame):
    # Every comparison operator is checked against the result of applying
    # it to the underlying values, plus two error cases.
    lhs = tm.makeTimeDataFrame()
    rhs = tm.makeTimeDataFrame()

    row = simple_frame.xs('a')
    ndim_5 = np.ones(lhs.shape + (1, 1, 1))

    def verify(op):
        # frame with frame
        outcome = op(lhs, rhs)
        tm.assert_numpy_array_equal(outcome.values,
                                    op(lhs.values, rhs.values))

        # five-dimensional operand is rejected
        with pytest.raises(ValueError, match='dim must be <= 2'):
            op(lhs, ndim_5)

        # frame with one of its cross-sections
        outcome = op(simple_frame, row)
        tm.assert_numpy_array_equal(outcome.values,
                                    op(simple_frame.values, row.values))

        # frame with scalar
        outcome = op(float_frame, 0)
        tm.assert_numpy_array_equal(outcome.values,
                                    op(float_frame.values, 0))

        # differently-labeled frames are rejected
        with pytest.raises(
                ValueError,
                match='Can only compare identically-labeled DataFrame'):
            op(simple_frame, simple_frame[:2])

    for op in (operator.eq, operator.ne, operator.lt,
               operator.gt, operator.ge, operator.le):
        verify(op)
def test_wls_panel(self):
    # Weighted ols on panel input is compared with the same regression run
    # on the stacked data: beta, resid and y_fitted must agree.
    y = tm.makeTimeDataFrame()
    x = Panel({"x1": tm.makeTimeDataFrame(), "x2": tm.makeTimeDataFrame()})

    # Rows are chosen by position and the column by label, so the mixed
    # ``.ix`` lookup is spelled out as ``.iloc`` plus ``get_loc``.
    y.iloc[[1, 7], y.columns.get_loc("A")] = np.nan
    y.iloc[[6, 15], y.columns.get_loc("B")] = np.nan
    y.iloc[[3, 20], y.columns.get_loc("C")] = np.nan
    y.iloc[[5, 11], y.columns.get_loc("D")] = np.nan

    stack_y = y.stack()
    # dict comprehension instead of dict() over a generator of pairs
    stack_x = DataFrame({k: v.stack() for k, v in compat.iteritems(x)})

    weights = x.std("items")
    stack_weights = weights.stack()

    stack_y.index = stack_y.index._tuple_index
    stack_x.index = stack_x.index._tuple_index
    stack_weights.index = stack_weights.index._tuple_index

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=y, x=x, weights=1 / weights)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

    assert_almost_equal(result.beta, expected.beta)

    for attr in ["resid", "y_fitted"]:
        rvals = getattr(result, attr).stack().values
        evals = getattr(expected, attr).values
        assert_almost_equal(rvals, evals)
def test_plm_lagged_y_predict(self):
    # Smoke test: lagged_y_predict(2) on an ols model built from a dict of
    # frames with window=10 must run without raising.  The return value is
    # never inspected, so it is not bound to an unused local.
    y = tm.makeTimeDataFrame()
    x = {'a': tm.makeTimeDataFrame(),
         'b': tm.makeTimeDataFrame()}

    model = ols(y=y, x=x, window=10)
    model.lagged_y_predict(2)
def test_plm_lagged_y_predict(self):
    # Smoke test: lagged_y_predict(2) on an ols model built from a dict of
    # frames with window=10 must run without raising.  The return value is
    # never inspected, so it is not bound to an unused local.
    y = tm.makeTimeDataFrame()
    x = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

    # constructing the model is expected to emit a FutureWarning
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model = ols(y=y, x=x, window=10)
    model.lagged_y_predict(2)
def test_plm_attrs(self):
    # Smoke test: ``resid`` can be read on the model fitted without a
    # window and on the one fitted with window=10.
    response = tm.makeTimeDataFrame()
    regressors = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

    windowed = ols(y=response, x=regressors, window=10)
    plain = ols(y=response, x=regressors)

    # attribute access only; the values are not checked
    plain.resid
    windowed.resid
def test_plm_exclude_dummy_corner(self):
    # With entity effects, dropping dummy "D" must work (summary is
    # readable) while dropping "E" is expected to raise.
    response = tm.makeTimeDataFrame()
    regressors = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

    fitted = ols(
        y=response,
        x=regressors,
        entity_effects=True,
        dropped_dummies={"entity": "D"},
    )
    # attribute access only; the value is not checked
    fitted.summary

    self.assertRaises(
        Exception,
        ols,
        y=response,
        x=regressors,
        entity_effects=True,
        dropped_dummies={"entity": "E"},
    )
def test_plm_ctor(self):
    # Smoke test: ols accepts a dict of frames (with intercept=False) and
    # a Panel built from that dict; ``summary`` is readable on both.
    response = tm.makeTimeDataFrame()
    regressors = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

    from_dict = ols(y=response, x=regressors, intercept=False)
    from_dict.summary

    from_panel = ols(y=response, x=Panel(regressors))
    from_panel.summary
def test_plm_attrs(self):
    # Smoke test: ``resid`` can be read on the model fitted without a
    # window and on the one fitted with window=10; each construction is
    # expected to emit a FutureWarning.
    response = tm.makeTimeDataFrame()
    regressors = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        windowed = ols(y=response, x=regressors, window=10)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        plain = ols(y=response, x=regressors)

    # attribute access only; the values are not checked
    plain.resid
    windowed.resid
def test_plm_exclude_dummy_corner(self):
    # With entity effects, dropping dummy 'D' must work (summary is
    # readable) while dropping 'E' is expected to raise.
    panel_y = tm.makeTimeDataFrame()
    panel_x = {'a': tm.makeTimeDataFrame(),
               'b': tm.makeTimeDataFrame()}

    good = ols(y=panel_y, x=panel_x, entity_effects=True,
               dropped_dummies={'entity': 'D'})
    # attribute access only; the value is not checked
    good.summary

    bad_kwargs = dict(y=panel_y, x=panel_x, entity_effects=True,
                      dropped_dummies={'entity': 'E'})
    self.assertRaises(Exception, ols, **bad_kwargs)
def test_squeeze(self):
    # Exercise squeeze() on Series, DataFrame, Panel and Panel4D objects,
    # on empty objects, and with the ``axis`` argument on a 1x1 frame.

    # noop
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries()]:
        tm.assert_series_equal(s.squeeze(), s)
    for df in [tm.makeTimeDataFrame()]:
        tm.assert_frame_equal(df.squeeze(), df)
    with catch_warnings(record=True):
        for p in [tm.makePanel()]:
            tm.assert_panel_equal(p.squeeze(), p)
    with catch_warnings(record=True):
        for p4d in [tm.makePanel4D()]:
            tm.assert_panel4d_equal(p4d.squeeze(), p4d)

    # squeezing
    df = tm.makeTimeDataFrame().reindex(columns=['A'])
    tm.assert_series_equal(df.squeeze(), df['A'])

    with catch_warnings(record=True):
        p = tm.makePanel().reindex(items=['ItemA'])
        tm.assert_frame_equal(p.squeeze(), p['ItemA'])

        p = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A'])
        tm.assert_series_equal(p.squeeze(), p.loc['ItemA', :, 'A'])

    with catch_warnings(record=True):
        p4d = tm.makePanel4D().reindex(labels=['label1'])
        tm.assert_panel_equal(p4d.squeeze(), p4d['label1'])

    with catch_warnings(record=True):
        p4d = tm.makePanel4D().reindex(labels=['label1'], items=['ItemA'])
        tm.assert_frame_equal(p4d.squeeze(), p4d.loc['label1', 'ItemA'])

    # don't fail with 0 length dimensions GH11229 & GH8999
    empty_series = Series([], name='five')
    empty_frame = DataFrame([empty_series])
    with catch_warnings(record=True):
        empty_panel = Panel({'six': empty_frame})

    # An explicit loop: the assertions are run for their side effect only,
    # so no throwaway list is built.
    for higher_dim in [empty_series, empty_frame, empty_panel]:
        tm.assert_series_equal(empty_series, higher_dim.squeeze())

    # axis argument
    df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
    assert df.shape == (1, 1)
    tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0])
    tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
    tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0])
    assert df.squeeze() == df.iloc[0, 0]
    pytest.raises(ValueError, df.squeeze, axis=2)
    pytest.raises(ValueError, df.squeeze, axis='x')

    # squeezing along an axis of length > 1 leaves the frame unchanged
    df = tm.makeTimeDataFrame(3)
    tm.assert_frame_equal(df.squeeze(axis=0), df)
def test_plm_ctor(self):
    # Smoke test: ols accepts a dict of frames (with intercept=False) and
    # a Panel built from that dict; each construction is expected to emit
    # a FutureWarning and ``summary`` is readable afterwards.
    response = tm.makeTimeDataFrame()
    regressors = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        from_dict = ols(y=response, x=regressors, intercept=False)
    from_dict.summary

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        from_panel = ols(y=response, x=Panel(regressors))
    from_panel.summary
def test_catch_regressor_overlap(self):
    # ols is expected to raise for this input, in which both regressor
    # frames contain column 'B'.
    # List-of-labels selection replaces the removed ``.ix`` indexer; for
    # labels that exist both forms select the same columns.
    df1 = tm.makeTimeDataFrame()[['A', 'B']]
    df2 = tm.makeTimeDataFrame()[['B', 'C', 'D']]
    y = tm.makeTimeSeries()

    data = {'foo': df1, 'bar': df2}

    def f():
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            ols(y=y, x=data)

    self.assertRaises(Exception, f)
def test_plm_f_test(self):
    # f_test is run with a single string hypothesis and with a list of
    # hypotheses; the f-stat of the list form must match model.f_stat.
    response = tm.makeTimeDataFrame()
    regressors = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

    model = ols(y=response, x=regressors)

    # single hypothesis: only checked for not raising
    single = model.f_test("1*a+1*b=0")

    joint = model.f_test(["1*a=0", "1*b=0"])
    assert_almost_equal(joint["f-stat"], model.f_stat["f-stat"])
def test_catch_regressor_overlap(self):
    # ols is expected to raise for this input, in which both regressor
    # frames contain column "B".
    # List-of-labels selection replaces the removed ``.ix`` indexer; for
    # labels that exist both forms select the same columns.
    df1 = tm.makeTimeDataFrame()[["A", "B"]]
    df2 = tm.makeTimeDataFrame()[["B", "C", "D"]]
    y = tm.makeTimeSeries()

    data = {"foo": df1, "bar": df2}

    def f():
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            ols(y=y, x=data)

    self.assertRaises(Exception, f)
def test_plm_exclude_dummy_corner(self):
    # With entity effects, dropping dummy "D" must work (summary is
    # readable) while dropping "E" is expected to raise.
    response = tm.makeTimeDataFrame()
    regressors = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        fitted = ols(
            y=response,
            x=regressors,
            entity_effects=True,
            dropped_dummies={"entity": "D"},
        )
    # attribute access only; the value is not checked
    fitted.summary

    with self.assertRaises(Exception):
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            ols(
                y=response,
                x=regressors,
                entity_effects=True,
                dropped_dummies={"entity": "E"},
            )
def test_plm_f_test(self):
    # f_test is run with a single string hypothesis and with a list of
    # hypotheses; the f-stat of the list form must match model.f_stat.
    response = tm.makeTimeDataFrame()
    regressors = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model = ols(y=response, x=regressors)

    # single hypothesis: only checked for not raising
    single = model.f_test("1*a+1*b=0")

    joint = model.f_test(["1*a=0", "1*b=0"])
    assert_almost_equal(joint["f-stat"], model.f_stat["f-stat"])
def test_plm_f_test(self):
    # Run f_test with one combined hypothesis and with a list of two
    # hypotheses; the latter's f-stat must match model.f_stat.
    panel_y = tm.makeTimeDataFrame()
    panel_x = {'a': tm.makeTimeDataFrame(),
               'b': tm.makeTimeDataFrame()}

    model = ols(y=panel_y, x=panel_x)

    combined_hyp = '1*a+1*b=0'
    separate_hyps = ['1*a=0', '1*b=0']

    # the combined form is only checked for not raising
    combined = model.f_test(combined_hyp)
    separate = model.f_test(separate_hyps)

    assert_almost_equal(separate['f-stat'], model.f_stat['f-stat'])
def test_schema(self):
    # Check the statement built by sql.get_sqlite_schema: an explicit
    # column type is honoured and ``keys`` yields a PRIMARY KEY clause;
    # the second statement is also executed against self.db.
    # assertEqual/assertTrue replace the deprecated ``assert_`` alias.
    frame = tm.makeTimeDataFrame()
    create_sql = sql.get_sqlite_schema(frame, "test", {"A": "DATETIME"})
    for line in create_sql.splitlines():
        tokens = line.split(" ")
        # only lines that are exactly "A <type>" are inspected
        if len(tokens) == 2 and tokens[0] == "A":
            self.assertEqual(tokens[1], "DATETIME")

    frame = tm.makeTimeDataFrame()
    create_sql = sql.get_sqlite_schema(frame, "test", keys=["A", "B"])
    self.assertTrue("PRIMARY KEY (A,B)" in create_sql)
    self.db.execute(create_sql)
def test_schema(self):
    # Check the statement built by sql.get_sqlite_schema: an explicit
    # column type is honoured and ``keys`` yields a PRIMARY KEY clause;
    # the second statement is also executed against self.db.
    # assertEqual/assertTrue replace the deprecated ``assert_`` alias.
    frame = tm.makeTimeDataFrame()
    create_sql = sql.get_sqlite_schema(frame, 'test', {'A': 'DATETIME'})
    for line in create_sql.splitlines():
        tokens = line.split(' ')
        # only lines that are exactly "A <type>" are inspected
        if len(tokens) == 2 and tokens[0] == 'A':
            self.assertEqual(tokens[1], 'DATETIME')

    frame = tm.makeTimeDataFrame()
    create_sql = sql.get_sqlite_schema(frame, 'test', keys=['A', 'B'])
    self.assertTrue('PRIMARY KEY (A,B)' in create_sql)
    self.db.execute(create_sql)
def test_plm_f_test(self):
    # Run f_test with one combined hypothesis and with a list of two
    # hypotheses; the latter's f-stat must match model.f_stat.
    panel_y = tm.makeTimeDataFrame()
    panel_x = {'a': tm.makeTimeDataFrame(),
               'b': tm.makeTimeDataFrame()}

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        model = ols(y=panel_y, x=panel_x)

    combined_hyp = '1*a+1*b=0'
    separate_hyps = ['1*a=0', '1*b=0']

    # the combined form is only checked for not raising
    combined = model.f_test(combined_hyp)
    separate = model.f_test(separate_hyps)

    assert_almost_equal(separate['f-stat'], model.f_stat['f-stat'])
def test_isnull():
    # isnull on scalars, then on Series / DataFrame / Panel / Panel4D, where
    # the container result must equal applying isnull via ``apply``.
    assert not isnull(1.)
    assert isnull(None)
    assert isnull(np.NaN)
    assert not isnull(np.inf)
    assert not isnull(-np.inf)

    # series
    all_series = [tm.makeFloatSeries(), tm.makeStringSeries(),
                  tm.makeObjectSeries(), tm.makeTimeSeries(),
                  tm.makePeriodSeries()]
    for series in all_series:
        assert isinstance(isnull(series), Series)

    # frame
    all_frames = [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
                  tm.makeMixedDataFrame()]
    for frame in all_frames:
        tm.assert_frame_equal(isnull(frame), frame.apply(isnull))

    # panel
    all_panels = [tm.makePanel(), tm.makePeriodPanel(),
                  tm.add_nans(tm.makePanel())]
    for panel in all_panels:
        tm.assert_panel_equal(isnull(panel), panel.apply(isnull))

    # panel 4d
    all_panel4ds = [tm.makePanel4D(),
                    tm.add_nans_panel4d(tm.makePanel4D())]
    for panel4d in all_panel4ds:
        tm.assert_panel4d_equal(isnull(panel4d), panel4d.apply(isnull))
def test_isnull():
    # Scalar checks first, then one pass per container type comparing
    # isnull(obj) with obj.apply(isnull).
    assert not isnull(1.)
    assert isnull(None)
    assert isnull(np.NaN)
    assert not isnull(np.inf)
    assert not isnull(-np.inf)

    # series
    for obj in (tm.makeFloatSeries(), tm.makeStringSeries(),
                tm.makeObjectSeries(), tm.makeTimeSeries(),
                tm.makePeriodSeries()):
        assert isinstance(isnull(obj), Series)

    # frame
    for obj in (tm.makeTimeDataFrame(), tm.makePeriodFrame(),
                tm.makeMixedDataFrame()):
        got = isnull(obj)
        want = obj.apply(isnull)
        tm.assert_frame_equal(got, want)

    # panel
    for obj in (tm.makePanel(), tm.makePeriodPanel(),
                tm.add_nans(tm.makePanel())):
        got = isnull(obj)
        want = obj.apply(isnull)
        tm.assert_panel_equal(got, want)

    # panel 4d
    for obj in (tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())):
        got = isnull(obj)
        want = obj.apply(isnull)
        tm.assert_panel4d_equal(got, want)
def test_partial_set_invalid(self):
    # GH 4940
    # allow only setting of 'valid' values
    original = tm.makeTimeDataFrame()
    frame = original.copy()

    # don't allow not string inserts
    with pytest.raises(TypeError):
        with catch_warnings(record=True):
            frame.loc[100.0, :] = frame.ix[0]

    with pytest.raises(TypeError):
        with catch_warnings(record=True):
            frame.loc[100, :] = frame.ix[0]

    with pytest.raises(TypeError):
        with catch_warnings(record=True):
            frame.ix[100.0, :] = frame.ix[0]

    with pytest.raises(ValueError):
        with catch_warnings(record=True):
            frame.ix[100, :] = frame.ix[0]

    # allow object conversion here
    frame = original.copy()
    with catch_warnings(record=True):
        frame.loc['a', :] = frame.ix[0]
        appended = original.append(pd.Series(frame.ix[0], name='a'))
    tm.assert_frame_equal(frame, appended)

    # the new label is appended and the index becomes object dtype
    expected_index = pd.Index(original.index.tolist() + ['a'])
    tm.assert_index_equal(frame.index, expected_index)
    assert frame.index.dtype == 'object'
def test_put(self):
    # Store a series and frame slices, then check which put(append=True)
    # calls are rejected and that a table can be overwritten.
    store = self.store
    series = tm.makeTimeSeries()
    frame = tm.makeTimeDataFrame()

    store['a'] = series
    store['b'] = frame[:10]
    store.put('c', frame[:10], table=True)

    # not OK, not a table
    self.assertRaises(ValueError, store.put, 'b', frame[10:], append=True)

    # node does not currently exist, test _is_table_type returns False in
    # this case
    self.assertRaises(ValueError, store.put, 'f', frame[10:], append=True)

    # OK
    store.put('c', frame[10:], append=True)

    # overwrite table
    store.put('c', frame[:10], table=True, append=False)
    tm.assert_frame_equal(frame[:10], store['c'])
def test_take_invalid_kwargs(self):
    # take() on Series, DataFrame and Panel must reject an unknown keyword
    # and the 'out' and 'mode' parameters.
    indices = [-3, 2, 0, 1]
    series = tm.makeFloatSeries()
    frame = tm.makeTimeDataFrame()

    with catch_warnings(record=True):
        panel = tm.makePanel()

    for obj in (series, frame, panel):
        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with tm.assert_raises_regex(TypeError, msg):
            obj.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with tm.assert_raises_regex(ValueError, msg):
            obj.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with tm.assert_raises_regex(ValueError, msg):
            obj.take(indices, mode='clip')
def dynamic_vector_autoregression_example():
    """Fit a ``DynamicVAR`` on generated data and print/plot its output.

    The random seed is fixed and ``ptest.N`` is set to 500 before the data
    frame is generated and cumulatively summed along axis 0.  The function
    prints coefficients and a 2-step forecast and calls ``plot_forecast``.
    """
    np.random.seed(1)

    ptest.N = 500
    data = ptest.makeTimeDataFrame().cumsum(0)

    var = DynamicVAR(data, lag_order=2, window_type='expanding')
    print(var.coefs)

    # All estimated coefficients for equation A
    # ``info()`` writes its report itself and returns None, so it is not
    # wrapped in print(), which would emit a stray "None" line.
    var.coefs.minor_xs('A').info()

    # Coefficients on 11/30/2001
    print(var.coefs.major_xs(datetime(2001, 11, 30)).T)

    # Dynamic forecasts for a given number of steps ahead.
    print(var.forecast(2))

    var.plot_forecast(2)
def test_isna_isnull(self, isna_f):
    # Check ``isna_f`` on scalars, Series and DataFrames; for frames the
    # result must equal applying ``isna_f`` via ``apply``.
    assert not isna_f(1.)
    assert isna_f(None)
    assert isna_f(np.nan)
    # The original asserted ``float('nan')`` itself, which is always truthy
    # and never called isna_f; the function is now actually exercised.
    assert isna_f(float('nan'))
    assert not isna_f(np.inf)
    assert not isna_f(-np.inf)

    # series
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries(), tm.makeTimeSeries(),
              tm.makePeriodSeries()]:
        assert isinstance(isna_f(s), Series)

    # frame
    for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
               tm.makeMixedDataFrame()]:
        result = isna_f(df)
        expected = df.apply(isna_f)
        tm.assert_frame_equal(result, expected)
def test_reshaping_multi_index_categorical(self):
    # Unstacking a categorical column of a frame with a two-level index
    # must give a frame of categoricals, one column per inner label.
    item_names = ["ItemA", "ItemB", "ItemC"]
    frames = {name: tm.makeTimeDataFrame() for name in item_names}
    stacked = {name: frame.stack() for name, frame in frames.items()}

    df = pd.concat(stacked, axis="columns")
    df.index.names = ["major", "minor"]
    df["str"] = "foo"
    dates = df.index.levels[0]

    df["category"] = df["str"].astype("category")
    result = df["category"].unstack()

    template = Categorical(["foo"] * len(dates))
    expected = DataFrame(
        {letter: template.copy() for letter in "ABCD"},
        columns=Index(list("ABCD"), name="minor"),
        index=dates,
    )
    tm.assert_frame_equal(result, expected)
def test_numpy_transpose(self):
    # np.transpose on Series, DataFrame and Panel; passing ``axes`` for a
    # Series or DataFrame must raise.
    msg = "the 'axes' parameter is not supported"

    series = tm.makeFloatSeries()
    tm.assert_series_equal(np.transpose(series), series)
    with tm.assert_raises_regex(ValueError, msg):
        np.transpose(series, axes=1)

    frame = tm.makeTimeDataFrame()
    round_trip = np.transpose(np.transpose(frame))
    tm.assert_frame_equal(round_trip, frame)
    with tm.assert_raises_regex(ValueError, msg):
        np.transpose(frame, axes=1)

    with catch_warnings(record=True):
        panel = tm.makePanel()
        # two axis permutations that together restore the original order
        rotated = np.transpose(panel, axes=(2, 0, 1))
        restored = np.transpose(rotated, axes=(1, 2, 0))
        tm.assert_panel_equal(restored, panel)
def test_transpose(self):
    # transpose() on Series, DataFrame and Panel, plus the error raised
    # when Panel axes are given both positionally and by keyword.
    msg = (r"transpose\(\) got multiple values for "
           r"keyword argument 'axes'")

    all_series = [tm.makeFloatSeries(), tm.makeStringSeries(),
                  tm.makeObjectSeries()]
    for series in all_series:
        # calls implementation in pandas/core/base.py
        tm.assert_series_equal(series.transpose(), series)

    for frame in [tm.makeTimeDataFrame()]:
        round_trip = frame.transpose().transpose()
        tm.assert_frame_equal(round_trip, frame)

    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        for panel in [tm.makePanel()]:
            rotated = panel.transpose(2, 0, 1)
            tm.assert_panel_equal(rotated.transpose(1, 2, 0), panel)
            with pytest.raises(TypeError, match=msg):
                panel.transpose(2, 0, 1, axes=(2, 0, 1))
def test_take(self):
    # take() on Series and DataFrame must match an object rebuilt from
    # the taken values and the taken index.
    indices = [1, 5, -2, 6, 3, -1]

    all_series = [tm.makeFloatSeries(), tm.makeStringSeries(),
                  tm.makeObjectSeries()]
    for series in all_series:
        taken = series.take(indices)
        rebuilt = Series(data=series.values.take(indices),
                         index=series.index.take(indices),
                         dtype=series.dtype)
        tm.assert_series_equal(taken, rebuilt)

    for frame in [tm.makeTimeDataFrame()]:
        taken = frame.take(indices)
        rebuilt = DataFrame(data=frame.values.take(indices, axis=0),
                            index=frame.index.take(indices),
                            columns=frame.columns)
        tm.assert_frame_equal(taken, rebuilt)
def test_hash_pandas_object(self):
    # Run check_equal and check_not_equal_with_index over a range of
    # Series, Index and DataFrame objects.
    series_cases = [
        Series([1, 2, 3]),
        Series([1.0, 1.5, 3.2]),
        Series([1.0, 1.5, np.nan]),
        Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
        Series(['a', 'b', 'c']),
        Series(['a', np.nan, 'c']),
        Series(['a', None, 'c']),
        Series([True, False, True]),
    ]
    index_cases = [
        Index([1, 2, 3]),
        Index([True, False, True]),
    ]
    generated_cases = [
        DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}),
        tm.makeMissingDataframe(),
        tm.makeMixedDataFrame(),
        tm.makeTimeDataFrame(),
        tm.makeTimeSeries(),
        tm.makeTimedeltaIndex(),
    ]

    for obj in series_cases + index_cases + generated_cases:
        self.check_equal(obj)
        self.check_not_equal_with_index(obj)
def test_r2_no_intercept(self):
    # A model with the default intercept and one with an explicit constant
    # column (intercept=False) must give equal beta but different r2, both
    # without a window and with window=20.
    # assertTrue replaces the deprecated ``assert_`` alias.
    y = tm.makeTimeSeries()
    x = tm.makeTimeDataFrame()

    x_with = x.copy()
    x_with['intercept'] = 1.

    model1 = ols(y=y, x=x)
    model2 = ols(y=y, x=x_with, intercept=False)
    assert_series_equal(model1.beta, model2.beta)

    # TODO: can we infer whether the intercept is there...
    self.assertTrue(model1.r2 != model2.r2)

    # rolling
    model1 = ols(y=y, x=x, window=20)
    model2 = ols(y=y, x=x_with, window=20, intercept=False)
    assert_frame_equal(model1.beta, model2.beta)
    self.assertTrue((model1.r2 != model2.r2).all())
def setUp(self):
    # Build a 100-row frame and its unpivoted form, then bulk-create one
    # WideTimeSeries per row and one LongTimeSeries per unpivoted row.
    self.ts = tm.makeTimeDataFrame(100)
    # unpivot before the columns are renamed below
    self.ts2 = self.unpivot(self.ts).set_index('date')
    self.ts.columns = ['col1', 'col2', 'col3', 'col4']

    wide_rows = [
        WideTimeSeries(date_ix=stamp,
                       col1=values['col1'],
                       col2=values['col2'],
                       col3=values['col3'],
                       col4=values['col4'])
        for stamp, values in self.ts.iterrows()
    ]
    WideTimeSeries.objects.bulk_create(wide_rows)

    long_rows = []
    for stamp, values in self.ts2.iterrows():
        long_rows.append(LongTimeSeries(date_ix=stamp,
                                        series_name=values[0],
                                        value=values[1]))
    LongTimeSeries.objects.bulk_create(long_rows)
def test_uquery(self):
    # uquery must report one affected row for a single INSERT; statements
    # against a missing table must raise sqlite3.OperationalError.
    frame = tm.makeTimeDataFrame()
    sql.write_frame(frame, name='test_table', con=self.db)
    stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)'
    self.assertEqual(sql.uquery(stmt, con=self.db), 1)

    # Restore whatever stream was installed before the test rather than
    # sys.__stdout__, so a runner's own stdout replacement survives.
    saved_stdout = sys.stdout
    try:
        sys.stdout = StringIO()

        # NOTE(review): the failing statements go through sql.tquery, not
        # sql.uquery -- confirm this is intended.
        self.assertRaises(sqlite3.OperationalError,
                          sql.tquery, 'insert into blah values (1)',
                          con=self.db)

        self.assertRaises(sqlite3.OperationalError,
                          sql.tquery, 'insert into blah values (1)',
                          con=self.db, retry=True)
    finally:
        sys.stdout = saved_stdout
def test_numpy_transpose(self):
    # np.transpose on Series, DataFrame and Panel; passing ``axes`` for a
    # Series or DataFrame must raise.
    msg = "the 'axes' parameter is not supported"

    series = tm.makeFloatSeries()
    tm.assert_series_equal(np.transpose(series), series)
    pytest.raises(ValueError, np.transpose, series, axes=1).match(msg)

    frame = tm.makeTimeDataFrame()
    round_trip = np.transpose(np.transpose(frame))
    tm.assert_frame_equal(round_trip, frame)
    pytest.raises(ValueError, np.transpose, frame, axes=1).match(msg)

    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        panel = tm.makePanel()
        # two axis permutations that together restore the original order
        rotated = np.transpose(panel, axes=(2, 0, 1))
        restored = np.transpose(rotated, axes=(1, 2, 0))
        tm.assert_panel_equal(restored, panel)
def test_reshaping_multi_index_categorical(self):
    # Unstacking a categorical column of a frame with a two-level index
    # must give a frame of categoricals, one column per inner label.
    item_names = ['ItemA', 'ItemB', 'ItemC']
    frames = {name: tm.makeTimeDataFrame() for name in item_names}
    stacked = {name: frames[name].stack() for name in frames}

    df = pd.concat(stacked, axis='columns')
    df.index.names = ['major', 'minor']
    df['str'] = 'foo'
    dates = df.index.levels[0]

    df['category'] = df['str'].astype('category')
    unstacked = df['category'].unstack()

    minor_labels = list('ABCD')
    template = Categorical(['foo'] * len(dates))
    expected = DataFrame({'A': template.copy(),
                          'B': template.copy(),
                          'C': template.copy(),
                          'D': template.copy()},
                         columns=Index(minor_labels, name='minor'),
                         index=dates)
    tm.assert_frame_equal(unstacked, expected)
def test_frame_select(self):
    # Select from a frame stored as a table by index and column criteria;
    # selecting from a frame not stored as a table must raise.
    frame = tm.makeTimeDataFrame()
    self.store.put('frame', frame, table=True)

    midpoint = frame.index[len(frame) // 2]
    from_midpoint = ('index', '>=', midpoint)
    columns_a_d = ('column', ['A', 'D'])
    column_a = ('column', 'A')

    selected = self.store.select('frame', [from_midpoint, columns_a_d])
    tm.assert_frame_equal(selected, frame.ix[midpoint:, ['A', 'D']])

    selected = self.store.select('frame', [column_a])
    tm.assert_frame_equal(selected, frame.ix[:, ['A']])

    # can't select if not written as table
    self.store['frame'] = frame
    self.assertRaises(Exception, self.store.select,
                      'frame', [from_midpoint, columns_a_d])
def test_take_invalid_kwargs(self):
    # take() on Series, DataFrame and Panel must reject an unknown keyword
    # and the 'out' and 'mode' parameters.
    indices = [-3, 2, 0, 1]
    series = tm.makeFloatSeries()
    frame = tm.makeTimeDataFrame()

    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        panel = tm.makePanel()

    # (exception type, expected message, offending keyword arguments)
    cases = [
        (TypeError,
         r"take\(\) got an unexpected keyword argument 'foo'",
         {'foo': 2}),
        (ValueError,
         "the 'out' parameter is not supported",
         {'out': indices}),
        (ValueError,
         "the 'mode' parameter is not supported",
         {'mode': 'clip'}),
    ]

    for obj in (series, frame, panel):
        for exc_type, msg, bad_kwargs in cases:
            with pytest.raises(exc_type, match=msg):
                obj.take(indices, **bad_kwargs)
def test_append(self):
    # Append frame and panel pieces to an HDFStore under several key
    # spellings and check that the stored object equals the whole.
    pth = '__test_append__.h5'

    # Open the store before entering the try block: if construction fails,
    # the cleanup below no longer hits an unbound ``store`` and hides the
    # real error.  The no-op ``except: raise`` clause has been dropped.
    store = HDFStore(pth)
    try:
        df = tm.makeTimeDataFrame()
        store.append('df1', df[:10])
        store.append('df1', df[10:])
        tm.assert_frame_equal(store['df1'], df)

        store.put('df2', df[:10], table=True)
        store.append('df2', df[10:])
        tm.assert_frame_equal(store['df2'], df)

        store.append('/df3', df[:10])
        store.append('/df3', df[10:])
        tm.assert_frame_equal(store['df3'], df)

        # this is allowed by almost always don't want to do it
        import warnings
        import tables
        warnings.filterwarnings('ignore',
                                category=tables.NaturalNameWarning)
        try:
            store.append('/df3 foo', df[:10])
            store.append('/df3 foo', df[10:])
            tm.assert_frame_equal(store['df3 foo'], df)
        finally:
            # re-enable the warning even when an append/assert fails
            warnings.filterwarnings('always',
                                    category=tables.NaturalNameWarning)

        wp = tm.makePanel()
        store.append('wp1', wp.ix[:, :10, :])
        store.append('wp1', wp.ix[:, 10:, :])
        tm.assert_panel_equal(store['wp1'], wp)
    finally:
        store.close()
        os.remove(pth)
def test_plot(self):
    # Smoke-test DataFrame.plot with assorted keyword arguments and index
    # types; an unknown keyword must make the plot call fail.
    df = tm.makeTimeDataFrame()
    for options in ({'grid': False},
                    {'subplots': True},
                    {'subplots': True, 'use_index': False}):
        _check_plot_works(df.plot, **options)

    df = DataFrame({'x': [1, 2], 'y': [3, 4]})
    self._check_plot_fails(df.plot, kind='line', blarg=True)

    letters = string.ascii_letters[:10]
    df = DataFrame(np.random.rand(10, 3), index=list(letters))
    for options in ({'use_index': True},
                    {'sort_columns': False},
                    {'yticks': [1, 5, 10]},
                    {'xticks': [1, 5, 10]},
                    {'ylim': (-100, 100), 'xlim': (-100, 100)},
                    {'subplots': True, 'title': 'blah'},
                    {'title': 'blah'}):
        _check_plot_works(df.plot, **options)

    tuples = list(zip(letters, range(10)))
    df = DataFrame(np.random.rand(10, 3),
                   index=MultiIndex.from_tuples(tuples))
    _check_plot_works(df.plot, use_index=True)

    # unicode
    greek = ['\u03b1', '\u03b2', '\u03b3', '\u03b4']
    # each letter is paired with two consecutive integers: 0..7 overall
    index_tuples = [(greek[pos // 2], pos) for pos in range(8)]
    index = MultiIndex.from_tuples(index_tuples, names=['i0', 'i1'])
    columns = MultiIndex.from_tuples([('bar', '\u0394'),
                                      ('bar', '\u0395')],
                                     names=['c0', 'c1'])
    df = DataFrame(np.random.randint(0, 10, (8, 2)),
                   columns=columns,
                   index=index)
    _check_plot_works(df.plot, title='\u03A3')
def test_append_frame_column_oriented(self):
    # column oriented
    store = self.store
    frame = tm.makeTimeDataFrame()

    store.remove('df1')
    store.append('df1', frame.ix[:, :2], axes=['columns'])
    store.append('df1', frame.ix[:, 2:])
    tm.assert_frame_equal(store['df1'], frame)

    selected = store.select('df1', 'columns=A')
    only_a = frame.reindex(columns=['A'])
    tm.assert_frame_equal(only_a, selected)

    # this isn't supported
    unsupported = ('columns=A', Term('index', '>', frame.index[4]))
    self.assertRaises(Exception, store.select, 'df1', unsupported)

    # selection on the non-indexable
    first_four = frame.index[0:4]
    selected = store.select('df1', ('columns=A', Term('index', '=', first_four)))
    only_a_first_four = frame.reindex(columns=['A'], index=first_four)
    tm.assert_frame_equal(only_a_first_four, selected)
def test_correct_start_date(self):
    # Check the ``start_date`` attribute after transform(): with a series
    # passed in, and without one for string and datetime start dates.
    testing.N = 20
    testing.K = 1
    ts = testing.makeTimeDataFrame(freq="s")

    # transform() called with a time series
    date_string = "2018-01-01"
    with_series = PeriodicSeasonalFeature(period="1 days", start_date=date_string)
    with_series.transform(ts)
    assert with_series.start_date == ts.index.values[0]

    # transform() called without data, start date given as a string
    from_string = PeriodicSeasonalFeature(
        period="3 days", index_period=10, start_date=date_string
    )
    from_string.transform()
    assert from_string.start_date == pd.to_datetime(date_string)

    # transform() called without data, start date already a datetime
    date_stamp = pd.to_datetime("2018-01-01")
    from_stamp = PeriodicSeasonalFeature(
        period="3 days", index_period=10, start_date=date_stamp
    )
    from_stamp.transform()
    assert from_stamp.start_date == date_stamp
def test_partial_set_invalid(self):
    # GH 4940
    # allow only setting of 'valid' values
    original = tm.makeTimeDataFrame()
    frame = original.copy()

    # don't allow not string inserts
    for bad_label in (100.0, 100):
        with pytest.raises(TypeError):
            frame.loc[bad_label, :] = frame.iloc[0]

    # allow object conversion here
    frame = original.copy()
    frame.loc["a", :] = frame.iloc[0]
    new_row = Series(frame.iloc[0], name="a")
    tm.assert_frame_equal(frame, original.append(new_row))

    # the new label is appended and the index becomes object dtype
    expected_index = Index(original.index.tolist() + ["a"])
    tm.assert_index_equal(frame.index, expected_index)
    assert frame.index.dtype == "object"
def test_isna_isnull(self, isna_f):
    # Check ``isna_f`` on scalars, Series, DataFrames and Panels; for
    # frames and panels the result must equal applying it via ``apply``.
    assert not isna_f(1.)
    assert isna_f(None)
    assert isna_f(np.nan)
    # The original asserted ``float('nan')`` itself, which is always truthy
    # and never called isna_f; the function is now actually exercised.
    assert isna_f(float('nan'))
    assert not isna_f(np.inf)
    assert not isna_f(-np.inf)

    # series
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries(), tm.makeTimeSeries(),
              tm.makePeriodSeries()]:
        assert isinstance(isna_f(s), Series)

    # frame
    for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
               tm.makeMixedDataFrame()]:
        result = isna_f(df)
        expected = df.apply(isna_f)
        tm.assert_frame_equal(result, expected)

    # panel
    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        for p in [tm.makePanel(), tm.makePeriodPanel(),
                  tm.add_nans(tm.makePanel())]:
            result = isna_f(p)
            expected = p.apply(isna_f)
            tm.assert_panel_equal(result, expected)
def test_tquery(self):
    """Round-trip a column through ``sql.tquery`` and check its error path.

    Writes a frame to ``test_table`` on ``self.db``, reads column ``A``
    back and compares it with the original column. Then checks that
    querying a missing table raises ``sqlite3.OperationalError``, both
    with and without ``retry=True``.
    """
    frame = tm.makeTimeDataFrame()
    sql.write_frame(frame, name='test_table', con=self.db)
    result = sql.tquery("select A from test_table", self.db)
    expected = frame.A
    result = Series(result, frame.index)
    tm.assert_series_equal(result, expected)

    # Silence stdout while the failing queries run (presumably they print
    # diagnostics). Restore the stream that was installed before, not
    # ``sys.__stdout__``: resetting to the interpreter's original stream
    # would discard any redirection or capture already in place.
    previous_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        self.assertRaises(sqlite3.OperationalError, sql.tquery,
                          'select * from blah', con=self.db)

        self.assertRaises(sqlite3.OperationalError, sql.tquery,
                          'select * from blah', con=self.db, retry=True)
    finally:
        sys.stdout = previous_stdout
def setup_method(self, method):
    """Reset matplotlib's rc settings and build the per-test fixtures.

    Stores on ``self``: the results of the ``compat`` version probes, a few
    expected plotting defaults, and three frames (``hist_df``, ``tdf``,
    ``hexbin_df``). ``method`` is not used.
    """
    import matplotlib as mpl
    from pandas.plotting._matplotlib import compat

    mpl.rcdefaults()

    # Cache the outcome of each compat probe as ``self.mpl_ge_<tag>``.
    for version_tag in ("2_2_3", "3_0_0", "3_1_0"):
        flag_name = "mpl_ge_" + version_tag
        setattr(self, flag_name, getattr(compat, "_" + flag_name)())

    self.bp_n_objects = 7
    self.polycollection_factor = 2
    self.default_figsize = (6.4, 4.8)
    self.default_tick_position = "left"

    n_rows = 100
    # hist_df is built inside the seeded RNG context; the draws happen in
    # the same order as the columns are listed.
    with tm.RNGContext(42):
        hist_columns = {
            "gender": np.random.choice(["Male", "Female"], size=n_rows),
            "classroom": np.random.choice(["A", "B", "C"], size=n_rows),
            "height": random.normal(66, 4, size=n_rows),
            "weight": random.normal(161, 32, size=n_rows),
            "category": random.randint(4, size=n_rows),
        }
        self.hist_df = DataFrame(hist_columns)

    self.tdf = tm.makeTimeDataFrame()

    hexbin_columns = {
        "A": np.random.uniform(size=20),
        "B": np.random.uniform(size=20),
        "C": np.arange(20) + np.random.uniform(size=20),
    }
    self.hexbin_df = DataFrame(hexbin_columns)
def test_take(self):
    """``take`` must agree with taking from the values and axis labels.

    Checked for Series, DataFrame, Panel and Panel4D objects; each expected
    object is rebuilt by hand from ``values.take`` and the taken labels.
    """
    positions = [1, 5, -2, 6, 3, -1]

    series_cases = (
        tm.makeFloatSeries(),
        tm.makeStringSeries(),
        tm.makeObjectSeries(),
    )
    for ser in series_cases:
        taken = ser.take(positions)
        wanted = Series(
            data=ser.values.take(positions),
            index=ser.index.take(positions),
            dtype=ser.dtype,
        )
        tm.assert_series_equal(taken, wanted)

    frame = tm.makeTimeDataFrame()
    taken = frame.take(positions)
    wanted = DataFrame(
        data=frame.values.take(positions, axis=0),
        index=frame.index.take(positions),
        columns=frame.columns,
    )
    tm.assert_frame_equal(taken, wanted)

    # Panels are taken along their first axis (items / labels).
    positions = [-3, 2, 0, 1]
    with catch_warnings(record=True):
        panel = tm.makePanel()
        taken = panel.take(positions)
        wanted = Panel(
            data=panel.values.take(positions, axis=0),
            items=panel.items.take(positions),
            major_axis=panel.major_axis,
            minor_axis=panel.minor_axis,
        )
        tm.assert_panel_equal(taken, wanted)

    with catch_warnings(record=True):
        panel4d = tm.makePanel4D()
        taken = panel4d.take(positions)
        wanted = Panel4D(
            data=panel4d.values.take(positions, axis=0),
            labels=panel4d.labels.take(positions),
            major_axis=panel4d.major_axis,
            minor_axis=panel4d.minor_axis,
            items=panel4d.items,
        )
        tm.assert_panel4d_equal(taken, wanted)
def setup_method(self, method):
    """Reset matplotlib's rc settings and build the per-test fixtures.

    Stores on ``self``: the results of the ``plotting._compat`` version
    probes, a few expected plotting defaults, and three frames
    (``hist_df``, ``tdf``, ``hexbin_df``). ``method`` is not used.
    """
    import matplotlib as mpl

    mpl.rcdefaults()

    # Cache the outcome of each compat probe as ``self.mpl_ge_<tag>``.
    for version_tag in ('2_0_1', '2_1_0', '2_2_0', '2_2_2', '3_0_0'):
        flag_name = 'mpl_ge_' + version_tag
        probe = getattr(plotting._compat, '_' + flag_name)
        setattr(self, flag_name, probe())

    self.bp_n_objects = 7
    self.polycollection_factor = 2
    self.default_figsize = (6.4, 4.8)
    self.default_tick_position = 'left'

    n_rows = 100
    # hist_df is built inside the seeded RNG context; the draws happen in
    # the same order as the columns are listed.
    with tm.RNGContext(42):
        hist_columns = {
            'gender': np.random.choice(['Male', 'Female'], size=n_rows),
            'classroom': np.random.choice(['A', 'B', 'C'], size=n_rows),
            'height': random.normal(66, 4, size=n_rows),
            'weight': random.normal(161, 32, size=n_rows),
            'category': random.randint(4, size=n_rows),
        }
        self.hist_df = DataFrame(hist_columns)

    self.tdf = tm.makeTimeDataFrame()

    hexbin_columns = {
        "A": np.random.uniform(size=20),
        "B": np.random.uniform(size=20),
        "C": np.arange(20) + np.random.uniform(size=20),
    }
    self.hexbin_df = DataFrame(hexbin_columns)
def test_plot(self):
    """Smoke-test ``DataFrame.plot`` with a range of keyword arguments.

    Covers a frame from ``tm.makeTimeDataFrame()``, a frame with a
    string index and a frame with a ``MultiIndex``. Also checks, via
    ``self._check_plot_fails``, that the unknown keyword ``blarg`` makes
    a line plot fail.
    """
    df = tm.makeTimeDataFrame()
    _check_plot_works(df.plot, grid=False)
    _check_plot_works(df.plot, subplots=True)
    _check_plot_works(df.plot, subplots=True, use_index=False)

    df = DataFrame({'x': [1, 2], 'y': [3, 4]})
    self._check_plot_fails(df.plot, kind='line', blarg=True)

    df = DataFrame(np.random.rand(10, 3),
                   index=list(string.ascii_letters[:10]))
    _check_plot_works(df.plot, use_index=True)
    _check_plot_works(df.plot, sort_columns=False)
    _check_plot_works(df.plot, yticks=[1, 5, 10])
    _check_plot_works(df.plot, xticks=[1, 5, 10])
    _check_plot_works(df.plot, ylim=(-100, 100), xlim=(-100, 100))
    _check_plot_works(df.plot, subplots=True, title='blah')
    _check_plot_works(df.plot, title='blah')

    # Materialise the pairs: on Python 3 ``zip`` returns a one-shot
    # iterator, while ``MultiIndex.from_tuples`` is documented to take a
    # list/sequence of tuples.
    tuples = list(zip(string.ascii_letters[:10], range(10)))
    df = DataFrame(np.random.rand(10, 3),
                   index=MultiIndex.from_tuples(tuples))
    _check_plot_works(df.plot, use_index=True)
def create_random_sample_set(n_samples, time_shift='120m', randomize_times=False, freq='60T'):
    """Build an artificial sample set with prediction and exit times.

    Parameters
    ----------
    n_samples : int
        Assigned to ``tm.N`` before the frame is generated.
    time_shift : str
        Passed to ``pd.Timedelta``; the offset (or, when randomizing, the
        scale of the random offsets) between the two time columns.
    randomize_times : bool
        If true, both time columns are perturbed by random multiples of
        ``time_shift``; otherwise the exit time is the prediction time
        plus ``time_shift``.
    freq : str
        Frequency forwarded to ``tm.makeTimeDataFrame``.

    Returns
    -------
    tuple
        ``(X, pred_times, exit_times)``: the ``'A'``, ``'B'``, ``'C'``
        columns, the ``'index'`` column and the ``'index2'`` column, all
        sorted by ``'index'``.

    Notes
    -----
    Overwrites the module-level ``tm.K`` and ``tm.N`` and does not restore
    them.
    """

    def _scaled_shift(factor):
        # Random factor times the configured time shift.
        return factor * pd.Timedelta(time_shift)

    def _draw_factors():
        # One absolute-valued random factor per row of ``samples``.
        drawn = tm.makeDataFrame().reset_index(drop=True).squeeze()
        return drawn.iloc[:len(samples)].abs()

    # Create artificial data
    tm.K = 3
    tm.N = n_samples
    # Random data frame with a time index, turned into a column labeled 'index'
    samples = tm.makeTimeDataFrame(freq=freq).reset_index()

    if not randomize_times:
        samples['index2'] = samples['index'].apply(lambda x: x + pd.Timedelta(time_shift))
    else:
        tm.K = 1
        # Subtract and add random time deltas to the index column, to
        # create the prediction and evaluation times
        back_offsets = _draw_factors().apply(_scaled_shift)
        samples['index'] = samples['index'].subtract(back_offsets)
        forward_offsets = _draw_factors().apply(_scaled_shift)
        samples['index2'] = samples['index'].add(forward_offsets)

    # Sort the data frame by prediction time
    samples = samples.sort_values('index')
    return samples[['A', 'B', 'C']], samples['index'], samples['index2']