def data(request):
    """Panel-data fixture: t=25, n=200, k=5 with one extra effect.

    ``request.param`` supplies ``(missing, datatype, const)``.
    """
    pct_missing, dtype, use_const = request.param
    return generate_data(
        pct_missing,
        dtype,
        const=use_const,
        other_effects=1,
        ntk=(25, 200, 5),
    )
def large_data(request):
    """Larger panel-data fixture: t=51, n=71, k=5 with two extra effects.

    ``request.param`` supplies ``(missing, datatype, const)``.
    """
    pct_missing, dtype, use_const = request.param
    return generate_data(
        pct_missing,
        dtype,
        const=use_const,
        ntk=(51, 71, 5),
        other_effects=2,
    )
def data(request):
    """Panel-data fixture: t=91, n=15, k=5 with two extra effects.

    ``request.param`` supplies ``(missing, datatype, const)``.
    """
    pct_missing, dtype, use_const = request.param
    return generate_data(
        pct_missing,
        dtype,
        const=use_const,
        ntk=(91, 15, 5),
        other_effects=2,
    )
def singleton_data(request):
    """Fixture whose effect-category counts force singleton groups.

    ``request.param`` supplies ``(missing, datatype, const)``; the first
    effect has as many categories as observations (5 * 91), so every group
    in that dimension is a singleton.
    """
    pct_missing, dtype, use_const = request.param
    return generate_data(
        pct_missing,
        dtype,
        const=use_const,
        ntk=(91, 15, 5),
        other_effects=2,
        num_cats=[5 * 91, 15],
    )
def const_data(request):
    """Fixture whose only regressor is a constant column named 'Const'.

    ``request.param`` supplies ``(missing, datatype)``; the exog matrix is
    built by copying the dependent's frame and overwriting it with ones so
    that index alignment (and any missingness pattern) matches exactly.
    """
    pct_missing, dtype = request.param
    base = generate_data(pct_missing, dtype, ntk=(91, 7, 1))
    dependent = PanelData(base.y).dataframe
    constant = dependent.copy()
    constant.iloc[:, :] = 1
    constant.columns = ['Const']
    return AttrDict(y=dependent, x=constant, w=PanelData(base.w).dataframe)
def test_limited_redundancy():
    """FamaMacBeth should yield NaN parameters when regressors are
    collinear in some (but not all) cross-sections."""
    sim = generate_data(
        0, datatype="numpy", const=False, other_effects=1, ntk=(25, 200, 5)
    )
    # Duplicate variable 0 into variable 1 for every 7th period, creating
    # period-level collinearity that the per-period regressions cannot solve.
    duplicated = range(0, sim.x.shape[1], 7)
    for period in duplicated:
        sim.x[1, period, :] = sim.x[0, period, :]
    res = FamaMacBeth(sim.y, sim.x).fit()
    assert np.any(np.isnan(res.all_params))
def absorbed_data(request):
    """Fixture that appends a regressor fully absorbed by an effect.

    For ndarray exog, a variable equal to the time index is stacked on;
    for DataFrame exog, a column built from the first index level's codes
    is added. Either way the new regressor is constant within groups and
    so is absorbed by the corresponding effect.
    """
    dtype = request.param
    rng = np.random.RandomState(12345)
    data = generate_data(0, dtype, ntk=(131, 4, 3), rng=rng)
    exog = data.x
    if isinstance(exog, np.ndarray):
        extra = np.tile(np.arange(exog.shape[2]), (1, exog.shape[1], 1))
        data.x = np.concatenate([exog, extra])
    elif isinstance(exog, pd.DataFrame):
        level_codes = get_codes(exog.index)
        data.x["x_absorbed"] = np.array(level_codes[0]).astype(np.double)
    return data
def absorbed_data(request):
    """Fixture that appends a regressor fully absorbed by an effect.

    For ndarray exog, a variable equal to the time index is stacked on;
    otherwise a column built from the first index level's codes is added
    (falling back to ``.labels`` on pandas < 0.24).
    """
    dtype = request.param
    rng = np.random.RandomState(12345)
    data = generate_data(0, dtype, ntk=(131, 4, 3), rng=rng)
    exog = data.x
    if isinstance(exog, np.ndarray):
        extra = np.tile(np.arange(exog.shape[2]), (1, exog.shape[1], 1))
        data.x = np.concatenate([exog, extra])
    else:
        try:
            level_codes = exog.index.codes
        except AttributeError:  # pandas < 0.24
            level_codes = exog.index.labels
        data.x["x_absorbed"] = np.array(level_codes[0]).astype(np.double)
    return data
def data(request):
    """Panel-data fixture with one extra effect and default dimensions.

    ``request.param`` supplies ``(missing, datatype)``.
    """
    pct_missing, dtype = request.param
    return generate_data(pct_missing, dtype, other_effects=1)
def data(request):
    """Panel-data fixture: t=131, n=4, k=3 with a fixed-seed RNG for
    reproducibility.

    ``request.param`` supplies ``(missing, datatype)``.
    """
    pct_missing, dtype = request.param
    seeded_rng = np.random.RandomState(12345)
    return generate_data(pct_missing, dtype, ntk=(131, 4, 3), rng=seeded_rng)
def data(request):
    """Panel-data fixture: t=1000, n=3, k=5.

    ``request.param`` supplies ``(missing, datatype, const)``.
    """
    pct_missing, dtype, use_const = request.param
    return generate_data(pct_missing, dtype, ntk=(1000, 3, 5), const=use_const)
def data_gen(missing, datatype):
    """Generate a dataset, mapping a truthy ``missing`` flag to 20 percent
    missing observations and a falsy one to none."""
    pct_missing = 0.20 if missing else 0.0
    return generate_data(pct_missing, datatype)
def missing_data(request):
    """Fixture: dataset with 20 percent missing observations;
    ``request.param`` is the datatype."""
    return generate_data(0.20, request.param)
def data(request):
    """Fixture: fully-observed dataset; ``request.param`` is the datatype."""
    return generate_data(0.0, request.param)
# NOTE(review): simulation-script fragment. It relies on names defined
# elsewhere (key, options, rs, NUM_REPS) and the inner loop body appears to
# continue beyond this chunk (the accumulator dicts below are never filled
# here) -- confirm against the full script before editing.
joined = {}
for n in (2000, ):
    # Per-sample-size accumulators for estimates and the standard-error
    # variants (plain / unadjusted / robust, each with a "_no" counterpart).
    beta = {}
    std_errs = {}
    std_errs_no = {}
    std_errs_u = {}
    std_errs_u_no = {}
    std_errs_r = {}
    std_errs_r_no = {}
    vals = np.zeros((NUM_REPS, 5, 7))
    for b in range(NUM_REPS):
        if b % 25 == 0:
            # Progress heartbeat every 25 replications.
            print(key, n, b)
        # Fresh simulated panel: n entities, t=3, k=5, one extra effect,
        # no constant, drawn from the shared RNG `rs`.
        data = generate_data(0.00, 'pandas', ntk=(n, 3, 5), other_effects=1, const=False, rng=rs)
        mo, fo = options[key]
        # `key` presumably looks like "<mod_type>:<cluster_type>" -- verify.
        mod_type, cluster_type = key.split(':')
        y = PanelData(data.y)
        # Random integer group codes shaped like the dependent's frame.
        random_effects = np.random.randint(0, n // 3, size=y.dataframe.shape)
        other_random = np.random.randint(0, n // 5, size=y.dataframe.shape)
        if mod_type == 'random':
            effects = y.copy()
            effects.dataframe.iloc[:, :] = random_effects
# NOTE(review): looks like a pasted interactive/IPython scratch session --
# the bare expressions (o, a, b, the literal 30s) only display values in a
# REPL and are no-ops as a script. Relies on zz, x, z, p defined elsewhere.
zz.groupby(level=0).sum()
zz.groupby(level=0).sum().T
o = zz.groupby(level=0).sum().values
o
o.T @ o
(x.T @ z) @ (x.T @ z).T
# Quadratic forms inspected below: a = x'Px and b = (x'z)(x'z)'.
a = x.T @ p @ x
b = (x.T @ z) @ (x.T @ z).T
a
b
np.linalg.inv(a) @ b
np.trace(np.linalg.inv(a) @ b)
30
30
# Fit PanelOLS with weights under each combination of entity/time effects.
data = generate_data(0, 'pandas', ntk=(101, 3, 5), other_effects=1, const=False)
y = PanelData(data.y)
x = PanelData(data.x)
w = PanelData(data.w)
# Overwrite the first regressor with a constant column (used in the
# entity-effects fit below, which passes y/x rather than data.y/data.x).
x.dataframe.iloc[:, 0] = 1
mod = PanelOLS(data.y, data.x, weights=data.w)
mod.fit()
mod = PanelOLS(y, x, weights=data.w, entity_effects=True)
mod.fit()
mod = PanelOLS(data.y, data.x, weights=data.w, time_effects=True)
mod.fit()
mod = PanelOLS(data.y, data.x, weights=data.w, time_effects=True, entity_effects=True)
mod.fit()
def data(request):
    """Panel-data fixture: t=231, n=7, k=5.

    ``request.param`` supplies ``(missing, datatype)``.
    """
    pct_missing, dtype = request.param
    return generate_data(pct_missing, dtype, ntk=(231, 7, 5))
def test_unequal_samples():
    """PooledOLS must raise when dependent and exog have different samples.

    Fix: renamed from ``test_uneuqal_samples`` (typo). Pytest collects test
    functions by prefix, so no caller references the old name.
    """
    data = generate_data(False, "pandas")
    # Halving the dependent's rows makes the samples incompatible.
    with pytest.raises(ValueError, match="dependent and exog must have"):
        PooledOLS(data.y.iloc[::2], data.x)