Exemple #1
0
def test_predict_partial(config):
    """Check ``predict`` when only some equations (or partial data) are given.

    Three variants are compared against the full in-sample prediction:
    all equations except the first, the same set plus an empty placeholder
    for the first equation, and every equation with ``None`` entries removed.
    """
    fmla, model, interface = config
    # Bracketed formulas are only run for the two IV system estimators.
    uses_brackets = any("[" in fmla[name] for name in fmla)
    if uses_brackets and model not in (IVSystemGMM, IV3SLS):
        return

    mod = model.from_formula(fmla, joined)
    res = mod.fit()

    def all_but_first():
        # Fresh container holding every equation except the first one.
        subset = AttrDict()
        for name in list(mod._equations.keys())[1:]:
            subset[name] = mod._equations[name]
        return subset

    partial = all_but_first()
    pred = res.predict(equations=partial, dataframe=True)
    for name in mod._equations:
        # A column is present exactly when its equation was supplied.
        assert (name in pred) == (name in partial)
    pred2 = res.predict(data=joined, dataframe=True)
    assert_frame_equal(pred2[pred.columns], pred)

    with_placeholder = all_but_first()
    first = list(mod._equations.keys())[0]
    with_placeholder[first] = {"exog": None, "endog": None}
    pred3 = res.predict(equations=with_placeholder, dataframe=True)
    assert_frame_equal(pred2[pred3.columns], pred3)

    complete = AttrDict()
    for name in mod._equations:
        equation = mod._equations[name]
        complete[name] = {
            part: value for part, value in equation.items() if value is not None
        }
    pred4 = res.predict(equations=complete, dataframe=True)
    assert_frame_equal(pred2, pred4)
Exemple #2
0
def process_block(results):
    """Split one block of tab-separated result lines into its sections.

    The block is cut at three marker lines: the first field starting with
    ``chi2_1`` (start of the statistics), a line containing ``* Variance``
    and a line containing ``* Sigma``.  Everything before the statistics is
    treated as parameter output.

    Parameters
    ----------
    results : list of str
        Lines of a single result block.
        NOTE(review): the variance and sigma lines appear to need their
        trailing newlines, since they are joined with "" below - confirm.

    Returns
    -------
    AttrDict
        ``sigma`` (DataFrame), ``params`` (dict of DataFrames keyed by the
        label found in the parameter section), ``variance`` (DataFrame) and
        ``stats`` (AttrDict of floats).
    """
    # Locate the section markers; if a marker occurs more than once the
    # last occurrence wins because the loop never breaks.
    for i, line in enumerate(results):
        if line.startswith("chi2_1"):
            stat_start = i
        elif "* Variance" in line:
            variance_start = i + 2
        elif "* Sigma" in line:
            sigma_start = i + 2
    # The "- 2" undoes the "+ 2" above so each slice stops at its marker line.
    param_results = results[:stat_start]
    stats = results[stat_start : variance_start - 2]
    variance = results[variance_start : sigma_start - 2]
    sigma = results[sigma_start:]

    def parse_block(block):
        """Turn comma-joined lines into a (param, tstat, pval) DataFrame."""
        values = pd.read_csv(StringIO("\n".join(block)), header=None)
        # Only the last column carries numbers; every three consecutive rows
        # form one record.
        nums = np.asarray(values.iloc[:, -1])
        nums = np.reshape(nums, (len(nums) // 3, 3))
        # The label of each record is the first column of its first row.
        values = pd.DataFrame(
            nums, index=values.iloc[::3, 0], columns=["param", "tstat", "pval"]
        )
        values.index.name = ""
        return values

    params = {}
    block = []
    key = None
    # The first two lines of the parameter section are skipped.
    for line in param_results[2:]:
        contents = list(map(lambda s: s.strip(), line.split("\t")))
        # A line with a label but an empty second field starts a new group.
        if contents[0] != "" and contents[1] == "":
            if key is not None:
                params[key] = parse_block(block)
            key = contents[0]
            block = []
        else:
            block.append(",".join(contents))
    # Flush the final group, which has no following header to trigger it.
    params[key] = parse_block(block)

    stat_values = AttrDict()
    for line in stats:
        contents = line.strip().split("\t")
        # Keep only "name<TAB>value" lines where both fields are non-empty.
        if len(contents) > 1 and contents[0] and contents[1]:
            stat_values[contents[0]] = float(contents[1])
    stats = stat_values

    # Convert the variance section from tab- to comma-separated text.
    variance = list(map(lambda s: s.replace("\t", ","), variance))
    header = variance[0]
    block = []
    for line in variance[1:]:
        # Lines with three consecutive empty fields are dropped.
        if ",,," in line:
            continue
        else:
            block.append(line)
    out = pd.read_csv(StringIO("".join([header] + block)))
    # Drop the first (label) column and reuse the header names as the index.
    out = out.iloc[:, 1:]
    out.index = header.strip().split(",")[1:]
    vcv = out

    sigma = list(map(lambda s: s.replace("\t", ","), sigma))
    sigma = pd.read_csv(StringIO("".join(sigma)), index_col=0)
    return AttrDict(sigma=sigma, params=params, variance=vcv, stats=stats)
 def __init__(self, results: AttrDict) -> None:
     """Store system-level values and wrap each per-equation result.

     Parameters
     ----------
     results : AttrDict
         Container providing ``individual``, ``system_r2``, ``sigma``,
         ``model``, ``constraints`` and, optionally, ``weight_estimator``.
     """
     super(SystemResults, self).__init__(results)

     # Wrap every individual equation in its own result object.
     per_equation = AttrDict()
     for label in results.individual:
         per_equation[label] = SystemEquationResult(results.individual[label])
     self._individual = per_equation

     self._system_r2 = results.system_r2
     self._sigma = results.sigma
     self._model = results.model

     constraints = results.constraints
     self._constraints = constraints
     # The count is stored as text; the literal "None" marks no constraints.
     if constraints is None:
         self._num_constraints = "None"
     else:
         self._num_constraints = str(constraints.r.shape[0])

     # NOTE(review): the attribute name is spelt "estimtor"; it is kept as-is
     # because other code may read it under this exact name.
     self._weight_estimtor = results.get("weight_estimator", None)
Exemple #4
0
def generate_data(nfactor=3,
                  nportfolio=25,
                  nobs=1000,
                  premia=None,
                  output="pandas",
                  alpha=False):
    """Simulate factor and portfolio returns with a fixed random seed.

    Parameters
    ----------
    nfactor : int
        Number of factors.
    nportfolio : int
        Number of portfolios.
    nobs : int
        Number of observations.
    premia : array-like, optional
        One premium per factor; divided by 12 before being added to the
        factors.  Any sequence (list, tuple, ndarray, Series) is accepted.
        Defaults to ``arange(1, nfactor + 1) / (10 * nfactor)``.
    output : str
        When ``"pandas"`` the results are DataFrames with a daily date index
        starting 1930-01-01; otherwise plain arrays are returned.
    alpha : bool
        When True, add ``arange(nportfolio) / nportfolio / 100`` to the
        portfolio columns.

    Returns
    -------
    AttrDict
        ``factors`` and ``portfolios``.
    """
    # The seed resets NumPy's global generator so the output is reproducible.
    np.random.seed(12345)
    if premia is None:
        premia = np.arange(1, nfactor + 1) / (10 * nfactor)
    else:
        # Accept any sequence; the original required an ndarray because it
        # indexed ``premia[None, :]`` directly.
        premia = np.asarray(premia, dtype=float).ravel()
    rho = 0.2
    # The order of the random draws below must not change, or the simulated
    # values would differ from earlier runs.
    e = np.random.randn(nobs, nfactor)
    factors = rho * np.random.randn(nobs, 1) + np.sqrt(1 - rho**2) * e
    factors = np.sqrt(0.20**2 / 12) * factors
    factors += premia[None, :] / 12
    idio = np.sqrt(0.10**2 / 12) * np.random.randn(nobs, nportfolio)
    betas = np.random.chisquare(2, (nfactor, nportfolio)) / 2.0
    portfolios = factors @ betas + idio
    if alpha:
        portfolios += np.arange(nportfolio)[None, :] / nportfolio / 100
    if output == "pandas":
        # The index is only needed for DataFrame output, so build it here.
        index = pd.date_range("1930-1-1", periods=nobs, freq="D")
        cols = ["factor_{0}".format(i) for i in range(1, nfactor + 1)]
        factors = pd.DataFrame(factors, columns=cols, index=index)
        cols = ["port_{0}".format(i) for i in range(1, nportfolio + 1)]
        portfolios = pd.DataFrame(portfolios, columns=cols, index=index)

    return AttrDict(factors=factors, portfolios=portfolios)
Exemple #5
0
def data():
    """Build two frames and a series sharing 100 daily dates from 2000-01-01.

    ``df2`` uses the same dates in reverse order.
    """
    dates = date_range("2000-01-01", periods=100)
    single = np.arange(100)[:, None]
    df1 = DataFrame(single, columns=["A"], index=dates)
    pair = np.arange(200).reshape((100, 2))
    df2 = DataFrame(pair, columns=["B", "C"], index=dates[::-1])
    s = Series(300 + np.arange(100), index=dates, name="D")
    return AttrDict(df1=df1, df2=df2, s=s)
Exemple #6
0
def finalize(params, stats, cov, weight_mat):
    """Assemble parsed pieces into one container with normalised names.

    Every entry of ``stats`` is copied to the top level.  Four long-form
    names are then filled from their short aliases (``None`` when the alias
    is absent), and ``f_statistic`` is taken from ``chi2`` if present,
    otherwise from ``F``, otherwise ``None``.
    """
    out = AttrDict(
        params=params.params,
        tstats=params.tstats,
        stats=stats,
        cov=cov,
        weight_mat=weight_mat,
    )
    for name in stats.index:
        out[name] = stats[name]

    # (long name, short alias) pairs.
    aliases = (
        ("model_ss", "mss"),
        ("resid_ss", "rss"),
        ("rsquared", "r2"),
        ("rsquared_adj", "r2_a"),
    )
    for long_name, short_name in aliases:
        out[long_name] = out[short_name] if short_name in out else None

    # "chi2" takes precedence over "F".
    for candidate in ("chi2", "F"):
        if candidate in out:
            out["f_statistic"] = out[candidate]
            break
    else:
        out["f_statistic"] = None

    return out
Exemple #7
0
def data():
    """Select dependent, exogenous, endogenous and instrument columns.

    A constant is added to the exogenous block via ``add_constant``.
    """
    dep = SIMULATED_DATA.y_robust
    exog = add_constant(SIMULATED_DATA[["x3", "x4", "x5"]])
    endog = SIMULATED_DATA[["x1", "x2"]]
    instr = SIMULATED_DATA[["z1", "z2"]]
    return AttrDict(dep=dep, exog=exog, endog=endog, instr=instr)
Exemple #8
0
def const_data(request):
    """Return panel data whose only regressor is a column of ones.

    ``request.param`` supplies ``(missing, datatype)`` for ``generate_data``.
    """
    missing, datatype = request.param
    generated = generate_data(missing, datatype, ntk=(91, 7, 1))
    y = PanelData(generated.y).dataframe
    # Copy the dependent frame, overwrite it with ones and relabel it.
    const = y.copy()
    const.iloc[:, :] = 1
    const.columns = ["Const"]
    w = PanelData(generated.w).dataframe
    return AttrDict(y=y, x=const, w=w)
Exemple #9
0
def lsdv_config(request):
    """Expose the four-element ``request.param`` under named keys."""
    weights, entity_effects, time_effects, other_effects = request.param
    options = {
        "weights": weights,
        "entity_effects": entity_effects,
        "time_effects": time_effects,
        "other_effects": other_effects,
    }
    return AttrDict(**options)
Exemple #10
0
def generate_data(nkp=(1000, 5, 3)):
    """Simulate regressors, instruments and errors with a fixed seed.

    Parameters
    ----------
    nkp : tuple of int
        ``(n, k, p)``: number of observations, number of regressors and an
        additional column count; ``k + p + 1`` correlated normal columns are
        drawn in total.

    Returns
    -------
    AttrDict
        The simulated arrays together with several derived moment matrices
        (``xhat``, ``s2``, ``s2_debiased``, ``v``, ``vinv``, ``vk``,
        ``xzizx``, ``xzizx_inv``) and the splits ``dep``, ``exog``,
        ``endog`` and ``instr``.
    """
    n, k, p = nkp
    np.random.seed(12345)
    clusters = np.random.randint(0, 10, n)
    rho = 0.5
    dim = k + p + 1
    r = np.zeros((dim, dim))
    r.fill(rho)
    # The last column (the error) is uncorrelated with columns 2 onward.
    r[-1, 2:] = 0
    r[2:, -1] = 0
    r[-1, -1] = 0.5
    # Size the identity from the actual dimension; the previous hard-coded
    # ``np.eye(9)`` only worked when k + p + 1 == 9.
    r += np.eye(dim) * 0.5
    v = np.random.multivariate_normal(np.zeros(r.shape[0]), r, n)

    x = v[:, :k]
    z = v[:, 2:k + p]
    e = v[:, [-1]]
    endog = x[:, :2]
    exog = x[:, 2:]
    instr = z[:, k - 2:]
    params = np.arange(1, k + 1) / k
    params = params[:, None]
    y = x @ params + e
    dep = y
    # Projection of x onto the column space of z.
    xhat = z @ np.linalg.pinv(z) @ x
    nobs, nvar = x.shape
    s2 = e.T @ e / nobs
    s2_debiased = e.T @ e / (nobs - nvar)
    # ``v`` is reused from here on for the second-moment matrix of xhat.
    v = xhat.T @ xhat / nobs
    vinv = np.linalg.inv(v)
    kappa = 0.99
    vk = (x.T @ x * (1 - kappa) + kappa * xhat.T @ xhat) / nobs
    xzizx = x.T @ z @ z.T @ x / nobs
    xzizx_inv = np.linalg.inv(xzizx)

    return AttrDict(
        nobs=nobs,
        e=e,
        x=x,
        y=y,
        z=z,
        xhat=xhat,
        params=params,
        s2=s2,
        s2_debiased=s2_debiased,
        clusters=clusters,
        nvar=nvar,
        v=v,
        vinv=vinv,
        vk=vk,
        i=np.eye(k + p - 2),
        kappa=kappa,
        xzizx=xzizx,
        xzizx_inv=xzizx_inv,
        dep=dep,
        exog=exog,
        endog=endog,
        instr=instr,
    )
Exemple #11
0
 def __init__(self, results: AttrDict) -> None:
     """Copy the per-equation values out of ``results``.

     Parameters
     ----------
     results : AttrDict
         Container providing ``eq_label``, ``dependent``, ``f_stat``,
         ``r2a``, ``instruments``, ``endog`` and, optionally,
         ``weight_estimator``.
     """
     super(SystemEquationResult, self).__init__(results)
     # Equation label and its dependent variable.
     self._eq_label, self._dependent = results.eq_label, results.dependent
     # Fit statistics.
     self._f_statistic, self._r2a = results.f_stat, results.r2a
     # Instrument and endogenous-variable information.
     self._instruments, self._endog = results.instruments, results.endog
     # Optional entry: stays None when the key is missing.
     self._weight_estimator = results.get("weight_estimator", None)
def model_data(request) -> AttrDict:
    """Fit a SUR model for the configuration named by ``request.param``.

    The parameter has the form ``"<dgp>-<model_type>"``.  The data set is
    chosen by ``dgp``; for ``"common"`` the first equation's regressors are
    assigned to every other equation.  Equations are re-keyed by the name of
    their dependent column, and for ``model_type == "constrained"`` a
    two-row constraint matrix is attached before fitting.
    """
    key = request.param
    dgp, model_type = key.split("-")

    if dgp == "basic":
        data = basic_data
    elif dgp == "common":
        data = common_data
        # Reuse the first equation's exog in all later equations (in place).
        for position, label in enumerate(data):
            if position:
                data[label]["exog"] = shared_exog
            else:
                shared_exog = data[label]["exog"]
    else:  # dgp == 'missing'
        data = missing_data

    cov_kwds: Dict[str, Union[str, bool]] = {"cov_type": "unadjusted"}
    if model_type == "ss":
        cov_kwds["debiased"] = True
    stata_result = stata_results[key]

    # Key every equation by the name of its dependent variable.
    rekeyed_data = {
        data[label]["dependent"].columns[0]: data[label] for label in data
    }

    constraint = None
    if model_type == "constrained":
        cols = []
        widths = []
        for name, equation in rekeyed_data.items():
            regressors = equation["exog"]
            cols.extend([name + "_" + col for col in regressors.columns])
            widths.append(regressors.shape[1])
        r = pd.DataFrame(columns=cols, index=["r0", "r1"], dtype=np.float64)
        r.iloc[:, :] = 0.0
        # Both rows pair the first coefficient (-1) with the first
        # coefficient of the second / third equation (+1).
        r.iloc[:, 0] = -1.0
        r.iloc[0, widths[0]] = 1.0
        r.iloc[1, widths[0] + widths[1]] = 1.0
        constraint = r

    mod = SUR(rekeyed_data)
    if constraint is not None:
        mod.add_constraints(constraint)

    # cov_kwds already holds exactly the fit options for either model type.
    res = mod.fit(**cov_kwds)

    return AttrDict(
        data=rekeyed_data,
        cov_kwds=cov_kwds,
        model_type=model_type,
        stata_result=stata_result,
        key=key,
        constraint=constraint,
        mod=mod,
        res=res,
    )
def data(request):
    """Generate a three-equation system and stack its arrays per equation.

    ``x`` joins exog with endog and ``z`` joins exog with instruments,
    column-wise, for each equation.
    """
    steps, robust = request.param
    weight_type = "robust" if robust else "unadjusted"
    eqns = generate_3sls_data_v2(k=3)

    y, x, z = [], [], []
    for label in eqns:
        equation = eqns[label]
        y.append(equation.dependent)
        x.append(np.concatenate([equation.exog, equation.endog], 1))
        z.append(np.concatenate([equation.exog, equation.instruments], 1))

    return AttrDict(
        eqns=eqns,
        x=x,
        y=y,
        z=z,
        steps=steps,
        robust=robust,
        weight_type=weight_type,
    )
Exemple #14
0
    def config(self) -> AttrDict:
        """
        Weight estimator configuration

        Returns
        -------
        AttrDict
            Copy of the stored configuration with the current ``bandwidth``
            added
        """
        # Build a new container so the stored configuration is not modified.
        settings = AttrDict(list(self._config.items()))
        settings["bandwidth"] = self.bandwidth
        return settings
Exemple #15
0
def simple_3sls(y, x, z):
    """Direct reference implementation of a two-step system IV estimator.

    Parameters
    ----------
    y, x, z : sequence of ndarray
        Per-equation dependent variables, regressors and instruments.
        NOTE(review): each ``y[i]`` is presumably an (nobs, 1) column -
        confirm against the callers.

    Returns
    -------
    AttrDict
        ``beta0``/``eps0`` (equation-by-equation estimates and residuals),
        ``sigma`` (residual cross-product divided by nobs), ``beta1``,
        ``xpx``, ``xpy``, ``eps`` (residuals using ``beta1``) and ``cov``
        (inverse of ``xpx``).
    """
    out = AttrDict()
    k = len(y)
    beta0 = []
    eps0 = []
    xhat = []
    for i in range(k):
        # First stage: fitted regressors from a projection on instruments.
        xhat.append(z[i] @ lstsq(z[i], x[i], rcond=None)[0])
        beta0.append(lstsq(xhat[i], y[i], rcond=None)[0])
        # Residuals use the original regressors, not the fitted ones.
        eps0.append(y[i] - x[i] @ beta0[-1])
    out["beta0"] = np.vstack(beta0)
    out["eps0"] = eps0

    resid = np.hstack(eps0)
    nobs = resid.shape[0]
    sigma = resid.T @ resid / nobs
    out["sigma"] = sigma
    omega = np.kron(sigma, np.eye(nobs))
    omegainv = np.linalg.inv(omega)

    by = np.vstack([y[i] for i in range(k)])
    # Block-diagonal matrix of fitted regressors.
    bx = []
    for i in range(k):
        row = []
        for j in range(k):
            if i == j:
                row.append(xhat[i])
            else:
                row.append(np.zeros((nobs, xhat[j].shape[1])))
        bx.append(np.hstack(row))
    bx = np.vstack(bx)

    xpx = bx.T @ omegainv @ bx
    xpy = bx.T @ omegainv @ by
    beta1 = np.linalg.solve(xpx, xpy)
    out["beta1"] = beta1
    out["xpx"] = xpx
    out["xpy"] = xpy

    # Slice the stacked coefficients back into per-equation pieces.  A
    # separate name is used for the width so the equation count ``k`` is
    # not overwritten inside its own loop.
    start = 0
    eps1 = []
    for i in range(k):
        ncol = x[i].shape[1]
        eps1.append(y[i] - x[i] @ beta1[start:start + ncol])
        start += ncol

    out["eps"] = np.hstack(eps1)
    # ``xpx`` is exactly bx' omega^-1 bx, so it is reused here.
    out["cov"] = np.linalg.inv(xpx)

    return out
 def __init__(
     self,
     x: List[ndarray],
     eps: NDArray,
     sigma: NDArray,
     full_sigma: NDArray,
     *,
     gls: bool = False,
     debiased: bool = False,
     constraints: Optional[LinearConstraint] = None,
 ) -> None:
     """Store the estimator inputs and derive sizes and display metadata.

     ``_nobs`` is the first dimension of ``eps`` and ``_k`` is the number of
     entries in ``x``.
     """
     # Raw inputs.
     self._eps, self._x = eps, x
     # Derived sizes.
     self._nobs = eps.shape[0]
     self._k = len(x)
     # Covariance inputs.
     self._sigma, self._full_sigma = sigma, full_sigma
     # Options.
     self._gls, self._debiased = gls, debiased
     self._constraints = constraints
     # Display name and configuration summaries.
     self._name = "Homoskedastic (Unadjusted) Covariance"
     self._str_extra = AttrDict(Debiased=debiased, GLS=gls)
     self._cov_config = AttrDict(debiased=debiased)
def kernel(request):
    """Map the kernel name in ``request.param`` to its weight function.

    Any name other than ``"bartlett"`` or ``"parzen"`` selects the
    quadratic-spectral weights.
    """
    kernel_name = request.param
    known = {
        "bartlett": (kernel_weight_bartlett, ["newey-west"]),
        "parzen": (kernel_weight_parzen, ["gallant"]),
    }
    fallback = (
        kernel_weight_quadratic_spectral,
        ["quadratic-spectral", "andrews"],
    )
    weight_func, alt_names = known.get(kernel_name, fallback)
    return AttrDict(kernel=kernel_name, alt_names=alt_names, weight=weight_func)
Exemple #18
0
def test_formula_equivalence_weights(data):
    """Weighted GMM must give equal parameters via arrays and via formulas.

    Random weights are attached to a copy of every equation for the
    array-based model; the same weights are passed through ``weights=`` to
    the formula-based model built from a joined DataFrame.
    """
    weights = {}
    eqn_copy = AttrDict()
    for key in data.eqns:
        eqn = {k: v for k, v in data.eqns[key].items()}
        nobs = eqn["dependent"].shape[0]
        w = np.random.chisquare(2, (nobs, 1)) / 2
        weights[key] = w
        eqn["weights"] = w
        eqn_copy[key] = eqn

    mod = IVSystemGMM(eqn_copy, weight_type="unadjusted")
    df = []
    formulas = {}
    for i, key in enumerate(data.eqns):
        eqn = data.eqns[key]
        dep = eqn.dependent
        ex = eqn.exog
        en = eqn.endog
        instr = eqn.instruments
        dep = DataFrame(dep, columns=["dep_{0}".format(i)])
        has_const = False
        if np.any(np.all(ex == 1, 0)):
            # NOTE(review): drops the first column, which presumably is the
            # constant - confirm against the data generator.
            ex = ex[:, 1:]
            has_const = True
        ex = DataFrame(
            ex,
            columns=["ex_{0}_{1}".format(i, j) for j in range(ex.shape[1])])
        en = DataFrame(
            en,
            columns=["en_{0}_{1}".format(i, j) for j in range(en.shape[1])])
        # Name the instrument columns from the instrument array's own width;
        # using the exog width only worked when the two happened to match.
        n_instr = instr.shape[1]
        instr = DataFrame(
            instr,
            columns=["instr_{0}_{1}".format(i, j) for j in range(n_instr)])
        fmla = "".join(dep.columns) + " ~  "
        if has_const:
            fmla += " 1 + "
        fmla += " + ".join(ex.columns) + " + ["
        fmla += " + ".join(en.columns) + " ~ "
        fmla += " + ".join(instr.columns) + " ] "
        formulas[key] = fmla
        df.extend([dep, ex, en, instr])
    # The axis is passed by keyword; recent pandas versions reject it as a
    # positional argument.
    df = concat(df, axis=1, sort=False)
    formula_mod = IVSystemGMM.from_formula(formulas,
                                           df,
                                           weights=weights,
                                           weight_type="unadjusted")
    res = mod.fit(cov_type="unadjusted")
    formula_res = formula_mod.fit(cov_type="unadjusted")
    assert_allclose(res.params, formula_res.params)
def data(request):
    """Fit the panel model described by ``request.param``.

    The parameter has the form ``"<model>-<vcv>-<weights>-<missing>"``; a
    non-empty ``missing`` part is appended to every variable name.  The
    fitted model is returned together with the matching ``STATA_RESULTS``
    entry and the options used.
    """
    model, vcv, weights, missing = request.param.split("-")
    y_vars = ["y"]
    x_vars = ["x1", "x2", "x3", "x4", "x5"]
    if missing:
        # Switch to the suffixed variants of every variable.
        y_vars = [name + missing for name in y_vars]
        x_vars = [name + missing for name in x_vars]
    y = sim_data[y_vars]
    x = sim_data[["intercept"] + x_vars]
    mod = MODELS[model]
    has_effects = model in ("fixed_effect", "random_effect")

    mod_options = {}
    if model == "fixed_effect":
        mod_options = {"entity_effects": True}
    if weights == "weighted":
        mod_options["weights"] = sim_data["w"]

    fit_options = {"debiased": True}
    if weights == "wls":
        fit_options["reweight"] = True
    if vcv == "robust" and not has_effects:
        fit_options["cov_type"] = "robust"
    elif vcv in ("cluster", "robust"):
        # Cluster by entity; "robust" reaches this branch only for the
        # effects models.
        y_data = PanelData(y)
        eid = y_data.entity_ids
        entities = pd.DataFrame(eid, index=y_data.index, columns=["firm_ids"])
        fit_options["cov_type"] = "clustered"
        fit_options["clusters"] = entities
    else:
        fit_options["cov_type"] = "unadjusted"

    if vcv == "cluster" or (has_effects and vcv == "robust"):
        fit_options["group_debias"] = True

    spec_mod = mod(y, x, **mod_options)
    fit = spec_mod.fit(**fit_options)
    return AttrDict(
        fit=fit,
        model=spec_mod,
        model_options=mod_options,
        y=y,
        x=x,
        stata=STATA_RESULTS[request.param],
        fit_options=fit_options,
        model_name=model,
        vcv=vcv,
        weights=weights,
        missing=missing,
    )
Exemple #20
0
def data():
    """Simulate 1000 observations of regressors, instruments and errors.

    Nine correlated normal columns are drawn with a fixed seed: the first
    ``k`` are regressors, the next ``p`` instruments and the last the error.
    Several derived moment matrices are returned alongside the raw arrays.
    """
    n, q, k, p = 1000, 2, 5, 3
    np.random.seed(12345)
    clusters = np.random.randint(0, 10, n)

    rho = 0.5
    dim = k + p + 1
    r = np.full((dim, dim), rho)
    # The error column is uncorrelated with columns 2 onward.
    r[-1, 2:] = 0
    r[2:, -1] = 0
    r[-1, -1] = 0.5
    r += 0.5 * np.eye(dim)
    draws = np.random.multivariate_normal(np.zeros(dim), r, n)

    x = draws[:, :k]
    z = draws[:, k:k + p]
    e = draws[:, [-1]]
    params = (np.arange(1, k + 1) / k)[:, None]
    y = x @ params + e

    # Projection of x onto the column space of z.
    xhat = z @ np.linalg.pinv(z) @ x
    nobs, nvar = x.shape
    s2 = e.T @ e / nobs
    s2_debiased = e.T @ e / (nobs - nvar)
    v = xhat.T @ xhat / nobs
    vinv = np.linalg.inv(v)
    kappa = 0.99
    vk = (x.T @ x * (1 - kappa) + kappa * xhat.T @ xhat) / nobs

    return AttrDict(
        nobs=nobs,
        e=e,
        x=x,
        y=y,
        z=z,
        xhat=xhat,
        params=params,
        s2=s2,
        s2_debiased=s2_debiased,
        clusters=clusters,
        nvar=nvar,
        v=v,
        vinv=vinv,
        vk=vk,
        kappa=kappa,
        dep=y,
        exog=x[:, q:],
        endog=x[:, :q],
        instr=z,
    )
Exemple #21
0
def data():
    """Simulate IV data from a Toeplitz covariance with a private RNG.

    A ``RandomState`` seeded with 12345 draws nine correlated normal
    columns: the first ``k`` are regressors, the next ``p`` instruments and
    the last the error.  ``xhat`` projects the regressors on the included
    exogenous columns joined with the instruments.
    """
    n, q, k, p = 1000, 2, 5, 3
    rs = np.random.RandomState(12345)
    clusters = rs.randint(0, 10, n)

    rho = 0.5
    first_row = [1] + (rho + np.linspace(0.1, -0.1, 8)).tolist()
    r = scipy.linalg.toeplitz(first_row)
    # The error column is uncorrelated with columns 2 onward.
    r[-1, 2:] = 0
    r[2:, -1] = 0
    r[-1, -1] = 1
    draws = rs.multivariate_normal(np.zeros(r.shape[0]), r, n)

    x = draws[:, :k]
    z = draws[:, k:k + p]
    e = draws[:, [-1]]
    params = (np.arange(1, k + 1) / k)[:, None]
    y = x @ params + e

    exog_instr = np.column_stack((x[:, q:], z))
    xhat = exog_instr @ np.linalg.pinv(exog_instr) @ x
    nobs, nvar = x.shape
    s2 = e.T @ e / nobs
    s2_debiased = e.T @ e / (nobs - nvar)
    v = xhat.T @ xhat / nobs
    vinv = np.linalg.inv(v)
    kappa = 0.99
    vk = (x.T @ x * (1 - kappa) + kappa * xhat.T @ xhat) / nobs

    return AttrDict(
        nobs=nobs,
        e=e,
        x=x,
        y=y,
        z=z,
        xhat=xhat,
        params=params,
        s2=s2,
        s2_debiased=s2_debiased,
        clusters=clusters,
        nvar=nvar,
        v=v,
        vinv=vinv,
        vk=vk,
        kappa=kappa,
        dep=y,
        exog=x[:, q:],
        endog=x[:, :q],
        instr=z,
    )
Exemple #22
0
def generate_data(
    n=500,
    k=10,
    p=3,
    const=True,
    rho=0.8,
    common_exog=False,
    included_weights=False,
    output_dict=True,
    seed=1234,
):
    """Simulate ``k`` regression equations with cross-correlated errors.

    Parameters
    ----------
    n : int
        Observations per equation.
    k : int
        Number of equations.
    p : int or sequence of int
        Regressors per equation (excluding the constant); a scalar applies
        to every equation.
    const : bool
        Prepend a column of ones to the regressors.
    rho : float
        Loading of every error on one shared shock.
    common_exog : bool
        Use the same regressor matrix in every equation.
    included_weights : bool
        Also draw weights for each equation.
    output_dict : bool
        Store each equation as a dict (``dependent``, ``exog`` and
        optionally ``weights``) rather than as a tuple.
    seed : int
        Seed for NumPy's global generator.

    Returns
    -------
    AttrDict
        Equations keyed ``"equ.0"``, ``"equ.1"``, ...
    """
    np.random.seed(seed)
    p = np.array(p)
    if p.ndim == 0:
        p = [int(p)] * k
    assert len(p) == k

    # Idiosyncratic noise plus a shock shared by all equations.
    eps = np.random.standard_normal((n, k))
    eps *= np.sqrt(1 - rho**2)
    eps += rho * np.random.standard_normal((n, 1))

    def draw_exog(ncol):
        # Regressors for one equation, with a leading constant if requested.
        regressors = np.random.standard_normal((n, ncol))
        if const:
            regressors = np.c_[np.ones((n, 1)), regressors]
        return regressors

    data = AttrDict()
    x = draw_exog(p[0])

    for i in range(k):
        label = "equ.{0}".format(i)
        # Draw order (beta, then x, then w) is kept so the random stream
        # is consumed exactly as before.
        beta = np.random.chisquare(1, (const + p[i], 1))
        if not common_exog:
            x = draw_exog(p[i])
        y = x @ beta + eps[:, [i]]
        if included_weights:
            w = np.random.chisquare(5, (n, 1)) / 5

        if output_dict:
            entry = {"dependent": y, "exog": x}
            if included_weights:
                entry["weights"] = w
        elif included_weights:
            entry = (y, x, w)
        else:
            entry = (y, x)
        data[label] = entry

    return data
def data():
    """Simulate IV data with NaNs injected, plus the NaN-free subset.

    Every 93rd element of the flattened draw matrix is set to NaN.  The
    ``*_clean`` entries hold only rows in which no simulated column is NaN.
    """
    n, q, k, p = 1000, 2, 5, 3
    np.random.seed(12345)
    clusters = np.random.randint(0, 10, n)

    rho = 0.5
    dim = k + p + 1
    r = np.full((dim, dim), rho)
    # The error column is uncorrelated with columns 2 onward.
    r[-1, 2:] = 0
    r[2:, -1] = 0
    r[-1, -1] = 0.5
    r += 0.5 * np.eye(dim)
    v = np.random.multivariate_normal(np.zeros(dim), r, n)
    v.flat[::93] = np.nan

    x = v[:, :k]
    z = v[:, k:k + p]
    e = v[:, [-1]]
    params = (np.arange(1, k + 1) / k)[:, None]
    y = x @ params + e

    # Rows that are complete across every simulated column.
    complete = ~np.isnan(v).any(axis=1)
    x_clean = x[complete]

    return AttrDict(
        dep=y,
        exog=x[:, q:],
        endog=x[:, :q],
        instr=z,
        dep_clean=y[complete],
        exog_clean=x_clean[:, q:],
        endog_clean=x_clean[:, :q],
        instr_clean=z[complete],
        clusters=clusters,
        clusters_clean=clusters[complete],
    )
 def __init__(
     self,
     x: List[ndarray],
     z: List[ndarray],
     eps: NDArray,
     w: NDArray,
     *,
     sigma: Optional[ndarray] = None,
     debiased: bool = False,
     constraints: Optional[LinearConstraint] = None,
 ) -> None:
     """Store the GMM covariance inputs and options without processing."""
     # Regressors and instruments (one array per entry in each list).
     self._x, self._z = x, z
     # Residuals and weighting matrix.
     self._eps, self._w = eps, w
     # Optional inputs and settings.
     self._sigma = sigma
     self._debiased = debiased
     self._constraints = constraints
     # Display name and configuration summary.
     self._name = "GMM Homoskedastic (Unadjusted) Covariance"
     self._cov_config = AttrDict(debiased=debiased)
Exemple #25
0
def split_block(block):
    """Cut a block of result lines into four sections and parse each.

    The sections are peeled off from the end: everything two lines after a
    ``** Sigma **`` marker, then everything two lines after a
    ``** Variance **`` marker, then statistics starting at the first line
    containing ``chi2_`` or ``F_``; the remaining leading lines are the
    parameters.

    NOTE(review): if any marker is absent the matching local is never
    assigned and the final call raises UnboundLocalError - confirm inputs
    always contain all three.

    Returns
    -------
    AttrDict
        ``sigma``, ``variance``, ``stats`` and ``params``, each produced by
        the corresponding ``process_*`` helper.
    """
    # Work on a shallow copy of the list.
    block = block[:]
    # ``block`` is rebound inside each loop, but enumerate keeps iterating
    # over the list it was given at loop entry.
    for i, line in enumerate(block):
        if "** Sigma **" in line:
            sigma = block[i + 2:]
            block = block[:i]
    for i, line in enumerate(block):
        if "** Variance **" in line:
            variance = block[i + 2:]
            block = block[:i]
    for i, line in enumerate(block):
        if "chi2_" in line or "F_" in line:
            stats = block[i:]
            params = block[:i]
            break
    return AttrDict(
        sigma=process_sigma(sigma),
        variance=process_variance(variance),
        stats=process_stats(stats),
        params=process_params(params),
    )
Exemple #26
0
def parse_block(block):
    """Parse one tab-separated block into parameters, covariance and weights.

    The block holds up to three sections separated by lines starting with
    ``***``: parameter/statistic rows, a covariance table and, optionally, a
    weight-matrix table.  Each table starts two lines after its marker.

    Parameters
    ----------
    block : list of str
        Lines of a single result block.

    Returns
    -------
    AttrDict
        ``params`` (rows that have a t-stat), ``stats`` (the ``params``
        column of rows without one), ``cov`` and ``weight_mat`` (an empty
        list when the block has no weight-matrix section).
    """
    block = [line.strip().split("\t") for line in block]
    params = []
    cov = []
    weight_mat = []
    last = 0
    for i, line in enumerate(block):
        last = i
        if len(line) == 2:
            params.append(line)
        elif len(line) == 1:
            if line[0].startswith("***"):
                break
            try:
                # A lone numeric field is the t-stat of the previous row.
                float(line[0])
                params[-1].append(line[0])
            except ValueError:
                pass
    params = pd.DataFrame(params, columns=["variable", "params", "tstats"])
    params = repl_const(params.set_index("variable"))
    # Rows without a t-stat are summary statistics, not coefficients.
    stats = params.loc[params.tstats.isnull(), "params"]
    params = params.loc[params.tstats.notnull()]

    # Track the offset inside the covariance slice.  The previous code
    # reused the stale index of the first loop, so ``last`` did not land on
    # the next marker and the weight matrix was sliced from the wrong place.
    offset = 0
    for offset, line in enumerate(block[last + 2:]):
        if len(line) == 1 and line[0].startswith("***"):
            break
        cov.append(line)
    cov[0].insert(0, "variable")
    # ``last`` now indexes the weight-matrix marker, or the final line when
    # there is no such section.
    last += offset + 2

    cov = pd.DataFrame(cov[1:], columns=cov[0])
    cov = repl_const(cov.set_index("variable"))

    if len(block) > (last + 1):
        weight_mat = block[last + 2:]
        weight_mat[0].insert(0, "variable")
        weight_mat = pd.DataFrame(weight_mat[1:], columns=weight_mat[0])
        weight_mat = repl_const(weight_mat.set_index("variable"))

    return AttrDict(params=params, cov=cov, weight_mat=weight_mat, stats=stats)
Exemple #27
0
def test_model_missing(data):
    """IV2SLS must warn once and drop every row with a missing value.

    NaNs are written into deep copies of the fixture arrays so the shared
    fixture itself is left untouched.
    """
    import copy

    local = AttrDict()
    for name in data:
        local[name] = copy.deepcopy(data[name])
    data = local

    # Different strides so the missing rows only partly overlap.
    data.dep[::7, :] = np.nan
    data.exog[::13, :] = np.nan
    data.endog[::23, :] = np.nan
    data.instr[::29, :] = np.nan

    with warnings.catch_warnings(record=True) as caught:
        mod = IV2SLS(data.dep, data.exog, data.endog, data.instr)
    assert len(caught) == 1
    res = mod.fit()

    arrays = [data.dep, data.exog, data.endog, data.instr]
    row_flags = [np.any(np.isnan(array), 1) for array in arrays]
    # A row is missing when any of the four arrays has a NaN in it.
    missing = np.any(np.c_[row_flags], 0)
    expected_nobs = missing.shape[0] - missing.sum()
    assert res.nobs == expected_nobs
    assert_equal(mod.isnull, missing)
    assert_equal(mod.notnull, ~missing)
Exemple #28
0
def test_gls_without_mv_ols_equiv(mvreg_data):
    """SUR fitted with ``method="ols"`` must match multivariate LS.

    System-level and per-equation results are compared for the default
    covariance, the robust covariance and the debiased robust covariance.
    """
    dependent, exog = mvreg_data
    mv_mod = SUR.multivariate_ls(dependent, exog)
    mv_res = mv_mod.fit()
    keys = mv_res.equation_labels
    n_eq = dependent.shape[1]

    # Build the equivalent system one dependent column at a time.
    ad = AttrDict()
    for i in range(n_eq):
        key = "dependent.{0}".format(i)
        df = DataFrame(dependent[:, [i]], columns=[key])
        ad[key] = {"dependent": df, "exog": exog.copy()}
    gls_mod = SUR(ad)

    def compare(mv_fit, gls_fit):
        # Compare the system results, then each equation in label order.
        check_results(mv_fit, gls_fit)
        for i in range(n_eq):
            check_results(mv_fit.equations[keys[i]], gls_fit.equations[keys[i]])

    compare(mv_res, gls_mod.fit(method="ols"))

    for options in (
        {"cov_type": "robust"},
        {"cov_type": "robust", "debiased": True},
    ):
        compare(mv_mod.fit(**options), gls_mod.fit(method="ols", **options))
Exemple #29
0
def simple_sur(y, x):
    """Direct reference implementation of a two-step SUR estimator.

    Returns an ``AttrDict`` with ``beta0``/``eps0`` (equation-by-equation
    least squares) and ``beta1``/``xpx``/``xpy`` from the stacked system
    weighted by the inverse of ``kron(sigma, I)``.
    """
    out = AttrDict()
    k = len(y)

    # Step 1: least squares on each equation separately.
    first_step = []
    residuals = []
    for i in range(k):
        coef = lstsq(x[i], y[i], rcond=None)[0]
        first_step.append(coef)
        residuals.append(y[i] - x[i] @ coef)
    out["beta0"] = np.vstack(first_step)
    out["eps0"] = residuals

    stacked_eps = np.hstack(residuals)
    nobs = stacked_eps.shape[0]
    sigma = stacked_eps.T @ stacked_eps / nobs
    omegainv = np.linalg.inv(np.kron(sigma, np.eye(nobs)))

    # Step 2: stack the system with a block-diagonal regressor matrix.
    by = np.vstack([y[i] for i in range(k)])
    block_rows = [
        np.hstack(
            [
                x[i] if i == j else np.zeros((nobs, x[j].shape[1]))
                for j in range(k)
            ]
        )
        for i in range(k)
    ]
    bx = np.vstack(block_rows)

    xpx = bx.T @ omegainv @ bx
    xpy = bx.T @ omegainv @ by
    out["beta1"] = np.linalg.solve(xpx, xpy)
    out["xpx"] = xpx
    out["xpy"] = xpy

    return out
 def cov_config(self) -> AttrDict:
     """Optional configuration information used in covariance"""
     # Copy the stored settings so adding the bandwidth leaves them intact.
     settings = AttrDict(list(self._cov_config.items()))
     settings["bandwidth"] = self.bandwidth
     return settings