def finalize(params, stats, cov, weight_mat):
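    # Flatten estimation output into an AttrDict: copy every entry of the stats
    # Series, alias 'mss'/'rss'/'r2'/'r2_a' to their longer names, and expose
    # chi2 (or F) as a generic f_statistic.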
    tstats = params.tstats
    params = params.params
    out = AttrDict(params=params,
                   tstats=tstats,
                   stats=stats,
                   cov=cov,
                   weight_mat=weight_mat)
    for key in stats.index:
        out[key] = stats[key]
    fixes = {
        'model_ss': 'mss',
        'resid_ss': 'rss',
        'rsquared': 'r2',
        'rsquared_adj': 'r2_a'
    }
    for key in fixes:
        if fixes[key] in out:
            out[key] = out[fixes[key]]
        else:
            out[key] = None
    if 'chi2' in out:
        out['f_statistic'] = out['chi2']
    elif 'F' in out:
        out['f_statistic'] = out['F']
    else:
        out['f_statistic'] = None

    return out
Example #2
def data():
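    # Simulated IV design: (x, z, e) are drawn jointly normal with pairwise
    # correlation rho, the error is correlated only with the first two regressors
    # (making them endogenous), and y = x @ params + e; X'ZZ'X / n and its inverse
    # are precomputed for later use.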
    n, k, p = 1000, 5, 3
    np.random.seed(12345)
    clusters = np.random.randint(0, 10, n)
    rho = 0.5
    r = np.zeros((k + p + 1, k + p + 1))
    r.fill(rho)
    r[-1, 2:] = 0
    r[2:, -1] = 0
    r[-1, -1] = 0.5
    r += np.eye(k + p + 1) * 0.5
    v = np.random.multivariate_normal(np.zeros(r.shape[0]), r, n)
    x = v[:, :k]
    z = v[:, 2:k + p]
    e = v[:, [-1]]
    params = np.arange(1, k + 1) / k
    params = params[:, None]
    y = x @ params + e
    nobs, nvar = x.shape
    xzizx = x.T @ z @ z.T @ x / nobs
    xzizx_inv = inv(xzizx)
    return AttrDict(nobs=nobs,
                    e=e,
                    x=x,
                    y=y,
                    z=z,
                    params=params,
                    clusters=clusters,
                    nvar=nvar,
                    i=np.eye(k + p - 2),
                    xzizx=xzizx,
                    xzizx_inv=xzizx_inv)
Example #3
def finalize(params, stats, cov, weight_mat):
    tstats = params.tstats
    params = params.params
    out = AttrDict(params=params,
                   tstats=tstats,
                   stats=stats,
                   cov=cov,
                   weight_mat=weight_mat)
    for key in stats.index:
        out[key] = stats[key]
    fixes = {
        "model_ss": "mss",
        "resid_ss": "rss",
        "rsquared": "r2",
        "rsquared_adj": "r2_a",
    }
    for key in fixes:
        if fixes[key] in out:
            out[key] = out[fixes[key]]
        else:
            out[key] = None
    if "chi2" in out:
        out["f_statistic"] = out["chi2"]
    elif "F" in out:
        out["f_statistic"] = out["F"]
    else:
        out["f_statistic"] = None

    return out
Example #4
def data():
    idx = date_range('2000-01-01', periods=100)
    df1 = DataFrame(np.arange(100)[:, None], columns=['A'], index=idx)
    x = np.reshape(np.arange(200), (100, 2))
    df2 = DataFrame(x, columns=['B', 'C'], index=idx[::-1])
    s = Series(300 + np.arange(100), index=idx, name='D')
    return AttrDict(df1=df1, df2=df2, s=s)
Example #5
    def _multivariate_ls_finalize(self, beta, eps, sigma, cov_type, **cov_config):
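        # Finalize equation-by-equation OLS results: choose the covariance estimator,
        # compute each equation's total sum of squares (demeaned when a constant is
        # present), collect per-equation stats, and wrap everything in SURResults.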
        k = len(self._wx)

        # Covariance estimation
        if cov_type == 'unadjusted':
            cov_est = HomoskedasticCovariance
        else:
            cov_est = HeteroskedasticCovariance
        cov = cov_est(self._wx, eps, sigma, sigma, gls=False,
                      constraints=self._constraints, **cov_config).cov

        individual = AttrDict()
        debiased = cov_config.get('debiased', False)
        for i in range(k):
            wy = wye = self._wy[i]
            w = self._w[i]
            cons = int(self.has_constant.iloc[i])
            if cons:
                wc = np.ones_like(wy) * np.sqrt(w)
                wye = wy - wc @ np.linalg.lstsq(wc, wy, rcond=None)[0]
            total_ss = float(wye.T @ wye)

            stats = self._common_indiv_results(i, beta, cov, eps, eps, 'OLS',
                                               cov_type, 0, debiased, cons, total_ss)
            key = self._eq_labels[i]
            individual[key] = stats

        nobs = eps.size
        results = self._common_results(beta, cov, 'OLS', 0, nobs, cov_type,
                                       sigma, individual, debiased)
        results['wresid'] = results.resid

        return SURResults(results)
Example #6
def generate_data(nfactor=3,
                  nportfolio=25,
                  nobs=1000,
                  premia=None,
                  output="pandas",
                  alpha=False):
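    # Simulate a linear factor model: factors share a common component, volatility
    # and premia are scaled down, betas are chi-square draws, and portfolios add
    # idiosyncratic noise and (optionally) spread-out alphas; output is converted
    # to labeled DataFrames when requested.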
    np.random.seed(12345)
    if premia is None:
        premia = np.arange(1, nfactor + 1) / (10 * nfactor)
    rho = 0.2
    e = np.random.randn(nobs, nfactor)
    factors = rho * np.random.randn(nobs, 1) + np.sqrt(1 - rho**2) * e
    factors = np.sqrt(0.20**2 / 12) * factors
    factors += premia[None, :] / 12
    idio = np.sqrt(0.10**2 / 12) * np.random.randn(nobs, nportfolio)
    betas = np.random.chisquare(2, (nfactor, nportfolio)) / 2.0
    portfolios = factors @ betas + idio
    if alpha:
        portfolios += np.arange(nportfolio)[None, :] / nportfolio / 100
    index = pd.date_range("1930-1-1", periods=nobs, freq="D")
    if output == "pandas":
        cols = ["factor_{0}".format(i) for i in range(1, nfactor + 1)]
        factors = pd.DataFrame(factors, columns=cols, index=index)
        cols = ["port_{0}".format(i) for i in range(1, nportfolio + 1)]
        portfolios = pd.DataFrame(portfolios, columns=cols, index=index)

    return AttrDict(factors=factors, portfolios=portfolios)
Example #7
    def _common_results(self, beta, cov, method, iter_count, nobs, cov_type,
                        sigma, individual, debiased):
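        # Shared result assembly: record metadata, parameters, and covariance, then
        # sum residual and total SS across equations to compute the system R2.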
        results = AttrDict()
        results['method'] = method
        results['iter'] = iter_count
        results['nobs'] = nobs
        results['cov_type'] = cov_type
        results['index'] = self._dependent[0].rows
        results['sigma'] = sigma
        results['individual'] = individual
        results['params'] = beta
        results['df_model'] = beta.shape[0]
        results['param_names'] = self._param_names
        results['cov'] = cov
        results['debiased'] = debiased

        total_ss = resid_ss = 0.0
        resid = []
        for key in individual:
            total_ss += individual[key].total_ss
            resid_ss += individual[key].resid_ss
            resid.append(individual[key].resid)
        resid = hstack(resid)

        results['resid_ss'] = resid_ss
        results['total_ss'] = total_ss
        results['r2'] = 1.0 - results.resid_ss / results.total_ss
        results['resid'] = resid
        results['constraints'] = self._constraints
        results['model'] = self

        return results
Example #8
def data(request):
    moments = np.random.randn(500, 10)
    jacobian = np.random.rand(10, 8)
    jacobian_inv = np.eye(10)
    return AttrDict(moments=moments,
                    jacobian=jacobian,
                    inv_jacobian=jacobian_inv)
def data():
    return AttrDict(
        dep=SIMULATED_DATA.y_robust,
        exog=add_constant(SIMULATED_DATA[["x3", "x4", "x5"]]),
        endog=SIMULATED_DATA[["x1", "x2"]],
        instr=SIMULATED_DATA[["z1", "z2"]],
    )
def data():
    n, q, k, p = 1000, 2, 5, 3
    np.random.seed(12345)
    clusters = np.random.randint(0, 10, n)
    rho = 0.5
    r = np.zeros((k + p + 1, k + p + 1))
    r.fill(rho)
    r[-1, 2:] = 0
    r[2:, -1] = 0
    r[-1, -1] = 0.5
    r += np.eye(k + p + 1) * 0.5
    v = np.random.multivariate_normal(np.zeros(r.shape[0]), r, n)
    x = v[:, :k]
    z = v[:, k:k + p]
    e = v[:, [-1]]
    params = np.arange(1, k + 1) / k
    params = params[:, None]
    y = x @ params + e
    xhat = z @ np.linalg.pinv(z) @ x
    nobs, nvar = x.shape
    s2 = e.T @ e / nobs
    s2_debiased = e.T @ e / (nobs - nvar)
    v = xhat.T @ xhat / nobs
    vinv = np.linalg.inv(v)
    kappa = 0.99
    vk = (x.T @ x * (1 - kappa) + kappa * xhat.T @ xhat) / nobs
    return AttrDict(nobs=nobs, e=e, x=x, y=y, z=z, xhat=xhat,
                    params=params, s2=s2, s2_debiased=s2_debiased,
                    clusters=clusters, nvar=nvar, v=v, vinv=vinv, vk=vk,
                    kappa=kappa, dep=y, exog=x[:, q:], endog=x[:, :q],
                    instr=z)
Example #11
def data():
    idx = date_range("2000-01-01", periods=100)
    df1 = DataFrame(np.arange(100)[:, None], columns=["A"], index=idx)
    x = np.reshape(np.arange(200), (100, 2))
    df2 = DataFrame(x, columns=["B", "C"], index=idx[::-1])
    s = Series(300 + np.arange(100), index=idx, name="D")
    return AttrDict(df1=df1, df2=df2, s=s)
Example #12
def generate_3sls_data_v2(n=500, k=3, nexog=3, nendog=2, ninstr=3, const=True, rho=0.5,
                          output_dict=True, seed=1234, omitted='none'):
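    # Simulate a k-equation system: in each equation the exogenous variables and
    # instruments share a common factor, the endogenous regressors load on them plus
    # the equation error, and the dependent variable follows known params; equations
    # are returned as AttrDicts (or tuples), with empty blocks set to None or dropped.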
    np.random.seed(seed)
    eqns = AttrDict()
    for i in range(k):
        exog_instr = np.random.standard_normal((n, ninstr + nexog))
        f = np.random.standard_normal((n, 1))
        exog_instr = np.sqrt(rho) * f + np.sqrt(1 - rho) * exog_instr
        exog = exog_instr[:, :nexog]
        instr = exog_instr[:, nexog:]
        eps = np.random.standard_normal((n, 1))
        endog = np.empty((n, nendog))
        for j in range(nendog):
            c = np.random.chisquare(2, (ninstr + nexog, 1)) / 2
            scale = np.arange(1, ninstr + nexog + 1) / (ninstr + nexog)
            scale = scale / scale.sum()
            c = c * scale[:, None]
            endog[:, [j]] = exog_instr @ c + eps + np.random.standard_normal((n, 1))
        params = np.arange(1, nendog + nexog + const + 1)[:, None]
        x = np.hstack([exog, endog])
        if const:
            x = np.hstack([np.ones((n, 1)), x])
            exog = np.hstack([np.ones((n, 1)), exog])
        dep = x @ params + eps + nendog * np.random.standard_normal((n, 1))
        if omitted == 'none' or omitted == 'drop':
            if exog.shape[1] == 0:
                exog = None
            if endog.shape[1] == 0:
                endog = None
            if instr.shape[1] == 0:
                instr = None
        eqn = AttrDict(dependent=dep, exog=exog, endog=endog, instruments=instr,
                       params=params)
        eqns['eqn.{0}'.format(i)] = eqn
    if not output_dict:
        for key in eqns:
            eq = eqns[key]
            eqns[key] = (eq.dependent, eq.exog, eq.endog, eq.instruments)
    else:
        if omitted == 'drop':
            for key in eqns:
                eq = eqns[key]
                for key2 in ('exog', 'endog', 'instruments'):
                    if eq[key2] is None:
                        del eq[key2]

    return eqns
Example #13
def lsdv_config(request):
    weights, entity_effects, time_effects, other_effects = request.param
    return AttrDict(
        weights=weights,
        entity_effects=entity_effects,
        time_effects=time_effects,
        other_effects=other_effects,
    )
Example #14
def const_data(request):
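    # Constant-only design: dependent variable from the generated panel, an all-ones
    # 'Const' regressor with the same index, and the corresponding weights.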
    missing, datatype = request.param
    data = generate_data(missing, datatype, ntk=(91, 7, 1))
    y = PanelData(data.y).dataframe
    x = y.copy()
    x.iloc[:, :] = 1
    x.columns = ['Const']
    return AttrDict(y=y, x=x, w=PanelData(data.w).dataframe)
Example #15
def generate_data(nkp=(1000, 5, 3)):
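    # Extended IV simulation: the same correlated design as the smaller fixtures plus
    # first-stage fitted values, homoskedastic variance estimates, a kappa-weighted
    # combination of X'X and Xhat'Xhat (as used by k-class estimators), and
    # dep/exog/endog/instr splits ready for model input.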
    n, k, p = nkp
    np.random.seed(12345)
    clusters = np.random.randint(0, 10, n)
    rho = 0.5
    r = np.zeros((k + p + 1, k + p + 1))
    r.fill(rho)
    r[-1, 2:] = 0
    r[2:, -1] = 0
    r[-1, -1] = 0.5
    r += np.eye(k + p + 1) * 0.5
    v = np.random.multivariate_normal(np.zeros(r.shape[0]), r, n)

    x = v[:, :k]
    z = v[:, 2:k + p]
    e = v[:, [-1]]
    endog = x[:, :2]
    exog = x[:, 2:]
    instr = z[:, k - 2:]
    params = np.arange(1, k + 1) / k
    params = params[:, None]
    y = x @ params + e
    dep = y
    xhat = z @ np.linalg.pinv(z) @ x
    nobs, nvar = x.shape
    s2 = e.T @ e / nobs
    s2_debiased = e.T @ e / (nobs - nvar)
    v = xhat.T @ xhat / nobs
    vinv = np.linalg.inv(v)
    kappa = 0.99
    vk = (x.T @ x * (1 - kappa) + kappa * xhat.T @ xhat) / nobs
    xzizx = x.T @ z @ z.T @ x / nobs
    xzizx_inv = np.linalg.inv(xzizx)

    return AttrDict(
        nobs=nobs,
        e=e,
        x=x,
        y=y,
        z=z,
        xhat=xhat,
        params=params,
        s2=s2,
        s2_debiased=s2_debiased,
        clusters=clusters,
        nvar=nvar,
        v=v,
        vinv=vinv,
        vk=vk,
        i=np.eye(k + p - 2),
        kappa=kappa,
        xzizx=xzizx,
        xzizx_inv=xzizx_inv,
        dep=dep,
        exog=exog,
        endog=endog,
        instr=instr,
    )
Example #16
 def __init__(self, results: AttrDict) -> None:
     super(SystemEquationResult, self).__init__(results)
     self._eq_label = results.eq_label
     self._dependent = results.dependent
     self._f_statistic = results.f_stat
     self._r2a = results.r2a
     self._instruments = results.instruments
     self._endog = results.endog
     self._weight_estimator = results.get("weight_estimator", None)
Example #17
def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects=0, rng=None):
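    # Simulate a balanced (t x n) panel with k regressors, optional constant,
    # chi-square weights, extra effect categories, and randomly placed NaNs;
    # the pandas/xarray branches build pd.Panel objects, so they assume a pandas
    # version that still ships Panel.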
    if rng is None:
        np.random.seed(12345)
    else:
        np.random.set_state(rng.get_state())

    n, t, k = ntk
    k += const
    x = standard_normal((k, t, n))
    beta = np.arange(1, k + 1)[:, None, None] / k
    y = (x * beta).sum(0) + standard_normal((t, n)) + 2 * standard_normal((1, n))
    w = np.random.chisquare(5, (t, n)) / 5
    c = None
    if other_effects == 1:
        cats = ['Industries']
    else:
        cats = ['cat.' + str(i) for i in range(other_effects)]
    if other_effects:
        c = np.random.randint(0, 4, (other_effects, t, n))

    vcats = ['varcat.' + str(i) for i in range(2)]
    vc2 = np.ones((2, t, 1)) @ np.random.randint(0, n // 2, (2, 1, n))
    vc1 = vc2[[0]]

    if const:
        x[0] = 1.0

    if missing > 0:
        locs = np.random.choice(n * t, int(n * t * missing))
        y.flat[locs] = np.nan
        locs = np.random.choice(n * t * k, int(n * t * k * missing))
        x.flat[locs] = np.nan

    if datatype in ('pandas', 'xarray'):
        entities = ['firm' + str(i) for i in range(n)]
        time = pd.date_range('1-1-1900', periods=t, freq='A-DEC')
        vars = ['x' + str(i) for i in range(k)]
        y = pd.DataFrame(y, index=time, columns=entities)
        w = pd.DataFrame(w, index=time, columns=entities)
        x = pd.Panel(x, items=vars, major_axis=time, minor_axis=entities)
        c = pd.Panel(c, items=cats, major_axis=time, minor_axis=entities)
        vc1 = pd.Panel(vc1, items=vcats[:1], major_axis=time, minor_axis=entities)
        vc2 = pd.Panel(vc2, items=vcats, major_axis=time, minor_axis=entities)

    if datatype == 'xarray':
        x = xr.DataArray(x)
        y = xr.DataArray(y)
        w = xr.DataArray(w)
        c = xr.DataArray(c)
        vc1 = xr.DataArray(vc1)
        vc2 = xr.DataArray(vc2)

    if rng is not None:
        rng.set_state(np.random.get_state())

    return AttrDict(y=y, x=x, w=w, c=c, vc1=vc1, vc2=vc2)
Example #18
def data(request):
    steps, robust = request.param
    weight_type = 'robust' if robust else 'unadjusted'
    eqns = generate_3sls_data_v2(k=3)
    y = [eqns[key].dependent for key in eqns]
    x = [np.concatenate([eqns[key].exog, eqns[key].endog], 1) for key in eqns]
    z = [np.concatenate([eqns[key].exog, eqns[key].instruments], 1) for key in eqns]

    return AttrDict(eqns=eqns, x=x, y=y, z=z, steps=steps,
                    robust=robust, weight_type=weight_type)
Example #19
def test_formula_equivalence_weights(data):
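    # Attach random chi-square weights to each equation, then fit the same system
    # GMM model from explicit arrays and from formulas and check that both give
    # identical parameter estimates.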
    weights = AttrDict()
    eqn_copy = AttrDict()
    for key in data.eqns:
        eqn = {k: v for k, v in data.eqns[key].items()}
        nobs = eqn['dependent'].shape[0]
        w = np.random.chisquare(2, (nobs, 1)) / 2
        weights[key] = w
        eqn['weights'] = w
        eqn_copy[key] = eqn

    mod = IVSystemGMM(eqn_copy, weight_type='unadjusted')
    df = []
    formulas = OrderedDict()
    for i, key in enumerate(data.eqns):
        eqn = data.eqns[key]
        dep = eqn.dependent
        ex = eqn.exog
        en = eqn.endog
        instr = eqn.instruments
        dep = pd.DataFrame(dep, columns=['dep_{0}'.format(i)])
        has_const = False
        if np.any(np.all(ex == 1, 0)):
            ex = ex[:, 1:]
            has_const = True
        ex = pd.DataFrame(ex, columns=['ex_{0}_{1}'.format(i, j) for j in range(ex.shape[1])])
        en = pd.DataFrame(en, columns=['en_{0}_{1}'.format(i, j) for j in range(en.shape[1])])
        instr = pd.DataFrame(instr, columns=['instr_{0}_{1}'.format(i, j)
                                             for j in range(instr.shape[1])])
        fmla = ''.join(dep.columns) + ' ~  '
        if has_const:
            fmla += ' 1 + '
        fmla += ' + '.join(ex.columns) + ' + ['
        fmla += ' + '.join(en.columns) + ' ~ '
        fmla += ' + '.join(instr.columns) + ' ] '
        formulas[key] = fmla
        df.extend([dep, ex, en, instr])
    df = pd.concat(df, axis=1)
    formula_mod = IVSystemGMM.from_formula(formulas, df, weights=weights, weight_type='unadjusted')
    res = mod.fit(cov_type='unadjusted')
    formula_res = formula_mod.fit(cov_type='unadjusted')
    assert_allclose(res.params, formula_res.params)
Example #20
    def _gls_finalize(self, beta, sigma, full_sigma, gls_eps, eps, cov_type,
                      iter_count, **cov_config):
        """Collect results to return after GLS estimation"""
        wx = self._wx
        k = len(self._wy)

        # Covariance estimation
        if cov_type == 'unadjusted':
            cov_est = HomoskedasticCovariance
        else:
            cov_est = HeteroskedasticCovariance
        gls_eps = reshape(gls_eps, (k, gls_eps.shape[0] // k)).T
        eps = reshape(eps, (k, eps.shape[0] // k)).T
        cov = cov_est(wx,
                      gls_eps,
                      sigma,
                      full_sigma,
                      gls=True,
                      constraints=self._constraints,
                      **cov_config).cov

        # Repackage results for individual equations
        individual = AttrDict()
        debiased = cov_config.get('debiased', False)
        method = 'Iterative GLS' if iter_count > 1 else 'GLS'
        for i in range(k):
            cons = int(self.has_constant.iloc[i])

            if cons:
                c = np.sqrt(self._w[i])
                ye = self._wy[i] - c @ np.linalg.lstsq(c, self._wy[i], rcond=None)[0]
            else:
                ye = self._wy[i]
            total_ss = float(ye.T @ ye)
            stats = self._common_indiv_results(i, beta, cov, gls_eps, eps,
                                               method, cov_type, iter_count,
                                               debiased, cons, total_ss)

            key = self._eq_labels[i]
            individual[key] = stats

        # Populate results dictionary
        nobs = eps.size
        results = self._common_results(beta, cov, method, iter_count, nobs,
                                       cov_type, sigma, individual, debiased)

        # wresid is different between GLS and OLS
        wresid = []
        for key in individual:
            wresid.append(individual[key].wresid)
        wresid = hstack(wresid)
        results['wresid'] = wresid

        return SURResults(results)
Example #21
def kernel(request):
    kernel_name = request.param
    if kernel_name == "bartlett":
        weight_func = kernel_weight_bartlett
        alt_names = ["newey-west"]
    elif kernel_name == "parzen":
        weight_func = kernel_weight_parzen
        alt_names = ["gallant"]
    else:
        weight_func = kernel_weight_quadratic_spectral
        alt_names = ["quadratic-spectral", "andrews"]
    return AttrDict(kernel=kernel_name, alt_names=alt_names, weight=weight_func)
def model_data(request):
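    # Build a SUR test case from the 'dgp-modeltype' key: pick the data set, rekey
    # each equation by its dependent-variable name, optionally add cross-equation
    # constraints, fit the model, and bundle it with the matching stata_results entry.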
    key = request.param
    dgp, model_type = key.split("-")
    if dgp == "basic":
        data = basic_data
    elif dgp == "common":
        data = common_data
        for i, data_key in enumerate(data):
            if i == 0:
                exog = data[data_key]["exog"]
            else:
                data[data_key]["exog"] = exog
    else:  # dgp == 'missing'
        data = missing_data
    cov_kwds = {"cov_type": "unadjusted"}
    if model_type == "ss":
        cov_kwds["debiased"] = True
    stata_result = stata_results[key]
    rekeyed_data = {}
    for data_key in data:
        temp = data[data_key]
        new_key = temp["dependent"].columns[0]
        rekeyed_data[new_key] = temp
    constraint = None
    if model_type == "constrained":
        cols = []
        widths = []
        for new_key in rekeyed_data:
            exog = rekeyed_data[new_key]["exog"]
            cols.extend([new_key + "_" + col for col in exog.columns])
            widths.append(exog.shape[1])
        r = pd.DataFrame(columns=cols, index=["r0", "r1"], dtype=np.float64)
        r.iloc[:, :] = 0.0
        r.iloc[:, 0] = -1.0
        r.iloc[0, widths[0]] = 1.0
        r.iloc[1, widths[0] + widths[1]] = 1.0
        constraint = r

    mod = SUR(rekeyed_data)
    if constraint is not None:
        mod.add_constraints(constraint)
    res = mod.fit(**cov_kwds)

    return AttrDict(
        data=rekeyed_data,
        cov_kwds=cov_kwds,
        model_type=model_type,
        stata_result=stata_result,
        key=key,
        constraint=constraint,
        mod=mod,
        res=res,
    )
Example #23
    def config(self):
        """
        Weight estimator configuration

        Returns
        -------
        config : AttrDict
            Dictionary containing weight estimator configuration information
        """
        out = AttrDict([(k, v) for k, v in self._config.items()])
        out['bandwidth'] = self.bandwidth
        return out
Example #24
 def __init__(self,
              x,
              eps,
              sigma,
              full_sigma,
              *,
              gls=False,
              debiased=False,
              constraints=None):
     self._eps = eps
     self._x = x
     self._nobs = eps.shape[0]
     self._k = len(x)
     self._sigma = sigma
     self._full_sigma = full_sigma
     self._gls = gls
     self._debiased = debiased
     self._constraints = constraints
     self._name = 'Homoskedastic (Unadjusted) Covariance'
     self._str_extra = AttrDict(Debiased=self._debiased, GLS=self._gls)
     self._cov_config = AttrDict(debiased=self._debiased)
Example #25
def simple_3sls(y, x, z):
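    # Reference 3SLS: per-equation 2SLS (project x on z, regress y on the fitted
    # values), estimate the residual covariance, then solve the stacked GLS system
    # using the Kronecker-structured weight omega = kron(sigma, I).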
    out = AttrDict()
    k = len(y)
    b = []
    eps = []
    xhat = []
    for i in range(k):
        xhat.append(z[i] @ lstsq(z[i], x[i])[0])
        b.append(lstsq(xhat[i], y[i])[0])
        eps.append(y[i] - x[i] @ b[-1])
    b = np.vstack(b)
    out['beta0'] = b
    out['eps0'] = eps
    eps = np.hstack(eps)
    nobs = eps.shape[0]
    sigma = eps.T @ eps / nobs
    out['sigma'] = sigma
    omega = np.kron(sigma, np.eye(nobs))
    omegainv = np.linalg.inv(omega)
    by = np.vstack([y[i] for i in range(k)])
    bx = []
    for i in range(k):
        row = []
        for j in range(k):
            if i == j:
                row.append(xhat[i])
            else:
                row.append(np.zeros((nobs, xhat[j].shape[1])))
        row = np.hstack(row)
        bx.append(row)
    bx = np.vstack(bx)
    xpx = (bx.T @ omegainv @ bx)
    xpy = (bx.T @ omegainv @ by)
    beta1 = np.linalg.solve(xpx, xpy)
    out['beta1'] = beta1
    out['xpx'] = xpx
    out['xpy'] = xpy
    idx = 0
    eps = []
    for i in range(k):
        ki = x[i].shape[1]
        bi = beta1[idx:idx + ki]
        eps.append(y[i] - x[i] @ bi)
        idx += ki

    eps = np.hstack(eps)
    nobs = eps.shape[0]
    sigma = eps.T @ eps / nobs
    out['eps'] = eps
    out['cov'] = np.linalg.inv(bx.T @ omegainv @ bx)

    return out
Example #26
def kernel(request):
    kernel_name = request.param
    if kernel_name == 'bartlett':
        weight_func = kernel_weight_bartlett
        alt_names = ['newey-west']
    elif kernel_name == 'parzen':
        weight_func = kernel_weight_parzen
        alt_names = ['gallant']
    else:
        weight_func = kernel_weight_quadratic_spectral
        alt_names = ['quadratic-spectral', 'andrews']
    return AttrDict(kernel=kernel_name, alt_names=alt_names,
                    weight=weight_func)
def model_data(request):
    key = request.param
    dgp, model_type = key.split('-')
    if dgp == 'basic':
        data = basic_data
    elif dgp == 'common':
        data = common_data
        for i, data_key in enumerate(data):
            if i == 0:
                exog = data[data_key]['exog']
            else:
                data[data_key]['exog'] = exog
    else:  # dgp == 'missing'
        data = missing_data
    cov_kwds = {'cov_type': 'unadjusted'}
    if model_type == 'ss':
        cov_kwds['debiased'] = True
    stata_result = stata_results[key]
    rekeyed_data = OrderedDict()
    for data_key in data:
        temp = data[data_key]
        new_key = temp['dependent'].columns[0]
        rekeyed_data[new_key] = temp
    constraint = None
    if model_type == 'constrained':
        cols = []
        widths = []
        for new_key in rekeyed_data:
            exog = rekeyed_data[new_key]['exog']
            cols.extend([new_key + '_' + col for col in exog.columns])
            widths.append(exog.shape[1])
        r = pd.DataFrame(columns=cols, index=['r0', 'r1'], dtype=np.float64)
        r.iloc[:, :] = 0.0
        r.iloc[:, 0] = -1.0
        r.iloc[0, widths[0]] = 1.0
        r.iloc[1, widths[0] + widths[1]] = 1.0
        constraint = r

    mod = SUR(rekeyed_data)
    if constraint is not None:
        mod.add_constraints(constraint)
    res = mod.fit(**cov_kwds)

    return AttrDict(data=rekeyed_data,
                    cov_kwds=cov_kwds,
                    model_type=model_type,
                    stata_result=stata_result,
                    key=key,
                    constraint=constraint,
                    mod=mod,
                    res=res)
Example #28
 def __init__(
     self,
     x: List[ndarray],
     eps: NDArray,
     sigma: NDArray,
     full_sigma: NDArray,
     *,
     gls: bool = False,
     debiased: bool = False,
     constraints: Optional[LinearConstraint] = None,
 ) -> None:
     self._eps = eps
     self._x = x
     self._nobs = eps.shape[0]
     self._k = len(x)
     self._sigma = sigma
     self._full_sigma = full_sigma
     self._gls = gls
     self._debiased = debiased
     self._constraints = constraints
     self._name = "Homoskedastic (Unadjusted) Covariance"
     self._str_extra = AttrDict(Debiased=self._debiased, GLS=self._gls)
     self._cov_config = AttrDict(debiased=self._debiased)
Example #29
def data(request):
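    # Panel-regression fixture: parse the 'model-vcv-weights-missing' key, choose the
    # estimator, weights, and covariance options accordingly, fit the model, and
    # return it alongside the matching STATA_RESULTS entry.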
    model, vcv, weights, missing = request.param.split("-")
    y_vars = ["y"]
    x_vars = ["x1", "x2", "x3", "x4", "x5"]
    vars = y_vars + x_vars
    if missing:
        for i, v in enumerate(vars):
            vars[i] = v + missing
        y_vars = vars[:1]
        x_vars = vars[1:]
    y = sim_data[y_vars]
    x = sim_data[["intercept"] + x_vars]
    mod = MODELS[model]
    mod_options = {}
    if model == "fixed_effect":
        mod_options = {"entity_effects": True}
    if weights == "weighted":
        mod_options.update({"weights": sim_data["w"]})
    fit_options = {"debiased": True}
    if weights == "wls":
        fit_options.update({"reweight": True})
    if vcv == "robust" and model not in ("fixed_effect", "random_effect"):
        fit_options.update({"cov_type": "robust"})
    elif vcv in ("cluster", "robust"):
        y_data = PanelData(y)
        eid = y_data.entity_ids
        entities = pd.DataFrame(eid, index=y_data.index, columns=["firm_ids"])
        fit_options.update({"cov_type": "clustered", "clusters": entities})
    else:
        fit_options.update({"cov_type": "unadjusted"})

    if vcv == "cluster" or (model in ("fixed_effect", "random_effect")
                            and vcv == "robust"):
        fit_options.update({"group_debias": True})
    spec_mod = mod(y, x, **mod_options)
    fit = spec_mod.fit(**fit_options)
    return AttrDict(
        fit=fit,
        model=spec_mod,
        model_options=mod_options,
        y=y,
        x=x,
        stata=STATA_RESULTS[request.param],
        fit_options=fit_options,
        model_name=model,
        vcv=vcv,
        weights=weights,
        missing=missing,
    )
Example #30
def data(request):
    model, vcv, weights, missing = request.param.split('-')
    y_vars = ['y']
    x_vars = ['x1', 'x2', 'x3', 'x4', 'x5']
    vars = y_vars + x_vars
    if missing:
        for i, v in enumerate(vars):
            vars[i] = v + missing
        y_vars = vars[:1]
        x_vars = vars[1:]
    y = sim_data[y_vars]
    x = sim_data[['intercept'] + x_vars]
    mod = MODELS[model]
    mod_options = {}
    if model == 'fixed_effect':
        mod_options = {'entity_effects': True}
    if weights == 'weighted':
        mod_options.update({'weights': sim_data['w']})
    fit_options = {'debiased': True}
    if weights == 'wls':
        fit_options.update({'reweight': True})
    if vcv == 'robust' and model not in ('fixed_effect', 'random_effect'):
        fit_options.update({'cov_type': 'robust'})
    elif vcv in ('cluster', 'robust'):
        y_data = PanelData(y)
        eid = y_data.entity_ids
        entities = pd.DataFrame(eid, index=y_data.index, columns=['firm_ids'])
        fit_options.update({'cov_type': 'clustered', 'clusters': entities})
    else:
        fit_options.update({'cov_type': 'unadjusted'})

    if vcv == 'cluster' or (model in ('fixed_effect', 'random_effect')
                            and vcv == 'robust'):
        fit_options.update({'group_debias': True})
    spec_mod = mod(y, x, **mod_options)
    fit = spec_mod.fit(**fit_options)
    return AttrDict(fit=fit,
                    model=spec_mod,
                    model_options=mod_options,
                    y=y,
                    x=x,
                    stata=STATA_RESULTS[request.param],
                    fit_options=fit_options,
                    model_name=model,
                    vcv=vcv,
                    weights=weights,
                    missing=missing)