def test_attr_dict():
    ad = AttrDict()
    ad["one"] = "one"
    ad[1] = 1
    ad[("a", 2)] = ("a", 2)
    assert list(ad.keys()) == ["one", 1, ("a", 2)]
    assert len(ad) == 3

    ad2 = ad.copy()
    assert list(ad2.keys()) == list(ad.keys())
    assert ad.get("one", None) == "one"
    assert ad.get("two", False) is False

    k, v = ad.popitem()
    assert k == "one"
    assert v == "one"

    items = ad.items()
    assert (1, 1) in items
    assert (("a", 2), ("a", 2)) in items
    assert len(items) == 2

    values = ad.values()
    assert 1 in values
    assert ("a", 2) in values
    assert len(values) == 2

    ad2 = AttrDict()
    ad2[1] = 3
    ad2["one"] = "one"
    ad2["a"] = "a"
    ad.update(ad2)
    assert ad[1] == 3
    assert "a" in ad

    ad.__str__()
    with pytest.raises(AttributeError):
        ad.__private_dict__ = None
    with pytest.raises(AttributeError):
        ad.some_other_key
    with pytest.raises(KeyError):
        ad["__private_dict__"] = None

    del ad[1]
    assert 1 not in ad.keys()
    ad.new_value = "new_value"
    assert "new_value" in ad.keys()
    assert ad.new_value == ad["new_value"]

    for key in ad.keys():
        if isinstance(key, str):
            assert key in dir(ad)

    new_value = ad.pop("new_value")
    assert new_value == "new_value"

    del ad.one
    assert "one" not in ad.keys()

    ad.clear()
    assert list(ad.keys()) == []
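For context, the behavior exercised above is easiest to see as a compact sketch of an AttrDict-like class: insertion-ordered keys, item and attribute access that mirror each other for string keys, and a reserved backing-store name. This is an illustrative reimplementation consistent with the assertions in the test, not the library's actual class; the name MiniAttrDict is hypothetical, and the dict-protocol methods the test also exercises (get, pop, popitem, items, values, copy, update, clear) are omitted for brevity.

class MiniAttrDict:
    """Minimal sketch of an attribute-accessible ordered mapping (hypothetical)."""

    def __init__(self, **kwargs):
        # Contents live in a plain dict stored under a reserved name.
        # Writing through self.__dict__ bypasses __setattr__ below.
        self.__dict__["__private_dict__"] = dict(**kwargs)

    def __getitem__(self, key):
        return self.__dict__["__private_dict__"][key]

    def __setitem__(self, key, value):
        if key == "__private_dict__":
            raise KeyError("__private_dict__ is reserved")
        self.__dict__["__private_dict__"][key] = value

    def __delitem__(self, key):
        del self.__dict__["__private_dict__"][key]

    def __getattr__(self, key):
        # Only called when normal attribute lookup fails; mirror item access.
        try:
            return self.__dict__["__private_dict__"][key]
        except KeyError:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        if key == "__private_dict__":
            raise AttributeError("__private_dict__ is reserved")
        self.__dict__["__private_dict__"][key] = value

    def __delattr__(self, key):
        del self.__dict__["__private_dict__"][key]

    def __dir__(self):
        # Expose string keys as attributes, as the `key in dir(ad)` check expects.
        keys = [k for k in self.__dict__["__private_dict__"] if isinstance(k, str)]
        return sorted(set(super().__dir__()) | set(keys))

    def keys(self):
        return self.__dict__["__private_dict__"].keys()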
def fit(self, center=True, use_cue=False, steps=2, disp=10, max_iter=1000,
        cov_type='robust', debiased=True, **cov_config):
    """
    Estimate model parameters

    Parameters
    ----------
    center : bool, optional
        Flag indicating to center the moment conditions before computing
        the weighting matrix.
    use_cue : bool, optional
        Flag indicating to use the continuously updating estimator
    steps : int, optional
        Number of steps to use when estimating parameters.  2 corresponds
        to the standard efficient GMM estimator. Higher values will
        iterate until convergence or up to the number of steps given
    disp : int, optional
        Number of iterations between printed updates. 0 or negative
        values suppress output
    max_iter : int, positive, optional
        Maximum number of iterations when minimizing objective
    cov_type : str, optional
        Name of covariance estimator
    debiased : bool, optional
        Flag indicating whether to debias the covariance estimator using
        a degree of freedom adjustment
    **cov_config
        Additional covariance-specific options.  See Notes.

    Returns
    -------
    results : GMMFactorModelResults
        Results class with parameter estimates, covariance and test statistics

    Notes
    -----
    The kernel covariance estimator takes the optional arguments
    ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
    and ``bandwidth`` (a positive integer).
    """
    nobs, n = self.portfolios.shape
    k = self.factors.shape[1]
    excess_returns = not self._risk_free
    nrf = int(not bool(excess_returns))
    # 1. Starting values from the 2-pass estimator
    mod = LinearFactorModel(self.portfolios, self.factors,
                            risk_free=self._risk_free)
    res = mod.fit()
    betas = np.asarray(res.betas).ravel()
    lam = np.asarray(res.risk_premia)
    mu = self.factors.ndarray.mean(0)
    sv = np.r_[betas, lam, mu][:, None]
    g = self._moments(sv, excess_returns)
    g -= g.mean(0)[None, :] if center else 0

    if cov_type not in ('robust', 'heteroskedastic', 'kernel'):
        raise ValueError('Unknown weight: {0}'.format(cov_type))
    if cov_type in ('robust', 'heteroskedastic'):
        weight_est = HeteroskedasticWeight
        cov_est = HeteroskedasticCovariance
    else:  # 'kernel'
        weight_est = KernelWeight
        cov_est = KernelCovariance
    weight_est = weight_est(g, center=center, **cov_config)
    w = weight_est.w(g)

    args = (excess_returns, w)

    # 2. Step 1 using w = inv(s) from the starting values
    callback = callback_factory(self._j, args, disp=disp)
    res = minimize(self._j, sv, args=args, callback=callback,
                   options={'disp': bool(disp), 'maxiter': max_iter})
    params = res.x
    last_obj = res.fun
    iters = 1
    # 3. Step 2 (and later steps) using the previous step's estimates
    if not use_cue:
        while iters < steps:
            iters += 1
            g = self._moments(params, excess_returns)
            w = weight_est.w(g)
            args = (excess_returns, w)

            # Re-minimize with the updated weighting matrix
            callback = callback_factory(self._j, args, disp=disp)
            res = minimize(self._j, params, args=args, callback=callback,
                           options={'disp': bool(disp), 'maxiter': max_iter})
            params = res.x
            obj = res.fun
            if np.abs(obj - last_obj) < 1e-6:
                break
            last_obj = obj
    else:
        args = (excess_returns, weight_est)
        obj = self._j_cue
        callback = callback_factory(obj, args, disp=disp)
        res = minimize(obj, params, args=args, callback=callback,
                       options={'disp': bool(disp), 'maxiter': max_iter})
        params = res.x

    # 4. Compute final S and G for inference
    g = self._moments(params, excess_returns)
    s = g.T @ g / nobs
    jac = self._jacobian(params, excess_returns)
    cov_est = cov_est(g, jacobian=jac, center=center, debiased=debiased,
                      df=self.factors.shape[1], **cov_config)

    full_vcv = cov_est.cov
    sel = slice((n * k), (n * k + k + nrf))
    rp = params[sel]
    rp_cov = full_vcv[sel, sel]
    sel = slice(0, (n * (k + 1)), (k + 1))
    alphas = g.mean(0)[sel, None]
    alpha_vcv = s[sel, sel] / nobs
    stat = self._j(params, excess_returns, w)
    jstat = WaldTestStatistic(stat, 'All alphas are 0', n - k - nrf,
                              name='J-statistic')

    # R2 calculation
    betas = np.reshape(params[:(n * k)], (n, k))
    resids = self.portfolios.ndarray - self.factors.ndarray @ betas.T
    resids -= resids.mean(0)[None, :]
    residual_ss = (resids ** 2).sum()
    total = self.portfolios.ndarray
    total = total - total.mean(0)[None, :]
    total_ss = (total ** 2).sum()
    r2 = 1.0 - residual_ss / total_ss

    param_names = []
    for portfolio in self.portfolios.cols:
        for factor in self.factors.cols:
            param_names.append('beta-{0}-{1}'.format(portfolio, factor))
    if not excess_returns:
        param_names.append('lambda-risk_free')
    param_names.extend(['lambda-{0}'.format(f) for f in self.factors.cols])
    param_names.extend(['mu-{0}'.format(f) for f in self.factors.cols])
    rp_names = list(self.factors.cols)[:]
    if not excess_returns:
        rp_names.insert(0, 'risk_free')
    params = np.c_[alphas, betas]

    # 5. Return values
    res = AttrDict(params=params, cov=full_vcv, betas=betas, rp=rp,
                   rp_cov=rp_cov, alphas=alphas, alpha_vcv=alpha_vcv,
                   jstat=jstat, rsquared=r2, total_ss=total_ss,
                   residual_ss=residual_ss, param_names=param_names,
                   portfolio_names=self.portfolios.cols,
                   factor_names=self.factors.cols, name=self._name,
                   cov_type=cov_type, model=self, nobs=nobs,
                   rp_names=rp_names, iter=iters, cov_est=cov_est)

    return GMMFactorModelResults(res)
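A hedged usage sketch of the iterated GMM fit above. The class name LinearFactorModelGMM and its constructor signature are assumptions based on the surrounding code, and the data is synthetic; treat this as an illustration of the keyword options rather than the library's documented API.

import numpy as np

# Synthetic excess returns on 10 portfolios driven by 3 factors
rs = np.random.RandomState(0)
factors = rs.standard_normal((750, 3))
portfolios = factors @ rs.standard_normal((3, 10)) + rs.standard_normal((750, 10))

mod = LinearFactorModelGMM(portfolios, factors)  # hypothetical constructor
# Iterate up to 10 steps with a HAC weighting/covariance estimator;
# disp=0 suppresses the per-iteration output.
res = mod.fit(steps=10, disp=0, cov_type='kernel', kernel='bartlett', bandwidth=12)
print(res.jstat)  # J-statistic for the null that all alphas are 0
print(res.rp)     # estimated risk premia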
def test_attr_dict():
    ad = AttrDict()
    ad['one'] = 'one'
    ad[1] = 1
    ad[('a', 2)] = ('a', 2)
    assert list(ad.keys()) == ['one', 1, ('a', 2)]
    assert len(ad) == 3

    ad2 = ad.copy()
    assert list(ad2.keys()) == list(ad.keys())
    assert ad.get('one', None) == 'one'
    assert ad.get('two', False) is False

    k, v = ad.popitem()
    assert k == 'one'
    assert v == 'one'

    items = ad.items()
    assert (1, 1) in items
    assert (('a', 2), ('a', 2)) in items
    assert len(items) == 2

    values = ad.values()
    assert 1 in values
    assert ('a', 2) in values
    assert len(values) == 2

    ad2 = AttrDict()
    ad2[1] = 3
    ad2['one'] = 'one'
    ad2['a'] = 'a'
    ad.update(ad2)
    assert ad[1] == 3
    assert 'a' in ad

    ad.__str__()
    with pytest.raises(AttributeError):
        ad.__ordered_dict__ = None
    with pytest.raises(AttributeError):
        ad.some_other_key
    with pytest.raises(KeyError):
        ad['__ordered_dict__'] = None

    del ad[1]
    assert 1 not in ad.keys()
    ad.new_value = 'new_value'
    assert 'new_value' in ad.keys()
    assert ad.new_value == ad['new_value']

    for key in ad.keys():
        if isinstance(key, str):
            assert key in dir(ad)

    new_value = ad.pop('new_value')
    assert new_value == 'new_value'

    del ad.one
    assert 'one' not in ad.keys()

    ad.clear()
    assert list(ad.keys()) == []
def fit(self, cov_type='robust', debiased=True, **cov_config):
    """
    Estimate model parameters

    Parameters
    ----------
    cov_type : str, optional
        Name of covariance estimator
    debiased : bool, optional
        Flag indicating whether to debias the covariance estimator using
        a degree of freedom adjustment
    **cov_config : dict
        Additional covariance-specific options.  See Notes.

    Returns
    -------
    results : LinearFactorModelResults
        Results class with parameter estimates, covariance and test statistics

    Notes
    -----
    Supported covariance estimators are:

    * 'robust' - Heteroskedasticity-robust covariance estimator
    * 'kernel' - Heteroskedasticity and Autocorrelation consistent (HAC)
      covariance estimator

    The kernel covariance estimator takes the optional arguments
    ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
    and ``bandwidth`` (a positive integer).
    """
    p = self.portfolios.ndarray
    f = self.factors.ndarray
    nportfolio = p.shape[1]
    nobs, nfactor = f.shape
    fc = np.c_[np.ones((nobs, 1)), f]
    rp = f.mean(0)[:, None]
    fe = f - f.mean(0)
    b = np.linalg.pinv(fc) @ p
    eps = p - fc @ b
    alphas = b[:1].T

    nloading = (nfactor + 1) * nportfolio
    xpxi = np.eye(nloading + nfactor)
    xpxi[:nloading, :nloading] = np.kron(np.eye(nportfolio),
                                         np.linalg.pinv(fc.T @ fc / nobs))
    f_rep = np.tile(fc, (1, nportfolio))
    eps_rep = np.tile(eps, (nfactor + 1, 1))  # 1 2 3 ... 25 1 2 3 ...
    eps_rep = eps_rep.ravel(order='F')
    eps_rep = np.reshape(eps_rep, (nobs, (nfactor + 1) * nportfolio), order='F')
    xe = f_rep * eps_rep
    xe = np.c_[xe, fe]
    if cov_type in ('robust', 'heteroskedastic'):
        cov_est = HeteroskedasticCovariance(xe, inv_jacobian=xpxi,
                                            center=False, debiased=debiased,
                                            df=fc.shape[1])
        rp_cov_est = HeteroskedasticCovariance(fe, jacobian=np.eye(f.shape[1]),
                                               center=False, debiased=debiased,
                                               df=1)
    elif cov_type == 'kernel':
        cov_est = KernelCovariance(xe, inv_jacobian=xpxi, center=False,
                                   debiased=debiased, df=fc.shape[1],
                                   **cov_config)
        bw = cov_est.bandwidth
        _cov_config = {k: v for k, v in cov_config.items()}
        _cov_config['bandwidth'] = bw
        rp_cov_est = KernelCovariance(fe, jacobian=np.eye(f.shape[1]),
                                      center=False, debiased=debiased, df=1,
                                      **_cov_config)
    else:
        raise ValueError('Unknown cov_type: {0}'.format(cov_type))
    full_vcv = cov_est.cov
    rp_cov = rp_cov_est.cov
    vcv = full_vcv[:nloading, :nloading]

    # Rearrange VCV
    order = np.reshape(np.arange((nfactor + 1) * nportfolio),
                       (nportfolio, nfactor + 1))
    order = order.T.ravel()
    vcv = vcv[order][:, order]

    # Return values
    alpha_vcv = vcv[:nportfolio, :nportfolio]
    stat = float(alphas.T @ np.linalg.pinv(alpha_vcv) @ alphas)
    jstat = WaldTestStatistic(stat, 'All alphas are 0', nportfolio,
                              name='J-statistic')
    params = b.T
    betas = b[1:].T
    residual_ss = (eps ** 2).sum()
    e = p - p.mean(0)[None, :]
    total_ss = (e ** 2).sum()
    r2 = 1 - residual_ss / total_ss
    param_names = []
    for portfolio in self.portfolios.cols:
        param_names.append('alpha-{0}'.format(portfolio))
        for factor in self.factors.cols:
            param_names.append('beta-{0}-{1}'.format(portfolio, factor))
    for factor in self.factors.cols:
        param_names.append('lambda-{0}'.format(factor))

    res = AttrDict(params=params, cov=full_vcv, betas=betas, rp=rp,
                   rp_cov=rp_cov, alphas=alphas, alpha_vcv=alpha_vcv,
                   jstat=jstat, rsquared=r2, total_ss=total_ss,
                   residual_ss=residual_ss, param_names=param_names,
                   portfolio_names=self.portfolios.cols,
                   factor_names=self.factors.cols, name=self._name,
                   cov_type=cov_type, model=self, nobs=nobs,
                   rp_names=self.factors.cols, cov_est=cov_est)
    return LinearFactorModelResults(res)
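The estimator above regresses each portfolio on a constant and the traded factors and takes the factor sample means as risk premia. A hedged usage sketch follows; TradedFactorModel is an assumed name for the class that owns this fit method, and the data is synthetic.

import numpy as np

# Synthetic returns on 25 portfolios driven by 3 traded factors
rs = np.random.RandomState(12345)
factors = rs.standard_normal((500, 3))
portfolios = factors @ rs.standard_normal((3, 25)) + rs.standard_normal((500, 25))

mod = TradedFactorModel(portfolios, factors)  # hypothetical constructor
res = mod.fit(cov_type='robust')  # heteroskedasticity-robust covariance
hac = mod.fit(cov_type='kernel', kernel='parzen', bandwidth=12)  # HAC variant
print(res.jstat)  # Wald test that all alphas are jointly zero
print(res.rp)     # risk premia: the factor sample means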
def fit(self, cov_type='robust', debiased=True, **cov_config):
    """
    Estimate model parameters

    Parameters
    ----------
    cov_type : str, optional
        Name of covariance estimator
    debiased : bool, optional
        Flag indicating whether to debias the covariance estimator using
        a degree of freedom adjustment
    **cov_config
        Additional covariance-specific options.  See Notes.

    Returns
    -------
    results : LinearFactorModelResults
        Results class with parameter estimates, covariance and test statistics

    Notes
    -----
    The kernel covariance estimator takes the optional arguments
    ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
    and ``bandwidth`` (a positive integer).
    """
    nobs, nf, nport, nrf, s1, s2, s3 = self._boundaries()
    excess_returns = not self._risk_free
    f = self.factors.ndarray
    p = self.portfolios.ndarray
    nport = p.shape[1]

    # Step 1: nport time-series regressions to get the loadings B
    fc = np.c_[np.ones((nobs, 1)), f]
    b = lstsq(fc, p)[0]  # (nf + 1) by nport
    eps = p - fc @ b
    if excess_returns:
        betas = b[1:].T
    else:
        betas = b.T.copy()
        betas[:, 0] = 1.0

    # Step 2: GLS cross-sectional regression of mean returns on the betas
    sigma_m12 = self._sigma_m12
    lam = lstsq(sigma_m12 @ betas, sigma_m12 @ p.mean(0)[:, None])[0]

    expected = betas @ lam
    pricing_errors = p - expected.T
    # Moments
    alphas = pricing_errors.mean(0)[:, None]
    moments = self._moments(eps, betas, lam, alphas, pricing_errors)
    # Jacobian
    jacobian = self._jacobian(betas, lam, alphas)

    if cov_type not in ('robust', 'heteroskedastic', 'kernel'):
        raise ValueError('Unknown weight: {0}'.format(cov_type))
    if cov_type in ('robust', 'heteroskedastic'):
        cov_est = HeteroskedasticCovariance
    else:  # 'kernel'
        cov_est = KernelCovariance
    cov_est = cov_est(moments, jacobian=jacobian, center=False,
                      debiased=debiased, df=fc.shape[1], **cov_config)

    # VCV
    full_vcv = cov_est.cov
    alpha_vcv = full_vcv[s2:, s2:]
    stat = float(alphas.T @ np.linalg.pinv(alpha_vcv) @ alphas)
    jstat = WaldTestStatistic(stat, 'All alphas are 0', nport - nf - nrf,
                              name='J-statistic')

    total_ss = ((p - p.mean(0)[None, :]) ** 2).sum()
    residual_ss = (eps ** 2).sum()
    r2 = 1 - residual_ss / total_ss
    rp = lam
    rp_cov = full_vcv[s1:s2, s1:s2]
    betas = betas if excess_returns else betas[:, 1:]
    params = np.c_[alphas, betas]
    param_names = []
    for portfolio in self.portfolios.cols:
        param_names.append('alpha-{0}'.format(portfolio))
        for factor in self.factors.cols:
            param_names.append('beta-{0}-{1}'.format(portfolio, factor))
    if not excess_returns:
        param_names.append('lambda-risk_free')
    for factor in self.factors.cols:
        param_names.append('lambda-{0}'.format(factor))

    # Pivot the VCV to drop unneeded entries and restore the correct order
    order = np.reshape(np.arange(s1), (nport, nf + 1))
    order[:, 0] = np.arange(s2, s3)
    order = order.ravel()
    order = np.r_[order, s1:s2]
    full_vcv = full_vcv[order][:, order]

    factor_names = list(self.factors.cols)
    rp_names = factor_names[:]
    if not excess_returns:
        rp_names.insert(0, 'risk_free')

    res = AttrDict(params=params, cov=full_vcv, betas=betas, rp=rp,
                   rp_cov=rp_cov, alphas=alphas, alpha_vcv=alpha_vcv,
                   jstat=jstat, rsquared=r2, total_ss=total_ss,
                   residual_ss=residual_ss, param_names=param_names,
                   portfolio_names=self.portfolios.cols,
                   factor_names=factor_names, name=self._name,
                   cov_type=cov_type, model=self, nobs=nobs,
                   rp_names=rp_names, cov_est=cov_est)
    return LinearFactorModelResults(res)
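The second step above is a GLS cross-sectional regression: premultiplying both sides of the least-squares problem by sigma_m12, a square root of the inverse residual covariance, makes lstsq minimize the Sigma-weighted pricing errors. A self-contained sketch of that step under those assumptions; the function and argument names here are hypothetical.

import numpy as np

def gls_lambda(betas, mean_returns, sigma):
    """Sketch of the step-2 GLS risk-premia estimator:
    lambda = argmin (rbar - B @ lam)' inv(Sigma) (rbar - B @ lam)."""
    # One valid Sigma^{-1/2}: with inv(Sigma) = L @ L.T (Cholesky), use
    # W = L.T so that W.T @ W = inv(Sigma).
    chol = np.linalg.cholesky(np.linalg.inv(sigma))
    w = chol.T
    # lstsq on the transformed system reproduces the GLS solution
    return np.linalg.lstsq(w @ betas, w @ mean_returns, rcond=None)[0]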
def generate_data(missing, datatype, const=False, ntk=(971, 7, 5),
                  other_effects=0, rng=None):
    if rng is None:
        np.random.seed(12345)
    else:
        np.random.set_state(rng.get_state())

    n, t, k = ntk
    k += const
    x = standard_normal((k, t, n))
    beta = np.arange(1, k + 1)[:, None, None] / k
    y = (x * beta).sum(0) + standard_normal((t, n)) + 2 * standard_normal((1, n))
    w = np.random.chisquare(5, (t, n)) / 5
    c = None
    if other_effects == 1:
        cats = ['Industries']
    else:
        cats = ['cat.' + str(i) for i in range(other_effects)]
    if other_effects:
        c = np.random.randint(0, 4, (other_effects, t, n))

    vcats = ['varcat.' + str(i) for i in range(2)]
    vc2 = np.ones((2, t, 1)) @ np.random.randint(0, n // 2, (2, 1, n))
    vc1 = vc2[[0]]

    if const:
        x[0] = 1.0

    if missing > 0:
        locs = np.random.choice(n * t, int(n * t * missing))
        y.flat[locs] = np.nan
        locs = np.random.choice(n * t * k, int(n * t * k * missing))
        x.flat[locs] = np.nan

    if datatype in ('pandas', 'xarray'):
        entities = ['firm' + str(i) for i in range(n)]
        time = date_range('1-1-1900', periods=t, freq='A-DEC')
        var_names = ['x' + str(i) for i in range(k)]
        # y = DataFrame(y, index=time, columns=entities)
        y = panel_to_frame(y[None], items=['y'], major_axis=time,
                           minor_axis=entities, swap=True)
        w = panel_to_frame(w[None], items=['w'], major_axis=time,
                           minor_axis=entities, swap=True)
        w = w.reindex(y.index)
        x = panel_to_frame(x, items=var_names, major_axis=time,
                           minor_axis=entities, swap=True)
        x = x.reindex(y.index)
        c = panel_to_frame(c, items=cats, major_axis=time,
                           minor_axis=entities, swap=True)
        c = c.reindex(y.index)
        vc1 = panel_to_frame(vc1, items=vcats[:1], major_axis=time,
                             minor_axis=entities, swap=True)
        vc1 = vc1.reindex(y.index)
        vc2 = panel_to_frame(vc2, items=vcats, major_axis=time,
                             minor_axis=entities, swap=True)
        vc2 = vc2.reindex(y.index)

    if datatype == 'xarray':
        # TODO: This is broken now, need to transform the MultiIndex to xarray 3d
        import xarray as xr
        x = xr.DataArray(PanelData(x).values3d,
                         coords={'entities': entities, 'time': time,
                                 'vars': var_names},
                         dims=['vars', 'time', 'entities'])
        y = xr.DataArray(PanelData(y).values3d,
                         coords={'entities': entities, 'time': time,
                                 'vars': ['y']},
                         dims=['vars', 'time', 'entities'])
        w = xr.DataArray(PanelData(w).values3d,
                         coords={'entities': entities, 'time': time,
                                 'vars': ['w']},
                         dims=['vars', 'time', 'entities'])
        if c.shape[1] > 0:
            c = xr.DataArray(PanelData(c).values3d,
                             coords={'entities': entities, 'time': time,
                                     'vars': c.columns},
                             dims=['vars', 'time', 'entities'])
        vc1 = xr.DataArray(PanelData(vc1).values3d,
                           coords={'entities': entities, 'time': time,
                                   'vars': vc1.columns},
                           dims=['vars', 'time', 'entities'])
        vc2 = xr.DataArray(PanelData(vc2).values3d,
                           coords={'entities': entities, 'time': time,
                                   'vars': vc2.columns},
                           dims=['vars', 'time', 'entities'])

    if rng is not None:
        rng.set_state(np.random.get_state())

    return AttrDict(y=y, x=x, w=w, c=c, vc1=vc1, vc2=vc2)
def _str_extra(self) -> AttrDict:
    return AttrDict(Debiased=self._debiased, Center=self._center)
def __init__(self, center: bool = False, debiased: bool = False) -> None:
    self._center = center
    self._debiased = debiased
    self._bandwidth: Optional[float] = 0
    self._name = "Homoskedastic (Unadjusted) Weighting"
    self._config = AttrDict(center=center, debiased=debiased)
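For orientation, one plausible shape for the weight computation that a configuration like this drives: the GMM weighting matrix is the inverse of an (optionally centered, optionally debiased) moment covariance. This is a sketch of the general pattern only, not the library's actual computation; the function name and the degree-of-freedom adjustment are assumptions.

import numpy as np

def weight_from_moments(moments, center=False, debiased=False):
    """Sketch only: inverse moment covariance as a GMM weighting matrix."""
    nobs = moments.shape[0]
    g = moments - moments.mean(0) if center else moments
    s = g.T @ g / nobs
    if debiased:
        s *= nobs / (nobs - 1)  # simple df adjustment (assumption)
    return np.linalg.inv(s)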
def lsdv_config(request):
    weights, entity_effects, time_effects, other_effects = request.param
    return AttrDict(weights=weights, entity_effects=entity_effects,
                    time_effects=time_effects, other_effects=other_effects)
def generate_data(
    missing,
    datatype,
    const=False,
    ntk=(971, 7, 5),
    other_effects=0,
    rng=None,
    num_cats=4,
):
    if rng is None:
        np.random.seed(12345)
    else:
        np.random.set_state(rng.get_state())

    n, t, k = ntk
    k += const
    x = standard_normal((k, t, n))
    beta = np.arange(1, k + 1)[:, None, None] / k
    y = (x * beta).sum(0) + standard_normal((t, n)) + 2 * standard_normal((1, n))
    w = np.random.chisquare(5, (t, n)) / 5
    c = None
    if other_effects == 1:
        cats = ["Industries"]
    else:
        cats = ["cat." + str(i) for i in range(other_effects)]
    if other_effects:
        if not isinstance(num_cats, list):
            num_cats = [num_cats] * other_effects
        c = []
        for i in range(other_effects):
            nc = num_cats[i]
            c.append(np.random.randint(0, nc, (1, t, n)))
        c = np.concatenate(c, 0)

    vcats = ["varcat." + str(i) for i in range(2)]
    vc2 = np.ones((2, t, 1)) @ np.random.randint(0, n // 2, (2, 1, n))
    vc1 = vc2[[0]]

    if const:
        x[0] = 1.0

    if missing > 0:
        locs = np.random.choice(n * t, int(n * t * missing))
        y.flat[locs] = np.nan
        locs = np.random.choice(n * t * k, int(n * t * k * missing))
        x.flat[locs] = np.nan

    if datatype in ("pandas", "xarray"):
        entities = ["firm" + str(i) for i in range(n)]
        time = date_range("1-1-1900", periods=t, freq="A-DEC")
        var_names = ["x" + str(i) for i in range(k)]
        # y = DataFrame(y, index=time, columns=entities)
        y = panel_to_frame(y[None], items=["y"], major_axis=time,
                           minor_axis=entities, swap=True)
        w = panel_to_frame(w[None], items=["w"], major_axis=time,
                           minor_axis=entities, swap=True)
        w = w.reindex(y.index)
        x = panel_to_frame(x, items=var_names, major_axis=time,
                           minor_axis=entities, swap=True)
        x = x.reindex(y.index)
        c = panel_to_frame(c, items=cats, major_axis=time,
                           minor_axis=entities, swap=True)
        c = c.reindex(y.index)
        vc1 = panel_to_frame(vc1, items=vcats[:1], major_axis=time,
                             minor_axis=entities, swap=True)
        vc1 = vc1.reindex(y.index)
        vc2 = panel_to_frame(vc2, items=vcats, major_axis=time,
                             minor_axis=entities, swap=True)
        vc2 = vc2.reindex(y.index)

    if datatype == "xarray":
        # TODO: This is broken now, need to transform the MultiIndex to xarray 3d
        import xarray as xr

        x = xr.DataArray(
            PanelData(x).values3d,
            coords={"entities": entities, "time": time, "vars": var_names},
            dims=["vars", "time", "entities"],
        )
        y = xr.DataArray(
            PanelData(y).values3d,
            coords={"entities": entities, "time": time, "vars": ["y"]},
            dims=["vars", "time", "entities"],
        )
        w = xr.DataArray(
            PanelData(w).values3d,
            coords={"entities": entities, "time": time, "vars": ["w"]},
            dims=["vars", "time", "entities"],
        )
        if c.shape[1] > 0:
            c = xr.DataArray(
                PanelData(c).values3d,
                coords={"entities": entities, "time": time, "vars": c.columns},
                dims=["vars", "time", "entities"],
            )
        vc1 = xr.DataArray(
            PanelData(vc1).values3d,
            coords={"entities": entities, "time": time, "vars": vc1.columns},
            dims=["vars", "time", "entities"],
        )
        vc2 = xr.DataArray(
            PanelData(vc2).values3d,
            coords={"entities": entities, "time": time, "vars": vc2.columns},
            dims=["vars", "time", "entities"],
        )

    if rng is not None:
        rng.set_state(np.random.get_state())

    return AttrDict(y=y, x=x, w=w, c=c, vc1=vc1, vc2=vc2)
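A hedged example of driving the generator above. The keyword values are arbitrary; the attribute names match the returned AttrDict, while the comments describe shapes implied by the code rather than a documented contract.

# Hypothetical usage: two extra effects with per-effect category counts,
# 10% missing values, and a pandas layout.
data = generate_data(missing=0.1, datatype="pandas", const=True,
                     ntk=(100, 5, 3), other_effects=2, num_cats=[4, 6])
y, x, c = data.y, data.x, data.c  # stacked long-format DataFrames
print(list(c.columns))            # ['cat.0', 'cat.1'] effect categories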