Ejemplo n.º 1
0
 def _check_weights(self) -> None:
     """
     Build the weight container stored on ``self._weight_data``.

     When no weights were supplied, a vector of ones with one entry per row
     of the dependent variable is stored and the model is flagged as
     unweighted. Otherwise the supplied weights are divided by their
     ``nanmean`` before being stored, and the model is flagged as weighted.
     """
     self._is_weighted = self._weights is not None
     if not self._is_weighted:
         unit_weights = ones(self._dependent.shape[0])
         self._weight_data = IVData(unit_weights, "weights")
         return

     raw = IVData(self._weights).ndarray
     # Rescale so the weights average to one (ignoring NaNs).
     scaled = raw / nanmean(raw)
     self._weight_data = IVData(scaled, var_name="weights", nobs=self._nobs)
Ejemplo n.º 2
0
 def test_string_cat_equiv(self):
     """String columns and their categorical counterparts give equal frames."""
     letters = ['a', 'b', 'a', 'b', 'c', 'd', 'a', 'b']
     fruits = ['apple', 'banana', 'apple', 'banana',
               'cherry', 'date', 'apple', 'banana']
     columns = {
         'string': pd.Series(letters),
         'number': pd.Series(np.arange(8.0)),
         'other_string': pd.Series(fruits),
     }
     df = pd.DataFrame(columns)
     from_strings = IVData(df)

     # Same data, but the first string column is stored as a categorical.
     categorical_df = df.copy()
     categorical_df['string'] = categorical_df['string'].astype('category')
     from_category = IVData(categorical_df)

     assert_frame_equal(from_strings.pandas, from_category.pandas)
Ejemplo n.º 3
0
    def test_invalid_types(self):
        """Unsupported inputs must be rejected with the expected exception."""
        # Three-dimensional arrays are expected to raise ValueError.
        for bad_shape in ((1, 1, 1), (10, 2, 2)):
            with pytest.raises(ValueError):
                IVData(np.empty(bad_shape))

        # An arbitrary object that merely exposes ``ndim`` is expected to
        # raise TypeError.
        class a(object):
            @property
            def ndim(self):
                return 2

        with pytest.raises(TypeError):
            IVData(a())
Ejemplo n.º 4
0
 def test_existing_datahandler(self):
     """Wrapping an IVData in IVData yields a distinct but equivalent object."""
     dates = pd.date_range('2017-01-01', periods=10)
     frame = pd.DataFrame(np.empty((10, 2)), columns=['a', 'b'], index=dates)
     original = IVData(frame)
     rewrapped = IVData(original)

     # A new object must be created ...
     assert original is not rewrapped
     # ... whose labels, values and dimensions match the source.
     assert original.cols == rewrapped.cols
     assert original.rows == rewrapped.rows
     assert_equal(original.ndarray, rewrapped.ndarray)
     assert original.ndim == rewrapped.ndim
     assert_frame_equal(original.pandas, rewrapped.pandas)
Ejemplo n.º 5
0
def test_string_cat_equiv() -> None:
    """String columns and their categorical counterparts give equal frames."""
    letters = ["a", "b", "a", "b", "c", "d", "a", "b"]
    fruits = [
        "apple", "banana", "apple", "banana", "cherry", "date", "apple", "banana"
    ]
    columns = {
        "string": pd.Series(letters),
        "number": pd.Series(np.arange(8.0)),
        "other_string": pd.Series(fruits),
    }
    df = pd.DataFrame(columns)
    from_strings = IVData(df)

    # Same data, but the first string column is stored as a categorical.
    categorical_df = df.copy()
    categorical_df["string"] = categorical_df["string"].astype("category")
    from_category = IVData(categorical_df)

    assert_frame_equal(from_strings.pandas, from_category.pandas)
Ejemplo n.º 6
0
    def test_invalid_types(self) -> None:
        """Unsupported inputs must be rejected with the expected exception."""
        # Three-dimensional arrays are expected to raise ValueError.
        for bad_shape in ((1, 1, 1), (10, 2, 2)):
            with pytest.raises(ValueError):
                IVData(np.empty(bad_shape))

        # An arbitrary object that merely exposes ``ndim`` is expected to
        # raise TypeError.
        class AnotherClass(object):
            @property
            def ndim(self) -> int:
                return 2

        with pytest.raises(TypeError):
            IVData(AnotherClass())
Ejemplo n.º 7
0
    def __init__(
        self,
        dependent: ArrayLike,
        exog: OptionalArrayLike = None,
        *,
        absorb: InteractionVar = None,
        interactions: Union[InteractionVar, Iterable[InteractionVar]] = None,
        weights: OptionalArrayLike = None,
        drop_absorbed: bool = False,
    ) -> None:
        """
        Store the model inputs and initialize the estimation state.

        Parameters
        ----------
        dependent : array-like
            Dependent variable. Its first dimension sets the number of
            observations used for every other input.
        exog : array-like, optional
            Exogenous regressors, wrapped with the same number of
            observations as ``dependent``.
        absorb : {DataFrame, Interaction}, optional
            Variables to absorb. A DataFrame is converted with
            ``Interaction.from_frame``; None produces an empty Interaction.
        interactions : {InteractionVar, Iterable[InteractionVar]}, optional
            Stored as given and processed by ``_prepare_interactions``.
        weights : array-like, optional
            Observation weights, processed by ``_check_weights``.
        drop_absorbed : bool
            Stored on the instance as ``_drop_absorbed``; it is not used
            further inside the constructor.

        Raises
        ------
        TypeError
            If ``absorb`` is not None, a DataFrame or an Interaction.
        """

        self._dependent = IVData(dependent, "dependent")
        self._nobs = nobs = self._dependent.shape[0]
        self._exog = IVData(exog, "exog", nobs=self._nobs)
        self._absorb = absorb
        # Normalize ``absorb`` into an Interaction, whatever form it arrived in.
        if isinstance(absorb, DataFrame):
            self._absorb_inter = Interaction.from_frame(absorb)
        elif absorb is None:
            self._absorb_inter = Interaction(None, None, nobs)
        elif isinstance(absorb, Interaction):
            self._absorb_inter = absorb
        else:
            # NOTE(review): the message contains the typo "ba"; left unchanged
            # because callers or tests may match on the exact text.
            raise TypeError("absorb must ba a DataFrame or an Interaction")
        self._weights = weights
        self._is_weighted = False
        self._drop_absorbed = drop_absorbed
        # Reads ``_weights``, ``_dependent`` and ``_nobs`` set above.
        self._check_weights()

        self._interactions = interactions
        self._interaction_list: List[Interaction] = []
        self._prepare_interactions()
        self._absorbed_dependent: Optional[DataFrame] = None
        self._absorbed_exog: Optional[DataFrame] = None

        self._check_shape()
        # Capture the index before ``_drop_missing`` runs.
        self._original_index = self._dependent.pandas.index
        self._drop_locs = self._drop_missing()
        self._columns = self._exog.cols
        self._index = self._dependent.rows
        self._method = "Absorbing LS"

        # Constant-handling and regressor state, initialized to defaults here.
        self._const_col = 0
        self._has_constant = False
        self._has_constant_exog = self._check_constant()
        self._constant_absorbed = False
        self._num_params = 0
        self._regressors: Optional[sp.csc_matrix] = None
        self._regressors_hash: Optional[Tuple[Tuple[str, ...], ...]] = None
Ejemplo n.º 8
0
    def __init__(self,
                 dependent: ArrayLike,
                 exog: OptionalArrayLike = None,
                 *,
                 absorb: InteractionVar = None,
                 interactions: Union[InteractionVar,
                                     Iterable[InteractionVar]] = None,
                 weights: OptionalArrayLike = None):
        """
        Store the model inputs and initialize the estimation state.

        Parameters
        ----------
        dependent : array-like
            Dependent variable. Its first dimension sets the number of
            observations used for every other input.
        exog : array-like, optional
            Exogenous regressors, wrapped with the same number of
            observations as ``dependent``.
        absorb : {DataFrame, Interaction}, optional
            Variables to absorb. A DataFrame is converted with
            ``Interaction.from_frame``; None produces an empty Interaction.
        interactions : {InteractionVar, Iterable[InteractionVar]}, optional
            Stored as given and processed by ``_prepare_interactions``.
        weights : array-like, optional
            Observation weights, processed by ``_check_weights``.

        Raises
        ------
        TypeError
            If ``absorb`` is not None, a DataFrame or an Interaction.
        """

        self._dependent = IVData(dependent, 'dependent')
        self._nobs = nobs = self._dependent.shape[0]
        self._exog = IVData(exog, 'exog', nobs=self._nobs)
        self._absorb = absorb
        # Normalize ``absorb`` into an Interaction, whatever form it arrived in.
        if isinstance(absorb, DataFrame):
            self._absorb_inter = Interaction.from_frame(absorb)
        elif absorb is None:
            self._absorb_inter = Interaction(None, None, nobs)
        elif isinstance(absorb, Interaction):
            self._absorb_inter = absorb
        else:
            # NOTE(review): the message contains the typo "ba"; left unchanged
            # because callers or tests may match on the exact text.
            raise TypeError('absorb must ba a DataFrame or an Interaction')
        self._weights = weights
        self._is_weighted = False
        # Reads ``_weights`` and ``_dependent`` set above.
        self._check_weights()

        self._interactions = interactions
        self._interaction_list = []  # type: List[Interaction]
        self._prepare_interactions()
        self._absorbed_dependent = None
        self._absorbed_exog = None
        self._x = None

        self._check_shape()
        # Capture the index before ``_drop_missing`` runs.
        self._original_index = self._dependent.pandas.index
        self._drop_locs = self._drop_missing()
        self._columns = self._exog.cols
        self._index = self._dependent.rows
        self._method = 'Absorbing LS'

        # Constant-handling and regressor state, initialized to defaults here.
        self._const_col = 0
        self._has_constant = False
        self._has_constant_exog = self._check_constant()
        self._constant_absorbed = False
        self._num_params = 0
        self._regressors = None
        self._regressors_hash = None
Ejemplo n.º 9
0
    def multivariate_ls(cls,
                        dependent,
                        exog=None,
                        endog=None,
                        instruments=None):
        """
        Interface for specification of multivariate IV models

        Parameters
        ----------
        dependent : array-like
            nobs by ndep array of dependent variables
        exog : array-like, optional
            nobs by nexog array of exogenous regressors common to all models
        endog : array-like, optional
            nobs by nendog array of endogenous regressors common to all models
        instruments : array-like, optional
            nobs by ninstr array of instruments to use in all equations

        Returns
        -------
        model : IV3SLS
            Model instance

        Raises
        ------
        ValueError
            If both exog and endog are None.

        Notes
        -----
        At least one of exog or endog must be provided.

        Utility function to simplify the construction of multivariate IV
        models which all use the same regressors and instruments. Constructs
        the dictionary of equations from the variables using the common
        exogenous, endogenous and instrumental variables.
        """
        dependent = IVData(dependent, var_name='dependent')
        if exog is None and endog is None:
            raise ValueError('At least one of exog or endog must be provided')

        # Every optional input may legitimately be None, so each one is
        # wrapped with the number of observations taken from ``dependent``.
        # ``exog`` previously omitted ``nobs`` although it is optional too.
        nobs = dependent.shape[0]
        exog = IVData(exog, var_name='exog', nobs=nobs)
        endog = IVData(endog, var_name='endog', nobs=nobs)
        instr = IVData(instruments, var_name='instruments', nobs=nobs)

        # One equation per dependent column, all sharing the same regressors.
        equations = OrderedDict()
        for col in dependent.pandas:
            equations[col] = (dependent.pandas[[col]], exog.pandas,
                              endog.pandas, instr.pandas)
        return cls(equations)
Ejemplo n.º 10
0
    def _check_data(self):
        """
        Validate ``cat`` and ``cont`` and wrap them in data containers.

        Sets ``_cat_data``, ``_cont_data`` and, when data are supplied,
        ``_nobs``. Any non-categorical column of ``cat`` triggers a
        conversion of the categorical frame to ``category`` dtype.

        Raises
        ------
        ValueError
            If both inputs are None and no ``nobs`` was given, or if both
            inputs are provided but neither has any columns.
        """
        # Local import: the dtype check below replaces ``is_categorical``,
        # which is no longer available in current pandas releases.
        from pandas.api.types import CategoricalDtype

        cat, cont = self._cat, self._cont
        # Inputs without a ``shape`` attribute (e.g. None) count as 0 rows.
        cat_nobs = getattr(cat, 'shape', (0, ))[0]
        cont_nobs = getattr(cont, 'shape', (0, ))[0]
        nobs = max(cat_nobs, cont_nobs)
        if cat is None and cont is None:
            if self._nobs is not None:
                self._cont_data = self._cat_data = IVData(None,
                                                          'none',
                                                          nobs=self._nobs)
            else:
                raise ValueError(
                    'nobs must be provided when cat and cont are None')
            return
        self._nobs = nobs

        self._cat_data = IVData(cat, 'cat', nobs=nobs, convert_dummies=False)
        self._cont_data = IVData(cont,
                                 'cont',
                                 nobs=nobs,
                                 convert_dummies=False)
        if self._cat_data.shape[1] == self._cont_data.shape[1] == 0:
            raise ValueError('Both cat and cont are empty arrays')
        cat_data = self._cat_data.pandas
        convert = [
            col for col in cat_data
            if not isinstance(cat_data[col].dtype, CategoricalDtype)
        ]
        if convert:
            # Rebuild the whole frame as categoricals; columns that already
            # are categorical pass through ``astype`` unchanged.
            cat_data = DataFrame(
                {col: cat_data[col].astype('category')
                 for col in cat_data})
            self._cat_data = IVData(cat_data, 'cat', convert_dummies=False)
Ejemplo n.º 11
0
    def _check_data(self) -> None:
        """
        Validate ``cat`` and ``cont`` and wrap them in data containers.

        Sets ``_cat_data``, ``_cont_data`` and, when data are supplied,
        ``_nobs``. Any non-categorical column of ``cat`` triggers a
        conversion of the categorical frame to ``category`` dtype.

        Raises
        ------
        ValueError
            If both inputs are None and no ``nobs`` was given, or if both
            inputs are provided but neither has any columns.
        """
        cat, cont = self._cat, self._cont

        # No data at all: an explicit nobs is needed to build placeholders.
        if cat is None and cont is None:
            if self._nobs is None:
                raise ValueError(
                    "nobs must be provided when cat and cont are None")
            placeholder = IVData(None, "none", nobs=self._nobs)
            self._cont_data = self._cat_data = placeholder
            return

        # Inputs without a ``shape`` attribute (e.g. None) count as 0 rows.
        row_counts = [getattr(arr, "shape", (0, ))[0] for arr in (cat, cont)]
        nobs = max(row_counts)
        self._nobs = nobs

        self._cat_data = IVData(cat, "cat", nobs=nobs, convert_dummies=False)
        self._cont_data = IVData(cont, "cont", nobs=nobs,
                                 convert_dummies=False)
        ncat = self._cat_data.shape[1]
        ncont = self._cont_data.shape[1]
        if ncat == 0 and ncont == 0:
            raise ValueError("Both cat and cont are empty arrays")

        cat_frame = self._cat_data.pandas
        needs_conversion = any(
            not is_categorical_dtype(cat_frame[name]) for name in cat_frame)
        if not needs_conversion:
            return
        # Rebuild the whole frame as categoricals; columns that already are
        # categorical pass through ``astype`` unchanged.
        as_category = DataFrame(
            {name: cat_frame[name].astype("category")
             for name in cat_frame})
        self._cat_data = IVData(as_category, "cat", convert_dummies=False)
Ejemplo n.º 12
0
def test_xarray_2d() -> None:
    """2-d DataArrays, with and without coordinates, round-trip through IVData."""
    values = np.random.randn(10, 2)

    # Without coordinates: integer rows and generated ``x.<i>`` column names.
    unlabeled = IVData(xr.DataArray(values))
    assert_equal(unlabeled.ndarray, values)
    assert unlabeled.rows == list(np.arange(10))
    assert unlabeled.cols == ["x.0", "x.1"]
    unlabeled_expected = pd.DataFrame(
        values, columns=unlabeled.cols, index=unlabeled.rows
    )
    assert_frame_equal(unlabeled_expected, unlabeled.pandas)

    # With coordinates: the coordinate values become row and column labels.
    dates = pd.date_range("2017-01-01", periods=10)
    coords = [("time", dates), ("variables", ["apple", "banana"])]
    labeled = IVData(xr.DataArray(values, coords))
    assert_equal(labeled.ndarray, values)
    assert_series_equal(pd.Series(labeled.rows), pd.Series(list(dates)))
    assert labeled.cols == ["apple", "banana"]
    labeled_expected = pd.DataFrame(
        values, columns=labeled.cols, index=labeled.rows
    )
    assert_frame_equal(labeled_expected, labeled.pandas)
Ejemplo n.º 13
0
    def test_xarray_1d(self):
        """1-d DataArrays, with and without coordinates, become one column."""
        values = np.random.randn(10)
        column = values[:, None]

        # Without coordinates: integer rows, column named after var_name.
        unlabeled = IVData(xr.DataArray(values), 'some_variable')
        assert_equal(unlabeled.ndarray, column)
        assert unlabeled.rows == list(np.arange(10))
        assert unlabeled.cols == ['some_variable.0']
        unlabeled_expected = pd.DataFrame(values,
                                          columns=unlabeled.cols,
                                          index=unlabeled.rows)
        assert_frame_equal(unlabeled_expected, unlabeled.pandas)

        # With a time coordinate: the dates become the row labels.
        dates = pd.date_range('2017-01-01', periods=10)
        labeled = IVData(xr.DataArray(values, [('time', dates)]),
                         'some_variable')
        assert_equal(labeled.ndarray, column)
        assert_series_equal(pd.Series(labeled.rows), pd.Series(list(dates)))
        assert labeled.cols == ['some_variable.0']
        labeled_expected = pd.DataFrame(column,
                                        columns=labeled.cols,
                                        index=labeled.rows)
        assert_frame_equal(labeled_expected, labeled.pandas)
Ejemplo n.º 14
0
    def test_xarray_2d(self):
        """2-d DataArrays, with and without coordinates, round-trip through IVData."""
        values = np.random.randn(10, 2)

        # Without coordinates: integer rows and generated ``x.<i>`` names.
        unlabeled = IVData(xr.DataArray(values))
        assert_equal(unlabeled.ndarray, values)
        assert unlabeled.rows == list(np.arange(10))
        assert unlabeled.cols == ['x.0', 'x.1']
        unlabeled_expected = pd.DataFrame(values,
                                          columns=unlabeled.cols,
                                          index=unlabeled.rows)
        assert_frame_equal(unlabeled_expected, unlabeled.pandas)

        # With coordinates: the coordinate values become the labels.
        dates = pd.date_range('2017-01-01', periods=10)
        coords = [('time', dates), ('variables', ['apple', 'banana'])]
        labeled = IVData(xr.DataArray(values, coords))
        assert_equal(labeled.ndarray, values)
        assert_series_equal(pd.Series(labeled.rows), pd.Series(list(dates)))
        assert labeled.cols == ['apple', 'banana']
        labeled_expected = pd.DataFrame(values,
                                        columns=labeled.cols,
                                        index=labeled.rows)
        assert_frame_equal(labeled_expected, labeled.pandas)
Ejemplo n.º 15
0
 def test_numpy_1d(self):
     """A 1-d array is exposed as a single column named ``x``."""
     values = np.empty(10)
     column = values[:, None]
     handler = IVData(values)

     assert handler.ndim == 2
     assert handler.cols == ['x']
     assert handler.rows == list(np.arange(10))
     assert_equal(handler.ndarray, column)
     expected = pd.DataFrame(column, columns=handler.cols, index=handler.rows)
     assert_frame_equal(handler.pandas, expected)
     assert handler.shape == (10, 1)
Ejemplo n.º 16
0
 def test_categorical_no_conversion(self):
     """A categorical Series is kept as one column when ``convert_dummies=False``."""
     index = pd.date_range('2017-01-01', periods=10)
     cat = pd.Categorical(['a', 'b', 'a', 'b', 'a', 'a', 'b', 'c', 'c', 'a'])
     # Pass the Categorical itself as the data. Wrapping it in a dict makes
     # 'cat' the only label, and aligning that to the date index produces an
     # all-missing Series, so the categorical path was never exercised.
     s = pd.Series(cat, index=index, name='cat')
     dh = IVData(s, convert_dummies=False)
     assert dh.ndim == 2
     assert dh.shape == (10, 1)
     assert dh.cols == ['cat']
     assert dh.rows == list(index)
     df = pd.DataFrame(s)
     assert_frame_equal(dh.pandas, df)
Ejemplo n.º 17
0
 def test_categorical_series(self):
     """A categorical Series expands to dummy columns for 'b' and 'c'."""
     index = pd.date_range('2017-01-01', periods=10)
     cat = pd.Categorical(['a', 'b', 'a', 'b', 'a', 'a', 'b', 'c', 'c', 'a'])
     s = pd.Series(cat, name='cat', index=index)
     dh = IVData(s)
     assert dh.ndim == 2
     # Three categories produce two dummy columns; 'a' has no column.
     assert dh.shape == (10, 2)
     assert sorted(dh.cols) == sorted(['cat.b', 'cat.c'])
     assert dh.rows == list(index)
     # Builtin ``float`` replaces the ``np.float`` alias, which NumPy removed.
     assert_equal(dh.pandas['cat.b'].values, (cat == 'b').astype(float))
     assert_equal(dh.pandas['cat.c'].values, (cat == 'c').astype(float))
Ejemplo n.º 18
0
 def test_numpy_2d(self):
     """A 2-d array keeps its shape and gets generated ``x.<i>`` column names."""
     values = np.empty((10, 2))
     handler = IVData(values)

     assert handler.ndim == values.ndim
     assert handler.cols == ['x.0', 'x.1']
     assert handler.rows == list(np.arange(10))
     assert_equal(handler.ndarray, values)
     expected = pd.DataFrame(values, columns=handler.cols, index=handler.rows)
     assert_frame_equal(handler.pandas, expected)
     assert handler.shape == (10, 2)
     # ``labels`` maps axis number to the labels along that axis.
     expected_labels = {0: handler.rows, 1: handler.cols}
     assert handler.labels == expected_labels
Ejemplo n.º 19
0
def test_categorical_series() -> None:
    """A categorical Series expands to dummy columns for 'b' and 'c'."""
    dates = pd.date_range("2017-01-01", periods=10)
    categories = pd.Categorical(
        ["a", "b", "a", "b", "a", "a", "b", "c", "c", "a"]
    )
    handler = IVData(pd.Series(categories, name="cat", index=dates))

    assert handler.ndim == 2
    # Three categories produce two dummy columns; 'a' has no column.
    assert handler.shape == (10, 2)
    assert sorted(handler.cols) == sorted(["cat.b", "cat.c"])
    assert handler.rows == list(dates)

    dummies = handler.pandas
    assert_equal(dummies["cat.b"].values, (categories == "b").astype(float))
    assert_equal(dummies["cat.c"].values, (categories == "c").astype(float))
Ejemplo n.º 20
0
def test_categorical_no_conversion() -> None:
    """A categorical Series is kept as one column when ``convert_dummies=False``."""
    dates = pd.date_range("2017-01-01", periods=10)
    categories = pd.Categorical(
        ["a", "b", "a", "b", "a", "a", "b", "c", "c", "a"]
    )
    series = pd.Series(categories, index=dates, name="cat")
    handler = IVData(series, convert_dummies=False)

    assert handler.ndim == 2
    assert handler.shape == (10, 1)
    assert handler.cols == ["cat"]
    assert handler.rows == list(dates)
    # The stored frame is the Series promoted to a one-column DataFrame.
    expected = pd.DataFrame(series)
    assert_frame_equal(handler.pandas, expected)
Ejemplo n.º 21
0
    def multivariate_ls(cls, dependent, exog):
        """
        Interface for specification of multivariate regression models

        Parameters
        ----------
        dependent : array-like
            nobs by ndep array of dependent variables
        exog : array-like
            nobs by nvar array of exogenous regressors common to all models

        Returns
        -------
        model : SUR
            Model instance

        Notes
        -----
        Utility function to simplify the construction of multivariate
        regression models which all use the same regressors. One equation
        is built per column of the dependent variable, each paired with the
        full set of common exogenous regressors.

        Examples
        --------
        A simple CAP-M can be estimated as a multivariate regression

        >>> from linearmodels.datasets import french
        >>> from linearmodels.system import SUR
        >>> data = french.load()
        >>> portfolios = data[['S1V1','S1V5','S5V1','S5V5']]
        >>> factors = data[['MktRF']].copy()
        >>> factors['alpha'] = 1
        >>> mod = SUR.multivariate_ls(portfolios, factors)
        """
        lhs = IVData(dependent, var_name='dependent')
        rhs = IVData(exog, var_name='exog')
        # Equation order follows the column order of the dependent variable.
        pairs = ((name, (lhs.pandas[[name]], rhs.pandas))
                 for name in lhs.pandas)
        return cls(OrderedDict(pairs))
Ejemplo n.º 22
0
 def test_pandas_series_numeric(self):
     """A numeric Series becomes one column that keeps its name and index."""
     values = np.empty(10)
     column = values[:, None]
     dates = pd.date_range('2017-01-01', periods=10)
     series = pd.Series(values, name='charlie', index=dates)
     handler = IVData(series)

     assert handler.ndim == 2
     assert handler.cols == [series.name]
     assert handler.rows == list(series.index)
     assert_equal(handler.ndarray, column)
     expected = pd.DataFrame(column, columns=handler.cols, index=handler.rows)
     assert_frame_equal(handler.pandas, expected)
     assert handler.shape == (10, 1)
Ejemplo n.º 23
0
 def test_pandas_df_numeric(self):
     """A numeric DataFrame keeps its values, column names and index."""
     values = np.empty((10, 2))
     dates = pd.date_range('2017-01-01', periods=10)
     frame = pd.DataFrame(values, columns=['a', 'b'], index=dates)
     handler = IVData(frame)

     assert handler.ndim == 2
     assert handler.cols == list(frame.columns)
     assert handler.rows == list(frame.index)
     assert_equal(handler.ndarray, values)
     expected = pd.DataFrame(values, columns=handler.cols, index=handler.rows)
     assert_frame_equal(handler.pandas, expected)
     assert handler.shape == (10, 2)
Ejemplo n.º 24
0
def test_pandas_df_numeric() -> None:
    """A numeric DataFrame keeps its values, column names and index."""
    values = np.empty((10, 2))
    dates = pd.date_range("2017-01-01", periods=10)
    frame = pd.DataFrame(values, columns=["a", "b"], index=dates)
    handler = IVData(frame)

    assert handler.ndim == 2
    assert handler.cols == list(frame.columns)
    assert handler.rows == list(frame.index)
    assert_equal(handler.ndarray, values)

    # Rebuild the frame from the exposed labels and give it a daily frequency
    # before comparing with the stored frame.
    rebuilt = pd.DataFrame(values, columns=handler.cols, index=handler.rows)
    expected = rebuilt.asfreq("D")
    assert_frame_equal(handler.pandas, expected)
    assert handler.shape == (10, 2)
Ejemplo n.º 25
0
 def test_categorical(self):
     """A frame with a categorical and a numeric column expands to dummies plus the number."""
     index = pd.date_range('2017-01-01', periods=10)
     cat = pd.Categorical(['a', 'b', 'a', 'b', 'a', 'a', 'b', 'c', 'c', 'a'])
     num = np.empty(10)
     df = pd.DataFrame(OrderedDict(cat=cat, num=num), index=index)
     dh = IVData(df)
     assert dh.ndim == 2
     # Two dummy columns for the three categories, plus the numeric column.
     assert dh.shape == (10, 3)
     assert sorted(dh.cols) == sorted(['cat.b', 'cat.c', 'num'])
     assert dh.rows == list(index)
     assert_equal(dh.pandas['num'].values, num)
     # Builtin ``float`` replaces the ``np.float`` alias, which NumPy removed.
     assert_equal(dh.pandas['cat.b'].values, (cat == 'b').astype(float))
     assert_equal(dh.pandas['cat.c'].values, (cat == 'c').astype(float))
Ejemplo n.º 26
0
def test_categorical() -> None:
    """A frame with a categorical and a numeric column expands to dummies plus the number."""
    dates = pd.date_range("2017-01-01", periods=10)
    categories = pd.Categorical(
        ["a", "b", "a", "b", "a", "a", "b", "c", "c", "a"]
    )
    numbers = np.empty(10)
    frame = pd.DataFrame({"cat": categories, "num": numbers}, index=dates)
    handler = IVData(frame)

    assert handler.ndim == 2
    # Two dummy columns for the three categories, plus the numeric column.
    assert handler.shape == (10, 3)
    assert sorted(handler.cols) == sorted(["cat.b", "cat.c", "num"])
    assert handler.rows == list(dates)

    converted = handler.pandas
    assert_equal(converted["num"].values, numbers)
    assert_equal(converted["cat.b"].values, (categories == "b").astype(float))
    assert_equal(converted["cat.c"].values, (categories == "c").astype(float))
Ejemplo n.º 27
0
def test_drop_missing(data):
    """Fitting with missing rows must match fitting after dropping them by hand."""
    portfolios = data.portfolios
    # Blank out every 33rd row; a DataFrame needs positional ``iloc`` access.
    if isinstance(portfolios, pd.DataFrame):
        target = portfolios.iloc
    else:
        target = portfolios
    target[::33] = np.nan

    automatic = TradedFactorModel(portfolios, data.factors).fit()

    # Drop the union of missing rows from both inputs manually.
    wrapped_portfolios = IVData(portfolios)
    wrapped_factors = IVData(data.factors)
    missing = wrapped_portfolios.isnull | wrapped_factors.isnull
    for container in (wrapped_portfolios, wrapped_factors):
        container.drop(missing)

    manual = TradedFactorModel(wrapped_portfolios, wrapped_factors).fit()
    assert_equal(np.asarray(automatic.params), np.asarray(manual.params))
Ejemplo n.º 28
0
def test_fitted_predict(data, model):
    """Fitted values and predictions must be consistent with the residuals."""
    estimator = model(data.dep, None, data.endog, data.instr)
    results = estimator.fit()
    assert_series_equal(results.idiosyncratic, results.resids)

    # Fitted values are the dependent variable less the residuals.
    dep_frame = estimator.dependent.pandas
    fitted = dep_frame.values - results.resids.values[:, None]
    expected = DataFrame(fitted, dep_frame.index, ['fitted_values'])
    assert_frame_similar(expected, results.fitted_values)
    assert_allclose(expected, results.fitted_values)

    # Default prediction: a single column with one row per residual.
    default_pred = results.predict()
    assert isinstance(default_pred, DataFrame)
    assert default_pred.shape == (results.resids.shape[0], 1)

    # With residuals and missing rows requested: two columns and as many
    # rows as the wrapped dependent data.
    full_pred = results.predict(idiosyncratic=True, missing=True)
    wrapped_rows = IVData(data.dep).pandas.shape[0]
    assert full_pred.shape == (wrapped_rows, 2)
    assert list(full_pred.columns) == ['fitted_values', 'residual']
Ejemplo n.º 29
0
 def instruments(self) -> IVData:
     """Return a container built from ``None`` with one row per row of the dependent variable."""
     row_count = self._dependent.shape[0]
     return IVData(None, "instrument", nobs=row_count)
Ejemplo n.º 30
0
class Interaction(object):
    """
    Class that simplifies specifying interactions

    Parameters
    ----------
    cat : {ndarray, Series, DataFrame, DataArray}, optional
        Variables to treat as categoricals. Best format is a Categorical
        Series or DataFrame containing Categorical Series. Other formats
        are converted to Categorical Series, column-by-column. cats has
        shape (nobs, ncat).
    cont : {ndarray, Series, DataFrame, DataArray}, optional
        Variables to treat as continuous, (nobs, ncont).
    nobs : int, optional
        Number of observations. Must be given when both `cat` and `cont`
        are None; when data are provided it is replaced by the number of
        rows found in the data.

    Notes
    -----
    For each variable in `cont`, computes the interaction of the variable
    and the cartesian product of the categories.

    Examples
    --------
    >>> import numpy as np
    >>> from linearmodels.iv.absorbing import Interaction
    >>> rs = np.random.RandomState(0)
    >>> n = 100000
    >>> cats = rs.randint(2, size=n)  # binary dummy
    >>> cont = rs.standard_normal((n, 3))
    >>> interact = Interaction(cats, cont)
    >>> interact.sparse.shape  # Get the shape of the dummy matrix
    (100000, 6)

    >>> rs = np.random.RandomState(0)
    >>> import pandas as pd
    >>> cats_df = pd.concat([pd.Series(pd.Categorical(rs.randint(5,size=n)))
    ...                     for _ in range(4)], axis=1)
    >>> cats_df.describe()
                 0       1       2       3
    count   100000  100000  100000  100000
    unique       5       5       5       5
    top          3       3       0       4
    freq     20251   20195   20331   20158

    >>> interact = Interaction(cats_df, cont)
    >>> interact.sparse.shape # Cart product of all cats, 5**4, times ncont, 3
    (100000, 1875)
    """

    # Shared placeholder assigned in ``__init__`` before ``_check_data``
    # replaces it with the real containers.
    _iv_data = IVData(None, "none", 1)

    def __init__(
        self,
        cat: OptionalArrayLike = None,
        cont: OptionalArrayLike = None,
        nobs: Optional[int] = None,
    ) -> None:
        self._cat = cat
        self._cont = cont
        self._cat_data = self._iv_data
        self._cont_data = self._iv_data
        self._nobs = nobs
        self._check_data()

    @property
    def nobs(self) -> int:
        """Number of observations"""
        assert self._nobs is not None
        return self._nobs

    def _check_data(self) -> None:
        """
        Validate ``cat`` and ``cont`` and wrap them in data containers.

        Raises
        ------
        ValueError
            If both inputs are None and no ``nobs`` was given, or if both
            inputs are provided but neither has any columns.
        """
        cat, cont = self._cat, self._cont
        # Inputs without a ``shape`` attribute (e.g. None) count as 0 rows.
        cat_nobs = getattr(cat, "shape", (0, ))[0]
        cont_nobs = getattr(cont, "shape", (0, ))[0]
        nobs = max(cat_nobs, cont_nobs)
        if cat is None and cont is None:
            if self._nobs is not None:
                self._cont_data = self._cat_data = IVData(None,
                                                          "none",
                                                          nobs=self._nobs)
            else:
                raise ValueError(
                    "nobs must be provided when cat and cont are None")
            return
        self._nobs = nobs

        self._cat_data = IVData(cat, "cat", nobs=nobs, convert_dummies=False)
        self._cont_data = IVData(cont,
                                 "cont",
                                 nobs=nobs,
                                 convert_dummies=False)
        if self._cat_data.shape[1] == self._cont_data.shape[1] == 0:
            raise ValueError("Both cat and cont are empty arrays")
        cat_data = self._cat_data.pandas
        convert = [
            col for col in cat_data
            if not (is_categorical_dtype(cat_data[col]))
        ]
        if convert:
            # Rebuild the whole frame as categoricals; columns that already
            # are categorical pass through ``astype`` unchanged.
            cat_data = DataFrame(
                {col: cat_data[col].astype("category")
                 for col in cat_data})
            self._cat_data = IVData(cat_data, "cat", convert_dummies=False)

    @property
    def cat(self) -> DataFrame:
        """Categorical Variables"""
        return self._cat_data.pandas

    @property
    def cont(self) -> DataFrame:
        """Continuous Variables"""
        return self._cont_data.pandas

    @property
    def isnull(self) -> Series:
        """Rows with a missing value in any categorical or continuous column"""
        # ``axis`` must be passed by keyword: current pandas releases no
        # longer accept it positionally in ``DataFrame.any``.
        cat_missing = self.cat.isnull().any(axis=1)
        cont_missing = self.cont.isnull().any(axis=1)
        return cat_missing | cont_missing

    def drop(self, locs: BoolArray) -> None:
        """Forward ``locs`` to the ``drop`` method of both data containers"""
        self._cat_data.drop(locs)
        self._cont_data.drop(locs)

    @property
    def sparse(self) -> sp.csc_matrix:
        r"""
        Construct a sparse interaction matrix

        Returns
        -------
        csc_matrix
            Dummy interaction constructed from the cartesian product of
            the categories and each of the continuous variables.

        Notes
        -----
        The number of columns in `dummy_interact` is

        .. math::

            ncont \times \prod_{i=1}^{ncat} |c_i|

        where :math:`|c_i|` is the number distinct categories in column i.
        """
        cat, cont = self.cat, self.cont
        if cat.shape[1] and cont.shape[1]:
            # One block of category-by-continuous columns per continuous
            # variable, stacked side by side.
            blocks = [
                category_continuous_interaction(cat,
                                                cont[col],
                                                precondition=False)
                for col in cont
            ]
            return sp.hstack(blocks, format="csc")
        elif cat.shape[1]:
            return category_interaction(category_product(cat),
                                        precondition=False)
        elif cont.shape[1]:
            return sp.csc_matrix(self._cont_data.ndarray)
        else:  # empty interaction
            return sp.csc_matrix(empty((self._cat_data.shape[0], 0)))

    @property
    def hash(self) -> List[Tuple[str, ...]]:
        """
        Construct a hash that will be invariant for any permutation of
        inputs that produce the same fit when used as regressors"""
        # Sorted hashes of any categoricals
        hasher = hash_func()
        cat_hashes = []
        cat = self.cat
        for col in cat:
            codes = cat[col].cat.codes.to_numpy()
            # Hash the raw buffer of a contiguous copy, exactly as is done
            # for the continuous columns below.
            hasher.update(ascontiguousarray(codes).data)
            cat_hashes.append(hasher.hexdigest())
            hasher = _reset(hasher)
        sorted_hashes = tuple(sorted(cat_hashes))

        # One entry per continuous column: the categorical hashes followed
        # by the hash of that column.
        hashes = []
        cont = self.cont
        for col in cont:
            hasher.update(ascontiguousarray(cont[col].to_numpy()).data)
            hashes.append(sorted_hashes + (hasher.hexdigest(), ))
            hasher = _reset(hasher)

        return sorted(hashes)

    @staticmethod
    def from_frame(frame: DataFrame) -> Interaction:
        """
        Convenience function that simplifies using a DataFrame

        Parameters
        ----------
        frame : DataFrame
            Frame containing categorical and continuous variables. All
            categorical variables are passed to `cat` and all other
            variables are passed as `cont`.

        Returns
        -------
        Interaction
            Instance using the columns of frame

        Examples
        --------
        >>> import numpy as np
        >>> from linearmodels.iv.absorbing import Interaction
        >>> import pandas as pd
        >>> rs = np.random.RandomState(0)
        >>> n = 100000
        >>> cats = pd.concat([pd.Series(pd.Categorical(rs.randint(i+2,size=n)))
        ...                  for i in range(4)], axis=1)
        >>> cats.columns = ['cat{0}'.format(i) for i in range(4)]
        >>> columns = ['cont{0}'.format(i) for i in range(6)]
        >>> cont = pd.DataFrame(rs.standard_normal((n, 6)), columns=columns)
        >>> frame = pd.concat([cats, cont], axis=1)
        >>> interact = Interaction.from_frame(frame)
        >>> interact.sparse.shape # Cart product of all cats, 5!, times ncont, 6
        (100000, 720)
        """
        cat_cols = [col for col in frame if is_categorical_dtype(frame[col])]
        cont_cols = [col for col in frame if col not in cat_cols]
        return Interaction(frame[cat_cols],
                           frame[cont_cols],
                           nobs=frame.shape[0])