Example #1
0
 def test_extra_kwargs_2d(self):
     """Expect a square ``sigma`` keyword to be trimmed on both axes by 'drop'."""
     raw = np.random.random((25, 25))
     # Symmetrise: add the transpose, then remove the doubled diagonal.
     full_sigma = raw + raw.T - np.diag(np.diag(raw))
     data = sm_data.handle_data(self.y, self.X, 'drop', sigma=full_sigma)
     # Rows that are NaN-free across endog and exog taken together.
     row_has_nan = np.isnan(np.c_[self.y, self.X]).any(axis=1)
     keep = ~row_has_nan
     expected = full_sigma[keep][:, keep]
     np.testing.assert_array_equal(data.sigma, expected)
Example #2
0
 def setupClass(cls):
     """Create pandas fixtures for a model with four endogenous columns.

     Stores the endog/exog frames, the ``handle_data`` result, and pairs of
     raw random inputs with the same values labelled by the exog/endog
     column names or the exog row index.
     """
     nrows, nvars, neqs = 10, 3, 4

     endog = pandas.DataFrame(np.random.random((10, 4)),
                              columns=['y_1', 'y_2', 'y_3', 'y_4'])
     cls.endog = endog
     exog = pandas.DataFrame(np.random.random((10, 2)),
                             columns=['x_1', 'x_2'])
     exog.insert(0, 'const', 1)  # constant becomes the first column
     cls.exog = exog
     cls.data = sm_data.handle_data(cls.endog, cls.exog)

     x_labels = exog.columns
     y_labels = endog.columns

     # One value per exog column.
     cls.col_input = np.random.random(nvars)
     cls.col_result = pandas.Series(cls.col_input, index=x_labels)

     # One value per observation.
     cls.row_input = np.random.random(nrows)
     cls.row_result = pandas.Series(cls.row_input, index=exog.index)

     # Square matrix over exog columns.
     cls.cov_input = np.random.random((nvars, nvars))
     cls.cov_result = pandas.DataFrame(
         cls.cov_input, index=x_labels, columns=x_labels)

     # Square matrix over endog columns.
     cls.cov_eq_input = np.random.random((neqs, neqs))
     cls.cov_eq_result = pandas.DataFrame(
         cls.cov_eq_input, index=y_labels, columns=y_labels)

     # Exog columns by endog columns.
     cls.col_eq_input = np.random.random((nvars, neqs))
     cls.col_eq_result = pandas.DataFrame(
         cls.col_eq_input, index=x_labels, columns=y_labels)

     cls.xnames = ['const', 'x_1', 'x_2']
     cls.ynames = ['y_1', 'y_2', 'y_3', 'y_4']
     cls.row_labels = cls.exog.index
Example #3
0
 def setupClass(cls):
     """Override the parent fixtures with a one-dimensional exog array."""
     super(TestArrays1dExog, cls).setupClass()
     cls.endog = np.random.random(10)
     flat_exog = np.random.random(10)
     # handle_data receives the 1-D array; the stored exog is its column form.
     cls.data = sm_data.handle_data(cls.endog, flat_exog)
     cls.exog = flat_exog[:, np.newaxis]
     cls.xnames = ['x1']
     cls.ynames = 'y'
 def setupClass(cls):
     """Override the parent fixtures with record-array endog and exog."""
     super(TestStructarrays, cls).setupClass()
     endog_fields = [("y_1", "f8")]
     exog_fields = [("const", "f8"), ("x_1", "f8"), ("x_2", "f8")]
     cls.endog = np.random.random(9).view(endog_fields).view(np.recarray)
     # 27 floats viewed through three f8 fields give nine records.
     structured = np.random.random(9 * 3).view(exog_fields)
     exog = structured.view(np.recarray)
     exog["const"] = 1
     cls.exog = exog
     cls.data = sm_data.handle_data(cls.endog, cls.exog)
     cls.xnames = ["const", "x_1", "x_2"]
     cls.ynames = "y_1"
Example #5
0
 def test_drop(self):
     """Expect 'drop' to keep only rows with no NaN in endog or exog."""
     stacked = np.c_[self.y, self.X]
     complete = ~np.isnan(stacked).any(axis=1)
     expected_endog = self.y[complete]
     expected_exog = self.X[complete]
     data = sm_data.handle_data(self.y, self.X, 'drop')
     np.testing.assert_array_equal(data.endog, expected_endog)
     np.testing.assert_array_equal(data.exog, expected_exog)
Example #6
0
 def setupClass(cls):
     """Replace the inherited fixtures with structured record arrays."""
     super(TestStructarrays, cls).setupClass()
     y_layout = [('y_1', 'f8')]
     x_layout = [('const', 'f8'), ('x_1', 'f8'), ('x_2', 'f8')]
     endog_values = np.random.random(9)
     cls.endog = endog_values.view(y_layout).view(np.recarray)
     exog_values = np.random.random(9*3)
     exog = exog_values.view(x_layout).view(np.recarray)
     # Overwrite the random 'const' field with ones.
     exog['const'] = 1
     cls.exog = exog
     cls.data = sm_data.handle_data(cls.endog, cls.exog)
     cls.xnames = ['const', 'x_1', 'x_2']
     cls.ynames = 'y_1'
Example #7
0
 def setupClass(cls):
     """Create ndarray fixtures: 1-D endog and an exog with a leading constant.

     Each ``*_result`` attribute is the same array object as its
     ``*_input`` counterpart.
     """
     nrows, nvars = 10, 3
     cls.endog = np.random.random(10)
     regressors = np.random.random((10, 2))
     cls.exog = np.c_[np.ones(10), regressors]
     cls.data = sm_data.handle_data(cls.endog, cls.exog)

     per_column = np.random.random(nvars)
     cls.col_input = per_column
     cls.col_result = per_column

     per_row = np.random.random(nrows)
     cls.row_input = per_row
     cls.row_result = per_row

     square = np.random.random((nvars, nvars))
     cls.cov_input = square
     cls.cov_result = square

     cls.xnames = ['const', 'x1', 'x2']
     cls.ynames = 'y'
     cls.row_labels = None
Example #8
0
 def setup_class(cls):
     """Build plain-array endog/exog fixtures and aliased input/result arrays."""
     nrows, nvars = 10, 3
     cls.endog = np.random.random(10)
     constant = np.ones(10)
     cls.exog = np.c_[constant, np.random.random((10, 2))]
     cls.data = sm_data.handle_data(cls.endog, cls.exog)

     # Inputs and results deliberately share one array object each.
     col_values = np.random.random(nvars)
     row_values = np.random.random(nrows)
     cov_values = np.random.random((nvars, nvars))
     cls.col_input = cls.col_result = col_values
     cls.row_input = cls.row_result = row_values
     cls.cov_input = cls.cov_result = cov_values

     cls.xnames = ['const', 'x1', 'x2']
     cls.ynames = 'y'
     cls.row_labels = None
Example #9
0
 def test_drop(self):
     """Expect 'drop' to keep only rows that are NaN-free in endog and exog.

     Checks the array attributes (``endog``/``exog``) as well as the
     pandas originals kept on ``_orig_endog``/``_orig_exog``.
     """
     y = self.y
     X = self.X
     combined = np.c_[y, X]
     idx = ~np.isnan(combined).any(axis=1)
     # Select with ``.loc`` and the boolean mask; the ``.ix`` indexer has
     # been removed from pandas.
     y = y.loc[idx]
     X = X.loc[idx]
     data = sm_data.handle_data(self.y, self.X, 'drop')
     np.testing.assert_array_equal(data.endog, y.values)
     ptesting.assert_series_equal(data._orig_endog, self.y.loc[idx])
     np.testing.assert_array_equal(data.exog, X.values)
     ptesting.assert_frame_equal(data._orig_exog, self.X.loc[idx])
Example #10
0
 def test_drop(self):
     """Expect 'drop' to keep only rows that are NaN-free in endog and exog.

     Checks the array attributes (``endog``/``exog``) as well as the
     pandas originals kept on ``orig_endog``/``orig_exog``.
     """
     y = self.y
     X = self.X
     combined = np.c_[y, X]
     idx = ~np.isnan(combined).any(axis=1)
     # Select with ``.loc`` and the boolean mask; the ``.ix`` indexer has
     # been removed from pandas.
     y = y.loc[idx]
     X = X.loc[idx]
     data = sm_data.handle_data(self.y, self.X, 'drop')
     np.testing.assert_array_equal(data.endog, y.values)
     ptesting.assert_series_equal(data.orig_endog, self.y.loc[idx])
     np.testing.assert_array_equal(data.exog, X.values)
     ptesting.assert_frame_equal(data.orig_exog, self.X.loc[idx])
Example #11
0
 def setupClass(cls):
     """Create ndarray fixtures for a model with four endogenous columns.

     Each ``*_result`` attribute is the same array object as its
     ``*_input`` counterpart.
     """
     cls.endog = np.random.random((10,4))
     cls.exog = np.c_[np.ones(10), np.random.random((10,2))]
     cls.data = sm_data.handle_data(cls.endog, cls.exog)
     nrows = 10
     nvars = 3
     neqs = 4
     cls.col_result = cls.col_input = np.random.random(nvars)
     cls.row_result = cls.row_input = np.random.random(nrows)
     cls.cov_result = cls.cov_input = np.random.random((nvars, nvars))
     cls.cov_eq_result = cls.cov_eq_input = np.random.random((neqs,neqs))
     # One value per (exog column, equation) pair.  ``np.array((neqs, nvars))``
     # would only build the two-element array ``[4, 3]``.
     cls.col_eq_result = cls.col_eq_input = np.random.random((nvars, neqs))
     cls.xnames = ['const', 'x1', 'x2']
     cls.ynames = ['y1', 'y2', 'y3', 'y4']
     cls.row_labels = None
 def setupClass(cls):
     """Create ndarray fixtures for a model with four endogenous columns.

     Each ``*_result`` attribute is the same array object as its
     ``*_input`` counterpart.
     """
     cls.endog = np.random.random((10, 4))
     cls.exog = np.c_[np.ones(10), np.random.random((10, 2))]
     cls.data = sm_data.handle_data(cls.endog, cls.exog)
     nrows = 10
     nvars = 3
     neqs = 4
     cls.col_result = cls.col_input = np.random.random(nvars)
     cls.row_result = cls.row_input = np.random.random(nrows)
     cls.cov_result = cls.cov_input = np.random.random((nvars, nvars))
     cls.cov_eq_result = cls.cov_eq_input = np.random.random((neqs, neqs))
     # One value per (exog column, equation) pair.  ``np.array((neqs, nvars))``
     # would only build the two-element array ``[4, 3]``.
     cls.col_eq_result = cls.col_eq_input = np.random.random((nvars, neqs))
     cls.xnames = ["const", "x1", "x2"]
     cls.ynames = ["y1", "y2", "y3", "y4"]
     cls.row_labels = None
Example #13
0
 def setupClass(cls):
     """Create ndarray fixtures for a model with four endogenous columns.

     Each ``*_result`` attribute is the same array object as its
     ``*_input`` counterpart.
     """
     cls.endog = np.random.random((10, 4))
     cls.exog = np.c_[np.ones(10), np.random.random((10, 2))]
     cls.data = sm_data.handle_data(cls.endog, cls.exog)
     nrows = 10
     nvars = 3
     neqs = 4
     cls.col_result = cls.col_input = np.random.random(nvars)
     cls.row_result = cls.row_input = np.random.random(nrows)
     cls.cov_result = cls.cov_input = np.random.random((nvars, nvars))
     cls.cov_eq_result = cls.cov_eq_input = np.random.random((neqs, neqs))
     # One value per (exog column, equation) pair.  ``np.array((neqs, nvars))``
     # would only build the two-element array ``[4, 3]``.
     cls.col_eq_result = cls.col_eq_input = np.random.random((nvars, neqs))
     cls.xnames = ['const', 'x1', 'x2']
     cls.ynames = ['y1', 'y2', 'y3', 'y4']
     cls.row_labels = None
 def setupClass(cls):
     """Create pandas fixtures: a one-column endog frame and a 3-column exog.

     Each ``*_input`` is a raw random array; the matching ``*_result``
     carries the same values labelled with the exog columns or index.
     """
     nrows, nvars = 10, 3
     cls.endog = pandas.DataFrame(np.random.random(10), columns=["y_1"])
     exog = pandas.DataFrame(np.random.random((10, 2)), columns=["x_1", "x_2"])
     exog.insert(0, "const", 1)  # constant goes in front
     cls.exog = exog
     cls.data = sm_data.handle_data(cls.endog, cls.exog)

     column_labels = exog.columns
     cls.col_input = np.random.random(nvars)
     cls.col_result = pandas.Series(cls.col_input, index=column_labels)
     cls.row_input = np.random.random(nrows)
     cls.row_result = pandas.Series(cls.row_input, index=exog.index)
     cls.cov_input = np.random.random((nvars, nvars))
     cls.cov_result = pandas.DataFrame(
         cls.cov_input, index=column_labels, columns=column_labels)

     cls.xnames = ["const", "x_1", "x_2"]
     cls.ynames = "y_1"
     cls.row_labels = cls.exog.index
    def setupClass(cls):
        """Create fixtures where both endog and exog are named pandas Series."""
        nrows, nvars = 10, 1
        cls.endog = pandas.Series(np.random.random(10), name="y_1")
        exog = pandas.Series(np.random.random(10), name="x_1")
        cls.exog = exog
        cls.data = sm_data.handle_data(cls.endog, cls.exog)

        # The Series name stands in for the single column label.
        cls.col_input = np.random.random(nvars)
        cls.col_result = pandas.Series(cls.col_input, index=[exog.name])
        cls.row_input = np.random.random(nrows)
        cls.row_result = pandas.Series(cls.row_input, index=exog.index)
        cls.cov_input = np.random.random((nvars, nvars))
        cls.cov_result = pandas.DataFrame(
            cls.cov_input, index=[exog.name], columns=[exog.name])

        cls.xnames = ["x_1"]
        cls.ynames = "y_1"
        cls.row_labels = cls.exog.index
Example #16
0
 def setup_class(cls):
     """Build pandas endog/exog frames plus raw inputs and labelled results."""
     nrows, nvars = 10, 3
     cls.endog = pd.DataFrame(np.random.random(10), columns=['y_1'])
     exog = pd.DataFrame(np.random.random((10, 2)), columns=['x_1', 'x_2'])
     exog.insert(0, 'const', 1)
     cls.exog = exog
     cls.data = sm_data.handle_data(cls.endog, cls.exog)

     names = exog.columns
     observations = exog.index
     cls.col_input = np.random.random(nvars)
     cls.col_result = pd.Series(cls.col_input, index=names)
     cls.row_input = np.random.random(nrows)
     cls.row_result = pd.Series(cls.row_input, index=observations)
     cls.cov_input = np.random.random((nvars, nvars))
     cls.cov_result = pd.DataFrame(cls.cov_input, index=names, columns=names)

     cls.xnames = ['const', 'x_1', 'x_2']
     cls.ynames = 'y_1'
     cls.row_labels = cls.exog.index
Example #17
0
    def setupClass(cls):
        """Set up Series-based endog/exog and single-variable wrapper fixtures."""
        nrows, nvars = 10, 1
        cls.endog = pandas.Series(np.random.random(10), name='y_1')
        exog = pandas.Series(np.random.random(10), name='x_1')
        cls.exog = exog
        cls.data = sm_data.handle_data(cls.endog, cls.exog)

        # A Series has no columns, so its name is used as the only label.
        single_label = [exog.name]
        cls.col_input = np.random.random(nvars)
        cls.col_result = pandas.Series(cls.col_input, index=single_label)
        cls.row_input = np.random.random(nrows)
        cls.row_result = pandas.Series(cls.row_input, index=exog.index)
        cls.cov_input = np.random.random((nvars, nvars))
        cls.cov_result = pandas.DataFrame(cls.cov_input,
                                          index=[exog.name],
                                          columns=[exog.name])

        cls.xnames = ['x_1']
        cls.ynames = 'y_1'
        cls.row_labels = cls.exog.index
Example #18
0
 def setup_class(cls):
     """Build fixtures whose exog has two-level (MultiIndex) column labels.

     The ``*_result`` objects are labelled with a flat index of
     ``'const'``, ``'x_1'`` and ``'x_2'`` rather than the MultiIndex.
     """
     nrows, nvars = 10, 3
     flat_labels = pd.Index(['const', 'x_1', 'x_2'])

     cls.endog = pd.DataFrame(np.random.random(10), columns=['y_1'])
     two_level_cols = pd.MultiIndex.from_product([['x'], ['1', '2']])
     exog = pd.DataFrame(np.random.random((10, 2)), columns=two_level_cols)
     exog.insert(0, 'const', 1)
     cls.exog = exog
     cls.data = sm_data.handle_data(cls.endog, cls.exog)

     cls.col_input = np.random.random(nvars)
     cls.col_result = pd.Series(cls.col_input, index=flat_labels)
     cls.row_input = np.random.random(nrows)
     cls.row_result = pd.Series(cls.row_input, index=exog.index)
     cls.cov_input = np.random.random((nvars, nvars))
     cls.cov_result = pd.DataFrame(
         cls.cov_input, index=flat_labels, columns=flat_labels)

     cls.xnames = ['const', 'x_1', 'x_2']
     cls.ynames = 'y_1'
     cls.row_labels = cls.exog.index
Example #19
0
 def setup_class(cls):
     """Build fixtures whose exog has two-level (MultiIndex) column labels.

     The ``*_result`` objects are labelled with a flat index of
     ``'const'``, ``'x_1'`` and ``'x_2'`` rather than the MultiIndex.
     """
     nrows, nvars = 10, 3
     flat_labels = pandas.Index(['const', 'x_1', 'x_2'])

     cls.endog = pandas.DataFrame(np.random.random(10), columns=['y_1'])
     two_level_cols = pandas.MultiIndex.from_product([['x'], ['1', '2']])
     exog = pandas.DataFrame(np.random.random((10, 2)),
                             columns=two_level_cols)
     exog.insert(0, 'const', 1)
     cls.exog = exog
     cls.data = sm_data.handle_data(cls.endog, cls.exog)

     cls.col_input = np.random.random(nvars)
     cls.col_result = pandas.Series(cls.col_input, index=flat_labels)
     cls.row_input = np.random.random(nrows)
     cls.row_result = pandas.Series(cls.row_input, index=exog.index)
     cls.cov_input = np.random.random((nvars, nvars))
     cls.cov_result = pandas.DataFrame(
         cls.cov_input, index=flat_labels, columns=flat_labels)

     cls.xnames = ['const', 'x_1', 'x_2']
     cls.ynames = 'y_1'
     cls.row_labels = cls.exog.index
Example #20
0
    def setup_class(cls):
        """Build fixtures mixing a plain-list endog with a pandas exog frame."""
        nrows, nvars = 10, 3
        cls.endog = np.random.random(10).tolist()

        exog = pandas.DataFrame(np.random.random((10, 2)),
                                columns=['x_1', 'x_2'])
        exog.insert(0, 'const', 1)
        cls.exog = exog
        cls.data = sm_data.handle_data(cls.endog, cls.exog)

        # Labels for the expected results come from the exog frame.
        x_labels = exog.columns
        cls.col_input = np.random.random(nvars)
        cls.col_result = pandas.Series(cls.col_input, index=x_labels)
        cls.row_input = np.random.random(nrows)
        cls.row_result = pandas.Series(cls.row_input, index=exog.index)
        cls.cov_input = np.random.random((nvars, nvars))
        cls.cov_result = pandas.DataFrame(
            cls.cov_input, index=x_labels, columns=x_labels)

        cls.xnames = ['const', 'x_1', 'x_2']
        cls.ynames = 'y'
        cls.row_labels = cls.exog.index
Example #21
0
    def setupClass(cls):
        """Build fixtures mixing a pandas endog frame with an ndarray exog.

        The exog is created as a frame only to supply labels for the
        expected results; ``handle_data`` is given its ``.values``.
        """
        nrows, nvars = 10, 3
        cls.endog = pandas.DataFrame(np.random.random(10), columns=['y_1'])

        # names mimic defaults
        labelled_exog = pandas.DataFrame(np.random.random((10, 2)),
                                         columns=['x1', 'x2'])
        labelled_exog.insert(0, 'const', 1)
        cls.exog = labelled_exog.values
        cls.data = sm_data.handle_data(cls.endog, cls.exog)

        x_labels = labelled_exog.columns
        cls.col_input = np.random.random(nvars)
        cls.col_result = pandas.Series(cls.col_input, index=x_labels)
        cls.row_input = np.random.random(nrows)
        cls.row_result = pandas.Series(cls.row_input,
                                       index=labelled_exog.index)
        cls.cov_input = np.random.random((nvars, nvars))
        cls.cov_result = pandas.DataFrame(
            cls.cov_input, index=x_labels, columns=x_labels)

        cls.xnames = ['const', 'x1', 'x2']
        cls.ynames = 'y_1'
        # Row labels are taken from endog, the only pandas input here.
        cls.row_labels = cls.endog.index
Example #22
0
 def test_endog_only_drop(self):
     """Expect 'drop' with no exog to leave endog equal to ``self.y`` sans NaN."""
     expected = self.y.dropna()
     result = sm_data.handle_data(self.y, None, 'drop')
     np.testing.assert_array_equal(result.endog, expected.values)
Example #23
0
 def test_none(self):
     """Expect 'none' to leave endog/exog equal to the pandas ``.values``."""
     handled = sm_data.handle_data(self.y, self.X, 'none')
     endog_expected = self.y.values
     np.testing.assert_array_equal(handled.endog, endog_expected)
     exog_expected = self.X.values
     np.testing.assert_array_equal(handled.exog, exog_expected)
Example #24
0
 def test_none(self):
     """Expect 'none' with ``hasconst=False`` to keep data and report no constant."""
     handled = sm_data.handle_data(self.y, self.X, 'none', hasconst=False)
     check = np.testing.assert_array_equal
     check(handled.endog, self.y)
     check(handled.exog, self.X)
     assert handled.k_constant == 0
Example #25
0
 def test_mv_endog(self):
     """Expect 'drop' on a multi-column endog to remove rows containing NaN."""
     frame = self.X
     row_has_nan = np.isnan(frame.values).any(axis=1)
     expected = frame.loc[~row_has_nan]
     result = sm_data.handle_data(self.X, None, 'drop')
     np.testing.assert_array_equal(result.endog, expected.values)
Example #26
0
 def test_none(self):
     """Expect 'none' with ``hasconst=False`` to return the pandas values as-is."""
     result = sm_data.handle_data(self.y, self.X, 'none', hasconst=False)
     expected_endog = self.y.values
     np.testing.assert_array_equal(result.endog, expected_endog)
     expected_exog = self.X.values
     np.testing.assert_array_equal(result.exog, expected_exog)
Example #27
0
 def test_endog_only_raise(self):
     """Expect 'raise' to raise when only ``self.y`` is passed."""
     with pytest.raises(Exception):
         # TODO: narrow this to the specific exception type
         endog, exog, policy = self.y, None, 'raise'
         sm_data.handle_data(endog, exog, policy)
Example #28
0
 def setupClass(cls):
     """Override the parent fixtures with a two-dimensional, one-column endog."""
     super(TestArrays2dEndog, cls).setupClass()
     cls.endog = np.random.random((10, 1))
     constant = np.ones(10)
     regressors = np.random.random((10, 2))
     cls.exog = np.c_[constant, regressors]
     cls.data = sm_data.handle_data(cls.endog, cls.exog)
Example #29
0
 def test_raise_no_missing(self):
     """Smoke test (GH#1700): 'raise' on complete pandas data must not raise."""
     endog = pd.Series(np.random.random(20))
     exog = pd.DataFrame(np.random.random((20, 2)))
     sm_data.handle_data(endog, exog, 'raise')
Example #30
0
    def __init__(self, endog, exog, constraints=None, **kwargs):
        """Set up the model data, optional linear constraints and state space.

        Parameters
        ----------
        endog : array_like or pandas object
            Converted with ``np.asanyarray`` unless ``_is_using_pandas``
            reports it as pandas.
        exog : array_like or pandas object
            Converted with ``np.asarray`` unless pandas; a 1-D input is
            promoted to a single column.  Its column count becomes
            ``k_exog`` and the number of states.
        constraints : optional
            When not None, passed to patsy's
            ``DesignInfo(...).linear_constraint`` together with the
            parameter names obtained from ``handle_data``.
        **kwargs
            Forwarded to ``handle_data`` (only when constraints are given)
            and to the parent ``__init__``; ``'initialization'`` defaults
            to ``'diffuse'`` if the caller did not set it.
        """
        # Standardize data
        endog_using_pandas = _is_using_pandas(endog, None)
        if not endog_using_pandas:
            endog = np.asanyarray(endog)

        exog_is_using_pandas = _is_using_pandas(exog, None)
        if not exog_is_using_pandas:
            exog = np.asarray(exog)

        # Make sure we have 2-dimensional array
        if exog.ndim == 1:
            if not exog_is_using_pandas:
                exog = exog[:, None]
            else:
                exog = pd.DataFrame(exog)

        self.k_exog = exog.shape[1]

        # Handle constraints
        self.k_constraints = 0
        self._r_matrix = self._q_matrix = None
        if constraints is not None:
            # Imported lazily, only when constraints are actually used.
            from patsy import DesignInfo
            from statsmodels.base.data import handle_data
            # handle_data is called here only to obtain the parameter names.
            data = handle_data(endog, exog, **kwargs)
            names = data.param_names
            LC = DesignInfo(names).linear_constraint(constraints)
            self._r_matrix, self._q_matrix = LC.coefs, LC.constants
            self.k_constraints = self._r_matrix.shape[0]

            # One extra endog column per constraint row.
            constraint_endog = np.zeros((len(endog), len(self._r_matrix)))
            if endog_using_pandas:
                constraint_endog = pd.DataFrame(constraint_endog,
                                                index=endog.index)
                endog = concat([endog, constraint_endog], axis=1)
                # NOTE(review): writes through ``endog.values``; this only
                # reaches the frame if ``.values`` is a view — confirm.
                endog.values[:, 1:] = self._q_matrix[:, 0]
            else:
                # NOTE(review): unlike the pandas branch, ``constraint_endog``
                # is not appended to ``endog`` here before columns 1: are
                # assigned — confirm this is intended for ndarray input.
                endog[:, 1:] = self._q_matrix[:, 0]

        # Handle coefficient initialization
        kwargs.setdefault('initialization', 'diffuse')

        # Initialize the state space representation
        super(RecursiveLS, self).__init__(
            endog, k_states=self.k_exog, exog=exog, **kwargs)

        # Use univariate filtering by default
        self.ssm.filter_univariate = True

        # Concentrate the scale out of the likelihood function
        self.ssm.filter_concentrated = True

        # Setup the state space representation
        self['design'] = np.zeros((self.k_endog, self.k_states, self.nobs))
        # Row 0 of the design matrix holds the exog values.
        self['design', 0] = self.exog[:, :, None].T
        if self._r_matrix is not None:
            # Remaining rows hold the constraint coefficient matrix.
            self['design', 1:, :] = self._r_matrix[:, :, None]
        self['transition'] = np.eye(self.k_states)

        # Notice that the filter output does not depend on the measurement
        # variance, so we set it here to 1
        self['obs_cov', 0, 0] = 1.
        # NOTE(review): 'transition' was already set to the same identity
        # matrix a few lines above; this second assignment looks redundant.
        self['transition'] = np.eye(self.k_states)

        # Linear constraints are technically imposed by adding "fake" endog
        # variables that are used during filtering, but for all model- and
        # results-based purposes we want k_endog = 1.
        if self._r_matrix is not None:
            self.k_endog = 1
Example #31
0
 def test_raise_no_missing(self):
     """Smoke test for #1700: 'raise' on complete pandas data must not raise."""
     complete_endog = pandas.Series(np.random.random(20))
     complete_exog = pandas.DataFrame(np.random.random((20, 2)))
     sm_data.handle_data(complete_endog, complete_exog, 'raise')
Example #32
0
 def test_pandas_constant(self):
     """Expect an added 'const' column to be reported as the constant at index 6."""
     exog_with_const = self.data.exog.copy()
     exog_with_const['const'] = 1
     handled = sm_data.handle_data(self.data.endog, exog_with_const)
     np.testing.assert_equal(handled.k_constant, 1)
     np.testing.assert_equal(handled.const_idx, 6)
Example #33
0
 def test_pandas_noconstant(self):
     """Expect the unmodified pandas exog to be reported as having no constant."""
     exog_copy = self.data.exog.copy()
     handled = sm_data.handle_data(self.data.endog, exog_copy)
     np.testing.assert_equal(handled.k_constant, 0)
     np.testing.assert_equal(handled.const_idx, None)
Example #34
0
 def setupClass(cls):
     """Override the parent fixtures with plain Python lists."""
     super(TestLists, cls).setupClass()
     cls.endog = np.random.random(10).tolist()
     exog_array = np.c_[np.ones(10), np.random.random((10, 2))]
     cls.exog = exog_array.tolist()
     cls.data = sm_data.handle_data(cls.endog, cls.exog)
Example #35
0
 def test_mv_endog(self):
     """Expect 'drop' on a multi-column endog to remove rows containing NaN."""
     y = self.X
     # Boolean-mask selection via ``.loc``; the ``.ix`` indexer has been
     # removed from pandas.
     y = y.loc[~np.isnan(y.values).any(axis=1)]
     data = sm_data.handle_data(self.X, None, 'drop')
     np.testing.assert_array_equal(data.endog, y.values)
Example #36
0
 def test_raise_no_missing(self):
     """Smoke test for #1700: 'raise' on complete arrays must not raise."""
     complete_endog = np.random.random(20)
     complete_exog = np.random.random((20, 2))
     sm_data.handle_data(complete_endog, complete_exog, 'raise')
Example #37
0
 def test_labels(self):
     """Expect 'drop' to leave row labels 0..24 without 2, 10 and 14."""
     # Labels expected to be removed; previously a bare, no-op expression.
     dropped = {2, 10, 14}
     labels = pandas.Index([i for i in range(25) if i not in dropped])
     data = sm_data.handle_data(self.y, self.X, 'drop')
     np.testing.assert_(data.row_labels.equals(labels))
Example #38
0
 def test_extra_kwargs_1d(self):
     """Expect a 1-D ``weights`` keyword to be trimmed to the kept rows."""
     all_weights = np.random.random(25)
     data = sm_data.handle_data(self.y, self.X, 'drop', weights=all_weights)
     # Rows that are NaN-free across endog and exog taken together.
     row_has_nan = np.isnan(np.c_[self.y, self.X]).any(axis=1)
     expected = all_weights[~row_has_nan]
     np.testing.assert_array_equal(data.weights, expected)
Example #39
0
 def setupClass(cls):
     """Replace the inherited endog with a (10, 1) array and rebuild the data."""
     super(TestArrays2dEndog, cls).setupClass()
     cls.endog = np.random.random((10, 1))
     random_columns = np.random.random((10, 2))
     cls.exog = np.c_[np.ones(10), random_columns]
     cls.data = sm_data.handle_data(cls.endog, cls.exog)
Example #40
0
 def test_raise(self):
     """Expect 'raise' to raise for ``self.y`` / ``self.X``."""
     with pytest.raises(Exception):
         # TODO: narrow this to the specific exception type
         endog, exog, policy = self.y, self.X, 'raise'
         sm_data.handle_data(endog, exog, policy)
Example #41
0
 def setupClass(cls):
     """Replace the inherited fixtures with list-of-floats endog and exog."""
     super(TestLists, cls).setupClass()
     endog_array = np.random.random(10)
     cls.endog = endog_array.tolist()
     exog_array = np.c_[np.ones(10), np.random.random((10, 2))]
     cls.exog = exog_array.tolist()
     cls.data = sm_data.handle_data(cls.endog, cls.exog)
Example #42
0
 def test_pandas_noconstant(self):
     """Expect no constant to be detected in a copy of the pandas exog."""
     result = sm_data.handle_data(self.data.endog, self.data.exog.copy())
     check = np.testing.assert_equal
     check(result.k_constant, 0)
     check(result.const_idx, None)
Example #43
0
 def test_raise_no_missing(self):
     """Smoke test for #1700: NaN-free arrays pass under 'raise'."""
     endog = np.random.random(20)
     exog = np.random.random((20, 2))
     sm_data.handle_data(endog, exog, 'raise')
Example #44
0
 def test_endog_only_drop(self):
     """Expect 'drop' with no exog to strip the NaN entries from endog."""
     is_missing = np.isnan(self.y)
     expected = self.y[~is_missing]
     result = sm_data.handle_data(self.y, None, 'drop')
     np.testing.assert_array_equal(result.endog, expected)
Example #45
0
 def test_endog_only_drop(self):
     """Expect endog-only 'drop' to return ``self.y`` without its NaN values."""
     observed = self.y
     expected = observed[np.logical_not(np.isnan(observed))]
     handled = sm_data.handle_data(self.y, None, 'drop')
     np.testing.assert_array_equal(handled.endog, expected)
Example #46
0
def test_formula_missing_extra_arrays():
    """Check that extra arrays follow formula-driven missing-value dropping.

    patsy cannot turn off its own missing-data handling (as of 0.3.0), so
    data passes through ``handle_formula_data`` first and the regular
    ``handle_data`` step second (see 2083).  Three cases are exercised: a
    1-D weights column with its own NaN, a 2-D weights matrix with
    symmetric NaNs, and a weights array of the wrong length.
    """
    np.random.seed(1)

    # The draws below are kept in this order so the seeded stream is the same.
    y = np.random.randn(10)
    y_missing = y.copy()
    y_missing[[2, 5]] = np.nan
    X = np.random.randn(10)
    X_missing = X.copy()
    X_missing[[1, 3]] = np.nan

    weights = np.random.uniform(size=10)
    weights_missing = weights.copy()
    weights_missing[[6]] = np.nan

    weights_wrong_size = np.random.randn(12)

    frame = pandas.DataFrame.from_dict({
        'y': y,
        'X': X,
        'y_missing': y_missing,
        'X_missing': X_missing,
        'weights': weights,
        'weights_missing': weights_missing,
    })
    frame['constant'] = 1

    formula = 'y_missing ~ X_missing'

    # Case 1: endog/exog and the 1-D weights all have missing values.
    (endog, exog), missing_idx, _design_info = handle_formula_data(
        frame, None, formula, depth=2, missing='drop')

    kwargs = {'missing_idx': missing_idx, 'missing': 'drop',
              'weights': frame['weights_missing']}

    model_data = sm_data.handle_data(endog, exog, **kwargs)
    complete_rows = frame.dropna()
    assert_equal(complete_rows['y'].values, model_data.endog)
    assert_equal(complete_rows[['constant', 'X']].values, model_data.exog)
    assert_equal(complete_rows['weights'].values, model_data.weights)

    # Case 2: 2-D weights with symmetric missing values.
    (endog, exog), missing_idx, _design_info = handle_formula_data(
        frame, None, formula, depth=2, missing='drop')
    weights_2d = np.random.randn(10, 10)
    weights_2d[[8, 7], [7, 8]] = np.nan
    kwargs['weights'] = weights_2d
    kwargs['missing_idx'] = missing_idx

    model_data2 = sm_data.handle_data(endog, exog, **kwargs)

    good_idx = [0, 4, 6, 9]
    assert_equal(frame.loc[good_idx, 'y'], model_data2.endog)
    assert_equal(frame.loc[good_idx, ['constant', 'X']], model_data2.exog)
    assert_equal(weights_2d[good_idx][:, good_idx], model_data2.weights)

    # Case 3: weights of the wrong length must raise ValueError.
    (endog, exog), missing_idx, _design_info = handle_formula_data(
        frame, None, formula, depth=2, missing='drop')

    kwargs['weights'] = weights_wrong_size
    kwargs['missing_idx'] = missing_idx
    assert_raises(ValueError, sm_data.handle_data, endog, exog, **kwargs)
Example #47
0
 def test_raise_no_missing(self):
     """Smoke test for #1700: NaN-free pandas inputs pass under 'raise'."""
     endog = pandas.Series(np.random.random(20))
     exog = pandas.DataFrame(np.random.random((20, 2)))
     sm_data.handle_data(endog, exog, 'raise')
Example #48
0
 def test_extra_kwargs_1d(self):
     """Expect ``weights`` to lose the same rows that 'drop' removes."""
     supplied = np.random.random(25)
     data = sm_data.handle_data(self.y, self.X, 'drop', weights=supplied)
     stacked = np.c_[self.y, self.X]
     complete = ~np.isnan(stacked).any(axis=1)
     np.testing.assert_array_equal(data.weights, supplied[complete])
Example #49
0
 def test_endog_only_drop(self):
     """Expect endog-only 'drop' to match ``self.y.dropna()``."""
     without_nan = self.y.dropna()
     handled = sm_data.handle_data(self.y, None, 'drop')
     expected = without_nan.values
     np.testing.assert_array_equal(handled.endog, expected)
Example #50
0
 def test_none(self):
     """Expect 'none' with ``hasconst=False`` to return endog/exog unchanged."""
     handled = sm_data.handle_data(self.y, self.X, 'none', hasconst=False)
     check = np.testing.assert_array_equal
     check(handled.endog, self.y)
     check(handled.exog, self.X)
Example #51
0
 def test_pandas_constant(self):
     """Expect a 'const' column of ones to be found as the constant (index 6)."""
     augmented = self.data.exog.copy()
     augmented['const'] = 1
     result = sm_data.handle_data(self.data.endog, augmented)
     check = np.testing.assert_equal
     check(result.k_constant, 1)
     check(result.const_idx, 6)
Example #52
0
 def test_array_noconstant(self):
     """Expect no constant to be detected when ndarray values are passed."""
     exog_values = self.data.exog.copy().values
     endog_values = self.data.endog.values
     handled = sm_data.handle_data(endog_values, exog_values)
     np.testing.assert_equal(handled.k_constant, 0)
     np.testing.assert_equal(handled.const_idx, None)
Example #53
0
 def test_array_noconstant(self):
     """Expect the ndarray form of the data to report ``k_constant == 0``."""
     exog_copy = self.data.exog.copy()
     result = sm_data.handle_data(self.data.endog.values, exog_copy.values)
     check = np.testing.assert_equal
     check(result.k_constant, 0)
     check(result.const_idx, None)
Example #54
0
 def test_none(self):
     """Expect 'none' to return endog and exog equal to the inputs."""
     result = sm_data.handle_data(self.y, self.X, 'none')
     check = np.testing.assert_array_equal
     check(result.endog, self.y)
     check(result.exog, self.X)
Example #55
0
def test_formula_missing_extra_arrays():
    """Check that extra arrays follow formula-driven missing-value dropping.

    patsy cannot turn off its own missing-data handling (as of 0.3.0), so
    data passes through ``handle_formula_data`` first and the regular
    ``handle_data`` step second (see 2083).  Three cases are exercised: a
    1-D weights column with its own NaN, a 2-D weights matrix with
    symmetric NaNs, and a weights array of the wrong length.
    """
    np.random.seed(1)

    # The draws below are kept in this order so the seeded stream is the same.
    y = np.random.randn(10)
    y_missing = y.copy()
    y_missing[[2, 5]] = np.nan
    X = np.random.randn(10)
    X_missing = X.copy()
    X_missing[[1, 3]] = np.nan

    weights = np.random.uniform(size=10)
    weights_missing = weights.copy()
    weights_missing[[6]] = np.nan

    weights_wrong_size = np.random.randn(12)

    columns = {
        'y': y,
        'X': X,
        'y_missing': y_missing,
        'X_missing': X_missing,
        'weights': weights,
        'weights_missing': weights_missing
    }
    table = pandas.DataFrame.from_dict(columns)
    table['constant'] = 1

    formula = 'y_missing ~ X_missing'

    # Case 1: endog/exog and the 1-D weights all have missing values.
    parts = handle_formula_data(table, None, formula, depth=2,
                                missing='drop')
    (endog, exog), missing_idx, _unused_info = parts

    kwargs = {
        'missing_idx': missing_idx,
        'missing': 'drop',
        'weights': table['weights_missing']
    }

    model_data = sm_data.handle_data(endog, exog, **kwargs)
    table_nona = table.dropna()
    assert_equal(table_nona['y'].values, model_data.endog)
    assert_equal(table_nona[['constant', 'X']].values, model_data.exog)
    assert_equal(table_nona['weights'].values, model_data.weights)

    # Case 2: 2-D weights with symmetric missing values.
    parts = handle_formula_data(table, None, formula, depth=2,
                                missing='drop')
    (endog, exog), missing_idx, _unused_info = parts
    weights_2d = np.random.randn(10, 10)
    weights_2d[[8, 7], [7, 8]] = np.nan
    kwargs['weights'] = weights_2d
    kwargs['missing_idx'] = missing_idx

    model_data2 = sm_data.handle_data(endog, exog, **kwargs)

    good_idx = [0, 4, 6, 9]
    assert_equal(table.loc[good_idx, 'y'], model_data2.endog)
    assert_equal(table.loc[good_idx, ['constant', 'X']], model_data2.exog)
    assert_equal(weights_2d[good_idx][:, good_idx], model_data2.weights)

    # Case 3: weights of the wrong length must raise ValueError.
    parts = handle_formula_data(table, None, formula, depth=2,
                                missing='drop')
    (endog, exog), missing_idx, _unused_info = parts

    kwargs['weights'] = weights_wrong_size
    kwargs['missing_idx'] = missing_idx
    assert_raises(ValueError, sm_data.handle_data, endog, exog, **kwargs)
Example #56
0
 def _handle_data(self, X, missing='none'):
     """Wrap ``X`` with ``handle_data`` as endog-only data and return the result.

     Also sets ``self.exog`` to None and ``self.endog`` to the processed
     endog array.
     """
     model_data = handle_data(X, None, missing, 0)
     # TODO: remove both assignments when we don't inherit from LLM
     self.exog = None
     self.endog = model_data.endog
     return model_data