def set_sample(self):
    """
    Flag the estimation sample, restrict the stored variables to it, and
    prepare them for the regression.

    Steps, in order:

    1. Collect the attributes named in `self.sample_cols_labels`, pass them
       with `self.df` to `flag_sample`, and keep the result in
       `self.sample`.
    2. When both `self.nosingles` and `self.fe_name` are truthy, and-combine
       that flag with the result of `flag_nonsingletons`.
    3. Call the three-argument `set_sample` helper and store its outputs
       under the attribute names listed in `self.sample_store_labels`.
    4. Pass `self.x` through `force_df`, squeeze `self.y`, and cast every
       attribute listed in `self.vars_in_reg` to float64.
    5. If `self.fe_name` is not None, call `self._demean_sample()`;
       otherwise, if `self.addcons` is truthy, add a `_cons` column of ones
       to the attribute named by `self.add_constant_to`.
    6. If `self.AWT` is not None, call `self._weight_sample()`.

    Returns nothing; every result is written onto `self`.
    """
    state = vars(self)

    sample_cols = tuple(state[label] for label in self.sample_cols_labels)
    self.sample = flag_sample(self.df, *sample_cols)
    if self.nosingles and self.fe_name:
        self.sample &= flag_nonsingletons(self.df, self.fe_name, self.sample)

    # NOTE: this bare `set_sample` is called with three arguments, so it is
    # a separate helper rather than this one-argument method.
    restricted = set_sample(self.df, self.sample, sample_cols)
    state.update(zip(self.sample_store_labels, restricted))

    self.x = force_df(self.x)
    self.y = self.y.squeeze()

    # Force regression variables to float64
    for name in self.vars_in_reg:
        state[name] = state[name].astype(np.float64)

    # Demean or add constant
    if self.fe_name is not None:
        self._demean_sample()
    elif self.addcons:
        ones = np.ones(self.y.shape[0])
        target = self.add_constant_to
        regressors = state[target]
        if regressors.empty:
            # No columns yet: build a fresh frame aligned on `y`'s index.
            regressors = pd.DataFrame(ones, columns=['_cons'],
                                      index=self.y.index)
        else:
            regressors['_cons'] = ones
        state[target] = regressors

    # Re-weight sample
    if self.AWT is not None:
        self._weight_sample()
def test_multiidx(self):
    """
    `force_df` on a Series carrying a two-level MultiIndex must equal the
    expected DataFrame carrying that same index.
    """
    idx = pd.MultiIndex.from_tuples(
        [('a', 1), ('b', 7), ('w', 99)],
        names=['dingle', 'dangle'],
    )
    # Re-index copies rather than the fixtures themselves: assigning
    # `.index` on `self.sdf` / `self.s` directly would mutate those objects
    # in place and could leak the MultiIndex into any other user of them.
    expected = self.sdf.copy()
    expected.index = idx
    s = self.s.copy()
    s.index = idx
    result = force_df(s)
    assert_frame_equal(expected, result)
def _demean_guts(A, args):
    """
    Lazily yield each item of `args` with its within-group mean removed.

    `A` supplies the group labels (its `.name` is used as the join key).
    Items that are `None` or empty are yielded back unchanged.
    """
    for frame in args:
        # Ignore empty input (e.g. empty list of exogenous included
        # regressors): there is nothing to demean.
        if frame is None or frame.empty:
            yield frame
            continue

        key = A.name
        group_means = frame.groupby(A).mean()
        # Join the per-group means onto the group labels so the means line
        # up with the rows of `A`, then discard the label column itself.
        expanded = force_df(A).join(group_means, on=key)
        expanded = expanded.drop(key, axis=1)
        if frame.ndim == 1:
            # One-dimensional input: squeeze the joined frame to match.
            expanded = expanded.squeeze()
        yield frame - expanded
def _demean(A, df):
    """
    Demean a matrix/DataFrame within group `A`.

    `A` supplies the group labels (its `.name` is used as the join key).
    A `df` that is `None` or empty is returned unchanged; otherwise the
    result is `df` minus the mean of its group.
    """
    # Ignore empty `df` (e.g. empty list of exogenous included regressors)
    if df is None or df.empty:
        return df

    key = A.name
    group_means = df.groupby(A).mean()
    # Join the per-group means onto the group labels so the means line up
    # with the rows of `A`, then discard the label column itself.
    expanded = force_df(A).join(group_means, on=key)
    expanded = expanded.drop(key, axis=1)
    if df.ndim == 1:
        # One-dimensional input: squeeze the joined frame to match.
        expanded = expanded.squeeze()
    return df - expanded
def test_array(self):
    """`force_df` must raise `ValueError` when given a NumPy array."""
    arr = np.arange(3)
    with pytest.raises(ValueError):
        force_df(arr)
def test_list(self):
    """`force_df` must raise `ValueError` when given a plain list."""
    values = [1, 2, 3]
    with pytest.raises(ValueError):
        force_df(values)
def test_passthrough(self):
    """A DataFrame given to `force_df` must come back equal to the input."""
    original = self.df
    returned = force_df(original)
    assert_frame_equal(original, returned)
def test_simple(self):
    """`force_df` on the Series fixture must equal `pd.DataFrame` of it."""
    series = self.s
    expected = pd.DataFrame(series)
    result = force_df(series)
    assert_frame_equal(expected, result)