コード例 #1
0
    def set_sample(self):
        """Build the estimation sample and prepare its variables.

        Steps, in order:

        1. Compute ``self.sample`` with ``flag_sample`` from ``self.df`` and
           the attributes named in ``self.sample_cols_labels``. When both
           ``self.nosingles`` and ``self.fe_name`` are truthy, AND it in
           place with the result of ``flag_nonsingletons``.
        2. Store the values returned by the module-level ``set_sample``
           function under the names listed in ``self.sample_store_labels``.
        3. Pass ``self.x`` through ``force_df`` and squeeze ``self.y``.
        4. Cast every attribute named in ``self.vars_in_reg`` to float64.
        5. Call ``self._demean_sample()`` when ``self.fe_name`` is not None;
           otherwise, when ``self.addcons`` is truthy, add a ``'_cons'``
           column of ones to the attribute named by ``self.add_constant_to``.
        6. Call ``self._weight_sample()`` when ``self.AWT`` is not None.
        """
        # `vars(self)` is the instance `__dict__`; attributes are looked up
        # by label because the labels are only known at run time.
        sample_cols = tuple(
            vars(self)[label] for label in self.sample_cols_labels)
        self.sample = flag_sample(self.df, *sample_cols)
        if self.nosingles and self.fe_name:
            self.sample &= flag_nonsingletons(self.df, self.fe_name,
                                              self.sample)

        # NOTE: this name resolves to the module-level `set_sample`
        # function, not to this method.
        restricted = set_sample(self.df, self.sample, sample_cols)
        vars(self).update(zip(self.sample_store_labels, restricted))
        self.x = force_df(self.x)
        self.y = self.y.squeeze()

        # Force regression variables to float64
        for name in self.vars_in_reg:
            vars(self)[name] = vars(self)[name].astype(np.float64)

        # Demean or add constant
        if self.fe_name is not None:
            self._demean_sample()
        elif self.addcons:
            ones = np.ones(self.y.shape[0])
            target = self.add_constant_to
            regressors = vars(self)[target]
            if not regressors.empty:
                regressors['_cons'] = ones
            else:
                # Nothing to append to: the constant becomes the only column.
                regressors = pd.DataFrame(ones, columns=['_cons'],
                                          index=self.y.index)
            vars(self)[target] = regressors

        # Re-weight sample
        if self.AWT is not None:
            self._weight_sample()
 def test_multiidx(self):
     """`force_df` of a MultiIndexed Series equals the MultiIndexed frame."""
     idx = pd.MultiIndex.from_tuples([('a', 1), ('b', 7), ('w', 99)],
                                     names=['dingle', 'dangle'])
     # Work on copies so the fixtures are not mutated in place; they may be
     # shared with other tests, which would otherwise see the MultiIndex.
     expected = self.sdf.copy()
     expected.index = idx
     s = self.s.copy()
     s.index = idx
     result = force_df(s)
     assert_frame_equal(expected, result)
コード例 #3
0
def _demean_guts(A, args):
    """Yield each object in `args` with its within-group mean subtracted.

    Groups are defined by `A`. Entries that are None or empty (e.g. an empty
    list of exogenous included regressors) are yielded unchanged.
    """
    for data in args:
        if data is None or data.empty:
            yield data
            continue

        key = A.name
        group_means = data.groupby(A).mean()
        # Expand the per-group means back to one row per observation by
        # joining on the group key, then discard the key column.
        expanded = force_df(A).join(group_means, on=key)
        expanded = expanded.drop(key, axis=1)
        if data.ndim == 1:
            # One-dimensional input: squeeze so the subtraction lines up.
            expanded = expanded.squeeze()
        yield data - expanded
コード例 #4
0
def _demean(A, df):
    """ Subtract from `df` its mean within each group of `A`.

    A `df` that is None or empty (e.g. an empty list of exogenous included
    regressors) is returned unchanged.
    """
    if df is None or df.empty:
        return df

    key = A.name
    group_means = df.groupby(A).mean()
    # Expand the per-group means back to one row per observation by joining
    # on the group key, then discard the key column.
    expanded = force_df(A).join(group_means, on=key)
    expanded = expanded.drop(key, axis=1)
    if df.ndim == 1:
        # One-dimensional input: squeeze so the subtraction lines up.
        expanded = expanded.squeeze()
    return df - expanded
 def test_array(self):
     """`force_df` is expected to raise ValueError for a NumPy array."""
     arr = np.arange(3)
     with pytest.raises(ValueError):
         force_df(arr)
 def test_list(self):
     """`force_df` is expected to raise ValueError for a plain list."""
     values = [1, 2, 3]
     with pytest.raises(ValueError):
         force_df(values)
 def test_passthrough(self):
     """`force_df` of the frame fixture compares equal to the fixture."""
     frame = self.df
     result = force_df(frame)
     assert_frame_equal(frame, result)
 def test_simple(self):
     """`force_df` of the Series fixture equals `pd.DataFrame` of it."""
     series = self.s
     assert_frame_equal(pd.DataFrame(series), force_df(series))