def predict(self, beta=None, x=None, fill_value=None, fill_method=None,
            axis=0):
    """
    Compute predicted values from coefficients and regressor values.

    With neither `beta` nor `x` supplied, the model's own `y_predict`
    is returned unchanged.

    Parameters
    ----------
    beta : Series, optional
        Coefficients to use instead of the model's own. They are
        reindexed to the labels of the model's coefficients.
    x : Series or DataFrame, optional
        Regressor values to predict from; the model's own regressors are
        used when omitted. A Series is treated as a single regressor
        named 'x'.
    fill_value : scalar or dict, default None
    fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
    axis : {0, 1}, default 0
        See DataFrame.fillna for more details

    Notes
    -----
    1. If both fill_value and fill_method are None then NaNs in `x` are
       dropped (this is the default behavior); the result is reindexed
       to the index of the `x` that was passed in
    2. An 'intercept' column of ones is automatically added to `x` if
       the model was fitted using an intercept
    3. The fill arguments are only consulted when `x` is supplied

    Returns
    -------
    Series of predicted values

    Raises
    ------
    ValueError
        If `beta`, once aligned to the model's coefficient labels,
        contains any missing value.
    """
    if x is None and beta is None:
        # Nothing was overridden, so the model's own predictions apply.
        return self.y_predict

    if beta is not None:
        # Align the caller's coefficients with the fitted ones; any gap
        # means a variable was left without a coefficient.
        coefs = beta.reindex(self.beta.index)
        if isnull(coefs).any():
            raise ValueError('Must supply betas for same variables')
    else:
        coefs = self.beta

    if x is None:
        requested = regressors = self._x
    else:
        requested = x
        drop_missing = fill_value is None and fill_method is None
        if drop_missing:
            regressors = x.dropna(how='any')
        else:
            regressors = x.fillna(value=fill_value, method=fill_method,
                                  axis=axis)

        if isinstance(regressors, Series):
            regressors = DataFrame({'x': regressors})
        if self._intercept:
            regressors['intercept'] = 1.

        # Put the columns in the same order as the model's regressors so
        # they line up positionally with the coefficients.
        regressors = regressors.reindex(columns=self._x.columns)

    fitted = np.dot(regressors.values, coefs.values)
    predicted = Series(fitted, regressors.index)
    # Restore the index of the input the caller asked about.
    return predicted.reindex(requested.index)
def test_groupby_preserve_categorical_dtype(self):
    """Check that grouping on categorical keys keeps them categorical.

    Covers GH13743 and GH13854. The frame is grouped by an unordered
    ('C1') and an ordered ('C2') categorical column, alone and together
    with the integer column 'A'. The ``as_index=False`` result and the
    ``as_index=True`` result after ``reset_index()`` must both equal an
    expected frame whose key columns are Categoricals and which carries a
    NaN row for every category combination that has no data.
    """
    def cat_pair(letters):
        # Same values twice: once unordered ('C1'), once ordered ('C2').
        return {
            'C1': Categorical(list(letters), categories=list("bac"),
                              ordered=False),
            'C2': Categorical(list(letters), categories=list("bac"),
                              ordered=True),
        }

    def make_frame(a_values, b_values, letters):
        # Columns are inserted in the order A, B, C1, C2.
        data = {'A': a_values, 'B': b_values}
        data.update(cat_pair(letters))
        return DataFrame(data)

    def check(keys, exp_full):
        # Both spellings of "keys as columns" must give the same frame.
        result1 = df.groupby(by=keys, as_index=False).mean()
        result2 = df.groupby(by=keys, as_index=True).mean().reset_index()
        expected = exp_full.reindex(columns=result1.columns)
        tm.assert_frame_equal(result1, expected)
        tm.assert_frame_equal(result2, expected)

    df = make_frame([1, 2, 1, 1, 2], [10, 16, 22, 28, 34], "abaab")

    # single grouper
    single_expected = make_frame([2.0, 1.0, np.nan],
                                 [25.0, 20.0, np.nan],
                                 "bac")
    for col in ['C1', 'C2']:
        check(col, single_expected)

    # multiple grouper
    multi_expected = make_frame([1, 1, 1, 2, 2, 2],
                                [np.nan, 20.0, np.nan, 25.0, np.nan, np.nan],
                                "bacbac")
    for cols in [['A', 'C1'], ['A', 'C2']]:
        check(cols, multi_expected)
def test_groupby_preserve_categorical_dtype(self):
    """Grouping by categorical columns must preserve their dtype.

    Regression test for GH13743 and GH13854. Each scenario pairs a set of
    groupers with the full expected frame. For every grouper, the
    ``as_index=False`` mean and the ``as_index=True`` mean followed by
    ``reset_index()`` are compared against that expected frame, reindexed
    to the columns the groupby actually returned.
    """
    source = DataFrame({
        'A': [1, 2, 1, 1, 2],
        'B': [10, 16, 22, 28, 34],
        'C1': Categorical(list("abaab"), categories=list("bac"),
                          ordered=False),
        'C2': Categorical(list("abaab"), categories=list("bac"),
                          ordered=True),
    })

    # Expected output when grouping by one categorical column: one row
    # per category, with NaN for the category 'c' that has no rows.
    one_key_expected = DataFrame({
        'A': [2.0, 1.0, np.nan],
        'B': [25.0, 20.0, np.nan],
        'C1': Categorical(list("bac"), categories=list("bac"),
                          ordered=False),
        'C2': Categorical(list("bac"), categories=list("bac"),
                          ordered=True),
    })

    # Expected output when grouping by 'A' plus one categorical column:
    # every (A, category) combination appears, empty ones as NaN.
    two_key_expected = DataFrame({
        'A': [1, 1, 1, 2, 2, 2],
        'B': [np.nan, 20.0, np.nan, 25.0, np.nan, np.nan],
        'C1': Categorical(list("bacbac"), categories=list("bac"),
                          ordered=False),
        'C2': Categorical(list("bacbac"), categories=list("bac"),
                          ordered=True),
    })

    scenarios = [
        # single grouper
        (['C1', 'C2'], one_key_expected),
        # multiple grouper
        ([['A', 'C1'], ['A', 'C2']], two_key_expected),
    ]

    for groupers, full_expected in scenarios:
        for keys in groupers:
            as_columns = source.groupby(by=keys, as_index=False).mean()
            via_index = source.groupby(by=keys, as_index=True).mean()
            via_index = via_index.reset_index()
            wanted = full_expected.reindex(columns=as_columns.columns)
            tm.assert_frame_equal(as_columns, wanted)
            tm.assert_frame_equal(via_index, wanted)