Beispiel #1
0
    def predict(self,
                beta=None,
                x=None,
                fill_value=None,
                fill_method=None,
                axis=0):
        """
        Parameters
        ----------
        beta : Series
        x : Series or DataFrame
        fill_value : scalar or dict, default None
        fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        axis : {0, 1}, default 0
            See DataFrame.fillna for more details

        Notes
        -----
        1. If both fill_value and fill_method are None then NaNs are dropped
        (this is the default behavior)
        2. An intercept will be automatically added to the new_y_values if
           the model was fitted using an intercept

        Returns
        -------
        Series of predicted values
        """
        if beta is None and x is None:
            return self.y_predict

        if beta is None:
            beta = self.beta
        else:
            beta = beta.reindex(self.beta.index)
            if isnull(beta).any():
                raise ValueError('Must supply betas for same variables')

        if x is None:
            x = self._x
            orig_x = x
        else:
            orig_x = x
            if fill_value is None and fill_method is None:
                x = x.dropna(how='any')
            else:
                x = x.fillna(value=fill_value, method=fill_method, axis=axis)
            if isinstance(x, Series):
                x = DataFrame({'x': x})
            if self._intercept:
                x['intercept'] = 1.

            x = x.reindex(columns=self._x.columns)

        rs = np.dot(x.values, beta.values)
        return Series(rs, x.index).reindex(orig_x.index)
    def test_groupby_preserve_categorical_dtype(self):
        # GH13743, GH13854
        df = DataFrame({'A': [1, 2, 1, 1, 2],
                        'B': [10, 16, 22, 28, 34],
                        'C1': Categorical(list("abaab"),
                                          categories=list("bac"),
                                          ordered=False),
                        'C2': Categorical(list("abaab"),
                                          categories=list("bac"),
                                          ordered=True)})
        # single grouper
        exp_full = DataFrame({'A': [2.0, 1.0, np.nan],
                              'B': [25.0, 20.0, np.nan],
                              'C1': Categorical(list("bac"),
                                                categories=list("bac"),
                                                ordered=False),
                              'C2': Categorical(list("bac"),
                                                categories=list("bac"),
                                                ordered=True)})
        for col in ['C1', 'C2']:
            result1 = df.groupby(by=col, as_index=False).mean()
            result2 = df.groupby(by=col, as_index=True).mean().reset_index()
            expected = exp_full.reindex(columns=result1.columns)
            tm.assert_frame_equal(result1, expected)
            tm.assert_frame_equal(result2, expected)

        # multiple grouper
        exp_full = DataFrame({'A': [1, 1, 1, 2, 2, 2],
                              'B': [np.nan, 20.0, np.nan, 25.0, np.nan,
                                    np.nan],
                              'C1': Categorical(list("bacbac"),
                                                categories=list("bac"),
                                                ordered=False),
                              'C2': Categorical(list("bacbac"),
                                                categories=list("bac"),
                                                ordered=True)})
        for cols in [['A', 'C1'], ['A', 'C2']]:
            result1 = df.groupby(by=cols, as_index=False).mean()
            result2 = df.groupby(by=cols, as_index=True).mean().reset_index()
            expected = exp_full.reindex(columns=result1.columns)
            tm.assert_frame_equal(result1, expected)
            tm.assert_frame_equal(result2, expected)
Beispiel #3
0
    def test_groupby_preserve_categorical_dtype(self):
        # GH13743, GH13854
        df = DataFrame({
            'A': [1, 2, 1, 1, 2],
            'B': [10, 16, 22, 28, 34],
            'C1':
            Categorical(list("abaab"), categories=list("bac"), ordered=False),
            'C2':
            Categorical(list("abaab"), categories=list("bac"), ordered=True)
        })
        # single grouper
        exp_full = DataFrame({
            'A': [2.0, 1.0, np.nan],
            'B': [25.0, 20.0, np.nan],
            'C1':
            Categorical(list("bac"), categories=list("bac"), ordered=False),
            'C2':
            Categorical(list("bac"), categories=list("bac"), ordered=True)
        })
        for col in ['C1', 'C2']:
            result1 = df.groupby(by=col, as_index=False).mean()
            result2 = df.groupby(by=col, as_index=True).mean().reset_index()
            expected = exp_full.reindex(columns=result1.columns)
            tm.assert_frame_equal(result1, expected)
            tm.assert_frame_equal(result2, expected)

        # multiple grouper
        exp_full = DataFrame({
            'A': [1, 1, 1, 2, 2, 2],
            'B': [np.nan, 20.0, np.nan, 25.0, np.nan, np.nan],
            'C1':
            Categorical(list("bacbac"), categories=list("bac"), ordered=False),
            'C2':
            Categorical(list("bacbac"), categories=list("bac"), ordered=True)
        })
        for cols in [['A', 'C1'], ['A', 'C2']]:
            result1 = df.groupby(by=cols, as_index=False).mean()
            result2 = df.groupby(by=cols, as_index=True).mean().reset_index()
            expected = exp_full.reindex(columns=result1.columns)
            tm.assert_frame_equal(result1, expected)
            tm.assert_frame_equal(result2, expected)
Beispiel #4
0
    def predict(self, beta=None, x=None, fill_value=None,
                fill_method=None, axis=0):
        """
        Parameters
        ----------
        beta : Series
        x : Series or DataFrame
        fill_value : scalar or dict, default None
        fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        axis : {0, 1}, default 0
            See DataFrame.fillna for more details

        Notes
        -----
        1. If both fill_value and fill_method are None then NaNs are dropped
        (this is the default behavior)
        2. An intercept will be automatically added to the new_y_values if
           the model was fitted using an intercept

        Returns
        -------
        Series of predicted values
        """
        if beta is None and x is None:
            return self.y_predict

        if beta is None:
            beta = self.beta
        else:
            beta = beta.reindex(self.beta.index)
            if isnull(beta).any():
                raise ValueError('Must supply betas for same variables')

        if x is None:
            x = self._x
            orig_x = x
        else:
            orig_x = x
            if fill_value is None and fill_method is None:
                x = x.dropna(how='any')
            else:
                x = x.fillna(value=fill_value, method=fill_method, axis=axis)
            if isinstance(x, Series):
                x = DataFrame({'x': x})
            if self._intercept:
                x['intercept'] = 1.

            x = x.reindex(columns=self._x.columns)

        rs = np.dot(x.values, beta.values)
        return Series(rs, x.index).reindex(orig_x.index)