コード例 #1
0
ファイル: fitting.py プロジェクト: anu19s/myshop
    def standardize(self, data):
        """
        Return a float copy of data with the standardizable items rescaled.

        Parameters
        ----------
        data : pandas Series or DataFrame
            Its item names must pass self._check_compatible.

        Returns
        -------
        A copy of data cast to float.  Items that are named in
        self._should_standardize have self.stats.mu subtracted and are then
        divided by self.stats.sigma; all other items are only cast.

        Notes
        -----
        The input itself is never written to; all work happens on the copy.
        """
        self._check_compatible(data)

        stats = self.stats
        result = data.copy().astype('float')

        # Nothing was marked for standardization: the float copy is the answer
        if not self._should_standardize:
            return result

        names = common_math.get_item_names(data).intersection(
            self._should_standardize)
        centered = data[names] - stats.mu[names]
        scale = stats.sigma[names]
        result[names] = centered / scale

        return result
コード例 #2
0
ファイル: fitting.py プロジェクト: anu19s/myshop
    def unstandardize_params(self, w_st):
        """
        Map parameters fitted on standardized data back to the raw scale.

        The returned Series "w" is intended to satisfy
        X.dot(w) = self.standardize(X).dot(w_st)

        Parameters
        ----------
        w_st : Pandas.Series
            Index is names of variables
            Values are the fitted parameter values

        Returns
        -------
        w : Pandas.Series
            Float copy of w_st.  Entries for standardized variables are
            divided by their sigma, and the entry at self._ones_column is
            reduced by the sum of mu * w_st / sigma over those variables.

        Raises
        ------
        AssertionError
            If self._ones_column is falsy.
        """
        self._check_compatible(w_st)
        assert self._ones_column, (
            "Specify a ones_column during initialization if you want to "
            "unstandardize")

        w = w_st.copy().astype('float')

        names = common_math.get_item_names(w_st).intersection(
            self._should_standardize)

        # No standardized variables present: the float copy is already "w"
        if len(names) == 0:
            return w

        # Rescale the coefficients of the standardized variables
        coefs = w_st[names]
        sigma = self.stats.sigma[names]
        w[names] = coefs / sigma

        # The mean shift is absorbed by the constant (ones column) term
        mu = self.stats.mu[names]
        w[self._ones_column] -= (mu * coefs / sigma).sum()

        return w
コード例 #3
0
ファイル: fitting.py プロジェクト: ANB2/rosetta
    def unstandardize_params(self, w_st):
        """
        Convert standardized-scale parameters w_st into raw-scale ones.

        The result "w" is intended to satisfy
        X.dot(w) = self.standardize(X).dot(w_st)

        Parameters
        ----------
        w_st : Pandas.Series
            Index is names of variables
            Values are the fitted parameter values

        Returns
        -------
        Pandas.Series
            A float copy of w_st in which each standardized variable's value
            has been divided by its sigma, and the self._ones_column entry
            has had sum(mu * w_st / sigma) subtracted from it.

        Raises
        ------
        AssertionError
            If self._ones_column is falsy.
        """
        self._check_compatible(w_st)
        assert self._ones_column, (
            "Specify a ones_column during initialization if you want to "
            "unstandardize")

        unstandardized = w_st.copy().astype('float')

        item_names = common_math.get_item_names(w_st)
        standardized_names = item_names.intersection(
            self._should_standardize)

        if len(standardized_names) > 0:
            fitted = w_st[standardized_names]
            sigma = self.stats.sigma[standardized_names]

            # Undo the division by sigma applied to each standardized column
            unstandardized[standardized_names] = fitted / sigma

            # Undo the mean subtraction by moving its effect into the
            # coefficient of the ones column
            mu = self.stats.mu[standardized_names]
            shift = (mu * fitted / sigma).sum()
            unstandardized[self._ones_column] -= shift

        return unstandardized
コード例 #4
0
ファイル: fitting.py プロジェクト: ANB2/rosetta
    def standardize(self, data):
        """
        Standardize data with the statistics held in self.stats.

        Parameters
        ----------
        data : pandas Series or DataFrame

        Returns
        -------
        A float copy of data.  For the items also listed in
        self._should_standardize the value is (data - mu) / sigma, with mu
        and sigma taken from self.stats; remaining items are unchanged apart
        from the cast to float.

        Notes
        -----
        self._check_compatible(data) is called first, so unknown items are
        rejected before any arithmetic is done.
        """
        self._check_compatible(data)

        stats = self.stats
        out = data.copy().astype('float')

        if self._should_standardize:
            item_names = common_math.get_item_names(data)
            cols = item_names.intersection(self._should_standardize)
            shifted = data[cols] - stats.mu[cols]
            out[cols] = shifted / stats.sigma[cols]

        return out
コード例 #5
0
ファイル: fitting.py プロジェクト: anu19s/myshop
    def _check_compatible(self, data):
        """
        Raises ValueError if the columns/index of the DataFrame/Series "data"
        are not contained in self.known_columns.

        In this case, we don't know how to standardize/unstandardize/winsorize
        data, so we must raise an exception.

        Parameters
        ----------
        data : pandas Series or DataFrame
            Object whose item names are checked.

        Raises
        ------
        ValueError
            If at least one item name of data is missing from
            self.known_columns.  The message lists the unknown names.
        """
        # NOTE(review): assumes get_item_names returns a pandas Index --
        # confirm.  Index.difference is the set difference; the older
        # Index.diff spelling was deprecated and later removed.
        unknown = common_math.get_item_names(data).difference(
            self.known_columns)

        # Test the length explicitly: the truth value of an Index is
        # ambiguous, so "if unknown:" cannot be relied on.
        if len(unknown) > 0:
            raise ValueError(
                "Data contained items we don't know how to work with:  %s" %
                unknown)
コード例 #6
0
ファイル: fitting.py プロジェクト: ANB2/rosetta
    def _check_compatible(self, data):
        """
        Raises ValueError if the columns/index of the DataFrame/Series "data"
        are not contained in self.known_columns.

        In this case, we don't know how to standardize/unstandardize/winsorize
        data, so we must raise an exception.

        Parameters
        ----------
        data : pandas Series or DataFrame
            Object whose item names are checked.

        Raises
        ------
        ValueError
            If at least one item name of data is missing from
            self.known_columns.  The message lists the unknown names.
        """
        # NOTE(review): assumes get_item_names returns a pandas Index --
        # confirm.  Index.difference computes the set difference and
        # replaces the deprecated (later removed) Index.diff spelling.
        item_names = common_math.get_item_names(data)
        unknown = item_names.difference(self.known_columns)

        # An Index has no unambiguous truth value, so check its length
        # rather than writing "if unknown:".
        if len(unknown) > 0:
            raise ValueError(
                "Data contained items we don't know how to work with:  %s"
                % unknown)
コード例 #7
0
ファイル: fitting.py プロジェクト: anu19s/myshop
    def _get_clip_levels(self, df):
        """
        Build a Series of clip levels with one entry per item name in df.

        Items that are also listed in self._should_winsorize receive whatever
        _get_clip_levels_series returns for that column when called with
        self.lower_quantile, self.upper_quantile and self.max_std.  All other
        entries stay NaN.  The returned Series has object dtype.
        """
        names = common_math.get_item_names(df)
        to_clip = names.intersection(self._should_winsorize)

        # Start from an all-NaN object Series, then fill the winsorized items
        blank = np.full(len(names), np.nan)
        levels = pd.Series(blank, index=names).astype('O')

        if len(to_clip) == 0:
            return levels

        def levels_for(column):
            return _get_clip_levels_series(column, self.lower_quantile,
                                           self.upper_quantile, self.max_std)

        # Casting to float first avoids a mixed-dtype frame, which can make
        # apply behave unexpectedly
        as_float = df[to_clip].astype('float')
        levels[to_clip] = as_float.apply(levels_for)

        return levels
コード例 #8
0
ファイル: fitting.py プロジェクト: ANB2/rosetta
    def _get_clip_levels(self, df):
        """
        Compute the clip levels for the items of df.

        Returns an object-dtype Series indexed by the item names of df.
        Entries for items in self._should_winsorize hold the result of
        _get_clip_levels_series for that column (using self.lower_quantile,
        self.upper_quantile and self.max_std); every other entry is NaN.
        """
        item_names = common_math.get_item_names(df)
        winsorized_names = item_names.intersection(self._should_winsorize)

        nan_values = np.ones(len(item_names)) * np.nan
        clip_levels = pd.Series(nan_values, index=item_names).astype('O')

        if len(winsorized_names) > 0:
            # Work on a float-only frame; a mixed data type frame can cause
            # apply to act in a funny manner
            float_frame = df[winsorized_names].astype('float')
            clip_levels[winsorized_names] = float_frame.apply(
                lambda col: _get_clip_levels_series(
                    col, self.lower_quantile, self.upper_quantile,
                    self.max_std))

        return clip_levels
コード例 #9
0
ファイル: fitting.py プロジェクト: anu19s/myshop
    def winsorize(self, data):
        """
        Return a copy of data with the winsorizable items clipped.

        Each item named in self._should_winsorize is limited to the
        (lower, upper) pair stored for it in self.clip_levels; other items
        are copied through unchanged.  data is first validated with
        self._check_compatible.
        """
        self._check_compatible(data)

        def clip(column):
            lower, upper = self.clip_levels[column.name]
            capped = np.minimum(upper, column)
            return np.maximum(lower, capped)

        names = common_math.get_item_names(data).intersection(
            self._should_winsorize)
        result = data.copy()

        # Nothing to winsorize: hand back the plain copy
        if len(names) == 0:
            return result

        result[names] = result[names].apply(clip)
        return result
コード例 #10
0
ファイル: fitting.py プロジェクト: ANB2/rosetta
    def winsorize(self, data):
        """
        Clip the winsorizable items of data to their stored clip levels.

        data is checked with self._check_compatible and then copied.  In the
        copy, every item that also appears in self._should_winsorize is
        bounded below and above by the (lower, upper) pair found under its
        name in self.clip_levels.  The copy is returned; data is not
        modified.
        """
        self._check_compatible(data)

        def bound(col):
            low, high = self.clip_levels[col.name]
            return np.maximum(low, np.minimum(high, col))

        item_names = common_math.get_item_names(data)
        targets = item_names.intersection(self._should_winsorize)

        clipped = data.copy()
        if len(targets) > 0:
            selected = clipped[targets]
            clipped[targets] = selected.apply(bound)

        return clipped