コード例 #1
0
    def _parse_features(self,
                        features,
                        names=None,
                        process_one_hot=True,
                        **kwargs):
        """
        Parse features: validate their shape and names, then optionally
        one-hot encode and normalize them.

        Parameters
        ----------
        features : pandas.DataFrame | dict | ndarray
            Features to train on or predict from
        names : list, optional
            List of feature names, by default None
        process_one_hot : bool, optional
            Check for and process one-hot variables, by default True
        kwargs : dict, optional
            kwargs for PreProcess.one_hot ('return_ind' is always forced to
            True by this method)

        Returns
        -------
        features : ndarray
            Parsed features array normalized and with str columns converted
            to one hot vectors if desired

        Raises
        ------
        RuntimeError
            If the parsed features are not 2D, if the number of feature
            columns differs from the number of stored feature names, or if
            the provided feature names differ from the stored feature names
        """
        features, feature_names = self._parse_data(features, names=names)

        if len(features.shape) != 2:
            msg = ('{} can only use 2D data as input!'.format(
                self.__class__.__name__))
            logger.error(msg)
            raise RuntimeError(msg)

        if self.feature_names is not None:
            # Report the same quantity that is compared so the message can
            # never contradict the check that triggered it.
            n_expected = len(self.feature_names)
            if features.shape[1] != n_expected:
                msg = ('data has {} features but expected {}'.format(
                    features.shape[1], n_expected))
                logger.error(msg)
                raise RuntimeError(msg)

        if self._feature_names is None:
            # First call: remember the names for later consistency checks.
            self._feature_names = feature_names
        elif self.feature_names != feature_names:
            # Expected names are the stored ones; provided names are the ones
            # parsed from this call's input.
            msg = ('Expecting features with names: {}, but was provided with: '
                   '{}!'.format(self.feature_names, feature_names))
            logger.error(msg)
            raise RuntimeError(msg)

        if process_one_hot:
            # The column indices of one-hot variables are needed below, so
            # always request them regardless of what the caller passed.
            kwargs['return_ind'] = True
            features, one_hot_ind = PreProcess.one_hot(features, **kwargs)
            if one_hot_ind:
                one_hot_features = [self.feature_names[i] for i in one_hot_ind]
                self._check_one_hot_norm_params(one_hot_features)

        if self.normalize_features:
            features = self.normalize(features, names=feature_names)

        return features
コード例 #2
0
    def parse_features(self, features, names=None, **kwargs):
        """Preprocess feature data ahead of training or prediction.

        The input is parsed into an array plus feature names, checked to be
        2D, one-hot encoded when self.one_hot_categories is set and all
        parsed names are found in self.input_feature_names, normalized when
        self.normalize_features is truthy, and finally checked against
        self.feature_dims.

        Parameters
        ----------
        features : pandas.DataFrame | dict | ndarray
            Features to train on or predict from
        names : list, optional
            List of feature names, by default None
        kwargs : dict, optional
            kwargs for PreProcess.one_hot ('feature_names' and 'categories'
            are overwritten by this method when one-hot encoding is applied)

        Returns
        -------
        features : ndarray
            Parsed features array normalized and with str columns converted
            to one hot vectors if desired

        Raises
        ------
        RuntimeError
            If the parsed features are not 2D, if no one-hot encoding is
            applied and the parsed names differ from self.feature_names, or
            if the final number of columns differs from self.feature_dims
        """
        parsed, parsed_names = self._parse_data(features, names=names)

        if len(parsed.shape) != 2:
            cls_name = self.__class__.__name__
            msg = '{} can only use 2D data as input!'.format(cls_name)
            logger.error(msg)
            raise RuntimeError(msg)

        # Adopt the parsed names when none have been stored yet.
        if self.feature_names is None:
            self._feature_names = parsed_names

        # One-hot encoding only applies when categories are configured and
        # every parsed name is one of the known input feature names.
        apply_one_hot = False
        if self.one_hot_categories is not None:
            apply_one_hot = all(
                np.isin(parsed_names, self.input_feature_names))

        if apply_one_hot:
            self._check_one_hot_feature_names(parsed_names)
            kwargs['feature_names'] = parsed_names
            kwargs['categories'] = self.one_hot_categories
            parsed = PreProcess.one_hot(parsed, **kwargs)
        elif self.feature_names != parsed_names:
            msg = ('Expecting features with names: {}, but was provided with: '
                   '{}!'.format(self.feature_names, parsed_names))
            logger.error(msg)
            raise RuntimeError(msg)

        if self.normalize_features:
            parsed = self.normalize(parsed, names=self.feature_names)

        # Final sanity check on the column count of the processed array.
        n_columns = parsed.shape[1]
        if n_columns != self.feature_dims:
            msg = 'data has {} features but expected {}'.format(
                n_columns, self.feature_dims)
            logger.error(msg)
            raise RuntimeError(msg)

        return parsed