def _parse_features(self, features, names=None, process_one_hot=True,
                    **kwargs):
    """
    Parse features: validate their shape and names, optionally one-hot
    encode them, and optionally normalize them.

    Parameters
    ----------
    features : pandas.DataFrame | dict | ndarray
        Features to train on or predict from
    names : list, optional
        List of feature names, by default None
    process_one_hot : bool, optional
        Check for and process one-hot variables, by default True
    kwargs : dict, optional
        kwargs for PreProcess.one_hot

    Returns
    -------
    features : ndarray
        Parsed features array normalized and with str columns converted
        to one hot vectors if desired

    Raises
    ------
    RuntimeError
        If the parsed features are not 2D, if the number of feature
        columns does not match the number of stored feature names, or if
        the provided feature names differ from the stored feature names.
    """
    features, feature_names = self._parse_data(features, names=names)

    if len(features.shape) != 2:
        msg = ('{} can only use 2D data as input!'.format(
            self.__class__.__name__))
        logger.error(msg)
        raise RuntimeError(msg)

    expected_names = self.feature_names
    if (expected_names is not None
            and features.shape[1] != len(expected_names)):
        # Report the same count that the comparison above used.
        msg = ('data has {} features but expected {}'.format(
            features.shape[1], len(expected_names)))
        logger.error(msg)
        raise RuntimeError(msg)

    if self._feature_names is None:
        # First call: adopt the incoming names as the reference set.
        self._feature_names = feature_names
    elif self.feature_names != feature_names:
        # Expected names are the stored ones; provided names are the ones
        # that came with this call (same order as parse_features).
        msg = ('Expecting features with names: {}, but was provided with: '
               '{}!'.format(self.feature_names, feature_names))
        logger.error(msg)
        raise RuntimeError(msg)

    if process_one_hot:
        # Always request the one-hot column indices so that the
        # corresponding feature names can be checked below.
        kwargs['return_ind'] = True
        features, one_hot_ind = PreProcess.one_hot(features, **kwargs)
        if one_hot_ind:
            one_hot_features = [self.feature_names[i] for i in one_hot_ind]
            self._check_one_hot_norm_params(one_hot_features)

    if self.normalize_features:
        features = self.normalize(features, names=feature_names)

    return features
def parse_features(self, features, names=None, **kwargs):
    """
    Preprocess feature data ahead of training or prediction.

    One-hot encoding is driven by self.one_hot_categories and feature
    normalization is driven by self.normalize_features.

    Parameters
    ----------
    features : pandas.DataFrame | dict | ndarray
        Features to train on or predict from
    names : list, optional
        List of feature names, by default None
    kwargs : dict, optional
        kwargs for PreProcess.one_hot

    Returns
    -------
    features : ndarray
        Parsed features array normalized and with str columns converted
        to one hot vectors if desired

    Raises
    ------
    RuntimeError
        If the parsed features are not 2D, if the provided feature names
        do not match the stored feature names (when one-hot encoding is
        not applied), or if the final number of feature columns differs
        from self.feature_dims.
    """
    def _fail(message):
        # Every validation failure is logged before it is raised.
        logger.error(message)
        raise RuntimeError(message)

    features, feature_names = self._parse_data(features, names=names)

    n_axes = len(features.shape)
    if n_axes != 2:
        _fail('{} can only use 2D data as input!'.format(
            self.__class__.__name__))

    if self.feature_names is None:
        # No names stored yet: adopt the incoming names.
        self._feature_names = feature_names

    # One-hot encoding applies only when categories are configured and
    # every provided name is one of the input feature names.
    apply_one_hot = (
        self.one_hot_categories is not None
        and all(np.isin(feature_names, self.input_feature_names)))

    if apply_one_hot:
        self._check_one_hot_feature_names(feature_names)
        kwargs['feature_names'] = feature_names
        kwargs['categories'] = self.one_hot_categories
        features = PreProcess.one_hot(features, **kwargs)
    elif self.feature_names != feature_names:
        _fail('Expecting features with names: {}, but was provided with: '
              '{}!'.format(self.feature_names, feature_names))

    if self.normalize_features:
        features = self.normalize(features, names=self.feature_names)

    n_columns = features.shape[1]
    if n_columns != self.feature_dims:
        _fail('data has {} features but expected {}'.format(
            features.shape[1], self.feature_dims))

    return features