def _normalize_arr(self, arr, names):
    """
    Normalize array and save normalization parameters to given names

    Normalization parameters are only stored when
    ``self.get_norm_params(names)`` returns None for the means or the
    stdevs; otherwise the returned values are passed to
    ``PreProcess.normalize`` and ``self._norm_params`` is left untouched.

    Parameters
    ----------
    arr : ndarray
        Array of features/label to normalize
    names : list
        List of feature/label names

    Returns
    -------
    norm_arr : ndarray
        Normalized features/label

    Raises
    ------
    RuntimeError
        If ``len(names)`` differs from the item count reported by
        ``self._get_item_number(arr)``
    """
    n_names = self._get_item_number(arr)
    if len(names) != n_names:
        # Report the count that was actually compared. Indexing
        # arr.shape[1] here would raise IndexError for arrays with fewer
        # than two dimensions and hide the intended RuntimeError.
        msg = ("Number of item names ({}) does not match number of items "
               "({})".format(len(names), n_names))
        logger.error(msg)
        raise RuntimeError(msg)

    means, stdevs = self.get_norm_params(names)
    # Decide before normalizing: the call below rebinds means/stdevs.
    update = means is None or stdevs is None

    norm_arr, means, stdevs = PreProcess.normalize(arr, mean=means,
                                                   stdev=stdevs)
    if update:
        for i, n in enumerate(names):
            norm_params = {n: {'mean': means[i], 'stdev': stdevs[i]}}
            self._norm_params.update(norm_params)

    return norm_arr
def _normalize_df(self, df):
    """
    Normalize a DataFrame column-wise

    When ``self.get_norm_params(df.columns)`` yields None for either the
    means or the stdevs, the values returned by ``PreProcess.normalize``
    are stored in ``self._norm_params`` under each column name.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame of features/label to normalize

    Returns
    -------
    norm_df : pandas.DataFrame
        Normalized features/label
    """
    stored_means, stored_stdevs = self.get_norm_params(df.columns)
    params_known = stored_means is not None and stored_stdevs is not None

    norm_df, means, stdevs = PreProcess.normalize(df, mean=stored_means,
                                                  stdev=stored_stdevs)
    if params_known:
        # Existing parameters were applied; nothing new to record.
        return norm_df

    for idx, col in enumerate(df.columns):
        self._norm_params.update(
            {col: {'mean': means[idx], 'stdev': stdevs[idx]}})

    return norm_df
def _normalize_dict(self, items):
    """
    Normalize each entry of a dictionary of items (features | labels)

    An entry that cannot be normalized is reported through the logger
    and ``warn`` and is passed through with its value unchanged.

    Parameters
    ----------
    items : dict
        mapping of names to vectors

    Returns
    -------
    norm_items : dict
        mapping of names to normalized-feature vectors
    """
    norm_items = {}
    for name, vector in items.items():
        known_mean = self.get_mean(name)
        known_stdev = self.get_stdev(name)
        params_known = known_mean is not None and known_stdev is not None
        try:
            vector, new_mean, new_stdev = PreProcess.normalize(
                vector, mean=known_mean, stdev=known_stdev)
            if not params_known:
                # Record freshly computed parameters for this name.
                self._norm_params.update(
                    {name: {'mean': new_mean, 'stdev': new_stdev}})
        except Exception as ex:
            # Best effort: warn and keep whatever value we currently hold.
            msg = "Could not normalize {}:\n{}".format(name, ex)
            logger.warning(msg)
            warn(msg)

        norm_items[name] = vector

    return norm_items
def test_OHE():
    """
    Test one-hot encoding

    Adds a categorical column to a copy of FEATURES, parses it through a
    normalizing ModelBase, and checks that the first two output columns
    match plain normalization of FEATURES while the one-hot categories
    carry no normalization parameters.
    """
    categories = ['d', 'e', 'f']
    frame = FEATURES.copy()
    frame['categorical'] = np.random.choice(categories, len(FEATURES))

    model = ModelBase(
        None,
        feature_names=frame.columns,
        label_names=LABELS.columns,
        normalize=True,
        one_hot_categories={'categorical': categories},
    )

    raw = FEATURES.values.astype('float32')
    expected, expected_means, expected_stdevs = PreProcess.normalize(raw)
    parsed = model.parse_features(frame)

    # Only the leading two columns are compared against the baseline.
    assert np.allclose(expected, parsed[:, :2])
    model_means = np.array(model.feature_means, dtype='float32')
    assert np.allclose(expected_means, model_means[:2])
    model_stdevs = np.array(model.feature_stdevs, dtype='float32')
    assert np.allclose(expected_stdevs, model_stdevs[:2])

    # One-hot columns must not have normalization parameters.
    for category in categories:
        assert model.get_mean(category) is None
        assert model.get_stdev(category) is None

    # Category names appear as features, but not as input features; the
    # source column is the other way around.
    assert all(np.isin(categories, model.feature_names))
    assert not any(np.isin(categories, model.input_feature_names))
    assert 'categorical' not in model.feature_names
    assert 'categorical' in model.input_feature_names
def test_norm_df():
    """
    Test ModelBase Normalization on a dataframe

    Features and labels parsed by a normalizing ModelBase must match the
    output of PreProcess.normalize, and the model must expose the same
    means and stdevs.
    """
    model = ModelBase(None, feature_names=FEATURES.columns,
                      label_names=LABELS.columns, normalize=True)

    baseline, means, stdevs = PreProcess.normalize(FEATURES)
    test = model.parse_features(FEATURES)
    assert np.allclose(baseline.values, test)
    assert np.allclose(means, model.feature_means)
    assert np.allclose(stdevs, model.feature_stdevs)

    baseline, means, stdevs = PreProcess.normalize(LABELS)
    test = model._parse_labels(LABELS)
    # np.allclose only returns a bool; without `assert` a mismatch in the
    # normalized labels would pass silently.
    assert np.allclose(baseline.values, test)
    assert np.allclose(means, model.label_means)
    assert np.allclose(stdevs, model.label_stdevs)
def test_norm_arr():
    """
    Test ModelBase Normalization on arrays

    The underlying values of FEATURES and LABELS are passed together
    with explicit name lists, and the parsed output and stored
    parameters are compared with PreProcess.normalize.
    """
    feature_names = FEATURES.columns.tolist()
    label_names = LABELS.columns.tolist()
    feature_arr = FEATURES.values
    label_arr = LABELS.values

    model = ModelBase(None, feature_names=feature_names,
                      label_names=label_names, normalize=True)

    # Features
    expected, exp_means, exp_stdevs = PreProcess.normalize(feature_arr)
    parsed = model.parse_features(feature_arr, names=feature_names)
    assert np.allclose(expected, parsed)
    assert np.allclose(exp_means, model.feature_means)
    assert np.allclose(exp_stdevs, model.feature_stdevs)

    # Labels
    expected, exp_means, exp_stdevs = PreProcess.normalize(label_arr)
    parsed = model._parse_labels(label_arr, names=label_names)
    assert np.allclose(expected, parsed)
    assert np.allclose(exp_means, model.label_means)
    assert np.allclose(exp_stdevs, model.label_stdevs)