Example #1
0
    def _normalize_arr(self, arr, names):
        """
        Normalize array and save normalization parameters to given names

        Parameters
        ----------
        arr : ndarray
            Array of features/label to normalize
        names : list
            List of feature/label names, one per item in arr

        Returns
        -------
        norm_arr : ndarray
            Normalized features/label

        Raises
        ------
        RuntimeError
            If the number of names does not match the number of items
            reported by self._get_item_number(arr)
        """
        n_names = self._get_item_number(arr)
        if len(names) != n_names:
            # Report the count that was actually compared. Indexing
            # arr.shape[1] here raises IndexError when arr has no second
            # dimension, which would mask the intended RuntimeError.
            msg = ("Number of item names ({}) does not match number of items "
                   "({})".format(len(names), n_names))
            logger.error(msg)
            raise RuntimeError(msg)

        means, stdevs = self.get_norm_params(names)
        # Store parameters only when either one was missing beforehand
        update = means is None or stdevs is None

        norm_arr, means, stdevs = PreProcess.normalize(arr,
                                                       mean=means,
                                                       stdev=stdevs)
        if update:
            for i, n in enumerate(names):
                norm_params = {n: {'mean': means[i], 'stdev': stdevs[i]}}
                self._norm_params.update(norm_params)

        return norm_arr
Example #2
0
    def _normalize_df(self, df):
        """
        Normalize a DataFrame, recording per-column normalization
        parameters when they are not already available

        Parameters
        ----------
        df : pandas.DataFrame
            DataFrame of features/label to normalize

        Returns
        -------
        norm_df : pandas.DataFrame
            Normalized features/label
        """
        prior_means, prior_stdevs = self.get_norm_params(df.columns)
        have_params = prior_means is not None and prior_stdevs is not None

        normalized = PreProcess.normalize(df,
                                          mean=prior_means,
                                          stdev=prior_stdevs)
        norm_df, col_means, col_stdevs = normalized

        # Nothing to record when both parameter sets were already known
        if have_params:
            return norm_df

        for pos, col in enumerate(df.columns):
            entry = {'mean': col_means[pos], 'stdev': col_stdevs[pos]}
            self._norm_params.update({col: entry})

        return norm_df
Example #3
0
    def _normalize_dict(self, items):
        """
        Normalize each entry of a dictionary of items (features | labels)

        Entries that cannot be normalized are passed through unchanged
        after a warning is logged and emitted.

        Parameters
        ----------
        items : dict
            mapping of names to vectors

        Returns
        -------
        norm_items : dict
            mapping of names to normalized-feature vectors
        """
        normalized = {}
        for name, vector in items.items():
            known_mean = self.get_mean(name)
            known_stdev = self.get_stdev(name)
            needs_store = known_mean is None or known_stdev is None

            # Falls back to the raw vector if normalization fails
            result = vector
            try:
                result, new_mean, new_stdev = PreProcess.normalize(
                    vector, mean=known_mean, stdev=known_stdev)
                if needs_store:
                    entry = {'mean': new_mean, 'stdev': new_stdev}
                    self._norm_params.update({name: entry})
            except Exception as ex:
                msg = "Could not normalize {}:\n{}".format(name, ex)
                logger.warning(msg)
                warn(msg)

            normalized[name] = result

        return normalized
Example #4
0
def test_OHE():
    """
    Test one-hot encoding

    A categorical column is appended to the features and declared as a
    one-hot category; the leading two parsed columns must still match the
    plain normalization, and the categories must carry no normalization
    parameters.
    """
    categories = ['d', 'e', 'f']
    ohe_features = FEATURES.copy()
    ohe_features['categorical'] = np.random.choice(categories, len(FEATURES))

    model = ModelBase(None,
                      feature_names=ohe_features.columns,
                      label_names=LABELS.columns,
                      normalize=True,
                      one_hot_categories={'categorical': categories})

    float_features = FEATURES.values.astype('float32')
    expected, exp_means, exp_stdevs = PreProcess.normalize(float_features)
    parsed = model.parse_features(ohe_features)

    assert np.allclose(expected, parsed[:, :2])

    model_means = np.array(model.feature_means, dtype='float32')
    assert np.allclose(exp_means, model_means[:2])

    model_stdevs = np.array(model.feature_stdevs, dtype='float32')
    assert np.allclose(exp_stdevs, model_stdevs[:2])

    # One-hot categories are not normalized, so no parameters are stored
    for category in categories:
        assert model.get_mean(category) is None
        assert model.get_stdev(category) is None

    # Categories replace the raw column in the output feature names only
    assert all(np.isin(categories, model.feature_names))
    assert not any(np.isin(categories, model.input_feature_names))
    assert 'categorical' not in model.feature_names
    assert 'categorical' in model.input_feature_names
Example #5
0
def test_norm_df():
    """
    Test ModelBase normalization on DataFrames

    Features and labels parsed by the model are compared against a direct
    PreProcess.normalize call, along with the means and standard
    deviations stored on the model.
    """
    model = ModelBase(None,
                      feature_names=FEATURES.columns,
                      label_names=LABELS.columns,
                      normalize=True)

    baseline, means, stdevs = PreProcess.normalize(FEATURES)
    test = model.parse_features(FEATURES)
    assert np.allclose(baseline.values, test)
    assert np.allclose(means, model.feature_means)
    assert np.allclose(stdevs, model.feature_stdevs)

    baseline, means, stdevs = PreProcess.normalize(LABELS)
    test = model._parse_labels(LABELS)
    # np.allclose only returns a bool; without the assert a mismatch in
    # the normalized labels would go unnoticed.
    assert np.allclose(baseline.values, test)
    assert np.allclose(means, model.label_means)
    assert np.allclose(stdevs, model.label_stdevs)
Example #6
0
def test_norm_arr():
    """Test ModelBase Normalization on arrays with explicit names"""
    feature_names = FEATURES.columns.tolist()
    label_names = LABELS.columns.tolist()
    feature_arr = FEATURES.values
    label_arr = LABELS.values

    model = ModelBase(None,
                      feature_names=feature_names,
                      label_names=label_names,
                      normalize=True)

    # Features: parsed output and stored parameters must match a direct
    # normalization of the raw array
    expected, exp_means, exp_stdevs = PreProcess.normalize(feature_arr)
    parsed = model.parse_features(feature_arr, names=feature_names)
    assert np.allclose(expected, parsed)
    assert np.allclose(exp_means, model.feature_means)
    assert np.allclose(exp_stdevs, model.feature_stdevs)

    # Labels: same checks through the label parsing path
    expected, exp_means, exp_stdevs = PreProcess.normalize(label_arr)
    parsed = model._parse_labels(label_arr, names=label_names)
    assert np.allclose(expected, parsed)
    assert np.allclose(exp_means, model.label_means)
    assert np.allclose(exp_stdevs, model.label_stdevs)