예제 #1
0
def test_zmuv_rows():
    """Check ``zmuv_rows`` and its in-place variant against plain NumPy.

    The reference result normalizes every column of a random float32
    matrix with NumPy's ``mean``/``std`` along axis 0. The helpers are
    given the transposed matrix, so their "rows" are the columns of the
    original data.
    """
    data = np.random.randn(10000, 100).astype(np.float32)

    # Reference: column-wise zero-mean / unit-variance using NumPy only.
    expected = (data - data.mean(axis=0)) / data.std(axis=0)

    # Statistics from the helper, applied by hand.
    mean_per_row, std_per_row = zmuv_rows(data.T)
    normalized = (data - mean_per_row) / std_per_row
    assert_allclose(expected, normalized, rtol=RTOL, atol=ATOL)

    # The in-place helper receives the transposed view; the check below
    # expects its writes to be visible through ``data`` itself.
    zmuv_rows_apply_inplace(data.T, mean_per_row, std_per_row)
    assert_allclose(expected, data, rtol=RTOL, atol=ATOL)
예제 #2
0
    def partial_fit(self, X):
        """Update the running per-feature mean and std with a new batch.

        On the first call (``self._rows_mean is None``) the statistics
        returned by ``zmuv_rows(X.T)`` are stored unchanged. On later
        calls the stored statistics are merged with those of the new
        batch, each weighted by its number of samples (``len(X)``).

        Parameters
        ----------
        X : array-like
            Batch of samples; validated by ``_check_X``. Must support
            ``len()`` and ``.T``.

        Returns
        -------
        self
            The instance, so calls can be chained.
        """
        _check_X(X)
        batch_mean, batch_std = zmuv_rows(X.T)
        n_batch = len(X)

        if self._rows_mean is None:
            # First batch: adopt its statistics as they are.
            self._rows_mean = batch_mean
            self._rows_std = batch_std
            self._n_samples = n_batch
            return self

        # XXX: change this, we should use sum and ssq in
        # cython-based helpers

        n_seen = self._n_samples
        n_total = n_seen + n_batch

        # Merge in float64: the stored statistics are float32 and the
        # intermediate products lose too much precision at that width.
        seen_mean = np.asarray(self._rows_mean, dtype=np.float64)
        seen_std = np.asarray(self._rows_std, dtype=np.float64)
        new_mean = np.asarray(batch_mean, dtype=np.float64)
        new_std = np.asarray(batch_std, dtype=np.float64)

        # Relative weight of each part. np.float64 keeps true division on
        # every Python version and NumPy's (non-raising) handling of a
        # zero total.
        w_seen = np.float64(n_seen) / n_total
        w_batch = np.float64(n_batch) / n_total

        merged_mean = w_seen * seen_mean + w_batch * new_mean

        # Pooled population variance. Algebraically equal to
        # ``E[x^2] - mean^2`` over both parts, but written as a sum of
        # non-negative terms so rounding can never push it below zero
        # (which would turn the square root into NaN).
        mean_shift = new_mean - seen_mean
        merged_var = (
            w_seen * seen_std ** 2
            + w_batch * new_std ** 2
            + w_seen * w_batch * mean_shift ** 2
        )

        self._rows_mean = merged_mean.astype('float32')
        self._rows_std = np.sqrt(merged_var).astype('float32')
        self._n_samples = n_total

        return self
예제 #3
0
 def fit(self, X):
     """Compute normalization statistics from ``X`` and store them.

     ``X`` is validated with ``_check_X``; the pair returned by
     ``zmuv_rows(X.T)`` is stored as ``_rows_mean`` / ``_rows_std``.
     Returns ``self``.
     """
     # NOTE(review): only _rows_mean/_rows_std are written here; confirm
     # whether any sample counter kept by the class should be reset too.
     _check_X(X)
     stats = zmuv_rows(X.T)
     self._rows_mean, self._rows_std = stats
     return self