def test_zmuv_rows():
    """Compare the zmuv_rows helpers with a plain NumPy standardization."""
    data = np.random.randn(10000, 100).astype(np.float32)

    # Reference result: subtract the axis-0 mean, divide by the axis-0 std.
    centered = data - data.mean(axis=0)
    expected = centered / data.std(axis=0)

    # Helper under test (the original comment labels it the Cython path):
    # it receives the transposed view and returns a (mean, std) pair.
    mean, std = zmuv_rows(data.T)
    normalized = (data - mean) / std
    assert_allclose(expected, normalized, rtol=RTOL, atol=ATOL)

    # In-place variant: called on the transposed view, after which `data`
    # itself is expected to hold the standardized values.
    zmuv_rows_apply_inplace(data.T, mean, std)
    assert_allclose(expected, data, rtol=RTOL, atol=ATOL)
def partial_fit(self, X):
    """Fold the statistics of batch ``X`` into the running mean and std.

    On the first call (``self._rows_mean is None``) the mean and std returned
    by ``zmuv_rows(X.T)`` are stored as-is. On later calls they are merged
    with the stored values, each side weighted by its sample count.

    Parameters
    ----------
    X : object accepted by ``_check_X``
        ``len(X)`` is used as the number of samples in the batch and ``X.T``
        is passed to ``zmuv_rows``.

    Returns
    -------
    self
        Returned to allow call chaining.

    Notes
    -----
    The merge assumes ``zmuv_rows`` returns the population (ddof=0) standard
    deviation, as the previous formula did -- TODO confirm.
    """
    _check_X(X)
    batch_mean, batch_std = zmuv_rows(X.T)
    n_batch = len(X)

    if self._rows_mean is None:
        # First batch: nothing to merge with yet.
        self._rows_mean = batch_mean
        self._rows_std = batch_std
        self._n_samples = n_batch
        return self

    # TODO: change this, we should use sum and ssq in cython-based helpers.
    n_old = self._n_samples
    n_total = n_old + n_batch

    # Do the merge in float64: the stored statistics are float32, and the
    # former "E[x^2] - mean^2" form cancelled catastrophically when
    # |mean| >> std, occasionally going negative and turning into NaN
    # under the square root.
    old_mean = np.asarray(self._rows_mean, dtype=np.float64)
    old_var = np.asarray(self._rows_std, dtype=np.float64) ** 2
    new_mean = np.asarray(batch_mean, dtype=np.float64)
    new_var = np.asarray(batch_std, dtype=np.float64) ** 2

    # Pairwise merge of two (count, mean, variance) summaries. Every term of
    # the merged sum of squared deviations is non-negative, so the square
    # root is always well defined. Divisions are applied to arrays, never to
    # two plain integers.
    delta = new_mean - old_mean
    merged_mean = old_mean + delta * n_batch / n_total
    merged_m2 = (
        n_old * old_var
        + n_batch * new_var
        + delta ** 2 * n_old * n_batch / n_total
    )
    merged_std = np.sqrt(merged_m2 / n_total)

    self._rows_mean = merged_mean.astype('float32')
    self._rows_std = merged_std.astype('float32')
    self._n_samples = n_total
    return self
def fit(self, X):
    """Compute the mean and std of ``X`` from scratch and store them.

    Any previously stored statistics are overwritten.

    Parameters
    ----------
    X : object accepted by ``_check_X``
        ``X.T`` is passed to ``zmuv_rows``; ``len(X)`` is recorded as the
        number of samples seen.

    Returns
    -------
    self
        Returned to allow call chaining.
    """
    _check_X(X)
    self._rows_mean, self._rows_std = zmuv_rows(X.T)
    # Keep the sample count in sync with the statistics just stored, so that
    # a later partial_fit weights this data correctly instead of relying on a
    # missing or stale count.
    self._n_samples = len(X)
    return self