def load(self, data_path):
    """Fetch the data and, when missing, compute its normalization stats.

    The data is obtained from ``fetch_blizzard(data_path, self.shuffle)``.
    If either ``self.X_mean`` or ``self.X_std`` is ``None``, both are
    (re)computed over every element of the data and stored on ``self``:
    ``X_mean`` is the global scalar mean and ``X_std`` the global scalar
    population standard deviation (divisor = number of elements).

    The statistics are accumulated chunk by chunk, so only one slice of
    ``chunk_size`` items is converted to a float64 array at a time.

    Parameters
    ----------
    data_path : passed through unchanged to ``fetch_blizzard``.

    Returns
    -------
    The object returned by ``fetch_blizzard``. It must support ``len()``
    and slicing, and each slice must be convertible to a numeric array.

    Raises
    ------
    ValueError
        If statistics are needed but the data contains no elements.
    """
    X = fetch_blizzard(data_path, self.shuffle)
    if self.X_mean is None or self.X_std is None:
        chunk_size = 10000
        n_total = len(X)
        # Ceiling division without going through floats.
        n_chunks = (n_total + chunk_size - 1) // chunk_size

        # Running aggregates over all elements seen so far:
        #   count - number of elements
        #   mean  - their mean
        #   m2    - their sum of squared deviations from `mean`
        count = 0
        mean = 0.0
        m2 = 0.0
        for i in range(n_chunks):
            i_start = i * chunk_size
            i_end = min(i_start + chunk_size, n_total)
            # float64 avoids integer overflow / low-precision accumulation
            # when squaring and summing.
            chunk = np.asarray(X[i_start:i_end], dtype=np.float64)
            if chunk.size:
                chunk_mean = chunk.mean()
                chunk_m2 = ((chunk - chunk_mean) ** 2).sum()
                # Merge the chunk's aggregates into the running ones
                # (pairwise update of Chan et al.). This is exact, and it
                # weights the shorter last chunk by its real element count.
                delta = chunk_mean - mean
                merged = count + chunk.size
                mean += delta * chunk.size / merged
                m2 += chunk_m2 + delta * delta * count * chunk.size / merged
                count = merged
            print("[%d / %d]" % (i, n_chunks))

        if count == 0:
            raise ValueError(
                "cannot compute normalization statistics: data is empty"
            )
        self.X_mean = mean
        self.X_std = np.sqrt(m2 / count)
    return X