Beispiel #1
0
 def load(self, data_path):
     """Fetch the data and, if needed, compute its normalization statistics.

     The data comes from ``fetch_blizzard(data_path, self.shuffle)``. When
     either ``self.X_mean`` or ``self.X_std`` is ``None``, both are
     (re)computed as one scalar mean and one scalar population standard
     deviation taken over every element of the data. The data is read in
     slices of ``n_inter`` leading-axis entries (presumably so that a
     disk-backed array is never loaded whole -- confirm against
     ``fetch_blizzard``).

     Parameters
     ----------
     data_path
         Passed through unchanged to ``fetch_blizzard``.

     Returns
     -------
     The object returned by ``fetch_blizzard``; it is not normalized here.

     Raises
     ------
     ValueError
         If statistics have to be computed but the data has no elements.
     """
     X = fetch_blizzard(data_path, self.shuffle)
     if self.X_mean is None or self.X_std is None:
         n_inter = 10000  # leading-axis entries read per slice
         n_total = len(X)
         # Integer ceiling division; no dependency on the removed ``np.int``.
         range_end = (n_total + n_inter - 1) // n_inter

         count = 0   # number of elements merged so far
         mean = 0.   # mean of those elements
         m2 = 0.     # sum of squared deviations of those elements from mean
         for i in range(range_end):
             i_start = i * n_inter
             i_end = min(i_start + n_inter, n_total)
             # Accumulate in float64 regardless of the storage dtype.
             chunk = np.asarray(X[i_start:i_end], dtype=np.float64)
             if chunk.size:
                 chunk_mean = chunk.mean()
                 chunk_m2 = ((chunk - chunk_mean) ** 2).sum()
                 # Pairwise merge of (count, mean, m2) with the chunk's own
                 # statistics. Weighting by element counts keeps the result
                 # exact even when the last chunk is shorter, and the
                 # within-chunk spread of the first chunk is not lost.
                 merged = count + chunk.size
                 delta = chunk_mean - mean
                 mean += delta * chunk.size / merged
                 m2 += chunk_m2 + delta ** 2 * count * chunk.size / merged
                 count = merged
             # Parenthesized single-argument form prints identically under
             # the Python 2 statement and the Python 3 function.
             print("[%d / %d]" % (i, range_end))

         if count == 0:
             raise ValueError(
                 "cannot compute X_mean / X_std: data contains no elements")
         self.X_mean = mean
         self.X_std = np.sqrt(m2 / count)
     return X