def rect_sqrt(x, computeGrad=False):
    """Rectified square-root activation: sqrt(x) for x > 0, else 0."""
    if not computeGrad:
        f = gp.sqrt(gp.abs(x) * (x > 0))
        return f
    # d/dx sqrt(x) = 1 / (2 * sqrt(x)) for x > 0; the (x <= 0) term keeps the
    # denominator non-zero where the gradient is masked to zero anyway.
    g = 1 / (2 * gp.sqrt(gp.abs(x) * (x > 0)) + (x <= 0)) * (x > 0)
    return g

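# Quick finite-difference check of rect_sqrt; a minimal sketch assuming
# gnumpy is importable as gp, as in the function above. The test values
# are illustrative only.
import gnumpy as gp

x = gp.garray([0.25, 1.0, 4.0, -2.0])
eps = 1e-4
num_grad = (rect_sqrt(x + eps) - rect_sqrt(x - eps)) / (2 * eps)
ana_grad = rect_sqrt(x, computeGrad=True)
print gp.abs(num_grad - ana_grad).max()  # should be ~0 away from x == 0
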
def forward_prop(self, X, add_noise=False, compute_loss=False, is_test=True):
    """Compute the forward propagation step that maps the input data matrix
    X into the output (batch-normalized, then scaled and shifted)."""
    if not is_test or self.params.mu is None or self.params.sigma is None:
        # Training mode (or no stored statistics yet): normalize with the
        # statistics of the current batch and save them for test time.
        self.mu = X.mean(axis=0)
        self.sigma = gnp.std(X, axis=0)
        self.X_hat = (X - self.mu) / (self.sigma + 1e-10)
        self.params.update_mean_std(self.mu, self.sigma)
    else:
        # Test mode: normalize with the stored statistics.
        self.X_hat = (X - self.params.mu) / (self.params.sigma + 1e-10)
    # Residuals for monitoring how close X_hat is to zero mean, unit std.
    self._res_mean = gnp.abs(self.X_hat.mean(axis=0)).max()
    self._res_std = gnp.abs(gnp.std(self.X_hat, axis=0) - 1).max()
    #self.mu = X.mean(axis=0)
    #self.sigma = gnp.sqrt(((X - self.mu)**2).mean(axis=0))
    #self.X_hat = (X - self.mu) / (self.sigma + 1e-10)
    self.Y = self.X_hat * self.params.gamma + self.params.beta
    return self.Y

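# A standalone numpy sketch of the same normalize-scale-shift computation;
# gamma and beta stand in for the learned parameters and are placeholders.
import numpy as np

X = np.random.randn(64, 10) * 3.0 + 5.0    # batch of 64 rows, 10 features
mu, sigma = X.mean(axis=0), X.std(axis=0)
X_hat = (X - mu) / (sigma + 1e-10)         # ~zero mean, ~unit std per column
gamma, beta = np.ones(10), np.zeros(10)    # learned scale/shift (placeholders)
Y = X_hat * gamma + beta
print np.abs(X_hat.mean(axis=0)).max(), np.abs(X_hat.std(axis=0) - 1).max()
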
def gradient_check(self, X, y, dweights):
    """Compare the analytic gradients in dweights against central-difference
    estimates, parameter by parameter."""
    EPSILON = g.as_garray(1e-4)
    ERRORTHRESHOLD = g.as_garray(1e-2)
    g.GNUMPY_CPU_PRECISION = 64
    g.acceptable_number_types = "no nans or infs"
    for ind in range(len(self.weights)):
        w, b = self.weights[ind]
        dw, db = dweights[ind]
        # Check the bias gradients.
        for i in range(len(b)):
            b[i] = b[i] + EPSILON
            fw = self.predict_proba(X)
            op = self.f_score(y, fw)
            b[i] -= 2 * EPSILON
            fw = self.predict_proba(X)
            om = self.f_score(y, fw)
            b[i] += EPSILON
            rs = (g.as_garray(op) - g.as_garray(om)) / (EPSILON * 2.0) / g.as_garray(len(X))
            if g.abs(rs - g.as_garray(db[i])) > ERRORTHRESHOLD:
                print ind, i, rs, db[i], type(rs), type(db)
                assert(0)
        # Check the weight gradients.
        for i in range(w.shape[0]):
            for j in range(w.shape[1]):
                w[i, j] += EPSILON
                fw = self.predict_proba(X)
                op = self.f_score(y, fw)
                w[i, j] -= 2 * EPSILON
                fw = self.predict_proba(X)
                om = self.f_score(y, fw)
                w[i, j] += EPSILON
                rs = (g.as_garray(op) - g.as_garray(om)) / (EPSILON * 2.0) / g.as_garray(len(X))
                if g.abs(rs - g.as_garray(dw[i, j])) > ERRORTHRESHOLD:
                    print ind, i, j, rs, dw[i, j], type(w), type(dw)
                    assert(0)
    print "gradient_check passed"

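# The check above relies on the central-difference approximation
# f'(x) ~= (f(x + eps) - f(x - eps)) / (2 * eps), accurate to O(eps^2).
# A tiny standalone illustration on a scalar function:
f = lambda x: x**3
x, eps = 2.0, 1e-4
print (f(x + eps) - f(x - eps)) / (2 * eps), 3 * x**2  # ~12.0 vs exact 12.0
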
def loss_hsq(Yh, Y, delta=0.5):
    """Compute Huberized least-squares loss for Yh w.r.t. Y.

    Values in Yh should probably be network outputs, and each row in Y must
    give the real-valued target outputs for each observation. Vector-valued
    target outputs are handled just fine.
    """
    obs_count = float(Y.shape[0])
    R = Yh - Y
    mask = (gp.abs(R) < delta)
    # Quadratic inside [-delta, delta], linear outside (standard Huber form).
    L = (mask * R**2.0) + ((1 - mask) * ((2.0 * delta * gp.abs(R)) - delta**2.0))
    L = gp.sum(L) / obs_count
    dL = (2.0 / obs_count) * ((mask * R) + ((1 - mask) * delta * gp.sign(R)))
    return {'L': L, 'dL': dL}

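# Sanity check of the dL returned by loss_hsq against central differences;
# a minimal sketch assuming gnumpy is available as gp. Covers one residual
# in the quadratic region (0.1) and two in the linear region (2.0, -1.5).
import numpy as np
import gnumpy as gp

Yh = np.array([[0.1], [2.0], [-1.5]])
Y = gp.garray(np.zeros((3, 1)))
eps = 1e-5
ana = loss_hsq(gp.garray(Yh), Y)['dL'].as_numpy_array()
for i in range(3):
    Yp, Ym = Yh.copy(), Yh.copy()
    Yp[i, 0] += eps
    Ym[i, 0] -= eps
    num = (loss_hsq(gp.garray(Yp), Y)['L'] - loss_hsq(gp.garray(Ym), Y)['L']) / (2 * eps)
    print ana[i, 0], num  # should agree closely
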
def not_equalish(self, a, b):
    dif = gpu.abs(a - b)
    self.assertFalse(np.all(dif.as_numpy_array().flatten() < 0.00001))

def equalish(self, a, b):
    print a
    print b
    dif = gpu.abs(a - b)
    print dif
    self.assertTrue(np.all(dif.as_numpy_array().flatten() < 0.00001))

def abs(x):
    """Elementwise absolute value, dispatching on the array's type."""
    check_type(x)
    if is_np(x):
        return np.abs(x)
    else:
        return gp.abs(x)

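# Usage sketch: the function dispatches on the array type. check_type and
# is_np are helpers assumed to be defined elsewhere in this module.
import numpy as np
import gnumpy as gp

print abs(np.array([-1.0, 2.0]))   # takes the np.abs branch
print abs(gp.garray([-1.0, 2.0]))  # takes the gp.abs branch
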
def train(self, fulldata, num_epochs, eta=0.01, hidden=None, sample=False,
          early_stop=True, verbose=True):
    '''
    Method to learn the weights of the RBM.

    args:
        array fulldata: the training data
        int num_epochs: the number of times to run through the training data
        float eta: the learning rate, default 0.01
        array hidden: optional array specifying the hidden representation
            to learn (for use in a translational-RBM)
        bool sample: specifies whether training should use sampling,
            default False
        bool early_stop: whether to use early stopping, default True
    '''
    if len(fulldata) == 0:
        return
    if type(fulldata) != self.np_array_type or type(fulldata[0]) != self.np_array_type:
        fulldata = np.array([np.array(r) for r in fulldata])
    if hidden is not None:
        # check that there is a hidden rep for each data row
        assert hidden.shape[0] == fulldata.shape[0]
        # check that we have the right number of hidden units
        assert hidden.shape[1] == self.n_hidden

    # these parameters control momentum changes
    initial_momentum = 0.5
    final_momentum = 0.9
    momentum_iter = 5

    # when dealing with large arrays, we have to break the data into
    # manageable chunks to avoid out-of-memory errors
    num_rows = fulldata.shape[0]
    err_hist = []  # keep track of the errors for early stopping
    for epoch in range(num_epochs):
        if epoch <= momentum_iter:
            momentum = initial_momentum
        else:
            momentum = final_momentum
        mae = []
        if verbose:
            print "Training epoch %d of %d," % (epoch + 1, num_epochs),
        num_batches = num_rows / self.batch_size + 1
        xs = gp.garray(fulldata)
        if hidden is not None:
            hid_chunk = gp.garray(hidden)
        for batch in range(num_batches):
            # positive phase
            if num_batches == 1:
                v1 = xs
            else:
                v1 = xs[batch * self.batch_size:(batch + 1) * self.batch_size]
            if len(v1) == 0:
                continue
            if hidden is None:
                h1 = self.prop_up(v1)
            else:
                if num_batches == 1:
                    h1 = hid_chunk
                else:
                    h1 = hid_chunk[batch * self.batch_size:(batch + 1) * self.batch_size]
            # negative phase
            if sample:
                hSampled = h1.rand() < h1
                v2 = self.prop_down(hSampled)
            else:
                v2 = self.prop_down(h1)
            h2 = self.prop_up(v2)
            # update weights
            self.wu_vh = self.wu_vh * momentum + gp.dot(v1.T, h1) - gp.dot(v2.T, h2)
            self.wu_v = self.wu_v * momentum + v1.sum(0) - v2.sum(0)
            self.wu_h = self.wu_h * momentum + h1.sum(0) - h2.sum(0)
            self.W += self.wu_vh * (eta / self.batch_size)
            self.vbias += self.wu_v * (eta / self.batch_size)
            self.hbias += self.wu_h * (eta / self.batch_size)
            # calculate reconstruction error
            error = gp.abs(v2 - v1)
            #mae.append(error.euclid_norm()**2/(self.n_visible*self.batch_size))
            mae.append(gp.mean(error))
        err_hist.append(np.mean(mae))
        if verbose:
            print " mean absolute error: " + str(np.mean(mae))
        # early stopping: compare recent error to the error ~200 epochs back
        if early_stop:
            recent_err = np.mean(err_hist[epoch - 50:epoch])
            early_err = np.mean(err_hist[epoch - 200:epoch - 150])
            if (epoch > 250) and ((recent_err * 1.2) > early_err):
                break

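# Hypothetical usage sketch: the RBM constructor and its arguments
# (n_visible, n_hidden) are assumed from the attributes referenced in
# train() above and may differ in the actual class.
import numpy as np
import gnumpy as gp

data = (np.random.rand(500, 20) > 0.5).astype(np.float64)  # toy binary data
rbm = RBM(n_visible=20, n_hidden=10)   # hypothetical constructor
rbm.train(data, num_epochs=300, eta=0.01, sample=True)
recon = rbm.prop_down(rbm.prop_up(gp.garray(data[:5])))    # reconstruct 5 rows
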
def display_winc(self):
    """Display scale of weight updates.

    This can be used by external applications."""
    for i in range(0, self.num_layers):
        print 'winc%d %.5f,' % (i + 1, gnp.abs(self.layer[i].Winc).max()),
    print 'winc_out %.5f,' % gnp.abs(self.output.Winc).max(),