# These snippets assume the conventional aliases:
import numpy as np
import gnumpy as gp


def relu_hard(x, computeGrad=False):
    """Hard ReLU via the sign trick: (x + sign(x)*x) / 2 == max(0, x).

    With computeGrad=True this returns gp.sign(x); applied to the
    non-negative ReLU *outputs* (as in costAndGrad below), sign() is
    exactly the 0/1 derivative mask of the hard ReLU.
    """
    if not computeGrad:
        f = (1 / 2.) * (x + gp.sign(x) * x)
        return f
    g = gp.sign(x)
    return g

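# Added sketch (not from the source): a quick NumPy check of the sign
# trick used by relu_hard; (x + sign(x)*x) / 2 reproduces max(0, x),
# and sign() of the non-negative outputs gives the 0/1 derivative.
import numpy as np

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
f = (1 / 2.) * (x + np.sign(x) * x)
assert np.allclose(f, np.maximum(0.0, x))
assert np.allclose(np.sign(f), (x > 0).astype(float))
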
def relu(x, computeGrad=False):
    """Leaky ReLU via the sign trick; see the check below."""
    negslope = .01
    a = (1 + negslope) / 2.
    b = (1 - negslope) / 2.
    if not computeGrad:
        f = a * x + b * gp.sign(x) * x
        return f
    # gradient: a + b = 1 for x > 0, a - b = negslope for x < 0
    g = a + b * gp.sign(x)
    return g

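# Added sketch (not from the source): with a = (1+s)/2 and b = (1-s)/2,
# a*x + b*sign(x)*x is exactly a leaky ReLU with negative slope s,
# since the coefficients sum to 1 for x > 0 and differ to s for x < 0.
import numpy as np

s = 0.01
x = np.array([-3.0, -1.0, 0.0, 1.0, 3.0])
a, b = (1 + s) / 2., (1 - s) / 2.
assert np.allclose(a * x + b * np.sign(x) * x, np.where(x > 0, x, s * x))
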
def costAndGrad(self, data, labels):
    # forward prop
    self.hActs[0] = data
    i = 1
    for w, b in self.stack:
        self.hActs[i] = w.dot(self.hActs[i - 1]) + b
        # hard ReLU on every hidden layer: (a + sign(a)*a) / 2 == max(0, a)
        if i <= len(self.layerSizes):
            self.hActs[i] = (1 / 2.) * (
                self.hActs[i] + gp.sign(self.hActs[i]) * self.hActs[i])
        i += 1

    # softmax; subtract the per-column max so exp() cannot overflow
    # (softmax is invariant to shifting each column by a constant)
    probs = self.hActs[-1] - gp.max(self.hActs[-1], axis=0)
    probs = gp.exp(probs)
    probs = probs / gp.sum(probs, axis=0)

    # one-hot label matrix, classes x minibatch
    labelMat = np.zeros(probs.shape)
    labelMat[labels, range(self.mbSize)] = 1
    labelMat = gp.garray(labelMat)
    cost = -(1. / self.mbSize) * gp.sum(labelMat * gp.log(probs))

    if not self.train:
        return cost, None

    # back prop; sign() of the post-ReLU activations is the 0/1
    # derivative mask of the hard ReLU
    self.deltas[-1] = probs - labelMat
    i = len(self.layerSizes) - 1
    for w, b in reversed(self.stack[1:]):
        self.deltas[i] = w.T.dot(self.deltas[i + 1]) * gp.sign(self.hActs[i + 1])
        i -= 1

    # compute gradients
    for i in range(len(self.grad)):
        self.grad[i][0] = (1. / self.mbSize) * self.deltas[i].dot(self.hActs[i].T)
        self.grad[i][1] = (1. / self.mbSize) * gp.sum(self.deltas[i], axis=1).reshape(-1, 1)

    return cost, self.grad

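# Added sketch (not from the source): softmax is invariant to shifting
# each column of logits by a constant, which is why costAndGrad may
# shift hActs[-1] before exponentiating; subtracting the column max
# keeps exp() finite even for large logits. NumPy stand-in for gp:
import numpy as np

logits = np.random.randn(10, 4) * 100.0  # classes x minibatch, huge values
shifted = logits - logits.max(axis=0)
probs = np.exp(shifted) / np.exp(shifted).sum(axis=0)
assert np.isfinite(probs).all()
assert np.allclose(probs.sum(axis=0), 1.0)
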
def loss_hsq(Yh, Y, delta=0.5):
    """Compute Huberized least-squares loss for Yh w.r.t. Y.

    Values in Yh should probably be network outputs, and each row in Y
    must give the real-valued target outputs for each observation.
    Vector-valued target outputs are handled just fine.
    """
    obs_count = float(Y.shape[0])
    R = Yh - Y
    mask = (gp.abs(R) < delta)
    # quadratic branch R^2 inside the threshold, linear branch
    # 2*delta*|R| - delta^2 outside; the two branches meet at |R| == delta
    L = (mask * R**2.0) + ((1 - mask) * ((2.0 * delta * gp.abs(R)) - delta**2.0))
    L = gp.sum(L) / obs_count
    # matching gradient: 2R inside the threshold, 2*delta*sign(R) outside
    dL = (2.0 / obs_count) * ((mask * R) + ((1 - mask) * delta * gp.sign(R)))
    return {'L': L, 'dL': dL}

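# Added gradient check (not from the source): re-derive loss_hsq in
# NumPy and confirm dL against a central finite difference. np_loss_hsq
# is a hypothetical stand-in mirroring the gp version above.
import numpy as np

def np_loss_hsq(Yh, Y, delta=0.5):
    n = float(Y.shape[0])
    R = Yh - Y
    mask = (np.abs(R) < delta)
    L = (mask * R**2.0) + ((1 - mask) * (2.0 * delta * np.abs(R) - delta**2.0))
    dL = (2.0 / n) * ((mask * R) + ((1 - mask) * delta * np.sign(R)))
    return np.sum(L) / n, dL

Yh = np.array([[0.1, -2.0], [0.9, 0.3]])  # entries in both branches
Y = np.zeros((2, 2))
_, dL = np_loss_hsq(Yh, Y)
eps = 1e-6
for idx in np.ndindex(*Yh.shape):
    E = np.zeros_like(Yh)
    E[idx] = eps
    num = (np_loss_hsq(Yh + E, Y)[0] - np_loss_hsq(Yh - E, Y)[0]) / (2 * eps)
    assert abs(num - dL[idx]) < 1e-6
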
def sign(x): """Returns an element-wise indication of the sign of a number.""" if not isinstance(x, np.ndarray): return gp.sign(x) else: return np.sign(x)
    if (opts['do_validate'] == 1):
        if not (opts.has_key('Xv') and opts.has_key('Yv')):
            raise Exception('Validation requires validation set.')
    # clamp momentum to [0, 1]
    opts['momentum'] = min(1, max(opts['momentum'], 0))
    return opts

###############################################################
# Basic testing, to see the functions aren't _totally_ broken #
###############################################################

if __name__ == '__main__':
    from time import clock
    obs_count = 1000
    class_count = 100
    # random +/-1 targets and random real-valued predictions
    Y = gp.sign(gp.randn((obs_count, class_count)))
    Yh = gp.randn((obs_count, class_count))
    # Check that loss functions won't crash
    t1 = clock()
    print "Computing all losses 10 times:",
    for i in range(10):
        loss_info = loss_mclr(Yh, Y)
        loss_info = loss_mcl2h(Yh, Y)
        loss_info = loss_lsq(Yh, Y)
        loss_info = loss_hsq(Yh, Y)
        print ".",
    print " "
    t2 = clock()
    print "Total time: " + str(t2 - t1)
    # Check that class representation converters won't crash
    obs_count = 20

def sign(x):
    check_type(x)
    # dispatch on array type: NumPy ndarray vs gnumpy garray
    if is_np(x):
        return np.sign(x)
    else:
        return gp.sign(x)

def sign(x):
    # despite the name, this is a 0/1 threshold at 0.5: recentering
    # gnp.sign's {-1, 1} output maps x > 0.5 to 1 and x < 0.5 to 0
    return (gnp.sign(x - 0.5) + 1) / 2

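# Added check (not from the source): recentering sign's output turns it
# into a 0/1 threshold at 0.5, as the function above relies on.
import numpy as np

x = np.array([0.0, 0.2, 0.8, 1.0])
assert np.allclose((np.sign(x - 0.5) + 1) / 2, (x > 0.5).astype(float))
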
def sign(x):
    return gp.sign(x)

def abs(x):
    # element-wise absolute value via the identity sign(x) * x == |x|
    return gp.sign(x) * x

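# Added check (not from the source): sign(x) * x reproduces |x|
# element-wise, including at zero where sign(0) == 0.
import numpy as np

x = np.array([-2.5, 0.0, 3.0])
assert np.allclose(np.sign(x) * x, np.abs(x))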