def update(wts, grad, incr, epsilon, momentum, decay, batch_size):
    """Apply one SGD step to ``wts`` in place.

    Assumes ``matrix_add(a, b, alpha=..., beta=...)`` computes
    ``a = alpha * a + beta * b`` — TODO confirm against its definition.
    With momentum, the gradient (scaled by ``epsilon / batch_size``) and a
    weight-decay term are folded into the increment buffer ``incr``, which
    is then added to ``wts``; without momentum the scaled gradient is
    blended into ``wts`` directly.
    """
    Assert.eq(grad.shape, wts.shape)
    # matrix_add operands are expected to be single-precision.
    for buf in (grad, wts, incr):
        assert buf.dtype == np.float32
    scaled_lr = np.float32(epsilon / batch_size)
    if momentum > 0.0:
        matrix_add(incr, grad, alpha=momentum, beta=scaled_lr)
        matrix_add(incr, wts, alpha=1, beta=np.float32(-decay * epsilon))
        matrix_add(wts, incr)
    else:
        matrix_add(wts, grad, alpha=1, beta=scaled_lr)
def update(wts, grad, incr, epsilon, momentum, decay, batch_size):
    """In-place SGD weight update (see ``matrix_add`` for blend semantics).

    NOTE(review): this is a duplicate of an identical ``update`` defined
    earlier in the file; at import time the later definition wins —
    consider removing one of them.
    """
    Assert.eq(grad.shape, wts.shape)
    assert grad.dtype == np.float32
    assert wts.dtype == np.float32
    assert incr.dtype == np.float32
    step = np.float32(epsilon / batch_size)
    if momentum <= 0.0:
        # Plain SGD: blend the scaled gradient straight into the weights.
        matrix_add(wts, grad, alpha=1, beta=step)
        return
    # Momentum path: accumulate the scaled gradient and the weight-decay
    # term into the increment buffer, then apply it to the weights.
    matrix_add(incr, grad, alpha=momentum, beta=step)
    matrix_add(incr, wts, alpha=1, beta=np.float32(-decay * epsilon))
    matrix_add(wts, incr)
def wt(self):
    """Return the cached weight array, or ``None`` if none has been set.

    When a weight is present, its shape is validated against
    ``self.shape`` before it is returned.
    """
    cached = self._wt
    if cached is None:
        return cached
    Assert.eq(cached.shape, self.shape)
    return cached
def set_weight(self, w):
    """Store ``to_gpu(w)`` as this object's weight.

    On first use (``self.shape`` is ``None``) the weight's shape is
    adopted as the expected shape; afterwards any new weight must match
    the recorded shape.
    """
    new_shape = w.shape
    if self.shape is None:
        # First assignment fixes the expected shape.
        self.shape = new_shape
    Assert.eq(new_shape, self.shape)
    self._wt = to_gpu(w)