def fprop(self):
    if self.phase == 'train':
        # Calculate batch mean
        tmp = ca.mean(self.x.out, axis=0, keepdims=True)
        # Center input
        ca.subtract(self.x.out, tmp, self._tmp_batch_centered)
        # Update running mean
        tmp *= 1 - self.momentum
        self.running_mean *= self.momentum
        self.running_mean += tmp
        # Calculate batch variance
        ca.power(self._tmp_batch_centered, 2, self.out)
        ca.mean(self.out, axis=0, keepdims=True,
                out=self._tmp_batch_inv_std)
        # Calculate 1 / sqrt(E[(x - E[x])^2] + eps)
        self._tmp_batch_inv_std += self.eps
        ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
        ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
        # Normalize input
        ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                    self.out)
        # Update running std (note: this buffer accumulates the *inverse* std)
        self.running_std *= self.momentum
        ca.multiply(self._tmp_batch_inv_std, 1 - self.momentum, tmp)
        self.running_std += tmp
    elif self.phase == 'test':
        ca.subtract(self.x.out, self.running_mean, self.out)
        self.out *= self.running_std
    else:
        raise ValueError('Invalid phase: %s' % self.phase)
    if self.affine:
        self.out *= self.gamma.array
        self.out += self.beta.array
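# A minimal NumPy sketch of the same forward pass, for reference (assumes
# 2-D input of shape (batch, features); the function and argument names are
# illustrative, not part of the library above). As in the code above, the
# "running std" buffer actually tracks the inverse std:
import numpy as np

def batch_norm_fprop(x, running_mean, running_inv_std, phase,
                     momentum=0.9, eps=1e-5):
    if phase == 'train':
        mean = x.mean(axis=0, keepdims=True)
        centered = x - mean
        var = (centered**2).mean(axis=0, keepdims=True)
        inv_std = 1.0 / np.sqrt(var + eps)
        # Exponential moving averages, mirroring the running_* updates above
        running_mean *= momentum
        running_mean += (1 - momentum) * mean
        running_inv_std *= momentum
        running_inv_std += (1 - momentum) * inv_std
        return centered * inv_std
    # Test phase: normalize with the accumulated statistics
    return (x - running_mean) * running_inv_std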
def step(self, param, mean_square):
    grad = param.grad()
    # mean_square = decay*mean_square + (1 - decay)*grad**2
    mean_square *= self.decay
    tmp = grad**2
    tmp *= (1 - self.decay)
    mean_square += tmp
    # step = -learn_rate*grad/(sqrt(mean_square) + eps)
    ca.sqrt(mean_square, tmp)
    tmp += self.eps
    ca.divide(grad, tmp, tmp)
    tmp *= -self.learn_rate
    param.step(tmp)
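# The same RMSProp update in plain NumPy, for reference (a sketch; the
# argument names mirror the fields used above, and the defaults are common
# choices rather than values taken from this code):
import numpy as np

def rmsprop_step(grad, mean_square, decay=0.9, learn_rate=0.001, eps=1e-8):
    # Decaying average of squared gradients
    mean_square *= decay
    mean_square += (1 - decay) * grad**2
    # Scale the gradient by its root-mean-square magnitude
    return -learn_rate * grad / (np.sqrt(mean_square) + eps)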
def step(self, param, last_step):
    # Decaying average of squared gradients
    last_step *= self.decay
    step = param.grad()
    last_step += (1.0 - self.decay) * step**2
    # Scale the gradient by its root-mean-square magnitude
    scaling = ca.sqrt(last_step) + self.eps
    step *= -self.learn_rate
    step /= scaling
    param.step(step)
import numpy as np

def normalize(matrix, gpuFlag=False):
    if gpuFlag:
        import cudarray as ca
        # L2 norm of each row
        norm = ca.sqrt(ca.sum(ca.power(matrix, 2), 1, keepdims=True))
        matrix_n = matrix / norm
    else:
        norm = np.sqrt(np.sum(np.square(matrix), 1, keepdims=True))
        matrix_n = matrix / norm
    return matrix_n
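# Example usage on the CPU path (illustrative values):
example = np.array([[3.0, 4.0],
                    [1.0, 0.0]])
print(normalize(example))
# -> [[0.6 0.8]
#     [1.  0. ]]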
def step(self):
    for param, rms_grad in zip(self.params, self.steps):
        # Decaying average of squared gradients
        rms_grad *= self.decay
        step = param.grad()
        # Apply the weight penalty (regularization) gradient, if any
        if param.penalty is not None:
            step -= param.penalty()
        rms_grad += (1.0 - self.decay) * step**2
        # Cap the scaling factor from below to avoid huge steps
        scaling = ca.maximum(ca.sqrt(rms_grad), self.max_scaling_inv)
        step_rate = self.learn_rate * param.learn_rate / self.batch_size
        param.step(step / scaling * (-step_rate))
def fprop(self, x, phase):
    n_channels = x.shape[1]
    # Calculate local mean
    tmp = self.conv_op.fprop(x, self.ca_kernel)
    if n_channels > 1:
        ca.divide(tmp, n_channels, tmp)
    # Center input with local mean
    centered = ca.subtract(x, tmp)
    # Calculate local standard deviation
    tmp = ca.power(centered, 2)
    tmp = self.conv_op.fprop(tmp, self.ca_kernel)
    if n_channels > 1:
        ca.divide(tmp, n_channels, tmp)
    ca.sqrt(tmp, tmp)
    # Scale centered input with standard deviation
    return centered / (tmp + self.eps)
def fprop(self, x):
    n_channels = x.shape[1]
    # Calculate local mean
    tmp = self.conv_op.fprop(x, self.ca_kernel)
    if n_channels > 1:
        ca.divide(tmp, n_channels, tmp)
    # Center input with local mean
    centered = ca.subtract(x, tmp)
    # Calculate local standard deviation
    tmp = ca.power(centered, 2)
    tmp = self.conv_op.fprop(tmp, self.ca_kernel)
    if n_channels > 1:
        ca.divide(tmp, n_channels, tmp)
    ca.sqrt(tmp, tmp)
    # Scale centered input with standard deviation
    return centered / (tmp + self.eps)
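# A rough CPU analogue of the local normalization above, using
# scipy.ndimage's box filter in place of self.conv_op/self.ca_kernel
# (a sketch under that assumption, not the library's actual kernel;
# x is assumed to have shape (batch, channels, height, width)):
import numpy as np
from scipy.ndimage import uniform_filter

def local_contrast_normalize(x, size=9, eps=1e-5):
    window = (1, x.shape[1], size, size)
    # Local mean over the window, pooled across channels
    centered = x - uniform_filter(x, size=window)
    # Local standard deviation over the same window
    std = np.sqrt(uniform_filter(centered**2, size=window))
    # Scale centered input with the local standard deviation
    return centered / (std + eps)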
def step(self, param, last_step):
    # Decaying average of squared gradients
    last_step *= self.decay
    step = param.grad()
    # Apply the weight penalty (regularization) gradient, if any
    penalty = param.penalty()
    if penalty is not None:
        step -= penalty
    last_step += (1.0 - self.decay) * step**2
    scaling = ca.sqrt(last_step) + self.eps
    step *= -self.learn_rate
    step /= scaling
    param.step(step)
def fprop(self):
    if self.phase == 'train':
        # Calculate batch mean over both batch and spatial axes
        tmp = ca.mean(ca.mean(self.x.array, axis=0, keepdims=True),
                      axis=(2, 3), keepdims=True)
        # Center input
        ca.subtract(self.x.array, tmp, self._tmp_batch_centered)
        # Update running mean
        tmp *= 1 - self.momentum
        self.running_mean *= self.momentum
        self.running_mean += tmp
        # Calculate batch variance
        ca.power(self._tmp_batch_centered, 2, self.array)
        ca.mean(ca.mean(self.array, axis=0, keepdims=True),
                axis=(2, 3), keepdims=True, out=self._tmp_batch_inv_std)
        # Calculate 1 / sqrt(E[(x - E[x])^2] + eps)
        self._tmp_batch_inv_std += self.eps
        ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
        ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
        # Normalize input
        ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                    self.array)
        # Update running std (note: this buffer accumulates the *inverse* std)
        self.running_std *= self.momentum
        ca.multiply(self._tmp_batch_inv_std, 1 - self.momentum, tmp)
        self.running_std += tmp
        if self.noise_std > 0.0:
            noise = ca.random.normal(scale=self.noise_std, size=self.shape)
            ca.add(self.array, noise, self.array)
    elif self.phase == 'test':
        ca.subtract(self.x.array, self.running_mean, self.array)
        self.array *= self.running_std
    else:
        raise ValueError('Invalid phase: %s' % self.phase)
    if self.affine:
        self.array *= self.gamma.array
        self.array += self.beta.array
def step(self, param, state):
    m, v, t = state
    grad = param.grad()
    t += 1
    t = int(t)
    # m = beta1*m + (1 - beta1)*grad
    m *= self.beta1
    tmp = (1 - self.beta1)*grad
    m += tmp
    # v = beta2*v + (1 - beta2)*grad**2
    v *= self.beta2
    ca.power(grad, 2, tmp)
    tmp *= (1 - self.beta2)
    v += tmp
    # alpha = learn_rate*sqrt(1 - beta2**t)/(1 - beta1**t)
    # step = -alpha*m/(sqrt(v) + eps)
    alpha = self.learn_rate*np.sqrt(1 - self.beta2**t)/(1 - self.beta1**t)
    ca.sqrt(v, tmp)
    tmp += self.eps
    ca.divide(m, tmp, tmp)
    tmp *= -alpha
    param.step(tmp)
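# The same bias-corrected Adam step in plain NumPy, for reference (a
# sketch; the defaults follow the Adam paper and are assumptions, not
# values read from this code):
import numpy as np

def adam_step(grad, m, v, t, learn_rate=0.001, beta1=0.9, beta2=0.999,
              eps=1e-8):
    # First and second moment estimates
    m *= beta1
    m += (1 - beta1) * grad
    v *= beta2
    v += (1 - beta2) * grad**2
    # Bias-corrected step size
    alpha = learn_rate * np.sqrt(1 - beta2**t) / (1 - beta1**t)
    return -alpha * m / (np.sqrt(v) + eps)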
def step(self, param, state):
    m, v, t = state
    grad = param.grad()
    t += 1
    t = int(t)
    # beta1 is decayed by a factor lambd each timestep
    beta1_t = self.beta1 * self.lambd**(t - 1)
    m *= beta1_t
    m += (1 - beta1_t) * grad
    v *= self.beta2
    v += (1 - self.beta2) * grad**2
    # Bias-corrected learning rate
    learn_rate = (self.learn_rate * (1 - self.beta2**t)**0.5
                  / (1 - self.beta1**t))
    step = m / (ca.sqrt(v) + self.eps)
    step *= -learn_rate
    param.step(step)
def matrix_factorization(R, P, Q, mask, steps=200000000, alpha=0.00005,
                         beta=0.02):
    Q = ca.transpose(Q)
    for step in range(steps):
        # Error on the observed entries only
        E = ca.subtract(R, ca.multiply(ca.dot(P, Q), mask))
        rmse = ca.sqrt(ca.sum(ca.power(E, 2)) / ca.sum(mask))
        rmse = np.array(rmse)[0]
        print('step: %i RMSE: %f' % (step, rmse))
        if rmse < 0.65:
            break
        # Gradient step with L2 weight decay (beta)
        P = ca.add(ca.multiply(P, (1 - alpha*beta)),
                   ca.multiply(ca.dot(E, ca.transpose(Q)), 2*alpha))
        Q = ca.add(ca.multiply(Q, (1 - alpha*beta)),
                   ca.multiply(ca.dot(ca.transpose(P), E), 2*alpha))
    return P, Q
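# Hypothetical usage: factorize a small ratings matrix where mask marks
# the observed entries. ca.array mirrors numpy's constructor in cudarray;
# the shapes and n_factors below are illustrative assumptions:
import numpy as np
import cudarray as ca

R_np = np.array([[5.0, 3.0, 0.0],
                 [4.0, 0.0, 1.0]])
mask_np = (R_np > 0).astype(np.float64)
n_factors = 2
P0 = ca.array(np.random.rand(R_np.shape[0], n_factors))
Q0 = ca.array(np.random.rand(R_np.shape[1], n_factors))
P_out, Q_out = matrix_factorization(ca.array(R_np), P0, Q0,
                                    ca.array(mask_np), steps=10000)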
def step(self, param, state):
    m, v, t = state
    grad = param.grad()
    # Apply the weight penalty (regularization) gradient, if any
    penalty = param.penalty()
    if penalty is not None:
        grad -= penalty
    t += 1
    t = int(t)
    # beta1 is decayed by a factor lambd each timestep
    beta1_t = self.beta1 * self.lambd**(t - 1)
    m *= beta1_t
    m += (1 - beta1_t) * grad
    v *= self.beta2
    v += (1 - self.beta2) * grad**2
    # Bias-corrected learning rate
    learn_rate = (self.learn_rate * (1 - self.beta2**t)**0.5
                  / (1 - self.beta1**t))
    step = m / (ca.sqrt(v) + self.eps)
    step *= -learn_rate
    param.step(step)