Example 1
 def fprop(self):
     if self.phase == 'train':
         # Calculate batch mean
         tmp = ca.mean(self.x.out, axis=0, keepdims=True)
         # Center input
         ca.subtract(self.x.out, tmp, self._tmp_batch_centered)
         # Update running mean
         tmp *= 1 - self.momentum
         self.running_mean *= self.momentum
         self.running_mean += tmp
         # Calculate batch variance
         ca.power(self._tmp_batch_centered, 2, self.out)
         ca.mean(self.out, axis=0, keepdims=True,
                 out=self._tmp_batch_inv_std)
         # Calculate 1 / sqrt(E[(x - E(x))^2] + eps)
         self._tmp_batch_inv_std += self.eps
         ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
         ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
         # Normalize input
         ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                     self.out)
         # Update running std
         self.running_std *= self.momentum
         ca.multiply(self._tmp_batch_inv_std, 1 - self.momentum, tmp)
         self.running_std += tmp
     elif self.phase == 'test':
         ca.subtract(self.x.out, self.running_mean, self.out)
         self.out *= self.running_std
     else:
         raise ValueError('Invalid phase: %s' % self.phase)
     if self.affine:
         self.out *= self.gamma.array
         self.out += self.beta.array
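Note that running_std above actually accumulates the running inverse standard deviation, which is why the test phase multiplies by it rather than dividing. A minimal NumPy sketch of the same per-feature statistics (function name and default hyperparameters are placeholders; cudarray mirrors NumPy's interface, so the math carries over directly):

import numpy as np

def batch_norm_train(x, running_mean, running_inv_std, momentum=0.9, eps=1e-6):
    # Per-feature statistics over the batch axis
    mean = x.mean(axis=0, keepdims=True)
    centered = x - mean
    inv_std = 1.0 / np.sqrt((centered**2).mean(axis=0, keepdims=True) + eps)
    # Exponential moving averages, mirroring the updates above
    running_mean = momentum * running_mean + (1 - momentum) * mean
    running_inv_std = momentum * running_inv_std + (1 - momentum) * inv_std
    return centered * inv_std, running_mean, running_inv_std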
Example 2
 def step(self, param, mean_square):
     grad = param.grad()
     # mean_square = decay*mean_square + (1 - decay)*grad**2
     mean_square *= self.decay
     tmp = grad**2
     tmp *= (1 - self.decay)
     mean_square += tmp
     # step = -learn_rate*grad/(sqrt(mean_square) + eps)
     ca.sqrt(mean_square, tmp)
     tmp += self.eps
     ca.divide(grad, tmp, tmp)
     tmp *= -self.learn_rate
     param.step(tmp)
Example 3
 def step(self, param, last_step):
     last_step *= self.decay
     step = param.grad()
     last_step += (1.0 - self.decay) * step**2
     scaling = ca.sqrt(last_step) + self.eps
     step *= -self.learn_rate
     step /= scaling
     param.step(step)
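Both step variants implement RMSProp: keep an exponential moving average of squared gradients, then scale the gradient by its root. A compact NumPy sketch of the same update (standalone function and default hyperparameters are assumptions for illustration):

import numpy as np

def rmsprop_step(w, grad, mean_square, learn_rate=1e-3, decay=0.9, eps=1e-8):
    # Running average of squared gradients
    mean_square = decay * mean_square + (1 - decay) * grad**2
    # Scale the step by the inverse RMS of recent gradients
    w = w - learn_rate * grad / (np.sqrt(mean_square) + eps)
    return w, mean_square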
Example 4
 def step(self):
     for param, rms_grad in zip(self.params, self.steps):
         rms_grad *= self.decay
         step = param.grad()
         if param.penalty is not None:
             step -= param.penalty()
         rms_grad += (1.0 - self.decay) * step**2
         scaling = ca.maximum(ca.sqrt(rms_grad), self.max_scaling_inv)
         step_rate = self.learn_rate * param.learn_rate / self.batch_size
         param.step(step / scaling * (-step_rate))
Example 5
import numpy as np


def normalize(matrix, gpuFlag=False):
    if gpuFlag:
        import cudarray as ca
        norm = ca.sqrt(ca.sum(ca.power(matrix, 2), 1, keepdims=True))
        matrix_n = matrix / norm
    else:
        norm = np.sqrt(np.sum(np.square(matrix), 1, keepdims=True))
        matrix_n = matrix / norm

    return matrix_n
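For reference, a quick CPU-side use of normalize (gpuFlag left at its default), which scales each row to unit L2 norm:

m = np.array([[3.0, 4.0],
              [1.0, 0.0]])
print(normalize(m))
# [[0.6 0.8]
#  [1.  0. ]]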
Example 6
    def fprop(self, x, phase):
        n_channels = x.shape[1]

        # Calculate local mean
        tmp = self.conv_op.fprop(x, self.ca_kernel)
        if n_channels > 1:
            ca.divide(tmp, n_channels, tmp)

        # Center input with local mean
        centered = ca.subtract(x, tmp)

        # Calculate local standard deviation
        tmp = ca.power(centered, 2)
        tmp = self.conv_op.fprop(tmp, self.ca_kernel)
        if n_channels > 1:
            ca.divide(tmp, n_channels, tmp)
        ca.sqrt(tmp, tmp)

        # Scale centered input with standard deviation
        return centered / (tmp + self.eps)
Example 7
    def fprop(self, x):
        n_channels = x.shape[1]

        # Calculate local mean
        tmp = self.conv_op.fprop(x, self.ca_kernel)
        if n_channels > 1:
            ca.divide(tmp, n_channels, tmp)

        # Center input with local mean
        centered = ca.subtract(x, tmp)

        # Calculate local standard deviation
        tmp = ca.power(centered, 2)
        tmp = self.conv_op.fprop(tmp, self.ca_kernel)
        if n_channels > 1:
            ca.divide(tmp, n_channels, tmp)
        ca.sqrt(tmp, tmp)

        # Scale centered input with standard deviation
        return centered / (tmp + self.eps)
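Both fprop variants perform local contrast normalization: subtract a local mean computed by convolution, then divide by the local standard deviation of the centered input. A rough CPU analogue, swapping the cudarray convolution for a box filter (scipy's uniform_filter stands in for conv_op and ca_kernel, which are assumptions here):

import numpy as np
from scipy.ndimage import uniform_filter

def lcn(x, size=9, eps=1e-4):
    # Local mean over a size x size window (stands in for conv_op.fprop)
    local_mean = uniform_filter(x, size=size)
    centered = x - local_mean
    # Local standard deviation of the centered input
    local_std = np.sqrt(uniform_filter(centered**2, size=size))
    return centered / (local_std + eps)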
Example 8
 def step(self, param, last_step):
     last_step *= self.decay
     step = param.grad()
     penalty = param.penalty()
     if penalty is not None:
         step -= penalty
     last_step += (1.0 - self.decay) * step**2
     scaling = ca.sqrt(last_step) + self.eps
     step *= -self.learn_rate
     step /= scaling
     param.step(step)
Example 9
    def fprop(self):
        if self.phase == 'train':
            # Calculate batch mean
            tmp = ca.mean(ca.mean(self.x.array, axis=0, keepdims=True),
                          axis=(2, 3), keepdims=True)
            # Center input
            ca.subtract(self.x.array, tmp, self._tmp_batch_centered)
            # Update running mean
            tmp *= 1 - self.momentum
            self.running_mean *= self.momentum
            self.running_mean += tmp
            # Calculate batch variance
            ca.power(self._tmp_batch_centered, 2, self.array)
            ca.mean(ca.mean(self.array, axis=0, keepdims=True), axis=(2, 3),
                    keepdims=True, out=self._tmp_batch_inv_std)
            # Calculate 1 / sqrt(E[(x - E(x))^2] + eps)
            self._tmp_batch_inv_std += self.eps
            ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
            ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
            # Normalize input
            ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                        self.array)
            # Update running std
            self.running_std *= self.momentum
            ca.multiply(self._tmp_batch_inv_std, 1 - self.momentum, tmp)
            self.running_std += tmp

            if self.noise_std > 0.0:
                noise = ca.random.normal(scale=self.noise_std,
                                         size=self.shape)
                ca.add(self.array, noise, self.array)

        elif self.phase == 'test':
            ca.subtract(self.x.array, self.running_mean, self.array)
            self.array *= self.running_std
        else:
            raise ValueError('Invalid phase: %s' % self.phase)
        if self.affine:
            self.array *= self.gamma.array
            self.array += self.beta.array
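This variant computes one statistic per channel by reducing over the batch axis and both spatial axes with nested ca.mean calls. In NumPy the same reduction is a single call (a sketch assuming NCHW layout, as the axis choices imply):

import numpy as np

x = np.random.randn(8, 3, 5, 5)  # (batch, channels, height, width)
mean = x.mean(axis=(0, 2, 3), keepdims=True)
var = ((x - mean)**2).mean(axis=(0, 2, 3), keepdims=True)
print(mean.shape)  # (1, 3, 1, 1): one value per channel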
Example 10
 def step(self, param, state):
     m, v, t = state
     grad = param.grad()
     t += 1
     t = int(t)
     # m = beta1*m + (1 - beta1)*grad
     m *= self.beta1
     tmp = (1 - self.beta1)*grad
     m += tmp
     # v = beta2*v + (1 - beta2)*grad**2
     v *= self.beta2
     ca.power(grad, 2, tmp)
     tmp *= (1 - self.beta2)
     v += tmp
     # alpha = learn_rate*sqrt(1 - beta2**t)/(1 - beta1**t)
     # step = -alpha_t*m/(sqrt(v) + eps)
     alpha = self.learn_rate*np.sqrt(1 - self.beta2**t)/(1 - self.beta1**t)
     ca.sqrt(v, tmp)
     tmp += self.eps
     ca.divide(m, tmp, tmp)
     tmp *= -alpha
     param.step(tmp)
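The same Adam update in self-contained NumPy form, run on a toy quadratic to sanity-check the bias-corrected step size (the hyperparameter defaults below are the commonly published values, not necessarily this example's):

import numpy as np

def adam_step(w, grad, m, v, t,
              learn_rate=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad**2
    # alpha = learn_rate*sqrt(1 - beta2**t)/(1 - beta1**t), as above
    alpha = learn_rate * np.sqrt(1 - beta2**t) / (1 - beta1**t)
    w = w - alpha * m / (np.sqrt(v) + eps)
    return w, m, v

w, m, v = np.array([5.0]), 0.0, 0.0
for t in range(1, 1001):
    w, m, v = adam_step(w, 2 * w, m, v, t)  # gradient of w**2 is 2*w
print(w)  # moves steadily toward the minimum at 0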
Example 11
 def step(self, param, state):
     m, v, t = state
     grad = param.grad()
     t += 1
     t = int(t)
     beta1_t = self.beta1 * self.lambd**(t - 1)
     m *= beta1_t
     m += (1 - beta1_t) * grad
     v *= self.beta2
     v += (1 - self.beta2) * grad**2
     learn_rate = (self.learn_rate * (1 - self.beta2**t)**0.5 /
                   (1 - self.beta1**t))
     step = m / (ca.sqrt(v) + self.eps)
     step *= -learn_rate
     param.step(step)
Example 12
 def step(self, param, state):
     m, v, t = state
     grad = param.grad()
     penalty = param.penalty()
     if penalty is not None:
         grad -= penalty
     t += 1
     t = int(t)
     beta1_t = self.beta1 * self.lambd**(t - 1)
     m *= beta1_t
     m += (1 - beta1_t) * grad
     v *= self.beta2
     v += (1 - self.beta2) * grad**2
     learn_rate = (self.learn_rate * (1 - self.beta2**t)**0.5 /
                   (1 - self.beta1**t))
     step = m / (ca.sqrt(v) + self.eps)
     step *= -learn_rate
     param.step(step)
Example 13
import numpy as np
import cudarray as ca


def matrix_factorization(R,
                         P,
                         Q,
                         mask,
                         steps=200000000,
                         alpha=0.00005,
                         beta=0.02):
    Q = ca.transpose(Q)
    for step in range(steps):
        E = ca.subtract(R, ca.multiply(ca.dot(P, Q), mask))

        rmse = ca.sqrt(ca.sum(ca.power(E, 2)) / ca.sum(mask))
        rmse = np.array(rmse)[0]

        print('step: %i RMSE: %f' % (step, rmse))
        if rmse < 0.65:
            break
        P = ca.add(ca.multiply(P, (1 - alpha * beta)),
                   ca.multiply(ca.dot(E, ca.transpose(Q)), 2 * alpha))
        Q = ca.add(ca.multiply(Q, (1 - alpha * beta)),
                   ca.multiply(ca.dot(ca.transpose(P), E), 2 * alpha))

    return P, Q
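A hypothetical invocation on synthetic data (the shapes and reduced step count are illustrative only; R is pre-masked so unobserved entries are zero, matching how E is computed inside the loop):

n_users, n_items, k = 100, 80, 10
mask = ca.array((np.random.rand(n_users, n_items) < 0.3).astype(float))
R = ca.array(np.random.rand(n_users, n_items)) * mask
P = ca.array(np.random.rand(n_users, k))
Q = ca.array(np.random.rand(n_items, k))
P, Q = matrix_factorization(R, P, Q, mask, steps=10000)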