def fprop(self):
     if self.phase == 'train':
         # Calculate batch mean
         tmp = ca.mean(self.x.out, axis=0, keepdims=True)
         # Center input
         ca.subtract(self.x.out, tmp, self._tmp_batch_centered)
         # Update running mean
         tmp *= 1 - self.momentum
         self.running_mean *= self.momentum
         self.running_mean += tmp
         # Calculate batch variance
         ca.power(self._tmp_batch_centered, 2, self.out)
         ca.mean(self.out, axis=0, keepdims=True,
                 out=self._tmp_batch_inv_std)
          # Calculate 1 / sqrt(E[(x - E(x))^2] + eps), i.e. the inverse std
         self._tmp_batch_inv_std += self.eps
         ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
         ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
         # Normalize input
         ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                     self.out)
          # Update running inverse std (applied multiplicatively at test time)
         self.running_std *= self.momentum
         ca.multiply(self._tmp_batch_inv_std, 1-self.momentum, tmp)
         self.running_std += tmp
     elif self.phase == 'test':
         ca.subtract(self.x.out, self.running_mean, self.out)
         self.out *= self.running_std
     else:
         raise ValueError('Invalid phase: %s' % self.phase)
     if self.affine:
         self.out *= self.gamma.array
         self.out += self.beta.array
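A minimal NumPy sketch (not library code) of what the training branch above computes; the function name, argument names and default values are assumptions for illustration:

import numpy as np

def batch_norm_train(x, running_mean, running_inv_std, gamma=None, beta=None,
                     eps=1e-5, momentum=0.9):
    # Per-feature batch statistics over axis 0, as in the cudarray calls above
    mean = np.mean(x, axis=0, keepdims=True)
    centered = x - mean
    inv_std = 1.0 / np.sqrt(np.mean(centered ** 2, axis=0, keepdims=True) + eps)
    out = centered * inv_std
    # Exponential moving averages of the mean and the *inverse* std,
    # mirroring running_mean and running_std above
    running_mean = momentum * running_mean + (1 - momentum) * mean
    running_inv_std = momentum * running_inv_std + (1 - momentum) * inv_std
    if gamma is not None:
        out = out * gamma + beta
    return out, running_mean, running_inv_std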
Example #2
 def monitor(self):
     if not self._monitor:
         return
     val_mean_abs = np.array(ca.mean(ca.fabs(self._array)))
     grad_mean_abs = np.array(ca.mean(ca.fabs(self._tmp_grad_array)))
     step_mean_abs = np.array(ca.mean(ca.fabs(self._tmp_last_step)))
      logger.info("%s:\t%.1e  [%.1e, %.1e]"
                  % (self.name, val_mean_abs, grad_mean_abs, step_mean_abs))
Example #3
    def bprop(self):
        ca.multiply(self._tmp_batch_centered, self.out_grad, self.x.out_grad)
        tmp = ca.mean(self.x.out_grad, axis=0, keepdims=True)
        ca.multiply(self._tmp_batch_centered, tmp, self.x.out_grad)
        self.x.out_grad *= -1
        self.x.out_grad *= self._tmp_batch_inv_std
        self.x.out_grad *= self._tmp_batch_inv_std

        ca.mean(self.out_grad, axis=0, keepdims=True, out=tmp)
        self.x.out_grad += self.out_grad
        self.x.out_grad -= tmp
        self.x.out_grad *= self._tmp_batch_inv_std

        if self.affine:
            self.x.out_grad *= self.gamma.array
            # Normalized input
            self._tmp_batch_centered *= self._tmp_batch_inv_std
            self._tmp_batch_centered *= self.out_grad
            ca.sum(self._tmp_batch_centered,
                   axis=0,
                   keepdims=True,
                   out=self.gamma.grad_array)
            ca.sum(self.out_grad,
                   axis=0,
                   keepdims=True,
                   out=self.beta.grad_array)
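The bprop above implements the standard batch-normalization gradient. A NumPy sketch (not library code) of the same formula; dy corresponds to self.out_grad, centered and inv_std to the buffers saved in fprop:

import numpy as np

def batch_norm_bprop(dy, centered, inv_std, gamma=None):
    # dx = inv_std * (dy - mean(dy) - centered * inv_std**2 * mean(centered * dy))
    dx = inv_std * (dy
                    - np.mean(dy, axis=0, keepdims=True)
                    - centered * inv_std ** 2
                    * np.mean(centered * dy, axis=0, keepdims=True))
    if gamma is None:
        return dx
    # Affine parameters: gradients w.r.t. gamma and beta.
    # Scaling dx by gamma at the end matches the ordering in the bprop above;
    # it is equivalent to scaling dy first because the expression is linear in
    # dy and gamma is constant over the batch axis.
    x_hat = centered * inv_std
    dgamma = np.sum(x_hat * dy, axis=0, keepdims=True)
    dbeta = np.sum(dy, axis=0, keepdims=True)
    return dx * gamma, dgamma, dbeta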
Example #4
    def bprop(self):
        ca.multiply(self._tmp_batch_centered, self.out_grad, self.x.out_grad)
        tmp = ca.mean(ca.mean(self.x.out_grad, axis=0, keepdims=True),
                      axis=(2, 3), keepdims=True)
        ca.multiply(self._tmp_batch_centered, tmp, self.x.out_grad)
        self.x.out_grad *= -1
        self.x.out_grad *= self._tmp_batch_inv_std
        self.x.out_grad *= self._tmp_batch_inv_std

        tmp = ca.mean(ca.mean(self.out_grad, axis=0, keepdims=True),
                      axis=(2, 3), keepdims=True)
        self.x.out_grad += self.out_grad
        self.x.out_grad -= tmp
        self.x.out_grad *= self._tmp_batch_inv_std

        if self.affine:
            self.x.out_grad *= self.gamma.array
            # Normalized input
            self._tmp_batch_centered *= self._tmp_batch_inv_std
            self._tmp_batch_centered *= self.out_grad
            ca.sum(ca.sum(self._tmp_batch_centered, axis=(2, 3),
                          keepdims=True), axis=0, keepdims=True,
                   out=self.gamma.grad_array)
            ca.sum(ca.sum(self.out_grad, axis=(2, 3), keepdims=True), axis=0,
                   keepdims=True, out=self.beta.grad_array)
Example #5
 def fprop(self):
     if self.phase == 'train':
         # Calculate batch mean
         tmp = ca.mean(self.x.out, axis=0, keepdims=True)
         # Center input
         ca.subtract(self.x.out, tmp, self._tmp_batch_centered)
         # Update running mean
         tmp *= 1 - self.momentum
         self.running_mean *= self.momentum
         self.running_mean += tmp
         # Calculate batch variance
         ca.power(self._tmp_batch_centered, 2, self.out)
         ca.mean(self.out,
                 axis=0,
                 keepdims=True,
                 out=self._tmp_batch_inv_std)
          # Calculate 1 / sqrt(E[(x - E(x))^2] + eps), i.e. the inverse std
         self._tmp_batch_inv_std += self.eps
         ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
         ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
         # Normalize input
         ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                     self.out)
          # Update running inverse std (applied multiplicatively at test time)
         self.running_std *= self.momentum
         ca.multiply(self._tmp_batch_inv_std, 1 - self.momentum, tmp)
         self.running_std += tmp
     elif self.phase == 'test':
         ca.subtract(self.x.out, self.running_mean, self.out)
         self.out *= self.running_std
     else:
         raise ValueError('Invalid phase: %s' % self.phase)
     if self.affine:
         self.out *= self.gamma.array
         self.out += self.beta.array
Example #6
    def bprop(self):
        ca.multiply(self._tmp_batch_centered, self.grad_array,
                    self.x.grad_array)
        tmp = ca.mean(ca.mean(self.x.grad_array, axis=0, keepdims=True),
                      axis=(2, 3), keepdims=True)
        ca.multiply(self._tmp_batch_centered, tmp, self.x.grad_array)
        self.x.grad_array *= -1
        self.x.grad_array *= self._tmp_batch_inv_std
        self.x.grad_array *= self._tmp_batch_inv_std

        tmp = ca.mean(ca.mean(self.grad_array, axis=0, keepdims=True),
                      axis=(2, 3), keepdims=True)
        self.x.grad_array += self.grad_array
        self.x.grad_array -= tmp
        self.x.grad_array *= self._tmp_batch_inv_std

        if self.affine:
            self.x.grad_array *= self.gamma.array
            # Normalized input
            self._tmp_batch_centered *= self._tmp_batch_inv_std
            self._tmp_batch_centered *= self.grad_array
            ca.sum(ca.sum(self._tmp_batch_centered, axis=(2, 3),
                          keepdims=True), axis=0, keepdims=True,
                   out=self.gamma.grad_array)
            ca.sum(ca.sum(self.grad_array, axis=(2, 3), keepdims=True), axis=0,
                   keepdims=True, out=self.beta.grad_array)
Example #7
 def monitor(self):
     if not self._monitor:
         return
     val_mean_abs = np.array(ca.mean(ca.fabs(self._array)))
     grad_mean_abs = np.array(ca.mean(ca.fabs(self._tmp_grad_array)))
     step_mean_abs = np.array(ca.mean(ca.fabs(self._tmp_step)))
     log.info('%s:\t%.1e  [%.1e, %.1e]', self.name, val_mean_abs,
              grad_mean_abs, step_mean_abs)
Example #8
 def monitor(self):
     for param, step in zip(self.params, self.steps):
         if param.monitor:
             val_mean_abs = np.array(ca.mean(ca.fabs(param.values)))
             grad_mean_abs = np.array(ca.mean(ca.fabs(param.grad())))
             step_mean_abs = np.array(ca.mean(ca.fabs(step)))
             logger.info('%s:\t%.1e  [%.1e, %.1e]'
                         % (param.name, val_mean_abs, grad_mean_abs,
                            step_mean_abs))
Example #9
 def monitor(self):
     for param, step in zip(self.params, self.steps):
         if param.monitor:
             val_mean_abs = np.array(ca.mean(ca.fabs(param.values)))
             grad_mean_abs = np.array(ca.mean(ca.fabs(param.grad())))
             step_mean_abs = np.array(ca.mean(ca.fabs(step)))
             logger.info(
                 '%s:\t%.1e  [%.1e, %.1e]' %
                 (param.name, val_mean_abs, grad_mean_abs, step_mean_abs))
Example #10
 def train_epoch(self):
     batch_losses = []
     for batch in self.feed.batches():
         loss = np.array(ca.mean(self.model.update(*batch)))
         for param, state in zip(self.params, self.learn_rule_states):
             self.learn_rule.step(param, state)
         batch_losses.append(loss)
     epoch_loss = np.mean(batch_losses)
     return epoch_loss
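A hedged usage sketch (not library code) of how train_epoch might be driven; the trainer object and n_epochs are assumptions for illustration:

def fit(trainer, n_epochs=10):
    for epoch in range(1, n_epochs + 1):
        # train_epoch() runs one pass over the feed and returns the mean batch loss
        loss = trainer.train_epoch()
        print('epoch %d: loss %.4f' % (epoch, loss))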
Example #12
    def fprop(self):
        if self.phase == 'train':
            # Calculate batch mean
            tmp = ca.mean(ca.mean(self.x.array, axis=0, keepdims=True),
                          axis=(2, 3), keepdims=True)
            # Center input
            ca.subtract(self.x.array, tmp, self._tmp_batch_centered)
            # Update running mean
            tmp *= 1 - self.momentum
            self.running_mean *= self.momentum
            self.running_mean += tmp
            # Calculate batch variance
            ca.power(self._tmp_batch_centered, 2, self.array)
            ca.mean(ca.mean(self.array, axis=0, keepdims=True), axis=(2, 3),
                    keepdims=True, out=self._tmp_batch_inv_std)
            # Calculate 1 / sqrt(E[(x - E(x))^2] + eps), i.e. the inverse std
            self._tmp_batch_inv_std += self.eps
            ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
            ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
            # Normalize input
            ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                        self.array)
            # Update running inverse std (applied multiplicatively at test time)
            self.running_std *= self.momentum
            ca.multiply(self._tmp_batch_inv_std, 1-self.momentum, tmp)
            self.running_std += tmp

            if self.noise_std > 0.0:
                noise = ca.random.normal(scale=self.noise_std,
                                         size=self.shape)
                ca.add(self.array, noise, self.array)

        elif self.phase == 'test':
            ca.subtract(self.x.array, self.running_mean, self.array)
            self.array *= self.running_std
        else:
            raise ValueError('Invalid phase: %s' % self.phase)
        if self.affine:
            self.array *= self.gamma.array
            self.array += self.beta.array
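In the fprop above, the nested ca.mean calls reduce over the batch axis and then the spatial axes, which for NCHW input is equivalent to a single per-channel mean over (batch, height, width). A NumPy sketch with assumed shapes:

import numpy as np

x = np.random.normal(size=(8, 3, 5, 5))        # (batch, channels, height, width)
mean = x.mean(axis=(0, 2, 3), keepdims=True)   # shape (1, 3, 1, 1), one value per channel
var = ((x - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)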
Example #13
    def bprop(self):
        ca.multiply(self._tmp_batch_centered, self.grad_array,
                    self.x.grad_array)
        tmp = ca.mean(self.x.grad_array, axis=0, keepdims=True)
        ca.multiply(self._tmp_batch_centered, tmp, self.x.grad_array)
        self.x.grad_array *= -1
        self.x.grad_array *= self._tmp_batch_inv_std
        self.x.grad_array *= self._tmp_batch_inv_std

        ca.mean(self.grad_array, axis=0, keepdims=True, out=tmp)
        self.x.grad_array += self.grad_array
        self.x.grad_array -= tmp
        self.x.grad_array *= self._tmp_batch_inv_std

        if self.affine:
            self.x.grad_array *= self.gamma.array
            # Normalized input
            self._tmp_batch_centered *= self._tmp_batch_inv_std
            self._tmp_batch_centered *= self.grad_array
            ca.sum(self._tmp_batch_centered, axis=0, keepdims=True,
                   out=self.gamma.grad_array)
            ca.sum(self.grad_array, axis=0, keepdims=True,
                   out=self.beta.grad_array)
Example #14
def test_reduce():
    a_np = np.random.normal(size=(1024, ))
    a_ca = ca.array(a_np)
    c_np = np.sum(a_np)
    c_ca = ca.sum(a_ca)
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.mean(a_np)
    c_ca = ca.mean(a_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    a_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)
    c_np = np.sum(a_np)
    c_ca = ca.sum(a_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=0)
    c_ca = ca.sum(a_ca, axis=0)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=1)
    c_ca = ca.sum(a_ca, axis=1)
    print(np.allclose(c_np, np.array(c_ca)))

    a_np = np.random.normal(size=(5, 7, 11))
    a_ca = ca.array(a_np)
    c_np = np.sum(a_np, axis=0)
    c_ca = ca.sum(a_ca, axis=0)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=2)
    c_ca = ca.sum(a_ca, axis=2)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=(0, 1))
    c_ca = ca.sum(a_ca, axis=(0, 1))
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.sum(a_np, axis=(1, 2))
    c_ca = ca.sum(a_ca, axis=(1, 2))
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.argmin(a_np, axis=0)
    c_ca = ca.argmin(a_ca, axis=0)
    print(np.allclose(c_np, np.array(c_ca)))

    c_np = np.argmin(a_np, axis=2)
    c_ca = ca.argmin(a_ca, axis=2)
    print(np.allclose(c_np, np.array(c_ca)))
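The checks above print booleans rather than asserting. A hedged variant (assuming numpy and cudarray are importable as in the examples) that fails loudly under a test runner:

import numpy as np
import cudarray as ca

def check_reduce():
    a_np = np.random.normal(size=(5, 7, 11))
    a_ca = ca.array(a_np)
    # Compare cudarray reductions against NumPy references
    assert np.allclose(np.sum(a_np, axis=(1, 2)),
                       np.array(ca.sum(a_ca, axis=(1, 2))))
    assert np.allclose(np.mean(a_np), np.array(ca.mean(a_ca)))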
Example #16
    def train(self, model, input, error_fun=None):
        input = Input.from_any(input)
        model.setup(**input.shapes)
        params = [p for p in model.params
                  if not isinstance(p, SharedParameter)]
        self.learn_rule.learn_rate /= input.batch_size
        learn_rule_states = [self.learn_rule.init_state(p) for p in params]
        n_params = np.sum([p.array.size for p in params])
        log.info('SGD: Model contains %i parameters.', n_params)
        log.info('SGD: %d gradient updates per epoch.', input.epoch_size)

        epoch = 0
        converged = False
        patience = self.min_epochs
        best_score = np.inf
        start_time = time.time()
        while epoch < self.max_epochs and not converged:
            epoch += 1

            batch_losses = []
            for batch in input.batches():
                loss = np.array(ca.mean(model.update(**batch)))
                batch_losses.append(loss)
                # Apply the gradient update to the parameters
                for param, state in zip(params, learn_rule_states):
                    self.learn_rule.step(param, state)

            epoch_loss = np.mean(batch_losses)
            if error_fun is not None:
                error = error_fun()
                if error < best_score:
                    improvement = error / best_score
                    if improvement < self.improvement_thresh:
                        # increase patience on significant improvement
                        patience = max(patience, epoch*self.patience_incr)
                    best_score = error
                log.info('epoch %d/%d, loss %f, error %.4f', epoch,
                         patience, epoch_loss, error)
                for param in params:
                    param.monitor()
                if patience <= epoch:
                    log.info('SGD: Converged on validation set.')
                    converged = True
            else:
                if epoch_loss < best_score:
                    improvement = epoch_loss / best_score
                    if improvement < self.improvement_thresh:
                        # increase patience on significant improvement
                        patience = max(patience, epoch*self.patience_incr)
                    best_score = epoch_loss
                log.info('epoch %d/%d, loss %f', epoch, patience, epoch_loss)
                for param in params:
                    param.monitor()
                if patience <= epoch:
                    log.info('SGD: Converged on training set.')
                    converged = True

        end_time = time.time()
        if not converged:
            log.info('SGD: Stopped by max_epochs.')
        duration = float(end_time - start_time)
        log.info('SGD: Optimization ran for %.2f minutes (%d epochs, '
                 '%.1f s/epoch)', duration/60, epoch, duration/epoch)
Example #17
 def fprop(self):
     ca.mean(self.x.out, axis=self.axis, out=self.out,
             keepdims=self.keepdims)
Example #18
    def train(self, model, input, valid_error_fun=None):
        input = Input.from_any(input)
        model._setup(input)
        params = model._params
        self.learn_rule._setup(params, input.batch_size)
        n_params = np.sum([p.array.size for p in params])
        logger.info("SGD: Model contains %i parameters." % n_params)
        logger.info("SGD: %d mini-batch gradient updates per epoch."
                    % input.n_batches)

        epoch = 0
        converged = False
        patience = self.min_epochs
        best_score = np.inf
        start_time = time.time()
        while epoch < self.max_epochs and not converged:
            epoch += 1

            batch_costs = []
            for batch in input.batches("train"):
                cost = np.array(ca.mean(model._update(batch)))
                batch_costs.append(cost)
                # Apply the gradient update to the parameters
                self.learn_rule.step()

            epoch_cost = np.mean(batch_costs)
            if valid_error_fun is not None:
                val_error = valid_error_fun()
                if val_error < best_score:
                    improvement = val_error / best_score
                    if improvement < self.improvement_thresh:
                        # increase patience on significant improvement
                        patience = max(patience, epoch * self.patience_incr)
                    best_score = val_error
                logger.info(
                    "epoch %d/%d" % (epoch, patience) + ", cost %f" % epoch_cost + ", val_error %.4f" % val_error
                )
                for p in params:
                    p.monitor()
                if patience <= epoch:
                    logger.info("SGD: Converged on validation set.")
                    converged = True
            else:
                if epoch_cost < best_score:
                    improvement = epoch_cost / best_score
                    if improvement < self.improvement_thresh:
                        # increase patience on significant improvement
                        patience = max(patience, epoch * self.patience_incr)
                    best_score = epoch_cost
                logger.info("epoch %d/%d" % (epoch, patience) + ", cost %f" % epoch_cost)
                for p in params:
                    p.monitor()
                if patience <= epoch:
                    logger.info("SGD: Converged on training set.")
                    converged = True

        end_time = time.time()
        if not converged:
            logger.info("SGD: Stopped by max_epochs.")
        duration = float(end_time - start_time)
        logger.info(
            "SGD: Optimization ran for %.2f minutes " % (duration / 60)
            + "(%d epochs, %.1f s/epoch)" % (epoch, duration / epoch)
        )
Example #19
 def fprop(self):
     ca.mean(self.x.out,
             axis=self.axis,
             out=self.out,
             keepdims=self.keepdims)
Example #20
 def loss(self, y, y_pred):
     y_pred = ca.maximum(y_pred, _FLT_MIN)
     return -ca.mean(y*ca.log(y_pred) + (1 - y)*ca.log(1 - y_pred), axis=1)
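A NumPy sketch (not library code) of the binary cross-entropy above; here both y_pred and 1 - y_pred are clipped away from zero, whereas the original only bounds y_pred from below with _FLT_MIN:

import numpy as np

def binary_cross_entropy(y, y_pred, eps=1e-12):
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred), axis=1)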
Example #21
 def loss(self, pred, target):
     return ca.mean((target - pred) ** 2, axis=1)
Example #22
 def loss(self, y, y_pred):
     return ca.mean((y - y_pred)**2, axis=1)
Example #23
 def loss(self, y, y_pred):
     return ca.mean((y-y_pred)**2, axis=1)
Example #24
 def loss(self, y, y_pred):
     return ca.mean(-ca.sum(y*ca.log(y_pred+self.eps) +
                            (1-y) * ca.log(1-y_pred+self.eps), axis=1))
Example #25
 def loss(self, pred, target):
     return ca.mean((target-pred)**2, axis=1)
Example #26
    def train(self, model, input, valid_error_fun=None):
        input = to_input(input)
        model._setup(input)
        params = model._params()
        self.learn_rule._setup(params, input.batch_size)
        n_params = np.sum([p.array.size for p in params])
        logger.info('SGD: Model contains %i parameters.' % n_params)
        logger.info('SGD: %d mini-batch gradient updates per epoch.'
                    % input.n_batches)

        epoch = 0
        converged = False
        patience = self.min_epochs
        best_score = np.inf
        start_time = time.time()
        while epoch < self.max_epochs and not converged:
            epoch += 1

            batch_costs = []
            for batch in input.supervised_batches():
                cost = np.array(ca.mean(model._update(batch)))
                batch_costs.append(cost)
                # Apply the gradient update to the parameters
                self.learn_rule.step()

            epoch_cost = np.mean(batch_costs)
            if valid_error_fun is not None:
                val_error = valid_error_fun()
                if val_error < best_score:
                    improvement = val_error / best_score
                    if improvement < self.improvement_thresh:
                        # increase patience on significant improvement
                        patience = max(patience, epoch*self.patience_incr)
                    best_score = val_error
                logger.info('epoch %d/%d' % (epoch, patience)
                            + ', cost %f' % epoch_cost
                            + ', val_error %.4f' % val_error)
                self.learn_rule.monitor()
                if patience <= epoch:
                    logger.info('SGD: Converged on validation set.')
                    converged = True
            else:
                if epoch_cost < best_score:
                    improvement = epoch_cost / best_score
                    if improvement < self.improvement_thresh:
                        # increase patience on significant improvement
                        patience = max(patience, epoch*self.patience_incr)
                    best_score = epoch_cost
                logger.info('epoch %d/%d' % (epoch, patience)
                            + ', cost %f' % epoch_cost)
                self.learn_rule.monitor()
                if patience <= epoch:
                    logger.info('SGD: Converged on training set.')
                    converged = True

        end_time = time.time()
        if not converged:
            logger.info('SGD: Stopped by max_epochs.')
        duration = float(end_time - start_time)
        logger.info('SGD: Optimization ran for %.2f minutes ' % (duration/60)
                    + '(%d epochs, %.1f s/epoch)' % (epoch, duration/epoch))
Example #27
 def loss(self, pred, target):
     pred = ca.maximum(pred, _FLT_MIN)
     return -ca.mean(target*ca.log(pred) + (1 - target)*ca.log(1 - pred),
                     axis=1)
Example #28
 def fprop(self):
     ca.mean(self.x.array, axis=self.axis, out=self.array,
             keepdims=self.keepdims)
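The fprop above reduces directly into a pre-allocated output array. A NumPy sketch of the same out= pattern, which avoids allocating a fresh result on every call:

import numpy as np

x = np.random.normal(size=(4, 6))
out = np.empty((1, 6))
np.mean(x, axis=0, keepdims=True, out=out)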