Example #1
def numpy_gradient_step(weight, gradient, lr, factor_l12, l1_vs_l2, factor_ortho=0, weights_transposed=False):
    gradient = gradient.copy()
    gradient += factor_l12 * ((1.0 - l1_vs_l2) * weight + 0.5 * l1_vs_l2 * numpy.sign(weight))
    if factor_ortho:
        col_sums = reshape_transposed(weight).sum(axis=1) if weights_transposed else weight.sum(axis=0)
        for i, row in enumerate(gradient):
            row += (col_sums - weight[i]) * factor_ortho / weight.shape[0]
    gradient *= lr
    return gradient
Example #2
def numpy_gradient_step(weight, gradient, lr, factor_l12, l1_vs_l2,
                        factor_ortho=0, weights_transposed=False):
    gradient = gradient.copy()
    gradient += factor_l12 * ((1.0 - l1_vs_l2) * weight +
                              0.5 * l1_vs_l2 * numpy.sign(weight))
    if factor_ortho:
        col_sums = (reshape_transposed(weight).sum(axis=1)
                    if weights_transposed else weight.sum(axis=0))
        for i, row in enumerate(gradient):
            row += (col_sums - weight[i]) * factor_ortho / weight.shape[0]
    gradient *= lr
    return gradient
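
Examples #1 and #2 are the same helper with different line wrapping (it appears to be a static helper on GradientDescentBase, since Example #7 calls it as nn_units.GradientDescentBase.numpy_gradient_step); both rely on numpy and reshape_transposed being imported at module level in the original unit. The helper copies the raw gradient, blends in an L1/L2 weight-decay term controlled by factor_l12 and l1_vs_l2, optionally adds a per-row orthogonality penalty, and scales the result by the learning rate. A minimal standalone sketch of the decay-and-scale math, assuming factor_ortho=0 so reshape_transposed is never needed (the function name below is illustrative, not part of the library):

import numpy

def gradient_step_sketch(weight, gradient, lr, factor_l12, l1_vs_l2):
    # Blend L2 decay (proportional to the weight) and L1 decay
    # (proportional to sign(weight)) into the raw gradient, then scale
    # by the learning rate -- the factor_ortho=0 path of the helper above.
    step = gradient + factor_l12 * ((1.0 - l1_vs_l2) * weight +
                                    0.5 * l1_vs_l2 * numpy.sign(weight))
    return lr * step

weight = numpy.array([[0.5, -0.2], [0.1, 0.3]])
raw_gradient = numpy.array([[0.01, 0.02], [-0.03, 0.04]])
delta = gradient_step_sketch(weight, raw_gradient, lr=0.1,
                             factor_l12=0.0005, l1_vs_l2=0.0)
print(delta)   # the caller negates/subtracts this from the weights

The caller negates the returned value before applying it, as numpy_weights_update does in Examples #7 and #8.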
Example #3
    def numpy_err_input_update(self):
        """Backpropagate error (will compute err_input).
        """
        if not self.need_err_input:
            return

        from scipy.signal import convolve2d

        self.err_input.map_invalidate()
        self.err_output.map_read()
        self.weights.map_read()

        batch_size = self.input.mem.shape[0]
        sy = self.input.mem.shape[1]
        sx = self.input.mem.shape[2]
        n_channels = self.input.mem.size // (batch_size * sx * sy)
        sx_full = self.padding[0] + sx + self.padding[2]
        sy_full = self.padding[1] + sy + self.padding[3]

        weights = (reshape_transposed(self.weights.mem)
                   if self.weights_transposed else self.weights.mem)

        if not self.err_input_beta:
            self.err_input.mem[:] = 0
        else:
            self.err_input.mem *= self.err_input_beta
        err_input = numpy.zeros_like(self.err_input.mem)
        # initialize sparse output error
        sparse_err_output = numpy.zeros(
            (batch_size, sy_full - self.ky + 1, sx_full - self.kx + 1,
             self.n_kernels),
            dtype=self.err_output.dtype)
        for (batch, i, j, k), err in numpy.ndenumerate(self.err_output.mem):
            sparse_err_output[batch, i * self.sliding[1], j * self.sliding[0],
                              k] = err
        err_sample = numpy.zeros(
            (sy_full - self.ky + 1, sx_full - self.kx + 1),
            dtype=err_input.dtype)
        for batch, k in product(range(batch_size), range(self.n_kernels)):
            err_sample[:] = sparse_err_output[batch, :, :, k]
            cur_kernel = weights[k].reshape(self.ky, self.kx, n_channels)
            for ch in range(n_channels):
                err_input_full = convolve2d(err_sample,
                                            cur_kernel[:, :, ch],
                                            mode='full')
                err_input[batch, :, :, ch] += \
                    err_input_full[self.padding[1]:(sy_full - self.padding[3]),
                                   self.padding[0]:(sx_full - self.padding[2])]
        self.err_input.mem += err_input * self.err_input_alpha
Example #4
    def numpy_err_input_update(self):
        """Backpropagate error (will compute err_input).
        """
        if not self.need_err_input:
            return

        from scipy.signal import convolve2d

        self.err_input.map_invalidate()
        self.err_output.map_read()
        self.weights.map_read()

        batch_size = self.input.mem.shape[0]
        sy = self.input.mem.shape[1]
        sx = self.input.mem.shape[2]
        n_channels = self.input.mem.size // (batch_size * sx * sy)
        sx_full = self.padding[0] + sx + self.padding[2]
        sy_full = self.padding[1] + sy + self.padding[3]

        weights = (reshape_transposed(self.weights.mem)
                   if self.weights_transposed else self.weights.mem)

        if not self.err_input_beta:
            self.err_input.mem[:] = 0
        else:
            self.err_input.mem *= self.err_input_beta
        err_input = numpy.zeros_like(self.err_input.mem)
        # initialize sparse output error
        sparse_err_output = numpy.zeros((
            batch_size, sy_full - self.ky + 1, sx_full - self.kx + 1,
            self.n_kernels), dtype=self.err_output.dtype)
        for (batch, i, j, k), err in numpy.ndenumerate(self.err_output.mem):
            sparse_err_output[batch, i * self.sliding[1],
                              j * self.sliding[0], k] = err
        err_sample = numpy.zeros(
            (sy_full - self.ky + 1, sx_full - self.kx + 1),
            dtype=err_input.dtype)
        for batch, k in product(range(batch_size), range(self.n_kernels)):
            err_sample[:] = sparse_err_output[batch, :, :, k]
            cur_kernel = weights[k].reshape(self.ky, self.kx, n_channels)
            for ch in range(n_channels):
                err_input_full = convolve2d(err_sample, cur_kernel[:, :, ch],
                                            mode='full')
                err_input[batch, :, :, ch] += \
                    err_input_full[self.padding[1]:(sy_full - self.padding[3]),
                                   self.padding[0]:(sx_full - self.padding[2])]
        self.err_input.mem += err_input * self.err_input_alpha
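
Examples #3 and #4 are the same method with different wrapping. The strided output error is first scattered into a dense map covering every valid position of the padded input (sparse_err_output); a full 2-D convolution of that map with each kernel channel then spreads every error value back over the kx-by-ky input window it came from, and the padding margins are sliced away before the result is blended into err_input via err_input_alpha and err_input_beta. The core step can be checked in isolation with scipy; the sketch below assumes stride 1, no padding, and a single sample, kernel and channel:

import numpy
from scipy.signal import convolve2d

# A single unit of output error at position (1, 1).
err_output_2d = numpy.zeros((3, 3))
err_output_2d[1, 1] = 1.0
kernel_2d = numpy.arange(4.0).reshape(2, 2)

# Full convolution spreads the error over every input position that
# contributed to output (1, 1) through the 2x2 kernel.
err_input_2d = convolve2d(err_output_2d, kernel_2d, mode='full')

# err_input_2d has shape (4, 4) and the kernel values land on rows 1..2,
# columns 1..2 -- the input window that produced output (1, 1).
print(err_input_2d)

Because the forward pass (Examples #5 and #6) is a correlation (the kernel is not flipped), the flip performed by convolve2d is exactly what the gradient with respect to the input requires.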
Example #5
    def numpy_run(self):
        """Forward propagation from batch on CPU only.
        """
        self.input.map_read()
        self.weights.map_read()
        self.bias.map_read()
        self.output.map_invalidate()

        sx_full = self.padding[0] + self._sx + self.padding[2]
        sy_full = self.padding[1] + self._sy + self.padding[3]
        nx = (sx_full - self.kx) // self.sliding[0] + 1
        ny = (sy_full - self.ky) // self.sliding[1] + 1

        weights = (reshape_transposed(self.weights.mem)
                   if self.weights_transposed else self.weights.mem)

        assert self.kx >= 0 and self.ky >= 0
        for batch, _ in ((batch, ch)
                         for batch in range(self._batch_size)
                         for ch in range(self._n_channels)):
            for k, kernel in enumerate(weights):
                for i, j in ((i, j) for i in range(ny) for j in range(nx)):
                    full_i1 = i * self.sliding[1]
                    full_i2 = full_i1 + self.ky
                    full_j1 = j * self.sliding[0]
                    full_j2 = full_j1 + self.kx
                    in_i1 = min(max(full_i1 - self.padding[1], 0), self._sy)
                    in_i2 = min(max(full_i2 - self.padding[1], 0), self._sy)
                    in_j1 = min(max(full_j1 - self.padding[0], 0), self._sx)
                    in_j2 = min(max(full_j2 - self.padding[0], 0), self._sx)
                    cut_i1, cut_i2 = (in_i1 - full_i1 + self.padding[1],
                                      in_i2 - full_i1 + self.padding[1])
                    cut_j1, cut_j2 = (in_j1 - full_j1 + self.padding[0],
                                      in_j2 - full_j1 + self.padding[0])
                    if in_i2 - in_i1 > 0 or in_j2 - in_j1 > 0:
                        cut = self.input.mem[batch, in_i1:in_i2, in_j1:in_j2]
                        kernel_3d = kernel.reshape(self.ky, self.kx,
                                                   self._n_channels)
                        cutted_kernel = kernel_3d[cut_i1:cut_i2,
                                                  cut_j1:cut_j2, :]
                        assert cut.size == cutted_kernel.size
                        conv = numpy.sum(numpy.multiply(cut.ravel(),
                                                        cutted_kernel.ravel()))
                        self.output.mem[batch, i, j, k] = conv
        # add bias and apply activation function
        self.apply_activation()
Example #6
    def numpy_run(self):
        """Forward propagation from batch on CPU only.
        """
        self.input.map_read()
        self.weights.map_read()
        self.bias.map_read()
        self.output.map_invalidate()

        sx_full = self.padding[0] + self._sx + self.padding[2]
        sy_full = self.padding[1] + self._sy + self.padding[3]
        nx = (sx_full - self.kx) // self.sliding[0] + 1
        ny = (sy_full - self.ky) // self.sliding[1] + 1

        weights = (reshape_transposed(self.weights.mem)
                   if self.weights_transposed else self.weights.mem)

        assert self.kx >= 0 and self.ky >= 0
        for batch, _ in ((batch, ch) for batch in range(self._batch_size)
                         for ch in range(self._n_channels)):
            for k, kernel in enumerate(weights):
                for i, j in ((i, j) for i in range(ny) for j in range(nx)):
                    full_i1 = i * self.sliding[1]
                    full_i2 = full_i1 + self.ky
                    full_j1 = j * self.sliding[0]
                    full_j2 = full_j1 + self.kx
                    in_i1 = min(max(full_i1 - self.padding[1], 0), self._sy)
                    in_i2 = min(max(full_i2 - self.padding[1], 0), self._sy)
                    in_j1 = min(max(full_j1 - self.padding[0], 0), self._sx)
                    in_j2 = min(max(full_j2 - self.padding[0], 0), self._sx)
                    cut_i1, cut_i2 = (in_i1 - full_i1 + self.padding[1],
                                      in_i2 - full_i1 + self.padding[1])
                    cut_j1, cut_j2 = (in_j1 - full_j1 + self.padding[0],
                                      in_j2 - full_j1 + self.padding[0])
                    if in_i2 - in_i1 > 0 or in_j2 - in_j1 > 0:
                        cut = self.input.mem[batch, in_i1:in_i2, in_j1:in_j2]
                        kernel_3d = kernel.reshape(self.ky, self.kx,
                                                   self._n_channels)
                        cutted_kernel = kernel_3d[cut_i1:cut_i2,
                                                  cut_j1:cut_j2, :]
                        assert cut.size == cutted_kernel.size
                        conv = numpy.sum(
                            numpy.multiply(cut.ravel(), cutted_kernel.ravel()))
                        self.output.mem[batch, i, j, k] = conv
        # add bias and apply activation function
        self.apply_activation()
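
Examples #5 and #6 show the CPU forward pass: for every batch item, kernel and output position, a kx-by-ky-by-n_channels window is cut from the virtually padded input, clipped against the real input bounds, and its dot product with the matching slice of the kernel is written to the output; apply_activation() then adds the bias and applies the unit's activation. Stripped of the padding and clipping bookkeeping, the core computation reduces to the sketch below (stride 1, no padding, one sample; names are illustrative):

import numpy

def conv_forward_sketch(inp, kernels, ky, kx):
    # inp: (sy, sx, n_channels); kernels: (n_kernels, ky * kx * n_channels).
    sy, sx, n_channels = inp.shape
    ny, nx = sy - ky + 1, sx - kx + 1
    out = numpy.zeros((ny, nx, len(kernels)))
    for k, kernel in enumerate(kernels):
        kernel_3d = kernel.reshape(ky, kx, n_channels)
        for i in range(ny):
            for j in range(nx):
                window = inp[i:i + ky, j:j + kx, :]
                # Correlation: elementwise product of window and kernel, summed.
                out[i, j, k] = numpy.sum(window * kernel_3d)
    return out

inp = numpy.random.rand(5, 5, 3)
kernels = numpy.random.rand(4, 2 * 2 * 3)     # four 2x2x3 kernels
out = conv_forward_sketch(inp, kernels, ky=2, kx=2)
assert out.shape == (4, 4, 4)

The sketch omits the bias and activation that apply_activation() handles in the original method.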
Example #7
    def numpy_weights_update(self):
        self.input.map_read()
        self.err_output.map_read()
        self.weights.map_write()
        self.gradient_weights.map_write()
        self.accumulated_gradient_weights.map_write()

        dtype = self.weights.dtype
        sy = self.input.shape[1]
        sx = self.input.shape[2]
        n_channels = self.input.size // (self.input.shape[0] * sx * sy)

        sx_full = self.padding[0] + sx + self.padding[2]
        sy_full = self.padding[1] + sy + self.padding[3]
        nx = (sx_full - self.kx) // self.sliding[0] + 1
        ny = (sy_full - self.ky) // self.sliding[1] + 1
        sample_shape = (nx * ny, self.kx * self.ky * n_channels)

        sh = self.err_output.shape
        if len(sh) == 3:
            sh[1] *= sh[2]
            sh[2] = 1

        # calculate gradient for weights
        gd_weights = (reshape_transposed(self.gradient_weights.mem)
                      if self.weights_transposed
                      else self.gradient_weights.mem)
        gd_weights[:] = 0
        cut = numpy.empty((self.ky, self.kx, n_channels), dtype=dtype)
        sample = numpy.empty(sample_shape, dtype=dtype)
        for batch in range(self.current_batch_size):
            # input data unrolling
            sample = numpy.empty(sample_shape)
            for by, bx in ((by, bx) for by in range(ny) for bx in range(nx)):
                y1, y2 = (by * self.sliding[1],
                          by * self.sliding[1] + self.ky)
                x1, x2 = (bx * self.sliding[0],
                          bx * self.sliding[0] + self.kx)
                i1, i2 = (min(max(y1 - self.padding[1], 0), sy),
                          min(max(y2 - self.padding[1], 0), sy))
                j1, j2 = (min(max(x1 - self.padding[0], 0), sx),
                          min(max(x2 - self.padding[0], 0), sx))
                cut_i1, cut_i2 = (i1 - y1 + self.padding[1],
                                  i2 - y1 + self.padding[1])
                cut_j1, cut_j2 = (j1 - x1 + self.padding[0],
                                  j2 - x1 + self.padding[0])
                cut = numpy.zeros((self.ky, self.kx, n_channels),
                                  dtype=self.input.mem.dtype)
                cut[cut_i1:cut_i2, cut_j1:cut_j2, :] = \
                    self.input.mem[batch, i1:i2, j1:j2, :].reshape(i2 - i1,
                                                                   j2 - j1,
                                                                   n_channels)
                sample[by * nx + bx] = cut.ravel()
            err_out_shape = self.err_output.mem.shape
            out = self.err_output.mem[batch].reshape(err_out_shape[1] *
                                                     err_out_shape[2],
                                                     self.n_kernels)
            gd_weights += numpy.dot(out.transpose(),
                                    sample)
        if self.weights_transposed:
            gd_weights = reshape_transposed(gd_weights)

        # update weights
        lr = self.learning_rate
        factor_l12 = self.weights_decay
        l1_vs_l2 = self.l1_vs_l2
        gradient = -nn_units.GradientDescentBase.numpy_gradient_step(
            self.weights.mem, gd_weights, lr, factor_l12, l1_vs_l2,
            self.factor_ortho, self.weights_transposed)
        if self.accumulate_gradient == self.OP_NONE:
            pass
        elif self.accumulate_gradient == self.OP_STORE:
            self.accumulated_gradient_weights.mem[:] = gradient
        elif self.accumulate_gradient == self.OP_ADD:
            self.accumulated_gradient_weights.mem[:] += gradient
        elif self.accumulate_gradient == self.OP_FLUSH:
            gradient += self.accumulated_gradient_weights.mem
            self.accumulated_gradient_weights.mem[:] = 0
        else:
            raise ValueError("Incorrect accumulate_gradient attribute value")
        if self.gradient_weights_with_moment:
            gradient += (self.gradient_weights_with_moment.mem *
                         self.gradient_moment)
            self.gradient_weights.mem[:] = gradient[:]
        if self.apply_gradient:
            self.weights.mem += gradient
Example #8
    def numpy_weights_update(self):
        self.input.map_read()
        self.err_output.map_read()
        self.weights.map_write()
        self.gradient_weights.map_write()
        self.accumulated_gradient_weights.map_write()

        dtype = self.weights.dtype
        sy = self.input.shape[1]
        sx = self.input.shape[2]
        n_channels = self.input.size // (self.input.shape[0] * sx * sy)

        sx_full = self.padding[0] + sx + self.padding[2]
        sy_full = self.padding[1] + sy + self.padding[3]
        nx = (sx_full - self.kx) // self.sliding[0] + 1
        ny = (sy_full - self.ky) // self.sliding[1] + 1
        sample_shape = (nx * ny, self.kx * self.ky * n_channels)

        sh = self.err_output.shape
        if len(sh) == 3:
            sh[1] *= sh[2]
            sh[2] = 1

        # calculate gradient for weights
        gd_weights = (reshape_transposed(self.gradient_weights.mem) if
                      self.weights_transposed else self.gradient_weights.mem)
        gd_weights[:] = 0
        cut = numpy.empty((self.ky, self.kx, n_channels), dtype=dtype)
        sample = numpy.empty(sample_shape, dtype=dtype)
        for batch in range(self.current_batch_size):
            # input data unrolling
            sample = numpy.empty(sample_shape)
            for by, bx in ((by, bx) for by in range(ny) for bx in range(nx)):
                y1, y2 = (by * self.sliding[1], by * self.sliding[1] + self.ky)
                x1, x2 = (bx * self.sliding[0], bx * self.sliding[0] + self.kx)
                i1, i2 = (min(max(y1 - self.padding[1], 0),
                              sy), min(max(y2 - self.padding[1], 0), sy))
                j1, j2 = (min(max(x1 - self.padding[0], 0),
                              sx), min(max(x2 - self.padding[0], 0), sx))
                cut_i1, cut_i2 = (i1 - y1 + self.padding[1],
                                  i2 - y1 + self.padding[1])
                cut_j1, cut_j2 = (j1 - x1 + self.padding[0],
                                  j2 - x1 + self.padding[0])
                cut = numpy.zeros((self.ky, self.kx, n_channels),
                                  dtype=self.input.mem.dtype)
                cut[cut_i1:cut_i2, cut_j1:cut_j2, :] = \
                    self.input.mem[batch, i1:i2, j1:j2, :].reshape(i2 - i1,
                                                                   j2 - j1,
                                                                   n_channels)
                sample[by * nx + bx] = cut.ravel()
            err_out_shape = self.err_output.mem.shape
            out = self.err_output.mem[batch].reshape(
                err_out_shape[1] * err_out_shape[2], self.n_kernels)
            gd_weights += numpy.dot(out.transpose(), sample)
        if self.weights_transposed:
            gd_weights = reshape_transposed(gd_weights)

        # update weights
        lr = self.learning_rate
        factor_l12 = self.weights_decay
        l1_vs_l2 = self.l1_vs_l2
        gradient = -nn_units.GradientDescentBase.numpy_gradient_step(
            self.weights.mem, gd_weights, lr, factor_l12, l1_vs_l2,
            self.factor_ortho, self.weights_transposed)

        if self.accumulate_gradient:
            self.accumulate_gradient_f(self.accumulated_gradient_weights.mem,
                                       gradient)

        if self.gradient_weights_with_moment:
            gradient += (self.gradient_weights_with_moment.mem *
                         self.gradient_moment)
            self.gradient_weights.mem[:] = gradient[:]
        if self.apply_gradient:
            self.weights.mem += gradient
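
Examples #7 and #8 compute the weight gradient by unrolling the input: for each batch item, every kx-by-ky-by-n_channels window (one per output position) is flattened into a row of sample, the output error for that item is reshaped to (ny*nx, n_kernels), and the product err_output.T . sample accumulates the gradient of all kernels at once. The result is then passed through numpy_gradient_step (Examples #1 and #2), optionally accumulated, combined with the momentum term, and finally applied to the weights. The two snippets differ only in how accumulation is expressed: #7 dispatches on OP_NONE/OP_STORE/OP_ADD/OP_FLUSH inline, while #8 delegates to accumulate_gradient_f. A standalone sketch of the unroll-and-multiply step for one sample (stride 1, no padding; names are illustrative):

import numpy

def conv_weight_gradient_sketch(inp, err_output, ky, kx):
    # inp: (sy, sx, n_channels); err_output: (ny, nx, n_kernels),
    # where ny = sy - ky + 1 and nx = sx - kx + 1 (stride 1, no padding).
    sy, sx, n_channels = inp.shape
    ny, nx, n_kernels = err_output.shape
    # Unroll the input: one flattened ky*kx*n_channels window per
    # output position, like the `sample` array in the methods above.
    sample = numpy.empty((ny * nx, ky * kx * n_channels))
    for by in range(ny):
        for bx in range(nx):
            sample[by * nx + bx] = inp[by:by + ky, bx:bx + kx, :].ravel()
    out = err_output.reshape(ny * nx, n_kernels)
    # (n_kernels, ny*nx) dot (ny*nx, ky*kx*n_channels): one gradient row
    # per kernel, in the same layout as the weights.
    return numpy.dot(out.transpose(), sample)

inp = numpy.random.rand(5, 5, 3)
err_output = numpy.random.rand(4, 4, 2)       # two 2x2x3 kernels
gd_weights = conv_weight_gradient_sketch(inp, err_output, ky=2, kx=2)
assert gd_weights.shape == (2, 2 * 2 * 3)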