    def backward(self, x, dy):
        p = self._padding
        x_padded = np.pad(x, ((0, 0), (p, p), (p, p), (0, 0)),
                          mode='constant',
                          constant_values=0)

        N, in_H, in_W, D = x_padded.shape
        sN, sH, sW, sD = x_padded.strides

        # dilate dy: insert (stride - 1) zeros between neighbouring rows
        # and columns so the gradients line up for stride > 1; the insert
        # positions along each axis must come from that axis's own extent
        dy_pad_x = np.insert(dy,
                             np.repeat(np.arange(1, dy.shape[2]),
                                       self._stride - 1),
                             0,
                             axis=2)
        dy_pad_y = np.insert(dy_pad_x,
                             np.repeat(np.arange(1, dy.shape[1]),
                                       self._stride - 1),
                             0,
                             axis=1)
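        # e.g. with stride = 2, a 2x2 gradient map dilates to 3x3
        # (per batch element and channel):
        #   [[a, b],        [[a, 0, b],
        #    [c, d]]   -->   [0, 0, 0],
        #                    [c, 0, d]]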

        # dW is the convolution of the padded input with the dilated dy
        x_expanded = np.lib.stride_tricks.as_strided(
            x_padded,
            shape=(N, self._kernel_size, self._kernel_size, D,
                   dy_pad_y.shape[1], dy_pad_y.shape[2]),
            strides=(sN, sH, sW, sD, sH, sW),
            writeable=False).astype(dtype())
        dF = np.einsum('nkldhw,nhwf->fkld', x_expanded, dy_pad_y)

        dy_padded = np.pad(
            dy_pad_y, ((0, 0), (self._kernel_size - 1, self._kernel_size - 1),
                       (self._kernel_size - 1, self._kernel_size - 1), (0, 0)),
            mode='constant',
            constant_values=0)
        s_dy_N, s_dy_H, s_dy_W, s_dy_F = dy_padded.strides

        # dx is the full convolution of the zero-padded dy with the
        # filter tensor F rotated by 180 degrees
        dy_expanded = np.lib.stride_tricks.as_strided(
            dy_padded,
            shape=(N, in_H, in_W, self._filters, self._kernel_size,
                   self._kernel_size),
            strides=(s_dy_N, s_dy_H, s_dy_W, s_dy_F, s_dy_H, s_dy_W),
            writeable=False).astype(dtype())
        dx = np.einsum('nHWfkl,fkld->nHWd', dy_expanded,
                       np.rot90(self._F, 2, axes=(1, 2)))
        if self._padding != 0:
            dx = dx[:, self._padding:-self._padding,
                    self._padding:-self._padding, :]

        if self._use_bias:
            db = np.sum(dy, axis=(0, 1, 2), keepdims=True)
            return dict(dx=dx, dW=dF, db=db)
        return dict(dx=dx, dW=dF)
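
    # Sketch of a finite-difference check for backward() (hypothetical
    # `layer` instance; eps ~ 1e-5):
    #   base = np.sum(layer.forward(x) * dy)
    #   layer._F[f, k, l, d] += eps
    #   num = (np.sum(layer.forward(x) * dy) - base) / eps
    #   # num should approximate layer.backward(x, dy)['dW'][f, k, l, d]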

    def _expand_input(self, x):
        p = self._padding
        x_padded = np.pad(x, ((0, 0), (p, p), (p, p), (0, 0)),
                          mode='constant',
                          constant_values=0)
        N, H, W, D = x_padded.shape
        sN, sH, sW, sD = x_padded.strides
        _, out_h, out_w, _ = self.output_shape()

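        # build a zero-copy (N, out_h, out_w, k, k, D) view holding one
        # input patch per output position; e.g. a (N, 32, 32, 3) input
        # with kernel_size=3, stride=1, padding=1 expands to
        # (N, 32, 32, 3, 3, 3)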
        x_expanded = np.lib.stride_tricks.as_strided(
            x_padded,
            shape=(N, out_h, out_w, self._kernel_size, self._kernel_size, D),
            strides=(sN, sH * self._stride, sW * self._stride, sH, sW, sD),
            writeable=False).astype(dtype())
        return x_expanded

    def __call__(self, shape):
        # uniform init with an activation-dependent gain over sqrt(fan_in)
        fan_in, _ = get_fans(shape)
        if self._activation_fun == 'sigmoid':
            a = 2.38
        elif self._activation_fun == 'relu':
            a = 2.0
        elif self._activation_fun == 'tanh':
            a = 1.0
        else:
            raise ValueError('Unsupported activation function: '
                             f'{self._activation_fun}')

        s = a / np.sqrt(fan_in)
        W = np.random.uniform(-s, s, size=shape).astype(dtype())
        return W
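
    # e.g. for 'relu' and fan_in = 576 the weights are drawn from
    # U(-2/24, 2/24) ~ U(-0.083, 0.083)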

    def forward(self, x):
        # im2col-style patch view followed by a single tensor contraction
        x_expanded = self._expand_input(x)
        out = np.einsum('fhwd,nHWhwd->nHWf', self._F, x_expanded)
        if self._use_bias:
            out += self._bias
        return out
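
    # Usage sketch (constructor name and signature assumed):
    #   conv = Conv2D(filters=16, kernel_size=3, stride=1, padding=1)
    #   y = conv.forward(x)            # (N, 32, 32, 3) -> (N, 32, 32, 16)
    #   grads = conv.backward(x, dy)   # dict with dx, dW (and db)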

    def __call__(self, shape):
        # Glorot/Xavier uniform: limit sqrt(6 / (fan_in + fan_out))
        fan_in, fan_out = get_fans(shape)
        s = np.sqrt(6.0 / (fan_in + fan_out))
        W = np.random.uniform(-s, s, size=shape).astype(dtype())
        return W
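
    # e.g. for a (3, 3, 3, 16) kernel (assuming get_fans folds the
    # receptive field into both fans): fan_in = 27, fan_out = 144,
    # so s = sqrt(6 / 171) ~ 0.19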

    def __call__(self, shape):
        # He/Kaiming normal: std sqrt(2 / fan_in), suited to ReLU nets
        fan_in, _ = get_fans(shape)
        scale = np.sqrt(2.0 / fan_in)
        W = (np.random.randn(*shape) * scale).astype(dtype())
        return W
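
    # e.g. fan_in = 27 gives scale = sqrt(2/27) ~ 0.27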

    def __call__(self, shape):
        # uniform in [-s, s] with the nonstandard bound
        # s = fan_out ** (1 / fan_in)
        fan_in, fan_out = get_fans(shape)
        s = fan_out ** (1.0 / fan_in)
        W = np.random.uniform(-s, s, size=shape).astype(dtype())
        return W

    def __call__(self, shape):
        # uniform over the user-supplied (low, high) range
        low, high = self._range
        W = np.random.uniform(low, high, shape).astype(dtype())
        return W

    def __call__(self, shape):
        # standard normal draws
        W = np.random.normal(size=shape).astype(dtype())
        return W

    def __call__(self, shape):
        # all-zeros init, typically used for biases
        W = np.zeros(shape, dtype=dtype())
        return W