Example #1
    def forward(self, x, gy):
        xp = cuda.get_array_module(x)

        col = im2col_array(x, self.kernel_size, self.stride, self.pad,
                           to_matrix=False)
        gW = xp.tensordot(gy, col, ((0, 2, 3), (0, 4, 5)))
        return gW
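
The tensordot here contracts the batch and output spatial axes of gy, shaped (N, OC, OH, OW), against the matching axes of col, shaped (N, C, KH, KW, OH, OW), leaving the weight gradient of shape (OC, C, KH, KW). A standalone NumPy sketch of that shape arithmetic, with made-up sizes:

import numpy as np

N, C, OC = 2, 3, 8           # batch, in-channels, out-channels
KH, KW, OH, OW = 3, 3, 5, 5  # kernel and output spatial sizes

gy = np.random.randn(N, OC, OH, OW)
col = np.random.randn(N, C, KH, KW, OH, OW)

# contract (N, OH, OW) of gy against (N, OH, OW) of col
gW = np.tensordot(gy, col, ((0, 2, 3), (0, 4, 5)))
print(gW.shape)  # (8, 3, 3, 3) == (OC, C, KH, KW)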
Example #2
def im2col_array(img, kernel_size, stride, pad, to_matrix=True):

    N, C, H, W = img.shape
    KH, KW = pair(kernel_size)
    SH, SW = pair(stride)
    PH, PW = pair(pad)
    OH = get_conv_outsize(H, KH, SH, PH)
    OW = get_conv_outsize(W, KW, SW, PW)

    xp = cuda.get_array_module(img)
    if xp != np:
        col = _im2col_gpu(img, kernel_size, stride, pad)
    else:
        img = np.pad(img,
                     ((0, 0), (0, 0), (PH, PH + SH - 1), (PW, PW + SW - 1)),
                     mode='constant',
                     constant_values=(0, ))
        col = np.ndarray((N, C, KH, KW, OH, OW), dtype=img.dtype)

        for j in range(KH):
            j_lim = j + SH * OH
            for i in range(KW):
                i_lim = i + SW * OW
                col[:, :, j, i, :, :] = img[:, :, j:j_lim:SH, i:i_lim:SW]

    if to_matrix:
        col = col.transpose((0, 4, 5, 1, 2, 3)).reshape((N * OH * OW, -1))

    return col
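
A quick CPU shape check of im2col_array (sizes are arbitrary; this assumes the function is importable, e.g. from dezero.functions_conv):

import numpy as np
# from dezero.functions_conv import im2col_array  # assumed module layout

x = np.random.randn(1, 3, 7, 7).astype(np.float32)

# kernel 5x5, stride 1, pad 0 -> OH = OW = (7 - 5) // 1 + 1 = 3
col = im2col_array(x, kernel_size=5, stride=1, pad=0, to_matrix=True)
print(col.shape)  # (N * OH * OW, C * KH * KW) == (9, 75)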
Example #3
    def _init_W(self, x):
        self.in_size = x.shape[1]
        xp = cuda.get_array_module(x)

        I, O = self.in_size, self.out_size
        W_data = xp.random.randn(I, O).astype(np.float32) * np.sqrt(1 / I)
        self.W.data = W_data
Example #4
    def forward(self, x, W, b):
        xp = cuda.get_array_module(x)

        Weight = W
        SH, SW = self.stride
        PH, PW = self.pad
        C, OC, KH, KW = Weight.shape
        N, C, H, W = x.shape
        if self.outsize is None:
            out_h = get_deconv_outsize(H, KH, SH, PH)
            out_w = get_deconv_outsize(W, KW, SW, PW)
        else:
            out_h, out_w = pair(self.outsize)
        img_shape = (N, OC, out_h, out_w)

        gcol = xp.tensordot(Weight, x, (0, 1))
        gcol = xp.rollaxis(gcol, 3)
        y = col2im_array(gcol,
                         img_shape, (KH, KW),
                         self.stride,
                         self.pad,
                         to_matrix=False)
        # y: (N, OC, out_h, out_w); add the per-channel bias below
        if b is not None:
            self.no_bias = True
            y += b.reshape((1, b.size, 1, 1))
        return y
Example #5
    def forward(self, x):
        if self.W.data is None:
            self.in_size = x.shape[1]
            xp = cuda.get_array_module(x)
            self._init_W(xp)
        y = F.linear(x, self.W, self.b)
        return y
Example #6
    def forward(self, x, gamma, beta):
        assert x.ndim == 2 or x.ndim == 4

        x_ndim = x.ndim
        if x_ndim == 4:
            N, C, H, W = x.shape
            # (N, C, H, W) -> (N*H*W, C)
            x = x.transpose(0, 2, 3, 1).reshape(-1, C)

        xp = cuda.get_array_module(x)

        if dezero.Config.train:
            mean = x.mean(axis=0)
            var = x.var(axis=0)
            inv_std = 1 / xp.sqrt(var + self.eps)
            xc = (x - mean) * inv_std

            m = x.size // gamma.size
            s = m - 1. if m - 1. > 1. else 1.
            adjust = m / s  # unbiased estimation
            self.avg_mean *= self.decay
            self.avg_mean += (1 - self.decay) * mean
            self.avg_var *= self.decay
            self.avg_var += (1 - self.decay) * adjust * var
            self.inv_std = inv_std
        else:
            inv_std = 1 / xp.sqrt(self.avg_var + self.eps)
            xc = (x - self.avg_mean) * inv_std
        y = gamma * xc + beta

        if x_ndim == 4:
            # (N*H*W, C) -> (N, C, H, W)
            y = y.reshape(N, H, W, C).transpose(0, 3, 1, 2)
        return y
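
Stripped of the running-average bookkeeping, the training branch is standardize-then-affine. A minimal NumPy sketch of just that math (names and sizes are illustrative, not the DeZero API):

import numpy as np

x = np.random.randn(32, 16).astype(np.float32)  # (batch, channels)
gamma, beta, eps = np.ones(16), np.zeros(16), 2e-5

xc = (x - x.mean(axis=0)) / np.sqrt(x.var(axis=0) + eps)
y = gamma * xc + beta  # per-channel scale and shift

print(y.mean(axis=0).round(5))  # ~0 per channel
print(y.std(axis=0).round(3))   # ~1 per channel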
Example #7
    def forward(self, x, t):
        xp = cuda.get_array_module(x)
        N = x.shape[0]
        log_z = utils.logsumexp(x, axis=1)
        log_p = x - log_z
        log_p = log_p[xp.arange(N), t.ravel()]
        y = -log_p.sum() / xp.float32(N)
        return y
Example #8
    def forward(self, x):
        if self.W.data is None:
            self.in_channels = x.shape[1]
            xp = cuda.get_array_module(x)
            self._init_W(xp)

        y = F.conv2d_simple(x, self.W, self.b, self.stride, self.pad)
        return y
Example #9
def dropout(x, dropout_ratio=0.5):
    x = as_variable(x)
    if dezero.Config.train:
        xp = cuda.get_array_module(x)
        mask = xp.random.rand(*x.shape) > dropout_ratio
        scale = xp.array(1.0 - dropout_ratio).astype(x.dtype)
        y = x * mask / scale
        return y
    else:
        return x
Example #10
    def __call__(self, x):
        if self.W.data is None:
            self.in_channels = x.shape[1]
            xp = cuda.get_array_module(x)
            self._init_W(xp)

        y = F.conv2d(x, self.W, self.b, self.stride, self.pad)
        return y
Example #11
    def forward(self, gy):
        xp = cuda.get_array_module(gy)
        gx = xp.zeros(self.in_shape, dtype=gy.dtype)

        if xp is np:
            np.add.at(gx, self.slices, gy)
        else:
            xp.scatter_add(gx, self.slices, gy)
        return gx
Example #12
def logsumexp(x, axis=1):
    xp = cuda.get_array_module(x)
    m = x.max(axis=axis, keepdims=True)
    y = x - m
    xp.exp(y, out=y)
    s = y.sum(axis=axis, keepdims=True)
    xp.log(s, out=s)
    m += s
    return m
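
Subtracting the row max before exponentiating is what makes this numerically stable; the naive formula overflows for large logits. A quick NumPy comparison (the naive line emits an overflow warning):

import numpy as np

x = np.array([[1000.0, 1001.0, 1002.0]])

naive = np.log(np.exp(x).sum(axis=1))                   # overflow -> [inf]
m = x.max(axis=1, keepdims=True)
stable = m.ravel() + np.log(np.exp(x - m).sum(axis=1))
print(naive, stable)                                    # [inf] [1002.40760596]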
Example #13
    def _init_W(self, x):
        self.in_channels = x.shape[1]
        xp = cuda.get_array_module(x)

        C, OC = self.in_channels, self.out_channels
        KH, KW = pair(self.kernel_size)
        W_data = xp.random.randn(OC, C, KH, KW).astype(np.float32) * np.sqrt(
            1 / (C * KH * KW))
        self.W.data = W_data
Example #14
    def forward(self, x):
        # initialize W on the first forward pass, once the input size is known
        if self.W.data is None:
            self.in_size = x.shape[1]
            xp = cuda.get_array_module(x)
            self._init_W(xp)

        y = F.linear(x, self.W, self.b)
        return y
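
With this pattern only out_size is fixed at construction; in_size is read off the first batch. A usage sketch, assuming the usual DeZero package layout (dezero.layers as L):

import numpy as np
import dezero.layers as L

layer = L.Linear(10)                       # out_size only; W.data is still None
x = np.random.randn(5, 64).astype(np.float32)
y = layer(x)                               # first call initializes W as (64, 10)
print(layer.W.data.shape, y.shape)         # (64, 10) (5, 10)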
Example #15
    def update_one(self, param):
        xp = cuda.get_array_module(param)
        v_key = id(param)
        if v_key not in self.vs:
            self.vs[v_key] = xp.zeros_like(param.data)
        v = self.vs[v_key]
        v *= self.momentum
        v -= self.lr * param.grad.data
        param.data += v
Example #16
    def forward(self, gy):
        xp = cuda.get_array_module(gy)
        gx = xp.zeros(self.in_shape, dtype=gy.dtype)

        if xp is np:
            np.add.at(gx, self.slices, gy)
        else:
            xp.scatter_add(gx, self.slices, gy)
        return gx
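
np.add.at is used instead of gx[self.slices] += gy because plain fancy-index assignment does not accumulate over duplicate indices, which a gradient must. A small demonstration:

import numpy as np

gx = np.zeros(3)
idx = np.array([0, 0, 1])
gy = np.array([1.0, 2.0, 3.0])

gx[idx] += gy           # buffered: the two writes to index 0 collide
print(gx)               # [2. 3. 0.]

gx = np.zeros(3)
np.add.at(gx, idx, gy)  # unbuffered: contributions to index 0 accumulate
print(gx)               # [3. 3. 0.]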
Example #17
def softmax_cross_entropy_simple(x, t):
    x, t = as_variable(x), as_variable(t)
    N = x.shape[0]
    p = softmax(x)
    p = clip(p, 1e-15, 1.0)
    log_p = log(p)
    xp = cuda.get_array_module(t.data)
    tlog_p = log_p[xp.arange(N), t.data]
    y = -1 * sum(tlog_p) / N
    return y
Example #18
def dropout(x, dropout_ratio=0.5):
    x = as_variable(x)

    if dezero.Config.train:
        xp = cuda.get_array_module(x)
        mask = xp.random.rand(*x.shape) > dropout_ratio
        scale = 1.0 - dropout_ratio
        y = x * mask / scale
        return y
    else:
        return x
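
Dividing by 1 - dropout_ratio is the inverted-dropout trick: the expected activation stays the same at train time, so the test branch can return x unscaled. A quick NumPy check of that expectation (standalone, not the DeZero API):

import numpy as np

x = np.ones(100000)
ratio = 0.5
mask = np.random.rand(*x.shape) > ratio
y = x * mask / (1.0 - ratio)
print(y.mean())  # ~1.0: expectation preserved despite zeroed units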
Example #19
    def __call__(self, x):
        if self.W.data is None:
            self.in_size = x.shape[1]
            xp = cuda.get_array_module(x)

            I, O = self.in_size, self.out_size
            W_data = xp.random.randn(I, O).astype(np.float32) * np.sqrt(1 / I)
            self.W.data = W_data

        y = F.linear(x, self.W, self.b)
        return y
Example #20
    def backward(self, gy):
        x, t = self.inputs
        N, CLS_NUM = x.shape

        gy *= 1 / N
        y = softmax(x)
        # convert to one-hot
        xp = cuda.get_array_module(t.data)
        t_onehot = xp.eye(CLS_NUM, dtype=t.dtype)[t.data]
        y = (y - t_onehot) * gy
        return y
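
xp.eye(CLS_NUM, dtype=t.dtype)[t.data] is a compact one-hot encoding: row t[i] of the identity matrix is exactly the one-hot vector for class t[i]. For example:

import numpy as np

t = np.array([2, 0, 1])
print(np.eye(4, dtype=t.dtype)[t])
# [[0 0 1 0]
#  [1 0 0 0]
#  [0 1 0 0]]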
Example #21
    def _init_params(self, x):
        xp = cuda.get_array_module(x)
        D = x.shape[1]
        if self.avg_mean.data is None:
            self.avg_mean.data = xp.zeros(D, dtype=x.dtype)
        if self.avg_var.data is None:
            self.avg_var.data = xp.ones(D, dtype=x.dtype)
        if self.gamma.data is None:
            self.gamma.data = xp.ones(D, dtype=x.dtype)
        if self.beta.data is None:
            self.beta.data = xp.zeros(D, dtype=x.dtype)
Example #22
    def forward(self, x, W, b):
        xp = cuda.get_array_module(x)

        KH, KW = W.shape[2:]
        col = im2col_array(x, (KH, KW), self.stride, self.pad, to_matrix=False)

        y = xp.tensordot(col, W, ((1, 2, 3), (1, 2, 3)))
        if b is not None:
            y += b
        y = xp.rollaxis(y, 3, 1)  # (N, OH, OW, OC) -> (N, OC, OH, OW)
        return y
Example #23
    def update_one(self, param):
        xp = cuda.get_array_module(param.data)
        h_key = id(param)
        if h_key not in self.hs:
            self.hs[h_key] = xp.zeros_like(param.data)

        lr = self.lr
        eps = self.eps
        grad = param.grad.data
        h = self.hs[h_key]

        h += grad * grad
        param.data -= lr * grad / (xp.sqrt(h) + eps)
Example #24
def numerical_grad(f, x, *args, **kwargs):
    """Computes the gradient by numerical differentiation.

    Parameters
    ----------
    f : DeZero function
        A DeZero function or layer.
    x : ndarray or dezero.Variable
        The variable to compute the gradient for.
    args : variable-length arguments
        Extra inputs to f besides x, as in f(x, y), go here.
    kwargs : keyword arguments
        Extra keyword inputs to f besides x, as in f(x, key=y), go here.

    Returns
    -------
    grad : ndarray
    """
    eps = 1e-4

    x = x.data if isinstance(x, Variable) else x
    xp = cuda.get_array_module(x)
    if xp is not np:
        np_x = cuda.as_numpy(x)
    else:
        np_x = x
    grad = xp.zeros_like(x)

    it = np.nditer(np_x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx].copy()

        x[idx] = tmp_val + eps
        y1 = f(x, *args, **kwargs)  # f(x+h)
        if isinstance(y1, Variable):
            y1 = y1.data
        y1 = y1.copy()

        x[idx] = tmp_val - eps
        y2 = f(x, *args, **kwargs)  # f(x-h)
        if isinstance(y2, Variable):
            y2 = y2.data
        y2 = y2.copy()

        diff = (y1 - y2).sum()
        grad[idx] = diff / (2 * eps)

        x[idx] = tmp_val
        it.iternext()
    return grad
Example #25
    def update_one(self, param):
        xp = cuda.get_array_module(param.data)
        key = id(param)
        if key not in self.ms:
            self.ms[key] = xp.zeros_like(param.data)
            self.vs[key] = xp.zeros_like(param.data)

        m, v = self.ms[key], self.vs[key]
        beta1, beta2, eps = self.beta1, self.beta2, self.eps
        grad = param.grad.data

        m += (1 - beta1) * (grad - m)
        v += (1 - beta2) * (grad * grad - v)
        param.data -= self.lr * m / (xp.sqrt(v) + eps)
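
The in-place updates are exponential moving averages written incrementally: m += (1 - beta1) * (grad - m) is algebraically m = beta1 * m + (1 - beta1) * grad, but it mutates the buffer instead of rebinding it. A one-line check (note this snippet applies self.lr directly; any bias correction would have to live there):

import numpy as np

m, grad, beta1 = np.array([0.5]), np.array([2.0]), 0.9
assert np.allclose(beta1 * m + (1 - beta1) * grad,
                   m + (1 - beta1) * (grad - m))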
Example #26
def numerical_grad(f, x, *args, **kwargs):
    """Computes numerical gradient by finite differences.

    Args:
        f (callable): A function which gets `Variable`s and returns `Variable`s.
        x (`ndarray` or `dezero.Variable`): A target `Variable` for computing
            the gradient.
        *args: If `f` needs variables except `x`, you can specify with this
            argument.
        **kwargs: If `f` needs keyword variables, you can specify with this
            argument.

    Returns:
        `ndarray`: Gradient.
    """
    eps = 1e-4

    x = x.data if isinstance(x, Variable) else x
    xp = cuda.get_array_module(x)
    if xp is not np:
        np_x = cuda.as_numpy(x)
    else:
        np_x = x
    grad = xp.zeros_like(x)

    it = np.nditer(np_x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx].copy()

        x[idx] = tmp_val + eps
        y1 = f(x, *args, **kwargs)  # f(x+h)
        if isinstance(y1, Variable):
            y1 = y1.data
        y1 = y1.copy()

        x[idx] = tmp_val - eps
        y2 = f(x, *args, **kwargs)  # f(x-h)
        if isinstance(y2, Variable):
            y2 = y2.data
        y2 = y2.copy()

        diff = (y1 - y2).sum()
        grad[idx] = diff / (2 * eps)

        x[idx] = tmp_val
        it.iternext()
    return grad
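
Typical use is gradient checking: compare the numerical gradient against the backprop gradient. A sketch with a simple function, assuming numerical_grad above and the dezero package are importable (atol is a loose tolerance for the central difference):

import numpy as np
from dezero import Variable
import dezero.functions as F

x = Variable(np.random.randn(3, 4))
y = F.sum(F.exp(x))
y.backward()

num_grad = numerical_grad(lambda v: F.sum(F.exp(v)), x.data)
assert np.allclose(x.grad.data, num_grad, atol=1e-4)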
Example #27
    def update_one(self, param):
        xp = cuda.get_array_module(param.data)

        key = id(param)
        if key not in self.msg:
            self.msg[key] = xp.zeros_like(param.data)
            self.msdx[key] = xp.zeros_like(param.data)

        msg, msdx = self.msg[key], self.msdx[key]
        rho = self.rho
        eps = self.eps
        grad = param.grad.data

        msg *= rho
        msg += (1 - rho) * grad * grad
        dx = xp.sqrt((msdx + eps) / (msg + eps)) * grad
        msdx *= rho
        msdx += (1 - rho) * dx * dx
        param.data -= dx
Example #28
    def forward(self, gy):
        xp = cuda.get_array_module(gy)

        N, C, OH, OW = gy.shape
        N, C, H, W = self.input_shape
        KH, KW = pair(self.kernel_size)

        gcol = xp.zeros((N * C * OH * OW * KH * KW), dtype=self.dtype)

        indexes = (self.indexes.ravel()
                   + xp.arange(0, self.indexes.size * KH * KW, KH * KW))
        
        gcol[indexes] = gy.ravel()
        gcol = gcol.reshape(N, C, OH, OW, KH, KW)
        gcol = xp.swapaxes(gcol, 2, 4)
        gcol = xp.swapaxes(gcol, 3, 5)

        gx = col2im_array(gcol, (N, C, H, W), self.kernel_size, self.stride,
                          self.pad, to_matrix=False)
        return gx
Example #29
def col2im(col, img_shape, kernel_size, stride, pad):
    xp = cuda.get_array_module(col)
    if xp != np:
        img = _col2im_gpu(col, img_shape, kernel_size, stride, pad)
        return img

    n, c, h, w = img_shape
    kh, kw = _pair(kernel_size)
    sh, sw = _pair(stride)
    ph, pw = _pair(pad)
    oh = get_conv_outsize(h, kh, sh, ph)
    ow = get_conv_outsize(w, kw, sw, pw)

    img = np.zeros((n, c, h + 2 * ph + sh - 1, w + 2 * pw + sw - 1),
                   dtype=col.dtype)
    for j in range(kh):
        j_lim = j + sh * oh
        for i in range(kw):
            i_lim = i + sw * ow
            img[:, :, j:j_lim:sh, i:i_lim:sw] += col[:, :, j, i, :, :]

    return img[:, :, ph:h + ph, pw:w + pw]
Example #30
def col2im_array(col, img_shape, kernel_size, stride, pad, to_matrix=True):
    N, C, H, W = img_shape
    KH, KW = pair(kernel_size)
    SH, SW = pair(stride)
    PH, PW = pair(pad)
    OH = get_conv_outsize(H, KH, SH, PH)
    OW = get_conv_outsize(W, KW, SW, PW)

    if to_matrix:
        col = col.reshape(N, OH, OW, C, KH, KW).transpose(0, 3, 4, 5, 1, 2)

    xp = cuda.get_array_module(col)
    if xp != np:
        img = _col2im_gpu(col, SH, SW, PH, PW, H, W)
        return img
    else:
        img = np.zeros((N, C, H + 2 * PH + SH - 1, W + 2 * PW + SW - 1), dtype=col.dtype)
        for j in range(KH):
            j_lim = j + SH * OH
            for i in range(KW):
                i_lim = i + SW * OW
                img[:, :, j:j_lim:SH, i:i_lim:SW] += col[:, :, j, i, :, :]
        return img[:, :, PH : H + PH, PW : W + PW]
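
col2im_array is the adjoint of im2col_array from Example #2: overlapping patch entries are summed back into the image, so a round trip is patch-count weighted rather than the identity. A shape check, assuming both helpers are importable from the same module:

import numpy as np
# from dezero.functions_conv import im2col_array, col2im_array  # assumed

x = np.random.randn(2, 3, 8, 8).astype(np.float32)
col = im2col_array(x, kernel_size=3, stride=1, pad=1, to_matrix=False)
print(col.shape)  # (2, 3, 3, 3, 8, 8) == (N, C, KH, KW, OH, OW)

img = col2im_array(col, (2, 3, 8, 8), kernel_size=3, stride=1, pad=1,
                   to_matrix=False)
print(img.shape)  # (2, 3, 8, 8); each pixel summed once per covering window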