예제 #1
0
파일: convnd.py 프로젝트: sezan92/ReNom
    def _oper_gpu(cls, x, w, b, in_shape, kernel, stride, padding):
        """Run the N-dimensional convolution forward pass on the GPU.

        The output buffer has shape ``(x.shape[0], w.shape[0], *spatial)``,
        where every spatial extent is computed as
        ``(size + 2 * padding - kernel) // stride + 1``.  The bias is added
        only when ``b`` is not ``None``.  ``in_shape`` is accepted but not
        used in this method.

        Returns the node built by ``cls._create_node`` with both descriptors
        and the ``x``/``w``/``b`` operands recorded on ``attrs``.
        """
        conv_desc = cu.ConvolutionNDescriptor(padding, stride, precision)
        filter_desc = cu.NdFilterDescriptor(w.shape, precision)

        # One output extent per axis that follows the first two axes of x.
        spatial_dims = [
            (extent + padding[axis] * 2 - kernel[axis]) // stride[axis] + 1
            for axis, extent in enumerate(x.shape[2:])
        ]
        y = GPUValue(shape=(x.shape[0], w.shape[0]) + tuple(spatial_dims))

        with cu.cudnn_handler() as handle:
            cu.cuConvolutionForward(
                handle, conv_desc, filter_desc, get_gpu(x), get_gpu(w), y)
            if b is not None:
                cu.cu_add_bias(get_gpu(b), y)

        node = cls._create_node(y)
        node.attrs._conv_desc = conv_desc
        node.attrs._filter_desc = filter_desc
        node.attrs._x = x
        node.attrs._w = w
        node.attrs._b = b
        return node
예제 #2
0
 def _oper_gpu(cls, arg, alpha):
     """Run ``cu.cueru_forward`` on ``arg`` with coefficient ``alpha``.

     The kernel writes into a buffer obtained from ``empty_like_me()`` on the
     GPU view of ``arg``.  The returned node records ``arg`` and ``alpha`` on
     ``attrs``.
     """
     source = get_gpu(arg)
     out = source.empty_like_me()
     cu.cueru_forward(alpha, source, out)

     node = cls._create_node(out)
     node.attrs._arg = arg
     node.attrs._alpha = alpha
     return node
예제 #3
0
 def _oper_gpu(cls, arg, beta):
     """Run ``cu.cuswish_forward`` on ``arg`` with coefficient ``beta``.

     The kernel writes into a buffer obtained from ``empty_like_me()`` on the
     GPU view of ``arg``.  The returned node records ``arg`` and ``beta`` on
     ``attrs``.
     """
     source = get_gpu(arg)
     out = source.empty_like_me()
     cu.cuswish_forward(beta, source, out)

     node = cls._create_node(out)
     node.attrs._arg = arg
     node.attrs._beta = beta
     return node
예제 #4
0
    def _get_gpu(self, dy, node):
        """Compute the update for ``dy`` through ``cu.cu_optimizer_adamax``.

        Per-node state (two moment buffers, a step counter and the running
        products of the beta coefficients) lives in ``self._params`` under
        ``id(node)`` and is rewritten on every call.

        Returns the buffer handed to ``cu.cu_optimizer_adamax`` as its last
        argument.
        """
        key = id(node)
        grad = get_gpu(dy)
        state = self._params.get(key, None)

        if state is not None:
            moment1 = state['moment1']
            moment2 = state['moment2']
            time = state['time'] + 1
            # beta ** t is maintained incrementally as (beta ** (t - 1)) * beta.
            running_beta1 = state['running_beta1'] * self._beta1
            running_beta2 = state['running_beta2'] * self._beta2
        else:
            # First call for this node: zeroed moments, step counter at one.
            moment1 = grad.zeros_like_me()
            moment2 = grad.zeros_like_me()
            running_beta1 = self._beta1
            running_beta2 = self._beta2
            time = 1

        ndy = grad.empty_like_me()
        cu.cu_optimizer_adamax(
            self._alpha, self._epsilon,
            (self._beta1, running_beta1),
            (self._beta2, running_beta2),
            moment1, moment2, grad, ndy)

        self._params[key] = {
            'moment1': moment1,
            'moment2': moment2,
            'time': time,
            'running_beta1': running_beta1,
            'running_beta2': running_beta2,
        }
        return ndy
예제 #5
0
 def _oper_gpu(cls, arg, slope):
     """Run ``cu.culeaky_leru_forward`` on ``arg`` with the given ``slope``.

     The kernel writes into a buffer obtained from ``empty_like_me()`` on the
     GPU view of ``arg``.  The returned node records ``arg`` and ``slope`` on
     ``attrs``.
     """
     source = get_gpu(arg)
     out = source.empty_like_me()
     cu.culeaky_leru_forward(slope, source, out)

     node = cls._create_node(out)
     node.attrs._arg = arg
     node.attrs._slope = slope
     return node
    def _oper_gpu(cls, x, w, b, in_shape, out_shape, kernel, stride, padding,
                  dilation):
        """Run the convolution forward pass on the GPU.

        The output buffer has shape ``(x.shape[0],) + out_shape``.  The bias
        is added only when ``b`` is not ``None``.

        Returns the node built by ``cls._create_node``; the descriptors, the
        operands and every geometry argument are recorded on ``attrs``.
        """
        conv_desc = cu.ConvolutionDescriptor(
            padding, stride, dilation, precision)
        filter_desc = cu.FilterDescriptor(w.shape, precision)

        y = GPUValue(shape=(x.shape[0],) + tuple(out_shape))
        with cu.cudnn_handler() as handle:
            cu.cuConvolutionForward(
                handle, conv_desc, filter_desc, get_gpu(x), get_gpu(w), y)
            if b is not None:
                cu.cu_add_bias(get_gpu(b), y)

        node = cls._create_node(y)
        attrs = node.attrs
        attrs._conv_desc = conv_desc
        attrs._filter_desc = filter_desc
        attrs._x = x
        attrs._w = w
        attrs._b = b
        attrs._in_shape = in_shape
        attrs._out_shape = out_shape
        attrs._kernel = kernel
        attrs._stride = stride
        attrs._padding = padding
        attrs._dilation = dilation
        return node
예제 #7
0
 def _oper_gpu(cls, lhs, rhs):
     """Combine ``lhs`` and ``rhs`` on the GPU through ``cublas_gemm``.

     Returns a ``GPUValue`` of shape ``(lhs.shape[0], rhs.shape[1])`` that
     ``cublas_gemm`` was asked to fill.
     """
     rows = lhs.shape[0]
     cols = rhs.shape[1]
     product = GPUValue(shape=(rows, cols))
     # NOTE(review): the two 0 arguments presumably mean "no transpose" --
     # confirm against cublas_gemm's signature.
     cublas_gemm(get_gpu(lhs), 0, get_gpu(rhs), 0, get_gpu(product))
     return product
예제 #8
0
 def _backward_gpu(self, context, dy, **kwargs):
     """Propagate ``dy`` through local response normalization on the GPU.

     Nothing happens unless the recorded input ``self.attrs._x`` is a
     ``Node``; otherwise the gradient computed by
     ``cu.cuLocalResponseNormalizationBackward`` is forwarded to it.
     """
     x = self.attrs._x
     if not isinstance(x, Node):
         return

     dx = get_gpu(self).empty_like_me()
     with cu.cudnn_handler() as handle:
         cu.cuLocalResponseNormalizationBackward(
             handle, self.attrs._lrn_desc,
             get_gpu(x), get_gpu(self), dx, get_gpu(dy))
     x._update_diff(context, dx, **kwargs)
예제 #9
0
    def _oper_gpu(cls, x, pz, ps, w, wr, wc, b):
        """Run one peephole-LSTM forward step on the GPU.

        When ``ps`` is ``None`` the previous state and previous output start
        as zero buffers of shape ``(x.shape[0], w.shape[1] // 4)``; otherwise
        the GPU views of ``ps`` and ``pz`` are used.

        Returns the node wrapping the new output; operands, the
        pre-activation ``u`` and the new state are recorded on ``attrs``.
        """
        if ps is not None:
            s_p = get_gpu(ps)
            z_p = get_gpu(pz)
        else:
            # NOTE(review): the // 4 presumably splits w's columns across
            # four gates -- confirm.
            initial_shape = (x.shape[0], w.shape[1] // 4)
            s_p = GPUValue(shape=initial_shape).zeros_like_me()
            z_p = s_p.zeros_like_me()

        s = s_p.empty_like_me()

        u = op.dot(x, w) + op.dot(z_p, wr)
        if b is not None:
            u += b
        u = get_gpu(u)

        z = z_p.zeros_like_me()
        cu.cupeepholelstm_forward(u, get_gpu(wc), s_p, s, z)

        node = cls._create_node(z)
        attrs = node.attrs
        attrs._x = x
        attrs._w = w
        attrs._wr = wr
        attrs._wc = wc
        attrs._b = b
        attrs._u = u
        attrs._pz = pz
        attrs._pstate = ps
        attrs._state = s

        # The previous step's node, if any, also receives this step's u.
        if isinstance(pz, Node):
            pz.attrs._pfgate = u
        return node
예제 #10
0
 def _oper_gpu(cls, x, w):
     """Run ``cu.cuembedding_forward`` for ``x`` against the table ``w``.

     The output buffer has shape ``(len(x), len(w[0]))``.  The returned node
     records ``x`` and ``w`` on ``attrs``.
     """
     out_shape = (len(x), len(w[0]))
     out = GPUValue(shape=out_shape)
     cu.cuembedding_forward(get_gpu(x), get_gpu(w), out)

     node = cls._create_node(out)
     node.attrs._x = x
     node.attrs._w = w
     return node
예제 #11
0
    def _backward_gpu(self, context, dy, **kwargs):
        """Send gradients to ``_rhs`` and ``_lhs`` when they are ``Node``s.

        ``_rhs`` receives ``-dy * _log_lhs``; ``_lhs`` receives
        ``-dy * rhs / lhs`` computed on the GPU views of both operands.
        """
        lhs = self.attrs._lhs
        rhs = self.attrs._rhs

        if isinstance(rhs, Node):
            rhs_grad = -dy * self.attrs._log_lhs
            rhs._update_diff(context, rhs_grad, **kwargs)

        if isinstance(lhs, Node):
            lhs_grad = -dy * get_gpu(rhs) / get_gpu(lhs)
            lhs._update_diff(context, lhs_grad, **kwargs)
예제 #12
0
 def _backward_gpu(self, context, dy, **kwargs):
     """Propagate ``dy`` to ``self.attrs._arg`` when it is a ``Node``.

     The local derivative comes from ``cu.cueru_backward`` and is multiplied
     by the GPU view of ``dy`` and by ``self.attrs._lmda`` before being
     forwarded.
     """
     arg = self.attrs._arg
     if not isinstance(arg, Node):
         return

     alpha = self.attrs._alpha
     lmda = self.attrs._lmda
     source = get_gpu(arg)
     dx = source.empty_like_me()
     cu.cueru_backward(alpha, source, dx)
     arg._update_diff(context, dx * get_gpu(dy) * lmda, **kwargs)
예제 #13
0
 def _oper_gpu(cls, lhs, rhs, reduce_sum=True):
     """Return the squared difference of ``lhs`` and ``rhs`` over ``2 * N``.

     ``N`` is ``len(lhs)``.  With ``reduce_sum`` the squared differences are
     summed with ``cu.cusum`` before the division; otherwise the
     element-wise result is returned.
     """
     assert len(rhs.shape) > 1, \
         "Input arrays must have no less than 2 dimension."
     N = len(lhs)
     squared = (get_gpu(lhs) - get_gpu(rhs)) ** 2
     if not reduce_sum:
         return squared / (N * 2)
     return cu.cusum(squared) / (N * 2)
예제 #14
0
파일: pool2d.py 프로젝트: sezan92/ReNom
 def _backward_gpu(self, context, dy, **kwargs):
     """Propagate ``dy`` through the pooling layer on the GPU.

     Args:
         context: Passed through to ``_update_diff``.
         dy: Gradient of the pooled output; converted with ``get_gpu``.
         **kwargs: Passed through to ``_update_diff``.

     The cuDNN backward pass only runs when the recorded input
     ``self.attrs._x`` is a ``Node``; for any other input there is nobody to
     receive the gradient, so the method returns without touching the GPU.
     """
     x = self.attrs._x
     # Guard first: the gradient buffer used to be computed and then thrown
     # away whenever x was not a Node.
     if not isinstance(x, Node):
         return

     dx = get_gpu(x).empty_like_me()
     with cu.cudnn_handler() as handle:
         cu.cuPoolingBackward(handle, self.attrs._pool_desc,
                              get_gpu(x), get_gpu(self),
                              get_gpu(dy), dx)
     x._update_diff(context, dx, **kwargs)
예제 #15
0
 def _get_gpu(self, dy, node):
     """Compute the update for ``dy`` through ``cu.cu_optimizer_adagrad``.

     Args:
         dy: Incoming gradient; converted with ``get_gpu``.
         node: Object whose ``id`` keys the per-node state in
             ``self._params``.

     Returns:
         The buffer passed to ``cu.cu_optimizer_adagrad`` as ``ndy``.  The
         buffer passed as ``r`` replaces the stored state for ``node``.
     """
     node_id = id(node)
     grad = get_gpu(dy)

     # Build the zero state only on a miss.  Passing it as the default of
     # dict.get() allocated a zero buffer on every call, even when state was
     # already stored.
     pdy = self._params.get(node_id)
     if pdy is None:
         pdy = grad.zeros_like_me()

     ndy = grad.empty_like_me()
     previous = get_gpu(pdy)
     r = previous.empty_like_me()
     cu.cu_optimizer_adagrad(self._lr, self._epsilon, grad, previous, ndy, r)
     self._params[node_id] = r
     return ndy
예제 #16
0
 def _oper_gpu(cls, x, n, k, a, b):
     """Run the local response normalization forward pass on the GPU.

     The result is written into a buffer obtained from ``empty_like_me()``
     on the GPU view of ``x``.  The returned node records ``x`` and the LRN
     descriptor on ``attrs``.
     """
     # The descriptor takes its arguments as (n, a, b, k), which is not the
     # order of this method's parameters.
     lrn_desc = cu.LRNDescriptor(n, a, b, k)
     out = get_gpu(x).empty_like_me()
     with cu.cudnn_handler() as handle:
         cu.cuLocalResponseNormalizationForward(
             handle, lrn_desc, get_gpu(x), get_gpu(out))

     node = cls._create_node(out)
     node.attrs._x = x
     node.attrs._lrn_desc = lrn_desc
     return node
예제 #17
0
 def _oper_gpu(cls, x, dropout_ratio):
     """Multiply ``x`` by a scaled random mask on the GPU.

     The mask is filled by ``rand_bernoulli(mask, 1 - dropout_ratio)`` and
     then divided by ``dropout_ratio``.  The returned node records ``x`` and
     the scaled mask on ``attrs``.
     """
     raw_mask = get_gpu(x).empty_like_me()
     curand_generator().rand_bernoulli(raw_mask, 1 - dropout_ratio)
     scaled_mask = raw_mask / dropout_ratio

     node = cls._create_node(get_gpu(x) * scaled_mask)
     node.attrs._x = x
     node.attrs._mask = scaled_mask
     return node
 def _backward_gpu(self, context, dy, **kwargs):
     """Send the clipped gradient to ``self.attrs._lhs`` when it is a ``Node``.

     The gradient is ``(lhs - rhs) * dy``, limited with ``cu.cumin`` using
     ``clip[1]`` and then ``cu.cumax`` using ``clip[0]``, and finally divided
     by ``len(lhs)``.
     """
     lhs = self.attrs._lhs
     if not isinstance(lhs, Node):
         return

     N = len(lhs)
     clip = self.attrs._clip
     diff = get_gpu(lhs) - get_gpu(self.attrs._rhs)
     dx = diff * get_gpu(dy)
     # Both kernels are called with dx as the last two arguments.
     cu.cumin(clip[1], dx, dx)
     cu.cumax(clip[0], dx, dx)
     lhs._update_diff(context, dx / N, **kwargs)
예제 #19
0
 def _backward_gpu(self, context, dy, **kwargs):
     """Propagate ``dy`` through softmax to ``self.attrs._arg``.

     Nothing happens unless ``self.attrs._arg`` is a ``Node``; otherwise the
     gradient from ``cu.cuSoftmaxBackward`` (called with ``mode=1``) is
     forwarded to it.
     """
     arg = self.attrs._arg
     if not isinstance(arg, Node):
         return

     with cu.cudnn_handler() as handle:
         dx = get_gpu(self).empty_like_me()
         cu.cuSoftmaxBackward(
             handle, get_gpu(self), get_gpu(dy), dx, mode=1)
     arg._update_diff(context, dx, **kwargs)
예제 #20
0
    def _get_gpu(self, dy, node):
        """Compute the update for ``dy`` through ``cu.cu_optimizer_sgd``.

        Args:
            dy: Incoming gradient; converted with ``get_gpu``.
            node: Object whose ``id`` keys the per-node state in
                ``self._params``.

        Returns:
            The buffer passed to ``cu.cu_optimizer_sgd`` as ``ndy``.  It is
            also stored as the state for ``node`` when ``self._momentum`` is
            positive.
        """
        node_id = id(node)
        grad = get_gpu(dy)

        # Build the zero state only on a miss.  Passing it as the default of
        # dict.get() allocated a zero buffer on every call, even when state
        # was already stored.
        pdy = self._params.get(node_id)
        if pdy is None:
            pdy = grad.zeros_like_me()

        ndy = grad.empty_like_me()
        cu.cu_optimizer_sgd(self._lr, self._momentum, grad, get_gpu(pdy), ndy)

        if self._momentum > 0:
            self._params[node_id] = ndy
        return ndy
예제 #21
0
 def _oper_gpu(cls, lhs, rhs, reduce_sum):
     """Compute ``-log(lhs + 1e-8) * rhs`` on the GPU.

     With ``reduce_sum`` the product is summed through ``cu.cusum`` before
     negation; otherwise the element-wise result is kept.  The returned node
     records the log term, ``rhs`` and ``lhs`` on ``attrs``.
     """
     log_lhs = log(lhs + 1e-8)
     weighted = get_gpu(log_lhs * rhs)
     value = -cu.cusum(weighted) if reduce_sum else -weighted

     node = cls._create_node(value)
     node.attrs._log_lhs = log_lhs
     node.attrs._rhs = rhs
     node.attrs._lhs = lhs
     return node
예제 #22
0
    def _backward_gpu(self, context, dy, **kwargs):
        """Route ``dy`` to ``_a`` and ``_b`` according to ``_condition``.

        Each operand that is a ``Node`` receives a zero buffer in which the
        entries selected by the condition (for ``_a``) or by the negated
        condition (for ``_b``) are copied from ``dy``.
        """
        a = self.attrs._a
        if isinstance(a, Node):
            selected = self.attrs._condition
            ldy = get_gpu(a).zeros_like_me()
            ldy[selected] = dy[selected]
            a._update_diff(context, ldy, **kwargs)

        b = self.attrs._b
        if isinstance(b, Node):
            rdy = get_gpu(b).zeros_like_me()
            # NOTE(review): unary minus is used to invert the condition; if
            # the condition is a NumPy boolean array this operator is
            # rejected by recent NumPy -- confirm the mask type.
            unselected = - self.attrs._condition
            rdy[unselected] = dy[unselected]
            b._update_diff(context, rdy, **kwargs)
예제 #23
0
 def _oper_gpu(cls, x, drop_out_ratio):
     """Multiply ``x`` by a random mask drawn once per leading index pair.

     The mask is drawn with shape ``(x.shape[0], x.shape[1], 1, 1)`` by
     ``rand_bernoulli(mask, 1 - drop_out_ratio)``, divided by
     ``drop_out_ratio`` and multiplied by ``ones_like_me()`` of ``x``.  The
     returned node records ``x`` and the final mask on ``attrs``.
     """
     seed_mask = GPUValue(shape=(x.shape[0], x.shape[1], 1, 1))
     curand_generator().rand_bernoulli(seed_mask, 1 - drop_out_ratio)
     scaled_mask = seed_mask / drop_out_ratio
     # NOTE(review): presumably this broadcasts the mask to x's shape.
     full_mask = scaled_mask * get_gpu(x).ones_like_me()

     node = cls._create_node(get_gpu(x) * get_gpu(full_mask))
     node.attrs._x = x
     node.attrs._mask = full_mask
     return node
예제 #24
0
 def _backward_gpu(self, context, dy, **kwargs):
     """Send gradients to ``_x`` and ``_w`` when they are ``Node``s.

     ``_x`` receives ``(dy * norm - sum(x * dy, axis=1) * x / norm) /
     norm ** 2`` multiplied by ``_w``; ``_w`` receives ``dy * x / norm``
     summed over axes ``(0, 2, 3)`` with the dimensions kept.
     """
     norm = self.attrs._norm
     x = self.attrs._x
     w = self.attrs._w

     if isinstance(x, Node):
         scaled_dy = dy * norm
         projection = rm.sum(x * dy, axis=1, keepdims=True) * x
         dx = (scaled_dy - projection / norm) / (norm * norm)
         x._update_diff(context, get_gpu(dx * w), **kwargs)

     if isinstance(w, Node):
         dl = dy * (x / norm)
         dw = rm.sum(dl.as_ndarray(), axis=(0, 2, 3), keepdims=True)
         w._update_diff(context, get_gpu(dw), **kwargs)
예제 #25
0
 def _oper_gpu(cls, x, prev_pool):
     """Map ``x`` back to the input shape of ``prev_pool`` on the GPU.

     ``cu.cuPoolingBackward`` is run with the descriptor, input and output of
     ``prev_pool`` and with ``x`` in the gradient slot.  The returned node
     records ``x``, the pooling input and the pooling geometry on ``attrs``.
     """
     pool_attrs = prev_pool.attrs
     out = GPUValue(shape=pool_attrs._x.shape)
     with cu.cudnn_handler() as handle:
         cu.cuPoolingBackward(
             handle, pool_attrs._pool_desc,
             get_gpu(pool_attrs._x), get_gpu(prev_pool), get_gpu(x), out)

     node = cls._create_node(out)
     node.attrs._x = x
     node.attrs._original_x = pool_attrs._x
     node.attrs._kernel = pool_attrs._kernel
     node.attrs._stride = pool_attrs._stride
     node.attrs._padding = pool_attrs._padding
     return node
예제 #26
0
 def _oper_gpu(cls, x, rois, ch, h, w, n_rois, outh, outw, spatial_scale):
     """Run ``cu.curoi_pool2d_forward`` for ``x`` over ``rois`` on the GPU.

     The output buffer has shape ``(n_rois, ch, outh, outw)``; a second
     buffer from ``empty_like_me()`` is passed to the kernel as its last
     argument and stored as ``attrs._index``.  ``attrs._rois`` holds the GPU
     view of ``rois``, not the argument as passed in.
     """
     pooled = GPUValue(shape=(n_rois, ch, outh, outw))
     argmax_data = pooled.empty_like_me()
     gpu_rois = get_gpu(rois)
     cu.curoi_pool2d_forward(
         gpu_rois, get_gpu(x), spatial_scale, ch, h, w,
         outh, outw, pooled, argmax_data)

     node = cls._create_node(pooled)
     node.attrs._index = argmax_data
     node.attrs._x = x
     node.attrs._rois = gpu_rois
     node.attrs._outh = outh
     node.attrs._outw = outw
     node.attrs._spatial_scale = spatial_scale
     return node
예제 #27
0
    def _oper_gpu(cls, x, pz, ps, w, wr, b):
        """Run one LSTM forward step on the GPU.

        When ``ps`` is ``None`` the previous state and previous output start
        as zero buffers of shape ``(x.shape[0], w.shape[1] // 4)``; otherwise
        ``ps`` is used as given and ``pz`` through ``get_gpu``.

        Returns the node wrapping the new output; operands, the
        pre-activation ``u`` and both states are recorded on ``attrs``, and
        the new state is also set as ``_state`` on the node itself.
        """
        if ps is not None:
            s_p = ps
            z_p = get_gpu(pz)
        else:
            # NOTE(review): the // 4 presumably splits w's columns across
            # four gates -- confirm.
            template = GPUValue(shape=(x.shape[0], w.shape[1] // 4))
            s_p = template.zeros_like_me()
            z_p = template.zeros_like_me()

        u = dot(x, w) + dot(z_p, wr)
        if b is not None:
            u += b

        out = get_gpu(z_p).empty_like_me()
        state = get_gpu(s_p).empty_like_me()

        # get_gpu(u) is looked up separately for each kernel call.
        cu.culstm_forward_activate(get_gpu(u))
        cu.culstm_forward(
            get_gpu(u), get_gpu(state), get_gpu(s_p), get_gpu(out))

        node = cls._create_node(out)
        attrs = node.attrs
        attrs._x = x
        attrs._w = w
        attrs._wr = wr
        attrs._b = b
        attrs._pz = pz
        attrs._u = u
        attrs._pstate = s_p
        attrs._state = state
        node._state = state

        # The previous step's node, if any, also receives this step's u.
        if isinstance(pz, Node):
            pz.attrs._pfgate = u

        return node
예제 #28
0
 def _oper_gpu(cls, arg, axis=None, keepdims=False):
     """Compute the mean of ``arg`` over ``axis`` on the GPU.

     Args:
         arg: Operand exposing ``shape``; converted with ``get_gpu``.
         axis: ``None`` (reduce every axis), an int, or a tuple of ints.
             Negative values count from the last axis.
         keepdims: When true, reduced axes are kept with length 1 instead
             of being removed from the output shape.

     Returns:
         A ``GPUValue`` holding the ``cusum`` over ``axis`` divided by the
         number of reduced elements.

     Raises:
         TypeError: If ``axis`` is not ``None``, an int or a tuple.
         IndexError: If an axis lies outside ``arg``'s dimensions.
     """
     if not isinstance(axis, (int, tuple, type(None))):
         # This used to fall through to ``return ret`` with ``ret`` unbound.
         raise TypeError(
             "axis must be None, an int or a tuple of ints, got {}".format(
                 type(axis).__name__))

     shape = tuple(arg.shape)
     ndim = len(shape)

     def _wrap(a):
         # Map a negative axis to its non-negative equivalent.  The shape
         # arithmetic below is only correct for non-negative axes.
         if not -ndim <= a < ndim:
             raise IndexError(
                 "axis {} is out of range for {} dimension(s)".format(a, ndim))
         return a + ndim if a < 0 else a

     if axis is None:
         reduce_axis = None
         reduced = set(range(ndim))
     elif isinstance(axis, tuple):
         reduce_axis = tuple(_wrap(a) for a in axis)
         reduced = set(reduce_axis)
     else:
         reduce_axis = _wrap(axis)
         reduced = {reduce_axis}

     # Number of elements folded into each output entry.
     size = 1
     for a in reduced:
         size *= shape[a]

     if keepdims:
         newshape = tuple(
             1 if i in reduced else extent for i, extent in enumerate(shape))
     else:
         newshape = tuple(
             extent for i, extent in enumerate(shape) if i not in reduced)

     ret = GPUValue(shape=newshape)
     cudiv(cusum(get_gpu(arg), axis=reduce_axis, keepdims=keepdims), size, ret)
     return ret
예제 #29
0
 def _backward_gpu(self, context, dy, **kwargs):
     """Split ``dy`` and hand each piece to the matching ``_arg<i>`` operand.

     ``dy`` is split at ``self.attrs._index`` along ``self.attrs._axis``;
     piece ``i`` goes to ``self.attrs._arg<i>`` when that operand is a
     ``Node``.
     """
     split_axis = self.attrs._axis
     pieces = get_gpu(dy).split(self.attrs._index, axis=split_axis)
     # len(_index) + 1 operands are looked up, one per expected piece.
     operand_count = len(self.attrs._index) + 1
     for position in range(operand_count):
         operand = getattr(self.attrs, "_arg%d" % position)
         if isinstance(operand, Node):
             operand._update_diff(context, pieces[position], **kwargs)
예제 #30
0
    def _oper_gpu(cls, args, axis):
        """Concatenate ``args`` along ``axis`` on the GPU.

        Args:
            args: Non-empty sequence of operands exposing ``shape``; each is
                converted with ``get_gpu``.
            axis: Axis to join along.  A negative value counts from the last
                axis of ``args[0]``.

        Returns:
            A ``GPUValue`` whose shape equals that of ``args[0]`` except
            along ``axis``, where it is the sum of the operands' extents.
        """
        first_shape = args[0].shape
        # Normalize first: with a negative axis the slices below misbuild the
        # shape (axis=-1 makes ``first_shape[axis + 1:]`` the whole shape).
        if axis < 0:
            axis += len(first_shape)

        # The builtin sum keeps the extent a plain int rather than a NumPy
        # scalar.
        joined = sum(a.shape[axis] for a in args)
        newshape = first_shape[:axis] + (joined,) + first_shape[axis + 1:]

        ret = GPUValue(shape=newshape)
        cuconcat([get_gpu(a) for a in args], ret, axis)
        return ret