def _oper_gpu(cls, x, w, b, in_shape, kernel, stride, padding):
    """Compute the N-dimensional convolution forward pass on the GPU.

    Creates the convolution and filter descriptors, derives the output
    shape, runs ``cu.cuConvolutionForward`` and, when ``b`` is not None,
    adds it with ``cu.cu_add_bias``.

    Args:
        x: Input; axes from index 2 onwards are the convolved axes.
        w: Filter weights; ``w.shape[0]`` becomes the second output axis.
        b: Optional bias, skipped when None.
        in_shape: Not used by this method.
        kernel: Per-axis kernel sizes, indexed in step with ``x.shape[2:]``.
        stride: Per-axis strides, indexed in step with ``x.shape[2:]``.
        padding: Per-axis paddings, indexed in step with ``x.shape[2:]``.

    Returns:
        The node built by ``cls._create_node``, with the descriptors and
        the inputs ``x``, ``w`` and ``b`` stored on its ``attrs``.
    """
    conv_desc = cu.ConvolutionNDescriptor(padding, stride, precision)
    filter_desc = cu.NdFilterDescriptor(w.shape, precision)

    # Output length per axis: (in + 2 * pad - kernel) // stride + 1.
    spatial = [
        (extent + padding[axis] * 2 - kernel[axis]) // stride[axis] + 1
        for axis, extent in enumerate(x.shape[2:])
    ]
    out = GPUValue(shape=tuple([x.shape[0], w.shape[0]] + spatial))

    with cu.cudnn_handler() as handle:
        cu.cuConvolutionForward(handle, conv_desc, filter_desc,
                                get_gpu(x), get_gpu(w), out)
        if b is not None:
            cu.cu_add_bias(get_gpu(b), out)

    node = cls._create_node(out)
    node.attrs._conv_desc = conv_desc
    node.attrs._filter_desc = filter_desc
    node.attrs._x = x
    node.attrs._w = w
    node.attrs._b = b
    return node
def _oper_gpu(cls, arg, alpha):
    """Run ``cu.cueru_forward`` on ``arg`` and wrap the result in a node.

    Args:
        arg: Input value, moved to the GPU through ``get_gpu``.
        alpha: Scalar parameter forwarded to the kernel.

    Returns:
        The node built by ``cls._create_node``; ``arg`` and ``alpha`` are
        stored on its ``attrs``.
    """
    # Output buffer with the same layout as the input.
    out = get_gpu(arg).empty_like_me()
    cu.cueru_forward(alpha, get_gpu(arg), out)

    node = cls._create_node(out)
    node.attrs._arg = arg
    node.attrs._alpha = alpha
    return node
def _oper_gpu(cls, arg, beta):
    """Run ``cu.cuswish_forward`` on ``arg`` and wrap the result in a node.

    Args:
        arg: Input value, moved to the GPU through ``get_gpu``.
        beta: Scalar parameter forwarded to the kernel.

    Returns:
        The node built by ``cls._create_node``; ``arg`` and ``beta`` are
        stored on its ``attrs``.
    """
    # Output buffer with the same layout as the input.
    out = get_gpu(arg).empty_like_me()
    cu.cuswish_forward(beta, get_gpu(arg), out)

    node = cls._create_node(out)
    node.attrs._arg = arg
    node.attrs._beta = beta
    return node
def _get_gpu(self, dy, node):
    """Compute the adamax update for ``dy`` on the GPU and refresh state.

    State is kept in ``self._params`` under ``id(node)``: two moment
    buffers, a step counter and the running powers of both betas.

    Args:
        dy: Incoming gradient.
        node: Object whose ``id`` keys the optimizer state.

    Returns:
        The buffer filled by ``cu.cu_optimizer_adamax``.
    """
    node_id = id(node)
    state = self._params.get(node_id, None)

    if state is not None:
        moment1 = state['moment1']
        moment2 = state['moment2']
        step = state['time'] + 1
        # beta ** t is maintained incrementally as beta ** (t - 1) * beta.
        running_beta1 = state['running_beta1'] * self._beta1
        running_beta2 = state['running_beta2'] * self._beta2
    else:
        # First update for this node: start from zeroed moments.
        moment1 = get_gpu(dy).zeros_like_me()
        moment2 = get_gpu(dy).zeros_like_me()
        running_beta1 = self._beta1
        running_beta2 = self._beta2
        step = 1

    update = get_gpu(dy).empty_like_me()
    cu.cu_optimizer_adamax(self._alpha, self._epsilon,
                           (self._beta1, running_beta1),
                           (self._beta2, running_beta2),
                           moment1, moment2, get_gpu(dy), update)

    self._params[node_id] = {
        'moment1': moment1,
        'moment2': moment2,
        'time': step,
        'running_beta1': running_beta1,
        'running_beta2': running_beta2,
    }
    return update
def _oper_gpu(cls, arg, slope):
    """Run ``cu.culeaky_leru_forward`` on ``arg`` and wrap the result.

    Args:
        arg: Input value, moved to the GPU through ``get_gpu``.
        slope: Scalar parameter forwarded to the kernel.

    Returns:
        The node built by ``cls._create_node``; ``arg`` and ``slope`` are
        stored on its ``attrs``.
    """
    # Output buffer with the same layout as the input.
    out = get_gpu(arg).empty_like_me()
    cu.culeaky_leru_forward(slope, get_gpu(arg), out)

    node = cls._create_node(out)
    node.attrs._arg = arg
    node.attrs._slope = slope
    return node
def _oper_gpu(cls, x, w, b, in_shape, out_shape, kernel, stride, padding, dilation):
    """Compute the convolution forward pass on the GPU.

    Creates the convolution and filter descriptors, allocates an output of
    shape ``(x.shape[0],) + out_shape``, runs ``cu.cuConvolutionForward``
    and, when ``b`` is not None, adds it with ``cu.cu_add_bias``.

    Args:
        x: Input value; ``x.shape[0]`` is the leading output dimension.
        w: Filter weights.
        b: Optional bias, skipped when None.
        in_shape: Stored on the result, not otherwise used here.
        out_shape: Output shape without the leading dimension.
        kernel: Stored on the result, not otherwise used here.
        stride: Passed to the convolution descriptor.
        padding: Passed to the convolution descriptor.
        dilation: Passed to the convolution descriptor.

    Returns:
        The node built by ``cls._create_node`` with the descriptors, the
        inputs and all layer parameters stored on its ``attrs``.
    """
    batch = x.shape[0]
    conv_desc = cu.ConvolutionDescriptor(padding, stride, dilation, precision)
    filter_desc = cu.FilterDescriptor(w.shape, precision)

    out = GPUValue(shape=(batch,) + tuple(out_shape))
    with cu.cudnn_handler() as handle:
        cu.cuConvolutionForward(handle, conv_desc, filter_desc,
                                get_gpu(x), get_gpu(w), out)
        if b is not None:
            cu.cu_add_bias(get_gpu(b), out)

    node = cls._create_node(out)
    recorded = (
        ("_conv_desc", conv_desc),
        ("_filter_desc", filter_desc),
        ("_x", x),
        ("_w", w),
        ("_b", b),
        ("_in_shape", in_shape),
        ("_out_shape", out_shape),
        ("_kernel", kernel),
        ("_stride", stride),
        ("_padding", padding),
        ("_dilation", dilation),
    )
    for attr_name, attr_value in recorded:
        setattr(node.attrs, attr_name, attr_value)
    return node
def _oper_gpu(cls, lhs, rhs):
    """Multiply ``lhs`` by ``rhs`` on the GPU with ``cublas_gemm``.

    Args:
        lhs: Left operand; ``lhs.shape[0]`` gives the output rows.
        rhs: Right operand; ``rhs.shape[1]`` gives the output columns.

    Returns:
        A ``GPUValue`` of shape ``(lhs.shape[0], rhs.shape[1])`` filled by
        ``cublas_gemm``.
    """
    out = GPUValue(shape=(lhs.shape[0], rhs.shape[1]))
    # The two 0 arguments are passed as in the original call
    # (presumably "no transpose" flags - confirm against cublas_gemm).
    cublas_gemm(get_gpu(lhs), 0, get_gpu(rhs), 0, get_gpu(out))
    return out
def _backward_gpu(self, context, dy, **kwargs):
    """Back-propagate ``dy`` through local response normalization.

    Nothing happens unless the stored input ``self.attrs._x`` is a
    ``Node``. Otherwise the input gradient is computed with
    ``cu.cuLocalResponseNormalizationBackward`` and handed to the input's
    ``_update_diff``.
    """
    if not isinstance(self.attrs._x, Node):
        return

    grad_x = get_gpu(self).empty_like_me()
    with cu.cudnn_handler() as handle:
        cu.cuLocalResponseNormalizationBackward(
            handle,
            self.attrs._lrn_desc,
            get_gpu(self.attrs._x),
            get_gpu(self),
            grad_x,
            get_gpu(dy),
        )
    self.attrs._x._update_diff(context, grad_x, **kwargs)
def _oper_gpu(cls, x, pz, ps, w, wr, wc, b):
    """Run one peephole-LSTM forward step on the GPU.

    Args:
        x: Input for this step.
        pz: Previous output; only read when ``ps`` is not None.
        ps: Previous state, or None to start from zeros.
        w: Weights applied to ``x``.
        wr: Weights applied to the previous output.
        wc: Weights passed to ``cu.cupeepholelstm_forward``.
        b: Optional bias added to the pre-activation, skipped when None.

    Returns:
        The node wrapping the new output; inputs, the pre-activation ``u``
        and the new state are stored on its ``attrs``.
    """
    if ps is None:
        # No history: zero state of shape (x.shape[0], w.shape[1] // 4).
        prev_state = GPUValue(shape=(x.shape[0], w.shape[1] // 4)).zeros_like_me()
        prev_out = prev_state.zeros_like_me()
    else:
        prev_state = get_gpu(ps)
        prev_out = get_gpu(pz)

    new_state = prev_state.empty_like_me()

    pre_act = op.dot(x, w) + op.dot(prev_out, wr)
    if b is not None:
        pre_act += b
    pre_act = get_gpu(pre_act)

    out = prev_out.zeros_like_me()
    cu.cupeepholelstm_forward(pre_act, get_gpu(wc), prev_state, new_state, out)

    node = cls._create_node(out)
    node.attrs._x = x
    node.attrs._w = w
    node.attrs._wr = wr
    node.attrs._wc = wc
    node.attrs._b = b
    node.attrs._u = pre_act
    node.attrs._pz = pz
    node.attrs._pstate = ps
    node.attrs._state = new_state

    # Expose this step's pre-activation on the previous output node.
    if isinstance(pz, Node):
        pz.attrs._pfgate = pre_act
    return node
def _oper_gpu(cls, x, w):
    """Run ``cu.cuembedding_forward`` for ``x`` against the table ``w``.

    Args:
        x: Input; ``len(x)`` gives the output rows.
        w: Weight table; ``len(w[0])`` gives the output columns.

    Returns:
        The node built by ``cls._create_node``; ``x`` and ``w`` are stored
        on its ``attrs``.
    """
    rows = len(x)
    cols = len(w[0])
    out = GPUValue(shape=(rows, cols))
    cu.cuembedding_forward(get_gpu(x), get_gpu(w), out)

    node = cls._create_node(out)
    node.attrs._x = x
    node.attrs._w = w
    return node
def _backward_gpu(self, context, dy, **kwargs):
    """Back-propagate ``dy`` to both operands of ``-log(lhs) * rhs``.

    Each operand only receives a gradient when it is a ``Node``:

    * ``rhs`` gets ``-dy * log_lhs``.
    * ``lhs`` gets ``-dy * rhs / lhs``.
    """
    attrs = self.attrs

    if isinstance(attrs._rhs, Node):
        grad_rhs = -dy * attrs._log_lhs
        attrs._rhs._update_diff(context, grad_rhs, **kwargs)

    if isinstance(attrs._lhs, Node):
        scaled = -dy * get_gpu(attrs._rhs)
        grad_lhs = scaled / get_gpu(attrs._lhs)
        attrs._lhs._update_diff(context, grad_lhs, **kwargs)
def _backward_gpu(self, context, dy, **kwargs):
    """Back-propagate ``dy`` through the activation.

    Nothing happens unless ``self.attrs._arg`` is a ``Node``. Otherwise the
    local derivative from ``cu.cueru_backward`` is multiplied by ``dy`` and
    by the stored ``_lmda`` factor, then passed to the input's
    ``_update_diff``.
    """
    if not isinstance(self.attrs._arg, Node):
        return

    alpha = self.attrs._alpha
    lmda = self.attrs._lmda

    local_grad = get_gpu(self.attrs._arg).empty_like_me()
    cu.cueru_backward(alpha, get_gpu(self.attrs._arg), local_grad)

    grad = local_grad * get_gpu(dy) * lmda
    self.attrs._arg._update_diff(context, grad, **kwargs)
def _oper_gpu(cls, lhs, rhs, reduce_sum=True):
    """Compute the squared error between ``lhs`` and ``rhs`` on the GPU.

    Args:
        lhs: First operand; ``len(lhs)`` is used as the normalizer ``N``.
        rhs: Second operand; must have more than one dimension.
        reduce_sum: When true the squared differences are summed with
            ``cu.cusum`` before scaling; otherwise they stay element-wise.

    Returns:
        The (optionally summed) squared difference divided by ``2 * N``.
    """
    assert len(rhs.shape) > 1, "Input arrays must have no less than 2 dimension."

    count = len(lhs)
    squared = (get_gpu(lhs) - get_gpu(rhs)) ** 2
    if reduce_sum:
        squared = cu.cusum(squared)
    return squared / (count * 2)
def _backward_gpu(self, context, dy, **kwargs):
    """Back-propagate ``dy`` through the pooling operation.

    The input gradient is computed with ``cu.cuPoolingBackward`` and passed
    to the input's ``_update_diff``.

    The whole computation is skipped when the stored input
    ``self.attrs._x`` is not a ``Node``: in that case nothing consumes the
    gradient, so allocating the buffer and launching the kernel would be
    wasted work.
    """
    if not isinstance(self.attrs._x, Node):
        return

    dx = get_gpu(self.attrs._x).empty_like_me()
    with cu.cudnn_handler() as handle:
        cu.cuPoolingBackward(handle, self.attrs._pool_desc,
                             get_gpu(self.attrs._x), get_gpu(self),
                             get_gpu(dy), dx)
    self.attrs._x._update_diff(context, dx, **kwargs)
def _get_gpu(self, dy, node):
    """Compute the adagrad update for ``dy`` on the GPU.

    The accumulator for each node is kept in ``self._params`` under
    ``id(node)`` and replaced on every call with the buffer ``r`` written
    by ``cu.cu_optimizer_adagrad``.

    Args:
        dy: Incoming gradient.
        node: Object whose ``id`` keys the optimizer state.

    Returns:
        The buffer filled with the update by ``cu.cu_optimizer_adagrad``.
    """
    node_id = id(node)

    # Allocate the zero accumulator only on the first update for this node.
    # Passing it as the default of ``dict.get`` would build a fresh GPU
    # buffer on every call, because the default is evaluated eagerly.
    pdy = self._params.get(node_id)
    if pdy is None:
        pdy = get_gpu(dy).zeros_like_me()

    ndy = get_gpu(dy).empty_like_me()
    r = get_gpu(pdy).empty_like_me()
    cu.cu_optimizer_adagrad(self._lr, self._epsilon,
                            get_gpu(dy), get_gpu(pdy), ndy, r)
    self._params[node_id] = r
    return ndy
def _oper_gpu(cls, x, n, k, a, b):
    """Apply local response normalization to ``x`` on the GPU.

    Args:
        x: Input value.
        n: Descriptor parameter.
        k: Descriptor parameter.
        a: Descriptor parameter.
        b: Descriptor parameter.

    The descriptor is built as ``cu.LRNDescriptor(n, a, b, k)``; note that
    this order differs from the order of this method's parameters.

    Returns:
        The node built by ``cls._create_node``; ``x`` and the descriptor
        are stored on its ``attrs``.
    """
    descriptor = cu.LRNDescriptor(n, a, b, k)
    out = get_gpu(x).empty_like_me()

    with cu.cudnn_handler() as handle:
        cu.cuLocalResponseNormalizationForward(
            handle, descriptor, get_gpu(x), get_gpu(out))

    node = cls._create_node(out)
    node.attrs._x = x
    node.attrs._lrn_desc = descriptor
    return node
def _oper_gpu(cls, x, dropout_ratio):
    """Apply dropout to ``x`` on the GPU.

    A mask shaped like ``x`` is filled by ``rand_bernoulli`` with the
    argument ``1 - dropout_ratio``, divided by ``dropout_ratio`` and
    multiplied into ``x``.

    Returns:
        The node built by ``cls._create_node``; ``x`` and the scaled mask
        are stored on its ``attrs``.
    """
    raw_mask = get_gpu(x).empty_like_me()
    curand_generator().rand_bernoulli(raw_mask, 1 - dropout_ratio)
    scaled_mask = raw_mask / dropout_ratio

    dropped = get_gpu(x) * scaled_mask

    node = cls._create_node(dropped)
    node.attrs._x = x
    node.attrs._mask = scaled_mask
    return node
def _backward_gpu(self, context, dy, **kwargs):
    """Back-propagate ``dy`` through the clipped squared-error loss.

    Nothing happens unless ``self.attrs._lhs`` is a ``Node``. Otherwise
    ``(lhs - rhs) * dy`` is passed through ``cu.cumin`` with ``clip[1]``
    and ``cu.cumax`` with ``clip[0]`` in place, divided by ``len(lhs)``
    and handed to ``lhs._update_diff``.
    """
    if not isinstance(self.attrs._lhs, Node):
        return

    count = len(self.attrs._lhs)
    bounds = self.attrs._clip

    residual = get_gpu(self.attrs._lhs) - get_gpu(self.attrs._rhs)
    grad = residual * get_gpu(dy)

    # Both calls write their result back into ``grad``.
    cu.cumin(bounds[1], grad, grad)
    cu.cumax(bounds[0], grad, grad)

    self.attrs._lhs._update_diff(context, grad / count, **kwargs)
def _backward_gpu(self, context, dy, **kwargs):
    """Back-propagate ``dy`` through softmax.

    Nothing happens unless ``self.attrs._arg`` is a ``Node``. Otherwise the
    input gradient is computed by ``cu.cuSoftmaxBackward`` (called with
    ``mode=1``) from this node's output and ``dy``, then passed to the
    input's ``_update_diff``.
    """
    if not isinstance(self.attrs._arg, Node):
        return

    with cu.cudnn_handler() as handle:
        grad = get_gpu(self).empty_like_me()
        cu.cuSoftmaxBackward(handle, get_gpu(self), get_gpu(dy), grad, mode=1)
    self.attrs._arg._update_diff(context, grad, **kwargs)
def _get_gpu(self, dy, node):
    """Compute the SGD (with momentum) update for ``dy`` on the GPU.

    The previous update for each node is kept in ``self._params`` under
    ``id(node)``; it is only stored when ``self._momentum`` is positive.

    Args:
        dy: Incoming gradient.
        node: Object whose ``id`` keys the optimizer state.

    Returns:
        The buffer filled with the update by ``cu.cu_optimizer_sgd``.
    """
    node_id = id(node)

    # Allocate the zero "previous update" only when none is stored.
    # Passing it as the default of ``dict.get`` would build a fresh GPU
    # buffer on every call, because the default is evaluated eagerly.
    pdy = self._params.get(node_id)
    if pdy is None:
        pdy = get_gpu(dy).zeros_like_me()

    ndy = get_gpu(dy).empty_like_me()
    cu.cu_optimizer_sgd(self._lr, self._momentum,
                        get_gpu(dy), get_gpu(pdy), ndy)

    # Without momentum the previous update is never read, so skip storing it.
    if self._momentum > 0:
        self._params[node_id] = ndy
    return ndy
def _oper_gpu(cls, lhs, rhs, reduce_sum):
    """Compute ``-log(lhs + 1e-8) * rhs`` on the GPU.

    Args:
        lhs: Value whose logarithm is taken (after adding ``1e-8``).
        rhs: Value multiplied with the logarithm.
        reduce_sum: When true the product is summed with ``cu.cusum``
            before negation; otherwise it stays element-wise.

    Returns:
        The node built by ``cls._create_node``; ``log_lhs``, ``rhs`` and
        ``lhs`` are stored on its ``attrs``.
    """
    log_lhs = log(lhs + 1e-8)
    weighted = get_gpu(log_lhs * rhs)

    if reduce_sum:
        loss = -cu.cusum(weighted)
    else:
        loss = -weighted

    node = cls._create_node(loss)
    node.attrs._log_lhs = log_lhs
    node.attrs._rhs = rhs
    node.attrs._lhs = lhs
    return node
def _backward_gpu(self, context, dy, **kwargs):
    """Route ``dy`` to the two operands according to the stored condition.

    ``_a`` receives ``dy`` at positions indexed by ``_condition`` and zero
    elsewhere; ``_b`` receives ``dy`` at positions indexed by the negated
    condition and zero elsewhere. An operand is skipped unless it is a
    ``Node``.
    """
    attrs = self.attrs

    first = attrs._a
    if isinstance(first, Node):
        selected = attrs._condition
        grad_a = get_gpu(first).zeros_like_me()
        grad_a[selected] = dy[selected]
        first._update_diff(context, grad_a, **kwargs)

    second = attrs._b
    if isinstance(second, Node):
        # NOTE(review): the complement is taken with unary minus. If
        # ``_condition`` is a NumPy boolean array this raises TypeError on
        # current NumPy (``~`` / ``logical_not`` is required) - confirm the
        # condition's type before changing it.
        rejected = -attrs._condition
        grad_b = get_gpu(second).zeros_like_me()
        grad_b[rejected] = dy[rejected]
        second._update_diff(context, grad_b, **kwargs)
def _oper_gpu(cls, x, drop_out_ratio):
    """Apply dropout with one mask value per ``(x.shape[0], x.shape[1])``.

    A mask of shape ``(x.shape[0], x.shape[1], 1, 1)`` is filled by
    ``rand_bernoulli`` with the argument ``1 - drop_out_ratio``, divided
    by ``drop_out_ratio``, expanded by multiplying with a ones buffer
    shaped like ``x``, and finally multiplied into ``x``.

    Returns:
        The node built by ``cls._create_node``; ``x`` and the expanded
        mask are stored on its ``attrs``.
    """
    compact = GPUValue(shape=(x.shape[0], x.shape[1], 1, 1))
    curand_generator().rand_bernoulli(compact, 1 - drop_out_ratio)

    scaled = compact / drop_out_ratio
    # Multiply by ones shaped like x to expand the compact mask.
    full_mask = scaled * get_gpu(x).ones_like_me()

    dropped = get_gpu(x) * get_gpu(full_mask)

    node = cls._create_node(dropped)
    node.attrs._x = x
    node.attrs._mask = full_mask
    return node
def _backward_gpu(self, context, dy, **kwargs):
    """Back-propagate ``dy`` to the input ``_x`` and the scale ``_w``.

    For ``_x`` (when it is a ``Node``) the gradient is
    ``(dy * norm - sum(x * dy, axis=1) * x / norm) / norm ** 2`` multiplied
    by ``_w``. For ``_w`` (when it is a ``Node``) the gradient is
    ``dy * (x / norm)`` summed over axes ``(0, 2, 3)`` with kept dims.
    """
    attrs = self.attrs
    norm = attrs._norm

    if isinstance(attrs._x, Node):
        projection = rm.sum(attrs._x * dy, axis=1, keepdims=True)
        grad_x = dy * norm - (projection * attrs._x) / norm
        grad_x = grad_x / (norm * norm)
        attrs._x._update_diff(context, get_gpu(grad_x * attrs._w), **kwargs)

    if isinstance(attrs._w, Node):
        per_element = dy * (attrs._x / norm)
        # The reduction is done on the array returned by as_ndarray().
        grad_w = rm.sum(per_element.as_ndarray(), axis=(0, 2, 3), keepdims=True)
        attrs._w._update_diff(context, get_gpu(grad_w), **kwargs)
def _oper_gpu(cls, x, prev_pool):
    """Map ``x`` back to the input shape of an earlier pooling node.

    Runs ``cu.cuPoolingBackward`` with the descriptor, input and output of
    ``prev_pool``, using ``x`` in the gradient position.

    Args:
        x: Value shaped like the output of ``prev_pool``
            (presumably - confirm against callers).
        prev_pool: Pooling node whose ``attrs`` supply the descriptor, the
            original input and the kernel/stride/padding settings.

    Returns:
        The node built by ``cls._create_node``; ``x`` and the settings
        copied from ``prev_pool`` are stored on its ``attrs``.
    """
    pool_attrs = prev_pool.attrs
    restored = GPUValue(shape=pool_attrs._x.shape)

    with cu.cudnn_handler() as handle:
        cu.cuPoolingBackward(handle, pool_attrs._pool_desc,
                             get_gpu(pool_attrs._x), get_gpu(prev_pool),
                             get_gpu(x), restored)

    node = cls._create_node(restored)
    node.attrs._x = x
    node.attrs._original_x = prev_pool.attrs._x
    node.attrs._kernel = prev_pool.attrs._kernel
    node.attrs._stride = prev_pool.attrs._stride
    node.attrs._padding = prev_pool.attrs._padding
    return node
def _oper_gpu(cls, x, rois, ch, h, w, n_rois, outh, outw, spatial_scale):
    """Run ROI pooling on the GPU with ``cu.curoi_pool2d_forward``.

    Args:
        x: Input feature map.
        rois: Regions of interest; converted with ``get_gpu`` before use.
        ch: Size forwarded to the kernel; also the second output axis.
        h: Size forwarded to the kernel.
        w: Size forwarded to the kernel.
        n_rois: Leading dimension of the output.
        outh: Pooled output height.
        outw: Pooled output width.
        spatial_scale: Scale factor forwarded to the kernel.

    Returns:
        The node wrapping the ``(n_rois, ch, outh, outw)`` output. The
        index buffer written by the kernel, ``x``, the GPU copy of
        ``rois`` and the pooling settings are stored on its ``attrs``.
    """
    pooled = GPUValue(shape=(n_rois, ch, outh, outw))
    argmax_buf = pooled.empty_like_me()
    gpu_rois = get_gpu(rois)

    cu.curoi_pool2d_forward(gpu_rois, get_gpu(x), spatial_scale,
                            ch, h, w, outh, outw, pooled, argmax_buf)

    node = cls._create_node(pooled)
    node.attrs._index = argmax_buf
    node.attrs._x = x
    node.attrs._rois = gpu_rois
    node.attrs._outh = outh
    node.attrs._outw = outw
    node.attrs._spatial_scale = spatial_scale
    return node
def _oper_gpu(cls, x, pz, ps, w, wr, b):
    """Run one LSTM forward step on the GPU.

    Args:
        x: Input for this step.
        pz: Previous output; only read when ``ps`` is not None.
        ps: Previous state, or None to start from zeros.
        w: Weights applied to ``x``.
        wr: Weights applied to the previous output.
        b: Optional bias added to the pre-activation, skipped when None.

    Returns:
        The node wrapping the new output. Inputs, the pre-activation and
        both states are stored on its ``attrs``; the new state is also set
        directly as ``ret._state``.
    """
    if ps is None:
        # No history: zero buffers of shape (x.shape[0], w.shape[1] // 4).
        template = GPUValue(shape=(x.shape[0], w.shape[1] // 4))
        prev_state = template.zeros_like_me()
        prev_out = template.zeros_like_me()
    else:
        prev_state = ps
        prev_out = get_gpu(pz)

    pre_act = dot(x, w) + dot(prev_out, wr)
    if b is not None:
        pre_act += b

    out = get_gpu(prev_out).empty_like_me()
    new_state = get_gpu(prev_state).empty_like_me()

    # The pre-activation buffer is passed to the activation kernel first,
    # then to the main forward kernel.
    cu.culstm_forward_activate(get_gpu(pre_act))
    cu.culstm_forward(get_gpu(pre_act), get_gpu(new_state),
                      get_gpu(prev_state), get_gpu(out))

    node = cls._create_node(out)
    node.attrs._x = x
    node.attrs._w = w
    node.attrs._wr = wr
    node.attrs._b = b
    node.attrs._pz = pz
    node.attrs._u = pre_act
    node.attrs._pstate = prev_state
    node.attrs._state = new_state
    node._state = new_state

    # Expose this step's pre-activation on the previous output node.
    if isinstance(pz, Node):
        pz.attrs._pfgate = pre_act
    return node
def _oper_gpu(cls, arg, axis=None, keepdims=False):
    """Compute the mean of ``arg`` over ``axis`` on the GPU.

    The sum from ``cusum`` is divided by the number of reduced elements
    with ``cudiv``.

    Negative axes are normalized before the output shape and the element
    count are derived. Previously ``axis=-1`` with ``keepdims=False``
    produced a wrong output shape, and negative entries in a tuple axis
    were ignored.

    Args:
        arg: Input value exposing ``shape``.
        axis: None (reduce everything), an int, or a tuple of ints.
            Negative values count from the last axis.
        keepdims: When true, reduced axes are kept with length 1.

    Returns:
        A ``GPUValue`` holding the mean.

    Raises:
        TypeError: If ``axis`` is not None, an int or a tuple.
        IndexError: If an axis is out of range for ``arg``.
    """
    shape = tuple(arg.shape)
    ndim = len(shape)

    if axis is None:
        gpu_axis = None
        reduced = set(range(ndim))
    elif isinstance(axis, (int, tuple)):
        requested = axis if isinstance(axis, tuple) else (axis,)
        normalized = []
        for a in requested:
            if not -ndim <= a < ndim:
                raise IndexError(
                    "axis %d is out of bounds for array of dimension %d"
                    % (a, ndim))
            normalized.append(a + ndim if a < 0 else a)
        reduced = set(normalized)
        # Hand cusum the same kind of value (int or tuple) it was given.
        gpu_axis = tuple(normalized) if isinstance(axis, tuple) else normalized[0]
    else:
        raise TypeError(
            "axis must be None, an int or a tuple of ints, not %s"
            % type(axis).__name__)

    # Number of elements averaged per output element.
    size = 1
    for r in reduced:
        size *= shape[r]

    if keepdims:
        newshape = tuple(1 if r in reduced else dim
                         for r, dim in enumerate(shape))
    else:
        newshape = tuple(dim for r, dim in enumerate(shape)
                         if r not in reduced)

    ret = GPUValue(shape=newshape)
    cudiv(cusum(get_gpu(arg), axis=gpu_axis, keepdims=keepdims), size, ret)
    return ret
def _backward_gpu(self, context, dy, **kwargs):
    """Split ``dy`` and send each piece back to its concatenated input.

    ``dy`` is split at the stored ``_index`` positions along ``_axis``.
    Piece ``i`` goes to the input stored as ``attrs._arg<i>`` when that
    input is a ``Node``.
    """
    attrs = self.attrs
    pieces = get_gpu(dy).split(attrs._index, axis=attrs._axis)

    # One more input than there are split positions.
    n_inputs = len(attrs._index) + 1
    for position in range(n_inputs):
        source = getattr(attrs, "_arg%d" % position)
        if not isinstance(source, Node):
            continue
        source._update_diff(context, pieces[position], **kwargs)
def _oper_gpu(cls, args, axis):
    """Concatenate ``args`` along ``axis`` on the GPU with ``cuconcat``.

    A negative ``axis`` is normalized against the rank of ``args[0]``
    before the output shape is built. Previously ``axis=-1`` produced a
    wrong shape, because ``shape[axis + 1:]`` became the whole shape.

    Args:
        args: Non-empty sequence of inputs exposing ``shape``.
        axis: Axis to join along; negative values count from the last
            axis.

    Returns:
        A ``GPUValue`` whose ``axis`` extent is the sum of the inputs'
        extents; all other extents are taken from ``args[0]``.
    """
    base_shape = tuple(args[0].shape)
    if axis < 0:
        axis += len(base_shape)

    joined = sum(a.shape[axis] for a in args)
    newshape = base_shape[:axis] + (joined,) + base_shape[axis + 1:]

    ret = GPUValue(shape=newshape)
    cuconcat([get_gpu(a) for a in args], ret, axis)
    return ret