def _backward_gpu(self, context, dy):
    """Route the incoming gradient to the two operands of a column-wise join.

    ``dy`` is materialised through ``get_gpu(dy).new_array()`` (presumably a
    host-side copy -- confirm against ``GPUValue``), split with ``np.hsplit``
    at ``self.attrs._index``, and each piece is wrapped in a ``GPUValue`` and
    handed to the matching operand when that operand is a ``Node``.

    Args:
        context: Gradient context forwarded unchanged to ``_update_diff``.
        dy: Gradient flowing into this node.
    """
    saved = self.attrs
    host_grad = get_gpu(dy).new_array()
    left_part, right_part = np.hsplit(host_grad, [saved._index])

    # Left operand first, then right, matching the original update order.
    for attr_name, piece in (("_lhs", left_part), ("_rhs", right_part)):
        operand = getattr(saved, attr_name)
        if isinstance(operand, Node):
            operand._update_diff(context, GPUValue(piece))
def _oper_gpu(cls, x, pz, ps, w, wr, b):
    """Run one LSTM forward step on the GPU and build the result node.

    Args:
        x: Input for this step.
        pz: Previous output; only consulted when ``ps`` is not ``None``.
        ps: Previous cell state, or ``None`` for the first step, in which
            case both the previous state and previous output are zero
            buffers of shape ``(x.shape[0], w.shape[1] // 4)``.
        w: Input weight.
        wr: Recurrent weight.
        b: Bias added to the pre-activation.

    Returns:
        The node created by ``cls._create_node`` around the new output, with
        the inputs, pre-activation and states recorded on ``attrs``.
    """
    if ps is not None:
        prev_state = ps
        prev_out = get_gpu(pz)
    else:
        template = GPUValue(shape=(x.shape[0], w.shape[1] // 4))
        prev_state = template.zeros_like_me()
        prev_out = template.zeros_like_me()

    pre_act = dot(x, w) + dot(prev_out, wr) + b
    out = get_gpu(prev_out).empty_like_me()
    new_state = get_gpu(prev_state).empty_like_me()

    # The activation kernel is applied to the pre-activation buffer first,
    # then the forward kernel receives that same buffer plus the state and
    # output buffers.
    cu.culstm_forward_activate(get_gpu(pre_act))
    cu.culstm_forward(
        get_gpu(pre_act),
        get_gpu(new_state),
        get_gpu(prev_state),
        get_gpu(out),
    )

    node = cls._create_node(out)
    saved = node.attrs
    saved._x = x
    saved._w = w
    saved._wr = wr
    saved._b = b
    saved._pz = pz
    saved._u = pre_act
    saved._pstate = prev_state
    saved._state = new_state
    node._state = new_state

    # Expose this step's pre-activation on the previous output node.
    if isinstance(pz, Node):
        pz.attrs._pfgate = pre_act

    return node
def _oper_gpu(cls, x, pz, ps, w, wr, wc, b):
    """Run one peephole-LSTM forward step on the GPU.

    Args:
        x: Input for this step.
        pz: Previous output; only consulted when ``ps`` is not ``None``.
        ps: Previous cell state, or ``None`` for the first step (zero
            buffers of shape ``(x.shape[0], w.shape[1] // 4)`` are used).
        w: Input weight.
        wr: Recurrent weight.
        wc: Peephole weight passed to the forward kernel.
        b: Bias added to the pre-activation.

    Returns:
        The node created by ``cls._create_node`` around the new output, with
        inputs, pre-activation and states recorded on ``attrs``. Note that
        ``attrs._pstate`` keeps the caller's ``ps`` (possibly ``None``), not
        the zero buffer substituted for it.
    """
    if ps is not None:
        prev_state = get_gpu(ps)
        prev_out = get_gpu(pz)
    else:
        prev_state = GPUValue(
            shape=(x.shape[0], w.shape[1] // 4)).zeros_like_me()
        prev_out = prev_state.zeros_like_me()

    new_state = prev_state.empty_like_me()
    pre_act = get_gpu(op.dot(x, w) + op.dot(prev_out, wr) + b)
    out = prev_out.zeros_like_me()
    cu.cupeepholelstm_forward(
        pre_act, get_gpu(wc), prev_state, new_state, out)

    node = cls._create_node(out)
    saved = node.attrs
    saved._x = x
    saved._w = w
    saved._wr = wr
    saved._wc = wc
    saved._b = b
    saved._u = pre_act
    saved._pz = pz
    saved._pstate = ps
    saved._state = new_state

    # Expose this step's pre-activation on the previous output node.
    if isinstance(pz, Node):
        pz.attrs._pfgate = pre_act

    return node
def _oper_gpu(cls, x, pz, ps, parameter):
    """Run one LSTM forward step on the GPU using a parameter mapping.

    Args:
        x: Input for this step.
        pz: Previous output; only consulted when ``ps`` is not ``None``.
        ps: Previous cell state, or ``None`` for the first step (zero
            buffers of shape ``(x.shape[0], parameter["w"].shape[1] // 4)``
            are used).
        parameter: Mapping that provides the ``"w"``, ``"wr"`` and ``"b"``
            entries.

    Returns:
        The node created by ``cls._create_node`` around the new output, with
        the input, the parameter mapping, the pre-activation and the states
        recorded on ``attrs``.
    """
    weight = parameter["w"]
    if ps is not None:
        prev_state = ps
        prev_out = get_gpu(pz)
    else:
        template = GPUValue(shape=(x.shape[0], weight.shape[1] // 4))
        prev_state = template.zeros_like_me()
        prev_out = template.zeros_like_me()

    pre_act = (
        dot(x, parameter["w"])
        + dot(prev_out, parameter["wr"])
        + parameter["b"]
    )
    out = get_gpu(prev_out).empty_like_me()
    new_state = get_gpu(prev_state).empty_like_me()

    # Activation kernel first, then the forward kernel on the same buffer.
    cu.culstm_forward_activate(get_gpu(pre_act))
    cu.culstm_forward(
        get_gpu(pre_act),
        get_gpu(new_state),
        get_gpu(prev_state),
        get_gpu(out),
    )

    node = cls._create_node(out)
    saved = node.attrs
    saved._x = x
    saved._p = parameter
    saved._u = pre_act
    saved._pstate = prev_state
    saved._state = new_state
    # Recurrent weight first, then input weight.
    saved._dt_d = [parameter["wr"], parameter["w"]]
    node._state = new_state

    # Expose this step's pre-activation on the previous output node.
    if isinstance(pz, Node):
        pz.attrs._pfgate = pre_act

    return node
def test_negate_float():
    """``thrust_float.negate`` must write the element-wise negation of its
    input into the destination buffer."""
    host_values = np.array(np.random.rand(4))
    source = GPUValue(host_values)
    target = source.empty_like_me()

    thrust_float.negate(source, target)

    expected = host_values * -1
    assert np.allclose(expected, target.to_array())
def _oper_gpu(cls, arg, axis=None):
    """Sum ``arg`` on the GPU, optionally along one axis.

    Three paths are taken depending on ``axis``:

    * ``None`` -- ``cusum`` reduces the whole input; the result is passed
      through ``precision`` and wrapped in a ``GPUValue``.
    * ``0`` -- ``cusum`` writes into a zero-initialised buffer whose shape
      is ``arg.shape`` without its first dimension.
    * anything else -- the data is fetched with ``new_array()`` and reduced
      with ``np.sum`` before being wrapped in a ``GPUValue`` again.

    Args:
        arg: Value to reduce.
        axis: Axis to reduce over, or ``None`` for a full reduction.

    Returns:
        A ``GPUValue`` holding the reduced result.
    """
    if axis is None:
        return GPUValue(precision(cusum(get_gpu(arg))))

    if axis == 0:
        reduced_shape = tuple(
            extent for pos, extent in enumerate(arg.shape) if pos != axis)
        out = GPUValue(np.zeros(shape=reduced_shape, dtype=arg.dtype))
        cusum(get_gpu(arg), get_gpu(out), axis=axis)
        return out

    # No device kernel is used for the remaining axes.
    host_copy = get_gpu(arg).new_array()
    return GPUValue(np.sum(host_copy, axis=axis))
def _backward_gpu(self, context, dy, **kwargs):
    """Propagate ``dy`` to both operands of a matrix product.

    For each operand that is a ``Node`` a buffer with the operand's shape is
    allocated, filled by ``cublas_gemm`` and passed to ``_update_diff``.
    The integer flags given to ``cublas_gemm`` are presumably transpose
    switches (left gradient: ``dy`` with ``rhs`` flagged; right gradient:
    ``lhs`` flagged with ``dy``) -- confirm against the ``cublas_gemm``
    wrapper.

    Args:
        context: Gradient context forwarded to ``_update_diff``.
        dy: Gradient flowing into this node.
        **kwargs: Forwarded unchanged to every ``_update_diff`` call.
    """
    left = self.attrs._lhs
    right = self.attrs._rhs

    if isinstance(left, Node):
        left_grad = GPUValue(shape=left.shape)
        cublas_gemm(get_gpu(dy), 0, get_gpu(right), 1, get_gpu(left_grad))
        left._update_diff(context, left_grad, **kwargs)

    if isinstance(right, Node):
        right_grad = GPUValue(shape=right.shape)
        cublas_gemm(get_gpu(left), 1, get_gpu(dy), 0, get_gpu(right_grad))
        right._update_diff(context, right_grad, **kwargs)
def _oper_gpu(cls, x, w):
    """Run the embedding forward kernel and wrap the result in a node.

    The output buffer has ``len(x)`` rows and ``len(w[0])`` columns.

    Args:
        x: Input passed to ``cu.cuembedding_forward``.
        w: Weight passed to ``cu.cuembedding_forward``.

    Returns:
        The node created by ``cls._create_node``, with ``x`` and ``w``
        recorded on ``attrs``.
    """
    rows = len(x)
    cols = len(w[0])
    out = GPUValue(shape=(rows, cols))
    cu.cuembedding_forward(get_gpu(x), get_gpu(w), out)

    node = cls._create_node(out)
    node.attrs._x = x
    node.attrs._w = w
    return node
def _oper_gpu(cls, args, axis):
    """Concatenate a sequence of values along ``axis`` on the GPU.

    The output shape is the first argument's shape with the ``axis``
    dimension replaced by the sum of every argument's extent on that axis.

    Args:
        args: Sequence of values to join.
        axis: Axis along which to join them.

    Returns:
        A ``GPUValue`` filled by ``cuconcat``.
    """
    head_shape = args[0].shape
    joined_extent = np.sum([item.shape[axis] for item in args])
    newshape = head_shape[:axis] + (joined_extent, ) + head_shape[axis + 1:]

    out = GPUValue(shape=newshape)
    cuconcat([get_gpu(item) for item in args], out, axis)
    return out
def _oper_gpu(cls, lhs, rhs):
    """Concatenate ``lhs`` and ``rhs`` along axis 1 on the GPU.

    The output shape is ``lhs.shape`` with dimension 1 replaced by the sum
    of both operands' extents on that axis.

    Returns:
        A ``GPUValue`` filled by ``cuconcat``.
    """
    axis = 1
    before = lhs.shape[:axis]
    joined_extent = lhs.shape[axis] + rhs.shape[axis]
    after = lhs.shape[axis + 1:]

    out = GPUValue(shape=before + (joined_extent, ) + after)
    cuconcat(get_gpu(lhs), get_gpu(rhs), out, axis)
    return out
def _oper_gpu(cls, x, in_shape, out_shape, karnel, stride, padding):
    """Run a cuDNN pooling forward pass and wrap the result in a node.

    The pooling descriptor is created with ``pool_mode=1``; which pooling
    variant that selects is defined by ``cu.createPoolingDescriptor``.

    Args:
        x: Input; ``x.shape[0]`` becomes the leading output dimension.
        in_shape: Not used by this method.
        out_shape: Output shape without the leading dimension.
        karnel: Kernel size given to the pooling descriptor.
        stride: Stride given to the pooling descriptor.
        padding: Padding given to the pooling descriptor.

    Returns:
        The node created by ``cls._create_node``, with the pooling
        descriptor and ``x`` recorded on ``attrs``.
    """
    batch = x.shape[0]
    descriptor = cu.createPoolingDescriptor(
        karnel, padding, stride, pool_mode=1)
    out = GPUValue(shape=tuple([batch] + list(out_shape)))

    with cu.cudnn_handler() as handle:
        cu.cuPoolingForward(handle, descriptor, x, out)

    node = cls._create_node(out)
    node.attrs._pool_desc = descriptor
    node.attrs._x = x
    return node
def _oper_gpu(cls, x, drop_out_ratio):
    """Apply a per-(sample, channel) random mask to ``x`` on the GPU.

    A mask of shape ``(x.shape[0], x.shape[1], 1, 1)`` is filled by
    ``rand_bernoulli`` with the argument ``1 - drop_out_ratio``, divided by
    ``drop_out_ratio``, multiplied by a ones buffer shaped like ``x``
    (presumably to expand it to the full input shape) and finally multiplied
    with ``x``.

    Args:
        x: Input value.
        drop_out_ratio: Ratio used both for sampling and for rescaling.

    Returns:
        The node created by ``cls._create_node``, with ``x`` and the
        expanded mask recorded on ``attrs``.
    """
    channel_mask = GPUValue(shape=(x.shape[0], x.shape[1], 1, 1))
    curand_generator().rand_bernoulli(channel_mask, 1 - drop_out_ratio)

    scaled_mask = channel_mask / drop_out_ratio
    full_mask = scaled_mask * get_gpu(x).ones_like_me()
    masked = get_gpu(x) * get_gpu(full_mask)

    node = cls._create_node(masked)
    node.attrs._x = x
    node.attrs._mask = full_mask
    return node
def _backward_gpu(self, context, dy, **kwargs):
    """Backward pass of the peephole-LSTM node on the GPU.

    Collects the values saved on ``self.attrs`` by the forward pass, calls
    ``cu.cupeepholelstm_backward`` and then forwards gradients to every
    input that is a ``Node``.

    Args:
        context: Gradient context; also used to ``restore``/``store``
            values keyed by ``w`` and ``wr``.
        dy: Gradient flowing into this node; unpacked as a 2-D shape.
        **kwargs: Accepted but not forwarded to any ``_update_diff`` call.
            NOTE(review): confirm whether that is intentional.
    """
    n, m = dy.shape
    w = self.attrs._w
    wr = self.attrs._wr
    wc = self.attrs._wc
    b = self.attrs._b
    u = self.attrs._u
    s = self.attrs._state
    # A missing previous state is replaced by zeros shaped like the state.
    ps = get_gpu(s).zeros_like_me(
    ) if self.attrs._pstate is None else self.attrs._pstate
    # Values stored under ``w`` / ``wr`` by an earlier call (see the
    # ``context.store`` calls below); zero buffers are the fallbacks.
    # Note: the local name ``dot`` is a restored value, not a function.
    dot = context.restore(w, get_gpu(dy).zeros_like_me())
    drt = context.restore(wr, get_gpu(u).zeros_like_me())
    # ``_pfgate`` is only present on ``attrs`` if something set it; the zero
    # default is built eagerly either way.
    pfg = getattr(self.attrs, "_pfgate", get_gpu(u).zeros_like_me())
    # Freshly allocated buffers passed last to the kernel -- presumably its
    # outputs (TODO confirm against the kernel signature).
    dr = get_gpu(drt).empty_like_me()
    dwc = GPUValue(shape=(n, m * 3))
    dou = get_gpu(dot).empty_like_me()
    cu.cupeepholelstm_backward(
        *map(get_gpu, (u, ps, s, pfg, wc, dy, drt, dot, dr, dou, dwc)))
    # Saved so that a later call can restore them via the lookups above.
    context.store(wr, dr)
    context.store(w, dou)
    if isinstance(self.attrs._x, Node):
        dx = op.dot(dr, w.T)
        self.attrs._x._update_diff(context, dx)
    if isinstance(w, Node):
        w._update_diff(context, op.dot(self.attrs._x.T, dr))
    if isinstance(wr, Node):
        # NOTE(review): this uses the restored ``drt`` while the other
        # updates use ``dr`` -- confirm this is intended.
        wr._update_diff(context, op.dot(self.T, drt))
    if isinstance(wc, Node):
        wc._update_diff(context, op.sum(dwc, axis=0))
    if isinstance(b, Node):
        b._update_diff(context, op.sum(dr, axis=0))
    if isinstance(self.attrs._pz, Node):
        self.attrs._pz._update_diff(context, op.dot(dr, wr.T))
def _oper_gpu(cls, x, w, b, in_shape, out_shape, kernel, stride, padding):
    """Compute the forward result via ``cu.cuConvolutionBackwardData``.

    Args:
        x: Input; ``x.shape[0]`` becomes the leading output dimension.
        w: Filter weights; ``w.shape`` configures the filter descriptor.
        b: Optional bias, added with ``cu.cu_add_bias`` when not ``None``.
        in_shape: Not used by this method.
        out_shape: Output shape without the leading dimension.
        kernel: Not used by this method.
        stride: Stride given to the convolution descriptor.
        padding: Padding given to the convolution descriptor.

    Returns:
        The node created by ``cls._create_node``, with both descriptors and
        the inputs recorded on ``attrs``.
    """
    descriptor = cu.ConvolutionDescriptor(padding, stride, precision)
    filter_descriptor = cu.FilterDescriptor(w.shape, precision)
    batch = x.shape[0]

    # TODO: the output-shape construction is flagged as dirty code upstream.
    out = GPUValue(shape=tuple([batch] + list(out_shape)))

    with cu.cudnn_handler() as handle:
        cu.cuConvolutionBackwardData(
            handle, descriptor, filter_descriptor, w, x, out)

    if b is not None:
        cu.cu_add_bias(get_gpu(b), out)

    node = cls._create_node(out)
    saved = node.attrs
    saved._conv_desc = descriptor
    saved._filter_desc = filter_descriptor
    saved._x = x
    saved._w = w
    saved._b = b
    return node
def _oper_gpu(cls, x, w, b, in_shape, out_shape, kernel, stride, padding):
    """Run a cuDNN convolution forward pass and wrap the result in a node.

    Args:
        x: Input; ``x.shape[0]`` becomes the leading output dimension.
        w: Filter weights; ``w.shape`` configures the filter descriptor.
        b: Optional bias, added in place with ``cu.cuadd`` when not ``None``.
        in_shape: Not used by this method.
        out_shape: Output shape without the leading dimension.
        kernel: Not used by this method.
        stride: Stride given to the convolution descriptor.
        padding: Padding given to the convolution descriptor.

    Returns:
        The node created by ``cls._create_node``, with both descriptors and
        the inputs recorded on ``attrs``.
    """
    batch = x.shape[0]
    # The spelling of ``createConvplutionDescriptor`` matches the name the
    # ``cu`` module is called with here; do not "correct" it locally.
    descriptor = cu.createConvplutionDescriptor(padding, stride, precision)
    filter_descriptor = cu.createFilterDescriptor(w.shape, precision)

    # TODO: the output-shape construction is flagged as dirty code upstream.
    out = GPUValue(shape=tuple([batch] + list(out_shape)))

    with cu.cudnn_handler() as handle:
        cu.cuConvolutionForward(
            handle, descriptor, filter_descriptor, x, w, out)

    if b is not None:
        # The output buffer is both an input and the destination of the add.
        cu.cuadd(get_gpu(out), get_gpu(b), get_gpu(out))

    node = cls._create_node(out)
    saved = node.attrs
    saved._conv_desc = descriptor
    saved._filter_desc = filter_descriptor
    saved._x = x
    saved._w = w
    saved._b = b
    return node
def _oper_gpu(cls, x, w, b, in_shape, out_shape, kernel, stride, padding):
    """Run a cuDNN convolution forward pass and wrap the result in a node.

    Args:
        x: Input; ``x.shape[0]`` becomes the leading output dimension.
        w: Filter weights; ``w.shape`` configures the filter descriptor.
        b: Optional bias, added with ``cu.cu_add_bias`` when not ``None``.
        in_shape: Not used by this method.
        out_shape: Output shape without the leading dimension.
        kernel: Not used by this method.
        stride: Stride given to the convolution descriptor.
        padding: Padding given to the convolution descriptor.

    Returns:
        The node created by ``cls._create_node``, with both descriptors and
        the inputs recorded on ``attrs``.
    """
    batch = x.shape[0]
    descriptor = cu.ConvolutionDescriptor(padding, stride, precision)
    filter_descriptor = cu.FilterDescriptor(w.shape, precision)
    out = GPUValue(shape=tuple([batch] + list(out_shape)))

    with cu.cudnn_handler() as handle:
        cu.cuConvolutionForward(
            handle, descriptor, filter_descriptor, x, w, out)

    if b is not None:
        cu.cu_add_bias(get_gpu(b), out)

    node = cls._create_node(out)
    saved = node.attrs
    saved._conv_desc = descriptor
    saved._filter_desc = filter_descriptor
    saved._x = x
    saved._w = w
    saved._b = b
    return node
def _oper_gpu(cls, arg):
    """Raise ``arg`` to the power of 2 on the GPU.

    Returns:
        A new ``GPUValue`` with ``arg``'s shape, filled by ``cupow``.
    """
    squared = GPUValue(shape=arg.shape)
    cupow(get_gpu(arg), 2, squared)
    return squared
def _oper_gpu(cls, condition, a, b):
    """Select elements from ``a`` or ``b`` according to ``condition``.

    The selection itself is done with ``np.where``; each operand is first
    converted through ``get_gpu`` and, when the converted object offers a
    ``new_array`` method, replaced by the array that method returns.
    Operands without such a method (plain numbers, for example) are passed
    to ``np.where`` unchanged.

    Args:
        condition: Condition handed to ``np.where`` as-is.
        a: Value chosen where ``condition`` is true.
        b: Value chosen where ``condition`` is false.

    Returns:
        A ``GPUValue`` wrapping the result of ``np.where``.
    """
    def _fetch_array(value):
        # ``getattr`` takes the bare attribute name. Looking up the string
        # "new_array()" (with parentheses) can never match, so the fallback
        # would always win and the device data would never be fetched.
        fetch = getattr(get_gpu(value), "new_array", None)
        return fetch() if callable(fetch) else value

    a_cpu = _fetch_array(a)
    b_cpu = _fetch_array(b)
    return GPUValue(np.where(condition, a_cpu, b_cpu))
def _oper_gpu(cls, lhs, rhs):
    """Multiply ``lhs`` by ``rhs`` on the GPU with ``cublas_gemm``.

    The output has ``lhs.shape[0]`` rows and ``rhs.shape[1]`` columns. Both
    flag arguments to ``cublas_gemm`` are 0 (presumably "no transpose" --
    confirm against the wrapper).

    Returns:
        A ``GPUValue`` holding the product.
    """
    rows = lhs.shape[0]
    cols = rhs.shape[1]
    product = GPUValue(shape=(rows, cols))
    cublas_gemm(get_gpu(lhs), 0, get_gpu(rhs), 0, get_gpu(product))
    return product
def _oper_gpu(cls, lhs, rhs):
    """Stack ``lhs`` and ``rhs`` horizontally.

    Both operands are fetched with ``new_array()``, joined with
    ``np.hstack`` and the result is wrapped in a new ``GPUValue``.

    Returns:
        A ``GPUValue`` holding the stacked data.
    """
    left_host = get_gpu(lhs).new_array()
    right_host = get_gpu(rhs).new_array()
    stacked = np.hstack((left_host, right_host))
    return GPUValue(stacked)
def _oper_gpu(cls, arg):
    """Compute the square root of ``arg`` on the GPU.

    The destination is created as ``GPUValue(arg)`` and then passed to
    ``cusqrt`` as its output argument.

    Returns:
        The ``GPUValue`` written by ``cusqrt``.
    """
    rooted = GPUValue(arg)
    cusqrt(get_gpu(arg), rooted)
    return rooted