def backward(self, gy):
    """Dropout backward: in training mode, scale the upstream gradient by
    the mask saved in forward (inverted-dropout scaling); otherwise pass
    gy through unchanged.

    Fix: removed the unused local ``xp = get_array_module(gy)`` — nothing
    in this method needed the array backend.
    """
    if jhML.ProgramConfig.train:
        # same mask and 1/(1-p) rescale as the forward pass applied
        return gy * self.mask / (1.0 - self.dropout_ratio)
    return gy
def logsumexp(self, x, axis=1):
    """Numerically stable log-sum-exp of ``x`` along ``axis``.

    Computes max(x) + log(sum(exp(x - max(x)))), keeping the reduced
    axis so the result broadcasts against ``x``.
    """
    backend = get_array_module(x)
    x_max = x.max(axis=axis, keepdims=True)
    shifted = backend.exp(x - x_max)
    log_total = backend.log(shifted.sum(axis=axis, keepdims=True))
    return x_max + log_total
def backward(self, gy):
    """Gradient of transpose: permute gy by the INVERSE of ``self.axes``.

    Fix: the original computed ``inv_axes`` and then ignored it,
    returning ``gy.transpose(self.axes)``. Applying the forward
    permutation again is wrong for any permutation that is not its own
    inverse; the gradient must undo the forward transpose.

    The inverse permutation is computed in pure Python (equivalent to
    ``argsort(self.axes)``), so no array backend is needed here.
    """
    x = self.inputs[0].data
    if self.axes is None:
        # transpose(None) reverses the axes; the reverse is its own inverse
        inv_axes = tuple(range(x.ndim))[::-1]
    else:
        # position of each axis inside self.axes == argsort(self.axes)
        inv_axes = tuple(sorted(range(len(self.axes)), key=self.axes.__getitem__))
    return gy.transpose(inv_axes)
def forward(self, x):
    """Numerically stable log-softmax of ``x`` along ``self.axis``.

    log_softmax(x) = x - (max(x) + log(sum(exp(x - max(x))))).
    """
    backend = get_array_module(x)
    x_max = x.max(axis=self.axis, keepdims=True)
    exp_shifted = backend.exp(x - x_max)
    log_sum = backend.log(exp_shifted.sum(axis=self.axis, keepdims=True))
    # x_max + log_sum is logsumexp(x); subtracting it normalizes in log space
    return x - (x_max + log_sum)
def update_one(self, param: Parameter):
    """AdaGrad step for one parameter.

    Accumulates squared gradients in ``self.h`` (keyed by parameter id)
    and divides the learning rate by their running root.
    """
    backend = get_array_module(param.data)
    state_key = id(param)
    if state_key not in self.h:
        self.h[state_key] = backend.zeros_like(param.data)
    if self.weight_decay != 0.0:
        # L2 penalty folded directly into the gradient
        param.grad += self.weight_decay * param.data
    grad = param.grad
    self.h[state_key] = self.h[state_key] + grad * grad
    param.data -= self.lr * grad / (self.eps + backend.sqrt(self.h[state_key]))
def update_one(self, param: Parameter):
    """Momentum SGD step for one parameter: v <- m*v - lr*grad; w <- w + v.

    Velocities live in ``self.v``, keyed by parameter id.
    """
    backend = get_array_module(param.data)
    if self.weight_decay != 0.0:
        # L2 penalty folded directly into the gradient
        param.grad += self.weight_decay * param.data
    state_key = id(param)
    if state_key not in self.v:
        self.v[state_key] = backend.zeros_like(param.data)
    velocity = self.m * self.v[state_key] - self.lr * param.grad
    self.v[state_key] = velocity
    param.data += velocity
def forward(self, x):
    """Inverted dropout.

    In training mode, zeroes each element with probability
    ``self.dropout_ratio`` (saving the mask for backward) and rescales
    the survivors by 1/(1-p); in evaluation mode it is the identity.
    """
    backend = get_array_module(x)
    if not jhML.ProgramConfig.train:
        return x
    keep = backend.random.rand(*x.shape) > self.dropout_ratio
    self.mask = keep.astype(x.dtype)
    return x * self.mask / (1.0 - self.dropout_ratio)
def forward(self, x, gt):
    """Softmax cross-entropy loss, averaged over the batch.

    x:  (N, C) raw class scores.
    gt: integer class labels; raveled to length N.
    Returns the scalar mean negative log-likelihood, optionally
    weighted per class by ``self.weight``.

    Fix: the weight branch read ``self.weights`` after testing
    ``self.weight`` — an AttributeError whenever class weights were
    actually supplied.
    """
    xp = get_array_module(x)
    N = x.shape[0]
    log_z = self.logsumexp(x)
    log_p = x - log_z
    # pick each row's log-probability of its target class
    log_p = log_p[xp.arange(N), gt.ravel()]
    if self.weight is not None:
        weight = self.weight.astype(x.dtype)[gt.ravel()]  # was: self.weights
        log_p = weight * log_p
    y = -log_p.sum() / xp.float32(N)
    return y
def backward(self, gy):
    """Gradient of softmax cross-entropy w.r.t. the logits:
    (softmax(x) - onehot(t)) * gy / N, optionally class-weighted.

    Fixes:
    - ``self.weights`` -> ``self.weight`` (AttributeError when class
      weights were used; forward and this method both test
      ``self.weight``).
    - ``gy *= 1 / N`` mutated the caller's gradient array in place;
      the scaling is now done without side effects.
    """
    xp = get_array_module(gy)
    x, t = self.inputs[0].data, self.inputs[1].data
    N, num_class = x.shape
    gy = gy * (1 / N)  # new array; do not clobber the incoming gradient
    y = softmax(x)
    t_onehot = xp.eye(num_class, dtype=t.dtype)[t.ravel()]
    gx = (y.data - t_onehot) * gy
    if self.weight is not None:
        weight = self.weight.astype(x.dtype)[t.ravel()]  # was: self.weights
        gx = gx * weight
    return gx
def forward(self, x):
    """Elementwise natural logarithm of ``x``."""
    backend = get_array_module(x)
    return backend.log(x)
def argmax(x: Variable, axis=1):
    """Indices of the maxima of ``x.data`` along ``axis`` (raw array, not
    a Variable — this op is not differentiated)."""
    data = x.data
    backend = get_array_module(data)
    return backend.argmax(data, axis=axis)
def forward(self, x):
    """Clamp ``x`` elementwise into [self.x_min, self.x_max]."""
    backend = get_array_module(x)
    return backend.clip(x, self.x_min, self.x_max)
def backward(self, gy):
    """Scatter gy back into a zero array shaped like the input.

    ``add.at`` is unbuffered, so repeated indices in ``self.slices``
    accumulate instead of overwriting.
    """
    backend = get_array_module(gy)
    src = self.inputs[0].data
    grad = backend.zeros(src.shape, dtype=gy.dtype)
    backend.add.at(grad, self.slices, gy)
    return grad
def backward(self, gy):
    """Gradient of sum: broadcast gy back to the input's shape.

    ``reshape_sum_backward`` presumably reinserts the axes removed by
    the reduction so the broadcast lines up — defined elsewhere in the
    class; confirm there.
    """
    backend = get_array_module(gy)
    in_shape = self.inputs[0].data.shape
    restored = self.reshape_sum_backward(gy)
    return backend.broadcast_to(restored, in_shape)
def backward(self, gy):
    """Gradient of log-softmax: gy - softmax(x) * sum(gy along axis).

    ``exp(log_softmax)`` recovers the softmax from the saved output.
    """
    backend = get_array_module(gy)
    log_y = self.outputs[0]().data  # outputs hold weakrefs; call to deref
    summed = gy.sum(axis=self.axis, keepdims=True)
    return gy - backend.exp(log_y) * summed
def forward(self, x):
    """Softmax along ``self.axis``, stabilized by subtracting the max
    before exponentiating."""
    backend = get_array_module(x)
    shifted = x - x.max(axis=self.axis, keepdims=True)
    exps = backend.exp(shifted)
    exps /= exps.sum(axis=self.axis, keepdims=True)
    return exps
def backward(self, gy):
    """Chain rule for cosine: d/dx cos(x) = -sin(x)."""
    backend = get_array_module(gy)
    x = self.inputs[0].data
    return -gy * backend.sin(x)
def forward(self, x):
    """ReLU: elementwise max(x, 0)."""
    backend = get_array_module(x)
    return backend.maximum(x, 0.0)
def forward(self, x):
    """Sigmoid via the tanh identity: sigmoid(x) = 0.5*tanh(x/2) + 0.5
    (avoids overflow from exp of large negatives)."""
    backend = get_array_module(x)
    half = x * 0.5
    return backend.tanh(half) * 0.5 + 0.5
def forward(self, x):
    """Broadcast ``x`` up to the target shape ``self.y_shape``."""
    backend = get_array_module(x)
    return backend.broadcast_to(x, self.y_shape)