# NOTE(review): this span was collapsed onto one line. It starts inside a method
# whose `def` line (and the `if` that opens the first branch) is outside the
# visible source, and it ends in a truncated `Conv.__init__` signature.
# Indentation below is reconstructed from syntax and data flow — confirm
# against the full file.
            # xmean and x2mean are defined before this span; presumably the mean
            # of x and of x * x — TODO confirm.
            xvar = x2mean - xmean * xmean
            # Normalize x; eps is added under the square root.
            norm_x = (x - xmean) / jt.sqrt(xvar + self.eps)
            # Move each running statistic toward the current value by a
            # `momentum` fraction of the difference.
            self.running_mean += (xmean.sum([0, 2, 3]) - self.running_mean) * self.momentum
            self.running_var += (xvar.sum([0, 2, 3]) - self.running_var) * self.momentum
        else:
            # Other branch: normalize with the stored running statistics,
            # expanded via `.broadcast(x, [0, 2, 3])`.
            running_mean = self.running_mean.broadcast(x, [0, 2, 3])
            running_var = self.running_var.broadcast(x, [0, 2, 3])
            norm_x = (x - running_mean) / jt.sqrt(running_var + self.eps)
        # Scale by weight and shift by bias, both expanded the same way.
        w = self.weight.broadcast(x, [0, 2, 3])
        b = self.bias.broadcast(x, [0, 2, 3])
        return norm_x * w + b


# Names bound to the results of jt.make_module(...); ReLU and LeakyReLU are
# plain aliases of Relu and Leaky_relu.
Relu = jt.make_module(relu)
ReLU = Relu
# NOTE(review): the meaning of make_module's second argument is not visible
# here; this call passes 0.01 while the Softmax call below passes 2 — confirm
# both are intended.
Leaky_relu = jt.make_module(leaky_relu, 0.01)
LeakyReLU = Leaky_relu
ReLU6 = jt.make_module(relu6)
Softmax = jt.make_module(softmax, 2)


class Conv(Module):
    # NOTE(review): the signature below is cut off at the end of the visible
    # source; remaining parameters and the body are not shown.
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1,
f32 = jt.float32 def matmul(a, b): (n, m), k = a.shape, b.shape[-1] a = a.broadcast([n, m, k], dims=[2]) b = b.broadcast([n, m, k], dims=[0]) return (a * b).sum(dim=1) def relu(x): return jt.maximum(x, 0.0) Relu = jt.make_module(relu) class Model(Module): def __init__(self, input_size): self.linear1 = Linear(input_size, 10) self.relu1 = Relu() self.linear2 = Linear(10, 1) def execute(self, x): x = self.linear1(x) x = self.relu1(x) return self.linear2(x) class Linear(Module):
# NOTE(review): this span was collapsed onto one line. It starts inside a method
# whose `def` line is outside the visible source (N, C and, presumably, a
# default output_shape are set there) and ends in a truncated `Conv.__init__`
# signature. Indentation below is reconstructed — in particular the first `if`
# is assumed to guard a single statement; confirm against the full file.
        if x.ndim == 4:
            # Remember the incoming shape so the result can be reshaped back.
            output_shape = x.shape
        assert C % self.num_groups == 0
        # Regroup to (N, num_groups, C / num_groups, -1).
        # NOTE(review): int(C / ...) is used here while C // ... is used for
        # weight/bias below; the assert above makes them agree, but the forms
        # are inconsistent.
        x = x.reshape((N, self.num_groups, int(C / self.num_groups), -1))
        # Statistics are reduced over dims 2 and 3 with keepdims=1.
        xmean = jt.mean(x, dims=[2, 3], keepdims=1)
        x2mean = jt.mean(x * x, dims=[2, 3], keepdims=1)
        # Variance as mean(x*x) - mean(x)^2, clamped at 0 before eps is added.
        xvar = jt.maximum(x2mean - xmean * xmean, 0)
        norm_x = (x - xmean) / jt.sqrt(xvar + self.eps)
        if not self.affine:
            # Without affine parameters, return the normalized values directly.
            return norm_x.reshape(output_shape)
        # Reshape weight and bias to (1, num_groups, C // num_groups, 1) so they
        # line up with the regrouped x.
        w = self.weight.reshape((1, self.num_groups, C // self.num_groups, 1))
        b = self.bias.reshape((1, self.num_groups, C // self.num_groups, 1))
        return (norm_x * w + b).reshape(output_shape)


# Names bound to the results of jt.make_module(...); ReLU and LeakyReLU are
# plain aliases of Relu and Leaky_relu.
Relu = jt.make_module(relu)
ReLU = Relu
# NOTE(review): the meaning of make_module's second argument (2) is not visible
# here — confirm against the make_module definition.
Leaky_relu = jt.make_module(leaky_relu, 2)
LeakyReLU = Leaky_relu
ReLU6 = jt.make_module(relu6)
Softmax = jt.make_module(softmax, 2)
GELU = jt.make_module(gelu)


class Conv(Module):
    # NOTE(review): the signature below is cut off at the end of the visible
    # source; remaining parameters and the body are not shown.
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0,
# NOTE(review): this span was collapsed onto one line. It starts inside a method
# whose `def` line is outside the visible source and ends part-way through
# `Conv.__init__`. Indentation below is reconstructed from syntax and data
# flow — confirm against the full file.
        if self.is_train:
            # Statistics of the current input, reduced over dims 0, 2, 3 with
            # keepdims=1.
            xmean = jt.mean(x, dims=[0,2,3], keepdims=1)
            x2mean = jt.mean(x*x, dims=[0,2,3], keepdims=1)
            # Variance as mean(x*x) - mean(x)^2.
            # NOTE(review): this difference is not clamped; rounding could make
            # it slightly negative — confirm eps is enough to keep the sqrt
            # argument non-negative.
            xvar = x2mean-xmean*xmean
            norm_x = (x-xmean)/jt.sqrt(xvar+self.eps)
            # Move each running statistic toward the current value by a
            # `momentum` fraction of the difference; summing over dims 0, 2, 3
            # reduces the kept axes again.
            self.running_mean += (xmean.sum([0,2,3])-self.running_mean)*self.momentum
            self.running_var += (xvar.sum([0,2,3])-self.running_var)*self.momentum
        else:
            # Not training: normalize with the stored running statistics,
            # expanded via `.broadcast(x, [0,2,3])`.
            running_mean = self.running_mean.broadcast(x, [0,2,3])
            running_var = self.running_var.broadcast(x, [0,2,3])
            norm_x = (x-running_mean)/jt.sqrt(running_var+self.eps)
        # Scale by weight and shift by bias, both expanded the same way.
        w = self.weight.broadcast(x, [0,2,3])
        b = self.bias.broadcast(x, [0,2,3])
        return norm_x * w + b


# Names bound to the results of jt.make_module(...); ReLU is a plain alias of
# Relu.
Relu = jt.make_module(relu)
ReLU = Relu
# NOTE(review): the meaning of make_module's second argument (2) is not visible
# here — confirm against the make_module definition.
Leaky_relu = jt.make_module(leaky_relu, 2)
Softmax = jt.make_module(softmax, 2)


class Conv(Module):
    # NOTE(review): this method continues past the end of the visible source.
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True):
        # Only groups == 1 is accepted.
        # NOTE(review): this check is an assert, so it is skipped under
        # `python -O`.
        assert groups == 1
        self.in_channels = in_channels
        self.out_channels = out_channels
        # kernel_size, stride, padding and dilation are each stored as a tuple:
        # a tuple argument is kept as-is, any other value v becomes (v, v).
        self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
        self.stride = stride if isinstance(stride, tuple) else (stride, stride)
        self.padding = padding if isinstance(padding, tuple) else (padding, padding)
        self.dilation = dilation if isinstance(dilation, tuple) else (dilation, dilation)
        # Unpack the two kernel sizes; their use lies beyond the visible source.
        Kh, Kw = self.kernel_size