def __init__(self, sz, eps=1e-5, track_running_stats=False, training=False, momentum=0.1):
  self.eps, self.track_running_stats, self.training, self.momentum = eps, track_running_stats, training, momentum

  self.weight, self.bias = Tensor.ones(sz), Tensor.zeros(sz)

  self.running_mean, self.running_var = Tensor.zeros(sz, requires_grad=False), Tensor.ones(sz, requires_grad=False)
  self.num_batches_tracked = Tensor.zeros(1, requires_grad=False)

def __init__(self, kernel_size, strides, expand_ratio, input_filters, output_filters, se_ratio, has_se):
  oup = expand_ratio * input_filters
  if expand_ratio != 1:
    self._expand_conv = Tensor.uniform(oup, input_filters, 1, 1)
    self._bn0 = BatchNorm2D(oup)
  else:
    self._expand_conv = None

  self.strides = strides
  if strides == (2,2):
    # asymmetric "same" padding for stride-2 convs
    self.pad = [(kernel_size-1)//2-1, (kernel_size-1)//2]*2
  else:
    self.pad = [(kernel_size-1)//2]*4

  self._depthwise_conv = Tensor.uniform(oup, 1, kernel_size, kernel_size)
  self._bn1 = BatchNorm2D(oup)

  self.has_se = has_se
  if self.has_se:
    num_squeezed_channels = max(1, int(input_filters * se_ratio))
    self._se_reduce = Tensor.uniform(num_squeezed_channels, oup, 1, 1)
    self._se_reduce_bias = Tensor.zeros(num_squeezed_channels)
    self._se_expand = Tensor.uniform(oup, num_squeezed_channels, 1, 1)
    self._se_expand_bias = Tensor.zeros(oup)

  self._project_conv = Tensor.uniform(output_filters, oup, 1, 1)
  self._bn2 = BatchNorm2D(output_filters)

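# Worked example of the padding arithmetic above (my numbers, not from the
# source): for kernel_size=3 and strides=(2,2), pad = [0, 1, 0, 1], i.e. one
# extra pixel on the right/bottom, matching TensorFlow-style "same" padding
# for stride-2 convolutions; for strides=(1,1) it is the symmetric [1, 1, 1, 1].
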
def test_gc(self):
  a = Tensor.zeros(4, 4, gpu=self.gpu)
  b = Tensor.zeros(4, 4, gpu=self.gpu)
  (a*b).mean().backward()
  assert Tensor.allocated > 0
  del a, b
  assert Tensor.allocated == 0

def test_gc(self):
  a = Tensor.zeros(4, 4, device=self.device)
  b = Tensor.zeros(4, 4, device=self.device)
  (a*b).mean().backward()
  assert tensors_allocated() > 0
  del a, b
  assert tensors_allocated() == 0

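# The tensors_allocated() helper is not shown in this excerpt. A minimal
# sketch of what it plausibly does (counting live Tensor objects via the
# garbage collector); the repo's actual implementation may differ:
import gc

def tensors_allocated():
  return sum(isinstance(x, Tensor) for x in gc.get_objects())
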
def __init__(self, sz, eps=1e-5, affine=True, track_running_stats=True, momentum=0.1):
  assert affine, "BatchNorm2D only supports affine=True"
  self.eps, self.track_running_stats, self.momentum = eps, track_running_stats, momentum

  self.weight, self.bias = Tensor.ones(sz), Tensor.zeros(sz)

  self.running_mean, self.running_var = Tensor.zeros(sz, requires_grad=False), Tensor.ones(sz, requires_grad=False)
  self.num_batches_tracked = Tensor.zeros(1, requires_grad=False)

def __init__(self, number=0):
  self.number = number
  global_params = [
    # width, depth
    (1.0, 1.0), # b0
    (1.0, 1.1), # b1
    (1.1, 1.2), # b2
    (1.2, 1.4), # b3
    (1.4, 1.8), # b4
    (1.6, 2.2), # b5
    (1.8, 2.6), # b6
    (2.0, 3.1), # b7
    (2.2, 3.6), # b8
    (4.3, 5.3), # l2
  ][number]

  def round_filters(filters):
    multiplier = global_params[0]
    divisor = 8
    filters *= multiplier
    new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
    if new_filters < 0.9 * filters: # prevent rounding by more than 10%
      new_filters += divisor
    return int(new_filters)

  def round_repeats(repeats):
    return int(math.ceil(global_params[1] * repeats))

  out_channels = round_filters(32)
  self._conv_stem = Tensor.zeros(out_channels, 3, 3, 3)
  self._bn0 = BatchNorm2D(out_channels)

  blocks_args = [
    [1, 3, (1,1), 1, 32, 16, 0.25],
    [2, 3, (2,2), 6, 16, 24, 0.25],
    [2, 5, (2,2), 6, 24, 40, 0.25],
    [3, 3, (2,2), 6, 40, 80, 0.25],
    [3, 5, (1,1), 6, 80, 112, 0.25],
    [4, 5, (2,2), 6, 112, 192, 0.25],
    [1, 3, (1,1), 6, 192, 320, 0.25],
  ]

  self._blocks = []
  # num_repeats, kernel_size, strides, expand_ratio, input_filters, output_filters, se_ratio
  for b in blocks_args:
    args = b[1:]
    args[3] = round_filters(args[3])  # scale input_filters by the width multiplier
    args[4] = round_filters(args[4])  # scale output_filters by the width multiplier
    for n in range(round_repeats(b[0])):
      self._blocks.append(MBConvBlock(*args))
      # after the first repeat, blocks keep the channel count and use stride 1
      args[3] = args[4]
      args[1] = (1,1)

  in_channels = round_filters(320)
  out_channels = round_filters(1280)
  self._conv_head = Tensor.zeros(out_channels, in_channels, 1, 1)
  self._bn1 = BatchNorm2D(out_channels)
  self._fc = Tensor.zeros(out_channels, 1000)
  self._fc_bias = Tensor.zeros(1000)

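# Worked example (my arithmetic, not from the source): with the b5
# multipliers (width 1.6, depth 2.2), round_filters(32) computes
# 32*1.6 = 51.2, snaps to int(51.2 + 4) // 8 * 8 = 48, and keeps 48 since
# 48 >= 0.9*51.2; round_repeats(4) gives ceil(4*2.2) = 9 block repeats.
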
def __init__(self, sz, eps=0.001):
  self.eps = eps
  self.weight = Tensor.zeros(sz)
  self.bias = Tensor.zeros(sz)

  # TODO: need running_mean and running_var
  self.running_mean = Tensor.zeros(sz)
  self.running_var = Tensor.zeros(sz)
  self.num_batches_tracked = Tensor.zeros(1)

def __init__(self, sz, eps=0.001):
  self.eps = Tensor([eps], requires_grad=False)
  self.two = Tensor([2], requires_grad=False)
  self.weight = Tensor.ones(sz)
  self.bias = Tensor.zeros(sz)

  self.running_mean = Tensor.zeros(sz, requires_grad=False)
  self.running_var = Tensor.ones(sz, requires_grad=False)
  self.num_batches_tracked = Tensor.zeros(1, requires_grad=False)

def __init__(self, embed_dim, num_heads, ff_dim, prenorm=False, act=lambda x: x.relu()):
  self.num_heads = num_heads
  self.head_size = embed_dim // num_heads
  assert self.head_size * self.num_heads == embed_dim
  self.prenorm, self.act = prenorm, act

  self.query = (Tensor.uniform(embed_dim, embed_dim), Tensor.zeros(embed_dim))
  self.key = (Tensor.uniform(embed_dim, embed_dim), Tensor.zeros(embed_dim))
  self.value = (Tensor.uniform(embed_dim, embed_dim), Tensor.zeros(embed_dim))

  self.out = (Tensor.uniform(embed_dim, embed_dim), Tensor.zeros(embed_dim))

  self.ff1 = (Tensor.uniform(embed_dim, ff_dim), Tensor.zeros(ff_dim))
  self.ff2 = (Tensor.uniform(ff_dim, embed_dim), Tensor.zeros(embed_dim))

  self.ln1 = (Tensor.ones(embed_dim), Tensor.zeros(embed_dim))
  self.ln2 = (Tensor.ones(embed_dim), Tensor.zeros(embed_dim))

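# The matching __call__ is not shown here. A minimal sketch of how these
# weight tuples are plausibly consumed for multi-head attention; the method
# names (reshape/transpose/softmax/matmul/linear) and the exact axis order
# are assumptions, not the repo's actual forward pass:
def attention(self, x):
  bs, seq_len = x.shape[0], x.shape[1]  # x: (bs, seq_len, embed_dim)
  q, k, v = [x.linear(*w) \
    .reshape(shape=(bs, seq_len, self.num_heads, self.head_size)) \
    .transpose(order=(0, 2, 1, 3)) for w in (self.query, self.key, self.value)]
  # scaled dot-product attention per head
  scores = q.matmul(k.transpose(order=(0, 1, 3, 2))) * (self.head_size ** -0.5)
  weights = scores.softmax()  # softmax over the last (key) axis
  out = weights.matmul(v).transpose(order=(0, 2, 1, 3)) \
    .reshape(shape=(bs, seq_len, -1))
  return out.linear(*self.out)
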
def __init__(self, params, lr=0.001, b1=0.9, b2=0.999, eps=1e-8):
  super().__init__(params)
  self.lr, self.b1, self.b2, self.eps, self.t = lr, b1, b2, eps, 0

  self.m = [Tensor.zeros(*t.shape, device=params[0].device, requires_grad=False) for t in self.params]
  self.v = [Tensor.zeros(*t.shape, device=params[0].device, requires_grad=False) for t in self.params]

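# The step() that pairs with this state is not shown. A minimal sketch of the
# standard Adam update it sets up (bias-corrected first/second moments, with
# the correction folded into the learning rate); t.grad and assign() are
# assumptions about the surrounding Optimizer API:
def step(self):
  self.t += 1
  a = self.lr * ((1 - self.b2**self.t)**0.5) / (1 - self.b1**self.t)
  for i, t in enumerate(self.params):
    self.m[i] = self.b1 * self.m[i] + (1 - self.b1) * t.grad
    self.v[i] = self.b2 * self.v[i] + (1 - self.b2) * (t.grad * t.grad)
    t.assign(t - a * self.m[i] / (self.v[i].sqrt() + self.eps))
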
def test_gc_complex(self):
  a = Tensor.zeros(4, 4, gpu=self.gpu)
  b = Tensor.zeros(4, 4, gpu=self.gpu)
  assert Tensor.allocated == 2
  (a*b).mean().backward()
  assert Tensor.allocated == 4
  del b
  assert Tensor.allocated == 2
  b = Tensor.zeros(4, 4, gpu=self.gpu)
  print(Tensor.allocated)
  (a*b).mean().backward()
  print(Tensor.allocated)
  assert Tensor.allocated == 4
  del b
  assert Tensor.allocated == 2

def test_gc_complex(self):
  a = Tensor.zeros(4, 4, device=self.device)
  b = Tensor.zeros(4, 4, device=self.device)
  assert tensors_allocated() == 2
  (a*b).mean().backward()
  assert tensors_allocated() == 4
  del b
  assert tensors_allocated() == 2
  b = Tensor.zeros(4, 4, device=self.device)
  print(tensors_allocated())
  (a*b).mean().backward()
  print(tensors_allocated())
  assert tensors_allocated() == 4
  del b
  assert tensors_allocated() == 2

def __init__(self, layers=12, embed_dim=192, num_heads=3):
  self.embedding = (Tensor.uniform(embed_dim, 3, 16, 16), Tensor.zeros(embed_dim))
  self.embed_dim = embed_dim
  self.cls = Tensor.ones(1, 1, embed_dim)
  self.pos_embedding = Tensor.ones(1, 197, embed_dim)
  self.tbs = [
    TransformerBlock(embed_dim=embed_dim, num_heads=num_heads, ff_dim=embed_dim*4,
      prenorm=True, act=lambda x: x.gelu())
    for i in range(layers)]
  self.encoder_norm = (Tensor.uniform(embed_dim), Tensor.zeros(embed_dim))
  self.head = (Tensor.uniform(embed_dim, 1000), Tensor.zeros(1000))

def forward(self, x):
  # broadcast the class token across the batch
  ce = self.cls.add(Tensor.zeros(x.shape[0], 1, 1))
  pe = self.patch_embed(x)
  x = ce.cat(pe, dim=1)
  x = x.add(self.pos_embedding).sequential(self.tbs)
  x = x.layernorm().linear(*self.encoder_norm)
  # classify from the class-token position
  return x[:, 0].linear(*self.head)

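# patch_embed() is referenced above but not shown. A minimal sketch of what
# it plausibly does given the (embed_dim, 3, 16, 16) embedding weight and the
# 197-token pos_embedding (196 patches + 1 class token for a 224x224 input);
# the conv2d/transpose calls are assumptions about the Tensor API:
def patch_embed(self, x):
  x = x.conv2d(*self.embedding, stride=16)           # (bs, embed_dim, 14, 14)
  x = x.reshape(shape=(x.shape[0], x.shape[1], -1))  # (bs, embed_dim, 196)
  return x.transpose(order=(0, 2, 1))                # (bs, 196, embed_dim)
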
def __init__(self, params, lr=0.001, decay=0.9, eps=1e-8):
  super().__init__(params)
  self.lr, self.decay, self.eps = lr, decay, eps

  self.v = [Tensor.zeros(*t.shape, device=params[0].device, requires_grad=False) for t in self.params]

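# Again, the matching step() is not shown; a minimal sketch of the RMSprop
# update this state implies (t.grad and assign() are assumptions about the
# surrounding Optimizer API):
def step(self):
  for i, t in enumerate(self.params):
    self.v[i] = self.decay * self.v[i] + (1 - self.decay) * (t.grad * t.grad)
    t.assign(t - self.lr * t.grad / (self.v[i].sqrt() + self.eps))
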
def __init__(self, block, num_blocks, num_classes=10, url=None):
  self.url = url
  self.in_planes = 64

  self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, bias=False, padding=3)
  self.bn1 = nn.BatchNorm2D(64)
  self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=2)
  self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
  self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
  self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
  self.fc = {"weight": Tensor.uniform(512 * block.expansion, num_classes), "bias": Tensor.zeros(num_classes)}

def __init__(self, inC, outC, last=False):
  # Massively overstate the weights to get them to be focused on,
  # since otherwise the biases overrule everything
  self.weight = Tensor.uniform(outC, inC, 3, 3) * 16.0
  # Layout-wise, blatant cheat, but serious_mnist does it. I'd guess channels
  # either have to have a size of 1 or whatever the target is?
  # Values-wise, entirely different blatant cheat.
  # In most cases, use uniform bias, but tiny.
  # For the last layer, use just 0.5, constant.
  if last:
    self.bias = Tensor.zeros(1, outC, 1, 1) + 0.5
  else:
    self.bias = Tensor.uniform(1, outC, 1, 1)

def __init__(self):
  self._conv_stem = Tensor.zeros(32, 3, 3, 3)
  self._bn0 = BatchNorm2D(32)

  blocks_args = [
    [1, 3, (1,1), 1, 32, 16, 0.25],
    [2, 3, (2,2), 6, 16, 24, 0.25],
    [2, 5, (2,2), 6, 24, 40, 0.25],
    [3, 3, (2,2), 6, 40, 80, 0.25],
    [3, 5, (1,1), 6, 80, 112, 0.25],
    [4, 5, (1,1), 6, 112, 192, 0.25],
    [1, 3, (1,1), 6, 192, 320, 0.25],
  ]

  self._blocks = []
  # num_repeats, kernel_size, strides, expand_ratio, input_filters, output_filters, se_ratio
  for b in blocks_args:
    args = b[1:]
    for n in range(b[0]):
      self._blocks.append(MBConvBlock(*args))
      args[3] = args[4]
      args[1] = (1,1)

  self._conv_head = Tensor.zeros(1280, 320, 1, 1)
  self._bn1 = BatchNorm2D(1280)
  self._fc = Tensor.zeros(1280, 1000)
  self._fc_bias = Tensor.zeros(1000)

def __init__(self, num, num_classes):
  self.num = num

  self.block = {
    18: BasicBlock,
    34: BasicBlock,
    50: Bottleneck,
    101: Bottleneck,
    152: Bottleneck
  }[num]

  self.num_blocks = {
    18: [2, 2, 2, 2],
    34: [3, 4, 6, 3],
    50: [3, 4, 6, 3],
    101: [3, 4, 23, 3],
    152: [3, 8, 36, 3]
  }[num]

  self.in_planes = 64

  self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, bias=False, padding=3)
  self.bn1 = nn.BatchNorm2D(64)
  self.layer1 = self._make_layer(self.block, 64, self.num_blocks[0], stride=2)
  self.layer2 = self._make_layer(self.block, 128, self.num_blocks[1], stride=2)
  self.layer3 = self._make_layer(self.block, 256, self.num_blocks[2], stride=2)
  self.layer4 = self._make_layer(self.block, 512, self.num_blocks[3], stride=2)
  self.fc = {
    "weight": Tensor.uniform(512 * self.block.expansion, num_classes),
    "bias": Tensor.zeros(num_classes)
  }

def __call__(self, x):
  if self.track_running_stats or self.training:
    batch_mean = x.mean(axis=(0,2,3))
    y = x - batch_mean.reshape(shape=[1, -1, 1, 1])
    batch_var = (y*y).mean(axis=(0,2,3))

  if self.track_running_stats:
    # exponential moving averages of the batch statistics
    self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * batch_mean
    self.running_var = (1 - self.momentum) * self.running_var + self.momentum * batch_var
    if self.num_batches_tracked is None:
      self.num_batches_tracked = Tensor.zeros(1, requires_grad=False)
    self.num_batches_tracked += 1

  if self.training:
    return self.normalize(x, batch_mean, batch_var)

  return self.normalize(x, self.running_mean, self.running_var)

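# normalize() is referenced above but not shown. A minimal sketch of the
# usual batch-norm normalization it would perform, (x - mean) / sqrt(var + eps)
# scaled by weight and shifted by bias; the broadcast reshapes and the pow
# call are assumptions about the Tensor API:
def normalize(self, x, mean, var):
  inv_std = (var + self.eps).reshape(shape=[1, -1, 1, 1]) ** -0.5
  x = (x - mean.reshape(shape=[1, -1, 1, 1])) * inv_std
  return x * self.weight.reshape(shape=[1, -1, 1, 1]) + self.bias.reshape(shape=[1, -1, 1, 1])
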
def _test_linear(x):
  # create in tinygrad
  layer = (Tensor.uniform(in_dim, out_dim), Tensor.zeros(out_dim))
  z = x.linear(*layer)

  # create in torch
  with torch.no_grad():
    torch_layer = torch.nn.Linear(in_dim, out_dim).eval()
    torch_layer.weight[:] = torch.tensor(layer[0].data.T, dtype=torch.float32)
    torch_layer.bias[:] = torch.tensor(layer[1].data, dtype=torch.float32)
    torch_x = torch.tensor(x.cpu().data, dtype=torch.float32)
    torch_z = torch_layer(torch_x)

  # test
  np.testing.assert_allclose(z.data, torch_z.detach().numpy(), atol=5e-4, rtol=1e-5)

def __init__(self, in_size: int, out_size: int):
  self.weight = Tensor.randn(in_size, out_size)
  self.bias = Tensor.zeros(1, out_size)

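# A minimal sketch of the forward pass these two parameters imply, i.e.
# x @ weight + bias; the __call__ name and dot() call are assumptions:
def __call__(self, x):
  return x.dot(self.weight) + self.bias
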
def profile_conv(bs, chans, conv, cnt=100):
  img = Tensor.zeros(bs, 1, 28, 28)
  conv = Tensor.randn(chans, 1, conv, conv)
  for i in range(cnt):
    # result is discarded; the loop only exercises the conv2d kernel
    out = img.conv2d(conv)

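# Hypothetical usage, timing the loop from outside (the 128/16/3 values are
# mine, not from the source):
import time
st = time.time()
profile_conv(128, 16, 3)
print(f"100 conv2d calls took {time.time()-st:.2f} s")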