def WideResnetBlock(channels, strides=(1, 1), bn_momentum=0.9, mode='train'):
  """WideResnet convolutional block.

  Two 3x3 convolutions, each preceded by BatchNorm + Relu; only the first
  convolution applies `strides`.
  """
  def _bn_relu():
    # Pre-activation pair used before each convolution.
    return [tl.BatchNorm(momentum=bn_momentum, mode=mode), tl.Relu()]

  return (
      _bn_relu()
      + [tl.Conv(channels, (3, 3), strides, padding='SAME')]
      + _bn_relu()
      + [tl.Conv(channels, (3, 3), padding='SAME')]
  )
def model_fn(mode='train'):
  """Dropout and BatchNorm feeding a small MLP classifier."""
  dropout = tl.Dropout(mode=mode, rate=0.1)
  batch_norm = tl.BatchNorm(mode=mode)
  mlp = models.MLP(d_hidden=16, n_output_classes=n_classes, mode=mode)
  return tl.Serial(dropout, batch_norm, mlp)
def WideResnet(n_blocks=3, widen_factor=1, n_output_classes=10, bn_momentum=0.9,
               mode='train'):
  """WideResnet from https://arxiv.org/pdf/1605.07146.pdf.

  Args:
    n_blocks: int, number of blocks in a group. total layers = 6n + 4.
    widen_factor: int, widening factor of each group. k=1 is vanilla resnet.
    n_output_classes: int, number of distinct output classes.
    bn_momentum: float, momentum in BatchNorm.
    mode: Whether we are training or evaluating or doing inference.

  Returns:
    The list of layers comprising a WideResnet model with the given parameters.
  """
  # First group keeps spatial resolution; the next two downsample with (2, 2).
  groups = [WideResnetGroup(n_blocks, 16 * widen_factor,
                            bn_momentum=bn_momentum, mode=mode)]
  for width in (32 * widen_factor, 64 * widen_factor):
    groups.append(WideResnetGroup(n_blocks, width, (2, 2),
                                  bn_momentum=bn_momentum, mode=mode))
  return tl.Serial(
      tl.ToFloat(),
      tl.Conv(16, (3, 3), padding='SAME'),
      *groups,
      tl.BatchNorm(momentum=bn_momentum, mode=mode),
      tl.Relu(),
      tl.AvgPool(pool_size=(8, 8)),
      tl.Flatten(),
      tl.Dense(n_output_classes),
      tl.LogSoftmax(),
  )
def test_forward_dtype(self, backend, dtype):
  """Output dtype should match input dtype under every backend."""
  with math.use_backend(backend):
    batch_norm = tl.BatchNorm()
    ones = np.ones((3, 2, 7)).astype(dtype)
    _, _ = batch_norm.init(shapes.signature(ones))
    out = batch_norm(ones)
    self.assertEqual(out.dtype, dtype)
def test_new_weights_and_state(self):
  """Freshly initialized state is zero mean, unit variance, zero batches."""
  batch_norm = tl.BatchNorm()
  ones = np.ones((3, 2, 7)).astype(np.float32)
  _, _ = batch_norm.init(shapes.signature(ones))
  mean, var, batch_count = batch_norm.state
  np.testing.assert_allclose(mean, 0.0)
  np.testing.assert_allclose(var, 1.0)
  self.assertEqual(batch_count, 0)
def IdentityBlock(kernel_size, filters, mode='train'):
  """ResNet identical size block."""
  # TODO(jonni): Use good defaults so Resnet50 code is cleaner / less redundant.
  f1, f2, f3 = filters
  main_path = [
      tl.Conv(f1, (1, 1)),
      tl.BatchNorm(mode=mode),
      tl.Relu(),
      tl.Conv(f2, (kernel_size, kernel_size), padding='SAME'),
      tl.BatchNorm(mode=mode),
      tl.Relu(),
      tl.Conv(f3, (1, 1)),
      tl.BatchNorm(mode=mode),
  ]
  # Residual connection adds the untouched input back, then a final Relu.
  return [tl.Residual(main_path), tl.Relu()]
def test_input_signatures_serial_batch_norm(self):
  """Signature propagation through a Serial containing a stateful layer."""
  # Include a layer that actively uses state.
  signature = shapes.ShapeDtype((3, 28, 28))
  batch_norm = tl.BatchNorm()
  relu = tl.Relu()
  stack = tl.Serial(batch_norm, relu)
  stack.init(signature)
  # Check for correct shapes entering and exiting the batch_norm layer.
  # And the code should run without errors.
  stack._set_input_signature_recursive(signature)
  self.assertEqual(batch_norm.input_signature, signature)
  self.assertEqual(relu.input_signature, signature)
def Resnet50(d_hidden=64, n_output_classes=1001, mode='train'):
  """ResNet.

  Args:
    d_hidden: Dimensionality of the first hidden layer (multiplied later).
    n_output_classes: Number of distinct output classes.
    mode: Whether we are training or evaluating or doing inference.

  Returns:
    The list of layers comprising a ResNet model with the given parameters.
  """
  def _stage(multiplier, strides, n_identity):
    # One ResNet stage: a striding ConvBlock then `n_identity` IdentityBlocks,
    # all sharing filter sizes [m*d, m*d, 4*m*d].
    fs = [multiplier * d_hidden, multiplier * d_hidden,
          4 * multiplier * d_hidden]
    blocks = [ConvBlock(3, fs, strides, mode=mode)]
    blocks.extend(IdentityBlock(3, fs, mode=mode) for _ in range(n_identity))
    return blocks

  return tl.Model(
      tl.ToFloat(),
      tl.Conv(d_hidden, (7, 7), (2, 2), 'SAME'),
      tl.BatchNorm(mode=mode),
      tl.Relu(),
      tl.MaxPool(pool_size=(3, 3), strides=(2, 2)),
      *_stage(1, (1, 1), 2),
      *_stage(2, (2, 2), 3),
      *_stage(4, (2, 2), 5),
      *_stage(8, (2, 2), 2),
      tl.AvgPool(pool_size=(7, 7)),
      tl.Flatten(),
      tl.Dense(n_output_classes),
      tl.LogSoftmax(),
  )
def ConvBlock(kernel_size, filters, strides, mode='train'):
  """ResNet convolutional striding block."""
  # TODO(jonni): Use good defaults so Resnet50 code is cleaner / less redundant.
  f1, f2, f3 = filters
  main_path = [
      tl.Conv(f1, (1, 1), strides),
      tl.BatchNorm(mode=mode),
      tl.Relu(),
      tl.Conv(f2, (kernel_size, kernel_size), padding='SAME'),
      tl.BatchNorm(mode=mode),
      tl.Relu(),
      tl.Conv(f3, (1, 1)),
      tl.BatchNorm(mode=mode),
  ]
  # Shortcut must also stride/project so its shape matches the main path.
  shortcut_path = [
      tl.Conv(f3, (1, 1), strides),
      tl.BatchNorm(mode=mode),
  ]
  return [tl.Residual(main_path, shortcut=shortcut_path), tl.Relu()]
def test_forward(self, momentum):
  """One forward pass: normalized output plus updated running statistics."""
  layer = tl.BatchNorm(momentum=momentum)
  # Same values as the literal [[[0..3],...],[[12..15],...]] array: range(24)
  # reshaped to (2, 3, 4).
  x = np.arange(24, dtype=np.float32).reshape((2, 3, 4))
  _, _ = layer.init(shapes.signature(x))
  y = layer(x)

  mean_of_x = 11.5    # mean of range(24)
  var_of_x = 47.9167  # variance of range(24)
  new_fraction = 1.0 - momentum  # EMA weight given to the current batch
  running_mean, running_var, n_batches = layer.state
  np.testing.assert_allclose(
      running_mean, momentum * 0.0 + mean_of_x * new_fraction)
  np.testing.assert_allclose(
      running_var, momentum * 1.0 + var_of_x * new_fraction, rtol=1e-6)
  self.assertEqual(n_batches, 1)
  eps = 1e-5
  np.testing.assert_allclose(y, (x - mean_of_x) / np.sqrt(var_of_x + eps),
                             rtol=1e-6)
def model_fn(mode='train'):
  """Dropout and BatchNorm in front of a small MLP classifier."""
  preprocessing = [tl.Dropout(mode=mode, rate=0.1), tl.BatchNorm(mode=mode)]
  classifier = models.MLP(layer_widths=(16, 16, n_classes), mode=mode)
  return tl.Serial(*preprocessing, classifier)
def test_forward_shape(self):
  """BatchNorm must preserve the input shape."""
  batch_norm = tl.BatchNorm()
  x = np.ones((30, 20, 70), dtype=np.float32)
  _, _ = batch_norm.init(shapes.signature(x))
  out = batch_norm(x)
  self.assertEqual(out.shape, x.shape)
def model_fn(mode="train"):
  """Dropout and BatchNorm feeding a small MLP classifier."""
  dropout = layers.Dropout(mode=mode, rate=0.1)
  batch_norm = layers.BatchNorm(mode=mode)
  mlp = models.MLP(d_hidden=16, n_output_classes=n_classes, mode=mode)
  return layers.Model(dropout, batch_norm, mlp)