def __init__(self, name="kid", **kwargs):
    """Set up the running KID average and the InceptionV3 feature encoder.

    Args:
        name: Metric name forwarded to the parent constructor.
        **kwargs: Extra keyword arguments forwarded to the parent constructor.
    """
    super().__init__(name=name, **kwargs)

    # KID is estimated per batch; this tracker averages those estimates
    # across batches.
    self.kid_tracker = keras.metrics.Mean()

    source_shape = (image_size, image_size, 3)
    inception_shape = (kid_image_size, kid_image_size, 3)

    # The encoder is an ImageNet-pretrained InceptionV3 without its
    # classification head. Inputs are first scaled by 255 (presumably from
    # the 0-1 range to 0-255 -- confirm against callers), resized to the
    # Inception resolution, and passed through InceptionV3's own
    # preprocessing function before feature extraction.
    encoder_layers = [
        layers.InputLayer(input_shape=source_shape),
        layers.Rescaling(255.0),
        layers.Resizing(height=kid_image_size, width=kid_image_size),
        layers.Lambda(keras.applications.inception_v3.preprocess_input),
        keras.applications.InceptionV3(
            include_top=False,
            input_shape=inception_shape,
            weights="imagenet",
        ),
        # Collapse the spatial feature map into one vector per image.
        layers.GlobalAveragePooling2D(),
    ]
    self.encoder = keras.Sequential(encoder_layers, name="inception_encoder")
def get_preprocessing():
    """Return a Sequential model that rescales by 1/255 and resizes.

    The output spatial size is `IMAGE_SIZE` x `IMAGE_SIZE`.
    """
    steps = [
        layers.Rescaling(1 / 255.0),
        layers.Resizing(IMAGE_SIZE, IMAGE_SIZE),
    ]
    return keras.Sequential(steps, name="preprocessing")
def get_test_augmentation_model():
    """Return the test-time pipeline: rescale by 1/255, then resize.

    No random layers are included; images are resized to
    `IMAGE_SIZE` x `IMAGE_SIZE`.
    """
    test_steps = [
        layers.Rescaling(1 / 255.0),
        layers.Resizing(IMAGE_SIZE, IMAGE_SIZE),
    ]
    return keras.Sequential(test_steps, name="test_data_augmentation")
def get_train_augmentation_model():
    """Return the training pipeline: rescale, enlarge, random-crop, flip.

    Images are rescaled by 1/255, resized to 20 pixels larger than
    `INPUT_SHAPE[0]` on each side, randomly cropped to
    `IMAGE_SIZE` x `IMAGE_SIZE`, and randomly flipped horizontally.
    """
    # Resize larger than the target so the random crop has room to move.
    enlarged_size = INPUT_SHAPE[0] + 20

    train_steps = [
        layers.Rescaling(1 / 255.0),
        layers.Resizing(enlarged_size, enlarged_size),
        layers.RandomCrop(IMAGE_SIZE, IMAGE_SIZE),
        layers.RandomFlip("horizontal"),
    ]
    return keras.Sequential(train_steps, name="train_data_augmentation")
def build(self, hp, inputs=None):
    """Build the selected application model on top of the first input.

    Args:
        hp: Hyperparameter container. `Boolean`, `Choice` and
            `conditional_scope` are called on it to choose the pretrained
            flag, trainability, model version and minimum input size.
        inputs: A tensor or nested structure of tensors; only the first
            flattened element is used. It is indexed as `shape[1]`,
            `shape[2]`, `shape[3]`, i.e. presumably
            (batch, height, width, channels) -- confirm against callers.

    Returns:
        The output of the chosen model applied to the (possibly resized and
        channel-adjusted) input.

    Raises:
        ValueError: If `self.pretrained` is truthy but the input does not
            have 1 or 3 channels.
    """
    input_node = nest.flatten(inputs)[0]
    channels = input_node.shape[3]

    pretrained = self.pretrained
    if channels not in [1, 3]:
        if self.pretrained:
            raise ValueError(
                "When pretrained is set to True, expect input to "
                f"have 1 or 3 channels, but got {channels}."
            )
        # Pretrained weights are not used for other channel counts.
        pretrained = False

    if pretrained is None:
        # Not fixed by the user: let the tuner decide, and only register
        # `trainable` under the pretrained=True branch of the search space.
        pretrained = hp.Boolean(PRETRAINED, default=False)
        if pretrained:
            with hp.conditional_scope(PRETRAINED, [True]):
                trainable = hp.Boolean("trainable", default=False)
    elif pretrained:
        trainable = hp.Boolean("trainable", default=False)

    versions = list(self.models)
    if len(versions) > 1:
        version = hp.Choice("version", versions)
    else:
        version = versions[0]

    min_size = self.min_size
    if hp.Boolean("imagenet_size", default=False):
        min_size = 224
    if input_node.shape[1] < min_size or input_node.shape[2] < min_size:
        # Upscale only the dimension(s) below the minimum.
        input_node = layers.Resizing(
            max(min_size, input_node.shape[1]),
            max(min_size, input_node.shape[2]),
        )(input_node)

    if input_node.shape[3] == 1:
        # Replicate the single channel to obtain three channels.
        input_node = layers.Concatenate()([input_node] * 3)
    if input_node.shape[3] != 3:
        # Project any other channel count down/up to three with a 1x1 conv.
        input_node = layers.Conv2D(filters=3, kernel_size=1, padding="same")(
            input_node
        )

    if pretrained:
        model = self.models[version](weights="imagenet", include_top=False)
        model.trainable = trainable
    else:
        model = self.models[version](
            weights=None, include_top=False, input_shape=input_node.shape[1:]
        )

    return model(input_node)
def get_learnable_resizer(filters=16, num_res_blocks=1, interpolation=INTERPOLATION):
    """Build the learnable image-resizer model.

    The model adds a learned correction to a plain `Resizing` of the input.

    Args:
        filters: Number of filters used by the intermediate convolutions.
        num_res_blocks: Number of `res_block` stages applied in sequence
            after the bottleneck resize. Zero is allowed; the projection
            convolution is then applied directly to the bottleneck.
        interpolation: Interpolation mode passed to both `Resizing` layers.

    Returns:
        A `tf.keras.Model` named "learnable_resizer" that maps a 3-channel
        image of arbitrary spatial size to a 3-channel image of
        `TARGET_SIZE`.
    """
    inputs = layers.Input(shape=[None, None, 3])

    # First, perform naive resizing; this is the outer skip connection.
    naive_resize = layers.Resizing(*TARGET_SIZE, interpolation=interpolation)(inputs)

    # First convolution block without batch normalization.
    x = layers.Conv2D(filters=filters, kernel_size=7, strides=1, padding="same")(inputs)
    x = layers.LeakyReLU(0.2)(x)

    # Second convolution block with batch normalization.
    x = layers.Conv2D(filters=filters, kernel_size=1, strides=1, padding="same")(x)
    x = layers.LeakyReLU(0.2)(x)
    x = layers.BatchNormalization()(x)

    # Intermediate resizing as a bottleneck.
    bottleneck = layers.Resizing(*TARGET_SIZE, interpolation=interpolation)(x)

    # Residual passes. Each block consumes the previous block's output, so
    # the blocks are chained; the first one starts from the bottleneck.
    # Seeding from the bottleneck also keeps the spatial size equal to
    # TARGET_SIZE when num_res_blocks is 0.
    x = bottleneck
    for _ in range(num_res_blocks):
        x = res_block(x)

    # Projection.
    x = layers.Conv2D(
        filters=filters, kernel_size=3, strides=1, padding="same", use_bias=False
    )(x)
    x = layers.BatchNormalization()(x)

    # Skip connection around the residual stack.
    x = layers.Add()([bottleneck, x])

    # Final resized image: learned residual added to the naive resize.
    x = layers.Conv2D(filters=3, kernel_size=7, strides=1, padding="same")(x)
    final_resize = layers.Add()([naive_resize, x])

    return tf.keras.Model(inputs, final_resize, name="learnable_resizer")
num_heads = 4 transformer_units = [ projection_dim * 2, projection_dim, ] # Size of the transformer layers transformer_layers = 8 # Size of the dense layers of the final classifier mlp_head_units = [2048, 1024] """ ## Use data augmentation """ data_augmentation = keras.Sequential( [ layers.Normalization(), layers.Resizing(image_size, image_size), layers.RandomFlip("horizontal"), layers.RandomRotation(factor=0.02), layers.RandomZoom(height_factor=0.2, width_factor=0.2), ], name="data_augmentation", ) # Compute the mean and the variance of the training data for normalization. data_augmentation.layers[0].adapt(x_train) """ ## Implement multilayer perceptron (MLP) """ def mlp(x, hidden_units, dropout_rate): for units in hidden_units:
test_ds = test_ds.batch(BATCH_SIZE).prefetch(AUTO) """ ## Data augmentation The augmentation pipeline consists of: - Rescaling - Resizing - Random cropping (fixed-sized or random sized) - Random horizontal flipping """ data_augmentation = keras.Sequential( [ layers.Rescaling(1 / 255.0), layers.Resizing(INPUT_SHAPE[0] + 20, INPUT_SHAPE[0] + 20), layers.RandomCrop(IMAGE_SIZE, IMAGE_SIZE), layers.RandomFlip("horizontal"), ], name="data_augmentation", ) """ Note that image data augmentation layers do not apply data transformations at inference time. This means that when these layers are called with `training=False` they behave differently. Refer [to the documentation](https://keras.io/api/layers/preprocessing_layers/image_augmentation/) for more details. """ """ ## Positional embedding module A [Transformer](https://arxiv.org/abs/1706.03762) architecture consists of **multi-head
A snippet from the paper: *"According to DeiT, various techniques are required to effectively train ViTs. Thus, we applied data augmentations such as CutMix, Mixup, Auto Augment, Repeated Augment to all models."* In this example, we will focus solely on the novelty of the approach and not on reproducing the paper results. For this reason, we don't use the mentioned data augmentation schemes. Please feel free to add to or remove from the augmentation pipeline. """ data_augmentation = keras.Sequential( [ layers.Normalization(), layers.Resizing(IMAGE_SIZE, IMAGE_SIZE), layers.RandomFlip("horizontal"), layers.RandomRotation(factor=0.02), layers.RandomZoom(height_factor=0.2, width_factor=0.2), ], name="data_augmentation", ) # Compute the mean and the variance of the training data for normalization. data_augmentation.layers[0].adapt(x_train) """ ## Implement Shifted Patch Tokenization In a ViT pipeline, the input images are divided into patches that are then linearly projected into tokens. Shifted patch tokenization (SPT) is introduced to combat the low receptive field of ViTs. The steps
representation_dim = 512 # The dimensions of the features vector. projection_units = 128 # The projection head of the representation learner. num_clusters = 20 # Number of clusters. k_neighbours = 5 # Number of neighbours to consider during cluster learning. tune_encoder_during_clustering = False # Freeze the encoder in the cluster learning. """ ## Implement data preprocessing The data preprocessing step resizes the input images to the desired `target_size` and applies feature-wise normalization. Note that, when using `keras.applications.ResNet50V2` as the visual encoder, resizing the images into 255 x 255 inputs would lead to more accurate results but require a longer time to train. """ data_preprocessing = keras.Sequential([ layers.Resizing(target_size, target_size), layers.Normalization(), ]) # Compute the mean and the variance from the data for normalization. data_preprocessing.layers[-1].adapt(x_data) """ ## Data augmentation Unlike simCLR, which randomly picks a single data augmentation function to apply to an input image, we apply a set of data augmentation functions randomly to the input image. (You can experiment with other image augmentation techniques by following the [data augmentation tutorial](https://www.tensorflow.org/tutorials/images/data_augmentation).) """ data_augmentation = keras.Sequential([ layers.RandomTranslation(height_factor=(-0.2, 0.2),
for spectrogram, _ in spectrogram_ds.take(1): input_shape = spectrogram.shape print('Input shape:', input_shape) num_labels = len(commands) # Instantiate the `tf.keras.layers.Normalization` layer. norm_layer = layers.Normalization() # Fit the state of the layer to the spectrograms # with `Normalization.adapt`. norm_layer.adapt(data=spectrogram_ds.map(map_func=lambda spec, label: spec)) model = models.Sequential([ layers.Input(shape=input_shape), # Downsample the input. layers.Resizing(32, 32), # Normalize. norm_layer, layers.Conv2D(32, 3, activation='relu'), layers.Conv2D(64, 3, activation='relu'), layers.MaxPooling2D(), layers.Dropout(0.25), layers.Flatten(), layers.Dense(128, activation='relu'), layers.Dropout(0.5), layers.Dense(num_labels), ]) model.summary() model.compile(