def build(self, hp, inputs=None):
    """Build the image-augmentation graph, tuning unset factors via ``hp``.

    Each augmentation factor left as ``None`` on this block is registered
    as a hyperparameter (choice between 0.0 = off and 0.1 = mild); a value
    of ``0`` / ``(0, 0)`` disables the corresponding transform entirely.
    The order of ``hp.Choice``/``hp.Boolean`` calls fixes hyperparameter
    registration order, so it must not be changed.

    Args:
        hp: KerasTuner-style HyperParameters container (``hp.Choice`` /
            ``hp.Boolean``).
        inputs: Input node(s); only the first flattened node is used.

    Returns:
        The output node with the selected augmentation layers applied.
    """
    input_node = nest.flatten(inputs)[0]
    output_node = input_node

    # Translate
    translation_factor = self.translation_factor
    if translation_factor is None:
        # Unset by the user: let the tuner pick off (0.0) vs. mild (0.1).
        translation_factor = hp.Choice("translation_factor", [0.0, 0.1])
    if translation_factor != 0 and translation_factor != (0, 0):
        # _get_fraction_value presumably splits a scalar/tuple into
        # (height, width) fractions — defined elsewhere on this class;
        # TODO confirm.
        height_factor, width_factor = self._get_fraction_value(
            translation_factor)
        output_node = preprocessing.RandomTranslation(
            height_factor, width_factor)(output_node)

    # Flip
    horizontal_flip = self.horizontal_flip
    if horizontal_flip is None:
        horizontal_flip = hp.Boolean("horizontal_flip", default=True)
    vertical_flip = self.vertical_flip
    if self.vertical_flip is None:
        vertical_flip = hp.Boolean("vertical_flip", default=True)
    # Map the two booleans onto RandomFlip's mode string; the four branches
    # are exhaustive, so flip_mode is always assigned.
    if not horizontal_flip and not vertical_flip:
        flip_mode = ""
    elif horizontal_flip and vertical_flip:
        flip_mode = "horizontal_and_vertical"
    elif horizontal_flip and not vertical_flip:
        flip_mode = "horizontal"
    elif not horizontal_flip and vertical_flip:
        flip_mode = "vertical"
    if flip_mode != "":
        output_node = preprocessing.RandomFlip(mode=flip_mode)(output_node)

    # Rotate
    rotation_factor = self.rotation_factor
    if rotation_factor is None:
        rotation_factor = hp.Choice("rotation_factor", [0.0, 0.1])
    if rotation_factor != 0:
        output_node = preprocessing.RandomRotation(rotation_factor)(
            output_node)

    # Zoom — the factor is still tuned/validated, but the layer itself is
    # intentionally disabled below.
    zoom_factor = self.zoom_factor
    if zoom_factor is None:
        zoom_factor = hp.Choice("zoom_factor", [0.0, 0.1])
    if zoom_factor != 0 and zoom_factor != (0, 0):
        height_factor, width_factor = self._get_fraction_value(zoom_factor)
        # TODO: Add back RandomZoom when it is ready.
        # output_node = preprocessing.RandomZoom(
        #     height_factor, width_factor)(output_node)

    # Contrast
    contrast_factor = self.contrast_factor
    if contrast_factor is None:
        contrast_factor = hp.Choice("contrast_factor", [0.0, 0.1])
    if contrast_factor != 0 and contrast_factor != (0, 0):
        output_node = preprocessing.RandomContrast(contrast_factor)(
            output_node)
    return output_node
def __init__(self):
    """Assemble the hybrid classifier: frozen CNN backbone + ViT head.

    Also loads the datasets and builds the training-time augmentation
    pipeline. All ViT hyperparameters come from the module-level ``args``.
    """
    super(ClassifierHybrid, self).__init__()
    self.global_step = 0

    # The backbone acts purely as a frozen feature extractor.
    self.backbone = self.get_backbone()
    self.backbone.trainable = False

    # Report parameter counts of the backbone.
    n_trainable = np.sum(
        [K.count_params(w) for w in self.backbone.trainable_weights])
    n_frozen = np.sum(
        [K.count_params(w) for w in self.backbone.non_trainable_weights])
    print('Total params: {:,}'.format(n_trainable + n_frozen))
    print('Trainable params: {:,}'.format(n_trainable))
    print('Non-trainable params: {:,}'.format(n_frozen))

    # Vision-Transformer head over the backbone's patch features.
    self.vision_transformer = ViT(
        img_size=args.num_patches,
        channels=args.num_channels,
        patch_size=args.patch_size,
        num_layers=args.num_layers,
        num_classes=args.num_classes,
        d_model=args.d_model,
        num_heads=args.num_heads,
        d_mlp=args.d_mlp,
    )

    self.prepare_datasets()
    self.flag = True

    # Light augmentation applied to 260x260 RGB inputs.
    self.augmentation = tf.keras.Sequential(
        [
            tf.keras.Input(shape=(260, 260, 3)),
            preprocessing.RandomRotation(factor=0.15),
            preprocessing.RandomTranslation(height_factor=0.1,
                                            width_factor=0.1),
            preprocessing.RandomFlip(),
            preprocessing.RandomContrast(factor=0.1),
        ],
        name="augmentation",
    )
def generic_builder(self, name, net, lr=1e-2, dropout_rate=0.2):
    """Build and compile an image classifier around a Keras application.

    Args:
        name: Name for the resulting tf.keras.Model.
        net: Keras application constructor (e.g. EfficientNetB0) accepting
            include_top / input_tensor / weights keyword arguments.
        lr: Learning rate for the SGD optimizer.
        dropout_rate: Dropout rate applied before the classification head.

    Returns:
        A compiled tf.keras.Model with a softmax head of
        ``self.cfg['num_classes']`` units.
    """
    cfg = self.cfg
    inputs = layers.Input(shape=cfg['img_shape'])
    img_augmentation = Sequential(
        [
            preprocessing.RandomRotation(factor=0.15),
            # Translation and flips deliberately disabled for now:
            # preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1),
            # preprocessing.RandomFlip(),
            preprocessing.RandomContrast(factor=0.1),
        ],
        name="img_augmentation",
    )
    x = img_augmentation(inputs)

    if cfg['transfer_learning']:
        model = net(include_top=False, input_tensor=x, weights='imagenet')
        # Freeze the pretrained weights.
        model.trainable = False
    else:
        model = net(include_top=False, input_tensor=x, weights=None)
        model.trainable = True

    # Rebuild the classification head on top of the backbone features.
    # BUGFIX: the original from-scratch branch applied Dropout/Dense to the
    # augmented input tensor `x` instead of `model.output`, bypassing `net`
    # entirely. Both branches now pool the backbone output first.
    x = layers.GlobalAveragePooling2D(name="avg_pool")(model.output)
    if cfg['transfer_learning']:
        x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate, name="top_dropout")(x)
    outputs = layers.Dense(cfg['num_classes'], activation="softmax",
                           name="pred")(x)

    # Compile.
    model = tf.keras.Model(inputs, outputs, name=name)
    optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
    model.compile(optimizer=optimizer,
                  loss="categorical_crossentropy",
                  metrics=["accuracy", "top_k_categorical_accuracy"])
    return model
def build(self, hp, inputs=None):
    """Chain the configured augmentation layers onto the input node.

    Translation, rotation, zoom and contrast use fixed attributes on the
    block; the flip directions fall back to tunable booleans when unset.

    Args:
        hp: Hyperparameter container providing ``hp.Boolean``.
        inputs: Input node(s); only the first flattened node is used.

    Returns:
        The augmented output node.
    """
    output_node = nest.flatten(inputs)[0]

    # Random translation (skipped when the factor is 0 or (0, 0)).
    if self.translation_factor not in (0, (0, 0)):
        height_factor, width_factor = self._get_fraction_value(
            self.translation_factor)
        output_node = preprocessing.RandomTranslation(
            height_factor, width_factor)(output_node)

    # Random flips: unset directions become tunable hyperparameters.
    horizontal_flip = self.horizontal_flip
    if horizontal_flip is None:
        horizontal_flip = hp.Boolean('horizontal_flip', default=True)
    vertical_flip = self.vertical_flip
    if self.vertical_flip is None:
        vertical_flip = hp.Boolean('vertical_flip', default=True)
    # Build RandomFlip's mode string from the enabled directions:
    # "", "horizontal", "vertical", or "horizontal_and_vertical".
    directions = [label for enabled, label in
                  ((horizontal_flip, 'horizontal'),
                   (vertical_flip, 'vertical')) if enabled]
    flip_mode = '_and_'.join(directions)
    if flip_mode:
        output_node = preprocessing.RandomFlip(mode=flip_mode)(output_node)

    # Random rotation.
    if self.rotation_factor != 0:
        output_node = preprocessing.RandomRotation(
            self.rotation_factor)(output_node)

    # Random zoom — the factors are still computed, but the layer itself
    # is intentionally disabled.
    if self.zoom_factor not in (0, (0, 0)):
        height_factor, width_factor = self._get_fraction_value(
            self.zoom_factor)
        # TODO: Add back RandomZoom when it is ready.
        # output_node = preprocessing.RandomZoom(
        #     height_factor, width_factor)(output_node)

    # Random contrast.
    if self.contrast_factor not in (0, (0, 0)):
        output_node = preprocessing.RandomContrast(
            self.contrast_factor)(output_node)
    return output_node
def __init__(self, model_name, learning_rate, pre_trained_model_path):
    """Resolve base model, weights source and augmentation for a model name.

    Args:
        model_name: Key into the module-level ``base_model`` table; names
            starting with 'b' load local EfficientNet weight files.
        learning_rate: Learning rate (coerced to float).
        pre_trained_model_path: Directory holding local ``<name>.h5`` files.
    """
    self.model_name = model_name
    self.lr = float(learning_rate)
    self.base_model, self.image_size = base_model[model_name]

    # 'b*' models use a local .h5 weight file; everything else pulls the
    # standard 'imagenet' weights.
    local_weights = os.path.join(pre_trained_model_path,
                                 self.model_name + ".h5")
    self.weight_path = (local_weights if model_name.startswith('b')
                        else 'imagenet')

    augmentation_layers = [
        preprocessing.RandomRotation(factor=0.15),
        preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1),
        preprocessing.RandomFlip(),
        preprocessing.RandomContrast(factor=0.1),
    ]
    self.img_augmentation = Sequential(augmentation_layers,
                                       name="img_augmentation")
def get_data_augmentation_layers(rotation: bool = False,
                                 flip: bool = False,
                                 zoom: bool = False,
                                 contrast: bool = False) -> List[PreprocessingLayer]:
    """
    Creates a list of augmentation layers which can be applied later.

    :param rotation: Data Augmentation: Whether to apply random rotation to the images.
    :param flip: Data Augmentation: Whether to apply random horizontal flip to the images.
    :param zoom: Data Augmentation: Whether to apply random zoom to the images.
    :param contrast: Data Augmentation: Whether to apply random contrast enhancement to the images.
    :return: The list of data augmentation layers.
    """
    # (flag, factory) table keeps the construction order stable:
    # rotation, flip, zoom, contrast.
    candidates = [
        (rotation,
         lambda: preprocessing.RandomRotation(factor=(1 / 6))),  # +/- 30deg
        (flip,
         lambda: preprocessing.RandomFlip("horizontal")),
        (zoom,
         lambda: preprocessing.RandomZoom(height_factor=0.2)),  # +/- 20%
        (contrast,
         lambda: preprocessing.RandomContrast(factor=0.1)),
    ]
    return [make() for enabled, make in candidates if enabled]
def build_model(NUM_CLASSES, IMG_SIZE):
    """Build a transfer-learning classifier on a frozen EfficientNetB3.

    Args:
        NUM_CLASSES (int): Number of output classes.
        IMG_SIZE (int): Input image height/width in pixels.

    Returns:
        tuple: (compiled tf.keras.Model, learning rate used for Adam).
    """
    augmentation_layers = [
        preprocessing.RandomRotation(factor=0.15),
        preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1),
        preprocessing.RandomFlip(),
        preprocessing.RandomContrast(factor=0.1)
    ]
    img_augmentation = Sequential(augmentation_layers, name='img_augmentation')

    inputs = Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    augmented = img_augmentation(inputs)

    # ImageNet-pretrained backbone, kept frozen; only the new head trains.
    backbone = EfficientNetB3(include_top=False,
                              input_tensor=augmented,
                              weights='imagenet')
    backbone.trainable = False

    # Classification head.
    head = GlobalAveragePooling2D(name='avg_pool')(backbone.output)
    head = BatchNormalization(trainable=True)(head)
    head = Dropout(0.2, name='top_dropout')(head)
    outputs = Dense(NUM_CLASSES, activation='softmax', name='pred')(head)

    model = Model(inputs, outputs, name='EfficientNet')
    lr = 1e-4
    model.compile(optimizer=Adam(learning_rate=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return (model, lr)
def build_model(num_classes, config):
    """Create and compile an EfficientNetB0 transfer-learning classifier.

    Args:
        num_classes: Number of units in the softmax output head.
        config: Object exposing ``img_size``, ``learning_rate`` and
            ``loss_function``.

    Returns:
        A compiled tf.keras.Model named "EfficientNet".
    """
    augment = Sequential(
        [
            preprocessing.RandomRotation(factor=0.1),
            preprocessing.RandomTranslation(height_factor=0.1,
                                            width_factor=0.1),
            preprocessing.RandomContrast(factor=0.1),
        ],
        name="img_augmentation",
    )

    inputs = layers.Input(shape=(config.img_size, config.img_size, 3))
    backbone = EfficientNetB0(include_top=False,
                              input_tensor=augment(inputs),
                              weights="imagenet")
    # Keep the pretrained weights frozen.
    backbone.trainable = False

    # New classification head over pooled backbone features.
    head = layers.GlobalAveragePooling2D(name="avg_pool")(backbone.output)
    head = layers.BatchNormalization()(head)
    head = layers.Dropout(0.3, name="top_dropout")(head)
    outputs = layers.Dense(num_classes, activation="softmax",
                           name="pred")(head)

    model = tf.keras.Model(inputs, outputs, name="EfficientNet")
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=config.learning_rate),
        loss=config.loss_function,
        metrics=["accuracy", metrics.top_k_categorical_accuracy],
    )
    return model
# One hot encoding. The data labels must be reformatted in a 3-bit vector enc = OneHotEncoder() y = enc.fit_transform(y[:, np.newaxis]).toarray() early_stopping = EarlyStopping( min_delta=0.001, # minimium amount of change to count as an improvement patience=5, # how many epochs to wait before stopping restore_best_weights=True, ) # elaborate convnet model = Sequential() # data augmentation model.add(preprocessing.RandomFlip('horizontal')) # flip left-to-right model.add(preprocessing.RandomContrast(0.5)) # contrast change by up to 50%) #first convolution layer model.add( Conv2D(filters=64, kernel_size=3, input_shape=X.shape[1:], activation="relu", padding='same')) model.add(MaxPooling2D(pool_size=(2, 2))) #second convolution layer model.add(Conv2D(filters=128, kernel_size=3, activation="relu", padding='same')) model.add(MaxPooling2D(pool_size=(2, 2)))
def __init__(self, factor=0.2, name=None, **kwargs):
    """Wrap a ``preprocessing.RandomContrast`` layer with the given factor.

    Args:
        factor: Contrast adjustment factor forwarded to RandomContrast.
        name: Optional layer name passed to the base class.
        **kwargs: Extra keyword arguments forwarded to the base class.
    """
    # BUGFIX: call super().__init__() before assigning sub-layers —
    # Keras layers must be initialized before attributes are set so that
    # sub-layer/weight tracking works (assumes the base class is a
    # keras Layer — confirm against the class definition).
    super(BetterContrast, self).__init__(name=name, **kwargs)
    self.RandomContrast = preprocessing.RandomContrast(factor=factor)
from tensorflow import keras
from tensorflow.keras import layers
# these are a new feature in TF 2.2
from tensorflow.keras.layers.experimental import preprocessing

# NOTE(review): `tf` is used below but not imported in this chunk —
# presumably `import tensorflow as tf` appears earlier in the file; confirm.
pretrained_base = tf.keras.models.load_model(
    'vgg16-pretrained-base',
)
# Use the saved VGG16 base purely as a frozen feature extractor.
pretrained_base.trainable = False

model = keras.Sequential([
    # Preprocessing
    preprocessing.RandomFlip('horizontal'),  # flip left-to-right
    preprocessing.RandomContrast(0.5),  # contrast change by up to 50%
    # Base
    pretrained_base,
    # Head
    layers.Flatten(),
    layers.Dense(6, activation='relu'),
    # Single sigmoid unit: binary classification output.
    layers.Dense(1, activation='sigmoid'),
])
"""
STEP 3 - TRAIN AND EVALUATE
"""
# Binary cross-entropy matches the single-unit sigmoid head above.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)
else: x_train, y_train = prepare_all_data(config, x_train, y_train, input_shape, random_labels=config.random_labels) x_test, y_test = prepare_all_data(config, x_test, y_test, input_shape) if input_shape[-1] > 1: means = x_train.mean(axis=(0, 1, 2)) std = x_train.std(axis=(0, 1, 2)) x_train = (x_train - means) / std x_test = (x_test - means) / std print("LOADING OVER", flush=True) augmenter = tf.keras.Sequential([ preprocessing.RandomContrast(0.2), preprocessing.RandomRotation(0.05), # = 12° rotation maximum preprocessing.RandomTranslation(0.13, 0.13, fill_mode='reflect'), # nearest better preprocessing.RandomFlip('horizontal') ]) augmenter.build((None, ) + input_shape) class CustomSequential(Sequential): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def train_step(self, data): x, y = data x = augmenter(x, training=True)
def visualize_contrast(image):
    """Show the image next to a fixed-strength random-contrast version."""
    # factor=(0.1, 0.1) pins the contrast range, so the effect is repeatable.
    contrast = Sequential([preprocessing.RandomContrast(factor=(0.1, 0.1))])
    batched = tf.expand_dims(image, 0)
    augmented = contrast(batched)[0]
    visualize_plots([image, augmented], "contrast")
def build_augmenters(imgaug=True):
    """Build the training-time augmentation pipeline.

    Args:
        imgaug: If True, return an ``imgaug`` ``iaa.Sequential``; otherwise
            a Keras ``Sequential`` of preprocessing layers. Both variants
            are seeded with the module-level SEED and tuned down when
            EXTRACT_IMAGES is set (working on extracted crops).

    Returns:
        Either an ``iaa.Sequential`` or a Keras ``Sequential``.
    """
    if imgaug:
        ia.seed(SEED)
        # Apply the wrapped augmenter with probability 0.8.
        sometimes = lambda aug: iaa.Sometimes(0.8, aug, seed=SEED)
        augmenters = iaa.Sequential(
            [
                # A few photometric augmenters per image; fewer when
                # operating on extracted crops.
                iaa.SomeOf(
                    (1, 3) if EXTRACT_IMAGES else (2, 4),
                    [
                        iaa.Add((-10, 10), per_channel=0.5, seed=SEED),
                        iaa.AdditiveGaussianNoise(loc=0,
                                                  scale=(0.0, 0.05 * 255),
                                                  per_channel=0.5,
                                                  seed=SEED),
                        # At most one blur variant.
                        # NOTE(review): AverageBlur/MedianBlur with k=1
                        # appear to be no-ops — confirm intent.
                        iaa.OneOf(
                            [
                                iaa.GaussianBlur(sigma=(0, 0.5), seed=SEED),
                                iaa.AverageBlur(k=1, seed=SEED),
                                iaa.MedianBlur(k=1, seed=SEED),
                            ],
                            seed=SEED,
                        ),
                        iaa.LinearContrast(
                            (0.8, 1.2), per_channel=0.5, seed=SEED),
                    ],
                ),
                # Occasional flips.
                sometimes(
                    iaa.OneOf(
                        [
                            iaa.Fliplr(0.5, seed=SEED),
                            iaa.Flipud(0.2, seed=SEED),
                        ],
                        seed=SEED,
                    ),
                ),
                # Occasional affine warps; much milder for extracted crops.
                sometimes(
                    iaa.Affine(
                        scale={
                            "x": (0.5, 1) if EXTRACT_IMAGES else (0.8, 1.2),
                            "y": (0.5, 1) if EXTRACT_IMAGES else (0.8, 1.2)
                        },
                        translate_percent={
                            "x": (-0.01, 0.01) if EXTRACT_IMAGES else
                            (-0.2, 0.2),
                            "y": (-0.01, 0.01) if EXTRACT_IMAGES else
                            (-0.2, 0.2),
                        },
                        rotate=(-25, 25) if EXTRACT_IMAGES else (-45, 45),
                        shear=(-16, 16),
                        order=[0, 1],
                        cval=(0, 255),
                        mode=ia.ALL,
                        seed=SEED,
                    )),
            ],
            random_order=True,
            seed=SEED,
        )
        return augmenters
    else:
        # Keras preprocessing-layer equivalent.
        img_augmentation = Sequential(
            [
                preprocessing.RandomRotation(factor=0.3, seed=SEED),
                preprocessing.RandomTranslation(
                    height_factor=0.01 if EXTRACT_IMAGES else 0.2,
                    width_factor=0.01 if EXTRACT_IMAGES else 0.2,
                    seed=SEED),
                preprocessing.RandomFlip(seed=SEED),
                preprocessing.RandomContrast(
                    factor=0.1 if EXTRACT_IMAGES else 0.2, seed=SEED),
            ],
            name="img_augmentation",
        )
        return img_augmentation
ax = plt.subplot(5, 6, n + 1) plt.imshow(img_test[n].astype("uint8")) plt.title(np.array(class_names)[label_test[n] == True][0]) plt.axis("off") """ ## Augmentation Define image augmentation using keras preprocessing layers and apply them to the training set. """ # Define image augmentation model image_augmentation = keras.Sequential([ preprocessing.RandomFlip(mode="horizontal"), preprocessing.RandomRotation(factor=0.1), preprocessing.RandomZoom(height_factor=(-0.1, -0)), preprocessing.RandomContrast(factor=0.1), ], ) # Apply the augmentations to the training images and plot a few examples img_train = image_augmentation(img_train).numpy() plt.figure(figsize=(16, 12)) for n in range(30): ax = plt.subplot(5, 6, n + 1) plt.imshow(img_train[n].astype("uint8")) plt.title(np.array(class_names)[label_train[n] == True][0]) plt.axis("off") """ ## Define model building & training functions We create a few convenience functions to build a transfer-learning model, compile and
) def convert_to_float(image, label): image = tf.image.convert_image_dtype(image, dtype=tf.float32) return image, label AUTOTUNE = tf.data.experimental.AUTOTUNE ds_train = (ds_train_.map(convert_to_float).cache().prefetch( buffer_size=AUTOTUNE)) # Data Augmentation augment = keras.Sequential([ preprocessing.RandomContrast(factor=0.5), preprocessing.RandomFlip(mode='horizontal'), # meaning, left-to-right preprocessing.RandomFlip(mode='vertical'), # meaning, top-to-bottom preprocessing.RandomWidth(factor=0.15), # horizontal stretch preprocessing.RandomRotation(factor=0.20), preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1), preprocessing.RandomContrast(factor=0.10), preprocessing.RandomFlip(mode='horizontal'), preprocessing.RandomRotation(factor=0.10), ]) ex = next(iter(ds_train.unbatch().map(lambda x, y: x).batch(1))) plt.figure(figsize=(10, 10)) for i in range(16): image = augment(ex, training=True)
def main():
    """Load the Stanford Dogs dataset and resize it for EfficientNet.

    NOTE(review): this function continues in the following chunks of the
    file; only its opening portion is shown here.
    """
    # Loading data.
    # Load data from tensorflow_dataset (hereafter TFDS). Stanford Dogs
    # dataset is provided in TFDS as stanford_dogs. It features 20,580
    # images that belong to 120 classes of dog breeds (12,000 for
    # training and 8,580 for testing).
    # By simply changing dataset_name below, you may also try this
    # notebook for other datasets in TFDS such as cifar10, cifar100,
    # food101, etc. When the images are much smaller than the size of
    # Efficientnet input, we can simply upsample the input images. It
    # has been shown in Tan and Le, 2019 that transfer learning result
    # is better for increased resolution even if input images remain
    # small.
    # For TPU: if using TFDS datasets, a GCS bucket location is
    # required to save the datasets. For example:
    # tfds.load(dataset_name, data_dir="gs://example-bucket/datapath")
    # Also, both the current environment and the TPU service account
    # have proper access to the bucket. Alternatively, for small
    # datasets you may try loading data into the memory and use
    # tf.data.Dataset.from_tensor_slices().
    batch_size = 64

    dataset_name = "stanford_dogs"
    (ds_train, ds_test), ds_info = tfds.load(dataset_name,
                                             split=["train", "test"],
                                             with_info=True,
                                             as_supervised=True)
    num_classes = ds_info.features["label"].num_classes

    # When the dataset include images with various size, need to resize
    # them into a shared size. The Stanford Dogs dataset includes only
    # images at least 200x200 pixels in size. Here, resize the images
    # to the input size needed for EfficientNet.
    # IMG_SIZE is a module-level constant defined outside this chunk.
    size = (IMG_SIZE, IMG_SIZE)
    ds_train = ds_train.map(lambda image, label:
                            (tf.image.resize(image, size), label))
    ds_test = ds_test.map(lambda image, label:
                          (tf.image.resize(image, size), label))

    # Visualize the data.
    # The following code shows the first 9 images with their labels.
#''' def format_label(label): string_label = label_info.int2str(label) return string_label.split("-")[1] label_info = ds_info.features["labels"] for i, (image, label) in enumerate(ds_train.take(9)): ax = plt.subplot(3, 3, i + 1) plt.imshow(image.numpy().astype("uint8")) plt.title("{}".format(format_label(label))) plt.axis("off") #''' # Data augmentation. # Use preprocessing layers APIs for image augmentation. img_augmentation = Sequential( [ preprocessing.RandomRotation(factor - 0.15), preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1), preprocessing.RandomFlip(), preprocessing.RandomContrast(factor=0.1), ], name="img_augmentation", ) # This sequential model object can be used both as part of the # model built later, and as a function to preprocess data before # feeding into the model. Using them as a function makes it easy to # visualize the augmented images. Here, plot 9 examples of # augmentation result of a given figure. #''' for image, label in ds_train.take(1): for i in range(9): ax = plt.subplot(3, 3, i + 1) aug_img = img_augmentation(tf.expand_dims(image, axis=0)) plt.imshow(image[0].numpy().astype("uint8")) plt.title("{}".format(format_label(label))) plt.axis("off") #''' # Prepare inputs. # Once verified the input data and augmentation are working # correctly, prepared dataset for training. The input data is # resized to uniform IMG_SIZE. The labels are put into a one-hot # (a.k.a categorical) encoding. The dataset is batched. # Note: prefetch and AUTOTUNE may in some situation improve # performance, but depends on environment and the specific dataset # used. See this guide for more information on data pipeline # performance. # One-hot / categorical encoding. 
    def input_preprocess(image, label):
        # One-hot encode labels to match a categorical (softmax) head.
        label = tf.one_hot(label, num_classes)
        return image, label

    ds_train = ds_train.map(input_preprocess,
                            num_parallel_calls=tf.data.experimental.AUTOTUNE)
    ds_train = ds_train.batch(batch_size=batch_size, drop_remainder=True)
    ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)

    ds_test = ds_test.map(input_preprocess)
    ds_test = ds_test.batch(batch_size=batch_size, drop_remainder=True)

    # Train a model from scratch.
    # Build an EfficientNetB0 with 120 output classes, that is
    # initialized from scratch.
    # Note: the accuracy will increase very slowly and may overfit.
    # NOTE(review): `strategy` is defined outside this chunk (likely a
    # tf.distribute strategy created at module level).
    with strategy.scope():
        # NOTE(review): the source is truncated here — `inputs = la` is an
        # incomplete statement (presumably `layers.Input(...)`); the rest
        # of the model-building/training code is missing from this chunk.
        inputs = la

# Exit the program.
exit(0)