def build(self, hp, inputs=None):
    """Build the augmentation graph, tuning any unset factors via `hp`.

    Each factor left as None on the block is replaced by a hyperparameter
    choice; a factor of 0 (or (0, 0)) disables that transform entirely.

    # Arguments
        hp: HyperParameters instance used to sample unset factors.
        inputs: Input node(s); only the first flattened node is used.

    # Returns
        The output node with the selected augmentation layers applied.
    """
    input_node = nest.flatten(inputs)[0]
    output_node = input_node

    # Translate
    translation_factor = self.translation_factor
    if translation_factor is None:
        # Unset: let the tuner choose between "off" (0.0) and a mild shift.
        translation_factor = hp.Choice("translation_factor", [0.0, 0.1])
    if translation_factor != 0 and translation_factor != (0, 0):
        height_factor, width_factor = self._get_fraction_value(
            translation_factor)
        output_node = preprocessing.RandomTranslation(
            height_factor, width_factor)(output_node)

    # Flip
    horizontal_flip = self.horizontal_flip
    if horizontal_flip is None:
        horizontal_flip = hp.Boolean("horizontal_flip", default=True)
    vertical_flip = self.vertical_flip
    if self.vertical_flip is None:
        vertical_flip = hp.Boolean("vertical_flip", default=True)
    # Map the two booleans onto RandomFlip's mode string; the empty
    # string means "no flipping", in which case the layer is skipped.
    if not horizontal_flip and not vertical_flip:
        flip_mode = ""
    elif horizontal_flip and vertical_flip:
        flip_mode = "horizontal_and_vertical"
    elif horizontal_flip and not vertical_flip:
        flip_mode = "horizontal"
    elif not horizontal_flip and vertical_flip:
        flip_mode = "vertical"
    if flip_mode != "":
        output_node = preprocessing.RandomFlip(mode=flip_mode)(output_node)

    # Rotate
    rotation_factor = self.rotation_factor
    if rotation_factor is None:
        rotation_factor = hp.Choice("rotation_factor", [0.0, 0.1])
    if rotation_factor != 0:
        output_node = preprocessing.RandomRotation(rotation_factor)(
            output_node)

    # Zoom
    zoom_factor = self.zoom_factor
    if zoom_factor is None:
        zoom_factor = hp.Choice("zoom_factor", [0.0, 0.1])
    if zoom_factor != 0 and zoom_factor != (0, 0):
        # Factors are computed but the layer is disabled pending upstream
        # RandomZoom support.
        height_factor, width_factor = self._get_fraction_value(zoom_factor)
        # TODO: Add back RandomZoom when it is ready.
        # output_node = preprocessing.RandomZoom(
        #     height_factor, width_factor)(output_node)

    # Contrast
    contrast_factor = self.contrast_factor
    if contrast_factor is None:
        contrast_factor = hp.Choice("contrast_factor", [0.0, 0.1])
    if contrast_factor != 0 and contrast_factor != (0, 0):
        output_node = preprocessing.RandomContrast(contrast_factor)(
            output_node)
    return output_node
def get_augmenter(min_area, brightness, jitter):
    """Return a Sequential augmentation pipeline for contrastive training.

    The zoom/translation strength is derived from `min_area`, the minimum
    fraction of the image that a random crop should preserve.
    """
    # Stronger zoom when less area must be preserved.
    max_zoom = 1.0 - tf.sqrt(min_area)
    shift = max_zoom / 2
    pipeline = [
        keras.Input(shape=(image_size, image_size, image_channels)),
        preprocessing.Rescaling(1 / 255),
        preprocessing.RandomFlip("horizontal"),
        preprocessing.RandomTranslation(shift, shift),
        # Negative-only range: RandomZoom only zooms in, never out.
        preprocessing.RandomZoom((-max_zoom, 0.0), (-max_zoom, 0.0)),
        RandomColorAffine(brightness, jitter),
    ]
    return keras.Sequential(pipeline)
def __init__(self):
    """Set up the hybrid classifier: frozen backbone + ViT head.

    Also prints a parameter-count summary of the backbone, prepares the
    datasets, and builds the in-graph augmentation pipeline.
    """
    super(ClassifierHybrid, self).__init__()
    # Step counter maintained across training (used outside __init__).
    self.global_step = 0
    # Pretrained feature extractor, kept frozen during training.
    self.backbone = self.get_backbone()
    self.backbone.trainable = False
    # Keras-style parameter summary for the backbone.
    trainable_count = np.sum(
        [K.count_params(w) for w in self.backbone.trainable_weights])
    non_trainable_count = np.sum(
        [K.count_params(w) for w in self.backbone.non_trainable_weights])
    print('Total params: {:,}'.format(trainable_count + non_trainable_count))
    print('Trainable params: {:,}'.format(trainable_count))
    print('Non-trainable params: {:,}'.format(non_trainable_count))
    # self.head = tf.keras.Sequential([
    #     layers.Flatten(),
    #     layers.Dense(256, activation='relu'),
    #     layers.Dense(196)
    # ])
    # self.vision_transformer = ViT(img_size=9, channels=1408, patch_size=1, num_layers=8,
    #                               num_classes=196, d_model=512, num_heads=8, d_mlp=512)
    # Classification head: a Vision Transformer configured entirely from
    # the global `args` namespace.
    self.vision_transformer = ViT(img_size=args.num_patches,
                                  channels=args.num_channels,
                                  patch_size=args.patch_size,
                                  num_layers=args.num_layers,
                                  num_classes=args.num_classes,
                                  d_model=args.d_model,
                                  num_heads=args.num_heads,
                                  d_mlp=args.d_mlp)
    self.prepare_datasets()
    self.flag = True
    # Random augmentation applied to 260x260 RGB inputs.
    # NOTE(review): the 260x260 input size is hard-coded here — confirm it
    # matches the backbone's expected resolution.
    self.augmentation = tf.keras.Sequential(
        [
            tf.keras.Input(shape=(260, 260, 3)),
            preprocessing.RandomRotation(factor=0.15),
            preprocessing.RandomTranslation(height_factor=0.1,
                                            width_factor=0.1),
            preprocessing.RandomFlip(),
            preprocessing.RandomContrast(factor=0.1),
        ],
        name="augmentation",
    )
def build(self, hp, inputs=None):
    """Apply this block's preset augmentation factors to the input node.

    Translation, rotation, zoom and contrast use the factors already set
    on the block (a factor of 0 or (0, 0) disables that transform); only
    the flip directions are sampled through `hp` when left unset.
    """
    node = nest.flatten(inputs)[0]

    # Random translation.
    if self.translation_factor != 0 and self.translation_factor != (0, 0):
        h, w = self._get_fraction_value(self.translation_factor)
        node = preprocessing.RandomTranslation(h, w)(node)

    # Resolve flip directions, sampling any left unset on the block.
    horizontal_flip = self.horizontal_flip
    vertical_flip = self.vertical_flip
    if horizontal_flip is None:
        horizontal_flip = hp.Boolean('horizontal_flip', default=True)
    if self.vertical_flip is None:
        vertical_flip = hp.Boolean('vertical_flip', default=True)
    # Dispatch the (horizontal, vertical) pair to RandomFlip's mode string;
    # an empty string means no flip layer is added.
    flip_mode = {
        (False, False): '',
        (True, True): 'horizontal_and_vertical',
        (True, False): 'horizontal',
        (False, True): 'vertical',
    }[(bool(horizontal_flip), bool(vertical_flip))]
    if flip_mode:
        node = preprocessing.RandomFlip(mode=flip_mode)(node)

    # Random rotation.
    if self.rotation_factor != 0:
        node = preprocessing.RandomRotation(self.rotation_factor)(node)

    # Random zoom — factors computed, but the layer is disabled upstream.
    if self.zoom_factor != 0 and self.zoom_factor != (0, 0):
        h, w = self._get_fraction_value(self.zoom_factor)
        # TODO: Add back RandomZoom when it is ready.
        # node = preprocessing.RandomZoom(h, w)(node)

    # Random contrast.
    if self.contrast_factor != 0 and self.contrast_factor != (0, 0):
        node = preprocessing.RandomContrast(self.contrast_factor)(node)
    return node
def __init__(self, model_name, learning_rate, pre_trained_model_path):
    """Configure the classifier for the chosen backbone.

    Resolves the backbone constructor and input size from the `base_model`
    registry, picks the pretrained-weight source, and builds the in-graph
    augmentation pipeline.
    """
    self.model_name = model_name
    self.lr = float(learning_rate)
    self.base_model, self.image_size = base_model[model_name]
    # Names starting with 'b' (presumably EfficientNet-B* variants) load a
    # local "<name>.h5" checkpoint; anything else uses Keras' bundled
    # imagenet weights.
    uses_local_checkpoint = model_name.startswith('b')
    self.weight_path = (
        os.path.join(pre_trained_model_path, self.model_name + ".h5")
        if uses_local_checkpoint
        else 'imagenet')
    augment_layers = [
        preprocessing.RandomRotation(factor=0.15),
        preprocessing.RandomTranslation(height_factor=0.1,
                                        width_factor=0.1),
        preprocessing.RandomFlip(),
        preprocessing.RandomContrast(factor=0.1),
    ]
    self.img_augmentation = Sequential(augment_layers,
                                       name="img_augmentation")
def build_model(NUM_CLASSES, IMG_SIZE):
    """Build a transfer-learning classifier on a frozen EfficientNetB3.

    Args:
        NUM_CLASSES (int): number of output classes.
        IMG_SIZE (int): height/width of the square input images.

    Returns:
        tf.keras.Model: the compiled model.
        float: the Adam learning rate used to compile it.
    """
    # In-graph random augmentation, applied before the backbone.
    augmentation = Sequential([
        preprocessing.RandomRotation(factor=0.15),
        preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1),
        preprocessing.RandomFlip(),
        preprocessing.RandomContrast(factor=0.1)
    ], name='img_augmentation')

    image_input = Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    augmented = augmentation(image_input)

    # Pretrained backbone, frozen so only the new head trains.
    backbone = EfficientNetB3(include_top=False,
                              input_tensor=augmented,
                              weights='imagenet')
    backbone.trainable = False

    # Classification head on top of pooled backbone features.
    features = GlobalAveragePooling2D(name='avg_pool')(backbone.output)
    features = BatchNormalization(trainable=True)(features)
    features = Dropout(0.2, name='top_dropout')(features)
    predictions = Dense(NUM_CLASSES, activation='softmax',
                        name='pred')(features)

    model = Model(image_input, predictions, name='EfficientNet')
    lr = 1e-4
    model.compile(optimizer=Adam(learning_rate=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return (model, lr)
def build_model(num_classes, config):
    """Build and compile a frozen-EfficientNetB0 transfer classifier.

    Args:
        num_classes: number of output classes for the softmax head.
        config: object providing `img_size`, `learning_rate` and
            `loss_function`.

    Returns:
        The compiled tf.keras.Model.
    """
    # In-graph augmentation applied before the backbone.
    augmentation = Sequential(
        [
            preprocessing.RandomRotation(factor=0.1),
            preprocessing.RandomTranslation(height_factor=0.1,
                                            width_factor=0.1),
            preprocessing.RandomContrast(factor=0.1),
        ],
        name="img_augmentation",
    )

    inputs = layers.Input(shape=(config.img_size, config.img_size, 3))
    augmented = augmentation(inputs)

    backbone = EfficientNetB0(include_top=False,
                              input_tensor=augmented,
                              weights="imagenet")
    # Freeze the pretrained weights; only the new head is trained.
    backbone.trainable = False

    # Rebuild the classification top.
    head = layers.GlobalAveragePooling2D(name="avg_pool")(backbone.output)
    head = layers.BatchNormalization()(head)
    head = layers.Dropout(0.3, name="top_dropout")(head)
    outputs = layers.Dense(num_classes, activation="softmax",
                           name="pred")(head)

    # Compile with the configured optimizer settings.
    model = tf.keras.Model(inputs, outputs, name="EfficientNet")
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=config.learning_rate),
        loss=config.loss_function,
        metrics=["accuracy", metrics.top_k_categorical_accuracy])
    return model
return image, label AUTOTUNE = tf.data.experimental.AUTOTUNE ds_train = (ds_train_.map(convert_to_float).cache().prefetch( buffer_size=AUTOTUNE)) # Data Augmentation augment = keras.Sequential([ preprocessing.RandomContrast(factor=0.5), preprocessing.RandomFlip(mode='horizontal'), # meaning, left-to-right preprocessing.RandomFlip(mode='vertical'), # meaning, top-to-bottom preprocessing.RandomWidth(factor=0.15), # horizontal stretch preprocessing.RandomRotation(factor=0.20), preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1), preprocessing.RandomContrast(factor=0.10), preprocessing.RandomFlip(mode='horizontal'), preprocessing.RandomRotation(factor=0.10), ]) ex = next(iter(ds_train.unbatch().map(lambda x, y: x).batch(1))) plt.figure(figsize=(10, 10)) for i in range(16): image = augment(ex, training=True) plt.subplot(4, 4, i + 1) plt.imshow(tf.squeeze(image)) plt.axis('off') plt.show()
from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf import tensorflow.keras as keras from tensorflow.keras import layers from tensorflow.keras.layers.experimental import preprocessing from kerastuner.engine import hypermodel # dict of functions that create layers for transforms. # Each function takes a factor (0 to 1) for the strength # of the transform. TRANSFORMS = { "translate_x": lambda x: preprocessing.RandomTranslation(x, 0), "translate_y": lambda y: preprocessing.RandomTranslation(0, y), "rotate": preprocessing.RandomRotation, "contrast": preprocessing.RandomContrast, } class HyperImageAugment(hypermodel.HyperModel): """Builds HyperModel for image augmentation. Only supporting augmentations available in Keras preprocessing layers currently. # Arguments: input_shape: Optional shape tuple, e.g. `(256, 256, 3)`. input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. rotate: A number between [0, 1], a list of two numbers between [0, 1]
def main(): # Loading data. # Load data from tensorflow_dataset (hereafter TFDS). Stanford Dogs # dataset is provided in TFDS as stanford_dogs. It features 20,580 # images that belong to 120 classes of dog breeds (12,000 for # training and 8,580 for testing). # By simply changing dataset_name below, you may also try this # notebook for other datasets in TFDS such as cifar10, cifar100, # food101, etc. When the images are much smaller than the size of # Efficientnet input, we can simply upsample the input images. It # has been shown in Tan and Le, 2019 that transfer learning result # is better for increased resolution even if input images remain # small. # For TPU: if using TFDS datasets, a GCS bucket location is # required to save the datasets. For example: # tfds.load(dataset_name, data_dir="gs://example-bucket/datapath") # Also, both the current environment and the TPU service account # have proper access to the bucket. Alternatively, for small # datasets you may try loading data into the memory and use # tf.data.Dataset.from_tensor_slices(). batch_size = 64 dataset_name = "stanford_dogs" (ds_train, ds_test), ds_info = tfds.load(dataset_name, split=["train", "test"], with_info=True, as_supervised=True) num_classes = ds_info.features["label"].num_classes # When the dataset include images with various size, need to resize # them into a shared size. The Stanford Dogs dataset includes only # images at least 200x200 pixels in size. Here, resize the images # to the input size needed for EfficientNet. size = (IMG_SIZE, IMG_SIZE) ds_train = ds_train.map(lambda image, label: (tf.image.resize(image, size), label)) ds_test = ds_test.map(lambda image, label: (tf.image.resize(image, size), label)) # Visualize the data. # The following code shows the first 9 images with their labels. 
#'''
def format_label(label):
    """Map an integer class id to a readable name.

    Labels come back as strings like "<wordnet_id>-<breed>"; keep only the
    part after the dash.
    """
    string_label = label_info.int2str(label)
    return string_label.split("-")[1]

# FIX: the feature key is "label" (singular), matching the
# ds_info.features["label"].num_classes lookup used when the dataset
# was loaded.
label_info = ds_info.features["label"]
for i, (image, label) in enumerate(ds_train.take(9)):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(image.numpy().astype("uint8"))
    plt.title("{}".format(format_label(label)))
    plt.axis("off")
#'''

# Data augmentation.
# Use preprocessing layers APIs for image augmentation.
img_augmentation = Sequential(
    [
        # FIX: was `factor - 0.15`, which raises NameError (no `factor`
        # variable exists in this scope); the intended keyword argument
        # is factor=0.15.
        preprocessing.RandomRotation(factor=0.15),
        preprocessing.RandomTranslation(height_factor=0.1,
                                        width_factor=0.1),
        preprocessing.RandomFlip(),
        preprocessing.RandomContrast(factor=0.1),
    ],
    name="img_augmentation",
)

# This sequential model object can be used both as part of the
# model built later, and as a function to preprocess data before
# feeding into the model. Using them as a function makes it easy to
# visualize the augmented images. Here, plot 9 examples of
# augmentation result of a given figure.
#'''
for image, label in ds_train.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        aug_img = img_augmentation(tf.expand_dims(image, axis=0))
        # FIX: display the augmented batch element (was `image[0]`,
        # which ignored the computed aug_img and indexed the first row
        # of the raw image tensor).
        plt.imshow(aug_img[0].numpy().astype("uint8"))
        plt.title("{}".format(format_label(label)))
        plt.axis("off")
#'''

# Prepare inputs.
# Once verified the input data and augmentation are working
# correctly, prepared dataset for training. The input data is
# resized to uniform IMG_SIZE. The labels are put into a one-hot
# (a.k.a categorical) encoding. The dataset is batched.
# Note: prefetch and AUTOTUNE may in some situation improve
# performance, but depends on environment and the specific dataset
# used. See this guide for more information on data pipeline
# performance.
# One-hot / categorical encoding.
def input_preprocess(image, label): label = tf.one_hot(label, num_classes) return image, label ds_train = ds_train.map(input_preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE) ds_train = ds_train.batch(batch_size=batch_size, drop_remainder=True) ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE) ds_test = ds_test.map(input_preprocess) ds_test = ds_test.batch(batch_size=batch_size, drop_remainder=True) # Train a model from scratch. # Build an EfficientNetB0 with 120 output classes, that is # initialized from scratch. # Note: the accuracy will increase very slowly and may overfit. with strategy.scope(): inputs = la # Exit the program. exit(0)
x_train, y_train, input_shape, random_labels=config.random_labels) x_test, y_test = prepare_all_data(config, x_test, y_test, input_shape) if input_shape[-1] > 1: means = x_train.mean(axis=(0, 1, 2)) std = x_train.std(axis=(0, 1, 2)) x_train = (x_train - means) / std x_test = (x_test - means) / std print("LOADING OVER", flush=True) augmenter = tf.keras.Sequential([ preprocessing.RandomContrast(0.2), preprocessing.RandomRotation(0.05), # = 12° rotation maximum preprocessing.RandomTranslation(0.13, 0.13, fill_mode='reflect'), # nearest better preprocessing.RandomFlip('horizontal') ]) augmenter.build((None, ) + input_shape) class CustomSequential(Sequential): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def train_step(self, data): x, y = data x = augmenter(x, training=True) with tf.GradientTape() as tape: y_pred = self(x, training=True) if config.alpha == 'adaptive':
print (x_test.shape) print (y_test.shape) print (x_train.shape) print (y_train.shape) #random_state = 1: Initial Seeding x_train, x_validate, y_train, y_validate = train_test_split( x_train, y_train, test_size=0.3, random_state = 1 ) text_model = Sequential([ #Padding y stride por defecto #preprocessing.RandomRotation(factor=0.1, fill_mode='constant'), preprocessing.RandomZoom(height_factor=0.05, width_factor=0.05, fill_mode='constant'), preprocessing.RandomTranslation(height_factor=0.05, width_factor=0.05, fill_mode='constant'), #Caracteristicas: Alta frecuencia tipicamente, no importa tanto el color # Se uso un tamaño de kernel de 7 para permitir la formación de filtros # capaces de indentificar caracteristicas de alta frecuencia del texto #El numero de filtros se dio a través de prueba y error (gracias GPU) Conv2D(filters=32, kernel_size=5, activation='relu', input_shape=IM_SHAPE), MaxPooling2D(pool_size=2), Conv2D(filters=24, kernel_size=3, activation='relu'), MaxPooling2D(pool_size=2), # Dropout alto # Se observo previene mejor el overfitting (|val_accuracy - accuracy|) # Se justifica por la calidad de los datos y la cantidad de estso Dropout(0.5), Flatten(), Dense(32, activation='relu'), #Entrada #Se justifica la adición de la capa oculta debido a que no
def build_augmenters(imgaug=True):
    """Build the data-augmentation pipeline.

    Args:
        imgaug: when True, return a seeded `imgaug` pipeline
            (iaa.Sequential); otherwise return a Keras preprocessing
            Sequential model.

    Returns:
        Either an iaa.Sequential or a Keras Sequential, depending on
        `imgaug`. Augmentation strength is reduced when the global
        EXTRACT_IMAGES flag is set.
    """
    if imgaug:
        # Seed the imgaug RNG so the pipeline is reproducible.
        ia.seed(SEED)
        # Wrap an augmenter so it is applied only 80% of the time.
        sometimes = lambda aug: iaa.Sometimes(0.8, aug, seed=SEED)
        augmenters = iaa.Sequential(
            [
                # Apply a random subset of the pixel-level augmenters
                # (fewer when extracting images).
                iaa.SomeOf(
                    (1, 3) if EXTRACT_IMAGES else (2, 4),
                    [
                        iaa.Add((-10, 10), per_channel=0.5, seed=SEED),
                        iaa.AdditiveGaussianNoise(loc=0,
                                                  scale=(0.0, 0.05 * 255),
                                                  per_channel=0.5,
                                                  seed=SEED),
                        # One blur variant per image.
                        # NOTE(review): k=1 makes AverageBlur and MedianBlur
                        # identity operations — confirm whether a larger
                        # kernel was intended.
                        iaa.OneOf(
                            [
                                iaa.GaussianBlur(sigma=(0, 0.5), seed=SEED),
                                iaa.AverageBlur(k=1, seed=SEED),
                                iaa.MedianBlur(k=1, seed=SEED),
                            ],
                            seed=SEED,
                        ),
                        iaa.LinearContrast(
                            (0.8, 1.2), per_channel=0.5, seed=SEED),
                    ],
                ),
                # Occasional horizontal or vertical flip.
                sometimes(
                    iaa.OneOf(
                        [
                            iaa.Fliplr(0.5, seed=SEED),
                            iaa.Flipud(0.2, seed=SEED),
                        ],
                        seed=SEED,
                    ),
                ),
                # Occasional affine distortion (scale/shift/rotate/shear),
                # milder when extracting images.
                sometimes(
                    iaa.Affine(
                        scale={
                            "x": (0.5, 1) if EXTRACT_IMAGES else (0.8, 1.2),
                            "y": (0.5, 1) if EXTRACT_IMAGES else (0.8, 1.2)
                        },
                        translate_percent={
                            "x": (-0.01, 0.01) if EXTRACT_IMAGES else (-0.2, 0.2),
                            "y": (-0.01, 0.01) if EXTRACT_IMAGES else (-0.2, 0.2),
                        },
                        rotate=(-25, 25) if EXTRACT_IMAGES else (-45, 45),
                        shear=(-16, 16),
                        # Interpolation order sampled from these choices.
                        order=[0, 1],
                        cval=(0, 255),
                        mode=ia.ALL,
                        seed=SEED,
                    )),
            ],
            # Top-level steps run in random order per image.
            random_order=True,
            seed=SEED,
        )
        return augmenters
    else:
        # Keras-native fallback pipeline.
        img_augmentation = Sequential(
            [
                preprocessing.RandomRotation(factor=0.3, seed=SEED),
                preprocessing.RandomTranslation(
                    height_factor=0.01 if EXTRACT_IMAGES else 0.2,
                    width_factor=0.01 if EXTRACT_IMAGES else 0.2,
                    seed=SEED),
                preprocessing.RandomFlip(seed=SEED),
                preprocessing.RandomContrast(
                    factor=0.1 if EXTRACT_IMAGES else 0.2,
                    seed=SEED),
            ],
            name="img_augmentation",
        )
        return img_augmentation
validation_data = batch_test_dataset, verbose=2) plot_hist(hist) from tensorflow.keras.layers.experimental import preprocessing from tensorflow.keras.models import Sequential from tensorflow.keras import layers img_augmentation = Sequential( [ # representing lower and upper bound for rotating clockwise and counter-clockwise. preprocessing.RandomRotation(factor=0.15), # a float represented as fraction of 2pi, ex :0.15 (= 54 degree!) preprocessing.RandomTranslation(height_factor=0.1, # lower and upper bound for shifting vertically width_factor=0.1 #lower and upper bound for shifting horizontally. ), preprocessing.RandomFlip(), # Randomly flip each image horizontally and vertically. preprocessing.RandomContrast(factor=0.1), ], name="img_augmentation", ) # 각각의 layer가 순서대로? 실행된다고 생각 for image, label in train_dataset.take(1): for i in range(9): ax = plt.subplot(3, 3, i + 1) aug_img = img_augmentation(tf.expand_dims(image, axis=0)) plt.imshow(aug_img[0].numpy().astype("uint8")) # 이동에 대한 ~가 줄어들고, 일반화 잘됨. 데이터는 많아지겠지. 학습시간도 plt.title(f"{format_label(label)}") plt.axis("off")
from __future__ import division from __future__ import print_function import tensorflow as tf import tensorflow.keras as keras from kerastuner.engine import hypermodel from tensorflow.keras import layers from tensorflow.keras.layers.experimental import preprocessing # dict of functions that create layers for transforms. # Each function takes a factor (0 to 1) for the strength # of the transform. TRANSFORMS = { 'translate_x': lambda x: preprocessing.RandomTranslation(x, 0), 'translate_y': lambda y: preprocessing.RandomTranslation(0, y), 'rotate': preprocessing.RandomRotation, 'contrast': preprocessing.RandomContrast, } class HyperImageAugment(hypermodel.HyperModel): """ Builds HyperModel for image augmentation. Only supporting augmentations available in Keras preprocessing layers currently. # Arguments: input_shape: Optional shape tuple, e.g. `(256, 256, 3)`. input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. rotate: A number between [0, 1], a list of two numbers between [0, 1]