def random_augment(img, mask, aug_dict):
    """Randomly augment an image and its mask using one shared seed.

    A single seed is drawn and handed to both ``random_transform`` calls so
    that the image and the mask are transformed with the same seed.

    Args:
        img: Image array passed to ``ImageDataGenerator.random_transform``.
        mask: Mask array transformed with the same seed as ``img``.
        aug_dict: Keyword arguments forwarded to ``ImageDataGenerator``.

    Returns:
        Tuple ``(out_img, out_mask)`` with the transformed image and mask.
    """
    augmenter = ImageDataGenerator(**aug_dict)
    shared_seed = np.random.randint(1e9)
    return (
        augmenter.random_transform(img, seed=shared_seed),
        augmenter.random_transform(mask, seed=shared_seed),
    )
class TrainSequence(Sequence):
    """Keras ``Sequence`` producing (image, one-hot mask) batches.

    For every row of ``df`` in a batch the input image and the label image
    are read with OpenCV, resized, augmented with the same random seed and
    returned as ``x`` (scaled by 1/255) and ``y`` (one channel per class).

    NOTE(review): the annotated fields plus ``__post_init__`` suggest this
    class is meant to be decorated with ``@dataclass``; the decorator is not
    visible in this block - confirm.
    """

    directory: str  # folder that contains the image files
    df: pd.DataFrame  # data description; columns "filename" and "label" are read
    image_size: tuple  # size handed to cv2.resize, i.e. (width, height)
    classes: int  # number of segmentation classes
    batch_size: int  # batch size
    aug_params: dict  # ImageDataGenerator augmentation parameters

    def __post_init__(self):
        self.df_index = list(self.df.index)
        self.train_datagen = ImageDataGenerator(**self.aug_params)

    def __len__(self):
        """Return the number of batches, the last one possibly being partial."""
        return math.ceil(len(self.df_index) / self.batch_size)

    def __getitem__(self, idx):
        """Build batch ``idx``.

        Returns:
            Tuple ``(x, y)`` of numpy arrays: the augmented images scaled by
            1/255 and the float32 one-hot masks with ``classes`` channels.

        Raises:
            FileNotFoundError: if an image or label file cannot be read.
        """
        batch_ids = self.df_index[idx * self.batch_size:(idx + 1) * self.batch_size]

        x = []
        y = []
        for i in batch_ids:
            # One seed per sample, shared by the image and its mask.
            rand = np.random.randint(0, int(1e9))

            # Input image
            image_path = f'{self.directory}/{self.df.at[i, "filename"]}'
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals failure with None instead of raising.
                raise FileNotFoundError(f'cannot read image: {image_path}')
            img = cv2.resize(img, self.image_size, interpolation=cv2.INTER_LANCZOS4)
            img = np.array(img, dtype=np.float32)
            img = self.train_datagen.random_transform(img, seed=rand)
            img *= 1. / 255
            x.append(img)

            # Segmentation (label) image
            label_path = f'{self.directory}/{self.df.at[i, "label"]}'
            mask = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                raise FileNotFoundError(f'cannot read label image: {label_path}')
            # Nearest-neighbour keeps pixel values inside the set of class
            # ids; Lanczos would blend neighbouring ids into invalid values.
            mask = cv2.resize(mask, self.image_size, interpolation=cv2.INTER_NEAREST)
            mask = np.array(mask, dtype=np.float32)
            # Add/remove the channel axis by indexing instead of reshaping to
            # (image_size[0], image_size[1]): cv2.resize returns an array of
            # shape (height, width) for dsize=(width, height), so the old
            # reshape scrambled non-square masks.
            mask = mask[..., np.newaxis]
            # NOTE(review): random_transform may still interpolate label
            # values for rotations/zooms; consider a generator with
            # interpolation_order=0 for masks.
            mask = self.train_datagen.random_transform(mask, seed=rand)
            mask = mask[..., 0]

            seg = np.stack(
                [mask == label for label in range(self.classes)], axis=-1
            ).astype(np.float32)
            y.append(seg)

        return np.array(x), np.array(y)
def preprocess_img(self, img_path):
    """Load one image, rescale it, optionally augment it and fit it to size.

    :param img_path: path of the image file to open
    :return: the value returned by ``self.center_img`` for the processed
        RGB array
    """
    # 1. Read the image and rescale it so that its longest side becomes
    #    self.img_size[0] (up to integer truncation).
    picture = Image.open(img_path)
    ratio = self.img_size[0] / max(picture.size[:2])
    scaled_size = (int(picture.size[0] * ratio), int(picture.size[1] * ratio))
    pixels = np.array(picture.resize(scaled_size).convert('RGB'))

    # 2. Augmentation, applied only when self.use_aug is set.
    if self.use_aug:
        # 2a. Random erasing
        pixels = self.eraser(pixels)
        # 2b. Small random shifts and horizontal/vertical flips
        augmenter = ImageDataGenerator(
            width_shift_range=0.05,
            height_shift_range=0.05,
            horizontal_flip=True,
            vertical_flip=True,
        )
        pixels = augmenter.random_transform(pixels)

    # 3. Bring the array to the final square size. The original note advises
    #    against cropping or distorting here so the augmentation is kept;
    #    presumably center_img pads instead - TODO confirm.
    return self.center_img(pixels, self.img_size[0])
def augment_input(img, mask, aug_dict, batch_size=32,
                  random_crop_size=(256, 256), only_crop=False):
    """Build a batch of random crops of ``img``/``mask``, optionally augmented.

    Args:
        img: 3-D image array the crops are taken from.
        mask: Mask array cropped together with ``img`` by ``random_crop``.
        aug_dict: Keyword arguments forwarded to ``ImageDataGenerator``.
        batch_size: Number of crops to produce.
        random_crop_size: ``(rows, cols)`` of every crop.
        only_crop: When true the crops are stored without augmentation.

    Returns:
        Tuple ``(out_imgs, out_masks)``; both arrays have shape
        ``(batch_size, random_crop_size[0], random_crop_size[1], 1)``.
    """
    _, _, _ = img.shape  # kept from the original: rejects non 3-D input
    augmenter = ImageDataGenerator(**aug_dict)

    batch_shape = (batch_size, random_crop_size[0], random_crop_size[1], 1)
    out_imgs = np.zeros(batch_shape)
    out_masks = np.zeros(batch_shape)

    for slot in range(batch_size):
        # Drawn before cropping, exactly as before, so the sequence of
        # random draws is unchanged.
        seed = np.random.randint(1e9)
        crop_img, crop_mask = random_crop(img, mask, random_crop_size)
        if only_crop:
            out_imgs[slot, ...] = crop_img
            out_masks[slot, ...] = crop_mask
            continue
        # Same seed for both calls so image and mask stay aligned.
        out_imgs[slot, ...] = augmenter.random_transform(crop_img, seed=seed)
        out_masks[slot, ...] = augmenter.random_transform(crop_mask, seed=seed)

    return out_imgs, out_masks
class SegmentationSequence(Sequence):
    """Keras ``Sequence`` serving shuffled (image, mask) batches.

    With ``jitter`` enabled every sample is passed through an
    ``ImageDataGenerator`` configured with small rotations and shifts.
    """

    def __init__(self, images, masks, batch_size, jitter=False):
        """Store the arrays, draw the first shuffle and set up the jitter.

        Args:
            images: 4-D array of images, indexed by sample on axis 0.
            masks: 4-D array of masks, indexed like ``images``.
            batch_size: Number of samples per batch.
            jitter: Whether to apply random rotation/shift augmentation.
        """
        self.masks = masks
        self.images = images
        self.batch_size = batch_size
        self.shuffled_indices = np.random.permutation(self.images.shape[0])
        self.jitter = jitter
        if self.jitter:
            self.jitter_datagen = ImageDataGenerator(rotation_range=5,
                                                     width_shift_range=0.05,
                                                     height_shift_range=0.05,
                                                     fill_mode="nearest")

    def __len__(self):
        """Return the number of full batches per epoch."""
        sample_count = self.images.shape[0]
        return sample_count // self.batch_size

    @staticmethod
    def _planes_to_batch(planes):
        """Turn a list of 2-D planes into an array of shape (N, H, W, 1)."""
        depth_stacked = np.dstack(planes)
        return np.transpose(depth_stacked[:, :, :, np.newaxis], [2, 0, 1, 3])

    def __getitem__(self, idx):
        """Return the ``(batch_images, batch_masks)`` tuple for batch ``idx``."""
        # The shuffled indices that belong to this batch
        batch_inds = self.shuffled_indices[idx * self.batch_size:(idx + 1) * self.batch_size]

        if not self.jitter:
            # No augmentation: plain fancy-indexed slices
            return (self.images[batch_inds, :, :, :],
                    self.masks[batch_inds, :, :, :])

        image_planes = []
        mask_planes = []
        for i in batch_inds:
            # Image and mask are stacked on the channel axis so that one
            # random_transform call moves both in exactly the same way.
            stacked = np.dstack([
                self.images[i, :, :, :].astype(np.uint8),
                self.masks[i, :, :, :]
            ])
            jittered = self.jitter_datagen.random_transform(stacked)
            # Channel 0 is taken as the image and channel 1 as the mask;
            # this presumes single-channel images - TODO confirm.
            image_planes.append(jittered[:, :, 0].astype(float))
            mask_planes.append(jittered[:, :, 1])

        return (self._planes_to_batch(image_planes),
                self._planes_to_batch(mask_planes))

    def on_epoch_end(self):
        """Draw a fresh permutation of the sample indices."""
        self.shuffled_indices = np.random.permutation(self.images.shape[0])
def generator():
    """Yield ``(X_batch, y_batch)`` mini-batches forever.

    ``X``, ``y``, ``batch_size`` and ``aug`` are read from the enclosing
    scope. ``y`` is iterated as a collection of target arrays, each sliced
    in step with ``X``. When ``aug`` is truthy every image of the (copied)
    batch is replaced by a randomly flipped/rotated/zoomed version.
    """
    idg = ImageDataGenerator(horizontal_flip=True,
                             rotation_range=20,
                             zoom_range=0.2)
    while True:
        for start in range(0, len(X), batch_size):
            stop = start + batch_size
            # Copy so that augmentation never writes into X itself
            images = X[start:stop].copy()
            targets = [target[start:stop] for target in y]
            if aug:
                for position, sample in enumerate(images):
                    images[position] = idg.random_transform(sample)
            yield images, targets
def data_generator(X: Union[np.ndarray, list],
                   Y: Union[np.ndarray, list],
                   batch_size: int,
                   target_shape: tuple = (224, 224, 3),
                   if_shuffle: bool = False,
                   augment: bool = False,
                   rotation_range: int = 30,
                   horizontal_flip: bool = True):
    """
    Generate mini-batches of data forever, optionally with augmentation.

    Args:
        X: Images the batches are taken from
        Y: Targets, sliced and shuffled in lockstep with ``X``
        batch_size: Size of the batch used
        target_shape (tuple): Shape each image is resized to
        if_shuffle (bool): Whether to shuffle the data (at start and after
            every full pass)
        augment (bool): Whether to augment the images
        rotation_range (int): Rotation range for an image if it is augmented
        horizontal_flip (bool): Whether to randomly flip images horizontally

    Yields:
        Tuple ``(X_preprocessed, Y_batch)`` for one mini-batch
    """
    # len() works for both ndarray and list inputs (the signature allows
    # lists, for which ``X.shape`` does not exist).
    num_samples = len(X)

    if if_shuffle:
        X, Y = shuffle(X, Y)
    if augment:
        data_augmenter = ImageDataGenerator(rotation_range=rotation_range,
                                            horizontal_flip=horizontal_flip)

    start = 0
    while True:
        end = start + batch_size
        X_batch = X[start:end]
        Y_batch = Y[start:end]
        X_batch_resized = resize_images(X_batch, target_shape)

        if augment:
            # No fixed seed here: passing the same seed on every call gave
            # every image the identical rotation/flip and reset numpy's
            # global RNG, which defeats random augmentation.
            X_batch_resized = np.array([
                data_augmenter.random_transform(image)
                for image in X_batch_resized
            ])

        X_preprocessed = preprocess_input(X_batch_resized)

        start = end
        if start >= num_samples:
            # Full pass done: restart and optionally reshuffle
            start = 0
            if if_shuffle:
                X, Y = shuffle(X, Y)

        yield (X_preprocessed, Y_batch)
def add_augmented_images(*ids):
    """Augment the image of the first id and save it under the second id.

    Every id is mapped to ``<Config.DATA_DIR>/images/<id>.jpg``. The first
    path is read, resized to ``Config.IMG_SHAPE``, randomly transformed and
    written to the second path.

    Args:
        *ids: The old (source) id followed by the new (target) id.
    """
    from tensorflow.keras.preprocessing.image import ImageDataGenerator

    paths = [
        str(Path(Config.DATA_DIR, 'images', f'{image_id}.jpg'))
        for image_id in ids
    ]

    encoded = tf.io.read_file(paths[0])
    image = tf.image.decode_jpeg(encoded)
    image = tf.image.resize(image, Config.IMG_SHAPE)

    augmenter = ImageDataGenerator(
        rotation_range=30,
        zoom_range=0.5,
        shear_range=0.3,
        horizontal_flip=True,
        width_shift_range=0.3,
        height_shift_range=0.3,
    )

    destination = paths[1]
    augmented = augmenter.random_transform(image.numpy())
    tf.keras.preprocessing.image.save_img(destination, augmented)
class SliceSelectionSequence(Sequence):
    """Keras ``Sequence`` serving (image, label) batches read from PNG files.

    Images are loaded from ``image_dir`` as ``<index zero-padded to 6>.png``,
    resized to 256x256 and optionally jittered. Every epoch consists of
    ``batches_per_epoch`` batches; indices are sampled with replacement only
    when the epoch needs more samples than there are labels.
    """

    def __init__(self,
                 labels,
                 image_dir,
                 batch_size,
                 batches_per_epoch,
                 jitter=False,
                 sigmoid_scale=None):
        """
        Args:
            labels: Array of labels, indexed by sample number.
            image_dir: Directory containing the PNG images.
            batch_size: Number of samples per batch.
            batches_per_epoch: Value returned by ``len()``.
            jitter: Whether to apply random rotation/shift augmentation.
            sigmoid_scale: If not None, labels are passed through
                ``expit(label / sigmoid_scale)``.
        """
        self.labels = labels
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.batches_per_epoch = batches_per_epoch
        self.jitter = jitter
        self.sigmoid_scale = sigmoid_scale
        # Use the same sampling as every later epoch. A plain permutation of
        # len(labels) is too short when batches_per_epoch * batch_size
        # exceeds the number of labels, which left the first epoch with
        # short or empty batches.
        self.on_epoch_end()
        if self.jitter:
            self.jitter_datagen = ImageDataGenerator(rotation_range=5,
                                                     width_shift_range=0.05,
                                                     height_shift_range=0.05,
                                                     fill_mode="constant",
                                                     cval=0)

    def __len__(self):
        """Return the configured number of batches per epoch."""
        return self.batches_per_epoch

    def __getitem__(self, idx):
        """Return ``(batch_images, batch_labels)`` for batch ``idx``.

        ``batch_images`` has shape ``(N, 256, 256, 1)`` and float dtype.
        """
        # The shuffled indices in this batch
        batch_inds = self.shuffled_indices[idx * self.batch_size:(idx + 1) * self.batch_size]

        # Labels for this batch
        batch_labels = self.labels[batch_inds]

        # Soft-threshold the labels using a sigmoid
        if self.sigmoid_scale is not None:
            batch_labels = expit(batch_labels / self.sigmoid_scale)

        # The images for this batch
        images_list = []
        for i in batch_inds:
            # Load the image and give it a trailing channel axis
            filename = os.path.join(self.image_dir, str(i).zfill(6) + '.png')
            im = resize(imread(filename), (256, 256),
                        mode='constant',
                        preserve_range=True,
                        anti_aliasing=True)[:, :, np.newaxis]

            # Apply random jitter (rotation and shift)
            if self.jitter:
                im = self.jitter_datagen.random_transform(im)

            images_list.append(im)

        # (256, 256, N) -> (N, 256, 256, 1)
        batch_images = np.dstack(images_list).astype(float)
        batch_images = np.transpose(batch_images[:, :, :, np.newaxis],
                                    [2, 0, 1, 3])

        return (batch_images, batch_labels)

    def on_epoch_end(self):
        """Sample the indices for one epoch.

        Exactly ``batches_per_epoch * batch_size`` indices are drawn, with
        replacement only when that exceeds the number of labels.
        """
        required = self.batches_per_epoch * self.batch_size
        use_replacement = required > len(self.labels)
        self.shuffled_indices = np.random.choice(len(self.labels),
                                                 required,
                                                 replace=use_replacement)
]) model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) model2.fit(X, y, epochs=6) """## Validation et prédictions""" test_loss, test_accuracy = model1.evaluate(X_test, y_test, steps=math.ceil(len(X_test)/50)) print('Accuracy on test dataset:', test_accuracy) test_loss, test_accuracy = model2.evaluate(X_test, y_test, steps=math.ceil(len(X_test)/50)) print('Accuracy on test dataset:', test_accuracy) #Changer l'indice pour predire dans les arrays X_test et y_test a = random.randint(0,1500) img = X_test[a] plt.imshow(img) img = np.array([img],dtype="float16") print(CATEGORIES[y_test[a][0]]) predictions = model1.predict(img) print(CATEGORIES[np.argmax(predictions[0])]) img_trans = datagen.random_transform(X[0]) plt.imshow(img_trans)
class BalanceCovidDataset(keras.utils.Sequence):
    """Generates class-balanced batches for Keras.

    Non-COVID samples ('normal' + 'pneumonia') are served in order and a
    share of every batch (``covid_percent``) is replaced by randomly drawn
    'COVID-19' samples.
    """

    def __init__(self,
                 data_dir,
                 csv_file,
                 is_training=True,
                 batch_size=8,
                 input_shape=(224, 224),
                 n_classes=3,
                 num_channels=3,
                 mapping=None,
                 shuffle=True,
                 augmentation=True,
                 covid_percent=0.3,
                 class_weights=None):
        """Initialization.

        Args:
            data_dir: Root folder holding the 'train' and 'test' folders.
            csv_file: File handed to ``_process_csv_file``; every resulting
                line is split on whitespace, field 1 being the file name and
                field 2 the class name.
            is_training: Selects the 'train' folder and enables augmentation.
            batch_size: Number of samples per batch.
            input_shape: Size passed to ``cv2.resize``.
            n_classes: Number of classes for the one-hot targets.
            num_channels: Number of image channels.
            mapping: Class name -> integer label. Defaults to
                ``{'normal': 0, 'pneumonia': 1, 'COVID-19': 2}``.
            shuffle: Whether to shuffle the file lists after every epoch.
            augmentation: Whether to create the augmenting generator.
            covid_percent: Share of each batch replaced by COVID-19 samples.
            class_weights: Stored on the instance. Defaults to
                ``[1., 1., 6.]``.
        """
        self.datadir = data_dir
        self.dataset = _process_csv_file(csv_file)
        self.is_training = is_training
        self.batch_size = batch_size
        self.N = len(self.dataset)
        self.input_shape = input_shape
        self.n_classes = n_classes
        self.num_channels = num_channels
        # None sentinels avoid sharing one mutable default between instances.
        self.mapping = ({'normal': 0, 'pneumonia': 1, 'COVID-19': 2}
                        if mapping is None else mapping)
        # Honour the caller's choice (was hard-coded to True).
        self.shuffle = shuffle
        self.covid_percent = covid_percent
        self.class_weights = ([1., 1., 6.]
                              if class_weights is None else class_weights)
        self.n = 0

        if augmentation:
            self.augmentation = ImageDataGenerator(
                featurewise_center=False,
                featurewise_std_normalization=False,
                rotation_range=10,
                width_shift_range=0.1,
                height_shift_range=0.1,
                horizontal_flip=True,
                brightness_range=(0.9, 1.1),
                zoom_range=(0.85, 1.15),
                fill_mode='constant',
                cval=0.,
            )

        datasets = {'normal': [], 'pneumonia': [], 'COVID-19': []}
        for l in self.dataset:
            datasets[l.split()[2]].append(l)
        # Index 0: all non-COVID lines, index 1: COVID-19 lines
        self.datasets = [
            datasets['normal'] + datasets['pneumonia'],
            datasets['COVID-19'],
        ]
        self.on_epoch_end()

    def __next__(self):
        """Return the next batch, wrapping around after a full pass."""
        batch_x, batch_y = self.__getitem__(self.n)
        self.n += 1

        # After a full pass: reshuffle and start over
        if self.n >= self.__len__():
            # The call parentheses were missing, so no reshuffle happened.
            self.on_epoch_end()
            self.n = 0

        return batch_x, batch_y

    def __len__(self):
        """Return the number of batches needed to cover the non-COVID lines."""
        return int(np.ceil(len(self.datasets[0]) / float(self.batch_size)))

    def on_epoch_end(self):
        """Shuffle both file lists in place when shuffling is enabled."""
        if self.shuffle:
            for v in self.datasets:
                np.random.shuffle(v)

    def __getitem__(self, idx):
        """Return ``(batch_x, one_hot_batch_y)`` for batch ``idx``.

        The final batch of an epoch may hold fewer than ``batch_size``
        samples; it is returned at its true length.
        """
        batch_x = np.zeros(
            (self.batch_size, *self.input_shape, self.num_channels))
        batch_y = np.zeros(self.batch_size)

        if idx % 200 == 0 and idx != 0:
            # Every 200th batch is drawn from the COVID-19 lines only
            batch_files = np.random.choice(self.datasets[1],
                                           size=self.batch_size,
                                           replace=False)
        else:
            batch_files = self.datasets[0][idx * self.batch_size:(idx + 1) *
                                           self.batch_size]

        # Upsample COVID cases: overwrite random slots with COVID-19 lines
        covid_size = max(int(len(batch_files) * self.covid_percent), 1)
        covid_inds = np.random.choice(np.arange(len(batch_files)),
                                      size=covid_size,
                                      replace=False)
        covid_files = np.random.choice(self.datasets[1],
                                       size=covid_size,
                                       replace=False)
        for slot, covid_line in zip(covid_inds, covid_files):
            batch_files[slot] = covid_line

        folder = 'train' if self.is_training else 'test'
        augment = self.is_training and hasattr(self, 'augmentation')

        for i, line in enumerate(batch_files):
            sample = line.split()

            x = cv2.imread(os.path.join(self.datadir, folder, sample[1]))
            height = x.shape[0]
            # Drop the top sixth of the image before resizing
            x = x[int(height / 6):, :]
            x = cv2.resize(x, self.input_shape)

            if augment:
                x = self.augmentation.random_transform(x)

            batch_x[i] = x.astype('float32') / 255.0
            batch_y[i] = self.mapping[sample[2]]

        # Trim the zero-initialised rows of a short last batch; they used
        # to be returned as all-black images labelled class 0.
        filled = len(batch_files)
        return batch_x[:filled], keras.utils.to_categorical(
            batch_y[:filled], num_classes=self.n_classes)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_gen = ImageDataGenerator(rotation_range=20,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             shear_range=0.1,
                             zoom_range=0.1,
                             horizontal_flip=True,
                             fill_mode='nearest')

# Show how the image data generator works on one sample image.
# (Was ``sample_im.shape = imread(...)``, which does not bind the image to
# ``sample_im``, the name displayed below.)
sample_im = imread(sample_image)
plt.imshow(sample_im)

# Show a randomly augmented version of the sample image
plt.imshow(img_gen.random_transform(sample_im))

# The two calls below report the images/classes found in the train and
# test folders
img_gen.flow_from_directory(train_set)
img_gen.flow_from_directory(test_set)

# =============================================================================
# Creating the data augmentation instance for the train set
# =============================================================================

batch_size = 16

train_image_gen = img_gen.flow_from_directory(train_set,
                                              target_size=image_shape[:2],
                                              color_mode='rgb',
                                              batch_size=batch_size,
                                              class_mode='binary')
# Print a random paintings and it's random augmented version fig, axes = plt.subplots(1, 2, figsize=(20, 10)) random_artist = random.choice(artists_top_name) random_image = random.choice( os.listdir(os.path.join(images_dir, random_artist))) random_image_file = os.path.join(images_dir, random_artist, random_image) # Original image image = plt.imread(random_image_file) axes[0].imshow(image) axes[0].set_title("An original Image of " + random_artist.replace('_', ' ')) axes[0].axis('off') # Transformed image aug_image = train_datagen.random_transform(image) axes[1].imshow(aug_image) axes[1].set_title("A transformed Image of " + random_artist.replace('_', ' ')) axes[1].axis('off') plt.show() # %% [markdown] # ## Build Model # CNN model # Build model model = tf.keras.Sequential([ tf.keras.layers.Conv2D(32, (3, 3), input_shape=train_input_shape, activation=tf.keras.activations.relu, padding='same'),
class DataGenerator(keras.utils.Sequence):
    """
    Generates frame batches for CNN
    """

    def __init__(self,
                 list_IDs,
                 targets,
                 train_test,
                 batch_size=128,
                 dim=(96, 96),
                 n_channels=1,
                 shuffle=True):
        """Initialization.

        Args:
            list_IDs: Sequence of sample ids; each id is passed to
                ``process_image`` and used as a key into ``targets``.
            targets: Mapping from id to its target (stored as 2 float32
                values per sample).
            train_test: ``'train'`` enables random augmentation.
            batch_size: Number of samples per batch.
            dim: Image dimensions passed to ``process_image``.
            n_channels: Number of image channels.
            shuffle: Whether to reshuffle the sample order every epoch.
        """
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.targets = targets
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.train_test = train_test
        self.on_epoch_end()

        if train_test == 'train':
            self.image_gen = ImageDataGenerator(rotation_range=15,
                                                width_shift_range=0.1,
                                                height_shift_range=0.1,
                                                shear_range=0.01,
                                                zoom_range=[0.9, 1.25],
                                                horizontal_flip=True,
                                                vertical_flip=False,
                                                fill_mode='reflect',
                                                data_format='channels_last',
                                                brightness_range=[0.5, 1.5])

    def __len__(self):
        """Denotes the number of (full) batches per epoch."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Generate one batch of data."""
        # Positions of this batch within the (possibly shuffled) order
        indexes = self.indexes[index * self.batch_size:(index + 1) *
                               self.batch_size]

        # Translate positions into ids
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        X, y = self.__data_generation(list_IDs_temp)
        return X, y

    def on_epoch_end(self):
        """Updates indexes after each epoch."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Generates data containing batch_size samples.

        Returns:
            ``X`` of shape ``(batch_size, *dim, n_channels)`` scaled by
            1/255 and ``y`` of shape ``(batch_size, 2)``.
        """
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size, 2), dtype=np.float32)

        for i, ID in enumerate(list_IDs_temp):
            augmented_image = process_image(ID, self.dim)
            if self.train_test == 'train':
                # No per-call seed: seeding every call with the same
                # constant applied one identical transform to every image
                # and reset numpy's global RNG on each sample, which also
                # fixed the per-epoch shuffle order.
                augmented_image = self.image_gen.random_transform(
                    augmented_image)
            X[i, ] = (augmented_image / 255.).astype(np.float32)

            # Store target
            y[i] = self.targets[ID]

        return X, y
def _augmented_variants(res, datagen):
    """Return the 7 variants of one image: original, 3 flips, 3 random."""
    variants = [res, cv2.flip(res, 1), cv2.flip(res, 0), cv2.flip(res, -1)]
    variants.extend(datagen.random_transform(res) for _ in range(3))
    return variants


def loadData():
    """Load, augment and scale the images of all three class folders.

    Every readable image is preprocessed and expanded into seven samples
    (the image, its three flips and three random transforms). The folders
    are visited in the order healthy (label 2), leaf (label 0) and
    rust (label 1). Files OpenCV cannot read are printed and skipped.

    Returns:
        Tuple ``(data_img, data_label)``: a numpy array of images divided
        by 255 and the list of integer labels in the same order.
    """
    datagen = ImageDataGenerator(rotation_range=80,
                                 width_shift_range=0.3,
                                 height_shift_range=0.3,
                                 shear_range=0.3,
                                 zoom_range=0.3,
                                 horizontal_flip=True,
                                 vertical_flip=True,
                                 fill_mode='nearest')

    sources = ((path_healthy, 2), (path_leaf, 0), (path_rust, 1))

    data_img = []
    data_label = []
    for class_dir, label in sources:
        for file in os.listdir(fileRoot + class_dir):
            img = cv2.imread(fileRoot + class_dir + file)
            if img is None:
                # cv2.imread returns None for unreadable/non-image files.
                # Only the leaf folder was guarded before; the other two
                # would fail inside preprocess().
                print(file)
                continue
            variants = _augmented_variants(preprocess(img), datagen)
            data_img.extend(variants)
            data_label.extend([label] * len(variants))

    data_img = np.array([image / 255 for image in data_img])
    return data_img, data_label
class ImageGeneratorParallel(keras.utils.Sequence):
    """Generates augmented data batches for Keras, optionally using threads."""

    def __init__(self,
                 features,
                 targets,
                 n_classes=2,
                 batch_size=32,
                 shuffle=True,
                 repeats=1,
                 parallel=True):
        """
        Args:
            features: Array of images, indexed by sample on axis 0.
            targets: Integer class labels, one per sample.
            n_classes: Number of classes for the one-hot targets.
            batch_size: Number of samples per batch.
            shuffle: Whether to reshuffle the sample order every epoch.
            repeats: How many times each sample appears per epoch.
            parallel: Augment with a 4-thread pool instead of ``flow``.
        """
        self.n_classes = n_classes
        self.n_vals = len(targets)
        # Because augmentation is used, each original image is shown
        # `repeats` times per epoch; it may be rotated or flipped
        # differently each time, so it is not the "same" image.
        # Original author's caveat: strictly these should be the valid
        # images only.
        self.list_IDs = np.repeat(np.arange(self.n_vals), repeats)
        self.batch_size = batch_size
        self.features = features
        self.shuffle = shuffle
        self.targets = targets
        self.targets_mc = keras.utils.to_categorical(
            targets, num_classes=self.n_classes)
        self.indexes = np.arange(len(self.list_IDs))
        self.pool = None  # created lazily on the first parallel batch
        self.parallel = parallel
        self.agumentator = ImageDataGenerator(
            # featurewise_center=True,
            # featurewise_std_normalization=True,
            rescale=1 / 255,
            rotation_range=40,
            zoom_range=0.2,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True,
            fill_mode='nearest')

    def __len__(self):
        """Denotes the number of (full) batches per epoch."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Generate one batch of data."""
        if self.parallel and self.pool is None:
            self.pool = ThreadPool(4)

        # Positions of this batch within the (possibly shuffled) order
        indexes = self.indexes[index * self.batch_size:(index + 1) *
                               self.batch_size]

        # Translate positions into sample ids
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        if self.parallel:
            X, y = self.__data_generation_threads(list_IDs_temp)
        else:
            X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        """Updates indexes after each epoch."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Augment one batch through ``ImageDataGenerator.flow``."""
        X = self.features[list_IDs_temp]
        y = self.targets_mc[list_IDs_temp]
        # shuffle must be off here: only X goes through flow(), so letting
        # flow() reorder the images broke the pairing with y. The sample
        # order is already randomised in on_epoch_end().
        batches = self.agumentator.flow(X,
                                        batch_size=self.batch_size,
                                        shuffle=False)
        X = np.array(next(batches))

        return X, y

    def __data_generation_threads(self, list_IDs_temp):
        """Augment one batch sample by sample on the thread pool."""
        X = self.features[list_IDs_temp]
        y = self.targets_mc[list_IDs_temp]

        def augment(xi):
            # standardize() is applied first, then the random transform
            return self.agumentator.random_transform(
                self.agumentator.standardize(xi))

        X = np.array(self.pool.map(augment, X))
        return X, y
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentation: small rotations, shifts, shear and zoom plus
# horizontal flips; pixels uncovered by a transform take the nearest value.
image_gen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    # rescale = 1 / 255
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Show the sample image, then one randomly transformed version of it
plt.imshow(un)
plt.show()
augmented_un = image_gen.random_transform(un)
plt.imshow(augmented_un)
plt.show()

# Report what the generator finds in the train and test folders
for directory in (train_path, test_path):
    print(image_gen.flow_from_directory(directory))

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Dropout, Flatten

# Start the model with its first convolutional layer
model = Sequential([
    Conv2D(filters=32,
           kernel_size=(3, 3),
           input_shape=image_shape,
           activation='relu'),
])
# Show the chosen test image and compare its label with the prediction
plt.imshow(img)
img = np.array([img], dtype="float16")
print(CATEGORIES[y_test[a][0]])
predictions = model1.predict(img)
print(CATEGORIES[np.argmax(predictions[0])])

# Load an external photo, shrink it to 50x50 and append a constant
# channel of 255 as the last axis
photo = Image.open("drive/My Drive/photo/14.jpg")
photo = np.array(photo.resize((50, 50)))
photo = np.concatenate(
    [photo, np.full((50, 50, 1), 255, dtype=np.uint8)], axis=-1)
print(photo.shape)
plt.imshow(photo)

# Predict on the photo and keep the index of the highest score
img = np.array([photo], dtype="float16")
predictions = model1.predict(img)
print(predictions[0])
result = [np.argmax(predictions[0])]

DIVISIONS = [
    "bend", "bend sinister", "chevron", "fess", "gyronny", "pale",
    "quarter", "saltire", "aucun"
]

print(result[0])

# Show a randomly transformed version of the first test image
img_trans = datagen.random_transform(X_test[0])
plt.imshow(img_trans)
help(ImageDataGenerator)

image_gen = ImageDataGenerator(
    rotation_range=20,        # rotate the image by up to 20 degrees
    width_shift_range=0.10,   # shift horizontally by up to 10% of the width
    height_shift_range=0.10,  # shift vertically by up to 10% of the height
    rescale=1/255,            # multiply every pixel value by 1/255
    shear_range=0.1,          # shear intensity (a shear transform, not a crop)
    zoom_range=0.1,           # zoom in or out by up to 10%
    horizontal_flip=True,     # allow horizontal flipping
    fill_mode='nearest'       # fill missing pixels with the nearest value
)

plt.imshow(para_img)

plt.imshow(image_gen.random_transform(para_img))

# Generating many manipulated images from a directory

image_gen.flow_from_directory(train_path)

# Fixed typo: was ``flow_from_dirctory``, which raises AttributeError
image_gen.flow_from_directory(test_path)

# https://stats.stackexchange.com/questions/148139/rules-for-selecting-convolutional-neural-network-hyperparameters
model = Sequential()

model.add(Conv2D(filters=32, kernel_size=(3,3),input_shape=image_shape, activation='relu',))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(filters=64, kernel_size=(3,3),input_shape=image_shape, activation='relu',))
model.add(MaxPooling2D(pool_size=(2, 2)))
class ImageDataSequence(Sequence):
    """Keras ``Sequence`` reading images from class-named sub-folders.

    The class names are the sorted entries of the first directory; every
    directory is expected to contain one sub-folder per class name. Images
    are loaded and randomly transformed one batch at a time.
    """

    def __init__(self, directories, batch_size, target_size, frac=1.0,
                 **kwargs):
        """
        Args:
            directories: One directory or a list of directories.
            batch_size: Number of samples per batch.
            target_size: ``(height, width, channels)``; 3 channels selects
                "rgb", anything else "grayscale".
            frac: Fraction of the (shuffled) files to keep.
            **kwargs: Forwarded to ``ImageDataGenerator``.
        """
        if not isinstance(directories, list):
            directories = [directories]

        self.batch_size = batch_size
        self.target_size = target_size
        self.classes = sorted(os.listdir(directories[0]))
        self.num_classes = len(self.classes)
        self.filenames = []
        self.labels = []
        self.transformer = ImageDataGenerator(**kwargs)

        for directory in directories:
            for label, cls in enumerate(self.classes):
                class_dir = os.path.join(directory, cls)
                for f in os.listdir(class_dir):
                    self.filenames.append(os.path.join(class_dir, f))
                    self.labels.append(label)

        # Shuffle first so that `frac` keeps a random subset
        self.on_epoch_end()

        num_instances = int(frac * len(self.filenames))
        self.filenames = self.filenames[0:num_instances]
        self.labels = self.labels[0:num_instances]

    def __getitem__(self, batch_idx):
        """Return ``(X_batch, y_batch)``, or ``(X_batch, [y_batch, aux])``
        when the instance has an ``auxiliary`` attribute."""
        first = batch_idx * self.batch_size
        positions = range(first, first + self.batch_size)
        has_aux = hasattr(self, "auxiliary")

        # Plain indexing (not slicing) keeps the IndexError for
        # out-of-range batches.
        filenames = [self.filenames[p] for p in positions]
        labels = [self.labels[p] for p in positions]
        aux = [self.auxiliary[p] for p in positions] if has_aux else []

        color_mode = "rgb" if self.target_size[2] == 3 else "grayscale"
        target_size = (self.target_size[0], self.target_size[1])

        # The batch arrays are built directly from the loaded images; the
        # former zero-filled pre-allocations were never used.
        X_batch = np.array([
            self.transformer.random_transform(
                img_to_array(
                    load_img(fn,
                             color_mode=color_mode,
                             target_size=target_size))) for fn in filenames
        ])
        y_batch = to_categorical(labels, num_classes=len(self.classes))

        if has_aux:
            return X_batch, [y_batch, aux]
        return X_batch, y_batch

    def __len__(self):
        """Return the number of full batches."""
        return int(len(self.filenames) / self.batch_size)

    def on_epoch_end(self):
        """Shuffle filenames, labels (and ``auxiliary`` if present) together."""
        has_aux = hasattr(self, "auxiliary")
        columns = [self.filenames, self.labels]
        if has_aux:
            columns.append(self.auxiliary)

        insts = list(zip(*columns))
        if not insts:
            # Nothing to shuffle; unzipping an empty list would raise.
            return

        random.shuffle(insts)
        shuffled = list(zip(*insts))
        self.filenames = shuffled[0]
        self.labels = shuffled[1]
        if has_aux:
            self.auxiliary = shuffled[2]
class OntheflyAugmentedImages(BaseDataset):
    """Use a tensorflow.keras ImageDataGenerator to augment images on the fly
    in a deterministic way.

    Each augmented sample ``idx`` is derived from a fixed base image and is
    transformed after seeding numpy with ``idx + random_state``. Results
    are kept in an LRU cache of ``cache_size`` slots.
    """

    def __init__(self,
                 dataset,
                 augmentation_params,
                 N=None,
                 random_state=0,
                 cache_size=None):
        """
        Args:
            dataset: Wrapped dataset exposing ``train_data``, ``test_data``,
                ``shape`` and ``output_size``.
            augmentation_params: Keyword arguments for ``ImageDataGenerator``.
            N: Number of augmented samples; defaults to 10x the train size.
            random_state: Offset added to the per-sample seed.
            cache_size: Number of cached transforms; defaults to the number
                of training samples.
        """
        # Initialize some member variables
        self.dataset = dataset
        self.generator = ImageDataGenerator(**augmentation_params)
        self.N = N or (len(self.dataset.train_data) * 10)
        self.random_state = random_state

        assert len(self.dataset.shape) == 3

        # Figure out the base images for each of the augmented ones
        self.idxs = np.random.choice(len(self.dataset.train_data), self.N)

        # Fit the generator
        self.generator.fit(self.dataset.train_data[:][0])

        # Standardize the test data
        self._x_test = np.copy(self.dataset.test_data[:][0])
        self._x_test = self.generator.standardize(self._x_test)
        self._y_test = self.dataset.test_data[:][1]

        # Create an LRU cache to speed things up a bit for the transforms.
        # Keys of unused slots are strictly negative (-1, -2, ...) so they
        # can never equal a real sample index. The former key ``-0 == 0``
        # made the first lookup of sample 0 a false cache hit that returned
        # the uninitialised contents of slot 0.
        cache_size = cache_size or len(self.dataset.train_data)
        self.cache = OrderedDict(
            (-(slot + 1), slot) for slot in range(cache_size))
        self.cache_data = np.empty(shape=(cache_size, ) + self.dataset.shape,
                                   dtype=np.float32)

    def _transform(self, idx, x):
        """Return the cached transform of sample ``idx``, computing it from
        ``x`` on a cache miss."""
        if idx not in self.cache:
            # Evict the least recently used entry and reuse its slot
            # (``slot`` is the offset into cache_data)
            _, slot = self.cache.popitem(last=False)
            self.cache[idx] = slot

            # Seed per sample so the same idx always gets the same transform
            np.random.seed(idx + self.random_state)
            x = self.generator.random_transform(x)
            x = self.generator.standardize(x)
            self.cache_data[slot] = x
        else:
            # Cache hit: mark as most recently used
            self.cache.move_to_end(idx)

        return self.cache_data[self.cache[idx]]

    def _train_data(self, idxs=slice(None)):
        """Return ``(x_hat, y)`` for the requested augmented samples."""
        # Make sure we accept everything that numpy accepts as indices
        idxs = np.arange(self.N)[idxs]

        # Get the original images and then transform them
        x, y = self.dataset.train_data[self.idxs[idxs]]
        x_hat = np.copy(x)
        # _transform reseeds numpy; save and restore the global state so
        # callers' random streams are unaffected.
        random_state = np.random.get_state()
        for i, idx in enumerate(idxs):
            x_hat[i] = self._transform(idx, x_hat[i])
        np.random.set_state(random_state)

        return x_hat, y

    def _test_data(self, idxs=slice(None)):
        """Return the standardized test inputs and their targets."""
        return self._x_test[idxs], self._y_test[idxs]

    def _train_size(self):
        """Return the number of augmented training samples."""
        return self.N

    @property
    def shape(self):
        return self.dataset.shape

    @property
    def output_size(self):
        return self.dataset.output_size
class BalanceDataGenerator(tf.keras.utils.Sequence):
    """Generates data for tf.keras.

    Two pipelines are supported, selected by ``args.datapipeline``:
    ``'covidx'`` (whitespace-separated text lines, one random COVID-19
    sample injected per batch) and ``'chexpert'`` (image path / label lists
    obtained from ``get_data_references``).
    """

    def __init__(self,
                 dataset,
                 images=None,
                 labels=None,
                 le=None,
                 is_training=True,
                 batch_size=8,
                 input_shape=(512, 512),
                 n_classes=3,
                 num_channels=3,
                 mapping=None,
                 shuffle=True,
                 augmentation=True,
                 datadir='data',
                 args=None):
        """Initialization.

        Args:
            dataset: Iterable of text lines; the last whitespace-separated
                field of each line is the class name.
            images, labels, le: Replaced by ``get_data_references()`` in
                the chexpert pipeline.
            is_training: Selects the 'train' folder and enables augmentation.
            batch_size: NOTE(review): not used, the batch size is taken
                from ``args.bs`` - confirm this is intended.
            input_shape: Size passed to ``cv2.resize``.
            n_classes: Number of classes (covidx pipeline only).
            num_channels: Number of image channels.
            mapping: Class name -> integer label. Defaults to
                ``{'normal': 0, 'pneumonia': 1, 'COVID-19': 2}``.
            shuffle: Whether to shuffle after every epoch.
            augmentation: Whether to create the augmenting generator.
            datadir: Root folder holding the 'train' and 'test' folders.
            args: Required. Object providing ``datapipeline``, ``bs`` and
                (chexpert) ``val_split``.
        """
        if args.datapipeline == 'chexpert':
            # CheXpert initialization
            images, labels, le = get_data_references()
            self.train_images = images[:int(len(images) * args.val_split)]
            self.train_labels = labels[:int(len(labels) * args.val_split)]
            self.n_classes = len(le.classes_)

        self.datadir = datadir
        self.dataset = dataset
        self.is_training = is_training
        self.batch_size = args.bs
        self.N = len(self.dataset)
        self.input_shape = input_shape
        if args.datapipeline == 'covidx':
            self.n_classes = n_classes
        self.num_channels = num_channels
        # None sentinel avoids sharing one mutable default between instances
        self.mapping = ({'normal': 0, 'pneumonia': 1, 'COVID-19': 2}
                        if mapping is None else mapping)
        # Honour the caller's choice (was hard-coded to True)
        self.shuffle = shuffle
        self.args = args

        if self.args.datapipeline == 'chexpert':
            self.images = images
            self.labels = labels

        if augmentation:
            self.augmentation = ImageDataGenerator(
                featurewise_center=False,
                featurewise_std_normalization=False,
                rotation_range=10,
                width_shift_range=0.1,
                height_shift_range=0.1,
                horizontal_flip=True,
                brightness_range=(0.9, 1.1),
                fill_mode='constant',
                cval=0.,
            )

        datasets = {'normal': [], 'pneumonia': [], 'COVID-19': []}
        for l in dataset:
            datasets[l.split()[-1]].append(l)
        # Index 0: all non-COVID lines, index 1: COVID-19 lines
        self.datasets = [
            datasets['normal'] + datasets['pneumonia'],
            datasets['COVID-19'],
        ]
        print(
            f"Train: NO-COVID={len(self.datasets[0])}, COVID={len(self.datasets[1])}"
        )

        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch for the active pipeline."""
        if self.args.datapipeline == 'covidx':
            return int(np.ceil(len(self.datasets[0]) / float(self.batch_size)))
        elif self.args.datapipeline == 'chexpert':
            return int(np.ceil(len(self.images) / float(self.batch_size)))

    def on_epoch_end(self):
        """Updates indexes after each epoch."""
        if self.shuffle:
            if self.args.datapipeline == 'covidx':
                for v in self.datasets:
                    np.random.shuffle(v)
            elif self.args.datapipeline == 'chexpert':
                # The same random_state on both calls keeps images and
                # labels aligned.
                self.images = shuffle(self.images, random_state=0)
                self.labels = shuffle(self.labels, random_state=0)

    def __getitem__(self, idx):
        """Return ``(batch_x, one_hot_batch_y)`` for batch ``idx``."""
        batch_x = np.zeros(
            (self.batch_size, *self.input_shape, self.num_channels))
        batch_y = np.zeros(self.batch_size)

        folder = 'train' if self.is_training else 'test'
        augment = self.is_training and hasattr(self, 'augmentation')

        # COVIDx pipeline
        if self.args.datapipeline == 'covidx':
            batch_files = self.datasets[0][idx * self.batch_size:(idx + 1) *
                                           self.batch_size]
            # The last batch of an epoch can be shorter than batch_size;
            # indexing up to batch_size raised IndexError there.
            filled = len(batch_files)
            if filled:
                # Replace one random slot with a random COVID-19 sample
                batch_files[np.random.randint(filled)] = np.random.choice(
                    self.datasets[1])

            for i in range(filled):
                sample = batch_files[i].split()

                x = cv2.imread(os.path.join(self.datadir, folder, sample[1]))
                x = cv2.resize(x, self.input_shape)

                if augment:
                    x = self.augmentation.random_transform(x)

                batch_x[i] = x.astype('float32') / 255.0
                batch_y[i] = self.mapping[sample[2]]

            # Return only the rows that were filled
            batch_x = batch_x[:filled]
            batch_y = batch_y[:filled]

        # CheXpert pipeline
        elif self.args.datapipeline == 'chexpert':
            # NOTE(review): this clamps a batch index with a value derived
            # from the batch count minus the batch size, which can be
            # negative - confirm the intended bound.
            idx = min(idx, BalanceDataGenerator.__len__(self) - self.batch_size)
            batch_files_images = self.images[idx * self.batch_size:(idx + 1) *
                                             self.batch_size]
            batch_files_labels = self.labels[idx * self.batch_size:(idx + 1) *
                                             self.batch_size]

            for i in range(self.batch_size):
                # Best effort: a failing sample is reported and its row
                # stays zero-filled.
                try:
                    x = cv2.imread(batch_files_images[i])
                    x = cv2.resize(x, self.input_shape)

                    if augment:
                        x = self.augmentation.random_transform(x)

                    # Scale to [-1, 1]
                    x = 2 * (x.astype('float32') / 255.0) - 1
                    y = batch_files_labels[i]

                    batch_x[i] = x
                    batch_y[i] = y
                except Exception as e:
                    print(e)

        return batch_x, tf.keras.utils.to_categorical(
            batch_y, num_classes=self.n_classes)
class SiameseBatchGenerator(BaseBatchGenerator):
    """Build batches of sample pairs for training a siamese network.

    A positive pair (label 1) holds two samples of the same class; a
    negative pair (label 0) holds samples of two different classes.

    # Arguments
        X: numpy array of samples, or of file names when
            `flow_from_dir` is True.
        y: class label of every entry of `X`.
        batch_size: default number of pairs returned by `next_batch`.
        flow_from_dir: when True, `X` holds file names that are read
            lazily and the class statistics (`n_classes`,
            `samples_per_class`, `class_idx`) are taken from `kwargs`.
        augment: when True, every image read from disk is passed through
            a random `ImageDataGenerator` distortion.
        **kwargs: precomputed statistics, used only with `flow_from_dir`.
    """

    def __init__(self,
                 X,
                 y,
                 batch_size=32,
                 flow_from_dir=False,
                 augment=False,
                 **kwargs):
        self.x = X
        self.y = y
        self.batch_size = batch_size
        self.flow_from_dir = flow_from_dir
        self.augment = augment
        if flow_from_dir:
            # the statistics were already computed by `from_directory`
            self.__dict__.update(kwargs)
        else:
            self.__count_stats()
        # augmentation
        if self.augment:
            self.__get_distortion_generator()

    @classmethod
    def from_directory(cls, dirname, batch_size=32, augment=False):
        '''Alternative constructor for images stored as one folder per class.

        Every sub-folder of `dirname` becomes a class; its `.jpg`, `.png`
        and `.jpeg` files become the samples. Folders and files are
        sorted so that class ids and sample order are reproducible.
        '''
        assert os.path.isdir(
            dirname), "There is no such directory `%s`" % dirname
        # glob already returns every folder prefixed with `dirname`,
        # so the results must not be joined with `dirname` again
        class_folders = sorted(glob(os.path.join(dirname, "*", "")))
        n_classes = len(class_folders)
        samples_per_class = np.zeros(n_classes, dtype=np.int32)
        class_idx = [None] * n_classes
        X, y = [], []
        offset = 0
        for i, folder in enumerate(class_folders):
            img_fnames = []
            for pattern in ("*.jpg", "*.png", "*.jpeg"):
                img_fnames.extend(sorted(glob(os.path.join(folder, pattern))))
            count = len(img_fnames)
            # add filenames and corresponding labels to array
            X.extend(img_fnames)
            y.extend([i] * count)
            samples_per_class[i] = count
            # samples of one class are contiguous in X
            class_idx[i] = np.arange(offset, offset + count, dtype=np.int32)
            offset += count
        # transform to arrays for convenience
        X = np.array(X)
        # int32 rather than int8: class ids above 127 must not wrap around
        y = np.array(y, dtype=np.int32)
        return cls(
            X,
            y,
            batch_size,
            flow_from_dir=True,
            augment=augment,
            # kwargs
            n_classes=n_classes,
            samples_per_class=samples_per_class,
            class_idx=class_idx)

    def __count_stats(self):
        '''Derive the class count, class sizes and per-class indices from `y`.'''
        self.samples_per_class = np.unique(self.y, return_counts=True)[1]
        self.n_classes = len(self.samples_per_class)
        # sort indices by their value, i.e. sort labels
        sorted_idx = np.argsort(self.y)
        # split sorted indices on classes
        boundaries = np.cumsum(self.samples_per_class)[:-1]
        self.class_idx = np.split(sorted_idx, boundaries)

    def __get_distortion_generator(self):
        '''Create the `ImageDataGenerator` used for random augmentation.'''
        self.distortion_generator = ImageDataGenerator(
            rotation_range=75,
            shear_range=0.3,
            zoom_range=0.3,
            width_shift_range=0.2,
            height_shift_range=0.2,
            channel_shift_range=0.2,
            vertical_flip=True,
            horizontal_flip=True)

    def random_distortion(self, img):
        '''Return a randomly distorted copy of `img`.'''
        return self.distortion_generator.random_transform(img)

    def __get_pair(self, c, pos):
        '''Return two samples forming one pair anchored on class `c`.

        c - class number
        pos - True for a positive pair, False for a negative one
        '''
        # randomly select (up to) two distinct samples of the class
        idx = np.random.permutation(self.samples_per_class[c])[:2]
        if pos:
            if len(idx) == 1:
                # a class with a single sample can only be paired with
                # itself; falling back to another class here would put
                # a negative pair under a positive label
                idx = np.repeat(idx, 2)
            return self.x[self.class_idx[c][idx]]
        # for negatives choose the opposite class
        other_classes = [x for x in range(self.n_classes) if x != c]
        c_ = np.random.choice(other_classes)
        # choose the sample from the opposite class
        i_ = np.random.randint(self.samples_per_class[c_])
        l_sample = self.x[self.class_idx[c][idx[0]]]
        r_sample = self.x[self.class_idx[c_][i_]]
        return l_sample, r_sample

    def __create_pairs(self, batch_size, pos=True):
        '''Create the positive or the negative half of a batch.'''
        # if batch_size is odd number, then negatives will be one more pair
        half = batch_size // 2
        n = half if pos else batch_size - half
        # array for storing pairs
        pairs = np.zeros((2, n, *self.x.shape[1:]), dtype=self.x.dtype)
        # randomly choose n class labels
        classes = np.random.randint(self.n_classes, size=n)
        for i, c in enumerate(classes):
            pairs[0][i], pairs[1][i] = self.__get_pair(c, pos)
        return pairs

    def __get_files_from_names(self, arr):
        '''Read every file named in `arr`; the result keeps `arr`'s shape.'''
        # ravel() walks the names in C order, the order the reshape
        # below assumes (nditer follows memory layout instead)
        names = arr.ravel()
        result = [None] * arr.size
        # read all files
        for i, name in enumerate(names):
            result[i] = imread(str(name)) / 255.
            if self.augment:
                result[i] = self.random_distortion(result[i])
        result = np.array(result)
        result = result.reshape((*arr.shape, *result[0].shape))
        return result

    def next_batch(self, batch_size=None, shuffle=True, seed=None):
        '''Return `(pairs, labels)` for one batch.

        `pairs` has shape (2, batch_size, ...) and `labels` holds 1 for
        positive and 0 for negative pairs. The first `batch_size // 2`
        pairs are positive before the optional shuffle.

        batch_size - number of pairs; defaults to `self.batch_size`
        shuffle - permute the pairs (and labels) inside the batch
        seed - if given, seeds the global numpy random generator
        '''
        if seed is not None:
            np.random.seed(seed)
        # if batch size was not specified use the default one
        if batch_size is None:
            batch_size = self.batch_size
        half = batch_size // 2
        # arrays for pairs and labels respectively
        batch_xs = np.zeros((2, batch_size, *self.x.shape[1:]),
                            dtype=self.x.dtype)
        batch_ys = np.ones((batch_size, ), dtype=np.int8)
        # positive pairs
        batch_xs[:, :half] = self.__create_pairs(batch_size)
        # negative pairs
        batch_xs[:, half:] = self.__create_pairs(batch_size, pos=False)
        batch_ys[half:] = 0
        # permutation
        if shuffle:
            batch_xs, batch_ys = shuffle_arrays(batch_xs,
                                                batch_ys,
                                                axes=[1, 0])
        # if flow_from_dir = True, batch_xs holds filenames,
        # so the files still have to be read
        if self.flow_from_dir:
            batch_xs = self.__get_files_from_names(batch_xs)
        return batch_xs, batch_ys
# In[50]: filenames = pd.read_csv("../../data/interimdata.csv") imgs = np.array([ plt.imread(f["train_example"], format="jpeg") for _, f in filenames.iterrows() ]) # In[51]: plt.imshow(imgs[0]) # In[52]: plt.imshow(augmenter.random_transform(imgs[0]).astype("uint8")) # In[59]: augmented = [] for i in range(269): augmented.append( augmenter.random_transform(imgs[random.randint(0, len(imgs) - 1)]).astype("uint8")) # In[62]: links = {"path": []} for i, img in enumerate(augmented):