def train_model(self, model):
    """Fit ``model`` on augmented image batches and pickle the workspace.

    The training paths are split into train / validation / calibration
    subsets, the test paths are shuffled in place, and all four sets are
    stored on the instance. Images are loaded into memory and fed to the
    model through ``ImageDataGenerator.flow`` for 5 epochs. The model and the
    four path sets are finally dumped to ``workspace.pickle``.

    :param model: Keras model exposing ``fit_generator``
    :return: the same ``model`` object after training
    """
    batch = 32
    n_epochs = 5

    # Path listings for the train and test partitions.
    data = DataSet(self.root, self.train_set, self.test_set, type='list')
    train_df, validation_df, calibration_df = split_data(data.train_set[2])

    test_df = data.test_set[2]
    random.shuffle(test_df)

    # Remember the subsets used for this run.
    self.__test_set = test_df
    self.__train_set = train_df
    self.__validation_set = validation_df
    self.__calibration_set = calibration_df

    # Load and randomise the training arrays, then load the validation arrays.
    train_images, train_labels = read_img_io(train_df)
    train_images, train_labels = shuffle(train_images, train_labels)
    validation_images, validation_labels = read_img_io(validation_df)

    # Only the training stream is augmented.
    augmenter = ImageDataGenerator(rescale=1,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   rotation_range=18)
    passthrough = ImageDataGenerator()
    train_flow = augmenter.flow(train_images,
                                train_labels,
                                batch_size=batch,
                                shuffle=True)
    valid_flow = passthrough.flow(validation_images,
                                  validation_labels,
                                  batch_size=batch,
                                  shuffle=True)

    model.fit_generator(
        train_flow,
        steps_per_epoch=train_images.shape[0] // batch,
        epochs=n_epochs,
        validation_data=valid_flow,
        validation_steps=validation_images.shape[0] // batch)

    # Persist the model together with the data partitions.
    workspace = [model, train_df, test_df, calibration_df, validation_df]
    with open('workspace.pickle', 'wb+') as handle:
        pickle.dump(workspace, handle)
    return model
def get_augmented_images_generator(in_gen, seed=None):
    """Yield augmented (image, label) batches from an upstream batch generator.

    Every incoming batch is pushed through two identically configured
    ``ImageDataGenerator`` instances that share one per-batch seed, so the
    images and their labels/masks receive the same random transform.

    :param in_gen: iterable yielding ``(in_x, in_y)`` array batches
    :param seed: seed for NumPy's global RNG; drawn at random when ``None``
    :return: generator of ``(augmented_x, augmented_y)`` batches
    """
    augment_kwargs = {
        'featurewise_center': False,
        'samplewise_center': False,
        'rotation_range': 15,
        'width_shift_range': 0.1,
        'height_shift_range': 0.1,
        'shear_range': 0.01,
        'zoom_range': [0.9, 1.25],
        'horizontal_flip': True,
        'vertical_flip': True,
        'fill_mode': 'reflect',
        'data_format': 'channels_last',
    }
    image_gen = ImageDataGenerator(**augment_kwargs)
    label_gen = ImageDataGenerator(**augment_kwargs)

    if seed is None:
        seed = np.random.choice(range(9999))
    np.random.seed(seed)

    for batch_x, batch_y in in_gen:
        # One shared seed per batch keeps image and mask transforms in sync.
        batch_seed = np.random.choice(range(9999))
        n_samples = batch_x.shape[0]
        # Images are scaled up by 255 before augmentation and back afterwards.
        image_flow = image_gen.flow(255 * batch_x,
                                    batch_size=n_samples,
                                    seed=batch_seed,
                                    shuffle=True)
        label_flow = label_gen.flow(batch_y,
                                    batch_size=n_samples,
                                    seed=batch_seed,
                                    shuffle=True)
        yield next(image_flow) / 255.0, next(label_flow)
def train_5fold(self, training_images, training_labels):
    """Run 5-fold cross-validation with a fresh InceptionV3 model per fold.

    The network is rebuilt and recompiled at the start of every fold.
    Re-using one model across folds would let weights trained on an earlier
    fold's training split (which contains the current validation samples)
    leak into the current fold and inflate its validation score.

    After the call ``self.model`` holds the model trained on the last fold.

    :param training_images: images handed to ``self.load_data_kfold``
    :param training_labels: labels handed to ``self.load_data_kfold``
    """
    k = 5
    augment_args = dict(rotation_range=40,
                        width_shift_range=0.2,
                        height_shift_range=0.2,
                        shear_range=0.2,
                        zoom_range=0.2,
                        horizontal_flip=True,
                        fill_mode='nearest')
    train_datagen = ImageDataGenerator(**augment_args)
    # NOTE(review): validation batches are randomly augmented as well, so the
    # per-epoch validation metrics are noisy; the evaluate() call below uses
    # the raw validation arrays.
    val_datagen = ImageDataGenerator(**augment_args)

    folds, x_train, y_train = self.load_data_kfold(k, training_images,
                                                   training_labels)
    print(len(training_images))
    print(len(x_train))

    for j, (train_idx, val_idx) in enumerate(folds):
        print('\nFold ', j)

        # Fresh, independently initialised network for this fold.
        self.model = InceptionV3(include_top=False, weights='imagenet')
        self.model = self.add_new_last_layer(self.model, nb_classes=2)
        self.model.trainable = True
        self.model.compile(optimizer=Adam(lr=0.0001, beta_1=0.1),
                           loss='categorical_crossentropy',
                           metrics=['categorical_accuracy'])

        x_train_cv = x_train[train_idx]
        y_train_cv = to_categorical(np.array(y_train[train_idx]))
        print(len(x_train_cv))

        x_valid_cv = x_train[val_idx]
        y_valid_cv = y_train[val_idx]
        print(len(x_valid_cv))
        y_valid_cv = to_categorical(np.array(y_valid_cv))

        steps = int(np.size(x_train_cv, 0) // self.batch_size)
        val_steps = int(np.size(x_valid_cv, 0) // self.batch_size)

        # Callbacks are re-created per fold and tagged with the fold number.
        name_weights = "_fold_" + str(j)
        self.init_callbacks(name=str(name_weights))

        self.model.fit_generator(
            generator=train_datagen.flow(x=x_train_cv,
                                         y=y_train_cv,
                                         batch_size=self.batch_size),
            epochs=self.args.epoch,
            steps_per_epoch=steps,
            validation_steps=val_steps,
            verbose=1,
            callbacks=self.callbacks,
            validation_data=val_datagen.flow(x=x_valid_cv,
                                             y=y_valid_cv,
                                             batch_size=self.batch_size))
        print(self.model.evaluate(x_valid_cv, y_valid_cv))
def ProbGenerator(x, y, aug_dict, batch_size, seed=1):
    """Yield ``([image, label], label)`` batches for a ProbUNet-style model.

    Inputs and labels go through two ``ImageDataGenerator`` instances built
    from the same ``aug_dict`` and flowed with the same ``seed``.

    :param x: input array
    :param y: label array
    :param aug_dict: keyword arguments for ``ImageDataGenerator``
    :param batch_size: number of samples per yielded batch
    :param seed: seed shared by both flows
    """
    image_flow = ImageDataGenerator(**aug_dict).flow(x,
                                                     batch_size=batch_size,
                                                     seed=seed)
    label_flow = ImageDataGenerator(**aug_dict).flow(y,
                                                     batch_size=batch_size,
                                                     seed=seed)
    for image_batch, label_batch in zip(image_flow, label_flow):
        # The label is both the second model input and the training target.
        yield ([image_batch, label_batch], label_batch)
def trainGenerator(x, y, aug_dict, batch_size, seed=1):
    """Yield ``(image, label)`` batches augmented with a shared seed.

    Inputs and labels go through two ``ImageDataGenerator`` instances built
    from the same ``aug_dict`` and flowed with the same ``seed``.

    :param x: input array
    :param y: label array
    :param aug_dict: keyword arguments for ``ImageDataGenerator``
    :param batch_size: number of samples per yielded batch
    :param seed: seed shared by both flows
    """
    image_flow = ImageDataGenerator(**aug_dict).flow(x,
                                                     batch_size=batch_size,
                                                     seed=seed)
    label_flow = ImageDataGenerator(**aug_dict).flow(y,
                                                     batch_size=batch_size,
                                                     seed=seed)
    for image_batch, label_batch in zip(image_flow, label_flow):
        yield (image_batch, label_batch)
def train_classifier(classes, path, epoch, lr, weights_path=None):
    """Train the ``res18`` classifier with augmentation and save its history.

    Images and labels come from ``img_reader()``; 20% of them are held out
    through ``validation_split``. The best weights (by ``val_acc``) are
    checkpointed in the current working directory and the training history is
    written to ``history.json``.

    :param classes: forwarded to ``res18``
    :param path: unused; kept for interface compatibility
    :param epoch: number of training epochs
    :param lr: learning rate, as a number or a string such as ``"0.001"``
    :param weights_path: optional weights file loaded before training
    """
    model = res18(classes)
    if weights_path:
        model.load_weights(weights_path)
    model.compile(optimizer=Adam(lr=float(lr)),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    trains, labels = img_reader()
    generator = ImageDataGenerator(featurewise_center=True,
                                   featurewise_std_normalization=True,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   rotation_range=10,
                                   shear_range=0.2,
                                   zoom_range=(0.8, 1.2),
                                   rescale=1. / 255,
                                   horizontal_flip=True,
                                   validation_split=0.2)
    # Featurewise statistics must be computed before flowing data.
    generator.fit(trains)
    train_flow = generator.flow(trains, labels, shuffle=True,
                                subset='training')
    validate_flow = generator.flow(trains, labels, shuffle=True,
                                   subset='validation')
    STEP_SIZE_TRAIN = train_flow.n // train_flow.batch_size
    STEP_SIZE_VALID = validate_flow.n // validate_flow.batch_size

    # str() keeps the file-name tag working when lr is passed as a float.
    p = os.path.abspath('.')
    weights_name = ('lr_0' + str(lr)[2:] +
                    '_weights_{epoch:02d}_{val_acc:.2f}.hdf5')
    model_checkpoint = ModelCheckpoint(os.path.join(p, weights_name),
                                       monitor='val_acc',
                                       save_best_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.1, patience=10)
    callbacks = [model_checkpoint, reduce_lr]

    history = model.fit_generator(train_flow,
                                  epochs=epoch,
                                  steps_per_epoch=STEP_SIZE_TRAIN,
                                  verbose=1,
                                  callbacks=callbacks,
                                  validation_data=validate_flow,
                                  validation_steps=STEP_SIZE_VALID)

    # History values can be numpy scalars, which json cannot serialise;
    # convert everything to plain Python floats first.
    serialisable = {
        key: [float(value) for value in values]
        for key, values in history.history.items()
    }
    with open('history.json', 'w') as f:
        json.dump(serialisable, f)
def train(self, training_images, training_labels, validation_images,
          validation_labels):
    """Fine-tune an ImageNet InceptionV3 with a 2-class head.

    When no validation images are supplied, everything from index 150 onwards
    of the training data is used for validation and only the first 150
    samples are trained on.

    :param training_images: training image array
    :param training_labels: training label array
    :param validation_images: validation image array (may be empty)
    :param validation_labels: validation label array
    """
    holdout_start = 150

    self.init_callbacks('')
    self.model = InceptionV3(include_top=False, weights="imagenet")
    self.model = self.add_new_last_layer(self.model, nb_classes=2)
    self.model.trainable = True
    self.model.compile(optimizer=Adam(lr=0.0001, beta_1=0.1),
                       loss='categorical_crossentropy',
                       metrics=['categorical_accuracy'])

    if len(validation_images) == 0:
        print('no val')
        # Carve the validation split out of the tail of the training data.
        validation_images = training_images[holdout_start:]
        validation_labels = training_labels[holdout_start:]
        training_labels = training_labels[:holdout_start]
        training_images = training_images[:holdout_start]

    augment_args = dict(rotation_range=40,
                        width_shift_range=0.2,
                        height_shift_range=0.2,
                        shear_range=0.2,
                        zoom_range=0.2,
                        horizontal_flip=True,
                        fill_mode='nearest')
    train_datagen = ImageDataGenerator(**augment_args)
    val_datagen = ImageDataGenerator(**augment_args)

    steps = int(np.size(training_images, 0) // self.batch_size)
    val_steps = int(np.size(validation_images, 0) // self.batch_size)

    train_flow = train_datagen.flow(x=training_images,
                                    y=training_labels,
                                    batch_size=self.batch_size)
    val_flow = val_datagen.flow(x=validation_images,
                                y=validation_labels,
                                batch_size=self.batch_size)
    self.model.fit_generator(generator=train_flow,
                             epochs=self.args.epoch,
                             steps_per_epoch=steps,
                             validation_steps=val_steps,
                             verbose=1,
                             callbacks=self.callbacks,
                             validation_data=val_flow)
def data_augumentation():
    """Write randomly augmented variants of one cat image to ``preview/``.

    Based on
    https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
    """
    augmenter = ImageDataGenerator(rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   rescale=1. / 255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode='nearest')

    source = load_img('data/train/cats/cat.0.jpg')  # PIL image
    pixels = img_to_array(source)  # 3-D NumPy array
    pixels = pixels.reshape((1, ) + pixels.shape)  # add a leading batch axis

    # Each batch pulled from flow() is one transformed image, which Keras
    # saves into `preview/` as a side effect.
    flow = augmenter.flow(pixels,
                          batch_size=1,
                          save_to_dir='preview',
                          save_prefix='cat',
                          save_format='jpeg')
    for produced, _ in enumerate(flow, start=1):
        if produced > 20:
            # flow() never terminates on its own; stop after the 21st batch.
            break
def dataAug(imageInput, category, name):
    """Write five randomly augmented copies of an image to disk.

    Files are written with ``cv2.imwrite`` to
    ``datasets/images/<category>/<i><name>`` for ``i`` in 0..4.

    No matplotlib figures are opened: nothing was ever drawn on them and they
    were never closed, so they only accumulated across calls.

    :param imageInput: image accepted by ``image.img_to_array``
    :param category: sub-directory name under ``datasets/images/``
    :param name: file-name suffix, expected to include the extension
    """
    # Data generator that applies random geometric transforms.
    datagen = ImageDataGenerator(rotation_range=40,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 shear_range=0.2,
                                 zoom_range=0.2,
                                 horizontal_flip=False,
                                 fill_mode='nearest')

    img = image.img_to_array(imageInput)  # convert image to numpy array
    img = img.reshape((1, ) + img.shape)  # add a leading batch axis

    # NOTE(review): cv2.imwrite interprets channels as BGR; confirm the
    # channel order of `imageInput` if colours look swapped in the output.
    i = 0
    for batch in datagen.flow(img):
        WRITE_PATH = 'datasets/images/' + str(category) + '/' + str(i) + str(
            name)
        cv2.imwrite(WRITE_PATH, image.img_to_array(batch[0]))
        i += 1
        if i > 4:  # stop after five images (indices 0..4)
            break
def fit(model, data):
    """Train ``model`` with light augmentation and save it as an ``.h5`` file.

    ``batch_size``, ``epochs``, ``model_dir`` and ``name`` are read from
    module scope.

    :param model: Keras model exposing ``fit_generator`` and ``save``
    :param data: indexable where ``data[0]``/``data[2]`` are the training
        inputs/targets and ``data[1]``/``data[3]`` the validation ones
    :return: the Keras ``History`` object
    """
    augmenter = ImageDataGenerator(featurewise_center=False,
                                   samplewise_center=False,
                                   featurewise_std_normalization=False,
                                   samplewise_std_normalization=False,
                                   zca_whitening=False,
                                   rotation_range=10,
                                   zoom_range=0.1,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   horizontal_flip=False,
                                   vertical_flip=False)
    # Halve the learning rate when validation accuracy stalls for 3 epochs.
    lr_schedule = ReduceLROnPlateau(monitor='val_accuracy',
                                    patience=3,
                                    verbose=1,
                                    factor=0.5,
                                    min_lr=0.00001)
    augmenter.fit(data[0])

    train_flow = augmenter.flow(data[0], data[2], batch_size=batch_size)
    history = model.fit_generator(
        train_flow,
        epochs=epochs,
        validation_data=(data[1], data[3]),
        verbose=2,
        steps_per_epoch=data[0].shape[0] // batch_size,
        callbacks=[lr_schedule])

    target = model_dir + name + "_" + str(epochs) + "_" + str(
        batch_size) + ".h5"
    model.save(target)
    return history
def generate_second_model(num_classes, batch_size, epochs, x_train, y_train):
    """Build and train a small CNN on 48x48 single-channel images.

    ``steps_per_epoch`` is derived from the dataset size (rounded up) so that
    one epoch is one pass over ``x_train``; the batch size is not a step
    count.

    :param num_classes: size of the output softmax layer
    :param batch_size: samples per training batch
    :param epochs: number of training epochs
    :param x_train: training images, shape ``(n, 48, 48, 1)``
    :param y_train: one-hot training labels
    :return: the trained Keras model
    """
    model = Sequential()

    # NOTE(review): every hidden layer uses 'softmax' as its activation,
    # which is unusual (ReLU is the common choice). Left unchanged because it
    # may be a deliberate experiment; confirm with the author.

    # 1st convolution layer
    model.add(
        Conv2D(64, (5, 5),
               input_shape=(48, 48, 1),
               activation='softmax',
               padding='same'))
    model.add(MaxPooling2D(pool_size=(5, 5)))

    # 2nd convolution layer
    model.add(Conv2D(128, (5, 5), activation='softmax', padding='same'))
    model.add(Conv2D(128, (5, 5), activation='softmax', padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(1024, activation='softmax'))
    model.add(Dropout(0.2))
    model.add(Dense(1024, activation='softmax'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    # Batch process
    gen = ImageDataGenerator()
    train_generator = gen.flow(x_train, y_train, batch_size=batch_size)

    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])

    # Ceiling division: number of batches needed to see every sample once.
    steps_per_epoch = max(1, -(-len(x_train) // batch_size))
    model.fit_generator(train_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=epochs)
    return model
def train(self):
    """Train ``self.model`` on horizontally shifted images and record losses.

    The per-epoch ``loss`` and ``val_loss`` values are appended to
    ``self.loss`` and ``self.val_loss``.
    """
    batch_size = self.config.trainer.batch_size

    # Small random horizontal shifts are the only augmentation.
    datagen = ImageDataGenerator(width_shift_range=0.05)

    # Number of generator batches per epoch. Keras expects an integer, so use
    # ceiling division instead of true division (which yields a float).
    n_samples = len(self.training_data[0])
    steps_per_epoch = max(1, -(-n_samples // batch_size))

    history = self.model.fit_generator(
        datagen.flow(self.training_data[0],
                     self.training_data[1],
                     batch_size=batch_size),
        epochs=self.config.trainer.num_epochs,
        steps_per_epoch=steps_per_epoch,
        verbose=self.config.trainer.verbose_training,
        validation_data=(self.validation_data[0], self.validation_data[1]),
        callbacks=self.callbacks,
    )
    self.loss.extend(history.history['loss'])
    self.val_loss.extend(history.history['val_loss'])
def sample_image_augmentation(train_cats_dir):
    """Display four random augmentations of one image from a directory.

    The image is the entry at index 10 of ``os.listdir(train_cats_dir)``,
    resized to 150x150.

    :param train_cats_dir: directory containing the source images
    """
    augmenter = ImageDataGenerator(rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode='nearest')

    candidates = []
    for entry in os.listdir(train_cats_dir):
        candidates.append(os.path.join(train_cats_dir, entry))
    chosen = candidates[10]

    pixels = image.img_to_array(image.load_img(chosen,
                                               target_size=(150, 150)))
    pixels = pixels.reshape((1, ) + pixels.shape)  # add a leading batch axis

    # range(4) bounds the otherwise endless flow; one figure per sample.
    for figure_no, batch in zip(range(4), augmenter.flow(pixels,
                                                         batch_size=1)):
        plt.figure(figure_no)
        plt.imshow(image.array_to_img(batch[0]))
    plt.show()
def preprocessing_data(tifSet, labels):
    """Expand every image into 20 randomly augmented copies.

    :param tifSet: NumPy array reshapeable to ``(-1, 128, 128, 1)``
    :param labels: NumPy array with one label per image
    :return: ``(augmented_images, augmented_labels)``; images have shape
        ``(len(tifSet) * 20, 128, 128, 1)`` and each label is repeated for
        all copies of its image
    """
    copies_per_image = 20
    side = 128

    out_images = np.zeros((len(tifSet) * copies_per_image, side, side, 1))
    out_labels = np.zeros(len(labels) * copies_per_image, dtype=labels.dtype)

    augmenter = ImageDataGenerator(rotation_range=270,
                                   zoom_range=0.15,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.15,
                                   horizontal_flip=True,
                                   fill_mode="nearest",
                                   data_format="channels_last")

    tifSet = tifSet.reshape(-1, side, side, 1)
    for img_no in range(len(tifSet)):
        single_flow = augmenter.flow(tifSet[img_no:img_no + 1], batch_size=1)
        base = img_no * copies_per_image
        # range() goes first in zip so exactly `copies_per_image` batches are
        # drawn from the endless flow.
        for copy_no, x_batch in zip(range(copies_per_image), single_flow):
            out_images[base + copy_no, :, :, :] = np.squeeze(x_batch, axis=0)
            out_labels[base + copy_no] = labels[img_no]
    return out_images, out_labels
def train_generator(x, y, batch_size, shift_fraction=0.):
    """Endlessly yield ``([x, y], [y, x])`` batches with random shifts.

    :param x: input array
    :param y: label array
    :param batch_size: samples per batch
    :param shift_fraction: maximum width/height shift as a fraction
    """
    shifter = ImageDataGenerator(width_shift_range=shift_fraction,
                                 height_shift_range=shift_fraction)
    batches = shifter.flow(x, y, batch_size=batch_size)
    while True:
        images, targets = next(batches)
        # Inputs are (image, label); outputs are (label, image).
        yield ([images, targets], [targets, images])
def training_augmented(self):
    """Continue training ``self.model`` on augmented data and save it.

    Training resumes at ``self.initial_epoch`` and runs ``self.EPOCHS`` more
    epochs. The final epoch number is written to ``ep.txt``, the model is
    saved as ``AUG.model`` and the history is passed to ``self.plot_it``.
    """
    aug = ImageDataGenerator(rotation_range=25,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode="nearest")
    print("[LOG] training AUG")
    final_epoch = self.initial_epoch + self.EPOCHS
    net = self.model.fit_generator(
        aug.flow(self.dataset.data, self.dataset.labels, batch_size=self.BS),
        validation_data=(self.dataset.validation_data,
                         self.dataset.val_labels),
        steps_per_epoch=len(self.dataset.data) // self.BS,
        epochs=final_epoch,
        initial_epoch=self.initial_epoch)

    # TODO: save only if the new model is better than the existing one
    print("[LOG] compare")
    print("[LOG] saving")
    # Context manager guarantees the handle is closed even if write() fails.
    with open("ep.txt", "w") as epoch_file:
        epoch_file.write(str(final_epoch))
    self.model.save("AUG.model")
    self.plot_it(net)
    print("[LOG] Ploted")
def imageAugmentation(directory,
                      export_directory=None,
                      prefix="aug",
                      extension="jpg",
                      logger=None):
    """Save 10 augmented variants of every matching image in a directory.

    :param directory: directory scanned (non-recursively) for images
    :param export_directory: output directory; defaults to an ``augmented``
        sub-directory of ``directory``. Created if missing.
    :param prefix: file-name prefix of the saved images
    :param extension: extension used to select inputs (case-insensitive) and
        as the output format
    :param logger: optional logger receiving progress messages via ``info``
    """
    variants_per_image = 10
    aug = ImageDataGenerator(rotation_range=10,
                             zoom_range=0.15,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             shear_range=0.15,
                             horizontal_flip=True,
                             fill_mode="nearest")

    dir_path = os.path.abspath(directory)
    if export_directory is None:
        # os.path.join keeps this portable; a literal "\\" only works on
        # Windows.
        export_directory = os.path.join(dir_path, "augmented")
    try:
        os.mkdir(export_directory)
        print("Directory", export_directory, "Created ")
    except FileExistsError:
        print("Directory", export_directory, "already exists")

    extension = extension.lower()
    try:
        counter = 1
        for file_name in os.listdir(dir_path):
            if not file_name.lower().endswith(extension):
                continue
            fp = FilePath(os.path.join(dir_path, file_name))
            try:
                image = np.expand_dims(load_img(fp.getAbsPath()), axis=0)
            except Exception as load_error:
                # Best effort: unreadable files are skipped, but not silently.
                if logger is not None:
                    logger.info("Skipping " + file_name + ": " +
                                str(load_error))
                continue

            print("Processing Image " + str(counter) + ": " + file_name)
            if logger is not None:
                logger.info("Processing Image " + str(counter) + ": " +
                            file_name)

            aug.fit(image)
            flow = aug.flow(image,
                            save_to_dir=export_directory,
                            save_format=extension,
                            save_prefix=prefix)
            # range() must come first: zip advances its arguments left to
            # right, so putting the endless flow first would pull (and save)
            # one extra image before range() signals exhaustion.
            for _ in zip(range(variants_per_image), flow):
                pass
            counter += 1

        print("Done")
        if logger is not None:
            logger.info("Done")
    except Exception as e:
        if logger is not None:
            logger.info(e)
        print(e)
def train_net(X_train, y_train, params):
    """Train a binary classifier on top of a frozen ResNet50V2 backbone.

    :param X_train: training images (converted with ``np.asarray``)
    :param y_train: binary training labels (converted with ``np.asarray``)
    :param params: dict with keys ``'input_shape'``, ``'epochs'``,
        ``'batch_size'`` and ``'steps'`` (steps per epoch)
    :return: the trained Keras model
    """
    input_shape, epochs = params['input_shape'], params['epochs']
    batch_size, steps = params['batch_size'], params['steps']

    # ImageNet backbone with global average pooling; kept frozen.
    backbone = resnet_v2.ResNet50V2(include_top=False,
                                    weights='imagenet',
                                    pooling='avg',
                                    input_shape=input_shape)
    for frozen_layer in backbone.layers:
        frozen_layer.trainable = False

    # Backbone -> dropout -> single sigmoid unit.
    net = Sequential()
    for component in (backbone, Dropout(0.5), Dense(1, activation='sigmoid')):
        net.add(component)

    net.compile(loss='binary_crossentropy',
                optimizer=optimizers.RMSprop(lr=2e-5),
                metrics=['accuracy'])
    net.summary()

    # Train on randomly augmented batches.
    augmenter = ImageDataGenerator(rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.15,
                                   horizontal_flip=False,
                                   fill_mode="nearest",
                                   vertical_flip=True)
    batches = augmenter.flow(np.asarray(X_train),
                             np.asarray(y_train),
                             batch_size=batch_size)
    net.fit_generator(batches, epochs=epochs, steps_per_epoch=steps)
    return net
def test(**kwargs):
    """Evaluate a saved model on the test set with test-time augmentation.

    Predictions from ``kwargs['tta']`` augmented passes are averaged before
    taking the arg-max. Weighted F1, accuracy and weighted recall are printed
    and a confusion matrix is drawn.

    :param kwargs: must contain ``'model'`` (name used in the weights file
        name), ``'batch_size'`` and ``'tta'`` (number of augmented passes)
    """
    c = K.tf.ConfigProto()
    c.gpu_options.allow_growth = True
    session = K.tf.Session(config=c)
    set_session(session)

    test_df = pd.read_pickle(config.TEST_PATH)
    y_true = np.asarray(test_df['emotion'])
    test_size = len(test_df)

    # Read the test-set image files as grayscale arrays.
    x_test = np.asarray([
        img_to_array(
            load_img(config.TEST_DIR + '/' + path, color_mode='grayscale'))
        for path in test_df['path']
    ])

    test_datagen = ImageDataGenerator(
        samplewise_center=True,
        samplewise_std_normalization=True,
        brightness_range=(0.8, 1.2),
        rotation_range=10,
        width_shift_range=0.10,
        height_shift_range=0.10,
        zoom_range=0.10,
        horizontal_flip=True,
    )

    model_save_path = config.MODEL_DIR + '/' + 'model_{}.h5'.format(
        kwargs['model'])
    model_image = keras.models.load_model(model_save_path)
    # summary() prints by itself and returns None, so it is not wrapped in
    # print().
    model_image.summary()

    # The step count must be an integer; np.ceil alone returns a float.
    steps = int(np.ceil(test_size / kwargs['batch_size']))

    scores = []
    for i in tqdm(range(kwargs['tta'])):
        # shuffle=False keeps predictions aligned with y_true on every pass.
        score = model_image.predict_generator(test_datagen.flow(
            x_test, batch_size=kwargs['batch_size'], shuffle=False),
                                              steps=steps)
        scores.append(score)

    mean_score = np.mean(scores, axis=0)
    assert mean_score.shape[0] == y_true.shape[0]
    y_pred = np.argmax(mean_score, axis=-1)

    f1 = metrics.f1_score(y_true, y_pred, average='weighted')
    acc = metrics.accuracy_score(y_true, y_pred)
    recall = metrics.recall_score(y_true, y_pred, average='weighted')
    draw_confusion_matrix(y_true, y_pred)
    print('f1 score: {}, acc: {}, recall: {}'.format(f1, acc, recall))
def main():
    """Train the ResNet emotion classifier with augmentation and checkpoints.

    Reads the dataset from ``TRAINING_DATA_CSV_PATH``, computes class
    weights, trains for ``EPOCHS`` epochs with batches of ``BATCH_SIZE`` and
    saves the best weights under ``MODEL_CHECKPOINT_PATH``.
    """
    # Check that necessary paths exist; if not, create them.
    if not os.path.exists(MODEL_CHECKPOINT_PATH):
        print_info('Creating model checkpoint path...')
        os.makedirs(MODEL_CHECKPOINT_PATH)

    # Get the dataset from the CSV, load it and preprocess the data.
    print_info('Preparing training and validation data...')
    train_img, train_labels, val_img, val_labels = get_training_val_data(
        TRAINING_DATA_CSV_PATH)

    # Calculate class weights.
    print_info('Calculating class weights...')
    train_weights = calculate_class_weights(train_labels)

    # Load and compile the model with the correct input shape and number of
    # classes.
    print_info('Loading model...')
    input_shape = train_img[0].shape
    num_of_classes = len(np.unique(train_labels))
    model = resnet(input_shape, num_of_classes)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Batches per epoch, as an integer (ceiling division) so every sample is
    # seen once per epoch.
    training_steps = max(1, -(-len(train_img) // BATCH_SIZE))

    # Callbacks used during training. os.path.join works whether or not
    # MODEL_CHECKPOINT_PATH ends with a separator.
    save_path = os.path.join(MODEL_CHECKPOINT_PATH,
                             'weights_{epoch:03d}.hdf5')
    checkpoint = ModelCheckpoint(filepath=save_path,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)
    callbacks = [checkpoint]

    # Augmentations applied to the training stream.
    data_generator = ImageDataGenerator(rotation_range=20,
                                        horizontal_flip=True,
                                        width_shift_range=0.2,
                                        height_shift_range=0.2)

    # Train the model. The flow must use BATCH_SIZE explicitly: its default
    # batch size is 32, which would not match `training_steps`.
    print_info('Starting training...')
    model.fit_generator(data_generator.flow(train_img,
                                            train_labels,
                                            batch_size=BATCH_SIZE),
                        steps_per_epoch=training_steps,
                        class_weight=train_weights,
                        epochs=EPOCHS,
                        validation_data=(val_img, val_labels),
                        shuffle=True,
                        callbacks=callbacks,
                        use_multiprocessing=False,
                        workers=multiprocessing.cpu_count())
def setUpClass(cls):
    """Build the shared MNIST fixture, classifier and activation defences.

    Sets ``cls.mnist``, ``cls.classifier``, ``cls.defence`` (array based)
    and ``cls.defence_gen`` (data-generator based).
    """
    (x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    cls.mnist = (x_train, y_train), (x_test, y_test), (min_, max_)

    # Create simple keras model
    import tensorflow as tf

    # Only major and minor are needed. Parsing every component with int()
    # fails on pre-release tags such as "2.4.0-rc1".
    tf_major, tf_minor = (int(v) for v in tf.__version__.split(".")[:2])
    if tf_major == 2 and tf_minor >= 3:
        tf.compat.v1.disable_eager_execution()
        from tensorflow.keras import backend as k
        from tensorflow.keras.models import Sequential
        from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
    else:
        import keras.backend as k
        from keras.models import Sequential
        from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D

    k.set_learning_phase(1)
    model = Sequential()
    model.add(
        Conv2D(32,
               kernel_size=(3, 3),
               activation="relu",
               input_shape=x_train.shape[1:]))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Flatten())
    model.add(Dense(10, activation="softmax"))
    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])

    from art.estimators.classification.keras import KerasClassifier

    cls.classifier = KerasClassifier(model=model, clip_values=(min_, max_))
    cls.classifier.fit(x_train, y_train, nb_epochs=1, batch_size=128)
    cls.defence = ActivationDefence(cls.classifier, x_train, y_train)

    # Same defence, fed through a generator that yields the whole training
    # subset as a single batch.
    datagen = ImageDataGenerator()
    datagen.fit(x_train)
    data_gen = KerasDataGenerator(datagen.flow(x_train,
                                               y_train,
                                               batch_size=NB_TRAIN),
                                  size=NB_TRAIN,
                                  batch_size=NB_TRAIN)
    cls.defence_gen = ActivationDefence(cls.classifier,
                                        None,
                                        None,
                                        generator=data_gen)
def img_generating_2(main_dir, categories, n=3):
    """Write ``n`` augmented copies of every JPEG in each category directory.

    Upgraded version of ``img_generating``: augmenting large images is slow,
    so each image is first shrunk, keeping its aspect ratio, to fit within
    299x299 pixels. The copies are saved next to the source file as
    ``<stem>_copy*.jpg``.

    :param main_dir: path prefix; each category name is appended directly
    :param categories: iterable of category directory names
    :param n: number of augmented copies per source image
    """
    max_pixels = (299, 299)
    datagen = ImageDataGenerator(
        rescale=1. / 255,
        rotation_range=30,  # rotate within this angle range
        width_shift_range=0.1,  # horizontal shift
        height_shift_range=0.1,  # vertical shift
        brightness_range=[0.2, 1.5],  # brightness
        shear_range=0.7,  # counter-clockwise shear
        zoom_range=[0.8, 1.0],
        horizontal_flip=True,  # flip horizontally
        vertical_flip=True,
        fill_mode='nearest')

    for cat in categories:
        cat_dir = main_dir + cat  # directory of this category
        files = glob.glob(cat_dir + "/*.jpg")  # every jpg file in cat_dir
        for file in files:
            im = load_img(file)
            file_size = os.stat(file).st_size / 1024  # convert to KB
            print(F'file_size: {file_size} KB')

            print(im.size, '-')  # im.size is the pixel size
            # Image.ANTIALIAS was an alias of LANCZOS and was removed in
            # Pillow 10; LANCZOS is the same filter under its current name.
            im.thumbnail(max_pixels, Image.LANCZOS)
            im = im.convert('RGB')
            print(im.size)

            x = img_to_array(im)
            # Add a leading batch axis: flow() expects 4-D input.
            x = x.reshape((1, ) + x.shape)

            i = 0
            for _ in datagen.flow(
                    x,
                    batch_size=1,
                    save_to_dir=cat_dir,
                    # e.g. "tsu0.jpg" -> "tsu0_copy"
                    save_prefix=os.path.basename(file).split('.')[0] +
                    '_copy',
                    save_format='jpg'):
                i += 1
                if i >= n:
                    break
def fit_generator(self, X, n_epochs, batch_size=256):
    """Train ``self.autoencoder`` to reconstruct ``X`` through a generator.

    ``X`` is split 90/10 into training and validation parts (fixed seed 0).
    ``n_epochs`` and ``batch_size`` are honoured instead of hard-coded values.

    :param X: array of samples; used as both input and target
    :param n_epochs: number of training epochs
    :param batch_size: samples per training batch
    """
    indices_fracs = split(fracs=[0.9, 0.1], N=len(X), seed=0)
    X_train, X_valid = X[indices_fracs[0]], X[indices_fracs[1]]

    data_augmentation = ImageDataGenerator(fill_mode="nearest")
    self.autoencoder.fit_generator(
        data_augmentation.flow(X_train, X_train, batch_size=batch_size),
        validation_data=(X_valid, X_valid),
        # At least one step, even when the data is smaller than one batch.
        steps_per_epoch=max(1, len(X_train) // batch_size),
        epochs=n_epochs)
def train(self):
    """Train ``self.model`` with augmentation and a step-wise LR schedule.

    The learning rate starts at 0.1 and is halved every 20 epochs. Weights
    are written to ``weights/vgg16_cifar10.h5`` after training.

    :return: the Keras ``History`` object
    """
    # Training parameters
    batch_size = 128
    maxepoches = 50
    learning_rate = 0.1
    lr_decay = 1e-6
    lr_drop = 20

    # Normalised train and test inputs.
    trainX_norm, testX_norm = self.normalize(self.trainX, self.testX)

    def halve_every_drop(epoch):
        """Return the learning rate for ``epoch`` (halved every lr_drop)."""
        return learning_rate * (0.5**(epoch // lr_drop))

    schedule_cb = keras.callbacks.LearningRateScheduler(halve_every_drop)

    # Augmentation: small rotations, shifts and horizontal flips only; every
    # normalisation / whitening option is switched off.
    augmenter = ImageDataGenerator(
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=15,  # degrees
        width_shift_range=0.1,  # fraction of total width
        height_shift_range=0.1,  # fraction of total height
        horizontal_flip=True,
        vertical_flip=False)
    augmenter.fit(trainX_norm)

    # Optimisation details
    optimizer = optimizers.SGD(lr=learning_rate,
                               decay=lr_decay,
                               momentum=0.9,
                               nesterov=True)
    self.model.compile(loss='categorical_crossentropy',
                       optimizer=optimizer,
                       metrics=['accuracy'])

    train_flow = augmenter.flow(trainX_norm,
                                self.trainY,
                                batch_size=batch_size)
    history = self.model.fit_generator(
        train_flow,
        steps_per_epoch=trainX_norm.shape[0] // batch_size,
        epochs=maxepoches,
        validation_data=(testX_norm, self.testY),
        callbacks=[schedule_cb],
        verbose=2)

    self.model.save_weights('weights/vgg16_cifar10.h5')
    return history
def train_network(network, train_images, test_images, train_labels,
                  test_labels, args):
    """Train ``network`` with or without on-the-fly augmentation.

    :param network: compiled Keras model
    :param train_images: training inputs
    :param test_images: validation inputs
    :param train_labels: training targets
    :param test_labels: validation targets
    :param args: dict with keys ``'augmentation'`` (bool), ``'batch_size'``
        and ``'epochs'``
    :return: the Keras ``History`` object
    """
    if args['augmentation']:
        aug = ImageDataGenerator(rotation_range=20,
                                 width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 shear_range=0.2,
                                 zoom_range=0.2,
                                 horizontal_flip=True,
                                 fill_mode="nearest")
        # steps_per_epoch counts batches, not samples. Using the sample count
        # would make every epoch run `batch_size` times too long.
        steps = max(1, -(-len(train_images) // args['batch_size']))
        results = network.fit_generator(
            aug.flow(train_images,
                     train_labels,
                     batch_size=args['batch_size']),
            validation_data=(test_images, test_labels),
            steps_per_epoch=steps,
            epochs=args['epochs'])
    else:
        results = network.fit(train_images,
                              train_labels,
                              validation_data=(test_images, test_labels),
                              epochs=args['epochs'],
                              batch_size=args['batch_size'])
    return results
def train():
    """Train the DenseNet needle classifier and plot its learning curves.

    Data is read through ``read_data`` using the module-level ``labelPath``
    and ``imgPath``. Checkpoints go to ``checkpointPath``, TensorBoard logs
    to ``tfbdPath``, and the final model and plot to ``savePath``.
    """
    imgDim = (224, 224, 1)
    classNum = 2
    batchSize = 32
    epochsnum = 200
    INIT_LR = 6e-4

    train_data, train_label, test_data, test_label = read_data(
        labelPath, imgPath, imgDim)

    augmenter = ImageDataGenerator(rotation_range=20,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   shear_range=0,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   vertical_flip=True)

    # Other builders tried with the same call shape: createModel,
    # createModel_AlexNet, createModel_ResNet, createModel_ResNet18.
    model = createModel_DensNet(*imgDim, classNum)  # 224

    optimizer = Adam(lr=INIT_LR, decay=INIT_LR / epochsnum)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=["categorical_accuracy"])

    tensorboard_cb = TensorBoard(log_dir=tfbdPath,
                                 histogram_freq=0,
                                 write_graph=True,
                                 write_images=True)

    # Checkpoint: keep the best val_categorical_accuracy, checked every 4
    # epochs.
    ckptName = 'checkpoint{epoch:02d}-loss{loss:.2f}-val_loss{val_loss:.2f}-acc{categorical_accuracy:.2f}-val_acc{val_categorical_accuracy:.2f}.model'
    checkpoint_cb = ModelCheckpoint(checkpointPath + ckptName,
                                    monitor='val_categorical_accuracy',
                                    verbose=1,
                                    save_best_only=True,
                                    mode='max',
                                    period=4)

    # Fit the data
    train_flow = augmenter.flow(train_data,
                                train_label,
                                batch_size=batchSize,
                                shuffle=True)
    H = model.fit_generator(train_flow,
                            validation_data=(test_data, test_label),
                            steps_per_epoch=len(train_data) // batchSize,
                            epochs=epochsnum,
                            verbose=1,
                            callbacks=[checkpoint_cb, tensorboard_cb])

    # Save the model
    model.save(savePath + 'needle.model')

    # Plot the training loss and accuracy
    plt.style.use("ggplot")
    plt.figure()
    epochs_axis = np.arange(0, epochsnum)
    curves = (
        ("loss", "train_loss"),
        ("val_loss", "val_loss"),
        ("categorical_accuracy", "train_acc"),
        ("val_categorical_accuracy", "val_acc"),
    )
    for history_key, legend in curves:
        plt.plot(epochs_axis, H.history[history_key], label=legend)
    plt.title("Training Loss and Accuracy on Needle/not Needle")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="lower left")
    plt.savefig(savePath + 'plot.png')
def data_generator_online(data_gen_args, X, y, batch_size):
    """Materialise augmented image/mask batches into two arrays.

    Both generators share the same arguments and seed and run unshuffled, so
    the images and masks stay aligned. ``len(X)`` batches are drawn and
    concatenated along axis 0.

    :param data_gen_args: keyword arguments for ``ImageDataGenerator``
    :param X: image array
    :param y: mask array
    :param batch_size: samples per generated batch
    :return: ``(images, masks)`` as concatenated arrays
    """
    # NOTE(review): the loop stops after len(X) *batches*, not len(X)
    # samples, so with batch_size > 1 the data is cycled several times.
    # Confirm this is intended before changing it.
    point_break = len(X)

    # Same seed and keyword arguments for both generators.
    seed = 1
    image_datagen = ImageDataGenerator(**data_gen_args)
    mask_datagen = ImageDataGenerator(**data_gen_args)
    image_flow = image_datagen.flow(X,
                                    None,
                                    batch_size=batch_size,
                                    shuffle=False,
                                    seed=seed)
    mask_flow = mask_datagen.flow(y,
                                  None,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  seed=seed)

    # Collect batches and concatenate once; np.append inside the loop
    # re-copies everything accumulated so far on every iteration.
    x_batches = []
    y_batches = []
    for x_batch, y_batch in zip(image_flow, mask_flow):
        x_batches.append(x_batch)
        y_batches.append(y_batch)
        # The generators loop indefinitely, so stop explicitly.
        if len(x_batches) >= point_break:
            break

    X_batch_test = np.concatenate(x_batches, axis=0)
    y_batch_test = np.concatenate(y_batches, axis=0)
    return X_batch_test, y_batch_test
def training_set_generator(X_train, Y_train):
    """Endlessly yield single ``(image, mask)`` pairs with shared augmentation.

    Two generators with identical arguments are fitted and flowed with the
    same seed, one on the images and one on the masks.

    :param X_train: image array
    :param Y_train: mask array
    """
    shared_seed = 1
    augment_kwargs = {
        'featurewise_center': True,
        'featurewise_std_normalization': True,
        'rotation_range': 90,
        'width_shift_range': 0.1,
        'height_shift_range': 0.1,
        'zoom_range': 0.2,
        'rescale': 1. / 255,
    }
    image_datagen = ImageDataGenerator(**augment_kwargs)
    mask_datagen = ImageDataGenerator(**augment_kwargs)

    # The same seed goes to both fit() and flow() calls.
    image_datagen.fit(X_train, augment=True, seed=shared_seed)
    mask_datagen.fit(Y_train, augment=True, seed=shared_seed)

    image_flow = image_datagen.flow(X_train, seed=shared_seed, batch_size=1)
    mask_flow = mask_datagen.flow(Y_train, seed=shared_seed, batch_size=1)

    for pair in zip(image_flow, mask_flow):
        image_batch, mask_batch = pair
        yield (image_batch, mask_batch)
def data_augmentation(x_train_in, x_train_out, augment_size):
    """Append ``augment_size`` augmented input/target pairs to the data.

    Both arrays are reshaped to ``(n, 120, 120, 1)`` float32 and scaled to
    [0, 1]. ``augment_size`` pairs are sampled with replacement and given
    random rotation / zoom / shift / flip transforms.

    The input and its target get the *same* transform: both are flowed
    unshuffled through the generator with one shared seed. Transforming only
    the inputs would leave each augmented input paired with an untransformed
    target.

    :param x_train_in: input images, reshapeable to ``(n, 120, 120, 1)``
    :param x_train_out: target images, reshapeable to ``(n, 120, 120, 1)``
    :param augment_size: number of augmented pairs to append
    :return: ``(x_train, y_train)`` with the augmented pairs appended
    """
    x_train_in = np.reshape(
        x_train_in,
        (x_train_in.shape[0], 120, 120, 1)).astype('float32') / 255
    x_train_out = np.reshape(
        x_train_out,
        (x_train_out.shape[0], 120, 120, 1)).astype('float32') / 255

    image_generator = ImageDataGenerator(
        rotation_range=10,
        featurewise_center=False,
        samplewise_std_normalization=False,
        zoom_range=0.05,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
    )
    # No featurewise statistics or ZCA whitening are enabled; the call is
    # kept from the original and should have no effect on the output.
    image_generator.fit(x_train_in, augment=True)

    # Sample the pairs to augment (with replacement).
    randidx = np.random.randint(x_train_in.shape[0], size=augment_size)
    x_augmented = x_train_in[randidx].copy()
    y_augmented = x_train_out[randidx].copy()

    # One shared seed makes both flows draw the same random transforms.
    pair_seed = int(np.random.randint(2**31 - 1))
    x_augmented = next(
        image_generator.flow(x_augmented,
                             batch_size=augment_size,
                             shuffle=False,
                             seed=pair_seed))
    y_augmented = next(
        image_generator.flow(y_augmented,
                             batch_size=augment_size,
                             shuffle=False,
                             seed=pair_seed))

    # Append augmented data to the training set.
    x_train = np.concatenate((x_train_in, x_augmented))
    y_train = np.concatenate((x_train_out, y_augmented))
    return x_train, y_train
def augment_data(set_to_augment,
                 prefix="",
                 total_size=35000,
                 batchsize=64,
                 use_cached_training_data=None,
                 verbose=True):
    """Grow a dataset to ``total_size`` samples with augmented copies.

    When ``use_cached_training_data`` is given, a cached result is returned
    if available; otherwise the newly generated data is written to the cache.

    :param set_to_augment: ``(x, y)`` pair of NumPy arrays
    :param prefix: path prefix of the cache file
    :param total_size: number of samples in the returned dataset
    :param batchsize: augmentation batch size
    :param use_cached_training_data: cache name, or ``None`` to disable
        caching
    :param verbose: print progress while generating
    :return: ``((x, y), from_cache)`` where ``from_cache`` is ``True`` only
        when the data was loaded from the cache
    """
    # Build the cache path only when caching is enabled. Concatenating the
    # default None would raise TypeError.
    filepath = None
    if use_cached_training_data:
        filepath = prefix + use_cached_training_data + str(total_size)
        # Look for cached training data
        x, y = load_2d_numpy_array_if_exists(filepath)
        if x is not None and y is not None:
            print(" Found cached training data for {}".format(
                use_cached_training_data))
            return (x, y), True

    # Enhance the training set with augmentation
    generated_data = set_to_augment[0].copy(), set_to_augment[1].copy()
    if len(generated_data[0]) < total_size:
        datagen = ImageDataGenerator(
            rotation_range=15,
            width_shift_range=0.1,
            height_shift_range=0.1,
            horizontal_flip=True,
        )
        datagen.fit(set_to_augment[0])
        generator = datagen.flow(set_to_augment[0],
                                 set_to_augment[1],
                                 batch_size=batchsize)

        # Collect batches and concatenate once at the end rather than
        # re-copying the whole dataset for every batch.
        x_chunks = [generated_data[0]]
        y_chunks = [generated_data[1]]
        n_generated = len(generated_data[0])
        print_percentage = 0.1
        while n_generated < total_size:
            batch_x, batch_y = next(generator)
            x_chunks.append(batch_x)
            y_chunks.append(batch_y)
            n_generated += len(batch_x)
            if verbose and n_generated / total_size > print_percentage:
                print("{}%..".format(int(print_percentage * 100)),
                      end="",
                      flush=True)
                print_percentage += 0.1
        generated_data = (np.concatenate(x_chunks, axis=0),
                          np.concatenate(y_chunks, axis=0))
        if verbose:
            print("100%! Done!")

    # Store the generated data in the cache
    if use_cached_training_data:
        save_2d_numpy_array_if_exists(filepath,
                                      generated_data[0][:total_size],
                                      generated_data[1][:total_size])

    generated_data = shuffle(*generated_data)
    return (generated_data[0][:total_size],
            generated_data[1][:total_size]), False