def train_model(name, experiment, image_size, training_data_list,
                training_mask_list, model_spec=None,
                preprocess_list=None, preprocess_stretch=False,
                preprocess_mask=None, preprocess_fisher=False,
                keep_image=True, load_model=False, epochs=15):
    """Train a U-Net on image/mask lists and score it on a held-out split.

    The split sizes (``test_samples``, ``val_samples``), the augmentation
    multiplier (``random_factor``) and ``batch_size`` are not parameters:
    they are read from the enclosing module scope.

    Args:
        name: Prefix of the weights file, ``<name>.<experiment>.h5``.
        experiment: Experiment number. ``experiment * 42`` seeds the shuffle
            and the number is embedded in the weights file name.
        image_size: Forwarded to ``RoadSeq`` and ``UNet``.
        training_data_list: Input samples (copied, the caller's list is not
            shuffled).
        training_mask_list: Targets, expected to be index-aligned with
            ``training_data_list`` (copied as well).
        model_spec: Forwarded to ``UNet``. ``None`` selects
            ``[16, 32, 64, 128, 256]``.
        preprocess_list: When not ``None``, a gradient preprocessing step is
            added and receives this value.
        preprocess_stretch: Selects ``ImagePreprocessStretchedGradient``
            instead of ``ImagePreprocessGradient`` for that step.
        preprocess_mask: When not ``None``, an ``ImagePreprocessMask`` step
            is appended.
        preprocess_fisher: When exactly ``True``, an
            ``ImagePreprocessFisherize`` step is appended.
        keep_image: Forwarded to ``ImagePreprocessGradient``.
        load_model: When truthy, weights are loaded from the weights file
            before training.
        epochs: Number of epochs passed to ``model.fit``.

    Returns:
        The result of ``calculate_error`` (presumably a tuple, since a tuple
        is concatenated to it) extended with the average prediction time per
        test sample as measured by ``timer()``.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if model_spec is None:
        model_spec = [16, 32, 64, 128, 256]

    # make copies of the input lists before shuffling
    training_data_list = list(training_data_list)
    training_mask_list = list(training_mask_list)
    # Two generators with the same seed apply the same permutation to both
    # lists (as long as they have the same length), keeping pairs aligned.
    shuffle_seed = experiment * 42
    random.Random(shuffle_seed).shuffle(training_data_list)
    random.Random(shuffle_seed).shuffle(training_mask_list)

    def split(paths):
        # Layout after shuffling: [ train | validation | test ].
        # Boundaries are computed from the length instead of negative
        # indices so that a size of 0 yields an empty split (``[-0:]`` would
        # return the whole list) and so that validation ends where test
        # begins even when val_samples != test_samples.
        total = len(paths)
        train_end = max(total - (test_samples + val_samples), 0)
        val_end = max(total - test_samples, 0)
        return paths[:train_end], paths[train_end:val_end], paths[val_end:]

    train_inputs, val_input_img_paths, test_input_img_paths = split(
        training_data_list)
    train_targets, val_target_img_paths, test_target_img_paths = split(
        training_mask_list)

    # we're augmenting data -- expand the list of training data
    train_input_img_paths = train_inputs * random_factor
    train_target_img_paths = train_targets * random_factor

    # Chain of preprocessing functions, first one added is performed first
    pp = None
    if preprocess_list is not None:
        if not preprocess_stretch:
            pp = ImagePreprocessGradient(preprocess_list, keep_image, pp)
        else:
            pp = ImagePreprocessStretchedGradient(preprocess_list, pp)
    if preprocess_mask is not None:
        # Apply mask after gradients - masking first only gets overwritten
        pp = ImagePreprocessMask(preprocess_mask, pp)
    if preprocess_fisher is True:
        pp = ImagePreprocessFisherize(pp)

    def make_sequence(batch, inputs, targets, augment):
        # Without a preprocessing chain the images are used as they are;
        # otherwise each sequence gets its own pp.preprocess() result.
        if pp is None:
            return RoadSeq(batch, image_size, inputs, targets,
                           augment_data=augment)
        return RoadSeq(batch, image_size, inputs, targets,
                       augment_data=augment, preprocess_fn=pp.preprocess())

    # Instantiate data sequences for each split
    train_gen = make_sequence(batch_size, train_input_img_paths,
                              train_target_img_paths, True)
    val_gen = make_sequence(batch_size, val_input_img_paths,
                            val_target_img_paths, False)
    # One batch as large as the whole test split, so batch 0 is all of it.
    test_gen = make_sequence(len(test_input_img_paths), test_input_img_paths,
                             test_target_img_paths, False)

    model_name = name + '.' + str(experiment) + '.h5'
    model = UNet(image_size, model_spec)
    model.compile(optimizer="adam", loss="binary_crossentropy",
                  metrics=["acc"])
    if load_model:
        model.load_weights(model_name)
    model.summary()

    callbacks = [
        keras.callbacks.ModelCheckpoint(model_name, save_best_only=True)
    ]
    model.fit(train_gen, epochs=epochs, verbose=1, validation_data=val_gen,
              callbacks=callbacks)

    # Only the inputs are needed here; calculate_error receives the target
    # paths rather than the loaded targets.
    x, _ = test_gen[0]
    start = timer()
    results = model.predict(x)
    end = timer()
    prediction_time = (end - start) / len(results)

    # Binarise the predictions at 0.5.
    results = (results > 0.5).astype(np.uint8)
    return calculate_error(results, test_target_img_paths) + (prediction_time,)
# Number of GPUs to train on; values above 1 enable the multi-GPU wrapper.
GPU_COUNT = 1
RESUME = False

if __name__ == '__main__':
    # Hyper-parameters. The batch holds one sample per GPU.
    batch_size = 1 * GPU_COUNT
    epochs = 100
    # Base learning rate scaled by batch size relative to a batch of 16.
    batch_fraction = float(batch_size) / 16
    lr_base = 0.01 * batch_fraction
    input_shape = (320, 320, 3)

    # Instantiate the model under the CPU device scope.
    with tf.device("/cpu:0"):
        model = UNet(input_shape, weight_decay=3e-3, classes=21)

    if GPU_COUNT > 1:
        # Print the single-device model before it is wrapped.
        model.summary()
        from keras.utils.training_utils import multi_gpu_model
        model = multi_gpu_model(model, gpus=GPU_COUNT)

    tracked_metrics = [
        'accuracy',
        categorical_accuracy_without_ambiguous,
        categorical_accuracy_only_valid_classes,
    ]
    # Alternative optimizer kept for reference:
    #   SGD(lr=lr_base, momentum=0.9)
    adam_optimizer = Adam(lr=0.001)
    model.compile(loss=crossentropy_without_ambiguous,
                  optimizer=adam_optimizer,
                  metrics=tracked_metrics)
    model.summary()
verbose=1), tf.keras.callbacks.CSVLogger(filename=(xp_dir / 'fit_logs.csv')), tf.keras.callbacks.ReduceLROnPlateau( patience=20, factor=0.5, verbose=1, ) ] # create the U-Net model to train unet_kwargs = dict(input_shape=(LCD.IMG_SIZE, LCD.IMG_SIZE, LCD.N_CHANNELS), num_classes=LCD.N_CLASSES, num_layers=2) print(f"Creating U-Net with arguments: {unet_kwargs}") model = UNet(**unet_kwargs) print(model.summary()) # get optimizer, loss, and compile model for training optimizer = tf.keras.optimizers.Adam(lr=config.lr) # compute class weights for the loss: inverse-frequency balanced # note: we set to 0 the weights for the classes "no_data"(0) and "clouds"(1) to ignore these class_weight = (1 / LCD.TRAIN_CLASS_COUNTS ) * LCD.TRAIN_CLASS_COUNTS.sum() / (LCD.N_CLASSES) class_weight[LCD.IGNORED_CLASSES_IDX] = 0. print(f"Will use class weights: {class_weight}") #loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False) loss = WeightedSparseCategoricalCrossEntropy() #loss = dice_loss() #loss = jaccard_loss()