def lr_find(self, kf: datasets.KFoldedDataSet, model: keras.Model,
            ec: ExecutionConfig, start_lr, end_lr, epochs):
    if 'unfreeze_encoder' in self.dict and self.dict['unfreeze_encoder']:
        set_trainable(model)
    if self.loss or self.lr:
        self.cfg.compile(model, self.cfg.createOptimizer(self.lr), self.loss)
    cb = [] + self.cfg.callbacks
    if self.initial_weights is not None:
        model.load_weights(self.initial_weights)
    ll = LRFinder(model)
    num_batches = kf.numBatches(ec.fold, self.negatives, ec.subsample) * epochs
    # Multiply the LR by a constant factor after every batch so that it sweeps
    # geometrically from start_lr to end_lr over num_batches steps.
    ll.lr_mult = (float(end_lr) / float(start_lr)) ** (1.0 / float(num_batches))
    K.set_value(model.optimizer.lr, start_lr)
    callback = LambdaCallback(
        on_batch_end=lambda batch, logs: ll.on_batch_end(batch, logs))
    cb.append(callback)
    kf.trainOnFold(ec.fold, model, cb, epochs, self.negatives,
                   subsample=ec.subsample,
                   validation_negatives=self.validation_negatives)
    return ll
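# Aside: a minimal sanity check of the geometric LR sweep computed in lr_find
# above. The per-batch multiplier is chosen so that applying it once per batch
# carries the learning rate from start_lr to end_lr in exactly num_batches
# steps. The concrete numbers below are illustrative assumptions, not values
# from the original code.
def _check_lr_sweep(start_lr=1e-6, end_lr=1.0, num_batches=600):
    lr_mult = (float(end_lr) / float(start_lr)) ** (1.0 / num_batches)
    lr = start_lr
    for _ in range(num_batches):
        lr *= lr_mult  # the same update the LRFinder callback applies per batch
    assert abs(lr - end_lr) < 1e-6  # the sweep lands on end_lr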
def train2(sm, backbone='resnet34'):
    from segmentation_models.utils import set_trainable
    myGene, val_gen = get_datagen(backbone)
    model = sm(backbone_name=backbone, encoder_weights='imagenet',
               freeze_encoder=True)
    model.compile('Adam', 'binary_crossentropy', ['binary_accuracy'])

    # pretrain model decoder
    # model.fit(x, y, epochs=2)
    model.fit_generator(myGene,
                        validation_data=val_gen,
                        validation_steps=1,
                        steps_per_epoch=300,
                        epochs=2,
                        callbacks=[model_checkpoint])

    # release all layers for training
    set_trainable(model)  # set all layers trainable and recompile model

    # continue training
    # model.fit(x, y, epochs=100)
    model.fit_generator(myGene,
                        validation_data=val_gen,
                        validation_steps=1,
                        steps_per_epoch=300,
                        # epochs=100,
                        epochs=2,
                        callbacks=[model_checkpoint])
    return model
def train_model(model, train_gen, valid_gen, epochs, batch_size, save_cp=True):
    total_batch_count = 0
    train_batch_num = len(train_gen)
    train_num = train_batch_num * batch_size
    # train_gen_out = iter_sequence_infinite(train_gen)
    valid_batch_num = len(valid_gen)
    valid_num = valid_batch_num * batch_size
    # valid_gen_out = iter_sequence_infinite(valid_gen)

    for epoch in range(epochs):  # iterate for the requested number of epochs
        set_trainable(model)
        epoch_loss = 0  # loss accumulated over this epoch
        epoch_iou = 0
        count = 0
        with tqdm(total=train_num, desc=f'Epoch {epoch + 1}/{epochs}',
                  position=0, leave=True, unit='img') as pbar:  # progress bar
            for batch in train_gen:
                # batch = next(train_gen_out)
                imgs = batch[0]
                true_masks = batch[1]
                loss, iou = model.train_on_batch(imgs, true_masks)  # loss for this batch
                epoch_loss += loss
                epoch_iou += iou
                pbar.set_postfix(**{
                    'Batch loss': loss,
                    'Batch IoU': iou
                })  # show the current batch loss/IoU in the progress bar
                pbar.update(imgs.shape[0])  # update progress
                count += 1
                total_batch_count += 1

        print("Epoch : loss: {}, IoU : {}".format(epoch_loss / count,
                                                  epoch_iou / count))

        # Do validation
        validation_model(model, valid_gen, valid_num)
        train_gen.on_epoch_end()
        valid_gen.on_epoch_end()

        if save_cp:
            try:
                if not os.path.isdir(checkpoint_dir):
                    os.mkdir(checkpoint_dir)
                    logging.info('Created checkpoint directory')
            except OSError:
                pass
            model.save_weights(
                os.path.join(checkpoint_dir, f'CP_epoch{epoch + 1}.h5'))
            logging.info(f'Checkpoint {epoch + 1} saved !')
def skip_stage(self, i, model, s, subsample):
    st: Stage = self.stages[s]
    ec = ExecutionConfig(fold=i, stage=s, subsample=subsample,
                         dr=os.path.dirname(self.path))
    if os.path.exists(ec.weightsPath()):
        model.load_weights(ec.weightsPath())
    if 'unfreeze_encoder' in st.dict and st.dict['unfreeze_encoder']:
        set_trainable(model)
def train_stage_1(x_train, y_train, x_valid, y_valid):
    opt = optimizers.adam(lr=0.001)
    model = Unet(backbone_name=BACKBONE, encoder_weights='imagenet',
                 freeze_encoder=True)
    model.compile(loss=bce_dice_jaccard_loss, optimizer=opt,
                  metrics=[my_iou_metric])

    model_checkpoint = ModelCheckpoint(
        OUTPUT_DIR + "/{}/models/{}_fold_{}_stage1.model".format(
            BASE_NAME, BASE_NAME, CUR_FOLD_INDEX),
        monitor='val_my_iou_metric', mode='max',
        save_best_only=True, verbose=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_my_iou_metric', mode='max',
                                  factor=0.5, patience=6, min_lr=0.00001,
                                  verbose=1)
    early_stopping = EarlyStopping(monitor='val_my_iou_metric', mode='max',
                                   patience=20, verbose=1)
    logger = CSVLogger(OUTPUT_DIR + '/{}/logs/{}_fold_{}_stage1.log'.format(
        BASE_NAME, BASE_NAME, CUR_FOLD_INDEX))

    # Effective batch size per step, shrunk to leave room for the augmented copies.
    train_batch_size = int(np.ceil(BATCH_SIZE / (len(AUGS) + 1)))

    # Warm-up: train the decoder with the encoder frozen.
    model.fit_generator(
        TrainGenerator(x_train, y_train, batch_size=train_batch_size,
                       img_size_target=IMG_SIZE_TARGET),
        steps_per_epoch=int(np.ceil(len(x_train) / train_batch_size)),
        epochs=WARM_EPOCHS,
        validation_data=ValidGenerator(x_valid, y_valid,
                                       batch_size=BATCH_SIZE,
                                       img_size_target=IMG_SIZE_TARGET),
        callbacks=[model_checkpoint],
        shuffle=True)

    # Unfreeze the encoder and train the full model.
    segmentation_utils.set_trainable(model)
    model.fit_generator(
        TrainGenerator(x_train, y_train, batch_size=train_batch_size,
                       img_size_target=IMG_SIZE_TARGET),
        steps_per_epoch=int(np.ceil(len(x_train) / train_batch_size)),
        epochs=EPOCHS,
        validation_data=ValidGenerator(x_valid, y_valid,
                                       batch_size=BATCH_SIZE,
                                       img_size_target=IMG_SIZE_TARGET),
        callbacks=[early_stopping, model_checkpoint, reduce_lr, logger],
        shuffle=True)
def execute(self, kf: datasets.KFoldedDataSet, model: keras.Model,
            ec: ExecutionConfig):
    if 'unfreeze_encoder' in self.dict and self.dict['unfreeze_encoder']:
        set_trainable(model)
    if self.loss or self.lr:
        self.cfg.compile(model, self.cfg.createOptimizer(self.lr), self.loss)
    cb = [] + self.cfg.callbacks
    if self.initial_weights is not None:
        model.load_weights(self.initial_weights)
    if 'callbacks' in self.dict:
        cb = configloader.parse("callbacks", self.dict['callbacks'])
    if 'extra_callbacks' in self.dict:
        cb = configloader.parse("callbacks", self.dict['extra_callbacks'])

    kepoch = -1
    if self.cfg.resume:
        kepoch = maxEpoch(ec.metricsPath())
        if kepoch != -1:
            self.epochs = self.epochs - kepoch
            if os.path.exists(ec.weightsPath()):
                model.load_weights(ec.weightsPath())
            cb.append(CSVLogger(ec.metricsPath(), append=True, start=kepoch))
        else:
            cb.append(CSVLogger(ec.metricsPath()))
            kepoch = 0
    else:
        kepoch = 0
        cb.append(CSVLogger(ec.metricsPath()))

    md = self.cfg.primary_metric_mode
    cb.append(
        keras.callbacks.ModelCheckpoint(ec.weightsPath(),
                                        save_best_only=True,
                                        monitor=self.cfg.primary_metric,
                                        mode=md,
                                        verbose=1))
    cb.append(DrawResults(self.cfg, kf, ec.fold, ec.stage,
                          negatives=self.negatives))
    if self.cfg.showDataExamples:
        cb.append(DrawResults(self.cfg, kf, ec.fold, ec.stage,
                              negatives=self.negatives, train=True))
    if self.epochs - kepoch == 0:
        return
    kf.trainOnFold(ec.fold, model, cb, self.epochs - kepoch, self.negatives,
                   subsample=ec.subsample,
                   validation_negatives=self.validation_negatives)
auc_results_test.append(auc)
dice_results_test.append(dice)
jaccard_results_test.append(jaccard)

print("---- NORMAL TEST - FOLD ", fold_no)
print("sensitivity:", sensitivity)
print("specificity:", specificity)
print("accuracy:", accuracy)
print("auc:", auc)
print("dice:", dice)
print("jaccard:", jaccard)

################################### Fine-tuning ###################################
model.optimizer = Adam(lr=0.00001)

# release all layers for training
set_trainable(model)  # set all layers trainable and recompile model

callbacks = [
    keras.callbacks.ModelCheckpoint(
        str(PASTA_DE_TESTES) + 'best_model_finetuning' + str(fold_no) + '.h5',
        save_weights_only=True,
        save_best_only=True,
        mode='min'),
    # keras.callbacks.ReduceLROnPlateau(),
]

###### FINE-TUNING ######
# continue training
model.fit(
    x=x_train,
    y=y_train,
    batch_size=BATCH_SIZE_GLOBAL,
def unfreeze(self, model):
    set_trainable(model)
model.compile(optimizer,
              loss=JaccardLoss(per_image=False),
              metrics=['categorical_accuracy',
                       IOUScore(per_image=False, threshold=0.5)])

# creating generators for the image augmentation
train_generator = backroom.UnetSequence(X_train, y_train_multidim, batch_size,
                                        augmentations=backroom.train_augmentation)
test_generator = backroom.UnetSequence(X_test, y_test_multidim, batch_size,
                                       augmentations=None)

start_time = time.time()  # measuring modelling time

# basic .fit method
model.fit(X_train, y_train_multidim,
          epochs=2,
          batch_size=batch_size,
          validation_data=(X_test, y_test_multidim))

# Set all layers of the model trainable, so that encoder_freeze is lifted.
# recompile=True does not work with TensorFlow 2.0, so recompile manually.
set_trainable(model, recompile=False)
model.compile(optimizer,
              loss=JaccardLoss(per_image=False),
              metrics=['categorical_accuracy',
                       IOUScore(per_image=False, threshold=0.5)])

# fit_generator method for image augmentation
model.fit_generator(train_generator,
                    validation_data=test_generator,
                    steps_per_epoch=len(X_train) // batch_size,
                    validation_steps=len(X_test) // batch_size,
                    epochs=epoch_no,
                    callbacks=backroom.callbacks)

elapsed_time = time.time() - start_time  # measuring modelling time
print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))  # beautifying time format
def unfreeze_weights(model):
    set_trainable(model)
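# Aside: the two wrappers above (and most snippets in this collection) rely on
# segmentation_models.utils.set_trainable. Below is a hedged sketch of its
# behaviour, inferred from the comments in these snippets ("set all layers
# trainable and recompile model"); it is not copied from the library source.
def set_trainable_sketch(model, recompile=True):
    for layer in model.layers:
        layer.trainable = True  # lift encoder_freeze on every layer
    if recompile:
        # Keras only picks up trainability changes at compile time, which is
        # why several snippets call set_trainable(..., recompile=False) and
        # then call model.compile themselves (e.g. under TensorFlow 2.x).
        model.compile(model.optimizer, model.loss, metrics=model.metrics)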
def train(data_dir,
          points_csv,
          model=None,
          backbone='resnet34',
          encoder_weights='imagenet',
          batch_size=2,
          all_layer_epochs=100,
          decoder_only_epochs=2,
          logdir='logs',
          run_name='fish',
          verbose=2,
          patience=10,
          checkpoint_path='model_fish.h5',
          optimizer='adam',
          input_size=None,
          keras_augmentations=None,
          preprocessing_function_x=None,
          preprocessing_function_y=None,
          debug_training_data=False,
          debug_validation_data=False,
          preload=False,
          cached_preloading=False,
          visual_validation_samples=None,
          datumbox_mode=False,
          random_crops=False,
          learning_rate=None,
          n_gpus=1,
          augment=False,
          use_homograpy_proposals=False):
    assert encoder_weights or not decoder_only_epochs

    training_generator = PointsDataGenerator(images_dir=data_dir,
                                             points_csv=points_csv,
                                             img_res=input_size,
                                             image_channels=3,
                                             augment=augment,
                                             subset='train',
                                             batch_size=batch_size)
    validation_generator = PointsDataGenerator(images_dir=data_dir,
                                               points_csv=points_csv,
                                               img_res=input_size,
                                               image_channels=3,
                                               augment=False,
                                               subset='val',
                                               batch_size=batch_size)
    training_steps_per_epoch = len(training_generator)
    validation_steps_per_epoch = len(validation_generator)
    print('training steps per epoch:', training_steps_per_epoch)
    print('val steps per epoch:', validation_steps_per_epoch)

    # show images as they're input into the model
    if debug_training_data:
        show_generated_pairs(training_generator)
        return
    if debug_validation_data:
        show_generated_pairs(validation_generator)
        return

    # initialize model
    if datumbox_mode and model is None:
        # print('\n\nRunning in datumbox mode...\n\n')
        # try:
        #     from datumbox_unet import DatumboxUnet
        # except ImportError:
        #     from .datumbox_unet import DatumboxUnet
        # model = DatumboxUnet(backbone_name=backbone,
        #                      encoder_weights=encoder_weights,
        #                      encoder_freeze=encoder_weights is not None,
        #                      activation=Lambda(channelwise_softmax),
        #                      classes=training_generator.mask_shape[-1])
        raise NotImplementedError
    elif model is None:
        model = Qnet(backbone_name=backbone,
                     encoder_weights=encoder_weights,
                     encoder_freeze=encoder_weights is not None,
                     activation=Lambda(channelwise_softmax),
                     classes=training_generator.mask_shape[-1])
    elif isinstance(model, str):
        model = load_model(model)

    if learning_rate is not None:
        if optimizer == 'adam':
            optimizer = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999,
                             epsilon=None, decay=0.0, amsgrad=False)
        else:
            raise NotImplementedError(
                'Adjustable learning rate not implemented for %s.' % optimizer)

    if n_gpus > 1:
        from keras.utils import multi_gpu_model
        model = multi_gpu_model(model, gpus=n_gpus)

    metrics = get_keypoint_metrics((batch_size,) + training_generator.mask_shape)
    # NOTE: the 'homogrphy_ouput' keys (sic) must match the model's output
    # layer name, which carries the same spelling.
    if use_homograpy_proposals:
        model.compile(optimizer,
                      loss={'mask_output': focal_loss(),
                            'class_output': focal_loss(),
                            'detection_output': smoothL1(),
                            'homogrphy_ouput': homography_loss()},
                      loss_weights={'mask_output': 1.,
                                    'class_output': 1.,
                                    'detection_output': 1.,
                                    'homogrphy_ouput': 1.},
                      metrics=metrics)
    else:
        model.compile(optimizer,
                      loss={'mask_output': focal_loss(),
                            'class_output': focal_loss(),
                            'detection_output': smoothL1()},
                      loss_weights={'mask_output': 1.,
                                    'class_output': 1.,
                                    'detection_output': 1.},
                      metrics=metrics)
    # model.compile(optimizer, loss=dice_loss)
    # model.compile(optimizer, loss=bce_jaccard_loss, metrics=[iou_score])
    # model.compile(optimizer, 'binary_crossentropy', ['binary_accuracy'])
    # print("\n\nWARNING: Using 'binary_crossentropy' loss.\n\n")
    # (stale warning commented out: focal/smoothL1 losses are compiled above)

    # get callbacks
    callbacks = get_callbacks(
        checkpoint_path=checkpoint_path,
        verbose=verbose,
        batch_size=batch_size,
        patience=patience,
        logdir=logdir,
        run_name=run_name,
        visual_validation_samples=visual_validation_samples,
        steps_per_report=training_steps_per_epoch,
        steps_per_epoch=training_steps_per_epoch,
        epochs=all_layer_epochs,
        input_size=input_size,
        predict_fcn=predict)

    # train for `decoder_only_epochs` epochs with the encoder frozen
    if decoder_only_epochs and encoder_weights is not None:
        print('\n\nTraining decoder (only) for %s epochs...\n'
              '' % decoder_only_epochs)
        model.fit_generator(generator=training_generator,
                            validation_data=validation_generator,
                            validation_steps=validation_steps_per_epoch,
                            steps_per_epoch=training_steps_per_epoch,
                            epochs=decoder_only_epochs,
                            callbacks=list(callbacks.values()))

    # train all layers
    if all_layer_epochs:
        # refresh early stopping callback
        callbacks['EarlyStopping'] = \
            get_callbacks(patience=patience, verbose=verbose)['EarlyStopping']
        # workaround for bug with calling fit multiple times when using
        # the tensorboard callback
        callbacks['TensorBoard'] = TensorBoard(
            log_dir=os.path.join(logdir, run_name + 'stage_2'))
        print('\n\nTraining all layers for %s epochs...\n' % all_layer_epochs)
        set_trainable(model)  # set all layers trainable and recompile model
        model.fit_generator(generator=training_generator,
                            validation_data=validation_generator,
                            validation_steps=validation_steps_per_epoch,
                            steps_per_epoch=training_steps_per_epoch,
                            epochs=all_layer_epochs,
                            callbacks=list(callbacks.values()),
                            initial_epoch=decoder_only_epochs)

    # evaluate on training data
    print('\n\nTraining Scores\n' + '-' * 14)
    results = model.evaluate_generator(generator=training_generator,
                                       steps=training_steps_per_epoch,
                                       max_queue_size=10,
                                       workers=1,
                                       use_multiprocessing=False,
                                       verbose=0)
    for name, value in zip(model.metrics_names, list(results)):
        print(name + ':', value)

    # evaluate on validation data
    print('\n\nValidation Scores\n' + '-' * 16)
    results = model.evaluate_generator(generator=validation_generator,
                                       steps=validation_steps_per_epoch,
                                       max_queue_size=10,
                                       workers=1,
                                       use_multiprocessing=False,
                                       verbose=0)
    for name, value in zip(model.metrics_names, list(results)):
        print(name + ':', value)

    # evaluate on testing data
    print('\n\nTest Set Scores\n' + '-' * 15)
    # testing_generator = dataset.batch_generator(batch_size=batch_size,
    #                                             subset='test')
    # testing_steps_per_epoch = len(dataset.data['test']) // batch_size
    test_generator = PointsDataGenerator(images_dir=data_dir,
                                         points_csv=points_csv,
                                         img_res=input_size,
                                         image_channels=3,
                                         augment=augment,
                                         # FIXME: evaluates the 'train' split
                                         # despite the 'Test Set Scores' label
                                         subset='train',
                                         batch_size=batch_size)
    results = model.evaluate_generator(generator=test_generator,
                                       steps=len(test_generator),
                                       max_queue_size=10,
                                       workers=1,
                                       use_multiprocessing=False,
                                       verbose=0)
    for name, value in zip(model.metrics_names, list(results)):
        print(name + ':', value)
    return model
def train(data_dir,
          model=None,
          backbone='resnet34',
          encoder_weights='imagenet',
          batch_size=2,
          all_layer_epochs=100,
          decode_only_epochs=2,
          logdir='logs',
          run_name='fish',
          verbose=2,
          patience=10,
          checkpoint_path='model_fish.h5',
          optimizer='adam',
          input_size=None,
          keras_augmentations=None,
          preprocessing_function_x=None,
          preprocessing_function_y=None,
          debug_training_data=False,
          debug_validation_data=False,
          preload=False,
          cached_preloading=False,
          visual_validation_samples=None,
          datumbox_mode=False,
          random_crops=False,
          learning_rate=None,
          n_gpus=1):
    # get data generators
    (training_generator, validation_generator,
     training_steps_per_epoch, validation_steps_per_epoch) = \
        get_data_generators(data_dir=data_dir,
                            backbone=backbone,
                            batch_size=batch_size,
                            input_size=input_size,
                            keras_augmentations=keras_augmentations,
                            preprocessing_function_x=preprocessing_function_x,
                            preprocessing_function_y=preprocessing_function_y,
                            preload=preload,
                            cached_preloading=cached_preloading,
                            random_crops=random_crops)

    # show images as they're input into the model
    if debug_training_data:
        show_generated_pairs(training_generator)
        return
    if debug_validation_data:
        show_generated_pairs(validation_generator)
        return

    # initialize model
    if datumbox_mode and model is None:
        print('\n\nRunning in datumbox mode...\n\n')
        try:
            from datumbox_unet import DatumboxUnet
        except ImportError:
            from .datumbox_unet import DatumboxUnet
        model = DatumboxUnet(backbone_name=backbone,
                             encoder_weights=encoder_weights,
                             encoder_freeze=True)
    elif model is None:
        model = Unet(backbone_name=backbone,
                     encoder_weights=encoder_weights,
                     encoder_freeze=True)
    elif isinstance(model, str):
        model = load_model(model)

    if learning_rate is not None:
        if optimizer == 'adam':
            optimizer = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999,
                             epsilon=None, decay=0.0, amsgrad=False)
        else:
            raise NotImplementedError(
                'Adjustable learning rate not implemented for %s.' % optimizer)

    if n_gpus > 1:
        from keras.utils import multi_gpu_model
        model = multi_gpu_model(model, gpus=n_gpus)

    model.compile(optimizer, loss=bce_jaccard_loss, metrics=[iou_score])
    # model.compile(optimizer, 'binary_crossentropy', ['binary_accuracy'])

    # get callbacks
    callbacks = get_callbacks(
        checkpoint_path=checkpoint_path,
        verbose=verbose,
        batch_size=batch_size,
        patience=patience,
        logdir=logdir,
        run_name=run_name,
        visual_validation_samples=visual_validation_samples,
        steps_per_report=training_steps_per_epoch,
        steps_per_epoch=training_steps_per_epoch,
        epochs=all_layer_epochs,
        input_size=input_size,
        predict_fcn=predict)

    # train for `decode_only_epochs` epochs with the encoder frozen
    if decode_only_epochs:
        print('\n\nTraining decoder (only) for %s epochs...\n'
              '' % decode_only_epochs)
        model.fit_generator(generator=training_generator,
                            validation_data=validation_generator,
                            validation_steps=int(validation_steps_per_epoch),
                            steps_per_epoch=int(training_steps_per_epoch),
                            epochs=decode_only_epochs,
                            callbacks=list(callbacks.values()))

    # train all layers
    if all_layer_epochs:
        # refresh early stopping callback
        callbacks['EarlyStopping'] = \
            get_callbacks(patience=patience, verbose=verbose)['EarlyStopping']
        print('\n\nTraining all layers for %s epochs...\n' % all_layer_epochs)
        set_trainable(model)  # set all layers trainable and recompile model
        model.fit_generator(generator=training_generator,
                            validation_data=validation_generator,
                            validation_steps=int(validation_steps_per_epoch),
                            steps_per_epoch=int(training_steps_per_epoch),
                            epochs=all_layer_epochs,
                            callbacks=list(callbacks.values()),
                            initial_epoch=decode_only_epochs)

    # evaluate on training data
    print('\n\nTraining Scores\n' + '-' * 14)
    results = model.evaluate_generator(generator=training_generator,
                                       steps=training_steps_per_epoch,
                                       max_queue_size=10,
                                       workers=1,
                                       use_multiprocessing=False,
                                       verbose=0)
    for name, value in zip(model.metrics_names, list(results)):
        print(name + ':', value)

    # evaluate on validation data
    print('\n\nValidation Scores\n' + '-' * 16)
    results = model.evaluate_generator(generator=validation_generator,
                                       steps=validation_steps_per_epoch,
                                       max_queue_size=10,
                                       workers=1,
                                       use_multiprocessing=False,
                                       verbose=0)
    for name, value in zip(model.metrics_names, list(results)):
        print(name + ':', value)
    return model
def train_generatorh5(params):
    from hitif_losses import dice_coef_loss_bce
    from hitif_losses import double_head_loss

    print('-' * 30)
    print('Loading and preprocessing train data...')
    print('-' * 30)

    # Prepare for splitting the training set
    imgs_ind = np.arange(number_of_imgs)
    np.random.shuffle(imgs_ind)

    # Split 80-20
    train_last_id = int(number_of_imgs * 0.80)

    # Generators
    training_generator = DataGeneratorH5(
        source_target_list_IDs=imgs_ind[0:train_last_id].copy(), **params)
    validation_generator = DataGeneratorH5(
        source_target_list_IDs=imgs_ind[train_last_id:number_of_imgs].copy(),
        **params)

    print('-' * 30)
    print('Creating and compiling model...')
    print('-' * 30)
    layers = get_feature_layers(backbone_name, 4)
    if backbone_type == 'FPN':
        model = FPN(input_shape=(None, None, num_channels),
                    classes=num_mask_channels,
                    encoder_weights=encoder_weights,
                    encoder_freeze=freezeFlag,
                    backbone_name=backbone_name,
                    activation=act_fcn,
                    encoder_features=layers)
    elif backbone_type == 'Unet':
        model = Unet(input_shape=(None, None, num_channels),
                     classes=num_mask_channels,
                     encoder_weights=encoder_weights,
                     encoder_freeze=freezeFlag,
                     backbone_name=backbone_name,
                     activation=act_fcn,
                     encoder_features=layers)
    # model.summary()
    # model.compile(optimizer=Adam(lr=1e-5), loss='binary_crossentropy', metrics=['binary_crossentropy', 'mean_squared_error', dice_coef, dice_coef_batch, dice_coef_loss_bce, focal_loss()])
    # model.compile(optimizer=Adam(lr=1e-3), loss=dice_coef_loss_bce, metrics=['binary_crossentropy', 'mean_squared_error', dice_coef, dice_coef_batch, focal_loss()])
    # model.compile(optimizer=Adam(lr=1e-3), loss=loss_fcn, metrics=['binary_crossentropy', 'mean_squared_error', dice_coef, dice_coef_batch, focal_loss()])
    if loss_fcn == 'dice_coef_loss_bce':
        model.compile(optimizer=Adam(lr=1e-3), loss=dice_coef_loss_bce)
    elif loss_fcn == 'double_head_loss':
        model.compile(optimizer=Adam(lr=1e-3), loss=double_head_loss)
    else:
        model.compile(optimizer=Adam(lr=1e-3), loss=loss_fcn)

    # Load previous weights when restarting
    if oldmodelwtsfname is not None:
        if os.path.isfile(oldmodelwtsfname) and reloadFlag:
            print('-' * 30)
            print('Loading previous weights ...')
            weights = np.load(oldmodelwtsfname, allow_pickle=True)
            model.set_weights(weights)
            # model.load_weights(oldmodelwtsfname)

    checkpoint_path = get_checkpoint_path(log_dir_name)
    print("checkpoint_path:", checkpoint_path)
    model_checkpoint = ModelCheckpoint(checkpoint_path,
                                       monitor='val_loss',
                                       save_best_only=True,
                                       save_weights_only=True)
    custom_checkpoint = Checkpoints(checkpoint_path,
                                    monitor='val_loss',
                                    verbose=1)
    # NOTE: restore_best_weights is an EarlyStopping option;
    # ReduceLROnPlateau does not accept it.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.1,
                                  patience=25,
                                  min_lr=1e-6,
                                  verbose=1)
    model_es = EarlyStopping(monitor='val_loss',
                             min_delta=1e-7,
                             patience=15,
                             verbose=1,
                             mode='auto')
    csv_logger = CSVLogger(csvfname, append=True)
    # my_callbacks = [reduce_lr, model_es, csv_logger]
    # my_callbacks = [model_checkpoint, reduce_lr, model_es, csv_logger]
    my_callbacks = [custom_checkpoint, reduce_lr, model_es, csv_logger]

    print('-' * 30)
    print('Fitting model with encoder frozen...')
    print('-' * 30)
    if freezeFlag and num_coldstart_epochs > 0:
        model.fit_generator(generator=training_generator,
                            validation_data=validation_generator,
                            use_multiprocessing=True,
                            workers=num_gen_workers,
                            epochs=num_coldstart_epochs,
                            callbacks=my_callbacks,
                            verbose=2)

    # release all layers for training
    set_trainable(model)  # set all layers trainable and recompile model
    # model.summary()

    print('-' * 30)
    print('Fitting full model...')
    print('-' * 30)
    # Retrain after the cold start
    model.fit_generator(generator=training_generator,
                        validation_data=validation_generator,
                        use_multiprocessing=True,
                        workers=num_gen_workers,
                        epochs=num_finetuning_epochs,
                        callbacks=my_callbacks,
                        verbose=2)

    # <<FIXME>>: GZ will work on it.
    # Find the last best epoch model weights and then symlink it to modelwtsfname.
    # Note that the symlink will have issues on non-Linux OS, so it is better to copy.
def train(model, train_dataset, val_dataset, batch_size, epochs1, epochs2,
          checkpoints_path=None, last_checkpoint_path=None, logs_path=None,
          plots_path=None, use_multiprocessing=False, workers=1):
    # Build dataloaders
    train_dataloader = DataloaderMasks(train_dataset,
                                       batch_size=batch_size, shuffle=True)
    val_dataloader = DataloaderMasks(val_dataset,
                                     batch_size=batch_size, shuffle=False)

    # Callbacks
    model_callbacks = [
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                             patience=5, min_lr=1e-5),
        tf.keras.callbacks.EarlyStopping(patience=10),
        # tf.keras.callbacks.ModelCheckpoint(filepath=checkpoints_path, save_best_only=True, mode='min'),  # it can make the end of an epoch extremely slow
        tf.keras.callbacks.TensorBoard(log_dir=logs_path),
        # WandbCallback(),
    ]

    # define model
    model.summary()

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=1e-3),
                  loss=bce_jaccard_loss, metrics=[iou_score])

    # train the model on the new data for a few epochs
    print("Training decoder first...")
    history1 = model.fit(train_dataloader,
                         validation_data=val_dataloader,
                         epochs=epochs1,
                         callbacks=model_callbacks,
                         use_multiprocessing=use_multiprocessing,
                         workers=workers)
    print("Initial training results:")
    print(history1)
    if plots_path:
        plot_hist(history1, title="Training decoder",
                  savepath=plots_path, suffix="_initial")

    # we need to recompile the model for the unfreeze to take effect;
    # we use SGD with a low learning rate
    print("Fine-tuning model...")
    set_trainable(model, recompile=False)
    model.compile(optimizer=SGD(learning_rate=1e-4, momentum=0.9),
                  loss=bce_jaccard_loss, metrics=[iou_score])

    # we train our model again, this time fine-tuning all layers
    # (the encoder is now unfrozen)
    history2 = model.fit(train_dataloader,
                         validation_data=val_dataloader,
                         epochs=epochs2,
                         callbacks=model_callbacks,
                         use_multiprocessing=use_multiprocessing,
                         workers=workers)
    print("Fine-tuning results:")
    print(history2)
    if plots_path:
        plot_hist(history2, title="Fine-tuning full model",
                  savepath=plots_path, suffix="_finetuning")

    # Save model
    if last_checkpoint_path:
        print("Saving last model...")
        model.save(last_checkpoint_path)
        print(f"Model saved at: {last_checkpoint_path}")
def RunTest(
        params,
        model_name_template='models_3/{model}_{backbone}_{optimizer}_{augmented_image_size}-{padded_image_size}-{nn_image_size}_lrf{lrf}_{metric}_{CC}_f{test_fold_no}_{phash}'
):
    # # Params

    # In[ ]:

    DEV_MODE_RANGE = 0  # off

    # In[ ]:

    def params_dict():
        return {x[0]: x[1] for x in vars(params).items()
                if not x[0].startswith('__')}

    def params_str():
        return '\n'.join([repr(x[0]) + ' : ' + repr(x[1]) + ','
                          for x in vars(params).items()
                          if not x[0].startswith('__')])

    def params_hash(shrink_to=6):
        import hashlib
        import json
        return hashlib.sha1(
            json.dumps(params_dict(),
                       sort_keys=True).encode()).hexdigest()[:shrink_to]

    def params_save(fn, verbose=True):
        params_fn = fn + '.param.txt'
        with open(params_fn, 'w+') as f:
            s = params_str()
            hash = params_hash(shrink_to=1000)
            s = '{\n' + s + '\n}\nhash: ' + hash[:6] + ' ' + hash[6:]
            f.write(s)
        if verbose:
            print('params: ' + s + '\nsaved to ' + params_fn)

    # # Imports

    # In[ ]:

    import sys
    # sys.path.append(r'D:\Programming\3rd_party\keras')

    # In[ ]:

    from imp import reload
    import numpy as np
    import keras
    import datetime
    import time
    from keras.models import Model, load_model
    from keras.layers import Input, Dropout, BatchNormalization, Activation, Add
    from keras.layers.core import Lambda
    from keras.layers.convolutional import Conv2D, Conv2DTranspose
    from keras.layers.pooling import MaxPooling2D
    from keras.layers.merge import concatenate
    from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger
    from keras import backend as K
    import tensorflow as tf

    # # Load data

    # In[ ]:

    import load_data
    load_data = reload(load_data)
    import keras_unet_divrikwicky_model
    keras_unet_divrikwicky_model = reload(keras_unet_divrikwicky_model)

    # In[ ]:

    train_df = load_data.LoadData(train_data=True,
                                  DEV_MODE_RANGE=DEV_MODE_RANGE,
                                  to_gray=False)

    # In[ ]:

    train_df.images[0].shape

    # In[ ]:

    train_images, train_masks, validate_images, validate_masks = \
        load_data.SplitTrainData(train_df, params.test_fold_no)
    train_images.shape, train_masks.shape, validate_images.shape, validate_masks.shape

    # # Reproducibility setup:

    # In[ ]:

    import random as rn
    import os
    os.environ['PYTHONHASHSEED'] = '0'
    np.random.seed(params.seed)
    rn.seed(params.seed)
    # session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    tf.set_random_seed(params.seed)
    # sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    sess = tf.Session(graph=tf.get_default_graph())
    K.set_session(sess)

    # # IOU metric

    # In[ ]:

    thresholds = np.array(
        [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95])

    def iou(img_true, img_pred):
        assert (img_true.shape[-1] == 1) and (len(img_true.shape) == 3) or \
               (img_true.shape[-1] != 1) and (len(img_true.shape) == 2)
        i = np.sum((img_true * img_pred) > 0)
        u = np.sum((img_true + img_pred) > 0)
        if u == 0:
            return 1
        return i / u

    def iou_metric(img_true, img_pred):
        img_pred = img_pred > 0.5  # added by sgx 20180728
        if img_true.sum() == img_pred.sum() == 0:
            scores = 1
        else:
            scores = (thresholds <= iou(img_true, img_pred)).mean()
        return scores

    def iou_metric_batch(y_true_in, y_pred_in):
        batch_size = len(y_true_in)
        metric = []
        for batch in range(batch_size):
            value = iou_metric(y_true_in[batch], y_pred_in[batch])
            metric.append(value)
        # print("metric = ", metric)
        return np.mean(metric)

    # adapter for Keras
    def my_iou_metric(label, pred):
        metric_value = tf.py_func(iou_metric_batch, [label, pred], tf.float64)
        return metric_value

    # # Data generator

    # In[ ]:

    mean_val = np.mean(train_images.apply(np.mean))
    mean_std = np.mean(train_images.apply(np.std))
    mean_val, mean_std

    #####################################
    def FillCoordConvNumpy(imgs):
        print(imgs.shape)
        assert len(imgs.shape) == 4
        assert imgs.shape[3] == 3
        n = imgs.shape[2]
        hor_img = np.linspace(-1., 1., n).reshape((1, 1, n, 1))
        n = imgs.shape[1]
        ver_img = np.linspace(-1., 1., n).reshape((1, n, 1, 1))
        imgs[:, :, :, 0:1] = hor_img
        imgs[:, :, :, 2:3] = ver_img

    def FillCoordConvList(imgs):
        print(imgs.shape)
        assert len(imgs[0].shape) == 3
        assert imgs[0].shape[2] == 3
        for img in imgs:
            n = img.shape[1]
            hor_img = np.linspace(-1., 1., n).reshape((1, n, 1))
            n = img.shape[0]
            ver_img = np.linspace(-1., 1., n).reshape((n, 1, 1))
            img[:, :, 0:1] = hor_img
            img[:, :, 2:3] = ver_img

    if params.coord_conv:
        FillCoordConvList(train_images)
        FillCoordConvList(validate_images)
        print(train_images[0][0, 0, 0], train_images[0][0, 0, 2])
        assert train_images[0][0, 0, 0] == -1.
        assert train_images[0][0, 0, 2] == 1.

    ######################################
    from my_augs import AlbuDataGenerator

    # # model

    # In[ ]:

    sys.path.append('../3rd_party/segmentation_models')
    import segmentation_models
    segmentation_models = reload(segmentation_models)
    from segmentation_models.utils import set_trainable

    # In[ ]:

    if not hasattr(params, 'model_params'):
        params.model_params = {}

    if params.load_model_from:
        model = load_model(params.load_model_from,
                           custom_objects={'my_iou_metric': my_iou_metric})
        print('MODEL LOADED from: ' + params.load_model_from)
    else:
        model = None
        if params.model == 'FNN':
            model = segmentation_models.FPN(
                backbone_name=params.backbone,
                input_shape=(None, None, params.channels),
                encoder_weights=params.initial_weightns,
                freeze_encoder=True,
                dropout=params.dropout,
                **params.model_params)
        if params.model == 'FNNdrop':
            model = segmentation_models.FPNdrop(
                backbone_name=params.backbone,
                input_shape=(None, None, params.channels),
                encoder_weights=params.initial_weightns,
                freeze_encoder=True,
                dropout=params.dropout,
                **params.model_params)
        if params.model == 'Unet':
            model = segmentation_models.Unet(
                backbone_name=params.backbone,
                input_shape=(None, None, params.channels),
                encoder_weights=params.initial_weightns,
                freeze_encoder=True,
                **params.model_params)
        if params.model == 'Linknet':
            model = segmentation_models.Linknet(
                backbone_name=params.backbone,
                input_shape=(None, None, params.channels),
                encoder_weights=params.initial_weightns,
                freeze_encoder=True,
                **params.model_params)
        if params.model == 'divrikwicky':
            model = keras_unet_divrikwicky_model.CreateModel(
                params.nn_image_size, **params.model_params)
            params.backbone = ''
    assert model

    for l in model.layers:
        if isinstance(l, segmentation_models.fpn.layers.UpSampling2D) or \
                isinstance(l, keras.layers.UpSampling2D):
            print(l)
            if hasattr(l, 'interpolation'):
                print(l.interpolation)
                if hasattr(params, 'model_params') and \
                        'interpolation' in params.model_params:
                    l.interpolation = params.model_params['interpolation']
            else:
                print('qq')

    if hasattr(params, 'kernel_constraint_norm') and params.kernel_constraint_norm:
        for l in model.layers:
            if hasattr(l, 'kernel_constraint'):
                print('kernel_constraint for ', l, ' is set to ',
                      params.kernel_constraint_norm)
                l.kernel_constraint = keras.constraints.get(
                    keras.constraints.max_norm(params.kernel_constraint_norm))

    # In[ ]:

    model_out_file = model_name_template.format(
        lrf=params.ReduceLROnPlateau['factor'],
        metric=params.monitor_metric[0],
        CC='CC' if params.coord_conv else '',
        **vars(params)) + '_f{test_fold_no}_{phash}'.format(
            test_fold_no=params.test_fold_no, phash=params_hash())
    now = datetime.datetime.now()
    print('model: ' + model_out_file + ' started at ' +
          now.strftime("%Y.%m.%d %H:%M:%S"))
    assert not os.path.exists(model_out_file + '.model')
    params_save(model_out_file, verbose=True)
    log_out_file = model_out_file + '.log.csv'

    # In[ ]:

    # model = load_model(model1_file, )  # , 'lavazs_loss': lavazs_loss

    # # Train

    # In[ ]:

    optimizer = params.optimizer
    if optimizer == 'adam':
        optimizer = keras.optimizers.adam(**params.optimizer_params)
    elif optimizer == 'sgd':
        optimizer = keras.optimizers.sgd(**params.optimizer_params)
    model.compile(loss="binary_crossentropy", optimizer=optimizer,
                  metrics=["acc", my_iou_metric])

    # In[ ]:

    if params.coord_conv:
        mean = ((0, mean_val, 0), (1, mean_std, 1))
    else:
        mean = (mean_val, mean_std)
    train_gen = AlbuDataGenerator(train_images, train_masks,
                                  batch_size=params.batch_size,
                                  nn_image_size=params.nn_image_size,
                                  mode=params.train_augmentation_mode,
                                  shuffle=True, params=params, mean=mean)
    val_gen = AlbuDataGenerator(validate_images, validate_masks,
                                batch_size=params.test_batch_size,
                                nn_image_size=params.nn_image_size,
                                mode=params.test_augmentation_mode,
                                shuffle=False, params=params, mean=mean)

    # In[ ]:

    sys.path.append('../3rd_party/keras-tqdm')
    from keras_tqdm import TQDMCallback, TQDMNotebookCallback

    # In[ ]:

    start_t = time.clock()

    # Warm-up: train with the encoder frozen, then unfreeze for the LR test.
    if params.epochs_warmup:
        history = model.fit_generator(
            train_gen,
            validation_data=None,
            epochs=params.epochs_warmup,
            callbacks=[TQDMNotebookCallback(leave_inner=True)],
            validation_steps=None,
            workers=5,
            use_multiprocessing=False,
            verbose=0)

    set_trainable(model)

    batches_per_epoch = len(train_images) // params.batch_size
    print("batches per epoch: ", batches_per_epoch)
    test_epochs = 30
    steps = test_epochs * batches_per_epoch
    val_period = steps // 1000
    print("steps: ", steps, " val_period", val_period)

    lr_sheduler = EvalLrTest(log_out_file, val_gen,
                             val_period=val_period, steps=steps)
    history = model.fit_generator(
        train_gen,
        validation_data=None,
        epochs=params.epochs,
        initial_epoch=params.epochs_warmup,
        callbacks=[TQDMNotebookCallback(leave_inner=True), lr_sheduler],
        validation_steps=None,
        workers=5,
        use_multiprocessing=False,
        verbose=0)

    # In[ ]:

    print(params_str())
    print('done: ' + model_out_file)
    print('elapsed: {}s ({}s/iter)'.format(
        time.clock() - start_t,
        (time.clock() - start_t) / len(history.epoch)))
    return model
# model.summary()
for layer in model.layers:
    print(layer.name)

callbacks = [
    EarlyStopping(patience=15, verbose=1),
    ReduceLROnPlateau(factor=0.1, patience=10, min_lr=1e-7, verbose=1),
    ModelCheckpoint(
        './U_checkpoints/weights.Epoch{epoch:03d}-Loss{loss:.3f}-VIou{val_iou_score:.3f}.h5',
        verbose=1,
        monitor='val_iou_score',
        mode="max",
        save_best_only=True,
        save_weights_only=True)
]

# fit model
set_trainable(model)
model.fit_generator(generator=train_gen,
                    validation_data=valid_gen,
                    steps_per_epoch=train_steps,
                    validation_steps=valid_steps,
                    epochs=epochs,
                    callbacks=callbacks)
"""
If you inspect hist = model.fit_generator(...), you can list the keys that
are available to ModelCheckpoint (epoch, loss, metrics, and so on):

    for key in hist.history:
        print(key)
"""
def train(train_set, val_set, args):
    if not os.path.isdir(args.log_dir):
        os.mkdir(args.log_dir)

    # Get callbacks
    checkpoint = ModelCheckpoint(
        args.log_dir + '/ep={epoch:03d}-loss={loss:.3f}-val_loss={val_loss:.3f}.h5',
        verbose=1,
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=False,
        period=1)
    # reduce_lr = ReduceLROnPlateau(monitor='val_iou_3_sum', factor=0.2, min_delta=5e-4, patience=5, verbose=1)
    # early_stopping = EarlyStopping(monitor='val_iou_3_sum', min_delta=0, patience=10, verbose=1)
    csv_logger = CSVLogger(args.log_dir + '/record.csv')
    tensorboard = TensorBoard(log_dir=args.log_dir)
    snapshot = SnapshotCallbackBuilder(weights_path='.',
                                       nb_epochs=args.epochs,
                                       nb_snapshots=1,
                                       init_lr=args.lr).get_callbacks()[0]

    # Train the proposed model
    K.clear_session()
    model = create_backbone_unet(
        input_shape=(args.allowed_image_size // 4,
                     args.allowed_image_size // 4, 3),
        pretrained_weights_file=args.pretrained_weights_file,
        backbone=args.backbone)
    model.compile(optimizer=SGD(lr=args.lr),
                  loss=jaccard_loss,
                  metrics=[iou_score, dice_score, dice_0, dice_1, dice_2,
                           dice_3, iou_0, iou_1, iou_2, iou_3, iou_3_sum])

    # Unfreeze all layers, then recompile so the change takes effect.
    set_trainable(model=model)
    model.compile(optimizer=SGD(lr=args.lr),
                  loss=jaccard_loss,
                  metrics=[iou_score, dice_score, dice_0, dice_1, dice_2,
                           dice_3, iou_0, iou_1, iou_2, iou_3, iou_3_sum])

    model.fit_generator(
        generator=DataGenerator(dir_path=args.data_file_path,
                                batch_size=args.batch_size,
                                img_set=train_set,
                                shuffle=args.shuffle),
        validation_data=DataGenerator(dir_path=args.data_file_path,
                                      batch_size=args.batch_size,
                                      img_set=val_set,
                                      shuffle=True),
        epochs=args.epochs,
        initial_epoch=0,
        callbacks=[checkpoint, tensorboard, csv_logger, snapshot],
        workers=4,
        use_multiprocessing=True)

    model.save_weights(os.path.join(args.log_dir, 'trained_final_weights.h5'))

    # exit training
    K.clear_session()