def start_training(args):
    """Select and compile the model named by args.model, then train it with
    fit_generator on the Carvana train/validation split."""
    model_name = str(args.model)
    output_path = str(args.out_path) + '/'
    input_path = str(args.in_path) + '/'
    epochs = int(args.epochs)
    batch_size = int(args.batch_size)
    nb_classes = 1

    df_train = pd.read_csv(input_path + 'train_masks.csv')
    ids_train = df_train['img'].map(lambda s: s.split('.')[0])
    # print(ids_train)

    ids_train_split, ids_valid_split = train_test_split(
        ids_train, test_size=0.2, random_state=42)

    print('Training on {} samples'.format(len(ids_train_split)))
    print('Validating on {} samples'.format(len(ids_valid_split)))

    model = None
    image_size = 256
    # batch_size = 24
    model_path = 'weights/best_weights.hdf5'
    csv_path = output_path + 'train_log.csv'

    if model_name == 'unet128':
        # NOTE: this branch sets neither img_tuple nor callbacks, which the
        # training block at the bottom of this function expects.
        model = get_unet_128()
        image_size = 128
        batch_size = 48
        model_path = output_path + 'Unet128_{val_loss:.4f}-{val_binary_accuracy:.4f}-{val_dice_loss:.5f}.hdf5'
        csv_path = output_path + 'Unet128.csv'  # tested

    elif model_name == 'unet256':
        model_path = output_path + 'Unet256_{val_loss:.4f}-{val_binary_accuracy:.4f}-{val_dice_loss:.5f}.hdf5'
        csv_path = output_path + 'Unet256.csv'
        img_tuple = (256, 256, 3)
        batch_size = 24

        model = get_unet_256(input_shape=img_tuple, num_classes=nb_classes)
        model.compile(optimizer=optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                      loss=weighted_loss,
                      metrics=['binary_accuracy', dice_loss])

        callbacks = [
            # EarlyStopping(monitor='val_dice_loss', patience=8, verbose=1,
            #               min_delta=1e-05, mode='max'),
            # ReduceLROnPlateau(monitor='val_dice_loss', factor=0.333, patience=3,
            #                   verbose=1, epsilon=1e-05, mode='max', min_lr=1e-04),
            ModelCheckpoint(monitor='val_dice_loss', filepath=model_path,
                            save_best_only=True, save_weights_only=True, mode='max'),
            TensorBoard(log_dir=output_path),
            CSVLogger(filename=csv_path, separator=',', append=True),
            LearningRateScheduler(unet_lr_scheduler)
        ]

    # elif model_name == 'unet512':
    #     model = get_unet_512()
    #     image_size = 512
    #     batch_size = 12
    #     model_path = output_path + 'Unet512_{val_loss:.4f}-{val_binary_accuracy:.4f}-{val_dice_loss:.5f}.hdf5'
    #     csv_path = output_path + 'Unet512.csv'

    elif model_name == 'unet1024':
        model_path = output_path + 'Unet1024_best_model.hdf5'
        csv_path = output_path + 'Unet1024.csv'
        img_tuple = (256, 256, 3)
        # batch_size = 3

        model = get_unet_1024(input_shape=img_tuple, num_classes=nb_classes)
        model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True,
                                          accumulator=np.ceil(float(64) / float(batch_size))),
                      loss=weighted_loss,
                      metrics=['binary_accuracy', dice_loss])

        callbacks = [
            EarlyStopping(monitor='val_dice_loss', patience=8, verbose=1,
                          min_delta=1e-05, mode='max'),
            ReduceLROnPlateau(monitor='val_dice_loss', factor=0.333, patience=3,
                              verbose=1, epsilon=1e-05, mode='max', min_lr=1e-04),
            ModelCheckpoint(monitor='val_dice_loss', filepath=model_path,
                            save_best_only=True, save_weights_only=True, mode='max'),
            TensorBoard(log_dir=output_path),
            CSVLogger(filename=csv_path, separator=',', append=True),
            # LearningRateScheduler(unet_lr_scheduler)
        ]

    elif model_name == 'unet':
        model_path = output_path + 'Unet_best_model.hdf5'
        csv_path = output_path + 'Unet.csv'
        img_tuple = (256, 256, 3)
        batch_size = 24

        model = get_unet(input_shape=img_tuple, num_classes=nb_classes)
        model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True,
                                          accumulator=1),
                      loss=weighted_loss,
                      metrics=['binary_accuracy', dice_loss])

        callbacks = [
            EarlyStopping(monitor='val_dice_loss', patience=6, verbose=1,
                          min_delta=1e-05, mode='max'),
            ReduceLROnPlateau(monitor='val_dice_loss', factor=0.333, patience=3,
                              verbose=1, epsilon=1e-05, mode='max', min_lr=3e-04),
            ModelCheckpoint(monitor='val_dice_loss', filepath=model_path,
                            save_best_only=True, save_weights_only=True, mode='max'),
            TensorBoard(log_dir=output_path),
            CSVLogger(filename=csv_path, separator=',', append=True),
            # LearningRateScheduler(unet_lr_scheduler)
        ]

    elif model_name == 'modify_unet':
        model_path = output_path + 'Modify_Unet_best_model.hdf5'
        csv_path = output_path + 'Modify_Unet.csv'
        img_tuple = (512, 512, 3)
        # batch_size = 3

        model = get_modify_unet(input_shape=img_tuple, num_classes=nb_classes)
        if args.noweight:
            model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True,
                                              accumulator=np.ceil(float(15) / float(batch_size))),
                          loss=weighted_loss,
                          metrics=['binary_accuracy', dice_loss])
        else:
            model.compile(optimizer=utils.Adam(lr=1e-02,
                                               accumulator=np.ceil(float(15) / float(batch_size))),
                          loss=weighted_loss,
                          metrics=['binary_accuracy', dice_loss])

        callbacks = [
            EarlyStopping(monitor='val_dice_loss', patience=8, verbose=1,
                          min_delta=1e-05, mode='max'),
            ReduceLROnPlateau(monitor='val_dice_loss', factor=0.333, patience=3,
                              verbose=1, epsilon=1e-04, mode='max', min_lr=3e-04),
            ModelCheckpoint(monitor='val_dice_loss', filepath=model_path,
                            save_best_only=True, save_weights_only=True, mode='max'),
            TensorBoard(log_dir=output_path),
            CSVLogger(filename=csv_path, separator=',', append=True),
            # LearningRateScheduler(enet_lr_scheduler)
        ]

    elif model_name == 'modify_unet_V2':
        model_path = output_path + 'Modify_Unet_best_model.hdf5'
        csv_path = output_path + 'Modify_Unet.csv'
        img_tuple = (1024, 1024, 3)
        # batch_size = 5

        model = get_modify_unet_V2(input_shape=img_tuple, num_classes=nb_classes)
        # (both branches currently compile with the same SGD settings)
        if args.noweight:
            model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True,
                                              accumulator=np.ceil(float(40) / float(batch_size))),
                          loss=weighted_loss,
                          metrics=['binary_accuracy', dice_loss])
        else:
            model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True,
                                              accumulator=np.ceil(float(40) / float(batch_size))),
                          loss=weighted_loss,
                          metrics=['binary_accuracy', dice_loss])

        callbacks = [
            EarlyStopping(monitor='val_dice_loss', patience=8, verbose=1,
                          min_delta=1e-05, mode='max'),
            ReduceLROnPlateau(monitor='val_dice_loss', factor=0.333, patience=3,
                              verbose=1, epsilon=1e-04, mode='max', min_lr=1e-04),
            ModelCheckpoint(monitor='val_dice_loss', filepath=model_path,
                            save_best_only=True, save_weights_only=True, mode='max'),
            TensorBoard(log_dir=output_path),
            CSVLogger(filename=csv_path, separator=',', append=True),
            # LearningRateScheduler(unet_lr_scheduler)
        ]

    # elif model_name == 'segnet':
    #     model_path = output_path + 'SegNet_{val_loss:.4f}-{val_binary_accuracy:.4f}-{val_dice_loss:.5f}.hdf5'
    #     csv_path = output_path + 'SegNet.csv'
    #     image_size = 1024
    #     batch_size = 1
    #     model = SegNet(input_shape=(image_size, image_size, 3), classes=nb_classes)
    #     model.compile(optimizer=optimizers.Adam(lr=0.01), loss=bce_dice_loss,
    #                   metrics=['binary_accuracy', dice_loss])

    # elif model_name == 'segnet_full':
    #     model_path = output_path + 'SegNet_FULL_{val_loss:.4f}-{val_binary_accuracy:.4f}-{val_dice_loss:.5f}.hdf5'
    #     csv_path = output_path + 'SegNet_FULL.csv'
    #     image_size = 1024
    #     batch_size = 2
    #     model = SegNet(input_shape=(image_size, image_size, 3), classes=nb_classes)
    #     model.compile(optimizer=optimizers.Adam(lr=0.01), loss=bce_dice_loss,
    #                   metrics=['binary_accuracy', dice_loss])

    # elif model_name == 'enet_unpooling':
    #     model_path = output_path + 'Enet_unpooling_{val_loss:.4f}-{val_binary_accuracy:.4f}.hdf5'
    #     csv_path = output_path + 'Enet_unpooling.csv'
    #     image_size = int(256)
    #     model = get_enet_unpooling(input_shape=(image_size, image_size, 3), classes=nb_classes)
    #     model.compile(optimizer=optimizers.Adam(lr=0.01), loss=losses.binary_crossentropy,
    #                   metrics=['binary_accuracy', dice_loss])

    elif model_name == 'enet_naive_upsampling':
        model_path = output_path + 'Enet_naive_upsampling_best_model.hdf5'
        csv_path = output_path + 'Enet_naive_upsampling.csv'
        img_tuple = (512, 512, 3)
        # batch_size = 6

        model = get_enet_naive_upsampling(input_shape=img_tuple, classes=nb_classes)
        if args.noweight:
            model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True,
                                              accumulator=np.ceil(float(14) / float(batch_size))),
                          loss=weighted_loss,
                          metrics=['binary_accuracy', dice_loss])
        else:
            model.compile(optimizer=optimizers.Adam(lr=1e-02),
                          loss=weighted_loss,
                          metrics=['binary_accuracy', dice_loss])

        callbacks = [
            EarlyStopping(monitor='val_dice_loss', patience=8, verbose=1,
                          min_delta=1e-05, mode='max'),
            ReduceLROnPlateau(monitor='val_dice_loss', factor=0.333, patience=3,
                              verbose=1, epsilon=1e-05, mode='max', min_lr=3e-04),
            ModelCheckpoint(monitor='val_dice_loss', filepath=model_path,
                            save_best_only=True, save_weights_only=True, mode='max'),
            TensorBoard(log_dir=output_path),
            CSVLogger(filename=csv_path, separator=',', append=True),
            # LearningRateScheduler(enet_lr_scheduler)
        ]

    # elif model_name == 'densenet_fc':
    #     model_path = output_path + 'DenseNet_FC_{val_loss:.4f}-{val_binary_accuracy:.4f}-{val_dice_loss:.5f}.hdf5'
    #     csv_path = output_path + 'DenseNet_FC.csv'
    #     image_size = 256
    #     batch_size = 8
    #     base_model = DenseNetFCN(input_shape=(image_size, image_size, 3), classes=nb_classes,
    #                              activation='sigmoid', include_top=False, batchsize=batch_size)
    #     classify = Conv2D(nb_classes, (1, 1), activation='sigmoid')(base_model.output)
    #     model = Model(inputs=base_model.input, outputs=classify)
    #     model.compile(optimizer=optimizers.Adam(lr=0.01), loss=bce_dice_loss,
    #                   metrics=['binary_accuracy', dice_loss])

    elif model_name == 'linknet':
        model_path = output_path + 'LinkNet_best_model.hdf5'
        csv_path = output_path + 'LinkNet.csv'
        img_tuple = (1024, 1024, 3)
        batch_size = 6

        base_model = LinkNet(input_shape=img_tuple, classes=nb_classes)
        classify = Conv2D(nb_classes, (1, 1), activation='sigmoid')(base_model.output)
        model = Model(inputs=base_model.input, outputs=classify)
        model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True,
                                          accumulator=4),
                      loss=weighted_loss,
                      metrics=['binary_accuracy', dice_loss])

        callbacks = [
            EarlyStopping(monitor='val_dice_loss', patience=8, verbose=1,
                          min_delta=1e-05, mode='max'),
            ReduceLROnPlateau(monitor='val_dice_loss', factor=0.333, patience=3,
                              verbose=1, epsilon=1e-05, mode='max', min_lr=1e-04),
            ModelCheckpoint(monitor='val_dice_loss', filepath=model_path,
                            save_best_only=True, save_weights_only=True, mode='max'),
            TensorBoard(log_dir=output_path),
            CSVLogger(filename=csv_path, separator=',', append=True),
            # LearningRateScheduler(linknet_lr_scheduler)
        ]

    elif model_name == 'linknet_res34':
        model_path = output_path + 'LinkNet_Res34_best_model.hdf5'
        csv_path = output_path + 'LinkNet_Res34.csv'
        img_tuple = (1024, 1024, 3)
        # batch_size = 12

        base_model = LinkNet_Res34(input_shape=img_tuple, classes=nb_classes)
        classify = Conv2D(nb_classes, (1, 1), activation='sigmoid')(base_model.output)
        model = Model(inputs=base_model.input, outputs=classify)
        model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True,
                                          accumulator=np.ceil(float(84) / float(batch_size))),
                      loss=weighted_loss_256,
                      metrics=['binary_accuracy', dice_loss])

        callbacks = [
            EarlyStopping(monitor='val_dice_loss', patience=8, verbose=1,
                          min_delta=1e-05, mode='max'),
            ReduceLROnPlateau(monitor='val_dice_loss', factor=0.333, patience=3,
                              verbose=1, epsilon=1e-05, mode='max', min_lr=1e-04),
            ModelCheckpoint(monitor='val_dice_loss', filepath=model_path,
                            save_best_only=True, save_weights_only=True, mode='max'),
            TensorBoard(log_dir=output_path),
            CSVLogger(filename=csv_path, separator=',', append=True),
            # LearningRateScheduler(unet_lr_scheduler)
        ]

    elif model_name == 'linknet_res50':
        model_path = output_path + 'LinkNet_Res50_{val_loss:.4f}-{val_binary_accuracy:.4f}-{val_dice_loss:.5f}.hdf5'
        csv_path = output_path + 'LinkNet_Res50.csv'
        img_tuple = (1280, 1280, 3)
        batch_size = 2

        base_model = LinkNet_Res50(input_shape=img_tuple, classes=nb_classes)
        classify = Conv2D(nb_classes, (1, 1), activation='sigmoid')(base_model.output)
        model = Model(inputs=base_model.input, outputs=classify)
        model.compile(optimizer=utils.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True,
                                          accumulator=10),
                      loss=weighted_loss,
                      metrics=['binary_accuracy', dice_loss])

        callbacks = [
            # EarlyStopping(monitor='val_dice_loss', patience=8, verbose=1,
            #               min_delta=1e-05, mode='max'),
            # ReduceLROnPlateau(monitor='val_dice_loss', factor=0.333, patience=3,
            #                   verbose=1, epsilon=1e-05, mode='max', min_lr=1e-04),
            ModelCheckpoint(monitor='val_dice_loss', filepath=model_path,
                            save_best_only=True, save_weights_only=True, mode='max'),
            TensorBoard(log_dir=output_path),
            CSVLogger(filename=csv_path, separator=',', append=True),
            LearningRateScheduler(unet_lr_scheduler)
        ]

    if model:
        train_batch_gen = iter(
            Batch_Generator(input_path=input_path, idx=ids_train_split, batch_size=batch_size,
                            img_tuple=img_tuple, aug=True, sat=args.sat, hue=args.hue,
                            val=args.val, rotate=args.rotate, gray=args.gray,
                            contrast=args.contrast, brightness=args.brightness,
                            shear=args.shear, shift=args.shift, scale=args.scale,
                            lowq=args.lowq))
        valid_batch_gen = iter(
            Batch_Generator(input_path=input_path, idx=ids_valid_split, batch_size=batch_size,
                            img_tuple=img_tuple, aug=False))

        print('Start training model: hue %r, sat %r, val %r, rotate %r, gray %r, contrast %r, '
              'brightness %r, shear %r, shift %r, scale %r, lowq %r, no weight %r' % (
                  args.hue, args.sat, args.val, args.rotate, args.gray, args.contrast,
                  args.brightness, args.shear, args.shift, args.scale, args.lowq, args.noweight))

        model.fit_generator(
            generator=train_batch_gen,
            steps_per_epoch=np.ceil(float(len(ids_train_split)) / float(batch_size)),
            epochs=epochs,
            callbacks=callbacks,
            validation_data=valid_batch_gen,
            validation_steps=np.ceil(float(len(ids_valid_split)) / float(batch_size)),
            workers=6)
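# The utils.SGD / utils.Adam optimizers used in the compile() calls above take an
# `accumulator` argument that the stock Keras optimizers do not. Judging from how it
# is computed (e.g. np.ceil(float(64) / float(batch_size)) for unet1024), it appears
# to be a gradient-accumulation count: gradients from `accumulator` consecutive
# mini-batches are combined before a single weight update, so the effective batch is
# roughly accumulator * batch_size. A minimal sketch of that arithmetic, assuming a
# target effective batch of 64 and the default batch_size of 24:
#
#   accumulator = int(np.ceil(float(64) / float(24)))   # -> 3
#   effective_batch = accumulator * 24                  # -> 72, first multiple >= 64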
def predict(args):
    """Load trained weights for args.model, predict masks for the test set, and
    write a run-length-encoded submission file."""
    nb_classes = 1
    orig_width = 1918
    orig_height = 1280

    model_name = str(args.model)
    model_weight = str(args.model_weight)
    output_path = str(args.out_path)
    input_path = str(args.in_path)
    batch_size = int(args.batchsize)
    image_size = int(args.image_size)

    df_test = pd.read_csv(input_path + '/sample_submission.csv')
    ids_test = df_test['img'].map(lambda s: s.split('.')[0])

    image_size = 256
    threshold = 0.5

    if model_name == 'unet128':
        image_size = 128
        batch_size = 48
        model = get_unet_128()
        model.load_weights(filepath=model_weight)  # tested

    # elif model_name == 'unet256':
    #     image_size = 256
    #     batch_size = 24
    #     model = get_unet_256()
    #     model.load_weights(filepath=model_weight)

    # elif model_name == 'unet512':
    #     image_size = 512
    #     batch_size = 12
    #     model = get_unet_512()
    #     model.load_weights(filepath=model_weight)

    elif model_name == 'unet1024':
        img_tuple = (256, 256, 3)
        batch_size = 64
        model = get_unet_1024(input_shape=img_tuple, num_classes=nb_classes)
        model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True,
                                          accumulator=np.ceil(float(20) / float(batch_size))),
                      loss=weighted_loss,
                      metrics=['binary_accuracy', dice_loss])
        model.load_weights(filepath=model_weight)

    # elif model_name == 'segnet':
    #     image_size = 256
    #     batch_size = 28
    #     model = SegNet(input_shape=(image_size, image_size, 3), classes=nb_classes)
    #     model.compile(optimizer=optimizers.Adam(lr=0.01), loss=bce_dice_loss,
    #                   metrics=['binary_accuracy', dice_loss])
    #     model.load_weights(filepath=model_weight)

    # elif model_name == 'enet_unpooling':
    #     model_path = output_path + 'Enet_unpooling_{val_loss:.4f}-{val_binary_accuracy:.4f}.hdf5'
    #     csv_path = output_path + 'Enet_unpooling.csv'
    #     image_size = int(256)
    #     model = get_enet_unpooling(input_shape=(image_size, image_size, 3), classes=nb_classes)
    #     model.compile(optimizer=optimizers.Adam(lr=0.01), loss=losses.binary_crossentropy,
    #                   metrics=['binary_accuracy', dice_loss])

    elif model_name == 'enet_naive_upsampling':
        img_tuple = (512, 512, 3)
        batch_size = 4
        model = get_enet_naive_upsampling(input_shape=img_tuple, classes=nb_classes)
        model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True,
                                          accumulator=5),
                      loss=weighted_loss,
                      metrics=['binary_accuracy', dice_loss])
        model.load_weights(filepath=model_weight)

    # elif model_name == 'init_unet':
    #     img_tuple = (1024, 1024, 3)
    #     batch_size = 6
    #     model = get_init_unet(input_shape=img_tuple, num_classes=nb_classes)
    #     model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True, accumulator=5),
    #                   loss=weighted_loss,
    #                   metrics=['binary_accuracy', dice_loss])
    #     model.load_weights(filepath=model_weight)

    # elif model_name == 'unet':
    #     img_tuple = (256, 256, 3)
    #     batch_size = 24
    #     model = get_unet(input_shape=img_tuple, num_classes=nb_classes)
    #     model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True, accumulator=5),
    #                   loss=weighted_loss,
    #                   metrics=['binary_accuracy', dice_loss])
    #     model.load_weights(filepath=model_weight)

    elif model_name == 'modify_unet':
        img_tuple = (512, 512, 3)
        batch_size = 5
        model = get_modify_unet(input_shape=img_tuple, num_classes=nb_classes)
        model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True,
                                          accumulator=5),
                      loss=weighted_loss,
                      metrics=['binary_accuracy', dice_loss])
        model.load_weights(filepath=model_weight)

    # elif model_name == 'modify_unet_V2':
    #     img_tuple = (1024, 1024, 3)
    #     batch_size = 4
    #     model = get_modify_unet_V2(input_shape=img_tuple, num_classes=nb_classes)
    #     model.compile(optimizer=utils.SGD(lr=1e-02, decay=1e-6, momentum=0.9, nesterov=True, accumulator=5),
    #                   loss=weighted_loss,
    #                   metrics=['binary_accuracy', dice_loss])
    #     model.load_weights(filepath=model_weight)

    # elif model_name == 'linknet':
    #     img_tuple = (1024, 1024, 3)
    #     batch_size = 6
    #     base_model = LinkNet(input_shape=img_tuple, classes=nb_classes)
    #     classify = Conv2D(nb_classes, (1, 1), activation='sigmoid')(base_model.output)
    #     model = Model(inputs=base_model.input, outputs=classify)
    #     model.compile(optimizer=utils.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True, accumulator=10),
    #                   loss=weighted_loss,
    #                   metrics=['binary_accuracy', dice_loss])
    #     model.load_weights(filepath=model_weight)

    elif model_name == 'linknet_res34':
        img_tuple = (256, 256, 3)
        batch_size = 84
        base_model = LinkNet_Res34(input_shape=img_tuple, classes=nb_classes)
        classify = Conv2D(nb_classes, (1, 1), activation='sigmoid')(base_model.output)
        model = Model(inputs=base_model.input, outputs=classify)
        model.compile(optimizer=utils.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True,
                                          accumulator=10),
                      loss=weighted_loss,
                      metrics=['binary_accuracy', dice_loss])
        model.load_weights(filepath=model_weight)

    # elif model_name == 'linknet_res50':
    #     img_tuple = (1280, 1280, 3)
    #     batch_size = 4
    #     base_model = LinkNet_Res50(input_shape=img_tuple, classes=nb_classes)
    #     classify = Conv2D(nb_classes, (1, 1), activation='sigmoid')(base_model.output)
    #     model = Model(inputs=base_model.input, outputs=classify)
    #     model.compile(optimizer=utils.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True, accumulator=10),
    #                   loss=weighted_loss,
    #                   metrics=['binary_accuracy', dice_loss])
    #     model.load_weights(filepath=model_weight)

    names = []
    for id in ids_test:
        names.append('{}.jpg'.format(id))

    rles = []

    print('Predicting on {} samples with batch_size = {}...'.format(len(ids_test), batch_size))
    print('Condition: low quality %r' % (args.lowq, ))
    for start in tqdm(range(0, len(ids_test), batch_size)):
        x_batch = []
        end = min(start + batch_size, len(ids_test))
        ids_test_batch = ids_test[start:end]
        for id in ids_test_batch.values:
            if args.lowq:
                img = cv2.imread(input_path + 'test/{}.jpg'.format(id))
            else:
                img = cv2.imread(input_path + 'test_hq/{}.jpg'.format(id))
            img = cv2.resize(img, (img_tuple[1], img_tuple[0]))
            x_batch.append(img)
        x_batch = np.array(x_batch, np.float32) / 255
        preds = model.predict_on_batch(x_batch)
        preds = np.squeeze(preds, axis=3)
        for pred in preds:
            prob = cv2.resize(pred, (orig_width, orig_height))
            mask = prob > threshold
            rle = run_length_encode(mask)
            rles.append(rle)

    print("Generating submission file...")
    df = pd.DataFrame({'img': names, 'rle_mask': rles})
    df.to_csv(output_path + model_weight.split('/')[-1] + '.csv.gz',
              index=False, compression='gzip')
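# Illustrative sketch only: start_training() and predict() both consume an
# argparse-style `args` namespace. The helper below shows one way such a parser
# could be wired up, using only the attribute names actually read above
# (args.model, args.in_path, args.out_path, args.epochs, args.batch_size,
# args.batchsize, args.image_size, args.model_weight, and the boolean
# augmentation / noweight / lowq switches). The repository's real entry point may
# use different flag names and defaults; treat everything here as an assumption.
def build_example_arg_parser():
    import argparse
    parser = argparse.ArgumentParser(
        description='Carvana segmentation (illustrative CLI sketch, not the original entry point)')
    parser.add_argument('--model', default='unet1024',
                        help='model key, e.g. unet1024, modify_unet, linknet_res34')
    parser.add_argument('--in_path', default='input',
                        help='directory containing train_masks.csv, test/ and test_hq/')
    parser.add_argument('--out_path', default='output',
                        help='directory for weights, logs and submission files')
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--batch_size', type=int, default=24, help='used by start_training()')
    parser.add_argument('--batchsize', type=int, default=24, help='used by predict()')
    parser.add_argument('--image_size', type=int, default=256, help='used by predict()')
    parser.add_argument('--model_weight', default='', help='path to a saved .hdf5 weight file')
    # boolean switches consumed by Batch_Generator and the compile branches above
    for flag in ('sat', 'hue', 'val', 'rotate', 'gray', 'contrast', 'brightness',
                 'shear', 'shift', 'scale', 'lowq', 'noweight'):
        parser.add_argument('--' + flag, action='store_true')
    return parser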
from tqdm import tqdm

from model.u_net import get_unet_128, get_unet_256, get_unet_512, get_unet_1024

df_test = pd.read_csv('input/sample_submission.csv')
ids_test = df_test['img'].map(lambda s: s.split('.')[0])

input_size = 128
batch_size = 16

orig_width = 1918
orig_height = 1280

threshold = 0.5

model = get_unet_128()
model.load_weights(filepath='weights/best_weights.hdf5')

names = []
for id in ids_test:
    names.append('{}.jpg'.format(id))


# https://www.kaggle.com/stainsby/fast-tested-rle
def run_length_encode(mask):
    '''
    mask: numpy array, 1 - mask, 0 - background
    Returns run length as a string formatted for submission
    '''
    inds = mask.flatten()
    runs = np.where(inds[1:] != inds[:-1])[0] + 2