def __generate_X(self, list_IDs_batch):
    """Assemble the image array for one batch.

    The output buffer always has ``self.batch_size`` rows and
    ``self.n_channels`` channels. Its spatial size is ``self.dim`` when no
    reshape target is set or when ``self.randomcrop is True``; otherwise it
    is ``self.reshape``.

    Args:
        list_IDs_batch: positional row indices into ``self.df`` for the
            samples of this batch.

    Returns:
        The filled array. It is allocated with ``np.empty``, so rows beyond
        ``len(list_IDs_batch)`` are left uninitialized.

    Side effects:
        Appends every loaded image name to ``self.image_name``.
    """
    # Pick the spatial size of the output buffer.
    keep_native_size = self.reshape is None or self.randomcrop is True
    spatial_dims = self.dim if keep_native_size else self.reshape
    X = np.empty((self.batch_size, *spatial_dims, self.n_channels))

    # Images are resized only when a target is set and random cropping is
    # explicitly disabled (identity check against False, as in the buffer
    # selection above).
    resize_needed = self.reshape is not None and self.randomcrop is False

    for row, sample_id in enumerate(list_IDs_batch):
        image_file = self.df['ImageId'].iloc[sample_id]
        picture = self.__load_rgb(f"{self.base_path}/{image_file}")

        if resize_needed:
            picture = np_resize(picture, self.reshape)

        # Optional gamma correction.
        if self.gamma is not None:
            picture = adjust_gamma(picture, gamma=self.gamma)

        X[row, ] = picture
        self.image_name.append(image_file)

    return X
def save_prediction(prediction, name):
    """Resize a batch of predictions to 350x525 and store it as float16.

    Each sample ``prediction[i]`` is cast to float32, passed through
    ``np_resize`` with target ``(350, 525)``, cast back to float16 and
    written into an array of shape ``(N, 350, 525, 4)``.

    Args:
        prediction: 4-D array indexed as ``prediction[i, :, :, :]``.
        name: string used to build the output file name.

    Side effects:
        Writes ``../predictions/<name>_.npy``.
    """
    target_hw = (350, 525)
    n_samples = prediction.shape[0]
    downsized = np.zeros((n_samples, *target_hw, 4), dtype=np.float16)

    for idx in range(n_samples):
        sample = prediction[idx, :, :, :].astype(np.float32)
        downsized[idx, :, :, :] = np_resize(sample, target_hw).astype(
            np.float16)

    np.save('../predictions/' + name + '_.npy', downsized)
def resize_oof(folder):
    """Resize every saved array in *folder* to 350x525, overwriting in place.

    Each directory entry is loaded with ``np.load``, every sample is passed
    through ``np_resize`` with target ``(350, 525)`` (via float32), and the
    float16 result of shape ``(N, 350, 525, 4)`` is saved back under the
    same file name.

    Args:
        folder: directory holding the arrays. It may be given with or
            without a trailing path separator.

    Side effects:
        Prints one progress line per file and overwrites each file.
    """
    target_hw = (350, 525)
    for file in os.listdir(folder):
        print("Resizing file: ", file)
        # Plain string concatenation produced a wrong path whenever
        # ``folder`` lacked a trailing separator; os.path.join handles both.
        path = os.path.join(folder, file)
        oof_data = np.load(path)
        new_oof_data = np.zeros((oof_data.shape[0], *target_hw, 4),
                                dtype=np.float16)
        for i in range(oof_data.shape[0]):
            new_oof_data[i, :, :, :] = np_resize(
                oof_data[i, :, :, :].astype(np.float32),
                target_hw).astype(np.float16)
        np.save(path, new_oof_data)
def postprocess_shape(file, mode):
    """Post-process the masks of a submission CSV and write a new submission.

    For every image in ``../../dados/test_images/`` and every class, the
    mask decoded from *file* is passed through ``draw_convex_hull``, zeroed
    where the grayscale image is near-black (``<= 2/255``), and filtered by
    ``post_process_minsize`` with a per-class minimum size. Empty masks are
    stored as NaN; others are binarized at 0.5 and run-length encoded with
    ``mask2rle``.

    Args:
        file: path of the submission CSV to read. Its base name (text
            before the first dot) prefixes the output file name.
        mode: forwarded to ``draw_convex_hull`` (the original comment lists
            'rect', 'min', 'convex' and 'approx').

    Side effects:
        Writes ``<basename>_convex.csv`` in the current directory.
        NOTE(review): the suffix is always ``_convex`` whatever *mode* is.
        Kept for compatibility; confirm whether that is intended.
    """
    sub = pd.read_csv(file)
    name = file.split('/')[-1].split('.')[0]

    model_class_names = ['Fish', 'Flower', 'Gravel', 'Sugar']
    min_size = [25000, 15000, 22500, 10000]
    images_dir = '../../dados/test_images/'

    img_label_list = []
    enc_pixels_list = []

    for test_img in tqdm(os.listdir(images_dir)):
        # The image and its dark-pixel map do not depend on the class, so
        # compute them once per image rather than once per (image, class).
        path = os.path.join(images_dir, test_img)
        img = cv2.imread(path).astype(np.float32)
        img = img / 255.
        img = np_resize(img, (350, 525))
        img2 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        dark_pixels = img2 <= 2 / 255.

        for class_name, class_min_size in zip(model_class_names, min_size):
            img_label_list.append(f'{test_img}_{class_name}')

            mask = make_mask(sub, test_img + '_' + class_name,
                             shape=(350, 525))
            # The shape post-processing is applied to every class.
            mask = draw_convex_hull(mask.astype(np.uint8), mode=mode)
            mask[dark_pixels] = 0
            mask = post_process_minsize(mask, class_min_size)

            if mask.sum() == 0:
                enc_pixels_list.append(np.nan)
            else:
                mask = np.where(mask > 0.5, 1.0, 0.0)
                enc_pixels_list.append(mask2rle(mask))

    name = name + '_convex.csv'
    submission_df = pd.DataFrame({'Image_Label': img_label_list,
                                  'EncodedPixels': enc_pixels_list})
    submission_df.to_csv(name, index=None)
def final_predict(models, folds, shape, TTA=False, posprocess=False,
                  swa=False, minsizes=None, thresholds=None, fixshape=False,
                  multimodel=False):
    """Predict the test set with an ensemble and write a submission.

    Test images are processed in chunks of 860. For each
    ``(smmodel, backbone)`` pair and each chunk, every fold's prediction is
    resized to 350x525 (float16) and the folds are averaged. The per-model
    results are then averaged, saved with ``save_prediction``,
    post-processed with ``predict_postprocess`` and turned into a
    submission file.

    Args:
        models: iterable of ``(smmodel, backbone)`` pairs.
        folds: iterable of fold identifiers passed to the predict function.
        shape: forwarded to the predict function and ``predict_postprocess``.
        TTA: forwarded to the predict function; also adds ``'_tta'`` to the
            submission name.
        posprocess, minsizes, thresholds, fixshape: forwarded to
            ``predict_postprocess``.
        swa: forwarded to the predict function.
        multimodel: use ``predict_multimodel`` instead of ``predict_fold``.

    Side effects:
        Prints progress, saves the ensembled prediction array and generates
        the submission ``<submission_name>.csv``.
    """
    chunk_size = 860
    out_hw = (350, 525)

    sub_df, test_imgs = get_test_data()
    n_test = test_imgs.shape[0]
    print(n_test)

    batch_pred_emsemble = []
    submission_name = ''

    for smmodel, backbone in models:
        print('Predicting {} {}'.format(smmodel, backbone))
        opt = Adam()
        model_masks = []
        submission_name += f'{smmodel!s}_{backbone!s}_'

        for start in range(0, n_test, chunk_size):
            batch_idx = list(range(start, min(n_test, start + chunk_size)))
            fold_result = []
            # Scratch buffer reused by every fold of this chunk; a copy of it
            # is stored in fold_result after each fold.
            batch_pred_resized = np.zeros((len(batch_idx), *out_hw, 4),
                                          dtype=np.float16)

            for fold in folds:
                model = get_model(smmodel, backbone, opt,
                                  dice_coef_loss_bce, [dice_coef])
                predictor = predict_multimodel if multimodel else predict_fold
                batch_pred_masks = predictor(fold, smmodel, backbone, model,
                                             batch_idx, test_imgs, shape,
                                             sub_df, TTA, swa)

                for row in range(batch_pred_masks.shape[0]):
                    batch_pred_resized[row, :, :, :] = np_resize(
                        batch_pred_masks[row, :, :, :],
                        out_hw).astype(np.float16)

                del batch_pred_masks
                gc.collect()
                fold_result.append(batch_pred_resized.astype(np.float16))

            # Average the folds for this chunk.
            batch_pred_masks = np.mean(fold_result, axis=0, dtype=np.float16)
            del fold_result
            gc.collect()

            model_masks.extend(batch_pred_masks.astype(np.float16))
            del batch_pred_masks
            gc.collect()

        batch_pred_emsemble.append(model_masks)
        del model, model_masks
        gc.collect()

    # Average across models.
    batch_pred_emsemble = np.mean(batch_pred_emsemble, axis=0,
                                  dtype=np.float16)

    if TTA:
        submission_name += '_tta'

    save_prediction(batch_pred_emsemble, submission_name)

    batch_idx = list(range(test_imgs.shape[0]))
    batch_pred_emsemble = np.array(
        predict_postprocess(batch_idx,
                            posprocess,
                            batch_pred_emsemble,
                            shape=shape,
                            minsize=minsizes,
                            threshold=thresholds,
                            fixshape=fixshape))

    test_df = convert_masks_for_submission(batch_idx, test_imgs, sub_df,
                                           batch_pred_emsemble)
    submission_name = submission_name + '.csv'
    generate_submission(test_df, submission_name)
def multimodel_eval(smmodel, backbone, nfold, maxfold, shape=(320, 480),
                    swa=False, tta=False, fixshape=True):
    """Evaluate one-model-per-class checkpoints and save out-of-fold arrays.

    For every cross-validation fold ``n_fold`` with
    ``nfold <= n_fold <= maxfold``, a separate weights file is loaded for
    each class name. Channel 0 of that model's prediction is written into
    the matching class channel of ``final_pred``. The fold's dice score
    comes from ``np_dice_coef``; ground truth and predictions are resized
    to 350x525 and accumulated as float16.

    Args:
        smmodel, backbone: passed to ``get_model`` and used in file names.
        nfold, maxfold: inclusive range of fold numbers to evaluate.
        shape: ``(h, w)`` used as the generator's ``reshape`` and as the
            TTA input size.
        swa: load ``*_swa.h5`` weights instead of ``*.h5``.
        tta: wrap each model with ``tta_segmentation`` (h/v flips, mean).
        fixshape: not used in this function body.

    Side effects:
        Prints progress and writes three ``.npy`` files under
        ``../validations/`` (image names, ground truth, predictions).

    Relies on the module-level names ``n_fold_splits``, ``random_seed`` and
    ``n_classes``.
    """
    h, w = shape
    train_df, mask_count_df = get_data_preprocessed()
    opt = Nadam(lr=0.0002)
    skf = StratifiedKFold(n_splits=n_fold_splits,
                          random_state=random_seed,
                          shuffle=True)
    # Buffers cover the whole dataset; rows for folds that are skipped stay
    # zero because cnt_position only advances for evaluated samples.
    oof_data = np.zeros((len(mask_count_df.index), 350, 525, 4),
                        dtype=np.float16)
    oof_predicted_data = np.zeros((len(mask_count_df.index), 350, 525, 4),
                                  dtype=np.float16)
    oof_imgname = []
    oof_dice = []
    classes = ['Fish', 'Flower', 'Gravel', 'Sugar']
    cnt_position = 0  # next free row in the out-of-fold buffers
    for n_fold, (train_indices, val_indices) in enumerate(
            skf.split(mask_count_df.index, mask_count_df.hasMask)):
        final_pred = np.zeros((len(val_indices), h, w, 4), dtype=np.float32)
        y_true = []
        if n_fold >= nfold and n_fold <= maxfold:
            # NOTE(review): folds >= 2 use 4 output classes, earlier folds
            # use the module-level n_classes -- presumably the checkpoints
            # were trained with different heads; confirm.
            if n_fold >= 2:
                nclass = 4
                print(nclass)
            else:
                nclass = n_classes
            print('Evaluating fold number ', str(n_fold))
            # One batch holding the whole validation fold, so item 0 yields
            # every ground-truth mask at once.
            val_generator = DataGenerator(val_indices,
                                          df=mask_count_df,
                                          shuffle=False,
                                          target_df=train_df,
                                          batch_size=len(val_indices),
                                          reshape=shape,
                                          augment=False,
                                          n_channels=3,
                                          n_classes=4,
                                          backbone=backbone)
            _, y_true = val_generator.__getitem__(0)
            img_true = val_generator.image_name
            oof_imgname.extend(img_true)
            # Switch to single-sample batches for prediction.
            val_generator.batch_size = 1
            for i, cls in enumerate(classes):
                model = get_model(smmodel,
                                  backbone,
                                  opt,
                                  dice_coef_loss_bce, [dice_coef],
                                  nclass=nclass)
                # Per-class checkpoint: best_<model>_<backbone>_<fold>_<class>
                filepath = '../models/best_' + str(smmodel) + '_' + str(
                    backbone) + '_' + str(n_fold) + '_' + cls
                if swa:
                    filepath += '_swa.h5'
                else:
                    filepath += '.h5'
                model.load_weights(filepath)
                # results = model.evaluate_generator(
                #     val_generator,
                #     workers=40,
                #     verbose=1
                # )
                # print(results)
                if tta:
                    model = tta_segmentation(model,
                                             h_flip=True,
                                             v_flip=True,
                                             input_shape=(h, w, 3),
                                             merge='mean')
                y_pred = model.predict_generator(val_generator,
                                                 workers=40,
                                                 verbose=1)
                # Only channel 0 of this model's output is kept, stored in
                # the channel of the class being processed.
                final_pred[:, :, :, i] = y_pred[:, :, :, 0]
                del y_pred
                gc.collect()
            print(y_true.shape)
            print(final_pred.shape)
            print(len(oof_imgname))
            d = np_dice_coef(y_true, final_pred)
            oof_dice.append(d)
            print("Dice: ", d)
            # Store truth and prediction at the common 350x525 resolution.
            for i in range(y_true.shape[0]):
                oof_data[cnt_position, :, :, :] = np_resize(
                    y_true[i, :, :, :].astype(np.float32),
                    (350, 525)).astype(np.float16)
                oof_predicted_data[cnt_position, :, :, :] = np_resize(
                    final_pred[i, :, :, :].astype(np.float32),
                    (350, 525)).astype(np.float16)
                cnt_position += 1
            del y_true, final_pred
            gc.collect()
            del val_generator, model
            gc.collect()
    oof_imgname = np.asarray(oof_imgname)
    print(oof_data.shape)
    print(oof_predicted_data.shape)
    print(oof_imgname.shape)
    print("CV Final Dice: ", np.mean(oof_dice))
    np.save(
        '../validations/img_name_' + str(smmodel) + '_' + str(backbone) +
        '_' + str(n_fold_splits) + '.npy', oof_imgname)
    np.save('../validations/y_true_' + str(n_fold_splits) + '.npy', oof_data)
    np.save(
        '../validations/' + str(smmodel) + '_' + str(backbone) + '_' +
        str(n_fold_splits) + '.npy', oof_predicted_data)
def evaluate(smmodel, backbone, nfold, maxfold, shape=(320, 480), swa=False,
             tta=False, fixshape=True):
    """Evaluate per-fold checkpoints and save out-of-fold arrays.

    Each fold's validation indices are processed in chunks of 480. For
    folds with ``nfold <= n_fold <= maxfold``, the fold's weights are
    loaded, the chunk is predicted, and a dice score is computed per sample
    and per class channel with ``np_dice_coef``. Ground truth and
    predictions are resized to 350x525 and accumulated as float16.

    Args:
        smmodel, backbone: passed to ``get_model`` and used in file names.
        nfold, maxfold: inclusive range of fold numbers to evaluate.
        shape: ``(h, w)`` used as the generator's ``reshape`` and as the
            TTA input size.
        swa: load ``*_swa.h5`` weights instead of ``*.h5``.
        tta: wrap the model with ``tta_segmentation`` (h/v flips, mean).
        fixshape: not used in this function body.

    Side effects:
        Prints progress and writes three ``.npy`` files under
        ``../validations/`` (image names, ground truth, predictions).

    Relies on the module-level names ``n_fold_splits`` and ``random_seed``.
    """
    h, w = shape
    train_df, mask_count_df = get_data_preprocessed()
    opt = Nadam(lr=0.0002)
    skf = StratifiedKFold(n_splits=n_fold_splits,
                          random_state=random_seed,
                          shuffle=True)
    # Buffers cover the whole dataset; rows for folds that are skipped stay
    # zero because cnt_position only advances for evaluated samples.
    oof_data = np.zeros((len(mask_count_df.index), 350, 525, 4),
                        dtype=np.float16)
    oof_predicted_data = np.zeros((len(mask_count_df.index), 350, 525, 4),
                                  dtype=np.float16)
    oof_imgname = []
    # num_cpus = psutil.cpu_count(logical=False)
    # ray.init(num_cpus=4)
    oof_dice = []
    cnt_position = 0  # next free row in the out-of-fold buffers
    for n_fold, (train_indices, val_indices) in enumerate(
            skf.split(mask_count_df.index, mask_count_df.hasMask)):
        # Walk the validation fold in chunks of 480 samples.
        for i in range(0, len(val_indices), 480):
            batch_idx = list(range(i, min(len(val_indices), i + 480)))
            # NOTE(review): the model is built before the fold filter below,
            # so skipped folds still construct one per chunk -- confirm
            # whether this is intended.
            model = get_model(smmodel, backbone, opt, dice_coef_loss_bce,
                              [dice_coef])
            if n_fold >= nfold and n_fold <= maxfold:
                print('Evaluating fold number ', str(n_fold))
                # One batch holding the whole chunk, so item 0 yields every
                # ground-truth mask of the chunk at once.
                val_generator = DataGenerator(val_indices[batch_idx],
                                              df=mask_count_df,
                                              shuffle=False,
                                              target_df=train_df,
                                              batch_size=len(
                                                  val_indices[batch_idx]),
                                              reshape=shape,
                                              augment=False,
                                              n_channels=3,
                                              n_classes=4,
                                              backbone=backbone)
                _, y_true = val_generator.__getitem__(0)
                img_true = val_generator.image_name
                oof_imgname.extend(img_true)
                # Switch to single-sample batches for prediction.
                val_generator.batch_size = 1
                filepath = '../models/best_' + str(smmodel) + '_' + str(
                    backbone) + '_' + str(n_fold)
                if swa:
                    filepath += '_swa.h5'
                else:
                    filepath += '.h5'
                model.load_weights(filepath)
                # results = model.evaluate_generator(
                #     val_generator,
                #     workers=40,
                #     verbose=1
                # )
                # print(results)
                if tta:
                    model = tta_segmentation(model,
                                             h_flip=True,
                                             v_flip=True,
                                             input_shape=(h, w, 3),
                                             merge='mean')
                y_pred = model.predict_generator(val_generator,
                                                 workers=40,
                                                 verbose=1)
                print(y_true.shape)
                print(y_pred.shape)
                print(len(oof_imgname))
                f = []  # dice scores, one per (sample, class channel)
                # This inner ``i`` reuses the chunk-offset name; the outer
                # range iterator is unaffected by the rebinding.
                for i in range(y_true.shape[0]):
                    for j in range(4):
                        d1 = np_dice_coef(y_true[i, :, :, j],
                                          y_pred[i, :, :, j])
                        f.append(d1)
                    # Store truth and prediction at 350x525 resolution.
                    oof_data[cnt_position, :, :, :] = np_resize(
                        y_true[i, :, :, :].astype(np.float32),
                        (350, 525)).astype(np.float16)
                    oof_predicted_data[cnt_position, :, :, :] = np_resize(
                        y_pred[i, :, :, :].astype(np.float32),
                        (350, 525)).astype(np.float16)
                    cnt_position += 1
                print("Dice: ", np.mean(f))
                oof_dice.append(np.mean(f))
                del y_true, y_pred, val_generator, model
                gc.collect()
    # oof_data = np.asarray(oof_data)
    # oof_predicted_data = np.asarray(oof_predicted_data)
    oof_imgname = np.asarray(oof_imgname)
    print(oof_data.shape)
    print(oof_predicted_data.shape)
    print(oof_imgname.shape)
    print("CV Final Dice: ", np.mean(oof_dice))
    np.save('../validations/img_name_' + str(n_fold_splits) + '.npy',
            oof_imgname)
    np.save('../validations/y_true_' + str(n_fold_splits) + '.npy', oof_data)
    np.save(
        '../validations/' + str(smmodel) + '_' + str(backbone) + '_' +
        str(n_fold_splits) + '.npy', oof_predicted_data)