def predictImage(f,IMG_SIZE = 256):
    """Predict a mask for one DICOM file and return it run-length encoded.

    The pixel array read from ``f`` is resized to ``IMG_SIZE`` x ``IMG_SIZE``
    (unless that already equals 1024 x 1024), divided by 255 and given a
    trailing and a leading singleton axis before being passed to the
    module-level ``model``. The squeezed prediction is resized to
    1024 x 1024, thresholded at 0.5, cleared entirely when fewer than
    2 * 1024 pixels are set, and finally transposed, scaled to 0/255 uint8
    and handed to ``mask2rle``.

    Args:
        f: Path (or file object) accepted by ``pydicom.read_file``.
        IMG_SIZE: Side length of the square model input.

    Returns:
        Whatever ``mask2rle`` returns for the post-processed mask.
    """
    ORIG_DIM = 1024
    threshold = 0.5
    target_shape = (IMG_SIZE, IMG_SIZE)

    pixels = pydicom.read_file(f).pixel_array
    if target_shape != (ORIG_DIM, ORIG_DIM):
        pixels = cv2.resize(pixels, target_shape)

    # Scale, then add a trailing axis followed by a leading axis.
    batch = np.expand_dims(np.expand_dims(pixels / 255, axis=2), axis=0)
    probabilities = model.predict(batch).squeeze()

    binary = cv2.resize(probabilities, (ORIG_DIM, ORIG_DIM)) > threshold
    # Drop the whole prediction when its total area is below 2 * ORIG_DIM.
    if binary.sum() < 2 * ORIG_DIM:
        binary[:] = 0

    encoded_input = (binary.T * 255).astype(np.uint8)
    return mask2rle(encoded_input, ORIG_DIM, ORIG_DIM)
def main():
    """Build the segmentation submission CSV from fold predictions.

    Reads the image ids from ``../data/sample_submission.csv``, gathers the
    prediction folds from every path in ``args.preds_paths`` and the
    thresholds pickled at every path in ``args.thresholds_paths``, then
    writes one ``ImageId``/``EncodedPixels`` row per image to
    ``<args.out>/segm_class.csv``. Images whose combined mask is empty get
    ``'-1'``.

    Rows are collected in a list and the frame is built once:
    ``DataFrame.append`` copied the whole frame on every call and no longer
    exists in pandas >= 2.0.
    """
    args = parse_argse()
    sample_subm = pd.read_csv('../data/sample_submission.csv')
    preds_ids = sample_subm.ImageId.values

    segm_folds = []
    for pred_p in args.preds_paths:
        segm_folds.extend(get_pred_folds(pred_p))

    thresholds = []
    for thr_p in args.thresholds_paths:
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point this at threshold files you produced yourself.
        with open(thr_p, 'rb') as f:
            thresholds.extend(pickle.load(f))

    rows = []
    for id_ in tqdm(preds_ids):
        out_mask = get_masks(id_, segm_folds, thresholds)
        if out_mask.sum() == 0:
            rle = '-1'
        else:
            # Transposed 0/255 mask, as in the original call.
            rle = mask2rle((out_mask * 255).T, 1024, 1024)
        rows.append({'ImageId': id_, 'EncodedPixels': rle})

    # Same column layout as the sample submission, as before.
    segm_subm = pd.DataFrame(rows, columns=sample_subm.columns)
    segm_subm.to_csv(os.path.join(args.out, 'segm_class.csv'), index=False)
def get_rles(preds_test, b_th, r_th):
    """Convert raw predictions to RLE strings and preview the first few masks.

    Each prediction is squeezed, resized to 1024 x 1024 and binarised with
    ``b_th``; a mask whose pixel count is below ``r_th`` is cleared entirely.
    The transposed 0/255 uint8 mask is passed to ``mask2rle``.

    Args:
        preds_test: Iterable of prediction arrays.
        b_th: Binarisation threshold applied to the resized prediction.
        r_th: Minimum number of positive pixels for a mask to be kept.

    Returns:
        List with one ``mask2rle`` result per prediction.
    """
    max_img = 10
    full_size = (1024, 1024)
    rles = []
    plt.figure(figsize=(16, 4))

    # Slots start at 2 because the original counter is incremented before it
    # is used as a subplot index.
    for slot, pred in enumerate(tqdm(preds_test), start=2):
        binary = cv2.resize(pred.squeeze(), full_size) > b_th
        if binary.sum() < r_th:
            binary[:] = 0
        im = (binary.T * 255).astype(np.uint8)
        rles.append(mask2rle(im, 1024, 1024))

        if slot < max_img:
            plt.subplot(1, max_img, slot)
            plt.imshow(im)
            plt.axis('off')
    return rles
def make_submission(model_path,
                    thresh,
                    small_thresh=2048,
                    test_base_path='/data/pneumo/dicom-images-test/',
                    save=False):
    '''
    Predict every test DICOM with one model and write a submission CSV.

    Output goes to ``<model dir>/submission/<model file stem>/``, e.g. for
    ``/data/pneumo_log/val_1/2019_0815_1742/best_weights.h5`` the folder is
    ``/data/pneumo_log/val_1/2019_0815_1742/submission/best_weights``.
    A per-model sub-folder is used because a snapshot run can leave several
    model files in the same directory.

    model_path    - path of the model file to load
    thresh        - probability threshold used to binarise predictions
    small_thresh  - masks with fewer positive pixels than this are cleared
    test_base_path - root folder searched with ``/*/*/*.dcm``
    save          - when True, also save each prediction as
                    ``{'pred': binary, 'pred_row': raw}`` via ``np.save``
                    (kept for later ensembling) and return the output folder

    Returns the output folder when ``save`` is True, otherwise None.
    '''
    path_parts = model_path.split('/')
    model_file_name = path_parts[-1].split('.')[0]  # e.g. best_weights
    dir_name = path_parts[-2]  # e.g. 2019_0815_1742
    save_dir = '/'.join(path_parts[:-1] + ['submission', model_file_name])
    data_prep._make_dir(save_dir)

    file_name = 'submission_' + dir_name + '_' + model_file_name + '.csv'
    test_data_path_list = glob(test_base_path + '/*/*/*.dcm')
    model = pred_util._load_model(model_path=model_path)

    im_ids = []
    rles = []
    for dcm_path in tqdm(test_data_path_list):
        im_id = dcm_path.split('/')[-1].split('.dcm')[0]
        im_ids.append(im_id)

        # The raw pixel array is passed straight to the predictor.
        pixels = pydicom.dcmread(dcm_path).pixel_array
        pred = pred_util._pred_img(pixels, model)

        # Per the original comments, pred is already 1024 x 1024 with
        # values in 0-1.
        binary_pred = np.where(pred > thresh, 1, 0)
        if binary_pred.sum() < small_thresh:
            binary_pred[:] = 0

        if save:
            # Stored as a dict so ensemble_util._ensemble_preds can consume
            # it later (per the original comment).
            np.save(save_dir + '/' + im_id, {
                'pred': binary_pred,
                'pred_row': pred
            })

        # 0/1 -> 0/255 and transpose for the submission format.
        encoded_input = (binary_pred.T * 255).astype(np.uint8)
        rles.append(mask2rle(encoded_input, 1024, 1024))

    sub_df = pd.DataFrame({'ImageId': im_ids, 'EncodedPixels': rles})
    sub_df.loc[sub_df.EncodedPixels == '', 'EncodedPixels'] = '-1'
    sub_df.to_csv(save_dir + '/' + file_name, index=False)

    # The folder is only handed back when predictions were saved.
    return save_dir if save else None
def _convert_pred_to_rle(pred, thresh=0.5, small_thresh=2048):
    """Binarise a prediction and run-length encode it.

    Args:
        pred: Prediction array; per the original comment it is already
            1024 x 1024 with values in 0-1.
        thresh: Values strictly above this become 1, everything else 0.
        small_thresh: If fewer pixels than this are positive, the whole
            mask is cleared.

    Returns:
        Result of ``mask2rle`` on the transposed 0/255 uint8 mask.
    """
    binary = np.where(pred > thresh, 1, 0)
    if binary.sum() < small_thresh:
        binary = np.zeros_like(binary)

    # 0/1 -> 0/255 and transpose for the submission format.
    encoded_input = (binary.T * 255).astype(np.uint8)
    return mask2rle(encoded_input, 1024, 1024)
def write_mask(self, image_id, mask):
    """Write one submission row for ``image_id`` through ``self.writer``.

    The leading axis of ``mask`` is squeezed away (the original comment
    says the mask arrives in CHW layout). A mask with a positive sum is
    encoded with ``mask_functions.mask2rle``; otherwise the row carries
    ``'-1'``.
    """
    plane = torch.squeeze(mask, dim=0)
    # Sizes are read before the emptiness check, as in the original.
    height, width = plane.size(0), plane.size(1)

    if plane.sum() > 0:
        encoded = mask_functions.mask2rle(plane, height=height, width=width)
    else:
        encoded = '-1'

    row = {'ImageId': image_id, 'EncodedPixels': encoded}
    self.writer.writerow(row)
def predictImage(f,IMG_SIZE = 512):
    """Predict a mask for one DICOM file and return it run-length encoded.

    The pixel array read from ``f`` is resized to ``IMG_SIZE`` x ``IMG_SIZE``
    (unless that already equals 1024 x 1024), divided by 255, given a
    trailing and a leading singleton axis and passed to the module-level
    ``model``. The prediction is reshaped to a square, resized to
    1024 x 1024, rounded, scaled by 255 and handed to ``mask2rle``.

    The square side is derived from the number of predicted values rather
    than hard-coded to 512, so the function no longer fails when
    ``IMG_SIZE`` (and therefore the model output) is not 512. For a
    512 x 512 output the result is unchanged.

    Args:
        f: Path (or file object) accepted by ``pydicom.read_file``.
        IMG_SIZE: Side length of the square model input.

    Returns:
        Whatever ``mask2rle`` returns for the post-processed mask.

    Raises:
        ValueError: If the prediction cannot be reshaped into a square.
    """
    ORIG_DIM = 1024
    resize = (IMG_SIZE, IMG_SIZE)

    scan = pydicom.read_file(f)
    img = scan.pixel_array
    if resize != (ORIG_DIM, ORIG_DIM):
        img = cv2.resize(img, resize)
    img = img / 255
    img = np.expand_dims(img, axis=2)
    img = np.expand_dims(img, axis=0)

    mask = model.predict(img)
    # Infer the side of the square prediction from its element count.
    side = int(round(np.sqrt(mask.size)))
    mask = mask.reshape(side, side)
    mask = cv2.resize(mask, (ORIG_DIM, ORIG_DIM))
    mask = mask.round() * 255
    # NOTE(review): the mask is passed untransposed and as floats; confirm
    # this is the orientation/dtype mask2rle expects.
    return mask2rle(mask, ORIG_DIM, ORIG_DIM)
def make_submission(filename,
                    device,
                    model,
                    validloader,
                    image_size,
                    channels,
                    threshold=0.9,
                    original_size=1024,
                    tta=False):
    '''
    Function to create submission.csv file.

    INPUT:
        filename - submission filename
        device - device the input batches are moved to
        model - model to create submission
        validloader - loader yielding (batch, file names) pairs
        image_size - size of images for training, indexed as
            (image_size[0], image_size[1])
        channels - number of channels in training images (forwarded to
            get_prediction_with_tta)
        threshold - threshold for submission
        original_size - original image size (1024)
        tta - when True, average the plain prediction, the prediction on
            the horizontally flipped input and get_prediction_with_tta

    Changes relative to the previous version (outputs are the same):
        * inference runs under torch.no_grad() so no autograd graph is kept;
        * the flipped forward pass is only computed when tta is True;
        * the unused connected-component labelling / per-component
          encodings were removed.
    '''
    submission = {'ImageId': [], 'EncodedPixels': []}
    model.eval()
    torch.cuda.empty_cache()
    im_width = image_size[0]
    im_height = image_size[1]
    full_size = (original_size, original_size)

    with torch.no_grad():
        for X, fns in validloader:
            X = Variable(X).to(device)
            output = model(X)
            if tta:
                # Predict on the input flipped along dim 3, then flip the
                # prediction back.
                X_flipped = torch.flip(X, dims=(3, ))
                output_flipped = torch.flip(model(X_flipped), dims=(3, ))

            for i, fname in enumerate(fns):
                mask = torch.sigmoid(output[i].reshape(
                    im_width, im_height)).data.cpu().numpy()
                mask = binary_opening(mask > threshold, disk(2))

                if tta:
                    mask_flipped = torch.sigmoid(output_flipped[i].reshape(
                        im_width, im_height)).data.cpu().numpy()
                    mask_flipped = binary_opening(mask_flipped > threshold,
                                                  disk(2))
                    mask_tta = get_prediction_with_tta(model,
                                                       X[i],
                                                       device,
                                                       img_size=(im_width,
                                                                 im_height),
                                                       channels=channels)
                    # NOTE(review): mask and mask_flipped are outputs of
                    # binary_opening on boolean input; if they are boolean,
                    # `+` between them is a logical OR, not a sum. Confirm
                    # the intended averaging.
                    im = Image.fromarray(
                        ((mask + mask_flipped + mask_tta) / 3 *
                         255).astype(np.uint8)).resize(full_size)
                    im = np.transpose(np.asarray(im))
                else:
                    mask = np.transpose(mask)
                    im = Image.fromarray(
                        (mask * 255).astype(np.uint8)).resize(full_size)

                # File name without directory and 4-character extension.
                fname = fname.split('/')[-1][:-4]
                encoding = mask2rle(np.array(im), original_size,
                                    original_size)
                submission['ImageId'].append(fname)
                submission['EncodedPixels'].append(encoding)

    submission_df = pd.DataFrame(submission,
                                 columns=['ImageId', 'EncodedPixels'])
    # An empty encoding means nothing was predicted for that image.
    submission_df.loc[submission_df.EncodedPixels == '',
                      'EncodedPixels'] = '-1'
    submission_df.to_csv(filename, index=False)
def generatecsv(csvpath, datasetfolder):
    """Run Mask R-CNN inference over a SIIM dataset folder and dump a CSV.

    Loads ``datasetfolder`` through ``pneumothorax.SiimDataset``, builds a
    ``modellib.MaskRCNN`` in inference mode on ``DEVICE``, loads the
    hard-coded weights file and writes one ``<image id>,<rle or -1>`` line
    per image (no header) to ``os.path.join(SIIM_DIR, csvpath)``.

    Changes relative to the previous version:
        * rows are accumulated in a list instead of ``np.append``, which
          re-allocated the whole result array on every image;
        * detections with more than one instance are merged with a max over
          the instance axis instead of crashing in
          ``np.squeeze(axis=-1)``; a single-instance result is unchanged.

    Args:
        csvpath: Output file name, relative to ``SIIM_DIR``.
        datasetfolder: Dataset sub-folder passed to ``load_siim``.
    """
    # Load validation dataset
    dataset = pneumothorax.SiimDataset()
    dataset.load_siim(SIIM_DIR, datasetfolder)
    # Must call before using the dataset
    dataset.prepare()
    print("Images: {}\nClasses: {}".format(len(dataset.image_ids), dataset.class_names))

    # Create model in inference mode
    with tf.device(DEVICE):
        model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

    # Path of the trained weights file.
    # Or, load the last model you trained: model.find_last()
    weights_path = "/home/sa-279/Mask_RCNN/logs/siim20190813T1933/mask_rcnn_siim_0029.h5"
    print("Loading weights ", weights_path)
    model.load_weights(weights_path, by_name=True)

    test_images = dataset.image_ids
    total_images = len(test_images)
    rows = []
    for row, image_id in enumerate(test_images):
        img_id = dataset.image_info[image_id]["id"]  # image id as in csv file
        print("processing {} :image {} of {}".format(img_id, row+1, total_images))

        # Only the image is needed; the ground-truth outputs are discarded.
        image, _, _, _, _ = modellib.load_image_gt(
            dataset, config, image_id, use_mini_mask=False)
        result = model.detect([image], verbose=0)
        mask = np.array(result[0]['masks'])

        if np.any(mask):
            mask = mask.astype(np.uint64) * 255
            # Collapse the trailing instance axis; identical to a squeeze
            # when exactly one instance was detected.
            mask = mask.max(axis=-1)
            height, width = mask.shape[0], mask.shape[1]
            rle = mask2rle(mask.T, width, height)
        else:
            rle = -1
        rows.append([str(img_id), str(rle)])

    csvfile = os.path.join(SIIM_DIR, csvpath)
    # reshape keeps the (0, 2) shape when no image was processed.
    test_set = np.array(rows, dtype=str).reshape(-1, 2)
    np.savetxt(csvfile, test_set, delimiter=',', fmt='%s')
def predict(image_fps, config: Config, filepath='submission.csv'):
    """Write a submission CSV from Mask R-CNN detections.

    For every path in ``image_fps`` the image id is the file name without
    its last four characters. Ids missing from ``positives.index`` get a
    single ``-1`` row. For the others the DICOM is read, stacked to three
    channels when needed, resized with ``utils.resize_image`` and passed to
    the module-level ``model``. Up to ``positives.loc[image_id].N``
    detections whose score exceeds ``config.DETECTION_MIN_CONFIDENCE`` and
    whose mask has more than one positive pixel are resized to
    ``ORIG_SIZE`` and written as RLE rows; the remaining rows up to ``N``
    are filled with a fixed placeholder encoding.

    Args:
        image_fps: Iterable of DICOM file paths.
        config: Mask R-CNN configuration object.
        filepath: Destination CSV path.
    """
    min_conf = config.DETECTION_MIN_CONFIDENCE
    # assume square image
    resize_factor = ORIG_SIZE / config.IMAGE_SHAPE[0]  # computed as before; not used below

    with open(filepath, 'w') as file:
        file.write("ImageId,EncodedPixels\n")

        for fp in tqdm_notebook(image_fps):
            image_id = fp.split('/')[-1][:-4]

            if image_id not in positives.index:
                file.write(image_id + ",-1\n")  # no pneumothorax
                continue

            image = pydicom.read_file(fp).pixel_array
            # If grayscale. Convert to RGB for consistency.
            is_rgb = len(image.shape) == 3 and image.shape[2] == 3
            if not is_rgb:
                image = np.stack((image, ) * 3, -1)
            image, _, _, _, _ = utils.resize_image(
                image,
                min_dim=config.IMAGE_MIN_DIM,
                min_scale=config.IMAGE_MIN_SCALE,
                max_dim=config.IMAGE_MAX_DIM,
                mode=config.IMAGE_RESIZE_MODE)

            r = model.detect([image])[0]
            n_positives = positives.loc[image_id].N
            masks_written = 0

            for i in range(min(len(r['rois']), n_positives)):
                keep = (r['scores'][i] > min_conf
                        and np.sum(r['masks'][..., i]) > 1)
                if not keep:
                    continue
                mask = r['masks'][..., i].T * 255
                mask, _, _, _, _ = utils.resize_image(
                    np.stack((mask, ) * 3, -1),  # requires 3 channels
                    min_dim=ORIG_SIZE,
                    min_scale=config.IMAGE_MIN_SCALE,
                    max_dim=ORIG_SIZE,
                    mode=config.IMAGE_RESIZE_MODE)
                mask = (mask[..., 0] > 0) * 255
                file.write(image_id + "," +
                           mask2rle(mask, ORIG_SIZE, ORIG_SIZE) + "\n")
                masks_written += 1

            # fill up remaining masks
            for _ in range(n_positives - masks_written):
                padding = 88750
                file.write(
                    image_id +
                    f",{padding} {ORIG_SIZE * ORIG_SIZE - padding * 2}\n")
                masks_written += 1

            if masks_written == 0:
                file.write(image_id + ",-1\n")  # no pneumothorax
image_id + '.png') img = Image.open(img_path) width, height = img.size img = np.array(img) img = np.expand_dims(img, axis=2) img = np.expand_dims(img, axis=0) pred = model.predict(img) pred = pred[0, :, :, 0] print(img_path) print(np.unique(pred)) mask = 255 * (pred > threshold).astype(np.uint8).T print(np.unique(mask)) if np.count_nonzero(mask) == 0: rle = " -1" else: rle = mask2rle(mask, width, height) # plt.imshow(rle2mask(rle, 1024, 1024)) else: rle = " -1" sublist.append([image_id, rle]) submission_df = pd.DataFrame(sublist, columns=sample_df.columns.values) submission_df.to_csv("submission.csv", index=False) # print('Counter: ', counter) # # Generates labels using most basic setup. Supports various image sizes. Returns image labels in same format # # as original image. Normalization matches MobileNetV2 # # trained_image_width=512 # mean_subtraction_value=127.5 # image = np.array(Image.open('imgs/image1.jpg')) #
preds_flip = model.predict(x_test, batch_size = batch_size) # predict again preds_flip = np.array(list(np.fliplr(mask) for mask in preds_flip)) # flip predicted masks preds = (0.5 * preds_orig) + (0.5 * preds_flip) # average over the prediction for the original image and a horizontally flipped image del x_test, preds_orig, preds_flip # clear some memory ############## # Submission # ############## sys.path.insert(0, "./input/") threshold = 0.5 # set threshold (obtained by optimizing IOU vs threshold) masks_rle = [] for pred in tqdm(preds_test): pred = pred.squeeze() # convert (256, 256, 1) array to (256, 256) img = cv2.resize(pred, (1024, 1024)) # resize to the original size img = img > threshold if img.sum() < 1024 * 2: # zero out the smaller regions img[:] = 0 img = (img.T * 255).astype(np.uint8) # transpose and re-scale to 1...255 masks_rle.append(mask2rle(img, 1024, 1024)) # compress image (RLE) and store img_ids = list(path.split("/")[-1][:-4] for path in test_img_paths_all) # get image ID and remove .png test_df = pd.DataFrame({"ImageId": img_ids, "EncodedPixels": masks_rle}) test_df.loc[test_df["EncodedPixels"] == "", "EncodedPixels"] = "-1" # label negative if no pneumothorax predicted test_df.to_csv("submission.csv", index = False)
image_resized = cv2.resize(pixel_array, (img_size, img_size)) image_resized = np.array(image_resized, dtype=np.float64) image_resized -= image_resized.mean() image_resized /= image_resized.std() X[0, ] = np.expand_dims(image_resized, axis=2) return X submission = [] for i, row in test_metadata_df.iterrows(): test_img = get_test_tensor(test_metadata_df['file_path'][i], 1, img_size, 1) pred_mask = model.predict(test_img).reshape((img_size, img_size)) prediction = {} prediction['ImageId'] = str(test_metadata_df['id'][i]) pred_mask = (pred_mask > .5).astype(int) if pred_mask.sum() < 1: prediction['EncodedPixels'] = -1 else: prediction['EncodedPixels'] = mask2rle(pred_mask * 255, img_size, img_size) submission.append(prediction) submission_df = pd.DataFrame(submission) submission_df = submission_df[['ImageId', 'EncodedPixels']] submission_df.head() submission_df.to_csv('./submission.csv', index=False)
def tester(model,
           val_loader,
           save_dir,
           device,
           writer,
           best_epoch,
           submit=False,
           follow_aux=False,
           th=0.9):
    """Evaluate a model on a loader, or write a submission CSV.

    With ``submit=False`` every batch is scored with
    ``tools.calc_dice_info``; the per-sample records are saved to
    ``<save_dir>/tot_val_info_df_<best_epoch>`` and three summary metrics
    are printed. With ``submit=True`` one RLE row per batch is written to
    ``./submission/<part of save_dir after '/ckpt/'>_th<th>_submit.csv``.

    Args:
        model: Network returning ``(segmentation_output, label_output)``.
        val_loader: Loader yielding ``(paths, images, segs, labels)``.
        save_dir: Output folder; in submit mode it must contain ``/ckpt/``.
        device: Device the tensors are moved to.
        writer: Not used by the active code (only referenced in the
            commented-out visualisation call).
        best_epoch: Suffix for the validation info file name.
        submit: Selects submission mode instead of evaluation mode.
        follow_aux: Forwarded to ``tools.calc_dice_info``.
        th: Threshold forwarded to ``tools.calc_dice_info`` and applied to
            the softmax output in submit mode.
    """
    print('\n\nInference\n')
    tot_val_info_df = []
    model.eval(
    )  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
    if not submit:
        with torch.no_grad():
            for val_fp, val_img, val_seg, val_lbl in tqdm(val_loader):
                val_img = val_img.permute(0, 3, 1, 2)  # NHWC -> NCHW
                val_seg = torch.squeeze(val_seg, -1)
                val_img = val_img.to(device, dtype=torch.float)
                val_seg = val_seg.to(device, dtype=torch.long)
                val_outputs, val_lbl_outputs = model(val_img)
                val_dice_info = tools.calc_dice_info(val_fp,
                                                     val_lbl,
                                                     val_lbl_outputs,
                                                     val_seg,
                                                     val_outputs,
                                                     follow_aux=follow_aux,
                                                     th=th)
                tot_val_info_df.extend(val_dice_info)
                # visualize.summary_fig(0, save_dir, val_img.detach(), val_seg.detach(), val_outputs.detach(), writer=writer,
                #                       type='inf', draw_num=len(val_fp), save=True, fps=val_fp)  # save fig
        # NOTE(review): indentation was lost in this copy of the file;
        # print(save_dir) is assumed to run once after the loop - confirm.
        print(save_dir)
        tot_val_info_df = pd.DataFrame(
            tot_val_info_df,
            columns=['fp', 'lbl', 'pred_lbl', 'dice', '2TP', 'TP_FP'])
        tot_val_info_df.to_csv(save_dir +
                               '/tot_val_info_df_{}'.format(best_epoch),
                               index=False)
        # Pooled dice: sum of the '2TP' column over sum of the 'TP_FP' column.
        avg_val_dice = np.sum(tot_val_info_df['2TP']) / np.sum(
            tot_val_info_df['TP_FP'])
        print('avg val dice: {:.4f}'.format(avg_val_dice))
        # Mean of the per-sample 'dice' column.
        avg_val_kaggle_dice = np.mean(tot_val_info_df['dice'])
        print('avg val kaggle dice: {:.4f}'.format(avg_val_kaggle_dice))
        val_acc = np.sum(tot_val_info_df['pred_lbl'] ==
                         tot_val_info_df['lbl']) / len(tot_val_info_df)
        print('avg val acc: {:.4f}'.format(val_acc))
    else:
        print('Make Submission CSV')
        submission_csv = pd.DataFrame(columns=['ImageId', 'EncodedPixels'])
        with torch.no_grad():
            i = 0
            for val_fp, val_img, _, _ in tqdm(val_loader):
                val_img = val_img.permute(0, 3, 1, 2)  # NHWC -> NCHW
                val_img = val_img.to(device, dtype=torch.float)
                val_outputs, val_lbl_outputs = model(val_img)
                # Only sample 0 / channel 1 of each batch is used, so this
                # path effectively assumes a batch size of 1.
                test_outputs_softmax = torch.softmax(
                    val_outputs, dim=1)[0, 1, :, :].detach().cpu().numpy()
                test_outputs_mask = test_outputs_softmax > th
                test_outputs = tools.post_process(test_outputs_softmax,
                                                  test_outputs_mask)
                if not test_outputs.shape[0] == 1024:
                    # NOTE(review): cubic interpolation can produce values
                    # outside the input range; confirm that is acceptable
                    # for the mask passed to mask2rle.
                    test_outputs = cv2.resize(test_outputs, (1024, 1024),
                                              interpolation=cv2.INTER_CUBIC)
                rle = mask2rle(test_outputs.T * 255, 1024,
                               1024)  # Note to do transpose operation
                # An empty encoding is written as -1.
                if len(rle) == 0:
                    rle = -1
                # File name without directory and 4-character extension.
                imgid = val_fp[0].split('/')[-1][:-4]
                submission_csv.loc[i, 'ImageId'] = imgid
                submission_csv.loc[i, 'EncodedPixels'] = rle
                i += 1
        submission_csv.to_csv('./submission/{}_th{}_submit.csv'.format(
            save_dir.split('/ckpt/')[1], th),
                              index=False)
rles = df_sub.loc[df_sub["ImageId"] == iid, "EncodedPixels"] # iterate over rles for rle in rles: # if rle is not -1, build prediction mask and add to average mask if "-1" not in str(rle): avg_mask += rle2mask(rle, 1024, 1024) / float( len(df_sub_list)) # threshold the average mask pred_mask = (avg_mask >= (min_solutions / float(len(df_sub_list)))).astype("uint8") # transform to rle if pred_mask.sum() > 0: im = PIL.Image.fromarray( (pred_mask * 255).astype(np.uint8)).resize((1024, 1024)) im = np.asarray(im) rle = mask2rle(im, 1024, 1024) else: rle = "-1" # add a row in the final dataframe df_avg_sub.loc[df_avg_sub_idx] = [iid, rle] df_avg_sub_idx += 1 # increment index #if idx>10: # break df_avg_sub.to_csv('submission/submission0.csv.gz', index=False, compression='gzip') elif model == 1: use_subs = [ 'submission/deeplabv3plus_1024_seed9012_tta_v2_6000_018.csv.gz',
from mask_functions import rle2mask, mask2rle
import pdb

# Generate rle encodings (images are first converted to the original size)
rles = []
i, max_img = 1, 10
plt.figure(figsize=(16, 4))
for p in tqdm_notebook(preds_test):
    p = p.squeeze()
    im = cv2.resize(p, (1024, 1024))
    im = im > threshold_best
    # Clear the whole mask when fewer than 2 * 1024 pixels are set.
    if im.sum() < 1024 * 2:
        im[:] = 0
    # Transpose and scale the boolean mask to 0/255 before encoding.
    im = (im.T * 255).astype(np.uint8)
    rles.append(mask2rle(im, 1024, 1024))
    # i is incremented before use, so previews occupy subplot slots 2..9
    # and slot 1 stays empty.
    i += 1
    if i < max_img:
        plt.subplot(1, max_img, i)
        plt.imshow(im)
        plt.axis('off')
# Image id = file name without directory and 4-character extension.
ids = [o.split('/')[-1][:-4] for o in test_fn]
sub_df = pd.DataFrame({'ImageId': ids, 'EncodedPixels': rles})
# An empty encoding means nothing was predicted for that image.
sub_df.loc[sub_df.EncodedPixels == '', 'EncodedPixels'] = '-1'
sub_df.head()
sub_df.to_csv('orig_submission.csv', index=False)
sub_df.tail(10)
### rm -r */
# Re-read the file just written and load the leak probabilities table.
sub_df = pd.read_csv('orig_submission.csv')
leak_prob = pd.read_csv(
    '../input/leak-probabilities-siim/leak_probabilities.csv')
if "-1" not in str(rle): avg_mask += rle2mask(rle, 1024, 1024) / float(len(df_sub_list)) # threshold the average mask avg_mask = ( avg_mask >= (min_solutions * 255. / float(len(df_sub_list)))).astype("uint8") # extract rles from the average mask avg_rle_list = [] if avg_mask.max() > 0: # label regions labeled_avg_mask, n_labels = skimage.measure.label(avg_mask, return_num=True) # iterate over regions, extract rle, and save to a list for label in range(1, n_labels + 1): avg_rle = mask2rle( (255 * (labeled_avg_mask == label)).astype("uint8"), 1024, 1024) avg_rle_list.append(avg_rle) else: avg_rle_list.append("-1") # iterate over average rles and create a row in the final dataframe for avg_rle in avg_rle_list: df_avg_sub.loc[df_avg_sub_idx] = [iid, avg_rle] df_avg_sub_idx += 1 # increment index df_avg_sub["ImageId"].nunique() if args.best_score_weight: submission_file = 'best_dice_Unet_' + args.encoder_name + '_v' + str( args.version) + '_average.csv' else: