def get_probas(id, net, tile_image, tile, flip_predict, start_timer, log, tile_size, tile_average_step, tile_scale, tile_min_score):
    tile_probability = []
    batch = np.array_split(tile_image, max(1, len(tile_image) // 4))  # chunks of ~4 tiles
    for t, m in enumerate(batch):
        print('\r %s %d / %d %s' % (id, t, len(batch), time_to_str(timer() - start_timer, 'sec')), end='', flush=True)
        m = torch.from_numpy(m).cuda()
        p = []
        with torch.no_grad():
            # inference on the base image
            logit = data_parallel(net, m)
            p.append(torch.sigmoid(logit))

            if flip_predict:
                # inference on images flipped along the x and y axes
                for _dim in [(2,), (3,), (2, 3)]:
                    _logit = data_parallel(net, m.flip(dims=_dim))
                    p.append(torch.sigmoid(_logit).flip(dims=_dim))
            p = torch.stack(p).mean(0)
        tile_probability.append(p.data.cpu().numpy())
    print('\r', end='', flush=True)
    log.write('%s %d / %d %s\n' % (id, t, len(batch), time_to_str(timer() - start_timer, 'sec')))

    # before squeeze, dimension = N_tiles x 1 x tile_x x tile_y
    tile_probability = np.concatenate(tile_probability).squeeze(1)  # N_tiles x tile_x x tile_y
    height, width = tile['image_small'].shape[:2]
    probability = to_mask(
        tile_probability,  # N_tiles x tile_x x tile_y
        tile['coord'],
        height,
        width,
        tile_scale,
        tile_size,
        tile_average_step,
        tile_min_score,
        aggregate='mean')  # result: height x width
    return probability
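
# ---------------------------------------------------------------------------
# The submit() functions below call a three-argument variant,
# get_probas(net, tile_image, flip_predict), that scores a single tile and
# returns its probability map; that variant is not defined in this file.
# The sketch below is a minimal, assumed reconstruction: the C x H x W layout
# of `tile_image` and the flip-based TTA mirror get_probas() above, but the
# exact preprocessing and naming are assumptions, not the project's code.
def get_probas_single_tile(net, tile_image, flip_predict):
    # tile_image: numpy array of shape C x H x W, values in [0, 1]
    m = torch.from_numpy(tile_image[None]).float().cuda()  # add a batch dim
    p = []
    with torch.no_grad():
        p.append(torch.sigmoid(net(m)))
        if flip_predict:
            # test-time augmentation: flip along H, along W, and along both
            for _dim in [(2,), (3,), (2, 3)]:
                _logit = net(m.flip(dims=_dim))
                p.append(torch.sigmoid(_logit).flip(dims=_dim))
        p = torch.stack(p).mean(0)
    return p.squeeze().cpu().numpy()  # H x W probability map
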
def do_valid(net, valid_loader):
    valid_num = 0
    valid_probability = []
    valid_mask = []

    net = net.eval()
    start_timer = timer()
    with torch.no_grad():
        for t, batch in enumerate(valid_loader):
            batch_size = len(batch['index'])
            mask = batch['mask']
            image = batch['image'].cuda()

            logit = data_parallel(net, image)  # net(image)
            probability = torch.sigmoid(logit)

            valid_probability.append(probability.data.cpu().numpy())
            valid_mask.append(mask.data.cpu().numpy())
            valid_num += batch_size

            print('\r %8d / %d %s' % (valid_num, len(valid_loader.dataset), time_to_str(timer() - start_timer, 'sec')), end='', flush=True)

    assert valid_num == len(valid_loader.dataset)

    probability = np.concatenate(valid_probability)
    mask = np.concatenate(valid_mask)

    loss = np_binary_cross_entropy_loss(probability, mask)
    dice = np_dice_score(probability, mask)
    tp, tn, _, _ = np_accuracy(probability, mask, all_metrics=False)
    return [dice, loss, tp, tn]
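
# np_dice_score, np_binary_cross_entropy_loss and np_accuracy are project
# helpers defined elsewhere. As a reference for what do_valid() aggregates,
# here is a minimal soft-Dice sketch; it assumes both inputs are arrays of
# the same shape (probabilities and a binary mask), and the project's exact
# reduction may differ.
def np_dice_score_sketch(probability, mask, eps=1e-8):
    p = probability.reshape(-1)
    t = mask.reshape(-1)
    overlap = (p * t).sum()
    return 2.0 * overlap / (p.sum() + t.sum() + eps)
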
def message(mode='print'):
    # `iteration`, `iter_valid`, `epoch`, `rate`, `batch_loss`, `train_loss`,
    # `valid_loss` and `start_timer` come from the enclosing training loop.
    iter_save = iteration % iter_valid == 0 and iteration > 0
    if mode == 'print':
        asterisk = ' '
        loss = batch_loss
    elif mode == 'log':
        asterisk = '*' if iter_save else ' '
        loss = train_loss

    text = \
        '%0.5f %5.2f%s %4.2f | ' % (rate, iteration / 1000, asterisk, epoch) + \
        '%4.3f %4.3f %4.3f %4.3f | ' % (*valid_loss,) + \
        '%4.3f %4.3f | ' % (*loss,) + \
        '%s' % time_to_str(timer() - start_timer, 'min')
    return text
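
# For reference, with rate=0.001, iteration=25000, epoch=3.2,
# valid_loss=(0.9, 0.1, 0.95, 0.98) and loss=(0.12, 0.34), message('log')
# at a validation iteration produces a line of the form (time illustrative):
# 0.00100 25.00* 3.20 | 0.900 0.100 0.950 0.980 | 0.120 0.340 | ...
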
def submit(sha, server, iterations, fold, scale, flip_predict, checkpoint_sha, layer1, backbone):
    project_repo, raw_data_dir, data_dir = get_data_path(SERVER_RUN)
    print("*** starting inference ***")
    if SERVER_RUN == 'kaggle':
        out_dir = f'../input/hubmap-checkpoints/checkpoint_{checkpoint_sha}'
        result_dir = '/kaggle/working/'
    else:
        out_dir = project_repo + f"/result/Layer_2/fold{'_'.join(map(str, fold))}"
        result_dir = out_dir

    # --------------------------------------------------------------
    # Check the sha1 of the model to use for inference.
    # The current commit is used when none is specified.
    # --------------------------------------------------------------
    if checkpoint_sha is not None or SERVER_RUN == 'kaggle':
        _sha = checkpoint_sha
    else:
        _sha = sha
    if _sha is not None:
        _checkpoint_dir = out_dir + f"/checkpoint_{_sha}/"
        print("Checkpoint for current inference:", _sha)
        print(os.listdir(_checkpoint_dir))

    # --------------------------------------------------------------
    # Select the checkpoints to use for inference:
    #   - 'all'
    #   - 'topN' with N an integer
    #   - INTEGER (= number of iterations)
    # --------------------------------------------------------------
    if isinstance(iterations, list):
        iter_tag = 'custom'
        initial_checkpoint = iterations
    elif iterations == 'all':
        iter_tag = 'all'
        model_checkpoints = os.listdir(_checkpoint_dir)
        initial_checkpoint = [
            out_dir + f'/checkpoint_{_sha}/{model_checkpoint}'
            for model_checkpoint in model_checkpoints
        ]
    elif 'top' in iterations:
        nbest = int(iterations.strip('top'))
        iter_tag = f'top{nbest}'
        model_checkpoints = os.listdir(_checkpoint_dir)
        scores = [float(_file.split('_')[1]) for _file in model_checkpoints]
        # keep the nbest checkpoints with the highest validation scores
        ordered_models = sorted(zip(model_checkpoints, scores), key=lambda x: x[1], reverse=True)
        model_checkpoints = [name for name, _score in ordered_models[:nbest]]
        initial_checkpoint = [
            out_dir + f'/checkpoint_{_sha}/{model_checkpoint}'
            for model_checkpoint in model_checkpoints
        ]
    else:
        iter_tag = f"{int(iterations):08}"
        [model_checkpoint] = [
            _file for _file in os.listdir(_checkpoint_dir)
            if iter_tag in _file.split('_')[0]
        ]
        initial_checkpoint = [out_dir + f'/checkpoint_{_sha}/{model_checkpoint}']
    print("checkpoint(s):", initial_checkpoint)
    print(f"submit with server={server}")

    # ------------------------------------------------------
    # Build the output directory for the predictions
    # ------------------------------------------------------
    if SERVER_RUN == 'kaggle':
        submit_dir = result_dir
    else:
        tag = '' if checkpoint_sha is None else checkpoint_sha + '-'
        if iterations == 'all':
            submit_dir = result_dir + f'/predictions_{sha}/%s-%s-%smean' % (server, 'all', tag)
        elif flip_predict:
            submit_dir = result_dir + f'/predictions_{sha}/%s-%s-%smean' % (server, iter_tag, tag)
        else:
            submit_dir = result_dir + f'/predictions_{sha}/%s-%s-%snoflip' % (server, iter_tag, tag)
    os.makedirs(submit_dir, exist_ok=True)

    log = Logger()
    log.open(result_dir + f'/log.submit_{sha}.txt', mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))

    ##########################################################################
    # Get the IDs of the images
    ##########################################################################
    if SERVER_RUN == 'kaggle':
        df_submit = pd.read_csv(
            '../input/hubmap-kidney-segmentation/sample_submission.csv',
            index_col='id')
        valid_image_id = df_submit.index.tolist()
    elif server == 'local':
        valid_image_id = make_image_id('train-all')
    elif server == 'kaggle':
        valid_image_id = make_image_id('test-all')

    ##########################################################################
    # Define prediction parameters
    ##########################################################################
    tile_size = int(256 * 4)
    tile_average_step = 320
    # tile_scale = 0.25
    tile_min_score = 0.25

    log.write('tile_size = %d \n' % tile_size)
    log.write('tile_average_step = %d \n' % tile_average_step)
    log.write('tile_scale = %f \n' % scale)
    log.write('tile_min_score = %f \n' % tile_min_score)
    log.write('\n')

    ##################################
    # Start iterating over images
    ##################################
    predicted = []
    df = pd.DataFrame()
    full_size = {}
    start_timer = timer()
    for ind, id in enumerate(valid_image_id):
        log.write(50 * "=" + "\n")
        log.write(f"Inference for image: {id} \n")

        ###############
        # Define tiles
        ###############
        tiles = TileGenerator(image_id=id,
                              raw_data_dir=raw_data_dir,
                              size=tile_size,
                              scale=scale,
                              layer1_path=layer1,
                              server=server)
        print(30 * '-')
        height = tiles.height
        width = tiles.width
        print(f"tile matrix shape (without scaling): {height} x {width}")
        tile_probability = []
        results = []

        ##############################################
        # Iterate over sub-images with scaled sizes
        ##############################################
        for index, tile in enumerate(tiles.get_next()):
            if SERVER_RUN != 'kaggle':
                print('\r %s: n°%d %s' % (ind, index, time_to_str(timer() - start_timer, 'sec')), end='', flush=True)
            elif index % 50 == 0:
                print('\r %s: n°%d %s' % (ind, index, time_to_str(timer() - start_timer, 'sec')), end='', flush=True)

            #######################################
            # Iterate over models.
            # The predictions are then averaged.
            #######################################
            overall_probabilities = []
            for _num, _checkpoint in enumerate(initial_checkpoint):
                net = Net(backbone).cuda()
                state_dict = torch.load(
                    _checkpoint,
                    map_location=lambda storage, loc: storage)['state_dict']
                net.load_state_dict(state_dict, strict=True)
                net = net.eval()

                image_probability = get_probas(net, tile['tile_image'], flip_predict)

                # optionally crop a border of _cut pixels from the prediction
                _cut = 0
                if _cut > 0:
                    _border_cut = image_probability[_cut:-_cut, _cut:-_cut]
                else:
                    _border_cut = image_probability
                effective_tile_size = _border_cut.shape[0]
                overall_probabilities.append(_border_cut)

                ################################################################
                # Save and visualize the current tile
                ################################################################
                last_iter = _num == len(initial_checkpoint) - 1
                if SERVER_RUN == 'local':
                    if server == 'local':
                        if _cut > 0:
                            _mask = tile['tile_mask'][_cut:-_cut, _cut:-_cut]
                        else:
                            _mask = tile['tile_mask']
                    else:
                        _mask = None
                    if _cut > 0:
                        _image = tile['tile_image'][:, _cut:-_cut, _cut:-_cut]
                    else:
                        _image = tile['tile_image']
                    image_name, x0, y0, dice, tp, tn, fp, fn = result_bookeeping(
                        id, _border_cut, overall_probabilities, _mask, _image,
                        tile['centroids'], server, submit_dir,
                        save_to_disk=last_iter)
                    if last_iter:
                        results.append([id, image_name, x0, y0, dice, tp, tn, fp, fn])

            _probas = np.mean(overall_probabilities, axis=0)
            tile_probability.append(_probas.astype(np.float32))
            del overall_probabilities, _probas
            del net, state_dict, image_probability
            gc.collect()

        ###########################################################################
        # Stitch the sub-images back into an image of the original (scaled) size.
        # During stitching, each pixel is weighted by its distance to the centre
        # of its tile.
        ###########################################################################
        scaled_centroid_list = (np.array(tiles.centroid_list) * scale).astype(int).tolist()
        probability = to_mask(
            tile_probability,  # N x scaled_height x scaled_width
            scaled_centroid_list,
            int(scale * height),
            int(scale * width),
            scale,
            effective_tile_size,
            tile_average_step,
            tile_min_score,
            aggregate='mean')

        # -------------------------------------------------
        # Optionally save the probability array to disk:
        # np.savez_compressed(submit_dir + f'/proba_{id}.npy', probability=probability)

        # --- show results ---
        if server == 'local':
            truth = tiles.original_mask.astype(np.float32) / 255
            truth = cv2.resize(truth,
                               dsize=(int(scale * truth.shape[1]), int(scale * truth.shape[0])),
                               interpolation=cv2.INTER_LINEAR)
            loss = np_binary_cross_entropy_loss_optimized(probability, truth)
            dice = np_dice_score_optimized(probability, truth)
            tp, tn = np_accuracy_optimized(probability, truth)

            _tmp = pd.DataFrame(results)
            _tmp.columns = ['id', 'image_name', 'x', 'y', 'dice', 'tp', 'tn', 'fp', 'fn']
            _tmp.to_csv(submit_dir + f'/{id}.csv')

            log.write(30 * "-" + '\n')
            log.write('submit_dir = %s \n' % submit_dir)
            log.write('initial_checkpoint = %s \n' % [c.split('2020-12-11')[-1] for c in initial_checkpoint])
            log.write('loss = %0.8f \n' % loss)
            log.write('dice = %0.8f \n' % dice)
            log.write('tp, tn = %0.8f, %0.8f \n' % (tp, tn))
            log.write('\n')
        elif server == 'kaggle':
            print('starting predicted-mask creation')
            if SERVER_RUN == 'kaggle':
                scaled_width = probability.shape[1]
                scaled_height = probability.shape[0]
                full_size[id] = (width, height, scaled_width, scaled_height)
            else:
                probability = cv2.resize(probability,
                                         dsize=(width, height),
                                         interpolation=cv2.INTER_LINEAR)
            predict = (probability > 0.5).astype(bool)
            print("predict array created")
            print('predict array shape:', predict.shape)
            del probability
            gc.collect()

            p = rle_encode_batched(predict)
            predicted.append(p)
            print("encoding created")
            del predict
            gc.collect()

    # -----
    if server == 'kaggle':
        df['id'] = valid_image_id
        df['predicted'] = predicted
        if SERVER_RUN == 'kaggle':
            csv_file = 'submission_layer2.csv'
        else:
            csv_file = submit_dir + f'/submission_{sha}-%s-%s%s.csv' % (
                out_dir.split('/')[-1], tag, iter_tag)
        df.to_csv(csv_file, index=False)
        print(df)
    return full_size
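
# ---------------------------------------------------------------------------
# rle_encode_batched() used above is a project helper defined elsewhere.
# For reference, the sketch below is a minimal run-length encoder in the
# usual Kaggle segmentation format (column-major pixel order, 1-indexed
# start positions, space-separated "start length" pairs); whether the
# batched helper chunks the work differently is left to the project code.
def rle_encode_sketch(mask):
    # mask: 2-D boolean array; returns the RLE string for a submission row
    pixels = mask.flatten(order='F').astype(np.uint8)
    pixels = np.concatenate([[0], pixels, [0]])  # pad to catch edge runs
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]  # convert end positions to run lengths
    return ' '.join(str(x) for x in runs)
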
def submit(sha, server, iterations, fold, scale, flip_predict, checkpoint_sha, backbone, proba_threshold):
    project_repo, raw_data_dir, data_dir = get_data_path(SERVER_RUN)
    if SERVER_RUN == 'kaggle':
        out_dir = f'../input/hubmap-checkpoints/checkpoint_{checkpoint_sha}/'
        result_dir = '/kaggle/working/'
    else:
        out_dir = project_repo + f"/result/Layer_1/fold{'_'.join(map(str, fold))}"
        result_dir = out_dir

    # --------------------------------------------------------------
    # Check the sha1 of the model to use for inference.
    # The current commit is used when none is specified.
    # --------------------------------------------------------------
    if checkpoint_sha is not None or SERVER_RUN == 'kaggle':
        _sha = checkpoint_sha
    else:
        _sha = sha
    if _sha is not None:
        _checkpoint_dir = out_dir + f"/checkpoint_{_sha}/"
        print("Checkpoint for current inference:", _sha)
        print(os.listdir(_checkpoint_dir))

    # --------------------------------------------------------------
    # Select the checkpoints to use for inference:
    #   - 'all'
    #   - 'topN' with N an integer
    #   - INTEGER (= number of iterations)
    # --------------------------------------------------------------
    if isinstance(iterations, list):
        iter_tag = 'custom'
        initial_checkpoint = iterations
    elif iterations == 'all':
        iter_tag = 'all'
        model_checkpoints = os.listdir(_checkpoint_dir)
        initial_checkpoint = [
            out_dir + f'/checkpoint_{_sha}/{model_checkpoint}'
            for model_checkpoint in model_checkpoints
        ]
    elif 'top' in iterations:
        nbest = int(iterations.strip('top'))
        iter_tag = f'top{nbest}'
        model_checkpoints = os.listdir(_checkpoint_dir)
        scores = [float(_file.split('_')[1]) for _file in model_checkpoints]
        # keep the nbest checkpoints with the highest validation scores
        ordered_models = sorted(zip(model_checkpoints, scores), key=lambda x: x[1], reverse=True)
        model_checkpoints = [name for name, _score in ordered_models[:nbest]]
        initial_checkpoint = [
            out_dir + f'/checkpoint_{_sha}/{model_checkpoint}'
            for model_checkpoint in model_checkpoints
        ]
    else:
        iter_tag = f"{int(iterations):08}"
        [model_checkpoint] = [
            _file for _file in os.listdir(_checkpoint_dir)
            if iter_tag in _file.split('_')[0]
        ]
        initial_checkpoint = [out_dir + f'/checkpoint_{_sha}/{model_checkpoint}']
    print("checkpoint(s):", initial_checkpoint)
    print(f"submit with server={server}")

    # ------------------------------------------------------
    # Build the output directory for the predictions
    # ------------------------------------------------------
    if SERVER_RUN == 'kaggle':
        submit_dir = result_dir
    else:
        tag = '' if checkpoint_sha is None else checkpoint_sha + '-'
        if iterations == 'all':
            submit_dir = result_dir + f'/predictions_{sha}/%s-%s-%smax' % (server, 'all', tag)
        elif flip_predict:
            submit_dir = result_dir + f'/predictions_{sha}/%s-%s-%smax' % (server, iter_tag, tag)
        else:
            submit_dir = result_dir + f'/predictions_{sha}/%s-%s-%snoflip' % (server, iter_tag, tag)
    os.makedirs(submit_dir, exist_ok=True)

    log = Logger()
    log.open(result_dir + f'/log.submit_{sha}.txt', mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))

    ##########################################################################
    # Get the IDs of the images
    ##########################################################################
    if SERVER_RUN == 'kaggle':
        df_submit = pd.read_csv(
            '../input/hubmap-kidney-segmentation/sample_submission.csv',
            index_col='id')
        valid_image_id = df_submit.index.tolist()
    elif server == 'local':
        valid_image_id = make_image_id('train-all')
    elif server == 'kaggle':
        valid_image_id = make_image_id('test-all')

    ##########################################################################
    # Define prediction parameters
    ##########################################################################
    tile_size = 256 * 3  # size in the scaled coordinate system
    tile_average_step = 320
    tile_min_score = 0.25

    log.write('tile_size = %d \n' % tile_size)
    log.write('tile_average_step = %d \n' % tile_average_step)
    log.write('tile_scale = %f \n' % scale)
    log.write('tile_min_score = %f \n' % tile_min_score)
    log.write('\n')

    ##################################
    # Start iterating over images
    ##################################
    predicted = []
    df = pd.DataFrame()
    start_timer = timer()
    for ind, id in enumerate(valid_image_id):
        log.write(50 * "=" + "\n")
        log.write(f"Inference for image: {id} \n")

        ###############
        # Define tiles
        ###############
        tiles = TileGenerator(image_id=id,
                              raw_data_dir=raw_data_dir,
                              size=tile_size,
                              scale=scale,
                              server=server)
        print(30 * '-')
        height = tiles.height
        width = tiles.width
        print(f"tile matrix shape (without scaling): {height} x {width}")
        tile_probability = []
        results = []

        ##############################################
        # Iterate over sub-images with scaled sizes
        ##############################################
        for index, tile in enumerate(tiles.get_next()):
            if SERVER_RUN != 'kaggle':
                print('\r %s: n°%d %s' % (ind, index, time_to_str(timer() - start_timer, 'sec')), end='', flush=True)
            elif index % 100 == 0:
                print('\r %s: n°%d %s' % (ind, index, time_to_str(timer() - start_timer, 'sec')), end='', flush=True)

            # Skip nearly colourless tiles: a mean saturation below 0.05
            # indicates background, so predict all zeros without inference.
            h, s, v = tile['hsv']
            if s < 0.05:
                tile_probability.append(np.zeros((tile_size, tile_size)))
                continue

            #######################################
            # Iterate over models. The per-tile predictions are then
            # aggregated with a pixel-wise max over checkpoints.
            #######################################
            overall_probabilities = []
            for _num, _checkpoint in enumerate(initial_checkpoint):
                net = Net(backbone).cuda()
                state_dict = torch.load(
                    _checkpoint,
                    map_location=lambda storage, loc: storage)['state_dict']
                net.load_state_dict(state_dict, strict=True)
                net = net.eval()

                image_probability = get_probas(net, tile['tile_image'], flip_predict)
                overall_probabilities.append(image_probability)

                ################################################################
                # Save and visualize the current tile
                ################################################################
                last_iter = _num == len(initial_checkpoint) - 1
                if SERVER_RUN == 'local':
                    image_name, x0, y0, dice = result_bookeeping(
                        id, image_probability, overall_probabilities,
                        tile['tile_mask'], tile['tile_image'],
                        tile['centroids'], server, submit_dir,
                        save_to_disk=last_iter,
                        resize_scale=800 / tile_size)
                    if last_iter:
                        results.append([id, image_name, x0, y0, dice])

            _probas = np.max(overall_probabilities, axis=0)
            tile_probability.append(_probas.astype(np.float32))
            del overall_probabilities, _probas
            del net, state_dict, image_probability
            gc.collect()

        ###########################################################################
        # Stitch the sub-images back into an image of the original (scaled) size.
        # During stitching, each pixel is weighted by its distance to the centre
        # of its tile.
        ###########################################################################
        scaled_centroid_list = (np.array(tiles.centroid_list) * scale).astype(int).tolist()
        probability = to_mask(
            tile_probability,  # N x scaled_height x scaled_width
            scaled_centroid_list,
            int(scale * height),
            int(scale * width),
            scale,
            tile_size,
            tile_average_step,
            tile_min_score,
            aggregate='max')

        predict = (probability > proba_threshold).astype(np.uint8)
        cv2.imwrite(submit_dir + '/%s.probability.png' % id, (probability * 255).astype(np.uint8))
        cv2.imwrite(submit_dir + '/%s.predict.png' % id, predict * 255)
        del predict, probability
        gc.collect()
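
# ---------------------------------------------------------------------------
# to_mask() (imported from the project) stitches the per-tile probability
# maps back into one image; per the comments above, overlapping pixels are
# weighted by their distance to the tile centre. The project implementation
# is not shown here, so the sketch below only illustrates the idea for
# aggregate='mean' under assumed, simplified semantics (square tiles fully
# inside the image, a triangular weight profile, and `coords` giving
# top-left corners); names and details are illustrative.
def stitch_tiles_sketch(tile_probs, coords, height, width, tile_size):
    accum = np.zeros((height, width), np.float32)
    weight = np.zeros((height, width), np.float32)
    # triangular kernel: weight 1 at the tile centre, decaying to the borders
    ramp = 1.0 - np.abs(np.linspace(-1, 1, tile_size, dtype=np.float32))
    kernel = np.outer(ramp, ramp) + 1e-6
    for p, (x0, y0) in zip(tile_probs, coords):
        accum[y0:y0 + tile_size, x0:x0 + tile_size] += p * kernel
        weight[y0:y0 + tile_size, x0:x0 + tile_size] += kernel
    return accum / np.maximum(weight, 1e-6)
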