Example #1
def get_probas(id, net, tile_image, tile, flip_predict, start_timer, log,
               tile_size, tile_average_step, tile_scale, tile_min_score):

    tile_probability = []
    batch = np.array_split(tile_image, max(1, len(tile_image) // 4))  # batches of ~4 tiles

    for t, m in enumerate(batch):
        print('\r %s  %d / %d   %s' %
              (id, t, len(batch), time_to_str(timer() - start_timer, 'sec')),
              end='',
              flush=True)
        m = torch.from_numpy(m).cuda()

        p = []
        with torch.no_grad():
            # inference on the base image
            logit = data_parallel(net, m)
            p.append(torch.sigmoid(logit))

            if flip_predict:  # inference on images flipped along the x and y axes
                for _dim in [(2, ), (3, ), (2, 3)]:
                    _logit = data_parallel(net, m.flip(dims=_dim))
                    p.append(torch.sigmoid(_logit).flip(dims=_dim))  # undo the flip before averaging

        p = torch.stack(p).mean(0)
        tile_probability.append(p.data.cpu().numpy())

    print('\r', end='', flush=True)
    log.write('%s  %d / %d   %s\n' %
              (id, t, len(batch), time_to_str(timer() - start_timer, 'sec')))

    # before squeeze, dimension = N_tiles x 1 x tile_x x tile_y
    tile_probability = np.concatenate(tile_probability).squeeze(
        1)  # N_tiles x tile_x x tile_y
    height, width = tile['image_small'].shape[:2]
    probability = to_mask(
        tile_probability,  # N_tiles x tile_x x tile_y
        tile['coord'],
        height,
        width,
        tile_scale,
        tile_size,
        tile_average_step,
        tile_min_score,
        aggregate='mean')
    return probability
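# A minimal, self-contained sketch of the flip test-time augmentation used in
# get_probas above, assuming only a generic PyTorch module; the helper name
# flip_tta_probabilities and its signature are illustrative, not part of the
# original code.
import torch


def flip_tta_probabilities(model: torch.nn.Module, batch: torch.Tensor) -> torch.Tensor:
    """Average sigmoid probabilities over flips along H, W, and both axes."""
    with torch.no_grad():
        probs = [torch.sigmoid(model(batch))]
        for dims in [(2,), (3,), (2, 3)]:
            logit = model(batch.flip(dims=dims))
            probs.append(torch.sigmoid(logit).flip(dims=dims))  # undo the flip before averaging
    return torch.stack(probs).mean(0)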
Example #2
def do_valid(net, valid_loader):
    valid_num = 0
    valid_probability = []
    valid_mask = []

    net = net.eval()
    start_timer = timer()
    with torch.no_grad():
        for t, batch in enumerate(valid_loader):
            batch_size = len(batch['index'])
            mask = batch['mask']
            image = batch['image'].cuda()

            logit = data_parallel(net, image)  # equivalent to net(image) on a single GPU
            probability = torch.sigmoid(logit)

            valid_probability.append(probability.data.cpu().numpy())
            valid_mask.append(mask.data.cpu().numpy())
            valid_num += batch_size

            # ---
            print('\r %8d / %d  %s' %
                  (valid_num, len(valid_loader.dataset),
                   time_to_str(timer() - start_timer, 'sec')),
                  end='',
                  flush=True)
            # if valid_num==200*4: break

    assert (valid_num == len(valid_loader.dataset))
    probability = np.concatenate(valid_probability)
    mask = np.concatenate(valid_mask)

    loss = np_binary_cross_entropy_loss(probability, mask)
    dice = np_dice_score(probability, mask)
    tp, tn, _, _ = np_accuracy(probability, mask, all_metrics=False)
    return [dice, loss, tp, tn]
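# The metric helpers np_dice_score and np_binary_cross_entropy_loss are not shown in
# this snippet; the sketch below is an assumed NumPy implementation of a soft Dice
# score over the whole validation set, not necessarily the project's own version.
import numpy as np


def soft_dice_score(probability: np.ndarray, mask: np.ndarray, eps: float = 1e-8) -> float:
    """Soft Dice between predicted probabilities and a binary mask, both flattened."""
    p = probability.reshape(-1).astype(np.float64)
    t = mask.reshape(-1).astype(np.float64)
    intersection = (p * t).sum()
    return float((2.0 * intersection + eps) / (p.sum() + t.sum() + eps))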
Example #3
    def message(mode='print'):
        # iteration and iter_valid come from the enclosing training loop
        iter_save = iteration % iter_valid == 0 and iteration > 0
        if mode == 'print':
            asterisk = ' '
            loss = batch_loss
        if mode == 'log':
            asterisk = '*' if iter_save else ' '
            loss = train_loss

        text = \
            '%0.5f  %5.2f%s %4.2f | ' % (rate, iteration / 1000, asterisk, epoch,) + \
            '%4.3f  %4.3f  %4.3f  %4.3f  | ' % (*valid_loss,) + \
            '%4.3f  %4.3f   | ' % (*loss,) + \
            '%s' % (time_to_str(timer() - start_timer, 'min'))

        return text
Example #4
def submit(sha, server, iterations, fold, scale, flip_predict, checkpoint_sha,
           layer1, backbone):
    project_repo, raw_data_dir, data_dir = get_data_path(SERVER_RUN)

    print("*** starts inference ***")

    if SERVER_RUN == 'kaggle':
        out_dir = f'../input/hubmap-checkpoints/checkpoint_{checkpoint_sha}'
        result_dir = '/kaggle/working/'
    else:
        out_dir = project_repo + f"/result/Layer_2/fold{'_'.join(map(str, fold))}"
        result_dir = out_dir

    # --------------------------------------------------------------
    # Check the SHA1 of the model to use for inference
    # The current commit is used if none is specified
    # --------------------------------------------------------------
    if checkpoint_sha is not None or SERVER_RUN == 'kaggle':
        _sha = checkpoint_sha
    else:
        _sha = sha

    if _sha is not None:
        _checkpoint_dir = out_dir + f"/checkpoint_{_sha}/"
        print("Checkpoint for current inference:", _sha)
        print(os.listdir(_checkpoint_dir))

    # --------------------------------------------------------------
    # Check which checkpoints to use for inference:
    # - 'all'
    # - 'topN' with N an integer
    # - INTEGER (= number of iterations)
    # --------------------------------------------------------------

    if isinstance(iterations, list):
        iter_tag = 'custom'
        initial_checkpoint = iterations

    elif iterations == 'all':
        iter_tag = 'all'
        model_checkpoints = os.listdir(_checkpoint_dir)
        initial_checkpoint = [
            out_dir + f'/checkpoint_{_sha}/{model_checkpoint}'
            for model_checkpoint in model_checkpoints
        ]

    elif 'top' in iterations:
        nbest = int(iterations.strip('top'))

        iter_tag = f'top{nbest}'
        model_checkpoints = os.listdir(_checkpoint_dir)
        scores = [float(_file.split('_')[1]) for _file in model_checkpoints]

        ordered_models = list(zip(model_checkpoints, scores))
        ordered_models.sort(key=lambda x: x[1], reverse=True)
        model_checkpoints = [name for name, _ in ordered_models[:nbest]]

        initial_checkpoint = [
            out_dir + f'/checkpoint_{_sha}/{model_checkpoint}'
            for model_checkpoint in model_checkpoints
        ]

    else:
        iter_tag = f"{int(iterations):08}"
        [model_checkpoint] = [
            _file for _file in os.listdir(_checkpoint_dir)
            if iter_tag in _file.split('_')[0]
        ]
        initial_checkpoint = [
            out_dir + f'/checkpoint_{_sha}/{model_checkpoint}'
        ]

    print("checkpoint(s):", initial_checkpoint)
    print(f"submit with server={server}")

    # ------------------------------------------------------
    # Define the output directory for the predictions
    # ------------------------------------------------------
    if SERVER_RUN == 'kaggle':
        submit_dir = result_dir
    else:
        if checkpoint_sha is None:
            tag = ''
        else:
            tag = checkpoint_sha + '-'

        if iterations == 'all':
            submit_dir = result_dir + f'/predictions_{sha}/%s-%s-%smean' % (
                server, 'all', tag)
        elif flip_predict:
            submit_dir = result_dir + f'/predictions_{sha}/%s-%s-%smean' % (
                server, iter_tag, tag)
        else:
            submit_dir = result_dir + f'/predictions_{sha}/%s-%s-%snoflip' % (
                server, iter_tag, tag)

    os.makedirs(submit_dir, exist_ok=True)

    log = Logger()
    log.open(result_dir + f'/log.submit_{sha}.txt', mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))

    ##########################################################################################
    # Get the IDs of the images --------------------------------------------------------------
    ##########################################################################################
    if SERVER_RUN == 'kaggle':
        df_submit = pd.read_csv(
            '../input/hubmap-kidney-segmentation/sample_submission.csv',
            index_col='id')
        valid_image_id = df_submit.index.tolist()
    elif server == 'local':
        valid_image_id = make_image_id('train-all')
    elif server == 'kaggle':
        valid_image_id = make_image_id('test-all')

    ##########################################################################################
    # Define prediction parameters -----------------------------------------------------------
    ##########################################################################################
    tile_size = int(256 * 4)
    tile_average_step = 320
    # tile_scale = 0.25
    tile_min_score = 0.25

    log.write('tile_size = %d \n' % tile_size)
    log.write('tile_average_step = %d \n' % tile_average_step)
    log.write('tile_scale = %f \n' % scale)
    log.write('tile_min_score = %f \n' % tile_min_score)
    log.write('\n')

    ##################################
    # Starts iterating over images
    ##################################
    predicted = []
    df = pd.DataFrame()
    full_size = {}
    start_timer = timer()

    for ind, id in enumerate(valid_image_id):

        log.write(50 * "=" + "\n")
        log.write(f"Inference for image: {id} \n")

        ###############
        # Define tiles
        ###############

        tiles = TileGenerator(image_id=id,
                              raw_data_dir=raw_data_dir,
                              size=tile_size,
                              scale=scale,
                              layer1_path=layer1,
                              server=server)

        print(30 * '-')
        height = tiles.height
        width = tiles.width
        print(f"full image shape (before scaling): {height} x {width}")

        tile_probability = []
        results = []
        ##############################################
        ### Iterate on sub-images with scaled sizes
        ##############################################
        for index, tile in enumerate(tiles.get_next()):

            if SERVER_RUN != 'kaggle':
                print('\r %s: n°%d %s' %
                      (ind, index, time_to_str(timer() - start_timer, 'sec')),
                      end='',
                      flush=True)
            elif index % 50 == 0:
                print('\r %s: n°%d %s' %
                      (ind, index, time_to_str(timer() - start_timer, 'sec')),
                      end='',
                      flush=True)

            #######################################
            # Iterates over models.
            # The predictions are then averaged.
            #######################################
            overall_probabilities = []
            for _num, _checkpoint in enumerate(initial_checkpoint):
                net = Net(backbone).cuda()
                state_dict = torch.load(
                    _checkpoint,
                    map_location=lambda storage, loc: storage)['state_dict']
                net.load_state_dict(state_dict, strict=True)
                net = net.eval()
                image_probability = get_probas(net, tile['tile_image'],
                                               flip_predict)

                _cut = 0
                if _cut > 0:
                    _border_cut = image_probability[_cut:-_cut, _cut:-_cut]
                else:
                    _border_cut = image_probability

                effective_tile_size = _border_cut.shape[0]
                overall_probabilities.append(_border_cut)

                ################################################################
                # Save and visualise the current tile
                ################################################################
                last_iter = _num == len(initial_checkpoint) - 1

                if SERVER_RUN == 'local':

                    # print("\n image shape:", tile['tile_image'].shape)

                    if server == 'local':
                        if _cut > 0:
                            _mask = tile['tile_mask'][_cut:-_cut, _cut:-_cut]
                        else:
                            _mask = tile['tile_mask']
                    else:
                        _mask = None

                    if _cut > 0:
                        _image = tile['tile_image'][:, _cut:-_cut, _cut:-_cut]
                    else:
                        _image = tile['tile_image']

                    # print("\n image shape:", _image.shape)

                    image_name, x0, y0, dice, tp, tn, fp, fn = result_bookeeping(
                        id,
                        _border_cut,
                        overall_probabilities,
                        _mask,
                        _image,
                        tile['centroids'],
                        server,
                        submit_dir,
                        save_to_disk=last_iter)
                    if last_iter:
                        results.append(
                            [id, image_name, x0, y0, dice, tp, tn, fp, fn])

            _probas = np.mean(overall_probabilities, axis=0)
            tile_probability.append(_probas.astype(np.float32))
            del overall_probabilities, _probas
            del net, state_dict, image_probability
            gc.collect()

        ###############################################################################
        # Concatenate the sub-images and rebuild a mask at the full (scaled) image size.
        # During this merge, each pixel is weighted by its distance to the tile centre.
        ###############################################################################

        scaled_centroid_list = (np.array(tiles.centroid_list) * scale).astype(int).tolist()

        probability = to_mask(
            tile_probability,  # N * scaled_height x scaled_width
            scaled_centroid_list,
            int(scale * height),
            int(scale * width),
            scale,
            effective_tile_size,
            tile_average_step,
            tile_min_score,
            aggregate='mean')

        # -------------------------------------------------
        # Saves the numpy array that contains probabilities
        # np.savez_compressed(submit_dir + f'/proba_{id}.npy', probability=probability)

        # --- show results ---
        if server == 'local':
            truth = tiles.original_mask.astype(np.float32) / 255
            # print("before rescaling", truth.shape)

            truth = cv2.resize(truth,
                               dsize=(int(scale * truth.shape[1]),
                                      int(scale * truth.shape[0])),
                               interpolation=cv2.INTER_LINEAR)

            loss = np_binary_cross_entropy_loss_optimized(probability, truth)
            dice = np_dice_score_optimized(probability, truth)
            tp, tn = np_accuracy_optimized(probability, truth)
            # tp, tn, fp, fn = np_accuracy(probability, truth)
            # print(dice, tp, tn)

            _tmp = pd.DataFrame(results)
            _tmp.columns = [
                'id', 'image_name', 'x', 'y', 'dice', 'tp', 'tn', 'fp', 'fn'
            ]
            _tmp.to_csv(submit_dir + f'/{id}.csv')

            log.write(30 * "-" + '\n')
            log.write('submit_dir = %s \n' % submit_dir)
            log.write('initial_checkpoint = %s \n' %
                      [c.split('2020-12-11')[-1] for c in initial_checkpoint])
            log.write('loss   = %0.8f \n' % loss)
            log.write('dice   = %0.8f \n' % dice)
            log.write('tp, tn = %0.8f, %0.8f \n' % (tp, tn))
            log.write('\n')

        elif server == 'kaggle':
            print('starts predict mask creation')

            if SERVER_RUN == 'kaggle':
                scaled_width = probability.shape[1]
                scaled_height = probability.shape[0]
                full_size[id] = (width, height, scaled_width, scaled_height)
            else:
                probability = cv2.resize(probability,
                                         dsize=(width, height),
                                         interpolation=cv2.INTER_LINEAR)

            predict = (probability > 0.5).astype(bool)

            print("predict array created")
            print('predict array shape:', predict.shape)

            del probability
            gc.collect()
            p = rle_encode_batched(predict)
            predicted.append(p)
            print("encoding created")

            del predict
            gc.collect()

    # -----
    if server == 'kaggle':
        df['id'] = valid_image_id
        df['predicted'] = predicted
        if SERVER_RUN == 'kaggle':
            csv_file = 'submission_layer2.csv'
        else:
            csv_file = submit_dir + f'/submission_{sha}-%s-%s%s.csv' % (
                out_dir.split('/')[-1], tag, iter_tag)

        df.to_csv(csv_file, index=False)
        print(df)

    return full_size
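# rle_encode_batched is defined elsewhere in the repository; as a reference, the
# standard run-length encoding expected by the HuBMAP Kaggle submission format
# (column-major pixel order, 1-based starts) can be sketched as follows. This is an
# assumed implementation, not necessarily the project's batched variant.
import numpy as np


def rle_encode(mask: np.ndarray) -> str:
    """Encode a binary mask as 'start length start length ...'."""
    pixels = mask.flatten(order='F').astype(np.uint8)   # Fortran (column-major) order
    padded = np.concatenate([[0], pixels, [0]])
    runs = np.where(padded[1:] != padded[:-1])[0] + 1   # 1-based positions where the value changes
    runs[1::2] -= runs[0::2]                            # convert end positions into run lengths
    return ' '.join(str(x) for x in runs)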
Example #5
def submit(sha, server, iterations, fold, scale, flip_predict, checkpoint_sha,
           backbone, proba_threshold):
    project_repo, raw_data_dir, data_dir = get_data_path(SERVER_RUN)

    if SERVER_RUN == 'kaggle':
        out_dir = f'../input/hubmap-checkpoints/checkpoint_{checkpoint_sha}/'
        result_dir = '/kaggle/working/'
    else:
        out_dir = project_repo + f"/result/Layer_1/fold{'_'.join(map(str, fold))}"
        result_dir = out_dir

    # --------------------------------------------------------------
    # Check the SHA1 of the model to use for inference
    # The current commit is used if none is specified
    # --------------------------------------------------------------
    if checkpoint_sha is not None or SERVER_RUN == 'kaggle':
        _sha = checkpoint_sha
    else:
        _sha = sha

    if _sha is not None:
        _checkpoint_dir = out_dir + f"/checkpoint_{_sha}/"
        print("Checkpoint for current inference:", _sha)
        print(os.listdir(_checkpoint_dir))

    # --------------------------------------------------------------
    # Check which checkpoints to use for inference:
    # - 'all'
    # - 'topN' with N an integer
    # - INTEGER (= number of iterations)
    # --------------------------------------------------------------

    if isinstance(iterations, list):
        iter_tag = 'custom'
        initial_checkpoint = iterations

    elif iterations == 'all':
        iter_tag = 'all'
        model_checkpoints = os.listdir(_checkpoint_dir)
        initial_checkpoint = [
            out_dir + f'/checkpoint_{_sha}/{model_checkpoint}'
            for model_checkpoint in model_checkpoints
        ]

    elif 'top' in iterations:
        nbest = int(iterations.strip('top'))

        iter_tag = f'top{nbest}'
        model_checkpoints = os.listdir(_checkpoint_dir)
        scores = [float(_file.split('_')[1]) for _file in model_checkpoints]

        ordered_models = list(zip(model_checkpoints, scores))
        ordered_models.sort(key=lambda x: x[1], reverse=True)
        model_checkpoints = [name for name, _ in ordered_models[:nbest]]

        initial_checkpoint = [
            out_dir + f'/checkpoint_{_sha}/{model_checkpoint}'
            for model_checkpoint in model_checkpoints
        ]

    else:
        iter_tag = f"{int(iterations):08}"
        [model_checkpoint] = [
            _file for _file in os.listdir(_checkpoint_dir)
            if iter_tag in _file.split('_')[0]
        ]
        initial_checkpoint = [
            out_dir + f'/checkpoint_{_sha}/{model_checkpoint}'
        ]

    print("checkpoint(s):", initial_checkpoint)
    print(f"submit with server={server}")

    # ------------------------------------------------------
    # Define the output directory for the predictions
    # ------------------------------------------------------
    if SERVER_RUN == 'kaggle':
        submit_dir = result_dir
    else:
        if checkpoint_sha is None:
            tag = ''
        else:
            tag = checkpoint_sha + '-'

        if iterations == 'all':
            submit_dir = result_dir + f'/predictions_{sha}/%s-%s-%smax' % (
                server, 'all', tag)
        elif flip_predict:
            submit_dir = result_dir + f'/predictions_{sha}/%s-%s-%smax' % (
                server, iter_tag, tag)
        else:
            submit_dir = result_dir + f'/predictions_{sha}/%s-%s-%snoflip' % (
                server, iter_tag, tag)
    os.makedirs(submit_dir, exist_ok=True)

    log = Logger()
    log.open(result_dir + f'/log.submit_{sha}.txt', mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))

    ##########################################################################################
    # Get the IDs of the images --------------------------------------------------------------
    ##########################################################################################
    if SERVER_RUN == 'kaggle':
        df_submit = pd.read_csv(
            '../input/hubmap-kidney-segmentation/sample_submission.csv',
            index_col='id')
        valid_image_id = df_submit.index.tolist()
    elif server == 'local':
        valid_image_id = make_image_id('train-all')
    elif server == 'kaggle':
        valid_image_id = make_image_id('test-all')

    ##########################################################################################
    # Define prediction parameters -----------------------------------------------------------
    ##########################################################################################
    tile_size = 256 * 3  # tile size in the scaled coordinate system
    tile_average_step = 320
    tile_min_score = 0.25

    log.write('tile_size = %d \n' % tile_size)
    log.write('tile_average_step = %d \n' % tile_average_step)
    log.write('tile_scale = %f \n' % scale)
    log.write('tile_min_score = %f \n' % tile_min_score)
    log.write('\n')

    ##################################
    # Starts iterating over images
    ##################################
    predicted = []
    df = pd.DataFrame()
    start_timer = timer()

    for ind, id in enumerate(valid_image_id):

        log.write(50 * "=" + "\n")
        log.write(f"Inference for image: {id} \n")

        # if id != '2ec3f1bb9': continue

        ###############
        # Define tiles
        ###############
        tiles = TileGenerator(image_id=id,
                              raw_data_dir=raw_data_dir,
                              size=tile_size,
                              scale=scale,
                              server=server)
        print(30 * '-')
        height = tiles.height
        width = tiles.width
        print(f"tile matrix shape (without scaling): {height} x {width}")

        tile_probability = []
        results = []
        ##############################################
        ### Iterate on sub-images with scaled sizes
        ##############################################

        for index, tile in enumerate(tiles.get_next()):

            # x0, y0 = tile['centroids'][:2]
            # if y0 != 9381 or x0 != 21384: continue

            if SERVER_RUN != 'kaggle':
                print('\r %s: n°%d %s' %
                      (ind, index, time_to_str(timer() - start_timer, 'sec')),
                      end='',
                      flush=True)
            elif index % 100 == 0:
                print('\r %s: n°%d %s' %
                      (ind, index, time_to_str(timer() - start_timer, 'sec')),
                      end='',
                      flush=True)

            # skip almost-unsaturated (background) tiles
            h, s, v = tile['hsv']
            if s < 0.05:
                tile_probability.append(np.zeros((tile_size, tile_size)))
                continue

            #######################################
            # Iterates over models.
            # The predictions are then averaged.
            #######################################
            overall_probabilities = []
            for _num, _checkpoint in enumerate(initial_checkpoint):
                net = Net(backbone).cuda()
                state_dict = torch.load(
                    _checkpoint,
                    map_location=lambda storage, loc: storage)['state_dict']
                net.load_state_dict(state_dict, strict=True)
                net = net.eval()
                image_probability = get_probas(net, tile['tile_image'],
                                               flip_predict)
                overall_probabilities.append(image_probability)

                ################################################################
                # Save and visualise the current tile
                ################################################################
                last_iter = _num == len(initial_checkpoint) - 1

                if SERVER_RUN == 'local':

                    image_name, x0, y0, dice = result_bookeeping(
                        id,
                        image_probability,
                        overall_probabilities,
                        tile['tile_mask'],
                        tile['tile_image'],
                        tile['centroids'],
                        server,
                        submit_dir,
                        save_to_disk=last_iter,
                        resize_scale=800 / tile_size)
                    if last_iter:
                        results.append([id, image_name, x0, y0, dice])

            _probas = np.max(overall_probabilities, axis=0)

            tile_probability.append(_probas.astype(np.float32))
            del overall_probabilities, _probas
            del net, state_dict, image_probability
            gc.collect()

        ###############################################################################
        # Concatenate the sub-images and rebuild a mask at the full (scaled) image size.
        # During this merge, each pixel is weighted by its distance to the tile centre.
        ###############################################################################

        scaled_centroid_list = (np.array(tiles.centroid_list) * scale).astype(int).tolist()

        probability = to_mask(
            tile_probability,  # N * scaled_height x scaled_width
            scaled_centroid_list,
            int(scale * height),
            int(scale * width),
            scale,
            tile_size,
            tile_average_step,
            tile_min_score,
            aggregate='max')

        predict = (probability > proba_threshold).astype(np.uint8)
        cv2.imwrite(submit_dir + '/%s.probability.png' % id,
                    (probability * 255).astype(np.uint8))
        cv2.imwrite(submit_dir + '/%s.predict.png' % id, (predict * 255))
        del predict, probability
        gc.collect()
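# The tile['hsv'] statistic used above to skip background tiles is computed inside
# TileGenerator and is not shown here; a plausible OpenCV sketch of that mean-HSV
# computation (an assumption about the original code, names are illustrative):
import cv2
import numpy as np


def mean_hsv(tile_rgb: np.ndarray) -> tuple:
    """Return the mean (h, s, v) of an RGB uint8 tile, each scaled to [0, 1]."""
    hsv = cv2.cvtColor(tile_rgb, cv2.COLOR_RGB2HSV).astype(np.float32)
    h = hsv[..., 0].mean() / 180.0   # OpenCV hue range for uint8 images is [0, 179]
    s = hsv[..., 1].mean() / 255.0
    v = hsv[..., 2].mean() / 255.0
    return h, s, v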