def __prep_faiss_search_results(block_id=1):
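    """For one of 32 train blocks, run a FAISS k-NN search of train19
    features against the full train19 index and save, for each query image,
    the top-ranked neighbors that share its landmark_id.

    Assumes module-level imports (faiss, numpy as np, pandas as pd, tqdm,
    pathlib.Path) and the project helpers (loader, timer,
    load_train19_landmark_dict) defined elsewhere in this file.
    """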
    # block_id runs from 1 to 32: the train set is split into 32 parts.

    dataset = loader.load_train_dataset()
    with timer('Loading train19 landmark dict'):
        landmark_dict = load_train19_landmark_dict()

    size_train = dataset.feats_train.shape[0]
    part_size = int(size_train / 32)
    idx_train_start = (block_id - 1) * part_size
    idx_train_end = block_id * part_size
    if block_id == 32:
        idx_train_end = size_train

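    # Exact L2 index over all train features, copied to every visible GPU
    # via index_cpu_to_all_gpus; only this block's slice is used as queries,
    # retrieving the 1000 nearest train images for each.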
    cpu_index = faiss.IndexFlatL2(dataset.feats_train.shape[1])
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
    gpu_index.add(dataset.feats_train)
    dists, topk_idx = gpu_index.search(
        x=dataset.feats_train[idx_train_start:idx_train_end], k=1000)

    df = pd.DataFrame(dataset.ids_train[idx_train_start:idx_train_end],
                      columns=['id'])
    df['images'] = np.apply_along_axis(' '.join,
                                       axis=1,
                                       arr=dataset.ids_train[topk_idx])

    print('generate sub')
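    # For each query image, keep up to 100 neighbors that share its
    # landmark_id, encoded as 'rank:distance:image_id'.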
    rows = []
    for imidx, (_, r) in tqdm.tqdm(enumerate(df.iterrows()), total=len(df)):
        landmark_id = landmark_dict[r['id']]
        same_landmark_images = []
        for rank, imid in enumerate(r.images.split(' ')):
            if landmark_id == landmark_dict[imid]:
                same_landmark_images.append(
                    f'{rank}:{dists[imidx, rank]:.8f}:{imid}')
                if len(same_landmark_images) >= 100:
                    break

        rows.append({
            'id': r['id'],
            'landmark_id': landmark_id,
            'matched': ' '.join(same_landmark_images),
        })

    fn = ('data/working/exp12/'
          f'train19_train19_faiss_search_same_landmarks_blk{block_id}.csv.gz')
    Path(fn).parent.mkdir(parents=True, exist_ok=True)

    print('to_csv')
    pd.DataFrame(rows).to_csv(fn, index=False, compression='gzip')


def __scoring_with_top100_arcfacefish_v4():
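    """Build a recognition submission: retrieve the top-100 train19
    neighbors for each test image on DBA+QE re-ranked features, then
    soft-vote a landmark_id with rank-decayed weights exp(-sqrt(rank + 1)).

    Assumes the __search helper (used for the DBA/QE re-ranking) is defined
    elsewhere in this file.
    """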
    dataset = loader.load_train_dataset()

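    # Build DBA (2 iterations) + QE (top-10) re-ranked features once and
    # cache them as HDF5; later runs reuse the cached file.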
    fn_out = 'data/working/exp12/v7_fish_dba2qe10.h5'
    if not Path(fn_out).exists():
        __search(dataset, fn_out, dba_niters=2, qe_topk=10)
    dataset = loader.load_train_dataset_singlefile(fn_out)

    with timer('Loading train19 landmark dict'):
        landmark_dict = load_train19_landmark_dict()

    fn_sub = 'data/working/exp12/v7_fish_nodba_top40_train19_v4.csv.gz'

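    # Exact L2 search on GPU: top-100 train neighbors for every test image.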
    cpu_index = faiss.IndexFlatL2(dataset.feats_train.shape[1])
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
    gpu_index.add(dataset.feats_train)
    dists, topk_idx = gpu_index.search(x=dataset.feats_test, k=100)

    df = pd.DataFrame(dataset.ids_test, columns=['id'])
    df['images'] = np.apply_along_axis(' '.join,
                                       axis=1,
                                       arr=dataset.ids_train[topk_idx])

    print('generate sub')
    rows = []
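    # Soft voting: the rank-i neighbor adds exp(-sqrt(i + 1)) to its
    # landmark_id's score; max_value is the maximum attainable total,
    # used to normalize the winning score.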
    max_value = sum(np.exp(-np.sqrt(i + 1)) for i in range(40))
    for _, r in tqdm.tqdm(df.iterrows(), total=len(df)):
        image_ids = [name.split('/')[-1] for name in r.images.split(' ')]
        counter = Counter()
        for i, image_id in enumerate(image_ids[:40]):
            landmark_id = landmark_dict[image_id]
            counter[landmark_id] += np.exp(-np.sqrt(i + 1))
        landmark_id, score = counter.most_common(1)[0]
        score = score / max_value
        rows.append({
            'id': r['id'],
            'landmarks': f'{landmark_id} {score:.9f}',
        })

    print('to_csv')
    df = pd.DataFrame(rows)
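    # Align predictions with the official sample submission so every test id
    # appears exactly once; ids without a prediction are left as NaN.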
    df_sub = pd.read_csv('data/recognition_sample_submission.csv')
    df_sub = df_sub[['id']].merge(df, how='left', on='id')
    df_sub[['id', 'landmarks']].to_csv(fn_sub, index=False, compression='gzip')