def get_df_and_dists(topk=100):
    """Retrieve the nearest train images for every test image.

    Ensemble features for the test and train sets are loaded through
    ``utils.prepare_ids_and_feats`` and the train features are put into a
    flat L2 faiss index that is queried with the test features.

    Args:
        topk: number of train neighbours retrieved per test image.

    Returns:
        A tuple ``(df, dists)``. ``df`` has an ``id`` column holding the test
        ids and an ``images`` column holding the space-joined ids of the
        retrieved train images. ``dists`` is the distance array returned by
        the faiss search.
    """
    # Feature directories of the ensemble members, relative to ROOT.
    test_subdirs = (
        'exp/v19c/feats_test19_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        'exp/v20c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'exp/v21c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'exp/v22c/feats_test19_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        'exp/v23c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        'exp/v24c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    )
    train_subdirs = (
        'exp/v19c/feats_train_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        'exp/v20c/feats_train_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'exp/v21c/feats_train_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'exp/v22c/feats_train_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        'exp/v23c/feats_train_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        'exp/v24c/feats_train_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    )
    test_dirs = [ROOT + subdir for subdir in test_subdirs]
    train_dirs = [ROOT + subdir for subdir in train_subdirs]

    # Passed alongside the directory lists (six entries each).
    weights = [0.5, 1.0, 1.0, 0.5, 1.0, 1.0]

    logger.info('load ids and features.')
    ids_test, feats_test = utils.prepare_ids_and_feats(
        test_dirs, weights, normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(
        train_dirs, weights, normalize=True)
    logger.info('done.')

    logger.info('build index...')
    n_dims = feats_train.shape[1]
    train_index = faiss.IndexFlatL2(n_dims)
    train_index.add(feats_train)
    dists, neighbor_idx = train_index.search(x=feats_test, k=topk)
    logger.info('query search done.')

    # One row of retrieved train ids per test image, joined into one string.
    neighbor_ids = ids_train[neighbor_idx]
    df = pd.DataFrame(ids_test, columns=['id'])
    df['images'] = np.apply_along_axis(' '.join, axis=1, arr=neighbor_ids)
    return df, dists
def load_train_ensemble_dataset():
    """Load the ensemble ids and features of the test and train sets.

    Returns:
        An ``edict`` with the attributes ``ids_test``, ``ids_train``,
        ``feats_test`` and ``feats_train`` as returned by
        ``utils.prepare_ids_and_feats``.
    """
    # Feature directories of the ensemble members, relative to ROOT.
    test_subdirs = (
        'experiments/v19c/feats_test19_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',  # noqa
        'experiments/v20c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',  # noqa
        'experiments/v21c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',  # noqa
        'experiments/v22c/feats_test19_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',  # noqa
        'experiments/v23c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',  # noqa
        'experiments/v24c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',  # noqa
    )
    train_subdirs = (
        'experiments/v19c/feats_train_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',  # noqa
        'experiments/v20c/feats_train_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',  # noqa
        'experiments/v21c/feats_train_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',  # noqa
        'experiments/v22c/feats_train_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',  # noqa
        'experiments/v23c/feats_train_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',  # noqa
        'experiments/v24c/feats_train_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',  # noqa
    )
    test_dirs = [ROOT + subdir for subdir in test_subdirs]
    train_dirs = [ROOT + subdir for subdir in train_subdirs]

    # Passed alongside the directory lists (six entries each).
    weights = [0.5, 1.0, 1.0, 0.5, 1.0, 1.0]

    test_ids, test_feats = utils.prepare_ids_and_feats(
        test_dirs, weights, normalize=True)
    train_ids, train_feats = utils.prepare_ids_and_feats(
        train_dirs, weights, normalize=True)

    return edict(
        ids_test=test_ids,
        ids_train=train_ids,
        feats_test=test_feats,
        feats_train=train_feats,
    )
def main():
    """Create the retrieval submission from the v2clean features and submit it.

    The index features are searched with the test features on all GPUs, the
    top-100 result is passed through ``reranking_submission``, written as a
    gzip-compressed CSV under ``ROOT`` and handed to the ``kaggle`` CLI.
    """
    index_dirs = [
        '../exp/v2clean/feats_index19_ms_L2_ep4_freqthresh-3_loss-arcface_verifythresh-30/',
    ]
    test_dirs = [
        '../exp/v2clean/feats_test19_ms_L2_ep4_freqthresh-3_loss-arcface_verifythresh-30/',
    ]
    train_dirs = [
        '../exp/v2clean/feats_train_ms_L2_ep4_freqthresh-3_loss-arcface_verifythresh-30/',
    ]

    ids_index, feats_index = utils.prepare_ids_and_feats(index_dirs, normalize=True)
    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs, normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(train_dirs, normalize=True)

    print('build index...')
    n_dims = feats_index.shape[1]
    flat_index = faiss.IndexFlatL2(n_dims)
    search_index = faiss.index_cpu_to_all_gpus(flat_index)
    search_index.add(feats_index)
    # Only the neighbour indices are needed here; the distances are unused.
    _, topk_idx = search_index.search(x=feats_test, k=100)
    print('query search done.')

    retrieved_ids = ids_index[topk_idx]
    subm = pd.DataFrame(ids_test, columns=['id'])
    subm['images'] = np.apply_along_axis(' '.join, axis=1, arr=retrieved_ids)

    subm = reranking_submission(
        ids_index, feats_index,
        ids_test, feats_test,
        ids_train, feats_train,
        subm,
        topk=100,
    )

    output_name = ROOT + f'output/submit_retrieval.csv.gz'
    submission_columns = subm[['id', 'images']]
    submission_columns.to_csv(output_name, compression='gzip', index=False)
    print('saved to ' + output_name)

    # Submit through the kaggle CLI with an empty message.
    cmd = f'kaggle c submit -c landmark-retrieval-2019 -f {output_name} -m "" '
    print(cmd)
    subprocess.run(cmd, shell=True)
parser.add_argument('-w', '--weights', default='1')
parser.add_argument('-d', '--devices', default='0', help='gpu device indexes')
args = parser.parse_args()

# Directory arguments are comma-separated lists.
index_dirs = args.index_dirs.split(',')
test_dirs = args.test_dirs.split(',')
setting = args.setting
# Weights may be fractional (e.g. "0.5,1.0"), so parse them as floats;
# int() raises ValueError on such input.
weights = [float(w) for w in args.weights.split(',')]

# Restrict the visible GPUs to the requested devices and count them.
os.environ['CUDA_VISIBLE_DEVICES'] = args.devices
n_gpus = len(args.devices.split(','))

ids_index, feats_index = utils.prepare_ids_and_feats(index_dirs, weights, normalize=True)
ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs, weights, normalize=True)
# ids_train, feats_train = utils.prepare_ids_and_feats(train_dirs, weights, normalize=True)

co = faiss.GpuMultipleClonerOptions()
co.shard = True
# co.float16 = False

# One faiss GPU resource object per requested device.
vres = []
for _ in range(n_gpus):
    res = faiss.StandardGpuResources()
    vres.append(res)
def get_df_and_dists(train19_csv,
                     topk=100,
                     n_iter=1,
                     qe_topk=3,
                     weighting_scheme='alpha',
                     alpha=3,
                     t=0.8,
                     verifythresh=40,
                     freqthresh=5,
                     ):
    """Retrieve the nearest train images per test image, optionally after DBA.

    Ensemble features are loaded through ``utils.prepare_ids_and_feats``.
    When ``n_iter`` is positive the train features are first replaced by the
    result of ``supervised_dba``. The train features are then searched with
    the test features in a flat L2 faiss index, and the search distances and
    indices are saved as ``.npy`` files under ``data/working/exp44/``.

    Args:
        train19_csv: forwarded unchanged to ``supervised_dba``.
        topk: number of train neighbours retrieved per test image.
        n_iter: forwarded to ``supervised_dba``; the call is skipped when
            this is not greater than 0.
        qe_topk: forwarded to ``supervised_dba``.
        weighting_scheme: forwarded to ``supervised_dba``.
        alpha: forwarded to ``supervised_dba``.
        t: forwarded to ``supervised_dba``.
        verifythresh: forwarded to ``supervised_dba``.
        freqthresh: forwarded to ``supervised_dba``.

    Returns:
        A tuple ``(df, dists)``. ``df`` has an ``id`` column holding the test
        ids and an ``images`` column holding the space-joined ids of the
        retrieved train images. ``dists`` is the distance array returned by
        the faiss search.
    """
    ROOT = '/fs2/groups2/gca50080/yokoo/landmark/'

    # Feature directories of the ensemble members, relative to ROOT.
    test_subdirs = (
        'experiments/v19c/feats_test19_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        'experiments/v20c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'experiments/v21c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'experiments/v22c/feats_test19_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        'experiments/v23c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        'experiments/v24c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    )
    train_subdirs = (
        'experiments/v19c/feats_train_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        'experiments/v20c/feats_train_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'experiments/v21c/feats_train_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'experiments/v22c/feats_train_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        'experiments/v23c/feats_train_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        'experiments/v24c/feats_train_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    )
    test_dirs = [ROOT + subdir for subdir in test_subdirs]
    train_dirs = [ROOT + subdir for subdir in train_subdirs]

    # Passed alongside the directory lists (six entries each).
    weights = [0.5, 1.0, 1.0, 0.5, 1.0, 1.0]

    logger.info('load ids and features.')
    ids_test, feats_test = utils.prepare_ids_and_feats(
        test_dirs, weights, normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(
        train_dirs, weights, normalize=True)
    logger.info('done.')

    if n_iter > 0:
        dba_kwargs = dict(
            ids_train=ids_train,
            feats_train=feats_train,
            train19_csv=train19_csv,
            n_iter=n_iter,
            qe_topk=qe_topk,
            weighting_scheme=weighting_scheme,
            alpha=alpha,
            t=t,
            verifythresh=verifythresh,
            freqthresh=freqthresh,
        )
        feats_train = supervised_dba(**dba_kwargs)

    logger.info('build index...')
    n_dims = feats_train.shape[1]
    train_index = faiss.IndexFlatL2(n_dims)
    train_index.add(feats_train)
    dists, topk_idx = train_index.search(x=feats_test, k=topk)
    logger.info('query search done.')

    neighbor_ids = ids_train[topk_idx]
    df = pd.DataFrame(ids_test, columns=['id'])
    df['images'] = np.apply_along_axis(' '.join, axis=1, arr=neighbor_ids)

    # Keep the raw search output on disk; the file names are fixed.
    np.save('data/working/exp44/dists_top5__ens3_DBAx1.npy', dists)
    np.save('data/working/exp44/topk_idx_top5__ens3_DBAx1.npy', topk_idx)

    return df, dists
def main():
    """Create the ensemble retrieval submission and submit it to Kaggle.

    Index, test and train features of the six ensemble members are loaded
    through ``utils.prepare_ids_and_feats``. The index features are searched
    with the test features on all GPUs, the top-100 result is passed through
    ``reranking_submission``, written as a gzip-compressed CSV under ``ROOT``
    and handed to the ``kaggle`` CLI.
    """
    index_dirs = [
        ROOT + 'experiments/v19c/feats_index19_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        ROOT + 'experiments/v20c/feats_index19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT + 'experiments/v21c/feats_index19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT + 'experiments/v22c/feats_index19_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        ROOT + 'experiments/v23c/feats_index19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        ROOT + 'experiments/v24c/feats_index19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    ]
    test_dirs = [
        ROOT + 'experiments/v19c/feats_test19_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        ROOT + 'experiments/v20c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT + 'experiments/v21c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT + 'experiments/v22c/feats_test19_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        ROOT + 'experiments/v23c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        ROOT + 'experiments/v24c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    ]
    # The train directories use the same run names as the index and test
    # directories above; only the feats_* prefix differs.
    train_dirs = [
        ROOT + 'experiments/v19c/feats_train_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        ROOT + 'experiments/v20c/feats_train_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT + 'experiments/v21c/feats_train_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT + 'experiments/v22c/feats_train_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        ROOT + 'experiments/v23c/feats_train_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        ROOT + 'experiments/v24c/feats_train_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    ]

    weights = [
        0.5,
        1.0,
        1.0,
        0.5,
        1.0,
        1.0,
    ]  # intuition

    ids_index, feats_index = utils.prepare_ids_and_feats(index_dirs, weights, normalize=True)
    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs, weights, normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(train_dirs, weights, normalize=True)

    print('build index...')
    cpu_index = faiss.IndexFlatL2(feats_index.shape[1])
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
    gpu_index.add(feats_index)
    # Only the neighbour indices are needed here; the distances are unused.
    _, topk_idx = gpu_index.search(x=feats_test, k=100)
    print('query search done.')

    # Initial submission: each test id with its 100 retrieved index ids.
    subm = pd.DataFrame(ids_test, columns=['id'])
    subm['images'] = np.apply_along_axis(' '.join, axis=1, arr=ids_index[topk_idx])

    subm = reranking_submission(ids_index, feats_index,
                                ids_test, feats_test,
                                ids_train, feats_train,
                                subm, topk=100)

    output_name = ROOT + f'output/submit_retrieval.csv.gz'
    subm[['id', 'images']].to_csv(output_name, compression='gzip', index=False)
    print('saved to ' + output_name)

    # Submit through the kaggle CLI with an empty message; the command is
    # built from ROOT and fixed literals only.
    cmd = f'kaggle c submit -c landmark-retrieval-2019 -f {output_name} -m "" '
    print(cmd)
    subprocess.run(cmd, shell=True)
def ban_final():
    """Zero the score of test images that lie close to "banned" test images.

    A landmark id is banned when it is predicted more than ``ban_thresh``
    times in the stage-2 submission. Test images predicted as a banned
    landmark are used as queries against a faiss L2 index built from the
    remaining test images. Every remaining image returned with a distance
    below 0.5 gets its ``landmarks`` entry rewritten to ``"<landmark_id> 0"``.
    The result is written to ``../output/l2dist_0.5.csv.gz``.
    """
    import argparse
    import faiss
    import numpy as np
    import pandas as pd
    import os
    import subprocess
    import tqdm
    from collections import Counter
    from src import utils

    topk = 100
    ROOT = '/opt/landmark/'

    # Feature directories of the ensemble members, relative to ROOT.
    test_subdirs = (
        'experiments/v19c/feats_test19_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        'experiments/v20c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'experiments/v21c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        'experiments/v22c/feats_test19_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        'experiments/v23c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        'experiments/v24c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    )
    test_dirs = [ROOT + subdir for subdir in test_subdirs]

    weights = [0.5, 1.0, 1.0, 0.5, 1.0, 1.0]  # intuition

    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs, weights, normalize=True)

    # train19_csv = pd.read_pickle('../input/train.pkl')[['id', 'landmark_id']].set_index('id').sort_index()
    # landmark_dict = train19_csv.to_dict()['landmark_id']

    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    # co.float16 = False

    # Four GPU resource objects for the sharded multi-GPU index.
    vres = [faiss.StandardGpuResources() for _ in range(4)]

    def _load_scored_submission():
        # Read the stage-2 submission, split "landmarks" into id and score,
        # and return it sorted by descending score and indexed by image id.
        frame = pd.read_csv(
            '../output/stage2_submit_banthresh30_ens3_top3_DBAx1_v44r7.csv.gz')
        landmark_ids, scores = list(
            zip(*frame['landmarks'].apply(lambda x: str(x).split(' '))))
        frame['landmark_id'] = landmark_ids
        frame['score'] = scores
        frame['score'] = frame['score'].astype(np.float32)
        return frame.sort_values('score', ascending=False).set_index('id')

    subm = _load_scored_submission()

    # Landmark ids predicted more often than the threshold are banned.
    ban_thresh = 30
    freq = subm['landmark_id'].value_counts()
    ban_lids = freq[freq > ban_thresh].index
    banned_rows = subm[subm['landmark_id'].isin(ban_lids)]
    is_ban = np.isin(ids_test, banned_rows.index)

    ban_ids_test = ids_test[is_ban]  # not used below
    not_ban_ids_test = ids_test[~is_ban]
    ban_feats_test = feats_test[is_ban]
    not_ban_feats_test = feats_test[~is_ban]

    print('build index...')
    n_dims = not_ban_feats_test.shape[1]
    cpu_index = faiss.IndexFlatL2(n_dims)
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(vres, cpu_index, co)
    gpu_index.add(not_ban_feats_test)
    dists, topk_idx = gpu_index.search(x=ban_feats_test, k=topk)
    print('query search done.')

    # Reload the submission from disk before editing it.
    subm = _load_scored_submission()

    # Non-banned test images retrieved with a distance below 0.5.
    close_mask = dists < 0.5
    new_ban_ids = np.unique(not_ban_ids_test[topk_idx[close_mask]])
    subm.loc[new_ban_ids, 'landmarks'] = subm.loc[new_ban_ids, 'landmark_id'] + ' 0'
    # subm.loc[new_ban_ids, 'landmarks'] = subm.loc[new_ban_ids, 'landmark_id'] + ' ' + (subm.loc[new_ban_ids, 'score'] * 0.001).map(str)

    output_filename = '../output/l2dist_0.5.csv.gz'
    result = subm.reset_index()[['id', 'landmarks']]
    result.to_csv(output_filename, index=False, compression='gzip')