def fuse(self, result_dir):
    """Ensemble per-model validation results and report top-3 error.

    Averages the L1-normalized logits of every ``*.val.*.pkl`` file in
    *result_dir* with equal weights, prints the raw top-3 error rate,
    then applies probability calibration against the training-set label
    distribution and prints the calibrated top-3 error rate.

    Parameters
    ----------
    result_dir : str
        Directory containing one ``*.val.*.pkl`` result file per model.
        Each pickle maps sample id -> {'logit': ndarray, 'label': int}
        (presumed from usage below — TODO confirm against the writers).
    """
    train_json = self.load_json_file('./data/train_fusesize.json')
    train_df = pd.DataFrame({'label_id': [x['class'] for x in train_json]})

    results_file = sorted(glob(os.path.join(result_dir, '*.val.*.pkl')))
    print('Total {} ensemble files.'.format(len(results_file)))

    # Equal weighting of up to 5 ensemble members (the original also built
    # a hand-tuned 4-model weight list, but it was dead code).
    weight = [0.2] * 5

    new_dict = {}
    res_dict = {}
    for i, res in tqdm(enumerate(results_file)):
        res_dict = load_dict(res)
        for k, v in res_dict.items():
            logit = v['logit']
            # Normalize each model's logits to sum to 1 before weighting.
            contrib = logit / logit.sum() * weight[i]
            if k not in new_dict:
                new_dict[k] = contrib
            else:
                new_dict[k] += contrib

    all_id_list = []
    all_predicted = []
    correct_count = 0
    prob_list = []
    label_list = []
    for k, v in tqdm(new_dict.items(), desc='Fusing result'):
        v /= v.sum()
        prob_list.append(v)
        top3_result = np.argsort(-v, axis=-1)[:3]
        all_predicted.append('{:d} {:d} {:d}'.format(top3_result[0], top3_result[1], top3_result[2]))
        all_id_list.append(k)
        # NOTE(review): labels come from whichever result file was loaded
        # last; assumes all ensemble members carry identical labels per key.
        label = res_dict[k]['label']
        label_list.append([label])
        correct_count += np.sum(top3_result == label)

    dataframe = pd.DataFrame({'id': all_id_list, 'predicted': all_predicted})
    predict_csv = os.path.join(result_dir, 'ensemble.csv')
    # dataframe.to_csv(predict_csv, index=False, sep=',')
    print('Top3 error rate: {:.6f}'.format(1 - correct_count / float(len(all_id_list))))

    probs = np.stack(prob_list)
    labels = np.array(label_list)

    # Probability calibration against the training-set class prior.
    # calibrated_prob = self.calibrate_all(probs, train_df)
    calibrated_prob = self.calibrate_probs(probs, train_df)
    topk_predict_calib = np.argsort(-calibrated_prob, axis=-1)[:, 0:3]
    err_calib = 1. - np.sum((topk_predict_calib == labels), axis=-1).mean()
    print('Top3 error rate (calibration): {:.6f}'.format(err_calib))

    # Submission-style output for the calibrated predictions (writing is
    # left disabled, matching the uncalibrated CSV above).
    all_predicted_calib = ['{:d} {:d} {:d}'.format(item[0], item[1], item[2])
                           for item in topk_predict_calib]
    dataframe_calib = pd.DataFrame({'id': all_id_list, 'predicted': all_predicted_calib})
    predict_csv_calib = os.path.join(result_dir, 'calib_ensemble_4.csv')
def cover_dict():
    """Re-wrap a raw test-set logit pickle into the record format used by
    the ensemble code: sample id -> {'logit': ndarray, 'label': int},
    with label -1 marking "unknown" (test data has no ground truth).
    """
    src = './log/ensemble/ep11.logit.test.pkl'
    dst = './log/ensemble/ep11.test.ens100.logit.pkl'
    raw = load_dict(src)
    wrapped = {key: {'logit': logit, 'label': -1}
               for key, logit in tqdm(raw.items())}
    save_dict(wrapped, dst)
def load_feat_single(self, filename):
    """Load every feature pickle matching the glob pattern *filename*.

    Files are loaded in sorted path order; returns a list of the loaded
    dicts (empty when nothing matches).
    """
    matched = sorted(glob(filename))
    loaded = []
    for path in matched:
        print('Loading feature from {:s}'.format(path))
        loaded.append(load_dict(path))
    return loaded
def knn(self, result_dir):
    """Evaluate a k-NN classifier in normalized-logit space.

    Fits a KD-tree on the training-set logits, predicts validation labels
    by a 500-nearest-neighbour majority vote, and prints the top-3 error.

    Parameters
    ----------
    result_dir : str
        Directory containing ``train.logit.pkl`` and ``val.logit.pkl``,
        each mapping sample id -> {'logit': ndarray, 'label': int}.
    """
    def _load_logits(path, desc):
        # Load one result pickle; return (L1-normalized logits, labels).
        print('Loading {} file...'.format(desc))
        records = load_dict(path)
        data, labels = [], []
        for v in records.values():
            data.append(v['logit'] / v['logit'].sum())
            labels.append(v['label'])
        return np.stack(data, axis=0), np.array(labels)

    train_data, train_label = _load_logits(
        os.path.join(result_dir, 'train.logit.pkl'), 'training')
    val_data, val_label = _load_logits(
        os.path.join(result_dir, 'val.logit.pkl'), 'validation')

    print('Building KD tree...')
    kdt = KDTree(train_data, leaf_size=30, metric='euclidean')
    print('Predicting...')
    idx = kdt.query(val_data, k=500, return_distance=False)

    # Majority vote over the 500 neighbours' labels; minlength=2019 is the
    # total number of classes in the competition (matches self.num_class
    # usage elsewhere — TODO confirm).
    pred_tmp = train_label[idx]
    count = np.stack([np.bincount(x, minlength=2019) for x in pred_tmp], axis=0)
    pred = np.argsort(-count, axis=-1)[:, 0:3]
    err = 1. - np.sum(val_label[:, np.newaxis] == pred, axis=-1).mean()
    print('Error: {:f}'.format(err))
def calibrate_superlabel(self, train_json, probs):
    """Redistribute probability mass within super-label (coarse) groups.

    Each fine-grained class maps to a super-label (a bit-string loaded
    from ``fglabel2splabel.pkl``).  For every super-label group, each
    member class's probability is boosted by the group's mean probability.

    Parameters
    ----------
    train_json : list
        Unused; kept for signature compatibility with callers.
    probs : ndarray
        (num_samples, num_class) probability matrix.

    Returns
    -------
    ndarray of the same shape as *probs* with group-adjusted scores.
    """
    splabel = load_dict('./data/fglabel2splabel.pkl')
    sp_lab2idx_dict = {}
    dict_fg2sp = {}
    count = 0
    # Assign each distinct super-label bit-string a dense index 0..count-1
    # and map every fine-grained class to its super-label index.
    for k, v in splabel.items():
        key = ''.join(str(x) for x in v)
        if key not in sp_lab2idx_dict:
            sp_lab2idx_dict[key] = count
            count += 1
        dict_fg2sp[k] = sp_lab2idx_dict[key]

    fgidx = np.array([dict_fg2sp[x] for x in range(self.num_class)])
    new_probs = np.zeros_like(probs)
    # BUG FIX: iterate over ALL super-label indices (0..count-1).  The old
    # loop ran range(fgidx[-1]) — the super-label of the *last* class —
    # which skipped groups and left their classes with zero probability.
    for i in tqdm(range(count)):
        mask = fgidx == i
        # Mean probability of the group, broadcast back onto its members.
        mask_float = (mask / np.sum(mask))[np.newaxis, :]
        group_mean = np.matmul(probs, mask_float.transpose())
        new_probs += (probs + group_mean) * mask
    return new_probs
def __init__(self, phase, is_transform=True, DEBUG=False):
    """Dataset constructor: configure paths, label mapping and transforms.

    Parameters
    ----------
    phase : str
        'train' selects the training transform (when *is_transform*);
        anything else selects the test transform.
    is_transform : bool
        When False, the test transform is used even for phase == 'train'.
    DEBUG : bool
        When True, truncate the image list to the first 301 entries.
    """
    self.img_size = 'fusesize'
    self.img_root = '/data1/zhaoj/data/challenge/iMaterialist/image_' + self.img_size
    self.img_list_root = './data'
    self.phase = phase
    self.fglabel2splabel_dict = load_dict('./data/fglabel2splabel.pkl')
    reimgsize = 256

    def _pipeline(resize, crop, mean, std):
        # Shared augmentation recipe; note train and test use the SAME
        # random-crop/flip pipeline (test-time augmentation).
        return transforms.Compose([
            transforms.Resize(resize),
            transforms.RandomResizedCrop(crop),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ])

    imagenet_stats = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    half_stats = ([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    self.transform_dict = {
        'resnet': {p: _pipeline(reimgsize, 224, *imagenet_stats)
                   for p in ('train', 'test')},
        'senet': {p: _pipeline(256, 224, *imagenet_stats)
                  for p in ('train', 'test')},
        'inceptionresnet': {p: _pipeline(342, 299, *half_stats)
                            for p in ('train', 'test')},
        'nasnet': {p: _pipeline(378, 331, *half_stats)
                   for p in ('train', 'test')},
    }

    # All models currently run with the inception-resnet preprocessing.
    if phase == 'train' and is_transform:
        self.transform = self.transform_dict['inceptionresnet']['train']
    else:
        self.transform = self.transform_dict['inceptionresnet']['test']

    self.img_list = self.load_json_file()
    if DEBUG:
        self.img_list = self.img_list[:301]