def main(): args = parse_args() args.uncropped = True with open('models/detection/%s/config.yml' % args.name, 'r') as f: config = yaml.load(f, Loader=yaml.FullLoader) # config["tvec"] = False print('-'*20) for key in config.keys(): print('%s: %s' % (key, str(config[key]))) print('-'*20) cudnn.benchmark = False df = pd.read_csv('inputs/sample_submission.csv') img_ids = df['ImageId'].values img_paths = np.array('inputs/test_images/' + df['ImageId'].values + '.jpg') mask_paths = np.array('inputs/test_masks/' + df['ImageId'].values + '.jpg') labels = np.array([convert_str_to_labels(s, names=['yaw', 'pitch', 'roll', 'x', 'y', 'z', 'score']) for s in df['PredictionString']]) if not args.uncropped: cropped_img_ids = pd.read_csv('inputs/testset_cropped_imageids.csv')['ImageId'].values for i, img_id in enumerate(img_ids): if img_id in cropped_img_ids: img_paths[i] = 'inputs/test_images_uncropped/' + img_id + '.jpg' mask_paths[i] = 'inputs/test_masks_uncropped/' + img_id + '.jpg' test_set = Dataset( img_paths, mask_paths, labels, input_w=config['input_w'], input_h=config['input_h'], transform=None, test=True, lhalf=config['lhalf']) test_loader = torch.utils.data.DataLoader( test_set, batch_size=16, shuffle=False, num_workers=0, # num_workers=config['num_workers'], # pin_memory=True, ) heads = OrderedDict([ ('hm', 1), ('reg', 2), ('depth', 1), ]) if config['rot'] == 'eular': heads['eular'] = 3 elif config['rot'] == 'trig': heads['trig'] = 6 elif config['rot'] == 'quat': heads['quat'] = 4 else: raise NotImplementedError if config['wh']: heads['wh'] = 2 if config['tvec']: heads['tvec'] = 3 name = args.name if args.uncropped: name += '_uncropped' if args.hflip: name += '_hf' if os.path.exists('outputs/raw/test/%s.pth' %name): merged_outputs = torch.load('outputs/raw/test/%s.pth' %name) else: merged_outputs = {} for i in tqdm(range(len(df))): img_id = df.loc[i, 'ImageId'] output = { 'hm': 0, 'reg': 0, 'depth': 0, 'eular': 0 if config['rot'] == 'eular' else None, 'trig': 0 if config['rot'] == 'trig' else None, 'quat': 0 if config['rot'] == 'quat' else None, 'wh': 0 if config['wh'] else None, 'tvec': 0 if config['tvec'] else None, } merged_outputs[img_id] = output preds = [] for fold in range(config['n_splits']): print('Fold [%d/%d]' %(fold + 1, config['n_splits'])) model = get_model(config['arch'], heads=heads, head_conv=config['head_conv'], num_filters=config['num_filters'], dcn=config['dcn'], gn=config['gn'], ws=config['ws'], freeze_bn=config['freeze_bn']) model = model.cuda() model_path = 'models/detection/%s/model_%d.pth' % (config['name'], fold+1) if not os.path.exists(model_path): print('%s is not exists.' %model_path) continue model.load_state_dict(torch.load(model_path)) model.eval() preds_fold = [] outputs_fold = {} with torch.no_grad(): pbar = tqdm(total=len(test_loader)) for i, batch in enumerate(test_loader): input = batch['input'].cuda() mask = batch['mask'].cuda() output = model(input) # print(output) if args.hflip: output_hf = model(torch.flip(input, (-1,))) output_hf['hm'] = torch.flip(output_hf['hm'], (-1,)) output_hf['reg'] = torch.flip(output_hf['reg'], (-1,)) output_hf['reg'][:, 0] = 1 - output_hf['reg'][:, 0] output_hf['depth'] = torch.flip(output_hf['depth'], (-1,)) if config['rot'] == 'trig': output_hf['trig'] = torch.flip(output_hf['trig'], (-1,)) yaw = torch.atan2(output_hf['trig'][:, 1], output_hf['trig'][:, 0]) yaw *= -1.0 output_hf['trig'][:, 0] = torch.cos(yaw) output_hf['trig'][:, 1] = torch.sin(yaw) roll = torch.atan2(output_hf['trig'][:, 5], output_hf['trig'][:, 4]) roll = rotate(roll, -np.pi) roll *= -1.0 roll = rotate(roll, np.pi) output_hf['trig'][:, 4] = torch.cos(roll) output_hf['trig'][:, 5] = torch.sin(roll) if config['wh']: output_hf['wh'] = torch.flip(output_hf['wh'], (-1,)) if config['tvec']: output_hf['tvec'] = torch.flip(output_hf['tvec'], (-1,)) output_hf['tvec'][:, 0] *= -1.0 output['hm'] = (output['hm'] + output_hf['hm']) / 2 output['reg'] = (output['reg'] + output_hf['reg']) / 2 output['depth'] = (output['depth'] + output_hf['depth']) / 2 if config['rot'] == 'trig': output['trig'] = (output['trig'] + output_hf['trig']) / 2 if config['wh']: output['wh'] = (output['wh'] + output_hf['wh']) / 2 if config['tvec']: output['tvec'] = (output['tvec'] + output_hf['tvec']) / 2 for b in range(len(batch['img_path'])): img_id = os.path.splitext(os.path.basename(batch['img_path'][b]))[0] outputs_fold[img_id] = { 'hm': output['hm'][b:b+1].cpu(), 'reg': output['reg'][b:b+1].cpu(), 'depth': output['depth'][b:b+1].cpu(), 'eular': output['eular'][b:b+1].cpu() if config['rot'] == 'eular' else None, 'trig': output['trig'][b:b+1].cpu() if config['rot'] == 'trig' else None, 'quat': output['quat'][b:b+1].cpu() if config['rot'] == 'quat' else None, 'wh': output['wh'][b:b+1].cpu() if config['wh'] else None, 'tvec': output['tvec'][b:b+1].cpu() if config['tvec'] else None, 'mask': mask[b:b+1].cpu(), } merged_outputs[img_id]['hm'] += outputs_fold[img_id]['hm'] / config['n_splits'] merged_outputs[img_id]['reg'] += outputs_fold[img_id]['reg'] / config['n_splits'] merged_outputs[img_id]['depth'] += outputs_fold[img_id]['depth'] / config['n_splits'] if config['rot'] == 'eular': merged_outputs[img_id]['eular'] += outputs_fold[img_id]['eular'] / config['n_splits'] if config['rot'] == 'trig': merged_outputs[img_id]['trig'] += outputs_fold[img_id]['trig'] / config['n_splits'] if config['rot'] == 'quat': merged_outputs[img_id]['quat'] += outputs_fold[img_id]['quat'] / config['n_splits'] if config['wh']: merged_outputs[img_id]['wh'] += outputs_fold[img_id]['wh'] / config['n_splits'] if config['tvec']: merged_outputs[img_id]['tvec'] += outputs_fold[img_id]['tvec'] / config['n_splits'] merged_outputs[img_id]['mask'] = outputs_fold[img_id]['mask'] batch_det = decode( config, output['hm'], output['reg'], output['depth'], eular=output['eular'] if config['rot'] == 'eular' else None, trig=output['trig'] if config['rot'] == 'trig' else None, quat=output['quat'] if config['rot'] == 'quat' else None, wh=output['wh'] if config['wh'] else None, tvec=output['tvec'] if config['tvec'] else None, mask=mask, ) batch_det = batch_det.cpu().numpy() for k, det in enumerate(batch_det): if args.nms: det = nms(det, dist_th=args.nms_th) preds_fold.append(convert_labels_to_str(det[det[:, 6] > args.score_th, :7])) if args.show and not config['cv']: img = cv2.imread(batch['img_path'][k]) img_pred = visualize(img, det[det[:, 6] > args.score_th]) plt.imshow(img_pred[..., ::-1]) plt.show() pbar.update(1) pbar.close() if not config['cv']: df['PredictionString'] = preds_fold name = '%s_1_%.2f' %(args.name, args.score_th) if args.uncropped: name += '_uncropped' if args.nms: name += '_nms%.2f' %args.nms_th df.to_csv('outputs/submissions/test/%s.csv' %name, index=False) return if not args.uncropped: # ensemble duplicate images dup_df = pd.read_csv('processed/test_image_hash.csv') dups = dup_df.hash.value_counts() dups = dups.loc[dups>1] for i in range(len(dups)): img_ids = dup_df[dup_df.hash == dups.index[i]].ImageId output = { 'hm': 0, 'reg': 0, 'depth': 0, 'eular': 0 if config['rot'] == 'eular' else None, 'trig': 0 if config['rot'] == 'trig' else None, 'quat': 0 if config['rot'] == 'quat' else None, 'wh': 0 if config['wh'] else None, 'tvec': 0 if config['tvec'] else None, 'mask': 0, } for img_id in img_ids: if img_id in cropped_img_ids: print('fooo') output['hm'] += merged_outputs[img_id]['hm'] / len(img_ids) output['reg'] += merged_outputs[img_id]['reg'] / len(img_ids) output['depth'] += merged_outputs[img_id]['depth'] / len(img_ids) if config['rot'] == 'eular': output['eular'] += merged_outputs[img_id]['eular'] / len(img_ids) if config['rot'] == 'trig': output['trig'] += merged_outputs[img_id]['trig'] / len(img_ids) if config['rot'] == 'quat': output['quat'] += merged_outputs[img_id]['quat'] / len(img_ids) if config['wh']: output['wh'] += merged_outputs[img_id]['wh'] / len(img_ids) if config['tvec']: output['tvec'] += merged_outputs[img_id]['tvec'] / len(img_ids) output['mask'] += merged_outputs[img_id]['mask'] / len(img_ids) for img_id in img_ids: merged_outputs[img_id] = output torch.save(merged_outputs, 'outputs/raw/test/%s.pth' %name) # decode dets = {} for i in tqdm(range(len(df))): img_id = df.loc[i, 'ImageId'] output = merged_outputs[img_id] det = decode( config, output['hm'], output['reg'], output['depth'], eular=output['eular'] if config['rot'] == 'eular' else None, trig=output['trig'] if config['rot'] == 'trig' else None, quat=output['quat'] if config['rot'] == 'quat' else None, wh=output['wh'] if config['wh'] else None, tvec=output['tvec'] if config['tvec'] else None, mask=output['mask'], ) det = det.numpy()[0] dets[img_id] = det.tolist() if args.nms: det = nms(det, dist_th=args.nms_th) if np.sum(det[:, 6] > args.score_th) >= args.min_samples: det = det[det[:, 6] > args.score_th] else: det = det[:args.min_samples] if args.show: img = cv2.imread('inputs/test_images/%s.jpg' %img_id) img_pred = visualize(img, det) plt.imshow(img_pred[..., ::-1]) plt.show() df.loc[i, 'PredictionString'] = convert_labels_to_str(det[:, :7]) with open('outputs/decoded/test/%s.json' %name, 'w') as f: json.dump(dets, f) name = '%s_%.2f' %(args.name, args.score_th) if args.uncropped: name += '_uncropped' if args.nms: name += '_nms%.2f' %args.nms_th if args.hflip: name += '_hf' if args.min_samples > 0: name += '_min%d' %args.min_samples df.to_csv('outputs/submissions/test/%s.csv' %name, index=False)
def main(): config = vars(parse_args()) if config['name'] is None: config['name'] = 'ensemble_%s' % datetime.now().strftime('%m%d%H') if os.path.exists('models/detection/%s/config.yml' % config['name']): with open('models/detection/%s/config.yml' % config['name'], 'r') as f: config = yaml.load(f, Loader=yaml.FullLoader) else: config['models'] = config['models'].split(',') if not os.path.exists('models/detection/%s' % config['name']): os.makedirs('models/detection/%s' % config['name']) with open('models/detection/%s/config.yml' % config['name'], 'w') as f: yaml.dump(config, f) print('-'*20) for key in config.keys(): print('%s: %s' % (key, str(config[key]))) print('-'*20) with open('models/detection/%s/config.yml' % config['models'][0], 'r') as f: model_config = yaml.load(f, Loader=yaml.FullLoader) df = pd.read_csv('inputs/train.csv') img_paths = np.array('inputs/train_images/' + df['ImageId'].values + '.jpg') img_ids = df['ImageId'].values mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values + '.jpg') labels = np.array([convert_str_to_labels(s, names=['yaw', 'pitch', 'roll', 'x', 'y', 'z', 'score']) for s in df['PredictionString']]) dets = {} kf = KFold(n_splits=model_config['n_splits'], shuffle=True, random_state=41) for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)): val_img_ids = img_ids[val_idx] if os.path.exists('outputs/raw/val/%s.pth' %config['name']): merged_outputs = torch.load('outputs/raw/val/%s.pth' %config['name']) else: merged_outputs = {} for img_id in tqdm(val_img_ids, total=len(val_img_ids)): output = { 'hm': 0, 'reg': 0, 'depth': 0, 'eular': 0 if model_config['rot'] == 'eular' else None, 'trig': 0 if model_config['rot'] == 'trig' else None, 'quat': 0 if model_config['rot'] == 'quat' else None, 'wh': 0 if model_config['wh'] else None, 'mask': 0, } merged_outputs[img_id] = output for model_name in config['models']: outputs = torch.load('outputs/raw/val/%s_%d.pth' %(model_name, fold + 1)) for img_id in tqdm(val_img_ids, total=len(val_img_ids)): output = outputs[img_id] merged_outputs[img_id]['hm'] += output['hm'] / len(config['models']) merged_outputs[img_id]['reg'] += output['reg'] / len(config['models']) merged_outputs[img_id]['depth'] += output['depth'] / len(config['models']) merged_outputs[img_id]['trig'] += output['trig'] / len(config['models']) merged_outputs[img_id]['wh'] += output['wh'] / len(config['models']) merged_outputs[img_id]['mask'] += output['mask'] / len(config['models']) torch.save(merged_outputs, 'outputs/raw/val/%s_%d.pth' %(config['name'], fold + 1)) # decode for img_id in tqdm(val_img_ids, total=len(val_img_ids)): output = merged_outputs[img_id] det = decode( model_config, output['hm'], output['reg'], output['depth'], eular=output['eular'] if model_config['rot'] == 'eular' else None, trig=output['trig'] if model_config['rot'] == 'trig' else None, quat=output['quat'] if model_config['rot'] == 'quat' else None, wh=output['wh'] if model_config['wh'] else None, mask=output['mask'], ) det = det.numpy()[0] dets[img_id] = det.tolist() if config['nms']: det = nms(det, dist_th=config['nms_th']) if np.sum(det[:, 6] > config['score_th']) >= config['min_samples']: det = det[det[:, 6] > config['score_th']] else: det = det[:config['min_samples']] if config['show']: img = cv2.imread('inputs/train_images/%s.jpg' %img_id) img_pred = visualize(img, det) plt.imshow(img_pred[..., ::-1]) plt.show() df.loc[df.ImageId == img_id, 'PredictionString'] = convert_labels_to_str(det[:, :7]) with open('outputs/decoded/val/%s.json' %config['name'], 'w') as f: json.dump(dets, f) df.to_csv('outputs/submissions/val/%s.csv' %config['name'], index=False)
def main(): args = parse_args() with open('models/detection/%s/config.yml' % args.name, 'r') as f: config = yaml.load(f, Loader=yaml.FullLoader) print('-' * 20) for key in config.keys(): print('%s: %s' % (key, str(config[key]))) print('-' * 20) cudnn.benchmark = True df = pd.read_csv('inputs/train.csv') img_paths = np.array('inputs/train_images/' + df['ImageId'].values + '.jpg') mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values + '.jpg') labels = np.array( [convert_str_to_labels(s) for s in df['PredictionString']]) heads = OrderedDict([ ('hm', 1), ('reg', 2), ('depth', 1), ]) if config['rot'] == 'eular': heads['eular'] = 3 elif config['rot'] == 'trig': heads['trig'] = 6 elif config['rot'] == 'quat': heads['quat'] = 4 else: raise NotImplementedError if config['wh']: heads['wh'] = 2 pred_df = df.copy() pred_df['PredictionString'] = np.nan dets = {} kf = KFold(n_splits=config['n_splits'], shuffle=True, random_state=41) for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)): print('Fold [%d/%d]' % (fold + 1, config['n_splits'])) train_img_paths, val_img_paths = img_paths[train_idx], img_paths[ val_idx] train_mask_paths, val_mask_paths = mask_paths[train_idx], mask_paths[ val_idx] train_labels, val_labels = labels[train_idx], labels[val_idx] val_set = Dataset(val_img_paths, val_mask_paths, val_labels, input_w=config['input_w'], input_h=config['input_h'], transform=None, lhalf=config['lhalf']) val_loader = torch.utils.data.DataLoader( val_set, batch_size=config['batch_size'], shuffle=False, num_workers=config['num_workers'], # pin_memory=True, ) model = get_model(config['arch'], heads=heads, head_conv=config['head_conv'], num_filters=config['num_filters'], dcn=config['dcn'], gn=config['gn'], ws=config['ws'], freeze_bn=config['freeze_bn']) model = model.cuda() model_path = 'models/detection/%s/model_%d.pth' % (config['name'], fold + 1) if not os.path.exists(model_path): print('%s is not exists.' % model_path) continue model.load_state_dict(torch.load(model_path)) model.eval() outputs = {} with torch.no_grad(): pbar = tqdm(total=len(val_loader)) for i, batch in enumerate(val_loader): input = batch['input'].cuda() mask = batch['mask'].cuda() hm = batch['hm'].cuda() reg_mask = batch['reg_mask'].cuda() output = model(input) if args.hflip: output_hf = model(torch.flip(input, (-1, ))) output_hf['hm'] = torch.flip(output_hf['hm'], (-1, )) output_hf['reg'] = torch.flip(output_hf['reg'], (-1, )) output_hf['reg'][:, 0] = 1 - output_hf['reg'][:, 0] output_hf['depth'] = torch.flip(output_hf['depth'], (-1, )) if config['rot'] == 'trig': output_hf['trig'] = torch.flip(output_hf['trig'], (-1, )) yaw = torch.atan2(output_hf['trig'][:, 1], output_hf['trig'][:, 0]) yaw *= -1.0 output_hf['trig'][:, 0] = torch.cos(yaw) output_hf['trig'][:, 1] = torch.sin(yaw) roll = torch.atan2(output_hf['trig'][:, 5], output_hf['trig'][:, 4]) roll = rotate(roll, -np.pi) roll *= -1.0 roll = rotate(roll, np.pi) output_hf['trig'][:, 4] = torch.cos(roll) output_hf['trig'][:, 5] = torch.sin(roll) if config['wh']: output_hf['wh'] = torch.flip(output_hf['wh'], (-1, )) output['hm'] = (output['hm'] + output_hf['hm']) / 2 output['reg'] = (output['reg'] + output_hf['reg']) / 2 output['depth'] = (output['depth'] + output_hf['depth']) / 2 if config['rot'] == 'trig': output['trig'] = (output['trig'] + output_hf['trig']) / 2 if config['wh']: output['wh'] = (output['wh'] + output_hf['wh']) / 2 batch_det = decode( config, output['hm'], output['reg'], output['depth'], eular=output['eular'] if config['rot'] == 'eular' else None, trig=output['trig'] if config['rot'] == 'trig' else None, quat=output['quat'] if config['rot'] == 'quat' else None, wh=output['wh'] if config['wh'] else None, mask=mask, ) batch_det = batch_det.cpu().numpy() for k, det in enumerate(batch_det): img_id = os.path.splitext( os.path.basename(batch['img_path'][k]))[0] outputs[img_id] = { 'hm': output['hm'][k:k + 1].cpu(), 'reg': output['reg'][k:k + 1].cpu(), 'depth': output['depth'][k:k + 1].cpu(), 'eular': output['eular'][k:k + 1].cpu() if config['rot'] == 'eular' else None, 'trig': output['trig'][k:k + 1].cpu() if config['rot'] == 'trig' else None, 'quat': output['quat'][k:k + 1].cpu() if config['rot'] == 'quat' else None, 'wh': output['wh'][k:k + 1].cpu() if config['wh'] else None, 'mask': mask[k:k + 1].cpu(), } dets[img_id] = det.tolist() if args.nms: det = nms(det, dist_th=args.nms_th) pred_df.loc[pred_df.ImageId == img_id, 'PredictionString'] = convert_labels_to_str( det[det[:, 6] > args.score_th, :7]) if args.show: gt = batch['gt'].numpy()[k] img = cv2.imread(batch['img_path'][k]) img_gt = visualize(img, gt[gt[:, -1] > 0]) img_pred = visualize(img, det[det[:, 6] > args.score_th]) plt.subplot(121) plt.imshow(img_gt[..., ::-1]) plt.subplot(122) plt.imshow(img_pred[..., ::-1]) plt.show() pbar.update(1) pbar.close() torch.save(outputs, 'outputs/raw/val/%s_%d.pth' % (args.name, fold + 1)) torch.cuda.empty_cache() if not config['cv']: break with open('outputs/decoded/val/%s.json' % args.name, 'w') as f: json.dump(dets, f) name = '%s_%.2f' % (args.name, args.score_th) if args.nms: name += '_nms%.2f' % args.nms_th if args.hflip: name += '_hf' pred_df.to_csv('outputs/submissions/val/%s.csv' % name, index=False) print(pred_df.head())