def demo(sess, net, image_name): # 根据路径,使用opencv读取图片数据 im_file = os.path.join(cfg.FLAGS2["data_dir"], 'demo', image_name) im = cv2.imread(im_file) # 进行目标检查 timer = Timer() timer.tic() # 进行预测返回300个box的得分和位置 scores, boxes = im_detect(sess, net, im) timer.toc() print('Detection took {:.3f}s for {:d} object proposals'.format( timer.total_time, boxes.shape[0])) # 每个类最高得分上图的阈值 CONF_THRESH = 0.1 # 每个类NMS阈值 NMS_THRESH = 0.1 for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # +1需要跳过背景 # 获取到所有候选框对应这个分类的位置 cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] # 获取到所有候选框对应这个分类的得分 cls_scores = scores[:, cls_ind] # 合并所有的位置和得分,(x1,y1,x2,y2,score) dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) # 通过非极大值抑制保留0.1的候选框以及得分 keep = nms(dets, NMS_THRESH) dets = dets[keep, :] # 上图 vis_detections(im, cls, dets, thresh=CONF_THRESH)
def demo(net, matlab, image_filepath, classes, method, par1, par2): # Detect all object classes and regress object bounds timer = Timer() timer.tic() # Load pre-computed Selected Search object proposals obj_proposals = ROI_boxes(matlab, image_filepath, method, par1, par2) global OP_num OP_num = len(obj_proposals) if len(obj_proposals)==0: dets = [] timer.toc() return dets, timer.total_time # Load the demo image im = cv2.imread(image_filepath) scores, boxes = im_detect(net, im, obj_proposals) timer.toc() # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls in classes: cls_ind = CLASSES.index(cls) cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] keep = np.where(cls_scores >= CONF_THRESH)[0] cls_boxes = cls_boxes[keep, :] cls_scores = cls_scores[keep] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] return dets, timer.total_time
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors): if type(cfg_key) == bytes: cfg_key = cfg_key.decode('utf-8') if cfg_key == "TRAIN": pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n #12000 post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n #2000 nms_thresh = cfg.FLAGS.rpn_train_nms_thresh #0.7 else: pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n #6000 post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n #300 nms_thresh = cfg.FLAGS.rpn_test_nms_thresh #0.7 # 因为我们的输入是(1,3)维的 im_info = im_info[0] # 1 * H * W * 9 其他维度不变,取18元素后9个为前景得分 scores = rpn_cls_prob[:, :, :, num_anchors:] # 9 # 9WH * 4 个偏移量 rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4)) # 9WH 个得分 scores = scores.reshape((-1, 1)) # 通过偏移量对anchor进行调整,得到proposals proposals = bbox_transform_inv(anchors, rpn_bbox_pred) # 修剪proposal,将超出边界的proposal修剪到图片范围内 proposals = clip_boxes(proposals, im_info[:2]) # im_info[:2] 表示 宽和高 # 得分从大到小排序,存储的为坐标 order = scores.ravel().argsort()[::-1] # 根据坐标,筛选出前12000个proposals和scores if pre_nms_topN > 0: order = order[:pre_nms_topN] proposals = proposals[order, :] scores = scores[order] # 非极大值抑制 keep = nms(np.hstack((proposals, scores)), nms_thresh) # np.hstack((proposals, scores) [x1,y1,x2,y2,score] # 因为keep已经排过序了,直接取前2000个 if post_nms_topN > 0: keep = keep[:post_nms_topN] proposals = proposals[keep, :] scores = scores[keep] # 给proposal叠加一个维度,第一列全是0.0 batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) return blob, scores
def demo(net, matlab, image_filepath, classes, args): """Detect object classes in an image using pre-computed object proposals.""" timer = Timer() timer.tic() # Load pre-computed Selected Search object proposals obj_proposals = ROI_boxes(matlab, image_filepath, args.OP_method) if len(obj_proposals) == 0: return # Load the demo image im = cv2.imread(image_filepath) # Detect all object classes and regress object bounds scores, boxes = im_detect(net, im, obj_proposals) timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls in classes: cls_ind = CLASSES.index(cls) cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] keep = np.where(cls_scores >= CONF_THRESH)[0] cls_boxes = cls_boxes[keep, :] cls_scores = cls_scores[keep] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] if (len(dets) == 0): global count count += 1 print('{} No Ear detected').format(count) # print 'All {} detections with p({} | box) >= {:.1f}'.format(cls, cls, # CONF_THRESH) if args.video_mode: visualise(im, cls, dets, thresh=CONF_THRESH) elif args.image_path is not None: vis_detections(im, cls, dets, thresh=CONF_THRESH)
def demo(net, matlab, image_filepath, classes, args): """Detect object classes in an image using pre-computed object proposals.""" timer = Timer() timer.tic() # Load pre-computed Selected Search object proposals obj_proposals = ROI_boxes(matlab, image_filepath, args.OP_method) if len(obj_proposals)==0: return # Load the demo image im = cv2.imread(image_filepath) # Detect all object classes and regress object bounds scores, boxes = im_detect(net, im, obj_proposals) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls in classes: cls_ind = CLASSES.index(cls) cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] keep = np.where(cls_scores >= CONF_THRESH)[0] cls_boxes = cls_boxes[keep, :] cls_scores = cls_scores[keep] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] if (len(dets) == 0): global count count += 1 print('{} No Ear detected').format(count) # print 'All {} detections with p({} | box) >= {:.1f}'.format(cls, cls, # CONF_THRESH) if args.video_mode: visualise(im, cls, dets, thresh=CONF_THRESH) elif args.image_path is not None: vis_detections(im, cls, dets, thresh=CONF_THRESH)
def ctpn(sess, net, image_name): img = cv2.imread(image_name) im = check_img(img) timer = Timer() timer.tic() scores, boxes = test_ctpn(sess, net, im) timer.toc() print(('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0])) # Visualize detections for each class CONF_THRESH = 0.9 NMS_THRESH = 0.3 dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] keep = np.where(dets[:, 4] >= 0.7)[0] dets = dets[keep, :] line = connect_proposal(dets[:, 0:4], dets[:, 4], im.shape) save_results(image_name, im, line,thresh=0.9)
def demo(net, image_name, classes): """Detect object classes in an image using pre-computed object proposals.""" # Load pre-computed Selected Search object proposals box_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '_boxes.mat') obj_proposals = sio.loadmat(box_file)['boxes'] # Load the demo image im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '.jpg') im = cv2.imread(im_file) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im, obj_proposals) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls in classes: cls_ind = CLASSES.index(cls) cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] keep = np.where(cls_scores >= CONF_THRESH)[0] cls_boxes = cls_boxes[keep, :] cls_scores = cls_scores[keep] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] print 'All {} detections with p({} | box) >= {:.1f}'.format(cls, cls, CONF_THRESH) vis_detections(im, cls, dets, thresh=CONF_THRESH)
def demo(net, image_name, classes): """Detect object classes in an image using pre-computed object proposals.""" # Load pre-computed Selected Search object proposals box_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '_boxes.mat') obj_proposals = sio.loadmat(box_file)['boxes'] # Load the demo image im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '.jpg') im = cv2.imread(im_file) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im, obj_proposals) timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls in classes: cls_ind = CLASSES.index(cls) cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] keep = np.where(cls_scores >= CONF_THRESH)[0] cls_boxes = cls_boxes[keep, :] cls_scores = cls_scores[keep] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] print 'All {} detections with p({} | box) >= {:.1f}'.format( cls, cls, CONF_THRESH) vis_detections(im, cls, dets, thresh=CONF_THRESH)
def main(): config = vars(parse_args()) if config['name'] is None: config['name'] = 'ensemble_%s' % datetime.now().strftime('%m%d%H') if os.path.exists('models/detection/%s/config.yml' % config['name']): with open('models/detection/%s/config.yml' % config['name'], 'r') as f: config = yaml.load(f, Loader=yaml.FullLoader) else: config['models'] = config['models'].split(',') if not os.path.exists('models/detection/%s' % config['name']): os.makedirs('models/detection/%s' % config['name']) with open('models/detection/%s/config.yml' % config['name'], 'w') as f: yaml.dump(config, f) print('-'*20) for key in config.keys(): print('%s: %s' % (key, str(config[key]))) print('-'*20) with open('models/detection/%s/config.yml' % config['models'][0], 'r') as f: model_config = yaml.load(f, Loader=yaml.FullLoader) df = pd.read_csv('inputs/train.csv') img_paths = np.array('inputs/train_images/' + df['ImageId'].values + '.jpg') img_ids = df['ImageId'].values mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values + '.jpg') labels = np.array([convert_str_to_labels(s, names=['yaw', 'pitch', 'roll', 'x', 'y', 'z', 'score']) for s in df['PredictionString']]) dets = {} kf = KFold(n_splits=model_config['n_splits'], shuffle=True, random_state=41) for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)): val_img_ids = img_ids[val_idx] if os.path.exists('outputs/raw/val/%s.pth' %config['name']): merged_outputs = torch.load('outputs/raw/val/%s.pth' %config['name']) else: merged_outputs = {} for img_id in tqdm(val_img_ids, total=len(val_img_ids)): output = { 'hm': 0, 'reg': 0, 'depth': 0, 'eular': 0 if model_config['rot'] == 'eular' else None, 'trig': 0 if model_config['rot'] == 'trig' else None, 'quat': 0 if model_config['rot'] == 'quat' else None, 'wh': 0 if model_config['wh'] else None, 'mask': 0, } merged_outputs[img_id] = output for model_name in config['models']: outputs = torch.load('outputs/raw/val/%s_%d.pth' %(model_name, fold + 1)) for img_id in tqdm(val_img_ids, total=len(val_img_ids)): output = outputs[img_id] merged_outputs[img_id]['hm'] += output['hm'] / len(config['models']) merged_outputs[img_id]['reg'] += output['reg'] / len(config['models']) merged_outputs[img_id]['depth'] += output['depth'] / len(config['models']) merged_outputs[img_id]['trig'] += output['trig'] / len(config['models']) merged_outputs[img_id]['wh'] += output['wh'] / len(config['models']) merged_outputs[img_id]['mask'] += output['mask'] / len(config['models']) torch.save(merged_outputs, 'outputs/raw/val/%s_%d.pth' %(config['name'], fold + 1)) # decode for img_id in tqdm(val_img_ids, total=len(val_img_ids)): output = merged_outputs[img_id] det = decode( model_config, output['hm'], output['reg'], output['depth'], eular=output['eular'] if model_config['rot'] == 'eular' else None, trig=output['trig'] if model_config['rot'] == 'trig' else None, quat=output['quat'] if model_config['rot'] == 'quat' else None, wh=output['wh'] if model_config['wh'] else None, mask=output['mask'], ) det = det.numpy()[0] dets[img_id] = det.tolist() if config['nms']: det = nms(det, dist_th=config['nms_th']) if np.sum(det[:, 6] > config['score_th']) >= config['min_samples']: det = det[det[:, 6] > config['score_th']] else: det = det[:config['min_samples']] if config['show']: img = cv2.imread('inputs/train_images/%s.jpg' %img_id) img_pred = visualize(img, det) plt.imshow(img_pred[..., ::-1]) plt.show() df.loc[df.ImageId == img_id, 'PredictionString'] = convert_labels_to_str(det[:, :7]) with open('outputs/decoded/val/%s.json' %config['name'], 'w') as f: json.dump(dets, f) df.to_csv('outputs/submissions/val/%s.csv' %config['name'], index=False)
def main(): args = parse_args() args.uncropped = True with open('models/detection/%s/config.yml' % args.name, 'r') as f: config = yaml.load(f, Loader=yaml.FullLoader) # config["tvec"] = False print('-'*20) for key in config.keys(): print('%s: %s' % (key, str(config[key]))) print('-'*20) cudnn.benchmark = False df = pd.read_csv('inputs/sample_submission.csv') img_ids = df['ImageId'].values img_paths = np.array('inputs/test_images/' + df['ImageId'].values + '.jpg') mask_paths = np.array('inputs/test_masks/' + df['ImageId'].values + '.jpg') labels = np.array([convert_str_to_labels(s, names=['yaw', 'pitch', 'roll', 'x', 'y', 'z', 'score']) for s in df['PredictionString']]) if not args.uncropped: cropped_img_ids = pd.read_csv('inputs/testset_cropped_imageids.csv')['ImageId'].values for i, img_id in enumerate(img_ids): if img_id in cropped_img_ids: img_paths[i] = 'inputs/test_images_uncropped/' + img_id + '.jpg' mask_paths[i] = 'inputs/test_masks_uncropped/' + img_id + '.jpg' test_set = Dataset( img_paths, mask_paths, labels, input_w=config['input_w'], input_h=config['input_h'], transform=None, test=True, lhalf=config['lhalf']) test_loader = torch.utils.data.DataLoader( test_set, batch_size=16, shuffle=False, num_workers=0, # num_workers=config['num_workers'], # pin_memory=True, ) heads = OrderedDict([ ('hm', 1), ('reg', 2), ('depth', 1), ]) if config['rot'] == 'eular': heads['eular'] = 3 elif config['rot'] == 'trig': heads['trig'] = 6 elif config['rot'] == 'quat': heads['quat'] = 4 else: raise NotImplementedError if config['wh']: heads['wh'] = 2 if config['tvec']: heads['tvec'] = 3 name = args.name if args.uncropped: name += '_uncropped' if args.hflip: name += '_hf' if os.path.exists('outputs/raw/test/%s.pth' %name): merged_outputs = torch.load('outputs/raw/test/%s.pth' %name) else: merged_outputs = {} for i in tqdm(range(len(df))): img_id = df.loc[i, 'ImageId'] output = { 'hm': 0, 'reg': 0, 'depth': 0, 'eular': 0 if config['rot'] == 'eular' else None, 'trig': 0 if config['rot'] == 'trig' else None, 'quat': 0 if config['rot'] == 'quat' else None, 'wh': 0 if config['wh'] else None, 'tvec': 0 if config['tvec'] else None, } merged_outputs[img_id] = output preds = [] for fold in range(config['n_splits']): print('Fold [%d/%d]' %(fold + 1, config['n_splits'])) model = get_model(config['arch'], heads=heads, head_conv=config['head_conv'], num_filters=config['num_filters'], dcn=config['dcn'], gn=config['gn'], ws=config['ws'], freeze_bn=config['freeze_bn']) model = model.cuda() model_path = 'models/detection/%s/model_%d.pth' % (config['name'], fold+1) if not os.path.exists(model_path): print('%s is not exists.' %model_path) continue model.load_state_dict(torch.load(model_path)) model.eval() preds_fold = [] outputs_fold = {} with torch.no_grad(): pbar = tqdm(total=len(test_loader)) for i, batch in enumerate(test_loader): input = batch['input'].cuda() mask = batch['mask'].cuda() output = model(input) # print(output) if args.hflip: output_hf = model(torch.flip(input, (-1,))) output_hf['hm'] = torch.flip(output_hf['hm'], (-1,)) output_hf['reg'] = torch.flip(output_hf['reg'], (-1,)) output_hf['reg'][:, 0] = 1 - output_hf['reg'][:, 0] output_hf['depth'] = torch.flip(output_hf['depth'], (-1,)) if config['rot'] == 'trig': output_hf['trig'] = torch.flip(output_hf['trig'], (-1,)) yaw = torch.atan2(output_hf['trig'][:, 1], output_hf['trig'][:, 0]) yaw *= -1.0 output_hf['trig'][:, 0] = torch.cos(yaw) output_hf['trig'][:, 1] = torch.sin(yaw) roll = torch.atan2(output_hf['trig'][:, 5], output_hf['trig'][:, 4]) roll = rotate(roll, -np.pi) roll *= -1.0 roll = rotate(roll, np.pi) output_hf['trig'][:, 4] = torch.cos(roll) output_hf['trig'][:, 5] = torch.sin(roll) if config['wh']: output_hf['wh'] = torch.flip(output_hf['wh'], (-1,)) if config['tvec']: output_hf['tvec'] = torch.flip(output_hf['tvec'], (-1,)) output_hf['tvec'][:, 0] *= -1.0 output['hm'] = (output['hm'] + output_hf['hm']) / 2 output['reg'] = (output['reg'] + output_hf['reg']) / 2 output['depth'] = (output['depth'] + output_hf['depth']) / 2 if config['rot'] == 'trig': output['trig'] = (output['trig'] + output_hf['trig']) / 2 if config['wh']: output['wh'] = (output['wh'] + output_hf['wh']) / 2 if config['tvec']: output['tvec'] = (output['tvec'] + output_hf['tvec']) / 2 for b in range(len(batch['img_path'])): img_id = os.path.splitext(os.path.basename(batch['img_path'][b]))[0] outputs_fold[img_id] = { 'hm': output['hm'][b:b+1].cpu(), 'reg': output['reg'][b:b+1].cpu(), 'depth': output['depth'][b:b+1].cpu(), 'eular': output['eular'][b:b+1].cpu() if config['rot'] == 'eular' else None, 'trig': output['trig'][b:b+1].cpu() if config['rot'] == 'trig' else None, 'quat': output['quat'][b:b+1].cpu() if config['rot'] == 'quat' else None, 'wh': output['wh'][b:b+1].cpu() if config['wh'] else None, 'tvec': output['tvec'][b:b+1].cpu() if config['tvec'] else None, 'mask': mask[b:b+1].cpu(), } merged_outputs[img_id]['hm'] += outputs_fold[img_id]['hm'] / config['n_splits'] merged_outputs[img_id]['reg'] += outputs_fold[img_id]['reg'] / config['n_splits'] merged_outputs[img_id]['depth'] += outputs_fold[img_id]['depth'] / config['n_splits'] if config['rot'] == 'eular': merged_outputs[img_id]['eular'] += outputs_fold[img_id]['eular'] / config['n_splits'] if config['rot'] == 'trig': merged_outputs[img_id]['trig'] += outputs_fold[img_id]['trig'] / config['n_splits'] if config['rot'] == 'quat': merged_outputs[img_id]['quat'] += outputs_fold[img_id]['quat'] / config['n_splits'] if config['wh']: merged_outputs[img_id]['wh'] += outputs_fold[img_id]['wh'] / config['n_splits'] if config['tvec']: merged_outputs[img_id]['tvec'] += outputs_fold[img_id]['tvec'] / config['n_splits'] merged_outputs[img_id]['mask'] = outputs_fold[img_id]['mask'] batch_det = decode( config, output['hm'], output['reg'], output['depth'], eular=output['eular'] if config['rot'] == 'eular' else None, trig=output['trig'] if config['rot'] == 'trig' else None, quat=output['quat'] if config['rot'] == 'quat' else None, wh=output['wh'] if config['wh'] else None, tvec=output['tvec'] if config['tvec'] else None, mask=mask, ) batch_det = batch_det.cpu().numpy() for k, det in enumerate(batch_det): if args.nms: det = nms(det, dist_th=args.nms_th) preds_fold.append(convert_labels_to_str(det[det[:, 6] > args.score_th, :7])) if args.show and not config['cv']: img = cv2.imread(batch['img_path'][k]) img_pred = visualize(img, det[det[:, 6] > args.score_th]) plt.imshow(img_pred[..., ::-1]) plt.show() pbar.update(1) pbar.close() if not config['cv']: df['PredictionString'] = preds_fold name = '%s_1_%.2f' %(args.name, args.score_th) if args.uncropped: name += '_uncropped' if args.nms: name += '_nms%.2f' %args.nms_th df.to_csv('outputs/submissions/test/%s.csv' %name, index=False) return if not args.uncropped: # ensemble duplicate images dup_df = pd.read_csv('processed/test_image_hash.csv') dups = dup_df.hash.value_counts() dups = dups.loc[dups>1] for i in range(len(dups)): img_ids = dup_df[dup_df.hash == dups.index[i]].ImageId output = { 'hm': 0, 'reg': 0, 'depth': 0, 'eular': 0 if config['rot'] == 'eular' else None, 'trig': 0 if config['rot'] == 'trig' else None, 'quat': 0 if config['rot'] == 'quat' else None, 'wh': 0 if config['wh'] else None, 'tvec': 0 if config['tvec'] else None, 'mask': 0, } for img_id in img_ids: if img_id in cropped_img_ids: print('fooo') output['hm'] += merged_outputs[img_id]['hm'] / len(img_ids) output['reg'] += merged_outputs[img_id]['reg'] / len(img_ids) output['depth'] += merged_outputs[img_id]['depth'] / len(img_ids) if config['rot'] == 'eular': output['eular'] += merged_outputs[img_id]['eular'] / len(img_ids) if config['rot'] == 'trig': output['trig'] += merged_outputs[img_id]['trig'] / len(img_ids) if config['rot'] == 'quat': output['quat'] += merged_outputs[img_id]['quat'] / len(img_ids) if config['wh']: output['wh'] += merged_outputs[img_id]['wh'] / len(img_ids) if config['tvec']: output['tvec'] += merged_outputs[img_id]['tvec'] / len(img_ids) output['mask'] += merged_outputs[img_id]['mask'] / len(img_ids) for img_id in img_ids: merged_outputs[img_id] = output torch.save(merged_outputs, 'outputs/raw/test/%s.pth' %name) # decode dets = {} for i in tqdm(range(len(df))): img_id = df.loc[i, 'ImageId'] output = merged_outputs[img_id] det = decode( config, output['hm'], output['reg'], output['depth'], eular=output['eular'] if config['rot'] == 'eular' else None, trig=output['trig'] if config['rot'] == 'trig' else None, quat=output['quat'] if config['rot'] == 'quat' else None, wh=output['wh'] if config['wh'] else None, tvec=output['tvec'] if config['tvec'] else None, mask=output['mask'], ) det = det.numpy()[0] dets[img_id] = det.tolist() if args.nms: det = nms(det, dist_th=args.nms_th) if np.sum(det[:, 6] > args.score_th) >= args.min_samples: det = det[det[:, 6] > args.score_th] else: det = det[:args.min_samples] if args.show: img = cv2.imread('inputs/test_images/%s.jpg' %img_id) img_pred = visualize(img, det) plt.imshow(img_pred[..., ::-1]) plt.show() df.loc[i, 'PredictionString'] = convert_labels_to_str(det[:, :7]) with open('outputs/decoded/test/%s.json' %name, 'w') as f: json.dump(dets, f) name = '%s_%.2f' %(args.name, args.score_th) if args.uncropped: name += '_uncropped' if args.nms: name += '_nms%.2f' %args.nms_th if args.hflip: name += '_hf' if args.min_samples > 0: name += '_min%d' %args.min_samples df.to_csv('outputs/submissions/test/%s.csv' %name, index=False)
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors): """ Parameters ---------- rpn_cls_prob: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg NOTICE: the old version is ordered by (1, H, W, 2, A) !!!! rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN im_info: a list of [image_height, image_width, scale_ratios] cfg_key: 'TRAIN' or 'TEST' _feat_stride: the downsampling ratio of feature map to the original input image anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) ---------- Returns ---------- rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2] # Algorithm: # # for each (H, W) location i # generate A anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the A anchors # clip predicted boxes to image # remove predicted boxes with either height or width < threshold # sort all (proposal, score) pairs by score from highest to lowest # take top pre_nms_topN proposals before NMS # apply NMS with threshold 0.7 to remaining proposals # take after_nms_topN proposals after NMS # return the top proposals (-> RoIs top, scores top) #layer_params = yaml.load(self.param_str_) """ if type(cfg_key) == bytes: cfg_key = cfg_key.decode('utf-8') pre_nms_topN = RPN_PRE_NMS_TOP_N # 12000,在做nms之前,最多保留的候选box数目 post_nms_topN = RPN_POST_NMS_TOP_N # 2000,做完nms之后,最多保留的box的数目 nms_thresh = RPN_NMS_THRESH # nms用参数,阈值是0.7 # the first set of _num_anchors channels are bg probs # the second set are the fg probs, which we want # (1, H, W, A) # import pdb # pdb.set_trace() scores = rpn_cls_prob[:, :, :, 1::2] rpn_bbox_pred = rpn_bbox_pred.view((-1, 4)) scores = scores.contiguous().view(-1, 1) # import pdb # pdb.set_trace() proposals = bbox_transform_inv(anchors.data.numpy(), rpn_bbox_pred.data.numpy()) proposals = clip_boxes(proposals, im_info[:2]) # np.where(anchors[:,0]==206) # Pick the top region proposals scores, order = scores.view(-1).sort(descending=True) if pre_nms_topN > 0: order = order[:pre_nms_topN] scores = scores[:pre_nms_topN].view(-1, 1) proposals = proposals[order.data, :] proposals = torch.from_numpy(proposals) # Non-maximal suppression if DEBUG: print('proposal size {} \n --> {}'.format(proposals.size(), proposals)) print('scores size {} \n --> {}'.format(scores.size(), scores)) keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh) # Pick th top region proposals after NMS if post_nms_topN > 0: keep = keep[:post_nms_topN] proposals = proposals[keep, :] scores = scores[keep, ] # Only support single image as input batch_inds = proposals.new_zeros(proposals.size(0), 1) blob = torch.cat((batch_inds, proposals), 1) return blob, scores
def main(): args = parse_args() with open('models/detection/%s/config.yml' % args.name, 'r') as f: config = yaml.load(f, Loader=yaml.FullLoader) print('-' * 20) for key in config.keys(): print('%s: %s' % (key, str(config[key]))) print('-' * 20) cudnn.benchmark = True df = pd.read_csv('inputs/train.csv') img_paths = np.array('inputs/train_images/' + df['ImageId'].values + '.jpg') mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values + '.jpg') labels = np.array( [convert_str_to_labels(s) for s in df['PredictionString']]) heads = OrderedDict([ ('hm', 1), ('reg', 2), ('depth', 1), ]) if config['rot'] == 'eular': heads['eular'] = 3 elif config['rot'] == 'trig': heads['trig'] = 6 elif config['rot'] == 'quat': heads['quat'] = 4 else: raise NotImplementedError if config['wh']: heads['wh'] = 2 pred_df = df.copy() pred_df['PredictionString'] = np.nan dets = {} kf = KFold(n_splits=config['n_splits'], shuffle=True, random_state=41) for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)): print('Fold [%d/%d]' % (fold + 1, config['n_splits'])) train_img_paths, val_img_paths = img_paths[train_idx], img_paths[ val_idx] train_mask_paths, val_mask_paths = mask_paths[train_idx], mask_paths[ val_idx] train_labels, val_labels = labels[train_idx], labels[val_idx] val_set = Dataset(val_img_paths, val_mask_paths, val_labels, input_w=config['input_w'], input_h=config['input_h'], transform=None, lhalf=config['lhalf']) val_loader = torch.utils.data.DataLoader( val_set, batch_size=config['batch_size'], shuffle=False, num_workers=config['num_workers'], # pin_memory=True, ) model = get_model(config['arch'], heads=heads, head_conv=config['head_conv'], num_filters=config['num_filters'], dcn=config['dcn'], gn=config['gn'], ws=config['ws'], freeze_bn=config['freeze_bn']) model = model.cuda() model_path = 'models/detection/%s/model_%d.pth' % (config['name'], fold + 1) if not os.path.exists(model_path): print('%s is not exists.' % model_path) continue model.load_state_dict(torch.load(model_path)) model.eval() outputs = {} with torch.no_grad(): pbar = tqdm(total=len(val_loader)) for i, batch in enumerate(val_loader): input = batch['input'].cuda() mask = batch['mask'].cuda() hm = batch['hm'].cuda() reg_mask = batch['reg_mask'].cuda() output = model(input) if args.hflip: output_hf = model(torch.flip(input, (-1, ))) output_hf['hm'] = torch.flip(output_hf['hm'], (-1, )) output_hf['reg'] = torch.flip(output_hf['reg'], (-1, )) output_hf['reg'][:, 0] = 1 - output_hf['reg'][:, 0] output_hf['depth'] = torch.flip(output_hf['depth'], (-1, )) if config['rot'] == 'trig': output_hf['trig'] = torch.flip(output_hf['trig'], (-1, )) yaw = torch.atan2(output_hf['trig'][:, 1], output_hf['trig'][:, 0]) yaw *= -1.0 output_hf['trig'][:, 0] = torch.cos(yaw) output_hf['trig'][:, 1] = torch.sin(yaw) roll = torch.atan2(output_hf['trig'][:, 5], output_hf['trig'][:, 4]) roll = rotate(roll, -np.pi) roll *= -1.0 roll = rotate(roll, np.pi) output_hf['trig'][:, 4] = torch.cos(roll) output_hf['trig'][:, 5] = torch.sin(roll) if config['wh']: output_hf['wh'] = torch.flip(output_hf['wh'], (-1, )) output['hm'] = (output['hm'] + output_hf['hm']) / 2 output['reg'] = (output['reg'] + output_hf['reg']) / 2 output['depth'] = (output['depth'] + output_hf['depth']) / 2 if config['rot'] == 'trig': output['trig'] = (output['trig'] + output_hf['trig']) / 2 if config['wh']: output['wh'] = (output['wh'] + output_hf['wh']) / 2 batch_det = decode( config, output['hm'], output['reg'], output['depth'], eular=output['eular'] if config['rot'] == 'eular' else None, trig=output['trig'] if config['rot'] == 'trig' else None, quat=output['quat'] if config['rot'] == 'quat' else None, wh=output['wh'] if config['wh'] else None, mask=mask, ) batch_det = batch_det.cpu().numpy() for k, det in enumerate(batch_det): img_id = os.path.splitext( os.path.basename(batch['img_path'][k]))[0] outputs[img_id] = { 'hm': output['hm'][k:k + 1].cpu(), 'reg': output['reg'][k:k + 1].cpu(), 'depth': output['depth'][k:k + 1].cpu(), 'eular': output['eular'][k:k + 1].cpu() if config['rot'] == 'eular' else None, 'trig': output['trig'][k:k + 1].cpu() if config['rot'] == 'trig' else None, 'quat': output['quat'][k:k + 1].cpu() if config['rot'] == 'quat' else None, 'wh': output['wh'][k:k + 1].cpu() if config['wh'] else None, 'mask': mask[k:k + 1].cpu(), } dets[img_id] = det.tolist() if args.nms: det = nms(det, dist_th=args.nms_th) pred_df.loc[pred_df.ImageId == img_id, 'PredictionString'] = convert_labels_to_str( det[det[:, 6] > args.score_th, :7]) if args.show: gt = batch['gt'].numpy()[k] img = cv2.imread(batch['img_path'][k]) img_gt = visualize(img, gt[gt[:, -1] > 0]) img_pred = visualize(img, det[det[:, 6] > args.score_th]) plt.subplot(121) plt.imshow(img_gt[..., ::-1]) plt.subplot(122) plt.imshow(img_pred[..., ::-1]) plt.show() pbar.update(1) pbar.close() torch.save(outputs, 'outputs/raw/val/%s_%d.pth' % (args.name, fold + 1)) torch.cuda.empty_cache() if not config['cv']: break with open('outputs/decoded/val/%s.json' % args.name, 'w') as f: json.dump(dets, f) name = '%s_%.2f' % (args.name, args.score_th) if args.nms: name += '_nms%.2f' % args.nms_th if args.hflip: name += '_hf' pred_df.to_csv('outputs/submissions/val/%s.csv' % name, index=False) print(pred_df.head())
def main(): args = parse_args() with open('models/pose/%s/config.yml' % args.pose_name, 'r') as f: config = yaml.load(f, Loader=yaml.FullLoader) print('-' * 20) for key in config.keys(): print('%s: %s' % (key, str(config[key]))) print('-' * 20) cudnn.benchmark = True df = pd.read_csv('inputs/train.csv') img_ids = df['ImageId'].values img_paths = np.array('inputs/train_images/' + df['ImageId'].values + '.jpg') mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values + '.jpg') labels = np.array( [convert_str_to_labels(s) for s in df['PredictionString']]) with open('outputs/decoded/val/%s.json' % args.det_name, 'r') as f: dets = json.load(f) if config['rot'] == 'eular': num_outputs = 3 elif config['rot'] == 'trig': num_outputs = 6 elif config['rot'] == 'quat': num_outputs = 4 else: raise NotImplementedError test_transform = Compose([ transforms.Resize(config['input_w'], config['input_h']), transforms.Normalize(), ToTensor(), ]) det_df = { 'ImageId': [], 'img_path': [], 'det': [], 'mask': [], } name = '%s_%.2f' % (args.det_name, args.score_th) if args.nms: name += '_nms%.2f' % args.nms_th output_dir = 'processed/pose_images/val/%s' % name os.makedirs(output_dir, exist_ok=True) df = [] kf = KFold(n_splits=config['n_splits'], shuffle=True, random_state=41) for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)): print('Fold [%d/%d]' % (fold + 1, config['n_splits'])) # create model model = get_pose_model(config['arch'], num_outputs=num_outputs, freeze_bn=config['freeze_bn']) model = model.cuda() model_path = 'models/pose/%s/model_%d.pth' % (config['name'], fold + 1) if not os.path.exists(model_path): print('%s is not exists.' % model_path) continue model.load_state_dict(torch.load(model_path)) model.eval() val_img_ids = img_ids[val_idx] val_img_paths = img_paths[val_idx] fold_det_df = { 'ImageId': [], 'img_path': [], 'det': [], 'mask': [], } for img_id, img_path in tqdm(zip(val_img_ids, val_img_paths), total=len(val_img_ids)): img = cv2.imread(img_path) height, width = img.shape[:2] det = np.array(dets[img_id]) det = det[det[:, 6] > args.score_th] if args.nms: det = nms(det, dist_th=args.nms_th) for k in range(len(det)): pitch, yaw, roll, x, y, z, score, w, h = det[k] fold_det_df['ImageId'].append(img_id) fold_det_df['det'].append(det[k]) output_path = '%s_%d.jpg' % (img_id, k) fold_det_df['img_path'].append(output_path) x, y = convert_3d_to_2d(x, y, z) w *= 1.1 h *= 1.1 xmin = int(round(x - w / 2)) xmax = int(round(x + w / 2)) ymin = int(round(y - h / 2)) ymax = int(round(y + h / 2)) cropped_img = img[ymin:ymax, xmin:xmax] if cropped_img.shape[0] > 0 and cropped_img.shape[1] > 0: cv2.imwrite(os.path.join(output_dir, output_path), cropped_img) fold_det_df['mask'].append(1) else: fold_det_df['mask'].append(0) fold_det_df = pd.DataFrame(fold_det_df) test_set = PoseDataset(output_dir + '/' + fold_det_df['img_path'].values, fold_det_df['det'].values, transform=test_transform, masks=fold_det_df['mask'].values) test_loader = torch.utils.data.DataLoader( test_set, batch_size=config['batch_size'], shuffle=False, num_workers=config['num_workers'], # pin_memory=True, ) fold_dets = [] with torch.no_grad(): for input, batch_det, mask in tqdm(test_loader, total=len(test_loader)): input = input.cuda() batch_det = batch_det.numpy() mask = mask.numpy() output = model(input) output = output.cpu() if config['rot'] == 'trig': yaw = torch.atan2(output[..., 1:2], output[..., 0:1]) pitch = torch.atan2(output[..., 3:4], output[..., 2:3]) roll = torch.atan2(output[..., 5:6], output[..., 4:5]) roll = rotate(roll, -np.pi) pitch = pitch.cpu().numpy()[:, 0] yaw = yaw.cpu().numpy()[:, 0] roll = roll.cpu().numpy()[:, 0] batch_det[mask, 0] = pitch[mask] batch_det[mask, 1] = yaw[mask] batch_det[mask, 2] = roll[mask] fold_dets.append(batch_det) fold_dets = np.vstack(fold_dets) fold_det_df['det'] = fold_dets.tolist() fold_det_df = fold_det_df.groupby('ImageId')['det'].apply(list) fold_det_df = pd.DataFrame({ 'ImageId': fold_det_df.index.values, 'PredictionString': fold_det_df.values, }) df.append(fold_det_df) break df = pd.concat(df).reset_index(drop=True) for i in tqdm(range(len(df))): img_id = df.loc[i, 'ImageId'] det = np.array(df.loc[i, 'PredictionString']) if args.show: img = cv2.imread('inputs/train_images/%s.jpg' % img_id) img_pred = visualize(img, det) plt.imshow(img_pred[..., ::-1]) plt.show() df.loc[i, 'PredictionString'] = convert_labels_to_str(det[:, :7]) name += '_%s' % args.pose_name df.to_csv('outputs/submissions/val/%s.csv' % name, index=False)