Code Example #1
File: demo.py Project: YZcxy/my-faster-rcnn
def demo(sess, net, image_name):
    # Build the image path and read the image with OpenCV
    im_file = os.path.join(cfg.FLAGS2["data_dir"], 'demo', image_name)
    im = cv2.imread(im_file)

    # Run object detection
    timer = Timer()
    timer.tic()
    # Inference returns scores and box positions for 300 proposals
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    # Score threshold for visualizing each class's detections
    CONF_THRESH = 0.1
    # Per-class NMS threshold
    NMS_THRESH = 0.1
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # +1 to skip the background class
        # Box coordinates of every proposal for this class
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        # Scores of every proposal for this class
        cls_scores = scores[:, cls_ind]
        # Stack boxes and scores into (x1, y1, x2, y2, score) rows
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        # Keep only the boxes and scores that survive NMS
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        # Draw the detections
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
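Every example in this listing funnels its stacked (x1, y1, x2, y2, score) array through nms(dets, NMS_THRESH). The repositories ship their own Cython/GPU implementations; for reference, here is a minimal NumPy sketch of the greedy IoU-based algorithm they implement (the function name nms_sketch and the column layout [x1, y1, x2, y2, score] are assumptions for illustration):

import numpy as np

def nms_sketch(dets, thresh):
    """Greedy IoU-based NMS: repeatedly keep the highest-scoring box and
    drop every remaining box whose IoU with it exceeds `thresh`."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by score, descending
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the kept box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]  # discard heavily overlapping boxes
    return keep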
Code Example #2
def demo(net, matlab, image_filepath, classes, method, par1, par2):
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    # Load pre-computed Selective Search object proposals
    obj_proposals = ROI_boxes(matlab, image_filepath, method, par1, par2)
    global OP_num
    OP_num = len(obj_proposals)
    if len(obj_proposals) == 0:
        dets = []
        timer.toc()
        return dets, timer.total_time

    # Load the demo image
    im = cv2.imread(image_filepath)
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
    # NOTE: dets is overwritten each iteration, so only the last class's detections are returned
    return dets, timer.total_time
Code Example #3
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == "TRAIN":
        pre_nms_topN = cfg.FLAGS.rpn_train_pre_nms_top_n #12000
        post_nms_topN = cfg.FLAGS.rpn_train_post_nms_top_n #2000
        nms_thresh = cfg.FLAGS.rpn_train_nms_thresh #0.7
    else:
        pre_nms_topN = cfg.FLAGS.rpn_test_pre_nms_top_n #6000
        post_nms_topN = cfg.FLAGS.rpn_test_post_nms_top_n #300
        nms_thresh = cfg.FLAGS.rpn_test_nms_thresh #0.7
    # im_info arrives with shape (1, 3)
    im_info = im_info[0]
    # (1, H, W, A): of the 2A = 18 channels, the last num_anchors (9) are foreground scores
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    # (A*H*W, 4) box regression deltas
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    # (A*H*W, 1) scores
    scores = scores.reshape((-1, 1))
    # Apply the predicted deltas to the anchors to get proposals
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Clip proposals that extend beyond the image boundary
    proposals = clip_boxes(proposals, im_info[:2])  # im_info[:2] is (height, width)

    # Indices that sort the scores in descending order
    order = scores.ravel().argsort()[::-1]
    # Keep only the top pre_nms_topN (12000) proposals and scores
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximum suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)  # rows are (x1, y1, x2, y2, score)

    # keep is already sorted by score, so just take the first post_nms_topN (2000)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Prepend a batch-index column (all zeros) to each proposal
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
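For context, bbox_transform_inv above converts the RPN's per-anchor regression deltas into absolute box coordinates. A minimal sketch under the standard Faster R-CNN parameterization, where each delta row is (dx, dy, dw, dh); the name bbox_transform_inv_sketch is illustrative, and the repository code additionally handles empty inputs and dtype casts:

import numpy as np

def bbox_transform_inv_sketch(boxes, deltas):
    """Apply (dx, dy, dw, dh) deltas to [x1, y1, x2, y2] anchor boxes."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]
    pred_ctr_x = dx * widths + ctr_x   # shift the center
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths       # scale the size
    pred_h = np.exp(dh) * heights

    pred_boxes = np.zeros_like(deltas)
    pred_boxes[:, 0] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes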
Code Example #4
def demo(net, matlab, image_filepath, classes, args):
    """Detect object classes in an image using pre-computed object proposals."""
    timer = Timer()
    timer.tic()
    # Load pre-computed Selective Search object proposals
    obj_proposals = ROI_boxes(matlab, image_filepath, args.OP_method)
    if len(obj_proposals) == 0:
        return

    # Load the demo image
    im = cv2.imread(image_filepath)

    # Detect all object classes and regress object bounds

    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        if len(dets) == 0:
            global count
            count += 1
            print('{} No Ear detected'.format(count))
        # print 'All {} detections with p({} | box) >= {:.1f}'.format(cls, cls,
        #                                                             CONF_THRESH)
        if args.video_mode:
            visualise(im, cls, dets, thresh=CONF_THRESH)
        elif args.image_path is not None:
            vis_detections(im, cls, dets, thresh=CONF_THRESH)
Code Example #5
def demo(net, matlab, image_filepath, classes, args):
    """Detect object classes in an image using pre-computed object proposals."""
    timer = Timer()
    timer.tic()
    # Load pre-computed Selective Search object proposals
    obj_proposals = ROI_boxes(matlab, image_filepath, args.OP_method)
    if len(obj_proposals) == 0:
        return

    # Load the demo image
    im = cv2.imread(image_filepath)

    # Detect all object classes and regress object bounds

    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        if len(dets) == 0:
            global count
            count += 1
            print('{} No Ear detected'.format(count))
        # print 'All {} detections with p({} | box) >= {:.1f}'.format(cls, cls,
        #                                                             CONF_THRESH)
        if args.video_mode:
            visualise(im, cls, dets, thresh=CONF_THRESH)
        elif args.image_path is not None:
            vis_detections(im, cls, dets, thresh=CONF_THRESH)
Code Example #6
def ctpn(sess, net, image_name):
    img = cv2.imread(image_name)
    im = check_img(img)
    timer = Timer()
    timer.tic()
    scores, boxes = test_ctpn(sess, net, im)
    timer.toc()
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.9
    NMS_THRESH = 0.3
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    dets = dets[keep, :]
    
    keep = np.where(dets[:, 4] >= 0.7)[0]
    dets = dets[keep, :]
    line = connect_proposal(dets[:, 0:4], dets[:, 4], im.shape)
    save_results(image_name, im, line, thresh=0.9)
Code Example #7
File: demo.py Project: harrysocool/ear_recognition
def demo(net, image_name, classes):

    """Detect object classes in an image using pre-computed object proposals."""

    # Load pre-computed Selective Search object proposals
    box_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo',
                            image_name + '_boxes.mat')
    obj_proposals = sio.loadmat(box_file)['boxes']

    # Load the demo image
    im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '.jpg')
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        print('All {} detections with p({} | box) >= {:.1f}'.format(
            cls, cls, CONF_THRESH))
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
Code Example #8
File: demo.py Project: caomw/ear_recognition
def demo(net, image_name, classes):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load pre-computed Selective Search object proposals
    box_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo',
                            image_name + '_boxes.mat')
    obj_proposals = sio.loadmat(box_file)['boxes']

    # Load the demo image
    im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '.jpg')
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        print('All {} detections with p({} | box) >= {:.1f}'.format(
            cls, cls, CONF_THRESH))
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
Code Example #9
def main():
    config = vars(parse_args())

    if config['name'] is None:
        config['name'] = 'ensemble_%s' % datetime.now().strftime('%m%d%H')

    if os.path.exists('models/detection/%s/config.yml' % config['name']):
        with open('models/detection/%s/config.yml' % config['name'], 'r') as f:
            config = yaml.load(f, Loader=yaml.FullLoader)
    else:
        config['models'] = config['models'].split(',')

    if not os.path.exists('models/detection/%s' % config['name']):
        os.makedirs('models/detection/%s' % config['name'])

    with open('models/detection/%s/config.yml' % config['name'], 'w') as f:
        yaml.dump(config, f)

    print('-'*20)
    for key in config.keys():
        print('%s: %s' % (key, str(config[key])))
    print('-'*20)

    with open('models/detection/%s/config.yml' % config['models'][0], 'r') as f:
        model_config = yaml.load(f, Loader=yaml.FullLoader)

    df = pd.read_csv('inputs/train.csv')
    img_paths = np.array('inputs/train_images/' + df['ImageId'].values + '.jpg')
    img_ids = df['ImageId'].values
    mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values + '.jpg')
    labels = np.array([convert_str_to_labels(s, names=['yaw', 'pitch', 'roll',
                       'x', 'y', 'z', 'score']) for s in df['PredictionString']])

    dets = {}
    kf = KFold(n_splits=model_config['n_splits'], shuffle=True, random_state=41)
    for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)):
        val_img_ids = img_ids[val_idx]

        if os.path.exists('outputs/raw/val/%s.pth' %config['name']):
            merged_outputs = torch.load('outputs/raw/val/%s.pth' %config['name'])

        else:
            merged_outputs = {}
            for img_id in tqdm(val_img_ids, total=len(val_img_ids)):
                output = {
                    'hm': 0,
                    'reg': 0,
                    'depth': 0,
                    'eular': 0 if model_config['rot'] == 'eular' else None,
                    'trig': 0 if model_config['rot'] == 'trig' else None,
                    'quat': 0 if model_config['rot'] == 'quat' else None,
                    'wh': 0 if model_config['wh'] else None,
                    'mask': 0,
                }

                merged_outputs[img_id] = output

            for model_name in config['models']:
                outputs = torch.load('outputs/raw/val/%s_%d.pth' %(model_name, fold + 1))

                for img_id in tqdm(val_img_ids, total=len(val_img_ids)):
                    output = outputs[img_id]

                    merged_outputs[img_id]['hm'] += output['hm'] / len(config['models'])
                    merged_outputs[img_id]['reg'] += output['reg'] / len(config['models'])
                    merged_outputs[img_id]['depth'] += output['depth'] / len(config['models'])
                    merged_outputs[img_id]['trig'] += output['trig'] / len(config['models'])
                    merged_outputs[img_id]['wh'] += output['wh'] / len(config['models'])
                    merged_outputs[img_id]['mask'] += output['mask'] / len(config['models'])

            torch.save(merged_outputs, 'outputs/raw/val/%s_%d.pth' %(config['name'], fold + 1))

        # decode
        for img_id in tqdm(val_img_ids, total=len(val_img_ids)):
            output = merged_outputs[img_id]

            det = decode(
                model_config,
                output['hm'],
                output['reg'],
                output['depth'],
                eular=output['eular'] if model_config['rot'] == 'eular' else None,
                trig=output['trig'] if model_config['rot'] == 'trig' else None,
                quat=output['quat'] if model_config['rot'] == 'quat' else None,
                wh=output['wh'] if model_config['wh'] else None,
                mask=output['mask'],
            )
            det = det.numpy()[0]

            dets[img_id] = det.tolist()

            if config['nms']:
                det = nms(det, dist_th=config['nms_th'])

            if np.sum(det[:, 6] > config['score_th']) >= config['min_samples']:
                det = det[det[:, 6] > config['score_th']]
            else:
                det = det[:config['min_samples']]

            if config['show']:
                img = cv2.imread('inputs/train_images/%s.jpg' %img_id)
                img_pred = visualize(img, det)
                plt.imshow(img_pred[..., ::-1])
                plt.show()

            df.loc[df.ImageId == img_id, 'PredictionString'] = convert_labels_to_str(det[:, :7])

    with open('outputs/decoded/val/%s.json' %config['name'], 'w') as f:
        json.dump(dets, f)

    df.to_csv('outputs/submissions/val/%s.csv' %config['name'], index=False)
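Note that nms here is called as nms(det, dist_th=...) on decoded 3D detections rather than on 2D boxes, so the suppression criterion must be a distance between predicted object centers instead of box IoU. A minimal sketch under that assumption, using the column layout from the later pose example (score at index 6, (x, y, z) translation at indices 3:6); the function name and exact behavior of the repository's nms are assumptions:

import numpy as np

def dist_nms_sketch(det, dist_th):
    """Greedy NMS by Euclidean distance between predicted 3D centers:
    keep the highest-scoring detection and drop any remaining one whose
    (x, y, z) center lies within `dist_th` of a kept detection."""
    det = det[det[:, 6].argsort()[::-1]]  # sort by score, descending
    keep = np.ones(len(det), dtype=bool)
    for i in range(len(det)):
        if not keep[i]:
            continue
        # Distances from detection i to every lower-scoring detection
        d = np.linalg.norm(det[i + 1:, 3:6] - det[i, 3:6], axis=1)
        keep[i + 1:] &= d > dist_th
    return det[keep]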
Code Example #10
def main():
    args = parse_args()
    args.uncropped = True

    with open('models/detection/%s/config.yml' % args.name, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    # config["tvec"] = False
    print('-'*20)
    for key in config.keys():
        print('%s: %s' % (key, str(config[key])))
    print('-'*20)

    cudnn.benchmark = False

    df = pd.read_csv('inputs/sample_submission.csv')
    img_ids = df['ImageId'].values
    img_paths = np.array('inputs/test_images/' + df['ImageId'].values + '.jpg')
    mask_paths = np.array('inputs/test_masks/' + df['ImageId'].values + '.jpg')
    labels = np.array([convert_str_to_labels(s, names=['yaw', 'pitch', 'roll',
                       'x', 'y', 'z', 'score']) for s in df['PredictionString']])

    if not args.uncropped:
        cropped_img_ids = pd.read_csv('inputs/testset_cropped_imageids.csv')['ImageId'].values
        for i, img_id in enumerate(img_ids):
            if img_id in cropped_img_ids:
                img_paths[i] = 'inputs/test_images_uncropped/' + img_id + '.jpg'
                mask_paths[i] = 'inputs/test_masks_uncropped/' + img_id + '.jpg'

    test_set = Dataset(
        img_paths,
        mask_paths,
        labels,
        input_w=config['input_w'],
        input_h=config['input_h'],
        transform=None,
        test=True,
        lhalf=config['lhalf'])
    test_loader = torch.utils.data.DataLoader(
        test_set,
        batch_size=16,
        shuffle=False,
        num_workers=0,
        # num_workers=config['num_workers'],
        # pin_memory=True,
    )

    heads = OrderedDict([
        ('hm', 1),
        ('reg', 2),
        ('depth', 1),
    ])

    if config['rot'] == 'eular':
        heads['eular'] = 3
    elif config['rot'] == 'trig':
        heads['trig'] = 6
    elif config['rot'] == 'quat':
        heads['quat'] = 4
    else:
        raise NotImplementedError

    if config['wh']:
        heads['wh'] = 2
    
    if config['tvec']:
        heads['tvec'] = 3

    name = args.name
    if args.uncropped:
        name += '_uncropped'
    if args.hflip:
        name += '_hf'

    if os.path.exists('outputs/raw/test/%s.pth' %name):
        merged_outputs = torch.load('outputs/raw/test/%s.pth' %name)

    else:
        merged_outputs = {}
        for i in tqdm(range(len(df))):
            img_id = df.loc[i, 'ImageId']

            output = {
                'hm': 0,
                'reg': 0,
                'depth': 0,
                'eular': 0 if config['rot'] == 'eular' else None,
                'trig': 0 if config['rot'] == 'trig' else None,
                'quat': 0 if config['rot'] == 'quat' else None,
                'wh': 0 if config['wh'] else None,
                'tvec': 0 if config['tvec'] else None,
            }

            merged_outputs[img_id] = output

        preds = []
        for fold in range(config['n_splits']):
            print('Fold [%d/%d]' %(fold + 1, config['n_splits']))

            model = get_model(config['arch'], heads=heads,
                              head_conv=config['head_conv'],
                              num_filters=config['num_filters'],
                              dcn=config['dcn'],
                              gn=config['gn'], ws=config['ws'],
                              freeze_bn=config['freeze_bn'])
            model = model.cuda()

            model_path = 'models/detection/%s/model_%d.pth' % (config['name'], fold+1)
            if not os.path.exists(model_path):
                print('%s does not exist.' % model_path)
                continue
            model.load_state_dict(torch.load(model_path))

            model.eval()

            preds_fold = []
            outputs_fold = {}
            with torch.no_grad():
                pbar = tqdm(total=len(test_loader))
                for i, batch in enumerate(test_loader):
                    input = batch['input'].cuda()
                    mask = batch['mask'].cuda()

                    output = model(input)
                    # print(output)

                    if args.hflip:
                        output_hf = model(torch.flip(input, (-1,)))
                        output_hf['hm'] = torch.flip(output_hf['hm'], (-1,))
                        output_hf['reg'] = torch.flip(output_hf['reg'], (-1,))
                        output_hf['reg'][:, 0] = 1 - output_hf['reg'][:, 0]
                        output_hf['depth'] = torch.flip(output_hf['depth'], (-1,))
                        if config['rot'] == 'trig':
                            output_hf['trig'] = torch.flip(output_hf['trig'], (-1,))
                            yaw = torch.atan2(output_hf['trig'][:, 1], output_hf['trig'][:, 0])
                            yaw *= -1.0
                            output_hf['trig'][:, 0] = torch.cos(yaw)
                            output_hf['trig'][:, 1] = torch.sin(yaw)
                            roll = torch.atan2(output_hf['trig'][:, 5], output_hf['trig'][:, 4])
                            roll = rotate(roll, -np.pi)
                            roll *= -1.0
                            roll = rotate(roll, np.pi)
                            output_hf['trig'][:, 4] = torch.cos(roll)
                            output_hf['trig'][:, 5] = torch.sin(roll)

                        if config['wh']:
                            output_hf['wh'] = torch.flip(output_hf['wh'], (-1,))
                        
                        if config['tvec']:
                            output_hf['tvec'] = torch.flip(output_hf['tvec'], (-1,))
                            output_hf['tvec'][:, 0] *= -1.0

                        output['hm'] = (output['hm'] + output_hf['hm']) / 2
                        output['reg'] = (output['reg'] + output_hf['reg']) / 2
                        output['depth'] = (output['depth'] + output_hf['depth']) / 2
                        if config['rot'] == 'trig':
                            output['trig'] = (output['trig'] + output_hf['trig']) / 2
                        if config['wh']:
                            output['wh'] = (output['wh'] + output_hf['wh']) / 2
                        if config['tvec']:
                            output['tvec'] = (output['tvec'] + output_hf['tvec']) / 2

                    for b in range(len(batch['img_path'])):
                        img_id = os.path.splitext(os.path.basename(batch['img_path'][b]))[0]

                        outputs_fold[img_id] = {
                            'hm': output['hm'][b:b+1].cpu(),
                            'reg': output['reg'][b:b+1].cpu(),
                            'depth': output['depth'][b:b+1].cpu(),
                            'eular': output['eular'][b:b+1].cpu() if config['rot'] == 'eular' else None,
                            'trig': output['trig'][b:b+1].cpu() if config['rot'] == 'trig' else None,
                            'quat': output['quat'][b:b+1].cpu() if config['rot'] == 'quat' else None,
                            'wh': output['wh'][b:b+1].cpu() if config['wh'] else None,
                            'tvec': output['tvec'][b:b+1].cpu() if config['tvec'] else None,
                            'mask': mask[b:b+1].cpu(),
                        }

                        merged_outputs[img_id]['hm'] += outputs_fold[img_id]['hm'] / config['n_splits']
                        merged_outputs[img_id]['reg'] += outputs_fold[img_id]['reg'] / config['n_splits']
                        merged_outputs[img_id]['depth'] += outputs_fold[img_id]['depth'] / config['n_splits']
                        if config['rot'] == 'eular':
                            merged_outputs[img_id]['eular'] += outputs_fold[img_id]['eular'] / config['n_splits']
                        if config['rot'] == 'trig':
                            merged_outputs[img_id]['trig'] += outputs_fold[img_id]['trig'] / config['n_splits']
                        if config['rot'] == 'quat':
                            merged_outputs[img_id]['quat'] += outputs_fold[img_id]['quat'] / config['n_splits']
                        if config['wh']:
                            merged_outputs[img_id]['wh'] += outputs_fold[img_id]['wh'] / config['n_splits']
                        if config['tvec']:
                            merged_outputs[img_id]['tvec'] += outputs_fold[img_id]['tvec'] / config['n_splits']
                        merged_outputs[img_id]['mask'] = outputs_fold[img_id]['mask']

                    batch_det = decode(
                        config,
                        output['hm'],
                        output['reg'],
                        output['depth'],
                        eular=output['eular'] if config['rot'] == 'eular' else None,
                        trig=output['trig'] if config['rot'] == 'trig' else None,
                        quat=output['quat'] if config['rot'] == 'quat' else None,
                        wh=output['wh'] if config['wh'] else None,
                        tvec=output['tvec'] if config['tvec'] else None,
                        mask=mask,
                    )
                    batch_det = batch_det.cpu().numpy()

                    for k, det in enumerate(batch_det):
                        if args.nms:
                            det = nms(det, dist_th=args.nms_th)
                        preds_fold.append(convert_labels_to_str(det[det[:, 6] > args.score_th, :7]))

                        if args.show and not config['cv']:
                            img = cv2.imread(batch['img_path'][k])
                            img_pred = visualize(img, det[det[:, 6] > args.score_th])
                            plt.imshow(img_pred[..., ::-1])
                            plt.show()

                    pbar.update(1)
                pbar.close()

            if not config['cv']:
                df['PredictionString'] = preds_fold
                name = '%s_1_%.2f' %(args.name, args.score_th)
                if args.uncropped:
                    name += '_uncropped'
                if args.nms:
                    name += '_nms%.2f' %args.nms_th
                df.to_csv('outputs/submissions/test/%s.csv' %name, index=False)
                return

        if not args.uncropped:
            # ensemble duplicate images
            dup_df = pd.read_csv('processed/test_image_hash.csv')
            dups = dup_df.hash.value_counts()
            dups = dups.loc[dups>1]

            for i in range(len(dups)):
                img_ids = dup_df[dup_df.hash == dups.index[i]].ImageId

                output = {
                    'hm': 0,
                    'reg': 0,
                    'depth': 0,
                    'eular': 0 if config['rot'] == 'eular' else None,
                    'trig': 0 if config['rot'] == 'trig' else None,
                    'quat': 0 if config['rot'] == 'quat' else None,
                    'wh': 0 if config['wh'] else None,
                    'tvec': 0 if config['tvec'] else None,
                    'mask': 0,
                }
                for img_id in img_ids:
                    if img_id in cropped_img_ids:
                        print('fooo')
                    output['hm'] += merged_outputs[img_id]['hm'] / len(img_ids)
                    output['reg'] += merged_outputs[img_id]['reg'] / len(img_ids)
                    output['depth'] += merged_outputs[img_id]['depth'] / len(img_ids)
                    if config['rot'] == 'eular':
                        output['eular'] += merged_outputs[img_id]['eular'] / len(img_ids)
                    if config['rot'] == 'trig':
                        output['trig'] += merged_outputs[img_id]['trig'] / len(img_ids)
                    if config['rot'] == 'quat':
                        output['quat'] += merged_outputs[img_id]['quat'] / len(img_ids)
                    if config['wh']:
                        output['wh'] += merged_outputs[img_id]['wh'] / len(img_ids)
                    if config['tvec']:
                        output['tvec'] += merged_outputs[img_id]['tvec'] / len(img_ids)
                    output['mask'] += merged_outputs[img_id]['mask'] / len(img_ids)

                for img_id in img_ids:
                    merged_outputs[img_id] = output

        torch.save(merged_outputs, 'outputs/raw/test/%s.pth' %name)

    # decode
    dets = {}
    for i in tqdm(range(len(df))):
        img_id = df.loc[i, 'ImageId']

        output = merged_outputs[img_id]

        det = decode(
            config,
            output['hm'],
            output['reg'],
            output['depth'],
            eular=output['eular'] if config['rot'] == 'eular' else None,
            trig=output['trig'] if config['rot'] == 'trig' else None,
            quat=output['quat'] if config['rot'] == 'quat' else None,
            wh=output['wh'] if config['wh'] else None,
            tvec=output['tvec'] if config['tvec'] else None,
            mask=output['mask'],
        )
        det = det.numpy()[0]

        dets[img_id] = det.tolist()

        if args.nms:
            det = nms(det, dist_th=args.nms_th)

        if np.sum(det[:, 6] > args.score_th) >= args.min_samples:
            det = det[det[:, 6] > args.score_th]
        else:
            det = det[:args.min_samples]

        if args.show:
            img = cv2.imread('inputs/test_images/%s.jpg' %img_id)
            img_pred = visualize(img, det)
            plt.imshow(img_pred[..., ::-1])
            plt.show()

        df.loc[i, 'PredictionString'] = convert_labels_to_str(det[:, :7])

    with open('outputs/decoded/test/%s.json' %name, 'w') as f:
        json.dump(dets, f)

    name = '%s_%.2f' %(args.name, args.score_th)
    if args.uncropped:
        name += '_uncropped'
    if args.nms:
        name += '_nms%.2f' %args.nms_th
    if args.hflip:
        name += '_hf'
    if args.min_samples > 0:
        name += '_min%d' %args.min_samples
    df.to_csv('outputs/submissions/test/%s.csv' %name, index=False)
Code Example #11
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors):
    """
    Parameters
    ----------
    rpn_cls_prob: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg
                         NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN
    im_info: a list of [image_height, image_width, scale_ratios]
    cfg_key: 'TRAIN' or 'TEST'
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)

    """

    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')

    pre_nms_topN = RPN_PRE_NMS_TOP_N  # 12000: max number of candidate boxes kept before NMS
    post_nms_topN = RPN_POST_NMS_TOP_N  # 2000: max number of boxes kept after NMS
    nms_thresh = RPN_NMS_THRESH  # NMS IoU threshold, 0.7

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # (1, H, W, A)

    scores = rpn_cls_prob[:, :, :, 1::2]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)
    proposals = bbox_transform_inv(anchors.data.numpy(),
                                   rpn_bbox_pred.data.numpy())
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN].view(-1, 1)
    proposals = proposals[order.data, :]
    proposals = torch.from_numpy(proposals)

    # Non-maximal suppression
    if DEBUG:
        print('proposal size {} \n --> {}'.format(proposals.size(), proposals))
        print('scores size {} \n --> {}'.format(scores.size(), scores))
    keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep, ]

    # Only support single image as input
    batch_inds = proposals.new_zeros(proposals.size(0), 1)
    blob = torch.cat((batch_inds, proposals), 1)

    return blob, scores
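clip_boxes, used by both proposal_layer variants above, simply clamps proposals to the image extent. A minimal sketch, assuming a single (N, 4) box array and im_shape = (height, width) as in im_info[:2] (the repository versions also handle (N, 4K) multi-class box arrays):

import numpy as np

def clip_boxes_sketch(boxes, im_shape):
    """Clamp [x1, y1, x2, y2] boxes to lie inside the image."""
    h, w = im_shape
    boxes[:, 0] = np.clip(boxes[:, 0], 0, w - 1)  # x1
    boxes[:, 1] = np.clip(boxes[:, 1], 0, h - 1)  # y1
    boxes[:, 2] = np.clip(boxes[:, 2], 0, w - 1)  # x2
    boxes[:, 3] = np.clip(boxes[:, 3], 0, h - 1)  # y2
    return boxes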
Code Example #12
def main():
    args = parse_args()

    with open('models/detection/%s/config.yml' % args.name, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    print('-' * 20)
    for key in config.keys():
        print('%s: %s' % (key, str(config[key])))
    print('-' * 20)

    cudnn.benchmark = True

    df = pd.read_csv('inputs/train.csv')
    img_paths = np.array('inputs/train_images/' + df['ImageId'].values +
                         '.jpg')
    mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values +
                          '.jpg')
    labels = np.array(
        [convert_str_to_labels(s) for s in df['PredictionString']])

    heads = OrderedDict([
        ('hm', 1),
        ('reg', 2),
        ('depth', 1),
    ])

    if config['rot'] == 'eular':
        heads['eular'] = 3
    elif config['rot'] == 'trig':
        heads['trig'] = 6
    elif config['rot'] == 'quat':
        heads['quat'] = 4
    else:
        raise NotImplementedError

    if config['wh']:
        heads['wh'] = 2

    pred_df = df.copy()
    pred_df['PredictionString'] = np.nan

    dets = {}
    kf = KFold(n_splits=config['n_splits'], shuffle=True, random_state=41)
    for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)):
        print('Fold [%d/%d]' % (fold + 1, config['n_splits']))

        train_img_paths, val_img_paths = img_paths[train_idx], img_paths[
            val_idx]
        train_mask_paths, val_mask_paths = mask_paths[train_idx], mask_paths[
            val_idx]
        train_labels, val_labels = labels[train_idx], labels[val_idx]

        val_set = Dataset(val_img_paths,
                          val_mask_paths,
                          val_labels,
                          input_w=config['input_w'],
                          input_h=config['input_h'],
                          transform=None,
                          lhalf=config['lhalf'])
        val_loader = torch.utils.data.DataLoader(
            val_set,
            batch_size=config['batch_size'],
            shuffle=False,
            num_workers=config['num_workers'],
            # pin_memory=True,
        )

        model = get_model(config['arch'],
                          heads=heads,
                          head_conv=config['head_conv'],
                          num_filters=config['num_filters'],
                          dcn=config['dcn'],
                          gn=config['gn'],
                          ws=config['ws'],
                          freeze_bn=config['freeze_bn'])
        model = model.cuda()

        model_path = 'models/detection/%s/model_%d.pth' % (config['name'],
                                                           fold + 1)
        if not os.path.exists(model_path):
            print('%s does not exist.' % model_path)
            continue
        model.load_state_dict(torch.load(model_path))

        model.eval()

        outputs = {}

        with torch.no_grad():
            pbar = tqdm(total=len(val_loader))
            for i, batch in enumerate(val_loader):
                input = batch['input'].cuda()
                mask = batch['mask'].cuda()
                hm = batch['hm'].cuda()
                reg_mask = batch['reg_mask'].cuda()

                output = model(input)

                if args.hflip:
                    output_hf = model(torch.flip(input, (-1, )))
                    output_hf['hm'] = torch.flip(output_hf['hm'], (-1, ))
                    output_hf['reg'] = torch.flip(output_hf['reg'], (-1, ))
                    output_hf['reg'][:, 0] = 1 - output_hf['reg'][:, 0]
                    output_hf['depth'] = torch.flip(output_hf['depth'], (-1, ))
                    if config['rot'] == 'trig':
                        output_hf['trig'] = torch.flip(output_hf['trig'],
                                                       (-1, ))
                        yaw = torch.atan2(output_hf['trig'][:, 1],
                                          output_hf['trig'][:, 0])
                        yaw *= -1.0
                        output_hf['trig'][:, 0] = torch.cos(yaw)
                        output_hf['trig'][:, 1] = torch.sin(yaw)
                        roll = torch.atan2(output_hf['trig'][:, 5],
                                           output_hf['trig'][:, 4])
                        roll = rotate(roll, -np.pi)
                        roll *= -1.0
                        roll = rotate(roll, np.pi)
                        output_hf['trig'][:, 4] = torch.cos(roll)
                        output_hf['trig'][:, 5] = torch.sin(roll)

                    if config['wh']:
                        output_hf['wh'] = torch.flip(output_hf['wh'], (-1, ))

                    output['hm'] = (output['hm'] + output_hf['hm']) / 2
                    output['reg'] = (output['reg'] + output_hf['reg']) / 2
                    output['depth'] = (output['depth'] +
                                       output_hf['depth']) / 2
                    if config['rot'] == 'trig':
                        output['trig'] = (output['trig'] +
                                          output_hf['trig']) / 2
                    if config['wh']:
                        output['wh'] = (output['wh'] + output_hf['wh']) / 2

                batch_det = decode(
                    config,
                    output['hm'],
                    output['reg'],
                    output['depth'],
                    eular=output['eular']
                    if config['rot'] == 'eular' else None,
                    trig=output['trig'] if config['rot'] == 'trig' else None,
                    quat=output['quat'] if config['rot'] == 'quat' else None,
                    wh=output['wh'] if config['wh'] else None,
                    mask=mask,
                )
                batch_det = batch_det.cpu().numpy()

                for k, det in enumerate(batch_det):
                    img_id = os.path.splitext(
                        os.path.basename(batch['img_path'][k]))[0]

                    outputs[img_id] = {
                        'hm':
                        output['hm'][k:k + 1].cpu(),
                        'reg':
                        output['reg'][k:k + 1].cpu(),
                        'depth':
                        output['depth'][k:k + 1].cpu(),
                        'eular':
                        output['eular'][k:k + 1].cpu()
                        if config['rot'] == 'eular' else None,
                        'trig':
                        output['trig'][k:k + 1].cpu()
                        if config['rot'] == 'trig' else None,
                        'quat':
                        output['quat'][k:k + 1].cpu()
                        if config['rot'] == 'quat' else None,
                        'wh':
                        output['wh'][k:k + 1].cpu() if config['wh'] else None,
                        'mask':
                        mask[k:k + 1].cpu(),
                    }

                    dets[img_id] = det.tolist()
                    if args.nms:
                        det = nms(det, dist_th=args.nms_th)
                    pred_df.loc[pred_df.ImageId == img_id,
                                'PredictionString'] = convert_labels_to_str(
                                    det[det[:, 6] > args.score_th, :7])

                    if args.show:
                        gt = batch['gt'].numpy()[k]

                        img = cv2.imread(batch['img_path'][k])
                        img_gt = visualize(img, gt[gt[:, -1] > 0])
                        img_pred = visualize(img,
                                             det[det[:, 6] > args.score_th])

                        plt.subplot(121)
                        plt.imshow(img_gt[..., ::-1])
                        plt.subplot(122)
                        plt.imshow(img_pred[..., ::-1])
                        plt.show()

                pbar.update(1)
            pbar.close()

        torch.save(outputs,
                   'outputs/raw/val/%s_%d.pth' % (args.name, fold + 1))

        torch.cuda.empty_cache()

        if not config['cv']:
            break

    with open('outputs/decoded/val/%s.json' % args.name, 'w') as f:
        json.dump(dets, f)

    name = '%s_%.2f' % (args.name, args.score_th)
    if args.nms:
        name += '_nms%.2f' % args.nms_th
    if args.hflip:
        name += '_hf'
    pred_df.to_csv('outputs/submissions/val/%s.csv' % name, index=False)
    print(pred_df.head())
Code Example #13
def main():
    args = parse_args()

    with open('models/pose/%s/config.yml' % args.pose_name, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    print('-' * 20)
    for key in config.keys():
        print('%s: %s' % (key, str(config[key])))
    print('-' * 20)

    cudnn.benchmark = True

    df = pd.read_csv('inputs/train.csv')
    img_ids = df['ImageId'].values
    img_paths = np.array('inputs/train_images/' + df['ImageId'].values +
                         '.jpg')
    mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values +
                          '.jpg')
    labels = np.array(
        [convert_str_to_labels(s) for s in df['PredictionString']])
    with open('outputs/decoded/val/%s.json' % args.det_name, 'r') as f:
        dets = json.load(f)

    if config['rot'] == 'eular':
        num_outputs = 3
    elif config['rot'] == 'trig':
        num_outputs = 6
    elif config['rot'] == 'quat':
        num_outputs = 4
    else:
        raise NotImplementedError

    test_transform = Compose([
        transforms.Resize(config['input_w'], config['input_h']),
        transforms.Normalize(),
        ToTensor(),
    ])

    det_df = {
        'ImageId': [],
        'img_path': [],
        'det': [],
        'mask': [],
    }

    name = '%s_%.2f' % (args.det_name, args.score_th)
    if args.nms:
        name += '_nms%.2f' % args.nms_th

    output_dir = 'processed/pose_images/val/%s' % name
    os.makedirs(output_dir, exist_ok=True)

    df = []
    kf = KFold(n_splits=config['n_splits'], shuffle=True, random_state=41)
    for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)):
        print('Fold [%d/%d]' % (fold + 1, config['n_splits']))

        # create model
        model = get_pose_model(config['arch'],
                               num_outputs=num_outputs,
                               freeze_bn=config['freeze_bn'])
        model = model.cuda()

        model_path = 'models/pose/%s/model_%d.pth' % (config['name'], fold + 1)
        if not os.path.exists(model_path):
            print('%s does not exist.' % model_path)
            continue
        model.load_state_dict(torch.load(model_path))

        model.eval()

        val_img_ids = img_ids[val_idx]
        val_img_paths = img_paths[val_idx]

        fold_det_df = {
            'ImageId': [],
            'img_path': [],
            'det': [],
            'mask': [],
        }

        for img_id, img_path in tqdm(zip(val_img_ids, val_img_paths),
                                     total=len(val_img_ids)):
            img = cv2.imread(img_path)
            height, width = img.shape[:2]

            det = np.array(dets[img_id])
            det = det[det[:, 6] > args.score_th]
            if args.nms:
                det = nms(det, dist_th=args.nms_th)

            for k in range(len(det)):
                pitch, yaw, roll, x, y, z, score, w, h = det[k]

                fold_det_df['ImageId'].append(img_id)
                fold_det_df['det'].append(det[k])
                output_path = '%s_%d.jpg' % (img_id, k)
                fold_det_df['img_path'].append(output_path)

                x, y = convert_3d_to_2d(x, y, z)
                w *= 1.1
                h *= 1.1
                xmin = int(round(x - w / 2))
                xmax = int(round(x + w / 2))
                ymin = int(round(y - h / 2))
                ymax = int(round(y + h / 2))

                cropped_img = img[ymin:ymax, xmin:xmax]
                if cropped_img.shape[0] > 0 and cropped_img.shape[1] > 0:
                    cv2.imwrite(os.path.join(output_dir, output_path),
                                cropped_img)
                    fold_det_df['mask'].append(1)
                else:
                    fold_det_df['mask'].append(0)

        fold_det_df = pd.DataFrame(fold_det_df)

        test_set = PoseDataset(output_dir + '/' +
                               fold_det_df['img_path'].values,
                               fold_det_df['det'].values,
                               transform=test_transform,
                               masks=fold_det_df['mask'].values)
        test_loader = torch.utils.data.DataLoader(
            test_set,
            batch_size=config['batch_size'],
            shuffle=False,
            num_workers=config['num_workers'],
            # pin_memory=True,
        )

        fold_dets = []
        with torch.no_grad():
            for input, batch_det, mask in tqdm(test_loader,
                                               total=len(test_loader)):
                input = input.cuda()
                batch_det = batch_det.numpy()
                mask = mask.numpy()

                output = model(input)
                output = output.cpu()

                if config['rot'] == 'trig':
                    yaw = torch.atan2(output[..., 1:2], output[..., 0:1])
                    pitch = torch.atan2(output[..., 3:4], output[..., 2:3])
                    roll = torch.atan2(output[..., 5:6], output[..., 4:5])
                    roll = rotate(roll, -np.pi)

                pitch = pitch.cpu().numpy()[:, 0]
                yaw = yaw.cpu().numpy()[:, 0]
                roll = roll.cpu().numpy()[:, 0]

                batch_det[mask, 0] = pitch[mask]
                batch_det[mask, 1] = yaw[mask]
                batch_det[mask, 2] = roll[mask]

                fold_dets.append(batch_det)

        fold_dets = np.vstack(fold_dets)

        fold_det_df['det'] = fold_dets.tolist()
        fold_det_df = fold_det_df.groupby('ImageId')['det'].apply(list)
        fold_det_df = pd.DataFrame({
            'ImageId': fold_det_df.index.values,
            'PredictionString': fold_det_df.values,
        })

        df.append(fold_det_df)
        break
    df = pd.concat(df).reset_index(drop=True)

    for i in tqdm(range(len(df))):
        img_id = df.loc[i, 'ImageId']
        det = np.array(df.loc[i, 'PredictionString'])

        if args.show:
            img = cv2.imread('inputs/train_images/%s.jpg' % img_id)
            img_pred = visualize(img, det)
            plt.imshow(img_pred[..., ::-1])
            plt.show()

        df.loc[i, 'PredictionString'] = convert_labels_to_str(det[:, :7])

    name += '_%s' % args.pose_name

    df.to_csv('outputs/submissions/val/%s.csv' % name, index=False)