Esempio n. 1
0
    args.cfg_file = "cfgs/{}.yml".format(args.net)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    if torch.cuda.is_available() and not cfg.CUDA:
        print("Warning: You have a CUDA device, so you should run on it")

    imdbval_name = "coco_2014_minival"
    imdb, roidb = combined_roidb(imdbval_name, False)
    if args.net == 'res101':
        fasterRCNN = resnet(imdb.classes, 101, pretrained=True)
    fasterRCNN.create_architecture()
    checkpoint = torch.load(args.models)
    fasterRCNN.load_state_dict(checkpoint['model'])
    print("Load model from %s" % (args.models))
    if args.gpu:
        fasterRCNN.cuda()

    fasterRCNN.eval()
    max_per_image = 100
    thresh = 0.05
    # vis = True

    imglist = os.listdir(args.image)
    num_images = len(imglist)
    print('Loaded Photo: {} images.'.format(num_images))
def inference(_test_img_path,
              _check_point,
              _score_threshold=0.3,
              class_agnostic=False):
    """Detect objects in a single image and show the annotated result.

    Builds the detector from ``cfg.backbone``, restores its weights from a
    checkpoint, runs one forward pass on the image, applies per-class score
    thresholding and NMS, draws the surviving boxes with ``draw_target`` and
    displays the image in an OpenCV window (blocks until a key is pressed).

    Args:
        _test_img_path: Path of the image to read with ``cv2.imread``.
        _check_point: Path of a checkpoint containing ``'model_state_dict'``.
        _score_threshold: Detections with a class score at or below this
            value are discarded.
        class_agnostic: Whether the box regressor predicts a single box per
            RoI instead of one box per class.

    Raises:
        OSError: If the image cannot be read.
    """
    test_img_path = _test_img_path
    check_point = _check_point
    score_threshold = _score_threshold

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    fasterRCNN = resnet(cfg.backbone,
                        is_training=False,
                        pretrained=False,
                        class_agnostic=class_agnostic)
    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (check_point))
    # map_location lets a GPU-saved checkpoint load on a CPU-only machine.
    checkpoint = torch.load(check_point, map_location=device)
    fasterRCNN.load_state_dict(checkpoint['model_state_dict'])
    print('load model successfully!')

    fasterRCNN.eval()
    fasterRCNN.to(device)

    start_time = time.time()

    test_img = cv2.imread(test_img_path)
    if test_img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('could not read image: {}'.format(test_img_path))

    # Preprocess a copy so the original image stays available for drawing.
    test_img_copy = copy.deepcopy(test_img)
    test_img_copy, scale = image_preprocess(test_img_copy)
    # The network input lives in its own tensor. The original code aliased
    # the im_info buffer to im_data, so filling im_info clobbered the image.
    im_data = torch.from_numpy(test_img_copy).to(device=device,
                                                 dtype=torch.float32)

    with torch.no_grad():
        rois, cls_prob, bbox_pred, _, _, _, _, _ = fasterRCNN(
            im_data, None)  # without ground truth
    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    box_deltas = bbox_pred.data
    if cfg.bbox_normalize_targets_precomputed:
        # Undo the target normalisation applied to the regression outputs.
        std = torch.FloatTensor(cfg.bbox_normalize_std).to(device)
        means = torch.FloatTensor(cfg.bbox_normalize_means).to(device)
        box_deltas = box_deltas.view(-1, 4) * std + means
        deltas_per_roi = 4 if class_agnostic else 4 * len(cfg.class_to_ind)
        box_deltas = box_deltas.view(1, -1, deltas_per_roi)
    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, (im_data.size(2), im_data.size(3)), 1)
    # Map the boxes back to the coordinates of the un-resized image.
    pred_boxes = pred_boxes / scale

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    # Index 0 is skipped (presumably the background class).
    for j in range(1, len(cfg.class_to_ind)):
        inds = torch.nonzero(scores[:, j] > score_threshold).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)

            if class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = cls_boxes[order]
            cls_scores = cls_scores[order]

            keep = nms(cls_dets, cls_scores, cfg.test_nms_threshold)
            keep = keep.view(-1).long()
            cls_dets = cls_dets[keep]  # boxes kept for the current class
            cls_scores = cls_scores[keep]
            test_img = draw_target(test_img, cls_dets, cls_scores, j)

    end_time = time.time()
    print('detect time:{}s'.format(end_time - start_time))

    cv2.imshow('result', test_img)
    cv2.waitKey(0)
Esempio n. 3
0
def train():
    """Train the Faster R-CNN model on the PASCAL VOC ``trainval`` split.

    All settings are read from the module-level ``cfg``: data and work
    directories, backbone, batch size, learning-rate schedule, number of
    epochs, gradient clipping and checkpointing. If ``cfg.load_from`` is set,
    model and optimizer state are restored from it and training continues
    with the epoch after the one recorded in the checkpoint. Checkpoints are
    written to ``cfg.work_dir``.
    """
    np.random.seed(cfg.rng_seed)

    os.makedirs(cfg.work_dir, exist_ok=True)

    train_set = PASCAL_VOC(cfg.trainset_root_path, 'trainval')
    dataloader = DataLoader(train_set,
                            batch_size=cfg.batch_size,
                            shuffle=True,
                            num_workers=4)
    # len(dataloader) also counts the final partial batch, so the printed
    # step number never exceeds the printed total.
    iters_per_epoch = len(dataloader)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    fasterRCNN = resnet(cfg.backbone, pretrained=True, class_agnostic=False)
    fasterRCNN.create_architecture()
    # Move the model before building the optimizer so the optimizer is
    # created over parameters that already live on the target device.
    fasterRCNN.to(device)

    optimizer = torch.optim.SGD(fasterRCNN.parameters(),
                                lr=cfg.learning_rate,
                                momentum=0.9,
                                weight_decay=5e-4)

    start_epoch = 0

    # Optionally resume from a checkpoint.
    if cfg.load_from is not None:
        checkpoint = torch.load(cfg.load_from, map_location=device)
        fasterRCNN.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # Checkpoints store the 0-based index of the epoch that just
        # finished, so resume with the following epoch instead of redoing it.
        start_epoch = checkpoint['epoch'] + 1

    for ep in range(start_epoch, cfg.epoch):

        fasterRCNN.train()

        if ep != 0 and (ep + 1) % cfg.lr_decay_step == 0:
            cur_lr = get_learing_rate(optimizer)
            adjust_learning_rate(optimizer, cur_lr * cfg.lr_decay)

        for step, batch_data in enumerate(dataloader):
            # Move the batch to the training device as float32 (the original
            # copied into FloatTensor buffers, which performed the same cast).
            im_data = batch_data['image'].to(device=device,
                                             dtype=torch.float32)
            gt_boxes = batch_data['gt_boxes'].to(device=device,
                                                 dtype=torch.float32)
            im_info = batch_data['im_info'].to(device=device,
                                               dtype=torch.float32)

            print('[epoch:{}/{}], [step {}/{}]'.format(ep + 1, cfg.epoch,
                                                       step + 1,
                                                       iters_per_epoch))

            rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
                    RCNN_loss_cls, RCNN_loss_bbox, \
                    roi_labels = fasterRCNN(im_data, gt_boxes, im_info)

            loss = rpn_loss_cls.mean() + rpn_loss_bbox.mean(
            ) + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            optimizer.zero_grad()
            loss.backward()
            if cfg.clip_grad:
                # Limit the gradients to guard against gradient explosion.
                clip_grad_norm(fasterRCNN.parameters(), 10)
            optimizer.step()

            cur_lr = get_learing_rate(optimizer)
            print(
                'loss:{:.5f}, lr:{}, rpn cls loss:{:.5f}, rpn bbox loss:{:.5f}, rcnn cls loss:{:.5f}, rcnn bbox loss:{:.5f}'
                .format(loss.item(), cur_lr, rpn_loss_cls.item(),
                        rpn_loss_bbox.item(), RCNN_loss_cls.item(),
                        RCNN_loss_bbox.item()))
            print('cls_prob:', cls_prob)

        # Save the model once an epoch has finished.
        # NOTE(review): this saves when ep is a multiple of
        # (checkpoint_interval + 1); "(ep + 1) % cfg.checkpoint_interval"
        # may have been intended -- confirm before changing.
        if ep % (cfg.checkpoint_interval + 1) == 0:
            state = {
                'epoch': ep,
                'model_state_dict': fasterRCNN.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
            }
            save_path = os.path.join(
                cfg.work_dir, cfg.checkpoint_name + '-' + str(ep + 1) + '.pth')
            torch.save(state, save_path)
def evalue(check_point,
           cache_path='./result.pkl',
           class_agnostic=False,
           ovthresh=0.5,
           use_07_metric=False):
    """Evaluate a checkpoint on the PASCAL VOC test split and print the mAP.

    If ``cache_path`` does not exist, the detector is run over the test set
    and the per-class detections are pickled to ``cache_path``; otherwise
    the cached detections are loaded and the image list is read from
    ``ImageSets/Main/test.txt``. Per-class average precision is then
    computed against the XML annotations and printed, followed by the mean
    over all classes except ``'BG'``.

    Args:
        check_point: Path of a checkpoint containing ``'model_state_dict'``.
        cache_path: Pickle file used to cache the detection results.
        class_agnostic: Whether the box regressor predicts a single box per
            RoI instead of one box per class.
        ovthresh: A detection counts as a match only if its best IoU with a
            ground-truth box is strictly greater than this value.
        use_07_metric: Forwarded to ``voc_ap``.
    """
    ind_class = {v: k for k, v in cfg.class_to_ind.items()}
    # Detections collected per class name.
    class_result_dic = {k: [] for k in cfg.class_to_ind.keys()}

    imagenames = []

    if not os.path.exists(cache_path):

        test_set = PASCAL_VOC(cfg.testset_root_path, 'test')
        # NOTE(review): the loop below reads imname[0], im_info[0, 2] and
        # reshapes with a leading 1, so it appears to assume a batch size
        # of 1 -- confirm cfg.batch_size is 1 for evaluation.
        dataloader = DataLoader(test_set,
                                batch_size=cfg.batch_size,
                                shuffle=True,
                                num_workers=4)

        device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        fasterRCNN = resnet(cfg.backbone,
                            is_training=False,
                            pretrained=False,
                            class_agnostic=class_agnostic)
        fasterRCNN.create_architecture()

        print("load checkpoint %s" % (check_point))

        # map_location lets a GPU-saved checkpoint load on a CPU-only machine.
        checkpoint = torch.load(check_point, map_location=device)
        fasterRCNN.load_state_dict(checkpoint['model_state_dict'])

        print('load model successfully!')

        fasterRCNN.eval()
        fasterRCNN.to(device)

        num_classes = len(cfg.class_to_ind)
        deltas_per_roi = 4 if class_agnostic else 4 * num_classes
        if cfg.bbox_normalize_targets_precomputed:
            # Loop-invariant de-normalisation constants.
            std = torch.FloatTensor(cfg.bbox_normalize_std).to(device)
            means = torch.FloatTensor(cfg.bbox_normalize_means).to(device)

        # Run detection to collect the results.
        with torch.no_grad():
            for batch_data in tqdm(dataloader):
                im_data = batch_data['image'].to(device=device,
                                                 dtype=torch.float32)
                gt_boxes = batch_data['gt_boxes'].to(device=device,
                                                     dtype=torch.float32)
                im_info = batch_data['im_info'].to(device=device,
                                                   dtype=torch.float32)

                image_name = os.path.basename(
                    batch_data['imname'][0]).split('.')[0]
                imagenames.append(image_name)

                rois, cls_prob, bbox_pred, _, _, _, _, _ = fasterRCNN(
                    im_data, gt_boxes)

                scores = cls_prob.data
                boxes = rois.data[:, :, 1:5]

                box_deltas = bbox_pred.data

                if cfg.bbox_normalize_targets_precomputed:
                    box_deltas = box_deltas.view(-1, 4) * std + means
                    # Class-specific regressors emit 4 values per class, so
                    # the restored layout must keep them side by side for
                    # the per-class column slicing below.
                    box_deltas = box_deltas.view(1, -1, deltas_per_roi)

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info, 1)
                # im_info[0, 2] is used as the resize scale here; dividing
                # maps the boxes back to original-image coordinates.
                pred_boxes = pred_boxes / im_info[0, 2]

                scores = scores.squeeze()
                pred_boxes = pred_boxes.squeeze()

                # Index 0 is skipped (presumably the background class).
                for j in range(1, num_classes):
                    inds = torch.nonzero(scores[:, j] > 0).view(-1)
                    if inds.numel() > 0:
                        cls_scores = scores[:, j][inds]
                        _, order = torch.sort(cls_scores, 0, True)

                        if class_agnostic:
                            cls_boxes = pred_boxes[inds, :]
                        else:
                            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                        # Order this class's boxes (not the raw all-class
                        # tensor) so boxes and scores stay aligned.
                        cls_dets = cls_boxes[order]
                        cls_scores = cls_scores[order]

                        keep = nms(cls_dets, cls_scores,
                                   cfg.test_nms_threshold)
                        keep = keep.view(-1).long()
                        cls_dets = cls_dets[keep]  # boxes kept for this class
                        cls_scores = cls_scores[keep]

                        # Store plain Python floats so the cache does not
                        # depend on tensors or on the device used here.
                        for score, bbox in zip(cls_scores.tolist(),
                                               cls_dets.tolist()):
                            class_result_dic[ind_class[j]].append({
                                'image_name': image_name,
                                'score': score,
                                'bbox': bbox[:4]
                            })

        print('writting result cache ......')
        with open(cache_path, 'wb') as fp:
            pickle.dump(class_result_dic, fp)
    else:
        with open(
                os.path.join(cfg.testset_root_path, 'ImageSets', 'Main',
                             'test.txt')) as fp:
            for line in fp:
                imagenames.append(line.strip())
        # NOTE: pickle.load can execute arbitrary code; only point
        # cache_path at files produced by this function.
        with open(cache_path, 'rb') as fp:
            class_result_dic = pickle.load(fp)

    print('computer mAP... ')
    # Parse the ground-truth annotations of every evaluated image.
    recs = {}
    for imagename in imagenames:
        recs[imagename] = parse_rec(
            os.path.join(cfg.testset_root_path, 'Annotations',
                         imagename + '.xml'))

    mAP = 0
    for classname in cfg.class_to_ind.keys():
        if classname == 'BG':
            continue
        print(classname, end=' ')

        # Extract the ground-truth objects of this class.
        class_recs = {}
        npos = 0
        for imagename in imagenames:
            R = [obj for obj in recs[imagename] if obj['name'] == classname]
            bbox = np.array([x['bbox'] for x in R])
            # np.bool was removed in NumPy 1.24; the builtin is equivalent.
            difficult = np.array([x['difficult'] for x in R]).astype(bool)
            det = [False] * len(R)
            npos = npos + int(np.sum(~difficult))
            class_recs[imagename] = {
                'bbox': bbox,
                'difficult': difficult,
                'det': det
            }

        class_result = class_result_dic[classname]
        image_ids = [r['image_name'] for r in class_result]
        confidence = np.array([float(r['score']) for r in class_result])
        # reshape keeps BB two-dimensional when a class has no detections.
        BB = np.array([[float(v) for v in r['bbox']]
                       for r in class_result]).reshape(-1, 4)

        # Sort by descending confidence.
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

        # Go down the detections and mark true and false positives.
        nd = len(image_ids)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        for d in range(nd):
            R = class_recs[image_ids[d]]
            bb = BB[d, :].astype(float)
            ovmax = -np.inf
            BBGT = R['bbox'].astype(float)
            if BBGT.size > 0:
                # Intersection with every ground-truth box.
                ixmin = np.maximum(BBGT[:, 0], bb[0])
                iymin = np.maximum(BBGT[:, 1], bb[1])
                ixmax = np.minimum(BBGT[:, 2], bb[2])
                iymax = np.minimum(BBGT[:, 3], bb[3])
                iw = np.maximum(ixmax - ixmin + 1., 0.)
                ih = np.maximum(iymax - iymin + 1., 0.)
                inters = iw * ih

                # Union.
                uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                       (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                       (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

                overlaps = inters / uni
                ovmax = np.max(overlaps)
                jmax = np.argmax(overlaps)

            if ovmax > ovthresh:
                # A match with a "difficult" object counts as neither a true
                # nor a false positive.
                if not R['difficult'][jmax]:
                    if not R['det'][jmax]:
                        tp[d] = 1.
                        R['det'][jmax] = 1
                    else:
                        # Duplicate detection of an already matched object.
                        fp[d] = 1.
            else:
                fp[d] = 1.

        # Compute precision and recall.
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / float(npos)
        # Avoid dividing by zero in case the first detection matches a
        # difficult ground truth.
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, use_07_metric)
        print(ap)
        mAP += ap
    mAP = mAP / (len(cfg.class_to_ind) - 1)

    print('mAP:', mAP)