def box_filter(box, box_scores, thresh, use_nms = True):
    """
    :param box: N x d_box
    :param box_scores: N scores
    :param thresh:
    :param use_nms:
    :return:
    """
    d_box = box.size(-1)
    inds = torch.nonzero(box_scores > thresh).view(-1)
    if inds.numel() > 0:
        cls_scores = box_scores[inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = box[inds, :]
        if use_nms:
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.COMMON.NMS).view(-1).long()
            cls_scores = cls_dets[keep][:, -1]
            cls_dets = cls_dets[keep][:, :-1]
            order = order[keep]
        else:
            cls_scores = cls_scores[order]
            cls_dets = cls_boxes[order]
        cls_dets = cls_dets.cpu().numpy()
        cls_scores = cls_scores.cpu().numpy()
        order = order.cpu().numpy()
    else:
        cls_scores = np.zeros(shape=(0,), dtype=np.float32)
        cls_dets = np.zeros(shape=(0, d_box), dtype=np.float32)
        order = np.array([], dtype=np.int32)
    return cls_dets, cls_scores, (inds.cpu().numpy())[order]
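All of the examples on this page assume an nms(dets, thresh) helper that takes an N x 5 tensor of [x1, y1, x2, y2, score] rows (already sorted by score) and returns the indices of the rows to keep. As a point of reference, here is a minimal pure-PyTorch sketch of that contract; it is illustrative only, not the repos' actual CPU/CUDA implementations:

import torch

def nms_sketch(dets, thresh):
    # Greedy IoU suppression; dets is N x 5, sorted by score descending.
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = torch.arange(dets.size(0))
    keep = []
    while order.numel() > 0:
        i = order[0].item()
        keep.append(i)
        if order.numel() == 1:
            break
        rest = order[1:]
        # intersection of the current top box with every remaining box
        w = (torch.min(x2[i], x2[rest]) - torch.max(x1[i], x1[rest]) + 1).clamp(min=0)
        h = (torch.min(y2[i], y2[rest]) - torch.max(y1[i], y1[rest]) + 1).clamp(min=0)
        inter = w * h
        iou = inter / (areas[i] + areas[rest] - inter)
        order = rest[iou <= thresh]  # drop boxes that overlap the kept one too much
    return torch.tensor(keep, dtype=torch.long)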
Example #2
def apply_nms(utt2predict, nms_thres, device):
    # predict has the shape [*, 4]
    # (start_t, end_t, prob_bg, spk_label)
    utt2seg = {}
    uttlist = list(utt2predict.keys())
    for utt in uttlist:
        utt_predict = utt2predict[utt]
        spklist = list(set(utt_predict[:, 3]))
        spklist.sort()
        segments_list = []

        for spk in spklist:
            predict = utt_predict[utt_predict[:, 3] == spk, :]
            predict[:, :2] = (predict[:, :2] * 100.0).astype(int)
            predict = torch.from_numpy(predict).to(device)

            # apply nms: pack the 1-D segments into 5-column detections
            # [x1, y1, x2, y2, score]; y1 = y2 = 0, so 2-D box NMS reduces
            # to NMS over time intervals
            predict_input = torch.zeros(predict.size(0), 5).type_as(predict)
            predict_input[:, 0] = predict[:, 0]      # start time as x1
            predict_input[:, 2] = predict[:, 1]      # end time as x2
            predict_input[:, 4] = 1 - predict[:, 2]  # score = 1 - P(background)

            keep = nms(predict_input, nms_thres, force_cpu=True)
            segments = predict[keep.view(-1).long()].data.cpu().numpy()

            segments = segments[segments[:, 0].argsort()]
            segments = segments[:, [0, 1]]
            segments[:, :2] = segments[:, :2] / 100.0
            segments = np.insert(segments, 2, spk, axis=1)
            segments_list.append(segments)
        segments_array = np.concatenate(segments_list, axis=0)
        segments_array = segments_array[segments_array[:, 0].argsort()]
        utt2seg[utt] = segments_array
    return utt2seg
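A rough usage sketch for apply_nms (the utterance key and numbers are invented, and the repo's nms wrapper with its force_cpu flag is assumed importable):

import numpy as np

# Columns: (start_t, end_t, prob_bg, spk_label); the first two rows overlap.
utt2predict = {
    "utt1": np.array([[0.00, 1.50, 0.10, 0],
                      [0.10, 1.40, 0.40, 0],
                      [2.00, 3.00, 0.05, 0]], dtype=np.float32),
}
utt2seg = apply_nms(utt2predict, nms_thres=0.5, device="cpu")
# utt2seg["utt1"] holds (start_t, end_t, spk_label) rows: the lower-scoring
# of the two overlapping candidates is suppressed and rows are sorted by start.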
Example #3
            def get_all_boxes(classes, im2show):
                for j in xrange(1, imdb.num_classes):
                    inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                    # if there is det
                    if inds.numel() > 0:
                        cls_scores = scores[:, j][inds]
                        _, order = torch.sort(cls_scores, 0, True)
                        if args.class_agnostic:
                            cls_boxes = pred_boxes[inds, :]
                        else:
                            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                        cls_dets = torch.cat(
                            (cls_boxes, cls_scores.unsqueeze(1)), 1)
                        # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                        cls_dets = cls_dets[order]
                        keep = nms(cls_dets, cfg.TEST.NMS)
                        cls_dets = cls_dets[keep.view(-1).long()]
                        if vis or args.save_for_vis:
                            im2show = vis_detections(
                                im2show, id2chn[imdb.classes[j]],
                                np.array([cls_dets.cpu().numpy()[0, :]]), 0.5)
                        all_boxes[j][i] = cls_dets.cpu().numpy()
                    else:
                        all_boxes[j][i] = empty_array
                return all_boxes, im2show
def visualize_without_paths(video_dataset, pred_boxes, scores, pred_trk_boxes,
                            det_classes):
    print("Visualizing...")
    list_im = video_dataset._frame_paths

    num_classes = len(det_classes)
    num_frames = len(list_im)

    for i_frame in range(num_frames - 1):
        print('frame: {}/{}'.format(i_frame, num_frames))
        fig, ax = plt.subplots(figsize=(12, 12))
        img_path = list_im[i_frame]
        img = cv2.imread(img_path)
        img = img[:, :, (2, 1, 0)]
        disp_image = Image.fromarray(np.uint8(img))
        for cls_ind in range(1, num_classes):
            ax.imshow(disp_image, aspect='equal')
            class_name = det_classes[cls_ind]
            keep = torch.nonzero(
                scores[i_frame][0][:, cls_ind] > CONF_THRESH).view(-1)
            if keep.numel() == 0:
                # no detections above threshold for this class
                continue
            cls_scores = scores[i_frame][0][keep][:, cls_ind]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[i_frame][0][keep, :]
            cls_dets = torch.cat(
                [cls_boxes, cls_scores.contiguous().view(-1, 1)], dim=1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, 0.3)
            cls_dets = cls_dets[keep.view(-1).long()]
            for ibox in range(cls_dets.size(0)):
                bbox = cls_dets[ibox, :4].cpu().numpy().flatten()
                score = cls_dets[ibox, 4]
                ax.add_patch(
                    plt.Rectangle((bbox[0], bbox[1]),
                                  bbox[2] - bbox[0],
                                  bbox[3] - bbox[1],
                                  fill=False,
                                  edgecolor=COLOR_WHEEL[cls_ind],
                                  linewidth=3.5))
                ax.text(bbox[0],
                        bbox[1] - 2,
                        '{:s} {:.3f}'.format(class_name, score),
                        bbox=dict(facecolor=COLOR_WHEEL[cls_ind], alpha=0.5),
                        fontsize=14,
                        color='white')

        # Save image with bboxes overlaid
        plt.axis('off')
        plt.tight_layout()
        #plt.show()
        if not os.path.exists(video_dataset._output_dir):
            os.makedirs(video_dataset._output_dir)
        plt.savefig(
            os.path.join(video_dataset._output_dir,
                         os.path.basename(img_path)))
        plt.clf()
        plt.close('all')
Example #5
def nms_detections(obj_rois, obj_scores):

    obj_scores = obj_scores.unsqueeze(1)  # N -> N x 1 for concatenation

    cls_dets = torch.cat((obj_rois, obj_scores), 1)
    keep = nms(cls_dets, 0.9)
    return keep
def bbox_proposal(obj_prob, att_prob, rois, conf_thresh=0.2, thresh=0.01):

    scores, clss = obj_prob[:, 1:].max(1)
    clss = clss.view(-1) + 1
    scores = scores.view(-1)

    max_conf = obj_prob.new(obj_prob.size(0)).zero_()
    max_index = obj_prob.new(obj_prob.size(0)).zero_().long()
    for obj_cls_ind in range(1, obj_prob.size(1)):
        obj_cls_scores = obj_prob[:, obj_cls_ind]

        inds = torch.nonzero(obj_cls_scores > thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            obj_cls_boxes_p = rois[inds]
            obj_cls_scores_p = obj_cls_scores[inds]

            _, order = torch.sort(obj_cls_scores_p, 0, True)
            cls_dets = torch.cat((obj_cls_boxes_p, obj_cls_scores_p.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, 0.5)
            keep = keep.view(-1).long()
            keep_idx = inds[order[keep]]
            tmp_idx = torch.nonzero(
                obj_cls_scores[keep_idx] > max_conf[keep_idx]).view(-1)
            if tmp_idx.numel() != 0:
                max_index[keep_idx[tmp_idx]] = obj_cls_ind

            max_conf[keep_idx] = torch.max(obj_cls_scores[keep_idx],
                                           max_conf[keep_idx])

    keep_boxes = (max_conf >= conf_thresh).nonzero().view(-1)
    num_boxes = keep_boxes.numel()
    if num_boxes < MIN_BOXES:
        _, order = torch.sort(max_conf, 0, True)
        keep_boxes = order[:MIN_BOXES]

        keep_clss = max_index[keep_boxes]
        all_clss = clss[keep_boxes]
        keep_clss[num_boxes:] = all_clss[num_boxes:]

        keep_scores = max_conf[keep_boxes]
        all_scores = scores[keep_boxes]
        keep_scores[num_boxes:] = all_scores[num_boxes:]

    elif num_boxes > MAX_BOXES:
        _, order = torch.sort(max_conf, 0, True)
        keep_boxes = order[:MAX_BOXES]

        keep_clss = max_index[keep_boxes]
        keep_scores = max_conf[keep_boxes]

    else:
        keep_clss = max_index[keep_boxes]
        keep_scores = max_conf[keep_boxes]

    return keep_boxes, keep_clss, keep_scores, num_boxes
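A hedged usage sketch for bbox_proposal (random tensors; MIN_BOXES and MAX_BOXES are module-level constants in the original code, and the same nms wrapper as above is assumed):

import torch

# Hypothetical inputs: 8 ROIs, 4 object classes (class 0 = background).
obj_prob = torch.rand(8, 4).softmax(dim=1)
att_prob = torch.rand(8, 10)              # not used by bbox_proposal itself
xy1 = torch.rand(8, 2) * 50
rois = torch.cat([xy1, xy1 + torch.rand(8, 2) * 50 + 5], dim=1)  # x1 < x2, y1 < y2

keep_boxes, keep_clss, keep_scores, num_boxes = bbox_proposal(
    obj_prob, att_prob, rois, conf_thresh=0.2, thresh=0.01)
# max_conf accumulates, per ROI, the best class score that survived NMS;
# keep_boxes holds ROI indices, topped up to MIN_BOXES or cut to MAX_BOXES.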
Example #7
    def _get_single_obj_det_results(self, cls_prob, bbox_pred, im_info):

        scores = cls_prob.data
        thresh = 0.05  # filter out low confidence boxes for acceleration
        results = []

        if cfg.TEST.COMMON.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.COMMON.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.COMMON.BBOX_NORMALIZE_STDS).type_as(box_deltas) \
                                + torch.FloatTensor(cfg.TRAIN.COMMON.BBOX_NORMALIZE_MEANS).type_as(box_deltas)
                box_deltas = box_deltas.view(1, -1, 4)
            pred_boxes = bbox_transform_inv(self.priors.type_as(bbox_pred).data, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(self.priors.data, (1, scores.shape[1]))

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        for j in xrange(1, self.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = pred_boxes[inds, :]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.COMMON.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                final_keep = torch.nonzero(cls_dets[:, -1] > cfg.TEST.COMMON.OBJ_DET_THRESHOLD).squeeze()
                result = cls_dets[final_keep]

                if result.numel() > 0 and result.dim() == 1:
                    result = result.unsqueeze(0)
                # in testing, concat object labels
                if final_keep.numel() > 0:
                    if self.training:
                        result = result[:,:4]
                    else:
                        result = torch.cat([result[:,:4],
                                j * torch.ones(result.size(0),1).type_as(result)],1)
                if result.numel() > 0:
                    results.append(result)

        if len(results):
            final = torch.cat(results, 0)
        else:
            final = torch.Tensor([]).type_as(bbox_pred)
        return final
def detect(file_path, NMS_THRESH=0.3):
    im = cv2.imread(file_path)
    scores, boxes = im_detect(net, im)
    cls_scores = scores[:, 1]
    cls_boxes = boxes[:, 4:8]
    dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    return dets[keep, :]
Example #9
def detect(net,
           im_path,
           device,
           thresh=0.5,
           visualize=False,
           timers=None,
           pyramid=False,
           visualization_folder=None):
    """
    Main module to detect faces
    :param net: The trained network
    :param im_path: The path to the image
    :param device: GPU or CPU device to be used
    :param thresh: Detection with a less score than thresh are ignored
    :param visualize: Whether to visualize the detections
    :param timers: Timers for calculating detect time (if None new timers would be created)
    :param pyramid: Whether to use pyramid during inference
    :param visualization_folder: If set the visualizations would be saved in this folder (if visualize=True)
    :return: cls_dets (bounding boxes concatenated with scores) and the timers
    """

    if not timers:
        timers = {'detect': Timer(), 'misc': Timer()}

    im = cv2.imread(im_path)
    imfname = os.path.basename(im_path)
    sys.stdout.flush()
    timers['detect'].tic()

    if not pyramid:
        im_scale = _compute_scaling_factor(im.shape, cfg.TEST.SCALES[0],
                                           cfg.TEST.MAX_SIZE)
        im_blob = _get_image_blob(im, [im_scale])[0]
        ssh_rois = forward(net, im_blob, im_scale, device, thresh)

    else:
        assert False, 'pyramid inference is not implemented'

    timers['detect'].toc()
    timers['misc'].tic()

    nms_keep = nms(ssh_rois, cfg.TEST.RPN_NMS_THRESH)
    cls_dets = ssh_rois[nms_keep, :]

    if visualize:
        plt_name = os.path.splitext(imfname)[0] + '_detections_{}'.format(
            "SSH pytorch")
        visusalize_detections(im,
                              cls_dets,
                              plt_name=plt_name,
                              visualization_folder=visualization_folder)
    timers['misc'].toc()
    return cls_dets, timers
    def generate_paths(self):
        for cls_ix in range(1, self.num_classes):  # skip background
            all_scores = np.ndarray(shape=(self.num_frame_pairs, ), dtype=object)
            cls_boxes = np.ndarray(shape=(self.num_frame_pairs, ), dtype=object)
            cls_scores = np.ndarray(shape=(self.num_frame_pairs, ), dtype=object)
            print('Class: {}'.format(self.classes[cls_ix]))
            self._curr_class = self.classes[cls_ix]
            for pair_ix in range(self.num_frame_pairs):
                boxes_t0 = self.pred_boxes[pair_ix][0].clone()
                scores_t0 = self.scores[pair_ix][0][:, cls_ix].clone()
                pick = torch.nonzero(scores_t0 > 0.0).view(-1)
                # If no good scores for this frame/class, go to next frame
                if pick.numel() == 0:
                    all_scores[pair_ix] = torch.cuda.FloatTensor(
                        0)  # empty tensor
                    cls_boxes[pair_ix] = torch.cuda.FloatTensor(
                        0)  # empty tensor
                    cls_scores[pair_ix] = torch.cuda.FloatTensor(
                        0)  # empty tensor
                    continue
                # Get scores that passed filter and sort highest-->lowest
                scores_t0 = scores_t0[pick]
                boxes_t0 = boxes_t0[pick, :]
                all_scores_t0 = self.scores[pair_ix][0][pick, :]
                _, pick = torch.sort(scores_t0, descending=True)
                # Take at most 50 per frame per class
                to_pick = min(50, pick.numel())
                pick = pick[:to_pick]
                scores_t0 = scores_t0[pick]
                boxes_t0 = boxes_t0[pick, :]
                all_scores_t0 = all_scores_t0[pick, :]
                cls_dets_t0 = torch.cat(
                    [boxes_t0, scores_t0.contiguous().view(-1, 1)], dim=1)
                pick = nms(cls_dets_t0, 0.3)
                # TODO check pick is sorted in descending order
                # Take top 10 dets after nms
                pick = pick.view(-1).long()
                pick = pick[:min(10, pick.numel())]

                cls_boxes[pair_ix] = boxes_t0[pick, :].clone()
                cls_scores[pair_ix] = scores_t0[pick].clone()
                all_scores[pair_ix] = all_scores_t0[pick, :].clone()

            paths = self.incremental_linking(cls_boxes, cls_scores, all_scores)
            self.all_paths[cls_ix] = paths
Example #11
def load_predict(predict_file, topn):
    """
    nms within class ans then select the top n bbox of higher score among classes
    :param predict_file:
    :return:
    """
    with open(predict_file, 'rb') as fp:
        predict = pkl.load(fp)

    pred_boxes = predict['bbox'].squeeze()
    scores = predict['cls_prob'].squeeze()
    roi_feat = predict['roi_feat'].squeeze()

    pthresh = 0.00001
    bbox = []
    keep_inds = []
    first = True
    for j in range(81):
        if j == 0: continue  # skip the background
        inds = torch.nonzero(scores[:, j] > pthresh).view(-1)
        if len(inds) == 0: continue

        cls_scores = scores[:, j][inds]
        _, order = torch.sort(cls_scores, 0, True)

        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)

        cls_dets = cls_dets[order]
        keep = nms(cls_dets, 0.4, force_cpu=0)
        inds = keep.view(-1).long()
        if len(inds) > 0:
            tmp_bbox = cls_dets[inds].cpu().data.numpy()
            keep = inds.cpu().numpy()
            if first:
                bbox = tmp_bbox
                keep_inds = keep
            else:
                bbox = np.vstack((bbox, tmp_bbox))
                keep_inds = np.concatenate((keep_inds, keep))
            first = False

    rank_ind = bbox[:, -1].argsort()
    select_inds = keep_inds[rank_ind][-topn:]
    select_classme = scores[select_inds, :].squeeze()
    select_feat = roi_feat[select_inds, :].squeeze()
    select_bbox = bbox[rank_ind][-topn:, 0:4]
    return select_bbox, select_classme.cpu().data.numpy(), select_feat.cpu().data.numpy()
    def detect(self, img_data):
        np_arr = np.frombuffer(img_data, np.uint8)
        image = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
        blobs, im_scales = get_image_blob(image)

        im_blob = blobs
        im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)
        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        self.im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
        self.im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
        self.gt_boxes.data.resize_(1, 1, 5).zero_()
        self.num_boxes.data.resize_(1).zero_()

        rois, cls_prob, bbox_pred, *_ = \
            self.model(self.im_data, self.im_info, self.gt_boxes, self.num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        box_deltas = bbox_pred.data
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                   + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(1, -1, 4 * len(self.classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, self.im_info.data, 1)

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        thresh = 0.7
        inds = torch.nonzero(scores[:, 1] > thresh).view(-1)
        cls_dets = []
        if inds.numel() > 0:
            cls_scores = scores[:, 1][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, 4:8]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=False)
            cls_dets = cls_dets[keep.view(-1).long()]
            cls_dets = cls_dets.cpu().numpy()
        cls_dets = [cls_det[:4] for cls_det in cls_dets]
        return cls_dets
Example #13
def from_dets(imdb_name, output_dir, args):
    imdb = get_imdb(imdb_name)
    imdb.competition_mode(args.comp_mode)
    imdb.config['matlab_eval'] = args.matlab_eval
    with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f:
        dets = cPickle.load(f)

    if args.apply_nms:
        print 'Applying NMS to all detections'
        nms_dets = nms(dets, cfg.TEST.NMS)
    else:
        nms_dets = dets

    print 'Evaluating detections'
    imdb.evaluate_detections(nms_dets, output_dir)
Example #14
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            if len(dets) == 0:
                continue
            keep = nms(torch.FloatTensor(dets).cuda(), thresh)
            keep = keep.view(-1).long().cpu().numpy()
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
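A rough usage sketch (invented numbers; requires a CUDA device and the same nms wrapper as the rest of this page):

import numpy as np

# all_boxes[cls][img] is an (N, 5) array of [x1, y1, x2, y2, score] rows,
# the same structure the test_net loops on this page produce.
all_boxes = [
    [np.zeros((0, 5), dtype=np.float32)],                   # background class
    [np.array([[10, 10, 50, 50, 0.9],
               [12, 12, 52, 52, 0.6]], dtype=np.float32)],  # class 1, image 0
]
nms_boxes = apply_nms(all_boxes, thresh=0.3)
# nms_boxes[1][0] keeps only the higher-scoring of the two overlapping boxes.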
Example #15
    def _make_tubes(self, frameBoxes, max_per_image, box_voting, tracks_cell):
        '''Build tubes for the c-th class.
        '''

        tracks = {'boxes': None, 'scores': None, 'c': None}
        tracks['boxes'] = tracks_cell[0]
        tracks['scores'] = tracks_cell[1]
        tracks['c'] = tracks_cell[2]

        nms_thresh = 0.3

        object_frames_boxes = np.ndarray((len(frameBoxes), ), dtype=object)
        object_frames_scores = np.ndarray((len(frameBoxes), ), dtype=object)
        object_frames_boxes_idx = np.ndarray((len(frameBoxes), ), dtype=object)
        object_frames_trackedboxes = np.ndarray((len(frameBoxes), ), dtype=object)

        # Iterate over the non-empty frames
        for f in range(len(frameBoxes) - 1):
            # boxes in frame f
            boxes = frameBoxes[f]
            if box_voting:  # TODO
                raise NotImplementedError
            else:
                nms_idx = nms(boxes[:, :5].clone(), nms_thresh).long().view(-1)
                if nms_idx.numel() > max_per_image:
                    nms_idx = nms_idx[:max_per_image]
                boxes = boxes[nms_idx]
                object_frames_boxes[f] = boxes[:, :4]
                object_frames_scores[f] = boxes[:, 4]
                object_frames_boxes_idx[f] = torch.arange(boxes.size(0)).cuda()
                if tracks['boxes'] is not None and tracks['boxes'][
                        f, 0] is not None:
                    object_frames_trackedboxes[f] = tracks['boxes'][f, :]

        paths = self._zero_jump_link(object_frames_boxes, object_frames_scores,
                                     object_frames_boxes_idx,
                                     object_frames_trackedboxes)

        return paths
Example #16
    def non_max_suppression(self, ssh_rois: List[torch.Tensor]):
        """
        perform NMS on ROIs given by SSH network 

        Arguments:
            ssh_rois {List[torch.Tensor]} -- list ROIs given by SSH network     
        Returns:
            bounding_boxes {[numpy.ndarray]} -- final list of bounding boxes for all detected faces
        """

        # NOTE :- The ROI operations are currently being perfomred on CPU, instead of cuda Tensors.
        # I've tried moving them to gpu but it doesn't work, atleast on my machine, despite there being a gpu version
        # of nms_code (./model/nms/gpu_nms.pyx)
        # NMS part of the code is barely taking any time as is, so i've left it this way for now

        bounding_boxes = []
        for single_roi in ssh_rois:
            single_roi = single_roi.cpu().numpy()
            nms_keep = nms(single_roi, cfg.TEST.RPN_NMS_THRESH)
            cls_single = single_roi[nms_keep, :]
            bounding_boxes.append(cls_single)
        return bounding_boxes
Example #17
def normalized_predictions(scores, boxes, img2, thresh=0.6, NMS=0.3):
    # NOTE: bbox_pred, im_info, im_scales and classes are assumed to come
    # from the enclosing scope, as in the original test script.
    if cfg.TEST.BBOX_REG:
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                   + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(1, -1, 4 * len(classes))
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        
    pred_boxes /= im_scales[0]
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    im2show = np.copy(img2)

    # Iterate over all classes
    for j in range(1, len(classes)):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)

            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, NMS, force_cpu=not cfg.USE_GPU_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            im2show = vis_detections(im2show, classes[j], cls_dets.cpu().numpy(), 0.5)
    return im2show, cls_dets.cpu().numpy()
def bbox_proposal_fast(obj_prob, att_prob, rois):

    batch_size = obj_prob.size(0)

    # get the top obj cls, excluded the background class.
    max_obj_prob, max_obj_clss = obj_prob[:, :, 1:].max(2)
    # get the top att cls, exclude the background class.
    max_att_prob, max_att_clss = att_prob[:, :, 1:].max(2)
    # get the top rel cls, exlude the background class
    # max_rel_scores, max_rel_ind = rel_prob[:, :, 1:].max(2)

    # compute the final score, B x N
    obj_att_scores = max_obj_prob * max_att_prob

    # sort final scores
    obj_att_scores_sorted, order = torch.sort(obj_att_scores, 1, True)

    rois_pop = rois.new(batch_size, MIN_BOXES, rois.size(2)).zero_()
    rois_pop_id = rois.new(batch_size, MIN_BOXES).long().zero_()

    # rel_pairs_pop = rel_pairs.new(batch_size, self.rel_num, rel_pairs.size(2))
    # rel_pairs_pop_id = rel_pairs.new(batch_size, self.rel_num)

    for i in range(batch_size):
        proposals_i = rois[i][order[i]][:, 1:]
        scores_i = obj_att_scores[i][order[i]].view(-1, 1)
        keep_idx_i = nms(torch.cat((proposals_i, scores_i), 1), 0.5)
        keep_idx_i = keep_idx_i.long().view(-1)
        num_rois_pop = min(keep_idx_i.size(0), MIN_BOXES)
        rois_pop[i][:num_rois_pop] = rois[i][order[i][
            keep_idx_i[:num_rois_pop]]]
        rois_pop_id[i][:num_rois_pop] = order[i][keep_idx_i[:num_rois_pop]]

    return rois_pop, rois_pop_id
Example #19
def vis_dets(img, im_info, rois, bbox_pred, obj_cls_prob, imdb):

    im2show = img.data.permute(2, 3, 1, 0).squeeze().cpu().numpy()
    im2show += cfg.PIXEL_MEANS
    thresh = 0.01
    boxes = rois[:, :, 1:5]
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4)
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    obj_scores = obj_cls_prob.squeeze()
    pred_boxes = pred_boxes.squeeze()

    for j in xrange(1, len(imdb._classes)):
        inds = torch.nonzero(obj_scores[:, j] > thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            obj_cls_scores = obj_scores[:, j][inds]
            _, order = torch.sort(obj_cls_scores, 0, True)
            cls_boxes = pred_boxes[inds, :]
            cls_dets = torch.cat((cls_boxes, obj_cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            im2show = vis_detections(im2show, imdb._classes[j],
                                     cls_dets.cpu().numpy(), 0.2)
    # save image to disk
    cv2.imwrite("detections.jpg", im2show)
Example #20
        for j in xrange(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets,
                           cfg.TEST.NMS,
                           force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                dets = cls_dets.cpu().numpy()
                for i in range(dets.shape[0]):
                    writer.writerow([
                        imglist[num_images], pascal_classes[j], dets[i, 0],
                        dets[i, 1], dets[i, 2], dets[i, 3]
                    ])
                if vis:
                    im2show = vis_detections(im2show, pascal_classes[j],
                                             cls_dets.cpu().numpy(), 0.5)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
Example #21
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, :, 1]  # batch_size x num_rois
        bbox_deltas = input[1]  # batch_size x num_rois x 4
        im_info = input[2]
        cfg_key = input[3]
        feat_shapes = input[4]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        anchors = torch.from_numpy(
            generate_anchors_all_pyramids(
                self._fpn_scales, self._anchor_ratios, feat_shapes,
                self._fpn_feature_strides,
                self._fpn_anchor_stride)).type_as(scores)
        num_anchors = anchors.size(0)

        anchors = anchors.view(1, num_anchors,
                               4).expand(batch_size, num_anchors, 4)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # keep_idx = self._filter_boxes(proposals, min_size).squeeze().long().nonzero().squeeze()

        scores_keep = scores
        proposals_keep = proposals

        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
Example #22
    def bbox_selection(self, relation, pred_boxes, scores, pooled_feat):
        """
        delete bbox of low scores and do NMS
        :param pred_boxes:
        :param scores:
        :return:
        """

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        class_bboxes = {}
        class_feats = {}
        class_pros = {}
        sub, pre, obj = relation[0].split('-')
        sind, oind = self.classes.index(sub), self.classes.index(obj)
        for c, j in enumerate([sind, oind]):
            inds = torch.nonzero(scores[:, j] > self.pthresh).view(-1)

            if inds.numel() > 0:

                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)

                cls_dets = cls_dets[order]
                keep = nms(cls_dets,
                           cfg.TEST.NMS,
                           force_cpu=not cfg.USE_GPU_NMS)

                inds = keep.view(-1).long()
                #inds = inds.numpy()
                inds_new = []
                for i in inds:
                    bbox = cls_dets[i, 0:4]
                    # skip boxes narrower or shorter than 5 px
                    if bbox[2] - bbox[0] < 5.0 or bbox[3] - bbox[1] < 5.0:
                        continue
                    inds_new.append(i)

                if len(inds_new) == 0: continue

                inds = torch.cuda.LongTensor(np.array(inds_new))

                cls_dets = cls_dets[inds]
                cls_feats = pooled_feat[inds]
                cls_pros = scores[inds]

                class_bboxes[c] = cls_dets.data.cpu().numpy()
                class_feats[c] = cls_feats.data.cpu().numpy()
                class_pros[c] = cls_pros.data.cpu().numpy()
                if c == 0 and sind == oind:
                    class_bboxes[1] = cls_dets.data.cpu().numpy()
                    class_feats[1] = cls_feats.data.cpu().numpy()
                    class_pros[1] = cls_pros.data.cpu().numpy()
                    break
                """
                print('bbox shape:{}\t classme shape:{}\tfeat shape:{}'.format(cls_dets.shape,
                                                                               cls_feats.shape,
                                                                               cls_pros.shape))
                """
        return class_bboxes, class_pros, class_feats
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                if args.soft_nms:
                    np_dets = cls_dets.cpu().numpy().astype(np.float32)
                    # soft_nms mutates np_dets in place
                    keep = soft_nms(np_dets, method=cfg.TEST.SOFT_NMS_METHOD)
                    keep = torch.from_numpy(keep).type_as(cls_dets).int()
                    cls_dets = torch.from_numpy(np_dets).type_as(cls_dets)
                else:
                    keep = nms(cls_dets, 0.1)
                cls_dets = cls_dets[keep.view(-1).long()]
                cls_dets = cls_dets.cpu().numpy()
            else:
                cls_dets = np.array([])

            if vis:
                im2show = vis_detections(final_result,
                                         imglist[i],
                                         im2show,
                                         classes[j],
                                         cls_dets,
                                         thresh=args.thresh)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
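For contrast with the hard nms calls elsewhere on this page, here is a minimal NumPy sketch of the linear variant of soft-NMS, which decays the scores of overlapping boxes instead of discarding them. The actual soft_nms used above may differ (method selection, Gaussian sigma, in-place mutation of np_dets); this is illustrative only:

import numpy as np

def soft_nms_linear(dets, iou_thresh=0.3, score_thresh=0.001):
    # dets is N x 5 ([x1, y1, x2, y2, score]); returns the surviving rows.
    dets = dets.copy()
    keep = []
    while dets.shape[0] > 0:
        top = np.argmax(dets[:, 4])
        best = dets[top].copy()
        keep.append(best)
        dets = np.delete(dets, top, axis=0)
        # IoU of every remaining box against the box just kept
        w = np.maximum(0.0, np.minimum(best[2], dets[:, 2]) - np.maximum(best[0], dets[:, 0]) + 1)
        h = np.maximum(0.0, np.minimum(best[3], dets[:, 3]) - np.maximum(best[1], dets[:, 1]) + 1)
        inter = w * h
        area_best = (best[2] - best[0] + 1) * (best[3] - best[1] + 1)
        areas = (dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1)
        iou = inter / (area_best + areas - inter)
        dets[:, 4] *= np.where(iou > iou_thresh, 1.0 - iou, 1.0)  # linear decay
        dets = dets[dets[:, 4] >= score_thresh]
    return np.stack(keep) if keep else np.zeros((0, 5), dtype=np.float32)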
Example #24
def evaluator(model, args, evl_rec=False):

    fasterRCNN = model
    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    cfg.TRAIN.USE_FLIPPED = False

    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    if args.cuda:
        fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100

    vis = False

    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)

    # These models are pytorch pretrained with RGB channel
    rgb = True if args.net in ('res18', 'res34', 'inception') else False

    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
               imdb.num_classes, training=False, normalize = False, rgb=rgb)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)
    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    if evl_rec:
        true_positive, ground_truth = 0.0, 0.0
        recall = AverageMeter()

    for i in range(num_images):

        data = next(data_iter)
        im_data.data.resize_(data[0].size()).copy_(data[0])
        im_info.data.resize_(data[1].size()).copy_(data[1])
        gt_boxes.data.resize_(data[2].size()).copy_(data[2])
        num_boxes.data.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                          + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                          + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        if evl_rec:
            # evaluate RPN recall only
            boxes_per_img = boxes.squeeze().cpu().numpy() / data[1][0][2].item()
            #pdb.set_trace()
            #TP, GT = evaluate_final_recall(pred_boxes.squeeze().cpu().numpy(), i, imdb, thr=0.5)
            TP, GT = evaluate_recall(boxes_per_img, i, imdb, thr=0.5)
            recall.update(TP, GT)

            sys.stdout.write('TP/GT: {}/{} | Recall: {:.3f} \r'.format(
                TP, GT, recall.avg))
            sys.stdout.flush()
            continue

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
          .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            #cv2.imshow('test', im2show)
            #cv2.waitKey(0)

    if evl_rec:
        print('\r\nThe average rpn recall is: {:.4f}'.format(recall.avg))
        return recall.avg

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    mAP = imdb.evaluate_detections(all_boxes, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return mAP
def test_net(tdcnn_demo, dataloader, args):

    start = time.time()
    # TODO: Add restriction for max_per_video
    max_per_video = 0

    if args.vis:
        thresh = 0.05
    else:
        thresh = 0.005
    
    all_twins = [[[] for _ in xrange(args.num_videos)]
               for _ in xrange(args.num_classes)]

    _t = {'im_detect': time.time(), 'misc': time.time()}

    tdcnn_demo.eval()
    empty_array = np.transpose(np.array([[],[],[]]), (1,0))
  
    data_tic = time.time()
    for i, (video_data, gt_twins, num_gt, video_info) in enumerate(dataloader):
        video_data = video_data.cuda()
        gt_twins = gt_twins.cuda()
        batch_size = video_data.shape[0]
        data_toc = time.time()
        data_time = data_toc - data_tic

        det_tic = time.time()
        rois, cls_prob, twin_pred = tdcnn_demo(video_data, gt_twins)
#        rpn_loss_cls, rpn_loss_twin, \
#        RCNN_loss_cls, RCNN_loss_twin, rois_label = tdcnn_demo(video_data, gt_twins)

        scores_all = cls_prob.data
        twins = rois.data[:, :, 1:3]

        if cfg.TEST.TWIN_REG:
            # Apply bounding-twin regression deltas
            twin_deltas = twin_pred.data
            if cfg.TRAIN.TWIN_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                twin_deltas = twin_deltas.view(-1, 2) * torch.FloatTensor(cfg.TRAIN.TWIN_NORMALIZE_STDS).type_as(twin_deltas) \
                       + torch.FloatTensor(cfg.TRAIN.TWIN_NORMALIZE_MEANS).type_as(twin_deltas)
                twin_deltas = twin_deltas.view(batch_size, -1, 2 * args.num_classes)

            pred_twins_all = twin_transform_inv(twins, twin_deltas)
            pred_twins_all = clip_twins(pred_twins_all, cfg.TRAIN.LENGTH[0])
        else:
            # Simply repeat the twins, once for each class
            pred_twins_all = np.tile(twins, (1, scores_all.shape[1]))
            
        det_toc = time.time()
        detect_time = det_toc - det_tic
        
        for b in range(batch_size):
            misc_tic = time.time()        
            print(video_info[b])        
            scores = scores_all[b] #scores.squeeze()
            pred_twins = pred_twins_all[b] #.squeeze()

            # skip j = 0, because it's the background class          
            for j in xrange(1, args.num_classes):
                inds = torch.nonzero(scores[:,j]>thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:,j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_twins = pred_twins[inds][:, j * 2:(j + 1) * 2]
                    
                    cls_dets = torch.cat((cls_twins, cls_scores.unsqueeze(1)), 1)
                    # cls_dets = torch.cat((cls_twins, cls_scores), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_dets, cfg.TEST.NMS)
                    if len(keep) > 0:
                        cls_dets = cls_dets[keep.view(-1).long()]
                        print("activity: ", j)
                        print(cls_dets.cpu().numpy())

                    all_twins[j][i*batch_size+b] = cls_dets.cpu().numpy()
                else:
                    all_twins[j][i*batch_size+b] = empty_array

            # Limit to max_per_video detections *over all classes*
            if max_per_video > 0:
                video_scores = np.hstack([all_twins[j][i*batch_size+b][:, -1]
                                          for j in xrange(1, args.num_classes)])
                if len(video_scores) > max_per_video:
                    video_thresh = np.sort(video_scores)[-max_per_video]
                    for j in xrange(1, args.num_classes):
                        keep = np.where(all_twins[j][i*batch_size+b][:, -1] >= video_thresh)[0]
                        all_twins[j][i*batch_size+b] = all_twins[j][i*batch_size+b][keep, :]
                          
            misc_toc = time.time()
            nms_time = misc_toc - misc_tic
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s {:.3f}s'
                  .format(i*batch_size+b+1, args.num_videos, data_time/batch_size, detect_time/batch_size, nms_time))

        if args.vis:
            pass

        data_tic = time.time()
    end = time.time()
    print("test time: %0.4fs" % (end - start))
    def Predict(self, im_in, area):
        # initialize the tensor holder here.
        im_data = torch.FloatTensor(1)
        im_info = torch.FloatTensor(1)
        num_boxes = torch.LongTensor(1)
        gt_boxes = torch.FloatTensor(1)

        # ship to cuda
        if self.cuda > 0:
            im_data = im_data.cuda()
            im_info = im_info.cuda()
            num_boxes = num_boxes.cuda()
            gt_boxes = gt_boxes.cuda()

        # make variable
        with torch.no_grad():
            im_data = Variable(im_data)
            im_info = Variable(im_info)
            num_boxes = Variable(num_boxes)
            gt_boxes = Variable(gt_boxes)

        if self.cuda > 0:
            cfg.CUDA = True

        if self.cuda > 0:
            self.fasterRCNN.cuda()

        self.fasterRCNN.eval()

        #im_in = cv2.imread(im_file)
        if len(im_in.shape) == 2:
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr
        im_in = im_in[:, :, ::-1]
        im = cv2.cvtColor(im_in, cv2.COLOR_BGR2RGB)

        blobs, im_scales = self._get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.data.resize_(1, 1, 5).zero_()
        num_boxes.data.resize_(1).zero_()


        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = self.fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if self.class_agnostic:
                    if self.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if self.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1,
                                                 4 * len(self.pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            _ = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
            pred_boxes = _.cuda() if self.cuda > 0 else _

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        ItemAndBoxes_all = []
        im2show = np.copy(im)
        for j in xrange(1, len(self.pascal_classes)):
            inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets,
                           cfg.TEST.NMS,
                           force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                im2show, ItemAndBoxes = vis_detections(im2show,
                                                       self.pascal_classes[j],
                                                       cls_dets.cpu().numpy(),
                                                       self.visThresh)
                ItemAndBoxes_all.append(ItemAndBoxes)

        ItemAndBoxes_all = sorted(ItemAndBoxes_all,
                                  key=lambda x: x[2],
                                  reverse=True)
        ItemAndBoxes_all = ItemAndBoxes_all[0:3]
        ItemAndBoxes_all = sorted(ItemAndBoxes_all, key=lambda x: x[1][0])

        if self.vis == 1:
            cv2.namedWindow("result", 0)
            cv2.resizeWindow("result", 1080, 720)
            cv2.imshow('result', im2show)
            cv2.waitKey(0)
            result_path = os.path.join(self.image_dir, str(area) + ".jpg")
            cv2.imwrite(result_path, im2show)

        return {
            "Left": ItemAndBoxes_all[0][0],
            "Mid": ItemAndBoxes_all[1][0],
            "Right": ItemAndBoxes_all[2][0]
        }
          im = cv2.imread(imdb.image_path_at(i))
          im2show = np.copy(im)
      for j in xrange(1, imdb.num_classes):
          inds = torch.nonzero(scores[:,j]>thresh).view(-1)
          # if there is det
          if inds.numel() > 0:
            cls_scores = scores[:,j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
              cls_boxes = pred_boxes[inds, :]
            else:
              cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            if vis:
              im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3)
            all_boxes[j][i] = cls_dets.cpu().numpy()
          else:
            all_boxes[j][i] = empty_array

      # Limit to max_per_image detections *over all classes*
      if max_per_image > 0:
          image_scores = np.hstack([all_boxes[j][i][:, -1]
                                    for j in xrange(1, imdb.num_classes)])
          if len(image_scores) > max_per_image:
              image_thresh = np.sort(image_scores)[-max_per_image]
              for j in xrange(1, imdb.num_classes):
                  keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                  all_boxes[j][i] = all_boxes[j][i][keep, :]
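For reference, a self-contained sketch of the max_per_image cap used in the snippet above: gather scores over all classes, find the global threshold that keeps at most max_per_image boxes, and filter each class with it. The dets_per_class argument and its (N, 5) score-last layout are assumptions that mirror all_boxes[j][i]; this is illustrative, not the repo's own helper.

import numpy as np

def cap_detections_per_image(dets_per_class, max_per_image):
    # dets_per_class: list of (N_j, 5) arrays, last column = score
    per_class_scores = [d[:, -1] for d in dets_per_class if d.size]
    if not per_class_scores:
        return dets_per_class
    scores = np.hstack(per_class_scores)
    if scores.size <= max_per_image:
        return dets_per_class
    image_thresh = np.sort(scores)[-max_per_image]   # score of the weakest kept box
    return [d[d[:, -1] >= image_thresh] if d.size else d
            for d in dets_per_class]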
Exemple #28
0
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                opt_frcn=None,
                vis_flag=False):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        #self.FRCN.eval()
        #self.FRCN.RCNN_base.eval()

        #for param in self.FRCN.parameters():
        #    param.requires_grad = False

        gt_boxes_frcn = gt_boxes.cpu().data.numpy()[0]

        for gt in gt_boxes_frcn:
            if gt[..., -1] < 4 and gt[..., -1] > 0:
                gt[..., -1] = 1
            elif gt[..., -1] >= 4:
                gt[..., -1] = 2

        gt_boxes_frcn = gt_boxes_frcn[None, ...]
        gt_boxes_frcn = torch.Tensor(gt_boxes_frcn)

        if self.use_cuda:
            gt_boxes_frcn = gt_boxes_frcn.cuda()
        rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
            RCNN_loss_cls, RCNN_loss_bbox, rois_label \
            = self.FRCN(im_data, im_info, gt_boxes_frcn, num_boxes)

        if opt_frcn is not None:
            opt_frcn.zero_grad()

        # get global and local region from Faster R-CNN

        base_feat = self.FRCN.RCNN_base(im_data)

        #print(rois.data.cpu().numpy())
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        box_deltas = self.FRCN._bbox_pred.data

        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if self.class_agnostic:
                if self.use_cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda(
                        ) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if self.use_cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda(
                        ) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * 3)

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        #print(scores)
        # get global region
        thresh = 0.0

        region_g = np.ndarray((0, 5))
        region_l = np.ndarray((0, 5))

        # get global region
        inds = torch.nonzero(scores[:, 1] >= thresh).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, 1][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if self.class_agnostic:
                cls_boxes = pred_boxes[inds]
            else:
                cls_boxes = pred_boxes[inds][:, 4:8]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            region_g = cls_dets.cpu().numpy()  # keep as numpy for the selection logic below

        # get local region
        inds = torch.nonzero(scores[:, 2] >= thresh).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, 2][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if self.class_agnostic:
                cls_boxes = pred_boxes[inds]
            else:
                cls_boxes = pred_boxes[inds][:, 8:12]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            region_l = cls_dets.cpu().numpy()

        # select region for recognition
        if not self.training:
            self.minibatch = 1

        if self.training:
            if self.minibatch % 2 == 0:
                high_ind = self.minibatch // 2
                low_ind = self.minibatch // 2
            elif self.minibatch == 1:
                high_ind = 1
                low_ind = 0
            else:
                high_ind = self.minibatch // 2 + 1
                low_ind = self.minibatch // 2

            keep = nms(torch.tensor(region_g, dtype=torch.float).cuda(),
                       self.nms_iou,
                       force_cpu=not cfg.USE_GPU_NMS)
            if type(keep) is not list:
                keep = keep.view(-1).long().cpu().numpy()
            region_g = region_g[keep]
            sort_ind = np.argsort(region_g[..., -1])
            high_ind_g = sort_ind[-high_ind:]
            low_ind_g = sort_ind[:low_ind]

            keep = nms(torch.tensor(region_l, dtype=torch.float).cuda(),
                       self.nms_iou,
                       force_cpu=not cfg.USE_GPU_NMS)
            if type(keep) is not list:
                keep = keep.view(-1).long().cpu().numpy()
            region_l = region_l[keep]
            sort_ind = np.argsort(region_l[..., -1])
            high_ind_l = sort_ind[-high_ind:]
            low_ind_l = sort_ind[:low_ind]

            high_num = min(len(high_ind_g), len(high_ind_l))
            high_ind_g = high_ind_g[:high_num]
            high_ind_l = high_ind_l[:high_num]

            low_num = min(len(low_ind_g), len(low_ind_l))
            low_ind_g = low_ind_g[:low_num]
            low_ind_l = low_ind_l[:low_num]

            if len(high_ind_g) < 1:
                region_g_high = np.zeros((1, 5))  # placeholder box; np.ndarray((5)) was uninitialized memory
            else:
                region_g_high = region_g[high_ind_g]
            if len(low_ind_g) < 1:
                region_g_low = np.zeros((1, 5))
            else:
                region_g_low = region_g[low_ind_g]

            if len(high_ind_l) < 1:
                region_l_high = np.zeros((1, 5))
            else:
                region_l_high = region_l[high_ind_l]
            if len(low_ind_l) < 1:
                region_l_low = np.zeros((1, 5))
            else:
                region_l_low = region_l[low_ind_l]

            proposal_g = np.vstack((region_g_high, region_g_low))
            proposal_l = np.vstack((region_l_high, region_l_low))
            #proposal_g = np.vstack((region_g[high_ind_g], region_g[low_ind_g]))
            #proposal_l = np.vstack((region_l[high_ind_l], region_l[low_ind_l]))

            #self.proposal_g.data.resize_(proposal_g.size()).copy_(proposal_g)
            #self.proposal_l.data.resize_(proposal_l.size()).copy_(proposal_l)

            gt_boxes = gt_boxes.cpu().numpy()[0, :2]

            gt_g = gt_boxes[np.where(gt_boxes[..., -1] < 4)[0]]
            gt_l = gt_boxes[np.where(gt_boxes[..., -1] >= 4)[0]]

            # compute pair ground truth
            def compute_iou(ps, gt):
                iou_x1 = np.maximum(ps[..., 0], gt[0])
                iou_y1 = np.maximum(ps[..., 1], gt[1])
                iou_x2 = np.minimum(ps[..., 2], gt[2])
                iou_y2 = np.minimum(ps[..., 3], gt[3])
                iou_w = np.maximum(iou_x2 - iou_x1, 0)
                iou_h = np.maximum(iou_y2 - iou_y1, 0)
                iou_area = iou_w * iou_h
                gt_area = (gt[2] - gt[0]) * (gt[3] - gt[1])
                p_area = (ps[..., 2] - ps[..., 0]) * (ps[..., 3] - ps[..., 1])
                overlap = iou_area / (gt_area + p_area - iou_area)
                count = np.zeros((ps.shape[0]), dtype=int)
                count[overlap >= self.gt_iou] += 1
                return count

            cou = compute_iou(proposal_g, gt_g[0]) + compute_iou(
                proposal_l, gt_l[0])

            ## 2019.2.13
            glcc_gt = np.zeros((proposal_g.shape[0]), dtype=int)
            glcc_gt[cou >= 1] = gt_g[0, -1]
            #glcc_gt[:] = gt_g[0, -1]
            glcc_gt = torch.tensor(glcc_gt, dtype=torch.long).cuda()
            #print(glcc_gt)
            #self.glcc_gt.data.resize_(glcc_gt.size()).copy_(glcc_gt)

        else:
            # test phase
            proposal_g = region_g[np.argmax(region_g[..., -1])][None, ...]
            proposal_l = region_l[np.argmax(region_l[..., -1])][None, ...]
            #self.proposal_g.data.resize_(proposal_g.size()).copy_(proposal_g.size())
            #self.proposal_l.data.resize_(proposal_l.size()).copy_(proposal_l.size())

        # if true, then show detection global and local region
        if vis_flag:
            if torch.is_tensor(gt_boxes):
                gt_boxes = gt_boxes.cpu().numpy()[0]  # test phase: gt_boxes is still a tensor here
            gt_boxes = gt_boxes.astype(int)
            im = im_data.cpu().numpy()[0]
            im = np.transpose(im, (1, 2, 0))[..., ::-1]
            im -= im.min()
            im /= im.max()
            plt.imshow(im.astype(float))
            ax = plt.axes()
            ax.add_patch(
                plt.Rectangle((region_g[0, 0], region_g[0, 1]),
                              region_g[0, 2] - region_g[0, 0],
                              region_g[0, 3] - region_g[0, 1],
                              fill=False,
                              edgecolor='red',
                              linewidth=1))

            ax.add_patch(
                plt.Rectangle((region_l[0, 0], region_l[0, 1]),
                              region_l[0, 2] - region_l[0, 0],
                              region_l[0, 3] - region_l[0, 1],
                              fill=False,
                              edgecolor='yellow',
                              linewidth=1))

            ax.add_patch(
                plt.Rectangle((gt_boxes[0, 0], gt_boxes[0, 1]),
                              gt_boxes[0, 2] - gt_boxes[0, 0],
                              gt_boxes[0, 3] - gt_boxes[0, 1],
                              fill=False,
                              edgecolor='green',
                              linewidth=1))
            ax.add_patch(
                plt.Rectangle((gt_boxes[1, 0], gt_boxes[1, 1]),
                              gt_boxes[1, 2] - gt_boxes[1, 0],
                              gt_boxes[1, 3] - gt_boxes[1, 1],
                              fill=False,
                              edgecolor='white',
                              linewidth=1))
            plt.show()

        rois_g = np.zeros((1, proposal_g.shape[0], 5), dtype=np.float32)
        rois_g[0, :, 1:5] = proposal_g[:, :4]
        #rois_g /= 16.
        rois_l = np.zeros((1, proposal_l.shape[0], 5), dtype=np.float32)
        rois_l[0, :, 1:5] = proposal_l[:, :4]
        #rois_l /= 16.
        rois_g = torch.tensor(rois_g, dtype=torch.float).cuda()
        rois_l = torch.tensor(rois_l, dtype=torch.float).cuda()
        self.rois_g.data.resize_(rois_g.size()).copy_(rois_g)
        self.rois_l.data.resize_(rois_l.size()).copy_(rois_l)
        # global region
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(self.rois_g.view(-1, 5),
                                       base_feat.size()[2:],
                                       self.FRCN.grid_size)
            grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]],
                                  3).contiguous()
            pooled_feat_g = self.FRCN.RCNN_roi_crop(base_feat,
                                                    Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_g = F.max_pool2d(pooled_feat_g, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_g = self.FRCN.RCNN_roi_align(base_feat,
                                                     self.rois_g.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_g = self.FRCN.RCNN_roi_pool(base_feat,
                                                    self.rois_g.view(-1, 5))

        # local region
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(self.rois_l.view(-1, 5),
                                       base_feat.size()[2:],
                                       self.FRCN.grid_size)
            grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]],
                                  3).contiguous()
            pooled_feat_l = self.FRCN.RCNN_roi_crop(base_feat,
                                                    Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_l = F.max_pool2d(pooled_feat_l, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_l = self.FRCN.RCNN_roi_align(base_feat,
                                                     self.rois_l.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_l = self.FRCN.RCNN_roi_pool(base_feat,
                                                    self.rois_l.view(-1, 5))

        #print(pooled_feat_g.cpu().detach().numpy().shape)
        x = torch.cat((pooled_feat_g, pooled_feat_l), dim=1)
        #print(x.cpu().detach().numpy().shape)
        x = self.glcc_conv1(x)
        x = F.relu(x)
        x = x.view(-1, self.roipool * self.roipool * 512)
        x = self.glcc_fc1(x)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)  # an inline nn.Dropout() would stay active in eval mode
        x = self.glcc_fc2(x)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        glcc_out = self.glcc_fc_out(x)

        if self.training:
            # glcc_gt was already built as a CUDA LongTensor above
            glcc_loss = F.cross_entropy(glcc_out, glcc_gt)
        else:
            glcc_out = F.softmax(glcc_out, dim=1)
            glcc_loss = 0.
            glcc_gt = None

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, glcc_out, glcc_loss, glcc_gt
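As a quick sanity check on the overlap formula inside compute_iou above, here is the same arithmetic on two concrete proposals against one ground-truth box (the values are chosen purely for illustration, and the self.gt_iou threshold is replaced by an explicit comment):

import numpy as np

ps = np.array([[0., 0., 10., 10.],    # identical to gt        -> IoU = 1.0
               [5., 5., 15., 15.]])   # 25 px^2 intersection   -> IoU = 25 / 175
gt = np.array([0., 0., 10., 10.])

iou_w = np.maximum(np.minimum(ps[:, 2], gt[2]) - np.maximum(ps[:, 0], gt[0]), 0)
iou_h = np.maximum(np.minimum(ps[:, 3], gt[3]) - np.maximum(ps[:, 1], gt[1]), 0)
inter = iou_w * iou_h
union = ((gt[2] - gt[0]) * (gt[3] - gt[1])
         + (ps[:, 2] - ps[:, 0]) * (ps[:, 3] - ps[:, 1]) - inter)
print(inter / union)   # [1.0, 0.14285714]; with gt_iou = 0.5 only the first would count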
Exemple #29
0
            im2show = np.copy(im)
        for j in xrange(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, config.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    # TODO: output format: frame_number x_min y_min x_max y_max confidence_score
                    # (the last column of cls_dets.cpu().numpy() is the confidence score)

                    # print('dets: ', cls_dets.cpu().numpy())

                    # writeout
                    for detection in cls_dets.cpu().numpy():
                        fid.write(
                            str(processed_images) + "\t" +
                            "\t".join(map(str, detection)) + "\n")

                    im2show = vis_detections(im2show, pascal_classes[j],
                                             cls_dets.cpu().numpy(), 0.5)
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # proposals = clip_boxes_batch(proposals, im_info, batch_size)

        # assign the score to 0 if it's non keep.
        # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

        # trim keep index to make it equal over batch
        # keep_idx = torch.cat(tuple(keep_idx), 0)

        # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
        # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)

        # _, order = torch.sort(scores_keep, 1, True)

        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

            if DEBUG:
                debug_img = np.zeros(
                    im_info[0, :2].cpu().numpy().astype('int'))
                for k in range(proposals_single.shape[0]):  # separate index so the batch index i is not clobbered
                    x0, y0, x1, y1 = proposals_single[k, :]
                    p1 = (int(x0), int(y0))
                    p2 = (int(x1), int(y1))
                    debug_img = cv2.rectangle(debug_img,
                                              p1,
                                              p2,
                                              color=(255, 255, 255))
                cv2.imshow("region_proposals", debug_img)
                cv2.waitKey(1)

        return output
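The anchor enumeration at the top of this forward relies on broadcasting the (1, A, 4) base anchors against (K, 1, 4) grid shifts. A tiny numeric sketch of just that step, with toy values for the feature map size, stride, and base anchors (all assumed for illustration):

import numpy as np
import torch

feat_h, feat_w, feat_stride = 2, 3, 16                   # K = 2 * 3 = 6 grid cells
base_anchors = torch.tensor([[ -8., -8.,  8., 8.],
                             [-16., -8., 16., 8.]])      # A = 2 anchors per cell

shift_x, shift_y = np.meshgrid(np.arange(feat_w) * feat_stride,
                               np.arange(feat_h) * feat_stride)
shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                     shift_x.ravel(), shift_y.ravel()))
                          .transpose()).float()          # (K, 4)

A, K = base_anchors.size(0), shifts.size(0)
anchors = base_anchors.view(1, A, 4) + shifts.view(K, 1, 4)  # broadcast -> (K, A, 4)
anchors = anchors.view(1, K * A, 4)                          # one row per anchor
print(anchors.shape)   # torch.Size([1, 12, 4])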
Exemple #32
0
        if vis:
            im2show = np.copy(im)
        for j in xrange(1, len(_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores[:, None]), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, _classes[j],
                                             cls_dets.cpu().numpy(), 0.5)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
            .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)
Exemple #33
0
            im2show = np.copy(im)
        for j in xrange(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:,j]>thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:,j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, pascal_classes[j], cls_dets.cpu().numpy(), 0.5)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        if webcam_num == -1:
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                             .format(num_images + 1, len(imglist), detect_time, nms_time))
            sys.stdout.flush()

        if vis and webcam_num == -1:
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)