Example #1
import numpy as np


def nms_fallback(boxes, thresh):
    """
    Perform non-maximum suppression and return the indices of the kept boxes.

    Parameters
    ----------
    boxes : ndarray of shape (N, 5)
        Each row is [x, y, xmax, ymax, score].
    thresh : float
        IoU threshold above which a lower-scoring box is suppressed.

    Returns
    -------
    list of int
        Indices of the boxes that survive suppression.
    """
    # process boxes in descending score order
    order = np.argsort(boxes[:, -1])[::-1]

    # pairwise IoU between all boxes, computed once up front
    iou_mat = bbox_iou(boxes[:, :4], boxes[:, :4])

    keep = []

    while len(order) > 0:
        # keep the highest-scoring remaining box
        i = order[0]
        keep.append(i)

        # IoU of the kept box against every other remaining box
        IOU = iou_mat[i, order[1:]]

        # keep only boxes below the overlap threshold; the +1 maps
        # indices into order[1:] back to indices into order
        remaining = np.where(IOU <= thresh)[0]
        order = order[remaining + 1]

    return keep
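
A minimal usage sketch (assuming `numpy` and the same `bbox_iou` helper are importable):

    boxes = np.array([
        [10, 10, 50, 50, 0.9],
        [12, 12, 52, 52, 0.8],     # overlaps the first box, lower score
        [100, 100, 140, 140, 0.7],
    ])
    keep = nms_fallback(boxes, thresh=0.5)
    # expected: [0, 2] -- the second box is suppressed by the first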
Example #2
    def __call__(self, target, size, neg=False):
        anchor_num = len(self.anchor_ratios) * len(self.anchor_scales)

        # cls labels: -1 = ignore, 0 = negative, 1 = positive
        cls = -1 * np.ones((anchor_num, size, size), dtype=np.int64)
        delta = np.zeros((4, anchor_num, size, size), dtype=np.float32)
        delta_weight = np.zeros((anchor_num, size, size), dtype=np.float32)

        def select(position, keep_num=16):
            # randomly subsample at most keep_num positions from an
            # np.where-style index tuple
            num = position[0].shape[0]
            if num <= keep_num:
                return position, num
            slt = np.arange(num)
            np.random.shuffle(slt)
            slt = slt[:keep_num]
            return tuple(p[slt] for p in position), keep_num

        # target box in center form: (cx, cy, w, h)
        tcx, tcy, tw, th = corner2center(target)

        if neg:
            # map the target center into score-map coordinates and mark
            # a 7x7 window around it as negative candidates
            cx = size // 2
            cy = size // 2
            cx += int(
                np.ceil((tcx - self.train_search_size // 2) /
                        self.anchor_stride + 0.5))
            cy += int(
                np.ceil((tcy - self.train_search_size // 2) /
                        self.anchor_stride + 0.5))
            l = max(0, cx - 3)
            r = min(size, cx + 4)
            u = max(0, cy - 3)
            d = min(size, cy + 4)
            cls[:, u:d, l:r] = 0

            # subsample the negative positions
            neg, _ = select(np.where(cls == 0), self.train_neg_num)
            cls[:] = -1
            cls[neg] = 0

            overlap = np.zeros((anchor_num, size, size), dtype=np.float32)
            return cls, delta, delta_weight, overlap

        anchor_box = self.anchors.all_anchors[0]
        anchor_center = self.anchors.all_anchors[1]

        x1, y1, x2, y2 = anchor_box[0], anchor_box[1], \
            anchor_box[2], anchor_box[3]
        cx, cy, w, h = anchor_center[0], anchor_center[1], \
            anchor_center[2], anchor_center[3]

        # regression targets: offsets of the target box relative to each anchor
        delta[0] = (tcx - cx) / w
        delta[1] = (tcy - cy) / h
        delta[2] = np.log(tw / w)
        delta[3] = np.log(th / h)

        target = np.array([target[0], target[1], target[2],
                           target[3]]).reshape(1, -1)
        bbox = np.array([x1, y1, x2, y2]).reshape(4, -1).T
        overlap = bbox_iou(bbox, target)
        overlap = overlap.reshape(-1, self.train_output_size,
                                  self.train_output_size)
        # label anchors by IoU with the target, then subsample each set
        pos = np.where(overlap > self.train_thr_high)
        neg = np.where(overlap < self.train_thr_low)
        pos, pos_num = select(pos, self.train_pos_num)
        neg, _ = select(neg, self.train_total_num - self.train_pos_num)

        cls[pos] = 1
        delta_weight[pos] = 1. / (pos_num + 1e-6)

        cls[neg] = 0
        return cls, delta, delta_weight, overlap
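
For reference, the `delta` encoding above is the standard RPN box parameterization; a minimal decoding sketch that inverts it, reusing the anchor arrays `cx, cy, w, h` from the code:

    pred_cx = delta[0] * w + cx
    pred_cy = delta[1] * h + cy
    pred_w = np.exp(delta[2]) * w
    pred_h = np.exp(delta[3]) * h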
Example #3
    def update(self,
               new_detections: np.ndarray,
               tracking_predictions: np.ndarray,
               detection_anchor_indices: np.ndarray,
               tracking_anchor_indices: np.ndarray,
               tracking_anchor_weights: np.ndarray,
               tracking_classes: np.ndarray,
               extra_info: dict = None):
        """
        Update the tracks according to tracking and detection predictions.
        Parameters
        ----------
        new_detections: Nx5 ndarray
        tracking_predictions: Mx5 ndarray
        extra_info: a dictionary with extra information

        Returns
        -------
        """
        # pylint: disable=too-many-nested-blocks
        t_post_processing = time.time()

        logging.info("tracking predictions' shape is {}".format(
            tracking_predictions.shape))
        logging.debug(tracking_predictions)
        logging.debug(self.waiting_update_tracks)

        # guard against a missing extra_info dict
        extra_info = extra_info if extra_info is not None else {}
        detection_landmarks = extra_info.get('detection_landmarks')
        tracking_landmarks = extra_info.get('tracking_landmarks')

        for t in self.tracks:
            t.predict()

        # STEP 1: track level NMS
        still_active_track_pred_indices = []
        still_active_track_indices = []

        if len(tracking_predictions) > 0:

            # class-wise NMS
            keep_set = set()
            for c in set(tracking_classes.ravel().tolist()):
                class_pick = np.nonzero(tracking_classes == c)[0]
                keep_tracking_pred_nms_indices = nms_fallback(
                    tracking_predictions[class_pick, ...],
                    self.track_nms_thresh)
                for i_keep in keep_tracking_pred_nms_indices:
                    keep_set.add(class_pick[i_keep])

            still_active_track_pred_indices = []
            for i_pred, i_track in enumerate(self.waiting_update_tracks):
                if i_pred in keep_set:
                    self.tracks[i_track].update(
                        tracking_predictions[i_pred, :],
                        (tracking_anchor_indices[i_pred, :],
                         tracking_anchor_weights[i_pred, :]),
                        tracking_landmarks[i_pred, :]
                        if tracking_landmarks is not None else None)
                else:
                    # tracks suppressed by the track-level NMS are marked as missed
                    self.tracks[i_track].mark_missed()

                if self.tracks[i_track].is_active():
                    still_active_track_pred_indices.append(i_pred)
                    still_active_track_indices.append(i_track)

        # STEP 2: Remove New Detections Overlapping with Tracks
        if len(still_active_track_pred_indices) > 0 and len(
                new_detections) > 0:
            active_tracking_predictions = tracking_predictions[
                still_active_track_pred_indices, :]
            det_track_max_iou = bbox_iou(new_detections[:, :4],
                                         active_tracking_predictions[:, :4])
            same_class = new_detections[:, -1:] == (
                tracking_classes[still_active_track_pred_indices, :].T)
            # suppress all new detections that have high IOU with active tracks
            affinity = (det_track_max_iou * same_class).max(axis=1)
            keep_detection_indices = np.nonzero(
                affinity <= self.new_track_iou_thresh)[0]
        else:
            # otherwise simply keep all detections
            keep_detection_indices = list(range(len(new_detections)))
            active_tracking_predictions = np.array([])

        # STEP 3: New Track Initialization
        if len(keep_detection_indices) > 0:

            active_new_detections = new_detections[keep_detection_indices, :]
            # (Optional) STEP 3.a: Perform joint linking of body and head
            if self.joint_linking:
                tracking_classes = np.array(tracking_classes)
                body2face_link, face2body_link = \
                    self._link_face_body(active_new_detections,
                                         extra_info['detection_keypoints'][keep_detection_indices],
                                         active_tracking_predictions,
                                         extra_info['tracking_keypoints'][still_active_track_pred_indices],
                                         tracking_classes[still_active_track_pred_indices]
                                         )
            else:
                body2face_link, face2body_link = None, None

            new_tracks = []
            for idx, i_new_track in enumerate(keep_detection_indices):
                new_track = Track(
                    new_detections[i_new_track, :4],
                    self.all_track_id,
                    (detection_anchor_indices[i_new_track, :], np.array([1])),
                    keep_alive_thresh=self.keep_alive,
                    class_id=new_detections[i_new_track, -1],
                    attributes=detection_landmarks[i_new_track, :]
                    if detection_landmarks is not None else None)
                if self.joint_linking:
                    if new_track.class_id == 0:
                        # new face track
                        if idx in face2body_link[0]:
                            logging.debug("%s %s 0", idx, i_new_track)
                            body_idx = face2body_link[0][idx]
                            if idx > body_idx:
                                new_track.link_to(new_tracks[body_idx])
                        elif idx in face2body_link[2]:
                            logging.debug("%s %s 1", idx, i_new_track)
                            body_idx = face2body_link[2][idx]
                            new_track.link_to(self.tracks[
                                still_active_track_indices[body_idx]])

                    if new_track.class_id == 1:
                        # new body track
                        if idx in body2face_link[0]:
                            face_idx = body2face_link[0][idx]
                            if idx > face_idx:
                                new_track.link_to(new_tracks[face_idx])
                        elif idx in body2face_link[2]:
                            face_idx = body2face_link[2][idx]
                            new_track.link_to(self.tracks[
                                still_active_track_indices[face_idx]])

                self.all_track_id += 1
                self.tracks.append(new_track)
                new_tracks.append(new_track)

        elapsed_post_processing = time.time() - t_post_processing
        logging.info(
            "total tracklets to now is {}, post-processing time: {:.05f} sec".
            format(self.all_track_id, elapsed_post_processing))
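
STEP 2 above assumes `bbox_iou` returns an NxM matrix for N detection boxes against M track boxes; a minimal shape sketch with hypothetical values:

    dets = np.array([[0., 0., 10., 10.], [20., 20., 30., 30.]])  # N = 2
    trks = np.array([[1., 1., 11., 11.]])                        # M = 1
    iou = bbox_iou(dets, trks)              # expected shape (2, 1)
    affinity = iou.max(axis=1)              # best overlap per detection
    keep = np.nonzero(affinity <= 0.3)[0]   # detections far from any track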
Example #4
    def validate(self):
        """Test on the validation dataset."""
        val_data = self.val_loader
        ctx = self.ctx
        val_metric = self.val_metric
        nms_threshold = self.nms_threshold
        validation_threshold = self.validation_threshold

        val_metric.reset()
        # set nms threshold and topk constraint
        # post_nms = maximum number of objects per image
        self.net.set_nms(nms_thresh=nms_threshold,
                         nms_topk=200,
                         post_nms=len(
                             self.classes))  # default: iou=0.45 and topk=400

        # >>>> TODO: verify effectiveness
        # mx.nd.waitall()

        # allow the MXNet engine to perform graph optimization for best performance.
        self.net.hybridize(static_alloc=True, static_shape=True)

        num_of_classes = len(self.classes)
        # total number of correct prediction by class
        tp = [0] * num_of_classes
        # false positives by class
        fp = [0] * num_of_classes
        # count the number of gt by class
        gt_by_class = [0] * num_of_classes
        # rec and prec by class
        rec_by_class = [0] * num_of_classes
        prec_by_class = [0] * num_of_classes
        confusion_matrix = np.zeros((num_of_classes, num_of_classes))

        for batch in val_data:
            batch_size = batch[0].shape[0]
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0,
                                              even_split=False)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0,
                                               even_split=False)

            pred_bboxes_list = []
            pred_label_list = []
            pred_scores_list = []
            gt_bboxes_list = []
            gt_label_list = []

            for x, y in zip(data, label):
                # get prediction results
                ids, scores, bboxes = self.net(x)
                pred_label_list.append(ids)
                pred_scores_list.append(scores)
                # clip to image size
                pred_bboxes_list.append(bboxes.clip(0, batch[0].shape[2]))
                # split ground truths
                gt_label_list.append(y.slice_axis(axis=-1, begin=4, end=5))
                gt_bboxes_list.append(y.slice_axis(axis=-1, begin=0, end=4))

            # Uncomment the following line to plot the images in each inference
            # and visually check the tp, fp and fn
            # self.show_images(x, pred_label_list, pred_bboxes_list, gt_label_list, gt_bboxes_list)

            # update metric
            val_metric.update(pred_bboxes_list, pred_label_list,
                              pred_scores_list, gt_bboxes_list,
                              gt_label_list)  #, gt_difficults)

            # Micro averaging: accumulate per-class precision/recall counts in each batch
            for img in range(batch_size):
                # accumulate the number of ground truths per class;
                # useful when working with unbalanced datasets
                for gt_idx in gt_label_list[0][img]:
                    index = int(gt_idx.asnumpy()[0])
                    gt_by_class[index] += 1

                for (pred_label,
                     pred_bbox) in zip(pred_label_list[0][img],
                                       list(pred_bboxes_list[0][img])):
                    pred_label = int(pred_label.asnumpy()[0])
                    pred_bbox = pred_bbox.asnumpy()
                    pred_bbox = np.expand_dims(pred_bbox, axis=0)
                    match = 0
                    for (gt_bbox_label, gt_bbox_coordinates) in zip(
                            gt_label_list[0][img],
                            list(gt_bboxes_list[0][img])):
                        gt_bbox_coord = gt_bbox_coordinates.asnumpy()
                        gt_bbox_coord = np.expand_dims(gt_bbox_coord, axis=0)
                        gt_bbox_label = int(gt_bbox_label.asnumpy()[0])
                        iou = bbox_iou(pred_bbox, gt_bbox_coord)

                        # Correct inference
                        if iou > validation_threshold and pred_label == gt_bbox_label:
                            confusion_matrix[gt_bbox_label][pred_label] += 1
                            tp[gt_bbox_label] += 1  # Correct classification
                            match = 1
                        # Incorrect inference: correct location (IoU above
                        # threshold) but wrong class
                        elif iou > validation_threshold:
                            confusion_matrix[gt_bbox_label][pred_label] += 1
                            fp[pred_label] += 1
                            match = 1

                    if not match:
                        fp[pred_label] += 1

        # calculate the Recall and Precision by class
        tp = np.array(tp)  # we can also sum the matrix diagonal
        fp = np.array(fp)

        # rec and prec according to the micro averaging
        for i, (gt_value, tp_value) in enumerate(zip(gt_by_class, tp)):
            rec_by_class[i] += tp_value / gt_value
            # If an element of fp + tp is 0,
            # the corresponding element of prec[l] is nan.
            with np.errstate(divide='ignore', invalid='ignore'):
                prec_by_class[i] += tp_value / (tp_value + fp[i])

        return val_metric.get(), rec_by_class, prec_by_class
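
A quick sanity check of the micro-averaged metrics above, using hypothetical counts:

    tp = np.array([8, 3])            # true positives per class
    fp = np.array([2, 1])            # false positives per class
    gt_by_class = np.array([10, 4])  # ground-truth instances per class
    recall = tp / gt_by_class        # -> [0.8, 0.75]
    precision = tp / (tp + fp)       # -> [0.8, 0.75]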
Example #5
    def update(self,
               pred_bboxes,
               pred_labels,
               pred_scores,
               gt_bboxes,
               gt_labels,
               gt_difficults=None):
        """Update internal buffer with latest prediction and gt pairs.

        Parameters
        ----------
        pred_bboxes : mxnet.NDArray or numpy.ndarray
            Prediction bounding boxes with shape `B, N, 4`.
            Where B is the size of mini-batch, N is the number of bboxes.
        pred_labels : mxnet.NDArray or numpy.ndarray
            Prediction bounding boxes labels with shape `B, N`.
        pred_scores : mxnet.NDArray or numpy.ndarray
            Prediction bounding boxes scores with shape `B, N`.
        gt_bboxes : mxnet.NDArray or numpy.ndarray
            Ground-truth bounding boxes with shape `B, M, 4`.
            Where B is the size of mini-batch, M is the number of ground-truths.
        gt_labels : mxnet.NDArray or numpy.ndarray
            Ground-truth bounding boxes labels with shape `B, M`.
        gt_difficults : mxnet.NDArray or numpy.ndarray, optional, default is None
            Ground-truth bounding boxes difficulty labels with shape `B, M`.

        """

        if gt_difficults is None:
            gt_difficults = [None for _ in as_numpy(gt_labels)]

        # Not sure about this code; lodged issue on github #872
        # https://github.com/dmlc/gluon-cv/issues/872
        # if isinstance(gt_labels, list):
        #     if len(gt_difficults) != len(gt_labels) * gt_labels[0].shape[0]:
        #         gt_difficults = [None] * len(gt_labels) * gt_labels[0].shape[0]

        for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in zip(
                *[
                    as_numpy(x) for x in [
                        pred_bboxes, pred_labels, pred_scores, gt_bboxes,
                        gt_labels, gt_difficults
                    ]
                ]):
            # strip padding -1 for pred and gt
            valid_pred = np.where(pred_label.flat >= 0)[0]
            pred_bbox = pred_bbox[valid_pred, :]
            pred_label = pred_label.flat[valid_pred].astype(int)
            pred_score = pred_score.flat[valid_pred]

            # change the class ids for the ground truths
            if self.class_map is not None:
                gt_label = np.expand_dims(np.array(
                    [self.class_map[int(l)] for l in gt_label.flat]),
                                          axis=0)

            valid_gt = np.where(gt_label.flat >= 0)[0]
            gt_bbox = gt_bbox[valid_gt, :]
            gt_label = gt_label.flat[valid_gt].astype(int)

            if gt_difficult is None:
                gt_difficult = np.zeros(gt_bbox.shape[0])
            else:
                gt_difficult = gt_difficult.flat[valid_gt]

            for l in np.unique(
                    np.concatenate((pred_label, gt_label)).astype(int)):
                pred_mask_l = pred_label == l
                pred_bbox_l = pred_bbox[pred_mask_l]
                pred_score_l = pred_score[pred_mask_l]
                # sort by score
                order = pred_score_l.argsort()[::-1]
                pred_bbox_l = pred_bbox_l[order]
                pred_score_l = pred_score_l[order]

                gt_mask_l = gt_label == l
                gt_bbox_l = gt_bbox[gt_mask_l]
                gt_difficult_l = gt_difficult[gt_mask_l]

                self._n_pos[l] += np.logical_not(gt_difficult_l).sum()
                self._score[l].extend(pred_score_l)

                if len(pred_bbox_l) == 0:
                    continue
                if len(gt_bbox_l) == 0:
                    self._match[l].extend((0, ) * pred_bbox_l.shape[0])
                    continue

                # VOC evaluation follows integer typed bounding boxes;
                # the original +1 offset on the max corner is disabled here.
                pred_bbox_l = pred_bbox_l.copy()
                gt_bbox_l = gt_bbox_l.copy()

                iou = bbox_iou(pred_bbox_l, gt_bbox_l)
                gt_index = iou.argmax(axis=1)
                # set -1 if there is no matching ground truth
                gt_index[iou.max(axis=1) < self.iou_thresh] = -1
                del iou

                selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
                for gt_idx in gt_index:
                    if gt_idx >= 0:
                        if gt_difficult_l[gt_idx]:
                            self._match[l].append(-1)
                        else:
                            if not selec[gt_idx]:
                                self._match[l].append(1)
                            else:
                                self._match[l].append(0)
                        selec[gt_idx] = True
                    else:
                        self._match[l].append(0)
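
The per-class matching above lets each prediction claim its best-overlapping ground truth, and only the first claim on a ground-truth box counts as a true positive; a minimal sketch with hypothetical values:

    iou = np.array([[0.8], [0.6]])        # (num_pred, num_gt): both preds hit gt 0
    gt_index = iou.argmax(axis=1)         # -> [0, 0]
    gt_index[iou.max(axis=1) < 0.5] = -1  # unmatched predictions get -1
    # first claim of gt 0 -> match 1 (tp); second claim -> match 0 (fp)
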
    def validate(self):
        """Test on validation dataset."""
        val_data = self.val_loader
        ctx = self.ctx
        val_metric = self.val_metric
        nms_threshold = self.nms_threshold
        validation_threshold = self.validation_threshold

        val_metric.reset()
        # set nms threshold and topk constraint
        # post_nms = maximum number of objects per image
        self.net.set_nms(nms_thresh=nms_threshold, nms_topk=200, post_nms=len(self.classes)) # default: iou=0.45 and topk=400

        # allow the MXNet engine to perform graph optimization for best performance.
        self.net.hybridize(static_alloc=True, static_shape=True)

        # total number of correct prediction by class
        tp = [0] * len(self.classes)
        # count the number of gt by class
        gt_by_class = [0] * len(self.classes)
        # false positives by class
        fp = [0] * len(self.classes)
        # rec and prec by class
        rec_by_class = [0] * len(self.classes)
        prec_by_class = [0] * len(self.classes)

        for batch in val_data:
            batch_size = batch[0].shape[0]
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)

            det_bboxes = []
            det_ids = []
            det_scores = []
            gt_bboxes = []
            gt_ids = []
            gt_difficults = []
            
            for x, y in zip(data, label):
                # get prediction results
                ids, scores, bboxes = self.net(x)
                det_ids.append(ids)
                det_scores.append(scores)
                # clip to image size
                det_bboxes.append(bboxes.clip(0, batch[0].shape[2]))
                # split ground truths
                gt_ids.append(y.slice_axis(axis=-1, begin=4, end=5))
                gt_bboxes.append(y.slice_axis(axis=-1, begin=0, end=4))
                # gt_difficults.append(y.slice_axis(axis=-1, begin=5, end=6) if y.shape[-1] > 5 else None)
            
            # update metric
            val_metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids) #, gt_difficults)

            # Micro averaging: accumulate per-class precision/recall counts in each batch
            for img in range(batch_size):
                gt_ids_teste, gt_bboxes_teste = [], []
                for ids in det_ids[0][img]:
                    det_ids_number = (int(ids.asnumpy()[0]))
                    # It is required to check if the predicted class is in the image
                    # otherwise, count it as a false positive and do not include in the list
                    if det_ids_number in list(gt_ids[0][img]):
                        gt_index = list(gt_ids[0][img]).index(det_ids_number)
                        gt_ids_teste.extend(gt_ids[0][img][gt_index])
                        gt_bboxes_teste.append(gt_bboxes[0][img][gt_index])
                    else:
                        fp[det_ids_number] += 1  # Wrong classification

                # accumulate the number of ground truths per class;
                # useful when working with unbalanced datasets
                for gt_idx in gt_ids[0][img]:
                    index = int(gt_idx.asnumpy()[0])
                    gt_by_class[index] += 1
                
                for ids in range(len(gt_bboxes_teste)):
                    det_bbox_ids = det_bboxes[0][img][ids]
                    det_bbox_ids = det_bbox_ids.asnumpy()
                    det_bbox_ids = np.expand_dims(det_bbox_ids, axis=0)
                    predict_ind = int(det_ids[0][img][ids].asnumpy()[0])
                    
                    gt_bbox_ids = gt_bboxes_teste[ids]
                    gt_bbox_ids = gt_bbox_ids.asnumpy()
                    gt_bbox_ids = np.expand_dims(gt_bbox_ids, axis=0)
                    gt_ind = int(gt_ids_teste[ids].asnumpy()[0])
                    
                    iou = bbox_iou(det_bbox_ids, gt_bbox_ids)

                    # Uncomment the following line to plot the images in each
                    # inference and visually check the tp, fp and fn
                    # self.show_images(x, gt_bbox_ids, det_bbox_ids, img)
                    
                    # Check if IoU is above the threshold and the class id corresponds to the ground truth
                    if (iou > validation_threshold) and (predict_ind == gt_ind):
                        tp[gt_ind] += 1 # Correct classification
                    else:
                        fp[predict_ind] += 1  # Wrong classification
        
        # calculate the Recall and Precision by class
        tp = np.array(tp)
        fp = np.array(fp)
        # rec and prec according to the micro averaging
        for i, (gt_value, tp_value) in enumerate(zip(gt_by_class, tp)):
            rec_by_class[i] += tp_value / gt_value

            # If an element of fp + tp is 0,
            # the corresponding element of prec[l] is nan.
            with np.errstate(divide='ignore', invalid='ignore'):
                prec_by_class[i] += tp_value / (tp_value + fp[i])

        return val_metric.get(), rec_by_class, prec_by_class