Example #1
    def run_network(self, img: np.ndarray):
        """ Runs an image through the network + postprocessing and returns the masks and bboxes

        Args:
            img (np.ndarray): The image to process.

        Returns:
            (tuple): the masks and bboxes
        """
        # Run image through the network
        img_gpu = torch.from_numpy(img).cuda().float()
        batch = FastBaseTransform()(img_gpu.unsqueeze(0))
        preds = self.net(batch)
        h, w, _ = img.shape

        # Post process
        t = postprocess(preds,
                        w,
                        h,
                        visualize_lincomb=True,
                        crop_masks=True,
                        score_threshold=0.15)
        top_k = 15  # Further restrict the number of predictions to parse
        idx = t[1].argsort(0, descending=True)[:top_k]
        masks = t[3][idx].cpu().numpy()
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        return masks, boxes
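
A minimal usage sketch for the helper above; the wrapper object, weights loading, and image path are illustrative assumptions, not part of the original snippet:

import cv2

def detect_file(wrapper, image_path):
    # wrapper is assumed to expose run_network() as defined above
    img = cv2.imread(image_path)                 # HxWx3 BGR uint8 array
    masks, boxes = wrapper.run_network(img)
    # masks: (N, H, W) mask array; boxes: (N, 4) as (x1, y1, x2, y2)
    for (x1, y1, x2, y2), mask in zip(boxes.astype(int), masks):
        print('box:', (x1, y1, x2, y2), 'mask pixels:', int(mask.sum()))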
Example #2
    def prediction(self, img):
        self.net.detect.cross_class_nms = True
        cfg.mask_proto_debug = False

        with torch.no_grad():
            frame = torch.Tensor(img).cuda().float()
            batch = FastBaseTransform()(frame.unsqueeze(0))
            time_start = time.perf_counter()  # time.clock() was removed in Python 3.8
            preds = self.net(batch)
            h, w, _ = img.shape
            t = postprocess(preds,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=True,
                            score_threshold=self.threshold)
            torch.cuda.synchronize()
            masks = t[3][:self.top_k]
            classes, scores, bboxes = [
                x[:self.top_k].cpu().numpy() for x in t[:3]
            ]
            time_elapsed = time.perf_counter() - time_start
            num_dets_to_consider = min(self.top_k, classes.shape[0])

            for i in range(num_dets_to_consider):
                if scores[i] < self.threshold:
                    num_dets_to_consider = i
                    break

            if num_dets_to_consider >= 1:
                masks = masks[:num_dets_to_consider, :, :, None]

            masks_msg = masks.cpu().detach().numpy()
            masks_msg = masks_msg.astype(np.uint8)
            scores_msg = np.zeros(num_dets_to_consider)
            class_label_msg = np.empty(num_dets_to_consider, dtype="S20")
            bboxes_msg = np.zeros([num_dets_to_consider, 4], dtype=int)
            for i in reversed(range(num_dets_to_consider)):
                scores_msg[i] = scores[i]
                class_label_msg[i] = cfg.dataset.class_names[classes[i]]
                bboxes_msg[i] = bboxes[i]
                print(class_label_msg[i].decode(), "%.2f" % (scores_msg[i]))

            os.system('cls' if os.name == 'nt' else 'clear')
            print("%.2f" % (1 / time_elapsed), "hz")

            if self.display_cv:
                self.display(frame, masks, classes, scores, bboxes,
                             num_dets_to_consider)

            return masks_msg, class_label_msg, scores_msg, bboxes_msg
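
A note on the truncation loop above: it relies on postprocess returning detections sorted by descending score, so cutting at the first sub-threshold entry is safe. A standalone sketch of the same idea (plain numpy; the helper name is illustrative):

import numpy as np

def truncate_by_score(scores, top_k, threshold):
    # Assumes scores are sorted in descending order, as postprocess returns them
    n = min(top_k, len(scores))
    for i in range(n):
        if scores[i] < threshold:
            return i
    return n

print(truncate_by_score(np.array([0.9, 0.6, 0.2, 0.1]), 15, 0.15))  # -> 3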
Example #3
def prep_benchmark(dets_out, h, w):
    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h, crop_masks=args.crop, score_threshold=args.score_threshold)

    with timer.env('Copy'):
        classes, scores, boxes, masks = [x[:args.top_k] for x in t]
        if isinstance(scores, list):
            box_scores = scores[0].cpu().numpy()
            mask_scores = scores[1].cpu().numpy()
        else:
            scores = scores.cpu().numpy()
        classes = classes.cpu().numpy()
        boxes = boxes.cpu().numpy()
        masks = masks.cpu().numpy()
    
    with timer.env('Sync'):
        # Just in case
        torch.cuda.synchronize()
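
The "just in case" synchronize matters because CUDA kernels are launched asynchronously: without it, a timer can stop before the GPU work has actually finished. A small sketch of the safe timing pattern (guarded so it also runs without a GPU):

import time
import torch

def timed_forward(net, batch):
    # Time a forward pass correctly on GPU by synchronizing around it
    if torch.cuda.is_available():
        torch.cuda.synchronize()   # drain any pending kernels first
    start = time.perf_counter()
    with torch.no_grad():
        out = net(batch)
    if torch.cuda.is_available():
        torch.cuda.synchronize()   # wait for the forward pass itself
    return out, time.perf_counter() - start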
Example #4
    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45):
        """
        Note: If undo_transform=False then im_h and im_w are allowed to be None.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:args.top_k]
            classes, scores, boxes = [
                x[:args.top_k].cpu().numpy() for x in t[:3]
            ]

        num_dets_to_consider = min(args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < args.score_threshold:
                num_dets_to_consider = j
                break

        if num_dets_to_consider == 0:
            # No detections found so just output the original image
            return (img_gpu * 255).byte().cpu().numpy()

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            global color_cache
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in color_cache[on_gpu]:
                return color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BGR, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    color_cache[on_gpu][color_idx] = color
                return color

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        if args.display_masks and cfg.eval_mask_branch:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]

            # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # I did the math for this on pen and paper. This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu
        # Note: make sure this is a uint8 tensor or OpenCV will not anti-alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        if args.display_text or args.display_bboxes:
            str_ = ""
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    text_str = '%s: %.2f' % (
                        _class, score) if args.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                     font_scale,
                                                     font_thickness)[0]

                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    cv2.rectangle(img_numpy, (x1, y1),
                                  (x1 + text_w, y1 - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)

                    #pub = rospy.Publisher('chatter',String,queue_size=10)
                    #rate = rospy.Rate(50) #10hz
                    #str_ += text_str
            #rospy.loginfo(str_)
            #pub.publish(str_)
            #rate.sleep()

        return img_numpy
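
The vectorized mask blending in this and the following examples replaces a sequential alpha blend with a cumulative product. One subtlety the comment glosses over: the closed form lays mask 0 on top, i.e. it matches the loop run in reverse index order. A self-contained check with random tensors (CPU-only, no YOLACT state):

import torch

torch.manual_seed(0)
n, h, w, alpha = 4, 8, 8, 0.45
img = torch.rand(h, w, 3)
masks = (torch.rand(n, h, w, 1) > 0.5).float()
colors = torch.rand(n, 1, 1, 3)

masks_color = masks.repeat(1, 1, 1, 3) * colors * alpha
inv_alph = masks * (-alpha) + 1

# Sequential blend, highest index first, so mask 0 is drawn last (on top)
out_loop = img.clone()
for j in reversed(range(n)):
    out_loop = out_loop * inv_alph[j] + masks_color[j]

# Closed form from the snippet above
summand = masks_color[0] + (masks_color[1:] * inv_alph[:n - 1].cumprod(dim=0)).sum(dim=0)
out_vec = img * inv_alph.prod(dim=0) + summand

print(torch.allclose(out_loop, out_vec, atol=1e-6))  # True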
Example #5
    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45,
                     image_header=Header()):
        """
        Note: If undo_transform=False then im_h and im_w are allowed to be None.
        """
        with torch.no_grad():
            detections = Detections()

            if undo_transform:
                img_numpy = undo_image_transformation(img, w, h)
                img_gpu = torch.Tensor(img_numpy).cuda()
            else:
                img_gpu = img / 255.0
                h, w, _ = img.shape

            with timer.env('Postprocess'):
                t = postprocess(dets_out,
                                w,
                                h,
                                visualize_lincomb=args.display_lincomb,
                                crop_masks=args.crop,
                                score_threshold=args.score_threshold)
                torch.cuda.synchronize()

            with timer.env('Copy'):
                if cfg.eval_mask_branch:
                    # Masks are drawn on the GPU, so don't copy
                    masks = t[3][:args.top_k]
                classes, scores, boxes = [
                    x[:args.top_k].cpu().numpy() for x in t[:3]
                ]

            num_dets_to_consider = min(args.top_k, classes.shape[0])
            for j in range(num_dets_to_consider):
                if scores[j] < args.score_threshold:
                    num_dets_to_consider = j
                    break

            if num_dets_to_consider == 0:
                # No detections found so just output the original image
                return (img_gpu * 255).byte().cpu().numpy()

            # Quick and dirty lambda for selecting the color for a particular index
            # Also keeps track of a per-gpu color cache for maximum speed
            def get_color(j, on_gpu=None):
                global color_cache
                color_idx = (classes[j] * 5 if class_color else j *
                             5) % len(COLORS)

                if on_gpu is not None and color_idx in color_cache[on_gpu]:
                    return color_cache[on_gpu][color_idx]
                else:
                    color = COLORS[color_idx]
                    if not undo_transform:
                        # The image might come in as RGB or BGR, depending
                        color = (color[2], color[1], color[0])
                    if on_gpu is not None:
                        color = torch.Tensor(color).to(on_gpu).float() / 255.
                        color_cache[on_gpu][color_idx] = color
                    return color

            # First, draw the masks on the GPU where we can do it really fast
            # Beware: very fast but possibly unintelligible mask-drawing code ahead
            # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
            if args.display_masks and cfg.eval_mask_branch:
                # After this, mask is of size [num_dets, h, w, 1]
                masks = masks[:num_dets_to_consider, :, :, None]

                # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
                colors = torch.cat([
                    get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                    for j in range(num_dets_to_consider)
                ],
                                   dim=0)
                masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

                # This is 1 everywhere except for 1-mask_alpha where the mask is
                inv_alph_masks = masks * (-mask_alpha) + 1

                # I did the math for this on pen and paper. This whole block should be equivalent to:
                #    for j in range(num_dets_to_consider):
                #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
                masks_color_summand = masks_color[0]
                if num_dets_to_consider > 1:
                    inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                      1)].cumprod(dim=0)
                    masks_color_cumul = masks_color[1:] * inv_alph_cumul
                    masks_color_summand += masks_color_cumul.sum(dim=0)

                img_gpu = img_gpu * inv_alph_masks.prod(
                    dim=0) + masks_color_summand

            # Then draw the stuff that needs to be done on the cpu
            # Note: make sure this is a uint8 tensor or OpenCV will not anti-alias text for whatever reason
            img_numpy = (img_gpu * 255).byte().cpu().numpy()

            print("Num dets: ", num_dets_to_consider)
            if args.display_text or args.display_bboxes:
                for j in reversed(range(num_dets_to_consider)):
                    x1, y1, x2, y2 = boxes[j, :]
                    color = get_color(j)
                    score = scores[j]

                    if args.display_bboxes:
                        cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 2)

                    if args.display_text:
                        _class = cfg.dataset.class_names[classes[j]]
                        text_str = '%s: %.2f' % (
                            _class, score) if args.display_scores else _class

                        font_face = cv2.FONT_HERSHEY_DUPLEX
                        font_scale = 0.6
                        font_thickness = 1

                        text_w, text_h = cv2.getTextSize(
                            text_str, font_face, font_scale, font_thickness)[0]

                        text_pt = (x1, y1 - 10)
                        text_color = [255, 255, 255]

                        cv2.rectangle(img_numpy, (x1, y1),
                                      (x1 + text_w, y1 - text_h - 4), color,
                                      -1)
                        cv2.putText(img_numpy, text_str, text_pt, font_face,
                                    font_scale, text_color, font_thickness,
                                    cv2.LINE_AA)

                    det = Detection()
                    det.box.x1 = x1
                    det.box.y1 = y1
                    det.box.x2 = x2
                    det.box.y2 = y2
                    det.class_name = _class
                    det.score = score
                    mask_shape = np.shape(masks[j])
                    #print("Shape: ", mask_shape)
                    #mask_bb = np.squeeze(masks[j].cpu().numpy(), axis=2)[y1:y2,x1:x2] # Crop
                    mask_bb = np.squeeze(
                        masks[j].cpu().numpy(),
                        axis=2)[:, :]  # Every mask (1280 * 720)
                    #print("Box: x1:", x1,", x2: ",x2,", y1: ",y1,", y2: ",y2)
                    #print("Mask in box shape: ", np.shape(mask_bb))
                    mask_rs = np.reshape(mask_bb, -1)
                    #print("New shape: ", np.shape(mask_rs))
                    #print("Mask:\n",mask_bb)
                    det.mask.height = y2 - y1
                    det.mask.width = x2 - x1
                    det.mask.mask = np.array(mask_rs, dtype=bool)
                    detections.detections.append(det)
                detections.header.stamp = image_header.stamp
                detections.header.frame_id = image_header.frame_id

            self.detections_pub.publish(detections)
            self.get_orientation_from_mask(num_dets_to_consider, img_numpy,
                                           detections, masks)

            try:
                self.image_pub.publish(
                    self.bridge.cv2_to_imgmsg(img_numpy, "bgr8"))
            except CvBridgeError as e:
                print(e)
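
This variant publishes each mask as a flat boolean array plus height/width, so the consumer must rebuild the 2-D mask with a row-major reshape; a plain-numpy sketch of that round trip follows. Note that the code above stores the full-frame mask while setting mask.height/width to the box size; the commented-out crop line suggests a box crop was intended.

import numpy as np

h, w = 4, 6
mask = np.random.rand(h, w) > 0.5         # HxW boolean mask

flat = mask.reshape(-1)                    # row-major vector of length h*w
restored = flat.reshape(h, w)              # receiver rebuilds it from height/width

assert np.array_equal(mask, restored)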
Example #6
    def process(self, image: np.ndarray, pos: int):
        """:returns (classes, scores, boxes)

        where `boxes` is an array of bounding boxes of detected objects in
        (xleft, ytop, width, height) format.

        `classes` is the class ids of the corresponding objects.

        `scores` are the computed class scores corresponding to the detected objects.
        Roughly, a higher score indicates a stronger belief that the object
        belongs to the identified class.
        """
        _ts = time.perf_counter()
        logging.debug(f'Received frame {pos}')
        if self.net is None:
            self.sigError.emit(YolactException('Network not initialized'))
            return
        # Partly follows yolact eval.py
        tic = time.perf_counter_ns()
        _ = qc.QMutexLocker(self.mutex)
        with torch.no_grad():
            if self.cuda:
                image = torch.from_numpy(image).cuda().float()
            else:
                image = torch.from_numpy(image).float()
            batch = FastBaseTransform()(image.unsqueeze(0))
            preds = self.net(batch)
            image_gpu = image / 255.0
            h, w, _ = image.shape
            save = self.config.rescore_bbox
            self.config.rescore_bbox = True
            classes, scores, boxes, masks = oututils.postprocess(
                preds,
                w,
                h,
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=self.score_threshold)
            self.config.rescore_bbox = save  # restore the flag saved before postprocess
            idx = scores.argsort(0, descending=True)[:self.top_k]
            # if self.config.eval_mask_branch:
            #     masks = masks[idx]
            classes, scores, boxes = [
                x[idx].cpu().numpy() for x in (classes, scores, boxes)
            ]
            # This is probably not required, `postprocess` uses
            # `score_thresh` already
            num_dets_to_consider = min(self.top_k, classes.shape[0])
            for j in range(num_dets_to_consider):
                if scores[j] < self.score_threshold:
                    num_dets_to_consider = j
                    break
            # logging.debug('Bounding boxes: %r', boxes)
            # Convert from top-left bottom-right format to
            # top-left, width, height format
            if len(boxes) == 0:
                self.sigProcessed.emit(boxes, pos)
                return
            boxes[:, 2:] = boxes[:, 2:] - boxes[:, :2]
            boxes = np.asanyarray(boxes, dtype=np.int_)
            if self.overlap_thresh < 1:
                dist_matrix = pairwise_distance(new_bboxes=boxes,
                                                bboxes=boxes,
                                                boxtype=OutlineStyle.bbox,
                                                metric=DistanceMetric.ios)
                bad_idx = [jj for ii in range(dist_matrix.shape[0] - 1) \
                             for jj in range(ii+1, dist_matrix.shape[1]) \
                              if dist_matrix[ii, jj] < 1 - self.overlap_thresh]
                good_idx = list(set(range(boxes.shape[0])) - set(bad_idx))
                boxes = boxes[good_idx].copy()

            toc = time.perf_counter_ns()
            logging.debug('Time to process single _image: %f s',
                          1e-9 * (toc - tic))
            self.sigProcessed.emit(boxes, pos)
            logging.debug(f'Emitted bboxes for frame {pos}: {boxes}')
        _dt = time.perf_counter() - _ts
        logging.debug(
            f'{__name__}.{self.__class__.__name__}.process: Runtime: {_dt}s')
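
The overlap filter above leans on an external pairwise_distance utility; below is a self-contained numpy sketch of the same pruning, written directly against (x, y, w, h) boxes with intersection over the smaller box. The helper name and metric details here are assumptions, not the original utility.

import numpy as np

def suppress_overlaps(boxes, overlap_thresh):
    # Drop later boxes that overlap an earlier (kept) one by more than overlap_thresh.
    # boxes: (N, 4) in (x, y, w, h); overlap is intersection over the smaller area.
    keep = []
    for i in range(len(boxes)):
        xi, yi, wi, hi = boxes[i]
        ok = True
        for j in keep:
            xj, yj, wj, hj = boxes[j]
            iw = max(0, min(xi + wi, xj + wj) - max(xi, xj))
            ih = max(0, min(yi + hi, yj + hj) - max(yi, yj))
            inter = iw * ih
            smaller = min(wi * hi, wj * hj)
            if smaller > 0 and inter / smaller > overlap_thresh:
                ok = False
                break
        if ok:
            keep.append(i)
    return boxes[keep]

# Two near-duplicate boxes and one separate box; the duplicate is dropped
print(suppress_overlaps(np.array([[0, 0, 10, 10], [1, 1, 9, 9], [50, 50, 5, 5]]), 0.5))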
Example #7
def prep_metrics(ap_data,
                 dets,
                 img,
                 gt,
                 gt_masks,
                 h,
                 w,
                 num_crowd,
                 image_id,
                 detections: Detections = None):
    """ Returns a list of APs for this image, with each element being for a class  """
    if not args.output_coco_json:
        with timer.env('Prepare gt'):
            gt_boxes = torch.Tensor(gt[:, :4])
            gt_boxes[:, [0, 2]] *= w
            gt_boxes[:, [1, 3]] *= h
            gt_classes = list(gt[:, 4].astype(int))
            gt_masks = torch.Tensor(gt_masks).view(-1, h * w)

            if num_crowd > 0:
                split = lambda x: (x[-num_crowd:], x[:-num_crowd])
                crowd_boxes, gt_boxes = split(gt_boxes)
                crowd_masks, gt_masks = split(gt_masks)
                crowd_classes, gt_classes = split(gt_classes)

    with timer.env('Postprocess'):
        classes, scores, boxes, masks = postprocess(
            dets,
            w,
            h,
            crop_masks=args.crop,
            score_threshold=args.score_threshold)

        if classes.size(0) == 0:
            return

        classes = list(classes.cpu().numpy().astype(int))
        if isinstance(scores, list):
            box_scores = list(scores[0].cpu().numpy().astype(float))
            mask_scores = list(scores[1].cpu().numpy().astype(float))
        else:
            scores = list(scores.cpu().numpy().astype(float))
            box_scores = scores
            mask_scores = scores
        masks = masks.view(-1, h * w).cuda()
        boxes = boxes.cuda()

    if args.output_coco_json:
        with timer.env('JSON Output'):
            boxes = boxes.cpu().numpy()
            masks = masks.view(-1, h, w).cpu().numpy()
            for i in range(masks.shape[0]):
                # Make sure that the bounding box actually makes sense and a mask was produced
                if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] -
                                                  boxes[i, 0]) > 0:
                    detections.add_bbox(image_id, classes[i], boxes[i, :],
                                        box_scores[i])
                    detections.add_mask(image_id, classes[i], masks[i, :, :],
                                        mask_scores[i])
            return

    with timer.env('Eval Setup'):
        num_pred = len(classes)
        num_gt = len(gt_classes)

        mask_iou_cache = _mask_iou(masks, gt_masks)
        bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float())

        if num_crowd > 0:
            crowd_mask_iou_cache = _mask_iou(masks, crowd_masks, iscrowd=True)
            crowd_bbox_iou_cache = _bbox_iou(boxes.float(),
                                             crowd_boxes.float(),
                                             iscrowd=True)
        else:
            crowd_mask_iou_cache = None
            crowd_bbox_iou_cache = None

        box_indices = sorted(range(num_pred), key=lambda i: -box_scores[i])
        mask_indices = sorted(box_indices, key=lambda i: -mask_scores[i])

        iou_types = [('box', lambda i, j: bbox_iou_cache[i, j].item(),
                      lambda i, j: crowd_bbox_iou_cache[i, j].item(),
                      lambda i: box_scores[i], box_indices),
                     ('mask', lambda i, j: mask_iou_cache[i, j].item(),
                      lambda i, j: crowd_mask_iou_cache[i, j].item(),
                      lambda i: mask_scores[i], mask_indices)]

    timer.start('Main loop')
    for _class in set(classes + gt_classes):
        ap_per_iou = []
        num_gt_for_class = sum([1 for x in gt_classes if x == _class])

        for iouIdx in range(len(iou_thresholds)):
            iou_threshold = iou_thresholds[iouIdx]

            for iou_type, iou_func, crowd_func, score_func, indices in iou_types:
                gt_used = [False] * len(gt_classes)

                ap_obj = ap_data[iou_type][iouIdx][_class]
                ap_obj.add_gt_positives(num_gt_for_class)

                for i in indices:
                    if classes[i] != _class:
                        continue

                    max_iou_found = iou_threshold
                    max_match_idx = -1
                    for j in range(num_gt):
                        if gt_used[j] or gt_classes[j] != _class:
                            continue

                        iou = iou_func(i, j)

                        if iou > max_iou_found:
                            max_iou_found = iou
                            max_match_idx = j

                    if max_match_idx >= 0:
                        gt_used[max_match_idx] = True
                        ap_obj.push(score_func(i), True)
                    else:
                        # If the detection matches a crowd, we can just ignore it
                        matched_crowd = False

                        if num_crowd > 0:
                            for j in range(len(crowd_classes)):
                                if crowd_classes[j] != _class:
                                    continue

                                iou = crowd_func(i, j)

                                if iou > iou_threshold:
                                    matched_crowd = True
                                    break

                        # All this crowd code so that we can make sure that our eval code gives the
                        # same result as COCOEval. There aren't even that many crowd annotations to
                        # begin with, but accuracy is of the utmost importance.
                        if not matched_crowd:
                            ap_obj.push(score_func(i), False)
    timer.stop('Main loop')
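
The main loop above is a greedy matcher: detections are visited in descending score order and each claims the best still-unused ground truth whose IoU clears the threshold; anything unmatched (and not explained by a crowd region) counts as a false positive. A stripped-down sketch of just that matching step:

def greedy_match(det_scores, iou, iou_threshold):
    # det_scores: detection scores (higher = better)
    # iou[i][j]: IoU between detection i and ground truth j
    # Returns (score, is_true_positive) pairs in matching order.
    order = sorted(range(len(det_scores)), key=lambda i: -det_scores[i])
    gt_used = [False] * (len(iou[0]) if iou else 0)
    results = []
    for i in order:
        best_iou, best_j = iou_threshold, -1
        for j in range(len(gt_used)):
            if not gt_used[j] and iou[i][j] > best_iou:
                best_iou, best_j = iou[i][j], j
        if best_j >= 0:
            gt_used[best_j] = True
        results.append((det_scores[i], best_j >= 0))
    return results

# Two detections compete for one GT; only the higher-scoring one matches.
print(greedy_match([0.9, 0.8], [[0.7], [0.6]], 0.5))  # [(0.9, True), (0.8, False)]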
Example #8
def segment_yolact(frame, score_threshold, top_k, overlap_thresh, cfgfile,
                   netfile, cuda):
    """Segment objects in frame using YOLACT.

    Parameters
    ----------
    frame: numpy.ndarray
    (HxWxC) integer array with the image content.
    score_threshold: float
        Minimum score to include object, should be in `(0, 1)`.
    top_k: int
        The number of segmented objects to keep.
    overlap_thresh: float
    Drop the lower-ranked of any two detections whose bounding boxes overlap
    (intersection over union) by more than this amount.
    cfgfile: str
        Path to YOLACT configuration file.
    netfile: str
        Path to YOLACT network weights file.
    cuda: bool
        Whether to use CUDA.
    Returns
    -------
    numpy.ndarray
        An array of bounding boxes of detected objects in
        (xleft, ytop, width, height) format.
    """
    global ynet
    global config

    if ynet is None:
        init_yolact(cfgfile, netfile, cuda)
    # Partly follows yolact eval.py
    tic = time.perf_counter_ns()
    with torch.no_grad():
        if cuda:
            frame = torch.from_numpy(frame).cuda().float()
        else:
            frame = torch.from_numpy(frame).float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = ynet(batch)
        h, w, _ = frame.shape
        config.rescore_bbox = True
        classes, scores, boxes, masks = oututils.postprocess(
            preds, w, h,
            visualize_lincomb=False,
            crop_masks=True,
            score_threshold=score_threshold)
        idx = scores.argsort(0, descending=True)[:top_k]
        # if self.config.eval_mask_branch:
        #     masks = masks[idx]
        classes, scores, boxes = [x[idx].cpu().numpy()
                                  for x in (classes, scores, boxes)]
        # This is probably not required, `postprocess` uses
        # `score_thresh` already
        # num_dets_to_consider = min(self.top_k, classes.shape[0])
        # for j in range(num_dets_to_consider):
        #     if scores[j] < self.score_threshold:
        #         num_dets_to_consider = j
        #         break
        # logging.debug('Bounding boxes: %r', boxes)
        # Convert from top-left bottom-right format to
        # top-left, width, height format
        if len(boxes) == 0:
            return np.empty(0)

        boxes[:, 2:] = boxes[:, 2:] - boxes[:, :2]
        boxes = np.asanyarray(np.rint(boxes), dtype=np.int_)
        if overlap_thresh < 1:
            dist_matrix = ut.pairwise_distance(new_bboxes=boxes, bboxes=boxes,
                                               boxtype=OutlineStyle.bbox,
                                               metric=DistanceMetric.iou)
            bad_boxes = []
            for ii in range(dist_matrix.shape[0] - 1):
                for jj in range(ii + 1, dist_matrix.shape[1]):
                    if dist_matrix[ii, jj] < 1 - overlap_thresh:
                        bad_boxes.append(jj)
            boxes = np.array([boxes[ii] for ii in range(boxes.shape[0]) if
                              ii not in bad_boxes], dtype=np.int_)
        toc = time.perf_counter_ns()
        logging.debug('Time to process single image: %f s',
                      1e-9 * (toc - tic))
        return boxes
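
The in-place update boxes[:, 2:] = boxes[:, 2:] - boxes[:, :2] converts corner boxes to the (xleft, ytop, width, height) format promised by the docstring; a tiny round-trip check:

import numpy as np

corners = np.array([[10, 20, 50, 80]], dtype=float)    # (x1, y1, x2, y2)

xywh = corners.copy()
xywh[:, 2:] = xywh[:, 2:] - xywh[:, :2]                # -> (x, y, w, h)

back = xywh.copy()
back[:, 2:] = back[:, 2:] + back[:, :2]                # -> (x1, y1, x2, y2)

assert np.array_equal(corners, back)
print(xywh)  # [[10. 20. 40. 60.]]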
Example #9
def prep_display(
    dets_out,
    img,
    h,
    w,
    cfg: YolactConfig,
    undo_transform=True,
    class_color=False,
    mask_alpha=0.45,
    fps_str="",
    display_lincomb=False,
):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env("Postprocess"):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h)
        cfg.rescore_bbox = save

    with timer.env("Copy"):
        idx = t[1].argsort(0, descending=True)  # [:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = classes.shape[0]

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.0
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat(
            [
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
            dim=0,
        )
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[: (num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(
            fps_str, font_face, font_scale, font_thickness
        )[0]

        img_gpu[0 : text_h + 8, 0 : text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note: make sure this is a uint8 tensor or OpenCV will not anti-alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(
            img_numpy,
            fps_str,
            text_pt,
            font_face,
            font_scale,
            text_color,
            font_thickness,
            cv2.LINE_AA,
        )

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = (
                    "%s: %.2f" % (_class, score) if args.display_scores else _class
                )

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(
                    text_str, font_face, font_scale, font_thickness
                )[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(
                    img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1
                )
                cv2.putText(
                    img_numpy,
                    text_str,
                    text_pt,
                    font_face,
                    font_scale,
                    text_color,
                    font_thickness,
                    cv2.LINE_AA,
                )

    return img_numpy
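
The FPS overlay in this and the next example dims a top-left rectangle while the image is still a GPU tensor, then draws the text on the uint8 CPU copy. A compact CPU-only sketch of the same pattern on a synthetic image:

import cv2
import numpy as np

img = np.full((480, 640, 3), 200, dtype=np.uint8)
fps_str = '30.0 FPS'

font, scale, thick = cv2.FONT_HERSHEY_DUPLEX, 0.6, 1
(text_w, text_h), _ = cv2.getTextSize(fps_str, font, scale, thick)

# Dim the box behind the text (the GPU version multiplies the tensor by 0.6)
img[0:text_h + 8, 0:text_w + 8] = (img[0:text_h + 8, 0:text_w + 8] * 0.6).astype(np.uint8)
cv2.putText(img, fps_str, (4, text_h + 2), font, scale, (255, 255, 255), thick, cv2.LINE_AA)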
Example #10
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape
    
    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, visualize_lincomb = args.display_lincomb,
                                        crop_masks        = args.crop,
                                        score_threshold   = args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]
        
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
        
        if args.only_person:
            for i, _class in enumerate(classes):
                if _class != 0:
                    scores[i] = -1

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)
        
        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if (args.display_masks or args.identify_people) and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]
        
        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1
        
        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)
        
        
        if args.identify_people:
            # Key = original detection index. Value = person index.
            det_to_person_index = {}
            prep_silh_images = np.empty((0, 299, 299, 3))
            for i in range(num_dets_to_consider):
                _class = cfg.dataset.class_names[classes[i]]
                
                if _class == "person":
                    x1, y1, x2, y2 = boxes[i, :]
        
                    silh_image = (img_gpu * masks[i] * 255)[y1:(y2+1), x1:(x2+1), [2, 1, 0]]
                    numpy_silh_image = silh_image.byte().cpu().numpy()
        
                    prep_silh_image, _ = data.dataset.preprocess(numpy_silh_image, None, 299)
                    prep_silh_images = np.vstack((prep_silh_images, np.expand_dims(prep_silh_image, axis=0)))
                    
                    det_to_person_index[i] = prep_silh_images.shape[0] - 1
        
                    # cv2.imshow("mask", numpy_silh_image)
                    # while cv2.waitKey(1) != ord("q"):
                    #     pass
        
            # data.dataset.show_batch(prep_silh_images, [0, 1, 2], ["prova1", "prova2", "prova3"])
            # pickle.dump(prep_silh_images, open("prep_silh_images.pkl", "wb"))
        
            raw_person_preds = person_classifier.predict(prep_silh_images)
            person_preds = np.argmax(raw_person_preds, axis=1)
            person_scores = np.max(raw_person_preds, axis=1)
            print(person_preds, person_scores)
        
        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
    
    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h+8, 0:text_w+8] *= 0.6 # 1 - Box alpha


    # Then draw the stuff that needs to be done on the cpu
    # Note: make sure this is a uint8 tensor or OpenCV will not anti-alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
    
    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        if args.identify_people:
            with open("data/casia_gait/DatasetB_split_reduced/demo_class_names.txt", "r") as person_classes_file:
                person_classes = person_classes_file.read().splitlines()
            
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                
                if args.identify_people and (j in det_to_person_index):
                    person_index = det_to_person_index[j]
                    person_pred = person_preds[person_index]
                    
                    _class = person_classes[person_pred]
                    
                    score = person_scores[person_index]
                    
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
            
    
    return img_numpy
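
The gait-identification branch above crops each person silhouette as image * mask restricted to the detection box before handing it to the classifier; a minimal numpy sketch of that crop (the 299-pixel preprocessing and person_classifier are specific to this example and omitted):

import numpy as np

def crop_silhouette(img, mask, box):
    # Return the masked image patch inside box, given as (x1, y1, x2, y2) inclusive
    x1, y1, x2, y2 = box
    silh = img * mask[:, :, None]              # zero out non-person pixels
    return silh[y1:y2 + 1, x1:x2 + 1]

img = np.random.randint(0, 256, (720, 1280, 3), dtype=np.uint8)
mask = np.zeros((720, 1280), dtype=np.uint8)
mask[100:300, 200:400] = 1
patch = crop_silhouette(img, mask, (200, 100, 399, 299))
print(patch.shape)  # (200, 200, 3)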