Example #1
 def run_once(self, src):
     self.net.detect.cross_class_nms = True
     self.net.detect.use_fast_nms = True
     cfg.mask_proto_debug = False
     with torch.no_grad():
         frame = torch.Tensor(src).cuda().float()
         batch = FastBaseTransform()(frame.unsqueeze(0))
         time_start = time.perf_counter()  # time.clock() was removed in Python 3.8
         preds = self.net(batch)
         time_elapsed = time.perf_counter() - time_start
         h, w, _ = src.shape
         t = postprocess(
             preds,
             w,
             h,
             visualize_lincomb=False,
             crop_masks=True,
             score_threshold=0.)  # TODO: give a suitable threshold
         torch.cuda.synchronize()
         classes, scores, bboxes, masks = [
             x[:self.output_num].cpu().numpy() for x in t
         ]  # TODO: Only 5 objects for test
         print(time_elapsed)
     instances = self.build_up_result(masks.shape[0], classes, bboxes,
                                      masks, scores)
     return {"instances": instances}
Example #2
def processing(dets_out, img):
    boxes = np.array([])
    masks = np.array([])

    h, w, _ = img.shape

    cfg.rescore_bbox = True
    t = postprocess(dets_out, w, h, score_threshold=score_threshold)  # score_threshold is assumed to be defined at module scope

    idx = t[1].argsort(0, descending=True)[:top_k]

    if cfg.eval_mask_branch:
        # Masks are drawn on the GPU, so don't copy
        masks = t[3][idx]
    classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    boxes = boxes[classes == 0]
    masks = masks[classes == 0]
    scores = scores[classes == 0]

    boxes = boxes[scores >= 0.6]
    masks = masks[scores >= 0.6]
    scores = scores[scores >= 0.6]

    return boxes, masks.to(torch.bool).detach().cpu().numpy(), scores
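postprocess returns a (classes, scores, boxes, masks) tuple, and the three repeated filters above can be collapsed into one boolean index. A minimal sketch of the same class-0/score filtering, assuming all four outputs have already been moved to NumPy:

def keep_person_detections(classes, scores, boxes, masks, min_score=0.6):
    # One boolean index combining the class test and the score test.
    keep = (classes == 0) & (scores >= min_score)
    return boxes[keep], masks[keep], scores[keep]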
Example #3
def evalimages(net: Yolact):
    times = 0
    cocoGt = COCO("data/test.json")
    detections = Detections()
    for imgid in cocoGt.imgs:
        img_info = cocoGt.loadImgs(ids=imgid)[0]
        name = img_info['file_name']
        h = img_info['height']
        w = img_info['width']
        path = "data/test_images/" + name
        predict, timee = evalimage(net, path)
        times += timee
        classes, scores, boxes, masks = postprocess(
            predict,
            w,
            h,
            crop_masks=args.crop,
            score_threshold=args.score_threshold)

        classes = list(classes.cpu().numpy().astype(int))
        scores = list(scores.cpu().numpy().astype(float))
        masks = masks.cpu().numpy()
        boxes = boxes.cpu().numpy()

        if len(classes) > 0:  # If any objects are detected in this image
            for i in range(masks.shape[0]):  # Loop all instances
                # save information of the instance in a dictionary then append on coco_dt list
                if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] -
                                                  boxes[i, 0]) > 0:
                    detections.add_mask(imgid, classes[i], masks[i, :, :],
                                        scores[i])

    detections.dump()
    print(times / len(cocoGt.imgs))  # average inference time per image
    print('Done.')
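Once Detections.dump() has written the results, they can be scored against the same ground truth with pycocotools. A minimal sketch, assuming the dump produced a COCO-format results file at results/mask_detections.json (the actual output path depends on how Detections is configured):

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

cocoGt = COCO("data/test.json")
cocoDt = cocoGt.loadRes("results/mask_detections.json")  # hypothetical path
coco_eval = COCOeval(cocoGt, cocoDt, "segm")  # use "bbox" for box AP
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()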
Example #4
File: eval.py Project: li-xl/Yolact.jittor
def prep_benchmark(dets_out, h, w):
    with timer.env('Postprocess'):
        t = postprocess(dets_out,
                        w,
                        h,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)

    result = {}
    with timer.env('Copy'):
        classes, scores, boxes, masks = [x[:args.top_k] for x in t]
        if isinstance(scores, list):
            box_scores = scores[0]
            mask_scores = scores[1]
            jt.fetch(
                box_scores,
                lambda box_scores: result.update({'box_scores': box_scores}))
            jt.fetch(
                mask_scores, lambda mask_scores: result.update(
                    {'mask_scores': mask_scores}))

        else:
            # scores = scores#.numpy()
            jt.fetch(scores, lambda scores: result.update({'scores': scores}))

        # classes = classes#.numpy()
        # boxes = boxes#.numpy()
        # masks = masks#.numpy()
        jt.fetch(classes, lambda classes: result.update({'classes': classes}))
        jt.fetch(boxes, lambda boxes: result.update({'boxes': boxes}))
        jt.fetch(masks, lambda masks: result.update({'masks': masks}))

    with timer.env('Sync'):
        # Just in case
        jt.sync_all()
Example #5
    def post_process(dets_out,
                     img,
                     h,
                     w,
                     top_k=1,
                     score_threshold=0.6,
                     undo_transform=True):
        """
        Note: If undo_transform=False then im_h and im_w are allowed to be None.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=False,
                            crop_masks=False,
                            score_threshold=score_threshold)
            cfg.rescore_bbox = save

        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:top_k]

            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < score_threshold:
                num_dets_to_consider = j
                break

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        # After this, mask is of size [num_dets, h, w, 1]
        final_res = (img_gpu * 255).byte().cpu().numpy()
        final_res = cv2.cvtColor(final_res, cv2.COLOR_RGB2RGBA)

        if num_dets_to_consider == 0:
            return final_res

        masks = masks[:num_dets_to_consider, :, :, None]

        _mask = (masks * 255).byte().cpu().numpy()[0]  # only the top-scoring mask is used

        # Then assign the mask to the last channel of the image
        final_res[:, :, 3] = _mask.squeeze()

        return final_res
Example #6
def prep_benchmark(dets_out, h, w):
    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h, crop_masks=args.crop, score_threshold=args.score_threshold)

    with timer.env('Copy'):
        classes, scores, boxes, masks = [x[:args.top_k].cpu().numpy() for x in t]
    
    with timer.env('Sync'):
        # Just in case
        torch.cuda.synchronize()
Example #7
def get_mask_bbox_and_score(yolact_net: Yolact,
                            img,
                            threshold=0.0,
                            max_predictions=1):
    """
    Create and return the masks, bboxs and scores given the yolact net and the img
    :param yolact_net: Yolact net initialized
    :param img: ndarray img to segment
    :param threshold: threshold segmentation
    :param max_predictions: maximum number of predictions
    :returns:
        - masks_to_return - a list or single value of ndarray with 0 and 1 representing the mask(s)
        - boxes_to_return - a list or single value of ndarray representing the bbox(s)
        - scores_to_return - a list or single value of float in [0, 1] representing the confidence(s)
    """
    with torch.no_grad():
        frame = torch.from_numpy(img).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = yolact_net(batch)

        h, w, _ = img.shape

        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(preds, w, h, visualize_lincomb=False, crop_masks=True)
        cfg.rescore_bbox = save

        idx = t[1].argsort(0, descending=True)[:max_predictions]
        classes, scores, boxes, masks = [x[idx].cpu().numpy() for x in t[:]]

        num_dets_to_consider = min(max_predictions, classes.shape[0])
        # Remove detections below the threshold
        for j in range(num_dets_to_consider):
            if scores[j] < threshold:
                num_dets_to_consider = j
                break
        masks_to_return = boxes_to_return = scores_to_return = None
        if num_dets_to_consider > 0:
            masks = masks[:num_dets_to_consider, :, :, None]
            masks_to_return = []
            boxes_to_return = []
            scores_to_return = []
            for m, b, s in zip(masks, boxes, scores):
                masks_to_return.append(m)
                boxes_to_return.append(b)
                scores_to_return.append(s)
            if len(masks_to_return) == 1:
                masks_to_return = masks_to_return[0]
            if len(boxes_to_return) == 1:
                boxes_to_return = boxes_to_return[0]
            if len(scores_to_return) == 1:
                scores_to_return = scores_to_return[0]
        return masks_to_return, boxes_to_return, scores_to_return
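Several of these examples save cfg.rescore_bbox, force it on around postprocess, and restore it afterwards; if postprocess raises, the flag stays overwritten. A small context manager makes the restore exception-safe. This is a hypothetical helper (assuming the same cfg module imported above), not part of YOLACT:

from contextlib import contextmanager

@contextmanager
def rescore_bbox(enabled=True):
    # Temporarily override cfg.rescore_bbox, restoring it even on error.
    saved = cfg.rescore_bbox
    cfg.rescore_bbox = enabled
    try:
        yield
    finally:
        cfg.rescore_bbox = saved

With it, the save/force/restore triplet becomes: with rescore_bbox(True): t = postprocess(preds, w, h, ...).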
Example #8
def detection(image_frame):
    with torch.no_grad():
        frame = torch.from_numpy(image_frame).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)

        # Build each target's mask (target_masks), class (target_classes), confidence (target_scores) and bounding box (target_boxes)
        h, w, _ = frame.shape
        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            # Detection results
            t = postprocess(preds,
                            w,
                            h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
            cfg.rescore_bbox = save

        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:args.top_k]
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        # Extract targets whose class is 'person' or 'car'
        remain_list = []
        items_1 = ['person']
        items_2 = ['car']
        num_items_1 = 0
        num_items_2 = 0
        for j in range(classes.shape[0]):
            if cfg.dataset.class_names[classes[j]] in items_1:
                if num_items_1 < top_k_person and scores[
                        j] > score_threshold_person:
                    remain_list.append(j)
                    num_items_1 += 1
            elif cfg.dataset.class_names[classes[j]] in items_2:
                if num_items_2 < top_k_vehicle and scores[
                        j] > score_threshold_vehicle:
                    remain_list.append(j)
                    num_items_2 += 1
        num_dets_to_consider = len(remain_list)

        target_masks = masks[remain_list]
        target_classes = classes[remain_list]

    return frame, num_dets_to_consider, target_masks, target_classes
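The person/car selection above follows a general pattern: keep at most top_k detections per class group whose score clears that group's threshold. A minimal sketch of that pattern as a reusable helper (illustrative only, not part of the source project):

def select_by_class(classes, scores, class_names, per_group):
    # per_group maps a tuple of class names to a (top_k, threshold) pair,
    # e.g. {('person',): (top_k_person, score_threshold_person),
    #       ('car',): (top_k_vehicle, score_threshold_vehicle)}.
    counts = {group: 0 for group in per_group}
    keep = []
    for j in range(classes.shape[0]):
        name = class_names[classes[j]]
        for group, (top_k, thresh) in per_group.items():
            if name in group and counts[group] < top_k and scores[j] > thresh:
                keep.append(j)
                counts[group] += 1
    return keep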
Example #9
    def raw_inference(self, img, preds=None, frame=None, batch_idx=0):
        """
        optional arg preds, frame: if not None, avoids process_batch() call, used to speedup cached inferences.
        """
        if preds is None or frame is None:
            preds, frame = self.process_batch(img)
        if frame.ndim == 4:
            n, h, w, _ = frame.shape
        elif frame.ndim == 3:
            h, w, _ = frame.shape
        else:
            assert False, "Oops, the frame has unexpected number of dimensions: {}".format(
                frame.shape)

        if self.batchsize > 1:
            assert batch_idx is not None, "In batch mode you must provide batch_idx, indicating which batch row holds the results: [0, {}-1]".format(
                n)

        t = postprocess(preds,
                        w=w,
                        h=h,
                        batch_idx=batch_idx,
                        interpolation_mode='bilinear',
                        visualize_lincomb=False,
                        crop_masks=True,
                        score_threshold=self.score_threshold)

        # honor the top_k limit
        col_scores = 1
        idx = t[col_scores].argsort(0, descending=True)[:self.top_k]
        classes, scores, boxes, masks = [x[idx].cpu().numpy() for x in t[:4]]
        assert len(classes) == len(scores) == len(masks)

        # also get centroids
        centroids = []
        for i in range(len(masks)):
            #cv2.imshow('bin_mask',masks[i])
            #cv2.waitKey(200)
            #if classes[i] > 0: #kuka class
            centroids.append(self.find_centroids_(masks[i],
                                                  1))  #in pixel space

        class_names = [self.class_names_tuple[x] for x in classes]
        #TODO do we want to keep tensor, or convert to py list[]?
        assert len(classes) <= self.top_k
        return classes, class_names, scores, boxes, masks, centroids
Example #10
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45,
                 fps_str=''):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """

    boxes = np.array([])
    masks = np.array([])

    img_gpu = img / 255.0
    h, w, _ = img.shape

    save = cfg.rescore_bbox
    cfg.rescore_bbox = True
    t = postprocess(dets_out,
                    w,
                    h,
                    visualize_lincomb=args.display_lincomb,
                    crop_masks=args.crop,
                    score_threshold=args.score_threshold)
    cfg.rescore_bbox = save

    idx = t[1].argsort(0, descending=True)[:args.top_k]

    if cfg.eval_mask_branch:
        # Masks are drawn on the GPU, so don't copy
        masks = t[3][idx]
    classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    boxes = boxes[classes == 0]
    masks = masks[classes == 0]
    scores = scores[classes == 0]

    return boxes, np.squeeze(masks.to(torch.bool).detach().cpu().numpy())
Example #11
File: eval.py Project: zengtiwei/CIoU
def prep_benchmark(dets_out, h, w):
    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h, crop_masks=args.crop, score_threshold=args.score_threshold)

    with timer.env('Copy'):
        classes, scores, boxes, masks = [x[:args.top_k] for x in t]
        if isinstance(scores, list):
            box_scores = scores[0].cpu().numpy()
            mask_scores = scores[1].cpu().numpy()
        else:
            scores = scores.cpu().numpy()
        classes = classes.cpu().numpy()
        boxes = boxes.cpu().numpy()
        masks = masks.cpu().numpy()
    
    with timer.env('Sync'):
        # Just in case
        torch.cuda.synchronize()
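Examples #4 and #11 both branch on isinstance(scores, list): with bbox rescoring, this fork of postprocess apparently returns scores as a [box_scores, mask_scores] pair rather than a single tensor (an inference from these two examples, not from upstream documentation). A tiny sketch that normalizes both shapes:

def split_scores(scores):
    # Return (box_scores, mask_scores) regardless of which variant we got.
    if isinstance(scores, list):
        return scores[0], scores[1]
    return scores, scores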
Example #12
File: model.py Project: agermanidis/yolact
    def display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, top_k = 100, score_threshold = 0.3):
        img_gpu = img / 255.0
        h, w, _ = img.shape
        
        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h, visualize_lincomb = False,
                                            crop_masks        = True,
                                            score_threshold   = score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:top_k]

        img_gpu = img_gpu * masks[0]  # keep only the top mask's pixels (assumes cfg.eval_mask_branch is enabled)
            
        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()
               
        return img_numpy        
Example #13
 def run_once(self, src, image_name):
     """
     只对一张图像进行预测.参数:
         - src           # ? 要预测的图像
         - image_name    图像名称 # ? 猜测就是图像的文件名
     """
     # step 0 准备
     self.net.detect.cross_class_nms = True
     self.net.detect.use_fast_nms = True
     cfg.mask_proto_debug = False
     # step 1 预测
     with torch.no_grad():
         frame = torch.Tensor(src).cuda().float()
         batch = FastBaseTransform()(frame.unsqueeze(0))
         time_start = time.perf_counter()  # time.clock() was removed in Python 3.8
         preds = self.net(batch)
         time_elapsed = time.perf_counter() - time_start
         h, w, _ = src.shape
         # NOTICE: no minimum score threshold is set here
         t = postprocess(
             preds,
             w,
             h,
             visualize_lincomb=False,
             crop_masks=True,
             score_threshold=0.)  # TODO: give a suitable threshold
         torch.cuda.synchronize()
         classes, scores, boxes, masks = [
             x[:self.output_num].cpu().numpy() for x in t
         ]  # TODO: Only 5 objects for test
         print(time_elapsed)
         # Add every prediction result to the detections object
         for i in range(masks.shape[0]):
             self.detections.add_instance(image_name, i, classes[i],
                                          boxes[i, :], masks[i, :, :],
                                          scores[i])
     # step 2: dump all prediction results
     self.detections.dump_all()
Example #14
  def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, visualize_lincomb = self.args.display_lincomb,
                                        crop_masks        = self.args.crop,
                                        score_threshold   = self.args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:self.args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
        self.classes, self.scores, self.boxes = classes, scores, boxes

    num_dets_to_consider = min(self.args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < self.args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
            return self.color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                self.color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if self.args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # remove overlapped area of mask results
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            overlapped_list = []
            box_size = int((x2-x1)*(y2-y1))
            color = get_color(j)
            score = scores[j]
            for k in reversed(range(num_dets_to_consider)):
                if (k != j):
                    a1, b1, a2, b2 = boxes[k, :]
                    box_size_sub = int((a2-a1)*(b2-b1))
                    if ((min(a2, x2) - max(a1, x1) > 0) and (min(b2, y2) - max(b1, y1) > 0)):
                        # overlapped area
                        S_jk = (min(a2, x2) - max(a1, x1)) * (min(b2, y2) - max(b1, y1))
                        if (S_jk / box_size > 0.9):
                            # included other BBox
                            pass
                        elif (S_jk / box_size_sub > 0.3):
                            # Subtract overlapped area in current bounding box
                            # Find overlapped Bbox position
                            x_list = [x1, x2, a1, a2]
                            y_list = [y1, y2, b1, b2]
                            x_list.sort()
                            y_list.sort()
                            overlapped_list.append([int(x_list[1]), int(y_list[1]), int(x_list[2]), int(y_list[2])])
            for ov_bbox in overlapped_list:
                masks[j][ov_bbox[1]: ov_bbox[3], ov_bbox[0]: ov_bbox[2]] = 0

        self.masks = masks

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        self.masks_color = colors
        self.masks_color_2 = masks_color

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
        self.img_gpu = img_gpu

    if self.args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h+8, 0:text_w+8] *= 0.6 # 1 - Box alpha


    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if self.args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    self.num_dets_to_consider = num_dets_to_consider
    if num_dets_to_consider == 0:
        return img_numpy

    if self.args.display_text or self.args.display_bboxes:
        self.text_str = {}
        draw_masks = self.masks.squeeze(-1).to(torch.device("cpu")).detach().numpy().astype(np.float32)
        update_masks = self.masks.clone()
        overlapped_list = []
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            box_size = int((x2-x1)*(y2-y1))
            color = get_color(j)
            score = scores[j]

            if self.args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if self.args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s:%d_%.2f' % (_class, classes[j], score) if self.args.display_scores else _class
                self.text_str[j] = text_str
                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 + 15)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 + text_h + 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
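The cumprod block in this example replaces a per-detection blending loop with a single closed-form expression. A tiny numeric check of that identity (my reading: the closed form matches the loop applied from the last detection to the first, which coincides with the forward loop whenever masks do not overlap):

import torch

n, h, w, c = 3, 4, 4, 3
img = torch.rand(h, w, c)
masks_color = torch.rand(n, h, w, c)
inv_alph = torch.rand(n, h, w, c)

looped = img.clone()
for j in reversed(range(n)):
    looped = looped * inv_alph[j] + masks_color[j]

summand = masks_color[0] + (masks_color[1:] * inv_alph[:n - 1].cumprod(dim=0)).sum(dim=0)
closed = img * inv_alph.prod(dim=0) + summand
assert torch.allclose(looped, closed, atol=1e-5)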
Example #15
def detect():
    img_path = '/home/user/dataset/pear/train/JPEGImages'
    save_path = '/home/user/pear_output'
    weight_path = '/home/user/caoliwei/yolact/weights/20200901/yolact_darknet53_1176_20000.pth'

    set_cfg('pear_config')

    with torch.no_grad():
        torch.cuda.set_device(0)

        ######
        # If the input image size is constant, this make things faster (hence why we can use it in a video setting).
        # cudnn.benchmark = True
        # cudnn.fastest = True
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        ######

        net = Yolact()
        net.load_weights(weight_path)
        net.eval()
        net = net.cuda()
        print('model loaded...')

        net.detect.cross_class_nms = True
        net.detect.use_fast_nms = True
        cfg.mask_proto_debug = False

        if not os.path.exists(save_path):
            os.mkdir(save_path)

        img_names = [
            name for name in os.listdir(img_path)
            if name.endswith('.jpg') or name.endswith('.png')
        ]
        #for img_name in tqdm(img_names):
        for img_name in img_names:
            img = cv2.imread(os.path.join(img_path, img_name))
            img = torch.from_numpy(img).cuda().float()
            img = FastBaseTransform()(img.unsqueeze(0))
            start = time.time()
            preds = net(img)
            print('clw: image_name: %s, inference time use %.3fs' %
                  (img_name,
                   time.time() - start))  # inference time use 0.023s, 550x550

            # start = time.time()
            h, w = img.shape[2:]
            result = postprocess(
                preds, w, h, crop_masks=True,
                score_threshold=0.3)  # classes, scores, boxes, masks, sorted by score
            # top_k = 10
            # classes, scores, boxes, masks = [x[:top_k].cpu().numpy() for x in result]  # clw note TODO: is it necessary to keep only the top_k?
            # print('clw: postprocess time use %.3fs' % (time.time() - start))  # 0.001s

            ### Walk result[0] in order: the first entry with class id 0 is the
            ### pear, and its index gives the corresponding mask
            # start = time.time()
            bFindPear = False
            for i, cls_id in enumerate(result[0]):
                if cls_id == 0 and not bFindPear:
                    pear_mask = result[3][i].cpu().numpy()
                    bFindPear = True
            if not bFindPear:
                continue  # no pear detected; pear_mask would otherwise be undefined

            # Extract the outline from the pear's mask
            pear_outline = get_outline_from_mask(pear_mask, w, h)
            # print('pear_mask.sum:', pear_mask.sum())     # 124250.0
            # print('pear_outline.sum:', pear_outline.sum())  # 34335.0
            # print('clw: outline extract time use %.3fs' % (time.time() - start))  # 0.001s
            roundness = compute_roundness(pear_outline)
            ###

            result.append(roundness)
Example #16
def prep_display_for_img(dets_out,
                         img,
                         h=None,
                         w=None,
                         undo_transform=True,
                         class_color=False,
                         mask_alpha=0.45):
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            masks = t[3][idx]
        classes, scores, boxes = [x[idx] for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] if class_color else j) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        masks = masks[:num_dets_to_consider, :, :, None]

        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        inv_alph_masks = masks * (-mask_alpha) + 1

        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if num_dets_to_consider == 0:
        return img_numpy

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                if args.display_scores:
                    text_str_class = f"{_class}"
                    text_str_score = f": {score:.2f}"

                    text_w_class, text_h_class = cv2.getTextSize(
                        text_str_class, font_face, font_scale,
                        font_thickness)[0]

                    img_numpy = ps.putBText(img_numpy,
                                            text_str_class,
                                            text_offset_x=x1,
                                            text_offset_y=y1,
                                            vspace=0,
                                            hspace=0,
                                            font=font_face,
                                            font_scale=0.6,
                                            thickness=font_thickness,
                                            alpha=0.7,
                                            background_RGB=color,
                                            text_RGB=(255, 255, 255))
                    img_numpy = ps.putBText(img_numpy,
                                            text_str_score,
                                            text_offset_x=x1,
                                            text_offset_y=y1 + text_h_class +
                                            2,
                                            vspace=0,
                                            hspace=0,
                                            font=font_face,
                                            font_scale=0.6,
                                            thickness=font_thickness,
                                            alpha=0.7,
                                            background_RGB=color,
                                            text_RGB=(255, 255, 255))
                else:
                    text_str_class = '%s' % _class

                    img_numpy = ps.putBText(img_numpy,
                                            text_str_class,
                                            text_offset_x=x1,
                                            text_offset_y=y1,
                                            vspace=0,
                                            hspace=0,
                                            font=font_face,
                                            font_scale=0.6,
                                            thickness=font_thickness,
                                            alpha=0.7,
                                            background_RGB=color,
                                            text_RGB=(255, 255, 255))

    return img_numpy
Example #17
def prep_display_for_video(dets_out,
                           img,
                           h=None,
                           w=None,
                           save_folder=None,
                           undo_transform=True,
                           class_color=False,
                           mask_alpha=0.45,
                           fps_str='',
                           override_args: Config = None):
    if undo_transform:
        assert w is not None and h is not None, "with undo_transform=True, w,h params must be specified!"
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    img_numpy_ori = (img_gpu * 255).byte().cpu().numpy()

    global args
    if override_args is not None:
        args = override_args

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            masks = t[3][idx]
        classes, scores, boxes = [x[idx] for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] if class_color else j) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    global frame_compare

    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        if frame_compare != save_folder[4]:
            masks = masks[:num_dets_to_consider, :, :, None]

            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            inv_alph_masks = masks * (-mask_alpha) + 1

            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if num_dets_to_consider == 0:
        if os.path.isdir(
                save_folder[0]) and save_folder[4] % args.video_fps == 0:
            file_name = save_folder[1] + "_%05d" % save_folder[4] + '.png'
            cv2.imwrite(os.path.join(save_folder[3], file_name), img_numpy)
            cv2.imwrite(os.path.join(save_folder[2], file_name), img_numpy_ori)

        return [img_numpy, img_numpy_ori]

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1

    if args.display_text or args.display_bboxes:
        if frame_compare != save_folder[4]:
            frame_compare = save_folder[4]
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    # text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class
                    if args.display_scores:
                        text_str_class = f"{_class}"
                        text_str_score = f": {score:.2f}"

                        text_w_class, text_h_class = \
                            cv2.getTextSize(text_str_class, font_face, font_scale, font_thickness)[0]

                        img_numpy = ps.putBText(img_numpy,
                                                text_str_class,
                                                text_offset_x=x1,
                                                text_offset_y=y1,
                                                vspace=0,
                                                hspace=0,
                                                font=font_face,
                                                font_scale=0.6,
                                                thickness=font_thickness,
                                                alpha=0.7,
                                                background_RGB=color,
                                                text_RGB=(255, 255, 255))
                        img_numpy = ps.putBText(img_numpy,
                                                text_str_score,
                                                text_offset_x=x1,
                                                text_offset_y=y1 +
                                                text_h_class + 2,
                                                vspace=0,
                                                hspace=0,
                                                font=font_face,
                                                font_scale=0.6,
                                                thickness=font_thickness,
                                                alpha=0.7,
                                                background_RGB=color,
                                                text_RGB=(255, 255, 255))
                    else:
                        text_str_class = '%s' % (_class)

                        img_numpy = ps.putBText(img_numpy,
                                                text_str_class,
                                                text_offset_x=x1,
                                                text_offset_y=y1,
                                                vspace=0,
                                                hspace=0,
                                                font=font_face,
                                                font_scale=0.6,
                                                thickness=font_thickness,
                                                alpha=0.7,
                                                background_RGB=color,
                                                text_RGB=(255, 255, 255))

                    if save_folder[4] % args.video_fps == 0:
                        dist = ocr(img_numpy_ori)
                        result = save_folder[
                            4], f"{dist}", f"{_class}", f"{score:.2f}", f"{x1}", f"{y1}", f"{x2}", f"{y2}"
                        result_list.append(result)

            if os.path.isdir(
                    save_folder[0]) and save_folder[4] % args.video_fps == 0:
                file_name = save_folder[1] + "_%05d" % save_folder[4] + '.png'
                cv2.imwrite(os.path.join(save_folder[3], file_name), img_numpy)
                cv2.imwrite(os.path.join(save_folder[2], file_name),
                            img_numpy_ori)

            return [img_numpy, img_numpy_ori, result_list]

    return [img_numpy, img_numpy_ori]
Example #18
def image_callback(image_data):
    time_start = time.time()
    global cv_image
    cv_image = np.frombuffer(image_data.data, dtype=np.uint8).reshape(image_data.height, image_data.width, -1)
    
    global display_switch
    display_switch = rospy.get_param("/display_mode")
    global record_switch
    record_switch = rospy.get_param("/record_mode")
    global record_initialized
    global video_out
    
    if record_switch and not record_initialized:
        out_path = 'video_out.mp4'
        if seucar_switch:
            target_fps = 10
        else:
            target_fps = 30
        frame_height = 480
        frame_width = 640
        video_out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), target_fps, (frame_width, frame_height), True)
        record_initialized = True
        print("Start recording.")
    
    if not display_switch:
        cv2.destroyAllWindows()
    
    if not record_switch and record_initialized:
        video_out.release()
        record_initialized = False
        print("Save video.")
    
    # region_output is an 8x4 array; row i stores the info for region i
    # columns: 1) pollution level (0-7), 2) vegetation type (0 none, 1 grass, 2 shrub, 3 flower), 3) pedestrian flag (0 none, 1 present), 4) region ID (1-8)
    region_output = np.zeros((8, 4))
    for region_i in range(8):
        region_output[region_i, 3] = region_i + 1
    
    with torch.no_grad():
        # Object detection
        frame = torch.from_numpy(cv_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)
        
        # Build the one-to-one mapping between each target's mask (target_masks), class (target_classes), confidence (target_scores) and bounding box (target_boxes)
        h, w, _ = frame.shape
        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            # Detection results
            t = postprocess(preds, w, h, visualize_lincomb = args.display_lincomb,
                                         crop_masks        = args.crop,
                                         score_threshold   = args.score_threshold)
            cfg.rescore_bbox = save
        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:args.top_k]
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
        
        num_dets_to_consider = min(args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < args.score_threshold:
                num_dets_to_consider = j
                break
        
        if num_dets_to_consider > 0:
            target_masks = masks[:num_dets_to_consider, :, :]
            target_classes = classes[:num_dets_to_consider]
            target_scores = scores[:num_dets_to_consider]
            target_boxes = boxes[:num_dets_to_consider, :]
            
            # Display the detection results
            if display_switch or record_switch:
                result_image = result_display(frame, target_masks, target_classes, target_scores, target_boxes, num_dets_to_consider)
            else:
                result_image = frame.byte().cpu().numpy()
            
            # Store rubbish targets and vegetation targets separately
            check_k = 0
            rubbish_remain_list = []
            vegetation_remain_list = []
            rubbish_items = ['ads', 'cigarette', 'firecracker', 'glass bottle', 'leaves', 'metal', 'paper', 'peel', 'plastic', 'solid clod', 'solid crumb']
            vegetation_items = ['grass', 'shrub', 'flower']
            while check_k < target_classes.shape[0]:
                if cfg.dataset.class_names[target_classes[check_k]] in rubbish_items:
                    rubbish_remain_list.append(check_k)
                if cfg.dataset.class_names[target_classes[check_k]] in vegetation_items:
                    vegetation_remain_list.append(check_k)
                check_k += 1
            
            rubbish_masks = target_masks[rubbish_remain_list, :, :]
            rubbish_classes = target_classes[rubbish_remain_list]
            rubbish_scores = target_scores[rubbish_remain_list]
            rubbish_boxes = target_boxes[rubbish_remain_list, :]
            
            vegetation_masks = target_masks[vegetation_remain_list, :, :]
            vegetation_classes = target_classes[vegetation_remain_list]
            vegetation_scores = target_scores[vegetation_remain_list]
            vegetation_boxes = target_boxes[vegetation_remain_list, :]
            
            rubbish_num = len(rubbish_remain_list)
            vegetation_num = len(vegetation_remain_list)
            
            # Processing for rubbish targets
            if rubbish_num > 0:
                # Sample points on the mask boundary
                result_image, rubbish_boundary_pts = get_boundary(result_image, rubbish_num, rubbish_masks, cpt_num=10)
                # s_polygon stores each rubbish target's ground-projected area in world coordinates
                s_polygon = np.zeros((rubbish_num, 1))
                rubbish_list = ['ads', 'cigarette', 'firecracker', 'glass bottle', 'leaves', 'metal', 'paper', 'peel', 'plastic', 'solid clod', 'solid crumb']
                rubbish_weight_coefficient_list = [80, 200, 200, 8000, 80, 1050, 80, 6000, 775, 15750, 4000]
                # region_s stores the largest single-object rubbish area in each region
                region_s = np.zeros((8, 1))
                # region_w stores the total rubbish mass in each region
                region_w = np.zeros((8, 1))
                
                # Iterate over each target
                for i in range(rubbish_boundary_pts.shape[0]):
                    effective_pt_num = 0
                    b_x, b_z = [], []
                    b_area_id = []
                    # Count the valid points; compute each valid point's world coordinates and region
                    for b_pt in range(rubbish_boundary_pts.shape[1]):
                        b_pt_u = rubbish_boundary_pts[i, b_pt, 0, 0]
                        b_pt_v = rubbish_boundary_pts[i, b_pt, 0, 1]
                        # Skip points with invalid pixel coordinates (u=0, v=0)
                        if b_pt_u or b_pt_v:
                            loc_b_pt = p2d_table[b_pt_u, b_pt_v]
                            # Skip points with invalid world coordinates (x=0, z=0)
                            if loc_b_pt[0] or loc_b_pt[1]:
                                effective_pt_num += 1
                                b_x.append(loc_b_pt[0])
                                b_z.append(loc_b_pt[1])
                                b_area_id.append(CameraT.whatArea(loc_b_pt[0], loc_b_pt[1]))
                    
                    # If there are at least 3 valid points, compute area and mass
                    if effective_pt_num >= 3:
                        # Compute the target's area (shoelace formula)
                        s_sum = 0
                        for b_pt in range(effective_pt_num):
                            s_sum += b_x[b_pt]*b_z[(b_pt + 1)%effective_pt_num] - b_z[b_pt]*b_x[(b_pt + 1)%effective_pt_num]
                        s_polygon[i, 0] = abs(s_sum) / 2
                        # Update each region's largest single-object area
                        for b_pt in range(effective_pt_num):
                            # Skip points with an invalid region ID (ID=0)
                            if b_area_id[b_pt]:
                                if s_polygon[i, 0] > region_s[b_area_id[b_pt] - 1, 0]:
                                    region_s[b_area_id[b_pt] - 1, 0] = s_polygon[i, 0]
                        # Compute the target's mass and distribute it across regions
                        for b_pt in range(effective_pt_num):
                            # Skip points with an invalid region ID (ID=0)
                            if b_area_id[b_pt]:
                                rubbish_weight = s_polygon[i, 0] * rubbish_weight_coefficient_list[rubbish_list.index(cfg.dataset.class_names[rubbish_classes[i]])]
                                region_w[b_area_id[b_pt] - 1, 0] += rubbish_weight / effective_pt_num
                
                # Determine the pollution level
                for region_i in range(8):
                    if region_w[region_i, 0] > 0 and region_w[region_i, 0] <= 50:
                        region_output[region_i, 0] = 1
                    elif region_w[region_i, 0] > 50 and region_w[region_i, 0] <= 100:
                        region_output[region_i, 0] = 2
                    elif region_w[region_i, 0] > 100 and region_w[region_i, 0] <= 150:
                        region_output[region_i, 0] = 3
                    elif region_w[region_i, 0] > 150 and region_w[region_i, 0] <= 200:
                        region_output[region_i, 0] = 4
                    elif region_w[region_i, 0] > 200 and region_w[region_i, 0] <= 250:
                        region_output[region_i, 0] = 5
                    elif region_w[region_i, 0] > 250 and region_w[region_i, 0] <= 300:
                        region_output[region_i, 0] = 6
                    elif region_w[region_i, 0] > 300:
                        region_output[region_i, 0] = 7
                
                if display_switch or record_switch:
                    result_image = s_display(result_image, region_s, font_face = cv2.FONT_HERSHEY_DUPLEX, font_scale = 0.5, font_thickness = 1)
                    result_image = w_display(result_image, region_w, font_face = cv2.FONT_HERSHEY_DUPLEX, font_scale = 0.5, font_thickness = 1)
            
            # Processing for vegetation targets
            if vegetation_num > 0:
                # Sample points on the mask boundary
                result_image, vegetation_boundary_pts = get_boundary(result_image, vegetation_num, vegetation_masks, cpt_num=20)
                # region_vegetation_type stores the vegetation type in each region
                region_vegetation_type = np.zeros((8, 1))
                
                # Iterate over each target
                for i in range(vegetation_boundary_pts.shape[0]):
                    effective_pt_num = 0
                    b_area_id = []
                    # Count the valid points; compute each valid point's world coordinates and region
                    for b_pt in range(vegetation_boundary_pts.shape[1]):
                        b_pt_u = vegetation_boundary_pts[i, b_pt, 0, 0]
                        b_pt_v = vegetation_boundary_pts[i, b_pt, 0, 1]
                        # Skip points with invalid pixel coordinates (u=0, v=0)
                        if b_pt_u or b_pt_v:
                            loc_b_pt = p2d_table[b_pt_u, b_pt_v]
                            # Skip points with invalid world coordinates (x=0, z=0)
                            if loc_b_pt[0] or loc_b_pt[1]:
                                effective_pt_num += 1
                                b_area_id.append(CameraT.whatArea(loc_b_pt[0], loc_b_pt[1]))
                    
                    # Compute the vegetation type priority
                    vegetation_list = ['grass', 'shrub', 'flower']
                    v_type = vegetation_list.index(cfg.dataset.class_names[vegetation_classes[i]]) + 1
                    # Update each region's vegetation type
                    for b_pt in range(effective_pt_num):
                        # Skip points with an invalid region ID (ID=0)
                        if b_area_id[b_pt]:
                            if v_type > region_vegetation_type[b_area_id[b_pt] - 1, 0]:
                                region_vegetation_type[b_area_id[b_pt] - 1, 0] = v_type
                
                # Determine the vegetation type
                for region_i in range(8):
                    region_output[region_i, 1] = region_vegetation_type[region_i, 0]
            
        else:
            result_image = frame.byte().cpu().numpy()
    
    if seucar_switch:
        areasinfo_msg = AreasInfo()
        for region_i in range(8):
            region_output_msg = AreaInfo()
            region_output_msg.rubbish_grade = int(region_output[region_i, 0])
            region_output_msg.has_person = bool(region_output[region_i, 2])
            region_output_msg.vegetation_type = int(region_output[region_i, 1])
            region_output_msg.area_id = int(region_output[region_i, 3])
            areasinfo_msg.infos.append(region_output_msg)
        pub.publish(areasinfo_msg)
    
    if display_switch or record_switch:
        result_image = CameraT.drawLine(result_image, w=1)
        result_image = output_display(result_image, region_output, font_face = cv2.FONT_HERSHEY_DUPLEX, font_scale = 0.5, font_thickness = 1)
        cv2.putText(result_image, str(time.time()), (5, 20), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
    
    if display_switch:
        print('region_output')
        print(region_output)
        cv2.imshow("result_image", result_image)
        if cv2.waitKey(1) == 27:
            if record_switch and record_initialized:
                video_out.release()
                print("Save video.")
            cv2.destroyAllWindows()
            # Stop the python program
            rospy.signal_shutdown("It's over.")
    
    if record_switch and record_initialized:
        video_out.write(result_image)
    
    time_end_all = time.time()
    print("totally time cost:", time_end_all - time_start)
Example #19
    def live_analysis(self):
        """
        Function for live stream video masking
        """
        
        bar = [
                " Waiting for frame [=     ]              ",
                " Waiting for frame [ =    ]              ",
                " Waiting for frame [  =   ]              ",
                " Waiting for frame [   =  ]              ",
                " Waiting for frame [    = ]              ",
                " Waiting for frame [     =]              ",
                " Waiting for frame [    = ]              ",
                " Waiting for frame [   =  ]              ",
                " Waiting for frame [  =   ]              ",
                " Waiting for frame [ =    ]              ",
            ]
        idx = 0
        while not rospy.is_shutdown():
            start_time = time.time()
            self.masked_id = []
            current_frame = self.frame
            current_depth_frame = self.depth_frame

            if len(current_frame) == 0 or len(current_depth_frame) == 0:

                print(bar[idx % len(bar)], end="\r")
                idx += 1
                time.sleep(0.1)
            
            else:
                
                nn_start_time = time.time()
                
                if self.nn == 'yolact' or self.nn == 'yolact++' or self.nn == 'yolact_edge':
                    frame = torch.from_numpy(current_frame).cuda().float()
                    batch = FastBaseTransform()(frame.unsqueeze(0))
                    if self.nn == 'yolact_edge':
                        extras = {"backbone": "full", "interrupt":False, "keep_statistics":False, "moving_statistics":None}
                        preds = self.net(batch.cuda(), extras=extras)
                        preds = preds["pred_outs"]
                    else:
                        preds = self.net(batch.cuda())
                        
                    nn_pred_time = time.time()
                    h, w, _ = frame.shape
                    b = {}
                    r = {}
                    b['class_ids'], b['scores'], b['rois'], b['masks'] = postprocess(preds, w, h, score_threshold=self.score_threshold)

                    r['class_ids'] = copy.deepcopy(b['class_ids'].cpu().data.numpy())
                    r['scores'] = copy.deepcopy(b['scores'].cpu().data.numpy())
                    r['rois'] = copy.deepcopy(b['rois'].cpu().data.numpy())
                    r['masks'] = copy.deepcopy(b['masks'].cpu().data.numpy())    
               
                elif self.nn == 'mrcnn':
                    results = self.model.detect([current_frame],verbose=1)
                    r = results[0]
                    r['masks'] = np.swapaxes(r['masks'],0,2)
                    r['masks'] = np.swapaxes(r['masks'],1,2)

                    for i in range(r['rois'].shape[0]):
                        buff = r['rois'][i]
                        r['rois'][i] = [buff[1],buff[0],buff[3],buff[2]]
                    r['class_ids'] = r['class_ids'] - 1
                
                ''' Deprecated, did not enhance speed
                j=0
                for i in range(len(r['class_ids'])):
                    if not np.in1d(r['class_ids'][j], self.selected_classes):
                        r['class_ids'] = np.delete(r['class_ids'], j)
                        r['scores']= np.delete(r['scores'], j)
                        r['rois']= np.delete(r['rois'], j,axis=0)
                        r['masks']= np.delete(r['masks'], j, axis=0)
                    else:
                        j=j+1
                '''
                self.number_observation = min(self.max_number_observation, r['class_ids'].shape[0])
                for j in range(self.number_observation):
                    if r['scores'][j] < self.score_threshold:
                        self.number_observation = j
                        break

                r['class_ids'] = r['class_ids'][:self.number_observation]
                r['scores'] = r['scores'][:self.number_observation]
                r['rois'] = r['rois'][:self.number_observation]
                r['masks'] = r['masks'][:self.number_observation]

                nn_time = time.time()

                mask_depth = self.get_masking_depth(current_depth_frame, r['masks'])
                
                # Read object tf pose
                self.read_objects_pose()
                
                # Read camera tf pose
                try:
                    (transc, rotc) = listener.lookupTransform(self.tf_camera,'/map', rospy.Time(0))
                except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
                    transc = np.array([0.,0.,0.])
                    rotc = np.array([0.,0.,0.,1.])

                euler = tf.transformations.euler_from_quaternion(rotc)
                rot = tf.transformations.euler_matrix(euler[0],euler[1],euler[2])
        
                h_mat = rot
                h_mat[0:3,3:] = np.array([transc]).T

                objects_to_delete = []

                # Main filter update and prediction step
                if len(r['rois']) == 0:
                    for i in self.objects_dict:
                        self.objects_dict[i]["inactiveNbFrame"] = self.objects_dict[i]["inactiveNbFrame"] + 1

                        if self.objects_dict[i]["inactiveNbFrame"] > self._max_inactive_frames:                            
                            objects_to_delete.append(i)
                    
                    for i in objects_to_delete:
                        self.delete_object(i)
                        
                else:
                    current_centroids, current_dimensions = self.mask_to_centroid(r['rois'],mask_depth)

                    if not self.objects_dict:
                        if len(current_centroids) > 0:
                            for i in range(len(current_centroids)):
                                self.add_object(current_centroids[i], current_dimensions[i], i, r['class_ids'][i], r['masks'][i], r['rois'][i])

                            for i in self.objects_dict:
                                self.objects_dict[i]["kalmanFilter"].prediction()
                                self.objects_dict[i]["kalmanFilter"].update(self.objects_dict[i]["centroid"], h_mat)
                                self.objects_dict[i]["estimatedPose"] = self.objects_dict[i]["kalmanFilter"].x[0:3]
                                self.objects_dict[i]["estimatedVelocity"] = self.objects_dict[i]["kalmanFilter"].x[3:6]
                    else:
                        objects_pose = np.zeros((len(self.objects_dict),3))
                        objects_ids = np.zeros((len(self.objects_dict)))
                        index = 0
                        for i in self.objects_dict:
                            objects_pose[index,] = self.objects_dict[i]["centroid"]
                            objects_ids[index] = i
                            index = index + 1

                        centroids_pose = np.zeros((len(current_centroids),3))
                        for i in range(len(current_centroids)):
                            centroids_pose[i,] = current_centroids[i]
                        
                        eucledian_dist_pairwise = np.array(cdist(objects_pose, centroids_pose)).flatten()
                        index_sorted = np.argsort(eucledian_dist_pairwise)

                        used_objects = []
                        used_centroids = []
                        
                        for index in range(len(eucledian_dist_pairwise)):
                            object_id = int(index_sorted[index] // len(centroids_pose))
                            centroid_id = index_sorted[index] % len(centroids_pose)

                            if not np.in1d(object_id, used_objects) and not np.in1d(centroid_id, used_centroids):# and (eucledian_dist_pairwise[index]<0.5):
                                if self.objects_dict[objects_ids[object_id]]["classID"] == r['class_ids'][centroid_id]:
                                    timebefore = time.time()
                                    used_objects.append(object_id)
                                    used_centroids.append(centroid_id)

                                    self.objects_dict[objects_ids[object_id]]["kalmanFilter"].prediction()
                                    self.objects_dict[objects_ids[object_id]]["kalmanFilter"].update(current_centroids[centroid_id], h_mat)
                                    self.objects_dict[objects_ids[object_id]]["estimatedPose"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x[0:3]
                                    self.objects_dict[objects_ids[object_id]]["estimatedVelocity"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x[3:6]

                                    if self.objects_dict[objects_ids[object_id]]["classID"] == 0:
                                        max_threshold = self.human_threshold
                                    else:
                                        max_threshold = self.object_threshold
                                    
                                    if abs(self.objects_dict[objects_ids[object_id]]["estimatedVelocity"][0])>max_threshold or abs(self.objects_dict[objects_ids[object_id]]["estimatedVelocity"][1])>max_threshold or abs(self.objects_dict[objects_ids[object_id]]["estimatedVelocity"][2])>max_threshold:
                                        self.objects_dict[objects_ids[object_id]]["activeObject"] = 1
                                    else:
                                        self.objects_dict[objects_ids[object_id]]["activeObject"] = 0

                                    if self.objects_dict[objects_ids[object_id]]["classID"] == 0 and self.objects_dict[objects_ids[object_id]]["activeObject"] == 0:
                                        
                                        iou = self.iou_centered_centroid(self.objects_dict[objects_ids[object_id]]["roisOld"], r['rois'][centroid_id], self.objects_dict[objects_ids[object_id]]["maskOld"],r['masks'][centroid_id])         
                                        if iou < self.iou_threshold:
                                            self.objects_dict[objects_ids[object_id]]["activeObject"] = 1
                                    
                                    self.objects_dict[objects_ids[object_id]]["centroid"] = centroids_pose[centroid_id]
                                    self.objects_dict[objects_ids[object_id]]["dimensions"] = current_dimensions[centroid_id]
                                    self.objects_dict[objects_ids[object_id]]["inactiveNbFrame"] = 0
                                    self.objects_dict[objects_ids[object_id]]["maskID"] = centroid_id
                                    self.objects_dict[objects_ids[object_id]]["maskOld"] = r['masks'][centroid_id]
                                    self.objects_dict[objects_ids[object_id]]["roisOld"] = r['rois'][centroid_id]
                        
                        if len(centroids_pose) < len(objects_pose):
                            for index in range(len(eucledian_dist_pairwise)):
                                object_id = int(index_sorted[index] // len(centroids_pose))  # row of the (objects x centroids) distance matrix
                                if not np.in1d(object_id, used_objects):
                                    self.objects_dict[objects_ids[object_id]]["inactiveNbFrame"] += 1
                                    self.objects_dict[objects_ids[object_id]]["activeObject"] = 0
                                    if self.objects_dict[objects_ids[object_id]]["inactiveNbFrame"] >= self._max_inactive_frames:
                                        self.delete_object(objects_ids[object_id])
                                        used_objects.append(object_id)
                                    else:
                                        self.objects_dict[objects_ids[object_id]]["kalmanFilter"].prediction()
                                        self.objects_dict[objects_ids[object_id]]["estimatedPose"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x_[0:3]
                                        self.objects_dict[objects_ids[object_id]]["estimatedVelocity"] = self.objects_dict[objects_ids[object_id]]["kalmanFilter"].x_[3:6]

                        elif len(centroids_pose) > len(objects_pose):
                            buff_id = self.next_object_id
                            for index in range(len(eucledian_dist_pairwise)):
                                centroid_id = index_sorted[index] % len(centroids_pose)
                                if not np.in1d(centroid_id, used_centroids):
                                    self.add_object(current_centroids[centroid_id], current_dimensions[centroid_id], centroid_id, r['class_ids'][centroid_id], r['masks'][centroid_id], r['rois'][centroid_id])
                                    self.objects_dict[buff_id]["kalmanFilter"].prediction()
                                    self.objects_dict[buff_id]["kalmanFilter"].update(current_centroids[centroid_id], h_mat)
                                    self.objects_dict[buff_id]["estimatedPose"] = self.objects_dict[buff_id]["kalmanFilter"].x[0:3]
                                    self.objects_dict[buff_id]["estimatedVelocity"] = self.objects_dict[buff_id]["kalmanFilter"].x[3:6]
                                    buff_id = buff_id + 1
                               
                kalman_time = time.time()
                # Write objects filter pose to tf
                self.handle_objects_pose()

                result_dynamic_depth_image, result_depth_image = self.apply_depth_image_masking(current_depth_frame, r['masks'])
                
                DDITS = self.bridge.cv2_to_imgmsg(result_dynamic_depth_image, '32FC1')
                DDITS.header = self.depth_msg_header
                self.dynamic_depth_image_pub.publish(DDITS)

                DITS = self.bridge.cv2_to_imgmsg(result_depth_image, '32FC1')
                DITS.header = self.depth_msg_header
                self.depth_image_pub.publish(DITS)
                
                print_time = time.time()
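
The matching step above is a greedy nearest-neighbour assignment: the pairwise object/centroid distances from cdist are flattened, sorted ascending, and walked once, claiming each object and each centroid at most once. A minimal standalone sketch of that idea (illustrative names, not the class's actual code):

import numpy as np
from scipy.spatial.distance import cdist

def greedy_match(objects_pose, centroids_pose):
    # Flatten the (n_objects x n_centroids) distance matrix and visit pairs
    # in order of increasing distance.
    dists = cdist(objects_pose, centroids_pose).flatten()
    n_centroids = centroids_pose.shape[0]
    used_objects, used_centroids, pairs = set(), set(), []
    for flat_idx in np.argsort(dists):
        obj_id = flat_idx // n_centroids   # row index of the flattened matrix
        cen_id = flat_idx % n_centroids    # column index
        if obj_id not in used_objects and cen_id not in used_centroids:
            used_objects.add(obj_id)
            used_centroids.add(cen_id)
            pairs.append((obj_id, cen_id))
    return pairs

print(greedy_match(np.random.rand(3, 3), np.random.rand(2, 3)))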
Example #20
File: eval.py  Project: prashiyn/yolact
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape
        # print("height:", h, "width:", w)

    with timer.env('Postprocess'):
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:args.top_k]
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    if num_dets_to_consider == 0:
        # No detections found so just output the original image
        return (img_gpu * 255).byte().cpu().numpy()

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending on the source
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
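    # Sketch (not part of the original code): why the cumprod form works. Unrolling
    # the loop in the comment gives img * prod(inv_j) plus each color term attenuated
    # by the inv factors composited after it. For non-overlapping masks every other
    # inv_j is 1 on mask k, so the vectorized form is identical to the naive loop:
    #   out = img.clone()
    #   for j in range(num_dets_to_consider):
    #       out = out * inv_alph_masks[j] + masks_color[j]
    # With overlapping masks the cumprod form composites in reverse index order.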


    def bin_spatial(img, color_space='RGB', size=(32, 32)):
        # Convert image to new color space (if specified)
        if color_space != 'RGB':
            if color_space == 'HSV':
                feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
            elif color_space == 'LUV':
                feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2LUV)
            elif color_space == 'HLS':
                feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
            elif color_space == 'YUV':
                feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
            elif color_space == 'YCrCb':
                feature_image = cv2.cvtColor(img, cv2.COLOR_RGB2YCrCb)
        else:
            feature_image = np.copy(img)
        # Use cv2.resize().ravel() to create the feature vector
        features = cv2.resize(feature_image, size).ravel()
        # Return the feature vector
        return features

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason

    img_numpy = (img_gpu * 255).byte().cpu().numpy()
    crop_tray_img = img_numpy.copy()
    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if classes[j] == 6:  #_class == "tray":
                crop_tray_img = crop_tray_img[y1:y2, x1:x2]
                # process tray cropped image using regression model to predict hidden patties
                height, width = crop_tray_img.shape[0], crop_tray_img.shape[1]
                aspect_ratio = height / width
                height = int(aspect_ratio * 256)
                # print(type(crop_tray_img))
                crop_tray_img = np.array(crop_tray_img, dtype='uint8')
                crop_tray_img = Image.fromarray(crop_tray_img).resize(
                    (256, height), Image.BICUBIC)
                crop_tray_img = np.asarray(crop_tray_img, dtype=float)
                # print("Crop_tray:",crop_tray_img)
                # print("Crop_tray shape:",crop_tray_img.shape)

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (
                    _class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale, font_thickness)[0]
                text_color = [255, 255, 255]
                text_pt = (x1, y1 - 3)
                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

        # counting not visible patties
        n_patties = np.sum(classes != 6)
        chicken_count = np.sum(classes == 0) + np.sum(classes == 1)
        ham_quarter_count = np.sum(classes == 2) + np.sum(classes == 3)
        ham_1by10_count = np.sum(classes == 4) + np.sum(classes == 5)

        # find the maximum of the three by adding count to a list and choose model accordingly
        labels = [chicken_count, ham_quarter_count, ham_1by10_count]
        max_index = labels.index(max(labels))
        if max_index == 0:
            model_path = "weights/regressor/cp_regressor_crop"
        elif max_index == 1:
            model_path = "weights/regressor/qp_regressor_crop"
        else:
            model_path = "weights/regressor/op_regressor_crop"

        # load the pickle model in memory, scale the input and feed it into the model
        import pickle
        from sklearn.preprocessing import StandardScaler
        loaded_model = pickle.load(open(model_path, 'rb'))
        n_bin = 32
        originalFeatures = bin_spatial(crop_tray_img)
        originalFeatures, _ = np.histogram(originalFeatures,
                                           n_bin,
                                           density=True)
        originalFeatures = np.array([originalFeatures])

        sc_X = StandardScaler()
        originalFeatures = sc_X.fit_transform(
            originalFeatures.reshape(n_bin, 1)).reshape(1, n_bin)
        # print("originalFeatures:",originalFeatures)
        prediction = round(loaded_model.predict(originalFeatures)[0])
        # print("model_path:",model_path)
        if max_index == 0 and chicken_count < 4:
            prediction = chicken_count
        count_text = "Calculated: {} Visible Patties: {} -> Chicken: {}, Ham Quarter: {}, HAM 1by10: {}".format(
            prediction, n_patties, chicken_count, ham_quarter_count,
            ham_1by10_count)
        count_text_w, count_text_h = cv2.getTextSize(count_text, font_face,
                                                     font_scale,
                                                     font_thickness)[0]
        text_pt = (x1, y1 - 3)
        count_text_pt = (w - count_text_w, h - count_text_h)
        cv2.putText(img_numpy, count_text, count_text_pt, font_face,
                    font_scale, text_color, font_thickness, cv2.LINE_AA)
        # print(count_text)
    return img_numpy
Example #21
    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45):
        """
        Note: If undo_transform=False then im_h and im_w are allowed to be None.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:args.top_k]
            classes, scores, boxes = [
                x[:args.top_k].cpu().numpy() for x in t[:3]
            ]

        num_dets_to_consider = min(args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < args.score_threshold:
                num_dets_to_consider = j
                break

        if num_dets_to_consider == 0:
            # No detections found so just output the original image
            return (img_gpu * 255).byte().cpu().numpy()

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            global color_cache
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in color_cache[on_gpu]:
                return color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BGR, depending on the source
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    color_cache[on_gpu][color_idx] = color
                return color

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        if args.display_masks and cfg.eval_mask_branch:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]

            # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # I did the math for this on pen and paper. This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        if args.display_text or args.display_bboxes:
            str_ = ""
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    text_str = '%s: %.2f' % (
                        _class, score) if args.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                     font_scale,
                                                     font_thickness)[0]

                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    cv2.rectangle(img_numpy, (x1, y1),
                                  (x1 + text_w, y1 - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)

                    #pub = rospy.Publisher('chatter',String,queue_size=10)
                    #rate = rospy.Rate(50) #10hz
                    #str_ += text_str
            #rospy.loginfo(str_)
            #pub.publish(str_)
            #rate.sleep()

        return img_numpy
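
The commented-out lines above hint at publishing the concatenated label string over ROS. A minimal sketch of that idea (a hypothetical setup, assuming a rospy node is initialized elsewhere; the publisher should be created once, not once per frame):

import rospy
from std_msgs.msg import String

pub = rospy.Publisher('chatter', String, queue_size=10)

def publish_labels(label_str):
    # Log and publish the labels drawn in the current frame
    rospy.loginfo(label_str)
    pub.publish(label_str)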
Example #22
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape
    
    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h, visualize_lincomb = args.display_lincomb,
                                        crop_masks        = args.crop,
                                        score_threshold   = args.score_threshold)
        torch.cuda.synchronize()
    all_pred = []
    # t holds (classes, scores, boxes, masks)
    
    categories = t[0].cpu().numpy()
    scores = t[1].cpu().numpy()
    masks = t[3].cpu().numpy()
    #print(masks.shape)
    n_instances = len(scores)  # number of detected instances in this image
    
    for i in range(n_instances): # Loop all instances
        # save information of the instance in a dictionary then append on all_pred list
        pred = {}
        #pred['image_id'] = imgid # this imgid must be same as the key of test.json
        pred['category_id'] = int(categories[i]) + 1
        pred['segmentation'] = binary_mask_to_rle(masks[i,:,:]) # save binary mask to RLE, e.g. 512x512 -> rle
        pred['score'] = float(scores[i])
        all_pred.append(pred)
        
    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:args.top_k]
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)
        
        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending on the source
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]
        
        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1
        
        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
    
    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h+8, 0:text_w+8] *= 0.6 # 1 - Box alpha


    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
    
    if num_dets_to_consider == 0:
        return img_numpy, all_pred

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
            
    
    return img_numpy, all_pred
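
binary_mask_to_rle is not defined in this snippet. A common uncompressed COCO-style implementation, offered here as an assumption about what the helper does, counts runs in column-major order starting with the zero run:

import numpy as np

def binary_mask_to_rle(binary_mask):
    # Hypothetical helper: uncompressed COCO RLE of a {0,1} mask.
    counts = []
    last, run = 0, 0
    for value in binary_mask.ravel(order='F'):  # column-major, per COCO convention
        if value != last:
            counts.append(run)
            last, run = value, 0
        run += 1
    counts.append(run)
    return {'size': list(binary_mask.shape), 'counts': counts}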
Example #23
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45,
                 fps_str=''):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):

        #idx = t[1].argsort(0, descending=True)[:args.top_k]
        idx1 = t[1].argsort()
        idx = idx1.argsort()
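        # argsort().argsort() is the inverse of the ascending-sort permutation,
        # i.e. each detection's rank by score; unlike the commented-out line above,
        # this keeps every detection rather than only the top-k.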

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
            mask_picture = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        obj_info, obj_num = data_save(mask_picture, classes, scores, boxes)

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (obj_info[j][0] * 5 if class_color else j *
                     5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending on the source
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]

        masks = masks[:num_dets_to_consider, :, :, None]
        #img_gpu = img_gpu * (masks.sum(dim=0) > 0.5).float()  #only show mask
        #img_gpu = img_gpu * masks[0]

        # Grayscale image containing only the masked regions
        mask_img = img_gpu * (masks.sum(dim=0) > 0.5).float()
        global mask_numpy
        mask_numpy = (mask_img * 255).byte().cpu().numpy()
        mask_numpy = cv2.cvtColor(mask_numpy, cv2.COLOR_BGR2GRAY)

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])

        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                         font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

        #mask_img[0:text_h+8, 0:text_w+8] *= 0.6 #0209
    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        global frame_count, state_pre, flag, predict_pos, centerX, centerY, preX, preY, degree
        frame_count += 1

        for j in range(obj_num):
            #mask_info = obj_info[j][5]

            global mask_numpy1, img_num, temp_x, temp_y
            mask_image = mask_picture[j:j + 1, :, :, None]
            mask_image = img_gpu * (mask_image.sum(dim=0) > 0.5).float()  #0209
            mask_numpy1 = (mask_image * 255).byte().cpu().numpy()  #0209
            mask_numpy1 = cv2.cvtColor(mask_numpy1, cv2.COLOR_BGR2GRAY)

            if obj_info[j][2] == 1:
                '''
                if frame_count%10 == 3:
                    
                    centerX.append(obj_info[j][4][0])
                    centerY.append(obj_info[j][4][1])

                    predict_pos[j][0].append(obj_info[j][4][0])
                    predict_pos[j][1].append(obj_info[j][4][1])
                    
                    if predict_pos[j][0][0] == 0:
                        predict_pos[j][0].pop(0)
                    if predict_pos[j][1][0] == 0:
                        predict_pos[j][1].pop(0) 

                    if len(predict_pos[j][0]) > 2:
                        #predict_pos[j][2] = predict_next( predict_pos[j][0], predict_pos[j][1]) 

                        degree, predict_pos[j][2] = predict1_next( mask_numpy1, predict_pos[j][0], predict_pos[j][1]) # test0227
                        temp_x,temp_y=trans_degree(predict_pos[j][2][0,4,0],predict_pos[j][2][0,4,1],degree)
                        
                        
                        predict_pos[j][0].pop(0) #0->1
                        predict_pos[j][1].pop(0)
  
                if state_pre == True:
                    
                    if predict_pos[j][2] != []:
                        
                        for i in range(5):
                            if (predict_pos[j][2][0,i,0]) > 640 or (predict_pos[j][2][0,i,1]) > 480:
                                pass
                            else:    
                                pass
                                #cv2.circle(img_numpy,(predict_pos[j][2][0,i,0],predict_pos[j][2][0,i,1]),5,(0,0,213),-1)      
                        cv2.line(img_numpy,(int(obj_info[j][4][0]+temp_x),int(obj_info[j][4][1]+temp_y)),(int(obj_info[j][4][0]-temp_x),int(obj_info[j][4][1]-temp_y)),(0,0,255),3)
                        
                        if flag ==False:
                            for i in range(5):
                                preX.append(predict_pos[j][2][0,i,0])
                                preY.append(predict_pos[j][2][0,i,1])
                                #preY.append(num)
                        else:
                            preX.append(predict_pos[j][2][0,4,0])
                            preY.append(predict_pos[j][2][0,4,1])
                            #preY.append(num)

                        flag = True
                '''
                color = get_color(obj_info[j][0])
                score = obj_info[j][3]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy,
                                  (obj_info[j][4][2], obj_info[j][4][4]),
                                  (obj_info[j][4][3], obj_info[j][4][5]),
                                  color, 1)

                if args.display_text:

                    _class = obj_info[j][1]

                    #text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class
                    text_str = '%s: %s' % (obj_info[j][0], _class
                                           ) if args.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                     font_scale,
                                                     font_thickness)[0]

                    text_pt = (obj_info[j][4][2], obj_info[j][4][4] - 3)
                    text_color = [255, 255, 255]

                    cv2.rectangle(img_numpy,
                                  (obj_info[j][4][2], obj_info[j][4][4]),
                                  (obj_info[j][4][2] + text_w,
                                   obj_info[j][4][4] - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)
            else:
                for i in range(2):
                    predict_pos[j][i] = [0]
                predict_pos[j][2] = []

    return img_numpy
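
data_save is not shown in this example. Judging only from how obj_info is indexed above, a plausible reconstruction (hypothetical, not the project's code; it assumes the module-level cfg used elsewhere in this file) returns one entry per detection as [class_id, class_name, valid_flag, score, (cx, cy, x1, x2, y1, y2)]:

def data_save(masks, classes, scores, boxes):
    # Hypothetical reconstruction of the helper used by prep_display above.
    obj_info = []
    for j in range(classes.shape[0]):
        x1, y1, x2, y2 = boxes[j, :]
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        obj_info.append([classes[j], cfg.dataset.class_names[classes[j]],
                         1, scores[j], (cx, cy, x1, x2, y1, y2)])
    return obj_info, len(obj_info)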
Example #24
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 args,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45,
                 fps_str=''):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].detach().cpu().numpy() for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending on the source
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        masks = masks[:num_dets_to_consider, :, :, None]
        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        inv_alph_masks = masks * (-mask_alpha) + 1
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)
        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
        img_numpy_mask = (masks_color_summand * 255).byte().cpu().numpy()
        cv2.imwrite('results/mask_car_image.jpg', img_numpy_mask)
        print("Mask for all visible car is generated")

    if args.display_best_masks_only == True and args.top_k == 1:
        masks = masks[:num_dets_to_consider, :, :, None]
        num_dets_to_consider = min(args.top_k, classes.shape[0])
        print('maskshape', (masks.shape))
        for i in range(num_dets_to_consider):
            msk = masks[i, :, :, None]
            mask = msk.view(1, masks.shape[1], masks.shape[2], 1)
            print('newmaskshape', (mask.shape))
            img_gpu_masked = img_gpu * (mask.sum(dim=0) >= 1).float().expand(
                -1, -1, 3)
            img_numpy_masked = (img_gpu_masked * 255).byte().cpu().numpy()
            cv2.imwrite('results/mask_image' + str(i) + '.jpg',
                        img_numpy_masked)
            print("Mask for the most visible car is generated")

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                         font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_best_bboxes_only == 'True':
                crop = img_numpy[y1:y2, x1:x2]
                cv2.imwrite('results/crop_object.png', crop)
                print("crop for the most visible car is generated")

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (
                    _class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

    return img_numpy
Example #25
def prep_display(dets_out, img, gt, gt_masks, h, w, undo_transform=True, class_color=False):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    gt and gt_masks are also allowed to be none (until I reimplement that functionality).
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape
    
    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb, crop_masks=args.crop, score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            masks = t[3][:args.top_k] # We'll need this later
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]
    
    if classes.shape[0] == 0:
        return (img_gpu * 255).byte().cpu().numpy()

    def get_color(j):
        color = COLORS[(classes[j] * 5 if class_color else j * 5) % len(COLORS)]
        if not undo_transform:
            color = (color[2], color[1], color[0])
        return color

    # Draw masks first on the gpu
    if args.display_masks and cfg.eval_mask_branch:
        for j in reversed(range(min(args.top_k, classes.shape[0]))):
            if scores[j] >= args.score_threshold:
                color = get_color(j)

                mask = masks[j, :, :, None]
                mask_color = mask @ (torch.Tensor(color).view(1, 3) / 255.0)
                mask_alpha = 0.45

                # Alpha only the region of the image that contains the mask
                img_gpu = img_gpu * (1 - mask) \
                        + img_gpu * mask * (1-mask_alpha) + mask_color * mask_alpha
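                # Equivalent to leaving pixels off the mask untouched and blending
                # on-mask pixels to (1 - mask_alpha) * img + mask_alpha * color.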
        
    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()
    
    if args.display_text or args.display_bboxes:
        for j in reversed(range(min(args.top_k, classes.shape[0]))):
            score = scores[j]

            if scores[j] >= args.score_threshold:
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if args.display_text:
                    _class = COCO_CLASSES[classes[j]]
                    text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
    
    return img_numpy
Example #26
def image_callback(image_data):
    time_start = time.time()
    global cv_image
    cv_image = np.frombuffer(image_data.data, dtype=np.uint8).reshape(image_data.height, image_data.width, -1)
    
    # region_output is an 8x4 array; row i holds the info for region i:
    # column 1 is the rubbish pollution grade (0-4), column 2 the vegetation type
    # (0 none, 1 grass, 2 shrub, 3 flower), column 3 the pedestrian flag
    # (0 absent, 1 present), and column 4 the region ID (1-8)
    region_output = np.zeros((8, 4))
    for region_i in range(8):
        region_output[region_i, 3] = region_i + 1
    
    with torch.no_grad():
        # Object detection
        frame = torch.from_numpy(cv_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)
        
        # Pair up each target's mask (target_masks), class (target_classes), confidence (target_scores) and bounding box (target_boxes)
        h, w, _ = frame.shape
        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            # Detection results
            t = postprocess(preds, w, h, visualize_lincomb = args.display_lincomb,
                                         crop_masks        = args.crop,
                                         score_threshold   = args.score_threshold)
            cfg.rescore_bbox = save
        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:args.top_k]
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
        
        num_dets_to_consider = min(args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < args.score_threshold:
                num_dets_to_consider = j
                break
        
        if num_dets_to_consider > 0:
            target_masks = masks[:num_dets_to_consider, :, :]
            target_classes = classes[:num_dets_to_consider]
            target_scores = scores[:num_dets_to_consider]
            target_boxes = boxes[:num_dets_to_consider, :]
            
            # Ignore targets whose class is 'road hole' or 'water stain'
            check_k = 0
            remain_list = []
            while check_k < target_classes.shape[0]:
                if cfg.dataset.class_names[target_classes[check_k]] not in ['road hole', 'water stain']:
                    remain_list.append(check_k)
                check_k += 1
        
            target_masks = target_masks[remain_list, :, :]
            target_classes = target_classes[remain_list]
            target_scores = target_scores[remain_list]
            target_boxes = target_boxes[remain_list, :]
            
            # Display the detection results
            if display_switch:
                result_image = result_display(frame, target_masks, target_classes, target_scores, target_boxes, num_dets_to_consider)
            else:
                result_image = frame
            
            # Split targets into rubbish and vegetation
            check_k = 0
            rubbish_remain_list = []
            vegetation_remain_list = []
            rubbish_items = ['ads', 'cigarette', 'firecracker', 'glass bottle', 'leaves', 'metal', 'paper', 'peel', 'plastic', 'solid clod', 'solid crumb']
            vegetation_items = ['grass', 'shrub', 'flower']
            while check_k < target_classes.shape[0]:
                if cfg.dataset.class_names[target_classes[check_k]] in rubbish_items:
                    rubbish_remain_list.append(check_k)
                if cfg.dataset.class_names[target_classes[check_k]] in vegetation_items:
                    vegetation_remain_list.append(check_k)
                check_k += 1
            
            rubbish_masks = target_masks[rubbish_remain_list, :, :]
            rubbish_classes = target_classes[rubbish_remain_list]
            rubbish_scores = target_scores[rubbish_remain_list]
            rubbish_boxes = target_boxes[rubbish_remain_list, :]
            
            vegetation_masks = target_masks[vegetation_remain_list, :, :]
            vegetation_classes = target_classes[vegetation_remain_list]
            vegetation_scores = target_scores[vegetation_remain_list]
            vegetation_boxes = target_boxes[vegetation_remain_list, :]
            
            rubbish_num = len(rubbish_remain_list)
            vegetation_num = len(vegetation_remain_list)
            
            # Handle rubbish targets
            if rubbish_num > 0:
                # Sample points along each mask boundary
                result_image, rubbish_boundary_pts = get_boundary(result_image, rubbish_num, rubbish_masks, cpt_num=10)
                # s_polygon stores each rubbish target's ground-projected area in the world frame
                s_polygon = np.zeros((rubbish_num, 1))
                rubbish_list = ['ads', 'cigarette', 'firecracker', 'glass bottle', 'leaves', 'metal', 'paper', 'peel', 'plastic', 'solid clod', 'solid crumb']
                rubbish_weight_list = [80, 200, 200, 8000, 80, 1050, 80, 6000, 775, 15750, 4000]
                # region_w accumulates the total rubbish mass in each of the 8 regions
                region_w = np.zeros((8, 1))
                for i in range(rubbish_boundary_pts.shape[0]):
                    effective_pt_num = 0
                    b_x, b_z = [], []
                    b_area_id = []
                    for b_pt in range(rubbish_boundary_pts.shape[1]):
                        b_pt_u = rubbish_boundary_pts[i, b_pt, 0, 0]
                        b_pt_v = rubbish_boundary_pts[i, b_pt, 0, 1]
                        # Skip invalid pixel-coordinate points (u=0, v=0)
                        if b_pt_u or b_pt_v:
                            loc_b_pt = p2d_table[b_pt_u, b_pt_v]
                            # Skip invalid world-coordinate points (x=0, z=0)
                            if loc_b_pt[0] or loc_b_pt[1]:
                                effective_pt_num += 1
                                b_x.append(loc_b_pt[0])
                                b_z.append(loc_b_pt[1])
                                b_area_id.append(CameraT.whatArea(loc_b_pt[0], loc_b_pt[1]))
                    if effective_pt_num >= 3:
                        s_sum = 0
                        for b_pt in range(effective_pt_num - 2):
                            s_sum += abs(b_x[0]*b_z[b_pt + 1] - b_z[0]*b_x[b_pt + 1] + b_x[b_pt + 1]*b_z[b_pt + 2] - b_z[b_pt + 1]*b_x[b_pt + 2] + b_x[b_pt + 2]*b_z[0] - b_z[b_pt + 2]*b_x[0])
                        s_polygon[i, 0] = s_sum / 2
                        for b_pt in range(effective_pt_num):
                            # Skip points with an invalid region ID (ID=0)
                            if b_area_id[b_pt]:
                                region_w[b_area_id[b_pt] - 1, 0] += s_polygon[i, 0] * rubbish_weight_list[rubbish_list.index(cfg.dataset.class_names[rubbish_classes[i]])] / effective_pt_num
                
                # Grade the pollution level from the accumulated mass
                for region_i in range(8):
                    if 0 < region_w[region_i, 0] <= 250:
                        region_output[region_i, 0] = 1
                    elif 250 < region_w[region_i, 0] <= 500:
                        region_output[region_i, 0] = 2
                    elif 500 < region_w[region_i, 0] <= 1000:
                        region_output[region_i, 0] = 3
                    elif region_w[region_i, 0] > 1000:
                        region_output[region_i, 0] = 4
                if display_switch:
                    print('region_w')
                    print(region_w)
                    result_image = w_display(result_image, region_w, font_face = cv2.FONT_HERSHEY_DUPLEX, font_scale = 0.5, font_thickness = 1)
            
            # Handle vegetation targets
            if vegetation_num > 0:
                # Sample points along each mask boundary
                result_image, vegetation_boundary_pts = get_boundary(result_image, vegetation_num, vegetation_masks, cpt_num=20)
                # region_vegetation_type stores the dominant vegetation type per region
                region_vegetation_type = np.zeros((8, 1))
                for i in range(vegetation_boundary_pts.shape[0]):
                    effective_pt_num = 0
                    b_area_id = []
                    for b_pt in range(vegetation_boundary_pts.shape[1]):
                        b_pt_u = vegetation_boundary_pts[i, b_pt, 0, 0]
                        b_pt_v = vegetation_boundary_pts[i, b_pt, 0, 1]
                        # Skip invalid pixel-coordinate points (u=0, v=0)
                        if b_pt_u or b_pt_v:
                            loc_b_pt = p2d_table[b_pt_u, b_pt_v]
                            # Skip invalid world-coordinate points (x=0, z=0)
                            if loc_b_pt[0] or loc_b_pt[1]:
                                effective_pt_num += 1
                                b_area_id.append(CameraT.whatArea(loc_b_pt[0], loc_b_pt[1]))
                    for b_pt in range(effective_pt_num):
                        # Skip points with an invalid region ID (ID=0)
                        if b_area_id[b_pt]:
                            # Priority order: flower > shrub > grass (higher index wins)
                            vegetation_list = ['grass', 'shrub', 'flower']
                            v_type = vegetation_list.index(cfg.dataset.class_names[vegetation_classes[i]]) + 1
                            current_v_type = region_vegetation_type[b_area_id[b_pt] - 1, 0]
                            if v_type > current_v_type:
                                region_vegetation_type[b_area_id[b_pt] - 1, 0] = v_type
                
                for region_i in range(8):
                    region_output[region_i, 1] = region_vegetation_type[region_i, 0]
            
        else:
            result_image = frame.byte().cpu().numpy()
    
    areasinfo_msg = AreasInfo()
    for region_i in range(8):
        region_output_msg = AreaInfo()
        region_output_msg.rubbish_grade = int(region_output[region_i, 0])
        region_output_msg.has_person = bool(region_output[region_i, 2])
        region_output_msg.vegetation_type = int(region_output[region_i, 1])
        region_output_msg.area_id = int(region_output[region_i, 3])
        areasinfo_msg.infos.append(region_output_msg)
    pub.publish(areasinfo_msg)
    
    if display_switch:
        print('region_output')
        print(region_output)
        result_image = CameraT.drawLine(result_image, w=1)
        result_image = output_display(result_image, region_output, font_face = cv2.FONT_HERSHEY_DUPLEX, font_scale = 0.5, font_thickness = 1)
        cv2.imshow("result_image", result_image)
    if record_switch:
        video_out.write(result_image)
    
    if cv2.waitKey(1) == 27:
        if record_switch:
            video_out.release()
        cv2.destroyAllWindows()
        rospy.signal_shutdown("It's over.")
    
    time_end_all = time.time()
    print("totally time cost:", time_end_all - time_start)
예제 #27
0
파일: eval.py 프로젝트: li-xl/Yolact.jittor
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45,
                 fps_str=''):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = jt.array(img_numpy)
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        # jittor's argsort returns (indices, values); slicing the returned tuple
        # would silently skip the top-k cut, so slice the index array instead
        idx, _ = t[1].argsort(0, descending=True)
        idx = idx[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].numpy() for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = jt.array(list(color)).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Jittor tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider].unsqueeze(3)

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = jt.contrib.concat(
            [get_color(j, 0).view(1, 1, 1, 3) for j in range(num_dets_to_consider)],
            dim=0)

        masks_color = masks.repeat(1, 1, 1, 3) * colors.repeat(
            1, masks.shape[1], masks.shape[2], 1) * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]

        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                         font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).uint8().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (
                    _class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

    return img_numpy
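The cumprod block above batches what is conceptually a per-instance alpha blend. Note that the batched form composites instance 0 on top (it is the loop run back-to-front), which matches the forward loop exactly whenever masks do not overlap. A small numpy check of that equivalence (random inputs, purely illustrative):

import numpy as np

rng = np.random.default_rng(0)
n, h, w, alpha = 3, 4, 5, 0.45

img = rng.random((h, w, 3))
masks = rng.integers(0, 2, (n, h, w, 1)).astype(float)
colors = rng.random((n, 1, 1, 3))

masks_color = masks * colors * alpha   # per-instance colored overlay
inv_alph = masks * (-alpha) + 1        # 1 outside a mask, 1 - alpha inside

# Sequential blend, back to front (instance 0 ends up on top)
out_loop = img.copy()
for j in reversed(range(n)):
    out_loop = out_loop * inv_alph[j] + masks_color[j]

# Batched form used in prep_display
summand = masks_color[0].copy()
if n > 1:
    cumul = np.cumprod(inv_alph[:n - 1], axis=0)
    summand += (masks_color[1:] * cumul).sum(axis=0)
out_batch = img * inv_alph.prod(axis=0) + summand

assert np.allclose(out_loop, out_batch)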
예제 #28
0
  def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, image_header=Header()):
    with torch.no_grad():
        """
        Note: If undo_transform=False then im_h and im_w are allowed to be None.
        """
        dets = Detections()   

        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape
        
        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h, visualize_lincomb = False,
                                            crop_masks        = True,
                                            score_threshold   = 0.3)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:100]
            classes, scores, boxes = [x[:100].cpu().numpy() for x in t[:3]]

        num_dets_to_consider = min(100, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < 0.3:
                num_dets_to_consider = j
                break
        
        if num_dets_to_consider == 0:
            # No detections found so just output the original image
            return (img_gpu * 255).byte().cpu().numpy()

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            global color_cache
            color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)
            
            if on_gpu is not None and color_idx in color_cache[on_gpu]:
                return color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BGR, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    color_cache[on_gpu][color_idx] = color
                return color

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]
            
        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1
            
        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
            
        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()
        
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            _class = cfg.dataset.class_names[classes[j]]
            text_str = '%s: %.2f' % (_class, score)

            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1

            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

            text_pt = (x1, y1 - 3)
            text_color = [255, 255, 255]

            cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
               
            det = Detection()
            det.box.x1 = x1
            det.box.y1 = y1
            det.box.x2 = x2
            det.box.y2 = y2
            det.class_name = _class
            det.score = score
            # Crop the mask to its box and flatten it row-major for the message
            mask_bb = np.squeeze(masks[j].cpu().numpy(), axis=2)[y1:y2, x1:x2]
            mask_rs = np.reshape(mask_bb, -1)
            det.mask.height = y2 - y1
            det.mask.width = x2 - x1
            det.mask.mask = np.array(mask_rs, dtype=bool)
            dets.detections.append(det)
 
        dets.header.stamp = image_header.stamp
        dets.header.frame_id = image_header.frame_id
        self.detections_pub.publish(dets)
    return img_numpy
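The Detection message above carries each mask cropped to its box and flattened row-major, so a subscriber can rebuild the 2D crop from mask.height and mask.width. A hypothetical decoding sketch (field names follow the snippet above, not a confirmed message definition):

import numpy as np

def decode_mask(det):
    # Rebuild the boolean crop; assumes det.mask.mask is the row-major
    # flattening produced above with height = y2 - y1 and width = x2 - x1.
    flat = np.asarray(det.mask.mask, dtype=bool)
    return flat.reshape(det.mask.height, det.mask.width)

# Pasting the crop back into a full-size frame of shape (h, w):
#   full = np.zeros((h, w), dtype=bool)
#   full[det.box.y1:det.box.y2, det.box.x1:det.box.x2] = decode_mask(det)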
예제 #29
0
파일: eval.py 프로젝트: li-xl/Yolact.jittor
def prep_metrics(ap_data,
                 dets,
                 img,
                 gt,
                 gt_masks,
                 h,
                 w,
                 num_crowd,
                 image_id,
                 detections: Detections = None):
    """ Returns a list of APs for this image, with each element being for a class  """
    if not args.output_coco_json:
        with timer.env('Prepare gt'):
            gt_boxes = jt.array(gt[:, :4])
            gt_boxes[:, [0, 2]] *= w
            gt_boxes[:, [1, 3]] *= h
            gt_classes = list(gt[:, 4].astype(int))
            gt_masks = jt.array(gt_masks).view(-1, h * w)

            if num_crowd > 0:
                split = lambda x: (x[-num_crowd:], x[:-num_crowd])
                crowd_boxes, gt_boxes = split(gt_boxes)
                crowd_masks, gt_masks = split(gt_masks)
                crowd_classes, gt_classes = split(gt_classes)
    with timer.env('Postprocess'):
        classes, scores, boxes, masks = postprocess(
            dets,
            w,
            h,
            crop_masks=args.crop,
            score_threshold=args.score_threshold)

        if classes.size(0) == 0:
            return

        classes = list(classes.numpy().astype(int))
        if isinstance(scores, list):
            box_scores = list(scores[0].numpy().astype(float))
            mask_scores = list(scores[1].numpy().astype(float))
        else:
            scores = list(scores.numpy().astype(float))
            box_scores = scores
            mask_scores = scores
        masks = masks.view(-1, h * w)

    if args.output_coco_json:
        with timer.env('JSON Output'):
            boxes = boxes.numpy()
            masks = masks.view(-1, h, w).numpy()
            for i in range(masks.shape[0]):
                # Make sure that the bounding box actually makes sense and a mask was produced
                if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] -
                                                  boxes[i, 0]) > 0:
                    detections.add_bbox(image_id, classes[i], boxes[i, :],
                                        box_scores[i])
                    detections.add_mask(image_id, classes[i], masks[i, :, :],
                                        mask_scores[i])
            return

    with timer.env('Eval Setup'):
        num_pred = len(classes)
        num_gt = len(gt_classes)

        mask_iou_cache = _mask_iou(masks, gt_masks).numpy()
        bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float()).numpy()
        if num_crowd > 0:
            crowd_mask_iou_cache = _mask_iou(masks, crowd_masks,
                                             iscrowd=True).numpy()
            crowd_bbox_iou_cache = _bbox_iou(boxes.float(),
                                             crowd_boxes.float(),
                                             iscrowd=True).numpy()
        else:
            crowd_mask_iou_cache = None
            crowd_bbox_iou_cache = None

        box_indices = sorted(range(num_pred), key=lambda i: -box_scores[i])
        mask_indices = sorted(box_indices, key=lambda i: -mask_scores[i])

        iou_types = [('box', lambda i, j: bbox_iou_cache[i, j].item(),
                      lambda i, j: crowd_bbox_iou_cache[i, j].item(),
                      lambda i: box_scores[i], box_indices),
                     ('mask', lambda i, j: mask_iou_cache[i, j].item(),
                      lambda i, j: crowd_mask_iou_cache[i, j].item(),
                      lambda i: mask_scores[i], mask_indices)]

    timer.start('Main loop')
    for _class in set(classes + gt_classes):
        ap_per_iou = []
        num_gt_for_class = sum([1 for x in gt_classes if x == _class])

        for iouIdx in range(len(iou_thresholds)):
            iou_threshold = iou_thresholds[iouIdx]

            for iou_type, iou_func, crowd_func, score_func, indices in iou_types:
                gt_used = [False] * len(gt_classes)

                ap_obj = ap_data[iou_type][iouIdx][_class]
                ap_obj.add_gt_positives(num_gt_for_class)

                for i in indices:
                    if classes[i] != _class:
                        continue

                    max_iou_found = iou_threshold
                    max_match_idx = -1
                    for j in range(num_gt):
                        if gt_used[j] or gt_classes[j] != _class:
                            continue

                        iou = iou_func(i, j)

                        if iou > max_iou_found:
                            max_iou_found = iou
                            max_match_idx = j

                    if max_match_idx >= 0:
                        gt_used[max_match_idx] = True
                        ap_obj.push(score_func(i), True)
                    else:
                        # If the detection matches a crowd, we can just ignore it
                        matched_crowd = False

                        if num_crowd > 0:
                            for j in range(len(crowd_classes)):
                                if crowd_classes[j] != _class:
                                    continue

                                iou = crowd_func(i, j)

                                if iou > iou_threshold:
                                    matched_crowd = True
                                    break

                        # All this crowd code so that we can make sure that our eval code gives the
                        # same result as COCOEval. There aren't even that many crowd annotations to
                        # begin with, but accuracy is of the utmost importance.
                        if not matched_crowd:
                            ap_obj.push(score_func(i), False)
    timer.stop('Main loop')
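The matching loop above is greedy: detections are visited in descending score order, each claims the highest-IoU unused ground truth above the current threshold, and an unmatched detection is only forgiven (rather than pushed as a false positive) when it overlaps a crowd region. A toy standalone version of the core matching (illustrative only, no crowd handling):

import numpy as np

def greedy_match(ious, scores, iou_threshold=0.5):
    # ious: [num_pred, num_gt] IoU matrix; scores: per-detection confidences.
    # Returns (score, is_true_positive) pairs, as pushed into ap_obj above.
    gt_used = [False] * ious.shape[1]
    results = []
    for i in sorted(range(len(scores)), key=lambda i: -scores[i]):
        best_iou, best_j = iou_threshold, -1
        for j in range(ious.shape[1]):
            if not gt_used[j] and ious[i, j] > best_iou:
                best_iou, best_j = ious[i, j], j
        if best_j >= 0:
            gt_used[best_j] = True
        results.append((scores[i], best_j >= 0))
    return results

# Two detections compete for one ground truth; only the higher-scoring one wins
print(greedy_match(np.array([[0.9], [0.8]]), [0.6, 0.7]))
# -> [(0.7, True), (0.6, False)]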
예제 #30
0
def prep_display(net,
                 dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45,
                 fps_str=''):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True

        preds = net.detect(
            {
                'loc': dets_out[0],
                'conf': dets_out[1],
                'mask': dets_out[2],
                'priors': dets_out[3],
                'proto': dets_out[4]
            }, net)

        t = postprocess(preds,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # zeros_like keeps device and dtype, so the masked copy from img_gpu below
    # works when the image lives on the GPU
    img_tmp = torch.zeros_like(img_gpu)

    for i in range(num_dets_to_consider):
        mask = masks[i]
        classy = cfg.dataset.class_names[classes[i]]

        # Keep only the pixels covered by masks of the selected classes
        if args.classes is None or classy in args.classes:
            img_tmp[mask == 1] = img_gpu[mask == 1]

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_tmp * 255).byte().cpu().numpy()

    return img_numpy
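For reference, the cut-out idea above in isolation: allocate the blank canvas on the same device as the source image, then copy through only the pixels covered by masks whose class passed the filter. A minimal sketch with hypothetical inputs:

import torch

img_gpu = torch.rand(480, 640, 3)           # image in [0, 1]
masks = torch.randint(0, 2, (3, 480, 640))  # per-instance binary masks
keep = [0, 2]                               # instances whose class passed the filter

# zeros_like keeps device and dtype, so the masked copy below is legal
img_tmp = torch.zeros_like(img_gpu)
for i in keep:
    m = masks[i] == 1
    img_tmp[m] = img_gpu[m]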