Exemple #1
0
    def post_process(dets_out,
                     img,
                     h,
                     w,
                     top_k=1,
                     score_threshold=0.6,
                     undo_transform=True):
        """
        Return the image as RGBA with the best detection's mask as alpha channel.

        Args:
            dets_out: Raw network output, forwarded unchanged to ``postprocess``.
            img: Input image. With ``undo_transform=True`` it is passed through
                ``undo_image_transformation``; otherwise it is divided by 255
                and its own ``shape`` supplies the height and width.
            h: Target height. May be None when ``undo_transform=False``.
            w: Target width. May be None when ``undo_transform=False``.
            top_k: Maximum number of detections kept after sorting by score.
            score_threshold: Detections scoring below this value are ignored.
            undo_transform: Whether to run ``undo_image_transformation`` on
                ``img`` before drawing.

        Returns:
            The numpy array produced by ``cv2.COLOR_RGB2RGBA``. If at least one
            detection passes the threshold and ``cfg.eval_mask_branch`` is set,
            the alpha channel is overwritten with the highest-scoring mask
            scaled by 255; otherwise the converted image is returned as is.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            try:
                t = postprocess(dets_out,
                                w,
                                h,
                                visualize_lincomb=False,
                                crop_masks=False,
                                score_threshold=score_threshold)
            finally:
                # Restore the global flag even when postprocess raises.
                cfg.rescore_bbox = save

        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:top_k]

            # Masks stay on the GPU; without a mask branch there are none.
            masks = t[3][idx] if cfg.eval_mask_branch else None
            classes, scores = [x[idx].cpu().numpy() for x in t[:2]]

        # Scores are sorted in descending order, so stop at the first one
        # that falls below the threshold.
        num_dets_to_consider = min(top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < score_threshold:
                num_dets_to_consider = j
                break

        final_res = (img_gpu * 255).byte().cpu().numpy()
        final_res = cv2.cvtColor(final_res, cv2.COLOR_RGB2RGBA)

        if num_dets_to_consider == 0 or masks is None:
            return final_res

        # Only the best mask is used, so only that one is copied to the host.
        # After the indexing below the mask has size [1, h, w, 1].
        best_mask = masks[:1, :, :, None]
        _mask = (best_mask * 255).byte().cpu().numpy()[0]

        # Then assign the mask to the last channel of the image
        final_res[:, :, 3] = _mask.squeeze()

        return final_res
Exemple #2
0
def plot_tfboard_figure(cfg, vis_imgs, vis_show=False, show_grad=False, max_vis=3):
    """Plot a random subset of a batch as a grid, one sample per row.

    Column 0 shows ``vis_imgs['rgb']`` after ``undo_image_transformation``,
    column 1 ``vis_imgs['gts']``, column 2 ``vis_imgs['wghts']`` and column 3
    ``vis_imgs['preds']`` (the last two with a colorbar). When ``show_grad``
    is set, ``vis_imgs['grad']`` is drawn with a colorbar in column 5.

    Args:
        cfg: Configuration object; only ``cfg.backbone`` is read.
        vis_imgs: Mapping of tensors indexed by sample along the first axis.
        vis_show: Call ``plt.show()`` before returning.
        show_grad: Add one extra column and plot the ``'grad'`` entry.
        max_vis: Upper bound on the number of samples drawn.

    Returns:
        The matplotlib figure.
    """
    def to_numpy(tensor):
        return tensor.cpu().detach().numpy()

    num_col = len(vis_imgs.keys())
    if show_grad:
        num_col += 1
    batch_size = vis_imgs['gts'].size(0)
    picked = np.random.choice(batch_size, min(max_vis, batch_size), replace=False)
    fig, axes = plt.subplots(max(2, len(picked)), num_col)

    # One figure row per sampled batch element.
    for row, sample in enumerate(picked):
        rgb = vis_imgs['rgb'][sample]
        pred = to_numpy(vis_imgs['preds'][sample, 0])
        target = to_numpy(vis_imgs['gts'][sample, 0])
        weight = to_numpy(vis_imgs['wghts'][sample, 0])

        # With several GPUs the entries carry an extra leading axis.
        if torch.cuda.device_count() > 1:
            pred, target, weight = pred[0], target[0], weight[0]
            rgb = rgb[0]
        restored = undo_image_transformation(cfg.backbone, rgb)
        axes[row, 0].imshow(restored)
        axes[row, 1].imshow(target)
        for col, data in ((2, weight), (3, pred)):
            plt.colorbar(axes[row, col].imshow(data), ax=axes[row, col])
        if show_grad:
            grad = to_numpy(vis_imgs['grad'][sample])
            plt.colorbar(axes[row, 5].imshow(grad), ax=axes[row, 5])

        # Hide axes and ticks for every cell of this row.
        for col in range(num_col):
            cell = axes[row, col]
            cell.axis('off')
            cell.tick_params(axis='both', left=False, top=False,
                             right=False, bottom=False,
                             labelright=False, labelbottom=False)

    if vis_show:
        plt.show()
    return fig
Exemple #3
0
  def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, image_header=Header()):
    """
    Draw masks, boxes and labels on the image and publish the detections.

    Args:
        dets_out: Raw network output, forwarded unchanged to ``postprocess``.
        img: Input image. With ``undo_transform=True`` it is passed through
            ``undo_image_transformation``; otherwise it is divided by 255 and
            its own ``shape`` supplies the height and width.
        h: Target height. May be None when ``undo_transform=False``.
        w: Target width. May be None when ``undo_transform=False``.
        undo_transform: Whether to run ``undo_image_transformation`` on ``img``.
        class_color: If True the colour is picked from the class id, otherwise
            from the detection index.
        mask_alpha: Opacity used when blending mask colours into the image.
        image_header: Header whose ``stamp`` and ``frame_id`` are copied onto
            the published message. The default instance is created once, when
            the function is defined; it is only read here.

    Returns:
        The annotated image as a uint8 numpy array. When no detection reaches
        the score cut-off, the unannotated image is returned and nothing is
        published.
    """
    max_dets = 100   # only the first 100 detections are looked at
    min_score = 0.3  # used both for postprocess filtering and the cut-off below

    with torch.no_grad():
        dets = Detections()

        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h, visualize_lincomb = False,
                                            crop_masks        = True,
                                            score_threshold   = min_score)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:max_dets]
            classes, scores, boxes = [x[:max_dets].cpu().numpy() for x in t[:3]]

        # Stop at the first detection whose score is below the cut-off.
        num_dets_to_consider = min(max_dets, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < min_score:
                num_dets_to_consider = j
                break

        if num_dets_to_consider == 0:
            # No detections found so just output the original image
            return (img_gpu * 255).byte().cpu().numpy()

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            global color_cache
            color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

            if on_gpu is not None and color_idx in color_cache[on_gpu]:
                return color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BRG, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    color_cache[on_gpu][color_idx] = color
                return color

        # First, draw the masks on the GPU where we can do it really fast
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # One colour per mask, shaped [num_dets, 1, 1, 3] so it broadcasts
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        # Copy all masks to the host once instead of once per detection.
        masks_cpu = masks.cpu().numpy()

        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1
        text_color = [255, 255, 255]

        # Lowest score first, so better detections are drawn on top.
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            _class = cfg.dataset.class_names[classes[j]]
            text_str = '%s: %.2f' % (_class, score)

            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

            text_pt = (x1, y1 - 3)

            # Filled label background just above the top-left box corner.
            cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

            det = Detection()
            det.box.x1 = x1
            det.box.y1 = y1
            det.box.x2 = x2
            det.box.y2 = y2
            det.class_name = _class
            det.score = score
            # Crop the mask to its bounding box and flatten it row by row.
            mask_bb = np.squeeze(masks_cpu[j], axis=2)[y1:y2,x1:x2]
            mask_rs = np.reshape(mask_bb, -1)
            det.mask.height = y2 - y1
            det.mask.width = x2 - x1
            det.mask.mask = np.array(mask_rs, dtype=bool)
            dets.detections.append(det)

        dets.header.stamp = image_header.stamp
        dets.header.frame_id = image_header.frame_id
        self.detections_pub.publish(dets)
    return img_numpy
Exemple #4
0
def prep_display(dets_out, img, gt, gt_masks, h, w, undo_transform=True, class_color=False):
    """
    Render detection masks, boxes and labels onto an image.

    What gets drawn is controlled by the module-level ``args``
    (``display_masks``, ``display_bboxes``, ``display_text``,
    ``display_scores``, ``top_k``, ``score_threshold``).

    Note: If undo_transform=False then h and w are allowed to be None.
    gt and gt_masks are not read by this function.

    Returns:
        The annotated image as a uint8 numpy array; the plain image when
        postprocess yields no detections.
    """
    if not undo_transform:
        img_gpu = img / 255.0
        h, w, _ = img.shape
    else:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Kept on the device; needed for the blending step below.
            masks = t[3][:args.top_k]
        classes, scores, boxes = [
            part[:args.top_k].cpu().numpy() for part in t[:3]
        ]

    if classes.shape[0] == 0:
        return (img_gpu * 255).byte().cpu().numpy()

    def get_color(det_idx):
        # Colour comes from the class id or from the detection index.
        key = classes[det_idx] * 5 if class_color else det_idx * 5
        rgb = COLORS[key % len(COLORS)]
        if undo_transform:
            return rgb
        return (rgb[2], rgb[1], rgb[0])

    # Blend the masks into the image while it is still a tensor.
    if args.display_masks and cfg.eval_mask_branch:
        for det in reversed(range(min(args.top_k, classes.shape[0]))):
            if not scores[det] >= args.score_threshold:
                continue

            tint = get_color(det)
            mask = masks[det, :, :, None]
            mask_color = mask @ (torch.Tensor(tint).view(1, 3) / 255.0)
            mask_alpha = 0.45

            # Pixels outside the mask stay untouched; inside, the image is
            # dimmed and the mask colour is added on top.
            outside = img_gpu * (1 - mask)
            dimmed = img_gpu * mask * (1-mask_alpha)
            img_gpu = outside + dimmed + mask_color * mask_alpha

    # The remaining drawing happens on the CPU. The array must be uint8 or
    # opencv will not anti-alias the text.
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if not (args.display_text or args.display_bboxes):
        return img_numpy

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    for det in reversed(range(min(args.top_k, classes.shape[0]))):
        score = scores[det]
        if not score >= args.score_threshold:
            continue

        x1, y1, x2, y2 = boxes[det, :]
        color = get_color(det)

        if args.display_bboxes:
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        if not args.display_text:
            continue

        _class = COCO_CLASSES[classes[det]]
        if args.display_scores:
            text_str = '%s: %.2f' % (_class, score)
        else:
            text_str = _class

        text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
        text_pt = (x1, y1 - 3)

        # Filled label background just above the top-left box corner.
        cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
        cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
Exemple #5
0
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 args,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45,
                 fps_str=''):
    """
    Draw detections on the image and write mask / crop images to ``results/``.

    Args:
        dets_out: Raw network output, forwarded unchanged to ``postprocess``.
        img: Input image. With ``undo_transform=True`` it is passed through
            ``undo_image_transformation``; otherwise it is divided by 255 and
            its own ``shape`` supplies the height and width.
        h: Target height. May be None when ``undo_transform=False``.
        w: Target width. May be None when ``undo_transform=False``.
        args: Options object. Read here: ``display_lincomb``, ``crop``,
            ``score_threshold``, ``top_k``, ``display_masks``,
            ``display_best_masks_only``, ``display_fps``, ``display_text``,
            ``display_bboxes``, ``display_best_bboxes_only``,
            ``display_scores``.
        undo_transform: Whether to run ``undo_image_transformation`` on ``img``.
        class_color: If True the colour is picked from the class id, otherwise
            from the detection index.
        mask_alpha: Opacity used when blending mask colours into the image.
        fps_str: Text drawn in the top-left corner when ``args.display_fps``.

    Returns:
        The annotated image as a uint8 numpy array.

    Side effects:
        Depending on ``args`` writes ``results/mask_car_image.jpg``,
        ``results/mask_image<i>.jpg`` and ``results/crop_object.png`` and
        prints progress messages.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
        finally:
            # Restore the global flag even when postprocess raises.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].detach().cpu().numpy() for x in t[:3]]

    # Scores are sorted in descending order, so stop at the first one that
    # falls below the threshold. This count is used by every step below.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    # Masks exist only with the mask branch and at least one kept detection.
    have_masks = cfg.eval_mask_branch and num_dets_to_consider > 0
    blend_masks = args.display_masks and have_masks
    # The explicit ``== True`` is kept so that truthy non-bool values (for
    # example the string 'True') keep behaving as before.
    best_mask_only = (args.display_best_masks_only == True
                      and args.top_k == 1 and have_masks)

    if blend_masks or best_mask_only:
        # Expand exactly once: [num_dets, h, w] -> [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

    # First, draw the masks on the GPU where we can do it really fast
    if blend_masks:
        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1
        # Equivalent to applying, for each j in order:
        #    img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)
        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
        img_numpy_mask = (masks_color_summand * 255).byte().cpu().numpy()
        cv2.imwrite('results/mask_car_image.jpg', img_numpy_mask)
        print("Mask for all visible car is generated")

    if best_mask_only:
        print('maskshape', (masks.shape))
        # Iterate over the thresholded detections only; the count must not be
        # reset here because the box drawing below relies on it as well.
        for i in range(num_dets_to_consider):
            mask = masks[i].reshape(1, masks.shape[1], masks.shape[2], 1)
            print('newmaskshape', (mask.shape))
            # Keep the image inside the mask and black out everything else.
            img_gpu_masked = img_gpu * (mask.sum(dim=0) >= 1).float().expand(
                -1, -1, 3)
            img_numpy_masked = (img_gpu_masked * 255).byte().cpu().numpy()
            cv2.imwrite('results/mask_image' + str(i) + '.jpg',
                        img_numpy_masked)
            print("Mask for the most visible car is generated")

    if args.display_fps:
        # Darken the box behind the fps text while still on the GPU
        fps_w, fps_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                       font_thickness)[0]

        img_gpu[0:fps_h + 8, 0:fps_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        cv2.putText(img_numpy, fps_str, (4, fps_h + 2), font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        # Lowest score first, so better detections are drawn on top.
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            # NOTE(review): this compares against the string 'True' while
            # display_best_masks_only is compared against the bool True;
            # confirm how the option is parsed before unifying the two.
            if args.display_best_bboxes_only == 'True':
                crop = img_numpy[y1:y2, x1:x2]
                cv2.imwrite('results/crop_object.png', crop)
                print("crop for the most visible car is generated")

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (
                    _class, score) if args.display_scores else _class

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)

                # Filled label background just above the top-left box corner.
                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

    return img_numpy
Exemple #6
0
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45):
    """
    Draw boxes and labels for sufficiently large class-0 (person) detections.

    Only detections whose class id is 0 and whose box area is at least 2500
    pixels are kept. Postprocess options are read from the module-level
    ``args``.

    Note: If undo_transform=False then h and w are allowed to be None.

    Args:
        dets_out: Raw network output, forwarded unchanged to ``postprocess``.
        img: Input image. With ``undo_transform=True`` it is passed through
            ``undo_image_transformation``; otherwise it is divided by 255 and
            its own ``shape`` supplies the height and width.
        h: Target height.
        w: Target width.
        undo_transform: Whether to run ``undo_image_transformation`` on ``img``.
        class_color: Forwarded to the colour lookup (class id vs. index).
        mask_alpha: Not used by this function; kept for call compatibility.

    Returns:
        The annotated image as a uint8 numpy array, or the plain image when no
        detection survives the filters.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        # Take the first top_k classes, scores and boxes.
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

        # Class 0 is "person"; the mask marks which detections are persons.
        person_index = (classes == 0)
        if person_index.any():
            # Filter all three arrays together so that index j refers to the
            # same detection in boxes, scores and classes further down.
            boxes = boxes[person_index]
            scores = scores[person_index]
            classes = classes[person_index]

            # Box area in pixels: (x2 - x1) * (y2 - y1)
            area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

            # Assume the smallest plausible person covers 25 * 100 pixels.
            valid_person_index = (area >= 2500)
            boxes = boxes[valid_person_index]
            scores = scores[valid_person_index]
            classes = classes[valid_person_index]
            if valid_person_index.any():
                print('----- Person detected -----')
            else:
                # Persons were found but all of them are too small.
                print('----- No person -----')
            num_dets_to_consider = int(valid_person_index.sum())
        else:
            # No person class among the detections at all.
            print('----- No person -----')
            num_dets_to_consider = 0

    if num_dets_to_consider == 0:
        # Nobody detected, return the original image.
        return (img_gpu * 255).byte().cpu().numpy()

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    # Draw a box and a text label for every remaining detection.
    for j in reversed(range(num_dets_to_consider)):
        x1, y1, x2, y2 = boxes[j][:]
        # The class id is used as the colour index, so every kept box (all
        # class 0 after filtering) gets the same colour.
        color = get_color(classes[j])
        score = scores[j]

        # Bounding box
        cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        # Label text: class name and score
        _class = cfg.dataset.class_names[classes[j]]
        text_str = '%s: %.2f' % (_class, score)

        text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale,
                                         font_thickness)[0]

        text_pt = (x1, y1 - 3)

        # Filled label background just above the top-left box corner.
        cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4),
                      color, -1)
        cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
Exemple #7
0
def prep_display_for_video(dets_out,
                           img,
                           h=None,
                           w=None,
                           save_folder=None,
                           undo_transform=True,
                           class_color=False,
                           mask_alpha=0.45,
                           fps_str='',
                           override_args: Config = None):
    """
    Annotate one video frame and optionally save it together with the original.

    Args:
        dets_out: Raw network output, forwarded unchanged to ``postprocess``.
        img: Input frame. With ``undo_transform=True`` it is passed through
            ``undo_image_transformation``; otherwise it is divided by 255 and
            its own ``shape`` supplies the height and width.
        h: Target height; required when ``undo_transform=True``.
        w: Target width; required when ``undo_transform=True``.
        save_folder: Indexable bundle read as follows: ``[0]`` is tested with
            ``os.path.isdir`` before saving, ``[1]`` is the file-name prefix,
            ``[2]`` receives the original frame, ``[3]`` the annotated frame
            and ``[4]`` is the frame number. It is indexed whenever there is
            something to draw or save, so the ``None`` default only works on
            paths that never touch it.
        undo_transform: Whether to run ``undo_image_transformation`` on ``img``.
        class_color: If True the colour is picked from the class id, otherwise
            from the detection index.
        mask_alpha: Opacity used when blending mask colours into the frame.
        fps_str: Not used by this function.
        override_args: When given, replaces the module-level ``args`` (the
            replacement persists after the call).

    Returns:
        ``[annotated, original, result_list]`` when boxes or text were drawn
        for a new frame number, otherwise ``[annotated, original]``.

    Side effects:
        Updates the globals ``args`` and ``frame_compare``, appends to the
        module-level ``result_list`` and may write PNG files.
    """
    global args, frame_compare

    if undo_transform:
        assert w is not None and h is not None, "with undo_transform=True, w,h params must be specified!"
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    # Untouched copy of the frame, saved next to the annotated one.
    img_numpy_ori = (img_gpu * 255).byte().cpu().numpy()

    if override_args is not None:
        args = override_args

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
        finally:
            # Restore the global flag even when postprocess raises.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are blended on the GPU, so they are not copied.
            masks = t[3][idx]
        # Plain numpy values hash like ints (so the colour cache can hit) and
        # are what the opencv drawing calls below are fed elsewhere.
        classes, scores, boxes = [x[idx].detach().cpu().numpy() for x in t[:3]]

    # Scores are sorted in descending order, so stop at the first one that
    # falls below the threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] if class_color else j) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1

    def put_label(canvas, text, x, y, background):
        # Single place for the shared text-box styling.
        return ps.putBText(canvas,
                           text,
                           text_offset_x=x,
                           text_offset_y=y,
                           vspace=0,
                           hspace=0,
                           font=font_face,
                           font_scale=font_scale,
                           thickness=font_thickness,
                           alpha=0.7,
                           background_RGB=background,
                           text_RGB=(255, 255, 255))

    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # Skip blending when this frame number was already drawn.
        if frame_compare != save_folder[4]:
            # After this, masks is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]

            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # Equivalent to applying, for each j in order:
            #    img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if num_dets_to_consider == 0:
        # Nothing to draw: still save the frame pair once per video_fps frames.
        if os.path.isdir(
                save_folder[0]) and save_folder[4] % args.video_fps == 0:
            file_name = save_folder[1] + "_%05d" % save_folder[4] + '.png'
            cv2.imwrite(os.path.join(save_folder[3], file_name), img_numpy)
            cv2.imwrite(os.path.join(save_folder[2], file_name), img_numpy_ori)

        return [img_numpy, img_numpy_ori]

    if args.display_text or args.display_bboxes:
        if frame_compare != save_folder[4]:
            # Remember the frame number so it is annotated only once.
            frame_compare = save_folder[4]
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    if args.display_scores:
                        text_str_class = f"{_class}"
                        text_str_score = f": {score:.2f}"

                        _, text_h_class = \
                            cv2.getTextSize(text_str_class, font_face, font_scale, font_thickness)[0]

                        # Class name first, the score on a second line below.
                        img_numpy = put_label(img_numpy, text_str_class,
                                              x1, y1, color)
                        img_numpy = put_label(img_numpy, text_str_score,
                                              x1, y1 + text_h_class + 2,
                                              color)
                    else:
                        text_str_class = '%s' % (_class)

                        img_numpy = put_label(img_numpy, text_str_class,
                                              x1, y1, color)

                    # Record one result row per detection on sampled frames.
                    if save_folder[4] % args.video_fps == 0:
                        dist = ocr(img_numpy_ori)
                        result = save_folder[
                            4], f"{dist}", f"{_class}", f"{score:.2f}", f"{x1}", f"{y1}", f"{x2}", f"{y2}"
                        result_list.append(result)

            if os.path.isdir(
                    save_folder[0]) and save_folder[4] % args.video_fps == 0:
                file_name = save_folder[1] + "_%05d" % save_folder[4] + '.png'
                cv2.imwrite(os.path.join(save_folder[3], file_name), img_numpy)
                cv2.imwrite(os.path.join(save_folder[2], file_name),
                            img_numpy_ori)

            return [img_numpy, img_numpy_ori, result_list]

    return [img_numpy, img_numpy_ori]
Exemple #8
0
    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45):
        """
        Render the detections in dets_out onto img and return the annotated
        image as a uint8 numpy array.

        Masks are blended when args.display_masks is set and the mask branch
        is evaluated; boxes and labels are drawn according to
        args.display_bboxes, args.display_text and args.display_scores.

        Note: If undo_transform=False then h and w are allowed to be None,
        because both are re-read from img.shape.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            canvas = torch.Tensor(img_numpy).cuda()
        else:
            h, w, _ = img.shape
            canvas = img / 255.0

        with timer.env('Postprocess'):
            dets = postprocess(dets_out,
                               w,
                               h,
                               visualize_lincomb=args.display_lincomb,
                               crop_masks=args.crop,
                               score_threshold=args.score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks stay on the GPU; they are blended there further down
                masks = dets[3][:args.top_k]
            classes, scores, boxes = (field[:args.top_k].cpu().numpy()
                                      for field in dets[:3])

        # Keep the detections that come before the first score under the threshold
        limit = min(args.top_k, classes.shape[0])
        n_dets = next(
            (k for k in range(limit) if scores[k] < args.score_threshold),
            limit)

        if n_dets == 0:
            # Nothing to draw: hand back the plain image
            return (canvas * 255).byte().cpu().numpy()

        def get_color(j, on_gpu=None):
            """Color for detection j; a cached tensor when on_gpu is given."""
            global color_cache
            key = ((classes[j] if class_color else j) * 5) % len(COLORS)

            as_tensor = on_gpu is not None
            if as_tensor and key in color_cache[on_gpu]:
                return color_cache[on_gpu][key]

            rgb = COLORS[key]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                rgb = (rgb[2], rgb[1], rgb[0])
            if as_tensor:
                rgb = torch.Tensor(rgb).to(on_gpu).float() / 255.
                color_cache[on_gpu][key] = rgb
            return rgb

        # Mask blending happens on the GPU with plain tensor operations
        if args.display_masks and cfg.eval_mask_branch:
            # Shape becomes [n_dets, h, w, 1]
            masks = masks[:n_dets, :, :, None]

            # One color per detection, each shaped [1, 1, 1, 3]
            gpu_index = canvas.device.index
            palette = torch.cat(
                [get_color(j, on_gpu=gpu_index).view(1, 1, 1, 3)
                 for j in range(n_dets)],
                dim=0)
            tinted = masks.repeat(1, 1, 1, 3) * palette * mask_alpha

            # 1 outside a mask, 1 - mask_alpha inside it
            keep = masks * (-mask_alpha) + 1

            # Closed form of:
            #    for j in range(n_dets):
            #        canvas = canvas * keep[j] + tinted[j]
            overlay = tinted[0]
            if n_dets > 1:
                overlay += (tinted[1:] *
                            keep[:(n_dets - 1)].cumprod(dim=0)).sum(dim=0)

            canvas = canvas * keep.prod(dim=0) + overlay

        # Everything below is drawn on the CPU.
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (canvas * 255).byte().cpu().numpy()

        if not (args.display_text or args.display_bboxes):
            return img_numpy

        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1
        text_color = [255, 255, 255]

        # Walk the detections from last to first
        for j in range(n_dets - 1, -1, -1):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if not args.display_text:
                continue

            _class = cfg.dataset.class_names[classes[j]]
            if args.display_scores:
                text_str = '%s: %.2f' % (_class, scores[j])
            else:
                text_str = _class

            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale,
                                             font_thickness)[0]

            # Filled label background above the box, then the label itself
            cv2.rectangle(img_numpy, (x1, y1),
                          (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, (x1, y1 - 3), font_face,
                        font_scale, text_color, font_thickness, cv2.LINE_AA)

        return img_numpy
    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45,
                     batch_idx=0,
                     create_mask=False,
                     return_imgs=False):
        """
        Draw the 'car' and 'truck' detections of one image and/or build a
        label mask for them.

        Detections are cut at self.args.top_k and self.args.score_threshold,
        then reduced to the classes named 'car' or 'truck'.

        Note: If undo_transform=False then h and w are allowed to be None,
        because both are re-read from img.shape.

        Returns:
            create_mask=False:
                the annotated image (uint8 numpy array).
            create_mask=True, return_imgs=False:
                an ImageResult(classes, scores, boxes, mask_img, count).
            create_mask=True, return_imgs=True:
                the tuple (annotated image, ImageResult).
            Without any car/truck detection the ImageResult carries None for
            classes/scores/boxes, an all-zero mask and a count of 0.
        """
        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            # NOTE(review): the attribute is spelled `display_linecomb` here
            # while the keyword is `visualize_lincomb` -- confirm the spelling
            # against the argument parser.
            t = postprocess(dets_out,
                            w,
                            h,
                            batch_idx,
                            visualize_lincomb=self.args.display_linecomb,
                            crop_masks=self.args.crop,
                            score_threshold=self.args.score_threshold)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:self.args.top_k]
            classes, scores, boxes = [
                x[:self.args.top_k].cpu().numpy() for x in t[:3]
            ]

        num_dets_to_consider = min(self.args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < self.args.score_threshold:
                num_dets_to_consider = j
                break

        # Indices of the detections whose class name is 'car' or 'truck'
        idx_fil = [
            i for i in range(num_dets_to_consider)
            if cfg.dataset.class_names[classes[i]] in ('car', 'truck')
        ]
        num_dets_to_consider = len(idx_fil)

        if num_dets_to_consider == 0:
            # No detection found so just output the original image
            if not create_mask:
                return (img_gpu * 255).byte().cpu().numpy()
            empty_result = ImageResult(None, None, None,
                                       np.zeros((h, w, 1), dtype='uint8'), 0)
            if return_imgs:
                return (img_gpu * 255).byte().cpu().numpy(), empty_result
            return empty_result

        # Apply the filter unconditionally. It used to happen only inside the
        # mask-drawing branch, so with masks switched off the boxes and labels
        # of unrelated detections were drawn.
        classes = classes[idx_fil]
        scores = scores[idx_fil]
        boxes = boxes[idx_fil, :]
        if cfg.eval_mask_branch:
            # After this, masks is of size [num_dets, h, w, 1]
            masks = masks[idx_fil, :, :, None]

        mask_img = None
        if create_mask:
            mask_img = np.zeros((h, w, 1), dtype='uint8')
            if cfg.eval_mask_branch:
                # Detection j contributes the label 10 * (j + 1).
                # NOTE: the labels live in a uint8 image, so they only fit for
                # the first 25 detections, and overlapping masks add up.
                for j in range(num_dets_to_consider):
                    mask_img += 10 * (j + 1) * masks[j].cpu().numpy().astype(
                        np.uint8)
            if not return_imgs:
                return ImageResult(classes, scores, boxes, mask_img,
                                   num_dets_to_consider)

        # Quick and dirty helper for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
                return self.color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BGR, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    self.color_cache[on_gpu][color_idx] = color
                return color

        if self.args.display_masks and cfg.eval_mask_branch:
            # Prepare the RGB image for each mask given its color
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # This whole block is the closed form of:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        if self.args.display_text or self.args.display_bboxes:
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if self.args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if self.args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    text_str = '%s: %.2f' % (
                        _class, score) if self.args.display_scores else _class
                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)

        if not create_mask:
            # Same contract as the no-detection path above: image only.
            # Previously this path referenced an unassigned mask_img.
            return img_numpy
        return img_numpy, ImageResult(classes, scores, boxes, mask_img,
                                      num_dets_to_consider)
Exemple #10
0
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45):
    """
    Draw the detections on the image and overlay a patty count.

    Besides the usual masks, boxes and labels this variant counts patties per
    family from the class ids: 0/1/2 are counted as "cp", 3/4/5 as "qp" and
    6/7/8 as "op"; ids 2, 5 and 8 are the stacked variants and id 9 is the
    tray. When a tray is detected, the stacked boxes of the most frequent
    family are passed to a pickled regressor and its rounded predictions are
    added to that family's count.

    Note: If undo_transform=False then h and w are allowed to be None,
    because both are re-read from img.shape.

    Returns:
        The annotated image as a uint8 numpy array.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h, visualize_lincomb = args.display_lincomb,
                                        crop_masks        = args.crop,
                                        score_threshold   = args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:args.top_k]
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    if num_dets_to_consider == 0:
        # No detections found so just output the original image
        return (img_gpu * 255).byte().cpu().numpy()

    # Quick and dirty helper for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block is the closed form of:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    # Then draw the stuff that needs to be done on the cpu.
    # The conversion has to come after the mask blending above; doing it
    # earlier silently dropped the mask overlay from the returned image.
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_text or args.display_bboxes:
        # Font settings are shared by the per-detection labels and the count
        # text, so they must exist even when only bboxes are displayed.
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1
        text_color = [255, 255, 255]

        # Class ids whose boxes are collected: stacked cp/qp/op and the tray
        collected_ids = {2: 'cp', 5: 'qp', 8: 'op', 9: 'tray'}
        bboxes = {
          'cp': [],
          'qp': [],
          'op': [],
          'tray': []
        }
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]
            print(classes[j], x1, y1, x2, y2)

            box_key = collected_ids.get(int(classes[j]))
            if box_key is not None:
                bboxes[box_key].append([x1, y1, x2, y2])

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
                text_pt = (x1, y1 - 3)
                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

        # Count over the same detections whose boxes were collected above
        considered = classes[:num_dets_to_consider]
        n_patties = np.sum(considered != 9)
        cp_count = np.sum(considered == 0) + np.sum(considered == 1)
        qp_count = np.sum(considered == 3) + np.sum(considered == 4)
        op_count = np.sum(considered == 6) + np.sum(considered == 7)
        visible_cp = cp_count + np.sum(considered == 2)
        visible_qp = qp_count + np.sum(considered == 5)
        visible_op = op_count + np.sum(considered == 8)

        # The family with the most visible patties selects the regressor
        labels = [visible_cp, visible_qp, visible_op]
        max_index = labels.index(max(labels))
        if max_index == 0:
            model_path = "weights/regressor/cp_stack_regressor"
            stacked_boxes = bboxes['cp']
        elif max_index == 1:
            model_path = "weights/regressor/qp_stack_regressor"
            stacked_boxes = bboxes['qp']
        else:
            model_path = "weights/regressor/op_stack_regressor"
            stacked_boxes = bboxes['op']

        # Sort the stacked boxes by their top edge
        stacked_boxes = sorted(stacked_boxes, key=lambda box: box[1])
        print('found stacked', len(stacked_boxes))

        # Estimate how many patties each stacked box hides
        prediction = 0
        if bboxes['tray'] and stacked_boxes:
            tray_box = bboxes['tray'][0]
            tray_height = tray_box[3] - tray_box[1]
            # A degenerate tray box would make the normalization divide by zero
            if tray_height > 0:
                # NOTE(review): pickle.load can execute arbitrary code; the
                # path is a fixed local weights file, keep that file trusted.
                with open(model_path, 'rb') as model_file:
                    loaded_model = pickle.load(model_file)
                for box in stacked_boxes:
                    stack_height = box[3] - box[1]
                    gap = box[1] - tray_box[1]
                    # Features: gap and stack height, both relative to the tray height
                    X = np.array((gap / tray_height, stack_height / tray_height))
                    this_prediction = round(loaded_model.predict(X.reshape(1, -1))[0])
                    print("this prediction: ", this_prediction)
                    prediction = prediction + this_prediction

        if max_index == 0:
            cp_count = cp_count + prediction
        elif max_index == 1:
            qp_count = qp_count + prediction
        else:
            op_count = op_count + prediction

        # Put the summary near the bottom-right corner of the image
        count_text = "VISIBLE: {} ... CP: {}, QP: {}, OP: {}".format(n_patties, cp_count, qp_count, op_count)
        count_text_w, count_text_h = cv2.getTextSize(count_text, font_face, font_scale, font_thickness)[0]
        padding = 20
        count_text_pt = (w - count_text_w - padding, h - count_text_h)
        cv2.putText(img_numpy, count_text, count_text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA )
    return img_numpy
Exemple #11
0
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45):
    """
    Keep only the largest detected person and return its overlay, mask and crop.

    Among the top args.top_k detections, those with class id 0 (person) are
    selected and the one with the largest bounding-box area is kept. No score
    threshold is applied because a single box is retained.

    Note: If undo_transform=False then h and w are allowed to be None,
    because both are re-read from img.shape.

    Returns:
        (img_numpy, mask_img, img_crop) when a person was found:
            img_numpy -- annotated image, uint8;
            mask_img  -- 2-D uint8 mask of the person (mask * 255), all zeros
                         when cfg.eval_mask_branch is off;
            img_crop  -- the input image with the first three channels
                         multiplied by mask_img // 255.
        When no person was found, ONLY the un-annotated image is returned
        (kept as-is for compatibility; callers must handle both shapes).
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:args.top_k]
        # Classes, scores and boxes of the top-k detections
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    # Positions of the detections classified as person (class id 0)
    person_positions = np.flatnonzero(classes == 0)
    if person_positions.size == 0:
        print('----- No person -----')
        # No person found so just output the original image
        return (img_gpu * 255).byte().cpu().numpy()

    # Pick the person with the largest box. The areas are computed in their
    # own array: the previous code aliased `classes` as the area buffer and
    # thereby overwrote the class ids with areas.
    person_boxes = boxes[person_positions]
    areas = ((person_boxes[:, 2] - person_boxes[:, 0]) *
             (person_boxes[:, 3] - person_boxes[:, 1]))
    best = int(person_positions[np.argmax(areas)])

    # Keep length-1 arrays so that index 0 addresses the selected detection
    classes = classes[best:best + 1]
    scores = scores[best:best + 1]
    boxes = boxes[best:best + 1]
    if cfg.eval_mask_branch:
        # After this, masks is of size [1, h, w, 1]
        masks = masks[best:best + 1, :, :, None]

    # Quick and dirty helper for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # The mask image is part of the return value, so it is built whether or
    # not the overlay is displayed.
    if cfg.eval_mask_branch:
        mask_img = (masks * 255).byte().cpu().numpy()[0, :, :, 0]
    else:
        # No mask branch: report an empty mask (the crop is then all zeros)
        mask_img = np.zeros((h, w), dtype=np.uint8)

    # Blend the single mask into the image on the GPU
    if args.display_masks and cfg.eval_mask_branch:
        color = get_color(0, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
        # Still floating point at this stage
        masks_color = masks.repeat(1, 1, 1, 3) * color * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color[0]

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_text or args.display_bboxes:
        x1, y1, x2, y2 = boxes[0]
        color = get_color(0)
        score = scores[0]

        if args.display_bboxes:
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        if args.display_text:
            _class = cfg.dataset.class_names[classes[0]]
            text_str = '%s: %.2f' % (
                _class, score) if args.display_scores else _class

            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1

            text_w, text_h = cv2.getTextSize(text_str, font_face,
                                             font_scale, font_thickness)[0]

            text_pt = (x1, y1 - 3)
            text_color = [255, 255, 255]

            cv2.rectangle(img_numpy, (x1, y1),
                          (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, text_pt, font_face,
                        font_scale, text_color, font_thickness,
                        cv2.LINE_AA)

    # Cut the person out of the input image.
    # NOTE(review): this uses the raw `img`; presumably only meaningful when
    # undo_transform=False and img is an HxWx3 image in 0..255 -- confirm.
    # The copy guarantees the caller's tensor is never modified, since
    # Tensor.numpy() can share memory with a CPU tensor.
    img_crop = img.byte().cpu().numpy().copy()
    img_crop[:, :, :3] *= (mask_img // 255)[:, :, None]
    return img_numpy, mask_img, img_crop
    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45,
                     fps_str=''):
        """
        Draw masks, boxes, labels and person-to-person distance lines on a frame.

        Note: If undo_transform=False then h and w are allowed to be None
        (they are overwritten with the shape of img).

        Args:
            dets_out: Raw network output, forwarded unchanged to postprocess().
            img: Input frame. With undo_transform=True it is passed through
                undo_image_transformation(); otherwise it is divided by 255.
            h: Target height handed to postprocess().
            w: Target width handed to postprocess().
            undo_transform: Whether img must first be reverted with
                undo_image_transformation().
            class_color: Choose the colour by class index instead of by
                detection index.
            mask_alpha: Opacity used when blending masks into the frame.
            fps_str: Text drawn in the top-left corner when self.display_fps
                is set.

        Returns:
            The annotated frame as a uint8 numpy array.

        Side effects:
            When self.display_text is set and a detection is labelled
            "person", the module-level ``log`` dict receives the keys
            "total_person", "total_person_in_red_zone" and
            "total_person_in_green_zone".
        """

        line_thickness = 2
        # Pixel distances below which a pair of people is flagged red / green.
        red_zone_dist = 250
        green_zone_dist = 300

        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            try:
                t = postprocess(dets_out,
                                w,
                                h,
                                visualize_lincomb=self.display_lincomb,
                                crop_masks=self.crop,
                                score_threshold=self.score_threshold)
            finally:
                # Always put the global config back, even if postprocess fails.
                cfg.rescore_bbox = save

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:self.top_k]

            classes, scores, boxes = [
                x[:self.top_k].cpu().detach().numpy() for x in t[:3]
            ]

        # Stop at the first detection whose score falls below the threshold.
        num_dets_to_consider = min(self.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < self.score_threshold:
                num_dets_to_consider = j
                break

        # Quick and dirty helper for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            global color_cache
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in color_cache[on_gpu]:
                return color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BRG, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    color_cache[on_gpu][color_idx] = color
                return color

        # First, draw the masks on the GPU where we can do it really fast
        if self.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]
            # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        if self.display_fps:
            # Draw the box for the fps on the GPU
            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1

            text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                             font_thickness)[0]

            img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().detach().numpy()

        if self.display_fps:
            # Draw the text on the CPU
            text_pt = (4, text_h + 2)
            text_color = [255, 255, 255]

            cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                        text_color, font_thickness, cv2.LINE_AA)

        if num_dets_to_consider == 0:
            return img_numpy

        if self.display_text or self.display_bboxes:
            distance_boxes = []

            def draw_distance(boxes):
                """
                Connect every pair of person boxes that are close together.

                boxes is a list of [x1, y1, x2, y2] lists. The distance is
                the Euclidean distance between the (x2, y2) corners of two
                boxes. Pairs closer than red_zone_dist get a red line, pairs
                closer than green_zone_dist a green one. The number of pairs
                in each category is written to ``log``.
                """
                red_pairs = 0  # pairs considered high risk
                green_pairs = 0
                # Only pairs matter, so enumerate the 2-combinations directly
                # instead of walking the whole power set of boxes.
                for box_a, box_b in combinations(boxes, 2):
                    corner_a = (box_a[2], box_a[3])
                    corner_b = (box_b[2], box_b[3])
                    dist = np.linalg.norm(
                        np.array(corner_a) - np.array(corner_b))
                    if dist < red_zone_dist:
                        red_pairs += 1
                        cv2.line(img_numpy, corner_a, corner_b, (0, 0, 255),
                                 line_thickness)
                    elif dist < green_zone_dist:
                        green_pairs += 1
                        cv2.line(img_numpy, corner_a, corner_b, (0, 255, 0),
                                 line_thickness)
                # NOTE(review): despite the key names these are pair counts,
                # exactly as in the previous implementation.
                log["total_person_in_red_zone"] = red_pairs
                log["total_person_in_green_zone"] = green_pairs

            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if self.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if self.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    if _class == "person":
                        # NOTE(review): this stores the number of all kept
                        # detections, not only persons - confirm intent.
                        log["total_person"] = num_dets_to_consider
                        distance_boxes.append(boxes[j, :].tolist())
                        draw_distance(distance_boxes)

                    text_str = '%s: %.2f' % (
                        _class, score) if self.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                     font_scale,
                                                     font_thickness)[0]

                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    # Filled label background above the box, then the text.
                    cv2.rectangle(img_numpy, (x1, y1),
                                  (x1 + text_w, y1 - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)

        return img_numpy
Exemple #13
0
def prep_display_mod(dets_out,
                     img,
                     h,
                     w,
                     depth_map,
                     rel_depth,
                     undo_transform=True,
                     mask_alpha=1.0):  # was mask_alpha=0.45
    """
    Keep only the people within a relative depth band; paint the rest.

    Note: If undo_transform=False then h and w are allowed to be None
    (they are overwritten with the shape of img).

    Args:
        dets_out: Raw network output, forwarded unchanged to postprocess().
        img: Input frame. With undo_transform=True it is passed through
            undo_image_transformation(); otherwise it is divided by 255.
        h: Target height handed to postprocess().
        w: Target width handed to postprocess().
        depth_map: Depth lookup indexed as depth_map[row, col, 0] at the
            centre of each person box.
        rel_depth: Fraction of the largest person depth value that defines
            the accepted band: people with depth >= max_depth * (1 - rel_depth)
            are kept.
        undo_transform: Whether img must first be reverted with
            undo_image_transformation().
        mask_alpha: Blend factor applied to the fill colour outside the
            kept people.

    Returns:
        A uint8 numpy image. If no detection passes the score threshold the
        frame is returned unmodified; if detections exist but none has class
        index 0, an all-zero image is returned; otherwise pixels inside the
        union of the kept person masks show the frame and the remaining
        pixels are filled from COLORS[0].
    """
    score_threshold = 0.15
    top_k = 15

    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out, w, h, score_threshold=score_threshold)
        finally:
            # Always put the global config back, even if postprocess fails.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    # Stop at the first detection whose score falls below the threshold.
    num_dets_to_consider = min(top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < score_threshold:
            num_dets_to_consider = j
            break
    classes = classes[:num_dets_to_consider]

    # Helper for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        # The index is used directly; the original author annotated index 0
        # as "black" (COLORS is defined elsewhere - not verifiable here).
        color_idx = j
        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    if num_dets_to_consider == 0:
        # Nothing detected: hand back the frame untouched.
        return (img_gpu * 255.0).byte().cpu().numpy()

    # After this, mask is of size [num_dets, h, w, 1]
    masks = masks[:num_dets_to_consider, :, :, None]

    # Collect the person detections (class index 0) together with the
    # centre of their boxes, in the same order as the masks.
    person_idxs = []
    rows = []
    cols = []
    for det_idx, class_id in enumerate(classes):
        if class_id == 0:
            x1, y1, x2, y2 = boxes[det_idx, :]
            person_idxs.append(det_idx)
            cols.append(int((x1 + x2) / 2))
            rows.append(int((y1 + y2) / 2))

    if not person_idxs:
        # No people: return a black image.
        return (img_gpu * 0).byte().cpu().numpy()

    # depth_map is addressed row first (y), then column (x).
    obj_depths = np.array(
        [depth_map[row, col, 0] for row, col in zip(rows, cols)])
    person_idxs = np.array(person_idxs)

    # Order people by depth value, largest first.
    order = np.argsort(-obj_depths)
    obj_depths = obj_depths[order]
    person_idxs = person_idxs[order]

    # Keep everyone whose depth lies within rel_depth of the largest value.
    depth_thres = obj_depths[0] * (1.0 - rel_depth)
    kept = [i for i, depth in enumerate(obj_depths) if depth >= depth_thres]
    person_idxs = person_idxs[kept]

    masks = masks[person_idxs]

    colors = torch.cat(
        [get_color(0, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)],
        dim=0)

    # Merge the kept masks into a single binary mask of size [1, h, w, 1].
    union = masks[0]
    for extra_mask in masks[1:]:
        union = union + extra_mask
    masks = union.unsqueeze(0)
    masks[masks != 0.0] = 1.0

    # Fill colour weight: 1 outside the people, 1 - mask_alpha inside.
    fill_weight = masks * (-mask_alpha) + 1
    masks_color = fill_weight.repeat(1, 1, 1, 3) * colors * mask_alpha
    # The frame itself is kept only where the merged mask is set.
    keep_mask = masks.repeat(1, 1, 1, 3)

    img_gpu = img_gpu * torch.squeeze(keep_mask, 0) + torch.squeeze(
        masks_color, 0)

    return (img_gpu * 255.0).byte().cpu().numpy()
Exemple #14
0
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45,
                 fps_str=''):
    """
    Render detections on a frame, track objects across frames and publish
    predicted poses.

    Note: If undo_transform=False then h and w are allowed to be None
    (they are overwritten with the shape of img).

    Besides drawing, this function
      * hands every detection to sort_info.data_save() which returns
        obj_info / obj_num,
      * every 20th frame (frame_count % 20 == 3) appends a grayscale mask
        image to the object's history and, once three are stored, feeds them
        to model.predict(),
      * collects the resulting obj_infomsg messages in an obj_array and
        publishes it through array_pub when pub_Flag was raised.

    Layout of one obj_info entry as it is used in this function
    (produced elsewhere - verify against sort_info.data_save):
      [0] id (also used in the label), [1] object name,
      [2] entry is processed only when != 0,
      [3] tensor summed over dim 0 to form the object's mask,
      [4] sequence whose items 2/4 and 3/5 are used as box corners,
      [5] list of grayscale mask images, [6] list of five
      (x, y, dx, dy) tuples drawn as circle + line.

    Args:
        dets_out: Raw network output, forwarded unchanged to postprocess().
        img: Input frame; reverted with undo_image_transformation() when
            undo_transform is True, otherwise divided by 255.
        h: Target height handed to postprocess().
        w: Target width handed to postprocess().
        undo_transform: See img.
        class_color: Choose the colour via obj_info[j][0] instead of the
            running index.
        mask_alpha: Opacity used when blending masks into the frame.
        fps_str: Text drawn in the top-left corner when args.display_fps
            is set.

    Returns:
        The annotated frame as a uint8 numpy array.
    """
    global first_frame, old_obj_info
    name = []
    mask_img = []
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # NOTE(review): cfg.rescore_bbox is not restored if postprocess raises.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):

        #idx = t[1].argsort(0, descending=True)[:args.top_k]
        # NOTE(review): argsort of an argsort yields the rank of every score,
        # not a descending sort order like the line above - confirm that the
        # score-threshold loop further down still sees the intended order.
        idx1 = t[1].argsort()
        idx = idx1.argsort()

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
            mask_picture = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
        # Per detection: class name and a [1, h, w, 1] slice of its mask.
        for i in range(len(classes)):
            name.append(cfg.dataset.class_names[classes[i]])
            mask_img.append(mask_picture[i:i + 1, :, :, None])

        #obj_info, obj_num = data_save(mask_img, classes, scores, boxes)
        # Match the detections against the objects of the previous frame.
        start = time.time()
        obj_info, obj_num = sort_info.data_save(mask_img, classes, name,
                                                scores, boxes, first_frame,
                                                old_obj_info)
        end = time.time()
        # Debug output: time spent in data_save.
        print('aaaaaaaaaa', end - start)
        first_frame = True

    # Stop at the first detection whose score falls below the threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty helper for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (obj_info[j][0] * 5 if class_color else j *
                     5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice

    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]

        masks = masks[:num_dets_to_consider, :, :, None]
        #img_gpu = img_gpu * (masks.sum(dim=0) > 0.5).float()  #only show mask
        #img_gpu = img_gpu * masks[0]

        # Frame restricted to the union of all masks, exported as a grayscale
        # image through the module-level mask_numpy. This rebinds the local
        # name mask_img, which held the per-detection list until here.
        mask_img = img_gpu * (masks.sum(dim=0) > 0.5).float()
        global mask_numpy
        mask_numpy = (mask_img * 255).byte().cpu().numpy()
        mask_numpy = cv2.cvtColor(mask_numpy, cv2.COLOR_BGR2GRAY)

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])

        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                         font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha
    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        global frame_count, state_pre, flag, predict_pos, centerX, centerY, degree, mask_color, mask_flag, pub_Flag
        frame_count += 1

        # Message that collects the predictions of all objects in this frame.
        pub_array_msg = obj_array()
        for j in range(obj_num):
            global img_num, temp_x, temp_y, yhat
            # Entries whose third field is 0 are skipped entirely.
            if obj_info[j][2] != 0:

                # Grayscale image of the frame restricted to this object's mask.
                mask_image = img_gpu * (obj_info[j][3].sum(dim=0) >
                                        0.5).float()
                mask_numpy1 = (mask_image * 255).byte().cpu().numpy()
                mask_color = cv2.cvtColor(mask_numpy1, cv2.COLOR_BGR2GRAY)
                '''
                kernel = np.ones((5,5), np.uint8)
                mask_color = cv2.erode(mask_color, kernel, iterations = 1)
                mask_color = cv2.dilate(mask_color, kernel, iterations = 1)
                '''
                mask_flag = False

                # Sample the mask history once every 20 frames.
                if frame_count % 20 == 3:
                    obj_info[j][5].append(mask_color)
                    mask_flag = True
                    #cv2.imwrite('/home/chien/123/test_{}.jpg'.format(j),mask_numpy1)

                    # Predict as soon as three mask images are available.
                    if len(obj_info[j][5]) > 2:
                        '''
                        for k in range(len(obj_info[j][5])):
                            cv2.imwrite('/home/chien/123/test_{}.jpg'.format(k),obj_info[j][5][k])
                        '''

                        obj_msg = obj_infomsg()
                        obj_msg.id = obj_info[j][0]
                        obj_msg.object_name = obj_info[j][1]

                        imagedata1 = np.array(obj_info[j][5])

                        # Batch of 3-image sequences of 480x640 single-channel
                        # images, scaled to [0, 1].
                        imagedata1 = imagedata1.reshape((-1, 3, 480, 640, 1))
                        imagedata1 = imagedata1 / 255.
                        start = time.time()
                        yhat = model.predict(imagedata1, verbose=0)
                        end = time.time()
                        '''
                        print(end-start)
                        print('---------------')
                        '''
                        # yhat[1][0][i] is read as (_, x, y, a, b) for five
                        # steps; x and y are mapped with *320+320 / *240+240
                        # (presumably from [-1, 1] to 640x480 pixels - TODO
                        # confirm), a and b go through arctan_recovery().
                        # First prediction fills the list, later ones
                        # overwrite it in place.
                        if obj_info[j][6] == []:
                            for i in range(5):
                                x1 = yhat[1][0][i][1] * 320 + 320
                                y1 = yhat[1][0][i][2] * 240 + 240
                                degree1 = arctan_recovery(
                                    yhat[1][0][i][3], yhat[1][0][i][4])
                                temp_x1, temp_y1 = trans_degree(
                                    x1, y1, degree1)
                                obj_info[j][6].append(
                                    (x1, y1, temp_x1, temp_y1))

                        else:
                            for i in range(5):
                                x1 = yhat[1][0][i][1] * 320 + 320
                                y1 = yhat[1][0][i][2] * 240 + 240
                                degree1 = arctan_recovery(
                                    yhat[1][0][i][3], yhat[1][0][i][4])
                                temp_x1, temp_y1 = trans_degree(
                                    x1, y1, degree1)
                                obj_info[j][6][i] = (x1, y1, temp_x1, temp_y1)
                            '''
                            obj_info[j][6].pop(0)
                            x1 = yhat[1][0][4][1]*320+320
                            y1 = yhat[1][0][4][2]*240+240
                            degree1 = arctan_recovery(yhat[1][0][4][3],yhat[1][0][4][4])
                            temp_x1,temp_y1=trans_degree(x1,y1,degree1)
                            obj_info[j][6].append((x1,y1,temp_x1,temp_y1))
                            '''
                        # The published pose is the last (index 4) step.
                        obj_msg.x = yhat[1][0][4][
                            1] * 320 + 320  #yhat[1][0][3][1]*320+320
                        obj_msg.y = yhat[1][0][4][2] * 240 + 240
                        obj_msg.degree = arctan_recovery(
                            yhat[1][0][4][3], yhat[1][0][4][4])
                        # tx1 / ty1 are only used by the disabled drawing
                        # code below.
                        tx1, ty1 = trans_degree(obj_msg.x, obj_msg.y,
                                                obj_msg.degree)
                        '''
                        print( obj_msg.degree)
                        cv2.circle(img_numpy, (int(obj_msg.x),int(obj_msg.y)),5,(0, 0, 255),5)
                        cv2.line(img_numpy,(int(obj_msg.x+tx1),int(obj_msg.y+ty1)),(int(obj_msg.x-tx1),int(obj_msg.y-ty1)),(0,0,255),5)
                        '''
                        #print( obj_msg.degree)
                        pub_array_msg.Obj_list.append(obj_msg)

                        pub_Flag = True
                        # Drop the oldest mask so the history slides forward.
                        obj_info[j][5].pop(0)
                    '''
                    global pointx,pointy,real_pointx,real_pointy, point_count ,use_count
                    use_count+=1
                    if use_count >=10:
                        pointx.append(obj_info[j][6][4][0])
                        pointy.append(obj_info[j][6][4][1])
                        point_count += 1
                        if point_count >= 5:
                            real_pointx.append(yhat[0][0][2][1]*320+320)
                            real_pointy.append(yhat[0][0][2][2]*240+240)
                    '''

                # Draw the five stored predictions: a dot at (px, py) and a
                # line through it along (temp_px, temp_py).
                if obj_info[j][6] != []:
                    for i in range(5):
                        px = obj_info[j][6][i][0]
                        py = obj_info[j][6][i][1]
                        temp_px = obj_info[j][6][i][2]
                        temp_py = obj_info[j][6][i][3]

                        cv2.circle(img_numpy, (int(px), int(py)), 5,
                                   (0, 0, 255), 5)
                        cv2.line(img_numpy,
                                 (int(px + temp_px), int(py + temp_py)),
                                 (int(px - temp_px), int(py - temp_py)),
                                 (0, 0, 255), 5)

                # NOTE(review): get_color() indexes obj_info with its argument
                # again when class_color is set, so passing the id here
                # performs a double lookup - confirm intended.
                color = get_color(obj_info[j][0])
                # NOTE(review): unused; obj_info[j][3] is treated as a mask
                # tensor above, not as a score.
                score = obj_info[j][3]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy,
                                  (obj_info[j][4][2], obj_info[j][4][4]),
                                  (obj_info[j][4][3], obj_info[j][4][5]),
                                  color, 1)

                if args.display_text:

                    _class = obj_info[j][1]

                    #text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class
                    # With display_scores the label shows "<id>: <name>".
                    text_str = '%s: %s' % (obj_info[j][0], _class
                                           ) if args.display_scores else _class
                    #text_str = '%s: %s' % (_class, obj_info[j][2]) if args.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                     font_scale,
                                                     font_thickness)[0]

                    text_pt = (obj_info[j][4][2], obj_info[j][4][4] - 3)
                    text_color = [255, 255, 255]

                    # Filled label background above the box, then the text.
                    cv2.rectangle(img_numpy,
                                  (obj_info[j][4][2], obj_info[j][4][4]),
                                  (obj_info[j][4][2] + text_w,
                                   obj_info[j][4][4] - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)

        # Publish only when at least one object produced a new prediction.
        if pub_Flag == True:
            #print(pub_array_msg)
            array_pub.publish(pub_array_msg)
        pub_Flag = False
        # Remember this frame's objects for the next data_save() call.
        old_obj_info = obj_info
    return img_numpy
Exemple #15
0
def prep_display(net,
                 dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45,
                 fps_str=''):
    """
    Return the frame with everything outside the selected masks blacked out.

    Note: If undo_transform=False then h and w are allowed to be None
    (they are overwritten with the shape of img).

    Args:
        net: Network object; net.detect() turns the raw outputs into
            predictions for postprocess().
        dets_out: Sequence whose items 0..4 are passed to net.detect() as
            'loc', 'conf', 'mask', 'priors' and 'proto'.
        img: Input frame; reverted with undo_image_transformation() when
            undo_transform is True, otherwise divided by 255.
        h: Target height handed to postprocess().
        w: Target width handed to postprocess().
        undo_transform: See img.
        class_color: Unused; kept for signature compatibility.
        mask_alpha: Unused; kept for signature compatibility.
        fps_str: Unused; kept for signature compatibility.

    Returns:
        A uint8 numpy image that is zero everywhere except where the mask of
        a detection equals 1 and its class name passes the args.classes
        filter (no filter when args.classes is None).
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            preds = net.detect(
                {
                    'loc': dets_out[0],
                    'conf': dets_out[1],
                    'mask': dets_out[2],
                    'priors': dets_out[3],
                    'proto': dets_out[4]
                }, net)

            t = postprocess(preds,
                            w,
                            h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
        finally:
            # Always put the global config back, even if detection fails.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks stay on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    # Stop at the first detection whose score falls below the threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Same shape, dtype and device as the frame, so the masked copy below
    # does not depend on the process-wide default tensor type.
    img_tmp = torch.zeros_like(img_gpu)

    for i in range(num_dets_to_consider):
        class_name = cfg.dataset.class_names[classes[i]]

        if args.classes is None or class_name in args.classes:
            selected = masks[i] == 1
            img_tmp[selected] = img_gpu[selected]

    # Convert to a uint8 image on the CPU.
    return (img_tmp * 255).byte().cpu().numpy()
Exemple #16
0
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Build a display frame that shows only the first (highest-scoring) mask as
    a white-on-black image, then add the optional FPS box, boxes and labels.

    When masks are not drawn the frame is blanked to black instead.
    ``mask_alpha`` is accepted but not used by this variant.

    Note: If undo_transform=False then h and w are allowed to be None; both
    are re-read from img.shape in that case.
    """
    # Shared text settings for the FPS box and the per-detection labels.
    label_font = cv2.FONT_HERSHEY_DUPLEX
    label_scale = 0.6
    label_thickness = 1
    white = [255, 255, 255]

    if not undo_transform:
        img_gpu = img / 255.0
        h, w, _ = img.shape
    else:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()

    with timer.env('Postprocess'):
        # Force box rescoring for this call only, then put the setting back.
        saved_rescore = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(
            dets_out,
            w,
            h,
            visualize_lincomb=args.display_lincomb,
            crop_masks=args.crop,
            score_threshold=args.score_threshold,
        )
        cfg.rescore_bbox = saved_rescore

    with timer.env('Copy'):
        # Indices of the top_k detections, best score first.
        order = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks stay on the GPU; only the small arrays are copied over.
            masks = t[3][order]

        classes, scores, boxes = (part[order].cpu().numpy() for part in t[:3])

        for index, (cls_id, score, box) in enumerate(zip(classes, scores, boxes)):
            print(cls_id, box, score, 'index', index)

    # Stop at the first detection whose score drops below the threshold.
    limit = min(args.top_k, classes.shape[0])
    num_dets_to_consider = next(
        (k for k in range(limit) if scores[k] < args.score_threshold), limit)

    def get_color(j, on_gpu=None):
        """Pick the colour for detection ``j``, caching GPU tensors per device."""
        global color_cache
        base = classes[j] if class_color else j
        color_idx = (base * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, depending
            color = (color[2], color[1], color[0])
        if on_gpu is None:
            return color

        color = torch.Tensor(color).to(on_gpu).float() / 255.
        color_cache[on_gpu][color_idx] = color
        return color

    # Mask rendering happens on the GPU with plain tensor ops.
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, masks is of size [1, h, w, 1]
        masks = masks[:1, :, :, None]
        covered = masks.sum(dim=0) >= 1
        img_gpu = covered.float().expand(-1, -1, 3).contiguous()
    else:
        img_gpu *= 0

    if args.display_fps:
        # Darken the area behind the FPS text while still on the GPU.
        text_w, text_h = cv2.getTextSize(fps_str, label_font, label_scale, label_thickness)[0]
        img_gpu[0:text_h+8, 0:text_w+8] *= 0.6 # 1 - Box alpha

    # Everything below is CPU drawing. The array must be uint8 or OpenCV
    # will not anti-alias the text.
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    print(args.image, args.images)

    if args.display_fps:
        cv2.putText(img_numpy, fps_str, (4, text_h + 2), label_font, label_scale, white, label_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if not (args.display_text or args.display_bboxes):
        return img_numpy

    # Draw lowest-ranked detections first so the best ones end up on top.
    for j in reversed(range(num_dets_to_consider)):
        x1, y1, x2, y2 = boxes[j, :]
        color = get_color(j)

        if args.display_bboxes:
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        if not args.display_text:
            continue

        _class = cfg.dataset.class_names[classes[j]]
        if args.display_scores:
            text_str = '%s: %.2f' % (_class, scores[j])
        else:
            text_str = _class

        text_w, text_h = cv2.getTextSize(text_str, label_font, label_scale, label_thickness)[0]

        # Filled background box, then the label text on top of it.
        cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
        cv2.putText(img_numpy, text_str, (x1, y1 - 3), label_font, label_scale, white, label_thickness, cv2.LINE_AA)

    return img_numpy
Exemple #17
0
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45,
                 fps_str=''):
    """
    Blend detection masks into the frame and annotate the objects reported
    by ``data_save``.

    The post-processed detections are passed to ``data_save``; its
    ``obj_info`` / ``obj_num`` results drive the box and label drawing.
    Module-level state is updated along the way:

    * ``mask_numpy``  - grayscale of the frame restricted to the union of the
      drawn masks (only when masks are displayed).
    * ``mask_numpy1`` - grayscale of the frame restricted to the current
      object's mask (overwritten for every object).
    * ``frame_count`` - incremented once per call that reaches the drawing
      stage.
    * ``predict_pos`` - reset for every object whose ``obj_info[j][2]`` is
      not 1.

    Note: If undo_transform=False then h and w are allowed to be None.

    Returns:
        The annotated frame as a uint8 numpy array.
    """
    # `global` applies to the whole function, so declare the names once here.
    global mask_numpy, mask_numpy1, frame_count, predict_pos

    # Shared text settings for the FPS box and the object labels.
    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Force box rescoring for this call only, then restore the setting.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        # NOTE(review): argsort applied twice yields each score's rank rather
        # than a sort order, and no top_k cut is applied here - confirm this
        # ordering is what data_save expects.
        idx1 = t[1].argsort()
        idx = idx1.argsort()

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
            mask_picture = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        # NOTE(review): mask_picture is only bound when cfg.eval_mask_branch
        # is set; this call fails otherwise.
        obj_info, obj_num = data_save(mask_picture, classes, scores, boxes)

    # Stop at the first detection whose score drops below the threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        color_idx = (obj_info[j][0] * 5 if class_color else j *
                     5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Publish the mask-only view of the frame before colours are blended.
        mask_img = img_gpu * (masks.sum(dim=0) > 0.5).float()
        mask_numpy = (mask_img * 255).byte().cpu().numpy()
        mask_numpy = cv2.cvtColor(mask_numpy, cv2.COLOR_BGR2GRAY)

        # Prepare the RGB images for each mask given their color
        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                         font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        frame_count += 1

        for j in range(obj_num):
            # Grayscale of the frame restricted to object j's mask, published
            # through the mask_numpy1 global.
            mask_image = mask_picture[j:j + 1, :, :, None]
            mask_image = img_gpu * (mask_image.sum(dim=0) > 0.5).float()
            mask_numpy1 = (mask_image * 255).byte().cpu().numpy()
            mask_numpy1 = cv2.cvtColor(mask_numpy1, cv2.COLOR_BGR2GRAY)

            if obj_info[j][2] != 1:
                # Objects not flagged with 1 get their position history reset.
                for i in range(2):
                    predict_pos[j][i] = [0]
                predict_pos[j][2] = []
                continue

            color = get_color(obj_info[j][0])
            # obj_info[j][4] is used here as (..., x1, x2, y1, y2) at indices
            # 2..5 - presumably the box corners; verify against data_save.
            box_x1 = obj_info[j][4][2]
            box_y1 = obj_info[j][4][4]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (box_x1, box_y1),
                              (obj_info[j][4][3], obj_info[j][4][5]),
                              color, 1)

            if args.display_text:
                _class = obj_info[j][1]

                # With display_scores the label shows obj_info[j][0] next to
                # the class name (not the detection score).
                text_str = '%s: %s' % (obj_info[j][0], _class
                                       ) if args.display_scores else _class

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale,
                                                 font_thickness)[0]

                text_pt = (box_x1, box_y1 - 3)

                # Filled background box, then the label text on top of it.
                cv2.rectangle(img_numpy, (box_x1, box_y1),
                              (box_x1 + text_w, box_y1 - text_h - 4),
                              color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

    return img_numpy
Exemple #18
0
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45):
    """
    Draw masks, boxes and labels, and overlay a patty-count summary line.

    Besides the usual detection overlay, a detection of class 6 (the tray) is
    cropped out of the un-annotated frame and resized to a width of 256. A
    32-bin intensity histogram of that crop (or of the whole frame when no
    tray was detected) is fed to a pickled regressor chosen by the dominant
    patty type, and the predicted count is written in the bottom-right
    corner together with the visible per-type counts.

    Note: If undo_transform=False then h and w are allowed to be None.

    Returns:
        The annotated frame as a uint8 numpy array.
    """
    # Text settings are needed by both the labels and the summary line, so
    # they must exist even when args.display_text is off.
    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]
    tray_class = 6

    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:args.top_k]
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    # Stop at the first detection whose score drops below the threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    if num_dets_to_consider == 0:
        # No detections found so just output the original image
        return (img_gpu * 255).byte().cpu().numpy()

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color
        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    # Pristine copy: every tray crop is taken from here, never from a previous
    # crop or from the frame after boxes/labels were drawn on it.
    clean_frame = img_numpy.copy()
    crop_tray_img = clean_frame

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if classes[j] == tray_class:
                tray = clean_frame[y1:y2, x1:x2]
                # Skip degenerate boxes; they would divide by zero below.
                if tray.shape[0] > 0 and tray.shape[1] > 0:
                    # Resize to a fixed width of 256, keeping the aspect ratio.
                    aspect_ratio = tray.shape[0] / tray.shape[1]
                    height = max(1, int(aspect_ratio * 256))
                    tray = np.array(tray, dtype='uint8')
                    tray = Image.fromarray(tray).resize((256, height),
                                                        Image.BICUBIC)
                    # With several trays, the one processed last (lowest
                    # index) is the one that reaches the regressor.
                    crop_tray_img = np.asarray(tray, dtype=float)

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (
                    _class, score) if args.display_scores else _class

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale, font_thickness)[0]
                text_pt = (x1, y1 - 3)
                # Filled background box, then the label text on top of it.
                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

        # Visible patties per type; class 6 is the tray itself.
        n_patties = np.sum(classes != tray_class)
        chicken_count = np.sum(classes == 0) + np.sum(classes == 1)
        ham_quarter_count = np.sum(classes == 2) + np.sum(classes == 3)
        ham_1by10_count = np.sum(classes == 4) + np.sum(classes == 5)

        # The dominant patty type selects the regressor (ties pick the first).
        labels = [chicken_count, ham_quarter_count, ham_1by10_count]
        max_index = labels.index(max(labels))
        model_paths = ("weights/regressor/cp_regressor_crop",
                       "weights/regressor/qp_regressor_crop",
                       "weights/regressor/op_regressor_crop")
        model_path = model_paths[max_index]

        import pickle
        from sklearn.preprocessing import StandardScaler

        # NOTE: unpickling executes arbitrary code; only trusted weight files
        # may live at these paths. The model is reloaded on every call.
        with open(model_path, 'rb') as model_file:
            loaded_model = pickle.load(model_file)

        # Feature vector: standardised 32-bin density histogram of the crop.
        n_bin = 32
        histogram, _ = np.histogram(crop_tray_img.ravel(),
                                    n_bin,
                                    density=True)
        sc_X = StandardScaler()
        originalFeatures = sc_X.fit_transform(
            histogram.reshape(n_bin, 1)).reshape(1, n_bin)

        prediction = round(loaded_model.predict(originalFeatures)[0])
        # With fewer than four chicken patties the visible count is used as is.
        if max_index == 0 and chicken_count < 4:
            prediction = chicken_count

        count_text = "Calculated: {} Visible Patties: {} -> Chicken: {}, Ham Quarter: {}, HAM 1by10: {}".format(
            prediction, n_patties, chicken_count, ham_quarter_count,
            ham_1by10_count)
        count_text_w, count_text_h = cv2.getTextSize(count_text, font_face,
                                                     font_scale,
                                                     font_thickness)[0]
        # Anchor the summary in the bottom-right corner of the frame.
        count_text_pt = (w - count_text_w, h - count_text_h)
        cv2.putText(img_numpy, count_text, count_text_pt, font_face,
                    font_scale, text_color, font_thickness, cv2.LINE_AA)
    return img_numpy
Exemple #19
0
def prep_display_for_img(dets_out,
                         img,
                         h=None,
                         w=None,
                         undo_transform=True,
                         class_color=False,
                         mask_alpha=0.45):
    """
    Render detections on a single image with translucent text labels.

    Masks are alpha-blended on the GPU; boxes are drawn with OpenCV and the
    class / score labels with ``ps.putBText``. When scores are displayed the
    score is placed on its own line directly under the class name.

    If undo_transform=False then h and w may be None; they are read from
    img.shape instead.

    Returns:
        The annotated image as a uint8 numpy array.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Force box rescoring for this call only, then restore the setting.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        # Indices of the top_k detections, best score first.
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        # One bulk copy to the host: plain numpy values hash by value (so the
        # colour cache works) and can be handed to OpenCV.
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    # Stop at the first detection whose score drops below the threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    def get_color(j, on_gpu=None):
        """Pick the colour for detection ``j``, caching GPU tensors per device."""
        # int() keeps the cache key a plain, value-hashable integer.
        color_idx = int(classes[j] if class_color else j) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Closed form of:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    # Must be uint8 for the OpenCV drawing calls below.
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if num_dets_to_consider == 0:
        return img_numpy

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1

    def put_label(image, text, x, y, background):
        """Draw one translucent label whose box starts at (x, y)."""
        return ps.putBText(image,
                           text,
                           text_offset_x=x,
                           text_offset_y=y,
                           vspace=0,
                           hspace=0,
                           font=font_face,
                           font_scale=font_scale,
                           thickness=font_thickness,
                           alpha=0.7,
                           background_RGB=background,
                           text_RGB=(255, 255, 255))

    if args.display_text or args.display_bboxes:
        # Draw lowest-ranked detections first so the best ones end up on top.
        for j in reversed(range(num_dets_to_consider)):
            # Plain ints: OpenCV point arguments and slicing need integers.
            x1, y1, x2, y2 = (int(v) for v in boxes[j, :])
            color = get_color(j)

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if not args.display_text:
                continue

            _class = cfg.dataset.class_names[classes[j]]
            if args.display_scores:
                text_str_class = f"{_class}"
                text_str_score = f": {float(scores[j]):.2f}"

                # The class label's height positions the score line below it.
                _, text_h_class = cv2.getTextSize(text_str_class, font_face,
                                                  font_scale,
                                                  font_thickness)[0]

                img_numpy = put_label(img_numpy, text_str_class, x1, y1,
                                      color)
                img_numpy = put_label(img_numpy, text_str_score, x1,
                                      y1 + text_h_class + 2, color)
            else:
                img_numpy = put_label(img_numpy, '%s' % _class, x1, y1,
                                      color)

    return img_numpy
Exemple #20
0
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Render the detections in dets_out onto the image and collect predictions.

    Every instance returned by postprocess is recorded as a dict with the keys
    'category_id' (class index + 1), 'segmentation' (RLE of the instance mask)
    and 'score'. The masks, FPS counter, boxes and labels are then drawn
    according to the args.display_* flags.

    Note: If undo_transform=False then h and w are allowed to be None; they are
    read from img.shape instead.

    Returns the tuple (img_numpy, all_pred), where img_numpy is the rendered
    uint8 image and all_pred is the list of prediction dicts.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    # t is (classes, scores, boxes, masks). Export every instance, independent
    # of the top_k limit that is applied further down for drawing.
    all_categories = t[0].cpu().numpy()
    all_scores = t[1].cpu().numpy()
    all_masks = t[3].cpu().numpy()
    all_pred = [
        {
            'category_id': int(all_categories[i]) + 1,
            # Binary mask -> run-length encoding
            'segmentation': binary_mask_to_rle(all_masks[i, :, :]),
            'score': float(all_scores[i]),
        }
        for i in range(len(all_scores))
    ]

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:args.top_k]
        classes = t[0][:args.top_k].cpu().numpy()
        scores = t[1][:args.top_k].cpu().numpy()
        boxes = t[2][:args.top_k].cpu().numpy()

    # Draw at most top_k detections, stopping at the first one whose score
    # falls below the threshold.
    max_dets = min(args.top_k, classes.shape[0])
    num_dets_to_consider = next(
        (j for j in range(max_dets) if scores[j] < args.score_threshold),
        max_dets)

    # Quick and dirty helper for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BRG, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # One color per detection, stacked to [num_dets, 1, 1, 3]
        gpu_index = img_gpu.device.index
        colors = torch.cat(
            [get_color(j, on_gpu=gpu_index).view(1, 1, 1, 3)
             for j in range(num_dets_to_consider)],
            dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Closed form of the sequential blend:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_summand += (masks_color[1:] * inv_alph_cumul).sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    # Text settings shared by the FPS counter and the detection labels
    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    if args.display_fps:
        # Darken the box behind the fps text while the image is still on the GPU
        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]
        img_gpu[0:text_h+8, 0:text_w+8] *= 0.6 # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        cv2.putText(img_numpy, fps_str, (4, text_h + 2), font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy, all_pred

    if not (args.display_text or args.display_bboxes):
        return img_numpy, all_pred

    # Walk the detections from last to first index so that index 0 is drawn on top
    for j in range(num_dets_to_consider - 1, -1, -1):
        x1, y1, x2, y2 = boxes[j, :]
        color = get_color(j)
        score = scores[j]

        if args.display_bboxes:
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        if not args.display_text:
            continue

        _class = cfg.dataset.class_names[classes[j]]
        if args.display_scores:
            text_str = '%s: %.2f' % (_class, score)
        else:
            text_str = _class

        text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

        # Filled label background just above the box's top-left corner
        cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
        cv2.putText(img_numpy, text_str, (x1, y1 - 3), font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    return img_numpy, all_pred
Exemple #21
0
  def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Draw the detections in dets_out onto the image and return it as a uint8 numpy array.

    Besides rendering, the intermediate results are kept on the instance:
      * self.classes, self.scores, self.boxes, self.num_dets_to_consider are always set;
      * self.masks, self.masks_color, self.masks_color_2 and self.img_gpu are set only
        when the masks are drawn (display_masks, cfg.eval_mask_branch and >= 1 detection);
      * self.text_str (detection index -> label) is reset whenever boxes or labels are drawn.

    Note: If undo_transform=False then h and w are allowed to be None; they are
    read from img.shape instead.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Temporarily force cfg.rescore_bbox on for this postprocess call
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, visualize_lincomb = self.args.display_lincomb,
                                        crop_masks        = self.args.crop,
                                        score_threshold   = self.args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        # t is (classes, scores, boxes, masks); keep the top_k highest scores
        idx = t[1].argsort(0, descending=True)[:self.args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
        self.classes, self.scores, self.boxes = classes, scores, boxes

    # Scores are sorted descending, so stop at the first one below the threshold
    num_dets_to_consider = min(self.args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < self.args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty helper for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
            return self.color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BRG, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            self.color_cache[on_gpu][color_idx] = color
        return color

    # First, draw the masks on the GPU where we can do it really fast
    if self.args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Remove overlapped areas from the mask results: for every detection j,
        # blank the part of its mask that lies in the intersection with another
        # detection's box k, unless that intersection covers almost all of j.
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            box_size = int((x2-x1)*(y2-y1))
            overlapped_list = []
            for k in reversed(range(num_dets_to_consider)):
                if k == j:
                    continue
                a1, b1, a2, b2 = boxes[k, :]
                box_size_sub = int((a2-a1)*(b2-b1))
                overlap_w = min(a2, x2) - max(a1, x1)
                overlap_h = min(b2, y2) - max(b1, y1)
                if overlap_w > 0 and overlap_h > 0:
                    # Overlapped area (both boxes are non-degenerate here)
                    S_jk = overlap_w * overlap_h
                    if S_jk / box_size > 0.9:
                        # Box j is (almost) entirely inside box k: keep its mask
                        pass
                    elif S_jk / box_size_sub > 0.3:
                        # The intersection rectangle is spanned by the two middle
                        # values of the sorted x and y coordinates.
                        x_list = sorted([x1, x2, a1, a2])
                        y_list = sorted([y1, y2, b1, b2])
                        overlapped_list.append([int(x_list[1]), int(y_list[1]), int(x_list[2]), int(y_list[2])])
            for ov_bbox in overlapped_list:
                masks[j][ov_bbox[1]: ov_bbox[3], ov_bbox[0]: ov_bbox[2]] = 0

        self.masks = masks

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 3])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        self.masks_color = colors
        self.masks_color_2 = masks_color

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            # Out-of-place add: masks_color[0] is a view, so an in-place "+=" would
            # overwrite the first layer of the tensor stored in self.masks_color_2.
            masks_color_summand = masks_color_summand + masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
        self.img_gpu = img_gpu

    if self.args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h+8, 0:text_w+8] *= 0.6 # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if self.args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    self.num_dets_to_consider = num_dets_to_consider
    if num_dets_to_consider == 0:
        return img_numpy

    if self.args.display_text or self.args.display_bboxes:
        self.text_str = {}
        # NOTE: nothing here reads self.masks, so boxes and labels can be drawn
        # even when the mask branch above was skipped and self.masks was never set.
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if self.args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if self.args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s:%d_%.2f' % (_class, classes[j], score) if self.args.display_scores else _class
                self.text_str[j] = text_str
                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                # The label sits inside the box, just below its top-left corner
                text_pt = (x1, y1 + 15)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 + text_h + 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
Exemple #22
0
def prep_display_single(dets_out,
                        img,
                        pad_h,
                        pad_w,
                        img_ids=None,
                        img_meta=None,
                        undo_transform=True,
                        mask_alpha=0.45,
                        fps_str='',
                        display_mode=None):
    """
    Draw the detections of a single frame onto the image and return it as a
    uint8 numpy array.

    Note: If undo_transform=False then pad_h and pad_w are allowed to be None;
    they are read from img.shape instead.

    -- display_mode: with 'test' the label shows class, score and box id (when
       args.display_scores is set); with any other value only the class name
       is drawn.

    Side effects: sets cfg.mask_proto_debug and cfg.preserve_aspect_ratio, and
    overwrites args.display_masks depending on whether the post-processed
    detections contain a 'segm' entry.
    """

    if undo_transform:
        img_numpy = undo_image_transformation(img, img_meta, pad_h, pad_w)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        pad_h, pad_w, _ = img.shape

    with timer.env('Postprocess'):
        cfg.mask_proto_debug = args.mask_proto_debug
        cfg.preserve_aspect_ratio = False
        dets_out = postprocess_ytbvis(dets_out,
                                      pad_h,
                                      pad_w,
                                      img_meta,
                                      display_mask=True,
                                      visualize_lincomb=args.display_lincomb,
                                      crop_masks=args.crop,
                                      score_threshold=cfg.eval_conf_thresh,
                                      img_ids=img_ids,
                                      mask_det_file=args.mask_det_file)
        torch.cuda.synchronize()
        scores = dets_out['score'][:args.top_k].detach().cpu().numpy()
        boxes = dets_out['box'][:args.top_k].detach().cpu().numpy()

    # Masks can only be drawn when the post-processing produced them
    if 'segm' in dets_out:
        masks = dets_out['segm'][:args.top_k]
        args.display_masks = True
    else:
        args.display_masks = False

    classes = dets_out['class'][:args.top_k].detach().cpu().numpy()

    # Draw at most top_k detections, stopping at the first score below the threshold
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    color_type = dets_out['box_ids']
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    if num_dets_to_consider == 0:
        # No detections found so just output the original image
        return (img_gpu * 255).byte().cpu().numpy()

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 3])
        colors = torch.cat([
            get_color(j,
                      color_type,
                      on_gpu=img_gpu.device.index,
                      undo_transform=undo_transform).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)
        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                         font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if args.display_text or args.display_bboxes:
        # Loop invariants: fetch the image size and copy the priors to the CPU
        # once instead of once per detection.
        h, w, _ = img_meta['img_shape']
        priors = dets_out['priors'].detach().cpu().numpy()
        num_priors = dets_out['priors'].size(0)
        pcolor = [255, 0, 255]

        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j, color_type)
            score = scores[j]

            # Prior box of this detection: (cx, cy, w, h) scaled by the image
            # size and converted to integer corner coordinates.
            has_prior = j < num_priors
            if has_prior:
                cpx, cpy, pw, ph = priors[j, :] * [w, h, w, h]
                px1, py1 = cpx - pw / 2.0, cpy - ph / 2.0
                px2, py2 = cpx + pw / 2.0, cpy + ph / 2.0
                px1, py1, px2, py2 = int(px1), int(py1), int(px2), int(py2)

            # Keep the drawn box a couple of pixels inside the frame.
            # NOTE(review): the 638 / 358 limits look like they assume a
            # 640x360 frame -- confirm before using other resolutions.
            x = torch.clamp(torch.tensor([x1, x2]), min=2, max=638).tolist()
            y = torch.clamp(torch.tensor([y1, y2]), min=2, max=358).tolist()

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x[0], y[0]), (x[1], y[1]), color, 1)
                if has_prior:
                    cv2.rectangle(img_numpy, (px1, py1), (px2, py2),
                                  pcolor,
                                  2,
                                  lineType=8)

            if args.display_text:
                # Class ids are 1-based here; anything below 1 has no class name
                if classes[j] - 1 < 0:
                    _class = 'None'
                else:
                    _class = cfg.classes[classes[j] - 1]

                if display_mode == 'test':
                    # Manual switch: set to True to also print the score
                    # multiplied by the detection's 'DIoU_score'.
                    train_centerness = False
                    if train_centerness:
                        rescore = dets_out['DIoU_score'][j] * score
                        text_str = '%s: %.2f: %.2f: %s' % (_class, score, rescore, str(color_type[j].cpu().numpy())) \
                            if args.display_scores else _class
                    else:
                        text_str = '%s: %.2f: %s' % (
                            _class, score, str(color_type[j].cpu().numpy())
                        ) if args.display_scores else _class
                else:
                    text_str = '%s' % _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.5
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale, font_thickness)[0]

                # Filled label background just above the box's top-left corner
                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]
                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

    return img_numpy
Exemple #23
0
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45,
                 fps_str=''):
    """
    Draw the detections in dets_out onto the image (Jittor version) and return
    the result as a uint8 numpy array.

    Masks, the FPS counter, boxes and labels are drawn according to the
    args.display_* flags; at most args.top_k detections with a score of at
    least args.score_threshold are rendered.

    Note: If undo_transform=False then h and w are allowed to be None; they are
    read from img.shape instead.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = jt.array(img_numpy)
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Temporarily force cfg.rescore_bbox on for this postprocess call
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        # argsort returns an (index, value) pair here. Unpack it first and only
        # then apply top_k to the index array: slicing the pair itself would
        # leave the indices untruncated and break the unpacking for top_k < 2.
        idx, _ = t[1].argsort(0, descending=True)
        idx = idx[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].numpy() for x in t[:3]]

    # Scores are sorted descending, so stop at the first one below the threshold
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty helper for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = jt.array(list(color)).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks with Jittor tensor operations, which is much faster
    # than doing it per pixel on the CPU
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider].unsqueeze(3)

        # One color per detection (cache slot 0), stacked to [num_dets, 1, 1, 3]
        colors = jt.contrib.concat([
            get_color(j, 0).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                                   dim=0)

        # The colors are tiled explicitly to the mask's height and width
        # before the multiplication.
        masks_color = masks.repeat(1, 1, 1, 3) * colors.repeat(
            1, masks.shape[1], masks.shape[2], 1) * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]

        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Darken the box behind the fps text before converting to numpy
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                         font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 array or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).uint8().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        # Walk the detections from last to first index so that index 0 is drawn on top
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (
                    _class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale, font_thickness)[0]

                # Filled label background just above the box's top-left corner
                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

    return img_numpy
Exemple #24
0
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=True, mask_alpha=0.45, fps_str=''):
    """
    Render detections (masks, boxes, labels, optional FPS box) onto an image.

    Args:
        dets_out: raw network output, forwarded unchanged to ``postprocess``.
        img: the input image. With ``undo_transform=True`` it is passed to
            ``undo_image_transformation``; otherwise it is used directly and
            divided by 255 (assumes an [h, w, c] tensor in 0..255 -- TODO confirm).
        h, w: target image height and width. Only read when
            ``undo_transform=True``; when it is False they are overwritten from
            ``img.shape`` and may therefore be None.
        undo_transform: whether ``img`` must first be mapped back through
            ``undo_image_transformation``.
        class_color: if True the color-cache key is derived from the class
            index, otherwise from the detection index. NOTE: the drawn color
            itself is currently a fixed light blue, so this only affects caching.
        mask_alpha: opacity used when blending the masks over the image.
        fps_str: text drawn in the top-left corner when ``args.display_fps`` is set.

    Returns:
        The annotated image as a uint8 numpy array (``(img_gpu * 255).byte()``).
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Temporarily force bbox rescoring; the finally clause guarantees the
        # global config is restored even if postprocess raises.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out, w, h, visualize_lincomb = args.display_lincomb,
                                            crop_masks        = args.crop,
                                            score_threshold   = args.score_threshold)
        finally:
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        # Keep the top_k detections, ordered by descending score (t[1]).
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    # Scores are sorted, so stop at the first one below the threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        # Fixed light blue (RGB) instead of the COLORS palette entry;
        # color_idx is only used as the cache key.
        color = (100, 149, 237)
        if not undo_transform:
            # The image might come in as RGB or BRG, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h+8, 0:text_w+8] *= 0.6 # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU, reusing the font settings measured above
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        # Label font settings are loop-invariant, so set them once.
        # (font_scale was raised from the 0.6 used for the FPS text to 1.)
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 1
        font_thickness = 1
        text_color = [255, 255, 255]

        # Iterate in reverse so the highest-scoring detection is drawn last (on top).
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                # Box outline thickness is 3 px.
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 3)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)

                # Filled background strip just above the box's top-left corner, then the label.
                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy