Example #1
    def forward(self, x):
        """ The input should be of size [batch_size, 3, img_h, img_w] """
        _, _, img_h, img_w = x.size()
        cfg._tmp_img_h = img_h
        cfg._tmp_img_w = img_w

        with timer.env('backbone'):
            outs = self.backbone(x)

        if cfg.fpn is not None:
            with timer.env('fpn'):
                # Use backbone.selected_layers because we overwrote self.selected_layers
                outs = [outs[i] for i in cfg.backbone.selected_layers]
                outs = self.fpn(outs)

        proto_out = None
        if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
            with timer.env('proto'):
                proto_x = x if self.proto_src is None else outs[self.proto_src]

                if self.num_grids > 0:
                    grids = self.grid.repeat(proto_x.size(0), 1, 1, 1)
                    proto_x = torch.cat([proto_x, grids], dim=1)

                proto_out = self.proto_net(proto_x)
                proto_out = cfg.mask_proto_prototype_activation(proto_out)

                if cfg.mask_proto_prototypes_as_features:
                    # Clone here because we don't want to permute this; the
                    # contiguous() call below may make the clone unnecessary
                    proto_downsampled = proto_out.clone()

                    if cfg.mask_proto_prototypes_as_features_no_grad:
                        proto_downsampled = proto_out.detach()

                # Move the features last so the multiplication is easy
                proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

                if cfg.mask_proto_bias:
                    bias_shape = list(proto_out.size())
                    bias_shape[-1] = 1
                    proto_out = torch.cat(
                        [proto_out, torch.ones(*bias_shape)], -1)

        with timer.env('pred_heads'):
            pred_outs = {'loc': [], 'conf': [], 'mask': [], 'priors': []}

            if cfg.use_mask_scoring:
                pred_outs['score'] = []

            if cfg.use_instance_coeff:
                pred_outs['inst'] = []

            for idx, pred_layer in zip(self.selected_layers,
                                       self.prediction_layers):
                pred_x = outs[idx]

                if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features:
                    # Scale the prototypes down to the current prediction layer's size and add it as inputs
                    proto_downsampled = F.interpolate(
                        proto_downsampled,
                        size=outs[idx].size()[2:],
                        mode='bilinear',
                        align_corners=False)
                    pred_x = torch.cat([pred_x, proto_downsampled], dim=1)

                # A hack for the way dataparallel works
                if cfg.share_prediction_module and pred_layer is not self.prediction_layers[
                        0]:
                    pred_layer.parent = [self.prediction_layers[0]]

                p = pred_layer(pred_x)

                for k, v in p.items():
                    pred_outs[k].append(v)

        for k, v in pred_outs.items():
            pred_outs[k] = torch.cat(v, -2)

        if proto_out is not None:
            pred_outs['proto'] = proto_out

        if self.training:
            # For the extra loss functions
            if cfg.use_class_existence_loss:
                pred_outs['classes'] = self.class_existence_fc(
                    outs[-1].mean(dim=(2, 3)))

            if cfg.use_semantic_segmentation_loss:
                pred_outs['segm'] = self.semantic_seg_conv(outs[0])

            return pred_outs
        else:
            if cfg.use_mask_scoring:
                pred_outs['score'] = torch.sigmoid(pred_outs['score'])

            if cfg.use_focal_loss:
                if cfg.use_sigmoid_focal_loss:
                    # Note: even though conf[0] exists, this mode doesn't train it so don't use it
                    pred_outs['conf'] = torch.sigmoid(pred_outs['conf'])
                    if cfg.use_mask_scoring:
                        pred_outs['conf'] *= pred_outs['score']
                elif cfg.use_objectness_score:
                    # See focal_loss_sigmoid in multibox_loss.py for details
                    objectness = torch.sigmoid(pred_outs['conf'][:, :, 0])
                    pred_outs['conf'][:, :,
                                      1:] = objectness[:, :, None] * F.softmax(
                                          pred_outs['conf'][:, :, 1:], -1)
                    pred_outs['conf'][:, :, 0] = 1 - objectness
                else:
                    pred_outs['conf'] = F.softmax(pred_outs['conf'], -1)
            else:

                if cfg.use_objectness_score:
                    objectness = torch.sigmoid(pred_outs['conf'][:, :, 0])

                    pred_outs['conf'][:, :, 1:] = (objectness > 0.10)[..., None] \
                        * F.softmax(pred_outs['conf'][:, :, 1:], dim=-1)

                else:
                    pred_outs['conf'] = F.softmax(pred_outs['conf'], -1)

            return self.detect(pred_outs, self)
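
For context, a minimal sketch (not part of the model above) of how the lincomb 'proto' output is typically consumed downstream in YOLACT-style code: prototype masks are linearly combined with per-detection coefficients and then activated. This assumes 'proto' keeps the [h, w, num_protos] layout produced by the permute above; 'coeffs' is a hypothetical [num_dets, num_protos] tensor of mask coefficients.

import torch

def assemble_lincomb_masks(proto, coeffs):
    # [h, w, num_protos] @ [num_protos, num_dets] -> [h, w, num_dets]
    masks = proto @ coeffs.t()
    return torch.sigmoid(masks)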
Example #2
def prep_display_mod(dets_out,
                     img,
                     h,
                     w,
                     depth_map,
                     rel_depth,
                     undo_transform=True,
                     mask_alpha=1.0):  # was mask_alpha=0.45
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    score_threshold = 0.15
    top_k = 15

    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, score_threshold=score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:top_k]  # top_k = 15

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
    num_dets_to_consider = min(top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < score_threshold:
            num_dets_to_consider = j
            break
    classes = classes[:num_dets_to_consider]  # added

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        # color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)          #original
        color_idx = j  # black
        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]
        # print("masks_og.shape", masks.shape)

        # begin added: filter out the person masks and class indices
        people_masks_idxs = []
        classes_to_mask = []
        # Save the center points of the boxes in the same order as the masks
        x = []
        y = []
        for i, j in enumerate(classes):
            if j == 0:  # class 0 is person; keep only people's masks
                people_masks_idxs.append(i)
                classes_to_mask.append(j)
                x1, y1, x2, y2 = boxes[i, :]
                x.append(int((x1 + x2) / 2))
                y.append(int((y1 + y2) / 2))
        num_dets_to_consider = len(classes_to_mask)

        if num_dets_to_consider == 0:  # if no people, return a black image
            return (img_gpu * 0).byte().cpu().numpy()

        # depth_map is indexed [row, col], so rows come from y and columns from x
        rows = np.array(y)
        cols = np.array(x)

        obj_depths = []
        for i in range(rows.size):  # store the depths of the people
            obj_depths.append(depth_map[rows[i], cols[i], 0])
            # print("depth at object i: ", rows[i], cols[i], " : ", obj_depths[i])

        obj_depths = np.array(obj_depths)
        people_masks_idxs = np.array(people_masks_idxs)
        # Sort the masks and people indices by depth in descending order
        sorted_idx_by_depth = np.argsort(-obj_depths)
        # x = x[sorted_idx_by_depth]
        # y = y[sorted_idx_by_depth]
        obj_depths = obj_depths[sorted_idx_by_depth]
        people_masks_idxs = people_masks_idxs[sorted_idx_by_depth]

        # Keep only the people within the depth threshold
        depth_thres = obj_depths[0] * (1.0 - rel_depth)
        people_masks_idxs = people_masks_idxs[obj_depths >= depth_thres]

        masks = masks[people_masks_idxs]
        num_dets_to_consider = len(people_masks_idxs)

        # All kept masks share a single color
        colors = get_color(0, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)

        # Merge all kept masks into one binary mask of size [1, h, w, 1]
        masks = masks.sum(dim=0, keepdim=True)
        masks[masks != 0.0] = 1.0

        inv_alph_masks = masks * (-mask_alpha) + 1
        masks_color = inv_alph_masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        inv_alph_masks = masks.repeat(1, 1, 1, 3)
        # end added

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        # masks_color_summand = masks_color[0]
        # if num_dets_to_consider > 1:
        #     inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
        #     masks_color_cumul = masks_color[1:] * inv_alph_cumul
        #     masks_color_summand += masks_color_cumul.sum(dim=0)

        # img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand  # original
        # print("inv_alph_masks.shape: ", (torch.squeeze(inv_alph_masks,0)).shape)
        # print("masks_color.shape: ", (torch.squeeze(masks_color,0)).shape)
        img_gpu = img_gpu * torch.squeeze(inv_alph_masks, 0) + torch.squeeze(
            masks_color, 0)  # added

    img_numpy = (img_gpu * 255.0).byte().cpu().numpy()

    return img_numpy
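
The depth-filtering step above, isolated as a hedged NumPy sketch: sort the person detections by depth in descending order, then keep only those within rel_depth of the largest depth value.

import numpy as np

obj_depths = np.array([0.9, 0.4, 0.7])
rel_depth = 0.3
order = np.argsort(-obj_depths)                     # descending: [0, 2, 1]
depths_sorted = obj_depths[order]
depth_thres = depths_sorted[0] * (1.0 - rel_depth)  # 0.9 * 0.7 = 0.63
kept = order[depths_sorted >= depth_thres]          # keeps original indices 0 and 2
print(kept)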
Example #3
    # GPU
    net = net.cuda()
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    x = torch.zeros((1, 3, cfg.max_size, cfg.max_size))
    y = net(x)

    for p in net.prediction_layers:
        print(p.last_conv_size)

    print()
    for k, a in y.items():
        print(k + ': ', a.size(), torch.sum(a))
    exit()  # remove this early exit to run the benchmark loop below

    net(x)
    # timer.disable('pass2')
    avg = MovingAverage()
    try:
        while True:
            timer.reset()
            with timer.env('everything else'):
                net(x)
            avg.add(timer.total_time())
            print('\033[2J')  # Clear the console screen
            timer.print_stats()
            print('Avg fps: %.2f\tAvg ms: %.2f         ' %
                  (1 / avg.get_avg(), avg.get_avg() * 1000))
    except KeyboardInterrupt:
        pass
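
MovingAverage here comes from YOLACT's utility code; a minimal stand-in with the same add()/get_avg() usage (an assumption about its interface), windowed so the average tracks recent frames:

class SimpleMovingAverage:
    """Hedged stand-in for the MovingAverage used above."""
    def __init__(self, window=1000):
        self.window = window
        self.values = []

    def add(self, x):
        self.values.append(x)
        self.values = self.values[-self.window:]

    def get_avg(self):
        return sum(self.values) / max(len(self.values), 1)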
Example #4
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    # print(img.shape)    # torch.Size([480, 360, 3])
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        # Take the top-k highest-scoring detections, as set by the caller
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]
        # The top-k classes, scores, and boxes are available here and can be modified
        # print(classes)  # class 0: person, class 2: car
        # print(scores)
        # print(boxes)

        # person_index marks which of the boxes belong to the person class
        person_index = (classes == 0)
        if person_index.any():
            # The person class is present
            boxes = boxes[person_index]
            scores = scores[person_index]
            # Compute the area of each person box
            area = np.zeros(len(scores))
            for i in range(person_index.sum()):
                box = boxes[i]
                area[i] = (box[2] - box[0]) * (box[3] - box[1])
            # Filter the person boxes by area
            # Assume the smallest person covers 25*100 = 2500 pixels
            # valid_person_index = ((area >= 2500) and (scores < 0.01))
            valid_person_index = (area >= 2500)
            boxes = boxes[valid_person_index]
            scores = scores[valid_person_index]
            if valid_person_index.any():
                # People remain after the area filtering
                print('----- Person detected -----')
            else:
                # No people left after the area filtering
                print('----- No person -----')
            num_dets_to_consider = valid_person_index.sum()
        else:
            # No person class detected at all
            print('----- No person -----')
            num_dets_to_consider = 0

    if num_dets_to_consider == 0:
        # No people detected, return the original image
        return (img_gpu * 255).byte().cpu().numpy()

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BRG, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()
    # img_numpy = (masks * 255).byte().cpu().numpy()

    # Draw each detected box and its label text
    for j in reversed(range(num_dets_to_consider)):
        # boxes, scores, and classes are indexed per detection inside this loop
        x1, y1, x2, y2 = boxes[j][:]
        color = get_color(classes[j])  # note: passes the class id rather than the detection index
        score = scores[j]

        # Draw the bounding box
        cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        # Render the detection label text
        _class = cfg.dataset.class_names[classes[j]]
        text_str = '%s: %.2f' % (_class, score)  # if args.display_scores else _class

        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale,
                                         font_thickness)[0]

        text_pt = (x1, y1 - 3)
        text_color = [255, 255, 255]

        cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4),
                      color, -1)
        cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
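
The per-box area loop above can also be written vectorized; a hedged sketch assuming boxes is an [N, 4] NumPy array of (x1, y1, x2, y2):

import numpy as np

def person_area_filter(boxes, min_area=2500):
    # (x2 - x1) * (y2 - y1) for every box at once
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return areas >= min_area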
Example #5
        images = glob.glob("/home/alex/Yolact_pytorch/results/images/10.png")

        num = len(images)
        print(num)
        for i, one_img in enumerate(images):
            img_name = one_img.split('/')[-1]
            img_origin = torch.from_numpy(cv2.imread(one_img)).float()
            if cuda:
                img_origin = img_origin.cuda()
            img_h, img_w = img_origin.shape[0], img_origin.shape[1]
            img_trans = FastBaseTransform()(img_origin.unsqueeze(0))
            net_outs = net(img_trans)
            nms_outs = NMS(net_outs, args.traditional_nms)

            show_lincomb = bool(args.show_lincomb and args.image_path)
            with timer.env('after nms'):
                results = after_nms(nms_outs,
                                    img_h,
                                    img_w,
                                    show_lincomb=show_lincomb,
                                    crop_masks=not args.no_crop,
                                    visual_thre=args.visual_thre,
                                    img_name=img_name)
                if cuda:
                    torch.cuda.synchronize()

            img_numpy = draw_img(results, img_origin, args)

            cv2.imwrite(f'results/images/{img_name}', img_numpy)
            print(f'\r{i + 1}/{num}', end='')
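
FastBaseTransform above is YOLACT's GPU-side preprocessing; a hedged sketch of the assumed steps (NHWC 0-255 image in, normalized NCHW network input out; the exact mean/std and channel handling depend on the backbone config):

import torch
import torch.nn.functional as F

def fast_base_transform_sketch(img, size=550,
                               mean=(103.94, 116.78, 123.68),
                               std=(57.38, 57.12, 58.40)):
    # img: [batch, h, w, 3] float tensor in 0-255
    mean = torch.tensor(mean).view(1, 3, 1, 1)
    std = torch.tensor(std).view(1, 3, 1, 1)
    x = img.permute(0, 3, 1, 2).contiguous()
    x = F.interpolate(x, (size, size), mode='bilinear', align_corners=False)
    return (x - mean) / std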
Example #6
def _mask_iou(mask1, mask2, iscrowd=False):
    with timer.env('Mask IoU'):
        ret = mask_iou(mask1, mask2, iscrowd)
    return ret.cpu()
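
For reference, a hedged sketch of what mask_iou is assumed to compute here (YOLACT-style): masks flattened to binary [N, H*W] and [M, H*W] tensors, with iscrowd switching the denominator from the union to the area of the first mask set.

import torch

def mask_iou_sketch(masks_a, masks_b, iscrowd=False):
    inter = masks_a @ masks_b.t()                  # [N, M] intersection areas
    area_a = masks_a.sum(dim=1, keepdim=True)      # [N, 1]
    area_b = masks_b.sum(dim=1, keepdim=True).t()  # [1, M]
    union = area_a + area_b - inter
    return inter / (area_a if iscrowd else union)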
Example #7
def prep_metrics(ap_data,
                 dets,
                 img,
                 gt,
                 gt_masks,
                 h,
                 w,
                 num_crowd,
                 image_id,
                 detections: Detections = None):
    """ Returns a list of APs for this image, with each element being for a class  """
    if not args.output_coco_json:
        with timer.env('Prepare gt'):
            gt_boxes = torch.Tensor(gt[:, :4])
            gt_boxes[:, [0, 2]] *= w
            gt_boxes[:, [1, 3]] *= h
            gt_classes = list(gt[:, 4].astype(int))
            gt_masks = torch.Tensor(gt_masks).view(-1, h * w)

            if num_crowd > 0:
                split = lambda x: (x[-num_crowd:], x[:-num_crowd])
                crowd_boxes, gt_boxes = split(gt_boxes)
                crowd_masks, gt_masks = split(gt_masks)
                crowd_classes, gt_classes = split(gt_classes)

    with timer.env('Postprocess'):
        classes, scores, boxes, masks = postprocess(
            dets,
            w,
            h,
            crop_masks=args.crop,
            score_threshold=args.score_threshold)

        if classes.size(0) == 0:
            return

        classes = list(classes.cpu().numpy().astype(int))
        scores = list(scores.cpu().numpy().astype(float))
        masks = masks.view(-1, h * w).cuda()
        boxes = boxes.cuda()

    if args.output_coco_json:
        with timer.env('JSON Output'):
            boxes = boxes.cpu().numpy()
            masks = masks.view(-1, h, w).cpu().numpy()
            for i in range(masks.shape[0]):
                # Make sure that the bounding box actually makes sense and a mask was produced
                if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] -
                                                  boxes[i, 0]) > 0:
                    detections.add_bbox(image_id, classes[i], boxes[i, :],
                                        scores[i])
                    detections.add_mask(image_id, classes[i], masks[i, :, :],
                                        scores[i])
            return

    with timer.env('Eval Setup'):
        num_pred = len(classes)
        num_gt = len(gt_classes)

        mask_iou_cache = _mask_iou(masks, gt_masks)
        bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float())

        if num_crowd > 0:
            crowd_mask_iou_cache = _mask_iou(masks, crowd_masks, iscrowd=True)
            crowd_bbox_iou_cache = _bbox_iou(boxes.float(),
                                             crowd_boxes.float(),
                                             iscrowd=True)
        else:
            crowd_mask_iou_cache = None
            crowd_bbox_iou_cache = None

        iou_types = [('box', lambda i, j: bbox_iou_cache[i, j].item(),
                      lambda i, j: crowd_bbox_iou_cache[i, j].item()),
                     ('mask', lambda i, j: mask_iou_cache[i, j].item(),
                      lambda i, j: crowd_mask_iou_cache[i, j].item())]

    timer.start('Main loop')
    for _class in set(classes + gt_classes):
        ap_per_iou = []
        num_gt_for_class = sum([1 for x in gt_classes if x == _class])

        for iouIdx in range(len(iou_thresholds)):
            iou_threshold = iou_thresholds[iouIdx]

            for iou_type, iou_func, crowd_func in iou_types:
                gt_used = [False] * len(gt_classes)

                ap_obj = ap_data[iou_type][iouIdx][_class]
                ap_obj.add_gt_positives(num_gt_for_class)

                for i in range(num_pred):
                    if classes[i] != _class:
                        continue

                    max_iou_found = iou_threshold
                    max_match_idx = -1
                    for j in range(num_gt):
                        if gt_used[j] or gt_classes[j] != _class:
                            continue

                        iou = iou_func(i, j)

                        if iou > max_iou_found:
                            max_iou_found = iou
                            max_match_idx = j

                    if max_match_idx >= 0:
                        gt_used[max_match_idx] = True
                        ap_obj.push(scores[i], True)
                    else:
                        # If the detection matches a crowd, we can just ignore it
                        matched_crowd = False

                        if num_crowd > 0:
                            for j in range(len(crowd_classes)):
                                if crowd_classes[j] != _class:
                                    continue

                                iou = crowd_func(i, j)

                                if iou > iou_threshold:
                                    matched_crowd = True
                                    break

                        # All this crowd code so that we can make sure that our eval code gives the
                        # same result as COCOEval. There aren't even that many crowd annotations to
                        # begin with, but accuracy is of the utmost importance.
                        if not matched_crowd:
                            ap_obj.push(scores[i], False)
    timer.stop('Main loop')
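
APDataObject is YOLACT's per-class, per-IoU accumulator; a hedged stand-in showing just the interface exercised above (push a (score, is_true) pair per detection, and record the ground-truth positive count used as the recall denominator):

class APDataObjectSketch:
    """Hedged stand-in for the APDataObject interface used in prep_metrics."""
    def __init__(self):
        self.data_points = []      # (score, is_true) per detection
        self.num_gt_positives = 0  # denominator for recall

    def push(self, score, is_true):
        self.data_points.append((score, is_true))

    def add_gt_positives(self, num):
        self.num_gt_positives += num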
Example #8
def instance_logit(dets,
                   w,
                   h,
                   interpolation_mode='bilinear',
                   visualize_lincomb=False,
                   crop_masks=True,
                   score_threshold=0,
                   overlap_thr=0.5,
                   mask_prune=False):
    with timer.env('Postprocess'):
        classes, scores, boxes, masks = postprocess(
            dets, w, h, score_threshold=score_threshold, mask_score=False)

    if classes.size(0) == 0:  # no predicted masks
        return None, None, None

    classes = classes.cpu().numpy().astype(int)
    scores = scores.cpu().numpy().astype(float)
    masks = masks.view(-1, h, w).cuda()

    used = np.zeros((np.max(classes) + 1, h, w), dtype=np.uint8)
    # used = np.zeros((h,w), dtype=np.uint8)

    keep_masks = []
    keep_boxes = []
    keep_classes = []
    # mask_prune = True
    # if mask_prune is False:
    #     return masks, boxes, classes
    # else:
    with timer.env('things mask pruning'):
        # After sanitization the boxes become absolute coordinates, but we keep
        # a relative copy to apply in the crop function
        org_boxes = boxes.clone()
        boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0],
                                                        boxes[:, 2],
                                                        w,
                                                        cast=False)
        boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1],
                                                        boxes[:, 3],
                                                        h,
                                                        cast=False)
        boxes = boxes.cpu().long().numpy()

        for i in range(masks.size(0)):

            if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) <= 0:
                continue

            mask_crop = masks[i, boxes[i, 1]:boxes[i, 3],
                              boxes[i, 0]:boxes[i, 2]].cpu().numpy()  # mask logits, before activation
            mask_crop = np.array(mask_crop > 0, dtype=np.uint8)
            used_crop = used[classes[i], boxes[i, 1]:boxes[i, 3],
                             boxes[i, 0]:boxes[i, 2]]

            area = mask_crop.sum()
            if area == 0 or (np.logical_and(used_crop >= 1, mask_crop
                                            == 1).sum() / area > overlap_thr):
                continue

            used[classes[i], boxes[i, 1]:boxes[i, 3],
                 boxes[i, 0]:boxes[i, 2]] += mask_crop
            keep_masks.append(masks[i, :, :])
            keep_boxes.append(org_boxes[i, :])
            keep_classes.append(classes[i])

        if len(keep_masks) > 0:
            ins_logits = torch.stack(keep_masks, dim=0)
            keep_boxes = torch.stack(keep_boxes, dim=0)
            return ins_logits, keep_boxes, np.array(keep_classes)
        else:
            return None, None, None
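
sanitize_coordinates is YOLACT's box-coordinate cleanup; a hedged sketch of the behavior assumed above (scale relative coordinates to absolute, order each pair, clamp to the image, optionally cast to integer indices), consistent with the "absolute coord" comment in the snippet:

import torch

def sanitize_coordinates_sketch(_x1, _x2, img_size, padding=0, cast=True):
    _x1 = _x1 * img_size
    _x2 = _x2 * img_size
    if cast:
        _x1, _x2 = _x1.long(), _x2.long()
    x1 = torch.clamp(torch.min(_x1, _x2) - padding, min=0)
    x2 = torch.clamp(torch.max(_x1, _x2) + padding, max=img_size)
    return x1, x2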
Example #9
def evaluate(net,
             dataset,
             max_num=-1,
             during_training=False,
             benchmark=False,
             cocoapi=False,
             traditional_nms=False):
    frame_times = MovingAverage()
    dataset_size = len(dataset) if max_num < 0 else min(max_num, len(dataset))
    dataset_indices = list(range(len(dataset)))
    dataset_indices = dataset_indices[:dataset_size]
    progress_bar = ProgressBar(40, dataset_size)

    if benchmark:
        timer.disable('Data loading')
    else:
        # For each class and iou, stores tuples (score, isPositive)
        # Index ap_data[type][iouIdx][classIdx]
        ap_data = {
            'box': [[APDataObject() for _ in cfg.dataset.class_names]
                    for _ in iou_thresholds],
            'mask': [[APDataObject() for _ in cfg.dataset.class_names]
                     for _ in iou_thresholds]
        }
        make_json = Make_json()

    for i, image_idx in enumerate(dataset_indices):
        timer.reset()

        with timer.env('Data loading'):
            img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)

            batch = Variable(img.unsqueeze(0))
            if cuda:
                batch = batch.cuda()

        with timer.env('Network forward'):
            net_outs = net(batch)
            nms_outs = NMS(net_outs, traditional_nms)

        if benchmark:
            prep_benchmark(nms_outs, h, w)
        else:
            prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd,
                         dataset.ids[image_idx], make_json, cocoapi)

        # First couple of images take longer because we're constructing the graph.
        # Since that's technically initialization, don't include those in the FPS calculations.
        fps = 0
        if i > 1 and not during_training:
            frame_times.add(timer.total_time())
            fps = 1 / frame_times.get_avg()

        progress = (i + 1) / dataset_size * 100
        progress_bar.set_val(i + 1)
        print('\rProcessing:  %s  %d / %d (%.2f%%)  %.2f fps  ' %
              (repr(progress_bar), i + 1, dataset_size, progress, fps),
              end='')

    if benchmark:
        print('\n\nStats for the last frame:')
        timer.print_stats()
        avg_seconds = frame_times.get_avg()
        print('Average: %5.2f fps, %5.2f ms' %
              (1 / frame_times.get_avg(), 1000 * avg_seconds))

    else:
        if cocoapi:
            make_json.dump()
            print(f'\nJson files dumped, saved in: {json_path}.')
            return

        table = calc_map(ap_data)
        print(table)
        return table
Example #10
def process():
    try:
        destFile = ""
        if request.method == 'POST':
            file = request.files['file']
            if file and allowed_file(file.filename):
                filename = secure_filename(file.filename)
                destFile = os.path.join(app.config['UPLOAD_FOLDER'], filename)
                file.save(destFile)
                app.logger.warning('filename=(%s)', filename)
        else:
            app.logger.warning("Request dictionary data: {}".format(request.data))
            app.logger.warning("Request dictionary form: {}".format(request.form))
            url = request.form["url"]
            print("url:", url)
            # download file
            destFile = download_file(url)

        # app.logger.error('An error occurred')
        app.logger.warning('destFile=(%s)', destFile)

        img_name = destFile.split('/')[-1]
        app.logger.warning('img_name=(%s)', img_name)

        img_origin = torch.from_numpy(cv2.imread(destFile)).float()
        if cuda:
            img_origin = img_origin.cuda()
        img_h, img_w = img_origin.shape[0], img_origin.shape[1]
        img_trans = FastBaseTransform()(img_origin.unsqueeze(0))
        net_outs = net(img_trans)
        nms_outs = NMS(net_outs, args.traditional_nms)

        app.logger.warning('img_h=(%s)', img_h)
        app.logger.warning('img_w=(%s)', img_w)

        app.logger.warning('cuda=(%s)', cuda)
        app.logger.warning('args.show_lincomb=(%s)', args.show_lincomb)
        app.logger.warning('args.no_crop=(%s)', args.no_crop)
        app.logger.warning('args.visual_thre=(%s)', args.visual_thre)
        app.logger.warning('args=(%s)', args)

        show_lincomb = bool(args.show_lincomb)
        with timer.env('after nms'):
            results = after_nms(nms_outs, img_h, img_w, show_lincomb=show_lincomb, crop_masks=not args.no_crop,
                                visual_thre=args.visual_thre, img_name=img_name)
            if cuda:
                torch.cuda.synchronize()

        # app.logger.warning('results=(%s)', results)
        img_numpy = draw_img(results, img_origin, args)

        cv2.imwrite(f'results/images/{img_name}', img_numpy)
        # print(f'\r{i + 1}/{num}', end='')

        try:
            im = Image.open(f'results/images/{img_name}')
            # im = Image.open(destFile)
            io = BytesIO()
            im.save(io, format='JPEG')
            return Response(io.getvalue(), mimetype='image/jpeg')

        except IOError:
            abort(404)

        # Unreachable after the Response/abort above:
        # return send_from_directory('.', filename), 200
        # callback = json.dumps({"results": results})
        # return callback, 200

    except Exception:
        traceback.print_exc()
        return {'message': 'input error'}, 400
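
A hedged usage sketch for exercising the endpoint above (assumptions: the handler is registered as POST /process and the Flask app runs on localhost:5000):

import requests

with open('test.png', 'rb') as f:
    resp = requests.post('http://localhost:5000/process', files={'file': f})
with open('result.jpg', 'wb') as out:
    out.write(resp.content)  # JPEG bytes returned by the endpoint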
Example #11
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False,
                 mask_alpha=0.45,
                 fps_str=''):
    """
    Note: If undo_transform=False then im_h and im_w are allowed to be None.
    """
    global first_frame, old_obj_info
    name = []
    mask_img = []
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):

        #idx = t[1].argsort(0, descending=True)[:args.top_k]
        idx1 = t[1].argsort()
        idx = idx1.argsort()

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
            mask_picture = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
        for i in range(len(classes)):
            name.append(cfg.dataset.class_names[classes[i]])
            mask_img.append(mask_picture[i:i + 1, :, :, None])

        #obj_info, obj_num = data_save(mask_img, classes, scores, boxes)
        start = time.time()
        obj_info, obj_num = sort_info.data_save(mask_img, classes, name,
                                                scores, boxes, first_frame,
                                                old_obj_info)
        end = time.time()
        print('data_save time (s):', end - start)
        first_frame = True

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (obj_info[j][0] * 5 if class_color else j *
                     5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice

    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]

        masks = masks[:num_dets_to_consider, :, :, None]
        #img_gpu = img_gpu * (masks.sum(dim=0) > 0.5).float()  #only show mask
        #img_gpu = img_gpu * masks[0]

        #mike0225
        mask_img = img_gpu * (masks.sum(dim=0) > 0.5).float()  #0209
        global mask_numpy
        mask_numpy = (mask_img * 255).byte().cpu().numpy()  #0209
        mask_numpy = cv2.cvtColor(mask_numpy, cv2.COLOR_BGR2GRAY)

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])

        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                         font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha
    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        global frame_count, state_pre, flag, predict_pos, centerX, centerY, degree, mask_color, mask_flag, pub_Flag
        frame_count += 1

        pub_array_msg = obj_array()
        for j in range(obj_num):
            global img_num, temp_x, temp_y, yhat
            if obj_info[j][2] != 0:

                #0502-------------------------------------------------------------------
                mask_image = img_gpu * (obj_info[j][3].sum(dim=0) >
                                        0.5).float()
                mask_numpy1 = (mask_image * 255).byte().cpu().numpy()
                mask_color = cv2.cvtColor(mask_numpy1, cv2.COLOR_BGR2GRAY)
                '''
                kernel = np.ones((5,5), np.uint8)
                mask_color = cv2.erode(mask_color, kernel, iterations = 1)
                mask_color = cv2.dilate(mask_color, kernel, iterations = 1)
                '''
                mask_flag = False
                #-------------------------------------------------------------------------

                if frame_count % 20 == 3:
                    #-----------------------------
                    obj_info[j][5].append(mask_color)
                    mask_flag = True
                    #cv2.imwrite('/home/chien/123/test_{}.jpg'.format(j),mask_numpy1)

                    if len(obj_info[j][5]) > 2:
                        '''
                        for k in range(len(obj_info[j][5])):
                            cv2.imwrite('/home/chien/123/test_{}.jpg'.format(k),obj_info[j][5][k])
                        '''

                        obj_msg = obj_infomsg()
                        obj_msg.id = obj_info[j][0]
                        obj_msg.object_name = obj_info[j][1]

                        imagedata1 = np.array(obj_info[j][5])

                        imagedata1 = imagedata1.reshape((-1, 3, 480, 640, 1))
                        imagedata1 = imagedata1 / 255.
                        start = time.time()
                        yhat = model.predict(imagedata1, verbose=0)
                        end = time.time()
                        '''
                        print(end-start)
                        print('---------------')
                        '''
                        if obj_info[j][6] == []:
                            for i in range(5):
                                x1 = yhat[1][0][i][1] * 320 + 320
                                y1 = yhat[1][0][i][2] * 240 + 240
                                degree1 = arctan_recovery(
                                    yhat[1][0][i][3], yhat[1][0][i][4])
                                temp_x1, temp_y1 = trans_degree(
                                    x1, y1, degree1)
                                obj_info[j][6].append(
                                    (x1, y1, temp_x1, temp_y1))

                        else:
                            for i in range(5):
                                x1 = yhat[1][0][i][1] * 320 + 320
                                y1 = yhat[1][0][i][2] * 240 + 240
                                degree1 = arctan_recovery(
                                    yhat[1][0][i][3], yhat[1][0][i][4])
                                temp_x1, temp_y1 = trans_degree(
                                    x1, y1, degree1)
                                obj_info[j][6][i] = (x1, y1, temp_x1, temp_y1)
                            '''
                            obj_info[j][6].pop(0)
                            x1 = yhat[1][0][4][1]*320+320
                            y1 = yhat[1][0][4][2]*240+240
                            degree1 = arctan_recovery(yhat[1][0][4][3],yhat[1][0][4][4])
                            temp_x1,temp_y1=trans_degree(x1,y1,degree1)
                            obj_info[j][6].append((x1,y1,temp_x1,temp_y1))
                            '''
                        obj_msg.x = yhat[1][0][4][
                            1] * 320 + 320  #yhat[1][0][3][1]*320+320
                        obj_msg.y = yhat[1][0][4][2] * 240 + 240
                        obj_msg.degree = arctan_recovery(
                            yhat[1][0][4][3], yhat[1][0][4][4])
                        tx1, ty1 = trans_degree(obj_msg.x, obj_msg.y,
                                                obj_msg.degree)
                        '''
                        print( obj_msg.degree)
                        cv2.circle(img_numpy, (int(obj_msg.x),int(obj_msg.y)),5,(0, 0, 255),5)
                        cv2.line(img_numpy,(int(obj_msg.x+tx1),int(obj_msg.y+ty1)),(int(obj_msg.x-tx1),int(obj_msg.y-ty1)),(0,0,255),5)
                        '''
                        #print( obj_msg.degree)
                        pub_array_msg.Obj_list.append(obj_msg)

                        pub_Flag = True
                        obj_info[j][5].pop(0)  #0->1
                    '''
                    global pointx,pointy,real_pointx,real_pointy, point_count ,use_count
                    use_count+=1
                    if use_count >=10:
                        pointx.append(obj_info[j][6][4][0])
                        pointy.append(obj_info[j][6][4][1])
                        point_count += 1
                        if point_count >= 5:
                            real_pointx.append(yhat[0][0][2][1]*320+320)
                            real_pointy.append(yhat[0][0][2][2]*240+240)
                    '''

                if obj_info[j][6] != []:
                    for i in range(5):
                        px = obj_info[j][6][i][0]
                        py = obj_info[j][6][i][1]
                        temp_px = obj_info[j][6][i][2]
                        temp_py = obj_info[j][6][i][3]

                        cv2.circle(img_numpy, (int(px), int(py)), 5,
                                   (0, 0, 255), 5)
                        cv2.line(img_numpy,
                                 (int(px + temp_px), int(py + temp_py)),
                                 (int(px - temp_px), int(py - temp_py)),
                                 (0, 0, 255), 5)

                color = get_color(obj_info[j][0])
                score = obj_info[j][3]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy,
                                  (obj_info[j][4][2], obj_info[j][4][4]),
                                  (obj_info[j][4][3], obj_info[j][4][5]),
                                  color, 1)

                if args.display_text:

                    _class = obj_info[j][1]

                    #text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class
                    text_str = '%s: %s' % (obj_info[j][0], _class
                                           ) if args.display_scores else _class
                    #text_str = '%s: %s' % (_class, obj_info[j][2]) if args.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                     font_scale,
                                                     font_thickness)[0]

                    text_pt = (obj_info[j][4][2], obj_info[j][4][4] - 3)
                    text_color = [255, 255, 255]

                    cv2.rectangle(img_numpy,
                                  (obj_info[j][4][2], obj_info[j][4][4]),
                                  (obj_info[j][4][2] + text_w,
                                   obj_info[j][4][4] - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)

        if pub_Flag:
            #print(pub_array_msg)
            array_pub.publish(pub_array_msg)
        pub_Flag = False
        old_obj_info = obj_info
    return img_numpy
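
A standalone check of the cumprod compositing trick used above (and in the next example): the vectorized form matches the naive per-mask loop applied from the last index to the first, so the first (highest-ranked) mask ends up on top.

import torch

num, h, w = 3, 4, 4
masks_color = torch.rand(num, h, w, 3)
inv_alph_masks = torch.rand(num, h, w, 3)
img = torch.rand(h, w, 3)

# Naive loop, compositing from the last mask to the first
seq = img.clone()
for j in reversed(range(num)):
    seq = seq * inv_alph_masks[j] + masks_color[j]

# Vectorized form from the snippet above
summand = masks_color[0]
if num > 1:
    cumul = inv_alph_masks[:num - 1].cumprod(dim=0)
    summand = summand + (masks_color[1:] * cumul).sum(dim=0)
vec = img * inv_alph_masks.prod(dim=0) + summand

assert torch.allclose(seq, vec, atol=1e-5)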
Example #12
    def prep_display(self,
                     dets_out,
                     img,
                     h,
                     w,
                     undo_transform=True,
                     class_color=False,
                     mask_alpha=0.45,
                     fps_str=''):
        """
        Note: If undo_transform=False then im_h and im_w are allowed to be None.
        """
        lineThickness = 2

        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            t = postprocess(dets_out,
                            w,
                            h,
                            visualize_lincomb=self.display_lincomb,
                            crop_masks=self.crop,
                            score_threshold=self.score_threshold)
            cfg.rescore_bbox = save

        with timer.env('Copy'):
            # idx = t[1].argsort(0, descending=True)[top_k]
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:self.top_k]

            classes, scores, boxes = [
                x[:self.top_k].cpu().detach().numpy() for x in t[:3]
            ]

        num_dets_to_consider = min(self.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < self.score_threshold:
                num_dets_to_consider = j
                break

        # Quick and dirty lambda for selecting the color for a particular index
        # Also keeps track of a per-gpu color cache for maximum speed
        def get_color(j, on_gpu=None):
            global color_cache
            color_idx = (classes[j] * 5 if class_color else j *
                         5) % len(COLORS)

            if on_gpu is not None and color_idx in color_cache[on_gpu]:
                return color_cache[on_gpu][color_idx]
            else:
                color = COLORS[color_idx]
                if not undo_transform:
                    # The image might come in as RGB or BGR, depending
                    color = (color[2], color[1], color[0])
                if on_gpu is not None:
                    color = torch.Tensor(color).to(on_gpu).float() / 255.
                    color_cache[on_gpu][color_idx] = color
                return color

        # First, draw the masks on the GPU where we can do it really fast
        # Beware: very fast but possibly unintelligible mask-drawing code ahead
        # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
        if self.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
            # After this, mask is of size [num_dets, h, w, 1]
            masks = masks[:num_dets_to_consider, :, :, None]
            # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ],
                               dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

            # This is 1 everywhere except for 1-mask_alpha where the mask is
            inv_alph_masks = masks * (-mask_alpha) + 1

            # I did the math for this on pen and paper. This whole block should be equivalent to:
            #    for j in range(num_dets_to_consider):
            #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                                  1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

        if self.display_fps:
            # Draw the box for the fps on the GPU
            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1

            text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                             font_thickness)[0]

            img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

        # Then draw the stuff that needs to be done on the cpu
        # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
        img_numpy = (img_gpu * 255).byte().cpu().detach().numpy()

        if self.display_fps:
            # Draw the text on the CPU
            text_pt = (4, text_h + 2)
            text_color = [255, 255, 255]

            cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                        text_color, font_thickness, cv2.LINE_AA)

        if num_dets_to_consider == 0:
            return img_numpy

        if self.display_text or self.display_bboxes:
            distance_boxes = []

            def all_subsets(ss):
                return chain(
                    *map(lambda x: combinations(ss, x), range(0,
                                                              len(ss) + 1)))

            def draw_distance(boxes):
                """
                    input : boxes(type=list)
                    Make all possible combinations between the detected boxes of persons
                    perform distance measurement between the boxes to measure distancing
                
                """
                red_counter = 0  # count of people at high risk
                green_counter = 0
                for subset in all_subsets(boxes):
                    if len(subset) == 2:
                        a = np.array((subset[0][2], subset[0][3]))
                        b = np.array((subset[1][2], subset[1][3]))
                        # Euclidean distance; an alternative measure between
                        # two boxes would be cosine distance:
                        # dist = spatial.distance.cosine(a, b)
                        dist = np.linalg.norm(a - b)
                        if dist < 250:
                            red_counter += len(subset)
                            cv2.line(img_numpy, (subset[0][2], subset[0][3]),
                                     (subset[1][2], subset[1][3]), (0, 0, 255),
                                     lineThickness)

                        elif dist < 300:
                            green_counter += len(subset)
                            cv2.line(img_numpy, (subset[0][2], subset[0][3]),
                                     (subset[1][2], subset[1][3]), (0, 255, 0),
                                     lineThickness)
                    log["total_person_in_red_zone"] = red_counter // 2
                    log["total_person_in_green_zone"] = green_counter // 2

            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if self.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if self.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    if _class == "person":
                        log["total_person"] = num_dets_to_consider
                        distance_boxes.append(boxes[j, :].tolist())
                        draw_distance(distance_boxes)

                    text_str = '%s: %.2f' % (
                        _class, score) if self.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                     font_scale,
                                                     font_thickness)[0]

                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    cv2.rectangle(img_numpy, (x1, y1),
                                  (x1 + text_w, y1 - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)

        return img_numpy
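
A hedged standalone sketch of the pairing logic in draw_distance above: every 2-element subset of the person boxes is compared by the Euclidean distance between their (x2, y2) corners.

import numpy as np
from itertools import combinations

boxes = [[10, 10, 50, 90], [200, 20, 260, 120], [60, 15, 110, 95]]
for a, b in combinations(boxes, 2):
    dist = np.linalg.norm(np.array(a[2:]) - np.array(b[2:]))
    zone = 'red' if dist < 250 else ('green' if dist < 300 else 'ok')
    print(a, b, round(float(dist), 1), zone)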
Example #13
def predictions_to_rois(dets_out, width, height, top_k, score_threshold,
                        output_polygons, mask_threshold, mask_nth,
                        output_minrect, view_margin, fully_connected,
                        fit_bbox_to_polygon, bbox_as_fallback, scale,
                        output_mask_image):
    """
    Turns the predictions into ROI objects
    :param dets_out: the predictions
    :param width: the width of the image
    :type width: int
    :param height: the height of the image
    :type height: int
    :param top_k: the maximum number of top predictions to use
    :type top_k: int
    :param score_threshold: the minimum score predictions have to have
    :type score_threshold: float
    :param output_polygons: whether the model predicts masks and polygons should be stored in the CSV files
    :type output_polygons: bool
    :param mask_threshold: the threshold to use for determining the contour of a mask
    :type mask_threshold: float
    :param mask_nth: to speed up polygon computation, use only every nth row and column from mask
    :type mask_nth: int
    :param output_minrect: when predicting polygons, whether to output the minimal rectangles around the objects as well
    :type output_minrect: bool
    :param view_margin: the margin in pixels to use around the masks
    :type view_margin: int
    :param fully_connected: whether regions of 'high' or 'low' values should be fully-connected at isthmuses
    :type fully_connected: str
    :param fit_bbox_to_polygon: whether to fit the bounding box to the polygon
    :type fit_bbox_to_polygon: bool
    :param bbox_as_fallback: if ratio between polygon-bbox and bbox is smaller than this value, use bbox as fallback polygon, ignored if < 0
    :type bbox_as_fallback: float
    :param scale: the scale to use for the image (0-1)
    :type scale: float
    :param output_mask_image: when generating masks, whether to output a combined mask image as well
    :type output_mask_image: bool
    :return: the list of ROIObjects and output_mask image
    :rtype: tuple
    """

    result = []
    mask_comb = None

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out,
                        width,
                        height,
                        crop_masks=False,
                        score_threshold=score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:top_k]
        if output_polygons or output_mask_image:
            classes, scores, boxes, masks = [x[idx].cpu().numpy() for x in t]
        else:
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

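    # Scores are sorted in descending order (see the argsort above), so stop
    # at the first detection that falls below the threshold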
    num_dets_to_consider = min(top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < score_threshold:
            num_dets_to_consider = j
            break

    # the class labels
    if isinstance(cfg.dataset.class_names, list):
        class_labels = cfg.dataset.class_names
    elif isinstance(cfg.dataset.class_names, tuple):
        class_labels = list(cfg.dataset.class_names)
    else:
        class_labels = [cfg.dataset.class_names]

    if num_dets_to_consider > 0:
        # After this, masks is of size [num_dets, h, w, 1]
        if output_polygons or output_mask_image:
            masks = masks[:num_dets_to_consider, :, :, None]

        for j in range(num_dets_to_consider):
            if output_polygons or output_mask_image:
                # Select the current detection's mask (size [h, w]); the original
                # selected it once outside the loop with a stale index
                mask = masks[j, :, :, 0]
            x0, y0, x1, y1 = boxes[j, :]
            x0n = x0 / width
            y0n = y0 / height
            x1n = x1 / width
            y1n = y1 / height
            if scale != 1.0:
                x0 = int(x0 / scale)
                y0 = int(y0 / scale)
                x1 = int(x1 / scale)
                y1 = int(y1 / scale)
            label = classes[j]
            score = scores[j]
            label_str = class_labels[classes[j]]
            px = None
            py = None
            pxn = None
            pyn = None
            bw = None
            bh = None
            if output_polygons:
                px = []
                py = []
                pxn = []
                pyn = []
                poly = mask_to_polygon(mask,
                                       mask_threshold=mask_threshold,
                                       mask_nth=mask_nth,
                                       view=(int(x0 * scale), int(y0 * scale),
                                             int(x1 * scale), int(y1 * scale)),
                                       view_margin=view_margin,
                                       fully_connected=fully_connected)
                if len(poly) > 0:
                    px, py = polygon_to_lists(poly[0],
                                              swap_x_y=True,
                                              normalize=False)
                    if scale != 1.0:
                        px = [x / scale for x in px]
                        py = [y / scale for y in py]
                    pxn, pyn = polygon_to_lists(poly[0],
                                                swap_x_y=True,
                                                normalize=True,
                                                img_width=width,
                                                img_height=height)
                    if output_minrect:
                        bw, bh = polygon_to_minrect(poly[0])
                        if scale != 1.0:
                            bw = bw / scale
                            bh = bh / scale
                    if bbox_as_fallback >= 0:
                        if len(px) >= 3:
                            p_x0n, p_y0n, p_x1n, p_y1n = polygon_to_bbox(
                                lists_to_polygon(pxn, pyn))
                            p_area = (p_x1n - p_x0n) * (p_y1n - p_y0n)
                            b_area = (x1n - x0n) * (y1n - y0n)
                            if (b_area > 0) and (p_area / b_area <
                                                 bbox_as_fallback):
                                px = [float(i) for i in [x0, x1, x1, x0]]
                                py = [float(i) for i in [y0, y0, y1, y1]]
                                pxn = [float(i) for i in [x0n, x1n, x1n, x0n]]
                                pyn = [float(i) for i in [y0n, y0n, y1n, y1n]]
                        else:
                            px = [float(i) for i in [x0, x1, x1, x0]]
                            py = [float(i) for i in [y0, y0, y1, y1]]
                            pxn = [float(i) for i in [x0n, x1n, x1n, x0n]]
                            pyn = [float(i) for i in [y0n, y0n, y1n, y1n]]
                        if output_minrect:
                            bw = x1 - x0 + 1
                            bh = y1 - y0 + 1
                    if fit_bbox_to_polygon:
                        if len(px) >= 3:
                            x0, y0, x1, y1 = polygon_to_bbox(
                                lists_to_polygon(px, py))
                            x0n, y0n, x1n, y1n = polygon_to_bbox(
                                lists_to_polygon(pxn, pyn))

            if output_mask_image:
                mask_img = mask.copy()
                # apply threshold
                mask_img[mask_img < mask_threshold] = 0
                # mask out everything outside detected box
                m = np.zeros(mask.shape)
                s = np.ones((y1 - y0, x1 - x0))
                m[y0:y0 + s.shape[0], x0:x0 + s.shape[1]] = s
                mask_img = np.where(m == 1, mask_img, 0)
                # use label for color
                mask_img[mask_img < mask_threshold] = 0
                mask_img[
                    mask_img >= mask_threshold] = label + 1  # first label is 0
                if mask_comb is None:
                    mask_comb = mask_img
                else:
                    tmp = np.where(mask_comb == 0, mask_img, mask_comb)
                    mask_comb = tmp

            roiobj = ROIObject(x0,
                               y0,
                               x1,
                               y1,
                               x0n,
                               y0n,
                               x1n,
                               y1n,
                               label,
                               label_str,
                               score=score,
                               poly_x=px,
                               poly_y=py,
                               poly_xn=pxn,
                               poly_yn=pyn,
                               minrect_w=bw,
                               minrect_h=bh)
            result.append(roiobj)

    return result, mask_comb
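A minimal usage sketch for predictions_to_rois. This is an assumption, not code from this page: net, FastBaseTransform and the ROIObject attribute names come from the surrounding project, and all parameter values below are illustrative.

import cv2
import torch

img = torch.from_numpy(cv2.imread('test.jpg')).cuda().float()
batch = FastBaseTransform()(img.unsqueeze(0))
preds = net(batch)

rois, mask_img = predictions_to_rois(
    preds, img.shape[1], img.shape[0],      # width, height
    top_k=100, score_threshold=0.3,
    output_polygons=True, mask_threshold=0.5, mask_nth=2,
    output_minrect=False, view_margin=0, fully_connected='high',
    fit_bbox_to_polygon=False, bbox_as_fallback=-1.0, scale=1.0,
    output_mask_image=False)
for roi in rois:
    print(roi.label_str, roi.score)          # attribute names assumed from the constructor call above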
Exemple #14
0
def prep_display(dets_out,
                 img,
                 gt,
                 gt_masks,
                 h,
                 w,
                 undo_transform=True,
                 class_color=False):
    """
    Note: If undo_transform=False then h and w are allowed to be None.
    gt and gt_masks are also allowed to be None (until I reimplement that functionality).
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            masks = t[3][:args.top_k]  # We'll need this later
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    if classes.shape[0] == 0:
        return (img_gpu * 255).byte().cpu().numpy()

    def get_color(j):
        color = COLORS[(classes[j] * 5 if class_color else j * 5) %
                       len(COLORS)]
        if not undo_transform:
            color = (color[2], color[1], color[0])
        return color

    # Draw masks first on the gpu
    if args.display_masks and cfg.eval_mask_branch:
        for j in reversed(range(min(args.top_k, classes.shape[0]))):
            if scores[j] >= args.score_threshold:
                color = get_color(j)

                mask = masks[j, :, :, None]
                mask_color = mask @ (torch.Tensor(color).view(1, 3) / 255.0)
                mask_alpha = 0.45

                # Alpha only the region of the image that contains the mask
                img_gpu = img_gpu * (1 - mask) \
                        + img_gpu * mask * (1-mask_alpha) + mask_color * mask_alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_text or args.display_bboxes:
        for j in reversed(range(min(args.top_k, classes.shape[0]))):
            score = scores[j]

            if scores[j] >= args.score_threshold:
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    text_str = '%s: %.2f' % (
                        _class, score) if args.display_scores else _class

                    font_face = cv2.FONT_HERSHEY_DUPLEX
                    font_scale = 0.6
                    font_thickness = 1

                    text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                     font_scale,
                                                     font_thickness)[0]

                    text_pt = (x1, y1 - 3)
                    text_color = [255, 255, 255]

                    cv2.rectangle(img_numpy, (x1, y1),
                                  (x1 + text_w, y1 - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face,
                                font_scale, text_color, font_thickness,
                                cv2.LINE_AA)

    return img_numpy
Exemple #15
0
def evaluate(net: Yolact, dataset, train_mode=False):
    net.detect.use_fast_nms = args.fast_nms
    net.detect.use_cross_class_nms = args.cross_class_nms
    cfg.mask_proto_debug = args.mask_proto_debug

    if args.image is not None:
        if ':' in args.image:
            inp, out = args.image.split(':')
            evalimage(net, inp, out)
        else:
            evalimage(net, args.image)
        return
    elif args.images is not None:
        inp, out = args.images.split(':')
        evalimages(net, inp, out)
        return
    elif args.video is not None:
        if ':' in args.video:
            inp, out = args.video.split(':')
            evalvideo(net, inp, out)
        else:
            evalvideo(net, args.video)
        return

    frame_times = MovingAverage()
    dataset_size = len(dataset) if args.max_images < 0 else min(
        args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()

    if not args.display and not args.benchmark:
        # For each class and iou, stores tuples (score, isPositive)
        # Index ap_data[type][iouIdx][classIdx]
        ap_data = {
            'box': [[APDataObject() for _ in cfg.dataset.class_names]
                    for _ in iou_thresholds],
            'mask': [[APDataObject() for _ in cfg.dataset.class_names]
                     for _ in iou_thresholds]
        }
        detections = Detections()
    else:
        timer.disable('Load Data')

    dataset_indices = list(range(len(dataset)))

    if args.shuffle:
        random.shuffle(dataset_indices)
    elif not args.no_sort:
        # Do a deterministic shuffle based on the image ids
        #
        # I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's
        # the order of insertion. That means on python 3.6, the images come in the order they are in
        # in the annotations file. For some reason, the first images in the annotations file are
        # the hardest. To combat this, I use a hard-coded hash function based on the image ids
        # to shuffle the indices we use. That way, no matter what python version or how pycocotools
        # handles the data, we get the same result every time.
        hashed = [badhash(x) for x in dataset.ids]
        dataset_indices.sort(key=lambda x: hashed[x])

    dataset_indices = dataset_indices[:dataset_size]

    try:
        # Main eval loop
        for it, image_idx in enumerate(dataset_indices):
            timer.reset()

            with timer.env('Load Data'):
                img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(
                    image_idx)

                # Test flag, do not upvote
                if cfg.mask_proto_debug:
                    with open('scripts/info.txt', 'w') as f:
                        f.write(str(dataset.ids[image_idx]))
                    np.save('scripts/gt.npy', gt_masks)

                batch = Variable(img.unsqueeze(0))
                if args.cuda:
                    batch = batch.cuda()

            with timer.env('Network Extra'):
                preds = net(batch)

            # Perform the meat of the operation here depending on our mode.
            if args.display:
                img_numpy = prep_display(preds, img, h, w)
            elif args.benchmark:
                prep_benchmark(preds, h, w)
            else:
                prep_metrics(ap_data, preds, img, gt, gt_masks, h, w,
                             num_crowd, dataset.ids[image_idx], detections)

            # First couple of images take longer because we're constructing the graph.
            # Since that's technically initialization, don't include those in the FPS calculations.
            if it > 1:
                frame_times.add(timer.total_time())

            if args.display:
                if it > 1:
                    print('Avg FPS: %.4f' % (1 / frame_times.get_avg()))
                plt.imshow(img_numpy)
                plt.title(str(dataset.ids[image_idx]))
                plt.show()
            elif not args.no_bar:
                if it > 1: fps = 1 / frame_times.get_avg()
                else: fps = 0
                progress = (it + 1) / dataset_size * 100
                progress_bar.set_val(it + 1)
                print(
                    '\rProcessing Images  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                    %
                    (repr(progress_bar), it + 1, dataset_size, progress, fps),
                    end='')

        if not args.display and not args.benchmark:
            print()
            if args.output_coco_json:
                print('Dumping detections...')
                if args.output_web_json:
                    detections.dump_web()
                else:
                    detections.dump()
            else:
                if not train_mode:
                    print('Saving data...')
                    with open(args.ap_data_file, 'wb') as f:
                        pickle.dump(ap_data, f)

                return calc_map(ap_data)
        elif args.benchmark:
            print()
            print()
            print('Stats for the last frame:')
            timer.print_stats()
            avg_seconds = frame_times.get_avg()
            print('Average: %5.2f fps, %5.2f ms' %
                  (1 / frame_times.get_avg(), 1000 * avg_seconds))

    except KeyboardInterrupt:
        print('Stopping...')
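The deterministic "shuffle" above simply sorts indices by a hash of each image id. badhash is defined elsewhere in that repo; a minimal sketch of the same idea with a stand-in mixer (not the repo's badhash):

def toy_hash(x):
    # Any deterministic int -> int mixer gives a reproducible pseudo-random order
    x = ((x >> 16) ^ x) * 0x045d9f3b & 0xFFFFFFFF
    return ((x >> 16) ^ x) & 0xFFFFFFFF

ids = [101, 102, 103, 104, 105]           # stand-in image ids
indices = list(range(len(ids)))
hashed = [toy_hash(i) for i in ids]
indices.sort(key=lambda i: hashed[i])     # same ids -> same order on every run and Python version
print(indices)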
Exemple #16
0
    # GPU
    net = net.cuda()
    torch.set_default_tensor_type("torch.cuda.FloatTensor")

    x = torch.zeros((1, 3, cfg.max_size, cfg.max_size))
    y = net(x)

    for p in net.prediction_layers:
        print(p.last_conv_size)

    print()
    for k, a in y.items():
        print(k + ": ", a.size(), torch.sum(a))
    exit()

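    # NOTE: everything below is unreachable unless the exit() above is removed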
    net(x)
    # timer.disable('pass2')
    avg = MovingAverage()
    try:
        while True:
            timer.reset()
            with timer.env("everything else"):
                net(x)
            avg.add(timer.total_time())
            print("\033[2J")  # Moves console cursor to 0,0
            timer.print_stats()
            print("Avg fps: %.2f\tAvg ms: %.2f         " %
                  (1 / avg.get_avg(), avg.get_avg() * 1000))
    except KeyboardInterrupt:
        pass
Exemple #17
0
def prep_display(dets_out,
                 img,
                 h,
                 w,
                 undo_transform=True,
                 class_color=True,
                 mask_alpha=0.45,
                 fps_str=''):
    """
    Note: If undo_transform=False then h and w are allowed to be None.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out,
                        w,
                        h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:args.top_k]
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)
        #color_idx = classes[j]

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending on the source
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ],
                           dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except inside the masks, where it is 1 - mask_alpha
        inv_alph_masks = masks * (-mask_alpha) + 1

        # I did the math for this on pen and paper. This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider -
                                              1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale,
                                         font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (
                    _class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face,
                                                 font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1),
                              (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

    return img_numpy
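A small self-check of the cumprod compositing block above, assuming random tensors. Note the closed form blends as if the naive loop ran in reverse index order; the two orderings only differ where masks overlap.

import torch

num_dets, h, w = 4, 8, 8
mask_alpha = 0.45
masks = (torch.rand(num_dets, h, w, 1) > 0.5).float()
masks_color = torch.rand(num_dets, h, w, 3) * masks * mask_alpha
img = torch.rand(h, w, 3)

# Naive blend, highest-index mask first
out_loop = img.clone()
for j in reversed(range(num_dets)):
    out_loop = out_loop * (masks[j] * (-mask_alpha) + 1) + masks_color[j]

# Vectorized version from prep_display
inv_alph = masks * (-mask_alpha) + 1
summand = masks_color[0]
if num_dets > 1:
    cumul = inv_alph[:num_dets - 1].cumprod(dim=0)
    summand = summand + (masks_color[1:] * cumul).sum(dim=0)
out_vec = img * inv_alph.prod(dim=0) + summand

print(torch.allclose(out_loop, out_vec, atol=1e-6))  # True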
Exemple #18
0
    def forward(self, x):
        """ The input should be of size [batch_size, 3, img_h, img_w] """

        # plt.imshow(x.permute(0,2,3,1)[0,:,:,:].detach().cpu().numpy())
        # plt.savefig('visual_test/input.png')
        # plt.cla()

        with timer.env('backbone'):
            outs = self.backbone(x)

        if cfg.fpn is not None:
            with timer.env('fpn'):
                # Use backbone.selected_layers because we overwrote self.selected_layers
                outs = [outs[i] for i in cfg.backbone.selected_layers]
                outs = self.fpn(outs)

        proto_out = None
        if cfg.fpn_fusion is True:
            fusion_maps = self.fusion_module(
                outs[:self.fusion_layers]
            )  # fuse the feature maps of all selected levels into a single map

        if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
            with timer.env('proto'):
                proto_x = x if self.proto_src is None else outs[self.proto_src]

                # FPN Fusion
                if cfg.proto_src_fusion is True:
                    proto_x = fusion_maps

                if cfg.cross_attention_fusion is True:
                    P_query = outs[0]
                    proto_x = P_query

                    for layer in range(self.fusion_layers):
                        z = self.CALayer(x_query=P_query,
                                         x_key=outs[layer]) - P_query
                        proto_x = proto_x + z

                if self.num_grids > 0:
                    grids = self.grid.repeat(proto_x.size(0), 1, 1, 1)
                    proto_x = torch.cat([proto_x, grids], dim=1)

                if cfg.proto_coordconv:
                    proto_x = self.addcoords(proto_x)

                proto_out = self.proto_net(proto_x)
                proto_out = cfg.mask_proto_prototype_activation(proto_out)

                if cfg.mask_proto_prototypes_as_features:
                    # Clone here because we don't want to permute this, though idk if contiguous makes this unnecessary
                    proto_downsampled = proto_out.clone()

                    if cfg.mask_proto_prototypes_as_features_no_grad:
                        proto_downsampled = proto_out.detach()

                # Move the features last so the multiplication is easy
                proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

                if cfg.mask_proto_bias:
                    bias_shape = [x for x in proto_out.size()]
                    bias_shape[-1] = 1
                    proto_out = torch.cat(
                        [proto_out, torch.ones(*bias_shape)], -1)

        with timer.env('pred_heads'):
            pred_outs = {'loc': [], 'conf': [], 'mask': [], 'priors': []}

            if cfg.use_instance_coeff:
                pred_outs['inst'] = []

            for idx, pred_layer in zip(self.selected_layers,
                                       self.prediction_layers):
                pred_x = outs[idx]

                if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_prototypes_as_features:
                    # Scale the prototypes down to the current prediction layer's size and add it as inputs
                    proto_downsampled = F.interpolate(
                        proto_downsampled,
                        size=outs[idx].size()[2:],
                        mode='bilinear',
                        align_corners=False)
                    pred_x = torch.cat([pred_x, proto_downsampled], dim=1)

                # A hack for the way dataparallel works
                if cfg.share_prediction_module and pred_layer is not self.prediction_layers[
                        0]:
                    pred_layer.parent = [self.prediction_layers[0]]

                if cfg.ins_coordconv:
                    pred_x = self.addcoords(pred_x)

                p = pred_layer(pred_x)

                for k, v in p.items():
                    pred_outs[k].append(v)

        # ===revised===
        num_priors = []
        for k, v in pred_outs.items():
            if k == 'loc':
                for _v in v:
                    num_priors.append(_v.size(1))
            pred_outs[k] = torch.cat(v, -2)
        pred_outs['layer'] = num_priors

        if proto_out is not None:
            pred_outs['proto'] = proto_out

        if self.training:

            # For the extra loss functions
            if cfg.use_class_existence_loss:
                pred_outs['classes'] = self.class_existence_fc(
                    outs[-1].mean(dim=(2, 3)))

            with timer.env('segm'):
                if cfg.use_semantic_segmentation_loss:
                    sem_in = None
                    if cfg.sem_src_fusion is True:
                        sem_in = fusion_maps
                    elif cfg.sem_lincomb is True:
                        sem_in = outs[-1]

                    if cfg.sem_coordconv:
                        sem_in = self.addcoords(sem_in)

                    pred_outs['segm'] = self.semantic_seg_conv(sem_in)
                    # pred_outs['segm'] = self.semantic_seg_conv(outs[-1]) #lincomb version

            return pred_outs
        else:
            if cfg.use_sigmoid_focal_loss:
                # Note: even though conf[0] exists, this mode doesn't train it so don't use it
                pred_outs['conf'] = torch.sigmoid(pred_outs['conf'])
            elif cfg.use_objectness_score:
                # See focal_loss_sigmoid in multibox_loss.py for details
                objectness = torch.sigmoid(pred_outs['conf'][:, :, 0])
                pred_outs['conf'][:, :,
                                  1:] = objectness[:, :, None] * F.softmax(
                                      pred_outs['conf'][:, :, 1:], -1)
                pred_outs['conf'][:, :, 0] = 1 - objectness
            else:
                pred_outs['conf'] = F.softmax(pred_outs['conf'], -1)

            if cfg.use_sem_output is True:
                sem_in = None
                if cfg.sem_src_fusion is True:
                    sem_in = fusion_maps
                elif cfg.sem_lincomb is True:
                    sem_in = outs[-1]

                if cfg.sem_coordconv:
                    sem_in = self.addcoords(sem_in)

                pred_outs['segm'] = self.semantic_seg_conv(sem_in)

            return self.detect(pred_outs)
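A numeric sketch of the objectness decomposition used above, assuming a [batch, priors, 1 objectness + num_classes] conf tensor: each class score is P(object) * P(class | object) and the background entry is 1 - P(object), so every prior's scores sum to 1.

import torch
import torch.nn.functional as F

conf = torch.randn(1, 3, 5)                # [batch, priors, 1 + 4 classes]
objectness = torch.sigmoid(conf[:, :, 0])  # P(object)
cls = objectness[:, :, None] * F.softmax(conf[:, :, 1:], dim=-1)
background = 1 - objectness
print(background + cls.sum(dim=-1))        # all ones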
Exemple #19
0
def _bbox_iou(bbox1, bbox2, iscrowd=False):
    with timer.env('BBox IoU'):
        ret = jaccard(bbox1, bbox2, iscrowd)
    return ret.cpu()
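jaccard here comes from YOLACT's box_utils. For reference, a minimal pairwise IoU in plain PyTorch, assuming point-form boxes [x1, y1, x2, y2] and ignoring the iscrowd variant:

import torch

def pairwise_iou(a, b):
    """a: [N, 4], b: [M, 4] point-form boxes; returns [N, M] IoU."""
    lt = torch.max(a[:, None, :2], b[None, :, :2])   # intersection top-left
    rb = torch.min(a[:, None, 2:], b[None, :, 2:])   # intersection bottom-right
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    area_a = ((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]))[:, None]
    area_b = ((b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]))[None, :]
    return inter / (area_a + area_b - inter)

a = torch.tensor([[0., 0., 10., 10.]])
b = torch.tensor([[5., 5., 15., 15.]])
print(pairwise_iou(a, b))  # tensor([[0.1429]])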
Exemple #20
0
def postprocess(det_output,
                w,
                h,
                batch_idx=0,
                interpolation_mode='bilinear',
                visualize_lincomb=False,
                crop_masks=True,
                score_threshold=0):
    """
    Postprocesses the output of Sewer in test mode into a format that makes sense,
    accounting for all the possible configuration settings.

    Args:
        - det_output: The list of dicts that Detect outputs.
        - w: The real width of the image.
        - h: The real height of the image.
        - batch_idx: If you have multiple images for this batch, the image's index in the batch.
        - interpolation_mode: Can be 'nearest' | 'area' | 'bilinear' (see torch.nn.functional.interpolate)

    Returns 4 torch Tensors (in the following order):
        - classes [num_det]: The class idx for each detection.
        - scores  [num_det]: The confidence score for each detection.
        - boxes   [num_det, 4]: The bounding box for each detection in absolute point form.
        - masks   [num_det, h, w]: Full image masks for each detection.
    """

    dets = det_output[batch_idx]
    net = dets['net']
    dets = dets['detection']

    if dets is None:
        return [torch.Tensor()
                ] * 4  # Warning, this is 4 copies of the same thing

    if score_threshold > 0:
        keep = dets['score'] > score_threshold

        for k in dets:
            if k != 'proto':
                dets[k] = dets[k][keep]

        if dets['score'].size(0) == 0:
            return [torch.Tensor()] * 4

    # Actually extract everything from dets now
    classes = dets['class']
    boxes = dets['box']
    scores = dets['score']
    masks = dets['mask']

    if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
        # At this point, masks holds only the mask coefficients
        proto_data = dets['proto']

        # Test flag, do not upvote
        if cfg.mask_proto_debug:
            np.save('scripts/proto.npy', proto_data.cpu().numpy())

        if visualize_lincomb:
            display_lincomb(proto_data, masks)

        masks = proto_data @ masks.t()
        masks = cfg.mask_proto_mask_activation(masks)

        # Crop masks to their boxes before upsampling (cheaper at prototype resolution)
        if crop_masks:
            masks = crop(masks, boxes)

        # Permute into the correct output shape [num_dets, proto_h, proto_w]
        masks = masks.permute(2, 0, 1).contiguous()

        if cfg.use_maskiou:
            with timer.env('maskiou_net'):
                with torch.no_grad():
                    maskiou_p = net.maskiou_net(masks.unsqueeze(1))
                    maskiou_p = torch.gather(
                        maskiou_p, dim=1,
                        index=classes.unsqueeze(1)).squeeze(1)
                    if cfg.rescore_mask:
                        if cfg.rescore_bbox:
                            scores = scores * maskiou_p
                        else:
                            # Keep box and mask scores as a pair (as upstream
                            # YOLACT does); np.concatenate fails on CUDA tensors
                            scores = [scores, scores * maskiou_p]

        # Scale masks up to the full image
        masks = F.interpolate(masks.unsqueeze(0), (h, w),
                              mode=interpolation_mode,
                              align_corners=False).squeeze(0)

        # Binarize the masks
        masks.gt_(0.5)

    boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0],
                                                    boxes[:, 2],
                                                    w,
                                                    cast=False)
    boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1],
                                                    boxes[:, 3],
                                                    h,
                                                    cast=False)
    boxes = boxes.long()

    if cfg.mask_type == mask_type.direct and cfg.eval_mask_branch:
        # Upscale masks
        full_masks = torch.zeros(masks.size(0), h, w)

        for jdx in range(masks.size(0)):
            x1, y1, x2, y2 = boxes[jdx, :]

            mask_w = x2 - x1
            mask_h = y2 - y1

            # Just in case
            if mask_w * mask_h <= 0 or mask_w < 0:
                continue

            mask = masks[jdx, :].view(1, 1, cfg.mask_size, cfg.mask_size)
            mask = F.interpolate(mask, (mask_h, mask_w),
                                 mode=interpolation_mode,
                                 align_corners=False)
            mask = mask.gt(0.5).float()
            full_masks[jdx, y1:y2, x1:x2] = mask

        masks = full_masks

    return classes, scores, boxes, masks
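A short sketch of how these four return values are typically consumed, mirroring the Copy blocks elsewhere on this page (preds, w, h and top_k are assumed to come from the caller):

t = postprocess(preds, w, h, crop_masks=True, score_threshold=0.15)
if t[0].numel() > 0:
    idx = t[1].argsort(0, descending=True)[:top_k]   # sort by score
    masks = t[3][idx]                                # keep masks on the GPU
    classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]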
Exemple #21
0
def image_callback(image_data):
    time_start = time.time()
    global cv_image
    cv_image = np.frombuffer(image_data.data,
                             dtype=np.uint8).reshape(image_data.height,
                                                     image_data.width, -1)

    # region_output is an 8x4 array; row i stores the info for region i
    # Per row: column 1 is the pollution level (0-4), column 2 the vegetation type
    # (0 none, 1 grass, 2 shrub, 3 flower), column 3 the pedestrian flag (0 no, 1 yes),
    # and column 4 the region ID (1-8)
    region_output = np.zeros((8, 4))
    for region_i in range(8):
        region_output[region_i, 3] = region_i + 1

    with torch.no_grad():
        # Object detection
        frame = torch.from_numpy(cv_image).cuda().float()
        batch = FastBaseTransform()(frame.unsqueeze(0))
        preds = net(batch)

        # Pair up each target's mask (target_masks), class (target_classes),
        # confidence (target_scores) and bounding box (target_boxes)
        h, w, _ = frame.shape
        with timer.env('Postprocess'):
            save = cfg.rescore_bbox
            cfg.rescore_bbox = True
            # Detection results
            t = postprocess(preds,
                            w,
                            h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
            cfg.rescore_bbox = save
        with timer.env('Copy'):
            idx = t[1].argsort(0, descending=True)[:args.top_k]
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][idx]
            classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        num_dets_to_consider = min(args.top_k, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < args.score_threshold:
                num_dets_to_consider = j
                break

        if num_dets_to_consider > 0:
            target_masks = masks[:num_dets_to_consider, :, :]
            target_classes = classes[:num_dets_to_consider]
            target_scores = scores[:num_dets_to_consider]
            target_boxes = boxes[:num_dets_to_consider, :]

            # Display the detection results
            if display_switch:
                result_image = result_display(frame, target_masks,
                                              target_classes, target_scores,
                                              target_boxes,
                                              num_dets_to_consider)
            else:
                result_image = frame.byte().cpu().numpy()

            # Separate rubbish targets from vegetation targets
            check_k = 0
            rubbish_remain_list = []
            vegetation_remain_list = []
            rubbish_items = [
                'ads', 'cigarette', 'firecracker', 'glass bottle', 'leaves',
                'metal', 'paper', 'peel', 'plastic', 'solid clod',
                'solid crumb'
            ]
            vegetation_items = ['grass', 'shrub', 'flower']
            while check_k < target_classes.shape[0]:
                if cfg.dataset.class_names[
                        target_classes[check_k]] in rubbish_items:
                    rubbish_remain_list.append(check_k)
                if cfg.dataset.class_names[
                        target_classes[check_k]] in vegetation_items:
                    vegetation_remain_list.append(check_k)
                check_k += 1

            rubbish_masks = target_masks[rubbish_remain_list, :, :]
            rubbish_classes = target_classes[rubbish_remain_list]
            rubbish_scores = target_scores[rubbish_remain_list]
            rubbish_boxes = target_boxes[rubbish_remain_list, :]

            vegetation_masks = target_masks[vegetation_remain_list, :, :]
            vegetation_classes = target_classes[vegetation_remain_list]
            vegetation_scores = target_scores[vegetation_remain_list]
            vegetation_boxes = target_boxes[vegetation_remain_list, :]

            rubbish_num = len(rubbish_remain_list)
            vegetation_num = len(vegetation_remain_list)

            # Handle rubbish targets
            if rubbish_num > 0:
                # Sample points along the mask boundaries
                result_image, rubbish_boundary_pts = get_boundary(
                    result_image, rubbish_num, rubbish_masks, cpt_num=10)
                # s_polygon stores each rubbish target's ground-projected area in the world frame
                s_polygon = np.zeros((rubbish_num, 1))
                rubbish_list = [
                    'ads', 'cigarette', 'firecracker', 'glass bottle',
                    'leaves', 'metal', 'paper', 'peel', 'plastic',
                    'solid clod', 'solid crumb'
                ]
                rubbish_weight_coefficient_list = [
                    80, 200, 200, 8000, 80, 1050, 80, 6000, 775, 15750, 4000
                ]
                # region_w accumulates the total rubbish mass in each region
                region_w = np.zeros((8, 1))
                for i in range(rubbish_boundary_pts.shape[0]):
                    effective_pt_num = 0
                    b_x, b_z = [], []
                    b_area_id = []
                    for b_pt in range(rubbish_boundary_pts.shape[1]):
                        b_pt_u = rubbish_boundary_pts[i, b_pt, 0, 0]
                        b_pt_v = rubbish_boundary_pts[i, b_pt, 0, 1]
                        # Skip points with invalid pixel coordinates (u=0, v=0)
                        if b_pt_u or b_pt_v:
                            loc_b_pt = p2d_table[b_pt_u, b_pt_v]
                            # Skip points with invalid world coordinates (x=0, z=0)
                            if loc_b_pt[0] or loc_b_pt[1]:
                                effective_pt_num += 1
                                b_x.append(loc_b_pt[0])
                                b_z.append(loc_b_pt[1])
                                b_area_id.append(
                                    CameraT.whatArea(loc_b_pt[0], loc_b_pt[1]))
                    if effective_pt_num >= 3:
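                        # Shoelace formula: polygon area = |sum(x_i*z_(i+1) - z_i*x_(i+1))| / 2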
                        s_sum = 0
                        for b_pt in range(effective_pt_num):
                            s_sum += b_x[b_pt] * b_z[
                                (b_pt + 1) %
                                effective_pt_num] - b_z[b_pt] * b_x[
                                    (b_pt + 1) % effective_pt_num]
                        s_polygon[i, 0] = abs(s_sum) / 2
                        for b_pt in range(effective_pt_num):
                            # Skip points with an invalid region ID (ID=0)
                            if b_area_id[b_pt]:
                                rubbish_weight = s_polygon[
                                    i, 0] * rubbish_weight_coefficient_list[
                                        rubbish_list.index(
                                            cfg.dataset.class_names[
                                                rubbish_classes[i]])]
                                region_w[
                                    b_area_id[b_pt] - 1,
                                    0] += rubbish_weight / effective_pt_num

                # Map each region's rubbish mass to a pollution level
                for region_i in range(8):
                    if 0 < region_w[region_i, 0] <= 50:
                        region_output[region_i, 0] = 1
                    elif 50 < region_w[region_i, 0] <= 100:
                        region_output[region_i, 0] = 2
                    elif 100 < region_w[region_i, 0] <= 150:
                        region_output[region_i, 0] = 3
                    elif region_w[region_i, 0] > 150:
                        region_output[region_i, 0] = 4
                if display_switch:
                    print('region_w')
                    print(region_w)
                    result_image = w_display(result_image,
                                             region_w,
                                             font_face=cv2.FONT_HERSHEY_DUPLEX,
                                             font_scale=0.5,
                                             font_thickness=1)

            # Handle vegetation targets
            if vegetation_num > 0:
                # Sample points along the mask boundaries
                result_image, vegetation_boundary_pts = get_boundary(
                    result_image, vegetation_num, vegetation_masks, cpt_num=20)
                # region_vegetation_type stores the vegetation type of each region
                region_vegetation_type = np.zeros((8, 1))
                for i in range(vegetation_boundary_pts.shape[0]):
                    effective_pt_num = 0
                    b_area_id = []
                    for b_pt in range(vegetation_boundary_pts.shape[1]):
                        b_pt_u = vegetation_boundary_pts[i, b_pt, 0, 0]
                        b_pt_v = vegetation_boundary_pts[i, b_pt, 0, 1]
                        # Skip points with invalid pixel coordinates (u=0, v=0)
                        if b_pt_u or b_pt_v:
                            loc_b_pt = p2d_table[b_pt_u, b_pt_v]
                            # Skip points with invalid world coordinates (x=0, z=0)
                            if loc_b_pt[0] or loc_b_pt[1]:
                                effective_pt_num += 1
                                b_area_id.append(
                                    CameraT.whatArea(loc_b_pt[0], loc_b_pt[1]))
                    for b_pt in range(effective_pt_num):
                        # Skip points with an invalid region ID (ID=0)
                        if b_area_id[b_pt]:
                            # Priority: flower (3) > shrub (2) > grass (1)
                            vegetation_list = ['grass', 'shrub', 'flower']
                            v_type = vegetation_list.index(
                                cfg.dataset.class_names[
                                    vegetation_classes[i]]) + 1
                            current_v_type = region_vegetation_type[
                                b_area_id[b_pt] - 1, 0]
                            if v_type > current_v_type:
                                region_vegetation_type[b_area_id[b_pt] - 1,
                                                       0] = v_type

                for region_i in range(8):
                    region_output[region_i,
                                  1] = region_vegetation_type[region_i, 0]

        else:
            result_image = frame.byte().cpu().numpy()

    areasinfo_msg = AreasInfo()
    for region_i in range(8):
        region_output_msg = AreaInfo()
        region_output_msg.rubbish_grade = int(region_output[region_i, 0])
        region_output_msg.has_person = bool(region_output[region_i, 2])
        region_output_msg.vegetation_type = int(region_output[region_i, 1])
        region_output_msg.area_id = int(region_output[region_i, 3])
        areasinfo_msg.infos.append(region_output_msg)
    pub.publish(areasinfo_msg)

    if display_switch:
        print('region_output')
        print(region_output)
        result_image = CameraT.drawLine(result_image, w=1)
        result_image = output_display(result_image,
                                      region_output,
                                      font_face=cv2.FONT_HERSHEY_DUPLEX,
                                      font_scale=0.5,
                                      font_thickness=1)
        cv2.putText(result_image, str(time.time()), (5, 20),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
        cv2.imshow("result_image", result_image)
    if record_switch:
        video_out.write(result_image)

    if cv2.waitKey(1) == 27:
        if record_switch:
            video_out.release()
        cv2.destroyAllWindows()
        rospy.signal_shutdown("It's over.")

    time_end_all = time.time()
    print("totally time cost:", time_end_all - time_start)
Exemple #22
0
    # detect images
    if args.image is not None:
        images = glob.glob(args.image + '/*.jpg')
        num = len(images)

        for i, one_img in enumerate(images):
            img_name = one_img.split('/')[-1]
            img_origin = torch.from_numpy(cv2.imread(one_img)).cuda().float()
            img_h, img_w = img_origin.shape[0], img_origin.shape[1]
            img_trans = FastBaseTransform()(img_origin.unsqueeze(0))
            net_outs = net(img_trans)
            nms_outs = NMS(net_outs, args.traditional_nms)

            show_lincomb = bool(args.show_lincomb and args.image_path)
            with timer.env('after nms'):
                results = after_nms(nms_outs,
                                    img_h,
                                    img_w,
                                    show_lincomb=show_lincomb,
                                    crop_masks=not args.no_crop,
                                    visual_thre=args.visual_thre,
                                    img_name=img_name)

                torch.cuda.synchronize()

            img_numpy = draw_img(results, img_origin, args)

            cv2.imwrite(f'{img_path}/{img_name}', img_numpy)
            print(f'{i + 1}/{num}', end='\r')