Example #1
    def interpret_out(self, out, img_w, img_h):
        num_classes = self._num_classes
        iou_threshold = self._iou_threshold
        threshold = self._threshold

        pre_boxes = [boxes[:, :, :, :, :4] for boxes in out]
        pre_boxes = [np.reshape(boxes, boxes.shape[1:]) \
            for boxes in pre_boxes]
        pre_boxes = self.coordinate_transfer(img_w, img_h, pre_boxes)

        pre_clses = [clses[:, :, :, :, 4:] for clses in out]
        pre_clses = [np.reshape(clses, (-1, clses.shape[-1])) \
            for clses in pre_clses]
        pre_clses = np.vstack(pre_clses)

        res = {}
        assert len(pre_boxes) == len(pre_clses)
        max_inds = np.argmax(pre_clses, axis=1)
        keep_inds = np.where(max_inds != 0)
        pre_boxes = pre_boxes[keep_inds]
        print(keep_inds)
        pre_clses = pre_clses[keep_inds]
        scores = np.exp(pre_clses)  \
            / np.sum(np.exp(pre_clses), axis=1).reshape([-1, 1])
        print("len:", len(pre_boxes))
        for i in range(1, num_classes):
            keep_inds = np.where(scores[:, i] >= threshold)[0]
            print(keep_inds)
            dets = np.hstack([
                pre_boxes[keep_inds], scores[:, i][keep_inds].reshape([-1, 1])
            ])
            keep_inds = nms(dets, iou_threshold)
            if len(keep_inds) > 0:
                res[str(i)] = dets[keep_inds]
        return res
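The nms helper these snippets call is defined in each project and not shown on this page. As a point of reference, the sketch below is a minimal greedy, IoU-based implementation of the interface most of the NumPy-based examples assume: dets is an (N, 5) array of [x1, y1, x2, y2, score] rows and the return value is the list of row indices to keep. It is an illustrative sketch, not the exact helper used by any particular example.

import numpy as np

def nms(dets, thresh):
    # dets: (N, 5) array of [x1, y1, x2, y2, score]; thresh: IoU threshold
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # box indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current highest-scoring box with the rest
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes whose IoU with the kept box exceeds the threshold
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep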
Example #2
    def detect(self, img):
        h, w = img.shape[:2]
        inp = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        inp = cv2.resize(inp, (self.input_size, self.input_size))
        inp = (inp - 127.5) / 128.0

        # shape of y_pred: (?, num_boxes, 4 + num_classes)
        outs = self.sess.run(self.net.prediction,
                             feed_dict={self.inputs: np.array([inp])})[0]
        boxes = outs[:, :4]
        preds = outs[:, 4:]
        decoded_boxes = self.decode_boxes(boxes)

        boxes = []
        for box, pred in zip(decoded_boxes, preds):
            xmin, ymin, xmax, ymax = box
            clsid = np.argmax(pred)
            if clsid == 0:
                # in the case of background
                continue
            clsid -= 1  # decrement to skip background class
            prob = np.max(pred)
            if prob < self.threshold:
                continue
            left = xmin * w
            top = ymin * h
            right = xmax * w
            bottom = ymax * h
            boxes.append([clsid, prob, left, top, right, bottom])

        if len(boxes) > 0:
            return nms(boxes)
        else:
            return {}
Example #3
    def detect(self, img):
        img_h, img_w = img.shape[:2]
        img = self.preprocess(img)

        outs = self.sess.run(self.net.prediction, feed_dict={self.inputs: np.array([img])})[0]

        # shape of y_pred: (?, num_boxes, 4 + num_classes)
        boxes = outs[:, :4]
        preds = outs[:, 4:]
        decoded_boxes = self.decode_boxes(boxes)

        results = []
        for box, pred in zip(decoded_boxes, preds):
            xmin, ymin, xmax, ymax = box
            clsid = np.argmax(pred)
            if clsid == 0:
                # in the case of background
                continue
            clsid -= 1 # decrement to skip background class
            prob = np.max(pred)
            left = xmin * img_w
            top = ymin * img_h
            right = xmax * img_w
            bottom = ymax * img_h
            results.append([clsid, prob, left, top, right, bottom])

        if len(results) > 0:
            return nms(results, self.threshold)
        else:
            return {}
Example #4
def get_crop_images(feature_map, im, pixel_threshold=0.9, quiet=True):
    shape = im.size
    d_width, d_height = resize_image(im, MAX_IMAGE_SIZE)
    scale_ratio_w = d_width / im.width
    scale_ratio_h = d_height / im.height
    y = feature_map
    y = np.squeeze(y, axis=0)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    txt_items = []
    for score, geo in zip(quad_scores, quad_after_nms):
        if np.amin(score) > 0:
            rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
            rescaled_geo_list = np.reshape(rescaled_geo, (8, )).tolist()
            txt_item = list(map(int, rescaled_geo_list))
            poly = [[txt_item[0], txt_item[1]], [txt_item[6], txt_item[7]],
                    [txt_item[4], txt_item[5]], [txt_item[2], txt_item[3]]]
            txt_items.append(poly)
        elif not quiet:
            print('quad invalid with vertex num less than 4.')
    crop_images, polys = rotate.rotate_img(txt_items, np.array(im))
    return crop_images, polys, shape
Example #5
 def detect_from_image(self, session, full_image, visualize=False):
     # step1: preprocess, image resize, grayscale, equalizeHist
     image, _ = self.resize(full_image)
     image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
     image_gray = cv2.equalizeHist(image_gray)
     # face detect
     face_regions = self.face_detector.detectMultiScale(image_gray,
                                                        1.05,
                                                        4,
                                                        minSize=(60, 60))
     # eyes detect based on face region
     eyes_regions = []
     for (x, y, w, h) in face_regions:
         face_image = image_gray[y:y + h, x:x + w]
         # resize face, transform to high resolution
         face_image, scale = self.resize(face_image)
         # first eyes detection
         eyes_roi = self.eyes_detector.detectMultiScale(face_image,
                                                        1.05,
                                                        2,
                                                        minSize=(60, 60),
                                                        maxSize=(120, 120))
         # second eyes detection
         for (ex, ey, ew, eh) in eyes_roi:
             eye = face_image[ey:ey + eh, ex:ex + ew]
             pred = self.eyes_selector.predict(session, eye)
             if pred == 1:
                 eyes_regions.append([
                     x + int(ex / scale), y + int(ey / scale),
                     int(ew / scale),
                     int(eh / scale)
                 ])
     # apply nms to reduce overlapping boxes
     eyes_regions = nms(eyes_regions, thres=0.5)
     # visualize
     if visualize:
         # plot
         for (x, y, w, h) in face_regions:
             cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
         for (x, y, w, h) in eyes_regions:
             cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
         # show the detection boxes in a window
         cv2.imshow("eye detect", image)
         cv2.waitKey(5)
     # post process: generate left/right eye bounding boxes
     # if the number of detected eye boxes is not exactly 2, return None
     if len(eyes_regions) != 2:
         return None, None
     # convert to (left, right) pair format
     eyes_bndbox = sorted(eyes_regions,
                          key=lambda bbox: bbox[0] + bbox[2] // 2)
     eyes_region = [
         cv2.resize(image_gray[y:y + h, x:x + w],
                    (self.image_size, self.image_size))
         for (x, y, w, h) in eyes_bndbox
     ]
     return eyes_bndbox, eyes_region
Example #6
    def interpret_output(self, output):
        # NOTE: duplicate code here

        class_prob = output[0:self._boundary1]
        class_prob = np.reshape(
            class_prob, [self._cell_size, self._cell_size, self._num_classes])

        scales = output[self._boundary1:self._boundary2]
        scales = np.reshape(
            scales, [self._cell_size, self._cell_size, self._boxes_per_cell])

        boxes = output[self._boundary2:]
        boxes = np.reshape(
            boxes, [self._cell_size, self._cell_size, self._boxes_per_cell, 4])

        offset = np.array(
            [np.arange(self._cell_size)] * self._cell_size * self._boxes_per_cell)
        offset = np.reshape(
            offset, [self._boxes_per_cell, self._cell_size, self._cell_size])
        offset = np.transpose(offset, [1, 2, 0])

        # boxes[:, :, :, 0] += offset
        # boxes[:, :, :, 1] += np.transpose(offset, (1, 0, 2))
        boxes[:, :, :, :2] = 1.0 * boxes[:, :, :, :2] / self._cell_size
        boxes[:, :, :, 2:] = np.square(boxes[:, :, :, 2:])
        # duplicate code here
        boxes *= self._image_size
        self.coordinate_transfer(boxes)

        probs = np.zeros((self._cell_size, self._cell_size,
                          self._boxes_per_cell, self._num_classes))

        for i in range(self._boxes_per_cell):
            for j in range(self._num_classes):
                tmp = scales[:, :, i] * class_prob[:, :, j]
                # zero out entries below the threshold element-wise
                # (a plain `if tmp < threshold` would fail on an array)
                probs[:, :, i, j] = tmp * (tmp >= self._thresh_hold)

        probs = np.transpose(probs, (3, 0, 1, 2))
        probs = np.reshape(
            probs,
            (self._num_classes,
             self._cell_size * self._cell_size * self._boxes_per_cell))
        boxes = np.reshape(
            boxes,
            (self._cell_size * self._cell_size * self._boxes_per_cell, 4))
        res = {}
        for i in range(len(self._classes)):
            prob = np.reshape(
                probs[i],
                [self._cell_size * self._cell_size * self._boxes_per_cell, 1])
            dets = np.hstack([boxes, prob])
            keep_inds = nms(dets, self._iou_thresh_hold)
            res[str(i)] = dets[keep_inds]
        return res
Example #7
    def visualize_heatmaps(self, img, cls_map, reg_map, clusters, prob_thresh=1, nms_thresh=1, iou=None):
        """
        Expect cls_map and reg_map to be of the form HxWxC
        """
        fy, fx, fc = np.where(cls_map >= prob_thresh)

        # print(iou.shape)
        # best_iou = iou.max(axis=3)
        # print(best_iou.shape)
        # fy, fx, fc = np.where(best_iou >= 0.5)  # neg thresh

        cy, cx = fy*self.sty + self.ofy, fx*self.stx + self.ofx
        cw = clusters[fc, 2] - clusters[fc, 0] + 1
        ch = clusters[fc, 3] - clusters[fc, 1] + 1

        # box_ovlp = best_iou[fc, fy, fx]
        num_clusters = clusters.shape[0]

        # refine bounding box
        tx = reg_map[:, :, 0*num_clusters:1*num_clusters]
        ty = reg_map[:, :, 1*num_clusters:2*num_clusters]
        tw = reg_map[:, :, 2*num_clusters:3*num_clusters]
        th = reg_map[:, :, 3*num_clusters:4*num_clusters]

        dcx = cw * tx[fy, fx, fc]
        dcy = ch * ty[fy, fx, fc]

        rx = cx + dcx
        ry = cy + dcy

        rw = cw * np.exp(tw[fy, fx, fc])
        rh = ch * np.exp(th[fy, fx, fc])

        bboxes = np.array([np.abs(rx-rw/2), np.abs(ry-rh/2), rx+rw/2, ry+rh/2]).T

        scores = cls_map[fy, fx, fc]

        dets = np.hstack((bboxes, scores[:, np.newaxis]))
        keep = nms(dets, nms_thresh)
        bboxes = dets[keep][:, 0:4]
        # bbox_iou = best_iou[fy, fx, fc]

        # print("Best bounding box", bboxes)
        # print(bboxes.shape)

        print("Number of bboxes ", bboxes.shape[0])
        for idx, bbox in enumerate(bboxes):
            bbox = np.round(np.array(bbox))
            print(bbox)
            # img = draw_bounding_box(img, bbox, {"name": "car {0}".format(np.around(bbox_iou[idx], decimals=2))})
            img = draw_bounding_box(img, bbox, {"name": "car {0}".format(idx)})

            # if idx == 20:
            #     break

        img.show(title="Heatmap visualized")
Example #8
    def interpret_output(self, img_w, img_h, output):
        # NOTE: duplicate code here

        output = np.reshape(output, output.shape[-1])
        class_prob = output[0:self._boundary1]
        class_prob = np.reshape(
            class_prob, [self._cell_size, self._cell_size, self._num_classes])

        scales = output[self._boundary1:self._boundary2]
        scales = np.reshape(
            scales, [self._cell_size, self._cell_size, self._boxes_per_cell])

        boxes = output[self._boundary2:]
        boxes = np.reshape(
            boxes, [self._cell_size, self._cell_size, self._boxes_per_cell, 4])

        offset = np.array(
            [np.arange(self._cell_size)] * self._cell_size * self._boxes_per_cell)
        offset = np.reshape(
            offset, [self._boxes_per_cell, self._cell_size, self._cell_size])
        offset = np.transpose(offset, [1, 2, 0])

        boxes[:, :, :, 0] += offset
        boxes[:, :, :, 1] += np.transpose(offset, (1, 0, 2))
        boxes[:, :, :, :2] = 1.0 * boxes[:, :, :, :2] / self._cell_size
        boxes[:, :, :, 2:] = np.square(boxes[:, :, :, 2:])
        # duplicate code here
        boxes *= self._image_size
        self.coordinate_transfer(img_w, img_h, boxes)

        probs = np.zeros((self._cell_size, self._cell_size,
                          self._boxes_per_cell, self._num_classes))

        for i in range(self._boxes_per_cell):
            for j in range(self._num_classes):
                tmp = scales[:, :, i] * class_prob[:, :, j]
                probs[:, :, i, j] = tmp * (tmp >= cfg.THRESHOLD)

        probs = np.transpose(probs, (3, 0, 1, 2))
        probs = np.reshape(
            probs,
            (self._num_classes,
             self._cell_size * self._cell_size * self._boxes_per_cell))
        boxes = np.reshape(
            boxes,
            (self._cell_size * self._cell_size * self._boxes_per_cell, 4))
        res = {}
        for i in range(len(self._classes)):
            prob = np.reshape(
                probs[i],
                [self._cell_size * self._cell_size * self._boxes_per_cell, 1])
            dets = np.hstack([boxes, prob])
            keep_inds = nms(dets, self._iou_thresh_hold)
            if len(keep_inds) > 0:
                res[str(i)] = dets[keep_inds]
        return res
Example #9
def cones_detection(target_path, output_path, model, device, conf_thres,
                    nms_thres):

    img = Image.open(target_path).convert('RGB')
    w, h = img.size
    new_width, new_height = model.img_size()
    pad_h, pad_w, ratio = calculate_padding(h, w, new_height, new_width)
    img = torchvision.transforms.functional.pad(img,
                                                padding=(pad_w, pad_h, pad_w,
                                                         pad_h),
                                                fill=(127, 127, 127),
                                                padding_mode="constant")
    img = torchvision.transforms.functional.resize(img,
                                                   (new_height, new_width))

    bw = model.get_bw()
    if bw:
        img = torchvision.transforms.functional.to_grayscale(
            img, num_output_channels=1)

    img = torchvision.transforms.functional.to_tensor(img)
    img = img.unsqueeze(0)

    with torch.no_grad():
        model.eval()
        img = img.to(device, non_blocking=True)
        # output,first_layer,second_layer,third_layer = model(img)
        output = model(img)

        for detections in output:
            detections = detections[detections[:, 4] > conf_thres]
            box_corner = torch.zeros((detections.shape[0], 4),
                                     device=detections.device)
            xy = detections[:, 0:2]
            wh = detections[:, 2:4] / 2
            box_corner[:, 0:2] = xy - wh
            box_corner[:, 2:4] = xy + wh
            probabilities = detections[:, 4]
            nms_indices = nms(box_corner, probabilities, nms_thres)
            main_box_corner = box_corner[nms_indices]
            if nms_indices.shape[0] == 0:
                continue

        pred_boxes = []
        for i in range(len(main_box_corner)):
            x0 = main_box_corner[i, 0].to('cpu').item() / ratio - pad_w
            y0 = main_box_corner[i, 1].to('cpu').item() / ratio - pad_h
            x1 = main_box_corner[i, 2].to('cpu').item() / ratio - pad_w
            y1 = main_box_corner[i, 3].to('cpu').item() / ratio - pad_h
            box = [x0, y0, x1, y1]
            pred_boxes.append(box)

        return pred_boxes
Example #10
def get_detections(model, img, templates, rf, img_transforms, prob_thresh=0.65, nms_thresh=0.3, device=None):
    model = model.to(device)
    model.eval()

    dets = np.empty((0, 6))  # store bbox (x1, y1, x2, y2), score and scale

    num_templates = templates.shape[0]

    # Evaluate over multiple scale
    scales_list = [2 ** x for x in [-1, 0, 1]]

    # convert tensor to PIL image so we can perform resizing
    image = transforms.functional.to_pil_image(img[0])

    min_side = np.min(image.size)

    for s, scale in enumerate(scales_list):
        # scale the images
        scaled_image = transforms.functional.resize(image,
                                                    int(min_side * scale))

        # normalize the images
        img = img_transforms(scaled_image)

        # add batch dimension
        img.unsqueeze_(0)

        # now run the model
        x = img.float().to(device)

        output = model(x)

        # first `num_templates` channels are class maps
        score_cls = torch.sigmoid(output[:, :num_templates, :, :])
        score_cls = score_cls.data.cpu().numpy().transpose((0, 2, 3, 1))

        score_reg = output[:, num_templates:, :, :]
        score_reg = score_reg.data.cpu().numpy().transpose((0, 2, 3, 1))

        t_bboxes, scores = get_bboxes(score_cls, score_reg,
                                      templates, prob_thresh, rf, scale)

        scales = np.ones((t_bboxes.shape[0], 1)) / scale
        # append scores at the end for NMS
        d = np.hstack((t_bboxes, scores, scales))

        dets = np.vstack((dets, d))

    # Apply NMS
    keep = nms(dets, nms_thresh)
    dets = dets[keep]

    return dets
Example #11
    def detect(self, cv_img):
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
        img = img_pil.fromarray(cv_img)
        w, h = img.size
        new_width, new_height = self.model.img_size()
        pad_h, pad_w, ratio = calculate_padding(h, w, new_height, new_width)
        img = torchvision.transforms.functional.pad(img,
                                                    padding=(pad_w, pad_h,
                                                             pad_w, pad_h),
                                                    fill=(127, 127, 127),
                                                    padding_mode="constant")
        img = torchvision.transforms.functional.resize(img,
                                                       (new_height, new_width))

        bw = self.model.get_bw()
        if bw:
            img = torchvision.transforms.functional.to_grayscale(
                img, num_output_channels=1)

        img = torchvision.transforms.functional.to_tensor(img)
        img = img.unsqueeze(0)

        with torch.no_grad():
            self.model.eval()
            img = img.to(self.device, non_blocking=True)
            # output,first_layer,second_layer,third_layer = model(img)
            output = self.model(img)

            for detections in output:
                detections = detections[detections[:, 4] > self.conf_thres]
                box_corner = torch.zeros((detections.shape[0], 4),
                                         device=detections.device)
                xy = detections[:, 0:2]
                wh = detections[:, 2:4] / 2
                box_corner[:, 0:2] = xy - wh
                box_corner[:, 2:4] = xy + wh
                probabilities = detections[:, 4]
                nms_indices = nms(box_corner, probabilities, self.nms_thres)
                main_box_corner = box_corner[nms_indices]
                if nms_indices.shape[0] == 0:
                    continue
            bboxes = []
            for i in range(len(main_box_corner)):
                x0 = main_box_corner[i, 0].to('cpu').item() / ratio - pad_w
                y0 = main_box_corner[i, 1].to('cpu').item() / ratio - pad_h
                x1 = main_box_corner[i, 2].to('cpu').item() / ratio - pad_w
                y1 = main_box_corner[i, 3].to('cpu').item() / ratio - pad_h
                bboxes.append([x0, y0, x1, y1])

        return bboxes
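Several of the PyTorch snippets here (the cones_detection function above, this detect method, and the validate function near the end of the page) call nms(box_corner, probabilities, nms_thres) on torch tensors, relying on a project-specific helper. Where such a helper is not available, torchvision.ops.nms exposes the same (boxes, scores, iou_threshold) interface and returns the indices of the kept boxes; a minimal usage sketch with made-up tensors:

import torch
from torchvision.ops import nms

# two heavily overlapping boxes and one far-away box, in (x1, y1, x2, y2) format
boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])

keep = nms(boxes, scores, iou_threshold=0.5)  # e.g. tensor([0, 2])
kept_boxes = boxes[keep]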
Example #12
def apply_nms(confidence_map, hmap, wmap, dotmap_pred_downscale=2, thresh=0.3):

    nms_conf_map, nms_conf_box = extract_conf_points(
        [confidence_map[0], confidence_map[1]], [hmap[0], hmap[1]])
    nms_conf_map, nms_conf_box = extract_conf_points(
        [confidence_map[2], nms_conf_map], [hmap[2], nms_conf_box])
    nms_conf_map, nms_conf_box = extract_conf_points(
        [confidence_map[3], nms_conf_map], [hmap[3], nms_conf_box])

    confidence_map = nms_conf_map
    hmap = nms_conf_box
    wmap = nms_conf_box

    confidence_map = np.squeeze(confidence_map)
    hmap = np.squeeze(hmap)
    wmap = np.squeeze(wmap)

    dets_idx = np.where(confidence_map > 0)

    y, x = dets_idx[-2], dets_idx[-1]
    h, w = hmap[dets_idx], wmap[dets_idx]
    x1 = x - w / 2
    x2 = x + w / 2
    y1 = y - h / 2
    y2 = y + h / 2
    scores = confidence_map[dets_idx]

    dets = np.stack([
        np.array(x1),
        np.array(y1),
        np.array(x2),
        np.array(y2),
        np.array(scores)
    ],
                    axis=1)
    # List of indices to keep
    keep = nms.nms(dets, thresh)

    y, x = dets_idx[-2], dets_idx[-1]
    h, w = hmap[dets_idx], wmap[dets_idx]
    x = x[keep]
    y = y[keep]
    h = h[keep]
    w = w[keep]

    scores = scores[keep]
    return x, y, h, w, scores
Example #13
def main():
    args = parse.parse_args()
    model_path = args.model_path
    img_path = args.img_path
    if model_path.strip() == '':
        raise ValueError('model path should not be null')
    if img_path.strip() == '':
        raise ValueError('test img path should not be null')
    model = load_model(model_path)

    test_model = Model(model.input, [
        model.get_layer('cls_output').output,
        model.get_layer('bbox_output').output
    ])
    test_model.load_weights(model_path, by_name=True, skip_mismatch=True)
    # get the input data
    inputs = get_inputs()
    # shape (1, 128, 21) shape (1, 128, 80)
    cls_output, bbox_output = test_model(inputs)
    # shape (128, 21)
    cls_output = np.squeeze(cls_output, axis=0)
    # shape (128, 80)
    bbox_output = np.squeeze(bbox_output, axis=0)
    # apply softmax
    cls_output = softmax(cls_output)
    # find the highest-scoring class for each of the 128 boxes, shape (128, )
    argmax_cls = np.argmax(cls_output, axis=1)

    # keep only the boxes whose best class is not background
    keep = argmax_cls > 0
    cls_output = cls_output[keep]
    # (n, ) n <= 128
    argmax_cls = argmax_cls[keep]
    # (n, 80)
    bbox_output = bbox_output[keep]
    scores = np.max(cls_output, axis=1)
    rects = []
    for i, bbox in enumerate(bbox_output):
        # subtract 1 to skip the background class
        cls = argmax_cls[i] - 1
        start = cls * 4
        end = start + 4
        bbox = bbox[start:end]
        rects.append(bbox)
    rects = np.asarray(rects)
    # non-maximum suppression
    keep_ind = nms(rects, scores, 0.5)
    rects = rects[keep_ind, :]
    show_rect(img_path, rects)
Example #14
    def query_posecnn_detection(self, classes):

        # detection information of the target object
        rois_est = np.zeros((0, 7), dtype=np.float32)
        # TODO look for multiple object instances
        max_objects = 5
        for i in range(len(classes)):

            for object_id in range(max_objects):

                # check posecnn frame
                cls = classes[i]
                suffix_frame = '_%02d_roi' % (object_id)
                source_frame = 'posecnn/' + cls + suffix_frame

                try:
                    # print('look for posecnn detection ' + source_frame)
                    trans, rot = self.listener.lookupTransform(
                        self.target_frame, source_frame, rospy.Time(0))
                    n = trans[0]
                    secs = trans[1]
                    now = rospy.Time.now()
                    if abs(now.secs - secs) > 1.0:
                        print('posecnn pose for %s time out %f %f' % (
                            source_frame, now.secs, secs))
                        continue
                    roi = np.zeros((1, 7), dtype=np.float32)
                    roi[0, 0] = 0
                    roi[0, 1] = i
                    roi[0, 2] = rot[0] * n
                    roi[0, 3] = rot[1] * n
                    roi[0, 4] = rot[2] * n
                    roi[0, 5] = rot[3] * n
                    roi[0, 6] = trans[2]
                    rois_est = np.concatenate((rois_est, roi), axis=0)
                    print('find posecnn detection ' + source_frame)
                except:
                    continue

        if rois_est.shape[0] > 0:
            # non-maximum suppression within class
            index = nms(rois_est, 0.2)
            rois_est = rois_est[index, :]

        return rois_est
Example #15
    def detect_face(self, img_raw):
        img = np.float32(img_raw)

        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.cuda()
        scale = scale.cuda()

        loc, conf = self.model(img)  # forward pass

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data

        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # ignore low scores

        inds = np.where(scores > self.args.confidence_threshold)[0]
        boxes = boxes[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.args.top_k]
        boxes = boxes[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        # keep = py_cpu_nms(dets, args.nms_threshold)

        keep = nms(torch.tensor(boxes), torch.tensor(scores), overlap=self.args.nms_threshold)
        dets = dets[keep, :]

        # keep top-K faster NMS
        dets = dets[:self.args.keep_top_k, :]
        return dets
Example #16
    def generate_paths(self):
        for cls_ix in range(1, self.num_classes): # skip background
            all_scores = np.ndarray(shape=(self.num_frame_pairs,), dtype=object)
            cls_boxes = np.ndarray(shape=(self.num_frame_pairs,), dtype=object)
            cls_scores = np.ndarray(shape=(self.num_frame_pairs,), dtype=object)
            print('Class: {}'.format(self.classes[cls_ix]))
            self._curr_class = self.classes[cls_ix]
            for pair_ix in range(self.num_frame_pairs):
                boxes_t0 = self.pred_boxes[pair_ix][0].clone()
                scores_t0 = self.scores[pair_ix][0][:,cls_ix].clone()
                pick = torch.nonzero(scores_t0>0.0).view(-1)
                # If no good scores for this frame/class, go to next frame
                assert pick.numel()>0, "No detections found for this class."
                if pick.numel()==0:
                    all_scores[pair_ix] = torch.cuda.FloatTensor(0) # empty tensor
                    cls_boxes[pair_ix] = torch.cuda.FloatTensor(0) # empty tensor
                    cls_scores[pair_ix] = torch.cuda.FloatTensor(0) # empty tensor
                    continue 
                # Get scores that passed filter and sort highest-->lowest
                scores_t0 = scores_t0[pick]
                boxes_t0 = boxes_t0[pick, :]
                all_scores_t0 = self.scores[pair_ix][0][pick, :]
                _, pick = torch.sort(scores_t0, descending=True)
                # Take at most 50 per frame per class
                to_pick = min(10,pick.numel())
                pick = pick[:to_pick]
                scores_t0 = scores_t0[pick]
                boxes_t0 = boxes_t0[pick,:]
                all_scores_t0 = all_scores_t0[pick,:]
                cls_dets_t0 = torch.cat([boxes_t0, scores_t0.contiguous().view(-1,1)], dim=1)
                pick = torch.from_numpy(nms(cls_dets_t0.numpy(), 0.3))
                # TODO check pick is sorted in descending order
                # Take top 10 dets after nms
                pick = pick.view(-1).long()                
                pick = pick[:min(10, pick.numel())]

                cls_boxes[pair_ix] = boxes_t0[pick, :].clone()
                cls_scores[pair_ix] = scores_t0[pick].clone()
                all_scores[pair_ix] = all_scores_t0[pick, :].clone()

            paths = self.incremental_linking(cls_boxes, cls_scores, all_scores)
            print("Finish incremental linking")
            self.all_paths[cls_ix] = paths
Example #17
def detect(full_image, visualize=False):
    # step1: preprocess, image resize, grayscale, equalizeHist
    image, _ = resize(full_image)
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_gray = cv2.equalizeHist(image_gray)
    # face detect
    face_regions = face_detector.detectMultiScale(image_gray, 1.05, 4,
                                                  cv2.CASCADE_SCALE_IMAGE,
                                                  (60, 60))
    # eyes detect based on face region
    eyes_regions = []
    for (x, y, w, h) in face_regions:
        face_image = image_gray[y:y + h, x:x + w]
        # resize face, transform to high resolution
        face_image, scale = resize(face_image)
        # first eyes detection
        eyes_roi = eyes_detector.detectMultiScale(face_image, 1.05, 2,
                                                  cv2.CASCADE_SCALE_IMAGE,
                                                  (60, 60), (120, 120))
        # second eyes detection
        for (ex, ey, ew, eh) in eyes_roi:
            eyes_regions.append([
                x + int(ex / scale), y + int(ey / scale),
                int(ew / scale),
                int(eh / scale)
            ])
    # apply nms to reduce overlapping boxes
    eyes_regions = nms(eyes_regions, thres=0.5)
    # visualize
    if visualize:
        # plot
        for (x, y, w, h) in face_regions:
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
        for (x, y, w, h) in eyes_regions:
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # show the detection boxes in a window
        cv2.imshow("eye detect", image)
        cv2.waitKey(5)
    eyes_region = [
        cv2.resize(image_gray[y:y + h, x:x + w], (eye_size, eye_size))
        for (x, y, w, h) in eyes_regions
    ]
    return eyes_region
Example #18
    def merge_outputs(self, detections):
        # detections: list of dets, dets: detection dict{1:det_array,2:det_array...}. det_array: shape of [k,5]
        # return: {1:det_array,2:det_array...}. det_array: shape of [k,5]
        res_dets = {}
        for j in range(1, self.cfg.NUM_CLASS):
            res_dets[j] = np.concatenate([dets[j] for dets in detections],
                                         axis=0).astype(np.float32)
            if len(self.scales) > 1 or self.cfg.NMS:
                res_index = nms(res_dets[j], 0.5)
                res_dets[j] = res_dets[j][res_index]

        scores = np.hstack(
            [res_dets[j][:, 4] for j in range(1, self.cfg.NUM_CLASS)])
        if len(scores) > self.max_per_image:
            kth = len(scores) - self.max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, self.cfg.NUM_CLASS):
                keep_inds = (res_dets[j][:, 4] >= thresh)
                res_dets[j] = res_dets[j][keep_inds]
        return res_dets
Example #19
 def forward(self, score, reg_param, anchors, im_info):
     # Apply Regression
     rois = apply_reg(anchors, reg_param)
     rois[0::2].clamp_(0, im_info[0] - 1)
     rois[1::2].clamp_(0, im_info[1] - 1)
     # Pre-NMS Top-K Selection
     score_foreground = score[:, :, 0].squeeze(0)
     _, order = torch.sort(score_foreground, descending=True)
     if (cfg.pre_nms_topk > 0
             and cfg.pre_nms_topk < score_foreground.size(0)):
         order = order[:cfg.pre_nms_topk]
     rois = rois[order, :]
     score_foreground = score_foreground[order]
     # NMS
     nms_keep_index = nms(rois, score_foreground, cfg.rpn_nms_thr)
     rois = rois[nms_keep_index, :]
     # Aft-NMS Top-K Selection
     if (cfg.aft_nms_topk > 0
             and cfg.aft_nms_topk < nms_keep_index.size(0)):
         rois = rois[:cfg.aft_nms_topk, :]
         score_foreground = score_foreground[:cfg.aft_nms_topk]
     return rois
Example #20
def detect(image, model, priors):
    """
    """
    h, w = image.shape[:2]
    image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
    image = image.astype('float32')
    images = np.expand_dims(image, axis=0)

    confs, locs = model(images, training=False)
    boxes = decode(priors, tf.squeeze(locs, 0))
    boxes = boxes.numpy()
    scale = np.array([w, h, w, h])
    boxes = boxes * scale

    confs = tf.squeeze(confs, 0)
    scores = confs.numpy()
    scores = scores[:, 1]

    # Ignore low scores
    inds = np.where(scores > FLAGS.conf_threshold)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # Keep top-k before NMS
    order = scores.argsort()[::-1][:FLAGS.top_k]
    boxes = boxes[order]
    scores = scores[order]

    # NMS
    dets = np.hstack(
        (boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    selected_idx = np.array([0, 1, 2, 3, 4])
    keep = nms(dets[:, selected_idx], FLAGS.nms_threshold)
    dets = dets[keep, :]

    dets = dets[:FLAGS.keep_top_k, :]
    return dets
Example #21
    def _rpn_proposal(self, rpn_reg_locs, rpn_cls_score):
        """Deduction the output of RPN, is input of RoI.

        NMS_pre_TopN -> apply nms -> NMS_post_TopN
        """
        anchors = tf.py_func(anchor_generate, [self._h, self._w], [tf.float32])
        anchors = to_box_ctr(anchors)

        boxes_regressed = anchor_regress(
            anchors, rpn_reg_locs)  # apply transform to all anchors

        rpn_boxes = to_box_cor(boxes_regressed)
        rpn_score_arg = tf.argsort(rpn_cls_score, direction='DESCENDING')
        rpn_arg_top_pre = rpn_score_arg[:cfg.TRAIN.NMS_PRE_TOPN]

        rpn_boxes_top_pre = tf.gather(rpn_boxes, rpn_arg_top_pre)
        rpn_score_top_pre = tf.gather(rpn_cls_score, rpn_arg_top_pre)
        indices = nms(rpn_boxes_top_pre, rpn_score_top_pre,
                      cfg.TRAIN.NMS_POST_TOPN, cfg.NMS_THRESH)

        rpn_proposal = tf.gather(rpn_boxes_top_pre, indices)
        rpn_proposal_cropped = box_cropper(rpn_proposal, self._h, self._w)

        return rpn_proposal_cropped  # train: [2000, 4] y1, x1, y2, x2
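The nms(boxes, scores, max_output_size, iou_threshold) call above is not defined in the snippet; its argument order matches TensorFlow's built-in tf.image.non_max_suppression, which returns the indices of the kept boxes and expects boxes as [y1, x1, y2, x2] (consistent with the return comment). A minimal usage sketch with made-up values, assuming that is the helper being wrapped:

import tensorflow as tf

boxes = tf.constant([[0., 0., 10., 10.],
                     [1., 1., 11., 11.],
                     [50., 50., 60., 60.]])  # [y1, x1, y2, x2]
scores = tf.constant([0.9, 0.8, 0.7])

# keep at most 100 boxes; suppress boxes with IoU > 0.7 against a kept box
indices = tf.image.non_max_suppression(boxes, scores,
                                       max_output_size=100,
                                       iou_threshold=0.7)
kept = tf.gather(boxes, indices)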
Example #22
def post_process(dets, c, s, h, w, num_classes, score_thresh):
    # dets: [1, N*K, 6]  det:[x1,y1,x2,y2,score,class_id]
    # return top_preds{1: list of [x1,y1,x2,y2,score], 2: list of [x1,y1,x2,y2,score] ... }
    top_preds = {}

    # transform x1,y1
    dets[:, :2] = transform_preds(
        dets[:, 0:2], c[0], s[0], (w, h))
    # transform x2,y2
    dets[:, 2:4] = transform_preds(
        dets[:, 2:4], c[0], s[0], (w, h))

    # do nms on dets before assign class
    keep = nms(dets, 0.5)
    dets = dets[keep]
    # get bbox and score for every class
    classes = dets[:, -1]
    scores = dets[:, 4]
    for j in range(1, num_classes):
        inds = ((classes == j) * (scores > score_thresh))
        top_preds[j] = np.concatenate([
            dets[inds, :4].astype(np.float32),
            dets[inds, 4:5].astype(np.float32)], axis=1).tolist()
    return top_preds
Example #23
def main():
    total_timer = Timer(name='total')
    total_timer.tic()

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    # --------------------------- 1. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)
    # -----------------------------------------------------------------------------------------------------

    # ------------- 2. Load Plugin for inference engine and extensions library if specified --------------
    log.info("Loading Inference Engine")
    ie = IECore()
    log.info("Device info:")
    versions = ie.get_versions(args.device)
    print("{}{}".format(" " * 8, args.device))
    print("{}MKLDNNPlugin version ......... {}.{}".format(
        " " * 8, versions[args.device].major, versions[args.device].minor))
    print("{}Build ........... {}".format(" " * 8,
                                          versions[args.device].build_number))

    if args.cpu_extension and "CPU" in args.device:
        ie.add_extension(args.cpu_extension, "CPU")
        log.info("CPU extension loaded: {}".format(args.cpu_extension))

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [
            l for l in net.layers.keys() if l not in supported_layers
        ]
        if len(not_supported_layers) != 0:
            log.error(
                "Following layers are not supported by the plugin for specified device {}:\n {}"
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)
    # -----------------------------------------------------------------------------------------------------

    # --------------------------- 3. Configure input & output ---------------------------------------------
    # --------------------------- Prepare input blobs -----------------------------------------------------
    log.info("Preparing input blobs")
    assert (len(net.inputs.keys()) == 1
            ), "Sample supports topologies only with 1 input"

    input_name = next(iter(net.inputs.keys()))
    input_info = net.inputs[input_name]
    input_info.precision = 'FP32'

    # --------------------------- Prepare output blobs ----------------------------------------------------
    log.info('Preparing output blobs')
    assert (len(net.outputs.keys()) == 2
            ), "Sample supports topologies only with 2 outputs"

    loc_out_name = "797"
    class_out_name = "741"
    assert (loc_out_name in net.outputs.keys()) and (class_out_name
                                                     in net.outputs.keys())

    loc_out_info = net.outputs[loc_out_name]
    class_out_info = net.outputs[class_out_name]

    loc_out_info.precision = "FP32"
    class_out_info.precision = "FP32"
    # -----------------------------------------------------------------------------------------------------

    # -----------------------------------------------------------------------------------------------------
    log.info("Loading model to the device")
    exec_net = ie.load_network(network=net, device_name=args.device)

    # --------------------------- 4. Read and preprocess input --------------------------------------------
    # -----------------------------------------------------------------------------------------------------
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)

    if args.voc_res_file and os.path.exists(args.voc_res_file):
        os.remove(args.voc_res_file)

    create_anchor_timer = Timer(name='create_anchor')
    read_img_timer = Timer(name='read_img')
    preprocess_timer = Timer(name='preprocess')
    infer_timer = Timer(name='infer')
    adapter_timer = Timer(name='adapter')
    patch_img_nms_timer = Timer(name='patch_img_nms')
    whole_img_nms_timer = Timer(name='whole_img_nms')
    add_offset_timer = Timer(name='add_offset')
    write_result_timer = Timer(name='write_result')

    create_anchor_timer.tic()
    adapter = RetinaNetAdapter(input_shape=args.patch_size)
    create_anchor_timer.toc()

    image_names = os.listdir(args.image_dir)
    log.info("image_nums: {}".format(len(image_names)))
    for image_id, image_name in enumerate(image_names):
        read_img_timer.tic()
        image_path = os.path.join(args.image_dir, image_name)
        img = cv2.imread(image_path).astype('float32')
        read_img_timer.toc()

        height, width, _ = img.shape
        image_shape = (width, height)
        strides = args.strides
        patch_size = args.patch_size
        x_num, y_num = calc_split_num(image_shape, patch_size, strides)

        log.info("id:{}, name: {}, shape: ({},{}), x_num:{}, y_num:{}".format(
            image_id, image_name, height, width, x_num, y_num))

        preprocess_timer.tic()
        img = img.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        preprocess_timer.toc()

        result_all = []
        for i in range(x_num):
            for j in range(y_num):
                x = strides[0] * i if i < x_num - 1 else image_shape[
                    0] - args.patch_size[0]
                y = strides[1] * j if j < y_num - 1 else image_shape[
                    1] - args.patch_size[1]
                # print('processing {} , x: {}, y: {}'.format(image_name, x, y))

                preprocess_timer.tic()
                crop_img = img[:, y:y + patch_size[1],
                               x:x + patch_size[0]].copy()
                crop_img = crop_img[np.newaxis, :, :, :]
                preprocess_timer.toc()

                # --------------------------- Performing inference ----------------------------------------------------
                infer_timer.tic()
                res = exec_net.infer(inputs={input_name: crop_img})
                loc_out = res[loc_out_name][0]
                class_out = res[class_out_name][0]
                infer_timer.toc()

                adapter_timer.tic()
                result = adapter.process(loc_out, class_out)
                adapter_timer.toc()

                patch_img_nms_timer.tic()
                result, _ = nms(result, thresh=0.5, keep_top_k=100)
                patch_img_nms_timer.toc()

                # import pdb;pdb.set_trace()
                add_offset_timer.tic()
                result[:, 0] += x
                result[:, 1] += y
                result[:, 2] += x
                result[:, 3] += y
                result_all.append(result)
                add_offset_timer.toc()

        # import pdb;pdb.set_trace()
        whole_img_nms_timer.tic()
        result_all = np.concatenate(result_all, axis=0)
        nms_result, _ = nms(result_all, thresh=0.5)
        whole_img_nms_timer.toc()

        write_result_timer.tic()
        voc_format = '{} {:.4f} {} {} {} {}'
        pos_all = []
        voc_all = []
        for i in range(nms_result.shape[0]):
            x = int(nms_result[i, 0])
            y = int(nms_result[i, 1])
            w = max(int(nms_result[i, 2] - nms_result[i, 0]), 1)
            h = max(int(nms_result[i, 3] - nms_result[i, 1]), 1)
            p = float(nms_result[i, 4])
            pos = {'x': x, 'y': y, 'w': w, 'h': h, 'p': p}
            pos_all.append(pos)

            if args.voc_res_file:
                xmin = x
                ymin = y
                xmax = int(nms_result[i, 2])
                ymax = int(nms_result[i, 3])
                voc_str = voc_format.format(
                    os.path.splitext(image_name)[0], p, xmin, ymin, xmax, ymax)
                voc_all.append(voc_str)

        file_name = os.path.splitext(image_name)[0] + '.json'
        with open(os.path.join(args.result_dir, file_name), 'w') as f:
            json.dump(pos_all, f)

        if args.voc_res_file:
            with open(args.voc_res_file, 'a') as f:
                for voc_str in voc_all:
                    f.write(voc_str + '\n')

        write_result_timer.toc()

    total_timer.toc()
    # -----------------------------------------------------------------------------------------------------
    all_timers = []
    all_timers.extend([
        create_anchor_timer, read_img_timer, preprocess_timer, infer_timer,
        adapter_timer, patch_img_nms_timer, whole_img_nms_timer,
        add_offset_timer, write_result_timer, total_timer
    ])
    for timer in all_timers:
        log.info('{}: avg: {:.2f} ms, total: {:.2f}s'.format(
            timer.name, timer.avg * 1000, timer.total))

    log.info("Execution successful\n")
Example #24
def _proposal_layer(rpn_bbox_cls, rpn_bbox_pred, im_size, feat_stride,
                    eval_mode):
    """

    :param rpn_bbox_cls: (None, H, W, 2 * k)
    :param rpn_bbox_pred: (None, H, W, 4 * k)
    :param im_size: (800, 600)
    :param feat_stride: 16
    :return:
    """
    rpn_bbox_cls_prob = rpn_softmax(rpn_bbox_cls)
    anchor = Anchors(feat_stride=feat_stride)
    # all_anchors (A * H * W, 4)
    anchors, A = anchor.get_anchors()
    num_anchors = A
    # (1,  2 * k, H, W)
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2])
    # (1,  4 * k, H, W)
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    assert rpn_bbox_cls_prob.shape[0] == 1, 'Only support 1 batch_size'

    if not eval_mode:
        # training mode
        pre_nms_topN = cfg.train_rpn_pre_nms_top_n
        post_nms_topN = cfg.train_rpn_post_nms_top_n
        nms_thresh = cfg.train_rpn_nms_thresh
        min_size = cfg.train_rpn_min_size
    else:
        # evaluation mode
        pre_nms_topN = cfg.test_rpn_pre_nms_top_n
        post_nms_topN = cfg.test_rpn_post_nms_top_n
        nms_thresh = cfg.test_rpn_nms_thresh
        min_size = cfg.test_rpn_min_size
    # of the predicted cls channels, the first 9 are background and the last 9 are foreground
    scores = rpn_bbox_cls_prob[:, num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    # (1, 4 * k, H, W) -> (1, H, W, 4 * A)
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
    # regress the actual boxes from the anchors and predicted deltas: dx dy dw dh --> cx cy w h
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_size)

    # 3. remove predicted boxes with either height or width < threshold
    keep = _filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    # scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
Example #25
    def detect_onetwork(self, image_input, dets):
        image_height, image_width, image_channels = image_input.shape

        if dets is None:
            return None, None

        dets = self.square_bbox(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = self.pad(dets, image_width, image_height)
        num_boxes = dets.shape[0]

        cropped_ims_tensors = []
        for i in range(num_boxes):
            try:
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i] + 1,
                    dx[i]:edx[i] + 1, :] = image_input[y[i]:ey[i] + 1,
                                                       x[i]:ex[i] + 1, :]
                crop_im = cv2.resize(tmp, (48, 48))
                crop_im_tensor = self.convert_to_tensor(crop_im)
                cropped_ims_tensors.append(crop_im_tensor)
            except:
                continue

        try:
            feed_imgs = Variable(torch.stack(cropped_ims_tensors))
        except:
            return None, None

        detection, bbox = self.o_network(feed_imgs.float())

        detection = detection.data.numpy()
        bbox = bbox.data.numpy()

        keep_inds = np.where(detection > self.threshold[2])[0]

        if len(keep_inds) > 0:
            boxes = dets[keep_inds]
            detection = detection[keep_inds]
            bbox = bbox[keep_inds]
        else:
            return None, None

        keep = nms(boxes, 0.7, mode="Minimum")

        if len(keep) == 0:
            return None, None

        keep_detection = detection[keep]
        keep_boxes = boxes[keep]
        keep_bbox = bbox[keep]

        bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
        bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

        align_topx = keep_boxes[:, 0] + keep_bbox[:, 0] * bw
        align_topy = keep_boxes[:, 1] + keep_bbox[:, 1] * bh
        align_bottomx = keep_boxes[:, 2] + keep_bbox[:, 2] * bw
        align_bottomy = keep_boxes[:, 3] + keep_bbox[:, 3] * bh

        boxes = np.vstack([
            align_topx, align_topy, align_bottomx, align_bottomy,
            keep_detection[:, 0]
        ])
        boxes_align = boxes.T

        return boxes_align
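The two MTCNN-style examples (#25 above and #26 below) pass a mode argument ('Minimum' or 'Union') to nms. In common MTCNN implementations this only changes the denominator of the overlap measure: 'Union' is the standard IoU, while 'Minimum' divides the intersection by the smaller of the two box areas, which suppresses boxes nested inside larger ones more aggressively. A sketch of that overlap computation, assuming this code follows the usual convention:

import numpy as np

def overlap(inter, area_a, area_b, mode="Union"):
    # inter: intersection areas; area_a / area_b: areas of the two boxes
    if mode == "Union":
        return inter / (area_a + area_b - inter)   # standard IoU
    elif mode == "Minimum":
        return inter / np.minimum(area_a, area_b)  # intersection over min area
    raise ValueError("unknown nms mode: %s" % mode)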
Example #26
    def detect_pnetwork(self, image_input):
        image_height, image_width, image_channels = image_input.shape
        net_size = 12
        final_boxes = []

        current_scale = float(net_size) / self.min_face_size
        image_resized = self.resize_image(image_input, current_scale)
        current_height, current_width, _ = image_resized.shape

        while min(current_height, current_width) > net_size:
            image_list = []
            image_resized_tensor = self.convert_to_tensor(image_resized)
            image_list.append(image_resized_tensor)
            image_list = torch.stack(image_list)
            image_list = Variable(image_list)

            detection, bbox = self.p_network(image_list.float())

            detection = np.transpose(detection.data.numpy(), (0, 2, 3, 1))
            bbox = np.transpose(bbox.data.numpy(), (0, 2, 3, 1))

            boxes = self.generate_bounding_boxes(detection[0, :, :], bbox,
                                                 current_scale,
                                                 self.threshold[0])
            current_scale *= self.scale_factor
            image_resized = self.resize_image(image_input, current_scale)
            current_height, current_width, _ = image_resized.shape

            if (boxes.size) == 0:
                continue

            keep = nms(boxes[:, :5], 0.5, 'Union')
            boxes = boxes[keep]
            final_boxes.append(boxes)

        if len(final_boxes) == 0:
            return None, None

        final_boxes = np.vstack(final_boxes)

        keep = nms(final_boxes[:, 0:5], 0.7, 'Union')
        final_boxes = final_boxes[keep]

        bw = final_boxes[:, 2] - final_boxes[:, 0] + 1
        bh = final_boxes[:, 3] - final_boxes[:, 1] + 1

        boxes = np.vstack([
            final_boxes[:, 0],
            final_boxes[:, 1],
            final_boxes[:, 2],
            final_boxes[:, 3],
            final_boxes[:, 4],
        ])

        boxes = boxes.T

        align_topx = final_boxes[:, 0] + final_boxes[:, 5] * bw
        align_topy = final_boxes[:, 1] + final_boxes[:, 6] * bh
        align_bottomx = final_boxes[:, 2] + final_boxes[:, 7] * bw
        align_bottomy = final_boxes[:, 3] + final_boxes[:, 8] * bh

        boxes_align = np.vstack([
            align_topx,
            align_topy,
            align_bottomx,
            align_bottomy,
            final_boxes[:, 4],
        ])
        boxes_align = boxes_align.T
        return boxes, boxes_align
Example #27
    def update(self, image: np.ndarray, boxes: np.ndarray, scores: np.ndarray) \
            -> Iterable[trace.Trace]:
        self.frame += 1

        refind, lost = [], []
        activated, removed = [], []
        # Step 1. Prediction
        for track in chain(self.tracked, self.lost):
            track.predict()

        # Step 2. Selection by score
        if scores is None:
            scores = np.ones(np.size(boxes, 0), dtype=float)

        detections = list(chain(
            map(lambda t: trace.Trace(*t, from_det=True), zip(boxes, scores)),
            map(lambda t: trace.Trace(*t, from_det=False), zip(boxes, scores))
        ))

        self.classifier.update(image)

        detections.extend(map(lambda t: trace.Trace(t.tracking(image), t.track_score, from_det=True),
                              filter(lambda t: t.is_activated, chain(self.tracked, self.lost))))

        rois = np.asarray(list(map(lambda t: t.to_tlbr, detections)), np.float32)

        class_scores = self.classifier.predict(rois)
        scores = np.concatenate([
            np.ones(np.size(boxes, 0), dtype=np.float32),
            np.fromiter(map(lambda t: t.score, detections[np.size(boxes, 0):]), dtype=np.float32)
        ]) * class_scores

        # Non-maxima suppression
        if len(detections) > 0:
            mask = np.zeros(np.size(rois, 0), dtype=bool)
            mask[list(nms(rois, scores.reshape(-1), threshold=.4))] = True

            indices = np.zeros_like(detections, dtype=bool)
            indices[np.where(mask & (scores >= self.min_score))] = True

            detections = list(compress(detections, indices))
            scores = scores[indices]

            for detection, score in zip(detections, scores):
                detection.score = score

        predictions = list(filter(lambda t: not t.from_det, detections))
        detections = list(filter(lambda t: t.from_det, detections))

        # set features
        features = self.identifier.extract(image, np.asarray(
            list(map(lambda t: t.to_tlbr, detections)), dtype=np.float32)
        )

        for idx, detection in enumerate(detections):
            detection.feature = features[idx]

        # Step3. Association for tracked
        # matching for tracked target
        unconfirmed = list(filter(lambda t: not t.is_activated, self.tracked))
        tracked = list(filter(lambda t: t.is_activated, self.tracked))

        distance = matching.nearest_distance(tracked, detections, metric='euclidean')
        cost = matching.gate_cost(self.motion, distance, tracked, detections)
        matches, u_track, u_detection = matching.assignment(cost, threshold=self.min_dist)

        for track, det in matches:
            tracked[track].update(self.frame, image, detections[det])

        # matching for missing targets
        detections = list(map(lambda u: detections[u], u_detection))
        distance = matching.nearest_distance(self.lost, detections, metric='euclidean')
        cost = matching.gate_cost(self.motion, distance, self.lost, detections)
        matches, u_lost, u_detection = matching.assignment(cost, threshold=self.min_dist)

        for miss, det in matches:
            self.lost[miss].reactivate(self.frame, image, detections[det], reassign=not self.use_refind)
            refind.append(self.lost[miss])

        # remaining tracked
        matched_size = len(u_detection)
        detections = list(map(lambda u: detections[u], u_detection)) + predictions
        u_tracked = list(map(lambda u: tracked[u], u_track))
        distance = matching.iou_distance(u_tracked, detections)
        matches, u_track, u_detection = matching.assignment(distance, threshold=.8)

        for track, det in matches:
            u_tracked[track].update(self.frame, image, detections[det], update_feature=True)

        for track in map(lambda u: u_tracked[u], u_track):
            track.lost()
            lost.append(track)

        # unconfirmed
        detections = list(map(lambda u: detections[u], filter(lambda u: u < matched_size, u_detection)))
        distance = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.assignment(distance, threshold=.8)

        for track, det in matches:
            unconfirmed[track].update(self.frame, image, detections[det], update_feature=True)

        for track in map(lambda u: unconfirmed[u], u_unconfirmed):
            track.remove()
            removed.append(track)

        # Step 4. Init new trace
        for track in filter(lambda t: t.from_det and t.score >= .6,
                            map(lambda u: detections[u], u_detection)):
            track.activate(self.frame, image, self.motion)
            activated.append(track)

        # Step 5. Update state
        for track in filter(lambda t: self.frame - t.frame > self.max_lost, self.lost):
            track.remove()
            removed.append(track)

        self.tracked = list(chain(
            filter(lambda t: t.state == trace.State.Tracked, self.tracked),
            activated, refind,
        ))
        self.lost = list(chain(
            filter(lambda t: t.state == trace.State.Lost, self.lost),
            lost
        ))
        self.removed.extend(removed)

        lost_score = self.classifier.predict(
            np.asarray(list(map(lambda t: t.to_tlbr, self.lost)), dtype=np.float32)
        )

        return chain(
            filter(lambda t: t.is_activated, self.tracked),
            map(lambda it: it[1],
                filter(lambda it: lost_score[it[0]] > .3 and self.frame - it[1].frame <= 4,
                       enumerate(self.lost)))
        )
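
The tracking code above relies on an `nms(boxes, scores, threshold)` helper whose definition is not shown. The sketch below illustrates the usual greedy, score-sorted suppression over (x1, y1, x2, y2) boxes that such a helper performs; the name greedy_nms and the exact tie-breaking are assumptions, not the project's actual implementation.

import numpy as np

def greedy_nms(boxes, scores, threshold=0.4):
    """Greedy NMS sketch: boxes is (N, 4) of (x1, y1, x2, y2); returns indices of kept boxes."""
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the current box against all remaining candidates
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter + 1e-12)
        # drop every candidate that overlaps the kept box too much
        order = order[1:][iou <= threshold]
    return keep
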
Example #28
0
def validate(*,
             dataloader,
             model,
             device,
             step=-1,
             bbox_all=False,
             debug_mode):
    # result = open("logs/result.txt", "w" )

    with torch.no_grad():
        t_start = time.time()
        conf_thres, nms_thres, iou_thres = model.get_threshs()
        width, height = model.img_size()
        model.eval()
        print("Calculating mAP - Model in evaluation mode")
        n_images = len(dataloader.dataset)
        mAPs = []
        mR = []
        mP = []
        for batch_i, (img_uris, imgs, targets) in enumerate(
                tqdm(dataloader, desc='Computing mAP')):
            imgs = imgs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            # output,_,_,_ = model(imgs)
            output = model(imgs)

            for sample_i, (labels,
                           detections) in enumerate(zip(targets, output)):
                detections = detections[detections[:, 4] > conf_thres]
                if detections.size()[0] == 0:
                    predictions = torch.tensor([])
                else:
                    predictions = torch.argmax(detections[:, 5:], dim=1)
                # From (center x, center y, width, height) to (x1, y1, x2, y2)
                box_corner = torch.zeros((detections.shape[0], 4),
                                         device=detections.device)
                xy = detections[:, 0:2]
                wh = detections[:, 2:4] / 2
                box_corner[:, 0:2] = xy - wh
                box_corner[:, 2:4] = xy + wh
                probabilities = detections[:, 4]
                nms_indices = nms(box_corner, probabilities, nms_thres)
                box_corner = box_corner[nms_indices]
                probabilities = probabilities[nms_indices]
                predictions = predictions[nms_indices]

                if nms_indices.shape[0] == 0:
                    # no detections survived NMS for this image, skip it
                    continue
                # Get detections sorted by decreasing confidence scores
                _, inds = torch.sort(-probabilities)
                box_corner = box_corner[inds]

                probabilities = probabilities[inds]
                predictions = predictions[inds]
                # remove the 0-padding added by the dataloader
                labels = labels[(labels[:, 1:5] <= 0).sum(dim=1) == 0]
                # Extract target boxes as (x1, y1, x2, y2)
                target_boxes = xywh2xyxy(labels[:, 1:5])
                target_boxes[:, (0, 2)] *= width
                target_boxes[:, (1, 3)] *= height
                detected = torch.zeros(target_boxes.shape[0],
                                       device=target_boxes.device,
                                       dtype=torch.uint8)
                correct = torch.zeros(nms_indices.shape[0],
                                      device=box_corner.device,
                                      dtype=torch.uint8)
                # 0th dim is the detection
                # (repeat in the 1st dim)
                # 2nd dim is the coord
                ious = bbox_iou(
                    box_corner.unsqueeze(1).expand(-1, target_boxes.shape[0],
                                                   -1),
                    target_boxes.unsqueeze(0).expand(box_corner.shape[0], -1,
                                                     -1))
                # ious is 2d -- 0th dim is the detected box, 1st dim is the target box, value is iou

                # skip images without any ground-truth boxes (empty IoU matrix)
                if ious.numel() == 0:
                    continue

                best_is = torch.argmax(ious, dim=1)

                # TODO fix for multi-class. Need to use predictions somehow?
                for i, iou in enumerate(ious):
                    best_i = best_is[i]
                    if ious[i, best_i] > iou_thres and detected[best_i] == 0:
                        correct[i] = 1
                        detected[best_i] = 1

                # Compute Average Precision (AP) per class
                ap, r, p = average_precision(tp=correct,
                                             conf=probabilities,
                                             n_gt=labels.shape[0])

                # Compute mean AP across all classes in this image, and append to image list
                mAPs.append(ap)
                mR.append(r)
                mP.append(p)
                if bbox_all or sample_i < 2:  # log the first two images in every batch
                    img_filepath = img_uris[sample_i]
                    if img_filepath is None:
                        print("NULL image filepath for image uri: {uri}".format(uri=img_uris[sample_i]))
                        continue  # cannot visualize without a source image
                    orig_img = Image.open(img_filepath)
                    # draw = ImageDraw.Draw(img_with_boxes)
                    w, h = orig_img.size
                    pad_h, pad_w, scale_factor = calculate_padding(
                        h, w, height, width)

                    ##################################
                    detect_box = copy.deepcopy(box_corner)
                    ##################################

                    box_corner /= scale_factor
                    box_corner[:, (0, 2)] -= pad_w
                    box_corner[:, (1, 3)] -= pad_h

                    #######################################################################################
                    if debug_mode:
                        pil_img = transforms.ToPILImage()(imgs.squeeze())
                        ##### getting the image's name #####
                        img_path = img_uris[0]
                        img_name = ("_".join(map(str,
                                                 img_path.split("_")[-5:])))
                        tmp_path = os.path.join(
                            visualization_tmp_path,
                            img_name[:-4] + "_predicted_vis.jpg")
                        vis_label = add_class_dimension_to_labels(detect_box)
                        visualize_and_save_to_local(pil_img,
                                                    vis_label,
                                                    tmp_path,
                                                    box_color="red")
                        print("Prediction visualization uploaded")
                    #######################################################################################

            # running means over the images processed so far (final values are recomputed below)
            mean_mAP = torch.tensor(mAPs, dtype=torch.float).mean().item()
            mean_R = torch.tensor(mR, dtype=torch.float).mean().item()
            mean_P = torch.tensor(mP, dtype=torch.float).mean().item()
        # Means of all images
        mean_mAP = torch.tensor(mAPs, dtype=torch.float).mean().item()
        mean_R = torch.tensor(mR, dtype=torch.float).mean().item()
        mean_P = torch.tensor(mP, dtype=torch.float).mean().item()
        dt = time.time() - t_start
        print('mAP: {0:5.2%}, Recall: {1:5.2%}, Precision: {2:5.2%}'.format(
            mean_mAP, mean_R, mean_P))
        # result.write(str(1-mean_mAP))
        # result.close()
        return mean_mAP, mean_R, mean_P, dt / (n_images + 1e-12)
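
The mAP loop in validate() calls bbox_iou on two tensors expanded to a common (num_detections, num_targets, 4) shape, so the result is a full IoU matrix (row = detection, column = target box). A minimal elementwise version compatible with that call could look like the sketch below; it is only an assumption about the helper, not necessarily the project's own bbox_iou.

import torch

def bbox_iou_sketch(box_a, box_b, eps=1e-12):
    """Elementwise IoU for (..., 4) tensors of (x1, y1, x2, y2) corners.

    With box_a expanded to (N, M, 4) and box_b expanded to (N, M, 4),
    the result is the (N, M) IoU matrix used in validate().
    """
    x1 = torch.max(box_a[..., 0], box_b[..., 0])
    y1 = torch.max(box_a[..., 1], box_b[..., 1])
    x2 = torch.min(box_a[..., 2], box_b[..., 2])
    y2 = torch.min(box_a[..., 3], box_b[..., 3])
    inter = (x2 - x1).clamp(min=0) * (y2 - y1).clamp(min=0)
    area_a = (box_a[..., 2] - box_a[..., 0]) * (box_a[..., 3] - box_a[..., 1])
    area_b = (box_b[..., 2] - box_b[..., 0]) * (box_b[..., 3] - box_b[..., 1])
    return inter / (area_a + area_b - inter + eps)
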
def evaluate(model,
             dataloader,
             templates,
             prob_thresh=0.65,
             nms_thresh=0.3,
             device=None):
    #TODO check Peiyun's code to see the correct way to perform NMS
    print("Running multiscale evaluation code")

    model = model.eval().to(device)

    # Evaluate over multiple scales
    scales_list = [0.5**x for x in [1, 0, -1]]
    num_templates = templates.shape[0]

    results = []
    to_pil_image = transforms.ToPILImage()

    for idx, (img, filename) in tqdm(enumerate(dataloader),
                                     total=len(dataloader)):
        dets = np.empty((0, 6))  # store bbox (x1, y1, x2, y2), score and scale

        # convert tensor to PIL image so we can perform resizing
        image = to_pil_image(img[0])

        min_side = np.min(image.size)

        for s, scale in enumerate(scales_list):
            # scale the images
            scaled_image = transforms.Resize(int(min_side * scale))(image)

            # normalize the images
            img = dataloader.dataset.transforms(scaled_image)

            # add batch dimension
            img.unsqueeze_(0)

            # now run the model
            x = img.float().to(device)

            output = model(x)

            # first `num_templates` channels are class maps
            score_cls = torch.sigmoid(output[:, :num_templates, :, :])
            score_cls = score_cls.data.cpu().numpy().transpose((0, 2, 3, 1))

            score_reg = output[:, num_templates:, :, :]
            score_reg = score_reg.data.cpu().numpy().transpose((0, 2, 3, 1))

            t_bboxes, scores = get_bboxes(score_cls, score_reg, templates,
                                          prob_thresh, dataloader.dataset.rf,
                                          scale)

            scales = np.ones((t_bboxes.shape[0], 1)) / scale
            # append scores at the end for NMS
            d = np.hstack((t_bboxes, scores, scales))

            dets = np.vstack((dets, d))

        # Apply NMS across all scales and collect the detections for this image
        keep = nms(dets, nms_thresh)
        dets = dets[keep]
        results.append(dets)

    return results
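
Each row of dets packs (x1, y1, x2, y2, score, 1/scale), so after NMS the surviving detections can be split back into boxes, confidences, and the pyramid level each one came from. The small helper below is purely illustrative (the name split_dets is an assumption):

import numpy as np

def split_dets(dets):
    """Split an (N, 6) dets array of (x1, y1, x2, y2, score, 1/scale) rows."""
    boxes = dets[:, :4]
    scores = dets[:, 4]
    inv_scales = dets[:, 5]  # 1/scale at which each box was detected
    return boxes, scores, inv_scales

# With scales_list = [0.5 ** x for x in [1, 0, -1]] the pyramid is
# [0.5, 1.0, 2.0]: half-, full- and double-resolution inputs.
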
Example #30
0
def valid(datacfg, weight_file, outfile_prefix):

    options = read_data_cfg(datacfg)
    valid_images_set_file = options['valid']
    namesfile = options['names']

    #load class names
    class_names = load_class_names(namesfile)
    #load valid image
    with open(valid_images_set_file, 'r') as fp:
        image_files = fp.readlines()
        image_files = [file.rstrip() for file in image_files]

    model = yolo_v2()
    model.load_weights(weight_file)

    print("weights %s loaded" % (weight_file))
    if torch.cuda.is_available():
        model.cuda()
    model.eval()

    #result file
    fps = [0] * model.num_classes
    if not os.path.exists('results'):
        os.mkdir('results')
    dir_name = 'results/%s_%s_%s' % (namesfile.split('/')[-1].split('.')[0],
                                     weight_file.split('/')[-1].split('.')[0],
                                     time.strftime("%Y%m%d_%H%M%S",
                                                   time.localtime()))
    print('save results to %s' % dir_name)
    if not os.path.exists(dir_name):
        os.mkdir(dir_name)
    for i in range(model.num_classes):
        buf = "%s/%s_%s.txt" % (dir_name, outfile_prefix, class_names[i])
        fps[i] = open(buf, 'w')

    #construct datalist
    valid_dataset = VOCDataset(image_files,
                               shape=(model.width, model.height),
                               shuffle=False,
                               transform=transforms.Compose([
                                   transforms.ToTensor(),
                               ]))
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=4,
                                               shuffle=False,
                                               num_workers=4,
                                               pin_memory=True)

    conf_thresh = 0.005
    nms_thresh = 0.45
    LineId = -1
    for batch_index, (data, target) in enumerate(valid_loader):
        data = data.cuda()
        with torch.no_grad():
            output = model(data).data
        batch_boxes = model.get_region_boxes(output, conf_thresh)
        for i in range(len(batch_boxes)):
            boxes = batch_boxes[i]
            boxes = nms(boxes, nms_thresh)

            LineId = LineId + 1
            image_name = image_files[LineId]
            print "[Batch_index:%d] [%d/%d] file:%s " % (
                batch_index, LineId + 1, len(image_files), image_name)

            img_orig = Image.open(image_name)
            # print(img_orig)
            height, width = img_orig.height, img_orig.width
            print("   height %d, width %d, bbox num %d" %
                  (height, width, len(boxes)))
            for box in boxes:
                x1 = (box[0] - box[2] / 2.0) * width
                y1 = (box[1] - box[3] / 2.0) * height
                x2 = (box[0] + box[2] / 2.0) * width
                y2 = (box[1] + box[3] / 2.0) * height
                det_conf = box[4]
                cls_conf = box[5]
                cls_id = box[6]
                fps[cls_id].write(
                    "%s %f %f %f %f %f\n" %
                    (image_name, det_conf * cls_conf, x1, y1, x2, y2))

    for i in range(model.num_classes):
        fps[i].close()
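
valid() writes one text file per class, with one detection per line in the form `image_name score x1 y1 x2 y2`. A caller that wants to reload those detections, for example for a VOC-style AP computation, could parse a file roughly as in the sketch below (the helper name is an assumption):

def load_class_detections(result_file):
    """Parse a results file written by valid(): `image_name score x1 y1 x2 y2` per line."""
    detections = []
    with open(result_file) as f:
        for line in f:
            parts = line.split()
            image_name = parts[0]
            score, x1, y1, x2, y2 = map(float, parts[1:6])
            detections.append((image_name, score, (x1, y1, x2, y2)))
    return detections
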