def speed_test(self, custom_settings=None):
        """
        custom_settings, if set, should be a tuple of (nms_threshold, conf_threshold, device), this
        device - cuda:0 or cpu
        !!! overwrites the original settings
        """
        if custom_settings:
            print("Current custom settings: ", custom_settings)
            nms_thresh, conf_thresh, device = custom_settings
            self.output_handler.suppress_threshold = nms_thresh
            self.output_handler.confidence_threshold = conf_thresh
            self.device = device
            self.model.to(self.device)

        run = 0
        total_time_model, total_time_pre_nms, total_time_nms, in_nms_boxes = 0, 0, 0, 0
        while run < self.runs:
            c_img = 0
            times_model, times_pre_nms, times_nms, in_nms_boxes = 0, 0, 0, 0
            self.valid_loader_iter = iter(self.valid_loader)
            while c_img < self.n_images:
                (boxes, confs), image_info, last_model = self.val_image_output()
                times_model += last_model

                start_pre_nms = time.time()
                boxes, classes = postprocess_until_nms(self.output_handler, boxes,
                                                       confs, image_info[0][1])
                last_pre_nms = time.time() - start_pre_nms
                times_pre_nms += last_pre_nms

                boxes = wh2corners_numpy(boxes[:, :2], boxes[:, 2:])
                start_nms = time.time()
                # cap the candidates fed to nms at the 200 highest-confidence boxes
                boxes = boxes[:200]
                in_nms_boxes += len(boxes)
                _ = nms(boxes, classes, self.output_handler.suppress_threshold)
                last_nms = time.time() - start_nms
                times_nms += last_nms
                c_img += 1

            if self.print_each_run:
                self.print_stats(times_model, times_pre_nms, times_nms, self.n_images)
                print("Mean number of boxes processed by nms: ",
                      "{:.2f}".format(in_nms_boxes / self.n_images))
            run += 1

            total_time_model += times_model
            total_time_pre_nms += times_pre_nms
            total_time_nms += times_nms

        print("Final results:")
        print("--------------------------------------")
        print("--------------------------------------\n\n")
        self.print_stats(total_time_model, total_time_pre_nms, total_time_nms,
                         self.n_images * self.runs)
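A minimal usage sketch for the method above; the `SpeedBenchmark` wrapper name and its constructor arguments are assumptions, since the listing only shows the method itself:

# Hypothetical wrapper; only speed_test() itself appears in the listing.
benchmark = SpeedBenchmark(model, valid_loader, output_handler,
                           runs=5, n_images=100, print_each_run=True)

# Benchmark with the settings the object was constructed with ...
benchmark.speed_test()

# ... then again with a (nms_threshold, conf_threshold, device) tuple,
# which overwrites the original settings.
benchmark.speed_test(custom_settings=(0.5, 0.25, "cuda:0"))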
Example #2
    def run_inference(self, image, modify_image=True, custom_settings=None):
        """
        If modify_image flag is True, the boxes are drawn on the given image, otherwise this
        function just returns the predicted boxes

        custom_settings, if set, should be a tuple of (nms_threshold, conf_threshold, device), this
        device - cuda:0 or cpu
        !!! overwrites the original settings
        """
        if custom_settings:
            print("Current custom settings: ", custom_settings)
            nms_thresh, conf_thresh, device = custom_settings
            self.output_handler.suppress_threshold = nms_thresh
            self.output_handler.confidence_threshold = conf_thresh
            self.device = device
            self.model.to(self.device)
        with torch.no_grad():
            original_image = image.copy()
            height, width, _ = original_image.shape

            image = cv2.resize(image, (300, 300))
            image = F.to_tensor(image)
            image = F.normalize(image,
                                mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

            image = image.to(self.device)
            image = image.unsqueeze(dim=0)
            boxes, confs = self.model(image)
            boxes = boxes.squeeze().permute(1, 0)
            confs = confs.squeeze().permute(1, 0)

            boxes, classes = postprocess_until_nms(self.output_handler, boxes,
                                                   confs, (width, height))

            boxes = wh2corners_numpy(boxes[:, :2], boxes[:, 2:])
            kept_indices = nms(boxes, classes,
                               self.output_handler.suppress_threshold)

            boxes = boxes[kept_indices].astype(int)
            # clip box coordinates to the image range
            clip_boxes(boxes, width, height)
            if modify_image:
                image = self.plot_boxes(original_image, boxes)
                return image
            return boxes
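A minimal usage sketch for run_inference; the `SSDInference` wrapper name and the image path are assumptions:

import cv2

# Hypothetical wrapper exposing the run_inference method above.
detector = SSDInference(params)

image = cv2.imread("street.jpg")  # H x W x C BGR image, as read by cv2

# Draw the kept boxes on a copy of the input ...
annotated = detector.run_inference(image, modify_image=True)
cv2.imwrite("street_annotated.jpg", annotated)

# ... or just return the corner-format boxes, here with looser thresholds on CPU.
boxes = detector.run_inference(image, modify_image=False,
                               custom_settings=(0.5, 0.15, "cpu"))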
Example #3
    def process_outputs(self, bbox_predictions, classification_predictions,
                        image_info):
        """
        returns complete model outputs in format array of:
        bbox, class id, confidence
        all operations done on cpu
        """
        prediction_bboxes, predicted_classes, highest_confidence_for_predictions, _ = self._get_sorted_predictions(
            bbox_predictions, classification_predictions, image_info)

        # convert to corners for nms
        prediction_bboxes = wh2corners_numpy(prediction_bboxes[:, :2],
                                             prediction_bboxes[:, 2:])
        indices_kept_by_nms = nms(prediction_bboxes, predicted_classes,
                                  self.suppress_threshold)

        # new structure: array of bbox, class, confidence
        prediction_bboxes = corners_to_wh(prediction_bboxes)
        complete_outputs = np.concatenate(
            (prediction_bboxes, predicted_classes,
             highest_confidence_for_predictions),
            axis=1)

        return complete_outputs[indices_kept_by_nms]
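The `nms` helper these examples share is not part of the listing; as a reference, here is a minimal sketch of a class-aware greedy NMS over corner-format boxes, assuming (as `_get_sorted_predictions` suggests) that the boxes arrive sorted by descending confidence:

import numpy as np

def nms_sketch(boxes, classes, suppress_threshold):
    """Greedy per-class NMS over corner-format (x1, y1, x2, y2) boxes that are
    already sorted by descending confidence. Returns indices of kept boxes."""
    classes = np.asarray(classes).reshape(-1)   # tolerate (N, 1) class columns
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    keep, order = [], np.arange(len(boxes))
    while order.size > 0:
        i = order[0]                            # highest-confidence remaining box
        keep.append(i)
        # intersection of box i with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.clip(xx2 - xx1, 0, None) * np.clip(yy2 - yy1, 0, None)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # suppress same-class boxes whose overlap with box i is too high
        suppressed = (classes[order[1:]] == classes[i]) & (iou > suppress_threshold)
        order = order[1:][~suppressed]
    return np.array(keep)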
Example #4
def test_anchor_mapping(image,
                        bbox_predictions,
                        classification_predictions,
                        gt_bbox,
                        gt_class,
                        image_info,
                        params,
                        model_outputs,
                        visualize_anchors,
                        visualize_anchor_gt_pair,
                        all_anchor_classes,
                        verbose=False,
                        very_verbose=False):
    """
    Args:
    image - C x H x W normalized tensor
    bbox_predictions - 4 x #anchors tensor
    classification_predictions - #classes x #anchors tensor
    gt_bbox - 4 x #anchors tensor
    gt_class - #classes x #anchors tensor
    image_info - (image_id, (width, height))
    model_outputs - flag to check model outputs or not
    visualize_anchors, visualize_anchor_gt_pair - similar flags
    """
    output_handler = Model_output_handler(params)

    anchors_ltrb = default_boxes(order="ltrb")
    anchors_xywh = default_boxes(order="xywh")

    overlaps = jaccard(wh2corners(gt_bbox[:, :2], gt_bbox[:, 2:]),
                       anchors_ltrb)

    processed_predicted_bboxes, processed_predicted_classes, highest_confidence_for_predictions, _ = output_handler._get_sorted_predictions(
        bbox_predictions, classification_predictions, image_info)

    # map each anchor to the ground-truth object with the highest IoU;
    # pos_idx marks the matched (positive) anchors
    gt_bbox_for_matched_anchors, matched_gt_class_ids, pos_idx = map_to_ground_truth(
        overlaps, gt_bbox, gt_class, params)

    indeces_kept_by_nms = nms(
        wh2corners_numpy(processed_predicted_bboxes[:, :2],
                         processed_predicted_bboxes[:, 2:]),
        processed_predicted_classes, output_handler.suppress_threshold)

    # get things in the right format
    image = output_handler._unnorm_scale_image(image)
    pos_idx = pos_idx.cpu().numpy()
    gt_bbox = output_handler._rescale_bboxes(gt_bbox, image_info[1])
    gt_class = gt_class.cpu().numpy()
    all_anchor_classes = map_id_to_idx(all_anchor_classes).cpu().numpy()

    # get model predictions, unsorted and no nms
    raw_bbox_predictions = output_handler._convert_offsets_to_bboxes(
        bbox_predictions, image_info[1])
    raw_class_confidences = output_handler._convert_confidences_to_workable_data(
        classification_predictions)
    raw_class_indeces, _ = output_handler._get_predicted_class(
        raw_class_confidences)

    # rescale gt bboxes and anchors
    gt_bbox_for_matched_anchors = output_handler._rescale_bboxes(
        gt_bbox_for_matched_anchors, image_info[1])
    matched_gt_class_idxs = map_id_to_idx(
        matched_gt_class_ids[pos_idx]).cpu().numpy()
    anchors_xywh = output_handler._rescale_bboxes(anchors_xywh, image_info[1])

    if model_outputs:
        test(raw_bbox=raw_bbox_predictions,
             raw_class_confidences=raw_class_confidences,
             raw_class_indeces=raw_class_indeces,
             gt_bbox=gt_bbox,
             gt_class=gt_class,
             pred_bbox=processed_predicted_bboxes,
             pred_class=processed_predicted_classes,
             highest_confidence_for_predictions=highest_confidence_for_predictions,
             indeces_kept_by_nms=indeces_kept_by_nms,
             pos_idx=pos_idx,
             size=image_info[1],
             image=image,
             anchors=anchors_xywh,
             gt_bbox_for_matched_anchors=gt_bbox_for_matched_anchors,
             matched_gt_class_idxs=matched_gt_class_idxs,
             all_anchor_classes=all_anchor_classes,
             verbose=verbose,
             very_verbose=very_verbose)

    return inspect_anchors(
        image=image,
        anchors=anchors_xywh,
        gt_bbox_for_matched_anchors=gt_bbox_for_matched_anchors,
        gt_classes_for_matched_anchors=matched_gt_class_idxs,
        pos_idx=pos_idx,
        size=image_info[1],
        visualize_anchors=visualize_anchors,
        visualize_anchor_gt_pair=visualize_anchor_gt_pair)
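The anchor matching above is driven by the `jaccard` overlap matrix between ground-truth boxes and anchors; that helper is not in the listing, so here is a minimal sketch of a pairwise IoU over corner-format tensors:

import torch

def jaccard_sketch(boxes_a, boxes_b):
    """Pairwise IoU between two sets of corner-format (l, t, r, b) boxes;
    returns an |A| x |B| overlap matrix like the jaccard call above."""
    # intersection corners, broadcast to |A| x |B| x 2
    tl = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])
    br = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])
    inter = (br - tl).clamp(min=0).prod(dim=2)
    area_a = (boxes_a[:, 2:] - boxes_a[:, :2]).prod(dim=1)
    area_b = (boxes_b[:, 2:] - boxes_b[:, :2]).prod(dim=1)
    return inter / (area_a[:, None] + area_b[None, :] - inter)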
Example #5
    if args.input[-4:] in ('.mp4', '.avi'):
        video = cv2.VideoCapture(args.input)

        video_length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        pbar = tqdm(unit='frame', total=video_length)

        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                video.release()
                break

            resized_frame = cv2.resize(frame, (config['IMAGE_SIZE'], config['IMAGE_SIZE']))
            detected_bboxes = sess.run(model.outputs, feed_dict={model.inputs: np.expand_dims(resized_frame, axis=0)})
            filtered_bboxes = postprocessing.nms(detected_bboxes, conf_thresh=config['CONF_THRESH'],
                                                 iou_thresh=config['IOU_THRESH'])

            for class_id, v in filtered_bboxes.items():
                if args.classes is None or classes[class_id] in args.classes:
                    for detection in v:
                        label_bboxes(frame, detection['bbox'], class_id, detection['score'])

            pbar.update(1)
            cv2.imshow('Result', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                video.release()
                break
        print("Done")
    else:
        frame = cv2.imread(args.input)
        resized_frame = cv2.resize(frame, (config['IMAGE_SIZE'], config['IMAGE_SIZE']))