def predict(self, image_path, threshold=0.1, nms_overlap=0.1,
            return_scores=True, visualize=False, resize=False):
    """
    Predicts and displays the results of a trained model on a single image.

    ===================== ===========================================
    **Argument**          **Description**
    --------------------- -------------------------------------------
    image_path            Required. Path to the image file to make
                          the predictions on.
    --------------------- -------------------------------------------
    threshold             Optional float. The probability above which
                          a detection will be considered valid.
    --------------------- -------------------------------------------
    nms_overlap           Optional float. The intersection over union
                          threshold with other predicted bounding
                          boxes, above which the box with the highest
                          score will be considered a true positive.
    --------------------- -------------------------------------------
    return_scores         Optional boolean. Will return the
                          probability scores of the bounding box
                          predictions if True.
    --------------------- -------------------------------------------
    visualize             Optional boolean. Displays the image with
                          predicted bounding boxes if True.
    --------------------- -------------------------------------------
    resize                Optional boolean. Resizes the image to the
                          same size (chip_size parameter in
                          prepare_data) that the model was trained
                          on, before detecting objects. Note that if
                          resize_to parameter was used in
                          prepare_data, the image is resized to that
                          size instead.

                          By default, this parameter is false and the
                          detections are run in a sliding window
                          fashion by applying the model on cropped
                          sections of the image (of the same size as
                          the model was trained on).
    ===================== ===========================================

    :returns: 'List' of xmin, ymin, width, height of predicted bounding
              boxes on the given image
    """
    if not HAS_OPENCV:
        raise Exception(
            "This function requires opencv 4.0.1.24. "
            "Install it using pip install opencv-python==4.0.1.24"
        )
    if not HAS_PIL:
        raise Exception(
            "This function requires PIL. Please install it via pip or conda"
        )

    # Accept either a path on disk or an already-decoded (BGR) ndarray.
    if isinstance(image_path, str):
        image = cv2.imread(image_path)
    else:
        image = image_path

    orig_height, orig_width, _ = image.shape
    orig_frame = image.copy()

    # Optional resize to the training chip size; resize_to (when set in
    # prepare_data) takes precedence over the resize flag below.
    if resize and self._data.resize_to is None \
            and self._data.chip_size is not None:
        image = cv2.resize(
            image, (self._data.chip_size, self._data.chip_size))

    if self._data.resize_to is not None:
        if isinstance(self._data.resize_to, tuple):
            image = cv2.resize(image, self._data.resize_to)
        else:
            image = cv2.resize(
                image, (self._data.resize_to, self._data.resize_to))

    height, width, _ = image.shape

    # Slide a chip_size window over the (possibly resized) image, or
    # treat the whole image as a single chip.
    if self._data.chip_size is not None:
        chips = _get_image_chips(image, self._data.chip_size)
    else:
        chips = [{
            'width': width,
            'height': height,
            'xmin': 0,
            'ymin': 0,
            'chip': image,
            'predictions': []
        }]

    # Temporarily disable validation transforms so chips reach the model
    # unaltered; restored after inference.
    valid_tfms = self._data.valid_ds.tfms
    self._data.valid_ds.tfms = []

    # With a single chip there is no neighbouring chip to recover a
    # border-touching box from, so padded-border detections are kept.
    include_pad_detections = len(chips) == 1

    for chip in chips:
        # BGR (OpenCV) -> RGB, scale to [0, 1], wrap as a fastai Image.
        frame = Image(
            pil2tensor(
                PIL.Image.fromarray(
                    cv2.cvtColor(chip['chip'], cv2.COLOR_BGR2RGB)),
                dtype=np.float32).div_(255))

        self.learn.predicting = True
        bbox = self.learn.predict(frame,
                                  thresh=threshold,
                                  nms_overlap=nms_overlap,
                                  ret_scores=True,
                                  model=self)[0]
        if bbox:
            scores = bbox.scores
            bboxes, lbls = bbox._compute_boxes()
            # Map boxes from the model's [-1, 1] space to chip pixels.
            # NOTE(review): the .long() result is discarded, so the
            # coordinates stay floats — kept as in the original.
            bboxes.add_(1).mul_(
                torch.tensor([
                    chip['height'] / 2,
                    chip['width'] / 2,
                    chip['height'] / 2,
                    chip['width'] / 2
                ])).long()
            for index, bbox in enumerate(bboxes):
                label = lbls[index] if lbls is not None else 'Default'
                data = bb2hw(bbox)
                if include_pad_detections or not _exclude_detection(
                        (data[0], data[1], data[2], data[3]),
                        chip['width'], chip['height']):
                    chip['predictions'].append({
                        'xmin': data[0],
                        'ymin': data[1],
                        'width': data[2],
                        'height': data[3],
                        'score': float(scores[index]),
                        'label': label
                    })

    self._data.valid_ds.tfms = valid_tfms

    predictions, labels, scores = _get_transformed_predictions(chips)

    # Scale the predictions back to the original image and clip them to
    # the original image dimensions.
    y_ratio = orig_height / height
    x_ratio = orig_width / width
    for index, prediction in enumerate(predictions):
        prediction[0] = prediction[0] * x_ratio
        prediction[1] = prediction[1] * y_ratio
        prediction[2] = prediction[2] * x_ratio
        prediction[3] = prediction[3] * y_ratio

        # Clip xmin (kept at 1 rather than 0 as in the original code).
        if prediction[0] < 0:
            prediction[2] = prediction[2] + prediction[0]
            prediction[0] = 1

        # Clip width when xmax is greater than the original width.
        # (Bug fix: previously this assigned the overflow amount,
        # (xmin + width) - orig_width, instead of clipping the box at
        # the image border.)
        if prediction[0] + prediction[2] > orig_width:
            prediction[2] = orig_width - prediction[0]

        # Clip ymin
        if prediction[1] < 0:
            prediction[3] = prediction[3] + prediction[1]
            prediction[1] = 1

        # Clip height when ymax is greater than the original height.
        # (Same bug fix as for the width above.)
        if prediction[1] + prediction[3] > orig_height:
            prediction[3] = orig_height - prediction[1]

        predictions[index] = [
            prediction[0], prediction[1], prediction[2], prediction[3]
        ]

    if visualize:
        image = _draw_predictions(orig_frame,
                                  predictions,
                                  labels,
                                  color=(255, 0, 0),
                                  fontface=2,
                                  thickness=1)
        import matplotlib.pyplot as plt
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Larger canvas for COCO-style datasets.
        if getattr(self._data, "_is_coco", "") == True:
            figsize = (20, 20)
        else:
            figsize = (4, 4)
        fig, ax = plt.subplots(1, 1, figsize=figsize)
        ax.imshow(image)

    if return_scores:
        return predictions, labels, scores
    else:
        return predictions, labels
def predict(self, image_path, threshold=0.5, nms_overlap=0.1,
            return_scores=False, visualize=False):
    """
    Predicts and displays the results of a trained model on a single
    image.

    (Docstring fix: the previous summary wrongly described video VMTI
    prediction; this method operates on a single image.)

    ===================== ===========================================
    **Argument**          **Description**
    --------------------- -------------------------------------------
    image_path            Required. Path to the image file to make
                          the predictions on.
    --------------------- -------------------------------------------
    threshold             Optional float. The probability above which
                          a detection will be considered valid.
    --------------------- -------------------------------------------
    nms_overlap           Optional float. The intersection over union
                          threshold with other predicted bounding
                          boxes, above which the box with the highest
                          score will be considered a true positive.
    --------------------- -------------------------------------------
    return_scores         Optional boolean. Will return the
                          probability scores of the bounding box
                          predictions if True.
    --------------------- -------------------------------------------
    visualize             Optional boolean. Displays the image with
                          predicted bounding boxes if True.
    ===================== ===========================================

    :returns: 'List' of xmin, ymin, width, height of predicted bounding
              boxes on the given image
    """
    if not HAS_OPENCV:
        raise Exception(
            "This function requires opencv 4.0.1.24. "
            "Install it using pip install opencv-python==4.0.1.24")

    # Accept either a path on disk or an already-decoded (BGR) ndarray.
    if isinstance(image_path, str):
        image = cv2.imread(image_path)
    else:
        image = image_path

    orig_height, orig_width, _ = image.shape
    orig_frame = image.copy()

    # Honour the resize_to setting from prepare_data, if any.
    if self._data.resize_to is not None:
        if isinstance(self._data.resize_to, tuple):
            image = cv2.resize(image, self._data.resize_to)
        else:
            image = cv2.resize(
                image, (self._data.resize_to, self._data.resize_to))

    height, width, _ = image.shape

    # Slide a chip_size window over the image, or treat the whole image
    # as a single chip.
    if self._data.chip_size is not None:
        chips = _get_image_chips(image, self._data.chip_size)
    else:
        chips = [{
            'width': width,
            'height': height,
            'xmin': 0,
            'ymin': 0,
            'chip': image,
            'predictions': []
        }]

    # Temporarily disable validation transforms so chips reach the model
    # unaltered; restored after inference.
    valid_tfms = self._data.valid_ds.tfms
    self._data.valid_ds.tfms = []

    for chip in chips:
        # BGR (OpenCV) -> RGB, scale to [0, 1], wrap as a fastai Image.
        frame = Image(
            pil2tensor(
                PIL.Image.fromarray(
                    cv2.cvtColor(chip['chip'], cv2.COLOR_BGR2RGB)),
                dtype=np.float32).div_(255))

        bbox = self.learn.predict(frame,
                                  thresh=threshold,
                                  nms_overlap=nms_overlap,
                                  ret_scores=True,
                                  ssd=self)[0]
        if bbox:
            scores = bbox.scores
            bboxes, lbls = bbox._compute_boxes()
            # Map boxes from the model's [-1, 1] space to chip pixels.
            # NOTE(review): the .long() result is discarded, so the
            # coordinates stay floats — kept as in the original.
            bboxes.add_(1).mul_(
                torch.tensor([
                    chip['height'] / 2,
                    chip['width'] / 2,
                    chip['height'] / 2,
                    chip['width'] / 2
                ])).long()
            for index, bbox in enumerate(bboxes):
                label = lbls[index] if lbls is not None else 'Default'
                data = bb2hw(bbox)
                if not _exclude_detection(
                        (data[0], data[1], data[2], data[3]),
                        chip['width'], chip['height']):
                    chip['predictions'].append({
                        'xmin': data[0],
                        'ymin': data[1],
                        'width': data[2],
                        'height': data[3],
                        'score': float(scores[index]),
                        'label': label
                    })

    self._data.valid_ds.tfms = valid_tfms

    predictions, labels, scores = _get_transformed_predictions(chips)

    # Scale the predictions back to the original image and clip them to
    # the original image dimensions.
    y_ratio = orig_height / height
    x_ratio = orig_width / width
    for index, prediction in enumerate(predictions):
        prediction[0] = prediction[0] * x_ratio
        prediction[1] = prediction[1] * y_ratio
        prediction[2] = prediction[2] * x_ratio
        prediction[3] = prediction[3] * y_ratio

        # Clip xmin (kept at 1 rather than 0 as in the original code).
        if prediction[0] < 0:
            prediction[2] = prediction[2] + prediction[0]
            prediction[0] = 1

        # Clip width when xmax is greater than the original width.
        # (Bug fix: previously this assigned the overflow amount,
        # (xmin + width) - orig_width, instead of clipping the box at
        # the image border.)
        if prediction[0] + prediction[2] > orig_width:
            prediction[2] = orig_width - prediction[0]

        # Clip ymin
        if prediction[1] < 0:
            prediction[3] = prediction[3] + prediction[1]
            prediction[1] = 1

        # Clip height when ymax is greater than the original height.
        # (Same bug fix as for the width above.)
        if prediction[1] + prediction[3] > orig_height:
            prediction[3] = orig_height - prediction[1]

        predictions[index] = [
            prediction[0], prediction[1], prediction[2], prediction[3]
        ]

    if visualize:
        image = _draw_predictions(orig_frame, predictions, labels)
        import matplotlib.pyplot as plt
        plt.xticks([])
        plt.yticks([])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        plt.imshow(PIL.Image.fromarray(image))

    if return_scores:
        return predictions, labels, scores
    else:
        return predictions, labels