def draw_bounding_boxes(self, image, detections):
    """Draw a labelled rectangle on ``image`` for every detection.

    Args:
        image: Image handed to the OpenCV drawing calls. The first two
            entries of ``image.shape`` are passed to
            ``DetectionUtils.rescale_boxes`` as the target size.
        detections: ``None`` or a sized iterable of rows that unpack to
            ``(x1, y1, x2, y2, conf, cls_conf, cls_pred)``.

    Returns:
        ``image`` unchanged when there is nothing to draw, otherwise the
        image produced by the OpenCV drawing calls.
    """
    # ``len`` is used instead of truthiness because the detections may be a
    # multi-element tensor/array, whose truth value is ambiguous.
    if detections is None or len(detections) == 0:
        return image

    import logging  # local import keeps this block self-contained
    logger = logging.getLogger(__name__)

    detections = DetectionUtils.rescale_boxes(
        detections, self.image_size, image.shape[:2])

    for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
        try:
            class_index = int(cls_pred)
            color = self.colors[class_index]
            text = self.classes[class_index] + ' ' + str(
                int(cls_conf * 100)) + '%'
            # OpenCV expects integer pixel coordinates; cast explicitly so
            # that tensor/float values are not rejected by the bindings.
            left, top, right, bottom = int(x1), int(y1), int(x2), int(y2)
        except (IndexError, ValueError, OverflowError) as err:
            # Unknown class index or non-finite coordinate: skip this box
            # only, keep drawing the others.
            logger.warning("Skipping detection that cannot be drawn: %s", err)
            continue

        width, height = cv2.getTextSize(
            text, self.font, self.font_scale, thickness=1)[0]
        try:
            # Box outline.
            image = cv2.rectangle(
                image, (left, top), (right, bottom), color, 2)
            # Filled label background sitting on top of the box.
            image = cv2.rectangle(
                image, (left, top),
                (left + width + 5, top - height - 10), color, cv2.FILLED)
            # Integer division keeps the text origin a valid pixel position.
            image = cv2.putText(
                image, text, (left, top - height // 2), self.font,
                self.font_scale, (255, 255, 255), 1)
        except cv2.error as err:
            logger.warning("OpenCV failed to draw a detection: %s", err)

    return image
def detect(self, image):
    """Run the model on a single image and return its detections.

    The image is turned into a tensor via ``ImageUtils.get_image_tensor``
    at ``self.image_size``, given a leading batch dimension, pushed through
    ``self.model`` in eval mode with gradients disabled, and filtered by
    ``DetectionUtils.non_max_suppression``.

    Args:
        image: Input accepted by ``ImageUtils.get_image_tensor``.

    Returns:
        Element 0 of the non-max-suppression result, i.e. the entry that
        belongs to the one image in the batch.
    """
    # Pick the CUDA float tensor type whenever a GPU is available.
    if torch.cuda.is_available():
        tensor_type = torch.cuda.FloatTensor
    else:
        tensor_type = torch.FloatTensor

    single = ImageUtils.get_image_tensor(image, self.image_size)
    batch = Variable(single.unsqueeze(0).type(tensor_type))

    self.model.eval()
    with torch.no_grad():
        raw_output = self.model(batch)
        kept = DetectionUtils.non_max_suppression(
            raw_output, self.confidence_threshold, self.nms_threshold)
        # The batch holds exactly one image, so only index 0 is relevant.
        return kept[0]
def __init__(self, root_path="./", confidence_threshold=0.8,
             nms_threshold=0.4, image_size=416):
    """Configure the detector and load its model.

    Args:
        root_path: Prefix prepended to the asset and config locations.
        confidence_threshold: Stored as ``self.confidence_threshold``.
        nms_threshold: Stored as ``self.nms_threshold``.
        image_size: Stored as ``self.image_size``.
    """
    # Detection parameters.
    self.image_size = image_size
    self.nms_threshold = nms_threshold
    self.confidence_threshold = confidence_threshold

    # File locations, all relative to ``root_path``.
    self.image_folder = root_path + 'assets/images/'
    self.model_cfg = root_path + 'config/yolov3.cfg'
    self.weights_path = root_path + 'config/yolov3.weights'
    self.class_path = root_path + 'config/voc.names'

    # Label rendering settings.
    self.font = cv2.FONT_HERSHEY_PLAIN
    self.font_scale = 1

    self.classes = DetectionUtils.load_classes(self.class_path)

    # One random 3-channel colour per class, plus one spare entry.
    palette_size = len(self.classes) + 1
    self.colors = [
        tuple(randint(0, 255) for _ in range(3))
        for _ in range(palette_size)
    ]

    self.model = self.__load_model()
def __init__(self, confidence_threshold=0.8, nms_threshold=0.4,
             image_size=416, tiny=False):
    """Configure the detector and load its model.

    Args:
        confidence_threshold: Stored as ``self.confidence_threshold``.
        nms_threshold: Stored as ``self.nms_threshold``.
        image_size: Stored as ``self.image_size``.
        tiny: When truthy, the ``yolov3-tiny`` config and weights files are
            selected instead of the ``yolov3`` ones.
    """
    # Choose the config/weights pair for the requested variant.
    if tiny:
        cfg_file = 'config/yolov3-tiny.cfg'
        weights_file = 'config/yolov3-tiny.weights'
    else:
        cfg_file = 'config/yolov3.cfg'
        weights_file = 'config/yolov3.weights'
    self.model_cfg = Constants.ROOT_PATH + cfg_file
    self.weights_path = Constants.ROOT_PATH + weights_file
    self.class_path = Constants.ROOT_PATH + 'config/coco.names'

    # Detection parameters.
    self.confidence_threshold = confidence_threshold
    self.nms_threshold = nms_threshold
    self.image_size = image_size

    # Label rendering settings.
    self.font_scale = 1
    self.font = cv2.FONT_HERSHEY_PLAIN

    self.classes = DetectionUtils.load_classes(self.class_path)
    # Every class shares the same fixed colour (one list entry per class).
    self.colors = [(0, 175, 0)] * len(self.classes)

    self.model = self.__load_model()
def forward(self, x, targets=None):
    """Pass ``x`` through every configured layer and gather YOLO outputs.

    Layers are taken pairwise from ``self.module_defs`` and
    ``self.module_list`` and dispatched on the definition's ``"type"``.
    Each layer's output is recorded so that ``route`` and ``shortcut``
    layers can refer back to earlier results.

    Args:
        x: Input tensor; ``x.shape[2]`` is forwarded to the YOLO layers as
            the image dimension.
        targets: Optional targets forwarded to the YOLO layers.

    Returns:
        The concatenated YOLO outputs (after ``DetectionUtils.to_cpu``)
        when ``targets`` is ``None``; otherwise ``(loss, outputs)``.
    """
    plain_layers = ("convolutional", "upsample", "maxpool")
    img_dim = x.shape[2]
    loss = 0
    history = []      # output of every layer, in order
    detections = []   # outputs of the yolo layers only

    for definition, layer in zip(self.module_defs, self.module_list):
        kind = definition["type"]
        if kind in plain_layers:
            x = layer(x)
        elif kind == "route":
            # Concatenate the referenced earlier outputs along dim 1.
            picked = []
            for ref in definition["layers"].split(","):
                picked.append(history[int(ref)])
            x = torch.cat(picked, 1)
        elif kind == "shortcut":
            # Sum of the previous output and the referenced one.
            source = int(definition["from"])
            x = history[-1] + history[source]
        elif kind == "yolo":
            x, layer_loss = layer[0](x, targets, img_dim)
            loss += layer_loss
            detections.append(x)
        history.append(x)

    merged = DetectionUtils.to_cpu(torch.cat(detections, 1))
    if targets is None:
        return merged
    return (loss, merged)
def forward(self, x, targets=None, img_dim=None):
    """Decode raw layer output into boxes, confidences and class scores.

    ``x`` is reshaped to
    ``(num_samples, num_anchors, num_classes + 5, grid, grid)`` and permuted
    so that the ``num_classes + 5`` values of each cell come last.

    Args:
        x: Raw input tensor; ``x.size(0)`` is the sample count and
            ``x.size(2)`` the grid size.
        targets: Optional targets. When given, losses and metrics are
            computed and ``self.metrics`` is populated.
        img_dim: Stored on ``self.img_dim``.

    Returns:
        ``(output, 0)`` when ``targets`` is ``None``, otherwise
        ``(output, total_loss)``.
    """
    # Pick the tensor type matching the device of the input.
    on_gpu = x.is_cuda
    tensor_type = torch.cuda.FloatTensor if on_gpu else torch.FloatTensor

    self.img_dim = img_dim
    num_samples, grid_size = x.size(0), x.size(2)

    reshaped = x.view(
        num_samples, self.num_anchors, self.num_classes + 5,
        grid_size, grid_size)
    prediction = reshaped.permute(0, 1, 3, 4, 2).contiguous()

    # Split the last dimension into its components.
    center_x = torch.sigmoid(prediction[..., 0])
    center_y = torch.sigmoid(prediction[..., 1])
    raw_w = prediction[..., 2]
    raw_h = prediction[..., 3]
    pred_conf = torch.sigmoid(prediction[..., 4])
    pred_cls = torch.sigmoid(prediction[..., 5:])

    # Recompute the grid offsets if the grid size changed since last call.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=on_gpu)

    # Add offsets and scale with anchors.
    pred_boxes = tensor_type(prediction[..., :4].shape)
    pred_boxes[..., 0] = center_x.data + self.grid_x
    pred_boxes[..., 1] = center_y.data + self.grid_y
    pred_boxes[..., 2] = torch.exp(raw_w.data) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(raw_h.data) * self.anchor_h

    scaled_boxes = pred_boxes.view(num_samples, -1, 4) * self.stride
    flat_conf = pred_conf.view(num_samples, -1, 1)
    flat_cls = pred_cls.view(num_samples, -1, self.num_classes)
    output = torch.cat((scaled_boxes, flat_conf, flat_cls), -1)

    if targets is None:
        return output, 0

    (iou_scores, class_mask, obj_mask, noobj_mask,
     tx, ty, tw, th, tcls, tconf) = DetectionUtils.build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )

    # Loss: mask outputs to ignore non-existing objects (except with the
    # confidence loss, which also covers the no-object cells).
    loss_x = self.mse_loss(center_x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(center_y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(raw_w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(raw_h[obj_mask], th[obj_mask])
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics.
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    hits50 = torch.sum(iou50 * detected_mask)
    hits75 = torch.sum(iou75 * detected_mask)
    precision = hits50 / (conf50.sum() + 1e-16)
    recall50 = hits50 / (obj_mask.sum() + 1e-16)
    recall75 = hits75 / (obj_mask.sum() + 1e-16)

    # Every tensor metric is moved to the CPU and reduced to a Python number.
    tracked = (
        ("loss", total_loss),
        ("x", loss_x),
        ("y", loss_y),
        ("w", loss_w),
        ("h", loss_h),
        ("conf", loss_conf),
        ("cls", loss_cls),
        ("cls_acc", cls_acc),
        ("recall50", recall50),
        ("recall75", recall75),
        ("precision", precision),
        ("conf_obj", conf_obj),
        ("conf_noobj", conf_noobj),
    )
    metrics = {}
    for name, value in tracked:
        metrics[name] = DetectionUtils.to_cpu(value).item()
    metrics["grid_size"] = grid_size
    self.metrics = metrics

    return output, total_loss