def predict(self, image, tile_size=512, batch_size=8):
    '''Run tiled inference on one image and decode all tiles together.

    The image is normalized, split into overlapping square tiles (the stride
    is half of the tile size), every tile is passed through the model in
    mini-batches and the raw outputs of all tiles are handed to
    `self.box_coder.decode_multi` together with the per-tile offsets.

    Args:
        image: image array accepted by `albumentations.Normalize`
            (presumably HxWx3 RGB - confirm against callers).
        tile_size: (int) side length of a tile; stride is `tile_size // 2`.
        batch_size: (int) number of tiles per forward pass.

    Returns:
        Tuple `(boxes, labels, scores)`, each converted with `to_numpy`.
    '''
    import albumentations as A

    self.eval()

    image = A.Normalize()(image=image)['image']

    slicer = ImageSlicer(image.shape, tile_size, tile_size // 2)
    patches = [
        tensor_from_rgb_image(patch)
        for patch in slicer.split(image, borderType=cv2.BORDER_CONSTANT)
    ]
    # The first two entries of every crop are repeated twice; presumably this
    # shifts both corners of a box into image coordinates - confirm against
    # `decode_multi`.
    offsets = torch.tensor(
        [[crop[0], crop[1], crop[0], crop[1]] for crop in slicer.bbox_crops],
        dtype=torch.float32)

    # Tiles are moved to wherever the first FPN convolution lives.
    device = self.fpn.conv1.weight.device

    all_bboxes = []
    all_labels = []
    with torch.no_grad():
        # Offsets are only needed for decoding, so the loader batches tiles alone.
        for patch in DataLoader(patches, batch_size=batch_size, pin_memory=True):
            bboxes, labels = self(patch.to(device))
            # extend() iterates the batch dimension: one entry per tile.
            all_bboxes.extend(bboxes.cpu())
            all_labels.extend(labels.cpu())

    boxes, labels, scores = self.box_coder.decode_multi(
        all_bboxes, all_labels, offsets)
    return to_numpy(boxes), to_numpy(labels), to_numpy(scores)
def update(self, y_pred: Tensor, y_true: Tensor):
    '''Append one `map_iou` score per image that has ground-truth boxes.

    Both arguments are indexed with `SSD_BBOXES_KEY` and `SSD_LABELS_KEY`,
    so despite the annotations they are used as keyed containers of tensors.

    Args:
        y_pred: model outputs; label predictions get a softmax over dim 2.
        y_true: targets; labels are one-hot encoded to the predicted
            number of classes.

    Images whose decoded ground truth is empty are skipped. Images with
    ground truth but no decoded predictions score 0.
    '''
    gt_locs = y_true[SSD_BBOXES_KEY].detach().cpu()
    pr_locs = y_pred[SSD_BBOXES_KEY].detach().cpu()
    pr_logits = y_pred[SSD_LABELS_KEY].detach().cpu()
    gt_labels = y_true[SSD_LABELS_KEY].detach().cpu()

    pr_probs = pr_logits.softmax(dim=2)
    gt_onehot = one_hot(gt_labels, num_classes=pr_probs.size(2))

    per_image = zip(pr_locs, pr_probs, gt_locs, gt_onehot)
    for pr_loc, pr_cls, gt_loc, gt_cls in per_image:
        pr_boxes, _, pr_conf = self.box_coder.decode(pr_loc, pr_cls)
        gt_boxes, _, _ = self.box_coder.decode(gt_loc, gt_cls)

        gt_boxes = to_numpy(change_box_order(gt_boxes, 'xyxy2xywh'))
        pr_boxes = to_numpy(change_box_order(pr_boxes, 'xyxy2xywh'))
        pr_conf = to_numpy(pr_conf)

        # Nothing to score against when the image has no ground truth.
        if len(gt_boxes) == 0:
            continue

        if len(pr_boxes) == 0:
            image_score = 0
        else:
            image_score = map_iou(gt_boxes, pr_boxes, pr_conf)
        self.scores_per_image.append(image_score)
def decode(self, loc_preds, cls_preds, score_thresh=0.5, nms_thresh=0.5):
    '''Decode predicted loc/cls into confident, NMS-filtered boxes.

    Args:
        loc_preds: encoded box predictions, one row per anchor; converted
            with `to_numpy` and decoded by `self.decode_boxes`.
        cls_preds: per-anchor confidence scores. NOTE(review): the boolean
            mask built from it indexes the decoded boxes directly, so it is
            presumably 1-D, sized [#anchors] - confirm against callers.
        score_thresh: (float) predictions are kept only when their
            confidence is strictly greater than this value.
        nms_thresh: (float) threshold forwarded to `rbbox_nms`.

    Returns:
        boxes: (np.ndarray) decoded boxes that passed the score threshold
            and NMS.
        scores: (np.ndarray) confidence of each returned box.
    '''
    loc_preds = to_numpy(loc_preds)
    cls_preds = to_numpy(cls_preds)

    boxes = self.decode_boxes(loc_preds)

    confident = cls_preds > score_thresh
    boxes = boxes[confident]
    scores = cls_preds[confident]

    # Nothing passed the score threshold: skip NMS, which has nothing to do.
    if len(boxes) == 0:
        return boxes, scores

    keep = rbbox_nms(boxes, scores, threshold=nms_thresh)
    return boxes[keep], scores[keep]
def rbbox_iou(prior_boxes: np.ndarray, gt_boxes: np.ndarray):
    '''Build the [N,M] overlap matrix between two sets of oriented boxes.

    Column `j` is filled with the result of
    `intersection_area_one2many(gt_boxes[j], prior_boxes)`.

    Args:
        prior_boxes: Prior (anchor) boxes, sized [N,5], order (x, y, w, h, t).
        gt_boxes: Ground-truth boxes, sized [M,5], order (x, y, w, h, t).

    Return:
        float32 matrix sized [N,M]. It is a `torch.Tensor` when
        `prior_boxes` was passed as a tensor, otherwise an `np.ndarray`.

    Reference:
        https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
    '''
    # The output type follows the type of the first argument only.
    as_torch = isinstance(prior_boxes, torch.Tensor)
    if as_torch:
        prior_boxes, gt_boxes = to_numpy(prior_boxes), to_numpy(gt_boxes)

    overlaps = np.zeros((len(prior_boxes), len(gt_boxes)), dtype=np.float32)
    for column, gt_box in enumerate(gt_boxes):
        overlaps[:, column] = intersection_area_one2many(gt_box, prior_boxes)

    return torch.from_numpy(overlaps) if as_torch else overlaps
def visualize_mask_predictions(data, normalize=A.Normalize(), threshold=0.5, show_groundtruth=True):
    '''Render predicted (and optionally true) masks over de-normalized images.

    Args:
        data: mapping with `ID_KEY`, `IMAGE_KEY`, 'pred_mask' and 'true_mask'.
        normalize: object exposing `std`, `mean` and `max_pixel_value`, used
            to undo the input normalization.
        threshold: mask values strictly above it are painted.
        show_groundtruth: when true, the ground-truth overlay is stacked to
            the right of the prediction overlay.

    Returns:
        List with one uint8 image per sample, titled with the sample id.
    '''
    blend = 0.5
    font_scale = 0.5
    font_thickness = 1
    text_color = (255, 255, 255)

    sample_ids = data[ID_KEY]
    normalized_images = to_numpy(data[IMAGE_KEY])
    predicted_masks = to_numpy(data['pred_mask'])
    target_masks = to_numpy(data['true_mask'])

    def paint(canvas, mask, ship_color=(0, 255, 0), edge_color=(255, 0, 0)):
        # Channel 0 is painted with the ship color; channel 1, when present,
        # is painted afterwards with the edge color. Paints in place.
        canvas[mask[0, ...] > threshold] = ship_color
        if mask.shape[0] > 1:
            canvas[mask[1, ...] > threshold] = edge_color
        return canvas

    def blended(rgb, mask):
        # Paint on a copy so the source image stays clean for the next overlay.
        return cv2.addWeighted(rgb, blend, paint(rgb.copy(), mask), 1 - blend, 0)

    rendered = []
    for sample_id, chw, pred, true in zip(sample_ids, normalized_images, predicted_masks, target_masks):
        # Channels-first to channels-last, then undo the normalization.
        rgb = np.moveaxis(chw, 0, -1)
        rgb = (rgb * np.array(normalize.std) + np.array(normalize.mean)) * normalize.max_pixel_value
        rgb = rgb.astype(np.uint8).copy()

        panel = blended(rgb, pred)
        if show_groundtruth:
            panel = np.hstack((panel, blended(rgb, true)))

        cv2.putText(panel, str(sample_id), (5, 25), cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale, text_color, font_thickness, lineType=cv2.LINE_AA)
        rendered.append(panel)

    return rendered
def update(self, y_pred, y_true):
    '''Rasterize predicted and true boxes per image and feed the base metric.

    Args:
        y_pred: keyed container with predicted boxes under `SSD_BBOXES_KEY`
            and predicted labels under `SSD_LABELS_KEY`.
        y_true: keyed container with the targets under the same keys.

    For every image, both the prediction and the target are converted with
    `self.rssd_predictions_to_ship_mask` and passed to
    `self.base_metric.update`.
    '''
    true_bboxes = to_numpy(y_true[SSD_BBOXES_KEY])
    pred_bboxes = to_numpy(y_pred[SSD_BBOXES_KEY])
    # NOTE(review): squeeze() without an argument removes every size-1
    # dimension, so a batch of one image would also lose its batch axis -
    # confirm the expected label shape and squeeze a specific dim if needed.
    pred_classes = to_numpy(y_pred[SSD_LABELS_KEY].detach().squeeze())
    true_classes = to_numpy(y_true[SSD_LABELS_KEY].detach().cpu())

    per_image = zip(pred_bboxes, pred_classes, true_bboxes, true_classes)
    for pred_loc, pred_cls, true_loc, true_cls in per_image:
        # Separate names keep the batch-level arguments intact.
        pred_mask = self.rssd_predictions_to_ship_mask(pred_loc, pred_cls)
        true_mask = self.rssd_predictions_to_ship_mask(true_loc, true_cls)
        self.base_metric.update(pred_mask, true_mask)
def predict_as_csv(self, dataset, batch_size=1, workers=0, mask_shape=(768, 768)):
    '''Predict every image of `dataset` and return RLE rows as a DataFrame.

    Args:
        dataset: dataset yielding `(image, y)` pairs where `y[ID_KEY]` holds
            the image ids; it must expose `box_coder.decode`.
        batch_size: (int) images per forward pass.
        workers: (int) number of DataLoader worker processes.
        mask_shape: (tuple) height and width of the rasterization canvas.

    Returns:
        `pd.DataFrame` with columns 'ImageId' and 'EncodedPixels'. Images
        with detections get one row per decoded box; images without get a
        single row whose 'EncodedPixels' is None.
    '''
    self.eval()
    dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=workers)

    image_ids = []
    rles = []

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for image, y in tqdm(dataloader, total=len(dataloader)):
            image = image.cuda(non_blocking=True)
            pred_boxes, pred_labels = self(image)

            per_image = zip(y[ID_KEY], to_numpy(pred_boxes), to_numpy(pred_labels))
            for image_id, boxes, scores in per_image:
                boxes, _, scores = dataset.box_coder.decode(boxes, scores)

                if not len(boxes):
                    image_ids.append(image_id)
                    rles.append(None)
                    continue

                # NOTE(review): the canvas is uint8, so instance ids above 255
                # cannot be stored - confirm the decoder never returns that
                # many boxes for one image.
                mask = np.zeros(mask_shape, dtype=np.uint8)

                # First, rasterize all boxes; box i is drawn with value i + 1.
                for i, rbox in enumerate(boxes):
                    visualize_rbbox(mask, rbox, color=(i + 1, i + 1, i + 1), thickness=cv2.FILLED)

                # Second, RLE-encode each id. Encoding from the shared canvas
                # prevents assigning the same pixel to multiple instances.
                for instance_id in range(1, len(boxes) + 1):
                    image_ids.append(image_id)
                    rles.append(rle_encode(mask == instance_id))

    return pd.DataFrame.from_dict({
        'ImageId': image_ids,
        'EncodedPixels': rles
    })
def update(self, y_pred: Tensor, y_true: Tensor):
    '''Accumulate per-image IoU for the configured channel.

    Channel `self.channel` of both tensors is flattened per sample. When
    `self.threshold` is set, predictions are binarized with a strict
    greater-than comparison. Only samples whose target has a positive sum
    contribute a score to `self.scores_per_image`.

    Args:
        y_pred: predictions, indexed as [batch, channel, ...].
        y_true: targets, indexed as [batch, channel, ...].
    '''
    n_samples = y_true.size(0)

    pred_flat = y_pred[:, self.channel, ...].detach().view(n_samples, -1)
    true_flat = y_true[:, self.channel, ...].detach().view(n_samples, -1)

    if self.threshold is not None:
        pred_flat = pred_flat > float(self.threshold)

    true_flat = true_flat.float()
    pred_flat = pred_flat.float()

    overlap = (pred_flat * true_flat).sum(dim=1)
    true_area = true_flat.sum(dim=1)
    area_sum = pred_flat.sum(dim=1) + true_area
    per_image_iou = overlap / (area_sum - overlap + 1e-7)

    # IoU is defined only for non-empty target masks.
    has_target = true_area > 0
    self.scores_per_image.extend(to_numpy(per_image_iou[has_target]))
def encode(self, boxes, labels, return_anchors=False):
    '''Assign ground-truth boxes to anchors and build training targets.

    Every anchor is paired with the box it overlaps most; additionally the
    best anchor of every box is forced to that box. Regression targets come
    from `self.encode_boxes`. Per the original notes (not verifiable from
    this function) that helper follows the SSD coding rules:
      tx = (x - anchor_x) / (variance[0]*anchor_w)
      ty = (y - anchor_y) / (variance[0]*anchor_h)
      tw = log(w / anchor_w)
      th = log(h / anchor_h)
      tt = tan(theta - anchor_theta)

    Class targets are 0 below 0.4 overlap, 1 at 0.5 and above or for the
    best anchor of a box, and -1 in between.

    Args:
      boxes: ground-truth boxes, one row per object, in the layout expected
        by `rbbox_iou` and `self.encode_boxes`.
      labels: object class labels; not read by this method.
      return_anchors: when true, also return the anchors whose class target
        is positive.

    Returns:
      loc_targets: result of `self.encode_boxes`, one row per anchor.
      cls_targets: (np.ndarray) integer class targets, sized [#anchors,].
      anchors (only with `return_anchors`): positive rows of
        `self.anchor_boxes`.
    '''
    anchors = self.anchor_boxes
    num_anchors = len(anchors)

    if len(boxes) == 0:
        # No objects: every anchor is background and is encoded against itself.
        loc_targets = self.encode_boxes(anchors)
        cls_targets = np.zeros(num_anchors, dtype=int)
    else:
        boxes = to_numpy(boxes)
        overlap = torch.from_numpy(rbbox_iou(anchors, boxes))  # [#anchors, #obj]

        best_iou_per_anchor, best_box_per_anchor = overlap.max(1)
        _, best_anchor_per_box = overlap.max(0)

        # Make sure every object owns at least its best anchor.
        assigned_box = best_box_per_anchor.numpy()
        assigned_box[best_anchor_per_box.numpy()] = np.arange(len(boxes))
        loc_targets = self.encode_boxes(boxes[assigned_box])

        anchor_iou = best_iou_per_anchor.numpy()
        cls_targets = np.full(num_anchors, -1, dtype=int)
        cls_targets[anchor_iou < 0.4] = 0
        cls_targets[anchor_iou >= 0.5] = 1
        cls_targets[best_anchor_per_box] = 1

    if not return_anchors:
        return loc_targets, cls_targets
    return loc_targets, cls_targets, self.anchor_boxes[cls_targets > 0]
def _snapshot_cls_batch(y_true, image, pred_ship_presence, true_ship_presence) -> dict:
    '''Copy one batch to the CPU in the layout read by visualize_cls_predictions.'''
    return {
        ID_KEY: y_true[ID_KEY],
        IMAGE_KEY: image.detach().cpu(),
        MASK_KEY: to_numpy(y_true[MASK_KEY]),
        'pred_has_ship': pred_ship_presence.detach().cpu().sigmoid(),
        'true_has_ship': true_ship_presence.detach().cpu(),
    }


def process_epoch(model,
                  criterions: dict,
                  criterion_weights: Optional[dict],
                  metrics: dict,
                  optimizer,
                  dataloader,
                  epoch: int,
                  is_train,
                  summary_writer,
                  tag=None) -> dict:
    '''Run one training or validation epoch of the ship-presence classifier.

    Args:
        model: network called as `model(image)`.
        criterions: mapping of loss name to callable `(pred, true) -> loss`.
        criterion_weights: weights forwarded to `compute_total_loss`.
        metrics: mapping of metric name to an object with `update`,
            `log_to_tensorboard` and `value`.
        optimizer: stepped after every batch when `is_train` is true.
        dataloader: yields `(image, y_true)`; `y_true` is indexed with
            `SHIP_PRESENSE_KEY`, `ID_KEY` and `MASK_KEY`.
        epoch: epoch index used for progress text and as the logging step.
        is_train: enables gradients, `model.train()` and optimizer steps.
        summary_writer: TensorBoard-style writer.
        tag: prefix for logged names; defaults to 'train' or 'val'.

    Returns:
        Dict with `'{tag}_loss'` (running average loss) and one
        `'{tag}_{metric name}'` entry per metric.
    '''
    avg_loss = AverageMeter()

    if tag is None:
        tag = 'train' if is_train else 'val'

    epoch_losses = {key: [] for key in criterions}

    # Start from -inf / +inf so the first finite loss always becomes both the
    # worst and the best batch, even if it is zero or negative.
    worst_batch_loss = -np.inf
    worst_batch = None

    best_batch_loss = np.inf
    best_batch = None

    with torch.set_grad_enabled(is_train):
        if is_train:
            model.train()
        else:
            model.eval()

        n_batches = len(dataloader)
        with tqdm(total=n_batches) as tq:
            tq.set_description(f'{tag} epoch {epoch}')

            for batch_index, (image, y_true) in enumerate(dataloader):
                batch_size = image.size(0)

                # Move all data to GPU
                image = image.cuda(non_blocking=True)
                true_ship_presence = y_true[SHIP_PRESENSE_KEY].cuda(non_blocking=True)

                if is_train:
                    optimizer.zero_grad()

                pred_ship_presence = model(image)

                losses = {
                    key: criterion(pred_ship_presence, true_ship_presence)
                    for key, criterion in criterions.items()
                }
                total_loss = compute_total_loss(losses, criterion_weights)

                if is_train:
                    total_loss.backward()
                    optimizer.step()

                total_loss = float(total_loss)

                # Keep the hardest and the easiest batch for visualization.
                if total_loss > worst_batch_loss:
                    worst_batch_loss = total_loss
                    worst_batch = _snapshot_cls_batch(
                        y_true, image, pred_ship_presence, true_ship_presence)

                if total_loss < best_batch_loss:
                    best_batch_loss = total_loss
                    best_batch = _snapshot_cls_batch(
                        y_true, image, pred_ship_presence, true_ship_presence)

                # Log losses
                for loss_name in criterions:
                    epoch_losses[loss_name].append(float(losses[loss_name]))

                # Log metrics
                for metric in metrics.values():
                    metric.update(pred_ship_presence, true_ship_presence)

                avg_loss.update(total_loss, batch_size)
                tq.set_postfix(loss='{:.3f}'.format(avg_loss.avg))
                tq.update()

    for key, metric in metrics.items():
        metric.log_to_tensorboard(summary_writer, f'{tag}/epoch/' + key, epoch)

    # Log losses
    for loss_name, loss_values in epoch_losses.items():
        if loss_values:
            summary_writer.add_scalar(f'{tag}/loss/{loss_name}', np.mean(loss_values), epoch)
            summary_writer.add_histogram(f'{tag}/loss/{loss_name}/histogram', np.array(loss_values), epoch)

    # Negatives. The snapshot stays None for an empty loader or when every
    # loss was NaN; there is nothing to draw in that case.
    if worst_batch is not None:
        negatives = visualize_cls_predictions(worst_batch, show_groundtruth=True)
        for i, image in enumerate(negatives):
            summary_writer.add_image(f'{tag}/negatives/{i}', tensor_from_rgb_image(image), epoch)

    # Positives
    if best_batch is not None:
        positives = visualize_cls_predictions(best_batch, show_groundtruth=True)
        for i, image in enumerate(positives):
            summary_writer.add_image(f'{tag}/positives/{i}', tensor_from_rgb_image(image), epoch)

    metric_scores = {f'{tag}_loss': avg_loss.avg}
    for key, metric in metrics.items():
        metric_scores[f'{tag}_{key}'] = metric.value()

    return metric_scores
def visualize_cls_predictions(data, normalize=A.Normalize(), threshold=0.5, show_groundtruth=True):
    '''Render ship-presence predictions over de-normalized images.

    Each image is blended with a colorized instance mask, titled with its
    id and annotated with the predicted confidence (bottom-left) and,
    optionally, the true label (bottom-right).

    Args:
        data: mapping with `ID_KEY`, `IMAGE_KEY`, `MASK_KEY`,
            'pred_has_ship' and 'true_has_ship'.
        normalize: object exposing `std`, `mean` and `max_pixel_value`, used
            to undo the input normalization.
        threshold: not used by this function; kept so existing calls that
            pass it keep working.
        show_groundtruth: when true, the true label is printed as well.

    Returns:
        List with one uint8 image per sample.
    '''
    batch_ids = data[ID_KEY]
    batch_images = to_numpy(data[IMAGE_KEY])
    batch_masks = to_numpy(data[MASK_KEY])
    batch_pred_has_ship = to_numpy(data['pred_has_ship'])
    batch_true_has_ship = to_numpy(data['true_has_ship'])

    images = []

    alpha = 0.5
    font_scale = 0.5
    font_thickness = 1
    text_color = (255, 255, 255)

    samples = zip(batch_ids, batch_images, batch_masks, batch_pred_has_ship, batch_true_has_ship)
    for image_id, image, mask, p, t in samples:
        # Channels-first to channels-last, then undo the normalization.
        image = np.moveaxis(image, 0, -1)
        image = (image * np.array(normalize.std) + np.array(normalize.mean)) * normalize.max_pixel_value
        image = image.astype(np.uint8).copy()

        # Put ships overlay
        ships_rgb = (label2rgb(mask, bg_label=0) * 255).astype(np.uint8)
        image = cv2.addWeighted(image, alpha, ships_rgb, 1 - alpha, 0)

        # Text is drawn on a copy and blended back, so it appears translucent.
        overlay = image.copy()

        # Put image title
        cv2.putText(overlay, str(image_id), (5, 25), cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale, text_color, font_thickness, lineType=cv2.LINE_AA)

        # Put prediction confidence
        cv2.putText(overlay, '{0:.2f}'.format(float(p)), (10, image.shape[0] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 0, 0), font_thickness,
                    lineType=cv2.LINE_AA)

        # Put true label
        if show_groundtruth:
            cv2.putText(overlay, '{0:.2f}'.format(float(t)),
                        (image.shape[1] - 40, image.shape[0] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 255, 0), font_thickness,
                        lineType=cv2.LINE_AA)

        overlay = cv2.addWeighted(image, alpha, overlay, 1 - alpha, 0)
        images.append(overlay)

    return images
def visualize_rssd_predictions(data, box_coder, normalize=A.Normalize(), show_groundtruth=True):
    '''Draw decoded rotated-box predictions (and targets) on a batch of images.

    Args:
        data: mapping with `ID_KEY`, `IMAGE_KEY`, 'pred_ssd_bboxes',
            'true_ssd_bboxes', 'pred_ssd_labels' and 'true_ssd_labels'.
        box_coder: object whose `decode(bboxes, labels)` returns
            `(boxes, probs)`.
        normalize: object exposing `std`, `mean` and `max_pixel_value`, used
            to undo the input normalization.
        show_groundtruth: when true, decoded targets are drawn on top of the
            predictions.

    Returns:
        List with one uint8 image per sample. At most 1024 predicted boxes
        are drawn per image; a warning is issued when more are decoded.
    '''
    batch_ids = data[ID_KEY]
    batch_images = to_numpy(data[IMAGE_KEY])
    pred_ssd_bboxes = to_numpy(data['pred_ssd_bboxes'])
    true_ssd_bboxes = to_numpy(data['true_ssd_bboxes'])
    pred_ssd_labels = to_numpy(data['pred_ssd_labels'])
    true_ssd_labels = to_numpy(data['true_ssd_labels'])

    images = []

    font_scale = 1
    font_thickness = 1
    text_color = (255, 255, 255)

    # Render pred bboxes
    for image_id, image, bboxes, labels in zip(batch_ids, batch_images, pred_ssd_bboxes, pred_ssd_labels):
        # Channels-first to channels-last, then undo the normalization.
        image = np.moveaxis(image, 0, -1)
        image = (image * np.array(normalize.std) + np.array(normalize.mean)) * normalize.max_pixel_value
        image = image.astype(np.uint8).copy()

        cv2.putText(image, str(image_id), (5, 25), cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale, text_color, font_thickness, lineType=cv2.LINE_AA)

        bboxes, probs = box_coder.decode(bboxes, labels)

        if len(probs) > 1024:
            warnings.warn(
                f'Too many {len(bboxes)} RSSD predictions. Will render first 1024'
            )
            bboxes = bboxes[:1024]
            probs = probs[:1024]

        if len(probs):
            image = draw_rbboxes(image, bboxes, probs, (255, 0, 0), thickness=3)

        images.append(image)

    # Render true bboxes
    if show_groundtruth:
        new_images = []
        for image, bboxes, labels in zip(images, true_ssd_bboxes, true_ssd_labels):
            bboxes, probs = box_coder.decode(bboxes, labels)

            # Test the decoded result, as the prediction branch does; the raw
            # `labels` input is non-empty whether or not any box was decoded.
            if len(probs):
                image = draw_rbboxes(image, bboxes, probs, (0, 255, 0), thickness=1, show_scores=False)

            new_images.append(image)

        images = new_images

    return images
def update(self, y_pred, y_true):
    '''Score one prediction/target pair with F2 and store the result.

    Both inputs are converted with `to_numpy` and passed through
    `self.expand_label_image` before `compute_f2` is applied; the score is
    appended to `self.f2_scores`.
    '''
    predicted = self.expand_label_image(to_numpy(y_pred))
    expected = self.expand_label_image(to_numpy(y_true))
    self.f2_scores.append(compute_f2(predicted, expected))