Example #1
    def predict(self, image):
        import albumentations as A
        self.eval()

        normalize = A.Normalize()
        image = normalize(image=image)['image']

        # Tile-based inference: split the image into overlapping 512x512 patches (256px step)
        slicer = ImageSlicer(image.shape, 512, 512 // 2)
        patches = [
            tensor_from_rgb_image(patch)
            for patch in slicer.split(image, borderType=cv2.BORDER_CONSTANT)
        ]
        offsets = torch.tensor([[crop[0], crop[1], crop[0], crop[1]]
                                for crop in slicer.bbox_crops],
                               dtype=torch.float32)

        all_bboxes = []
        all_labels = []

        with torch.set_grad_enabled(False):
            for patch, patch_loc in DataLoader(list(zip(patches, offsets)),
                                               batch_size=8,
                                               pin_memory=True):
                patch = patch.to(self.fpn.conv1.weight.device)
                bboxes, labels = self(patch)

                all_bboxes.extend(bboxes.cpu())
                all_labels.extend(labels.cpu())

        # Merge per-tile detections back into global image coordinates using the tile offsets
        boxes, labels, scores = self.box_coder.decode_multi(
            all_bboxes, all_labels, offsets)
        return to_numpy(boxes), to_numpy(labels), to_numpy(scores)
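The prediction above runs tiled inference: the image is split into overlapping 512x512 patches, each patch is predicted separately, and the per-patch boxes are shifted back by their tile offsets in decode_multi. A minimal sketch of the offset arithmetic a slicer with a 512-pixel tile and a 256-pixel step would produce; the function name and the clamping behaviour are illustrative only, not ImageSlicer's actual implementation:

import numpy as np

def tile_offsets(image_size, tile_size=512, tile_step=256):
    # Top-left corners of overlapping tiles covering the image.
    # The last row/column of tiles is clamped so tiles never run past the border.
    h, w = image_size
    ys = np.arange(0, max(h - tile_size, 0) + 1, tile_step)
    xs = np.arange(0, max(w - tile_size, 0) + 1, tile_step)
    if ys[-1] + tile_size < h:
        ys = np.append(ys, h - tile_size)
    if xs[-1] + tile_size < w:
        xs = np.append(xs, w - tile_size)
    return [(int(y), int(x)) for y in ys for x in xs]

# A 768x768 image with 512px tiles and a 256px step yields 4 overlapping tiles:
# [(0, 0), (0, 256), (256, 0), (256, 256)]
print(tile_offsets((768, 768)))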
Example #2
    def update(self, y_pred: Tensor, y_true: Tensor):
        true_ssd_bboxes = y_true[SSD_BBOXES_KEY].detach().cpu()
        pred_ssd_bboxes = y_pred[SSD_BBOXES_KEY].detach().cpu()
        pred_classes = y_pred[SSD_LABELS_KEY].detach().cpu()
        true_classes = y_true[SSD_LABELS_KEY].detach().cpu()

        pred_classes = pred_classes.softmax(dim=2)
        true_classes = one_hot(true_classes, num_classes=pred_classes.size(2))

        for pred_loc, pred_cls, true_loc, true_cls in zip(
                pred_ssd_bboxes, pred_classes, true_ssd_bboxes, true_classes):
            pred_bboxes, _, pred_conf = self.box_coder.decode(
                pred_loc, pred_cls)
            true_bboxes, _, _ = self.box_coder.decode(true_loc, true_cls)

            true_bboxes = change_box_order(true_bboxes, 'xyxy2xywh')
            pred_bboxes = change_box_order(pred_bboxes, 'xyxy2xywh')

            true_bboxes = to_numpy(true_bboxes)
            pred_bboxes = to_numpy(pred_bboxes)
            pred_conf = to_numpy(pred_conf)

            if len(true_bboxes) == 0:
                continue

            if len(pred_bboxes) == 0:
                score = 0
            else:
                score = map_iou(true_bboxes, pred_bboxes, pred_conf)

            self.scores_per_image.append(score)
Example #3
    def decode(self, loc_preds, cls_preds, score_thresh=0.5, nms_thresh=0.5):
        '''Decode predicted loc/cls back to real box locations and confidence scores.

        Args:
          loc_preds: (tensor) predicted loc, sized [#anchors,5].
          cls_preds: (tensor) predicted conf, sized [#anchors,#classes].
          score_thresh: (float) threshold for object confidence score.
          nms_thresh: (float) threshold for box nms.

        Returns:
          boxes: (tensor) rbbox locations, sized [#obj,5], in (x, y, w, h, theta) format.
          scores: (tensor) confidence scores of the kept boxes, sized [#obj,].
        '''

        loc_preds = to_numpy(loc_preds)
        cls_preds = to_numpy(cls_preds)

        boxes = self.decode_boxes(loc_preds)

        mask = cls_preds > score_thresh

        boxes = boxes[mask]
        scores = cls_preds[mask]

        keep = rbbox_nms(boxes, scores, threshold=nms_thresh)
        good_boxes = boxes[keep]
        good_scores = scores[keep]
        return good_boxes, good_scores
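Internally, decode is a confidence threshold followed by non-maximum suppression over rotated boxes. rbbox_nms itself is not reproduced here; as a rough illustration of the same greedy score-then-suppress pattern, here is a sketch that uses axis-aligned IoU as a stand-in for the rotated-box overlap (the function name and the epsilon are illustrative, not the project's rbbox_nms):

import numpy as np

def greedy_nms(boxes, scores, iou_thresh=0.5):
    # boxes: [N,4] as (x1, y1, x2, y2); keep boxes greedily by descending score,
    # dropping any remaining box whose IoU with a kept box exceeds iou_thresh.
    order = np.argsort(-scores)
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        if rest.size == 0:
            break
        x1 = np.maximum(boxes[i, 0], boxes[rest, 0])
        y1 = np.maximum(boxes[i, 1], boxes[rest, 1])
        x2 = np.minimum(boxes[i, 2], boxes[rest, 2])
        y2 = np.minimum(boxes[i, 3], boxes[rest, 3])
        inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_rest = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
        iou = inter / (area_i + area_rest - inter + 1e-7)
        order = rest[iou <= iou_thresh]
    return np.array(keep, dtype=int)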
Example #4
def rbbox_iou(prior_boxes: np.ndarray, gt_boxes: np.ndarray):
    '''Compute the intersection over union of two sets of oriented boxes.

    The box order must be (x, y, w, h, t).

    Args:
      prior_boxes: (np.ndarray) Prior (anchor) bounding boxes, sized [N,5].
      gt_boxes: (np.ndarray) Ground-truth bounding boxes, sized [M,5].

    Returns:
      iou: pairwise IoU matrix, sized [N,M]. Returned as a torch.Tensor if
      prior_boxes is a tensor, otherwise as an np.ndarray.

    Reference:
      https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
    '''
    return_torch = False
    if isinstance(prior_boxes, torch.Tensor):
        return_torch = True

    prior_boxes = to_numpy(prior_boxes)
    gt_boxes = to_numpy(gt_boxes)

    N = len(prior_boxes)
    M = len(gt_boxes)

    iou = np.zeros((N, M), dtype=np.float32)
    for j, box2 in enumerate(gt_boxes):
        iou[:, j] = intersection_area_one2many(box2, prior_boxes)

    if return_torch:
        iou = torch.from_numpy(iou)
    return iou
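A minimal usage sketch of the pairwise IoU matrix, assuming (x, y, w, h, theta) boxes in pixel coordinates and that the surrounding module's helpers (to_numpy, intersection_area_one2many) are importable; the box values are illustrative only:

import numpy as np

# Two anchors and one ground-truth box, all in (x, y, w, h, theta) format.
anchors = np.array([[100, 100, 50, 20, 0.0],
                    [300, 300, 50, 20, 0.0]], dtype=np.float32)
gt_boxes = np.array([[105, 100, 50, 20, 0.0]], dtype=np.float32)

iou = rbbox_iou(anchors, gt_boxes)        # shape [2, 1]
best_gt_per_anchor = iou.argmax(axis=1)   # index of the best-overlapping gt box per anchor
best_iou_per_anchor = iou.max(axis=1)     # used for positive/negative anchor assignment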
Example #5
def visualize_mask_predictions(data,
                               normalize=A.Normalize(),
                               threshold=0.5,
                               show_groundtruth=True):
    batch_ids = data[ID_KEY]
    batch_images = to_numpy(data[IMAGE_KEY])
    pred_mask = to_numpy(data['pred_mask'])
    true_mask = to_numpy(data['true_mask'])

    images = []

    alpha = 0.5
    font_scale = 0.5
    font_thickness = 1
    text_color = (255, 255, 255)

    def prediction_to_rgb_mask(image,
                               pred,
                               ship_color=(0, 255, 0),
                               edge_color=(255, 0, 0)):
        ship_mask = pred[0, ...] > threshold
        image[ship_mask] = ship_color

        if pred.shape[0] > 1:
            edge_mask = pred[1, ...] > threshold
            image[edge_mask] = edge_color
        return image

    # Render predicted and (optionally) ground-truth masks
    for id, image, p, t in zip(batch_ids, batch_images, pred_mask, true_mask):
        image = np.moveaxis(image, 0, -1)
        image = (image * np.array(normalize.std) +
                 np.array(normalize.mean)) * normalize.max_pixel_value
        image = image.astype(np.uint8).copy()

        overlay = prediction_to_rgb_mask(image.copy(), p)
        overlay = cv2.addWeighted(image, alpha, overlay, 1 - alpha, 0)

        if show_groundtruth:
            overlay2 = prediction_to_rgb_mask(image.copy(), t)
            overlay2 = cv2.addWeighted(image, alpha, overlay2, 1 - alpha, 0)
            overlay = np.hstack((overlay, overlay2))

        title = str(id)
        cv2.putText(overlay,
                    title, (5, 25),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale,
                    text_color,
                    font_thickness,
                    lineType=cv2.LINE_AA)

        images.append(overlay)

    return images
Example #6
    def update(self, y_pred, y_true):

        num_classes = y_pred[SSD_BBOXES_KEY].size(2)

        true_bboxes = to_numpy(y_true[SSD_BBOXES_KEY])
        pred_bboxes = to_numpy(y_pred[SSD_BBOXES_KEY])
        pred_classes = to_numpy(y_pred[SSD_LABELS_KEY].detach().squeeze())
        true_classes = to_numpy(y_true[SSD_LABELS_KEY].detach().cpu())

        for pred_loc, pred_cls, true_loc, true_cls in zip(
                pred_bboxes, pred_classes, true_bboxes, true_classes):
            y_pred = self.rssd_predictions_to_ship_mask(pred_loc, pred_cls)
            y_true = self.rssd_predictions_to_ship_mask(true_loc, true_cls)
            self.base_metric.update(y_pred, y_true)
Example #7
    def predict_as_csv(self, dataset, batch_size=1, workers=0):
        self.eval()
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                num_workers=workers)

        image_ids = []
        rles = []

        for image, y in tqdm(dataloader, total=len(dataloader)):
            image = image.cuda(non_blocking=True)
            pred_boxes, pred_labels = self(image)
            for image_id, boxes, scores in zip(y[ID_KEY], to_numpy(pred_boxes),
                                               to_numpy(pred_labels)):
                boxes, _, scores = dataset.box_coder.decode(boxes, scores)
                if len(boxes):
                    mask = np.zeros((768, 768), dtype=np.uint8)

                    # First, rasterize every box into a single instance-labelled mask
                    for i, rbox in enumerate(boxes):
                        visualize_rbbox(mask,
                                        rbox,
                                        color=(i + 1, i + 1, i + 1),
                                        thickness=cv2.FILLED)

                    # Second, run-length encode each instance; this prevents assigning the same pixel to multiple instances
                    for i, rbox in enumerate(boxes):
                        rle = rle_encode(mask == (i + 1))
                        image_ids.append(image_id)
                        rles.append(rle)

                else:
                    image_ids.append(image_id)
                    rles.append(None)

        return pd.DataFrame.from_dict({
            'ImageId': image_ids,
            'EncodedPixels': rles
        })
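rle_encode above produces the competition's run-length format; its implementation is not shown in this snippet. A common formulation (column-major flattening, 1-based start indices), which the project's own helper may differ from in details, looks like this:

import numpy as np

def rle_encode_sketch(mask):
    # Run-length encode a binary mask: flatten column-major, then emit
    # alternating (1-based start, run length) pairs as a space-separated string.
    pixels = np.concatenate([[0], mask.flatten(order='F'), [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

# Example: a 2x2 mask with the second column set -> pixels 3 and 4 in column-major order.
print(rle_encode_sketch(np.array([[0, 1],
                                  [0, 1]], dtype=np.uint8)))  # "3 2"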
Example #8
    def update(self, y_pred: Tensor, y_true: Tensor):
        batch_size = y_true.size(0)

        y_pred = y_pred[:, self.channel, ...].detach().view(batch_size, -1)
        y_true = y_true[:, self.channel, ...].detach().view(batch_size, -1)

        if self.threshold is not None:
            y_pred = y_pred > float(self.threshold)

        y_true = y_true.float()
        y_pred = y_pred.float()

        intersection = (y_pred * y_true).sum(dim=1)
        union = y_pred.sum(dim=1) + y_true.sum(dim=1)
        iou = intersection / (union - intersection + 1e-7)

        # IoU is defined only for non-empty ground-truth masks
        iou = iou[y_true.sum(dim=1) > 0]
        self.scores_per_image.extend(to_numpy(iou))
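The metric accumulates a per-image soft IoU via |A∩B| / (|A| + |B| - |A∩B|); note that the variable named union above is the sum of the two areas, so subtracting the intersection recovers the true union. A small numeric check of that arithmetic on toy values (not project data):

import torch

y_pred = torch.tensor([[1., 1., 0., 0.]])  # 2 predicted pixels
y_true = torch.tensor([[1., 0., 1., 0.]])  # 2 true pixels, 1 of them overlapping

intersection = (y_pred * y_true).sum(dim=1)             # tensor([1.])
areas_sum = y_pred.sum(dim=1) + y_true.sum(dim=1)       # tensor([4.])
iou = intersection / (areas_sum - intersection + 1e-7)  # 1 / (4 - 1) = 1/3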
Example #9
    def encode(self, boxes, labels, return_anchors=False):
        '''Encode target bounding boxes and class labels.

        SSD coding rules:
          tx = (x - anchor_x) / (variance[0]*anchor_w)
          ty = (y - anchor_y) / (variance[0]*anchor_h)
          tw = log(w / anchor_w)
          th = log(h / anchor_h)
          tt = tan(theta - anchor_theta)

        Args:
          boxes: (np.ndarray) rotated bounding boxes of (x, y, w, h, theta), sized [#obj,5].
          labels: (np.ndarray) object class labels, sized [#obj,].

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,5].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].

        '''
        if len(boxes) == 0:
            loc_targets = self.encode_boxes(self.anchor_boxes)
            cls_targets = np.zeros(len(self.anchor_boxes), dtype=int)
        else:
            boxes = to_numpy(boxes)
            ious = rbbox_iou(self.anchor_boxes, boxes)  # [#anchors, #obj]

            max_ious_1, max_ids_1 = torch.from_numpy(ious).max(1)
            max_ious_0, max_ids_0 = torch.from_numpy(ious).max(0)
            max_ids = max_ids_1.numpy()
            max_ids[max_ids_0.numpy()] = np.arange(len(boxes))
            boxes = boxes[max_ids]
            loc_targets = self.encode_boxes(boxes)
            max_ious = max_ious_1.numpy()

            cls_targets = np.ones(len(self.anchor_boxes), dtype=int) * -1
            cls_targets[max_ious < 0.4] = 0
            cls_targets[max_ious >= 0.5] = 1
            cls_targets[max_ids_0] = 1

        if return_anchors:
            return loc_targets, cls_targets, self.anchor_boxes[cls_targets > 0]

        return loc_targets, cls_targets
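A standalone numeric illustration of the coding rules listed in the docstring, for one ground-truth box against one anchor; the variance value is chosen for this example only, and the real box coder defines its own:

import numpy as np

# One ground-truth box and one anchor, both in (x, y, w, h, theta) format.
box    = np.array([110., 100., 60., 20., 0.10])
anchor = np.array([100., 100., 50., 20., 0.00])
variance = (0.1, 0.2)  # illustrative; only variance[0] appears in the rules above

tx = (box[0] - anchor[0]) / (variance[0] * anchor[2])  # (110 - 100) / (0.1 * 50) = 2.0
ty = (box[1] - anchor[1]) / (variance[0] * anchor[3])  # 0.0
tw = np.log(box[2] / anchor[2])                        # log(60 / 50) ~= 0.182
th = np.log(box[3] / anchor[3])                        # 0.0
tt = np.tan(box[4] - anchor[4])                        # tan(0.10) ~= 0.100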
Example #10
def process_epoch(model,
                  criterions: dict,
                  criterion_weights: Optional[dict],
                  metrics: dict,
                  optimizer,
                  dataloader,
                  epoch: int,
                  is_train,
                  summary_writer,
                  tag=None) -> dict:
    avg_loss = AverageMeter()

    if tag is None:
        tag = 'train' if is_train else 'val'

    epoch_losses = {}

    for key, _ in criterions.items():
        epoch_losses[key] = []

    worst_batch_loss = 0
    worst_batch = None

    best_batch_loss = np.inf
    best_batch = None

    with torch.set_grad_enabled(is_train):
        if is_train:
            model.train()
        else:
            model.eval()

        n_batches = len(dataloader)
        with tqdm(total=n_batches) as tq:
            tq.set_description(f'{tag} epoch {epoch}')

            for batch_index, (image, y_true) in enumerate(dataloader):
                batch_size = image.size(0)

                # Move all data to GPU
                image = image.cuda(non_blocking=True)
                true_ship_presence = y_true[SHIP_PRESENSE_KEY].cuda(non_blocking=True)

                if is_train:
                    optimizer.zero_grad()

                pred_ship_presence = model(image)

                losses = {key: criterion(pred_ship_presence, true_ship_presence)
                          for key, criterion in criterions.items()}
                total_loss = compute_total_loss(losses, criterion_weights)

                if is_train:
                    total_loss.backward()
                    optimizer.step()

                y_pred = {SHIP_PRESENSE_KEY: pred_ship_presence}

                # Predictions
                total_loss = float(total_loss)
                if total_loss > worst_batch_loss:
                    worst_batch_loss = total_loss
                    worst_batch = {
                        ID_KEY: y_true[ID_KEY],
                        IMAGE_KEY: image.detach().cpu(),
                        MASK_KEY: to_numpy(y_true[MASK_KEY]),
                        'pred_has_ship': pred_ship_presence.detach().cpu().sigmoid(),
                        'true_has_ship': true_ship_presence.detach().cpu(),
                    }

                if total_loss < best_batch_loss:
                    best_batch_loss = total_loss
                    best_batch = {
                        ID_KEY: y_true[ID_KEY],
                        IMAGE_KEY: image.detach().cpu(),
                        MASK_KEY: to_numpy(y_true[MASK_KEY]),
                        'pred_has_ship': pred_ship_presence.detach().cpu().sigmoid(),
                        'true_has_ship': true_ship_presence.detach().cpu(),
                    }

                # Log losses
                for loss_name in criterions.keys():
                    epoch_losses[loss_name].append(float(losses[loss_name]))

                # Log metrics
                for name, metric in metrics.items():
                    metric.update(pred_ship_presence, true_ship_presence)

                avg_loss.update(total_loss, batch_size)
                tq.set_postfix(loss='{:.3f}'.format(avg_loss.avg))
                tq.update()

    for key, metric in metrics.items():
        metric.log_to_tensorboard(summary_writer, f'{tag}/epoch/' + key, epoch)

    # Log losses
    for loss_name, loss_values in epoch_losses.items():
        if len(loss_values):
            summary_writer.add_scalar(f'{tag}/loss/{loss_name}', np.mean(loss_values), epoch)
            summary_writer.add_histogram(f'{tag}/loss/{loss_name}/histogram', np.array(loss_values), epoch)

    # Negatives
    negatives = visualize_cls_predictions(worst_batch, show_groundtruth=True)
    for i, image in enumerate(negatives):
        summary_writer.add_image(f'{tag}/negatives/{i}', tensor_from_rgb_image(image), epoch)

    # Positives
    positives = visualize_cls_predictions(best_batch, show_groundtruth=True)
    for i, image in enumerate(positives):
        summary_writer.add_image(f'{tag}/positives/{i}', tensor_from_rgb_image(image), epoch)

    metric_scores = {f'{tag}_loss': avg_loss.avg}
    for key, metric in metrics.items():
        metric_scores[f'{tag}_{key}'] = metric.value()

    return metric_scores
Example #11
def visualize_cls_predictions(data,
                              normalize=A.Normalize(),
                              threshold=0.5,
                              show_groundtruth=True):
    batch_ids = data[ID_KEY]
    batch_images = to_numpy(data[IMAGE_KEY])
    batch_masks = to_numpy(data[MASK_KEY])
    batch_pred_has_ship = to_numpy(data['pred_has_ship'])
    batch_true_has_ship = to_numpy(data['true_has_ship'])

    images = []

    alpha = 0.5
    font_scale = 0.5
    font_thickness = 1
    text_color = (255, 255, 255)

    def prediction_to_rgb_mask(image,
                               pred,
                               ship_color=(0, 255, 0),
                               edge_color=(255, 0, 0)):
        ship_mask = pred[0, ...] > threshold
        image[ship_mask] = ship_color

        if pred.shape[0] > 1:
            edge_mask = pred[1, ...] > threshold
            image[edge_mask] = edge_color
        return image

    # Render per-image classification predictions over the ship masks
    for id, image, mask, p, t in zip(batch_ids, batch_images, batch_masks,
                                     batch_pred_has_ship, batch_true_has_ship):
        image = np.moveaxis(image, 0, -1)
        image = (image * np.array(normalize.std) +
                 np.array(normalize.mean)) * normalize.max_pixel_value
        image = image.astype(np.uint8).copy()

        # Put ships overlay
        ships_rgb = (label2rgb(mask, bg_label=0) * 255).astype(np.uint8)
        image = cv2.addWeighted(image, alpha, ships_rgb, 1 - alpha, 0)

        overlay = image.copy()
        # Put image title
        title = str(id)
        cv2.putText(overlay,
                    title, (5, 25),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale,
                    text_color,
                    font_thickness,
                    lineType=cv2.LINE_AA)

        # Put prediction confidence
        cv2.putText(overlay,
                    '{0:.2f}'.format(float(p)), (10, image.shape[0] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale, (255, 0, 0),
                    font_thickness,
                    lineType=cv2.LINE_AA)

        # Put true label
        if show_groundtruth:
            cv2.putText(overlay,
                        '{0:.2f}'.format(float(t)),
                        (image.shape[1] - 40, image.shape[0] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, (0, 255, 0),
                        font_thickness,
                        lineType=cv2.LINE_AA)

        overlay = cv2.addWeighted(image, alpha, overlay, 1 - alpha, 0)
        images.append(overlay)

    return images
Example #12
def visualize_rssd_predictions(data,
                               box_coder,
                               normalize=A.Normalize(),
                               show_groundtruth=True):
    batch_ids = data[ID_KEY]
    batch_images = to_numpy(data[IMAGE_KEY])
    pred_ssd_bboxes = to_numpy(data['pred_ssd_bboxes'])
    true_ssd_bboxes = to_numpy(data['true_ssd_bboxes'])
    pred_ssd_labels = to_numpy(data['pred_ssd_labels'])
    true_ssd_labels = to_numpy(data['true_ssd_labels'])

    images = []

    font_scale = 1
    font_thickness = 1
    text_color = (255, 255, 255)

    # Render pred bboxes
    for id, image, bboxes, labels in zip(batch_ids, batch_images,
                                         pred_ssd_bboxes, pred_ssd_labels):
        image = np.moveaxis(image, 0, -1)
        image = (image * np.array(normalize.std) +
                 np.array(normalize.mean)) * normalize.max_pixel_value
        image = image.astype(np.uint8).copy()

        title = str(id)
        cv2.putText(image,
                    title, (5, 25),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale,
                    text_color,
                    font_thickness,
                    lineType=cv2.LINE_AA)

        bboxes, probs = box_coder.decode(bboxes, labels)

        if len(probs) > 1024:
            warnings.warn(
                f'Too many RSSD predictions ({len(bboxes)}); rendering only the first 1024'
            )
            bboxes = bboxes[:1024]
            probs = probs[:1024]

        if len(probs):
            image = draw_rbboxes(image,
                                 bboxes,
                                 probs, (255, 0, 0),
                                 thickness=3)

        images.append(image)

    # Render true bboxes
    if show_groundtruth:
        new_images = []
        for image, bboxes, labels in zip(images, true_ssd_bboxes,
                                         true_ssd_labels):

            bboxes, probs = box_coder.decode(bboxes, labels)

            if len(labels):
                image = draw_rbboxes(image,
                                     bboxes,
                                     probs, (0, 255, 0),
                                     thickness=1,
                                     show_scores=False)

            new_images.append(image)
        images = new_images

    return images
Example #13
    def update(self, y_pred, y_true):
        y_pred = self.expand_label_image(to_numpy(y_pred))
        y_true = self.expand_label_image(to_numpy(y_true))

        f2 = compute_f2(y_pred, y_true)
        self.f2_scores.append(f2)