Ejemplo n.º 1
0
        def single_batch_nms(candidate_boxes):
            """Greedy non-maximum suppression over the boxes of one sample.

            Column 4 of ``candidate_boxes`` is used as the score and columns
            0-3 are the coordinates handed to ``broadcast_iou``.

            Returns a ``(self.max_detection + 1, D)`` tensor, where ``D`` is the
            size of the last dimension of ``candidate_boxes``. The leading rows
            hold the kept boxes in the order they were picked (highest
            remaining score first), unused rows stay zero, and the last row
            (index ``self.max_detection``) is filled with the number of kept
            boxes.

            The result is allocated on the device and with the dtype of
            ``candidate_boxes``, so it is on the same device whether or not
            any box was kept, and no GPU is required.
            """
            # Drop everything below the score threshold.
            score_ok = candidate_boxes[..., 4] >= self.score_threshold
            candidate_boxes = candidate_boxes[score_ok]

            outputs = candidate_boxes.new_zeros(
                (self.max_detection + 1, candidate_boxes.size(-1)))

            count = 0
            while candidate_boxes.size(0) > 0 and count < self.max_detection:
                # Keep the highest-scoring remaining box.
                best_idx = torch.argmax(candidate_boxes[..., 4], dim=0)
                best_box = candidate_boxes[best_idx]
                outputs[count] = best_box
                count += 1

                # Remove the kept box from the candidate pool.
                candidate_boxes = torch.cat(
                    (candidate_boxes[:best_idx], candidate_boxes[best_idx + 1:]),
                    dim=0)

                # Discard candidates overlapping the kept box too much.
                # Assumes broadcast_iou yields one IoU per remaining candidate
                # so the comparison can index candidate_boxes -- TODO confirm.
                iou = broadcast_iou(best_box[0:4], candidate_boxes[..., 0:4])
                candidate_boxes = candidate_boxes[iou <= self.iou_threshold]

            # Last row carries the detection count (stays zero when count == 0).
            outputs[self.max_detection] = count

            return outputs
Ejemplo n.º 2
0
    def calc_ignore_mask(self, true_box, pred_box, true_obj):
        """Return a float mask marking predictions whose IoU is below the threshold.

        ``true_box`` and ``pred_box`` are flattened to ``(batch, -1, 4)`` and
        passed to ``broadcast_iou``; the result is reshaped to the first four
        dimensions of ``pred_box`` and compared with ``self.ignore_thresh``.
        ``true_obj`` is accepted but not used.

        Returns:
            Tensor of 1.0 / 0.0 with a trailing singleton dimension,
            e.g. (batch, 13, 13, 3, 1).
        """
        # e.g. pred_box is (batch, 13, 13, 3, 4)
        batch, dim1, dim2, dim3 = pred_box.shape[:4]

        # Flatten to (batch, N, 4), e.g. N = 507.
        flat_true = true_box.reshape(true_box.shape[0], -1, 4)
        # NOTE(review): sorting along dim 1 orders each of the four coordinate
        # columns independently of the others.
        flat_true = flat_true.sort(dim=1, descending=True).values
        flat_pred = pred_box.reshape(batch, -1, 4)

        # NOTE: the TensorFlow reference takes a reduce_max over the last IoU
        # axis at this point; whether that is needed here is an open question.
        # https://github.com/ethanyanjiali/deep-vision/blob/master/YOLO/tensorflow/yolov3.py#L462
        iou = broadcast_iou(flat_pred, flat_true)
        best_iou = iou.reshape(batch, dim1, dim2, dim3)

        below_thresh = best_iou < self.ignore_thresh
        return below_thresh.float().unsqueeze(-1)
    def yolo_loss(y_true, y_pred):
        """Compute the YOLO loss as the sum of xy, wh, objectness and class terms.

        ``anchors``, ``classes`` and ``ignore_thresh`` are taken from the
        enclosing scope.

        Args:
            y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls))
            y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))

        Returns:
            The four loss terms added together after each has been summed
            over axes 1, 2 and 3.
        """
        # --- predictions -------------------------------------------------
        pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes(
            y_pred, anchors, classes)
        pred_xy, pred_wh = pred_xywh[..., 0:2], pred_xywh[..., 2:4]

        # --- ground truth ------------------------------------------------
        true_box, true_obj, true_class_idx = tf.split(
            y_true, (4, 1, 1), axis=-1)
        xy1 = true_box[..., 0:2]
        xy2 = true_box[..., 2:4]
        true_xy = (xy1 + xy2) / 2
        true_wh = xy2 - xy1

        # Small boxes receive a larger weight.
        box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1]

        # --- invert the box equations so targets match pred_xywh ----------
        grid_size = tf.shape(y_true)[1]
        cells = tf.range(grid_size)
        grid = tf.expand_dims(
            tf.stack(tf.meshgrid(cells, cells), axis=-1), axis=2)
        true_xy = (true_xy * tf.cast(grid_size, tf.float32)
                   - tf.cast(grid, tf.float32))
        log_wh = tf.math.log(true_wh / anchors)
        # Infinite logs are replaced by 0.
        true_wh = tf.where(
            tf.math.is_inf(log_wh), tf.zeros_like(log_wh), log_wh)

        # --- masks -------------------------------------------------------
        obj_mask = tf.squeeze(true_obj, -1)

        def _best_iou(sample):
            # Highest IoU of each prediction against this sample's object boxes.
            sample_pred, sample_true, sample_obj = sample
            object_boxes = tf.boolean_mask(
                sample_true, tf.cast(sample_obj, tf.bool))
            return tf.reduce_max(
                broadcast_iou(sample_pred, object_boxes), axis=-1)

        # Predictions at or above the IoU threshold are excluded from the
        # no-object part of the objectness loss.
        best_iou = tf.map_fn(
            _best_iou, (pred_box, true_box, obj_mask), tf.float32)
        ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32)

        # --- individual loss terms ----------------------------------------
        coord_weight = obj_mask * box_loss_scale
        xy_loss = coord_weight * tf.reduce_sum(
            tf.square(true_xy - pred_xy), axis=-1)
        wh_loss = coord_weight * tf.reduce_sum(
            tf.square(true_wh - pred_wh), axis=-1)

        obj_entropy = binary_crossentropy(true_obj, pred_obj)
        obj_loss = (obj_mask * obj_entropy
                    + (1 - obj_mask) * ignore_mask * obj_entropy)

        if Config.CLASS_MUTUALLY_EXCLUSIVE is True:
            class_entropy = sparse_categorical_crossentropy(
                true_class_idx, pred_class)
        else:
            class_entropy = binary_crossentropy(true_class_idx, pred_class)
        class_loss = obj_mask * class_entropy

        # --- reduce every term over (grid, grid, anchors) ------------------
        reduce_axes = (1, 2, 3)
        xy_loss = tf.reduce_sum(xy_loss, axis=reduce_axes)
        wh_loss = tf.reduce_sum(wh_loss, axis=reduce_axes)
        obj_loss = tf.reduce_sum(obj_loss, axis=reduce_axes)
        class_loss = tf.reduce_sum(class_loss, axis=reduce_axes)

        return xy_loss + wh_loss + obj_loss + class_loss
Ejemplo n.º 4
0
    def yoloLoss(y_true, y_pred):
        """Compute the YOLO loss (xy + wh + objectness + class) per batch entry.

        ``anchors``, ``class_num`` and ``ignore_thresh`` are read from the
        enclosing scope. ``y_true`` is split along its last axis into four box
        values, one objectness value and one class index.

        Returns:
            Sum of the four terms, each reduced over axes 1, 2 and 3.
        """
        pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes(
            y_pred, anchors, class_num)
        # Extract the predicted offsets.
        pred_xy = pred_xywh[..., 0:2]
        pred_wh = pred_xywh[..., 2:4]

        true_box, true_obj, true_class_idx = tf.split(
            y_true, (4, 1, 1), axis=-1)
        box_lo, box_hi = true_box[..., 0:2], true_box[..., 2:4]
        true_xy = (box_lo + box_hi) / 2
        true_wh = box_hi - box_lo

        box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1]

        # Convert the ground-truth coordinates into offsets for the loss.
        grid_size = tf.shape(y_true)[1]
        axis_cells = tf.range(grid_size)
        grid = tf.meshgrid(axis_cells, axis_cells)
        grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)
        grid_size_f = tf.cast(grid_size, tf.float32)
        true_xy = true_xy * grid_size_f - tf.cast(grid, tf.float32)
        wh_log = tf.math.log(true_wh / anchors)
        true_wh = tf.where(
            tf.math.is_inf(wh_log), tf.zeros_like(wh_log), wh_log)

        obj_mask = tf.squeeze(true_obj, -1)

        def _max_iou_for_sample(elems):
            # Best IoU of each prediction against the boxes flagged as objects.
            preds, truths, flags = elems
            flagged = tf.boolean_mask(truths, tf.cast(flags, tf.bool))
            return tf.reduce_max(broadcast_iou(preds, flagged), axis=-1)

        # Ignore false positives whose IoU is over the threshold.
        best_iou = tf.map_fn(
            _max_iou_for_sample, (pred_box, true_box, obj_mask), tf.float32)
        ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32)

        weighted_obj = obj_mask * box_loss_scale
        xy_sq_err = tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1)
        wh_sq_err = tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1)
        xy_loss = weighted_obj * xy_sq_err
        wh_loss = weighted_obj * wh_sq_err

        obj_ce = binary_crossentropy(true_obj, pred_obj)
        obj_loss = obj_mask * obj_ce + (1 - obj_mask) * ignore_mask * obj_ce

        # TODO: use binary_crossentropy instead
        class_loss = obj_mask * sparse_categorical_crossentropy(
            true_class_idx, pred_class)

        axes = (1, 2, 3)
        xy_loss = tf.reduce_sum(xy_loss, axis=axes)
        wh_loss = tf.reduce_sum(wh_loss, axis=axes)
        obj_loss = tf.reduce_sum(obj_loss, axis=axes)
        class_loss = tf.reduce_sum(class_loss, axis=axes)

        return xy_loss + wh_loss + obj_loss + class_loss
Ejemplo n.º 5
0
    def calc_ignore_mask(self, true_box, pred_box, true_obj):
        """Return a float mask of predictions whose IoU is below the threshold.

        For each sample in the batch, the rows of ``true_box`` whose flag in
        ``true_obj`` is non-zero are selected and passed, together with that
        sample's predictions, to ``broadcast_iou``. A sample with no flagged
        box contributes an all-zero IoU tensor of shape
        ``true_box.shape[1:4]`` instead.

        Args:
            true_box: Ground-truth boxes, batch first.
            pred_box: Predicted boxes, batch first.
            true_obj: Object flags with a trailing singleton dimension.

        Returns:
            Tensor holding 1.0 where the IoU is below ``self.ignore_thresh``
            and 0.0 elsewhere, with a trailing singleton dimension appended.
        """
        obj_mask = torch.squeeze(true_obj, dim=-1)

        best_iou = []
        for sample_pred, sample_true, sample_obj in zip(
                pred_box, true_box, obj_mask):
            object_boxes = sample_true[sample_obj.bool()]
            if object_boxes.size(0) != 0:
                # Assumes broadcast_iou returns a tensor shaped like
                # true_box.shape[1:4] so it stacks with the placeholder
                # below -- TODO confirm.
                best_iou.append(broadcast_iou(sample_pred, object_boxes))
            else:
                # Allocate the placeholder next to the predictions rather
                # than forcing CUDA, so stacking never mixes devices.
                best_iou.append(sample_pred.new_zeros(true_box.shape[1:4]))
        best_iou = torch.stack(best_iou)

        ignore_mask = (best_iou < self.ignore_thresh).float()
        return ignore_mask.unsqueeze(-1)
Ejemplo n.º 6
0
        def single_batch_nms(candidate_boxes):
            """Run greedy non-maximum suppression on the boxes of one sample.

            Column 4 of ``candidate_boxes`` is the score; columns 0-3 are the
            coordinates passed to ``broadcast_iou``.

            Returns a ``(self.max_detection + 1, D)`` tensor, ``D`` being the
            size of the last dimension of ``candidate_boxes``. Rows
            ``0 .. count - 1`` hold the kept boxes in pick order, the remaining
            box rows are zero, and the final row (index
            ``self.max_detection``) is filled with ``count``.

            The output is created on the device and with the dtype of
            ``candidate_boxes``, so the result is on the same device whether
            or not any box survives, and CUDA is not required.
            """
            # Boolean mask of boxes whose score reaches the threshold.
            y_mask = candidate_boxes[..., 4] >= self.score_threshold
            candidate_boxes = candidate_boxes[y_mask]

            num_fields = candidate_boxes.size(-1)
            outputs = candidate_boxes.new_zeros(
                (self.max_detection + 1, num_fields))

            count = 0
            # Repeat until no candidates remain or max_detection is reached.
            while candidate_boxes.size(0) > 0 and count < self.max_detection:
                # Pick the candidate with the highest score.
                best_idx = torch.argmax(candidate_boxes[..., 4], dim=0)
                best_box = candidate_boxes[best_idx]

                outputs[count] = best_box
                count += 1

                # Remove the picked box from the candidates.
                candidate_boxes = torch.cat(
                    (candidate_boxes[:best_idx],
                     candidate_boxes[best_idx + 1:]),
                    dim=0)

                # Compare the picked box with every remaining candidate and
                # drop those whose IoU exceeds iou_threshold.
                # Assumes broadcast_iou yields one IoU per remaining candidate
                # so the comparison can index candidate_boxes -- TODO confirm.
                iou = broadcast_iou(best_box[0:4], candidate_boxes[..., 0:4])
                candidate_boxes = candidate_boxes[iou <= self.iou_threshold]

            # Store the number of detections in the last row
            # (it stays zero when nothing was kept).
            outputs[self.max_detection] = count

            return outputs
Ejemplo n.º 7
0
    def calc_ignore_mask(self, true_obj, true_box, pred_box):
        """Return a float mask of predictions whose best IoU is below the threshold.

        Both box tensors are flattened to ``(batch, -1, 4)``. The flattened
        ground truth is sorted along axis 1 in descending order and cut to its
        first 100 rows before being compared with every prediction through
        ``broadcast_iou``. ``true_obj`` is accepted but not used.

        Returns:
            Float32 tensor shaped like the first four dimensions of
            ``pred_box`` plus a trailing singleton axis: 1.0 where the best
            IoU is below ``self.ignore_thresh``, 0.0 elsewhere.
        """
        true_dims = tf.shape(true_box)
        pred_dims = tf.shape(pred_box)

        flat_true = tf.reshape(true_box, [true_dims[0], -1, 4])
        flat_true = tf.sort(flat_true, axis=1, direction="DESCENDING")
        flat_true = flat_true[:, 0:100, :]
        flat_pred = tf.reshape(pred_box, [pred_dims[0], -1, 4])

        # Best IoU per prediction, taken over the last axis.
        pairwise_iou = broadcast_iou(flat_pred, flat_true)
        best_iou = tf.reduce_max(pairwise_iou, axis=-1)
        restored_shape = [
            pred_dims[0], pred_dims[1], pred_dims[2], pred_dims[3]
        ]
        best_iou = tf.reshape(best_iou, restored_shape)

        below_thresh = tf.cast(best_iou < self.ignore_thresh, tf.float32)
        return tf.expand_dims(below_thresh, axis=-1)
Ejemplo n.º 8
0
        def single_batch_nms(candidate_boxes):
            """Greedy non-maximum suppression for the boxes of one sample.

            ``score_threshold``, ``iou_threshold`` and ``max_detection`` come
            from the enclosing scope. Column 4 of ``candidate_boxes`` is the
            score and columns 0-3 are passed to ``broadcast_iou``.

            Returns a ``(max_detection + 1, D)`` tensor, ``D`` being the size
            of the last dimension of ``candidate_boxes``: kept boxes in pick
            order, zeros for unused rows, and the last row filled with the
            number of kept boxes.
            """
            # Discard predictions scoring below score_threshold.
            passes_score = candidate_boxes[..., 4] >= score_threshold
            candidate_boxes = tf.boolean_mask(candidate_boxes, passes_score)

            outputs = tf.zeros(
                (max_detection + 1, tf.shape(candidate_boxes)[-1]))
            row_ids = []
            kept_boxes = []

            count = 0
            # Loop until the candidates run out or max_detection boxes are kept.
            while tf.shape(candidate_boxes)[0] > 0 and count < max_detection:
                # Take the highest-scoring candidate and record it.
                best_idx = tf.math.argmax(candidate_boxes[..., 4], axis=0)
                best_box = candidate_boxes[best_idx]
                row_ids.append([count])
                kept_boxes.append(best_box)
                count += 1

                # Drop the taken box from the candidate list.
                head = candidate_boxes[0:best_idx]
                tail = candidate_boxes[
                    best_idx + 1:tf.shape(candidate_boxes)[0]]
                candidate_boxes = tf.concat([head, tail], axis=0)

                # Remove remaining candidates whose IoU with the taken box
                # is bigger than iou_threshold.
                iou = broadcast_iou(best_box[0:4], candidate_boxes[..., 0:4])
                candidate_boxes = tf.boolean_mask(
                    candidate_boxes, iou[0] <= iou_threshold)

            if count > 0:
                # The number of detections goes into the final output row.
                count_row_id = [[max_detection]]
                count_row = [tf.fill([tf.shape(candidate_boxes)[-1]], count)]
                scatter_ids = tf.concat([row_ids, count_row_id], axis=0)
                scatter_rows = tf.concat([kept_boxes, count_row], axis=0)
                outputs = tf.tensor_scatter_nd_update(
                    outputs, scatter_ids, scatter_rows)
            return outputs
Ejemplo n.º 9
0
    def calc_ignore_mask(self, true_obj, true_box, pred_box):
        """Return a float mask of predictions whose best IoU is below the threshold.

        YOLOv3: "If the bounding box prior is not the best but does overlap a
        ground truth object by more than some threshold we ignore the
        prediction, following [17]. We use the threshold of .5."

        Example shapes: true_obj (1, 13, 13, 3, 1), true_box (1, 13, 13, 3, 4).

        Returns:
            Float32 tensor with a trailing singleton axis: 1.0 where the best
            IoU is below ``self.ignore_thresh``, 0.0 where the prediction is
            to be ignored for the no-object loss.
        """
        # Drop the trailing axis, e.g. (1, 13, 13, 3, 1) -> (1, 13, 13, 3).
        object_flags = tf.squeeze(true_obj, axis=-1)

        # Keep only the ground-truth boxes flagged as objects,
        # e.g. (2, 4) when two boxes are flagged.
        has_object = tf.cast(object_flags, tf.bool)
        true_box_filtered = tf.boolean_mask(true_box, has_object)

        # IoU of every (pred box, true box) pair, then the best per prediction.
        pairwise_iou = broadcast_iou(pred_box, true_box_filtered)
        best_iou = tf.reduce_max(pairwise_iou, axis=-1)

        # 1.0 marks predictions that still count toward the no-object loss.
        below_thresh = tf.cast(best_iou < self.ignore_thresh, tf.float32)
        return tf.expand_dims(below_thresh, axis=-1)