Beispiel #1
0
    def iou_aid_calc(self, ANCHOR_BOX, gt_bbox):
        """Assign anchors to ground-truth boxes and encode regression deltas.

        Ground-truth boxes are visited in order of increasing area (as
        reported by ``self.calc_area``). Every anchor that is not yet taken
        and whose IoU with the box exceeds 0.25 is assigned to that box, so a
        single box may own several anchors. A box that obtains no anchor this
        way falls back to the unused anchor with the smallest squared distance
        between the box vector and the anchor vector.

        Args:
          ANCHOR_BOX: anchors; ``ANCHOR_BOX[k]`` is read as (cx, cy, w, h).
          gt_bbox: ground-truth boxes; each one unpacks into (cx, cy, w, h).

        Returns:
          aidx_per_image: list of ``[anchor_index, gt_index]`` pairs.
          delta_per_image: list of ``[dx, dy, dw, dh]`` deltas, in the same
              order as ``aidx_per_image``.

        Raises:
          IndexError: if a box needs the distance fallback but every anchor
              has already been assigned.
        """
        def _encode_delta(anchor_idx, gt_idx):
            # Centre offsets are normalised by the anchor size; width and
            # height are encoded as log ratios against the anchor.
            box_cx, box_cy, box_w, box_h = gt_bbox[gt_idx]
            anchor = ANCHOR_BOX[anchor_idx]
            return [
                (box_cx - anchor[0]) / anchor[2],
                (box_cy - anchor[1]) / anchor[3],
                np.log(box_w / anchor[2]),
                np.log(box_h / anchor[3]),
            ]

        aidx_per_image, delta_per_image = [], []
        aidx_set = set()  # anchors already claimed by some box
        gt_area = self.calc_area(gt_bbox)
        for i in np.argsort(gt_area):
            overlaps = batch_iou(ANCHOR_BOX, gt_bbox[i])
            find = False
            # Best-overlapping anchors first; deliberately no break after a
            # match, so all free anchors above the threshold are collected.
            for ov_idx in np.argsort(overlaps)[::-1]:
                if overlaps[ov_idx] <= 0:
                    break
                if ov_idx not in aidx_set and overlaps[ov_idx] > 0.25:
                    aidx_set.add(ov_idx)
                    aidx_per_image.append([ov_idx, i])
                    delta_per_image.append(_encode_delta(ov_idx, i))
                    find = True

            if find:
                continue

            # No free anchor overlaps enough: take the closest unused anchor
            # by squared distance instead.
            dist = np.sum(np.square(gt_bbox[i] - ANCHOR_BOX), axis=1)
            for dist_idx in np.argsort(dist):
                if dist_idx not in aidx_set:
                    aidx_set.add(dist_idx)
                    break
            else:
                raise IndexError(
                    'no unassigned anchor left for ground-truth box %d' % i)
            aidx_per_image.append([dist_idx, i])
            delta_per_image.append(_encode_delta(dist_idx, i))

        return aidx_per_image, delta_per_image
Beispiel #2
0
    def analyze_detections(self, detection_file_dir, det_error_file):
        """Categorise detections per image and log the erroneous ones.

        For every image index in ``self._image_idx`` the file
        ``<detection_file_dir>/<idx>.txt`` is read. Each line is split on
        single spaces: field 0 is the class name, fields 4..7 are
        xmin, ymin, xmax, ymax and the last field is the score. The parsed
        detections are stored, sorted by descending score, in
        ``self._det_rois``.

        Per image only the ``len(gt_bboxes)`` highest-scoring detections are
        evaluated. Each one is matched to the ground-truth box with the
        highest IoU and counted as:
          * correct  - same class, IoU >= 0.5, box not matched before
          * repeated - same class, IoU >= 0.5, box already matched
          * loc      - same class, 0.1 < IoU < 0.5
          * cls      - IoU > 0.1 but different class
          * bg       - IoU <= 0.1
        'loc', 'cls' and 'bg' detections and all unmatched ('missed')
        ground-truth boxes are written to ``det_error_file``.

        Args:
          detection_file_dir: directory holding one ``<idx>.txt`` per image.
          det_error_file: path of the error log to (over)write.

        Returns:
          dict with the detection/object counts and the error ratios. A ratio
          whose denominator is zero is reported as 0.0.
        """
        def _save_detection(f, idx, error_type, det, score):
            # Boxes are kept as (cx, cy, w, h); the log stores corners.
            f.write(
                '{:s} {:s} {:.1f} {:.1f} {:.1f} {:.1f} {:s} {:.3f}\n'.format(
                    idx, error_type, det[0] - det[2] / 2.,
                    det[1] - det[3] / 2., det[0] + det[2] / 2.,
                    det[1] + det[3] / 2., self._classes[int(det[4])], score))

        def _ratio(count, total):
            # Avoid ZeroDivisionError when nothing was detected / annotated.
            return count / total if total else 0.0

        # load detections
        self._det_rois = {}
        for idx in self._image_idx:
            det_file_name = os.path.join(detection_file_dir, idx + '.txt')
            with open(det_file_name) as f:
                lines = f.readlines()
            bboxes = []
            for line in lines:
                obj = line.strip().split(' ')
                cls = self._class_to_idx[obj[0].lower().strip()]
                xmin = float(obj[4])
                ymin = float(obj[5])
                xmax = float(obj[6])
                ymax = float(obj[7])
                score = float(obj[-1])

                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
                bboxes.append([x, y, w, h, cls, score])
            # highest score first
            bboxes.sort(key=lambda b: b[-1], reverse=True)
            self._det_rois[idx] = bboxes

        # do error analysis
        num_objs = 0.
        num_dets = 0.
        num_correct = 0.
        num_loc_error = 0.
        num_cls_error = 0.
        num_bg_error = 0.
        num_repeated_error = 0.
        num_detected_obj = 0.

        with open(det_error_file, 'w') as f:
            for idx in self._image_idx:
                gt_bboxes = np.array(self._rois[idx])
                num_gt = len(gt_bboxes)
                num_objs += num_gt
                if num_gt < 1:
                    continue
                detected = [False] * num_gt

                # Only as many detections as there are ground-truth boxes
                # take part in the analysis; the list is sorted by score.
                for det in self._det_rois[idx][:num_gt]:
                    num_dets += 1
                    ious = batch_iou(gt_bboxes[:, :4], det[:4])
                    max_iou = np.max(ious)
                    gt_idx = np.argmax(ious)
                    if max_iou > 0.1:
                        if gt_bboxes[gt_idx, 4] == det[4]:
                            if max_iou >= 0.5:
                                if not detected[gt_idx]:
                                    num_correct += 1
                                    detected[gt_idx] = True
                                else:
                                    num_repeated_error += 1
                            else:
                                num_loc_error += 1
                                _save_detection(f, idx, 'loc', det, det[5])
                        else:
                            num_cls_error += 1
                            _save_detection(f, idx, 'cls', det, det[5])
                    else:
                        num_bg_error += 1
                        _save_detection(f, idx, 'bg', det, det[5])

                for i, gt in enumerate(gt_bboxes):
                    if not detected[i]:
                        _save_detection(f, idx, 'missed', gt, -1.0)
                num_detected_obj += sum(detected)

        correct_ratio = _ratio(num_correct, num_dets)
        loc_ratio = _ratio(num_loc_error, num_dets)
        cls_ratio = _ratio(num_cls_error, num_dets)
        bg_ratio = _ratio(num_bg_error, num_dets)
        repeated_ratio = _ratio(num_repeated_error, num_dets)
        recall = _ratio(num_detected_obj, num_objs)

        print('Detection Analysis:')
        print('    Number of detections: {}'.format(num_dets))
        print('    Number of objects: {}'.format(num_objs))
        print('    Percentage of correct detections: {}'.format(correct_ratio))
        print('    Percentage of localization error: {}'.format(loc_ratio))
        print('    Percentage of classification error: {}'.format(cls_ratio))
        print('    Percentage of background error: {}'.format(bg_ratio))
        print('    Percentage of repeated detections: {}'.format(
            repeated_ratio))
        print('    Recall: {}'.format(recall))

        out = {}
        out['num of detections'] = num_dets
        out['num of objects'] = num_objs
        out['% correct detections'] = correct_ratio
        out['% localization error'] = loc_ratio
        out['% classification error'] = cls_ratio
        out['% background error'] = bg_ratio
        out['% repeated error'] = repeated_ratio
        out['% recall'] = recall

        return out
Beispiel #3
0
    def read_batch(self, shuffle=True, wrap_around=True):
        """Read a batch of images and instance annotations.

        Images that cannot be decoded are reported on stdout and skipped, so
        the returned lists may hold fewer than ``mc.BATCH_SIZE`` entries.

        Args:
          shuffle: if True the batch is sliced from ``self._perm_idx`` and
              ``self._shuffle_image_idx()`` is called whenever the next batch
              would reach the end of the index list; if False the images are
              read sequentially from ``self._image_idx``.
          wrap_around: only used when ``shuffle`` is False. A batch reaching
              the end of the list is always completed with images from the
              start; with ``wrap_around`` the cursor then continues after
              those images, otherwise it is reset to 0.
        Returns:
          image_per_batch: list of float32 images with ``mc.BGR_MEANS``
              subtracted, resized with ``cv2.resize`` to
              (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT).
          label_per_batch: per image, the list of object labels. In
              eight-point mode labels of dropped degenerate masks are removed.
          delta_per_batch: per image, one delta list per object:
              [dx, dy, dw, dh] or [dx, dy, dw, dh, dof1, dof2, dof3, dof4].
          aidx_per_batch: per image, the anchor index responsible for each
              object.
          bbox_per_batch: per image, the scaled boxes [cx, cy, w, h] or mask
              parameters [cx, cy, w, h, of1, of2, of3, of4].
          boundary_adhesions_per_batch: per image, the boundary-adhesion
              array (4 columns, or 8 in eight-point mode) after augmentation.
        """
        mc = self.mc

        if shuffle:
            if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
                self._shuffle_image_idx()
            batch_idx = self._perm_idx[self._cur_idx:self._cur_idx +
                                       mc.BATCH_SIZE]
            self._cur_idx += mc.BATCH_SIZE
        else:
            # Check for wrap around only in non shuffle mode
            if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
                batch_idx = self._image_idx[self._cur_idx:] \
                    + self._image_idx[:self._cur_idx + mc.BATCH_SIZE-len(self._image_idx)]
                if wrap_around:
                    self._cur_idx += mc.BATCH_SIZE - len(self._image_idx)
                else:
                    # Restart the counter if no-wrap-around is enabled
                    # This ensures all the validation examples are evaluated
                    self._cur_idx = 0
            else:
                batch_idx = self._image_idx[self._cur_idx:self._cur_idx +
                                            mc.BATCH_SIZE]
                self._cur_idx += mc.BATCH_SIZE

        image_per_batch = []
        label_per_batch = []
        bbox_per_batch = []
        delta_per_batch = []
        aidx_per_batch = []
        boundary_adhesions_per_batch = []
        if mc.DEBUG_MODE:
            avg_ious = 0.
            num_objects = 0.
            max_iou = 0.0
            min_iou = 1.0
            num_zero_iou_obj = 0

        for idx in batch_idx:
            # load the image
            try:
                # Seems to be the only way to detect invalid image files
                Image.open(self._image_path_at(idx)).tobytes()
            except IOError:
                print('Detect error img %s' % self._image_path_at(idx))
                continue
            im = cv2.imread(self._image_path_at(idx))
            # The None check has to come before any use of the array,
            # otherwise a failed read dies with AttributeError instead of
            # being skipped.
            if im is None:
                print("\n\nCorrupt image found: ", self._image_path_at(idx))
                continue

            im = im.astype(np.float32, copy=False)
            im -= mc.BGR_MEANS
            orig_h, orig_w, _ = [float(v) for v in im.shape]

            # load annotations
            label_per_batch.append([b[4] for b in self._rois[idx][:]])
            gt_bbox_pre = np.array([[b[0], b[1], b[2], b[3]]
                                    for b in self._rois[idx][:]])

            if mc.EIGHT_POINT_REGRESSION:
                polygons = [b[2] for b in self._poly[idx][:]]
                boundary_adhesion_pre = np.array(
                    [[b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]
                     for b in self._boundary_adhesions[idx][:]])
            else:
                boundary_adhesion_pre = np.array(
                    [[b[0], b[1], b[2], b[3]]
                     for b in self._boundary_adhesions[idx][:]])

            is_drift_performed = False
            is_flip_performed = False

            # NOTE(review): this passes when EITHER side is inside the image;
            # 'and' looks like the intent - confirm before tightening.
            assert np.all((gt_bbox_pre[:, 0] - (gt_bbox_pre[:, 2]/2.0)) >= 0) or \
                    np.all((gt_bbox_pre[:, 0] + (gt_bbox_pre[:, 2]/2.0)) < orig_w), "Error in the bounding boxes before augmentation"

            if mc.DATA_AUGMENTATION:
                # Both drifts may be zero (drift is then skipped below), as
                # the message states.
                assert mc.DRIFT_X >= 0 and mc.DRIFT_Y >= 0, \
                    'mc.DRIFT_X and mc.DRIFT_Y must be >= 0'

                if mc.DRIFT_X > 0 or mc.DRIFT_Y > 0:
                    # Ensures that gt bounding box is not cut out of the image
                    max_drift_x = math.floor(
                        min(gt_bbox_pre[:, 0] - (gt_bbox_pre[:, 2] / 2.0) + 1))
                    max_drift_y = math.floor(
                        min(gt_bbox_pre[:, 1] - (gt_bbox_pre[:, 3] / 2.0) + 1))
                    assert max_drift_x >= 0 and max_drift_y >= 0, 'bbox out of image'

                    dy = np.random.randint(-mc.DRIFT_Y,
                                           min(mc.DRIFT_Y + 1, max_drift_y))
                    dx = np.random.randint(-mc.DRIFT_X,
                                           min(mc.DRIFT_X + 1, max_drift_x))

                    # shift bbox
                    gt_bbox_pre[:, 0] = gt_bbox_pre[:, 0] - dx
                    gt_bbox_pre[:, 1] = gt_bbox_pre[:, 1] - dy
                    is_drift_performed = True
                    # distort image
                    orig_h -= dy
                    orig_w -= dx
                    orig_x, dist_x = max(dx, 0), max(-dx, 0)
                    orig_y, dist_y = max(dy, 0), max(-dy, 0)

                    distorted_im = np.zeros(
                        (int(orig_h), int(orig_w), 3)).astype(np.float32)
                    distorted_im[dist_y:, dist_x:, :] = im[orig_y:, orig_x:, :]
                    dist_h, dist_w, _ = [float(v) for v in distorted_im.shape]
                    im = distorted_im

                    # After the shift some boxes may newly touch an image
                    # edge. Each entry pairs a per-box mask with the columns
                    # to flag: the edge column first, then the two adjacent
                    # corner columns (only present in eight-point mode).
                    # Columns: 0 left, 1 top, 2 right, 3 bottom,
                    # 4 top-left, 5 bottom-left, 6 bottom-right, 7 top-right.
                    rechecks = []
                    if dx < 0:
                        # Recheck right boundary
                        xmax_temp = gt_bbox_pre[:, 0] + (gt_bbox_pre[:, 2] / 2)
                        rechecks.append(
                            (xmax_temp >= dist_w - 1 - self.right_margin,
                             (2, 7, 6)))
                    if dy < 0:
                        # Recheck bottom boundary
                        ymax_temp = gt_bbox_pre[:, 1] + (gt_bbox_pre[:, 3] / 2)
                        rechecks.append(
                            (ymax_temp >= dist_h - 1 - self.bottom_margin,
                             (3, 6, 5)))
                    if dx > 0:
                        # Recheck left boundary
                        xmin_temp = gt_bbox_pre[:, 0] - (gt_bbox_pre[:, 2] / 2)
                        rechecks.append(
                            (xmin_temp <= self.left_margin, (0, 4, 5)))
                    if dy > 0:
                        # Recheck top boundary
                        ymin_temp = gt_bbox_pre[:, 1] - (gt_bbox_pre[:, 3] / 2)
                        rechecks.append(
                            (ymin_temp <= self.top_margin, (1, 4, 7)))

                    for touching, columns in rechecks:
                        temp_ids = np.where(touching)[0]
                        if len(temp_ids) > 0:
                            if not mc.EIGHT_POINT_REGRESSION:
                                columns = columns[:1]
                            for col in columns:
                                boundary_adhesion_pre[temp_ids, col] = True

                # Flip image with 50% probability
                if np.random.randint(2) > 0.5:
                    im = im[:, ::-1, :]
                    is_flip_performed = True
                    gt_bbox_pre[:, 0] = orig_w - 1 - gt_bbox_pre[:, 0]
                    # Mirror the adhesion flags: left <-> right and, in
                    # eight-point mode, the matching corner pairs. The
                    # fancy-indexed right-hand side is a copy, so this is a
                    # safe in-place column swap.
                    if mc.EIGHT_POINT_REGRESSION:
                        boundary_adhesion_pre[:, [0, 2, 4, 7, 5, 6]] = \
                            boundary_adhesion_pre[:, [2, 0, 7, 4, 6, 5]]
                    else:
                        boundary_adhesion_pre[:, [0, 2]] = \
                            boundary_adhesion_pre[:, [2, 0]]

            # scale image
            im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
            image_per_batch.append(im)

            # scale annotation
            x_scale = mc.IMAGE_WIDTH / orig_w
            y_scale = mc.IMAGE_HEIGHT / orig_h
            gt_bbox_pre[:, 0::2] = gt_bbox_pre[:, 0::2] * x_scale
            gt_bbox_pre[:, 1::2] = gt_bbox_pre[:, 1::2] * y_scale

            # NOTE(review): the boxes are already scaled here but are still
            # compared against the unscaled orig_w, and the two conditions
            # are joined with 'or' - confirm the intended check.
            assert np.all((gt_bbox_pre[:, 0] - (gt_bbox_pre[:, 2]/2.0)) >= 0) or \
                    np.all((gt_bbox_pre[:, 0] + (gt_bbox_pre[:, 2]/2.0)) < orig_w), "Error in the bounding boxes after augmentation"
            if mc.EIGHT_POINT_REGRESSION:
                # Apply the same drift / flip / scale to the polygons.
                for p in range(len(polygons)):
                    poly = np.array(polygons[p])
                    if is_drift_performed:
                        poly[:, 0] = poly[:, 0] - dx
                        poly[:, 1] = poly[:, 1] - dy
                    if is_flip_performed:
                        poly[:, 0] = orig_w - 1 - poly[:, 0]
                    poly[:, 0] = poly[:, 0] * x_scale
                    poly[:, 1] = poly[:, 1] * y_scale
                    polygons[p] = poly
            gt_bbox = gt_bbox_pre  # Use shifted bounding box if EIGHT_POINT_REGRESSION = False
            # Transform the bounding box to offset mode.
            # We extract the bounding box from the flipped and drifted masks to ensure
            # consistency.
            if mc.EIGHT_POINT_REGRESSION:
                gt_bbox = []
                dropped = []  # indices of degenerate masks to discard
                for k in range(len(polygons)):
                    polygon = polygons[k]
                    mask_vector = self._get_8_point_mask(
                        polygon, mc.IMAGE_HEIGHT, mc.IMAGE_WIDTH)
                    center_x, center_y, width, height, of1, of2, of3, of4 = mask_vector
                    if width == 0 or height == 0:
                        print("Error in width or height so ignoring", width,
                              height, gt_bbox_pre[k][2], gt_bbox_pre[k][3],
                              center_x, center_y, gt_bbox_pre[k][0],
                              gt_bbox_pre[k][1], idx)
                        dropped.append(k)
                        continue
                    assert not (of1 <= 0 or of2 <= 0 or of3 <= 0 or of4 <= 0
                                ), "Error Occured " + str(of1) + " " + str(
                                    of2) + " " + str(of3) + " " + str(of4)
                    points = decode_parameterization(mask_vector)
                    points = np.round(points)
                    points = np.array(points, 'int32')
                    assert not ((points[0][1] - points[1][1]) > 1 or (points[2][0] - points[3][0]) > 1 or (points[5][1] - points[4][1]) > 1 or (points[7][0] - points[6][0]) > 1), \
                      "\n\n Error in extraction:"+str(points)+" "+str(idx)+" "+str(mask_vector)
                    gt_bbox.append(mask_vector)
                # Remove the labels of the dropped masks from THIS image's
                # entry (the last one appended; skipped images make the loop
                # counter unusable as an index). Deleting back to front keeps
                # the remaining indices valid.
                # NOTE(review): boundary_adhesion_pre is not filtered here and
                # keeps a row for each dropped mask - confirm whether
                # consumers expect that.
                for k in reversed(dropped):
                    del label_per_batch[-1][k]

            bbox_per_batch.append(gt_bbox)
            boundary_adhesions_per_batch.append(boundary_adhesion_pre)

            aidx_per_image, delta_per_image = [], []
            aidx_set = set()
            for i in range(len(gt_bbox)):
                overlaps = batch_iou(mc.ANCHOR_BOX, gt_bbox[i])
                # len(mc.ANCHOR_BOX) doubles as the "no anchor yet" sentinel
                aidx = len(mc.ANCHOR_BOX)
                for ov_idx in np.argsort(overlaps)[::-1]:
                    if overlaps[ov_idx] <= 0:
                        if mc.DEBUG_MODE:
                            min_iou = min(overlaps[ov_idx], min_iou)
                            num_objects += 1
                            num_zero_iou_obj += 1
                        break
                    if ov_idx not in aidx_set:
                        aidx_set.add(ov_idx)
                        aidx = ov_idx
                        if mc.DEBUG_MODE:
                            max_iou = max(overlaps[ov_idx], max_iou)
                            min_iou = min(overlaps[ov_idx], min_iou)
                            avg_ious += overlaps[ov_idx]
                            num_objects += 1
                        break

                if aidx == len(mc.ANCHOR_BOX):
                    # even the largest available overlap is 0, thus, choose one with the
                    # smallest square distance
                    dist = np.sum(np.square(gt_bbox[i] - mc.ANCHOR_BOX),
                                  axis=1)
                    for dist_idx in np.argsort(dist):
                        if dist_idx not in aidx_set:
                            aidx_set.add(dist_idx)
                            aidx = dist_idx
                            break
                if mc.EIGHT_POINT_REGRESSION:
                    box_cx, box_cy, box_w, box_h, of1, of2, of3, of4 = gt_bbox[
                        i]
                    delta = [0] * 8
                else:
                    box_cx, box_cy, box_w, box_h = gt_bbox[i]
                    delta = [0] * 4

                if mc.ENCODING_TYPE == 'asymmetric_linear':
                    # Use linear domain anchors
                    xmin_t, ymin_t, xmax_t, ymax_t = bbox_transform(
                        [box_cx, box_cy, box_w, box_h])
                    xmin_a, ymin_a, xmax_a, ymax_a = bbox_transform(
                        mc.ANCHOR_BOX[aidx])
                    delta[0] = (xmin_t - xmin_a) / mc.ANCHOR_BOX[aidx][2]
                    delta[1] = (ymin_t - ymin_a) / mc.ANCHOR_BOX[aidx][3]
                    delta[2] = (xmax_t - xmax_a) / mc.ANCHOR_BOX[aidx][2]
                    delta[3] = (ymax_t - ymax_a) / mc.ANCHOR_BOX[aidx][3]
                elif mc.ENCODING_TYPE == 'asymmetric_log':
                    # Use log domain anchors
                    EPSILON = 0.5
                    xmin_t, ymin_t, xmax_t, ymax_t = bbox_transform(
                        [box_cx, box_cy, box_w, box_h])
                    delta[0] = np.log(
                        max((mc.ANCHOR_BOX[aidx][0] - xmin_t) /
                            mc.ANCHOR_BOX[aidx][2], 0) + EPSILON)
                    delta[1] = np.log(
                        max((mc.ANCHOR_BOX[aidx][1] - ymin_t) /
                            mc.ANCHOR_BOX[aidx][3], 0) + EPSILON)
                    delta[2] = np.log(
                        max((xmax_t - mc.ANCHOR_BOX[aidx][0]) /
                            mc.ANCHOR_BOX[aidx][2], 0) + EPSILON)
                    delta[3] = np.log(
                        max((ymax_t - mc.ANCHOR_BOX[aidx][1]) /
                            mc.ANCHOR_BOX[aidx][3], 0) + EPSILON)
                else:
                    delta[0] = (box_cx - mc.ANCHOR_BOX[aidx][0]
                                ) / mc.ANCHOR_BOX[aidx][2]
                    delta[1] = (box_cy - mc.ANCHOR_BOX[aidx][1]
                                ) / mc.ANCHOR_BOX[aidx][3]
                    delta[2] = np.log(
                        box_w / mc.ANCHOR_BOX[aidx][2]
                    )  # if box_w or box_h = 0, the box is not included
                    delta[3] = np.log(box_h / mc.ANCHOR_BOX[aidx][3])

                if mc.EIGHT_POINT_REGRESSION:
                    # Offsets are encoded as log ratios against the anchor
                    # diagonal; EPSILON keeps the log argument positive.
                    EPSILON = 1e-8
                    anchor_diagonal = (mc.ANCHOR_BOX[aidx][2]**2 +
                                       mc.ANCHOR_BOX[aidx][3]**2)**(0.5)
                    delta[4] = np.log((of1 + EPSILON) / anchor_diagonal)
                    delta[5] = np.log((of2 + EPSILON) / anchor_diagonal)
                    delta[6] = np.log((of3 + EPSILON) / anchor_diagonal)
                    delta[7] = np.log((of4 + EPSILON) / anchor_diagonal)

                aidx_per_image.append(aidx)
                delta_per_image.append(delta)

            delta_per_batch.append(delta_per_image)
            aidx_per_batch.append(aidx_per_image)

        if mc.DEBUG_MODE:
            print('max iou: {}'.format(max_iou))
            print('min iou: {}'.format(min_iou))
            # A batch may contain no objects at all (e.g. every image was
            # skipped); report 0.0 instead of dividing by zero.
            print('avg iou: {}'.format(
                avg_ious / num_objects if num_objects else 0.0))
            print('number of objects: {}'.format(num_objects))
            print('number of objects with 0 iou: {}'.format(num_zero_iou_obj))

        return image_per_batch, label_per_batch, delta_per_batch, \
            aidx_per_batch, bbox_per_batch, boundary_adhesions_per_batch
Beispiel #4
0
    def read_batch(self, shuffle=True):
        """Read a batch of image and bounding box annotations.
    Args:
      shuffle: whether or not to shuffle the dataset
    Returns:
      image_per_batch: images. Shape: batch_size x width x height x [b, g, r]
      label_per_batch: labels. Shape: batch_size x object_num
      delta_per_batch: bounding box deltas. Shape: batch_size x object_num x
          [dx ,dy, dw, dh]
      aidx_per_batch: index of anchors that are responsible for prediction.
          Shape: batch_size x object_num
      bbox_per_batch: scaled bounding boxes. Shape: batch_size x object_num x
          [cx, cy, w, h]
    """
        mc = self.mc

        if shuffle:
            if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
                self._shuffle_image_idx()
            batch_idx = self._perm_idx[self._cur_idx:self._cur_idx +
                                       mc.BATCH_SIZE]
            self._cur_idx += mc.BATCH_SIZE
        else:
            if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
                batch_idx = self._image_idx[self._cur_idx:] \
                    + self._image_idx[:self._cur_idx + mc.BATCH_SIZE-len(self._image_idx)]
                self._cur_idx += mc.BATCH_SIZE - len(self._image_idx)
            else:
                batch_idx = self._image_idx[self._cur_idx:self._cur_idx +
                                            mc.BATCH_SIZE]
                self._cur_idx += mc.BATCH_SIZE

        image_per_batch = []
        label_per_batch = []
        bbox_per_batch = []
        delta_per_batch = []
        aidx_per_batch = []
        if mc.DEBUG_MODE:
            avg_ious = 0.
            num_objects = 0.
            max_iou = 0.0
            min_iou = 1.0
            num_zero_iou_obj = 0

        for idx in batch_idx:
            # load the image
            #print("Path: ", self._image_path_at(idx))
            img = cv2.imread(self._image_path_at(idx))
            assert not img is None, "path: %s " % self._image_path_at(idx)
            im = img.astype(np.float32, copy=False)

            im -= mc.BGR_MEANS
            orig_h, orig_w, _ = [float(v) for v in im.shape]

            # load annotations

            #print("type batch idx: ", type(idx))
            label_per_batch.append([b[4] for b in self._rois[idx][:]])
            gt_bbox = np.array([[b[0], b[1], b[2], b[3]]
                                for b in self._rois[idx][:]])
            #print("GT  box: ", gt_bbox)
            if mc.DATA_AUGMENTATION and len(gt_bbox) > 0:
                assert mc.DRIFT_X >= 0 and mc.DRIFT_Y > 0, \
                    'mc.DRIFT_X and mc.DRIFT_Y must be >= 0'

                if mc.DRIFT_X > 0 or mc.DRIFT_Y > 0:
                    # Ensures that gt boundibg box is not cutted out of the image
                    max_drift_x = min(gt_bbox[:, 0] - gt_bbox[:, 2] / 2.0 + 1)
                    max_drift_y = min(gt_bbox[:, 1] - gt_bbox[:, 3] / 2.0 + 1)
                    assert max_drift_x >= 0 and max_drift_y >= 0, 'bbox out of image %s' % self._image_path_at(
                        idx)

                    dy = np.random.randint(-mc.DRIFT_Y,
                                           min(mc.DRIFT_Y + 1, max_drift_y))
                    dx = np.random.randint(-mc.DRIFT_X,
                                           min(mc.DRIFT_X + 1, max_drift_x))

                    # shift bbox
                    gt_bbox[:, 0] = gt_bbox[:, 0] - dx
                    gt_bbox[:, 1] = gt_bbox[:, 1] - dy

                    # distort image
                    orig_h -= dy
                    orig_w -= dx
                    orig_x, dist_x = max(dx, 0), max(-dx, 0)
                    orig_y, dist_y = max(dy, 0), max(-dy, 0)

                    distorted_im = np.zeros(
                        (int(orig_h), int(orig_w), 3)).astype(np.float32)
                    distorted_im[dist_y:, dist_x:, :] = im[orig_y:, orig_x:, :]
                    im = distorted_im

                # Flip image with 50% probability
                if np.random.randint(2) > 0.5:
                    im = im[:, ::-1, :]
                    gt_bbox[:, 0] = orig_w - 1 - gt_bbox[:, 0]

            # scale image
            im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
            image_per_batch.append(im)

            # scale annotation
            if len(gt_bbox) > 0:
                x_scale = mc.IMAGE_WIDTH / orig_w
                y_scale = mc.IMAGE_HEIGHT / orig_h
                gt_bbox[:, 0::2] = gt_bbox[:, 0::2] * x_scale
                gt_bbox[:, 1::2] = gt_bbox[:, 1::2] * y_scale

            bbox_per_batch.append(gt_bbox)

            aidx_per_image, delta_per_image = [], []
            aidx_set = set()
            for i in range(len(gt_bbox)):
                overlaps = batch_iou(mc.ANCHOR_BOX, gt_bbox[i])

                aidx = len(mc.ANCHOR_BOX)
                for ov_idx in np.argsort(overlaps)[::-1]:
                    if overlaps[ov_idx] <= 0:
                        if mc.DEBUG_MODE:
                            min_iou = min(overlaps[ov_idx], min_iou)
                            num_objects += 1
                            num_zero_iou_obj += 1
                        break
                    if ov_idx not in aidx_set:
                        aidx_set.add(ov_idx)
                        aidx = ov_idx
                        if mc.DEBUG_MODE:
                            max_iou = max(overlaps[ov_idx], max_iou)
                            min_iou = min(overlaps[ov_idx], min_iou)
                            avg_ious += overlaps[ov_idx]
                            num_objects += 1
                        break

                if aidx == len(mc.ANCHOR_BOX):
                    # even the largeset available overlap is 0, thus, choose one with the
                    # smallest square distance
                    dist = np.sum(np.square(gt_bbox[i] - mc.ANCHOR_BOX),
                                  axis=1)
                    for dist_idx in np.argsort(dist):
                        if dist_idx not in aidx_set:
                            aidx_set.add(dist_idx)
                            aidx = dist_idx
                            break

                box_cx, box_cy, box_w, box_h = gt_bbox[i]
                delta = [0] * 4
                delta[0] = (box_cx -
                            mc.ANCHOR_BOX[aidx][0]) / mc.ANCHOR_BOX[aidx][2]
                delta[1] = (box_cy -
                            mc.ANCHOR_BOX[aidx][1]) / mc.ANCHOR_BOX[aidx][3]
                delta[2] = np.log(box_w / mc.ANCHOR_BOX[aidx][2])
                delta[3] = np.log(box_h / mc.ANCHOR_BOX[aidx][3])

                aidx_per_image.append(aidx)
                delta_per_image.append(delta)

            delta_per_batch.append(delta_per_image)
            aidx_per_batch.append(aidx_per_image)

        if mc.DEBUG_MODE:
            print('max iou: {}'.format(max_iou))
            print('min iou: {}'.format(min_iou))
            print('avg iou: {}'.format(avg_ious / num_objects))
            print('number of objects: {}'.format(num_objects))
            print('number of objects with 0 iou: {}'.format(num_zero_iou_obj))

        return image_per_batch, label_per_batch, delta_per_batch, aidx_per_batch, bbox_per_batch
Beispiel #5
0
    def read_batch(self, shuffle=True):
        """Read a batch of images and bounding box annotations.

        Each image gets a random brightness offset, has ``mc.BGR_MEANS``
        subtracted and is divided by 128. With ``mc.DATA_AUGMENTATION`` set it
        is also mirrored horizontally with 50% probability. Every ground-truth
        box is then assigned to one not-yet-used anchor of ``mc.ANCHOR_BOX``.

        Args:
          shuffle: whether or not to shuffle the dataset

        Returns:
          image_per_batch: preprocessed images. Shape: batch_size x height x
              width x [b, g, r]
          label_per_batch: labels. Shape: batch_size x object_num
          delta_per_batch: bounding box deltas. Shape: batch_size x
              object_num x [dx, dy, dw, dh]. dw and dh are plain ratios
              (box size / anchor size), not logarithms.
          aidx_per_batch: index of anchors that are responsible for
              prediction. Shape: batch_size x object_num
          bbox_per_batch: scaled bounding boxes. Shape: batch_size x
              object_num x [cx, cy, w, h]
          image_per_batch_viz: the images of image_per_batch multiplied by
              128 again (the subtracted means are not added back).

        Raises:
          IOError: if an image of the batch cannot be read.
          AssertionError: if any ground-truth box has a non-positive center
              coordinate, width or height.
        """
        mc = self.mc

        if shuffle:
            # Not enough indices left for a full batch: reshuffle first.
            # NOTE(review): presumably _shuffle_image_idx() also rewinds
            # self._cur_idx -- confirm against its definition.
            if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
                self._shuffle_image_idx()
            batch_idx = self._perm_idx[self._cur_idx:self._cur_idx +
                                       mc.BATCH_SIZE]
            self._cur_idx += mc.BATCH_SIZE
        else:
            if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
                # Wrap around: tail of the index list plus as many entries
                # from its head as are needed to fill the batch.
                overflow = \
                    self._cur_idx + mc.BATCH_SIZE - len(self._image_idx)
                batch_idx = self._image_idx[self._cur_idx:] \
                    + self._image_idx[:overflow]
                self._cur_idx += mc.BATCH_SIZE - len(self._image_idx)
            else:
                batch_idx = self._image_idx[self._cur_idx:self._cur_idx +
                                            mc.BATCH_SIZE]
                self._cur_idx += mc.BATCH_SIZE

        image_per_batch = []
        image_per_batch_viz = []
        label_per_batch = []
        bbox_per_batch = []
        delta_per_batch = []
        aidx_per_batch = []
        if mc.DEBUG_MODE:
            avg_ious = 0.
            num_objects = 0.
            max_iou = 0.0
            min_iou = 1.0
            num_zero_iou_obj = 0

        for idx in batch_idx:
            # load the image; cv2.imread signals failure by returning None,
            # so fail here with the offending path instead of crashing later
            # with an unrelated AttributeError.
            image_path = self._image_path_at(idx)
            im = cv2.imread(image_path)
            if im is None:
                raise IOError('failed file read:' + image_path)

            im = im.astype(np.float32, copy=False)

            # random brightness control: shift the HSV value channel by an
            # offset drawn from [-73, 71]; only the upper end is capped (255).
            hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
            h, s, v = cv2.split(hsv)
            add_v = np.random.randint(55, 200) - 128
            v = np.where(v <= 255 - add_v, v + add_v, 255)
            final_hsv = cv2.merge((h, s, v))
            im = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)

            im -= mc.BGR_MEANS
            im /= 128.0  # scale the mean-subtracted pixels by 1/128
            orig_h, orig_w, _ = [float(dim) for dim in im.shape]

            # load annotations; the first four roi entries are treated as
            # corner coordinates and converted to [cx, cy, w, h].
            rois = self._rois[idx]
            label_per_batch.append([b[4] for b in rois])
            gt_bbox = np.array([[(b[0] + b[2]) / 2, (b[1] + b[3]) / 2,
                                 b[2] - b[0], b[3] - b[1]]
                                for b in rois])
            # Every box has to be valid, not just one of them.
            assert np.all(gt_bbox[:, 0] > 0), 'less than 0 gt_bbox[0]'
            assert np.all(gt_bbox[:, 1] > 0), 'less than 0 gt_bbox[1]'
            assert np.all(gt_bbox[:, 2] > 0), 'less than 0 gt_bbox[2]'
            assert np.all(gt_bbox[:, 3] > 0), 'less than 0 gt_bbox[3]'

            if mc.DATA_AUGMENTATION:
                # Flip image with 50% probability
                if np.random.randint(2) > 0.5:
                    im = im[:, ::-1, :]
                    gt_bbox[:, 0] = orig_w - 1 - gt_bbox[:, 0]

            # NOTE(review): the image is appended at its original size (the
            # resize call was disabled) while the boxes below are still
            # scaled to mc.IMAGE_WIDTH x mc.IMAGE_HEIGHT -- this is only
            # consistent if the input images already have that size; confirm.
            image_per_batch.append(im)
            image_per_batch_viz.append(im * 128.0)

            # scale annotation
            x_scale = mc.IMAGE_WIDTH / orig_w
            y_scale = mc.IMAGE_HEIGHT / orig_h
            gt_bbox[:, 0::2] = gt_bbox[:, 0::2] * x_scale
            gt_bbox[:, 1::2] = gt_bbox[:, 1::2] * y_scale
            bbox_per_batch.append(gt_bbox)

            aidx_per_image, delta_per_image = [], []
            aidx_set = set()  # anchors already taken by a box of this image
            num_anchors = len(mc.ANCHOR_BOX)
            for box in gt_bbox:
                overlaps = batch_iou(mc.ANCHOR_BOX, box)

                # num_anchors doubles as the "no anchor found yet" marker.
                aidx = num_anchors
                for ov_idx in np.argsort(overlaps)[::-1]:
                    if overlaps[ov_idx] <= 0:
                        if mc.DEBUG_MODE:
                            min_iou = min(overlaps[ov_idx], min_iou)
                            num_objects += 1
                            num_zero_iou_obj += 1
                        break
                    if ov_idx not in aidx_set:
                        aidx_set.add(ov_idx)
                        aidx = ov_idx
                        if mc.DEBUG_MODE:
                            max_iou = max(overlaps[ov_idx], max_iou)
                            min_iou = min(overlaps[ov_idx], min_iou)
                            avg_ious += overlaps[ov_idx]
                            num_objects += 1
                        break

                if aidx == num_anchors:
                    # even the largest available overlap is 0, thus, choose
                    # the free anchor with the smallest square distance
                    dist = np.sum(np.square(box - mc.ANCHOR_BOX), axis=1)
                    for dist_idx in np.argsort(dist):
                        if dist_idx not in aidx_set:
                            aidx_set.add(dist_idx)
                            aidx = dist_idx
                            break

                box_cx, box_cy, box_w, box_h = box
                anchor = mc.ANCHOR_BOX[aidx]
                # The size deltas are linear ratios instead of
                # np.log(box / anchor), to remove exp in FPGA.
                delta = [
                    (box_cx - anchor[0]) / anchor[2],
                    (box_cy - anchor[1]) / anchor[3],
                    box_w / anchor[2],
                    box_h / anchor[3],
                ]

                aidx_per_image.append(aidx)
                delta_per_image.append(delta)

            delta_per_batch.append(delta_per_image)
            aidx_per_batch.append(aidx_per_image)

        if mc.DEBUG_MODE:
            # Guard the average so a batch without objects does not raise
            # ZeroDivisionError.
            avg_iou = avg_ious / num_objects if num_objects else float('nan')
            print('max iou: {}'.format(max_iou))
            print('min iou: {}'.format(min_iou))
            print('avg iou: {}'.format(avg_iou))
            print('number of objects: {}'.format(num_objects))
            print('number of objects with 0 iou: {}'.format(num_zero_iou_obj))

        return image_per_batch, label_per_batch, delta_per_batch, \
            aidx_per_batch, bbox_per_batch, image_per_batch_viz
Beispiel #6
0
  def read_batch(self, shuffle=True):
    """Read a batch of images and bounding box annotations.

    With mc.DATA_AUGMENTATION set, each image is augmented according to
    mc.DATA_AUG_TYPE ('SQT' or 'YOLO') and passed through rand_flip. It is
    then optionally mean-subtracted (mc.SUB_BGR_MEANS) and resized to
    mc.IMAGE_WIDTH x mc.IMAGE_HEIGHT. Every ground-truth box is assigned to
    one not-yet-used anchor of mc.ANCHOR_BOX.

    Args:
      shuffle: whether or not to shuffle the dataset
    Returns:
      image_per_batch: images. Shape: batch_size x IMAGE_HEIGHT x
          IMAGE_WIDTH x [b, g, r]
      label_per_batch: labels. Shape: batch_size x object_num
      delta_per_batch: bounding box deltas. Shape: batch_size x object_num x
          [dx ,dy, dw, dh]
      aidx_per_batch: index of anchors that are responsible for prediction.
          Shape: batch_size x object_num
      bbox_per_batch: scaled bounding boxes. Shape: batch_size x object_num x
          [cx, cy, w, h]
    Raises:
      IOError: if an image of the batch cannot be read.
      AssertionError: if mc.DATA_AUG_TYPE is not a supported type.
    """
    mc = self.mc

    if shuffle:
      # Not enough indices left for a full batch: reshuffle first.
      # NOTE(review): presumably _shuffle_image_idx() also rewinds
      # self._cur_idx -- confirm against its definition.
      if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
        self._shuffle_image_idx()
      batch_idx = self._perm_idx[self._cur_idx:self._cur_idx+mc.BATCH_SIZE]
      self._cur_idx += mc.BATCH_SIZE
    else:
      if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
        # Wrap around: tail of the index list plus as many entries from its
        # head as are needed to fill the batch.
        overflow = self._cur_idx + mc.BATCH_SIZE - len(self._image_idx)
        batch_idx = self._image_idx[self._cur_idx:] \
            + self._image_idx[:overflow]
        self._cur_idx += mc.BATCH_SIZE - len(self._image_idx)
      else:
        batch_idx = self._image_idx[self._cur_idx:self._cur_idx+mc.BATCH_SIZE]
        self._cur_idx += mc.BATCH_SIZE

    image_per_batch = []
    label_per_batch = []
    bbox_per_batch  = []
    delta_per_batch = []
    aidx_per_batch  = []
    if mc.DEBUG_MODE:
      avg_ious = 0.
      num_objects = 0.
      max_iou = 0.0
      min_iou = 1.0
      num_zero_iou_obj = 0

    for idx in batch_idx:
      # load the image; cv2.imread signals failure by returning None, so
      # fail here with the offending path instead of an AttributeError on
      # im.shape.
      image_path = self._image_path_at(idx)
      im = cv2.imread(image_path)
      if im is None:
        raise IOError('failed file read: ' + image_path)
      # NOTE(review): the size is captured before augmentation but used for
      # the annotation scaling afterwards -- confirm that drift_dist and
      # scale_trans keep the image size unchanged.
      orig_h, orig_w, _ = [float(dim) for dim in im.shape]

      # load annotations; the first four roi entries are used as
      # [cx, cy, w, h] and the fifth as the label.
      rois = self._rois[idx]
      label_this_batch = np.array([b[4] for b in rois])
      gt_bbox = np.array([[b[0], b[1], b[2], b[3]] for b in rois])

      if mc.DATA_AUGMENTATION:
        assert mc.DATA_AUG_TYPE in ['SQT', 'YOLO'], \
            'Invalid augmentation type: {}'.format(mc.DATA_AUG_TYPE)
        if mc.DATA_AUG_TYPE == 'SQT':
          im, gt_bbox = drift_dist(im, gt_bbox, mc, orig_h, orig_w)
        elif mc.DATA_AUG_TYPE == 'YOLO':
          # applied to about half of the images
          if np.random.randint(2) > 0.5:
            im, gt_bbox, label_this_batch = scale_trans(im, gt_bbox, label_this_batch)
            im = recolor(im)
        im, gt_bbox = rand_flip(im, gt_bbox, orig_w)

      # Remove BGR bias
      if mc.SUB_BGR_MEANS:
        im = im.astype(np.float32, copy=False)
        im -= mc.BGR_MEANS

      label_per_batch.append(label_this_batch.tolist())

      # scale image
      im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
      image_per_batch.append(im)

      # scale annotation
      x_scale = mc.IMAGE_WIDTH/orig_w
      y_scale = mc.IMAGE_HEIGHT/orig_h
      gt_bbox[:, 0::2] = gt_bbox[:, 0::2]*x_scale
      gt_bbox[:, 1::2] = gt_bbox[:, 1::2]*y_scale
      bbox_per_batch.append(gt_bbox)

      aidx_per_image, delta_per_image = [], []
      aidx_set = set()  # anchors already taken by a box of this image
      num_anchors = len(mc.ANCHOR_BOX)
      for box in gt_bbox:
        overlaps = batch_iou(mc.ANCHOR_BOX, box)

        # num_anchors doubles as the "no anchor found yet" marker.
        aidx = num_anchors
        for ov_idx in np.argsort(overlaps)[::-1]:
          if overlaps[ov_idx] <= 0:
            if mc.DEBUG_MODE:
              min_iou = min(overlaps[ov_idx], min_iou)
              num_objects += 1
              num_zero_iou_obj += 1
            break
          if ov_idx not in aidx_set:
            aidx_set.add(ov_idx)
            aidx = ov_idx
            if mc.DEBUG_MODE:
              max_iou = max(overlaps[ov_idx], max_iou)
              min_iou = min(overlaps[ov_idx], min_iou)
              avg_ious += overlaps[ov_idx]
              num_objects += 1
            break

        if aidx == num_anchors:
          # even the largest available overlap is 0, thus, choose the free
          # anchor with the smallest square distance
          dist = np.sum(np.square(box - mc.ANCHOR_BOX), axis=1)
          for dist_idx in np.argsort(dist):
            if dist_idx not in aidx_set:
              aidx_set.add(dist_idx)
              aidx = dist_idx
              break

        box_cx, box_cy, box_w, box_h = box
        anchor = mc.ANCHOR_BOX[aidx]
        # NOTE(review): dx/dy are normalised by the ground-truth box size
        # here, whereas anchor-box encodings usually divide by the anchor
        # size (anchor[2], anchor[3]). Left unchanged because the decoder is
        # not visible from here -- confirm that both sides agree.
        delta = [
            (box_cx - anchor[0])/box_w,
            (box_cy - anchor[1])/box_h,
            np.log(box_w/anchor[2]),
            np.log(box_h/anchor[3]),
        ]

        aidx_per_image.append(aidx)
        delta_per_image.append(delta)

      delta_per_batch.append(delta_per_image)
      aidx_per_batch.append(aidx_per_image)

    if mc.DEBUG_MODE:
      # Guard the average so a batch without objects does not raise
      # ZeroDivisionError.
      avg_iou = avg_ious/num_objects if num_objects else float('nan')
      print ('max iou: {}'.format(max_iou))
      print ('min iou: {}'.format(min_iou))
      print ('avg iou: {}'.format(avg_iou))
      print ('number of objects: {}'.format(num_objects))
      print ('number of objects with 0 iou: {}'.format(num_zero_iou_obj))

    return image_per_batch, label_per_batch, delta_per_batch, \
        aidx_per_batch, bbox_per_batch