def iou_aid_calc(self, ANCHOR_BOX, gt_bbox):
    """Assign anchors to ground-truth boxes and encode regression deltas.

    Boxes are visited in ascending order of ``self.calc_area(gt_bbox)``.
    Every still-unused anchor whose IoU with the box exceeds 0.25 is
    assigned to it, so a single box may own several anchors. A box that
    gets no anchor this way falls back to the unused anchor with the
    smallest squared distance to the box parameters.

    Args:
        ANCHOR_BOX: anchors, each read as [cx, cy, w, h]; must support
            ``gt_bbox[i] - ANCHOR_BOX`` broadcasting (presumably a 2-D
            numpy array -- confirm against the caller).
        gt_bbox: ground-truth boxes, each unpacked as [cx, cy, w, h].

    Returns:
        aidx_per_image: list of [anchor_index, gt_index] pairs.
        delta_per_image: list of [dx, dy, dw, dh] targets, aligned with
            aidx_per_image.
    """
    min_iou = 0.25  # an anchor must overlap by more than this to match

    def _encode(anchor_idx, gt_idx):
        # Centre offsets are normalised by the anchor size; width and
        # height are encoded as log ratios against the anchor.
        anchor = ANCHOR_BOX[anchor_idx]
        box_cx, box_cy, box_w, box_h = gt_bbox[gt_idx]
        return [
            (box_cx - anchor[0]) / anchor[2],
            (box_cy - anchor[1]) / anchor[3],
            np.log(box_w / anchor[2]),
            np.log(box_h / anchor[3]),
        ]

    aidx_per_image, delta_per_image = [], []
    used_anchors = set()
    gt_area = self.calc_area(gt_bbox)

    for i in np.argsort(gt_area):
        overlaps = batch_iou(ANCHOR_BOX, gt_bbox[i])
        matched = False

        # Walk the anchors from best to worst overlap. There is no break
        # after a match: all qualifying anchors are claimed by this box.
        for ov_idx in np.argsort(overlaps)[::-1]:
            if overlaps[ov_idx] <= 0:
                break
            if ov_idx not in used_anchors and overlaps[ov_idx] > min_iou:
                used_anchors.add(ov_idx)
                aidx_per_image.append([ov_idx, i])
                delta_per_image.append(_encode(ov_idx, i))
                matched = True

        if not matched:
            # No anchor overlaps enough, so choose the unused anchor with
            # the smallest squared distance instead.
            # NOTE(review): if every anchor is already taken, aidx stays at
            # len(ANCHOR_BOX) and the encoding below raises IndexError.
            aidx = len(ANCHOR_BOX)
            dist = np.sum(np.square(gt_bbox[i] - ANCHOR_BOX), axis=1)
            for dist_idx in np.argsort(dist):
                if dist_idx not in used_anchors:
                    used_anchors.add(dist_idx)
                    aidx = dist_idx
                    break
            aidx_per_image.append([aidx, i])
            delta_per_image.append(_encode(aidx, i))

    return aidx_per_image, delta_per_image
def analyze_detections(self, detection_file_dir, det_error_file):
    """Classify detections as correct or as one of several error types.

    For every image in ``self._image_idx`` the file
    ``<detection_file_dir>/<idx>.txt`` is parsed (space separated; field 0
    is the class name, fields 4-7 are xmin, ymin, xmax, ymax and the last
    field is the score). Detections are sorted by descending score and
    stored in ``self._det_rois``.

    Only the N highest-scoring detections of an image are analysed, where
    N is its number of ground-truth objects. Each is matched to the
    ground-truth box with the highest IoU:

      * IoU <= 0.1                      -> background error ('bg')
      * IoU > 0.1, class differs        -> classification error ('cls')
      * same class, IoU < 0.5           -> localization error ('loc')
      * same class, IoU >= 0.5          -> correct, or a repeated detection
                                           if that object was already found

    Errors, and ground-truth objects that were never detected ('missed',
    score -1.0), are written to ``det_error_file``. Images without ground
    truth are skipped.

    Args:
        detection_file_dir: directory holding one detection file per image.
        det_error_file: path of the error log to write.

    Returns:
        dict with the detection and object counts and the per-detection
        error ratios plus recall. A ratio whose denominator is zero is
        reported as 0.0 instead of raising ZeroDivisionError.
    """
    def _save_detection(f, idx, error_type, det, score):
        # det is [cx, cy, w, h, cls, ...]; written as corner coordinates.
        f.write(
            '{:s} {:s} {:.1f} {:.1f} {:.1f} {:.1f} {:s} {:.3f}\n'.format(
                idx, error_type,
                det[0] - det[2] / 2., det[1] - det[3] / 2.,
                det[0] + det[2] / 2., det[1] + det[3] / 2.,
                self._classes[int(det[4])],
                score))

    def _ratio(count, total):
        # Guard against datasets with no detections / no objects.
        return count / total if total else 0.0

    # load detections
    self._det_rois = {}
    for idx in self._image_idx:
        det_file_name = os.path.join(detection_file_dir, idx + '.txt')
        with open(det_file_name) as f:
            lines = f.readlines()
        bboxes = []
        for line in lines:
            obj = line.strip().split(' ')
            cls = self._class_to_idx[obj[0].lower().strip()]
            xmin = float(obj[4])
            ymin = float(obj[5])
            xmax = float(obj[6])
            ymax = float(obj[7])
            score = float(obj[-1])
            x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
            bboxes.append([x, y, w, h, cls, score])
        bboxes.sort(key=lambda x: x[-1], reverse=True)
        self._det_rois[idx] = bboxes

    # do error analysis
    num_objs = 0.
    num_dets = 0.
    num_correct = 0.
    num_loc_error = 0.
    num_cls_error = 0.
    num_bg_error = 0.
    num_repeated_error = 0.
    num_detected_obj = 0.

    with open(det_error_file, 'w') as f:
        for idx in self._image_idx:
            gt_bboxes = np.array(self._rois[idx])
            num_objs += len(gt_bboxes)
            detected = [False] * len(gt_bboxes)

            det_bboxes = self._det_rois[idx]
            if len(gt_bboxes) < 1:
                continue

            # Detections ranked below the number of objects never affected
            # any counter or output, so they are not visited at all.
            for det in det_bboxes[:len(gt_bboxes)]:
                num_dets += 1
                ious = batch_iou(gt_bboxes[:, :4], det[:4])
                max_iou = np.max(ious)
                gt_idx = np.argmax(ious)
                if max_iou > 0.1:
                    if gt_bboxes[gt_idx, 4] == det[4]:
                        if max_iou >= 0.5:
                            if not detected[gt_idx]:
                                num_correct += 1
                                detected[gt_idx] = True
                            else:
                                num_repeated_error += 1
                        else:
                            num_loc_error += 1
                            _save_detection(f, idx, 'loc', det, det[5])
                    else:
                        num_cls_error += 1
                        _save_detection(f, idx, 'cls', det, det[5])
                else:
                    num_bg_error += 1
                    _save_detection(f, idx, 'bg', det, det[5])

            for i, gt in enumerate(gt_bboxes):
                if not detected[i]:
                    _save_detection(f, idx, 'missed', gt, -1.0)
            num_detected_obj += sum(detected)

    correct_ratio = _ratio(num_correct, num_dets)
    loc_ratio = _ratio(num_loc_error, num_dets)
    cls_ratio = _ratio(num_cls_error, num_dets)
    bg_ratio = _ratio(num_bg_error, num_dets)
    repeated_ratio = _ratio(num_repeated_error, num_dets)
    recall = _ratio(num_detected_obj, num_objs)

    print('Detection Analysis:')
    print(' Number of detections: {}'.format(num_dets))
    print(' Number of objects: {}'.format(num_objs))
    print(' Percentage of correct detections: {}'.format(correct_ratio))
    print(' Percentage of localization error: {}'.format(loc_ratio))
    print(' Percentage of classification error: {}'.format(cls_ratio))
    print(' Percentage of background error: {}'.format(bg_ratio))
    print(' Percentage of repeated detections: {}'.format(repeated_ratio))
    print(' Recall: {}'.format(recall))

    out = {}
    out['num of detections'] = num_dets
    out['num of objects'] = num_objs
    out['% correct detections'] = correct_ratio
    out['% localization error'] = loc_ratio
    out['% classification error'] = cls_ratio
    out['% background error'] = bg_ratio
    out['% repeated error'] = repeated_ratio
    out['% recall'] = recall

    return out
def read_batch(self, shuffle=True, wrap_around=True):
    """Read a batch of images and instance annotations.

    Images that cannot be opened or decoded are reported and skipped, so a
    batch may hold fewer entries than mc.BATCH_SIZE.

    Args:
      shuffle: draw the batch from the shuffled permutation
        (self._perm_idx) instead of walking self._image_idx in order.
      wrap_around: only consulted when shuffle is False and the batch runs
        past the end of the dataset. The batch is always completed with
        images from the start of the list; if True the cursor continues
        after those images, if False it is reset to 0.
    Returns:
      image_per_batch: mean-subtracted float32 images, each resized with
        cv2.resize to (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT).
      label_per_batch: per image, the list of object labels.
      delta_per_batch: per image and object, the regression target
        [dx, dy, dw, dh], or
        [dx, dy, dw, dh, dof1, dof2, dof3, dof4] when
        mc.EIGHT_POINT_REGRESSION is set.
      aidx_per_batch: per image and object, index of the anchor that is
        responsible for the prediction.
      bbox_per_batch: per image, scaled boxes [cx, cy, w, h] or mask
        parameters [cx, cy, w, h, of1, of2, of3, of4].
      boundary_adhesions_per_batch: per image, the array of per-object
        boundary-adhesion flags.
    """
    mc = self.mc

    if shuffle:
        if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
            self._shuffle_image_idx()
        batch_idx = self._perm_idx[self._cur_idx:self._cur_idx + mc.BATCH_SIZE]
        self._cur_idx += mc.BATCH_SIZE
    else:
        # Check for wrap around only in non shuffle mode
        if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
            batch_idx = self._image_idx[self._cur_idx:] \
                + self._image_idx[:self._cur_idx + mc.BATCH_SIZE
                                  - len(self._image_idx)]
            if wrap_around:
                self._cur_idx += mc.BATCH_SIZE - len(self._image_idx)
            else:
                # Restart the counter if no-wrap-around is enabled
                # This ensures all the validation examples are evaluated
                self._cur_idx = 0
        else:
            batch_idx = self._image_idx[self._cur_idx:self._cur_idx + mc.BATCH_SIZE]
            self._cur_idx += mc.BATCH_SIZE

    image_per_batch = []
    label_per_batch = []
    bbox_per_batch = []
    delta_per_batch = []
    aidx_per_batch = []
    boundary_adhesions_per_batch = []

    if mc.DEBUG_MODE:
        avg_ious = 0.
        num_objects = 0.
        max_iou = 0.0
        min_iou = 1.0
        num_zero_iou_obj = 0

    for idx in batch_idx:
        image_path = self._image_path_at(idx)

        # load the image
        try:
            # Seems to be the only way to detect invalid image files
            Image.open(image_path).tobytes()
        except IOError:
            print('Detect error img %s' % image_path)
            continue
        im = cv2.imread(image_path)
        # The None check has to come before any use of the array; the
        # original converted first, which made this branch unreachable.
        if im is None:
            print("\n\nCorrupt image found: ", image_path)
            continue
        im = im.astype(np.float32, copy=False)
        im -= mc.BGR_MEANS
        orig_h, orig_w, _ = [float(v) for v in im.shape]

        # load annotations
        labels = [b[4] for b in self._rois[idx][:]]
        label_per_batch.append(labels)
        gt_bbox_pre = np.array(
            [[b[0], b[1], b[2], b[3]] for b in self._rois[idx][:]])

        if mc.EIGHT_POINT_REGRESSION:
            polygons = [b[2] for b in self._poly[idx][:]]
            boundary_adhesion_pre = np.array(
                [[b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]
                 for b in self._boundary_adhesions[idx][:]])
        else:
            boundary_adhesion_pre = np.array(
                [[b[0], b[1], b[2], b[3]]
                 for b in self._boundary_adhesions[idx][:]])

        is_drift_performed = False
        is_flip_performed = False

        # NOTE(review): the two bounds are joined with `or`, so this only
        # fails when boxes violate the left AND the right bound; `and` may
        # have been intended. Left unchanged.
        assert np.all((gt_bbox_pre[:, 0] - (gt_bbox_pre[:, 2]/2.0)) >= 0) or \
            np.all((gt_bbox_pre[:, 0] + (gt_bbox_pre[:, 2]/2.0)) < orig_w), \
            "Error in the bounding boxes before augmentation"

        if mc.DATA_AUGMENTATION:
            # NOTE(review): the check requires DRIFT_Y > 0 although the
            # message says >= 0. Left unchanged.
            assert mc.DRIFT_X >= 0 and mc.DRIFT_Y > 0, \
                'mc.DRIFT_X and mc.DRIFT_Y must be >= 0'

            if mc.DRIFT_X > 0 or mc.DRIFT_Y > 0:
                # Ensures that gt bounding box is not cut out of the image
                max_drift_x = math.floor(
                    min(gt_bbox_pre[:, 0] - (gt_bbox_pre[:, 2] / 2.0) + 1))
                max_drift_y = math.floor(
                    min(gt_bbox_pre[:, 1] - (gt_bbox_pre[:, 3] / 2.0) + 1))

                assert max_drift_x >= 0 and max_drift_y >= 0, 'bbox out of image'

                # dy is drawn before dx; keep this order so seeded runs
                # stay reproducible.
                dy = np.random.randint(-mc.DRIFT_Y,
                                       min(mc.DRIFT_Y + 1, max_drift_y))
                dx = np.random.randint(-mc.DRIFT_X,
                                       min(mc.DRIFT_X + 1, max_drift_x))

                # shift bbox
                gt_bbox_pre[:, 0] = gt_bbox_pre[:, 0] - dx
                gt_bbox_pre[:, 1] = gt_bbox_pre[:, 1] - dy
                is_drift_performed = True

                # distort image: positive drift crops the top/left edge,
                # negative drift pads it with zeros
                orig_h -= dy
                orig_w -= dx
                orig_x, dist_x = max(dx, 0), max(-dx, 0)
                orig_y, dist_y = max(dy, 0), max(-dy, 0)

                distorted_im = np.zeros(
                    (int(orig_h), int(orig_w), 3)).astype(np.float32)
                distorted_im[dist_y:, dist_x:, :] = im[orig_y:, orig_x:, :]
                dist_h, dist_w, _ = [float(v) for v in distorted_im.shape]
                im = distorted_im

                # Boxes may now touch a border they did not touch before,
                # so re-check the adhesion flags. Column layout: 0 left,
                # 1 top, 2 right, 3 bottom, 4 top-left, 5 bottom-left,
                # 6 bottom-right, 7 top-right. In eight-point mode the two
                # corners on the affected side are flagged as well.
                if mc.EIGHT_POINT_REGRESSION:
                    right_cols, bottom_cols = (2, 7, 6), (3, 6, 5)
                    left_cols, top_cols = (0, 4, 5), (1, 4, 7)
                else:
                    right_cols, bottom_cols = (2,), (3,)
                    left_cols, top_cols = (0,), (1,)

                if dx < 0:
                    # Recheck right boundary
                    xmax_temp = gt_bbox_pre[:, 0] + (gt_bbox_pre[:, 2] / 2)
                    temp_ids = np.where(
                        xmax_temp >= dist_w - 1 - self.right_margin)[0]
                    if len(temp_ids) > 0:
                        for col in right_cols:
                            boundary_adhesion_pre[temp_ids, col] = True
                if dy < 0:
                    # Recheck bottom boundary
                    ymax_temp = gt_bbox_pre[:, 1] + (gt_bbox_pre[:, 3] / 2)
                    temp_ids = np.where(
                        ymax_temp >= dist_h - 1 - self.bottom_margin)[0]
                    if len(temp_ids) > 0:
                        for col in bottom_cols:
                            boundary_adhesion_pre[temp_ids, col] = True
                if dx > 0:
                    # Recheck left boundary
                    xmin_temp = gt_bbox_pre[:, 0] - (gt_bbox_pre[:, 2] / 2)
                    temp_ids = np.where(xmin_temp <= self.left_margin)[0]
                    if len(temp_ids) > 0:
                        for col in left_cols:
                            boundary_adhesion_pre[temp_ids, col] = True
                if dy > 0:
                    # Recheck top boundary
                    ymin_temp = gt_bbox_pre[:, 1] - (gt_bbox_pre[:, 3] / 2)
                    temp_ids = np.where(ymin_temp <= self.top_margin)[0]
                    if len(temp_ids) > 0:
                        for col in top_cols:
                            boundary_adhesion_pre[temp_ids, col] = True

            # Flip image with 50% probability
            if np.random.randint(2) > 0.5:
                im = im[:, ::-1, :]
                is_flip_performed = True
                gt_bbox_pre[:, 0] = orig_w - 1 - gt_bbox_pre[:, 0]
                # Mirroring exchanges left and right (and their corners).
                if mc.EIGHT_POINT_REGRESSION:
                    swaps = [(0, 2), (4, 7), (5, 6)]
                else:
                    swaps = [(0, 2)]
                for a, b in swaps:
                    # The fancy-indexed right-hand side is a copy, so the
                    # two columns are exchanged safely.
                    boundary_adhesion_pre[:, [a, b]] = \
                        boundary_adhesion_pre[:, [b, a]]

        # scale image
        im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
        image_per_batch.append(im)

        # scale annotation
        x_scale = mc.IMAGE_WIDTH / orig_w
        y_scale = mc.IMAGE_HEIGHT / orig_h
        gt_bbox_pre[:, 0::2] = gt_bbox_pre[:, 0::2] * x_scale
        gt_bbox_pre[:, 1::2] = gt_bbox_pre[:, 1::2] * y_scale

        # NOTE(review): same `or` as above, and the scaled boxes are still
        # compared with the unscaled orig_w. Left unchanged.
        assert np.all((gt_bbox_pre[:, 0] - (gt_bbox_pre[:, 2]/2.0)) >= 0) or \
            np.all((gt_bbox_pre[:, 0] + (gt_bbox_pre[:, 2]/2.0)) < orig_w), \
            "Error in the bounding boxes after augmentation"

        if mc.EIGHT_POINT_REGRESSION:
            # Apply the same drift / flip / scale to every polygon.
            for p, raw_poly in enumerate(polygons):
                poly = np.array(raw_poly)
                if is_drift_performed:
                    poly[:, 0] = poly[:, 0] - dx
                    poly[:, 1] = poly[:, 1] - dy
                if is_flip_performed:
                    poly[:, 0] = orig_w - 1 - poly[:, 0]
                poly[:, 0] = poly[:, 0] * x_scale
                poly[:, 1] = poly[:, 1] * y_scale
                polygons[p] = poly

        # Use shifted bounding box if EIGHT_POINT_REGRESSION = False
        gt_bbox = gt_bbox_pre

        # Transform the bounding box to offset mode.
        # We extract the bounding box from the flipped and drifted masks to
        # ensure consistency.
        if mc.EIGHT_POINT_REGRESSION:
            gt_bbox = []
            dropped = []
            for k, polygon in enumerate(polygons):
                mask_vector = self._get_8_point_mask(
                    polygon, mc.IMAGE_HEIGHT, mc.IMAGE_WIDTH)
                center_x, center_y, width, height, of1, of2, of3, of4 = \
                    mask_vector
                if width == 0 or height == 0:
                    print("Error in width or height so ignoring", width,
                          height, gt_bbox_pre[k][2], gt_bbox_pre[k][3],
                          center_x, center_y, gt_bbox_pre[k][0],
                          gt_bbox_pre[k][1], idx)
                    dropped.append(k)
                    continue
                assert not (of1 <= 0 or of2 <= 0 or of3 <= 0 or of4 <= 0), \
                    "Error Occured " + str(of1) + " " + str(of2) + " " \
                    + str(of3) + " " + str(of4)
                points = decode_parameterization(mask_vector)
                points = np.round(points)
                points = np.array(points, 'int32')
                assert not ((points[0][1] - points[1][1]) > 1
                            or (points[2][0] - points[3][0]) > 1
                            or (points[5][1] - points[4][1]) > 1
                            or (points[7][0] - points[6][0]) > 1), \
                    "\n\n Error in extraction:"+str(points)+" "+str(idx)+" "+str(mask_vector)
                gt_bbox.append(mask_vector)
            # Remove the labels of the dropped objects from THIS image's
            # label list, back to front so earlier indices stay valid. The
            # original indexed label_per_batch by the position in
            # batch_idx, which is wrong once an image has been skipped, and
            # deleted while iterating, which shifts the later indices.
            # NOTE(review): boundary_adhesion_pre is not filtered here and
            # keeps the rows of dropped objects; confirm what consumers
            # expect.
            for k in reversed(dropped):
                del labels[k]

        bbox_per_batch.append(gt_bbox)
        boundary_adhesions_per_batch.append(boundary_adhesion_pre)

        aidx_per_image, delta_per_image = [], []
        aidx_set = set()
        for i in range(len(gt_bbox)):
            overlaps = batch_iou(mc.ANCHOR_BOX, gt_bbox[i])

            # Pick the unused anchor with the largest positive IoU.
            aidx = len(mc.ANCHOR_BOX)
            for ov_idx in np.argsort(overlaps)[::-1]:
                if overlaps[ov_idx] <= 0:
                    if mc.DEBUG_MODE:
                        min_iou = min(overlaps[ov_idx], min_iou)
                        num_objects += 1
                        num_zero_iou_obj += 1
                    break
                if ov_idx not in aidx_set:
                    aidx_set.add(ov_idx)
                    aidx = ov_idx
                    if mc.DEBUG_MODE:
                        max_iou = max(overlaps[ov_idx], max_iou)
                        min_iou = min(overlaps[ov_idx], min_iou)
                        avg_ious += overlaps[ov_idx]
                        num_objects += 1
                    break

            if aidx == len(mc.ANCHOR_BOX):
                # even the largest available overlap is 0, thus, choose one
                # with the smallest square distance
                dist = np.sum(np.square(gt_bbox[i] - mc.ANCHOR_BOX), axis=1)
                for dist_idx in np.argsort(dist):
                    if dist_idx not in aidx_set:
                        aidx_set.add(dist_idx)
                        aidx = dist_idx
                        break

            if mc.EIGHT_POINT_REGRESSION:
                box_cx, box_cy, box_w, box_h, of1, of2, of3, of4 = gt_bbox[i]
                delta = [0] * 8
            else:
                box_cx, box_cy, box_w, box_h = gt_bbox[i]
                delta = [0] * 4

            anchor = mc.ANCHOR_BOX[aidx]
            if mc.ENCODING_TYPE == 'asymmetric_linear':
                # Use linear domain anchors
                xmin_t, ymin_t, xmax_t, ymax_t = bbox_transform(
                    [box_cx, box_cy, box_w, box_h])
                xmin_a, ymin_a, xmax_a, ymax_a = bbox_transform(anchor)
                delta[0] = (xmin_t - xmin_a) / anchor[2]
                delta[1] = (ymin_t - ymin_a) / anchor[3]
                delta[2] = (xmax_t - xmax_a) / anchor[2]
                delta[3] = (ymax_t - ymax_a) / anchor[3]
            elif mc.ENCODING_TYPE == 'asymmetric_log':
                # Use log domain anchors
                EPSILON = 0.5
                xmin_t, ymin_t, xmax_t, ymax_t = bbox_transform(
                    [box_cx, box_cy, box_w, box_h])
                delta[0] = np.log(
                    max((anchor[0] - xmin_t) / anchor[2], 0) + EPSILON)
                delta[1] = np.log(
                    max((anchor[1] - ymin_t) / anchor[3], 0) + EPSILON)
                delta[2] = np.log(
                    max((xmax_t - anchor[0]) / anchor[2], 0) + EPSILON)
                delta[3] = np.log(
                    max((ymax_t - anchor[1]) / anchor[3], 0) + EPSILON)
            else:
                delta[0] = (box_cx - anchor[0]) / anchor[2]
                delta[1] = (box_cy - anchor[1]) / anchor[3]
                # if box_w or box_h = 0, the box is not included
                delta[2] = np.log(box_w / anchor[2])
                delta[3] = np.log(box_h / anchor[3])

            if mc.EIGHT_POINT_REGRESSION:
                # Corner offsets are encoded as log ratios against the
                # anchor diagonal.
                EPSILON = 1e-8
                anchor_diagonal = (anchor[2]**2 + anchor[3]**2)**(0.5)
                delta[4] = np.log((of1 + EPSILON) / anchor_diagonal)
                delta[5] = np.log((of2 + EPSILON) / anchor_diagonal)
                delta[6] = np.log((of3 + EPSILON) / anchor_diagonal)
                delta[7] = np.log((of4 + EPSILON) / anchor_diagonal)

            aidx_per_image.append(aidx)
            delta_per_image.append(delta)

        delta_per_batch.append(delta_per_image)
        aidx_per_batch.append(aidx_per_image)

    if mc.DEBUG_MODE:
        # A batch without any object would otherwise divide by zero.
        avg_iou = avg_ious / num_objects if num_objects else 0.0
        print('max iou: {}'.format(max_iou))
        print('min iou: {}'.format(min_iou))
        print('avg iou: {}'.format(avg_iou))
        print('number of objects: {}'.format(num_objects))
        print('number of objects with 0 iou: {}'.format(num_zero_iou_obj))

    return image_per_batch, label_per_batch, delta_per_batch, \
        aidx_per_batch, bbox_per_batch, boundary_adhesions_per_batch
def read_batch(self, shuffle=True):
    """Read a batch of image and bounding box annotations.

    Images without any ground-truth box are allowed: they are neither
    augmented nor rescaled on the annotation side and contribute empty
    per-image lists.

    Args:
      shuffle: whether or not to shuffle the dataset
    Returns:
      image_per_batch: mean-subtracted float32 images, each resized with
        cv2.resize to (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT).
      label_per_batch: labels. Shape: batch_size x object_num
      delta_per_batch: bounding box deltas. Shape: batch_size x object_num x
          [dx ,dy, dw, dh]
      aidx_per_batch: index of anchors that are responsible for prediction.
          Shape: batch_size x object_num
      bbox_per_batch: scaled bounding boxes. Shape: batch_size x object_num x
          [cx, cy, w, h]
    """
    mc = self.mc

    if shuffle:
        if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
            self._shuffle_image_idx()
        batch_idx = self._perm_idx[self._cur_idx:self._cur_idx + mc.BATCH_SIZE]
        self._cur_idx += mc.BATCH_SIZE
    else:
        if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
            # Complete the batch with images from the start of the list.
            batch_idx = self._image_idx[self._cur_idx:] \
                + self._image_idx[:self._cur_idx + mc.BATCH_SIZE
                                  - len(self._image_idx)]
            self._cur_idx += mc.BATCH_SIZE - len(self._image_idx)
        else:
            batch_idx = self._image_idx[self._cur_idx:self._cur_idx + mc.BATCH_SIZE]
            self._cur_idx += mc.BATCH_SIZE

    image_per_batch = []
    label_per_batch = []
    bbox_per_batch = []
    delta_per_batch = []
    aidx_per_batch = []
    if mc.DEBUG_MODE:
        avg_ious = 0.
        num_objects = 0.
        max_iou = 0.0
        min_iou = 1.0
        num_zero_iou_obj = 0

    for idx in batch_idx:
        # load the image
        img = cv2.imread(self._image_path_at(idx))
        assert img is not None, "path: %s " % self._image_path_at(idx)
        im = img.astype(np.float32, copy=False)
        im -= mc.BGR_MEANS
        orig_h, orig_w, _ = [float(v) for v in im.shape]

        # load annotations
        label_per_batch.append([b[4] for b in self._rois[idx][:]])
        gt_bbox = np.array(
            [[b[0], b[1], b[2], b[3]] for b in self._rois[idx][:]])

        if mc.DATA_AUGMENTATION and len(gt_bbox) > 0:
            # NOTE(review): the check requires DRIFT_Y > 0 although the
            # message says >= 0. Left unchanged.
            assert mc.DRIFT_X >= 0 and mc.DRIFT_Y > 0, \
                'mc.DRIFT_X and mc.DRIFT_Y must be >= 0'

            if mc.DRIFT_X > 0 or mc.DRIFT_Y > 0:
                # Ensures that gt bounding box is not cut out of the image
                max_drift_x = min(gt_bbox[:, 0] - gt_bbox[:, 2] / 2.0 + 1)
                max_drift_y = min(gt_bbox[:, 1] - gt_bbox[:, 3] / 2.0 + 1)

                assert max_drift_x >= 0 and max_drift_y >= 0, \
                    'bbox out of image %s' % self._image_path_at(idx)

                # dy is drawn before dx; keep this order so seeded runs
                # stay reproducible.
                dy = np.random.randint(-mc.DRIFT_Y,
                                       min(mc.DRIFT_Y + 1, max_drift_y))
                dx = np.random.randint(-mc.DRIFT_X,
                                       min(mc.DRIFT_X + 1, max_drift_x))

                # shift bbox
                gt_bbox[:, 0] = gt_bbox[:, 0] - dx
                gt_bbox[:, 1] = gt_bbox[:, 1] - dy

                # distort image: positive drift crops the top/left edge,
                # negative drift pads it with zeros
                orig_h -= dy
                orig_w -= dx
                orig_x, dist_x = max(dx, 0), max(-dx, 0)
                orig_y, dist_y = max(dy, 0), max(-dy, 0)

                distorted_im = np.zeros(
                    (int(orig_h), int(orig_w), 3)).astype(np.float32)
                distorted_im[dist_y:, dist_x:, :] = im[orig_y:, orig_x:, :]
                im = distorted_im

            # Flip image with 50% probability
            if np.random.randint(2) > 0.5:
                im = im[:, ::-1, :]
                gt_bbox[:, 0] = orig_w - 1 - gt_bbox[:, 0]

        # scale image
        im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
        image_per_batch.append(im)

        # scale annotation
        if len(gt_bbox) > 0:
            x_scale = mc.IMAGE_WIDTH / orig_w
            y_scale = mc.IMAGE_HEIGHT / orig_h
            gt_bbox[:, 0::2] = gt_bbox[:, 0::2] * x_scale
            gt_bbox[:, 1::2] = gt_bbox[:, 1::2] * y_scale
        bbox_per_batch.append(gt_bbox)

        aidx_per_image, delta_per_image = [], []
        aidx_set = set()
        for i in range(len(gt_bbox)):
            overlaps = batch_iou(mc.ANCHOR_BOX, gt_bbox[i])

            # Pick the unused anchor with the largest positive IoU.
            aidx = len(mc.ANCHOR_BOX)
            for ov_idx in np.argsort(overlaps)[::-1]:
                if overlaps[ov_idx] <= 0:
                    if mc.DEBUG_MODE:
                        min_iou = min(overlaps[ov_idx], min_iou)
                        num_objects += 1
                        num_zero_iou_obj += 1
                    break
                if ov_idx not in aidx_set:
                    aidx_set.add(ov_idx)
                    aidx = ov_idx
                    if mc.DEBUG_MODE:
                        max_iou = max(overlaps[ov_idx], max_iou)
                        min_iou = min(overlaps[ov_idx], min_iou)
                        avg_ious += overlaps[ov_idx]
                        num_objects += 1
                    break

            if aidx == len(mc.ANCHOR_BOX):
                # even the largest available overlap is 0, thus, choose one
                # with the smallest square distance
                dist = np.sum(np.square(gt_bbox[i] - mc.ANCHOR_BOX), axis=1)
                for dist_idx in np.argsort(dist):
                    if dist_idx not in aidx_set:
                        aidx_set.add(dist_idx)
                        aidx = dist_idx
                        break

            # Centre offsets normalised by the anchor size, width and
            # height as log ratios against the anchor.
            anchor = mc.ANCHOR_BOX[aidx]
            box_cx, box_cy, box_w, box_h = gt_bbox[i]
            delta = [0] * 4
            delta[0] = (box_cx - anchor[0]) / anchor[2]
            delta[1] = (box_cy - anchor[1]) / anchor[3]
            delta[2] = np.log(box_w / anchor[2])
            delta[3] = np.log(box_h / anchor[3])

            aidx_per_image.append(aidx)
            delta_per_image.append(delta)

        delta_per_batch.append(delta_per_image)
        aidx_per_batch.append(aidx_per_image)

    if mc.DEBUG_MODE:
        # A batch made only of images without objects would otherwise
        # divide by zero.
        avg_iou = avg_ious / num_objects if num_objects else 0.0
        print('max iou: {}'.format(max_iou))
        print('min iou: {}'.format(min_iou))
        print('avg iou: {}'.format(avg_iou))
        print('number of objects: {}'.format(num_objects))
        print('number of objects with 0 iou: {}'.format(num_zero_iou_obj))

    return image_per_batch, label_per_batch, delta_per_batch, aidx_per_batch, bbox_per_batch
def read_batch(self, shuffle=True):
    """Read a batch of image and bounding box annotations.

    Each image gets a random brightness shift in HSV space, has
    mc.BGR_MEANS subtracted and is divided by 128. Images are NOT resized
    here (the resize call is disabled), but the boxes are still scaled by
    mc.IMAGE_WIDTH / mc.IMAGE_HEIGHT over the original size.

    Args:
      shuffle: whether or not to shuffle the dataset
    Returns:
      image_per_batch: normalised images.
      label_per_batch: labels. Shape: batch_size x object_num
      delta_per_batch: bounding box deltas. Shape: batch_size x object_num x
          [dx ,dy, dw, dh]; dw and dh are plain ratios against the anchor
          (no logarithm).
      aidx_per_batch: index of anchors that are responsible for prediction.
          Shape: batch_size x object_num
      bbox_per_batch: scaled bounding boxes. Shape: batch_size x object_num x
          [cx, cy, w, h]
      image_per_batch_viz: the normalised images multiplied by 128.0.
    Raises:
      IOError: if an image file cannot be read.
    """
    mc = self.mc

    if shuffle:
        if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
            self._shuffle_image_idx()
        batch_idx = self._perm_idx[self._cur_idx:self._cur_idx + mc.BATCH_SIZE]
        self._cur_idx += mc.BATCH_SIZE
    else:
        if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
            # Complete the batch with images from the start of the list.
            batch_idx = self._image_idx[self._cur_idx:] \
                + self._image_idx[:self._cur_idx + mc.BATCH_SIZE
                                  - len(self._image_idx)]
            self._cur_idx += mc.BATCH_SIZE - len(self._image_idx)
        else:
            batch_idx = self._image_idx[self._cur_idx:self._cur_idx + mc.BATCH_SIZE]
            self._cur_idx += mc.BATCH_SIZE

    image_per_batch = []
    image_per_batch_viz = []
    label_per_batch = []
    bbox_per_batch = []
    delta_per_batch = []
    aidx_per_batch = []
    if mc.DEBUG_MODE:
        avg_ious = 0.
        num_objects = 0.
        max_iou = 0.0
        min_iou = 1.0
        num_zero_iou_obj = 0

    for idx in batch_idx:
        # load the image
        im = cv2.imread(self._image_path_at(idx))
        if im is None:
            # The original printed this message and then crashed on
            # None.astype; fail with the message instead.
            raise IOError('failed file read:' + self._image_path_at(idx))
        im = im.astype(np.float32, copy=False)

        # random brightness control: shift V by a value in [-73, 71],
        # capping at 255
        # NOTE(review): there is no lower clamp, so a negative shift can
        # push V below 0. Left unchanged.
        hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)
        add_v = np.random.randint(55, 200) - 128
        v = np.where(v <= 255 - add_v, v + add_v, 255)
        final_hsv = cv2.merge((h, s, v))
        im = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)

        im -= mc.BGR_MEANS
        im /= 128.0  # scale the mean-subtracted pixel values by 1/128
        orig_h, orig_w, _ = [float(v) for v in im.shape]

        # load annotations; each roi is read as corner coordinates
        # (b[0], b[1], b[2], b[3]) and converted to [cx, cy, w, h]
        label_per_batch.append([b[4] for b in self._rois[idx][:]])
        gt_bbox = np.array([[(b[0] + b[2]) / 2, (b[1] + b[3]) / 2,
                             b[2] - b[0], b[3] - b[1]]
                            for b in self._rois[idx][:]])

        # NOTE(review): np.any only requires ONE box to be positive;
        # np.all may have been intended. Left unchanged.
        assert np.any(gt_bbox[:, 0] > 0), 'less than 0 gt_bbox[0]'
        assert np.any(gt_bbox[:, 1] > 0), 'less than 0 gt_bbox[1]'
        assert np.any(gt_bbox[:, 2] > 0), 'less than 0 gt_bbox[2]'
        assert np.any(gt_bbox[:, 3] > 0), 'less than 0 gt_bbox[3]'

        if mc.DATA_AUGMENTATION:
            # Flip image with 50% probability
            if np.random.randint(2) > 0.5:
                im = im[:, ::-1, :]
                gt_bbox[:, 0] = orig_w - 1 - gt_bbox[:, 0]

        # scale image
        #im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT), interpolation=cv2.INTER_AREA)
        image_per_batch.append(im)
        image_per_batch_viz.append(im * 128.0)

        # scale annotation
        x_scale = mc.IMAGE_WIDTH / orig_w
        y_scale = mc.IMAGE_HEIGHT / orig_h
        gt_bbox[:, 0::2] = gt_bbox[:, 0::2] * x_scale
        gt_bbox[:, 1::2] = gt_bbox[:, 1::2] * y_scale
        bbox_per_batch.append(gt_bbox)

        aidx_per_image, delta_per_image = [], []
        aidx_set = set()
        for i in range(len(gt_bbox)):
            overlaps = batch_iou(mc.ANCHOR_BOX, gt_bbox[i])

            # Pick the unused anchor with the largest positive IoU.
            aidx = len(mc.ANCHOR_BOX)
            for ov_idx in np.argsort(overlaps)[::-1]:
                if overlaps[ov_idx] <= 0:
                    if mc.DEBUG_MODE:
                        min_iou = min(overlaps[ov_idx], min_iou)
                        num_objects += 1
                        num_zero_iou_obj += 1
                    break
                if ov_idx not in aidx_set:
                    aidx_set.add(ov_idx)
                    aidx = ov_idx
                    if mc.DEBUG_MODE:
                        max_iou = max(overlaps[ov_idx], max_iou)
                        min_iou = min(overlaps[ov_idx], min_iou)
                        avg_ious += overlaps[ov_idx]
                        num_objects += 1
                    break

            if aidx == len(mc.ANCHOR_BOX):
                # even the largest available overlap is 0, thus, choose one
                # with the smallest square distance
                dist = np.sum(np.square(gt_bbox[i] - mc.ANCHOR_BOX), axis=1)
                for dist_idx in np.argsort(dist):
                    if dist_idx not in aidx_set:
                        aidx_set.add(dist_idx)
                        aidx = dist_idx
                        break

            anchor = mc.ANCHOR_BOX[aidx]
            box_cx, box_cy, box_w, box_h = gt_bbox[i]
            delta = [0] * 4
            delta[0] = (box_cx - anchor[0]) / anchor[2]
            delta[1] = (box_cy - anchor[1]) / anchor[3]
            # Plain ratios instead of log ratios: to remove exp in FPGA
            delta[2] = box_w / anchor[2]
            delta[3] = box_h / anchor[3]

            aidx_per_image.append(aidx)
            delta_per_image.append(delta)

        delta_per_batch.append(delta_per_image)
        aidx_per_batch.append(aidx_per_image)

    if mc.DEBUG_MODE:
        # A batch without any object would otherwise divide by zero.
        avg_iou = avg_ious / num_objects if num_objects else 0.0
        print('max iou: {}'.format(max_iou))
        print('min iou: {}'.format(min_iou))
        print('avg iou: {}'.format(avg_iou))
        print('number of objects: {}'.format(num_objects))
        print('number of objects with 0 iou: {}'.format(num_zero_iou_obj))

    return image_per_batch, label_per_batch, delta_per_batch, \
        aidx_per_batch, bbox_per_batch, image_per_batch_viz
def read_batch(self, shuffle=True):
    """Read a batch of image and bounding box annotations.

    When mc.DATA_AUGMENTATION is set, mc.DATA_AUG_TYPE selects the
    augmentation: 'SQT' calls drift_dist; 'YOLO' calls scale_trans (with
    50% probability), recolor and rand_flip.

    Args:
      shuffle: whether or not to shuffle the dataset
    Returns:
      image_per_batch: images, each resized with cv2.resize to
          (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT); mean-subtracted only when
          mc.SUB_BGR_MEANS is set.
      label_per_batch: labels. Shape: batch_size x object_num
      delta_per_batch: bounding box deltas. Shape: batch_size x object_num x
          [dx ,dy, dw, dh]
      aidx_per_batch: index of anchors that are responsible for prediction.
          Shape: batch_size x object_num
      bbox_per_batch: scaled bounding boxes. Shape: batch_size x object_num x
          [cx, cy, w, h]
    Raises:
      IOError: if an image file cannot be read.
    """
    mc = self.mc

    if shuffle:
        if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
            self._shuffle_image_idx()
        batch_idx = self._perm_idx[self._cur_idx:self._cur_idx + mc.BATCH_SIZE]
        self._cur_idx += mc.BATCH_SIZE
    else:
        if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
            # Complete the batch with images from the start of the list.
            batch_idx = self._image_idx[self._cur_idx:] \
                + self._image_idx[:self._cur_idx + mc.BATCH_SIZE
                                  - len(self._image_idx)]
            self._cur_idx += mc.BATCH_SIZE - len(self._image_idx)
        else:
            batch_idx = self._image_idx[self._cur_idx:self._cur_idx + mc.BATCH_SIZE]
            self._cur_idx += mc.BATCH_SIZE

    image_per_batch = []
    label_per_batch = []
    bbox_per_batch = []
    delta_per_batch = []
    aidx_per_batch = []
    if mc.DEBUG_MODE:
        avg_ious = 0.
        num_objects = 0.
        max_iou = 0.0
        min_iou = 1.0
        num_zero_iou_obj = 0

    for idx in batch_idx:
        # load the image
        im = cv2.imread(self._image_path_at(idx))
        if im is None:
            # cv2.imread returns None for unreadable files; fail with the
            # path instead of an AttributeError on im.shape.
            raise IOError('failed file read: %s' % self._image_path_at(idx))
        orig_h, orig_w, _ = [float(v) for v in im.shape]

        # load annotations
        label_this_batch = np.array([b[4] for b in self._rois[idx][:]])
        gt_bbox = np.array(
            [[b[0], b[1], b[2], b[3]] for b in self._rois[idx][:]])

        if mc.DATA_AUGMENTATION:
            assert mc.DATA_AUG_TYPE in ['SQT', 'YOLO'], \
                'Invalid augmentation type: {}'.format(mc.DATA_AUG_TYPE)
            if mc.DATA_AUG_TYPE == 'SQT':
                im, gt_bbox = drift_dist(im, gt_bbox, mc, orig_h, orig_w)
            elif mc.DATA_AUG_TYPE == 'YOLO':
                if np.random.randint(2) > 0.5:
                    im, gt_bbox, label_this_batch = scale_trans(
                        im, gt_bbox, label_this_batch)
                im = recolor(im)
                im, gt_bbox = rand_flip(im, gt_bbox, orig_w)

        # Remove BGR bias
        if mc.SUB_BGR_MEANS:
            im = im.astype(np.float32, copy=False)
            im -= mc.BGR_MEANS
            #im = im.astype(np.uint8, copy=False)

        label_per_batch.append(label_this_batch.tolist())

        # scale image
        im = cv2.resize(im, (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT))
        image_per_batch.append(im)

        # scale annotation
        # NOTE(review): orig_w / orig_h were measured before augmentation;
        # confirm that drift_dist / scale_trans keep the image size.
        x_scale = mc.IMAGE_WIDTH/orig_w
        y_scale = mc.IMAGE_HEIGHT/orig_h
        gt_bbox[:, 0::2] = gt_bbox[:, 0::2]*x_scale
        gt_bbox[:, 1::2] = gt_bbox[:, 1::2]*y_scale
        bbox_per_batch.append(gt_bbox)

        aidx_per_image, delta_per_image = [], []
        aidx_set = set()
        for i in range(len(gt_bbox)):
            overlaps = batch_iou(mc.ANCHOR_BOX, gt_bbox[i])

            # Pick the unused anchor with the largest positive IoU.
            aidx = len(mc.ANCHOR_BOX)
            for ov_idx in np.argsort(overlaps)[::-1]:
                if overlaps[ov_idx] <= 0:
                    if mc.DEBUG_MODE:
                        min_iou = min(overlaps[ov_idx], min_iou)
                        num_objects += 1
                        num_zero_iou_obj += 1
                    break
                if ov_idx not in aidx_set:
                    aidx_set.add(ov_idx)
                    aidx = ov_idx
                    if mc.DEBUG_MODE:
                        max_iou = max(overlaps[ov_idx], max_iou)
                        min_iou = min(overlaps[ov_idx], min_iou)
                        avg_ious += overlaps[ov_idx]
                        num_objects += 1
                    break

            if aidx == len(mc.ANCHOR_BOX):
                # even the largest available overlap is 0, thus, choose one
                # with the smallest square distance
                dist = np.sum(np.square(gt_bbox[i] - mc.ANCHOR_BOX), axis=1)
                for dist_idx in np.argsort(dist):
                    if dist_idx not in aidx_set:
                        aidx_set.add(dist_idx)
                        aidx = dist_idx
                        break

            anchor = mc.ANCHOR_BOX[aidx]
            box_cx, box_cy, box_w, box_h = gt_bbox[i]
            delta = [0]*4
            # NOTE(review): the centre offsets are normalised by the BOX
            # size here, not by the anchor size. Left unchanged; confirm
            # that the decoder expects this.
            delta[0] = (box_cx - anchor[0])/box_w
            delta[1] = (box_cy - anchor[1])/box_h
            delta[2] = np.log(box_w/anchor[2])
            delta[3] = np.log(box_h/anchor[3])

            aidx_per_image.append(aidx)
            delta_per_image.append(delta)

        delta_per_batch.append(delta_per_image)
        aidx_per_batch.append(aidx_per_image)

    if mc.DEBUG_MODE:
        # A batch without any object would otherwise divide by zero.
        avg_iou = avg_ious/num_objects if num_objects else 0.0
        print ('max iou: {}'.format(max_iou))
        print ('min iou: {}'.format(min_iou))
        print ('avg iou: {}'.format(avg_iou))
        print ('number of objects: {}'.format(num_objects))
        print ('number of objects with 0 iou: {}'.format(num_zero_iou_obj))

    return image_per_batch, label_per_batch, delta_per_batch, \
        aidx_per_batch, bbox_per_batch