def debug_images(cls, xywh, loc_obj, loc_bg, img_meta, image):
    """Draw sampled ROIs and ground-truth boxes on the image and save it.

    The result is written to ``temp/<img_meta['filename']>``.

    :param xywh: ROI array; only batch element 0 is used and it is indexed
        with ``loc_obj`` / ``loc_bg``. The name suggests (x, y, w, h) rows,
        but the layout is interpreted by ``cls._rectangle`` / ``cls._point``
        -- TODO confirm.
    :param loc_obj: index (or mask) selecting the object ROIs in ``xywh[0]``.
    :param loc_bg: index (or mask) selecting the background ROIs in ``xywh[0]``.
    :param img_meta: dict providing 'width', 'height', 'rescaled_width',
        'rescaled_height', 'objects' and 'filename'. Each entry of 'objects'
        is ``(label, min_x, min_y, max_x, max_y)`` in original-image pixels.
    :param image: normalized image; it is copied, so the caller's array is
        not drawn on.
    :return: None
    """
    image = denormalize_image(image.copy()).copy()

    # Ground-truth boxes are stored in original-image pixels, while the
    # picture being drawn on has the rescaled size.
    ratio_x = img_meta['rescaled_width'] / img_meta['width']
    ratio_y = img_meta['rescaled_height'] / img_meta['height']

    for roi in xywh[0, loc_obj]:
        cls._rectangle(image, roi, color=(255, 255, 0), thickness=2)

    for roi in xywh[0, loc_bg]:
        cls._point(image, roi, color=(200, 200, 200), thickness=2)

    for obj in img_meta['objects']:
        min_x, min_y, max_x, max_y = obj[1:]
        top_left = (int(min_x * ratio_x), int(min_y * ratio_y))
        bottom_right = (int(max_x * ratio_x), int(max_y * ratio_y))
        cv2.rectangle(image, top_left, bottom_right, (0, 0, 255),
                      thickness=1)

    cv2.imwrite(os.path.join('temp', img_meta['filename']), image)
def debug_next_batch(cls, image, meta, clsf, regr):
    """Visualize the positive RPN targets of one batch and save the image.

    For every anchor that is flagged in both halves of ``clsf`` a small
    marker is drawn at the anchor position, the regression target is decoded
    with ``to_absolute_coord`` and the resulting box is drawn. The picture is
    written to ``'temp/' + meta['filename']``.

    :param image: normalized image; passed through ``denormalize_image``.
    :param meta: dict with at least 'filename'; also forwarded to
        ``visualize_gta``.
    :param clsf: classification target indexed as ``[batch, h, w, channel]``.
        The first half of the channels and the second half are compared to 1
        independently; an anchor is drawn only when both are 1.
    :param regr: regression target indexed as ``[batch, h, w, channel]``.
        The (tx, ty, tw, th) values of anchor ``k`` are read from channels
        ``4 * n_anchor + 4 * k`` onward.
    :return: None
    """
    config = singleton_config()

    # One (width, height) pair per anchor channel.
    scales = calculate_anchor_size()
    image = denormalize_image(image)

    # The channel layout is [first half | second half] with one channel per
    # anchor in each half; for the usual 18 channels this gives 9 anchors
    # and a regression offset of 36.
    n_anchor = clsf.shape[-1] // 2
    regr_offset = 4 * n_anchor

    first_half = clsf[0, :, :, :n_anchor] == 1
    second_half = clsf[0, :, :, n_anchor:] == 1
    cls_h, cls_w, cls_o = np.where(np.logical_and(first_half, second_half))

    regr = regr[0].copy()
    for loc_h, loc_w, loc_o in zip(cls_h, cls_w, cls_o):
        # Feature-map cell -> pixel position on the rescaled image.
        cw = int(loc_w * config.anchor_stride[0])
        ch = int(loc_h * config.anchor_stride[1])
        anc_w, anc_h = scales[loc_o]

        cv2.rectangle(image, (cw, ch), (cw + 5, ch + 5), (255, 255, 0))

        min_x = int(cw - anc_w / 2)
        min_y = int(ch - anc_h / 2)
        max_x = int(cw + anc_w / 2)
        max_y = int(ch + anc_h / 2)

        start = regr_offset + loc_o * 4
        tx, ty, tw, th = regr[loc_h, loc_w, start:start + 4]

        # Decode the target back to a (center_x, center_y, w, h) box.
        g_cx, g_cy, g_w, g_h = to_absolute_coord(
            [min_x, min_y, max_x, max_y], [tx, ty, tw, th])
        g_x1 = int(g_cx - g_w / 2)
        g_y1 = int(g_cy - g_h / 2)
        g_x2 = int(g_x1 + g_w)
        g_y2 = int(g_y1 + g_h)
        cv2.rectangle(image, (g_x1, g_y1), (g_x2, g_y2), (255, 255, 0),
                      thickness=3)

    # Ground-truth annotations are drawn last.
    visualize_gta(image, meta)
    cv2.imwrite('temp/' + meta['filename'], image)
def debug_generate_anchors(image: np.ndarray, meta: dict, anchors: np.ndarray, probs):
    """Draw anchors and ground-truth boxes on the image and save it.

    Ground-truth annotations are drawn by ``visualize_gta`` (as red boxes,
    according to the original author's note -- not verifiable from here);
    anchors are drawn as small markers at their centers.

    - Ground-truth boxes: check that the box positions stored in ``meta``
      line up with the image.
    - Anchor markers: check that the anchors land near the ground-truth
      boxes.

    Anchors with ``prob > 0.8`` get a marker centered on the anchor plus the
    full anchor box, with a color intensity proportional to ``prob``. All
    other anchors get a black 5x5 marker whose top-left corner is the anchor
    center. The picture is written to ``temp/<meta['filename']>``.

    :param image: normalized image; it is copied, so the caller's array is
        not drawn on.
    :param meta: dict with at least 'filename'; also forwarded to
        ``visualize_gta``.
    :param anchors: iterable of (min_x, min_y, max_x, max_y) in feature-map
        cells; converted to pixels with ``config.anchor_stride``.
    :param probs: one score per anchor, iterated in lockstep with
        ``anchors``.
    :return: None
    """
    config = singleton_config()
    image = denormalize_image(image.copy()).copy()

    visualize_gta(image, meta, center=True)

    stride_x = config.anchor_stride[0]
    stride_y = config.anchor_stride[1]

    for anchor, prob in zip(anchors, probs):
        # The +0.5 shifts each coordinate to the middle of its cell before
        # scaling to pixels.
        min_x = (anchor[0] + 0.5) * stride_x
        min_y = (anchor[1] + 0.5) * stride_y
        max_x = (anchor[2] + 0.5) * stride_x
        max_y = (anchor[3] + 0.5) * stride_y

        cx = int((min_x + max_x) / 2)
        cy = int((min_y + max_y) / 2)

        if prob > 0.8:
            cv2.rectangle(image, (cx - 3, cy - 3), (cx + 3, cy + 3),
                          (255, 255, 0), thickness=1)
            cv2.rectangle(image, (int(min_x), int(min_y)),
                          (int(max_x), int(max_y)),
                          (255 * prob, 255 * prob, 0), thickness=1)
        else:
            cv2.rectangle(image, (cx, cy), (cx + 5, cy + 5), (0, 0, 0),
                          thickness=1)

    cv2.imwrite(os.path.join('temp', meta['filename']), image)
def visualize(image, meta, cls_p, anc_p, class_mapping, class_mapping_inv):
    """Print predicted vs. true class names and save an image with the boxes.

    :param image: normalized image; passed through ``denormalize_image``.
    :param meta: dict with 'objects' (first element of each entry is the
        class name) and 'filename'; also forwarded to ``visualize_gta``.
    :param cls_p: array of predicted class indices.
    :param anc_p: iterable of (min_x, min_y, max_x, max_y) boxes to draw.
    :param class_mapping: class name -> index; must contain 'bg'.
    :param class_mapping_inv: class index -> name.
    :return: None
    """
    canvas = denormalize_image(image)
    visualize_gta(canvas, meta)

    # Classification check: every prediction that is not background.
    background = class_mapping['bg']
    foreground = cls_p[np.nonzero(cls_p != background)]
    predicted_names = [class_mapping_inv[index] for index in foreground]
    true_names = [entry[0] for entry in meta['objects']]
    print('cls_pred:', predicted_names)
    print('cls_true:', true_names)
    print()

    # Regression check: draw each predicted box.
    for box in anc_p:
        x1, y1, x2, y2 = (int(box[k]) for k in range(4))
        cv2.rectangle(canvas, (x1, y1), (x2, y2), (255, 255, 0))

    cv2.imwrite('temp/{0}'.format(meta['filename']), canvas)
def generate_rpn_target(
        self,
        meta: dict,
        image: np.ndarray = None,
        debug: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    """Build the RPN classification and regression targets for one image.

    Every anchor (feature-map cell x scale x ratio) that
    ``self.is_anchor_valid`` accepts is compared with every ground-truth box:

    - IoU > ``self.max_overlap``: positive anchor (valid, label 1, regression
      target stored).
    - IoU < ``self.min_overlap``: negative anchor (valid, label 0), unless
      the anchor was already marked valid for another object.
    - otherwise: ignored (not valid), unless already marked valid.

    Afterwards at most 256 positive labels are kept, the best-overlapping
    anchor of each object is forced to be positive, and the valid mask is
    subsampled so that positives and negatives stay within
    ``self.max_anchor``.

    :param meta: dict with 'width', 'height', 'rescaled_width',
        'rescaled_height', 'objects' and (for debug) 'filename'. Each object
        is ``(label, *box)``; the box part is passed to
        ``self.cal_gta_coordinate``.
    :param image: only used when ``debug`` is true; ``image[0]`` is drawn on.
    :param debug: when true, positive anchors and ground-truth boxes are
        drawn and written to ``'temp/' + meta['filename']``. It does not
        influence the returned targets.
    :return: ``(y_rpn_cls, y_rpn_regr)``.
        ``y_rpn_cls`` has shape ``(1, H, W, 2 * n_anchor)``: valid mask
        followed by labels.
        ``y_rpn_regr`` has shape ``(1, H, W, 8 * n_anchor)``: the labels
        repeated four times each, followed by the regression targets.
    """
    width = meta['width']
    height = meta['height']
    rescaled_width = meta['rescaled_width']
    rescaled_height = meta['rescaled_height']
    n_object = len(meta['objects'])
    n_scale = len(self.anchor_scales)
    n_ratio = len(self.anchor_ratios)
    n_anchor = n_scale * n_ratio

    # Output size of the base network (feature extraction model).
    fen_width, fen_height, _ = cal_fen_output_size(
        self._net_name, rescaled_width, rescaled_height)

    # Best anchor seen so far for each ground-truth box; -1 means "none yet".
    best_iou_for_box = np.zeros(n_object)
    best_anchor_for_box = -1 * np.ones((n_object, 4), dtype='int')
    best_reg_for_box = np.zeros((n_object, 4), dtype='float32')

    # Target tensors.
    y_cls_target = np.zeros((fen_height, fen_width, n_anchor))
    y_valid_box = np.zeros((fen_height, fen_width, n_anchor))
    y_regr_targets = np.zeros((fen_height, fen_width, n_anchor * 4))

    # Ground-truth boxes on the rescaled image. They do not depend on the
    # anchor, so they are computed once per object.
    gta_coords = [
        self.cal_gta_coordinate(obj_info[1:], width, height,
                                rescaled_width, rescaled_height)
        for obj_info in meta['objects']
    ]

    if debug:
        _image = denormalize_image(image[0].copy())

    anchor_grid = itertools.product(
        range(fen_height), range(fen_width), range(n_scale), range(n_ratio))
    for y_pos, x_pos, anc_scale_idx, anc_rat_idx in anchor_grid:
        # Anchor box coordinates on the rescaled image.
        anchor_coord = self.cal_anchor_cooridinate(
            x_pos, y_pos,
            self.anchor_scales[anc_scale_idx],
            self.anchor_ratios[anc_rat_idx],
            self.anchor_stride)

        # Skip anchors that are not within the rescaled image.
        if not self.is_anchor_valid(anchor_coord, rescaled_width,
                                    rescaled_height):
            continue

        # Channel of this anchor. The stride must be the number of scales,
        # otherwise different (scale, ratio) pairs share a channel whenever
        # the two counts differ.
        z_pos = anc_scale_idx + n_scale * anc_rat_idx
        regr_slice = slice(z_pos * 4, z_pos * 4 + 4)

        for idx_obj, gta_coord in enumerate(gta_coords):
            iou = cal_iou(gta_coord, anchor_coord)
            is_best = iou > best_iou_for_box[idx_obj]
            is_positive = iou > self.max_overlap

            if is_best or is_positive:
                # Regression target relative to the rescaled image.
                reg_target = to_relative_coord(gta_coord, anchor_coord)

            # Every ground-truth box must end up with at least one anchor,
            # so the best one is tracked.
            if is_best:
                best_iou_for_box[idx_obj] = iou
                best_anchor_for_box[idx_obj] = (y_pos, x_pos,
                                                anc_scale_idx, anc_rat_idx)
                best_reg_for_box[idx_obj] = reg_target

            # An anchor already marked valid for another object must not be
            # downgraded by this object.
            is_valid_anchor = bool(y_valid_box[y_pos, x_pos, z_pos] == 1)

            if is_positive:
                y_valid_box[y_pos, x_pos, z_pos] = 1
                y_cls_target[y_pos, x_pos, z_pos] = 1
                y_regr_targets[y_pos, x_pos, regr_slice] = reg_target

                if debug:
                    g_cx, g_cy, g_w, g_h = to_absolute_coord(
                        anchor_coord, reg_target)
                    g_x1 = int(g_cx - g_w / 2)
                    g_y1 = int(g_cy - g_h / 2)
                    g_x2 = int(g_x1 + g_w)
                    g_y2 = int(g_y1 + g_h)
                    cv2.rectangle(_image, (g_x1 + 2, g_y1 + 2),
                                  (g_x2 + 2, g_y2 + 2), (255, 255, 0),
                                  thickness=2)
            elif iou < self.min_overlap and not is_valid_anchor:
                # Negative anchor.
                y_valid_box[y_pos, x_pos, z_pos] = 1
                y_cls_target[y_pos, x_pos, z_pos] = 0
            elif not is_valid_anchor:
                # Neither positive nor negative: ignored.
                y_valid_box[y_pos, x_pos, z_pos] = 0
                y_cls_target[y_pos, x_pos, z_pos] = 0

    if debug:
        # Drawn from an integer copy so that the float coordinates used for
        # the targets above are never touched by debug mode.
        for gta_coord in gta_coords:
            box = gta_coord.astype('int')
            cv2.rectangle(_image, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (0, 0, 255))

    # Keep at most 256 positive labels.
    # NOTE(review): dropped positives keep y_valid_box == 1 and are therefore
    # counted as negatives below -- confirm this is intended.
    pos_locs = np.where(y_cls_target == 1)
    n_labelled = pos_locs[0].shape[0]
    if n_labelled > 256:
        val_locs = random.sample(range(n_labelled), n_labelled - 256)
        y_cls_target[pos_locs[0][val_locs],
                     pos_locs[1][val_locs],
                     pos_locs[2][val_locs]] = 0
    assert y_cls_target.sum() <= 256

    # Force the best anchor of every ground-truth box to be positive.
    for best_anchor, reg_target in zip(best_anchor_for_box,
                                       best_reg_for_box):
        y_pos, x_pos, anc_scale_idx, anc_rat_idx = best_anchor
        if y_pos < 0:
            # No valid anchor overlapped this box; writing with the -1
            # placeholder would mark an unrelated cell as positive.
            continue
        z_pos = anc_scale_idx + n_scale * anc_rat_idx
        y_valid_box[y_pos, x_pos, z_pos] = 1
        y_cls_target[y_pos, x_pos, z_pos] = 1
        y_regr_targets[y_pos, x_pos, (z_pos * 4):(z_pos * 4) + 4] = reg_target

    # Negatives usually far outnumber positives, so the valid mask is
    # subsampled to keep both groups balanced within self.max_anchor.
    pos_locs = np.where(np.logical_and(y_valid_box == 1, y_cls_target == 1))
    neg_locs = np.where(np.logical_and(y_valid_box == 1, y_cls_target == 0))
    n_pos = pos_locs[0].shape[0]
    n_neg = neg_locs[0].shape[0]

    if n_pos > self.max_anchor / 2:
        val_locs = random.sample(range(n_pos),
                                 n_pos - self.max_anchor // 2)
        y_valid_box[pos_locs[0][val_locs],
                    pos_locs[1][val_locs],
                    pos_locs[2][val_locs]] = 0
        n_pos = self.max_anchor // 2

    if n_neg + n_pos > self.max_anchor:
        val_locs = random.sample(range(n_neg), n_neg - n_pos)
        y_valid_box[neg_locs[0][val_locs],
                    neg_locs[1][val_locs],
                    neg_locs[2][val_locs]] = 0

    # Add the batch dimension.
    y_cls_target = np.expand_dims(y_cls_target, axis=0)
    y_valid_box = np.expand_dims(y_valid_box, axis=0)
    y_regr_targets = np.expand_dims(y_regr_targets, axis=0)

    if debug:
        cv2.imwrite('temp/' + meta['filename'], _image)

    # Final targets: the valid mask travels with the labels, and the labels
    # (repeated once per coordinate) travel with the regression values.
    y_rpn_cls = np.concatenate([y_valid_box, y_cls_target], axis=-1)
    y_rpn_regr = np.concatenate(
        [np.repeat(y_cls_target, 4, axis=-1), y_regr_targets], axis=-1)

    return np.copy(y_rpn_cls), np.copy(y_rpn_regr)
def debug_next_batch(cls, image, meta, rois, cls_p, reg_p, class_mapping, class_mapping_inv):
    """Print class predictions and save an image with the regressed ROIs.

    :param image: normalized image; passed through ``denormalize_image``.
    :param meta: dict with 'objects' (first element of each entry is the
        class name) and 'filename'; also forwarded to ``visualize_gta``.
    :param rois: (x, y, w, h) on the feature map; only batch element 0 is
        used.
    :param cls_p: class scores; the arg-max over axis 2 of batch element 0
        is taken as the predicted class index.
    :param reg_p: the first 80 channels of the last axis are compared to 1
        to select the (tx, ty, th, tw) values stored in the remaining
        channels. 80 is presumably 4 x the number of classes -- TODO confirm.
    :param class_mapping: class name -> index; must contain 'bg'.
    :param class_mapping_inv: class index -> name.
    :return: None
    :raises AssertionError: if a predicted class name is not among the true
        class names.
    """
    canvas = denormalize_image(image)
    visualize_gta(canvas, meta)

    # Classification check.
    background = class_mapping['bg']
    predicted_names = [
        class_mapping_inv[index]
        for index in np.argmax(cls_p, axis=2)[0]
        if index != background
    ]
    true_names = [entry[0] for entry in meta['objects']]
    print('cls_pred:', predicted_names)
    print('cls_true:', true_names)
    print()
    assert np.isin(np.unique(predicted_names), true_names).all()

    # Regression check: keep the targets whose mask channel is set.
    selected = np.where(reg_p[:, :, :80] == 1)
    selected_regs = reg_p[:, :, 80:][selected].reshape(-1, 4)
    selected_rois = rois[0, :selected_regs.shape[0]]

    # ROI centers: x + w / 2 and y + h / 2.
    center_xs = selected_rois[:, 0] + selected_rois[:, 2] / 2
    center_ys = selected_rois[:, 1] + selected_rois[:, 3] / 2
    for center_x, center_y in zip(center_xs, center_ys):
        # 16 presumably is the feature-map stride in pixels -- TODO confirm.
        px = int((center_x + 0.5) * 16)
        py = int((center_y + 0.5) * 16)
        cv2.rectangle(canvas, (px - 3, py - 3), (px + 3, py + 3),
                      (255, 255, 0), thickness=2)

    # Regressed boxes: (center_x, center_y, w, h) -> corner coordinates.
    regressed = apply_regression_to_rois(
        selected_regs, selected_rois).astype('float64')
    left = regressed[:, 0] - regressed[:, 2] / 2.
    top = regressed[:, 1] - regressed[:, 3] / 2.
    right = regressed[:, 2] + left
    bottom = regressed[:, 3] + top

    for x1, y1, x2, y2 in zip(left, top, right, bottom):
        cv2.rectangle(canvas,
                      (int(x1 * 16), int(y1 * 16)),
                      (int(x2 * 16), int(y2 * 16)),
                      (255, 255, 0), thickness=2)

    cv2.imwrite('temp/{0}'.format(meta['filename']), canvas)