def _data_generator(self, batch_size):
    """Endlessly yield R-CNN style training batches.

    For each of ``batch_size`` images the generator loads and resizes the
    image, runs selective search, crops every kept proposal plus every
    ground-truth box, and computes one class label and one regression
    target per crop.

    ``im_size`` and ``threshold`` are not defined in this function; they are
    read from the enclosing scope.

    :param batch_size: number of images consumed per yielded batch.
    :return: generator of ``(total_img_data, total_labels, total_deltas)``
        where the three arrays are concatenated over all crops of all images
        in the batch and are aligned row by row.
    """
    i = 0
    n = self.sample_nums
    while True:
        total_img_data = []
        total_labels = []
        total_deltas = []
        for _ in range(batch_size):
            # Reshuffle at the start of every pass over the annotations.
            if i == 0:
                self._random_shuffle()
            annotation = self._annotations[i]
            image_path, gt_boxes = self._parse_annotation(annotation)
            img = cv2.imread(image_path)
            # img.shape is (height, width, channel).
            height, width, _ = img.shape
            # Resize the image to the network input size.
            img = cv2.resize(img, im_size, interpolation=cv2.INTER_CUBIC)
            # BGR -> RGB followed by a simple scaling to [0, 1].
            img = img[:, :, (2, 1, 0)]
            img = img.astype(np.float32)
            img = img / 255
            # Rescale the ground-truth boxes to the resized image.
            gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * (im_size[0] / width)
            gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * (im_size[1] / height)
            # NOTE(review): the code below treats each region['rect'] as
            # (x1, y1, x2, y2) -- confirm against the selective_search
            # implementation in use.
            _, regions = selective_search(img, scale=200, sigma=0.9,
                                          min_size=50)
            proposals = np.asarray(
                [list(region['rect']) for region in regions])

            selected_imgs = []
            candidates = set()
            # Rects are collected in the same order as their crops. Rebuilding
            # them from the set (as before) loses insertion order and
            # misaligns crops with labels and deltas.
            kept_rects = []
            # Drop duplicate and tiny proposals.
            for r in proposals:
                x1, y1, x2, y2 = (int(round(v)) for v in r)
                if (x1, y1, x2, y2) in candidates:
                    continue
                if (x2 - x1) * (y2 - y1) < 220:
                    continue
                crop_img = img[y1:y2, x1:x2, :]
                # Resize the crop to the network input size.
                crop_img = cv2.resize(crop_img, im_size,
                                      interpolation=cv2.INTER_CUBIC)
                selected_imgs.append(crop_img)
                candidates.add((x1, y1, x2, y2))
                kept_rects.append([x1, y1, x2, y2])

            # Add the ground-truth boxes themselves as extra samples.
            for gt_box in gt_boxes:
                x1, y1, x2, y2 = (int(round(v)) for v in gt_box[0:4])
                crop_img = img[y1:y2, x1:x2, :]
                try:
                    crop_img = cv2.resize(crop_img, im_size,
                                          interpolation=cv2.INTER_CUBIC)
                except cv2.error:
                    # Degenerate (empty) crop: skip this box instead of
                    # aborting the whole generator.
                    continue
                selected_imgs.append(crop_img)
                kept_rects.append(gt_box[0:4])

            # reshape keeps a (0, 4) shape when nothing was kept.
            rects = np.asarray(kept_rects).reshape(-1, 4)

            # IoU between every kept rect and every ground-truth box.
            overlaps = bbox_overlaps(rects, gt_boxes)
            # Index of the ground-truth box with the highest IoU per rect.
            argmax_overlaps = np.argmax(overlaps, axis=1)
            # Class decision: rects above the IoU threshold take the class of
            # their best ground-truth box, all others are background (0).
            max_overlaps = np.max(overlaps, axis=1)
            keep = np.where(max_overlaps > threshold)[0]
            labels = np.zeros(len(argmax_overlaps))
            labels[keep] = gt_boxes[argmax_overlaps[keep], 4]
            # Regression targets towards the best ground-truth box.
            deltas = bbox_transform(rects, gt_boxes[argmax_overlaps, 0:4])

            total_deltas.append(deltas)
            total_labels.append(labels)
            total_img_data.append(selected_imgs)
            i = (i + 1) % n

        total_img_data = np.concatenate(total_img_data, axis=0)
        total_labels = np.concatenate(total_labels, axis=0)
        total_deltas = np.concatenate(total_deltas, axis=0)
        yield total_img_data, total_labels, total_deltas


# Usage example (kept from the original file):
# voc_data = VocData('~/segment_data', 2007, 'train', './data/voc_classes.txt')
# g = voc_data.data_generator_wrapper()
# x, y, z = next(g)
# print(x.shape)
# print(y.shape)
# print(z.shape)
def _get_labels(self, regions, gt_boxes):
    """Build the per-image training targets for the detection head.

    The ground-truth boxes are appended to the proposals, foreground and
    background samples are drawn at random, and class labels plus box
    regression targets are produced for the sampled regions.

    :param regions: n * 4 proposal boxes. NOTE(review): the original
        docstring described them as (x1, y1, w, h), but they are stacked
        with the (x1, y1, x2, y2) ground-truth boxes and passed to the same
        IoU routine -- confirm the expected format with the caller.
    :param gt_boxes: m * 5 array (x1, y1, x2, y2, cls).
    :return: tuple ``(labels, regions_target, bbox_targets,
        bbox_inside_weights, bbox_outside_weights)``, each with a leading
        batch dimension of 1.
    """
    # Append gt_boxes to the proposals to increase the number of positives.
    all_regions = np.vstack((regions, gt_boxes[:, :4]))

    # 1. IoU between every region and every ground-truth box.
    #    np.float was removed from NumPy; np.float64 is the same dtype.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_regions[:, :], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # Each region takes the class of the ground-truth box it overlaps most.
    labels = gt_boxes[gt_assignment, 4]

    # 2. Decide how many foreground / background samples to draw.
    fg_inds = np.where(max_overlaps >= cfg.TRAIN_FG_THRESH)[0]
    # e.g. 128 * 0.25. The product is a float, but the count is used as a
    # sample size and as a slice index below, so it must be an int.
    fg_rois_per_image = int(
        round(cfg.TRAIN_BATCH_SIZE * cfg.TRAIN_FG_FRACTION))
    fg_rois_per_this_image = min(fg_rois_per_image, int(fg_inds.size))
    if fg_inds.size > 0:
        # Random sampling without replacement.
        fg_inds = np.random.choice(fg_inds, size=fg_rois_per_this_image,
                                   replace=False)

    # Restricting the background to IoU in [0.1, 0.5) did not yield enough
    # samples, so every region below the upper threshold is a candidate.
    bg_inds = np.where(max_overlaps < cfg.TRAIN_BG_THRESH_HI)[0]
    bg_rois_per_this_image = int(
        cfg.TRAIN_BATCH_SIZE - fg_rois_per_this_image)
    bg_rois_per_this_image = min(bg_rois_per_this_image, int(bg_inds.size))
    if bg_inds.size > 0:
        bg_inds = np.random.choice(bg_inds, size=bg_rois_per_this_image,
                                   replace=False)

    # The indices that we're selecting (both fg and bg), foreground first.
    keep_inds = np.append(fg_inds, bg_inds)

    # Up to TRAIN_BATCH_SIZE labels and regions are used for training.
    # The Fast R-CNN paper uses batch_size=2 with 64 samples per image; this
    # code follows Faster R-CNN instead: batch_size=1 with 128 per image.
    labels = labels[keep_inds]
    # Everything after the foreground samples is background.
    labels[fg_rois_per_this_image:] = 0
    regions_target = all_regions[keep_inds]

    # Convert the regions to regression values tx, ty, tw, th.
    bbox_target_data = self._transform_regions(
        regions_target, gt_boxes[gt_assignment[keep_inds], :4])
    bbox_targets, bbox_inside_weights = self._get_bbox_regression_labels(
        bbox_target_data, labels)

    # Prepend a batch-index column (all zeros) to the regions.
    regions_target = np.vstack(
        (np.zeros(regions_target.shape[0], ),
         regions_target[:, 0],
         regions_target[:, 1],
         regions_target[:, 2],
         regions_target[:, 3])).transpose()

    labels = labels.reshape((1, -1))
    regions_target = regions_target.reshape((1, -1, 5))
    # The background class has no regression targets.
    bbox_targets = bbox_targets.reshape(
        (1, -1, (self._num_classes - 1) * 4))
    bbox_inside_weights = bbox_inside_weights.reshape(
        (1, -1, (self._num_classes - 1) * 4))
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(
        np.float32)
    return (labels, regions_target, bbox_targets, bbox_inside_weights,
            bbox_outside_weights)
def _data_generator(self, batch_size):
    """Endlessly yield RPN training batches.

    For every annotation the image is augmented, resized and normalised, the
    anchors lying inside the image are labelled positive (1), negative (0)
    or ignored (-1) from their IoU with the ground-truth boxes, and the box
    regression targets and loss weights are computed.

    :param batch_size: number of samples stacked into each yielded batch.
    :return: generator of ``(img_data, [rpn_labels, rpn_bbox_targets],
        total_gt_boxes)``; ``rpn_bbox_targets`` is the concatenation along
        axis 1 of the targets, inside weights and outside weights.
    """
    data_augment = DataAugment(augment=self._data_augment,
                               horizontal_flip=True,
                               vertical_flip=True)
    # im_size is used as (width, height) throughout: it is the dsize passed
    # to cv2.resize, and index 0 scales x coordinates below.
    im_size = self._cfg.im_size
    feat_stride = self._cfg.feat_stride
    feature_width = round(im_size[0] / feat_stride)
    feature_height = round(im_size[1] / feat_stride)
    anchor = Anchors(feature_size=(feature_height, feature_width),
                     feat_stride=feat_stride)
    i = 0
    while True:
        for annotation in self._annotations:
            img_data = []
            rpn_labels = []
            rpn_bbox_targets = []
            rpn_bbox_inside_weights = []
            rpn_bbox_outside_weights = []
            total_gt_boxes = []
            # NOTE(review): every sample of a batch is built from the same
            # annotation (only the augmentation differs), and the shuffle
            # below can run while self._annotations is being iterated.
            # Both are kept as in the original; confirm intent if
            # batch_size > 1 is used.
            for _ in range(batch_size):
                if i == 0 and self._shuffle:
                    self._random_shuffle()
                image_path, gt_boxes = self._parse_annotation(annotation)
                # Data augmentation to reduce over-fitting.
                img, gt_boxes = data_augment(image_path, gt_boxes)
                height, width = img.shape[:2]
                # Resize the image to the network input size.
                img = cv2.resize(img, im_size,
                                 interpolation=cv2.INTER_CUBIC)
                # BGR -> RGB
                img = img[:, :, (2, 1, 0)]
                img = img.astype(np.float32)
                img[:, :, 0] -= self._cfg.img_channel_mean[0]
                img[:, :, 1] -= self._cfg.img_channel_mean[1]
                img[:, :, 2] -= self._cfg.img_channel_mean[2]
                img /= self._cfg.img_scaling_factor
                img_data.append(img)

                # Rescale gt_boxes to the resized image.
                gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * (
                    im_size[0] / width)
                gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * (
                    im_size[1] / height)

                # Fetch the anchors; A is used as the per-location anchor
                # count in the reshapes below.
                all_anchors, A = anchor.get_anchors()
                # Total number of anchors, h * w * k (e.g. 50 * 38 * 9).
                total_anchors_num = len(all_anchors)

                # Keep only the anchors that lie inside the image.
                # Fix: x2 is bounded by the width (im_size[0]) and y2 by the
                # height (im_size[1]); the two were swapped before, which is
                # only harmless for square inputs.
                # NOTE(review): assumes anchors are (x1, y1, x2, y2), the
                # same layout as gt_boxes they are compared with -- confirm.
                allow_border = 0
                inds_inside = np.where(
                    (all_anchors[:, 0] >= -allow_border) &
                    (all_anchors[:, 1] >= -allow_border) &
                    (all_anchors[:, 2] <= (im_size[0] + allow_border)) &
                    (all_anchors[:, 3] <= (im_size[1] + allow_border)))[0]
                anchors = all_anchors[inds_inside, :]

                # -1 means "ignored".
                labels = np.empty((len(inds_inside), ), dtype=np.float32)
                labels.fill(-1)

                # IoU between the inside anchors and the gt boxes.
                overlaps = bbox_overlaps(np.ascontiguousarray(anchors),
                                         np.ascontiguousarray(gt_boxes))
                argmax_overlaps = np.argmax(overlaps, axis=1)
                max_overlaps = overlaps[np.arange(len(inds_inside)),
                                        argmax_overlaps]
                gt_argmax_overlaps = np.argmax(overlaps, axis=0)
                gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                           np.arange(overlaps.shape[1])]
                # All anchors that tie for the best IoU of some gt box.
                gt_argmax_overlaps = np.where(
                    overlaps == gt_max_overlaps)[0]

                if not self._cfg.train_rpn_clobber_positives:
                    # Assign negatives first so positives can overwrite them.
                    labels[max_overlaps <
                           self._cfg.train_rpn_negative_overlap] = 0
                labels[gt_argmax_overlaps] = 1
                labels[max_overlaps >
                       self._cfg.train_rpn_positive_overlap] = 1
                if self._cfg.train_rpn_clobber_positives:
                    # Assign negatives last so they overwrite positives.
                    labels[max_overlaps <
                           self._cfg.train_rpn_negative_overlap] = 0

                # Cap the number of samples per image: positives plus
                # negatives total train_rpn_batch_size. Positives are capped
                # at their fraction; negatives fill up the remainder.
                num_fg = int(self._cfg.train_rpn_fg_fraction *
                             self._cfg.train_rpn_batch_size)
                fg_inds = np.where(labels == 1)[0]
                if len(fg_inds) > num_fg:
                    # Random sampling without replacement.
                    disabled_inds = np.random.choice(
                        fg_inds, size=len(fg_inds) - num_fg, replace=False)
                    labels[disabled_inds] = -1

                num_bg = self._cfg.train_rpn_batch_size - np.sum(
                    labels == 1)
                bg_inds = np.where(labels == 0)[0]
                if len(bg_inds) > num_bg:
                    disabled_inds = np.random.choice(
                        bg_inds, size=len(bg_inds) - num_bg, replace=False)
                    labels[disabled_inds] = -1

                bbox_targets = self._compute_targets(
                    anchors, gt_boxes[argmax_overlaps, :])

                bbox_inside_weights = np.zeros((len(inds_inside), 4),
                                               dtype=np.float32)
                bbox_outside_weights = np.zeros((len(inds_inside), 4),
                                                dtype=np.float32)
                bbox_inside_weights[labels == 1, :] = np.array(
                    [1.0, 1.0, 1.0, 1.0])

                if self._cfg.train_rpn_positive_weight < 0:
                    # Uniform weighting of examples (given non-uniform
                    # sampling): every sampled anchor gets the same weight.
                    num_examples = np.sum(labels >= 0)
                    positive_weights = np.ones((1, 4)) * 1.0 / num_examples
                    negative_weights = np.ones((1, 4)) * 1.0 / num_examples
                else:
                    assert ((self._cfg.train_rpn_positive_weight > 0) &
                            (self._cfg.train_rpn_positive_weight < 1))
                    # Non-uniform weighting:
                    # loss = p * loss(positive) + (1 - p) * loss(negative).
                    # This balances the share of positives and negatives in
                    # the total loss so the larger group does not dominate.
                    positive_weights = (
                        self._cfg.train_rpn_positive_weight /
                        np.sum(labels == 1))
                    negative_weights = (
                        (1.0 - self._cfg.train_rpn_positive_weight) /
                        np.sum(labels == 0))
                # Assign the weights to the training samples.
                bbox_outside_weights[labels == 1] = positive_weights
                bbox_outside_weights[labels == 0] = negative_weights

                # Map the inside-anchor values back onto the full anchor
                # set; anchors outside the image get the fill value.
                labels = self._unmap(labels, total_anchors_num,
                                     inds_inside, fill=-1)
                bbox_targets = self._unmap(bbox_targets, total_anchors_num,
                                           inds_inside, fill=0)
                # [H * W * A, 4]
                bbox_inside_weights = self._unmap(
                    bbox_inside_weights, total_anchors_num, inds_inside,
                    fill=0)
                bbox_outside_weights = self._unmap(
                    bbox_outside_weights, total_anchors_num, inds_inside,
                    fill=0)

                # [H * W * A] --> [H, W, A] --> [A, H, W] --> [A * H, W]
                labels = labels.reshape(
                    (feature_height, feature_width, A)).transpose((2, 0, 1))
                labels = labels.reshape((A * feature_height, feature_width))
                # (H * W * A, 4) -> (H, W, A * 4) -> (A * 4, H, W)
                bbox_targets = bbox_targets.reshape(
                    (feature_height, feature_width, A * 4)).transpose(
                        (2, 0, 1))
                # (H * W * A, 4) -> (H, W, A * 4) -> (A * 4, H, W)
                bbox_inside_weights = bbox_inside_weights.reshape(
                    (feature_height, feature_width, A * 4)).transpose(
                        (2, 0, 1))
                # (H * W * A, 4) -> (H, W, A * 4) -> (A * 4, H, W)
                bbox_outside_weights = bbox_outside_weights.reshape(
                    (feature_height, feature_width, A * 4)).transpose(
                        (2, 0, 1))

                rpn_labels.append(labels)
                rpn_bbox_inside_weights.append(bbox_inside_weights)
                rpn_bbox_outside_weights.append(bbox_outside_weights)
                rpn_bbox_targets.append(bbox_targets)
                total_gt_boxes.append(gt_boxes)
                i = (i + 1) % self.sample_nums

            rpn_labels = np.array(rpn_labels)
            rpn_bbox_inside_weights = np.array(rpn_bbox_inside_weights)
            rpn_bbox_targets = np.array(rpn_bbox_targets)
            rpn_bbox_outside_weights = np.array(rpn_bbox_outside_weights)
            img_data = np.array(img_data)
            total_gt_boxes = np.asarray(total_gt_boxes)
            # (batch, 3 * 4 * A, H, W)
            rpn_bbox_targets = np.concatenate([
                rpn_bbox_targets, rpn_bbox_inside_weights,
                rpn_bbox_outside_weights
            ], axis=1)
            yield img_data, [rpn_labels, rpn_bbox_targets], total_gt_boxes
# Demo script: draws two sets of boxes onto a blank image, saves it as
# overlap.png, then prints the pairwise overlaps computed by bbox_overlaps.
# `Image` is imported explicitly; the script calls Image.new and previously
# failed with a NameError because only ImageDraw was imported.
from PIL import Image, ImageDraw
from utils.bbox_overlaps import bbox_overlaps
import torch

image_size = 500
base_image = Image.new("RGB", (image_size, image_size), color="#FFF")

boxes = [[15, 15, 28, 28], [45, 45, 10, 10], [30, 30, 30, 30]]
gt_boxes = [[15, 15, 30, 30], [45, 45, 15, 14]]

draw = ImageDraw.Draw(base_image)


def bbox_to_coords(bbox):
    """Convert a (center_x, center_y, h, w) box to corner coordinates.

    :param bbox: sequence of four numbers, unpacked as center x, center y,
        height and width.
    :return: tuple (left, top, right, bottom).
    """
    cx, cy, h, w = bbox
    left = cx - w / 2
    top = cy - h / 2
    return left, top, cx + w / 2, cy + h / 2


# Ground-truth boxes in red, candidate boxes in blue.
for box in gt_boxes:
    draw.rectangle(bbox_to_coords(box), outline="#f00")
for box in boxes:
    draw.rectangle(bbox_to_coords(box), outline="#00f")
base_image.save("overlap.png", "PNG")

# NOTE(review): the boxes are passed to bbox_overlaps in their raw
# (center, size) form, not as the corner coordinates used for drawing --
# confirm this is the layout bbox_overlaps expects.
bboxes = torch.tensor(boxes).float()
gt_boxes = torch.tensor(gt_boxes).float()
out = bbox_overlaps(bboxes, gt_boxes)
print(out)
print(out.shape)
def _data_generator(self, batch_size, is_svm):
    """Endlessly yield R-CNN training batches for fine-tuning or SVM training.

    For each of ``batch_size`` images the generator loads and resizes the
    image, runs selective search, crops every kept proposal plus every
    usable ground-truth box, and computes one class label and one regression
    target per crop.

    ``im_size`` is not defined in this function; it is read from the
    enclosing scope.

    :param batch_size: number of images consumed per yielded batch.
    :param is_svm: when true, labels follow the SVM scheme (-1 ignored,
        0 background, class id for IoU > 0.7) with ``cfg.THRESHOLD``;
        otherwise the fine-tuning scheme with ``cfg.FINE_TUNE_THRESHOLD``.
    :return: generator of ``(total_img_data, total_labels, total_deltas)``
        where the three arrays are concatenated over all crops of all images
        in the batch and are aligned row by row.
    """
    i = 0
    n = self.samples_num
    # The IoU threshold differs between SVM training and fine-tuning.
    threshold = cfg.THRESHOLD if is_svm else cfg.FINE_TUNE_THRESHOLD
    while True:
        total_img_data = []
        total_labels = []
        total_deltas = []
        for _ in range(batch_size):
            # Reshuffle at the start of every pass over the annotations.
            if i == 0:
                self._random_shuffle()
            annotation = self._annotations[i]
            image_path, gt_boxes = self._parse_annotation(annotation)
            img = cv2.imread(image_path)
            # img.shape is (height, width, channel).
            height, width, _ = img.shape
            # Resize the image to the network input size.
            img = cv2.resize(img, im_size, interpolation=cv2.INTER_CUBIC)
            # BGR -> RGB followed by a simple scaling to [0, 1].
            img = img[:, :, (2, 1, 0)]
            img = img.astype(np.float32)
            img = img / 255.
            # Rescale the ground-truth boxes to the resized image.
            gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * (im_size[0] / width)
            gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * (im_size[1] / height)
            # NOTE(review): the code below treats each region['rect'] as
            # (x1, y1, x2, y2) -- confirm against the selective_search
            # implementation in use.
            _, regions = selective_search(img, scale=200, sigma=0.9,
                                          min_size=50)
            proposals = np.asarray(
                [list(region['rect']) for region in regions])

            selected_imgs = []
            candidates = set()
            # Rects are collected in the same order as their crops. Rebuilding
            # them from the set (as before) loses insertion order and
            # misaligns crops with labels and deltas.
            rects = []
            # Drop duplicate and tiny proposals.
            for r in proposals:
                x1, y1, x2, y2 = (int(round(v)) for v in r)
                if (x1, y1, x2, y2) in candidates:
                    continue
                if (x2 - x1) * (y2 - y1) < 220:
                    continue
                crop_img = img[y1:y2, x1:x2, :]
                # Resize the crop to the network input size.
                crop_img = cv2.resize(crop_img, im_size,
                                      interpolation=cv2.INTER_CUBIC)
                selected_imgs.append(crop_img)
                candidates.add((x1, y1, x2, y2))
                rects.append([x1, y1, x2, y2])

            # Add the ground-truth boxes themselves as extra samples.
            for gt_box in gt_boxes:
                x1, y1, x2, y2 = (int(round(v)) for v in gt_box[0:4])
                crop_img = img[y1:y2, x1:x2, :]
                try:
                    crop_img = cv2.resize(crop_img, im_size,
                                          interpolation=cv2.INTER_CUBIC)
                except cv2.error:
                    # Degenerate (empty) crop: skip this box. Only the
                    # OpenCV failure is swallowed, not every exception.
                    continue
                selected_imgs.append(crop_img)
                rects.append(gt_box[0:4])
            rects = np.asarray(rects)

            # IoU between every kept rect and every ground-truth box.
            overlaps = bbox_overlaps(rects, gt_boxes)
            # Index of the ground-truth box with the highest IoU per rect.
            argmax_overlaps = np.argmax(overlaps, axis=1)
            max_overlaps = np.max(overlaps, axis=1)

            labels = np.empty(len(argmax_overlaps))
            if is_svm:
                # SVMs suit small training sets, so the paper restricts the
                # IoU ranges to shrink the SVM training set. Everything
                # starts as ignored (-1).
                labels.fill(-1)
                # Regions above the threshold are marked as background
                # ("hard negatives").
                # NOTE(review): the R-CNN paper uses IoU *below* 0.3 as SVM
                # negatives; the comparison is kept as written -- confirm.
                bg_ids = np.where(max_overlaps > threshold)[0]
                labels[bg_ids] = 0
                # Regions with IoU > 0.7 are treated as positives of the
                # class of their best ground-truth box.
                fg_ids = np.where(max_overlaps > 0.7)
                labels[fg_ids] = gt_boxes[argmax_overlaps[fg_ids], 4]
            else:
                # Fine-tuning: background (0) unless the IoU reaches the
                # threshold, in which case the best gt box's class is used.
                labels.fill(0)
                keep = np.where(max_overlaps >= threshold)[0]
                labels[keep] = gt_boxes[argmax_overlaps[keep], 4]

            # Regression targets towards the best ground-truth box.
            deltas = bbox_transform(rects, gt_boxes[argmax_overlaps, 0:4])

            total_deltas.append(deltas)
            total_labels.append(labels)
            total_img_data.append(selected_imgs)
            i = (i + 1) % n

        total_img_data = np.concatenate(total_img_data, axis=0)
        total_labels = np.concatenate(total_labels, axis=0)
        total_deltas = np.concatenate(total_deltas, axis=0)
        yield total_img_data, total_labels, total_deltas
def detect(save_img=False):
    """Run the detector over a test list and report per-class statistics.

    Every image named in ``test_path`` is run through the model, detections
    are matched to the XML ground truth at IoU > 0.5, and per-class counts
    of ground truth, TP, FP and FN plus precision and recall are printed and
    written to ``rzx_statistics.xlsx``.

    Matching rules:
      * a detection whose best IoU is <= 0.5 is a false positive;
      * each ground-truth box keeps only its highest-scoring detection;
        other detections overlapping the same box are ignored;
      * a kept detection is a TP when its class equals the ground-truth
        class, otherwise an FP;
      * FN per class is the number of ground-truth boxes minus the TPs.

    :param save_img: unused; kept for interface compatibility.
    """
    # (320, 192) or (416, 256) or (608, 352) for (height, width)
    imgsz = (320, 192) if ONNX_EXPORT else opt.img_size
    weights, half = opt.weights, opt.half

    # Initialize
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else opt.device)

    # Initialize model
    model = Darknet(opt.cfg, imgsz)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        load_darknet_weights(model, weights)

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101',
                                             n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Eval mode
    model.to(device).eval()

    # Half precision is only supported on CUDA
    half = half and device.type != 'cpu'
    if half:
        model.half()

    # Get names
    names = load_classes(opt.names)

    # Run the model once on a dummy image (skipped on CPU)
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img.float()
              ) if device.type != 'cpu' else None  # run once

    num_classes = len(names)
    cls2id = dict(zip(names, range(0, num_classes)))
    gt_cls_num = np.zeros(num_classes)
    tp = np.zeros(num_classes)
    fp = np.zeros(num_classes)

    with open(test_path, "r") as f:
        filenames = f.readlines()

    for filename in filenames:
        stem = filename.strip()
        if not stem:
            # Skip blank lines (e.g. a trailing newline in the list file).
            continue
        source = os.path.join(test_img_path, stem + ".jpg")
        dataset = LoadImages(source, img_size=imgsz)
        xml_path = os.path.join(test_xml_path, stem + ".xml")
        coords = read_xml(xml_path)
        # Value comparison; `is 0` tested object identity.
        if len(coords) == 0:
            print("No annotations\n")
            continue

        gt_bboxes = [coord[:4] for coord in coords]
        gt_labels = [coord[4] for coord in coords]
        for label in gt_labels:
            gt_cls_num[cls2id[label]] += 1

        for path, img, im0s, _ in dataset:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            pred = model(img, augment=opt.augment)[0]

            # to float
            if half:
                pred = pred.float()

            # Apply NMS
            pred = non_max_suppression(pred,
                                       opt.conf_thres,
                                       opt.iou_thres,
                                       multi_label=False,
                                       classes=opt.classes,
                                       agnostic=opt.agnostic_nms)

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for det in pred:  # detections for each image
                im0 = im0s
                if det is None or not len(det):
                    continue

                # Rescale boxes from imgsz to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()
                det_bboxes, det_labels, det_scores = get_result(det)

                ious = bbox_overlaps(np.array(det_bboxes),
                                     np.array(gt_bboxes))
                ious_max = ious.max(axis=1)
                ious_argmax = ious.argmax(axis=1)

                # -1 means "unmatched".
                gt_matched_det = np.ones(len(gt_bboxes)) * -1
                det_matched_gt = np.ones(len(det_bboxes)) * -1
                gt_matched_scores = np.zeros(len(gt_bboxes))

                for i in range(len(det_bboxes)):
                    if ious_max[i] > 0.5:
                        target_gt = ious_argmax[i]
                        if gt_matched_scores[target_gt] < det_scores[i]:
                            # Release the detection previously matched to
                            # this box so a ground truth is never counted
                            # twice.
                            previous = int(gt_matched_det[target_gt])
                            if previous > -1:
                                det_matched_gt[previous] = -1
                            gt_matched_scores[target_gt] = det_scores[i]
                            gt_matched_det[target_gt] = i
                            det_matched_gt[i] = target_gt
                    else:
                        fp[det_labels[i]] += 1

                for i in range(len(det_matched_gt)):
                    gt = int(det_matched_gt[i])
                    if gt > -1:
                        if op.eq(names[det_labels[i]], gt_labels[gt]):
                            tp[det_labels[i]] += 1
                            # Invariant: at most one TP per ground truth.
                            assert (tp[det_labels[i]] <=
                                    gt_cls_num[det_labels[i]])
                        else:
                            fp[det_labels[i]] += 1

    # Ground-truth boxes that did not produce a TP are false negatives.
    # (fn was previously left at zero, which made recall meaningless.)
    fn = gt_cls_num - tp

    def _ratio(numerator, denominator):
        # Return numerator / denominator, or 0.0 for an empty denominator.
        return numerator / denominator if denominator > 0 else 0.0

    precision = [_ratio(tp[i], tp[i] + fp[i]) for i in range(num_classes)]
    recall = [_ratio(tp[i], tp[i] + fn[i]) for i in range(num_classes)]

    for i in range(num_classes):
        print("%s: %.0f gt, %.0f det, %.0f tp, precision: %.6f, recall: %.6f"
              % (names[i], gt_cls_num[i], tp[i] + fp[i], tp[i],
                 precision[i], recall[i]))

    if os.path.exists("rzx_statistics.xlsx"):
        os.remove("rzx_statistics.xlsx")
    # The original passed the filename positionally, which openpyxl takes as
    # a truthy write_only flag; the flag is now spelled out and the output
    # is unchanged.
    workbook = openpyxl.Workbook(write_only=True)
    sheet = workbook.create_sheet("sheet")
    sheet.append(TABLE_HEAD)
    for i in range(num_classes):
        sheet.append([
            names[i],
            "%.0f" % gt_cls_num[i],
            "%.0f" % tp[i],
            "%.0f" % fp[i],
            "%.0f" % fn[i],
            "%.6f" % precision[i],
            "%.6f" % recall[i],
        ])
    workbook.save("rzx_statistics.xlsx")