import os
import pickle
import time

import cv2
import numpy as np
import numpy.random as npr
import torch
import torch.nn as nn

import config  # project-local config module (assumed import path)
from utils import IoU  # repo's box-overlap helper (assumed import path)


def gen_pnet_data(data_dir, anno_file, prefix):
    neg_save_dir = os.path.join(data_dir, "12/negative")
    pos_save_dir = os.path.join(data_dir, "12/positive")
    part_save_dir = os.path.join(data_dir, "12/part")
    for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

    post_save_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_POSTIVE_ANNO_FILENAME)
    neg_save_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_NEGATIVE_ANNO_FILENAME)
    part_save_file = os.path.join(config.ANNO_STORE_DIR, config.PNET_PART_ANNO_FILENAME)
    f1 = open(post_save_file, 'w')
    f2 = open(neg_save_file, 'w')
    f3 = open(part_save_file, 'w')

    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print("%d pics in total" % num)

    p_idx = 0    # positive examples index
    n_idx = 0    # negative examples index
    d_idx = 0    # part-face examples index
    idx = 0      # image index
    box_idx = 0  # gt box index

    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_path = os.path.join(prefix, annotation[0])  # image path
        # map() applies float() to every remaining field; the fields are
        # x1 y1 x2 y2 for each gt box in the image
        bbox = list(map(float, annotation[1:]))
        boxes = np.array(bbox, dtype=np.int32).reshape(-1, 4)  # N x 4 array
        img = cv2.imread(im_path)
        idx += 1
        if idx % 100 == 0:
            print(idx, "images done")
        height, width, channel = img.shape

        # sample 50 random negatives per image (IoU < 0.3 with every gt box)
        neg_num = 0
        while neg_num < 50:
            size = npr.randint(12, min(width, height) // 2)
            nx = npr.randint(0, width - size)
            ny = npr.randint(0, height - size)
            crop_box = np.array([nx, ny, nx + size, ny + size])

            Iou = IoU(crop_box, boxes)
            if np.max(Iou) < 0.3:
                # IoU with all gts must be below 0.3 to count as negative
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write(save_file + ' 0\n')
                cropped_im = img[ny:ny + size, nx:nx + size, :]
                resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1

        for box in boxes:
            # box is (x_left, y_top, x_right, y_bottom)
            x1, y1, x2, y2 = box
            w = x2 - x1
            h = y2 - y1

            # ignore small faces, in case their ground truth boxes are inaccurate
            if max(w, h) < 40 or x1 < 0 or y1 < 0:
                continue

            # generate negative examples that overlap this gt box
            for i in range(5):
                size = npr.randint(12, min(width, height) // 2)
                # delta_x and delta_y are offsets of (x1, y1)
                delta_x = npr.randint(max(-size, -x1), w)
                delta_y = npr.randint(max(-size, -y1), h)
                nx1 = max(0, x1 + delta_x)
                ny1 = max(0, y1 + delta_y)
                if nx1 + size > width or ny1 + size > height:
                    continue
                crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])

                Iou = IoU(crop_box, boxes)
                if np.max(Iou) < 0.3:
                    # IoU with all gts must be below 0.3
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    cropped_im = img[ny1:ny1 + size, nx1:nx1 + size, :]
                    resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
                    f2.write(save_file + ' 0\n')  # negative samples get label 0
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1

            # generate positive examples and part faces:
            # sample 20 random boxes around each gt box; IoU >= 0.65 becomes a
            # positive example, 0.4 <= IoU < 0.65 a part face, the rest are ignored
            for i in range(20):
                size = npr.randint(int(min(w, h) * 0.8), int(np.ceil(1.25 * max(w, h))))
                # delta here is the offset of the box center
                delta_x = npr.randint(int(-w * 0.2), int(w * 0.2))
                delta_y = npr.randint(int(-h * 0.2), int(h * 0.2))
                nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
                ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
                nx2 = int(nx1 + size)
                ny2 = int(ny1 + size)
                if nx2 > width or ny2 > height:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])

                # bbox regression targets, derived from x1 = nx1 + float(size) * offset_x1
                offset_x1 = (x1 - nx1) / float(size)
                offset_y1 = (y1 - ny1) / float(size)
                offset_x2 = (x2 - nx2) / float(size)
                offset_y2 = (y2 - ny2) / float(size)

                cropped_im = img[ny1:ny2, nx1:nx2, :]
                resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)

                box_ = box.reshape(1, -1)
                if IoU(crop_box, box_) >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif IoU(crop_box, box_) >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
            box_idx += 1
            print("%s images done, pos: %s part: %s neg: %s" % (idx, p_idx, d_idx, n_idx))

    f1.close()
    f2.close()
    f3.close()
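# ---------------------------------------------------------------------------
# Reference sketch (assumption): the imported IoU helper is not defined in this
# file. Judging from its call sites above (one candidate box against an N x 4
# array of gt boxes, with np.max taken over the result), it presumably matches
# the standard corner-format implementation below. The name `iou_reference` is
# hypothetical, as is the +1 pixel-area convention; the torch variant invoked
# with format='center' in myloss further down is a separate overload not
# sketched here.
def iou_reference(box, boxes):
    """IoU between one box (x1, y1, x2, y2) and an N x 4 array of boxes."""
    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    xx1 = np.maximum(box[0], boxes[:, 0])
    yy1 = np.maximum(box[1], boxes[:, 1])
    xx2 = np.minimum(box[2], boxes[:, 2])
    yy2 = np.minimum(box[3], boxes[:, 3])
    w = np.maximum(0, xx2 - xx1 + 1)   # clamp to 0 when boxes do not overlap
    h = np.maximum(0, yy2 - yy1 + 1)
    inter = w * h
    return inter / (box_area + areas - inter)

# Usage sketch for gen_pnet_data (paths are hypothetical; the annotation file
# is assumed to follow the "relative/image/path x1 y1 x2 y2 ..." format parsed
# above):
#
#     gen_pnet_data(data_dir='./data/train',
#                   anno_file='./anno_store/wider_origin_anno.txt',
#                   prefix='./data/WIDER_train/images')
#
# This writes 12x12 crops under data_dir/12/{positive,part,negative} and the
# matching label lists into config.ANNO_STORE_DIR. A positive line carries
# label 1 plus four regression offsets, so decoding a prediction inverts the
# encoding above: x1 = nx1 + size * offset_x1, and likewise for y1, x2, y2.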
def get_rnet_sample_data(data_dir, anno_file, det_boxes_file, prefix_path):
    neg_save_dir = os.path.join(data_dir, "24/negative")
    pos_save_dir = os.path.join(data_dir, "24/positive")
    part_save_dir = os.path.join(data_dir, "24/part")
    for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

    # load ground truth from the annotation file;
    # each line: image/path followed by x1 y1 x2 y2 for every gt box in that image
    with open(anno_file, 'r') as f:
        annotations = f.readlines()

    image_size = 24
    im_idx_list = list()
    gt_boxes_list = list()
    num_of_images = len(annotations)
    print("processing %d images in total" % num_of_images)

    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_idx = os.path.join(prefix_path, annotation[0])
        boxes = list(map(float, annotation[1:]))
        boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
        im_idx_list.append(im_idx)
        gt_boxes_list.append(boxes)

    save_path = config.ANNO_STORE_DIR
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w')
    f2 = open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w')
    f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w')

    # PNet detections, pickled as one array of boxes per image
    with open(det_boxes_file, 'rb') as det_handle:
        det_boxes = pickle.load(det_handle)
    print(len(det_boxes), num_of_images)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # indices of neg, pos and part faces, used as their image names
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0

    for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
        image_done += 1
        if image_done % 100 == 0:
            print("%d images done" % image_done)
        if dets.shape[0] == 0:
            continue
        img = cv2.imread(im_idx)
        # dets = convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        # keep at most 50 negative samples per image
        cur_n_idx = 0
        for box in dets:
            x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
            width = x_right - x_left
            height = y_bottom - y_top

            # ignore boxes that are too small or extend beyond the image border
            if width < 20 or height < 20 or x_left <= 0 or y_top <= 0 \
                    or x_right >= img.shape[1] or y_bottom >= img.shape[0]:
                continue

            # compute intersection over union (IoU) between the current box and all gt boxes
            Iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom, x_left:x_right, :]
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)

            if np.max(Iou) < 0.3:
                # save negative images and write label 0 (IoU with all gts below 0.3)
                cur_n_idx += 1
                if cur_n_idx <= 50:
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
            else:
                # find the gt box with the highest IoU
                idx = np.argmax(Iou)
                assigned_gt = gts[idx]
                x1, y1, x2, y2 = assigned_gt

                # compute bbox regression labels
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                # save positive and part-face images and write labels
                if np.max(Iou) >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif np.max(Iou) >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1

        print("%s images done, pos: %s part: %s neg: %s" % (image_done, p_idx, d_idx, n_idx))

    f1.close()
    f2.close()
    f3.close()
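# Usage sketch for get_rnet_sample_data (paths hypothetical). det_boxes_file is
# assumed to be a pickle holding one detection array per annotated image, in
# the same order as anno_file, e.g. produced by running the trained PNet over
# the training set:
#
#     get_rnet_sample_data(data_dir='./data/train',
#                          anno_file='./anno_store/wider_origin_anno.txt',
#                          det_boxes_file='./anno_store/detections_pnet.pkl',
#                          prefix_path='./data/WIDER_train/images')
#
# The output mirrors the PNet stage but at 24x24: crops land under
# data_dir/24/{positive,part,negative} and the label lists pos_24.txt,
# neg_24.txt and part_24.txt are written into config.ANNO_STORE_DIR.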
def myloss(self, anchors, y_pred, y_true):
    self.reso = 352
    self.anchors = anchors
    loss = dict()

    # 1. Prepare
    # 1.1 re-organize y_pred:
    # [bs, (5+nC)*nA, gs, gs] => [bs, nA, gs, gs, 5+nC]
    bs, _, gs, _ = y_pred.size()
    nA = len(self.anchors)
    nC = self.num_classes
    y_pred = y_pred.view(bs, nA, 5 + nC, gs, gs)
    y_pred = y_pred.permute(0, 1, 3, 4, 2)

    # 1.2 prepare anchor boxes, scaled from pixels to feature-map units
    stride = self.reso // gs
    anchors = [(a[0] / stride, a[1] / stride) for a in self.anchors]
    anchor_bboxes = torch.zeros(nA, 4).cuda()
    anchor_bboxes[:, 2:] = torch.Tensor(anchors)
    anchor_bboxes = anchor_bboxes.repeat(bs, 1, 1)

    # 2. Build gt [tx, ty, tw, th] and masks
    # TODO: f1 score implementation
    gt_tx = torch.zeros(bs, nA, gs, gs, requires_grad=False)
    gt_ty = torch.zeros(bs, nA, gs, gs, requires_grad=False)
    gt_tw = torch.zeros(bs, nA, gs, gs, requires_grad=False)
    gt_th = torch.zeros(bs, nA, gs, gs, requires_grad=False)
    obj_mask = torch.zeros(bs, nA, gs, gs, requires_grad=False)
    non_obj_mask = torch.ones(bs, nA, gs, gs, requires_grad=False)
    cls_mask = torch.zeros(bs, nA, gs, gs, nC, requires_grad=False)

    start = time.time()
    gt_bbox = y_true[:, :, :4] * gs  # scale bbox relative to the feature map
    gt_cls_label = y_true[:, :, 4].long()
    gt_xc = gt_bbox[:, :, 0]
    gt_yc = gt_bbox[:, :, 1]
    gt_w = gt_bbox[:, :, 2]
    gt_h = gt_bbox[:, :, 3]
    gt_i, gt_j = gt_xc.long(), gt_yc.long()  # grid cell containing the gt center

    # compare gt box shapes (w, h only; centers zeroed) against the anchor shapes
    gt_box_shape = y_true[:, :, :4] * gs
    gt_box_shape[:, :, 0:2] = 0
    anchor_ious = IoU(gt_box_shape, anchor_bboxes, format='center')
    best_anchor = int(anchor_ious.argmax())
    anchor_w, anchor_h = anchors[best_anchor]

    gt_tw[:, best_anchor, gt_i, gt_j] = torch.log(gt_w / anchor_w + 1e-16)
    gt_th[:, best_anchor, gt_i, gt_j] = torch.log(gt_h / anchor_h + 1e-16)
    gt_tx[:, best_anchor, gt_i, gt_j] = gt_xc - gt_i.float()
    gt_ty[:, best_anchor, gt_i, gt_j] = gt_yc - gt_j.float()
    obj_mask[:, best_anchor, gt_i, gt_j] = 1
    non_obj_mask[:, anchor_ious > 0.5] = 0  # FIXME: make the 0.5 threshold a variable
    cls_mask[:, best_anchor, gt_i, gt_j, gt_cls_label] = 1

    # 3. activate raw y_pred
    end = time.time()
    print("yolo_losses", bs, len(y_true), end - start)  # timing debug
    pred_tx = torch.sigmoid(y_pred[..., 0])  # gt tx/ty are not deactivated
    pred_ty = torch.sigmoid(y_pred[..., 1])
    pred_tw = y_pred[..., 2]
    pred_th = y_pred[..., 3]
    pred_conf = y_pred[..., 4]
    pred_cls = y_pred[..., 5:]

    # 4. Compute loss
    obj_mask = obj_mask.cuda()
    non_obj_mask = non_obj_mask.cuda()
    cls_mask = cls_mask.cuda()
    gt_tx, gt_ty = gt_tx.cuda(), gt_ty.cuda()
    gt_tw, gt_th = gt_tw.cuda(), gt_th.cuda()

    # each criterion averages over the batch
    MSELoss = nn.MSELoss()
    BCEWithLogitsLoss = nn.BCEWithLogitsLoss()
    CrossEntropyLoss = nn.CrossEntropyLoss()
    loss['x'] = MSELoss(pred_tx[obj_mask == 1], gt_tx[obj_mask == 1])
    loss['y'] = MSELoss(pred_ty[obj_mask == 1], gt_ty[obj_mask == 1])
    loss['w'] = MSELoss(pred_tw[obj_mask == 1], gt_tw[obj_mask == 1])
    loss['h'] = MSELoss(pred_th[obj_mask == 1], gt_th[obj_mask == 1])
    loss['cls'] = CrossEntropyLoss(pred_cls[obj_mask == 1],
                                   torch.argmax(cls_mask[obj_mask == 1], 1))
    loss['conf'] = BCEWithLogitsLoss(pred_conf[obj_mask == 1],
                                     obj_mask[obj_mask == 1])
    loss['non_conf'] = BCEWithLogitsLoss(pred_conf[non_obj_mask == 1],
                                         non_obj_mask[non_obj_mask == 1])
    loss['total_loss'] = (loss['x'] + loss['y'] + loss['w'] + loss['h']
                          + loss['cls'] + loss['conf'] + loss['non_conf'])

    # returned as (total_loss, x, y, w, h, cls, conf, non_conf)
    return (loss['total_loss'], loss['x'], loss['y'], loss['w'], loss['h'],
            loss['cls'], loss['conf'], loss['non_conf'])
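# Shape sketch for myloss (all concrete values below are assumptions for
# illustration, and `model` stands for whatever loss module this method lives
# on): with reso = 352 and a stride-32 head, gs = 352 // 32 = 11; y_pred comes
# from the conv head as [bs, (5 + nC) * nA, gs, gs], and y_true holds per-image
# boxes as (xc, yc, w, h, cls) with coordinates normalized to [0, 1]:
#
#     anchors = [(116, 90), (156, 198), (373, 326)]         # pixel-space, nA = 3
#     y_pred = torch.randn(4, 3 * (5 + 80), 11, 11).cuda()  # bs = 4, nC = 80
#     y_true = torch.rand(4, 1, 5).cuda()                   # one gt box per image
#     total, lx, ly, lw, lh, lcls, lconf, lnonconf = model.myloss(anchors, y_pred, y_true)
#
# total is the sum of the seven components; the conf/non_conf split applies
# BCEWithLogitsLoss separately to cells with and without an assigned gt box.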