def rotate_boxes_4xy(self, target, angle):
    """Rotate 4-corner boxes ('4xy' mode) about the image centre.

    Every row of ``target.bbox`` is flattened into (x, y) pairs, shifted so
    the image centre is the origin, multiplied by a 2x2 rotation matrix,
    cast to int16 and shifted back.

    Args:
        target: box container exposing ``bbox``, ``size`` (read as
            ``(width, height)``) and ``get_field("labels")``.
        angle: rotation angle in degrees. Its sign is flipped before the
            rotation matrix is built.
            NOTE(review): assumed to be a scalar -- with an array-valued
            angle the final ``reshape([-1, 8])`` would no longer yield one
            row per box. Confirm against the caller.

    Returns:
        ``None`` when the rotated box list is empty; otherwise the tuple
        ``(rotated_target, corners)`` where ``rotated_target`` is an
        ``RBoxList`` in '4xy' mode carrying the original labels and
        ``corners`` is the NumPy array the boxes were built from.
    """
    angle = -angle

    gt_boxes = target.bbox
    if isinstance(gt_boxes, torch.Tensor):
        gt_boxes = gt_boxes.data.cpu().numpy()
    gt_labels = target.get_field("labels")

    iminfo = target.size
    im_width, im_height = iminfo[0], iminfo[1]
    center = np.array([[im_width / 2, im_height / 2]])

    # Degrees -> radians.
    theta = np.pi / 180 * angle
    cos_cita = np.cos(theta)
    sin_cita = np.sin(theta)

    # Applied by right-multiplying row vectors (labelled "clockwise matrix"
    # by the original author).
    rotation_matrix = np.array([[cos_cita, sin_cita],
                                [-sin_cita, cos_cita]])

    # One (x, y) row per corner, expressed relative to the image centre.
    pts_ctr = gt_boxes.reshape([-1, 2]) - center
    # The int16 cast discards the fractional part of the rotated
    # coordinates before the centre offset is added back.
    pts_ctr = np.array(np.dot(pts_ctr, rotation_matrix), dtype=np.int16)
    pts24xy = (pts_ctr + center).reshape([-1, 8])

    rotated_gt_boxes_th = torch.tensor(pts24xy).to(target.bbox.device)
    target_cpy = RBoxList(rotated_gt_boxes_th, iminfo, mode='4xy')
    target_cpy.add_field('labels', gt_labels)

    if target_cpy.bbox.size()[0] <= 0:
        return None
    return target_cpy, pts24xy
def __getitem__(self, idx):
    """Load sample ``idx`` and build its '4xy' rotated-box target.

    Crowd annotations are dropped, the remaining annotations are converted
    to 8-value boxes by ``self.conver4xyTO4xy`` and wrapped in an
    ``RBoxList`` together with contiguous class labels (and keypoints when
    the first annotation has them). The target is clipped to the image,
    passed through ``self.transforms`` if set, and capped at 800 entries.

    Returns:
        tuple: ``(img, target, idx)``.
    """
    img, anno = super(DOTARotateDataset, self).__getitem__(idx)

    # filter crowd annotations
    # TODO might be better to add an extra field
    anno = [entry for entry in anno if entry["iscrowd"] == 0]

    corners = self.conver4xyTO4xy(anno)
    # reshape(-1, 8) keeps a well-formed (0, 8) tensor when there are no boxes
    corners = torch.as_tensor(corners).reshape(-1, 8)
    target = RBoxList(corners, img.size, mode="4xy")

    # Map dataset category ids onto the contiguous label ids.
    labels = torch.tensor([
        self.json_category_id_to_contiguous_id[entry["category_id"]]
        for entry in anno
    ])
    target.add_field("labels", labels)

    if anno and "keypoints" in anno[0]:
        keypoints = PersonKeypoints(
            [entry["keypoints"] for entry in anno], img.size)
        target.add_field("keypoints", keypoints)

    target = target.clip_to_image(remove_empty=True)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    # Keep at most the first 800 entries of the target.
    max_boxes = 800
    if len(target) >= max_boxes:
        target = target[0:max_boxes]

    return img, target, idx
def forward_for_single_feature_map(self, locations, box_cls, box_regression,
                                   centerness, image_sizes, normal_factor):
    """Convert one feature level's head outputs into per-image detections.

    Arguments:
        locations: tensor indexed by flattened position; columns 0 and 1
            are read as the x and y of each position.
        box_cls: tensor of size N, num_pts * C, H, W (class logits); it is
            flattened to N, H * W * num_pts, C and passed through sigmoid.
        box_regression: tensor flattened to N, -1, 5 after moving channels
            last; values are cubed and multiplied by ``normal_factor``.
        centerness: tensor flattened to N, -1 after moving channels last,
            then passed through sigmoid.
        image_sizes: per-image sizes, unpacked as ``(h, w)``.
        normal_factor: multiplier applied to the cubed regression values.

    Returns:
        list with one ``RBoxList`` per image. Each carries a "labels" field
        (class index + 1) and a "scores" field holding the sigmoid class
        score; centerness is used only to select candidates.
    """
    N, C, _, _ = box_cls.shape
    C = C // self.num_pts

    # Put everything in the same (channels-last, flattened) layout as
    # locations.
    box_cls = box_cls.permute(0, 2, 3, 1)
    box_cls = box_cls.reshape(N, -1, C).sigmoid()
    # Five regression values per position: dx1, dy1, dx2, dy2, h.
    box_regression = box_regression.permute(0, 2, 3, 1).reshape(N, -1, 5)
    centerness = centerness.permute(0, 2, 3, 1).reshape(N, -1)
    centerness = centerness.sigmoid()

    # A (position, class) pair is a candidate when class score times
    # centerness exceeds the pre-NMS threshold.
    candidate_inds = box_cls * centerness[:, :, None] > self.pre_nms_thresh
    # Candidates per image, capped at self.pre_nms_top_n.
    pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
    pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

    results = []
    # Process each image in the batch.
    for i in range(N):
        per_candidate_inds = candidate_inds[i]
        # 1-D scores of the selected (position, class) pairs.
        per_box_cls = box_cls[i][per_candidate_inds]

        per_candidate_nonzeros = per_candidate_inds.nonzero()
        # Position index of each candidate.
        per_box_loc = per_candidate_nonzeros[:, 0]
        # Class information; labels are shifted by one.
        per_class = per_candidate_nonzeros[:, 1] + 1

        per_box_regression = torch.pow(box_regression[i], 3) * normal_factor
        per_box_regression = per_box_regression[per_box_loc]
        per_locations = locations[per_box_loc]

        per_pre_nms_top_n = pre_nms_top_n[i]

        # More candidates than the cap: keep only the top-scoring ones.
        if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
            per_box_cls, top_k_indices = \
                per_box_cls.topk(per_pre_nms_top_n, sorted=False)
            per_class = per_class[top_k_indices]
            per_box_regression = per_box_regression[top_k_indices]
            per_locations = per_locations[top_k_indices]

        h, w = image_sizes[i]

        # NOTE(review): seven columns are stacked here although the box
        # list is created with mode "xywha" and the original comment said
        # "N 5" -- confirm that RBoxList accepts this layout.
        detections = torch.stack(
            [
                per_locations[:, 0],
                per_locations[:, 1],
                per_locations[:, 0] - per_box_regression[:, 0],  # x1
                per_locations[:, 1] - per_box_regression[:, 1],  # y1
                per_locations[:, 0] - per_box_regression[:, 2],  # x2
                per_locations[:, 1] - per_box_regression[:, 3],  # y2
                per_box_regression[:, 4],  # h
            ],
            dim=1)

        boxlist = RBoxList(detections, (int(w), int(h)), mode="xywha")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        results.append(boxlist)

    return results
def rotate_boxes_xywha(self, target, angle):
    """Rotate (cx, cy, w, h, angle) boxes about the image centre.

    Box centres are rotated around the image centre and cast to int16.
    For every box the longer side becomes the width (adding 90 to the box
    angle when the sides are swapped), ``angle`` is added to the box angle,
    and results above 135 are wrapped down by multiples of 180. Width and
    height are scaled by ``self.gt_margin``. Boxes whose rotated centre
    falls outside ``[0, width) x [0, height)`` are dropped, together with
    the matching entries of every copied field.

    The input ``target`` is not modified.

    Args:
        target: box container exposing ``bbox``, ``size`` (read as
            ``(width, height)``), ``get_field`` and ``has_field``. The
            "labels" and "difficult" fields are required; "words",
            "word_length" and "masks" are carried over when present.
        angle: rotation angle in degrees. The ``np.squeeze(..., axis=-1)``
            and ``angle.astype`` calls below appear to require a
            one-element NumPy array -- confirm against the caller.

    Returns:
        An ``RBoxList`` in 'xywha' mode, or ``None`` when no box remains.
    """
    gt_boxes = target.bbox
    if isinstance(gt_boxes, torch.Tensor):
        gt_boxes = gt_boxes.data.cpu().numpy()
    # ``.cpu().numpy()`` shares storage with a CPU tensor, so take a private
    # copy before the rotated centres are written back; otherwise the
    # caller's boxes would be overwritten.
    origin_gt_boxes = gt_boxes.copy()

    gt_labels = target.get_field("labels")
    num_boxes = len(origin_gt_boxes)
    rotated_gt_boxes = np.empty((num_boxes, 5), dtype=np.float32)

    iminfo = target.size
    im_width, im_height = iminfo[0], iminfo[1]
    center = np.tile((im_width / 2, im_height / 2), (num_boxes, 1))

    # Degrees -> radians.
    cos_cita = np.cos(np.pi / 180 * angle)
    sin_cita = np.sin(np.pi / 180 * angle)

    # Applied by right-multiplying row vectors (labelled "clockwise matrix"
    # by the original author).
    rotation_matrix = np.array([[cos_cita, sin_cita],
                                [-sin_cita, cos_cita]])

    pts_ctr = origin_gt_boxes[:, 0:2] - center
    # The int16 cast discards the fractional part of the rotated centres.
    pts_ctr = np.array(np.dot(pts_ctr, rotation_matrix), dtype=np.int16)
    # Drops the trailing length-1 axis before shifting back.
    pts_ctr = np.squeeze(pts_ctr, axis=-1) + center
    origin_gt_boxes[:, 0:2] = pts_ctr

    rotated_gt_boxes[:, 0:2] = origin_gt_boxes[:, 0:2]

    for idx in range(num_boxes):
        width = origin_gt_boxes[idx, 2]
        height = origin_gt_boxes[idx, 3]
        ori_angle = origin_gt_boxes[idx, 4]

        # step 1: make the longer side the width, compensating the angle
        if width < height:
            ori_angle += 90
            width, height = height, width

        # step 2: apply the rotation
        rotated_angle = ori_angle + angle

        # step 3: wrap angles above 135 back down
        # NOTE(review): only the upper bound is wrapped; values that end up
        # below the intended lower bound are left untouched -- confirm that
        # callers never produce them.
        while rotated_angle > 135:
            rotated_angle = rotated_angle - 180

        rotated_gt_boxes[idx, 2] = width * self.gt_margin
        rotated_gt_boxes[idx, 3] = height * self.gt_margin
        rotated_gt_boxes[idx, 4] = rotated_angle

    # Keep only boxes whose rotated centre is still inside the image.
    x_inbound = np.logical_and(rotated_gt_boxes[:, 0] >= 0,
                               rotated_gt_boxes[:, 0] < im_width)
    y_inbound = np.logical_and(rotated_gt_boxes[:, 1] >= 0,
                               rotated_gt_boxes[:, 1] < im_height)
    inbound = np.logical_and(x_inbound, y_inbound)
    inbound_th = torch.from_numpy(np.flatnonzero(inbound)).long()

    rotated_gt_boxes_th = torch.tensor(rotated_gt_boxes[inbound]).to(
        target.bbox.device)

    gt_labels = gt_labels[inbound_th]
    difficulty = target.get_field("difficult")[inbound_th]

    target_cpy = RBoxList(rotated_gt_boxes_th, iminfo, mode='xywha')
    target_cpy.add_field('difficult', difficulty)
    target_cpy.add_field('labels', gt_labels)

    # Optional per-box fields are filtered with the same index.
    for field_name in ("words", "word_length"):
        if target.has_field(field_name):
            target_cpy.add_field(
                field_name, target.get_field(field_name)[inbound_th])

    if target.has_field("masks"):
        seg_masks = target.get_field("masks")
        target_cpy.add_field(
            'masks',
            seg_masks.rotate(
                torch.from_numpy(angle.astype(np.float32)),
                torch.tensor([im_width / 2, im_height / 2]))[inbound_th])

    if target_cpy.bbox.size()[0] <= 0:
        return None
    return target_cpy
if (len(_scores) <= 0): continue bboxes = np.append(bboxes, _bboxes + [new_width, new_height, 0, 0, 0], axis=0) scores = np.append(scores, _scores, axis=0) labels = np.append(labels, _labels, axis=0) level = np.append(level, _level, axis=0) response_point = np.append(response_point, _response_point, axis=0) end_points = np.append(end_points, _endpoints, axis=0) target = RBoxList(bboxes, raw_img1.shape, mode="xywha") target.add_field("labels", torch.tensor(labels)) target.add_field('score', torch.tensor(scores)) target.add_field('response_point', torch.tensor(response_point)) target.add_field('end_points', torch.tensor(end_points)) target.add_field('levels', torch.tensor(level)) target.to('cuda') target = boxlist_nms(target, 0.05) # print(os.path.join("./demo/dota_result/", filename)) write_dotaResult(filename, np.array(target.bbox), np.array(target.get_field('score')), np.array(target.get_field('labels'))) det_detections_r = draw_rotate_box_cv( raw_img1,