import os

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from torchvision import ops
from pycocotools.coco import COCO

# Project-local helpers (Box, Img, Keypoint, cfg, smooth_l1_loss) are assumed
# importable from this repo; their exact module paths are not shown here.


def visualize_labeled_box(img, bboxs, pos_boxes, neg_boxes, output_path):
    # tensor (C, H, W) -> numpy (H, W, C); write & re-read so cv2 owns a
    # contiguous uint8 buffer before drawing on it
    img = img.cpu().numpy().transpose(1, 2, 0)
    cv2.imwrite(output_path, img)
    img = cv2.imread(output_path)
    img = Box.visualize_box(img, bboxs, color=(0, 255, 0), thickness=4)
    img = Box.visualize_box(img, neg_boxes, color=(0, 0, 255), thickness=2)
    img = Box.visualize_box(img, pos_boxes, color=(255, 0, 0), thickness=2)
    cv2.imwrite(output_path, img)
def visualize_result(img, results, gt_boxes, output_path):
    boxes = []
    for result in results:
        res = result['bbox']
        # convert xywh -> xyxy for drawing
        new_res = [res[0], res[1], res[0] + res[2], res[1] + res[3]]
        boxes.append(new_res)
    boxes = np.array(boxes)
    img = Box.visualize_box(img, gt_boxes, color=(0, 255, 0), thickness=2)
    img = Box.visualize_box(img, boxes, color=(255, 255, 255), thickness=2)
    cv2.imwrite(output_path, img)
def visualize_box(img, boxes, output_path):
    img = img.cpu().numpy().transpose(1, 2, 0)
    cv2.imwrite(output_path, img)
    img = cv2.imread(output_path)
    img = Box.visualize_box(img, boxes, color=(255, 255, 255), thickness=1)
    cv2.imwrite(output_path, img)
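# The Box.visualize_box helper used above is not defined in this file. A
# minimal sketch of what it presumably does (draw one rectangle per xyxy box
# with cv2.rectangle); the name and signature here are assumptions:
def _visualize_box_sketch(img, boxes, color=(255, 255, 255), thickness=1):
    for box in boxes:
        x1, y1, x2, y2 = [int(v) for v in box]
        cv2.rectangle(img, (x1, y1), (x2, y2), color, thickness)
    return img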
def post_processing(self, datas, results):
    final_results = []
    for data, result in zip(datas, results):
        image_id = data['img_id']
        # scale boxes back from the resized image to the raw image
        ratio_h = data['raw_img_size'][0] / data['img_size'][0]
        ratio_w = data['raw_img_size'][1] / data['img_size'][1]
        for result_per_label in result:
            category_id = result_per_label['label']
            result_per_label['bbox'][:, (0, 2)] = result_per_label['bbox'][:, (0, 2)] * ratio_w
            result_per_label['bbox'][:, (1, 3)] = result_per_label['bbox'][:, (1, 3)] * ratio_h
            for score, bbox in zip(result_per_label['score'], result_per_label['bbox']):
                final_results.append({
                    'image_id': int(image_id),
                    'category_id': int(category_id),
                    'bbox': Box.xyxy_to_xywh(bbox.tolist()),
                    'score': round(score.tolist(), 3)
                })
    return final_results
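# Box.xyxy_to_xywh is assumed to produce COCO-style [x, y, w, h] entries from
# the internal [x1, y1, x2, y2] format, as COCO result files expect. A
# minimal sketch (helper name is hypothetical):
def _xyxy_to_xywh_sketch(box):
    x1, y1, x2, y2 = box
    return [x1, y1, x2 - x1, y2 - y1]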
def labeling_proposals(self, proposals, gt_labels, gt_boxes):
    # proposals : [bs, N, 4], format : x1y1x2y2
    # gt_boxes  : [bs, M, 4], format : x1y1x2y2
    labels = []
    match_gt_boxes = []
    for proposal, gt_label, gt_box in zip(proposals, gt_labels, gt_boxes):
        label = torch.empty(len(proposal)).cuda().long().fill_(0)
        match_gt_box = torch.zeros(len(proposal), 4).cuda()

        # get iou_matrix : [N, M]
        iou_matrix = Box.calculate_iou_matrix(proposal, gt_box)

        # labeling: assign each proposal its best-overlapping gt, if above threshold
        max_ious, match_gt_idxs = torch.max(iou_matrix, dim=1)
        max_ious_idxs = torch.where(max_ious > self.threshold)[0]
        match_gt_idxs = match_gt_idxs[max_ious_idxs]

        # labels & corresponding gt boxes
        label[max_ious_idxs] = gt_label[match_gt_idxs]
        match_gt_box[max_ious_idxs] = gt_box[match_gt_idxs]

        labels.append(label)
        match_gt_boxes.append(match_gt_box)

    # labels : [bs, N]
    # match_gt_boxes : [bs, N, 4]
    return labels, match_gt_boxes
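# Box.calculate_iou_matrix is assumed to compute pairwise IoU between N
# proposals and M ground-truth boxes (both x1y1x2y2). A minimal torch sketch
# under that assumption:
def _iou_matrix_sketch(boxes_a, boxes_b):
    # areas: [N] and [M]
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    # intersection corners via broadcasting: [N, M, 2]
    lt = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])
    rb = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]  # [N, M]
    return inter / (area_a[:, None] + area_b[None, :] - inter)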
def get_top_detections(self, proposals, pred_scores, pred_deltas, images):
    img_size = (images[0].shape[1], images[0].shape[2])
    start_idx = 0
    results = []
    for proposal in proposals:
        # slice out this image's predictions from the flat batch
        pred_score = pred_scores[start_idx:start_idx + len(proposal)]
        pred_delta = pred_deltas[start_idx:start_idx + len(proposal)]
        start_idx = start_idx + len(proposal)

        result = []
        for i in range(1, self.num_labels + 1):
            idxs = torch.where(pred_score[:, i] > self.score_threshold)[0]
            pred_scores_i = pred_score[idxs, i]
            pred_delta_i = pred_delta[idxs, :]
            proposal_i = proposal[idxs]
            detections_i = Box.delta_to_pos(proposal_i, pred_delta_i)

            # valid check
            pred_scores_i, detections_i = Box.box_valid_check(
                pred_scores_i, detections_i, img_size)

            # per-class nms
            keep = ops.nms(detections_i, pred_scores_i, self.nms_threshold)

            if cfg.visualize:
                visualize_box(self.img, detections_i[keep], './outputs/debug.jpg')

            result.append({
                'label': i,
                'score': pred_scores_i[keep],
                'bbox': detections_i[keep]
            })
        results.append(result)
    return results
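# Box.delta_to_pos is assumed to apply the standard R-CNN box-regression
# deltas (dx, dy, dw, dh) to proposals, i.e. decode [N, 4] deltas against
# [N, 4] boxes. A minimal sketch of that transform:
def _delta_to_pos_sketch(proposals, deltas):
    widths = proposals[:, 2] - proposals[:, 0]
    heights = proposals[:, 3] - proposals[:, 1]
    ctr_x = proposals[:, 0] + 0.5 * widths
    ctr_y = proposals[:, 1] + 0.5 * heights
    dx, dy, dw, dh = deltas.unbind(dim=1)
    pred_ctr_x = ctr_x + dx * widths
    pred_ctr_y = ctr_y + dy * heights
    pred_w = widths * torch.exp(dw)
    pred_h = heights * torch.exp(dh)
    return torch.stack([pred_ctr_x - 0.5 * pred_w, pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w, pred_ctr_y + 0.5 * pred_h], dim=1)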
def preprocessing(self, img, gt_data):
    # resize image
    img, scales = Img.resize_img(img, cfg.min_size, cfg.max_size)

    # normalize image
    img = Img.normalize_img(img, cfg.pixel_mean, cfg.pixel_std)
    img_size = img.shape

    # padding image
    img = Img.padding_img(img, cfg.pad_unit)

    # rescale ground truth to the resized image
    if self.is_train:
        gt_data['bboxs'] = Box.scale_box(gt_data['bboxs'], scales)
        gt_data['keypoints'] = Keypoint.scale_keypoint(gt_data['keypoints'], scales)

    return img, gt_data, img_size
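# Img.resize_img is assumed to follow the common detector convention: scale
# the shorter side to cfg.min_size while capping the longer side at
# cfg.max_size, preserving aspect ratio, and return the per-axis scales used
# above. A minimal cv2 sketch; the returned (scale, scale) shape is an
# assumption:
def _resize_img_sketch(img, min_size, max_size):
    h, w = img.shape[:2]
    scale = min_size / min(h, w)
    if scale * max(h, w) > max_size:
        scale = max_size / max(h, w)
    resized = cv2.resize(img, (int(w * scale), int(h * scale)))
    return resized, (scale, scale)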
def __init__(self, train='train'):
    # set annotation & data path
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    if train == 'train':
        self.annot_path = os.path.join(cur_dir, 'dataset', 'annotations', 'person_keypoints_train2017.json')
        self.root = os.path.join(cur_dir, 'dataset', 'train2017')
    elif train == 'val':
        self.annot_path = os.path.join(cur_dir, 'dataset', 'annotations', 'person_keypoints_val2017.json')
        self.root = os.path.join(cur_dir, 'dataset', 'val2017')
    elif train == 'test':
        self.annot_path = os.path.join(cur_dir, 'dataset', 'annotations', 'image_info_test2017.json')
        self.root = os.path.join(cur_dir, 'dataset', 'test2017')

    # dataset type
    self.is_train = train in ['train', 'val']

    # load data
    self.db = COCO(self.annot_path)
    self.img_ids = self.db.getImgIds(catIds=1)  # only get person image ids
    self.img_ids = list(sorted(self.img_ids))

    self.annots = []
    self.img_paths = []
    self.gt_datas = []
    for img_id in self.img_ids:
        # get annotations
        ann_ids = self.db.getAnnIds(imgIds=img_id)
        anns = self.db.loadAnns(ann_ids)

        # get image path
        path = self.db.loadImgs(img_id)[0]['file_name']

        category_ids = []
        bboxs = []
        keypoints = []
        num_keypoints = []
        areas = []
        iscrowds = []
        for ann in anns:
            category_ids.append(ann['category_id'])
            bboxs.append(Box.xywh_to_xyxy(ann['bbox']))
            keypoints.append(ann['keypoints'])
            num_keypoints.append(ann['num_keypoints'])
            areas.append(ann['area'])
            iscrowds.append(ann['iscrowd'])
            # bimask = self.db.annToMask(ann)
            # segmentations.append(0)

        self.annots.append(anns)
        self.img_paths.append(path)
        if self.is_train:
            self.gt_datas.append({
                'category_id': category_ids,
                'bboxs': bboxs,
                'keypoints': Keypoint.to_array(keypoints),
                'num_keypoints': num_keypoints,
                'areas': areas,
                'iscrowds': iscrowds,
                'segmentations': None
            })
        else:
            self.gt_datas.append({})

    # disable cv2 threading to avoid contention with DataLoader workers
    cv2.setNumThreads(0)
    print(len(self.img_ids))
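# Keypoint.to_array is assumed to reshape the flat COCO keypoint triplets
# ([x1, y1, v1, x2, y2, v2, ...], 17 joints per person) into an [M, 17, 3]
# array; a minimal sketch under that assumption:
def _keypoints_to_array_sketch(keypoints):
    return np.array(keypoints, dtype=np.float32).reshape(len(keypoints), -1, 3)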
def visualize_input_image(img, gt_boxes, output_path):
    img = Box.visualize_box(img, gt_boxes, color=(0, 255, 0), thickness=2)
    cv2.imwrite(output_path, img)
def loss(self, proposals, pred_scores, pred_deltas, proposal_labels, match_gt_boxes, num_features):
    start_idx = 0
    cls_loss = 0.0
    loc_loss = 0.0
    batch_size = len(proposal_labels)
    for proposal, gt_label, gt_box, interval in zip(
            proposals, proposal_labels, match_gt_boxes, num_features):
        pred_score = pred_scores[start_idx:start_idx + interval]
        pred_delta = pred_deltas[start_idx:start_idx + interval]
        start_idx = start_idx + interval

        # score matching: all sampled proposals contribute to classification
        idxs = torch.where(gt_label >= 0)[0]
        pos_score = pred_score[idxs]
        pos_label = gt_label[idxs]

        # score loss function
        cls_loss = cls_loss + F.cross_entropy(
            pos_score, pos_label.long(), reduction="sum")

        # deltas matching: only foreground proposals contribute to localization
        idxs = torch.where(gt_label > 0)[0]
        pos_proposal = proposal[idxs]                             # proposals
        pos_gt_boxes = gt_box[idxs]                               # gt boxes
        gt_deltas = Box.pos_to_delta(pos_gt_boxes, pos_proposal)  # target
        pos_deltas = pred_delta[idxs]                             # pred

        # delta loss function
        loc_loss = loc_loss + smooth_l1_loss(
            pos_deltas, gt_deltas, beta=cfg.smooth_l1_beta)

    # normalizer
    cls_loss = cls_loss / (batch_size * self.num_sample)
    loc_loss = loc_loss / (batch_size * self.num_sample)

    if cfg.visualize and self.training:
        # for visualization during training
        d_pred_scores = pred_scores[:num_features[0]]
        d_pred_deltas = pred_deltas[:num_features[0]]

        idxs = torch.where(proposal_labels[0] == 1)[0]
        pos_proposals = proposals[0][idxs]
        pos_deltas = d_pred_deltas[idxs]
        d_match_gt_boxes = match_gt_boxes[0][idxs]
        pos_proposal = Box.delta_to_pos(pos_proposals, pos_deltas)
        visualize_labeled_box(self.img, d_match_gt_boxes, pos_proposal, pos_proposal,
                              './outputs/debug_proposal_image.jpg')

        scores, idx = d_pred_scores[:, 1].sort(descending=True)
        scores, topk_idx = scores[:30], idx[:30]
        d_delta = d_pred_deltas[topk_idx]
        d_proposals = proposals[0][topk_idx]
        pos_proposal = Box.delta_to_pos(d_proposals, d_delta)
        visualize_labeled_box(self.img, d_match_gt_boxes, pos_proposal, pos_proposal,
                              './outputs/debug_final_image.jpg')

    return cls_loss, loc_loss
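# Box.pos_to_delta is presumably the inverse of the delta_to_pos decoding
# used above: encode target boxes as (dx, dy, dw, dh) relative to proposals,
# as in the standard R-CNN parameterization. A minimal sketch:
def _pos_to_delta_sketch(gt_boxes, proposals):
    pw = proposals[:, 2] - proposals[:, 0]
    ph = proposals[:, 3] - proposals[:, 1]
    px = proposals[:, 0] + 0.5 * pw
    py = proposals[:, 1] + 0.5 * ph
    gw = gt_boxes[:, 2] - gt_boxes[:, 0]
    gh = gt_boxes[:, 3] - gt_boxes[:, 1]
    gx = gt_boxes[:, 0] + 0.5 * gw
    gy = gt_boxes[:, 1] + 0.5 * gh
    return torch.stack([(gx - px) / pw, (gy - py) / ph,
                        torch.log(gw / pw), torch.log(gh / ph)], dim=1)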