def get_ratio(self, boxlist, is_train): """ boxlist: [Bbox, Bbox, ...] """ """ for those without keypoints: global, not partition """ return_boxlist = [] device = boxlist[0].bbox.device for target in boxlist: target_bbox = target.bbox keypoint = target.get_field("keypoints") kp = keypoint.keypoints n, _, _ = kp.shape bbox = target.bbox img_size = target.size new_bbox = [] new_pad = [] for k in range(n): p_kp = kp[k] pad = 1.0 if is_train else 0. if p_kp.sum().item() > 0: pad = 0. for iteration, i in enumerate(self._idx[::-1][:-1]): # assume thorax exists vis = False store_y = None for j in i: if p_kp[j][2] > self.INVIS_THRSH: vis = True if vis: store_y = max(p_kp[i[0]][1], p_kp[i[1]][1]) break if not vis: # hips, knees, ankles not visible pad += sum(self._pratio[2:]) res = F.relu(target_bbox[k, 3]-p_kp[self.thrx_idx, 1]) known = F.relu(p_kp[self.thrx_idx, 1]-target_bbox[k, 1]) tmp = F.relu((self.r_thrx2hip/self.r_head2thrx)*known-res) # pixel pad += (self.r_thrx2hip*tmp/(tmp+res)).item() if p_kp[self.thrx_idx, 1].item() == 0: pad = 1.0 elif iteration == 0: pad = 0. else: pad += sum(self._pratio[::-1][:iteration]) res = F.relu(target_bbox[k, 3]-store_y) known = F.relu(p_kp[self.thrx_idx, 1]-target_bbox[k, 1]) tmp = F.relu((self._pratio[::-1][iteration]/self.r_head2thrx)*known-res) pad += (self._pratio[::-1][iteration]*tmp/(tmp+res)).item() if p_kp[self.thrx_idx, 1].item() == 0: pad = 1.0 p_bbox = 1.*bbox[k, :] h = p_bbox[3] - p_bbox[1] if pad == 1.0: new_h = h if not is_train: pad = 0. p_bbox[3] = p_bbox[1] + new_h new_bbox.append(p_bbox.tolist()) new_pad.append(pad) else: if not is_train: curr_aug_per = 1 else: curr_aug_per = self.aug_per + 1 p_bbox_repeat = p_bbox.repeat(curr_aug_per, 1) for ap in range(curr_aug_per): if ap == 0: random_cut = 0. else: random_cut = self.rand_cut*random.random() # 0-0.3 update_pad = pad + (1.-pad)*random_cut p_bbox_repeat_ = p_bbox_repeat[ap] new_h = h*(1./(1.-update_pad)) p_bbox_repeat_[3] = p_bbox_repeat_[1] + new_h new_bbox.append(p_bbox_repeat_.tolist()) new_pad.append(update_pad) new_bboxlist = BoxList(new_bbox, img_size, mode="xyxy") new_bboxlist._copy_extra_fields(target) new_bboxlist.add_field("pad_ratio", torch.tensor(new_pad)) return_boxlist.append(new_bboxlist) return_boxlist = [return_box.to(device) for return_box in return_boxlist] return return_boxlist
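# get_ratio above stores a per-box "pad_ratio" and stretches a truncated box so
# that the visible part occupies (1 - pad) of the new height, i.e.
# new_h = h / (1 - pad). A tiny numeric check of that relation (values made up):
h = 120.0    # original (truncated) box height in pixels
pad = 0.25   # fraction of the full person assumed to be cut off below the box
new_h = h * (1. / (1. - pad))
assert abs(new_h - 160.0) < 1e-6              # the box grows from 120 px to 160 px
assert abs(pad * new_h - (new_h - h)) < 1e-6  # the added 40 px are exactly pad * new_h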
def forward_for_single_feature_map(self, anchors, objectness, box_regression): """ Arguments: anchors: list[BoxList] objectness: tensor of size N, A, H, W box_regression: tensor of size N, A * 4, H, W """ device = objectness.device N, A, H, W = objectness.shape # put in the same format as anchors objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1) objectness = objectness.sigmoid() box_regression = permute_and_flatten(box_regression, N, A, 4, H, W) num_anchors = A * H * W if self.onnx_export: from torch.onnx import operators num_anchors = operators.shape_as_tensor(objectness)[1].unsqueeze(0) pre_nms_top_n = torch.min( torch.cat((torch.tensor([self.pre_nms_top_n], dtype=torch.long), num_anchors), 0)) else: pre_nms_top_n = min(self.pre_nms_top_n, num_anchors) objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True) batch_idx = torch.arange(N, device=device)[:, None] if self.onnx_export: # NOTE: for now only batch == 1 is supported for ONNX export. assert topk_idx.size(0) == 1 topk_idx = topk_idx.squeeze(0) box_regression = box_regression.index_select(1, topk_idx) else: box_regression = box_regression[batch_idx, topk_idx] image_shapes = [box.size for box in anchors] concat_anchors = torch.cat([a.bbox for a in anchors], dim=0) if self.onnx_export: concat_anchors = concat_anchors.reshape(N, -1, 4).index_select( 1, topk_idx) else: concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx] proposals = self.box_coder.decode(box_regression.view(-1, 4), concat_anchors.view(-1, 4)) proposals = proposals.view(N, -1, 4) result = [] for proposal, score, im_shape in zip(proposals, objectness, image_shapes): boxlist = BoxList(proposal, im_shape, mode="xyxy") boxlist.add_field("objectness", score) boxlist = boxlist.clip_to_image(remove_empty=False) boxlist = remove_small_boxes(boxlist, self.min_size, self.onnx_export) boxlist = boxlist_nms( boxlist, self.nms_thresh, max_proposals=self.post_nms_top_n, score_field="objectness", ) result.append(boxlist) return result
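# forward_for_single_feature_map above relies on permute_and_flatten to turn a
# (N, A*C, H, W) head output into (N, H*W*A, C) rows that line up with the anchor
# ordering. A minimal sketch of what that helper is assumed to do (the name
# permute_and_flatten_sketch and the exact view/permute order are illustrative):
import torch


def permute_and_flatten_sketch(layer, N, A, C, H, W):
    # (N, A*C, H, W) -> (N, A, C, H, W) -> (N, H, W, A, C) -> (N, H*W*A, C)
    layer = layer.view(N, A, C, H, W)
    layer = layer.permute(0, 3, 4, 1, 2)
    return layer.reshape(N, -1, C)


# e.g. an objectness map with N=2 images, A=3 anchors per location, H=W=4:
# permute_and_flatten_sketch(torch.randn(2, 3, 4, 4), 2, 3, 1, 4, 4).shape == (2, 48, 1)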
def convert_kitti_instance_only(root, ann_file, out_dir, dataset): image_index, label_list, boxes_list, boxes_3d_list, \ alphas_list = get_pkl_element(ann_file) number_image = len(image_index) image_lists = [] calib_lists = [] depth_list = [] for i in range(number_image): image_lists.append(root + '/training' + '/image_2/' + image_index[i] + ".png") calib_lists.append(root + '/training' + '/calib/' + image_index[i] + ".txt") depth_list.append(root + '/training' + '/depth/' + image_index[i] + "_01.png.npz") # img_id = 0 # ann_id = 0 img_id = 3712 ann_id = 11855 # cat_id = 1 category_dict = {'car': 1} category_instancesonly = [ 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 'bicycle', ] ann_dict = {} images = [] annotations = [] for i, id in image_index.items(): if len(images) % 50 == 0: print("Processed %s images, %s annotations" % (len(images), len(annotations))) image = {} image['id'] = img_id img_id += 1 img = Image.open(image_lists[i]).convert("RGB") width, height = img.size image['width'] = width image['height'] = height image['file_name'] = image_lists[i].split('/')[-1] image['seg_file_name'] = image['file_name'] images.append(image) num_instances = label_list[i].shape[0] boxes = boxes_list[i] boxes = torch.as_tensor(boxes).reshape(-1, 4) box2d = BoxList(boxes, img.size, mode="xyxy") area = box2d.area().tolist() boxes = box2d.convert('xywh') boxes = boxes.bbox.tolist() for j in range(num_instances): ann = {} ann['id'] = ann_id ann_id += 1 ann['image_id'] = image['id'] ann['segmentation'] = [] ann['category_id'] = category_dict['car'] ann['iscrowd'] = 0 ann['area'] = area[j] ann['bbox'] = boxes[j] annotations.append(ann) ann_dict['images'] = images categories = [{ "id": category_dict[name], "name": name } for name in category_dict] ann_dict['categories'] = categories ann_dict['annotations'] = annotations print("Num categories: %s" % len(categories)) print("Num images: %s" % len(images)) print("Num annotations: %s" % len(annotations)) with open( os.path.join(out_dir, 'instancesonly_filtered_gtFine_' + dataset + '.json'), 'w') as outfile: outfile.write(json.dumps(ann_dict))
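# For reference, the JSON written by convert_kitti_instance_only above follows the
# COCO "instances" layout; a hand-written miniature example with made-up values
# (only the field names and starting ids are taken from the function):
example_ann_dict = {
    "images": [
        {"id": 3712, "width": 1242, "height": 375,
         "file_name": "000001.png", "seg_file_name": "000001.png"},
    ],
    "categories": [{"id": 1, "name": "car"}],
    "annotations": [
        {"id": 11855, "image_id": 3712, "segmentation": [],
         "category_id": 1, "iscrowd": 0, "area": 5200.0,
         "bbox": [100.0, 150.0, 80.0, 65.0]},  # xywh, as produced by convert('xywh')
    ],
}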
def forward(self, x, rel_pair_idxs, boxes): """ Arguments: x (tuple[tensor, tensor]): x contains the relation logits and finetuned object logits from the relation model. rel_pair_idxs (list[tensor]): subject and object indice of each relation, the size of tensor is (num_rel, 2) boxes (list[BoxList]): bounding boxes that are used as reference, one for ech image Returns: results (list[BoxList]): one BoxList for each image, containing the extra fields labels and scores """ relation_logits, refine_logits = x if self.attribute_on: if isinstance(refine_logits[0], (list, tuple)): finetune_obj_logits, finetune_att_logits = refine_logits else: # just use attribute feature, do not actually predict attribute self.attribute_on = False finetune_obj_logits = refine_logits else: finetune_obj_logits = refine_logits results = [] for i, (rel_logit, obj_logit, rel_pair_idx, box) in enumerate(zip( relation_logits, finetune_obj_logits, rel_pair_idxs, boxes )): if self.attribute_on: att_logit = finetune_att_logits[i] att_prob = torch.sigmoid(att_logit) obj_class_prob = F.softmax(obj_logit, -1) obj_class_prob[:, 0] = 0 # set background score to 0 num_obj_bbox = obj_class_prob.shape[0] num_obj_class = obj_class_prob.shape[1] if self.use_gt_box: obj_scores, obj_pred = obj_class_prob[:, 1:].max(dim=1) obj_pred = obj_pred + 1 else: # NOTE: by kaihua, apply late nms for object prediction obj_pred = obj_prediction_nms(box.get_field('boxes_per_cls'), obj_logit, self.later_nms_pred_thres) obj_score_ind = torch.arange(num_obj_bbox, device=obj_logit.device) * num_obj_class + obj_pred obj_scores = obj_class_prob.view(-1)[obj_score_ind] assert obj_scores.shape[0] == num_obj_bbox obj_class = obj_pred if self.use_gt_box: boxlist = box else: # mode==sgdet # apply regression based on finetuned object class device = obj_class.device batch_size = obj_class.shape[0] regressed_box_idxs = obj_class boxlist = BoxList(box.get_field('boxes_per_cls')[torch.arange(batch_size, device=device), regressed_box_idxs], box.size, 'xyxy') boxlist.add_field('pred_labels', obj_class) # (#obj, ) boxlist.add_field('pred_scores', obj_scores) # (#obj, ) if self.attribute_on: boxlist.add_field('pred_attributes', att_prob) # sorting triples according to score production obj_scores0 = obj_scores[rel_pair_idx[:, 0]] obj_scores1 = obj_scores[rel_pair_idx[:, 1]] rel_class_prob = F.softmax(rel_logit, -1) rel_scores, rel_class = rel_class_prob[:, 1:].max(dim=1) rel_class = rel_class + 1 # TODO Kaihua: how about using weighted some here? e.g. rel*1 + obj *0.8 + obj*0.8 triple_scores = rel_scores * obj_scores0 * obj_scores1 _, sorting_idx = torch.sort(triple_scores.view(-1), dim=0, descending=True) rel_pair_idx = rel_pair_idx[sorting_idx] rel_class_prob = rel_class_prob[sorting_idx] rel_labels = rel_class[sorting_idx] boxlist.add_field('rel_pair_idxs', rel_pair_idx) # (#rel, 2) boxlist.add_field('pred_rel_scores', rel_class_prob) # (#rel, #rel_class) boxlist.add_field('pred_rel_labels', rel_labels) # (#rel, ) # should have fields : rel_pair_idxs, pred_rel_class_prob, pred_rel_labels, pred_labels, pred_scores # Note # TODO Kaihua: add a new type of element, which can have different length with boxlist (similar to field, except that once # the boxlist has such an element, the slicing operation should be forbidden.) # it is not safe to add fields about relation into boxlist! results.append(boxlist) return results
def do_train( model, model_ema, data_loader, optimizer, scheduler, checkpointer, device, local_rank, checkpoint_period, cfg_arg, arguments, ): logger = logging.getLogger("maskrcnn_benchmark.trainer") logger.info("Start training") meters = MetricLogger(delimiter=" ") meters_ema = MetricLogger(delimiter=" ") max_iter = len(data_loader) start_iter = arguments["iteration"] ema_decay = arguments["ema_decay"] loss_semi = arguments['loss_semi'] temporal_save_path = cfg_arg["temporal_save_path"] model.train() model_ema.train() box_coder = BoxCoder(weights=(10., 10., 5., 5.)) temporal_ens = {} start_training_time = time.time() end = time.time() labeled_database = arguments["HYPER_PARAMETERS"]['LABELED_DATABASE'] temporal_supervised_losses = [] for iteration, (images, targets_with_trans_info, idx) in enumerate(data_loader, start_iter): targets = [_iter[0] for _iter in targets_with_trans_info] trans_info = [_iter[1] for _iter in targets_with_trans_info] try: db_idx, img_idx, idx_name, bboxes_batch = map_to_img( data_loader, idx) temporal_ens_bboxes = [ ensemble_bboxes(_boxes, _im_sz, arguments["ANCHOR_STRIDES"], arguments["HYPER_PARAMETERS"]['ENS_THRE'], device) for _boxes, _im_sz in zip(bboxes_batch, images.image_sizes) ] img_size = [(_sz[1], _sz[0]) for _sz in images.image_sizes] pred_trans_info = copy.deepcopy(trans_info) temporal_ens_pred = [] for i, _sz in enumerate(img_size): pred_trans_info[i][1] = _sz temporal_ens_per = [ trans_reverse(_temporal_ens, pred_trans_info[i]).to(device) for _temporal_ens in temporal_ens_bboxes[i] ] temporal_ens_pred.append(temporal_ens_per) db_w = [] for i, _db in enumerate(db_idx): if _db not in labeled_database: _bbox = BoxList( torch.zeros([1, 4]), (images.image_sizes[i][1], images.image_sizes[i][0]), mode="xyxy") _bbox.add_field('labels', torch.ones([1])) targets[i] = _bbox db_w.append(0.) else: db_w.append(1.) 
            if any(len(target) < 1 for target in targets):
                logger.error(
                    f"Iteration={iteration + 1} || Image Ids used for training {idx} || "
                    f"targets Length={[len(target) for target in targets]}")
                continue
            data_time = time.time() - end
            iteration = iteration + 1
            arguments["iteration"] = iteration

            images = images.to(device)
            targets = [target.to(device) for target in targets]

            # EMA update of the teacher weights (see the sketch after this function)
            update_ema_variables(model, model_ema, ema_decay, iteration)
            _loss_dict, result = model(images, targets)
            # --------------------- losses masked by the labeled-image weights
            with torch.no_grad():
                _loss_dict_ema, result_ema = model_ema(images, targets)

            is_labeled_db_weight = torch.tensor(
                db_w, dtype=torch.float32).to(device)
            loss_dict = {}
            loss_dict_ema = {}
            for _key in _loss_dict.keys():
                loss_dict[_key] = torch.sum(
                    torch.stack(_loss_dict[_key], dim=0) * is_labeled_db_weight)
                loss_dict_ema[_key] = torch.sum(
                    torch.stack(_loss_dict_ema[_key], dim=0) * is_labeled_db_weight)
            # loss_dict = _loss_dict
            # loss_dict_ema = _loss_dict_ema

            #result_origin = [trans_reverse(_res,_info) for _res,_info in zip(result_ema,trans_info)]
            #result_origin = predict_collect_postprocess(arguments['postprocess'],result_ema,trans_info)
            result_origin = predict_retina_postprocess(
                arguments['postprocess'], box_coder, result_ema, trans_info,
                images.image_sizes)

            # any_zeros = [_iter.bbox.shape[0] == 0 for _iter in temporal_ens_pred]
            # if any(any_zeros):
            #     loss_dict['semi_box_reg'] = torch.tensor(0,dtype=torch.float32,device=device)
            #     loss_dict['semi_cls'] = torch.tensor(0,dtype=torch.float32,device=device)
            # else:
            #     semi_loss = loss_semi(
            #         result, temporal_ens_pred)
            #     for _key in semi_loss.keys():
            #         loss_dict[_key] = torch.sum(torch.stack(semi_loss[_key],dim=0) * (1 - db_weight)) * arguments["semi_weight"]

            # balance losses
            with torch.no_grad():
                supervised_loss = (loss_dict['loss_retina_cls'] +
                                   loss_dict['loss_retina_reg']) / (
                                       np.sum(db_w) + 0.1)
                temporal_supervised_losses.append(supervised_loss)
                temporal_supervised_losses = temporal_supervised_losses[-100:]
                sup_loss = torch.stack(temporal_supervised_losses).mean()
                meters.update(sup_loss=sup_loss)
                if get_world_size() > 1:
                    torch.distributed.all_reduce(
                        torch.stack(temporal_supervised_losses).mean(),
                        op=torch.distributed.ReduceOp.SUM)
                balance_weight = min(1. / (sup_loss / 0.28)**12, 1.)
semi_loss = semi_loss_fn( result, result_ema, temporal_ens_pred, images.image_sizes, box_coder, n_cls=arguments["HYPER_PARAMETERS"]['NCLS'], reg_cons_w=arguments["HYPER_PARAMETERS"]['REG_CONSIST_WEIGHT']) semi_loss_weight = semi_weight_by_epoch( iteration, start_iter=arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM'] * arguments["HYPER_PARAMETERS"]['START_ITER'], rampup_length=arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM'] * arguments["HYPER_PARAMETERS"]['RAMPUP_LENGTH'], consistence_weight=arguments["HYPER_PARAMETERS"] ['CONSISTENCE_WEIGHT'], consistence_trunc=arguments["HYPER_PARAMETERS"] ['MAX_CONSISTENT_LOSS']) #semi_weight_by_epoch(iteration) for _key in semi_loss.keys(): #loss_dict[_key] = torch.sum(semi_loss[_key] * (1 - is_labeled_db_weight))*semi_loss_weight*balance_weight # not used labeled loss_dict[_key] = torch.sum(semi_loss[_key]) * semi_loss_weight for i, (_id, _labeled) in enumerate(zip(idx_name, db_w)): # if _labeled == 1: # continue result_dict = { 'iteration': iteration, 'result': result_origin[i] } if _id in temporal_ens.keys(): temporal_ens[_id].append(result_dict) else: temporal_ens[_id] = [result_dict] #print('id={},{},scores={}----------{}'.format(idx_name[0],idx_name[1],result_origin[0].get_field('objectness')[:5],result_origin[1].get_field('objectness')[:5])) losses = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) meters.update(loss=losses_reduced, **loss_dict_reduced) loss_dict_reduced_ema = reduce_loss_dict(loss_dict_ema) losses_reduced_ema = sum( loss for loss in loss_dict_reduced_ema.values()) meters_ema.update(loss=losses_reduced_ema, **loss_dict_reduced_ema) optimizer.zero_grad() # Note: If mixed precision is not used, this ends up doing nothing # Otherwise apply loss scaling for mixed-precision recipe with amp.scale_loss(losses, optimizer) as scaled_losses: scaled_losses.backward() if not iteration < arguments["HYPER_PARAMETERS"][ 'EPOCH_BATCH_NUM'] * arguments["HYPER_PARAMETERS"][ 'START_ITER']: optimizer.step() #scheduler.step() batch_time = time.time() - end end = time.time() meters.update(time=batch_time, data=data_time) eta_seconds = meters.time.global_avg * (max_iter - iteration) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) if iteration % 20 == 0 or iteration == max_iter: logger.info( meters.delimiter.join([ "eta: {eta}", "iter: {iter}", "{meters}", "{meters_ema}", "lr: {lr:.6f}", "semi_w:{semi_w:2.3f}", "supervised loss{sup_loss:2.3f}," "balance_weight{balance_weight:2.3f}," "max mem: {memory:.0f}", ]).format( eta=eta_string, iter=iteration, meters=str(meters), meters_ema=str(meters_ema), lr=optimizer.param_groups[0]["lr"], semi_w=semi_loss_weight, sup_loss=sup_loss, balance_weight=balance_weight, memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0, )) if (iteration - 50) % 100 == 0: for _key in temporal_ens.keys(): for _iter in temporal_ens[_key]: str_folder = os.path.join( temporal_save_path, _key) #"{}/{}".format(temporal_save_path,_key) str_file = '{}/{}_loc{}_iter_x{:07d}.pt'.format( str_folder, _key, local_rank, _iter['iteration']) if not os.path.exists(str_folder): os.makedirs(str_folder) torch.save(_iter['result'], str_file) del _iter['result'] del temporal_ens temporal_ens = {} if iteration % checkpoint_period == 0: save_time = time.time() checkpointer.save("model_{:07d}".format(iteration), **arguments) if iteration == max_iter: checkpointer.save("model_final", 
**arguments) except Exception as e: print('error in file ', idx_name, img_idx) raise e total_training_time = time.time() - start_training_time total_time_str = str(datetime.timedelta(seconds=total_training_time)) logger.info("Total training time: {} ({:.4f} s / it)".format( total_time_str, total_training_time / (max_iter)))
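# do_train above keeps a mean-teacher copy of the detector via
# update_ema_variables(model, model_ema, ema_decay, iteration). That helper is not
# shown here; a minimal sketch of the usual exponential-moving-average update
# (the name update_ema_variables_sketch is illustrative, and the ramp-up of alpha
# is an assumption, not necessarily what the original helper does):
import torch


def update_ema_variables_sketch(model, ema_model, alpha, global_step):
    # Ramp alpha up from 0 so the teacher tracks the student closely early on.
    alpha = min(1. - 1. / (global_step + 1), alpha)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        # teacher <- alpha * teacher + (1 - alpha) * student
        ema_param.data.mul_(alpha).add_(param.data, alpha=1. - alpha)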
def filter_results(self, boxlist, num_classes): """Returns bounding-box detection results by thresholding on scores and applying non-maximum suppression (NMS). """ # unwrap the boxlist to avoid additional overhead. # if we had multi-class NMS, we could perform this directly on the boxlist boxes = boxlist.bbox.reshape(-1, num_classes * 4) boxes_per_cls = boxlist.bbox.reshape(-1, num_classes, 4) scores = boxlist.get_field("pred_scores").reshape(-1, num_classes) device = scores.device result = [] orig_inds = [] # Apply threshold on detection probabilities and apply NMS # Skip j = 0, because it's the background class inds_all = scores > self.score_thresh for j in range(1, num_classes): inds = inds_all[:, j].nonzero().squeeze(1) scores_j = scores[inds, j] boxes_j = boxes[inds, j * 4:(j + 1) * 4] boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy") boxlist_for_class.add_field("pred_scores", scores_j) boxlist_for_class, keep = boxlist_nms( boxlist_for_class, self.nms, max_proposals=self.post_nms_per_cls_topn, score_field='pred_scores') inds = inds[keep] num_labels = len(boxlist_for_class) boxlist_for_class.add_field( "pred_labels", torch.full((num_labels, ), j, dtype=torch.int64, device=device)) result.append(boxlist_for_class) orig_inds.append(inds) #NOTE: kaihua, according to Neural-MOTIFS (and my experiments, we need remove duplicate bbox) if self.nms_filter_duplicates or self.save_proposals: assert len(orig_inds) == (num_classes - 1) # set all bg to zero inds_all[:, 0] = 0 for j in range(1, num_classes): inds_all[:, j] = 0 orig_idx = orig_inds[j - 1] inds_all[orig_idx, j] = 1 dist_scores = scores * inds_all.float() scores_pre, labels_pre = dist_scores.max(1) final_inds = scores_pre.nonzero() assert final_inds.dim() != 0 final_inds = final_inds.squeeze(1) scores_pre = scores_pre[final_inds] labels_pre = labels_pre[final_inds] result = BoxList(boxes_per_cls[final_inds, labels_pre], boxlist.size, mode="xyxy") result.add_field("pred_scores", scores_pre) result.add_field("pred_labels", labels_pre) orig_inds = final_inds else: result = cat_boxlist(result) orig_inds = torch.cat(orig_inds, dim=0) number_of_detections = len(result) # Limit to max_per_image detections **over all classes** if number_of_detections > self.detections_per_img > 0: cls_scores = result.get_field("pred_scores") image_thresh, _ = torch.kthvalue( cls_scores.cpu(), number_of_detections - self.detections_per_img + 1) keep = cls_scores >= image_thresh.item() keep = torch.nonzero(keep).squeeze(1) result = result[keep] orig_inds = orig_inds[keep] return result, orig_inds, boxes_per_cls[orig_inds]
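# filter_results above caps the detections per image by keeping only scores that
# are >= the k-th highest score, with k = detections_per_img. A small standalone
# example of that torch.kthvalue pattern (tensors made up for illustration):
import torch

scores = torch.tensor([0.9, 0.2, 0.7, 0.4, 0.8])
detections_per_img = 3
n = scores.numel()
# the (n - k + 1)-th smallest value is the k-th largest, so exactly k scores pass
image_thresh, _ = torch.kthvalue(scores, n - detections_per_img + 1)
keep = torch.nonzero(scores >= image_thresh).squeeze(1)
# keep -> tensor([0, 2, 4]), i.e. the three highest-scoring detections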
def main():
    result_file = 'model_path.pth'
    input_img_folder = './input_image_folder'
    output_folder = './folder'
    config_file = './retinanet_R-50-FPN_1x_coco_unlabeled.yaml'
    result_predict = torch.load(result_file)
    jpg_output = './output_jpg'
    cfg.merge_from_file(config_file)
    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=False)[0]
    score_thr = 0.43
    score_sel = 0.3
    Zeros_obj = 0
    hist_means = []
    hist_ious = []
    Scores_minor = 0
    ious_hard = 0
    sel_file_id = []
    for _id, _bbox in enumerate(result_predict):
        print(_id)
        img_info = data_loaders_val.dataset.get_img_info(_id)
        sel_num = (_bbox.get_field('scores') > score_sel).sum()
        img_src = os.path.join(output_folder, img_info['file_name'])
        # if img_src.find('000000335584')<0:
        #     continue
        if sel_num < 1:
            Zeros_obj += 1
            # if not os.path.exists(img_src):
            #     continue
            # shutil.copy(img_src,jpg_output)
            continue
        # calculate the mean score of the selected detections
        sel_scores = _bbox.get_field('scores')[
            _bbox.get_field('scores') > score_sel]
        mean_scores = sel_scores.mean().numpy() * 100
        hist_means.append(mean_scores)
        if mean_scores < (score_thr * 100):
            Scores_minor += 1
            # if not os.path.exists(img_src):
            #     continue
            # shutil.copy(img_src,jpg_output)
            continue
        # calculate the pairwise IoUs of the selected detections
        ind_sel = _bbox.get_field('scores') > score_sel
        box_sel = BoxList(_bbox.bbox[ind_sel], _bbox.size)
        ious = boxlist_iou(box_sel, box_sel) - torch.eye(len(box_sel))
        ious_scores = ious.mean() * 1000
        hist_ious.append(ious_scores)
        if ious_scores > 150:
            ious_hard += 1
            # if not os.path.exists(img_src):
            #     continue
            # shutil.copy(img_src,jpg_output)
            continue
        sel_file_id.append(img_info['file_name'])
    plt.hist(hist_ious, bins=5)
    plt.gca().set(title='Frequency Histogram of mean pairwise IoU',
                  ylabel='Frequency')
    plt.savefig('./test2.jpg')
    np.save('sel_unlabeled_ids_r101.npy', sel_file_id)
    print('zeros object = ', Zeros_obj, 'Scores_minor', Scores_minor,
          'ious_hard', ious_hard, 'total sample', len(result_predict),
          'select sample', len(sel_file_id))
def forward_for_single_feature_map(self, anchors, objectness, box_regression): """ Arguments: anchors: list[BoxList] objectness: tensor of size N, A, H, W box_regression: tensor of size N, A * 4, H, W """ device = objectness.device N, A, H, W = objectness.shape num_anchors = A * H * W # If inputs are on GPU, use a faster path use_fast_cuda_path = (objectness.is_cuda and box_regression.is_cuda) # Encompasses box decode, clip_to_image and remove_small_boxes calls if use_fast_cuda_path: objectness = objectness.reshape(N, -1) # Now [N, AHW] objectness = objectness.sigmoid() pre_nms_top_n = min(self.pre_nms_top_n, num_anchors) objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True) # Get all image shapes, and cat them together image_shapes = [box.size for box in anchors] image_shapes_cat = torch.tensor([box.size for box in anchors], device=objectness.device).float() # Get a single tensor for all anchors concat_anchors = torch.cat([a.bbox for a in anchors], dim=0) # Note: Take all anchors, we'll index accordingly inside the kernel # only take the anchors corresponding to the topk boxes concat_anchors = concat_anchors.reshape(N, -1, 4) # [batch_idx, topk_idx] # Return pre-nms boxes, associated scores and keep flag # Encompasses: # 1. Box decode # 2. Box clipping # 3. Box filtering # At the end we need to keep only the proposals & scores flagged # Note: topk_idx, objectness are sorted => proposals, objectness, keep are also # sorted -- this is important later proposals, objectness, keep = C.GeneratePreNMSUprightBoxes( N, A, H, W, topk_idx, objectness.float( ), # Need to cast these as kernel doesn't support fp16 box_regression.float(), concat_anchors, image_shapes_cat, pre_nms_top_n, self.min_size, self.box_coder.bbox_xform_clip, True) # view as [N, pre_nms_top_n, 4] proposals = proposals.view(N, -1, 4) objectness = objectness.view(N, -1) else: # reverse the reshape from before ready for permutation objectness = objectness.reshape(N, A, H, W) objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1) objectness = objectness.sigmoid() pre_nms_top_n = min(self.pre_nms_top_n, num_anchors) objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True) # put in the same format as anchors box_regression = box_regression.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2) box_regression = box_regression.reshape(N, -1, 4) batch_idx = torch.arange(N, device=device)[:, None] box_regression = box_regression[batch_idx, topk_idx] image_shapes = [box.size for box in anchors] concat_anchors = torch.cat([a.bbox for a in anchors], dim=0) concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx] proposals = self.box_coder.decode(box_regression.view(-1, 4), concat_anchors.view(-1, 4)) proposals = proposals.view(N, -1, 4) # handle non-fast path without changing the loop if not use_fast_cuda_path: keep = [None for _ in range(N)] result = [] for proposal, score, im_shape, k in zip(proposals, objectness, image_shapes, keep): if use_fast_cuda_path: # Note: Want k to be applied per-image instead of all-at-once in batched code earlier # clip_to_image and remove_small_boxes already done in single kernel p = proposal.masked_select(k[:, None]).view(-1, 4) score = score.masked_select(k) boxlist = BoxList(p, im_shape, mode="xyxy") else: boxlist = BoxList(proposal, im_shape, mode="xyxy") boxlist = boxlist.clip_to_image(remove_empty=False) boxlist = remove_small_boxes(boxlist, self.min_size) boxlist.add_field("objectness", score) boxlist = boxlist_nms( boxlist, self.nms_thresh, 
max_proposals=self.post_nms_top_n, score_field="objectness", ) result.append(boxlist) return result
def forward_for_single_feature_map(self, anchors, box_cls, box_regression, pre_nms_thresh): """ Arguments: anchors: list[BoxList] box_cls: tensor of size N, A * C, H, W box_regression: tensor of size N, A * 4, H, W """ device = box_cls.device N, _, H, W = box_cls.shape A = int(box_regression.size(1) / 4) C = int(box_cls.size(1) / A) # put in the same format as anchors box_cls = box_cls.view(N, -1, C, H, W).permute(0, 3, 4, 1, 2) box_cls = box_cls.reshape(N, -1, C) box_cls = box_cls.sigmoid() box_regression = box_regression.view(N, -1, 4, H, W) box_regression = box_regression.permute(0, 3, 4, 1, 2) box_regression = box_regression.reshape(N, -1, 4) num_anchors = A * H * W results = [[] for _ in range(N)] candidate_inds = box_cls > pre_nms_thresh if candidate_inds.sum().item() == 0: empty_boxlists = [] for a in anchors: empty_boxlist = BoxList(torch.Tensor(0, 4).to(device), a.size) empty_boxlist.add_field("labels", torch.LongTensor([]).to(device)) empty_boxlist.add_field("scores", torch.Tensor([]).to(device)) empty_boxlists.append(empty_boxlist) return empty_boxlists pre_nms_top_n = candidate_inds.view(N, -1).sum(1) pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n) for batch_idx, (per_box_cls, per_box_regression, per_pre_nms_top_n, \ per_candidate_inds, per_anchors) in enumerate(zip( box_cls, box_regression, pre_nms_top_n, candidate_inds, anchors)): # Sort and select TopN per_box_cls = per_box_cls[per_candidate_inds] per_box_cls, top_k_indices = \ per_box_cls.topk(per_pre_nms_top_n, sorted=False) per_candidate_nonzeros = \ per_candidate_inds.nonzero()[top_k_indices, :] per_box_loc = per_candidate_nonzeros[:, 0] per_class = per_candidate_nonzeros[:, 1] per_class += 1 detections = self.box_coder.decode( per_box_regression[per_box_loc, :].view(-1, 4), per_anchors.bbox[per_box_loc, :].view(-1, 4)) boxlist = BoxList(detections, per_anchors.size, mode="xyxy") boxlist.add_field("labels", per_class) boxlist.add_field("scores", per_box_cls) boxlist = boxlist.clip_to_image(remove_empty=False) boxlist = remove_small_boxes(boxlist, self.min_size) results[batch_idx] = boxlist return results
def __getitem__(self, k): im_ori_RGB = Image.open(self.img_files[k]).convert('RGB') # im_ori_RGB.size: (W, H with open(self.pickle_files[k], 'rb') as filehandle: data = pickle.load(filehandle) bboxes = data['bboxes'].astype(np.float32) # [xywh] assert len(bboxes.shape)==2 and bboxes.shape[1]==4 num_bboxes_ori = bboxes.shape[0] if 'label' in data: labels = data['label'] # ['car', 'person', 'person'] else: labels = ['person'] * num_bboxes_ori # bboxes = np.load(self.bbox_npy_files[k]).astype(np.float32) # [xywh] if bboxes.shape[0] > self.cfg.DATA.COCO.GOOD_NUM: bboxes = bboxes[:self.cfg.DATA.COCO.GOOD_NUM, :] labels = labels[:self.cfg.DATA.COCO.GOOD_NUM] target_boxes = torch.as_tensor(bboxes).reshape(-1, 4) # guard against no boxes target = BoxList(target_boxes, im_ori_RGB.size, mode="xywh").convert("xyxy") num_boxes = target.bbox.shape[0] if self.opt.est_kps: if 'kps' in data: kps_gt = data['kps'].astype(int) # [N, 51] if num_bboxes_ori > self.cfg.DATA.COCO.GOOD_NUM: kps_gt = kps_gt[:self.cfg.DATA.COCO.GOOD_NUM, :] kps_gt = kps_gt.tolist() # [[51]] else: kps_gt = [[0]*51 for i in range(num_boxes)] target_keypoints = PersonKeypoints(kps_gt, im_ori_RGB.size) # kps_sum = torch.sum(torch.sum(target_keypoints.keypoints[:, :, :2], 1), 1) # kps_mask = kps_sum != 0. # print(target_keypoints.keypoints.shape, kps_sum, kps_mask) target.add_field("keypoints", target_keypoints) # target.add_field("keypoints_mask", kps_mask) target = target.clip_to_image(remove_empty=True) classes = [1] * num_boxes # !!!!! all person (1) for now... classes = [self.json_category_id_to_contiguous_id[c] for c in classes] classes = torch.tensor(classes) target.add_field("labels", classes) scores = torch.tensor([1.] * target.bbox.shape[0]) target.add_field("scores", scores) W, H = im_ori_RGB.size[:2] if self.train: yannick_results = loadmat(self.yannick_mat_files[k]) horizon_visible = yannick_results['horizon_visible'][0][0].astype(np.float32) assert horizon_visible == 1 horizon = yannick_results['pitch'][0][0].astype(np.float32) horizon_pixels_yannick = H * horizon v0 = H - horizon_pixels_yannick vfov = yannick_results['vfov'][0][0].astype(np.float32) f_pixels_yannick = H/2./(np.tan(vfov/2.)) else: f_pixels_yannick = -1 v0 = -1 im_yannickTransform = self.transforms_yannick(im_ori_RGB) # [0., 1.] by default im_maskrcnnTransform, target_maskrcnnTransform = self.transforms_maskrcnn(im_ori_RGB, target) # [0., 1.] by default # print('---', im.size(), np.asarray(im).shape) # im_array = np.asarray(im) # if len(im_array.shape)==2: # im_array = np.stack((im_array,)*3, axis=-1) # # print(im_array.shape) # x = torch.from_numpy(im_array.transpose((2,0,1))) if self.train and self.opt.est_kps: target_maskrcnnTransform.add_field("keypoints_ori", target_keypoints) target_maskrcnnTransform.add_field("boxlist_ori", target) target_maskrcnnTransform.add_field('img_files', [self.img_files[k]] * num_boxes) if self.train: y_person = 1.75 bbox_good_list = bboxes vc = H / 2. inv_f2_yannick = 1./ (f_pixels_yannick * f_pixels_yannick) yc_list = [] for bbox in bbox_good_list: vt = H - bbox[1] vb = H - (bbox[1] + bbox[3]) # v0_single = yc * (vt - vb) / y_person + vb yc_single = y_person * (v0 - vb) / (vt - vb) / (1. 
+ (vc - v0) * (vc - vt) / f_pixels_yannick**2) yc_list.append(yc_single) yc_estCam = np.median(np.asarray(yc_list)) else: yc_estCam = -1 assert len(labels)==bboxes.shape[0] # im_ori_BGR_array = np.array(im_ori_RGB.copy())[:,:,::-1] return im_yannickTransform, im_maskrcnnTransform, W, H, \ float(yc_estCam), \ self.pad_bbox(bboxes, self.GOOD_NUM).astype(np.float32), bboxes.shape[0], float(v0), float(f_pixels_yannick), \ os.path.basename(self.img_files[k])[:12], self.img_files[k], target_maskrcnnTransform, labels
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Arguments:
        anchors: list[BoxList], [image1-si-boxlist, image2-si-boxlist, ...]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    The return value is a list with len(result) == batch_size; every element is
    a BoxList object.
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # objectness has shape [N, A, H, W]; we want to flatten each A*H*W map into
    # a single vector. A direct reshape would unroll starting from the A
    # dimension, so we permute first and then reshape: every H*W feature map is
    # flattened into a vector and all of them are concatenated.
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    # The RPN performs a class-agnostic binary classification (object vs. background)
    # [N, H*W*A]
    objectness = objectness.sigmoid()
    # [N, H*W*A, 4]
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

    num_anchors = A * H * W

    # Select the top-k anchors by objectness score, k = pre_nms_top_n
    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    # Keep the same top-k anchors in box_regression
    batch_idx = torch.arange(N, device=device)[:, None]
    box_regression = box_regression[batch_idx, topk_idx]

    image_shapes = [box.size for box in anchors]
    # boxList.bbox returns the tensor stored in the object; concatenate the
    # anchors of all images in the batch. boxList.bbox is a 2-D tensor, see
    # anchor_generator.grid_anchors.
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    # After the reshape: [N, H*W*A, 4]; then select the top-k
    concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    proposals = self.box_coder.decode(
        box_regression.view(-1, 4), concat_anchors.view(-1, 4)
    )

    proposals = proposals.view(N, -1, 4)

    result = []
    # Process each image in the batch separately
    for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        boxlist.add_field("objectness", score)
        # Clip anchors that extend beyond the image boundary
        boxlist = boxlist.clip_to_image(remove_empty=False)
        # Remove anchors whose width or height is smaller than min_size
        boxlist = remove_small_boxes(boxlist, self.min_size)
        # nms
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    return result
def __getitem__(self, idx):
    imgid = self.imgids[idx]
    img = self.dotadev.loadImgs(imgid)[0]
    anns = self.dotadev.loadAnns(imgId=imgid)

    boxes = [obj["poly"] for obj in anns]
    boxes = torch.tensor(boxes)
    rc_boxes = _dots8ToRec4_(boxes)
    for i, bx in enumerate(boxes):
        arg_x = torch.argsort(bx, dim=0)
        x_min1 = bx[arg_x[0, 0]]
        x_min2 = bx[arg_x[1, 0]]
        x_max1 = bx[arg_x[2, 0]]
        x_max2 = bx[arg_x[3, 0]]
        if x_min2[0] == x_max2[0]:
            if x_min2[1] < x_max2[1]:
                x_min2 = boxes[arg_x[2, 0]]
                x_max2 = boxes[arg_x[3, 0]]
        # change the obb points to a regular order.
        x_min = torch.cat([x_min1[None], x_min2[None]], dim=0)
        arg_y = torch.argsort(x_min, dim=0)
        point_1 = x_min[arg_y[0, 1]]
        point_4 = x_min[arg_y[1, 1]]
        if point_1[1] == point_4[1]:
            if point_1[0] < point_4[0]:
                point_4 = x_min[arg_y[0, 1]]
                point_1 = x_min[arg_y[1, 1]]
        x_max = torch.cat([x_max1[None], x_max2[None]], dim=0)
        arg_y = torch.argsort(x_max, dim=0)
        point_2 = x_max[arg_y[0, 1]]
        point_3 = x_max[arg_y[1, 1]]
        if point_2[1] == point_3[1]:
            if point_2[0] < point_3[0]:
                point_3 = x_max[arg_y[0, 1]]
                point_2 = x_max[arg_y[1, 1]]
        bx = torch.cat(
            [point_1[None], point_2[None], point_3[None], point_4[None]],
            dim=0)
        boxes[i] = bx
    boxes = boxes.view(boxes.size(0), -1)

    areas = [obj["area"] for obj in anns]
    areas = torch.tensor(areas)

    target = BoxList(rc_boxes, img.size)
    rc_target = bb2(boxes, img.size)
    target.add_field('poly_bbox', rc_target)
    target.add_field('areas', areas)

    difficult = [int(obj["difficult"]) for obj in anns]
    difficult = torch.tensor(difficult)
    target.add_field('difficult', difficult)

    classes = [obj["name"] for obj in anns]
    classes = [self.classes_keys.index(c) for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)

    target = target.clip_to_image(remove_empty=True)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    return img, target, idx
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W, where N = batch size (number of
            images), A = number of anchor ratios, H/W = height/width of this
            level's feature map
        box_regression: tensor of size N, A * 4, H, W
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    # Flatten the objectness map so that, per image, there is one score for
    # every anchor at every spatial position: the A, H and W dimensions are
    # merged into a single dimension.
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    # Output: N images, a flattened list of candidate boxes, 1 score per box
    objectness = objectness.sigmoid()

    # Do the same for the regression map, which holds the box deltas for each
    # anchor at each position.
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
    # Output: N images, a flattened list of candidate boxes, 4 regression values per box

    num_anchors = A * H * W

    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    # Number of anchors kept per image on this feature map, as configured for training
    # Take the scores of the pre_nms_top_n highest-scoring anchors and their
    # indices in the anchor list
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    # Index over the images in the batch
    batch_idx = torch.arange(N, device=device)[:, None]
    # Box regression values of the pre_nms_top_n highest-scoring anchors
    box_regression = box_regression[batch_idx, topk_idx]

    # Image size information
    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    # Anchors corresponding to the pre_nms_top_n highest-scoring positions
    concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    # Decode the proposal boxes from the anchor coordinates and regression deltas
    proposals = self.box_coder.decode(
        box_regression.view(-1, 4), concat_anchors.view(-1, 4)  # the RPN outputs 'xyxy' boxes
    )
    # New boxes are obtained from the actual xyxy coordinates plus the regression values
    proposals = proposals.view(N, -1, 4)

    result = []
    for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
        # Store the predicted boxes in a BoxList; one BoxList is built for all
        # candidate boxes of each image at each FPN level
        boxlist = BoxList(proposal, im_shape, mode="xyxy")
        # Store each anchor's objectness score in the BoxList
        boxlist.add_field("objectness", score)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        boxlist = boxlist_nms(
            boxlist,
            self.nms_thresh,
            max_proposals=self.post_nms_top_n,
            score_field="objectness",
        )
        result.append(boxlist)
    return result  # the RPN outputs boxes in 'xyxy' format
def forward_for_single_feature_map( self, locations, box_cls, box_regression, centerness, image_sizes): """ Arguments: anchors: list[BoxList] box_cls: tensor of size N, A * C, H, W box_regression: tensor of size N, A * 4, H, W """ N, C, H, W = box_cls.shape # put in the same format as locations box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1) box_cls = box_cls.reshape(N, -1, self.num_classes - 1).sigmoid() box_regression = box_regression.view(N, self.dense_points * 4, H, W).permute(0, 2, 3, 1) box_regression = box_regression.reshape(N, -1, 4) centerness = centerness.view(N, self.dense_points, H, W).permute(0, 2, 3, 1) centerness = centerness.reshape(N, -1).sigmoid() candidate_inds = box_cls > self.pre_nms_thresh pre_nms_top_n = candidate_inds.view(N, -1).sum(1) pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n) # multiply the classification scores with centerness scores box_cls = box_cls * centerness[:, :, None] results = [] for i in range(N): per_box_cls = box_cls[i] per_candidate_inds = candidate_inds[i] per_box_cls = per_box_cls[per_candidate_inds] per_candidate_nonzeros = per_candidate_inds.nonzero() per_box_loc = per_candidate_nonzeros[:, 0] per_class = per_candidate_nonzeros[:, 1] + 1 per_box_regression = box_regression[i] per_box_regression = per_box_regression[per_box_loc] per_locations = locations[per_box_loc] per_pre_nms_top_n = pre_nms_top_n[i] if per_candidate_inds.sum().item() > per_pre_nms_top_n.item(): per_box_cls, top_k_indices = \ per_box_cls.topk(per_pre_nms_top_n, sorted=False) per_class = per_class[top_k_indices] per_box_regression = per_box_regression[top_k_indices] per_locations = per_locations[top_k_indices] detections = torch.stack([ per_locations[:, 0] - per_box_regression[:, 0], per_locations[:, 1] - per_box_regression[:, 1], per_locations[:, 0] + per_box_regression[:, 2], per_locations[:, 1] + per_box_regression[:, 3], ], dim=1) h, w = image_sizes[i] boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy") boxlist.add_field("labels", per_class) boxlist.add_field("scores", per_box_cls) boxlist = boxlist.clip_to_image(remove_empty=False) boxlist = remove_small_boxes(boxlist, self.min_size) results.append(boxlist) return results
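# forward_for_single_feature_map above decodes per-location (l, t, r, b) distances
# back into xyxy boxes with the torch.stack expression shown there. A tiny
# self-contained example of that decoding step (values made up):
import torch

locations = torch.tensor([[100., 50.]])            # (x, y) of one feature-map location
regression = torch.tensor([[10., 5., 20., 15.]])   # predicted (l, t, r, b) distances
boxes = torch.stack([
    locations[:, 0] - regression[:, 0],  # x1 = x - l
    locations[:, 1] - regression[:, 1],  # y1 = y - t
    locations[:, 0] + regression[:, 2],  # x2 = x + r
    locations[:, 1] + regression[:, 3],  # y2 = y + b
], dim=1)
# boxes -> tensor([[ 90.,  45., 120.,  65.]])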
def prepare_for_coco_detection_mstest(predictions, dataset): # pdb.set_trace() predictions_s = predictions[0] predictions_m = predictions[1] predictions_l = predictions[2] dataset_s = dataset[0] dataset_m = dataset[1] dataset_l = dataset[2] coco_results = [] # one image. for image_id, predictions in enumerate( zip(predictions_s, predictions_m, predictions_l)): prediction_s = predictions[0] prediction_m = predictions[1] prediction_l = predictions[2] original_id = dataset_l.id_to_img_map[image_id] if len(predictions_l) == 0: continue img_info = dataset_l.get_img_info(image_id) image_width = img_info["width"] image_height = img_info["height"] img_id_json = img_info['id'] # rescale predict bbox to original images size. prediction_s = prediction_s.resize((image_width, image_height)) prediction_m = prediction_m.resize((image_width, image_height)) prediction_l = prediction_l.resize((image_width, image_height)) # get single-scale results from type BoxList. bbox_s = prediction_s.bbox score_s = prediction_s.get_field('scores').unsqueeze(1) label_s = prediction_s.get_field('labels').unsqueeze(1) bbox_m = prediction_m.bbox score_m = prediction_m.get_field('scores').unsqueeze(1) label_m = prediction_m.get_field('labels').unsqueeze(1) bbox_l = prediction_l.bbox score_l = prediction_l.get_field('scores').unsqueeze(1) label_l = prediction_l.get_field('labels').unsqueeze(1) # concat single-scale result and convert to type BoxList. (small, medium, large) min_size = 0 w = prediction_l.size[0] h = prediction_l.size[1] detections = torch.from_numpy(np.row_stack( (bbox_s, bbox_m, bbox_l))).cuda() per_class = torch.from_numpy(np.row_stack( (label_s, label_m, label_l))).cuda() per_class = torch.squeeze(per_class, dim=1) per_box_cls = torch.from_numpy( np.row_stack((score_s, score_m, score_l))).cuda() per_box_cls = torch.squeeze(per_box_cls, dim=1) boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy") boxlist.add_field("labels", per_class) boxlist.add_field("scores", per_box_cls) boxlist = boxlist.clip_to_image(remove_empty=False) boxlist = remove_small_boxes(boxlist, min_size) # multi-scale results apply NMS. (small, medium, large) nms_method = cfg.TEST.MS_TEST_NMS nms_thresh = cfg.TEST.MS_TEST_NMS_THR num_classes = 81 scores = boxlist.get_field("scores") labels = boxlist.get_field("labels") boxes = boxlist.bbox result = [] # multi-scale test + NMS for j in range(1, num_classes): inds = (labels == j).nonzero().view(-1) scores_j = scores[inds] boxes_j = boxes[inds, :].view(-1, 4) boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy") boxlist_for_class.add_field("scores", scores_j) if nms_method == "nms": boxlist_for_class = boxlist_nms(boxlist_for_class, nms_thresh, score_field="scores") elif nms_method == "soft_nms": boxlist_for_class = boxlist_soft_nms(boxlist_for_class, nms_thresh, score_field="scores") else: print('the nms method is wrong') num_labels = len(boxlist_for_class) boxlist_for_class.add_field( "labels", torch.full((num_labels, ), j, dtype=torch.int64, device=scores.device)) result.append(boxlist_for_class) result = cat_boxlist(result) boxlist = result boxlist = boxlist.convert("xywh") boxes = boxlist.bbox.tolist() scores = boxlist.get_field("scores").tolist() labels = boxlist.get_field("labels").tolist() mapped_labels = [ dataset_l.contiguous_category_id_to_json_id[int(i)] for i in labels ] coco_results.extend([{ "image_id": original_id, "category_id": mapped_labels[k], "bbox": box, "score": scores[k], } for k, box in enumerate(boxes)]) return coco_results
annotations = ET.parse(args.anno_path + lines[i] + '.xml').getroot()
immage_info = preprocess_annotation(annotations)
if immage_info["boxes"].shape[0] == 0:
    # a case I think is impossible: a ground-truth image without any bbox
    continue
im_height, im_width = immage_info["im_info"]
detections[i] = detections[i].resize((im_width, im_height))
detections[i].bbox[:, 2:] += 1
immage_info["boxes"][:, 2:] += 1
iou_res = boxlist_iou(
    BoxList(detections[i].bbox.numpy(), (im_width, im_height)),
    BoxList(immage_info["boxes"].numpy(), (im_width, im_height))).numpy()
gt_index = iou_res.argmax(axis=1)
iou_with_gt = iou_res.max(axis=1)
del iou_res
for k in range(len(detections[i].extra_fields['labels'])):
    temp_dict = {}
    temp_dict[f"{i}_{k}"] = k
    temp_dict["label_p"] = classes[detections[i].extra_fields['labels'][k]]
    temp_dict["label_gt"] = immage_info["labels"][gt_index[k]]
    temp_dict["score"] = detections[i].extra_fields['scores'].numpy()[k]
def load_graphs(roidb_file, split, num_im, num_val_im, filter_empty_rels, filter_non_overlap): """ Load the file containing the GT boxes and relations, as well as the dataset split Parameters: roidb_file: HDF5 split: (train, val, or test) num_im: Number of images we want num_val_im: Number of validation images filter_empty_rels: (will be filtered otherwise.) filter_non_overlap: If training, filter images that dont overlap. Return: image_index: numpy array corresponding to the index of images we're using boxes: List where each element is a [num_gt, 4] array of ground truth boxes (x1, y1, x2, y2) gt_classes: List where each element is a [num_gt] array of classes relationships: List where each element is a [num_r, 3] array of (box_ind_1, box_ind_2, predicate) relationships """ roi_h5 = h5py.File(roidb_file, 'r') data_split = roi_h5['split'][:] split_flag = 2 if split == 'test' else 0 split_mask = data_split == split_flag # Filter out images without bounding boxes split_mask &= roi_h5['img_to_first_box'][:] >= 0 if filter_empty_rels: split_mask &= roi_h5['img_to_first_rel'][:] >= 0 image_index = np.where(split_mask)[0] if num_im > -1: image_index = image_index[:num_im] if num_val_im > 0: if split == 'val': image_index = image_index[:num_val_im] elif split == 'train': image_index = image_index[num_val_im:] split_mask = np.zeros_like(data_split).astype(bool) split_mask[image_index] = True # Get box information all_labels = roi_h5['labels'][:, 0] all_attributes = roi_h5['attributes'][:, :] all_boxes = roi_h5['boxes_{}'.format(BOX_SCALE)][:] # cx,cy,w,h assert np.all(all_boxes[:, :2] >= 0) # sanity check assert np.all(all_boxes[:, 2:] > 0) # no empty box # convert from xc, yc, w, h to x1, y1, x2, y2 all_boxes[:, :2] = all_boxes[:, :2] - all_boxes[:, 2:] / 2 all_boxes[:, 2:] = all_boxes[:, :2] + all_boxes[:, 2:] im_to_first_box = roi_h5['img_to_first_box'][split_mask] im_to_last_box = roi_h5['img_to_last_box'][split_mask] im_to_first_rel = roi_h5['img_to_first_rel'][split_mask] im_to_last_rel = roi_h5['img_to_last_rel'][split_mask] # load relation labels _relations = roi_h5['relationships'][:] _relation_predicates = roi_h5['predicates'][:, 0] assert (im_to_first_rel.shape[0] == im_to_last_rel.shape[0]) assert (_relations.shape[0] == _relation_predicates.shape[0] ) # sanity check # Get everything by image. 
boxes = [] gt_classes = [] gt_attributes = [] relationships = [] for i in range(len(image_index)): i_obj_start = im_to_first_box[i] i_obj_end = im_to_last_box[i] i_rel_start = im_to_first_rel[i] i_rel_end = im_to_last_rel[i] boxes_i = all_boxes[i_obj_start:i_obj_end + 1, :] gt_classes_i = all_labels[i_obj_start:i_obj_end + 1] gt_attributes_i = all_attributes[i_obj_start:i_obj_end + 1, :] if i_rel_start >= 0: predicates = _relation_predicates[i_rel_start:i_rel_end + 1] obj_idx = _relations[i_rel_start:i_rel_end + 1] - i_obj_start # range is [0, num_box) assert np.all(obj_idx >= 0) assert np.all(obj_idx < boxes_i.shape[0]) rels = np.column_stack( (obj_idx, predicates)) # (num_rel, 3), representing sub, obj, and pred else: assert not filter_empty_rels rels = np.zeros((0, 3), dtype=np.int32) if filter_non_overlap: assert split == 'train' # construct BoxList object to apply boxlist_iou method # give a useless (height=0, width=0) boxes_i_obj = BoxList(boxes_i, (1000, 1000), 'xyxy') inters = boxlist_iou(boxes_i_obj, boxes_i_obj) rel_overs = inters[rels[:, 0], rels[:, 1]] inc = np.where(rel_overs > 0.0)[0] if inc.size > 0: rels = rels[inc] else: split_mask[image_index[i]] = 0 continue boxes.append(boxes_i) gt_classes.append(gt_classes_i) gt_attributes.append(gt_attributes_i) relationships.append(rels) return split_mask, boxes, gt_classes, gt_attributes, relationships
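# load_graphs above converts the stored boxes from (cx, cy, w, h) to
# (x1, y1, x2, y2) in place; the second assignment reads the already-updated
# (x1, y1), so the order of the two lines matters. A small numeric check
# (the array is made up):
import numpy as np

b = np.array([[50., 40., 20., 10.]])   # one box as (cx, cy, w, h)
b[:, :2] = b[:, :2] - b[:, 2:] / 2     # -> (x1, y1, w, h)   = (40, 35, 20, 10)
b[:, 2:] = b[:, :2] + b[:, 2:]         # -> (x1, y1, x2, y2) = (40, 35, 60, 45)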
def forward(self, features_left, proposals_left, features_right=None, proposals_right=None, targets_left=None, targets_right=None, proposals_sampled=None): """ Arguments: features (list[Tensor]): feature-maps from possibly several levels proposals (list[BoxList]): proposal boxes targets (list[BoxList], optional): the ground-truth targets. Returns: x (Tensor): the result of the feature extractor proposals (list[BoxList]): during training, the subsampled proposals are returned. During testing, the predicted boxlists are returned losses (dict[Tensor]): During training, returns the losses for the head. During testing, returns an empty dict. """ # generate right from left(TODO: TEMP solution for inconsistent ground truth) # if not targets_left is None: # targets_right = [] # for target in targets_left: # target_right = target.copy_with_fields("labels").convert("xywh") # disps = target.get_field("depths").convert("disp").depths # target_right.bbox[:,0] -= disps # targets_right.append(target_right.convert("xyxy")) if self.training: # Faster R-CNN subsamples during training the proposals with a fixed # positive / negative ratio if proposals_sampled is None: with torch.no_grad(): proposals_sampled_left, proposals_sampled_right = self.loss_evaluator.subsample( proposals_left, proposals_right, targets_left, targets_right) proposals_left, proposals_right = proposals_sampled_left, proposals_sampled_right # calculate proposals_union proposals_union = [] # print(len(proposals_left[0]), len(proposals_right[0])) for tl, tr in zip(proposals_left, proposals_right): assert (tl.size == tr.size) bbox_left, bbox_right = tl.convert("xyxy").bbox, tr.convert( "xyxy").bbox # print(bbox_left, bbox_right) new_bbox = torch.stack([ torch.min(bbox_left[:, 0], bbox_right[:, 0]), torch.min(bbox_left[:, 1], bbox_right[:, 1]), torch.max(bbox_left[:, 2], bbox_right[:, 2]), torch.max(bbox_left[:, 3], bbox_right[:, 3]), ], dim=1) # print(new_bbox) proposals_union.append(BoxList(new_bbox, tl.size, mode="xyxy")) # extract features that will be fed to the final classifier. 
The # feature_extractor generally corresponds to the pooler + heads fl = self.feature_extractor(features_left, proposals_union) fr = self.feature_extractor(features_right, proposals_union) x = torch.cat([fl, fr], dim=1) # final classifier that converts the features into predictions class_logits, box_regression_left, box_regression_right = self.predictor( x) if not self.training: # result_left = self.post_processor((class_logits, box_regression_left), proposals_union) # result_right = self.post_processor((class_logits, box_regression_right), proposals_union) result_left, result_right = self.post_processor( (class_logits, box_regression_left, box_regression_right), proposals_union) # resample # result_union = [boxlist_union(rl, rr) for rl,rr in zip(result_left, result_right)] # fl = self.feature_extractor(features_left, result_union) # fr = self.feature_extractor(features_right, result_union) # x = torch.cat([fl, fr], dim=1) return x, result_left, result_right, {} # TODO: loss is not needed for mean teacher when MT_ON if not self.cfg.MODEL.ROI_BOX_HEAD.FREEZE_WEIGHT: loss_classifier, loss_box_reg, loss_box_reg_right = self.loss_evaluator( [class_logits], [box_regression_left], [box_regression_right], proposals_union) # if self.cfg.MODEL.ROI_BOX_HEAD.OUTPUT_DECODED_PROPOSAL: # bbox_reg_weights = self.cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS # box_coder = BoxCoder(weights=bbox_reg_weights) # boxes_per_image = [len(box) for box in proposals] # concat_boxes = torch.cat([a.bbox for a in proposals], dim=0) # decoded_proposals = box_coder.decode( # box_regression_left.view(sum(boxes_per_image), -1), concat_boxes # ) # decoded_proposals = decoded_proposals.split(boxes_per_image, dim=0) # # decoded_proposals = self.post_processor((class_logits, box_regression), proposals) # # make sure there are valid proposals # for i, boxes in enumerate(decoded_proposals): # if len(boxes) > 0: # proposals[i].bbox = boxes.reshape(-1, 4) loss_dict = dict() # if self.cfg.MODEL.MT_ON: # loss_dict.update(class_logits=class_logits, box_logits=box_regression_left) # loss_dict.update(class_logits=x, box_logits=x) # proposals_sampled.add_field('class_logits', class_logits) # proposals_sampled.add_field('box_logits', box_regression) if not self.is_mt and not self.cfg.MODEL.ROI_BOX_HEAD.FREEZE_WEIGHT: loss_dict.update( dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg, loss_box_reg_right=loss_box_reg_right)) return x, proposals_left, proposals_right, loss_dict
def __getitem__(self, idx): img, anno = super(COCODataset, self).__getitem__(idx) img_original = img # img_original = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) # img_original = img_original + 127.5 # trans1 = torchvision.transforms.ToTensor() # img_original = trans1(img_original) # img_original[img_original - 127.5 < 30] = 0 # img_original = img_original * 0.65 # cv2.imwrite('d.jpg', img_original) # print('============') # pass # img, anno = overlay_GT_on_scan(img, anno, self.gtcloud, self.gtann, resolution=1000) # noiseoffset = (torch.randn(2)) # minimal bbox noise is better? # for ann in anno: # noiseratio = ((torch.randn(1)).div_(20)).exp_().clamp(0.9, 1.1) # noiserotate = torch.randn(1).clamp(-3, 3) # label = ann["bbox"] # orien = ann["rotation"] # box = bBox_2D(label[3], label[2], label[0] + label[2] / 2, label[1] + label[3] / 2, # orien) # bBox_2D: length, width, xc, yc,alpha label: 'bbox': [box.xtl, box.ytl, box.width, box.length], # box.rotate(noiserotate) # box.resize(noiseratio) # # box.translate(noiseoffset[0], noiseoffset[1]) # box.xcyc2topleft() # ann["bbox"] = [box.xtl, box.ytl, box.width, box.length] # # slightly stretch the box may be better viewed ? # ann["rotation"] = box.alpha # filter crowd annotations # TODO might be better to add an extra field anno = [ obj for obj in anno ] # if obj["iscrowd"] == 0] =============================================== boxes = [obj["bbox"] for obj in anno] boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes # print(boxes) target = BoxList(boxes, img.size, mode="xywh").convert( "xyxy") # ===================================== # print(target.bbox,'============================') classes = [obj["category_id"] for obj in anno] classes = [self.json_category_id_to_contiguous_id[c] for c in classes] classes = torch.tensor(classes) target.add_field("labels", classes) masks = [obj["segmentation"] for obj in anno] masks = SegmentationMask(masks, img.size) target.add_field("masks", masks) # ==================================== rotations = [obj["rotation"] * math.pi / 180 for obj in anno] # print(rotations,'====') rotations = torch.tensor(rotations) # rotations = torch.stack((5 * torch.sin(rotations), 5 * torch.cos(rotations))) rotations = torch.stack((rotations, rotations)) # for testing # COMPLEX space *5 is radius of unit circle or weight rotations = torch.transpose(rotations, dim0=0, dim1=-1) # N*2 shape # print(rotations) target.add_field("rotations", rotations) # print(target.get_field('rotations'), '============ooo================') # print(target,'============================================') target = target.clip_to_image(remove_empty=False) # print(len(target), '==================targetanno=================') if self.transforms is not None: img, target = self.transforms(img, target) # print(img.size(),'=================%d=================='%idx) # print(target.get_field('rotations'), '============================') return img, target, idx, img_original
def im_detect_bbox_aug(model, images, device): # Collect detections computed under different transformations boxlists_ts = [] for _ in range(len(images)): boxlists_ts.append([]) def add_preds_t(boxlists_t): for i, boxlist_t in enumerate(boxlists_t): if len(boxlists_ts[i]) == 0: # The first one is identity transform, no need to resize the boxlist boxlists_ts[i].append(boxlist_t) else: # Resize the boxlist as the first one boxlists_ts[i].append(boxlist_t.resize(boxlists_ts[i][0].size)) # Compute detections for the original image (identity transform) boxlists_i = im_detect_bbox(model, images, cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, device) add_preds_t(boxlists_i) # Perform detection on the horizontally flipped image if cfg.TEST.BBOX_AUG.H_FLIP: boxlists_hf = im_detect_bbox_hflip(model, images, cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, device) add_preds_t(boxlists_hf) # Compute detections at different scales for scale in cfg.TEST.BBOX_AUG.SCALES: max_size = cfg.TEST.BBOX_AUG.MAX_SIZE boxlists_scl = im_detect_bbox_scale(model, images, scale, max_size, device) add_preds_t(boxlists_scl) if cfg.TEST.BBOX_AUG.SCALE_H_FLIP: boxlists_scl_hf = im_detect_bbox_scale(model, images, scale, max_size, device, hflip=True) add_preds_t(boxlists_scl_hf) # Merge boxlists detected by different bbox aug params boxlists = [] for i, boxlist_ts in enumerate(boxlists_ts): bbox = torch.cat([boxlist_t.bbox for boxlist_t in boxlist_ts]) scores = torch.cat( [boxlist_t.get_field('scores') for boxlist_t in boxlist_ts]) boxlist = BoxList(bbox, boxlist_ts[0].size, boxlist_ts[0].mode) boxlist.add_field('scores', scores) boxlists.append(boxlist) # Apply NMS and limit the final detections results = [] post_processor = make_roi_box_post_processor(cfg) for boxlist in boxlists: results.append( post_processor.filter_results(boxlist, cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES)) return results
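# im_detect_bbox_aug above merges detections from the original and horizontally
# flipped images. Mapping a box detected on the flipped image back to original
# coordinates only needs the image width; a minimal standalone version of that
# remapping (an assumption about what im_detect_bbox_hflip does internally; the
# name unflip_boxes_sketch is illustrative):
import torch


def unflip_boxes_sketch(boxes_xyxy, image_width):
    x1, y1, x2, y2 = boxes_xyxy.unbind(dim=1)
    return torch.stack([image_width - x2, y1, image_width - x1, y2], dim=1)


# unflip_boxes_sketch(torch.tensor([[10., 5., 30., 25.]]), 100.)
# -> tensor([[70.,  5., 90., 25.]])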
def compute_predictions(cfg, dataset, model, transforms, icwt_21_objs=False,
                        compute_average_recall_RPN=False, is_train=True,
                        result_dir=None, evaluate_segmentation=True,
                        eval_segm_with_gt_bboxes=False):
    model.eval()
    num_img = len(dataset.ids)
    # Set the number of images that will be used to set minibootstrap parameters
    if hasattr(model, 'rpn'):
        model.rpn.cfg.NUM_IMAGES = num_img
    if hasattr(model, 'roi_heads'):
        model.roi_heads.box.cfg.NUM_IMAGES = num_img
    if compute_average_recall_RPN:
        average_recall_RPN = 0
    predictions = []
    for i in range(num_img):
        # NOTE: compare class names with ==, not the identity operator `is`
        if type(dataset).__name__ == 'iCubWorldDataset':
            image, gt_bboxes_list, masks, gt_labels, img_sizes = compute_gts_icwt(
                dataset, i, icwt_21_objs)
        elif type(dataset).__name__ == 'YCBVideoDataset':
            image, gt_bboxes_list, masks, gt_labels, img_sizes = compute_gts_ycbv(
                dataset, i, evaluate_segmentation=evaluate_segmentation)
        # Save list of boxes as tensor
        gt_bbox_tensor = torch.tensor(gt_bboxes_list, device="cuda")
        gt_labels_torch = torch.tensor(
            gt_labels, device="cuda", dtype=torch.uint8).reshape((len(gt_labels), 1))
        if len(masks) > 0:
            mask_lists = SegmentationMask(torch.cat(masks), img_sizes, mode='mask')
        # create box list containing the ground truth bounding boxes
        try:
            gt_bbox_boxlist = BoxList(gt_bbox_tensor, image_size=img_sizes, mode='xyxy')
            try:
                if evaluate_segmentation:
                    gt_bbox_boxlist.add_field("masks", mask_lists)
            except Exception:
                pass
        except Exception:
            gt_bbox_boxlist = BoxList(torch.empty((0, 4), device="cuda"),
                                      image_size=img_sizes, mode='xyxy')
        # apply pre-processing to image
        image = transforms(image)
        # convert to an ImageList
        image_list = to_image_list(image, 1)
        image_list = image_list.to("cuda")
        # compute predictions
        with torch.no_grad():
            AR, predicted_boxes = model(
                image_list, gt_bbox=gt_bbox_boxlist, gt_label=gt_labels_torch,
                img_size=img_sizes,
                compute_average_recall_RPN=compute_average_recall_RPN,
                gt_labels_list=gt_labels, is_train=is_train, result_dir=result_dir,
                evaluate_segmentation=evaluate_segmentation,
                eval_segm_with_gt_bboxes=eval_segm_with_gt_bboxes)
        if compute_average_recall_RPN:
            average_recall_RPN += AR
        predictions.append(predicted_boxes)

    if compute_average_recall_RPN:
        AR = average_recall_RPN / num_img
        print('Average Recall (AR):', AR)
        if result_dir:
            with open(os.path.join(result_dir, "result.txt"), "a") as fid:
                fid.write('Average Recall (AR): {} \n \n'.format(AR))

    if type(dataset).__name__ == 'iCubWorldDataset':
        extra_args = dict(
            box_only=False,
            iou_types=("bbox",),
            expected_results=(),
            expected_results_sigma_tol=4,
            draw_preds=False,
            is_target_task=True,
            icwt_21_objs=icwt_21_objs,
            iou_thresholds=model.roi_heads.box.cfg.EVALUATION.IOU_THRESHOLDS,
            use_07_metric=model.roi_heads.box.cfg.EVALUATION.USE_VOC07_METRIC)
    elif type(dataset).__name__ == 'YCBVideoDataset':
        extra_args = dict(
            box_only=False,
            iou_types=("bbox",),
            expected_results=(),
            expected_results_sigma_tol=4,
            draw_preds=False,
            evaluate_segmentation=evaluate_segmentation,
            iou_thresholds=model.roi_heads.box.cfg.EVALUATION.IOU_THRESHOLDS,
            use_07_metric=model.roi_heads.box.cfg.EVALUATION.USE_VOC07_METRIC)

    return evaluate(dataset=dataset, predictions=predictions,
                    output_folder=result_dir, **extra_args)
def __getitem__(self, idx):
    # A commented-out mixup variant (blending two samples with a Beta-distributed
    # coefficient and concatenating their boxes, labels and polygon masks, plus a
    # per-box "cofs" field) sits at the top of this method in the original source;
    # only the standard single-image path below is active.
    img, anno = super(COCODataset, self).__getitem__(idx)

    # filter crowd annotations
    # TODO might be better to add an extra field
    anno = [obj for obj in anno if obj["iscrowd"] == 0]

    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

    classes = [obj["category_id"] for obj in anno]
    classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)

    masks = [obj["segmentation"] for obj in anno]
    masks = SegmentationMask(masks, img.size, mode='poly')
    target.add_field("masks", masks)

    if anno and "keypoints" in anno[0]:
        keypoints = [obj["keypoints"] for obj in anno]
        keypoints = PersonKeypoints(keypoints, img.size)
        target.add_field("keypoints", keypoints)

    target = target.clip_to_image(remove_empty=True)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    return img, target, idx
def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
    """
    Arguments:
        anchors: list[BoxList]
        box_cls: tensor of size N, A * C, H, W
        box_regression: tensor of size N, A * 4, H, W
    """
    device = box_cls.device
    N, _, H, W = box_cls.shape
    A = box_regression.size(1) // 4
    C = box_cls.size(1) // A

    # put in the same format as anchors
    box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
    box_cls = box_cls.sigmoid()

    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

    num_anchors = A * H * W

    if self.imbalanced_decider is None:
        candidate_inds = box_cls > self.pre_nms_thresh
    else:
        candidate_inds = self.imbalanced_decider(box_cls)

    pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
    pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

    results = []
    for per_box_cls, per_box_regression, per_pre_nms_top_n, \
            per_candidate_inds, per_anchors in zip(
                box_cls, box_regression, pre_nms_top_n, candidate_inds, anchors):

        # Sort and select TopN
        # TODO most of this can be made out of the loop for all images.
        # TODO:Yang: Not easy to do. Because the numbers of detections are
        # different in each image. Therefore, this part needs to be done per image.
        per_box_cls = per_box_cls[per_candidate_inds]

        per_box_cls, top_k_indices = per_box_cls.topk(per_pre_nms_top_n, sorted=False)

        per_candidate_nonzeros = per_candidate_inds.nonzero()[top_k_indices, :]

        per_box_loc = per_candidate_nonzeros[:, 0]
        per_class = per_candidate_nonzeros[:, 1]
        per_class += 1

        detections = self.box_coder.decode(
            per_box_regression[per_box_loc, :].view(-1, 4),
            per_anchors.bbox[per_box_loc, :].view(-1, 4))

        boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        results.append(boxlist)

    return results
def forward_for_single_feature_map(self, locations, box_cls, box_regression,
                                   centerness, image_sizes):
    """
    Arguments:
        locations: tensor of size H*W, 2
        box_cls: tensor of size N, C, H, W
        box_regression: tensor of size N, 4, H, W
        centerness: tensor of size N, 1, H, W (or None)
        image_sizes: list of (h, w) image sizes
    """
    N, C, H, W = box_cls.shape

    # put in the same format as locations
    box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
    box_cls = box_cls.reshape(N, -1, C).sigmoid()
    box_regression = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1)
    box_regression = box_regression.reshape(N, -1, 4)

    candidate_inds = box_cls > self.pre_nms_thresh
    pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
    pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

    # multiply the classification scores with centerness scores
    if centerness is not None:
        centerness = centerness.view(N, 1, H, W).permute(0, 2, 3, 1)
        centerness = centerness.reshape(N, -1).sigmoid()
        box_cls = box_cls * centerness[:, :, None]

    if self.debug_vis_label:
        show_box_cls([box_cls, box_cls ** 2], N, H, W, C, self.pre_nms_thresh)

    results = []
    for i in range(N):
        per_box_cls = box_cls[i]
        per_candidate_inds = candidate_inds[i]
        per_box_cls = per_box_cls[per_candidate_inds]

        per_candidate_nonzeros = per_candidate_inds.nonzero()
        per_box_loc = per_candidate_nonzeros[:, 0]
        per_class = per_candidate_nonzeros[:, 1] + 1

        per_box_regression = box_regression[i]
        per_box_regression = per_box_regression[per_box_loc]
        per_locations = locations[per_box_loc]

        per_pre_nms_top_n = pre_nms_top_n[i]

        if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
            per_box_cls, top_k_indices = per_box_cls.topk(per_pre_nms_top_n, sorted=False)
            per_class = per_class[top_k_indices]
            per_box_regression = per_box_regression[top_k_indices]
            per_locations = per_locations[top_k_indices]

        # decode the (l, t, r, b) distances around each location into xyxy boxes
        detections = torch.stack([
            per_locations[:, 0] - per_box_regression[:, 0],
            per_locations[:, 1] - per_box_regression[:, 1],
            per_locations[:, 0] + per_box_regression[:, 2],
            per_locations[:, 1] + per_box_regression[:, 3],
        ], dim=1)

        h, w = image_sizes[i]
        boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        if self.debug_vis_label:
            boxlist.add_field("det_locations", per_locations)  # add by hui
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        results.append(boxlist)

    return results
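# A tiny, self-contained illustration of the (l, t, r, b) decoding performed above:
# each feature-map location (x, y) plus its four predicted distances yields one
# xyxy box. The numbers are made up purely for demonstration.
import torch

locations = torch.tensor([[100.0, 80.0]])        # one location (x, y)
ltrb = torch.tensor([[20.0, 10.0, 30.0, 15.0]])  # predicted distances (l, t, r, b)
boxes = torch.stack([
    locations[:, 0] - ltrb[:, 0],  # x1 = x - l
    locations[:, 1] - ltrb[:, 1],  # y1 = y - t
    locations[:, 0] + ltrb[:, 2],  # x2 = x + r
    locations[:, 1] + ltrb[:, 3],  # y2 = y + b
], dim=1)
# boxes -> tensor([[ 80.,  70., 130.,  95.]])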
def evaluate_box_proposals(predictions, dataset, thresholds=None, area="all", limit=None):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0 ** 2, 1e5 ** 2],    # all
        [0 ** 2, 32 ** 2],     # small
        [32 ** 2, 96 ** 2],    # medium
        [96 ** 2, 1e5 ** 2],   # large
        [96 ** 2, 128 ** 2],   # 96-128
        [128 ** 2, 256 ** 2],  # 128-256
        [256 ** 2, 512 ** 2],  # 256-512
        [512 ** 2, 1e5 ** 2],  # 512-inf
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]

        # TODO replace with get_img_info?
        image_width = dataset.coco.imgs[original_id]["width"]
        image_height = dataset.coco.imgs[original_id]["height"]
        prediction = prediction.resize((image_width, image_height))

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = prediction.get_field("objectness").sort(descending=True)[1]
        prediction = prediction[inds]

        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)
        gt_boxes = [obj["bbox"] for obj in anno if obj["iscrowd"] == 0]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = BoxList(gt_boxes, (image_width, image_height), mode="xywh").convert("xyxy")
        gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if len(prediction) == 0:
            continue

        if limit is not None and len(prediction) > limit:
            prediction = prediction[:limit]

        overlaps = boxlist_iou(prediction, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)

    gt_overlaps = torch.cat(gt_overlaps, dim=0)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
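# A hedged usage sketch for evaluate_box_proposals above: aggregate average recall
# over a few area ranges and proposal limits, roughly mirroring COCO-style AR@100 /
# AR-small reporting. summarize_proposal_recall is a hypothetical helper; it assumes
# `predictions` is a list of BoxList proposals (with an "objectness" field) aligned
# with `dataset`, exactly as the function above expects.
def summarize_proposal_recall(predictions, dataset):
    results = {}
    for area in ("all", "small", "medium", "large"):
        for limit in (100, 1000):
            stats = evaluate_box_proposals(predictions, dataset, area=area, limit=limit)
            key = "AR{}@{:d}".format("" if area == "all" else "-" + area, limit)
            results[key] = float(stats["ar"])
    return results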
def __getitem__(self, item):
    im_name = os.path.basename(self.image_lists[item])
    img = Image.open(self.image_lists[item]).convert("RGB")
    width, height = img.size

    if self.gts_dir is not None:
        gt_path = os.path.join(self.gts_dir, im_name + '.txt')
        if not os.path.isfile(gt_path):
            gt_path = os.path.join(self.gts_dir, 'gt_' + im_name.split('.')[0] + '.txt')
        words, boxes, charsbbs, segmentations = self.load_gt_from_txt(
            gt_path, height, width)
        target = BoxList(boxes[:, :4], img.size, mode="xyxy",
                         use_char_ann=self.use_charann)
        classes = torch.ones(len(boxes))
        target.add_field("labels", classes)
        masks = SegmentationMask(segmentations, img.size)
        target.add_field("masks", masks)
        # character-level annotations are only usable when words are present
        # and the dataset was configured with use_charann
        use_char_ann = words[0] != ''
        if not self.use_charann:
            use_char_ann = False
        char_masks = SegmentationCharMask(charsbbs, words=words,
                                          use_char_ann=use_char_ann, size=img.size)
        target.add_field("char_masks", char_masks)
    else:
        target = None

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    if self.vis:
        new_im = img.numpy().copy().transpose([1, 2, 0]) + [102.9801, 115.9465, 122.7717]
        new_im = Image.fromarray(new_im.astype(np.uint8)).convert('RGB')
        mask = target.extra_fields['masks'].polygons[0].convert('mask')
        mask = Image.fromarray((mask.numpy() * 255).astype(np.uint8)).convert('RGB')
        if self.use_charann:
            m, _ = target.extra_fields['char_masks'].chars_boxes[0].convert('char_mask')
            color = self.creat_color_map(37, 255)
            color_map = color[m.numpy().astype(np.uint8)]
            char = Image.fromarray(color_map.astype(np.uint8)).convert('RGB')
            char = Image.blend(char, new_im, 0.5)
        else:
            char = new_im
        new = Image.blend(char, mask, 0.5)
        img_draw = ImageDraw.Draw(new)
        for box in target.bbox.numpy():
            box = list(box)
            # trace the four corners of the axis-aligned box as a closed polyline
            box = box[:2] + [box[2], box[1]] + box[2:] + [box[0], box[3]] + box[:2]
            img_draw.line(box, fill=(255, 0, 0), width=2)
        new.save('./vis/char_' + im_name)

    return img, target, self.image_lists[item]
def inference(self, colors_pred, add_class_names=None, save_path=None,
              save_independently=None, show_ground_truth=True):
    """ Do inference, overlaying either the predicted boxes or the masks. """
    # load the config
    paths_catalog = import_file("maskrcnn_benchmark.config.paths_catalog",
                                cfg.PATHS_CATALOG, True)
    DatasetCatalog = paths_catalog.DatasetCatalog
    test_datasets = DatasetCatalog.get(cfg.DATASETS.TEST[0])
    img_dir = test_datasets['args']['root']
    anno_file = test_datasets['args']['ann_file']

    data = json.load(open(anno_file))
    coco = COCO(anno_file)

    predis = []
    filenames = []

    # iterate through data
    for i, image in enumerate(data['images']):
        pil_img = Image.open(img_dir + '/' + image['file_name'])
        filenames.append(image['file_name'])
        img = np.array(pil_img)[:, :, [0, 1, 2]]

        # get ground truth boxes or masks
        anno = [obj for obj in data['annotations'] if obj['image_id'] == image['id']]
        classes = [obj['category_id'] for obj in data['annotations']
                   if obj['image_id'] == image['id']]
        json_category_id_to_contiguous_id = {
            v: i + 1 for i, v in enumerate(coco.getCatIds())
        }
        classes = [json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        boxes = [obj['bbox'] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)
        target = BoxList(boxes, pil_img.size, mode='xywh').convert('xyxy')
        target.add_field('labels', classes)
        masks = [obj["segmentation"] for obj in anno]
        # use the PIL (width, height) size here; np.ndarray.size is the element count
        masks = SegmentationMask(masks, pil_img.size)
        target.add_field("masks", masks)
        target = target.clip_to_image(remove_empty=True)

        # these are the ground truth polygons
        polygons = []
        color_rgb = [[255, 101, 80], [255, 55, 55], [255, 255, 61], [255, 128, 0]]
        colors = {i: [s / 255 for s in color] for i, color in enumerate(color_rgb)}
        color = [colors[i.item()] for i in classes]

        # ground truth boxes
        boxes = []
        polys = vars(target)['extra_fields']['masks']
        for polygon in polys:
            try:
                tenso = vars(polygon)['polygons'][0]
            except KeyError:
                continue
            poly1 = tenso.numpy()
            poly = poly1.reshape((int(len(poly1) / 2), 2))
            polygons.append(Polygon(poly))
        xywh_tar = target.convert("xywh")
        for box in vars(xywh_tar)['bbox'].numpy():
            rect = Rectangle((box[0], box[1]), box[2], box[3])
            boxes.append(rect)

        # compute predictions
        predictions = self.compute_prediction(img)
        predis.append(predictions)
        top_predictions = self.select_top_predictions(predictions)
        polygons_predicted, colors_prediction = self.overlay_mask(
            img, top_predictions, colors_pred, inference=True)

        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.imshow(Image.fromarray(img))
        ax.axis('off')

        # this is for ground truth
        if show_ground_truth:
            p = PatchCollection(polygons, facecolor='none', linewidths=0, alpha=0.4)
            ax.add_collection(p)
            p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2)
            ax.add_collection(p)

        # this is for prediction
        ppd = PatchCollection(polygons_predicted, facecolor='none', linewidths=0, alpha=0.4)
        ax.add_collection(ppd)
        ppd = PatchCollection(polygons_predicted, facecolor='none',
                              edgecolors=colors_prediction, linewidths=2)
        ax.add_collection(ppd)

        plt.savefig(save_path + image['file_name'], dpi=200,
                    bbox_inches='tight', pad_inches=0)
        plt.show()

    dic = {}
    for i in range(len(filenames)):
        dic[filenames[i]] = predis[i]
    return dic
def filter_results(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    if cfg.ROTATE:
        # rotated boxes: 8 values (four corner points) per class
        boxes = boxlist.bbox.reshape(-1, num_classes * 8)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 8:(j + 1) * 8]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xy8")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_rnms(boxlist_for_class, self.nms)
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels,), j, dtype=torch.int64, device=device))
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)
    else:
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_nms(boxlist_for_class, self.nms)
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels,), j, dtype=torch.int64, device=device))
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)

    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(), number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
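# A small stand-alone illustration of the kthvalue trick used above to cap the
# number of detections per image: keeping scores >= the k-th smallest score retains
# the detections_per_img highest-scoring boxes (ties at the threshold may keep more).
import torch

scores = torch.tensor([0.9, 0.1, 0.8, 0.3, 0.7])
detections_per_img = 3
k = scores.numel() - detections_per_img + 1   # k = 3
image_thresh, _ = torch.kthvalue(scores, k)   # 3rd smallest score = 0.7
keep = torch.nonzero(scores >= image_thresh).squeeze(1)
# keep -> tensor([0, 2, 4]), i.e. the three highest-scoring detections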
def calc_detection_sysu_prec_rec(gt_boxlists, pred_boxlists, iou_thresh=0.5):
    """Calculate precision and recall based on the evaluation code of PASCAL VOC.

    This function calculates precision and recall of predicted bounding boxes
    obtained from a dataset which has :math:`N` images.
    The code is based on the evaluation code used in the PASCAL VOC Challenge.
    """
    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)
    for gt_boxlist, pred_boxlist in zip(gt_boxlists, pred_boxlists):
        pred_bbox = pred_boxlist.bbox.numpy()
        pred_label = pred_boxlist.get_field("labels").numpy()
        pred_score = pred_boxlist.get_field("scores").numpy()
        gt_bbox = gt_boxlist.bbox.numpy()
        gt_label = gt_boxlist.get_field("labels").numpy()
        gt_difficult = gt_boxlist.get_field("difficult").numpy()

        for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == l
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[l] += np.logical_not(gt_difficult_l).sum()
            score[l].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                match[l].extend((0,) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1

            iou = boxlist_iou(
                BoxList(pred_bbox_l, gt_boxlist.size),
                BoxList(gt_bbox_l, gt_boxlist.size),
            ).numpy()
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[l].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[l].append(1)
                        else:
                            match[l].append(0)
                    selec[gt_idx] = True
                else:
                    match[l].append(0)

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for l in n_pos.keys():
        score_l = np.array(score[l])
        match_l = np.array(match[l], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan.
        prec[l] = tp / (fp + tp)  # precision
        # If n_pos[l] is 0, rec[l] remains None.
        if n_pos[l] > 0:
            rec[l] = tp / n_pos[l]  # recall

    return prec, rec
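# A hedged companion sketch: turning the per-class precision/recall arrays returned
# above into average precision, following the usual VOC-style computation (11-point
# interpolation when use_07_metric is True, area under the monotone precision
# envelope otherwise). calc_detection_ap is not part of the original file.
import numpy as np


def calc_detection_ap(prec, rec, use_07_metric=False):
    n_fg_class = len(prec)
    ap = np.empty(n_fg_class)
    for l in range(n_fg_class):
        if prec[l] is None or rec[l] is None:
            ap[l] = np.nan
            continue
        if use_07_metric:
            # 11-point interpolated AP
            ap[l] = 0
            for t in np.arange(0.0, 1.1, 0.1):
                if np.sum(rec[l] >= t) == 0:
                    p = 0
                else:
                    p = np.max(np.nan_to_num(prec[l])[rec[l] >= t])
                ap[l] += p / 11
        else:
            # area under the monotonically decreasing precision envelope
            mpre = np.concatenate(([0], np.nan_to_num(prec[l]), [0]))
            mrec = np.concatenate(([0], rec[l], [1]))
            mpre = np.maximum.accumulate(mpre[::-1])[::-1]
            i = np.where(mrec[1:] != mrec[:-1])[0]
            ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap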
def __getitem__(self, idx):
    img, anno, meta = self.__get_item__(idx)

    # filter crowd annotations
    anno = [obj for obj in anno if obj["iscrowd"] == 0]

    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

    classes = [obj["category_id"] for obj in anno]
    # classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)

    polygons = [obj["segmentation"] for obj in anno]
    seg_mask_instance = SegmentationMask(polygons, img.size)
    target.add_field("masks", seg_mask_instance)

    masks = [_get_mask_from_polygon(polygon, img.size) for polygon in polygons]

    N = len(masks)
    W, H = img.size

    if self.cfg["Pose"] or self.cfg["Vertex"]:
        # NOTE: this rebinds `meta` from the per-image dict returned by __get_item__
        # to a per-annotation list, so the `'depth' in meta` and
        # `"intrinsic_matrix" in meta` checks further down only behave as intended
        # when this branch is not taken.
        meta = [obj["meta"] for obj in anno]
        centers = [m['center'] for m in meta]
        assert len(meta) == len(polygons)

        if self.cfg["Pose"]:
            poses = [obj["pose"] for obj in meta]
            target.add_field("poses", torch.tensor(poses))

        if self.cfg["Vertex"]:
            vertex_centers = np.zeros((N, 2, H, W))
            for ix, m in enumerate(masks):
                center = centers[ix]
                # z distance is the last value in pose [qw, qx, qy, qz, x, y, z]
                vertex_centers[ix, :] = _generate_vertex_center_mask(m, center)
            vertex_centers = torch.tensor(vertex_centers)
            vertexes = ObjectMask(vertex_centers, img.size)
            target.add_field("vertex", vertexes)

        centers = Keypoints([[c[0], c[1], 1] for c in centers], img.size)  # set all kp to class of 1
        target.add_field("centers", centers)

    if self.cfg["Depth"]:
        depth_data = np.zeros((N, 1, H, W))
        if 'depth' in meta:
            depth = meta['depth']
            for ix, m in enumerate(masks):
                depth_data[ix, :] = _generate_depth_mask(m, depth)
        depth_data = torch.tensor(depth_data)
        depth_D = ObjectMask(depth_data, img.size)
        target.add_field("depth", depth_D)

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    if "intrinsic_matrix" in meta:
        target.add_field("intrinsic_matrix", meta["intrinsic_matrix"])

    if self.cfg["Pose"]:
        target.add_field("symmetry", self.symmetry)
        target.add_field("extents", self.extents)
        target.add_field("points", self.points)

    return img, target, idx