Code example #1
File: completeness_reg.py  Project: kjlee18/APNet
	def get_ratio(self, boxlist, is_train):
		"""

			boxlist: [Bbox, Bbox, ...]

		"""	
		"""
			for those without keypoints:
				global, not partition
		
		"""
		return_boxlist = []
		device = boxlist[0].bbox.device

		for target in boxlist:
			target_bbox = target.bbox
			keypoint = target.get_field("keypoints")
			kp = keypoint.keypoints
			n, _, _ = kp.shape
			bbox = target.bbox
			img_size = target.size

			new_bbox = []
			new_pad = []
			for k in range(n):
				p_kp = kp[k]
				
				pad = 1.0 if is_train else 0.
			
				if p_kp.sum().item() > 0:
					pad = 0.
					for iteration, i in enumerate(self._idx[::-1][:-1]):
						# assume thorax exists
						vis = False
						store_y = None
						for j in i:
							if p_kp[j][2] > self.INVIS_THRSH:
								vis = True

						if vis: 
							store_y = max(p_kp[i[0]][1], p_kp[i[1]][1])
							break



					if not vis:
						# hips, knees, ankles not visible
						pad += sum(self._pratio[2:])
						res = F.relu(target_bbox[k, 3]-p_kp[self.thrx_idx, 1])
						known = F.relu(p_kp[self.thrx_idx, 1]-target_bbox[k, 1])
						tmp = F.relu((self.r_thrx2hip/self.r_head2thrx)*known-res) # pixel
						pad += (self.r_thrx2hip*tmp/(tmp+res)).item()

						if p_kp[self.thrx_idx, 1].item() == 0:
							pad = 1.0
				

					elif iteration == 0:
						pad = 0.
				
					else:
						pad += sum(self._pratio[::-1][:iteration])
						res = F.relu(target_bbox[k, 3]-store_y)
						known = F.relu(p_kp[self.thrx_idx, 1]-target_bbox[k, 1])
						tmp = F.relu((self._pratio[::-1][iteration]/self.r_head2thrx)*known-res)
						pad += (self._pratio[::-1][iteration]*tmp/(tmp+res)).item()
					
						if p_kp[self.thrx_idx, 1].item() == 0:
							pad = 1.0
							

				
				p_bbox = 1.*bbox[k, :]
				h = p_bbox[3] - p_bbox[1]
				if pad == 1.0:
					new_h = h
					if not is_train:
						pad = 0.
					p_bbox[3] = p_bbox[1] + new_h
					new_bbox.append(p_bbox.tolist())
					new_pad.append(pad)

				else:
					if not is_train:
						curr_aug_per = 1
					else:
						curr_aug_per = self.aug_per + 1


					p_bbox_repeat = p_bbox.repeat(curr_aug_per, 1)

					for ap in range(curr_aug_per):
						if ap == 0:
							random_cut = 0.
						else:
							random_cut = self.rand_cut*random.random()
						# 0-0.3
						update_pad = pad + (1.-pad)*random_cut
						p_bbox_repeat_ = p_bbox_repeat[ap]

						new_h = h*(1./(1.-update_pad))
						p_bbox_repeat_[3] = p_bbox_repeat_[1] + new_h

						new_bbox.append(p_bbox_repeat_.tolist())
						new_pad.append(update_pad)


			new_bboxlist = BoxList(new_bbox, img_size, mode="xyxy")
			new_bboxlist._copy_extra_fields(target)
			new_bboxlist.add_field("pad_ratio", torch.tensor(new_pad))
			return_boxlist.append(new_bboxlist)

		return_boxlist = [return_box.to(device) for return_box in return_boxlist]


		return return_boxlist
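A minimal sketch (with hypothetical numbers) of the box-stretching arithmetic at the end of `get_ratio`: a pad ratio `p` means the visible box covers a fraction `1 - p` of the full person, so the completed height is `h / (1 - p)`.

h = 120.0                                      # visible box height in pixels (hypothetical)
pad = 0.25                                     # estimated fraction of the person that is cut off
random_cut = 0.1                               # extra random truncation applied during training
update_pad = pad + (1. - pad) * random_cut     # 0.325
new_h = h * (1. / (1. - update_pad))           # ~177.8 px, height of the completed box
print(update_pad, new_h)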
Code example #2
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        if self.onnx_export:
            from torch.onnx import operators
            num_anchors = operators.shape_as_tensor(objectness)[1].unsqueeze(0)

            pre_nms_top_n = torch.min(
                torch.cat((torch.tensor([self.pre_nms_top_n],
                                        dtype=torch.long), num_anchors), 0))
        else:
            pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        if self.onnx_export:
            # NOTE: for now only batch == 1 is supported for ONNX export.
            assert topk_idx.size(0) == 1
            topk_idx = topk_idx.squeeze(0)
            box_regression = box_regression.index_select(1, topk_idx)
        else:
            box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        if self.onnx_export:
            concat_anchors = concat_anchors.reshape(N, -1, 4).index_select(
                1, topk_idx)
        else:
            concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx,
                                                              topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size,
                                         self.onnx_export)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
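`permute_and_flatten` itself is not included in this excerpt; the sketch below shows the reshape it is assumed to perform (it matches the explicit `view`/`permute`/`reshape` sequence in the non-CUDA branch of code example #8 further down), so that each output row corresponds to one (location, anchor) pair in the same order as the concatenated anchors.

import torch

def permute_and_flatten_sketch(layer, N, A, C, H, W):
    # (N, A*C, H, W) -> (N, H*W*A, C)
    layer = layer.view(N, A, C, H, W)
    layer = layer.permute(0, 3, 4, 1, 2)
    return layer.reshape(N, -1, C)

box_regression = torch.randn(2, 3 * 4, 5, 5)   # N=2, A=3, H=W=5 (hypothetical shapes)
flat = permute_and_flatten_sketch(box_regression, 2, 3, 4, 5, 5)
print(flat.shape)                              # torch.Size([2, 75, 4])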
Code example #3
def convert_kitti_instance_only(root, ann_file, out_dir, dataset):
    image_index, label_list, boxes_list, boxes_3d_list, \
    alphas_list = get_pkl_element(ann_file)
    number_image = len(image_index)
    image_lists = []
    calib_lists = []
    depth_list = []
    for i in range(number_image):
        image_lists.append(root + '/training' + '/image_2/' + image_index[i] +
                           ".png")
        calib_lists.append(root + '/training' + '/calib/' + image_index[i] +
                           ".txt")
        depth_list.append(root + '/training' + '/depth/' + image_index[i] +
                          "_01.png.npz")

    # img_id = 0
    # ann_id = 0
    img_id = 3712
    ann_id = 11855

    # cat_id = 1
    category_dict = {'car': 1}

    category_instancesonly = [
        'person',
        'rider',
        'car',
        'truck',
        'bus',
        'train',
        'motorcycle',
        'bicycle',
    ]

    ann_dict = {}
    images = []
    annotations = []

    for i, id in image_index.items():
        if len(images) % 50 == 0:
            print("Processed %s images, %s annotations" %
                  (len(images), len(annotations)))
        image = {}
        image['id'] = img_id
        img_id += 1

        img = Image.open(image_lists[i]).convert("RGB")
        width, height = img.size
        image['width'] = width
        image['height'] = height
        image['file_name'] = image_lists[i].split('/')[-1]
        image['seg_file_name'] = image['file_name']

        images.append(image)

        num_instances = label_list[i].shape[0]
        boxes = boxes_list[i]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)
        box2d = BoxList(boxes, img.size, mode="xyxy")
        area = box2d.area().tolist()
        boxes = box2d.convert('xywh')
        boxes = boxes.bbox.tolist()

        for j in range(num_instances):
            ann = {}
            ann['id'] = ann_id
            ann_id += 1
            ann['image_id'] = image['id']
            ann['segmentation'] = []

            ann['category_id'] = category_dict['car']
            ann['iscrowd'] = 0
            ann['area'] = area[j]
            ann['bbox'] = boxes[j]

            annotations.append(ann)

    ann_dict['images'] = images
    categories = [{
        "id": category_dict[name],
        "name": name
    } for name in category_dict]
    ann_dict['categories'] = categories
    ann_dict['annotations'] = annotations
    print("Num categories: %s" % len(categories))
    print("Num images: %s" % len(images))
    print("Num annotations: %s" % len(annotations))

    with open(
            os.path.join(out_dir,
                         'instancesonly_filtered_gtFine_' + dataset + '.json'),
            'w') as outfile:
        outfile.write(json.dumps(ann_dict))
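The resulting JSON follows the usual COCO detection layout; a minimal sketch of one entry per list (field names taken from the code above, values purely hypothetical):

ann_dict_sketch = {
    "images": [
        {"id": 3712, "width": 1242, "height": 375,
         "file_name": "000000.png", "seg_file_name": "000000.png"},
    ],
    "annotations": [
        {"id": 11855, "image_id": 3712, "segmentation": [],
         "category_id": 1, "iscrowd": 0, "area": 5280.0,
         "bbox": [100.0, 150.0, 80.0, 66.0]},   # xywh, after the convert('xywh') call
    ],
    "categories": [{"id": 1, "name": "car"}],
}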
Code example #4
    def forward(self, x, rel_pair_idxs, boxes):
        """
        Arguments:
            x (tuple[tensor, tensor]): x contains the relation logits
                and finetuned object logits from the relation model.
            rel_pair_idxs (list[tensor]): subject and object indices of each relation;
                each tensor has size (num_rel, 2)
            boxes (list[BoxList]): bounding boxes that are used as
                reference, one for each image

        Returns:
            results (list[BoxList]): one BoxList for each image, containing
                the extra fields labels and scores
        """
        relation_logits, refine_logits = x
        
        if self.attribute_on:
            if isinstance(refine_logits[0], (list, tuple)):
                finetune_obj_logits, finetune_att_logits = refine_logits
            else:
                # just use attribute feature, do not actually predict attribute
                self.attribute_on = False
                finetune_obj_logits = refine_logits
        else:
            finetune_obj_logits = refine_logits

        results = []
        for i, (rel_logit, obj_logit, rel_pair_idx, box) in enumerate(zip(
            relation_logits, finetune_obj_logits, rel_pair_idxs, boxes
        )):
            if self.attribute_on:
                att_logit = finetune_att_logits[i]
                att_prob = torch.sigmoid(att_logit)
            obj_class_prob = F.softmax(obj_logit, -1)
            obj_class_prob[:, 0] = 0  # set background score to 0
            num_obj_bbox = obj_class_prob.shape[0]
            num_obj_class = obj_class_prob.shape[1]

            if self.use_gt_box:
                obj_scores, obj_pred = obj_class_prob[:, 1:].max(dim=1)
                obj_pred = obj_pred + 1
            else:
                # NOTE: by kaihua, apply late nms for object prediction
                obj_pred = obj_prediction_nms(box.get_field('boxes_per_cls'), obj_logit, self.later_nms_pred_thres)
                obj_score_ind = torch.arange(num_obj_bbox, device=obj_logit.device) * num_obj_class + obj_pred
                obj_scores = obj_class_prob.view(-1)[obj_score_ind]
            
            assert obj_scores.shape[0] == num_obj_bbox
            obj_class = obj_pred

            if self.use_gt_box:
                boxlist = box
            else:
                # mode==sgdet
                # apply regression based on finetuned object class
                device = obj_class.device
                batch_size = obj_class.shape[0]
                regressed_box_idxs = obj_class
                boxlist = BoxList(box.get_field('boxes_per_cls')[torch.arange(batch_size, device=device), regressed_box_idxs], box.size, 'xyxy')
            boxlist.add_field('pred_labels', obj_class) # (#obj, )
            boxlist.add_field('pred_scores', obj_scores) # (#obj, )

            if self.attribute_on:
                boxlist.add_field('pred_attributes', att_prob)
            
            # sort triples according to the score product
            obj_scores0 = obj_scores[rel_pair_idx[:, 0]]
            obj_scores1 = obj_scores[rel_pair_idx[:, 1]]
            rel_class_prob = F.softmax(rel_logit, -1)
            rel_scores, rel_class = rel_class_prob[:, 1:].max(dim=1)
            rel_class = rel_class + 1
            # TODO Kaihua: how about using a weighted sum here?  e.g. rel*1 + obj*0.8 + obj*0.8
            triple_scores = rel_scores * obj_scores0 * obj_scores1
            _, sorting_idx = torch.sort(triple_scores.view(-1), dim=0, descending=True)
            rel_pair_idx = rel_pair_idx[sorting_idx]
            rel_class_prob = rel_class_prob[sorting_idx]
            rel_labels = rel_class[sorting_idx]

            boxlist.add_field('rel_pair_idxs', rel_pair_idx) # (#rel, 2)
            boxlist.add_field('pred_rel_scores', rel_class_prob) # (#rel, #rel_class)
            boxlist.add_field('pred_rel_labels', rel_labels) # (#rel, )
            # should have fields: rel_pair_idxs, pred_rel_class_prob, pred_rel_labels, pred_labels, pred_scores
            # TODO Kaihua: add a new type of element that can have a different length from the boxlist
            # (similar to a field, except that once the boxlist has such an element, slicing should be forbidden)
            # it is not safe to add fields about relations into the boxlist!
            results.append(boxlist)
        return results
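A small sketch of the triple-scoring step above with toy tensors: each relation is scored by the product of its predicate score and the scores of its subject and object, and relations are then sorted by that product.

import torch

obj_scores = torch.tensor([0.9, 0.6, 0.8])             # per-object confidence (hypothetical)
rel_pair_idx = torch.tensor([[0, 1], [2, 0]])           # (subject, object) index pairs
rel_scores = torch.tensor([0.5, 0.7])                   # best non-background predicate score per pair

triple_scores = rel_scores * obj_scores[rel_pair_idx[:, 0]] * obj_scores[rel_pair_idx[:, 1]]
_, sorting_idx = torch.sort(triple_scores.view(-1), dim=0, descending=True)
print(triple_scores)                # tensor([0.2700, 0.5040])
print(rel_pair_idx[sorting_idx])    # the (2, 0) pair is ranked first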
Code example #5
File: trainer_semi.py  Project: SYangDong/tse-t
def do_train(
    model,
    model_ema,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    local_rank,
    checkpoint_period,
    cfg_arg,
    arguments,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    meters_ema = MetricLogger(delimiter="  ")

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    ema_decay = arguments["ema_decay"]
    loss_semi = arguments['loss_semi']
    temporal_save_path = cfg_arg["temporal_save_path"]
    model.train()
    model_ema.train()
    box_coder = BoxCoder(weights=(10., 10., 5., 5.))
    temporal_ens = {}
    start_training_time = time.time()
    end = time.time()
    labeled_database = arguments["HYPER_PARAMETERS"]['LABELED_DATABASE']
    temporal_supervised_losses = []

    for iteration, (images, targets_with_trans_info,
                    idx) in enumerate(data_loader, start_iter):
        targets = [_iter[0] for _iter in targets_with_trans_info]
        trans_info = [_iter[1] for _iter in targets_with_trans_info]

        try:
            db_idx, img_idx, idx_name, bboxes_batch = map_to_img(
                data_loader, idx)
            temporal_ens_bboxes = [
                ensemble_bboxes(_boxes, _im_sz, arguments["ANCHOR_STRIDES"],
                                arguments["HYPER_PARAMETERS"]['ENS_THRE'],
                                device)
                for _boxes, _im_sz in zip(bboxes_batch, images.image_sizes)
            ]

            img_size = [(_sz[1], _sz[0]) for _sz in images.image_sizes]
            pred_trans_info = copy.deepcopy(trans_info)
            temporal_ens_pred = []

            for i, _sz in enumerate(img_size):
                pred_trans_info[i][1] = _sz
                temporal_ens_per = [
                    trans_reverse(_temporal_ens, pred_trans_info[i]).to(device)
                    for _temporal_ens in temporal_ens_bboxes[i]
                ]
                temporal_ens_pred.append(temporal_ens_per)

            db_w = []
            for i, _db in enumerate(db_idx):
                if _db not in labeled_database:
                    _bbox = BoxList(
                        torch.zeros([1, 4]),
                        (images.image_sizes[i][1], images.image_sizes[i][0]),
                        mode="xyxy")
                    _bbox.add_field('labels', torch.ones([1]))
                    targets[i] = _bbox
                    db_w.append(0.)
                else:
                    db_w.append(1.)

            if any(len(target) < 1 for target in targets):
                logger.error(
                    f"Iteration={iteration + 1} || Image Ids used for training {idx_name} || targets Length={[len(target) for target in targets]}"
                )
                continue
            data_time = time.time() - end
            iteration = iteration + 1
            arguments["iteration"] = iteration

            images = images.to(device)
            targets = [target.to(device) for target in targets]
            update_ema_variables(model, model_ema, ema_decay, iteration)

            _loss_dict, result = model(images, targets)
            # --------------------- losses below are masked by whether each image comes from a labeled database
            with torch.no_grad():
                _loss_dict_ema, result_ema = model_ema(images, targets)
                is_labeled_db_weight = torch.tensor(
                    db_w, dtype=torch.float32).to(device)

            loss_dict = {}
            loss_dict_ema = {}
            for _key in _loss_dict.keys():
                loss_dict[_key] = torch.sum(
                    torch.stack(_loss_dict[_key], dim=0) *
                    is_labeled_db_weight)
                loss_dict_ema[_key] = torch.sum(
                    torch.stack(_loss_dict_ema[_key], dim=0) *
                    is_labeled_db_weight)

            # loss_dict = _loss_dict
            # loss_dict_ema = _loss_dict_ema

            #result_origin = [trans_reverse(_res,_info) for _res,_info in zip(result_ema,trans_info)]
            #result_origin = predict_collect_postprocess(arguments['postprocess'],result_ema,trans_info)
            result_origin = predict_retina_postprocess(
                arguments['postprocess'], box_coder, result_ema, trans_info,
                images.image_sizes)

            # any_zeros = [_iter.bbox.shape[0] == 0 for _iter in temporal_ens_pred]
            # if any(any_zeros):
            #     loss_dict['semi_box_reg'] = torch.tensor(0,dtype=torch.float32,device=device)
            #     loss_dict['semi_cls'] = torch.tensor(0,dtype=torch.float32,device=device)
            # else:
            #     semi_loss = loss_semi(
            #         result, temporal_ens_pred)
            #     for _key in semi_loss.keys():
            #         loss_dict[_key] = torch.sum(torch.stack(semi_loss[_key],dim=0) * (1 - db_weight)) * arguments["semi_weight"]

            # balance losses
            with torch.no_grad():
                supervised_loss = (loss_dict['loss_retina_cls'] +
                                   loss_dict['loss_retina_reg']) / (
                                       np.sum(db_w) + 0.1)
            temporal_supervised_losses.append(supervised_loss)
            temporal_supervised_losses = temporal_supervised_losses[-100:]
            sup_loss = torch.stack(temporal_supervised_losses).mean()
            meters.update(sup_loss=sup_loss)

            if get_world_size() > 1:
                torch.distributed.all_reduce(
                    torch.stack(temporal_supervised_losses).mean(),
                    op=torch.distributed.ReduceOp.SUM)
            balance_weight = min(1. / (sup_loss / 0.28)**12, 1.)

            semi_loss = semi_loss_fn(
                result,
                result_ema,
                temporal_ens_pred,
                images.image_sizes,
                box_coder,
                n_cls=arguments["HYPER_PARAMETERS"]['NCLS'],
                reg_cons_w=arguments["HYPER_PARAMETERS"]['REG_CONSIST_WEIGHT'])
            semi_loss_weight = semi_weight_by_epoch(
                iteration,
                start_iter=arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM'] *
                arguments["HYPER_PARAMETERS"]['START_ITER'],
                rampup_length=arguments["HYPER_PARAMETERS"]['EPOCH_BATCH_NUM']
                * arguments["HYPER_PARAMETERS"]['RAMPUP_LENGTH'],
                consistence_weight=arguments["HYPER_PARAMETERS"]
                ['CONSISTENCE_WEIGHT'],
                consistence_trunc=arguments["HYPER_PARAMETERS"]
                ['MAX_CONSISTENT_LOSS'])  #semi_weight_by_epoch(iteration)
            for _key in semi_loss.keys():
                #loss_dict[_key] = torch.sum(semi_loss[_key] * (1 - is_labeled_db_weight))*semi_loss_weight*balance_weight # not used labeled
                loss_dict[_key] = torch.sum(semi_loss[_key]) * semi_loss_weight

            for i, (_id, _labeled) in enumerate(zip(idx_name, db_w)):
                # if _labeled == 1:
                #     continue
                result_dict = {
                    'iteration': iteration,
                    'result': result_origin[i]
                }
                if _id in temporal_ens.keys():
                    temporal_ens[_id].append(result_dict)
                else:
                    temporal_ens[_id] = [result_dict]

            #print('id={},{},scores={}----------{}'.format(idx_name[0],idx_name[1],result_origin[0].get_field('objectness')[:5],result_origin[1].get_field('objectness')[:5]))
            losses = sum(loss for loss in loss_dict.values())

            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict)

            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            meters.update(loss=losses_reduced, **loss_dict_reduced)

            loss_dict_reduced_ema = reduce_loss_dict(loss_dict_ema)
            losses_reduced_ema = sum(
                loss for loss in loss_dict_reduced_ema.values())
            meters_ema.update(loss=losses_reduced_ema, **loss_dict_reduced_ema)

            optimizer.zero_grad()
            # Note: If mixed precision is not used, this ends up doing nothing
            # Otherwise apply loss scaling for mixed-precision recipe
            with amp.scale_loss(losses, optimizer) as scaled_losses:
                scaled_losses.backward()

            if not iteration < arguments["HYPER_PARAMETERS"][
                    'EPOCH_BATCH_NUM'] * arguments["HYPER_PARAMETERS"][
                        'START_ITER']:
                optimizer.step()
            #scheduler.step()

            batch_time = time.time() - end
            end = time.time()
            meters.update(time=batch_time, data=data_time)

            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

            if iteration % 20 == 0 or iteration == max_iter:
                logger.info(
                    meters.delimiter.join([
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "{meters_ema}",
                        "lr: {lr:.6f}",
                        "semi_w:{semi_w:2.3f}",
                        "supervised loss{sup_loss:2.3f},"
                        "balance_weight{balance_weight:2.3f},"
                        "max mem: {memory:.0f}",
                    ]).format(
                        eta=eta_string,
                        iter=iteration,
                        meters=str(meters),
                        meters_ema=str(meters_ema),
                        lr=optimizer.param_groups[0]["lr"],
                        semi_w=semi_loss_weight,
                        sup_loss=sup_loss,
                        balance_weight=balance_weight,
                        memory=torch.cuda.max_memory_allocated() / 1024.0 /
                        1024.0,
                    ))

            if (iteration - 50) % 100 == 0:
                for _key in temporal_ens.keys():
                    for _iter in temporal_ens[_key]:
                        str_folder = os.path.join(
                            temporal_save_path,
                            _key)  #"{}/{}".format(temporal_save_path,_key)
                        str_file = '{}/{}_loc{}_iter_x{:07d}.pt'.format(
                            str_folder, _key, local_rank, _iter['iteration'])
                        if not os.path.exists(str_folder):
                            os.makedirs(str_folder)
                        torch.save(_iter['result'], str_file)
                        del _iter['result']

                del temporal_ens
                temporal_ens = {}

            if iteration % checkpoint_period == 0:
                save_time = time.time()
                checkpointer.save("model_{:07d}".format(iteration),
                                  **arguments)

            if iteration == max_iter:
                checkpointer.save("model_final", **arguments)

        except Exception as e:
            print('error in file ', idx_name, img_idx)
            raise e

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
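`update_ema_variables` is not shown in this excerpt; a common mean-teacher style implementation looks like the sketch below (an assumption about its behavior, not the project's actual helper):

import torch

def update_ema_variables(model, model_ema, ema_decay, iteration):
    # Assumed mean-teacher update: the EMA (teacher) weights track an exponential
    # moving average of the student weights; ramping the decay up early in training
    # is a common choice, but the real helper may differ.
    alpha = min(1.0 - 1.0 / (iteration + 1), ema_decay)
    with torch.no_grad():
        for ema_p, p in zip(model_ema.parameters(), model.parameters()):
            ema_p.mul_(alpha).add_(p, alpha=1.0 - alpha)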
Code example #6
    def filter_results(self, boxlist, num_classes):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        boxes_per_cls = boxlist.bbox.reshape(-1, num_classes, 4)
        scores = boxlist.get_field("pred_scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        orig_inds = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("pred_scores", scores_j)
            boxlist_for_class, keep = boxlist_nms(
                boxlist_for_class,
                self.nms,
                max_proposals=self.post_nms_per_cls_topn,
                score_field='pred_scores')
            inds = inds[keep]
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "pred_labels",
                torch.full((num_labels, ), j, dtype=torch.int64,
                           device=device))
            result.append(boxlist_for_class)
            orig_inds.append(inds)

        # NOTE (kaihua): according to Neural-MOTIFS (and my experiments), we need to remove duplicate bboxes
        if self.nms_filter_duplicates or self.save_proposals:
            assert len(orig_inds) == (num_classes - 1)
            # set all bg to zero
            inds_all[:, 0] = 0
            for j in range(1, num_classes):
                inds_all[:, j] = 0
                orig_idx = orig_inds[j - 1]
                inds_all[orig_idx, j] = 1
            dist_scores = scores * inds_all.float()
            scores_pre, labels_pre = dist_scores.max(1)
            final_inds = scores_pre.nonzero()
            assert final_inds.dim() != 0
            final_inds = final_inds.squeeze(1)

            scores_pre = scores_pre[final_inds]
            labels_pre = labels_pre[final_inds]

            result = BoxList(boxes_per_cls[final_inds, labels_pre],
                             boxlist.size,
                             mode="xyxy")
            result.add_field("pred_scores", scores_pre)
            result.add_field("pred_labels", labels_pre)
            orig_inds = final_inds
        else:
            result = cat_boxlist(result)
            orig_inds = torch.cat(orig_inds, dim=0)

        number_of_detections = len(result)
        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("pred_scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.detections_per_img + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
            orig_inds = orig_inds[keep]
        return result, orig_inds, boxes_per_cls[orig_inds]
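The final per-image cap relies on `torch.kthvalue`; a standalone sketch of that step with toy scores:

import torch

cls_scores = torch.tensor([0.9, 0.2, 0.75, 0.6, 0.4])
detections_per_img = 3

# keep the top `detections_per_img` scores: the threshold is the k-th smallest
# value with k = N - detections_per_img + 1
image_thresh, _ = torch.kthvalue(cls_scores, len(cls_scores) - detections_per_img + 1)
keep = torch.nonzero(cls_scores >= image_thresh.item()).squeeze(1)
print(keep)   # indices of the 3 highest scores: tensor([0, 2, 3])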
Code example #7
File: curriculum_sel.py  Project: SYangDong/tse-t
def main():
    result_file = 'model_path.pth'
    input_img_folder = './input_image_folder'
    output_folder = './folder'
    config_file = './retinanet_R-50-FPN_1x_coco_unlabeled.yaml'
    result_predict = torch.load(result_file)
    jpg_output = './output_jpg'

    cfg.merge_from_file(config_file)
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=False)[0]

    score_thr = 0.43
    score_sel = 0.3
    Zeros_obj = 0
    hist_means = []
    hist_ious = []
    Scores_minor = 0
    ious_hard = 0

    sel_file_id = []
    for _id, _bbox in enumerate(result_predict):
        print(_id)
        img_info = data_loaders_val.dataset.get_img_info(_id)
        sel_num = (_bbox.get_field('scores') > score_sel).sum()

        img_src = os.path.join(output_folder, img_info['file_name'])
        # if img_src.find('000000335584')<0:
        #     continue

        if sel_num < 1:
            Zeros_obj += 1
            # if not os.path.exists(img_src):
            #     continue
            #shutil.copy(img_src,jpg_output)
            continue

        # calculate the mean score of the selected boxes
        sel_scores = _bbox.get_field('scores')[
            _bbox.get_field('scores') > score_sel]
        mean_scores = sel_scores.mean().numpy() * 100
        hist_means.append(mean_scores)
        if mean_scores < (score_thr * 100):
            Scores_minor += 1
            # if not os.path.exists(img_src):
            #     continue
            # shutil.copy(img_src,jpg_output)
            continue

        # calculate IoUs between the selected boxes
        ind_sel = _bbox.get_field('scores') > score_sel
        box_sel = BoxList(_bbox.bbox[ind_sel], _bbox.size)
        ious = boxlist_iou(box_sel, box_sel) - torch.eye(len(box_sel))
        ious_scores = ious.mean() * 1000
        hist_ious.append(ious_scores)
        if ious_scores > 150:
            ious_hard += 1
            # if not os.path.exists(img_src):
            #     continue
            # shutil.copy(img_src,jpg_output)
            continue
        sel_file_id.append(img_info['file_name'])

    plt.hist(hist_ious, bins=5)
    plt.gca().set(title='Frequency Histogram of IoU Scores',
                  ylabel='Frequency')
    plt.savefig('./test2.jpg')
    np.save('sel_unlabeled_ids_r101.npy', sel_file_id)

    print('zeros object = ', Zeros_obj, 'Scores_minor',
          Scores_minor, 'ious_hard', ious_hard, 'total sample',
          len(result_predict), 'select sample', len(sel_file_id))
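The IoU-based "hard image" test above subtracts the identity matrix so that each box's perfect overlap with itself does not inflate the mean; a minimal sketch (torchvision's `box_iou` stands in here for `boxlist_iou`):

import torch
from torchvision.ops import box_iou

# two heavily overlapping boxes and one isolated box (xyxy, hypothetical)
boxes = torch.tensor([[10., 10., 50., 50.],
                      [12., 12., 52., 52.],
                      [70., 70., 90., 90.]])

# subtracting the identity removes each box's IoU with itself, so the mean
# reflects only cross-box overlap -- the duplicate signal used above
ious = box_iou(boxes, boxes) - torch.eye(len(boxes))
print((ious.mean() * 1000).item())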
Code example #8
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        num_anchors = A * H * W

        # If inputs are on GPU, use a faster path
        use_fast_cuda_path = (objectness.is_cuda and box_regression.is_cuda)
        # Encompasses box decode, clip_to_image and remove_small_boxes calls
        if use_fast_cuda_path:
            objectness = objectness.reshape(N, -1)  # Now [N, AHW]
            objectness = objectness.sigmoid()

            pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
            objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                                   dim=1,
                                                   sorted=True)

            # Get all image shapes, and cat them together
            image_shapes = [box.size for box in anchors]
            image_shapes_cat = torch.tensor([box.size for box in anchors],
                                            device=objectness.device).float()

            # Get a single tensor for all anchors
            concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)

            # Note: Take all anchors, we'll index accordingly inside the kernel
            # only take the anchors corresponding to the topk boxes
            concat_anchors = concat_anchors.reshape(N, -1,
                                                    4)  # [batch_idx, topk_idx]

            # Return pre-nms boxes, associated scores and keep flag
            # Encompasses:
            # 1. Box decode
            # 2. Box clipping
            # 3. Box filtering
            # At the end we need to keep only the proposals & scores flagged
            # Note: topk_idx, objectness are sorted => proposals, objectness, keep are also
            # sorted -- this is important later
            proposals, objectness, keep = C.GeneratePreNMSUprightBoxes(
                N,
                A,
                H,
                W,
                topk_idx,
                objectness.float(
                ),  # Need to cast these as kernel doesn't support fp16
                box_regression.float(),
                concat_anchors,
                image_shapes_cat,
                pre_nms_top_n,
                self.min_size,
                self.box_coder.bbox_xform_clip,
                True)

            # view as [N, pre_nms_top_n, 4]
            proposals = proposals.view(N, -1, 4)
            objectness = objectness.view(N, -1)
        else:
            # reverse the reshape from before ready for permutation
            objectness = objectness.reshape(N, A, H, W)
            objectness = objectness.permute(0, 2, 3, 1).reshape(N, -1)
            objectness = objectness.sigmoid()

            pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
            objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                                   dim=1,
                                                   sorted=True)

            # put in the same format as anchors
            box_regression = box_regression.view(N, -1, 4, H,
                                                 W).permute(0, 3, 4, 1, 2)
            box_regression = box_regression.reshape(N, -1, 4)

            batch_idx = torch.arange(N, device=device)[:, None]
            box_regression = box_regression[batch_idx, topk_idx]

            image_shapes = [box.size for box in anchors]
            concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
            concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx,
                                                              topk_idx]

            proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                              concat_anchors.view(-1, 4))

            proposals = proposals.view(N, -1, 4)

        # handle non-fast path without changing the loop
        if not use_fast_cuda_path:
            keep = [None for _ in range(N)]

        result = []
        for proposal, score, im_shape, k in zip(proposals, objectness,
                                                image_shapes, keep):
            if use_fast_cuda_path:
                # Note: Want k to be applied per-image instead of all-at-once in batched code earlier
                #       clip_to_image and remove_small_boxes already done in single kernel
                p = proposal.masked_select(k[:, None]).view(-1, 4)
                score = score.masked_select(k)
                boxlist = BoxList(p, im_shape, mode="xyxy")
            else:
                boxlist = BoxList(proposal, im_shape, mode="xyxy")
                boxlist = boxlist.clip_to_image(remove_empty=False)
                boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist.add_field("objectness", score)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Code example #9
    def forward_for_single_feature_map(self, anchors, box_cls, box_regression,
                                       pre_nms_thresh):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = box_cls.device
        N, _, H, W = box_cls.shape
        A = int(box_regression.size(1) / 4)
        C = int(box_cls.size(1) / A)

        # put in the same format as anchors
        box_cls = box_cls.view(N, -1, C, H, W).permute(0, 3, 4, 1, 2)
        box_cls = box_cls.reshape(N, -1, C)
        box_cls = box_cls.sigmoid()

        box_regression = box_regression.view(N, -1, 4, H, W)
        box_regression = box_regression.permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 4)

        num_anchors = A * H * W

        results = [[] for _ in range(N)]
        candidate_inds = box_cls > pre_nms_thresh
        if candidate_inds.sum().item() == 0:
            empty_boxlists = []
            for a in anchors:
                empty_boxlist = BoxList(torch.Tensor(0, 4).to(device), a.size)
                empty_boxlist.add_field("labels",
                                        torch.LongTensor([]).to(device))
                empty_boxlist.add_field("scores", torch.Tensor([]).to(device))
                empty_boxlists.append(empty_boxlist)
            return empty_boxlists

        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        for batch_idx, (per_box_cls, per_box_regression, per_pre_nms_top_n, \
        per_candidate_inds, per_anchors) in enumerate(zip(
            box_cls,
            box_regression,
            pre_nms_top_n,
            candidate_inds,
            anchors)):

            # Sort and select TopN
            per_box_cls = per_box_cls[per_candidate_inds]
            per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)
            per_candidate_nonzeros = \
                    per_candidate_inds.nonzero()[top_k_indices, :]

            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            per_class += 1

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results[batch_idx] = boxlist

        return results
Code example #10
    def __getitem__(self, k):
        im_ori_RGB = Image.open(self.img_files[k]).convert('RGB')  # im_ori_RGB.size: (W, H)
        with open(self.pickle_files[k], 'rb') as filehandle:
            data = pickle.load(filehandle)
        bboxes = data['bboxes'].astype(np.float32) # [xywh]
        assert len(bboxes.shape)==2 and bboxes.shape[1]==4
        num_bboxes_ori = bboxes.shape[0]

        if 'label' in data:
            labels = data['label'] # ['car', 'person', 'person']
        else:
            labels = ['person'] * num_bboxes_ori
        # bboxes = np.load(self.bbox_npy_files[k]).astype(np.float32) # [xywh]
        if bboxes.shape[0] > self.cfg.DATA.COCO.GOOD_NUM:
            bboxes = bboxes[:self.cfg.DATA.COCO.GOOD_NUM, :]
            labels = labels[:self.cfg.DATA.COCO.GOOD_NUM]

        target_boxes = torch.as_tensor(bboxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(target_boxes, im_ori_RGB.size, mode="xywh").convert("xyxy")
        num_boxes = target.bbox.shape[0]
        
        if self.opt.est_kps:
            if 'kps' in data:
                kps_gt = data['kps'].astype(int) # [N, 51]
                if num_bboxes_ori > self.cfg.DATA.COCO.GOOD_NUM:
                    kps_gt = kps_gt[:self.cfg.DATA.COCO.GOOD_NUM, :]
                kps_gt = kps_gt.tolist() # [[51]]
            else:
                kps_gt = [[0]*51 for i in range(num_boxes)]

            target_keypoints = PersonKeypoints(kps_gt, im_ori_RGB.size)
            # kps_sum = torch.sum(torch.sum(target_keypoints.keypoints[:, :, :2], 1), 1)
            # kps_mask = kps_sum != 0.
            # print(target_keypoints.keypoints.shape, kps_sum, kps_mask)

            target.add_field("keypoints", target_keypoints)
            # target.add_field("keypoints_mask", kps_mask)
            target = target.clip_to_image(remove_empty=True)
            classes = [1] * num_boxes # !!!!! all person (1) for now...
            classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
            classes = torch.tensor(classes)
            target.add_field("labels", classes)
            scores = torch.tensor([1.] * target.bbox.shape[0])
            target.add_field("scores", scores)

        W, H = im_ori_RGB.size[:2]
        if self.train:
            yannick_results = loadmat(self.yannick_mat_files[k])
            horizon_visible = yannick_results['horizon_visible'][0][0].astype(np.float32)
            assert horizon_visible == 1
            horizon = yannick_results['pitch'][0][0].astype(np.float32)
            horizon_pixels_yannick = H * horizon
            v0 = H - horizon_pixels_yannick
            vfov = yannick_results['vfov'][0][0].astype(np.float32)
            f_pixels_yannick = H/2./(np.tan(vfov/2.))
        else:
            f_pixels_yannick = -1
            v0 = -1

        im_yannickTransform = self.transforms_yannick(im_ori_RGB) # [0., 1.] by default
        im_maskrcnnTransform, target_maskrcnnTransform = self.transforms_maskrcnn(im_ori_RGB, target) # [0., 1.] by default
        # print('---', im.size(), np.asarray(im).shape)
        # im_array = np.asarray(im)
        # if len(im_array.shape)==2:
        #     im_array = np.stack((im_array,)*3, axis=-1)
        #     # print(im_array.shape)
        # x = torch.from_numpy(im_array.transpose((2,0,1)))

        if self.train and self.opt.est_kps:
            target_maskrcnnTransform.add_field("keypoints_ori", target_keypoints)
            target_maskrcnnTransform.add_field("boxlist_ori", target)
        target_maskrcnnTransform.add_field('img_files', [self.img_files[k]] * num_boxes)

        if self.train:
            y_person = 1.75
            bbox_good_list = bboxes
            vc = H / 2.
            inv_f2_yannick = 1./ (f_pixels_yannick * f_pixels_yannick)
            yc_list = []
            for bbox in bbox_good_list:
                vt = H - bbox[1]
                vb = H - (bbox[1] + bbox[3])
            #     v0_single = yc * (vt - vb) / y_person + vb
                yc_single = y_person * (v0 - vb) / (vt - vb) / (1. + (vc - v0) * (vc - vt) / f_pixels_yannick**2)
                yc_list.append(yc_single)
            yc_estCam = np.median(np.asarray(yc_list))
        else:
            yc_estCam = -1

        assert len(labels)==bboxes.shape[0]
        # im_ori_BGR_array = np.array(im_ori_RGB.copy())[:,:,::-1]
        return im_yannickTransform, im_maskrcnnTransform, W, H, \
               float(yc_estCam), \
               self.pad_bbox(bboxes, self.GOOD_NUM).astype(np.float32), bboxes.shape[0], float(v0), float(f_pixels_yannick), \
               os.path.basename(self.img_files[k])[:12], self.img_files[k], target_maskrcnnTransform, labels
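The camera-height estimate in the training branch inverts a simple pinhole-geometry relation per person box; a sketch with hypothetical numbers (1.75 m assumed person height, as in the code):

H = 480.0                          # image height in pixels (hypothetical)
f_pixels = 600.0                   # focal length in pixels (hypothetical)
v0 = 260.0                         # horizon height measured from the bottom of the image
vc = H / 2.                        # vertical image center
y_person = 1.75                    # assumed person height in meters

bbox = [100.0, 120.0, 60.0, 200.0]           # xywh, hypothetical
vt = H - bbox[1]                             # top of the box, from the bottom of the image
vb = H - (bbox[1] + bbox[3])                 # bottom of the box, from the bottom of the image

yc = y_person * (v0 - vb) / (vt - vb) / (1. + (vc - v0) * (vc - vt) / f_pixels**2)
print(yc)                                    # estimated camera height in meters for this box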
Code example #11
File: inference.py  Project: sadicLiu/mask_rcnn_code
    def forward_for_single_feature_map(self, anchors, objectness, box_regression):
        """
        Arguments:
            anchors: list[BoxList], [image1-si-boxlist, image2-si-boxlist, ...]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W

        The return value is a list with len(result) == batch_size; each element is a BoxList.
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # objectness has shape [N, A, H, W]; we want to flatten each A*H*W feature map
        # into a vector.  A direct reshape would unroll starting from the A dimension,
        # so we first permute so that each H*W map is flattened, then concatenate them
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)

        # the RPN performs a class-agnostic binary classification (object vs. background)
        # [N, H*W*A]
        objectness = objectness.sigmoid()

        # [N, H*W*A, 4]
        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        # select the top k anchors by objectness score, k = pre_nms_top_n
        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

        # likewise keep only the top-k anchors in box_regression
        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]

        # boxList.bbox returns the tensor inside the object; concatenate the anchors
        # of every image in the batch.  boxList.bbox is a 2-D tensor, see anchor_generator.grid_anchors
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        # after the reshape: [N, H*W*A, 4], then select the top-k
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(
            box_regression.view(-1, 4), concat_anchors.view(-1, 4)
        )

        proposals = proposals.view(N, -1, 4)

        result = []
        # process each image in the batch separately
        for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)

            # clip anchors that extend beyond the image boundary
            boxlist = boxlist.clip_to_image(remove_empty=False)
            # remove anchors whose width or height is smaller than min_size
            boxlist = remove_small_boxes(boxlist, self.min_size)
            # nms
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
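The per-image top-k gather used above (`box_regression[batch_idx, topk_idx]`) works by broadcasting a column of batch indices against the top-k index matrix; a tiny sketch:

import torch

N, num_anchors = 2, 5
objectness = torch.rand(N, num_anchors)
box_regression = torch.rand(N, num_anchors, 4)

pre_nms_top_n = 3
objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

# batch_idx has shape [N, 1]; broadcasting it against topk_idx ([N, k]) selects,
# for every image, the regressions of that image's own top-k anchors
batch_idx = torch.arange(N)[:, None]
topk_regression = box_regression[batch_idx, topk_idx]
print(topk_regression.shape)    # torch.Size([2, 3, 4])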
Code example #12
File: dota.py  Project: Linyou/maskrcnn-benchmark
    def __getitem__(self, idx):

        imgid = self.imgids[idx]
        img = self.dotadev.loadImgs(imgid)[0]
        anns = self.dotadev.loadAnns(imgId=imgid)

        boxes = [obj["poly"] for obj in anns]
        boxes = torch.tensor(boxes)

        rc_boxes = _dots8ToRec4_(boxes)

        for i, bx in enumerate(boxes):
            arg_x = torch.argsort(bx, dim=0)
            x_min1 = bx[arg_x[0, 0]]
            x_min2 = bx[arg_x[1, 0]]
            x_max1 = bx[arg_x[2, 0]]
            x_max2 = bx[arg_x[3, 0]]

            if x_min2[0] == x_max2[0]:
                if x_min2[1] < x_max2[1]:
                    x_min2 = bx[arg_x[2, 0]]
                    x_max2 = bx[arg_x[3, 0]]

            # change the obb point to regular order.
            x_min = torch.cat([x_min1[None], x_min2[None]], dim=0)
            arg_y = torch.argsort(x_min, dim=0)
            point_1 = x_min[arg_y[0, 1]]
            point_4 = x_min[arg_y[1, 1]]

            if point_1[1] == point_4[1]:
                if point_1[0] < point_4[0]:
                    point_4 = x_min[arg_y[0, 1]]
                    point_1 = x_min[arg_y[1, 1]]

            x_max = torch.cat([x_max1[None], x_max2[None]], dim=0)
            arg_y = torch.argsort(x_max, dim=0)
            point_2 = x_max[arg_y[0, 1]]
            point_3 = x_max[arg_y[1, 1]]

            if point_2[1] == point_3[1]:
                if point_2[0] < point_3[0]:
                    point_3 = x_max[arg_y[0, 1]]
                    point_2 = x_max[arg_y[1, 1]]

            bx = torch.cat(
                [point_1[None], point_2[None], point_3[None], point_4[None]],
                dim=0)

            boxes[i] = bx

        boxes = boxes.view(boxes.size(0), -1)

        #
        areas = [obj["area"] for obj in anns]
        areas = torch.tensor(areas)

        target = BoxList(rc_boxes, img.size)
        rc_target = bb2(boxes, img.size)
        target.add_field('poly_bbox', rc_target)
        target.add_field('areas', areas)

        difficult = [int(obj["difficult"]) for obj in anns]
        difficult = torch.tensor(difficult)
        target.add_field('difficult', difficult)

        classes = [obj["name"] for obj in anns]
        classes = [self.classes_keys.index(c) for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        target = target.clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, idx
Code example #13
    def forward_for_single_feature_map(self, anchors, objectness, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            where N = batch size, A = number of anchor ratios, H/W = feature-map height/width at this level

            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        # add a dimension to the objectness feature map indicating whether the anchor at each
        # location contains an object, then collapse all dimensions except the batch dimension,
        # merging the per-image height/width/anchor information into a single dimension
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        # output: N images, -1 candidate boxes, 1 objectness score per box
        objectness = objectness.sigmoid()

        # likewise add a dimension holding the box-regression values of the anchor at each location
        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
        # output: N images, -1 candidate boxes, 4 regression values per box

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)  # number of anchors to keep per image (per feature map), as configured for training

        # take the objectness scores of the pre_nms_top_n highest-scoring anchors and their indices in the anchor list
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

        # build an index over the images in the batch
        batch_idx = torch.arange(N, device=device)[:, None]

        # gather the box-regression values of those top-scoring anchors
        box_regression = box_regression[batch_idx, topk_idx]

        # collect the image sizes
        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)

        # gather the pre_nms_top_n top-scoring anchors themselves
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        # decode proposal boxes from the anchor coordinates and the regression values
        proposals = self.box_coder.decode(
            box_regression.view(-1, 4), concat_anchors.view(-1, 4)  # the RPN works in 'xyxy' format
        )  # combining the actual xyxy coordinates with the regression values gives the new boxes

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
            # store the predicted boxes in a BoxList:
            # one BoxList per image per FPN level, holding all of its candidate boxes
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)  # attach each anchor's objectness score to the BoxList
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result  # the RPN outputs boxes in 'xyxy' format
Code example #14
    def forward_for_single_feature_map(
            self, locations, box_cls,
            box_regression, centerness,
            image_sizes):
        """
        Arguments:
            locations: tensor of (x, y) centers, one per feature-map location
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        N, C, H, W = box_cls.shape

        # put in the same format as locations
        box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
        box_cls = box_cls.reshape(N, -1, self.num_classes - 1).sigmoid()
        box_regression = box_regression.view(N, self.dense_points * 4, H, W).permute(0, 2, 3, 1)
        box_regression = box_regression.reshape(N, -1, 4)
        centerness = centerness.view(N, self.dense_points, H, W).permute(0, 2, 3, 1)
        centerness = centerness.reshape(N, -1).sigmoid()

        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        # multiply the classification scores with centerness scores
        box_cls = box_cls * centerness[:, :, None]

        results = []
        for i in range(N):
            per_box_cls = box_cls[i]
            per_candidate_inds = candidate_inds[i]
            per_box_cls = per_box_cls[per_candidate_inds]

            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1] + 1

            per_box_regression = box_regression[i]
            per_box_regression = per_box_regression[per_box_loc]
            per_locations = locations[per_box_loc]

            per_pre_nms_top_n = pre_nms_top_n[i]

            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]

            detections = torch.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ], dim=1)

            h, w = image_sizes[i]
            boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
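The `torch.stack` call above converts per-location (left, top, right, bottom) distances into xyxy boxes; a one-location sketch:

import torch

per_locations = torch.tensor([[50.0, 60.0]])                     # (x, y) of one location
per_box_regression = torch.tensor([[10.0, 20.0, 30.0, 40.0]])    # predicted l, t, r, b distances

detections = torch.stack([
    per_locations[:, 0] - per_box_regression[:, 0],      # x1 = x - left
    per_locations[:, 1] - per_box_regression[:, 1],      # y1 = y - top
    per_locations[:, 0] + per_box_regression[:, 2],      # x2 = x + right
    per_locations[:, 1] + per_box_regression[:, 3],      # y2 = y + bottom
], dim=1)
print(detections)    # tensor([[40., 40., 80., 100.]])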
Code example #15
def prepare_for_coco_detection_mstest(predictions, dataset):

    # pdb.set_trace()

    predictions_s = predictions[0]
    predictions_m = predictions[1]
    predictions_l = predictions[2]

    dataset_s = dataset[0]
    dataset_m = dataset[1]
    dataset_l = dataset[2]

    coco_results = []
    # one image.
    for image_id, predictions in enumerate(
            zip(predictions_s, predictions_m, predictions_l)):

        prediction_s = predictions[0]
        prediction_m = predictions[1]
        prediction_l = predictions[2]

        original_id = dataset_l.id_to_img_map[image_id]

        if len(prediction_l) == 0:
            continue

        img_info = dataset_l.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        img_id_json = img_info['id']

        # rescale the predicted bboxes to the original image size
        prediction_s = prediction_s.resize((image_width, image_height))
        prediction_m = prediction_m.resize((image_width, image_height))
        prediction_l = prediction_l.resize((image_width, image_height))

        # get single-scale results from type BoxList.
        bbox_s = prediction_s.bbox
        score_s = prediction_s.get_field('scores').unsqueeze(1)
        label_s = prediction_s.get_field('labels').unsqueeze(1)

        bbox_m = prediction_m.bbox
        score_m = prediction_m.get_field('scores').unsqueeze(1)
        label_m = prediction_m.get_field('labels').unsqueeze(1)

        bbox_l = prediction_l.bbox
        score_l = prediction_l.get_field('scores').unsqueeze(1)
        label_l = prediction_l.get_field('labels').unsqueeze(1)

        # concat single-scale result and convert to type BoxList. (small, medium, large)
        min_size = 0
        w = prediction_l.size[0]
        h = prediction_l.size[1]

        detections = torch.from_numpy(np.row_stack(
            (bbox_s, bbox_m, bbox_l))).cuda()
        per_class = torch.from_numpy(np.row_stack(
            (label_s, label_m, label_l))).cuda()
        per_class = torch.squeeze(per_class, dim=1)
        per_box_cls = torch.from_numpy(
            np.row_stack((score_s, score_m, score_l))).cuda()
        per_box_cls = torch.squeeze(per_box_cls, dim=1)

        boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, min_size)

        # apply NMS to the multi-scale results (small, medium, large)
        nms_method = cfg.TEST.MS_TEST_NMS
        nms_thresh = cfg.TEST.MS_TEST_NMS_THR

        num_classes = 81
        scores = boxlist.get_field("scores")
        labels = boxlist.get_field("labels")
        boxes = boxlist.bbox
        result = []

        # multi-scale test + NMS
        for j in range(1, num_classes):
            inds = (labels == j).nonzero().view(-1)
            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)

            if nms_method == "nms":
                boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                nms_thresh,
                                                score_field="scores")
            elif nms_method == "soft_nms":
                boxlist_for_class = boxlist_soft_nms(boxlist_for_class,
                                                     nms_thresh,
                                                     score_field="scores")
            else:
                print('unsupported NMS method: {}'.format(nms_method))

            num_labels = len(boxlist_for_class)

            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ),
                           j,
                           dtype=torch.int64,
                           device=scores.device))

            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        boxlist = result

        boxlist = boxlist.convert("xywh")
        boxes = boxlist.bbox.tolist()
        scores = boxlist.get_field("scores").tolist()
        labels = boxlist.get_field("labels").tolist()

        mapped_labels = [
            dataset_l.contiguous_category_id_to_json_id[int(i)] for i in labels
        ]

        coco_results.extend([{
            "image_id": original_id,
            "category_id": mapped_labels[k],
            "bbox": box,
            "score": scores[k],
        } for k, box in enumerate(boxes)])

    return coco_results
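
As a quick sanity check of the per-class NMS used above, here is a minimal usage of boxlist_nms on a hand-made BoxList; the boxes and scores are hypothetical inputs, not data from the project:

import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],    # heavily overlaps the first box
                      [50., 50., 60., 60.]])
bl = BoxList(boxes, (100, 100), mode="xyxy")
bl.add_field("scores", torch.tensor([0.9, 0.8, 0.7]))
kept = boxlist_nms(bl, 0.5, score_field="scores")
print(len(kept))  # expected: 2, the second box is suppressed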
Code example #16
    annotations = ET.parse(args.anno_path + lines[i] +'.xml').getroot()
    immage_info = preprocess_annotation(annotations)

    if immage_info["boxes"].shape[0] == 0:
        # a case I believe to be impossible: a ground-truth image without any bbox
        continue


    im_height, im_width = immage_info["im_info"]
    detections[i] = detections[i].resize((im_width, im_height))

    detections[i].bbox[:, 2:] += 1
    immage_info["boxes"][:, 2:] += 1

    iou_res = boxlist_iou(BoxList(detections[i].bbox.numpy(),(im_width, im_height)), BoxList(immage_info["boxes"].numpy(), (im_width, im_height))).numpy()

    gt_index = iou_res.argmax(axis=1)
    iou_with_gt = iou_res.max(axis=1)

    del iou_res


    for k in range(len(detections[i].extra_fields['labels'])):

        temp_dict = {}
        temp_dict[f"{i}_{k}"] = k
        temp_dict["label_p"] = classes[detections[i].extra_fields['labels'][k]]

        temp_dict["label_gt"] = immage_info["labels"][gt_index[k]]
        temp_dict["score"] = detections[i].extra_fields['scores'].numpy()[k]
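
The IoU matrix above comes from boxlist_iou; a rough sketch of what that helper computes (reconstructed from memory, including the usual +1 "inclusive pixel" convention) is:

import torch

def boxlist_iou_sketch(boxlist1, boxlist2):
    # pairwise IoU between two BoxLists in "xyxy" mode -> (N, M) tensor
    area1 = boxlist1.area()
    area2 = boxlist2.area()
    box1, box2 = boxlist1.bbox, boxlist2.bbox
    lt = torch.max(box1[:, None, :2], box2[:, :2])  # (N, M, 2) intersection top-left
    rb = torch.min(box1[:, None, 2:], box2[:, 2:])  # (N, M, 2) intersection bottom-right
    wh = (rb - lt + 1).clamp(min=0)                 # boxes treated as inclusive pixel ranges
    inter = wh[:, :, 0] * wh[:, :, 1]
    return inter / (area1[:, None] + area2 - inter)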
Code example #17
File: visual_genome.py  Project: zacharie12/SG
def load_graphs(roidb_file, split, num_im, num_val_im, filter_empty_rels,
                filter_non_overlap):
    """
    Load the file containing the GT boxes and relations, as well as the dataset split
    Parameters:
        roidb_file: HDF5
        split: (train, val, or test)
        num_im: Number of images we want
        num_val_im: Number of validation images
        filter_empty_rels: (will be filtered otherwise.)
        filter_non_overlap: If training, filter images that dont overlap.
    Return: 
        image_index: numpy array corresponding to the index of images we're using
        boxes: List where each element is a [num_gt, 4] array of ground 
                    truth boxes (x1, y1, x2, y2)
        gt_classes: List where each element is a [num_gt] array of classes
        relationships: List where each element is a [num_r, 3] array of 
                    (box_ind_1, box_ind_2, predicate) relationships
    """
    roi_h5 = h5py.File(roidb_file, 'r')
    data_split = roi_h5['split'][:]
    split_flag = 2 if split == 'test' else 0
    split_mask = data_split == split_flag

    # Filter out images without bounding boxes
    split_mask &= roi_h5['img_to_first_box'][:] >= 0
    if filter_empty_rels:
        split_mask &= roi_h5['img_to_first_rel'][:] >= 0

    image_index = np.where(split_mask)[0]
    if num_im > -1:
        image_index = image_index[:num_im]
    if num_val_im > 0:
        if split == 'val':
            image_index = image_index[:num_val_im]
        elif split == 'train':
            image_index = image_index[num_val_im:]

    split_mask = np.zeros_like(data_split).astype(bool)
    split_mask[image_index] = True

    # Get box information
    all_labels = roi_h5['labels'][:, 0]
    all_attributes = roi_h5['attributes'][:, :]
    all_boxes = roi_h5['boxes_{}'.format(BOX_SCALE)][:]  # cx,cy,w,h
    assert np.all(all_boxes[:, :2] >= 0)  # sanity check
    assert np.all(all_boxes[:, 2:] > 0)  # no empty box

    # convert from xc, yc, w, h to x1, y1, x2, y2
    all_boxes[:, :2] = all_boxes[:, :2] - all_boxes[:, 2:] / 2
    all_boxes[:, 2:] = all_boxes[:, :2] + all_boxes[:, 2:]

    im_to_first_box = roi_h5['img_to_first_box'][split_mask]
    im_to_last_box = roi_h5['img_to_last_box'][split_mask]
    im_to_first_rel = roi_h5['img_to_first_rel'][split_mask]
    im_to_last_rel = roi_h5['img_to_last_rel'][split_mask]

    # load relation labels
    _relations = roi_h5['relationships'][:]
    _relation_predicates = roi_h5['predicates'][:, 0]
    assert (im_to_first_rel.shape[0] == im_to_last_rel.shape[0])
    assert (_relations.shape[0] == _relation_predicates.shape[0]
            )  # sanity check

    # Get everything by image.
    boxes = []
    gt_classes = []
    gt_attributes = []
    relationships = []
    for i in range(len(image_index)):
        i_obj_start = im_to_first_box[i]
        i_obj_end = im_to_last_box[i]
        i_rel_start = im_to_first_rel[i]
        i_rel_end = im_to_last_rel[i]

        boxes_i = all_boxes[i_obj_start:i_obj_end + 1, :]
        gt_classes_i = all_labels[i_obj_start:i_obj_end + 1]
        gt_attributes_i = all_attributes[i_obj_start:i_obj_end + 1, :]

        if i_rel_start >= 0:
            predicates = _relation_predicates[i_rel_start:i_rel_end + 1]
            obj_idx = _relations[i_rel_start:i_rel_end +
                                 1] - i_obj_start  # range is [0, num_box)
            assert np.all(obj_idx >= 0)
            assert np.all(obj_idx < boxes_i.shape[0])
            rels = np.column_stack(
                (obj_idx,
                 predicates))  # (num_rel, 3), representing sub, obj, and pred
        else:
            assert not filter_empty_rels
            rels = np.zeros((0, 3), dtype=np.int32)

        if filter_non_overlap:
            assert split == 'train'
            # construct BoxList objects so we can reuse boxlist_iou;
            # the image size is irrelevant here, so a dummy (1000, 1000) is passed
            boxes_i_obj = BoxList(boxes_i, (1000, 1000), 'xyxy')
            inters = boxlist_iou(boxes_i_obj, boxes_i_obj)
            rel_overs = inters[rels[:, 0], rels[:, 1]]
            inc = np.where(rel_overs > 0.0)[0]

            if inc.size > 0:
                rels = rels[inc]
            else:
                split_mask[image_index[i]] = 0
                continue

        boxes.append(boxes_i)
        gt_classes.append(gt_classes_i)
        gt_attributes.append(gt_attributes_i)
        relationships.append(rels)

    return split_mask, boxes, gt_classes, gt_attributes, relationships
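
A hypothetical call, just to make the return values concrete; the file name and split sizes below are placeholders, not taken from the project:

split_mask, boxes, gt_classes, gt_attributes, relationships = load_graphs(
    "VG-SGG-with-attri.h5", split="train", num_im=-1, num_val_im=5000,
    filter_empty_rels=True, filter_non_overlap=True)

print(len(boxes))              # number of images kept by split_mask
print(boxes[0].shape)          # (num_gt, 4) in x1, y1, x2, y2 (BOX_SCALE coordinates)
print(relationships[0].shape)  # (num_rel, 3): subject idx, object idx, predicate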
Code example #18
    def forward(self,
                features_left,
                proposals_left,
                features_right=None,
                proposals_right=None,
                targets_left=None,
                targets_right=None,
                proposals_sampled=None):
        """
        Arguments:
            features (list[Tensor]): feature-maps from possibly several levels
            proposals (list[BoxList]): proposal boxes
            targets (list[BoxList], optional): the ground-truth targets.

        Returns:
            x (Tensor): the result of the feature extractor
            proposals (list[BoxList]): during training, the subsampled proposals
                are returned. During testing, the predicted boxlists are returned
            losses (dict[Tensor]): During training, returns the losses for the
                head. During testing, returns an empty dict.
        """

        # generate right from left(TODO: TEMP solution for inconsistent ground truth)
        # if not targets_left is None:
        #     targets_right = []
        #     for target in targets_left:
        #         target_right = target.copy_with_fields("labels").convert("xywh")
        #         disps = target.get_field("depths").convert("disp").depths
        #         target_right.bbox[:,0] -= disps
        #         targets_right.append(target_right.convert("xyxy"))

        if self.training:
            # Faster R-CNN subsamples the proposals during training with a fixed
            # positive / negative ratio
            if proposals_sampled is None:
                with torch.no_grad():
                    proposals_sampled_left, proposals_sampled_right = self.loss_evaluator.subsample(
                        proposals_left, proposals_right, targets_left,
                        targets_right)
                # only swap in the subsampled proposals when they were computed here;
                # otherwise proposals_sampled_left/right would be undefined
                proposals_left, proposals_right = proposals_sampled_left, proposals_sampled_right

        # calculate proposals_union
        proposals_union = []
        # print(len(proposals_left[0]), len(proposals_right[0]))
        for tl, tr in zip(proposals_left, proposals_right):
            assert (tl.size == tr.size)
            bbox_left, bbox_right = tl.convert("xyxy").bbox, tr.convert(
                "xyxy").bbox
            # print(bbox_left, bbox_right)
            new_bbox = torch.stack([
                torch.min(bbox_left[:, 0], bbox_right[:, 0]),
                torch.min(bbox_left[:, 1], bbox_right[:, 1]),
                torch.max(bbox_left[:, 2], bbox_right[:, 2]),
                torch.max(bbox_left[:, 3], bbox_right[:, 3]),
            ],
                                   dim=1)
            # print(new_bbox)
            proposals_union.append(BoxList(new_bbox, tl.size, mode="xyxy"))

        # extract features that will be fed to the final classifier. The
        # feature_extractor generally corresponds to the pooler + heads
        fl = self.feature_extractor(features_left, proposals_union)
        fr = self.feature_extractor(features_right, proposals_union)
        x = torch.cat([fl, fr], dim=1)
        # final classifier that converts the features into predictions
        class_logits, box_regression_left, box_regression_right = self.predictor(
            x)

        if not self.training:
            # result_left = self.post_processor((class_logits, box_regression_left), proposals_union)
            # result_right = self.post_processor((class_logits, box_regression_right), proposals_union)
            result_left, result_right = self.post_processor(
                (class_logits, box_regression_left, box_regression_right),
                proposals_union)
            # resample
            # result_union = [boxlist_union(rl, rr) for rl,rr in zip(result_left, result_right)]
            # fl = self.feature_extractor(features_left, result_union)
            # fr = self.feature_extractor(features_right, result_union)
            # x = torch.cat([fl, fr], dim=1)
            return x, result_left, result_right, {}

        # TODO: loss is not needed for mean teacher when MT_ON
        if not self.cfg.MODEL.ROI_BOX_HEAD.FREEZE_WEIGHT:
            loss_classifier, loss_box_reg, loss_box_reg_right = self.loss_evaluator(
                [class_logits], [box_regression_left], [box_regression_right],
                proposals_union)

        # if self.cfg.MODEL.ROI_BOX_HEAD.OUTPUT_DECODED_PROPOSAL:
        #     bbox_reg_weights = self.cfg.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS
        #     box_coder = BoxCoder(weights=bbox_reg_weights)
        #     boxes_per_image = [len(box) for box in proposals]
        #     concat_boxes = torch.cat([a.bbox for a in proposals], dim=0)
        #     decoded_proposals = box_coder.decode(
        #         box_regression_left.view(sum(boxes_per_image), -1), concat_boxes
        #     )
        #     decoded_proposals = decoded_proposals.split(boxes_per_image, dim=0)
        #     # decoded_proposals = self.post_processor((class_logits, box_regression), proposals)
        #     # make sure there are valid proposals
        #     for i, boxes in enumerate(decoded_proposals):
        #         if len(boxes) > 0:
        #             proposals[i].bbox = boxes.reshape(-1, 4)

        loss_dict = dict()

        # if self.cfg.MODEL.MT_ON:
        #     loss_dict.update(class_logits=class_logits, box_logits=box_regression_left)
        # loss_dict.update(class_logits=x, box_logits=x)
        # proposals_sampled.add_field('class_logits', class_logits)
        # proposals_sampled.add_field('box_logits', box_regression)

        if not self.is_mt and not self.cfg.MODEL.ROI_BOX_HEAD.FREEZE_WEIGHT:
            loss_dict.update(
                dict(loss_classifier=loss_classifier,
                     loss_box_reg=loss_box_reg,
                     loss_box_reg_right=loss_box_reg_right))

        return x, proposals_left, proposals_right, loss_dict
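
The left/right union computed inline above can be factored into a small helper; a sketch (the function name is made up, only the min/max logic is taken from the code):

import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

def boxlist_union_sketch(left, right):
    # element-wise union of two aligned BoxLists: the smallest box covering each left/right pair
    assert left.size == right.size and len(left) == len(right)
    bl, br = left.convert("xyxy").bbox, right.convert("xyxy").bbox
    union = torch.stack([
        torch.min(bl[:, 0], br[:, 0]),
        torch.min(bl[:, 1], br[:, 1]),
        torch.max(bl[:, 2], br[:, 2]),
        torch.max(bl[:, 3], br[:, 3]),
    ], dim=1)
    return BoxList(union, left.size, mode="xyxy")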
Code example #19
    def __getitem__(self, idx):
        img, anno = super(COCODataset, self).__getitem__(idx)

        img_original = img
        # img_original = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)
        # img_original = img_original + 127.5
        # trans1 = torchvision.transforms.ToTensor()
        # img_original = trans1(img_original)
        # img_original[img_original - 127.5 < 30] = 0
        # img_original = img_original * 0.65
        # cv2.imwrite('d.jpg', img_original)
        # print('============')
        # pass

        # img, anno = overlay_GT_on_scan(img, anno, self.gtcloud, self.gtann, resolution=1000)

        # noiseoffset = (torch.randn(2))  # minimal bbox noise is better?
        # for ann in anno:
        #     noiseratio = ((torch.randn(1)).div_(20)).exp_().clamp(0.9, 1.1)
        #     noiserotate = torch.randn(1).clamp(-3, 3)
        #     label = ann["bbox"]
        #     orien = ann["rotation"]
        #     box = bBox_2D(label[3], label[2], label[0] + label[2] / 2, label[1] + label[3] / 2,
        #                   orien)  # bBox_2D: length, width, xc, yc,alpha       label: 'bbox': [box.xtl, box.ytl, box.width, box.length],
        #     box.rotate(noiserotate)
        #     box.resize(noiseratio)
        #     # box.translate(noiseoffset[0], noiseoffset[1])
        #     box.xcyc2topleft()
        #     ann["bbox"] = [box.xtl, box.ytl, box.width, box.length]
        #     # slightly stretch the box may be better viewed ?
        #     ann["rotation"] = box.alpha

        # filter crowd annotations
        # TODO might be better to add an extra field
        # NOTE: crowd filtering is disabled here; the original filter was
        # `if obj["iscrowd"] == 0`
        anno = [obj for obj in anno]

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        # print(boxes)
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

        # print(target.bbox,'============================')

        classes = [obj["category_id"] for obj in anno]
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size)
        target.add_field("masks", masks)

        # ====================================
        rotations = [obj["rotation"] * math.pi / 180 for obj in anno]
        # print(rotations,'====')
        rotations = torch.tensor(rotations)
        # rotations = torch.stack((5 * torch.sin(rotations), 5 * torch.cos(rotations)))
        rotations = torch.stack((rotations, rotations))  # for testing
        # COMPLEX space   *5 is radius of unit circle or weight
        rotations = torch.transpose(rotations, dim0=0, dim1=-1)  # N*2 shape
        # print(rotations)
        target.add_field("rotations", rotations)

        # print(target.get_field('rotations'), '============ooo================')

        # print(target,'============================================')
        target = target.clip_to_image(remove_empty=False)
        # print(len(target), '==================targetanno=================')
        if self.transforms is not None:
            img, target = self.transforms(img, target)

        # print(img.size(),'=================%d=================='%idx)
        # print(target.get_field('rotations'), '============================')
        return img, target, idx, img_original
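
The commented-out line above hints at encoding each angle as a point on the unit circle rather than duplicating the raw value; a small standalone sketch of that encoding (the angles are made up and assumed to be in degrees before conversion):

import math
import torch

angles = torch.tensor([a * math.pi / 180 for a in [0.0, 45.0, 90.0]])
# N x 2 (sin, cos) representation: avoids the wrap-around discontinuity at 0/360 degrees
rotations = torch.stack((torch.sin(angles), torch.cos(angles)), dim=-1)
print(rotations.shape)  # torch.Size([3, 2])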
Code example #20
def im_detect_bbox_aug(model, images, device):
    # Collect detections computed under different transformations
    boxlists_ts = []
    for _ in range(len(images)):
        boxlists_ts.append([])

    def add_preds_t(boxlists_t):
        for i, boxlist_t in enumerate(boxlists_t):
            if len(boxlists_ts[i]) == 0:
                # The first one is identity transform, no need to resize the boxlist
                boxlists_ts[i].append(boxlist_t)
            else:
                # Resize the boxlist as the first one
                boxlists_ts[i].append(boxlist_t.resize(boxlists_ts[i][0].size))

    # Compute detections for the original image (identity transform)
    boxlists_i = im_detect_bbox(model, images, cfg.INPUT.MIN_SIZE_TEST,
                                cfg.INPUT.MAX_SIZE_TEST, device)
    add_preds_t(boxlists_i)

    # Perform detection on the horizontally flipped image
    if cfg.TEST.BBOX_AUG.H_FLIP:
        boxlists_hf = im_detect_bbox_hflip(model, images,
                                           cfg.INPUT.MIN_SIZE_TEST,
                                           cfg.INPUT.MAX_SIZE_TEST, device)
        add_preds_t(boxlists_hf)

    # Compute detections at different scales
    for scale in cfg.TEST.BBOX_AUG.SCALES:
        max_size = cfg.TEST.BBOX_AUG.MAX_SIZE
        boxlists_scl = im_detect_bbox_scale(model, images, scale, max_size,
                                            device)
        add_preds_t(boxlists_scl)

        if cfg.TEST.BBOX_AUG.SCALE_H_FLIP:
            boxlists_scl_hf = im_detect_bbox_scale(model,
                                                   images,
                                                   scale,
                                                   max_size,
                                                   device,
                                                   hflip=True)
            add_preds_t(boxlists_scl_hf)

    # Merge boxlists detected by different bbox aug params
    boxlists = []
    for i, boxlist_ts in enumerate(boxlists_ts):
        bbox = torch.cat([boxlist_t.bbox for boxlist_t in boxlist_ts])
        scores = torch.cat(
            [boxlist_t.get_field('scores') for boxlist_t in boxlist_ts])
        boxlist = BoxList(bbox, boxlist_ts[0].size, boxlist_ts[0].mode)
        boxlist.add_field('scores', scores)
        boxlists.append(boxlist)

    # Apply NMS and limit the final detections
    results = []
    post_processor = make_roi_box_post_processor(cfg)
    for boxlist in boxlists:
        results.append(
            post_processor.filter_results(boxlist,
                                          cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES))

    return results
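
The augmentations applied above are driven entirely by the config; a sketch of switching them on with the standard yacs merge_from_list call (the keys appear in the code above, the values are illustrative assumptions):

from maskrcnn_benchmark.config import cfg

cfg.merge_from_list([
    "TEST.BBOX_AUG.H_FLIP", True,        # also test on the horizontally flipped image
    "TEST.BBOX_AUG.SCALES", (400, 600),  # extra test scales
    "TEST.BBOX_AUG.MAX_SIZE", 1333,
    "TEST.BBOX_AUG.SCALE_H_FLIP", True,  # flip at each extra scale as well
])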
Code example #21
def compute_predictions(cfg,
                        dataset,
                        model,
                        transforms,
                        icwt_21_objs=False,
                        compute_average_recall_RPN=False,
                        is_train=True,
                        result_dir=None,
                        evaluate_segmentation=True,
                        eval_segm_with_gt_bboxes=False):
    model.eval()
    num_img = len(dataset.ids)

    # Set the number of images that will be used to set minibootstrap parameters
    if hasattr(model, 'rpn'):
        model.rpn.cfg.NUM_IMAGES = num_img
    if hasattr(model, 'roi_heads'):
        model.roi_heads.box.cfg.NUM_IMAGES = num_img

    if compute_average_recall_RPN:
        average_recall_RPN = 0

    predictions = []

    for i in range(num_img):
        if type(dataset).__name__ == 'iCubWorldDataset':
            image, gt_bboxes_list, masks, gt_labels, img_sizes = compute_gts_icwt(
                dataset, i, icwt_21_objs)
        elif type(dataset).__name__ == 'YCBVideoDataset':
            image, gt_bboxes_list, masks, gt_labels, img_sizes = compute_gts_ycbv(
                dataset, i, evaluate_segmentation=evaluate_segmentation)

        # Save list of boxes as tensor
        gt_bbox_tensor = torch.tensor(gt_bboxes_list, device="cuda")
        gt_labels_torch = torch.tensor(gt_labels,
                                       device="cuda",
                                       dtype=torch.uint8).reshape(
                                           (len(gt_labels), 1))

        if len(masks) > 0:
            mask_lists = SegmentationMask(torch.cat(masks),
                                          img_sizes,
                                          mode='mask')

        # create box list containing the ground truth bounding boxes
        try:
            gt_bbox_boxlist = BoxList(gt_bbox_tensor,
                                      image_size=img_sizes,
                                      mode='xyxy')
            try:
                if evaluate_segmentation:
                    gt_bbox_boxlist.add_field("masks", mask_lists)
            except Exception:
                pass
        except Exception:
            gt_bbox_boxlist = BoxList(torch.empty((0, 4), device="cuda"),
                                      image_size=img_sizes,
                                      mode='xyxy')

        # apply pre-processing to image
        image = transforms(image)
        # convert to an ImageList
        image_list = to_image_list(image, 1)
        image_list = image_list.to("cuda")
        # compute predictions
        with torch.no_grad():
            AR, predicted_boxes = model(
                image_list,
                gt_bbox=gt_bbox_boxlist,
                gt_label=gt_labels_torch,
                img_size=img_sizes,
                compute_average_recall_RPN=compute_average_recall_RPN,
                gt_labels_list=gt_labels,
                is_train=is_train,
                result_dir=result_dir,
                evaluate_segmentation=evaluate_segmentation,
                eval_segm_with_gt_bboxes=eval_segm_with_gt_bboxes)
            if compute_average_recall_RPN:
                average_recall_RPN += AR
            predictions.append(predicted_boxes)

    if compute_average_recall_RPN:
        AR = average_recall_RPN / num_img
        print('Average Recall (AR):', AR)
        if result_dir:
            with open(os.path.join(result_dir, "result.txt"), "a") as fid:
                fid.write('Average Recall (AR): {} \n \n'.format(AR))

    if type(dataset).__name__ == 'iCubWorldDataset':
        extra_args = dict(
            box_only=False,
            iou_types=("bbox", ),
            expected_results=(),
            expected_results_sigma_tol=4,
            draw_preds=False,
            is_target_task=True,
            icwt_21_objs=icwt_21_objs,
            iou_thresholds=model.roi_heads.box.cfg.EVALUATION.IOU_THRESHOLDS,
            use_07_metric=model.roi_heads.box.cfg.EVALUATION.USE_VOC07_METRIC)
    elif type(dataset).__name__ == 'YCBVideoDataset':
        extra_args = dict(
            box_only=False,
            iou_types=("bbox", ),
            expected_results=(),
            expected_results_sigma_tol=4,
            draw_preds=False,
            evaluate_segmentation=evaluate_segmentation,
            iou_thresholds=model.roi_heads.box.cfg.EVALUATION.IOU_THRESHOLDS,
            use_07_metric=model.roi_heads.box.cfg.EVALUATION.USE_VOC07_METRIC)

    return evaluate(dataset=dataset,
                    predictions=predictions,
                    output_folder=result_dir,
                    **extra_args)
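
One detail worth noting is to_image_list, which pads a batch of differently-sized CHW tensors into a single batched tensor while remembering the original sizes; a hypothetical call with made-up shapes:

import torch
from maskrcnn_benchmark.structures.image_list import to_image_list

imgs = [torch.rand(3, 480, 640), torch.rand(3, 600, 800)]
batched = to_image_list(imgs, 32)  # second argument: size divisibility, as in the call above
print(batched.tensors.shape)       # padded batch, e.g. torch.Size([2, 3, 608, 800])
print(batched.image_sizes)         # [(480, 640), (600, 800)]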
Code example #22
File: coco.py  Project: Iamal1/maskrcnn-benchmark
    def __getitem__(self, idx):
        #     '''
        #     img is tensor now
        #     '''
        #     img_a, target_a, idx_a = self.get_one_item(idx)
        #     img_b, target_b, idx_b = self.get_one_item((idx+1) % len(self.ids))
        #     #merge them
        #     #merge img
        #     m = Beta(torch.tensor([1.5]), torch.tensor([1.5]))
        #     cof_a = m.sample()
        #     #cof_a = 0.5
        #     c,ha,wa = img_a.shape
        #     c,hb,wb = img_b.shape
        #     h,w = (max(ha,hb),max(wa,wb))
        #     img = img_a.new_zeros((c,h,w))
        #     img[:,:ha,:wa] = cof_a * img_a
        #     img[:,:hb,:wb] = (1-cof_a) * img_b

        #     #merge labels and masks
        #     boxes = torch.cat([target_a.bbox,target_b.bbox],dim=0)
        #     target = BoxList(boxes, (w,h), mode="xyxy")

        #     classes = torch.cat([target_a.get_field('labels'),target_b.get_field('labels')],dim=0)
        #     target.add_field("labels", classes)

        #     masks = target_a.get_field("masks").instances.polygons + target_b.get_field("masks").instances.polygons
        #     masks = SegmentationMask(masks, (w,h), mode='poly')
        #     target.add_field("masks", masks)

        #    # #add marks
        #    # marks = [1]*target_a.bbox.size(0) +  [0] * target_b.bbox.size(0)
        #    # target.add_field("marks", torch.tensor(marks))
        #     cofs = [cof_a]*target_a.bbox.size(0) +  [1-cof_a] * target_b.bbox.size(0)
        #     target.add_field('cofs',torch.tensor(cofs))

        #     return img, target, idx

        # def get_one_item(self, idx):
        img, anno = super(COCODataset, self).__getitem__(idx)

        # filter crowd annotations
        # TODO might be better to add an extra field
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size, mode='poly')
        target.add_field("masks", masks)

        if anno and "keypoints" in anno[0]:
            keypoints = [obj["keypoints"] for obj in anno]
            keypoints = PersonKeypoints(keypoints, img.size)
            target.add_field("keypoints", keypoints)

        target = target.clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, idx
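
A hypothetical way to exercise this __getitem__; the paths are placeholders and the constructor signature is assumed to match upstream maskrcnn-benchmark:

dataset = COCODataset(
    ann_file="annotations/instances_val2017.json",  # placeholder path
    root="val2017",                                 # placeholder path
    remove_images_without_annotations=False,
    transforms=None)

img, target, idx = dataset[0]
print(target.fields())    # e.g. ['labels', 'masks'] (+ 'keypoints' when annotated)
print(target.bbox.shape)  # (num_objects, 4) in xyxy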
Code example #23
    def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = box_cls.device
        N, _, H, W = box_cls.shape
        A = box_regression.size(1) // 4
        C = box_cls.size(1) // A

        # put in the same format as anchors
        box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
        box_cls = box_cls.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        if self.imbalanced_decider is None:
            candidate_inds = box_cls > self.pre_nms_thresh
        else:
            candidate_inds = self.imbalanced_decider(box_cls)

        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        results = []
        for per_box_cls, per_box_regression, per_pre_nms_top_n, \
        per_candidate_inds, per_anchors in zip(
            box_cls,
            box_regression,
            pre_nms_top_n,
            candidate_inds,
            anchors):

            # Sort and select TopN
            # TODO most of this can be made out of the loop for
            # all images.
            # TODO:Yang: Not easy to do. Because the numbers of detections are
            # different in each image. Therefore, this part needs to be done
            # per image.
            per_box_cls = per_box_cls[per_candidate_inds]

            per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)

            per_candidate_nonzeros = \
                    per_candidate_inds.nonzero()[top_k_indices, :]

            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            per_class += 1

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
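
permute_and_flatten is what moves the raw head output into the same per-anchor layout as the anchors; a sketch of what it does (reconstructed, not copied from the repository):

def permute_and_flatten_sketch(layer, N, A, C, H, W):
    # (N, A*C, H, W) -> (N, A*H*W, C), so that row i lines up with anchor i
    layer = layer.view(N, A, C, H, W)
    layer = layer.permute(0, 3, 4, 1, 2)  # N, H, W, A, C
    return layer.reshape(N, -1, C)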
Code example #24
    def forward_for_single_feature_map(self, locations, box_cls,
                                       box_regression, centerness,
                                       image_sizes):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        N, C, H, W = box_cls.shape

        # put in the same format as locations
        box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
        box_cls = box_cls.reshape(N, -1, C).sigmoid()
        box_regression = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1)
        box_regression = box_regression.reshape(N, -1, 4)

        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        # multiply the classification scores with centerness scores
        if centerness is not None:
            centerness = centerness.view(N, 1, H, W).permute(0, 2, 3, 1)
            centerness = centerness.reshape(N, -1).sigmoid()
            box_cls = box_cls * centerness[:, :, None]

        if self.debug_vis_label:
            # box_prob_set.extend([box_cls, centerness, centerness[:,:,None]*box_prob_set[-1]])
            show_box_cls([box_cls, box_cls**2], N, H, W, C,
                         self.pre_nms_thresh)

        # K = 1
        # box_cls = box_cls.reshape(-1, C)
        # top, idim = torch.topk(box_cls, K, dim=-1)
        # box_cls[:] = 0
        # i0 = torch.zeros(idim.size()).long() + torch.arange(0, idim.size(0))[:, None]
        # box_cls[i0, idim] = top
        # box_cls = box_cls.reshape(N, -1, C)

        results = []
        for i in range(N):
            per_box_cls = box_cls[i]
            per_candidate_inds = candidate_inds[i]
            per_box_cls = per_box_cls[per_candidate_inds]

            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1] + 1

            per_box_regression = box_regression[i]
            per_box_regression = per_box_regression[per_box_loc]
            per_locations = locations[per_box_loc]

            per_pre_nms_top_n = pre_nms_top_n[i]

            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]

            detections = torch.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ],
                                     dim=1)

            h, w = image_sizes[i]
            boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            if self.debug_vis_label:
                boxlist.add_field("det_locations", per_locations)  # add by hui
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
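
The locations argument is a flat grid of feature-map cell centres in input-image coordinates; a common way to build it for one level looks roughly like the sketch below (the function name and stride handling are assumptions, not code from this file):

import torch

def compute_locations_sketch(h, w, stride, device="cpu"):
    # centre of each cell, flattened row-major so it matches per_box_loc above
    ys = torch.arange(0, h, dtype=torch.float32, device=device) * stride + stride // 2
    xs = torch.arange(0, w, dtype=torch.float32, device=device) * stride + stride // 2
    grid_y = ys.view(-1, 1).repeat(1, w).reshape(-1)
    grid_x = xs.view(1, -1).repeat(h, 1).reshape(-1)
    return torch.stack((grid_x, grid_y), dim=1)  # (H*W, 2)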
Code example #25
def evaluate_box_proposals(
    predictions, dataset, thresholds=None, area="all", limit=None
):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0 ** 2, 1e5 ** 2],  # all
        [0 ** 2, 32 ** 2],  # small
        [32 ** 2, 96 ** 2],  # medium
        [96 ** 2, 1e5 ** 2],  # large
        [96 ** 2, 128 ** 2],  # 96-128
        [128 ** 2, 256 ** 2],  # 128-256
        [256 ** 2, 512 ** 2],  # 256-512
        [512 ** 2, 1e5 ** 2],
    ]  # 512-inf
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]

        # TODO replace with get_img_info?
        image_width = dataset.coco.imgs[original_id]["width"]
        image_height = dataset.coco.imgs[original_id]["height"]
        prediction = prediction.resize((image_width, image_height))

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = prediction.get_field("objectness").sort(descending=True)[1]
        prediction = prediction[inds]

        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)
        gt_boxes = [obj["bbox"] for obj in anno if obj["iscrowd"] == 0]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(
            -1, 4
        )  # guard against no boxes
        gt_boxes = BoxList(
            gt_boxes, (image_width, image_height), mode="xywh"
        ).convert("xyxy")
        gt_areas = torch.as_tensor(
            [obj["area"] for obj in anno if obj["iscrowd"] == 0]
        )

        if len(gt_boxes) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (
            gt_areas <= area_range[1]
        )
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if len(prediction) == 0:
            continue

        if limit is not None and len(prediction) > limit:
            prediction = prediction[:limit]

        overlaps = boxlist_iou(prediction, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = torch.cat(gt_overlaps, dim=0)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
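
A hypothetical call showing how the returned dict is typically consumed (the limit value is arbitrary):

stats = evaluate_box_proposals(predictions, dataset, area="all", limit=1000)
print("AR@1000: {:.4f}".format(stats["ar"].item()))
print("recall at IoU 0.5: {:.4f}".format(stats["recalls"][0].item()))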
Code example #26
File: icdar.py  Project: EuphoriaYan/MaskTextSpotter
 def __getitem__(self, item):
     im_name = os.path.basename(self.image_lists[item])
     # print(self.image_lists[item])
     img = Image.open(self.image_lists[item]).convert("RGB")
     width, height = img.size
     if self.gts_dir is not None:
         gt_path = os.path.join(self.gts_dir, im_name + '.txt')
         if not os.path.isfile(gt_path):
             gt_path = os.path.join(self.gts_dir,
                                    'gt_' + im_name.split('.')[0] + '.txt')
         words, boxes, charsbbs, segmentations = self.load_gt_from_txt(
             gt_path, height, width)
         target = BoxList(boxes[:, :4],
                          img.size,
                          mode="xyxy",
                          use_char_ann=self.use_charann)
         classes = torch.ones(len(boxes))
         target.add_field("labels", classes)
         masks = SegmentationMask(segmentations, img.size)
         target.add_field("masks", masks)
         if words[0] == '':
             use_char_ann = False
         else:
             use_char_ann = True
         if not self.use_charann:
             use_char_ann = False
         char_masks = SegmentationCharMask(charsbbs,
                                           words=words,
                                           use_char_ann=use_char_ann,
                                           size=img.size)
         target.add_field("char_masks", char_masks)
     else:
         target = None
     if self.transforms is not None:
         img, target = self.transforms(img, target)
     if self.vis:
         new_im = img.numpy().copy().transpose(
             [1, 2, 0]) + [102.9801, 115.9465, 122.7717]
         new_im = Image.fromarray(new_im.astype(np.uint8)).convert('RGB')
         mask = target.extra_fields['masks'].polygons[0].convert('mask')
         mask = Image.fromarray(
             (mask.numpy() * 255).astype(np.uint8)).convert('RGB')
         if self.use_charann:
             m, _ = target.extra_fields['char_masks'].chars_boxes[
                 0].convert('char_mask')
             color = self.creat_color_map(37, 255)
             color_map = color[m.numpy().astype(np.uint8)]
             char = Image.fromarray(color_map.astype(
                 np.uint8)).convert('RGB')
             char = Image.blend(char, new_im, 0.5)
         else:
             char = new_im
         new = Image.blend(char, mask, 0.5)
         img_draw = ImageDraw.Draw(new)
         for box in target.bbox.numpy():
             box = list(box)
             box = box[:2] + [box[2], box[1]] + box[2:] + [box[0], box[3]
                                                           ] + box[:2]
             img_draw.line(box, fill=(255, 0, 0), width=2)
         new.save('./vis/char_' + im_name)
     return img, target, self.image_lists[item]
Code example #27
    def inference(self,
                  colors_pred,
                  add_class_names=None,
                  save_path=None,
                  save_independently=None,
                  show_ground_truth=True):
        """
        Do Inference, either show the boxes or the masks
        """

        # load the config
        paths_catalog = import_file("maskrcnn_benchmark.config.paths_catalog",
                                    cfg.PATHS_CATALOG, True)
        DatasetCatalog = paths_catalog.DatasetCatalog
        test_datasets = DatasetCatalog.get(cfg.DATASETS.TEST[0])
        img_dir = test_datasets['args']['root']
        anno_file = test_datasets['args']['ann_file']
        data = json.load(open(anno_file))
        coco = COCO(anno_file)
        predis = []
        filenames = []

        # iterate through data
        for i, image in enumerate(data['images']):

            pil_img = Image.open(img_dir + '/' + image['file_name'])
            filenames.append(image['file_name'])
            img = np.array(pil_img)[:, :, [0, 1, 2]]

            # get ground truth boxes or masks
            anno = [
                obj for obj in data['annotations']
                if obj['image_id'] == image['id']
            ]
            classes = [
                obj['category_id'] for obj in data['annotations']
                if obj['image_id'] == image['id']
            ]
            json_category_id_to_contiguous_id = {
                v: i + 1
                for i, v in enumerate(coco.getCatIds())
            }
            classes = [json_category_id_to_contiguous_id[c] for c in classes]
            classes = torch.tensor(classes)
            boxes = [obj['bbox'] for obj in anno]
            boxes = torch.as_tensor(boxes).reshape(-1, 4)
            target = BoxList(boxes, pil_img.size, mode='xywh').convert('xyxy')
            target.add_field('labels', classes)
            masks = [obj["segmentation"] for obj in anno]
            masks = SegmentationMask(masks, img.size)
            target.add_field("masks", masks)
            target = target.clip_to_image(remove_empty=True)

            # these are the ground truth polygons
            polygons = []
            color_rgb = [[255, 101, 80], [255, 55, 55], [255, 255, 61],
                         [255, 128, 0]]
            colors = {
                i: [s / 255 for s in color]
                for i, color in enumerate(color_rgb)
            }
            color = [colors[i.item()] for i in classes]

            # ground truth boxes
            boxes = []

            polys = vars(target)['extra_fields']['masks']
            for polygon in polys:
                try:
                    tenso = vars(polygon)['polygons'][0]
                except KeyError:
                    continue

                poly1 = tenso.numpy()
                poly = poly1.reshape((int(len(poly1) / 2), 2))
                polygons.append(Polygon(poly))

            xywh_tar = target.convert("xywh")
            for box in vars(xywh_tar)['bbox'].numpy():

                rect = Rectangle((box[0], box[1]), box[2], box[3])
                boxes.append(rect)

            # compute predictions
            predictions = self.compute_prediction(img)
            predis.append(predictions)
            top_predictions = self.select_top_predictions(predictions)

            polygons_predicted, colors_prediction = self.overlay_mask(
                img, top_predictions, colors_pred, inference=True)
            #print(colors_prediction)

            fig = plt.figure()
            ax = fig.add_subplot(1, 1, 1)

            ax.imshow(Image.fromarray(img))
            ax.axis('off')

            # this is for ground truth
            if show_ground_truth:
                p = PatchCollection(polygons,
                                    facecolor='none',
                                    linewidths=0,
                                    alpha=0.4)
                ax.add_collection(p)
                p = PatchCollection(polygons,
                                    facecolor='none',
                                    edgecolors=color,
                                    linewidths=2)
                ax.add_collection(p)

            # this is for prediction
            ppd = PatchCollection(polygons_predicted,
                                  facecolor='none',
                                  linewidths=0,
                                  alpha=0.4)
            ax.add_collection(ppd)
            ppd = PatchCollection(polygons_predicted,
                                  facecolor='none',
                                  edgecolors=colors_prediction,
                                  linewidths=2)
            ax.add_collection(ppd)

            plt.savefig(save_path + image['file_name'],
                        dpi=200,
                        bbox_inches='tight',
                        pad_inches=0)

            plt.show()

        dic = {}
        for i in range(len(filenames)):
            dic[filenames[i]] = predis[i]
        return dic
Code example #28
    def filter_results(self, boxlist, num_classes):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        if cfg.ROTATE:
            boxes = boxlist.bbox.reshape(-1, num_classes * 8)
            scores = boxlist.get_field("scores").reshape(-1, num_classes)

            device = scores.device
            result = []
            # Apply threshold on detection probabilities and apply NMS
            # Skip j = 0, because it's the background class
            inds_all = scores > self.score_thresh
            for j in range(1, num_classes):
                inds = inds_all[:, j].nonzero().squeeze(1)
                scores_j = scores[inds, j]
                boxes_j = boxes[inds, j * 8:(j + 1) * 8]
                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xy8")
                boxlist_for_class.add_field("scores", scores_j)
                boxlist_for_class = boxlist_rnms(boxlist_for_class, self.nms)
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels",
                    torch.full((num_labels, ),
                               j,
                               dtype=torch.int64,
                               device=device))
                result.append(boxlist_for_class)

            result = cat_boxlist(result)
            number_of_detections = len(result)

        else:
            boxes = boxlist.bbox.reshape(-1, num_classes * 4)
            scores = boxlist.get_field("scores").reshape(-1, num_classes)

            device = scores.device
            result = []
            # Apply threshold on detection probabilities and apply NMS
            # Skip j = 0, because it's the background class
            inds_all = scores > self.score_thresh
            for j in range(1, num_classes):
                inds = inds_all[:, j].nonzero().squeeze(1)
                scores_j = scores[inds, j]
                boxes_j = boxes[inds, j * 4:(j + 1) * 4]
                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                boxlist_for_class.add_field("scores", scores_j)
                boxlist_for_class = boxlist_nms(boxlist_for_class, self.nms)
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels",
                    torch.full((num_labels, ),
                               j,
                               dtype=torch.int64,
                               device=device))
                result.append(boxlist_for_class)

            result = cat_boxlist(result)
            number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.detections_per_img + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        return result
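
The detections-per-image cap at the end relies on torch.kthvalue to turn "keep the top K scores" into a score threshold; a small standalone illustration with made-up scores:

import torch

scores = torch.tensor([0.9, 0.1, 0.8, 0.4, 0.7])
detections_per_img = 3
# k-th smallest score such that exactly detections_per_img scores are >= the threshold (barring ties)
image_thresh, _ = torch.kthvalue(scores, scores.numel() - detections_per_img + 1)
keep = torch.nonzero(scores >= image_thresh.item()).squeeze(1)
print(keep.tolist())  # [0, 2, 4] -> the three highest-scoring detections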
Code example #29
def calc_detection_sysu_prec_rec(gt_boxlists, pred_boxlists, iou_thresh=0.5):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.
    This function calculates precision and recall of
    predicted bounding boxes obtained from a dataset which has :math:`N`
    images.
    The code is based on the evaluation code used in PASCAL VOC Challenge.
   """
    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)
    for gt_boxlist, pred_boxlist in zip(gt_boxlists, pred_boxlists):
        pred_bbox = pred_boxlist.bbox.numpy()
        pred_label = pred_boxlist.get_field("labels").numpy()
        pred_score = pred_boxlist.get_field("scores").numpy()
        gt_bbox = gt_boxlist.bbox.numpy()
        gt_label = gt_boxlist.get_field("labels").numpy()
        gt_difficult = gt_boxlist.get_field("difficult").numpy()

        for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            #print ('For calc_detection_sysu_prec_rec 1 ...')
            #embed() ###

            pred_mask_l = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]  # sorted
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == l
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[l] += np.logical_not(gt_difficult_l).sum()
            score[l].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                match[l].extend((0, ) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1
            iou = boxlist_iou(
                BoxList(pred_bbox_l, gt_boxlist.size),
                BoxList(gt_bbox_l, gt_boxlist.size),
            ).numpy()
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[l].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[l].append(1)
                        else:
                            match[l].append(0)
                    selec[gt_idx] = True
                else:
                    match[l].append(0)

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for l in n_pos.keys():
        #print ('For calc_detection_sysu_prec_rec 2 ...')
        #embed()  ###

        score_l = np.array(score[l])
        match_l = np.array(match[l], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan.
        prec[l] = tp / (fp + tp)  # precision
        # If n_pos[l] is 0, rec[l] is None.
        if n_pos[l] > 0:
            rec[l] = tp / n_pos[l]  # recall

    return prec, rec
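
These per-class precision/recall arrays are usually reduced to an average precision value, applied per class and skipping classes where rec[l] is None; a hedged sketch of the "all-points" VOC-style AP (this helper is not part of the file above):

import numpy as np

def voc_ap_from_prec_rec(prec, rec):
    # area under the precision envelope as recall increases
    mpre = np.concatenate(([0.0], np.nan_to_num(prec), [0.0]))
    mrec = np.concatenate(([0.0], rec, [1.0]))
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]  # make precision monotonically non-increasing
    idx = np.where(mrec[1:] != mrec[:-1])[0]        # points where recall changes
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])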
Code example #30
    def __getitem__(self, idx):
        img, anno, meta = self.__get_item__(idx)

        # filter crowd annotations

        anno = [obj for obj in anno if obj["iscrowd"] == 0]

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        # classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        polygons = [obj["segmentation"] for obj in anno]
        seg_mask_instance = SegmentationMask(polygons, img.size)
        target.add_field("masks", seg_mask_instance)

        masks = [
            _get_mask_from_polygon(polygon, img.size) for polygon in polygons
        ]
        N = len(masks)
        W, H = img.size
        if self.cfg["Pose"] or self.cfg["Vertex"]:
            meta = [obj["meta"] for obj in anno]
            centers = [m['center'] for m in meta]
            assert len(meta) == len(polygons)

            if self.cfg["Pose"]:
                poses = [obj["pose"] for obj in meta]
                target.add_field("poses", torch.tensor(poses))

            if self.cfg["Vertex"]:
                vertex_centers = np.zeros((N, 2, H, W))
                for ix, m in enumerate(masks):
                    center = centers[ix]
                    # pose = poses[ix]
                    # z = np.log(pose[-1]) # z distance is the last value in pose [qw,qx,qy,qz,x,y,z]
                    # m = _get_mask_from_polygon(poly, img.size)
                    vertex_centers[ix, :] = _generate_vertex_center_mask(
                        m, center)

                vertex_centers = torch.tensor(vertex_centers)
                vertexes = ObjectMask(vertex_centers, img.size)
                target.add_field("vertex", vertexes)

            centers = Keypoints([[c[0], c[1], 1] for c in centers],
                                img.size)  # set all kp to class of 1
            target.add_field("centers", centers)

        if self.cfg["Depth"]:
            depth_data = np.zeros((N, 1, H, W))
            if 'depth' in meta:
                depth = meta['depth']
                for ix, m in enumerate(masks):
                    depth_data[ix, :] = _generate_depth_mask(m, depth)
            depth_data = torch.tensor(depth_data)
            depth_D = ObjectMask(depth_data, img.size)
            target.add_field("depth", depth_D)

        target = target.clip_to_image(remove_empty=True)
        if self._transforms is not None:
            img, target = self._transforms(img, target)

        if "intrinsic_matrix" in meta:
            target.add_field("intrinsic_matrix", meta["intrinsic_matrix"])
        if self.cfg["Pose"]:
            target.add_field("symmetry", self.symmetry)
            target.add_field("extents", self.extents)
            target.add_field("points", self.points)

        return img, target, idx