Code Example #1
    def forward(self, loc_data, conf_data, prior_data):
        loc_data = loc_data.cpu()
        conf_data = conf_data.cpu()
        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)
        # Process each image in the batch
        for i in range(num):
            # Decode the priors into predicted boxes
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            conf_scores = conf_preds[i].clone()

            for cl in range(1, self.num_classes):
                # Perform non-maximum suppression for each class
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # Apply non-maximum suppression
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
        flt = output.contiguous().view(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
        return output
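Every example on this page relies on a decode helper that combines the regressed offsets with the priors to produce corner-form boxes. Below is a minimal sketch in the ssd.pytorch style, assuming priors in (cx, cy, w, h) form and the usual two-element variance list; it is a common reference implementation, not necessarily the one each project ships.

import torch

def decode(loc, priors, variances):
    # loc:     (num_priors, 4) predicted offsets
    # priors:  (num_priors, 4) default boxes in (cx, cy, w, h) form
    # returns: (num_priors, 4) boxes in (xmin, ymin, xmax, ymax) form
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],  # shifted centers
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)   # scaled widths/heights
    boxes[:, :2] -= boxes[:, 2:] / 2   # (cx, cy) -> (xmin, ymin)
    boxes[:, 2:] += boxes[:, :2]       # (w, h)   -> (xmax, ymax)
    return boxes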
Code Example #2
File: ssd_layers.py  Project: riskycheng/pytorch.ssd
    def forward(self, loc_data, conf_data, prior_data):
        #--------------------------------#
        #   Move the tensors to the CPU first
        #--------------------------------#
        loc_data = loc_data.cpu()
        conf_data = conf_data.cpu()

        #--------------------------------#
        #   num is the batch size
        #   num_priors is the number of prior boxes
        #--------------------------------#
        num = loc_data.size(0)
        num_priors = prior_data.size(0)

        output = torch.zeros(num, self.num_classes, self.top_k, 5)

        #--------------------------------------#
        #   Reshape the classification predictions to
        #   (num, num_classes, num_priors)
        #--------------------------------------#
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

        # Process each image; at inference time there is normally only one image, so this loop runs once
        for i in range(num):
            #--------------------------------------#
            #   Decode the priors into predicted boxes
            #   After decoding, the result has shape
            #   (num_priors, 4)
            #--------------------------------------#
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            conf_scores = conf_preds[i].clone()

            #--------------------------------------#
            #   Get the scores for each class,
            #   shape (num_priors,)
            #--------------------------------------#
            for cl in range(1, self.num_classes):
                #--------------------------------------#
                #   First apply the confidence threshold,
                #   then keep the scores that pass it
                #--------------------------------------#
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                #--------------------------------------#
                #   Gather the boxes that pass the threshold
                #--------------------------------------#
                boxes = decoded_boxes[l_mask].view(-1, 4)
                #--------------------------------------#
                #   Apply non-maximum suppression to these boxes
                #--------------------------------------#
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = torch.cat(
                    (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)

        return output
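The nms(boxes, scores, nms_thresh, top_k) call used in these forwards returns a tensor of kept indices plus a count of how many are valid. Below is a minimal greedy hard-NMS sketch with that interface, assuming corner-form boxes; the projects quoted here may use an optimised or compiled version instead.

import torch

def nms(boxes, scores, overlap=0.5, top_k=200):
    # boxes: (N, 4) in (xmin, ymin, xmax, ymax) form, scores: (N,)
    # Returns (keep, count): indices of the kept boxes and how many entries are valid.
    keep = scores.new_zeros(scores.size(0), dtype=torch.long)
    if boxes.numel() == 0:
        return keep, 0
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    _, order = scores.sort(0, descending=True)
    order = order[:top_k]              # consider at most top_k candidates
    count = 0
    while order.numel() > 0:
        i = order[0]
        keep[count] = i
        count += 1
        if order.numel() == 1:
            break
        order = order[1:]
        # IoU of the selected box against the remaining candidates
        xx1 = x1[order].clamp(min=x1[i].item())
        yy1 = y1[order].clamp(min=y1[i].item())
        xx2 = x2[order].clamp(max=x2[i].item())
        yy2 = y2[order].clamp(max=y2[i].item())
        inter = (xx2 - xx1).clamp(min=0.0) * (yy2 - yy1).clamp(min=0.0)
        iou = inter / (areas[i] + areas[order] - inter)
        order = order[iou <= overlap]  # drop candidates that overlap too much
    return keep, count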
Code Example #3
    def forward(self, loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        self.output.zero_()
        if num == 1:
            # size batch x num_classes x num_priors
            conf_preds = conf_data.t().contiguous().unsqueeze(0)
        else:
            conf_preds = conf_data.view(num, num_priors,
                                        self.num_classes).transpose(2, 1)
            self.output.expand_(num, self.num_classes, self.top_k, 5)

        # Decode predictions into bboxes.
        for i in range(num):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()

            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.dim() == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                # NMS
                if self.soft_nms == -1:
                    ids, count = nms(boxes, scores, self.nms_thresh,
                                     self.top_k)
                    self.output[i, cl, :count] = \
                        torch.cat((scores[ids[:count]].unsqueeze(1),
                                   boxes[ids[:count]]), 1)
                else:
                    count = boxes.size(
                        0) if boxes.size(0) < self.top_k else self.top_k
                    new_scores, new_boxes = soft_nms(boxes,
                                                     scores,
                                                     self.nms_thresh,
                                                     self.top_k,
                                                     type=self.soft_nms)
                    self.output[i, cl, :count] = torch.cat(
                        (new_scores.unsqueeze(1), new_boxes), 1)
        # flt = self.output.view(-1, 5)
        # _, idx = flt[:, 0].sort(0)
        # _, rank = idx.sort(0)
        # flt[(rank >= self.top_k).unsqueeze(1).expand_as(flt)].fill_(0)
        return self.output
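Example #3 optionally replaces hard NMS with soft-NMS and writes the rescored boxes straight into the output. The project's soft_nms is not shown here; below is a minimal sketch, assuming a linear decay when type == 1 and a Gaussian decay otherwise. The signature, the sigma default, and the ordering of the returned tensors are assumptions, not the actual implementation.

import torch

def soft_nms(boxes, scores, iou_thresh=0.5, top_k=200, type=1, sigma=0.5):
    # Soft-NMS (Bodla et al., 2017): decay the scores of overlapping boxes instead of discarding them.
    # Returns (new_scores, new_boxes) with min(len(boxes), top_k) rows, sorted by decayed score.
    boxes = boxes.clone()
    scores = scores.clone()
    n = min(boxes.size(0), top_k)
    kept_boxes = boxes.new_zeros(n, 4)
    kept_scores = scores.new_zeros(n)
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    for k in range(n):
        i = torch.argmax(scores)           # next highest (possibly decayed) score
        kept_boxes[k] = boxes[i]
        kept_scores[k] = scores[i]
        scores[i] = -1.0                   # exclude the picked box from further rounds
        # IoU of the picked box against everything else
        xx1 = torch.max(boxes[:, 0], boxes[i, 0])
        yy1 = torch.max(boxes[:, 1], boxes[i, 1])
        xx2 = torch.min(boxes[:, 2], boxes[i, 2])
        yy2 = torch.min(boxes[:, 3], boxes[i, 3])
        inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
        iou = inter / (areas + areas[i] - inter)
        if type == 1:                      # linear decay above the IoU threshold
            decay = torch.where(iou > iou_thresh, 1.0 - iou, torch.ones_like(iou))
        else:                              # Gaussian decay
            decay = torch.exp(-iou * iou / sigma)
        scores = torch.where(scores > 0, scores * decay, scores)
    return kept_scores, kept_boxes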
Code Example #4
    def predict(self, image, top_k=-1, prob_threshold=None):
        """Implement Predictor while testing of the model
		Arguments:
			image: image input for predictor
			prob_threshold: threshold for probability
			top_k: keep top_k results. If k <= 0, keep all the results.
		Returns:
			predicted boxes, labels and their probability
		"""
        cpu_device = torch.device("cpu")
        height, width, _ = image.shape
        image = self.transform(image)
        images = image.unsqueeze(0)
        images = images.to(self.device)
        with torch.no_grad():
            self.timer.start()
            scores, boxes = self.net.forward(images)
            print("Inference time: ", self.timer.end())
        boxes = boxes[0]
        scores = scores[0]
        if not prob_threshold:
            prob_threshold = self.filter_threshold
        # this version of nms is slower on GPU, so we move data to CPU.
        boxes = boxes.to(cpu_device)
        scores = scores.to(cpu_device)
        picked_box_probs = []
        picked_labels = []
        for class_index in range(1, scores.size(1)):
            probs = scores[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.size(0) == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
            box_probs = box_utils.nms(box_probs,
                                      self.nms_method,
                                      score_threshold=prob_threshold,
                                      iou_threshold=self.iou_threshold,
                                      sigma=self.sigma,
                                      top_k=top_k,
                                      candidate_size=self.candidate_size)
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.size(0))
        if not picked_box_probs:
            return torch.tensor([]), torch.tensor([]), torch.tensor([])
        picked_box_probs = torch.cat(picked_box_probs)
        picked_box_probs[:, 0] *= width
        picked_box_probs[:, 1] *= height
        picked_box_probs[:, 2] *= width
        picked_box_probs[:, 3] *= height
        return picked_box_probs[:, :4], torch.tensor(
            picked_labels), picked_box_probs[:, 4]
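A hypothetical usage of this predictor, assuming predictor is an instance of the class above and the input is a BGR image as loaded by OpenCV; the file names, thresholds, and drawing details are illustrative only.

import cv2

image = cv2.imread("test.jpg")  # hypothetical input image
boxes, labels, probs = predictor.predict(image, top_k=10, prob_threshold=0.4)
for box, label, prob in zip(boxes, labels, probs):
    x1, y1, x2, y2 = [int(v) for v in box]
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(image, f"{label.item()}: {prob.item():.2f}", (x1, y1 - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
cv2.imwrite("result.jpg", image)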
Code Example #5
File: ssd_layers.py  Project: cxqzjhz/pytorch-ssd-cam
    def forward(ctx, loc_data, conf_data, prior_data):
        # # loc_data preds torch.Size([1, 8732, 4])
        # # conf_data  # torch.Size([1, 8732, 3])
        # # prior_data torch.Size([8732, 4])
        loc_data = loc_data.cpu()
        conf_data = conf_data.cpu()
        num = loc_data.size(0)  # batch size 1
        num_priors = prior_data.size(0)  # 8732
        output = torch.zeros(num, Detect.num_classes, Detect.top_k,
                             5)  # torch.Size([1, 3, 200, 5])
        conf_preds = conf_data.view(num,
                                    num_priors, Detect.num_classes).transpose(
                                        2, 1)  # torch.Size([1, 3, 8732])
        # Process each image
        for i in range(num):
            # Decode the priors into predicted boxes
            decoded_boxes = decode(loc_data[i], prior_data,
                                   Detect.variance)  # torch.Size([8732, 4])
            conf_scores = conf_preds[i].clone()  # torch.Size([3, 8732])

            for cl in range(1, Detect.num_classes):  # iterate over classes 1 and 2; class 0 is the background
                # Perform non-maximum suppression for each class
                c_mask = conf_scores[cl].gt(
                    Detect.conf_thresh)  # mask of positive samples  torch.Size([8732])
                scores = conf_scores[cl][
                    c_mask]  # confidence scores of all positives  torch.Size([11])
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(
                    decoded_boxes)  # torch.Size([8732, 4])
                boxes = decoded_boxes[l_mask].view(
                    -1, 4)  # torch.Size([11, 4])  boxes of all positives
                # Apply non-maximum suppression
                ids, count = nms(boxes, scores, Detect.nms_thresh,
                                 Detect.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
        # The following four lines can be commented out and the program still runs correctly
        flt = output.contiguous().view(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        flt[(rank < Detect.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
        # Note: this does not affect output, because flt[mask].fill_(0) operates on a copy
        return output  # torch.Size([1, 3, 200, 5])  1 confidence score + 4 box coordinates
Code Example #6
    def forward(self, loc_data, conf_data, prior_data):
        if Config['nms_thresh'] <= 0:
            raise ValueError('nms_threshold must be positive.')
        loc_data = loc_data.cpu()
        conf_data = conf_data.cpu()
        # Number of images; at inference time there is usually only one
        num = loc_data.size(0)  # batch size, one image
        # Number of prior boxes, 8732
        num_priors = prior_data.size(0)
        # Output buffer of shape (num, num_classes, top_k, 5)
        output = torch.zeros(num, Config['num_classes'], Config["top_k"], 5)

        # Reshape the classification predictions to (1, 8732, num_classes), then transpose to (1, num_classes, 8732).
        # torch.transpose(input, dim0, dim1) -> Tensor returns a view with dim0 and dim1 swapped; the output
        # shares memory with the input, so modifying one also modifies the other.
        conf_preds = conf_data.view(num, num_priors,
                                    Config['num_classes']).transpose(2, 1)
        # Process each image
        for i in range(num):
            # Decode the priors into predicted boxes
            decoded_boxes = decode(loc_data[i], prior_data, Config['variance'])
            # Class scores for all priors of this image
            conf_scores = conf_preds[i].clone()

            for cl in range(1, Config['num_classes']):
                # Perform non-maximum suppression for each class
                # gt(a, b) is equivalent to a > b; conf_thresh is the 0.01 threshold; returns a boolean mask
                c_mask = conf_scores[cl].gt(Config["conf_thresh"])
                # Keep only the scores whose mask entry is True
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # Apply non-maximum suppression
                ids, count = nms(boxes, scores, Config['nms_thresh'],
                                 Config["top_k"])
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
        # Sort by score
        flt = output.contiguous().view(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        # Keep the top_k boxes and return
        flt[(rank < Config["top_k"]).unsqueeze(-1).expand_as(flt)].fill_(0)
        return output
Code Example #7
 def predict(self, image, top_k=-1, prob_threshold=None):
     cpu_device = torch.device("cpu")
     height, width, _ = image.shape
     image = self.transform(image)
     images = image.unsqueeze(0)
     images = images.to(self.device)
     with torch.no_grad():
         self.timer.start()
         scores, boxes = self.net.forward(images)
     boxes = boxes[0]
     scores = scores[0]
     if not prob_threshold:
         prob_threshold = self.filter_threshold
     # this version of nms is slower on GPU, so we move data to CPU.
     boxes = boxes.to(cpu_device)
     scores = scores.to(cpu_device)
     picked_box_probs = []
     picked_labels = []
     for class_index in range(1, scores.size(1)):
         probs = scores[:, class_index]
         mask = probs > prob_threshold
         probs = probs[mask]
         if probs.size(0) == 0:
             continue
         subset_boxes = boxes[mask, :]
         box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
         box_probs = box_utils.nms(box_probs,
                                   self.nms_method,
                                   score_threshold=prob_threshold,
                                   iou_threshold=self.iou_threshold,
                                   sigma=self.sigma,
                                   top_k=top_k,
                                   candidate_size=self.candidate_size)
         picked_box_probs.append(box_probs)
         picked_labels.extend([class_index] * box_probs.size(0))
     if not picked_box_probs:
         return torch.tensor([]), torch.tensor([]), torch.tensor([])
     picked_box_probs = torch.cat(picked_box_probs)
     picked_box_probs[:, 0] *= width
     picked_box_probs[:, 1] *= height
     picked_box_probs[:, 2] *= width
     picked_box_probs[:, 3] *= height
     return picked_box_probs[:, :4], torch.tensor(
         picked_labels), picked_box_probs[:, 4]
Code Example #8
    def forward(self, loc_p, class_p, priors):
        batch_size = loc_p.size(0)
        num_priors = priors.size(0)
        output = torch.zeros(batch_size, self.num_classes, self.top_k, 5)
        class_p = class_p.transpose(1, 2)

        for idx in range(batch_size):
            decoded_boxes = decode(loc_p[idx], priors, self.variance)
            for c in range(1, self.num_classes):
                c_mask = (class_p[idx][c] > self.conf_thresh)
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                scores = class_p[idx][c][c_mask]
                boxes = decoded_boxes[l_mask].view(-1, 4)
                if len(scores) == 0:
                    continue
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[idx, c, :count] = torch.cat(
                    (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)
        return output
Code Example #9
    def forward(self, loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

        # Decode predictions into bboxes.
        for i in range(num):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()

            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
        flt = output.reshape(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
        # output[batch_index][class_index] = list of (score, box), sort by score in descending order
        # output batch_num * num_classes * top_k * 5 (5 means the list of score, x_min, y_min, x_max and y_max)
        # top_k = 200 by default. if number of valid boxes is less than that, still keep top-k elements(zero filled);
        #                         otherwise, only keep top-k elements.
        return output
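The trailing comments describe the layout of output; below is a minimal sketch of how a caller typically unpacks such a (batch, num_classes, top_k, 5) tensor into per-image detections. The detections name and the 0.5 visualisation threshold are illustrative assumptions.

# detections: the (batch, num_classes, top_k, 5) tensor returned by forward()
for b in range(detections.size(0)):
    for cl in range(1, detections.size(1)):       # class 0 is background
        for k in range(detections.size(2)):
            score = detections[b, cl, k, 0].item()
            if score < 0.5:                       # rows are sorted; zero padding ends the list
                break
            x_min, y_min, x_max, y_max = detections[b, cl, k, 1:].tolist()
            print(f"image {b}, class {cl}: score {score:.2f}, "
                  f"box ({x_min:.1f}, {y_min:.1f}, {x_max:.1f}, {y_max:.1f})")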
Code Example #10
    def compute_nms(self, scores, boxes, original_size):
        height, width, _ = original_size
        boxes = boxes.cpu().detach()
        # height = height.item()
        # width = width.item()
        # original_size = (width, height)

        # Filtering by confidence threshold?
        prob_threshold = 0.01

        # Compute prediction with NMS
        picked_box_probs = []
        picked_labels = []
        for class_index in range(1, scores.size(1)):
            probs = scores[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.size(0) == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
            box_probs = nms(box_probs,
                            "hard",
                            score_threshold=prob_threshold,
                            iou_threshold=self.config.iou_threshold,
                            sigma=0.5,
                            top_k=-1,
                            candidate_size=200)
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.size(0))
        if not picked_box_probs:
            return torch.tensor([]), torch.tensor([]), torch.tensor([])
        else:
            picked_box_probs = torch.cat(picked_box_probs)
            picked_box_probs[:, 0] *= width
            picked_box_probs[:, 1] *= height
            picked_box_probs[:, 2] *= width
            picked_box_probs[:, 3] *= height
            return picked_box_probs[:, :4], torch.tensor(
                picked_labels), picked_box_probs[:, 4]
Code Example #11
    def forward(self, loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

        # Decode predictions into bboxes.
        for i in range(num):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()

            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
        flt = output.contiguous().view(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
        return output
Code Example #12
        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = nms(dets, args.nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)

        for b in dets:
            if b[4] < args.vis_thres:
                continue
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
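In example #12, nms(dets, threshold) takes the stacked [x1, y1, x2, y2, score] rows and returns the indices to keep. Below is a minimal pure-NumPy sketch in that style; it follows the classic py_cpu_nms, including the +1 pixel convention, and may differ in detail from the project's own version.

import numpy as np

def nms(dets, thresh):
    # dets: (N, 5) array of [x1, y1, x2, y2, score] rows.
    # Returns the list of row indices to keep.
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the best remaining box against the rest
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]   # keep only sufficiently distinct boxes
    return keep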
Code Example #13
File: test.py  Project: MaxKuznets0v/AI
    # ignore low scores
    inds = np.where(scores > cfg["conf_threshold"])[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:cfg["top_k"]]
    boxes = boxes[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep_ind, keep_count = nms(torch.from_numpy(boxes.astype(np.float32)),
                               torch.from_numpy(scores.astype(np.float32)),
                               cfg["nms_threshold"], cfg['keep_top_k'])
    dets = dets[keep_ind[:keep_count], :]

    if len(dets.shape) == 1:
        dets = dets[np.newaxis, :]  # Adding second dim (if only 1 det)

    # keep top-K after NMS
    dets = dets[:cfg["keep_top_k"], :]

    # Save the image results to file
    with open(save_file, 'a') as f:
        f.write(str(img_path) + ":\n")
    for b in dets:
        if b[4] < cfg['min_for_visual']:
            continue
Code Example #14
    def predict(self, image, full_processing=True, inter_tensor=None):

        height, width, _ = image.shape

        if inter_tensor is None:
            image = self.prediction_transform(image, resize=full_processing)
            images = image.unsqueeze(0)
            images = images.to(self.device)

        with torch.no_grad():
            if inter_tensor is not None:
                scores, boxes = self.forward(None,
                                             full_processing=full_processing,
                                             inter_tensor=inter_tensor)
            else:
                scores, boxes = self.forward(images,
                                             full_processing=full_processing)

        if not full_processing:
            return torch.tensor([]), torch.tensor([]), torch.tensor([]), 0

        # Compute metrics
        boxes = boxes[0]
        scores = scores[0]

        # Move to cpu?
        boxes = boxes.to(torch.device("cpu"))
        scores = scores.to(torch.device("cpu"))

        # Filtering by confidence threshold?
        prob_threshold = 0.01

        # Compute prediction with NMS
        picked_box_probs = []
        picked_labels = []
        for class_index in range(1, scores.size(1)):
            probs = scores[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.size(0) == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
            box_probs = nms(box_probs,
                            "hard",
                            score_threshold=prob_threshold,
                            iou_threshold=self.config.iou_threshold,
                            sigma=0.5,
                            top_k=-1,
                            candidate_size=200)
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.size(0))
        if not picked_box_probs:

            return torch.tensor([]), torch.tensor([]), torch.tensor([]), 0
        else:
            picked_box_probs = torch.cat(picked_box_probs)
            picked_box_probs[:, 0] *= width
            picked_box_probs[:, 1] *= height
            picked_box_probs[:, 2] *= width
            picked_box_probs[:, 3] *= height

            return picked_box_probs[:, :4], torch.tensor(
                picked_labels), picked_box_probs[:, 4], 0
Code Example #15
    def validation_step(self, batch, batch_nb):
        if self.hparams.net == 'lstm':
            # Detach hidden states from graph
            self.detach_hidden()

            if not int(
                    self.pred_dec.bottleneck_lstm1.hidden_state.shape[0]
            ) == 1 or not int(
                    self.pred_dec.bottleneck_lstm1.cell_state.shape[0]) == 1:
                # Adjust hidden state due to batch size
                (h, c) = self.pred_dec.bottleneck_lstm1.cell.init_hidden(
                    1,
                    hidden=self.pred_dec.bottleneck_lstm1.hidden_channels,
                    shape=(10, 10))
                self.pred_dec.bottleneck_lstm1.hidden_state = h
                self.pred_dec.bottleneck_lstm1.cell_state = c

        # OPTIONAL
        images, boxes_batch, labels_batch, original_size = batch  # gt

        scores, boxes = self.forward(images)
        regression_loss, classification_loss = self.loss_criterion(
            self.pred_dec.confidences, self.pred_dec.locations, labels_batch,
            boxes_batch)
        loss = regression_loss + classification_loss
        self.accum_val_loss += loss.item()

        # Apply inverse transform
        boxes = boxes[0]
        scores = scores[0]
        image, _, _ = self.inverse_val_transform(images[0], None, None)

        # height, width, _ = image.shape
        height, width, _ = original_size
        height = height.item()
        width = width.item()
        original_size = (width, height)

        # Filtering by confidence threshold?
        prob_threshold = 0.01

        # Compute prediction with NMS
        picked_box_probs = []
        picked_labels = []
        for class_index in range(1, scores.size(1)):
            probs = scores[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.size(0) == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
            box_probs = nms(box_probs,
                            "hard",
                            score_threshold=prob_threshold,
                            iou_threshold=self.config.iou_threshold,
                            sigma=0.5,
                            top_k=-1,
                            candidate_size=200)
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.size(0))
        if not picked_box_probs:
            boxes, labels, probs = torch.tensor([]), torch.tensor(
                []), torch.tensor([])
        else:
            picked_box_probs = torch.cat(picked_box_probs)
            picked_box_probs[:, 0] *= width
            picked_box_probs[:, 1] *= height
            picked_box_probs[:, 2] *= width
            picked_box_probs[:, 3] *= height
            boxes, labels, probs = picked_box_probs[:, :4], torch.tensor(
                picked_labels), picked_box_probs[:, 4]

        if self.plot_image:
            img_draw = image.copy()
            img_draw = cv2.resize(img_draw, original_size)

            for j, box in enumerate(boxes):
                if probs[j].item() > 0.01:  # Threshold
                    x1 = int(box[0].cpu().item())
                    y1 = int(box[1].cpu().item())
                    x2 = int(box[2].cpu().item())
                    y2 = int(box[3].cpu().item())
                    cv2.rectangle(img_draw, (x1, y1), (x2, y2), (36, 255, 12),
                                  2)
                    cv2.putText(
                        img_draw, self.val_dataset._classes_names[labels[j]] +
                        " " + str(probs[j].cpu().item()), (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
            cv2.imshow("img", img_draw)
            key = cv2.waitKey(0)
            if key == 27:  # if ESC is pressed, exit loop
                self.plot_image = False
                cv2.destroyAllWindows()

        # After prediction
        indexes = torch.ones(labels.size(0), 1,
                             dtype=torch.float32) * self.val_index
        self.val_index += 1

        tmprslt = torch.cat(
            [
                indexes.reshape(-1, 1).to(self.device),
                labels.reshape(-1, 1).float().to(self.device),
                probs.reshape(-1, 1).to(self.device),
                (boxes + 1.0).to(self.device)  # matlab's indexes start from 1
            ],
            dim=1)
        if tmprslt.shape[0] > 0:
            self.results.append(tmprslt)

        tensorboard_logs = {'val_loss': loss}

        return {'val_loss': loss, 'log': tensorboard_logs}
Code Example #16
File: detect.py  Project: MaxKuznets0v/AI
def detect_faces(img_path, save_path=None):
    print("Starting detection...")
    # Loading pretrained model
    net = FaceDetectionSSD("test", cfg['img_dim'], cfg['num_classes'])
    net.load_state_dict(torch.load(cfg['pretrained_model']))
    net.eval()
    cudnn.benchmark = True
    device = torch.device("cuda:0" if cfg['gpu_train'] else "cpu")
    net.to(device)
    rgb_mean = (104, 117, 123)  # BGR order

    # Processing image
    init_im = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if init_im is None:
        raise RuntimeError("Image does not exist!")

    image = np.float32(init_im)
    im_height, im_width, _ = image.shape
    scale = torch.Tensor(
        [image.shape[1], image.shape[0], image.shape[1], image.shape[0]])
    image -= rgb_mean
    image = image.transpose(2, 0, 1)
    image = torch.from_numpy(image).unsqueeze(0)
    image = image.to(device)
    scale = scale.to(device)

    loc, conf = net(image)
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > cfg["conf_threshold"])[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:cfg["top_k"]]
    boxes = boxes[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep_ind, keep_count = nms(torch.from_numpy(boxes.astype(np.float32)),
                               torch.from_numpy(scores.astype(np.float32)),
                               cfg["nms_threshold"], cfg['keep_top_k'])
    dets = dets[keep_ind[:keep_count], :]

    # keep top-K after NMS
    try:
        dets = dets[:cfg["keep_top_k"], :]
    except:
        dets = dets.reshape((1, dets.size))[:cfg["keep_top_k"], :]

    face_count = 0
    # Draw detections on the image and save the results
    for b in dets:
        if b[4] < cfg['min_for_visual']:
            continue
        face_count += 1
        text = "{:.2f}%".format(b[4] * 100)
        b = list(b)
        for i in range(4):
            b[i] = int(b[i])
        cv2.rectangle(init_im, (b[0], b[1]), (b[2], b[3]), (0, 255, 255), 2)
        cx = b[0]
        cy = b[1] - 5
        cv2.putText(init_im, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                    (255, 255, 255))
    if save_path is not None:
        cv2.imwrite(save_path, init_im)
    print("Detection completed!")
    print(f"Found {face_count} faces!")
    return init_im
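A hypothetical call to the function above; the paths are illustrative only.

result = detect_faces("group_photo.jpg", save_path="group_photo_detected.jpg")
cv2.imshow("detections", result)
cv2.waitKey(0)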