Esempio n. 1
0
    def get_ground_truth_predictors(self, ground_truth, label_map, im=None):
        """Pick, for every ground-truth box, the label-map entry that predicts it.

        For each detection map, the grid cell obtained by floor-dividing
        columns 0 and 1 of ``ground_truth`` by that map's stride is located,
        and the flat indices of all anchors in that cell become candidates.
        Candidates are then assigned greedily by IoU: the best remaining
        (ground truth, candidate) pair is taken, after which both that
        candidate and that ground truth are excluded from later rounds.

        Args:
            ground_truth: array with one row per box; the first four columns
                are converted with ``center_to_corner``.
            label_map: array indexed by flat predictor index; the first four
                columns of each row are treated as a box.
            im: unused.

        Returns:
            Integer array of shape ``(num_ground_truth, 1)`` holding the
            selected label-map index for each ground-truth box.

        Raises:
            AssertionError: if the cell computation fails for the given
                ``ground_truth`` (the offending array is in the message).
        """
        total_boxes_per_gt = sum(self.anchor_nums)
        num_ground_truth_in_im = ground_truth.shape[0]
        # ``np.int`` was removed from NumPy; the builtin ``int`` is what it
        # aliased.
        inds = np.zeros((num_ground_truth_in_im, total_boxes_per_gt), dtype=int)

        # n indexes the detection maps; first_col is the first column of
        # ``inds`` that belongs to map n.
        first_col = 0
        for n, anchors_here in enumerate(self.anchor_nums):
            offset = sum(self.num_pred_boxes[:n])
            stride = self.strides[n]
            try:
                center_cells = ground_truth[:, [0, 1]] // stride
            except Exception as err:
                raise AssertionError(
                    "could not compute centre cells for ground truth: "
                    "{!r}".format(ground_truth)) from err

            # Flat index of the first anchor of the cell holding each box.
            first_anchor = offset + anchors_here * (
                self.inp_dim // stride * center_cells[:, 1]
                + center_cells[:, 0])
            for x in range(anchors_here):
                inds[:, first_col + x] = first_anchor + x
            first_col += anchors_here

        candidate_boxes = center_to_corner(label_map[inds][:, :, :4])
        ground_truth_boxes = center_to_corner(
            ground_truth.copy()[np.newaxis]).squeeze(0)[:, :4]
        candidate_boxes = candidate_boxes.transpose(0, 2, 1)
        ground_truth_boxes = ground_truth_boxes[:, :, np.newaxis]
        candidate_ious = bbox_iou(candidate_boxes, ground_truth_boxes,
                                  lib="numpy")
        prediction_boxes = np.zeros((num_ground_truth_in_im, 1), dtype=int)

        for _ in range(num_ground_truth_in_im):
            # Row (ground truth) and column (candidate) of the highest IoU.
            max_iou_ind = np.argmax(candidate_ious)
            max_iou_row, max_iou_col = divmod(max_iou_ind, total_boxes_per_gt)

            # Index (in the label map) of the box with maximum IoU.
            max_iou_box = inds[max_iou_row, max_iou_col]
            prediction_boxes[max_iou_row] = max_iou_box

            # Zero the IoU of this label-map box for every ground truth so
            # it cannot be assigned twice.  The comparison must use the
            # label-map index, not the flat argmax position.
            candidate_ious *= (inds != max_iou_box)

            # Zero the row of the ground truth that was just assigned so it
            # does not take part in later rounds.
            candidate_ious[max_iou_row] *= 0

        return prediction_boxes
Esempio n. 2
0
 def get_no_obj_candidates(self, ground_truth, label_map, ground_truth_predictors):
     """Return the predictor indices that may be trained as "no object".

     Every predictor box in ``label_map`` is compared with every
     ground-truth box.  A predictor qualifies when its highest IoU over all
     ground truths is below 0.5; the predictors listed in
     ``ground_truth_predictors`` are forced to IoU 1 so they never qualify.

     Args:
         ground_truth: array with one row per box; the first four columns
             are converted with ``center_to_corner``.
         label_map: array indexed by flat predictor index; the first four
             columns of each row are treated as a box.
         ground_truth_predictors: indices of predictors already assigned
             to a ground-truth box.

     Returns:
         1-D integer array of qualifying predictor indices.
     """
     num_ground_truth_in_im = ground_truth.shape[0]

     # One row of "all predictor indices" per ground-truth box.  (A
     # previous, immediately overwritten ``np.zeros(..., dtype=np.int)``
     # allocation was dropped: it was dead code and ``np.int`` no longer
     # exists in NumPy.)
     inds = np.arange(sum(self.num_pred_boxes)).astype(int)
     inds = inds[np.newaxis].repeat(num_ground_truth_in_im, axis=0)

     candidate_boxes = label_map[inds][:, :, :4]
     candidate_boxes = center_to_corner(candidate_boxes)
     ground_truth_boxes = center_to_corner(ground_truth.copy()[np.newaxis]).squeeze(0)[:, :4]
     candidate_boxes = candidate_boxes.transpose(0, 2, 1)
     ground_truth_boxes = ground_truth_boxes[:, :, np.newaxis]
     candidate_ious = bbox_iou(candidate_boxes, ground_truth_boxes, lib="numpy")

     # Predictors that own a ground truth must never be "no object".
     candidate_ious[:, ground_truth_predictors] = 1

     max_ious_per_box = np.max(candidate_ious, 0)
     no_obj_cands = np.nonzero(max_ious_per_box < 0.5)[0].astype(int)

     return no_obj_cands
Esempio n. 3
0
def do_nms(boxes, nms_thresh):
    """Apply per-class non-maximum suppression to ``boxes`` in place.

    For each class, boxes are visited from the highest to the lowest score
    of that class.  A box whose score is still non-zero zeroes the class
    score of every lower-ranked box that overlaps it with an IoU of at
    least ``nms_thresh``.

    Args:
        boxes: sequence of objects exposing a mutable ``classes`` score
            sequence and accepted by ``bbox_iou``.
        nms_thresh: IoU at or above which the lower-ranked box is
            suppressed.

    Returns:
        None; ``boxes`` is modified in place.
    """
    if len(boxes) == 0:
        return

    class_count = len(boxes[0].classes)
    for cls in range(class_count):
        # Negated scores so that argsort yields a descending ranking.
        ranking = np.argsort([-candidate.classes[cls] for candidate in boxes])

        for rank, keeper_idx in enumerate(ranking):
            keeper = boxes[keeper_idx]
            # Already suppressed (or never scored) for this class.
            if keeper.classes[cls] == 0:
                continue

            for other_idx in ranking[rank + 1:]:
                other = boxes[other_idx]
                if bbox_iou(keeper, other) >= nms_thresh:
                    other.classes[cls] = 0
Esempio n. 4
0
def write_results(predictions,
                  confidence,
                  num_class,
                  nms=True,
                  nms_thresh=0.4):
    """Threshold raw detections on objectness and run class-wise NMS.

    Args:
        predictions: tensor indexed as ``[image, box, attribute]`` whose
            attributes are read as ``(cx, cy, w, h, objectness, class
            scores...)``.  The input tensor itself is not modified.
        confidence: boxes whose objectness is not above this value are
            discarded.
        num_class: unused; every column from index 5 on is treated as a
            class score.  Kept for interface compatibility.
        nms: when true, overlapping boxes of the same class are suppressed.
        nms_thresh: a lower-ranked box is dropped when its IoU with a
            higher-ranked box of the same class is not below this value.

    Returns:
        ``0`` when no box passes the objectness threshold.  Otherwise a
        tensor with one row per detection and eight columns: image index in
        the batch, ``x1, y1, x2, y2``, objectness, best class score and
        best class index.
    """
    # Zero every box whose objectness is not above the threshold; the rows
    # that survive are later recognised by their non-zero objectness.
    keep_mask = (predictions[:, :, 4] > confidence).float().unsqueeze(2)
    predictions = predictions * keep_mask

    # Nothing left in the whole batch.
    if torch.nonzero(predictions[:, :, 4]).size(0) == 0:
        return 0

    # (cx, cy, w, h) -> (x1, y1, x2, y2); IoU is computed on corners.
    centers = predictions[:, :, :2]
    half_extent = predictions[:, :, 2:4] / 2
    predictions = torch.cat(
        (centers - half_extent, centers + half_extent, predictions[:, :, 4:]),
        2)

    detections = []
    for ind in range(predictions.size(0)):
        prediction = predictions[ind]

        # ``view(-1)`` keeps the index 1-D even when a single box survives;
        # squeezing it to 0-dim would break the row selection below.
        kept_rows = torch.nonzero(prediction[:, 4]).view(-1)
        if kept_rows.numel() == 0:
            continue
        prediction = prediction[kept_rows]

        # Replace the per-class scores with the best score and its index.
        max_score, max_score_ind = torch.max(prediction[:, 5:], 1)
        prediction = torch.cat(
            (prediction[:, :5],
             max_score.float().unsqueeze(1),
             max_score_ind.float().unsqueeze(1)),
            1)

        # Class-wise NMS over the classes present in this image.
        for cls in torch.unique(prediction[:, -1]):
            class_rows = torch.nonzero(prediction[:, -1] == cls).view(-1)
            prediction_class = prediction[class_rows]

            # Highest objectness first.
            order = torch.sort(prediction_class[:, 4], descending=True)[1]
            prediction_class = prediction_class[order]

            if nms:
                i = 0
                # The tensor shrinks as boxes are suppressed; stop once
                # row i has nothing after it to compare against.
                while i + 1 < prediction_class.size(0):
                    ious = bbox_iou(prediction_class[i].unsqueeze(0),
                                    prediction_class[i + 1:])
                    # Zero the rows that overlap row i too much, then drop
                    # every zeroed row (row i itself is always kept).
                    iou_mask = (ious < nms_thresh).float().unsqueeze(1)
                    prediction_class[i + 1:] *= iou_mask
                    survivors = torch.nonzero(
                        prediction_class[:, 4]).view(-1)
                    prediction_class = prediction_class[survivors]
                    i += 1

            # Prefix every detection with the index of its image.
            batch_ind = prediction_class.new_full(
                (prediction_class.size(0), 1), ind)
            detections.append(torch.cat((batch_ind, prediction_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
Esempio n. 5
0
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Turn raw network output into final detections.

    Boxes are filtered by objectness threshold and then, per image and per
    class, by non-maximum suppression.  Images are processed one at a time
    because each may keep a different number of boxes.

    Args:
        prediction: tensor indexed as ``[image, box, attribute]`` whose
            attributes are read as ``(cx, cy, w, h, objectness, class
            scores...)``.  The input tensor itself is not modified.
        confidence: boxes whose objectness is not above this value are
            discarded.
        num_classes: number of class-score columns read from index 5 on.
        nms: whether to run non-maximum suppression.
        nms_conf: IoU threshold for NMS; a lower-ranked box is dropped when
            its IoU with a higher-ranked box of the same class is not
            below this value.

    Returns:
        ``0`` when the batch contains no detection.  Otherwise a ``D x 8``
        tensor covering every image of the batch; each row holds the image
        index within the batch, the four corner coordinates, the
        objectness, the score of the best class and the index of that
        class.
    """
    # Zero the whole row of every box whose objectness is not above the
    # threshold; surviving rows are recognised by non-zero objectness.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Boxes arrive as centre + size; IoU is easier on corner coordinates,
    # so convert to (x1, y1, x2, y2).
    centers = prediction[:, :, :2]
    half_extent = prediction[:, :, 2:4] / 2
    prediction = torch.cat(
        (centers - half_extent, centers + half_extent, prediction[:, :, 4:]),
        2)

    detections = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Keep only the best class score and its index instead of the
        # full list of class scores.
        max_conf, max_conf_index = torch.max(
            image_pred[:, 5:(5 + num_classes)], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             max_conf.float().unsqueeze(1),
             max_conf_index.float().unsqueeze(1)),
            1)

        # Drop the rows that were zeroed by the objectness threshold.
        kept_rows = torch.nonzero(image_pred[:, 4]).view(-1)
        if kept_rows.numel() == 0:
            continue
        image_pred_ = image_pred[kept_rows]

        # NMS is done separately for every class detected in the image.
        for cls in torch.unique(image_pred_[:, -1]):
            # Rows of this class; as before, rows whose best class score
            # is exactly zero are not selected.
            in_class = (image_pred_[:, -1] == cls) & (image_pred_[:, -2] != 0)
            image_pred_class = image_pred_[in_class]

            # Highest objectness first.
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]

            if nms:
                i = 0
                # The tensor shrinks as boxes are suppressed; stop once
                # row i has nothing after it to compare against.
                while i + 1 < image_pred_class.size(0):
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                    image_pred_class[i + 1:])
                    # Zero every later box that overlaps box i too much,
                    # then remove the zeroed rows.
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask
                    survivors = torch.nonzero(
                        image_pred_class[:, 4]).view(-1)
                    image_pred_class = image_pred_class[survivors]
                    i += 1

            # Prefix every detection with the index of its image.
            batch_ind = image_pred_class.new_full(
                (image_pred_class.size(0), 1), ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    # Return only after every image was processed; 0 means "nothing found".
    if not detections:
        return 0
    return torch.cat(detections)
Esempio n. 6
0
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold detections on objectness and apply class-wise NMS.

    Args:
        prediction: tensor indexed as ``[image, box, attribute]`` whose
            attributes are read as ``(cx, cy, w, h, objectness, class
            scores...)``.  The input tensor itself is not modified.
        confidence: boxes whose objectness is not above this value are
            discarded.
        num_classes: number of class-score columns read from index 5 on.
        nms: whether to run non-maximum suppression.
        nms_conf: IoU threshold; a lower-ranked box is dropped when its IoU
            with a higher-ranked box of the same class is not below it.

    Returns:
        ``0`` when nothing is detected in the batch.  Otherwise a tensor
        with one row per detection and eight columns: image index in the
        batch, ``xmin, ymin, xmax, ymax``, objectness, best class score
        and best class index.  All images share this one flat tensor; the
        first column tells them apart.
    """
    # Rows failing the objectness threshold are zeroed and later recognised
    # by their zero objectness.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # (cx, cy, w, h) -> (xmin, ymin, xmax, ymax)
    centers = prediction[:, :, :2]
    half_extent = prediction[:, :, 2:4] / 2
    prediction = torch.cat(
        (centers - half_extent, centers + half_extent, prediction[:, :, 4:]),
        2)

    detections = []
    for ind in range(prediction.size(0)):
        # Select the image from the batch.
        image_pred = prediction[ind]

        # Replace the class scores by the score and the index of the class
        # having the maximum score.
        max_conf, max_conf_index = torch.max(
            image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             max_conf.float().unsqueeze(1),
             max_conf_index.float().unsqueeze(1)),
            1)

        # Get rid of the zeroed entries; ``view(-1)`` keeps the index 1-D
        # even for a single surviving box.
        kept_rows = torch.nonzero(image_pred[:, 4]).view(-1)
        if kept_rows.numel() == 0:
            continue
        image_pred_ = image_pred[kept_rows]

        # NMS is done class-wise over the classes detected in the image.
        for cls in torch.unique(image_pred_[:, -1]):
            # Detections of this class; as before, rows whose best class
            # score is exactly zero are not selected.
            in_class = (image_pred_[:, -1] == cls) & (image_pred_[:, -2] != 0)
            image_pred_class = image_pred_[in_class]

            # Entry with the maximum objectness goes to the top.
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]

            if nms:
                i = 0
                # The tensor shrinks as boxes are suppressed; stop once
                # row i has nothing after it to compare against.
                while i + 1 < image_pred_class.size(0):
                    # IoUs of all boxes that come after the current one.
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                    image_pred_class[i + 1:])
                    # Zero out the detections whose IoU is not below the
                    # threshold, then remove the zeroed rows.
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask
                    survivors = torch.nonzero(
                        image_pred_class[:, 4]).view(-1)
                    image_pred_class = image_pred_class[survivors]
                    i += 1

            # Prefix each detection with the index of its image so that
            # the flattened output can still be mapped back to the batch.
            batch_ind = image_pred_class.new_full(
                (image_pred_class.size(0), 1), ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
Esempio n. 7
0
    def __getitem__(self, idx):
        """Assemble the ``idx``-th batch of images and YOLO training targets.

        Every object of every image in the batch is matched to the anchor
        with the highest IoU (both boxes placed at the origin).  The anchor
        index selects one of three output grids (three consecutive anchors
        per grid) and the slot inside the grid cell that contains the
        object's centre; box, objectness and one-hot class are written
        there.

        Args:
            idx: batch index.

        Returns:
            A pair ``(inputs, dummies)`` where ``inputs`` is
            ``[x_batch, t_batch, yolo_1, yolo_2, yolo_3]`` (images, raw
            ground-truth boxes, and the three target grids) and
            ``dummies`` is a list of three zero arrays of shape
            ``(batch, 1)``.
        """
        # Network input size used for this batch.
        net_h, net_w = self._get_net_size(idx)
        base_grid_h, base_grid_w = net_h // self.downsample, net_w // self.downsample

        # First and last instance indices of the batch.  The final batch is
        # shifted back so that it stays full, but never before the start of
        # the data (a negative start would make the slice below disagree
        # with the allocated batch size).
        l_bound = idx * self.batch_size
        r_bound = (idx + 1) * self.batch_size
        if r_bound > len(self.instances):
            r_bound = len(self.instances)
            l_bound = max(0, r_bound - self.batch_size)
        batch_len = r_bound - l_bound

        x_batch = np.zeros((batch_len, net_h, net_w, 3))  # input images
        t_batch = np.zeros((batch_len, 1, 1, 1, self.max_box_per_image,
                            4))  # list of groundtruth boxes

        # Desired network outputs at 1x, 2x and 4x the base grid size.
        anchors_per_grid = len(self.anchors) // 3
        cell_depth = 4 + 1 + len(self.labels)
        yolo_1, yolo_2, yolo_3 = (
            np.zeros((batch_len, scale * base_grid_h, scale * base_grid_w,
                      anchors_per_grid, cell_depth))
            for scale in (1, 2, 4))
        # Anchor indices 0-2 map to the finest grid, 6-8 to the coarsest.
        yolos = [yolo_3, yolo_2, yolo_1]

        dummy_yolo_1 = np.zeros((batch_len, 1))
        dummy_yolo_2 = np.zeros((batch_len, 1))
        dummy_yolo_3 = np.zeros((batch_len, 1))

        # Slot in t_batch for the next box; it wraps around and is not
        # reset between images.
        true_box_index = 0

        for instance_count, train_instance in enumerate(
                self.instances[l_bound:r_bound]):
            # augment input image and fix object's position and size
            img, all_objs = self._aug_image(train_instance, net_h, net_w)

            for obj in all_objs:
                box_w = obj['xmax'] - obj['xmin']
                box_h = obj['ymax'] - obj['ymin']

                # Find the best anchor for this object; the first anchor
                # wins on ties.
                max_anchor = None
                max_index = -1
                max_iou = -1
                shifted_box = BoundBox(0, 0, box_w, box_h)
                for i, anchor in enumerate(self.anchors):
                    iou = bbox_iou(shifted_box, anchor)
                    if max_iou < iou:
                        max_anchor = anchor
                        max_index = i
                        max_iou = iou

                # Grid responsible for this bounding box.
                yolo = yolos[max_index // 3]
                anchor_slot = max_index % 3
                grid_h, grid_w = yolo.shape[1:3]

                # Position of the box centre in grid units.
                center_x = .5 * (obj['xmin'] + obj['xmax'])
                center_x = center_x / float(net_w) * grid_w  # sigma(t_x) + c_x
                center_y = .5 * (obj['ymin'] + obj['ymax'])
                center_y = center_y / float(net_h) * grid_h  # sigma(t_y) + c_y

                # Box size as log-ratio to the matched anchor.
                w = np.log(box_w / float(max_anchor.xmax))  # t_w
                h = np.log(box_h / float(max_anchor.ymax))  # t_h

                box = [center_x, center_y, w, h]

                # Index of the label.
                obj_indx = self.labels.index(obj['name'])

                # Cell responsible for this object.
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # Write x, y, w, h, confidence and one-hot class; the slot
                # is cleared first in case an earlier object used it.
                yolo[instance_count, grid_y, grid_x, anchor_slot] = 0
                yolo[instance_count, grid_y, grid_x, anchor_slot, 0:4] = box
                yolo[instance_count, grid_y, grid_x, anchor_slot, 4] = 1.
                yolo[instance_count, grid_y, grid_x, anchor_slot,
                     5 + obj_indx] = 1

                # Record the raw box in t_batch.
                true_box = [center_x, center_y, box_w, box_h]
                t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

                true_box_index += 1
                true_box_index = true_box_index % self.max_box_per_image

            # Assign the input image to x_batch.
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                for obj in all_objs:
                    cv2.rectangle(img, (obj['xmin'], obj['ymin']),
                                  (obj['xmax'], obj['ymax']), (255, 0, 0), 3)
                    cv2.putText(img, obj['name'],
                                (obj['xmin'] + 2, obj['ymin'] + 12), 0,
                                1.2e-3 * img.shape[0], (0, 255, 0), 2)

                x_batch[instance_count] = img

        return [x_batch, t_batch, yolo_1, yolo_2,
                yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
Esempio n. 8
0
    def parse_targets(self, targets, anchors, grid_w, grid_h, threshold):
        """Convert ground-truth targets into per-cell training tensors.

        Args:
            targets: tensor indexed as ``[image, target, field]`` whose
                fields are read as ``(class, x, y, w, h)``; x, y, w and h
                are multiplied by the grid size.  Rows summing to zero are
                skipped.
            anchors: sequence of ``(width, height)`` pairs, one per anchor.
            grid_w: grid width in cells.
            grid_h: grid height in cells.
            threshold: anchors whose shape IoU with a target exceeds this
                value are removed from ``noobj_mask`` at the target's cell.

        Returns:
            Tuple ``(mask, noobj_mask, t_x, t_y, t_w, t_h, t_conf,
            t_class)``.  All but the last have shape
            ``(batch, num_anchors, grid_w, grid_h)``; ``t_class`` has an
            extra trailing ``num_classes`` axis.
        """
        batch_size = targets.size(0)

        # NOTE(review): the tensors are allocated as (..., grid_w, grid_h)
        # but indexed below as [..., g_j, g_i] (row, then column).  This is
        # only consistent for square grids -- confirm against the caller
        # before using non-square inputs.
        grid_shape = (batch_size, self.num_anchors, grid_w, grid_h)

        mask = torch.zeros(*grid_shape, requires_grad=False)
        noobj_mask = torch.ones(*grid_shape, requires_grad=False)
        t_x = torch.zeros(*grid_shape, requires_grad=False)
        t_y = torch.zeros(*grid_shape, requires_grad=False)
        t_w = torch.zeros(*grid_shape, requires_grad=False)
        t_h = torch.zeros(*grid_shape, requires_grad=False)
        t_conf = torch.zeros(*grid_shape, requires_grad=False)
        t_class = torch.zeros(*grid_shape, self.num_classes,
                              requires_grad=False)

        # Anchor boxes placed at the origin; they do not depend on the
        # target, so build them once instead of once per target.
        anchor_shapes = torch.FloatTensor(
            np.concatenate((np.zeros((self.num_anchors, 2)),
                            np.array(anchors)), 1))

        for b in range(batch_size):
            for t in range(targets.shape[1]):
                # All-zero rows are skipped.
                if targets[b, t].sum() == 0:
                    continue

                # Scale the target to grid units.
                g_x = targets[b, t, 1] * grid_w
                g_y = targets[b, t, 2] * grid_h
                g_w = targets[b, t, 3] * grid_w
                g_h = targets[b, t, 4] * grid_h

                # Indices of the grid cell containing the target.
                g_i = int(g_x)
                g_j = int(g_y)

                # Ground-truth box placed at the origin, so that the IoU
                # below compares shapes only.
                ground_truth_box = torch.FloatTensor(
                    np.array([0, 0, g_w, g_h])).unsqueeze(0)
                anchor_ious = bbox_iou(ground_truth_box, anchor_shapes)

                # Anchors overlapping more than the threshold are not
                # "no object" candidates at this cell.
                noobj_mask[b, anchor_ious > threshold, g_j, g_i] = 0

                # The best matching anchor becomes responsible for the
                # target.
                n_best = np.argmax(anchor_ious)

                mask[b, n_best, g_j, g_i] = 1
                # Offsets of the centre inside its cell.
                t_x[b, n_best, g_j, g_i] = g_x - g_i
                t_y[b, n_best, g_j, g_i] = g_y - g_j
                # Log-ratio of the size to the anchor size; the epsilon
                # guards against log(0).
                t_w[b, n_best, g_j,
                    g_i] = math.log(g_w / anchors[n_best][0] + 1e-16)
                t_h[b, n_best, g_j,
                    g_i] = math.log(g_h / anchors[n_best][1] + 1e-16)
                t_conf[b, n_best, g_j, g_i] = 1
                t_class[b, n_best, g_j, g_i, int(targets[b, t, 0])] = 1

        return mask, noobj_mask, t_x, t_y, t_w, t_h, t_conf, t_class
Esempio n. 9
0
def eval_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold detections, run class-wise NMS and return a NumPy array.

    Args:
        prediction: tensor indexed as ``[image, box, attribute]`` whose
            attributes are read as ``(cx, cy, w, h, objectness, class
            scores...)``.  The input tensor itself is not modified.
        confidence: boxes whose objectness is not above this value are
            discarded.
        num_classes: number of class-score columns read from index 5 on.
        nms: whether to run non-maximum suppression.
        nms_conf: IoU threshold; a lower-ranked box is dropped when its IoU
            with a higher-ranked box of the same class is not below it.

    Returns:
        NumPy array with one row per detection and eight columns: image
        index in the batch, ``x1, y1, x2, y2``, objectness, best class
        score and best class index.  The array has shape ``(0, 8)`` when
        nothing is detected.
    """
    # Rows failing the objectness threshold are zeroed and later recognised
    # by their zero objectness.
    conf_mask = (prediction[:, :, 4] > confidence).to(
        prediction.dtype).unsqueeze(2)
    prediction = prediction * conf_mask

    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    centers = prediction[:, :, :2]
    half_extent = prediction[:, :, 2:4] / 2
    prediction = torch.cat(
        (centers - half_extent, centers + half_extent, prediction[:, :, 4:]),
        2)

    detections = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Replace the class scores by the best score and its index.
        max_conf, max_conf_index = torch.max(
            image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             max_conf.unsqueeze(1),
             max_conf_index.to(max_conf.dtype).unsqueeze(1)),
            1)

        # ``view(-1)`` keeps the index 1-D; squeezing would turn a single
        # detection into a 0-dim index and collapse the result to 1-D.
        kept_rows = torch.nonzero(image_pred[:, 4]).view(-1)
        if kept_rows.numel() == 0:
            continue
        image_pred_ = image_pred[kept_rows]

        for cls in torch.unique(image_pred_[:, -1], sorted=True):
            # Detections of this class; as before, rows whose best class
            # score is exactly zero are not selected.
            in_class = (image_pred_[:, -1] == cls) & (image_pred_[:, -2] != 0)
            image_pred_class = image_pred_[in_class]

            # Highest objectness first.
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]

            if nms:
                i = 0
                # The tensor shrinks as boxes are suppressed; stop once
                # row i has nothing after it to compare against.
                while i + 1 < image_pred_class.size(0):
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                    image_pred_class[i + 1:])
                    # Zero the rows overlapping row i too much, then drop
                    # the zeroed rows.
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask
                    survivors = torch.nonzero(
                        image_pred_class[:, 4]).view(-1)
                    image_pred_class = image_pred_class[survivors]
                    i += 1

            # Prefix every detection with the index of its image.
            batch_ind = image_pred_class.new_full(
                (image_pred_class.size(0), 1), ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        # No detection anywhere: return an empty result with the usual
        # column count instead of failing on an unbound variable.
        return prediction.new_zeros((0, 8)).detach().cpu().numpy()
    return torch.cat(detections).detach().cpu().numpy()
Esempio n. 10
0
def write_results(prediction,
                  confidence,
                  num_classes,
                  model_dim,
                  orig_dim,
                  nms=True,
                  nms_conf=0.7):
    """
    Filter raw detections by class score and run class-wise NMS.

    Arguments
    ---------
    prediction : tensor (3D)
        [batch, image_id, [x_center, y_center, width, height, objectness_score, class_score1, class_score2, ...]]
    confidence : float
        Rows whose best class score is not strictly above this value are
        discarded.
    num_classes : int
        Number of class-score columns that follow the objectness column.
    model_dim, orig_dim
        Not read by this function; accepted so existing callers keep working.
    nms : bool
        When true, overlapping boxes of the same class are suppressed.
    nms_conf : float
        A lower-ranked box is dropped when its IoU (as computed by
        ``bbox_iou``) with a kept box is not below this value.

    Returns
    --------
    output : tensor (2D), or the int -1 when nothing was detected
        [image_id, [batch_index, x_1, y_1, x_2, y_2, objectness_score, class_index, class_probability]]
    """

    # If the entire batch contains 0 for objectness score, there is nothing
    # to report.
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return -1

    # Stays -1 until the first block of detections is written.
    output = -1
    write = False

    # Technically, this should always be 1
    batch_size = prediction.size(0)

    for ind in range(batch_size):
        pred = prediction[ind]
        if pred.shape[0] == 0:
            continue

        # Box columns are converted by the project helper (x1y1x2y2 according
        # to the original comment).
        pred = center_to_corner_2d(pred)

        # Replace the num_classes score columns with two columns: the index
        # of the best-scoring class and that class's score.
        class_score, class_index = torch.max(pred[:, 5:5 + num_classes], 1)
        class_index = class_index.float().unsqueeze(1)
        class_score = class_score.float().unsqueeze(1)
        image_pred = torch.cat((pred[:, :5], class_index, class_score), 1)

        # Get rid of the rows with zero objectness
        non_zero_ind = torch.nonzero(image_pred[:, 4])
        image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

        # Remove low confidence by class probs
        image_pred_ = image_pred_[image_pred_[:, -1] > confidence, :]
        if image_pred_.shape[0] == 0:
            continue

        # Get the various classes detected in the image. `unique` is a
        # project helper; a failure there skips the image as before, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            img_classes = unique(image_pred_[:, -2].int())
        except Exception:
            continue

        # NMS is done class-wise
        for label in img_classes:
            # Detections of this particular class
            cls_mask_ind = (image_pred_[:, -2].int() == label)
            image_pred_class = image_pred_[cls_mask_ind].view(-1, 7)

            # Sort so that the entry with the maximum objectness is on top
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # IoU of box i against every box ranked below it. The
                    # tensor shrinks as boxes are suppressed, so running out
                    # of rows means we are done: every later index would
                    # fail the same way.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero out all the detections that have IoU >= threshold
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Keep only the rows whose objectness is still non-zero
                    non_zero_ind = torch.nonzero(
                        image_pred_class[:, 4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind].view(
                        -1, 7)

            # Prefix every row with the index of the image it belongs to
            batch_ind = image_pred_class.new(image_pred_class.size(0),
                                             1).fill_(ind)
            out = torch.cat((batch_ind, image_pred_class), 1)
            if not write:
                output = out
                write = True
            else:
                output = torch.cat((output, out))
    return output
Esempio n. 11
0
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold raw detections on objectness and apply class-wise NMS.

    Arguments
    ---------
    prediction : tensor (3D)
        Indexed as [image, box, attribute]. Columns 0-3 are read as
        (centre x, centre y, width, height), column 4 as the objectness
        score and columns 5..5+num_classes as per-class scores.
    confidence : float
        Boxes whose objectness is not strictly above this value are ignored.
    num_classes : int
        Number of class-score columns.
    nms : bool
        When true, overlapping boxes of the same class are suppressed.
    nms_conf : float
        A lower-ranked box is dropped when its IoU (as computed by
        ``bbox_iou``) with a kept box is not below this value.

    Returns
    -------
    tensor (2D) with rows
        [image index, x1, y1, x2, y2, objectness, class score, class index],
    or the int 0 when no detection survives.
    """
    # Zero the whole row of every box whose objectness is below the threshold.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Nothing in the batch survived the threshold.
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # Convert centre/size boxes to top-left and bottom-right corners, which
    # makes the IoU computation easier.
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
    box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
    box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
    box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)

    # None until the first block of detections is produced; this replaces an
    # uninitialised placeholder tensor that used to leak to the caller.
    output = None

    for ind in range(batch_size):
        # Detections of one image of the batch
        image_pred = prediction[ind]

        # Replace the per-class scores by the best score and its class index
        class_score, class_index = torch.max(
            image_pred[:, 5:5 + num_classes], 1)
        class_score = class_score.float().unsqueeze(1)
        class_index = class_index.float().unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], class_score, class_index),
                               1)

        # Drop the rows that were zeroed by the objectness threshold
        non_zero_ind = torch.nonzero(image_pred[:, 4])
        if non_zero_ind.numel() == 0:
            # No detection in this image
            continue
        image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

        # The last column holds the class index. `unique` is a project
        # helper; a failure there skips the image as before, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            img_classes = unique(image_pred_[:, -1])
        except Exception:
            continue

        # NMS is done class-wise
        for cls in img_classes:
            # Rows of this class whose class score is non-zero
            cls_mask = image_pred_ * (image_pred_[:, -1]
                                      == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Sort by objectness, highest first
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]

            # Number of candidate boxes for this class
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # IoU of box i against every box ranked below it. The
                    # tensor shrinks while boxes are suppressed, so running
                    # out of rows ends the loop.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero every lower-ranked box whose IoU reaches the
                    # threshold
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Keep only the boxes that were not zeroed
                    non_zero_ind = torch.nonzero(
                        image_pred_class[:, 4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind].view(
                        -1, 7)

            # Prefix each row with the index of its image so that all
            # detections of the batch can live in one flat 2D tensor.
            batch_ind = image_pred_class.new(image_pred_class.size(0),
                                             1).fill_(ind)
            out = torch.cat((batch_ind, image_pred_class), 1)
            output = out if output is None else torch.cat((output, out))

    return 0 if output is None else output
Esempio n. 12
0
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    '''
    Post-process the raw network output into the final list of detections.

    prediction is indexed as [image, box, attribute]; columns 0-3 are read
    as (centre x, centre y, width, height), column 4 as the objectness score
    and columns 5..5+num_classes as per-class scores. Boxes whose objectness
    is not strictly above `confidence` are ignored. When `nms` is true, boxes
    of the same class whose IoU (as computed by `bbox_iou`) with a
    higher-ranked box is not below `nms_conf` are suppressed.

    Returns a 2D tensor with rows
    [image index, x1, y1, x2, y2, objectness, class score, class index],
    or the int 0 when no detection survives.
    '''
    # Zero the whole row of every box below the objectness threshold
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Nothing in the batch survived the threshold
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # Convert the boxes to corner (diagonal) coordinates
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
    box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
    box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
    box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)

    # One (n, 8) tensor per image/class pair. Collecting them in a list
    # replaces an uninitialised placeholder tensor that used to be returned
    # when nothing was detected.
    collected = []

    # Thresholding and NMS are done one image at a time
    for ind in range(batch_size):
        # select the image from the batch
        image_pred = prediction[ind]

        # Drop the num_classes score columns and append the best class score
        # and the index of that class instead
        class_score, class_index = torch.max(
            image_pred[:, 5:5 + num_classes], 1)
        class_score = class_score.float().unsqueeze(1)
        class_index = class_index.float().unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], class_score, class_index),
                               1)

        # Remove the boxes (rows) whose objectness is zero
        non_zero_ind = torch.nonzero(image_pred[:, 4])
        if non_zero_ind.numel() == 0:
            continue
        image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

        # Get the various classes detected in the image. `unique` is a
        # project helper; a failure there skips the image as before, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            img_classes = unique(image_pred_[:, -1])
        except Exception:
            continue

        # NMS is done class-wise
        for cls in img_classes:
            # Detections of this class whose class score is non-zero
            cls_mask = image_pred_ * (image_pred_[:, -1]
                                      == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()

            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Sort the boxes of this class so that the highest objectness
            # comes first
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # IoU between box i and all boxes after it. The tensor
                    # shrinks while boxes are suppressed, so running out of
                    # rows ends the loop.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero every box whose IoU reaches the threshold
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Remove the zeroed entries
                    non_zero_ind = torch.nonzero(
                        image_pred_class[:, 4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind].view(
                        -1, 7)

            # Prefix each row with the index of its image: all detections of
            # the batch are held in one flat structure and the extra column
            # identifies the image.
            batch_ind = image_pred_class.new(image_pred_class.size(0),
                                             1).fill_(ind)
            collected.append(torch.cat((batch_ind, image_pred_class), 1))

    if not collected:
        return 0
    return torch.cat(collected)
Esempio n. 13
0
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold raw detections on objectness and apply class-wise NMS.

    Args:
        prediction: 3D tensor indexed as [image, box, attribute]. Columns
            0-3 are read as (centre x, centre y, width, height), column 4 as
            the objectness score, columns 5..5+num_classes as class scores.
        confidence: boxes whose objectness is not strictly above this value
            are ignored.
        num_classes: number of class-score columns.
        nms: when true, overlapping boxes of the same class are suppressed.
        nms_conf: a lower-ranked box is dropped when its IoU (as computed by
            ``bbox_iou``) with a kept box is not below this value.

    Returns:
        A 2D tensor with rows
        [image index, x1, y1, x2, y2, objectness, class score, class index],
        or the int 0 when no detection survives.
    """
    # Zero the rows whose objectness is below the threshold
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Nothing in the batch survived the threshold
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # Convert centre/width/height to top-left and bottom-right corners
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_a[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_a[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_a[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)

    # `output` is only bound once detections exist; previously an
    # uninitialised placeholder tensor was returned when nothing was found.
    output = None
    write = False

    for ind in range(batch_size):
        # select the image from the batch
        image_pred = prediction[ind]

        # torch.max returns the maximum value and its index: drop the
        # num_classes score columns and append the best score and the index
        # of the best class instead, giving 7 columns per row.
        class_score, class_index = torch.max(
            image_pred[:, 5:5 + num_classes], 1)
        class_score = class_score.float().unsqueeze(1)
        class_index = class_index.float().unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], class_score, class_index),
                               1)

        # torch.nonzero returns the indices of the non-zero elements; use it
        # to get rid of the rows zeroed by the threshold.
        non_zero_ind = torch.nonzero(image_pred[:, 4])
        if non_zero_ind.numel() == 0:
            continue
        image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

        # Get the various classes detected in the image. `unique` is a
        # project helper; a failure there skips the image as before, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            img_classes = unique(image_pred_[:, -1])
        except Exception:
            continue

        # NMS is done class-wise
        for cls in img_classes:
            # Select the rows of the current class. The comparison yields a
            # 1D mask that unsqueeze turns into a column so it broadcasts
            # over the 7 attributes.
            cls_mask = image_pred_ * (image_pred_[:, -1]
                                      == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Sort the detections so that the entry with the maximum
            # objectness is at the top
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # IoU of box i against all boxes that come after it.
                    # Selecting a single row drops a dimension, hence the
                    # unsqueeze. The tensor shrinks while boxes are
                    # suppressed, so running out of rows ends the loop.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero out all the detections whose IoU reaches the
                    # threshold (the whole row becomes 0)
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Remove the zeroed entries
                    non_zero_ind = torch.nonzero(
                        image_pred_class[:, 4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind].view(
                        -1, 7)

            # Prefix each row with the index of its image: all detections of
            # the batch are held in one flat structure and the extra column
            # identifies the image.
            batch_ind = image_pred_class.new(image_pred_class.size(0),
                                             1).fill_(ind)
            out = torch.cat((batch_ind, image_pred_class), 1)
            if write:
                output = torch.cat((output, out))
            else:
                output = out
                write = True

    if not write:
        return 0
    return output
Esempio n. 14
0
def build_targets(
    pred_boxes, pred_conf, pred_cls, target, anchors, num_anchors, num_classes, grid_size, ignore_thres, device):
    """Build per-cell training targets from the ground-truth boxes.

    Args:
        pred_boxes: predicted boxes, indexed as [b, anchor, gj, gi].
        pred_conf: predicted confidence, indexed as [b, anchor, gj, gi].
        pred_cls: predicted class scores, indexed as [b, anchor, gj, gi];
            the argmax over the remaining values is the predicted label.
        target: tensor indexed as [b, t, k] where k = 0 is read as the class
            label and k = 1..4 as x, y, w, h; these four are multiplied by
            grid_size, so they are presumably normalised to [0, 1] (confirm
            with the data loader). Rows that sum to zero are skipped.
        anchors: sequence of (w, h) anchor shapes, compared directly against
            the grid-scaled ground-truth width and height.
        num_anchors: number of anchors (nA).
        num_classes: number of classes (nC).
        grid_size: number of grid cells per side (nG).
        ignore_thres: anchors whose shape IoU with a ground-truth box exceeds
            this value get conf_mask = 0 in that box's cell (unless they are
            the best match).
        device: device on which all returned tensors are allocated.

    Returns:
        Tuple (nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls):
        the number of ground-truth boxes, the number of them whose best
        anchor's prediction has IoU > 0.5, the right label and score > 0.5,
        followed by the target tensors of shape (nB, nA, nG, nG) (tcls has a
        trailing nC axis; tconf and tcls are uint8).
    """
    nB = target.size(0)
    nA = num_anchors
    nC = num_classes
    nG = grid_size
    mask = torch.zeros(nB, nA, nG, nG, device=device)
    conf_mask = torch.ones(nB, nA, nG, nG, device=device)
    tx = torch.zeros(nB, nA, nG, nG, device=device)
    ty = torch.zeros(nB, nA, nG, nG, device=device)
    tw = torch.zeros(nB, nA, nG, nG, device=device)
    th = torch.zeros(nB, nA, nG, nG, device=device)
    # Allocated directly on the target device instead of CPU-then-copy.
    tconf = torch.zeros(nB, nA, nG, nG, dtype=torch.uint8, device=device)
    tcls = torch.zeros(nB, nA, nG, nG, nC, dtype=torch.uint8, device=device)

    # Anchor boxes placed at the origin, (0, 0, w, h). They do not depend on
    # the ground truth, so they are built once instead of once per target.
    anchor_shapes = torch.FloatTensor(
        np.concatenate((np.zeros((len(anchors), 2)), np.array(anchors)), 1))
    anchor_shapes = anchor_shapes.to(device)

    nGT = 0
    nCorrect = 0
    for b in range(nB):
        for t in range(target[b].shape[0]):
            # All-zero rows carry no box
            if target[b, t].sum() == 0:
                continue
            nGT += 1
            # Scale the box to grid units
            gx = target[b, t, 1].item() * nG
            gy = target[b, t, 2].item() * nG
            gw = target[b, t, 3].item() * nG
            gh = target[b, t, 4].item() * nG
            # Indices of the grid cell containing the box centre
            gi = int(gx)
            gj = int(gy)
            # Ground-truth shape at the origin, to compare shapes only
            gt_shape = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
            gt_shape = gt_shape.to(device)
            # IoU between the ground-truth shape and every anchor shape
            anch_ious = bbox_iou(gt_shape, anchor_shapes)
            # Where the overlap is larger than threshold set mask to zero (ignore)
            conf_mask[b, anch_ious > ignore_thres, gj, gi] = 0
            # Find the best matching anchor box
            best_n = torch.argmax(anch_ious)
            # Ground-truth box at its real position
            gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)
            gt_box = gt_box.to(device)
            # Prediction made by the best anchor in that cell
            pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0)
            pred_box = pred_box.to(device)
            # Masks
            mask[b, best_n, gj, gi] = 1
            conf_mask[b, best_n, gj, gi] = 1
            # Coordinates: offset of the centre inside its cell
            tx[b, best_n, gj, gi] = gx - gi
            ty[b, best_n, gj, gi] = gy - gj
            # Width and height: log-ratio to the anchor; the epsilon keeps
            # log() defined for a zero-sized box
            tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
            th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
            # One-hot encoding of label
            target_label = int(target[b, t, 0])
            tcls[b, best_n, gj, gi, target_label] = 1
            tconf[b, best_n, gj, gi] = 1

            # Calculate iou between ground truth and best matching prediction
            iou = bbox_iou(gt_box, pred_box)
            pred_label = torch.argmax(pred_cls[b, best_n, gj, gi])
            score = pred_conf[b, best_n, gj, gi]
            if iou > 0.5 and pred_label == target_label and score > 0.5:
                nCorrect += 1

    return nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls
Esempio n. 15
0
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold raw detections on objectness and apply class-wise NMS.

    prediction is indexed as [image, box, attribute]; columns 0-3 are read
    as (centre x, centre y, width, height), column 4 as the objectness score
    and columns 5..5+num_classes as per-class scores. Boxes whose objectness
    is not strictly above `confidence` are ignored. With `nms` enabled, a box
    is dropped when its IoU (as computed by `bbox_iou`) with a higher-ranked
    box of the same class is not below `nms_conf`.

    Returns a 2D tensor with rows
    [image index, x1, y1, x2, y2, objectness, class score, class index],
    or the int 0 when no detection was produced.
    """
    keep = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * keep

    # Two diagonal corners per box make the IoU of two boxes easier to
    # compute than centre/size does.
    centre = prediction[:, :, 0:2]
    half_size = prediction[:, :, 2:4] / 2
    prediction[:, :, :4] = torch.cat((centre - half_size, centre + half_size), 2)

    # One (n, 8) tensor per image/class pair, concatenated at the end.
    collected = []

    # Thresholding and NMS can only be done one image at a time.
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Swap the num_classes score columns for the best score and the
        # index of the class that achieved it.
        best_score, best_class = torch.max(image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             best_score.float().unsqueeze(1),
             best_class.float().unsqueeze(1)), 1)

        # Images without any surviving box are skipped.
        surviving = torch.nonzero(image_pred[:, 4])
        if surviving.shape[0] == 0:
            continue
        image_pred_ = image_pred[surviving.squeeze(), :].view(-1, 7)
        if image_pred_.shape[0] == 0:
            continue

        # Several boxes may share a class, so collect the distinct classes
        # present in this image.
        try:
            img_classes = unique(image_pred_[:, -1])
        except:
            continue

        for cls in img_classes:
            # Rows of this class whose class score is non-zero
            same_class = (image_pred_[:, -1] == cls).float()
            rows = torch.nonzero(image_pred_[:, -2] * same_class).squeeze()
            per_class = image_pred_[rows].view(-1, 7)

            # Highest objectness first
            order = torch.sort(per_class[:, 4], descending=True)[1]
            per_class = per_class[order]

            if nms:
                for i in range(per_class.size(0)):
                    # IoU of box i against all boxes ranked after it; the
                    # tensor shrinks during the loop, so an out-of-range
                    # index ends it.
                    try:
                        ious = bbox_iou(per_class[i].unsqueeze(0),
                                        per_class[i + 1:])
                    except ValueError:
                        break
                    except IndexError:
                        break

                    # Zero the boxes that overlap box i too much, then drop
                    # every zeroed row.
                    per_class[i + 1:] *= (ious < nms_conf).float().unsqueeze(1)
                    alive = torch.nonzero(per_class[:, 4]).squeeze()
                    per_class = per_class[alive].view(-1, 7)

            # The extra leading column tells which image of the batch each
            # detection belongs to.
            image_col = per_class.new(per_class.size(0), 1).fill_(ind)
            collected.append(torch.cat((image_col, per_class), 1))

    if not collected:
        return 0
    return torch.cat(collected)
def write_results(prediction,
                  confidence,
                  num_classes,
                  nms=True,
                  nms_conf=0.4,
                  det_hm=False):
    """
        https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-4/
        prediction: (B x 10647 x 85)
        B: the number of images in a batch,
        10647: the number of bounding boxes predicted per image. (52×52+26×26+13×13)×3=10647
        85: the number of bounding box attributes. (c_x, c_y, w, h, object confidence, and 80 class scores)

        confidence: boxes whose object confidence is not strictly above this value are ignored
        num_classes: the number of class-score columns
        nms: when true, overlapping boxes of the same class are suppressed
        nms_conf: a lower-ranked box is dropped when its IoU (as computed by bbox_iou)
                  with a kept box is not below this value
        det_hm: when true, only boxes whose label_index is 0 are kept

        output: Num_obj × [img_index, x_1, y_1, x_2, y_2, object confidence, class_score, label_index]
                An empty (0 x 8) tensor is returned when nothing is detected.
                With det_hm, an image without any label-0 box makes the function
                return an empty (0 x 7) tensor immediately.
    """

    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Convert (c_x, c_y, w, h) to the two diagonal corners of each box
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
    box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
    box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
    box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)

    # One (n, 8) tensor per image/class pair. Collecting them in a list
    # replaces an uninitialised placeholder tensor that used to be returned
    # when nothing was detected.
    collected = []

    for ind in range(batch_size):
        # select the image from the batch
        image_pred = prediction[ind]

        # Get the class having maximum score, and the index of that class
        # Get rid of num_classes softmax scores
        # Add the class index and the class score of class having maximum score
        max_conf, max_conf_index = torch.max(image_pred[:, 5:5 + num_classes],
                                             1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_index = max_conf_index.float().unsqueeze(1)
        seq = (image_pred[:, :5], max_conf, max_conf_index)
        image_pred = torch.cat(
            seq, 1
        )  # image_pred:(10647, 7) 7:[x1, y1, x2, y2, obj_score, max_conf, max_conf_index]

        # Get rid of the zero entries
        non_zero_ind = (torch.nonzero(image_pred[:, 4]))
        image_pred__ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

        # Keep only the boxes whose label index is 0
        if det_hm:
            cls_mask = (image_pred__[:, -1] == 0).float()
            class_mask_ind = torch.nonzero(cls_mask).squeeze()
            image_pred_ = image_pred__[class_mask_ind].view(-1, 7)

            # NOTE(review): this leaves the whole function, discarding the
            # detections of every other image in the batch. Kept as-is
            # because callers may rely on it; confirm it is intended for
            # batch sizes above 1.
            if torch.sum(cls_mask) == 0:
                return image_pred_
        else:
            image_pred_ = image_pred__

        # No surviving box in this image
        if image_pred_.shape[0] == 0:
            continue

        # Get the various classes detected in the image
        img_classes = torch.unique(image_pred_[:, -1], sorted=True).float()

        # We will do NMS classwise
        for cls in img_classes:
            # get the detections with one particular class (rows whose class
            # score is non-zero)
            cls_mask = image_pred_ * (image_pred_[:, -1]
                                      == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # sort the detections such that the entry with the maximum objectness
            # confidence is at the top
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            # if nms has to be done
            if nms:
                # For each detection
                for i in range(idx):
                    # Get the IOUs of all boxes that come after the one we
                    # are looking at in the loop. The tensor shrinks while
                    # boxes are suppressed, so running out of rows ends the
                    # loop.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero out all the detections whose IoU reaches the threshold
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    #  Remove the zero entries
                    non_zero_ind = torch.nonzero(
                        image_pred_class[:, 4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind].view(
                        -1, 7)

            # Concatenate the batch_id of the image to the detection
            # this helps us identify which image does the detection correspond to
            # We use a linear structure to hold ALL the detections from the batch
            # the batch_dim is flattened
            # batch is identified by extra batch column
            batch_ind = image_pred_class.new(image_pred_class.size(0),
                                             1).fill_(ind)
            collected.append(torch.cat((batch_ind, image_pred_class), 1))

    if not collected:
        # Same width as a regular result row, but no rows
        return prediction.new_zeros((0, 8))
    return torch.cat(collected)
Esempio n. 17
0
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4,
                  keep_classes=(0,)):
    """Threshold raw detections on objectness and run NMS for selected classes.

    Args:
        prediction: Tensor indexed as ``[image, box, attribute]``. Attributes
            0-3 are read as centre x, centre y, width and height, attribute 4
            as the objectness score and attributes ``5:5 + num_classes`` as
            per-class scores. The caller's tensor is not modified.
        confidence: Boxes whose objectness is not strictly greater than this
            value are discarded.
        num_classes: Number of per-class score columns that follow column 4.
        nms: When true, apply greedy non-maximum suppression per class.
        nms_conf: A box is dropped when its IoU with a higher-scoring kept box
            of the same class is not below this value.
        keep_classes: Class indices to report. The default ``(0,)`` reproduces
            the previously hard-coded "class 0 only" filter.

    Returns:
        ``0`` when no detection is kept; otherwise a tensor with one row per
        detection laid out as ``(image index, x1, y1, x2, y2, objectness,
        class score, class index)``.
    """
    # Zero every row whose objectness is not above the threshold. The product
    # is a new tensor, so the in-place writes below never touch the input.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Nothing survived anywhere in the batch.
    if torch.nonzero(prediction[:, :, 4], as_tuple=False).size(0) == 0:
        return 0

    # Convert (centre x, centre y, width, height) to corner coordinates.
    corners = prediction.new(prediction.shape)
    corners[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    corners[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    corners[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    corners[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = corners[:, :, :4]

    detections = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Replace the per-class score columns with (best score, best index).
        max_score, max_idx = torch.max(image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat((image_pred[:, :5],
                                max_score.float().unsqueeze(1),
                                max_idx.float().unsqueeze(1)), 1)

        # Keep only the rows that passed the objectness threshold. Taking
        # column 0 of the nonzero result always yields a 1-D index, so a
        # single surviving row stays two-dimensional.
        non_zero_ind = torch.nonzero(image_pred[:, 4], as_tuple=False)[:, 0]
        image_pred_ = image_pred[non_zero_ind]

        for cls in keep_classes:
            # Rows of this class; rows whose class score is exactly zero are
            # excluded, as in the original masking.
            class_rows = (image_pred_[:, -1] == cls) & (image_pred_[:, -2] != 0)
            image_pred_class = image_pred_[class_rows]
            if image_pred_class.size(0) == 0:
                # Without this guard torch.cat below would be handed an empty
                # list and raise.
                continue

            # Highest objectness first.
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]

            if nms:
                max_detections = []
                while image_pred_class.size(0):
                    # The top row is always kept.
                    max_detections.append(image_pred_class[0].unsqueeze(0))
                    if image_pred_class.size(0) == 1:
                        break
                    # bbox_iou is defined elsewhere; it is used here as
                    # returning one value per row of its second argument.
                    ious = bbox_iou(max_detections[-1], image_pred_class[1:])
                    image_pred_class = image_pred_class[1:][ious < nms_conf]

                image_pred_class = torch.cat(max_detections).data

            # Prefix every row with the index of the image it belongs to so
            # the whole batch can be returned as one flat tensor.
            batch_ind = image_pred_class.new(image_pred_class.size(0),
                                             1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0

    return torch.cat(detections)
Esempio n. 18
0
File: util.py Progetto: FenHua/yolo
def write_results(predictions,
                  confidence,
                  num_class,
                  nms=True,
                  nms_thresh=0.4):
    """Threshold raw detections on objectness and apply per-class NMS.

    Args:
        predictions: Tensor indexed as ``[image, box, attribute]``. Attributes
            0-3 are read as centre x, centre y, width and height, attribute 4
            as the objectness score and every attribute from index 5 onwards
            as a per-class score.
        confidence: Boxes whose objectness is not strictly greater than this
            value are discarded.
        num_class: Not used by this function; kept for interface
            compatibility.
        nms: When true, run greedy non-maximum suppression within each class.
        nms_thresh: A box is suppressed when its IoU with a higher-scoring box
            of the same class is not below this value.

    Returns:
        ``0`` when no detection is kept; otherwise a tensor with one row per
        detection laid out as ``(image index, x1, y1, x2, y2, objectness,
        class score, class index)``.
    """
    # Zero every row whose objectness is not above the threshold.
    mask = (predictions[:, :, 4] > confidence).float().unsqueeze(2)
    predictions = predictions * mask

    # One nonzero row per surviving box. No squeeze here: squeezing collapses
    # the single-survivor case to a 0-d tensor on which size(0) raises.
    if torch.nonzero(predictions[:, :, 4]).size(0) == 0:
        return 0  # nothing detected in the whole batch

    # Convert (centre x, centre y, width, height) to corner coordinates.
    bbox = predictions.new(predictions.shape)
    bbox[:, :, 0] = predictions[:, :, 0] - predictions[:, :, 2] / 2  # x1
    bbox[:, :, 1] = predictions[:, :, 1] - predictions[:, :, 3] / 2  # y1
    bbox[:, :, 2] = predictions[:, :, 0] + predictions[:, :, 2] / 2  # x2
    bbox[:, :, 3] = predictions[:, :, 1] + predictions[:, :, 3] / 2  # y2
    predictions[:, :, :4] = bbox[:, :, :4]

    batch_size = predictions.size(0)
    results = []  # per-(image, class) chunks, concatenated once at the end
    for ind in range(batch_size):
        prediction = predictions[ind]

        # Column 0 of the nonzero result is always a 1-D index, so a single
        # surviving box keeps `prediction` two-dimensional.
        ind_nz = torch.nonzero(prediction[:, 4])[:, 0]
        if ind_nz.size(0) == 0:
            continue
        prediction = prediction[ind_nz]

        # Replace the per-class score columns with (best score, best index),
        # leaving seven columns per row.
        max_score, max_score_ind = torch.max(prediction[:, 5:], 1)
        max_score = max_score.float().unsqueeze(1)
        max_score_ind = max_score_ind.float().unsqueeze(1)
        prediction = torch.cat((prediction[:, :5], max_score, max_score_ind),
                               1)

        # `unique` is defined elsewhere in this file. The original skipped the
        # image on any failure; that best-effort behaviour is kept, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            image_classes = unique(prediction[:, -1])
        except Exception:
            continue

        # Class-wise NMS.
        for cls in image_classes:
            class_mask_ind = torch.nonzero(prediction[:, -1] == cls)[:, 0]
            prediction_class = prediction[class_mask_ind]

            # Highest objectness first.
            score_sort_ind = torch.sort(prediction_class[:, 4],
                                        descending=True)[1]
            prediction_class = prediction_class[score_sort_ind]
            cnt = prediction_class.size(0)

            if nms:
                for i in range(cnt):
                    # Earlier iterations shrink prediction_class; stop once
                    # box i has no lower-ranked boxes left to compare with.
                    if i + 1 >= prediction_class.size(0):
                        break
                    # bbox_iou is defined elsewhere; it is used here as
                    # returning one value per row of its second argument.
                    ious = bbox_iou(prediction_class[i].unsqueeze(0),
                                    prediction_class[i + 1:])
                    # Zero the rows that overlap box i too much, then drop
                    # every zeroed row. Box i itself is untouched.
                    iou_mask = (ious < nms_thresh).float().unsqueeze(1)
                    prediction_class[i + 1:] *= iou_mask
                    non_zero_ind = torch.nonzero(prediction_class[:, 4])[:, 0]
                    prediction_class = prediction_class[non_zero_ind]

            # Prefix every row with the index of the image it belongs to.
            batch_ind = prediction_class.new(prediction_class.size(0),
                                             1).fill_(ind)
            results.append(torch.cat((batch_ind, prediction_class), 1))

    if not results:
        # Previously `output` was unbound here and the return raised.
        return 0
    return torch.cat(results)
Esempio n. 19
0
def write_results_half(prediction,
                       confidence,
                       num_classes,
                       nms=True,
                       nms_conf=0.4):
    """Half-precision variant: threshold on objectness, then per-class NMS.

    Args:
        prediction: Tensor indexed as ``[image, box, attribute]``. Attributes
            0-3 are read as centre x, centre y, width and height, attribute 4
            as the objectness score and attributes ``5:5 + num_classes`` as
            per-class scores. Masks and derived columns are cast with
            ``.half()``, so a float16 input is presumably expected.
        confidence: Boxes whose objectness is not strictly greater than this
            value are discarded.
        num_classes: Number of per-class score columns that follow column 4.
        nms: When true, run greedy non-maximum suppression within each class.
        nms_conf: A box is suppressed when its IoU with a higher-scoring box
            of the same class is not below this value.

    Returns:
        ``0`` when no detection is kept; otherwise a tensor with one row per
        detection laid out as ``(image index, x1, y1, x2, y2, objectness,
        class score, class index)``.
    """
    # Zero every row whose objectness is not above the threshold.
    conf_mask = (prediction[:, :, 4] > confidence).half().unsqueeze(2)
    prediction = prediction * conf_mask

    # Nothing survived anywhere in the batch. The original relied on an
    # exception here and could fall through to return an uninitialised
    # placeholder tensor.
    if torch.nonzero(prediction[:, :, 4]).size(0) == 0:
        return 0

    # Convert (centre x, centre y, width, height) to corner coordinates.
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_a[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_a[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_a[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)
    results = []  # per-(image, class) chunks, concatenated once at the end

    for ind in range(batch_size):
        # Select the image from the batch.
        image_pred = prediction[ind]

        # Replace the per-class score columns with (best score, best index).
        max_score, max_idx = torch.max(image_pred[:, 5:5 + num_classes], 1)
        max_score = max_score.half().unsqueeze(1)
        max_idx = max_idx.half().unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], max_score, max_idx), 1)

        # Drop the zeroed rows. Column 0 of the nonzero result is always a
        # 1-D index; squeezing it instead turns a single survivor into a 0-d
        # index and collapses image_pred_ to one dimension.
        non_zero_ind = torch.nonzero(image_pred[:, 4])[:, 0]
        if non_zero_ind.size(0) == 0:
            continue
        image_pred_ = image_pred[non_zero_ind]

        # Classes detected in this image (`unique` is defined elsewhere).
        img_classes = unique(image_pred_[:, -1].long()).half()

        # NMS is done class-wise.
        for cls in img_classes:
            # Rows of this class whose class score is non-zero.
            cls_mask = image_pred_ * (image_pred_[:, -1]
                                      == cls).half().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2])[:, 0]
            image_pred_class = image_pred_[class_mask_ind]

            # Highest objectness first.
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # Earlier iterations shrink image_pred_class; stop once
                    # box i has no lower-ranked boxes left to compare with.
                    if i + 1 >= image_pred_class.size(0):
                        break
                    # bbox_iou is defined elsewhere; it is used here as
                    # returning one value per row of its second argument.
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                    image_pred_class[i + 1:])

                    # Zero the rows that overlap box i too much, then remove
                    # every zeroed row.
                    iou_mask = (ious < nms_conf).half().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4])[:, 0]
                    image_pred_class = image_pred_class[non_zero_ind]

            # Prefix every row with the index of the image it belongs to so
            # the whole batch can be returned as one flat tensor.
            batch_ind = image_pred_class.new(image_pred_class.size(0),
                                             1).fill_(ind)
            results.append(torch.cat((batch_ind, image_pred_class), 1))

    if not results:
        return 0
    return torch.cat(results)
Esempio n. 20
0
def write_results_half(prediction,
                       confidence,
                       num_classes,
                       nms=True,
                       nms_conf=0.4):
    """Half-precision variant: threshold on objectness, then per-class NMS.

    Args:
        prediction: Tensor indexed as ``[image, box, attribute]``. Attributes
            0-3 are read as centre x, centre y, width and height, attribute 4
            as the objectness score and attributes ``5:5 + num_classes`` as
            per-class scores. Masks and derived columns are cast with
            ``.half()``, so a float16 input is presumably expected.
        confidence: Boxes whose objectness is not strictly greater than this
            value are discarded.
        num_classes: Number of per-class score columns that follow column 4.
        nms: When true, run greedy non-maximum suppression within each class.
        nms_conf: A box is suppressed when its IoU with a higher-scoring box
            of the same class is not below this value.

    Returns:
        ``0`` when no detection is kept; otherwise a tensor with one row per
        detection laid out as ``(image index, x1, y1, x2, y2, objectness,
        class score, class index)``.
    """
    # Zero every row whose objectness is not above the threshold.
    conf_mask = (prediction[:, :, 4] > confidence).half().unsqueeze(2)
    prediction = prediction * conf_mask

    # Nothing survived anywhere in the batch. The original relied on an
    # exception here and could fall through to return an uninitialised
    # placeholder tensor.
    if torch.nonzero(prediction[:, :, 4]).size(0) == 0:
        return 0

    # Convert (centre x, centre y, width, height) to corner coordinates.
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_a[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_a[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_a[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)
    results = []  # per-(image, class) chunks, concatenated once at the end

    for ind in range(batch_size):
        image_pred = prediction[ind]

        # Replace the per-class score columns with (best score, best index).
        max_score, max_idx = torch.max(image_pred[:, 5:5 + num_classes], 1)
        max_score = max_score.half().unsqueeze(1)
        max_idx = max_idx.half().unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], max_score, max_idx), 1)

        # Drop the zeroed rows. Column 0 of the nonzero result is always a
        # 1-D index; squeezing it instead turns a single survivor into a 0-d
        # index and collapses image_pred_ to one dimension.
        non_zero_ind = torch.nonzero(image_pred[:, 4])[:, 0]
        if non_zero_ind.size(0) == 0:
            continue
        image_pred_ = image_pred[non_zero_ind]

        # Classes detected in this image (`unique` is defined elsewhere).
        img_classes = unique(image_pred_[:, -1].long()).half()

        for cls in img_classes:
            # Rows of this class whose class score is non-zero.
            cls_mask = image_pred_ * (image_pred_[:, -1]
                                      == cls).half().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2])[:, 0]
            image_pred_class = image_pred_[class_mask_ind]

            # Highest objectness first.
            conf_sort_index = torch.sort(image_pred_class[:, 4],
                                         descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # Earlier iterations shrink image_pred_class; stop once
                    # box i has no lower-ranked boxes left to compare with.
                    if i + 1 >= image_pred_class.size(0):
                        break
                    # bbox_iou is defined elsewhere; it is used here as
                    # returning one value per row of its second argument.
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                    image_pred_class[i + 1:])

                    # Zero the rows that overlap box i too much, then remove
                    # every zeroed row.
                    iou_mask = (ious < nms_conf).half().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4])[:, 0]
                    image_pred_class = image_pred_class[non_zero_ind]

            # Prefix every row with the index of the image it belongs to.
            batch_ind = image_pred_class.new(image_pred_class.size(0),
                                             1).fill_(ind)
            results.append(torch.cat((batch_ind, image_pred_class), 1))

    if not results:
        return 0
    return torch.cat(results)