Code example #1
def generate_candidate(predictions):
    batch_size = predictions['loc'].size(0)
    candidate = []
    prior_data = predictions['priors'].squeeze(0)
    for i in range(batch_size):
        loc_data = predictions['loc'][i]
        conf_data = predictions['conf'][i]

        candidate_cur = {
            'T2S_feat': predictions['T2S_feat'][i].unsqueeze(0),
            'fpn_feat': predictions['fpn_feat'][i].unsqueeze(0)
        }

        with timer.env('Detect'):
            decoded_boxes = decode(loc_data, prior_data)

            conf_data = conf_data.t().contiguous()
            conf_scores, _ = torch.max(conf_data[1:, :], dim=0)

            keep = (conf_scores > cfg.eval_conf_thresh)
            candidate_cur['proto'] = predictions['proto'][i]
            candidate_cur['conf'] = conf_data[:, keep].t()
            candidate_cur['box'] = decoded_boxes[keep, :]
            candidate_cur['mask_coeff'] = predictions['mask_coeff'][i][keep, :]
            candidate_cur['track'] = predictions['track'][i][
                keep, :] if cfg.train_track else None
            if cfg.train_centerness:
                candidate_cur['centerness'] = predictions['centerness'][i][
                    keep].view(-1)

        candidate.append(candidate_cur)

    return candidate
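
Every example in this listing funnels its regression output through decode, which turns predicted offsets plus prior (anchor) boxes into corner-form boxes. For reference, here is a minimal sketch of the standard ssd.pytorch-style decode; the variance defaults and the name decode_sketch are assumptions, not this project's code.

import torch

def decode_sketch(loc, priors, variances=(0.1, 0.2)):
    # loc: (num_priors, 4) predicted offsets; priors: (num_priors, 4) in center-size form
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],   # center = prior center + scaled offset
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)    # size = prior size * exp(scaled offset)
    boxes[:, :2] -= boxes[:, 2:] / 2   # (cx, cy, w, h) -> (xmin, ymin, w, h)
    boxes[:, 2:] += boxes[:, :2]       # -> (xmin, ymin, xmax, ymax)
    return boxes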
Code example #2
File: collate.py  Project: arunpatala/ssd.pytorch
def get_ann2(dets, cut_off=0.5, th=0.33, fpups=False):
    loc, conf, priors = dets
    conf = softmax(Variable(conf)).data  #.cpu()

    decoded_boxes = decode(loc, priors, [0.1, 0.2])
    conf_scores = conf.t().contiguous()
    conf_p, conf_cl = conf_scores[1:, :].max(0)

    cl = 0
    c_mask = conf_scores[cl] < cut_off  # keep priors whose background score is below cut_off
    scores = conf_scores[cl][c_mask]
    l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
    boxes = decoded_boxes[l_mask].view(-1, 4)

    if (boxes.nelement() == 0): return None
    boxes_cl = (conf_cl.squeeze()[c_mask]).unsqueeze(1).float()
    boxes_p = (conf_p.squeeze()[c_mask]).unsqueeze(1).float()
    dets = torch.cat([boxes_cl, boxes_p * 100, boxes * args.size], 1)
    #ids, count = nms(boxes, 1-scores, th, 200)
    #ids = ids.cpu()

    #ann_dets = (boxes[ids[:count]]*300).cpu().round().numpy()
    dets = dets.round().cpu().numpy().astype('int32')

    ann = Ann(dets=dets)

    return ann
Code example #3
def CandidateShift(net,
                   ref_candidate,
                   next_candidate,
                   img=None,
                   img_meta=None,
                   display=False):
    """
    The function try to shift the candidates of reference frame to that of target frame.
    The most important step is to shift the bounding box of reference frame to that of target frame
    :param net: network
    :param next_candidate: features of the last layer to predict bounding box on target frame
    :param ref_candidate: the candidate dictionary that includes 'box', 'conf', 'mask_coeff', 'track' items.
    :return: candidates on the target frame
     """

    ref_candidate_shift = {}
    for k, v in next_candidate.items():
        if k in {'proto', 'fpn_feat', 'T2S_feat'}:
            ref_candidate_shift[k] = v.clone()

    # we only use the features in the P3 layer to perform correlation operation
    T2S_feat_ref, T2S_feat_next = ref_candidate['T2S_feat'], next_candidate[
        'T2S_feat']
    fpn_feat_ref, fpn_feat_next = ref_candidate['fpn_feat'], next_candidate[
        'fpn_feat']
    x_corr = correlate(fpn_feat_ref,
                       fpn_feat_next,
                       patch_size=cfg.correlation_patch_size)
    concatenated_features = F.relu(
        torch.cat([x_corr, T2S_feat_ref, T2S_feat_next], dim=1))

    box_ref = ref_candidate['box'].clone()
    feat_h, feat_w = fpn_feat_ref.size()[2:]
    bbox_feat_input = bbox_feat_extractor(concatenated_features, box_ref,
                                          feat_h, feat_w, 7)
    loc_ref_shift, mask_coeff_shift = net.TemporalNet(bbox_feat_input)
    box_ref_shift = decode(loc_ref_shift, center_size(box_ref))
    mask_coeff_ref_shift = ref_candidate['mask_coeff'].clone(
    ) + mask_coeff_shift
    masks_ref_shift = generate_mask(next_candidate['proto'],
                                    mask_coeff_ref_shift, box_ref_shift)

    # display = 1
    if display:
        # display_correlation_map_patch(bbox_feat_input[:, :121], img_meta)
        display_box_shift(box_ref,
                          box_ref_shift,
                          mask_shift=masks_ref_shift,
                          img_meta=img_meta,
                          img_gpu=img)

    ref_candidate_shift['box'] = box_ref_shift.clone()
    ref_candidate_shift['score'] = ref_candidate['score'].clone() * 0.95
    ref_candidate_shift['mask_coeff'] = mask_coeff_ref_shift.clone()
    ref_candidate_shift['mask'] = masks_ref_shift.clone()

    return ref_candidate_shift
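
decode above expects its second argument in center-size form, which is why box_ref is passed through center_size before being reused as priors. Below is a minimal sketch of that conversion, assuming the standard ssd.pytorch helper; treat it as illustrative rather than this project's exact code.

import torch

def center_size_sketch(boxes):
    # (xmin, ymin, xmax, ymax) -> (cx, cy, w, h)
    return torch.cat(((boxes[:, :2] + boxes[:, 2:]) / 2,   # box centers
                      boxes[:, 2:] - boxes[:, :2]), 1)     # widths and heights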
Code example #4
def detect_in_thread(class_data_proxy, num_classes, trained_model_path,
                     use_cuda, cfg):

    #li_margin_ratio_l_r_t_b = compute_margin_ratio_l_r_t_b(w_h_cam, w_h_net)
    net = init_ssd(num_classes, trained_model_path, use_cuda)

    fps_det = FPS().start()
    print('class_data.end_of_capture of detect in thread : ',
          class_data_proxy.get_eoc())  #; exit()

    is_huda = False
    while not class_data_proxy.get_eoc():
        batch_rgb = class_data_proxy.get_batch_rgb()
        #print('batch_rgb.shape : ', batch_rgb.shape)
        if batch_rgb is None:
            print('batch_rgb is None !!!')
            #exit()
            if is_huda:
                class_data_proxy.set_eoc()
                print('class_data.end_of_capture of detect in thread is True')
                #exit()
            continue
        is_huda = True
        start = time.time()

        #   net forwarding
        loc_data, conf_preds, prior_data = net(batch_rgb)

        decoded_boxes = decode(loc_data[0].data, prior_data.data,
                               cfg['variance']).clone()
        conf_scores = net.softmax(conf_preds[0]).data.clone()

        class_data_proxy.set_net_result((decoded_boxes, conf_scores))
        #   post process output
        #li_det = post_process_output(im_bgr, net, CLASSES, loc_data, conf_preds, prior_data, w_h_cam, li_margin_ratio_l_r_t_b, li_color_class, th_conf, th_nms, None)

        #class_data_proxy.set_li_det(li_det)

        fps_det.update()
        class_data_proxy.set_fps_det(fps_det.fps())
        #print('fps_det : ', fps_det.fps())
    print("class_data.end_of_capture is True : detect_in_thread")
Code example #5
File: collate.py  Project: arunpatala/ssd.pytorch
def get_ann(dets, p=0.33, th=0.33, fpups=False):
    loc, conf, priors = dets
    decoded_boxes = decode(loc, priors, [0.1, 0.2])

    conf_scores = conf.t().contiguous()
    cl = 0
    c_mask = conf_scores[cl].lt(p)
    if fpups:
        p_mask = conf_scores[-1].lt(p)
        c_mask = c_mask & p_mask
    scores = conf_scores[cl][c_mask]
    l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
    boxes = decoded_boxes[l_mask].view(-1, 4)
    if (boxes.nelement() == 0): return None
    ids, count = nms(boxes, 1 - scores, th, 200)
    ids = ids.cpu()
    #print(boxes)
    ann_dets = (boxes[ids[:count]] * args.size).round().numpy()
    #print(ann_dets)
    ann = Ann(dets=ann_dets)
    return ann
Code example #6
    def forward(self, loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch, num_priors, 4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors, num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

        # Decode predictions into bboxes.
        for i in range(num):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()

            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.dim() == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
        flt = output.contiguous().view(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        # zero out detections ranked beyond the overall top_k for this image
        flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0
        return output
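
The forward above returns a tensor of shape (batch, num_classes, top_k, 5), where column 0 of each row is the score and columns 1..4 are the box coordinates. A minimal sketch of reading that output back out; the helper name and the threshold value are illustrative only.

def collect_detections(output, score_threshold=0.6):
    # output: (batch, num_classes, top_k, 5); column 0 is the score, columns 1:5 the box
    results = []
    for img_idx in range(output.size(0)):
        for cl in range(1, output.size(1)):       # class 0 is background
            dets = output[img_idx, cl]
            keep = dets[:, 0] > score_threshold
            for det in dets[keep]:
                results.append((img_idx, cl, det[0].item(), det[1:].tolist()))
    return results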
Code example #7
def validate(args,
             net,
             val_data_loader,
             val_dataset,
             iteration_num,
             iou_thresh=0.5):
    """Test a SSD network on an image database."""
    print('Validating at ', iteration_num)
    num_images = len(val_dataset)
    num_classes = args.num_classes

    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    batch_iterator = None
    val_step = 100
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()

    for val_itr in range(len(val_data_loader)):
        if not batch_iterator:
            batch_iterator = iter(val_data_loader)

        torch.cuda.synchronize()
        t1 = time.perf_counter()

        images, targets, img_indexs = next(batch_iterator)
        batch_size = images.size(0)
        height, width = images.size(2), images.size(3)

        if args.cuda:
            images = Variable(images.cuda(), volatile=True)
        output = net(images)

        loc_data = output[0]
        conf_preds = output[1]
        prior_data = output[2]

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))
        for b in range(batch_size):
            gt = targets[b].numpy()
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            gt_boxes.append(gt)
            decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                   args.cfg['variance']).clone()
            conf_scores = net.softmax(conf_preds[b]).data.clone()

            for cl_ind in range(1, num_classes):
                scores = conf_scores[:, cl_ind].squeeze()
                c_mask = scores.gt(
                    args.conf_thresh)  # greater than minimum threshold
                scores = scores[c_mask].squeeze()
                # print('scores size',scores.size())
                if scores.dim() == 0:
                    # print(len(''), ' dim ==0 ')
                    det_boxes[cl_ind - 1].append(np.asarray([]))
                    continue
                boxes = decoded_boxes.clone()
                l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                boxes = boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, counts = nms(boxes, scores, args.nms_thresh,
                                  args.topk)  # idsn - ids after nms
                scores = scores[ids[:counts]].cpu().numpy()
                boxes = boxes[ids[:counts]].cpu().numpy()
                # print('boxes shape', boxes.shape)
                boxes[:, 0] *= width
                boxes[:, 2] *= width
                boxes[:, 1] *= height
                boxes[:, 3] *= height

                for ik in range(boxes.shape[0]):
                    boxes[ik, 0] = max(0, boxes[ik, 0])
                    boxes[ik, 2] = min(width, boxes[ik, 2])
                    boxes[ik, 1] = max(0, boxes[ik, 1])
                    boxes[ik, 3] = min(height, boxes[ik, 3])

                cls_dets = np.hstack(
                    (boxes, scores[:, np.newaxis])).astype(np.float32,
                                                           copy=True)

                det_boxes[cl_ind - 1].append(cls_dets)
            count += 1
        if val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()
        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS stuff Time {:0.3f}'.format(te - tf))
    print('Evaluating detections for iteration number ', iteration_num)
    return evaluate_detections(gt_boxes,
                               det_boxes,
                               CLASSES,
                               iou_thresh=iou_thresh)
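
The per-element clamping loop used above (and repeated in the later validate/test functions) can be written in vectorized NumPy; a behaviorally equivalent sketch with an illustrative helper name:

import numpy as np

def clip_boxes(boxes, width, height):
    # boxes: (N, 4) array of [x1, y1, x2, y2] in pixel coordinates, clipped in place
    boxes[:, 0] = np.clip(boxes[:, 0], 0, None)        # x1 >= 0
    boxes[:, 1] = np.clip(boxes[:, 1], 0, None)        # y1 >= 0
    boxes[:, 2] = np.clip(boxes[:, 2], None, width)    # x2 <= width
    boxes[:, 3] = np.clip(boxes[:, 3], None, height)   # y2 <= height
    return boxes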
Code example #8
    def __call__(self, predictions):
        """
        Args:
             loc_data: (tensor) Loc preds from loc layers
                Shape: [batch, num_priors, 4]
            conf_data: (tensor) Shape: Conf preds from conf layers
                Shape: [batch, num_priors, num_classes]
            mask_data: (tensor) Mask preds from mask layers
                Shape: [batch, num_priors, mask_dim]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [num_priors, 4]
            proto_data: (tensor) If using mask_type.lincomb, the prototype masks
                Shape: [batch, mask_h, mask_w, mask_dim]
        
        Returns:
            output of shape (batch_size, top_k, 1 + 1 + 4 + mask_dim)
            These outputs are in the order: class idx, confidence, bbox coords, and mask.

            Note that the outputs are sorted only if cross_class_nms is False
        """

        loc_data = predictions['loc']
        conf_data = predictions['conf']
        mask_data = predictions['mask']
        prior_data = predictions['priors']
        proto_data = predictions['proto'] if 'proto' in predictions else None

        inst_data = predictions['inst'] if 'inst' in predictions else None

        out = []

        with timer.env('Detect'):
            batch_size = loc_data.size(0)
            num_priors = prior_data.size(0)

            #view-->resize
            conf_preds = conf_data.view(batch_size, num_priors,
                                        self.num_classes).transpose(
                                            2, 1).contiguous()

            for batch_idx in range(batch_size):
                decoded_boxes = decode(loc_data[batch_idx], prior_data)
                result = self.detect(batch_idx, conf_preds, decoded_boxes,
                                     mask_data, inst_data)

                if result is not None and proto_data is not None:
                    result['proto'] = proto_data[batch_idx]

                #ipdb> result.keys()
                #dict_keys(['box', 'mask', 'class', 'score', 'proto'])
                #
                #ipdb> result['box'].shape
                #torch.Size([100, 4])
                #
                #ipdb> result['mask'].shape
                #torch.Size([100, 32])
                #
                #ipdb> result['class'].shape
                #torch.Size([100])
                #
                #ipdb> result['score'].shape
                #torch.Size([100])
                #
                #ipdb> result['proto'].shape
                #torch.Size([138, 138, 32])
                out.append(result)

        return out
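
Each per-image result dict (see the shapes in the ipdb comments above) still carries raw mask coefficients; in the YOLACT-style lincomb formulation they are combined with the prototypes as sigmoid(proto @ coeff^T). A minimal sketch of that step; the helper name is illustrative and cropping the masks to their boxes is omitted.

import torch

def lincomb_masks(proto, mask_coeff):
    # proto: (mask_h, mask_w, mask_dim); mask_coeff: (num_dets, mask_dim)
    masks = torch.sigmoid(proto @ mask_coeff.t())   # (mask_h, mask_w, num_dets)
    return masks.permute(2, 0, 1).contiguous()      # (num_dets, mask_h, mask_w)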
Code example #9
def test_net(net,
             save_root,
             exp_name,
             input_type,
             dataset,
             iteration,
             num_classes,
             thresh=0.5):
    """ Test a SSD network on an Action image database. """

    val_data_loader = data.DataLoader(dataset,
                                      args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      collate_fn=detection_collate,
                                      pin_memory=True)
    image_ids = dataset.ids
    save_ids = []
    val_step = 250
    num_images = len(dataset)
    video_list = dataset.video_list
    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    batch_iterator = None
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()
    num_batches = len(val_data_loader)
    det_file = save_root + 'cache/' + exp_name + '/detection-' + str(
        iteration).zfill(6) + '.pkl'
    print('Number of images ', len(dataset), ' number of batches', num_batches)
    frame_save_dir = save_root + 'detections/CONV-' + input_type + '-' + args.listid + '-' + str(
        iteration).zfill(6) + '/'
    print('\n\n\nDetections will be stored in ', frame_save_dir, '\n\n')
    for val_itr in range(len(val_data_loader)):
        if not batch_iterator:
            batch_iterator = iter(val_data_loader)

        torch.cuda.synchronize()
        t1 = time.perf_counter()

        images, targets, img_indexs = next(batch_iterator)
        batch_size = images.size(0)
        height, width = images.size(2), images.size(3)

        if args.cuda:
            images = Variable(images.cuda(), volatile=True)
        output = net(images)

        loc_data = output[0]
        conf_preds = output[1]
        prior_data = output[2]

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))
        for b in range(batch_size):
            gt = targets[b].numpy()
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            gt_boxes.append(gt)
            decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                   cfg['variance']).clone()
            conf_scores = net.softmax(conf_preds[b]).data.clone()
            index = img_indexs[b]
            annot_info = image_ids[index]

            frame_num = annot_info[1]
            video_id = annot_info[0]
            videoname = video_list[video_id]
            # output_dir = frame_save_dir+videoname
            # if not os.path.isdir(output_dir):
            #     os.makedirs(output_dir)
            #
            # output_file_name = output_dir+'/{:05d}.mat'.format(int(frame_num))
            # save_ids.append(output_file_name)
            # sio.savemat(output_file_name, mdict={'scores':conf_scores.cpu().numpy(),'loc':decoded_boxes.cpu().numpy()})

            for cl_ind in range(1, num_classes):
                scores = conf_scores[:, cl_ind].squeeze()
                c_mask = scores.gt(
                    args.conf_thresh)  # greater than minimum threshold
                scores = scores[c_mask].squeeze()
                # print('scores size',scores.size())
                if scores.dim() == 0:
                    # print(len(''), ' dim ==0 ')
                    det_boxes[cl_ind - 1].append(np.asarray([]))
                    continue
                boxes = decoded_boxes.clone()
                l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                boxes = boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, counts = nms(boxes, scores, args.nms_thresh,
                                  args.topk)  # idsn - ids after nms
                scores = scores[ids[:counts]].cpu().numpy()
                boxes = boxes[ids[:counts]].cpu().numpy()
                # print('boxes shape', boxes.shape)
                boxes[:, 0] *= width
                boxes[:, 2] *= width
                boxes[:, 1] *= height
                boxes[:, 3] *= height

                for ik in range(boxes.shape[0]):
                    boxes[ik, 0] = max(0, boxes[ik, 0])
                    boxes[ik, 2] = min(width, boxes[ik, 2])
                    boxes[ik, 1] = max(0, boxes[ik, 1])
                    boxes[ik, 3] = min(height, boxes[ik, 3])

                cls_dets = np.hstack(
                    (boxes, scores[:, np.newaxis])).astype(np.float32,
                                                           copy=True)
                det_boxes[cl_ind - 1].append(cls_dets)

            count += 1
        if val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()
        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS stuff Time {:0.3f}'.format(te - tf))
    print('Evaluating detections for iteration number ', iteration)

    # #Save detection after NMS along with GT
    # with open(det_file, 'wb') as f:
    #     pickle.dump([gt_boxes, det_boxes, save_ids], f, pickle.HIGHEST_PROTOCOL)

    return evaluate_detections(gt_boxes, det_boxes, CLASSES, iou_thresh=thresh)
Code example #10
def validate(args,
             net,
             val_data_loader,
             val_dataset,
             epoch,
             iou_thresh=0.5,
             num_gpu=1):
    """Test a SSD network on an image database."""
    print('Validating at ', epoch)
    num_images = len(val_dataset)
    num_classes = args.num_classes

    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    val_step = 100
    count = 0
    net.eval()  # switch net to evaluation mode
    torch.cuda.synchronize()
    ts = time.perf_counter()

    # create batch iterator

    batch_iterator = [[] for i in range(num_gpu)]
    max_x_y = 0
    min_x_y = []
    for i in range(num_gpu):
        batch_iterator[i] = iter(val_data_loader[i])
        min_x_y.append(len(val_data_loader[i]))
        max_x_y = max(max_x_y, len(val_data_loader[i]))
        # print("len: ", len(train_data_loader[i]))

    iter_count = 0
    t0 = time.perf_counter()
    dtype = torch.cuda.FloatTensor
    for val_itr in range(max_x_y):
        img_indexs = []
        for ii in range(num_gpu):
            if val_itr >= min_x_y[ii]:
                batch_iterator[ii] = iter(val_data_loader[ii])

        torch.cuda.synchronize()
        t1 = time.perf_counter()

        img_indexs = []
        images, targets, img_in = next(batch_iterator[0])
        img_indexs.append(img_in)

        img = torch.zeros([1, 3, 300, 300])
        images = torch.cat((images, img.type_as(images)), 0)

        for ii in range(num_gpu - 1):
            img, targ, img_in = next(batch_iterator[ii + 1])
            images = torch.cat((images, img), 0)
            img = (torch.ones([1, 3, 300, 300]) + ii)
            images = torch.cat((images, img.type_as(images)), 0)
            for iii in range(len(targ)):
                targets.append(targ[iii])

            img_indexs.append(img_in)

        batch_size = images.size(0) - num_gpu
        height, width = images.size(2), images.size(3)

        if args.cuda:
            images = Variable(images.cuda(), volatile=True)

        output = net(images, img_indexs)

        loc_data = output[0]
        conf_preds = output[1]
        prior_data = output[2]
        prior_data = prior_data[:loc_data.size(1), :]

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))
        for b in range(batch_size):
            gt = targets[b].numpy()
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            gt_boxes.append(gt)
            decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                   args.cfg['variance']).clone()
            conf_scores = net.module.softmax(conf_preds[b]).data.clone()

            for cl_ind in range(1, num_classes):
                scores = conf_scores[:, cl_ind].squeeze()
                c_mask = scores.gt(
                    args.conf_thresh)  # greater than minimum threshold
                scores = scores[c_mask].squeeze()
                # print('scores size',scores.size())
                if scores.dim() == 0:
                    # print(len(''), ' dim ==0 ')
                    det_boxes[cl_ind - 1].append(np.asarray([]))
                    continue
                boxes = decoded_boxes.clone()
                l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                boxes = boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, counts = nms(boxes, scores, args.nms_thresh,
                                  args.topk)  # idsn - ids after nms
                scores = scores[ids[:counts]].cpu().numpy()
                boxes = boxes[ids[:counts]].cpu().numpy()
                # print('boxes shape', boxes.shape)
                boxes[:, 0] *= width
                boxes[:, 2] *= width
                boxes[:, 1] *= height
                boxes[:, 3] *= height

                for ik in range(boxes.shape[0]):
                    boxes[ik, 0] = max(0, boxes[ik, 0])
                    boxes[ik, 2] = min(width, boxes[ik, 2])
                    boxes[ik, 1] = max(0, boxes[ik, 1])
                    boxes[ik, 3] = min(height, boxes[ik, 3])

                cls_dets = np.hstack(
                    (boxes, scores[:, np.newaxis])).astype(np.float32,
                                                           copy=True)

                det_boxes[cl_ind - 1].append(cls_dets)
            count += 1
        if val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()
        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS stuff Time {:0.3f}'.format(te - tf))
    print('Evaluating detections for epoch number ', epoch)
    return evaluate_detections(gt_boxes,
                               det_boxes,
                               CLASSES,
                               iou_thresh=iou_thresh)
Code example #11
def test_net(net, save_root, exp_name, input_type, dataset, iteration,
             li_color_class, means_bgr, n_record_per_class, th_iou):
    """ Test a SSD network on an Action image database. """
    '''
    print('type(means) : ', type(means))
    print('means : ', means)
    '''
    #li_color_class = make_class_color_list(num_classes)
    shall_record = n_record_per_class > 0
    th_conf = args.conf_thresh
    th_nms = args.nms_thresh
    top_k = args.topk
    t3 = np.asarray(means_bgr)
    means_rgb = np.flipud(t3)
    #means_rgb_2 = np.fliplr(t3)
    #print('t3 : ', t3); print('means_rgb_1 : ', means_rgb_1);   exit(); #print('means_rgb_2 : ', means_bgr_2);   exit()

    #val_data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers, shuffle=False, collate_fn=detection_collate, pin_memory=True)
    val_data_loader = data.DataLoader(dataset,
                                      args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=True,
                                      collate_fn=detection_collate,
                                      pin_memory=True)
    image_ids = dataset.ids
    save_ids = []
    val_step = 250
    num_images = len(dataset)
    video_list = dataset.video_list
    '''
    print('type(dataset) : ', type(dataset)); 
    print('num_images : ', num_images); 
    print('len(video_list) : ', len(video_list));  exit()
    '''
    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    batch_iterator = None
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()
    num_batches = len(val_data_loader)
    det_file = save_root + 'cache/' + exp_name + '/detection-' + input_type + '_' + str(
        iteration).zfill(6) + '.pkl'
    print('det_file : ', det_file)
    #exit()
    print('Number of images ', len(dataset), ' number of batches', num_batches)
    frame_save_dir = save_root + 'detections/CONV-' + input_type + '-' + args.listid + '-' + str(
        iteration).zfill(6) + '/'
    print('\n\n\nDetections will be stored in ', frame_save_dir, '\n\n')
    if shall_record:
        di_class_num_processed = {}
        fn_record = 'action_recognition_images_conf_thres_{:.2f}_nms_thres_{:.1f}_fpc_{}.avi'.format(
            th_conf, th_nms, n_record_per_class)
        writer = make_video_recorder(fn_record, (300, 300), 20)
    shall_stop = False
    for val_itr in range(len(val_data_loader)):
        print('\nval_itr : {} / {}'.format(val_itr, len(val_data_loader)))
        if not batch_iterator:
            batch_iterator = iter(val_data_loader)
        torch.cuda.synchronize()
        t1 = time.perf_counter()
        images_rgb, targets, img_indexs = next(batch_iterator)
        batch_size = images_rgb.size(0)
        if shall_record:
            skip_this_batch = False
            for b in range(batch_size):
                img_idx = img_indexs[b]
                annot_info = dataset.ids[img_idx]
                video_id = annot_info[0]
                video_name = dataset.video_list[video_id].split("/")[0]
                if video_name in di_class_num_processed:
                    if di_class_num_processed[video_name] > n_record_per_class:
                        skip_this_batch = True
                        break
                    di_class_num_processed[video_name] += 1
                else:
                    di_class_num_processed[video_name] = 1
            if skip_this_batch:
                continue

        height, width = images_rgb.size(2), images_rgb.size(3)
        li_margin_ratio_l_r_t_b = [0, 0, 0, 0]
        if args.cuda:
            images_rgb = Variable(images_rgb.cuda(), volatile=True)
            #exit()
        #print('images_rgb.shape : ', images_rgb.shape)


########    network forwarding ######################################################
        output = net(images_rgb)
        ######################################################################################
        loc_data = output[0]
        conf_preds = output[1]
        prior_data = output[2]

        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            tf = time.perf_counter()
            print('Forward Time {:0.3f}'.format(tf - t1))

        #   for each image in this batch
        for b in range(batch_size):
            #print('b : {} / {}'.format(b, batch_size))
            img_idx = img_indexs[b]
            annot_info = dataset.ids[img_idx]
            #print('annot_info : ', annot_info)
            video_id = annot_info[0]
            frame_num = annot_info[1]

            #print('video_id : ', video_id)
            video_name = dataset.video_list[video_id]
            video_class = video_name.split("/")[0]
            img_name = dataset._imgpath + '/{:s}/{:05d}.jpg'.format(
                video_name, frame_num)
            #print('video_name : ', video_name)
            #print('video_class : ', video_class)
            print('img_name : ', img_name)

            #t1_rgb = np.transpose(images_rgb[b].cpu().numpy(), (1, 2, 0))
            #exit()
            t1_rgb = np.transpose(images_rgb[b].cpu().data.numpy(), (1, 2, 0))

            t2_rgb = t1_rgb + means_rgb
            t3_bgr = cv2.cvtColor(t2_rgb.astype(np.uint8), cv2.COLOR_RGB2BGR)
            gt = targets[b].numpy()
            gt[:, 0] *= width
            gt[:, 2] *= width
            gt[:, 1] *= height
            gt[:, 3] *= height
            #print('type(gt) : ', type(gt)); exit()
            #cv2.putText(t3_bgr, video_name, (60, 20), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 0, 255))
            id_vid = dataset.CLASSES.index(video_class)
            cv2.putText(t3_bgr, video_class,
                        (X_OFFSET_GT_VID, Y_OFFSET_GT_VID),
                        cv2.FONT_HERSHEY_DUPLEX, FONT_SCALE_GT_VID,
                        li_color_class[id_vid])

            cv2.putText(t3_bgr, "conf. thres. : {:.2f}".format(th_conf),
                        (int(width * 0.5 - 85), int(height - 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255))

            if not shall_record:
                t3_bgr = mark_ground_truth(t3_bgr, gt, dataset.CLASSES,
                                           li_color_class)
            gt_boxes.append(gt)

            decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                   cfg['variance']).clone()
            conf_scores = net.softmax(conf_preds[b]).data.clone()

            t3_bgr, det_boxes = mark_detections(t3_bgr, conf_scores,
                                                dataset.CLASSES, decoded_boxes,
                                                (width, height),
                                                li_margin_ratio_l_r_t_b,
                                                li_color_class, top_k, th_conf,
                                                th_nms, det_boxes)

            #index = img_indexs[b]
            annot_info = image_ids[img_idx]
            #exit()

            frame_num = annot_info[1]
            video_id = annot_info[0]
            videoname = video_list[video_id]
            output_dir = frame_save_dir + videoname
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)

            output_file_name = output_dir + '/{:05d}.mat'.format(
                int(frame_num))
            save_ids.append(output_file_name)
            sio.savemat(output_file_name,
                        mdict={
                            'scores': conf_scores.cpu().numpy(),
                            'loc': decoded_boxes.cpu().numpy()
                        })
            if shall_record:
                writer.write(t3_bgr)
            count += 1
            cv2.imshow('t3_bgr', t3_bgr)
            #cv2.waitKey(1)
            k = cv2.waitKey() & 0xFF
            #k = cv2.waitKey(1)
            '''
            if 255 != k:
                print('k : ', k)
            '''
            if 27 == k:
                shall_stop = True
        if val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                count, num_images, te - ts))
            torch.cuda.synchronize()
            ts = time.perf_counter()
        if print_time and val_itr % val_step == 0:
            torch.cuda.synchronize()
            te = time.perf_counter()
            print('NMS stuff Time {:0.3f}'.format(te - tf))
        if shall_stop:
            break
    print('Evaluating detections for iteration number ', iteration)

    #Save detection after NMS along with GT
    with open(det_file, 'wb') as f:
        pickle.dump([gt_boxes, det_boxes, save_ids], f,
                    pickle.HIGHEST_PROTOCOL)
    if shall_record:
        writer.release()
        convert_vid_2_animated_gif(fn_record)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES, iou_thresh=th_iou)
Code example #12
File: test_video_trn.py  Project: xychen9459/TDRN
def main():
    mean = (104, 117, 123)
    if 'FPN' in backbone:
        from model.refinedet_vgg import build_net
        static_net = build_net('test',
                               size=ssd_dim,
                               num_classes=num_classes,
                               c7_channel=c7_channel,
                               bn=bn)
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=c7_channel,
                        bn=bn)
    else:
        from model.ssd4scale_vgg import build_net
        static_net = build_net('test',
                               size=ssd_dim,
                               num_classes=num_classes,
                               c7_channel=c7_channel,
                               bn=bn)
        net = build_net('test',
                        size=ssd_dim,
                        num_classes=num_classes,
                        c7_channel=c7_channel,
                        bn=bn,
                        deform=deform)

    print('loading model!')
    static_net.load_state_dict(torch.load(static_dir))
    static_net.eval()
    static_net = static_net.to(device)
    net.load_state_dict(torch.load(trn_dir))
    net.eval()
    net = net.to(device)
    print('Finished loading model!', static_dir, trn_dir)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)

    frame_num = 0
    cap = cv2.VideoCapture(video_name)
    w, h = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    size = (640, 480)
    if save_dir:
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        record = cv2.VideoWriter(
            os.path.join(save_dir,
                         video_name.split('/')[-1].split('.')[0] + '.avi'),
            fourcc, cap.get(cv2.CAP_PROP_FPS), size)
    # static_flag = True
    offset_list = list()
    ref_loc = list()
    while (cap.isOpened()):
        ret, frame = cap.read()
        if not ret:
            break
        h, w, _ = frame.shape
        frame_draw = frame.copy()
        im_trans = base_transform(frame, ssd_dim, mean)
        with torch.no_grad():
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(0, 3, 1,
                                                                2).to(device)
            if frame_num % interval == 0:
                # if static_flag:
                static_out = static_net(x, ret_loc=deform)
                priors_static = center_size(
                    decode(static_out[0][0], priors, [0.1, 0.2]))
                if deform:
                    ref_loc = static_out[
                        2]  # [o * args.loose for o in static_out[2]]
                    offset_list = list()
            out = net(x,
                      ref_loc=ref_loc,
                      offset_list=offset_list,
                      ret_off=(False, True)[deform and not offset_list])
            detections = detector.forward(out[0],
                                          out[1],
                                          priors_static,
                                          scale=torch.cuda.FloatTensor(
                                              [w, h, w, h]))
            if len(detections) == 3:
                offset_list = out[2]
                ref_loc = list()
            # if static_flag:
            #     ref_mask = mask.clone()
            #     print('static')
            #     static_flag = False
            # else:
            #     time1 = time.time()
            #     s_score = (mask * ref_mask).sum().float() / (mask + ref_mask).sum().float()
            #     static_flag = (False, True)[s_score<0.45]
            #     time2 = time.time()
            #     print(s_score, 'match time:', time2-time1)
        out = list()
        for j in range(1, detections.size(1)):
            if detections[0, j, :, :].sum() == 0:
                continue
            for k in range(detections.size(2)):
                dets = detections[0, j, k, :]
                if dets.sum() == 0:
                    continue
                boxes = dets[1:-1] if dets.size(0) == 6 else dets[1:]
                identity = dets[-1] if dets.size(0) == 6 else -1
                x_min = int(boxes[0] * w)
                x_max = int(boxes[2] * w)
                y_min = int(boxes[1] * h)
                y_max = int(boxes[3] * h)

                score = dets[0]
                if score > confidence_threshold:
                    put_str = VID_CLASSES_name[j - 1] + ':' + str(
                        np.around(score,
                                  decimals=2)).split('(')[-1].split(',')[0][:4]
                    color = (255, 0, 0)
                    cv2.rectangle(frame_draw, (x_min, y_min), (x_max, y_max),
                                  color,
                                  thickness=2)
                    cv2.putText(frame_draw,
                                put_str, (x_min + 10, y_min - 10),
                                cv2.FONT_HERSHEY_DUPLEX,
                                0.8,
                                color=color,
                                thickness=1)
        print(str(frame_num))
        frame_num += 1
        frame_show = cv2.resize(frame_draw, size)
        cv2.imshow('frame', frame_show)  # 255* mask.cpu().numpy())
        if save_dir:
            record.write(frame_show)
        ch = cv2.waitKey(1)
        if ch == 32:
            # if frame_num % 1 ==0:
            while 1:
                in_ch = cv2.waitKey(10)
                if in_ch == 115:  # 's'
                    if save_dir:
                        print('save: ', frame_num)
                        torch.save(
                            out,
                            os.path.join(save_dir, '_%s.pkl' % str(frame_num)))
                        cv2.imwrite(
                            os.path.join(save_dir, '%s.jpg' % str(frame_num)),
                            frame)
                elif in_ch == 32:
                    break

    cap.release()
    if save_dir:
        record.release()
    cv2.destroyAllWindows()