Example No. 1
    def forward(self, act_classifications, obj_classifications,
                obj_regressions, anchors, inst_annotations, **kwargs):
        anchors = anchors.float()
        act_classifications = act_classifications.float()

        alpha = 0.25
        gamma = 2.0
        batch_size = act_classifications.shape[0]
        act_classification_losses = []
        obj_classification_losses = []
        regression_losses = []

        anchor = anchors[
            0, :, :]  # assuming all images in the batch share the same size
        dtype = anchors.dtype

        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):
            act_classification = act_classifications[
                j, :, :]  # (h*w*feat_num, num_act_classes)
            obj_classification = obj_classifications[
                j, :, :]  # (h*w*feat_num, num_obj_classes)
            regression = obj_regressions[
                j, :, :]  # (h*w*feat_num, num_anchor*4)

            bbox_annotation = inst_annotations[j, :, :5]
            act_annotation_oh = inst_annotations[j, :, 5:]

            act_annotation_oh = act_annotation_oh[bbox_annotation[:, 4] != -1]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] !=
                                              -1]  # (num_boxes, 5)

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():
                    act_classification_losses.append(
                        torch.tensor(0).to(dtype).cuda())
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                    obj_classification_losses.append(
                        torch.tensor(0).to(dtype).cuda())
                else:
                    act_classification_losses.append(torch.tensor(0).to(dtype))
                    regression_losses.append(torch.tensor(0).to(dtype))
                    obj_classification_losses.append(torch.tensor(0).to(dtype))
                continue

            obj_classification = torch.clamp(obj_classification, 1e-4,
                                             1.0 - 1e-4)
            act_classification = torch.clamp(act_classification, 1e-4,
                                             1.0 - 1e-4)

            IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # compute the loss for classification
            act_targets = torch.ones_like(act_classification) * -1
            obj_targets = torch.ones_like(obj_classification) * -1
            if torch.cuda.is_available():
                act_targets = act_targets.cuda()
                obj_targets = obj_targets.cuda()

            obj_targets[torch.lt(IoU_max, 0.4), :] = 0  # IoU < 0.4
            act_targets[torch.lt(IoU_max, 0.4), :] = 0  # IoU < 0.4

            positive_indices = torch.ge(IoU_max, 0.5)  # IoU >= 0.5

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]
            assigned_act_annotation = act_annotation_oh[IoU_argmax, :]

            act_targets[positive_indices, :] = 0
            obj_targets[positive_indices, :] = 0

            obj_targets[positive_indices, assigned_annotations[
                positive_indices,
                4].long()] = 1  # set the corresponding categories as 1
            act_targets[positive_indices, :] = assigned_act_annotation[
                positive_indices, :]  # copy the one-hot action annotations

            foreground = torch.max(act_targets, dim=1)[0] > 0
            act_targets = act_targets[foreground]
            act_classification = act_classification[foreground]

            alpha_factor_obj = torch.ones_like(obj_targets) * alpha

            if torch.cuda.is_available():
                alpha_factor_obj = alpha_factor_obj.cuda()

            alpha_factor_obj = torch.where(torch.eq(obj_targets,
                                                    1.), alpha_factor_obj,
                                           1. - alpha_factor_obj)

            obj_focal_weight = torch.where(torch.eq(obj_targets, 1.),
                                           1. - obj_classification,
                                           obj_classification)
            obj_focal_weight = alpha_factor_obj * torch.pow(
                obj_focal_weight, gamma)

            obj_bce = -(
                obj_targets * torch.log(obj_classification) +
                (1.0 - obj_targets) * torch.log(1.0 - obj_classification))
            act_bce = -(
                act_targets * torch.log(act_classification) +
                (1.0 - act_targets) * torch.log(1.0 - act_classification))

            obj_cls_loss = obj_focal_weight * obj_bce  # classification loss

            if self.dataset == "vcoco":
                act_cls_loss = act_bce
            else:
                act_cls_loss = act_bce * self.verb_weight.to(dtype).to(act_bce.device)

            obj_zeros = torch.zeros_like(obj_cls_loss)
            act_zeros = torch.zeros_like(act_cls_loss)
            if torch.cuda.is_available():
                obj_zeros = obj_zeros.cuda()
                act_zeros = act_zeros.cuda()
            obj_cls_loss = torch.where(
                torch.ne(obj_targets, -1.0), obj_cls_loss,
                obj_zeros)  # ignore loss if IoU is too small
            act_cls_loss = torch.where(
                torch.ne(act_targets, -1.0), act_cls_loss,
                act_zeros)  # ignore loss if IoU is too small

            obj_classification_losses.append(
                obj_cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))
            act_classification_losses.append(
                act_cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:,
                                                 2] - assigned_annotations[:,
                                                                           0]
                gt_heights = assigned_annotations[:,
                                                  3] - assigned_annotations[:,
                                                                            1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        # debug
        imgs = kwargs.get('imgs', None)
        if imgs is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(
                imgs.detach(),
                torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                obj_regressions.detach(), obj_classifications.detach(),
                regressBoxes, clipBoxes, 0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) *
                    255).astype(np.uint8)
            imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            display(out, imgs, obj_list, imshow=False, imwrite=True)

        return torch.stack(act_classification_losses).mean(dim=0, keepdim=True), \
                torch.stack(obj_classification_losses).mean(dim=0, keepdim=True), \
                torch.stack(regression_losses).mean(dim=0, keepdim=True)
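For reference, the per-element weighting applied to obj_cls_loss above is standard focal loss, alpha_t * (1 - p_t)^gamma * BCE, normalized by the number of positive anchors. A minimal, self-contained PyTorch sketch of that weighting (shapes and probabilities below are illustrative, not taken from the model):

import torch

def focal_bce(prob, target, alpha=0.25, gamma=2.0):
    """Element-wise focal loss on probabilities already in (0, 1).

    Mirrors the alpha_factor / focal_weight / bce product used above;
    target entries are 0 or 1.
    """
    prob = prob.clamp(1e-4, 1.0 - 1e-4)
    alpha_factor = torch.where(target == 1.0,
                               torch.full_like(prob, alpha),
                               torch.full_like(prob, 1.0 - alpha))
    focal_weight = torch.where(target == 1.0, 1.0 - prob, prob)
    bce = -(target * torch.log(prob) + (1.0 - target) * torch.log(1.0 - prob))
    return alpha_factor * focal_weight.pow(gamma) * bce

# illustrative shapes: 8 anchors x 3 classes
probs = torch.rand(8, 3)
targets = (torch.rand(8, 3) > 0.7).float()
loss = focal_bce(probs, targets).sum() / targets.sum().clamp(min=1.0)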
Example No. 2
def evaluate_coco_show_res_jss(img_path,
                               set_name,
                               image_ids,
                               coco,
                               model,
                               threshold=0.05):
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    count = 0
    for image_id in tqdm(image_ids):
        count = count + 1
        if count > 21:
            break
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + image_info['file_name']
        print('image path:', image_path)

        ori_imgs, framed_imgs, framed_metas = preprocess(
            image_path, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            if use_float16:
                x = x.half()
            else:
                x = x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x, anchors, regression, classification,
                            regressBoxes, clipBoxes, threshold, nms_threshold)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                image_result = {
                    'image_id': image_id,
                    'category_id': label + 1,
                    'score': float(score),
                    'bbox': box.tolist(),
                }
                score = float(score)
                category_id = label + 1
                box = box.tolist()
                # print('box:',box)
                xmin, ymin, w, h, score = int(box[0]), int(box[1]), int(
                    box[2]), int(box[3]), score
                if score > 0.2:
                    cv2.rectangle(ori_imgs[0], (xmin, ymin),
                                  (xmin + w, ymin + h), (0, 255, 0), 6)
                    cv2.putText(ori_imgs[0],
                                '{}:{:.2f}'.format(category_id,
                                                   score), (xmin, ymin),
                                cv2.FONT_HERSHEY_SIMPLEX, 4.0, (0, 255, 0), 6)
                results.append(image_result)
        cv2.imwrite(
            './test_result/zhongchui_d3_epoch200_1124/' + 'tmp' +
            '{}'.format(count) + '.jpeg', ori_imgs[0])

    if not len(results):
        raise Exception(
            'the model does not provide any valid output, check model architecture and the data input'
        )

    # write output
    # filepath = f'{set_name}_bbox_results.json'
    filepath = det_save_json
    if os.path.exists(filepath):
        os.remove(filepath)
    json.dump(results, open(filepath, 'w'), indent=4)
def evaluate_coco(img_path, set_name, image_ids, coco, model, threshold=0.05):
    results = []
    processed_image_ids = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for image_id in tqdm(image_ids):
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + image_info['file_name']

        ori_imgs, framed_imgs, framed_metas = preprocess(
            image_path, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            if use_float16:
                x = x.half()
            else:
                x = x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x, anchors, regression, classification,
                            regressBoxes, clipBoxes, threshold, nms_threshold)

        processed_image_ids.append(image_id)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]
                box = box.tolist()
                # Adjust for integer box
                #box = [int(x) for x in box]
                if score < threshold:
                    break
                image_result = {
                    'image_id': image_id,
                    'category_id': label + 1,
                    'score': float(score),
                    'bbox': box,
                }

                results.append(image_result)

    if not len(results):
        raise Exception(
            'the model does not provide any valid output, check model architecture and the data input'
        )

    # write output
    filepath = f'{set_name}_bbox_results.json'
    if os.path.exists(filepath):
        os.remove(filepath)
    json.dump(results, open(filepath, 'w'), indent=4)

    return processed_image_ids
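The JSON written above is in COCO detection-results format, so it can be scored with pycocotools' COCOeval. A hedged usage sketch (the annotation path and split name are placeholders, not from the source):

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

set_name = 'val2017'                                        # placeholder split
coco_gt = COCO(f'annotations/instances_{set_name}.json')    # placeholder path
image_ids = coco_gt.getImgIds()

# evaluate_coco(...) is assumed to have written this file already
coco_dt = coco_gt.loadRes(f'{set_name}_bbox_results.json')

coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
coco_eval.params.imgIds = image_ids
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()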
Example No. 4
def evaluate_coco(img_path, json_path, image_ids, coco, model, max_size, config):
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for image_id in tqdm(image_ids):
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + image_info['file_name']

        ori_imgs, framed_imgs, framed_metas = preprocess(image_path, max_size=max_size)
        x = torch.from_numpy(framed_imgs[0])

        if config.eval_use_cuda:
            x = x.cuda(config.eval_gpu)
            if config.eval_use_float16:
                x = x.half()
            else:
                x = x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)
        

        preds = postprocess(x,
                            anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            config.eval_threshold, config.eval_nms_threshold,
                            anchor_free_mode=config.anchor_free_mode)
        
        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                image_result = {
                    'image_id': image_id,
                    'category_id': label + 1,
                    'score': float(score),
                    'bbox': box.tolist(),
                }

                results.append(image_result)
        
    if not len(results):
        raise Exception('the model does not provide any valid output, check model architecture and the data input')

    # write output
    
    if os.path.exists(json_path):
        os.remove(json_path)
    json.dump(results, open(json_path, 'w'), indent=4)
Example No. 5
def request_chat(uid: str, text: str) -> dict:
    # print(uid)
    # print(text)
    # prep = dataset.load_predict(text, embed_processor)
    # print(prep)
    # intent = intent_classifier.predict(prep, calibrate=False)
    # entity = entity_recognizer.predict(prep)
    # entity = None
    # text = dataset.prep.tokenize(text, train=False)
    # dialogue_cache[uid] = scenario_manager.apply_scenario(intent, entity, text)

    # BERT intent
    # utterance =
    utterence = preprocess(text)
    max_seq_len = 50
    inputs = tokenizer.encode_plus(utterence,
            None,
            pad_to_max_length=True,
            add_special_tokens=True,
            return_attention_mask=True,
            max_length = max_seq_len,
    )

    ids = inputs["input_ids"]
    token_type_ids = inputs["token_type_ids"]
    mask = inputs["attention_mask"]

    input_data = {
        'ids': torch.tensor(ids, dtype=torch.long),
        'mask': torch.tensor(mask, dtype=torch.long),
        'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
        # 'target': torch.tensor(self.train_csv.iloc[index, 2], dtype=torch.long)
        # 'target': torch.tensor(self.target[index], dtype=torch.long)
    }

    input_data['ids'] = input_data['ids'].to(device)
    input_data['mask'] = input_data['mask'].to(device)
    input_data['token_type_ids'] = input_data['token_type_ids'].to(device)
    # input_data['target'] = input_data['target'].to(device)

    input_data['ids'] = input_data['ids'].unsqueeze(0)
    input_data['mask'] = input_data['mask'].unsqueeze(0)
    input_data['token_type_ids'] = input_data['token_type_ids'].unsqueeze(0)

    # 3. Feed the data into the model
    inputs = {'input_ids': input_data['ids'],
              'token_type_ids' : input_data['token_type_ids'],
              'attention_mask': input_data['mask']
              }

    outputs = model(**inputs)

    intent_str, intent_candidate_str, score_str, intent, score = postprocess(outputs, model)


    feedback['id'].append(uid)
    feedback['text'].append(text)
    feedback['utterance'].append(utterence)
    feedback['intent'].append(intent)
    feedback['score'].append(score)
    feedback['label'].append(999)


    entity = None

    dialogue_cache = {'input': text, 'intent': intent_str, 'entity': entity, 'state':'FALLBACK', 'answer': None, 'score': score_str}

    # save to feedback
    # feedback['']
    return dialogue_cache
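postprocess(outputs, model) above is project-specific; for single-label intent classification it typically reduces to a softmax over the logits followed by an argmax. A minimal illustrative sketch (the function and label names are assumptions, not this project's API):

import torch
import torch.nn.functional as F

def decode_intent(logits, id2label):
    """Turn a (1, num_intents) logit tensor into (intent, score)."""
    probs = F.softmax(logits, dim=-1).squeeze(0)
    score, idx = torch.max(probs, dim=-1)
    return id2label[int(idx)], float(score)

# illustrative usage with made-up labels and logits
id2label = {0: 'weather', 1: 'dust', 2: 'restaurant'}
logits = torch.tensor([[2.1, 0.3, -1.0]])
intent, score = decode_intent(logits, id2label)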
Example No. 6
def train(opt):
    params = Params(f'projects/{opt.project}.yml')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {'batch_size': opt.batch_size,
                       'shuffle': True,
                       'drop_last': True,
                       'collate_fn': collater,
                       'num_workers': opt.num_workers}

    val_params = {'batch_size': opt.batch_size,
                  'shuffle': False,
                  'drop_last': True,
                  'collate_fn': collater,
                  'num_workers': opt.num_workers}

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    training_set = CocoDataset(root_dir=os.path.join(opt.data_path, params.project_name), set=params.train_set,
                               transform=transforms.Compose([Normalizer(mean=params.mean, std=params.std),
                                                             Augmenter(),
                                                             Resizer(input_sizes[opt.compound_coef])]))
    training_generator = DataLoader(training_set, **training_params)

    val_set = CocoDataset(root_dir=os.path.join(opt.data_path, params.project_name), set=params.val_set,
                          transform=transforms.Compose([Normalizer(mean=params.mean, std=params.std),
                                                        Resizer(input_sizes[opt.compound_coef])]))
    val_generator = DataLoader(val_set, **val_params)

    labels = training_set.labels
    print('label:', labels)

    model = EfficientDetBackbone(num_classes=len(params.obj_list), compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios), scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except:
            last_step = 0

        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.')

        print(f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}')
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:
        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] froze backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # Apply sync_bn when using multiple GPUs and the per-GPU batch size is below 4;
    # useful when GPU memory is limited. With such small per-GPU batches, plain BN
    # makes training unstable or slow to converge; sync_bn fixes this by normalizing
    # over the mini-batches of all GPUs as if they were a single batch, then sending
    # the result back to each GPU, at the cost of slightly slower training.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(opt.log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # wrap the model with the loss function, to reduce the memory usage on gpu0 and speed up
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(), opt.lr, momentum=0.9, nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for iter, data in enumerate(progress_bar):
                if iter < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss, regression, classification, anchors = model(imgs, annot, obj_list=params.obj_list)

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    # loss
                    epoch_loss.append(float(loss))

                    # mAP
                    threshold = 0.2
                    iou_threshold = 0.2

                    regressBoxes = BBoxTransform()
                    clipBoxes = ClipBoxes()

                    out = postprocess(imgs,
                                      anchors, regression, classification,
                                      regressBoxes, clipBoxes,
                                      threshold, iou_threshold)

                    mAP = mAP_score(annot, out, labels)
                    mAP = mAP.results['mAP']

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}. mAP: {:.2f}'.format(
                            step, epoch+1, opt.num_epochs, iter + 1, num_iter_per_epoch, cls_loss.item(),
                            reg_loss.item(), loss.item(), mAP))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss}, step)
                    writer.add_scalars('Classification_loss', {'train': cls_loss}, step)
                    writer.add_scalars('mAP', {'train': mAP}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
                        print('checkpoint...')

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))



            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []

                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss, regression, classification, anchors = model(imgs, annot, obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                # mAP
                threshold = 0.2
                iou_threshold = 0.2

                regressBoxes = BBoxTransform()
                clipBoxes = ClipBoxes()

                out = postprocess(imgs,
                                  anchors, regression, classification,
                                  regressBoxes, clipBoxes,
                                  threshold, iou_threshold)

                mAP = mAP_score(annot, out, labels)
                mAP = mAP.results['mAP']

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}. mAP: {:.2f}'.format(
                        epoch+1, opt.num_epochs, cls_loss, reg_loss, loss, mAP))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classification_loss', {'val': cls_loss}, step)
                writer.add_scalars('mAP', {'val': mAP}, step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')

                model.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print('[Info] Stop training at epoch {}. The lowest loss achieved is {}'.format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        save_checkpoint(model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        writer.close()
    writer.close()
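train(opt) expects an argparse-style options object plus a matching projects/<project>.yml file. A hedged sketch of the fields the function reads (all values below are placeholders):

from types import SimpleNamespace

# Minimal options object covering every attribute train() reads above.
opt = SimpleNamespace(
    project='coco',          # projects/coco.yml must exist
    compound_coef=0,
    batch_size=8,
    num_workers=4,
    lr=1e-4,
    optim='adamw',
    num_epochs=100,
    val_interval=1,
    save_interval=500,
    es_min_delta=0.0,
    es_patience=0,
    head_only=False,
    load_weights=None,       # or a path to a .pth checkpoint to resume from
    data_path='datasets/',
    log_path='logs/',
    saved_path='logs/',
    debug=False,
)
# train(opt)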
Example No. 7
def detect(model, dataset, args):
    use_cuda = not args.cpu
    threshold = args.threshold
    iou_threshold = args.iou_threshold
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[args.compound_coef]

    img_dir = os.path.join(dataset, dataset, 'images')
    bbox_dir = os.path.join(dataset, dataset, 'annotations', 'bboxes')
    vis_dir = os.path.join(dataset, 'det_vis')
    prepare_dirs(bbox_dir, vis_dir)

    img_paths = [os.path.join(img_dir, f) for f in os.listdir(img_dir)]
    for img_path in tqdm(img_paths):
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                         max_size=input_size)
        ori_img = ori_imgs[0]
        img_id = os.path.basename(img_path).split('.')[0]

        json_byhand = os.path.join(dataset, 'annotation_byhand',
                                   img_id + '.json')
        if os.path.exists(json_byhand):
            with open(json_byhand) as f:
                annotation_byhand = json.load(f)
                points = annotation_byhand['shapes'][0]['points']
                max_box = points[0] + points[1]
        else:
            if args.update:  # only process annotations by hand
                continue
            if use_cuda:
                x = torch.stack(
                    [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            else:
                x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs],
                                0)

            x = x.to(torch.float32).permute(0, 3, 1, 2)

            with torch.no_grad():
                features, regression, classification, anchors = model(x)

                regressBoxes = BBoxTransform()
                clipBoxes = ClipBoxes()

                preds = postprocess(x, anchors, regression, classification,
                                    regressBoxes, clipBoxes, threshold,
                                    iou_threshold)

                pred = invert_affine(framed_metas, preds)[0]

            max_area, max_box = 0, [0, 0, ori_img.shape[1], ori_img.shape[0]]
            for det, class_id in zip(pred['rois'], pred['class_ids']):
                if not class_id == 0:
                    continue
                x1, y1, x2, y2 = det.astype(int)
                w, h = x2 - x1, y2 - y1
                area = w * h
                if area > max_area:
                    max_area = area
                    max_box = [x1, y1, x2, y2]

        plot_one_box(ori_img, max_box, color=[255, 0, 255], line_thickness=2)
        if args.vis:
            cv2.imwrite(os.path.join(vis_dir, img_id + '.jpg'), ori_img)

        bbox_file = os.path.join(bbox_dir, img_id + '.txt')
        with open(bbox_file, 'w') as f:
            bbox_info = ' '.join(map(str, max_box))
            f.write(bbox_info)
Example No. 8
# print("Restore Ckpt Sucessfully!!")

# Load the model weights
model(np.zeros((1, 550, 550, 3)))
model.load_weights(r'D:\Tensorflow-YOLACT\weights\weights_pascal_42.54.h5')
print("Load weights Successfully!!")
# -----------------------------------------------------------------------------------------------
# Load Validation Images and do Detection
# iteration for detection (5000 val images)
for image, labels in valid_dataset.take(10):
    # only try on 1 image
    output = model(image, training=False)
    detection = model.detect(output)
    # postprocessing
    cls, scores, bbox, masks = postprocess(detection,
                                           tf.shape(image)[1],
                                           tf.shape(image)[2], 0, "bilinear")
    if cls is None:
        continue
    cls, scores, bbox, masks = cls.numpy(), scores.numpy(), bbox.numpy(
    ), masks.numpy()
    # visualize the detection (un-transform the image)
    image = labels['ori'][0].numpy()
    gt_bbox = labels['bbox'].numpy()
    gt_cls = labels['classes'].numpy() - 1
    num_obj = labels['num_obj'].numpy()
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    masks = masks[None, :, :] if masks.shape[0] == 550 else masks
    final_m = np.zeros_like(masks[0][:, :, None])
    # show the prediction box
    for idx in range(bbox.shape[0]):
def efficientDet_video_inference(video_src, compound_coef=0, force_input_size=None,
                                 frame_skipping=3,
                                 threshold=0.2, out_path=None, imshow=False,
                                 display_fps=False):

    #deep-sort variables

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0


    model_filename = '/home/shaheryar/Desktop/Projects/Football-Monitoring/deep_sort/model_weights/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric,n_init=5)

    # efficientDet-pytorch variables
    iou_threshold = 0.4
    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))
    fourcc = cv2.VideoWriter_fourcc(*'MPEG')
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("Video fps",fps)
    if(out_path is not None):
        outp = cv2.VideoWriter(out_path, fourcc, fps, (frame_width, frame_height))
    i=0
    start= time.time()
    current_frame_fps=0
    while True:

        ret, frame = cap.read()

        if not ret:
            break
        t1 = time.time()
        if frame_skipping == 0 or i % frame_skipping == 0:
        # if(True):


            # frame preprocessing (running detections)
            ori_imgs, framed_imgs, framed_metas, t1 = preprocess_video(frame, width=input_size, height=input_size)
            if use_cuda:
                x = torch.stack([fi.cuda() for fi in framed_imgs], 0)
            else:
                x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
            # model predict
            t1=time.time()
            with torch.no_grad():
                features, regression, classification, anchors = model(x)

                out = postprocess(x,
                                  anchors, regression, classification,
                                  regressBoxes, clipBoxes,
                                  threshold, iou_threshold)
            # Post processing
            out = invert_affine(framed_metas, out)
            # decoding bbox ,object name and scores
            boxes, classes, scores = decode_predictions(out[0])
            org_boxes = boxes.copy()
            t2 = time.time() - t1

            # feature extraction for deep sort
            boxes = [convert_bbox_to_deep_sort_format(frame.shape, b) for b in boxes]

            features = encoder(frame, boxes)
            detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxes, features)]
            boxes = np.array([d.tlwh for d in detections])
            # print(boxes)
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            tracker.predict()
            tracker.update(detections)



        i = i + 1
        img_show = frame.copy()
        for j in range(len(org_boxes)):
            img_show = drawBoxes(img_show, org_boxes[j], (255, 255, 0), str(tracker.tracks[j].track_id))

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            roi = frame[y1:y2, x1:x2]
            cv2.rectangle(img_show, (x1, y1), (x2, y2), update_color_association(roi, track.track_id), 2)
            cv2.putText(img_show, str(track.track_id), (x1, y1), 0, 5e-3 * 100, (255, 255, 0), 1)


        if display_fps:
            current_frame_fps = 1 / t2
        else:
            current_frame_fps = 0

        cv2.putText(img_show, 'FPS: {0:.2f}'.format(current_frame_fps), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 0),
                    2, cv2.LINE_AA)
        if i % int(fps) == 0:
            print("Processed ", str(int(i / fps)), "seconds")
            print("Time taken", time.time() - start)
            # print(color_dict)

        if imshow:
            img_show = cv2.resize(img_show, (0, 0), fx=0.75, fy=0.75)
            cv2.imshow('Frame',img_show)
            # Press Q on keyboard to  exit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        if out_path is not None:
            outp.write(img_show)

    cap.release()
    if out_path is not None:
        outp.release()
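convert_bbox_to_deep_sort_format is not shown above; deep_sort's Detection expects boxes as top-left x/y plus width/height, so a plausible sketch of such a helper (a hypothetical implementation, not the project's) is:

def convert_bbox_to_deep_sort_format(frame_shape, box):
    """Hypothetical helper: clip an (x1, y1, x2, y2) box to the frame and
    return it as (x, y, w, h), the layout deep_sort's Detection expects."""
    h, w = frame_shape[:2]
    x1, y1, x2, y2 = box
    x1, y1 = max(0, int(x1)), max(0, int(y1))
    x2, y2 = min(w - 1, int(x2)), min(h - 1, int(y2))
    return [x1, y1, x2 - x1, y2 - y1]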
    def infer(self, image):
        img = np.array(image)
        img = img[:, :, ::-1]  # RGB -> BGR
        anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
        anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

        threshold = 0.25
        iou_threshold = 0.25

        force_input_size = None
        use_cuda = False
        use_float16 = False
        cudnn.fastest = False
        cudnn.benchmark = False

        input_size = 512
        ori_imgs, framed_imgs, framed_metas = preprocess(img,
                                                         max_size=input_size)

        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        model = EfficientDetBackbone(compound_coef=0,
                                     num_classes=len(self.labels),
                                     ratios=anchor_ratios,
                                     scales=anchor_scales)

        model.load_state_dict(torch.load(self.path, map_location='cpu'))
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model = model.cuda()
        if use_float16:
            model = model.half()

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold,
                              iou_threshold)

        pred = invert_affine(framed_metas, out)

        results = []

        for i in range(len(ori_imgs)):
            if len(pred[i]['rois']) == 0:
                continue

            ori_imgs[i] = ori_imgs[i].copy()

            for j in range(len(pred[i]['rois'])):
                xt1, yt1, xbr, ybr = pred[i]['rois'][j].astype(np.float64)
                xt1 = float(xt1)
                yt1 = float(yt1)
                xbr = float(xbr)
                ybr = float(ybr)
                obj = str(pred[i]['class_ids'][j])
                obj_label = self.labels.get(obj)
                obj_score = str(pred[i]['scores'][j])
                results.append({
                    "confidence": str(obj_score),
                    "label": obj_label,
                    "points": [xt1, yt1, xbr, ybr],
                    "type": "rectangle",
                })

        return results
Example No. 11
    def forward(self, regressions, anchors, annotations, **kwargs):
        alpha = 0.25
        gamma = 2.0
        batch_size = regressions.shape[0]
        regression_losses = []

        anchor = anchors[
            0, :, :]  # assuming all images in the batch share the same size
        dtype = anchors.dtype

        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):

            regression = regressions[j, :, :]

            bbox_annotation = annotations[j]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            positive_indices = torch.ge(IoU_max, 0.5)
            assigned_annotations = bbox_annotation[IoU_argmax, :]

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:,
                                                 2] - assigned_annotations[:,
                                                                           0]
                gt_heights = assigned_annotations[:,
                                                  3] - assigned_annotations[:,
                                                                            1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        # debug
        imgs = kwargs.get('imgs', None)
        if imgs is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(
                imgs.detach(),
                torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                regressions.detach(),
                kwargs.get('classifications').detach(),  # assumed to be supplied via kwargs for this debug path
                regressBoxes, clipBoxes, 0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) *
                    255).astype(np.uint8)
            imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            display(out, imgs, obj_list, imshow=False, imwrite=True)

        return torch.stack(regression_losses).mean(dim=0, keepdim=True)
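The regression targets built above use the usual (dy, dx, dh, dw) anchor encoding paired with a smooth-L1 penalty (beta = 1/9). A self-contained sketch of that pairing, mirroring the slicing above (anchors stored as (y1, x1, y2, x2), annotations as (x1, y1, x2, y2); shapes are illustrative):

import torch

def encode_boxes(anchors_yx, gt_xy):
    """Return (N, 4) regression targets (dy, dx, dh, dw) for matched pairs."""
    aw = anchors_yx[:, 3] - anchors_yx[:, 1]
    ah = anchors_yx[:, 2] - anchors_yx[:, 0]
    acx = anchors_yx[:, 1] + 0.5 * aw
    acy = anchors_yx[:, 0] + 0.5 * ah

    gw = gt_xy[:, 2] - gt_xy[:, 0]
    gh = gt_xy[:, 3] - gt_xy[:, 1]
    gcx = gt_xy[:, 0] + 0.5 * gw
    gcy = gt_xy[:, 1] + 0.5 * gh
    gw = gw.clamp(min=1)
    gh = gh.clamp(min=1)

    return torch.stack(((gcy - acy) / ah, (gcx - acx) / aw,
                        torch.log(gh / ah), torch.log(gw / aw)), dim=1)

def smooth_l1(diff, beta=1.0 / 9.0):
    """Piecewise penalty used above: quadratic below beta, linear beyond."""
    diff = diff.abs()
    return torch.where(diff < beta, 0.5 * diff.pow(2) / beta, diff - 0.5 * beta)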
Example No. 12
    'Cow'
]

confusion_matrix = [[0 for _ in range(13)] for j in range(13)]

for image, labels in tqdm(valid_dataset):
    # only try on 1 image
    output = model(image, training=False)
    detection = detect_layer(output)

    gt_bbox = labels['bbox'].numpy()
    gt_cls = labels['classes'].numpy()
    num_obj = labels['num_obj'].numpy()

    if detection[0]['detection'] is not None:
        my_cls, scores, bbox, masks = postprocess(detection, 256, 256, 0, 'bilinear')
        my_cls, scores, bbox, masks = my_cls.numpy(), scores.numpy(), bbox.numpy(), masks.numpy()
        
        ground_truth = []
        prediction = []

        for idx in range(num_obj[0]):
            ground_truth.append({
                'class': gt_cls[0][idx],
                'bbox': gt_bbox[0][idx]
            })

        for idx in range(bbox.shape[0]):
            prediction.append({
                'class': my_cls[idx] + 1,
                'bbox': bbox[idx]
Example No. 13
def evaluate_coco(img_path, set_name, image_ids, coco, model, threshold=0.05):
    results = []
    processed_image_ids = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for image_id in tqdm(image_ids):
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + image_info['file_name']

        ori_imgs, framed_imgs, framed_metas = preprocess(image_path, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0]).cuda().unsqueeze(0).to(torch.float32).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x,
                            anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            threshold, nms_threshold)

        processed_image_ids.append(image_id)

        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                if score < threshold:
                    break
                image_result = {
                    'image_id': image_id,
                    'category_id': label + 1,
                    'score': float(score),
                    'bbox': box.tolist(),
                }

                results.append(image_result)

    if not len(results):
        return []

    # write output
    json.dump(results, open(f'{set_name}_bbox_results.json', 'w'), indent=4)

    return processed_image_ids
Example No. 14
def evaluate_odgt(records, model):
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    all_result = []
    pbar = tqdm(total=len(records))
    for record in records:
        pbar.update(1)
        nori_ids = record['ID']
        # img = imdecode(nf.get(nori_id))
        ori_imgs, framed_imgs, framed_metas = preprocess_nori(nori_ids, max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(gpu)
            if use_float16:
                x = x.half()
            else:
                x = x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x,
                            anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            threshold, nms_threshold)
        
        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        dtboxes = []
        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores
            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                # category_id = label + 1
                category_id = label
                tag = obj_list[category_id]
                # only test bad 
                # if tag != 'zangwu':
                #     continue
                # if tag in ['normal_board', 'normal_driving_birds_device']:
                #     continue
                box = rois[roi_id, :]
                box = box.tolist()
                box_new = dict(box=box, score=score, tag=tag)
                dtboxes.append(box_new)
        record['dtboxes'] = dtboxes
        
        # new_gtboxes = []
        # for gtbox in record['gtboxes']:
        #     # if gtbox['tag'] == 'fangzhenchui_bad' or gtbox['tag'] == 'fangzhenchui_good':
        #     #     new_gtboxes.append(gtbox)
        #     if gtbox['tag'] == 'zangwu':
        #         new_gtboxes.append(gtbox)
        #     # if gtbox['tag'] in ['normal_board', 'normal_driving_birds_device']:
        #     #     continue
        #     # else:
        #     #     new_gtboxes.append(gtbox)
        # record['gtboxes'] = new_gtboxes
        all_result.append(record)

    fw = open(det_save_odgt, 'w')
    for res in all_result:
        res = json.dumps(res)
        fw.write(res + '\n')
    fw.close()

    # evaluation
    eval_script = '/data/wurenji/code_new/dianwang_detection/evalTookits2/eval.py'
    command = 'python3 -u %s --dt=%s --gt=%s --iou=%f | tee -a %s' % (eval_script, det_save_odgt, det_save_odgt, 0.2, det_save_eval_log_txt)
    os.system(command)
    print('done')
Example No. 15
def predict(images: List[Union[str, os.PathLike]],
        model: EfficientDetBackbone,
        compound_coef: float, 
        resize: Optional[Union[int, Tuple[int, int]]] = None,
        confidence: Optional[float] = 0.5,
        nms_threshold: Optional[float] = 0.5,
        output_path: Union[str, os.PathLike] = "../",
    ) -> None:
    """Generate Predictions on test images in a folder.

    Args:
        images (List[Union[str, os.PathLike]]): List of test image path to run predictions.
        model (EfficientDetBackbone): EfficientDet model.
        compound_coef (float): Compound scaling coefficient.
        resize (Optional[Union[int, Tuple[int, int]]], optional): Resize of test images. Defaults to None.
        confidence (Optional[float], optional): confidence score to filter detections. Defaults to 0.5.
        nms_threshold (Optional[float], optional): IOU threshold to filter duplicate detections. Defaults to 0.5.
        output_path (Union[str, os.PathLike], optional): Output path/file where final output needs to be stored. Defaults to "../".

    Raises:
        IOError: Raises when output_path do not exist.
    """      

    # Initializing results
    results = {}
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    
    #Iterating over all images
    for image_path in tqdm(images):
        # Initialize and get image name.
        img_result = []
        img_name = image_path.split('/')[-1]

        #Preprocess image
        ori_imgs, framed_imgs, framed_metas = preprocess(image_path, max_size=INPUT_SIZES[compound_coef])
        x = torch.from_numpy(framed_imgs[0])
        
        #Convert to CUDA or CPU.
        if USE_CUDA:
            x = x.cuda()
            x = x.float()
        else:
            x = x.float()

        #Batching
        x = x.unsqueeze(0).permute(0, 3, 1, 2)

        #Run model
        features, regression, classification, anchors = model(x)

        #Applying threshold and NMS on predictions
        preds = postprocess(x,
                            anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            confidence, nms_threshold)
        
        #Continue if there are no predictions for this image.
        if not preds:
            results[img_name] = img_result
            continue
        
        #Convert predictions.
        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        #Convert bbox and others to required format.
        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            #rois[:, 2] -= rois[:, 0]
            #rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                img_result.append({
                    'class_index': label,
                    'bbox': box.tolist(),
                    'confidence': float(score)
                })

                results[img_name] = img_result

    if not len(results):
        print('The model does not provide any valid output, check model architecture and the data input')

    # Write output
    if output_path.endswith(".json"):
        if os.path.exists(os.path.dirname(output_path)):
            output_file = output_path
        else:
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            output_file = output_path
    elif os.path.isdir(output_path):
        output_file = os.path.join(
            output_path, "yolov5_predictions_" + str(time.time()).split(".")[0] + ".json"
        )

    else:
        raise IOError(
            f"{Fore.RED} no such directory {os.path.dirname(output_path)} {Style.RESET_ALL}"
        )

    with open(output_file, "w") as f:
        json.dump(results, f, indent=2)
    print(f"Detections are written to {output_file}.")
def inference():
    compound_coef = 0
    force_input_size = None  # set None to use default size
    img_path = 'test/original_img.jpg'
    
    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']


    color_list = standard_to_bgr(STANDARD_COLORS)
    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[2] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDet_semanticBackbone(compound_coef=1, num_classes=len(obj_list),
                                ratios=anchor_ratios, scales=anchor_scales)

    model.load_state_dict(torch.load('model_weight/model_1_epoch_80.pth'))

    if use_cuda:
        model = model.cuda()

    with torch.no_grad():
        features, regression, classification, anchors, sem_out = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x,
                        anchors, regression, classification,
                        regressBoxes, clipBoxes,
                        threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    out = box(out, ori_imgs, color_list, obj_list, imshow=False, imwrite=False)

    outputs = sem_out.data.cpu().numpy() # (shape: (batch_size, num_classes, img_h, img_w))
    pred_label_imgs = np.argmax(outputs, axis=1) # (shape: (batch_size, img_h, img_w))
    pred_label_imgs = pred_label_imgs.astype(np.uint8)

    z = cv2.resize(pred_label_imgs[0], (ori_imgs[0].shape[1], ori_imgs[0].shape[0]))

    from semantic_utils.utils import label_img_to_color
    pred_label_img_color = label_img_to_color(z)
    overlayed_img = 0.35*out + 0.65*pred_label_img_color

    flag = cv2.imwrite('test/semantic_img_1.jpg', overlayed_img)
    return flag
Ejemplo n.º 17
0
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    # view
    # start =0
    # for each in [8, 16, 32,64,128]:
    #     ll = (input_size//each) **2
    #     ss=classification[:,start:start+ll,:]
    #     tt
    #     start += ll
    #     n=torch.argmax(ss,dim=-1)
    #     print(np.array(n.view(input_size//each, input_size//each).cpu()))
    out = postprocess(x,
                      anchors,
                      regression,
                      classification,
                      regressBoxes,
                      clipBoxes,
                      threshold,
                      iou_threshold,
                      anchor_free_mode=config.anchor_free_mode)


def display(preds, imgs, imshow=True, imwrite=False):
    for i in range(len(imgs)):
        if len(preds[i]['rois']) == 0:
            continue

        for j in range(len(preds[i]['rois'])):
            x1, y1, x2, y2 = preds[i]['rois'][j].astype(int)
            obj = obj_list[preds[i]['class_ids'][j]]
            score = float(preds[i]['scores'][j])
Ejemplo n.º 18
0
def getImageDetections(imagePath, weights, nms_threshold, confidenceParam, coefficient):
    """
    Runs the detections and returns all detection into a single structure.

    Parameters
    ----------
    imagePath : str
        Path to all images.
    weights : str
        path to the weights.
    nms_threshold : float
        non-maximum supression threshold.
    confidenceParam : float
        confidence score for the detections (everything above this threshold is considered a valid detection).
    coefficient : int
        coefficient of the current efficientdet model (from d1 to d7).

    Returns
    -------
    detectionsList : List
        return a list with all predicted bounding-boxes.

    """
    compound_coef = coefficient
    force_input_size = None  # set None to use default size
    img_path  = imagePath

    threshold = confidenceParam
    iou_threshold = nms_threshold

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True
    obj_list = ['class_name']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                # replace this part with your project's anchor config
                                ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
                                scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

    model.load_state_dict(torch.load(rootDir+'logs/' + project + '/' + weights))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x,
                          anchors, regression, classification,
                          regressBoxes, clipBoxes,
                          threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
     
    detectionsList = []
    for i in range(len(ori_imgs)):
        if len(out[i]['rois']) == 0:
            continue
        for j in range(len(out[i]['rois'])):
            (x1, y1, x2, y2) = out[i]['rois'][j].astype(int)
            detectionsList.append((float(out[i]['scores'][j]), x1, y1, x2, y2))
    return detectionsList
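
A minimal usage sketch for getImageDetections (the image path, weight file name, and thresholds below are hypothetical, and the function also relies on the module-level globals rootDir and project when loading the weights):

# Hypothetical paths and thresholds, for illustration only.
detections = getImageDetections(
    imagePath='datasets/val/img_0001.jpg',
    weights='efficientdet-d0_best.pth',
    nms_threshold=0.5,
    confidenceParam=0.4,
    coefficient=0,
)
for score, x1, y1, x2, y2 in detections:
    print('{:.3f}: ({}, {}) -> ({}, {})'.format(score, x1, y1, x2, y2))
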
Ejemplo n.º 19
0
def prep_metrics(ap_data, dets, img, labels, detections=None, image_id=None):
    """Mainly update the ap_data for validation table"""
    # get the shape of image
    w = tf.shape(img)[1]
    h = tf.shape(img)[2]
    # tf.print(f"img size (w, h):{w}, {h}")

    # Load prediction
    classes, scores, boxes, masks = postprocess(dets, w, h, 0, "bilinear")

    # if no detection or only one detection
    if classes is None:
        return
    if tf.size(scores) == 1:
        scores = tf.expand_dims(scores, axis=0)
        masks = tf.expand_dims(masks, axis=0)
    boxes = tf.expand_dims(boxes, axis=0)
    #
    # tf.print("prep classes", tf.shape(classes))
    # tf.print("prep scores", tf.shape(scores))
    # tf.print("prep boxes", tf.shape(boxes))
    # tf.print("prep masks", tf.shape(masks))

    # Load gt
    gt_bbox = labels['bbox']
    gt_classes = labels['classes']
    gt_masks = labels['mask_target']
    num_obj = labels['num_obj']
    num_gt = num_obj.numpy()[0]
    # prepare data
    classes = list(classes.numpy())
    scores = list(scores.numpy())
    box_scores = scores
    mask_scores = scores

    # if output json, add things to detections objects

    # else
    num_pred = len(classes)
    # tf.print("num pred", num_pred)
    # tf.print("num gt", num_gt)
    #
    # tf.print('prep gt bbox', tf.shape(gt_bbox))
    # tf.print('prep gt classes', tf.shape(gt_classes))
    # tf.print('prep gt masks', tf.shape(gt_masks))
    # tf.print('prep num crowd', tf.shape(num_crowd))
    # tf.print("prep num obj", tf.shape(num_obj))

    # resize gt mask
    # should be [num_gt, w, h]
    masks_gt = tf.squeeze(tf.image.resize(tf.expand_dims(gt_masks[0], axis=-1),
                                          [h, w],
                                          method='bilinear'),
                          axis=-1)

    # calculating the IOU first
    mask_iou_cache = _mask_iou(masks, masks_gt).numpy()
    bbox_iou_cache = tf.squeeze(_bbox_iou(boxes, gt_bbox).numpy(), axis=0)
    # tf.print(tf.shape(boxes))
    # tf.print(tf.shape(gt_bbox))
    # tf.print(tf.shape(bbox_iou_cache))
    # tf.print("non crowd mask iou shape:", tf.shape(mask_iou_cache))
    # tf.print("non crowd bbox iou shape:", tf.shape(bbox_iou_cache))

    crowd_mask_iou_cache = None
    crowd_bbox_iou_cache = None

    # get the sorted index of scores (descending order)
    box_indices = sorted(range(num_pred), key=lambda idx: -box_scores[idx])
    mask_indices = sorted(box_indices, key=lambda idx: -mask_scores[idx])

    # define some useful lambda function for next section
    # avoid writing "bbox_iou_cache[row, col]" too many times, wrap it as a lambda func
    iou_types = [('box', lambda row, col: bbox_iou_cache[row, col],
                  lambda row, col: crowd_bbox_iou_cache[row, col],
                  lambda idx: box_scores[idx], box_indices),
                 ('mask', lambda row, col: mask_iou_cache[row, col],
                  lambda row, col: crowd_mask_iou_cache[row, col],
                  lambda idx: mask_scores[idx], mask_indices)]

    # Because we ignore 0 (classified as background) need to fix it here
    gt_classes = list(gt_classes[0].numpy() - 1)

    # starting to update the ap_data from this batch
    for _class in set(classes + gt_classes):
        # calculating how many labels belong to this class
        num_gt_for_class = sum([1 for x in gt_classes if x == _class])

        for iouIdx in range(len(iou_thresholds)):
            th = iou_thresholds[iouIdx]

            for iou_type, iou_func, crowd_func, score_func, indices in iou_types:
                gt_used = [False] * len(gt_classes)

                # get certain APobject
                ap_obj = ap_data[iou_type][iouIdx][_class]
                ap_obj.add_gt_positive(num_gt_for_class)

                for i in indices:
                    if classes[i] != _class:
                        continue

                    max_iou_found = th
                    max_match_idx = -1

                    for j in range(num_gt):
                        if gt_used[j] or gt_classes[j] != _class:
                            continue
                        iou = iou_func(i, j)
                        if iou > max_iou_found:
                            max_iou_found = iou
                            max_match_idx = j
                    if max_match_idx >= 0:
                        gt_used[max_match_idx] = True
                        ap_obj.push(score_func(i), True)
                    else:
                        ap_obj.push(score_func(i), False)
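
The matching loop above drives an accumulator through two calls: add_gt_positive() to count how many ground-truth boxes exist for the class, and push(score, is_true) to record each prediction as a true or false positive. Below is a minimal sketch of such an accumulator, assuming the project's real APObject also exposes a method that turns the pushed pairs into average precision; the class name and its ap() method are illustrative, not the project's actual implementation:

class APAccumulator:
    def __init__(self):
        self.data_points = []       # (score, is_true_positive) pairs
        self.num_gt_positives = 0

    def add_gt_positive(self, num_positives):
        self.num_gt_positives += num_positives

    def push(self, score, is_true):
        self.data_points.append((score, is_true))

    def ap(self):
        # Precision/recall over score-sorted predictions, then the area under the curve.
        if self.num_gt_positives == 0:
            return 0.0
        self.data_points.sort(key=lambda x: -x[0])
        precisions, recalls, num_tp, num_fp = [], [], 0, 0
        for _, is_true in self.data_points:
            if is_true:
                num_tp += 1
            else:
                num_fp += 1
            precisions.append(num_tp / (num_tp + num_fp))
            recalls.append(num_tp / self.num_gt_positives)
        # Simple (non-interpolated) area under the PR curve.
        ap, prev_recall = 0.0, 0.0
        for p, r in zip(precisions, recalls):
            ap += p * (r - prev_recall)
            prev_recall = r
        return ap
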
Ejemplo n.º 20
0
def main(compound_coef=0, model_dir=MODEL_DIR, nms_threshold=0.5, use_cuda=False, use_float16=False,
         image_batch_size=2):
    threshold = 0.05

    cudnn.fastest = True
    cudnn.benchmark = True

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef]

    model = model_fn(model_dir=model_dir, compound_coef=compound_coef, use_cuda=use_cuda,
                     use_float16=use_float16)

    image_paths = glob.glob(os.path.join(DATA, '*.jpg'))

    L = len(image_paths)
    print(f'processing {L} images in batches of {image_batch_size}')
    results = {}
    loop_start = datetime.datetime.now()
    for image_batch in image_path_batches(image_paths, image_batch_size):
        batch_start = datetime.datetime.now()
        ori_images, framed_images, framed_metas = preprocess(*image_batch, max_size=input_size)

        # build tensor from framed images
        x = torch.stack([(torch.from_numpy(fi).cuda()
                          if use_cuda
                          else torch.from_numpy(fi))
                         for fi in framed_images],
                        0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x,
                              anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, nms_threshold)

            out = invert_affine(framed_metas, out)

        batch_end = datetime.datetime.now()
        batch_time = (batch_end - batch_start).total_seconds()
        print(f"batch_time = {batch_time} (s)")
        print(f"batch_size = {image_batch_size}")
        print(f"FPS = {image_batch_size / batch_time:0.4f}")
        print(f"SPF = {batch_time / image_batch_size:0.4f}")

        results.update(dict(zip(image_batch, out)))

    loop_end = datetime.datetime.now()
    loop_time = (loop_end - loop_start).total_seconds()
    print('\nfinal summary:')
    print(f"total processing time: {loop_time} (s)")
    print(f"number of frames processed: {len(image_paths)}")
    print(f"batch_size = {image_batch_size}")
    print(f"FPS: {L / loop_time:0.4f}")
    print(f"SPF: {loop_time / L:0.4f}")

    with open(f'results.{compound_coef}.pkl', 'wb') as fp:
        pickle.dump(results, fp)
Ejemplo n.º 21
0
def evaluate_voc(gt_dict, img_paths, model, max_size, config):
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for idx, image_path in enumerate(tqdm(img_paths)):
        
        ori_imgs, framed_imgs, framed_metas = preprocess(image_path, max_size=max_size)
        x = torch.from_numpy(framed_imgs[0])

        if config.eval_use_cuda:
            x = x.cuda(config.eval_gpu)
            if config.eval_use_float16:
                x = x.half()
            else:
                x = x.float()
        else:
            x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)
        

        preds = postprocess(x,
                            anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            config.eval_threshold, config.eval_nms_threshold,
                            anchor_free_mode=config.anchor_free_mode)
        
        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # # x1,y1,x2,y2 -> x1,y1,w,h
            # rois[:, 2] -= rois[:, 0]
            # rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                image_result = [idx, box[0], box[1], box[2], box[3], score, label]

                results.append(image_result)

    if not len(results):
        raise Exception('the model does not provide any valid output, check model architecture and the data input')
    voc_certs = []
    for idx in range(len(config.obj_list)):
        npos, nd, rec, prec, ap = voc_eval(gt_dict, results, idx, iou_thres=0.5, use_07_metric=False)
        voc_certs.append([prec, rec, ap])
    return voc_certs
Ejemplo n.º 22
0
def main(i):
    compound_coef = i
    force_input_size = None  # set None to use default size

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']

    out_dict = dict()
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                    ratios=anchor_ratios, scales=anchor_scales)
    model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth', map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()

    base_dir = '/data/jiashenc/jackson/'

    print('Processing Det-' + str(i))

    for k in range(1000000, 1100000):
        if k % 1000 == 0:
            print('    Finish {} frames'.format(k + 1))
            
        img_path = os.path.join(base_dir, 'frame{}.jpg'.format(k))
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x,
                              anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)


        out = invert_affine(framed_metas, out)
        to_json(out, out_dict)

    with open(os.path.join(base_dir, '10', 'res-{:d}.json'.format(i)), 'w') as f:
        json.dump(out_dict, f)
        out_dict = dict()
    def forward(self, classifications, regressions, anchors, annotations,
                **kwargs):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[
            0, :, :]  # assuming all image sizes are the same, which it is
        dtype = anchors.dtype

        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            # Clamp scores away from 0/1 so the log terms below stay finite
            # (the other focal-loss variants in this collection do the same).
            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = alpha_factor.cuda()
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(
                        focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                    classification_losses.append(cls_loss.sum())
                else:

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(
                        focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(torch.tensor(0).to(dtype))
                    classification_losses.append(cls_loss.sum())

                continue

            IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # compute the loss for classification
            targets = torch.ones_like(classification) * -1
            if torch.cuda.is_available():
                targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices,
                                                           4].long()] = 1

            alpha_factor = torch.ones_like(targets) * alpha
            if torch.cuda.is_available():
                alpha_factor = alpha_factor.cuda()

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce

            zeros = torch.zeros_like(cls_loss)
            if torch.cuda.is_available():
                zeros = zeros.cuda()
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        # debug
        imgs = kwargs.get('imgs', None)
        if imgs is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(
                imgs.detach(),
                torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                regressions.detach(), classifications.detach(), regressBoxes,
                clipBoxes, 0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) *
                    255).astype(np.uint8)
            # imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            # Uncomment the above line if you're storing the images using opencv.

            for i, _ in enumerate(imgs):
                if len(out[i]['rois']) == 0:
                    continue

                for j in range(len(out[i]['rois'])):
                    (x1, y1, x2, y2) = out[i]['rois'][j].astype(int)
                    cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0),
                                  2)
                    obj = obj_list[out[i]['class_ids'][j]]
                    score = float(out[i]['scores'][j])

                    cv2.putText(imgs[i], '{}, {:.3f}'.format(obj, score),
                                (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 0), 1)

            return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(regression_losses).mean(dim=0, keepdim=True), imgs

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(regression_losses).mean(dim=0, keepdim=True)
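
The classification term implemented in forward() above is the standard focal loss with alpha = 0.25 and gamma = 2.0. As a sketch of what the per-anchor code computes, where p is the predicted score and t in {0, 1} is the assigned target (anchors whose best IoU falls between 0.4 and 0.5 keep a target of -1 and are zeroed out of the loss):

$$\mathrm{FL}(p, t) = -\,\alpha_t\,(1 - p_t)^{\gamma}\,\log(p_t), \qquad p_t = \begin{cases} p & t = 1 \\ 1 - p & t = 0 \end{cases}, \qquad \alpha_t = \begin{cases} \alpha & t = 1 \\ 1 - \alpha & t = 0 \end{cases}$$

The summed classification loss is then divided by max(num_positive_anchors, 1), and the box branch applies a smooth-L1 penalty (beta = 1/9) to the (dy, dx, dh, dw) regression targets of the positive anchors.
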
Ejemplo n.º 24
0
        input_data['token_type_ids'] = input_data['token_type_ids'].unsqueeze(
            0)
        ####################
        ## 2. Load the model ##
        ####################

        # 3. Feed the data into the model
        inputs = {
            'input_ids': input_data['ids'],
            'token_type_ids': input_data['token_type_ids'],
            'attention_mask': input_data['mask']
        }

        outputs = model(**inputs)

        intent_str, intent_candidate_str, score_str = postprocess(
            outputs, model)

        print('Intent:', intent_str)
        print(intent_candidate_str)
        print(score_str)

        m = nn.Softmax(dim=1)
        output_softmax = m(outputs.logits)

        if False:  # toggle graph on/off

            print(output_softmax.topk(3).values.tolist())
            output_softmax_value = output_softmax.squeeze()
            xx = range(0, 31)
            plt.plot(xx, output_softmax_value.tolist())
            plt.show()
Ejemplo n.º 25
0
def excuteModel(videoname):
    # Video's path
    # set int to use webcam, set str to read from a video file

    if videoname is not None:
        video_src = os.path.join(r'D:\GitHub\Detection\server\uploads', f"{videoname}.mp4")
    else:
        video_src = 'D:\\GitHub\\Detection\\server\\AImodel\\videotest\\default.mp4'

    compound_coef = 2
    trained_weights = 'D:\\GitHub\\Detection\\server\\AImodel\\weights\\efficientdet-video.pth'

    force_input_size = None  # set None to use default size

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(
        compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(trained_weights))

    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    # function for display

    # Box
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    writer = None
    # try to determine the total number of frames in the video file
    try:
        prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() \
            else cv2.CAP_PROP_FRAME_COUNT
        total = int(cap.get(prop))
        print("[INFO] {} total frames in video".format(total))

    # an error occurred while trying to determine the total
    # number of frames in the video file
    except Exception:
        print("[INFO] could not determine # of frames in video")
        total = -1

    path_out = os.path.join(os.path.dirname(
        os.path.abspath(__file__)), 'outvideo')

    path_result = r"D:\GitHub\Detection\server\AImodel\videotest\default.mp4"
    path_asset = r"D:\GitHub\Detection\client\src\assets"
    for i in range(0, length):
        ret, frame = cap.read()
        if not ret:
            break

        # frame preprocessing
        ori_imgs, framed_imgs, framed_metas = preprocess_video(
            frame, max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda()
                             for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            out = postprocess(x,
                              anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)

        # result
        out = invert_affine(framed_metas, out)
        img_show = display(out, ori_imgs, obj_list)

        if writer is None:

            # initialize our video writer
            fourcc = 0x00000021
            #fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            if videoname is not None:
                path_result = os.path.join(path_out, f"{videoname}.mp4")
            else:
                path_result = os.path.join(path_out, "default.mp4")

            writer = cv2.VideoWriter(path_result, fourcc, 30, (img_show.shape[1], img_show.shape[0]), True)


        # write the output frame to disk
        writer.write(img_show)
        print("Processing data... " + str(round((i+1)/length, 3)*100) + " %")
        # show frame by frame
        #cv2.imshow('frame', img_show)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print("[INFO] cleaning up...")

    writer.release()
    cap.release()
    cv2.destroyAllWindows()

    if videoname is not None:
        path_asset = os.path.join(path_asset, f"{videoname}.mp4")
    else:
        path_asset = os.path.join(path_asset, "default.mp4")
    copyfile(path_result, path_asset)
    return path_asset
Ejemplo n.º 26
0
def calc_mAP(imgs, annot, model, writer, it, local_it):
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    orig_train = model.training
    model.eval()
    threshold = 0.2
    iou_threshold = 0.2
    with torch.no_grad():
        features, regression, classification, anchors = model.model(imgs)
        # get max. confidence
        # there are batch_size out dicts
        out = postprocess(imgs, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)
    mAP_list = []
    for im_idx in range(len(out)):  # iterate through images
        # (1) check ground truth
        curr_annot = annot[im_idx, :, :]  # e.g. (8,5)
        curr_annot = curr_annot[curr_annot[..., 0] != -1.]  # e.g. (6,5)
        gt_boxes = curr_annot[:, :4]
        gt_cls = curr_annot[:, 4]
        if gt_cls.shape[0] == 0:
            continue
        # (2) check prediction
        out_ = out[im_idx]
        pred_boxes = out_['rois']
        pred_classes = out_['class_ids']
        pred_scores = out_['scores']
        curr_img = imgs[im_idx]
        # (3) build map tuple
        map_tuple = tuple()
        map_tuple += (pred_boxes / curr_img.shape[1], )
        map_tuple += (pred_classes, )
        map_tuple += (pred_scores, )
        map_tuple += (gt_boxes.cpu() / curr_img.shape[2], )
        map_tuple += (gt_cls.cpu(), )
        if map_tuple is not None:
            mAP_list.append(map_tuple)
    mAP = DetectionMAP(3)
    overall_mAP = []
    classwise_mAP = []
    for mAP_item in mAP_list:
        mAP.evaluate(*mAP_item)
        ov_, cls_ = mAP.map(class_names=["vehicle", "pedestrian", "cyclist"])
        overall_mAP.append(ov_)
        classwise_mAP.append(cls_)
    key_arrays = {}
    for item in classwise_mAP:
        for k, v in item.items():
            key_arrays.setdefault(k, []).append(v)
    ave = {
        k: reduce(lambda x, y: x + y, v) / len(v)
        for k, v in key_arrays.items()
    }

    if len(overall_mAP) > 0:
        writer.add_scalars('mAP', {'val': np.mean(overall_mAP)}, it)
    for k in ave.keys():
        writer.add_scalars('val mAP {}'.format(k), {'val': ave[k]}, it)
    if orig_train:
        model.train()
    return np.mean(overall_mAP)
Ejemplo n.º 27
0
    def forward(self, classifications, regressions, anchors, annotations,
                **kwargs):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[
            0, :, :]  # assuming all image sizes are the same, which it is
        dtype = anchors.dtype

        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = alpha_factor.cuda(self.gpu_id)
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(
                        focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(
                        torch.tensor(0).to(dtype).cuda(self.gpu_id))
                    classification_losses.append(cls_loss.sum())
                else:

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(
                        focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(torch.tensor(0).to(dtype))
                    classification_losses.append(cls_loss.sum())

                continue

            IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # compute the loss for classification
            targets = torch.ones_like(classification) * -1
            if torch.cuda.is_available():
                targets = targets.cuda(self.gpu_id)

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices,
                                                           4].long()] = 1

            alpha_factor = torch.ones_like(targets) * alpha
            if torch.cuda.is_available():
                alpha_factor = alpha_factor.cuda(self.gpu_id)

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce

            zeros = torch.zeros_like(cls_loss)
            if torch.cuda.is_available():
                zeros = zeros.cuda(self.gpu_id)
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(
                        torch.tensor(0).to(dtype).cuda(self.gpu_id))
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        # debug
        imgs = kwargs.get('imgs', None)
        if imgs is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(
                imgs.detach(),
                torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                regressions.detach(), classifications.detach(), regressBoxes,
                clipBoxes, 0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) *
                    255).astype(np.uint8)
            imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            display(out, imgs, obj_list, imshow=False, imwrite=True)

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(regression_losses).mean(dim=0, keepdim=True) * 50  # https://github.com/google/automl/blob/6fdd1de778408625c1faf368a327fe36ecd41bf7/efficientdet/hparams_config.py#L233
Ejemplo n.º 28
0
def plot_tensorboard(imgs, annot, model, writer, it, local_it, mAP_value):
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    orig_train = model.training
    model.eval()
    with torch.no_grad():
        features, regression, classification, anchors = model.model(imgs)
        # get max. confidence
        max_pred = 1e9
        out = postprocess(imgs, anchors, regression, classification,
                          regressBoxes, clipBoxes, 0, 0, max_pred)[0]
        try:
            max_confidence = np.array(out['scores']).max()
        except:
            max_confidence = 0
        # filter out trash predictions
        threshold = 0.2
        iou_threshold = 0.2
        out = postprocess(imgs, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold,
                          max_pred)[0]
        boxes = out['rois']
        img = imgs[0].detach().cpu().numpy()
        img = (img - img.min())
        img = img / img.max()
        fig, ax = plt.subplots(1, 1)
        ax.axis("off")
        colors = ["red", "green", "blue", "black"]
        ax.imshow(torch.from_numpy(img).permute(1, 2, 0))
        for idx in range(boxes.shape[0]):
            cx, cy, lx, ly = boxes[idx]
            cw, ch = lx - cx, ly - cy
            class_idx = out["class_ids"][idx]
            if class_idx < 3:
                color = colors[class_idx]
            else:
                color = colors[-1]
            rect = patches.Rectangle((cx, cy),
                                     cw,
                                     ch,
                                     linewidth=1,
                                     edgecolor=color,
                                     facecolor='none')
            ax.add_patch(rect)

        for idx in range(annot.shape[1]):
            curr_annot = annot[0, idx]
            cx, cy, lx, ly = curr_annot[:4]
            cw, ch = lx - cx, ly - cy
            rect = patches.Rectangle((cx, cy),
                                     cw,
                                     ch,
                                     linewidth=1,
                                     edgecolor='none',
                                     facecolor='white',
                                     alpha=0.2)
            ax.add_patch(rect)
        ax.set_title("Max. Confidence: {:.3f}.".format(max_confidence))
        fig.canvas.draw()
        img = torch.from_numpy(
            np.array(fig.canvas.renderer._renderer)[:, :, :3]).permute(
                2, 0, 1)
        plt.close()
        writer.add_image('Prediction/{}'.format(local_it), img, it)
    if orig_train:
        model.train()
model = EfficientDetBackbone(compound_coef=compound_coef,
                             num_classes=len(obj_list))
model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
model.requires_grad_(False)
model.eval()

if use_cuda:
    model = model.cuda()

with torch.no_grad():
    features, regression, classification, anchors = model(x)

regressBoxes = BBoxTransform()
clipBoxes = ClipBoxes()

out = postprocess(x, anchors, regression, classification, regressBoxes,
                  clipBoxes, threshold, iou_threshold)


def display(preds, imgs, imshow=True, imwrite=False):
    for i in range(len(imgs)):
        if len(preds[i]['rois']) == 0:
            continue

        for j in range(len(preds[i]['rois'])):
            (x1, y1, x2, y2) = preds[i]['rois'][j].astype(int)
            cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0), 2)
            obj = obj_list[preds[i]['class_ids'][j]]
            score = float(preds[i]['scores'][j])

            cv2.putText(imgs[i], '{}, {:.3f}'.format(obj, score),
                        (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 255, 0), 1)


def evaluate_coco(img_path, set_name, image_ids, coco, model, params, step, threshold=0.2, nms_threshold=0.5,
                  compound_coef=4, use_cuda=True):
    results = []

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    for image_id in tqdm(image_ids):
        image_info = coco.loadImgs(image_id)[0]
        image_path = img_path + image_info['file_name']
        image = cv.imread(image_path)
        ori_imgs, framed_imgs, framed_metas = preprocess([image], max_size=input_sizes[compound_coef])
        x = torch.from_numpy(framed_imgs[0])

        if use_cuda:
            x = x.cuda(0)
        x = x.float()

        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        features, regression, classification, anchors = model(x)

        preds = postprocess(x,
                            anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            threshold, nms_threshold)
        if not preds:
            continue

        preds = invert_affine(framed_metas, preds)[0]

        display([preds], [image], params['obj_list'], imshow=False, imwrite=False, send=True, step=step, tag='val')

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.shape[0] > 0:
            # x1,y1,x2,y2 -> x1,y1,w,h
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]

            bbox_score = scores

            for roi_id in range(rois.shape[0]):
                score = float(bbox_score[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]

                image_result = {
                    'image_id': image_id,
                    'category_id': label + 1,
                    'score': float(score),
                    'bbox': box.tolist(),
                }

                results.append(image_result)

    if not len(results):
        raise Exception('the model does not provide any valid output, check model architecture and the data input')

    # write output
    filepath = f'{set_name}_bbox_results.json'
    if os.path.exists(filepath):
        os.remove(filepath)
    with open(filepath, 'w') as f:
        json.dump(results, f, indent=4)
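
A hedged sketch of one entry in the resulting {set_name}_bbox_results.json file (the numbers are illustrative; bbox has already been converted to [x1, y1, w, h] above and category_id is the predicted label shifted by +1):

[
  {
    "image_id": 42,
    "category_id": 1,
    "score": 0.87,
    "bbox": [100.0, 150.0, 60.0, 40.0]
  }
]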