def prettyShow(recovered_sample):
    """Display a recovered sample for visual inspection.

    Draws the sample's bounding boxes onto its image, prints the sample's
    rotation angle between two banner lines, then shows the image.

    Parameters
    ----------
    recovered_sample : dict
        Expected keys (from usage below): 'image', 'bounding_boxes', 'angle'.
    """
    # Banner defined once instead of duplicating the long literal inline.
    banner = '$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'
    image = draw_bounding_boxes(recovered_sample['image'], recovered_sample['bounding_boxes'])
    print(banner)
    print('ANGLE: ')
    print(recovered_sample['angle'])
    print(banner)
    image.show()
def draw_anchors(*, config='config', nb=100, only_groundtruth=False):
    """Render the first `nb` dataset samples with their groundtruth boxes
    (red) and, unless `only_groundtruth`, the anchor boxes that were matched
    to a non-background class during encoding (green). Images are written to
    `<out_folder>/anchors/sample_XXXXX.jpg`.
    """
    cfg = _read_config(config)
    # Anchor boxes for each scale of the detector.
    anchor_list = _build_anchor_list(cfg)
    (dataset, _), _ = _build_dataset(cfg, anchor_list)
    bgid = dataset.class_to_idx['background']
    mean = cfg['mean']
    std = cfg['std']
    image_size = cfg['image_size']
    for i in range(nb):
        x, gt_boxes, encoding = dataset[i]
        # Groundtruth boxes are stored normalized; rescale to pixel units
        # and map class ids to class names.
        gt_boxes = [
            ((x*image_size, y*image_size, w*image_size, h*image_size), dataset.idx_to_class[class_id])
            for ((x, y, w, h), class_id) in gt_boxes
        ]
        # CHW tensor -> HWC float image, un-normalized for display.
        x = x.numpy()
        x = x.transpose((1, 2, 0))
        x = x * np.array(std) + np.array(mean)
        x = x.astype('float32')
        anchor_boxes = []
        if not only_groundtruth:
            # Collect every anchor (per scale j) whose encoded class is not
            # background, i.e. anchors matched to an object.
            for j, e in enumerate(encoding):
                B, C = e
                A = anchor_list[j]
                hcoords, wcoords, k_id = np.where(C != bgid)
                for h, w, k in zip(hcoords, wcoords, k_id):
                    a = image_size * A[h, w, k]
                    a = a.tolist()
                    c = C[h, w, k]
                    c = dataset.idx_to_class[c]
                    anchor_boxes.append((a, c))
        # Pad the canvas so boxes that overflow the image edges stay visible.
        pad = 100
        im = np.zeros((x.shape[0] + pad * 2, x.shape[1] + pad * 2, x.shape[2]))
        im[pad:-pad, pad:-pad] = x
        # Groundtruth in red, matched anchors in green.
        im = draw_bounding_boxes(im, gt_boxes, color=(1, 0, 0), text_color=(1, 0, 0), pad=pad)
        im = draw_bounding_boxes(im, anchor_boxes, color=(0, 1, 0), text_color=(0, 1, 0), pad=pad)
        imsave(os.path.join(cfg['out_folder'], 'anchors', 'sample_{:05d}.jpg'.format(i)), im)
def test(
        *,
        in_folder='test_images',
        out_folder='test_results',
        model='out/model.th',
        score_threshold=None,
        topk=10,
        iou_threshold=None,
        background_threshold=0.5,
        use_nms=True,
        out=None,
        cuda=False):
    """Run a trained detector over every image in `in_folder` and write each
    image, annotated with its predicted boxes, to `out_folder`.

    Parameters
    ----------
    in_folder : folder of input images.
    out_folder : folder where annotated images are written (created if needed).
    model : path to a serialized model (torch.load-able).
    score_threshold / iou_threshold : NMS thresholds; default to the values
        stored in the model's config when None.
    topk : keep at most this many boxes per image.
    background_threshold : when `use_nms` is False, a box whose background
        score is >= this value is classified as background.
    use_nms : apply per-class non-maximal suppression when True.
    out : unused; kept for backward compatibility of the CLI signature.
    cuda : run the model on GPU when True.
    """
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)
    # map_location keeps CPU loading working for GPU-saved checkpoints.
    model = torch.load(model, map_location=lambda storage, loc: storage)
    if cuda:
        model = model.cuda()
    model.eval()

    # Resolve loop-invariant values once, outside the per-file loop.
    if score_threshold is None:
        score_threshold = model.config['nms_score_threshold']
    else:
        score_threshold = float(score_threshold)
    if iou_threshold is None:
        iou_threshold = model.config['nms_iou_threshold']
    else:
        iou_threshold = float(iou_threshold)
    bgid = model.class_to_idx['background']
    idx_to_class = {i: c for c, i in model.class_to_idx.items()}

    filenames = [os.path.join(in_folder, f) for f in os.listdir(in_folder)]
    for filename in filenames:
        t0 = time.time()
        try:
            im = default_loader(filename)
        except OSError:
            # Skip unreadable / non-image files.
            continue
        w, h = im.size
        x = model.transform(im)
        # Scale factors from model input space back to the original image.
        scale_w, scale_h = w / x.size(2), h / x.size(1)
        X = x.view(1, x.size(0), x.size(1), x.size(2))
        if cuda:
            X = X.cuda()
        X = Variable(X)
        Ypred = model(X)
        # Reshape raw per-scale outputs to (batch, H, W, anchors, coords/classes).
        BP = [
            bp.data.cpu().
            view(bp.size(0), -1, model.num_coords, bp.size(2), bp.size(3)).
            permute(0, 3, 4, 1, 2).
            numpy()
            for bp, cp in Ypred]
        CP = [
            cp.data.cpu().
            view(cp.size(0), -1, model.nb_classes, cp.size(2), cp.size(3)).
            permute(0, 3, 4, 1, 2).
            numpy()
            for bp, cp in Ypred]
        X = X.data.cpu().numpy()
        x = X[0]
        x = x.transpose((1, 2, 0))
        x = x * np.array(model.std) + np.array(model.mean)
        x = x.astype('float32')
        pred_boxes = []
        for j in range(len(Ypred)):
            cp = CP[j][0]  # per-class scores for scale j
            bp = BP[j][0]  # box coordinates for scale j
            if model.use_discrete_coords:
                bp = _get_coords(bp, model.coords_discretization)
            A = model.anchor_list[j]
            cp = get_probas(cp, model.classif_loss_name, axis=3)
            boxes = decode_bounding_box_list(
                bp, cp, A,
                image_size=model.image_size,
                include_scores=True,
                variance=model.config['variance']
            )
            pred_boxes.extend(boxes)
        if use_nms:
            pred_boxes = non_maximal_suppression_per_class(
                pred_boxes,
                iou_threshold=iou_threshold,
                background_class_id=bgid,
                score_threshold=score_threshold
            )
        else:
            def get_box(box, scores):
                # Classify as background when the background score dominates,
                # otherwise take the best non-background class.
                bg_score = scores[0]
                if bg_score >= background_threshold:
                    return box, 0, bg_score
                else:
                    cl = 1 + scores[1:].argmax()
                    score = scores[1:].max()
                    return box, cl, score
            pred_boxes = [get_box(box, scores) for box, scores in pred_boxes]
        # Keep the topk highest-scoring non-background boxes, named by class.
        pred_boxes = sorted(pred_boxes, key=lambda p: p[2], reverse=True)
        pred_boxes = [(box, class_id, score) for box, class_id, score in pred_boxes if class_id != bgid]
        pred_boxes = [(box, idx_to_class[class_id], score) for box, class_id, score in pred_boxes]
        pred_boxes = pred_boxes[0:topk]
        # Rescale boxes back to original image coordinates.
        pred_boxes = [
            ((x * scale_w, y * scale_h, w * scale_w, h * scale_h), class_name, score)
            for (x, y, w, h), class_name, score in pred_boxes
        ]
        for box, class_name, score in pred_boxes:
            print(class_name, score)
        im = np.array(im)
        pad = int(model.config['pad'] * scale_w)
        impadded = np.zeros((im.shape[0] + pad * 2, im.shape[1] + pad * 2, im.shape[2]))
        impadded = impadded.astype('uint8')
        impadded[pad:-pad, pad:-pad] = im
        im = draw_bounding_boxes(impadded, pred_boxes, color=(0, 255, 0), text_color=(0, 255, 0), pad=pad)
        delta = time.time() - t0
        outf = os.path.join(out_folder, os.path.basename(filename))
        print('Processed {} in {:.3f}s'.format(outf, delta))
        # Save the annotated image returned by draw_bounding_boxes
        # (previously saved `impadded`, which drops the drawn boxes unless
        # draw_bounding_boxes mutates its input in place — consistent with
        # _evaluate_model/draw_anchors, which save the returned image).
        imsave(outf, im)
def _evaluate_model(model, train, valid):
    """Evaluate `model` on the train and valid loaders.

    Computes per-class precision/recall and (m)AP per image, writes each
    evaluated image with groundtruth (red) and predicted (green) boxes to
    `<out_folder>/eval_<split>/`, prints all metrics, and returns a dict
    mapping metric name -> mean value.
    """
    print('Evaluation')
    bgid = train.dataset.class_to_idx['background']
    t0 = time.time()
    model.eval()
    metrics = defaultdict(list)
    cfg = model.config
    # All class ids except background, in a stable order.
    class_ids = list(set(train.dataset.class_to_idx.values()) - set([bgid]))
    class_ids = sorted(class_ids)
    for split_name, loader in (('train', train), ('valid', valid)):
        t0 = time.time()
        im_index = 0
        for batch, samples, in enumerate(loader):
            tt0 = time.time()
            X, (Y, Ypred), (bbox_true, bbox_pred), (class_true, class_pred) = _predict(model, samples)
            X = X.data.cpu().numpy()
            B = [[b for b, c in y] for y in Y]
            B = [torch.from_numpy(np.array(b)).float() for b in B]
            C = [[c for b, c in y] for y in Y]
            C = [torch.from_numpy(np.array(c)).long() for c in C]
            # B contains groundtruth bounding boxes for each scale
            # C contains groundtruth classes for each scale
            BP = [
                bp.data.cpu().view(bp.size(0), -1, model.num_coords, bp.size(2), bp.size(3)).permute(0, 3, 4, 1, 2).numpy()
                for bp, cp in Ypred]
            CP = [
                cp.data.cpu().view(cp.size(0), -1, model.nb_classes, cp.size(2), cp.size(3)).permute(0, 3, 4, 1, 2).numpy()
                for bp, cp in Ypred]
            # BP contains predicted bounding boxes for each scale
            # CP contains predicted classes for each scale
            for i in range(len(X)):
                gt_boxes = []
                pred_boxes = []
                gt_boxes_per_class = defaultdict(list)
                pred_boxes_per_class = defaultdict(list)
                # for each example i in mini-batch:
                # CHW tensor -> HWC float image, un-normalized for display.
                x = X[i]
                x = x.transpose((1, 2, 0))
                x = x * np.array(cfg['std']) + np.array(cfg['mean'])
                x = x.astype('float32')
                for j in range(len(Y)):
                    # for each scale j
                    ct = C[j][i]   # groundtruth class ids
                    cp = CP[j][i]  # predicted per-class scores
                    bt = B[j][i]   # groundtruth box coords
                    bp = BP[j][i]  # predicted box coords
                    if cfg['use_discrete_coords']:
                        bp = _get_coords(bp, model.coords_discretization)
                    A = model.anchor_list[j]
                    # get groundtruth boxes
                    gt_boxes.extend(decode_bounding_box_list(
                        bt, ct, A,
                        include_scores=False,
                        image_size=cfg['image_size'],
                        variance=cfg['variance'],
                    ))
                    # get predicted boxes
                    cp = get_probas(cp, cfg['classif_loss'], axis=3)
                    pred_boxes.extend(decode_bounding_box_list(
                        bp, cp, A,
                        include_scores=True,
                        image_size=cfg['image_size'],
                        variance=cfg['variance']
                    ))
                # Drop background groundtruth and bucket the rest per class.
                gt_boxes = [(box, class_id) for box, class_id in gt_boxes if class_id != bgid]
                for class_id in class_ids:
                    gt_boxes_per_class[class_id].extend([
                        box for box, box_class_id in gt_boxes
                        if class_id == box_class_id]
                    )
                # use the predicted boxes and groundtruth boxes to compute
                # precision and recall PER image
                pred_boxes = non_maximal_suppression_per_class(
                    pred_boxes,
                    background_class_id=bgid,
                    iou_threshold=cfg['nms_iou_threshold'],
                    score_threshold=cfg['nms_score_threshold'],
                )
                for box, class_id, score in pred_boxes:
                    pred_boxes_per_class[class_id].append((box, score))
                pred_boxes = pred_boxes[0:cfg['nms_topk']]
                P = []
                R = []
                for class_id in class_ids:
                    t = [box for box, cl in gt_boxes if cl == class_id]
                    if len(t) == 0:
                        # No groundtruth of this class in this image.
                        continue
                    p = [box for box, cl, score in pred_boxes if cl == class_id]
                    prec = precision(p, t, iou_threshold=cfg['eval_iou_threshold'])
                    re = recall(p, t, iou_threshold=cfg['eval_iou_threshold'])
                    metrics['precision_' + train.dataset.idx_to_class[class_id] + '_' + split_name].append(prec)
                    metrics['recall_' + train.dataset.idx_to_class[class_id] + '_' + split_name].append(re)
                    P.append(prec)
                    R.append(re)
                metrics['precision_' + split_name].append(np.mean(P))
                metrics['recall_' + split_name].append(np.mean(R))
                # Switch from class ids to class names for drawing.
                gt_boxes = [(box, train.dataset.idx_to_class[class_id]) for box, class_id in gt_boxes]
                pred_boxes = [(box, train.dataset.idx_to_class[class_id], score) for box, class_id, score in pred_boxes]
                # compute mAP and AP PER image (11-point interpolation)
                recalls_mAP = np.linspace(0, 1, 11)
                AP_for_recall = defaultdict(list)
                AP_for_class = []
                for class_id in class_ids:
                    APs = average_precision(
                        pred_boxes_per_class[class_id],
                        gt_boxes_per_class[class_id],
                        iou_threshold=cfg['eval_iou_threshold'],
                        recalls_mAP=recalls_mAP,
                        aggregate=False,
                    )
                    if APs is None:
                        continue
                    AP = np.mean(APs)
                    AP_for_class.append(AP)
                    metrics['AP_' + train.dataset.idx_to_class[class_id] + '_' + split_name].append(AP)
                    for r, ap in zip(recalls_mAP, APs):
                        m = 'AP(rec_{:.2f})_{}_{}'.format(r, train.dataset.idx_to_class[class_id], split_name)
                        metrics[m].append(ap)
                        AP_for_recall[r].append(ap)
                mAP = np.mean(AP_for_class)
                metrics['mAP_' + split_name].append(mAP)
                for r in recalls_mAP:
                    mAP = np.mean(AP_for_recall[r])
                    metrics['mAP(rec_{:.2f})_{}'.format(r, split_name)].append(mAP)
                # draw boxes: groundtruth in red, predictions in green
                pad = cfg['pad']
                im = np.zeros((x.shape[0] + pad * 2, x.shape[1] + pad * 2, x.shape[2]))
                im[pad:-pad, pad:-pad] = x
                im = draw_bounding_boxes(im, gt_boxes, color=(1, 0, 0), text_color=(1, 0, 0), pad=pad)
                im = draw_bounding_boxes(im, pred_boxes, color=(0, 1, 0), text_color=(0, 1, 0), pad=pad)
                imsave(os.path.join(cfg['out_folder'], 'eval_{}'.format(split_name), 'sample_{:05d}.jpg'.format(im_index)), im)
                im_index += 1
            delta = time.time() - tt0
            print('Eval Batch {:04d}/{:04d} on split {}, Time : {:.3f}s'.format(batch + 1, len(loader), split_name, delta))
        delta = time.time() - t0
        metrics['eval_' + split_name + '_time'] = [delta]
        print('Eval time of {}: {:.4f}s'.format(split_name, delta))
    # Reduce every metric to its mean, print, and return.
    stats = {}
    for k in sorted(metrics.keys()):
        v = np.mean(metrics[k])
        print('{}: {:.4}'.format(k, v))
        stats[k] = v
    return stats
def train(*, config='config', resume=False):
    """Train the detector described by the config file.

    Builds datasets/loaders and the model (or resumes a checkpoint when
    `resume` is True), then runs the training loop: localization +
    classification loss with a configurable class-imbalance strategy,
    periodic checkpointing and box-drawing every `log_interval` updates,
    and evaluation every `eval_interval` epochs.
    """
    print('Read config "{}"'.format(config))
    cfg = _read_config(config)
    w_loc = cfg['w_loc']          # weight of the localization loss
    w_classif = cfg['w_classif']  # weight of the classification loss
    batch_size = cfg['batch_size']
    num_epoch = cfg['num_epoch']
    image_size = cfg['image_size']
    gamma = cfg['gamma']          # EMA factor for the running loss averages
    mean = cfg['mean']
    std = cfg['std']
    imbalance_strategy = cfg['imbalance_strategy']
    out_folder = cfg['out_folder']
    negative_per_positive = cfg['negative_per_positive']
    if imbalance_strategy == 'class_weight':
        pos_weight = cfg['pos_weight']
        neg_weight = cfg['neg_weight']
    nms_iou_threshold = cfg['nms_iou_threshold']
    eval_iou_threshold = cfg['eval_iou_threshold']
    log_interval = cfg['log_interval']
    nms_score_threshold = cfg['nms_score_threshold']
    eval_interval = cfg['eval_interval']
    aspect_ratios = cfg['aspect_ratios']
    nms_topk = cfg['nms_topk']
    debug = cfg['debug']
    folders = [
        'train',
        'eval_train',
        'eval_valid',
    ]
    for f in folders:
        try:
            os.makedirs(os.path.join(out_folder, f))
        except OSError:
            # Folder already exists.
            pass
    if debug:
        log_interval = 30
    # anchor list for each scale (we have 6 scales)
    anchor_list = _build_anchor_list(cfg)
    # dataset for train and valid
    print('Loading dataset anotations...')
    (train_dataset, valid_dataset), (train_evaluation, valid_evaluation) = _build_dataset(
        cfg,
        anchor_list=anchor_list,
    )
    print('Done loading dataset annotations.')
    if debug:
        # Shrink all splits to a few samples for quick iteration.
        n = 10
        train_dataset = SubSample(train_dataset, nb=n)
        valid_dataset = SubSample(valid_dataset, nb=n)
        train_evaluation = SubSample(train_evaluation, nb=n)
        valid_evaluation = SubSample(valid_evaluation, nb=n)
    assert train_dataset.class_to_idx == valid_dataset.class_to_idx
    assert train_dataset.idx_to_class == valid_dataset.idx_to_class
    # Identity collate: batches are plain lists of samples.
    clfn = lambda l: l
    # Dataset loaders for full training and full validation
    train_loader = DataLoader(
        train_dataset,
        shuffle=True,
        batch_size=batch_size,
        collate_fn=clfn,
        num_workers=cfg['num_workers'],
    )
    train_evaluation_loader = DataLoader(
        train_evaluation,
        batch_size=batch_size,
        collate_fn=clfn,
        num_workers=cfg['num_workers'],
    )
    valid_evaluation_loader = DataLoader(
        valid_evaluation,
        batch_size=batch_size,
        collate_fn=clfn,
        num_workers=cfg['num_workers'],
    )
    nb_classes = len(train_dataset.class_to_idx)
    bgid = train_dataset.class_to_idx['background']
    class_ids = list(set(train_dataset.class_to_idx.values()) - set([bgid]))
    class_ids = sorted(class_ids)
    print('Number of training images : {}'.format(len(train_dataset)))
    print('Number of valid images : {}'.format(len(valid_dataset)))
    print('Number of classes : {}'.format(nb_classes))
    stats_filename = os.path.join(out_folder, 'stats.csv')
    train_stats_filename = os.path.join(out_folder, 'train_stats.csv')
    model_filename = os.path.join(out_folder, 'model.th')
    optimizer_filename = os.path.join(out_folder, 'optimizer.th')
    if resume:
        # Resume model, optimizer and stats from the output folder.
        print('Resuming model: {}'.format(model_filename))
        model = torch.load(model_filename)
        model = model.cuda()
        if os.path.exists(optimizer_filename):
            print('Resuming optimizer: {}'.format(optimizer_filename))
            optimizer = torch.load(optimizer_filename)
        else:
            optimizer_cls = getattr(torch.optim, cfg['optim_algo'])
            optimizer = optimizer_cls(model.parameters(), lr=0, **cfg['optim_params'])
        if os.path.exists(stats_filename):
            stats = pd.read_csv(stats_filename).to_dict(orient='list')
            # NOTE(review): resumes AT the last recorded epoch, so that epoch
            # is re-run once — confirm this is intended.
            first_epoch = max(stats['epoch'])
        else:
            stats = defaultdict(list)
            first_epoch = 1
        if os.path.exists(train_stats_filename):
            train_stats = pd.read_csv(train_stats_filename).to_dict(orient='list')
        else:
            train_stats = defaultdict(list)
        use_discrete_coords = model.use_discrete_coords
        coords_discretization = model.coords_discretization
        print('Starting from epoch {}'.format(first_epoch))
    else:
        use_discrete_coords = cfg['use_discrete_coords']
        if use_discrete_coords:
            coords_discretization = torch.linspace(
                cfg['discrete_coords_min'],
                cfg['discrete_coords_max'],
                cfg['discrete_coords_nb']
            )
            # discretization values for each x,y,w,h
            num_coords = 4 * len(coords_discretization)
        else:
            num_coords = 4
            coords_discretization = None
        if 'init_from' in cfg:
            # Warm-start from a previously trained model file.
            print('Init from {}'.format(cfg['init_from']))
            model = torch.load(cfg['init_from'])
        else:
            model_class = getattr(model_module, cfg['model_name'])
            kw = cfg.get('model_config', {})
            model = model_class(
                num_anchors=list(map(len, aspect_ratios)),
                num_classes=nb_classes,
                num_coords=num_coords,
                **kw,
            )
        optimizer_cls = getattr(torch.optim, cfg['optim_algo'])
        optimizer = optimizer_cls(model.parameters(), lr=0, **cfg['optim_params'])
        # Attach training metadata to the model so checkpoints are
        # self-contained (used by test()/_evaluate_model()).
        model.use_discrete_coords = use_discrete_coords
        model.num_coords = num_coords
        model.coords_discretization = coords_discretization
        model = model.cuda()
        model.transform = valid_dataset.transform
        model.nb_classes = nb_classes
        model.aspect_ratios = aspect_ratios
        model.anchor_list = anchor_list
        model.image_size = image_size
        first_epoch = 1
        stats = defaultdict(list)
        train_stats = defaultdict(list)
        model.nb_updates = 0
        model.avg_loss = 0.
        model.avg_loc = 0.
        model.avg_classif = 0
        model.background_class_id = train_dataset.background_class_id
        model.class_to_idx = train_dataset.class_to_idx
        model.mean = mean
        model.std = std
        model.config = cfg
    print(model)
    classif_loss_name = cfg.get('classif_loss', 'cross_entropy')
    model.classif_loss_name = classif_loss_name
    if classif_loss_name == 'cross_entropy':
        classif_loss = cross_entropy
    elif classif_loss_name == 'binary_cross_entropy':
        classif_loss = binary_cross_entropy_with_logits
    elif classif_loss_name == 'focal_loss':
        classif_loss = FocalLoss(
            gamma=cfg.get('focal_loss_gamma', 2),
            alpha=cfg.get('focal_loss_alpha', None),
        )
    else:
        raise ValueError('Unknown classif loss : {}'.format(classif_loss_name))
    if imbalance_strategy == 'class_weight':
        # Index 0 is assumed to be the background class here.
        class_weight = torch.zeros(nb_classes)
        class_weight[0] = neg_weight
        class_weight[1:] = pos_weight
        class_weight = class_weight.cuda()
    # NOTE(review): range(first_epoch, num_epoch) never runs epoch ==
    # num_epoch — confirm whether the last epoch is intentionally skipped.
    for epoch in range(first_epoch, num_epoch):
        epoch_t0 = time.time()
        model.train()
        for batch, samples, in enumerate(train_loader):
            t0 = time.time()
            X, (Y, Ypred), (bbox_true, bbox_pred), (class_true, class_pred) = _predict(model, samples)
            # X is batch of image
            # Y is groundtruth output
            # Ypred is predicted output
            # bbox_true are groundtruth bounding boxes extracted from Y
            # bbox_pred are predicted bounding boxes extracted from Ypred
            # class_true are groundtruth classes extracted from Y
            # class_pred are predicted classes extracted from Ypred
            m = (class_true != bgid).view(-1)
            ind = m.nonzero().view(-1)  # indices of positive (non-background) anchors
            bt = bbox_true
            bp = bbox_pred
            ct = class_true
            cp = class_pred
            N = max(len(ind), 1.0)  # avoid division by zero when no positives
            # localization loss (positives only)
            if use_discrete_coords:
                # NOTE(review): unlike the smooth-L1 branch, this loss is not
                # divided by N — confirm whether that is intended.
                l_loc = _discrete_coords_loss(bp[ind], bt[ind], coords_discretization)
            else:
                l_loc = smooth_l1_loss(bp[ind], bt[ind], size_average=False) / N
            # classif loss
            if imbalance_strategy == 'hard_negative_mining':
                # Keep the `negative_per_positive` hardest (highest-loss)
                # negatives per positive example.
                ind = torch.arange(len(ct))
                pos = ind[(ct.data.cpu() > 0)].long().cuda()
                neg = ind[(ct.data.cpu() == 0)].long().cuda()
                ct_pos = ct[pos]
                cp_pos = cp[pos]
                ct_neg = ct[neg]
                cp_neg = cp[neg]
                cp_neg_loss = classif_loss(cp_neg, ct_neg, reduce=False)
                cp_neg_loss = cp_neg_loss.cuda()
                vals, indices = cp_neg_loss.sort(descending=True)
                nb = len(ct_pos) * negative_per_positive
                cp_neg = cp_neg[indices[0:nb]]
                ct_neg = ct_neg[indices[0:nb]]
                l_classif = (classif_loss(cp_pos, ct_pos, size_average=False) +
                             classif_loss(cp_neg, ct_neg, size_average=False)) / N
            elif imbalance_strategy == 'hard_negative_mining_with_sampling':
                # Sample negatives with probability proportional to their
                # loss (softmax over per-negative losses).
                ind = torch.arange(len(ct))
                pos = ind[(ct.data.cpu() > 0)].long().cuda()
                neg = ind[(ct.data.cpu() == 0)].long().cuda()
                ct_pos = ct[pos]
                cp_pos = cp[pos]
                ct_neg = ct[neg]
                cp_neg = cp[neg]
                nb = min(len(ct_pos) * negative_per_positive, len(ct_neg))
                cp_neg_loss = classif_loss(cp_neg, ct_neg, reduce=False)
                proba_sel = torch.nn.Softmax(dim=0)(cp_neg_loss)
                proba_sel = proba_sel.cuda()
                indices = torch.multinomial(proba_sel, nb)
                cp_neg = cp_neg[indices]
                ct_neg = ct_neg[indices]
                l_classif = (classif_loss(cp_pos, ct_pos, size_average=False) +
                             classif_loss(cp_neg, ct_neg, size_average=False)) / N
            elif imbalance_strategy == 'undersampling':
                # Sample negatives uniformly at random (with replacement).
                ind = torch.arange(len(ct))
                pos = ind[(ct.data.cpu() > 0)].long().cuda()
                neg = ind[(ct.data.cpu() == 0)].long().cuda()
                ct_pos = ct[pos]
                cp_pos = cp[pos]
                ct_neg = ct[neg]
                cp_neg = cp[neg]
                nb = len(ct_pos) * negative_per_positive
                inds = torch.from_numpy(np.random.randint(0, len(ct_neg), nb))
                inds = inds.long().cuda()
                ct_neg = ct_neg[inds]
                cp_neg = cp_neg[inds]
                l_classif = (classif_loss(cp_pos, ct_pos, size_average=False) +
                             classif_loss(cp_neg, ct_neg, size_average=False)) / N
            elif imbalance_strategy == 'class_weight':
                # TODO make it work if classif_loss is "binary_cross_entropy", it does not
                # work in that case
                l_classif = classif_loss(cp, ct, weight=class_weight, size_average=False) / N
            elif imbalance_strategy == 'nothing':
                l_classif = classif_loss(cp, ct, size_average=False) / N
            else:
                raise ValueError('unknown imbalance strategy : {}'.format(imbalance_strategy))
            model.zero_grad()
            loss = w_loc * l_loc + w_classif * l_classif
            loss.backward()
            _update_lr(optimizer, model.nb_updates, cfg['lr_schedule'])
            optimizer.step()
            # Exponential moving averages of the losses for logging.
            model.avg_loss = model.avg_loss * gamma + item(loss) * (1 - gamma)
            model.avg_loc = model.avg_loc * gamma + item(l_loc) * (1 - gamma)
            model.avg_classif = model.avg_classif * gamma + item(l_classif) * (1 - gamma)
            delta = time.time() - t0
            print('Epoch {:05d}/{:05d} Batch {:05d}/{:05d} Loss : {:.3f} Loc : {:.3f} '
                  'Classif : {:.3f} AvgTrainLoss : {:.3f} AvgLoc : {:.3f} '
                  'AvgClassif {:.3f} Time:{:.3f}s'.format(
                      epoch, num_epoch, batch + 1, len(train_loader),
                      item(loss), item(l_loc), item(l_classif),
                      model.avg_loss, model.avg_loc, model.avg_classif, delta
                  ))
            train_stats['loss'].append(item(loss))
            train_stats['loc'].append(item(l_loc))
            train_stats['classif'].append(item(l_classif))
            train_stats['time'].append(delta)
            if model.nb_updates % log_interval == 0:
                # reporting part
                # -- draw training samples with their predicted and true bounding boxes
                pd.DataFrame(train_stats).to_csv(train_stats_filename, index=False)
                t0 = time.time()
                torch.save(model, model_filename)
                torch.save(optimizer, optimizer_filename)
                X = X.data.cpu().numpy()
                B = [[b for b, c in y] for y in Y]
                B = [(np.array(b)) for b in B]
                C = [[c for b, c in y] for y in Y]
                C = [(np.array(c)) for c in C]
                # B contains groundtruth bounding boxes for each scale
                # C contains groundtruth classes for each scale
                BP = [
                    bp.data.cpu().view(bp.size(0), -1, model.num_coords, bp.size(2), bp.size(3)).permute(0, 3, 4, 1, 2).numpy()
                    for bp, cp in Ypred]
                CP = [
                    cp.data.cpu().view(cp.size(0), -1, model.nb_classes, cp.size(2), cp.size(3)).permute(0, 3, 4, 1, 2).numpy()
                    for bp, cp in Ypred]
                # BP contains predicted bounding boxes for each scale
                # CP contains predicted classes for each scale
                for i in range(len(X)):
                    # for each example i in mini-batch:
                    # CHW tensor -> HWC float image, un-normalized for display.
                    x = X[i]
                    x = x.transpose((1, 2, 0))
                    x = x * np.array(std) + np.array(mean)
                    x = x.astype('float32')
                    gt_boxes = []
                    pred_boxes = []
                    for j in range(len(Y)):
                        # for each scale j
                        ct = C[j][i]   # groundtruth class ids
                        cp = CP[j][i]  # predicted per-class scores
                        bt = B[j][i]   # groundtruth box coords
                        bp = BP[j][i]  # predicted box coords
                        if use_discrete_coords:
                            bp = _get_coords(bp, model.coords_discretization)
                        A = model.anchor_list[j]
                        # get groundtruth boxes
                        gt_boxes.extend(decode_bounding_box_list(
                            bt, ct, A,
                            include_scores=False,
                            image_size=image_size,
                            variance=cfg['variance'],
                        ))
                        # get predicted boxes
                        cp = get_probas(cp, classif_loss_name, axis=3)
                        pred_boxes.extend(decode_bounding_box_list(
                            bp, cp, A,
                            include_scores=True,
                            image_size=image_size,
                            variance=cfg['variance'],
                        ))
                    gt_boxes = [(box, class_id) for box, class_id in gt_boxes if class_id != bgid]
                    # apply non-maximal suppression to predicted boxes
                    # 1) for each class, filter low confidence predictions and then do NMS
                    # 2) concat all the bboxes from all classes
                    # 3) take to the topk (nms_topk)
                    if cfg['use_nms']:
                        pred_boxes = non_maximal_suppression_per_class(
                            pred_boxes,
                            background_class_id=bgid,
                            iou_threshold=nms_iou_threshold,
                            score_threshold=nms_score_threshold)
                        pred_boxes = pred_boxes[0:nms_topk]
                    else:
                        pred_boxes = [
                            (box, scores.argmax(), scores.max())
                            for box, scores in pred_boxes
                            if scores.argmax() != bgid and scores.max() > nms_score_threshold
                        ]
                    # get class names
                    gt_boxes = [(box, train_dataset.idx_to_class[class_id]) for box, class_id in gt_boxes]
                    pred_boxes = [(box, train_dataset.idx_to_class[class_id], score) for box, class_id, score in pred_boxes]
                    # draw boxes: groundtruth in red, predictions in green
                    pad = cfg['pad']
                    im = np.zeros((x.shape[0] + pad * 2, x.shape[1] + pad * 2, x.shape[2]))
                    im[pad:-pad, pad:-pad] = x
                    im = draw_bounding_boxes(im, gt_boxes, color=(1, 0, 0), text_color=(1, 0, 0), pad=pad)
                    im = draw_bounding_boxes(im, pred_boxes, color=(0, 1, 0), text_color=(0, 1, 0), pad=pad)
                    imsave(os.path.join(out_folder, 'train', 'sample_{:05d}.jpg'.format(i)), im)
                delta = time.time() - t0
                print('Draw box time {:.4f}s'.format(delta))
            model.nb_updates += 1
        epoch_time = time.time() - epoch_t0
        # Periodic full evaluation, unless disabled via config.
        if epoch % eval_interval != 0:
            continue
        if cfg.get('evaluate', True) is False:
            continue
        stats_epoch = _evaluate_model(model, train_evaluation_loader, valid_evaluation_loader)
        stats_epoch['train_time'] = epoch_time
        stats_epoch['epoch'] = epoch
        for k, v in stats_epoch.items():
            stats[k].append(v)
        pd.DataFrame(stats).to_csv(stats_filename, index=False)
import bounding_box
import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import sys

# Small driver script: load "<argv1>.jpg", convert it to grayscale, then
# detect, print and draw its bounding boxes using the bounding_box module.

# Use a distinct name for the path so it does not shadow the image data
# (the original reused `image` for both the path string and pixel array).
image_path = sys.argv[1] + '.jpg'
image = cv.imread(image_path)
image = np.asarray(cv.cvtColor(image, cv.COLOR_BGR2GRAY))
bounding_box.plot(image)
boxes = bounding_box.get_bounding_boxes(image)
print(boxes)
bounding_box.draw_bounding_boxes(image, boxes)
bounding_box.plot(image)