Exemple #1
0
        resp_offset = resp_offset[iou_mask]
        resp_strd = resp_strd[iou_mask]
        conf = resp_true_pred[iou_mask][:, 4].mean().item()
        class_mask = targets[:, 5:].type(torch.BoolTensor).squeeze(0)

        if (iou_mask.sum() == class_mask.shape[0]):
            pos_class = resp_true_pred[iou_mask][:,
                                                 5:][class_mask].mean().item()
            neg_class = resp_true_pred[iou_mask][:,
                                                 5:][~class_mask].mean().item(
                                                 )
        else:
            pos_class = 0
            neg_class = 0
        loss = util.yolo_loss(resp_raw_pred, targets, no_obj, mask,
                              resp_anchors, resp_offset, resp_strd, inp_dim,
                              hyperparameters)

        loss.backward()
        optimizer.step()

        avg_conf = avg_conf + conf
        avg_no_conf = avg_no_conf + no_obj_conf
        avg_pos = avg_pos + pos_class
        avg_neg = avg_neg + neg_class
        total_loss = total_loss + loss.item()
        avg_iou = avg_iou + iou
        prg_counter = prg_counter + 1
#         sys.stdout.write('\rPgr:'+str(prg_counter/dataset_len*100*batch_size)+'%' ' L:'+ str(loss.item()))
#         sys.stdout.write(' IoU:' +str(iou)+' pob:'+str(conf)+ ' nob:'+str(no_obj_conf))
#         sys.stdout.write(' PCls:' +str(pos_class)+' ncls:'+str(neg_class))
def train_one_epoch(model, optimizer, dataloader, hyperparameters, mode):
    """Train ``model`` for one pass over ``dataloader`` and return averages.

    Args:
        model: Network, optionally wrapped in ``nn.DataParallel``, exposing
            ``inp_dim``, ``pw_ph``, ``cx_cy`` and ``stride``.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        dataloader: Iterable of ``(images, targets)`` batches that also
            exposes ``dataset`` and ``batch_size`` (used for the progress
            percentage).
        hyperparameters: Dict forwarded to the ``util`` helpers; the
            ``'wasserstein'`` entry is read here.
        mode: Dict of flags. ``'show_temp_summary'``, ``'debugging'`` and
            ``'show_output'`` are read here, as is ``'bayes_opt'`` when
            debugging is off.

    Returns:
        Dict with the keys ``'avg_loss'``, ``'avg_iou'``, ``'avg_pos'``,
        ``'avg_neg'``, ``'avg_conf'`` and ``'avg_no_conf'`` (sums divided by
        the number of batches that completed an optimizer step; all zero if
        none did) plus ``'broken'``, which is 1 when the loop aborted early
        (inf in the raw predictions, or a ``RuntimeError`` from the loss in
        ``bayes_opt`` mode) and 0 otherwise.
    """
    model.train()

    # DataParallel keeps the wrapped network's attributes behind ``.module``.
    net = model.module if isinstance(model, nn.DataParallel) else model
    inp_dim = net.inp_dim
    pw_ph = net.pw_ph.cuda()
    cx_cy = net.cx_cy.cuda()
    stride = net.stride.cuda()

    dataset_len = len(dataloader.dataset)
    batch_size = dataloader.batch_size

    # Running sums, keyed by the name they are returned under.
    totals = {
        'avg_loss': 0,
        'avg_iou': 0,
        'avg_pos': 0,
        'avg_neg': 0,
        'avg_conf': 0,
        'avg_no_conf': 0,
    }
    # Which entry of the per-batch ``stats`` dict feeds which running sum.
    stat_sources = (
        ('avg_iou', 'iou'),
        ('avg_pos', 'pos_class'),
        ('avg_neg', 'neg_class'),
        ('avg_conf', 'pos_conf'),
        ('avg_no_conf', 'neg_conf'),
    )
    # TensorBoard tag for each running average.
    summary_tags = (
        ('AvLoss/train', 'avg_loss'),
        ('AvIoU/train', 'avg_iou'),
        ('AvPConf/train', 'avg_conf'),
        ('AvNConf/train', 'avg_no_conf'),
        ('AvClass/train', 'avg_pos'),
        ('AvNClass/train', 'avg_neg'),
    )

    break_flag = 0
    # Only batches that reached optimizer.step() are counted, so an aborted
    # batch does not dilute the averages.
    completed = 0

    show_summary = mode['show_temp_summary'] == True
    writer = SummaryWriter('../tensorboard/test_vis/') if show_summary else None

    try:
        for images, targets in dataloader:
            optimizer.zero_grad()
            images = images.cuda()
            debugging = mode['debugging'] == True

            if debugging:
                with autograd.detect_anomaly():
                    raw_pred = model(images, torch.cuda.is_available())
            else:
                raw_pred = model(images, torch.cuda.is_available())
                # Diverged forward pass: give up on this epoch.
                if torch.isinf(raw_pred).any():
                    break_flag = 1
                    break

            # The decoded predictions are only used for matching/statistics,
            # hence the detached copy.
            true_pred = util.transform(raw_pred.clone().detach(), pw_ph,
                                       cx_cy, stride)
            iou_list = util.get_iou_list(true_pred, targets, hyperparameters,
                                         inp_dim)

            (resp_raw_pred, resp_cx_cy, resp_pw_ph, resp_stride,
             no_obj) = util.build_tensors(raw_pred, iou_list, pw_ph, cx_cy,
                                          stride, hyperparameters)

            # Statistics are taken before ``no_obj`` is possibly replaced.
            stats = helper.get_progress_stats(true_pred, no_obj, iou_list,
                                              targets)

            if hyperparameters['wasserstein'] == True:
                no_obj = util.get_wasserstein_matrices(raw_pred, iou_list,
                                                       inp_dim)

            loss_args = (resp_raw_pred, targets, no_obj, resp_pw_ph,
                         resp_cx_cy, resp_stride, inp_dim, hyperparameters)
            if debugging:
                # NOTE(review): backward() below runs outside this context;
                # confirm whether it should be wrapped as well.
                with autograd.detect_anomaly():
                    loss = util.yolo_loss(*loss_args)
            elif mode['bayes_opt'] == True:
                try:
                    loss = util.yolo_loss(*loss_args)
                except RuntimeError:
                    # A failing trial is reported through 'broken' instead of
                    # crashing the caller.
                    break_flag = 1
                    break
            else:
                loss = util.yolo_loss(*loss_args)

            loss.backward()
            optimizer.step()
            completed += 1

            loss_value = loss.item()
            totals['avg_loss'] = totals['avg_loss'] + loss_value
            for total_key, stat_key in stat_sources:
                totals[total_key] = totals[total_key] + stats[stat_key]

            if mode['show_output'] == True:
                sys.stdout.write(
                    '\rPgr:' +
                    str(completed / dataset_len * 100 * batch_size) +
                    '% L:' + str(loss_value))
                sys.stdout.write(' IoU:' + str(stats['iou']) + ' pob:' +
                                 str(stats['pos_conf']) + ' nob:' +
                                 str(stats['neg_conf']))
                sys.stdout.write(' PCls:' + str(stats['pos_class']) +
                                 ' ncls:' + str(stats['neg_class']))
                sys.stdout.flush()

            if show_summary:
                for tag, total_key in summary_tags:
                    writer.add_scalar(tag, totals[total_key] / completed,
                                      completed)
    finally:
        # Close the writer even when a batch raises.
        if writer is not None:
            writer.close()

    # Guard against an empty loader or an abort on the very first batch.
    denominator = completed if completed else 1
    outcome = {key: total / denominator for key, total in totals.items()}
    outcome['broken'] = break_flag

    return outcome
Exemple #3
0
def train(trainloader, model, optimizer, epoch, cuda=True):
    """Train ``model`` for one epoch and report the averaged statistics.

    Args:
        trainloader: Sized iterable of ``(inputs, targets)`` batches.
        model: Network, optionally wrapped in ``nn.DataParallel``, exposing
            ``hp``, ``mode``, ``inp_dim``, ``pw_ph``, ``cx_cy`` and
            ``stride``.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        epoch: Index of the current epoch; used to derive the global
            iteration number for logging.
        cuda: When true, the inputs and the ``pw_ph`` / ``cx_cy`` /
            ``stride`` tensors are moved to the GPU.

    Returns:
        Dict with the running averages ``'avg_loss'``, ``'avg_iou'``,
        ``'avg_pos'``, ``'avg_neg'``, ``'avg_conf'`` and ``'avg_no_conf'``
        plus ``'broken'``, which is 1 when ``util.yolo_loss`` raised a
        ``RuntimeError`` and the epoch was aborted, 0 otherwise.
    """
    # switch to train mode
    model.train()

    # DataParallel keeps the wrapped network's attributes behind ``.module``.
    net = model.module if isinstance(model, nn.DataParallel) else model
    # Prefer attributes set on the object that was passed in (previous
    # behaviour) and fall back to the wrapped network.
    hyperparameters = model.hp if hasattr(model, 'hp') else net.hp
    mode = model.mode if hasattr(model, 'mode') else net.mode

    inp_dim = net.inp_dim
    pw_ph = net.pw_ph
    cx_cy = net.cx_cy
    stride = net.stride

    if cuda:
        pw_ph = pw_ph.cuda()
        cx_cy = cx_cy.cuda()
        stride = stride.cuda()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    avg_loss = AverageMeter()
    avg_iou = AverageMeter()
    avg_conf = AverageMeter()
    avg_no_conf = AverageMeter()
    avg_pos = AverageMeter()
    avg_neg = AverageMeter()
    end = time.time()
    break_flag = 0

    num_batches = len(trainloader)
    # Bound before the loop so the final track.metric call still works when
    # the loader is empty or the very first batch aborts.
    iteration = epoch * num_batches

    # Each meter together with the TensorBoard tag it is logged under.
    summary_meters = (
        ('AvLoss/train', avg_loss),
        ('AvIoU/train', avg_iou),
        ('AvPConf/train', avg_conf),
        ('AvNConf/train', avg_no_conf),
        ('AvClass/train', avg_pos),
        ('AvNClass/train', avg_neg),
    )

    show_summary = mode['show_temp_summary'] == True
    writer = None
    if show_summary:
        writer = SummaryWriter(os.path.join(track.trial_dir(), 'temp_vis/'))

    try:
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            # measure data loading time
            data_time.update(time.time() - end)

            if cuda:
                inputs = inputs.cuda()

            # compute output
            # NOTE(review): the model receives torch.cuda.is_available()
            # rather than the ``cuda`` argument -- confirm this is intended.
            raw_pred = model(inputs, torch.cuda.is_available())
            # The decoded predictions are only used for matching/statistics,
            # hence the detached copy.
            true_pred = util.transform(raw_pred.clone().detach(), pw_ph,
                                       cx_cy, stride)
            iou_list = util.get_iou_list(true_pred, targets, hyperparameters,
                                         inp_dim)

            (resp_raw_pred, resp_cx_cy, resp_pw_ph, resp_stride,
             no_obj) = util.build_tensors(raw_pred, iou_list, pw_ph, cx_cy,
                                          stride, hyperparameters)

            # Statistics are taken before ``no_obj`` is possibly replaced.
            stats = helper.get_progress_stats(true_pred, no_obj, iou_list,
                                              targets)
            if hyperparameters['wasserstein'] == True:
                no_obj = util.get_wasserstein_matrices(raw_pred, iou_list,
                                                       inp_dim)

            try:
                loss = util.yolo_loss(resp_raw_pred, targets, no_obj,
                                      resp_pw_ph, resp_cx_cy, resp_stride,
                                      inp_dim, hyperparameters)
            except RuntimeError:
                # Report the failure through 'broken' instead of crashing.
                print('bayes opt failed')
                break_flag = 1
                break

            # measure accuracy and record loss
            avg_loss.update(loss.item())
            avg_iou.update(stats['iou'])
            avg_conf.update(stats['pos_conf'])
            avg_no_conf.update(stats['neg_conf'])
            avg_pos.update(stats['pos_class'])
            avg_neg.update(stats['neg_class'])

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if mode['show_output'] == True:  # plot progress
                progress_str = 'Loss: %.4f | AvIoU: %.3f | AvPConf: %.3f | AvNConf: %.5f | AvClass: %.3f | AvNClass: %.5f'\
                    % (loss.item(), stats['iou'], stats['pos_conf'],
                       stats['neg_conf'], stats['pos_class'],
                       stats['neg_class'])
                progress_bar(batch_idx, num_batches, progress_str)

            iteration = epoch * num_batches + batch_idx

            if show_summary:
                for tag, meter in summary_meters:
                    writer.add_scalar(tag, meter.avg, iteration)
    finally:
        # Close the writer even when a batch raises.
        if writer is not None:
            writer.close()

    track.metric(iteration=iteration,
                 epoch=epoch,
                 avg_train_loss=avg_loss.avg,
                 avg_train_iou=avg_iou.avg,
                 avg_train_conf=avg_conf.avg,
                 avg_train_neg_conf=avg_no_conf.avg,
                 avg_train_pos=avg_pos.avg,
                 avg_train_neg=avg_neg.avg)

    outcome = {
        'avg_loss': avg_loss.avg,
        'avg_iou': avg_iou.avg,
        'avg_pos': avg_pos.avg,
        'avg_neg': avg_neg.avg,
        'avg_conf': avg_conf.avg,
        'avg_no_conf': avg_no_conf.avg,
        'broken': break_flag
    }

    return outcome
# Stand-alone training script: one optimizer step per row of ``df``; the model
# weights are saved to PATH after every epoch.
epochs = 20

lock = 0
total_loss = 0

# Factors applied to the x/width and y/height columns of each row.
# NOTE(review): 1980 may have been meant as 1920 -- confirm the frame width.
x_scale = 416 / 1980
y_scale = 416 / 1080

for e in range(epochs):
    prg_counter = 0
    total_loss = 0
    print('\n epoch {}'.format(e))
    for index, row in df.iterrows():
        optimizer.zero_grad()

        # The image file name uses the part of 'framespan' before the colon.
        first_frame = row['framespan'].split(':')[0]
        imgpath = ''.join(('../images/images/', row['filename'], '_img',
                           first_frame, '.jpg'))
        inp = get_test_input(imgpath)

        box = [
            row['x'] * x_scale,
            row['y'] * y_scale,
            row['width'] * x_scale,
            row['height'] * y_scale,
            1,
            1,
        ]
        targets = torch.tensor([[box]])

        pred = model(inp, torch.cuda.is_available())
        pred = pred.to(device='cuda')
        targets = targets.to(device='cuda')

        loss = util.yolo_loss(pred, targets)
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        # 9570 is hard-coded; presumably the number of rows in ``df`` --
        # TODO confirm.
        sys.stdout.write('\r Progress is {}% loss is: {}'.format(
            prg_counter / 9570 * 100, loss_value))
        sys.stdout.flush()

        prg_counter += 1
        total_loss += loss_value

    torch.save(model.state_dict(), PATH)
    print('\n total average loss is {}'.format(total_loss / 9570))
        anchors = pw_ph.clone()
        offset = cx_cy.clone()
        strd = stride.clone()

        noobj_box = raw_pred[:, :, 4:5].clone()

        noobj_box = noobj_box[noobj_mask.T, :]
        raw_pred = raw_pred[iou_mask.T, :]
        anchors = anchors[iou_mask.T, :]
        offset = offset[iou_mask.T, :]
        strd = strd[iou_mask.T, :]

        if (strd.shape[0] == 1):
            target[:, 0:4] = target[:, 0:4] * (inp_dim / strd)
            target = target.squeeze(-2)
            target = util.transform_groundtruth(target, anchors, offset)
            loss = util.yolo_loss(raw_pred, target, noobj_box, 1)
            loss.backward()
            optimizer.step()
            sys.stdout.write('\r Progress is ' +
                             str(prg_counter / 9570 * 100) + '%'
                             ' loss is: ' + str(loss.item()))
            sys.stdout.flush()
            prg_counter = prg_counter + 1
            total_loss = total_loss + loss.item()
        else:
            print('missed')
            prg_counter = prg_counter + 1
    torch.save(model.state_dict(), PATH)
    print('\n total average loss is ' + str(total_loss / 9570))
        noobj_box = noobj_box[noobj_mask.T, :]
        no_obj_conf = noobj_box.mean().item()

        raw_pred = raw_pred[iou_mask.T, :]
        anchors = anchors[iou_mask.T, :]
        offset = offset[iou_mask.T, :]
        strd = strd[iou_mask.T, :]

        if (
                strd.shape[0] == sample_batched['image'].shape[0]
        ):  #this means that iou_mask failed and was all true, because max of zeros is true for all lenght of mask strd
            target = util.xyxy_to_xywh(target)
            target = target.squeeze(1)
            target = util.transform_groundtruth(target, anchors, offset, strd)
            loss = util.yolo_loss(raw_pred, target, noobj_box, batch_size)
            loss.backward()
            optimizer.step()
            total_loss = total_loss + loss.item()
            avg_iou = avg_iou + iou
            sys.stdout.write('\r Progress is ' +
                             str(prg_counter / dataset_len * 100 *
                                 batch_size) + '%'
                             ' loss is: ' + str(loss.item()))
            sys.stdout.write(' Iou is ' + str(iou) + ' conf is ' + str(conf) +
                             ' no_obj conf is ' + str(no_obj_conf))
            sys.stdout.flush()
            del loss, raw_pred, target, true_pred, sample_batched[
                'image'], iou, noobj_box, conf
            torch.cuda.empty_cache()
            prg_counter = prg_counter + 1