def train():
    net.train()
    # per-epoch loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    min_loss = float('inf')
    print('Loading Dataset...')

    dataset = Detection(args.annoPath, PyramidAugmentation(ssd_dim, means), AnnotationTransform())

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on', dataset.name)
    step_index = 0
    step_increase = 0
    if args.visdom:
        # initialize visdom loss plot
        lot = viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, 3)).cpu(),
            opts=dict(
                xlabel='Iteration',
                ylabel='Loss',
                title='Current SSD Training Loss',
                legend=['Loc Loss', 'Conf Loss', 'Loss']
            )
        )
        epoch_lot = viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, 3)).cpu(),
            opts=dict(
                xlabel='Epoch',
                ylabel='Loss',
                title='Epoch SSD Training Loss',
                legend=['Loc Loss', 'Conf Loss', 'Loss']
            )
        )
    batch_iterator = None
    data_loader = data.DataLoader(dataset, batch_size, num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate, pin_memory=True)
    for iteration in range(args.start_iter, max_iter):
        t0 = time.time()
        if (not batch_iterator) or (iteration % epoch_size == 0):
            # create batch iterator
            batch_iterator = iter(data_loader)
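        # the first five entries of stepvalues trigger LR warm-up; later entries decay the LR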
        if iteration in stepvalues:
            if iteration in stepvalues[0:5]:
                step_increase += 1
                warmup_learning_rate(optimizer, args.lr, step_increase)
            else:
                step_index += 1
                adjust_learning_rate(optimizer, gamma, step_index)
            if args.visdom:
                viz.line(
                    X=torch.ones((1, 3)).cpu() * epoch,
                    Y=torch.Tensor([loc_loss, conf_loss,
                                    loc_loss + conf_loss]).unsqueeze(0).cpu() / epoch_size,
                    win=epoch_lot,
                    update='append'
                )
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        # load train data
        images, targets = next(batch_iterator)

        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda(), volatile=True) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        t1 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(tuple(out[0:3]), targets)
        loss_l_head, loss_c_head = criterion(tuple(out[3:6]), targets)

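        # total loss: main-branch losses plus down-weighted 'head' branch losses (out[3:6])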
        loss = loss_l + loss_c + 0.5 * loss_l_head + 0.5 * loss_c_head
        if loss.data[0] < min_loss:
            min_loss = loss.data[0]
            print('min_loss:', min_loss)
            torch.save(ssd_net.state_dict(), args.save_folder + 'best_our_ucsd_Res50_pyramid_aug' + '.pth')
        loss.backward()
        optimizer.step()
        t2 = time.time()
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        if iteration % 50 == 0:
            print('forward+backward time: {} sec.'.format(t2 - t1))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data[0]))
            print('Loss conf: {} Loss loc: {}'.format(loss_c.data[0], loss_l.data[0]))
            print('Loss head conf: {} Loss head loc: {}'.format(loss_c_head.data[0], loss_l_head.data[0]))
            print('lr: {}'.format(optimizer.param_groups[0]['lr']))
            if args.visdom and args.send_images_to_visdom:
                random_batch_index = np.random.randint(images.size(0))
                viz.image(images.data[random_batch_index].cpu().numpy())
        if args.visdom:
            viz.line(
                X=torch.ones((1, 3)).cpu() * iteration,
                Y=torch.Tensor([loss_l.data[0], loss_c.data[0],
                                loss_l.data[0] + loss_c.data[0]]).unsqueeze(0).cpu(),
                win=lot,
                update='append'
            )
            # hacky fencepost solution for 0th epoch plot
            if iteration == 0:
                viz.line(
                    X=torch.zeros((1, 3)).cpu(),
                    Y=torch.Tensor([loc_loss, conf_loss,
                                    loc_loss + conf_loss]).unsqueeze(0).cpu(),
                    win=epoch_lot,
                    update=True
                )
        if iteration % 500 == 0 or iteration in stepvalues:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(), args.save_folder + 'our_ucsd_Res50_pyramid_aug_' +  repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(), args.save_folder + 'our_ucsd_Res50_pyramid_aug' + '.pth')
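The two learning-rate helpers called above are not shown in this listing. Below is a minimal sketch under assumed semantics: a linear ramp across the first warm-up milestones, followed by the usual step decay of args.lr by gamma ** step_index. The warmup_steps default and the ramp shape are guesses, not the original implementation.

def warmup_learning_rate(optimizer, base_lr, step_increase, warmup_steps=5):
    # assumed linear warm-up: ramp the LR toward base_lr over the first milestones
    lr = base_lr * step_increase / warmup_steps
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def adjust_learning_rate(optimizer, gamma, step_index):
    # SSD-style step decay: scale the base LR by gamma ** step_index
    lr = args.lr * (gamma ** step_index)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr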
Example #2
def main():
    args.step_values = [int(val) for val in args.step_values.split(',')]
    # args.loss_reset_step = 10
    args.log_step = 10
    args.dataset = args.dataset.lower()
    args.basenet = args.basenet.lower()

    args.bn = abs(args.bn)  # 0 = freeze BN layers, any other value = update them
    if args.bn > 0:
        args.bn = 1  # normalize the flag to 1 when BN layers are updated

    args.shared_heads = abs(args.shared_heads)  # 0 = heads do not share features, else they do
    if args.shared_heads > 0:
        args.shared_heads = 1

    args.exp_name = 'FPN{:d}-{:s}sh{:02d}-{:s}-bs{:02d}-{:s}-lr{:05d}-bn{:d}'.format(
        args.input_dim, args.anchor_type, args.shared_heads, args.dataset,
        args.batch_size, args.basenet, int(args.lr * 100000), args.bn)

    args.save_root += args.dataset + '/'
    args.save_root = args.save_root + 'cache/' + args.exp_name + '/'

    if not os.path.isdir(args.save_root):  # create the save directory if it doesn't exist
        os.makedirs(args.save_root)

    source_dir = args.save_root + '/source/'  # where to save the source
    utils.copy_source(source_dir)

    anchors = None  # placeholder; filled in below
    with torch.no_grad():
        if args.anchor_type == 'kmeans':
            anchorbox = kanchorBoxes(input_dim=args.input_dim,
                                     dataset=args.dataset)
        else:
            anchorbox = anchorBox(args.anchor_type,
                                  input_dim=args.input_dim,
                                  dataset=args.dataset)
        anchors = anchorbox.forward()
        args.ar = anchorbox.ar

    args.num_anchors = anchors.size(0)

    if args.dataset == 'coco':
        args.train_sets = ['train2017']
        args.val_sets = ['val2017']
    else:
        args.train_sets = ['train2007', 'val2007', 'train2012', 'val2012']
        args.val_sets = ['test2007']

    args.means = [0.485, 0.456, 0.406]
    args.stds = [0.229, 0.224, 0.225]

    print('\nLoading Datasets')
    train_dataset = Detection(args,
                              train=True,
                              image_sets=args.train_sets,
                              transform=Augmentation(args.input_dim,
                                                     args.means, args.stds))
    print('Done loading train dataset :::>>>\n',
          train_dataset.print_str)
    val_dataset = Detection(args,
                            train=False,
                            image_sets=args.val_sets,
                            transform=BaseTransform(args.input_dim, args.means,
                                                    args.stds),
                            full_test=False)
    print('Done loading validation dataset :::>>>\n',
          val_dataset.print_str)

    args.num_classes = len(train_dataset.classes) + 1
    args.classes = train_dataset.classes

    args.head_size = 256
    if args.shared_heads > 0:
        net = build_fpn_shared_heads(args.basenet,
                                     args.model_dir,
                                     ar=args.ar,
                                     head_size=args.head_size,
                                     num_classes=args.num_classes)
    else:
        net = build_fpn_unshared(args.basenet,
                                 args.model_dir,
                                 ar=args.ar,
                                 head_size=args.head_size,
                                 num_classes=args.num_classes)

    net = net.cuda()

    if args.ngpu > 1:
        print("\nLet's do DataParallel\n")
        net = torch.nn.DataParallel(net)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    criterion = MultiBoxLoss()

    scheduler = MultiStepLR(optimizer,
                            milestones=args.step_values,
                            gamma=args.gamma)

    train(args, net, anchors, optimizer, criterion, scheduler, train_dataset,
          val_dataset)
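Unlike the hand-rolled schedule in Example #1, this script delegates LR decay to MultiStepLR, which multiplies the learning rate by gamma each time its step counter passes a milestone. A self-contained toy run showing the behavior (illustrative only; the milestone values here are made up):

import torch
from torch.optim.lr_scheduler import MultiStepLR

params = [torch.nn.Parameter(torch.zeros(1))]
opt = torch.optim.SGD(params, lr=0.01)
sched = MultiStepLR(opt, milestones=[30, 60], gamma=0.1)
for epoch in range(90):
    opt.step()    # training work would go here
    sched.step()  # lr: 0.01, then 1e-3 after epoch 30, then 1e-4 after epoch 60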
Example #3
def train():
    net.train()
    # per-epoch loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading Dataset...')

    dataset = Detection(args.annoPath, PyramidAugmentation(ssd_dim, means), AnnotationTransform())
    print('len(dataset) =', len(dataset))
    print(dataset[0])
    epoch_size = len(dataset) // args.batch_size
    print('Training PyramidBox on', dataset.name)
    step_index = 0
    if args.visdom:
        # initialize visdom loss plot
        lot = viz.line(
            X=np.array(torch.zeros((1,)).cpu()),
            Y=np.array(torch.zeros((1, 3)).cpu()),
            opts=dict(
                xlabel='Iteration',
                ylabel='Loss',
                title='Current PyramidBox Training Loss',
                legend=['Loc Loss', 'Conf Loss', 'Loss']
            )
        )
        epoch_lot = viz.line(
            X=np.array(torch.zeros((1,)).cpu()),
            Y=np.array(torch.zeros((1, 3)).cpu()),
            opts=dict(
                xlabel='Epoch',
                ylabel='Loss',
                title='Epoch PyramidBox Training Loss',
                legend=['Loc Loss', 'Conf Loss', 'Loss']
            )
        )
        lr_lot = viz.line(
            X=np.array(torch.zeros((1,)).cpu()),
            Y=np.array(torch.zeros((1,1)).cpu()),
            opts=dict(
                xlabel='iteration',
                ylabel='learning-rate',
                title='Warm-up',
                legend=['lr']
            )
        )
    batch_iterator = None
    data_loader = data.DataLoader(dataset, batch_size, num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate, pin_memory=True)
    print('data loading finished...')
    for iteration in range(args.start_iter, max_iter):
        t0 = time.time()
        try:
            if (not batch_iterator) or (iteration % epoch_size == 0):
                batch_iterator = iter(data_loader)
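            # the LR is recomputed every iteration here, so warm-up and decay both live inside adjust_learning_rate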
            adjust_learning_rate(optimizer, gamma, iteration)
            if iteration in stepvalues:
                step_index += 1
                if args.visdom:
                    viz.line(
                        X=np.array(torch.ones((1, 3)).cpu()) * epoch,
                        Y=np.array(torch.Tensor([loc_loss, conf_loss,
                                                 loc_loss + conf_loss]).unsqueeze(0).cpu()) / epoch_size,
                        win=epoch_lot,
                        update='append'
                    )
                # reset epoch loss counters
                loc_loss = 0
                conf_loss = 0
                epoch += 1
            # load train data
            images, targets = next(batch_iterator)

            if args.cuda:
                images = Variable(images.cuda())
                targets = [Variable(anno.cuda(), volatile=True) for anno in targets]
            else:
                images = Variable(images)
                targets = [Variable(anno, volatile=True) for anno in targets]
            # forward
            t1 = time.time()
            out = net(images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(tuple(out[0:3]), targets)
            loss_l_head, loss_c_head = criterion(tuple(out[3:6]), targets)
        
            loss = loss_l + loss_c + 0.5 * loss_l_head + 0.5 * loss_c_head
            loss.backward()
            optimizer.step()
            t2 = time.time()
            loc_loss += loss_l.data[0]
            conf_loss += loss_c.data[0]
            if iteration % 10 == 0:
                print('forward+backward time: {} sec.'.format(t2 - t1))
                print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data[0]))
                print('Loss conf: {} Loss loc: {}'.format(loss_c.data[0], loss_l.data[0]))
                print('Loss head conf: {} Loss head loc: {}'.format(loss_c_head.data[0], loss_l_head.data[0]))
                print('lr: {}'.format(optimizer.param_groups[0]['lr']))
                if args.visdom and args.send_images_to_visdom:
                    random_batch_index = np.random.randint(images.size(0))
                    viz.image(images.data[random_batch_index].cpu().numpy())
            if args.visdom:
                viz.line(
                    X=np.array(torch.ones((1, 3)).cpu()) * iteration,
                    Y=np.array(torch.Tensor([loss_l.data[0], loss_c.data[0],
                        loss_l.data[0] + loss_c.data[0]]).unsqueeze(0).cpu()),
                    win=lot,
                    update='append'
                )
                viz.line(
                    X=np.array(torch.ones((1,1)).cpu()) * iteration,
                    Y=np.array(torch.Tensor([optimizer.param_groups[0]['lr']]).unsqueeze(0).cpu()),
                    win=lr_lot,
                    update='append'
                )
                # hacky fencepost solution for 0th epoch plot
                if iteration == 0:
                    viz.line(
                        X=np.array(torch.zeros((1, 3)).cpu()),
                        Y=np.array(torch.Tensor([loc_loss, conf_loss,
                            loc_loss + conf_loss]).unsqueeze(0).cpu()),
                        win=epoch_lot,
                        update=True
                    )
                    viz.line(
                        X=np.array(torch.zeros((1,1)).cpu()),
                        Y=np.array(torch.Tensor([optimizer.param_groups[0]['lr']]).unsqueeze(0).cpu()),
                        win=lr_lot,
                        update=True
                    )

        except TypeError as e:
            print(e)
            print('-' * 20, 'skipping to the next iteration.')
            continue
        except ValueError as e2:
            print(e2)
            print('=' * 20, 'skipping to the next iteration.')
            continue
        if iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(), args.save_folder + 'Res50_pyramid_' +
                       repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(), args.save_folder + 'Res50_pyramid_' + '.pth')
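The train() loops in Examples #1 and #3 pass detection_collate to the DataLoader because each image carries a different number of ground-truth boxes, so the default collate (which stacks every field into one tensor) would fail. A minimal sketch of the usual SSD-style collate; this is assumed, and the real one in these repositories may differ:

import torch

def detection_collate(batch):
    # batch: list of (image_tensor, boxes) pairs from Detection.__getitem__
    images, targets = [], []
    for image, boxes in batch:
        images.append(image)
        targets.append(torch.FloatTensor(boxes))  # keep variable-length [N_i, 5] boxes in a list
    return torch.stack(images, 0), targets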
Example #4
def main():

    args.eval_iters = [int(val) for val in args.eval_iters.split(',')]
    # args.loss_reset_step = 10
    args.log_step = 10
    args.dataset = args.dataset.lower()
    args.basenet = args.basenet.lower()

    args.bn = abs(args.bn)  # 0 = freeze BN layers, any other value = update them
    if args.bn > 0:
        args.bn = 1  # normalize the flag to 1 when BN layers are updated

    args.exp_name = 'FPN{:d}-{:s}sh{:02d}-{:s}-bs{:02d}-{:s}-lr{:05d}-bn{:d}'.format(
        args.input_dim, args.anchor_type, args.shared_heads, args.dataset,
        args.batch_size, args.basenet, int(args.lr * 100000), args.bn)

    args.save_root += args.dataset + '/'
    args.save_root = args.save_root + 'cache/' + args.exp_name + '/'

    if not os.path.isdir(args.save_root):  # create the save directory if it doesn't exist
        os.makedirs(args.save_root)

    source_dir = args.save_root + '/source/'  # where to save the source
    utils.copy_source(source_dir)

    anchors = None  # placeholder; filled in below
    with torch.no_grad():
        if args.anchor_type == 'kmeans':
            anchorbox = kanchorBoxes(input_dim=args.input_dim,
                                     dataset=args.dataset)
        else:
            anchorbox = anchorBox(args.anchor_type,
                                  input_dim=args.input_dim,
                                  dataset=args.dataset)
        anchors = anchorbox.forward()
        args.ar = anchorbox.ar

    args.num_anchors = anchors.size(0)
    anchors = anchors.cuda(0, non_blocking=True)
    if args.dataset == 'coco':
        args.train_sets = ['train2017']
        args.val_sets = ['val2017']
    else:
        args.train_sets = ['train2007', 'val2007', 'train2012', 'val2012']
        args.val_sets = ['test2007']

    args.means = [0.485, 0.456, 0.406]
    args.stds = [0.229, 0.224, 0.225]
    val_dataset = Detection(args,
                            train=False,
                            image_sets=args.val_sets,
                            transform=BaseTransform(args.input_dim, args.means,
                                                    args.stds),
                            full_test=False)
    print('Done loading validation dataset :::>>>\n',
          val_dataset.print_str)
    args.data_dir = val_dataset.root
    args.num_classes = len(val_dataset.classes) + 1
    args.classes = val_dataset.classes
    args.bias_heads = args.bias_heads > 0
    args.head_size = 256
    if args.shared_heads > 0:
        net = build_fpn_shared_heads(args.basenet,
                                     args.model_dir,
                                     ar=args.ar,
                                     head_size=args.head_size,
                                     num_classes=args.num_classes,
                                     bias_heads=args.bias_heads)
    else:
        net = build_fpn_unshared(args.basenet,
                                 args.model_dir,
                                 ar=args.ar,
                                 head_size=args.head_size,
                                 num_classes=args.num_classes,
                                 bias_heads=args.bias_heads)

    net = net.cuda()

    if args.ngpu > 1:
        print("\nLet's do DataParallel\n")
        net = torch.nn.DataParallel(net)
    net.eval()

    for iteration in args.eval_iters:
        args.det_itr = iteration
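        # buffering=1 opens the log file line-buffered, so each line is flushed as it is written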
        log_file = open(
            "{:s}/testing-{:d}.log".format(args.save_root, iteration), "w", 1)
        log_file.write(args.exp_name + '\n')

        args.model_path = args.save_root + '/model_' + repr(iteration) + '.pth'
        log_file.write(args.model_path + '\n')

        net.load_state_dict(torch.load(args.model_path))

        print('Finished loading model %d !' % iteration)
        # Load dataset
        val_data_loader = data_utils.DataLoader(val_dataset,
                                                int(args.batch_size / 2),
                                                num_workers=args.num_workers,
                                                shuffle=False,
                                                pin_memory=True,
                                                collate_fn=custum_collate)

        # evaluation
        torch.cuda.synchronize()
        tt0 = time.perf_counter()
        log_file.write('Testing net \n')
        net.eval()  # switch net to evaluation mode
        if args.dataset != 'coco':
            mAP, ap_all, ap_strs, det_boxes = validate(
                args,
                net,
                anchors,
                val_data_loader,
                val_dataset,
                iteration,
                iou_thresh=args.iou_thresh)
        else:
            mAP, ap_all, ap_strs, det_boxes = validate_coco(
                args,
                net,
                anchors,
                val_data_loader,
                val_dataset,
                iteration,
                iou_thresh=args.iou_thresh)

        for ap_str in ap_strs:
            print(ap_str)
            log_file.write(ap_str + '\n')
        ptr_str = '\nMEANAP:::=>' + str(mAP) + '\n'
        print(ptr_str)
        log_file.write(ptr_str)

        torch.cuda.synchronize()
        print('Complete set time {:0.2f}'.format(time.perf_counter() - tt0))
        log_file.close()
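Usage note: the loop above expects checkpoints saved as model_<iteration>.pth under save_root, so the comma-separated --eval_iters flag selects which saved iterations to score. The invocation below is hypothetical (the argparse setup is not shown):

# python evaluate.py --dataset voc --eval_iters 90000,120000 --iou_thresh 0.5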
Example #5
if args.resume:  # reconstructed guard, implied by the dangling else below
    print('Resuming training, loading {}...'.format(args.resume))
    ssd_net.load_weights(args.resume)
else:
    pass

optimizer = optim.SGD(net.parameters(),
                      lr=args.lr,
                      momentum=momentum,
                      weight_decay=weight_decay)
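# mixed-precision training via NVIDIA Apex; "O2" casts the model to FP16 while keeping FP32 master weights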
ssd_net, optimizer = amp.initialize(ssd_net, optimizer, opt_level="O2")

criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 3, 0.35, False,
                         False, args.cuda)
criterion1 = MultiBoxLoss(num_classes, 0.35, True, 0, True, 3, 0.35, False,
                          True, args.cuda)
dataset = Detection(args.annoPath, PyramidAugmentation(ssd_dim, means),
                    AnnotationTransform())
if args.useMultiProcess:
    torch.distributed.init_process_group(backend=args.dist_backend,
                                         init_method="env://",
                                         world_size=args.world_size,
                                         rank=args.device_ids)
    net = torch.nn.parallel.DistributedDataParallel(
        ssd_net, device_ids=tuple(np.arange(0, args.device_ids)))
    # cudnn.benchmark auto-tunes convolution algorithms for speed, at the cost of
    # some nondeterminism (set cudnn.deterministic=True for reproducibility instead)
    cudnn.benchmark = True
    train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
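    # note: the sampler only takes effect if it is passed to the DataLoader (sampler=train_sampler)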


def train():
    net.train()
    # loss counters