Example #1
def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return

    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size

    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
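    # indexing the tuple with a bool: picks stepvalues_COCO when args.dataset == 'COCO',
    # otherwise stepvalues_VOC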
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0

    if args.visdom:
        # initialize visdom loss plot
        lot = viz.line(X=torch.zeros((1, )).cpu(),
                       Y=torch.zeros((1, 3)).cpu(),
                       opts=dict(xlabel='Iteration',
                                 ylabel='Loss',
                                 title='Current SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
        epoch_lot = viz.line(X=torch.zeros((1, )).cpu(),
                             Y=torch.zeros((1, 3)).cpu(),
                             opts=dict(
                                 xlabel='Epoch',
                                 ylabel='Loss',
                                 title='Epoch SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    lr = args.lr
    log_file = open(log_file_path, 'a')
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            loc_loss = 0
            conf_loss = 0
            if epoch % args.save_frequency == 0 and epoch > 0:
                torch.save(
                    net.state_dict(), save_folder + args.version + '_' +
                    args.dataset + '_epoches_' + repr(epoch) + '.pth')
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
            if args.visdom:
                viz.line(
                    X=torch.ones((1, 3)).cpu() * epoch,
                    Y=torch.Tensor([loc_loss, conf_loss, loc_loss + conf_loss
                                    ]).unsqueeze(0).cpu() / epoch_size,
                    win=epoch_lot,
                    update='append')
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)

        #print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))

        if args.cuda:
            images = Variable(images.cuda())
            targets = [
                Variable(anno.cuda(), volatile=True) for anno in targets
            ]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        load_t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Total iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.data[0], loss_c.data[0]) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
            log_file.write('Epoch:' + repr(epoch) + ' || epochiter: ' +
                           repr(iteration % epoch_size) + '/' +
                           repr(epoch_size) + '|| Total iter ' +
                           repr(iteration) + ' || L: %.4f C: %.4f||' %
                           (loss_l.data[0], loss_c.data[0]) +
                           'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                           'LR: %.8f' % (lr) + '\n')
            if args.visdom and args.send_images_to_visdom:
                random_batch_index = np.random.randint(images.size(0))
                viz.image(images.data[random_batch_index].cpu().numpy())
        if args.visdom:
            viz.line(X=torch.ones((1, 3)).cpu() * iteration,
                     Y=torch.Tensor([
                         loss_l.data[0], loss_c.data[0],
                         loss_l.data[0] + loss_c.data[0]
                     ]).unsqueeze(0).cpu(),
                     win=lot,
                     update='append')
            if iteration == 0:
                viz.line(X=torch.zeros((1, 3)).cpu(),
                         Y=torch.Tensor(
                             [loc_loss, conf_loss,
                              loc_loss + conf_loss]).unsqueeze(0).cpu(),
                         win=epoch_lot,
                         update=True)
    log_file.close()
    torch.save(
        net.state_dict(),
        save_folder + 'Final_' + args.version + '_' + args.dataset + '.pth')
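

# `adjust_learning_rate` is called in the loop above but not defined in these snippets.
# A minimal sketch consistent with how it is called; the warmup length, the floor
# learning rate, and the reliance on the global `args.lr` are assumptions, not taken
# from the original code:
def adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size):
    if epoch < 6:
        # linear warmup from a small floor LR up to args.lr over the first epochs
        lr = 1e-6 + (args.lr - 1e-6) * iteration / (epoch_size * 5)
    else:
        # step decay: scale the base LR by gamma at every milestone in stepvalues
        lr = args.lr * (gamma ** step_index)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr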
Example #2
def main():
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Testing')
    parser.add_argument('--resume',
                        dest='resume',
                        help='initialize with pretrained model weights',
                        default='./weights/ic15_90_15.pth',
                        type=str)
    parser.add_argument('--version',
                        dest='version',
                        help='512x512, 768x768, 768x1280, 1280x1280',
                        default='768x1280',
                        type=str)
    parser.add_argument('--dataset',
                        dest='dataset',
                        help='ic15, ic13, td500, coco',
                        default='ic15',
                        type=str)
    parser.add_argument('--works',
                        dest='num_workers',
                        help='num_workers to load data',
                        default=1,
                        type=int)
    parser.add_argument('--test_batch_size',
                        dest='test_batch_size',
                        help='test batch size',
                        default=1,
                        type=int)
    parser.add_argument('--out',
                        dest='out',
                        help='output file dir',
                        default='./outputs_eval/ic15/',
                        type=str)
    parser.add_argument('--log_file_dir',
                        dest='log_file_dir',
                        help='log_file_dir',
                        default='./logs/',
                        type=str)
    parser.add_argument('--ssd_dim', default=512, type=int, help='ssd dim')

    #parser.add_argument('--root', default='../../DataSets/text_detect/',type=str,  help='Location of data root directory')
    parser.add_argument('--ic_root',
                        default='../data/ocr/detection/',
                        type=str,
                        help='Location of data root directory')
    # parser.add_argument('--ic_root', default='/home/lvpengyuan/research/text/',type=str,  help='Location of data root directory')
    parser.add_argument('--td_root',
                        default='/home/lpy/Datasets/TD&&TR/',
                        type=str,
                        help='Location of data root directory')
    parser.add_argument('--coco_root',
                        default='/home/lpy/Datasets/coco-text/',
                        type=str,
                        help='Location of data root directory')
    args = parser.parse_args()
    cuda = torch.cuda.is_available()
    ## setup logger
    if os.path.exists(args.log_file_dir) == False:
        os.mkdir(args.log_file_dir)
    log_file_path = args.log_file_dir + 'eval_' + time.strftime(
        '%Y%m%d_%H%M%S') + '.log'
    setup_logger(log_file_path)

    if args.version == '512x512':
        cfg = cfg_512x512
    elif args.version == '768x768':
        cfg = cfg_768x768
    elif args.version == '1280x1280':
        cfg = cfg_1280x1280
    elif args.version == '768x1280':
        cfg = cfg_768x1280
    else:
        exit()

    ssd_dim = args.ssd_dim
    means = (104, 117, 123)

    if args.dataset == 'ic15':
        dataset = ICDARDetection(args.ic_root,
                                 'val',
                                 None,
                                 None,
                                 '15',
                                 dim=cfg['min_dim'])
        data_loader = data.DataLoader(dataset,
                                      args.test_batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      pin_memory=True)
    elif args.dataset == 'ic13':
        dataset = ICDARDetection(args.ic_root,
                                 'val',
                                 None,
                                 None,
                                 '13',
                                 dim=cfg['min_dim'])
        data_loader = data.DataLoader(dataset,
                                      args.test_batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      pin_memory=True)
    elif args.dataset == 'td500':
        dataset = TD500Detection(args.td_root,
                                 'val',
                                 None,
                                 None,
                                 aug=False,
                                 dim=cfg['min_dim'])
        data_loader = data.DataLoader(dataset,
                                      args.test_batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      pin_memory=True)
    elif args.dataset == 'coco':
        dataset = COCODetection(args.coco_root, 'test', dim=cfg['min_dim'])
        data_loader = data.DataLoader(dataset,
                                      args.test_batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      pin_memory=True)
    else:
        exit()

    logging.info('dataset initialize done.')

    ## setup model

    net = build_dssd('test', cfg, ssd_dim, 2)

    logging.info('loading {}...'.format(args.resume))
    net.load_weights(args.resume)
    rpsroi_pool = RPSRoIPool(2, 2, 1, 2, 1)
    if cuda:
        net = net.cuda()
        rpsroi_pool = rpsroi_pool.cuda()
    net.eval()
    rpsroi_pool.eval()
    if os.path.exists(args.out) == False:
        os.makedirs(args.out)
    save_dir = args.out + '/' + args.resume.strip().split('_')[-1].split(
        '.')[0] + '/'
    if os.path.exists(save_dir) == False:
        os.mkdir(save_dir)
    seg_dir = save_dir + 'seg/'
    box_dir = save_dir + 'box/'
    res_dir = save_dir + 'res/'

    if os.path.exists(seg_dir) == False:
        os.mkdir(seg_dir)
        os.mkdir(box_dir)
        os.mkdir(res_dir)
    logging.info('eval begin')
    for i, sample in enumerate(data_loader, 0):
        img, image_name, ori_h, ori_w = sample
        # print(image_name)
        if i % 100 == 0:
            print(i, len(data_loader))
        h, w = img.size(2), img.size(3)
        if cuda:
            img = img.cuda()
        img = Variable(img)
        out, seg_pred, seg_map = net(img)
        save_name = image_name[0].split('/')[-1].split('.')[0]
        candidate_box = eval_img(out,
                                 seg_pred,
                                 seg_map,
                                 rpsroi_pool,
                                 img,
                                 save_name,
                                 seg_dir,
                                 box_dir,
                                 vis=True)

        # format output
        if args.dataset == 'coco':
            save_name = save_name.strip().split('_')[-1]
            save_name = str(int(save_name))
        res_name = res_dir + '/' + 'res_' + save_name + '.txt'
        fp = open(res_name, 'w')
        for box in candidate_box:
            temp_x = []
            temp_y = []
            temp = []
            for j in range(len(box) - 1):
                if j % 2 == 0:
                    temp_x.append(int(box[j] * ori_w[0] / w))
                    temp.append(str(int(box[j] * ori_w[0] / w)))
                else:
                    temp_y.append(int(box[j] * ori_h[0] / h))
                    temp.append(str(int(box[j] * ori_h[0] / h)))
            if args.dataset == 'ic13':
                fp.write(','.join([
                    str(min(temp_x)),
                    str(min(temp_y)),
                    str(max(temp_x)),
                    str(max(temp_y))
                ]) + '\n')
            elif args.dataset == 'coco':
                fp.write(','.join([
                    str(min(temp_x)),
                    str(min(temp_y)),
                    str(max(temp_x)),
                    str(max(temp_y)),
                    str(box[-1])
                ]) + '\n')
            else:
                fp.write(','.join(temp) + '\n')
        fp.close()

    logging.info('evaluate done')
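

# The inline rescaling above maps each polygon vertex from the network input size
# (w, h) back to the original image size (ori_w, ori_h): x coordinates sit at even
# indices, y coordinates at odd indices, and box[-1] holds the score. A helper with
# the same arithmetic could look like this (the function name is hypothetical, not
# part of the snippet):
def rescale_polygon(coords, ori_w, ori_h, w, h):
    """Rescale an interleaved (x0, y0, x1, y1, ...) coordinate list (score excluded)."""
    scaled = []
    for j, c in enumerate(coords):
        if j % 2 == 0:   # x coordinate
            scaled.append(int(c * ori_w / w))
        else:            # y coordinate
            scaled.append(int(c * ori_h / h))
    return scaled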
Example #3
detector = Detect(num_classes, 0, cfg)
optimizer = optim.SGD(net.parameters(), lr=args.lr,
                      momentum=args.momentum, weight_decay=args.weight_decay)

criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
priorbox = PriorBox(cfg)
priors = Variable(priorbox.forward())
# dataset
print('Loading Dataset...')
if args.dataset == 'VOC':
    testset = VOCDetection(
        VOCroot, [('2007', 'test')], None, AnnotationTransform())
    train_dataset = VOCDetection(VOCroot, train_sets, preproc(
        img_dim, rgb_means, rgb_std, p), AnnotationTransform())
elif args.dataset == 'COCO':
    testset = COCODetection(
        COCOroot, [('2017', 'val')], None)
    #testset = COCODetection(COCOroot, [('2017', 'test-dev')], None)
    train_dataset = COCODetection(COCOroot, train_sets, preproc(
        img_dim, rgb_means, rgb_std, p))
else:
    print('Only VOC and COCO are supported now!')
    exit()


def train():
    net.train()
    # loss counters
    epoch = 0
    if args.resume_net:
        epoch = 0 + args.resume_epoch
    epoch_size = len(train_dataset) // args.batch_size
Example #4
# optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08,
#                     momentum=args.momentum, weight_decay=args.weight_decay)

criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
priorbox = PriorBox(cfg)
priors = Variable(priorbox.forward(), volatile=True)
# dataset
print('Loading Dataset...')
if args.dataset == 'VOC':
    testset = VOCDetection(VOCroot, [('2007', 'test')], None,
                           AnnotationTransform())
    train_dataset = VOCDetection(
        VOCroot, train_sets, preproc(img_dim, rgb_means, p=p, rgb_std=rgb_std),
        AnnotationTransform())
elif args.dataset == 'COCO':
    testset = COCODetection(COCOroot, [('2014', 'valminusminival')], None)
    train_dataset = COCODetection(
        COCOroot, train_sets, preproc(img_dim, rgb_means, p=p,
                                      rgb_std=rgb_std))
else:
    print('Only VOC and COCO are supported now!')
    exit()


def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0
    if args.resume_net:
Example #5
def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return

    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size

    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (100 * epoch_size, 135 * epoch_size, 170 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
        for sv in stepvalues:
            if start_iter > sv:
                step_index += 1
            else:
                break
    else:
        start_iter = 0

    lr = args.lr
    avg_loss_list = []
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            avg_loss = (loc_loss + conf_loss) / epoch_size
            avg_loss_list.append(avg_loss)
            print("avg_loss_list:")
            if len(avg_loss_list) <= 5:
                print(avg_loss_list)
            else:
                print(avg_loss_list[-5:])
            loc_loss = 0
            conf_loss = 0
            if (epoch % 10 == 0):
                torch.save(
                    net.state_dict(), args.save_folder + args.version + '_' +
                    args.dataset + '_epoches_' + repr(epoch) + '.pth')
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        images, targets = next(batch_iterator)

        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        out = net(images)
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        # if epoch > args.warm_epoch:
        #     updateBN()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % 10 == 0:
            print(
                'Epoch:' + repr(epoch) + ' || epochiter: ' +
                repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                '|| Total iter ' + repr(iteration) +
                ' || L: %.4f C: %.4f S: %.4f||' %
                (loss_l.item(), loss_c.item(), loss_l.item() + loss_c.item()) +
                'Batch time: %.4f ||' % (load_t1 - load_t0) + 'LR: %.7f' %
                (lr))

    torch.save(
        net.state_dict(), args.save_folder + 'Final_' + args.version + '_' +
        args.dataset + '.pth')
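

# `detection_collate` is passed to the DataLoader above but not shown here. The
# conventional SSD-style implementation (an assumption, not copied from this codebase)
# stacks the images and keeps the variable-length annotation tensors in a plain list:
def detection_collate(batch):
    imgs = []
    targets = []
    for img, target in batch:
        imgs.append(img)                           # CHW image tensor
        targets.append(torch.FloatTensor(target))  # (num_boxes, 5): box coords + label
    return torch.stack(imgs, 0), targets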
Example #6
def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')
    f_writer.write('Loading Dataset...\n')

    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets, preproc(
            img_dim, rgb_means, p), AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets, preproc(
            img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return

    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size

    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    f_writer.write('Training ' + args.version + ' on ' + dataset.name + '\n')
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    lr = args.lr

    loss = [None] * 2
    loss_l = [None] * 2
    loss_c = [None] * 2

    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(data.DataLoader(dataset, batch_size,
                                                  shuffle=True, num_workers=args.num_workers, collate_fn=detection_collate))
            loc_loss = 0
            conf_loss = 0
            if (epoch % 40 == 0 and epoch > 0) or (epoch % 10 ==0 and epoch > 200):
                torch.save(net.state_dict(), args.save_folder+args.version+'_'+args.dataset + '_epoches_'+
                           repr(epoch) + '_refine_agnostic_{}.pth.{}'.format(C_agnostic, args.extra))
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index, iteration, epoch_size)


        # load train data
        targets = [None] * 2
        images, targets[1] = next(batch_iterator)

        targets[0] = [None] * len(targets[1])
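        # stage-1 targets: with C_agnostic, collapse every foreground label to 1
        # (object vs. background); stage 2 keeps the original class labels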
        if C_agnostic:
            for i in range(len(targets[1])):
                targets[0][i] = targets[1][i].clone()
                targets[0][i][:,4] = targets[0][i][:,4].ge(1)
        else:
            targets[0] = targets[1]
        
        #print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))

        if args.cuda:
            images = Variable(images.cuda())
            targets[0] = [Variable(anno.cuda(),volatile=True) for anno in targets[0]]
            targets[1] = [Variable(anno.cuda(),volatile=True) for anno in targets[1]]
        else:
            images = Variable(images)
            targets[0] = [Variable(anno, volatile=True) for anno in targets[0]]
            targets[1] = [Variable(anno, volatile=True) for anno in targets[1]]
        # forward
        t0 = time.time()
        out = net(images)

        ### calculation refined anchors
        # loc_data = Variable(out[0][0].data.clone(), volatile=True)
        loc_data = out[0][0].data.clone()
        conf_data = Variable(out[0][1].data.clone(), volatile=True)
        ## decode and clamp
        r_priors = decode(loc_data, priors.data, cfg['variance'])
        if args.bp_anchors:
            r_priors = Variable(r_priors, requires_grad=True)
        else:
            r_priors = Variable(r_priors, volatile=True)

        # for i in range(loc_data.size(0)):
        #     z = box_utils.decode(loc_data.data[i,:,:], priors.data, cfg['variance'])
        #     # loc_data[i,:,:].clamp_(0,1)

        # backprop
        optimizer.zero_grad()

        loss_l[0], loss_c[0], pass_index = criterion[0](out[0], priors, targets[0])
        loss[0] = loss_l[0] + loss_c[0]
        
        loss_l[1], loss_c[1], _ = criterion[1](out[1], r_priors, targets[1], pass_index)
        loss[1] = loss_l[1] + loss_c[1]


        loss_total = loss[0] + loss[1]
        loss_total.backward()
        optimizer.step()
        t1 = time.time()
        # loc_loss += loss_l.data[0]
        # conf_loss += loss_c.data[0]
        load_t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' + repr(iteration % epoch_size) + '/' + repr(epoch_size)
                  + '|| Total iter ' +
                  repr(iteration) + ' || L1: %.4f C1: %.4f||' % (loss_l[0].data[0],loss_c[0].data[0]) + 
                  ' || L2: %.4f C2: %.4f||' % (loss_l[1].data[0],loss_c[1].data[0]) + 
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) + 'LR: %.8f' % (lr))
            f_writer.write('Epoch:' + repr(epoch) + ' || epochiter: ' + repr(iteration % epoch_size) + '/' + repr(epoch_size)
                  + '|| Total iter ' +
                  repr(iteration) + ' || L1: %.4f C1: %.4f||' % (loss_l[0].data[0],loss_c[0].data[0]) + 
                  ' || L2: %.4f C2: %.4f||' % (loss_l[1].data[0],loss_c[1].data[0]) + 
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) + 'LR: %.8f' % (lr) + '\n')

    torch.save(net.state_dict(), args.save_folder +
               'Final_' + args.version +'_' + args.dataset+ '_refine_agnostic_{}.pth.{}'.format(C_agnostic, args.extra))

    f_writer.write('training finished!\n')
    f_writer.close()
    dataset.evaluate_detections(all_boxes, output_dir)
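

# `decode` (used above to turn the stage-1 localization offsets plus the priors into
# refined anchors) follows the standard SSD box encoding. A minimal single-image
# sketch, assuming (cx, cy, w, h) priors and a two-element variance list:
def decode(loc, priors, variances):
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],  # centers
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)   # sizes
    boxes[:, :2] -= boxes[:, 2:] / 2   # (cx, cy) -> (xmin, ymin)
    boxes[:, 2:] += boxes[:, :2]       # (w, h)   -> (xmax, ymax)
    return boxes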


if __name__ == '__main__':
    # load net
    num_classes = len(labelmap) + 1  # +1 for background
    # net = build_refinedet('test', int(args.input_size), num_classes)            # initialize SSD
    net = build_refinedet('test', int(args.input_size), 81)  # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data

    dataset = COCODetection(
        root=COCO_ROOT,
        image_set="val2017",
        transform=BaseTransform(320, dataset_mean),
    )
    shutil.rmtree("./result")
    os.mkdir("./result")

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    test_net(args.save_folder,
             net,
             args.cuda,
             dataset,
             BaseTransform(net.size, dataset_mean),
             args.top_k,
Example #8
    def Train(self,
              epochs=200,
              log_iters=True,
              output_weights_dir="weights",
              saved_epoch_interval=10):
        self.system_dict["params"]["max_epoch"] = epochs
        self.system_dict["params"]["log_iters"] = log_iters
        self.system_dict["params"]["save_folder"] = output_weights_dir

        if not os.path.exists(self.system_dict["params"]["save_folder"]):
            os.mkdir(self.system_dict["params"]["save_folder"])

        if (self.system_dict["params"]["size"] == 300):
            cfg = COCO_300
        else:
            cfg = COCO_512

        if self.system_dict["params"]["version"] == 'RFB_vgg':
            from models.RFB_Net_vgg import build_net
        elif self.system_dict["params"]["version"] == 'RFB_E_vgg':
            from models.RFB_Net_E_vgg import build_net
        elif self.system_dict["params"]["version"] == 'RFB_mobile':
            from models.RFB_Net_mobile import build_net
            cfg = COCO_mobile_300
        else:
            print('Unknown version!')

        img_dim = (300, 512)[self.system_dict["params"]["size"] == 512]
        rgb_means = ((104, 117, 123), (
            103.94, 116.78,
            123.68))[self.system_dict["params"]["version"] == 'RFB_mobile']
        p = (0.6, 0.2)[self.system_dict["params"]["version"] == 'RFB_mobile']

        f = open(
            self.system_dict["dataset"]["train"]["root_dir"] + "/" +
            self.system_dict["dataset"]["train"]["coco_dir"] +
            "/annotations/classes.txt", 'r')
        lines = f.readlines()
        # one class name per line in classes.txt; the SSD-style head reserves an
        # extra slot for the background class
        if lines[-1] == "":
            num_classes = len(lines) - 1
        else:
            num_classes = len(lines) + 1

        batch_size = self.system_dict["params"]["batch_size"]
        weight_decay = self.system_dict["params"]["weight_decay"]
        gamma = self.system_dict["params"]["gamma"]
        momentum = self.system_dict["params"]["momentum"]

        self.system_dict["local"]["net"] = build_net('train', img_dim,
                                                     num_classes)

        if self.system_dict["params"]["resume_net"] == None:
            base_weights = torch.load(self.system_dict["params"]["basenet"])
            print('Loading base network...')
            self.system_dict["local"]["net"].base.load_state_dict(base_weights)

            def xavier(param):
                init.xavier_uniform_(param)

            def weights_init(m):
                for key in m.state_dict():
                    if key.split('.')[-1] == 'weight':
                        if 'conv' in key:
                            init.kaiming_normal_(m.state_dict()[key],
                                                 mode='fan_out')
                        if 'bn' in key:
                            m.state_dict()[key][...] = 1
                    elif key.split('.')[-1] == 'bias':
                        m.state_dict()[key][...] = 0

            print('Initializing weights...')
            # initialize newly added layers' weights with kaiming_normal method
            self.system_dict["local"]["net"].extras.apply(weights_init)
            self.system_dict["local"]["net"].loc.apply(weights_init)
            self.system_dict["local"]["net"].conf.apply(weights_init)
            self.system_dict["local"]["net"].Norm.apply(weights_init)
            if self.system_dict["params"]["version"] == 'RFB_E_vgg':
                self.system_dict["local"]["net"].reduce.apply(weights_init)
                self.system_dict["local"]["net"].up_reduce.apply(weights_init)

        else:
            # load resume network
            print('Loading resume network...')
            state_dict = torch.load(self.system_dict["params"]["resume_net"])
            # create new OrderedDict that does not contain `module.`
            from collections import OrderedDict
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                head = k[:7]
                if head == 'module.':
                    name = k[7:]  # remove `module.`
                else:
                    name = k
                new_state_dict[name] = v
            self.system_dict["local"]["net"].load_state_dict(new_state_dict)

        if self.system_dict["params"]["ngpu"] > 1:
            self.system_dict["local"]["net"] = torch.nn.DataParallel(
                self.system_dict["local"]["net"],
                device_ids=list(range(self.system_dict["params"]["ngpu"])))

        if self.system_dict["params"]["cuda"]:
            self.system_dict["local"]["net"].cuda()
            cudnn.benchmark = True

        optimizer = optim.SGD(
            self.system_dict["local"]["net"].parameters(),
            lr=self.system_dict["params"]["lr"],
            momentum=self.system_dict["params"]["momentum"],
            weight_decay=self.system_dict["params"]["weight_decay"])
        #optimizer = optim.RMSprop(self.system_dict["local"]["net"].parameters(), lr=self.system_dict["params"]["lr"], alpha = 0.9, eps=1e-08,
        #                      momentum=self.system_dict["params"]["momentum"], weight_decay=self.system_dict["params"]["weight_decay"])

        criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5,
                                 False)
        priorbox = PriorBox(cfg)
        with torch.no_grad():
            priors = priorbox.forward()
            if self.system_dict["params"]["cuda"]:
                priors = priors.cuda()

        self.system_dict["local"]["net"].train()
        # loss counters
        loc_loss = 0  # epoch
        conf_loss = 0
        epoch = 0 + self.system_dict["params"]["resume_epoch"]
        print('Loading Dataset...')

        if (os.path.isdir("coco_cache")):
            os.system("rm -r coco_cache")

        dataset = COCODetection(
            self.system_dict["dataset"]["train"]["root_dir"],
            self.system_dict["dataset"]["train"]["coco_dir"],
            self.system_dict["dataset"]["train"]["set_dir"],
            preproc(img_dim, rgb_means, p))

        epoch_size = len(dataset) // self.system_dict["params"]["batch_size"]
        max_iter = self.system_dict["params"]["max_epoch"] * epoch_size

        stepvalues = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
        print('Training', self.system_dict["params"]["version"], 'on',
              dataset.name)
        step_index = 0

        if self.system_dict["params"]["resume_epoch"] > 0:
            start_iter = self.system_dict["params"]["resume_epoch"] * epoch_size
        else:
            start_iter = 0

        lr = self.system_dict["params"]["lr"]

        for iteration in range(start_iter, max_iter):
            if iteration % epoch_size == 0:
                # create batch iterator
                batch_iterator = iter(
                    data.DataLoader(
                        dataset,
                        batch_size,
                        shuffle=True,
                        num_workers=self.system_dict["params"]["num_workers"],
                        collate_fn=detection_collate))
                loc_loss = 0
                conf_loss = 0

                torch.save(
                    self.system_dict["local"]["net"].state_dict(),
                    self.system_dict["params"]["save_folder"] + "/" +
                    self.system_dict["params"]["version"] + '_' +
                    self.system_dict["params"]["dataset"] + '_epoches_' +
                    'intermediate' + '.pth')
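                # note: this intermediate checkpoint is overwritten at the start of
                # every epoch; only the final save below gets a unique file name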
                epoch += 1

            load_t0 = time.time()
            if iteration in stepvalues:
                step_index += 1
            lr = self.adjust_learning_rate(optimizer,
                                           self.system_dict["params"]["gamma"],
                                           epoch, step_index, iteration,
                                           epoch_size)

            # load train data
            images, targets = next(batch_iterator)

            #print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))

            if self.system_dict["params"]["cuda"]:
                images = Variable(images.cuda())
                targets = [Variable(anno.cuda()) for anno in targets]
            else:
                images = Variable(images)
                targets = [Variable(anno) for anno in targets]
            # forward
            t0 = time.time()
            out = self.system_dict["local"]["net"](images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, priors, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()
            load_t1 = time.time()
            if iteration % saved_epoch_interval == 0:
                print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                      repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                      '|| Current iter ' + repr(iteration) + '|| Total iter ' +
                      repr(max_iter) + ' || L: %.4f C: %.4f||' %
                      (loss_l.item(), loss_c.item()) +
                      'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                      'LR: %.8f' % (lr))

        torch.save(
            self.system_dict["local"]["net"].state_dict(),
            self.system_dict["params"]["save_folder"] + "/" + 'Final_' +
            self.system_dict["params"]["version"] + '_' +
            self.system_dict["params"]["dataset"] + '.pth')
Example #9
def main():
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Testing')
    parser.add_argument('--resume',
                        dest='resume',
                        help='initialize with pretrained model weights',
                        default='./weights/ic13_60.pth',
                        type=str)
    parser.add_argument('--version',
                        dest='version',
                        help='512x512, 768x768, 768x1280, 1280x1280',
                        default='768x768',
                        type=str)
    parser.add_argument('--dataset',
                        dest='dataset',
                        help='ic15, ic13, td500, coco, mlt',
                        default='ic13',
                        type=str)
    parser.add_argument('--works',
                        dest='num_workers',
                        help='num_workers to load data',
                        default=1,
                        type=int)
    parser.add_argument('--test_batch_size',
                        dest='test_batch_size',
                        help='test batch size',
                        default=1,
                        type=int)
    parser.add_argument('--out',
                        dest='out',
                        help='output file dir',
                        default='./outputs/imgs/ic13/',
                        type=str)
    parser.add_argument('--log_file_dir',
                        dest='log_file_dir',
                        help='log_file_dir',
                        default='./logs/',
                        type=str)
    parser.add_argument('--ssd_dim', default=512, type=int, help='ssd dim')

    parser.add_argument('--ic_root',
                        default='../data/ocr/detection/',
                        type=str,
                        help='Location of data root directory')
    parser.add_argument('--td_root',
                        default='/home/lpy/Datasets/TD&&TR/',
                        type=str,
                        help='Location of data root directory')
    parser.add_argument('--coco_root',
                        default='/home/lpy/Datasets/coco-text/',
                        type=str,
                        help='Location of data root directory')
    parser.add_argument('--mlt_root',
                        default='/home/lpy/Datasets/MLT_test/',
                        type=str,
                        help='Location of data root directory')
    parser.add_argument('--vis',
                        default=True,
                        type=bool,
                        help='Vis the bounding box')
    args = parser.parse_args()
    cuda = torch.cuda.is_available()
    ## setup logger
    if os.path.exists(args.log_file_dir) == False:
        os.mkdir(args.log_file_dir)
    log_file_path = args.log_file_dir + 'eval_' + time.strftime(
        '%Y%m%d_%H%M%S') + '.log'
    setup_logger(log_file_path)
    ##versions  = ['512x512', '768x768', '768x1280', '1280x1280']
    versions = ['768x768']
    cfgs = []
    print(args.dataset)

    if '512x512' in versions:
        cfgs.append(cfg_512x512)
    if '768x768' in versions:
        cfgs.append(cfg_768x768)
    if '768x1280' in versions:
        cfgs.append(cfg_768x1280)
    if '1280x1280' in versions:
        cfgs.append(cfg_1280x1280)

    if args.dataset == 'ic15':
        test_nums = 500
    elif args.dataset == 'ic13':
        test_nums = 233
    elif args.dataset == 'td500':
        test_nums = 200
    elif args.dataset == 'coco':
        test_nums = 10000
    elif args.dataset == 'mlt':
        test_nums = 9000
    else:
        exit()

    boxes = []
    for i in range(test_nums):
        boxes.append([])
    ssd_dim = args.ssd_dim
    means = (104, 117, 123)

    rpsroi_pool = RPSRoIPool(2, 2, 1, 2, 1)
    rpsroi_pool = rpsroi_pool.cuda()
    rpsroi_pool.eval()

    if os.path.exists(args.out) == False:
        os.makedirs(args.out)
    save_dir = args.out + '/' + args.resume.strip().split('_')[-1].split(
        '.')[0] + '/'
    if os.path.exists(save_dir) == False:
        os.mkdir(save_dir)
    seg_dir = save_dir + 'seg/'
    box_dir = save_dir + 'box/'
    res_dir = save_dir + 'res/'

    if os.path.exists(seg_dir) == False:
        os.mkdir(seg_dir)
        os.mkdir(box_dir)
        os.mkdir(res_dir)
    logging.info('eval begin')

    for cfg in cfgs:
        if args.dataset == 'ic15':
            dataset = ICDARDetection(args.ic_root,
                                     'val',
                                     None,
                                     None,
                                     '15',
                                     dim=cfg['min_dim'])
            data_loader = data.DataLoader(dataset,
                                          args.test_batch_size,
                                          num_workers=args.num_workers,
                                          shuffle=False,
                                          pin_memory=True)
        elif args.dataset == 'ic13':
            dataset = ICDARDetection(args.ic_root,
                                     'val',
                                     None,
                                     None,
                                     '13',
                                     dim=cfg['min_dim'])
            data_loader = data.DataLoader(dataset,
                                          args.test_batch_size,
                                          num_workers=args.num_workers,
                                          shuffle=False,
                                          pin_memory=True)
        elif args.dataset == 'td500':
            dataset = TD500Detection(args.td_root,
                                     'val',
                                     None,
                                     None,
                                     aug=False,
                                     dim=cfg['min_dim'])
            data_loader = data.DataLoader(dataset,
                                          args.test_batch_size,
                                          num_workers=args.num_workers,
                                          shuffle=False,
                                          pin_memory=True)
        elif args.dataset == 'coco':
            dataset = COCODetection(args.coco_root, 'val', dim=cfg['min_dim'])
            data_loader = data.DataLoader(dataset,
                                          args.test_batch_size,
                                          num_workers=args.num_workers,
                                          shuffle=False,
                                          pin_memory=True)
        elif args.dataset == 'mlt':
            dataset = MLTDetection(args.mlt_root, 'test', dim=cfg['min_dim'])
            data_loader = data.DataLoader(dataset,
                                          args.test_batch_size,
                                          num_workers=args.num_workers,
                                          shuffle=False,
                                          pin_memory=True)

        else:
            exit()

        logging.info('dataset initialize done.')

        ## setup model
        logging.info('loading {}...'.format(args.resume))
        net = build_dssd('test', cfg, ssd_dim, 2).cuda()
        net.load_weights(args.resume)
        net.eval()
        logging.info('begin')
        for i, sample in enumerate(data_loader, 0):
            img, image_name, ori_h, ori_w = sample
            # print(image_name)
            if i % 100 == 0:
                print(i, len(data_loader))
            h, w = img.size(2), img.size(3)
            if cuda:
                img = img.cuda()
            img = Variable(img)
            out, seg_pred, seg_map = net(img)
            candidate_boxes = eval_img(out, seg_pred, seg_map, rpsroi_pool,
                                       img)
            temp_boxes = []
            for box in candidate_boxes:
                temp_box = []
                for k in range(len(box) - 1):
                    if k % 2 == 0:
                        temp_box.append(int(box[k] * ori_w[0] / w))
                    else:
                        temp_box.append(int(box[k] * ori_h[0] / h))
                temp_box.append(box[-1])
                temp_boxes.append(temp_box)
            boxes[i] = boxes[i] + temp_boxes
        logging.info('forward done')
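    # every configured input resolution has now been run on the full test set; merge
    # each image's accumulated boxes with polygon NMS and write the final results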
    for i, sample in enumerate(data_loader, 0):
        img, image_name, ori_h, ori_w = sample
        save_name = image_name[0].split('/')[-1].split('.')[0]
        temp_boxes = boxes[i]

        keep = ploy_nms(temp_boxes, 0.3)
        keep_box = []
        for j, item in enumerate(temp_boxes):
            if j in keep:
                keep_box.append(item)
        if args.vis == True:
            box_img = show_box(img, keep_box, ori_h, ori_w)
            box_img.save(box_dir + '/' + save_name + '.jpg')

        # format output
        if args.dataset == 'coco':
            save_name = save_name.strip().split('_')[-1]
            save_name = str(int(save_name))
        if args.dataset == 'mlt':
            save_name = save_name[3:]
        res_name = res_dir + '/' + 'res_' + save_name + '.txt'
        fp = open(res_name, 'w')
        for box in keep_box:
            temp_x = []
            temp_y = []
            temp = []
            for j in range(len(box) - 1):
                if j % 2 == 0:
                    temp_x.append(box[j])
                    temp.append(str(box[j]))
                else:
                    temp_y.append(box[j])
                    temp.append(str(box[j]))
            if args.dataset == 'ic13':
                fp.write(','.join([
                    str(min(temp_x)),
                    str(min(temp_y)),
                    str(max(temp_x)),
                    str(max(temp_y))
                ]) + '\n')
            elif args.dataset == 'coco':
                #fp.write(','.join([temp[0], temp[1], temp[4], temp[5], box[-1]]) + '\n')
                fp.write(','.join([
                    str(min(temp_x)),
                    str(min(temp_y)),
                    str(max(temp_x)),
                    str(max(temp_y)),
                    str(box[-1])
                ]) + '\n')
            elif args.dataset == 'mlt':
                fp.write(','.join(temp + [str(box[-1])]) + '\n')
            else:
                fp.write(','.join(temp) + '\n')
        fp.close()
    logging.info('evaluate done')
Example #10
def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    if args.dataset == 'Logo':
        dataset = LogoDetection(Logoroot, train_sets,
                                preproc(img_dim, rgb_means, p),
                                AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only Logo and COCO are supported now!')
        return

    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size

    stepvalues_Logo = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_Logo, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    lr = args.lr
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=collate_minibatch))
            #batch_iterator = iter(data.DataLoader(dataset, batch_size,
            #                                      shuffle=True, num_workers=args.num_workers,collate_fn=collate_minibatch))
            loc_loss = 0
            conf_loss = 0
            if epoch % 5 == 0 and epoch > 0:
                torch.save(
                    net.state_dict(), args.save_folder + args.version + '_' +
                    args.dataset + '_epoches_' + repr(epoch) + '.pth')
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        # load train data
        samples = next(batch_iterator)
        # import pdb;pdb.set_trace()
        #from IPython import embed; embed()
        if args.cuda:
            # samples['image'] = Variable(samples['image'])

            for key in samples:
                if key != 'target':  # roidb is a list of ndarrays with inconsistent length
                    samples[key] = list(map(Variable, samples[key]))

            #targets = [Variable(anno.cuda()) for anno in targets]
        else:
            # mirror the CUDA branch without moving tensors to the GPU
            for key in samples:
                if key != 'target':
                    samples[key] = list(map(Variable, samples[key]))
        # forward
        t0 = time.time()
        #out = net(images,targets)
        #samples = {'images':images,'targets':targets}
        # backprop
        optimizer.zero_grad()

        return_dict = net(**samples)
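        # unlike the other training examples, this network computes its losses
        # internally and returns them in a dict, so no external criterion is needed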

        loss_l = return_dict['loss_l'].mean()
        loss_c = return_dict['loss_c'].mean()
        #loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Total iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.item(), loss_c.item()) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))

    torch.save(
        net.state_dict(), args.save_folder + 'Final_' + args.version + '_' +
        args.dataset + '.pth')
Example #11
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)

    net.eval()
    print('Finished loading model!')
    # load data
    if args.dataset == 'VOC':
        dataset = VOCDetection(args.voc_root, [('0712', "2007_test")], None,
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, [('2014', 'minival')], None,
                                COCOAnnotationTransform())
        #COCOroot, [('2015', 'test-dev')], None)

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    top_k = 200
    save_folder = os.path.join(args.save_folder, args.dataset)
    if args.version == "drf_refine_vgg":
        detector = Detect(num_classes, 0, cfg, use_arm=True)
    else:
        detector = Detect(num_classes, 0, cfg)
    test_net(save_folder,
             net,
             detector,
Example #12
weight_decay = 0.0005
gamma = 0.1
momentum = 0.9

dataset_name = args.dataset
if dataset_name[0] == "V":
    cfg = (VOC_300, VOC_512)[args.size == '512']
    train_dataset = VOCDetection(VOCroot, datasets_dict[dataset_name],
                                 SSDAugmentation(img_dim, bgr_means),
                                 AnnotationTransform(), dataset_name)
    # train_dataset = VOCDetection(VOCroot, datasets_dict[dataset_name],    preproc(img_dim, bgr_means, p), AnnotationTransform())
    test_dataset = VOCDetection(VOCroot, datasets_dict["VOC2007"], None,
                                AnnotationTransform(), dataset_name)
elif dataset_name[0] == "C":
    train_dataset = COCODetection(COCOroot, datasets_dict[dataset_name],
                                  SSDAugmentation(img_dim, bgr_means),
                                  COCOAnnotationTransform(), dataset_name)
    test_dataset = COCODetection(COCOroot, datasets_dict["COCOval"], None,
                                 COCOAnnotationTransform(), dataset_name)
    cfg = (COCO_300, COCO_512)[args.size == '512']
else:
    print('Unknown dataset!')

if args.version == "ssd_vgg":
    from models.ssd.vgg_net import build_ssd
    print("ssd vgg")
elif args.version == "ssd_res":
    from models.ssd.res_net import build_ssd
    print("ssd resnet")
elif args.version == "drf_ssd_vgg":
    from models.drfssd.vgg_drfnet import build_ssd
Example #13
if __name__ == '__main__':
    if args.detection:
        num_classes = 81  # +1 background
        prior = 'VOC_' + str(args.ssd_dim)
        if 'RefineDet' in args.backbone and args.ssd_dim == 512:
            prior += '_RefineDet'
        elif 'RFB' in args.backbone and args.ssd_dim == 300:
            prior += '_RFB'
        cfg = mb_cfg[prior]
        dataset_mean = (104, 117, 123)
        ssd_dim = args.ssd_dim
        dataset = COCODetection(COCOroot,
                                year=args.year,
                                image_sets=[
                                    args.set_file_name,
                                ],
                                transform=BaseTransform(ssd_dim, dataset_mean),
                                phase='test')

        if 'MobNet' in args.backbone:
            if args.deform:
                from model.dualrefinedet_mobilenet import build_net
                net = build_net('test',
                                size=ssd_dim,
                                num_classes=num_classes,
                                def_groups=args.deform,
                                multihead=args.multihead)
            else:
                from model.refinedet_mobilenet import build_net
                net = build_net('test',
Example #14
def train(model, resume=False):
    model.train()
    optimizer = build_optimizer(args, model)
    scheduler = build_lr_scheduler(args, optimizer)
    checkpointer = DetectionCheckpointer(
        model, args, optimizer=optimizer, scheduler=scheduler
    )
    criterion = MultiBoxLoss_combined(num_classes, overlap_threshold, True, 0, True, 3, 0.5, False)
    start_iter = (
        checkpointer.resume_or_load(args.basenet if args.phase == 1 else args.load_file,
                                    resume=resume).get("iteration", -1) + 1
    )
    max_iter = args.max_iter
    periodic_checkpointer = PeriodicCheckpointer(
        checkpointer, args.checkpoint_period, max_iter=max_iter
    )

    writers = (
        [
            CommonMetricPrinter(max_iter),
            TensorboardXWriter(args.save_folder),
        ]
    )

    if args.dataset == 'VOC':
        dataset = VOCDetection(args, VOCroot, train_sets, preproc(
            img_dim, rgb_means, p), AnnotationTransform(0 if args.setting == 'transfer' else args.split))
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets, preproc(
            img_dim, rgb_means, p))
    else:
        raise ValueError(f"Unknown dataset: {args.dataset}")

    if args.phase == 2 and args.method == 'ours':
        sampler = TrainingSampler(len(dataset))
        data_loader = torch.utils.data.DataLoader(
            dataset,
            args.batch_size,
            sampler=sampler,
            num_workers=args.num_workers,
            collate_fn=detection_collate,
        )
        # initialize the OBJ(Target) parameters
        init_reweight(args, model, data_loader)
        dataset.set_mixup(np.random.beta, 1.5, 1.5)
        logger.info('Fine tuning on ' + str(args.shot) + '-shot task')

    sampler = TrainingSampler(len(dataset))
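    # TrainingSampler yields an infinite shuffled stream of indices, so next(data_loader)
    # below can be called all the way to max_iter without exhausting the loader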
    data_loader = iter(torch.utils.data.DataLoader(
        dataset,
        args.batch_size,
        sampler=sampler,
        num_workers=args.num_workers,
        collate_fn=detection_collate,
    ))
    assert model.training, 'Model.train() must be True during training.'
    logger.info("Starting training from iteration {}".format(start_iter))

    # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=args.gamma, last_epoch=epoch - 1)

    with EventStorage(start_iter) as storage:
        for iteration in range(start_iter, max_iter):
            iteration = iteration + 1
            storage.step()
            if args.phase == 2 and args.method == 'ours' and \
                    iteration == (args.max_iter - args.no_mixup_iter):
                dataset.set_mixup(None)
                data_loader = iter(torch.utils.data.DataLoader(
                    dataset,
                    args.batch_size,
                    sampler=sampler,
                    num_workers=args.num_workers,
                    collate_fn=detection_collate,
                ))

            data, targets = next(data_loader)
            # storage.put_image('image', vis_tensorboard(data))
            output = model(data)
            loss_dict = criterion(output, priors, targets)
            losses = sum(loss for loss in loss_dict.values())
            # assert torch.isfinite(losses).all(), loss_dict
            storage.put_scalars(total_loss=losses, **loss_dict)

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            if args.phase == 2 and args.method == 'ours':
                if isinstance(model, (DistributedDataParallel, DataParallel)):
                    model.module.normalize()
                else:
                    model.normalize()
            storage.put_scalar("lr", optimizer.param_groups[-1]["lr"], smoothing_hint=False)
            scheduler.step()

            if iteration - start_iter > 5 and (iteration % 20 == 0 or iteration == max_iter):
                for writer in writers:
                    writer.write()
            periodic_checkpointer.step(iteration)
Example #15
0
def main(args):

    create_time = time.strftime('%Y%m%d_%H%M', time.localtime(time.time()))
    save_folder_path = os.path.join(args.save_folder, create_time)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # dataset = customDetection(root = args.image_root,
    #                           json_path = args.annotation,
    #                           transform = BaseTransform(img_size = args.image_size),
    #                           target_transform = customAnnotationTransform())

    dataset = COCODetection(root=args.image_root,
                            annotation_json=args.annotation,
                            transform=BaseTransform(img_size=args.image_size),
                            target_transform=COCOAnnotationTransform())

    dataloader = DataLoader(dataset=dataset,
                            batch_size=4,
                            shuffle=True,
                            collate_fn=detection_collate)

    n_classes = dataset.get_class_number() + 1
    print("Detect class number: {}".format(n_classes))

    ## write category id to label name map
    dataset.get_class_map()

    model = mobilenetv3(n_classes=n_classes)
    ssd = ssd_mobilenetv3(model, n_classes)

    if args.pretrain_model_path:
        ssd.load_state_dict(torch.load(args.pretrain_model_path))

    # Initialize the optimizer with separate parameter groups for biases and weights (the original Caffe repo uses twice the base learning rate for biases)
    biases = list()
    not_biases = list()
    for param_name, param in model.named_parameters():
        if param.requires_grad:
            if param_name.endswith('.bias'):
                biases.append(param)
            else:
                not_biases.append(param)

    optimizer = torch.optim.SGD(params=[{
        'params': biases,
        'lr': args.learning_rate
    }, {
        'params': not_biases
    }],
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    ssd = ssd.to(device)
    criterion = MultiBoxLossV3(ssd.priors_cxcy, args.threshold,
                               args.neg_pos_ratio).to(device)

    print(f"epochs: {args.epochs}")
    # reset the learning rate of every parameter group to the configured value
    for param_group in optimizer.param_groups:
        param_group['lr'] = args.learning_rate
    print(f"The new LR is {optimizer.param_groups[1]['lr']}")

    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=0.1,
                                  patience=15,
                                  verbose=True,
                                  threshold=0.00001,
                                  threshold_mode='rel',
                                  cooldown=0,
                                  min_lr=0,
                                  eps=1e-08)

    n_train = min(dataset.__len__(), 5000)
    global_step = 0
    writer = SummaryWriter()

    for epoch in range(args.epochs):
        mean_loss = 0
        inference_count = 0
        ssd.train()
        mean_count = 0
        with tqdm(total=n_train,
                  desc=f"{epoch + 1} / {args.epochs}",
                  unit='img') as pbar:
            for img, target in dataloader:
                img = img.to(device)
                # target = [anno.to(device) for anno in target]
                # print(target)
                # boxes = target[:, :-1]
                # labels = target[:, -1]

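                # each annotation row ends with the class label: split box coordinates from labels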
                boxes = [anno.to(device)[:, :-1] for anno in target]
                labels = [anno.to(device)[:, -1] for anno in target]

                predicted_locations, predicted_scores = ssd(img)
                loss = criterion(predicted_locations, predicted_scores,
                                 boxes, labels)
                pbar.set_postfix(**{"loss": float(loss)})
                mean_loss += float(loss)
                mean_count += 1
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                pbar.update(img.shape[0])

        scheduler.step(mean_loss)
        writer.add_scalar('Train/Loss', float(mean_loss / mean_count),
                          global_step)
        global_step += 1

        if epoch % 10 == 0 or epoch == args.epochs - 1:
            save_model(save_folder_path, ssd, epoch)

    writer.close()
Example #16
0
def train():
    net.train()
    epoch = args.start_iter
    if args.dataset_name == 'COCO':
        dataset = COCODetection(COCOroot, year='trainval2014', image_sets=train_sets, transform=data_transform(ssd_dim, means), phase='train')
    else:
        dataset = VOCDetection(data_root, train_sets, data_transform(ssd_dim, means),
                               AnnotationTransform(dataset_name=args.dataset_name),
                               dataset_name=args.dataset_name, set_file_name=set_filename)
    epoch_size = len(dataset) // args.batch_size
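    # convert the epoch-based step_list into iteration milestones for the lr schedule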
    drop_step = [s * epoch_size for s in args.step_list]
    max_iter = max_epoch * epoch_size
    logging.info('Loading Dataset: ' + args.dataset_name + ' dataset size: ' + str(len(dataset)))

    step_index = 0
    if args.visdom:
        # initialize visdom loss plot
        y_dim = 3
        legend = ['Loss', 'Loc Loss', 'Conf Loss',]
        if use_refine:
            y_dim += 1
            legend += ['Arm Loc Loss',]

        lot = viz.line(
            X=torch.zeros((1,)),
            Y=torch.zeros((1, y_dim)),
            opts=dict(
                xlabel='Iteration',
                ylabel='Loss',
                title=args.save_folder.split('/')[-1],
                legend=legend,
            )
        )
    batch_iterator = None
    data_loader = data.DataLoader(dataset, batch_size, num_workers=args.num_workers, shuffle=True,
                                  collate_fn=collate_fn,
                                  pin_memory=True)

    for iteration in range(epoch*epoch_size, max_iter + 10):
        if (not batch_iterator) or (iteration % epoch_size == 0):
            # create batch iterator
            batch_iterator = iter(data_loader)
            if epoch % args.save_interval == 0:
                logging.info('Saving state, epoch: '+ str(epoch))
                torch.save(ssd_net.state_dict(), os.path.join(args.save_folder, args.model_name + str(
                    ssd_dim) + '_' + args.dataset_name + '_' +repr(epoch) + '.pth'))
            epoch += 1

        t0 = time.time()
        if iteration in drop_step:
            step_index = drop_step.index(iteration) + 1
        adjust_learning_rate(optimizer, args.gamma, epoch, step_index, iteration, epoch_size)
        # adjust_learning_rate(optimizer, args.gamma)

        collected_data = next(batch_iterator)
        with torch.no_grad():
            images, targets = collected_data[:2]
            images = images.to(device)
            targets = [anno.to(device) for anno in targets]

        # forward
        loss = torch.tensor(0., requires_grad=True).to(device)
        out = net(images)
        # backward
        optimizer.zero_grad()
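        # RefineDet-style two-step loss: the ARM (anchor refinement module) branch gets its own
        # localization loss, and its outputs are passed to the ODM criterion via arm_data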
        if use_refine:
            loss_arm_l = arm_criterion(out[0], priors, targets)
            loss_l, loss_c = criterion(out[2:], priors, targets, arm_data=out[:2])
            loss += args.loss_coe[0] * loss_arm_l

        else:
            loss_l, loss_c = criterion(out, priors, targets)
        loss += args.loss_coe[0] * loss_l + args.loss_coe[1] * loss_c

        loss.backward()
        optimizer.step()
        t1 = time.time()
        if iteration % 10 == 0:
            if use_refine:
                logging.info('Epoch:' + repr(epoch) + ', epochiter: ' + repr(iteration % epoch_size) + '/' + repr(epoch_size) + ', total_iter ' + repr(
                    iteration) + ' || loss: %.4f, Loss_l: %.4f, loss_c: %.4f, loss_arm_l: %.4f, lr: %.5f || Timer: %.4f sec.' % (
                             loss, loss_l, loss_c,loss_arm_l, optimizer.param_groups[0]['lr'], t1 - t0))
            else:
                logging.info('Epoch:' + repr(epoch) + ', epochiter: ' + repr(iteration % epoch_size) + '/' + repr(epoch_size) + ', total_iter ' + repr(
                    iteration) + ' || loss: %.4f, Loss_l: %.4f, loss_c: %.4f, lr: %.5f || Timer: %.4f sec.' % (loss, loss_l, loss_c, optimizer.param_groups[0]['lr'], t1 - t0))

        if args.visdom:
            y_dis = [loss.cpu(), args.loss_coe[0]*loss_l.cpu(), args.loss_coe[1]*loss_c.cpu()]
            if iteration == 1000:
                # initialize visdom loss plot
                lot = viz.line(
                    X=torch.zeros((1,)),
                    Y=torch.zeros((1, y_dim)),
                    opts=dict(
                        xlabel='Iteration',
                        ylabel='Loss',
                        title=args.save_folder.split('/')[-1],
                        legend=legend,
                    )
                )
            if use_refine:
                y_dis += [args.loss_coe[0]*loss_arm_l.cpu(),]
            # update = 'append' if iteration
            viz.line(
                X=torch.ones((1, y_dim)) * iteration,
                Y=torch.FloatTensor(y_dis).unsqueeze(0),
                win=lot,
                update='append',
                opts=dict(
                    xlabel='Iteration',
                    ylabel='Loss',
                    title=args.save_folder.split('/')[-1],
                    legend=legend,)
            )


    torch.save(ssd_net.state_dict(),
               os.path.join(args.save_folder, args.model_name + str(ssd_dim) + '_' + args.dataset_name + '_' +
                            repr(iteration) + '.pth'))
    print('Completed training. Saving state, iter:', iteration)
Example #17
0
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')
    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets, preproc(
            img_dim, rgb_means, p), AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets, preproc(
            img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return

    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size

    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
Example #18
0
def load_data(args):
    if args.data_type.lower() in ("regresion", "recognation"):
        if osp.exists(args.train_file) and osp.exists(args.test_file):
            train_dataset = ClassDataset(
                root=args.root,
                file_list=args.train_file,
                data_type=args.data_type.lower(),
                gray=args.gray,
                num_classes=args.num_classes,
                transform=ClassAugmentation(gray=args.gray,
                                            parse_type='train'),
            )
            val_dataset = ClassDataset(
                root=args.root,
                file_list=args.test_file,
                data_type=args.data_type.lower(),
                gray=args.gray,
                num_classes=args.num_classes,
                transform=ClassAugmentation(gray=args.gray, parse_type='val'),
            )
        else:
            train_dataset = datasets.ImageFolder(osp.join(args.root, 'train'),
                                                 transform=ClassAugmentation(
                                                     gray=args.gray,
                                                     parse_type='train'))

            val_dataset = datasets.ImageFolder(osp.join(args.root, 'val'),
                                               transform=ClassAugmentation(
                                                   gray=args.gray,
                                                   parse_type='val'))

        # drop_last=True/False: whether to drop the final incomplete batch (e.g. with batch=100 and 36 samples left over, drop_last decides whether those 36 are discarded)
        train_loader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  num_workers=args.workers,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=False)
        val_loader = DataLoader(val_dataset,
                                batch_size=args.val_batch_size,
                                num_workers=args.workers,
                                shuffle=False,
                                pin_memory=True)

    elif args.data_type.lower() == "detector":
        if args.dataset_type == 'COCO':
            train_dataset = COCODetection(root=args.dataset_root,
                                          transform=None,
                                          mosaic=False)
        elif args.dataset_type == 'VOC':
            train_dataset = VOCDetection(root=args.dataset_root,
                                         transform=None,
                                         mosaic=False)
        train_loader = None
        val_loader = None

    else:
        raise Exception(f"This project not support {args.data_type} type!!!")

    return train_loader, val_loader
Example #19
0
        name = k[7:]  # remove `module.`
    else:
        name = k
    new_state_dict[name] = v
net.load_state_dict(new_state_dict)
net.eval()
print('Finished loading model!')
print(net)
# load data
if args.dataset == 'VOC':
    testset = VOCDetection(VOCroot, [('2007', 'test')], None,
                           AnnotationTransform())
elif args.dataset == 'COCO':
    testset = COCODetection(
        #COCOroot, [('2014', 'minival')], None)
        COCOroot,
        [('2015', 'test-dev')],
        None)
else:
    print('Only VOC and COCO datasets are supported now!')
if args.cuda:
    net = net.cuda()
    cudnn.benchmark = True
else:
    net = net.cpu()
# evaluation
#top_k = (300, 200)[args.dataset == 'COCO']
top_k = 200
detector = Detect(num_classes, 0, cfg)
save_folder = os.path.join(args.save_folder, args.dataset)
rgb_means = (104, 117, 123)
Example #20
0
def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    if args.dataset == 'VOC':
        if args.alpha > 1e-5:
            dataset = VOCDetection(VOCroot,
                                   train_sets,
                                   preproc_mixup(img_dim, rgb_means, p),
                                   AnnotationTransform(),
                                   random_erasing=args.random_erasing,
                                   mixup_alpha=args.alpha)
        else:
            dataset = VOCDetection(VOCroot,
                                   train_sets,
                                   preproc(img_dim, rgb_means, p),
                                   AnnotationTransform(),
                                   random_erasing=args.random_erasing)
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return

    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size

    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (100 * epoch_size, 135 * epoch_size, 170 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
        for sv in stepvalues:
            if start_iter > sv:
                step_index += 1
                continue
            else:
                break
    else:
        start_iter = 0

    lr = args.lr
    avg_loss_list = []
    flag = True
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            avg_loss = (loc_loss + conf_loss) / epoch_size
            avg_loss_list.append(avg_loss)
            print("avg_loss_list:")
            if len(avg_loss_list) <= 5:
                print(avg_loss_list)
            else:
                print(avg_loss_list[-5:])
            loc_loss = 0
            conf_loss = 0
            if (epoch <= 150 and epoch % 10 == 0) or (
                    150 < epoch < 200 and epoch % 5 == 0) or (epoch > 200):
                torch.save(
                    net.state_dict(), args.save_folder + args.version + '_' +
                    args.dataset + '_epoches_' + repr(epoch) + '.pth')
                if (epoch != args.resume_epoch):
                    #if(epoch):
                    ValNet = build_net(img_dim, num_classes, args.norm,
                                       args.vgg_bn)
                    val_state_dict = torch.load(args.save_folder +
                                                args.version + '_' +
                                                args.dataset + '_epoches_' +
                                                repr(epoch) + '.pth')
                    from collections import OrderedDict
                    new_state_dict = OrderedDict()
                    for k, v in val_state_dict.items():
                        head = k[:7]
                        if head == 'module.':
                            name = k[7:]
                        else:
                            name = k
                        new_state_dict[name] = v
                    ValNet.load_state_dict(new_state_dict)
                    ValNet.eval()
                    print('Finished loading ' + args.version + '_' +
                          args.dataset + '_epoches_' + repr(epoch) +
                          '.pth model!')
                    if args.dataset == 'VOC':
                        testset = VOCDetection(VOCroot, [('2007', 'test')],
                                               None, AnnotationTransform())
                    elif args.dataset == 'COCO':
                        testset = COCODetection(COCOroot,
                                                [('2014', 'minival')], None)
                    if args.cuda:
                        ValNet = ValNet.cuda()
                        cudnn.benchmark = True
                    else:
                        ValNet = ValNet.cpu()
                    top_k = 200
                    detector = Detect(num_classes, 0, cfg, GIOU=args.giou)
                    save_val_folder = os.path.join(args.save_val_folder,
                                                   args.dataset)
                    val_transform = BaseTransform(ValNet.size, rgb_means,
                                                  (2, 0, 1))
                    val_net(priors, save_val_folder, testset, num_classes,
                            ValNet, detector, val_transform, top_k, 0.01,
                            args.cuda, args.vgg_bn)
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        images, targets = next(batch_iterator)

        # no mixup
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]

        # fh = net.base[22].register_forward_hook(get_features_hook)
        # bh = net.base[22].register_backward_hook(get_grads_hook)
        out = net(images, vgg_bn=args.vgg_bn)
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        # fh.remove()
        # bh.remove()

        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % 10 == 0:
            print(
                'Epoch:' + repr(epoch) + ' || epochiter: ' +
                repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                '|| Total iter ' + repr(iteration) +
                ' || L: %.4f C: %.4f S: %.4f||' %
                (loss_l.item(), loss_c.item(), loss_l.item() + loss_c.item()) +
                'Batch time: %.4f ||' % (load_t1 - load_t0) + 'LR: %.7f' %
                (lr))

    torch.save(
        net.state_dict(), args.save_folder + 'Final_' + args.version + '_' +
        args.dataset + '.pth')
Example #21
0
def train():
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(cfg['min_dim'],
                                                          MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(cfg['min_dim'],
                                                         MEANS))
    print(VOC_ROOT)
    print(COCO_ROOT)
    viz = None

    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        if args.cuda:
            images = images.cuda() # Variable(images.cuda())
            targets = [ann.cuda() for ann in targets] # [Variable(ann.cuda(), volatile=True) for ann in targets]
        else:
            images = images # Variable(images)
            targets = [ann for ann in targets] # [Variable(ann, volatile=True) for ann in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()  # .data[0] in older PyTorch
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')

        if args.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(), 'weights/ssd300_COCO_' +
                       repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.dataset + '.pth')
Example #22
0
if module_path not in sys.path:
    sys.path.append(module_path)
from utils.utils import color_list, vis_detections
import torch
from torch.autograd import Variable
import numpy as np
import cv2
from data import COCODetection, COCO_ROOT, COCOAnnotationTransform
from models.refinedetlite import build_refinedet
from data import COCO_CLASSES as labels

if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
net = build_refinedet('test', 320, 81)  # initialize RefineDet
net.load_weights('../weights/RefineDetLiteCOCO/RefineDet320_COCO_138000.pth')
testset = COCODetection(COCO_ROOT, "val2017", None, COCOAnnotationTransform())
img_id = 121
image = testset.pull_image(img_id)

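# preprocessing: resize to the 320x320 network input, subtract the BGR channel means,
# flip BGR -> RGB, then move channels first (HWC -> CHW) for PyTorch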
x = cv2.resize(image, (320, 320)).astype(np.float32)
x -= (104.0, 117.0, 123.0)
x = x.astype(np.float32)
x = x[:, :, ::-1].copy()

x = torch.from_numpy(x).permute(2, 0, 1)

xx = Variable(x.unsqueeze(0))  # wrap tensor in Variable

if torch.cuda.is_available():
    xx = xx.cuda()
Example #23
0
def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return

    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size

    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    lr = args.lr
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            loc_loss = 0
            conf_loss = 0
            if (epoch % 2 == 0 and epoch > 0) or (epoch % 5 == 0
                                                  and epoch > 200):
                torch.save(
                    net.state_dict(), args.save_folder + args.version + '_' +
                    args.dataset + '_epoches_' + repr(epoch) + '.pth')
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        images, targets = next(batch_iterator)

        #print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))

        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Total iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.item(), loss_c.item()) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))

    torch.save(
        net.state_dict(), args.save_folder + 'Final_' + args.version + '_' +
        args.dataset + '.pth')
Example #24
0
def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch  # resume training in a finetune-style manner
    print('Loading Dataset...')
    # load the train/val set; the preproc class (see data_augment.py) follows the same data augmentation as SSD
    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return

    epoch_size = len(dataset) // args.batch_size  # number of iterations per epoch
    max_iter = args.max_epoch * epoch_size  # total number of iterations: max_epoch * epoch_size
    # milestones at which the learning rate is decayed
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0
    # whether training resumes from a previous epoch (finetune)
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    lr = args.lr
    for iteration in range(start_iter,
                           max_iter):  # total iterations to run; the start differs when finetuning and maps onto the epoch count
        if iteration % epoch_size == 0:
            # create batch iterator: reload the whole dataset at the start of each epoch; next(batch_iterator) below then fetches one batch_size chunk at a time
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            # detection_collate groups images + labels into batches
            loc_loss = 0
            conf_loss = 0
            if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0
                                                   and epoch > 200):
                torch.save(net.state_dict(), args.save_folder + args.version +
                           '_' + args.dataset + '_epoches_' + repr(epoch) +
                           '.pth')  # save a checkpoint
            epoch += 1

        load_t0 = time.time()

        # the following adjusts the learning rate (including warm-up)
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        # load train data: batch_iterator is built once per epoch; each next() call pulls one batch_size worth of data
        images, targets = next(batch_iterator)  # see detection_collate

        #print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))
        # move data to the GPU when CUDA is enabled
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)  # forward the whole batch through the network
        # backprop
        optimizer.zero_grad()  # clear the gradients of all optimized parameters for this batch
        loss_l, loss_c = criterion(
            out, priors, targets)  # MultiBoxLoss; see multibox_loss.py
        loss = loss_l + loss_c  # loc loss and cls loss are weighted 1:1 here
        loss.backward()  # backpropagate the loss
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()  # accumulate batch losses within the epoch
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Total iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.item(), loss_c.item()) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
    # save the final model
    torch.save(
        net.state_dict(), args.save_folder + 'Final_' + args.version + '_' +
        args.dataset + '.pth')
Example #25
0
    return (testset.evaluate_detections(all_boxes, save_folder),
            total_detect_time, total_nms_time,
            4951 / (total_nms_time + total_detect_time),
            4951 / total_detect_time)



if __name__ == '__main__':
    # load net
    #torch.cuda.set_device(args.device)
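    # (a, b)[flag] selects b when flag is True; used here to pick the input size and class count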
    img_dim = (300,512)[args.size=='512']
    num_classes = (21, 81)[args.dataset == 'COCO']
    net = build_ssd('test', img_dim, num_classes)    # initialize detector

    if args.dataset == 'VOC':
        testset = VOCDetection(
            VOCroot, [('2007', 'test')], None, AnnotationTransform())
    elif args.dataset == 'COCO':
        testset = COCODetection(
            COCOroot, [('2014', 'minival')], None)
            #COCOroot, [('2015', 'test-dev')], None)
    else:
        print('Only VOC and COCO datasets are supported now!')

    top_k = 200
    detector = Detect(num_classes,0,cfg)
    save_folder = os.path.join(args.save_folder,args.dataset)
    rgb_means = ((104, 117, 123),(103.94,116.78,123.68))[args.version == 'RFB_mobile']

    start_iter = 100000
    end_iter = 154000
    step = 2000
    best_ap = 0 
    best_iter = 100000
    output_file = open('detect_summ.txt','w')
Example #26
0
def train(args):
    create_time = time.strftime('%Y%m%d_%H%M', time.localtime(time.time()))
    save_folder_path = os.path.join(args.save_folder, create_time)

    # n_classes = [20, 80][args.dataset == 'COCO']
    # n_classes = 91

    if not ((args.train_image_folder and args.val_image_folder)
            or args.annotation):
        print("train/val image folder and annotation should not be None")
        return

    train_dataset = COCODetection(
        root=args.root,
        image_set=args.train_image_folder,
        annotation_json=args.annotation,
        transform=SSDAugmentation(img_size=args.image_size),
        # transform = BaseTransform(img_size = args.image_size),
        target_transform=COCOAnnotationTransform())

    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  collate_fn=detection_collate)

    val_dataset = COCODetection(
        root=args.root,
        image_set=args.val_image_folder,
        annotation_json=args.annotation,
        transform=BaseTransform(img_size=args.image_size),
        target_transform=COCOAnnotationTransform())

    n_classes = train_dataset.get_class_size() + 1

    if args.class_map_path:
        train_dataset.get_class_map(args.class_map_path)

    if args.model == "mobilenetv2":
        model = MobileNetv2(
            n_classes=n_classes,
            width_mult=args.width_mult,
            round_nearest=8,
            dropout_ratio=args.dropout_ratio,
            use_batch_norm=True,
        )

        ssd = create_mobilenetv2_ssd_lite(model,
                                          n_classes,
                                          width_mult=args.width_mult,
                                          use_batch_norm=True)

    elif args.model == "mobilenetv3":
        model = MobileNetv3(model_mode=args.model_mode,
                            n_classes=n_classes,
                            width_mult=args.width_mult,
                            dropout_ratio=args.dropout_ratio)

        ssd = create_mobilenetv3_ssd_lite(model,
                                          n_classes,
                                          model_mode=args.model_mode)

    else:
        print("model structure only accept mobilenetv2 or mobilenetv3")
        return
    print("builded ssd module")

    if GPU:
        import torch.backends.cudnn as cudnn
        model.cuda()
        ssd.cuda()
        cudnn.benchmark = True

    if args.pretrain_model:
        ssd.load_state_dict(
            torch.load(args.pretrain_model, map_location=torch.device('cpu')))

    elif args.pretrain_tfmodel and args.pretrain_tfmodel_weight_list:
        ssd_state_dict = ssd.state_dict()
        tf_weights_dict = load_tf_weights(args, ssd_state_dict)
        ssd.load_state_dict(tf_weights_dict)

    optimizer = optim.Adam(ssd.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

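    # MultiBoxLoss matches priors to ground truth at overlap_threshold and uses hard negative
    # mining with neg_pos_ratio negatives per positive example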
    criterion = MultiBoxLoss(n_classes,
                             overlap_thresh=args.overlap_threshold,
                             prior_for_matching=True,
                             bkg_label=0,
                             neg_mining=True,
                             neg_pos=args.neg_pos_ratio,
                             neg_overlap=0.5,
                             encode_target=False)
    with torch.no_grad():

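        # generate the fixed default (anchor) boxes for the chosen backbone configuration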
        if args.model == "mobilenetv2":
            prior_box = PriorBox(MOBILEV2_300)

        elif args.model == "mobilenetv3":
            prior_box = PriorBox(MOBILEV3_300)

        priors = Variable(prior_box.forward())
        print("created default bbox")

    n_train = min(train_dataset.__len__(), 5000)
    n_val = min(val_dataset.__len__(), 1000)
    global_step = 0
    val_global_step = 0
    writer = SummaryWriter(log_dir=args.summary_path)
    for epoch in range(args.epochs):
        mean_loss_conf = 0
        mean_loss_loc = 0
        inference_count = 0

        ssd.train()
        with tqdm(total=n_train,
                  desc=f"{epoch + 1} / {args.epochs}",
                  unit='img') as pbar:
            for img, target in train_dataloader:

                if GPU:
                    img = Variable(img.cuda())
                    target = [Variable(anno.cuda()) for anno in target]
                else:
                    img = Variable(img)
                    target = [Variable(anno) for anno in target]

                optimizer.zero_grad()

                inference = ssd(img)

                loss_loc, loss_conf = criterion(inference, priors, target)
                writer.add_scalar('Train/location_loss', float(loss_loc),
                                  global_step)
                writer.add_scalar('Train/confidence_loss', float(loss_conf),
                                  global_step)

                pbar.set_postfix(
                    **{
                        "location loss": float(loss_loc),
                        "confidence loss": float(loss_conf)
                    })

                mean_loss_loc += float(loss_loc)
                mean_loss_conf += float(loss_conf)

                total_loss = loss_loc + loss_conf
                total_loss.backward()

                # # clip gradient
                # # clip_grad_norm_(net.parameters(), 0.1)

                optimizer.step()
                pbar.update(img.shape[0])
                global_step += 1
                inference_count += img.shape[0]

                if inference_count > n_train: break
            pbar.set_postfix(
                **{
                    "location loss": float(mean_loss_loc / n_train),
                    "confidence loss": float(mean_loss_conf / n_train)
                })

        ssd.eval()
        val_mean_loss_loc = 0
        val_mean_loss_conf = 0
        with tqdm(total=n_val, desc="Validation", unit="img") as vpbar:
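            # validate one raw image at a time (batch size 1), reusing the training criterion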
            for i in range(n_val):
                img = val_dataset.get_image(i)
                img = cv2.resize(img, (args.image_size, args.image_size))
                height, width, _ = img.shape
                target = val_dataset.get_annotation(i, width, height)

                if GPU:
                    img = torch.from_numpy(
                        np.expand_dims(img.transpose(2, 0, 1),
                                       0)).to(dtype=torch.float32).cuda()
                    target = torch.FloatTensor(target).unsqueeze(0).cuda()
                else:
                    img = torch.from_numpy(
                        np.expand_dims(img.transpose(2, 0, 1),
                                       0)).to(dtype=torch.float32)
                    target = torch.FloatTensor(target).unsqueeze(0)

                inference = ssd(img)
                loss_loc, loss_conf = criterion(inference, priors, target)

                val_mean_loss_loc += float(loss_loc)
                val_mean_loss_conf += float(loss_conf)
                vpbar.set_postfix(
                    **{
                        'location loss': float(loss_loc),
                        'confidence loss': float(loss_conf)
                    })
                vpbar.update(1)

            vpbar.set_postfix(
                **{
                    'location loss': float(val_mean_loss_loc / n_val),
                    'confidence loss': float(val_mean_loss_conf / n_val)
                })
            writer.add_scalar('Test/location_loss',
                              float(val_mean_loss_loc / n_val),
                              val_global_step)
            writer.add_scalar('Test/confidence_loss',
                              float(val_mean_loss_conf / n_val),
                              val_global_step)
        val_global_step += 1

        if epoch % 10 == 0 or epoch == args.epochs - 1:
            save_model(save_folder_path, ssd, epoch)
    writer.close()
Example #27
0
                anno["bbox"] = list(box)
                anno["score"] = scores
                output.append(anno)
            

        #print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1,
        #                                            num_images, detect_time))

    print('time: ', sum_time/num_images, 'fps: ', sum_fps/num_images)
    print('sum: ', net.detect.count, 'mean: ', net.detect.count/num_images)
    
    print('writing detections')
    output_path = os.path.join(args.root_path, 'result/result.json')
    with open(output_path, 'w') as f:
        json.dump(output, f)
    

if __name__ == '__main__':
    # load net
    net = build_ssd('test', ssd_dim, num_classes)    # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data
    dataset = COCODetection(args.root_path, img_sets, BaseTransform(ssd_dim, dataset_mean), target_transform=False)
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    test_net(net, args.cuda, dataset)
Example #28
0
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    # with open(det_file, 'rb') as f:
    #     all_boxes = pickle.load(f)
    # print('LOADED')
    dataset.evaluate_detections(all_boxes, save_folder)

if __name__ == '__main__':
    # load net
    num_classes = len(labelmap) + 1                      # +1 for background
    net = build_ssd('test', cfg, args.use_pred_module)            # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    print(net)

    # load data
    dataset = COCODetection(args.dataset_root,
                            image_set='minival2014',
                            transform=BaseTransform(cfg['min_dim'], MEANS),
                            target_transform=COCOAnnotationTransform())
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True

    # evaluation
    test_net(args.save_folder, net, args.cuda, dataset,
             BaseTransform(net.size, MEANS), args.top_k, 512,
             thresh=args.confidence_threshold)
Example #29
0
        if args.cuda:
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')

        if args.resume and not args.display:
            with open(args.ap_data_file, 'rb') as f:
                ap_data = pickle.load(f)
            calc_map(ap_data)
            exit()

        if args.image is None and args.video is None and args.images is None:
            dataset = COCODetection(cfg.dataset.valid_images,
                                    cfg.dataset.valid_info,
                                    transform=BaseTransform(),
                                    has_gt=cfg.dataset.has_gt)
            prep_coco_cats()
        else:
            dataset = None

        print('Loading model...', end='')
        net = Yolact()
        net.load_weights(args.trained_model)
        net.eval()
        print(' Done.')

        if args.cuda:
            net = net.cuda()

        evaluate(net, dataset)
Example #30
0
    # target_size = 1024
    cfg = coco_refinedet[args.input_size]
    target_size = cfg['min_dim']
    num_classes = cfg['num_classes']
    objectness_threshold = 0.01
    args.nms_threshold = 0.49  # nms
    # args.nms_threshold = 0.45  # softnms
    args.confidence_threshold = 0.01
    args.top_k = 1000
    args.keep_top_k = 500
    args.vis_thres = 0.3
    # args.multi_scale_test = True

    # load data
    dataset = COCODetection(COCOroot, ['val2017'], None, dataset_name='coco2017')
    # dataset = COCODetection(COCOroot, ['test2017'], None, dataset_name='coco2017')

    # load net
    torch.set_grad_enabled(False)
    load_to_cpu = not args.cuda
    cudnn.benchmark = True
    device = torch.device('cuda' if args.cuda else 'cpu')
    detect = Detect_RefineDet(num_classes, int(args.input_size), 0, objectness_threshold, confidence_threshold=args.confidence_threshold, nms_threshold=args.nms_threshold, top_k=args.top_k, keep_top_k=args.keep_top_k)
    net = build_refinedet('test', int(args.input_size), num_classes, backbone_dict) 

    # test multi models, to filter out the best model.
    # start_epoch = 10; step = 10
    start_epoch = 200; step = 5
    ToBeTested = []
    ToBeTested = [prefix + f'/RefineDet{args.input_size}_COCO_epoches_{epoch}.pth' for epoch in range(start_epoch, 300, step)]