Beispiel #1
0
def train():
    """Train EfficientDet on Pascal VOC and save one checkpoint per epoch.

    Configuration is read from the module-level ``args`` namespace
    (``dataset_root``, ``batch_size``, ``lr`` and ``num_epoch``).  Each batch
    yielded by the loader is indexed with the keys ``'img'`` and ``'annot'``.
    The model is optimised with AdamW on the sum of the mean classification
    and mean regression losses returned by ``FocalLoss``.

    Side effects:
        Creates ``./weights`` if needed and writes
        ``./weights/checkpoint_<epoch>.pth`` (the model ``state_dict``) after
        every epoch; prints progress every 100 optimisation steps.
    """
    import os

    # Create the checkpoint directory before training starts so a missing
    # folder cannot make the save fail after a whole epoch of work.
    os.makedirs('./weights', exist_ok=True)

    dataset = VOCDetection(root=args.dataset_root,
                           transform=SSDAugmentation(512, MEANS))
    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=0,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=False)
    model = EfficientDet(num_classes=21)
    model = model.cuda()

    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    criterion = FocalLoss()

    model.train()
    # Counts optimisation steps across all epochs; batches skipped below do
    # not advance it.
    iteration = 0

    for epoch in range(args.num_epoch):
        print('Start epoch: {} ...'.format(epoch))
        total_loss = []  # per-step losses of the current epoch
        for idx, sample in enumerate(data_loader):
            images = sample['img'].cuda()
            classification, regression, anchors = model(images)
            classification_loss, regression_loss = criterion(
                classification, regression, anchors, sample['annot'])
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()

            loss = classification_loss + regression_loss
            # A loss of exactly zero carries no gradient signal: skip the step.
            if bool(loss == 0):
                continue
            optimizer.zero_grad()
            loss.backward()
            # Clip the global gradient norm to 0.1 before the update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
            optimizer.step()
            total_loss.append(loss.item())

            if (iteration % 100 == 0):
                print(
                    'Epoch/Iteration: {}/{}, classification: {}, regression: {}, totol_loss: {}'
                    .format(epoch, iteration, classification_loss.item(),
                            regression_loss.item(), np.mean(total_loss)))
            iteration += 1
        torch.save(model.state_dict(),
                   './weights/checkpoint_{}.pth'.format(epoch))
Beispiel #2
0
def main_worker(gpu, ngpus_per_node, args):
    """Build the model, data pipeline and optimiser, then run the training loop.

    Args:
        gpu: Index of the GPU this worker should use, or ``None``.
        ngpus_per_node: Number of GPUs on this node.  Used to derive the
            global rank and to split batch size / loader workers per process.
        args: Parsed option namespace.  It is mutated in place (``gpu``,
            ``rank``, ``num_class``, ``network``, ``start_epoch``,
            ``batch_size``, ``workers``) and stored inside every checkpoint
            under the ``'parser'`` key.

    Raises:
        FileNotFoundError: If ``args.resume`` is set but is not an existing
            file.
        ValueError: If ``args.dataset`` is neither ``'VOC'`` nor ``'COCO'``.

    Side effects:
        Writes ``./weights/checkpoint_<dataset>_<network>_<epoch>.pth`` after
        every epoch (the directory is created if missing).
    """
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            # NOTE(review): the rank is hard-coded instead of being read from
            # the RANK environment variable -- confirm this is intentional.
            args.rank = 1
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    checkpoint = None
    if args.resume is not None:
        # Fail with a clear message instead of indexing an empty placeholder
        # when the requested checkpoint does not exist.
        if not os.path.isfile(args.resume):
            raise FileNotFoundError(
                "=> no checkpoint found at '{}'".format(args.resume))
        print("=> loading checkpoint '{}'".format(args.resume))
        # NOTE: torch.load unpickles arbitrary objects (the checkpoint holds
        # the saved option namespace) -- only load trusted files.
        if args.gpu is None:
            checkpoint = torch.load(args.resume)
        else:
            # Map model to be loaded to specified single gpu.
            loc = 'cuda:{}'.format(args.gpu)
            checkpoint = torch.load(args.resume, map_location=loc)
        params = checkpoint['parser']
        args.num_class = params.num_class
        args.network = params.network
        # Continue after the epoch that produced this checkpoint.  The saved
        # namespace only records where that *run* started, not the epoch the
        # checkpoint was written at, so the explicit 'epoch' field is used.
        args.start_epoch = checkpoint['epoch'] + 1
        del params

    model = EfficientDet(num_classes=args.num_class,
                         network=args.network,
                         W_bifpn=EFFICIENTDET[args.network]['W_bifpn'],
                         D_bifpn=EFFICIENTDET[args.network]['D_bifpn'],
                         D_class=EFFICIENTDET[args.network]['D_class'],
                         gpu=args.gpu)
    if checkpoint is not None:
        model.load_state_dict(checkpoint['state_dict'])
    # Release the (potentially large) checkpoint before building the loaders.
    del checkpoint

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu], find_unused_parameters=True)
            print('Run with DistributedDataParallel with divice_ids....')
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
            print('Run with DistributedDataParallel without device_ids....')
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        print('Run with DataParallel ....')
        model = torch.nn.DataParallel(model).cuda()

    # Training dataset
    if args.dataset == 'VOC':
        train_dataset = VOCDetection(root=args.dataset_root,
                                     transform=transforms.Compose([
                                         Normalizer(),
                                         Augmenter(),
                                         Resizer()
                                     ]))
    elif args.dataset == 'COCO':
        train_dataset = CocoDataset(
            root_dir=args.dataset_root,
            set_name='train2017',
            transform=get_augumentation(
                phase='train',
                width=EFFICIENTDET[args.network]['input_size'],
                height=EFFICIENTDET[args.network]['input_size']))
    else:
        # Previously an unknown name fell through to an empty list dataset.
        raise ValueError(
            "Unsupported dataset: {!r} (expected 'VOC' or 'COCO')".format(
                args.dataset))

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.workers,
                              shuffle=True,
                              collate_fn=collater,
                              pin_memory=True)
    # define loss function (criterion) , optimizer, scheduler
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    cudnn.benchmark = True

    # Make sure the checkpoint directory exists before the first epoch so a
    # missing folder cannot make the save fail after training.
    os.makedirs('./weights', exist_ok=True)

    for epoch in range(args.start_epoch, args.num_epoch):
        train(train_loader, model, scheduler, optimizer, epoch, args)
        state = {
            'epoch': epoch,
            'parser': args,
            'state_dict': get_state_dict(model)
        }
        torch.save(
            state,
            './weights/checkpoint_{}_{}_{}.pth'.format(args.dataset,
                                                       args.network, epoch))
Beispiel #3
0
                        help='Checkpoint state_dict file to resume training from')
    # Parse the command-line options declared on ``parser`` above.
    args = parser.parse_args()

    # Rebuild the network from a saved checkpoint when --weight is given.
    if(args.weight is not None):
        resume_path = str(args.weight)
        print("Loading checkpoint: {} ...".format(resume_path))
        # Keep every tensor on the CPU while loading; the model is moved to
        # the GPU further down.
        checkpoint = torch.load(
            args.weight, map_location=lambda storage, loc: storage)
        # The checkpoint stores the option namespace under 'parser'; the class
        # count and backbone name are taken from it so the architecture
        # matches the saved weights.
        params = checkpoint['parser']
        args.num_class = params.num_class
        args.network = params.network
        model = EfficientDet(
            num_classes=args.num_class,
            network=args.network,
            W_bifpn=EFFICIENTDET[args.network]['W_bifpn'],
            D_bifpn=EFFICIENTDET[args.network]['D_bifpn'],
            D_class=EFFICIENTDET[args.network]['D_class'],
            is_training=False,
            threshold=args.threshold,
            iou_threshold=args.iou_threshold)
        model.load_state_dict(checkpoint['state_dict'])
    # NOTE(review): in the visible code ``model`` is only assigned inside the
    # branch above, so this line appears to fail when --weight is omitted --
    # confirm whether ``model`` is defined elsewhere.
    model = model.cuda()
    # Pick the evaluation routine that matches the dataset: the VOC 2007 test
    # split, or the COCO 'val2017' set for any other value of args.dataset.
    if(args.dataset == 'VOC'):
        valid_dataset = VOCDetection(root=args.dataset_root, image_sets=[('2007', 'test')],
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
        evaluate(valid_dataset, model)
    else:
        valid_dataset = CocoDataset(root_dir=args.dataset_root, set_name='val2017',
                                    transform=transforms.Compose([Normalizer(), Resizer()]))
        evaluate_coco(valid_dataset, model)
Beispiel #4
0
def main_worker(gpu, ngpus_per_node, args):
    """Train EfficientDet and evaluate it after every epoch, logging to CSV.

    Args:
        gpu: Index of the GPU this worker should use, or ``None``.
        ngpus_per_node: Number of GPUs on this node.  Used to derive the
            global rank and to split batch size / loader workers per process.
        args: Parsed option namespace.  It is mutated in place (``gpu``,
            ``rank``, ``num_class``, ``network``, ``start_epoch``,
            ``batch_size``, ``workers``).

    Raises:
        FileNotFoundError: If ``args.resume`` is set but is not an existing
            file.
        ValueError: If ``args.lr_choice`` is neither ``'lr_fn'`` nor
            ``'lr_scheduler'``.

    Side effects:
        (Re)creates ``epoch_loss.csv``, ``iteration_loss.csv``,
        ``eval_train_result.csv`` and ``eval_val_result.csv`` -- under
        ``/kaggle/working/`` when the ``KAGGLE_KERNEL_RUN_TYPE`` environment
        variable is set, otherwise in the current directory -- and hands the
        open files to ``train`` and ``evaluate_coco``.
    """
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            # NOTE(review): the rank is hard-coded instead of being read from
            # the RANK environment variable -- confirm this is intentional.
            args.rank = 1
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(
            backend=args.dist_backend,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=args.rank)

    # Echo the effective configuration, one "name: value" line per option.
    for option in ('dataset', 'network', 'num_epoch', 'batch_size',
                   'lr_choice', 'lr', 'lr_fn', 'image_size', 'workers',
                   'num_class', 'save_folder', 'limit'):
        print(option + ':', getattr(args, option))

    if args.dataset == 'h5':
        train_dataset = H5CoCoDataset(
            '{}/train_small.hdf5'.format(args.dataset_root), 'train_small')
        valid_dataset = H5CoCoDataset(
            '{}/test.hdf5'.format(args.dataset_root), 'test')
    else:
        # args.limit is indexed as (train limit, validation limit).
        train_dataset = CocoDataset(args.dataset_root, set_name='train_small',
                                    transform=get_augumentation('train'),
                                    limit_len=args.limit[0])
        valid_dataset = CocoDataset(args.dataset_root, set_name='test',
                                    transform=get_augumentation('test'),
                                    limit_len=args.limit[1])

    print('train_dataset:', len(train_dataset))
    print('valid_dataset:', len(valid_dataset))

    steps_pre_epoch = len(train_dataset) // args.batch_size
    print('steps_pre_epoch:', steps_pre_epoch)

    # NOTE(review): the loaders are built before the per-process batch-size /
    # worker split further down, so that split does not affect them --
    # confirm this is intended for distributed runs.
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.workers,
                              shuffle=True,
                              collate_fn=detection_collate,
                              pin_memory=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              num_workers=args.workers,
                              shuffle=False,
                              collate_fn=detection_collate,
                              pin_memory=True)

    checkpoint = None
    if args.resume is not None:
        # Fail with a clear message instead of indexing an empty placeholder
        # when the requested checkpoint does not exist.
        if not os.path.isfile(args.resume):
            raise FileNotFoundError(
                "=> no checkpoint found at '{}'".format(args.resume))
        print("=> loading checkpoint '{}'".format(args.resume))
        # NOTE: torch.load unpickles arbitrary objects (the checkpoint holds
        # the saved option namespace) -- only load trusted files.
        if args.gpu is None:
            checkpoint = torch.load(args.resume)
        else:
            # Map model to be loaded to specified single gpu.
            loc = 'cuda:{}'.format(args.gpu)
            checkpoint = torch.load(args.resume, map_location=loc)
        params = checkpoint['parser']
        args.num_class = params.num_class
        args.network = params.network
        args.start_epoch = checkpoint['epoch'] + 1
        del params

    model = EfficientDet(num_classes=args.num_class,
                         network=args.network,
                         W_bifpn=EFFICIENTDET[args.network]['W_bifpn'],
                         D_bifpn=EFFICIENTDET[args.network]['D_bifpn'],
                         D_class=EFFICIENTDET[args.network]['D_class']
                         )

    if checkpoint is not None:
        model.load_state_dict(checkpoint['state_dict'])
    # Release the (potentially large) checkpoint before training starts.
    del checkpoint

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu], find_unused_parameters=True)
            print('Run with DistributedDataParallel with divice_ids....')
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
            print('Run with DistributedDataParallel without device_ids....')
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = model.cpu()
        model = torch.nn.DataParallel(model).cuda()

    if args.lr_choice == 'lr_fn':
        lr_now = float(args.lr_fn['LR_START'])
    elif args.lr_choice == 'lr_scheduler':
        lr_now = args.lr
    else:
        # Without this branch an unknown choice surfaced later as an
        # unbound-variable error on ``lr_now``.
        raise ValueError(
            "Unsupported lr_choice: {!r} (expected 'lr_fn' or "
            "'lr_scheduler')".format(args.lr_choice))

    optimizer = optim.Adam(model.parameters(), lr=lr_now)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, factor=0.1, verbose=True)
    cudnn.benchmark = True

    # Resolve the final log locations first, then clear stale files at those
    # locations.  Removing the bare relative names before applying the Kaggle
    # prefix left old files in /kaggle/working/ in place, and the append-mode
    # opens below then wrote a second header into them.
    use_kaggle = bool(os.environ.get('KAGGLE_KERNEL_RUN_TYPE'))
    log_prefix = '/kaggle/working/' if use_kaggle else ''
    iteration_loss_path = log_prefix + 'iteration_loss.csv'
    epoch_loss_path = log_prefix + 'epoch_loss.csv'
    eval_train_path = log_prefix + 'eval_train_result.csv'
    eval_val_path = log_prefix + 'eval_val_result.csv'
    for log_path in (iteration_loss_path, epoch_loss_path,
                     eval_train_path, eval_val_path):
        if os.path.isfile(log_path):
            os.remove(log_path)

    # Evaluation runs on the bare network.  Only the DataParallel /
    # DistributedDataParallel wrappers expose it as ``.module``; on the
    # single-GPU path the model is not wrapped at all.
    wrappers = (torch.nn.DataParallel,
                torch.nn.parallel.DistributedDataParallel)
    _model = model.module if isinstance(model, wrappers) else model

    with open(epoch_loss_path, 'a+') as epoch_loss_file, \
         open(iteration_loss_path, 'a+') as iteration_loss_file, \
         open(eval_train_path, 'a+') as eval_train_file, \
         open(eval_val_path, 'a+') as eval_val_file:

        epoch_loss_file.write('epoch_num,mean_epoch_loss\n')
        iteration_loss_file.write('epoch_num,iteration,classification_loss,regression_loss,iteration_loss\n')
        eval_train_file.write('epoch_num,map50\n')
        eval_val_file.write('epoch_num,map50\n')

        for epoch in range(args.start_epoch, args.num_epoch):
            train(train_loader, model, scheduler, optimizer, epoch, args,
                  epoch_loss_file, iteration_loss_file, steps_pre_epoch)

            # test
            # NOTE(review): the network is left in eval mode with
            # is_training=False afterwards; presumably train() switches it
            # back at the start of the next epoch -- confirm.
            _model.eval()
            _model.is_training = False
            with torch.no_grad():
                if args.dataset != 'show':
                    evaluate_coco(train_dataset, _model, args.dataset, epoch,
                                  eval_train_file)
                evaluate_coco(valid_dataset, _model, args.dataset, epoch,
                              eval_val_file)
def train():
    """Train EfficientDet with the SSD MultiBox loss on COCO or VOC.

    Configuration comes from the module-level ``args`` namespace
    (``dataset``, ``dataset_root``, ``cuda``, ``visdom``, ``lr``, ``gamma``,
    ``batch_size``, ``num_workers``, ``num_epoch``, ``save_folder``);
    invalid combinations are reported through ``parser.error``.

    Side effects:
        Prints timing and loss every 10 iterations, writes
        ``weights/Effi<iteration>.pth`` every 5000 iterations and a final
        ``<save_folder><dataset>.pth`` once training has finished.
    """
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(
                                    cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(
                                   cfg['min_dim'], MEANS))
    else:
        # Without this branch ``cfg`` and ``dataset`` stayed unbound and the
        # failure only surfaced later as an unbound-variable error.
        parser.error("Unsupported dataset: {!r} (expected 'COCO' or 'VOC')"
                     .format(args.dataset))

    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    # NOTE(review): other call sites pass ``num_classes=``; confirm which
    # keyword this EfficientDet version expects.
    net = EfficientDet(num_class=cfg['num_classes'])

    if args.cuda:
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
        net = net.cuda()

    optimizer = optim.AdamW(net.parameters(), lr=args.lr)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    print('Loading the dataset...')

    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    # Number of learning-rate decay steps applied so far.
    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    # Global step counter across all epochs.
    iteration = 0
    for _ in range(args.num_epoch):
        for images, targets in data_loader:
            if iteration in cfg['lr_steps']:
                step_index += 1
                adjust_learning_rate(optimizer, args.gamma, step_index)

            # Plain tensors are used directly: the ``Variable`` wrapper and
            # its ``volatile`` flag were removed in PyTorch 0.4, and the
            # targets never require gradients.
            if args.cuda:
                images = images.cuda()
                targets = [ann.cuda() for ann in targets]

            # forward
            t0 = time.time()
            out = net(images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            # The former running sums of loss_l / loss_c were never read and
            # accumulated graph-attached tensors, so they have been dropped.

            if iteration % 10 == 0:
                print('timer: %.4f sec.' % (t1 - t0))
                print('iter ' + repr(iteration) +
                      ' || Loss: %.4f ||' % loss.item(),
                      end=' ')
            if iteration != 0 and iteration % 5000 == 0:
                print('Saving state, iter:', iteration)
                # Name the file after the global iteration that is logged
                # above; the per-epoch batch index repeats every epoch and
                # made checkpoints collide.
                torch.save(net.state_dict(),
                           'weights/Effi' + repr(iteration) + '.pth')
            iteration += 1
    torch.save(net.state_dict(), args.save_folder + args.dataset + '.pth')