def train():
    # VOC dataset with the SSD-style augmentation pipeline (512x512 input, mean subtraction).
    dataset = VOCDetection(root=args.dataset_root,
                           transform=SSDAugmentation(512, MEANS))
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=0,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=False)

    model = EfficientDet(num_classes=21)
    model = model.cuda()

    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    criterion = FocalLoss()

    model.train()
    iteration = 0
    for epoch in range(args.num_epoch):
        print('Start epoch: {} ...'.format(epoch))
        total_loss = []
        for idx, sample in enumerate(data_loader):
            images = sample['img'].cuda()
            classification, regression, anchors = model(images)
            classification_loss, regression_loss = criterion(
                classification, regression, anchors, sample['annot'])
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss
            # Skip batches that contribute no loss (e.g. no annotation matched any anchor).
            if bool(loss == 0):
                continue
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
            optimizer.step()
            total_loss.append(loss.item())
            if iteration % 100 == 0:
                print('Epoch/Iteration: {}/{}, classification: {}, regression: {}, total_loss: {}'
                      .format(epoch, iteration,
                              classification_loss.item(),
                              regression_loss.item(),
                              np.mean(total_loss)))
            iteration += 1
        torch.save(model.state_dict(),
                   './weights/checkpoint_{}.pth'.format(epoch))
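# train() above reads its configuration from a module-level `args` object.
# A minimal sketch of the argument parsing it assumes is shown below; the flag
# names mirror the attributes used above (dataset_root, batch_size, lr,
# num_epoch), while the default values are illustrative assumptions, not the
# repository's actual defaults.
import argparse

parser = argparse.ArgumentParser(description='EfficientDet VOC training (sketch)')
parser.add_argument('--dataset_root', type=str, default='data/VOCdevkit',
                    help='Root directory of the VOC dataset (assumed default)')
parser.add_argument('--batch_size', type=int, default=8,
                    help='Batch size for the training DataLoader')
parser.add_argument('--lr', type=float, default=1e-4,
                    help='Learning rate passed to AdamW')
parser.add_argument('--num_epoch', type=int, default=100,
                    help='Number of training epochs')
args = parser.parse_args()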
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            # args.rank = int(os.environ["RANK"])
            args.rank = 1
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes.
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # Optionally resume from a checkpoint and restore the saved configuration.
    checkpoint = []
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map the model to the specified single GPU.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            params = checkpoint['parser']
            args.num_class = params.num_class
            args.network = params.network
            args.start_epoch = params.start_epoch + 1
            del params

    model = EfficientDet(num_classes=args.num_class,
                         network=args.network,
                         W_bifpn=EFFICIENTDET[args.network]['W_bifpn'],
                         D_bifpn=EFFICIENTDET[args.network]['D_bifpn'],
                         D_class=EFFICIENTDET[args.network]['D_class'],
                         gpu=args.gpu)
    if args.resume is not None:
        model.load_state_dict(checkpoint['state_dict'])
    del checkpoint

    if args.distributed:
        # For multiprocessing distributed, the DistributedDataParallel constructor
        # should always set the single device scope; otherwise DistributedDataParallel
        # will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per DistributedDataParallel,
            # divide the batch size and worker count by the number of GPUs.
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu], find_unused_parameters=True)
            print('Run with DistributedDataParallel with device_ids ...')
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set.
            model = torch.nn.parallel.DistributedDataParallel(model)
            print('Run with DistributedDataParallel without device_ids ...')
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        print('Run with DataParallel ...')
        model = torch.nn.DataParallel(model).cuda()

    # Training dataset
    train_dataset = []
    if args.dataset == 'VOC':
        train_dataset = VOCDetection(root=args.dataset_root,
                                     transform=transforms.Compose([
                                         Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
    elif args.dataset == 'COCO':
        train_dataset = CocoDataset(
            root_dir=args.dataset_root,
            set_name='train2017',
            transform=get_augumentation(
                phase='train',
                width=EFFICIENTDET[args.network]['input_size'],
                height=EFFICIENTDET[args.network]['input_size']))

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.workers,
                              shuffle=True,
                              collate_fn=collater,
                              pin_memory=True)

    # Define the loss function (criterion), optimizer and scheduler.
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    cudnn.benchmark = True

    for epoch in range(args.start_epoch, args.num_epoch):
        train(train_loader, model, scheduler, optimizer, epoch, args)
        # Save a full checkpoint (epoch, args and model weights) after every epoch.
        state = {
            'epoch': epoch,
            'parser': args,
            'state_dict': get_state_dict(model)
        }
        torch.save(
            state,
            './weights/checkpoint_{}_{}_{}.pth'.format(args.dataset, args.network, epoch))
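# get_state_dict() is used above when checkpointing but is not defined in this
# file. The sketch below shows what such a helper typically does: it unwraps a
# DataParallel / DistributedDataParallel model so the saved keys do not carry
# the 'module.' prefix. This is an assumption about the helper's behaviour,
# not the repository's exact implementation.
import torch


def get_state_dict(model):
    # Wrapped models expose the underlying network as `model.module`.
    if isinstance(model, (torch.nn.DataParallel,
                          torch.nn.parallel.DistributedDataParallel)):
        return model.module.state_dict()
    return model.state_dict()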
parser.add_argument('--weight', default=None,
                    help='Checkpoint state_dict file to resume training from')
args = parser.parse_args()

if args.weight is not None:
    resume_path = str(args.weight)
    print("Loading checkpoint: {} ...".format(resume_path))
    # Load on CPU first so the checkpoint can be restored regardless of the
    # device it was saved from.
    checkpoint = torch.load(args.weight,
                            map_location=lambda storage, loc: storage)
    params = checkpoint['parser']
    args.num_class = params.num_class
    args.network = params.network

model = EfficientDet(num_classes=args.num_class,
                     network=args.network,
                     W_bifpn=EFFICIENTDET[args.network]['W_bifpn'],
                     D_bifpn=EFFICIENTDET[args.network]['D_bifpn'],
                     D_class=EFFICIENTDET[args.network]['D_class'],
                     is_training=False,
                     threshold=args.threshold,
                     iou_threshold=args.iou_threshold)
model.load_state_dict(checkpoint['state_dict'])
model = model.cuda()

if args.dataset == 'VOC':
    valid_dataset = VOCDetection(root=args.dataset_root,
                                 image_sets=[('2007', 'test')],
                                 transform=transforms.Compose([Normalizer(), Resizer()]))
    evaluate(valid_dataset, model)
else:
    valid_dataset = CocoDataset(root_dir=args.dataset_root,
                                set_name='val2017',
                                transform=transforms.Compose([Normalizer(), Resizer()]))
    evaluate_coco(valid_dataset, model)
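# The `threshold` and `iou_threshold` arguments passed to EfficientDet above
# control score filtering and non-maximum suppression at inference time. The
# snippet below only illustrates how those two values are commonly applied
# using torchvision's NMS; it is not this repository's implementation.
import torch
from torchvision.ops import nms


def filter_detections(boxes, scores, score_threshold=0.5, iou_threshold=0.5):
    """Keep high-scoring boxes, then suppress heavily overlapping ones."""
    keep = scores > score_threshold               # drop low-confidence detections
    boxes, scores = boxes[keep], scores[keep]
    keep_idx = nms(boxes, scores, iou_threshold)  # suppress boxes with IoU above the threshold
    return boxes[keep_idx], scores[keep_idx]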
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            # args.rank = int(os.environ["RANK"])
            args.rank = 1
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes.
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # args.num_class = train_dataset.num_classes()
    print('dataset:', args.dataset)
    print('network:', args.network)
    print('num_epoch:', args.num_epoch)
    print('batch_size:', args.batch_size)
    print('lr_choice:', args.lr_choice)
    print('lr:', args.lr)
    print('lr_fn:', args.lr_fn)
    print('image_size:', args.image_size)
    print('workers:', args.workers)
    print('num_class:', args.num_class)
    print('save_folder:', args.save_folder)
    print('limit:', args.limit)

    # Datasets: either preprocessed HDF5 files or the raw COCO-format folders.
    if args.dataset == 'h5':
        train_dataset = H5CoCoDataset('{}/train_small.hdf5'.format(args.dataset_root), 'train_small')
        valid_dataset = H5CoCoDataset('{}/test.hdf5'.format(args.dataset_root), 'test')
    else:
        train_dataset = CocoDataset(args.dataset_root,
                                    set_name='train_small',
                                    transform=get_augumentation('train'),
                                    limit_len=args.limit[0])
        valid_dataset = CocoDataset(args.dataset_root,
                                    set_name='test',
                                    transform=get_augumentation('test'),
                                    limit_len=args.limit[1])
    print('train_dataset:', len(train_dataset))
    print('valid_dataset:', len(valid_dataset))

    steps_per_epoch = len(train_dataset) // args.batch_size
    print('steps_per_epoch:', steps_per_epoch)

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.workers,
                              shuffle=True,
                              collate_fn=detection_collate,
                              pin_memory=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              num_workers=args.workers,
                              shuffle=False,
                              collate_fn=detection_collate,
                              pin_memory=True)

    # Optionally resume from a checkpoint and restore the saved configuration.
    checkpoint = []
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map the model to the specified single GPU.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            params = checkpoint['parser']
            args.num_class = params.num_class
            args.network = params.network
            args.start_epoch = checkpoint['epoch'] + 1
            del params

    model = EfficientDet(num_classes=args.num_class,
                         network=args.network,
                         W_bifpn=EFFICIENTDET[args.network]['W_bifpn'],
                         D_bifpn=EFFICIENTDET[args.network]['D_bifpn'],
                         D_class=EFFICIENTDET[args.network]['D_class'])
    if args.resume is not None:
        model.load_state_dict(checkpoint['state_dict'])
    del checkpoint

    if args.distributed:
        # For multiprocessing distributed, the DistributedDataParallel constructor
        # should always set the single device scope; otherwise DistributedDataParallel
        # will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per DistributedDataParallel,
            # divide the batch size and worker count by the number of GPUs.
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu], find_unused_parameters=True)
            print('Run with DistributedDataParallel with device_ids ...')
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set.
            model = torch.nn.parallel.DistributedDataParallel(model)
            print('Run with DistributedDataParallel without device_ids ...')
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = model.cpu()
        model = torch.nn.DataParallel(model).cuda()

    # Initial learning rate: either from the custom schedule table or the flat value.
    if args.lr_choice == 'lr_fn':
        lr_now = float(args.lr_fn['LR_START'])
    elif args.lr_choice == 'lr_scheduler':
        lr_now = args.lr
    optimizer = optim.Adam(model.parameters(), lr=lr_now)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, factor=0.1, verbose=True)
    cudnn.benchmark = True

    # Reset the CSV logs from any previous run.
    iteration_loss_path = 'iteration_loss.csv'
    if os.path.isfile(iteration_loss_path):
        os.remove(iteration_loss_path)
    epoch_loss_path = 'epoch_loss.csv'
    if os.path.isfile(epoch_loss_path):
        os.remove(epoch_loss_path)
    eval_train_path = 'eval_train_result.csv'
    if os.path.isfile(eval_train_path):
        os.remove(eval_train_path)
    eval_val_path = 'eval_val_result.csv'
    if os.path.isfile(eval_val_path):
        os.remove(eval_val_path)

    # On Kaggle, write the logs to the persistent working directory.
    USE_KAGGLE = bool(os.environ.get('KAGGLE_KERNEL_RUN_TYPE', False))
    if USE_KAGGLE:
        iteration_loss_path = '/kaggle/working/' + iteration_loss_path
        epoch_loss_path = '/kaggle/working/' + epoch_loss_path
        eval_val_path = '/kaggle/working/' + eval_val_path
        eval_train_path = '/kaggle/working/' + eval_train_path

    with open(epoch_loss_path, 'a+') as epoch_loss_file, \
            open(iteration_loss_path, 'a+') as iteration_loss_file, \
            open(eval_train_path, 'a+') as eval_train_file, \
            open(eval_val_path, 'a+') as eval_val_file:
        epoch_loss_file.write('epoch_num,mean_epoch_loss\n')
        iteration_loss_file.write('epoch_num,iteration,classification_loss,regression_loss,iteration_loss\n')
        eval_train_file.write('epoch_num,map50\n')
        eval_val_file.write('epoch_num,map50\n')

        for epoch in range(args.start_epoch, args.num_epoch):
            train(train_loader, model, scheduler, optimizer, epoch, args,
                  epoch_loss_file, iteration_loss_file, steps_per_epoch)

            # Evaluate on the training and validation sets after each epoch.
            _model = model.module
            _model.eval()
            _model.is_training = False
            with torch.no_grad():
                if args.dataset != 'show':
                    evaluate_coco(train_dataset, _model, args.dataset, epoch, eval_train_file)
                evaluate_coco(valid_dataset, _model, args.dataset, epoch, eval_val_file)
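# main_worker(gpu, ngpus_per_node, args) above follows the signature expected
# by torch.multiprocessing.spawn. A typical launcher is sketched below; the
# world-size arithmetic mirrors the standard PyTorch distributed examples, and
# the argument names (multiprocessing_distributed, world_size, gpu) are assumed
# to come from this script's argument parser.
import torch
import torch.multiprocessing as mp


def main(args):
    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # One process per GPU; the total world size covers every process on every node.
        args.world_size = ngpus_per_node * args.world_size
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Single process, optionally pinned to args.gpu.
        main_worker(args.gpu, ngpus_per_node, args)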
def train():
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because "
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(cfg['min_dim'], MEANS))

    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    net = EfficientDet(num_class=cfg['num_classes'])

    if args.cuda:
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
        net = net.cuda()

    optimizer = optim.AdamW(net.parameters(), lr=args.lr)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, args.cuda)

    net.train()
    # Loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    iteration = 0
    for epoch in range(args.num_epoch):
        for idx, (images, targets) in enumerate(data_loader):
            # Step decay of the learning rate at the configured iterations.
            if iteration in cfg['lr_steps']:
                step_index += 1
                adjust_learning_rate(optimizer, args.gamma, step_index)

            if args.cuda:
                images = images.cuda()
                targets = [ann.cuda() for ann in targets]

            # forward
            t0 = time.time()
            out = net(images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()

            loc_loss += loss_l.item()
            conf_loss += loss_c.item()

            if iteration % 10 == 0:
                print('timer: %.4f sec.' % (t1 - t0))
                print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % loss.item(), end=' ')

            if iteration != 0 and iteration % 5000 == 0:
                print('Saving state, iter:', iteration)
                torch.save(net.state_dict(), 'weights/Effi' + repr(iteration) + '.pth')
            iteration += 1

    torch.save(net.state_dict(), args.save_folder + '' + args.dataset + '.pth')
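# adjust_learning_rate() is called above when the iteration count reaches one
# of cfg['lr_steps'], but it is not defined in this file. The sketch below
# follows the step-decay schedule used in the SSD.pytorch training script this
# loop is based on (lr = args.lr * gamma ** step); treat it as an assumption
# about the helper rather than this repository's exact code.
def adjust_learning_rate(optimizer, gamma, step):
    """Decay the base learning rate by `gamma` at each configured step."""
    lr = args.lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr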