# --- FPNSSD512 training setup (9 classes) ---
# Build the detector and load pretrained/base weights from args.model.
net = FPNSSD512(num_classes=9).to(device)
net.load_state_dict(torch.load(args.model))
if device == 'cuda':
    # Wrap for multi-GPU; cudnn.benchmark speeds up fixed-size inputs.
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True

best_loss = float('inf')  # best test loss
start_epoch = 0  # start from epoch 0 or last epoch
if args.resume:
    print('==> Resuming from checkpoint..')
    checkpoint = torch.load(args.checkpoint)
    # NOTE(review): net is already DataParallel-wrapped here on CUDA, so the
    # checkpoint's keys must match the wrapped ('module.'-prefixed) form —
    # verify against how the checkpoint was saved.
    net.load_state_dict(checkpoint['net'])
    best_loss = checkpoint['loss']
    start_epoch = checkpoint['epoch']

criterion = SSDLoss(num_classes=9)
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=1e-4)


# Training
def train(epoch):
    # One epoch of training over trainloader; accumulates a running loss.
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    for batch_idx, (inputs, loc_targets, cls_targets) in enumerate(trainloader):
        inputs = inputs.to(device)
        loc_targets = loc_targets.to(device)
        # NOTE(review): fragment is truncated here in this chunk — the rest
        # of the loop body (forward/backward/step) is not visible.
# --- DataLoader + model setup (single-GPU variant) ---
NUM_WORKERS = 2
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

net.cuda()
# net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
cudnn.benchmark = True  # autotune conv algorithms for fixed input sizes

criterion = SSDLoss(num_classes=NUM_CLASSES)
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=1e-4)


# Training
def train(epoch):
    # One epoch of training over trainloader; accumulates a running loss.
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    for batch_idx, (inputs, loc_targets, cls_targets) in enumerate(trainloader):
        # Variable() is a legacy (pre-0.4) wrapper; it is a no-op on tensors
        # in modern PyTorch.
        inputs = Variable(inputs.cuda())
        loc_targets = Variable(loc_targets.cuda())
        # NOTE(review): fragment is truncated here in this chunk — the rest
        # of the loop body is not visible.
def main(**kwargs):
    """Train DSOD (21 classes) with gradient accumulation and loss-based checkpointing.

    kwargs override fields of the global ``opt`` config via ``opt._parse``.
    Saves a periodic checkpoint every ``opt.save_state_every`` epochs and the
    best-loss weights to ``opt.checkpoint + 'dsod.pth'``.
    """
    opt._parse(kwargs)
    vis = Visualizer(env=opt.env)

    # Model
    print('==> Building model..')
    net = DSOD(num_classes=21)  # presumably 20 VOC classes + background — TODO confirm

    # Dataset
    print('==> Preparing dataset..')
    box_coder = SSDBoxCoder(net)  # encodes ground-truth boxes against the net's anchors
    trainset = ListDataset(root=opt.train_img_root,
                           list_file=opt.train_img_list,
                           transform=Transform(box_coder, True))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batch_size,
                                              shuffle=True,
                                              num_workers=opt.num_worker,
                                              pin_memory=True)
    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

    criterion = SSDLoss(num_classes=21)
    optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9, weight_decay=1e-4)

    best_map_ = 0
    best_loss = 1e100
    start_epoch = 0
    if opt.load_path is not None:
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(opt.load_path)
        net.load_state_dict(checkpoint['net'])
        # NOTE(review): the 'map' key actually holds a loss value here — the
        # save path below stores current_loss under 'map'.
        best_loss = checkpoint['map']
        start_epoch = checkpoint['epoch'] + 1
        print('start_epoch = ', start_epoch, 'best_loss = ', best_loss)

    for epoch in range(start_epoch, start_epoch + 100):
        print('\nEpoch: ', epoch)
        net.train()
        train_loss = 0
        optimizer.zero_grad()
        ix = 0  # backward passes accumulated since the last optimizer step
        for batch_idx, (inputs, loc_targets, cls_targets) in tqdm(enumerate(trainloader)):
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())
            loc_preds, cls_preds = net(inputs)
            ix += 1
            loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
            loss.backward()  # gradients accumulate across iterations
            train_loss += loss.data.item()
            current_loss = train_loss / (1 + batch_idx)
            # Step once every opt.iter_size batches; average the accumulated
            # gradients by the number of backward passes first.
            if (batch_idx + 1) % (opt.iter_size) == 0:
                for name, p in net.named_parameters():
                    p.grad.data.div_(ix)
                ix = 0
                optimizer.step()
                optimizer.zero_grad()
            if (batch_idx + 1) % opt.plot_every == 0:
                vis.plot('loss', current_loss)
                # img = predict(net, box_coder, os.path.join(opt.train_img_root, trainset.fnames[batch_idx]))
                # vis.img('predict', np.array(img).transpose(2, 0, 1))
                # if os.path.exists(opt.debug_file):
                #     import ipdb
                #     ipdb.set_trace()

        print('current_loss: ', current_loss, 'best_loss: ', best_loss)
        # Step-decay the learning rate by 10x every 20 epochs.
        if (epoch + 1) % 20 == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1
        # Periodic snapshot (stores the epoch's mean loss under 'map').
        if (epoch + 1) % opt.save_state_every == 0:
            state = {
                'net': net.state_dict(),
                'map': current_loss,
                'epoch': epoch,
            }
            torch.save(state, opt.checkpoint + '%s.pth' % epoch)
        # Best-loss snapshot.
        if current_loss < best_loss:
            best_loss = current_loss
            print('saving model at epoch: ', epoch)
            state = {
                'net': net.state_dict(),
                'map': best_loss,
                'epoch': epoch,
            }
            torch.save(state, opt.checkpoint + 'dsod.pth')
    # (continuation of a DataLoader(...) call whose opening is outside this chunk)
    shuffle=False, num_workers=INPUT_WORKERS
    #collate_fn=text_dataset.bbox_collate_fn
)

# Sanity-check the loaders and one sample batch's tensor sizes.
print(len(trainloader))
print(len(valloader))
img, bboxes, labels, img_name = next(iter(trainloader))
print(img.size())
print(bboxes.size())
print(labels.size())

net = torch.nn.DataParallel(net)  #, device_ids=[2,3,4,5])
cudnn.benchmark = True
net.cuda()

criterion = SSDLoss(num_classes=2)  # binary: presumably text vs background — TODO confirm
criterion.cuda()
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)


# Training
def train(epoch):
    # One epoch over trainloader; batches also carry sample names.
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    for batch_idx, (inputs, loc_targets, cls_targets, names) in enumerate(trainloader):
        #print(batch_idx/len(trainloader))
        inputs = Variable(inputs.cuda())
        loc_targets = Variable(loc_targets.cuda())
        cls_targets = Variable(cls_targets.cuda())
        # NOTE(review): fragment is truncated here in this chunk — the rest
        # of the loop body is not visible.
def main(**kwargs):
    """Train DSOD on VOC07+12 trainval, evaluating mAP after every epoch.

    kwargs override fields of the global ``opt`` config via ``opt._parse``.
    On every new best mAP, a checkpoint {'net', 'map', 'epoch'} is written to
    ``opt.checkpoint + '/<map>.pth'``.
    """
    opt._parse(kwargs)
    vis = Visualizer(env=opt.env)

    # Model
    print('==> Building model..')
    net = DSOD(num_classes=21)  # presumably 20 VOC classes + background — TODO confirm
    start_epoch = 0  # start from epoch 0 or last epoch
    if opt.load_path is not None:
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(opt.load_path)
        # Loads into the *unwrapped* net, so checkpoints must be saved from
        # net.module (no 'module.' key prefix) — see the save path below.
        net.load_state_dict(checkpoint['net'])

    # Dataset
    print('==> Preparing dataset..')
    box_coder = SSDBoxCoder(net)  # encodes ground-truth boxes against the net's anchors
    trainset = ListDataset(root=opt.data_root,
                           list_file=[opt.voc07_trainval, opt.voc12_trainval],
                           transform=Transform(box_coder, True))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batch_size,
                                              shuffle=True,
                                              num_workers=8)

    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

    criterion = SSDLoss(num_classes=21)
    optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9, weight_decay=5e-4)

    best_map_ = 0
    for epoch in range(start_epoch, start_epoch + 200):
        print('\nEpoch: %d' % epoch)
        net.train()
        train_loss = 0
        for batch_idx, (inputs, loc_targets, cls_targets) in tqdm(enumerate(trainloader)):
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())

            optimizer.zero_grad()
            loc_preds, cls_preds = net(inputs)
            loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
            loss.backward()
            optimizer.step()

            # FIX: `loss.data[0]` raises on PyTorch >= 0.5; `.item()` is the
            # supported way to read a 0-dim tensor as a python number.
            train_loss += loss.item()
            if (batch_idx + 1) % opt.plot_every == 0:
                vis.plot('loss', train_loss / (batch_idx + 1))
                img = predict(net, box_coder,
                              os.path.join(opt.data_root, trainset.fnames[batch_idx]))
                vis.img('predict', np.array(img).transpose(2, 0, 1))
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # `eval` here is the project's mAP-evaluation helper, not the builtin.
        aps = eval(net.module, test_num=epoch * 100 + 100)
        map_ = aps['map']
        if map_ > best_map_:
            print('Saving..')
            # FIX: update best_map_ BEFORE building the checkpoint so the
            # saved 'map' records the new best (old code stored the stale
            # previous value while naming the file after the new one).
            best_map_ = map_
            # FIX: save the unwrapped module so the keys carry no 'module.'
            # prefix and round-trip through the resume path above.
            state = {
                'net': net.module.state_dict(),
                'map': best_map_,
                'epoch': epoch,
            }
            # FIX: makedirs(exist_ok=True) handles nested paths and avoids
            # the isdir()/mkdir() check-then-create race.
            os.makedirs(os.path.dirname(opt.checkpoint), exist_ok=True)
            torch.save(state, opt.checkpoint + '/%s.pth' % best_map_)
def main(**kwargs):
    """Train DSOD with gradient accumulation; evaluate mAP periodically.

    kwargs override fields of the global ``opt`` config via ``opt._parse``.
    The running best-loss weights are staged at /tmp/dsod.pth; every
    ``opt.eval_every`` epochs those weights are evaluated and, on a new best
    mAP, checkpointed to ``opt.checkpoint + '/<map>.pth'``.  On a regression
    the best checkpoint is restored and the learning rate is decayed 10x.
    """
    opt._parse(kwargs)
    vis = Visualizer(env=opt.env)

    # Model
    print('==> Building model..')
    net = DSOD(num_classes=21)  # presumably 20 VOC classes + background — TODO confirm
    start_epoch = 0  # start from epoch 0 or last epoch

    # Dataset
    print('==> Preparing dataset..')
    box_coder = SSDBoxCoder(net)
    trainset = ListDataset(root=opt.data_root,
                           list_file=[opt.voc07_trainval, opt.voc12_trainval],
                           transform=Transform(box_coder, True))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batch_size,
                                              shuffle=True,
                                              num_workers=8,
                                              pin_memory=True)

    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

    if opt.load_path is not None:
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(opt.load_path)
        net.module.load_state_dict(checkpoint['net'])

    criterion = SSDLoss(num_classes=21)
    optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9, weight_decay=5e-4)

    best_map_ = 0
    best_loss = 1e100
    # FIX: best_path was unbound until the first improving evaluation; if the
    # first eval did not improve on best_map_, the rollback branch below
    # raised NameError.  Initialize and guard it.
    best_path = None
    for epoch in range(start_epoch, start_epoch + 200):
        print('\nEpoch: %d' % epoch)
        net.train()
        train_loss = 0
        optimizer.zero_grad()
        ix = 0  # backward passes accumulated since the last optimizer step
        for batch_idx, (inputs, loc_targets, cls_targets) in tqdm(enumerate(trainloader)):
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())

            loc_preds, cls_preds = net(inputs)
            ix += 1
            loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
            loss.backward()  # gradients accumulate across iterations
            # FIX: `loss.data[0]` raises on PyTorch >= 0.5; use .item().
            train_loss += loss.item()
            # Step once every opt.iter_size batches; average the accumulated
            # gradients by the number of backward passes first.
            if (batch_idx + 1) % (opt.iter_size) == 0:
                # if True:
                for name, p in net.named_parameters():
                    p.grad.data.div_(ix)
                ix = 0
                optimizer.step()
                optimizer.zero_grad()
            if (batch_idx + 1) % opt.plot_every == 0:
                vis.plot('loss', train_loss / (batch_idx + 1))
                img = predict(net, box_coder,
                              os.path.join(opt.data_root, trainset.fnames[batch_idx]))
                vis.img('predict', np.array(img).transpose(2, 0, 1))
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # if (epoch+1)%10 == 0 :
        #     state = {
        #         'net': net.module.state_dict(),
        #         # 'map': best_map_,
        #         'epoch': epoch,
        #     }
        #     torch.save(state, opt.checkpoint + '/%s.pth' % epoch)
        # if (epoch+1) % 30 == 0:
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] *= 0.1

        # Stage the best-loss weights for the periodic evaluation below.
        current_loss = train_loss / (1 + batch_idx)
        if current_loss < best_loss:
            best_loss = current_loss
            torch.save(net.module.state_dict(), '/tmp/dsod.pth')

        if (epoch + 1) % opt.eval_every == 0:
            net.module.load_state_dict(torch.load('/tmp/dsod.pth'))
            # `eval` is the project's mAP-evaluation helper, not the builtin.
            aps = eval(net.module)
            map_ = aps['map']
            if map_ > best_map_:
                print('Saving..')
                # FIX: update best_map_ BEFORE building the checkpoint so the
                # saved 'map' records the new best (old code stored the stale
                # previous value while naming the file after the new one).
                best_map_ = map_
                state = {
                    'net': net.module.state_dict(),
                    'map': best_map_,
                    'epoch': epoch,
                }
                # FIX: makedirs(exist_ok=True) handles nested paths and avoids
                # the isdir()/mkdir() check-then-create race.
                os.makedirs(os.path.dirname(opt.checkpoint), exist_ok=True)
                best_path = opt.checkpoint + '/%s.pth' % best_map_
                torch.save(state, best_path)
            else:
                # Regression: roll back to the best checkpoint (if any yet)
                # and decay the learning rate 10x.
                if best_path is not None:
                    net.module.load_state_dict(torch.load(best_path)['net'])
                for param_group in optimizer.param_groups:
                    param_group['lr'] *= 0.1
            vis.log(dict(epoch=(epoch + 1),
                         map=map_,
                         loss=train_loss / (batch_idx + 1)))