def set_criterion(cfg):
    """Build the MultiBox training criterion from a nested config object.

    Reads the class count from ``cfg.model`` and every loss hyper-parameter
    from ``cfg.loss``; all loss options are forwarded by keyword.
    """
    loss_cfg = cfg.loss
    return MultiBoxLoss(
        cfg.model.num_classes,
        overlap_thresh=loss_cfg.overlap_thresh,
        prior_for_matching=loss_cfg.prior_for_matching,
        bkg_label=loss_cfg.bkg_label,
        neg_mining=loss_cfg.neg_mining,
        neg_pos=loss_cfg.neg_pos,
        neg_overlap=loss_cfg.neg_overlap,
        encode_target=loss_cfg.encode_target,
    )
def __init__(self, model, num_classes=3, **kwargs):
    """Set up an SSD-style trainer: hyper-parameters, weight init, optimizer, loss.

    Args:
        model: detection network exposing ``extras``, ``loc`` and ``conf``
            sub-modules (initialized here with ``weights_init``).
        num_classes: number of detection classes for MultiBoxLoss.
        **kwargs: optional configuration — batch_size (16), visdom (False),
            gamma (False), cuda (False), weight_decay (0.0005),
            momentum (0.9), lr (0.001), save_folder ('weights'),
            version ('v2').
    """
    self.model = model
    # Optional keyword configuration with defaults.
    self.batch_size = kwargs.pop('batch_size', 16)
    self.visdom = kwargs.pop('visdom', False)
    self.gamma = kwargs.pop('gamma', False)
    self.cuda = kwargs.pop('cuda', False)
    self.weight_decay = kwargs.pop('weight_decay', 0.0005)
    self.momentum = kwargs.pop('momentum', 0.9)
    self.lr = kwargs.pop('lr', 0.001)
    self.save_folder = kwargs.pop('save_folder', 'weights')
    self.version = kwargs.pop('version', 'v2')
    # Fixed training schedule.
    self.accum_batch_size = 32
    # BUG FIX: '/' is true division under Python 3 and produced a float
    # (e.g. 32 / 16 -> 2.0); iter_size counts gradient-accumulation steps,
    # so use integer division.
    self.iter_size = self.accum_batch_size // self.batch_size
    self.max_iter = 200
    self.stepvalues = (50, 100, 150)
    self.ssd_dim = 300  # only support 300 now
    self.rgb_means = (104, 117, 123)  # only support voc now
    ## initializations — only the SSD-specific heads; the backbone is assumed
    ## to come pre-trained.
    self.model.extras.apply(weights_init)
    self.model.loc.apply(weights_init)
    self.model.conf.apply(weights_init)
    self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr,
                               momentum=self.momentum,
                               weight_decay=self.weight_decay)
    # Positional MultiBoxLoss args — presumably (num_classes, overlap_thresh,
    # prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap,
    # encode_target); verify against the MultiBoxLoss definition.
    self.criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 15, 0.5, False)
    if self.visdom:
        import visdom
        self.viz = visdom.Visdom()
    if self.cuda:
        self.model.cuda()
        cudnn.benchmark = True
# ---- FaceBox training-script setup -----------------------------------------
use_gpu = torch.cuda.is_available()
file_root = '/home/lxg/codedata/'
learning_rate = 0.001
num_epochs = 300
batch_size = 64

# Build the detector; move it to the GPU when one is present.
net = FaceBox()
if use_gpu:
    net.cuda()

print('load model...')
# net.load_state_dict(torch.load('weight/faceboxes.pt'))

criterion = MultiBoxLoss()
# optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0003)
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=1e-4)

# Dataset and loader; ListDataset and its label-file format are project-local.
train_dataset = ListDataset(root=file_root,
                            list_file='label/box_label.txt',
                            train=True,
                            transform=[transforms.ToTensor()])
train_loader = DataLoader(train_dataset, batch_size=batch_size,
                          shuffle=True, num_workers=5)
print('the dataset has %d images' % (len(train_dataset)))
# NOTE(review): concatenated fragment. The leading statements (optimizer.step,
# per-epoch loss report, checkpoint every 5 epochs to ./ckpt/<epoch>.pkl) are
# the TAIL of a training loop whose head is not visible here —
# 'running_loss_bbox', 'running_loss_class', 'dataloader', 'epoch' and 'net'
# are defined in that missing context. The __main__ section builds a TinySSD
# on GPU, a MultiBoxLoss (neg/pos ratio 3.0 presumably — verify against the
# MultiBoxLoss signature) and an ImageNet-normalized eval-style transform; the
# trailing commented-out Compose is a truncated SSD augmentation pipeline.
# Left byte-identical because the enclosing scope cannot be reconstructed
# safely from this view.
optimizer.step() #print(pred_bbox.size(),pred_class.size()) #print("epoch: {},bbox loss:{:.8f} , class loss:{:.8f}".format(epoch + 1,loss[0].cpu().item(),loss[1].cpu().item())) print("*" * 20) print("average bbox loss: {:.8f}; average class loss: {:.8f}".format( running_loss_bbox / len(dataloader), running_loss_class / len(dataloader))) if epoch % 5 == 0: torch.save(net.state_dict(), "./ckpt/{}.pkl".format(epoch)) if __name__ == "__main__": net = TinySSD() net.cuda() loss_fn = MultiBoxLoss(3.) transform = transforms.Compose([ transforms.Resize((512, 512)), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) # transform = Compose([ # ConvertFromInts(), # PhotometricDistort(), # Expand([123, 117, 104]), # RandomSampleCrop(), # RandomMirror(), # ToPercentCoords(), # Resize(300), # SubtractMeans([123, 117, 104]), # ToTensor(),
def train():
    """Train FaceBox on the rewrite dataset with Adam + MultiStepLR.

    Resumes from 'weight/faceboxes_add_norm.pt', logs a running average
    loss to visdom every 10 iterations, and saves the weights each epoch.
    """
    use_gpu = torch.cuda.is_available()
    file_root = os.path.dirname(os.path.abspath(__file__))
    learning_rate = 0.001
    num_epochs = 300
    batch_size = 32

    net = FaceBox()
    if use_gpu:
        net.cuda()
    print('load model...')
    net.load_state_dict(torch.load('weight/faceboxes_add_norm.pt'))

    criterion = MultiBoxLoss()
    #optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0005)
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[198, 248], gamma=0.1)

    train_dataset = ListDataset(root=file_root, list_file='data/train_rewrite.txt',
                                train=True, transform=[transforms.ToTensor()])
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=4)
    val_dataset = ListDataset(root=file_root, list_file='data/val_rewrite.txt',
                              train=False, transform=[transforms.ToTensor()])
    val_loader = DataLoader(val_dataset, batch_size=batch_size,
                            shuffle=False, num_workers=4)
    print('the dataset has %d images' % (len(train_dataset)))
    print('the batch_size is %d' % (batch_size))

    num_iter = 0
    vis = visdom.Visdom()
    win = vis.line(Y=np.array([0]), X=np.array([0]))

    net.train()
    for epoch in range(num_epochs):
        print('\n\nStarting epoch %d / %d' % (epoch + 1, num_epochs))
        # BUG FIX: the original printed the stale 'learning_rate' local
        # (always 0.001); report the rate the optimizer actually uses.
        print('Learning Rate for this epoch: {}'.format(optimizer.param_groups[0]['lr']))
        total_loss = 0.
        net.train()
        for i, (images, loc_targets, conf_targets) in enumerate(train_loader):
            if use_gpu:
                images = images.cuda()
                loc_targets = loc_targets.cuda()
                conf_targets = conf_targets.cuda()
            loc_preds, conf_preds = net(images)
            loss = criterion(loc_preds, loc_targets, conf_preds, conf_targets)
            total_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (i+1) % 10 == 0:
                print ('Epoch [{}/{}], Iter [{}/{}] Loss: {:.4f}, average_loss: {:.4f}'.format(
                    epoch+1, num_epochs, i+1, len(train_loader), loss.item(), total_loss / (i+1)))
                #train_loss = total_loss /(len(train_dataset) / batch_size)
                vis.line(Y=np.array([total_loss / (i+1)]), X=np.array([num_iter]),
                         win=win,
                         name='train',
                         update='append')
                num_iter += 1
        # BUG FIX: since PyTorch 1.1 the lr scheduler must be stepped AFTER
        # the epoch's optimizer steps; stepping it first skipped the initial
        # learning-rate value and shifted the whole schedule by one epoch.
        scheduler.step()
        # val_loss = 0.0
        # net.eval()
        # for idx, (images, loc_targets, conf_targets) in enumerate(val_loader):
        #     with torch.no_grad():
        #         if use_gpu:
        #             images = images.cuda()
        #             loc_targets = loc_targets.cuda()
        #             conf_targets = conf_targets.cuda()
        #         loc_preds, conf_preds = net(images)
        #         loss = criterion(loc_preds, loc_targets, conf_preds, conf_targets)
        #         val_loss += loss.item()
        # val_loss /= len(val_dataset)/batch_size
        # vis.line(Y=np.array([val_loss]), X=np.array([epoch]),
        #          win=win,
        #          name='val',
        #          update='append')
        # print('loss of val is {}'.format(val_loss))
        if not os.path.exists('weight/'):
            os.mkdir('weight')
        print('saving model ...')
        torch.save(net.state_dict(), 'weight/faceboxes_add_norm.pt')
# NOTE(review): concatenated fragment. The opening statement is the tail of a
# state-dict key-remapping loop (likely stripping a 'module.' prefix — confirm
# against the missing loop head); it is followed by DataParallel wrapping for
# multi-GPU runs, SGD + MultiBoxLoss construction, and prior-box generation
# under no_grad. The trailing 'def train():' is CUT OFF mid-call
# ('FaceRectLMDataset(training_face_landmark_dir, img_dim,' has no closing
# paren in this view), so the fragment cannot be restyled without guessing;
# left byte-identical.
new_state_dict[name] = v net.load_state_dict(new_state_dict) if len(gpu_ids) > 1: net = torch.nn.DataParallel(net, device_ids=gpu_ids) #device = torch.device(args.device) device = torch.device('cuda:' + str(gpu_ids[0])) cudnn.benchmark = True net = net.to(device) optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay) criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 3, 0.35, False, False) priorbox = PriorBox(cfg, image_size=(img_dim, img_dim)) with torch.no_grad(): priors = priorbox.forward() priors = priors.to(device) def train(): net.train() #load the two dataset for face rectangles and landmarks respectively print('Loading Dataset...') dataset_rect = FaceRectLMDataset(training_face_rect_dir, img_dim, rgb_mean) dataset_landmark = FaceRectLMDataset(training_face_landmark_dir, img_dim,
def train(train_config):
    """Iteration-driven SSD training on VOC or COCO.

    Builds the SSD net, optimizer and MultiBox loss from ``train_config``,
    then runs ``cfg['max_iter']`` iterations with optional visdom plotting,
    periodic checkpoints every 5000 iterations and a final save.
    """
    logger = Logger(HOME+'/log', train_config.basenet)
    # NOTE(review): cfg/dataset are unbound if dataset_name is neither
    # 'VOC' nor 'COCO' — callers must pass one of those two.
    if train_config.dataset_name == 'VOC':
        cfg = voc_config
        dataset = VOCDataset(DATA_DIR, transform=SSDAugmentation(
            cfg['min_dim'], MEANS))
    elif train_config.dataset_name == 'COCO':
        cfg = coco_config
        dataset = COCODataset(DATA_DIR, transform=SSDAugmentation(
            cfg['min_dim'], MEANS))
    if train_config.visdom:
        import visdom
        viz = visdom.Visdom()
    ssd_net = SSD('train', train_config.basenet, cfg['min_dim'],
                  cfg['num_classes'], with_fpn=train_config.with_fpn)
    net = ssd_net
    if train_config.cuda:
        net = nn.DataParallel(ssd_net)
        cudnn.benchmark = True
    if train_config.resume:
        logger('Loading {} ...'.format(train_config.resume))
        load_weights = torch.load(
            train_config.resume, map_location=lambda storage, loc: storage)
        ssd_net.load_state_dict(load_weights)
    if train_config.cuda:
        net = net.cuda()
    if not train_config.resume:
        logger('Initializing weights ...')
        ssd_net.topnet.apply(weights_init)
        ssd_net.loc_layers.apply(weights_init)
        ssd_net.conf_layers.apply(weights_init)
    optimizer = optim.Adam(net.parameters(), lr=train_config.lr,
                           weight_decay=train_config.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3,
                             0.5, False, train_config.cuda)
    net.train()
    # loss counters (plain Python floats, accumulated from .item() below)
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    logger('Loading the dataset...')
    epoch_size = len(dataset) // train_config.batch_size
    logger('Training SSD on:{}'.format(dataset.name))
    step_index = 0
    if train_config.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)
    data_loader = data.DataLoader(dataset, train_config.batch_size,
                                  num_workers=train_config.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    t0 = time.time()
    for iteration in range(train_config.start_iter, cfg['max_iter']):
        if train_config.visdom and iteration != 0 and (iteration % epoch_size == 0):
            # BUG FIX: loc_loss/conf_loss are plain floats here (accumulated
            # via loss_l.item()), so the original loc_loss.item() raised
            # AttributeError at the first epoch boundary.
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            logger('epoch = {} : loss = {}, loc_loss = {}, conf_loss = {}'.format(
                epoch, loc_loss + conf_loss, loc_loss, conf_loss))
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1
        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, train_config.lr,
                                 train_config.gamma, step_index)
        # BUG FIX: refresh the iterator BEFORE calling next() at an epoch
        # boundary; the original called next() first, which raises
        # StopIteration when the loader is exactly exhausted.
        if iteration//epoch_size > 0 and iteration % epoch_size == 0:
            batch_iterator = iter(data_loader)
            print(iteration)
        # load train data
        images, targets = next(batch_iterator)
        if train_config.cuda:
            images = images.cuda()
            targets = [ann.cuda() for ann in targets]
        # forward
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        if train_config.visdom:
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()
        if iteration % 50 == 0:
            t1 = time.time()
            logger('timer: %.4f sec. || ' % (t1 - t0)+'iter ' +
                   repr(iteration) + ' || Loss: %.4f ||' % (loss.item()) +
                   ' || loc_loss: %.4f ||' % (loss_l.item()) +
                   ' || conf_loss: %.4f ||' % (loss_c.item()))
            t0 = time.time()
        if train_config.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')
        if iteration != 0 and iteration % 5000 == 0:
            logger('Saving state, iter:%d' % iteration)
            torch.save(ssd_net.state_dict(), train_config.save_folder +
                       'ssd224_VOC_' + repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(),
               train_config.save_folder + 'ssd224_VOC.pth')
def __init__(self, train_path, test_path, label_name, model_file, model,
             img_size=1024, batch_size=16, lr=1e-3, re_train=False,
             best_loss=2, use_gpu=False, nms_threshold=0.5):
    """Wire up datasets, loaders, model, loss and optimizer for training.

    Args:
        train_path / test_path: dataset root directories.
        label_name: label-file name joined onto each root.
        model_file: checkpoint path to resume from.
        model: the detection network instance.
        img_size, batch_size, lr, nms_threshold: hyper-parameters.
        re_train: skip loading the saved model and train from scratch.
        best_loss: checkpoint only when the loss drops below this value.
        use_gpu: request GPU; honored only if CUDA is actually available.
    """
    self.train_path = train_path
    self.test_path = test_path
    self.label_name = label_name
    self.model_file = model_file
    self.img_size = img_size
    self.batch_size = batch_size
    self.re_train = re_train  # do not load the trained model; retrain from scratch
    self.best_loss = best_loss  # save the model only when loss is below this value
    self.use_gpu = False
    self.nms_threshold = nms_threshold
    # Enable GPU only when both requested and available.
    if use_gpu is True:
        print("gpu available: %s" % str(torch.cuda.is_available()))
        if torch.cuda.is_available():
            self.use_gpu = True
        else:
            self.use_gpu = False

    # Model
    self.model = model
    if self.use_gpu:
        print('[use gpu] ...')
        self.model = self.model.cuda()

    # Load a previous checkpoint unless a fresh run was requested.
    if os.path.exists(self.model_file) and not self.re_train:
        self.load(self.model_file)

    # RandomHorizontalFlip
    self.transform_train = T.Compose([
        # T.Resize((self.img_size, self.img_size)),
        T.ToTensor(),
        # T.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),
    ])
    self.transform_test = T.Compose([
        # T.Resize((self.img_size, self.img_size)),
        T.ToTensor(),
        # T.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5])
    ])

    # Dataset
    train_label = os.path.join(self.train_path, self.label_name)
    test_label = os.path.join(self.test_path, self.label_name)
    train_dataset = ListDataset(root=self.train_path, list_file=train_label,
                                train=True, transform=self.transform_train)
    self.train_loader = DataLoader(train_dataset, batch_size=batch_size,
                                   shuffle=True, num_workers=1)
    test_dataset = ListDataset(root=self.test_path, list_file=test_label,
                               train=False, transform=self.transform_test)
    self.test_loader = DataLoader(test_dataset, batch_size=batch_size,
                                  shuffle=False, num_workers=1)
    print('train_loader len: %d' % len(self.train_loader.dataset))
    print(' test_loader len: %d' % len(self.test_loader.dataset))

    self.criterion = MultiBoxLoss()
    self.lr = lr
    # self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr, momentum=0.5)
    self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr,
                                weight_decay=1e-4)
    # optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=1e-4)
def main():
    """Entry point: parse CLI args, build DSOD + data + loss, run epochs.

    Trains for ``--nEpochs`` epochs, optionally resuming from a numbered
    checkpoint, and saves a checkpoint dict every 10 epochs.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchSz', type=int, default=1, help='batch size')
    parser.add_argument('--nEpochs', type=int, default=300,
                        help='number of epoch to end training')
    parser.add_argument('--lr', type=float, default=1e-5, help='learning rate')
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--wd', type=float, default=5e-4, help='weight decay')
    parser.add_argument('--opt', type=str, default='sgd',
                        choices=('sgd', 'adam', 'rmsprop'))
    parser.add_argument('--resume', '-r', action='store_true',
                        help='resume from checkpoint')
    parser.add_argument('--resume_from', type=int, default=220,
                        help='resume from which checkpoint')
    parser.add_argument('--visdom', '-v', action='store_true',
                        help='use visdom for training visualization')
    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()
    best_loss = float('inf')  # best test loss
    start_epoch = 0  # start from epoch 0 for last epoch

    normMean = [0.485, 0.456, 0.406]
    normStd = [0.229, 0.224, 0.225]
    normTransform = transforms.Normalize(normMean, normStd)
    # NOTE(review): transforms.Scale is deprecated in newer torchvision
    # (renamed Resize); kept unchanged for behavioral compatibility.
    trainTransform = transforms.Compose([
        transforms.Scale((300, 300)),
        transforms.ToTensor(),
        normTransform
    ])
    testTransform = transforms.Compose([
        transforms.Scale((300, 300)),
        transforms.ToTensor(),
        normTransform
    ])

    # Data
    kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
    trainset = ListDataset(root=cfg.img_root, list_file=cfg.label_train,
                           train=True, transform=trainTransform)
    trainLoader = DataLoader(trainset, batch_size=args.batchSz,
                             shuffle=True, **kwargs)
    testset = ListDataset(root=cfg.img_root, list_file=cfg.label_test,
                          train=False, transform=testTransform)
    testLoader = DataLoader(testset, batch_size=args.batchSz,
                            shuffle=False, **kwargs)

    # Model
    net = DSOD(growthRate=48, reduction=1)
    if args.resume:
        print('==> Resuming from checkpoint...')
        checkpoint = torch.load('./checkpoint/ckpt_{:03d}.pth'.format(args.resume_from))
        net.load_state_dict(checkpoint['net'])
        best_loss = checkpoint['loss']
        start_epoch = checkpoint['epoch']+1
        print('Previours_epoch: {}, best_loss: {}'.format(start_epoch-1, best_loss))
    else:
        print('==> Initializing weight...')

        def init_weights(m):
            # Xavier-init only conv weights; biases are left untouched.
            if isinstance(m, nn.Conv2d):
                init.xavier_uniform(m.weight.data)
                # m.bias.data.zero_()
        net.apply(init_weights)

    print(' + Number of params: {}'.format(
        sum([p.data.nelement() for p in net.parameters()])))
    if use_cuda:
        net = net.cuda()

    if args.opt == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=args.lr,
                              momentum=args.momentum, weight_decay=args.wd)
    elif args.opt == 'adam':
        optimizer = optim.Adam(net.parameters(), weight_decay=args.wd)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(net.parameters(), weight_decay=args.wd)

    criterion = MultiBoxLoss()
    if use_cuda:
        net.cuda()
        cudnn.benchmark = True

    # NOTE(review): these plots are created but train()/test() below receive
    # viz=None, so they are never updated — confirm intended.
    if args.visdom:
        import visdom
        viz = visdom.Visdom()
        training_plot = viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, 3)).cpu(),
            opts=dict(
                xlabel='Epoch',
                ylabel='Loss',
                title='Epoch DSOD Training Loss',
                legend=['Loc Loss', 'Conf Loss', 'Loss']
            )
        )
        testing_plot = viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, 3)).cpu(),
            opts=dict(
                xlabel='Epoch',
                ylabel='Loss',
                title='Epoch DSOD Testing Loss',
                legend=['Loc Loss', 'Conf Loss', 'Loss']
            )
        )

    with open(cfg.label_test) as f:
        test_lines = f.readlines()
    num_tests = len(test_lines)
    transform = trainTransform
    transform_viz = testTransform
    data_encoder = DataEncoder()
    if args.visdom:
        testing_image = viz.image(np.ones((3, 300, 300)),
                                  opts=dict(caption='Random Testing Image'))

    for epoch in range(start_epoch, start_epoch+args.nEpochs+1):
        adjust_opt(args.opt, optimizer, epoch)
        train(epoch, net, trainLoader, optimizer, criterion, use_cuda,
              args.visdom, viz=None)
        # BUG FIX: 'test_loss' was referenced in the checkpoint dict below but
        # never assigned, crashing with NameError at the first save. Capture
        # test()'s return value here — assumes test() returns the epoch's
        # test loss; TODO confirm against its definition.
        test_loss = test(epoch, net, testLoader, optimizer, criterion,
                         use_cuda, args.visdom, viz=None)
        if epoch % 10 == 0:
            state = {
                'net': net.state_dict(),
                'loss': test_loss,
                'epoch': epoch
            }
            if not os.path.isdir('checkpoint'):
                os.mkdir('checkpoint')
            torch.save(state, './checkpoint/ckpt_{:03d}.pth'.format(epoch))
def train_net(args):
    """Assemble the S3FD training pieces from parsed CLI args.

    Sets the default tensor type, builds train/val loaders, constructs and
    initializes the S3FD network (optionally resuming or loading VGG base
    weights), and returns (net, optimizer, criterion, train_loader,
    val_loader) for the caller to drive the training loop.
    """
    if torch.cuda.is_available():
        if args.cuda:
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        if not args.cuda:
            print(
                "WARNING: It looks like you have a CUDA device, but aren't " +
                "using CUDA.\nRun with --cuda for optimal training speed.")
            torch.set_default_tensor_type('torch.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    if not os.path.exists(args.save_folder):
        os.makedirs(args.save_folder)

    #*******load data
    train_dataset, val_dataset = dataset_factory(args.dataset)
    train_loader = data.DataLoader(train_dataset, args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True,
                                   collate_fn=detection_collate,
                                   pin_memory=True)
    val_batchsize = args.batch_size // 2
    val_loader = data.DataLoader(val_dataset, val_batchsize,
                                 num_workers=args.num_workers,
                                 shuffle=False,
                                 collate_fn=detection_collate,
                                 pin_memory=True)

    # s3fd_net = build_s3fd('train', cfg.NUM_CLASSES)
    if args.cuda:
        device = 'cuda'
    else:
        device = 'cpu'
    s3fd_net = S3FD(cfg.NUM_CLASSES, cfg.NumAnchor).to(device)
    #print(">>",net)

    # Always re-initialize the detection heads (resume-aware init disabled).
    if True:  #not args.resume:
        print('Initializing weights...')
        s3fd_net.extras.apply(s3fd_net.weights_init)
        s3fd_net.loc.apply(s3fd_net.weights_init)
        s3fd_net.conf.apply(s3fd_net.weights_init)
        s3fd_net.iou.apply(s3fd_net.weights_init)

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        # start_epoch = s3fd_net.load_weights(args.resume)
        s3fd_net.load_state_dict(torch.load(args.resume, map_location=device),
                                 strict=False)
    else:
        vgg_weights = torch.load(os.path.join(args.save_folder, args.basenet),
                                 map_location=device)
        print('Load base network....')
        s3fd_net.vgg.load_state_dict(vgg_weights)

    if args.multigpu:
        s3fd_net = torch.nn.DataParallel(s3fd_net)
        # BUG FIX: the original assigned 'cudnn.benckmark' (typo), which
        # silently created a useless attribute; the real switch is
        # cudnn.benchmark.
        cudnn.benchmark = True

    optimizer = optim.SGD(s3fd_net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg, args.dataset, args.cuda)
    print('Using the specified args:')
    print(args)

    return s3fd_net, optimizer, criterion, train_loader, val_loader