def eval(checkpoint, data_path, params):
    """Evaluate a trained Backbone checkpoint on the 'test' split.

    Loads annotations from ``data_path``, runs the whole test set through the
    model and prints the sample-averaged SumSquareError loss.

    Args:
        checkpoint: path to a state-dict file previously saved with torch.save.
        data_path: dataset root passed to ``load_annotation``.
        params: config object providing ``batch_size`` and ``num_gpus``.

    NOTE(review): the function name shadows the builtin ``eval``; kept as-is
    because external callers may depend on it.
    """
    # Data
    files, label, boxes = load_annotation(data_path, 'test')
    eval_set = YoloDataset(paths=files, bboxes=boxes, labels=label,
                           params=params, train=False)
    eval_loader = DataLoader(eval_set, batch_size=params.batch_size,
                             num_workers=params.num_gpus * 8, shuffle=False)

    # Model. Load on CPU first so a checkpoint saved on any device can be
    # restored, then move the model to GPU explicitly.
    state_dict = torch.load(checkpoint, map_location='cpu')
    model = Backbone()
    model.load_state_dict(state_dict)
    model = model.cuda()

    # Loss
    criterion = SumSquareError()
    model.eval()
    total_loss = 0
    with torch.no_grad():
        # Plain iteration: the enumerate index was unused, and its name
        # shadowed the builtin `iter`.
        for img, annotation in eval_loader:
            img = img.cuda()
            annotation = annotation.cuda()
            output = model(img)
            loss = criterion(output, annotation).item()
            # Weight by batch size so the final division by len(eval_set)
            # yields a per-sample average (assumes criterion returns a
            # per-batch aggregate -- TODO confirm against SumSquareError).
            total_loss += loss * len(img)
    print(f'evaluate loss: {total_loss / len(eval_set)}')
def train(params, _run=None):
    """Train the YOLO backbone on Pascal VOC.

    Training set: VOC2007 trainval + VOC2007 test + VOC2012 trainval.
    Evaluation set: VOC2012 test. Checkpoints go to a timestamped directory
    under ``params.save_root``; the best (lowest eval loss) model is saved,
    and a snapshot is written on KeyboardInterrupt.

    Args:
        params: value accepted by ``Params`` providing (at least) seed,
            save_root, project_name, version, num_gpus, data_root,
            batch_size, optim, learning_rate, epoch, save_interval,
            eval_interval.  # field list inferred from usage below
        _run: unused hook, kept for backward compatibility with callers.
    """
    params = Params(params)
    set_random_seeds(params.seed)

    # Timestamped run directory so successive runs never collide.
    time_now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    params.save_root = params.save_root + f'/{params.project_name}_{time_now}_{params.version}'
    os.makedirs(params.save_root, exist_ok=True)

    # Fixed: '%{asctime}s' -> '%(asctime)s' (invalid %-style directive) and
    # 'levalname' -> 'levelname'. Also set level=INFO: without it the
    # logging.info() calls below are filtered out by the default WARNING level.
    logging.basicConfig(
        filename=f'{params.save_root}/{params.project_name}_{time_now}_{params.version}.log',
        filemode='a',
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s: %(message)s')

    if params.num_gpus == 0:
        # Hide all GPUs from CUDA so everything runs on CPU.
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    logging.info(f'Available GPUs: {torch.cuda.device_count()}')

    # Load the four VOC annotation splits.
    train2007, train_label_2007, train_bb_2007 = load_annotation(
        os.path.join(params.data_root, 'VOC2007'), 'trainval')
    test2007, test_label_2007, test_bb_2007 = load_annotation(
        os.path.join(params.data_root, 'VOC2007'), 'test')
    train2012, train_label_2012, train_bb_2012 = load_annotation(
        os.path.join(params.data_root, 'VOC2012'), 'trainval')
    test2012, test_label_2012, test_bb_2012 = load_annotation(
        os.path.join(params.data_root, 'VOC2012'), 'test')

    train_data = train2007 + test2007 + train2012
    train_label = train_label_2007 + test_label_2007 + train_label_2012
    train_bb = train_bb_2007 + test_bb_2007 + train_bb_2012
    test_data = test2012
    test_label = test_label_2012
    test_bb = test_bb_2012

    train_dataset = YoloDataset(train_data, train_bb, train_label, params, train=True)
    eval_dataset = YoloDataset(test_data, test_bb, test_label, params, train=False)
    train_loader = DataLoader(dataset=train_dataset, num_workers=params.num_gpus * 8,
                              batch_size=params.batch_size, shuffle=True,
                              drop_last=True, pin_memory=True)
    eval_loader = DataLoader(dataset=eval_dataset, num_workers=1, batch_size=1,
                             shuffle=False, pin_memory=True)

    model = Backbone()
    last_step = 0
    last_epoch = 0
    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = nn.DataParallel(model)

    if params.optim == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=params.learning_rate,
                                    momentum=0.9, nesterov=True, weight_decay=0.0005)

    criterion = SumSquareError()
    # Halve the LR when eval loss plateaus for `patience` evaluations.
    schedule = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer, factor=0.5, verbose=True, patience=10)

    epoch = 0
    begin_epoch = max(0, last_epoch)
    step = max(0, last_step)
    best_loss = 1e6

    logging.info('Begin to train...')
    model.train()
    # Removed unused `import cv2 as cv` (never referenced in this function).
    try:
        for epoch in range(begin_epoch, params.epoch):
            # `batch_idx` instead of `iter`: don't shadow the builtin.
            for batch_idx, (img, annotation) in enumerate(train_loader):
                output = model(img.cuda())
                loss = criterion(output, annotation.cuda())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # NOTE(review): `save_interval` is used here as a *logging*
                # interval, not a save interval -- confirm naming intent.
                if batch_idx % params.save_interval == 0:
                    logging.info(f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} '
                                 f'Train Epoch: {epoch} iter: {batch_idx} loss: {loss.item()}')
                step += 1

            if epoch % params.eval_interval == 0:
                model.eval()
                epoch_loss = 0
                with torch.no_grad():
                    for img, annotation in eval_loader:
                        output = model(img.cuda())
                        loss = criterion(output, annotation.cuda()).item()
                        # Weight by batch size so dividing by the dataset
                        # length gives a per-sample average.
                        epoch_loss += loss * len(img)
                loss = epoch_loss / len(eval_dataset)
                logging.info(f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} '
                             f'Eval Epoch: {epoch} loss: {loss}')
                schedule.step(loss)
                if loss < best_loss:
                    best_loss = loss
                    save_checkpoint(model, f'{params.save_root}/{epoch}_{step}.pth')
                model.train()
    except KeyboardInterrupt:
        # Preserve progress on manual stop.
        save_checkpoint(model, f'{params.save_root}/Interrupt_{epoch}_{step}.pth')
transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(CIFAR_MEAN, CIFAR_STD) ]) else: augmentation = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(CIFAR_MEAN, CIFAR_STD) ]) # configuration currentTime = datetime.datetime.now() currentTime = currentTime.strftime('%m%d%H%M%S') writer = SummaryWriter() model = Backbone() model = model.cuda() optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 40, gamma=0.1) if not args.eval: train_dataset = cifar10(transform=augmentation, eta=args.eta) train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) else: test_dataset = cifar10(transform=augmentation, if_test=True) test_loader = DataLoader(test_dataset,