def main():
    """Horovod multi-process CIFAR training entry point.

    Builds CIFAR-10/100 loaders with a DistributedSampler, wraps the SGD
    optimizer with Horovod, broadcasts rank-0's initial weights, then runs
    the per-epoch train/test loop.

    Relies on module globals: args, hvd, local_rank, world_size, state,
    use_cuda, and the train/test/adjust_learning_rate helpers.
    """
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    # Any dataset name other than 'cifar10' falls through to CIFAR-100.
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root=args.dataroot, train=True, download=True, transform=transform_train)
    # Each Horovod rank sees a disjoint shard of the training set; the sampler
    # does the shuffling, so the loader itself has shuffle=False.
    sampler = torch.utils.data.distributed.DistributedSampler(trainset, num_replicas=hvd.size(), rank=hvd.rank())
    trainloader = data.DataLoader(dataset=trainset, batch_size=args.train_batch * world_size, shuffle=False, sampler=sampler)
    testset = dataloader(root=args.dataroot, train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch * world_size, shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format("Alexnet"))
    model = AlexNet(num_classes=num_classes)
    device = torch.device('cuda', local_rank)
    model = model.to(device)
    # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[local_rank], output_device=local_rank)
    print('Model on cuda:%d' % local_rank)
    print(' Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    # Wrap the optimizer with Horovod so gradients are allreduced across ranks.
    optimizer = hvd.DistributedOptimizer(optimizer, named_parameters=model.named_parameters())
    # Broadcast rank-0's initial parameters to every worker.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)
        print('Rank:{} Epoch[{}/{}]: LR: {:.3f}, Train loss: {:.5f}, Test loss: {:.5f}, Train acc: {:.2f}, Test acc: {:.2f}.'.format(local_rank, epoch+1, args.epochs, state['lr'], train_loss, test_loss, train_acc, test_acc))
def main():
    """Train AlexNet on CIFAR-10/100 on a single GPU and report per-epoch metrics."""
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    # Data: augmented pipeline for training, plain normalization for testing.
    print('==> Preparing dataset %s' % args.dataset)
    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    # Anything other than 'cifar10' is treated as CIFAR-100.
    if args.dataset == 'cifar10':
        dataset_cls, num_classes = datasets.CIFAR10, 10
    else:
        dataset_cls, num_classes = datasets.CIFAR100, 100

    trainset = dataset_cls(root=args.dataroot, train=True, download=True,
                           transform=transform_train)
    trainloader = data.DataLoader(dataset=trainset, batch_size=args.train_batch,
                                  shuffle=False)
    testset = dataset_cls(root=args.dataroot, train=False, download=False,
                          transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch,
                                 shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format("Alexnet"))
    model = AlexNet(num_classes=num_classes).cuda()
    print('Model on cuda')
    cudnn.benchmark = True
    print(' Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.weight_decay)

    # One train + test pass per epoch, printing the epoch summary.
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)
        print('Epoch[{}/{}]: LR: {:.3f}, Train loss: {:.5f}, Test loss: {:.5f}, Train acc: {:.2f}, Test acc: {:.2f}.'.format(
            epoch+1, args.epochs, state['lr'], train_loss, test_loss, train_acc, test_acc))
def main():
    """Train the classification head and the position-regression head in turn."""
    # Dataset locations and training hyper-parameters.
    IMAGE_PATH = "/home/gonken2019/Desktop/subProject/dataset45"
    LABELS_PATH = "/home/gonken2019/Desktop/subProject/poseData45/"
    BATCH_SIZE = 256  # keep in sync with the sizes hard-coded in submodel.py
    NUM_EPOCH = 20    # 20-30 at most

    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cuda":
        print("[Info] Use CUDA")

    # One model/optimizer pair per task. AdamW with a small lr is used because
    # the loss tends to go NaN otherwise (change optimizer or lr if it does).
    classifier = AlexNet()
    regressor = PositionNet()
    dataloaders = Dataloaders(IMAGE_PATH, LABELS_PATH, BATCH_SIZE)
    classifier_opt = torch.optim.AdamW(classifier.parameters(), lr=0.00001, weight_decay=5e-4)
    regressor_opt = torch.optim.AdamW(regressor.parameters(), lr=0.0001, weight_decay=5e-4)

    cls_trainer = MyTrainer(classifier, dataloaders, classifier_opt, device, "Classification")
    reg_trainer = MyTrainer(regressor, dataloaders, regressor_opt, device, "Regression")
    cls_trainer.run(NUM_EPOCH)
    reg_trainer.run(NUM_EPOCH)
def main():
    """Train AlexNet on the full image/label dataset."""
    # Paths and hyper-parameters.
    IMAGE_PATH = "/home/gonken2019/Desktop/subProject/images"
    LABELS_PATH = "/home/gonken2019/Desktop/subProject/labels/"
    BATCH_SIZE = 512  # earlier sizes (8, 10) hit size-mismatch errors in the FC layers
    NUM_EPOCH = 50    # 20-30 is usually enough

    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cuda":
        print("[Info] Use CUDA")

    model = AlexNet()
    dataloaders = Dataloaders(IMAGE_PATH, LABELS_PATH, BATCH_SIZE)
    # AdamW instead of SGD: if the loss turns NaN, switch optimizer or lower the lr.
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=5e-4)

    trainer = MyTrainer(model, dataloaders, optimizer, device)
    trainer.run(NUM_EPOCH)
def main(): print(f"Train numbers:{len(dataset)}") # first train run this line model = AlexNet().to(device) # Load model # if device == 'cuda': # model = torch.load(MODEL_PATH + MODEL_NAME).to(device) # else: # model = torch.load(MODEL_PATH + MODEL_NAME, map_location='cpu') # cast cast = torch.nn.CrossEntropyLoss().to(device) # Optimization optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-8) step = 1 for epoch in range(1, NUM_EPOCHS + 1): model.train() # cal one epoch time start = time.time() for images, labels in dataset_loader: images = images.to(device) labels = labels.to(device) # Forward pass outputs = model(images) loss = cast(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() print(f"Step [{step * BATCH_SIZE}/{NUM_EPOCHS * len(dataset)}], " f"Loss: {loss.item():.8f}.") step += 1 # cal train one epoch time end = time.time() print(f"Epoch [{epoch}/{NUM_EPOCHS}], " f"time: {end - start} sec!") # Save the model checkpoint torch.save(model, MODEL_PATH + '/' + MODEL_NAME) print(f"Model save to {MODEL_PATH + '/' + MODEL_NAME}.")
def _main(data_dir, batch_size, learning_rate, n_epoch):
    """Build dataloaders, train AlexNet, and save the trained model to model.pt.

    Args:
        data_dir: root directory of the image data.
        batch_size: mini-batch size for the dataloaders.
        learning_rate: Adam learning rate.
        n_epoch: number of training epochs.
    """
    loaders = create_dataloader(data_dir, batch_size)

    # Prefer the first GPU when one is available.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    net = AlexNet().to(device)

    # Optimize every parameter; standard cross-entropy objective.
    optimizer_ft = optim.Adam(net.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    trained = train_model(net, loaders, criterion, optimizer_ft, device, n_epoch)
    torch.save(trained, 'model.pt')
def main():
    """Train AlexNet on the 5-class flower ImageFolder dataset.

    Writes class_idices.json (index -> class name), trains for 20 epochs,
    validates each epoch, and saves the best weights to ./AlexNet.pth.
    """
    # Select the compute device.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Preprocessing: random crop/flip augmentation for training,
    # deterministic resize for validation.
    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        "val": transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    }

    # Root holding the train/ and val/ subdirectories.
    image_path = '/home/xulei/数据集大本营/5_flower_data/flower_data'  # flower data root path
    # Abort early if the dataset root is missing.
    assert os.path.exists(image_path), "{} path does not exist.".format(
        image_path)

    # Training dataset.
    train_dataset = datasets.ImageFolder(root=os.path.join(
        image_path, "train"), transform=data_transform["train"])
    train_num = len(train_dataset)

    # flower_list: {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # cla_dict inverts it: {0: 'daisy', 1: 'dandelion', ...}
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # Persist the index->name mapping as pretty-printed JSON (indent=4 puts
    # each key on its own line) so inference code can decode predictions.
    # NOTE(review): the filename 'class_idices.json' is misspelled, but it is a
    # runtime string — any consumer must use the same spelling.
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_idices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 128
    # Worker count capped by CPU count, batch size, and 8.
    nw = min(os.cpu_count(), batch_size if batch_size > 1 else 0, 8)  # number of workers
    print("using {} dataloader workers every process".format(nw))

    train_loader = datas.DataLoader(train_dataset, batch_size, shuffle=True,
                                    num_workers=nw)
    validate_dataset = datasets.ImageFolder(root=os.path.join(
        image_path, "val"), transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = datas.DataLoader(validate_dataset, batch_size,
                                       shuffle=False, num_workers=nw)
    print("using {} images for trainning, {} images for validation.".format(
        train_num, val_num))

    net = AlexNet(num_classes=5).to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.00004)

    epoches = 20
    save_path = './AlexNet.pth'
    best_acc = 0.0
    # train_steps = number of mini-batches per epoch.
    train_steps = len(train_loader)
    for epoch in range(epoches):
        # --- training pass ---
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)  # progress bar
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epoches, loss)

        # --- validation pass (no gradients, dropout disabled) ---
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # Predicted class = argmax over logits.
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('\n[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        # Keep only the best-performing weights.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print("Finshed Training")
def train():
    """Train AlexNet, tracking loss and top-1/top-5 accuracy, and checkpoint the weights.

    Relies on module globals: opt, device, train_dataloader, MODEL_PATH, and
    the AverageMeter / accuracy helpers.
    """
    # Ensure the checkpoint directory exists (ignore "already exists").
    try:
        os.makedirs(opt.checkpoints_dir)
    except OSError:
        pass

    # Wrap in DataParallel when more than one GPU is visible.
    if torch.cuda.device_count() > 1:
        model = torch.nn.parallel.DataParallel(
            AlexNet(num_classes=opt.num_classes))
    else:
        model = AlexNet(num_classes=opt.num_classes)

    # Resume from an existing checkpoint, loading tensors onto CPU first.
    if os.path.exists(MODEL_PATH):
        model.load_state_dict(
            torch.load(MODEL_PATH, map_location=lambda storage, loc: storage))
    model.to(device)

    ################################################
    # Set loss function and Adam optimizer
    ################################################
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=opt.lr)

    for epoch in range(opt.epochs):
        # train for one epoch
        print(f"\nBegin Training Epoch {epoch + 1}")
        # Track loss and top-k accuracy so we can follow the learning process.
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        for i, data in enumerate(train_dataloader):
            # get the inputs; data is a list of [inputs, labels]
            inputs, targets = data
            inputs = inputs.to(device)
            targets = targets.to(device)

            # compute output
            output = model(inputs)
            loss = criterion(output, targets)

            # BUG FIX: topk was (1, 2), so the value reported as "Prec@5"
            # was actually top-2 accuracy; compute real top-5.
            prec1, prec5 = accuracy(output, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1, inputs.size(0))
            top5.update(prec5, inputs.size(0))

            # compute gradients in a backward pass
            optimizer.zero_grad()
            loss.backward()
            # Call step of optimizer to update model params
            optimizer.step()

            print(
                f"Epoch [{epoch + 1}] [{i + 1}/{len(train_dataloader)}]\t"
                f"Loss {loss.item():.4f}\t"
                f"Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t"
                f"Prec@5 {top5.val:.3f} ({top5.avg:.3f})",
                end="\r")

        # save model file after every epoch so a crash loses at most one epoch
        torch.save(model.state_dict(), MODEL_PATH)
(0.4914,0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) ]) cifar_train = CIFAR10(root=data_dir, download=False, train=True, transform=transform) cifar_test = CIFAR10(root=data_dir, download=False, train=False, transform=transform) # setup model model = AlexNet().to(device) # loss function loss_fn = nn.CrossEntropyLoss() # optimizer optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay) # initialize our entropy based select query select_fn = EntropySelectQuery(model, cifar_train) # main training loop unlabeled, labeled = [i for i in range(len(cifar_train))], [] for loop in range(num_loops): if loop == 0: # randomly select <init_samples> many samples selected = random.sample(unlabeled, init_samples) else: # select based on entropy
def main():
    """Train AlexNet on the dog_data ImageFolder dataset and keep the best weights.

    Writes class_indices.json (index -> class name), trains for 10 epochs,
    validates each epoch, and saves the best state_dict to ./AlexNet.pth.
    """
    # Select the compute device.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Preprocessing: random 224x224 crop + horizontal flip for training,
    # fixed resize for validation.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "dog_data")  # data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # class name -> index, e.g. {'daisy':0, 'dandelion':1, ...}; invert it to
    # index -> name and persist as JSON for later inference.
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # Worker count capped by CPU count, batch size, and 8.
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)
    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images fot validation.".format(train_num, val_num))

    net = AlexNet(num_classes=5, init_weights=True)  # 5 classes
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # Adam over all trainable parameters; lr chosen empirically.
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    save_path = './AlexNet.pth'
    best_acc = 0.0
    for epoch in range(10):
        # --- training pass ---
        net.train()  # enable dropout
        running_loss = 0.0  # accumulates per-batch loss for the epoch average
        t1 = time.perf_counter()  # time the epoch
        for step, data in enumerate(train_loader, start=0):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            # text progress bar
            rate = (step + 1) / len(train_loader)
            a = "*" * int(rate * 50)
            b = "." * int((1 - rate) * 50)
            print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss), end="")
        print()
        print(time.perf_counter() - t1)

        # --- validation pass ---
        net.eval()  # disable dropout
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            for val_data in validate_loader:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]  # argmax = predicted class
                acc += (predict_y == val_labels.to(device)).sum().item()
        val_accurate = acc / val_num
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)
        # BUG FIX: was running_loss / step, which divides by (batches - 1) and
        # raises ZeroDivisionError for a single-batch loader; use the true
        # batch count step + 1.
        print('[epoch %d] train_loss: %.3f test_accuracy: %.3f' %
              (epoch + 1, running_loss / (step + 1), val_accurate))

    print('Finished Training')
def train(args):
    """Train AlexNet, validate every epoch, and keep best/periodic checkpoints.

    Args:
        args: namespace with device_id, useLRN, useDropOut, lr, batch_size,
            num_workers, useAug and epoch attributes.

    Side effects: writes best_model.pth (best validation loss),
    model_checkpoint.pth (every 10 epochs), and appends a summary line to
    exp_result.txt.
    """
    device = torch.device(f"cuda:{args.device_id}")
    model = AlexNet(n_cls=100, useLRN=args.useLRN, useDropOut=args.useDropOut)
    criterion = nn.CrossEntropyLoss()
    model.to(device)
    optimizer = Adam(model.parameters(), lr=args.lr)
    train_loader, valid_loader = getLoaders(split="train",
                                            batch_size=args.batch_size,
                                            num_workers=args.num_workers,
                                            aug=args.useAug)

    # Per-iteration / per-epoch histories saved in the periodic checkpoint.
    train_loss_arr = []
    valid_loss_arr = []
    valid_acc_arr = []
    valid_top5_arr = []
    n_iter = 0
    best_loss = float('inf')
    best_top1_acc = 0
    best_top5_acc = 0

    for ep in range(args.epoch):
        # --- training pass ---
        model.train()
        for _, (img, label) in tqdm(enumerate(train_loader), total=len(train_loader)):
            img, label = img.to(device), label.to(device)
            optimizer.zero_grad()
            pred = model(img)
            loss = criterion(pred, label)
            loss.backward()
            optimizer.step()
            train_loss_arr.append(loss.item())
            n_iter += 1

        # --- validation pass ---
        model.eval()
        ep_valid_loss_arr = []
        ep_acc_arr = []
        ep_top5_arr = []
        with torch.no_grad():
            for _, (img, label) in tqdm(enumerate(valid_loader), total=len(valid_loader)):
                img, label = img.to(device), label.to(device)
                pred = model(img)
                loss = criterion(pred, label)
                acc = utils.top_k_acc(k=1, pred=pred.detach().cpu().numpy(),
                                      label=label.detach().cpu().numpy())
                acc5 = utils.top_k_acc(k=5, pred=pred.detach().cpu().numpy(),
                                       label=label.detach().cpu().numpy())
                ep_acc_arr.append(acc)
                ep_top5_arr.append(acc5)
                ep_valid_loss_arr.append(loss.item())

        valid_loss = np.mean(ep_valid_loss_arr)
        valid_acc = np.mean(ep_acc_arr)
        valid_top5 = np.mean(ep_top5_arr)
        # Mean training loss over the just-finished epoch only.
        train_loss = np.mean(train_loss_arr[-len(train_loader):])
        valid_loss_arr.append(valid_loss)
        # BUG FIX: these two histories were declared and written into the
        # checkpoint but never appended, so the saved curves were always empty.
        valid_acc_arr.append(valid_acc)
        valid_top5_arr.append(valid_top5)

        if valid_loss < best_loss:
            best_loss = valid_loss
            best_top1_acc = valid_acc
            best_top5_acc = valid_top5
            # Move to CPU so the checkpoint is device-agnostic, then move back.
            model.cpu()
            torch.save(model.state_dict(), "best_model.pth")
            model.to(device)

        # Periodic full checkpoint (model + optimizer + histories).
        if (ep + 1) % 10 == 0:
            model.cpu()
            torch.save(
                {
                    "model": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "train_loss": train_loss_arr,
                    "valid_loss": valid_loss_arr,
                    "valid_acc": valid_acc_arr,
                    "valid_top5": valid_top5_arr,
                    "best_loss": best_loss,
                    "ep": ep,
                    "n_iter": n_iter,
                }, "model_checkpoint.pth")
            model.to(device)

        print(
            f"[{ep}, {n_iter}] train: {train_loss:.4f}, valid: {valid_loss:.4f}, acc: {valid_acc:.4f}, top5: {valid_top5:.4f}"
        )

    # Append the run summary for experiment tracking.
    with open("exp_result.txt", "a+") as f:
        f.write(
            f"{args}, loss: {best_loss:.4f}, top1: {best_top1_acc*100:.1f}, top5: {best_top5_acc*100:.1f}\n"
        )
def main():
    """Train (or resume) AlexNet on the male-face dataset with TensorBoard logging.

    Saves the best state_dict to ./AlexNet.pth and dumps the loss/accuracy
    curves under ./save/ when training finishes.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("using {} device.".format(device))
    tbwriter = SummaryWriter(log_dir="./logs")

    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(360),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        "val": transforms.Compose([
            # BUG FIX: Resize(360, 360) passed the second 360 as the
            # interpolation argument; the size must be the tuple (360, 360).
            transforms.Resize((360, 360)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(), "./DATA"))  # get data root path
    image_path = os.path.join(data_root, "male")
    assert os.path.exists(image_path), "{} path does not exist.".format(
        image_path)

    train_dataset = datasets.ImageFolder(root=os.path.join(
        image_path, "train"), transform=data_transform["train"])
    train_num = len(train_dataset)

    # Invert class->index and persist as JSON for inference-time decoding.
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    json_str = json.dumps(cla_dict, indent=2)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 8
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw)
    validate_dataset = datasets.ImageFolder(root=os.path.join(
        image_path, "val"), transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=8,
                                                  shuffle=True,
                                                  num_workers=nw)
    print("using {} images for training, {} images fot validation.".format(
        train_num, val_num))

    # Resume from a full-model checkpoint when present, otherwise start fresh.
    if os.path.exists("./log360.pth"):
        net = torch.load("./log360.pth", 'cpu')  # second positional arg = map_location
        print("continue training")
    else:
        net = AlexNet(num_classes=3, init_weights=True)
        print("start training anew")
    net.to(device)  # move to the training device in either case

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.98)
    epochs = 2000
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)

    trainLOSS = []  # per-epoch mean training loss
    valACC = []     # per-epoch validation accuracy

    for epoch in range(epochs):
        print('LR:{}'.format(scheduler.get_lr()[0]))
        # --- training pass ---
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss)
        # BUG FIX: scheduler.step() used to run at the top of the epoch,
        # i.e. before any optimizer.step(); step it after the epoch instead.
        scheduler.step()

        # --- validation pass ---
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, colour='green')
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
        val_accurate = acc / val_num

        tbwriter.add_scalar('train/loss', running_loss / train_steps, epoch)
        tbwriter.add_scalar('val/acc', val_accurate, epoch)
        trainLOSS.append(running_loss / train_steps)
        valACC.append(val_accurate)
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        print(' ')
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    # Dump the curves for offline plotting.
    npLOSS = np.array(trainLOSS)
    npVALACC = np.array(valACC)
    np.save('./save/loss_epoch_{}'.format(epoch), npLOSS)
    np.save('./save/valacc_epoch_{}'.format(epoch), npVALACC)
    print('Finished Training')
# create data loader dataloader_train = DataLoader( datasets.CIFAR10(root='./data', train=True, transform=transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, 4), transforms.ToTensor(), normalize, ]), download=True), batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True) print("Training dataloader created") # create optimizer optimizer = optim.SGD( params=filter(lambda p: p.requires_grad, alexnet.parameters()), lr=LR_INIT, momentum=MOMENTUM, weight_decay=LR_DECAY ) print("Optimizer created") # multiply LR by 1 / 10 after every 30 epochs lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1) print("LR Scheduler created") # training print("Starting training...") total_steps = 1 for epoch in range(NUM_EPOCHS): for imgs, classes in dataloader_train:
def main():
    """Train a 2-class AlexNet on an ImageFolder dataset and save the best model.

    Writes class_indices.json (index -> class name), trains for 30 epochs,
    validates each epoch, and saves the best full model to ./AlexNet.pt.
    """
    # viz = Visdom()
    # viz.line([0.], [0.], win='train_loss', opts=dict(title='train loss'))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Preprocessing; note: no Normalize here, only ToTensor.
    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor()
        ]),
        "val": transforms.Compose([
            transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
            transforms.ToTensor()
        ])
    }

    data_root = "/home/zhongsy/datasets/dataset/"  # get data root path
    train_dataset = datasets.ImageFolder(root=os.path.join(data_root, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # class name -> index; invert to index -> name and persist as JSON.
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # NOTE(review): batch_size = 1 is only used for the worker-count formula
    # (making nw = 0); the train loader below hard-codes batch_size=32 —
    # confirm this mismatch is intentional.
    batch_size = 1
    # number of workers
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=32,
                                               shuffle=True,
                                               num_workers=nw)
    validate_dataset = datasets.ImageFolder(root=os.path.join(
        data_root, "val"), transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4,
                                                  shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    net = AlexNet(num_classes=2, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)
    epochs = 30
    save_path = './AlexNet.pt'
    best_acc = 0.0
    train_steps = len(train_loader)
    global_step = 0  # counts epochs; was used as the x-axis for visdom plots
    for epoch in range(epochs):
        # --- training pass ---
        # epochloss tracks the minimum batch loss of the epoch (kept for the
        # commented-out visdom plotting below).
        epochloss = 100000
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            if epochloss > loss:
                epochloss = loss
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss)
        # viz.line([epochloss.cpu().detach().numpy()], [global_step],
        #          win='train_loss', update='append')
        global_step += 1
        print("[ start val ]")

        # --- validation pass ---
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                # NOTE(review): the result of unsqueeze is discarded — this
                # line is a no-op as written.
                val_labels.unsqueeze(1)
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        # Save the whole model (not just state_dict) when accuracy improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net, save_path)

    print('Finished Training')
# Top-level training setup: relies on train_dataset / valid_dataset /
# test_dataset, batch_size, num_workers, use_gpu, weights_init and
# train_model being defined earlier in the script.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
                             shuffle=False, num_workers=num_workers)
train_datasize = len(train_dataset)
valid_datasize = len(valid_dataset)
test_datasize = len(test_dataset)

# Binary classification task.
num_classes = 2
model = AlexNet(num_classes=2)
model.apply(weights_init)  # custom weight initialization
'''
model = models.alexnet(pretrained=True)
num_ftrs = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(num_ftrs, num_classes)
'''
if use_gpu:
    model = model.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
# Decay the learning rate by 5x (gamma=0.2) every 20 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.2)

model = train_model(model=model, criterion=criterion, optimizer=optimizer,
                    scheduler=exp_lr_scheduler, num_epochs=80, use_gpu=use_gpu)
def main():
    """Train an AlexNet on the flower dataset and keep the best checkpoint.

    Builds the train/val ImageFolder pipelines, writes the class-index map to
    ``class_indices.json``, trains for 10 epochs with Adam, and saves the
    model weights whenever validation accuracy improves.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        "val": transforms.Compose([
            transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(), "./"))
    image_path = os.path.join(data_root, "flower_data")
    train_dataset = datasets.ImageFolder(root=image_path + "/train",
                                         transform=data_transform['train'])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # Persist index -> class-name mapping for later inference.
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 8
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=0)

    validate_dataset = datasets.ImageFolder(root=image_path + "/val",
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4,
                                                  shuffle=True,
                                                  num_workers=0)

    # NOTE(review): keyword is `num_class` (singular) in this project's model
    # — confirm against model.py; sibling scripts use `num_classes`.
    net = AlexNet(num_class=5)
    print(net)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    save_path = './AlexNet.pth'
    best_acc = 0.0
    for epoch in range(10):
        # ---- train ----
        net.train()
        running_loss = 0.0
        t1 = time.perf_counter()
        for step, data in enumerate(train_loader, start=0):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # statistics + inline progress bar
            running_loss += loss.item()
            rate = (step + 1) / len(train_loader)
            a = "*" * int(rate * 50)
            b = "." * int((1 - rate) * 50)
            print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(
                int(rate * 100), a, b, loss), end="")
        print()
        print(time.perf_counter() - t1)

        # ---- validate ----
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():  # no gradients needed during validation
            for val_data in validate_loader:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += (predict_y == val_labels.to(device)).sum().item()
            val_accurate = acc / val_num
            if val_accurate > best_acc:
                best_acc = val_accurate
                torch.save(net.state_dict(), save_path)
            # BUG FIX: mean train loss was `running_loss / step`, i.e. divided
            # by the last batch *index* (len-1), not the number of batches.
            print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
                  (epoch + 1, running_loss / len(train_loader), val_accurate))

    print('Finished Training')
from torch.utils.data import DataLoader from torchvision import transforms from torchvision.datasets import ImageFolder from model import AlexNet if os.path.exists("net.pkl"): pkl = torch.load("net.pkl") net = pkl.get("model") sepoch = pkl.get("epoch") else: net = AlexNet().cuda() sepoch = 1 criterion = nn.CrossEntropyLoss().cuda() optimizer = optim.SGD(params=net.parameters(), lr=1e-2, momentum=9e-1) data_loader = DataLoader(dataset=ImageFolder( "data/train", transform=transforms.Compose([ transforms.Resize((256, 256)), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])), batch_size=50, shuffle=True) def adjust_learning_rate(epoch): lr = 1e-2 * 1e-1**(epoch // 20)
import os import sys pardir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(pardir) from dataset import * from result_save_visualization import * from model import AlexNet import torch import torch.optim as optim import time net = AlexNet(num_classes=10).to(device) optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4) num_epochs = 100 data_dict = { 'epoch': [], 'time': [], 'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': [] } start = time.time() for epoch in range(num_epochs): # train net.train()
def train(args):
    """Episodic few-shot training entry point.

    Builds the model, optionally restores a checkpoint, then alternates
    prototype-style episodic training with periodic validation and
    checkpointing.  ``args`` carries nway/kshot/query/dpath/se/name,
    restore_ckpt and test_mode.

    NOTE(review): this body was re-flowed from whitespace-mangled source;
    the nesting of the PRINT/VAL/SAVE blocks at the iteration level follows
    the standard skeleton this file is based on — confirm against history.
    """
    # Number of support (shot) images in each episode.
    k = args.nway * args.kshot

    """ TODO 1.a """
    " Make your own model for Few-shot Classification in 'model.py' file."
    model = AlexNet()
    model.cuda()
    """ TODO 1.a END """

    # Optionally restore pretrained weights before the DataParallel wrap.
    if args.restore_ckpt is not None:
        state_dict = torch.load(args.restore_ckpt)
        model.load_state_dict(state_dict)
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))

    if args.test_mode == 1:
        Test_phase(model, args, k)
    else:
        # Episodic train loader.
        dataset = Dataset(args.dpath, state='train')
        train_sampler = Train_Sampler(dataset._labels, n_way=args.nway,
                                      k_shot=args.kshot, query=args.query)
        data_loader = DataLoader(dataset=dataset, batch_sampler=train_sampler,
                                 num_workers=8, pin_memory=True)

        # Episodic validation loader.
        val_dataset = Dataset(args.dpath, state='val')
        val_sampler = Sampler(val_dataset._labels, n_way=args.nway,
                              k_shot=args.kshot, query=args.query)
        val_data_loader = DataLoader(dataset=val_dataset, batch_sampler=val_sampler,
                                     num_workers=8, pin_memory=True)

        """ TODO 1.b (optional) """
        " Set an optimizer or scheduler for Few-shot classification (optional) "
        # optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.001,
                                    momentum=0.9, weight_decay=5e-4)
        """ TODO 1.b (optional) END """

        tl = Averager()  # running mean of training loss
        ta = Averager()  # running mean of training accuracy

        print('train start')
        train_correct = 0
        train_total = 0
        train_loss = 0
        test_correct = 0
        test_total = 0
        test_loss = 0

        model.train()
        for i in range(args.se + 1, TOTAL):
            for episode in data_loader:
                optimizer.zero_grad()
                data, label = [_ for _ in episode]  # load one episode

                # note! data_shot shape is (nway * kshot, 3, h, w),
                # not (kshot * nway, 3, h, w) — take care when reshaping.
                data_shot, data_query = data[:k], data[k:]
                label_shot, label_query = label[:k], label[k:]
                label_shot = sorted(list(set(label_shot.tolist())))

                # Re-map raw query labels onto 0..nway-1.
                label_query = label_query.tolist()
                labels = []
                for j in range(len(label_query)):
                    label = label_shot.index(label_query[j])
                    labels.append(label)
                labels = torch.tensor(labels).cuda()

                """ TODO 2 ( Same as above TODO 2 ) """
                # Class prototype = mean of its kshot support embeddings;
                # logits = squared euclidean distance query -> prototypes.
                features_shot = model(data_shot.cuda())
                n_sample = int(args.query / args.nway)
                features_shot_mean = torch.zeros(args.nway, features_shot.size(1)).cuda()
                for j in range(int(args.nway)):
                    start = j * args.kshot
                    end = (j + 1) * args.kshot
                    features_shot_mean[j] = features_shot[start:end].mean(dim=0)
                features_query = model(data_query.cuda())
                logits = square_euclidean_metric(features_query, features_shot_mean)

                labels_expanded = labels.view(args.query, 1, 1)
                labels_expanded = labels_expanded.expand(args.query, args.nway, 1)
                lsoft = F.log_softmax(-logits, dim=1).view(args.kshot, n_sample, -1)
                labels_expanded = labels_expanded.view(lsoft.size())
                loss = -lsoft.gather(2, labels_expanded).squeeze().view(-1).mean()
                _, pred = lsoft.max(2)
                """ TODO 2 END """

                acc = count_acc(logits, labels)
                tl.add(loss.item())
                ta.add(acc)

                loss.backward()
                optimizer.step()

                proto = None
                logits = None
                loss = None

            if (i + 1) % PRINT_FREQ == 0:
                print('train {}, loss={:.4f} acc={:.4f}'.format(
                    i + 1, tl.item(), ta.item()))
                # reset the running means
                tl = None
                ta = None
                tl = Averager()
                ta = Averager()

            # validation start
            if (i + 1) % VAL_FREQ == 0:
                print('validation start')
                model.eval()
                with torch.no_grad():
                    vl = Averager()  # mean validation loss
                    va = Averager()  # mean validation accuracy
                    for j in range(VAL_TOTAL):
                        for episode in val_data_loader:
                            data, label = [_.cuda() for _ in episode]
                            data_shot, data_query = data[:k], data[k:]  # load an episode
                            label_shot, label_query = label[:k], label[k:]
                            label_shot = sorted(list(set(label_shot.tolist())))
                            label_query = label_query.tolist()
                            labels = []
                            for j in range(len(label_query)):
                                label = label_shot.index(label_query[j])
                                labels.append(label)
                            labels = torch.tensor(labels).cuda()

                            # NOTE(review): the episode is split and the labels
                            # are rebuilt a second time below exactly as in the
                            # original (apparent copy-paste); preserved verbatim.
                            optimizer.zero_grad()
                            data, label = [_.cuda() for _ in episode]
                            data_shot, data_query = data[:k], data[k:]
                            label_shot, label_query = label[:k], label[k:]
                            label_shot = sorted(list(set(label_shot.tolist())))
                            label_query = label_query.tolist()
                            labels = []
                            for j in range(len(label_query)):
                                label = label_shot.index(label_query[j])
                                labels.append(label)
                            labels = torch.tensor(labels).cuda()

                            """ TODO 2 ( Same as above TODO 2 ) """
                            features_shot = model(data_shot.cuda())
                            n_sample = int(args.query / args.nway)
                            features_shot_mean = torch.zeros(
                                args.nway, features_shot.size(1)).cuda()
                            for j in range(int(args.nway)):
                                start = j * args.kshot
                                end = (j + 1) * args.kshot
                                features_shot_mean[j] = features_shot[start:end].mean(dim=0)
                            features_query = model(data_query.cuda())
                            logits = square_euclidean_metric(
                                features_query, features_shot_mean)
                            labels_expanded = labels.view(args.query, 1, 1)
                            labels_expanded = labels_expanded.expand(
                                args.query, args.nway, 1)
                            lsoft = F.log_softmax(-logits, dim=1).view(
                                args.kshot, n_sample, -1)
                            labels_expanded = labels_expanded.view(lsoft.size())
                            loss = -lsoft.gather(
                                2, labels_expanded).squeeze().view(-1).mean()
                            _, pred = lsoft.max(2)
                            """ TODO 2 END """

                            acc = count_acc(logits, labels)
                            vl.add(loss.item())
                            va.add(acc)
                            proto = None
                            logits = None
                            loss = None
                    print('val accuracy mean : %.4f' % va.item())
                    print('val loss mean : %.4f' % vl.item())
                    # reset the running means
                    vl = None
                    va = None
                    vl = Averager()
                    va = Averager()

            if (i + 1) % SAVE_FREQ == 0:
                PATH = 'checkpoints/%d_%s.pth' % (i + 1, args.name)
                torch.save(model.module.state_dict(), PATH)
                print('model saved, iteration : %d' % i)
# img = img / 2 + 0.5 # unnormalize # npimg = img.numpy() # plt.imshow(np.transpose(npimg, (1, 2, 0))) # plt.show() # # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4))) # imshow(utils.make_grid(test_image)) net = AlexNet(num_classes=5, init_weights=True) net.to(device) loss_function = nn.CrossEntropyLoss() # pata = list(net.parameters()) # lr: learning_rate optimizer = optim.Adam(net.parameters(), lr=0.0002) save_path = r'D:\Document\GitHub\deep-learning-for-image-processing\pytorch_classification\Test2_alexnet\AlexNet.pth' best_acc = 0.0 epo = 20 for epoch in range(epo): # train net.train() running_loss = 0.0 t1 = time.perf_counter() #从train_loader,加载一个batch for step, data in enumerate(train_loader, start=0): images, labels = data optimizer.zero_grad() outputs = net(images.to(device))
outf = f'logs' while os.path.exists(outf): outf += '_' os.mkdir(outf) batch_size = 128 learning_rate = 0.01 epochs = 100 start_epoch = 1 check_point = '' train_loader, test_loader = load_dataset(batch_size) model = AlexNet().to(device) if check_point: model.load_state_dict(torch.load(check_point)) optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.99)) criterion = nn.CrossEntropyLoss() writer = SummaryWriter(outf + '/exp') best_acc = float('-inf') best_since_last = 0 for ep in range(start_epoch, epochs + 1): if best_since_last == 20: break elif best_since_last % 8 == 0 and best_since_last != 0: adjust_learning_rate(optimizer, 0.5) train_metrics = train(model, train_loader, optimizer, criterion, ep) test_metrics = test(model, test_loader, criterion) # print(train_metrics) # print(test_metrics) writer.add_scalar('train-acc', train_metrics['acc'], global_step=ep)
# Train an (un-normalised) AlexNet on CIFAR-10 through the Pipeline harness.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from BatchNorm import BatchNorm
from cifar10_dataloader import get_loader
from pipeline import Pipeline
from model import AlexNet as Model
from model_bn import AlexNet_BN as Model_BN  # "with BN" variant (unused here)

# BUG FIX: the original source called `pipeline.working()` at this point
# (the "###### with BN" run) before any `pipeline` object was created,
# which raises NameError at import time.  The premature call is removed;
# the "with BN" run presumably belongs after a Model_BN pipeline is built.

# ##### without BN
train_loader, test_loader = get_loader(batch_size=128, num_workers=1)
model = Model()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01,
                            momentum=0.9, weight_decay=5e-2)
pipeline = Pipeline(task_name='alexnet',
                    log_dir='alexnet',
                    model=model,
                    optimizer=optimizer,
                    loss_func=nn.CrossEntropyLoss(),
                    train_loader=train_loader,
                    test_loader=test_loader,
                    epochs=5,
                    cuda=True)
pipeline.working()
def main():
    """Train AlexNet on the flower dataset, checkpointing the best weights.

    Sets up ImageNet-normalised train/val pipelines, dumps the class-index
    mapping to ``class_indices.json``, then runs the train/validate loop,
    saving ``weights/alexnet.pth`` whenever validation accuracy improves.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    batch_size = 16
    epochs = 20

    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    # Resolve <cwd>/data_set/flower_data and fail fast if it is missing.
    data_root = os.path.abspath(os.path.join(os.getcwd(), "."))
    image_path = os.path.join(data_root, "data_set", "flower_data")
    assert os.path.exists(image_path), "{} path does not exist.".format(
        image_path)

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # Persist index -> class-name mapping for later inference.
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # Worker count: bounded by CPU count, batch size, and 8.
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    # Model / loss / optimizer.
    net = AlexNet(num_classes=5)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # Only parameters that require gradients are handed to the optimizer.
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)

    best_acc = 0.0
    save_path = 'weights/alexnet.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # ---- train ----
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss)

        # ---- validate ----
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # loss = loss_function(outputs, test_labels)
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1, epochs)

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
class Solver(object):
    """Train/test harness for an AlexNet CIFAR-10 classifier.

    Lifecycle: ``run()`` -> ``load_data()`` + ``load_model()`` -> per-epoch
    ``train()`` / ``test()``, saving the best-accuracy weights via ``save()``.
    """

    def __init__(self, config):
        # Hyper-parameters come from an argparse-style `config` object.
        self.model = None
        self.lr = config.lr
        self.epochs = config.epoch
        self.train_batch_size = config.trainBatchSize
        self.test_batch_size = config.testBatchSize
        self.criterion = None
        self.optimizer = None
        self.scheduler = None
        self.device = None
        self.cuda = config.cuda
        self.train_loader = None
        self.test_loader = None
        self.is_board = False

    def load_data(self):
        """Build CIFAR-10 loaders (random flip augmentation on train only)."""
        train_transform = transforms.Compose(
            [transforms.RandomHorizontalFlip(),
             transforms.ToTensor()])
        test_transform = transforms.Compose([transforms.ToTensor()])
        train_set = torchvision.datasets.CIFAR10(
            root='/mnt/disk50/datasets/cifar',
            train=True,
            download=True,
            transform=train_transform)
        self.train_loader = torch.utils.data.DataLoader(
            dataset=train_set,
            batch_size=self.train_batch_size,
            shuffle=True)
        test_set = torchvision.datasets.CIFAR10(
            root='/mnt/disk50/datasets/cifar',
            train=False,
            download=True,
            transform=test_transform)
        self.test_loader = torch.utils.data.DataLoader(
            dataset=test_set,
            batch_size=self.test_batch_size,
            shuffle=False)

    def load_model_from_pth(self, model_path):
        """Load pre-trained weights from a checkpoint dict (key ``'model'``),
        reconciling a ``module.`` (DataParallel) prefix mismatch between the
        checkpoint and the current model.

        :param model_path: path of the ``.pth`` checkpoint file
        :return: the model with loaded weights
        """
        # BUG FIX: was `map_location=self.device_name`, an attribute that is
        # never assigned anywhere in this class (AttributeError at runtime).
        checkpoint = torch.load(model_path, map_location=self.device)['model']
        checkpoint_parameter_name = list(checkpoint.keys())[0]
        model_parameter_name = next(self.model.named_parameters())[0]
        is_checkpoint = checkpoint_parameter_name.startswith('module.')
        is_model = model_parameter_name.startswith('module.')
        if is_checkpoint and not is_model:
            # Strip the 'module.' prefix from the checkpoint's keys.
            new_parameter_check = OrderedDict()
            for key, value in checkpoint.items():
                if key.startswith('module.'):
                    new_parameter_check[key[7:]] = value
            self.model.load_state_dict(new_parameter_check)
        elif not is_checkpoint and is_model:
            # Add the 'module.' prefix to the checkpoint's keys.
            new_parameter_dict = OrderedDict()
            for key, value in checkpoint.items():
                if not key.startswith('module.'):
                    key = 'module.' + key
                new_parameter_dict[key] = value
            # BUG FIX: the remapped dict was built but never actually loaded
            # into the model in the original code.
            self.model.load_state_dict(new_parameter_dict)
        else:
            self.model.load_state_dict(checkpoint)
        return self.model

    def load_model(self):
        """Instantiate AlexNet, Adam, MultiStepLR and the loss on the device."""
        if self.cuda:
            self.device = torch.device('cuda:0')
            cudnn.benchmark = True
        else:
            self.device = torch.device('cpu')
        # self.model = LeNet().to(self.device)
        self.model = AlexNet().to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        self.scheduler = optim.lr_scheduler.MultiStepLR(
            self.optimizer, milestones=[75, 150], gamma=0.5)
        self.criterion = nn.CrossEntropyLoss().to(self.device)

    def train(self, writer=None):
        """Run one training epoch; returns (summed loss, accuracy)."""
        print("train:")
        self.model.train()
        train_loss = 0
        train_correct = 0
        total = 0
        for batch_num, (data, target) in enumerate(self.train_loader):
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(data)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            # torch.max over dim 1 -> (values, indices); indices = predictions
            prediction = torch.max(output, 1)
            total += target.size(0)
            # train_correct incremented by one if predicted right
            train_correct += np.sum(
                prediction[1].cpu().numpy() == target.cpu().numpy())
            progress_bar(
                batch_num, len(self.train_loader),
                'Loss: %.4f | Acc: %.3f%% (%d/%d)' %
                (train_loss / (batch_num + 1),
                 100. * train_correct / total, train_correct, total))
        return train_loss, train_correct / total

    def test(self):
        """Evaluate on the test set; returns (summed loss, accuracy, seconds)."""
        print("test:")
        self.model.eval()
        test_loss = 0
        test_correct = 0
        total = 0
        start = time.time()
        with torch.no_grad():
            for batch_num, (data, target) in enumerate(self.test_loader):
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                loss = self.criterion(output, target)
                test_loss += loss.item()
                prediction = torch.max(output, 1)
                total += target.size(0)
                test_correct += np.sum(
                    prediction[1].cpu().numpy() == target.cpu().numpy())
                progress_bar(
                    batch_num, len(self.test_loader),
                    'Loss: %.4f | Acc: %.3f%% (%d/%d)' %
                    (test_loss / (batch_num + 1),
                     100. * test_correct / total, test_correct, total))
        end = time.time()
        time_used = end - start
        return test_loss, test_correct / total, time_used

    def save(self):
        """Persist the current model weights to a fixed checkpoint path."""
        model_out_path = "./best_model_new.pkl"
        torch.save(self.model.state_dict(), model_out_path)
        print("Checkpoint saved to {}".format(model_out_path))

    def run(self):
        """Full training run: tracks and saves the best test accuracy."""
        self.load_data()
        self.load_model()
        accuracy = 0
        writer = SummaryWriter()
        for epoch in range(1, self.epochs + 1):
            # NOTE(review): scheduler.step(epoch) before optimizer.step() is
            # the legacy (pre-1.1) PyTorch ordering — kept for compatibility.
            self.scheduler.step(epoch)
            print("\n===> epoch: %d/200" % epoch)
            train_loss, train_acc = self.train()
            test_loss, test_acc = self.test()
            if test_acc > accuracy:
                accuracy = test_acc
                self.save()
            elif epoch == self.epochs:
                print("===> BEST ACC. PERFORMANCE: %.3f%%" % (accuracy * 100))
                self.save()
test_image, test_label = test_data_iter.next() # def imshow(img): # img = img / 2 + 0.5 # unnormalize # npimg = img.numpy() # plt.imshow(np.transpose(npimg, (1, 2, 0))) # plt.show() # print(' '.join('%5s' % class_names[test_label[j].item()] # for j in range(len(test_label)))) # imshow(utils.make_grid(test_image)) epochs = 10 model = AlexNet(num_classes=5) loss_function = torch.nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=0.001) best_acc = 0.0 for epoch in range(epochs): # Train model model.train() running_loss = 0.0 t = time.perf_counter() for index, data in enumerate(train_data_loader): imgs, labels = data outputs = model(imgs) optimizer.zero_grad() loss = loss_function(outputs, labels) running_loss += loss loss.backward() optimizer.step()
def train(model, criterion, optimizer, scaler, train_loader, device, epochs):
    """Mixed-precision training loop; returns per-epoch mean losses.

    NOTE(review): the original definition was truncated in this chunk; the
    signature and the epoch loop are reconstructed from the call site below
    — confirm against the full file.
    """
    train_losses = []
    for _ in range(epochs):
        model.train()
        train_batch_loss = []
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            # Autocast runs the forward pass in reduced precision where safe.
            with amp.autocast():
                outputs = model(inputs)
                loss = criterion(outputs, targets)
            # Scale the loss to avoid fp16 gradient underflow.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            train_batch_loss.append(loss.item())
        train_losses.append(np.mean(train_batch_loss))
    return np.array(train_losses)


if __name__ == '__main__':
    model = AlexNet()
    scaler = amp.GradScaler()
    model = nn.DataParallel(model)
    # BUG FIX: was `nn.optim.SDG(...)` — `torch.nn` has no `optim` submodule
    # and "SDG" is a typo for SGD; this raised AttributeError immediately.
    optimizer = torch.optim.SGD(model.parameters(), lr=LR,
                                momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    # BUG FIX: was `nn.CrossEntropy()` — no such class; the criterion is
    # `nn.CrossEntropyLoss`.
    criterion = nn.CrossEntropyLoss()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    train_loader = get_dataloaders(path=PATH, batch_size=BATCH_SIZE)
    train(model, criterion, optimizer, scaler, train_loader, device, EPOCHS)
def train(pertrained=False, resume_file=None):
    """Train AlexNet, with optional pretrained init and checkpoint resume.

    :param pertrained: use the pretrained `alexnet` factory instead of a
        freshly initialised `AlexNet` (name kept as-is for callers — it is
        a typo for "pretrained").
    :param resume_file: path of a checkpoint to resume from, or None.
    Writes per-epoch stats to timestamped train/valid log files and saves a
    checkpoint every 10 epochs and at the final epoch.
    """
    if pertrained:
        from model import alexnet
        net = alexnet(pretrained=True, num_classes=NUMBER_CLASSES)
    else:
        from model import AlexNet
        net = AlexNet(num_classes=NUMBER_CLASSES)
    # BUG FIX: inputs are moved to the GPU below but the network never was,
    # which made every forward pass fail with a device mismatch.
    if CUDA_AVALIABLE:
        net = net.cuda()

    valid_precision = 0
    policies = net.parameters()
    optimizer = optim.SGD(policies, lr=LR, momentum=MOMENTUM,
                          weight_decay=WEIGHT_DECAY)

    train_log = open(
        "logs/train_logs_{}.log".format(
            time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())), "w")
    valid_log = open(
        "logs/valid_logs_{}.log".format(
            time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())), "w")
    train_log.write("{}\t{}\t{}\n".format("epoch", "losses ", "correct"))
    valid_log.write("{}\t{}\t{}\n".format("epoch", "losses ", "correct"))

    # BUG FIX: start_epoch was only assigned inside the resume branches, so
    # the default call train() raised NameError at the loop below.
    start_epoch = 0
    # Resume from a checkpoint.
    if resume_file:
        if os.path.isfile(resume_file):
            print(("=> loading checkpoint '{}'".format(resume_file)))
            checkpoint = torch.load(resume_file)
            start_epoch = checkpoint['epoch']
            net.load_state_dict(checkpoint['model_state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                resume_file, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(resume_file)))

    # valid_precision = valid(net)
    for epoch in range(start_epoch, EPOCHES):
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        correct = AverageMeter()
        end = time.time()
        optimizer = adjust_learning_rate(optimizer, epoch, LR, LR_steps,
                                         WEIGHT_DECAY)
        for i_batch, sample_batched in enumerate(train_dataloader):
            # measure data loading time
            data_time.update(time.time() - end)
            inputs, labels = sample_batched
            if CUDA_AVALIABLE:
                outputs = net.forward(inputs.cuda())
                labels = labels.long().flatten().cuda()
            else:
                outputs = net.forward(inputs)
                labels = labels.long().flatten()
            outputs = outputs.reshape([-1, NUMBER_CLASSES])
            loss = criterion(outputs, labels)

            # Update running statistics.
            losses.update(loss.item(), inputs.size(0))
            _, predicted = torch.max(outputs.data, 1)
            correct.update(
                (predicted == labels.long()).sum().item() / len(labels),
                inputs.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i_batch % 10 == 0:
                print(('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'.format(
                           epoch, i_batch, len(train_dataloader),
                           batch_time=batch_time, data_time=data_time,
                           loss=losses, top1=correct,
                           lr=optimizer.param_groups[-1]['lr'])))

        train_log.write("{:5d}\t{:.5f}\t{:.5f}\n".format(
            epoch, losses.avg, correct.avg))
        train_log.flush()

        if epoch % 1 == 0:
            valid_precision = valid(net, epoch, valid_log)

        # Save a checkpoint every 10 epochs and at the very last epoch.
        if (epoch > 0 and epoch % 10 == 0) or epoch == EPOCHES - 1:
            save_path = os.path.join(
                "models",
                "{:d}_{}_{:d}_{:d}_{:.5f}.pt".format(
                    int(time.time()), "alexnet", epoch, BATCHSIZE,
                    valid_precision))
            print("[INFO] Save weights to " + save_path)
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': net.state_dict(),
                    # BUG FIX: was `optimizer.state_dict` (the bound method
                    # itself, not its result) under the misspelled key
                    # 'optimizer_state_dir'.
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss
                }, save_path)

    train_log.close()
    valid_log.close()
# 存储 索引:标签 的字典 # 字典,类别:索引 {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4} flower_list = train_dataset.class_to_idx # 将 flower_list 中的 key 和 val 调换位置 cla_dict = dict((val, key) for key, val in flower_list.items()) # 将 cla_dict 写入 json 文件中 json_str = json.dumps(cla_dict, indent=4) with open('class_indices.json', 'w') as json_file: json_file.write(json_str) #训练过程 net = AlexNet(num_classes=5, init_weights=True) # 实例化网络(输出类型为5,初始化权重) net.to(device) # 分配网络到指定的设备(GPU/CPU)训练 loss_function = nn.CrossEntropyLoss() # 交叉熵损失 optimizer = optim.Adam(net.parameters(), lr=0.0002) # 优化器(训练参数,学习率) save_path = './AlexNet.pth' best_acc = 0.0 for epoch in range(150): ########################################## train ############################################### net.train() # 训练过程中开启 Dropout running_loss = 0.0 # 每个 epoch 都会对 running_loss 清零 time_start = time.perf_counter() # 对训练一个 epoch 计时 for step, data in enumerate(train_loader, start=0): # 遍历训练集,step从0开始计算 images, labels = data # 获取训练集的图像和标签 optimizer.zero_grad() # 清除历史梯度 outputs = net(images.to(device)) # 正向传播
def main():
    """Train AlexNet on the five-class flower dataset.

    Prepares train/val transforms, writes the class-index mapping to
    ``class_indices.json``, then runs 10 epochs of Adam training, saving
    the state dict whenever validation accuracy reaches a new best.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        "val": transforms.Compose([
            transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    }

    # Data lives two directories up, under data_set/flower_data.
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    image_path = os.path.join(data_root, "data_set", "flower_data")
    assert os.path.exists(image_path), "{} path does not exist.".format(
        image_path)

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # Persist index -> class-name mapping for inference scripts.
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # Worker count bounded by CPU count, batch size, and 8.
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4,
                                                  shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    net = AlexNet(num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # pata = list(net.parameters())
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # ---- train ----
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss)

        # ---- validate ----
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
def main():
    """Train AlexNet on the flower dataset (annotated tutorial variant).

    Same recipe as the sibling script: writes ``class_indices.json`` and
    saves the best-validation-accuracy weights to ``./AlexNet.pth``.
    Expects the dataset under ``<cwd>/data_set/flower_data``.
    """
    # Select the training device: first CUDA GPU if available, else CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Per-split preprocessing pipelines.
    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),  # random crop + resize to 224x224
            transforms.RandomHorizontalFlip(),  # random horizontal flip
            transforms.ToTensor(),              # convert to tensor
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # normalize
        ]),
        "val": transforms.Compose([
            transforms.Resize((224, 224)),  # * cannot 224, must (224, 224)
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]),
    }

    print(os.getcwd())
    # NOTE: os.getcwd() is the process working directory, not this file's
    # directory — the dataset must live under <cwd>/data_set/flower_data.
    data_root = os.path.abspath(os.getcwd())
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    # Fail fast with a clear message instead of an obscure ImageFolder error
    # deeper in the run (guard was previously commented out).
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    train_dataset = datasets.ImageFolder(
        root=os.path.join(image_path, "train"),     # "train" selects the training split
        transform=data_transform["train"])          # use the "train" preprocessing
    train_num = len(train_dataset)                  # number of training images

    # class_to_idx maps class name -> index, e.g.
    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # Invert the mapping to index -> name so predictions can be decoded.
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,       # reshuffle each epoch
        num_workers=nw)     # on Windows num_workers is typically 0

    validate_dataset = datasets.ImageFolder(
        root=os.path.join(image_path, "val"),
        transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(
        validate_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    net = AlexNet(num_classes=5, init_weights=True)  # 5 flower classes
    net.to(device)  # move the network onto the selected device
    loss_function = nn.CrossEntropyLoss()  # multi-class cross-entropy
    optimizer = optim.Adam(net.parameters(), lr=0.0002)  # optimize all trainable params

    epochs = 10
    save_path = './AlexNet.pth'  # checkpoint path for the best weights
    best_acc = 0.0               # best validation accuracy so far
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # --- training phase: train() keeps dropout active ---
        net.train()
        running_loss = 0.0                 # accumulate per-epoch training loss
        train_bar = tqdm(train_loader)     # progress bar / epoch timing
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()                             # clear gradients
            outputs = net(images.to(device))                  # forward pass
            loss = loss_function(outputs, labels.to(device))  # loss vs. targets
            loss.backward()                                   # backpropagate
            optimizer.step()                                  # update parameters

            # print statistics — fetch the scalar once; formatting the CUDA
            # tensor directly would force another device->host copy per step.
            loss_val = loss.item()
            running_loss += loss_val
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss_val)

        # --- validation phase: eval() disables dropout ---
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():  # no gradient tracking during validation
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # argmax over dim=1 (class dimension) -> predicted labels
                predict_y = torch.max(outputs, dim=1)[1]
                # count correct predictions; .item() extracts the Python int
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num  # correct count / sample count
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Keep only the best-performing weights.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')