def main():
    print(f"Train numbers: {len(dataset)}")

    model = AlexNet().to(device)  # fresh model for the first training run
    # Optionally resume from a previously saved model:
    # if device == 'cuda':
    #     model = torch.load(MODEL_PATH + MODEL_NAME).to(device)
    # else:
    #     model = torch.load(MODEL_PATH + MODEL_NAME, map_location='cpu')

    # Loss function (was misleadingly named `cast`)
    criterion = torch.nn.CrossEntropyLoss().to(device)
    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE,
                                 weight_decay=1e-8)

    step = 1
    for epoch in range(1, NUM_EPOCHS + 1):
        model.train()

        # time one training epoch
        start = time.time()
        for images, labels in dataset_loader:
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print(f"Step [{step * BATCH_SIZE}/{NUM_EPOCHS * len(dataset)}], "
                  f"Loss: {loss.item():.8f}.")
            step += 1

        end = time.time()
        print(f"Epoch [{epoch}/{NUM_EPOCHS}], "
              f"time: {end - start} sec!")

        # Save the model checkpoint
        torch.save(model, MODEL_PATH + '/' + MODEL_NAME)
    print(f"Model saved to {MODEL_PATH + '/' + MODEL_NAME}.")
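# The script above pickles the whole model object; saving only the state_dict
# is the more portable pattern. A minimal sketch, assuming the same AlexNet
# class and MODEL_PATH/MODEL_NAME globals as above:
import os
import torch

def save_checkpoint(model, path):
    # state_dict checkpoints survive refactors of the module code better
    # than pickled model objects
    os.makedirs(os.path.dirname(path), exist_ok=True)
    torch.save(model.state_dict(), path)

def load_checkpoint(path, device):
    model = AlexNet()
    # map_location lets a GPU-trained checkpoint load on CPU-only machines
    model.load_state_dict(torch.load(path, map_location=device))
    return model.to(device)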
if loop == 0:
    # randomly select <init_samples> many samples
    selected = random.sample(unlabeled, init_samples)
else:
    # select based on entropy
    selected = select_fn(unlabeled, n_rec)

# remove selected samples from the pool of unlabeled data
unlabeled = list(set(unlabeled) - set(selected))
# add selected samples to the pool of labeled data
labeled.extend(selected)

# train the model using labeled data
model.train()
for epoch in range(num_epochs):
    sampler = SubsetRandomSampler(labeled)
    dataloader = DataLoader(cifar_train,
                            batch_size=batch_size,
                            sampler=sampler,
                            pin_memory=True)
    step = 0
    for imgs, labels in dataloader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)
        loss = loss_fn(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
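# `select_fn` above is not shown; a minimal entropy-based acquisition sketch,
# assuming the same `model`, `cifar_train`, and `device` globals used in the
# loop above:
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset

def entropy_select(unlabeled, n_rec):
    """Return the n_rec unlabeled indices with the highest predictive entropy."""
    model.eval()
    # Subset preserves the order of `unlabeled`, so scores map back to indices
    loader = DataLoader(Subset(cifar_train, unlabeled),
                        batch_size=256, shuffle=False)
    scores = []
    with torch.no_grad():
        for imgs, _ in loader:
            probs = F.softmax(model(imgs.to(device)), dim=1)
            # Shannon entropy per sample; eps guards against log(0)
            ent = -(probs * (probs + 1e-12).log()).sum(dim=1)
            scores.append(ent.cpu())
    top = torch.cat(scores).topk(n_rec).indices.tolist()
    return [unlabeled[i] for i in top]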
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # pick the device
    print("using {} device.".format(device))

    data_transform = {  # data preprocessing
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),       # random crop to 224x224
            transforms.RandomHorizontalFlip(),       # random horizontal flip
            transforms.ToTensor(),                   # convert to tensor
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),  # normalize
        "val": transforms.Compose([
            transforms.Resize((224, 224)),  # cannot be 224, must be (224, 224)
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "dog_data")  # data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)  # number of training images

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx  # map class name -> index
    cla_dict = dict((val, key) for key, val in flower_list.items())  # invert to index -> name
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:  # saved for later lookup
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4,
                                                  shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num, val_num))

    # test_data_iter = iter(validate_loader)
    # test_image, test_label = next(test_data_iter)
    #
    # def imshow(img):
    #     img = img / 2 + 0.5  # unnormalize
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    #
    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
    # imshow(utils.make_grid(test_image))

    net = AlexNet(num_classes=5, init_weights=True)  # 5 classes
    net.to(device)  # move the network to the device
    loss_function = nn.CrossEntropyLoss()  # loss function
    # pata = list(net.parameters())
    optimizer = optim.Adam(net.parameters(), lr=0.0002)  # Adam over all trainable parameters

    save_path = './AlexNet.pth'  # where to save the model
    best_acc = 0.0
    for epoch in range(10):
        # train
        net.train()  # enable dropout during training
        running_loss = 0.0  # accumulate the average loss
        t1 = time.perf_counter()  # time one training epoch
        for step, data in enumerate(train_loader, start=0):  # iterate the data set
            images, labels = data  # split into images and labels
            optimizer.zero_grad()  # clear gradient information
            outputs = net(images.to(device))  # forward pass on the chosen device
            loss = loss_function(outputs, labels.to(device))  # compute the loss
            loss.backward()  # backward pass
            optimizer.step()  # update the parameters

            # print statistics
            running_loss += loss.item()  # accumulate the loss
            # print train progress
            rate = (step + 1) / len(train_loader)
            a = "*" * int(rate * 50)
            b = "." * int((1 - rate) * 50)
            print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(
                int(rate * 100), a, b, loss), end="")
        print()
        print(time.perf_counter() - t1)

        # validate
        net.eval()  # disable dropout
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            for val_data in validate_loader:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]  # argmax = predicted class
                acc += (predict_y == val_labels.to(device)).sum().item()  # compare prediction to ground truth
            val_accurate = acc / val_num  # accuracy
            if val_accurate > best_acc:  # if accuracy beats the best so far
                best_acc = val_accurate  # update
                torch.save(net.state_dict(), save_path)  # save the weights
            print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
                  (epoch + 1, running_loss / step, val_accurate))

    print('Finished Training')
def train(args):
    device = torch.device(f"cuda:{args.device_id}")
    model = AlexNet(n_cls=100, useLRN=args.useLRN, useDropOut=args.useDropOut)
    # model = AlexNet(num_classes=100)
    criterion = nn.CrossEntropyLoss()
    model.to(device)
    optimizer = Adam(model.parameters(), lr=args.lr)
    train_loader, valid_loader = getLoaders(split="train",
                                            batch_size=args.batch_size,
                                            num_workers=args.num_workers,
                                            aug=args.useAug)

    train_loss_arr = []
    valid_loss_arr = []
    valid_acc_arr = []
    valid_top5_arr = []
    n_iter = 0
    best_loss = float('inf')
    best_top1_acc = 0
    best_top5_acc = 0

    for ep in range(args.epoch):
        model.train()
        for _, (img, label) in tqdm(enumerate(train_loader), total=len(train_loader)):
            img, label = img.to(device), label.to(device)
            optimizer.zero_grad()
            pred = model(img)
            loss = criterion(pred, label)
            # loss = model.criterion(pred, label)
            loss.backward()
            optimizer.step()
            train_loss_arr.append(loss.item())
            n_iter += 1

        model.eval()
        ep_valid_loss_arr = []
        ep_acc_arr = []
        ep_top5_arr = []
        with torch.no_grad():
            for _, (img, label) in tqdm(enumerate(valid_loader), total=len(valid_loader)):
                img, label = img.to(device), label.to(device)
                pred = model(img)
                loss = criterion(pred, label)
                # loss = model.criterion(pred, label)
                acc = utils.top_k_acc(k=1, pred=pred.detach().cpu().numpy(),
                                      label=label.detach().cpu().numpy())
                acc5 = utils.top_k_acc(k=5, pred=pred.detach().cpu().numpy(),
                                       label=label.detach().cpu().numpy())
                ep_acc_arr.append(acc)
                ep_top5_arr.append(acc5)
                ep_valid_loss_arr.append(loss.item())

        valid_loss = np.mean(ep_valid_loss_arr)
        valid_acc = np.mean(ep_acc_arr)
        valid_top5 = np.mean(ep_top5_arr)
        train_loss = np.mean(train_loss_arr[-len(train_loader):])
        valid_loss_arr.append(valid_loss)
        valid_acc_arr.append(valid_acc)    # was never appended: keep history in sync
        valid_top5_arr.append(valid_top5)  # was never appended

        if valid_loss < best_loss:
            best_loss = valid_loss
            best_top1_acc = valid_acc
            best_top5_acc = valid_top5
            model.cpu()
            torch.save(model.state_dict(), "best_model.pth")
            model.to(device)

        if (ep + 1) % 10 == 0:
            model.cpu()
            torch.save(
                {
                    "model": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "train_loss": train_loss_arr,
                    "valid_loss": valid_loss_arr,
                    "valid_acc": valid_acc_arr,
                    "valid_top5": valid_top5_arr,
                    "best_loss": best_loss,
                    "ep": ep,
                    "n_iter": n_iter,
                }, "model_checkpoint.pth")
            model.to(device)

        print(f"[{ep}, {n_iter}] train: {train_loss:.4f}, valid: {valid_loss:.4f}, "
              f"acc: {valid_acc:.4f}, top5: {valid_top5:.4f}")

    with open("exp_result.txt", "a+") as f:
        f.write(f"{args}, loss: {best_loss:.4f}, top1: {best_top1_acc*100:.1f}, "
                f"top5: {best_top5_acc*100:.1f}\n")
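# utils.top_k_acc is referenced above but not shown; a minimal NumPy sketch
# under the assumption that `pred` is an (N, C) array of logits and `label`
# is an (N,) array of integer class ids:
import numpy as np

def top_k_acc(k, pred, label):
    """Fraction of rows whose true class is among the k highest-scoring classes."""
    topk = np.argsort(pred, axis=1)[:, -k:]       # indices of the k largest scores
    hits = (topk == label[:, None]).any(axis=1)   # is the true label among them?
    return hits.mean()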
def train(args):
    # the number of N-way, K-shot images
    k = args.nway * args.kshot

    """ TODO 1.a """
    " Make your own model for Few-shot Classification in 'model.py' file."
    # model setting
    model = AlexNet()
    model.cuda()
    """ TODO 1.a END """

    # pretrained model load
    if args.restore_ckpt is not None:
        state_dict = torch.load(args.restore_ckpt)
        model.load_state_dict(state_dict)
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))

    if args.test_mode == 1:
        Test_phase(model, args, k)
    else:
        # Train data loading
        dataset = Dataset(args.dpath, state='train')
        train_sampler = Train_Sampler(dataset._labels, n_way=args.nway,
                                      k_shot=args.kshot, query=args.query)
        data_loader = DataLoader(dataset=dataset, batch_sampler=train_sampler,
                                 num_workers=8, pin_memory=True)

        # Validation data loading
        val_dataset = Dataset(args.dpath, state='val')
        val_sampler = Sampler(val_dataset._labels, n_way=args.nway,
                              k_shot=args.kshot, query=args.query)
        val_data_loader = DataLoader(dataset=val_dataset, batch_sampler=val_sampler,
                                     num_workers=8, pin_memory=True)

        """ TODO 1.b (optional) """
        " Set an optimizer or scheduler for Few-shot classification (optional) "
        # Default optimizer setting
        # optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.001,
                                    momentum=0.9, weight_decay=5e-4)
        """ TODO 1.b (optional) END """

        tl = Averager()  # save average loss
        ta = Averager()  # save average accuracy

        # training start
        print('train start')
        # running counters (not used below)
        train_correct = 0
        train_total = 0
        train_loss = 0
        test_correct = 0
        test_total = 0
        test_loss = 0

        model.train()
        for i in range(args.se + 1, TOTAL):
            for episode in data_loader:
                optimizer.zero_grad()

                data, label = [_ for _ in episode]  # load an episode

                # split an episode's images and labels into shot and query sets
                # note! data_shot shape is (nway * kshot, 3, h, w),
                # not (kshot * nway, 3, h, w) -- take care when reshaping
                data_shot, data_query = data[:k], data[k:]
                label_shot, label_query = label[:k], label[k:]
                label_shot = sorted(list(set(label_shot.tolist())))

                # convert labels into 0..nway-1 values
                label_query = label_query.tolist()
                labels = []
                for j in range(len(label_query)):
                    labels.append(label_shot.index(label_query[j]))
                labels = torch.tensor(labels).cuda()

                """ TODO 2 """
                """
                Train the model
                Input:
                    data_shot : torch.tensor, shot images, [args.nway * args.kshot, 3, h, w]
                        be careful when using torch.reshape or .view functions
                    data_query : torch.tensor, query images, [args.query, 3, h, w]
                    labels : torch.tensor, labels of query images, [args.query]
                Output:
                    loss : torch scalar tensor used for updating the model
                    logits : a value to measure accuracy and loss
                """
                features_shot = model(data_shot.cuda())
                n_sample = int(args.query / args.nway)
                features_shot_mean = torch.zeros(args.nway, features_shot.size(1)).cuda()
                for j in range(int(args.nway)):
                    start = j * args.kshot
                    end = (j + 1) * args.kshot
                    # class prototype = mean of that class's shot features
                    features_shot_mean[j] = features_shot[start:end].mean(dim=0)
                features_query = model(data_query.cuda())
                logits = square_euclidean_metric(features_query, features_shot_mean)

                labels_expanded = labels.view(args.query, 1, 1)
                labels_expanded = labels_expanded.expand(args.query, args.nway, 1)
                lsoft = F.log_softmax(-logits, dim=1).view(args.kshot, n_sample, -1)
                labels_expanded = labels_expanded.view(lsoft.size())
                loss = -lsoft.gather(2, labels_expanded).squeeze().view(-1).mean()
                _, pred = lsoft.max(2)
                """ TODO 2 END """

                acc = count_acc(logits, labels)
                tl.add(loss.item())
                ta.add(acc)

                loss.backward()
                optimizer.step()
                proto = None
                logits = None
                loss = None

            if (i + 1) % PRINT_FREQ == 0:
                print('train {}, loss={:.4f} acc={:.4f}'.format(
                    i + 1, tl.item(), ta.item()))
                # re-initialize loss and accuracy means
                tl = Averager()
                ta = Averager()

            # validation start
            if (i + 1) % VAL_FREQ == 0:
                print('validation start')
                model.eval()
                with torch.no_grad():
                    vl = Averager()  # save average loss
                    va = Averager()  # save average accuracy
                    # (outer loop variable renamed from j: it shadowed the
                    #  label-conversion loop variable below)
                    for _ in range(VAL_TOTAL):
                        for episode in val_data_loader:
                            data, label = [_.cuda() for _ in episode]  # load an episode

                            # split into shot and query sets; same layout caveat as above
                            data_shot, data_query = data[:k], data[k:]
                            label_shot, label_query = label[:k], label[k:]
                            label_shot = sorted(list(set(label_shot.tolist())))

                            # convert labels into 0..nway-1 values
                            label_query = label_query.tolist()
                            labels = []
                            for j in range(len(label_query)):
                                labels.append(label_shot.index(label_query[j]))
                            labels = torch.tensor(labels).cuda()

                            # same prototype computation as in training
                            # (example shapes: data_shot (25, 3, 400, 400),
                            #  data_query (20, 3, 400, 400), labels (20))
                            features_shot = model(data_shot.cuda())
                            n_sample = int(args.query / args.nway)
                            features_shot_mean = torch.zeros(
                                args.nway, features_shot.size(1)).cuda()
                            for j in range(int(args.nway)):
                                start = j * args.kshot
                                end = (j + 1) * args.kshot
                                features_shot_mean[j] = features_shot[start:end].mean(dim=0)
                            features_query = model(data_query.cuda())
                            logits = square_euclidean_metric(features_query,
                                                             features_shot_mean)

                            labels_expanded = labels.view(args.query, 1, 1)
                            labels_expanded = labels_expanded.expand(args.query, args.nway, 1)
                            lsoft = F.log_softmax(-logits, dim=1).view(
                                args.kshot, n_sample, -1)
                            labels_expanded = labels_expanded.view(lsoft.size())
                            loss = -lsoft.gather(2, labels_expanded).squeeze().view(-1).mean()
                            _, pred = lsoft.max(2)

                            acc = count_acc(logits, labels)
                            vl.add(loss.item())
                            va.add(acc)
                            proto = None
                            logits = None
                            loss = None

                    print('val accuracy mean : %.4f' % va.item())
                    print('val loss mean : %.4f' % vl.item())
                    # re-initialize loss and accuracy means
                    vl = Averager()
                    va = Averager()
                model.train()  # was missing: return to training mode after validation

            if (i + 1) % SAVE_FREQ == 0:
                PATH = 'checkpoints/%d_%s.pth' % (i + 1, args.name)
                torch.save(model.module.state_dict(), PATH)
                print('model saved, iteration : %d' % i)
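# square_euclidean_metric above is assumed to return pairwise squared
# distances between query features and class prototypes; a minimal sketch:
import torch

def square_euclidean_metric(a, b):
    """a: (n, d) query features; b: (m, d) prototypes -> (n, m) squared distances."""
    n, m = a.size(0), b.size(0)
    a = a.unsqueeze(1).expand(n, m, -1)
    b = b.unsqueeze(0).expand(n, m, -1)
    return ((a - b) ** 2).sum(dim=2)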
def main():
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("using {} device.".format(device))
    tbwriter = SummaryWriter(log_dir="./logs")

    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(360),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        "val": transforms.Compose([
            transforms.Resize((360, 360)),  # cannot be 360, must be (360, 360)
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(), "./DATA"))  # get data root path
    image_path = os.path.join(data_root, "male")  # data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=2)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 8
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=8,
                                                  shuffle=True,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    if os.path.exists("./log360.pth"):
        net = AlexNet()
        # net.load_state_dict(torch.load("./log360.pth", map_location='cuda:2'))
        net = torch.load("./log360.pth", map_location='cpu')  # replaces the fresh AlexNet above
        print("continue training")
    else:
        net = AlexNet(num_classes=3, init_weights=True)
        print("start training anew")
    net.to(device)  # move to the device in both branches (was only done when training anew)

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.98)
    epochs = 2000
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)

    # json_path = './class_indices.json'
    # json_file = open(json_path, "r")
    # class_indict = json.load(json_file)
    # model = AlexNet(num_classed=6).to(device)

    trainLOSS = []  # save train loss
    testLOSS = []   # save test loss
    valACC = []     # save val acc
    for epoch in range(epochs):
        print('LR:{}'.format(scheduler.get_last_lr()[0]))

        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss)
        scheduler.step()  # step the LR schedule once per epoch, after the optimizer updates

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, colour='green')
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        tbwriter.add_scalar('train/loss', running_loss / train_steps, epoch)
        tbwriter.add_scalar('val/acc', val_accurate, epoch)
        trainLOSS.append(running_loss / train_steps)
        valACC.append(val_accurate)
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        print(' ')
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    # predict
    # weights_path = "./AlexNet.pth"
    # model.load_state_dict(torch.load(weights_path))
    # model.eval()
    # with torch.no_grad():
    #     output = torch.squeeze(model(img.to(device))).cpu()
    #     predict = torch.softmax(output, dim=0)
    #     predict_cla = torch.argmax(predict).numpy()

    npLOSS = np.array(trainLOSS)
    npVALACC = np.array(valACC)
    np.save('./save/loss_epoch_{}'.format(epoch), npLOSS)
    np.save('./save/valacc_epoch_{}'.format(epoch), npVALACC)
    print('Finished Training')
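# The arrays saved above can be inspected after the run; a minimal plotting
# sketch (the .npy filenames depend on the final epoch number and are
# illustrative here):
import numpy as np
import matplotlib.pyplot as plt

loss = np.load('./save/loss_epoch_1999.npy')    # hypothetical final-epoch file
acc = np.load('./save/valacc_epoch_1999.npy')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(loss)
ax1.set_xlabel('epoch')
ax1.set_ylabel('train loss')
ax2.plot(acc)
ax2.set_xlabel('epoch')
ax2.set_ylabel('val accuracy')
plt.tight_layout()
plt.show()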
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        "val": transforms.Compose([
            transforms.Resize((224, 224)),  # cannot be 224, must be (224, 224)
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(), "./"))
    image_path = os.path.join(data_root, "flower_data")
    train_dataset = datasets.ImageFolder(root=image_path + "/train",
                                         transform=data_transform['train'])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 8
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=0)

    validate_dataset = datasets.ImageFolder(root=image_path + "/val",
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4,
                                                  shuffle=True,
                                                  num_workers=0)

    # test_data_iter = iter(validate_loader)
    # test_image, test_label = next(test_data_iter)
    #
    # def imshow(img):
    #     img = img / 2 + 0.5  # unnormalize
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    #
    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
    # imshow(utils.make_grid(test_image))

    net = AlexNet(num_class=5)
    print(net)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # pata = list(net.parameters())
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    save_path = './AlexNet.pth'
    best_acc = 0.0
    for epoch in range(10):
        # train
        net.train()
        running_loss = 0.0
        t1 = time.perf_counter()
        for step, data in enumerate(train_loader, start=0):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            # print train progress
            rate = (step + 1) / len(train_loader)
            a = "*" * int(rate * 50)
            b = "." * int((1 - rate) * 50)
            print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(
                int(rate * 100), a, b, loss), end="")
        print()
        print(time.perf_counter() - t1)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        # no loss gradients are computed during validation
        with torch.no_grad():
            for val_data in validate_loader:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += (predict_y == val_labels.to(device)).sum().item()
            val_accurate = acc / val_num
            if val_accurate > best_acc:
                best_acc = val_accurate
                torch.save(net.state_dict(), save_path)
            print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
                  (epoch + 1, running_loss / step, val_accurate))

    print('Finished Training')
# plt.show()
#
# print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
# imshow(utils.make_grid(test_image))

net = AlexNet(num_classes=5, init_weights=True)  # build a fresh model
# net.load_state_dict(torch.load("./AlexNet.pth"))
net.to(device)  # move the model to the GPU (if available)
loss_function = nn.CrossEntropyLoss()  # loss function
# pata = list(net.parameters())
optimizer = optim.Adam(net.parameters(), lr=0.0002)  # gradient-descent optimizer

save_path = './AlexNet.pth'
best_acc = 0.0  # best accuracy so far (each epoch's model gets a new accuracy)
for epoch in range(10):
    net.train()  # enable training mode
    running_loss = 0.0
    t1 = time.perf_counter()
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optimizer.zero_grad()  # zero the parameter gradients, otherwise they accumulate
        outputs = net(images.to(device))  # forward pass through the network
        loss = loss_function(outputs, labels.to(device))  # error between output and target
        loss.backward()  # compute d(loss)/d(param) into each parameter's .grad
        optimizer.step()  # gradient-descent update of the parameters

        running_loss += loss.item()
        rate = (step + 1) / len(train_loader)  # progress within this epoch
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(
            int(rate * 100), a, b, loss), end="")
class Solver(object):
    def __init__(self, config):
        self.model = None
        self.lr = config.lr
        self.epochs = config.epoch
        self.train_batch_size = config.trainBatchSize
        self.test_batch_size = config.testBatchSize
        self.criterion = None
        self.optimizer = None
        self.scheduler = None
        self.device = None
        self.cuda = config.cuda
        self.train_loader = None
        self.test_loader = None
        self.is_board = False

    def load_data(self):
        train_transform = transforms.Compose(
            [transforms.RandomHorizontalFlip(), transforms.ToTensor()])
        test_transform = transforms.Compose([transforms.ToTensor()])
        train_set = torchvision.datasets.CIFAR10(
            root='/mnt/disk50/datasets/cifar', train=True,
            download=True, transform=train_transform)
        self.train_loader = torch.utils.data.DataLoader(
            dataset=train_set, batch_size=self.train_batch_size, shuffle=True)
        test_set = torchvision.datasets.CIFAR10(
            root='/mnt/disk50/datasets/cifar', train=False,
            download=True, transform=test_transform)
        self.test_loader = torch.utils.data.DataLoader(
            dataset=test_set, batch_size=self.test_batch_size, shuffle=False)

    def load_model_from_pth(self, model_path):
        """Load pre-trained model weights, reconciling the 'module.' prefix
        that DataParallel adds to parameter names.
        """
        checkpoint = torch.load(model_path, map_location=self.device)['model']
        checkpoint_parameter_name = list(checkpoint.keys())[0]
        model_parameter_name = next(self.model.named_parameters())[0]
        is_checkpoint = checkpoint_parameter_name.startswith('module.')
        is_model = model_parameter_name.startswith('module.')
        if is_checkpoint and not is_model:
            # strip the 'module.' prefix from the checkpoint's parameters
            new_parameter_check = OrderedDict()
            for key, value in checkpoint.items():
                if key.startswith('module.'):
                    new_parameter_check[key[7:]] = value
            self.model.load_state_dict(new_parameter_check)
        elif not is_checkpoint and is_model:
            # add the 'module.' prefix
            new_parameter_dict = OrderedDict()
            for key, value in checkpoint.items():
                if not key.startswith('module.'):
                    key = 'module.' + key
                new_parameter_dict[key] = value
            self.model.load_state_dict(new_parameter_dict)  # was missing
        else:
            self.model.load_state_dict(checkpoint)
        return self.model

    def load_model(self):
        if self.cuda:
            self.device = torch.device('cuda:0')
            cudnn.benchmark = True
        else:
            self.device = torch.device('cpu')

        # self.model = LeNet().to(self.device)
        self.model = AlexNet().to(self.device)

        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        self.scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer,
                                                        milestones=[75, 150],
                                                        gamma=0.5)
        self.criterion = nn.CrossEntropyLoss().to(self.device)

    def train(self, writer=None):
        print("train:")
        self.model.train()
        train_loss = 0
        train_correct = 0
        total = 0
        for batch_num, (data, target) in enumerate(self.train_loader):
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(data)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            # second param "1" represents the dimension to be reduced
            prediction = torch.max(output, 1)
            total += target.size(0)
            # train_correct incremented by one if predicted right
            train_correct += np.sum(
                prediction[1].cpu().numpy() == target.cpu().numpy())
            progress_bar(batch_num, len(self.train_loader),
                         'Loss: %.4f | Acc: %.3f%% (%d/%d)' %
                         (train_loss / (batch_num + 1),
                          100. * train_correct / total, train_correct, total))
        # if writer:
        #     writer.add_scalar(...)
        return train_loss, train_correct / total

    def test(self):
        print("test:")
        self.model.eval()
        test_loss = 0
        test_correct = 0
        total = 0
        start = time.time()
        with torch.no_grad():
            for batch_num, (data, target) in enumerate(self.test_loader):
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                loss = self.criterion(output, target)
                test_loss += loss.item()
                prediction = torch.max(output, 1)
                total += target.size(0)
                test_correct += np.sum(
                    prediction[1].cpu().numpy() == target.cpu().numpy())
                progress_bar(batch_num, len(self.test_loader),
                             'Loss: %.4f | Acc: %.3f%% (%d/%d)' %
                             (test_loss / (batch_num + 1),
                              100. * test_correct / total, test_correct, total))
        time_used = time.time() - start
        return test_loss, test_correct / total, time_used

    def save(self):
        model_out_path = "./best_model_new.pkl"
        torch.save(self.model.state_dict(), model_out_path)
        print("Checkpoint saved to {}".format(model_out_path))

    def run(self):
        self.load_data()
        self.load_model()
        # for k, v in self.model.state_dict().items():
        #     print('layer {}'.format(k))
        #     print(v)
        accuracy = 0
        writer = SummaryWriter()
        for epoch in range(1, self.epochs + 1):
            print("\n===> epoch: %d/%d" % (epoch, self.epochs))
            train_loss, train_acc = self.train()
            test_loss, test_acc, test_time = self.test()  # test() returns three values
            self.scheduler.step()  # step the LR schedule once per epoch
            # writer.add_scalars('loss_group', {'train_loss': train_loss,
            #                                   'test_loss': test_loss}, epoch)
            # writer.add_scalars('acc_group', {'train_acc': train_acc,
            #                                  'test_acc': test_acc}, epoch)
            if test_acc > accuracy:
                accuracy = test_acc
                self.save()
            elif epoch == self.epochs:
                print("===> BEST ACC. PERFORMANCE: %.3f%%" % (accuracy * 100))
                self.save()
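# A minimal usage sketch for the Solver above, covering the config fields its
# __init__ reads (lr, epoch, trainBatchSize, testBatchSize, cuda); the
# argparse flags are illustrative, not taken from the original entry point:
import argparse
import torch

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='AlexNet on CIFAR-10')
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--epoch', type=int, default=200)
    parser.add_argument('--trainBatchSize', type=int, default=128)
    parser.add_argument('--testBatchSize', type=int, default=100)
    config = parser.parse_args()
    config.cuda = torch.cuda.is_available()  # Solver reads config.cuda
    Solver(config).run()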
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    batch_size = 16
    epochs = 20

    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(), "."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    # create model
    net = AlexNet(num_classes=5)
    net.to(device)

    # define loss function
    loss_function = nn.CrossEntropyLoss()

    # construct an optimizer
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)

    best_acc = 0.0
    save_path = 'weights/alexnet.pth'
    os.makedirs(os.path.dirname(save_path), exist_ok=True)  # torch.save does not create the directory
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # loss = loss_function(outputs, test_labels)
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1, epochs)

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
net = AlexNet(num_classes=5, init_weights=True)
net.to(device)
loss_function = nn.CrossEntropyLoss()
# pata = list(net.parameters())
# lr: learning rate
optimizer = optim.Adam(net.parameters(), lr=0.0002)

save_path = r'D:\Document\GitHub\deep-learning-for-image-processing\pytorch_classification\Test2_alexnet\AlexNet.pth'
best_acc = 0.0
epo = 20
for epoch in range(epo):
    # train
    net.train()
    running_loss = 0.0
    t1 = time.perf_counter()
    # load one batch at a time from train_loader
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optimizer.zero_grad()
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        # print train progress
        rate = (step + 1) / len(train_loader)
    14: [['solarize', 0.5, 2], ['invert', 0, 0.3]],
    15: [['equalize', 0.2, 0], ['autocontrast', 0.6, 0]],
    16: [['equalize', 0.2, 8], ['equalize', 0.6, 4]],
    17: [['color', 0.9, 9], ['equalize', 0.6, 6]],
    18: [['autocontrast', 0.8, 4], ['solarize', 0.2, 8]],
    19: [['brightness', 0.1, 3], ['color', 0.7, 0]],
    20: [['solarize', 0.4, 5], ['autocontrast', 0.9, 3]],
    21: [['translatey', 0.9, 9], ['translatey', 0.7, 9]],
    22: [['autocontrast', 0.9, 2], ['solarize', 0.8, 3]],
    23: [['equalize', 0.8, 8], ['invert', 0.1, 3]],
    24: [['translatey', 0.7, 9], ['autocontrast', 0.9, 1]]
}

# start training and testing: one training pass, then one test pass per epoch
for epoch in range(60):
    # train
    net.train()  # enable the dropout defined in the network
    running_loss = 0.0
    t1 = time.perf_counter()

    # preprocess all pictures for this epoch
    if not os.path.exists(image_path_cifar + "/train" + str(epoch)):
        os.mkdir(image_path_cifar + "/train" + str(epoch))
        for name in os.listdir(image_path_cifar + "/train"):
            for file in os.listdir(image_path_cifar + "/train/" + name):
                jpg = Image.open(image_path_cifar + "/train/" + name + '/' + file)
                operate_id = random.randint(0, 24)
                operation = operate_dict[operate_id]
                # look up each op function by name; this assumes the ops
                # (solarize, invert, ...) are defined in the enclosing scope
                jpg = locals()[operation[0][0]](jpg, operation[0][1], operation[0][2])
                jpg = locals()[operation[1][0]](jpg, operation[1][1], operation[1][2])
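# An explicit dispatch dict is a safer alternative to the locals() lookup
# above; a minimal self-contained sketch with PIL-based stand-ins for two of
# the ops (the real script defines its own solarize/invert/... elsewhere,
# and the magnitude scaling here is illustrative):
import random
from PIL import ImageOps

def solarize(img, prob, magnitude):
    # with probability `prob`, invert pixels above a threshold derived
    # from the 0..9 magnitude
    if random.random() < prob:
        img = ImageOps.solarize(img, threshold=256 - 26 * magnitude)
    return img

def invert(img, prob, magnitude):
    if random.random() < prob:
        img = ImageOps.invert(img)
    return img

OPS = {'solarize': solarize, 'invert': invert}

def apply_policy(img, policy):
    """policy: [[op_name, prob, magnitude], [op_name, prob, magnitude]]"""
    for op_name, prob, magnitude in policy:
        img = OPS[op_name](img, prob, magnitude)
    return img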
def main():
    # viz = Visdom()
    # viz.line([0.], [0.], win='train_loss', opts=dict(title='train loss'))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor()
        ]),
        "val": transforms.Compose([
            transforms.Resize((224, 224)),  # cannot be 224, must be (224, 224)
            transforms.ToTensor()
        ])
    }

    data_root = "/home/zhongsy/datasets/dataset/"  # get data root path
    train_dataset = datasets.ImageFolder(root=os.path.join(data_root, "train"),
                                         transform=data_transform["train"])
    # print(train_dataset.imgs)
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 1
    # number of workers
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=32,
                                               shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(data_root, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4,
                                                  shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    # test_data_iter = iter(validate_loader)
    # test_image, test_label = next(test_data_iter)
    #
    # def imshow(img):
    #     img = img / 2 + 0.5  # unnormalize
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    #
    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
    # imshow(utils.make_grid(test_image))

    net = AlexNet(num_classes=2, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # pata = list(net.parameters())
    optimizer = optim.Adam(net.parameters(), lr=0.0001)
    epochs = 30
    save_path = './AlexNet.pt'
    best_acc = 0.0
    train_steps = len(train_loader)
    global_step = 0
    for epoch in range(epochs):
        # train
        epochloss = 100000  # tracks the smallest batch loss this epoch
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            # print("label: ", labels, labels.dtype)
            optimizer.zero_grad()
            outputs = net(images.to(device))
            # print("imges: ", images, images.dtype)
            # outputs_ = outputs.squeeze()
            # print("output__ : ", outputs_)
            # outputs_ = outputs.to(torch.float)
            loss = loss_function(outputs, labels.to(device))
            # loss = loss.to(torch.float)
            if epochloss > loss:
                epochloss = loss
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss)
            # viz.line([epochloss.cpu().detach().numpy()], [global_step],
            #          win='train_loss', update='append')
            global_step += 1

        print("[ start val ]")
        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                # val_labels.unsqueeze(1)  # no-op: the result was never used
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                # print("prect ;", predict_y)
                # outputs = outputs.squeeze()
                # print("out_puts: ", outputs)
                # a = torch.gt(outputs, 0.5)
                # print("a ", a)
                # for i, (data, label_) in enumerate(zip(outputs, val_labels)):
                #     if abs(data - label_) <= 0.5:
                #         acc += 1
                # viz.images(val_images.view(-1, 3, 224, 224), win='x')
                # viz.text(str(predict_y.detach().cpu().numpy()),
                #          win='pred', opts=dict(title='pred'))
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net, save_path)
    print('Finished Training')
def main():
    # select the device to run on
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # data preprocessing
    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        "val": transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    }

    # directory holding the train and val splits
    image_path = '/home/xulei/数据集大本营/5_flower_data/flower_data'  # flower data root path
    # abort with an error if the directory does not exist
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    # training data set
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    # number of files in the training data set
    train_num = len(train_dataset)

    # flower_list: {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # cla_dict: {0: 'daisy', 1: 'dandelion', 2: 'roses', 3: 'sunflowers', 4: 'tulips'}
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    # json.dumps encodes the dict as JSON; indent=4 puts every key on its own line:
    # json_str:
    # '{
    #     "0": "daisy",
    #     "1": "dandelion",
    #     "2": "roses",
    #     "3": "sunflowers",
    #     "4": "tulips"
    # }'
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 128
    nw = min(os.cpu_count(), batch_size if batch_size > 1 else 0, 8)  # number of workers
    print("using {} dataloader workers every process".format(nw))

    train_loader = datas.DataLoader(train_dataset, batch_size,
                                    shuffle=True, num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    # val_num: 364
    val_num = len(validate_dataset)
    validate_loader = datas.DataLoader(validate_dataset, batch_size,
                                       shuffle=False, num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    net = AlexNet(num_classes=5).to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.00004)

    epoches = 20
    save_path = './AlexNet.pth'
    best_acc = 0.0
    # train_steps: 26; len(train_loader) = training_images_num / batch_size
    train_steps = len(train_loader)
    for epoch in range(epoches):
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)  # progress bar
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epoches, loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)  # , colour='green'
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('\n[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print("Finished Training")
def ceal_learning_algorithm(du: DataLoader,
                            dl: DataLoader,
                            dtest: DataLoader,
                            k: int = 1000,
                            delta_0: float = 0.005,
                            dr: float = 0.00033,
                            t: int = 1,
                            epochs: int = 10,
                            criteria: str = 'cl',
                            max_iter: int = 45):
    """
    Algorithm 1: learning algorithm of CEAL.
    For simplicity, the same notation as in the paper is used.

    Parameters
    ----------
    du: DataLoader
        unlabeled samples
    dl : DataLoader
        labeled samples
    dtest : DataLoader
        test data
    k: int, (default = 1000)
        number of uncertain samples to select
    delta_0: float
        high-confidence sample selection threshold
    dr: float
        threshold decay
    t: int
        fine-tuning interval
    epochs: int
        number of training epochs per fine-tuning round
    criteria: str
        uncertainty selection criteria
    max_iter: int
        maximum iteration number

    Returns
    -------
    """
    logger.info('Initial configuration: len(du): {}, len(dl): {} '.format(
        len(du.sampler.indices), len(dl.sampler.indices)))

    # Create the model
    model = AlexNet(n_classes=256, device=None)

    # Initialize the model
    logger.info('Initialize training the model on `dl` and test on `dtest`')
    model.train(epochs=epochs, train_loader=dl, valid_loader=None)

    # Evaluate model on dtest
    acc = model.evaluate(test_loader=dtest)
    print('====> Initial accuracy: {} '.format(acc))

    for iteration in range(max_iter):
        logger.info('Iteration: {}: run prediction on unlabeled data '
                    '`du` '.format(iteration))
        pred_prob = model.predict(test_loader=du)

        # get k uncertain samples
        uncert_samp_idx, _ = get_uncertain_samples(pred_prob=pred_prob, k=k,
                                                   criteria=criteria)
        # get original indices
        uncert_samp_idx = [du.sampler.indices[idx] for idx in uncert_samp_idx]

        # add the uncertain samples selected from `du` to the labeled
        # samples set `dl`
        dl.sampler.indices.extend(uncert_samp_idx)
        logger.info(
            'Update size of `dl` and `du` by adding uncertain {} samples'
            ' in `dl` len(dl): {}, len(du) {}'.format(
                len(uncert_samp_idx), len(dl.sampler.indices),
                len(du.sampler.indices)))

        # get high confidence samples `dh`
        hcs_idx, hcs_labels = get_high_confidence_samples(pred_prob=pred_prob,
                                                          delta=delta_0)
        # get the original indices
        hcs_idx = [du.sampler.indices[idx] for idx in hcs_idx]

        # remove the samples already selected as uncertain samples
        hcs_idx = [
            x for x in hcs_idx
            if x not in list(set(uncert_samp_idx) & set(hcs_idx))
        ]

        # add high confidence samples to the labeled set `dl`
        # (1) update the indices
        dl.sampler.indices.extend(hcs_idx)
        # (2) update the original labels with the pseudo labels
        for idx in range(len(hcs_idx)):
            dl.dataset.labels[hcs_idx[idx]] = hcs_labels[idx]
        logger.info(
            'Update size of `dl` and `du` by adding {} hcs samples in `dl`'
            ' len(dl): {}, len(du) {}'.format(
                len(hcs_idx), len(dl.sampler.indices),
                len(du.sampler.indices)))

        if iteration % t == 0:
            logger.info('Iteration: {} fine-tune the model on dh U dl'.format(
                iteration))
            model.train(epochs=epochs, train_loader=dl)

            # update delta_0
            delta_0 = update_threshold(delta=delta_0, dr=dr, t=iteration)

        # remove the uncertain samples from the original `du`
        logger.info('remove {} uncertain samples from du'.format(
            len(uncert_samp_idx)))
        for val in uncert_samp_idx:
            du.sampler.indices.remove(val)

        acc = model.evaluate(test_loader=dtest)
        print("Iteration: {}, len(dl): {}, len(du): {},"
              " len(dh) {}, acc: {} ".format(iteration,
                                             len(dl.sampler.indices),
                                             len(du.sampler.indices),
                                             len(hcs_idx), acc))
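# update_threshold above is not shown; a minimal sketch of the CEAL paper's
# decay rule for the high-confidence threshold (delta - dr * t), under the
# assumption that the helper takes the same argument names as the call above:
def update_threshold(delta: float, dr: float, t: int) -> float:
    """Decay the high-confidence threshold as pseudo-labeling progresses."""
    return delta - dr * t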
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)

# training process
net = AlexNet(num_classes=5, init_weights=True)  # instantiate the network (5 output classes, initialized weights)
net.to(device)  # move the network to the chosen device (GPU/CPU) for training
loss_function = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = optim.Adam(net.parameters(), lr=0.0002)  # optimizer (trainable parameters, learning rate)

save_path = './AlexNet.pth'
best_acc = 0.0
for epoch in range(150):
    ########################################## train ###############################################
    net.train()  # enable Dropout during training
    running_loss = 0.0  # running_loss is reset every epoch
    time_start = time.perf_counter()  # time one training epoch

    for step, data in enumerate(train_loader, start=0):  # iterate the training set, step starts at 0
        images, labels = data  # unpack images and labels
        optimizer.zero_grad()  # clear historical gradients
        outputs = net(images.to(device))  # forward pass
        loss = loss_function(outputs, labels.to(device))  # compute the loss
        loss.backward()  # backward pass
        optimizer.step()  # optimizer updates the parameters

        running_loss += loss.item()
        # print training progress (visualize the training process)
        rate = (step + 1) / len(train_loader)
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        "val": transforms.Compose([
            transforms.Resize((224, 224)),  # cannot be 224, must be (224, 224)
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4,
                                                  shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    # test_data_iter = iter(validate_loader)
    # test_image, test_label = next(test_data_iter)
    #
    # def imshow(img):
    #     img = img / 2 + 0.5  # unnormalize
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    #
    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
    # imshow(utils.make_grid(test_image))

    net = AlexNet(num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # pata = list(net.parameters())
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
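# A minimal inference sketch using the weights and class_indices.json written
# by the script above; it assumes the same AlexNet class, and the image path
# in the usage line is illustrative:
import json
import torch
from PIL import Image
from torchvision import transforms

def predict(img_path, weights_path='./AlexNet.pth'):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # same preprocessing as the "val" transform above
    val_tf = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    img = val_tf(Image.open(img_path).convert('RGB')).unsqueeze(0)  # add batch dim
    with open('class_indices.json') as f:
        class_indict = json.load(f)
    model = AlexNet(num_classes=5).to(device)
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.eval()
    with torch.no_grad():
        output = torch.squeeze(model(img.to(device))).cpu()
        probs = torch.softmax(output, dim=0)
        cla = int(torch.argmax(probs))
    return class_indict[str(cla)], float(probs[cla])

# e.g. print(predict('some_flower.jpg'))  # path is illustrative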
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # the device used for training
    print("using {} device.".format(device))

    data_transform = {  # data preprocessing
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),   # random crop to 224x224
            transforms.RandomHorizontalFlip(),   # random horizontal flip
            transforms.ToTensor(),               # convert to tensor
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # normalize
        ]),
        "val": transforms.Compose([
            transforms.Resize((224, 224)),  # * cannot be 224, must be (224, 224)
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    }

    print(os.getcwd())
    # data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    # First get the root directory of the data set:
    # ^ os.getcwd() returns the working directory of the current process,
    #   which is not necessarily the directory of this file
    # "../.." is a relative path meaning two directory levels up
    # ^ os.path.join concatenates paths; joined, this gives the grandparent directory
    # ^ os.path.abspath() returns the absolute (full) path of a file or directory
    data_root = os.path.abspath(os.getcwd())
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    # equivalent to image_path = data_root + "/data_set/flower_data"
    # assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    train_dataset = datasets.ImageFolder(
        root=os.path.join(image_path, "train"),  # "train" selects the training split
        transform=data_transform["train"])       # apply the "train" preprocessing
    train_num = len(train_dataset)  # number of training images

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx  # * .class_to_idx maps class name -> index
    cla_dict = dict(
        (val, key) for key, val in flower_list.items())  # * invert the dict's key/value pairs
    # write dict into json file
    # json.dumps encodes the dict as JSON; indent=4 puts each key on its own line:
    # json_str:
    # '{
    #     "0": "daisy",
    #     "1": "dandelion",
    #     "2": "roses",
    #     "3": "sunflowers",
    #     "4": "tulips"
    # }'
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,  # load the data set
        batch_size=batch_size, shuffle=True,  # draw random batches of batch_size samples
        num_workers=nw)  # on Windows num_workers is usually 0; on Linux workers load in parallel

    validate_dataset = datasets.ImageFolder(
        root=os.path.join(image_path, "val"),  # equivalent to root=image_path + "/val"
        transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(
        validate_dataset,
        batch_size=batch_size, shuffle=False,
        # batch_size=4, shuffle=True,
        num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    # demo for inspecting the data set
    # note: first set batch_size=4, shuffle=True in validate_loader above, then:
    # test_data_iter = iter(validate_loader)
    # test_image, test_label = next(test_data_iter)
    # def imshow(img):
    #     img = img / 2 + 0.5  # unnormalize
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
    # imshow(utils.make_grid(test_image))

    net = AlexNet(num_classes=5, init_weights=True)  # 5 flower classes, initialize weights
    net.to(device)  # ^ move the network onto the device chosen above
    loss_function = nn.CrossEntropyLoss()  # multi-class cross-entropy loss
    # pata = list(net.parameters())  # for debugging: inspect the model's parameters
    optimizer = optim.Adam(net.parameters(), lr=0.0002)
    # Adam over all trainable parameters net.parameters(); learning rate 0.0002 is a tunable choice

    epochs = 10
    save_path = './AlexNet.pth'  # path for saving the weights
    best_acc = 0.0  # best accuracy so far, initialized to 0 and updated below
    train_steps = len(train_loader)
    for epoch in range(epochs):  # iterate 10 times
        # * dropout is used during training only, never during prediction
        # train
        net.train()  # enter training mode: dropout is active
        running_loss = 0.0  # track the average training loss
        train_bar = tqdm(train_loader)  # also gives the time per epoch
        for step, data in enumerate(train_bar):  # iterate the data set: images and labels
            images, labels = data
            optimizer.zero_grad()  # zero the gradients
            outputs = net(images.to(device))  # forward pass: images moved to the device, through net
            loss = loss_function(outputs,
                                 labels.to(device))  # loss between prediction and target; labels on the device too
            loss.backward()  # backpropagate to every node
            optimizer.step()  # update every node's parameters

            # print statistics
            running_loss += loss.item()  # accumulate the loss
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss)  # show training progress

        # validate
        net.eval()  # enter eval mode: dropout is disabled
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():  # * disable gradient tracking: no loss gradients during validation
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data  # split into images and labels
                outputs = net(val_images.to(device))  # forward pass; output shape is [batch, num_classes]
                predict_y = torch.max(outputs, dim=1)[1]
                # argmax over dim=1 (the class dimension); [1] keeps the index, not the value
                acc += torch.eq(predict_y,
                                val_labels.to(device)).sum().item()  # count correct predictions
                # ^ .item() extracts the Python number
                # acc += (predict_y == val_labels.to(device)).sum().item()  # equivalent

        val_accurate = acc / val_num  # correct count / sample count = average accuracy
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))
        if val_accurate > best_acc:  # if the current accuracy beats the best so far
            best_acc = val_accurate  # update it
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
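# Most of these scripts normalize with the ad-hoc (0.5, 0.5, 0.5) constants;
# a minimal sketch for computing a data set's actual per-channel mean/std,
# assuming an ImageFolder layout like the train splits above:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

def channel_stats(root):
    ds = datasets.ImageFolder(root, transform=transforms.Compose(
        [transforms.Resize((224, 224)), transforms.ToTensor()]))
    loader = DataLoader(ds, batch_size=64, shuffle=False)
    n, mean, sq = 0, torch.zeros(3), torch.zeros(3)
    for imgs, _ in loader:
        b = imgs.size(0)
        imgs = imgs.view(b, 3, -1)
        mean += imgs.mean(dim=2).sum(dim=0)   # per-image channel means, summed
        sq += (imgs ** 2).mean(dim=2).sum(dim=0)
        n += b
    mean /= n
    std = (sq / n - mean ** 2).sqrt()  # std = sqrt(E[x^2] - E[x]^2)
    return mean, std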