# Train `net` for two passes over `trainloader`, printing the mean loss every
# 2000 mini-batches, then save the trained weights to MODEL_PATH.
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; since PyTorch 0.4 tensors track gradients
        # themselves, so no Variable wrapping is needed
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        # loss is a 0-dim tensor: `loss.data[0]` raises on PyTorch >= 0.5,
        # `.item()` is the supported way to read it as a Python number.
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

# save model
torch.save(net.state_dict(), MODEL_PATH)
class Trainer(object):
    """Train and validate ``Net`` on an ``MnistDataset``.

    ``args`` must provide: ``train_image_file``, ``train_label_file``,
    ``batch_size``, ``lr``, ``ckpt`` (checkpoint directory), ``epochs`` and
    ``name`` (checkpoint file-name prefix).

    Construction loads the data and builds the model, optimizer and loss, so
    it performs file I/O and may create the checkpoint directory.
    """

    def __init__(self, args):
        self.args = args
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.prepare_data()
        self.setup_train()

    def prepare_data(self):
        """Build the train/validation loaders from a random 80/20 split."""
        train_val = MnistDataset(
            self.args.train_image_file,
            self.args.train_label_file,
            transform=transforms.Compose([ToTensor()]),
        )
        train_len = int(0.8 * len(train_val))
        train_ds, val_ds = torch.utils.data.random_split(
            train_val, [train_len, len(train_val) - train_len]
        )
        print("Train {}, val {}".format(len(train_ds), len(val_ds)))
        self.train_loader = torch.utils.data.DataLoader(
            train_ds,
            batch_size=self.args.batch_size,
            collate_fn=collate_fn,
            shuffle=True,
        )
        self.val_loader = torch.utils.data.DataLoader(
            val_ds,
            batch_size=self.args.batch_size,
            collate_fn=collate_fn,
            shuffle=False,
        )

    def setup_train(self):
        """Create the model, SGD optimizer, loss and the checkpoint directory."""
        self.model = Net().to(self.device)
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.args.lr)
        self.criterion = nn.CrossEntropyLoss().to(self.device)
        # makedirs(exist_ok=True) also creates missing parent directories and
        # avoids the check-then-create race of isdir() + mkdir().
        os.makedirs(self.args.ckpt, exist_ok=True)

    def train_one_epoch(self):
        """Run one optimisation pass over the training loader.

        Returns:
            Mean per-batch training loss as a Python float.
        """
        train_loss = 0.0
        self.model.train()
        for sample in self.train_loader:
            # Batches are dicts with "X" (inputs) and "Y" (targets) entries.
            X, Y_true = sample["X"].to(self.device), sample["Y"].to(self.device)
            self.optimizer.zero_grad()
            output = self.model(X)
            loss = self.criterion(output, Y_true)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
        return train_loss / len(self.train_loader)

    def evaluate(self):
        """Score the model on the validation loader without gradient tracking.

        Returns:
            ``(accuracy, mean_batch_loss)`` as Python floats, where accuracy
            is the fraction of validation samples whose arg-max prediction
            equals the target.
        """
        val_loss = 0.0
        self.model.eval()
        predicts = []
        truths = []
        with torch.no_grad():
            for sample in self.val_loader:
                X, Y_true = sample["X"].to(self.device), sample["Y"].to(self.device)
                output = self.model(X)
                val_loss += self.criterion(output, Y_true).item()
                predicts.append(torch.argmax(output, dim=1))
                truths.append(Y_true)
        predicts = torch.cat(predicts, dim=0)
        truths = torch.cat(truths, dim=0)
        # Pull the count out as a Python number before dividing: dividing the
        # integer tensor directly floors on older PyTorch releases and leaves
        # a device tensor in the caller's hands on newer ones.
        num_correct = torch.sum(torch.eq(predicts, truths)).item()
        return float(num_correct) / len(predicts), val_loss / len(self.val_loader)

    def run(self):
        """Train for ``args.epochs`` epochs, checkpointing on each new best accuracy."""
        max_acc = 0
        for epoch in range(self.args.epochs):
            train_loss = self.train_one_epoch()
            val_acc, _ = self.evaluate()
            if val_acc > max_acc:
                max_acc = val_acc
                torch.save(
                    self.model.state_dict(),
                    os.path.join(
                        self.args.ckpt,
                        "{}_{}_{:.4f}.pth".format(self.args.name, epoch, max_acc),
                    ),
                )
            print(
                "Epoch {}, loss {:.4f}, val_acc {:.4f}".format(
                    epoch, train_loss, val_acc
                )
            )
# NOTE(review): this fragment starts inside loops whose headers are outside the
# visible chunk, so the original nesting cannot be recovered here. Statements
# are laid out flat; re-indent them to match the enclosing loops — TODO confirm.

# Accumulate validation loss and correct-prediction count
# (presumably once per validation batch — TODO confirm).
eval_loss += loss.item()
eval_pred = torch.max(predictions, 1)[1]  # indices of the max along dim 1
num_correct = (eval_pred == targets).sum()
eval_acc += num_correct.item()

# Record this result and check whether it is the best seen so far
# (presumably once per epoch — TODO confirm).
eval_acc_list.append(eval_acc)
is_best = eval_acc == max(eval_acc_list)
if is_best:
    best_pred = eval_acc / (len(data['valid']))
    best_msg = 'The best best_predAcc is {:.6f}, epoch {}'.format(best_pred, epoch + 1)
    print(best_msg)
    f.write(best_msg + '\n')

# Save the model
checkpoint_path = os.path.join(save_model_path, 'model-20200904-2.pth.tar')
torch.save(net.state_dict(), checkpoint_path)
# If this is the best result, copy the checkpoint as the best model
if is_best:
    shutil.copyfile(checkpoint_path,
                    os.path.join(save_model_path, 'best_model-20200904-2.pth.tar'))

# Emit the log line to both stdout and the log file
epoch_msg = 'epoch {} trainLoss {:.6f} trainAcc {:.6f} validLoss {:.6f} validAcc {:.6f}'.format(
    epoch + 1,
    train_loss / (len(data['train'])),
    train_acc / (len(data['train'])),
    eval_loss / (len(data['valid'])),
    eval_acc / (len(data['valid'])))
print(epoch_msg)
f.write(epoch_msg + '\n')

end = time.time()
# time.time() returns a float; concatenating it directly with a str raises
# TypeError, so convert it before writing.
f.write(str(end) + '\n')
num_correct = (eval_pred == targets).sum() eval_acc += num_correct.item() eval_acc_list.append(eval_acc) if eval_acc == max(eval_acc_list): is_best = True best_pred = eval_acc / (len(data['valid'])) print('The best best_predAcc is {:.6f}, epoch {}'.format( best_pred, epoch + 1)) f.write('The best best_predAcc is {:.6f}, epoch {}'.format( best_pred, epoch + 1) + '\n') else: is_best = False # 保存模型 torch.save(model.state_dict(), os.path.join(save_model_path, '8-24.pth.tar')) # 如果是best,则复制最优模型 if is_best: shutil.copyfile(os.path.join(save_model_path, '8-24.pth.tar'), os.path.join(save_model_path, 'best-8-24.pth.tar')) # 输出日志信息 print( 'epoch {} trainLoss {:.6f} trainAcc {:.6f} validLoss {:.6f} validAcc {:.6f}' .format(epoch + 1, train_loss / (len(data['train'])), train_acc / (len(data['train'])), eval_loss / (len(data['valid'])), eval_acc / (len(data['valid'])))) f.write( 'epoch {} trainLoss {:.6f} trainAcc {:.6f} validLoss {:.6f} validAcc {:.6f}'