output_prob = F.softmax(output, dim=1)
predict_vector = np.argmax(to_np(output_prob), axis=1)
label_vector = to_np(tags)
bool_vector = predict_vector == label_vector
accuracy = bool_vector.sum() / len(bool_vector)

if batch_idx % args.log_interval == 0:
    print('Batch {} / {}: Batch Loss {:2.4f} / Batch Acc {:2.4f}'.format(
        batch_idx, len(dataloader), loss.item(), accuracy))
total_loss += loss.item()
total_correct += bool_vector.sum()

## validation
model.eval()
for batch_idx, (image, tags) in enumerate(valid_dataloader):
    image = image.to(device)
    tags = tags.to(device)
    output = model(image).double()
    loss = criterion(output, tags)

    output_prob = F.softmax(output, dim=1)
    predict_vector = np.argmax(to_np(output_prob), axis=1)
    label_vector = to_np(tags)
    bool_vector = predict_vector == label_vector
    accuracy = bool_vector.sum() / len(bool_vector)

    total_valid_loss += loss.item()
    total_valid_correct += bool_vector.sum()

nsml.save(epoch_idx)
print(
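
# NOTE: `to_np` is used above but not defined in this snippet. A minimal
# sketch of what it presumably does (an assumption, not the original
# helper): detach the tensor from the graph, move it to the CPU, and
# convert it to a NumPy array.
def to_np(t):
    return t.detach().cpu().numpy()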
class Trainer:
    def __init__(self, total_cls):
        self.total_cls = total_cls
        self.seen_cls = 0
        self.dataset = Cifar100()
        self.model = Resnet(32, total_cls).cuda()
        print(self.model)
        self.input_transform = Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, padding=4),
            ToTensor(),
            Normalize([0.5071, 0.4866, 0.4409],
                      [0.2673, 0.2564, 0.2762])
        ])
        self.input_transform_eval = Compose([
            ToTensor(),
            Normalize([0.5071, 0.4866, 0.4409],
                      [0.2673, 0.2564, 0.2762])
        ])
        total_params = sum(p.numel() for p in self.model.parameters()
                           if p.requires_grad)
        print("Solver total trainable parameters : ", total_params)
        print("---------------------------------------------")

    def eval(self, valdata):
        self.model.eval()
        correct = 0
        wrong = 0
        for i, (image, label) in enumerate(valdata):
            image = image.cuda()
            label = label.view(-1).cuda()
            p = self.model(image)
            # Only the logits of classes seen so far take part in prediction.
            pred = p[:, :self.seen_cls].argmax(dim=-1)
            correct += (pred == label).sum().item()
            wrong += (pred != label).sum().item()
        acc = correct / (wrong + correct)
        print("Val Acc: {}".format(acc * 100))
        self.model.train()
        print("---------------------------------------------")
        return acc

    # Get the current learning rate from the optimizer.
    def get_lr(self, optimizer):
        for param_group in optimizer.param_groups:
            return param_group['lr']

    def train(self, batch_size, epoches, lr, max_size, is_WA):
        total_cls = self.total_cls
        criterion = nn.CrossEntropyLoss()
        # Snapshot of the model from the previous incremental step,
        # used as the teacher for knowledge distillation.
        self.previous_model = None

        dataset = self.dataset
        val_xs = []
        val_ys = []
        train_xs = []
        train_ys = []
        test_accs = []

        for step_b in range(dataset.batch_num):
            print(f"Incremental step : {step_b + 1}")

            # Get the train and val data for step b,
            # and split them into train_x, train_y, val_x, val_y.
            train, val = dataset.getNextClasses(step_b)
            print(f'number of trainset: {len(train)}, number of valset: {len(val)}')
            train_x, train_y = zip(*train)
            val_x, val_y = zip(*val)
            val_xs.extend(val_x)
            val_ys.extend(val_y)
            train_xs.extend(train_x)
            train_ys.extend(train_y)

            # Transform the data and prepare the dataloaders.
            train_data = DataLoader(BatchData(train_xs, train_ys,
                                              self.input_transform),
                                    batch_size=batch_size,
                                    shuffle=True, drop_last=True)
            val_data = DataLoader(BatchData(val_xs, val_ys,
                                            self.input_transform_eval),
                                  batch_size=batch_size, shuffle=False)

            # Set up the optimizer and scheduler.
            optimizer = optim.SGD(self.model.parameters(), lr=lr,
                                  momentum=0.9, weight_decay=2e-4)
            scheduler = MultiStepLR(optimizer, [100, 150, 200], gamma=0.1)

            # Track how many classes have been seen so far.
            self.seen_cls += total_cls // dataset.batch_num
            print("seen classes : ", self.seen_cls)

            for epoch in range(epoches):
                print("---------------------------------------------")
                print("Epoch", epoch)
                scheduler.step()
                cur_lr = self.get_lr(optimizer)
                print("Current Learning Rate : ", cur_lr)

                # Train the model, with knowledge distillation from the
                # second incremental step onwards.
                self.model.train()
                if step_b >= 1:
                    self.stage1_distill(train_data, criterion, optimizer)
                else:
                    self.stage1(train_data, criterion, optimizer)
                acc = self.eval(val_data)

            if is_WA:
                # Maintaining fairness between old and new classes.
                if step_b >= 1:
                    self.model.weight_align(step_b)

            # Deep-copy the current model; it serves as the KD teacher
            # in the next incremental step.
            self.previous_model = deepcopy(self.model)

            # Evaluate the final accuracy at the end of this step.
            acc = self.eval(val_data)
            test_accs.append(acc)
            print(f'Previous accuracies: {test_accs}')

    def stage1(self, train_data, criterion, optimizer):
        print("Training ... ")
        losses = []
        for i, (image, label) in enumerate(train_data):
            image = image.cuda()
            label = label.view(-1).cuda()
            p = self.model(image)
            loss = criterion(p[:, :self.seen_cls], label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        print("CE loss :", np.mean(losses))

    def stage1_distill(self, train_data, criterion, optimizer):
        print("Training ... ")
        distill_losses = []
        ce_losses = []
        T = 2
        # beta grows with the fraction of old classes (20 new classes
        # are added per step).
        beta = (self.seen_cls - 20) / self.seen_cls
        print("classification proportion 1-beta = ", 1 - beta)
        for i, (image, label) in enumerate(train_data):
            image = image.cuda()
            label = label.view(-1).cuda()
            p = self.model(image)
            # Teacher outputs need no gradients.
            with torch.no_grad():
                previous_q = self.previous_model(image)
                previous_q = F.softmax(previous_q[:, :self.seen_cls - 20] / T,
                                       dim=1)
            log_current_p = F.log_softmax(p[:, :self.seen_cls - 20] / T, dim=1)
            # Soft-target cross entropy between teacher and student; the
            # T*T factor compensates for the temperature-softened gradients.
            loss_distillation = -torch.mean(
                torch.sum(previous_q * log_current_p, dim=1))
            loss_crossEntropy = nn.CrossEntropyLoss()(p[:, :self.seen_cls],
                                                      label)
            loss = loss_distillation * T * T + (1 - beta) * loss_crossEntropy
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            distill_losses.append(loss_distillation.item())
            ce_losses.append(loss_crossEntropy.item())
        print("KD loss :", np.mean(distill_losses),
              "; CE loss :", np.mean(ce_losses))
import matplotlib.pyplot as plt
import os

import torch

from dataset import TrainDataset
from model import Resnet

path = "pytorch/model_l1_128.pth"

### MODEL ###
model = Resnet().cuda()
model = model.eval()

### DATASET ###
dataset = TrainDataset(max_num_pic=3)

### LOAD ###
if os.path.isfile(path):
    m = torch.load(path)
    model.load_state_dict(m["model"])
    del m

benchmark_img = dataset.transform(
    "DanbooRegion2020/train/0.image.png").unsqueeze(0).cuda()
benchmark_skel = dataset.transform(
    "DanbooRegion2020/train/0.skeleton.png").unsqueeze(0).expand(
        1, 3, -1, -1).cuda()

y = model(benchmark_img)
plt.imsave("pytorch/test.png",
           -y[0, 0].detach().cpu().numpy() + 1, cmap='Greys')
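
# NOTE: the load above expects a checkpoint dict with the state_dict
# stored under the "model" key. The matching save side (a sketch; the
# original save code is not shown) would be:
#
#     torch.save({"model": model.state_dict()}, path)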
def train():
    my_model = Resnet(kernel_size=3, filters=64, inChannels=3,
                      input_shape=(3, 240, 320),
                      conv_nonlinearity='relu', num_class=25)
    my_model = my_model.to(device)
    if os.path.exists('my_model.pt'):
        my_model.load_state_dict(torch.load('my_model.pt'))
        print('Load my_model.pt')

    batch_size = 32
    num_epoch = 100
    num_classes = 25
    learning_rate = 8e-4

    train_set = MyDataset(is_train=True, num_cat=num_classes)
    validation_set = MyDataset(is_train=False, num_cat=num_classes)
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True)
    validation_loader = torch.utils.data.DataLoader(validation_set,
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    pin_memory=True)

    optimizer = torch.optim.Adam(my_model.parameters(), lr=learning_rate)
    loss_func = torch.nn.NLLLoss()
    scheduler = ReduceLROnPlateau(optimizer, 'max', factor=0.5, patience=10,
                                  threshold=2e-1, verbose=True, min_lr=1e-5)

    bestTestAccuracy = 0
    print('Start training')
    train_size = len(train_loader.dataset)
    test_size = len(validation_loader.dataset)

    for epoch in range(num_epoch):
        total = 0
        correct = 0
        my_model.train()
        for i, data in enumerate(train_loader, 0):
            labels = data['label'].to(device)
            img = data['img'].to(device).float()
            prediction = my_model(img)
            loss = loss_func(prediction, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(prediction, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            print(
                f'Train | Epoch {epoch}/{num_epoch}, Batch {i}/{int(train_size / batch_size)} '
                f' Loss: {loss.item():.3f} LR: {get_lr(optimizer):.6f}'
                f' Acc: {(100 * correct / total):.3f}')

        total = 0
        correct = 0
        my_model.eval()
        with torch.no_grad():  # no gradients needed during evaluation
            for i, data in enumerate(validation_loader, 0):
                labels = data['label'].to(device)
                img = data['img'].to(device).float()
                prediction = my_model(img)
                # Compute the validation loss; the original printed the
                # stale training loss here.
                loss = loss_func(prediction, labels)
                _, predicted = torch.max(prediction, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                print(
                    f'Test | Epoch {epoch}/{num_epoch}, Batch {i}/{int(test_size / batch_size)} '
                    f' Loss: {loss.item():.3f} LR: {get_lr(optimizer):.6f}'
                    f' Acc: {(100 * correct / total):.3f} Best-so-far: {100 * bestTestAccuracy:.5f}'
                )

        if (correct / total) > bestTestAccuracy:
            bestTestAccuracy = correct / total
            print(f'Update best test: {100 * bestTestAccuracy:.5f}')
            torch.save(
                my_model.state_dict(),
                f"my_model_{str(round(100 * bestTestAccuracy, 2)).replace('.', '_')}.pt"
            )
        scheduler.step(bestTestAccuracy)
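
# NOTE: `get_lr` is called in the logging lines above but not defined in
# this snippet. A minimal sketch, mirroring the Trainer.get_lr helper
# above: read the learning rate of the first parameter group.
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']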
# Feed the image into conv1 to obtain its 64 feature maps.
kernels = net.get_featureconv1(image).cpu().data.clone()
visTensor(kernels, epoch, 'conv1-0feature', ch=0, allkernels=True)
plt.axis('off')
plt.ioff()
plt.show()

# Try to load weights from a checkpoint and continue training.
b = args.b
if b:
    # The r-prefix makes this a raw string, so the Windows path needs no
    # escape characters. Once the best weights are loaded, the network
    # can resume training or be evaluated directly.
    net.load_state_dict(
        torch.load(
            r'C:\Users\acrobat\.spyder-py3\checkpoint\20191021\123-best.pth'
        ), True)

net.eval()  # switch to evaluation mode

correct_1 = 0.0
correct_5 = 0.0
total = 0

for n_iter, (image, label) in enumerate(cifar10_test_loader):
    print("iteration: {}\ttotal {} iterations".format(
        n_iter + 1, len(cifar10_test_loader)))
    image = Variable(image).cuda()
    label = Variable(label).cuda()
    output = net(image)
    # Top-5 predictions per sample, sorted by descending score.
    _, pred = output.topk(5, 1, largest=True, sorted=True)
    label = label.view(label.size(0), -1).expand_as(pred)
    correct = pred.eq(label).float()
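
# NOTE: the snippet ends before correct_1 / correct_5 are accumulated.
# Under the usual top-k convention (column j of `correct` marks whether
# the j-th ranked prediction hits the label), the continuation inside
# the loop would plausibly be (a sketch, not the original code):
#
#     correct_5 += correct[:, :5].sum().item()
#     correct_1 += correct[:, :1].sum().item()
#     total += label.size(0)
#
# followed by, after the loop:
#
#     print("Top 1 acc: ", correct_1 / total)
#     print("Top 5 acc: ", correct_5 / total)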