def train():
    """Fine-tune a VGG classifier from pretrained weights and checkpoint the best model.

    Relies on module-level names defined elsewhere in this file:
    ``loadData``, ``VGG``, ``evaluate``, ``learning_rate``, ``epochs``.

    Side effects: writes ``best_VGG.pkl`` / ``best_VGG_params.pkl`` whenever
    validation accuracy improves, and ``VGG.pkl`` / ``VGG_params.pkl`` at the end.
    """
    train_dataloader, val_dataloader = loadData()
    # NOTE(review): torch.load here appears to return a full model object
    # (state_dict() is called on it below) — confirm the .pth contents.
    pretrained_params = torch.load('VGG_pretrained.pth')
    model = VGG()
    # strict=False: pretrained parameters whose names/shapes match the new
    # model are loaded; parameters that do not match are silently discarded.
    model.load_state_dict(pretrained_params.state_dict(), strict=False)
    if torch.cuda.is_available():
        model.cuda()
    # To freeze specific layers during fine-tuning, uncomment and name them:
    # for p in model.XXlayers.parameters():
    #     p.requires_grad = False
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_func = nn.CrossEntropyLoss()
    best_acc = 0
    for epoch in range(epochs):
        epoch_loss = 0
        steps = 0
        model.train()  # hoisted out of the batch loop; per-batch calls were redundant
        for i, data in enumerate(train_dataloader):
            inputs, labels = data
            if torch.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()
            # Deprecated Variable() wrapping removed — tensors autograd directly.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()
            # .item() replaces loss.data[0], which raises on 0-dim tensors
            # in PyTorch >= 0.4.
            epoch_loss += loss.item()
            steps += 1
        print('epoch:%d loss:%.3f' % (epoch + 1, epoch_loss / steps))
        if epoch % 5 == 0:
            val_acc = evaluate(model, val_dataloader)
            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model, 'best_VGG.pkl')
                torch.save(model.state_dict(), 'best_VGG_params.pkl')
            # Bug fix: original was 'test acc:'.format(val_acc) — no
            # placeholder, so the accuracy value was never printed.
            print('test acc: {}'.format(val_acc))
    print('Finished Training')
    torch.save(model, 'VGG.pkl')
    torch.save(model.state_dict(), 'VGG_params.pkl')
# NOTE(review): this chunk begins mid `if/else` — the opening `if` branch
# (presumably loading/resuming an existing generator G before G.train())
# is outside the visible source; left byte-identical rather than guessed at.
# Contents: generator construction, criterion/l2_loss setup, optional
# VGG texture-loss network, Adam optimizer, plus two helpers:
#   to_variable(x)  — moves x to GPU when available and wraps in (deprecated) Variable.
#   adjust_learning_rate(optimizer, opt) — mutates opt.lr (x0.1) and pushes
#   the new rate into every optimizer param group.
# NOTE(review): `criterion` squares an already-absolute difference, i.e.
# mean(|a-b|^2) == MSE; the .view(-1) before mean is a no-op — confirm intent.
G.train() else: G = Model(img_channels=3, nof_blocks=opt.nof_blocks, feat_size=opt.feat_size) if cuda: G = G.cuda() print('===> Building Loss') def criterion(a, b): return torch.mean(torch.abs((a-b)**2).view(-1)) l2_loss = nn.MSELoss().cuda() if opt.loss_texture: vgg_layers = [int(i) for i in opt.texture_layers] vgg_texture = VGG(layers=vgg_layers, replace_pooling = False) if cuda: vgg_texture = vgg_texture.cuda() print('===> Building Optimizer') optimizer = optim.Adam(G.parameters(), lr=opt.lr) def to_variable(x): """Convert tensor to variable.""" if torch.cuda.is_available(): x = x.cuda() return Variable(x) def adjust_learning_rate(optimizer, opt): opt.lr *= 0.1 for param_group in optimizer.param_groups: param_group['lr'] = opt.lr
def objective(params):
    """Hyperparameter-search objective: train a VGG for 50 epochs, return val loss.

    Parameters
    ----------
    params : sequence of 12 values
        params[0:10] — kernel sizes (castable to int) for VGG kernel1..kernel10;
        params[10], params[11] — dropout rates (castable to float) for
        dropout5 / dropout6.

    Returns
    -------
    float
        Summed validation loss of the final (50th) epoch — lower is better.

    Relies on module-level names defined elsewhere in this file:
    ``VGG``, ``use_cuda``, ``trainloader``, ``validloader``.
    """
    # Build kernel1..kernel10 kwargs instead of ten hand-written locals.
    kernel_kwargs = {'kernel%d' % (i + 1): int(params[i]) for i in range(10)}
    net = VGG(dropout5=float(params[10]), dropout6=float(params[11]),
              **kernel_kwargs)
    if use_cuda:
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
        # NOTE(review): reformatted from a collapsed line; assumes net.cuda()
        # applies whenever CUDA is used (inputs below are moved with .cuda(),
        # so the model must be too) — confirm against the original layout.
        net.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9,
                          weight_decay=5e-4)
    num_epochs = 50
    val_loss = 0
    for _ in range(num_epochs):
        # Training pass. Dead accumulators (train_loss/correct/total) that fed
        # only commented-out prints have been dropped.
        net.train()
        for inputs, targets in trainloader:
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
        # Validation pass: torch.no_grad() replaces the deprecated
        # Variable(..., volatile=True); .item() replaces loss.data[0],
        # which raises on 0-dim tensors in PyTorch >= 0.4.
        net.eval()
        val_loss = 0  # reset per epoch, as in the original; last epoch wins
        with torch.no_grad():
            for inputs, targets in validloader:
                if use_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                outputs = net(inputs)
                val_loss += criterion(outputs, targets).item()
    return val_loss