def main(args):
    """Train AlexNet and dump a per-iteration training log to ``log/*.csv``.

    The optimizer is plain ``optim.SGD`` when ``args.no_distributed`` is set
    and ``DownpourSGD`` otherwise.  Every ``args.log_interval`` iterations
    (iteration 0 excluded) the model is additionally evaluated on the test
    loader and a progress line is printed.  After each epoch the test loss
    is fed to a ``ReduceLROnPlateau`` scheduler.

    Args:
        args: Parsed options.  This function reads ``no_distributed``,
            ``lr``, ``num_push``, ``num_pull``, ``cuda``, ``epochs`` and
            ``log_interval``; the object is also forwarded unchanged to
            ``get_dataset`` and ``evaluate``.

    Side effects:
        Prints progress, and writes ``log/gpu.csv``, ``log/single.csv`` or
        ``log/node<rank>.csv`` depending on the run mode.
    """
    import os  # local import: only needed to create the log directory

    logs = []

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    trainloader, testloader = get_dataset(args, transform)
    net = AlexNet()

    if args.no_distributed:
        optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.0)
    else:
        optimizer = DownpourSGD(net.parameters(), lr=args.lr,
                                n_push=args.num_push, n_pull=args.num_pull,
                                model=net)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=1, verbose=True, min_lr=1e-3)

    # train
    net.train()
    if args.cuda:
        net = net.cuda()

    for epoch in range(args.epochs):  # loop over the dataset multiple times
        print("Training for epoch {}".format(epoch))
        # evaluate() is defined outside this block and may leave the model in
        # eval mode; re-entering train mode is idempotent, so do it every epoch.
        net.train()
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data
            if args.cuda:
                inputs, labels = inputs.cuda(), labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs, 1)
            # accuracy_score is defined elsewhere; if it converts its inputs
            # to NumPy, CUDA tensors would raise, so hand it CPU tensors
            # (a no-op when the run is already on the CPU).
            accuracy = accuracy_score(predicted.cpu(), labels.cpu())

            log_obj = {
                'timestamp': datetime.now(),
                'iteration': i,
                'training_loss': loss.item(),
                'training_accuracy': accuracy,
            }

            if i % args.log_interval == 0 and i > 0:  # print every n mini-batches
                log_obj['test_loss'], log_obj['test_accuracy'] = evaluate(
                    net, testloader, args)
                # Same reason as at the top of the epoch: keep training in
                # train mode after the mid-epoch evaluation.
                net.train()
                print("Timestamp: {timestamp} | "
                      "Iteration: {iteration:6} | "
                      "Loss: {training_loss:6.4f} | "
                      "Accuracy : {training_accuracy:6.4f} | "
                      "Test Loss: {test_loss:6.4f} | "
                      "Test Accuracy: {test_accuracy:6.4f}".format(**log_obj))

            # Rows logged between evaluations simply lack the test_* keys.
            logs.append(log_obj)

        val_loss, _ = evaluate(net, testloader, args, verbose=True)
        scheduler.step(val_loss)

    df = pd.DataFrame(logs)
    print(df)

    # to_csv does not create missing directories; exist_ok also covers
    # several nodes on one machine racing to create it.
    os.makedirs('log', exist_ok=True)
    if args.no_distributed:
        if args.cuda:
            df.to_csv('log/gpu.csv', index_label='index')
        else:
            df.to_csv('log/single.csv', index_label='index')
    else:
        df.to_csv('log/node{}.csv'.format(dist.get_rank()), index_label='index')

    print('Finished Training')
params(iterable) --- 可迭代的参数来优化或取消定义参数组 lr(float, 可选) --- 学习率(默认值 1e-3) beta(Tuple[float, float], 可选) --- 用于计算梯度及其平方的运行平均值的系数(默认值:(0.9,0.999)) eps (float, 可选) ---- 添加到分母以提高数值稳定性(默认值:1e-8) weight_decay (float, 可选) --- 权重衰减(L2 惩罚)(默认值:0) amsgrad (boolean, 可选) ---- 是否使用该算法的AMSGrad变体来自论文关于 Adam 和 Beyond 的融合 2、还有这里我们使用的损失函数 class torch.nn.CrossEntropyLoss(weight=None, size_average=True, ignore_index=-100, reduce=True) 交叉熵损失函数 具体的请看:http://pytorch.apachecn.org/cn/docs/0.3.0/nn.html ''' # 3. 设置优化器和损失函数 # 这里我们使用 Adam 优化器,使用的损失函数是 交叉熵损失 optimizer = torch.optim.Adam(cnn.parameters(), lr=0.005, betas=(0.9, 0.99)) # 优化所有的 cnn 参数 loss_func = nn.CrossEntropyLoss() # 目标 label 不是 one-hotted 类型的 # --------------------------- 3.设置相应的优化器和损失函数 end ------------------------------------------------------------------ # --------------------------- 4.训练 CNN 模型 start ------------------------------------------------------------------ # 4. 训练模型 # 设置训练模型的次数,这里我们设置的是 10 次,也就是用我们的训练数据集对我们的模型训练 10 次,为了节省时间,我们可以只训练 1 次 EPOCH = 10 # 训练和测试 for epoch in range(EPOCH): num = 0 # 给出 batch 数据,在迭代 train_loader 的时候对 x 进行 normalize for step, (x, y) in enumerate(loader_train): b_x = Variable(x) # batch x
batch_size=20, shuffle=True, num_workers=1) testdir = '/opt/data/kaggle/playground/dogs-vs-cats/sample_test' test_dataset = DogCat(testdir, train=True) loader_test = data.DataLoader(test_dataset, batch_size=3, shuffle=True, num_workers=1) # 2. 创建 CNN 模型 cnn = AlexNet() print(cnn) # 3. 设置优化器和损失函数 optimizer = torch.optim.Adam(cnn.parameters(), lr=0.005, betas=(0.9, 0.99)) # optimize all cnn parameters loss_func = nn.CrossEntropyLoss() # the target label is not one-hotted # 4. 训练模型 EPOCH = 10 # train the training data n times, to save time, we just train 1 epoch # training and testing for epoch in range(EPOCH): num = 0 # gives batch data, normalize x when iterate train_loader for step, (x, y) in enumerate(loader_train): b_x = Variable(x) # batch x b_y = Variable(y) # batch y output = cnn(b_x) # cnn output loss = loss_func(output, b_y) # cross entropy loss
from models.DenseNet import * model = DenseNet121() elif modelName == "MobileNet": from models.MobileNet import * model = MobileNet() else: model = None if model is not None: model = model.to(Device) else: raise Exception("model not found") # optimizer and loss function optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weightDecay) loss_func = torch.nn.CrossEntropyLoss() # use testset to evaluate def evaluateModel(): total_acc = 0.0 total_loss = 0.0 step = 0 with torch.no_grad(): for i, (image, label) in enumerate(test_loader): image, label = image.to(Device), label.to(Device) if modelName != 'GoogLeNet': # Others output = model(image)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, sampler=valid_sampler) test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, sampler=test_sample) # GPU setup device = torch.device('cuda') net = AlexNet(in_channel=2, classes=10).to(device=device) criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(net.parameters(), lr=0.001) #, momentum=0.9) # Train loop num_epochs = 25 for epoch in range(num_epochs): print("Epoch: {} - Train".format(epoch)) net.train() running_loss = 0. # Train: for batch_index, (signals, labels) in enumerate(tqdm(train_loader)): signals, labels = signals.to(device=device), labels.to(device=device) optimizer.zero_grad()
# Move the already-loaded network to the GPU when one is available.
if torch.cuda.is_available():
    print('CUDA enabled.')
    net.cuda()
print("--- Pretrained network loaded ---")
# test(net, loader_test)

# Prune the weights: compute masks for the configured pruning percentage and
# install them on the network.
masks = weight_prune(net, param['pruning_perc'])
net.set_masks(masks)
# The DataParallel wrapper is applied only after set_masks has been called on
# the bare network; from here on `net` refers to the wrapper.
net = nn.DataParallel(net)
print("--- {}% parameters pruned ---".format(param['pruning_perc']))
test(net, loader_test)

# Retraining
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(net.parameters(), lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])
train(net, criterion, optimizer, param, loader_train)

# Check accuracy and nonzero weights in each layer
print("--- After retraining ---")
test(net, loader_test)
prune_rate(net)

# Save the retrained weights.
# NOTE(review): this writes the state_dict of the DataParallel wrapper, not
# the whole model object, and nothing is loaded back here.
torch.save(net.state_dict(), 'models/alexnet_pruned.pkl')
# CIFAR-10 test split (downloaded to ./data if missing) and its loader;
# evaluation order is kept fixed (shuffle=False).
dtest = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test
)
test_loader = torch.utils.data.DataLoader(
    dtest, batch_size=BATCH_SIZE, shuffle=False
)

# NOTE(review): `device` is computed here but not used in the lines below —
# presumably consumed by epoch_train/epoch_eval; confirm.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Use cross-entropy as the loss function
criterion = nn.CrossEntropyLoss()

# Define the model
model = AlexNet(num_classes=num_classes)

# The optimizer is SGD
optimizer = torch.optim.SGD(model.parameters(), lr=LR,
                            momentum=0.9, weight_decay=5e-4)

# The actual training loop: one training pass and one evaluation pass per epoch
for epoch in range(1, NUM_EPOCHS + 1):
    train_loss, train_acc = epoch_train(train_loader, model, optimizer, criterion)
    test_loss, test_acc = epoch_eval(test_loader, model, criterion)

    print(f'EPOCH: [{epoch}/{NUM_EPOCHS}]')
    print(f'TRAIN LOSS: {train_loss:.3f}, TRAIN ACC: {train_acc:.3f}')
    print(f'TEST LOSS: {test_loss:.3f}, TEST ACC: {test_acc:.3f}')

    # Save the weights like this (one file per epoch, named after the epoch)
    parameters = model.state_dict()
    torch.save(parameters, f'../weights/{epoch}.pth')
for i in range(cat_samples.shape[0]): tensorboard.add_images("Cat/{}".format(i), None, cat_samples[i:i + 1]) tensorboard.add_images("Dog/{}".format(i), None, dog_samples[i:i + 1]) model = AlexNet(input_size, output_size) cur_epoch = 0 if RESUME_TRAINING: cur_epoch, optimizer_states_dict, loss = load_checkpoint(model) cur_epoch += 1 model = model.to(device) print(model) loss_func = torch.nn.BCELoss(reduction="mean") optimizer = opt(model.parameters(), lr=lr, momentum=momentum) if RESUME_TRAINING: optimizer.load_state_dict(optimizer_states_dict) # Train for epoch in range(cur_epoch, epochs): accumulated_train_loss = [] # Set model in trainng mode model.train() iteration = 0 for batch_x, batch_y in tqdm(train_generator): # Forward preds = model(batch_x) # compute loss
def main():
    """Train or evaluate a 101-class classifier on an ImageFolder dataset.

    Parses the command line into the module-level ``args``, builds the model,
    loss and SGD optimizer, optionally resumes from ``args.resume``, creates
    the train/test loaders from ``<args.data>/train`` and ``<args.data>/test``
    and then either runs a single validation pass (``args.evaluate``) or the
    full train/validate/checkpoint loop from ``args.start_epoch`` to
    ``args.epochs``.

    Globals:
        args: Rebound to the parsed command-line namespace.
        best_prec1: Updated with the best validation precision@1 seen so far.
    """
    global args, best_prec1
    args = parser.parse_args()
    print(args)

    # Set # classes
    if args.data == 'UCF101':
        num_classes = 101
    else:
        num_classes = 0
        print('Specify the dataset to use ')

    # Create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.arch.startswith('alexnet'):
        model = AlexNet(num_classes=num_classes)
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # Modify last layer of the model
    # NOTE(review): from here on the model selected via args.arch above is
    # discarded and a pretrained ResNet-18 with a 101-way head is always
    # used.  Kept as-is to preserve behaviour; confirm whether intended.
    model_ft = models.resnet18(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 101)
    model = model_ft.cuda()
    model = torch.nn.DataParallel(model).cuda()  # Using one GPU (device_ids = 1)
    # print(model)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Optionally resume from a checkpoint.  The "not found" message is only
    # meaningful when a path was actually given, so guard it with isfile
    # instead of printing it whenever --resume is unset (and instead of
    # crashing inside torch.load on a missing file).
    if args.resume:
        if os.path.isfile(args.resume):
            # torch.load unpickles the file: only resume from trusted checkpoints.
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    testdir = os.path.join(args.data, 'test')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # transforms.Scale was renamed to Resize and later removed from
    # torchvision; prefer Resize and fall back only on very old releases.
    resize_cls = getattr(transforms, 'Resize', None) or transforms.Scale

    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(traindir, transforms.Compose([
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True
    )

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(testdir, transforms.Compose([
            resize_cls(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True
    )

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # Train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # Evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # Remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,  # epoch to resume from
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best)
def main():
    """Pre-train AlexNet as the feature extractor for SA-Siam's S-Net.

    SA-Siam is an object tracker with two sub-networks, S-Net and A-Net; the
    AlexNet trained here is used as the feature extractor of S-Net.  The code
    is lightly adapted from the PyTorch ImageNet example
    (https://github.com/pytorch/examples/tree/master/imagenet).

    Parses the command line into the module-level ``args``, builds the model,
    loss and SGD optimizer, optionally resumes from ``args.resume``, creates
    loaders from ``<args.data>/train`` and ``<args.data>/val`` and then either
    runs one validation pass (``args.evaluate``) or the full
    train/validate/checkpoint loop.

    Globals:
        args: Rebound to the parsed command-line namespace.
        best_prec1: Updated with the best validation precision@1 seen so far.
    """
    global args, best_prec1
    args = parser.parse_args()

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    model = AlexNet()
    if args.distributed:
        # DataParallel only splits a batch across the local GPUs; without
        # DistributedDataParallel each process would train its own copy on
        # its data shard with no gradient synchronisation.
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        model = torch.nn.parallel.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(255),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    # shuffle and sampler are mutually exclusive in DataLoader, so shuffle
    # only when no distributed sampler is in charge of the ordering.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler)

    # NOTE(review): validation uses a *random* resized crop, so validation
    # precision (and therefore the "best" checkpoint) varies between runs;
    # the deterministic alternatives below were left disabled by the author.
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                # transforms.Resize((255, 255)),
                transforms.RandomResizedCrop(255),
                # transforms.CenterCrop(255),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Reseed the sampler so each epoch sees a different shuffling.
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,  # epoch to resume from
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)