def main():
    model = ResidualAttentionModel()
    model = torch.nn.DataParallel(model)
    optimizer = torch.optim.SGD(model.parameters(), 0.001,
                                momentum=0.9,
                                weight_decay=1e-4)
    # Restore model and optimizer state from the best saved checkpoint
    checkpoint = torch.load('best_model/model_best.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    model.eval()
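# A hedged sketch (an addition, not part of the original script): if the checkpoint saved
# from the DataParallel-wrapped model above needs to be loaded into a bare, unwrapped
# model (e.g. on a CPU-only machine), the 'module.' prefix that DataParallel adds to
# parameter names must be stripped first. The helper name is hypothetical.
def load_unwrapped(checkpoint_path, model):
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    # Drop the 'module.' prefix added by torch.nn.DataParallel, if present.
    state_dict = {k.replace('module.', '', 1): v
                  for k, v in checkpoint['state_dict'].items()}
    model.load_state_dict(state_dict)
    return model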
class TestModel(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # CIFAR-style 32x32 input and the corresponding attention-56/92 models
        self.cifar_data = nd.random.normal(shape=(1, 3, 32, 32))
        self.cifar_att56 = ResidualAttentionModel_32input()
        self.cifar_att56.initialize()
        self.cifar_att92 = ResidualAttentionModel_32input(
            additional_stage=True)
        self.cifar_att92.initialize()
        # ImageNet-style 224x224 input and the corresponding attention-56/92 models
        self.imgnet_data = nd.random.normal(shape=(1, 3, 224, 224))
        self.att56 = ResidualAttentionModel()
        self.att56.initialize()
        self.att92 = ResidualAttentionModel(additional_stage=True)
        self.att92.initialize()

    def test_model(self):
        # Forward passes should yield 10 logits for CIFAR-10 and 1000 for ImageNet
        self.assertEqual((1, 10), self.cifar_att56(self.cifar_data).shape)
        self.assertEqual((1, 10), self.cifar_att92(self.cifar_data).shape)
        self.assertEqual((1, 1000), self.att56(self.imgnet_data).shape)
        self.assertEqual((1, 1000), self.att92(self.imgnet_data).shape)
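# A small usage sketch (an addition, not part of the original file): the test case above
# can be run through the standard unittest entry point.
if __name__ == '__main__':
    unittest.main()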
test_dataset = datasets.CIFAR10(root='./data/',
                                train=False,
                                transform=transform)

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=35,
                                           shuffle=True,
                                           num_workers=8)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=20,
                                          shuffle=False)
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

model = ResidualAttentionModel().cuda()
print(model)

lr = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
is_train = True
model_file = 'model_92.pkl'

if is_train is True:
    # Training
    for epoch in range(100):
        for i, (images, labels) in enumerate(train_loader):
            images = Variable(images.cuda())
            # print(images.data)
            labels = Variable(labels.cuda())
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torchvision
from torchvision import transforms, datasets, models
import os
import cv2
import time
# from model.residual_attention_network_pre import ResidualAttentionModel
# based on https://github.com/liudaizong/Residual-Attention-Network
from model.residual_attention_network import ResidualAttentionModel_92_32input_update as ResidualAttentionModel

if __name__ == "__main__":
    model_file = 'model_92_sgd.pkl'
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')
    model = ResidualAttentionModel()
    print(model)
    # Load the trained weights on CPU and switch to inference mode
    model.load_state_dict(
        torch.load(model_file, map_location=torch.device('cpu')))
    model.eval()
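    # A hedged inference sketch (an addition, not part of the original script): classify a
    # single 32x32 image with the CPU model loaded above. The image path and the
    # CIFAR-10 normalization constants are placeholder assumptions.
    preprocess = transforms.Compose([
        transforms.ToTensor(),  # HWC uint8 -> CHW float in [0, 1]
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    img = cv2.cvtColor(cv2.imread('sample.png'), cv2.COLOR_BGR2RGB)  # BGR -> RGB
    x = preprocess(img).unsqueeze(0)  # add batch dimension: (1, 3, 32, 32)
    with torch.no_grad():
        prediction = classes[model(x).argmax(dim=1).item()]
    print(prediction)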
        trainer.set_learning_rate(trainer.learning_rate * lr_decay)


if __name__ == '__main__':
    batch_size = 64
    iterations = 530e3
    wd = 1e-4
    lr = 0.1
    lr_period = tuple([iterations * i for i in (0.3, 0.6, 0.9)])
    lr_decay = 0.1
    cat_interval = 10e3
    num_workers = 12
    num_gpus = 2
    ctx = [mx.gpu(i) for i in range(num_gpus)]

    net = ResidualAttentionModel()
    net.hybridize(static_alloc=True)
    net.initialize(init=mx.init.MSRAPrelu(), ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'nag', {
        'learning_rate': lr,
        'momentum': 0.9,
        'wd': wd
    })
    train_data = gluon.data.DataLoader(
        ImageFolderDataset(
            '/system1/Dataset/ImageNet/ILSVRC2012_img_train',
            transform=transformer),
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
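# A hedged sketch (an addition, not part of the original file) of how the step-decay call
# shown at the top of this snippet is typically driven: when the global iteration count
# reaches one of the lr_period boundaries (30%, 60% and 90% of the total iterations),
# the learning rate is multiplied by lr_decay. 'global_step' is an assumed counter
# maintained by the training loop.
def maybe_decay_lr(trainer, global_step, lr_period, lr_decay):
    if global_step in lr_period:
        trainer.set_learning_rate(trainer.learning_rate * lr_decay)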
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        # model = models.__dict__[args.arch]()
        model = ResidualAttentionModel()

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            # transforms.RandomResizedCrop(224),
            # transforms.RandomHorizontalFlip(),
            # transforms.RandomCrop((32, 32), padding=4),  # left, top, right, bottom
            # transforms.Scale(224),
            transforms.ToTensor(),
            normalize
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                # transforms.Resize(256),
                # transforms.CenterCrop(224),
                # transforms.RandomCrop((32, 32), padding=4),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        # for epoch in range(args.start_epoch, args.start_epoch+1):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)

    print([
        "train_acc", train_acc, "val_acc", val_acc,
        "train_loss", train_loss, "val_loss", val_loss
    ])
    np.save('full_training_in_progress.npy',
            np.array([train_acc, val_acc, train_loss, val_loss]))
    fig, (ax1, ax2) = plt.subplots(1, 2)
    plot_result(range(1), train_acc, val_acc, 'acc', 'Accuracy', ax=ax1)
    plot_result(range(1), train_loss, val_loss, 'loss', 'Loss', ax=ax2)
    fig.tight_layout()
    # plt.show()
    plt.savefig('result.png')
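# A hedged sketch (an addition, not part of the original file) of the save_checkpoint
# helper called above, following the stock PyTorch ImageNet example; the repository's own
# implementation may differ. Assumes `import shutil` alongside the other imports.
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        # keep a separate copy of the best-performing checkpoint
        shutil.copyfile(filename, 'model_best.pth.tar')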
                               train=False,
                               transform=transform)

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=20,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=20,
                                          shuffle=False)
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

model = ResidualAttentionModel().cuda()
print(model)

lr = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Training
for epoch in range(100):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.cuda())
        # print(images.data)
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()
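        # A hedged completion sketch (an addition; the original snippet is truncated
        # here): the usual remainder of a training step after optimizer.zero_grad() —
        # forward pass, cross-entropy loss, backward pass, and parameter update.
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()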