deconv=opts.deconv, channel_deconv=opts.channel_deconv) # net = GoogLeNet() if opts.arch == 'densenet': net = densenet_cifar() if opts.arch == 'densenet121': net = DenseNet121(num_classes=opts.num_outputs, deconv=opts.deconv, channel_deconv=opts.channel_deconv) if opts.arch == 'densenet121d': from models.densenet_imagenet import densenet121d net = densenet121d(num_classes=opts.num_outputs, deconv=opts.deconv, channel_deconv=opts.channel_deconv) if opts.arch == 'simple_v1': from models.simple import * net = SimpleCNN_v1(channels_in=opts.in_planes, kernel_size=opts.input_size, num_outputs=opts.num_outputs, method=opts.method) if opts.arch == 'simple_v2': from models.simple import * net = SimpleCNN_v2(channels_in=opts.in_planes, kernel_size=3, hidden_layers=10, hidden_channels=4,
def _create_model(args):
    """Instantiate the network selected by ``args.arch``.

    With ``args.pretrained`` the constructor is looked up in
    ``models.__dict__`` and called with ``pretrained=True``.  Otherwise the
    name is first looked up in ``models.__dict__`` and, failing that, matched
    against the project-local ``*d`` variants, which receive the ``deconv``,
    ``delinear`` and ``channel_deconv`` options from ``args``.

    Raises:
        ValueError: if ``args.arch`` matches none of the known architectures.
    """
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        return models.__dict__[args.arch](pretrained=True)

    print("=> creating model '{}'".format(args.arch))
    if args.arch in models.__dict__:
        return models.__dict__[args.arch]()

    deconv_kwargs = dict(deconv=args.deconv,
                         delinear=args.delinear,
                         channel_deconv=args.channel_deconv)

    # The project-local variants are imported lazily so that only the module
    # of the requested architecture is loaded.
    if args.arch == 'resnet18d':
        from models.resnet_imagenet import resnet18d
        return resnet18d(**deconv_kwargs)
    if args.arch == 'resnet34d':
        from models.resnet_imagenet import resnet34d
        return resnet34d(**deconv_kwargs)
    if args.arch == 'resnet50d':
        from models.resnet_imagenet import resnet50d
        return resnet50d(**deconv_kwargs)
    if args.arch == 'resnet101d':
        from models.resnet_imagenet import resnet101d
        return resnet101d(**deconv_kwargs)
    if args.arch == 'vgg11d':
        from models.vgg_imagenet import vgg11d
        return vgg11d('VGG11d', **deconv_kwargs)
    if args.arch == 'vgg16d':
        from models.vgg_imagenet import vgg16d
        return vgg16d('VGG16d', **deconv_kwargs)
    if args.arch == 'densenet121d':
        from models.densenet_imagenet import densenet121d
        return densenet121d(**deconv_kwargs)

    # Fail here with a clear message instead of leaving the model undefined
    # and crashing later with an unrelated-looking error.
    raise ValueError("unsupported architecture '{}'".format(args.arch))


def _make_val_loader(valdir, normalize, args):
    """Build the validation ``DataLoader`` (resize 256, center crop 224)."""
    val_dataset = datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))
    return torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)


def _save_training_plots(args):
    """Draw loss / top-1 / top-5 curves and save them under ``args.log_dir``.

    Training curves are drawn in blue and evaluation curves in red; the
    figure is cleared after saving so the next call starts from scratch.
    """
    panels = (
        ('Loss Plot', 'Loss', args.train_losses, args.eval_losses),
        ('Top 1 Accuracy Plot', 'Top 1 Acc', args.train_top1, args.eval_top1),
        ('Top 5 Accuracy Plot', 'Top 5 Acc', args.train_top5, args.eval_top5),
    )
    for position, (title, ylabel, train_curve, eval_curve) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.title(title, fontsize=10)
        plt.xlabel('Epochs', fontsize=10)
        plt.ylabel(ylabel, fontsize=10)
        plt.xticks(fontsize=10)
        plt.yticks(fontsize=10)
        plt.plot(train_curve, 'b')
        plt.plot(eval_curve, 'r')
    plt.savefig(os.path.join(args.log_dir, 'TrainingPlots'))
    plt.clf()


def main_worker(gpu, ngpus_per_node, args):
    """Run training (or evaluation only) for one worker process.

    Sets up the optional distributed process group, builds the model, loss,
    optimizer and learning-rate scheduler, optionally restores a checkpoint,
    and then either validates once (``args.evaluate``) or runs the
    train/validate loop, saving a checkpoint after every epoch.

    Args:
        gpu: CUDA device index used by this process, or ``None``.
        ngpus_per_node: Number of GPUs per node; used to derive the global
            rank and to split ``args.batch_size`` / ``args.workers`` when one
            process drives one GPU.
        args: Run configuration.  It is modified in place: ``gpu``, ``rank``,
            ``batch_size``, ``workers``, ``start_epoch`` and
            ``current_scheduler`` may be overwritten.

    Raises:
        ValueError: if ``args.arch`` is not a known architecture, or if the
            multistep milestone interval rounds down to less than one epoch.

    Side effects:
        Updates the module-level ``best_acc1`` and, when resuming with the
        cosine schedule, ``n_iter``.
    """
    global best_acc1, n_iter
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # create model
    model = _create_model(args)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    print(args)
    params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(params, 'trainable parameters in the network.')

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map the stored tensors onto this process's GPU rather than
                # the device they were saved from.
                checkpoint = torch.load(
                    args.resume, map_location='cuda:{}'.format(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            # best_acc1 may be from a checkpoint from a different GPU; it is
            # only moved when it is a tensor, so a plain number also works.
            if args.gpu is not None and torch.is_tensor(best_acc1):
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    val_loader = _make_val_loader(valdir, normalize, args)

    if args.evaluate:
        validate(val_loader, model, criterion, 0, args)
        return

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    steps_per_epoch = math.ceil(len(train_dataset) / args.batch_size)
    epoch_stepped = args.lr_scheduler in ('multistep', 'step')

    if args.lr_scheduler == 'multistep':
        # Decay every `interval` epochs: interval, 2 * interval, ...
        interval = int(args.milestone * args.epochs)
        if interval < 1 and args.epochs > 0:
            # A non-positive interval would make the loop below run forever.
            raise ValueError(
                "milestone interval int(milestone * epochs) must be at least 1, "
                "got {}".format(interval))
        milestones = [interval]
        while milestones[-1] + interval < args.epochs:
            milestones.append(milestones[-1] + interval)
        args.current_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=milestones, gamma=args.multistep_gamma)
    elif args.lr_scheduler == 'step':
        args.current_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, args.scheduler_step_size, gamma=args.multistep_gamma)
    elif args.lr_scheduler == 'cosine':
        # The cosine schedule is defined over iterations, not epochs.
        args.current_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, steps_per_epoch * args.epochs, eta_min=0, last_epoch=-1)

    if args.resume:
        # Fast-forward the freshly built scheduler to the resumed position.
        if epoch_stepped:
            for _ in range(args.start_epoch):
                args.current_scheduler.step()
        if args.lr_scheduler == 'cosine':
            for _ in range(steps_per_epoch * args.start_epoch):
                n_iter = n_iter + 1
                args.current_scheduler.step()

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # NOTE(review): the epoch-based schedulers are stepped before the
        # epoch is trained; the cosine scheduler is not stepped here and is
        # presumably advanced per iteration inside train() - confirm.
        if epoch_stepped:
            args.current_scheduler.step()
        if epoch_stepped or args.lr_scheduler == 'cosine':
            # Read the rate from the optimizer: it is the value actually in
            # use, whereas scheduler.get_lr() is not reliable outside step().
            print('Current learning rate:', optimizer.param_groups[0]['lr'])

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, epoch, args)

        if args.save_plot:
            _save_training_plots(args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, is_best, path=args.log_dir)

    args.writer.close()
channel_deconv=args.channel_deconv) # net = GoogLeNet() if args.arch == 'densenet': net = densenet_cifar() if args.arch == 'densenet121': net = DenseNet121(num_classes=args.num_outputs, deconv=args.deconv, delinear=args.delinear, channel_deconv=args.channel_deconv) if args.arch == 'densenet121d': from models.densenet_imagenet import densenet121d net = densenet121d(num_classes=args.num_outputs, deconv=args.deconv, delinear=args.delinear, channel_deconv=args.channel_deconv) if args.arch == 'simple_v1': from models.simple import * net = SimpleCNN_v1(channels_in=args.in_planes, kernel_size=args.input_size, num_outputs=args.num_outputs, method=args.method) if args.arch == 'simple_v2': from models.simple import * net = SimpleCNN_v2(channels_in=args.in_planes, kernel_size=3, hidden_layers=10, hidden_channels=4,