def __init__(self, class_num, droprate=0.5, stride=2):
    """Build a CIFAR-10 ResNet-110 backbone with a 128-dim bottleneck head.

    Args:
        class_num: number of output classes for the ClassBlock classifier.
        droprate: dropout rate forwarded to ClassBlock (default 0.5).
        stride: unused here; kept for signature compatibility with sibling nets.
    """
    super(ft_net110_fc128, self).__init__()
    # Checkpoint keys carry a "module." prefix (saved from DataParallel), so
    # the backbone is registered under that name before loading.
    self.add_module("module", resnet.resnet110())
    # map_location='cpu' lets the checkpoint load on CPU-only hosts (it was
    # likely saved from GPU); the caller can move the model to GPU afterwards.
    # NOTE(review): torch.load unpickles arbitrary objects — only load trusted files.
    weights_ = torch.load("weights_cifar10/resnet110-1d1ed7c2.th", map_location="cpu")
    self.load_state_dict(weights_['state_dict'])
    # Drop the pretrained linear head; the 64-dim features feed the new classifier.
    self.module.linear = nn.Sequential()
    self.classifier = ClassBlock(64, class_num, droprate, num_bottleneck=128)
def __init__(self, class_num, droprate=0.5, stride=2):
    """Build a CIFAR-10 ResNet-110 backbone with spatial pyramid pooling.

    Args:
        class_num: number of output classes for the ClassBlock classifier.
        droprate: dropout rate forwarded to ClassBlock (default 0.5).
        stride: unused here; kept for signature compatibility with sibling nets.
    """
    super(ft_net110_spp, self).__init__()
    # Checkpoint keys carry a "module." prefix (saved from DataParallel), so
    # the backbone is registered under that name before loading.
    self.add_module("module", resnet.resnet110())
    # map_location='cpu' lets the checkpoint load on CPU-only hosts (it was
    # likely saved from GPU); the caller can move the model to GPU afterwards.
    # NOTE(review): torch.load unpickles arbitrary objects — only load trusted files.
    weights_ = torch.load("weights_cifar10/resnet110-1d1ed7c2.th", map_location="cpu")
    self.load_state_dict(weights_['state_dict'])
    # Drop the pretrained linear head; features go through SPP instead.
    self.module.linear = nn.Sequential()
    # Pyramid levels (1, 2): 64 channels * (1*1 + 2*2) bins = 320 features.
    self.spp = pyrpool.SpatialPyramidPooling((1, 2))
    self.classifier = ClassBlock(320, class_num, droprate, num_bottleneck=128)
# NOTE(review): this fragment begins mid-list — the opening of the
# save-folder-name list (presumably `save_fold_name = [...`) is outside the
# visible chunk. Block nesting below was reconstructed from a collapsed
# paste; confirm against the original file.
    'BS%d' % args.batch_size
]
if args.origin:
    save_fold_name.insert(0, 'Origin')
# Pick the CIFAR ResNet variant matching the requested depth.
if args.model == 'resnet':
    if args.depth == 20:
        network = resnet.resnet20()
    if args.depth == 32:
        network = resnet.resnet32()
    if args.depth == 44:
        network = resnet.resnet44()
    if args.depth == 56:
        network = resnet.resnet56()
    if args.depth == 110:
        network = resnet.resnet110()
if not args.origin:
    # Restore a previously pruned checkpoint together with its pruning codebook.
    print('Pruning the model in %s' % args.pruned_model_dir)
    check_point = torch.load(args.pruned_model_dir + "model_best.pth.tar")
    network.load_state_dict(check_point['state_dict'])
    codebook_index_list = np.load(args.pruned_model_dir + "codebook.npy", allow_pickle=True).tolist()
    # Collect conv and batch-norm modules in traversal order (used later to
    # apply the codebook per layer).
    m_l = []
    b_l = []
    for i in network.modules():
        if isinstance(i, nn.Conv2d):
            m_l.append(i)
        if isinstance(i, nn.BatchNorm2d):
            b_l.append(i)
# NOTE(review): this fragment begins mid-call — the `val_dataset = ...(`
# opening is outside the visible chunk. Indentation was reconstructed from a
# collapsed paste; confirm against the original file.
    transform=transforms.Compose([
        transforms.ToTensor(),
        # Standard ImageNet channel statistics.
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]))
# Horovod: partition the validation data across workers.
val_sampler = torch.utils.data.distributed.DistributedSampler(
    val_dataset, num_replicas=hvd.size(), rank=hvd.rank())
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=args.val_batch_size,
    sampler=val_sampler, **kwargs)
# Set up the model: ResNet-110 (a standard ResNet-50 variant was tried and
# left commented out).
# model = models.resnet50()
# model = resnet.resnet110()
model = resnet.resnet110()
if args.cuda:
    # Move model to GPU.
    model.cuda()
# Horovod: scale learning rate by the number of GPUs.
# Gradient Accumulation: scale learning rate by batches_per_allreduce.
optimizer = optim.SGD(model.parameters(),
                      lr=(args.base_lr * args.batches_per_allreduce * hvd.size()),
                      momentum=args.momentum, weight_decay=args.wd)
# Horovod: (optional) compression algorithm.
def main():
    """Train a CIFAR-10 classifier end to end.

    Parses CLI arguments, builds the requested network (ResNet-20/32/44/56/110,
    ResNeXt-29, deformable ResNet-32, or DenseNet-100 as the fallback), then
    runs the train/test loop for --nEpochs epochs, writing CSV logs under
    work/<net>/ and saving the latest model each epoch.
    """
    import subprocess  # used for the background plotting job below

    parser = argparse.ArgumentParser()
    parser.add_argument('--batchSz', type=int, default=64)
    parser.add_argument('--nEpochs', type=int, default=300)
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--net')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--opt', type=str, default='sgd',
                        choices=('sgd', 'adam', 'rmsprop'))
    parser.add_argument('--gpu_id', type=str, default='0')
    args = parser.parse_args()

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    args.save = 'work/' + args.net
    setproctitle.setproctitle(args.save)

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Start from a clean output directory for this run.
    if os.path.exists(args.save):
        shutil.rmtree(args.save)
    os.makedirs(args.save)

    # CIFAR-10 per-channel statistics.
    normMean = [0.49139968, 0.48215827, 0.44653124]
    normStd = [0.24703233, 0.24348505, 0.26158768]
    normTransform = transforms.Normalize(normMean, normStd)

    trainTransform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normTransform
    ])
    testTransform = transforms.Compose([transforms.ToTensor(), normTransform])

    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    trainLoader = DataLoader(
        dset.CIFAR10(root='cifar', train=True, download=True,
                     transform=trainTransform),
        batch_size=args.batchSz, shuffle=True, **kwargs)
    testLoader = DataLoader(
        dset.CIFAR10(root='cifar', train=False, download=True,
                     transform=testTransform),
        batch_size=args.batchSz, shuffle=False, **kwargs)

    n_classes = 10
    # Dispatch on the requested architecture; anything unrecognized
    # (including a missing --net) falls back to DenseNet-100.
    if args.net == 'resnet20':
        net = resnet.resnet20(num_classes=n_classes)
    elif args.net == 'resnet32':
        net = resnet.resnet32(num_classes=n_classes)
    elif args.net == 'resnet44':
        net = resnet.resnet44(num_classes=n_classes)
    elif args.net == 'resnet56':
        net = resnet.resnet56(num_classes=n_classes)
    elif args.net == 'resnet110':
        net = resnet.resnet110(num_classes=n_classes)
    elif args.net == 'resnetxt29':
        net = resnetxt.resnetxt29(num_classes=n_classes)
    elif args.net == 'deform_resnet32':
        net = deformconvnet.deform_resnet32(num_classes=n_classes)
    else:
        net = densenet.DenseNet(growthRate=12, depth=100, reduction=0.5,
                                bottleneck=True, nClasses=n_classes)

    print(' + Number of params: {}'.format(
        sum([p.data.nelement() for p in net.parameters()])))
    if args.cuda:
        net = net.cuda()
        # Wrap in DataParallel across the GPUs listed in --gpu_id (comma-separated).
        gpus = [int(i) for i in args.gpu_id.split(',')]
        net = nn.DataParallel(net, device_ids=gpus)

    if args.opt == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=1e-1,
                              momentum=0.9, weight_decay=1e-4)
    elif args.opt == 'adam':
        optimizer = optim.Adam(net.parameters(), weight_decay=1e-4)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(net.parameters(), weight_decay=1e-4)

    trainF = open(os.path.join(args.save, 'train.csv'), 'w')
    testF = open(os.path.join(args.save, 'test.csv'), 'w')
    try:
        for epoch in range(1, args.nEpochs + 1):
            adjust_opt(args.opt, optimizer, epoch)
            train(args, epoch, net, trainLoader, optimizer, trainF)
            test(args, epoch, net, testLoader, optimizer, testF)
            torch.save(net, os.path.join(args.save, 'latest.pth'))
            # Kick off plotting in the background. Popen with an argument
            # list replaces the former os.system() shell string, avoiding
            # quoting/injection problems with unusual --net values.
            subprocess.Popen(['python', 'plot.py', args.save])
    finally:
        # Close CSV logs even if training aborts mid-epoch.
        trainF.close()
        testF.close()
def main():
    """Horovod-distributed training entry point for CIFAR ResNet-110.

    Initializes Horovod, partitions train/test data across workers with
    DistributedSamplers, scales the Adam learning rate by the worker count
    (or local size for GPU Adasum), wraps the optimizer in Horovod's
    DistributedOptimizer, and runs the epoch loop; rank 0 prints test metrics.
    """
    args = parser.parse_args()

    # Horovod: initialize library; seed every worker identically.
    seed = 42
    hvd.init()
    torch.manual_seed(seed)

    # Horovod: pin GPU to local rank.
    torch.cuda.set_device(hvd.local_rank())
    torch.cuda.manual_seed(seed)

    # Horovod: limit # of CPU threads to be used per worker.
    torch.set_num_threads(4)

    kwargs = {'num_workers': 4, 'pin_memory': True}
    # When supported, use 'forkserver' to spawn dataloader workers instead of
    # 'fork' to prevent issues with Infiniband implementations that are not
    # fork-safe.
    if (hasattr(mp, '_supports_context') and mp._supports_context and
            'forkserver' in mp.get_all_start_methods()):
        kwargs['multiprocessing_context'] = 'forkserver'

    data_dir = args.data_dir
    # Serialize dataset construction (which may download) across workers
    # sharing a filesystem.
    with FileLock(os.path.expanduser("~/.horovod_lock")):
        train_dataset = get_dataset(data_dir, train=True)

    # Horovod: use DistributedSampler to partition the training data.
    train_sampler = DistributedSampler(train_dataset, num_replicas=hvd.size(),
                                       rank=hvd.rank(), shuffle=True)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              sampler=train_sampler, **kwargs)

    test_dataset = get_dataset(data_dir, train=False)
    # Horovod: use DistributedSampler to partition the test data.
    test_sampler = DistributedSampler(test_dataset, num_replicas=hvd.size(),
                                      rank=hvd.rank())
    test_loader = DataLoader(test_dataset, batch_size=args.test_batch_size,
                             sampler=test_sampler, **kwargs)

    model = resnet.resnet110()
    loss_function = nn.CrossEntropyLoss()

    # By default, Adasum doesn't need scaling up learning rate.
    lr_scaler = hvd.size() if not args.use_adasum else 1

    # Move model to GPU.
    model.cuda()
    # If using GPU Adasum allreduce, scale learning rate by local_size.
    if args.use_adasum and hvd.nccl_built():
        lr_scaler = hvd.local_size()

    # Horovod: scale learning rate by lr_scaler.
    optimizer = optim.Adam(model.parameters(), lr=args.base_lr * lr_scaler)

    # Horovod: broadcast parameters & optimizer state from rank 0 so every
    # worker starts from identical weights.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    hvd.broadcast_optimizer_state(optimizer, root_rank=0)

    # Horovod: (optional) compression algorithm.
    compression = hvd.Compression.none

    # Horovod: wrap optimizer with DistributedOptimizer.
    optimizer = hvd.DistributedOptimizer(
        optimizer,
        named_parameters=model.named_parameters(),
        compression=compression,
        op=hvd.Adasum if args.use_adasum else hvd.Average)

    # (removed unused locals `running_loss` and the tensorboard `logs` path —
    # neither was referenced anywhere in this function)
    for epoch in range(1, args.epochs + 1):
        train(epoch, model, train_sampler, train_loader, optimizer,
              loss_function, args)
        test_loss, test_acc = test(model, test_sampler, test_loader)
        if hvd.rank() == 0:
            print("Epoch: ", epoch, "Test loss:", test_loss,
                  ", Test acc.", test_acc)