def main():
    """Train a CIFAR-10 model, optionally distributed and in half/double precision.

    Reads hyper-parameters from the module-level ``args`` and publishes
    ``rank``/``world_size`` as globals for the rest of the script.
    """
    global args, rank, world_size
    if args.dist == 1:
        # Distributed run: rank/world_size come from the distributed launcher.
        rank, world_size = dist_init()
    else:
        # Single-process fallback.
        rank = 0
        world_size = 1

    DATA_DIR = './data'
    train_set_raw = torchvision.datasets.CIFAR10(root=DATA_DIR, train=True, download=True)
    test_set_raw = torchvision.datasets.CIFAR10(root=DATA_DIR, train=False, download=True)

    # Piecewise-linear LR: ramps from 0 to 0.4*lr_scale by epoch 5, then decays
    # back to 0 by epoch 24.
    lr_schedule = PiecewiseLinear([0, 5, 24], [0, 0.4 * args.lr_scale, 0])
    train_transforms = [Crop(32, 32), FlipLR(), Cutout(8, 8)]

    model = TorchGraph(union(net(), losses)).cuda()
    # Precision casts; note that if both flags are set, the double cast is
    # applied last and wins over half.
    if args.half == 1:
        model = model.half()
    if args.double == 1:
        model = model.double()
    if args.dist == 1:
        model = DistModule(model)

    # lr=0.0 here: the actual rate is presumably driven by lr_schedule inside
    # train() -- TODO confirm. Weight decay is deliberately scaled by batch size.
    opt = torch.optim.SGD(model.parameters(), lr=0.0, momentum=args.momentum, weight_decay=5e-4 * args.batch_size, nesterov=True)
    t = Timer()

    # Preprocess the whole dataset once up front (pad/normalise/transpose) and
    # pair each image with its label.
    train_set = list(
        zip(transpose(normalise(pad(train_set_raw.data, 4))), train_set_raw.targets))
    test_set = list(
        zip(transpose(normalise(test_set_raw.data)), test_set_raw.targets))

    dataset_len = len(train_set)
    # Convert warm-up epochs into per-rank iteration count.
    args.warm_up_iter = math.ceil(dataset_len * args.warm_up_epoch / (world_size * args.batch_size))

    TSV = TSVLogger()
    train(model, lr_schedule, opt, Transform(train_set, train_transforms), test_set, args=args, batch_size=args.batch_size, num_workers=args.workers, loggers=(TableLogger(rank), TSV), timer=t, test_time_in_total=False, drop_last=True)
def main():
    """Fine-tune a pretrained inceptionv4 model with a patience-based LR schedule.

    Epoch 0 trains only the last layers at lr=0.001; from epoch 1 the whole
    network trains at lr=3e-5, and every time validation loss fails to improve
    twice in a row the best checkpoint is reloaded and the LR is divided by 10.
    """
    global args, best_prec1, min_loss
    args = parser.parse_args()
    rank, world_size = dist_init(args.port)
    print("world_size is: {}".format(world_size))
    # Per-rank batch size / workers: the CLI values are totals across ranks.
    assert (args.batch_size % world_size == 0)
    assert (args.workers % world_size == 0)
    args.batch_size = args.batch_size // world_size
    args.workers = args.workers // world_size

    # create model
    print("=> creating model '{}'".format("inceptionv4"))
    print("save_path is: {}".format(args.save_path))
    image_size = 341
    input_size = 299
    model = get_model('inceptionv4', pretrained=True)
    # print("model is: {}".format(model))
    model.cuda()
    model = DistModule(model)

    # optionally resume from a checkpoint
    if args.load_path:
        if args.resume_opt:
            # NOTE(review): `optimizer` is not defined until inside the epoch
            # loop below, so this branch raises NameError as written — confirm
            # whether resume_opt is ever used, or move optimizer creation up.
            best_prec1, start_epoch = load_state(args.load_path, model, optimizer=optimizer)
        else:
            # print('load weights from', args.load_path)
            load_state(args.load_path, model)

    cudnn.benchmark = True

    # Data loading code
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    train_dataset = McDataset(
        args.train_root, args.train_source,
        transforms.Compose([
            transforms.Resize(image_size),
            transforms.RandomCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            ColorAugmentation(),
            normalize,
        ]))
    val_dataset = McDataset(
        args.val_root, args.val_source,
        transforms.Compose([
            transforms.Resize(image_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize,
        ]))

    train_sampler = DistributedSampler(train_dataset)
    val_sampler = DistributedSampler(val_dataset)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=False, sampler=train_sampler)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=False, sampler=val_sampler)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    lr = 0
    patience = 0
    for epoch in range(args.start_epoch, args.epochs):
        # adjust_learning_rate(optimizer, epoch)
        train_sampler.set_epoch(epoch)
        if epoch == 1:
            # After the warm-up epoch, drop to a small full-network rate.
            lr = 0.00003
        if patience == 2:
            # Two epochs without improvement: reload the best weights and
            # decay the learning rate.
            patience = 0
            checkpoint = load_checkpoint(args.save_path + '_best.pth.tar')
            model.load_state_dict(checkpoint['state_dict'])
            print("Loading checkpoint_best.............")
            # model.load_state_dict(torch.load('checkpoint_best.pth.tar'))
            lr = lr / 10.0
        if epoch == 0:
            # Warm-up: freeze everything except the last layers.
            lr = 0.001
            for name, param in model.named_parameters():
                # print("name is: {}".format(name))
                # NOTE(review): `last_layer_names` is not defined in this
                # function — presumably a module-level constant; verify.
                if (name not in last_layer_names):
                    param.requires_grad = False
            optimizer = torch.optim.RMSprop(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
            # optimizer = torch.optim.Adam(
            #     filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
        else:
            # Unfreeze all parameters and rebuild the optimizer at the current lr.
            for param in model.parameters():
                param.requires_grad = True
            optimizer = torch.optim.RMSprop(model.parameters(), lr=lr, weight_decay=0.0001)
            # optimizer = torch.optim.Adam(
            #     model.parameters(), lr=lr, weight_decay=0.0001)
        print("lr is: {}".format(lr))

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        val_prec1, val_losses = validate(val_loader, model, criterion)
        print("val_losses is: {}".format(val_losses))

        # remember best prec@1 and save checkpoint
        if rank == 0:
            # remember best prec@1 and save checkpoint
            if val_losses < min_loss:
                is_best = True
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': 'inceptionv4',
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best, args.save_path)
                # torch.save(model.state_dict(), 'best_val_weight.pth')
                print(
                    'val score improved from {:.5f} to {:.5f}. Saved!'.format(
                        min_loss, val_losses))
                min_loss = val_losses
                patience = 0
            else:
                patience += 1
        # NOTE(review): non-zero ranks track min_loss/patience without saving;
        # this explicit chain only covers ranks 1..7, so the code assumes
        # world_size <= 8 — confirm, or replace with `elif rank != 0`.
        if rank == 1 or rank == 2 or rank == 3 or rank == 4 or rank == 5 or rank == 6 or rank == 7:
            if val_losses < min_loss:
                min_loss = val_losses
                patience = 0
            else:
                patience += 1
        print("patience is: {}".format(patience))
        print("min_loss is: {}".format(min_loss))
    # NOTE(review): duplicate of the per-epoch print above — likely the final
    # summary after training; placement reconstructed from collapsed source.
    print("min_loss is: {}".format(min_loss))
def main():
    """Parse CLI args, initialise distributed training, and solve a sequence
    of incremental-learning tasks with the selected approach.

    Raises:
        ValueError: if ``args.approach`` does not name a known approach
            (previously this fell through with ``approach = None`` and crashed
            later with an opaque AttributeError on ``approach.Approach``).
    """
    global args
    args = parser.parse_args()

    # TODO model arguments module should be more easy to write and read
    # Select the approach module and validate the memory flags that only make
    # sense for some approaches (only 'gem' uses a replay memory).
    if args.approach == 'lwf':
        approach = lwf
        assert (args.memory_size is None)
        assert (args.memory_mini_batch_size is None)
    elif args.approach == 'joint_train':
        approach = joint_train
        assert (args.memory_size is None)
        assert (args.memory_mini_batch_size is None)
    elif args.approach == 'fine_tuning':
        approach = fine_tuning
        assert (args.memory_size is None)
        assert (args.memory_mini_batch_size is None)
    elif args.approach == 'gem':
        approach = gem
        assert (args.memory_size is not None)
        assert (args.memory_mini_batch_size is None)
    else:
        # Fail fast with a clear message instead of deferring the crash.
        raise ValueError('unknown approach: {!r}'.format(args.approach))

    rank, world_size = dist_init('27777')

    if rank == 0:
        print('=' * 100)
        print('Arguments = ')
        for arg in vars(args):
            print('\t' + arg + ':', getattr(args, arg))
        print('=' * 100)

    # Seed numpy / torch / CUDA for reproducibility; CUDA is required.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    else:
        print('[CUDA unavailable]')
        sys.exit()

    # Generate Tasks; each rank processes batch_size / world_size samples.
    args.batch_size = args.batch_size // world_size
    Tasks = generator.GetTasks(args.approach, args.batch_size, world_size, \
        memory_size=args.memory_size, memory_mini_batch_size=args.memory_mini_batch_size)

    # Network
    net = network.resnet50(pretrained=True).cuda()
    net = DistModule(net)

    # Approach
    Appr = approach.Approach(net, args, Tasks)

    # Solve tasks incrementally
    for t in range(len(Tasks)):
        task = Tasks[t]
        if rank == 0:
            print('*' * 100)
            print()
            print('Task {:d}: {:d} classes ({:s})'.format(
                t, task['class_num'], task['description']))
            print()
            print('*' * 100)

        Appr.solve(t, Tasks)

        if rank == 0:
            print('*' * 100)
            print('Task {:d}: {:d} classes Finished.'.format(
                t, task['class_num']))
            print('*' * 100)
def main():
    """Config-driven training entry point.

    Loads a YAML config, builds a (possibly sync-BN grouped, possibly fp16)
    model, constructs the optimizer/scheduler and data loaders, then either
    evaluates (``args.evaluate``) or trains.
    """
    global args, config, best_prec1
    args = parser.parse_args()

    with open(args.config) as f:
        # FIX: yaml.load without an explicit Loader is deprecated and can
        # construct arbitrary Python objects from untrusted input; the config
        # is plain YAML, so SafeLoader is sufficient.
        config = yaml.load(f, Loader=yaml.SafeLoader)

    config = EasyDict(config['common'])
    config.save_path = os.path.dirname(args.config)

    rank, world_size = dist_init()

    # create model
    # Sync-BN process groups: bn_group_size ranks share batch statistics.
    bn_group_size = config.model.kwargs.bn_group_size
    bn_var_mode = config.model.kwargs.get('bn_var_mode', 'L2')
    if bn_group_size == 1:
        bn_group = None
    else:
        assert world_size % bn_group_size == 0
        bn_group = simple_group_split(world_size, rank, world_size // bn_group_size)

    config.model.kwargs.bn_group = bn_group
    config.model.kwargs.bn_var_mode = (link.syncbnVarMode_t.L1
                                       if bn_var_mode == 'L1' else link.syncbnVarMode_t.L2)
    model = model_entry(config.model)
    if rank == 0:
        print(model)

    model.cuda()

    # fp16 is implied by the optimizer type, not a separate flag.
    if config.optimizer.type == 'FP16SGD' or config.optimizer.type == 'FusedFP16SGD':
        args.fp16 = True
    else:
        args.fp16 = False

    if args.fp16:
        # if you have modules that must use fp32 parameters, and need fp32 input
        # try use link.fp16.register_float_module(your_module)
        # if you only need fp32 parameters set cast_args=False when call this
        # function, then call link.fp16.init() before call model.half()
        if config.optimizer.get('fp16_normal_bn', False):
            print('using normal bn for fp16')
            link.fp16.register_float_module(link.nn.SyncBatchNorm2d, cast_args=False)
            link.fp16.register_float_module(torch.nn.BatchNorm2d, cast_args=False)
            link.fp16.init()
        model.half()

    model = DistModule(model, args.sync)

    # create optimizer
    opt_config = config.optimizer
    opt_config.kwargs.lr = config.lr_scheduler.base_lr
    if config.get('no_wd', False):
        # Exclude bias/BN parameters from weight decay.
        param_group, type2num = param_group_no_wd(model)
        opt_config.kwargs.params = param_group
    else:
        opt_config.kwargs.params = model.parameters()
    optimizer = optim_entry(opt_config)

    # optionally resume from a checkpoint
    last_iter = -1
    best_prec1 = 0
    if args.load_path:
        if args.recover:
            best_prec1, last_iter = load_state(args.load_path, model, optimizer=optimizer)
        else:
            load_state(args.load_path, model)

    cudnn.benchmark = True

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # augmentation: base crop+flip, plus optional rotation / color jitter /
    # legacy color augmentation controlled by the config.
    aug = [
        transforms.RandomResizedCrop(config.augmentation.input_size),
        transforms.RandomHorizontalFlip()
    ]
    # Reject unknown augmentation keys early (typo protection).
    for k in config.augmentation.keys():
        assert k in [
            'input_size', 'test_resize', 'rotation', 'colorjitter', 'colorold'
        ]
    rotation = config.augmentation.get('rotation', 0)
    colorjitter = config.augmentation.get('colorjitter', None)
    colorold = config.augmentation.get('colorold', False)
    if rotation > 0:
        aug.append(transforms.RandomRotation(rotation))
    if colorjitter is not None:
        aug.append(transforms.ColorJitter(*colorjitter))
    aug.append(transforms.ToTensor())
    if colorold:
        aug.append(ColorAugmentation())
    aug.append(normalize)

    # train
    train_dataset = McDataset(config.train_root, config.train_source,
                              transforms.Compose(aug), fake=args.fake)
    # val
    val_dataset = McDataset(
        config.val_root, config.val_source,
        transforms.Compose([
            transforms.Resize(config.augmentation.test_resize),
            transforms.CenterCrop(config.augmentation.input_size),
            transforms.ToTensor(),
            normalize,
        ]), args.fake)

    # Iteration-based sampler so training can resume mid-"epoch".
    train_sampler = DistributedGivenIterationSampler(
        train_dataset, config.lr_scheduler.max_iter, config.batch_size, last_iter=last_iter)
    val_sampler = DistributedSampler(val_dataset, round_up=False)

    train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=False, num_workers=config.workers, pin_memory=True, sampler=train_sampler)
    val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False, num_workers=config.workers, pin_memory=True, sampler=val_sampler)

    # FP16SGD wraps the real optimizer; the scheduler needs the inner one.
    config.lr_scheduler['optimizer'] = optimizer.optimizer if isinstance(
        optimizer, FP16SGD) else optimizer
    config.lr_scheduler['last_iter'] = last_iter
    lr_scheduler = get_scheduler(config.lr_scheduler)

    # Only rank 0 writes tensorboard events and the log file.
    if rank == 0:
        tb_logger = SummaryWriter(config.save_path + '/events')
        logger = create_logger('global_logger', config.save_path + '/log.txt')
        logger.info('args: {}'.format(pprint.pformat(args)))
        logger.info('config: {}'.format(pprint.pformat(config)))
    else:
        tb_logger = None

    if args.evaluate:
        if args.fusion_list is not None:
            validate(val_loader, model, fusion_list=args.fusion_list, fuse_prob=args.fuse_prob)
        else:
            validate(val_loader, model)
        link.finalize()
        return

    train(train_loader, val_loader, model, optimizer, lr_scheduler, last_iter + 1, tb_logger)

    link.finalize()
def validate(val_loader, model, fusion_list=None, fuse_prob=False):
    """Evaluate ``model`` on ``val_loader`` and return reduced metrics.

    When ``fusion_list`` is given, an ensemble is built by loading one model
    per checkpoint path and averaging their outputs (post-softmax if
    ``fuse_prob``). Returns ``(final_loss, final_top1, final_top5)``, each
    averaged across all ranks.
    """
    batch_time = AverageMeter(0)
    losses = AverageMeter(0)
    top1 = AverageMeter(0)
    top5 = AverageMeter(0)

    # switch to evaluate mode
    if fusion_list is not None:
        # Build one model per checkpoint and put each into eval mode.
        model_list = []
        for ckpt_path in fusion_list:
            fused = model_entry(config.model)
            fused.cuda()
            fused = DistModule(fused, args.sync)
            load_state(ckpt_path, fused)
            fused.eval()
            model_list.append(fused)
        if fuse_prob:
            softmax = nn.Softmax(dim=1)
    else:
        model.eval()

    rank = link.get_rank()
    world_size = link.get_world_size()
    logger = logging.getLogger('global_logger')
    criterion = nn.CrossEntropyLoss()

    tick = time.time()
    with torch.no_grad():
        for step, (inp, target) in enumerate(val_loader):
            inp = inp.half().cuda() if args.fp16 else inp.cuda()
            target = target.cuda()

            # compute output: single model, or mean over the ensemble
            if fusion_list is not None:
                member_outputs = []
                for fused in model_list:
                    out = fused(inp)
                    if fuse_prob:
                        out = softmax(out)
                    member_outputs.append(out)
                output = torch.mean(torch.stack(member_outputs, 0), 0)
            else:
                output = model(inp)

            # loss should not be scaled by world_size here — it is reduced later
            loss = criterion(output, target)
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

            batch_n = inp.size(0)
            losses.update(loss.item(), batch_n)
            top1.update(prec1.item(), batch_n)
            top5.update(prec5.item(), batch_n)

            # measure elapsed time
            batch_time.update(time.time() - tick)
            tick = time.time()

            if step % config.print_freq == 0 and rank == 0:
                logger.info(
                    'Test: [{0}/{1}]\tTime {batch_time.val:.3f} ({batch_time.avg:.3f})'
                    .format(step, len(val_loader), batch_time=batch_time))

    # gather final results: convert local averages back to sums, allreduce,
    # then renormalize by the global sample count
    total_num = torch.Tensor([losses.count])
    loss_sum = torch.Tensor([losses.avg * losses.count])
    top1_sum = torch.Tensor([top1.avg * top1.count])
    top5_sum = torch.Tensor([top5.avg * top5.count])
    link.allreduce(total_num)
    link.allreduce(loss_sum)
    link.allreduce(top1_sum)
    link.allreduce(top5_sum)
    final_loss = loss_sum.item() / total_num.item()
    final_top1 = top1_sum.item() / total_num.item()
    final_top5 = top5_sum.item() / total_num.item()

    if rank == 0:
        logger.info(
            ' * Prec@1 {:.3f}\tPrec@5 {:.3f}\tLoss {:.3f}\ttotal_num={}'.
            format(final_top1, final_top5, final_loss, total_num.item()))

    model.train()
    return final_loss, final_top1, final_top5
def main():
    """Train LeNet on MNIST with a separately-held full-precision parameter copy.

    The optimizer steps on ``param_copy`` (float or double master weights)
    while the forward model may run in half/double precision.
    """
    global args, rank, world_size, best_prec1, dataset_len

    if args.dist == 1:
        rank, world_size = dist_init()
    else:
        rank, world_size = 0, 1

    model = LeNet()
    model.cuda()

    # Master copy of the weights, detached from the graph, in the precision
    # the optimizer should step in.
    master_type = torch.cuda.DoubleTensor if args.double == 1 else torch.cuda.FloatTensor
    param_copy = [p.clone().type(master_type).detach() for p in model.parameters()]
    for master_param in param_copy:
        master_param.requires_grad = True

    # Cast the forward model after taking the master copy.
    if args.double == 1:
        model = model.double()
    if args.half == 1:
        model = model.half()
    if args.dist == 1:
        model = DistModule(model)

    # define loss function (criterion) and optimizer — note the optimizer
    # holds the master copy, not model.parameters()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(param_copy,
                                args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    last_iter = -1

    # Data loading code
    train_dataset = datasets.MNIST(root='./data',
                                   train=True,
                                   transform=transforms.ToTensor(),
                                   download=False)
    val_dataset = datasets.MNIST(root='./data',
                                 train=False,
                                 transform=transforms.ToTensor(),
                                 download=False)
    dataset_len = len(train_dataset)

    # Total iterations = epochs worth of data split across ranks.
    args.max_iter = math.ceil((dataset_len * args.epoch) / (world_size * args.batch_size))

    if args.dist == 1:
        train_sampler = DistributedGivenIterationSampler(train_dataset,
                                                         args.max_iter,
                                                         args.batch_size,
                                                         last_iter=last_iter)
        val_sampler = DistributedSampler(val_dataset, round_up=False)
    else:
        train_sampler = DistributedGivenIterationSampler(train_dataset,
                                                         args.max_iter,
                                                         args.batch_size,
                                                         world_size=1,
                                                         rank=0,
                                                         last_iter=last_iter)
        val_sampler = None

    # pin_memory if true, will copy the tensor to cuda pinned memory
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.workers,
                              pin_memory=True,
                              sampler=train_sampler)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.workers,
                            pin_memory=True,
                            sampler=val_sampler)

    train(train_loader, val_loader, model, criterion, optimizer, param_copy)
def main():
    """Generic classification training entry point.

    Builds the architecture selected by ``args.arch`` (with per-arch input
    sizes), optionally loads pretrained weights / resumes, then trains with
    SGD, evaluating and checkpointing after every epoch.
    """
    global args, best_prec1, timer
    args = parser.parse_args()
    rank, world_size = dist_init(args.port)
    # Per-rank batch size / workers: CLI values are totals across ranks.
    assert (args.batch_size % world_size == 0)
    assert (args.workers % world_size == 0)
    args.batch_size = args.batch_size // world_size
    args.workers = args.workers // world_size

    # step1: create model
    print("=> creating model '{}'".format(args.arch))
    if args.arch.startswith('inception_v3'):
        print('inception_v3 without aux_logits!')
        image_size = 341
        input_size = 299
        model = models.__dict__[args.arch](aux_logits=False)
    elif args.arch.startswith('ir18'):
        image_size = 640
        input_size = 448
        model = IR18()
    else:
        # Default ImageNet-style sizing.
        image_size = 256
        input_size = 224
        model = models.__dict__[args.arch]()

    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        if os.path.isfile(args.pretrained):
            print("=> loading pretrained_model '{}'".format(args.pretrained))
            pretrained_model = torch.load(args.pretrained)
            # strict=False: allow head/shape mismatches when fine-tuning.
            model.load_state_dict(pretrained_model['state_dict'], strict=False)
            print("=> loaded pretrained_model '{}'".format(args.pretrained))
        else:
            print("=> no checkpoint found at '{}'".format(args.pretrained))

    model.cuda()
    model = DistModule(model)

    # step2: define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # step3: Data loading code
    # NOTE(review): `normalize` is constructed but commented out of both
    # pipelines below, so inputs stay in [0, 1] — confirm this is intentional.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = McDataset(
        args.train_root, args.train_source,
        transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            # ColorAugmentation(),
            # normalize,
        ]))
    val_dataset = McDataset(
        args.val_root, args.val_source,
        transforms.Compose([
            transforms.Resize(image_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            # normalize,
        ]))

    train_sampler = DistributedSampler(train_dataset)
    val_sampler = DistributedSampler(val_dataset)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=False, sampler=train_sampler)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=False, sampler=val_sampler)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    timer = Timer(
        len(train_loader) + len(val_loader), args.epochs - args.start_epoch)
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train_sampler.set_epoch(epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        if rank == 0:
            # remember best prec@1 and save checkpoint
            # NOTE(review): if no checkpoint was resumed, `best_prec1` must be
            # initialised at module level or this comparison raises — verify.
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best, args.save_path)
            print('* Best Prec 1: {best:.3f}'.format(best=best_prec1))
def parse_rev_args(receive_msg):
    """Parse a received architecture message and (re)build the training state.

    Builds the CIFAR-10 test loader, constructs the network described by
    ``receive_msg`` (a serialized graph), wraps it for distributed training,
    and publishes everything through module-level globals. Returns 0.

    NOTE(review): ``trainloader``, ``trainsampler`` and ``optimizer`` are
    declared global but only assigned in commented-out code below — they must
    be set elsewhere (or remain unset); confirm against the callers.
    """
    global trainloader
    global testloader
    global trainsampler
    global testsampler
    global net
    global criterion
    global optimizer
    global rank, world_size

    # Loading Data
    if rank == 0:
        logger.debug("Preparing data..")

    transform_train, transform_test = utils.data_transforms_cifar10(args)
    dataPath = os.environ["HOME"] + "/mountdir/data/"
    trainset = torchvision.datasets.CIFAR10(root=dataPath,
                                            train=True,
                                            download=True,
                                            transform=transform_train)
    # # trainsampler = DistributedSampler(trainset)
    # # trainloader = torch.utils.data.DataLoader(
    #     trainset, batch_size=args.batch_size_per_gpu, shuffle=False, num_workers=args.workers,
    #     pin_memory=False, sampler=trainsampler
    # )
    testset = torchvision.datasets.CIFAR10(root=dataPath,
                                           train=False,
                                           download=True,
                                           transform=transform_test)
    testsampler = DistributedSampler(testset)
    # num_workers=0: load test batches in the main process.
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=False,
                                             sampler=testsampler)
    if rank == 0:
        print("len(trainset)=" + str(len(trainset)))
        print("len(testset)=" + str(len(testset)))

    # Model
    if rank == 0:
        logger.debug("Building model..")
    # Construct the network from the serialized graph description.
    net = build_graph_from_json(receive_msg)

    net = net.to(device)
    net = DistModule(net)
    criterion = nn.CrossEntropyLoss()
    # if args.optimizer == "SGD":
    #     optimizer = optim.SGD(
    #         net.parameters(), lr=args.learning_rate, momentum=0.9, weight_decay=5e-4
    #     )
    # if args.optimizer == "Adadelta":
    #     optimizer = optim.Adadelta(net.parameters(), lr=args.learning_rate)
    # if args.optimizer == "Adagrad":
    #     optimizer = optim.Adagrad(net.parameters(), lr=args.learning_rate)
    # if args.optimizer == "Adam":
    #     optimizer = optim.Adam(net.parameters(), lr=args.learning_rate)
    # if args.optimizer == "Adamax":
    #     optimizer = optim.Adamax(net.parameters(), lr=args.learning_rate)
    # if args.optimizer == "RMSprop":
    #     optimizer = optim.RMSprop(net.parameters(), lr=args.learning_rate)
    cudnn.benchmark = True

    return 0