def main():
    """Do stuff."""
    args = parser.parse_args()
    # args.batch_size = args.batch_size * torch.cuda.device_count()

    if args.save_folder and not os.path.isdir(args.save_folder):
        os.makedirs(args.save_folder)

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        args.cuda = False

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    cudnn.benchmark = True

    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch = 0
    resume_folder = args.load_folder
    for try_epoch in range(200, 0, -1):
        if os.path.exists(args.checkpoint_format.format(save_folder=resume_folder, epoch=try_epoch)):
            resume_from_epoch = try_epoch
            break

    if args.restore_epoch:
        resume_from_epoch = args.restore_epoch

    # Set default train and test path if not provided as input.
    utils.set_dataset_paths(args)

    if resume_from_epoch:
        filepath = args.checkpoint_format.format(save_folder=resume_folder, epoch=resume_from_epoch)
        checkpoint = torch.load(filepath)
        checkpoint_keys = checkpoint.keys()
        dataset_history = checkpoint['dataset_history']
        dataset2num_classes = checkpoint['dataset2num_classes']
        masks = checkpoint['masks']
        if 'shared_layer_info' in checkpoint_keys:
            shared_layer_info = checkpoint['shared_layer_info']
        else:
            shared_layer_info = {}
        if 'num_for_construct' in checkpoint_keys:
            num_for_construct = checkpoint['num_for_construct']
    else:
        dataset_history = []
        dataset2num_classes = {}
        masks = {}
        shared_layer_info = {}

    if args.arch == 'vgg16_bn_cifar100':
        model = packnet_models.__dict__[args.arch](
            pretrained=False,
            dataset_history=dataset_history,
            dataset2num_classes=dataset2num_classes)
    elif args.arch == 'resnet18':
        model = packnet_models.__dict__[args.arch](
            dataset_history=dataset_history,
            dataset2num_classes=dataset2num_classes)
    else:
        print('Error: unsupported architecture {}'.format(args.arch))
        sys.exit(1)

    # Add and set the model dataset.
    model.add_dataset(args.dataset, args.num_classes)
    model.set_dataset(args.dataset)

    if args.dataset not in shared_layer_info:
        shared_layer_info[args.dataset] = {
            'conv_bias': {},
            'bn_layer_running_mean': {},
            'bn_layer_running_var': {},
            'bn_layer_weight': {},
            'bn_layer_bias': {},
            'fc_bias': {}
        }

    model = nn.DataParallel(model)
    model = model.cuda()

    if args.initial_from_task and 'None' not in args.initial_from_task:
        filepath = ''
        for try_epoch in range(200, 0, -1):
            if os.path.exists(args.checkpoint_format.format(save_folder=args.initial_from_task, epoch=try_epoch)):
                filepath = args.checkpoint_format.format(save_folder=args.initial_from_task, epoch=try_epoch)
                break
        if filepath == '':
            pdb.set_trace()
            print('Something is wrong: no checkpoint found under args.initial_from_task')
        checkpoint = torch.load(filepath)
        state_dict = checkpoint['model_state_dict']
        curr_model_state_dict = model.module.state_dict()

        for name, param in state_dict.items():
            if 'num_batches_tracked' in name:
                continue
            try:
                curr_model_state_dict[name][:].copy_(param)
            except Exception:
                pdb.set_trace()
                print('Failed to copy parameter {}'.format(name))

    if not masks:
        for name, module in model.named_modules():
            if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
                if 'classifiers' in name:
                    continue
                mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                if 'cuda' in module.weight.data.type():
                    mask = mask.cuda()
                masks[name] = mask

    if args.num_classes == 2:
        train_loader = dataset.cifar100_train_loader_two_class(args.dataset, args.batch_size)
        val_loader = dataset.cifar100_val_loader_two_class(args.dataset, args.val_batch_size)
    elif args.num_classes == 5:
        train_loader = dataset.cifar100_train_loader(args.dataset, args.batch_size)
        val_loader = dataset.cifar100_val_loader(args.dataset, args.val_batch_size)
    else:
        print("num_classes should be either 2 or 5")
        sys.exit(1)

    # If we are going to save checkpoints to another folder, restart the epoch count.
    if args.save_folder != args.load_folder:
        start_epoch = 0
    else:
        start_epoch = resume_from_epoch

    manager = Manager(args, model, shared_layer_info, masks, train_loader, val_loader)
    if args.mode == 'inference':
        manager.load_checkpoint_for_inference(resume_from_epoch, resume_folder)
        manager.validate(resume_from_epoch - 1)
        return

    lr = args.lr
    # Update all layers.
    named_params = dict(model.named_parameters())
    params_to_optimize_via_SGD = []
    named_params_to_optimize_via_SGD = []
    masks_to_optimize_via_SGD = []
    named_masks_to_optimize_via_SGD = []

    for tuple_ in named_params.items():
        if 'classifiers' in tuple_[0]:
            if '.{}.'.format(model.module.datasets.index(args.dataset)) in tuple_[0]:
                params_to_optimize_via_SGD.append(tuple_[1])
                named_params_to_optimize_via_SGD.append(tuple_)
            continue
        else:
            params_to_optimize_via_SGD.append(tuple_[1])
            named_params_to_optimize_via_SGD.append(tuple_)

    # Weight decay must be 0.0 here: the decay applied inside the built-in step()
    # touches every element of the weight tensor, which would hurt previous tasks'
    # accuracy. (Instead, we apply weight decay ourselves in `prune.py`.)
    optimizer_network = optim.SGD(params_to_optimize_via_SGD, lr=lr,
                                  weight_decay=0.0, momentum=0.9, nesterov=True)
    optimizers = Optimizers()
    optimizers.add(optimizer_network, lr)

    manager.load_checkpoint(optimizers, resume_from_epoch, resume_folder)

    # Performs training.
    curr_lrs = []
    for optimizer in optimizers:
        for param_group in optimizer.param_groups:
            curr_lrs.append(param_group['lr'])
            break

    if args.mode == 'prune':
        print()
        print('Sparsity ratio: {}'.format(args.one_shot_prune_perc))
        print('Before pruning: ')
        baseline_acc = manager.validate(start_epoch - 1)
        print('Execute one shot pruning ...')
        manager.one_shot_prune(args.one_shot_prune_perc)
    elif args.mode == 'finetune':
        manager.pruner.make_finetuning_mask()

    for epoch_idx in range(start_epoch, args.epochs):
        avg_train_acc = manager.train(optimizers, epoch_idx, curr_lrs)
        avg_val_acc = manager.validate(epoch_idx)

        if args.mode == 'finetune':
            if epoch_idx + 1 == 50 or epoch_idx + 1 == 80:
                for param_group in optimizers[0].param_groups:
                    param_group['lr'] *= 0.1
                curr_lrs[0] = param_group['lr']

        if args.mode == 'prune':
            if epoch_idx + 1 == 25:
                for param_group in optimizers[0].param_groups:
                    param_group['lr'] *= 0.1
                curr_lrs[0] = param_group['lr']

        if args.save_folder is not None:
            # paths = os.listdir(args.save_folder)
            # if paths and '.pth.tar' in paths[0]:
            #     for checkpoint_file in paths:
            #         os.remove(os.path.join(args.save_folder, checkpoint_file))
            pass
        else:
            print('Something is wrong! Block the program with pdb')
            pdb.set_trace()

        if args.mode == 'finetune':
            manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)
            if args.logfile:
                json_data = {}
                if os.path.isfile(args.logfile):
                    with open(args.logfile) as json_file:
                        json_data = json.load(json_file)
                json_data[args.dataset] = '{:.4f}'.format(avg_val_acc)
                with open(args.logfile, 'w') as json_file:
                    json.dump(json_data, json_file)
            if avg_train_acc < 0.97:
                print('Cannot prune any more!')
        elif args.mode == 'prune':
            # if avg_train_acc > 0.97 and (avg_val_acc - baseline_acc) >= -0.01:
            if avg_train_acc > 0.97:
                manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)
            else:
                print('Pruning too much!')

        print('-' * 16)
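# The training scripts above and below rely on a small `Optimizers` container
# (`optimizers.add(...)`, iteration, indexing, and an `.lrs` attribute). Its real
# definition lives elsewhere in the repo; the class below is only a minimal
# stand-in inferred from how it is used in these scripts, not the repository's
# actual implementation.
class Optimizers(object):
    """Minimal container holding several optimizers and their base learning rates."""

    def __init__(self):
        self.optimizers = []
        self.lrs = []

    def add(self, optimizer, lr):
        # Track the optimizer together with the learning rate it was created with.
        self.optimizers.append(optimizer)
        self.lrs.append(lr)

    def __iter__(self):
        return iter(self.optimizers)

    def __getitem__(self, index):
        return self.optimizers[index]

    def __len__(self):
        return len(self.optimizers)

    def zero_grad(self):
        for optimizer in self.optimizers:
            optimizer.zero_grad()

    def step(self):
        for optimizer in self.optimizers:
            optimizer.step()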
def main():
    """Do stuff."""
    args = parser.parse_args()

    # Do not scale the batch size (or the learning rate) linearly with the number
    # of GPUs; doing so lowers accuracy.
    # args.batch_size = args.batch_size * torch.cuda.device_count()

    args.network_width_multiplier = math.sqrt(args.network_width_multiplier)
    args.max_allowed_network_width_multiplier = math.sqrt(args.max_allowed_network_width_multiplier)
    if args.mode == 'prune':
        args.save_folder = os.path.join(args.save_folder, str(args.target_sparsity))
        if args.initial_sparsity != 0.0:
            args.load_folder = os.path.join(args.load_folder, str(args.initial_sparsity))

    if args.save_folder and not os.path.isdir(args.save_folder):
        os.makedirs(args.save_folder)

    if args.log_path:
        set_logger(args.log_path)

    if args.pruning_ratio_to_acc_record_file and not os.path.isdir(
            args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0]):
        os.makedirs(args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0])

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        args.cuda = False

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    cudnn.benchmark = True

    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch = 0
    resume_folder = args.load_folder
    for try_epoch in range(200, 0, -1):
        if os.path.exists(args.checkpoint_format.format(save_folder=resume_folder, epoch=try_epoch)):
            resume_from_epoch = try_epoch
            break

    if args.restore_epoch:
        resume_from_epoch = args.restore_epoch

    # Set default train and test path if not provided as input.
    utils.set_dataset_paths(args)

    if resume_from_epoch and not args.initial_from_previous_task:
        filepath = args.checkpoint_format.format(save_folder=resume_folder, epoch=resume_from_epoch)
        checkpoint = torch.load(filepath)
        checkpoint_keys = checkpoint.keys()
        dataset_history = checkpoint['dataset_history']
        dataset2num_classes = checkpoint['dataset2num_classes']
        masks = checkpoint['masks']
        shared_layer_info = checkpoint['shared_layer_info']
        if 'num_for_construct' in checkpoint_keys:
            num_for_construct = checkpoint['num_for_construct']
        if args.mode == 'inference' and 'network_width_multiplier' in shared_layer_info[args.dataset]:
            # TODO: temporary solution
            args.network_width_multiplier = shared_layer_info[args.dataset]['network_width_multiplier']
    else:
        dataset_history = []
        dataset2num_classes = {}
        masks = {}
        shared_layer_info = {}

    if args.mode == 'prune' and not args.pruning_ratio_to_acc_record_file:
        sys.exit(-1)

    if args.arch == 'resnet50':
        num_for_construct = [64, 64, 64 * 4, 128, 128 * 4, 256, 256 * 4, 512, 512 * 4]
        model = models.__dict__[args.arch](pretrained=True,
                                           num_for_construct=num_for_construct,
                                           threshold=args.threshold)
    elif 'vgg' in args.arch:
        custom_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
                      512, 512, 512, 'M', 512, 512, 512, 'M']
        model = models.__dict__[args.arch](custom_cfg,
                                           dataset_history=dataset_history,
                                           dataset2num_classes=dataset2num_classes,
                                           network_width_multiplier=args.network_width_multiplier,
                                           shared_layer_info=shared_layer_info)
    else:
        print('Error: unsupported architecture {}'.format(args.arch))
        sys.exit(1)

    # Add and set the model dataset.
    model.add_dataset(args.dataset, args.num_classes)
    model.set_dataset(args.dataset)

    model = nn.DataParallel(model)
    model = model.cuda()

    if resume_from_epoch and args.initial_from_previous_task:
        filepath = args.checkpoint_format.format(save_folder=resume_folder, epoch=resume_from_epoch)
        checkpoint = torch.load(filepath)
        state_dict = checkpoint['model_state_dict']
        curr_model_state_dict = model.module.state_dict()

        for name, param in state_dict.items():
            if 'num_batches_tracked' in name:
                continue
            try:
                curr_model_state_dict[name][:].copy_(param)
            except Exception:
                pdb.set_trace()
                print('Failed to copy parameter {}'.format(name))

    if not masks:
        for name, module in model.named_modules():
            if isinstance(module, nl.SharableConv2d) or isinstance(module, nl.SharableLinear):
                mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                if 'cuda' in module.weight.data.type():
                    mask = mask.cuda()
                masks[name] = mask
    else:
        # When we expand the network, we need to allocate new masks.
        NEED_ADJUST_MASK = False
        for name, module in model.named_modules():
            if isinstance(module, nl.SharableConv2d):
                if masks[name].size(1) < module.weight.data.size(1):
                    assert args.mode == 'finetune'
                    NEED_ADJUST_MASK = True
                elif masks[name].size(1) > module.weight.data.size(1):
                    assert args.mode == 'inference'
                    NEED_ADJUST_MASK = True

        if NEED_ADJUST_MASK:
            if args.mode == 'finetune':
                for name, module in model.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:masks[name].size(0), :masks[name].size(1), :, :].copy_(masks[name])
                        masks[name] = mask
                    elif isinstance(module, nl.SharableLinear):
                        mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:masks[name].size(0), :masks[name].size(1)].copy_(masks[name])
                        masks[name] = mask
            elif args.mode == 'inference':
                for name, module in model.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:, :, :, :].copy_(masks[name][:mask.size(0), :mask.size(1), :, :])
                        masks[name] = mask
                    elif isinstance(module, nl.SharableLinear):
                        mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:, :].copy_(masks[name][:mask.size(0), :mask.size(1)])
                        masks[name] = mask

    if args.dataset not in shared_layer_info:
        shared_layer_info[args.dataset] = {
            'bias': {},
            'bn_layer_running_mean': {},
            'bn_layer_running_var': {},
            'bn_layer_weight': {},
            'bn_layer_bias': {},
            'piggymask': {}
        }

        piggymasks = {}
        task_id = model.module.datasets.index(args.dataset) + 1
        if task_id > 1:
            for name, module in model.module.named_modules():
                if isinstance(module, nl.SharableConv2d) or isinstance(module, nl.SharableLinear):
                    piggymasks[name] = torch.zeros_like(masks['module.' + name], dtype=torch.float32)
                    piggymasks[name].fill_(0.01)
                    piggymasks[name] = Parameter(piggymasks[name])
                    module.piggymask = piggymasks[name]
    # elif args.finetune_again:
    #     # Reinitialize the piggymask.
    #     piggymasks = {}
    #     for name, module in model.module.named_modules():
    #         if isinstance(module, nl.SharableConv2d) or isinstance(module, nl.SharableLinear):
    #             piggymasks[name] = torch.zeros_like(masks['module.' + name], dtype=torch.float32)
    #             piggymasks[name].fill_(0.01)
    #             piggymasks[name] = Parameter(piggymasks[name])
    #             module.piggymask = piggymasks[name]
    else:
        try:
            piggymasks = shared_layer_info[args.dataset]['piggymask']
        except KeyError:
            piggymasks = {}
        task_id = model.module.datasets.index(args.dataset) + 1
        if task_id > 1:
            for name, module in model.module.named_modules():
                if isinstance(module, nl.SharableConv2d) or isinstance(module, nl.SharableLinear):
                    module.piggymask = piggymasks[name]

    shared_layer_info[args.dataset]['network_width_multiplier'] = args.network_width_multiplier

    if args.num_classes == 2:
        train_loader = dataset.cifar100_train_loader_two_class(args.dataset, args.batch_size)
        val_loader = dataset.cifar100_val_loader_two_class(args.dataset, args.val_batch_size)
    elif args.num_classes == 5:
        train_loader = dataset.cifar100_train_loader(args.dataset, args.batch_size)
        val_loader = dataset.cifar100_val_loader(args.dataset, args.val_batch_size)
    else:
        print("num_classes should be either 2 or 5")
        sys.exit(1)

    # If we are going to save checkpoints to another folder, restart the epoch count.
    if args.save_folder != args.load_folder:
        start_epoch = 0
    else:
        start_epoch = resume_from_epoch

    curr_prune_step = begin_prune_step = start_epoch * len(train_loader)
    end_prune_step = curr_prune_step + args.pruning_interval * len(train_loader)

    manager = Manager(args, model, shared_layer_info, masks,
                      train_loader, val_loader, begin_prune_step, end_prune_step)
    if args.mode == 'inference':
        manager.load_checkpoint_only_for_evaluate(resume_from_epoch, resume_folder)
        manager.validate(resume_from_epoch - 1)
        return

    lr = args.lr
    lr_mask = args.lr_mask
    # Update all layers.
    named_params = dict(model.named_parameters())
    params_to_optimize_via_SGD = []
    named_of_params_to_optimize_via_SGD = []
    masks_to_optimize_via_Adam = []
    named_of_masks_to_optimize_via_Adam = []

    for name, param in named_params.items():
        if 'classifiers' in name:
            if '.{}.'.format(model.module.datasets.index(args.dataset)) in name:
                params_to_optimize_via_SGD.append(param)
                named_of_params_to_optimize_via_SGD.append(name)
            continue
        elif 'piggymask' in name:
            masks_to_optimize_via_Adam.append(param)
            named_of_masks_to_optimize_via_Adam.append(name)
        else:
            params_to_optimize_via_SGD.append(param)
            named_of_params_to_optimize_via_SGD.append(name)

    optimizer_network = optim.SGD(params_to_optimize_via_SGD, lr=lr,
                                  weight_decay=0.0, momentum=0.9, nesterov=True)
    optimizers = Optimizers()
    optimizers.add(optimizer_network, lr)

    if masks_to_optimize_via_Adam:
        optimizer_mask = optim.Adam(masks_to_optimize_via_Adam, lr=lr_mask)
        optimizers.add(optimizer_mask, lr_mask)

    manager.load_checkpoint(optimizers, resume_from_epoch, resume_folder)

    # Performs training.
    curr_lrs = []
    for optimizer in optimizers:
        for param_group in optimizer.param_groups:
            curr_lrs.append(param_group['lr'])
            break

    if args.mode == 'prune':
        if 'gradual_prune' in args.load_folder and args.save_folder == args.load_folder:
            args.epochs = 20 + resume_from_epoch
        logging.info('')
        logging.info('Before pruning: ')
        logging.info('Sparsity range: {} -> {}'.format(args.initial_sparsity, args.target_sparsity))

        must_pruning_ratio_for_curr_task = 0.0

        json_data = {}
        if os.path.isfile(args.pruning_ratio_to_acc_record_file):
            with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file:
                json_data = json.load(json_file)

        # if args.network_width_multiplier == args.max_allowed_network_width_multiplier and json_data['0.0'] < baseline_acc:
        #     # We reached the upper bound and still do not reach the target accuracy
        #     # on the current task, so we keep pruning anyway.
        #     logging.info('we reach the upperbound and still do not get the accuracy over our target on curr task')
        #     remain_num_tasks = args.total_num_tasks - len(dataset_history)
        #     logging.info('remain_num_tasks: {}'.format(remain_num_tasks))
        #     ratio_allow_for_curr_task = round(1.0 / (remain_num_tasks + 1), 1)
        #     logging.info('ratio_allow_for_curr_task: {:.4f}'.format(ratio_allow_for_curr_task))
        #     must_pruning_ratio_for_curr_task = 1.0 - ratio_allow_for_curr_task
        #     if args.initial_sparsity >= must_pruning_ratio_for_curr_task:
        #         sys.exit(6)

        manager.validate(start_epoch - 1)
        logging.info('')
    elif args.mode == 'finetune':
        if not args.finetune_again:
            # manager.pruner.make_finetuning_mask()
            logging.info('Finetune stage...')
            manager.pruner.current_dataset_idx += 1
            for name, module in model.named_modules():
                if isinstance(module, nl.SharableConv2d) or isinstance(module, nl.SharableLinear):
                    mask = masks[name]
                    mask[mask.eq(0)] = manager.pruner.current_dataset_idx
        else:
            logging.info('Piggymask Retrain...')
            history_best_avg_val_acc_when_retraining = manager.validate(start_epoch - 1)
            num_epochs_that_criterion_does_not_get_better = 0

        stop_lr_mask = True
        if manager.pruner.calculate_curr_task_ratio() == 0.0:
            logging.info('There is no space left in the convolutional layers for the current task; '
                         'we will try to reuse prior experience as long as possible')
            stop_lr_mask = False

    for epoch_idx in range(start_epoch, args.epochs):
        avg_train_acc, curr_prune_step = manager.train(optimizers, epoch_idx, curr_lrs, curr_prune_step)
        avg_val_acc = manager.validate(epoch_idx)

        # if args.mode == 'prune' and (epoch_idx + 1) >= (args.pruning_interval + start_epoch) and (
        #         avg_val_acc > history_best_avg_val_acc_when_prune):
        #     pass

        if args.finetune_again:
            if avg_val_acc > history_best_avg_val_acc_when_retraining:
                history_best_avg_val_acc_when_retraining = avg_val_acc
                num_epochs_that_criterion_does_not_get_better = 0
                if args.save_folder is not None:
                    for path in os.listdir(args.save_folder):
                        if '.pth.tar' in path:
                            os.remove(os.path.join(args.save_folder, path))
                else:
                    print('Something is wrong! Block the program with pdb')
                    pdb.set_trace()
                history_best_avg_val_acc = avg_val_acc
                manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)
            else:
                num_epochs_that_criterion_does_not_get_better += 1
            if args.finetune_again and num_epochs_that_criterion_does_not_get_better == 5:
                logging.info("stop retraining")
                sys.exit(0)

        if args.mode == 'finetune':
            if epoch_idx + 1 == 50 or epoch_idx + 1 == 80:
                for param_group in optimizers[0].param_groups:
                    param_group['lr'] *= 0.1
                curr_lrs[0] = param_group['lr']
            if len(optimizers.lrs) == 2:
                if epoch_idx + 1 == 50:
                    for param_group in optimizers[1].param_groups:
                        param_group['lr'] *= 0.2
                if stop_lr_mask and epoch_idx + 1 == 70:
                    for param_group in optimizers[1].param_groups:
                        param_group['lr'] *= 0.0
                curr_lrs[1] = param_group['lr']

        if args.save_folder is not None:
            # paths = os.listdir(args.save_folder)
            # if paths and '.pth.tar' in paths[0]:
            #     for checkpoint_file in paths:
            #         os.remove(os.path.join(args.save_folder, checkpoint_file))
            pass
        else:
            print('Something is wrong! Block the program with pdb')
            pdb.set_trace()

        if avg_train_acc > 0.95:
            manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)

        logging.info('-' * 16)

    if args.pruning_ratio_to_acc_record_file:
        json_data = {}
        if os.path.isfile(args.pruning_ratio_to_acc_record_file):
            with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file:
                json_data = json.load(json_file)

        if args.mode == 'finetune' and not args.test_piggymask:
            json_data[0.0] = round(avg_val_acc, 4)

            if args.baseline_acc_file:
                baseline_json_data = {}
                if os.path.isfile(args.baseline_acc_file):
                    with open(args.baseline_acc_file) as json_file:
                        baseline_json_data = json.load(json_file)
                baseline_json_data[args.dataset] = '{:.4f}'.format(avg_val_acc)
                with open(args.baseline_acc_file, 'w') as json_file:
                    json.dump(baseline_json_data, json_file)

            with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file:
                json.dump(json_data, json_file)

            if avg_train_acc > 0.95:  # and avg_val_acc >= baseline_acc:
                pass
            if manager.pruner.calculate_curr_task_ratio() == 0.0:
                logging.info('There is no space left in the convolutional layers for the '
                             'current task, so there is nothing to prune')
                sys.exit(5)
        elif args.mode == 'prune':
            if avg_train_acc > 0.95:
                json_data[args.target_sparsity] = round(avg_val_acc, 4)
                with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file:
                    json.dump(json_data, json_file)
            else:
                sys.exit(6)

    must_pruning_ratio_for_curr_task = 0.0
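# The `nl.SharableConv2d` / `nl.SharableLinear` layers used above carry both a byte
# `mask` (which task "owns" each weight) and a learnable float `piggymask` that lets
# the current task re-pick weights frozen by earlier tasks. The real forward pass is
# defined by the repo's layer module; the class below only illustrates the idea, with
# a hypothetical hard threshold of 0.005 standing in for whatever binarization the
# actual implementation uses.
import torch
import torch.nn as nn
import torch.nn.functional as F


class PiggybackConv2dSketch(nn.Conv2d):
    """Illustrative conv layer gated by a binarized piggyback mask."""

    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
        super(PiggybackConv2dSketch, self).__init__(in_channels, out_channels, kernel_size, **kwargs)
        # One float mask entry per weight; small positive init, as in the scripts above.
        self.piggymask = nn.Parameter(torch.full_like(self.weight, 0.01))

    def forward(self, x):
        # Hard-threshold the mask in the forward pass; a real implementation would
        # need a straight-through estimator so the piggymask still receives gradients.
        binary_mask = (self.piggymask > 0.005).float()
        return F.conv2d(x, self.weight * binary_mask, self.bias,
                        self.stride, self.padding, self.dilation, self.groups)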
def main():
    """Do stuff."""
    args = parser.parse_args()
    # args.batch_size = args.batch_size * torch.cuda.device_count()
    args.network_width_multiplier = math.sqrt(args.network_width_multiplier)

    if args.mode == 'prune':
        args.save_folder = os.path.join(args.save_folder, str(args.target_sparsity))
        if args.initial_sparsity != 0.0:
            args.load_folder = os.path.join(args.load_folder, str(args.initial_sparsity))

    if args.pruning_ratio_to_acc_record_file and not os.path.isdir(
            args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0]):
        os.makedirs(args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0])

    if args.save_folder and not os.path.isdir(args.save_folder):
        os.makedirs(args.save_folder)

    if args.log_path:
        set_logger(args.log_path)

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        args.cuda = False

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    cudnn.benchmark = True

    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch = 0
    resume_folder = args.load_folder
    for try_epoch in range(200, 0, -1):
        if os.path.exists(args.checkpoint_format.format(save_folder=resume_folder, epoch=try_epoch)):
            resume_from_epoch = try_epoch
            break

    if args.restore_epoch:
        resume_from_epoch = args.restore_epoch

    # Set default train and test path if not provided as input.
    utils.set_dataset_paths(args)

    if resume_from_epoch:
        filepath = args.checkpoint_format.format(save_folder=resume_folder, epoch=resume_from_epoch)
        checkpoint = torch.load(filepath)
        checkpoint_keys = checkpoint.keys()
        dataset_history = checkpoint['dataset_history']
        dataset2num_classes = checkpoint['dataset2num_classes']
        masks = checkpoint['masks']
        shared_layer_info = checkpoint['shared_layer_info']
        if 'num_for_construct' in checkpoint_keys:
            num_for_construct = checkpoint['num_for_construct']
        if args.mode == 'inference' and 'network_width_multiplier' in shared_layer_info[args.dataset]:
            args.network_width_multiplier = shared_layer_info[args.dataset]['network_width_multiplier']
    else:
        dataset_history = []
        dataset2num_classes = {}
        masks = {}
        shared_layer_info = {}

    if args.arch == 'resnet50':
        # num_for_construct = [64, 64, 64*4, 128, 128*4, 256, 256*4, 512, 512*4]
        model = models.__dict__[args.arch](
            dataset_history=dataset_history,
            dataset2num_classes=dataset2num_classes,
            network_width_multiplier=args.network_width_multiplier,
            shared_layer_info=shared_layer_info)
    elif 'vgg' in args.arch:
        custom_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
                      512, 512, 512, 'M', 512, 512, 512, 'M']
        model = models.__dict__[args.arch](
            custom_cfg,
            dataset_history=dataset_history,
            dataset2num_classes=dataset2num_classes,
            network_width_multiplier=args.network_width_multiplier,
            shared_layer_info=shared_layer_info,
            progressive_init=args.progressive_init)
    else:
        print('Error: unsupported architecture {}'.format(args.arch))
        sys.exit(1)

    # Add and set the model dataset.
    model.add_dataset(args.dataset, args.num_classes)
    model.set_dataset(args.dataset)

    # Move model to GPU.
    model = nn.DataParallel(model)
    model = model.cuda()

    # For datasets whose image_size is 224, and only for the first task.
    if args.use_imagenet_pretrained and model.module.datasets.index(args.dataset) == 0:
        curr_model_state_dict = model.state_dict()
        if args.arch == 'custom_vgg':
            state_dict = model_zoo.load_url(model_urls['vgg16_bn'])
            for name, param in state_dict.items():
                if 'classifier' not in name:
                    curr_model_state_dict['module.' + name].copy_(param)
            curr_model_state_dict['module.features.45.weight'].copy_(state_dict['classifier.0.weight'])
            curr_model_state_dict['module.features.45.bias'].copy_(state_dict['classifier.0.bias'])
            curr_model_state_dict['module.features.48.weight'].copy_(state_dict['classifier.3.weight'])
            curr_model_state_dict['module.features.48.bias'].copy_(state_dict['classifier.3.bias'])
            if args.dataset == 'imagenet':
                curr_model_state_dict['module.classifiers.0.weight'].copy_(state_dict['classifier.6.weight'])
                curr_model_state_dict['module.classifiers.0.bias'].copy_(state_dict['classifier.6.bias'])
        elif args.arch == 'resnet50':
            state_dict = model_zoo.load_url(model_urls['resnet50'])
            for name, param in state_dict.items():
                if 'fc' not in name:
                    curr_model_state_dict['module.' + name].copy_(param)
            if args.dataset == 'imagenet':
                curr_model_state_dict['module.classifiers.0.weight'].copy_(state_dict['fc.weight'])
                curr_model_state_dict['module.classifiers.0.bias'].copy_(state_dict['fc.bias'])
        else:
            print("We haven't defined the mapping between the ImageNet pretrained weights "
                  "and our model for {}".format(args.arch))
            sys.exit(5)

    if not masks:
        for name, module in model.named_modules():
            if isinstance(module, nl.SharableConv2d) or isinstance(module, nl.SharableLinear):
                mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                mask = mask.cuda()
                masks[name] = mask
    else:
        # When we expand the network, we need to allocate new masks.
        NEED_ADJUST_MASK = False
        for name, module in model.named_modules():
            if isinstance(module, nl.SharableConv2d):
                if masks[name].size(1) < module.weight.data.size(1):
                    assert args.mode == 'finetune'
                    NEED_ADJUST_MASK = True
                elif masks[name].size(1) > module.weight.data.size(1):
                    assert args.mode == 'inference'
                    NEED_ADJUST_MASK = True

        if NEED_ADJUST_MASK:
            if args.mode == 'finetune':
                for name, module in model.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                        mask = mask.cuda()
                        mask[:masks[name].size(0), :masks[name].size(1), :, :].copy_(masks[name])
                        masks[name] = mask
                    elif isinstance(module, nl.SharableLinear):
                        mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                        mask = mask.cuda()
                        mask[:masks[name].size(0), :masks[name].size(1)].copy_(masks[name])
                        masks[name] = mask
            elif args.mode == 'inference':
                for name, module in model.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                        mask = mask.cuda()
                        mask[:, :, :, :].copy_(masks[name][:mask.size(0), :mask.size(1), :, :])
                        masks[name] = mask
                    elif isinstance(module, nl.SharableLinear):
                        mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                        mask = mask.cuda()
                        mask[:, :].copy_(masks[name][:mask.size(0), :mask.size(1)])
                        masks[name] = mask

    if args.dataset not in shared_layer_info:
        shared_layer_info[args.dataset] = {
            'bias': {},
            'bn_layer_running_mean': {},
            'bn_layer_running_var': {},
            'bn_layer_weight': {},
            'bn_layer_bias': {},
            'piggymask': {}
        }

        piggymasks = {}
        task_id = model.module.datasets.index(args.dataset) + 1
        if task_id > 1:
            for name, module in model.module.named_modules():
                if isinstance(module, nl.SharableConv2d) or isinstance(module, nl.SharableLinear):
                    piggymasks[name] = torch.zeros_like(masks['module.' + name], dtype=torch.float32)
                    piggymasks[name].fill_(0.01)
                    piggymasks[name] = Parameter(piggymasks[name])
                    module.piggymask = piggymasks[name]
    else:
        piggymasks = shared_layer_info[args.dataset]['piggymask']
        task_id = model.module.datasets.index(args.dataset) + 1
        if task_id > 1:
            for name, module in model.module.named_modules():
                if isinstance(module, nl.SharableConv2d) or isinstance(module, nl.SharableLinear):
                    module.piggymask = piggymasks[name]

    shared_layer_info[args.dataset]['network_width_multiplier'] = args.network_width_multiplier

    if 'cropped' in args.dataset:
        train_loader = dataset.train_loader_cropped(args.train_path, args.batch_size)
        val_loader = dataset.val_loader_cropped(args.val_path, args.val_batch_size)
    else:
        train_loader = dataset.train_loader(args.train_path, args.batch_size)
        val_loader = dataset.val_loader(args.val_path, args.val_batch_size)

    # If we are going to save checkpoints to another folder, restart the epoch count.
    if args.save_folder != args.load_folder:
        start_epoch = 0
    else:
        start_epoch = resume_from_epoch

    curr_prune_step = begin_prune_step = start_epoch * len(train_loader)
    end_prune_step = curr_prune_step + args.pruning_interval * len(train_loader)

    manager = Manager(args, model, shared_layer_info, masks,
                      train_loader, val_loader, begin_prune_step, end_prune_step)
    if args.mode == 'inference':
        manager.load_checkpoint_only_for_evaluate(resume_from_epoch, resume_folder)
        manager.validate(resume_from_epoch - 1)
        return

    lr = args.lr
    lr_mask = args.lr_mask
    # Update all layers.
    named_params = dict(model.named_parameters())
    params_to_optimize_via_SGD = []
    named_of_params_to_optimize_via_SGD = []
    masks_to_optimize_via_Adam = []
    named_of_masks_to_optimize_via_Adam = []

    for name, param in named_params.items():
        if 'classifiers' in name:
            if '.{}.'.format(model.module.datasets.index(args.dataset)) in name:
                params_to_optimize_via_SGD.append(param)
                named_of_params_to_optimize_via_SGD.append(name)
            continue
        elif 'piggymask' in name:
            masks_to_optimize_via_Adam.append(param)
            named_of_masks_to_optimize_via_Adam.append(name)
        else:
            params_to_optimize_via_SGD.append(param)
            named_of_params_to_optimize_via_SGD.append(name)

    optimizer_network = optim.SGD(params_to_optimize_via_SGD, lr=lr,
                                  weight_decay=0.0, momentum=0.9, nesterov=True)
    optimizers = Optimizers()
    optimizers.add(optimizer_network, lr)

    if masks_to_optimize_via_Adam:
        optimizer_mask = optim.Adam(masks_to_optimize_via_Adam, lr=lr_mask)
        optimizers.add(optimizer_mask, lr_mask)

    manager.load_checkpoint(optimizers, resume_from_epoch, resume_folder)

    # total_elements = 0
    # total_zeros_elements = 0
    # for name, module in model.named_modules():
    #     if isinstance(module, nl.SharableConv2d):
    #         zero_channels = module.piggymask.le(args.threshold).sum()
    #         zero_elements = module.weight.data.numel() / module.piggymask.size(0) * zero_channels
    #         total_zeros_elements += zero_elements
    #         total_elements += module.weight.data.numel()
    #         print('{}: channel level: num_zeros {}, total {}; '
    #               'element level: num_zeros {}, total {}'.format(
    #                   name, zero_channels, module.piggymask.size(0),
    #                   zero_elements, module.weight.data.numel()))
    #         # zero_elements = module.piggymask.le(args.threshold).sum()
    #         # total_zeros_elements += zero_elements
    #         # total_elements += module.weight.data.numel()
    #         # print('{}: element level: num_zeros {}, total {}'.format(
    #         #     name, zero_elements, module.piggymask.numel()))
    # print('pruning ratio: {}'.format(float(total_zeros_elements) / total_elements))
    # pdb.set_trace()

    # Performs training.
    curr_lrs = []
    for optimizer in optimizers:
        for param_group in optimizer.param_groups:
            curr_lrs.append(param_group['lr'])
            break

    if args.jsonfile is None or not os.path.isfile(args.jsonfile):
        sys.exit(3)

    with open(args.jsonfile, 'r') as jsonfile:
        json_data = json.load(jsonfile)
        baseline_acc = float(json_data[args.dataset])

    if args.mode == 'prune':
        if args.dataset != 'imagenet':
            history_best_avg_val_acc_when_prune = 0.0
            # history_best_avg_val_acc_when_prune = baseline_acc - 0.005
        else:
            if 'vgg' in args.arch:
                baseline_acc = 0.7336
                history_best_avg_val_acc_when_prune = baseline_acc - 0.005
            elif 'resnet50' in args.arch:
                baseline_acc = 0.7616
                history_best_avg_val_acc_when_prune = baseline_acc - 0.005
            else:
                print('Something is wrong: no ImageNet baseline for arch {}'.format(args.arch))
                sys.exit(1)
        stop_prune = True

        if 'gradual_prune' in args.load_folder and args.save_folder == args.load_folder:
            if args.dataset == 'imagenet':
                args.epochs = 10 + resume_from_epoch
            else:
                args.epochs = 20 + resume_from_epoch
        logging.info('')
        logging.info('Before pruning: ')
        logging.info('Sparsity range: {} -> {}'.format(args.initial_sparsity, args.target_sparsity))
        manager.validate(start_epoch - 1)
        logging.info('')
    elif args.mode == 'finetune':
        manager.pruner.make_finetuning_mask()
        if args.dataset == 'imagenet':
            manager.validate(0)
            manager.save_checkpoint(optimizers, 0, args.save_folder)
            return

    history_best_avg_val_acc = 0.0
    num_epochs_that_criterion_does_not_get_better = 0
    times_of_decaying_learning_rate = 0

    for epoch_idx in range(start_epoch, args.epochs):
        avg_train_acc, curr_prune_step = manager.train(optimizers, epoch_idx, curr_lrs, curr_prune_step)
        avg_val_acc = manager.validate(epoch_idx)

        if args.mode == 'prune' and (epoch_idx + 1) >= (args.pruning_interval + start_epoch) and (
                avg_val_acc > history_best_avg_val_acc_when_prune):
            stop_prune = False
            history_best_avg_val_acc_when_prune = avg_val_acc
            if args.save_folder is not None:
                paths = os.listdir(args.save_folder)
                if paths and '.pth.tar' in paths[0]:
                    for checkpoint_file in paths:
                        os.remove(os.path.join(args.save_folder, checkpoint_file))
            else:
                print('Something is wrong! Block the program with pdb')
                pdb.set_trace()
            manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)

        if args.mode == 'finetune':
            if avg_val_acc > history_best_avg_val_acc:
                num_epochs_that_criterion_does_not_get_better = 0
                if args.save_folder is not None:
                    paths = os.listdir(args.save_folder)
                    if paths and '.pth.tar' in paths[0]:
                        for checkpoint_file in paths:
                            os.remove(os.path.join(args.save_folder, checkpoint_file))
                else:
                    print('Something is wrong! Block the program with pdb')
                    pdb.set_trace()
                history_best_avg_val_acc = avg_val_acc
                manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)
            else:
                num_epochs_that_criterion_does_not_get_better += 1

            if times_of_decaying_learning_rate >= 3:
                print()
                print("times_of_decaying_learning_rate reached {}, stop training".format(
                    times_of_decaying_learning_rate))
                break

            if num_epochs_that_criterion_does_not_get_better >= 5:
                times_of_decaying_learning_rate += 1
                print()
                print("{} consecutive epochs without higher acc, "
                      "decay learning rate by multiplying 0.1".format(
                          num_epochs_that_criterion_does_not_get_better))
                num_epochs_that_criterion_does_not_get_better = 0
                for param_group in optimizers[0].param_groups:
                    param_group['lr'] *= 0.1
                curr_lrs[0] = param_group['lr']
                if times_of_decaying_learning_rate == 1 and len(optimizers.lrs) == 2:
                    for param_group in optimizers[1].param_groups:
                        param_group['lr'] *= 0.2
                    curr_lrs[1] = param_group['lr']

        print('-' * 16)

    if args.pruning_ratio_to_acc_record_file:
        json_data = {}
        if os.path.isfile(args.pruning_ratio_to_acc_record_file):
            with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file:
                json_data = json.load(json_file)

    if args.mode == 'finetune' and not args.test_piggymask:
        if args.pruning_ratio_to_acc_record_file:
            json_data[0.0] = round(history_best_avg_val_acc, 4)
            with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file:
                json.dump(json_data, json_file)

        if history_best_avg_val_acc - baseline_acc > -0.005:
            # TODO
            # json_data = {}
            # json_data['acc_before_prune'] = '{:.4f}'.format(history_best_avg_val_acc)
            # with open(args.tmp_benchmark_file, 'w') as jsonfile:
            #     json.dump(json_data, jsonfile)
            pass
        else:
            print("It's time to expand the Network")
            print('Auto expand network')
            sys.exit(2)

        if manager.pruner.calculate_curr_task_ratio() == 0.0:
            print('There is no space left in the convolutional layers for the '
                  'current task, so there is nothing to prune')
            sys.exit(5)
    elif args.mode == 'prune':
        # if stop_prune:
        #     print('Acc too low, stop pruning.')
        #     sys.exit(4)
        if args.pruning_ratio_to_acc_record_file:
            json_data[args.target_sparsity] = round(history_best_avg_val_acc_when_prune, 4)
            with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file:
                json.dump(json_data, json_file)
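# The scripts above read and rewrite `args.pruning_ratio_to_acc_record_file`, a flat
# JSON dict mapping a sparsity level to the best validation accuracy reached at that
# level (key 0.0 holds the un-pruned accuracy). The stand-alone helper below mirrors
# that round-trip; the example path and values are illustrative only.
import json
import os


def update_pruning_record(record_file, sparsity, accuracy):
    """Merge one (sparsity -> accuracy) entry into the JSON record file."""
    json_data = {}
    if os.path.isfile(record_file):
        with open(record_file, 'r') as json_file:
            json_data = json.load(json_file)
    # JSON keys are strings, so normalize the sparsity key explicitly.
    json_data[str(sparsity)] = round(accuracy, 4)
    with open(record_file, 'w') as json_file:
        json.dump(json_data, json_file)


# Example: update_pruning_record('logs/vgg16_record.json', 0.3, 0.7412)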
def main():
    """Do stuff."""
    args = parser.parse_args()

    # Do not scale the batch size (or the learning rate) linearly with the number
    # of GPUs; doing so lowers accuracy.
    # args.batch_size = args.batch_size * torch.cuda.device_count()

    args.network_width_multiplier = math.sqrt(args.network_width_multiplier)

    if args.save_folder and not os.path.isdir(args.save_folder):
        os.makedirs(args.save_folder)

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        args.cuda = False

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    cudnn.benchmark = True

    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch = 0
    resume_folder = args.load_folder
    for try_epoch in range(200, 0, -1):
        if os.path.exists(args.checkpoint_format.format(save_folder=resume_folder, epoch=try_epoch)):
            resume_from_epoch = try_epoch
            break

    if args.restore_epoch:
        resume_from_epoch = args.restore_epoch

    # Set default train and test path if not provided as input.
    utils.set_dataset_paths(args)

    if resume_from_epoch:
        filepath = args.checkpoint_format.format(save_folder=resume_folder, epoch=resume_from_epoch)
        checkpoint = torch.load(filepath)
        checkpoint_keys = checkpoint.keys()
        dataset_history = checkpoint['dataset_history']
        dataset2num_classes = checkpoint['dataset2num_classes']
        masks = checkpoint['masks']
        shared_layer_info = checkpoint['shared_layer_info']
        if 'num_for_construct' in checkpoint_keys:
            num_for_construct = checkpoint['num_for_construct']
        if args.mode == 'inference' and 'network_width_multiplier' in shared_layer_info[args.dataset]:
            args.network_width_multiplier = shared_layer_info[args.dataset]['network_width_multiplier']
    else:
        dataset_history = []
        dataset2num_classes = {}
        masks = {}
        shared_layer_info = {}

    if args.arch == 'resnet50':
        num_for_construct = [64, 64, 64 * 4, 128, 128 * 4, 256, 256 * 4, 512, 512 * 4]
        model = models.__dict__[args.arch](pretrained=True,
                                           num_for_construct=num_for_construct,
                                           threshold=args.threshold)
    elif 'vgg' in args.arch:
        custom_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
                      512, 512, 512, 'M', 512, 512, 512, 'M']
        model = models.__dict__[args.arch](custom_cfg,
                                           dataset_history=dataset_history,
                                           dataset2num_classes=dataset2num_classes,
                                           network_width_multiplier=args.network_width_multiplier,
                                           shared_layer_info=shared_layer_info)
    else:
        print('Error: unsupported architecture {}'.format(args.arch))
        sys.exit(1)

    # Add and set the model dataset.
    model.add_dataset(args.dataset, args.num_classes)
    model.set_dataset(args.dataset)

    model = nn.DataParallel(model)
    model = model.cuda()

    if not masks:
        for name, module in model.named_modules():
            if isinstance(module, nl.SharableConv2d) or isinstance(module, nl.SharableLinear):
                mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                if 'cuda' in module.weight.data.type():
                    mask = mask.cuda()
                masks[name] = mask
    else:
        # When we expand the network, we need to allocate new masks.
        NEED_ADJUST_MASK = False
        for name, module in model.named_modules():
            if isinstance(module, nl.SharableConv2d):
                if masks[name].size(1) < module.weight.data.size(1):
                    assert args.mode == 'finetune'
                    NEED_ADJUST_MASK = True
                elif masks[name].size(1) > module.weight.data.size(1):
                    assert args.mode == 'inference'
                    NEED_ADJUST_MASK = True

        if NEED_ADJUST_MASK:
            if args.mode == 'finetune':
                for name, module in model.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:masks[name].size(0), :masks[name].size(1), :, :].copy_(masks[name])
                        masks[name] = mask
                    elif isinstance(module, nl.SharableLinear):
                        mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:masks[name].size(0), :masks[name].size(1)].copy_(masks[name])
                        masks[name] = mask
            elif args.mode == 'inference':
                for name, module in model.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:, :, :, :].copy_(masks[name][:mask.size(0), :mask.size(1), :, :])
                        masks[name] = mask
                    elif isinstance(module, nl.SharableLinear):
                        mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:, :].copy_(masks[name][:mask.size(0), :mask.size(1)])
                        masks[name] = mask

    if args.dataset not in shared_layer_info:
        shared_layer_info[args.dataset] = {
            'bias': {},
            'bn_layer_running_mean': {},
            'bn_layer_running_var': {},
            'bn_layer_weight': {},
            'bn_layer_bias': {},
            'piggymask': {}
        }

        piggymasks = {}
        task_id = model.module.datasets.index(args.dataset) + 1
        if task_id > 1:
            for name, module in model.module.named_modules():
                if isinstance(module, nl.SharableConv2d) or isinstance(module, nl.SharableLinear):
                    piggymasks[name] = torch.zeros_like(masks['module.' + name], dtype=torch.float32)
                    piggymasks[name].fill_(0.01)
                    piggymasks[name] = Parameter(piggymasks[name])
                    module.piggymask = piggymasks[name]
    else:
        piggymasks = shared_layer_info[args.dataset]['piggymask']
        task_id = model.module.datasets.index(args.dataset) + 1
        if task_id > 1:
            for name, module in model.module.named_modules():
                if isinstance(module, nl.SharableConv2d) or isinstance(module, nl.SharableLinear):
                    module.piggymask = piggymasks[name]

    shared_layer_info[args.dataset]['network_width_multiplier'] = args.network_width_multiplier

    if args.num_classes == 2:
        train_loader = dataset.cifar100_train_loader_two_class(args.dataset, args.batch_size)
        val_loader = dataset.cifar100_val_loader_two_class(args.dataset, args.val_batch_size)
    elif args.num_classes == 5:
        train_loader = dataset.cifar100_train_loader(args.dataset, args.batch_size)
        val_loader = dataset.cifar100_val_loader(args.dataset, args.val_batch_size)
    else:
        print("num_classes should be either 2 or 5")
        sys.exit(1)

    # If we are going to save checkpoints to another folder, restart the epoch count.
    if args.save_folder != args.load_folder:
        start_epoch = 0
    else:
        start_epoch = resume_from_epoch

    curr_prune_step = begin_prune_step = start_epoch * len(train_loader)
    end_prune_step = curr_prune_step + args.pruning_interval * len(train_loader)

    manager = Manager(args, model, shared_layer_info, masks,
                      train_loader, val_loader, begin_prune_step, end_prune_step)
    if args.mode == 'inference':
        manager.load_checkpoint_only_for_evaluate(resume_from_epoch, resume_folder)
        manager.validate(resume_from_epoch - 1)
        return

    lr = args.lr
    lr_mask = args.lr_mask
    # Update all layers.
    named_params = dict(model.named_parameters())
    params_to_optimize_via_SGD = []
    named_of_params_to_optimize_via_SGD = []
    masks_to_optimize_via_Adam = []
    named_of_masks_to_optimize_via_Adam = []

    for name, param in named_params.items():
        if 'classifiers' in name:
            if '.{}.'.format(model.module.datasets.index(args.dataset)) in name:
                params_to_optimize_via_SGD.append(param)
                named_of_params_to_optimize_via_SGD.append(name)
            continue
        elif 'piggymask' in name:
            masks_to_optimize_via_Adam.append(param)
            named_of_masks_to_optimize_via_Adam.append(name)
        else:
            params_to_optimize_via_SGD.append(param)
            named_of_params_to_optimize_via_SGD.append(name)

    optimizer_network = optim.SGD(params_to_optimize_via_SGD, lr=lr,
                                  weight_decay=0.0, momentum=0.9, nesterov=True)
    optimizers = Optimizers()
    optimizers.add(optimizer_network, lr)

    if masks_to_optimize_via_Adam:
        optimizer_mask = optim.Adam(masks_to_optimize_via_Adam, lr=lr_mask)
        optimizers.add(optimizer_mask, lr_mask)

    manager.load_checkpoint(optimizers, resume_from_epoch, resume_folder)

    # total_elements = 0
    # total_zeros_elements = 0
    # for name, module in model.named_modules():
    #     if isinstance(module, nl.SharableConv2d):
    #         zero_channels = module.piggymask.le(args.threshold).sum()
    #         zero_elements = module.weight.data.numel() / module.piggymask.size(0) * zero_channels
    #         total_zeros_elements += zero_elements
    #         total_elements += module.weight.data.numel()
    #         print('{}: channel level: num_zeros {}, total {}; '
    #               'element level: num_zeros {}, total {}'.format(
    #                   name, zero_channels, module.piggymask.size(0),
    #                   zero_elements, module.weight.data.numel()))
    #         # zero_elements = module.piggymask.le(args.threshold).sum()
    #         # total_zeros_elements += zero_elements
    #         # total_elements += module.weight.data.numel()
    #         # print('{}: element level: num_zeros {}, total {}'.format(
    #         #     name, zero_elements, module.piggymask.numel()))
    # print('pruning ratio: {}'.format(float(total_zeros_elements) / total_elements))
    # pdb.set_trace()

    # Performs training.
    curr_lrs = []
    for optimizer in optimizers:
        for param_group in optimizer.param_groups:
            curr_lrs.append(param_group['lr'])
            break

    if start_epoch != 0:
        curr_best_accuracy = manager.validate(start_epoch - 1)
    else:
        curr_best_accuracy = 0.0

    if args.jsonfile is None or not os.path.isfile(args.jsonfile):
        sys.exit(3)

    with open(args.jsonfile, 'r') as jsonfile:
        json_data = json.load(jsonfile)
        baseline_acc = float(json_data[args.dataset])

    if args.mode == 'prune':
        # with open(os.path.join(os.getcwd(), args.tmp_benchmark_file), 'r') as jsonfile:
        #     json_data = json.load(jsonfile)
        #     acc_before_prune = float(json_data['acc_before_prune'])
        # if acc_before_prune - baseline_acc > 0.01:
        #     history_best_avg_val_acc_when_prune = acc_before_prune - 0.015
        # else:
        #     history_best_avg_val_acc_when_prune = acc_before_prune - 0.01
        history_best_avg_val_acc_when_prune = baseline_acc - 0.01
        stop_prune = True

        if 'gradual_prune' in args.load_folder and args.save_folder == args.load_folder:
            args.epochs = 20 + resume_from_epoch
        print()
        print('Before pruning: ')
        print('Sparsity range: {} -> {}'.format(args.initial_sparsity, args.target_sparsity))
        curr_best_accuracy = manager.validate(start_epoch - 1)
        print()
    elif args.mode == 'finetune':
        manager.pruner.make_finetuning_mask()
        history_best_avg_val_acc = 0.0

    for epoch_idx in range(start_epoch, args.epochs):
        avg_train_acc, curr_prune_step = manager.train(optimizers, epoch_idx, curr_lrs, curr_prune_step)
        avg_val_acc = manager.validate(epoch_idx)

        if args.mode == 'prune' and (epoch_idx + 1) >= (args.pruning_interval + start_epoch) and (
                avg_val_acc > history_best_avg_val_acc_when_prune):
            stop_prune = False
            history_best_avg_val_acc_when_prune = avg_val_acc
            if args.save_folder is not None:
                paths = os.listdir(args.save_folder)
                if paths and '.pth.tar' in paths[0]:
                    for checkpoint_file in paths:
                        os.remove(os.path.join(args.save_folder, checkpoint_file))
            else:
                print('Something is wrong! Block the program with pdb')
                pdb.set_trace()
            manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)

        if args.mode == 'finetune':
            if epoch_idx + 1 == 50 or epoch_idx + 1 == 80:
                for param_group in optimizers[0].param_groups:
                    param_group['lr'] *= 0.1
                curr_lrs[0] = param_group['lr']
            if len(optimizers.lrs) == 2 and epoch_idx + 1 == 50:
                for param_group in optimizers[1].param_groups:
                    param_group['lr'] *= 0.2
                curr_lrs[1] = param_group['lr']

            if avg_val_acc > history_best_avg_val_acc:
                if args.save_folder is not None:
                    paths = os.listdir(args.save_folder)
                    if paths and '.pth.tar' in paths[0]:
                        for checkpoint_file in paths:
                            os.remove(os.path.join(args.save_folder, checkpoint_file))
                else:
                    print('Something is wrong! Block the program with pdb')
                    pdb.set_trace()
                history_best_avg_val_acc = avg_val_acc
                manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)

        print('-' * 16)

    if args.mode == 'finetune' and not args.test_piggymask:
        if avg_train_acc > 0.95 and (history_best_avg_val_acc - baseline_acc) > -0.01:
            # json_data = {}
            # json_data['acc_before_prune'] = '{:.4f}'.format(history_best_avg_val_acc)
            # with open(args.tmp_benchmark_file, 'w') as jsonfile:
            #     json.dump(json_data, jsonfile)
            pass
        else:
            print("It's time to expand the Network")
            print('Auto expand network')
            sys.exit(2)
    elif args.mode == 'prune' and stop_prune:
        print('Acc too low, stop pruning.')
        sys.exit(4)
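# In the scripts above, `begin_prune_step` and `end_prune_step` bound a gradual
# pruning schedule that ramps the sparsity from `initial_sparsity` to
# `target_sparsity` over `pruning_interval` epochs. The repo's pruner defines the
# actual curve it follows; the cubic schedule below (Zhu & Gupta, 2017) is only one
# common choice, shown here for illustration.
def gradual_sparsity(curr_step, begin_step, end_step, initial_sparsity, target_sparsity):
    """Cubic interpolation of the sparsity level between two training steps."""
    if curr_step <= begin_step:
        return initial_sparsity
    if curr_step >= end_step:
        return target_sparsity
    progress = (curr_step - begin_step) / float(end_step - begin_step)
    return target_sparsity + (initial_sparsity - target_sparsity) * (1.0 - progress) ** 3


# Example: gradual_sparsity(150, 0, 1000, 0.0, 0.6) is roughly 0.23.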
def main():
    """Do stuff."""
    args = parser.parse_args()
    # args.batch_size = args.batch_size * torch.cuda.device_count()

    if args.save_folder and not os.path.isdir(args.save_folder):
        os.makedirs(args.save_folder)

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        args.cuda = False

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    cudnn.benchmark = True

    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch = 0
    resume_folder = args.load_folder
    for try_epoch in range(200, 0, -1):
        if os.path.exists(args.checkpoint_format.format(save_folder=resume_folder, epoch=try_epoch)):
            resume_from_epoch = try_epoch
            break

    if args.restore_epoch:
        resume_from_epoch = args.restore_epoch

    # Set default train and test path if not provided as input.
    utils.set_dataset_paths(args)

    if resume_from_epoch:
        filepath = args.checkpoint_format.format(save_folder=resume_folder, epoch=resume_from_epoch)
        checkpoint = torch.load(filepath)
        checkpoint_keys = checkpoint.keys()
        dataset_history = checkpoint['dataset_history']
        dataset2num_classes = checkpoint['dataset2num_classes']
        masks = checkpoint['masks']
        if 'shared_layer_info' in checkpoint_keys:
            shared_layer_info = checkpoint['shared_layer_info']
        else:
            shared_layer_info = {}
    else:
        dataset_history = []
        dataset2num_classes = {}
        masks = {}
        shared_layer_info = {}

    if args.arch == 'resnet50':
        model = packnet_models.__dict__[args.arch](
            dataset_history=dataset_history,
            dataset2num_classes=dataset2num_classes)
    elif 'vgg' in args.arch:
        model = packnet_models.__dict__[args.arch](
            pretrained=args.use_imagenet_pretrained,
            dataset_history=dataset_history,
            dataset2num_classes=dataset2num_classes)
    else:
        print('Error: unsupported architecture {}'.format(args.arch))
        sys.exit(1)

    # Add and set the model dataset.
    model.add_dataset(args.dataset, args.num_classes)
    model.set_dataset(args.dataset)

    # Move model to GPU.
    model = nn.DataParallel(model)
    model = model.cuda()

    # For datasets whose image_size is 224, and only for the first task.
    if args.use_imagenet_pretrained and model.module.datasets.index(args.dataset) == 0:
        curr_model_state_dict = model.state_dict()
        if args.arch == 'vgg16_bn':
            state_dict = model_zoo.load_url(model_urls['vgg16_bn'])
            for name, param in state_dict.items():
                if 'classifier' not in name:
                    curr_model_state_dict['module.' + name].copy_(param)
            curr_model_state_dict['module.features.45.weight'].copy_(state_dict['classifier.0.weight'])
            curr_model_state_dict['module.features.45.bias'].copy_(state_dict['classifier.0.bias'])
            curr_model_state_dict['module.features.48.weight'].copy_(state_dict['classifier.3.weight'])
            curr_model_state_dict['module.features.48.bias'].copy_(state_dict['classifier.3.bias'])
            if args.dataset == 'imagenet':
                curr_model_state_dict['module.classifiers.0.weight'].copy_(state_dict['classifier.6.weight'])
                curr_model_state_dict['module.classifiers.0.bias'].copy_(state_dict['classifier.6.bias'])
        elif args.arch == 'resnet50':
            state_dict = model_zoo.load_url(model_urls['resnet50'])
            for name, param in state_dict.items():
                if 'fc' not in name:
                    curr_model_state_dict['module.' + name].copy_(param)
            if args.dataset == 'imagenet':
                curr_model_state_dict['module.classifiers.0.weight'].copy_(state_dict['fc.weight'])
                curr_model_state_dict['module.classifiers.0.bias'].copy_(state_dict['fc.bias'])
        else:
            print("We haven't defined the mapping between the ImageNet pretrained weights "
                  "and our model for {}".format(args.arch))
            sys.exit(5)

    if not masks:
        for name, module in model.named_modules():
            if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
                if 'classifiers' in name:
                    continue
                mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                if 'cuda' in module.weight.data.type():
                    mask = mask.cuda()
                masks[name] = mask

    if args.dataset not in shared_layer_info:
        shared_layer_info[args.dataset] = {
            'conv_bias': {},
            'bn_layer_running_mean': {},
            'bn_layer_running_var': {},
            'bn_layer_weight': {},
            'bn_layer_bias': {},
            'fc_bias': {}
        }

    if 'cropped' in args.dataset:
        train_loader = dataset.train_loader_cropped(args.train_path, args.batch_size)
        val_loader = dataset.val_loader_cropped(args.val_path, args.val_batch_size)
    else:
        train_loader = dataset.train_loader(args.train_path, args.batch_size)
        val_loader = dataset.val_loader(args.val_path, args.val_batch_size)

    # If we are going to save checkpoints to another folder, restart the epoch count.
    if args.save_folder != args.load_folder:
        start_epoch = 0
    else:
        start_epoch = resume_from_epoch

    manager = Manager(args, model, shared_layer_info, masks, train_loader, val_loader)
    if args.mode == 'inference':
        manager.load_checkpoint_for_inference(resume_from_epoch, resume_folder)
        manager.validate(resume_from_epoch - 1)
        return

    lr = args.lr
    # Update all layers.
    named_params = dict(model.named_parameters())
    params_to_optimize_via_SGD = []
    named_of_params_to_optimize_via_SGD = []

    for name, param in named_params.items():
        if 'classifiers' in name:
            if '.{}.'.format(model.module.datasets.index(args.dataset)) in name:
                params_to_optimize_via_SGD.append(param)
                named_of_params_to_optimize_via_SGD.append(name)
            continue
        else:
            params_to_optimize_via_SGD.append(param)
            named_of_params_to_optimize_via_SGD.append(name)

    # Weight decay must be 0.0 here: the decay applied inside the built-in step()
    # touches every element of the weight tensor, which would hurt previous tasks'
    # accuracy. (Instead, we apply weight decay ourselves in `prune.py`.)
    optimizer_network = optim.SGD(params_to_optimize_via_SGD, lr=lr,
                                  weight_decay=0.0, momentum=0.9, nesterov=True)
    optimizers = Optimizers()
    optimizers.add(optimizer_network, lr)

    manager.load_checkpoint(optimizers, resume_from_epoch, resume_folder)

    # Performs training.
    curr_lrs = []
    for optimizer in optimizers:
        for param_group in optimizer.param_groups:
            curr_lrs.append(param_group['lr'])
            break

    if start_epoch != 0:
        curr_best_accuracy = manager.validate(start_epoch - 1)
    elif args.mode == 'prune':
        print()
        print('Sparsity ratio: {}'.format(args.one_shot_prune_perc))
        print('Before pruning: ')
        with open(args.jsonfile, 'r') as jsonfile:
            json_data = json.load(jsonfile)
            baseline_acc = float(json_data[args.dataset])
        # baseline_acc = manager.validate(start_epoch - 1)
        print('Execute one shot pruning ...')
        manager.one_shot_prune(args.one_shot_prune_perc)
    else:
        curr_best_accuracy = 0.0

    if args.mode == 'finetune':
        manager.pruner.make_finetuning_mask()
        if args.dataset == 'imagenet':
            avg_val_acc = manager.validate(0)
            manager.save_checkpoint(optimizers, 0, args.save_folder)
            if args.logfile:
                json_data = {}
                if os.path.isfile(args.logfile):
                    with open(args.logfile) as json_file:
                        json_data = json.load(json_file)
                json_data[args.dataset] = '{:.4f}'.format(avg_val_acc)
                with open(args.logfile, 'w') as json_file:
                    json.dump(json_data, json_file)
            return

    history_best_val_acc = 0.0
    num_epochs_that_criterion_does_not_get_better = 0
    times_of_decaying_learning_rate = 0

    for epoch_idx in range(start_epoch, args.epochs):
        avg_train_acc = manager.train(optimizers, epoch_idx, curr_lrs)
        avg_val_acc = manager.validate(epoch_idx)

        if args.mode == 'finetune':
            if avg_val_acc > history_best_val_acc:
                num_epochs_that_criterion_does_not_get_better = 0
                history_best_val_acc = avg_val_acc
                if args.save_folder is not None:
                    paths = os.listdir(args.save_folder)
                    if paths and '.pth.tar' in paths[0]:
                        for checkpoint_file in paths:
                            os.remove(os.path.join(args.save_folder, checkpoint_file))
                else:
                    print('Something is wrong! Block the program with pdb')
                    pdb.set_trace()
                manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)
                if args.logfile:
                    json_data = {}
                    if os.path.isfile(args.logfile):
                        with open(args.logfile) as json_file:
                            json_data = json.load(json_file)
                    json_data[args.dataset] = '{:.4f}'.format(avg_val_acc)
                    with open(args.logfile, 'w') as json_file:
                        json.dump(json_data, json_file)
            else:
                num_epochs_that_criterion_does_not_get_better += 1

            if times_of_decaying_learning_rate >= 3:
                print()
                print("times_of_decaying_learning_rate reached {}, stop training".format(
                    times_of_decaying_learning_rate))
                break

            if num_epochs_that_criterion_does_not_get_better >= 10:
                times_of_decaying_learning_rate += 1
                print()
                print("{} consecutive epochs without higher acc, "
                      "decay learning rate by multiplying 0.1".format(
                          num_epochs_that_criterion_does_not_get_better))
                num_epochs_that_criterion_does_not_get_better = 0
                for param_group in optimizers[0].param_groups:
                    param_group['lr'] *= 0.1
                curr_lrs[0] = param_group['lr']

        if args.mode == 'prune':
            if epoch_idx + 1 == 40:
                for param_group in optimizers[0].param_groups:
                    param_group['lr'] *= 0.1
                curr_lrs[0] = param_group['lr']

        if args.mode == 'prune':
            if avg_train_acc > 0.97 and (avg_val_acc - baseline_acc) >= -0.01:
                manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)
            else:
                print('Pruning too much!')
        elif args.mode == 'finetune':
            if avg_train_acc < 0.97:
                print('Cannot prune any more!')

        print('-' * 16)
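# `manager.one_shot_prune(perc)` above removes, in a single step, the lowest-magnitude
# fraction `perc` of the weights that the *current* task owns (mask entries equal to the
# current dataset index), releasing them for later tasks. The real logic lives in the
# repo's pruner; the helper below is only a sketch of that idea using the same mask
# convention, not the actual implementation.
import torch


def one_shot_prune_sketch(weight, mask, current_dataset_idx, prune_perc):
    """Zero out the smallest-magnitude weights owned by the current task, in place."""
    owned = mask.eq(current_dataset_idx)
    if owned.sum() == 0:
        return
    owned_weights = weight[owned].abs()
    # Magnitude below which a currently-owned weight gets released.
    cutoff = owned_weights.view(-1).kthvalue(max(1, int(prune_perc * owned_weights.numel())))[0]
    release = owned & weight.abs().le(cutoff)
    weight[release] = 0.0
    mask[release] = 0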