def get_se_sites():
    """Read the site coordinates from the CSV file and return a dictionary of SE sites."""
    coordinates_data = {}
    try:
        csv_file = open('sites.tmp', "r")
    except IOError:
        sys.exit("There is no coordinate info for SEs. "
                 "Get the computing sites first with get_ce_sites()")
    lines = csv_file.read().split("\n")
    for row in lines:
        if row != "":
            site = row.split(',')
            # Accept only rows whose latitude and longitude parse as (signed) decimals.
            if site[1].replace('.', '', 1).replace('-', '', 1).isdigit() and \
               site[2].replace('.', '', 1).replace('-', '', 1).isdigit():
                coordinates_data[site[0]] = [float(site[1]), float(site[2])]

    se_sites = {}
    manager = Manager()
    se_result = manager.listSEs()['Value']
    se_health = service.getHealthyProductionSEs()
    # se_health = service.getAllStorageElementStatus()
    if not se_health['OK']:
        print "[WARNING]: No health info for SE sites"
    for se in sorted(se_result['Active']):
        se_site = {}
        result = gConfig.getOptionsDict('/Resources/StorageElements/' + se)
        result2 = gConfig.getOptionsDict('/Resources/StorageElements/' + se
                                         + '/AccessProtocol.1')
        if result['OK'] and result2['OK']:
            result = result['Value']
            result2 = result2['Value']
            se_site = result2
            se_site['Read'] = result['ReadAccess']
            se_site['Write'] = result['WriteAccess']
            if se.split('-')[0] in coordinates_data:
                se_site['Coordinates'] = coordinates_data[se.split('-')[0]]
            else:
                print "[WARNING]: No location info for " + se
            se_sites[se] = se_site
            if se_health['OK'] and se in se_health['Value']:
                se_site['Health'] = se_health['Value'][se][0]
    return se_sites
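# The isdigit()-based check above rejects values written in scientific notation and
# silently drops rows with too few columns. A sketch of a more robust variant
# (standalone illustration only; `load_coordinates` is a hypothetical helper, not part
# of the module above, but it assumes the same 'name,lat,lon' layout of sites.tmp):
def load_coordinates(path='sites.tmp'):
    """Return {site_name: [latitude, longitude]} parsed from a 'name,lat,lon' CSV file."""
    coordinates = {}
    with open(path, 'r') as csv_file:
        for row in csv_file:
            parts = row.strip().split(',')
            if len(parts) < 3:
                continue  # skip blank or malformed rows
            try:
                coordinates[parts[0]] = [float(parts[1]), float(parts[2])]
            except ValueError:
                continue  # skip rows whose coordinates do not parse as floats
    return coordinates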
def main(): """Do stuff.""" args = argparser.get_parser() # Don't use this, neither set learning rate as a linear function # of the count of gpus, it will make accuracy lower # args.batch_size = args.batch_size * torch.cuda.device_count() set_seeds(args) settings(args) logger = CsvLogger(file_name='acc', resume=True, path='results', data_format='csv') # If set > 0, will resume training from a given checkpoint. resume_from_epoch, resume_folder = check_resume_epoch(args) dataset_history, dataset2num_classes, masks, shared_layer_info = info_reload( resume_from_epoch, args) model = build_model(args, dataset_history, dataset2num_classes) model = nn.DataParallel(model) for dataset in dataset_history: args.dataset = dataset utils.set_dataset_paths(args) model.module.set_dataset(args.dataset) train_loader, val_loader, test_loader = load_data(args) manager = Manager(args, model, shared_layer_info, masks, train_loader, test_loader) manager.load_checkpoint_only_for_evaluate(resume_from_epoch, resume_folder) model = model.cuda() test_acc = manager.validate(resume_from_epoch - 1) idx = dataset_history.index(dataset) finished = len(dataset_history) logger.add(dataset=dataset, idx=idx, finished=finished, acc=round(test_acc, 4)) logger.save() return
""" Documentation: http://wiki.guildwars2.com/wiki/API Threading: http://www.tutorialspoint.com/python/python_multithreading.htm """ from utils.manager import Manager from helpers.data_mapping import MetaData """ Main processing """ if __name__ == "__main__": # Create manager (a.k.a Task Master) to do the majority of the work # @todo: Provide it a list of tasks which need to be done num_threads = 15 # 15 max_queue_size = 50000 # 30,000 missed_only = False print('Running main crawler....') taskmaster = Manager(num_threads, max_queue_size, missed_only) taskmaster.go() print('Populating Meta Data.....') md = MetaData() md.populate() print('Done')
def main(): """Do stuff.""" args = parser.parse_args() # Don't use this, neither set learning rate as a linear function # of the count of gpus, it will make accuracy lower # args.batch_size = args.batch_size * torch.cuda.device_count() args.network_width_multiplier = math.sqrt(args.network_width_multiplier) args.max_allowed_network_width_multiplier = math.sqrt( args.max_allowed_network_width_multiplier) if args.mode == 'prune': args.save_folder = os.path.join(args.save_folder, str(args.target_sparsity)) if args.initial_sparsity != 0.0: args.load_folder = os.path.join(args.load_folder, str(args.initial_sparsity)) if args.save_folder and not os.path.isdir(args.save_folder): os.makedirs(args.save_folder) if args.log_path: set_logger(args.log_path) if args.pruning_ratio_to_acc_record_file and not os.path.isdir( args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0]): os.makedirs(args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0]) if not torch.cuda.is_available(): logging.info('no gpu device available') args.cuda = False torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) cudnn.benchmark = True # If set > 0, will resume training from a given checkpoint. resume_from_epoch = 0 resume_folder = args.load_folder for try_epoch in range(200, 0, -1): if os.path.exists( args.checkpoint_format.format(save_folder=resume_folder, epoch=try_epoch)): resume_from_epoch = try_epoch break if args.restore_epoch: resume_from_epoch = args.restore_epoch # Set default train and test path if not provided as input. utils.set_dataset_paths(args) if resume_from_epoch: print("Resume from epoch: ", resume_from_epoch) filepath = args.checkpoint_format.format(save_folder=resume_folder, epoch=resume_from_epoch) checkpoint = torch.load(filepath) checkpoint_keys = checkpoint.keys() dataset_history = checkpoint['dataset_history'] dataset2num_classes = checkpoint['dataset2num_classes'] masks = checkpoint['masks'] shared_layer_info = checkpoint['shared_layer_info'] # shared_layer_info[args.dataset]['network_width_multiplier'] = 1.0 if 'num_for_construct' in checkpoint_keys: num_for_construct = checkpoint['num_for_construct'] if args.mode == 'inference' and 'network_width_multiplier' in shared_layer_info[ args.dataset]: # TODO, temporary solution args.network_width_multiplier = shared_layer_info[ args.dataset]['network_width_multiplier'] else: dataset_history = [] dataset2num_classes = {} masks = {} shared_layer_info = {} if args.baseline_acc_file is None or not os.path.isfile( args.baseline_acc_file): sys.exit(3) with open(args.baseline_acc_file, 'r') as jsonfile: json_data = json.load(jsonfile) baseline_acc = float(json_data[args.dataset]) if args.mode == 'prune' and not args.pruning_ratio_to_acc_record_file: sys.exit(-1) if args.arch == 'resnet18': model = models.__dict__[args.arch]( dataset_history=dataset_history, dataset2num_classes=dataset2num_classes, network_width_multiplier=args.network_width_multiplier, shared_layer_info=shared_layer_info) elif 'vgg' in args.arch: custom_cfg = [ 64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M' ] model = models.__dict__[args.arch]( custom_cfg, dataset_history=dataset_history, dataset2num_classes=dataset2num_classes, network_width_multiplier=args.network_width_multiplier, shared_layer_info=shared_layer_info) else: print('Error!') sys.exit(1) # Add and set the model dataset. 
model.add_dataset(args.dataset, args.num_classes) model.set_dataset(args.dataset) model = nn.DataParallel(model) model = model.cuda() if not masks: for name, module in model.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): mask = torch.ByteTensor(module.weight.data.size()).fill_(0) if 'cuda' in module.weight.data.type(): mask = mask.cuda() masks[name] = mask else: # when we expand network, we need to allocate new masks NEED_ADJUST_MASK = False for name, module in model.named_modules(): if isinstance(module, nl.SharableConv2d): if masks[name].size(1) < module.weight.data.size(1): assert args.mode == 'finetune' NEED_ADJUST_MASK = True elif masks[name].size(1) > module.weight.data.size(1): assert args.mode == 'inference' NEED_ADJUST_MASK = True if NEED_ADJUST_MASK: if args.mode == 'finetune': for name, module in model.named_modules(): if isinstance(module, nl.SharableConv2d): mask = torch.ByteTensor( module.weight.data.size()).fill_(0) if 'cuda' in module.weight.data.type(): mask = mask.cuda() mask[:masks[name].size(0), :masks[name]. size(1), :, :].copy_(masks[name]) masks[name] = mask elif isinstance(module, nl.SharableLinear): mask = torch.ByteTensor( module.weight.data.size()).fill_(0) if 'cuda' in module.weight.data.type(): mask = mask.cuda() mask[:masks[name].size(0), :masks[name].size(1)].copy_( masks[name]) masks[name] = mask elif args.mode == 'inference': for name, module in model.named_modules(): if isinstance(module, nl.SharableConv2d): mask = torch.ByteTensor( module.weight.data.size()).fill_(0) if 'cuda' in module.weight.data.type(): mask = mask.cuda() mask[:, :, :, :].copy_( masks[name][:mask.size(0), :mask.size(1), :, :]) masks[name] = mask elif isinstance(module, nl.SharableLinear): mask = torch.ByteTensor( module.weight.data.size()).fill_(0) if 'cuda' in module.weight.data.type(): mask = mask.cuda() mask[:, :].copy_( masks[name][:mask.size(0), :mask.size(1)]) masks[name] = mask if args.dataset not in shared_layer_info: shared_layer_info[args.dataset] = { 'bias': {}, 'bn_layer_running_mean': {}, 'bn_layer_running_var': {}, 'bn_layer_weight': {}, 'bn_layer_bias': {}, 'piggymask': {} } piggymasks = {} task_id = model.module.datasets.index(args.dataset) + 1 if task_id > 1: for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): piggymasks[name] = torch.zeros_like(masks['module.' + name], dtype=torch.float32) piggymasks[name].fill_(0.01) piggymasks[name] = Parameter(piggymasks[name]) module.piggymask = piggymasks[name] elif args.finetune_again: # reinitialize piggymask piggymasks = {} for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): piggymasks[name] = torch.zeros_like(masks['module.' 
+ name], dtype=torch.float32) piggymasks[name].fill_(0.01) piggymasks[name] = Parameter(piggymasks[name]) module.piggymask = piggymasks[name] else: # try: piggymasks = shared_layer_info[args.dataset]['piggymask'] # except: # piggymasks = {} task_id = model.module.datasets.index(args.dataset) + 1 if task_id > 1: for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): module.piggymask = piggymasks[name] shared_layer_info[args.dataset][ 'network_width_multiplier'] = args.network_width_multiplier if args.num_classes == 2: train_loader = dataset.cifar100_train_loader_two_class( args.dataset, args.batch_size) val_loader = dataset.cifar100_val_loader_two_class( args.dataset, args.val_batch_size) elif args.num_classes == 5: train_loader = dataset.cifar100_train_loader(args.dataset, args.batch_size) val_loader = dataset.cifar100_val_loader(args.dataset, args.val_batch_size) else: print("num_classes should be either 2 or 5") sys.exit(1) # if we are going to save checkpoint in other folder, then we recalculate the starting epoch if args.save_folder != args.load_folder: start_epoch = 0 else: start_epoch = resume_from_epoch curr_prune_step = begin_prune_step = start_epoch * len(train_loader) end_prune_step = curr_prune_step + args.pruning_interval * len( train_loader) manager = Manager(args, model, shared_layer_info, masks, train_loader, val_loader, begin_prune_step, end_prune_step) if args.mode == 'inference': manager.load_checkpoint_only_for_evaluate(resume_from_epoch, resume_folder) manager.validate(resume_from_epoch - 1) return lr = args.lr lr_mask = args.lr_mask # update all layers named_params = dict(model.named_parameters()) params_to_optimize_via_SGD = [] named_of_params_to_optimize_via_SGD = [] masks_to_optimize_via_Adam = [] named_of_masks_to_optimize_via_Adam = [] for name, param in named_params.items(): if 'classifiers' in name: if '.{}.'.format(model.module.datasets.index( args.dataset)) in name: params_to_optimize_via_SGD.append(param) named_of_params_to_optimize_via_SGD.append(name) continue elif 'piggymask' in name: masks_to_optimize_via_Adam.append(param) named_of_masks_to_optimize_via_Adam.append(name) else: params_to_optimize_via_SGD.append(param) named_of_params_to_optimize_via_SGD.append(name) optimizer_network = optim.SGD(params_to_optimize_via_SGD, lr=lr, weight_decay=0.0, momentum=0.9, nesterov=True) optimizers = Optimizers() optimizers.add(optimizer_network, lr) if masks_to_optimize_via_Adam: optimizer_mask = optim.Adam(masks_to_optimize_via_Adam, lr=lr_mask) optimizers.add(optimizer_mask, lr_mask) manager.load_checkpoint(optimizers, resume_from_epoch, resume_folder) """Performs training.""" curr_lrs = [] for optimizer in optimizers: for param_group in optimizer.param_groups: curr_lrs.append(param_group['lr']) break if args.mode == 'prune': if 'gradual_prune' in args.load_folder and args.save_folder == args.load_folder: args.epochs = 20 + resume_from_epoch logging.info('') logging.info('Before pruning: ') logging.info('Sparsity range: {} -> {}'.format(args.initial_sparsity, args.target_sparsity)) must_pruning_ratio_for_curr_task = 0.0 json_data = {} if os.path.isfile(args.pruning_ratio_to_acc_record_file): with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file: json_data = json.load(json_file) if args.network_width_multiplier >= args.max_allowed_network_width_multiplier and json_data[ '0.0'] < baseline_acc: # If we reach the upperbound and still do not get the accuracy over our target on 
curr task, we still do pruning logging.info( 'we reach the upperbound and still do not get the accuracy over our target on curr task' ) remain_num_tasks = args.total_num_tasks - len(dataset_history) logging.info('remain_num_tasks: {}'.format(remain_num_tasks)) ratio_allow_for_curr_task = round(1.0 / (remain_num_tasks + 1), 1) logging.info('ratio_allow_for_curr_task: {:.4f}'.format( ratio_allow_for_curr_task)) must_pruning_ratio_for_curr_task = 1.0 - ratio_allow_for_curr_task if args.initial_sparsity >= must_pruning_ratio_for_curr_task: sys.exit(6) manager.validate(start_epoch - 1) logging.info('') elif args.mode == 'finetune': if not args.finetune_again: manager.pruner.make_finetuning_mask() logging.info('Finetune stage...') else: logging.info('Piggymask Retrain...') history_best_avg_val_acc_when_retraining = manager.validate( start_epoch - 1) num_epochs_that_criterion_does_not_get_better = 0 stop_lr_mask = True if manager.pruner.calculate_curr_task_ratio() == 0.0: logging.info( 'There is no left space in convolutional layer for curr task' ', we will try to use prior experience as long as possible') stop_lr_mask = False for epoch_idx in range(start_epoch, args.epochs): avg_train_acc, curr_prune_step = manager.train(optimizers, epoch_idx, curr_lrs, curr_prune_step) avg_val_acc = manager.validate(epoch_idx) # if args.mode == 'prune' and (epoch_idx+1) >= (args.pruning_interval + start_epoch) and ( # avg_val_acc > history_best_avg_val_acc_when_prune): # pass if args.finetune_again: if avg_val_acc > history_best_avg_val_acc_when_retraining: history_best_avg_val_acc_when_retraining = avg_val_acc num_epochs_that_criterion_does_not_get_better = 0 if args.save_folder is not None: print("Removing saved checkpoint") for path in os.listdir(args.save_folder): if '.pth.tar' in path: os.remove(os.path.join(args.save_folder, path)) else: print('Something is wrong! Block the program with pdb') pdb.set_trace() history_best_avg_val_acc = avg_val_acc manager.save_checkpoint(optimizers, epoch_idx, args.save_folder) else: num_epochs_that_criterion_does_not_get_better += 1 if args.finetune_again and num_epochs_that_criterion_does_not_get_better == 5: saved = False for try_epoch in range(200, 0, -1): if os.path.exists( args.checkpoint_format.format( save_folder=args.save_folder, epoch=try_epoch)): saved = True print("Found saved checkpoint") break if not saved: print("No saved checkpoint..") manager.save_checkpoint(optimizers, epoch_idx, args.save_folder) logging.info("stop retraining") sys.exit(0) if args.mode == 'finetune': if epoch_idx + 1 == 50 or epoch_idx + 1 == 80: for param_group in optimizers[0].param_groups: param_group['lr'] *= 0.1 curr_lrs[0] = param_group['lr'] if len(optimizers.lrs) == 2: if epoch_idx + 1 == 50: for param_group in optimizers[1].param_groups: param_group['lr'] *= 0.2 if stop_lr_mask and epoch_idx + 1 == 70: for param_group in optimizers[1].param_groups: param_group['lr'] *= 0.0 curr_lrs[1] = param_group['lr'] if args.save_folder is not None: pass # paths = os.listdir(args.save_folder) # if paths and '.pth.tar' in paths[0]: # for checkpoint_file in paths: # os.remove(os.path.join(args.save_folder, checkpoint_file)) else: print('Something is wrong! 
Block the program with pdb') pdb.set_trace() if avg_train_acc > 0.95: manager.save_checkpoint(optimizers, epoch_idx, args.save_folder) else: logging.info(f"Training Accuracy goal not met ({avg_train_acc})!") if args.dataset == "aquatic_mammals" and avg_train_acc > 0.85: logging.info("Saving model...") manager.save_checkpoint(optimizers, epoch_idx, args.save_folder) else: logging.info("Not saving model...") logging.info('-' * 16) if args.pruning_ratio_to_acc_record_file: json_data = {} if os.path.isfile(args.pruning_ratio_to_acc_record_file): with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file: json_data = json.load(json_file) if args.mode == 'finetune' and not args.test_piggymask: json_data[0.0] = round(avg_val_acc, 4) with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file: json.dump(json_data, json_file) if avg_train_acc > 0.95 and avg_val_acc >= baseline_acc: print("Pass!") pass elif args.network_width_multiplier >= args.max_allowed_network_width_multiplier and avg_val_acc < baseline_acc: print("Option 2") if manager.pruner.calculate_curr_task_ratio() == 0.0: sys.exit(5) else: sys.exit(0) else: print("Option 3") if args.network_width_multiplier >= args.max_allowed_network_width_multiplier: print("Network Cannot Expand Anymore!") logging.info("Network Cannot Expand Anymore!") if manager.pruner.calculate_curr_task_ratio() == 0.0: sys.exit(5) else: sys.exit(0) else: logging.info("It's time to expand the Network") logging.info('Auto expand network') sys.exit(2) if manager.pruner.calculate_curr_task_ratio() == 0.0: logging.info( 'There is no left space in convolutional layer for curr task, so needless to prune' ) sys.exit(5) elif args.mode == 'prune': if avg_train_acc > 0.95: json_data[args.target_sparsity] = round(avg_val_acc, 4) with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file: json.dump(json_data, json_file) else: sys.exit(6) must_pruning_ratio_for_curr_task = 0.0 if args.network_width_multiplier >= args.max_allowed_network_width_multiplier and json_data[ '0.0'] < baseline_acc: # If we reach the upperbound and still do not get the accuracy over our target on curr task, we still do pruning logging.info( 'we reach the upperbound and still do not get the accuracy over our target on curr task' ) remain_num_tasks = args.total_num_tasks - len(dataset_history) logging.info('remain_num_tasks: {}'.format(remain_num_tasks)) ratio_allow_for_curr_task = round(1.0 / (remain_num_tasks + 1), 1) logging.info('ratio_allow_for_curr_task: {:.4f}'.format( ratio_allow_for_curr_task)) must_pruning_ratio_for_curr_task = 1.0 - ratio_allow_for_curr_task if args.target_sparsity >= must_pruning_ratio_for_curr_task: sys.exit(6)
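# The NEED_ADJUST_MASK branches above copy each existing mask into the top-left corner of a
# freshly allocated mask whenever a layer has grown (finetune) or shrunk back (inference).
# A standalone illustration of the copy for a grown convolutional layer, with made-up sizes
# (not part of the training script above):
import torch

old_mask = torch.zeros(64, 32, 3, 3, dtype=torch.uint8)  # mask of the original conv layer
new_mask = torch.zeros(96, 48, 3, 3, dtype=torch.uint8)  # expanded layer: more filters/channels

# Preserve the old assignments; the newly added filters/channels stay 0 (free for the new task).
new_mask[:old_mask.size(0), :old_mask.size(1), :, :].copy_(old_mask)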
import os

from utils.manager import Manager
from spider.spcCrawl import XiciCrawl, XilaCrawl, NimaCrawl
from verify.judger import Judger
from db.spcDB import RedisDB

if "__main__" == __name__:
    # db = RedisDB("test")
    spiders = [XiciCrawl(), XilaCrawl(), NimaCrawl()]
    for spi in spiders:
        spi.cocurrent = True
        # if not spi.db:
        #     spi.db = db
        # spi.run()
        # print(spi.proxyLs)
    # Judger().run()
    manager = Manager()
    manager.run()
import spotipy

from utils.manager import Manager
from utils.constants import Constants

Constants.init()
Manager.init()
Manager.get_token()

sp = Manager.get_spotify_instance()
songs_ids = Manager.get_songs_ids()

# Delete the saved tracks in batches of 50 IDs per request.
limit = 50
[
    sp.current_user_saved_tracks_delete(tracks=songs_ids[i:i + limit])
    for i in range(0, len(songs_ids), limit)
]
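# The list comprehension above is used only for its side effects. A sketch of the same
# chunked deletion as an explicit loop, using spotipy directly (this assumes credentials
# come from the standard SPOTIPY_* environment variables rather than the project's Manager,
# and it only fetches the first page of saved tracks as an illustration):
import spotipy
from spotipy.oauth2 import SpotifyOAuth

sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    scope="user-library-read user-library-modify"))

saved = sp.current_user_saved_tracks(limit=50)
song_ids = [item["track"]["id"] for item in saved["items"]]

limit = 50  # batch size per delete request
for i in range(0, len(song_ids), limit):
    sp.current_user_saved_tracks_delete(tracks=song_ids[i:i + limit])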
from utils.constants import Constants
from utils.manager import Manager

Constants.init()
Manager.init()
Manager.get_token()

'''
Manager.get_spotify_instance()
Manager.get_songs_ids()
Manager.pick_base_song_id()
Manager.get_candidates_ids()
Manager.get_songs_features()
print(Manager.get_similar_songs())
'''
def main(): """Do stuff.""" args = parser.parse_args() # args.batch_size = args.batch_size * torch.cuda.device_count() args.network_width_multiplier = math.sqrt(args.network_width_multiplier) if args.mode == 'prune': args.save_folder = os.path.join(args.save_folder, str(args.target_sparsity)) if args.initial_sparsity != 0.0: args.load_folder = os.path.join(args.load_folder, str(args.initial_sparsity)) if args.pruning_ratio_to_acc_record_file and not os.path.isdir( args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0]): os.makedirs(args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0]) if args.save_folder and not os.path.isdir(args.save_folder): os.makedirs(args.save_folder) if args.log_path: set_logger(args.log_path) if not torch.cuda.is_available(): logging.info('no gpu device available') args.cuda = False torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) cudnn.benchmark = True # If set > 0, will resume training from a given checkpoint. resume_from_epoch = 0 resume_folder = args.load_folder for try_epoch in range(200, 0, -1): if os.path.exists( args.checkpoint_format.format(save_folder=resume_folder, epoch=try_epoch)): resume_from_epoch = try_epoch break if args.restore_epoch: resume_from_epoch = args.restore_epoch # Set default train and test path if not provided as input. utils.set_dataset_paths(args) if resume_from_epoch: filepath = args.checkpoint_format.format(save_folder=resume_folder, epoch=resume_from_epoch) checkpoint = torch.load(filepath) checkpoint_keys = checkpoint.keys() dataset_history = checkpoint['dataset_history'] dataset2num_classes = checkpoint['dataset2num_classes'] masks = checkpoint['masks'] shared_layer_info = checkpoint['shared_layer_info'] if 'num_for_construct' in checkpoint_keys: num_for_construct = checkpoint['num_for_construct'] if args.mode == 'inference' and 'network_width_multiplier' in shared_layer_info[ args.dataset]: args.network_width_multiplier = shared_layer_info[ args.dataset]['network_width_multiplier'] else: dataset_history = [] dataset2num_classes = {} masks = {} shared_layer_info = {} if args.arch == 'resnet50': # num_for_construct = [64, 64, 64*4, 128, 128*4, 256, 256*4, 512, 512*4] model = models.__dict__[args.arch]( dataset_history=dataset_history, dataset2num_classes=dataset2num_classes, network_width_multiplier=args.network_width_multiplier, shared_layer_info=shared_layer_info) elif 'vgg' in args.arch: custom_cfg = [ 64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M' ] model = models.__dict__[args.arch]( custom_cfg, dataset_history=dataset_history, dataset2num_classes=dataset2num_classes, network_width_multiplier=args.network_width_multiplier, shared_layer_info=shared_layer_info, progressive_init=args.progressive_init) else: print('Error!') sys.exit(1) # Add and set the model dataset. model.add_dataset(args.dataset, args.num_classes) model.set_dataset(args.dataset) # Move model to GPU model = nn.DataParallel(model) model = model.cuda() # For datasets whose image_size is 224 and also the first task if args.use_imagenet_pretrained and model.module.datasets.index( args.dataset) == 0: curr_model_state_dict = model.state_dict() if args.arch == 'custom_vgg': state_dict = model_zoo.load_url(model_urls['vgg16_bn']) for name, param in state_dict.items(): if 'classifier' not in name: curr_model_state_dict['module.' 
+ name].copy_(param) curr_model_state_dict['module.features.45.weight'].copy_( state_dict['classifier.0.weight']) curr_model_state_dict['module.features.45.bias'].copy_( state_dict['classifier.0.bias']) curr_model_state_dict['module.features.48.weight'].copy_( state_dict['classifier.3.weight']) curr_model_state_dict['module.features.48.bias'].copy_( state_dict['classifier.3.bias']) if args.dataset == 'imagenet': curr_model_state_dict['module.classifiers.0.weight'].copy_( state_dict['classifier.6.weight']) curr_model_state_dict['module.classifiers.0.bias'].copy_( state_dict['classifier.6.bias']) elif args.arch == 'resnet50': state_dict = model_zoo.load_url(model_urls['resnet50']) for name, param in state_dict.items(): if 'fc' not in name: curr_model_state_dict['module.' + name].copy_(param) if args.dataset == 'imagenet': curr_model_state_dict['module.classifiers.0.weight'].copy_( state_dict['fc.weight']) curr_model_state_dict['module.classifiers.0.bias'].copy_( state_dict['fc.bias']) else: print( "Currently, we didn't define the mapping of {} between imagenet pretrained weight and our model" .format(args.arch)) sys.exit(5) if not masks: for name, module in model.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): mask = torch.ByteTensor(module.weight.data.size()).fill_(0) mask = mask.cuda() masks[name] = mask else: # when we expand network, we need to allocate new masks NEED_ADJUST_MASK = False for name, module in model.named_modules(): if isinstance(module, nl.SharableConv2d): if masks[name].size(1) < module.weight.data.size(1): assert args.mode == 'finetune' NEED_ADJUST_MASK = True elif masks[name].size(1) > module.weight.data.size(1): assert args.mode == 'inference' NEED_ADJUST_MASK = True if NEED_ADJUST_MASK: if args.mode == 'finetune': for name, module in model.named_modules(): if isinstance(module, nl.SharableConv2d): mask = torch.ByteTensor( module.weight.data.size()).fill_(0) mask = mask.cuda() mask[:masks[name].size(0), :masks[name]. size(1), :, :].copy_(masks[name]) masks[name] = mask elif isinstance(module, nl.SharableLinear): mask = torch.ByteTensor( module.weight.data.size()).fill_(0) mask = mask.cuda() mask[:masks[name].size(0), :masks[name].size(1)].copy_( masks[name]) masks[name] = mask elif args.mode == 'inference': for name, module in model.named_modules(): if isinstance(module, nl.SharableConv2d): mask = torch.ByteTensor( module.weight.data.size()).fill_(0) mask = mask.cuda() mask[:, :, :, :].copy_( masks[name][:mask.size(0), :mask.size(1), :, :]) masks[name] = mask elif isinstance(module, nl.SharableLinear): mask = torch.ByteTensor( module.weight.data.size()).fill_(0) mask = mask.cuda() mask[:, :].copy_( masks[name][:mask.size(0), :mask.size(1)]) masks[name] = mask if args.dataset not in shared_layer_info: shared_layer_info[args.dataset] = { 'bias': {}, 'bn_layer_running_mean': {}, 'bn_layer_running_var': {}, 'bn_layer_weight': {}, 'bn_layer_bias': {}, 'piggymask': {} } piggymasks = {} task_id = model.module.datasets.index(args.dataset) + 1 if task_id > 1: for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): piggymasks[name] = torch.zeros_like(masks['module.' 
+ name], dtype=torch.float32) piggymasks[name].fill_(0.01) piggymasks[name] = Parameter(piggymasks[name]) module.piggymask = piggymasks[name] else: piggymasks = shared_layer_info[args.dataset]['piggymask'] task_id = model.module.datasets.index(args.dataset) + 1 if task_id > 1: for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): module.piggymask = piggymasks[name] shared_layer_info[args.dataset][ 'network_width_multiplier'] = args.network_width_multiplier if 'cropped' in args.dataset: train_loader = dataset.train_loader_cropped(args.train_path, args.batch_size) val_loader = dataset.val_loader_cropped(args.val_path, args.val_batch_size) else: train_loader = dataset.train_loader(args.train_path, args.batch_size) val_loader = dataset.val_loader(args.val_path, args.val_batch_size) # if we are going to save checkpoint in other folder, then we recalculate the starting epoch if args.save_folder != args.load_folder: start_epoch = 0 else: start_epoch = resume_from_epoch curr_prune_step = begin_prune_step = start_epoch * len(train_loader) end_prune_step = curr_prune_step + args.pruning_interval * len( train_loader) manager = Manager(args, model, shared_layer_info, masks, train_loader, val_loader, begin_prune_step, end_prune_step) if args.mode == 'inference': manager.load_checkpoint_only_for_evaluate(resume_from_epoch, resume_folder) manager.validate(resume_from_epoch - 1) return lr = args.lr lr_mask = args.lr_mask # update all layers named_params = dict(model.named_parameters()) params_to_optimize_via_SGD = [] named_of_params_to_optimize_via_SGD = [] masks_to_optimize_via_Adam = [] named_of_masks_to_optimize_via_Adam = [] for name, param in named_params.items(): if 'classifiers' in name: if '.{}.'.format(model.module.datasets.index( args.dataset)) in name: params_to_optimize_via_SGD.append(param) named_of_params_to_optimize_via_SGD.append(name) continue elif 'piggymask' in name: masks_to_optimize_via_Adam.append(param) named_of_masks_to_optimize_via_Adam.append(name) else: params_to_optimize_via_SGD.append(param) named_of_params_to_optimize_via_SGD.append(name) optimizer_network = optim.SGD(params_to_optimize_via_SGD, lr=lr, weight_decay=0.0, momentum=0.9, nesterov=True) optimizers = Optimizers() optimizers.add(optimizer_network, lr) if masks_to_optimize_via_Adam: optimizer_mask = optim.Adam(masks_to_optimize_via_Adam, lr=lr_mask) optimizers.add(optimizer_mask, lr_mask) manager.load_checkpoint(optimizers, resume_from_epoch, resume_folder) """Performs training.""" curr_lrs = [] for optimizer in optimizers: for param_group in optimizer.param_groups: curr_lrs.append(param_group['lr']) break if args.jsonfile is None or not os.path.isfile(args.jsonfile): sys.exit(3) with open(args.jsonfile, 'r') as jsonfile: json_data = json.load(jsonfile) baseline_acc = float(json_data[args.dataset]) if args.mode == 'prune': if args.dataset != 'imagenet': history_best_avg_val_acc_when_prune = 0.0 #history_best_avg_val_acc_when_prune = baseline_acc - 0.005 else: if 'vgg' in args.arch: baseline_acc = 0.7336 history_best_avg_val_acc_when_prune = baseline_acc - 0.005 elif 'resnet50' in args.arch: baseline_acc = 0.7616 history_best_avg_val_acc_when_prune = baseline_acc - 0.005 else: print('Something is wrong') exit(1) stop_prune = True if 'gradual_prune' in args.load_folder and args.save_folder == args.load_folder: if args.dataset == 'imagenet': args.epochs = 10 + resume_from_epoch else: args.epochs = 20 + resume_from_epoch logging.info('') 
logging.info('Before pruning: ') logging.info('Sparsity range: {} -> {}'.format(args.initial_sparsity, args.target_sparsity)) manager.validate(start_epoch - 1) logging.info('') elif args.mode == 'finetune': manager.pruner.make_finetuning_mask() if args.dataset == 'imagenet': manager.validate(0) manager.save_checkpoint(optimizers, 0, args.save_folder) return history_best_avg_val_acc = 0.0 num_epochs_that_criterion_does_not_get_better = 0 times_of_decaying_learning_rate = 0 for epoch_idx in range(start_epoch, args.epochs): avg_train_acc, curr_prune_step = manager.train(optimizers, epoch_idx, curr_lrs, curr_prune_step) avg_val_acc = manager.validate(epoch_idx) if args.mode == 'prune' and (epoch_idx + 1) >= ( args.pruning_interval + start_epoch) and ( avg_val_acc > history_best_avg_val_acc_when_prune): stop_prune = False history_best_avg_val_acc_when_prune = avg_val_acc if args.save_folder is not None: paths = os.listdir(args.save_folder) if paths and '.pth.tar' in paths[0]: for checkpoint_file in paths: os.remove( os.path.join(args.save_folder, checkpoint_file)) else: print('Something is wrong! Block the program with pdb') pdb.set_trace() manager.save_checkpoint(optimizers, epoch_idx, args.save_folder) if args.mode == 'finetune': if avg_val_acc > history_best_avg_val_acc: num_epochs_that_criterion_does_not_get_better = 0 if args.save_folder is not None: paths = os.listdir(args.save_folder) if paths and '.pth.tar' in paths[0]: for checkpoint_file in paths: os.remove( os.path.join(args.save_folder, checkpoint_file)) else: print('Something is wrong! Block the program with pdb') pdb.set_trace() history_best_avg_val_acc = avg_val_acc manager.save_checkpoint(optimizers, epoch_idx, args.save_folder) else: num_epochs_that_criterion_does_not_get_better += 1 if times_of_decaying_learning_rate >= 3: print() print( "times_of_decaying_learning_rate reach {}, stop training". 
format(times_of_decaying_learning_rate)) break if num_epochs_that_criterion_does_not_get_better >= 5: times_of_decaying_learning_rate += 1 num_epochs_that_criterion_does_not_get_better = 0 for param_group in optimizers[0].param_groups: param_group['lr'] *= 0.1 curr_lrs[0] = param_group['lr'] print() print("continously {} epochs doesn't get higher acc, " "decay learning rate by multiplying 0.1".format( num_epochs_that_criterion_does_not_get_better)) if times_of_decaying_learning_rate == 1 and len( optimizers.lrs) == 2: for param_group in optimizers[1].param_groups: param_group['lr'] *= 0.2 curr_lrs[1] = param_group['lr'] print('-' * 16) if args.pruning_ratio_to_acc_record_file: json_data = {} if os.path.isfile(args.pruning_ratio_to_acc_record_file): with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file: json_data = json.load(json_file) if args.mode == 'finetune' and not args.test_piggymask: if args.pruning_ratio_to_acc_record_file: json_data[0.0] = round(history_best_avg_val_acc, 4) with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file: json.dump(json_data, json_file) if history_best_avg_val_acc - baseline_acc > -0.005: # TODO #json_data = {} #json_data['acc_before_prune'] = '{:.4f}'.format(history_best_avg_val_acc) #with open(args.tmp_benchmark_file, 'w') as jsonfile: # json.dump(json_data, jsonfile) pass else: print("It's time to expand the Network") print('Auto expand network') sys.exit(2) if manager.pruner.calculate_curr_task_ratio() == 0.0: print( 'There is no left space in convolutional layer for curr task, so needless to prune' ) sys.exit(5) elif args.mode == 'prune': # if stop_prune: # print('Acc too low, stop pruning.') # sys.exit(4) if args.pruning_ratio_to_acc_record_file: json_data[args.target_sparsity] = round( history_best_avg_val_acc_when_prune, 4) with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file: json.dump(json_data, json_file)
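# The patience-based decay above (multiply the LR by 0.1 after five epochs without a
# validation improvement, stop after three decays) is the same idea PyTorch ships as
# torch.optim.lr_scheduler.ReduceLROnPlateau. A minimal sketch of the built-in equivalent,
# with a stand-in model and metric; it is not a drop-in replacement for the checkpoint
# bookkeeping in the loop above:
import torch
from torch import nn, optim

model = nn.Linear(10, 2)  # stand-in model
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.1, patience=5)  # 'max' because we track accuracy

for epoch in range(100):
    val_acc = 0.5             # placeholder for something like manager.validate(epoch)
    scheduler.step(val_acc)   # decays the LR after 5 epochs without improvement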
# coding: utf-8
__author__ = 'deff'

import sys
import os

from utils.manager import Manager

if len(sys.argv) < 2:
    print("usage: python coeus.py sdk_path [-d]")
    print("-d means delete temp files.")
    print("sdk_path: aar or jar or apk")
    sys.exit(0)

sdk_path = sys.argv[1]
if not os.path.exists(sdk_path):
    sdk_path = os.path.join(os.curdir, sdk_path)
    if not os.path.exists(sdk_path):
        print("please input a correct sdk_path.")
        sys.exit(0)

m = Manager(sdk_path)
m.start()

if len(sys.argv) > 2 and sys.argv[2] == "-d":
    m.delete()
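# The hand-rolled sys.argv handling above could equally be expressed with argparse.
# A sketch only, keeping the same Manager interface used above:
import argparse
import os
import sys

from utils.manager import Manager

parser = argparse.ArgumentParser(prog='coeus.py',
                                 description='Scan an SDK (aar, jar or apk).')
parser.add_argument('sdk_path', help='path to the aar, jar or apk to analyse')
parser.add_argument('-d', action='store_true', help='delete temp files afterwards')
args = parser.parse_args()

sdk_path = args.sdk_path
if not os.path.exists(sdk_path):
    sdk_path = os.path.join(os.curdir, sdk_path)
    if not os.path.exists(sdk_path):
        sys.exit("please input a correct sdk_path.")

m = Manager(sdk_path)
m.start()
if args.d:
    m.delete()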
import sys

sys.path.append("../")

from utils.manager import Manager

if "__main__" == __name__:
    try:
        m = Manager()
        m.run()
    except KeyboardInterrupt:
        exit()
def main():
    args = argparser.get_parser()
    set_seeds(args)
    settings(args)

    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch, resume_folder = check_resume_epoch(args)

    dataset_history, dataset2num_classes, masks, shared_layer_info = info_reload(
        resume_from_epoch, args)

    model = build_model(args, dataset_history, dataset2num_classes)

    # Add and set the model dataset.
    model.add_dataset(args.dataset, args.num_classes)
    model.set_dataset(args.dataset)

    masks = load_or_build_masks(masks, model, args)
    model = nn.DataParallel(model)

    shared_layer_info = check_if_need_build_shared_layer_info(args, shared_layer_info)

    train_loader, val_loader, test_loader = load_data(args)

    # If we are going to save the checkpoint in another folder, recalculate the starting epoch.
    start_epoch = calculate_start_epoch(args, resume_from_epoch)

    manager = Manager(args, model, shared_layer_info, masks, train_loader, val_loader)

    args.training_steps = args.epochs * len(train_loader)
    optimizers, schedulers = set_optimizers(args, model)

    # manager.save_checkpoint(optimizers, 0, args.save_folder)
    manager.load_checkpoint(resume_from_epoch, resume_folder, args)

    """Performs training."""
    curr_lrs = obtain_curr_lrs(optimizers)

    if args.mode == 'prune':
        print('Sparsity ratio: {}'.format(args.one_shot_prune_perc))
        print('Execute one shot pruning ...')
        manager.one_shot_prune(args.one_shot_prune_perc)
        manager.pruner.apply_mask()
    elif args.mode == 'finetune':
        manager.pruner.make_finetuning_mask()
        logging.info('Finetune stage...')

    freeze_modules(model, args)

    max_val_acc = 0
    max_test_acc = 0
    model = model.cuda()
    for epoch_idx in range(start_epoch, args.epochs):
        need_save = False
        manager.train(optimizers, schedulers, epoch_idx, curr_lrs)
        avg_val_acc = manager.validate(epoch_idx)

        manager.val_loader = test_loader
        logging.info("performance on test")
        test_acc = manager.validate(epoch_idx)
        manager.val_loader = val_loader

        if avg_val_acc >= max_val_acc:
            need_save = True
            max_val_acc = avg_val_acc
            max_test_acc = test_acc

        if need_save:
            check_if_need_remove_checkpoint_files(args)
            manager.save_checkpoint(epoch_idx, args.save_folder)

        logging.info('-' * 16)