def profiling(model, use_cuda):
    """Profile the model on either GPU or CPU.

    Dispatches on FLAGS: an autoslim run profiles once (quiet by default),
    slimmable training profiles every width multiplier from widest to
    narrowest, and the plain path profiles once (verbose by default).
    Returns the (flops, params) pair from the last profiling call.
    """
    print('Start model profiling, use_cuda: {}.'.format(use_cuda))
    # Default for the autoslim / slimmable paths; the plain path defaults to
    # verbose instead, so it keeps its own getattr lookup below.
    quiet_default = getattr(FLAGS, 'profiling_verbose', False)
    if getattr(FLAGS, 'autoslim', False):
        flops, params = model_profiling(
            model, FLAGS.image_size, FLAGS.image_size,
            use_cuda=use_cuda, verbose=quiet_default)
    elif getattr(FLAGS, 'slimmable_training', False):
        for width_mult in sorted(FLAGS.width_mult_list, reverse=True):
            model.apply(lambda m: setattr(m, 'width_mult', width_mult))
            print('Model profiling with width mult {}x:'.format(width_mult))
            flops, params = model_profiling(
                model, FLAGS.image_size, FLAGS.image_size,
                use_cuda=use_cuda, verbose=quiet_default)
    else:
        flops, params = model_profiling(
            model, FLAGS.image_size, FLAGS.image_size,
            use_cuda=use_cuda,
            verbose=getattr(FLAGS, 'profiling_verbose', True))
    return flops, params
def profiling(model, use_cuda):
    """Profile the model on either GPU or CPU.

    Only the root rank prints the verbose profiling table (and only when
    FLAGS does not disable it); the profiling itself runs on every rank.
    """
    logging.info('Start model profiling, use_cuda:{}.'.format(use_cuda))
    show_table = getattr(FLAGS, 'model_profiling_verbose', True) and is_root_rank
    model_profiling(model, FLAGS.image_size, FLAGS.image_size,
                    verbose=show_table)
def shrink_model(model_wrapper,
                 ema,
                 optimizer,
                 prune_info,
                 threshold=1e-3,
                 ema_only=False):
    r"""Dynamic network shrinkage to discard dead atomic blocks.

    Args:
        model_wrapper: model to be shrinked.
        ema: An instance of `ExponentialMovingAverage`, could be None.
        optimizer: Global optimizer.
        prune_info: An instance of `PruneInfo`, could be None.
        threshold: A small enough constant.
        ema_only: If `True`, regard an atomic block as dead only when
            `$$\hat{alpha} \le threshold$$`. Otherwise use both current
            value and momentum version.
    """
    model = unwrap_model(model_wrapper)
    for block_name, block in model.get_named_block_list().items():
        assert isinstance(block, mb.InvertedResidualChannels)
        # A channel survives when its depthwise-BN scale is above threshold.
        alive = [
            bn.weight.detach().abs() > threshold
            for bn in block.get_depthwise_bn()
        ]
        if ema is not None:
            alive_ema = [
                ema.average('{}.{}.weight'.format(
                    block_name, name)).detach().abs() > threshold
                for name in block.get_named_depthwise_bn().keys()
            ]
            if ema_only:
                alive = alive_ema
            else:
                # Keep a channel if either the live weight or its EMA is alive.
                alive = [cur | avg for cur, avg in zip(alive, alive_ema)]
        block.compress_by_mask(alive,
                               ema=ema,
                               optimizer=optimizer,
                               prune_info=prune_info,
                               prefix=block_name,
                               verbose=False)
    if optimizer is not None:
        # Shrinkage must keep the optimizer's view of the params in sync.
        assert set(optimizer.param_groups[0]['params']) == set(
            model.parameters())
    model_profiling(model,
                    FLAGS.image_size,
                    FLAGS.image_size,
                    num_forwards=0,
                    verbose=False)
    logging.info('Model Shrink to FLOPS: {}'.format(model.n_macs))
    logging.info('Current model: {}'.format(mb.output_network(model)))
def profiling(model, use_cuda):
    """Profile a slimmable model at every width multiplier, widest first.

    By default only the widest configuration prints the verbose table;
    FLAGS.model_profiling_verbose overrides that per-width default.
    """
    print('Start model profiling, use_cuda:{}.'.format(use_cuda))
    widest = max(FLAGS.width_mult_list)
    for width_mult in sorted(FLAGS.width_mult_list, reverse=True):
        model.apply(lambda m: setattr(m, 'width_mult', width_mult))
        print('Model profiling with width mult {}x:'.format(width_mult))
        default_verbose = width_mult == widest
        model_profiling(
            model, FLAGS.image_size, FLAGS.image_size,
            verbose=getattr(FLAGS, 'model_profiling_verbose', default_verbose))
def __init__(self, opt):
    """Set up the SPADE distillation model.

    Builds teacher/student/discriminator bookkeeping, the (optionally
    data-parallel) distiller modules and their optimizers, evaluation
    models (InceptionV3 for FID, DRNSeg for Cityscapes mIoU), and
    profiles both generators to report their FLOPs.
    """
    super(SPADEModel, self).__init__(opt)
    # FIX: the original also did `self.model_names.append('D')` right after
    # this assignment, listing the discriminator twice and making every
    # loop over model_names (e.g. save/load) process 'D' redundantly.
    self.model_names = ['G_student', 'G_teacher', 'D']
    self.visual_names = ['labels', 'Tfake_B', 'Sfake_B', 'real_B']
    self.loss_names = [
        'G_gan', 'G_feat', 'G_vgg', 'G_distill', 'D_real', 'D_fake'
    ]
    if hasattr(opt, 'distiller'):
        self.modules = SPADEDistillerModules(opt).to(self.device)
        if len(opt.gpu_ids) > 0:
            self.modules = DataParallelWithCallback(self.modules,
                                                    device_ids=opt.gpu_ids)
            self.modules_on_one_gpu = self.modules.module
        else:
            self.modules_on_one_gpu = self.modules
    # One distillation loss term per mapping layer.
    for i in range(len(self.modules_on_one_gpu.mapping_layers)):
        self.loss_names.append('G_distill%d' % i)
    self.optimizer_G, self.optimizer_D = self.modules_on_one_gpu.create_optimizers(
    )
    self.optimizers = [self.optimizer_G, self.optimizer_D]
    if not opt.no_fid:
        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]
        self.inception_model = InceptionV3([block_idx])
        self.inception_model.to(self.device)
        self.inception_model.eval()
    # mIoU evaluation only makes sense for Cityscapes-style data.
    if 'cityscapes' in opt.dataroot and not opt.no_mIoU:
        self.drn_model = DRNSeg('drn_d_105', 19, pretrained=False)
        util.load_network(self.drn_model, opt.drn_path, verbose=False)
        self.drn_model.to(self.device)
        self.drn_model.eval()
    self.eval_dataloader = create_eval_dataloader(self.opt)
    # Track the best metrics seen so far (lower FID / higher mIoU is better).
    self.best_fid = 1e9
    self.best_mIoU = -1e9
    self.fids, self.mIoUs = [], []
    self.is_best = False
    self.npz = np.load(opt.real_stat_path)
    # num_forwards=0: compute MACs/params analytically without running data.
    model_profiling(self.modules_on_one_gpu.netG_teacher,
                    self.opt.data_height,
                    self.opt.data_width,
                    channel=self.opt.data_channel,
                    num_forwards=0,
                    verbose=False)
    model_profiling(self.modules_on_one_gpu.netG_student,
                    self.opt.data_height,
                    self.opt.data_width,
                    channel=self.opt.data_channel,
                    num_forwards=0,
                    verbose=False)
    print(
        f'Teacher FLOPs: {self.modules_on_one_gpu.netG_teacher.n_macs}, Student FLOPs: {self.modules_on_one_gpu.netG_student.n_macs}.'
    )
def profiling(model, use_cuda):
    """Profile the model on either GPU or CPU.

    With adaptive training enabled, profiles the model once per bit-width
    in FLAGS.bits_list; otherwise profiles a single configuration.
    Returns (flops, params) from the last profiling call.
    """
    mprint('Start model profiling, use_cuda:{}.'.format(use_cuda))
    verbose = getattr(FLAGS, 'model_profiling_verbose', False)
    if getattr(FLAGS, 'adaptive_training', False):
        for bits in FLAGS.bits_list:
            model.apply(lambda m: setattr(m, 'bits', bits))
            mprint('Model profiling with {} bits.'.format(bits))
            flops, params, bitops, bytesize, energy, latency = model_profiling(
                model, FLAGS.image_size, FLAGS.image_size, verbose=verbose)
    else:
        flops, params, bitops, bytesize, energy, latency = model_profiling(
            model, FLAGS.image_size, FLAGS.image_size, verbose=verbose)
    return flops, params
def profiling(model, use_cuda):
    """Profile the model on either GPU or CPU.

    Returns only the (bitops, bytesize) portion of the profiling result;
    the remaining stats are computed but discarded.
    """
    mprint('Start model profiling, use_cuda:{}.'.format(use_cuda))
    stats = model_profiling(
        model, FLAGS.image_size, FLAGS.image_size,
        verbose=getattr(FLAGS, 'model_profiling_verbose', False))
    flops, params, bitops, bitops_max, bytesize, energy, latency = stats
    return bitops, bytesize
def slimming(loader, model, criterion):
    """network slimming by slimmable network

    Greedily shrinks conv layers one divisor-step at a time: each iteration
    tries removing a channel group from every layer, measures the resulting
    batch error, and commits the removal with the smallest error.  Snapshots
    of the network are reported each time FLOPs drop below the next target
    in FLAGS.autoslim_target_flops.

    NOTE(review): assumes distributed training (raises NotImplementedError
    otherwise) and that a single cached batch is a good proxy for accuracy —
    confirm against the AutoSlim training setup.
    """
    model.eval()
    bn_calibration_init(model)
    # Start from the full-width (1.0x) network.
    model.apply(lambda m: setattr(m, 'width_mult', 1.0))
    if getattr(FLAGS, 'distributed', False):
        layers = get_conv_layers(model.module)
    else:
        raise NotImplementedError
    print('Totally {} layers to slim.'.format(len(layers)))
    # error[i] holds the batch error after tentatively slimming layer i.
    error = np.zeros(len(layers))
    # get data
    if getattr(FLAGS, 'distributed', False):
        loader.sampler.set_epoch(0)
    # A single fixed batch is reused for every candidate evaluation.
    input, target = next(iter(loader))
    input = input.cuda()
    target = target.cuda()
    # start to slim
    print('Start to slim...')
    flops = 10e10
    # Targets are popped largest-first; the loop ends after the smallest.
    FLAGS.autoslim_target_flops = sorted(FLAGS.autoslim_target_flops)
    autoslim_target_flop = FLAGS.autoslim_target_flops.pop()
    while True:
        flops, params = model_profiling(model, FLAGS.image_size,
                                        FLAGS.image_size,
                                        verbose=getattr(
                                            FLAGS, 'profiling_verbose',
                                            False))
        if flops < autoslim_target_flop:
            if len(FLAGS.autoslim_target_flops) == 0:
                # All FLOPs targets reached: done.
                break
            else:
                print('Find autoslim net at flops {}'.format(
                    autoslim_target_flop))
                autoslim_target_flop = FLAGS.autoslim_target_flops.pop()
        for i in range(len(layers)):
            torch.cuda.empty_cache()
            error[i] = 0.
            # Candidate width after removing one divisor-sized group.
            outc = layers[i].out_channels - layers[i].divisor
            if outc <= 0 or outc > layers[i].out_channels_max:
                # Infeasible candidate: penalize so it is never selected.
                error[i] += 1.
                continue
            # Tentatively slim layer i, measure error, then restore it.
            layers[i].out_channels -= layers[i].divisor
            loss, error_batch = forward_loss(model,
                                             criterion,
                                             input,
                                             target,
                                             None,
                                             return_acc=True)
            error[i] += error_batch
            layers[i].out_channels += layers[i].divisor
        # Commit the removal that hurt accuracy the least.
        best_index = np.argmin(error)
        print(*[f'{element:.4f}' for element in error])
        layers[best_index].out_channels -= layers[best_index].divisor
        print('Adjust layer {} for {} to {}, error: {}.'.format(
            best_index, -layers[best_index].divisor,
            layers[best_index].out_channels, error[best_index]))
    return
def profile(self, input_semantics):
    """Return (macs, params) of the generator for the given semantic map."""
    generator = self.netG
    if isinstance(generator, nn.DataParallel):
        # Profile the wrapped module directly, not the parallel wrapper.
        generator = generator.module
    batch, channels, height, width = input_semantics.shape
    macs, params = model_profiling(generator, height, width, batch, channels,
                                   verbose=False)
    return macs, params