def do_profiling(self, first_encoder_inputs, first_decoder_inputs):
    """Profile the encoder and the decoder on a single-sample batch.

    Args:
        first_encoder_inputs: Batch passed to ``self.encoder``; only its
            first element along dimension 0 is kept for profiling.
        first_decoder_inputs: Batch passed to ``self.decoder``; sliced the
            same way.

    Returns:
        ``None`` when either argument is ``None``; otherwise the tuple
        ``(emacs, eparams, dmacs, dparams)`` unpacked from the two
        ``profile`` calls (encoder first, then decoder).
    """
    either_missing = (first_encoder_inputs is None
                      or first_decoder_inputs is None)
    if either_missing:
        return None

    # Past the guard both batches exist, so each one is sliced down to a
    # batch of one and profiled unconditionally.
    encoder_sample = first_encoder_inputs[0:1, ...]
    emacs, eparams = profile(self.encoder, (encoder_sample, ))

    decoder_sample = first_decoder_inputs[0:1, ...]
    dmacs, dparams = profile(self.decoder, (decoder_sample, ))

    return emacs, eparams, dmacs, dparams
def do_profiling(self, first_batch_inputs):
    """Profile ``self.model`` on the first sample of a batch.

    Args:
        first_batch_inputs: Batch whose first element along dimension 0 is
            fed to the profiler, or ``None``.

    Returns:
        ``None`` when no batch is given, otherwise the ``(macs, params)``
        pair unpacked from ``profile``.
    """
    if first_batch_inputs is not None:
        # Keep the leading dimension but shrink it to a single sample.
        single_sample = first_batch_inputs[0:1, ...]
        macs, params = profile(self.model, (single_sample, ))
        return macs, params
    return None
def run_evaluation(args, model, data_loaders, model_description, n_choices,
                   layers_types, downsample_layers):
    """Profile and evaluate one architecture, then print and save the results.

    Args:
        args: Object providing ``dataset``, ``seed`` and ``dir`` attributes.
        model: Network whose ``propagate`` attribute is overwritten with the
            one-hot path selection before evaluation.
        data_loaders: Mapping with the keys ``'train_for_bn_recalc'``,
            ``'val'`` and ``'test'``.
        model_description: Iterable of layer types, one entry per layer.
        n_choices: Length of every one-hot row.
        layers_types: Sequence used to turn a layer type into its row index.
        downsample_layers: Forwarded as ``put_downsampling`` to
            ``models.SinglePathSupernet``.

    Returns:
        None. The collected values are printed and handed to
        ``utils.save_result``.
    """
    started_at = time.time()
    sample_counts = utils.get_number_of_samples(args.dataset)
    device = 'cuda'

    # Seed the random generators before any model or tensor is created.
    utils.setup_torch(args.seed)

    # One row per described layer: a 1 at the chosen layer type, 0 elsewhere.
    one_hot_rows = []
    for layer_type in model_description:
        row = [0] * n_choices
        row[layers_types.index(layer_type)] = 1
        one_hot_rows.append(row)
    model.propagate = one_hot_rows

    # Build the computationally identical network without multiple-choice
    # blocks (a single-path net); it is needed to measure MACs correctly.
    single_path_net = models.SinglePathSupernet(
        num_classes=utils.get_number_of_classes(args.dataset),
        propagate=one_hot_rows,
        put_downsampling=downsample_layers)
    single_path_net.propagate = one_hot_rows

    dummy_batch = torch.randn((1, 3, 32, 32))
    total_ops, total_params = profile(single_path_net, (dummy_batch, ),
                                      verbose=True)
    all_values = {
        'MMACs': np.round(total_ops / (1000.0**2), 2),
        'Params': int(total_params),
    }
    # The profiling-only objects are no longer needed.
    del single_path_net, dummy_batch

    criterion = torch.nn.CrossEntropyLoss()

    # Initialize batch normalization parameters before measuring accuracy.
    utils.bn_update(device, data_loaders['train_for_bn_recalc'], model)

    val_res = utils.evaluate(device, data_loaders['val'], model, criterion,
                             sample_counts['val'])
    test_res = utils.evaluate(device, data_loaders['test'], model, criterion,
                              sample_counts['test'])

    all_values.update({
        'val_loss': np.round(val_res['loss'], 3),
        'val_acc': np.round(val_res['accuracy'], 3),
        'test_loss': np.round(test_res['loss'], 3),
        'test_acc': np.round(test_res['accuracy'], 3),
    })

    elapsed = time.time() - started_at
    print(all_values, 'time taken: %.2f sec.' % elapsed)
    utils.save_result(all_values, args.dir, model_description)
def cal_flops(net, mask, input, enable_gflops=True, comment=''):
    """Profile a re-parameterised copy of ``net`` and return its cost.

    Args:
        net: Network to measure; a deep copy is profiled, not ``net`` itself.
        mask: Mask passed (as a deep copy) to ``reparam_network``.
        input: Profiler input. A list is used as given; anything else is
            wrapped in a one-element list.
        enable_gflops: When true, the op count is divided by ``10**9``.
        comment: Not used by this function.

    Returns:
        The ``(flops, params)`` pair from ``profile``, with ``flops`` scaled
        when ``enable_gflops`` is set.
    """
    # Work on private copies so the caller's objects are not touched.
    net_copy = copy.deepcopy(net)
    mask_copy = copy.deepcopy(mask)

    if isinstance(input, list):
        profiler_inputs = input
    else:
        profiler_inputs = [input]

    reparam_net = reparam_network(net_copy, mask_copy)
    flops, params = profile(reparam_net, profiler_inputs, verbose=False)

    if enable_gflops:
        flops /= 10**9
    return flops, params
def calculate_model_complexity(model, input_dim=(1, 3, 256, 256), cuda=True):
    """Print a layer summary plus parameter and MAC totals for ``model``.

    Args:
        model: Network to analyse.
        input_dim: Shape of the random input. A shape with fewer than four
            entries gets a leading ``1`` prepended.
        cuda: When true, both the model and the random input are moved with
            ``.cuda()`` before profiling.

    Returns:
        The ``(macs, params)`` pair returned by ``profile``.
    """
    if len(input_dim) < 4:
        input_dim = (1, *input_dim)

    dummy = torch.randn(input_dim)
    if cuda:
        model = model.cuda()
        dummy = dummy.cuda()

    # The summary receives the shape without its leading dimension.
    summary(model, input_size=tuple(input_dim[1:]))
    macs, params = profile(model, inputs=(dummy,))

    rule = "----------------------------------------------------------------"
    mega = 1000 ** 2
    giga = 1000 ** 3

    print(rule)
    # NOTE(review): the value printed under "Params size (MB)" is
    # params / 1000**2, which looks like a count in millions rather than a
    # size in megabytes -- confirm the intended label.
    print("Params size (MB): {:.2f}".format(params / mega))
    print("MACs (M): {:.2f}".format(macs / mega))
    print("MACs (G): {:.2f}".format(macs / giga))
    print(rule)
    return macs, params
import sys

sys.path.append('.../NIR-ISL2021master/')  # change as you need

import torch
from thop.profile import profile
import time
import numpy as np
from models import EfficientUNet

# The model and its input must live on the same device. The timing loop
# below relies on torch.cuda.synchronize(), so everything is placed on CUDA
# (previously the model was moved to the CPU while the input was on CUDA,
# which makes the very first forward pass fail with a device mismatch).
net = EfficientUNet(num_classes=3).cuda()
example_input = torch.randn(1, 3, 480, 640).cuda()

# Operation and parameter counts for a single 480x640 input.
flops, params = profile(net, (example_input, ))
print('net FLOPs is: {:.3f} G, Params is {:.3f} M'.format(
    flops / 1e9, params / 1e6))

net.eval()
res = []
# Inference-only timing: no autograd graph is needed, so it is disabled to
# keep gradient bookkeeping out of the measurement.
with torch.no_grad():
    for i in range(100):
        # Synchronise before and after the forward pass so that the
        # asynchronous CUDA kernels are fully included in the interval.
        torch.cuda.synchronize()
        start = time.perf_counter()
        example_output = net(example_input)
        torch.cuda.synchronize()
        end = time.perf_counter()
        res.append(end - start)

# Frames per second = inverse of the mean per-forward latency.
print('FPS is {:.3f}'.format(1 / (np.mean(res))))
import torch
from torchvision import models
from thop.profile import profile

print('Lab 1-2:\n')

# Use the GPU when one is available, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Both networks are created up front, then share one random input.
model = models.resnet50().to(device)
model2 = models.mobilenet_v2().to(device)

dsize = (1, 3, 224, 224)
inputs = torch.randn(dsize).to(device)

# --- resnet50 ---------------------------------------------------------
print('resnet50:')
resnet_stats = profile(model, (inputs,))
total_MACs, total_params = resnet_stats
print("Total params: %.2fM" % (total_params / (1000 ** 2)))
print("Total MACs: %.2fM\n" % (total_MACs / (1000 ** 2)))

# --- mobilenet_v2 -----------------------------------------------------
print('mobilenet_v2:')
mobilenet_stats = profile(model2, (inputs,))
total_MACs2, total_params2 = mobilenet_stats
print("Total params: %.2fM" % (total_params2 / (1000 ** 2)))
print("Total MACs: %.2fM" % (total_MACs2 / (1000 ** 2)))


# In[7]:

# Show the torchvision ``models`` module object itself.
print(models)


# In[ ]:
device=device, callbacks=[ ('tensorboard', TensorBoard(writer)), ('cp', cp), ('train_end_cp', train_end_cp), # ("load_state", load_state), ('early_stoping', EarlyStopping(patience=5)), ('lr_scheduler', LRScheduler(policy=ReduceLROnPlateau, monitor='valid_loss')), ], ) print("Begin training") try: y_train = np.concatenate((np.zeros((100, )), np.ones( (100, )))).astype('float32') net.fit(train_dataset, y_train) except KeyboardInterrupt: net.save_params(f_params=run + '.pkl') net.save_params(f_params=run + '.pkl') print("Finish training") inputs = torch.randn(160, 64).to(device) total_ops, total_params = profile(net.module_, (inputs, ), verbose=False) print("%s | %s | %s" % ("Model", "Params(k)", "FLOPs(M)")) print("%s | %.2f | %.2f" % ("net.name", total_params / (1000), total_ops / (1000**2)))
import torch
from torchvision import models
from thop.profile import profile


def _is_model_factory(attr_name):
    """Tell whether a ``models`` attribute is a lowercase, public callable."""
    return (attr_name.islower()
            and not attr_name.startswith("__")  # and "inception" in attr_name
            and callable(vars(models)[attr_name]))


model_names = sorted(filter(_is_model_factory, vars(models)))

# Header of the pipe-separated result table.
print("%s | %s | %s" % ("Model", "Params(M)", "FLOPs(G)"))
print("---|---|---")

device = "cuda" if torch.cuda.is_available() else "cpu"

for name in model_names:
    model = vars(models)[name]().to(device)
    # Names containing "inception" get a 299x299 input, all others 224x224.
    dsize = (1, 3, 299, 299) if "inception" in name else (1, 3, 224, 224)
    inputs = torch.randn(dsize).to(device)
    total_ops, total_params = profile(model, (inputs, ), verbose=False)
    row = (name, total_params / (1000**2), total_ops / (1000**3))
    print("%s | %.2f | %.2f" % row)
blocks=cfg.MODEL.RESNET.BLOCKS, extras=cfg.MODEL.RESNET.EXTRAS, se=cfg.MODEL.RESNET.SE, cbam=cfg.MODEL.RESNET.CBAM, fusion=cfg.MODEL.RESNET.FUSION) if pretrained: pretrained_dict = load_state_dict_from_url(model_urls['resnet101']) model_dict = model.state_dict() pretrained_dict = { k: v for k, v in pretrained_dict.items() if k in model_dict } model_dict.update(pretrained_dict) model.load_state_dict(model_dict) return model if __name__ == '__main__': import torch from torchsummary import summary from thop.profile import profile resnet = ResNet(block=Bottleneck, blocks=[3, 4, 23, 3], extras=[128, 256, 512, 256, 128, 64, 64], se=False) # summary(resnet, (3, 512, 512)) # # print(resnet) device = torch.device('cpu') inputs = torch.randn((1, 3, 300, 300)).to(device) total_ops, total_params = profile(resnet, (inputs, ), verbose=False) print("%.2f | %.2f" % (total_params / (1000**2), total_ops / (1000**3)))
mask.append(torch.randint(2, (5, 1)) * 0 + 1) mask.append(torch.randint(2, (5, )) * 0 + 1) mask.append(None) mask.append(torch.randint(2, (5, )) * 0 + 1) batch = 1 input = [torch.randn(batch, 3, 4, 4, dtype=torch.float32)] # # flops, params = cal_flops(model, mask, input, enable_gflops=False, comment='') # print('flops:', flops, ', params:', params) # Test 2 # model = nn.Sequential(nn.Linear(10, 5, bias=True)) # mask = [] # mask.append(torch.randint(2, (5, 10)) * 0 + 1) # mask.append(torch.randint(2, (5,)) * 0 + 1) # batch = 1 # input = [torch.randn(batch, 10, dtype=torch.float32)] # # flops, params = cal_flops(model, mask, input, enable_gflops=False, comment='') # print('flops:', flops, ', params:', params) # Test 3 model = resnet50() input = torch.randn(1, 3, 224, 224) flops, params = profile(model, inputs=(input, )) print('ResNet50 flops:{}, params:{}'.format(flops, params)) # # The same above # flops, params = get_model_complexity_info(model, (in_c, h, w), as_strings=True, print_per_layer_stat=False, units='E') # print(flops, params)
'stacked-BiGRU': nn.Sequential( nn.GRU(input_size, hidden_size, bidirectional=True, num_layers=4)), 'stacked-BiLSTM': nn.Sequential( nn.LSTM(input_size, hidden_size, bidirectional=True, num_layers=4)), } print('{} | {} | {}'.format('Model', 'Params(M)', "FLOPs(G)")) print("---|---|---") for name, model in models.items(): # time_first dummy inputs inputs = torch.randn(100, 32, input_size) if name.find('Cell') != -1: total_ops, total_params = profile(model, (inputs[0], ), verbose=False) else: total_ops, total_params = profile(model, (inputs, ), verbose=False) print('{} | {:.2f} | {:.2f}'.format( name, total_params / 1e6, total_ops / 1e9, )) # validate batch_first support inputs = torch.randn(100, 32, input_size) ops_time_first = profile(nn.Sequential(nn.LSTM(input_size, hidden_size)), (inputs, ), verbose=False)[0] ops_batch_first = profile(nn.Sequential( nn.LSTM(input_size, hidden_size, batch_first=True)),
import sys
import copy
import torch
import torch.nn as nn
import random
import numpy as np
from thop.profile import profile
from torchvision.models import resnet50

if __name__ == '__main__':
    seed = 2019
    # Seed every random number generator in the same order as before.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed,
                    torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # This small network is built and then immediately replaced by ResNet50
    # below; it is kept so the sequence of operations stays unchanged.
    toy_layers = (
        nn.Conv2d(3, 5, 2, stride=1, bias=True),
        nn.Conv2d(5, 1, 3, stride=1, bias=True),
        nn.Linear(1, 5, bias=True),
    )
    model = nn.Sequential(*toy_layers)

    model = resnet50()
    input = torch.randn(1, 3, 224, 224)
    # NOTE(review): three values are unpacked here; this presumably relies on
    # a ``profile`` variant that also reports memory -- confirm against the
    # installed thop.
    flops, params, memory = profile(model, inputs=(input, ), verbose=False)
    report = 'ResNet50 flops:{}, params:{}, memory:{}'.format(
        flops, params, memory)
    print(report)
def run_evaluation(model, ensemble_model, data_loaders, args, save_model='', load_model=''):
    """Train ``model`` with a cyclic learning rate and track a moving-average ensemble.

    The function first profiles ``model`` on a random single-sample input,
    then trains it for ``args['epochs']`` epochs. At the end of each
    learning-rate cycle it evaluates the single model, folds it into
    ``ensemble_model`` and evaluates the ensemble as well.

    Args:
        model: Network that is profiled, optionally restored from disk and
            trained.
        ensemble_model: Network updated through
            ``utils.moving_average_ensemble``.
        data_loaders: Mapping with the keys ``'train'``, ``'val'``,
            ``'test'`` and ``'train_for_bn_recalc'``.
        args: Mapping read for ``'seed'``, ``'input_channels'``,
            ``'img_size'``, ``'dir'``, ``'batch_size'``, ``'lr_init'``,
            ``'epochs'``, ``'lr_start_cycle'``, ``'cycle_period'`` and the
            ``'num_samples_train'/'_val'/'_test'`` counts.
        save_model: File name (inside ``args['dir']``) for the final
            weights; nothing is saved when it is empty.
        load_model: File name (inside ``args['dir']``) of weights to load
            before training; nothing is loaded when it is empty.

    Returns:
        The ``all_values`` dict holding ``'MMACs'``, ``'Params'`` and the
        per-cycle metric lists.

    NOTE(review): the block nesting below was reconstructed from
    whitespace-collapsed text -- confirm against the original layout.
    """
    all_values = {}
    device = 'cuda'
    utils.setup_torch(args['seed'])

    # Profile the model on one random sample before it is moved to `device`.
    inputs = torch.randn(
        (1, args['input_channels'], args['img_size'], args['img_size']))
    total_ops, total_params = profile(model, (inputs, ), verbose=True)
    all_values['MMACs'] = np.round(total_ops / (1000.0**2), 2)
    all_values['Params'] = int(total_params)
    print(all_values)

    # Move both networks to the device and report how long that took.
    start = time.time()
    model = model.to(device)
    ensemble_model = ensemble_model.to(device)
    print('models to device', time.time() - start)

    # Optionally restore previously saved weights.
    if len(load_model) > 0:
        model.load_state_dict(torch.load(os.path.join(args['dir'], load_model)))
    criterion = torch.nn.CrossEntropyLoss()
    ################################################
    # NOTE(review): the summary uses a fixed (3, 32, 32) input while the
    # profiling input above is built from args -- confirm they agree.
    summary(model, (3, 32, 32), batch_size=args['batch_size'], device='cuda')
    # This assignment replaces the criterion created a few lines above.
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=args['lr_init'], momentum=0.9, weight_decay=1e-4)
    lrs = []
    n_models = 0

    # One list per tracked metric; entries are appended once per finished cycle.
    all_values['epoch'] = []
    all_values['overall_time'] = []
    all_values['lr'] = []
    all_values['tr_loss'] = []
    all_values['tr_acc'] = []
    all_values['val_loss_single'] = []
    all_values['val_acc_single'] = []
    all_values['val_loss_ensemble'] = []
    all_values['val_acc_ensemble'] = []
    all_values['test_loss_single'] = []
    all_values['test_acc_single'] = []
    all_values['test_loss_ensemble'] = []
    all_values['test_acc_ensemble'] = []
    n_models = 0
    time_start = time.time()
    for epoch in range(args['epochs']):
        # `time_ep` is only referenced by the commented-out print below.
        time_ep = time.time()
        lr = utils.get_cyclic_lr(epoch, lrs, args['lr_init'], args['lr_start_cycle'], args['cycle_period'])
        #print ('lr=%.3f' % lr)
        utils.set_learning_rate(optimizer, lr)
        lrs.append(lr)
        train_res = utils.train_epoch(device, data_loaders['train'], model, criterion, optimizer, args['num_samples_train'])
        # `values` is not read anywhere else in this function.
        values = [epoch + 1, lr, train_res['loss'], train_res['accuracy']]

        # Snapshot point: the cycling phase has started and the 1-based
        # epoch number is a multiple of the cycle period.
        if (epoch + 1) >= args['lr_start_cycle'] and (
                epoch + 1) % args['cycle_period'] == 0:
            all_values['epoch'].append(epoch + 1)
            all_values['lr'].append(lr)
            all_values['tr_loss'].append(train_res['loss'])
            all_values['tr_acc'].append(train_res['accuracy'])

            # Metrics of the single (current) model.
            val_res = utils.evaluate(device, data_loaders['val'], model, criterion, args['num_samples_val'])
            test_res = utils.evaluate(device, data_loaders['test'], model, criterion, args['num_samples_test'])
            all_values['val_loss_single'].append(val_res['loss'])
            all_values['val_acc_single'].append(val_res['accuracy'])
            all_values['test_loss_single'].append(test_res['loss'])
            all_values['test_acc_single'].append(test_res['accuracy'])

            # Fold the current model into the ensemble with weight
            # 1 / (n_models + 1), then run the batch-norm update on it.
            utils.moving_average_ensemble(ensemble_model, model, 1.0 / (n_models + 1))
            utils.bn_update(device, data_loaders['train_for_bn_recalc'], ensemble_model)
            n_models += 1

            # Metrics of the updated ensemble.
            val_res = utils.evaluate(device, data_loaders['val'], ensemble_model, criterion, args['num_samples_val'])
            test_res = utils.evaluate(device, data_loaders['test'], ensemble_model, criterion, args['num_samples_test'])
            all_values['val_loss_ensemble'].append(val_res['loss'])
            all_values['val_acc_ensemble'].append(val_res['accuracy'])
            all_values['test_loss_ensemble'].append(test_res['loss'])
            all_values['test_acc_ensemble'].append(test_res['accuracy'])

            # Wall-clock time since training started, recorded per snapshot.
            overall_training_time = time.time() - time_start
            all_values['overall_time'].append(overall_training_time)
        #print (epoch, 'epoch_time', time.time() - time_ep)
    overall_training_time = time.time() - time_start
    #print ('overall time', overall_training_time)
    #print (all_values)

    # Optionally persist the ensemble and the single model weights.
    if len(save_model) > 0:
        torch.save(ensemble_model.state_dict(), os.path.join(args['dir'], save_model + '_ensemble'))
        torch.save(model.state_dict(), os.path.join(args['dir'], save_model))
    return all_values