def do_profiling(self, first_encoder_inputs, first_decoder_inputs):
    """Profile the encoder and the decoder on a single-sample batch.

    Each input is sliced with ``[0:1, ...]`` before being handed to
    ``profile`` so that only the first sample is used.

    Args:
        first_encoder_inputs: First batch for ``self.encoder``, or ``None``.
        first_decoder_inputs: First batch for ``self.decoder``, or ``None``.

    Returns:
        ``None`` when either input is ``None``; otherwise the 4-tuple
        ``(emacs, eparams, dmacs, dparams)`` made of the two values that
        ``profile`` returns for the encoder followed by the two it returns
        for the decoder.
    """
    # Both inputs are required. Once this guard passes neither can be None,
    # so no further per-input checks are needed below.
    if first_encoder_inputs is None or first_decoder_inputs is None:
        return None

    # Reduce batch size to 1 and run the profiler on each sub-model.
    encoder_inputs = first_encoder_inputs[0:1, ...]
    emacs, eparams = profile(self.encoder, (encoder_inputs, ))

    decoder_inputs = first_decoder_inputs[0:1, ...]
    dmacs, dparams = profile(self.decoder, (decoder_inputs, ))

    return emacs, eparams, dmacs, dparams
コード例 #2
0
 def do_profiling(self, first_batch_inputs):
     """Profile ``self.model`` on the first sample of a batch.

     Args:
         first_batch_inputs: First batch for ``self.model``, or ``None``.

     Returns:
         ``None`` when no batch is given; otherwise the pair of values
         returned by ``profile``.
     """
     if first_batch_inputs is not None:
         # Keep only the first sample so the profiler sees a batch of one.
         single_sample = first_batch_inputs[:1, ...]
         total_macs, total_params = profile(self.model, (single_sample, ))
         return total_macs, total_params
     return None
コード例 #3
0
def run_evaluation(args, model, data_loaders, model_description, n_choices,
                   layers_types, downsample_layers):
    """Profile one architecture of the supernet and evaluate it on val/test.

    The architecture in ``model_description`` is turned into one-hot rows
    (one row per layer, ``n_choices`` wide) that are assigned to
    ``model.propagate``. A ``models.SinglePathSupernet`` built from the same
    rows is profiled on a random ``(1, 3, 32, 32)`` input, then ``model`` has
    its batch-norm statistics refreshed and is evaluated.

    Args:
        args: Object exposing ``dataset``, ``seed`` and ``dir`` attributes.
        model: Network whose ``propagate`` attribute is overwritten.
        data_loaders: Mapping with the keys ``'train_for_bn_recalc'``,
            ``'val'`` and ``'test'``.
        model_description: Iterable of layer types, each present in
            ``layers_types``.
        n_choices: Width of every one-hot row.
        layers_types: Sequence used to look up the index of each layer type.
        downsample_layers: Forwarded as ``put_downsampling``.

    Returns:
        None. The collected metrics are printed and passed to
        ``utils.save_result``.
    """
    t_begin = time.time()

    sample_counts = utils.get_number_of_samples(args.dataset)
    device = 'cuda'
    results = {}

    # Seed the random number generators before any model is created.
    utils.setup_torch(args.seed)

    # One-hot encode the chosen layer type for every position.
    one_hot_rows = []
    for layer_type in model_description:
        row = [0] * n_choices
        row[layers_types.index(layer_type)] = 1
        one_hot_rows.append(row)

    model.propagate = one_hot_rows

    # A single-path copy of the architecture (no multiple-choice blocks) is
    # what gets profiled, so that the MAC count is measured correctly.
    single_path = models.SinglePathSupernet(
        num_classes=utils.get_number_of_classes(args.dataset),
        propagate=one_hot_rows,
        put_downsampling=downsample_layers)
    single_path.propagate = one_hot_rows
    dummy = torch.randn((1, 3, 32, 32))
    total_ops, total_params = profile(single_path, (dummy, ), verbose=True)
    results['MMACs'] = np.round(total_ops / (1000.0**2), 2)
    results['Params'] = int(total_params)

    # The profiling-only objects are no longer needed.
    del single_path, dummy

    criterion = torch.nn.CrossEntropyLoss()

    # Initialize batch normalization parameters before evaluating.
    utils.bn_update(device, data_loaders['train_for_bn_recalc'], model)

    val_res = utils.evaluate(device, data_loaders['val'], model, criterion,
                             sample_counts['val'])
    test_res = utils.evaluate(device, data_loaders['test'], model, criterion,
                              sample_counts['test'])

    for split, outcome in (('val', val_res), ('test', test_res)):
        results[split + '_loss'] = np.round(outcome['loss'], 3)
        results[split + '_acc'] = np.round(outcome['accuracy'], 3)

    elapsed = time.time() - t_begin
    print(results, 'time taken: %.2f sec.' % elapsed)

    utils.save_result(results, args.dir, model_description)
コード例 #4
0
ファイル: flops_old.py プロジェクト: zwxu064/RANP
def cal_flops(net, mask, input, enable_gflops=True, comment=''):
    """Profile ``net`` after it has been re-parameterised with ``mask``.

    Deep copies of ``net`` and ``mask`` are what get passed to
    ``reparam_network``; the objects given by the caller are not handed to it.

    Args:
        net: Network to profile.
        mask: Mask forwarded to ``reparam_network``.
        input: Profiler input; anything that is not a ``list`` is wrapped in
            a one-element list.
        enable_gflops: When true, the first returned value is divided by
            ``10**9``.
        comment: Unused.

    Returns:
        The ``(flops, params)`` pair from ``profile``, with ``flops`` scaled
        when ``enable_gflops`` is set.
    """
    net_copy = copy.deepcopy(net)
    mask_copy = copy.deepcopy(mask)

    if not isinstance(input, list):
        input = [input]

    reparam_net = reparam_network(net_copy, mask_copy)
    flops, params = profile(reparam_net, input, verbose=False)

    if not enable_gflops:
        return flops, params

    flops /= 10**9
    return flops, params
コード例 #5
0
def calculate_model_complexity(model, input_dim=(1, 3, 256, 256), cuda=True):
    """Print a layer summary plus parameter/MAC totals for ``model``.

    Args:
        model: Network to analyse.
        input_dim: Shape of the random input. When it has fewer than four
            entries a leading ``1`` is prepended.
        cuda: When true, the model and the random input are moved to CUDA.

    Returns:
        The ``(macs, params)`` pair returned by ``profile``.
    """
    if len(input_dim) < 4:
        input_dim = (1, *input_dim)
    inputs = torch.randn(input_dim)

    device = "cuda" if cuda else "cpu"
    if cuda:
        model = model.cuda()
        inputs = inputs.cuda()

    # The summary builds its own input, so it must be told which device the
    # model lives on; otherwise a CPU model can be fed a CUDA tensor.
    # NOTE(review): assumes `summary` is torchsummary's
    # `summary(model, input_size, batch_size=-1, device="cuda")` - confirm.
    summary(model, input_size=tuple(input_dim[1:]), device=device)

    macs, params = profile(model, inputs=(inputs,))
    print("----------------------------------------------------------------")
    print("Params size (MB): {:.2f}".format(params / (1000 ** 2)))
    print("MACs (M): {:.2f}".format(macs / (1000 ** 2)))
    print("MACs (G): {:.2f}".format(macs / (1000 ** 3)))
    print("----------------------------------------------------------------")
    return macs, params
コード例 #6
0
import sys
sys.path.append('.../NIR-ISL2021master/')  # change as you need
import torch
from thop.profile import profile
import time
import numpy as np

from models import EfficientUNet

# The model must live on the same device as its input. The input is created
# on CUDA and the timing loop below synchronizes CUDA, so the model goes to
# CUDA as well (it was previously left on the CPU).
net = EfficientUNet(num_classes=3).cuda()

example_input = torch.randn(1, 3, 480, 640).cuda()
flops, params = profile(net, (example_input, ))
print('net FLOPs is: {:.3f} G, Params is {:.3f} M'.format(
    flops / 1e9, params / 1e6))

net.eval()
res = []

# Time 100 forward passes. Gradients are not needed for inference timing,
# and synchronizing before reading the clock makes each measurement cover
# the whole GPU execution rather than just the kernel launch.
with torch.no_grad():
    for i in range(100):
        torch.cuda.synchronize()
        start = time.time()
        example_output = net(example_input)
        torch.cuda.synchronize()
        end = time.time()
        res.append(end - start)
print('FPS is {:.3f}'.format(1 / (np.mean(res))))
コード例 #7
0
import torch
from torchvision import models
from thop.profile import profile


print('Lab 1-2:\n')

# Use CUDA when it is available, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Both networks are built first, then one shared random input is drawn.
model = models.resnet50().to(device)
model2 = models.mobilenet_v2().to(device)
dsize = (1, 3, 224, 224)
inputs = torch.randn(dsize).to(device)

# Profile resnet50 and report its totals in millions.
print('resnet50:')
total_MACs, total_params = profile(model, (inputs,))
params_in_millions = total_params / (1000 ** 2)
macs_in_millions = total_MACs / (1000 ** 2)
print("Total params: %.2fM" % params_in_millions)
print("Total MACs: %.2fM\n" % macs_in_millions)

# Profile mobilenet_v2 on the same input.
print('mobilenet_v2:')
total_MACs2, total_params2 = profile(model2, (inputs,))
params_in_millions = total_params2 / (1000 ** 2)
macs_in_millions = total_MACs2 / (1000 ** 2)
print("Total params: %.2fM" % params_in_millions)
print("Total MACs: %.2fM" % macs_in_millions)


# In[7]:


# Show the torchvision ``models`` module object.
print(models)


# In[ ]:
コード例 #8
0
        device=device,
        callbacks=[
            ('tensorboard', TensorBoard(writer)),
            ('cp', cp),
            ('train_end_cp', train_end_cp),
            # ("load_state", load_state),
            ('early_stoping', EarlyStopping(patience=5)),
            ('lr_scheduler',
             LRScheduler(policy=ReduceLROnPlateau, monitor='valid_loss')),
        ],
    )

    print("Begin training")

    try:
        y_train = np.concatenate((np.zeros((100, )), np.ones(
            (100, )))).astype('float32')
        net.fit(train_dataset, y_train)
    except KeyboardInterrupt:
        net.save_params(f_params=run + '.pkl')

    net.save_params(f_params=run + '.pkl')

    print("Finish training")

    inputs = torch.randn(160, 64).to(device)
    total_ops, total_params = profile(net.module_, (inputs, ), verbose=False)
    print("%s | %s | %s" % ("Model", "Params(k)", "FLOPs(M)"))
    print("%s | %.2f | %.2f" % ("net.name", total_params / (1000), total_ops /
                                (1000**2)))
コード例 #9
0
import torch
from torchvision import models
from thop.profile import profile

# Every lower-case, non-dunder, callable attribute of ``torchvision.models``
# is treated as a model constructor.
model_names = sorted(
    name for name, entry in models.__dict__.items()
    if name.islower() and not name.startswith("__") and callable(entry))

print("%s | %s | %s" % ("Model", "Params(M)", "FLOPs(G)"))
print("---|---|---")

device = "cuda" if torch.cuda.is_available() else "cpu"

for name in model_names:
    model = models.__dict__[name]().to(device)
    # Names containing "inception" get a 299x299 input, all others 224x224.
    dsize = (1, 3, 299, 299) if "inception" in name else (1, 3, 224, 224)
    inputs = torch.randn(dsize).to(device)
    total_ops, total_params = profile(model, (inputs, ), verbose=False)
    params_m = total_params / (1000**2)
    ops_g = total_ops / (1000**3)
    print("%s | %.2f | %.2f" % (name, params_m, ops_g))
コード例 #10
0
                   blocks=cfg.MODEL.RESNET.BLOCKS,
                   extras=cfg.MODEL.RESNET.EXTRAS,
                   se=cfg.MODEL.RESNET.SE,
                   cbam=cfg.MODEL.RESNET.CBAM,
                   fusion=cfg.MODEL.RESNET.FUSION)
    if pretrained:
        pretrained_dict = load_state_dict_from_url(model_urls['resnet101'])
        model_dict = model.state_dict()
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in model_dict
        }
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
    return model


if __name__ == '__main__':
    import torch
    from torchsummary import summary
    from thop.profile import profile

    # Script entry point: build a ResNet with [3, 4, 23, 3] bottleneck blocks
    # and print its parameter count (millions) and op count (billions).
    device = torch.device('cpu')
    resnet = ResNet(
        block=Bottleneck,
        blocks=[3, 4, 23, 3],
        extras=[128, 256, 512, 256, 128, 64, 64],
        se=False,
    )
    # Optional layer-by-layer report, left disabled:
    # summary(resnet, (3, 512, 512))
    inputs = torch.randn((1, 3, 300, 300)).to(device)
    total_ops, total_params = profile(resnet, (inputs, ), verbose=False)
    params_m = total_params / (1000**2)
    ops_g = total_ops / (1000**3)
    print("%.2f | %.2f" % (params_m, ops_g))
コード例 #11
0
ファイル: flops_old.py プロジェクト: zwxu064/RANP
    mask.append(torch.randint(2, (5, 1)) * 0 + 1)
    mask.append(torch.randint(2, (5, )) * 0 + 1)
    mask.append(None)
    mask.append(torch.randint(2, (5, )) * 0 + 1)

    batch = 1
    input = [torch.randn(batch, 3, 4, 4, dtype=torch.float32)]
    #
    # flops, params = cal_flops(model, mask, input, enable_gflops=False, comment='')
    # print('flops:', flops, ', params:', params)

    # Test 2
    # model = nn.Sequential(nn.Linear(10, 5, bias=True))
    # mask = []
    # mask.append(torch.randint(2, (5, 10)) * 0 + 1)
    # mask.append(torch.randint(2, (5,)) * 0 + 1)
    # batch = 1
    # input = [torch.randn(batch, 10, dtype=torch.float32)]
    #
    # flops, params = cal_flops(model, mask, input, enable_gflops=False, comment='')
    # print('flops:', flops, ', params:', params)

    # Test 3
    model = resnet50()
    input = torch.randn(1, 3, 224, 224)
    flops, params = profile(model, inputs=(input, ))
    print('ResNet50 flops:{}, params:{}'.format(flops, params))

    # # The same above
    # flops, params = get_model_complexity_info(model, (in_c, h, w), as_strings=True, print_per_layer_stat=False, units='E')
    # print(flops, params)
コード例 #12
0
    'stacked-BiGRU':
    nn.Sequential(
        nn.GRU(input_size, hidden_size, bidirectional=True, num_layers=4)),
    'stacked-BiLSTM':
    nn.Sequential(
        nn.LSTM(input_size, hidden_size, bidirectional=True, num_layers=4)),
}

print('{} | {} | {}'.format('Model', 'Params(M)', "FLOPs(G)"))
print("---|---|---")

for name, model in models.items():
    # Dummy input of shape (100, 32, input_size); the first axis is treated
    # as time.
    inputs = torch.randn(100, 32, input_size)
    # Entries whose name contains 'Cell' are profiled on the first slice
    # along axis 0 only; everything else gets the full tensor.
    sample = inputs[0] if 'Cell' in name else inputs
    total_ops, total_params = profile(model, (sample, ), verbose=False)
    params_m = total_params / 1e6
    ops_g = total_ops / 1e9
    print('{} | {:.2f} | {:.2f}'.format(name, params_m, ops_g))

# validate batch_first support
inputs = torch.randn(100, 32, input_size)
ops_time_first = profile(nn.Sequential(nn.LSTM(input_size, hidden_size)),
                         (inputs, ),
                         verbose=False)[0]
ops_batch_first = profile(nn.Sequential(
    nn.LSTM(input_size, hidden_size, batch_first=True)),
コード例 #13
0
ファイル: my_test.py プロジェクト: zwxu064/RANP
import sys
import copy
import torch
import torch.nn as nn
import random
import numpy as np
from thop.profile import profile
from torchvision.models import resnet50

if __name__ == '__main__':
    # Seed every random number generator in use and force deterministic
    # cuDNN behaviour so repeated runs are comparable.
    seed = 2019
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Only ResNet50 is profiled here. The small Conv/Linear ``nn.Sequential``
    # that used to be built first was overwritten before any use, so it has
    # been dropped.
    model = resnet50()
    # Named ``dummy_input`` so the builtin ``input`` is not shadowed.
    dummy_input = torch.randn(1, 3, 224, 224)
    # NOTE(review): three values are unpacked here, so this presumably relies
    # on a project-local ``profile`` that also reports memory - confirm.
    flops, params, memory = profile(model,
                                    inputs=(dummy_input, ),
                                    verbose=False)
    print('ResNet50 flops:{}, params:{}, memory:{}'.format(
        flops, params, memory))
コード例 #14
0
ファイル: nas_evaluate.py プロジェクト: lilujunai/MacroNAS
def run_evaluation(model,
                   ensemble_model,
                   data_loaders,
                   args,
                   save_model='',
                   load_model=''):
    """Profile ``model``, train it with a cyclic learning rate, and track a
    moving-average ensemble.

    At the end of every learning-rate cycle the single model is evaluated,
    folded into ``ensemble_model`` via ``utils.moving_average_ensemble``, the
    ensemble's batch-norm statistics are refreshed, and the ensemble is
    evaluated as well.

    Args:
        model: Network to profile and train.
        ensemble_model: Network that accumulates the moving average.
        data_loaders: Mapping with the keys ``'train'``, ``'val'``,
            ``'test'`` and ``'train_for_bn_recalc'``.
        args: Mapping read for ``'seed'``, ``'input_channels'``,
            ``'img_size'``, ``'dir'``, ``'batch_size'``, ``'lr_init'``,
            ``'epochs'``, ``'lr_start_cycle'``, ``'cycle_period'``,
            ``'num_samples_train'``, ``'num_samples_val'`` and
            ``'num_samples_test'``.
        save_model: File name (inside ``args['dir']``) under which the final
            weights are saved; the ensemble is saved with an ``'_ensemble'``
            suffix. Nothing is saved when empty.
        load_model: File name (inside ``args['dir']``) of weights to load
            into ``model`` before training. Nothing is loaded when empty.

    Returns:
        dict: ``'MMACs'`` and ``'Params'`` from the profiler plus one list per
        tracked metric, each with one entry per completed cycle.
    """
    all_values = {}
    device = 'cuda'

    utils.setup_torch(args['seed'])

    # Single source of truth for the per-sample input shape, shared by the
    # profiler and the summary below.
    input_shape = (args['input_channels'], args['img_size'], args['img_size'])

    inputs = torch.randn((1, ) + input_shape)
    total_ops, total_params = profile(model, (inputs, ), verbose=True)
    all_values['MMACs'] = np.round(total_ops / (1000.0**2), 2)
    all_values['Params'] = int(total_params)
    print(all_values)

    start = time.time()
    model = model.to(device)
    ensemble_model = ensemble_model.to(device)
    print('models to device', time.time() - start)

    if load_model:
        model.load_state_dict(
            torch.load(os.path.join(args['dir'], load_model)))

    # Previously hard-coded to (3, 32, 32), which disagreed with the profiled
    # input whenever the configured channels or image size differed.
    summary(model, input_shape, batch_size=args['batch_size'], device=device)

    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args['lr_init'],
                                momentum=0.9,
                                weight_decay=1e-4)

    def _evaluate_val_and_test(net):
        # Evaluate ``net`` on the validation split, then on the test split.
        val = utils.evaluate(device, data_loaders['val'], net, criterion,
                             args['num_samples_val'])
        test = utils.evaluate(device, data_loaders['test'], net, criterion,
                              args['num_samples_test'])
        return val, test

    # One list per tracked metric; insertion order is kept as before.
    for key in ('epoch', 'overall_time', 'lr',
                'tr_loss', 'tr_acc',
                'val_loss_single', 'val_acc_single',
                'val_loss_ensemble', 'val_acc_ensemble',
                'test_loss_single', 'test_acc_single',
                'test_loss_ensemble', 'test_acc_ensemble'):
        all_values[key] = []

    lrs = []
    n_models = 0
    time_start = time.time()

    for epoch in range(args['epochs']):
        lr = utils.get_cyclic_lr(epoch, lrs, args['lr_init'],
                                 args['lr_start_cycle'], args['cycle_period'])
        utils.set_learning_rate(optimizer, lr)
        lrs.append(lr)

        train_res = utils.train_epoch(device, data_loaders['train'], model,
                                      criterion, optimizer,
                                      args['num_samples_train'])

        # Metrics are only recorded on epochs that close a cycle.
        cycle_finished = ((epoch + 1) >= args['lr_start_cycle']
                          and (epoch + 1) % args['cycle_period'] == 0)
        if not cycle_finished:
            continue

        all_values['epoch'].append(epoch + 1)
        all_values['lr'].append(lr)

        all_values['tr_loss'].append(train_res['loss'])
        all_values['tr_acc'].append(train_res['accuracy'])

        val_res, test_res = _evaluate_val_and_test(model)
        all_values['val_loss_single'].append(val_res['loss'])
        all_values['val_acc_single'].append(val_res['accuracy'])
        all_values['test_loss_single'].append(test_res['loss'])
        all_values['test_acc_single'].append(test_res['accuracy'])

        # Fold the current weights into the running average with weight
        # 1 / (number of models averaged so far + 1), then refresh the
        # ensemble's batch-norm statistics before evaluating it.
        utils.moving_average_ensemble(ensemble_model, model,
                                      1.0 / (n_models + 1))
        utils.bn_update(device, data_loaders['train_for_bn_recalc'],
                        ensemble_model)
        n_models += 1

        val_res, test_res = _evaluate_val_and_test(ensemble_model)
        all_values['val_loss_ensemble'].append(val_res['loss'])
        all_values['val_acc_ensemble'].append(val_res['accuracy'])
        all_values['test_loss_ensemble'].append(test_res['loss'])
        all_values['test_acc_ensemble'].append(test_res['accuracy'])

        all_values['overall_time'].append(time.time() - time_start)

    if save_model:
        torch.save(ensemble_model.state_dict(),
                   os.path.join(args['dir'], save_model + '_ensemble'))
        torch.save(model.state_dict(), os.path.join(args['dir'], save_model))

    return all_values