def test(args):
    """Evaluate a trained denoising model on the fixed test H5 pair.

    Loads the checkpoint selected by ``args.test_ep`` from ``args.save_dir``,
    reports FLOPs/params, then measures per-image PSNR/SSIM and forward FPS,
    optionally saving/showing result images. Summary metrics are written to
    ``test_info.json`` next to the checkpoint.

    Fix: ``time.clock()`` was removed in Python 3.8 — replaced with
    ``time.perf_counter()``.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    args = merge_args_from_train(args.save_dir, args)

    # Reconstruct the model/run name the training script used for its save dir.
    model_name = args.model_name + '-bz' + str(args.bz) + '_ep' + str(
        args.ep) + '_' + args.n_loss
    if '_t' in model_name:
        model_name = model_name + '_' + args.t_loss
    if args.mode == 'n2n':
        model_name = model_name + '-' + args.mode
    if not os.path.exists(args.save_dir):
        # Fall back to the conventional results layout when the given dir is absent.
        args.save_dir = 'results2' + args.h5_dir.replace(
            '/', '_') + '/' + args.noise_name + '/' + model_name
    print('existing save_dir:', args.save_dir)

    # NOTE(review): test paths are hard-coded, overriding the args-derived
    # scheme kept below for reference — confirm this is intentional.
    # source_h5_path_test = args.data_root + '/' + args.h5_dir + '/' + args.mode + '_' + args.noise_name + '_' + args.test_set + '_noised_test.h5'
    # target_h5_path_test = args.data_root + '/' + args.h5_dir + '/' + args.mode + '_' + args.noise_name + '_' + args.test_set + '_clean_test.h5'
    source_h5_path_test = '/home/ipsg/code/sx/datasets/infread/images/n2c_infreadEN_noised_test.h5'
    target_h5_path_test = '/home/ipsg/code/sx/datasets/infread/images/n2c_infreadEN_clean_test.h5'
    test_set = dataset_img2img(source_h5_path_test, target_h5_path_test)
    print('source_h5_path_test:', source_h5_path_test)

    model_path = args.save_dir + '/' + str(args.test_ep) + '.pth'
    state_dict = torch.load(model_path)
    net = choose_model(args.model_name, 'test')
    print('loading:', model_path)
    net.load_state_dict(state_dict)
    net.eval()
    net.cuda()

    with torch.no_grad():
        flops_str, params_str = get_model_complexity_info(
            net, (3, 480, 640),
            as_strings=True,
            print_per_layer_stat=False,
            verbose=False)
        print('FLOPs:', flops_str, 'Parms:', params_str)

    test_psnr = 0
    test_ssim = 0
    fw_times = []
    for i, pair in enumerate(test_set):
        with torch.no_grad():
            batch_datas = pair[0].unsqueeze(0).cuda()
            batch_labels = pair[1].unsqueeze(0).cuda()

            # time.perf_counter() replaces the removed time.clock().
            # NOTE(review): no torch.cuda.synchronize() around the timing, so
            # async CUDA launches may make fw_time optimistic — confirm.
            fw_s = time.perf_counter()
            batch_inferences = net(batch_datas)
            fw_time = time.perf_counter() - fw_s
            fps = np.round(1 / fw_time, 3)
            fw_times.append(fw_time)

            psnr_batch = batch_PSNR(batch_inferences, batch_labels, data_range=1.0)
            test_psnr += psnr_batch
            ssim_batch = ssim(batch_inferences, batch_labels, data_range=1.0,
                              win_size=11).item()
            test_ssim += ssim_batch
            print('image:{}, fps:{}, psnr:{}, ssim:{}'.format(
                i, fps, psnr_batch, ssim_batch))

            # Convert CHW float tensors in [0,1] to HWC uint8 images.
            inference = np.array(
                batch_inferences.cpu().squeeze(0).permute(1, 2, 0) * 255).astype('uint8')
            source = np.array(batch_datas.cpu().squeeze(0).permute(1, 2, 0) * 255).astype('uint8')
            target = np.array(batch_labels.cpu().squeeze(0).permute(1, 2, 0) * 255).astype('uint8')
            result = cv2.hconcat([source, inference, target])

        if args.save_images:
            save_images_dir = args.save_dir + '/test_images_from_' + args.h5_dir.replace(
                '/', '_') + '_ep' + str(args.test_ep)
            if not os.path.exists(save_images_dir):
                os.makedirs(save_images_dir)
            cv2.imwrite(
                save_images_dir + '/' + args.noise_name + '_clean' + str(i) + '.jpg',
                target, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
            # NOTE(review): the '_noised' file stores the *inference*, not the
            # noisy input — looks like a naming slip; kept as-is.
            cv2.imwrite(
                save_images_dir + '/' + args.noise_name + '_noised' + str(i) + '.jpg',
                inference, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
            cv2.imwrite(
                save_images_dir + '/' + args.noise_name + '_concat' + str(i) + '.jpg',
                result, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
        if args.show_images:
            plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
            plt.pause(0.2)

    fw_fps = 1 / np.mean(fw_times)
    test_psnr /= len(test_set)
    test_ssim /= len(test_set)
    print('fw_fps:{}, psnr:{}, ssim:{}'.format(fw_fps, test_psnr, test_ssim))

    # Persist the merged args plus metrics; setdefault keeps any values that
    # merge_args_from_train already provided.
    test_info = vars(args)
    test_info.setdefault('fw_fps', fw_fps)
    test_info.setdefault('psnr', test_psnr)
    test_info.setdefault('ssim', test_ssim)
    with open(os.path.join(Path(model_path).parent, 'test_info.json'), 'w') as f:
        json.dump(test_info, f, indent=2)
self.conv_1_2 = DNCNN2(in_c, phase) def forward(self, x): if self.phase == 'train': x1, _ = self.conv_1_1(x) x1, _ = self.conv_1_2(x1) return x1, None else: x1 = self.conv_1_1(x) x1 = self.conv_1_2(x1) return x1 if __name__ == '__main__': from ptflops import get_model_complexity_info import time with torch.no_grad(): net = DRCNN(3, 'test').cuda() f, p = get_model_complexity_info(net, (3, 480, 640), as_strings=True, print_per_layer_stat=False, verbose=False) print('FLOPs:', f, 'Parms:', p) x = torch.randn(1, 3, 480, 640).cuda() s = time.clock() y = net(x) print(y.shape, 1 / (time.clock() - s))
def pruning_and_training(self, testloader, trainloader, batch_size=128, epoch=1, lr=0.001):
    """Iteratively hard-prune self.model, fine-tune it, and checkpoint the best.

    For each of ``self.max_iter`` iterations: prune (``self.HardPruning``),
    report complexity via ptflops, fine-tune for ``epoch`` epochs with SGD,
    evaluate on ``testloader`` after each epoch, and save the model whenever
    test accuracy improves within the iteration.

    NOTE(review): ``batch_size`` is never used in this body — confirm it is
    dead, or wire it into the loaders.
    """
    for it in range(self.max_iter):
        # Best accuracy is tracked per pruning iteration, not globally.
        best_acc = -1000
        print(
            '\n[1] PRUNING | ITER : {}/{}-----------------------------------------------------------'
            .format(it + 1, self.max_iter))
        print(
            '\n=> Pruning Net... | Layer1 : {}% Layer2 : {}% Layer3 : {}%'.
            format(self.P[0] * 100, self.P[1] * 100, self.P[2] * 100))
        self.HardPruning()
        self.model.train()
        # Complexity of the freshly pruned model.
        flops, params = get_model_complexity_info(
            self.model, (3, 32, 32),
            as_strings=True,
            print_per_layer_stat=False)
        print('{:<30} {:<8}'.format('Computational complexity: ', flops))
        print('{:<30} {:<8}'.format('Number of parameters: ', params))
        print(
            '\n[2] FINE TUNING----------------------------------------------------------------------'
        )
        for e in range(epoch):
            train_loss = 0
            correct = 0
            total = 0
            # Optimizer is rebuilt each epoch so self.learning_rate(e, lr)
            # sets the schedule; momentum 0.9, no weight decay.
            optimizer = optim.SGD(self.model.parameters(),
                                  lr=self.learning_rate(e, lr),
                                  momentum=0.9)
            criterion = nn.CrossEntropyLoss()
            for batch_idx, (inputs, targets) in enumerate(trainloader):
                inputs, targets = inputs.cuda(), targets.cuda()
                optimizer.zero_grad()
                # Variable() is a no-op on modern PyTorch (legacy API).
                inputs, targets = Variable(inputs), Variable(targets)
                outputs = self.model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                total += targets.size(0)
                predicted = torch.max(outputs.data, 1)[1]
                train_loss += loss.item()
                correct += predicted.eq(targets.data).cpu().sum()
                sys.stdout.write('\r')
                sys.stdout.write('Trainable params [{}]'.format(params))
                # NOTE(review): 391 is a hard-coded batches-per-epoch
                # (50000/128 for CIFAR) — confirm for other loaders.
                sys.stdout.write(
                    '| Iteration [%3d] Epoch [%3d/%3d] Iter [%3d/%3d] LR [%3d] \t\tLoss: %.4f Acc@1: %.3f%%'
                    % (it + 1, e + 1, epoch, batch_idx + 1, 391,
                       self.learning_rate(e, lr), loss.item(),
                       100. * correct / total))
                sys.stdout.flush()
            # Per-epoch evaluation on the held-out set.
            self.model.eval()
            self.model.training = False  # redundant with eval(); kept as-is
            test_loss = 0
            correct = 0
            total = 0
            criterion = nn.CrossEntropyLoss()
            with torch.no_grad():
                for batch_idx, (inputs, targets) in enumerate(testloader):
                    inputs, targets = inputs.cuda(), targets.cuda()
                    inputs, targets = Variable(inputs), Variable(targets)
                    outputs = self.model(inputs)
                    loss = criterion(outputs, targets)
                    test_loss += loss.item()
                    predicted = torch.max(outputs.data, 1)[1]
                    total += targets.size(0)
                    correct += predicted.eq(targets.data).cpu().sum()
            # Save checkpoint when best model
            acc = 100. * correct / total
            print('\n | Test {} '.format(acc))
            if acc > best_acc:
                print('| New Best Accuracy...\t\t\tTop1 = %.2f%%' % (acc))
                print('| Saving Pruned Model...')
                # Saves the whole module object (pickle), not a state_dict.
                torch.save(self.model, "wide_resnet_iter_hard.pth")
                best_acc = acc
        # NOTE(review): if accuracy never beat -1000 (empty testloader),
        # best_acc is still the int sentinel and .item() would raise.
        self.best_acc.append(best_acc.item())
        self.net_weights.append(self.number_of_trainable_params(
            self.model))
from ptflops import get_model_complexity_info
from model.segmentation.deeplabV3_plus import DeepLabV3_plus

if __name__ == '__main__':
    # 80-character separator, identical to the literal banner printed before.
    bar = '=' * 80

    # --- DeepLab V3+, ResNet backbone, 513x513 input ---
    print(bar)
    print('DeepLab V3+, ResNet, 513x513')
    print(bar)
    model = DeepLabV3_plus(pretrained=True)
    flops, params = get_model_complexity_info(model, (3, 513, 513), verbose=True)
    print(f"{'Computational complexity: ':<30} {flops:<8}")
    print(f"{'Number of parameters: ':<30} {params:<8}")

    # --- DeepLab V3+, ResNet backbone, 769x769 input ---
    print(bar)
    print('DeepLab V3+, ResNet, 769x769')
    print(bar)
    model = DeepLabV3_plus(pretrained=True)
    flops, params = get_model_complexity_info(model, (3, 769, 769), verbose=True)
    # (The original script printed only the complexity line for this size.)
    print(f"{'Computational complexity: ':<30} {flops:<8}")
    # Tail of the architecture-choice dict: per-layer conv options and rates.
    # (The opening of this dict literal lies outside this chunk.)
    10: {
        'conv': [1, 2],
        'rate': 1
    },
    11: {
        'conv': [1, 2],
        'rate': 0
    }
}
model = SuperNetwork(shadow_bn=False, layers=12, classes=10)
print(model)
# Single CIFAR-sized sample; unsqueeze adds the batch dimension.
input = torch.randn(3, 32, 32).unsqueeze(0)
print(model(input, choice))  # (1, 10)
# # params = list(model.parameters())
# p_s = params[1].size()
# model.conv1.zero_grad()
# model.conv1.weight.grad()
import torch
from ptflops import get_model_complexity_info
with torch.cuda.device(0):
    # choice is added
    # NOTE(review): stock ptflops calls model(x) with one argument, while
    # this model's forward takes (input, choice) — presumably a patched
    # ptflops or a default-arg forward is in use; verify.
    flops, params = get_model_complexity_info(model, (3, 32, 32),
                                              as_strings=True,
                                              print_per_layer_stat=True)
    print('{:<30} {:<8}'.format('Computational complexity: ', flops))
    print('{:<30} {:<8}'.format('Number of parameters: ', params))
# Evaluation configuration pulled from CLI args and the project YAML.
override_prev_results = args.override
project_name = args.project
# Default to the pretrained d{coef} weights unless a path was given.
weights_path = f'weights/efficientdet-d{compound_coef}.pth' if args.weights is None else args.weights
print(f'running coco-style evaluation on project {project_name}, weights {weights_path}...')
# NOTE(review): the file handle from open() is never closed; prefer a
# with-block. Kept as-is here.
params = yaml.safe_load(open(f'projects/{project_name}.yml'))
obj_list = params['obj_list_fushusheshi']
# Input resolution per compound coefficient d0..d8.
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
det_save_odgt = f'val_result/fushusheshi_4class_d3_lr_1e-3_only_bad_epoch260_1201.odgt'
det_save_eval_log_txt = f'val_result/fushusheshi_4class_d3_lr_1e-3_only_bad_epoch260_1201.txt'
threshold=0.05

if __name__ == '__main__':
    with torch.cuda.device(0):
        # NOTE(review): eval() on YAML-sourced strings executes arbitrary
        # code if the project file is untrusted — consider ast.literal_eval.
        model = EfficientDetBackbone(compound_coef=compound_coef,
                                     num_classes=len(obj_list),
                                     ratios=eval(params['anchors_ratios']),
                                     scales=eval(params['anchors_scales']))
        # params is rebound here from the YAML dict to the parameter-count string.
        macs, params = get_model_complexity_info(
            model,
            (3, input_sizes[compound_coef], input_sizes[compound_coef]),
            as_strings=True,
            print_per_layer_stat=True,
            verbose=True)
        print('{:<30} {:<8}'.format('Computational complexity: ', macs))
        print('{:<30} {:<8}'.format('Number of parameters: ', params))
def train():
    """Train the face-recognition backbone + GaussianFace head with AMP.

    Builds the ImageFolder pipeline, optionally loads a pretrained
    backbone/quality pair for quality-weighted training, resumes from
    checkpoints if configured, and saves backbone/head state per epoch.
    """
    DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    writer = SummaryWriter(config.LOG_ROOT)
    train_transform = transforms.Compose([
        transforms.RandomApply(
            [transforms.RandomResizedCrop(112, scale=(0.95, 1), ratio=(1, 1))]),
        transforms.Resize(112),
        transforms.RandomHorizontalFlip(),
        transforms.RandomGrayscale(0.01),
        transforms.ToTensor(),
        transforms.Normalize(mean=config.RGB_MEAN, std=config.RGB_STD),
    ])
    dataset_train = ImageFolder(config.TRAIN_FILES, train_transform)
    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=config.BATCH_SIZE,
                                               pin_memory=True,
                                               shuffle=True,
                                               num_workers=8,
                                               drop_last=True)
    # NOTE(review): ImageFolder.classes is the *list of class names*; the
    # print below shows a list and GaussianFace(out_features=NUM_CLASS)
    # almost certainly wants len(...) — verify upstream expectations.
    NUM_CLASS = train_loader.dataset.classes
    print("Number of Training Classes: {}".format(NUM_CLASS))
    BACKBONE = ResNet(num_layers=100, feature_dim=512)
    flops, params = get_model_complexity_info(BACKBONE, (3, 112, 112),
                                              as_strings=True,
                                              print_per_layer_stat=False)
    print('BACKBONE FLOPs:', flops)
    print('BACKBONE PARAMS:', params)
    # Optional frozen teacher pair used to produce per-sample quality scores.
    PRETRAINED_BACKBONE = None
    PRETRAINED_QUALITY = None
    if os.path.isfile(config.PRETRAINED_BACKBONE) and os.path.isfile(
            config.PRETRAINED_QUALITY):
        PRETRAINED_BACKBONE = ResNet(num_layers=100, feature_dim=512)
        PRETRAINED_QUALITY = FaceQuality(512 * 7 * 7)
        checkpoint = torch.load(config.PRETRAINED_BACKBONE)
        load_state_dict(PRETRAINED_BACKBONE, checkpoint)
        PRETRAINED_BACKBONE = nn.DataParallel(PRETRAINED_BACKBONE,
                                              device_ids=config.BACKBONE_GPUS)
        PRETRAINED_BACKBONE = PRETRAINED_BACKBONE.cuda(0)
        PRETRAINED_BACKBONE.eval()
        checkpoint = torch.load(config.PRETRAINED_QUALITY)
        load_state_dict(PRETRAINED_QUALITY, checkpoint)
        PRETRAINED_QUALITY = nn.DataParallel(PRETRAINED_QUALITY,
                                             device_ids=config.BACKBONE_GPUS)
        PRETRAINED_QUALITY = PRETRAINED_QUALITY.cuda(0)
        PRETRAINED_QUALITY.eval()
    HEAD = GaussianFace(in_features=config.EMBEDDING_SIZE, out_features=NUM_CLASS)
    LOSS = FocalLoss()
    # optionally resume from a checkpoint
    if config.BACKBONE_RESUME_ROOT and config.HEAD_RESUME_ROOT:
        print("=" * 60)
        if os.path.isfile(config.BACKBONE_RESUME_ROOT):
            print("Loading Backbone Checkpoint '{}'".format(
                config.BACKBONE_RESUME_ROOT))
            checkpoint = torch.load(config.BACKBONE_RESUME_ROOT)
            load_state_dict(BACKBONE, checkpoint)
        else:
            print(
                "No Checkpoint Found at '{}' Please Have a Check or Continue to Train from Scratch"
                .format(config.BACKBONE_RESUME_ROOT))
        if os.path.isfile(config.HEAD_RESUME_ROOT):
            print("Loading Head Checkpoint '{}'".format(
                config.HEAD_RESUME_ROOT))
            checkpoint = torch.load(config.HEAD_RESUME_ROOT)
            load_state_dict(HEAD, checkpoint)
        else:
            print(
                "No Checkpoint Found at '{}' Please Have a Check or Continue to Train from Scratch"
                .format(config.HEAD_RESUME_ROOT))
        print("=" * 60)
    # Backbone and head may live on different GPU groups.
    BACKBONE = nn.DataParallel(BACKBONE,
                               device_ids=config.BACKBONE_GPUS,
                               output_device=config.BACKBONE_GPUS[-1])
    BACKBONE = BACKBONE.cuda(config.BACKBONE_GPUS[0])
    HEAD = nn.DataParallel(HEAD,
                           device_ids=config.HEAD_GPUS,
                           output_device=config.HEAD_GPUS[0])
    HEAD = HEAD.cuda(config.HEAD_GPUS[0])
    # NOTE(review): the head param group reuses BACKBONE_LR — confirm a
    # separate head LR was not intended.
    OPTIMIZER = optim.SGD([{
        'params': BACKBONE.parameters(),
        'lr': config.BACKBONE_LR,
        'weight_decay': config.WEIGHT_DECAY
    }, {
        'params': HEAD.parameters(),
        'lr': config.BACKBONE_LR
    }], momentum=config.MOMENTUM)
    DISP_FREQ = len(train_loader) // 100
    NUM_EPOCH_WARM_UP = config.NUM_EPOCH_WARM_UP
    NUM_BATCH_WARM_UP = len(train_loader) * NUM_EPOCH_WARM_UP
    batch = 0
    step = 0  # NOTE(review): unused in this body
    scheduler = CosineDecayLR(OPTIMIZER,
                              T_max=10 * len(train_loader),
                              lr_init=config.BACKBONE_LR,
                              lr_min=1e-5,
                              warmup=NUM_BATCH_WARM_UP)
    for epoch in range(config.NUM_EPOCH):
        BACKBONE.train()
        HEAD.train()
        arcface_losses = AverageMeter()
        confidences = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        # NOTE(review): GradScaler is recreated every epoch, discarding its
        # calibrated scale — usually constructed once before the loop.
        scaler = torch.cuda.amp.GradScaler()
        for inputs, labels in tqdm(iter(train_loader)):
            inputs = inputs.cuda(config.BACKBONE_GPUS[0])
            labels = labels.cuda(config.HEAD_GPUS[0])
            with torch.cuda.amp.autocast():
                features = BACKBONE(inputs)
                if PRETRAINED_BACKBONE is None or PRETRAINED_QUALITY is None:
                    # No teacher: head runs without quality weighting.
                    outputs = HEAD(None, features.cuda(config.HEAD_GPUS[0]),
                                   labels, False)
                else:
                    with torch.no_grad():
                        _, fc = PRETRAINED_BACKBONE(inputs, True)
                        quality = PRETRAINED_QUALITY(fc)
                    outputs = HEAD(quality.cuda(config.HEAD_GPUS[0]),
                                   features.cuda(config.HEAD_GPUS[0]),
                                   labels, True)
                # measure accuracy and record loss
                arcface_loss = LOSS(outputs, labels)
                prec1, prec5 = accuracy(outputs.data, labels, topk=(1, 5))
                arcface_losses.update(arcface_loss.data.item(), inputs.size(0))
                top1.update(prec1.data.item(), inputs.size(0))
                top5.update(prec5.data.item(), inputs.size(0))
                loss = arcface_loss
            # compute gradient and do SGD step
            OPTIMIZER.zero_grad()
            #loss.backward()
            #OPTIMIZER.step()
            scaler.scale(loss).backward()
            scaler.step(OPTIMIZER)
            scaler.update()
            if ((batch + 1) % DISP_FREQ == 0) and batch != 0:
                print("=" * 60)
                print(
                    'Epoch {}/{} Batch {}/{}\t'
                    'Training Loss {arcface_loss.val:.4f} ({arcface_loss.avg:.4f})\t'
                    'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                    'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                        epoch + 1, config.NUM_EPOCH, batch + 1,
                        len(train_loader) * config.NUM_EPOCH,
                        arcface_loss=arcface_losses,
                        top1=top1,
                        top5=top5))
                print("=" * 60)
            batch += 1  # batch index
            scheduler.step(batch)
            if batch % 1000 == 0:
                print(OPTIMIZER)
        # training statistics per epoch (buffer for visualization)
        epoch_loss = arcface_losses.avg
        epoch_acc = top1.avg
        writer.add_scalar("Training_Loss", epoch_loss, epoch + 1)
        writer.add_scalar("Training_Accuracy", epoch_acc, epoch + 1)
        print("=" * 60)
        print('Epoch: {}/{}\t'
              'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
              'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                  epoch + 1, config.NUM_EPOCH,
                  loss=arcface_losses,
                  top1=top1,
                  top5=top5))
        print("=" * 60)
        # save checkpoints per epoch
        curTime = get_time()
        if not os.path.exists(config.MODEL_ROOT):
            os.makedirs(config.MODEL_ROOT)
        torch.save(
            BACKBONE.state_dict(),
            os.path.join(
                config.MODEL_ROOT,
                "Backbone_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(
                    epoch + 1, batch, curTime)))
        torch.save(
            HEAD.state_dict(),
            os.path.join(
                config.MODEL_ROOT,
                "Head_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(
                    epoch + 1, batch, curTime)))
def main():
    """CIFAR-10/100 training entry point.

    Prepares dataloaders, builds the model from ``args.arch``, reports
    ptflops complexity, optionally resumes/evaluates, then runs the
    train/test loop with checkpointing of the best accuracy.
    """
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch
    if not os.path.isdir(args.save_dir):
        mkdir_p(args.save_dir)
    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100
    trainset = dataloader(root='./data', train=True, download=True,
                          transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch,
                                  shuffle=True, num_workers=args.workers)
    # NOTE(review): download=False for the test split assumes the train
    # download already fetched it (it does for torchvision CIFAR).
    testset = dataloader(root='./data', train=False, download=False,
                         transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch,
                                 shuffle=False, num_workers=args.workers)
    # Model
    print("==> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](dataset=args.dataset,
                                       depth=args.depth,
                                       reduction=args.reduction)
    print(model)
    if args.cuda:
        model.cuda()
    print(' Total params: %.2f' % (sum(p.numel() for p in model.parameters())))
    with torch.cuda.device(0):
        net = model
        flops, params = get_model_complexity_info(net, (3, 32,32),
                                                  as_strings=True,
                                                  print_per_layer_stat=True)
        print('Flops: ' + flops)
        print('Params: ' + params)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # Resume
    # NOTE(review): title hard-codes 'cifar-10-' even when training CIFAR-100.
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.save_dir = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.save_dir, 'log.txt'), title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.save_dir, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss',
                          'Train Acc.', 'Valid Acc.'])
    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch,
                                   args.cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return
    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.gamma, args.schedule)
        # state['lr'] is module-level state kept in sync by adjust_learning_rate.
        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))
        train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                      epoch, args.cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch,
                                   args.cuda)
        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])
        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer' : optimizer.state_dict(),
        }, is_best, checkpoint=args.save_dir)
    logger.close()
    print('Best acc:')
    print(best_acc)
# Dummy per-vertex RGB tensor (4 samples x 6890 SMPL vertices x 3 channels).
rgb = torch.FloatTensor(4, 6890, 3).cuda()
net = net.cuda()
print(net)
# Drop the projection head so only the trunk is profiled.
net.proj_output = nn.Sequential()
model_parameters = filter(lambda p: p.requires_grad, net.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Number of parameters: %.2f M' % (params / 1e6))
#output = net(xyz, rgb)
market_data = Market3D('./2DMarket', flip=True, slim=0.25, bg=True)
CustomDataLoader = partial(DataLoader, num_workers=0, batch_size=8,
                           shuffle=True, drop_last=True)
query_loader = CustomDataLoader(market_data.query())
batch0, label0 = next(iter(query_loader))
# Keep a single sample and restore the batch dimension.
batch0 = batch0[0].unsqueeze(0)
print(batch0.shape)
# NOTE(review): stock ptflops takes an input_res tuple as the 2nd argument;
# passing a real tensor plus a shape tuple implies a patched/forked
# get_model_complexity_info — verify which version is installed.
macs, params = get_model_complexity_info(net, batch0.cuda(),
                                         ((round(6890 * 0.5), 3)),
                                         as_strings=True,
                                         print_per_layer_stat=False,
                                         verbose=True)
#print(macs)
print('{:<30} {:<8}'.format('Computational complexity: ', macs))
print('{:<30} {:<8}'.format('Number of parameters: ', params))
#print(output.shape)
d0 = self.outconv(torch.cat((d1, d2, d3, d4, d5, d6), 1)) # d00 = d0 + self.refconv(d0) return F.sigmoid(d0), F.sigmoid(d1), F.sigmoid(d2), F.sigmoid( d3), F.sigmoid(d4), F.sigmoid(d5), F.sigmoid(d6) if __name__ == '__main__': from thop import profile from thop import clever_format from ptflops import get_model_complexity_info os.environ['CUDA_VISIBLE_DEVICES'] = '3' device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # model = MyselfUnet3d_justdeepvision(1, 1, deepvision=True).to(device) model = U2NET(1, 1).cuda() # model = nn.DataParallel(model,device_ids=[0]) params = sum(param.numel() for param in model.parameters()) / 1e6 print(params) macs, params = get_model_complexity_info(model, (1, 19, 256, 256), as_strings=True, print_per_layer_stat=False, verbose=True) print('{:<30} {:<8}'.format('Computational complexity: ', macs)) print('{:<30} {:<8}'.format('Number of parameters: ', params)) # assert 1>3 input = torch.randn(1, 1, 19, 256, 256).to(device) macs, params = profile(model, inputs=(input, )) macs, params = clever_format([macs, params], "%.3f") print(macs, params)
# 获得输入图片的size input_res = (3, 224, 224) if model_name == "osnet": input_res = (3, 256, 128) elif model_name == "mgn" or model_name == "pcb" or model_name == "baseline": input_res = (3, 384, 128) elif model_name == "alphapose": input_res = (3, 256, 192) elif model_name == "st_gcn_net": input_res = (3, 256, 14) elif model_name == "matmul256": input_res = (1, 256, 256) elif model_name == "matmul1024": input_res = (1, 1024, 1024) elif model_name == "matmul4096": input_res = (1, 4096, 4096) # 获得模型的op macs, _ = get_model_complexity_info(model, input_res, as_strings=True, print_per_layer_stat=False, verbose=True) float_macs = transStr2Float(macs) op_num = float_macs * 2 # 先使用paper给的值 if model_name == "efficientnet_b3": op_num = 1.8 elif model_name == "osnet": op_num = 0.98 op_dir[model_name] = op_num print(op_dir)
import torch
from ptflops import get_model_complexity_info
from ptsemseg.models.FASSDNet import FASSDNet

# Report MACs and parameter count for FASSDNet (19 classes, Cityscapes-sized
# 3x512x1024 input) on GPU 0, with the per-layer breakdown printed.
with torch.cuda.device(0):
    model = FASSDNet(19)
    complexity, n_params = get_model_complexity_info(model, (3, 512, 1024),
                                                     as_strings=True,
                                                     print_per_layer_stat=True)
    print(f'Flops: {complexity}')
    print(f'Params: {n_params}')
            # Tail of the transition layer: 1x1 compression conv + 2x2 pooling.
            int(n_ch * compress_factor),
            kernel_size=1,
            bias=False),
            nn.AvgPool2d(kernel_size=2, stride=2)
        ]
        self.layer = nn.Sequential(*layer)

    def forward(self, x):
        # Apply the assembled transition pipeline.
        return self.layer(x)


class View(nn.Module):
    """Reshape module: flattens/reshapes to (batch, *shape) inside Sequential."""

    def __init__(self, *shape):
        super(View, self).__init__()
        self.shape = shape  # target shape, batch dimension excluded

    def forward(self, x):
        return x.view(x.shape[0], *self.shape)


if __name__ == '__main__':
    from ptflops import get_model_complexity_info
    # DenseNet-BC(100, k=12) on CIFAR-100; raw (non-string) counts.
    densenet_bc = DenseNetBC(depth=100,
                             growth_rate=12,
                             n_classes=100,
                             efficient=False)
    flops, params = get_model_complexity_info(densenet_bc, (3, 32, 32),
                                              as_strings=False,
                                              print_per_layer_stat=False)
    print("flops: {}, params: {}".format(flops, params))
def forward(self, x):
    """Run the input through stage 1 only; stage 2 is intentionally disabled."""
    out = self.layer1(x)
    # out = self.layer2(x)
    return out


def get_parameter_number(net):
    """Return {'Total': ..., 'Trainable': ...} parameter counts for *net*."""
    all_counts = [p.numel() for p in net.parameters()]
    grad_counts = [p.numel() for p in net.parameters() if p.requires_grad]
    return {'Total': sum(all_counts), 'Trainable': sum(grad_counts)}


def input_constructer(input_res):
    """Build the dual-stream dummy batch ptflops feeds to the model.

    Two (image, motion-vector) pairs of 5-frame clips; *input_res* is ignored
    because the shapes are fixed here.
    """
    frames = torch.randn(size=(1, 3, 5, 224, 224))
    motion = torch.randn(size=(1, 2, 5, 224, 224))
    return {'inputs': [[frames, motion], [frames, motion]]}


if __name__ == '__main__':
    net = Model(2, 5)
    macs, params = get_model_complexity_info(
        net,
        input_res=(224, 224),
        input_constructor=input_constructer,
        as_strings=True,
        print_per_layer_stat=False,
        verbose=False)
    print('{:<30} {:<8}'.format('Computational complexity: ', macs))
    print('{:<30} {:<8}'.format('Number of parameters: ', params))
        # Tail of the layer-builder: record the output width and wrap the
        # accumulated layers in a Sequential.
        net.append(layer)
        self.num_channels = c
        return nn.Sequential(*net)

    def forward(self, x):
        # Two stacked stages, then the 1x1 conv head, 4x4 average pooling,
        # final conv classifier, and flatten to (batch, classes).
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.activation(self.bn1(self.conv1(out)))
        out = F.avg_pool2d(out, 4)
        out = self.conv2(out)
        out = torch.flatten(out, 1)
        return out


def mobilenet_v2(activation='relu6', num_classes=10, width_multiplier=1.):
    """Factory for the CIFAR-sized MobileNetV2 variant."""
    return MobileNetV2(activation=activation,
                       num_classes=num_classes,
                       width_multiplier=width_multiplier)


if __name__ == "__main__":
    from ptflops import get_model_complexity_info
    net = mobilenet_v2()
    macs, params = get_model_complexity_info(net, (3, 32, 32),
                                             as_strings=True,
                                             print_per_layer_stat=True,
                                             verbose=True)
    print('{:<30} {:<8}'.format('Number of parameters: ', params))
    print('{:<30} {:<8}'.format('Computational complexity: ', macs))
    # Tail of the name -> torchvision constructor table.
    'vgg16': models.vgg16,
    'squeezenet': models.squeezenet1_0,
    'densenet': models.densenet161,
    'inception': models.inception_v3}

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='ptflops sample script')
    parser.add_argument('--device', type=int, default=0,
                        help='Device to store the model.')
    parser.add_argument('--model', choices=list(pt_models.keys()),
                        type=str, default='resnet18')
    parser.add_argument('--result', type=str, default=None)
    args = parser.parse_args()
    # Per-layer report goes to stdout unless a result file was requested.
    if args.result is None:
        ost = sys.stdout
    else:
        # NOTE(review): this handle is never closed; fine for a short-lived
        # script, but a with-block would be cleaner.
        ost = open(args.result, 'w')
    net = pt_models[args.model]()
    if torch.cuda.is_available():
        net.cuda(device=args.device)
    flops, params = get_model_complexity_info(net, (3, 224, 224),
                                              as_strings=True,
                                              print_per_layer_stat=True,
                                              ost=ost)
    print('{:<30} {:<8}'.format('Computational complexity: ', flops))
    print('{:<30} {:<8}'.format('Number of parameters: ', params))
def eval_all_dataset():
    """Evaluate ``args.net`` over the whole test set.

    Computes per-image segmentation metrics (iou/dc/pr/acc/sp/se/auc),
    measures synchronized inference time, optionally dumps ROC arrays,
    writes per-image scores to CSV, and returns a dict of mean metrics
    plus fps/flops/params.
    """
    args.net.eval()
    header = ["file_name", "iou", "dc", "pr", "acc", "sp", "se", "auc"]
    total_metrics, gt_list, pd_list, time_list, total_list = {}, [], [], [], []
    for h in header[1:]:
        total_metrics[h] = []
    file_name = get_id()
    with torch.no_grad():
        with tqdm(total=n_test, desc='Test', unit='img', leave=False) as p_bar:
            for index, batch in enumerate(test_loader):
                # load the picture
                image, label = batch['image'], batch['label']
                image = image.to(device=args.device, dtype=torch.float32)
                label = label.to(device=args.device, dtype=torch.float32)
                # statistics inference time (synchronized so async CUDA
                # launches are included in the measurement)
                torch.cuda.synchronize(args.device)
                start = time.time()
                output = args.net(image)
                output = torch.sigmoid(output)
                torch.cuda.synchronize(args.device)
                time_list.append(time.time() - start)
                # save as the numpy array for plot the auc roc curve
                if args.roc:
                    np_output = output.cpu().detach().numpy()[0, 0, :, :]
                    np_label = label.cpu().detach().numpy()[0, 0, :, :]
                    # NOTE(review): np.resize repeats/truncates data rather
                    # than interpolating — confirm that is acceptable here.
                    np_output = np.resize(np_output, np_label.shape)
                    gt_list += list(np_label.flatten())
                    pd_list += list(np_output.flatten())  # value between 0. and 1.
                # calculate the metrics
                rows = [file_name[index]]
                for h in header[1:]:
                    score = get_score(output, label, mode=h)
                    total_metrics[h] += [score]
                    rows.append(score)
                total_list.append(rows)
                p_bar.update(image.shape[0])
                # predict and save the result
                image = cv2.imread(os.path.join(args.dir_img, file_name[index]))
                img_predict(args, image,
                            save_path=os.path.join(args.dir_result, file_name[index]))
    # return the results
    if args.roc:
        np.save(os.path.join(args.dir_log, "gt.npy"), gt_list)
        np.save(os.path.join(args.dir_log, "pd.npy"), pd_list)
    for h in header[1:]:
        total_metrics[h] = np.round(np.mean(total_metrics[h]), 4)
    data = pd.DataFrame(total_list)
    data.to_csv(
        os.path.join(os.path.join(args.dir_log, 'scores.csv')),
        header=header,
        index=True,
        mode='w',
        encoding='utf-8'
    )
    # NOTE(review): despite the name, fps here is the *mean seconds per
    # image*; the actual frames-per-second is 1/fps computed below.
    fps = np.mean(time_list)
    try:
        flops, params = get_model_complexity_info(
            args.net, (args.n_channels, args.height, args.width),
            print_per_layer_stat=False
        )
    except RuntimeError as exception:
        # Degrade gracefully on CUDA OOM during the complexity pass.
        if "out of memory" in str(exception):
            print("WARNING: out of memory")
            if hasattr(torch.cuda, 'empty_cache'):
                torch.cuda.empty_cache()
            flops, params = 0., 0.
        else:
            raise exception
    results = total_metrics
    results['fps'] = round(1.0 / fps, 0)
    results['flops'] = flops
    results['params'] = params
    return results
# Benchmark a (possibly pruned) modified VGG16: ptflops complexity plus five
# timed forward passes, accumulated into `string` for a report file.
width = 224
height = 224
fd = finetune.ModifiedVGG16Model()
# fd = torch.load("/data/kong/pytorch-pruning/prune/Iteration:0.pth", map_location=lambda storage, loc: storage)
# model.load_state_dict("/data/kong/pytorch-pruning/final-model-prunned")
# print(fd)
fd.eval()
fd.to(device)
x = torch.randn(1, 3, width, height).to(device)
from ptflops import get_model_complexity_info
flops, params = get_model_complexity_info(fd.to(device), (3, width, height),
                                          print_per_layer_stat=True,
                                          as_strings=True)
# print("FLOPS:", flops)
# print("PARAMS:", params)
string = []
# string.append(fd)
string.append(f"FLOPs: {flops}\n")
string.append(f"parameters: {params}\n")
# Five wall-clock timed forward passes.
# NOTE(review): time.time() without cuda.synchronize() understates GPU
# latency — confirm CPU-only use or add synchronization.
for i in range(5):
    time_time = time.time()
    features = fd(x)
    string.append("inference time: {} s \n".format(time.time() - time_time))
# NOTE(review): handle opened here; the write/close presumably follows
# outside this chunk — verify it is closed.
fopen = open("result_prune", "w+")
                    # Tail of the fill loop: sequence length minus
                    2):  # there are two special tokens [CLS] and [SEP]
        inp_seq += tokenizer.pad_token  # let's use pad token to form a fake
        # sequence for subsequent flops calculation
    inputs = tokenizer([inp_seq] * input_shape[0],
                       padding=True,
                       truncation=True,
                       return_tensors="pt")
    labels = torch.tensor([1] * input_shape[0])
    # Batch size input_shape[0], sequence length input_shape[128]
    inputs = dict(inputs)
    inputs.update({"labels": labels})
    return inputs


if __name__ == '__main__':
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
    # (batch=2, seq_len=128); the constructor builds the token dict ptflops
    # passes to the model as keyword arguments.
    flops_count, params_count = get_model_complexity_info(
        model, (2, 128),
        as_strings=True,
        input_constructor=partial(bert_input_constructor,
                                  tokenizer=bert_tokenizer),
        print_per_layer_stat=False)
    print('{:<30} {:<8}'.format('Computational complexity: ', flops_count))
    print('{:<30} {:<8}'.format('Number of parameters: ', params_count))
    # Output:
    # Computational complexity:       21.74 GMac
    # Number of parameters:           109.48 M
def inference_npu(self, model_name, batch_size):
    """Benchmark *model_name* on the Ascend NPU.

    Returns (durations_ms, ops, opj): per-batch latencies after warm-up,
    throughput in G-ops/s, and ops-per-joule relative to the NPU's power
    figure from ``self.hardware_info``. Returns ([], 0, 0) when no NPU is
    available.
    """
    if not torch.npu.is_available():
        print("error!!! you don't have npu")
        return [], 0, 0
    durations = []
    ops = 0
    opj = 0
    model = pm.__dict__[model_name]()
    model = model.npu()
    # Per-model input resolution (default: ImageNet-sized).
    input_res = (3, 224, 224)
    if model_name == "osnet":
        input_res = (3, 256, 128)
    elif model_name == "mgn" or model_name == "pcb" or model_name == "baseline":
        input_res = (3, 384, 128)
    elif model_name == "alphapose":
        input_res = (3, 256, 192)
    elif model_name == "st_gcn_net":
        input_res = (3, 256, 14)
    macs, params = get_model_complexity_info(model, input_res,
                                             as_strings=True,
                                             print_per_layer_stat=False,
                                             verbose=True)
    float_macs = transStr2Float(macs)
    # 1 GMac = 2e9 ops (multiply + add).
    op_num = float_macs * pow(10, 9) * 2
    # Prefer the values reported in the papers when available. (translated)
    if model_name == "efficientnet_b3":
        op_num = 1.8 * pow(10, 9)
    elif model_name == "osnet":
        op_num = 0.98 * pow(10,9)
    # Per-model dataset selection.
    img_dataset = self.dataset
    if model_name == "osnet":
        img_dataset = self.dataset_osnet
    elif model_name == "mgn" or model_name == "pcb" or model_name == "baseline":
        img_dataset = self.dataset_reid
    elif model_name == "alphapose":
        img_dataset = self.dataset_pose
    elif model_name == "st_gcn_net":
        img_dataset = self.dataset_stgcn
    img_dataloader = DataLoader(dataset = img_dataset,
                                batch_size = batch_size,
                                num_workers = 4)
    # First self.warm_up batches are discarded from the statistics.
    loop_num = self.warm_up + self.infer_epoch
    time_sum = 0
    model.eval()
    for step, img in enumerate(img_dataloader):
        img = img.npu()
        if step >= loop_num:
            break
        # Device-side event timing around a single forward pass.
        starter, ender = torch.npu.Event(enable_timing = True), torch.npu.Event(enable_timing = True)
        starter.record()
        model(img)
        ender.record()
        torch.npu.synchronize()
        if step >= self.warm_up:
            now_durations = starter.elapsed_time(ender)  # milliseconds
            durations.append(now_durations)
            time_sum += now_durations / 1000  # accumulate seconds
    total_img_num = self.infer_epoch * batch_size
    # Throughput in G-ops/s; NOTE(review): no inference run inside
    # torch.no_grad() — autograd bookkeeping may inflate latency.
    ops = (op_num * total_img_num / time_sum) * pow(10,-9)
    opj = ops / self.hardware_info["NPU"]
    return durations, ops, opj
# Compare thop and ptflops complexity numbers for several U2NET variants on CPU.
to_device = 'cpu'
input_size = (3, 400, 400)
verbose = False
model_names = ['u2net', 'u2netp', 'u2net_groupconv', 'u2net_dsconv']
for name in model_names:
    model = get_net(name, False).to(to_device)
    # thop
    input_tensor = torch.randn(1, *input_size, device=to_device)
    flops, params = profile(model, (input_tensor, ), verbose=verbose)
    print(f"{name} flops: {flops}, params: {params}")
    # ptflops
    macs, params = get_model_complexity_info(model, input_size,
                                             as_strings=True,
                                             print_per_layer_stat=False,
                                             verbose=verbose)
    print(name)
    print('{:<30} {:<8}'.format('Computational complexity: ', macs))
    print('{:<30} {:<8}'.format('Number of parameters: ', params))
    # torchsummary
    # summary(model, input_size=input_size, device=to_device)  # (channels, h, w)

# Reference numbers recorded from a previous torchsummary run:
# U2NETP
# Input size (MB): 1.83 (1, 3, 400, 400)
# Forward/backward pass size (MB): 2270.41
# Params size (MB): 4.32
# Estimated Total Size (MB): 2276.56
# Computational complexity (GMac): 31.16
# Number of parameters (M): 1.13
def main():
    """Set up and launch (optionally distributed) training.

    Parses the CLI/YAML options, configures distributed state, loggers,
    random seed and dataloaders, builds the model, and prints the
    generator's ptflops complexity. Configuration is read from the option
    file given via ``-opt``.
    """
    #### options
    parser = argparse.ArgumentParser()
    parser.add_argument('-opt', type=str, help='Path to option YMAL file.')
    parser.add_argument('--launcher', choices=['none', 'pytorch'],
                        default='none', help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    opt = option.parse(args.opt, is_train=True)

    #### distributed training settings
    if args.launcher == 'none':  # disabled distributed training
        opt['dist'] = False
        rank = -1
        print('Disabled distributed training.')
    else:
        opt['dist'] = True
        init_dist()
        world_size = torch.distributed.get_world_size()
        rank = torch.distributed.get_rank()

    #### loading resume state if exists
    if opt['path'].get('resume_state', None):
        # distributed resuming: all load into default GPU
        device_id = torch.cuda.current_device()
        resume_state = torch.load(
            opt['path']['resume_state'],
            map_location=lambda storage, loc: storage.cuda(device_id))
        option.check_resume(opt, resume_state['iter'])  # check resume options
    else:
        resume_state = None

    #### mkdir and loggers
    if rank <= 0:  # normal training (rank -1) OR distributed training (rank 0)
        if resume_state is None:
            util.mkdir_and_rename(
                opt['path']
                ['experiments_root'])  # rename experiment folder if exists
            util.mkdirs(
                (path for key, path in opt['path'].items()
                 if not key == 'experiments_root'
                 and 'pretrain_model' not in key and 'resume' not in key))
        # config loggers. Before it, the log will not work
        util.setup_logger('base', opt['path']['log'], 'train_' + opt['name'],
                          level=logging.INFO, screen=True, tofile=True)
        util.setup_logger('val', opt['path']['log'], 'val_' + opt['name'],
                          level=logging.INFO, screen=True, tofile=True)
        logger = logging.getLogger('base')
        logger.info(option.dict2str(opt))
        # tensorboard logger
        if opt['use_tb_logger'] and 'debug' not in opt['name']:
            # NOTE(review): slicing the version string to 3 chars is fragile —
            # e.g. '1.10.0' parses as 1.1; confirm acceptable for target envs.
            version = float(torch.__version__[0:3])
            if version >= 1.1:  # PyTorch 1.1
                from torch.utils.tensorboard import SummaryWriter
            else:
                logger.info(
                    'You are using PyTorch {}. Tensorboard will use [tensorboardX]'
                    .format(version))
                from tensorboardX import SummaryWriter
            tb_logger = SummaryWriter(log_dir='../tb_logger/' + opt['name'])
    else:
        util.setup_logger('base', opt['path']['log'], 'train',
                          level=logging.INFO, screen=True)
        logger = logging.getLogger('base')

    # convert to NoneDict, which returns None for missing keys
    opt = option.dict_to_nonedict(opt)

    #### random seed
    seed = opt['train']['manual_seed']
    if seed is None:
        seed = random.randint(1, 10000)
    if rank <= 0:
        logger.info('Random seed: {}'.format(seed))
    util.set_random_seed(seed)

    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True

    #### create train and val dataloader
    dataset_ratio = 200  # enlarge the size of each epoch
    for phase, dataset_opt in opt['datasets'].items():
        if phase == 'train':
            train_set = create_dataset(dataset_opt)
            # iterations per epoch at this batch size
            train_size = int(
                math.ceil(len(train_set) / dataset_opt['batch_size']))
            total_iters = int(opt['train']['niter'])
            total_epochs = int(math.ceil(total_iters / train_size))
            if opt['dist']:
                train_sampler = DistIterSampler(train_set, world_size, rank,
                                                dataset_ratio)
                # dataset_ratio-fold enlarged epochs need fewer of them
                total_epochs = int(
                    math.ceil(total_iters / (train_size * dataset_ratio)))
            else:
                train_sampler = None
            train_loader = create_dataloader(train_set, dataset_opt, opt,
                                             train_sampler)
            if rank <= 0:
                logger.info(
                    'Number of train images: {:,d}, iters: {:,d}'.format(
                        len(train_set), train_size))
                logger.info('Total epochs needed: {:d} for iters {:,d}'.format(
                    total_epochs, total_iters))
        elif phase == 'val':
            val_set = create_dataset(dataset_opt)
            val_loader = create_dataloader(val_set, dataset_opt, opt, None)
            if rank <= 0:
                logger.info('Number of val images in [{:s}]: {:d}'.format(
                    dataset_opt['name'], len(val_set)))
        else:
            raise NotImplementedError(
                'Phase [{:s}] is not recognized.'.format(phase))
    assert train_loader is not None

    #### create model
    model = create_model(opt)
    # Report the generator's complexity at a fixed 3x480x480 input.
    flops, params = get_model_complexity_info(model.netG, (3, 480, 480),
                                              as_strings=True,
                                              print_per_layer_stat=True,
                                              verbose=True)
    print('{:<30} {:<8}'.format('Computational complexity: ', flops))
    print('{:<30} {:<8}'.format('Number of parameters: ', params))
def resnet152(num_classes, pretrained=False, phase='train', **kwargs):
    """Constructs a ResNet-152 model.

    Args:
        num_classes (int): number of output classes for the final head.
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        phase (str): forwarded to the ResNet constructor ('train'/'test').
        **kwargs: extra arguments forwarded to the ResNet constructor.

    Returns:
        The constructed ResNet model.
    """
    model = ResNet(num_classes, Bottleneck, [3, 8, 36, 3], phase, **kwargs)
    if pretrained:
        # strict=False: the classification head differs from the ImageNet
        # checkpoint, so mismatched keys are tolerated.
        model.load_state_dict(model_zoo.load_url(model_urls['resnet152'],
                                                 model_dir='.'),
                              strict=False)
    return model


if __name__ == "__main__":
    model = resnet18(num_classes=2)
    x = torch.randn((10, 3, 300, 300))
    # Trace per-stage output shapes through the top-level children.
    for name, module in model.named_children():
        x = module(x)
        print(name, x.shape)

    from ptflops import get_model_complexity_info

    img_dim = 300
    # BUGFIX: ptflops expects a (C, H, W) input resolution. The original
    # passed (img_dim, img_dim), omitting the channel dimension, which makes
    # the dummy input (1, 300, 300) and crashes the first conv layer.
    flops, params = get_model_complexity_info(model, (3, img_dim, img_dim),
                                              as_strings=True,
                                              print_per_layer_stat=True)
    print('Flops: ' + flops)
    print('Params: ' + params)
def test(args, io):
    """Evaluate a trained point-cloud classifier on the ModelNet40 test set.

    Builds the model selected by ``args.model``, loads ``args.model_path``,
    prints a ptflops complexity report, then reports overall and per-class
    balanced accuracy through ``io.cprint``.
    """
    test_loader = DataLoader(ModelNet40(partition='test',
                                        num_points=args.num_points),
                             batch_size=args.test_batch_size,
                             shuffle=True,
                             drop_last=False)
    device = torch.device("cuda" if args.cuda else "cpu")

    #Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    elif args.model == 'ssg':
        model = PointNet2SSG(output_classes=40, dropout_prob=0)
        model.to(device)
    elif args.model == 'msg':
        model = PointNet2MSG(output_classes=40, dropout_prob=0)
        model.to(device)
    elif args.model == 'ognet':
        # [64,128,256,512]
        model = Model_dense(20, args.feature_dims, [512], output_classes=40,
                            init_points=768, input_dims=3,
                            dropout_prob=args.dropout, id_skip=args.id_skip,
                            drop_connect_rate=args.drop_connect_rate,
                            cluster='xyzrgb', pre_act=args.pre_act,
                            norm=args.norm_layer)
        if args.efficient:
            # Efficient variant replaces the dense model entirely.
            model = ModelE_dense(20, args.feature_dims, [512],
                                 output_classes=40, init_points=768,
                                 input_dims=3, dropout_prob=args.dropout,
                                 id_skip=args.id_skip,
                                 drop_connect_rate=args.drop_connect_rate,
                                 cluster='xyzrgb', pre_act=args.pre_act,
                                 norm=args.norm_layer, gem=args.gem,
                                 ASPP=args.ASPP)
        model.to(device)
    elif args.model == 'ognet-small':
        # [48,96,192,384]
        model = Model_dense(20, args.feature_dims, [512], output_classes=40,
                            init_points=768, input_dims=3,
                            dropout_prob=args.dropout, id_skip=args.id_skip,
                            drop_connect_rate=args.drop_connect_rate,
                            cluster='xyzrgb', pre_act=args.pre_act,
                            norm=args.norm_layer)
        model.to(device)
    else:
        raise Exception("Not implemented")

    try:
        model.load_state_dict(torch.load(args.model_path))
    except:
        # Checkpoints saved from a DataParallel model carry a 'module.'
        # prefix; wrap and retry.
        # NOTE(review): the bare except also hides genuine load errors.
        model = nn.DataParallel(model)
        model.load_state_dict(torch.load(args.model_path))
    model = model.eval()
    # NOTE(review): unconditional unwrap — this raises if the first
    # load_state_dict succeeded and the model was never DataParallel-wrapped;
    # confirm checkpoints are always DataParallel-saved.
    model = model.module

    # One sample from the loader as the template input for ptflops.
    batch0, label0 = next(iter(test_loader))
    batch0 = batch0[0].unsqueeze(0)
    print(batch0.shape)
    print(model)
    # NOTE(review): passing a tensor plus a (1024, 3) shape tuple matches a
    # customized ptflops fork, not the upstream API — verify the local version.
    macs, params = get_model_complexity_info(model, batch0, ((1024, 3)),
                                             as_strings=True,
                                             print_per_layer_stat=False,
                                             verbose=True)
    print('{:<30} {:<8}'.format('Computational complexity: ', macs))
    print('{:<30} {:<8}'.format('Number of parameters: ', params))

    test_acc = 0.0
    count = 0.0
    test_true = []
    test_pred = []
    for data, label in test_loader:
        data, label = data.to(device), label.to(device).squeeze()
        batch_size = data.size()[0]
        if args.model == 'ognet' or args.model == 'ognet-small' or args.model == 'ssg' or args.model == 'msg':
            # These models take the point cloud twice (features + coordinates).
            logits = model(data, data)
            #logits = model(1.1*data, 1.1*data)
        else:
            # PointNet/DGCNN expect channels-first (B, 3, N) input.
            data = data.permute(0, 2, 1)
            logits = model(data)
        preds = logits.max(dim=1)[1]
        test_true.append(label.cpu().numpy())
        test_pred.append(preds.detach().cpu().numpy())
    test_true = np.concatenate(test_true)
    test_pred = np.concatenate(test_pred)
    test_acc = metrics.accuracy_score(test_true, test_pred)
    avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)
    outstr = 'Test :: test acc: %.6f, test avg acc: %.6f' % (test_acc,
                                                             avg_per_class_acc)
    io.cprint(outstr)
def main():
    """Train or evaluate a CIFAR classifier, with checkpoint resume support.

    Reads configuration from the module-level ``args`` and updates the
    module-level ``best_acc``. Side effects: creates ``args.save``, writes
    checkpoints plus ``log.txt`` and a ``log.eps`` plot there.
    """
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.save):
        mkdir_p(args.save)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
    elif args.dataset == 'cifar100':
        dataloader = datasets.CIFAR100
    else:
        raise ValueError(
            'Expect dataset to be either CIFAR-10 or CIFAR-100 but got {}'.
            format(args.dataset))
    trainset = dataloader(root='./data', train=True, download=True,
                          transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch,
                                  shuffle=True, num_workers=args.workers)
    testset = dataloader(root='./data', train=False, download=False,
                         transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch,
                                 shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    model = arch_module.__dict__[args.arch](dataset=args.dataset)
    cudnn.benchmark = True
    print(' Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                  milestones=args.schedule,
                                                  gamma=args.gamma)

    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.save = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_prec1']
        start_epoch = checkpoint['epoch']
        model = arch_module.__dict__[args.arch](dataset=args.dataset,
                                                cfg=checkpoint['cfg'])
        # BUGFIX: the optimizer and scheduler built above reference the
        # *discarded* model's parameters; rebuild them for the new model,
        # otherwise training steps parameters the model no longer uses.
        optimizer = optim.SGD(model.parameters(), lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
        lr_scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=args.schedule, gamma=args.gamma)
        # load the state dict of saved checkpoint
        # turn the flag off to train from scratch
        if args.load_model:
            print('===> Resuming the state dict of saved model')
            model.load_state_dict(checkpoint['state_dict'])
        else:
            print('===> Skip loading state dict of saved model')
        # finetune a pruned network
        if args.load_optimizer and ('optimizer' in checkpoint.keys()):
            print('===> Resuming the state dict of saved checkpoint')
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('===> Skip loading the state dict of saved optimizer')
        # if the log file is already exist then append the log to it
        # BUGFIX: look for the log inside the run's save directory, not the
        # current working directory.
        if os.path.isfile(os.path.join(args.save, 'log.txt')):
            logger = Logger(os.path.join(args.save, 'log.txt'), title=title,
                            resume=True)
        else:
            logger = Logger(os.path.join(args.save, 'log.txt'), title=title)
            logger.set_names([
                'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
                'Valid Acc.'
            ])
    else:
        # training from scratch
        logger = Logger(os.path.join(args.save, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if use_cuda:
        model = model.cuda()

    # evaluate the results on test set
    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch,
                                   use_cuda)
        # ptflops builds its own dummy input from the (3, 32, 32) shape; the
        # unused torch.rand tensor the original allocated here was removed.
        flops, params = get_model_complexity_info(model, (3, 32, 32),
                                                  as_strings=True,
                                                  print_per_layer_stat=True)
        print('{:<30} {:<8}'.format('Computational complexity: ', flops))
        print('{:<30} {:<8}'.format('Number of parameters: ', params))
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        current_lr = next(iter(optimizer.param_groups))['lr']
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, current_lr))
        train_loss, train_acc = train(trainloader, model, criterion,
                                      optimizer, lr_scheduler, epoch,
                                      use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch,
                                   use_cuda)
        # append logger file
        logger.append([current_lr, train_loss, test_loss, train_acc,
                       test_acc])
        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': test_acc,
                'optimizer': optimizer.state_dict(),
                'cfg': model.cfg
            },
            is_best,
            checkpoint=args.save)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.save, 'log.eps'))
    print('Best acc:')
    print(best_acc)
        # NOTE(review): fragment — the enclosing def/loop for these three
        # lines starts before this chunk; it appears to copy pretrained
        # weights key-by-key while skipping the classifier head.
        if 'classifier' in i:
            continue
        self.state_dict()[i].copy_(param_dict[i])


def load_pretrained_model(model, weight_path):
    # Load weights into *model*: from *weight_path* when provided, otherwise
    # from the default pretrained URL.
    if weight_path != None:
        # NOTE(review): load_state_dict() expects a state dict, but the
        # parameter name suggests a file path — a torch.load(weight_path)
        # looks missing here; confirm what callers actually pass.
        _ = model.load_state_dict(weight_path)
        return
    state_dict = model_zoo.load_url(PretrainedURL,
                                    map_location=torch.device("cpu"))
    _ = model.load_state_dict(state_dict)


def baseline(pretrained = False, weight_path = None):
    # Factory: build a Baseline model, Kaiming-initialize it, then optionally
    # overwrite with pretrained weights.
    model = Baseline()
    weights_init_kaiming(model)
    if pretrained:
        load_pretrained_model(model, weight_path)
    return model


if __name__ == "__main__":
    # Quick benchmark: single forward-pass latency plus MAC/param counts from
    # both ptflops and thop at a (3, 384, 128) re-ID input.
    model = baseline(pretrained=False)
    model.eval()
    img = torch.randn(1,3,384,128)
    start_time = time.time()
    out = model(img)
    end_time = time.time()
    duration = (end_time - start_time) * 1000  # milliseconds
    print("duration is ", duration)
    macs, params = get_model_complexity_info(model, (3, 384, 128),
                                             print_per_layer_stat=False)
    print("another mac is ", macs)
    macs, params = profile(model, inputs=(img, ))
    print("the mac is ", macs)
# Author:Han
# @Time : 2019/5/20 17:23
import torch
from torchvision import models
from ptflops import get_model_complexity_info

device = torch.device("cpu")
net = models.AlexNet()
# Explicit keywords instead of bare positionals: in ptflops the third and
# fourth parameters are print_per_layer_stat and as_strings respectively.
flops, params = get_model_complexity_info(net, (3, 224, 224),
                                          print_per_layer_stat=True,
                                          as_strings=True)
print('Flops:' + flops)
print('Params:' + params)

model = str(net)  # cast the model to a string so it can be written to a file
# BUGFIX: the original opened the file without ever closing it (handle leak);
# a with-block guarantees the file is flushed and closed.
with open("Alexnet.txt", 'w') as file:
    file.write(model)  # write the architecture description to the file
def main(args):
    """Train/evaluate ResNet-18 (optionally OctConv-converted) on CIFAR-10.

    Builds the dataloaders and model, reports ptflops complexity before and
    after the OctConv conversion, supports checkpoint resume and
    evaluate-only mode, then runs the training loop. Updates the module-level
    ``best_acc``; writes checkpoints and a per-epoch text log.
    """
    global best_acc
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                                 download=True,
                                                 transform=transform_train)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True, num_workers=4)
    val_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                               download=True,
                                               transform=transform_test)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False, num_workers=4)
    print("number of batches are ", len(train_loader))

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # ImageNet-pretrained ResNet-18 with a fresh 10-class head.
    model = torchvision.models.resnet18(pretrained=True)
    model.fc = nn.Linear(in_features=512, out_features=10, bias=True)
    flops, params = get_model_complexity_info(model, (3, 32, 32),
                                              as_strings=True,
                                              print_per_layer_stat=False)
    print("FLOPs in original resnet18 model are ", flops)
    print("Number of Params in original resnet18 model are", params)

    if args.enable_octave:
        # In-place conversion of plain convolutions to octave convolutions,
        # then re-measure the complexity for comparison.
        make_octconv_net(model)
        flops, params = get_model_complexity_info(model, (3, 32, 32),
                                                  as_strings=True,
                                                  print_per_layer_stat=False)
        print("FLOPs in OctConv resnet18 model are ", flops)
        print("Number of Params in OctConv resnet18 model are", params)
    # print(model)

    # model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()
    # checkpoint = torch.load("checkpoint/model_best.pth.tar")
    # model.load_state_dict(checkpoint['state_dict'])
    # summary(model, (3,32,32))
    # criterion = FocalLoss()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9,
                          weight_decay=5e-4)
    # Drop LR when validation accuracy plateaus ('max' mode).
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1,
                                  patience=10, verbose=True)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            # Continue from the LR saved inside the optimizer state.
            args.lr = checkpoint['optimizer']['param_groups'][0]['lr']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            # logger = Logger(join(args.checkpoint, 'log.txt'), title=title, resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # validation
    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    gamma = args.gamma
    lr = args.lr
    schedule = args.schedule
    for epoch in range(args.start_epoch, args.epochs):
        # Read the current LR back from the optimizer (the scheduler mutates it).
        lr = optimizer.state_dict()['param_groups'][0]['lr']
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))
        train_loss, train_acc = train(train_loader, model, optimizer,
                                      criterion)
        valid_loss, valid_acc = validate(val_loader, model, criterion)
        print(" val loss ", valid_loss)
        print(" val Accuracy ", valid_acc)
        is_best = valid_acc > best_acc
        best_acc = max(valid_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)
        # Append one line per epoch to the variant-specific log file.
        # NOTE(review): opened without a context manager, and the logged
        # '{0}' is the 0-based epoch while the console prints epoch + 1 —
        # confirm whether that off-by-one in the log is intentional.
        if (args.enable_octave):
            f = open("log_cifar10_resnet_octave_conv_.txt", "a")
        else:
            f = open("log_cifar10_resnet_vanilla_conv_.txt", "a")
        f.write('Train FP epoch: [{0}]\t'
                'Train loss {train_loss:.3f} \t'
                'Train Accuracy {train_acc:.3f} \t'
                'Val loss {valid_loss:.3f} \t'
                'Val Accuracy {valid_acc:.3f} \t'
                'LR {lr} \n'.format(epoch,
                                    train_loss=train_loss,
                                    train_acc=train_acc,
                                    valid_loss=valid_loss,
                                    valid_acc=valid_acc,
                                    lr=lr))
        f.close()
        scheduler.step(valid_acc)
                                  kernel_size=1,
                                  stride=1,
                                  padding=0)
        # NOTE(review): the lines above are the tail of an __init__ that
        # starts before this chunk — a 1x1 projection conv following a 3x3
        # conv, per the forward() below.
        self.scale = scale

    def forward(self, x):
        # 3x3 conv -> (dropout currently disabled) -> 1x1 classifier conv.
        fm = self.conv_3x3(x)
        # fm = self.dropout(fm)
        output = self.conv_1x1(fm)
        if self.scale > 1:
            # Upsample the logits back toward input resolution.
            output = F.interpolate(output,
                                   scale_factor=self.scale,
                                   mode='bilinear',
                                   align_corners=True)
        return output


if __name__ == "__main__":
    # Build BiSeNet for 19 classes (Cityscapes) and report its ptflops
    # complexity at full 1024x2048 resolution on GPU 0.
    model = BiSeNet(19, is_training=False, criterion=None,
                    ohem_criterion=None)
    print(model)
    from ptflops import get_model_complexity_info
    with torch.cuda.device(0):
        flops, params = get_model_complexity_info(
            model, input_res=(3, 1024, 2048), as_strings=True,
            print_per_layer_stat=True)
        print('Flops: ' + flops)
        print('Params: ' + params)
# NOTE(review): this span is the tail of a larger evaluation routine —
# *model*, *opt*, *model_path*, *dev* and *query_loader* are defined before
# this chunk.
model = torch.nn.DataParallel(model, device_ids=opt.gpu_ids).cuda()
model.load_state_dict(torch.load(model_path, map_location=dev))
# Strip the projection/classifier heads so only backbone features remain.
model.module.proj_output = nn.Sequential()
model.module.classifier = nn.Sequential()
if opt.npart > 1:
    for i in range(opt.npart):
        model.module.proj_outputs[i] = nn.Sequential()
print(model_path)

# One query sample as the template input for the complexity estimate.
batch0, label0 = next(iter(query_loader))
batch0 = batch0[0].unsqueeze(0)
print(batch0.shape)
# NOTE(review): passing a tensor plus a point-count shape tuple matches a
# customized ptflops fork, not the upstream API — confirm the local version.
macs, params = get_model_complexity_info(model, batch0,
                                         ((round(6890 * opt.slim), 3)),
                                         as_strings=True,
                                         print_per_layer_stat=False,
                                         verbose=True)
#print(macs)
print('{:<30} {:<8}'.format('Computational complexity: ', macs))
print('{:<30} {:<8}'.format('Number of parameters: ', params))
#model_parameters = filter(lambda p: p.requires_grad, model.parameters())
#params = sum([np.prod(p.size()) for p in model_parameters])
#print('Number of parameters: %.2f M'% (params/1e6) )

# Prepare the snapshot directory tree for saving models.
if not os.path.exists('./snapshot/'):
    os.mkdir('./snapshot/')
save_model_path = './snapshot/' + opt.name
if not os.path.exists(save_model_path):
    os.mkdir(save_model_path)