Beispiel #1
0
def test(args):
    """Run a trained denoising model over a fixed HDF5 test set.

    Reports model FLOPs/params, per-image PSNR/SSIM and forward-pass FPS,
    optionally saves/shows side-by-side (source | inference | target)
    images, and dumps all run metadata to ``test_info.json`` beside the
    loaded checkpoint.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

    # Restore training-time arguments so the model is rebuilt identically.
    args = merge_args_from_train(args.save_dir, args)
    model_name = args.model_name + '-bz' + str(args.bz) + '_ep' + str(
        args.ep) + '_' + args.n_loss
    if '_t' in model_name:
        model_name = model_name + '_' + args.t_loss
    if args.mode == 'n2n':
        model_name = model_name + '-' + args.mode
    if not os.path.exists(args.save_dir):
        args.save_dir = 'results2' + args.h5_dir.replace(
            '/', '_') + '/' + args.noise_name + '/' + model_name
    print('existing save_dir:', args.save_dir)

    # NOTE(review): the test-set paths are hard-coded absolute paths rather
    # than derived from args — confirm this is intended.
    source_h5_path_test = '/home/ipsg/code/sx/datasets/infread/images/n2c_infreadEN_noised_test.h5'
    target_h5_path_test = '/home/ipsg/code/sx/datasets/infread/images/n2c_infreadEN_clean_test.h5'
    test_set = dataset_img2img(source_h5_path_test, target_h5_path_test)
    print('source_h5_path_test:', source_h5_path_test)

    model_path = args.save_dir + '/' + str(args.test_ep) + '.pth'
    state_dict = torch.load(model_path)
    net = choose_model(args.model_name, 'test')
    print('loading:', model_path)

    net.load_state_dict(state_dict)
    net.eval()
    net.cuda()

    with torch.no_grad():
        f, p = get_model_complexity_info(net, (3, 480, 640),
                                         as_strings=True,
                                         print_per_layer_stat=False,
                                         verbose=False)
    print('FLOPs:', f, 'Parms:', p)

    test_psnr = 0
    test_ssim = 0
    fw_times = []
    for i, pair in enumerate(test_set):
        with torch.no_grad():
            batch_datas = pair[0].unsqueeze(0).cuda()
            batch_labels = pair[1].unsqueeze(0).cuda()
            # BUGFIX: time.clock() was removed in Python 3.8;
            # time.perf_counter() is the documented replacement for
            # measuring wall-clock intervals.
            fw_s = time.perf_counter()
            batch_inferences = net(batch_datas)
            fw_time = time.perf_counter() - fw_s
            fps = np.round(1 / fw_time, 3)
            fw_times.append(fw_time)
            psnr_batch = batch_PSNR(batch_inferences,
                                    batch_labels,
                                    data_range=1.0)
            test_psnr += psnr_batch
            ssim_batch = ssim(batch_inferences,
                              batch_labels,
                              data_range=1.0,
                              win_size=11).item()
            test_ssim += ssim_batch
            print('image:{}, fps:{}, psnr:{}, ssim:{}'.format(
                i, fps, psnr_batch, ssim_batch))

            # Convert [0, 1] CHW tensors to uint8 HWC images for OpenCV.
            inference = np.array(
                batch_inferences.cpu().squeeze(0).permute(1, 2, 0) *
                255).astype('uint8')
            source = np.array(batch_datas.cpu().squeeze(0).permute(1, 2, 0) *
                              255).astype('uint8')
            target = np.array(batch_labels.cpu().squeeze(0).permute(1, 2, 0) *
                              255).astype('uint8')
            result = cv2.hconcat([source, inference, target])

            if args.save_images:
                save_images_dir = args.save_dir + '/test_images_from_' + args.h5_dir.replace(
                    '/', '_') + '_ep' + str(args.test_ep)
                if not os.path.exists(save_images_dir):
                    os.makedirs(save_images_dir)
                cv2.imwrite(
                    save_images_dir + '/' + args.noise_name + '_clean' +
                    str(i) + '.jpg', target,
                    [int(cv2.IMWRITE_JPEG_QUALITY), 100])
                # NOTE(review): this file is named '_noised' but stores the
                # *inference* image — confirm the intended naming.
                cv2.imwrite(
                    save_images_dir + '/' + args.noise_name + '_noised' +
                    str(i) + '.jpg', inference,
                    [int(cv2.IMWRITE_JPEG_QUALITY), 100])
                cv2.imwrite(
                    save_images_dir + '/' + args.noise_name + '_concat' +
                    str(i) + '.jpg', result,
                    [int(cv2.IMWRITE_JPEG_QUALITY), 100])
            if args.show_images:
                plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
                plt.pause(0.2)

    fw_fps = 1 / np.mean(fw_times)
    test_psnr /= len(test_set)
    test_ssim /= len(test_set)
    print('fw_fps:{}, psnr:{}, ssim:{}'.format(fw_fps, test_psnr, test_ssim))

    # Persist args + metrics next to the checkpoint for later inspection.
    test_info = vars(args)
    test_info.setdefault('fw_fps', fw_fps)
    test_info.setdefault('psnr', test_psnr)
    test_info.setdefault('ssim', test_ssim)
    with open(os.path.join(Path(model_path).parent, 'test_info.json'),
              'w') as f:
        json.dump(test_info, f, indent=2)
Beispiel #2
0
        self.conv_1_2 = DNCNN2(in_c, phase)

    def forward(self, x):
        """Pass ``x`` through the two stacked stages.

        In 'train' phase each stage returns a (tensor, aux) pair — the aux
        value is discarded and ``(output, None)`` is returned so the caller
        sees a uniform two-tuple.  In any other phase (test) the stages
        return plain tensors and only the output tensor is returned.
        """
        if self.phase == 'train':
            x1, _ = self.conv_1_1(x)
            x1, _ = self.conv_1_2(x1)
            return x1, None
        else:
            x1 = self.conv_1_1(x)
            x1 = self.conv_1_2(x1)
            return x1


if __name__ == '__main__':
    # Quick self-check: report FLOPs/params of DRCNN and time one forward.
    from ptflops import get_model_complexity_info
    import time
    with torch.no_grad():
        net = DRCNN(3, 'test').cuda()

        f, p = get_model_complexity_info(net, (3, 480, 640),
                                         as_strings=True,
                                         print_per_layer_stat=False,
                                         verbose=False)
        print('FLOPs:', f, 'Parms:', p)

        x = torch.randn(1, 3, 480, 640).cuda()
        # BUGFIX: time.clock() was removed in Python 3.8; use perf_counter().
        # NOTE(review): CUDA kernels launch asynchronously, so this timing is
        # only approximate without torch.cuda.synchronize().
        s = time.perf_counter()
        y = net(x)
        print(y.shape, 1 / (time.perf_counter() - s))
Beispiel #3
0
    def pruning_and_training(self,
                             testloader,
                             trainloader,
                             batch_size=128,
                             epoch=1,
                             lr=0.001):
        """Iteratively hard-prune the model and fine-tune it.

        For each of ``self.max_iter`` iterations: prune with the per-layer
        ratios in ``self.P``, report the pruned model's complexity, fine-tune
        for ``epoch`` epochs with SGD, evaluate on ``testloader`` after each
        epoch, and checkpoint whenever the test accuracy improves.  The best
        accuracy and trainable-parameter count per iteration are appended to
        ``self.best_acc`` / ``self.net_weights``.
        """
        for it in range(self.max_iter):
            best_acc = -1000
            print(
                '\n[1] PRUNING | ITER : {}/{}-----------------------------------------------------------'
                .format(it + 1, self.max_iter))
            print(
                '\n=> Pruning Net... | Layer1 : {}% Layer2 : {}% Layer3 : {}%'.
                format(self.P[0] * 100, self.P[1] * 100, self.P[2] * 100))
            self.HardPruning()
            self.model.train()
            flops, params = get_model_complexity_info(
                self.model, (3, 32, 32),
                as_strings=True,
                print_per_layer_stat=False)
            print('{:<30}  {:<8}'.format('Computational complexity: ', flops))
            print('{:<30}  {:<8}'.format('Number of parameters: ', params))
            print(
                '\n[2] FINE TUNING----------------------------------------------------------------------'
            )
            for e in range(epoch):
                # BUGFIX: re-enable train mode every epoch — the eval pass
                # below put the model in eval mode and it was never restored.
                self.model.train()
                train_loss = 0
                correct = 0
                total = 0
                # A fresh optimizer per epoch applies the epoch-dependent LR.
                optimizer = optim.SGD(self.model.parameters(),
                                      lr=self.learning_rate(e, lr),
                                      momentum=0.9)
                criterion = nn.CrossEntropyLoss()
                for batch_idx, (inputs, targets) in enumerate(trainloader):
                    inputs, targets = inputs.cuda(), targets.cuda()
                    optimizer.zero_grad()
                    inputs, targets = Variable(inputs), Variable(targets)
                    outputs = self.model(inputs)
                    loss = criterion(outputs, targets)
                    loss.backward()
                    optimizer.step()
                    total += targets.size(0)
                    predicted = torch.max(outputs.data, 1)[1]
                    train_loss += loss.item()
                    correct += predicted.eq(targets.data).cpu().sum()
                    sys.stdout.write('\r')
                    sys.stdout.write('Trainable params [{}]'.format(params))
                    sys.stdout.write(
                        '| Iteration [%3d] Epoch [%3d/%3d] Iter [%3d/%3d] LR [%3d] \t\tLoss: %.4f Acc@1: %.3f%%'
                        % (it + 1, e + 1, epoch, batch_idx + 1, 391,
                           self.learning_rate(
                               e, lr), loss.item(), 100. * correct / total))
                    sys.stdout.flush()

                # Evaluate on the full test set after each fine-tuning epoch.
                self.model.eval()
                self.model.training = False
                test_loss = 0
                correct = 0
                total = 0
                criterion = nn.CrossEntropyLoss()
                with torch.no_grad():
                    for batch_idx, (inputs, targets) in enumerate(testloader):
                        inputs, targets = inputs.cuda(), targets.cuda()
                        inputs, targets = Variable(inputs), Variable(targets)
                        outputs = self.model(inputs)
                        loss = criterion(outputs, targets)
                        test_loss += loss.item()
                        predicted = torch.max(outputs.data, 1)[1]
                        total += targets.size(0)
                        correct += predicted.eq(targets.data).cpu().sum()

                # BUGFIX: accuracy computation and checkpointing previously
                # ran *inside* the test-batch loop, printing and saving a
                # model per batch based on a partial accuracy; they belong
                # after the full pass over the test set.
                acc = 100. * correct / total
                print('\n | Test {} '.format(acc))
                if acc > best_acc:
                    print('| New Best Accuracy...\t\t\tTop1 = %.2f%%' %
                          (acc))
                    print('| Saving Pruned Model...')
                    torch.save(self.model, "wide_resnet_iter_hard.pth")
                    best_acc = acc
            self.best_acc.append(best_acc.item())
            self.net_weights.append(self.number_of_trainable_params(
                self.model))
Beispiel #4
0
from ptflops import get_model_complexity_info

from model.segmentation.deeplabV3_plus import DeepLabV3_plus

if __name__ == '__main__':
    # Report FLOPs/params for DeepLab V3+ at the two standard eval crops.
    # The model is rebuilt per resolution because ptflops attaches hooks.
    banner = '=' * 80
    for side in (513, 769):
        print(banner)
        print('DeepLab V3+, ResNet, {}x{}'.format(side, side))
        print(banner)
        model = DeepLabV3_plus(pretrained=True)
        flops, params = get_model_complexity_info(model, (3, side, side),
                                                  verbose=True)

        print('{:<30}  {:<8}'.format('Computational complexity: ', flops))
        # BUGFIX: the 769x769 run previously omitted the parameter-count
        # line; both resolutions now report both metrics.
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))
Beispiel #5
0
        10: {
            'conv': [1, 2],
            'rate': 1
        },
        11: {
            'conv': [1, 2],
            'rate': 0
        }
    }

    # Instantiate the NAS supernet and run one random CIFAR-sized input
    # through the architecture sampled by ``choice`` (defined above).
    model = SuperNetwork(shadow_bn=False, layers=12, classes=10)
    print(model)
    input = torch.randn(3, 32, 32).unsqueeze(0)  # NOTE: shadows builtin input()
    print(model(input, choice))  # (1, 10)

    #
    # params = list(model.parameters())
    # p_s = params[1].size()
    # model.conv1.zero_grad()
    # model.conv1.weight.grad()

    # NOTE(review): ``torch`` is already used above, so it is presumably
    # imported earlier in the file; this local import is redundant but kept.
    import torch
    from ptflops import get_model_complexity_info
    with torch.cuda.device(0):
        # choice is added
        # NOTE(review): ptflops calls model(x) without ``choice`` — confirm
        # SuperNetwork.forward provides a default for it.
        flops, params = get_model_complexity_info(model, (3, 32, 32),
                                                  as_strings=True,
                                                  print_per_layer_stat=True)
        print('{:<30}  {:<8}'.format('Computational complexity: ', flops))
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))
Beispiel #6
0
# Evaluation-script configuration; ``args`` and ``compound_coef`` are parsed
# earlier in this file (outside this chunk).
override_prev_results = args.override
project_name = args.project
weights_path = f'weights/efficientdet-d{compound_coef}.pth' if args.weights is None else args.weights

print(f'running coco-style evaluation on project {project_name}, weights {weights_path}...')

params = yaml.safe_load(open(f'projects/{project_name}.yml'))

obj_list = params['obj_list_fushusheshi']




# Input side length per EfficientDet compound coefficient (d0..d8).
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]

det_save_odgt = f'val_result/fushusheshi_4class_d3_lr_1e-3_only_bad_epoch260_1201.odgt'
det_save_eval_log_txt = f'val_result/fushusheshi_4class_d3_lr_1e-3_only_bad_epoch260_1201.txt'
threshold=0.05

if __name__ == '__main__':

    with torch.cuda.device(0):
        # SECURITY NOTE(review): ``eval`` on strings pulled from the YAML
        # project file executes arbitrary code if that file is untrusted;
        # consider ast.literal_eval for the anchor ratios/scales.
        model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                        ratios=eval(params['anchors_ratios']), scales=eval(params['anchors_scales']))
        # Note: ``params`` (the YAML dict) is rebound here to the ptflops
        # parameter-count string.
        macs, params = get_model_complexity_info(model, (3, input_sizes[compound_coef], input_sizes[compound_coef]), as_strings=True,
                                            print_per_layer_stat=True, verbose=True)
        print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))


Beispiel #7
0
def train():
    """Train the face-recognition backbone with a GaussianFace head.

    Builds the data pipeline from ``config``, optionally loads a pretrained
    backbone/quality model pair, optionally resumes backbone/head
    checkpoints, then runs mixed-precision SGD training with a cosine LR
    schedule, logging to TensorBoard and checkpointing every epoch.
    """
    DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    writer = SummaryWriter(config.LOG_ROOT)

    train_transform = transforms.Compose([
        transforms.RandomApply(
            [transforms.RandomResizedCrop(112, scale=(0.95, 1),
                                          ratio=(1, 1))]),
        transforms.Resize(112),
        transforms.RandomHorizontalFlip(),
        transforms.RandomGrayscale(0.01),
        transforms.ToTensor(),
        transforms.Normalize(mean=config.RGB_MEAN, std=config.RGB_STD),
    ])

    dataset_train = ImageFolder(config.TRAIN_FILES, train_transform)
    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=config.BATCH_SIZE,
                                               pin_memory=True,
                                               shuffle=True,
                                               num_workers=8,
                                               drop_last=True)

    # BUGFIX: ImageFolder.classes is the *list* of class names; the class
    # count (an int) is what this print and GaussianFace's ``out_features``
    # below require.
    NUM_CLASS = len(train_loader.dataset.classes)
    print("Number of Training Classes: {}".format(NUM_CLASS))

    BACKBONE = ResNet(num_layers=100, feature_dim=512)
    flops, params = get_model_complexity_info(BACKBONE, (3, 112, 112),
                                              as_strings=True,
                                              print_per_layer_stat=False)
    print('BACKBONE FLOPs:', flops)
    print('BACKBONE PARAMS:', params)

    PRETRAINED_BACKBONE = None
    PRETRAINED_QUALITY = None

    # Load the frozen quality-estimation pair only when both files exist.
    if os.path.isfile(config.PRETRAINED_BACKBONE) and os.path.isfile(
            config.PRETRAINED_QUALITY):
        PRETRAINED_BACKBONE = ResNet(num_layers=100, feature_dim=512)
        PRETRAINED_QUALITY = FaceQuality(512 * 7 * 7)
        checkpoint = torch.load(config.PRETRAINED_BACKBONE)
        load_state_dict(PRETRAINED_BACKBONE, checkpoint)
        PRETRAINED_BACKBONE = nn.DataParallel(PRETRAINED_BACKBONE,
                                              device_ids=config.BACKBONE_GPUS)
        PRETRAINED_BACKBONE = PRETRAINED_BACKBONE.cuda(0)
        PRETRAINED_BACKBONE.eval()

        checkpoint = torch.load(config.PRETRAINED_QUALITY)
        load_state_dict(PRETRAINED_QUALITY, checkpoint)
        PRETRAINED_QUALITY = nn.DataParallel(PRETRAINED_QUALITY,
                                             device_ids=config.BACKBONE_GPUS)
        PRETRAINED_QUALITY = PRETRAINED_QUALITY.cuda(0)
        PRETRAINED_QUALITY.eval()

    HEAD = GaussianFace(in_features=config.EMBEDDING_SIZE,
                        out_features=NUM_CLASS)
    LOSS = FocalLoss()
    # optionally resume from a checkpoint
    if config.BACKBONE_RESUME_ROOT and config.HEAD_RESUME_ROOT:
        print("=" * 60)
        if os.path.isfile(config.BACKBONE_RESUME_ROOT):
            print("Loading Backbone Checkpoint '{}'".format(
                config.BACKBONE_RESUME_ROOT))
            checkpoint = torch.load(config.BACKBONE_RESUME_ROOT)
            load_state_dict(BACKBONE, checkpoint)
        else:
            print(
                "No Checkpoint Found at '{}' Please Have a Check or Continue to Train from Scratch"
                .format(config.BACKBONE_RESUME_ROOT))
        if os.path.isfile(config.HEAD_RESUME_ROOT):
            print("Loading Head Checkpoint '{}'".format(
                config.HEAD_RESUME_ROOT))
            checkpoint = torch.load(config.HEAD_RESUME_ROOT)
            load_state_dict(HEAD, checkpoint)
        else:
            print(
                "No Checkpoint Found at '{}' Please Have a Check or Continue to Train from Scratch"
                .format(config.HEAD_RESUME_ROOT))
        print("=" * 60)

    # Backbone and head may live on different GPU groups per config.
    BACKBONE = nn.DataParallel(BACKBONE,
                               device_ids=config.BACKBONE_GPUS,
                               output_device=config.BACKBONE_GPUS[-1])
    BACKBONE = BACKBONE.cuda(config.BACKBONE_GPUS[0])
    HEAD = nn.DataParallel(HEAD,
                           device_ids=config.HEAD_GPUS,
                           output_device=config.HEAD_GPUS[0])
    HEAD = HEAD.cuda(config.HEAD_GPUS[0])
    OPTIMIZER = optim.SGD([{
        'params': BACKBONE.parameters(),
        'lr': config.BACKBONE_LR,
        'weight_decay': config.WEIGHT_DECAY
    }, {
        'params': HEAD.parameters(),
        'lr': config.BACKBONE_LR
    }],
                          momentum=config.MOMENTUM)
    # BUGFIX: for loaders shorter than 100 batches the old
    # ``len(train_loader) // 100`` was 0 and ``batch % DISP_FREQ`` below
    # raised ZeroDivisionError; clamp to at least 1.
    DISP_FREQ = max(len(train_loader) // 100, 1)

    NUM_EPOCH_WARM_UP = config.NUM_EPOCH_WARM_UP
    NUM_BATCH_WARM_UP = len(train_loader) * NUM_EPOCH_WARM_UP
    batch = 0
    step = 0

    scheduler = CosineDecayLR(OPTIMIZER,
                              T_max=10 * len(train_loader),
                              lr_init=config.BACKBONE_LR,
                              lr_min=1e-5,
                              warmup=NUM_BATCH_WARM_UP)
    for epoch in range(config.NUM_EPOCH):
        BACKBONE.train()
        HEAD.train()
        arcface_losses = AverageMeter()
        confidences = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        scaler = torch.cuda.amp.GradScaler()
        for inputs, labels in tqdm(iter(train_loader)):
            inputs = inputs.cuda(config.BACKBONE_GPUS[0])
            labels = labels.cuda(config.HEAD_GPUS[0])
            with torch.cuda.amp.autocast():
                features = BACKBONE(inputs)
                if PRETRAINED_BACKBONE is None or PRETRAINED_QUALITY is None:
                    outputs = HEAD(None, features.cuda(config.HEAD_GPUS[0]),
                                   labels, False)
                else:
                    # Frozen pair supplies per-sample quality scores.
                    with torch.no_grad():
                        _, fc = PRETRAINED_BACKBONE(inputs, True)
                        quality = PRETRAINED_QUALITY(fc)
                    outputs = HEAD(quality.cuda(config.HEAD_GPUS[0]),
                                   features.cuda(config.HEAD_GPUS[0]), labels,
                                   True)
            # measure accuracy and record loss
            arcface_loss = LOSS(outputs, labels)
            prec1, prec5 = accuracy(outputs.data, labels, topk=(1, 5))
            arcface_losses.update(arcface_loss.data.item(), inputs.size(0))
            top1.update(prec1.data.item(), inputs.size(0))
            top5.update(prec5.data.item(), inputs.size(0))
            loss = arcface_loss
            # compute gradient and do SGD step (AMP-scaled)
            OPTIMIZER.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(OPTIMIZER)
            scaler.update()
            if ((batch + 1) % DISP_FREQ == 0) and batch != 0:
                print("=" * 60)
                print(
                    'Epoch {}/{} Batch {}/{}\t'
                    'Training Loss {arcface_loss.val:.4f} ({arcface_loss.avg:.4f})\t'
                    'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                    'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                        epoch + 1,
                        config.NUM_EPOCH,
                        batch + 1,
                        len(train_loader) * config.NUM_EPOCH,
                        arcface_loss=arcface_losses,
                        top1=top1,
                        top5=top5))
                print("=" * 60)

            batch += 1  # batch index
            scheduler.step(batch)
            if batch % 1000 == 0:
                print(OPTIMIZER)
        # training statistics per epoch (buffer for visualization)
        epoch_loss = arcface_losses.avg
        epoch_acc = top1.avg
        writer.add_scalar("Training_Loss", epoch_loss, epoch + 1)
        writer.add_scalar("Training_Accuracy", epoch_acc, epoch + 1)
        print("=" * 60)
        print('Epoch: {}/{}\t'
              'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
              'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                  epoch + 1,
                  config.NUM_EPOCH,
                  loss=arcface_losses,
                  top1=top1,
                  top5=top5))
        print("=" * 60)

        # save checkpoints per epoch
        curTime = get_time()
        if not os.path.exists(config.MODEL_ROOT):
            os.makedirs(config.MODEL_ROOT)
        torch.save(
            BACKBONE.state_dict(),
            os.path.join(
                config.MODEL_ROOT,
                "Backbone_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(
                    epoch + 1, batch, curTime)))
        torch.save(
            HEAD.state_dict(),
            os.path.join(
                config.MODEL_ROOT,
                "Head_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(
                    epoch + 1, batch, curTime)))
def main():
    """Train/evaluate a CIFAR classifier end to end.

    Relies on module-level ``args``, ``state``, ``best_acc`` and the
    ``train``/``test`` helpers defined elsewhere in this file.
    """
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.save_dir):
        mkdir_p(args.save_dir)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # CIFAR channel means / stds
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root='./data', train=True, download=True, transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=args.workers)

    # NOTE(review): testset uses download=False while trainset downloads —
    # fails on a fresh machine if the test split is missing; confirm.
    testset = dataloader(root='./data', train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))

    model = models.__dict__[args.arch](dataset=args.dataset, depth=args.depth, reduction=args.reduction)

    print(model)    

    if args.cuda:
        model.cuda()

    print('    Total params: %.2f' % (sum(p.numel() for p in model.parameters())))

    with torch.cuda.device(0):
      net = model
      flops, params = get_model_complexity_info(net, (3, 32,32), as_strings=True, print_per_layer_stat=True)
      print('Flops:  ' + flops)
      print('Params: ' + params)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)


    # Resume
    # NOTE(review): title is hard-coded 'cifar-10-' even when args.dataset
    # selects CIFAR-100.
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.save_dir = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.save_dir, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.save_dir, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch, args.cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.gamma, args.schedule)

        # ``state`` is a module-level dict updated by adjust_learning_rate.
        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, args.cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch, args.cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer' : optimizer.state_dict(),
            }, is_best, checkpoint=args.save_dir)

    logger.close()

    print('Best acc:')
    print(best_acc)
    # NOTE(review): everything below looks like an unrelated pasted fragment
    # (a 3-D point-cloud re-id complexity check on Market3D) fused into
    # main(); it reuses ``net`` from the ptflops section above — verify
    # whether this code belongs in this function at all.
    rgb = torch.FloatTensor(4, 6890, 3).cuda()
    net = net.cuda()
    print(net)
    net.proj_output = nn.Sequential()
    model_parameters = filter(lambda p: p.requires_grad, net.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print('Number of parameters: %.2f M' % (params / 1e6))
    #output = net(xyz, rgb)
    market_data = Market3D('./2DMarket', flip=True, slim=0.25, bg=True)

    CustomDataLoader = partial(DataLoader,
                               num_workers=0,
                               batch_size=8,
                               shuffle=True,
                               drop_last=True)
    query_loader = CustomDataLoader(market_data.query())
    batch0, label0 = next(iter(query_loader))
    batch0 = batch0[0].unsqueeze(0)
    print(batch0.shape)
    # NOTE(review): a tensor is passed where ptflops expects ``input_res``
    # and the resolution tuple follows as a third positional argument —
    # confirm this matches the ptflops version in use.
    macs, params = get_model_complexity_info(net,
                                             batch0.cuda(),
                                             ((round(6890 * 0.5), 3)),
                                             as_strings=True,
                                             print_per_layer_stat=False,
                                             verbose=True)
    #print(macs)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))

    #print(output.shape)
Beispiel #10
0
        d0 = self.outconv(torch.cat((d1, d2, d3, d4, d5, d6), 1))
        # d00 = d0 + self.refconv(d0)

        return F.sigmoid(d0), F.sigmoid(d1), F.sigmoid(d2), F.sigmoid(
            d3), F.sigmoid(d4), F.sigmoid(d5), F.sigmoid(d6)


if __name__ == '__main__':
    # Self-check: report U2NET parameter count and complexity via both
    # ptflops and thop.
    from thop import profile
    from thop import clever_format
    from ptflops import get_model_complexity_info
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # model = MyselfUnet3d_justdeepvision(1, 1, deepvision=True).to(device)
    # BUGFIX: use the selected device instead of an unconditional .cuda();
    # the input below is moved with .to(device), so a CPU-only machine
    # previously crashed here.
    model = U2NET(1, 1).to(device)
    # model = nn.DataParallel(model,device_ids=[0])
    params = sum(param.numel() for param in model.parameters()) / 1e6
    print(params)

    # NOTE(review): the 4-tuple resolution makes ptflops construct a 5-D
    # input (N, 1, 19, 256, 256), matching the volumetric input used below.
    macs, params = get_model_complexity_info(model, (1, 19, 256, 256),
                                             as_strings=True,
                                             print_per_layer_stat=False,
                                             verbose=True)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))
    input = torch.randn(1, 1, 19, 256, 256).to(device)
    macs, params = profile(model, inputs=(input, ))
    macs, params = clever_format([macs, params], "%.3f")
    print(macs, params)
Beispiel #11
0
    # Determine the input resolution for this model; the fallback is an
    # ImageNet-style (3, 224, 224) image.  (Loop over model names starts
    # above this chunk.)
    input_res = (3, 224, 224)
    if model_name == "osnet":
        input_res = (3, 256, 128)
    elif model_name == "mgn" or model_name == "pcb" or model_name == "baseline":
        input_res = (3, 384, 128)
    elif model_name == "alphapose":
        input_res = (3, 256, 192)
    elif model_name == "st_gcn_net":
        input_res = (3, 256, 14)
    elif model_name == "matmul256":
        input_res = (1, 256, 256)
    elif model_name == "matmul1024":
        input_res = (1, 1024, 1024)
    elif model_name == "matmul4096":
        input_res = (1, 4096, 4096)

    # Measure the model's MAC count with ptflops, then convert to ops
    # (1 MAC = 2 ops).
    macs, _ = get_model_complexity_info(model, input_res, as_strings=True, 
                                        print_per_layer_stat=False, verbose=True)
    float_macs = transStr2Float(macs)
    op_num = float_macs * 2
    # For these models, prefer the op counts reported in their papers.
    if model_name == "efficientnet_b3":
        op_num = 1.8
    elif model_name == "osnet":
        op_num = 0.98
    op_dir[model_name] = op_num

print(op_dir)
Beispiel #12
0
import torch
from ptflops import get_model_complexity_info
from ptsemseg.models.FASSDNet import FASSDNet

with torch.cuda.device(0):
    # Report complexity of FASSDNet for 19-class segmentation at the
    # Cityscapes half-resolution input (3 x 512 x 1024).
    model = FASSDNet(19)
    flops, params = get_model_complexity_info(
        model,
        (3, 512, 1024),
        as_strings=True,
        print_per_layer_stat=True,
    )
    print('Flops:  ' + flops)
    print('Params: ' + params)
                      int(n_ch * compress_factor),
                      kernel_size=1,
                      bias=False),
            nn.AvgPool2d(kernel_size=2, stride=2)
        ]
        self.layer = nn.Sequential(*layer)

    def forward(self, x):
        # Apply the sequential stack assembled in __init__ (1x1 conv +
        # average pooling per the fragment above — confirm against the full
        # class definition).
        return self.layer(x)


class View(nn.Module):
    """Reshape module: keeps the batch dimension and views the remaining
    elements as the shape supplied at construction time."""

    def __init__(self, *shape):
        super(View, self).__init__()
        # Target shape for everything after the batch dimension.
        self.shape = shape

    def forward(self, x):
        batch = x.shape[0]
        return x.view(batch, *self.shape)


if __name__ == '__main__':
    # Self-check: DenseNet-BC complexity as raw numbers (as_strings=False).
    from ptflops import get_model_complexity_info
    densenet_bc = DenseNetBC(depth=100,
                             growth_rate=12,
                             n_classes=100,
                             efficient=False)
    flops, params = get_model_complexity_info(densenet_bc, (3, 32, 32),
                                              as_strings=False,
                                              print_per_layer_stat=False)
    print("flops: {}, params: {}".format(flops, params))
    # NOTE(review): this nested ``forward`` looks like a pasted-in fragment
    # from another module — it defines a local function that is never called
    # here.  Confirm whether it belongs elsewhere.
    def forward(self, x):
        x = self.layer1(x)
        # out = self.layer2(x)
        return x


def get_parameter_number(net):
    """Count the parameters of *net*.

    Returns a dict with 'Total' (all parameters) and 'Trainable'
    (parameters with requires_grad set).
    """
    total = 0
    trainable = 0
    for param in net.parameters():
        count = param.numel()
        total += count
        if param.requires_grad:
            trainable += count
    return {'Total': total, 'Trainable': trainable}


def input_constructer(input_res):
    """Build the kwargs dict that ptflops feeds to the model's forward.

    *input_res* is accepted but ignored; the dummy clips have fixed shape
    (batch=1, channels, frames=5, 224, 224).
    """
    frames = torch.randn(size=(1, 3, 5, 224, 224))  # 3-channel image clip
    motion = torch.randn(size=(1, 2, 5, 224, 224))  # 2-channel motion clip
    return {'inputs': [[frames, motion], [frames, motion]]}


if __name__ == '__main__':
    net = Model(2, 5)

    # input_res is required by ptflops but superseded by the custom
    # input constructor above.
    opts = dict(as_strings=True, print_per_layer_stat=False, verbose=False)
    macs, params = get_model_complexity_info(
        net,
        input_res=(224, 224),
        input_constructor=input_constructer,
        **opts)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))
Beispiel #15
0
                net.append(layer)
                self.num_channels = c
        return nn.Sequential(*net)

    def forward(self, x):
        """Backbone stages, 1x1 conv head, 4x4 average pool, classifier."""
        h = self.layer1(x)
        h = self.layer2(h)
        h = self.activation(self.bn1(self.conv1(h)))
        h = F.avg_pool2d(h, 4)
        h = self.conv2(h)
        return torch.flatten(h, 1)


def mobilenet_v2(activation='relu6', num_classes=10, width_multiplier=1.):
    """Factory for a MobileNetV2 with the given activation, class count
    and channel-width multiplier."""
    model = MobileNetV2(activation=activation,
                        num_classes=num_classes,
                        width_multiplier=width_multiplier)
    return model


if __name__ == "__main__":
    from ptflops import get_model_complexity_info

    model = mobilenet_v2()
    # Verbose per-layer breakdown for a CIFAR-sized 32x32 RGB input.
    macs, params = get_model_complexity_info(
        model, (3, 32, 32),
        as_strings=True,
        print_per_layer_stat=True,
        verbose=True)
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
             'vgg16': models.vgg16,
             'squeezenet': models.squeezenet1_0,
             'densenet': models.densenet161,
             'inception': models.inception_v3}

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='ptflops sample script')
    parser.add_argument('--device', type=int, default=0,
                        help='Device to store the model.')
    parser.add_argument('--model', choices=list(pt_models.keys()),
                        type=str, default='resnet18')
    parser.add_argument('--result', type=str, default=None)
    args = parser.parse_args()

    # Per-layer report goes to stdout or to the requested file.
    if args.result is None:
        ost = sys.stdout
    else:
        ost = open(args.result, 'w')

    try:
        net = pt_models[args.model]()

        if torch.cuda.is_available():
            net.cuda(device=args.device)

        flops, params = get_model_complexity_info(net, (3, 224, 224),
                                                  as_strings=True,
                                                  print_per_layer_stat=True,
                                                  ost=ost)
        print('{:<30}  {:<8}'.format('Computational complexity: ', flops))
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))
    finally:
        # fix: the result file handle was opened but never closed.
        if ost is not sys.stdout:
            ost.close()
def eval_all_dataset():
    """Evaluate args.net over the full test set.

    For every test image: run a timed, CUDA-synchronized forward pass,
    compute the metrics listed in *header*, optionally accumulate
    flattened ground-truth/prediction arrays for ROC plotting, and save a
    per-image prediction result. Afterwards dump per-image scores to
    scores.csv and return a dict of mean metrics plus fps/flops/params.

    NOTE(review): relies on module-level names (args, n_test, test_loader,
    get_id, get_score, img_predict) assumed to be set up elsewhere.
    """
    args.net.eval()
    header = ["file_name", "iou", "dc", "pr", "acc", "sp", "se", "auc"]
    total_metrics, gt_list, pd_list, time_list, total_list = {}, [], [], [], []
    for h in header[1:]:
        total_metrics[h] = []
    file_name = get_id()

    with torch.no_grad():
        with tqdm(total=n_test, desc='Test', unit='img', leave=False) as p_bar:
            for index, batch in enumerate(test_loader):
                # load the picture
                image, label = batch['image'], batch['label']
                image = image.to(device=args.device, dtype=torch.float32)
                label = label.to(device=args.device, dtype=torch.float32)

                # statistics inference time (synchronize so the timer
                # measures the actual GPU work, not just kernel launch)
                torch.cuda.synchronize(args.device)
                start = time.time()
                output = args.net(image)
                output = torch.sigmoid(output)
                torch.cuda.synchronize(args.device)
                time_list.append(time.time() - start)

                # save as the numpy array for plot the auc roc curve
                if args.roc:
                    np_output = output.cpu().detach().numpy()[0, 0, :, :]
                    np_label = label.cpu().detach().numpy()[0, 0, :, :]
                    np_output = np.resize(np_output, np_label.shape)
                    gt_list += list(np_label.flatten())
                    pd_list += list(np_output.flatten())  # value between 0. and 1.

                # calculate the metrics
                rows = [file_name[index]]
                for h in header[1:]:
                    score = get_score(output, label, mode=h)
                    total_metrics[h] += [score]
                    rows.append(score)
                total_list.append(rows)
                p_bar.update(image.shape[0])

                # predict and save the result
                image = cv2.imread(os.path.join(args.dir_img, file_name[index]))
                img_predict(args, image, save_path=os.path.join(args.dir_result, file_name[index]))

    # return the results
    if args.roc:
        np.save(os.path.join(args.dir_log, "gt.npy"), gt_list)
        np.save(os.path.join(args.dir_log, "pd.npy"), pd_list)
    for h in header[1:]:
        total_metrics[h] = np.round(np.mean(total_metrics[h]), 4)
    data = pd.DataFrame(total_list)
    data.to_csv(
        # fix: removed a redundant nested os.path.join call
        os.path.join(args.dir_log, 'scores.csv'),
        header=header,
        index=True,
        mode='w',
        encoding='utf-8'
    )
    # fix: renamed from `fps` — this is mean seconds per image; the actual
    # fps is its reciprocal, computed below.
    mean_time = np.mean(time_list)
    try:
        flops, params = get_model_complexity_info(
            args.net,
            (args.n_channels, args.height, args.width),
            print_per_layer_stat=False
        )
    except RuntimeError as exception:
        if "out of memory" in str(exception):
            print("WARNING: out of memory")
            if hasattr(torch.cuda, 'empty_cache'):
                torch.cuda.empty_cache()
            flops, params = 0., 0.
        else:
            raise exception
    results = total_metrics
    results['fps'] = round(1.0 / mean_time, 0)
    results['flops'] = flops
    results['params'] = params

    return results
Beispiel #18
0
    width = 224
    height = 224

    fd = finetune.ModifiedVGG16Model()
    # fd = torch.load("/data/kong/pytorch-pruning/prune/Iteration:0.pth", map_location=lambda storage, loc: storage)
    # model.load_state_dict("/data/kong/pytorch-pruning/final-model-prunned")

    # print(fd)
    fd.eval()
    fd.to(device)
    x = torch.randn(1, 3, width, height).to(device)

    from ptflops import get_model_complexity_info

    flops, params = get_model_complexity_info(fd.to(device),
                                              (3, width, height),
                                              print_per_layer_stat=True,
                                              as_strings=True)
    # print("FLOPS:", flops)
    # print("PARAMS:", params)
    string = []
    # string.append(fd)
    string.append(f"FLOPs: {flops}\n")
    string.append(f"parameters: {params}\n")

    for i in range(5):
        time_time = time.time()
        features = fd(x)
        string.append("inference time: {} s \n".format(time.time() -
                                                       time_time))

    fopen = open("result_prune", "w+")
                   2):  # there are two special tokens [CLS] and [SEP]
        inp_seq += tokenizer.pad_token  # let's use pad token to form a fake
    # sequence for subsequent flops calculation

    inputs = tokenizer([inp_seq] * input_shape[0],
                       padding=True,
                       truncation=True,
                       return_tensors="pt")
    labels = torch.tensor([1] * input_shape[0])
    # Batch size input_shape[0], sequence length input_shape[128]
    inputs = dict(inputs)
    inputs.update({"labels": labels})
    return inputs


if __name__ == '__main__':
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
    # (2, 128): batch of 2 fake sequences of 128 tokens, built by the
    # tokenizer-aware input constructor.
    constructor = partial(bert_input_constructor, tokenizer=bert_tokenizer)
    flops_count, params_count = get_model_complexity_info(
        model, (2, 128),
        as_strings=True,
        input_constructor=constructor,
        print_per_layer_stat=False)
    print('{:<30}  {:<8}'.format('Computational complexity: ', flops_count))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params_count))

# Output:
# Computational complexity:       21.74 GMac
# Number of parameters:           109.48 M
Beispiel #20
0
    def inference_npu(self, model_name, batch_size):
        """Benchmark *model_name* on an Ascend NPU.

        Returns a tuple (durations, ops, opj):
          durations -- per-batch latencies in milliseconds (warm-up excluded)
          ops       -- achieved throughput scaled by 1e-9 (GOPS)
          opj       -- ops divided by self.hardware_info["NPU"]
                       (presumably the NPU power in watts — confirm)
        Returns ([], 0, 0) when no NPU is available.
        """
        if not torch.npu.is_available():
            print("error!!! you don't have npu")
            return [], 0, 0

        durations = []
        ops = 0
        opj = 0

        # Instantiate the model by name from the pm module, move it to NPU.
        model = pm.__dict__[model_name]()
        model = model.npu()
        # Per-model input resolutions (C, H, W).
        input_res = (3, 224, 224)
        if model_name == "osnet":
            input_res = (3, 256, 128)
        elif model_name == "mgn" or model_name == "pcb" or model_name == "baseline":
            input_res = (3, 384, 128)
        elif model_name == "alphapose":
            input_res = (3, 256, 192)
        elif model_name == "st_gcn_net":
            input_res = (3, 256, 14)
        macs, params = get_model_complexity_info(model, input_res, as_strings=True, 
                                        print_per_layer_stat=False, verbose=True)
        float_macs = transStr2Float(macs)
        # Ops per image: GMACs * 1e9 * 2 (each MAC = one multiply + one add).
        op_num = float_macs * pow(10, 9) * 2

        # Prefer the op counts reported in the paper over the measured MACs.
        if model_name == "efficientnet_b3":
            op_num = 1.8 * pow(10, 9)
        elif model_name == "osnet":
            op_num = 0.98 * pow(10,9)

        # Pick the dataset whose samples match the model's expected input.
        img_dataset = self.dataset
        if model_name == "osnet":
            img_dataset = self.dataset_osnet
        elif model_name == "mgn" or model_name == "pcb" or model_name == "baseline":
            img_dataset = self.dataset_reid
        elif model_name == "alphapose":
            img_dataset = self.dataset_pose
        elif model_name == "st_gcn_net":
            img_dataset = self.dataset_stgcn
        img_dataloader = DataLoader(dataset = img_dataset,
                                batch_size = batch_size,
                                num_workers = 4)
        loop_num = self.warm_up + self.infer_epoch
        time_sum = 0
        model.eval()

        for step, img in enumerate(img_dataloader):
            img = img.npu()
            if step >= loop_num:
                break
            # Time each forward pass with NPU events (elapsed_time is in ms).
            starter, ender = torch.npu.Event(enable_timing = True), torch.npu.Event(enable_timing = True)
            starter.record()
            model(img)
            ender.record()
            torch.npu.synchronize()
            # Only count batches after the warm-up phase.
            if step >= self.warm_up:
                now_durations = starter.elapsed_time(ender)
                durations.append(now_durations)
                time_sum += now_durations / 1000

        total_img_num = self.infer_epoch * batch_size
        # Throughput in GOPS, then ops-per-joule via the hardware figure.
        ops = (op_num * total_img_num / time_sum) * pow(10,-9)
        opj = ops / self.hardware_info["NPU"]

        return durations, ops, opj
Beispiel #21
0
to_device = 'cpu'
input_size = (3, 400, 400)
verbose = False

# Compare complexity of the U^2-Net variants with two profilers.
model_names = ['u2net', 'u2netp', 'u2net_groupconv', 'u2net_dsconv']

for name in model_names:
    model = get_net(name, False).to(to_device)

    # thop: needs a concrete input tensor
    dummy = torch.randn(1, *input_size, device=to_device)
    flops, params = profile(model, (dummy, ), verbose=verbose)
    print(f"{name} flops: {flops}, params: {params}")

    # ptflops: works from the input shape alone
    macs, params = get_model_complexity_info(
        model, input_size,
        as_strings=True,
        print_per_layer_stat=False,
        verbose=verbose)
    print(name)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))

    #  torchsummary
    # summary(model, input_size=input_size,  device=to_device) # (channels, h, w)

# U2NETP
# Input size (MB): 1.83 (1, 3, 400, 400)
# Forward/backward pass size (MB): 2270.41
# Params size (MB): 4.32
# Estimated Total Size (MB): 2276.56
# Computational complexity (GMac): 31.16 
# Number of parameters (M): 1.13
Beispiel #22
0
def main():
    """Set up training: parse the YAML option file, configure (optionally
    distributed) execution, loggers, random seed and the train/val
    dataloaders, then build the model and print its ptflops complexity
    for a (3, 480, 480) probe input.
    """
    #### options
    parser = argparse.ArgumentParser()
    parser.add_argument('-opt', type=str, help='Path to option YMAL file.')
    parser.add_argument('--launcher',
                        choices=['none', 'pytorch'],
                        default='none',
                        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    opt = option.parse(args.opt, is_train=True)

    #### distributed training settings
    if args.launcher == 'none':  # disabled distributed training
        opt['dist'] = False
        rank = -1
        print('Disabled distributed training.')
    else:
        opt['dist'] = True
        init_dist()
        world_size = torch.distributed.get_world_size()
        rank = torch.distributed.get_rank()

    #### loading resume state if exists
    if opt['path'].get('resume_state', None):
        # distributed resuming: all load into default GPU
        device_id = torch.cuda.current_device()
        resume_state = torch.load(
            opt['path']['resume_state'],
            map_location=lambda storage, loc: storage.cuda(device_id))
        option.check_resume(opt, resume_state['iter'])  # check resume options
    else:
        resume_state = None

    #### mkdir and loggers
    if rank <= 0:  # normal training (rank -1) OR distributed training (rank 0)
        if resume_state is None:
            util.mkdir_and_rename(
                opt['path']
                ['experiments_root'])  # rename experiment folder if exists
            util.mkdirs(
                (path for key, path in opt['path'].items()
                 if not key == 'experiments_root'
                 and 'pretrain_model' not in key and 'resume' not in key))

        # config loggers. Before it, the log will not work
        util.setup_logger('base',
                          opt['path']['log'],
                          'train_' + opt['name'],
                          level=logging.INFO,
                          screen=True,
                          tofile=True)
        util.setup_logger('val',
                          opt['path']['log'],
                          'val_' + opt['name'],
                          level=logging.INFO,
                          screen=True,
                          tofile=True)
        logger = logging.getLogger('base')
        logger.info(option.dict2str(opt))
        # tensorboard logger
        if opt['use_tb_logger'] and 'debug' not in opt['name']:
            # NOTE(review): float(torch.__version__[0:3]) parses "1.10"-style
            # strings as 1.1 — adequate for this >= 1.1 check, but not a
            # general-purpose version comparison.
            version = float(torch.__version__[0:3])
            if version >= 1.1:  # PyTorch 1.1
                from torch.utils.tensorboard import SummaryWriter
            else:
                logger.info(
                    'You are using PyTorch {}. Tensorboard will use [tensorboardX]'
                    .format(version))
                from tensorboardX import SummaryWriter
            tb_logger = SummaryWriter(log_dir='../tb_logger/' + opt['name'])
    else:
        util.setup_logger('base',
                          opt['path']['log'],
                          'train',
                          level=logging.INFO,
                          screen=True)
        logger = logging.getLogger('base')

    # convert to NoneDict, which returns None for missing keys
    opt = option.dict_to_nonedict(opt)

    #### random seed
    seed = opt['train']['manual_seed']
    if seed is None:
        seed = random.randint(1, 10000)
    if rank <= 0:
        logger.info('Random seed: {}'.format(seed))
    util.set_random_seed(seed)

    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True

    #### create train and val dataloader
    dataset_ratio = 200  # enlarge the size of each epoch
    for phase, dataset_opt in opt['datasets'].items():
        if phase == 'train':
            train_set = create_dataset(dataset_opt)
            train_size = int(
                math.ceil(len(train_set) / dataset_opt['batch_size']))
            total_iters = int(opt['train']['niter'])
            total_epochs = int(math.ceil(total_iters / train_size))
            if opt['dist']:
                # NOTE(review): world_size is only bound on the distributed
                # branch above; opt['dist'] is True exactly then, so this is
                # safe, but the coupling is implicit.
                train_sampler = DistIterSampler(train_set, world_size, rank,
                                                dataset_ratio)
                total_epochs = int(
                    math.ceil(total_iters / (train_size * dataset_ratio)))
            else:
                train_sampler = None
            train_loader = create_dataloader(train_set, dataset_opt, opt,
                                             train_sampler)
            if rank <= 0:
                logger.info(
                    'Number of train images: {:,d}, iters: {:,d}'.format(
                        len(train_set), train_size))
                logger.info('Total epochs needed: {:d} for iters {:,d}'.format(
                    total_epochs, total_iters))
        elif phase == 'val':
            val_set = create_dataset(dataset_opt)
            val_loader = create_dataloader(val_set, dataset_opt, opt, None)
            if rank <= 0:
                logger.info('Number of val images in [{:s}]: {:d}'.format(
                    dataset_opt['name'], len(val_set)))
        else:
            raise NotImplementedError(
                'Phase [{:s}] is not recognized.'.format(phase))
    # a 'train' phase must have been present in opt['datasets']
    assert train_loader is not None

    #### create model
    model = create_model(opt)
    # Report generator complexity for a fixed (3, 480, 480) probe input.
    flops, params = get_model_complexity_info(model.netG, (3, 480, 480),
                                              as_strings=True,
                                              print_per_layer_stat=True,
                                              verbose=True)
    print('{:<30}  {:<8}'.format('Computational complexity: ', flops))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))
Beispiel #23
0
def resnet152(num_classes, pretrained=False, phase='train', **kwargs):
    """Constructs a ResNet-152 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(num_classes, Bottleneck, [3, 8, 36, 3], phase, **kwargs)
    if not pretrained:
        return model
    # Non-strict load: the classifier head may differ from the ImageNet one.
    state = model_zoo.load_url(model_urls['resnet152'], model_dir='.')
    model.load_state_dict(state, strict=False)
    return model


if __name__ == "__main__":
    model = resnet18(num_classes=2)

    x = torch.randn((10, 3, 300, 300))

    # Trace the tensor shape through each top-level child module.
    for name, module in model.named_children():
        x = module(x)

        print(name, x.shape)

    from ptflops import get_model_complexity_info

    img_dim = 300
    # fix: input_res must include the channel dimension (C, H, W); the bare
    # (300, 300) did not match the 3-channel input used above.
    flops, params = get_model_complexity_info(model, (3, img_dim, img_dim),
                                              as_strings=True,
                                              print_per_layer_stat=True)
    print('Flops: ' + flops)
    print('Params: ' + params)
Beispiel #24
0
def test(args, io):
    """Evaluate a saved point-cloud classifier on the ModelNet40 test split.

    Builds the network selected by ``args.model``, loads weights from
    ``args.model_path`` (handling DataParallel-saved checkpoints), reports
    model complexity via ptflops, then computes overall and balanced
    accuracy, writing the summary through ``io.cprint``.
    """
    test_loader = DataLoader(ModelNet40(partition='test',
                                        num_points=args.num_points),
                             batch_size=args.test_batch_size,
                             shuffle=True,
                             drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    #Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    elif args.model == 'ssg':
        model = PointNet2SSG(output_classes=40, dropout_prob=0)
        model.to(device)
    elif args.model == 'msg':
        model = PointNet2MSG(output_classes=40, dropout_prob=0)
        model.to(device)
    elif args.model == 'ognet':
        # [64,128,256,512]
        model = Model_dense(20,
                            args.feature_dims, [512],
                            output_classes=40,
                            init_points=768,
                            input_dims=3,
                            dropout_prob=args.dropout,
                            id_skip=args.id_skip,
                            drop_connect_rate=args.drop_connect_rate,
                            cluster='xyzrgb',
                            pre_act=args.pre_act,
                            norm=args.norm_layer)
        if args.efficient:
            model = ModelE_dense(20,
                                 args.feature_dims, [512],
                                 output_classes=40,
                                 init_points=768,
                                 input_dims=3,
                                 dropout_prob=args.dropout,
                                 id_skip=args.id_skip,
                                 drop_connect_rate=args.drop_connect_rate,
                                 cluster='xyzrgb',
                                 pre_act=args.pre_act,
                                 norm=args.norm_layer,
                                 gem=args.gem,
                                 ASPP=args.ASPP)
        model.to(device)
    elif args.model == 'ognet-small':
        # [48,96,192,384]
        model = Model_dense(20,
                            args.feature_dims, [512],
                            output_classes=40,
                            init_points=768,
                            input_dims=3,
                            dropout_prob=args.dropout,
                            id_skip=args.id_skip,
                            drop_connect_rate=args.drop_connect_rate,
                            cluster='xyzrgb',
                            pre_act=args.pre_act,
                            norm=args.norm_layer)
        model.to(device)
    else:
        raise Exception("Not implemented")

    try:
        model.load_state_dict(torch.load(args.model_path))
    except Exception:  # fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt
        # Checkpoint was presumably saved from nn.DataParallel
        # ('module.'-prefixed keys); retry through a DataParallel wrapper.
        model = nn.DataParallel(model)
        model.load_state_dict(torch.load(args.model_path))
    model = model.eval()
    # fix: `.module` only exists on the DataParallel wrapper; the plain
    # load path used to crash here with AttributeError.
    if isinstance(model, nn.DataParallel):
        model = model.module

    batch0, label0 = next(iter(test_loader))
    batch0 = batch0[0].unsqueeze(0)
    print(batch0.shape)
    print(model)

    # NOTE(review): passing `batch0` ahead of the (1024, 3) resolution does
    # not match stock ptflops' signature — confirm this targets a patched
    # get_model_complexity_info before changing it.
    macs, params = get_model_complexity_info(model,
                                             batch0, ((1024, 3)),
                                             as_strings=True,
                                             print_per_layer_stat=False,
                                             verbose=True)

    print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))

    test_acc = 0.0
    count = 0.0
    test_true = []
    test_pred = []
    for data, label in test_loader:

        data, label = data.to(device), label.to(device).squeeze()
        batch_size = data.size()[0]
        # The ognet/pointnet2 variants take the point cloud twice.
        if args.model == 'ognet' or args.model == 'ognet-small' or args.model == 'ssg' or args.model == 'msg':
            logits = model(data, data)
            #logits = model(1.1*data, 1.1*data)
        else:
            data = data.permute(0, 2, 1)
            logits = model(data)
        preds = logits.max(dim=1)[1]
        test_true.append(label.cpu().numpy())
        test_pred.append(preds.detach().cpu().numpy())
    test_true = np.concatenate(test_true)
    test_pred = np.concatenate(test_pred)
    test_acc = metrics.accuracy_score(test_true, test_pred)
    avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)
    outstr = 'Test :: test acc: %.6f, test avg acc: %.6f' % (test_acc,
                                                             avg_per_class_acc)
    io.cprint(outstr)
Beispiel #25
0
def main():
    """Train (or evaluate) a CIFAR classifier.

    Prepares CIFAR-10/100 dataloaders, builds the architecture named by
    ``args.arch``, optionally resumes from a checkpoint, then either runs
    a single evaluation pass (``args.evaluate``) with a ptflops complexity
    report, or trains for ``args.epochs`` epochs, checkpointing the best
    test accuracy.

    NOTE(review): relies on module-level state (args, best_acc, use_cuda,
    arch_module, train/test helpers, Logger, save_checkpoint, savefig).
    """
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.save):
        mkdir_p(args.save)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
    elif args.dataset == 'cifar100':
        dataloader = datasets.CIFAR100
    else:
        raise ValueError(
            'Expect dataset to be either CIFAR-10 or CIFAR-100 but got {}'.
            format(args.dataset))

    trainset = dataloader(root='./data',
                          train=True,
                          download=True,
                          transform=transform_train)
    trainloader = data.DataLoader(trainset,
                                  batch_size=args.train_batch,
                                  shuffle=True,
                                  num_workers=args.workers)

    testset = dataloader(root='./data',
                         train=False,
                         download=False,
                         transform=transform_test)
    testloader = data.DataLoader(testset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    model = arch_module.__dict__[args.arch](dataset=args.dataset)
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                  milestones=args.schedule,
                                                  gamma=args.gamma)
    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.save = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_prec1']
        start_epoch = checkpoint['epoch']
        # Rebuild the model with the (possibly pruned) config stored in
        # the checkpoint.
        model = arch_module.__dict__[args.arch](dataset=args.dataset,
                                                cfg=checkpoint['cfg'])
        # load the state dict of saved checkpoint
        # turn the flag off to train from scratch
        if args.load_model:
            print('===> Resuming the state dict of saved model')
            model.load_state_dict(checkpoint['state_dict'])
        else:
            print('===> Skip loading state dict of saved model')
        # finetune a pruned network
        if args.load_optimizer and ('optimizer' in checkpoint.keys()):
            print('===> Resuming the state dict of saved checkpoint')
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('===> Skip loading the state dict of saved optimizer')
        # if the log file is already exist then append the log to it
        if os.path.isfile('log.txt'):
            logger = Logger(os.path.join(args.save, 'log.txt'),
                            title=title,
                            resume=True)
        else:
            logger = Logger(os.path.join(args.save, 'log.txt'), title=title)
            logger.set_names([
                'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
                'Valid Acc.'
            ])
    else:
        # training from scratch
        logger = Logger(os.path.join(args.save, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if use_cuda:
        model = model.cuda()

    # evaluate the results on test set
    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch,
                                   use_cuda)
        # fix: removed a dead `inp = torch.rand(1, 3, 32, 32)` allocation
        # that was never used (ptflops builds its own probe input).
        flops, params = get_model_complexity_info(model, (3, 32, 32),
                                                  as_strings=True,
                                                  print_per_layer_stat=True)
        print('{:<30}  {:<8}'.format('Computational complexity: ', flops))
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        current_lr = next(iter(optimizer.param_groups))['lr']
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, current_lr))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                      lr_scheduler, epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch,
                                   use_cuda)

        # append logger file
        logger.append([current_lr, train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': test_acc,
                'optimizer': optimizer.state_dict(),
                'cfg': model.cfg
            },
            is_best,
            checkpoint=args.save)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.save, 'log.eps'))

    print('Best acc:')
    print(best_acc)
Beispiel #26
0
            if 'classifier' in i:
                continue
            self.state_dict()[i].copy_(param_dict[i])

def load_pretrained_model(model, weight_path):
    """Load weights into *model*.

    If *weight_path* is given it is passed straight to
    ``model.load_state_dict`` — so it must already be a state dict.
    NOTE(review): if callers actually pass a file path here, this needs a
    ``torch.load(weight_path)`` first; confirm against call sites.
    Otherwise the pretrained state dict is fetched from ``PretrainedURL``.
    """
    if weight_path is not None:  # fix: compare with None by identity, not !=
        _ = model.load_state_dict(weight_path)
        return
    state_dict = model_zoo.load_url(PretrainedURL, map_location=torch.device("cpu"))
    _ = model.load_state_dict(state_dict)

def baseline(pretrained = False, weight_path = None):
    """Build a Baseline model with Kaiming initialization; optionally
    load pretrained weights on top of it."""
    net = Baseline()
    weights_init_kaiming(net)
    if not pretrained:
        return net
    load_pretrained_model(net, weight_path)
    return net

if __name__ == "__main__":
    model = baseline(pretrained=False)
    model.eval()

    # Re-ID sized dummy input: 1 x 3 x 384 x 128.
    img = torch.randn(1,3,384,128)

    # Wall-clock a single forward pass, reported in milliseconds.
    tic = time.time()
    out = model(img)
    duration = (time.time() - tic) * 1000
    print("duration is ", duration)

    # MAC counts from two independent profilers for cross-checking.
    macs, params = get_model_complexity_info(model, (3, 384, 128), print_per_layer_stat=False)
    print("another mac is ", macs)
    macs, params = profile(model, inputs=(img, ))
    print("the mac is ", macs)
Beispiel #27
0
# Author:Han
# @Time : 2019/5/20 17:23

import torch
from torchvision import models
from ptflops import get_model_complexity_info

device = torch.device("cpu")
net = models.AlexNet()
# Positional args: as_strings=True, print_per_layer_stat=True.
flops, params = get_model_complexity_info(net, (3, 224, 224), True, True)
print('Flops:' + flops)
print('Params:' + params)

model = str(net)  # cast the model to a string so it can be written to a file
# fix: use a context manager — the file handle was never closed.
with open("Alexnet.txt", 'w') as file:
    file.write(model)  # write the model structure to the file
def main(args):
    """Train/evaluate ResNet-18 (optionally OctConv-converted) on CIFAR-10.

    Builds the CIFAR-10 data pipeline, reports model complexity before and
    after the optional OctConv conversion, optionally resumes from a
    checkpoint, then runs the train/validate loop, checkpointing and
    appending a metrics line to a log file each epoch.

    Args:
        args: parsed CLI namespace (batch_size, checkpoint, enable_octave,
            resume, evaluate, lr, start_epoch, epochs, ...).
    """
    global best_acc  # best validation accuracy so far, tracked module-wide

    # Standard CIFAR-10 augmentation + per-channel normalization.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    train_dataset = torchvision.datasets.CIFAR10(root='./data',
                                                 train=True,
                                                 download=True,
                                                 transform=transform_train)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=4)

    val_dataset = torchvision.datasets.CIFAR10(root='./data',
                                               train=False,
                                               download=True,
                                               transform=transform_test)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=4)

    print("number of batches are ", len(train_loader))

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # ImageNet-pretrained ResNet-18 with a fresh 10-way head for CIFAR-10.
    model = torchvision.models.resnet18(pretrained=True)
    model.fc = nn.Linear(in_features=512, out_features=10, bias=True)

    flops, params = get_model_complexity_info(model, (3, 32, 32),
                                              as_strings=True,
                                              print_per_layer_stat=False)
    print("FLOPs in original resnet18 model are ", flops)
    print("Number of Params in original resnet18 model are", params)

    if args.enable_octave:
        # Converts the convolutions to OctConv in place, then re-measure.
        make_octconv_net(model)
        flops, params = get_model_complexity_info(model, (3, 32, 32),
                                                  as_strings=True,
                                                  print_per_layer_stat=False)
        print("FLOPs in OctConv resnet18 model are ", flops)
        print("Number of Params in OctConv resnet18 model are", params)

    model = torch.nn.DataParallel(model).cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=5e-4)
    # Drops the LR by 10x when validation accuracy ('max' mode) plateaus.
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='max',
                                  factor=0.1,
                                  patience=10,
                                  verbose=True)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            # Resume with the LR the optimizer had when it was saved.
            args.lr = checkpoint['optimizer']['param_groups'][0]['lr']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Evaluation-only mode: one validation pass, then exit.
    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):

        # Read the current LR back from the optimizer (the scheduler may
        # have changed it on a previous epoch).
        lr = optimizer.state_dict()['param_groups'][0]['lr']
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        train_loss, train_acc = train(train_loader, model, optimizer,
                                      criterion)
        valid_loss, valid_acc = validate(val_loader, model, criterion)

        print(" val loss     ", valid_loss)
        print(" val Accuracy ", valid_acc)

        is_best = valid_acc > best_acc
        best_acc = max(valid_acc, best_acc)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)

        # Append this epoch's metrics; the context manager closes the
        # handle even if the write raises (the original leaked it).
        log_name = ("log_cifar10_resnet_octave_conv_.txt"
                    if args.enable_octave else
                    "log_cifar10_resnet_vanilla_conv_.txt")
        with open(log_name, "a") as f:
            f.write('Train FP epoch: [{0}]\t'
                    'Train loss {train_loss:.3f} \t'
                    'Train Accuracy {train_acc:.3f} \t'
                    'Val loss {valid_loss:.3f} \t'
                    'Val Accuracy {valid_acc:.3f} \t'
                    'LR {lr} \n'.format(epoch,
                                        train_loss=train_loss,
                                        train_acc=train_acc,
                                        valid_loss=valid_loss,
                                        valid_acc=valid_acc,
                                        lr=lr))

        scheduler.step(valid_acc)
Beispiel #29
0
                                      kernel_size=1,
                                      stride=1,
                                      padding=0)
        self.scale = scale

    def forward(self, x):
        """Refine features with 3x3 then 1x1 convs; bilinearly upsample
        the result by ``self.scale`` when the scale is greater than 1."""
        feat = self.conv_3x3(x)
        out = self.conv_1x1(feat)
        if self.scale > 1:
            out = F.interpolate(out,
                                scale_factor=self.scale,
                                mode='bilinear',
                                align_corners=True)
        return out


if __name__ == "__main__":
    model = BiSeNet(19, is_training=False, criterion=None, ohem_criterion=None)
    print(model)
    from ptflops import get_model_complexity_info

    with torch.cuda.device(0):
        flops, params = get_model_complexity_info(model,
                                                  input_res=(3, 1024, 2048),
                                                  as_strings=True,
                                                  print_per_layer_stat=True)
        print('Flops:  ' + flops)
        print('Params: ' + params)
Beispiel #30
0
    model = torch.nn.DataParallel(model, device_ids=opt.gpu_ids).cuda()
    model.load_state_dict(torch.load(model_path, map_location=dev))
    model.module.proj_output = nn.Sequential()
    model.module.classifier = nn.Sequential()
    if opt.npart > 1:
        for i in range(opt.npart):
            model.module.proj_outputs[i] = nn.Sequential()

print(model_path)

# Grab one query sample and keep a single image as a (1, C, H, W) batch.
batch0, label0 = next(iter(query_loader))
batch0 = batch0[0].unsqueeze(0)
print(batch0.shape)
# NOTE(review): this call passes a concrete input tensor plus an extra
# resolution tuple — a non-standard get_model_complexity_info signature;
# presumably a project-local/forked ptflops. Verify against its definition.
macs, params = get_model_complexity_info(model,
                                         batch0, ((round(6890 * opt.slim), 3)),
                                         as_strings=True,
                                         print_per_layer_stat=False,
                                         verbose=True)
#print(macs)
print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
print('{:<30}  {:<8}'.format('Number of parameters: ', params))
#model_parameters = filter(lambda p: p.requires_grad, model.parameters())
#params = sum([np.prod(p.size()) for p in model_parameters])
#print('Number of parameters: %.2f M'% (params/1e6) )

# Ensure ./snapshot/<run name>/ exists for saved models.
if not os.path.exists('./snapshot/'):
    os.mkdir('./snapshot/')
save_model_path = './snapshot/' + opt.name
if not os.path.exists(save_model_path):
    os.mkdir(save_model_path)