Example #1
# Data
print('==> Preparing data..')
data_tmp = imagenet.Data(args)
train_loader = data_tmp.trainLoader
val_loader = data_tmp.testLoader

# Architecture
if args.arch == 'resnet':
    origin_model = import_module(f'model.{args.arch}').resnet(args.cfg).to(device)
else:
    raise ValueError('arch does not exist!')

# Calculate FLOPs of origin model
input = torch.randn(1, 3, 224, 224).to(device)
oriflops, oriparams = profile(origin_model, inputs=(input, ))
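
# Editor's note (a sketch, not part of the original script): thop's profile()
# returns raw counts, and the value it reports as "flops" corresponds to
# multiply-accumulate operations (MACs). A small helper to print the numbers
# above in readable units:
def readable_counts(macs, params):
    # many papers report FLOPs as roughly 2 * MACs
    return '%.3f GMACs (~%.3f GFLOPs), %.3f M params' % (
        macs / 1e9, 2 * macs / 1e9, params / 1e6)

print(readable_counts(oriflops, oriparams))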

# Based on the trained class-wise mask, perform global voting to obtain pruned model
def build_resnet_pruned_model(origin_model):

    pruning_rate_now = 0
    channel_prune_rate = 0.9
    num_mask_cfg = {'resnet50' : 48}

    while pruning_rate_now < args.pruning_rate:

        score = []
        index_cfg = []
        block_index_cfg = []
        layer_cfg = []
        block_cfg = []
Example #2
def ghostnet(**kwargs):
    """
    Constructs a GhostNet model
    """
    cfgs = [
        # k, t, c, SE, s
        # stage1
        [[3, 16, 16, 0, 1]],
        # stage2
        [[3, 48, 24, 0, 2]],
        [[3, 72, 24, 0, 1]],
        # stage3
        [[5, 72, 40, 0.25, 2]],
        [[5, 120, 40, 0.25, 1]],
        # stage4
        [[3, 240, 80, 0, 2]],
        [[3, 200, 80, 0, 1], [3, 184, 80, 0, 1], [3, 184, 80, 0, 1],
         [3, 480, 112, 0.25, 1], [3, 672, 112, 0.25, 1]],
        # stage5
        [[5, 672, 160, 0.25, 2]],
        [[5, 960, 160, 0, 1], [5, 960, 160, 0.25, 1], [5, 960, 160, 0, 1],
         [5, 960, 160, 0.25, 1]]
    ]
    return GhostNet(cfgs, **kwargs)


if __name__ == '__main__':
    model = ghostnet()
    model.eval()
    print('[1] ', model)

    # input = torch.randn(1, 3, 320, 256)
    input = torch.randn(1, 3, 224, 224)
    y = model(input)

    macs, params = profile(model, inputs=(input, ))
    print('[3] flops=', macs, ', param=', params)
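
    # Editor's sketch: a quick cross-check of thop's parameter count with
    # plain PyTorch (the two should agree up to counting conventions):
    n_params = sum(p.numel() for p in model.parameters())
    print('[4] param check:', n_params)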
Example #3
        x = self.dilated2_6(x)
        x = self.asymmetric2_7(x)
        x = self.dilated2_8(x)
        x_dsn = self.dsn(x)

        # Stage 4 - Decoder
        x = self.head(x, self.recurrence)
        x = F.interpolate(input=x, size=(h, w), mode='bilinear', align_corners=True)
        x_dsn = F.interpolate(input=x_dsn, size=(h, w), mode='bilinear', align_corners=True)

        return [x, x_dsn]


def get_eccnet(gpu_ids=1, ema=False, num_classes=1):
    net = ECCNet(num_classes=num_classes, recurrence=1)
    if ema:
        for param in net.parameters():
            param.detach_()
    return init_network(net, gpu_ids)


if __name__ == '__main__':
    model = ECCNet(num_classes=1)
    print(model)
    from thop import profile, clever_format

    input = torch.randn(1, 3, 1024, 1024)
    flops, params = profile(model, inputs=(input,), verbose=False)
    flops, params = clever_format([flops, params], "%.3f")
    print('flops: ', flops, "params: ", params)
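
    # Editor's sketch (assumes a CUDA device is available): thop simply runs a
    # forward pass with hooks attached, so the dummy input must live on the
    # same device as the model when profiling a GPU copy.
    if torch.cuda.is_available():
        model_gpu = ECCNet(num_classes=1).cuda()
        x_gpu = torch.randn(1, 3, 1024, 1024, device='cuda')
        flops_gpu, params_gpu = profile(model_gpu, inputs=(x_gpu,), verbose=False)
        print('gpu flops/params:', *clever_format([flops_gpu, params_gpu], "%.3f"))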
Example #4
                    layers.append(block(self.cur_channel, c, s, t))
                else:
                    layers.append(block(self.cur_channel, c, 1, t))
                self.cur_channel = c

        return nn.Sequential(*layers)

    def forward(self, x):

        x = self.conv1(x)
        x = self.dwconv1(x)
        x = self.layers(x)
        x = self.conv2(x)
        # x = self.output_layer(x)
        x = self.linear7(x)
        sig_x = x
        x = self.linear1(x)
        x = x.view(x.size(0), -1)
        return x, sig_x


if __name__ == "__main__":
    input = torch.Tensor(1, 3, 112, 112)
    model = MobileFace(use_cbam=True)
    flops, params = thop.profile(model, inputs=(input, ))
    flops, params = thop.clever_format([flops, params], "%.3f")
    print(flops, params)
    # model = model.eval()
#     out = model(input)
#     print(out.shape)
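
    # Editor's sketch: profiling is just a forward pass, so running a fresh
    # copy in eval mode under no_grad avoids dropout/batch-norm side effects
    # and saves memory on large inputs:
    model2 = MobileFace(use_cbam=True).eval()
    with torch.no_grad():
        flops2, params2 = thop.profile(model2, inputs=(input, ), verbose=False)
    print(*thop.clever_format([flops2, params2], "%.3f"))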
Example #5
def main(logger, args):
    if not torch.cuda.is_available():
        raise Exception("need gpu to train network!")

    if args.seed is not None:
        random.seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True

    gpus = torch.cuda.device_count()
    logger.info(f'use {gpus} gpus')
    logger.info(f"args: {args}")

    cudnn.benchmark = True
    cudnn.enabled = True
    start_time = time.time()

    # dataset and dataloader
    logger.info('start loading data')
    train_loader = DataLoader(Config.train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    val_loader = DataLoader(Config.val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.num_workers)
    logger.info('finish loading data')

    logger.info(f"creating model '{args.network}'")
    model = resnet50(**{
        "pretrained": args.pretrained,
        "num_classes": args.num_classes,
    })

    flops_input = torch.randn(1, 3, args.input_image_size,
                              args.input_image_size)
    flops, params = profile(model, inputs=(flops_input, ))
    flops, params = clever_format([flops, params], "%.3f")
    logger.info(f"model: '{args.network}', flops: {flops}, params: {params}")

    for name, param in model.named_parameters():
        logger.info(f"{name},{param.requires_grad}")

    model = model.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.milestones, gamma=0.1)

    if args.apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    model = nn.DataParallel(model)

    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            raise Exception(
                f"{args.resume} is not a file, please check it again")
        logger.info('start only evaluating')
        logger.info(f"start resuming model from {args.evaluate}")
        checkpoint = torch.load(args.evaluate,
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
        acc1, acc5, throughput = validate(val_loader, model, args)
        logger.info(
            f"epoch {checkpoint['epoch']:0>3d}, top1 acc: {acc1:.2f}%, top5 acc: {acc5:.2f}%, throughput: {throughput:.2f}sample/s"
        )

        return

    start_epoch = 1
    # resume training
    if os.path.exists(args.resume):
        logger.info(f"start resuming model from {args.resume}")
        checkpoint = torch.load(args.resume, map_location=torch.device('cpu'))
        start_epoch += checkpoint['epoch']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        logger.info(
            f"finish resuming model from {args.resume}, epoch {checkpoint['epoch']}, "
            f"loss: {checkpoint['loss']:3f}, lr: {checkpoint['lr']:.6f}, "
            f"top1_acc: {checkpoint['acc1']}%")

    if not os.path.exists(args.checkpoints):
        os.makedirs(args.checkpoints)

    logger.info('start training')
    for epoch in range(start_epoch, args.epochs + 1):
        acc1, acc5, losses = train(train_loader, model, criterion, optimizer,
                                   scheduler, epoch, logger, args)
        logger.info(
            f"train: epoch {epoch:0>3d}, top1 acc: {acc1:.2f}%, top5 acc: {acc5:.2f}%, losses: {losses:.2f}"
        )

        acc1, acc5, throughput = validate(val_loader, model, args)
        logger.info(
            f"val: epoch {epoch:0>3d}, top1 acc: {acc1:.2f}%, top5 acc: {acc5:.2f}%, throughput: {throughput:.2f}sample/s"
        )

        # remember best prec@1 and save checkpoint
        torch.save(
            {
                'epoch': epoch,
                'acc1': acc1,
                'loss': losses,
                'lr': scheduler.get_lr()[0],
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
            }, os.path.join(args.checkpoints, 'latest.pth'))
        if epoch == args.epochs:
            torch.save(
                model.module.state_dict(),
                os.path.join(
                    args.checkpoints,
                    "{}-epoch{}-acc{}.pth".format(args.network, epoch, acc1)))

    training_time = (time.time() - start_time) / 3600
    logger.info(
        f"finish training, total training time: {training_time:.2f} hours")
Example #6
            test_loss = test_model(model=model, test_loader=test_loader)
            missing_entity_test_loss = test_model(
                model=model, test_loader=missing_entity_test_loader)
            print('epoch:', epoch, 'test_loss:', test_loss)
            print('epoch:', epoch, 'missing_entity_test_loss:',
                  missing_entity_test_loss)
            logger.log_scalar('test_loss', test_loss, step=train_step)
            logger.log_scalar('missing_entity_test_loss',
                              missing_entity_test_loss,
                              step=train_step)
            mylogger.log(seed=seed, tag='test_loss', value=test_loss)
            mylogger.log(seed=seed,
                         tag='missing_entity_test_loss',
                         value=missing_entity_test_loss)
        if train_step == 0:
            flops, params = profile(model=model, inputs=(data, ))
            print(flops)
            print(params)
        train_step += 1

model.eval()
for data in missing_entity_test_loader1:
    with torch.no_grad():
        out = model(data)
        print(out)
        print(data.x)
        print(data.y[:, :2])
        loss_vec = F.mse_loss(out * data.y[:, 2:],
                              data.y[:, :2] * data.y[:, 2:],
                              reduction='none')
        loss = torch.sum(torch.sum(loss_vec, dim=1)) / test_size
Example #7
def get_profile(model, dataset, output):
    """ Params, """
    data = dataset.X[0].unsqueeze(0)
    macs, params = profile(model, (data, ), verbose=False)
    Print("Params(M): %.3f" % (params / 10**6), output)
Example #8
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train":
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        "val":
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(),
                                             "../.."))  # get data root path
    image_path = os.path.join("D:", "imagenet")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(
        image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(
        image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    print('Using {} dataloader workers per process'.format(nw))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
    )

    validate_dataset = datasets.ImageFolder(root=os.path.join(
        image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(
        validate_dataset,
        batch_size=batch_size,
        shuffle=False,
    )

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    net = resnest50(num_classes=10)
    net_input = torch.randn(batch_size, 3, 224, 224)
    flops, params = profile(net, inputs=(net_input, ))
    print("网络计算量GFlops:{},网络参数量#P:{}".format(flops, params))

    # load pretrain weights
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth

    #assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)

    ##### load trained parameters ###############
    #model_weight_path = "./resNest50.pth"
    #missing_keys, unexpected_keys = net.load_state_dict(torch.load(model_weight_path), strict=False)
    #for param in net.parameters():
    #    param.requires_grad = False
    ##### load trained parameters ###############
    # change fc layer structure
    in_channel = net.fc.in_features
    net.fc = nn.Linear(in_channel, 10)
    net.to(device)

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=lr)

    best_acc = 0.0
    save_path = './resNest50.pth'
    list_loss = list()
    list_acc = list()
    for epoch in range(epoch_all):
        # train
        net.train()
        running_loss = 0.0
        for step, data in enumerate(train_loader, start=0):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            # print train process
            rate = (step + 1) / len(train_loader)
            a = "*" * int(rate * 50)
            b = "." * int((1 - rate) * 50)
            print("\rtrain loss: {:^3.0f}%[{}->{}]{:.4f}".format(
                int(rate * 100), a, b, loss),
                  end="")
        print()

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            for val_data in validate_loader:
                val_images, val_labels = val_data
                outputs = net(val_images.to(
                    device))  # eval model only has the final output layer
                # loss = loss_function(outputs, test_labels)
                predict_y = torch.max(outputs, dim=1)[1]
                acc += (predict_y == val_labels.to(device)).sum().item()
            val_accurate = acc / val_num
            if val_accurate > best_acc:
                best_acc = val_accurate
                torch.save(net.state_dict(), save_path)
            print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
                  (epoch + 1, running_loss / step, val_accurate))
            list_loss.append(running_loss / step)
            list_acc.append(val_accurate)
    # save loss and acc
    file_loss_path = "./ResNest_loss.txt"
    with open(file_loss_path, "w") as file_object:
        json.dump(list_loss, file_object)

    file_acc_path = "./ResNest_acc.txt"
    with open(file_acc_path, "w") as file_object:
        json.dump(list_acc, file_object)
    print('Finished Training')
Example #9
            ResDepSepBlock(channel*4,channel*8,kernel_size=3,stride=2),
            ResDepSepBlock(channel*8,channel*8,kernel_size=3,stride=1),
            ResDepSepBlock(channel*8,channel*8,kernel_size=3,stride=1),

            ResDepSepBlock(channel*8,channel*8,kernel_size=3,stride=2),
            ResDepSepBlock(channel*8,channel*8,kernel_size=3,stride=1),
            ResDepSepBlock(channel*8,channel*8,kernel_size=3,stride=1),
            
            nn.AdaptiveAvgPool1d(1),
        )

        self.out = nn.Sequential(
            nn.Linear(channel*8,channel*2),
            nn.ReLU6(inplace=False),
            nn.Linear(channel*2,2),
            nn.Softmax(dim=-1)
        )
    def forward(self, x):
        x = self.net(x)
        x = torch.squeeze(x)
        return self.out(x), x

if __name__=='__main__':
    from thop import profile
    model = OneD_CNN(600,30)
    x1 = torch.randn(1, 1,30, 600)
    x2 = torch.randn(2, 36, 30)
    x3 = torch.randn(2, 36, 30)
    flops, params = profile(model, inputs=(x1, ))
    print(flops, params)
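
    # Editor's sketch: thop unpacks `inputs` as positional arguments
    # (model(*inputs)), which is why the one-element tuple above matters. A
    # model with two inputs receives both through the tuple; the tiny module
    # below is a throwaway illustration, not part of this repo:
    class TwoInput(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = torch.nn.Linear(30, 8)

        def forward(self, a, b):
            return self.fc(a) + self.fc(b)

    flops2, params2 = profile(TwoInput(), inputs=(x2, x3))
    print(flops2, params2)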
Example #10
def test_resnet50_thop():
    model = AlexNet()
    input = torch.randn(1,3,224,224)
    flops, params = profile(model, inputs=(input,))
    flops, params = clever_format([flops, params], "%.3f")
    print("flops: ", flops, "params: ", params)
Example #11
def constraint_cal(net, img_size=128):
    input = torch.randn(1, 3, img_size, img_size)
    macs, params = profile(net, inputs=(input,))
    return (macs, params)
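

# Editor's sketch: a typical way the (macs, params) pair returned above gets
# used -- screening a candidate network against a resource budget. The budget
# values below are placeholders, not numbers from this repo:
def within_budget(net, max_gmacs=1.0, max_mparams=5.0, img_size=128):
    macs, params = constraint_cal(net, img_size=img_size)
    return macs / 1e9 <= max_gmacs and params / 1e6 <= max_mparams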
Example #12
        x = self.classifier(x)
        x = x.view(x.size()[0], -1)

        return x


if __name__ == '__main__':
    net = DSACNet()

    # stat(net, (3, 128, 128))
    # net = net.cuda()
    # summary(net, (3, 128, 128))
    input = torch.randn(1, 3, 128, 128)
    # output = net(input)
    # print("The net out: ", output)
    flops, params = profile(net, inputs=(input, ))
    print(flops, params)

    # # calculation method 1
    # flops, params = profile(net, inputs=(input, ))
    # print(flops, params)

    # # calculation method 2
    # flops, params = get_model_complexity_info(net, (3, 128, 128), as_strings=True, print_per_layer_stat=True)
    # print("|flops: %s |params: %s" % (flops, params))

    # # calculation method 3
    # stat(net, (3, 128, 128))

    # # calculation method 4
    # net = net.cuda()
Example #13
# @File   : test_net.py

import torch
from torch import nn
from torchkeras import summary
from thop import profile


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        y = self.relu(x)
        return y


if __name__ == '__main__':
    model = Net()
    print(model)
    print(summary(model, input_shape=(3, 20, 20)))
    print('number of params:', sum(param.numel() for param in model.parameters()))
    inputs = torch.randn(8, 3, 20, 20)
    flops, params = profile(model, (inputs,))
    print('flops:', flops, 'params:', params)
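
    # Editor's note (sketch): the manual count above (sum of p.numel()) should
    # match thop's `params`; `flops`, however, is computed for the whole batch
    # of 8 inputs, so divide by the batch size for a per-image figure.
    print('flops per image:', flops / inputs.shape[0])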
Example #14
def build_resnet_pruned_model(origin_model):

    pruning_rate_now = 0
    channel_prune_rate = 0.9
    num_mask_cfg = {'resnet50' : 48}

    while pruning_rate_now < args.pruning_rate:

        score = []
        index_cfg = []
        block_index_cfg = []
        layer_cfg = []
        block_cfg = []
        final_mask = []
        pruned_state_dict = {}

        for i in range(num_mask_cfg[args.cfg]):
            mask = origin_model.state_dict()['mask.'+str(i)]
            score.append(torch.abs(torch.sum(mask, 0)))
            final_mask.append(torch.div(torch.sum(mask, 0), 2))

        all_score = torch.cat(score,0)

        preserve_num = int(all_score.size(0) * channel_prune_rate)
        preserve_channel, _ = torch.topk(all_score, preserve_num)

        threshold = preserve_channel[preserve_num-1]
        
        block_score = []

        # Based on the pruning threshold, the prune cfg of each layer is obtained
        for i, mini_score in enumerate(score):
            mask = torch.ge(mini_score, threshold)
            index = []
            for j, m in enumerate(mask):
                if m:
                    index.append(j)
            if len(index) < mask.size(0) * args.min_preserve:
                _, index = torch.topk(mini_score, int(mask.size(0) * args.min_preserve))
                index = index.cpu().numpy().tolist()
            if (i + 1) % 3 != 0: #in block
                index_cfg.append(index)
                layer_cfg.append(len(index))
            else: #out block
                block_score.append(mini_score)

        num_blocks = [3,4,6,3]
        begin = 0
        for i in range(len(num_blocks)):
            block_cfg.append(int(block_score[begin].size(0)/4))
            for j in range(begin, begin + num_blocks[i]):
                block_index_cfg.append(torch.arange(block_score[begin].size(0)))
            begin = begin + num_blocks[i]
        
        model = import_module(f'model.{args.arch}').resnet(args.cfg, block_cfg, layer_cfg).to(device)

        flops, params = profile(model, inputs=(input, ))

        pruning_rate_now = (oriflops - flops) / oriflops

        channel_prune_rate = channel_prune_rate - 0.01

    model_state_dict = origin_model.state_dict()

    current_block = 0

    block_index = torch.arange(64)

    model = import_module(f'model.{args.arch}').resnet(args.cfg, block_cfg, layer_cfg).to(device)
    
    pruned_state_dict = model.state_dict()
    
    for name, module in origin_model.named_modules():

        if isinstance(module, Bottleneck_class):
        
            # conv1 & bn1
            index_1 = torch.LongTensor(index_cfg[current_block * 2]).to(device)
            
            pruned_weight = torch.index_select(model_state_dict[name + '.conv1.weight'], 0, index_1).cpu()
            pruned_weight = direct_project(pruned_weight, block_index)

            pruned_state_dict[name + '.conv1.weight'] = pruned_weight

            mask = final_mask[current_block * 3][index_cfg[current_block * 2]]
            pruned_state_dict[name + '.bn1.weight'] = torch.mul(mask,model_state_dict[name + '.bn1.weight'][index_1]).cpu()
            pruned_state_dict[name + '.bn1.bias'] = torch.mul(mask,model_state_dict[name + '.bn1.bias'][index_1]).cpu()
            pruned_state_dict[name + '.bn1.running_var'] = model_state_dict[name + '.bn1.running_var'][index_1].cpu()
            pruned_state_dict[name + '.bn1.running_mean'] = model_state_dict[name + '.bn1.running_mean'][index_1].cpu()

            # conv2 & bn2
            index_2 = torch.LongTensor(index_cfg[current_block * 2 + 1]).to(device)
            
            pruned_weight = torch.index_select(model_state_dict[name + '.conv2.weight'], 0, index_2).cpu()
            pruned_weight = direct_project(pruned_weight, index_1)

            pruned_state_dict[name + '.conv2.weight'] = pruned_weight

            mask = final_mask[current_block * 3 + 1][index_cfg[current_block * 2 + 1]]
            pruned_state_dict[name + '.bn2.weight'] = torch.mul(mask,model_state_dict[name + '.bn2.weight'][index_2]).cpu()
            pruned_state_dict[name + '.bn2.bias'] = torch.mul(mask,model_state_dict[name + '.bn2.bias'][index_2]).cpu()
            pruned_state_dict[name + '.bn2.running_var'] = model_state_dict[name + '.bn2.running_var'][index_2].cpu()
            pruned_state_dict[name + '.bn2.running_mean'] = model_state_dict[name + '.bn2.running_mean'][index_2].cpu()

            block_index = torch.LongTensor(block_index_cfg[current_block]).to(device)
            mask = final_mask[current_block * 3 + 2][block_index_cfg[current_block]]

            # conv3 & bn3 & shortcut
            
            pruned_state_dict[name + '.conv3.weight'] = torch.index_select(model_state_dict[name + '.conv3.weight'], 0, block_index).cpu()
            pruned_state_dict[name + '.conv3.weight'] = direct_project(pruned_state_dict[name + '.conv3.weight'], index_2)
            pruned_state_dict[name + '.bn3.weight'] = model_state_dict[name + '.bn3.weight'].cpu()
            pruned_state_dict[name + '.bn3.bias'] = model_state_dict[name + '.bn3.bias'].cpu()
            pruned_state_dict[name + '.bn3.running_var'] = model_state_dict[name + '.bn3.running_var'][block_index].cpu()
            pruned_state_dict[name + '.bn3.running_mean'] = model_state_dict[name + '.bn3.running_mean'][block_index].cpu()

            current_block += 1
    
    pruned_state_dict['fc.weight'] = model_state_dict['fc.weight'].cpu()
    pruned_state_dict['fc.bias'] = model_state_dict['fc.bias'].cpu()

    pruned_state_dict['conv1.weight'] = model_state_dict['conv1.weight'].cpu()
    pruned_state_dict['bn1.weight'] = model_state_dict['bn1.weight'].cpu()
    pruned_state_dict['bn1.bias'] = model_state_dict['bn1.bias'].cpu()
    pruned_state_dict['bn1.running_var'] = model_state_dict['bn1.running_var'].cpu()
    pruned_state_dict['bn1.running_mean'] = model_state_dict['bn1.running_mean'].cpu()

    #load weight
    model = import_module(f'model.{args.arch}').resnet(args.cfg, block_cfg = block_cfg, layer_cfg=layer_cfg).to(device)
    model.load_state_dict(pruned_state_dict)
    return model, [layer_cfg, block_cfg], flops, params
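

# Editor's note (a sketch, not from the original file): the global-voting step
# above amounts to pooling every per-channel score, keeping the top
# `channel_prune_rate` fraction, and taking the smallest kept score as the
# pruning threshold. Isolated for clarity:
def global_threshold(score_list, keep_ratio):
    all_score = torch.cat(score_list, 0)
    k = int(all_score.size(0) * keep_ratio)
    kept, _ = torch.topk(all_score, k)
    return kept[k - 1]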
Example #15
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.enabled = True

    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    model = get_model(args.model_name)
    model.drop_path_prob = 0.
    macs, params = profile(model, inputs=(torch.randn(1, 3, 32, 32), ))
    macs, params = macs / 1000. / 1000., params / 1000. / 1000.
    logging.info("The parameter size is: {0}".format((params)))
    logging.info("The FLOPS is: {0}".format(macs))
    model = torch.nn.DataParallel(model)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data,
                              train=False,
                              download=True,
                              transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    best_acc = 0.

    for epoch in range(args.epochs):

        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)
        scheduler.step()

        if best_acc < valid_acc:
            best_acc = valid_acc
            logging.info("Current best Prec@1 = %f", best_acc)
            utils.save(model, os.path.join(args.save, 'best.pt'))

        utils.save(model, os.path.join(args.save, 'weights.pt'))
Example #16
        self.inplanes = planes
        for _ in range(1, blocks):
            layers.append(AsterBlock(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x0 = self.layer0(x)
        x1 = self.layer1(x0)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        x5 = self.layer5(x4)

        cnn_feat = x5.squeeze(2)  # [N, c, w]
        cnn_feat = cnn_feat.transpose(2, 1)
        if self.with_lstm:
            rnn_feat, _ = self.rnn(cnn_feat)
            return rnn_feat
        else:
            return cnn_feat


if __name__ == "__main__":
    net = ResNet_ASTER()
    net2 = Tiny_ResNet_ASTER()
    x = torch.randn(1, 3, 32, 100)
    from thop import profile
    flops, params = profile(net, inputs=(x, ))
    flops2, params2 = profile(net2, inputs=(x, ))
    print('Flops: %.2f G, params: %.2f M' % (flops / 1e9, params / 1e6))
    print('Flops: %.2f G, params: %.2f M' % (flops2 / 1e9, params2 / 1e6))
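
    # Editor's sketch: when comparing two variants like this, the relative
    # saving is usually the interesting number:
    print('Tiny vs. full -- FLOPs ratio: %.2f, params ratio: %.2f' %
          (flops2 / flops, params2 / params))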
Example #17
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)

    num_gpus = torch.cuda.device_count()
    args.gpu = args.local_rank % num_gpus
    torch.cuda.set_device(args.gpu)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    cudnn.deterministic = True

    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed_args = %s", unparsed)

    torch.distributed.init_process_group(backend='nccl', init_method='env://')
    args.world_size = torch.distributed.get_world_size()
    args.batch_size = args.batch_size // args.world_size

    genotype = eval("genotypes.%s" % args.arch)
    logging.info('---------Genotype---------')
    logging.info(genotype)
    logging.info('--------------------------')
    model = Network(args.init_channels, CLASSES, args.layers, args.auxiliary,
                    genotype)
    model = model.cuda(args.gpu)
    model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)

    model_profile = Network(args.init_channels, CLASSES, args.layers,
                            args.auxiliary, genotype)
    model_profile = model_profile.cuda(args.gpu)
    model_input_size_imagenet = (1, 3, 224, 224)
    model_profile.drop_path_prob = 0
    flops, _ = profile(model_profile, model_input_size_imagenet)
    logging.info("flops = %fMB, param size = %fMB", flops,
                 count_parameters_in_MB(model))

    criterion_smooth = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion_smooth = criterion_smooth.cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Prepare data
    total_iters = per_epoch_iters * args.epochs
    train_loader = get_train_dataloader(args.train_dir, args.batch_size,
                                        args.local_rank, total_iters)
    train_dataprovider = DataIterator(train_loader)
    val_loader = get_val_dataloader(args.test_dir)
    val_dataprovider = DataIterator(val_loader)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    start_epoch = 0
    best_acc_top1 = 0
    best_acc_top5 = 0
    checkpoint_tar = os.path.join(args.save, 'checkpoint.pth.tar')
    if os.path.exists(checkpoint_tar):
        logging.info('loading checkpoint {} ..........'.format(checkpoint_tar))
        checkpoint = torch.load(
            checkpoint_tar,
            map_location={'cuda:0': 'cuda:{}'.format(args.local_rank)})
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        logging.info("loaded checkpoint {} epoch = {}".format(
            checkpoint_tar, checkpoint['epoch']))

    # evaluation mode
    if args.eval:
        if args.eval_resume is not None:
            checkpoint = torch.load(args.eval_resume)
            model.module.drop_path_prob = 0
            model.load_state_dict(checkpoint['state_dict'])
            valid_acc_top1, valid_acc_top5 = infer(val_dataprovider,
                                                   model.module, val_iters)
            print('valid_acc_top1: {}'.format(valid_acc_top1))
        exit(0)

    for epoch in range(start_epoch, args.epochs):
        if args.lr_scheduler == 'cosine':
            scheduler.step()
            current_lr = scheduler.get_lr()[0]
        elif args.lr_scheduler == 'linear':
            current_lr = adjust_lr(optimizer, epoch)
        else:
            logging.info('Wrong lr type, exit')
            sys.exit(1)

        logging.info('Epoch: %d lr %e', epoch, current_lr)
        if epoch < 5 and args.batch_size > 256:
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_lr * (epoch + 1) / 5.0
            logging.info('Warming-up Epoch: %d, LR: %e', epoch,
                         current_lr * (epoch + 1) / 5.0)
        model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        epoch_start = time.time()
        train_acc, train_obj = train(train_dataprovider, model,
                                     criterion_smooth, optimizer,
                                     per_epoch_iters)

        writer.add_scalar('Train/Loss', train_obj, epoch)
        writer.add_scalar('Train/LR', current_lr, epoch)

        if args.local_rank == 0 and (epoch % 5 == 0
                                     or args.epochs - epoch < 10):
            valid_acc_top1, valid_acc_top5 = infer(val_dataprovider,
                                                   model.module, val_iters)
            is_best = False
            if valid_acc_top5 > best_acc_top5:
                best_acc_top5 = valid_acc_top5
            if valid_acc_top1 > best_acc_top1:
                best_acc_top1 = valid_acc_top1
                is_best = True

            logging.info('Valid_acc_top1: %f', valid_acc_top1)
            logging.info('Valid_acc_top5: %f', valid_acc_top5)
            logging.info('best_acc_top1: %f', best_acc_top1)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds.', epoch_duration)

            save_checkpoint_(
                {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'best_acc_top1': best_acc_top1,
                    'optimizer': optimizer.state_dict(),
                }, args.save)
Example #18
def test_model(args):
    print(args)
    if args.use_gpu:
        # use one Graphics card to test
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        if not torch.cuda.is_available():
            raise Exception("need gpu to test network!")
        torch.cuda.empty_cache()

    if args.seed is not None:
        random.seed(args.seed)
        if args.use_gpu:
            torch.cuda.manual_seed_all(args.seed)
            cudnn.deterministic = True

    if args.use_gpu:
        cudnn.benchmark = True
        cudnn.enabled = True

    scale = 256 / 224
    val_dataset = datasets.ImageFolder(
        os.path.join(ILSVRC2012_path, 'val'),
        transforms.Compose([
            transforms.Resize(int(args.input_image_size * scale)),
            transforms.CenterCrop(args.input_image_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ]))
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.num_workers)

    if args.classifier == "darknet":
        model = _darknet(args.backbone, args.use_pretrained_model,
                         args.pretrained_model_path, args.num_classes)
    elif args.classifier == "efficientnet":
        model = _efficientnet(args.backbone, args.use_pretrained_model,
                              args.pretrained_model_path, args.num_classes)
    elif args.classifier == "regnet":
        model = _regnet(args.backbone, args.use_pretrained_model,
                        args.pretrained_model_path, args.num_classes)
    elif args.classifier == "resnet":
        model = _resnet(args.backbone, args.use_pretrained_model,
                        args.pretrained_model_path, args.num_classes)
    elif args.classifier == "vovnet":
        model = _vovnet(args.backbone, args.use_pretrained_model,
                        args.pretrained_model_path, args.num_classes)
    else:
        print("unsupport classification model!")
        return

    flops_input = torch.randn(1, 3, args.input_image_size,
                              args.input_image_size)
    flops, params = profile(model, inputs=(flops_input, ))
    flops, params = clever_format([flops, params], "%.3f")
    print(
        f"backbone:{args.backbone},classifier: '{args.classifier}', flops: {flops}, params: {params}"
    )

    if args.use_gpu:
        model = model.cuda()
        model = nn.DataParallel(model)

    print(f"start eval.")
    acc1, acc5, throughput = validate(val_loader, model, args)
    print(
        f"top1 acc: {acc1:.2f}%, top5 acc: {acc5:.2f}%, throughput: {throughput:.2f}sample/s"
    )
    print(f"eval done.")

    return
Example #19
def main():
    args = parse_args()
    global local_rank
    local_rank = args.local_rank
    if local_rank == 0:
        global logger
        logger = get_logger(__name__, args.log)

    torch.cuda.empty_cache()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True

    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend='nccl', init_method='env://')
    global gpus_num
    gpus_num = torch.cuda.device_count()
    if local_rank == 0:
        logger.info(f'use {gpus_num} gpus')
        logger.info(f"args: {args}")

    cudnn.benchmark = True
    cudnn.enabled = True
    start_time = time.time()

    # dataset and dataloader
    if local_rank == 0:
        logger.info('start loading data')
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        Config.train_dataset, shuffle=True)
    train_loader = DataLoader(Config.train_dataset,
                              batch_size=args.per_node_batch_size,
                              shuffle=False,
                              num_workers=args.num_workers,
                              collate_fn=collater,
                              sampler=train_sampler)
    if local_rank == 0:
        logger.info('finish loading data')

    model = centernet.__dict__[args.network](**{
        "pretrained": args.pretrained,
        "num_classes": args.num_classes,
    })

    for name, param in model.named_parameters():
        if local_rank == 0:
            logger.info(f"{name},{param.requires_grad}")

    flops_input = torch.randn(1, 3, args.input_image_size,
                              args.input_image_size)
    flops, params = profile(model, inputs=(flops_input, ))
    flops, params = clever_format([flops, params], "%.3f")
    if local_rank == 0:
        logger.info(
            f"model: '{args.network}', flops: {flops}, params: {params}")

    criterion = CenterNetLoss().cuda()
    decoder = CenterNetDecoder(image_w=args.input_image_size,
                               image_h=args.input_image_size).cuda()

    model = model.cuda()
    optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.milestones, gamma=0.1)

    if args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    if args.apex:
        amp.register_float_function(torch, 'sigmoid')
        amp.register_float_function(torch, 'softmax')
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        model = apex.parallel.DistributedDataParallel(model,
                                                      delay_allreduce=True)
        if args.sync_bn:
            model = apex.parallel.convert_syncbn_model(model)
    else:
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[local_rank],
                                                    output_device=local_rank)

    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            if local_rank == 0:
                logger.exception(
                    '{} is not a file, please check it again'.format(
                        args.resume))
            sys.exit(-1)
        if local_rank == 0:
            logger.info('start only evaluating')
            logger.info(f"start resuming model from {args.evaluate}")
        checkpoint = torch.load(args.evaluate,
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
        if local_rank == 0:
            logger.info(f"start eval.")
            all_eval_result = validate(Config.val_dataset, model, decoder)
            logger.info(f"eval done.")
            if all_eval_result is not None:
                logger.info(
                    f"val: epoch: {checkpoint['epoch']:0>5d}, IoU=0.5:0.95,area=all,maxDets=100,mAP:{all_eval_result[0]:.3f}, IoU=0.5,area=all,maxDets=100,mAP:{all_eval_result[1]:.3f}, IoU=0.75,area=all,maxDets=100,mAP:{all_eval_result[2]:.3f}, IoU=0.5:0.95,area=small,maxDets=100,mAP:{all_eval_result[3]:.3f}, IoU=0.5:0.95,area=medium,maxDets=100,mAP:{all_eval_result[4]:.3f}, IoU=0.5:0.95,area=large,maxDets=100,mAP:{all_eval_result[5]:.3f}, IoU=0.5:0.95,area=all,maxDets=1,mAR:{all_eval_result[6]:.3f}, IoU=0.5:0.95,area=all,maxDets=10,mAR:{all_eval_result[7]:.3f}, IoU=0.5:0.95,area=all,maxDets=100,mAR:{all_eval_result[8]:.3f}, IoU=0.5:0.95,area=small,maxDets=100,mAR:{all_eval_result[9]:.3f}, IoU=0.5:0.95,area=medium,maxDets=100,mAR:{all_eval_result[10]:.3f}, IoU=0.5:0.95,area=large,maxDets=100,mAR:{all_eval_result[11]:.3f}"
                )

        return

    best_map = 0.0
    start_epoch = 1
    # resume training
    if os.path.exists(args.resume):
        if local_rank == 0:
            logger.info(f"start resuming model from {args.resume}")
        checkpoint = torch.load(args.resume, map_location=torch.device('cpu'))
        start_epoch += checkpoint['epoch']
        best_map = checkpoint['best_map']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        if local_rank == 0:
            logger.info(
                f"finish resuming model from {args.resume}, epoch {checkpoint['epoch']}, best_map: {checkpoint['best_map']}, "
                f"loss: {checkpoint['loss']:3f}, heatmap_loss: {checkpoint['heatmap_loss']:2f}, offset_loss: {checkpoint['offset_loss']:2f},wh_loss: {checkpoint['wh_loss']:2f}"
            )

    if local_rank == 0:
        if not os.path.exists(args.checkpoints):
            os.makedirs(args.checkpoints)

    if local_rank == 0:
        logger.info('start training')
    for epoch in range(start_epoch, args.epochs + 1):
        train_sampler.set_epoch(epoch)
        heatmap_losses, offset_losses, wh_losses, losses = train(
            train_loader, model, criterion, optimizer, scheduler, epoch, args)

        if local_rank == 0:
            logger.info(
                f"train: epoch {epoch:0>3d}, heatmap_loss: {heatmap_losses:.2f}, offset_loss: {offset_losses:.2f}, wh_loss: {wh_losses:.2f}, loss: {losses:.2f}"
            )

        if epoch % 5 == 0 or epoch == args.epochs:
            if local_rank == 0:
                logger.info(f"start eval.")
                all_eval_result = validate(Config.val_dataset, model, decoder)
                logger.info(f"eval done.")
                if all_eval_result is not None:
                    logger.info(
                        f"val: epoch: {epoch:0>5d}, IoU=0.5:0.95,area=all,maxDets=100,mAP:{all_eval_result[0]:.3f}, IoU=0.5,area=all,maxDets=100,mAP:{all_eval_result[1]:.3f}, IoU=0.75,area=all,maxDets=100,mAP:{all_eval_result[2]:.3f}, IoU=0.5:0.95,area=small,maxDets=100,mAP:{all_eval_result[3]:.3f}, IoU=0.5:0.95,area=medium,maxDets=100,mAP:{all_eval_result[4]:.3f}, IoU=0.5:0.95,area=large,maxDets=100,mAP:{all_eval_result[5]:.3f}, IoU=0.5:0.95,area=all,maxDets=1,mAR:{all_eval_result[6]:.3f}, IoU=0.5:0.95,area=all,maxDets=10,mAR:{all_eval_result[7]:.3f}, IoU=0.5:0.95,area=all,maxDets=100,mAR:{all_eval_result[8]:.3f}, IoU=0.5:0.95,area=small,maxDets=100,mAR:{all_eval_result[9]:.3f}, IoU=0.5:0.95,area=medium,maxDets=100,mAR:{all_eval_result[10]:.3f}, IoU=0.5:0.95,area=large,maxDets=100,mAR:{all_eval_result[11]:.3f}"
                    )
                    if all_eval_result[0] > best_map:
                        torch.save(model.module.state_dict(),
                                   os.path.join(args.checkpoints, "best.pth"))
                        best_map = all_eval_result[0]
        if local_rank == 0:
            torch.save(
                {
                    'epoch': epoch,
                    'best_map': best_map,
                    'heatmap_loss': heatmap_losses,
                    'offset_loss': offset_losses,
                    'wh_loss': wh_losses,
                    'loss': losses,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                }, os.path.join(args.checkpoints, 'latest.pth'))

    if local_rank == 0:
        logger.info(f"finish training, best_map: {best_map:.3f}")
    training_time = (time.time() - start_time) / 3600
    if local_rank == 0:
        logger.info(
            f"finish training, total training time: {training_time:.2f} hours")
Example #20
def measure_model(model, IMAGE_SIZE1, IMAGE_SIZE2):
    inputs = torch.randn(1, 3, IMAGE_SIZE1, IMAGE_SIZE2)
    flops, params = profile(model, (inputs,))

    return flops, params
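

# Editor's sketch: typical usage of the helper above; torchvision's resnet18
# is only a placeholder model for illustration:
if __name__ == '__main__':
    from torchvision.models import resnet18
    f, p = measure_model(resnet18(), 224, 224)
    print('GMACs: %.2f, M params: %.2f' % (f / 1e9, p / 1e6))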
Example #21
                               shuffle=False,
                               num_workers=16,
                               pin_memory=True)
    test_data = utils.CIFAR10Pair(root='data',
                                  train=False,
                                  transform=utils.test_transform,
                                  download=True)
    test_loader = DataLoader(test_data,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=16,
                             pin_memory=True)

    # model setup and optimizer config
    model = Model(feature_dim).cuda()
    flops, params = profile(model, inputs=(torch.randn(1, 3, 32, 32).cuda(), ))
    flops, params = clever_format([flops, params])
    print('# Model Params: {} FLOPs: {}'.format(params, flops))
    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)
    c = len(memory_data.classes)

    # training loop
    results = {'train_loss': [], 'test_acc@1': [], 'test_acc@5': []}
    save_name_pre = '{}_{}_{}_{}_{}'.format(feature_dim, temperature, k,
                                            batch_size, epochs)
    if not os.path.exists('results'):
        os.mkdir('results')
    best_acc = 0.0
    for epoch in range(1, epochs + 1):
        train_loss = train(model, train_loader, optimizer)
        results['train_loss'].append(train_loss)
Example #22
import argparse

import torch
from gluoncv.torch.model_zoo import get_model
from gluoncv.torch.engine.config import get_cfg_defaults

from thop import profile, clever_format

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Compute FLOPs of a model.')
    parser.add_argument('--config-file', type=str, help='path to config file.')
    parser.add_argument('--num-frames',
                        type=int,
                        default=32,
                        help='temporal clip length.')
    parser.add_argument('--input-size',
                        type=int,
                        default=224,
                        help='size of the input image size. default is 224')

    args = parser.parse_args()
    cfg = get_cfg_defaults()
    cfg.merge_from_file(args.config_file)

    model = get_model(cfg)
    input_tensor = torch.rand(1, 3, args.num_frames, args.input_size,
                              args.input_size)

    macs, params = profile(model, inputs=(input_tensor, ))
    macs, params = clever_format([macs, params], "%.3f")
    print("FLOPs: ", macs, "; #params: ", params)
Example #23
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    num_gpus = torch.cuda.device_count()
    np.random.seed(args.seed)
    args.gpu = args.local_rank % num_gpus
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.deterministic = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    torch.distributed.init_process_group(backend='nccl', init_method='env://')
    args.world_size = torch.distributed.get_world_size()
    args.batch_size = args.batch_size // args.world_size

    # The network architeture coding
    rngs = [int(id) for id in args.model_id.split(' ')]
    model = Network(rngs)
    op_flops_dict = pickle.load(open(config.flops_lookup_table, 'rb'))
    profile(model, config.model_input_size_imagenet, rngs=rngs)
    flops = get_arch_flops(op_flops_dict, rngs, config.backbone_info,
                           config.blocks_keys)
    params = count_parameters_in_MB(model)
    model = model.cuda(args.gpu)
    model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)

    arch = model.module.architecture()
    logging.info('rngs:{}, arch:{}'.format(rngs, arch))
    logging.info("flops = %fMB, param size = %fMB", flops / 1e6, params)
    logging.info('batch_size:{}'.format(args.batch_size))

    criterion_smooth = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion_smooth = criterion_smooth.cuda()

    all_parameters = model.parameters()
    weight_parameters = []
    for pname, p in model.named_parameters():
        if p.ndimension(
        ) == 4 or 'classifier.0.weight' in pname or 'classifier.0.bias' in pname:
            weight_parameters.append(p)
    weight_parameters_id = list(map(id, weight_parameters))
    other_parameters = list(
        filter(lambda p: id(p) not in weight_parameters_id, all_parameters))

    optimizer = torch.optim.SGD(
        [{
            'params': other_parameters
        }, {
            'params': weight_parameters,
            'weight_decay': args.weight_decay
        }],
        args.learning_rate,
        momentum=args.momentum,
    )
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lambda step: (1.0 - step / args.total_iters), last_epoch=-1)

    # Prepare data
    train_loader = get_train_dataloader(args.train_dir, args.batch_size,
                                        args.local_rank, args.total_iters)
    train_dataprovider = DataIterator(train_loader)
    val_loader = get_val_dataloader(args.test_dir)
    val_dataprovider = DataIterator(val_loader)

    start_iter = 0
    best_acc_top1 = 0
    checkpoint_tar = os.path.join(args.save, 'checkpoint.pth.tar')
    if os.path.exists(checkpoint_tar):
        logging.info('loading checkpoint {} ..........'.format(checkpoint_tar))
        checkpoint = torch.load(
            checkpoint_tar,
            map_location={'cuda:0': 'cuda:{}'.format(args.local_rank)})
        start_iter = checkpoint['iters']
        best_acc_top1 = checkpoint['best_acc_top1']
        model.load_state_dict(checkpoint['state_dict'])
        logging.info("loaded checkpoint {} iters = {}".format(
            checkpoint_tar, checkpoint['iters']))

    for iters in range(start_iter):
        scheduler.step()

    # evaluation mode
    if args.eval:
        if args.eval_resume is not None:
            checkpoint = torch.load(args.eval_resume)
            model.load_state_dict(checkpoint['state_dict'])
            valid_acc_top1, valid_acc_top5 = infer(val_dataprovider,
                                                   model.module, val_iters)
            print('valid_acc_top1: {}'.format(valid_acc_top1))
        exit(0)

    iters = start_iter
    while iters < args.total_iters:
        train_iters = 10000
        train_acc, train_obj, iters= train(iters, train_dataprovider, model, criterion_smooth, \
          optimizer, train_iters, scheduler)
        writer.add_scalar('Train/Loss', train_obj, iters)
        writer.add_scalar('Train/LR', scheduler.get_lr()[0], iters)
        # torch.cuda.empty_cache()
        if args.local_rank == 0:
            valid_acc_top1, valid_acc_top5 = infer(val_dataprovider,
                                                   model.module, val_iters)

            is_best = False
            if valid_acc_top1 > best_acc_top1:
                best_acc_top1 = valid_acc_top1
                is_best = True

            logging.info(
                'valid_acc_top1: %f valid_acc_top5: %f best_acc_top1: %f',
                valid_acc_top1, valid_acc_top5, best_acc_top1)
            save_checkpoint_(
                {
                    'iters': iters,
                    'state_dict': model.state_dict(),
                    'best_acc_top1': best_acc_top1,
                    'optimizer': optimizer.state_dict(),
                }, args.save)
Example #24
def main(pretrain=True):
    config.save = 'search-{}-{}'.format(config.save, time.strftime("%Y%m%d-%H%M%S"))
    create_exp_dir(config.save, scripts_to_save=glob.glob('*.py')+glob.glob('*.sh'))
    logger = SummaryWriter(config.save)

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(config.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    assert type(pretrain) == bool or type(pretrain) == str
    update_arch = True
    if pretrain == True:
        update_arch = False
    logging.info("args = %s", str(config))
    # preparation ################
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    seed = config.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # config network and criterion ################
    min_kept = int(config.batch_size * config.image_height * config.image_width // (16 * config.gt_down_sampling ** 2))
    ohem_criterion = ProbOhemCrossEntropy2d(ignore_label=255, thresh=0.7, min_kept=min_kept, use_weight=False)

    # Model #######################################
    model = Network(config.num_classes, config.layers, ohem_criterion, Fch=config.Fch, width_mult_list=config.width_mult_list, prun_modes=config.prun_modes, stem_head_width=config.stem_head_width)
    flops, params = profile(model, inputs=(torch.randn(1, 3, 1024, 2048),), verbose=False)
    logging.info("params = %fMB, FLOPs = %fGB", params / 1e6, flops / 1e9)
    model = model.cuda()
    if type(pretrain) == str:
        partial = torch.load(pretrain + "/weights.pt", map_location='cuda:0')
        state = model.state_dict()
        pretrained_dict = {k: v for k, v in partial.items() if k in state and state[k].size() == partial[k].size()}
        state.update(pretrained_dict)
        model.load_state_dict(state)
    else:
        init_weight(model, nn.init.kaiming_normal_, nn.BatchNorm2d, config.bn_eps, config.bn_momentum, mode='fan_in', nonlinearity='relu')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    architect = Architect(model, config)

    # Optimizer ###################################
    base_lr = config.lr
    parameters = []
    parameters += list(model.stem.parameters())
    parameters += list(model.cells.parameters())
    parameters += list(model.refine32.parameters())
    parameters += list(model.refine16.parameters())
    parameters += list(model.head0.parameters())
    parameters += list(model.head1.parameters())
    parameters += list(model.head2.parameters())
    parameters += list(model.head02.parameters())
    parameters += list(model.head12.parameters())
    optimizer = torch.optim.SGD(
        parameters,
        lr=base_lr,
        momentum=config.momentum,
        weight_decay=config.weight_decay)

    # lr policy ##############################
    lr_policy = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.978)

    # data loader ###########################
    data_setting = {'img_root': config.img_root_folder,
                    'gt_root': config.gt_root_folder,
                    'train_source': config.train_source,
                    'eval_source': config.eval_source,
                    'down_sampling': config.down_sampling}
    index_select = list(range(config.num_train_imgs))
    shuffle(index_select)  # shuffle to make sure balanced dataset split
    train_loader_model = get_train_loader(config, Cityscapes, portion=config.train_portion, index_select=index_select)
    train_loader_arch = get_train_loader(config, Cityscapes, portion=config.train_portion-1, index_select=index_select)

    evaluator = SegEvaluator(Cityscapes(data_setting, 'val', None), config.num_classes, config.image_mean,
                             config.image_std, model, config.eval_scale_array, config.eval_flip, 0, config=config,
                             verbose=False, save_path=None, show_image=False)

    if update_arch:
        for idx in range(len(config.latency_weight)):
            logger.add_scalar("arch/latency_weight%d"%idx, config.latency_weight[idx], 0)
            logging.info("arch_latency_weight%d = "%idx + str(config.latency_weight[idx]))

    tbar = tqdm(range(config.nepochs), ncols=80)
    valid_mIoU_history = []
    FPSs_history = []
    latency_supernet_history = []
    latency_weight_history = []
    valid_names = ["8s", "16s", "32s", "8s_32s", "16s_32s"]
    arch_names = {0: "teacher", 1: "student"}
    for epoch in tbar:
        logging.info(pretrain)
        logging.info(config.save)
        logging.info("lr: " + str(optimizer.param_groups[0]['lr']))

        logging.info("update arch: " + str(update_arch))

        # training
        tbar.set_description("[Epoch %d/%d][train...]" % (epoch + 1, config.nepochs))
        train(pretrain, train_loader_model, train_loader_arch, model, architect, ohem_criterion, optimizer, lr_policy, logger, epoch, update_arch=update_arch)
        torch.cuda.empty_cache()
        lr_policy.step()

        # validation
        tbar.set_description("[Epoch %d/%d][validation...]" % (epoch + 1, config.nepochs))
        with torch.no_grad():
            if pretrain == True:
                model.prun_mode = "min"
                valid_mIoUs = infer(epoch, model, evaluator, logger, FPS=False)
                for i in range(5):
                    logger.add_scalar('mIoU/val_min_%s'%valid_names[i], valid_mIoUs[i], epoch)
                    logging.info("Epoch %d: valid_mIoU_min_%s %.3f"%(epoch, valid_names[i], valid_mIoUs[i]))
                if len(model._width_mult_list) > 1:
                    model.prun_mode = "max"
                    valid_mIoUs = infer(epoch, model, evaluator, logger, FPS=False)
                    for i in range(5):
                        logger.add_scalar('mIoU/val_max_%s'%valid_names[i], valid_mIoUs[i], epoch)
                        logging.info("Epoch %d: valid_mIoU_max_%s %.3f"%(epoch, valid_names[i], valid_mIoUs[i]))
                    model.prun_mode = "random"
                    valid_mIoUs = infer(epoch, model, evaluator, logger, FPS=False)
                    for i in range(5):
                        logger.add_scalar('mIoU/val_random_%s'%valid_names[i], valid_mIoUs[i], epoch)
                        logging.info("Epoch %d: valid_mIoU_random_%s %.3f"%(epoch, valid_names[i], valid_mIoUs[i]))
            else:
                valid_mIoUss = []
                FPSs = []
                model.prun_mode = None
                for idx in range(len(model._arch_names)):
                    # arch_idx
                    model.arch_idx = idx
                    valid_mIoUs, fps0, fps1 = infer(epoch, model, evaluator, logger)
                    valid_mIoUss.append(valid_mIoUs)
                    FPSs.append([fps0, fps1])
                    for i in range(5):
                        # preds
                        logger.add_scalar('mIoU/val_%s_%s'%(arch_names[idx], valid_names[i]), valid_mIoUs[i], epoch)
                        logging.info("Epoch %d: valid_mIoU_%s_%s %.3f"%(epoch, arch_names[idx], valid_names[i], valid_mIoUs[i]))
                    if config.latency_weight[idx] > 0:
                        logger.add_scalar('Objective/val_%s_8s_32s'%arch_names[idx], objective_acc_lat(valid_mIoUs[3], 1000./fps0), epoch)
                        logging.info("Epoch %d: Objective_%s_8s_32s %.3f"%(epoch, arch_names[idx], objective_acc_lat(valid_mIoUs[3], 1000./fps0)))
                        logger.add_scalar('Objective/val_%s_16s_32s'%arch_names[idx], objective_acc_lat(valid_mIoUs[4], 1000./fps1), epoch)
                        logging.info("Epoch %d: Objective_%s_16s_32s %.3f"%(epoch, arch_names[idx], objective_acc_lat(valid_mIoUs[4], 1000./fps1)))
                valid_mIoU_history.append(valid_mIoUss)
                FPSs_history.append(FPSs)
                if update_arch:
                    latency_supernet_history.append(architect.latency_supernet)
                latency_weight_history.append(architect.latency_weight)

        save(model, os.path.join(config.save, 'weights.pt'))
        if type(pretrain) == str:
            # contains arch_param names: {"alphas": alphas, "betas": betas, "gammas": gammas, "ratios": ratios}
            for idx, arch_name in enumerate(model._arch_names):
                state = {}
                for name in arch_name['alphas']:
                    state[name] = getattr(model, name)
                for name in arch_name['betas']:
                    state[name] = getattr(model, name)
                for name in arch_name['ratios']:
                    state[name] = getattr(model, name)
                state["mIoU02"] = valid_mIoUs[3]
                state["mIoU12"] = valid_mIoUs[4]
                if pretrain is not True:
                    state["latency02"] = 1000. / fps0
                    state["latency12"] = 1000. / fps1
                torch.save(state, os.path.join(config.save, "arch_%d_%d.pt"%(idx, epoch)))
                torch.save(state, os.path.join(config.save, "arch_%d.pt"%(idx)))

        if update_arch:
            for idx in range(len(config.latency_weight)):
                if config.latency_weight[idx] > 0:
                    if (int(FPSs[idx][0] >= config.FPS_max[idx]) + int(FPSs[idx][1] >= config.FPS_max[idx])) >= 1:
                        architect.latency_weight[idx] /= 2
                    elif (int(FPSs[idx][0] <= config.FPS_min[idx]) + int(FPSs[idx][1] <= config.FPS_min[idx])) > 0:
                        architect.latency_weight[idx] *= 2
                    logger.add_scalar("arch/latency_weight_%s"%arch_names[idx], architect.latency_weight[idx], epoch+1)
                    logging.info("arch_latency_weight_%s = "%arch_names[idx] + str(architect.latency_weight[idx]))
Ejemplo n.º 25
0
# Exploratory profiling of several MobileNetV2 / sandglass variants. The model
# classes referenced below are defined elsewhere in this snippet's repo, so one
# of them has to be instantiated before the profile() call at the bottom.
import torch

# model = mobilenetv2_sandglass()
# print('Total params: %f M' % (sum(p.numel() for p in model.parameters()) / 1024. / 1024.0))
# print(len(list(model.modules())))
# model = MobileNetV2_sandglass()
# print('Total params: %f M' % (sum(p.numel() for p in model.parameters()) / 1024. / 1024.0))
# print(len(list(model.modules())))
# model = InvertedResidual(32, 32, 1, 6)
# print('InvertedResidual params: %.f' % (sum(p.numel() for p in model.parameters())))
# print(len(list(model.modules())))
# print(model)
# model = Sandglass(192, 192, 1, 6)
# print('Sandglass params: %.f' % (sum(p.numel() for p in model.parameters())))
# print(len(list(model.modules())))
# print(model)
# model = My_Sandglass(192, 192, 1, 6)
# print('My_Sandglass params: %.f' % (sum(p.numel() for p in model.parameters())))
# print(len(list(model.modules())))
# print(model)

# Assumed choice so the example runs end to end: the full-network variant,
# which matches the 3-channel 224x224 input below.
model = mobilenetv2_sandglass()
model.eval()

input = torch.randn(1, 3, 224, 224)
# y = model(input)
# print(y.shape)

from thop import profile

flops, params = profile(model, inputs=(input, ))
print(flops)
print(params)
print('Total params: %f M' % (sum(p.numel() for p in model.parameters()) / 1e6))
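# thop.clever_format (used by the other examples on this page) can turn the raw
# counts above into human-readable strings:
from thop import clever_format

flops_readable, params_readable = clever_format([flops, params], "%.3f")
print(flops_readable, params_readable)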
Ejemplo n.º 26
0
import torch
from thop import profile
# FactorizedReduce is an op defined in the surrounding NAS codebase (not shown here)

def _flops(h, w, C_in, C_out, stride=1):
    layer = FactorizedReduce(C_in, C_out, stride, slimmable=False)
    flops, params = profile(layer,
                            inputs=(torch.randn(1, C_in, h, w), ),
                            verbose=False)
    return flops
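# Hypothetical usage (not from the original code): pre-compute a small FLOPs
# lookup table for FactorizedReduce so a search loop does not re-run thop at
# every step. The feature-map sizes listed here are purely illustrative.
flops_table = {
    (h, w, c_in, c_out): _flops(h, w, c_in, c_out, stride=2)
    for (h, w, c_in, c_out) in [(128, 256, 64, 128), (64, 128, 128, 256)]
}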
Ejemplo n.º 27
0
import torch

def count_flops(model, target_size):
    from thop import profile
    model_input = torch.randn(1, 3, target_size, target_size)
    flops, n_params = profile(model, inputs=(model_input, ), verbose=False)
    return flops, n_params
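# Illustrative usage (not part of the original snippet); torchvision's resnet18
# stands in for any nn.Module you might want to measure.
import torchvision.models as models

flops, n_params = count_flops(models.resnet18(), target_size=224)
print('GFLOPs: %.2f, params (M): %.2f' % (flops / 1e9, n_params / 1e6))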
Ejemplo n.º 28
0
    write_voc_results_file(box_list, dataset)
    do_python_eval(output_dir)


if __name__ == '__main__':
    # load net
    num_classes = len(labelmap) + 1  # +1 for background
    net = build_ssd('test', 300, num_classes)  # initialize SSD

    # Load all of the weights into the network
    net.load_state_dict(torch.load(args.trained_model))

    # Get an estimated number of parameters and FLOPs for the model
    if not args.cuda:
        input = torch.randn(1, 3, 300, 300)
        flops, params = profile(net, inputs=(input, ), verbose=False)
        flops, params = clever_format([flops, params], "%.3f")
        print("\nFlops =", flops, "\nParams =", params, "\n")

    net.eval()
    print('Finished loading model!')
    # load data
    dataset = VOCDetection(args.voc_root, [('2007', set_type)],
                           BaseTransform(300, dataset_mean),
                           VOCAnnotationTransform())
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    test_net(args.save_folder,
             net,
Ejemplo n.º 29
0
def main_worker(gpu, opt, cfg):
    if opt.seed is not None:
        setup_seed(opt.seed)

    if gpu is not None:
        opt.gpu = gpu

    init_dist(opt)

    if not opt.log:
        logger.setLevel(50)
        null_writer = NullWriter()
        sys.stdout = null_writer

    logger.info('******************************')
    logger.info(opt)
    logger.info('******************************')
    logger.info(cfg)
    logger.info('******************************')

    opt.nThreads = int(opt.nThreads / num_gpu)

    # Model Initialize
    m = preset_model(cfg)
    if opt.params:
        from thop import clever_format, profile
        input = torch.randn(1, 3, 256, 256).cuda(opt.gpu)
        flops, params = profile(m.cuda(opt.gpu), inputs=(input, ))
        macs, params = clever_format([flops, params], "%.3f")
        logger.info('macs: %s, params: %s', macs, params)

    m.cuda(opt.gpu)
    m = torch.nn.parallel.DistributedDataParallel(m, device_ids=[opt.gpu])

    criterion = builder.build_loss(cfg.LOSS).cuda(opt.gpu)
    optimizer = torch.optim.Adam(m.parameters(), lr=cfg.TRAIN.LR)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=cfg.TRAIN.LR_STEP, gamma=cfg.TRAIN.LR_FACTOR)

    if opt.log:
        writer = SummaryWriter('.tensorboard/{}/{}-{}'.format(
            cfg.DATASET.DATASET, cfg.FILE_NAME, opt.exp_id))
    else:
        writer = None

    if cfg.DATASET.DATASET == 'mix_smpl':
        train_dataset = MixDataset(cfg=cfg, train=True)
    else:
        raise NotImplementedError

    heatmap_to_coord = get_func_heatmap_to_coord(cfg)

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=opt.world_size, rank=opt.rank)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=cfg.TRAIN.BATCH_SIZE,
                                               shuffle=(train_sampler is None),
                                               num_workers=opt.nThreads,
                                               sampler=train_sampler,
                                               worker_init_fn=_init_fn)

    # gt val dataset
    gt_val_dataset_h36m = MixDataset(cfg=cfg, train=False)

    gt_val_dataset_3dpw = PW3D(cfg=cfg,
                               ann_file='3DPW_test_new.json',
                               train=False)

    opt.trainIters = 0
    best_err_h36m = 999
    best_err_3dpw = 999

    for i in range(cfg.TRAIN.BEGIN_EPOCH, cfg.TRAIN.END_EPOCH):
        opt.epoch = i
        train_sampler.set_epoch(i)

        current_lr = optimizer.state_dict()['param_groups'][0]['lr']

        logger.info(
            f'############# Starting Epoch {opt.epoch} | LR: {current_lr} #############'
        )

        # Training
        loss, acc17 = train(opt, train_loader, m, criterion, optimizer, writer)
        logger.epochInfo('Train', opt.epoch, loss, acc17)

        lr_scheduler.step()

        if (i + 1) % opt.snapshot == 0:
            if opt.log:
                # Save checkpoint
                torch.save(
                    m.module.state_dict(),
                    './exp/{}/{}-{}/model_{}.pth'.format(
                        cfg.DATASET.DATASET, cfg.FILE_NAME, opt.exp_id,
                        opt.epoch))
            # Prediction Test
            with torch.no_grad():
                gt_tot_err_h36m = validate_gt(m, opt, cfg, gt_val_dataset_h36m,
                                              heatmap_to_coord)
                gt_tot_err_3dpw = validate_gt(m, opt, cfg, gt_val_dataset_3dpw,
                                              heatmap_to_coord)
                if opt.log:
                    if gt_tot_err_h36m <= best_err_h36m:
                        best_err_h36m = gt_tot_err_h36m
                        torch.save(
                            m.module.state_dict(),
                            './exp/{}/{}-{}/best_h36m_model.pth'.format(
                                cfg.DATASET.DATASET, cfg.FILE_NAME,
                                opt.exp_id))
                    if gt_tot_err_3dpw <= best_err_3dpw:
                        best_err_3dpw = gt_tot_err_3dpw
                        torch.save(
                            m.module.state_dict(),
                            './exp/{}/{}-{}/best_3dpw_model.pth'.format(
                                cfg.DATASET.DATASET, cfg.FILE_NAME,
                                opt.exp_id))

                    logger.info(
                        f'##### Epoch {opt.epoch} | h36m err: {gt_tot_err_h36m} / {best_err_h36m} | 3dpw err: {gt_tot_err_3dpw} / {best_err_3dpw} #####'
                    )

        torch.distributed.barrier()  # Sync

    torch.save(
        m.module.state_dict(),
        './exp/{}/{}-{}/final_DPG.pth'.format(cfg.DATASET.DATASET,
                                              cfg.FILE_NAME, opt.exp_id))
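# A minimal sketch (assumed, not from the original repo) of reloading the best
# H36M checkpoint saved above for evaluation only; `m`, `cfg` and `opt` are the
# same objects used inside main_worker.
def load_best_h36m_sketch(m, cfg, opt):
    best_path = './exp/{}/{}-{}/best_h36m_model.pth'.format(
        cfg.DATASET.DATASET, cfg.FILE_NAME, opt.exp_id)
    m.module.load_state_dict(torch.load(best_path, map_location='cpu'))
    return m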
Ejemplo n.º 30
0
from unet_parts import double_conv, double_dsconv
from unet_model import UNet, UNet_dsc
from torchsummary import summary
import torch 
from thop import profile

model = UNet(3, 2)
model_dsc = UNet_dsc(3, 2)

# summary(model.cuda(), (3, 512, 512))
# summary(model_dsc.cuda(), (3, 512, 512))
conv = double_conv(64, 128)
conv_dsc = double_dsconv(64, 128)

summary(conv.cuda(), (64,512,512))
summary(conv_dsc.cuda(), (64, 512, 512))

# torch.save(model.state_dict(), 'unet.pth')
# torch.save(model_dsc.state_dict(), 'unet_dsc.pth')

# the two blocks were moved to the GPU by the summary() calls above, so profile
# them with a CUDA input; newer thop versions take `inputs=` rather than `input_size=`
input = torch.randn(1, 64, 512, 512).cuda()
flops, params = profile(conv, inputs=(input, ))
print(flops, params)
flops, params = profile(conv_dsc, inputs=(input, ))
print(flops, params)
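# Optional follow-up (not part of the original script): profile both blocks with
# distinct names so the savings of the depthwise-separable variant can be reported.
flops_conv, params_conv = profile(conv, inputs=(input, ), verbose=False)
flops_dsc, params_dsc = profile(conv_dsc, inputs=(input, ), verbose=False)
print('FLOPs reduction: %.2fx, param reduction: %.2fx'
      % (flops_conv / flops_dsc, params_conv / params_dsc))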