Example #1
# Assumed imports for these examples; utils and the train/test/weight_extract
# helpers are project-local modules that are not shown here.
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn

import utils


def main(model_dir, model, dataset):
    utils.default_model_dir = model_dir
    lr = 0.1
    start_time = time.time()

    if dataset == 'cifar10':
        train_loader, test_loader = utils.cifar10_loader()
    elif dataset == 'cifar100':
        train_loader, test_loader = utils.cifar100_loader()
    else:
        raise ValueError('unknown dataset: {}'.format(dataset))

    if torch.cuda.is_available():
        # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        print("USE", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model).cuda()
        cudnn.benchmark = True

    else:
        print("NO GPU -_-;")

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()
    if torch.cuda.is_available():  # .cuda() would fail on a CPU-only build
        criterion = criterion.cuda()

    start_epoch = 0
    checkpoint = utils.load_checkpoint(model_dir)
    
    if checkpoint:
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    for epoch in range(start_epoch, 165):
        if epoch < 80:
            learning_rate = lr
        elif epoch < 120:
            learning_rate = lr * 0.1
        else:
            learning_rate = lr * 0.01
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

        train(model, optimizer, criterion, train_loader, epoch)
        test(model, criterion, test_loader, epoch)

        if epoch % 5 == 0:
            model_filename = 'checkpoint_%03d.pth.tar' % epoch
            utils.save_checkpoint({
                'epoch': epoch,
                'model': model,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, model_filename, model_dir)

    utils.conv_weight_L1_printing(model.module)
    now = time.gmtime(time.time() - start_time)
    print('{} hours {} mins {} secs for training'.format(now.tm_hour, now.tm_min, now.tm_sec))
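
The manual three-step schedule above can be written more compactly with torch.optim.lr_scheduler.MultiStepLR. A minimal, self-contained sketch assuming the same milestones (epochs 80 and 120) and decay factor 0.1; the Linear model is only a stand-in for the real network:

import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 2)  # stand-in for the real network
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[80, 120], gamma=0.1)

for epoch in range(165):
    # ... train(...) and test(...) would run here ...
    scheduler.step()  # lr: 0.1 until epoch 80, then 0.01, then 0.001 from epoch 120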
Example #2
def main(model_dir, model, dataset):
    utils.default_model_dir = model_dir
    utils.c = None
    utils.str_w = ''
    lr = 0.1
    start_time = time.time()

    if dataset == 'cifar10':
        train_loader, test_loader = utils.cifar10_loader()
    elif dataset == 'cifar100':
        train_loader, test_loader = utils.cifar100_loader()
    else:
        raise ValueError('unknown dataset: {}'.format(dataset))

    if torch.cuda.is_available():
        # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        print("USE", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model).cuda()
        cudnn.benchmark = True

    else:
        print("NO GPU -_-;")

    optimizer = optim.SGD(model.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()
    if torch.cuda.is_available():  # .cuda() would fail on a CPU-only build
        criterion = criterion.cuda()

    start_epoch = 0
    checkpoint = utils.load_checkpoint(model_dir)

    if checkpoint:
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    utils.init_learning(model.module)

    weight_extract_test(model, criterion, test_loader)
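
All four examples load checkpoints into a DataParallel-wrapped model. DataParallel prefixes every state_dict key with 'module.', so a checkpoint saved from a wrapped model only loads cleanly into another wrapped model. A small self-contained sketch of that round trip (the Linear model is a stand-in):

import torch
import torch.nn as nn

net = nn.DataParallel(nn.Linear(10, 2))       # keys become 'module.weight', 'module.bias'
torch.save({'state_dict': net.state_dict()}, 'checkpoint_demo.pth.tar')

restored = nn.DataParallel(nn.Linear(10, 2))  # wrap again before loading
checkpoint = torch.load('checkpoint_demo.pth.tar')
restored.load_state_dict(checkpoint['state_dict'])  # an unwrapped Linear would reject these keys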
Example #3
def main(model_dir, model, dataset, layer_name, layer_n):
    # note: the layer_name argument is recomputed below via utils.make_layer_name(layer_n)
    utils.default_model_dir = model_dir
    utils.c = None
    utils.str_w = ''
    lr = 0.1
    start_time = time.time()

    if dataset == 'cifar10':
        train_loader, test_loader = utils.cifar10_loader()
    elif dataset == 'cifar100':
        train_loader, test_loader = utils.cifar100_loader()
    else:
        raise ValueError('unknown dataset: {}'.format(dataset))

    if torch.cuda.is_available():
        # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        print("USE", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model).cuda()
        cudnn.benchmark = True

    else:
        print("NO GPU -_-;")

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()
    if torch.cuda.is_available():  # .cuda() would fail on a CPU-only build
        criterion = criterion.cuda()

    start_epoch = 0
    checkpoint = utils.load_checkpoint(model_dir)
    
    if checkpoint:
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    utils.init_learning(model.module)

    for epoch in range(start_epoch, 165):  # adjust the total epoch count here if needed
        if epoch < 80:
            learning_rate = lr
        elif epoch < 120:
            learning_rate = lr * 0.1
        else:
            learning_rate = lr * 0.01
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
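        # each epoch trains twice below: utils.switching_learning toggles which
        # parameters (conv weights vs. gates) are currently being learned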

        train(model, optimizer, criterion, train_loader, epoch, True)
        test(model, criterion, test_loader, epoch, True)

        utils.switching_learning(model.module)
        print('switching_learning to Gate')
        
        train(model, optimizer, criterion, train_loader, epoch, False)
        test(model, criterion, test_loader, epoch, False)        

        utils.switching_learning(model.module)
        print('switching_learning back from Gate')

        model_filename = 'checkpoint_%03d.pth.tar' % epoch
        utils.save_checkpoint({
            'epoch': epoch,
            'model': model,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, model_filename, model_dir)
            

    now = time.gmtime(time.time() - start_time)
    weight_extract(model, optimizer, criterion, train_loader, epoch)

    class_counter, class_weight_sum, class_average, total_average = utils.load_gate_csv()

    _, index = torch.sort(total_average)
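    # ascending sort: index[0] is the layer with the smallest average gate weight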
    layer_name = utils.make_layer_name(layer_n)

    for i in index:
        # prune the weights of layer_name[i] (least important gate first), then re-evaluate
        utils.weight_pruning_by_name(model.module, layer_name[i])
        test(model, criterion, test_loader, epoch, True)

    # TODO: change index[0] to 'layerN and layerN-M'
    # index runs from the smallest value (index[0]) to the biggest
    # layer name change, layer0 to layer'n-1'
    # find 'layer' + str(index[0]) in model.module and set its self.z to 0

    # utils.conv_weight_L1_printing(model.module)
    
    print('{} hours {} mins {} secs for training'.format(now.tm_hour, now.tm_min, now.tm_sec))
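
The pruning order in this example comes from sorting the per-layer gate averages. A self-contained sketch of that indexing pattern, with made-up scores and layer names standing in for utils.load_gate_csv() and utils.make_layer_name():

import torch

total_average = torch.tensor([0.42, 0.07, 0.31, 0.19])  # made-up per-layer gate averages
layer_name = ['layer0', 'layer1', 'layer2', 'layer3']   # stand-in for utils.make_layer_name(4)

_, index = torch.sort(total_average)                    # ascending: least important gate first
for i in index:
    print('prune next:', layer_name[i])                 # layer1, layer3, layer2, layer0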
Example #4
def main(model_dir, model, dataset, layer_n, reversed=False):  # note: 'reversed' shadows the Python builtin
    utils.default_model_dir = model_dir
    utils.c = None
    utils.str_w = ''
    lr = 0.1
    start_time = time.time()

    if dataset == 'cifar10':
        train_loader, test_loader = utils.cifar10_loader()
    elif dataset == 'cifar100':
        train_loader, test_loader = utils.cifar100_loader()
    else:
        raise ValueError('unknown dataset: {}'.format(dataset))

    if torch.cuda.is_available():
        # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        print("USE", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model).cuda()
        cudnn.benchmark = True

    else:
        print("NO GPU -_-;")

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()
    if torch.cuda.is_available():  # .cuda() would fail on a CPU-only build
        criterion = criterion.cuda()

    start_epoch = 0
    checkpoint = utils.load_checkpoint(model_dir)
    
    if checkpoint:
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    utils.init_learning(model.module)

    # (the per-epoch training loop from Example #3 is intentionally disabled here;
    # this example only extracts gate weights from an already-trained model and prunes it)

    utils.del_csv_weight_for_test()

    weight_extract(model, optimizer, criterion, train_loader, 160)

    class_counter, class_weight_sum, class_average, total_average = utils.load_gate_csv()

    if reversed:
        _, index = torch.sort(total_average, descending=True)
    else:
        _, index = torch.sort(total_average)
    layer_name = utils.make_layer_name(layer_n)


    for i in index:
        # prune layer_name[i] in the chosen order, then re-measure test accuracy
        utils.weight_pruning_by_name(model.module, layer_name[i])
        test(model, criterion, test_loader, 160, True)
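
The reversed flag only flips the sort direction, i.e. whether pruning starts from the least or the most important layer. A quick self-contained illustration with made-up scores:

import torch

scores = torch.tensor([0.42, 0.07, 0.31])
_, asc = torch.sort(scores)                     # tensor([1, 2, 0]): smallest score first
_, desc = torch.sort(scores, descending=True)   # tensor([0, 2, 1]): largest score first
print(asc.tolist(), desc.tolist())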