Ejemplo n.º 1
0
def train_Epoch(args, state_info, Train_loader, Test_loader):  # all
    """Run the whole training schedule and checkpoint the best result.

    A checkpoint is looked up in ``os.path.join(args.dir, args.model)``. If
    one is found it is loaded into ``state_info`` and subsequent checkpoints
    are written to the ``cls`` sub-directory; otherwise ``args.last_epoch``
    is set to -1. The learning-rate scheduler is initialised in both cases.

    Every epoch runs one ``train`` pass. When ``args.use_switch`` is set, on
    the last epoch of each ``args.iter``-epoch cycle the model is toggled with
    ``utils.switching_learning``, trained for one extra pass, and toggled
    back.

    Args:
        args: Run configuration. Reads ``model``, ``dir``, ``epoch``,
            ``use_switch`` and ``iter``; may write ``last_epoch``.
        state_info: Training state holder. Must provide
            ``learning_scheduler_init``, ``load_state_dict``,
            ``model.module`` and ``lr_model``.
        Train_loader: Training data loader, forwarded to ``train``.
        Test_loader: Evaluation data loader, forwarded to ``train``.

    Returns:
        None. Results are reported through ``utils.print_log``.
    """
    start_time = time.time()
    best_prec_result = torch.tensor(0, dtype=torch.float32)
    mode = args.model
    utils.default_model_dir = os.path.join(args.dir, mode)

    checkpoint = utils.load_checkpoint(utils.default_model_dir)
    if not checkpoint:
        args.last_epoch = -1
        state_info.learning_scheduler_init(args, mode)
    else:
        print("loading {}/{}".format(utils.default_model_dir,
                                     "checkpoint_best.pth.tar"))
        state_info.load_state_dict(checkpoint, mode)
        state_info.learning_scheduler_init(args, mode)
        # From here on, checkpoints go to the "cls" sub-directory.
        utils.default_model_dir = os.path.join(utils.default_model_dir, "cls")

    # NOTE(review): the epoch counter always starts at 0, even after a
    # checkpoint was loaded -- confirm that this is the intended behaviour.
    for epoch in range(0, args.epoch):
        best_prec_result = _train_and_keep_best(
            args, state_info, Train_loader, Test_loader, epoch,
            best_prec_result)

        if args.use_switch and epoch % args.iter == args.iter - 1:
            utils.switching_learning(state_info.model.module)
            print('learning Gate')
            best_prec_result = _train_and_keep_best(
                args, state_info, Train_loader, Test_loader, epoch,
                best_prec_result)

            utils.switching_learning(state_info.model.module)
            print('learning Base')

        state_info.lr_model.step()
        utils.print_log('')

    # time.gmtime().tm_hour wraps at 24, so split the elapsed seconds
    # manually to keep the hour count correct for runs longer than a day.
    elapsed = int(time.time() - start_time)
    hours, remainder = divmod(elapsed, 3600)
    mins, secs = divmod(remainder, 60)
    utils.print_log('Best Prec : {:.4f}'.format(best_prec_result.item()))
    utils.print_log('{} hours {} mins {} secs for training'.format(
        hours, mins, secs))


def _train_and_keep_best(args, state_info, Train_loader, Test_loader, epoch,
                         best_prec_result):
    """Run one ``train`` pass; checkpoint and return the new best if improved."""
    epoch_result = train(args, state_info, Train_loader, Test_loader, epoch)

    if epoch_result > best_prec_result:
        best_prec_result = epoch_result
        utils.save_state_checkpoint(state_info, best_prec_result, epoch,
                                    'checkpoint_best.pth.tar',
                                    utils.default_model_dir)
        print('save..')

    return best_prec_result
Ejemplo n.º 2
0
def main(model_dir, model, dataset):
    """Train ``model`` on CIFAR for 350 epochs, alternating two learning modes.

    Each epoch runs a train/test pass, toggles the model with
    ``utils.switching_learning``, runs a second train/test pass, and toggles
    back. A checkpoint is written every fifth epoch. After training,
    ``weight_extract`` and ``utils.conv_weight_L1_printing`` are called.

    Args:
        model_dir: Directory checkpoints are loaded from and saved to.
        model: The network to train (an ``nn.Module``).
        dataset: Either ``'cifar10'`` or ``'cifar100'``.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    utils.default_model_dir = model_dir
    utils.c = None
    utils.str_w = ''
    lr = 0.1
    start_time = time.time()

    if dataset == 'cifar10':
        train_loader, test_loader = utils.cifar10_loader()
    elif dataset == 'cifar100':
        train_loader, test_loader = utils.cifar100_loader()
    else:
        # Fail early instead of hitting an unbound-variable error later on.
        raise ValueError(
            "Unsupported dataset {!r}; expected 'cifar10' or 'cifar100'"
            .format(dataset))

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        print("USE", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model).cuda()
        cudnn.benchmark = True
    else:
        print("NO GPU -_-;")
        # Wrap anyway so that `model.module` and the "module."-prefixed
        # state-dict keys are the same on CPU and GPU; without CUDA devices
        # DataParallel simply forwards to the wrapped module.
        model = nn.DataParallel(model)

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                          weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()
    if use_cuda:
        criterion = criterion.cuda()
    # NOTE(review): train()/test() may still move batches to CUDA themselves;
    # confirm before relying on the CPU path.

    start_epoch = 0
    checkpoint = utils.load_checkpoint(model_dir)
    if checkpoint:
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    utils.init_learning(model.module)

    # Keep `epoch` defined for the post-training calls even when the loop
    # body never runs (resuming from an already finished run).
    epoch = start_epoch - 1
    for epoch in range(start_epoch, 350):
        # Step schedule: lr until epoch 150, lr/10 until 250, lr/100 after.
        if epoch < 150:
            learning_rate = lr
        elif epoch < 250:
            learning_rate = lr * 0.1
        else:
            learning_rate = lr * 0.01
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

        train(model, optimizer, criterion, train_loader, epoch, True)
        test(model, criterion, test_loader, epoch, True)

        utils.switching_learning(model.module)
        print('switching_learning to Gate')

        train(model, optimizer, criterion, train_loader, epoch, False)
        test(model, criterion, test_loader, epoch, False)

        # Second toggle returns the model to the mode it had at the start of
        # the epoch; the original message wrongly repeated "Gate".
        utils.switching_learning(model.module)
        print('switching_learning to Base')

        if epoch % 5 == 0:
            model_filename = 'checkpoint_%03d.pth.tar' % epoch
            utils.save_checkpoint({
                'epoch': epoch,
                'model': model,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, model_filename, model_dir)

    # Measure before the post-processing below, as the message says
    # "for training". tm_hour of time.gmtime() wraps at 24, so split manually.
    elapsed = int(time.time() - start_time)
    hours, remainder = divmod(elapsed, 3600)
    mins, secs = divmod(remainder, 60)

    weight_extract(model, optimizer, criterion, train_loader, epoch)
    utils.conv_weight_L1_printing(model.module)

    print('{} hours {} mins {} secs for training'.format(hours, mins, secs))
Ejemplo n.º 3
0
def main(model_dir, model, dataset, layer_name, layer_n):
    """Train ``model`` on CIFAR for 165 epochs, then prune layers one by one.

    Each epoch runs a train/test pass, toggles the model with
    ``utils.switching_learning``, runs a second train/test pass, toggles back
    and writes a checkpoint. After training, ``weight_extract`` is called,
    the gate statistics are read with ``utils.load_gate_csv`` and the layers
    are pruned in ascending order of ``total_average``, with a test pass
    after each pruning step.

    Args:
        model_dir: Directory checkpoints are loaded from and saved to.
        model: The network to train (an ``nn.Module``).
        dataset: Either ``'cifar10'`` or ``'cifar100'``.
        layer_name: Currently ignored; it is overwritten with
            ``utils.make_layer_name(layer_n)`` before use. Kept for interface
            compatibility.
        layer_n: Forwarded to ``utils.make_layer_name`` to build the list of
            layer names.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    utils.default_model_dir = model_dir
    utils.c = None
    utils.str_w = ''
    lr = 0.1
    start_time = time.time()

    if dataset == 'cifar10':
        train_loader, test_loader = utils.cifar10_loader()
    elif dataset == 'cifar100':
        train_loader, test_loader = utils.cifar100_loader()
    else:
        # Fail early instead of hitting an unbound-variable error later on.
        raise ValueError(
            "Unsupported dataset {!r}; expected 'cifar10' or 'cifar100'"
            .format(dataset))

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        print("USE", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model).cuda()
        cudnn.benchmark = True
    else:
        print("NO GPU -_-;")
        # Wrap anyway so that `model.module` and the "module."-prefixed
        # state-dict keys are the same on CPU and GPU; without CUDA devices
        # DataParallel simply forwards to the wrapped module.
        model = nn.DataParallel(model)

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                          weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()
    if use_cuda:
        criterion = criterion.cuda()
    # NOTE(review): train()/test() may still move batches to CUDA themselves;
    # confirm before relying on the CPU path.

    start_epoch = 0
    checkpoint = utils.load_checkpoint(model_dir)
    if checkpoint:
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    utils.init_learning(model.module)

    # A checkpoint is saved every epoch, so restarting a finished run resumes
    # at epoch 165 and skips the loop entirely. Pre-set `epoch` to the last
    # completed epoch so the post-training code below still has a value.
    epoch = start_epoch - 1
    for epoch in range(start_epoch, 165):  # change 165
        # Step schedule: lr until epoch 80, lr/10 until 120, lr/100 after.
        if epoch < 80:
            learning_rate = lr
        elif epoch < 120:
            learning_rate = lr * 0.1
        else:
            learning_rate = lr * 0.01
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

        train(model, optimizer, criterion, train_loader, epoch, True)
        test(model, criterion, test_loader, epoch, True)

        utils.switching_learning(model.module)
        print('switching_learning to Gate')

        train(model, optimizer, criterion, train_loader, epoch, False)
        test(model, criterion, test_loader, epoch, False)

        # Second toggle returns the model to the mode it had at the start of
        # the epoch; the original message wrongly repeated "Gate".
        utils.switching_learning(model.module)
        print('switching_learning to Base')

        model_filename = 'checkpoint_%03d.pth.tar' % epoch
        utils.save_checkpoint({
            'epoch': epoch,
            'model': model,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, model_filename, model_dir)

    # Measure before the post-processing below, as the message says
    # "for training". tm_hour of time.gmtime() wraps at 24, so split manually.
    elapsed = int(time.time() - start_time)
    hours, remainder = divmod(elapsed, 3600)
    mins, secs = divmod(remainder, 60)

    weight_extract(model, optimizer, criterion, train_loader, epoch)

    class_counter, class_weight_sum, class_average, total_average = \
        utils.load_gate_csv()

    # Ascending sort: index[0] refers to the smallest total_average.
    _, index = torch.sort(total_average)
    layer_name = utils.make_layer_name(layer_n)

    for i in index:
        # weight delete layer_name[i]
        utils.weight_pruning_by_name(model.module, layer_name[i])
        test(model, criterion, test_loader, epoch, True)

    # TODO: change index[0] to 'layerN and layerN-M'
    # (this note was a bare, non-comment line and broke parsing of the file)
    # index about (smallest) index[0], index[1], .... (biggest)
    # layer name change, layer0 to layer'n-1'
    # find 'layer' + str(index[0]) from model.module
    # and change self.z to 0

    # utils.conv_weight_L1_printing(model.module)

    print('{} hours {} mins {} secs for training'.format(hours, mins, secs))
Ejemplo n.º 4
0
def main():
    """Parse the command line, build a ResNet-50 and train it on ImageNet.

    Every epoch consists of two phases: a train/validate/checkpoint pass with
    ``is_main=True``, then -- after toggling the model with
    ``utils.switching_learning`` -- a second train/validate/checkpoint pass
    with ``is_main=False``, after which the model is toggled back.

    Uses the module-level globals ``args`` and ``best_prec1``. ``best_prec1``
    is only assigned here when resuming from a checkpoint, so it is presumably
    initialised at module level -- TODO confirm.
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1

    # create model
    
    print("=> creating model '{}'".format(args.arch))
    
    # NOTE(review): the model is always ResNet-50; args.arch is only printed
    # above and stored in the checkpoints below.
    model = vision_model.resnet50()

    # NOTE(review): the model is wrapped in DataParallel unconditionally, even
    # when args.gpu is set and the warning above says data parallelism is
    # disabled -- confirm which behaviour is intended.
    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            # Checkpoints written below store 'epoch' as epoch + 1, i.e. the
            # next epoch to run.
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    # NOTE: valdir is referenced only by the commented-out torchvision
    # val_loader below; the active loaders come from imagenet_seq.
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # This ImageFolder dataset is consumed only by the DistributedSampler
    # below; it is not passed to the active train_loader.
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

#    train_loader = torch.utils.data.DataLoader(
#        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
#        num_workers=args.workers, pin_memory=True, sampler=train_sampler)
    # The sampler itself is not handed to this loader; it only decides the
    # shuffle flag here and receives set_epoch() in the training loop.
    train_loader = imagenet_seq.data.Loader(
         'train', batch_size=args.batch_size, shuffle=(train_sampler is None), 
         num_workers=args.workers, cuda=True)

#    val_loader = torch.utils.data.DataLoader(
#        datasets.ImageFolder(valdir, transforms.Compose([
#            transforms.Resize(256),
#            transforms.CenterCrop(224),
#            transforms.ToTensor(),
#            normalize,
#        ])),
#        batch_size=args.batch_size, shuffle=False,
#        num_workers=args.workers, pin_memory=True)
    val_loader = imagenet_seq.data.Loader(
	'val', batch_size=args.batch_size, shuffle=False, 
	num_workers=args.workers, cuda=True)


    # Evaluation-only mode: run a single validation pass and stop.
    if args.evaluate:
        validate(val_loader, model, criterion, is_main=True)
        return

    utils.init_learning(model.module)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        # Phase 1: train for one epoch with is_main=True
        train(train_loader, model, criterion, optimizer, epoch, is_main=True)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, is_main=True)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer' : optimizer.state_dict(),
        }, is_best)

        # if epoch % 3 == 2:
        #     for i in range(3):
        # Phase 2: toggle the learning mode and repeat train/validate/save
        # for the same epoch with is_main=False.
        utils.switching_learning(model.module)

        train(train_loader, model, criterion, optimizer, epoch, is_main=False)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, is_main=False)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer' : optimizer.state_dict(),
        }, is_best)

        # Toggle back so the next epoch starts in the initial mode.
        utils.switching_learning(model.module)