Example 1
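All five examples below come from the same DEQ classifier training script (MDEQ/SHINE style) and share roughly the following module-level setup. This is a sketch of the assumed imports; the names listed in the trailing comment are project-specific helpers from that codebase, not library functions.

# Shared imports assumed by the examples below (a sketch).
import os
import pprint
import random
import shutil
import time
from functools import partial
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.autograd.profiler as profiler
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from tensorboardX import SummaryWriter  # or torch.utils.tensorboard
from termcolor import colored

# Project-specific names used below (assumed to come from the repo):
# config, models, update_config_w_args, create_logger, get_optimizer,
# get_model_summary, save_checkpoint, train, validate,
# validate_contractivity, worker_init_fn, DEQFunc2d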
def setup_model(opa=False, dataset='imagenet', model_size='SMALL'):
    seed = 42
    restart_from = 50
    n_epochs = 100
    pretrained = False
    n_gpus = 1
    use_group_norm = False
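    # Flags selecting the DEQ backward-pass variant used by this codebase
    # (SHINE, FPN, adjoint Broyden, OPA); fixed here to the adjoint-Broyden setup.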
    shine = False
    fpn = False
    adjoint_broyden = True
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    try:
        torch.multiprocessing.set_start_method('spawn')
    except RuntimeError:
        pass
    args = update_config_w_args(
        n_epochs=n_epochs,
        pretrained=pretrained,
        n_gpus=n_gpus,
        dataset=dataset,
        model_size=model_size,
        use_group_norm=use_group_norm,
    )
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    logger, final_output_dir, tb_log_dir = create_logger(
        config,
        args.cfg,
        'train',
        shine=shine,
        fpn=fpn,
        seed=seed,
        use_group_norm=use_group_norm,
        adjoint_broyden=adjoint_broyden,
        opa=opa,
    )

    model = getattr(models, config.MODEL.NAME).get_cls_net(
        config,
        shine=shine,
        fpn=fpn,
        gradient_correl=False,
        gradient_ratio=False,
        adjoint_broyden=adjoint_broyden,
        opa=opa,
    ).cuda()

    resume_file = f'checkpoint_{restart_from}.pth.tar'
    model_state_file = os.path.join(final_output_dir, resume_file)
    checkpoint = torch.load(model_state_file)
    model.load_state_dict(checkpoint['state_dict'])

    return model
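
A hypothetical call, assuming a checkpoint_50.pth.tar was previously saved in the matching output directory:

# Hypothetical usage: restore the epoch-50 OPA model for evaluation.
model = setup_model(opa=True, dataset='imagenet', model_size='SMALL')
model.eval()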
Example 2
def train_classifier(
    n_epochs=100,
    pretrained=False,
    n_gpus=1,
    dataset='imagenet',
    model_size='SMALL',
    shine=False,
    fpn=False,
    gradient_correl=False,
    gradient_ratio=False,
    adjoint_broyden=False,
    opa=False,
    refine=False,
    n_refine=None,
    fallback=False,
    save_at=None,
    restart_from=None,
    use_group_norm=False,
    seed=0,
):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    args = update_config_w_args(
        n_epochs=n_epochs,
        pretrained=pretrained,
        n_gpus=n_gpus,
        dataset=dataset,
        model_size=model_size,
        use_group_norm=use_group_norm,
        n_refine=n_refine,
    )
    print(colored("Setting default tensor type to cuda.FloatTensor", "cyan"))
    try:
        torch.multiprocessing.set_start_method('spawn')
    except RuntimeError:
        pass
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    logger, final_output_dir, tb_log_dir = create_logger(
        config,
        args.cfg,
        'train',
        shine=shine,
        fpn=fpn,
        seed=seed,
        use_group_norm=use_group_norm,
        adjoint_broyden=adjoint_broyden,
        opa=opa,
        refine=refine,
        n_refine=n_refine,
        fallback=fallback,
    )

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = getattr(models, config.MODEL.NAME).get_cls_net(
        config,
        shine=shine,
        fpn=fpn,
        gradient_correl=gradient_correl,
        gradient_ratio=gradient_ratio,
        adjoint_broyden=adjoint_broyden,
        refine=refine,
        fallback=fallback,
        opa=opa,
    ).cuda()

    # dump_input = torch.rand(config.TRAIN.BATCH_SIZE_PER_GPU, 3, config.MODEL.IMAGE_SIZE[1], config.MODEL.IMAGE_SIZE[0]).cuda()
    # logger.info(get_model_summary(model, dump_input))

    if config.TRAIN.MODEL_FILE:
        model.load_state_dict(torch.load(config.TRAIN.MODEL_FILE))
        logger.info(colored('=> loading model from {}'.format(config.TRAIN.MODEL_FILE), 'red'))

    # remove any previously copied model files from the output directory
    models_dst_dir = os.path.join(final_output_dir, 'models')
    if os.path.exists(models_dst_dir):
        shutil.rmtree(models_dst_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    gpus = list(config.GPUS)
    model = nn.DataParallel(model, device_ids=gpus).cuda()
    print("Finished constructing model!")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = get_optimizer(config, model)
    lr_scheduler = None

    best_perf = 0.0
    best_model = False
    last_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.RESUME:
        if restart_from is None:
            resume_file = 'checkpoint.pth.tar'
        else:
            resume_file = f'checkpoint_{restart_from}.pth.tar'
        model_state_file = os.path.join(final_output_dir, resume_file)
        if os.path.isfile(model_state_file):
            checkpoint = torch.load(model_state_file)
            last_epoch = checkpoint['epoch']
            best_perf = checkpoint['perf']
            model.module.load_state_dict(checkpoint['state_dict'])

            # Update weight decay if needed
            checkpoint['optimizer']['param_groups'][0]['weight_decay'] = config.TRAIN.WD
            optimizer.load_state_dict(checkpoint['optimizer'])

            if 'lr_scheduler' in checkpoint:
                lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 1e5,
                                  last_epoch=checkpoint['lr_scheduler']['last_epoch'])
                lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            logger.info("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
            best_model = True

    # Data loading code
    dataset_name = config.DATASET.DATASET

    if dataset_name == 'imagenet':
        traindir = os.path.join(config.DATASET.ROOT+'/images', config.DATASET.TRAIN_SET)
        valdir = os.path.join(config.DATASET.ROOT+'/images', config.DATASET.TEST_SET)
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(config.MODEL.IMAGE_SIZE[0]),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_valid = transforms.Compose([
            transforms.Resize(int(config.MODEL.IMAGE_SIZE[0] / 0.875)),
            transforms.CenterCrop(config.MODEL.IMAGE_SIZE[0]),
            transforms.ToTensor(),
            normalize,
        ])
        train_dataset = datasets.ImageFolder(traindir, transform_train)
        valid_dataset = datasets.ImageFolder(valdir, transform_valid)
    else:
        assert dataset_name == "cifar10", "Only CIFAR-10 is supported at this phase"
        classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')  # For reference

        normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
        augment_list = [transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip()] if config.DATASET.AUGMENT else []
        transform_train = transforms.Compose(augment_list + [
            transforms.ToTensor(),
            normalize,
        ])
        transform_valid = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
        train_dataset = datasets.CIFAR10(root=f'{config.DATASET.ROOT}', train=True, download=True, transform=transform_train)
        valid_dataset = datasets.CIFAR10(root=f'{config.DATASET.ROOT}', train=False, download=True, transform=transform_valid)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU*len(gpus),
        shuffle=True,
        num_workers=config.WORKERS,
        pin_memory=True,
        worker_init_fn=partial(worker_init_fn, seed=seed),
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU*len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True,
        worker_init_fn=partial(worker_init_fn, seed=seed),
    )

    # Learning rate scheduler
    if lr_scheduler is None:
        if config.TRAIN.LR_SCHEDULER != 'step':
            lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, len(train_loader)*config.TRAIN.END_EPOCH, eta_min=1e-6)
        elif isinstance(config.TRAIN.LR_STEP, list):
            lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
                last_epoch-1)
        else:
            lr_scheduler = torch.optim.lr_scheduler.StepLR(
                optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
                last_epoch-1)

    # Training code
    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        topk = (1,5) if dataset_name == 'imagenet' else (1,)
        if config.TRAIN.LR_SCHEDULER == 'step':
            lr_scheduler.step()

        # train for one epoch
        train(config, train_loader, model, criterion, optimizer, lr_scheduler, epoch,
              final_output_dir, tb_log_dir, writer_dict, topk=topk, opa=opa)
        torch.cuda.empty_cache()

        # evaluate on validation set
        perf_indicator = validate(config, valid_loader, model, criterion, lr_scheduler, epoch,
                                  final_output_dir, tb_log_dir, writer_dict, topk=topk)
        torch.cuda.empty_cache()
        writer_dict['writer'].flush()

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        checkpoint_files = ['checkpoint.pth.tar']
        if save_at is not None and save_at == epoch:
            checkpoint_files.append(f'checkpoint_{epoch}.pth.tar')
        for checkpoint_file in checkpoint_files:
            save_checkpoint({
                'epoch': epoch + 1,
                'model': config.MODEL.NAME,
                'state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
            }, best_model, final_output_dir, filename=checkpoint_file)

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth.tar')
    logger.info('saving final model state to {}'.format(
        final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
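
A hypothetical invocation; the dataset root, YAML config and checkpoint paths are all resolved by the repo's helpers:

# Hypothetical usage: train the SHINE variant on CIFAR-10 and keep the
# epoch-50 checkpoint around for later restarts.
train_classifier(
    dataset='cifar10',
    model_size='SMALL',
    shine=True,
    n_epochs=100,
    save_at=50,
    seed=42,
)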
Example 3
def train_classifier(
        n_epochs=100,
        pretrained=False,
        n_gpus=1,
        dataset='imagenet',
        model_size='SMALL',
        shine=False,
        fpn=False,
        fallback=False,
        refine=False,
        n_refine=None,
        gradient_correl=False,
        gradient_ratio=False,
        save_at=None,
        restart_from=None,
        use_group_norm=False,
        seed=0,
        compute_partial=True,
        compute_total=True,
        f_thres_range=range(2, 200),
        n_samples=1,
):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    args = update_config_w_args(
        n_epochs=n_epochs,
        pretrained=pretrained,
        n_gpus=n_gpus,
        dataset=dataset,
        model_size=model_size,
        use_group_norm=use_group_norm,
        n_refine=n_refine,
    )
    print(colored("Setting default tensor type to cuda.FloatTensor", "cyan"))
    try:
        torch.multiprocessing.set_start_method('spawn')
    except RuntimeError:
        pass
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    logger, final_output_dir, tb_log_dir = create_logger(
        config,
        args.cfg,
        'train',
        shine=shine,
        fpn=fpn,
        seed=seed,
        use_group_norm=use_group_norm,
        refine=refine,
        n_refine=n_refine,
        fallback=fallback,
    )

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = getattr(models, config.MODEL.NAME).get_cls_net(
        config,
        shine=shine,
        fpn=fpn,
        gradient_correl=gradient_correl,
        gradient_ratio=gradient_ratio,
        refine=refine,
        fallback=fallback,
    ).cuda()

    dump_input = torch.rand(config.TRAIN.BATCH_SIZE_PER_GPU, 3,
                            config.MODEL.IMAGE_SIZE[1],
                            config.MODEL.IMAGE_SIZE[0]).cuda()
    logger.info(get_model_summary(model, dump_input))

    if config.TRAIN.MODEL_FILE:
        model.load_state_dict(torch.load(config.TRAIN.MODEL_FILE))
        logger.info(
            colored('=> loading model from {}'.format(config.TRAIN.MODEL_FILE),
                    'red'))

    # remove any previously copied model files from the output directory
    models_dst_dir = os.path.join(final_output_dir, 'models')
    if os.path.exists(models_dst_dir):
        shutil.rmtree(models_dst_dir)

    writer_dict = None

    print("Finished constructing model!")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = get_optimizer(config, model)
    lr_scheduler = None

    best_perf = 0.0
    best_model = False
    last_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.RESUME:
        if restart_from is None:
            resume_file = 'checkpoint.pth.tar'
        else:
            resume_file = f'checkpoint_{restart_from}.pth.tar'
        model_state_file = os.path.join(final_output_dir, resume_file)
        if os.path.isfile(model_state_file):
            checkpoint = torch.load(model_state_file)
            last_epoch = checkpoint['epoch']
            best_perf = checkpoint['perf']
            model.load_state_dict(checkpoint['state_dict'])

            # Update weight decay if needed
            checkpoint['optimizer']['param_groups'][0][
                'weight_decay'] = config.TRAIN.WD
            optimizer.load_state_dict(checkpoint['optimizer'])

            if 'lr_scheduler' in checkpoint:
                lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                    optimizer,
                    1e5,
                    last_epoch=checkpoint['lr_scheduler']['last_epoch'])
                lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            logger.info("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
            best_model = True

    # Data loading code
    dataset_name = config.DATASET.DATASET

    if dataset_name == 'imagenet':
        traindir = os.path.join(config.DATASET.ROOT + '/images',
                                config.DATASET.TRAIN_SET)
        valdir = os.path.join(config.DATASET.ROOT + '/images',
                              config.DATASET.TEST_SET)
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(config.MODEL.IMAGE_SIZE[0]),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        train_dataset = datasets.ImageFolder(traindir, transform_train)
    else:
        assert dataset_name == "cifar10", "Only CIFAR-10 is supported at this phase"
        classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog',
                   'horse', 'ship', 'truck')  # For reference

        normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                         (0.2023, 0.1994, 0.2010))
        augment_list = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip()
        ] if config.DATASET.AUGMENT else []
        transform_train = transforms.Compose(augment_list + [
            transforms.ToTensor(),
            normalize,
        ])
        train_dataset = datasets.CIFAR10(root=f'{config.DATASET.ROOT}',
                                         train=True,
                                         download=True,
                                         transform=transform_train)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=0,
        pin_memory=False,
        worker_init_fn=partial(worker_init_fn, seed=seed),
    )

    # Learning rate scheduler
    if lr_scheduler is None:
        if config.TRAIN.LR_SCHEDULER != 'step':
            lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer,
                len(train_loader) * config.TRAIN.END_EPOCH,
                eta_min=1e-6)
        elif isinstance(config.TRAIN.LR_STEP, list):
            lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
                last_epoch - 1)
        else:
            lr_scheduler = torch.optim.lr_scheduler.StepLR(
                optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
                last_epoch - 1)
    # train for one epoch
    forward_accelerated_times = []
    backward_accelerated_times = []
    forward_original_times = []
    backward_original_times = []
    data_iter = iter(train_loader)
    for i_data in range(n_samples):
        input, target = next(data_iter)
        model.train()
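        # Timed passes with the backward variant requested by the caller
        # (shine / fpn / gradient tricks / refine / fallback)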
        if compute_partial:
            model.deq.shine = shine
            model.deq.fpn = fpn
            model.deq.gradient_ratio = gradient_ratio
            model.deq.gradient_correl = gradient_correl
            model.deq.refine = refine
            model.deq.fallback = fallback
            for f_thres in f_thres_range:
                model.f_thres = f_thres
                start_forward = time.time()
                output = model(input.cuda(),
                               train_step=-(i_data + 1),
                               writer=None)
                # wait for queued CUDA kernels so the wall-clock timing is meaningful
                torch.cuda.synchronize()
                end_forward = time.time()
                forward_accelerated_times.append(end_forward - start_forward)
                start_backward = time.time()
                target = target.cuda(non_blocking=True)

                loss = criterion(output, target)

                # compute gradient and do update step
                optimizer.zero_grad()
                loss.backward()
                torch.cuda.synchronize()
                end_backward = time.time()
                backward_accelerated_times.append(end_backward -
                                                  start_backward)

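        # Reference timings: same forward sweep, plain backward with every
        # acceleration flag disabled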
        if compute_total:
            # model.f_thres = 30
            model.deq.shine = False
            model.deq.fpn = False
            model.deq.gradient_ratio = False
            model.deq.gradient_correl = False
            model.deq.refine = False
            model.deq.fallback = False
            for f_thres in f_thres_range:
                model.f_thres = f_thres
                start_forward = time.time()
                output = model(input.cuda(),
                               train_step=-(i_data + 1),
                               writer=None)
                torch.cuda.synchronize()
                end_forward = time.time()
                forward_original_times.append(end_forward - start_forward)
                start_backward = time.time()
                target = target.cuda(non_blocking=True)

                loss = criterion(output, target)

                # compute gradient and do update step
                optimizer.zero_grad()
                loss.backward()
                torch.cuda.synchronize()
                end_backward = time.time()
                backward_original_times.append(end_backward - start_backward)
    method_name = Path(final_output_dir).name
    torch.save(torch.tensor(forward_accelerated_times),
               f'{method_name}_forward_times.pt')
    torch.save(torch.tensor(backward_accelerated_times),
               f'{method_name}_backward_times.pt')
    torch.save(torch.tensor(forward_original_times),
               f'{model_size}_original_forward_times.pt')
    torch.save(torch.tensor(backward_original_times),
               f'{model_size}_original_backward_times.pt')
    return np.median(backward_accelerated_times)
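
Since the raw timings are persisted, they can be compared offline; a minimal sketch, assuming the .pt files written above sit in the working directory (the accelerated prefix depends on how the output directory was named):

# Minimal offline check of the saved timings (file names assumed to match
# the patterns written above; 'shine' is a hypothetical method_name).
accel = torch.load('shine_backward_times.pt')
orig = torch.load('SMALL_original_backward_times.pt')
print('median backward speed-up:', (orig.median() / accel.median()).item())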
Example 4
def eval_ratio_fb_classifier(
    n_epochs=100,
    pretrained=False,
    n_gpus=1,
    dataset='imagenet',
    model_size='SMALL',
    shine=False,
    fpn=False,
    gradient_correl=False,
    gradient_ratio=False,
    adjoint_broyden=False,
    opa=False,
    refine=False,
    fallback=False,
    save_at=None,
    restart_from=None,
    use_group_norm=False,
    seed=0,
    n_samples=1,
):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    args = update_config_w_args(
        n_epochs=n_epochs,
        pretrained=pretrained,
        n_gpus=n_gpus,
        dataset=dataset,
        model_size=model_size,
        use_group_norm=use_group_norm,
    )
    print(colored("Setting default tensor type to cuda.FloatTensor", "cyan"))
    try:
        torch.multiprocessing.set_start_method('spawn')
    except RuntimeError:
        pass

    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    logger, final_output_dir, tb_log_dir = create_logger(
        config,
        args.cfg,
        'train',
        shine=shine,
        fpn=fpn,
        seed=seed,
        use_group_norm=use_group_norm,
        adjoint_broyden=adjoint_broyden,
        opa=opa,
        refine=refine,
        fallback=fallback,
    )

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = getattr(models, config.MODEL.NAME).get_cls_net(
        config,
        shine=shine,
        fpn=fpn,
        gradient_correl=gradient_correl,
        gradient_ratio=gradient_ratio,
        adjoint_broyden=adjoint_broyden,
        refine=refine,
        fallback=fallback,
        opa=opa,
    ).cuda()

    # dump_input = torch.rand(config.TRAIN.BATCH_SIZE_PER_GPU, 3, config.MODEL.IMAGE_SIZE[1], config.MODEL.IMAGE_SIZE[0]).cuda()
    # logger.info(get_model_summary(model, dump_input))

    if config.TRAIN.MODEL_FILE:
        model.load_state_dict(torch.load(config.TRAIN.MODEL_FILE))
        logger.info(
            colored('=> loading model from {}'.format(config.TRAIN.MODEL_FILE),
                    'red'))

    # remove any previously copied model files from the output directory
    models_dst_dir = os.path.join(final_output_dir, 'models')
    if os.path.exists(models_dst_dir):
        shutil.rmtree(models_dst_dir)

    gpus = list(config.GPUS)
    print("Finished constructing model!")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    last_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.RESUME:
        if restart_from is None:
            resume_file = 'checkpoint.pth.tar'
        else:
            resume_file = f'checkpoint_{restart_from}.pth.tar'
        model_state_file = os.path.join(final_output_dir, resume_file)
        if os.path.isfile(model_state_file):
            checkpoint = torch.load(model_state_file)
            last_epoch = checkpoint['epoch']
            best_perf = checkpoint['perf']
            model.load_state_dict(checkpoint['state_dict'])

    # Data loading code
    dataset_name = config.DATASET.DATASET

    if dataset_name == 'imagenet':
        traindir = os.path.join(config.DATASET.ROOT + '/images',
                                config.DATASET.TRAIN_SET)
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(config.MODEL.IMAGE_SIZE[0]),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        train_dataset = datasets.ImageFolder(traindir, transform_train)
    else:
        assert dataset_name == "cifar10", "Only CIFAR-10 is supported at this phase"
        classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog',
                   'horse', 'ship', 'truck')  # For reference

        normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                         (0.2023, 0.1994, 0.2010))
        augment_list = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip()
        ] if config.DATASET.AUGMENT else []
        transform_train = transforms.Compose(augment_list + [
            transforms.ToTensor(),
            normalize,
        ])
        train_dataset = datasets.CIFAR10(root=f'{config.DATASET.ROOT}',
                                         train=True,
                                         download=True,
                                         transform=transform_train)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=True,
        num_workers=10,
        pin_memory=True,
        worker_init_fn=partial(worker_init_fn, seed=seed),
    )

    iter_loader = iter(train_loader)
    ratios = []
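    # Profile a few batches end to end: data loading, feature extraction,
    # a no-grad forward through the copied full stage, a grad-enabled forward,
    # and the backward pass whose cost is compared to the forward's.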
    with torch.autograd.profiler.profile(use_cuda=True) as prof:
        for i_sample in range(n_samples):
            with profiler.record_function("Data loading"):
                input, target = next(iter_loader)
                input = input.cuda(non_blocking=False)
            with profiler.record_function("Feature extraction PASS"):
                x_list, z_list = model.feature_extraction(input)
            # For variational dropout mask resetting and weight normalization re-computations
            model.fullstage._reset(z_list)
            model.fullstage_copy._copy(model.fullstage)
            x_list = [x.clone().detach().requires_grad_() for x in x_list]
            z_list = [z.clone().detach().requires_grad_() for z in z_list]
            with profiler.record_function("Forward PASS"):
                start_forward = time.time()
                with torch.no_grad():
                    model.fullstage_copy(z_list, x_list)
                    torch.cuda.synchronize()
                end_forward = time.time()
                time_forward = end_forward - start_forward
            with profiler.record_function("Forward PASS enable grad"):
                z_list_new = model.fullstage_copy(z_list, x_list)
            z = DEQFunc2d.list2vec(z_list_new)
            with profiler.record_function("Backward PASS"):
                start_backward = time.time()
                z.backward(z)
                torch.cuda.synchronize()
                end_backward = time.time()
            time_backward = end_backward - start_backward
            ratios.append(time_backward / time_forward)
    print(prof.key_averages().table(sort_by="cuda_time_total"))
    prof.export_chrome_trace("trace.json")
    return ratios
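
The returned per-sample ratios can be summarized directly, e.g.:

# Hypothetical usage: backward/forward time ratio of the DEQ full stage.
ratios = eval_ratio_fb_classifier(dataset='cifar10', model_size='SMALL',
                                  n_samples=5)
print('median backward/forward ratio:', np.median(ratios))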
Example 5
def evaluate_classifier(
    n_gpus=1,
    dataset='imagenet',
    model_size='SMALL',
    shine=False,
    fpn=False,
    refine=False,
    n_refine=None,
    fallback=False,
    n_samples=None,
    seed=0,
    adjoint_broyden=False,
    opa=False,
    use_group_norm=False,
    check_contract=False,
    n_iter=20,
):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    args = update_config_w_args(
        n_gpus=n_gpus,
        dataset=dataset,
        model_size=model_size,
        use_group_norm=use_group_norm,
    )

    logger, final_output_dir, tb_log_dir = create_logger(
        config,
        args.cfg,
        'valid',
        shine=shine,
        fpn=fpn,
        seed=seed,
        use_group_norm=use_group_norm,
        adjoint_broyden=adjoint_broyden,
        opa=opa,
        refine=refine,
        n_refine=n_refine,
        fallback=fallback,
    )

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = getattr(models, config.MODEL.NAME).get_cls_net(
        config,
        shine=shine,
        fpn=fpn,
        refine=refine,
        fallback=fallback,
        adjoint_broyden=adjoint_broyden)

    dump_input = torch.rand(
        (1, 3, config.MODEL.IMAGE_SIZE[1], config.MODEL.IMAGE_SIZE[0]))
    logger.info(get_model_summary(model, dump_input))

    if config.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(config.TEST.MODEL_FILE))
        model.load_state_dict(torch.load(config.TEST.MODEL_FILE))
    else:
        model_state_file = os.path.join(final_output_dir,
                                        'final_state.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    gpus = list(config.GPUS)
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()

    # Data loading code
    dataset_name = config.DATASET.DATASET

    if dataset_name == 'imagenet':
        valdir = os.path.join(config.DATASET.ROOT + '/images',
                              config.DATASET.TEST_SET)
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        transform_valid = transforms.Compose([
            transforms.Resize(int(config.MODEL.IMAGE_SIZE[0] / 0.875)),
            transforms.CenterCrop(config.MODEL.IMAGE_SIZE[0]),
            transforms.ToTensor(),
            normalize,
        ])
        valid_dataset = datasets.ImageFolder(valdir, transform_valid)
    else:
        assert dataset_name == "cifar10", "Only CIFAR-10 is supported at this phase"
        normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                         (0.2023, 0.1994, 0.2010))
        transform_valid = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
        valid_dataset = datasets.CIFAR10(root=f'{config.DATASET.ROOT}',
                                         train=False,
                                         download=True,
                                         transform=transform_valid)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=(config.TEST.BATCH_SIZE_PER_GPU * len(gpus)
                    if not check_contract else 1),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    # evaluate on validation set
    if not check_contract:
        return 'top1', validate(
            config,
            valid_loader,
            model,
            criterion,
            None,
            None,
            final_output_dir,
            tb_log_dir,
            None,
        ).cpu().numpy()
    else:
        return 'maxeigen', validate_contractivity(
            valid_loader,
            model.module.cuda(),
            n_iter=n_iter,
        ).cpu().numpy()
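
A hypothetical call for each of the two modes:

# Hypothetical usage: plain validation vs. contractivity check.
metric, value = evaluate_classifier(dataset='cifar10', model_size='SMALL',
                                    shine=True)
print(metric, value)  # ('top1', ...)
metric, value = evaluate_classifier(dataset='cifar10', check_contract=True,
                                    n_iter=20)
print(metric, value)  # ('maxeigen', ...)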