Example #1
def load_data(dataset, root, batch_size, num_workers):
    """
    Load dataset.

    Args
        dataset(str): Dataset name.
        root(str): Path of dataset.
        batch_size(int): Batch size.
        num_workers(int): Number of loading data threads.

    Returns
        train_dataloader, query_dataloader, retrieval_dataloader(torch.utils.data.DataLoader): Data loader.
    """
    if dataset == 'cifar-10':
        train_dataloader, query_dataloader, retrieval_dataloader = cifar10.load_data(
            root,
            batch_size,
            num_workers,
        )
    elif dataset == 'nus-wide-tc21':
        train_dataloader, query_dataloader, retrieval_dataloader = nuswide.load_data(
            root, batch_size, num_workers)
    elif dataset == 'imagenet-tc100':
        train_dataloader, query_dataloader, retrieval_dataloader = imagenet.load_data(
            root,
            batch_size,
            num_workers,
        )
    else:
        raise ValueError("Invalid dataset name!")

    return train_dataloader, query_dataloader, retrieval_dataloader
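For reference, a call site might look like the following; the dataset root path here is hypothetical:

train_loader, query_loader, retrieval_loader = load_data(
    dataset='cifar-10',
    root='datasets/cifar-10',  # hypothetical dataset path
    batch_size=128,
    num_workers=4,
)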
Example #2
def main():
    depth = 64
    bin_quantile = 0.2
    model_pickle = 'models/nestedDropoutAutoencoder_deep_ReLU_21-01-07__01-18-13.pkl'

    dataloader = cifar10.get_dataloader(download=True)
    device = utils.get_device()
    data = cifar10.load_data(dataloader)
    print('Data loaded')

    autoencoder: Autoencoder = torch.load(model_pickle, map_location=device)
    autoencoder.eval()
    print('Model loaded')

    representation = utils.get_data_representation(autoencoder, dataloader,
                                                   device)
    del autoencoder
    data_repr = utils.binarize_data(representation, bin_quantile).cpu()
    print('Data representation created')

    binary_tree = BinaryTree(data, data_repr, tree_depth=depth)
    print(f'Binary tree created, with {binary_tree.get_num_nodes():,} nodes')
    with open(f'pickles/binary_tree_{depth}.pkl', 'wb') as f:
        pickle.dump({
            'binary tree': binary_tree,
            'data_repr': data_repr
        }, f)
    print('The binary tree has been saved')
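utils.binarize_data is not shown in this snippet; as a rough sketch (an assumption about its behavior, not the repository's actual code), a quantile-based binarization could look like this:

import torch

def binarize_data(representation, bin_quantile):
    # Assumed behavior: threshold each feature at its bin_quantile quantile,
    # mapping the top (1 - bin_quantile) fraction of values to 1, the rest to 0.
    thresholds = torch.quantile(representation, bin_quantile, dim=0)
    return (representation > thresholds).to(torch.uint8)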
Example #3
    def __init__(self, args, logger=None):
        self.args = args
        self.logger = logger

        # load training data
        self.input_image_size = {
            "cifar10": 32,  # CIFAR-10
            "imagenet": 224,  # ImageNet
        }[self.args.dataset]

        if self.args.dataset == 'cifar10':
            self.train_loader, self.val_loader = cifar10.load_data(self.args)
        else:
            data_tmp = imagenet.Data(args)
            self.train_loader = data_tmp.train_loader
            self.val_loader = data_tmp.test_loader

        # CIFAR-10: plain cross-entropy
        self.criterion = nn.CrossEntropyLoss().cuda()
        # ImageNet: cross-entropy with label smoothing of 0.1
        CLASSES = 1000
        self.criterion_smooth = utils.CrossEntropyLabelSmooth(CLASSES, 0.1).cuda()

        self.load_model(self.get_prune_ratio())
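utils.CrossEntropyLabelSmooth is not defined in this snippet; a common PyTorch formulation of label-smoothed cross-entropy (a sketch, not necessarily this repository's exact implementation) is:

import torch
import torch.nn as nn

class CrossEntropyLabelSmooth(nn.Module):
    def __init__(self, num_classes, epsilon):
        super().__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        log_probs = self.logsoftmax(inputs)
        # One-hot targets blended with a uniform distribution over classes.
        one_hot = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        smoothed = (1 - self.epsilon) * one_hot + self.epsilon / self.num_classes
        return (-smoothed * log_probs).mean(0).sum()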
Example #4
def load_data(dataset, root, num_seen, batch_size, num_workers):
    """
    Load dataset.

    Args
        dataset(str): Dataset name.
        root(str): Path of dataset.
        num_seen(int): Number of seen classes.
        batch_size(int): Batch size.
        num_workers(int): Number of loading data threads.

    Returns
        query_dataloader, seen_dataloader, unseen_dataloader, retrieval_dataloader(torch.utils.data.DataLoader): Data loader.
    """
    if dataset == 'cifar-10':
        query_dataloader, seen_dataloader, unseen_dataloader, retrieval_dataloader = cifar10.load_data(root,
                                                                                                       num_seen,
                                                                                                       batch_size,
                                                                                                       num_workers,
                                                                                                       )
    # elif dataset == 'nus-wide-tc21':
    #     query_dataloader, seen_dataloader, unseen_dataloader, retrieval_dataloader = nuswide.load_data(root,
    #                                                                                                    num_seen,
    #                                                                                                    batch_size,
    #                                                                                                    num_workers,
    #                                                                                                    )
    else:
        raise ValueError("Invalid dataset name!")

    return query_dataloader, seen_dataloader, unseen_dataloader, retrieval_dataloader
Example #5
def load_data(dataset, root, num_query, num_train, batch_size, num_workers):
    """
    Load dataset.

    Args
        dataset(str): Dataset name.
        root(str): Path of dataset.
        num_query(int): Number of query data points.
        num_train(int): Number of training data points.
        batch_size(int): Batch size.
        num_workers(int): Number of loading data threads.

    Returns
        query_dataloader, train_dataloader, retrieval_dataloader(torch.utils.data.DataLoader): Data loader.
    """
    if dataset == 'cifar-10':
        query_dataloader, train_dataloader, retrieval_dataloader = cifar10.load_data(root,
                                                                                     num_query,
                                                                                     num_train,
                                                                                     batch_size,
                                                                                     num_workers,
                                                                                     )
    elif dataset == 'nus-wide-tc10':
        # The leading 10/21 selects how many of the most frequent NUS-WIDE concepts to keep.
        query_dataloader, train_dataloader, retrieval_dataloader = nuswide.load_data(10,
                                                                                     root,
                                                                                     num_query,
                                                                                     num_train,
                                                                                     batch_size,
                                                                                     num_workers,
                                                                                     )
    elif dataset == 'nus-wide-tc21':
        query_dataloader, train_dataloader, retrieval_dataloader = nuswide.load_data(21,
                                                                                     root,
                                                                                     num_query,
                                                                                     num_train,
                                                                                     batch_size,
                                                                                     num_workers
                                                                                     )
    elif dataset == 'flickr25k':
        query_dataloader, train_dataloader, retrieval_dataloader = flickr25k.load_data(root,
                                                                                       num_query,
                                                                                       num_train,
                                                                                       batch_size,
                                                                                       num_workers,
                                                                                       )
    elif dataset == 'imagenet':
        query_dataloader, train_dataloader, retrieval_dataloader = imagenet.load_data(root,
                                                                                      batch_size,
                                                                                      num_workers,
                                                                                      )
    else:
        raise ValueError("Invalid dataset name!")

    return query_dataloader, train_dataloader, retrieval_dataloader
def get_data(dataset="mnist",
             dataformat="NHWC",
             path=os.path.expanduser("~/.datasets/")):
    # the data, shuffled and split between train and test sets
    if not os.path.exists(path):
        os.makedirs(path)
    if dataset == "mnist":
        (x_train,
         y_train), (x_test,
                    y_test), input_shape, labels = mnist.load_data(path)
    elif dataset == "fashion_mnist":
        (x_train, y_train), (
            x_test,
            y_test), input_shape, labels = fashion_mnist.load_data(path)
    elif dataset == "cifar10":
        (x_train,
         y_train), (x_test,
                    y_test), input_shape, labels = cifar10.load_data(path)
    elif dataset == "mnist_rot":
        x_train, x_test, y_train, y_test, input_shape, labels = mnist_rot.load_data(
            path)
    elif dataset == "cluttered_mnist":
        (x_train, y_train), (
            x_test,
            y_test), input_shape, labels = cluttered_mnist.load_data(path)
    elif dataset == "lsa16":
        x_train, x_test, y_train, y_test, input_shape, labels = lsa16.load_data(
            path, version="lsa32x32_nr_rgb_black_background", test_subjects=[9])
    elif dataset == "pugeault":
        x_train, x_test, y_train, y_test, input_shape, labels = pugeault.load_data(
            path)
    elif dataset == "irish":
        x_train, x_test, y_train, y_test, input_shape, labels = irish.load_data(
            path)
    else:
        raise ValueError("Unknown dataset: %s" % dataset)

    if dataformat == 'NCHW':
        # convert from channels-last (NHWC) to channels-first (NCHW)
        x_train = x_train.transpose([0, 3, 1, 2])
        x_test = x_test.transpose([0, 3, 1, 2])
    elif dataformat == "NHWC":
        pass  # already in this format
    else:
        raise ValueError("Invalid channel format %s" % dataformat)

    num_classes = len(labels)
    # convert class vectors to binary class matrices
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    return (x_train, y_train), (x_test, y_test), input_shape, len(labels), labels
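to_categorical converts integer class labels into one-hot rows ("binary class matrices"); a minimal NumPy equivalent, for illustration only, is:

import numpy as np

def to_categorical(y, num_classes):
    # e.g. y = [0, 2, 1] with num_classes = 3 becomes
    # [[1, 0, 0], [0, 0, 1], [0, 1, 0]]
    y = np.asarray(y, dtype='int').reshape(-1)
    return np.eye(num_classes, dtype='float32')[y]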
Example #7
def load_data(opt):
    """加载数据
    Parameters
        opt: Parser
        参数
    Returns
        DataLoader
        数据加载器
    """
    if opt.dataset == 'cifar10':
        return cifar10.load_data(opt)
    elif opt.dataset == 'nus-wide':
        return nus_wide.load_data(opt)
    else:
        raise ValueError("Invalid dataset name!")
Example #8
def test_retrieval_times():
    binary_tree_pickle = 'pickles/binary_tree_64.pkl'
    current_time = utils.get_current_time()

    data = cifar10.load_data()
    print('Data loaded')

    pickle_dict = pickle.load(open(binary_tree_pickle, 'rb'))
    binary_tree = pickle_dict['binary tree']
    binarized_repr = pickle_dict['data_repr']
    print('Binary tree loaded')

    repr_dim = min(binarized_repr.shape[1], binary_tree.get_depth())

    # binary tree retrieval
    def tree_search_i(sample, i):
        return binary_tree.search_tree(list(sample)[:i], max_depth=i)

    tree_search_times = evaluate_retrieval_method(binarized_repr,
                                                  tree_search_i, repr_dim)
    pickle.dump(tree_search_times,
                open(f'pickles/or_retrieval_times_{current_time}.pkl', 'wb'))

    # linear scan
    def linear_scan_i(sample, i):
        binarized_repr_i = binarized_repr[:, :i].view(len(binarized_repr), -1)
        return linear_scan(sample[:i], data, binarized_repr_i)

    linear_scan_times = evaluate_retrieval_method(binarized_repr,
                                                  linear_scan_i, repr_dim)
    pickle.dump(linear_scan_times,
                open(f'pickles/ls_retrieval_times_{current_time}.pkl', 'wb'))

    # tree_search_times = pickle.load(open('pickles/or_retrieval_times_21-01-14__16-18-02.pkl', 'rb'))
    # linear_scan_times = pickle.load(open('pickles/ls_retrieval_times_21-01-14__13-03-04.pkl', 'rb'))

    # plotting
    plt.plot(list(tree_search_times.keys()),
             list(tree_search_times.values()),
             label='Tree Search')
    plt.plot(list(linear_scan_times.keys()),
             list(linear_scan_times.values()),
             label='Linear Scan')
    plt.xlabel('Code Length')
    plt.ylabel('Average retrieval time per query')
    plt.title('Retrieval time per code length')
    plt.xticks(list(tree_search_times.keys()))
    plt.yscale('log')
    plt.legend()
    plt.savefig('plots/retrieval_times')
    plt.show()
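linear_scan and evaluate_retrieval_method are defined elsewhere; as an illustration of the scan step (an assumed minimal version, taking codes as an (N, d) 0/1 tensor), a Hamming-distance search could be:

import torch

def linear_scan(query_code, data, codes):
    # Hamming distance of the query against every stored binary code;
    # return the item whose code is closest.
    dists = (codes != query_code).sum(dim=1)
    return data[torch.argmin(dists)]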
Example #9
def main():

    cudnn.benchmark = True
    cudnn.enabled = True
    logger.info("args = %s", args)

    compress_rate = None
    if args.compress_rate:
        import re
        cprate_str = args.compress_rate
        cprate_str_list = cprate_str.split('+')
        pat_cprate = re.compile(r'\d+\.\d*')
        pat_num = re.compile(r'\*\d+')
        cprate = []
        for x in cprate_str_list:
            num = 1
            find_num = re.findall(pat_num, x)
            if find_num:
                assert len(find_num) == 1
                num = int(find_num[0].replace('*', ''))
            find_cprate = re.findall(pat_cprate, x)
            assert len(find_cprate) == 1
            cprate += [float(find_cprate[0])] * num

        compress_rate = cprate
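        # Worked example: compress_rate '0.3+0.2*3' parses to [0.3, 0.2, 0.2, 0.2];
        # '*n' repeats the preceding rate n times.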

    # load model
    logger.info('compress_rate:' + str(compress_rate))
    logger.info('==> Building model..')
    model = eval(args.arch)(compress_rate=compress_rate).cuda()
    logger.info(model)

    # calculate model size
    input_image_size = 32
    input_image = torch.randn(1, 3, input_image_size, input_image_size).cuda()
    flops, params = profile(model, inputs=(input_image,))
    logger.info('Params: %.2f' % (params))
    logger.info('Flops: %.2f' % (flops))

    # load training data
    train_loader, val_loader = cifar10.load_data(args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if args.test_only:
        if os.path.isfile(args.test_model_dir):
            logger.info('loading checkpoint {} ..........'.format(args.test_model_dir))
            checkpoint = torch.load(args.test_model_dir)
            model.load_state_dict(checkpoint['state_dict'])
            valid_obj, valid_top1_acc, valid_top5_acc = validate(0, val_loader, model, criterion, args)
        else:
            logger.info('please specify a checkpoint file')
        return

    if len(args.gpu) > 1:
        device_id = []
        for i in range((len(args.gpu) + 1) // 2):
            device_id.append(i)
        model = nn.DataParallel(model, device_ids=device_id).cuda()

    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)
    lr_decay_step = list(map(int, args.lr_decay_step.split(',')))
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=lr_decay_step, gamma=0.1)

    start_epoch = 0
    best_top1_acc = 0

    # load the checkpoint if it exists
    checkpoint_dir = os.path.join(args.job_dir, 'checkpoint.pth.tar')
    if args.resume:
        logger.info('loading checkpoint {} ..........'.format(checkpoint_dir))
        checkpoint = torch.load(checkpoint_dir)
        start_epoch = checkpoint['epoch'] + 1
        best_top1_acc = checkpoint['best_top1_acc']

        # deal with the single-multi GPU problem
        new_state_dict = OrderedDict()
        tmp_ckpt = checkpoint['state_dict']
        if len(args.gpu) > 1:
            for k, v in tmp_ckpt.items():
                new_state_dict['module.' + k.replace('module.', '')] = v
        else:
            for k, v in tmp_ckpt.items():
                new_state_dict[k.replace('module.', '')] = v

        model.load_state_dict(new_state_dict)
        logger.info("loaded checkpoint {} epoch = {}".format(checkpoint_dir, checkpoint['epoch']))
    else:
        if args.use_pretrain:
            logger.info('resuming from pretrain model')
            origin_model = eval(args.arch)(compress_rate=[0.] * 100).cuda()
            ckpt = torch.load(args.pretrain_dir, map_location='cuda:0')

            #if args.arch=='resnet_56':
            #    origin_model.load_state_dict(ckpt['state_dict'],strict=False)
            if args.arch == 'densenet_40' or args.arch == 'resnet_110':
                new_state_dict = OrderedDict()
                for k, v in ckpt['state_dict'].items():
                    new_state_dict[k.replace('module.', '')] = v
                origin_model.load_state_dict(new_state_dict)
            else:
                origin_model.load_state_dict(ckpt['state_dict'])

            oristate_dict = origin_model.state_dict()

            if args.arch == 'googlenet':
                load_google_model(model, oristate_dict)
            elif args.arch == 'vgg_16_bn':
                load_vgg_model(model, oristate_dict)
            elif args.arch == 'resnet_56':
                load_resnet_model(model, oristate_dict, 56)
            elif args.arch == 'resnet_110':
                load_resnet_model(model, oristate_dict, 110)
            elif args.arch == 'densenet_40':
                load_densenet_model(model, oristate_dict)
            else:
                raise ValueError('unsupported architecture: %s' % args.arch)
        else:
            logger.info('training from scratch')

    # adjust the learning rate according to the checkpoint
    for epoch in range(start_epoch):
        scheduler.step()

    # train the model
    epoch = start_epoch
    while epoch < args.epochs:
        train_obj, train_top1_acc, train_top5_acc = train(epoch, train_loader, model, criterion, optimizer, scheduler)
        valid_obj, valid_top1_acc, valid_top5_acc = validate(epoch, val_loader, model, criterion, args)

        is_best = False
        if valid_top1_acc > best_top1_acc:
            best_top1_acc = valid_top1_acc
            is_best = True

        utils.save_checkpoint({
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'best_top1_acc': best_top1_acc,
            'optimizer' : optimizer.state_dict(),
            }, is_best, args.job_dir)

        epoch += 1
        logger.info("=>Best accuracy {:.3f}".format(best_top1_acc))#