def load_data(dataset, root, batch_size, num_workers):
    """
    Load dataset.

    Args
        dataset(str): Dataset name.
        root(str): Path of dataset.
        batch_size(int): Batch size.
        num_workers(int): Number of data loading threads.

    Returns
        train_dataloader, query_dataloader, retrieval_dataloader(torch.utils.data.DataLoader): Data loaders.
    """
    if dataset == 'cifar-10':
        train_dataloader, query_dataloader, retrieval_dataloader = cifar10.load_data(
            root,
            batch_size,
            num_workers,
        )
    elif dataset == 'nus-wide-tc21':
        train_dataloader, query_dataloader, retrieval_dataloader = nuswide.load_data(
            root,
            batch_size,
            num_workers,
        )
    elif dataset == 'imagenet-tc100':
        train_dataloader, query_dataloader, retrieval_dataloader = imagenet.load_data(
            root,
            batch_size,
            num_workers,
        )
    else:
        raise ValueError("Invalid dataset name!")

    return train_dataloader, query_dataloader, retrieval_dataloader
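# A minimal usage sketch for the dispatcher above. The dataset root, batch size and
# worker count are placeholders, and the per-batch item layout is an assumption about
# what the underlying cifar10/nuswide/imagenet modules yield; _demo_load_data is a
# hypothetical helper name.
def _demo_load_data():
    train_dataloader, query_dataloader, retrieval_dataloader = load_data(
        dataset='cifar-10',
        root='/path/to/cifar-10',  # placeholder dataset root
        batch_size=64,
        num_workers=4,
    )
    for batch in train_dataloader:
        images = batch[0]  # remaining entries depend on the dataset module
        break
    return images.shape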
def main():
    depth = 64
    bin_quantile = 0.2
    model_pickle = 'models/nestedDropoutAutoencoder_deep_ReLU_21-01-07__01-18-13.pkl'

    dataloader = cifar10.get_dataloader(download=True)
    device = utils.get_device()
    data = cifar10.load_data(dataloader)
    print('Data loaded')

    autoencoder: Autoencoder = torch.load(model_pickle, map_location=device)
    autoencoder.eval()
    print('Model loaded')

    representation = utils.get_data_representation(autoencoder, dataloader, device)
    del autoencoder
    data_repr = utils.binarize_data(representation, bin_quantile).cpu()
    print('Data representation created')

    binary_tree = BinaryTree(data, data_repr, tree_depth=depth)
    print(f'Binary tree created, with {binary_tree.get_num_nodes():,} nodes')

    pickle.dump({'binary tree': binary_tree, 'data_repr': data_repr},
                open(f'pickles/binary_tree_{depth}.pkl', 'wb'))
    print('The binary tree has been saved')
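# A sketch of how the saved tree might be queried afterwards, assuming the BinaryTree
# interface used elsewhere in this project (search_tree with a binary-code prefix and
# max_depth, as in test_retrieval_times below). The pickle path, code length, and
# _demo_query_binary_tree name are placeholders.
def _demo_query_binary_tree(depth=64):
    with open(f'pickles/binary_tree_{depth}.pkl', 'rb') as f:
        saved = pickle.load(f)
    binary_tree = saved['binary tree']
    data_repr = saved['data_repr']
    sample_code = list(data_repr[0])  # binary code of the first sample
    return binary_tree.search_tree(sample_code[:depth], max_depth=depth)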
def __init__(self, args, logger=None):
    self.args = args
    self.logger = logger

    # load training data
    self.input_image_size = {
        "cifar10": 32,     # CIFAR-10
        "imagenet": 224,   # ImageNet
    }[self.args.dataset]

    if self.args.dataset == 'cifar10':
        self.train_loader, self.val_loader = cifar10.load_data(self.args)
    else:
        data_tmp = imagenet.Data(args)
        self.train_loader = data_tmp.train_loader
        self.val_loader = data_tmp.test_loader

    # CIFAR-10
    self.criterion = nn.CrossEntropyLoss().cuda()

    # ImageNet
    CLASSES = 1000
    # label_smooth: 0.1
    self.criterion_smooth = utils.CrossEntropyLabelSmooth(CLASSES, 0.1).cuda()

    self.load_model(self.get_prune_ratio())
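# utils.CrossEntropyLabelSmooth is used above but not defined here. A minimal
# label-smoothing cross-entropy sketch along the usual lines is shown below; it is an
# assumption about what the utility does, and the class name is hypothetical.
import torch
import torch.nn as nn


class LabelSmoothCrossEntropySketch(nn.Module):
    """Cross entropy against a smoothed target: (1 - eps) * one_hot + eps / num_classes."""

    def __init__(self, num_classes, epsilon):
        super().__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        log_probs = self.logsoftmax(inputs)
        one_hot = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        smoothed = (1 - self.epsilon) * one_hot + self.epsilon / self.num_classes
        # average over the batch, sum over classes
        return (-smoothed * log_probs).mean(0).sum()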
def load_data(dataset, root, num_seen, batch_size, num_workers):
    """
    Load dataset.

    Args
        dataset(str): Dataset name.
        root(str): Path of dataset.
        num_seen(int): Number of seen classes.
        batch_size(int): Batch size.
        num_workers(int): Number of data loading threads.

    Returns
        query_dataloader, seen_dataloader, unseen_dataloader, retrieval_dataloader(torch.utils.data.DataLoader): Data loaders.
    """
    if dataset == 'cifar-10':
        query_dataloader, seen_dataloader, unseen_dataloader, retrieval_dataloader = cifar10.load_data(
            root,
            num_seen,
            batch_size,
            num_workers,
        )
    # elif dataset == 'nus-wide-tc21':
    #     query_dataloader, seen_dataloader, unseen_dataloader, retrieval_dataloader = nuswide.load_data(
    #         root,
    #         num_seen,
    #         batch_size,
    #         num_workers,
    #     )
    else:
        raise ValueError("Invalid dataset name!")

    return query_dataloader, seen_dataloader, unseen_dataloader, retrieval_dataloader
def load_data(dataset, root, num_query, num_train, batch_size, num_workers):
    """
    Load dataset.

    Args
        dataset(str): Dataset name.
        root(str): Path of dataset.
        num_query(int): Number of query data points.
        num_train(int): Number of training data points.
        batch_size(int): Batch size.
        num_workers(int): Number of data loading threads.

    Returns
        query_dataloader, train_dataloader, retrieval_dataloader(torch.utils.data.DataLoader): Data loaders.
    """
    if dataset == 'cifar-10':
        query_dataloader, train_dataloader, retrieval_dataloader = cifar10.load_data(
            root,
            num_query,
            num_train,
            batch_size,
            num_workers,
        )
    elif dataset == 'nus-wide-tc10':
        query_dataloader, train_dataloader, retrieval_dataloader = nuswide.load_data(
            10,
            root,
            num_query,
            num_train,
            batch_size,
            num_workers,
        )
    elif dataset == 'nus-wide-tc21':
        query_dataloader, train_dataloader, retrieval_dataloader = nuswide.load_data(
            21,
            root,
            num_query,
            num_train,
            batch_size,
            num_workers,
        )
    elif dataset == 'flickr25k':
        query_dataloader, train_dataloader, retrieval_dataloader = flickr25k.load_data(
            root,
            num_query,
            num_train,
            batch_size,
            num_workers,
        )
    elif dataset == 'imagenet':
        query_dataloader, train_dataloader, retrieval_dataloader = imagenet.load_data(
            root,
            batch_size,
            num_workers,
        )
    else:
        raise ValueError("Invalid dataset name!")

    return query_dataloader, train_dataloader, retrieval_dataloader
def get_data(dataset="mnist", dataformat="NHWC", path=os.path.expanduser("~/.datasets/")):
    # the data, shuffled and split between train and test sets
    if not os.path.exists(path):
        os.makedirs(path)

    if dataset == "mnist":
        (x_train, y_train), (x_test, y_test), input_shape, labels = mnist.load_data(path)
    elif dataset == "fashion_mnist":
        (x_train, y_train), (x_test, y_test), input_shape, labels = fashion_mnist.load_data(path)
    elif dataset == "cifar10":
        (x_train, y_train), (x_test, y_test), input_shape, labels = cifar10.load_data(path)
    elif dataset == "mnist_rot":
        x_train, x_test, y_train, y_test, input_shape, labels = mnist_rot.load_data(path)
    elif dataset == "cluttered_mnist":
        (x_train, y_train), (x_test, y_test), input_shape, labels = cluttered_mnist.load_data(path)
    elif dataset == "lsa16":
        x_train, x_test, y_train, y_test, input_shape, labels = lsa16.load_data(
            path, version="lsa32x32_nr_rgb_black_background", test_subjects=[9])
    elif dataset == "pugeault":
        x_train, x_test, y_train, y_test, input_shape, labels = pugeault.load_data(path)
    elif dataset == "irish":
        x_train, x_test, y_train, y_test, input_shape, labels = irish.load_data(path)
    else:
        raise ValueError("Unknown dataset: %s" % dataset)

    if dataformat == 'NCHW':
        x_train, x_test = x_train.transpose([0, 3, 1, 2]), x_test.transpose([0, 3, 1, 2])
    elif dataformat == "NHWC":
        pass  # already in this format
    else:
        raise ValueError("Invalid channel format %s" % dataformat)

    num_classes = len(labels)
    # convert class vectors to binary class matrices (one-hot)
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    return (x_train, y_train), (x_test, y_test), input_shape, num_classes, labels
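# A minimal usage sketch for get_data above. The dataset name and data format are
# placeholders, the printed shapes depend on the underlying dataset module, and
# _demo_get_data is a hypothetical helper name.
def _demo_get_data():
    (x_train, y_train), (x_test, y_test), input_shape, num_classes, labels = get_data(
        dataset="cifar10", dataformat="NHWC")
    print(x_train.shape, y_train.shape, input_shape, num_classes)
    return labels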
def load_data(opt):
    """Load data.

    Parameters
        opt: Parsed command-line arguments.

    Returns
        DataLoader: Data loader.
    """
    if opt.dataset == 'cifar10':
        return cifar10.load_data(opt)
    elif opt.dataset == 'nus-wide':
        return nus_wide.load_data(opt)
    else:
        raise ValueError("Invalid dataset name!")
def test_retrieval_times():
    binary_tree_pickle = 'pickles/binary_tree_64.pkl'
    current_time = utils.get_current_time()

    data = cifar10.load_data()
    print('Data loaded')

    pickle_dict = pickle.load(open(binary_tree_pickle, 'rb'))
    binary_tree = pickle_dict['binary tree']
    binarized_repr = pickle_dict['data_repr']
    print('Binary tree loaded')

    repr_dim = min(binarized_repr.shape[1], binary_tree.get_depth())

    # binary tree retrieval
    def tree_search_i(sample, i):
        return binary_tree.search_tree(list(sample)[:i], max_depth=i)

    tree_search_times = evaluate_retrieval_method(binarized_repr, tree_search_i, repr_dim)
    pickle.dump(tree_search_times, open(f'pickles/or_retrieval_times_{current_time}.pkl', 'wb'))

    # linear scan
    def linear_scan_i(sample, i):
        binarized_repr_i = binarized_repr[:, :i].view(len(binarized_repr), -1)
        return linear_scan(sample[:i], data, binarized_repr_i)

    linear_scan_times = evaluate_retrieval_method(binarized_repr, linear_scan_i, repr_dim)
    pickle.dump(linear_scan_times, open(f'pickles/ls_retrieval_times_{current_time}.pkl', 'wb'))

    # tree_search_times = pickle.load(open('pickles/or_retrieval_times_21-01-14__16-18-02.pkl', 'rb'))
    # linear_scan_times = pickle.load(open('pickles/ls_retrieval_times_21-01-14__13-03-04.pkl', 'rb'))

    # plotting
    plt.plot(list(tree_search_times.keys()), list(tree_search_times.values()), label='Tree Search')
    plt.plot(list(linear_scan_times.keys()), list(linear_scan_times.values()), label='Linear Scan')
    plt.xlabel('Code Length')
    plt.ylabel('Average retrieval time per query')
    plt.title('Retrieval time per code length')
    plt.yticks(list(tree_search_times.keys()))
    plt.yscale('log')
    plt.legend()
    plt.savefig('plots/retrieval_times')
    plt.show()
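# evaluate_retrieval_method is used above but not defined here. A hedged sketch of the
# assumed contract (time each retrieval function per code length and return a
# {code_length: average_seconds_per_query} dict); the helper name and the num_queries
# parameter are assumptions for illustration only.
def _demo_evaluate_retrieval_method(binarized_repr, retrieval_fn, repr_dim, num_queries=100):
    import time
    times = {}
    for i in range(1, repr_dim + 1):
        start = time.perf_counter()
        for sample in binarized_repr[:num_queries]:
            retrieval_fn(sample, i)
        times[i] = (time.perf_counter() - start) / num_queries
    return times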
def main():
    cudnn.benchmark = True
    cudnn.enabled = True
    logger.info("args = %s", args)

    # parse the compress rate string, e.g. '0.1+0.3*2' -> [0.1, 0.3, 0.3]
    if args.compress_rate:
        import re
        cprate_str = args.compress_rate
        cprate_str_list = cprate_str.split('+')
        pat_cprate = re.compile(r'\d+\.\d*')
        pat_num = re.compile(r'\*\d+')
        cprate = []
        for x in cprate_str_list:
            num = 1
            find_num = re.findall(pat_num, x)
            if find_num:
                assert len(find_num) == 1
                num = int(find_num[0].replace('*', ''))
            find_cprate = re.findall(pat_cprate, x)
            assert len(find_cprate) == 1
            cprate += [float(find_cprate[0])] * num
        compress_rate = cprate

    # load model
    logger.info('compress_rate:' + str(compress_rate))
    logger.info('==> Building model..')
    model = eval(args.arch)(compress_rate=compress_rate).cuda()
    logger.info(model)

    # calculate model size
    input_image_size = 32
    input_image = torch.randn(1, 3, input_image_size, input_image_size).cuda()
    flops, params = profile(model, inputs=(input_image,))
    logger.info('Params: %.2f' % (params))
    logger.info('Flops: %.2f' % (flops))

    # load training data
    train_loader, val_loader = cifar10.load_data(args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if args.test_only:
        if os.path.isfile(args.test_model_dir):
            logger.info('loading checkpoint {} ..........'.format(args.test_model_dir))
            checkpoint = torch.load(args.test_model_dir)
            model.load_state_dict(checkpoint['state_dict'])
            valid_obj, valid_top1_acc, valid_top5_acc = validate(0, val_loader, model, criterion, args)
        else:
            logger.info('please specify a checkpoint file')
        return

    if len(args.gpu) > 1:
        device_id = []
        for i in range((len(args.gpu) + 1) // 2):
            device_id.append(i)
        model = nn.DataParallel(model, device_ids=device_id).cuda()

    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    lr_decay_step = list(map(int, args.lr_decay_step.split(',')))
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=lr_decay_step, gamma=0.1)

    start_epoch = 0
    best_top1_acc = 0

    # load the checkpoint if it exists
    checkpoint_dir = os.path.join(args.job_dir, 'checkpoint.pth.tar')
    if args.resume:
        logger.info('loading checkpoint {} ..........'.format(checkpoint_dir))
        checkpoint = torch.load(checkpoint_dir)
        start_epoch = checkpoint['epoch'] + 1
        best_top1_acc = checkpoint['best_top1_acc']

        # deal with the single-multi GPU problem
        new_state_dict = OrderedDict()
        tmp_ckpt = checkpoint['state_dict']
        if len(args.gpu) > 1:
            for k, v in tmp_ckpt.items():
                new_state_dict['module.' + k.replace('module.', '')] = v
        else:
            for k, v in tmp_ckpt.items():
                new_state_dict[k.replace('module.', '')] = v
        model.load_state_dict(new_state_dict)

        logger.info("loaded checkpoint {} epoch = {}".format(checkpoint_dir, checkpoint['epoch']))
    else:
        if args.use_pretrain:
            logger.info('resuming from pretrain model')
            origin_model = eval(args.arch)(compress_rate=[0.] * 100).cuda()
            ckpt = torch.load(args.pretrain_dir, map_location='cuda:0')

            # if args.arch == 'resnet_56':
            #     origin_model.load_state_dict(ckpt['state_dict'], strict=False)
            if args.arch == 'densenet_40' or args.arch == 'resnet_110':
                new_state_dict = OrderedDict()
                for k, v in ckpt['state_dict'].items():
                    new_state_dict[k.replace('module.', '')] = v
                origin_model.load_state_dict(new_state_dict)
            else:
                origin_model.load_state_dict(ckpt['state_dict'])

            oristate_dict = origin_model.state_dict()

            if args.arch == 'googlenet':
                load_google_model(model, oristate_dict)
            elif args.arch == 'vgg_16_bn':
                load_vgg_model(model, oristate_dict)
            elif args.arch == 'resnet_56':
                load_resnet_model(model, oristate_dict, 56)
            elif args.arch == 'resnet_110':
                load_resnet_model(model, oristate_dict, 110)
            elif args.arch == 'densenet_40':
                load_densenet_model(model, oristate_dict)
            else:
                raise ValueError("Unsupported architecture: %s" % args.arch)
        else:
            logger.info('training from scratch')

    # adjust the learning rate according to the checkpoint
    for epoch in range(start_epoch):
        scheduler.step()

    # train the model
    epoch = start_epoch
    while epoch < args.epochs:
        train_obj, train_top1_acc, train_top5_acc = train(epoch, train_loader, model, criterion, optimizer, scheduler)
        valid_obj, valid_top1_acc, valid_top5_acc = validate(epoch, val_loader, model, criterion, args)

        is_best = False
        if valid_top1_acc > best_top1_acc:
            best_top1_acc = valid_top1_acc
            is_best = True

        utils.save_checkpoint({
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'best_top1_acc': best_top1_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.job_dir)

        epoch += 1

    logger.info("=>Best accuracy {:.3f}".format(best_top1_acc))
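# A small standalone illustration of the compress_rate string format parsed in main()
# above: each '+'-separated term is a rate, optionally repeated with '*N', so
# '0.1+0.3*2' expands to [0.1, 0.3, 0.3]. The example string and the helper name are
# made up for illustration only.
def _demo_parse_compress_rate(cprate_str='0.1+0.3*2'):
    import re
    pat_cprate = re.compile(r'\d+\.\d*')
    pat_num = re.compile(r'\*\d+')
    cprate = []
    for x in cprate_str.split('+'):
        num = 1
        find_num = re.findall(pat_num, x)
        if find_num:
            num = int(find_num[0].replace('*', ''))
        cprate += [float(re.findall(pat_cprate, x)[0])] * num
    return cprate  # -> [0.1, 0.3, 0.3] for the default string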