def swap_feature_test(classifier, store_feature, store_index, pca_mat):
    PATCH_NUM = 10
    PATCH_SIZE = 3
    store_index = np.asarray(store_index).astype('float32')
    store_feature = np.asarray(store_feature).astype('float32')
    mat = faiss.read_VectorTransform(pca_mat)
    query = store_feature[1, :, :, :, :, :]
    query = classifier(
        torch.tensor(query.reshape(PATCH_NUM * PATCH_NUM, -1)).cuda()).cpu().numpy()
    query = mat.apply_py(query)
    index = faiss.IndexFlatL2(64)  # build the index
    print(index.is_trained)
    index.add(store_index)  # add vectors to the index
    print(index.ntotal)
    D, I = index.search(query, 1)
    print(I)
    print(D)
    # D, I = index.search(store_index[:5], 10)
    # print(I)
    # print(D)
    store_feature = torch.from_numpy(store_feature)
    transfered = torch.zeros(
        (1, 512, PATCH_SIZE * PATCH_NUM, PATCH_SIZE * PATCH_NUM))
    num = 0
    for item in I:
        transfered[:, :,
                   num // 10 * PATCH_SIZE:num // 10 * PATCH_SIZE + PATCH_SIZE,
                   num % 10 * PATCH_SIZE:num % 10 * PATCH_SIZE + PATCH_SIZE] = \
            store_feature[item // 100, (item - item // 100 * 100) // 10,
                          item % 10, :, :, :]
        num += 1
    return transfered.cuda()
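# A minimal, self-contained sketch of the lookup pattern used in
# swap_feature_test above: reduce vectors with a trained PCA transform, then
# search an exact L2 index. The dimensions and the random data here are
# illustrative assumptions, not values taken from the snippet.
import faiss
import numpy as np

d_in, d_out, n_db, n_q = 512, 64, 1000, 100
xb = np.random.rand(n_db, d_in).astype('float32')  # database vectors
xq = np.random.rand(n_q, d_in).astype('float32')   # query vectors

pca = faiss.PCAMatrix(d_in, d_out)
pca.train(xb)                                      # fit the 512 -> 64 projection
index = faiss.IndexFlatL2(d_out)                   # exact L2 search in PCA space
index.add(pca.apply_py(xb))
D, I = index.search(pca.apply_py(xq), 1)           # distance and id of each 1-NN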
def deserializeIndex(self, indexFileResource, id=None):
    # the joined index file contains populated_index, empty_trained_index,
    # preproc, IDMap, and a (possible) merged IVF file, in that order
    bytearrays = fileutil.splitMultiFileByteArray(indexFileResource._data, 12, 4)
    tmppath = 'tmp'
    mv = memoryview(indexFileResource._data)
    if id is not None:
        tmppath += '_' + str(id)
    all_tmp_paths = []
    count = 0
    for bytearray in bytearrays:
        p = tmppath + str(count) + '.dat'
        with open(p, 'wb') as fp:
            fp.write(mv[bytearray[0]:bytearray[1]])
        count += 1
        all_tmp_paths.append(p)
    index = faiss.read_index(all_tmp_paths[0], faiss.IO_FLAG_MMAP)
    emptyIndex = faiss.read_index(all_tmp_paths[1])
    preproc = faiss.read_VectorTransform(all_tmp_paths[2])
    IDToName = json.loads(
        bytes(mv[bytearrays[3][0]:bytearrays[3][1]]).decode('ascii'))
    print('index size: ', index.ntotal)
    print('map size:', len(IDToName))
    del bytearrays
    del indexFileResource
    self.isDeserialized = True
    return (index, emptyIndex, preproc, IDToName, all_tmp_paths)
def load_traintest(nl, class_set=1, seed=1234, include_base_class=True,
                   pca256=False, nnonlabeled=0):
    # imagenet validation images
    Xte, Yte, eval_classes_subset = load_bharath_fv3(
        class_set, include_base_class, 'val')
    # imagenet train images
    Xtr, Ytr, eval_classes_subset = load_bharath_fv3(
        class_set, include_base_class, 'train')
    Yte = BharathEval(Yte, eval_classes_subset)

    # reduce labels to consecutive numbers that start at 0
    label_map = np.cumsum(np.bincount(Ytr).astype(bool)) - 1
    Ytr = label_map[Ytr]
    Yte.remap_labels(label_map)
    eval_classes_subset = label_map[eval_classes_subset]
    nclasses = Ytr.max() + 1

    print("selecting images, seed=%d" % seed)
    rs = np.random.RandomState(seed)
    perm1 = []
    perm0 = []
    base = []
    for cl in range(nclasses):
        imnos = (Ytr == cl).nonzero()[0]
        if cl in eval_classes_subset:
            rs.shuffle(imnos)
            perm1.append(imnos[:nl])
            perm0.append(imnos[nl:])
        else:
            base.append(imnos)
    if nnonlabeled == 0:
        perm = np.hstack(base + perm1)
    else:
        perm = np.hstack(base + perm1 + perm0)
        nnonlabeled = np.hstack(perm0).size
    Ytr = Ytr[perm]
    if nnonlabeled != 0:
        Ytr[-nnonlabeled:] = -1
    Xtr = Xtr[perm]

    if pca256:
        pca_fname = fv3_dir + 'PCAR256.vt'
        print("load", pca_fname)
        pcar = faiss.read_VectorTransform(pca_fname)
        Xtr = pcar.apply_py(Xtr)
        Xte = pcar.apply_py(Xte)
    return Xtr, Ytr, Xte, Yte
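# The label_map trick above compresses arbitrary class ids into consecutive
# integers starting at 0. A tiny worked example of the same one-liner:
import numpy as np

Ytr = np.array([3, 5, 3, 9, 5])
label_map = np.cumsum(np.bincount(Ytr).astype(bool)) - 1
print(label_map[Ytr])  # -> [0 1 0 2 1]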
def get_preprocessor(preproc_str, preproc_cachefile, dim):
    # `dim` added here: the original referenced an undefined `dim` for the
    # identity-preprocessor fallback
    if preproc_str:
        if not preproc_cachefile or not os.path.exists(preproc_cachefile):
            preproc = train_preprocessor(preproc_str)
            if preproc_cachefile:
                print("store", preproc_cachefile)
                faiss.write_VectorTransform(preproc, preproc_cachefile)
        else:
            print("load", preproc_cachefile)
            preproc = faiss.read_VectorTransform(preproc_cachefile)
    else:
        preproc = IdentPreproc(dim)
    return preproc
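# The cache-or-train pattern above round-trips the preprocessor through
# faiss.write_VectorTransform / faiss.read_VectorTransform. A minimal sketch
# of that round trip; the cache path, dimensions, and training data are
# illustrative assumptions:
import os
import faiss
import numpy as np

cachefile = 'pcar64.vt'  # hypothetical cache file
xt = np.random.rand(10000, 256).astype('float32')
if os.path.exists(cachefile):
    preproc = faiss.read_VectorTransform(cachefile)
else:
    preproc = faiss.PCAMatrix(256, 64, 0, True)  # PCA to 64 dims + random rotation
    preproc.train(xt)
    faiss.write_VectorTransform(preproc, cachefile)
assert preproc.is_trained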
def get_preprocessor():
    # relies on module-level preproc_str, preproc_cachefile, and xb;
    # Python 2 print statements converted to print() calls
    if preproc_str:
        if not preproc_cachefile or not os.path.exists(preproc_cachefile):
            preproc = train_preprocessor()
            if preproc_cachefile:
                print("store", preproc_cachefile)
                faiss.write_VectorTransform(preproc, preproc_cachefile)
        else:
            print("load", preproc_cachefile)
            preproc = faiss.read_VectorTransform(preproc_cachefile)
    else:
        d = xb.shape[1]
        preproc = IdentPreproc(d)
    return preproc
def deserializeIndex(self, indexFileResource, id=None):
    # the joined index file contains populated_index, empty_trained_index,
    # preproc, IDMap, and a (possible) IVF file, in that order
    bytearrays = fileutil.splitMultiFileByteArray(indexFileResource._data, 12, 6)
    tmppath = 'tmp'
    mv = memoryview(indexFileResource._data)
    if id is not None:
        tmppath += '_' + str(id)
    all_tmp_paths = []
    mmap_path = None
    count = 0
    for bytearray in bytearrays:
        if count == 4:
            # this entry holds the path where the ivf mmap file must be stored
            mmap_path = mv[bytearray[0]:bytearray[1]].tobytes().decode('ascii')
            print('making directory ', os.path.dirname(mmap_path),
                  ' to store ivf data')
            if not os.path.exists(os.path.dirname(mmap_path)):
                os.makedirs(os.path.dirname(mmap_path))
        p = tmppath + str(count) + '.dat'
        if count == 5:
            p = mmap_path
        with open(p, 'wb') as fp:
            fp.write(mv[bytearray[0]:bytearray[1]])
        count += 1
        all_tmp_paths.append(p)
    self.preproc = faiss.read_VectorTransform(all_tmp_paths[2])
    self.IDToImage = json.loads(
        mv[bytearrays[3][0]:bytearrays[3][1]].tobytes().decode('ascii'))
    # print(mv[bytearrays[3][0]:bytearrays[3][1]].tobytes())
    del bytearrays
    del indexFileResource
    print('initializing index...')
    self.index = faiss.read_index(all_tmp_paths[0], faiss.IO_FLAG_MMAP)
    print('index size: ', self.index.ntotal)
    print('map size:', len(self.IDToImage))
    self.isDeserialized = True
    return (self.index, self.preproc, self.IDToImage)
def loadIndexParameters(indexParametersResource):
    if indexParametersResource is not None:
        indexParameterData = indexParametersResource._data  # takes 150 MB of memory
        bytearrays = fileutil.splitMultiFileByteArray(
            indexParameterData, 12, 2)  # takes another 150 MB of memory
        tmppath = 'tmp'
        mv = memoryview(indexParameterData)
        all_tmp_paths = []
        count = 0
        for bytearray in bytearrays:
            p = tmppath + str(count) + '.dat'
            with open(p, 'wb') as fp:
                fp.write(mv[bytearray[0]:bytearray[1]])
            count += 1
            all_tmp_paths.append(p)
        # note: reading the index here has been observed to use far more
        # memory than expected (~12.5 GB)
        index = faiss.read_index(all_tmp_paths[0])
        emptyIndexPath = all_tmp_paths[0]
        preproc = faiss.read_VectorTransform(all_tmp_paths[1])
        return (index, preproc, emptyIndexPath)
    else:
        return (None, None, None)
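# The deserialization helpers above depend on a project-specific fileutil to
# unpack several artifacts from one blob. A simpler sketch of the same idea
# with one plain file per artifact; all paths and dimensions here are
# illustrative assumptions:
import faiss
import numpy as np

d = 64
xb = np.random.rand(1000, d).astype('float32')
index = faiss.IndexFlatL2(d)
index.add(xb)
preproc = faiss.PCAMatrix(d, 32)
preproc.train(xb)

faiss.write_index(index, 'populated.index')
faiss.write_VectorTransform(preproc, 'preproc.vt')

index2 = faiss.read_index('populated.index')
# for large IVF indexes, faiss.read_index(path, faiss.IO_FLAG_MMAP) can
# memory-map the inverted lists instead of loading them, as the helpers above do
preproc2 = faiss.read_VectorTransform('preproc.vt')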
def train_preprocessor(self, preproc_str_local, xt_local):
    if not self.preproc_cachefile or not os.path.exists(self.preproc_cachefile):
        print("train preproc", preproc_str_local)
        d = xt_local.shape[1]
        t0 = time.time()
        if preproc_str_local.startswith('OPQ'):
            fi = preproc_str_local[3:].split('_')
            m = int(fi[0])
            dout = int(fi[1]) if len(fi) == 2 else d
            preproc = faiss.OPQMatrix(d, m, dout)
        elif preproc_str_local.startswith('PCAR'):
            dout = int(preproc_str_local[4:-1])
            preproc = faiss.PCAMatrix(d, dout, 0, True)
        else:
            assert False
        preproc.train(indexfunctions.sanitize(xt_local[:100000000]))
        print("preproc train done in %.3f s" % (time.time() - t0))
        faiss.write_VectorTransform(preproc, self.preproc_cachefile)
    else:
        print("load preproc ", self.preproc_cachefile)
        preproc = faiss.read_VectorTransform(self.preproc_cachefile)
    return preproc
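# The strings parsed above follow the faiss index-factory naming:
# "OPQ<m>[_<dout>]" requests an OPQ rotation, "PCAR<dout>" a PCA with random
# rotation. A small sketch of the two branches; d and the training data are
# assumptions:
import faiss
import numpy as np

d = 128
xt = np.random.rand(5000, d).astype('float32')

opq = faiss.OPQMatrix(d, 16, 64)        # "OPQ16_64": 16 sub-vectors, 64 dims out
opq.train(xt)

pcar = faiss.PCAMatrix(d, 64, 0, True)  # "PCAR64": PCA to 64 dims + random rotation
pcar.train(xt)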
def __init__(self, encoder, pca_model_path):
    self.encoder = encoder
    self.pca_mat = faiss.read_VectorTransform(pca_model_path)
def loadmat(self, path):
    self.mat = faiss.read_VectorTransform(path)
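# The two small wrappers above only load a serialized transform; applying it
# is a single call. A hedged usage sketch (the file name and batch shape are
# assumptions):
import faiss
import numpy as np

mat = faiss.read_VectorTransform('model.pca')      # hypothetical file
x = np.random.rand(8, mat.d_in).astype('float32')  # input must be float32, (n, d_in)
y = mat.apply_py(x)                                # output has shape (n, d_out)
assert y.shape == (8, mat.d_out)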
            name = k[:8] + k[15:]  # remove `module.`
            new_state_dict[name] = v
        else:
            new_state_dict[k] = v
    vgg_.load_state_dict(new_state_dict)
    print("=> loaded checkpoint '{}' (epoch {})".format(
        opt.encoder, checkpoint['epoch']))
else:
    print("=> no checkpoint found at '{}'".format(opt.encoder))

with open(opt.store_feature, 'r') as file_object:
    store_features = json.load(file_object)
with open(opt.store_index, 'r') as file_object:
    store_index = json.load(file_object)
mat = faiss.read_VectorTransform(opt.store_pca)
store_index = np.asarray(store_index).astype('float32')

index = faiss.IndexFlatL2(64)  # build the index
print(index.is_trained)
index.add(store_index)  # add vectors to the index

store_feature = np.asarray(store_features)
store_feature = torch.from_numpy(store_feature)

for file in os.listdir(opt.dataroot):
    file_path = opt.dataroot + file
    img_tensor = image_loader(file_path)
    content_feature = vgg_.features(img_tensor)
    PATCH_NUM = 10
    x = content_feature.unfold(2, 3, 3).unfold(3, 3, 3)
    x = x.permute(0, 2, 3, 1, 4, 5)
    x = x.reshape(x.size(0) * PATCH_NUM * PATCH_NUM, -1)
INDEX_FILENAME = INDEX_FILENAME_PRE + '.index'
INDEX_FILENAME_PK = INDEX_FILENAME_PRE + '.pk'
INDEX_FILENAME_PCA = INDEX_FILENAME_PRE + '.pca' + str(args.pca)

res = faiss.StandardGpuResources()  # use a single GPU
co = faiss.GpuClonerOptions()
# here we are using a 64-byte PQ, so we must set the lookup tables to
# 16 bit float (this is due to the limited temporary memory).
if args.float16:
    co.useFloat16 = True

if os.path.exists(INDEX_FILENAME):
    cpu_index = faiss.read_index(INDEX_FILENAME)
    index = faiss.index_cpu_to_gpu(res, 0, cpu_index, co) if gpu else cpu_index
    if pca:
        mat = faiss.read_VectorTransform(INDEX_FILENAME_PCA)
        # todo: calculate it if not there
    with open(INDEX_FILENAME_PK, 'rb') as fp:
        index_dict = pickle.load(fp)
else:
    files = sorted(glob.glob(FEATURES_NPY))
    index_dict = {}
    label_features = {}
    i = 0
    n_train_subset = 0
    for file_name in tqdm(files):
        label = file_name.split('/')[-1].split('.')[0]
        if len(label) == 16:
            continue
        features = np.load(file_name)
        assert features.shape[1] == FEATURES_NUMBER
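# Cloning a CPU index to a single GPU with half-precision lookup tables, as
# the branch above does when args.float16 is set. A minimal sketch; it
# requires a GPU build of faiss, and the dimension is an assumption:
import faiss

cpu_index = faiss.IndexFlatL2(64)
res = faiss.StandardGpuResources()
co = faiss.GpuClonerOptions()
co.useFloat16 = True  # use 16-bit floats on the GPU to save memory
gpu_index = faiss.index_cpu_to_gpu(res, 0, cpu_index, co)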
if 'train' in todo:
    f = h5py.File(fv3_dir + 'f100m/block0.hdf5', 'r')
    count = f['count'][0]
    labels = f['all_labels'][:count]
    features = f['all_feats'][:count]
    pca = faiss.PCAMatrix(2048, 256, 0, True)
    pca.train(features)
    faiss.write_VectorTransform(pca, fv3_dir + 'PCAR256.vt')

if 'apply' in todo:
    pca = faiss.read_VectorTransform(fv3_dir + 'PCAR256.vt')

    def load_block(i):
        f = h5py.File(fv3_dir + 'f100m/block%d.hdf5' % i, 'r')
        count = f['count'][0]
        # labels = f['all_labels'][:count]
        features = f['all_feats'][:count]
        return features

    # one read thread, one PCA computation thread, and main thread writes result.
    src = rate_limited_imap(load_block, range(100))
    src2 = rate_limited_imap(pca.apply_py, src)
    f = open(fv3_dir + '/concatenated_PCAR256.raw', 'wb')  # binary mode for raw output
    i = 0
    for x in src2:
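# The write loop above is truncated; presumably each PCA-transformed block is
# dumped as raw float32. A minimal sketch of that pattern under the same
# assumption (file name and block sizes are illustrative):
import faiss
import numpy as np

pca = faiss.PCAMatrix(2048, 256, 0, True)
pca.train(np.random.rand(5000, 2048).astype('float32'))
with open('concatenated_PCAR256.raw', 'wb') as f:
    for _ in range(3):  # stands in for iterating over the 100 blocks
        block = np.random.rand(1000, 2048).astype('float32')
        pca.apply_py(block).tofile(f)  # raw float32, row-major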
def main():
    global args
    args = parser.parse_args()

    # fix random seeds
    # torch.manual_seed(args.seed)
    # torch.cuda.manual_seed_all(args.seed)
    # np.random.seed(args.seed)

    # CNN
    if args.verbose:
        print('Architecture: {}'.format(args.arch))
    model = models.__dict__[args.arch](sobel=args.sobel)
    fd = int(model.top_layer.weight.size()[1])
    model.top_layer = None
    model.features = torch.nn.DataParallel(model.features)
    model.cuda()
    cudnn.benchmark = True

    # create optimizer
    optimizer = torch.optim.SGD(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=10**args.wd,
    )

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # remove top_layer parameters from checkpoint
            # (iterate over a copy of the keys so the dict can be mutated)
            for key in list(checkpoint['state_dict']):
                if 'top_layer' in key:
                    del checkpoint['state_dict'][key]
            model.load_state_dict(checkpoint['state_dict'])
            # optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # creating checkpoint repo
    exp_check = os.path.join(args.exp, 'checkpoints')
    if not os.path.isdir(exp_check):
        os.makedirs(exp_check)

    # preprocessing of data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    tra = [transforms.Resize(224), transforms.ToTensor(), normalize]

    # load the data
    end = time.time()
    dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra))
    if args.verbose:
        print('Load dataset: {0:.2f} s'.format(time.time() - end))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch,
                                             num_workers=args.workers)

    # remove head
    model.top_layer = None
    model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

    # get the features for the whole dataset
    features, patch_num, angles = compute_patchfeatures(
        '/home/visiting/datasets/source/vincent-van-gogh_road-with-cypresses-1890/0606.jpg',
        model, 224)
    file_name = '/home/visiting/Projects/levishery/deep_cluster/vangogh_all.json'
    with open(file_name, 'r') as file_object:
        contents = json.load(file_object)
    if args.patch_rotate:
        file_name = '/home/visiting/Projects/levishery/deep_cluster/angle_128.json'
        with open(file_name, 'r') as file_object:
            angle_style = json.load(file_object)
    style_ft = np.asarray(contents)
    style_ft = style_ft.astype('float32')
    mean_s, std_s = calc_mean_std(style_ft)
    mat = faiss.read_VectorTransform("vangogh-all.pca")
    photo_ft = mat.apply_py(features)
    mean_c, std_c = calc_mean_std(photo_ft)
    size = photo_ft.shape
    # photo_ft = ((photo_ft - mean_c) / std_c) * std_s + mean_s
    index = faiss.IndexFlatL2(256)  # build the index
    print(index.is_trained)
    index.add(style_ft)  # add vectors to the index
    print(index.ntotal)
    D, I = index.search(photo_ft, 1)
    print(I)
    print(D)
    dataset = datasets.ImageFolder('/home/visiting/datasets/van-gogh_patch_112/')
    out_dir = '/home/visiting/datasets/result/'
    num = 0
    result = np.zeros((args.patch_size * patch_num[1],
                       args.patch_size * patch_num[0], 3))
    match_im = np.zeros((args.patch_size * 2, args.patch_size, 3))
    photo = cv2.imread(
        '/home/visiting/datasets/source/vincent-van-gogh_road-with-cypresses-1890/0606.jpg'
    )
    for i in range(patch_num[0]):
        for j in range(patch_num[1]):
            style_patch = cv2.imread(dataset.imgs[I[num][0]][0])
            match_im[:args.patch_size, :args.patch_size] = photo[
                j * args.patch_size:(j + 1) * args.patch_size,
                i * args.patch_size:(i + 1) * args.patch_size, :]
            # angle1 = angle_style[I[num][0]]
            if args.patch_rotate:
                angle1_ = calc_phase(
                    cv2.resize(
                        cv2.imread(dataset.imgs[I[num][0]][0],
                                   cv2.IMREAD_GRAYSCALE), (16, 16)))
                style_patch = rotate_image(style_patch, angles[num] - angle1_,
                                           True)
                # angle2 = calc_phase(cv2.resize(cv2.cvtColor(style_patch, cv2.COLOR_BGR2GRAY), (16, 16)))
                # angle2_ = angles[num]
            match_im[args.patch_size:2 * args.patch_size,
                     :args.patch_size] = style_patch
            result[j * args.patch_size:(j + 1) * args.patch_size,
                   i * args.patch_size:(i + 1) * args.patch_size, :] = style_patch
            filename = '/home/visiting/datasets/result/' + str(num) + 's.jpg'
            cv2.imwrite(filename, match_im)
            num += 1
    file_name = '/home/visiting/datasets/result/r.jpg'
    cv2.imwrite(file_name, result)