self.binary = B[59000:] else: self.data = torch.from_numpy(m['X'])[:] self.label = torch.from_numpy(m['X_class'])[0][:] self.binary = B[:] def __getitem__(self, index): data = self.data[index] label = self.label[index] binary = self.binary[index] return data, label, binary def __len__(self): return len(self.data) B = torch.Tensor(ITQ.get_ITQ_binary()) def get_train_dataloader(): cifar_dataset_train = Cifar_Dataset(root, B, train=1) train_dataloader = DataLoader(cifar_dataset_train, batch_size=100, shuffle=True, num_workers=0) return train_dataloader def get_test_dataloader(): cifar_dataset_test = Cifar_Dataset(root, B, train=2) test_dataloader = DataLoader(cifar_dataset_test, batch_size=1000, shuffle=False, num_workers=0) return test_dataloader def get_exp_dataloader(): cifar_dataset_exp = Cifar_Dataset(root, B, train=3) exp_dataloader = DataLoader(cifar_dataset_exp, batch_size=1000, shuffle=False, num_workers=0) return exp_dataloader
# Compute an ITQ hash for every image in `imgslist` and store the stacked
# result in the HDF5 file at args["outpath"].  If that file already exists,
# its hashes are loaded first and processing continues after them.

# Load the ITQ parameters (R, pc, mean) from the parameter file.
with h5py.File(args["paramfile"], "r") as f:
    # `Dataset.value` was removed in h5py 3.0; `ds[()]` reads the whole
    # dataset and works on both old and new h5py releases.
    R = f["R"][()]
    pc = f["pc"][()]
    mean = f["mean"][()]

if os.path.exists(args["outpath"]):
    print("Reading the existing features")
    with h5py.File(args["outpath"], "r") as f:
        allhashes = f["hashes"][()].tolist()
else:
    allhashes = []

# NOTE(review): images that fail below leave no entry in `allhashes`, so after
# a run with failures len(allhashes) no longer corresponds to an index into
# `imgslist`; resuming from such a file can re-hash or misalign images.
# Results are also only written once, after the loop.  Confirm whether that
# is acceptable for the callers of the output file.
nDone = len(allhashes)
for i in range(nDone, len(imgslist)):
    impath = imgslist[i]
    tic_toc_print("Done %d / %d features" % (i, len(imgslist)))
    try:
        featpath = os.path.join(args["dir"], impath + ".h5")
        feat = load_feat(featpath, args["fracfeat"]).transpose()
        # Normalize this feature (that's how it is used in training).
        feat = feat / np.linalg.norm(feat)
        code = ITQ.hash(feat, pc, mean, R)
    except Exception as e:
        # Best effort: skip images whose feature cannot be loaded or hashed,
        # but say so, and let KeyboardInterrupt / SystemExit propagate.
        print("Skipping %s: %s" % (impath, e))
        continue
    allhashes.append(code)

allhashes = np.squeeze(np.array(allhashes)).astype("bool")
with h5py.File(args["outpath"], "w") as f:
    f.create_dataset("hashes", data=allhashes,
                     compression="gzip", compression_opts=9)
# Train ITQ on up to args['numfeat'] image features and save the learned
# parameters (R, pc, mean) to the HDF5 file at `outfpath`.

nFeat = args['numfeat']
allfeats = []
for impath in imgslist:
    tic_toc_print('Read %d features' % len(allfeats))
    try:
        featpath = os.path.join(args['dir'], impath + '.h5')
        feat = load_feat(featpath, args['featfrac']).transpose()
    except Exception as e:  # `as` form is valid on Python 2.6+ and Python 3
        # Best effort: report the unreadable feature and move on.
        tic_toc_print(e)
        continue
    allfeats.append(feat)
    if len(allfeats) >= nFeat:
        break

allfeats = np.squeeze(np.array(allfeats)).astype(np.float64)
# Scrub non-finite entries before any statistics are computed.
allfeats[np.isnan(allfeats)] = 0
allfeats[np.isinf(allfeats)] = 0

# Very important: normalize each row by its L2 norm, otherwise the mean/PCA
# computations can overflow.
norms = np.linalg.norm(allfeats, axis=-1)
# An all-zero row has norm 0; dividing by it would reintroduce the NaNs that
# were just removed, so such rows are left as zeros.
norms = np.where(norms == 0, 1.0, norms)
allfeats = allfeats / norms[:, np.newaxis]

mean, pc, R = ITQ.train(allfeats, args['nbits'])

# Create the output directory without going through a shell, so paths that
# contain spaces or shell metacharacters are handled correctly.
outdir = os.path.dirname(outfpath)
if outdir and not os.path.isdir(outdir):
    os.makedirs(outdir)

with h5py.File(outfpath, 'w') as f:
    f.create_dataset('R', data=R, compression="gzip", compression_opts=9)
    f.create_dataset('pc', data=pc, compression="gzip", compression_opts=9)
    f.create_dataset('mean', data=mean, compression="gzip", compression_opts=9)