self.binary = B[59000:]
        else:
            self.data = torch.from_numpy(m['X'])[:]
            self.label = torch.from_numpy(m['X_class'])[0][:]
            self.binary = B[:]

    def __getitem__(self, index):
        """Return the ``(data, label, binary)`` triple stored at ``index``.

        The same index is applied to ``self.data``, ``self.label`` and
        ``self.binary``, in that order.
        """
        return self.data[index], self.label[index], self.binary[index]

    def __len__(self):
        """Return the number of samples, i.e. the length of ``self.data``."""
        sample_count = len(self.data)
        return sample_count

# ITQ binary codes, computed once when this module is imported and passed to
# Cifar_Dataset by each of the dataloader factories below.
# NOTE(review): presumably one row per sample, in the same order as the
# dataset's data array — confirm against ITQ.get_ITQ_binary.
B = torch.Tensor(ITQ.get_ITQ_binary())

def get_train_dataloader():
    """Build the DataLoader over ``Cifar_Dataset(root, B, train=1)``.

    Batches hold 100 samples, shuffling is enabled, and loading happens in
    the main process (``num_workers=0``).
    """
    loader_options = dict(batch_size=100, shuffle=True, num_workers=0)
    return DataLoader(Cifar_Dataset(root, B, train=1), **loader_options)

def get_test_dataloader():
    """Build the DataLoader over ``Cifar_Dataset(root, B, train=2)``.

    Batches hold 1000 samples and are served in dataset order (no shuffling),
    loaded in the main process (``num_workers=0``).
    """
    loader_options = dict(batch_size=1000, shuffle=False, num_workers=0)
    return DataLoader(Cifar_Dataset(root, B, train=2), **loader_options)

def get_exp_dataloader():
    """Build the DataLoader over ``Cifar_Dataset(root, B, train=3)``.

    Batches hold 1000 samples and are served in dataset order (no shuffling),
    loaded in the main process (``num_workers=0``).
    """
    loader_options = dict(batch_size=1000, shuffle=False, num_workers=0)
    return DataLoader(Cifar_Dataset(root, B, train=3), **loader_options)
# Load the ITQ parameters (datasets "R", "pc" and "mean") from the parameter
# file given on the command line.
# Dataset.value was deprecated in h5py 2.x and removed in h5py 3.0; indexing
# with an empty tuple reads the whole dataset on every h5py version.
with h5py.File(args["paramfile"], "r") as f:
    R = f["R"][()]
    pc = f["pc"][()]
    mean = f["mean"][()]

# Resume support: reuse the hashes already stored at the output path if the
# file exists, otherwise start from an empty list.
if os.path.exists(args["outpath"]):
    print("Reading the existing features")
    with h5py.File(args["outpath"], "r") as f:
        # Dataset.value was removed in h5py 3.0; [()] reads the full dataset
        # on both old and new h5py versions.
        allhashes = f["hashes"][()].tolist()
else:
    allhashes = []

# Hash every image feature from position len(allhashes) onwards.
# NOTE(review): features that fail are skipped without a placeholder, so
# positions in allhashes stop lining up with imgslist after the first failure,
# and a resumed run (which restarts at len(allhashes)) may hash some images
# twice — confirm whether downstream code relies on that alignment.
nDone = len(allhashes)
for i in range(nDone, len(imgslist)):
    impath = imgslist[i]
    tic_toc_print("Done %d / %d features" % (i, len(imgslist)))
    try:
        featpath = os.path.join(args["dir"], impath + ".h5")
        feat = load_feat(featpath, args["fracfeat"]).transpose()
        # Normalize this feature (that's how its used in training)
        feat = feat / np.linalg.norm(feat)
        # Named `hashcode` rather than `hash` so the builtin is not shadowed.
        hashcode = ITQ.hash(feat, pc, mean, R)
    except Exception as e:
        # Still best-effort (skip the image), but report the failure instead
        # of swallowing it; unlike a bare `except:`, KeyboardInterrupt and
        # SystemExit now propagate so the run can be stopped.
        tic_toc_print("Skipping %s: %s" % (impath, e))
        continue
    allhashes.append(hashcode)

# Stack the collected hashes into one boolean array (singleton axes removed)
# and write it to the output file as the gzip-compressed "hashes" dataset.
hash_matrix = np.array(allhashes)
allhashes = np.squeeze(hash_matrix).astype("bool")
with h5py.File(args["outpath"], "w") as out_file:
    out_file.create_dataset(
        "hashes",
        data=allhashes,
        compression="gzip",
        compression_opts=9,
    )
# Collect feature vectors from the per-image .h5 files until nFeat of them
# have been read (or imgslist is exhausted). Files that fail to load are
# reported and skipped.
nFeat = args['numfeat']
allfeats = []
for impath in imgslist:
  tic_toc_print('Read %d features' % len(allfeats))
  try:
    featpath = os.path.join(args['dir'], impath + '.h5')
    feat = load_feat(featpath, args['featfrac']).transpose()
  except Exception as e:
    # `except Exception, e` is Python 2-only syntax (a SyntaxError on
    # Python 3); the `as` form works on Python 2.6+ and 3.
    tic_toc_print(e)
    continue
  allfeats.append(feat)
  if len(allfeats) >= nFeat:
    break

# Stack the features into one float64 array and zero out NaN/inf entries.
allfeats = np.squeeze(np.array(allfeats)).astype(np.float64)
allfeats[~np.isfinite(allfeats)] = 0

# V.IMP to normalize each row by L2 norm, else mean/PCA etc calculations get screwed due to overflows
norms = np.sum(np.abs(allfeats)**2, axis=-1)**(1./2)
# An all-zero row (e.g. one that was entirely NaN/inf above) has norm 0;
# dividing by it would turn the row into NaNs and poison the mean/PCA.
# Dividing by 1 instead leaves such rows as zeros.
norms[norms == 0] = 1
allfeats = allfeats / norms[:, np.newaxis]

# Train ITQ on the normalized features and store the resulting parameters
# (mean, pc, R) as gzip-compressed datasets in the output file.
mean, pc, R = ITQ.train(allfeats, args['nbits'])
# Create the output directory with os.makedirs rather than a shell
# `mkdir -p`: the shell string broke on paths containing spaces or shell
# metacharacters. An empty dirname (file in the current directory) needs
# no directory to be created.
outdir = os.path.dirname(outfpath)
if outdir and not os.path.isdir(outdir):
  os.makedirs(outdir)
with h5py.File(outfpath, 'w') as f:
  f.create_dataset('R', data=R, compression="gzip", compression_opts=9)
  f.create_dataset('pc', data=pc, compression="gzip", compression_opts=9)
  f.create_dataset('mean', data=mean, compression="gzip", compression_opts=9)