def _eval(self, env, dataLoader):
    self._debugPrint("Eval step")
    self.model.eval()
    B = list()
    X = list()
    for x in dataLoader:
        X.append(x)
        if self.env.DoNormalizationOnObs:
            x = (x - self._obsMean) / self._obsStd
        batchB = self.model.Encode(x)
        B.append(batchB)
    B = torch.cat(B, 0)
    X = torch.cat(X, 0)
    C, qErrors = env.Eval(X, B)
    del B
    B = list()
    for x in dataLoader:
        if self.env.DoNormalizationOnObs:
            x = (x - self._obsMean) / self._obsStd
        batchB = self.model.Encode(x, icm=True,
                                   C=C.repeat(self._nParallels, 1, 1),
                                   shift=self._obsMeanRepeat,
                                   scale=self._obsStdRepeat)
        B.append(batchB)
    B = torch.cat(B, 0)
    icmQErrors = QuantizationError(X, C, B)
    self.logger.info("After ICM: %f, %.0f%% samples are better.",
                     icmQErrors.mean(),
                     (icmQErrors < qErrors).sum().float() / len(qErrors) * 100.)
    del B, X, C
    self.model.train()
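# --- Illustrative sketch (assumption, not part of the original repo) ---
# Every routine in this section relies on a QuantizationError(x, C, B) helper
# that is not shown here.  Assuming additive quantization with x: [N, D],
# codebook C: [M, K, D] and integer codes B: [N, M] (one index per
# sub-codebook), a minimal reference implementation could look like the
# hypothetical QuantizationErrorSketch below; the real helper may additionally
# handle product-quantized (per-subspace) codebooks such as the OPQ case.
import torch


def QuantizationErrorSketch(x: torch.Tensor, C: torch.Tensor, B: torch.Tensor) -> torch.Tensor:
    # Gather the selected codeword from each of the M sub-codebooks and sum them.
    M = C.shape[0]
    reconstruction = torch.stack([C[m, B[:, m]] for m in range(M)], 0).sum(0)  # [N, D]
    # Per-sample squared l2 reconstruction error, shape [N].
    return ((x - reconstruction) ** 2).sum(-1)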
def forward(self, x, b, npert, icmIter):
    N, M = b.shape
    oldB = b.clone()
    x_ci = -2 * (x @ self.codebook.t())
    b += self._shift
    if npert > 0:
        uniform = torch.ones_like(b, dtype=float) / M
        # [N, npert]
        pertidx = torch.multinomial(uniform, npert)
        # [N, npert], where each row = [i, i, i..., i]
        ix = torch.arange(N)[:, None].expand_as(pertidx)
        pertvals = torch.randint(self._k, (N, npert), device=b.device) + pertidx * self._k  # [N, npert]
        b[[ix, pertidx]] = pertvals
    mIdx = torch.randperm(M)
    for _ in range(icmIter):
        for i in mIdx:
            otherBs = b[:, i != mIdx]
            b[:, i] = torch.argmin(
                x_ci[:, i * self._k:(i + 1) * self._k]
                + self._cUnary[:, i * self._k:(i + 1) * self._k]
                + self._cPair[i * self._k:(i + 1) * self._k, otherBs].sum(2).t(),
                1) + self._k * i
    b -= self._shift
    oldQE = QuantizationError(x, self.Codebook(), oldB)
    newQE = QuantizationError(x, self.Codebook(), b)
    worse = newQE >= oldQE
    b[worse] = oldB[worse]
    return b
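# --- Illustrative note (my reading of the code above, not from the original repo) ---
# The argmin in the ICM loop appears to use the standard additive-quantization
# decomposition of the per-sample error with all codes except sub-codebook i fixed:
#     ||x - sum_m c_{b_m}||^2
#       = const(i) - 2<x, c_{b_i}> + ||c_{b_i}||^2 + 2 * sum_{j != i} <c_{b_i}, c_{b_j}>,
# so x_ci presumably caches -2 * x @ C^T, _cUnary the squared codeword norms, and
# _cPair the (doubled) pairwise codeword dot products.  The npert random code
# perturbations per sample act as the ILS-style restart, and the final
# revert-if-worse step keeps the old codes whenever ICM did not improve them.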
def Step(self, x: torch.Tensor, b: torch.Tensor, logStat: bool = True) -> (torch.Tensor, torch.Tensor):
    newCodebook = self.solver.solve(x, b, alternateWhenOutlier=True)
    if b.shape[-1] == self._m * self._k:
        newQError = ((x - b @ newCodebook.reshape(self._m * self._k, -1)) ** 2).sum(-1)
    else:
        newQError = QuantizationError(x.cuda(), newCodebook, b.cuda())
    if self._oldQError is None:
        self._oldQError = newQError
        self._meanQE = self._oldQError.mean()
    if self._doNormalizeOnRew:
        rewards = self._oldQError - newQError
        if self._firstRun:
            self._firstRun = False
        else:
            if logStat:
                _, variance = self.Estimate(("reward", rewards), (0, ))  # TODO: mean or not mean?
                rewards = rewards / (variance + 1e-8).sqrt()
            else:
                rewards = rewards / (self._variance["estimate/reward"] + 1e-8).sqrt()
    else:
        # [N, ]
        rewards = (self._oldQError - newQError) / self._meanQE
    currentQError = newQError.mean()
    self.logger.debug("[%4d Train] QError: %3.2f", self._step, currentQError)
    if self.summaryWriter is not None:
        self.summaryWriter.add_scalar("env/QError", currentQError, global_step=self._step)
        self.summaryWriter.add_histogram("env/Reward", rewards, global_step=self._step)
    self._step += 1
    if self._doNormalizeOnObs:
        # mean, variance = self.Estimate(("codebook", newCodebook), (1, ))
        # newCodebook = (newCodebook - mean) / (variance + 1e-8).sqrt()
        if not hasattr(self, "_obsMean"):
            raise AttributeError(
                f"Obs mean and std were not set while DoNormalizationOnObs = {self._doNormalizeOnObs}")
        newCodebook = (newCodebook - self._obsMean) / self._obsStd
    self._codebook.data = newCodebook
    del newCodebook
    if logStat:
        self._estimateQEStat(currentQError)
    return rewards.to(x.device), currentQError.to(x.device)
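# --- Illustrative note (my reading of the code above, not from the original repo) ---
# The reward is the per-sample drop in quantization error measured against the
# error of the very first solved codebook (self._oldQError is set only once).
# With DoNormalizationOnRew it is whitened by a running variance estimate of
# past rewards; otherwise it is scaled by the mean of that first error.  When
# DoNormalizationOnObs is set, the new codebook is standardized with the
# externally supplied observation mean/std before being stored as the next
# observation.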
def Run(model: nn.Module, lr=1.):
    sift = SiftLike("labelme").Train()
    sift.data *= 100.0
    # N, D = sift.shape
    dataLoader = DataLoader(sift, batch_size=1000, shuffle=True, num_workers=0)
    optim = torch.optim.Adam(model.parameters(), lr, amsgrad=True)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optim, 0.99)
    Train(model, optim, scheduler, dataLoader)
    del dataLoader
    with torch.no_grad():
        sift.Encode("cuda")
        sift.data *= 100.0
        dataLoader = DataLoader(sift, batch_size=1000, shuffle=False, num_workers=0)
        model.eval()
        codebook, codes = Encode(model, dataLoader)
        print(QuantizationError(sift.data, codebook, codes).mean())
        del dataLoader
        sift.Query(device="cuda")
        sift.data *= 100.0
        # dataLoader = DataLoader(sift, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)
        results = Eval.Retrieval(sift.data, codebook.cuda(), codes.cuda())
        sift.Gt()
        recalls = Eval.Recall(results, sift.data[:, :1].cuda()) * 100
        print("R @ 1: %.2f%%" % recalls[0])
        print("R @ 10: %.2f%%" % recalls[9])
        print("R @ 100: %.2f%%" % recalls[99])
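# --- Illustrative sketch (assumption, not part of the original repo) ---
# Eval.Recall is used throughout but not shown.  Assuming `results` holds the
# retrieved database ids sorted by distance ([Q, R]) and `gt` holds one
# ground-truth nearest-neighbour id per query ([Q, 1], as in sift.data[:, :1]),
# recall@K for every K up to R could be computed like the hypothetical
# RecallSketch below.
import torch


def RecallSketch(results: torch.Tensor, gt: torch.Tensor) -> torch.Tensor:
    # hit[q, k] is 1 if the true neighbour of query q appears at rank k.
    hit = (results == gt).float()                  # [Q, R], gt broadcasts over ranks
    # A query counts towards recall@K if it was hit at any rank <= K.
    hitAtOrBeforeK = hit.cumsum(-1).clamp(max=1.)  # [Q, R]
    return hitAtOrBeforeK.mean(0)                  # [R], recall@1 .. recall@R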
def Eval(self, x: torch.Tensor, b: torch.Tensor, additionalMsg: str = None) -> (torch.Tensor, torch.Tensor):
    newCodebook = self.solver.solve(x, b, alternateWhenOutlier=True)
    # assignCodes = self._randPerm(assignCodes)
    if b.shape[-1] == self._m * self._k:
        newQError = ((x - b @ newCodebook.reshape(self._m * self._k, -1)) ** 2).sum(-1)
    else:
        newQError = QuantizationError(x, newCodebook, b)
    self.logger.info("[%4d %s] QError: %3.2f", self._step, additionalMsg or "Eval", newQError.mean())
    if self.summaryWriter is not None:
        self.summaryWriter.add_scalar("eval/QError", newQError.mean(), global_step=self._step)
    return newCodebook, newQError
def EncodeFast(self, x, icm, realX, C, cUnary, cPair):
    b = list()
    for i in range(self._m):
        logit = self._subLayers[i](x)
        code = torch.argmax(logit, -1)
        b.append(code)
    b = torch.stack(b, -1)
    oldB = b.clone()
    if icm:
        b = self._fastICM(realX, b, C, cUnary, cPair)
        # self._miniBatchICM(realX, b, C.reshape(self._m, self._k, self._d), self._m)
        oldError = QuantizationError(realX, C.reshape(self._m, self._k, self._d), oldB)
        newError = QuantizationError(realX, C.reshape(self._m, self._k, self._d), b)
        worse = newError > oldError
        b[worse] = oldB[worse]
    return b
def Train(model, sift, nILS, nICM, nPERT):
    solver = ISolver(model._m, model._k)
    model.Update((torch.randn(model._m, model._k, model._d, device="cuda") * sift.data.std(0)) + sift.data.mean(0))
    B = torch.randint(model._k, (sift.data.shape[0], model._m), device=sift.data.device)
    for _ in range(100):
        for _ in range(nILS):
            B = model(sift.data, B, nPERT, nICM)
        model.Update(solver.solve(sift.data, B))
        print(QuantizationError(sift.data, model.Codebook(), B).mean())
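# --- Illustrative note (my reading of the code above, not from the original repo) ---
# Train performs a block-coordinate descent in the spirit of LSQ: each of the
# 100 outer iterations runs nILS rounds of ILS/ICM re-encoding (the forward
# pass shown earlier, with nPERT random perturbations and nICM ICM sweeps per
# round) and then re-solves the codebook by least squares on the fixed codes
# via solver.solve, printing the resulting quantization error.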
def Run(model: nn.Module, nILS, nICM, nPERT, encodeILS):
    sift = SiftLike("SIFT/1M").Train()
    # N, D = sift.shape
    Train(model, sift, nILS=nILS, nICM=nICM, nPERT=nPERT)
    sift.Encode("cuda")
    model.eval()
    codebook, codes = Encode(model, sift, nILS=encodeILS, nICM=nICM, nPERT=nPERT)
    print(QuantizationError(sift.data, codebook, codes).mean())
    sift.Query(device="cuda")
    # dataLoader = DataLoader(sift, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)
    results = Eval.Retrieval(sift.data, codebook.cuda(), codes.cuda())
    sift.Gt()
    recalls = Eval.Recall(results, sift.data[:, :1].cuda()) * 100
    print("R @ 1: %.2f%%" % recalls[0])
    print("R @ 10: %.2f%%" % recalls[9])
    print("R @ 100: %.2f%%" % recalls[99])
def Test(self, C=None, B=None):
    sift = self._dataset
    if self._env.DoNormalizationOnObs:
        self._obsMean = sift.data.mean(0).cuda()
        self._obsStd = sift.data.std(0).cuda()
    if C is None:
        sift.Train()
        C = self.GetCodebook(sift.data, icm=True).cuda()
    if B is None:
        sift.Encode(device="cpu")
        B = self.Encode(sift, icm=True, C=C)
    self._logger.info("Quantization error in base: %.8e",
                      QuantizationError(sift.data.cuda(), C.cuda(), B.cuda()).mean())
    sift.Query(device="cuda")
    # queryLoader = DataLoader(sift, batch_size=1, shuffle=False, num_workers=mp.cpu_count())
    results = self.Retrieval(sift.data, C.cuda(), B.cuda())
    sift.Gt(device="cuda")
    recalls = self.Recall(results, sift.data[:, :1]) * 100
    self._logger.info("R @ 1: %.2f%%", recalls[0])
    self._logger.info("R @ 10: %.2f%%", recalls[9])
    self._logger.info("R @ 100: %.2f%%", recalls[99])
def Step(self, trainDatas, assignCodes):
    if self._trainDatas is None:
        self._trainDatas = trainDatas
    else:
        assert np.mean(np.sum((self._trainDatas - trainDatas)**2, -1)) < 1e-12, \
            "Order of sampled data is not guaranteed to be consistent across steps"
    # dataset = tf.data.Dataset.from_tensor_slices((trainDatas, assignCodes)).repeat(2).shuffle(5000).batch(Consts.GlobalBatchSize).prefetch(tf.data.experimental.AUTOTUNE)
    # dataset = self._distributedStrategy.experimental_distribute_dataset(dataset)
    # self.HotPatch(dataset.element_spec)
    # for trainData, assignCode in dataset:
    #     self._updateCodebook(trainData, assignCode)
    # newCodebook = self._codebook.Raw.numpy()
    newCodebook = self._solver.solve(self._trainDatas, assignCodes)
    self._codebook.set_weights([newCodebook])
    newQError = QuantizationError(self._trainDatas, newCodebook, assignCodes).astype(np.float32)
    if self._oldQError is None:
        self._oldQError = newQError
        self._meanQE = np.mean(self._oldQError)
    # delta = (self._oldQError - newQError)
    # delta[delta > 0] /= np.max(delta, -1)
    # delta[delta < 0] /= -np.min(delta, -1)
    rewards = 2. * (self._oldQError - newQError) / self._meanQE
    # delta *= 5.0
    # rewards = ((self._oldQError - newQError) > 0).astype(np.float32)
    # rewards = np.tanh(delta)
    # rewards[delta < 0] *= -1.0
    # rewards = (-2.0 * ((self._oldQError - newQError) < 0) + 1.0).astype(np.float32)
    print(f"Quantization error: {np.mean(newQError)}")
    # self._oldQError = newQError
    # self._oldQError -= (1 - 0.9) * (self._oldQError - newQError)  # np.minimum(self._oldQError, newQError)
    # print(np.sum(rewards > 0))
    # del dataset
    # obs, reward, done
    return rewards
            idx = dist.argmin(axis=-1)
            b.append(idx)
        b = torch.stack(b, -1)
        B.append(b)
    B = torch.cat(B, 0)
    return B


if __name__ == "__main__":
    with torch.no_grad():
        sift = SiftLike("labelme")
        sift.Train(device="cpu")
        C, R = OPQTrain(sift.data.numpy(), 2, 256)
        C = torch.from_numpy(C).cuda()
        R = torch.from_numpy(R).cuda()
        sift.Encode(device="cuda")
        dataLoader = DataLoader(sift, batch_size=10000, shuffle=False, num_workers=0)
        B = OPQEncode(C, R, dataLoader)
        print(QuantizationError(sift.data @ R, C, B).mean())
        sift.Query(device="cuda")
        sift.data = sift.data @ R.cuda()
        # dataLoader = DataLoader(sift, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)
        results = Eval.Retrieval(sift.data, C.cuda(), B.cuda())
        sift.Gt()
        recalls = Eval.Recall(results, sift.data[:, :1].cuda()) * 100
        print("R @ 1: %.2f%%" % recalls[0])
        print("R @ 10: %.2f%%" % recalls[9])
        print("R @ 100: %.2f%%" % recalls[99])