def preprocess_features(x, x2=None, d=256):
    """
    Reduce vectors with PCA + whitening, then L2-normalize every row.

    The PCA/whitening transform is trained on ``x`` only; when ``x2`` is
    supplied, the already-trained transform is applied to it as well.

    Args:
        x (ndarray): N x D, where N is number of vectors, D - dimensionality
        x2 (ndarray): optional, second matrix to push through the transform
            learned on x.
        d (int): number of output dimensions (how many principal components
            to keep).

    Returns:
        The transformed [N x d] matrix when x2 is None, otherwise the tuple
        (transformed x, transformed x2).
    """
    n_vectors, input_dim = x.shape

    # eigen_power=-0.5 asks the PCA transform to whiten its output as well.
    whitener = faiss.PCAMatrix(d_in=input_dim, d_out=d, eigen_power=-0.5, random_rotation=False)
    whitener.train(x)
    assert whitener.is_trained

    print('Performing PCA + whitening')
    whitened = whitener.apply_py(x)
    print('x.shape after PCA + whitening:', whitened.shape)

    normalizer = faiss.NormalizationTransform(d, 2.0)
    print('Performing L2 normalization')
    x_out = normalizer.apply_py(whitened)

    # Guard clause: nothing more to do when no second matrix was given.
    if x2 is None:
        return x_out

    print('Perform PCA + whitening for x2')
    x2_out = normalizer.apply_py(whitener.apply_py(x2))
    return x_out, x2_out
def test_IndexPreTransform(self):
    """Add vectors to an IndexPreTransform while dropping local references.

    The index is built from a normalization transform and a flat L2
    sub-index. After each local name is deleted and a garbage collection is
    forced, ``add`` is called again; presumably this checks that the index
    keeps its components alive on its own (confirm against the test suite's
    intent). ``d`` and ``xb`` come from outside this function.
    """
    normalizer = faiss.NormalizationTransform(d)
    flat_index = faiss.IndexFlatL2(d)
    wrapped_index = faiss.IndexPreTransform(normalizer, flat_index)
    wrapped_index.add(xb)

    # Drop the local handle on the transform, collect, and add again.
    del normalizer
    gc.collect()
    wrapped_index.add(xb)

    # Same for the sub-index.
    del flat_index
    gc.collect()
    wrapped_index.add(xb)
def test_IndexPreTransform_2(self):
    """Same scenario as above, but attaching the transform after creation.

    The IndexPreTransform is created around the flat L2 sub-index alone and
    the normalization transform is attached with ``prepend_transform``.
    ``add`` is then called after each local name is deleted and a garbage
    collection is forced; presumably this checks that the index keeps the
    prepended transform and the sub-index alive (confirm against the test
    suite's intent). ``d`` and ``xb`` come from outside this function.
    """
    flat_index = faiss.IndexFlatL2(d)
    wrapped_index = faiss.IndexPreTransform(flat_index)
    normalizer = faiss.NormalizationTransform(d)
    wrapped_index.prepend_transform(normalizer)
    wrapped_index.add(xb)

    # Drop the local handle on the transform, collect, and add again.
    del normalizer
    gc.collect()
    wrapped_index.add(xb)

    # Same for the sub-index.
    del flat_index
    gc.collect()
    wrapped_index.add(xb)
def preprocess_features(x, d=256):
    """
    Calculate PCA + Whitening + L2 normalization for each vector

    Args:
        x (ndarray): N x D, where N is number of vectors, D - dimensionality
        d (int): number of output dimensions (how many principal components
            to use).

    Returns:
        transformed [N x d] matrix xt .
    """
    # Only the input dimensionality is needed; the unpacking also rejects
    # inputs whose shape is not exactly two-dimensional.
    _, orig_d = x.shape

    # eigen_power=-0.5 asks the PCA transform to whiten its output as well.
    pcaw = faiss.PCAMatrix(d_in=orig_d, d_out=d, eigen_power=-0.5, random_rotation=False)
    pcaw.train(x)
    assert pcaw.is_trained

    # print() is called with a single, pre-formatted string so the emitted
    # text is the same whether print is a function (Python 3) or a
    # statement applied to a parenthesized expression (Python 2).
    print('Performing PCA + whitening')
    x = pcaw.apply_py(x)
    print('x.shape after PCA + whitening: {}'.format(x.shape))

    l2normalization = faiss.NormalizationTransform(d, 2.0)
    print('Performing L2 normalization')
    x = l2normalization.apply_py(x)
    return x