def get_invlist(invlists, l):
    """Copy the content of one inverted list into freshly allocated arrays.

    Args:
        invlists: faiss inverted-lists object; it is downcast to its
            concrete class before being read.
        l: number of the inverted list to read.

    Returns:
        tuple: ``(list_ids, list_codes)``. ``list_ids`` is an int64 array
        with one entry per element of the list. ``list_codes`` is a uint8
        array shaped ``(list_size, code_size)``, or
        ``(n_blocks, block_size // n_per_block, n_per_block)`` when the
        inverted lists report ``INVALID_CODE_SIZE`` (block layout).
    """
    invlists = faiss.downcast_InvertedLists(invlists)
    n_entries = invlists.list_size(l)
    has_entries = n_entries > 0
    list_ids = np.zeros(n_entries, dtype=np.int64)

    ids_ptr = None
    codes_ptr = None
    try:
        ids_ptr = invlists.get_ids(l)
        if has_entries:
            faiss.memcpy(faiss.swig_ptr(list_ids), ids_ptr, list_ids.nbytes)

        codes_ptr = invlists.get_codes(l)
        if invlists.code_size == faiss.InvertedLists.INVALID_CODE_SIZE:
            # no per-vector code size: it's a BlockInvertedLists
            per_block = invlists.n_per_block
            block_bytes = invlists.block_size
            n_blocks = (n_entries + per_block - 1) // per_block
            codes_shape = (n_blocks, block_bytes // per_block, per_block)
        else:
            codes_shape = (n_entries, invlists.code_size)
        list_codes = np.zeros(codes_shape, dtype=np.uint8)
        if has_entries:
            faiss.memcpy(
                faiss.swig_ptr(list_codes), codes_ptr, list_codes.nbytes)
    finally:
        # hand back whichever pointers were actually obtained
        if ids_ptr is not None:
            invlists.release_ids(l, ids_ptr)
        if codes_ptr is not None:
            invlists.release_codes(l, codes_ptr)

    return list_ids, list_codes
def run_kmeans(x, nmb_clusters, verbose=False, init_cents=None):
    """Run faiss k-means on ``x`` and assign every point to a centroid.

    The clustering is trained against a flat L2 index
    (``faiss.IndexFlatL2``), which is then searched to obtain the nearest
    centroid of each point.

    Args:
        x: 2-D data array, one row per point; passed unchanged to faiss.
        nmb_clusters (int): number of clusters.
        verbose (bool): if True, print the losses recorded by faiss.
        init_cents: optional array of initial centroids. It is converted to
            C-contiguous float32 before being copied into the clustering
            object, so other float dtypes and array views are accepted.

    Returns:
        tuple: ``(assignments, final_loss, centroids)`` where
        ``assignments`` is a list with the centroid number of each row of
        ``x``, ``final_loss`` is the last recorded loss value and
        ``centroids`` is an array of shape ``(nmb_clusters, d)``.
    """
    _, d = x.shape

    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)
    clus.niter = 20
    clus.max_points_per_centroid = 10000000

    if init_cents is not None:
        # The destination is a float32 vector filled by a raw byte copy, so
        # the source must be contiguous float32; copy exactly its byte size
        # instead of assuming 4 bytes per element of an arbitrary dtype.
        init_cents = init_cents.astype('float32', order='C', copy=False)
        clus.centroids.resize(init_cents.size)
        faiss.memcpy(clus.centroids.data(), faiss.swig_ptr(init_cents),
                     init_cents.nbytes)

    index = faiss.IndexFlatL2(d)

    # perform the training
    clus.train(x, index)
    # nearest centroid of every point
    _, nearest = index.search(x, 1)
    losses = faiss.vector_to_array(clus.obj)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))
    centroids = faiss.vector_to_array(clus.centroids).reshape(
        (nmb_clusters, d))

    return [int(row[0]) for row in nearest], losses[-1], centroids
def get_cluster_ids(self, list_num: int) -> np.ndarray:
    """Return the ids stored in one inverted list of the merged index.

    The index is read from ``self.tempdir / self.MERGED_INDEX_NAME`` on
    every call and its IVF inverted lists are extracted.

    Args:
        list_num: number of the inverted list to read; converted to a
            plain ``int`` before being passed to faiss.

    Returns:
        An int64 array holding a copy of the ids of that list. When
        ``self.multi_id`` is truthy, the result of
        ``self._invert_cantor_pairing_vec`` applied to that array is
        returned instead.

    Raises:
        AssertionError: if ``self.is_trained`` is falsy.
    """
    # TODO: assert IVF
    assert self.is_trained

    # This fixes problem with SWIG and numpy int
    list_num = int(list_num)

    index = faiss.read_index(str(self.tempdir / self.MERGED_INDEX_NAME))
    # Get the IVF from potentially opaque index
    invlists = faiss.extract_index_ivf(index).invlists

    list_size = invlists.list_size(list_num)
    list_ids = np.zeros(list_size, dtype=np.int64)

    temp_ids = None
    try:
        temp_ids = invlists.get_ids(list_num)
        if list_size > 0:
            # Need to copy since memory will be deallocated along with the
            # invlist.
            faiss.memcpy(faiss.swig_ptr(list_ids), temp_ids, list_ids.nbytes)
    finally:
        # Release the pointer even if the copy failed.
        if temp_ids is not None:
            invlists.release_ids(list_num, temp_ids)

    if self.multi_id:
        list_ids = self._invert_cantor_pairing_vec(list_ids)

    return list_ids
def get_invlist(invlists, l):
    """Return the content of one inverted list as numpy arrays.

    Args:
        invlists: faiss inverted-lists object; it is downcast to its
            concrete class so that layout-specific attributes are visible.
        l: number of the inverted list to read.

    Returns:
        tuple: ``(list_ids, list_codes)``. ``list_ids`` is an int64 array
        with one entry per element of the list. ``list_codes`` is a uint8
        array shaped ``(list_size, code_size)``, or
        ``(n_blocks, block_size // n_per_block, n_per_block)`` when the
        inverted lists report ``INVALID_CODE_SIZE`` (block layout).
    """
    invlists = faiss.downcast_InvertedLists(invlists)
    ls = invlists.list_size(l)
    list_ids = np.zeros(ls, dtype='int64')
    ids = codes = None
    try:
        ids = invlists.get_ids(l)
        # nothing to copy from an empty list
        if ls > 0:
            faiss.memcpy(faiss.swig_ptr(list_ids), ids, list_ids.nbytes)
        codes = invlists.get_codes(l)
        if invlists.code_size != faiss.InvertedLists.INVALID_CODE_SIZE:
            list_codes = np.zeros((ls, invlists.code_size), dtype='uint8')
        else:
            # code_size is a sentinel here, not a usable per-vector size:
            # size the buffer from the block layout instead
            npb = invlists.n_per_block
            bs = invlists.block_size
            ls_round = (ls + npb - 1) // npb
            list_codes = np.zeros((ls_round, bs // npb, npb), dtype='uint8')
        if ls > 0:
            faiss.memcpy(
                faiss.swig_ptr(list_codes), codes, list_codes.nbytes)
    finally:
        # release whichever pointers were obtained, even on failure
        if ids is not None:
            invlists.release_ids(l, ids)
        if codes is not None:
            invlists.release_codes(l, codes)
    return list_ids, list_codes