def get_optimal_index_keys(nb_vectors: int, dim_vector: int, max_index_memory_usage: str) -> List[str]:
    """
    Gives a list of interesting indices to try, *the one at the top is the most promising*

    See: https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index for
    detailed explanations.
    """

    total_bytes = 4 * nb_vectors * dim_vector  # x4 because float32
    max_mem_bytes = cast_memory_to_bytes(max_index_memory_usage)

    # index options
    relevant_list: List[str] = []

    # Cases with a lot of memory -> HNSW
    if 1.7 * total_bytes < max_mem_bytes:
        relevant_list.append("HNSW32")
    elif 1.3 * total_bytes < max_mem_bytes:
        relevant_list.append("HNSW15")
    else:  # product quantization
        relevant_list.extend(
            get_optimal_quantization(nb_vectors, dim_vector, force_max_index_memory_usage=max_index_memory_usage)
        )

    return relevant_list
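

# Illustrative sketch (not part of the original module): with 1M float32 vectors of
# dimension 512 the raw data takes 4 * 1e6 * 512 ≈ 2GB, so a "10GB" budget leaves enough
# headroom for the HNSW32 branch, while a tight "1GB" budget falls back to quantized
# (OPQ/PQ) index keys.
def _example_index_key_selection() -> None:
    roomy = get_optimal_index_keys(nb_vectors=1_000_000, dim_vector=512, max_index_memory_usage="10GB")
    assert roomy == ["HNSW32"]

    tight = get_optimal_index_keys(nb_vectors=1_000_000, dim_vector=512, max_index_memory_usage="1GB")
    assert all(key.startswith("OPQ") for key in tight)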
def get_optimal_train_size(
    nb_vectors: int, index_key: str, current_memory_available: Optional[str], vec_dim: Optional[int]
) -> int:
    """
    Function that determines the number of training points necessary to
    train the index, based on faiss heuristics for k-means clustering.
    """

    train_size = nb_vectors

    matching = re.findall(r"IVF\d+|IMI\d+x\d+", index_key)

    if matching:

        nb_clusters = nb_vectors

        # case IVF index
        if re.findall(r"IVF\d+", matching[0]):
            nb_clusters = int(matching[0][3:])
        # case IMI index
        elif re.findall(r"IMI\d+x\d+", matching[0]):
            nb_clusters = 2 ** reduce(mul, [int(num) for num in re.findall(r"\d+", matching[0])])

        points_per_cluster: float = 100

        # compute best possible number of vectors to give to train the index
        # given memory constraints
        if current_memory_available and vec_dim:
            size = cast_memory_to_bytes(current_memory_available)
            points_per_cluster = max(min(size / (4.0 * nb_clusters * vec_dim), points_per_cluster), 31.0)

        # You will need between 30 * nb_clusters and 256 * nb_clusters to train the index
        train_size = min(round(points_per_cluster * nb_clusters), nb_vectors)

    return train_size
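

# Illustrative sketch (not part of the original module): for an "IVF4096" index the
# k-means training set is capped at roughly 100 points per cluster (and clamped to at
# least 31 when memory is tight), so with plenty of vectors and memory the suggested
# train size is 4096 * 100.
def _example_train_size() -> None:
    train_size = get_optimal_train_size(
        nb_vectors=5_000_000, index_key="OPQ64_128,IVF4096,PQ64x8", current_memory_available="4GB", vec_dim=128
    )
    assert train_size == 4096 * 100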
def get_ground_truth(
    faiss_metric_type: int,
    embeddings_path: Union[np.ndarray, str],
    query_embeddings: np.ndarray,
    memory_available: Union[str, float],
):
    """Compute the ground truth (the result of a perfect, exact index) for the queries on the embeddings"""

    dim = query_embeddings.shape[-1]

    if isinstance(embeddings_path, str):
        perfect_index = MemEfficientFlatIndex(dim, faiss_metric_type)
        perfect_index.add_files(embeddings_path)
        block_bytes = next(read_embeddings_local(embeddings_path, verbose=False)).nbytes
    else:
        perfect_index = faiss.IndexFlat(dim, faiss_metric_type)
        perfect_index.add(embeddings_path.astype("float32"))  # pylint: disable= no-value-for-parameter
        block_bytes = embeddings_path.nbytes

    memory_available = (
        cast_memory_to_bytes(memory_available) if isinstance(memory_available, str) else memory_available
    )

    # Use at most 25% of the available memory to stack embedding blocks during the search
    stack_input = max(int(0.25 * memory_available / block_bytes), 1)

    if isinstance(embeddings_path, str):
        _, ground_truth = perfect_index.search_files(query_embeddings, k=40, stack_input=stack_input)
    else:
        _, ground_truth = perfect_index.search(query_embeddings, k=40)

    return ground_truth
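

# Illustrative sketch (not part of the original module; assumes numpy and faiss are
# importable here): with in-memory embeddings the function builds an exact
# faiss.IndexFlat and returns, for each query, the indices of its 40 true nearest
# neighbours.
def _example_ground_truth() -> None:
    rng = np.random.default_rng(0)
    embeddings = rng.random((1000, 64), dtype=np.float32)
    queries = rng.random((10, 64), dtype=np.float32)
    ground_truth = get_ground_truth(faiss.METRIC_INNER_PRODUCT, embeddings, queries, "1GB")
    assert ground_truth.shape == (10, 40)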
def get_optimal_batch_size(nb_vectors: int, vec_dim: int, current_memory_available: str) -> int:
    """Compute the optimal batch size (in number of vectors) to use the RAM at its full potential"""

    memory = cast_memory_to_bytes(current_memory_available)

    # Each float32 vector takes 4 * vec_dim bytes; target about half of the available
    # memory per batch, and never more vectors than the dataset holds.
    batch_size = max(int(0.5 * memory / (4 * vec_dim)), 1)

    return min(batch_size, nb_vectors)
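

# Illustrative sketch (not part of the original module): for 256-dimensional float32
# vectors each vector takes 1024 bytes, so a "1GB" budget yields a batch of a few
# hundred thousand vectors, never exceeding the dataset size.
def _example_batch_size() -> None:
    batch_size = get_optimal_batch_size(nb_vectors=10_000_000, vec_dim=256, current_memory_available="1GB")
    assert 0 < batch_size <= 10_000_000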
def get_optimal_quantization(
    nb_vectors: int,
    dim_vector: int,
    force_quantization_value: Optional[int] = None,
    force_max_index_memory_usage: Optional[str] = None,
) -> List[str]:
    """
    Function that returns a list of relevant index_keys to create quantized indices.

    Parameters
    ----------
    nb_vectors: int
        Number of vectors in the dataset.
    dim_vector: int
        Dimension of the vectors in the dataset.
    force_quantization_value: Optional[int]
        Force to use this value as the size of the quantized vectors (PQx).
        It can be used with the force_max_index_memory_usage parameter,
        but the result might be empty.
    force_max_index_memory_usage: Optional[str]
        Add a memory constraint on the index.
        It can be used with the force_quantization_value parameter,
        but the result might be empty.

    Returns
    -------
    index_keys: List[str]
        List of index_keys that would be good choices for quantization.
        The list can be empty if the given constraints are too strong.
    """

    # Default values
    pq_values = [64, 48, 32, 24, 16, 8, 4]
    targeted_compression_ratio = 0.0  # 0 = no constraint

    # Force compression ratio if required
    if force_max_index_memory_usage is not None:
        total_bytes = 4.0 * nb_vectors * dim_vector  # x4 because float32
        max_mem_bytes = float(cast_memory_to_bytes(force_max_index_memory_usage))
        targeted_compression_ratio = total_bytes / max_mem_bytes

    # Force quantization value if required
    if force_quantization_value is not None:
        pq_values = [force_quantization_value]

    # Compute optimal number of clusters
    relevant_list: List[str] = []
    nb_clusters_list = get_optimal_nb_clusters(nb_vectors)

    # Look for matching index keys
    for pq in pq_values:
        if pq < dim_vector:

            for nb_clusters in nb_clusters_list:

                # Compute quantized vector size
                cluster_size_byte = 1 + (log2(nb_clusters) - 1) // 8
                vector_size_byte = pq + cluster_size_byte

                # Compute compression ratio with quantization PQx
                compression_ratio = (4 * dim_vector) / vector_size_byte

                # Add index_key if compression ratio is high enough
                if compression_ratio >= targeted_compression_ratio:

                    # y is a multiple of pq (required)
                    # y <= d, with d the dimension of the input vectors (preferable)
                    # y <= 6*pq (preferable)
                    # Here we choose a y slightly bigger than d to avoid losing information
                    # in the linear transform: for instance with d=101 and pq=64,
                    # y=128 is a better choice than y=64.
                    y = (min(dim_vector // pq, 6) + 1) * pq

                    cluster_opt = f"IVF{nb_clusters}" if nb_clusters < 1000 else f"IVF{nb_clusters}_HNSW32"
                    relevant_list.append(f"OPQ{pq}_{y},{cluster_opt},PQ{pq}x8")

    return relevant_list
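

# Illustrative sketch (not part of the original module; the exact IVF sizes depend on
# get_optimal_nb_clusters, which is defined elsewhere in this module): every returned
# index_key follows the "OPQ{pq}_{y},IVF{nb_clusters}[_HNSW32],PQ{pq}x8" pattern, so
# forcing pq=64 on 256-dimensional vectors yields keys starting with "OPQ64_".
def _example_quantization_keys() -> None:
    keys = get_optimal_quantization(nb_vectors=1_000_000, dim_vector=256, force_quantization_value=64)
    assert all(key.startswith("OPQ64_") and ",PQ64x8" in key for key in keys)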