def _nn(self, d, n=1):
    """
    Internal method to be implemented by sub-classes to return the nearest
    `N` neighbors to the given descriptor element.

    When this internal method is called, we have already checked that there
    is a vector in ``d`` and our index is not empty.

    The FAISS search is only used to nominate candidates. Distances are
    recomputed from the stored descriptor vectors, and both returned tuples
    are ordered by increasing recomputed distance so that the element at
    position ``i`` of the first tuple has the distance at position ``i`` of
    the second.

    :param d: Descriptor element to compute the neighbors of.
    :type d: smqtk.representation.DescriptorElement
    :param n: Number of nearest neighbors to find.
    :type n: int

    :return: Tuple of nearest N DescriptorElement instances, and a tuple of
        the distance values to those neighbors. Both tuples are empty when
        the search yields no usable neighbor.
    :rtype: (tuple[smqtk.representation.DescriptorElement], tuple[float])

    """
    # FAISS expects a 2D float32 query matrix; this is a single-row query.
    q = d.vector()[np.newaxis, :].astype(np.float32)

    self._log.debug("Received query for %d nearest neighbors", n)

    with self._model_lock:
        # Never request more neighbors than the index holds.
        s_dists, s_ids = self._faiss_index.search(
            q, min(n, self._faiss_index.ntotal))
        # Only one query row was submitted, so take row 0 of each result.
        # NOTE(review): the sqrt presumes the index reports squared L2
        # distances -- confirm against the index type in use.
        s_dists, s_ids = np.sqrt(s_dists[0, :]), s_ids[0, :]
        s_ids = s_ids.astype(object)
        # s_id (the FAISS index indices) can equal -1 if fewer than the
        # requested number of nearest neighbors is returned. In this case,
        # eliminate the -1 entries
        uuids = list(
            self._idx2uid_kvs.get_many(
                filter(lambda s_id_: s_id_ >= 0, s_ids)))

        descriptors = tuple(
            self._descriptor_set.get_many_descriptors(uuids))

    if not descriptors:
        # Nothing usable came back; stacking zero vectors below would raise.
        self._log.debug("Returning query result of size %g", 0)
        return (), ()

    self._log.debug("Min and max FAISS distances: %g, %g",
                    min(s_dists), max(s_dists))

    d_vectors = np.vstack(DescriptorElement.get_many_vectors(descriptors))
    d_dists = metrics.euclidean_distance(d_vectors, q)

    self._log.debug("Min and max descriptor distances: %g, %g",
                    min(d_dists), max(d_dists))

    # Reorder the descriptors together with their distances. Row ``i`` of
    # ``d_vectors`` was built from ``descriptors[i]``, so one permutation
    # keeps the two returned sequences aligned.
    order = d_dists.argsort()
    descriptors = tuple(descriptors[oidx] for oidx in order)
    d_dists = tuple(d_dists[order])

    self._log.debug("Returning query result of size %g", len(descriptors))

    return descriptors, d_dists
def _nn(self, d, n=1):
    """
    Internal method to be implemented by sub-classes to return the nearest
    `N` neighbors to the given descriptor element.

    When this internal method is called, we have already checked that there
    is a vector in ``d`` and our index is not empty.

    The FAISS search is only used to nominate candidates. Distances are
    recomputed from the stored descriptor vectors, and both returned tuples
    are ordered by increasing recomputed distance so that the element at
    position ``i`` of the first tuple has the distance at position ``i`` of
    the second.

    :param d: Descriptor element to compute the neighbors of.
    :type d: smqtk.representation.DescriptorElement
    :param n: Number of nearest neighbors to find.
    :type n: int

    :return: Tuple of nearest N DescriptorElement instances, and a tuple of
        the distance values to those neighbors. Both tuples are empty when
        the search yields no usable neighbor.
    :rtype: (tuple[smqtk.representation.DescriptorElement], tuple[float])

    """
    # FAISS expects a 2D float32 query matrix; this is a single-row query.
    q = d.vector()[np.newaxis, :].astype(np.float32)

    self._log.debug("Received query for %d nearest neighbors", n)

    with self._model_lock:
        # Never request more neighbors than the index holds.
        s_dists, s_ids = self._faiss_index.search(
            q, min(n, self._faiss_index.ntotal))
        # Only one query row was submitted, so take row 0 of each result.
        # NOTE(review): the sqrt presumes the index reports squared L2
        # distances -- confirm against the index type in use.
        s_dists, s_ids = np.sqrt(s_dists[0, :]), s_ids[0, :]
        # FAISS pads the label row with -1 when it finds fewer neighbors
        # than requested; those placeholders have no UID mapping, so skip
        # them instead of looking them up.
        uuids = [self._idx2uid_kvs[s_id] for s_id in s_ids if s_id >= 0]
        # Materialize now: the result is indexed and measured below.
        descriptors = tuple(
            self._descriptor_set.get_many_descriptors(uuids))

    if not descriptors:
        # Nothing usable came back, so there is nothing to re-rank.
        self._log.debug("Returning query result of size %g", 0)
        return (), ()

    self._log.debug("Min and max FAISS distances: %g, %g",
                    min(s_dists), max(s_dists))

    d_vectors = elements_to_matrix(descriptors)
    d_dists = metrics.euclidean_distance(d_vectors, q)

    self._log.debug("Min and max descriptor distances: %g, %g",
                    min(d_dists), max(d_dists))

    # Reorder the descriptors together with their distances so the two
    # returned sequences stay aligned.
    # NOTE(review): assumes elements_to_matrix keeps row i for
    # descriptors[i] -- confirm.
    order = d_dists.argsort()
    descriptors = tuple(descriptors[oidx] for oidx in order)
    d_dists = tuple(d_dists[order])

    self._log.debug("Returning query result of size %g", len(descriptors))

    return descriptors, d_dists
def _nn(self, d, n=1):
    """
    Internal method to be implemented by sub-classes to return the nearest
    `N` neighbors to the given descriptor element.

    When this internal method is called, we have already checked that there
    is a vector in ``d`` and our index is not empty.

    The FAISS search is only used to nominate candidates. Distances are
    recomputed from the stored descriptor vectors, and both returned tuples
    are ordered by increasing recomputed distance so that the element at
    position ``i`` of the first tuple has the distance at position ``i`` of
    the second.

    :param d: Descriptor element to compute the neighbors of.
    :type d: smqtk.representation.DescriptorElement
    :param n: Number of nearest neighbors to find.
    :type n: int

    :raises RuntimeError: There is no FAISS index to query.

    :return: Tuple of nearest N DescriptorElement instances, and a tuple of
        the distance values to those neighbors. Both tuples are empty when
        the search yields no usable neighbor.
    :rtype: (tuple[smqtk.representation.DescriptorElement], tuple[float])

    """
    log = self._log
    # FAISS expects a 2D float32 query matrix; this is a single-row query.
    q = d.vector()[np.newaxis, :].astype(np.float32)

    log.debug("Received query for %d nearest neighbors", n)

    with self._model_lock:
        if self._faiss_index is None:
            raise RuntimeError("No index currently available to query.")

        # Attempt to set n-probe of an IVF index
        self._set_index_nprobe()

        # Never request more neighbors than the index holds.
        # noinspection PyArgumentList
        s_dists, s_ids = self._faiss_index.search(
            q, k=min(n, self._faiss_index.ntotal))
        # Only one query row was submitted, so take row 0 of each result.
        # NOTE(review): the sqrt presumes the index reports squared L2
        # distances -- confirm against the index type in use.
        s_dists, s_ids = np.sqrt(s_dists[0, :]), s_ids[0, :]
        s_ids = s_ids.astype(object)
        # s_id (the FAISS index indices) can equal -1 if fewer than the
        # requested number of nearest neighbors is returned. In this case,
        # eliminate the -1 entries
        log.debug("Getting descriptor UIDs from idx2uid mapping.")
        uuids = list(
            self._idx2uid_kvs.get_many(
                filter(lambda s_id_: s_id_ >= 0, s_ids)))
        if len(uuids) < n:
            warnings.warn(
                "Less than n={} neighbors were retrieved from "
                "the FAISS index instance. Maybe increase "
                "nprobe if this is an IVF index?".format(n),
                RuntimeWarning)

        descriptors = tuple(
            self._descriptor_set.get_many_descriptors(uuids))

    if not descriptors:
        # Nothing usable came back; stacking zero vectors below would raise.
        log.debug("Returning query result of size %g", 0)
        return (), ()

    log.debug("Min and max FAISS distances: %g, %g",
              min(s_dists), max(s_dists))

    d_vectors = np.vstack(DescriptorElement.get_many_vectors(descriptors))
    d_dists = metrics.euclidean_distance(d_vectors, q)

    log.debug("Min and max descriptor distances: %g, %g",
              min(d_dists), max(d_dists))

    # Reorder the descriptors together with their distances. Row ``i`` of
    # ``d_vectors`` was built from ``descriptors[i]``, so one permutation
    # keeps the two returned sequences aligned.
    order = d_dists.argsort()
    descriptors = tuple(descriptors[oidx] for oidx in order)
    d_dists = tuple(d_dists[order])

    log.debug("Returning query result of size %g", len(descriptors))

    return descriptors, d_dists