Exemple #1
0
    def _nn(self, d, n=1):
        """
        Internal method to be implemented by sub-classes to return the nearest
        `N` neighbors to the given descriptor element.

        When this internal method is called, we have already checked that there
        is a vector in ``d`` and our index is not empty.

        :param d: Descriptor element to compute the neighbors of.
        :type d: smqtk.representation.DescriptorElement

        :param n: Number of nearest neighbors to find.
        :type n: int

        :return: Tuple of nearest N DescriptorElement instances, and a tuple of
            the distance values to those neighbors.
        :rtype: (tuple[smqtk.representation.DescriptorElement], tuple[float])

        """
        q = d.vector()[np.newaxis, :].astype(np.float32)

        self._log.debug("Received query for %d nearest neighbors", n)

        with self._model_lock:
            s_dists, s_ids = self._faiss_index.search(
                q, min(n, self._faiss_index.ntotal))
            s_dists, s_ids = np.sqrt(s_dists[0, :]), s_ids[0, :]
            s_ids = s_ids.astype(object)
            # s_id (the FAISS index indices) can equal -1 if fewer than the
            # requested number of nearest neighbors is returned. In this case,
            # eliminate the -1 entries
            uuids = list(
                self._idx2uid_kvs.get_many(
                    filter(lambda s_id_: s_id_ >= 0, s_ids)))

            descriptors = tuple(
                self._descriptor_set.get_many_descriptors(uuids))

        self._log.debug("Min and max FAISS distances: %g, %g", min(s_dists),
                        max(s_dists))

        d_vectors = np.vstack(DescriptorElement.get_many_vectors(descriptors))
        d_dists = metrics.euclidean_distance(d_vectors, q)

        self._log.debug("Min and max descriptor distances: %g, %g",
                        min(d_dists), max(d_dists))

        order = d_dists.argsort()
        uuids, d_dists = zip(*((uuids[oidx], d_dists[oidx]) for oidx in order))

        self._log.debug("Returning query result of size %g", len(uuids))

        return descriptors, tuple(d_dists)
Exemple #2
0
    def _nn(self, d, n=1):
        """
        Internal method to be implemented by sub-classes to return the nearest
        `N` neighbors to the given descriptor element.

        When this internal method is called, we have already checked that there
        is a vector in ``d`` and our index is not empty.

        :param d: Descriptor element to compute the neighbors of.
        :type d: smqtk.representation.DescriptorElement

        :param n: Number of nearest neighbors to find.
        :type n: int

        :return: Tuple of nearest N DescriptorElement instances, and a tuple of
            the distance values to those neighbors.
        :rtype: (tuple[smqtk.representation.DescriptorElement], tuple[float])

        """
        q = d.vector()[np.newaxis, :].astype(np.float32)

        self._log.debug("Received query for %d nearest neighbors", n)

        with self._model_lock:
            s_dists, s_ids = self._faiss_index.search(q, n)
            s_dists, s_ids = np.sqrt(s_dists[0, :]), s_ids[0, :]
            uuids = [self._idx2uid_kvs[s_id] for s_id in s_ids]

            descriptors = self._descriptor_set.get_many_descriptors(uuids)

        self._log.debug("Min and max FAISS distances: %g, %g",
                        min(s_dists), max(s_dists))

        descriptors = tuple(descriptors)
        d_vectors = elements_to_matrix(descriptors)
        d_dists = metrics.euclidean_distance(d_vectors, q)

        self._log.debug("Min and max descriptor distances: %g, %g",
                        min(d_dists), max(d_dists))

        order = d_dists.argsort()
        uuids, d_dists = zip(*((uuids[oidx], d_dists[oidx]) for oidx in order))

        self._log.debug("Returning query result of size %g", len(uuids))

        return descriptors, tuple(d_dists)
Exemple #3
0
    def _nn(self, d, n=1):
        """
        Internal method to be implemented by sub-classes to return the nearest
        `N` neighbors to the given descriptor element.

        When this internal method is called, we have already checked that there
        is a vector in ``d`` and our index is not empty.

        :param d: Descriptor element to compute the neighbors of.
        :type d: smqtk.representation.DescriptorElement

        :param n: Number of nearest neighbors to find.
        :type n: int

        :return: Tuple of nearest N DescriptorElement instances, and a tuple of
            the distance values to those neighbors.
        :rtype: (tuple[smqtk.representation.DescriptorElement], tuple[float])

        """
        log = self._log
        q = d.vector()[np.newaxis, :].astype(np.float32)
        log.debug("Received query for %d nearest neighbors", n)

        with self._model_lock:
            if self._faiss_index is None:
                raise RuntimeError("No index currently available to remove "
                                   "from.")

            # Attempt to set n-probe of an IVF index
            self._set_index_nprobe()

            # noinspection PyArgumentList
            s_dists, s_ids = self._faiss_index.search(
                q, k=min(n, self._faiss_index.ntotal))
            s_dists, s_ids = np.sqrt(s_dists[0, :]), s_ids[0, :]
            s_ids = s_ids.astype(object)
            # s_id (the FAISS index indices) can equal -1 if fewer than the
            # requested number of nearest neighbors is returned. In this case,
            # eliminate the -1 entries
            self._log.debug("Getting descriptor UIDs from idx2uid mapping.")
            uuids = list(
                self._idx2uid_kvs.get_many(
                    filter(lambda s_id_: s_id_ >= 0, s_ids)))
            if len(uuids) < n:
                warnings.warn(
                    "Less than n={} neighbors were retrieved from "
                    "the FAISS index instance. Maybe increase "
                    "nprobe if this is an IVF index?".format(n),
                    RuntimeWarning)

            descriptors = tuple(
                self._descriptor_set.get_many_descriptors(uuids))

        log.debug("Min and max FAISS distances: %g, %g", min(s_dists),
                  max(s_dists))

        d_vectors = np.vstack(DescriptorElement.get_many_vectors(descriptors))
        d_dists = metrics.euclidean_distance(d_vectors, q)

        log.debug("Min and max descriptor distances: %g, %g", min(d_dists),
                  max(d_dists))

        order = d_dists.argsort()
        uuids, d_dists = zip(*((uuids[oidx], d_dists[oidx]) for oidx in order))

        log.debug("Returning query result of size %g", len(uuids))

        return descriptors, tuple(d_dists)