Exemple #1
0
def get_default_config():
    """
    Build the default configuration dictionary for this utility.

    The returned dictionary has a single ``plugins`` section containing the
    default plugin configurations generated by ``make_default_config`` for the
    available ``DescriptorIndex`` implementations (``descriptor_set``) and the
    available ``NearestNeighborsIndex`` implementations (``nn_index``).

    :return: Default configuration dictionary.
    :rtype: dict

    """
    descriptor_set_default = make_default_config(DescriptorIndex.get_impls())
    nn_index_default = make_default_config(NearestNeighborsIndex.get_impls())

    plugins = {}
    plugins['descriptor_set'] = descriptor_set_default
    plugins['nn_index'] = nn_index_default
    return {'plugins': plugins}
Exemple #2
0
    def get_default_config(cls):
        """
        Produce the default configuration dictionary for this class.

        The parent class' default configuration is used as the base and is
        then merged (via ``merge_dict``) with the sections specific to this
        service: the descriptor element factory, the descriptor generator,
        the nearest-neighbors index, the descriptor index and the
        ``update_descriptor_index`` flag (``False`` by default).

        This lets callers see what a configuration for this class looks like
        without instantiating it.

        :return: Default configuration dictionary for the class.
        :rtype: dict

        """
        base_config = \
            super(NearestNeighborServiceServer, cls).get_default_config()

        # Sections contributed by this class, filled in the same order in
        # which they should appear in the resulting configuration.
        service_defaults = {}
        service_defaults["descriptor_factory"] = \
            DescriptorElementFactory.get_default_config()
        service_defaults["descriptor_generator"] = \
            make_default_config(DescriptorGenerator.get_impls())
        service_defaults["nn_index"] = \
            make_default_config(NearestNeighborsIndex.get_impls())
        service_defaults["descriptor_index"] = \
            make_default_config(DescriptorIndex.get_impls())
        service_defaults["update_descriptor_index"] = False

        merge_dict(base_config, service_defaults)
        return base_config
Exemple #3
0
    def update_working_set(self, nn_index: NearestNeighborsIndex) -> None:
        """
        Create or extend the working set by querying the given
        :class:`.NearestNeighborsIndex` with our positively labeled
        descriptor elements (external and adjudicated).

        Each positive element is used as a query seed at most once: seeds
        whose UUID was already used since the last update or reset are
        skipped.

        :param nn_index: :class:`.NearestNeighborsIndex` to query from.

        :raises RuntimeError: There are no positive example descriptors in this
            session to use as a basis for querying.

        """
        external_pos = self.external_positive_descriptors
        adjudicated_pos = self.positive_descriptors
        seeds = external_pos | adjudicated_pos
        if len(seeds) == 0:
            raise RuntimeError("No positive descriptors to query the neighbor "
                               "index with.")

        self._log.info(
            "Building working set using %d positive examples "
            "(%d external, %d adjudicated)",
            len(seeds), len(external_pos), len(adjudicated_pos))

        # TODO: parallel_map and reduce with merge-dict
        for seed in seeds:
            # Skip seeds that have already contributed their neighbors.
            if seed.uuid() in self._wi_seeds_used:
                continue
            self._log.debug("Querying neighbors to: %s", seed)
            # Only the neighbor elements (first item of the ``nn`` result)
            # are added to the working set.
            seed_neighbors = nn_index.nn(seed, n=self.pos_seed_neighbors)[0]
            self.working_set.add_many_descriptors(seed_neighbors)
            self._wi_seeds_used.add(seed.uuid())
Exemple #4
0
def main():
    """
    Command line entry point: print the nearest neighbors of descriptors.

    For every descriptor considered, its UUID is printed on one line followed
    by one ``<neighbor uuid>,<distance>`` line per neighbor. Descriptors are
    taken either from the UUIDs listed (one per line) in the ``uuid_list``
    file or, when no list is given, from every element of the configured
    descriptor set.

    Exit codes:

    - ``1``: no command line arguments were given (help is printed).
    - ``103``: the given UUID list path does not exist.
    - ``105``: a negative number of neighbors was requested.

    """
    parser = get_cli_parser()

    # Print help and exit if no arguments were passed
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    config = utility_main_helper(get_default_config, args)

    log = logging.getLogger(__name__)
    log.debug('Showing debug messages.')

    # Validate arguments before instantiating the (potentially expensive)
    # plugins so that bad input fails fast. ``sys.exit`` is used rather than
    # the ``exit`` builtin, which is only injected by the ``site`` module and
    # is not guaranteed to exist.
    if args.uuid_list is not None and not os.path.exists(args.uuid_list):
        log.error('Invalid file list path: %s', args.uuid_list)
        sys.exit(103)
    if args.num < 0:
        log.error('Number of nearest neighbors must be >= 0')
        sys.exit(105)

    #: :type: smqtk.representation.DescriptorIndex
    descriptor_set = from_config_dict(config['plugins']['descriptor_set'],
                                      DescriptorIndex.get_impls())
    #: :type: smqtk.algorithms.NearestNeighborsIndex
    nearest_neighbor_index = from_config_dict(
        config['plugins']['nn_index'], NearestNeighborsIndex.get_impls())

    # noinspection PyShadowingNames
    def nearest_neighbors(descriptor, n):
        """
        Query the index and return a list of ``(uuid, distance)`` pairs.

        An ``n`` of 0 requests as many results as the index has elements.
        The first result is dropped on the assumption that it is the query
        descriptor itself, so at most ``n - 1`` pairs are returned.
        """
        if n == 0:
            n = len(nearest_neighbor_index)

        neighbors, distances = nearest_neighbor_index.nn(descriptor, n)
        # Strip first result (itself) and create list of (uuid, distance)
        return list(zip([x.uuid() for x in neighbors[1:]], distances[1:]))

    def print_neighbors(descriptor):
        """ Print one ``uuid,distance`` line per neighbor of descriptor. """
        for neighbor in nearest_neighbors(descriptor, args.num):
            print('%s,%f' % neighbor)

    if args.uuid_list is not None:
        with open(args.uuid_list, 'r') as infile:
            for line in infile:
                uuid = line.strip()
                # Ignore blank lines (e.g. a trailing newline at end of
                # file) instead of looking up an empty UUID.
                if not uuid:
                    continue
                descriptor = descriptor_set.get_descriptor(uuid)
                print(descriptor.uuid())
                print_neighbors(descriptor)
    else:
        for (uuid, descriptor) in descriptor_set.iteritems():
            print(uuid)
            print_neighbors(descriptor)
Exemple #5
0
def cli_build(config_filepath):
    """
    Build a new nearest-neighbors index from the contents of the configured
    descriptor set.

    :param config_filepath: Path of the configuration file, handed to
        ``load_config`` together with the defaults from
        ``build_default_config``.

    :raises RuntimeError: The configuration file was not successfully loaded.

    """
    defaults = build_default_config()
    config, loaded_ok = load_config(config_filepath, defaults=defaults)

    # The default configuration alone is not enough to run with, so an
    # unsuccessful load of the configuration file is treated as an error.
    if not loaded_ok:
        raise RuntimeError("Failed to load configuration file.")

    descriptor_set_impls = DescriptorSet.get_impls()
    descriptor_set = from_config_dict(config['descriptor_set'],
                                      descriptor_set_impls)

    neighbor_index_impls = NearestNeighborsIndex.get_impls()
    neighbor_index = from_config_dict(config['neighbor_index'],
                                      neighbor_index_impls)

    # TODO: reduced amount used for building ("training") and remainder used
    #       for update.
    neighbor_index.build_index(descriptor_set)
Exemple #6
0
def build_default_config():
    """
    Build the default configuration dictionary for the index-building tool.

    :return: Dictionary with the default plugin configurations generated by
        ``make_default_config`` for the available ``DescriptorSet``
        implementations (``descriptor_set``) and ``NearestNeighborsIndex``
        implementations (``neighbor_index``).
    :rtype: dict

    """
    config = {}
    config['descriptor_set'] = make_default_config(DescriptorSet.get_impls())
    config['neighbor_index'] = make_default_config(
        NearestNeighborsIndex.get_impls())
    return config
 def test_impl_findable(self):
     # The FLANN implementation must be among the implementations that
     # NearestNeighborsIndex discovers. We only get here because the
     # implementation is reporting itself as usable.
     discovered_impls = NearestNeighborsIndex.get_impls()
     self.assertIn(FlannNearestNeighborsIndex, discovered_impls)
Exemple #8
0
 def test_impl_findable(self):
     # The MRPT implementation must be among the implementations that
     # NearestNeighborsIndex discovers.
     discovered_impls = NearestNeighborsIndex.get_impls()
     self.assertIn(MRPTNearestNeighborsIndex, discovered_impls)
Exemple #9
0
    def __init__(self, json_config):
        """
        Initialize the application based on the supplied JSON configuration.

        Besides instantiating the configured descriptor factory, descriptor
        generator and nearest-neighbors index (and, when
        ``update_descriptor_index`` is true, the descriptor index), this
        registers the ``/count``, ``/compute/<uri>`` and ``/nn/...`` routes.

        :param json_config: JSON configuration dictionary
        :type json_config: dict

        """
        super(NearestNeighborServiceServer, self).__init__(json_config)

        # Whether a descriptor index should be instantiated for updating.
        self.update_index = json_config['update_descriptor_index']

        # Descriptor factory setup
        # NOTE(review): ``self.json_config`` is presumably set by the parent
        # constructor to the same dictionary as ``json_config`` -- confirm.
        self._log.info("Initializing DescriptorElementFactory")
        self.descr_elem_factory = DescriptorElementFactory.from_config(
            self.json_config['descriptor_factory'])

        # The descriptor index is only instantiated when updating is enabled;
        # otherwise it stays None.
        #: :type: smqtk.representation.DescriptorIndex | None
        self.descr_index = None
        if self.update_index:
            self._log.info("Initializing DescriptorIndex to update")
            #: :type: smqtk.representation.DescriptorIndex | None
            self.descr_index = from_config_dict(
                json_config['descriptor_index'], DescriptorIndex.get_impls())

        #: :type: smqtk.algorithms.NearestNeighborsIndex
        self.nn_index = from_config_dict(json_config['nn_index'],
                                         NearestNeighborsIndex.get_impls())

        #: :type: smqtk.algorithms.DescriptorGenerator
        self.descriptor_generator_inst = from_config_dict(
            self.json_config['descriptor_generator'],
            DescriptorGenerator.get_impls())

        @self.route("/count", methods=['GET'])
        def count():
            """
            Return the number of elements represented in this index.

            JSON Return format::
                {
                    "count": <result of the nearest-neighbors index count()>
                }
            """
            return flask.jsonify(**{
                "count": self.nn_index.count(),
            })

        @self.route("/compute/<path:uri>", methods=["POST"])
        def compute(uri):
            """
            Compute the descriptor for a URI specified data element using the
            configured descriptor generator.

            See ``compute_nearest_neighbors`` method docstring for URI
            specifications accepted.

            If a descriptor index was configured and update was turned on,
            we add the computed descriptor to the index.

            JSON Return format::
                {
                    "success": <bool>

                    "message": <str>

                    "descriptor": <None|list[float]>

                    "reference_uri": <str>
                }

            :param uri: URI data specification.

            """
            # Remains None when descriptor generation fails, which is what
            # the "success" flag below is derived from.
            descriptor = None
            try:
                descriptor = self.generate_descriptor_for_uri(uri)
                message = "Descriptor generated"
                # Convert the vector into a plain list of floats for the
                # JSON response.
                descriptor = list(map(float, descriptor.vector()))
            except ValueError as ex:
                message = "Input value issue: %s" % str(ex)
            except RuntimeError as ex:
                message = "Descriptor extraction failure: %s" % str(ex)
            # Any other exception type is not handled here and propagates.

            return flask.jsonify(
                success=descriptor is not None,
                message=message,
                descriptor=descriptor,
                reference_uri=uri,
            )

        # Route variants: ``n`` defaults to 10 and ``start_i``/``end_i`` to
        # None when they are not part of the requested URL.
        @self.route("/nn/<path:uri>")
        @self.route("/nn/n=<int:n>/<path:uri>")
        @self.route("/nn/n=<int:n>/<int:start_i>:<int:end_i>/<path:uri>")
        def compute_nearest_neighbors(uri, n=10, start_i=None, end_i=None):
            """
            Compute the nearest neighbors of the data element specified by
            the given URI and return them as JSON.

            Data modes for upload/use:

                - local filepath
                - base64
                - http/s URL
                - existing data/descriptor UUID

            The following sub-sections detail how different URI's can be used.

            Local Filepath
            --------------
            The URI string must be prefixed with ``file://``, followed by the
            full path to the data file to describe.

            Base 64 data
            ------------
            The URI string must be prefixed with "base64://", followed by the
            base64 encoded string. This mode also requires an additional
            ``?content_type=`` to provide data content type information. This
            mode saves the encoded data to temporary file for processing.

            HTTP/S address
            --------------
            This is the default mode when the URI prefix is none of the above.
            This uses the requests module to locally download a data file
            for processing.

            Existing Data/Descriptor by UUID
            --------------------------------
            When given a uri prefixed with "uuid://", we interpret the remainder
            of the uri as the UUID of a descriptor already present in the
            configured descriptor index. If the given UUID is not present in the
            index, a KeyError is raised.

            JSON Return format
            ------------------
                {
                    "success": <bool>

                    "message": <str>

                    "neighbors": <list of neighbor UUIDs>

                    "distances": <distances for the returned neighbors>

                    "reference_uri": <str>
                }

            ``success`` only reflects whether a descriptor could be generated
            for the URI. ``neighbors`` and ``distances`` are empty when no
            descriptor was generated or the index query raised a ValueError.

            :param uri: URI data specification.
            :param n: Number of neighbors to query for
            :param start_i: The starting index of the neighbor vectors to slice
                into for return.
            :param end_i: The ending index of the neighbor vectors to slice
                into for return.
            :type uri: str

            """
            descriptor = None
            try:
                descriptor = self.generate_descriptor_for_uri(uri)
                message = "descriptor computed"
            except ValueError as ex:
                message = "Input data issue: %s" % str(ex)
            except RuntimeError as ex:
                message = "Descriptor generation failure: %s" % str(ex)

            # Base pagination slicing based on provided start and end indices,
            # otherwise clamp to beginning/ending of queried neighbor sequence.
            # Because of the ``or``, a falsy value (None or 0) for start_i
            # falls back to 0 and a falsy end_i falls back to n.
            page_slice = slice(start_i or 0, end_i or n)
            neighbors = []
            dists = []
            if descriptor is not None:
                try:
                    neighbors, dists = \
                        self.nn_index.nn(descriptor, n)
                except ValueError as ex:
                    # Results stay empty; only the message reports the issue.
                    message = "Descriptor or index related issue: %s" % str(ex)

            # TODO: Return the optional descriptor vectors for the neighbors
            # NOTE: "success" is derived from descriptor generation only, so
            #       it is still True when the index query above failed.
            # noinspection PyTypeChecker
            d = {
                "success": bool(descriptor is not None),
                "message": message,
                "neighbors": [n.uuid() for n in neighbors[page_slice]],
                "distances": dists[page_slice],
                "reference_uri": uri
            }
            return flask.jsonify(d)