def get_default_config():
    """
    Build the default configuration dictionary for this utility.

    :return: Dictionary with a single ``plugins`` section holding the default
        plugin configurations for the ``descriptor_set`` and ``nn_index``
        slots.
    :rtype: dict
    """
    plugin_defaults = {}
    plugin_defaults['descriptor_set'] = \
        make_default_config(DescriptorIndex.get_impls())
    plugin_defaults['nn_index'] = \
        make_default_config(NearestNeighborsIndex.get_impls())
    return {'plugins': plugin_defaults}
def get_default_config(cls):
    """
    Produce the default configuration dictionary for this class.

    The parent class's default configuration is used as the base and the
    entries specific to this service are merged on top of it. This allows
    inspecting what a configuration for this class looks like without
    instantiating it.

    :return: Default configuration dictionary for the class.
    :rtype: dict
    """
    # Start from whatever the parent class considers its defaults.
    config = super(NearestNeighborServiceServer, cls).get_default_config()

    # Entries contributed by this service.
    service_defaults = {
        "descriptor_factory":
            DescriptorElementFactory.get_default_config(),
        "descriptor_generator":
            make_default_config(DescriptorGenerator.get_impls()),
        "nn_index":
            make_default_config(NearestNeighborsIndex.get_impls()),
        "descriptor_index":
            make_default_config(DescriptorIndex.get_impls()),
        "update_descriptor_index": False,
    }
    merge_dict(config, service_defaults)
    return config
def update_working_set(self, nn_index: NearestNeighborsIndex) -> None:
    """
    Initialize or extend the working set from positively labeled descriptors.

    Every positive descriptor (external or adjudicated) that has not yet
    been used as a seed is queried against the given index, and the
    neighbors returned are added to the working set. Seeds already used
    since the last update or reset are not queried again.

    :param nn_index: :class:`.NearestNeighborsIndex` to query from.

    :raises RuntimeError: There are no positive example descriptors in this
        session to use as a basis for querying.
    """
    external = self.external_positive_descriptors
    adjudicated = self.positive_descriptors
    seeds = external | adjudicated
    if not seeds:
        raise RuntimeError(
            "No positive descriptors to query the neighbor index with."
        )

    # adding to working set
    self._log.info(
        "Building working set using %d positive examples (%d external, "
        "%d adjudicated)",
        len(seeds), len(external), len(adjudicated),
    )

    # TODO: parallel_map and reduce with merge-dict
    for seed in seeds:
        if seed.uuid() in self._wi_seeds_used:
            # This seed's neighborhood was already folded in.
            continue
        self._log.debug("Querying neighbors to: %s", seed)
        query_result = nn_index.nn(seed, n=self.pos_seed_neighbors)
        # Only the first element of the query result (the neighbors) is used.
        self.working_set.add_many_descriptors(query_result[0])
        self._wi_seeds_used.add(seed.uuid())
def main():
    """
    Command-line entry point: print nearest neighbors of descriptors.

    For every descriptor selected (either those whose UUIDs are listed, one
    per line, in the ``uuid_list`` file argument, or every descriptor in the
    configured descriptor set), the descriptor's UUID is printed followed by
    one ``<uuid>,<distance>`` line per neighbor.

    Exit codes used here:
      - 1: no command-line arguments were given (help is printed).
      - 103: the ``uuid_list`` path does not exist.
      - 105: a negative neighbor count was requested.
    """
    parser = get_cli_parser()

    # Print help and exit if no arguments were passed
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    config = utility_main_helper(get_default_config, args)

    log = logging.getLogger(__name__)
    log.debug('Showing debug messages.')

    # Validate arguments before constructing plugins so that bad input fails
    # fast. ``sys.exit`` is used rather than the ``exit`` builtin because the
    # latter is injected by the ``site`` module and is not always available.
    if args.uuid_list is not None and not os.path.exists(args.uuid_list):
        log.error('Invalid file list path: %s', args.uuid_list)
        sys.exit(103)
    elif args.num < 0:
        log.error('Number of nearest neighbors must be >= 0')
        sys.exit(105)

    #: :type: smqtk.representation.DescriptorIndex
    descriptor_set = from_config_dict(config['plugins']['descriptor_set'],
                                      DescriptorIndex.get_impls())
    #: :type: smqtk.algorithms.NearestNeighborsIndex
    nearest_neighbor_index = from_config_dict(
        config['plugins']['nn_index'], NearestNeighborsIndex.get_impls())

    # noinspection PyShadowingNames
    def nearest_neighbors(descriptor, n):
        """ Return ``(uuid, distance)`` pairs for neighbors of a descriptor.

        A count of 0 means "query as many neighbors as the index's length".
        """
        if n == 0:
            n = len(nearest_neighbor_index)
        neighbors, distances = nearest_neighbor_index.nn(descriptor, n)
        # Strip first result (itself) and create list of (uuid, distance)
        return [(neighbor.uuid(), distance)
                for neighbor, distance in zip(neighbors[1:], distances[1:])]

    def report(label, descriptor):
        """ Print the label line followed by one line per neighbor. """
        print(label)
        for neighbor in nearest_neighbors(descriptor, args.num):
            print('%s,%f' % neighbor)

    if args.uuid_list is not None:
        with open(args.uuid_list, 'r') as infile:
            for line in infile:
                uuid = line.strip()
                if not uuid:
                    # Tolerate blank lines (e.g. a trailing newline) instead
                    # of looking up an empty UUID.
                    continue
                descriptor = descriptor_set.get_descriptor(uuid)
                report(descriptor.uuid(), descriptor)
    else:
        for (uuid, descriptor) in descriptor_set.iteritems():
            report(uuid, descriptor)
def cli_build(config_filepath):
    """
    Build a new nearest-neighbors index from the contents of the configured
    descriptor set.

    :param config_filepath: Path to the configuration file to load. The
        built-in defaults alone are not enough, so the file must load
        successfully.

    :raises RuntimeError: The configuration file could not be loaded.
    """
    loaded = load_config(config_filepath, defaults=build_default_config())
    config_dict, success = loaded
    # Defaults are insufficient so we assert that the configuration file was
    # (successfully) loaded.
    if not success:
        raise RuntimeError("Failed to load configuration file.")

    descriptor_impls = DescriptorSet.get_impls()
    descr_set = from_config_dict(config_dict['descriptor_set'],
                                 descriptor_impls)

    index_impls = NearestNeighborsIndex.get_impls()
    nn_index = from_config_dict(config_dict['neighbor_index'], index_impls)

    # TODO: reduced amount used for building ("training") and remainder used
    #       for update.
    nn_index.build_index(descr_set)
def build_default_config():
    """
    Assemble the default configuration for the index-building tool.

    :return: Dictionary with default plugin configurations under the
        ``descriptor_set`` and ``neighbor_index`` keys.
    :rtype: dict
    """
    defaults = {}
    defaults['descriptor_set'] = make_default_config(DescriptorSet.get_impls())
    defaults['neighbor_index'] = \
        make_default_config(NearestNeighborsIndex.get_impls())
    return defaults
def test_impl_findable(self):
    """ The FLANN index must be among the discovered implementations. """
    # Already here because the implementation is reporting itself as
    # usable.
    discovered = NearestNeighborsIndex.get_impls()
    self.assertIn(FlannNearestNeighborsIndex, discovered)
def test_impl_findable(self):
    """ The MRPT index must be among the discovered implementations. """
    discovered = NearestNeighborsIndex.get_impls()
    self.assertIn(MRPTNearestNeighborsIndex, discovered)
def __init__(self, json_config):
    """
    Initialize application based on supplied JSON configuration.

    Besides constructing the configured descriptor factory, descriptor
    generator and nearest-neighbors index (plus, when
    ``update_descriptor_index`` is enabled, the descriptor index), this
    registers the ``/count``, ``/compute/...`` and ``/nn/...`` routes.

    :param json_config: JSON configuration dictionary
    :type json_config: dict
    """
    super(NearestNeighborServiceServer, self).__init__(json_config)

    self.update_index = json_config['update_descriptor_index']

    # Descriptor factory setup
    self._log.info("Initializing DescriptorElementFactory")
    self.descr_elem_factory = DescriptorElementFactory.from_config(
        self.json_config['descriptor_factory'])

    #: :type: smqtk.representation.DescriptorIndex | None
    self.descr_index = None
    if self.update_index:
        self._log.info("Initializing DescriptorIndex to update")
        #: :type: smqtk.representation.DescriptorIndex | None
        self.descr_index = from_config_dict(
            json_config['descriptor_index'], DescriptorIndex.get_impls())

    #: :type: smqtk.algorithms.NearestNeighborsIndex
    self.nn_index = from_config_dict(json_config['nn_index'],
                                     NearestNeighborsIndex.get_impls())

    #: :type: smqtk.algorithms.DescriptorGenerator
    self.descriptor_generator_inst = from_config_dict(
        self.json_config['descriptor_generator'],
        DescriptorGenerator.get_impls())

    @self.route("/count", methods=['GET'])
    def count():
        """
        Return the number of elements represented in this index.
        """
        return flask.jsonify(**{
            "count": self.nn_index.count(),
        })

    @self.route("/compute/<path:uri>", methods=["POST"])
    def compute(uri):
        """
        Compute the descriptor for a URI specified data element using the
        configured descriptor generator.

        See ``compute_nearest_neighbors`` method docstring for URI
        specifications accepted.

        If a descriptor index was configured and update was turned on, we
        add the computed descriptor to the index.

        JSON Return format::

            {
                "success": <bool>

                "message": <str>

                "descriptor": <None|list[float]>

                "reference_uri": <str>
            }

        :param uri: URI data specification.

        """
        # Only ever holds a JSON-serializable list of floats (or None), so
        # that a failure while converting the vector cannot leak a descriptor
        # element into the response and be reported as a success.
        vector = None
        try:
            element = self.generate_descriptor_for_uri(uri)
            vector = list(map(float, element.vector()))
            message = "Descriptor generated"
        except ValueError as ex:
            message = "Input value issue: %s" % str(ex)
        except RuntimeError as ex:
            message = "Descriptor extraction failure: %s" % str(ex)

        return flask.jsonify(
            success=vector is not None,
            message=message,
            descriptor=vector,
            reference_uri=uri,
        )

    @self.route("/nn/<path:uri>")
    @self.route("/nn/n=<int:n>/<path:uri>")
    @self.route("/nn/n=<int:n>/<int:start_i>:<int:end_i>/<path:uri>")
    def compute_nearest_neighbors(uri, n=10, start_i=None, end_i=None):
        """
        Data modes for upload/use:

            - local filepath
            - base64
            - http/s URL
            - existing data/descriptor UUID

        The following sub-sections detail how different URI's can be used.

        Local Filepath
        --------------

        The URI string must be prefixed with ``file://``, followed by the
        full path to the data file to describe.

        Base 64 data
        ------------

        The URI string must be prefixed with "base64://", followed by the
        base64 encoded string. This mode also requires an additional
        ``?content_type=`` to provide data content type information. This
        mode saves the encoded data to temporary file for processing.

        HTTP/S address
        --------------

        This is the default mode when the URI prefix is none of the above.
        This uses the requests module to locally download a data file
        for processing.

        Existing Data/Descriptor by UUID
        --------------------------------

        When given a uri prefixed with "uuid://", we interpret the remainder
        of the uri as the UUID of a descriptor already present in the
        configured descriptor index. If the given UUID is not present in the
        index, a KeyError is raised.

        JSON Return format
        ------------------

            {
                "success": <bool>

                "message": <str>

                "neighbors": <list of neighbor descriptor UUIDs>

                "distances": <list of distances, parallel to "neighbors">

                "reference_uri": <str>
            }

        ``success`` is false when either the descriptor could not be
        generated or the neighbor query failed; in both cases ``neighbors``
        and ``distances`` are empty and ``message`` describes the problem.

        :param n: Number of neighbors to query for
        :param start_i: The starting index of the neighbor vectors to slice
            into for return.
        :param end_i: The ending index of the neighbor vectors to slice into
            for return.
        :type uri: str

        """
        descriptor = None
        try:
            descriptor = self.generate_descriptor_for_uri(uri)
            message = "descriptor computed"
        except ValueError as ex:
            message = "Input data issue: %s" % str(ex)
        except RuntimeError as ex:
            message = "Descriptor generation failure: %s" % str(ex)

        # Base pagination slicing based on provided start and end indices,
        # otherwise clamp to beginning/ending of queried neighbor sequence.
        page_slice = slice(start_i or 0, end_i or n)
        neighbors = []
        dists = []
        success = descriptor is not None
        if success:
            try:
                neighbors, dists = self.nn_index.nn(descriptor, n)
            except ValueError as ex:
                # The query itself failed, so the request as a whole did not
                # succeed even though a descriptor was generated.
                success = False
                message = "Descriptor or index related issue: %s" % str(ex)

        # TODO: Return the optional descriptor vectors for the neighbors
        # noinspection PyTypeChecker
        d = {
            "success": success,
            "message": message,
            "neighbors": [elem.uuid() for elem in neighbors[page_slice]],
            "distances": dists[page_slice],
            "reference_uri": uri
        }
        return flask.jsonify(d)