def get_default_config(cls):
    """
    Build the default configuration dictionary for this class.

    The parent class's default configuration is used as the base and the
    entries specific to this service are merged into it. Primarily useful
    for showing what the configuration for this class looks like without
    instantiating it.

    :return: Default configuration dictionary for the class.
    :rtype: dict
    """
    config = super(NearestNeighborServiceServer, cls).get_default_config()

    # Defaults contributed by this class, in the order they are generated.
    service_defaults = {
        "descriptor_factory": DescriptorElementFactory.get_default_config(),
        "descriptor_generator":
            plugin.make_config(get_descriptor_generator_impls()),
        "nn_index": plugin.make_config(get_nn_index_impls()),
        "descriptor_index": plugin.make_config(get_descriptor_index_impls()),
        "update_descriptor_index": False,
    }
    merge_dict(config, service_defaults)
    return config
def get_default_config(cls):
    """
    Build the default configuration dictionary for the IQR service.

    The parent class's defaults are extended with an ``iqr_service``
    section holding the positive seed neighbor count, a note for each
    plugin slot and the default configuration for each plugin slot.

    :return: Default configuration dictionary for the class.
    :rtype: dict
    """
    config = super(IqrService, cls).get_default_config()

    # Generated relevancy index plugin config, overlaid with the IQR
    # session module's default relevancy index configuration.
    rel_index_defaults = plugin.make_config(get_relevancy_index_impls())
    merge_dict(rel_index_defaults, iqr_session.DFLT_REL_INDEX_CONFIG)

    # Human-readable guidance for each entry of the "plugins" section.
    plugin_notes = {
        "relevancy_index_config":
            "The relevancy index config provided should not have "
            "persistent storage configured as it will be used in "
            "such a way that instances are created, built and "
            "destroyed often.",
        "descriptor_index":
            "This is the index from which given positive and "
            "negative example descriptors are retrieved from. "
            "Not used for nearest neighbor querying. "
            "This index must contain all descriptors that could "
            "possibly be used as positive/negative examples and "
            "updated accordingly.",
        "neighbor_index":
            "This is the neighbor index to pull initial near-"
            "positive descriptors from.",
        "classifier_config":
            "The configuration to use for training and using "
            "classifiers for the /classifier endpoint. "
            "When configuring a classifier for use, don't fill "
            "out model persistence values as many classifiers "
            "may be created and thrown away during this service's "
            "operation.",
        "classification_factory":
            "Selection of the backend in which classifications "
            "are stored. The in-memory version is recommended "
            "because normal caching mechanisms will not account "
            "for the variety of classifiers that can potentially "
            "be created via this utility.",
    }

    # Default configuration for each plugin slot.
    plugin_defaults = {
        "relevancy_index_config": rel_index_defaults,
        "descriptor_index":
            plugin.make_config(get_descriptor_index_impls()),
        "neighbor_index": plugin.make_config(get_nn_index_impls()),
        "classifier_config": plugin.make_config(get_classifier_impls()),
        "classification_factory":
            ClassificationElementFactory.get_default_config(),
    }

    merge_dict(config, {
        "iqr_service": {
            "positive_seed_neighbors": 500,
            "plugin_notes": plugin_notes,
            "plugins": plugin_defaults,
        },
    })
    return config
def get_default_config(cls):
    """
    Build the default configuration dictionary for the IQR service.

    The parent class's defaults are extended with an ``iqr_service``
    section holding the positive seed neighbor count, a note for each
    plugin slot and the default configuration for each plugin slot.

    :return: Default configuration dictionary for the class.
    :rtype: dict
    """
    config = super(IqrService, cls).get_default_config()

    # Generated relevancy index plugin config, overlaid with the IQR
    # session module's default relevancy index configuration.
    rel_index_defaults = plugin.make_config(get_relevancy_index_impls())
    merge_dict(rel_index_defaults, iqr_session.DFLT_REL_INDEX_CONFIG)

    # Human-readable guidance for each entry of the "plugins" section.
    plugin_notes = {
        "relevancy_index_config":
            "The relevancy index config provided should not have "
            "persistent storage configured as it will be used in "
            "such a way that instances are created, built and "
            "destroyed often.",
        "descriptor_index":
            "This is the index from which given positive and "
            "negative example descriptors are retrieved from. "
            "Not used for nearest neighbor querying. "
            "This index must contain all descriptors that could "
            "possibly be used as positive/negative examples and "
            "updated accordingly.",
        "neighbor_index":
            "This is the neighbor index to pull initial near-"
            "positive descriptors from.",
        "classifier_config":
            "The configuration to use for training and using "
            "classifiers for the /classifier endpoint. "
            "When configuring a classifier for use, don't fill "
            "out model persistence values as many classifiers "
            "may be created and thrown away during this service's "
            "operation.",
        "classification_factory":
            "Selection of the backend in which classifications "
            "are stored. The in-memory version is recommended "
            "because normal caching mechanisms will not account "
            "for the variety of classifiers that can potentially "
            "be created via this utility.",
    }

    # Default configuration for each plugin slot.
    plugin_defaults = {
        "relevancy_index_config": rel_index_defaults,
        "descriptor_index":
            plugin.make_config(get_descriptor_index_impls()),
        "neighbor_index": plugin.make_config(get_nn_index_impls()),
        "classifier_config": plugin.make_config(get_classifier_impls()),
        "classification_factory":
            ClassificationElementFactory.get_default_config(),
    }

    merge_dict(config, {
        "iqr_service": {
            "positive_seed_neighbors": 500,
            "plugin_notes": plugin_notes,
            "plugins": plugin_defaults,
        },
    })
    return config
def __init__(self, json_config):
    """
    Initialize the IQR service from a JSON configuration dictionary.

    Reads the ``iqr_service`` section of the configuration: the
    ``session_control`` sub-section provides the positive seed neighbor
    count and session expiration settings, and the ``plugins``
    sub-section provides the plugin configurations.

    :param json_config: JSON configuration dictionary
    :type json_config: dict
    """
    super(IqrService, self).__init__(json_config)

    service_cfg = json_config['iqr_service']
    session_cfg = service_cfg['session_control']

    # Initialize from config
    self.positive_seed_neighbors = session_cfg['positive_seed_neighbors']

    plugins_cfg = service_cfg['plugins']
    self.classifier_config = plugins_cfg['classifier_config']
    self.classification_factory = ClassificationElementFactory.from_config(
        plugins_cfg['classification_factory']
    )

    #: :type: smqtk.representation.DescriptorIndex
    self.descriptor_index = plugin.from_plugin_config(
        plugins_cfg['descriptor_index'],
        get_descriptor_index_impls(),
    )

    #: :type: smqtk.algorithms.NearestNeighborsIndex
    self.neighbor_index = plugin.from_plugin_config(
        plugins_cfg['neighbor_index'],
        get_nn_index_impls(),
    )

    self.rel_index_config = plugins_cfg['relevancy_index_config']

    # Record of trained classifiers for a session. Session classifier
    # modifications locked under the parent session's global lock.
    #: :type: dict[collections.Hashable, smqtk.algorithms.SupervisedClassifier | None]
    self.session_classifiers = {}

    # Control for knowing when a new classifier should be trained for a
    # session (True == train new classifier). Modification for specific
    # sessions under parent session's lock.
    #: :type: dict[collections.Hashable, bool]
    self.session_classifier_dirty = {}

    def session_expire_callback(session):
        """
        Drop the classifier bookkeeping entries of the given session.

        :type session: smqtk.iqr.IqrSession
        """
        # The session is entered as a context manager while its entries
        # are removed from both maps.
        with session:
            self._log.debug("Removing session %s classifier", session.uuid)
            del self.session_classifiers[session.uuid]
            del self.session_classifier_dirty[session.uuid]

    expiration_cfg = session_cfg['session_expiration']
    self.controller = iqr_controller.IqrController(
        expiration_cfg['enabled'],
        expiration_cfg['check_interval_seconds'],
        session_expire_callback,
    )
    self.session_timeout = expiration_cfg['session_timeout']
def get_default_config(cls):
    """
    Build the default configuration dictionary for this class.

    The parent class's default configuration is used as the base and the
    entries specific to this service are merged into it. Primarily useful
    for showing what the configuration for this class looks like without
    instantiating it.

    :return: Default configuration dictionary for the class.
    :rtype: dict
    """
    config = super(NearestNeighborServiceServer, cls).get_default_config()

    # Defaults contributed by this class, in the order they are generated.
    service_defaults = {
        "descriptor_factory": DescriptorElementFactory.get_default_config(),
        "descriptor_generator":
            plugin.make_config(get_descriptor_generator_impls()),
        "nn_index": plugin.make_config(get_nn_index_impls()),
        "descriptor_index": plugin.make_config(get_descriptor_index_impls()),
        "update_descriptor_index": False,
    }
    merge_dict(config, service_defaults)
    return config
def __init__(self, json_config):
    """
    Initialize the IQR service from a JSON configuration dictionary.

    Reads the ``iqr_service`` section of the configuration for the
    positive seed neighbor count and, from its ``plugins`` sub-section,
    the plugin configurations.

    :param json_config: JSON configuration dictionary
    :type json_config: dict
    """
    super(IqrService, self).__init__(json_config)

    service_cfg = json_config['iqr_service']

    # Initialize from config
    self.positive_seed_neighbors = service_cfg['positive_seed_neighbors']

    plugins_cfg = service_cfg['plugins']
    self.classifier_config = plugins_cfg['classifier_config']
    self.classification_factory = ClassificationElementFactory.from_config(
        plugins_cfg['classification_factory']
    )

    #: :type: smqtk.representation.DescriptorIndex
    self.descriptor_index = plugin.from_plugin_config(
        plugins_cfg['descriptor_index'],
        get_descriptor_index_impls(),
    )

    #: :type: smqtk.algorithms.NearestNeighborsIndex
    self.neighbor_index = plugin.from_plugin_config(
        plugins_cfg['neighbor_index'],
        get_nn_index_impls(),
    )

    self.rel_index_config = plugins_cfg['relevancy_index_config']

    self.controller = iqr_controller.IqrController()

    # Record of trained classifiers for a session. Session classifier
    # modifications locked under the parent session's global lock.
    #: :type: dict[collections.Hashable, smqtk.algorithms.SupervisedClassifier | None]
    self.session_classifiers = {}

    # Control for knowing when a new classifier should be trained for a
    # session (True == train new classifier). Modification for specific
    # sessions under parent session's lock.
    #: :type: dict[collections.Hashable, bool]
    self.session_classifier_dirty = {}
def __init__(self, json_config):
    """
    Initialize application based on the supplied JSON configuration.

    Instantiates the descriptor element factory, the optional descriptor
    index (only when ``update_descriptor_index`` is true), the nearest
    neighbors index and the descriptor generator, then registers the
    ``/count`` and ``/compute/<uri>`` routes.

    :param json_config: JSON configuration dictionary
    :type json_config: dict
    """
    super(NearestNeighborServiceServer, self).__init__(json_config)

    self.update_index = json_config['update_descriptor_index']

    # Descriptor factory setup
    self._log.info("Initializing DescriptorElementFactory")
    self.descr_elem_factory = DescriptorElementFactory.from_config(
        self.json_config['descriptor_factory']
    )

    #: :type: smqtk.representation.DescriptorIndex | None
    self.descr_index = None
    if self.update_index:
        self._log.info("Initializing DescriptorIndex to update")
        #: :type: smqtk.representation.DescriptorIndex | None
        self.descr_index = plugin.from_plugin_config(
            json_config['descriptor_index'],
            get_descriptor_index_impls()
        )

    #: :type: smqtk.algorithms.NearestNeighborsIndex
    self.nn_index = plugin.from_plugin_config(
        json_config['nn_index'],
        get_nn_index_impls()
    )

    #: :type: smqtk.algorithms.DescriptorGenerator
    self.descriptor_generator_inst = plugin.from_plugin_config(
        self.json_config['descriptor_generator'],
        get_descriptor_generator_impls()
    )

    @self.route("/count", methods=['GET'])
    def count():
        """
        Return the number of elements represented in this index.
        """
        return flask.jsonify(**{
            "count": self.nn_index.count(),
        })

    @self.route("/compute/<path:uri>", methods=["POST"])
    def compute(uri):
        """
        Compute the descriptor for a URI specified data element using the
        configured descriptor generator.

        If a descriptor index was configured and update was turned on, we
        add the computed descriptor to the index.

        JSON Return format::
            {
                "success": <bool>

                "message": <str>

                "descriptor": <None|list[float]>

                "reference_uri": <str>
            }

        :param uri: URI data specification.

        """
        # Only bound to the float list once the whole computation succeeded,
        # so a failure never leaves a non-serializable element object here.
        descriptor = None
        try:
            _, descr_elem = self.generate_descriptor_for_uri(uri)
            # ``list(...)`` keeps the value JSON-serializable where ``map``
            # returns an iterator instead of a list.
            descriptor = list(map(float, descr_elem.vector()))
            message = "Descriptor generated"
        except ValueError as ex:
            message = "Input value issue: %s" % str(ex)
        except RuntimeError as ex:
            message = "Descriptor extraction failure: %s" % str(ex)

        # Respond with the structure documented above.
        return flask.jsonify(
            success=descriptor is not None,
            message=message,
            descriptor=descriptor,
            reference_uri=uri,
        )
def test_impl_findable(self):
    # This test is only reached when the implementation reports itself as
    # usable, so its class name must be among the discovered nearest
    # neighbor index implementations.
    discovered_impls = get_nn_index_impls()
    expected_name = FlannNearestNeighborsIndex.__name__
    ntools.assert_in(expected_name, discovered_impls)
def main():
    """
    Build the models needed by the IQR demo application.

    Parses the command line, loads the search application configuration
    file, instantiates the data set, descriptor generator, nearest
    neighbors index and relevancy index configured for the selected IQR
    tab, adds the input files to the data set and builds the indices from
    the computed descriptors.
    """
    parser = cli_parser()
    args = parser.parse_args()

    #
    # Setup logging
    #
    if not logging.getLogger().handlers:
        log_level = logging.DEBUG if args.verbose else logging.INFO
        bin_utils.initialize_logging(logging.getLogger(), log_level)
    log = logging.getLogger("smqtk.scripts.iqr_app_model_generation")

    # Read the configuration inside a context manager so the file handle is
    # closed deterministically instead of being left to garbage collection.
    with open(args.config) as config_file:
        search_app_config = json.loads(jsmin.jsmin(config_file.read()))

    #
    # Input parameters
    #
    # The following dictionaries are JSON configurations that are used to
    # configure the various data structures and algorithms needed for the IQR
    # demo application. Values here can be changed to suit your specific data
    # and algorithm needs.
    #
    # See algorithm implementation doc-strings for more information on
    # configuration parameters (see implementation class ``__init__`` method).
    #

    # base actions on a specific IQR tab configuration (choose index here)
    if not 0 <= args.tab < len(search_app_config["iqr_tabs"]):
        log.error("Invalid tab number provided.")
        exit(1)

    search_app_iqr_config = search_app_config["iqr_tabs"][args.tab]

    # Configure DataSet implementation and parameters
    data_set_config = search_app_iqr_config['data_set']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    descriptor_generator_config = search_app_iqr_config['descr_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    nn_index_config = search_app_iqr_config['nn_index']

    # Configure RelevancyIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    #
    # The LibSvmHikRelevancyIndex implementation doesn't actually build a
    # persistent model (or doesn't have to that is), but we're leaving this
    # block here in anticipation of other potential implementations in the
    # future.
    #
    rel_index_config = search_app_iqr_config['rel_index_config']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = search_app_iqr_config[
        'descriptor_factory']

    #
    # Initialize data/algorithms
    #
    # Constructing appropriate data structures and algorithms, needed for the
    # IQR demo application, in preparation for model training.
    #

    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)

    #: :type: representation.DataSet
    data_set = \
        plugin.from_plugin_config(data_set_config,
                                  representation.get_data_set_impls())
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(descriptor_generator_config,
                                  algorithms.get_descriptor_generator_impls())

    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        plugin.from_plugin_config(nn_index_config,
                                  algorithms.get_nn_index_impls())

    #: :type: algorithms.RelevancyIndex
    rel_index = \
        plugin.from_plugin_config(rel_index_config,
                                  algorithms.get_relevancy_index_impls())

    #
    # Build models
    #
    # Perform the actual building of the models.
    #

    # Add data files to DataSet
    DataFileElement = representation.get_data_element_impls(
    )["DataFileElement"]

    for fp in args.input_files:
        fp = osp.expanduser(fp)
        if osp.isfile(fp):
            data_set.add_data(DataFileElement(fp))
        else:
            # Not an existing file: treat the argument as a glob pattern.
            log.debug("Expanding glob: %s" % fp)
            for g in glob.iglob(fp):
                data_set.add_data(DataFileElement(g))

    # Generate a model if the generator defines a known generation method.
    if hasattr(descriptor_generator, "generate_model"):
        descriptor_generator.generate_model(data_set)
    # Add other if-else cases for other known implementation-specific
    # generation methods stubs

    # Generate descriptors of data for building NN index.
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory)

    try:
        nn_index.build_index(six.itervalues(data2descriptor))
    except RuntimeError as ex:
        # Treated as "model already built", so this step is skipped; the
        # error is logged so the skip is not completely silent.
        log.debug("Skipping nearest neighbors index build: %s", ex)

    rel_index.build_index(six.itervalues(data2descriptor))
def test_impl_findable(self):
    # The FAISS index class name must be among the discovered nearest
    # neighbor index implementations.
    discovered_impls = get_nn_index_impls()
    expected_name = FaissNearestNeighborsIndex.__name__
    self.assertIn(expected_name, discovered_impls)
def test_impl_findable(self):
    # This test is only reached when the implementation reports itself as
    # usable, so its class name must be among the discovered nearest
    # neighbor index implementations.
    discovered_impls = get_nn_index_impls()
    expected_name = FlannNearestNeighborsIndex.__name__
    self.assertIn(expected_name, discovered_impls)
def main():
    """
    Build the models needed by the IQR application.

    Loads the UI and IQR service configuration files, instantiates the data
    set configured for the selected UI tab and the descriptor factory,
    descriptor generator and nearest neighbors index configured for the IQR
    service, adds the input files to the data set, computes descriptors and
    builds the nearest neighbors index.

    :raises RuntimeError: One or both configuration files failed to load.
    """
    args = cli_parser().parse_args()

    ui_config_filepath, iqr_config_filepath = args.config
    llevel = logging.DEBUG if args.verbose else logging.INFO
    tab = args.tab
    input_files_globs = args.input_files

    # Not using `bin_utils.utility_main_helper`` due to deviating from single-
    # config-with-default usage.
    bin_utils.initialize_logging(logging.getLogger('smqtk'), llevel)
    bin_utils.initialize_logging(logging.getLogger('__main__'), llevel)
    log = logging.getLogger(__name__)

    log.info("Loading UI config: '{}'".format(ui_config_filepath))
    ui_config, ui_config_loaded = bin_utils.load_config(ui_config_filepath)
    log.info("Loading IQR config: '{}'".format(iqr_config_filepath))
    iqr_config, iqr_config_loaded = bin_utils.load_config(iqr_config_filepath)
    if not (ui_config_loaded and iqr_config_loaded):
        raise RuntimeError("One or both configuration files failed to load.")

    # Ensure the given "tab" exists in UI configuration.
    if tab is None:
        log.error("No configuration tab provided to drive model generation.")
        exit(1)
    if tab not in ui_config["iqr_tabs"]:
        log.error("Invalid tab provided: '{}'. Available tags: {}"
                  .format(tab, list(ui_config["iqr_tabs"])))
        exit(1)

    #
    # Gather Configurations
    #
    log.info("Extracting plugin configurations")

    ui_tab_config = ui_config["iqr_tabs"][tab]
    iqr_plugins_config = iqr_config['iqr_service']['plugins']

    # Configure DataSet implementation and parameters
    data_set_config = ui_tab_config['data_set']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = iqr_plugins_config['descriptor_factory']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    descriptor_generator_config = iqr_plugins_config['descriptor_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    nn_index_config = iqr_plugins_config['neighbor_index']

    #
    # Initialize data/algorithms
    #
    # Constructing appropriate data structures and algorithms, needed for the
    # IQR demo application, in preparation for model training.
    #
    log.info("Instantiating plugins")
    #: :type: representation.DataSet
    data_set = \
        plugin.from_plugin_config(data_set_config,
                                  representation.get_data_set_impls())
    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(descriptor_generator_config,
                                  algorithms.get_descriptor_generator_impls())
    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        plugin.from_plugin_config(nn_index_config,
                                  algorithms.get_nn_index_impls())

    #
    # Build models
    #
    log.info("Adding files to dataset '{}'".format(data_set))
    for g in input_files_globs:
        g = osp.expanduser(g)
        if osp.isfile(g):
            data_set.add_data(DataFileElement(g, readonly=True))
        else:
            # Not an existing file: treat the argument as a glob pattern.
            log.debug("Expanding glob: %s" % g)
            for fp in glob.iglob(g):
                data_set.add_data(DataFileElement(fp, readonly=True))

    # Generate a model if the generator defines a known generation method.
    # Only the attribute lookup is guarded: an AttributeError raised *inside*
    # model generation is a real failure and must not be mistaken for "this
    # generator has no model to generate".
    log.debug("descriptor generator as model to generate?")
    try:
        generate_model = descriptor_generator.generate_model
    except AttributeError as ex:
        log.debug("descriptor generator as model to generate - Nope: {}"
                  .format(str(ex)))
    else:
        generate_model(data_set)

    # Generate descriptors of data for building NN index.
    log.info("Computing descriptors for data set with {}"
             .format(descriptor_generator))
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory
    )

    # Possible additional support steps before building NNIndex.
    # Fit the LSH index functor when the index has one; as above, only the
    # attribute lookup is guarded so errors raised while fitting propagate.
    log.debug("Has LSH Functor to fit?")
    try:
        fit_lsh_functor = nn_index.lsh_functor.fit
    except AttributeError as ex:
        log.debug("Has LSH Functor to fit - Nope: {}".format(str(ex)))
    else:
        fit_lsh_functor(six.itervalues(data2descriptor))

    log.info("Building nearest neighbors index {}".format(nn_index))
    nn_index.build_index(six.itervalues(data2descriptor))
def __init__(self, json_config):
    """
    Initialize application based on the supplied JSON configuration.

    Instantiates the descriptor element factory, the optional descriptor
    index (only when ``update_descriptor_index`` is true), the nearest
    neighbors index and the descriptor generator, then registers the
    ``/count`` and ``/compute/<uri>`` routes.

    :param json_config: JSON configuration dictionary
    :type json_config: dict
    """
    super(NearestNeighborServiceServer, self).__init__(json_config)

    self.update_index = json_config['update_descriptor_index']

    # Descriptor factory setup
    self._log.info("Initializing DescriptorElementFactory")
    self.descr_elem_factory = DescriptorElementFactory.from_config(
        self.json_config['descriptor_factory'])

    #: :type: smqtk.representation.DescriptorIndex | None
    self.descr_index = None
    if self.update_index:
        self._log.info("Initializing DescriptorIndex to update")
        #: :type: smqtk.representation.DescriptorIndex | None
        self.descr_index = plugin.from_plugin_config(
            json_config['descriptor_index'], get_descriptor_index_impls())

    #: :type: smqtk.algorithms.NearestNeighborsIndex
    self.nn_index = plugin.from_plugin_config(json_config['nn_index'],
                                              get_nn_index_impls())

    #: :type: smqtk.algorithms.DescriptorGenerator
    self.descriptor_generator_inst = plugin.from_plugin_config(
        self.json_config['descriptor_generator'],
        get_descriptor_generator_impls())

    @self.route("/count", methods=['GET'])
    def count():
        """
        Return the number of elements represented in this index.
        """
        return flask.jsonify(**{
            "count": self.nn_index.count(),
        })

    @self.route("/compute/<path:uri>", methods=["POST"])
    def compute(uri):
        """
        Compute the descriptor for a URI specified data element using the
        configured descriptor generator.

        If a descriptor index was configured and update was turned on, we
        add the computed descriptor to the index.

        JSON Return format::
            {
                "success": <bool>

                "message": <str>

                "descriptor": <None|list[float]>

                "reference_uri": <str>
            }

        :param uri: URI data specification.

        """
        # Only bound to the float list once the whole computation succeeded,
        # so a failure never leaves a non-serializable element object here.
        descriptor = None
        try:
            _, descr_elem = self.generate_descriptor_for_uri(uri)
            # ``list(...)`` keeps the value JSON-serializable where ``map``
            # returns an iterator instead of a list.
            descriptor = list(map(float, descr_elem.vector()))
            message = "Descriptor generated"
        except ValueError as ex:
            message = "Input value issue: %s" % str(ex)
        except RuntimeError as ex:
            message = "Descriptor extraction failure: %s" % str(ex)

        # Respond with the structure documented above.
        return flask.jsonify(
            success=descriptor is not None,
            message=message,
            descriptor=descriptor,
            reference_uri=uri,
        )
def main():
    """
    Build the models needed by the IQR demo application.

    Parses the command line, loads the search application configuration
    file, instantiates the data set, descriptor generator, nearest
    neighbors index and relevancy index configured for the selected IQR
    tab, adds the input files to the data set and builds the indices from
    the computed descriptors.
    """
    parser = cli_parser()
    args = parser.parse_args()

    #
    # Setup logging
    #
    if not logging.getLogger().handlers:
        log_level = logging.DEBUG if args.verbose else logging.INFO
        bin_utils.initialize_logging(logging.getLogger(), log_level)
    log = logging.getLogger("smqtk.scripts.iqr_app_model_generation")

    # Read the configuration inside a context manager so the file handle is
    # closed deterministically instead of being left to garbage collection.
    with open(args.config) as config_file:
        search_app_config = json.loads(jsmin.jsmin(config_file.read()))

    #
    # Input parameters
    #
    # The following dictionaries are JSON configurations that are used to
    # configure the various data structures and algorithms needed for the IQR
    # demo application. Values here can be changed to suit your specific data
    # and algorithm needs.
    #
    # See algorithm implementation doc-strings for more information on
    # configuration parameters (see implementation class ``__init__`` method).
    #

    # base actions on a specific IQR tab configuration (choose index here)
    if not 0 <= args.tab < len(search_app_config["iqr_tabs"]):
        log.error("Invalid tab number provided.")
        exit(1)

    search_app_iqr_config = search_app_config["iqr_tabs"][args.tab]

    # Configure DataSet implementation and parameters
    data_set_config = search_app_iqr_config['data_set']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    descriptor_generator_config = search_app_iqr_config['descr_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    nn_index_config = search_app_iqr_config['nn_index']

    # Configure RelevancyIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    #
    # The LibSvmHikRelevancyIndex implementation doesn't actually build a
    # persistent model (or doesn't have to that is), but we're leaving this
    # block here in anticipation of other potential implementations in the
    # future.
    #
    rel_index_config = search_app_iqr_config['rel_index_config']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = search_app_iqr_config['descriptor_factory']

    #
    # Initialize data/algorithms
    #
    # Constructing appropriate data structures and algorithms, needed for the
    # IQR demo application, in preparation for model training.
    #

    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)

    #: :type: representation.DataSet
    data_set = \
        plugin.from_plugin_config(data_set_config,
                                  representation.get_data_set_impls())
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(descriptor_generator_config,
                                  algorithms.get_descriptor_generator_impls())

    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        plugin.from_plugin_config(nn_index_config,
                                  algorithms.get_nn_index_impls())

    #: :type: algorithms.RelevancyIndex
    rel_index = \
        plugin.from_plugin_config(rel_index_config,
                                  algorithms.get_relevancy_index_impls())

    #
    # Build models
    #
    # Perform the actual building of the models.
    #

    # Add data files to DataSet
    DataFileElement = representation.get_data_element_impls()["DataFileElement"]

    for fp in args.input_files:
        fp = osp.expanduser(fp)
        if osp.isfile(fp):
            data_set.add_data(DataFileElement(fp))
        else:
            # Not an existing file: treat the argument as a glob pattern.
            log.debug("Expanding glob: %s" % fp)
            for g in glob.iglob(fp):
                data_set.add_data(DataFileElement(g))

    # Generate a model if the generator defines a known generation method.
    if hasattr(descriptor_generator, "generate_model"):
        descriptor_generator.generate_model(data_set)
    # Add other if-else cases for other known implementation-specific
    # generation methods stubs

    # Generate descriptors of data for building NN index.
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory
    )

    try:
        nn_index.build_index(data2descriptor.itervalues())
    except RuntimeError as ex:
        # Treated as "model already built", so this step is skipped; the
        # error is logged so the skip is not completely silent.
        log.debug("Skipping nearest neighbors index build: %s", ex)

    rel_index.build_index(data2descriptor.itervalues())
def __init__(self, json_config):
    """
    Initialize application based on the supplied JSON configuration.

    Instantiates the descriptor element factory, the optional descriptor
    index (only when ``update_descriptor_index`` is true), the nearest
    neighbors index and the descriptor generator, then registers the
    ``/count``, ``/compute/<uri>`` and ``/nn/...`` routes.

    :param json_config: JSON configuration dictionary
    :type json_config: dict
    """
    super(NearestNeighborServiceServer, self).__init__(json_config)

    self.update_index = json_config['update_descriptor_index']

    # Descriptor factory setup
    self._log.info("Initializing DescriptorElementFactory")
    self.descr_elem_factory = DescriptorElementFactory.from_config(
        self.json_config['descriptor_factory'])

    #: :type: smqtk.representation.DescriptorIndex | None
    self.descr_index = None
    if self.update_index:
        self._log.info("Initializing DescriptorIndex to update")
        #: :type: smqtk.representation.DescriptorIndex | None
        self.descr_index = plugin.from_plugin_config(
            json_config['descriptor_index'], get_descriptor_index_impls())

    #: :type: smqtk.algorithms.NearestNeighborsIndex
    self.nn_index = plugin.from_plugin_config(json_config['nn_index'],
                                              get_nn_index_impls())

    #: :type: smqtk.algorithms.DescriptorGenerator
    self.descriptor_generator_inst = plugin.from_plugin_config(
        self.json_config['descriptor_generator'],
        get_descriptor_generator_impls())

    @self.route("/count", methods=['GET'])
    def count():
        """
        Return the number of elements represented in this index.
        """
        return flask.jsonify(**{
            "count": self.nn_index.count(),
        })

    @self.route("/compute/<path:uri>", methods=["POST"])
    def compute(uri):
        """
        Compute the descriptor for a URI specified data element using the
        configured descriptor generator.

        See ``compute_nearest_neighbors`` method docstring for URI
        specifications accepted.

        If a descriptor index was configured and update was turned on, we
        add the computed descriptor to the index.

        JSON Return format::
            {
                "success": <bool>

                "message": <str>

                "descriptor": <None|list[float]>

                "reference_uri": <str>
            }

        :param uri: URI data specification.

        """
        # Only bound to the float list once the whole computation succeeded,
        # so a failure never leaves a non-serializable element object here.
        descriptor = None
        try:
            descr_elem = self.generate_descriptor_for_uri(uri)
            descriptor = list(map(float, descr_elem.vector()))
            message = "Descriptor generated"
        except ValueError as ex:
            message = "Input value issue: %s" % str(ex)
        except RuntimeError as ex:
            message = "Descriptor extraction failure: %s" % str(ex)

        return flask.jsonify(
            success=descriptor is not None,
            message=message,
            descriptor=descriptor,
            reference_uri=uri,
        )

    @self.route("/nn/<path:uri>")
    @self.route("/nn/n=<int:n>/<path:uri>")
    @self.route("/nn/n=<int:n>/<int:start_i>:<int:end_i>/<path:uri>")
    def compute_nearest_neighbors(uri, n=10, start_i=None, end_i=None):
        """
        Data modes for upload/use:

            - local filepath
            - base64
            - http/s URL
            - existing data/descriptor UUID

        The following sub-sections detail how different URI's can be used.

        Local Filepath
        --------------

        The URI string must be prefixed with ``file://``, followed by the
        full path to the data file to describe.

        Base 64 data
        ------------

        The URI string must be prefixed with "base64://", followed by the
        base64 encoded string. This mode also requires an additional
        ``?content_type=`` to provide data content type information. This
        mode saves the encoded data to temporary file for processing.

        HTTP/S address
        --------------

        This is the default mode when the URI prefix is none of the above.
        This uses the requests module to locally download a data file
        for processing.

        Existing Data/Descriptor by UUID
        --------------------------------

        When given a uri prefixed with "uuid://", we interpret the remainder
        of the uri as the UUID of a descriptor already present in the
        configured descriptor index. If the given UUID is not present in the
        index, a KeyError is raised.

        JSON Return format
        ------------------
            {
                "success": <bool>

                "message": <str>

                "neighbors": <list>  (UUIDs of the neighbor descriptors)

                "distances": <list>  (distances returned with the neighbors)

                "reference_uri": <str>
            }

        ``success`` is false when either the descriptor could not be
        generated or the nearest neighbors query failed.

        :param n: Number of neighbors to query for
        :param start_i: The starting index of the neighbor vectors to slice
            into for return.
        :param end_i: The ending index of the neighbor vectors to slice into
            for return.
        :type uri: str

        """
        descriptor = None
        try:
            descriptor = self.generate_descriptor_for_uri(uri)
            message = "descriptor computed"
        except ValueError as ex:
            message = "Input data issue: %s" % str(ex)
        except RuntimeError as ex:
            message = "Descriptor generation failure: %s" % str(ex)

        # Base pagination slicing based on provided start and end indices,
        # otherwise clamp to beginning/ending of queried neighbor sequence.
        # ``end_i`` is tested against None so an explicit end index of 0 is
        # honored rather than replaced by ``n``.
        page_slice = slice(start_i or 0, n if end_i is None else end_i)

        neighbors = []
        dists = []
        success = descriptor is not None
        if success:
            try:
                neighbors, dists = self.nn_index.nn(descriptor, n)
            except ValueError as ex:
                # A failed query must not be reported as a success.
                success = False
                message = "Descriptor or index related issue: %s" % str(ex)

        # TODO: Return the optional descriptor vectors for the neighbors
        # noinspection PyTypeChecker
        d = {
            "success": success,
            "message": message,
            # Loop variable deliberately not named ``n``: that would shadow
            # (and on Python 2 rebind) the ``n`` parameter.
            "neighbors": [
                neighbor.uuid() for neighbor in neighbors[page_slice]
            ],
            "distances": dists[page_slice],
            "reference_uri": uri
        }
        return flask.jsonify(d)
def test_impl_findable(self):
    # The MRPT index class name must be among the discovered nearest
    # neighbor index implementations.
    discovered_impls = get_nn_index_impls()
    expected_name = MRPTNearestNeighborsIndex.__name__
    ntools.assert_in(expected_name, discovered_impls)
def __init__(self, json_config):
    """
    Initialize application based on the supplied JSON configuration.

    Instantiates the descriptor element factory, the optional descriptor
    index (only when ``update_descriptor_index`` is true), the nearest
    neighbors index and the descriptor generator, then registers the
    ``/count``, ``/compute/<uri>`` and ``/nn/...`` routes.

    :param json_config: JSON configuration dictionary
    :type json_config: dict
    """
    super(NearestNeighborServiceServer, self).__init__(json_config)

    self.update_index = json_config['update_descriptor_index']

    # Descriptor factory setup
    self._log.info("Initializing DescriptorElementFactory")
    self.descr_elem_factory = DescriptorElementFactory.from_config(
        self.json_config['descriptor_factory']
    )

    #: :type: smqtk.representation.DescriptorIndex | None
    self.descr_index = None
    if self.update_index:
        self._log.info("Initializing DescriptorIndex to update")
        #: :type: smqtk.representation.DescriptorIndex | None
        self.descr_index = plugin.from_plugin_config(
            json_config['descriptor_index'],
            get_descriptor_index_impls()
        )

    #: :type: smqtk.algorithms.NearestNeighborsIndex
    self.nn_index = plugin.from_plugin_config(
        json_config['nn_index'],
        get_nn_index_impls()
    )

    #: :type: smqtk.algorithms.DescriptorGenerator
    self.descriptor_generator_inst = plugin.from_plugin_config(
        self.json_config['descriptor_generator'],
        get_descriptor_generator_impls()
    )

    @self.route("/count", methods=['GET'])
    def count():
        """
        Return the number of elements represented in this index.
        """
        return flask.jsonify(**{
            "count": self.nn_index.count(),
        })

    @self.route("/compute/<path:uri>", methods=["POST"])
    def compute(uri):
        """
        Compute the descriptor for a URI specified data element using the
        configured descriptor generator.

        See ``compute_nearest_neighbors`` method docstring for URI
        specifications accepted.

        If a descriptor index was configured and update was turned on, we
        add the computed descriptor to the index.

        JSON Return format::
            {
                "success": <bool>

                "message": <str>

                "descriptor": <None|list[float]>

                "reference_uri": <str>
            }

        :param uri: URI data specification.

        """
        # Only bound to the float list once the whole computation succeeded,
        # so a failure never leaves a non-serializable element object here.
        descriptor = None
        try:
            descr_elem = self.generate_descriptor_for_uri(uri)
            descriptor = list(map(float, descr_elem.vector()))
            message = "Descriptor generated"
        except ValueError as ex:
            message = "Input value issue: %s" % str(ex)
        except RuntimeError as ex:
            message = "Descriptor extraction failure: %s" % str(ex)

        return flask.jsonify(
            success=descriptor is not None,
            message=message,
            descriptor=descriptor,
            reference_uri=uri,
        )

    @self.route("/nn/<path:uri>")
    @self.route("/nn/n=<int:n>/<path:uri>")
    @self.route("/nn/n=<int:n>/<int:start_i>:<int:end_i>/<path:uri>")
    def compute_nearest_neighbors(uri, n=10, start_i=None, end_i=None):
        """
        Data modes for upload/use:

            - local filepath
            - base64
            - http/s URL
            - existing data/descriptor UUID

        The following sub-sections detail how different URI's can be used.

        Local Filepath
        --------------

        The URI string must be prefixed with ``file://``, followed by the
        full path to the data file to describe.

        Base 64 data
        ------------

        The URI string must be prefixed with "base64://", followed by the
        base64 encoded string. This mode also requires an additional
        ``?content_type=`` to provide data content type information. This
        mode saves the encoded data to temporary file for processing.

        HTTP/S address
        --------------

        This is the default mode when the URI prefix is none of the above.
        This uses the requests module to locally download a data file
        for processing.

        Existing Data/Descriptor by UUID
        --------------------------------

        When given a uri prefixed with "uuid://", we interpret the remainder
        of the uri as the UUID of a descriptor already present in the
        configured descriptor index. If the given UUID is not present in the
        index, a KeyError is raised.

        JSON Return format
        ------------------
            {
                "success": <bool>

                "message": <str>

                "neighbors": <list>  (UUIDs of the neighbor descriptors)

                "distances": <list>  (distances returned with the neighbors)

                "reference_uri": <str>
            }

        ``success`` is false when either the descriptor could not be
        generated or the nearest neighbors query failed.

        :param n: Number of neighbors to query for
        :param start_i: The starting index of the neighbor vectors to slice
            into for return.
        :param end_i: The ending index of the neighbor vectors to slice into
            for return.
        :type uri: str

        """
        descriptor = None
        try:
            descriptor = self.generate_descriptor_for_uri(uri)
            message = "descriptor computed"
        except ValueError as ex:
            message = "Input data issue: %s" % str(ex)
        except RuntimeError as ex:
            message = "Descriptor generation failure: %s" % str(ex)

        # Base pagination slicing based on provided start and end indices,
        # otherwise clamp to beginning/ending of queried neighbor sequence.
        # ``end_i`` is tested against None so an explicit end index of 0 is
        # honored rather than replaced by ``n``.
        page_slice = slice(start_i or 0, n if end_i is None else end_i)

        neighbors = []
        dists = []
        success = descriptor is not None
        if success:
            try:
                neighbors, dists = self.nn_index.nn(descriptor, n)
            except ValueError as ex:
                # A failed query must not be reported as a success.
                success = False
                message = "Descriptor or index related issue: %s" % str(ex)

        # TODO: Return the optional descriptor vectors for the neighbors
        # noinspection PyTypeChecker
        d = {
            "success": success,
            "message": message,
            # Loop variable deliberately not named ``n``: that would shadow
            # (and on Python 2 rebind) the ``n`` parameter.
            "neighbors": [
                neighbor.uuid() for neighbor in neighbors[page_slice]
            ],
            "distances": dists[page_slice],
            "reference_uri": uri
        }
        return flask.jsonify(d)
def test_impl_findable(self):
    # The FLANN index class name must be among the discovered nearest
    # neighbor index implementations.
    discovered_impls = get_nn_index_impls()
    expected_name = FlannNearestNeighborsIndex.__name__
    ntools.assert_in(expected_name, discovered_impls)