def cli_config(output_filepath, input_config, overwrite):
    """
    Generate a default or template JSON configuration file for this tool.

    When ``input_config`` is provided it is loaded with ``load_config``,
    passing the default configuration as the second argument, and the
    resulting dictionary is written out. Otherwise the default configuration
    from ``build_default_config`` is written as-is.

    :param output_filepath: Output location handed to ``output_config``.
    :param input_config: Optional existing configuration file to load, or
        ``None`` to output the default configuration.
    :param overwrite: Forwarded to ``output_config`` as its ``overwrite``
        keyword argument.

    :raises RuntimeError: ``input_config`` was provided but ``load_config``
        reported that it was not loaded successfully.
    """
    if input_config is None:
        c_dict = build_default_config()
    else:
        c_dict, success = load_config(input_config, build_default_config())
        if not success:
            # Fill in the placeholder so the failing path is actually
            # reported (the message previously printed a literal '{}').
            raise RuntimeError("Did not load input configuration file '{}' "
                               "successfully.".format(input_config))
    output_config(output_filepath, c_dict, overwrite=overwrite)
def cli_build(config_filepath):
    """
    Build a new nearest-neighbors index from the contents of the configured
    descriptor set.

    :param config_filepath: Configuration file to load. It must load
        successfully; the defaults alone are not sufficient to build from.

    :raises RuntimeError: The configuration file failed to load.
    """
    default_config = build_default_config()
    config_dict, success = load_config(config_filepath,
                                       defaults=default_config)
    # The default configuration is only a template, so refuse to continue
    # unless the given file was actually (successfully) loaded.
    if not success:
        raise RuntimeError("Failed to load configuration file.")

    # Instantiate the descriptor set plugin first, then the index plugin.
    descr_set_config = config_dict['descriptor_set']
    descr_set = from_config_dict(descr_set_config, DescriptorSet.get_impls())

    nn_index_config = config_dict['neighbor_index']
    nn_index = from_config_dict(nn_index_config,
                                NearestNeighborsIndex.get_impls())

    # TODO: reduced amount used for building ("training") and remainder used
    #       for update.
    nn_index.build_index(descr_set)
def main():
    """
    Train the models needed by the IQR demo application.

    Steps, in order:

    1. Parse command line arguments and load the UI and IQR configuration
       files given by ``args.config``.
    2. Check that the requested tab exists under ``"iqr_tabs"`` in the UI
       configuration.
    3. Instantiate the data set, descriptor element factory, descriptor
       generator and nearest-neighbors index plugins from configuration.
    4. Add the input files to the data set. Each input is used directly when
       it is an existing file, and expanded as a glob pattern otherwise.
    5. Generate a descriptor generator model when the generator has a
       ``generate_model`` attribute, compute descriptors for the data set,
       fit the index's ``lsh_functor`` when it has one, and finally build
       the nearest-neighbors index.

    :raises RuntimeError: One or both configuration files failed to load.
    :raises SystemExit: With exit code 1 when no tab was provided or the
        provided tab is not present in the UI configuration.
    """
    args = cli_parser().parse_args()

    ui_config_filepath, iqr_config_filepath = args.config
    llevel = logging.DEBUG if args.verbose else logging.INFO
    tab = args.tab
    input_files_globs = args.input_files

    # Not using `cli.utility_main_helper` due to deviating from single-
    # config-with-default usage.
    cli.initialize_logging(logging.getLogger('smqtk'), llevel)
    cli.initialize_logging(logging.getLogger('__main__'), llevel)

    log = logging.getLogger(__name__)
    log.info("Loading UI config: '{}'".format(ui_config_filepath))
    ui_config, ui_config_loaded = cli.load_config(ui_config_filepath)
    log.info("Loading IQR config: '{}'".format(iqr_config_filepath))
    iqr_config, iqr_config_loaded = cli.load_config(iqr_config_filepath)
    if not (ui_config_loaded and iqr_config_loaded):
        raise RuntimeError("One or both configuration files failed to load.")

    # Ensure the given "tab" exists in UI configuration.
    # NOTE: ``SystemExit`` is raised directly rather than calling ``exit()``,
    # which is only injected by the ``site`` module and is not guaranteed to
    # be available in every interpreter configuration.
    if tab is None:
        log.error("No configuration tab provided to drive model generation.")
        raise SystemExit(1)
    if tab not in ui_config["iqr_tabs"]:
        log.error("Invalid tab provided: '{}'. Available tabs: {}".format(
            tab, list(ui_config["iqr_tabs"])))
        raise SystemExit(1)

    #
    # Gather Configurations
    #
    log.info("Extracting plugin configurations")

    ui_tab_config = ui_config["iqr_tabs"][tab]
    iqr_plugins_config = iqr_config['iqr_service']['plugins']

    # Configure DataSet implementation and parameters
    data_set_config = ui_tab_config['data_set']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = iqr_plugins_config['descriptor_factory']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    descriptor_generator_config = iqr_plugins_config['descriptor_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    nn_index_config = iqr_plugins_config['neighbor_index']

    #
    # Initialize data/algorithms
    #
    # Constructing appropriate data structures and algorithms, needed for the
    # IQR demo application, in preparation for model training.
    #
    log.info("Instantiating plugins")
    #: :type: representation.DataSet
    data_set = \
        from_config_dict(data_set_config, representation.DataSet.get_impls())
    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        from_config_dict(descriptor_generator_config,
                         algorithms.DescriptorGenerator.get_impls())
    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        from_config_dict(nn_index_config,
                         algorithms.NearestNeighborsIndex.get_impls())

    #
    # Build models
    #
    log.info("Adding files to dataset '{}'".format(data_set))
    for g in input_files_globs:
        g = osp.expanduser(g)
        if osp.isfile(g):
            # Existing file path: add it directly without glob expansion.
            data_set.add_data(DataFileElement(g, readonly=True))
        else:
            log.debug("Expanding glob: %s", g)
            for fp in glob.iglob(g):
                data_set.add_data(DataFileElement(fp, readonly=True))

    # Generate a model if the generator defines a known generation method.
    # Only the attribute lookup is guarded: an AttributeError raised *inside*
    # model generation is a real failure and must not be mistaken for the
    # generator simply not having a model to generate.
    log.debug("descriptor generator as model to generate?")
    try:
        generate_model = descriptor_generator.generate_model
    except AttributeError as ex:
        log.debug(
            "descriptor generator as model to generate - Nope: {}".format(
                str(ex)))
    else:
        generate_model(data_set)

    # Generate descriptors of data for building NN index.
    log.info("Computing descriptors for data set with {}".format(
        descriptor_generator))
    # The result is consumed below through ``six.itervalues``, i.e. it is
    # treated as a mapping whose values are the computed descriptors.
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory)

    # Possible additional support steps before building NNIndex.
    # Fit the LSH index functor when the index has one. As above, only the
    # attribute lookup is guarded so errors raised while fitting propagate.
    log.debug("Has LSH Functor to fit?")
    try:
        fit_lsh_functor = nn_index.lsh_functor.fit
    except AttributeError as ex:
        log.debug("Has LSH Functor to fit - Nope: {}".format(str(ex)))
    else:
        fit_lsh_functor(six.itervalues(data2descriptor))

    log.info("Building nearest neighbors index {}".format(nn_index))
    nn_index.build_index(six.itervalues(data2descriptor))