def get_default_config(cls):
    """
    Return the default JSON-compliant configuration dictionary for this
    class, with plugin-backed slots expanded into full per-implementation
    configuration skeletons.

    :return: Default configuration dictionary.
    :rtype: dict
    """
    default = super(IqrSearch, cls).get_default_config()
    # The parent application is supplied explicitly at construction time,
    # so it does not belong in the JSON configuration.
    del default['parent_app']

    # Plugin-backed slots get generated configuration skeletons.
    default['data_set'] = plugin.make_config(get_data_set_impls())
    default['descr_generator'] = plugin.make_config(
        get_descriptor_generator_impls()
    )
    default['nn_index'] = plugin.make_config(get_nn_index_impls())

    # Overlay any pre-existing relevancy-index settings onto the generated
    # skeleton before storing it back.
    rel_cfg = plugin.make_config(get_relevancy_index_impls())
    if default['rel_index_config']:
        rel_cfg.update(default['rel_index_config'])
    default['rel_index_config'] = rel_cfg

    # Same overlay treatment for the descriptor element factory slot.
    fact_cfg = DescriptorElementFactory.get_default_config()
    if default['descriptor_factory']:
        fact_cfg.update(default['descriptor_factory'].get_config())
    default['descriptor_factory'] = fact_cfg

    return default
def from_config(cls, config, parent_app):
    """
    Construct a new instance of this class from a JSON-compliant
    configuration dictionary and the parent flask application.

    :param config: JSON compliant dictionary encapsulating a configuration.
    :type config: dict
    :param parent_app: Parent containing flask app instance
    :type parent_app: smqtk.web.search_app.app.search_app
    :return: Constructed instance from the provided config.
    :rtype: IqrSearch
    """
    # Overlay the provided configuration on top of class defaults.
    kwargs = cls.get_default_config()
    kwargs.update(config)

    # Replace nested configuration sections with constructed instances.
    kwargs['data_set'] = plugin.from_plugin_config(
        kwargs['data_set'], get_data_set_impls())
    kwargs['descr_generator'] = plugin.from_plugin_config(
        kwargs['descr_generator'], get_descriptor_generator_impls())
    kwargs['nn_index'] = plugin.from_plugin_config(
        kwargs['nn_index'], get_nn_index_impls())
    kwargs['descriptor_factory'] = DescriptorElementFactory.from_config(
        kwargs['descriptor_factory'])

    return cls(parent_app, **kwargs)
def from_config(cls, config, parent_app):
    """
    Construct a new instance of this class from a JSON-compliant
    configuration dictionary and the parent flask application.

    :param config: JSON compliant dictionary encapsulating a configuration.
    :type config: dict
    :param parent_app: Parent containing flask app instance
    :type parent_app: smqtk.web.search_app.app.search_app
    :return: Constructed instance from the provided config.
    :rtype: IqrSearch
    """
    # Overlay the provided configuration on top of class defaults.
    kwargs = cls.get_default_config()
    kwargs.update(config)
    # Swap the nested data-set configuration for a constructed instance.
    kwargs['data_set'] = plugin.from_plugin_config(kwargs['data_set'],
                                                   get_data_set_impls())
    return cls(parent_app, **kwargs)
def get_default_config(cls): d = super(IqrSearch, cls).get_default_config() # Remove parent_app slot for later explicit specification. del d['parent_app'] # fill in plugin configs d['data_set'] = plugin.make_config(get_data_set_impls()) d['descr_generator'] = \ plugin.make_config(get_descriptor_generator_impls()) d['nn_index'] = plugin.make_config(get_nn_index_impls()) ri_config = plugin.make_config(get_relevancy_index_impls()) if d['rel_index_config']: ri_config.update(d['rel_index_config']) d['rel_index_config'] = ri_config df_config = DescriptorElementFactory.get_default_config() if d['descriptor_factory']: df_config.update(d['descriptor_factory'].get_config()) d['descriptor_factory'] = df_config return d
def default_config():
    """
    Return the default JSON-compliant configuration for this utility:
    a plugin configuration skeleton for each component slot.
    """
    config = {}
    config["descriptor_generator"] = plugin.make_config(
        get_descriptor_generator_impls())
    config["descriptor_factory"] = \
        DescriptorElementFactory.get_default_config()
    config["descriptor_index"] = plugin.make_config(
        get_descriptor_index_impls())
    config["optional_data_set"] = plugin.make_config(get_data_set_impls())
    return config
def default_config():
    """
    Return the default JSON-compliant configuration for this tool:
    general tool parameters plus a data-set plugin configuration skeleton.
    """
    tool_defaults = {
        'girder_api_root': 'http://localhost:8080/api/v1',
        'api_key': None,
        'api_query_batch': 1000,
        'dataset_insert_batch_size': None,
    }
    plugin_defaults = {
        'data_set': plugin.make_config(get_data_set_impls()),
    }
    return {
        'tool': tool_defaults,
        'plugins': plugin_defaults,
    }
def get_default_config(cls):
    """
    Return the default JSON-compliant configuration dictionary for this
    class: parent defaults minus the explicitly-supplied parent app, plus
    an IQR service URL slot and a data-set plugin skeleton.
    """
    cfg = super(IqrSearch, cls).get_default_config()
    # The parent app is passed explicitly at construction, not configured.
    cfg.pop('parent_app')
    cfg['iqr_service_url'] = None
    # Fill in the data-set plugin configuration skeleton.
    cfg['data_set'] = plugin.make_config(get_data_set_impls())
    return cfg
def get_default_config(cls): d = super(IqrSearch, cls).get_default_config() # Remove parent_app slot for later explicit specification. del d['parent_app'] d['iqr_service_url'] = None # fill in plugin configs d['data_set'] = plugin.make_config(get_data_set_impls()) return d
def default_config():
    """
    Return the default JSON-compliant configuration for this tool:
    general tool parameters plus a data-set plugin configuration skeleton.
    """
    return {
        'tool': {
            # Root URL of the Girder API instance to query.
            'girder_api_root': 'http://localhost:8080/api/v1',
            # Optional Girder API key for authenticated access.
            'api_key': None,
            # Number of elements per Girder API query page.
            'api_query_batch': 1000,
            # Optional batch size for incremental data-set insertion;
            # None means a single bulk insert at the end.
            'dataset_insert_batch_size': None,
        },
        'plugins': {
            'data_set': plugin.make_config(get_data_set_impls()),
        }
    }
def main():
    """
    Entry point: collect Girder folder/item/file IDs from the command line
    and optional list files, resolve them to data elements, and add those
    elements to the configured data set (optionally in batches).
    """
    args = cli_parser().parse_args()
    config = bin_utils.utility_main_helper(default_config, args)
    log = logging.getLogger(__name__)

    tool_cfg = config['tool']
    api_root = tool_cfg['girder_api_root']
    api_key = tool_cfg['api_key']
    query_batch = tool_cfg['api_query_batch']
    insert_batch_size = tool_cfg['dataset_insert_batch_size']

    # Seed ID lists from the command line, then append IDs from optional
    # newline-separated list files.
    #: :type: list[str]
    folder_ids = args.folder
    #: :type: list[str]
    item_ids = args.item
    #: :type: list[str]
    file_ids = args.file

    def extend_from_listing(list_path, accumulator):
        # One stripped ID per line of the listing file.
        with open(list_path) as f:
            accumulator.extend(line.strip() for line in f)

    if args.folder_list:
        extend_from_listing(args.folder_list, folder_ids)
    if args.item_list:
        extend_from_listing(args.item_list, item_ids)
    if args.file_list:
        extend_from_listing(args.file_list, file_ids)

    #: :type: smqtk.representation.DataSet
    data_set = plugin.from_plugin_config(config['plugins']['data_set'],
                                         get_data_set_impls())

    pending = collections.deque()
    progress_state = [0] * 7
    for element in find_girder_files(api_root, folder_ids, item_ids,
                                     file_ids, api_key, query_batch):
        pending.append(element)
        # Incrementally flush to the data set when batching is configured.
        if insert_batch_size and len(pending) >= insert_batch_size:
            data_set.add_data(*pending)
            pending.clear()
        bin_utils.report_progress(log.info, progress_state, 1.0)

    # Flush the remainder (everything, when batching is disabled).
    if pending:
        data_set.add_data(*pending)

    log.info('Done')
def main():
    """
    Entry point: collect Girder folder/item/file IDs from the command line
    and optional list files, resolve them to data elements, and add those
    elements to the configured data set (optionally in batches).
    """
    args = cli_parser().parse_args()
    config = bin_utils.utility_main_helper(default_config, args)
    log = logging.getLogger(__name__)

    api_root = config['tool']['girder_api_root']
    api_key = config['tool']['api_key']
    api_query_batch = config['tool']['api_query_batch']
    insert_batch_size = config['tool']['dataset_insert_batch_size']

    # Collect N folder/item/file references on CL and any files referenced.
    #: :type: list[str]
    ids_folder = args.folder
    #: :type: list[str]
    ids_item = args.item
    #: :type: list[str]
    ids_file = args.file

    # Optional list files contribute one stripped ID per line.
    if args.folder_list:
        with open(args.folder_list) as f:
            ids_folder.extend([fid.strip() for fid in f])
    if args.item_list:
        with open(args.item_list) as f:
            ids_item.extend([iid.strip() for iid in f])
    if args.file_list:
        with open(args.file_list) as f:
            ids_file.extend([fid.strip() for fid in f])

    #: :type: smqtk.representation.DataSet
    data_set = plugin.from_plugin_config(config['plugins']['data_set'],
                                         get_data_set_impls())

    batch = collections.deque()
    rps = [0] * 7
    for e in find_girder_files(api_root, ids_folder, ids_item, ids_file,
                               api_key, api_query_batch):
        batch.append(e)
        # Incrementally flush to the data set when batching is configured.
        if insert_batch_size and len(batch) >= insert_batch_size:
            data_set.add_data(*batch)
            batch.clear()
        bin_utils.report_progress(log.info, rps, 1.0)

    # Flush the remainder (everything, when batching is disabled).
    if batch:
        data_set.add_data(*batch)

    log.info('Done')
def main():
    """
    Entry point: merge a user-supplied JSON configuration over defaults,
    then add the files named on the command line (literal paths or glob
    patterns) to the configured data set.
    """
    parser = cli_parser()
    args = parser.parse_args()

    # Verbosity flag lowers the logging threshold by 10 per occurrence.
    bin_utils.initialize_logging(logging.getLogger(),
                                 logging.INFO - (10 * args.verbose))
    log = logging.getLogger("main")

    # Merge loaded config with default
    config_loaded = False
    config = default_config()
    if args.config:
        if osp.isfile(args.config):
            with open(args.config, 'r') as f:
                config.update(json.load(f))
            config_loaded = True
        elif not osp.isfile(args.config):
            log.error("Configuration file path not valid.")
            exit(1)

    # output configuration dictionary when asked for.
    bin_utils.output_config(args.output_config, config, log, True)

    # Running without a user configuration is an error.
    if not config_loaded:
        log.error("No configuration provided")
        exit(1)

    log.debug("Script arguments:\n%s" % args)

    def iter_input_elements():
        # Literal file paths yield directly; anything else is treated as a
        # glob pattern and expanded.
        for f in args.input_files:
            f = osp.expanduser(f)
            if osp.isfile(f):
                yield DataFileElement(f)
            else:
                log.debug("Expanding glob: %s" % f)
                for g in glob.glob(f):
                    yield DataFileElement(g)

    log.info("Adding elements to data set")
    #: :type: smqtk.representation.DataSet
    ds = plugin.from_plugin_config(config['data_set'], get_data_set_impls())
    ds.add_data(*iter_input_elements())
def main():
    """
    Entry point: merge a user-supplied JSON configuration over defaults,
    then add the files named on the command line (literal paths or glob
    patterns) to the configured data set.
    """
    args = cli_parser().parse_args()

    # Verbosity flag lowers the logging threshold by 10 per occurrence.
    bin_utils.initialize_logging(logging.getLogger(),
                                 logging.INFO - (10 * args.verbose))
    log = logging.getLogger("main")

    # Start from defaults and overlay the user's JSON config when given.
    config = default_config()
    config_loaded = False
    if args.config:
        if osp.isfile(args.config):
            with open(args.config, 'r') as f:
                config.update(json.load(f))
            config_loaded = True
        elif not osp.isfile(args.config):
            log.error("Configuration file path not valid.")
            exit(1)

    # Dump the effective configuration dictionary if requested.
    bin_utils.output_config(args.output_config, config, log, True)

    if not config_loaded:
        log.error("No configuration provided")
        exit(1)

    log.debug("Script arguments:\n%s" % args)

    def iter_input_elements():
        # Literal file paths yield directly; anything else is treated as
        # a glob pattern and expanded.
        for path in args.input_files:
            path = osp.expanduser(path)
            if osp.isfile(path):
                yield DataFileElement(path)
            else:
                log.debug("Expanding glob: %s" % path)
                for match in glob.glob(path):
                    yield DataFileElement(match)

    log.info("Adding elements to data set")
    #: :type: smqtk.representation.DataSet
    data_set = plugin.from_plugin_config(config['data_set'],
                                         get_data_set_impls())
    data_set.add_data(*iter_input_elements())
def main():
    """
    Entry point: load configuration via the standard utility helper, then
    add the files named on the command line (literal paths or glob
    patterns) to the configured data set.
    """
    parser = cli_parser()
    args = parser.parse_args()
    config = bin_utils.utility_main_helper(default_config, args)
    log = logging.getLogger(__name__)
    log.debug("Script arguments:\n%s" % args)

    def iter_input_elements():
        # Literal file paths yield directly; anything else is treated as a
        # glob pattern and expanded.
        for f in args.input_files:
            f = osp.expanduser(f)
            if osp.isfile(f):
                yield DataFileElement(f)
            else:
                log.debug("Expanding glob: %s" % f)
                for g in glob.glob(f):
                    yield DataFileElement(g)

    log.info("Adding elements to data set")
    #: :type: smqtk.representation.DataSet
    ds = plugin.from_plugin_config(config['data_set'], get_data_set_impls())
    ds.add_data(*iter_input_elements())
def main():
    """
    Entry point: load configuration via the standard utility helper, then
    add the files named on the command line (literal paths or glob
    patterns) to the configured data set.
    """
    args = cli_parser().parse_args()
    config = bin_utils.utility_main_helper(default_config, args)
    log = logging.getLogger(__name__)
    log.debug("Script arguments:\n%s" % args)

    def iter_input_elements():
        # Literal file paths yield directly; anything else is treated as
        # a glob pattern and expanded.
        for pattern in args.input_files:
            pattern = osp.expanduser(pattern)
            if osp.isfile(pattern):
                yield DataFileElement(pattern)
            else:
                log.debug("Expanding glob: %s" % pattern)
                for match in glob.glob(pattern):
                    yield DataFileElement(match)

    log.info("Adding elements to data set")
    #: :type: smqtk.representation.DataSet
    data_set = plugin.from_plugin_config(config['data_set'],
                                         get_data_set_impls())
    data_set.add_data(*iter_input_elements())
def default_config():
    """
    Return the default JSON-compliant configuration: a plugin configuration
    skeleton for the ``data_set`` slot covering available implementations.
    """
    data_set_cfg = plugin.make_config(get_data_set_impls())
    return {"data_set": data_set_cfg}
def main():
    """
    Entry point for IQR demo-application model generation: load the search
    app configuration, instantiate the data set and algorithms for the
    selected IQR tab, ingest the input files, compute descriptors, and
    build the nearest-neighbor and relevancy indices.
    """
    parser = cli_parser()
    args = parser.parse_args()

    #
    # Setup logging
    #
    if not logging.getLogger().handlers:
        if args.verbose:
            bin_utils.initialize_logging(logging.getLogger(), logging.DEBUG)
        else:
            bin_utils.initialize_logging(logging.getLogger(), logging.INFO)
    log = logging.getLogger("smqtk.scripts.iqr_app_model_generation")

    # FIX: close the config file instead of leaking the handle.
    with open(args.config) as config_f:
        search_app_config = json.loads(jsmin.jsmin(config_f.read()))

    #
    # Input parameters
    #
    # The following dictionaries are JSON configurations that are used to
    # configure the various data structures and algorithms needed for the
    # IQR demo application. See algorithm implementation doc-strings for
    # more information on configuration parameters.
    #

    # base actions on a specific IQR tab configuration (choose index here)
    if args.tab < 0 or args.tab > (len(search_app_config["iqr_tabs"]) - 1):
        log.error("Invalid tab number provided.")
        exit(1)

    search_app_iqr_config = search_app_config["iqr_tabs"][args.tab]

    # Configure DataSet implementation and parameters
    data_set_config = search_app_iqr_config['data_set']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    descriptor_generator_config = search_app_iqr_config['descr_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters
    # and persistent model component locations (if implementation has any).
    nn_index_config = search_app_iqr_config['nn_index']

    # Configure RelevancyIndex algorithm implementation and parameters.
    # FIX: this assignment was commented out while ``rel_index_config`` was
    # still referenced below, causing a NameError at runtime.
    rel_index_config = search_app_iqr_config['rel_index_config']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = search_app_iqr_config[
        'descriptor_factory']

    #
    # Initialize data/algorithms
    #
    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)
    #: :type: representation.DataSet
    data_set = \
        plugin.from_plugin_config(data_set_config,
                                  representation.get_data_set_impls())
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(descriptor_generator_config,
                                  algorithms.get_descriptor_generator_impls())
    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        plugin.from_plugin_config(nn_index_config,
                                  algorithms.get_nn_index_impls())
    #: :type: algorithms.RelevancyIndex
    rel_index = \
        plugin.from_plugin_config(rel_index_config,
                                  algorithms.get_relevancy_index_impls())

    #
    # Build models
    #
    # Add data files to DataSet
    DataFileElement = representation.get_data_element_impls()["DataFileElement"]

    for fp in args.input_files:
        fp = osp.expanduser(fp)
        if osp.isfile(fp):
            data_set.add_data(DataFileElement(fp))
        else:
            log.debug("Expanding glob: %s" % fp)
            for g in glob.iglob(fp):
                data_set.add_data(DataFileElement(g))

    # Generate a model if the generator defines a known generation method.
    if hasattr(descriptor_generator, "generate_model"):
        descriptor_generator.generate_model(data_set)
    # Add other if-else cases for other known implementation-specific
    # generation method stubs.

    # Generate descriptors of data for building NN index.
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory
    )

    try:
        # FIX: dict.itervalues() is Python-2-only; .values() works on both
        # Python 2 (list) and Python 3 (view).
        nn_index.build_index(data2descriptor.values())
    except RuntimeError:
        # Already built model, so skipping this step
        pass

    rel_index.build_index(data2descriptor.values())
def default_config():
    """
    Return the default JSON-compliant configuration: a plugin configuration
    skeleton for the ``data_set`` slot covering available implementations.
    """
    return {
        "data_set": plugin.make_config(get_data_set_impls()),
    }
def run_file_list(c, filelist_filepath, checkpoint_filepath, batch_size=None,
                  check_image=False):
    """
    Top level function handling configuration and inputs/outputs.

    :param c: Configuration dictionary (JSON)
    :type c: dict

    :param filelist_filepath: Path to a text file that lists paths to data
        files, separated by new lines.
    :type filelist_filepath: str

    :param checkpoint_filepath: Output file to which we write input
        filepath to SHA1 (UUID) relationships.
    :type checkpoint_filepath: str

    :param batch_size: Optional batch size (None default) of data elements
        to process / descriptors to compute at a time. This causes files
        and stores to be written to incrementally during processing instead
        of one single batch transaction at a time.
    :type batch_size: int | None

    :param check_image: Enable checking image loading from file before
        queueing that file for processing. If the check fails, the file is
        skipped instead of a halting exception being raised.
    :type check_image: bool
    """
    log = logging.getLogger(__name__)

    # FIX: close the file-list handle instead of leaking it.
    with open(filelist_filepath) as filelist_f:
        file_paths = [line.strip() for line in filelist_f]

    log.info("Making descriptor factory")
    factory = DescriptorElementFactory.from_config(c['descriptor_factory'])

    log.info("Making descriptor index")
    #: :type: smqtk.representation.DescriptorIndex
    descriptor_index = plugin.from_plugin_config(c['descriptor_index'],
                                                 get_descriptor_index_impls())

    # An unset plugin "type" means element accumulation is disabled.
    data_set = None
    if c['optional_data_set']['type'] is None:
        log.info("Not saving loaded data elements to data set")
    else:
        log.info("Initializing data set to append to")
        #: :type: smqtk.representation.DataSet
        data_set = plugin.from_plugin_config(c['optional_data_set'],
                                             get_data_set_impls())

    log.info("Making descriptor generator '%s'",
             c['descriptor_generator']['type'])
    #: :type: smqtk.algorithms.DescriptorGenerator
    generator = plugin.from_plugin_config(c['descriptor_generator'],
                                          get_descriptor_generator_impls())

    def iter_valid_elements():
        # Yield data elements whose content type (and optionally image
        # loadability) is accepted by the configured generator, checking
        # candidate files in parallel.
        def is_valid(file_path):
            dfe = DataFileElement(file_path)
            if is_valid_element(
                    dfe, valid_content_types=generator.valid_content_types(),
                    check_image=check_image):
                return dfe
            else:
                return False

        data_elements = collections.deque()
        valid_files_filter = parallel.parallel_map(is_valid, file_paths,
                                                   name="check-file-type",
                                                   use_multiprocessing=True)
        for dfe in valid_files_filter:
            if dfe:
                yield dfe
                if data_set is not None:
                    data_elements.append(dfe)
                    if batch_size and len(data_elements) == batch_size:
                        log.debug("Adding data element batch to set (size:"
                                  " %d)", len(data_elements))
                        data_set.add_data(*data_elements)
                        data_elements.clear()
        # elements only collected if we have a data-set configured, so add
        # any still in the deque to the set
        if data_elements:
            # FIX: format string was missing its closing parenthesis
            # ("(size: %d" -> "(size: %d)").
            log.debug("Adding data elements to set (size: %d)",
                      len(data_elements))
            data_set.add_data(*data_elements)

    log.info("Computing descriptors")
    m = compute_many_descriptors(
        iter_valid_elements(),
        generator,
        factory,
        descriptor_index,
        batch_size=batch_size,
    )

    # Recording computed file paths and associated file UUIDs (SHA1).
    # FIX: use a context manager instead of manual close in a finally.
    with open(checkpoint_filepath, 'w') as cf:
        cf_writer = csv.writer(cf)
        rps = [0] * 7
        for fp, descr in m:
            cf_writer.writerow([fp, descr.uuid()])
            report_progress(log.debug, rps, 1.)

    log.info("Done")
def main():
    """
    Entry point for IQR model generation driven by two configuration files
    (UI config and IQR service config): validate the selected UI tab,
    instantiate plugins, ingest input files into the data set, compute
    descriptors, and build the nearest-neighbors index.
    """
    args = cli_parser().parse_args()

    ui_config_filepath, iqr_config_filepath = args.config
    llevel = logging.DEBUG if args.verbose else logging.INFO
    tab = args.tab
    input_files_globs = args.input_files

    # Not using `bin_utils.utility_main_helper`` due to deviating from single-
    # config-with-default usage.
    bin_utils.initialize_logging(logging.getLogger('smqtk'), llevel)
    bin_utils.initialize_logging(logging.getLogger('__main__'), llevel)
    log = logging.getLogger(__name__)

    log.info("Loading UI config: '{}'".format(ui_config_filepath))
    ui_config, ui_config_loaded = bin_utils.load_config(ui_config_filepath)
    log.info("Loading IQR config: '{}'".format(iqr_config_filepath))
    iqr_config, iqr_config_loaded = bin_utils.load_config(iqr_config_filepath)
    if not (ui_config_loaded and iqr_config_loaded):
        raise RuntimeError("One or both configuration files failed to load.")

    # Ensure the given "tab" exists in UI configuration.
    if tab is None:
        log.error("No configuration tab provided to drive model generation.")
        exit(1)
    if tab not in ui_config["iqr_tabs"]:
        log.error("Invalid tab provided: '{}'. Available tags: {}"
                  .format(tab, list(ui_config["iqr_tabs"])))
        exit(1)

    #
    # Gather Configurations
    #
    log.info("Extracting plugin configurations")

    ui_tab_config = ui_config["iqr_tabs"][tab]
    iqr_plugins_config = iqr_config['iqr_service']['plugins']

    # Configure DataSet implementation and parameters
    data_set_config = ui_tab_config['data_set']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = iqr_plugins_config['descriptor_factory']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    descriptor_generator_config = iqr_plugins_config['descriptor_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    nn_index_config = iqr_plugins_config['neighbor_index']

    #
    # Initialize data/algorithms
    #
    # Constructing appropriate data structures and algorithms, needed for the
    # IQR demo application, in preparation for model training.
    #
    log.info("Instantiating plugins")
    #: :type: representation.DataSet
    data_set = \
        plugin.from_plugin_config(data_set_config,
                                  representation.get_data_set_impls())
    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(descriptor_generator_config,
                                  algorithms.get_descriptor_generator_impls())
    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        plugin.from_plugin_config(nn_index_config,
                                  algorithms.get_nn_index_impls())

    #
    # Build models
    #
    # Input files are either literal paths or glob patterns to expand.
    log.info("Adding files to dataset '{}'".format(data_set))
    for g in input_files_globs:
        g = osp.expanduser(g)
        if osp.isfile(g):
            data_set.add_data(DataFileElement(g, readonly=True))
        else:
            log.debug("Expanding glob: %s" % g)
            for fp in glob.iglob(g):
                data_set.add_data(DataFileElement(fp, readonly=True))

    # Generate a model if the generator defines a known generation method.
    # EAFP: generators without ``generate_model`` raise AttributeError.
    try:
        log.debug("descriptor generator as model to generate?")
        descriptor_generator.generate_model(data_set)
    except AttributeError as ex:
        log.debug("descriptor generator as model to generate - Nope: {}"
                  .format(str(ex)))

    # Generate descriptors of data for building NN index.
    log.info("Computing descriptors for data set with {}"
             .format(descriptor_generator))
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory
    )

    # Possible additional support steps before building NNIndex
    # EAFP: only LSH-style indices expose ``lsh_functor``.
    try:
        # Fit the LSH index functor
        log.debug("Has LSH Functor to fit?")
        nn_index.lsh_functor.fit(six.itervalues(data2descriptor))
    except AttributeError as ex:
        log.debug("Has LSH Functor to fit - Nope: {}".format(str(ex)))

    log.info("Building nearest neighbors index {}".format(nn_index))
    nn_index.build_index(six.itervalues(data2descriptor))
def main():
    """
    Entry point for IQR demo-application model generation: load the search
    app configuration, instantiate the data set and algorithms for the
    selected IQR tab, ingest the input files, compute descriptors, and
    build the nearest-neighbor and relevancy indices.
    """
    parser = cli_parser()
    args = parser.parse_args()

    #
    # Setup logging
    #
    if not logging.getLogger().handlers:
        if args.verbose:
            bin_utils.initialize_logging(logging.getLogger(), logging.DEBUG)
        else:
            bin_utils.initialize_logging(logging.getLogger(), logging.INFO)
    log = logging.getLogger("smqtk.scripts.iqr_app_model_generation")

    # FIX: close the config file instead of leaking the handle.
    with open(args.config) as config_f:
        search_app_config = json.loads(jsmin.jsmin(config_f.read()))

    #
    # Input parameters
    #
    # The following dictionaries are JSON configurations that are used to
    # configure the various data structures and algorithms needed for the
    # IQR demo application. See algorithm implementation doc-strings for
    # more information on configuration parameters.
    #

    # base actions on a specific IQR tab configuration (choose index here)
    if args.tab < 0 or args.tab > (len(search_app_config["iqr_tabs"]) - 1):
        log.error("Invalid tab number provided.")
        exit(1)

    search_app_iqr_config = search_app_config["iqr_tabs"][args.tab]

    # Configure DataSet implementation and parameters
    data_set_config = search_app_iqr_config['data_set']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    descriptor_generator_config = search_app_iqr_config['descr_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters
    # and persistent model component locations (if implementation has any).
    nn_index_config = search_app_iqr_config['nn_index']

    # Configure RelevancyIndex algorithm implementation and parameters.
    # FIX: this assignment was commented out while ``rel_index_config`` was
    # still referenced below, causing a NameError at runtime.
    rel_index_config = search_app_iqr_config['rel_index_config']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = search_app_iqr_config[
        'descriptor_factory']

    #
    # Initialize data/algorithms
    #
    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)
    #: :type: representation.DataSet
    data_set = \
        plugin.from_plugin_config(data_set_config,
                                  representation.get_data_set_impls())
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(descriptor_generator_config,
                                  algorithms.get_descriptor_generator_impls())
    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        plugin.from_plugin_config(nn_index_config,
                                  algorithms.get_nn_index_impls())
    #: :type: algorithms.RelevancyIndex
    rel_index = \
        plugin.from_plugin_config(rel_index_config,
                                  algorithms.get_relevancy_index_impls())

    #
    # Build models
    #
    # Add data files to DataSet
    DataFileElement = representation.get_data_element_impls(
    )["DataFileElement"]

    for fp in args.input_files:
        fp = osp.expanduser(fp)
        if osp.isfile(fp):
            data_set.add_data(DataFileElement(fp))
        else:
            log.debug("Expanding glob: %s" % fp)
            for g in glob.iglob(fp):
                data_set.add_data(DataFileElement(g))

    # Generate a model if the generator defines a known generation method.
    if hasattr(descriptor_generator, "generate_model"):
        descriptor_generator.generate_model(data_set)
    # Add other if-else cases for other known implementation-specific
    # generation method stubs.

    # Generate descriptors of data for building NN index.
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory)

    try:
        nn_index.build_index(six.itervalues(data2descriptor))
    except RuntimeError:
        # Already built model, so skipping this step
        pass

    rel_index.build_index(six.itervalues(data2descriptor))