def from_config(cls, config_dict, merge_default=True):
    """
    Instantiate a new instance of this class given the configuration
    JSON-compliant dictionary encapsulating initialization arguments.

    This method should not be called via super unless an instance of the
    class is desired.

    Nested plugin configurations (``descriptor_set``, ``uid2idx_kvs``,
    ``idx2uid_kvs`` and the optional ``index_element`` /
    ``index_param_element``) are converted into instances before being
    handed to the parent ``from_config``. The conversions are applied to a
    separate top-level dictionary, so the keys of the given ``config_dict``
    are not rebound by this method.

    :param config_dict: JSON compliant dictionary encapsulating
        a configuration.
    :type config_dict: dict

    :param merge_default: Merge the given configuration on top of the
        default provided by ``get_default_config``.
    :type merge_default: bool

    :raises KeyError: A required plugin section (``descriptor_set``,
        ``uid2idx_kvs`` or ``idx2uid_kvs``) is missing from the effective
        configuration.

    :return: Constructed instance from the provided config.
    :rtype: FaissNearestNeighborsIndex

    """
    if merge_default:
        cfg = cls.get_default_config()
        merge_dict(cfg, config_dict)
    else:
        # Shallow copy: only top-level keys are rebound below, which keeps
        # the caller's dictionary JSON-compliant and reusable.
        cfg = dict(config_dict)

    cfg['descriptor_set'] = from_config_dict(
        cfg['descriptor_set'], DescriptorSet.get_impls()
    )
    cfg['uid2idx_kvs'] = from_config_dict(
        cfg['uid2idx_kvs'], KeyValueStore.get_impls()
    )
    cfg['idx2uid_kvs'] = from_config_dict(
        cfg['idx2uid_kvs'], KeyValueStore.get_impls()
    )

    # Both data elements are optional: an absent/empty section, or one with
    # no selected ``type``, is passed to the constructor as ``None``.
    for key in ('index_element', 'index_param_element'):
        element_cfg = cfg.get(key)
        if element_cfg and element_cfg['type']:
            cfg[key] = from_config_dict(element_cfg,
                                        DataElement.get_impls())
        else:
            cfg[key] = None

    return super(FaissNearestNeighborsIndex, cls).from_config(cfg, False)
def from_config(cls, config_dict, merge_default=True):
    """
    Instantiate a new instance of this class given the configuration
    JSON-compliant dictionary encapsulating initialization arguments.

    The ``kvstore`` section is converted into a ``KeyValueStore`` instance
    before delegating to the parent ``from_config``. The conversion is
    applied to a separate top-level dictionary, so the ``kvstore`` key of
    the given ``config_dict`` is not rebound by this method.

    :param config_dict: JSON compliant dictionary encapsulating
        a configuration.
    :type config_dict: dict

    :param merge_default: Merge the given configuration on top of the
        default provided by ``get_default_config``.
    :type merge_default: bool

    :raises KeyError: The effective configuration has no ``kvstore``
        section.

    :return: Constructed instance from the provided config.
    :rtype: KVSDataSet

    """
    if merge_default:
        # The merge result is the (fresh) default dictionary with the given
        # configuration applied on top of it.
        cfg = merge_dict(cls.get_default_config(), config_dict)
    else:
        # Shallow copy so that replacing ``kvstore`` below does not write an
        # instance into the caller's JSON-compliant dictionary.
        cfg = dict(config_dict)

    # Convert KVStore config to instance for constructor.
    cfg['kvstore'] = from_config_dict(cfg['kvstore'],
                                      KeyValueStore.get_impls())
    return super(KVSDataSet, cls).from_config(cfg, False)
def get_default_config(cls):
    """
    Generate and return a default configuration dictionary for this class.

    The parent class's default configuration is used as the starting point.
    On top of it, each nested plugin slot (``lsh_functor``,
    ``descriptor_set``, ``hash_index`` and ``hash2uuids_kvstore``) is set
    to the default configuration produced by ``make_default_config`` for
    the implementations available for that plugin type.

    It is not guaranteed that the configuration dictionary returned from
    this method is valid for construction of an instance of this class.

    :return: Default configuration dictionary for the class.
    :rtype: dict

    """
    cfg = super(LSHNearestNeighborIndex, cls).get_default_config()

    # Configuration key -> plugin interface whose implementations populate
    # that key's default section. Order matches the assignment order.
    plugin_slots = (
        ('lsh_functor', LshFunctor),
        ('descriptor_set', DescriptorSet),
        ('hash_index', HashIndex),
        ('hash2uuids_kvstore', KeyValueStore),
    )
    for key, plugin_type in plugin_slots:
        cfg[key] = make_default_config(plugin_type.get_impls())

    return cfg
def get_default_config(cls):
    """
    Generate and return a default configuration dictionary for this class.

    The parent class's default configuration is used as the starting point.
    The nested plugin slots are then filled with the output of
    ``make_default_config``:

    - ``index_element`` and ``index_param_element`` from the available
      ``DataElement`` implementations,
    - ``descriptor_set`` from the available ``DescriptorSet``
      implementations,
    - ``idx2uid_kvs`` and ``uid2idx_kvs`` from the available
      ``KeyValueStore`` implementations.

    Slots that share a plugin type receive independent (deep) copies so
    that editing one section does not alter the other.

    It is not guaranteed that the configuration dictionary returned from
    this method is valid for construction of an instance of this class.

    :return: Default configuration dictionary for the class.
    :rtype: dict

    """
    cfg = super(FaissNearestNeighborsIndex, cls).get_default_config()

    # Data element slots: one generated section, one independent copy.
    element_section = make_default_config(DataElement.get_impls())
    cfg['index_element'] = element_section
    cfg['index_param_element'] = deepcopy(element_section)

    cfg['descriptor_set'] = make_default_config(DescriptorSet.get_impls())

    # Key-value store slots: one generated section, one independent copy.
    kvs_section = make_default_config(KeyValueStore.get_impls())
    cfg['idx2uid_kvs'] = kvs_section
    cfg['uid2idx_kvs'] = deepcopy(kvs_section)

    return cfg
def from_config(cls, config_dict, merge_default=True):
    """
    Instantiate a new instance of this class given the configuration
    JSON-compliant dictionary encapsulating initialization arguments.

    This method should not be called via super unless an instance of the
    class is desired.

    Nested plugin configurations (``lsh_functor``, ``descriptor_set``,
    ``hash2uuids_kvstore`` and the optional ``hash_index``) are converted
    into instances before being handed to the parent ``from_config``. The
    conversions, and the removal of a ``hash_index_comment`` entry, are
    applied to a separate top-level dictionary, so the keys of the given
    ``config_dict`` are neither rebound nor deleted by this method.

    :param config_dict: JSON compliant dictionary encapsulating
        a configuration.
    :type config_dict: dict

    :param merge_default: Merge the given configuration on top of the
        default provided by ``get_default_config``.
    :type merge_default: bool

    :raises KeyError: A required plugin section (``lsh_functor``,
        ``descriptor_set`` or ``hash2uuids_kvstore``) is missing from the
        effective configuration.

    :return: Constructed instance from the provided config.
    :rtype: LSHNearestNeighborIndex

    """
    # Controlling merge here so we can control known comment stripping from
    # default config.
    if merge_default:
        merged = cls.get_default_config()
        merge_dict(merged, config_dict)
    else:
        # Shallow copy: only top-level keys are rebound/removed below, which
        # keeps the caller's dictionary JSON-compliant and reusable.
        merged = dict(config_dict)

    merged['lsh_functor'] = from_config_dict(
        merged['lsh_functor'], LshFunctor.get_impls()
    )
    merged['descriptor_set'] = from_config_dict(
        merged['descriptor_set'], DescriptorSet.get_impls()
    )

    # Hash index may be None for a default at-query-time linear indexing
    hash_index_cfg = merged.get('hash_index')
    if hash_index_cfg and hash_index_cfg['type']:
        merged['hash_index'] = from_config_dict(hash_index_cfg,
                                                HashIndex.get_impls())
    else:
        cls.get_logger().debug("No HashIndex impl given. Passing "
                               "``None``.")
        merged['hash_index'] = None

    # remove possible comment added by default generator
    merged.pop('hash_index_comment', None)

    merged['hash2uuids_kvstore'] = from_config_dict(
        merged['hash2uuids_kvstore'], KeyValueStore.get_impls()
    )

    return super(LSHNearestNeighborIndex, cls).from_config(merged, False)
def default_config():
    """
    Build the default configuration dictionary for this utility.

    The result has two sections:

    - ``utility``: ``report_interval`` (``1.0``) and
      ``use_multiprocessing`` (``False``).
    - ``plugins``: default plugin configurations, generated by
      ``make_default_config``, for ``descriptor_set``, ``lsh_functor`` and
      ``hash2uuid_kvstore``.

    :return: Default configuration dictionary.
    :rtype: dict

    """
    utility_section = {
        "report_interval": 1.0,
        "use_multiprocessing": False,
    }

    plugins_section = {}
    plugins_section["descriptor_set"] = \
        make_default_config(DescriptorSet.get_impls())
    plugins_section["lsh_functor"] = \
        make_default_config(LshFunctor.get_impls())
    plugins_section["hash2uuid_kvstore"] = \
        make_default_config(KeyValueStore.get_impls())

    return {
        "utility": utility_section,
        "plugins": plugins_section,
    }
def get_default_config(cls):
    """
    Generate and return a default configuration dictionary for this class.

    The parent class's default configuration is used as the starting point.
    Its ``kvstore`` entry is replaced by a plugin-style section: the
    default configuration generated for the available ``KeyValueStore``
    implementations, with the configuration form (``to_config_dict``) of
    the parent-provided ``kvstore`` value merged on top of it.

    It is not guaranteed that the configuration dictionary returned from
    this method is valid for construction of an instance of this class.

    :return: Default configuration dictionary for the class.
    :rtype: dict

    """
    cfg = super(KVSDataSet, cls).get_default_config()

    # Generic section covering every available key-value store impl.
    kvs_section = make_default_config(KeyValueStore.get_impls())
    # Configuration form of the value the parent default supplied.
    # NOTE(review): presumably a default KeyValueStore instance -- confirm
    # against the constructor's default argument.
    kvs_override = to_config_dict(cfg['kvstore'])

    cfg['kvstore'] = merge_dict(kvs_section, kvs_override)
    return cfg
def main():
    """
    Command-line entry point: compute hash codes for descriptors and record
    the resulting hash-code to UUID-set associations in a key-value store.

    Steps:

    1. Parse CLI arguments and load the configuration via
       ``cli.utility_main_helper`` using ``default_config``.
    2. Instantiate the descriptor set, LSH functor and key-value store
       plugins from the ``plugins`` configuration section. Section names
       match those generated by ``default_config`` (``descriptor_set``,
       ``lsh_functor``, ``hash2uuid_kvstore``).
    3. Iterate UUIDs either from the optional UUIDs list file (one UUID per
       line, blank lines ignored) or from every key of the descriptor set.
    4. Compute hash codes with ``compute_hash_codes`` and accumulate, per
       hash code, the union of the UUIDs already stored for it and the
       newly hashed UUIDs.
    5. Write all accumulated entries to the key-value store in one
       ``add_many`` call.

    :raises ValueError: A UUIDs list file path was given but does not refer
        to an existing file.

    """
    args = cli_parser().parse_args()
    config = cli.utility_main_helper(default_config, args)
    log = logging.getLogger(__name__)

    #
    # Load configuration contents
    #
    uuid_list_filepath = args.uuids_list
    report_interval = config['utility']['report_interval']
    use_multiprocessing = config['utility']['use_multiprocessing']

    #
    # Checking input parameters
    #
    if (uuid_list_filepath is not None) and \
            not os.path.isfile(uuid_list_filepath):
        raise ValueError("UUIDs list file does not exist!")

    #
    # Loading stuff
    #
    log.info("Loading descriptor index")
    # Key and plugin interface must agree with what ``default_config``
    # generates (``plugins.descriptor_set`` from ``DescriptorSet``);
    # otherwise a default-generated configuration fails with a KeyError.
    descriptor_set = from_config_dict(config['plugins']['descriptor_set'],
                                      DescriptorSet.get_impls())
    log.info("Loading LSH functor")
    #: :type: smqtk.algorithms.LshFunctor
    lsh_functor = from_config_dict(config['plugins']['lsh_functor'],
                                   LshFunctor.get_impls())
    log.info("Loading Key/Value store")
    #: :type: smqtk.representation.KeyValueStore
    hash2uuids_kvstore = from_config_dict(
        config['plugins']['hash2uuid_kvstore'],
        KeyValueStore.get_impls()
    )

    # Iterate either over what's in the file given, or everything in the
    # configured descriptor set.
    def iter_uuids():
        if uuid_list_filepath:
            log.info("Using UUIDs list file")
            with open(uuid_list_filepath) as f:
                for line in f:
                    uid = line.strip()
                    # Skip blank lines (e.g. a trailing newline) instead of
                    # yielding an empty-string UUID.
                    if uid:
                        yield uid
        else:
            log.info("Using all UUIDs resent in descriptor index")
            for k in descriptor_set.keys():
                yield k

    #
    # Compute codes
    #
    log.info("Starting hash code computation")
    kv_update = {}
    hash_code_iter = compute_hash_codes(
        uuids_for_processing(iter_uuids(), hash2uuids_kvstore),
        descriptor_set, lsh_functor, report_interval,
        use_multiprocessing, True
    )
    for uuid, hash_int in hash_code_iter:
        # Get original value in KV-store if not in update dict.
        if hash_int not in kv_update:
            kv_update[hash_int] = hash2uuids_kvstore.get(hash_int, set())
        kv_update[hash_int] |= {uuid}

    if kv_update:
        log.info("Updating KV store... (%d keys)", len(kv_update))
        hash2uuids_kvstore.add_many(kv_update)

    log.info("Done")