Ejemplo n.º 1
0
    def from_config(cls, config_dict, merge_default=True):
        """
        Instantiate a new instance of this class given the configuration
        JSON-compliant dictionary encapsulating initialization arguments.

        This method should not be called via super unless and instance of the
        class is desired.

        :param config_dict: JSON compliant dictionary encapsulating
            a configuration.
        :type config_dict: dict

        :param merge_default: Merge the given configuration on top of the
            default provided by ``get_default_config``.
        :type merge_default: bool

        :return: Constructed instance from the provided config.
        :rtype: LSHNearestNeighborIndex

        """
        if merge_default:
            cfg = cls.get_default_config()
            merge_dict(cfg, config_dict)
        else:
            cfg = config_dict

        cfg['descriptor_set'] = plugin.from_plugin_config(
            cfg['descriptor_set'], get_descriptor_index_impls()
        )
        cfg['uid2idx_kvs'] = plugin.from_plugin_config(
            cfg['uid2idx_kvs'], get_key_value_store_impls()
        )
        cfg['idx2uid_kvs'] = plugin.from_plugin_config(
            cfg['idx2uid_kvs'], get_key_value_store_impls()
        )

        if (cfg['index_element'] and
                cfg['index_element']['type']):
            index_element = plugin.from_plugin_config(
                cfg['index_element'], get_data_element_impls())
            cfg['index_element'] = index_element
        else:
            cfg['index_element'] = None

        if (cfg['index_param_element'] and
                cfg['index_param_element']['type']):
            index_param_element = plugin.from_plugin_config(
                cfg['index_param_element'], get_data_element_impls())
            cfg['index_param_element'] = index_param_element
        else:
            cfg['index_param_element'] = None

        return super(FaissNearestNeighborsIndex, cls).from_config(cfg, False)
Ejemplo n.º 2
0
    def from_config(cls, config_dict, merge_default=True):
        """
        Instantiate a new instance of this class given the configuration
        JSON-compliant dictionary encapsulating initialization arguments.

        :param config_dict: JSON compliant dictionary encapsulating
            a configuration.
        :type config_dict: dict

        :param merge_default: Merge the given configuration on top of the
            default provided by ``get_default_config``.
        :type merge_default: bool

        :return: Constructed instance from the provided config.
        :rtype: KVSDataSet
        """
        if merge_default:
            config_dict = merge_dict(cls.get_default_config(), config_dict)

        # Convert KVStore config to instance for constructor.
        kvs_inst = plugin.from_plugin_config(config_dict['kvstore'],
                                             get_key_value_store_impls())
        config_dict['kvstore'] = kvs_inst

        return super(KVSDataSet, cls).from_config(config_dict, False)
Ejemplo n.º 3
0
    def from_config(cls, config_dict, merge_default=True):
        """
        Instantiate a new instance of this class given the configuration
        JSON-compliant dictionary encapsulating initialization arguments.

        :param config_dict: JSON compliant dictionary encapsulating
            a configuration.
        :type config_dict: dict

        :param merge_default: Merge the given configuration on top of the
            default provided by ``get_default_config``.
        :type merge_default: bool

        :return: Constructed instance from the provided config.
        :rtype: KVSDataSet
        """
        if merge_default:
            config_dict = merge_dict(cls.get_default_config(), config_dict)

        # Convert KVStore config to instance for constructor.
        kvs_inst = plugin.from_plugin_config(config_dict['kvstore'],
                                             get_key_value_store_impls())
        config_dict['kvstore'] = kvs_inst

        return super(KVSDataSet, cls).from_config(config_dict, False)
Ejemplo n.º 4
0
    def get_default_config(cls):
        """
        Generate and return a default configuration dictionary for this class.
        This will be primarily used for generating what the configuration
        dictionary would look like for this class without instantiating it.

        By default, we observe what this class's constructor takes as
        arguments, turning those argument names into configuration dictionary
        keys. If any of those arguments have defaults, we will add those
        values into the configuration dictionary appropriately. The dictionary
        returned should only contain JSON compliant value types.

        It is not be guaranteed that the configuration dictionary returned
        from this method is valid for construction of an instance of this
        class.

        :return: Default configuration dictionary for the class.
        :rtype: dict

        """
        default = super(FaissNearestNeighborsIndex, cls).get_default_config()

        data_element_default_config = plugin.make_config(
            get_data_element_impls())
        default['index_element'] = data_element_default_config
        default['index_param_element'] = deepcopy(data_element_default_config)

        di_default = plugin.make_config(get_descriptor_index_impls())
        default['descriptor_set'] = di_default

        kvs_default = plugin.make_config(get_key_value_store_impls())
        default['idx2uid_kvs'] = kvs_default
        default['uid2idx_kvs'] = deepcopy(kvs_default)

        return default
Ejemplo n.º 5
0
    def from_config(cls, config_dict, merge_default=True):
        """
        Instantiate a new instance of this class given the configuration
        JSON-compliant dictionary encapsulating initialization arguments.

        This method should not be called via super unless and instance of the
        class is desired.

        :param config_dict: JSON compliant dictionary encapsulating
            a configuration.
        :type config_dict: dict

        :param merge_default: Merge the given configuration on top of the
            default provided by ``get_default_config``.
        :type merge_default: bool

        :return: Constructed instance from the provided config.
        :rtype: LSHNearestNeighborIndex

        """
        # Controlling merge here so we can control known comment stripping from
        # default config.
        if merge_default:
            merged = cls.get_default_config()
            merge_dict(merged, config_dict)
        else:
            merged = config_dict

        merged['lsh_functor'] = \
            plugin.from_plugin_config(merged['lsh_functor'],
                                      get_lsh_functor_impls())
        merged['descriptor_index'] = \
            plugin.from_plugin_config(merged['descriptor_index'],
                                      get_descriptor_index_impls())

        # Hash index may be None for a default at-query-time linear indexing
        if merged['hash_index'] and merged['hash_index']['type']:
            merged['hash_index'] = \
                plugin.from_plugin_config(merged['hash_index'],
                                          get_hash_index_impls())
        else:
            cls.get_logger().debug(
                "No HashIndex impl given. Passing ``None``.")
            merged['hash_index'] = None

        # remove possible comment added by default generator
        if 'hash_index_comment' in merged:
            del merged['hash_index_comment']

        merged['hash2uuids_kvstore'] = \
            plugin.from_plugin_config(merged['hash2uuids_kvstore'],
                                      get_key_value_store_impls())

        return super(LSHNearestNeighborIndex, cls).from_config(merged, False)
Ejemplo n.º 6
0
    def from_config(cls, config_dict, merge_default=True):
        """
        Instantiate a new instance of this class given the configuration
        JSON-compliant dictionary encapsulating initialization arguments.

        This method should not be called via super unless and instance of the
        class is desired.

        :param config_dict: JSON compliant dictionary encapsulating
            a configuration.
        :type config_dict: dict

        :param merge_default: Merge the given configuration on top of the
            default provided by ``get_default_config``.
        :type merge_default: bool

        :return: Constructed instance from the provided config.
        :rtype: LSHNearestNeighborIndex

        """
        # Controlling merge here so we can control known comment stripping from
        # default config.
        if merge_default:
            merged = cls.get_default_config()
            merge_dict(merged, config_dict)
        else:
            merged = config_dict

        merged['lsh_functor'] = \
            plugin.from_plugin_config(merged['lsh_functor'],
                                      get_lsh_functor_impls())
        merged['descriptor_index'] = \
            plugin.from_plugin_config(merged['descriptor_index'],
                                      get_descriptor_index_impls())

        # Hash index may be None for a default at-query-time linear indexing
        if merged['hash_index'] and merged['hash_index']['type']:
            merged['hash_index'] = \
                plugin.from_plugin_config(merged['hash_index'],
                                          get_hash_index_impls())
        else:
            cls.get_logger().debug("No HashIndex impl given. Passing ``None``.")
            merged['hash_index'] = None

        # remove possible comment added by default generator
        if 'hash_index_comment' in merged:
            del merged['hash_index_comment']

        merged['hash2uuids_kvstore'] = \
            plugin.from_plugin_config(merged['hash2uuids_kvstore'],
                                      get_key_value_store_impls())

        return super(LSHNearestNeighborIndex, cls).from_config(merged, False)
Ejemplo n.º 7
0
def default_config():
    return {
        "utility": {
            "report_interval": 1.0,
            "use_multiprocessing": False,
        },
        "plugins": {
            "descriptor_index":
            plugin.make_config(get_descriptor_index_impls()),
            "lsh_functor": plugin.make_config(get_lsh_functor_impls()),
            "hash2uuid_kvstore":
            plugin.make_config(get_key_value_store_impls()),
        },
    }
Ejemplo n.º 8
0
def default_config():
    return {
        "utility": {
            "report_interval": 1.0,
            "use_multiprocessing": False,
        },
        "plugins": {
            "descriptor_index":
                plugin.make_config(get_descriptor_index_impls()),
            "lsh_functor": plugin.make_config(get_lsh_functor_impls()),
            "hash2uuid_kvstore":
                plugin.make_config(get_key_value_store_impls()),
        },
    }
Ejemplo n.º 9
0
    def get_default_config(cls):
        """
        Generate and return a default configuration dictionary for this class.

        It is not be guaranteed that the configuration dictionary returned
        from this method is valid for construction of an instance of this class.

        :return: Default configuration dictionary for the class.
        :rtype: dict
        """
        c = super(KVSDataSet, cls).get_default_config()
        c['kvstore'] = merge_dict(
            plugin.make_config(get_key_value_store_impls()),
            plugin.to_plugin_config(c['kvstore']))
        return c
Ejemplo n.º 10
0
    def get_default_config(cls):
        """
        Generate and return a default configuration dictionary for this class.

        It is not be guaranteed that the configuration dictionary returned
        from this method is valid for construction of an instance of this class.

        :return: Default configuration dictionary for the class.
        :rtype: dict
        """
        c = super(KVSDataSet, cls).get_default_config()
        c['kvstore'] = merge_dict(
            plugin.make_config(get_key_value_store_impls()),
            plugin.to_plugin_config(c['kvstore'])
        )
        return c
Ejemplo n.º 11
0
    def get_default_config(cls):
        """
        Generate and return a default configuration dictionary for this class.
        This will be primarily used for generating what the configuration
        dictionary would look like for this class without instantiating it.

        By default, we observe what this class's constructor takes as
        arguments, turning those argument names into configuration dictionary
        keys. If any of those arguments have defaults, we will add those values
        into the configuration dictionary appropriately. The dictionary
        returned should only contain JSON compliant value types.

        It is not be guaranteed that the configuration dictionary returned
        from this method is valid for construction of an instance of this
        class.

        :return: Default configuration dictionary for the class.
        :rtype: dict

        """
        default = super(LSHNearestNeighborIndex, cls).get_default_config()

        lf_default = plugin.make_config(get_lsh_functor_impls())
        default['lsh_functor'] = lf_default

        di_default = plugin.make_config(get_descriptor_index_impls())
        default['descriptor_index'] = di_default

        hi_default = plugin.make_config(get_hash_index_impls())
        default['hash_index'] = hi_default
        default['hash_index_comment'] = "'hash_index' may also be null to " \
                                        "default to a linear index built at " \
                                        "query time."

        h2u_default = plugin.make_config(get_key_value_store_impls())
        default['hash2uuids_kvstore'] = h2u_default

        return default
Ejemplo n.º 12
0
    def get_default_config(cls):
        """
        Generate and return a default configuration dictionary for this class.
        This will be primarily used for generating what the configuration
        dictionary would look like for this class without instantiating it.

        By default, we observe what this class's constructor takes as
        arguments, turning those argument names into configuration dictionary
        keys. If any of those arguments have defaults, we will add those values
        into the configuration dictionary appropriately. The dictionary
        returned should only contain JSON compliant value types.

        It is not be guaranteed that the configuration dictionary returned
        from this method is valid for construction of an instance of this
        class.

        :return: Default configuration dictionary for the class.
        :rtype: dict

        """
        default = super(LSHNearestNeighborIndex, cls).get_default_config()

        lf_default = plugin.make_config(get_lsh_functor_impls())
        default['lsh_functor'] = lf_default

        di_default = plugin.make_config(get_descriptor_index_impls())
        default['descriptor_index'] = di_default

        hi_default = plugin.make_config(get_hash_index_impls())
        default['hash_index'] = hi_default
        default['hash_index_comment'] = "'hash_index' may also be null to " \
                                        "default to a linear index built at " \
                                        "query time."

        h2u_default = plugin.make_config(get_key_value_store_impls())
        default['hash2uuids_kvstore'] = h2u_default

        return default
Ejemplo n.º 13
0
def main():
    args = cli_parser().parse_args()
    config = bin_utils.utility_main_helper(default_config, args)
    log = logging.getLogger(__name__)

    #
    # Load configuration contents
    #
    uuid_list_filepath = args.uuids_list
    report_interval = config['utility']['report_interval']
    use_multiprocessing = config['utility']['use_multiprocessing']

    #
    # Checking input parameters
    #
    if (uuid_list_filepath is not None) and \
            not os.path.isfile(uuid_list_filepath):
        raise ValueError("UUIDs list file does not exist!")

    #
    # Loading stuff
    #
    log.info("Loading descriptor index")
    #: :type: smqtk.representation.DescriptorIndex
    descriptor_index = plugin.from_plugin_config(
        config['plugins']['descriptor_index'], get_descriptor_index_impls())
    log.info("Loading LSH functor")
    #: :type: smqtk.algorithms.LshFunctor
    lsh_functor = plugin.from_plugin_config(config['plugins']['lsh_functor'],
                                            get_lsh_functor_impls())
    log.info("Loading Key/Value store")
    #: :type: smqtk.representation.KeyValueStore
    hash2uuids_kvstore = plugin.from_plugin_config(
        config['plugins']['hash2uuid_kvstore'], get_key_value_store_impls())

    # Iterate either over what's in the file given, or everything in the
    # configured index.
    def iter_uuids():
        if uuid_list_filepath:
            log.info("Using UUIDs list file")
            with open(uuid_list_filepath) as f:
                for l in f:
                    yield l.strip()
        else:
            log.info("Using all UUIDs resent in descriptor index")
            for k in descriptor_index.keys():
                yield k

    #
    # Compute codes
    #
    log.info("Starting hash code computation")
    kv_update = {}
    for uuid, hash_int in \
            compute_hash_codes(uuids_for_processing(iter_uuids(),
                                                    hash2uuids_kvstore),
                               descriptor_index, lsh_functor,
                               report_interval,
                               use_multiprocessing, True):
        # Get original value in KV-store if not in update dict.
        if hash_int not in kv_update:
            kv_update[hash_int] = hash2uuids_kvstore.get(hash_int, set())
        kv_update[hash_int] |= {uuid}

    if kv_update:
        log.info("Updating KV store... (%d keys)" % len(kv_update))
        hash2uuids_kvstore.add_many(kv_update)

    log.info("Done")
Ejemplo n.º 14
0
def main():
    args = cli_parser().parse_args()
    config = bin_utils.utility_main_helper(default_config, args)
    log = logging.getLogger(__name__)

    #
    # Load configuration contents
    #
    uuid_list_filepath = args.uuids_list
    report_interval = config['utility']['report_interval']
    use_multiprocessing = config['utility']['use_multiprocessing']

    #
    # Checking input parameters
    #
    if (uuid_list_filepath is not None) and \
            not os.path.isfile(uuid_list_filepath):
        raise ValueError("UUIDs list file does not exist!")

    #
    # Loading stuff
    #
    log.info("Loading descriptor index")
    #: :type: smqtk.representation.DescriptorIndex
    descriptor_index = plugin.from_plugin_config(
        config['plugins']['descriptor_index'],
        get_descriptor_index_impls()
    )
    log.info("Loading LSH functor")
    #: :type: smqtk.algorithms.LshFunctor
    lsh_functor = plugin.from_plugin_config(
        config['plugins']['lsh_functor'],
        get_lsh_functor_impls()
    )
    log.info("Loading Key/Value store")
    #: :type: smqtk.representation.KeyValueStore
    hash2uuids_kvstore = plugin.from_plugin_config(
        config['plugins']['hash2uuid_kvstore'],
        get_key_value_store_impls()
    )

    # Iterate either over what's in the file given, or everything in the
    # configured index.
    def iter_uuids():
        if uuid_list_filepath:
            log.info("Using UUIDs list file")
            with open(uuid_list_filepath) as f:
                for l in f:
                    yield l.strip()
        else:
            log.info("Using all UUIDs resent in descriptor index")
            for k in descriptor_index.keys():
                yield k

    #
    # Compute codes
    #
    log.info("Starting hash code computation")
    kv_update = {}
    for uuid, hash_int in \
            compute_hash_codes(uuids_for_processing(iter_uuids(),
                                                    hash2uuids_kvstore),
                               descriptor_index, lsh_functor,
                               report_interval,
                               use_multiprocessing, True):
        # Get original value in KV-store if not in update dict.
        if hash_int not in kv_update:
            kv_update[hash_int] = hash2uuids_kvstore.get(hash_int, set())
        kv_update[hash_int] |= {uuid}

    if kv_update:
        log.info("Updating KV store... (%d keys)" % len(kv_update))
        hash2uuids_kvstore.add_many(kv_update)

    log.info("Done")