예제 #1
0
def cli_config(output_filepath, input_config, overwrite):
    """
    Generate a default or template JSON configuration file for this tool.

    When ``input_config`` is given, it is loaded on top of the default
    configuration and the result is written out. Otherwise the default
    configuration alone is written.

    :param output_filepath: Destination handed to ``output_config``.
    :param input_config: Optional existing configuration file to load over
        the defaults. ``None`` means only the defaults are output.
    :param overwrite: Forwarded to ``output_config`` as its ``overwrite``
        keyword argument.

    :raises RuntimeError: ``input_config`` was provided but ``load_config``
        reported that it was not loaded successfully.
    """
    if input_config is not None:
        c_dict, success = load_config(input_config, build_default_config())
        if not success:
            # Interpolate the offending path; the placeholder was previously
            # left unformatted, hiding which file failed to load.
            raise RuntimeError("Did not load input configuration file '{}' "
                               "successfully.".format(input_config))
    else:
        c_dict = build_default_config()
    output_config(output_filepath, c_dict, overwrite=overwrite)
예제 #2
0
def cli_build(config_filepath):
    """
    Construct a nearest-neighbors index over the configured descriptor set.

    The configuration file is loaded on top of the default configuration and
    must load successfully, since the defaults alone are not sufficient.

    :param config_filepath: Path of the configuration file to load.

    :raises RuntimeError: The configuration file was not loaded.
    """
    load_result = load_config(config_filepath,
                              defaults=build_default_config())
    config, was_loaded = load_result
    if not was_loaded:
        # The defaults cannot stand in for a real configuration file.
        raise RuntimeError("Failed to load configuration file.")

    # Instantiate the descriptor set first, then the index that consumes it.
    descriptor_set_config = config['descriptor_set']
    descriptors = from_config_dict(descriptor_set_config,
                                   DescriptorSet.get_impls())

    neighbor_index_config = config['neighbor_index']
    index = from_config_dict(neighbor_index_config,
                             NearestNeighborsIndex.get_impls())

    # TODO: reduced amount used for building ("training") and remainder used
    #       for update.
    index.build_index(descriptors)
예제 #3
0
def main():
    """
    Build the models for one IQR tab from a set of input files.

    Steps performed, in order:

    1. Parse command line arguments and initialize logging.
    2. Load the UI and IQR configuration files (both must load).
    3. Check that the requested tab exists in the UI configuration.
    4. Instantiate the data set, descriptor element factory, descriptor
       generator and nearest-neighbors index plugins from configuration.
    5. Add the input files (literal paths or expanded globs) to the data set.
    6. Generate a descriptor generator model, if the generator exposes a
       ``generate_model`` method.
    7. Compute descriptors for the data set.
    8. Fit the index's LSH functor, if the index exposes ``lsh_functor.fit``.
    9. Build the nearest-neighbors index from the computed descriptors.

    :raises RuntimeError: One or both configuration files failed to load.
    :raises SystemExit: With status 1 when no tab was given or the given tab
        is not present in the UI configuration.
    """
    args = cli_parser().parse_args()

    ui_config_filepath, iqr_config_filepath = args.config
    llevel = logging.DEBUG if args.verbose else logging.INFO
    tab = args.tab
    input_files_globs = args.input_files

    # Not using `cli.utility_main_helper`` due to deviating from single-
    # config-with-default usage.
    cli.initialize_logging(logging.getLogger('smqtk'), llevel)
    cli.initialize_logging(logging.getLogger('__main__'), llevel)
    log = logging.getLogger(__name__)

    log.info("Loading UI config: '{}'".format(ui_config_filepath))
    ui_config, ui_config_loaded = cli.load_config(ui_config_filepath)
    log.info("Loading IQR config: '{}'".format(iqr_config_filepath))
    iqr_config, iqr_config_loaded = cli.load_config(iqr_config_filepath)
    if not (ui_config_loaded and iqr_config_loaded):
        raise RuntimeError("One or both configuration files failed to load.")

    # Ensure the given "tab" exists in UI configuration.
    # ``SystemExit`` is raised directly instead of calling the ``exit()``
    # helper, which is injected by the ``site`` module for interactive use
    # and is not guaranteed to exist.
    if tab is None:
        log.error("No configuration tab provided to drive model generation.")
        raise SystemExit(1)
    if tab not in ui_config["iqr_tabs"]:
        log.error("Invalid tab provided: '{}'. Available tabs: {}".format(
            tab, list(ui_config["iqr_tabs"])))
        raise SystemExit(1)

    #
    # Gather Configurations
    #
    log.info("Extracting plugin configurations")

    ui_tab_config = ui_config["iqr_tabs"][tab]
    iqr_plugins_config = iqr_config['iqr_service']['plugins']

    # Configure DataSet implementation and parameters
    data_set_config = ui_tab_config['data_set']

    # Configure DescriptorElementFactory instance, which defines what
    # implementation of DescriptorElement to use for storing generated
    # descriptor vectors below.
    descriptor_elem_factory_config = iqr_plugins_config['descriptor_factory']

    # Configure DescriptorGenerator algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    descriptor_generator_config = iqr_plugins_config['descriptor_generator']

    # Configure NearestNeighborIndex algorithm implementation, parameters and
    # persistent model component locations (if implementation has any).
    nn_index_config = iqr_plugins_config['neighbor_index']

    #
    # Initialize data/algorithms
    #
    # Constructing appropriate data structures and algorithms, needed for the
    # IQR demo application, in preparation for model training.
    #
    log.info("Instantiating plugins")
    #: :type: representation.DataSet
    data_set = \
        from_config_dict(data_set_config, representation.DataSet.get_impls())
    descriptor_elem_factory = \
        representation.DescriptorElementFactory \
        .from_config(descriptor_elem_factory_config)
    #: :type: algorithms.DescriptorGenerator
    descriptor_generator = \
        from_config_dict(descriptor_generator_config,
                         algorithms.DescriptorGenerator.get_impls())

    #: :type: algorithms.NearestNeighborsIndex
    nn_index = \
        from_config_dict(nn_index_config,
                         algorithms.NearestNeighborsIndex.get_impls())

    #
    # Build models
    #
    log.info("Adding files to dataset '{}'".format(data_set))
    for g in input_files_globs:
        g = osp.expanduser(g)
        if osp.isfile(g):
            # An existing file path is added as-is, without glob expansion.
            data_set.add_data(DataFileElement(g, readonly=True))
        else:
            log.debug("Expanding glob: %s" % g)
            for fp in glob.iglob(g):
                data_set.add_data(DataFileElement(fp, readonly=True))

    # Generate a model if the generator defines a known generation method.
    # Only the attribute lookup is guarded so that an AttributeError raised
    # from inside the generation itself is not mistaken for "no such method".
    try:
        log.debug("descriptor generator as model to generate?")
        generate_model = descriptor_generator.generate_model
    except AttributeError as ex:
        log.debug(
            "descriptor generator as model to generate - Nope: {}".format(
                str(ex)))
    else:
        generate_model(data_set)

    # Generate descriptors of data for building NN index.
    log.info("Computing descriptors for data set with {}".format(
        descriptor_generator))
    data2descriptor = descriptor_generator.compute_descriptor_async(
        data_set, descriptor_elem_factory)

    # Possible additional support steps before building NNIndex
    # As above, only the lookup of the optional functor is guarded.
    try:
        # Fit the LSH index functor
        log.debug("Has LSH Functor to fit?")
        fit_lsh_functor = nn_index.lsh_functor.fit
    except AttributeError as ex:
        log.debug("Has LSH Functor to fit - Nope: {}".format(str(ex)))
    else:
        fit_lsh_functor(six.itervalues(data2descriptor))

    log.info("Building nearest neighbors index {}".format(nn_index))
    nn_index.build_index(six.itervalues(data2descriptor))