Esempio n. 1
0
def main():
    """Ingest local files into a configured file-based data set.

    Parses the command line, then either:

    * lists the labels of configured data sets whose ``type`` is
      ``"DataFileSet"`` (``-l/--list-ingests``) and exits with status 0, or
    * adds every file named by the positional arguments to the data set
      selected with ``-s/--set-label``. Each positional argument is first
      user-expanded (``~``); if it names an existing file it is ingested
      directly, otherwise it is treated as a shell-style glob and every
      match is ingested.

    Exits with status 1 when no data set label was provided.
    """
    # ``exit`` is injected by the ``site`` module for interactive sessions
    # and is not guaranteed to be defined; ``sys.exit`` is the programmatic
    # equivalent (both raise SystemExit).
    import sys

    usage = "%prog [options] GLOB [ GLOB [ ... ] ]"
    description = "Create a file-based ingest from a set of local file paths " \
                  "or shell-style glob strings."

    parser = bin_utils.SMQTKOptParser(usage, description=description)
    parser.add_option('-s', '--set-label',
                      help="Configured ingest to 'ingest' into.")
    parser.add_option('-l', '--list-ingests', action='store_true',
                      default=False,
                      help="List available ingests we can ingest new data "
                           "into. See the system_config.json file in the etc "
                           "directory for more details.")
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='Add debug messaged to output logging.')
    opts, args = parser.parse_args()

    # ``opts.verbose`` is a bool, so this selects INFO (20) or DEBUG (10).
    bin_utils.initialize_logging(logging.getLogger(),
                                 logging.INFO - (10*opts.verbose))
    log = logging.getLogger("main")

    if opts.list_ingests:
        # Find labels for configured data sets that are of the FileSet type.
        # ``items()`` (rather than ``iteritems()``) works on both Python 2
        # and Python 3 dictionaries.
        file_ds_labels = [
            label
            for label, dsc in smqtk_config.SYSTEM_CONFIG['DataSets'].items()
            if dsc['type'] == "DataFileSet"
        ]

        log.info("")
        log.info("Available File-based datasets:")
        for label in sorted(file_ds_labels):
            log.info("\t%s", label)
        log.info("")
        sys.exit(0)

    if opts.set_label is None:
        # This is a failure condition, so report it at ERROR level instead of
        # INFO so that it is not mistaken for (or filtered as) normal output.
        log.error("")
        log.error("ERROR: Please provide data set configuration label.")
        log.error("")
        sys.exit(1)

    fds = DataSetConfiguration.new_inst(opts.set_label)
    # Pass ``args`` as a logging argument so formatting is deferred until the
    # record is actually emitted.
    log.debug("Script arguments:\n%s", args)

    def ingest_file(fp):
        """Wrap the file path in a DataFileElement and add it to the set."""
        fds.add_data(DataFileElement(fp))

    for path_or_glob in args:
        path_or_glob = osp.expanduser(path_or_glob)
        if osp.isfile(path_or_glob):
            ingest_file(path_or_glob)
            continue

        log.debug("Expanding glob: %s", path_or_glob)
        matches = glob.glob(path_or_glob)
        if not matches:
            # Previously a mistyped path or an empty glob was dropped without
            # any indication; surface it so the user can notice.
            log.warning("No files matched: %s", path_or_glob)
        for match in matches:
            ingest_file(match)
Esempio n. 2
0
def main():
    """Generate descriptor (and optionally indexer) models for a data set.

    Parses the command line, optionally overrides the base configuration
    from a custom JSON file (``--sys-json``), and then either:

    * lists the available data set, content descriptor and indexer labels
      (``-l/--list``) and exits with status 0, or
    * validates the given labels, generates the model for the chosen content
      descriptor over the chosen data set, and, when an indexer label was
      given, computes descriptors for the data set and generates the indexer
      model from them.

    Exits with status 1 when a required label (``-d``, ``-c``) is missing or
    when any given label is not among the available labels.
    """
    import optparse
    # ``exit`` is injected by the ``site`` module for interactive sessions
    # and is not guaranteed to be defined; use ``sys.exit`` instead.
    import sys

    # NOTE: the original text referred to a "-t/--type" option, which this
    # script does not define ("-t" is "--threads"); the data set is selected
    # with -d/--data-set.
    description = \
        "Generate the model for the given indexer type, using features " \
        "from the given feature descriptor type. We use configured valued in " \
        "the smqtk_config module and from the system configuration JSON file " \
        "(etc/system_config.json) unless otherwise specified by options to " \
        "this script. Specific ingest used is determined by the data set " \
        "label provided (-d/--data-set)."
    parser = bin_utils.SMQTKOptParser(description=description)
    group_required = optparse.OptionGroup(parser, "Required Options")
    group_optional = optparse.OptionGroup(parser, "Optional")

    group_required.add_option('-d', '--data-set',
                              help="Data set to use for model generation.")
    group_required.add_option('-c', '--content-descriptor',
                              help="Feature descriptor type for model and "
                                   "feature generation.")

    # The indexer is optional (indexer model generation is skipped when it is
    # not given), so it belongs in the optional group.
    group_optional.add_option('-i', '--indexer',
                              help="(Optional) Indexer type for model "
                                   "generation.")
    group_optional.add_option('--sys-json',
                              help="Custom system configuration JSON file to "
                                   "use. Otherwise we use the one specified in "
                                   "the smqtk_config module.")
    group_optional.add_option('-l', '--list',
                              action='store_true', default=False,
                              help="List available ingest configurations. If "
                                   "a valid ingest configuration has been "
                                   "specified, we list available "
                                   "FeatureDetector and Indexer configurations "
                                   "available.")
    group_optional.add_option('-t', '--threads', type=int, default=None,
                              help='Number of threads/processes to use for '
                                   'processing. By default we use all '
                                   'available cores/threads.')
    group_optional.add_option('-v', '--verbose', action='store_true',
                              default=False,
                              help='Add debug messaged to output logging.')

    parser.add_option_group(group_required)
    parser.add_option_group(group_optional)
    opts, args = parser.parse_args()

    # ``opts.verbose`` is a bool, so this selects INFO (20) or DEBUG (10).
    bin_utils.initialize_logging(logging.getLogger(),
                                 logging.INFO - (10*opts.verbose))
    log = logging.getLogger("main")

    dset_label = opts.data_set
    cd_label = opts.content_descriptor
    idxr_label = opts.indexer
    parallel = opts.threads

    # Prep custom JSON configuration if one was given. The file is passed
    # through ``jsmin`` before parsing.
    if opts.sys_json:
        with open(opts.sys_json) as json_file:
            json_config = json.loads(jsmin(json_file.read()))
        ConfigurationInterface.BASE_CONFIG = json_config['Ingests']

    if opts.list:
        log.info("")
        log.info("Available Data Sets:")
        log.info("")
        for label in DataSetConfiguration.available_labels():
            log.info("\t%s", label)
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for label in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s", label)
        log.info("")
        log.info("Available Indexer types:")
        log.info("")
        for label in IndexerConfiguration.available_labels():
            log.info("\t%s", label)
        log.info("")
        sys.exit(0)

    # Check given labels. All problems are reported before exiting so the
    # user can fix them in one pass.
    fail = False

    # The data set and content descriptor labels are required. Previously a
    # missing label skipped validation and was passed on as ``None`` to
    # ``new_inst`` below.
    if not dset_label:
        log.error("No data set label provided (-d/--data-set).")
        fail = True
    elif dset_label not in DataSetConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "data set configuration!", dset_label)
        fail = True

    if not cd_label:
        log.error("No content descriptor label provided "
                  "(-c/--content-descriptor).")
        fail = True
    elif cd_label not in ContentDescriptorConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "content descriptor configuration!", cd_label)
        fail = True

    # The indexer label is optional; only validate it when given.
    if idxr_label and idxr_label not in IndexerConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "indexer configuration!", idxr_label)
        fail = True

    if fail:
        sys.exit(1)

    log.info("Loading data-set instance...")
    #: :type: DataIngest or VideoIngest
    dset = DataSetConfiguration.new_inst(dset_label)

    log.info("Loading descriptor instance...")
    #: :type: smqtk.content_description.ContentDescriptor
    descriptor = ContentDescriptorConfiguration.new_inst(cd_label)
    # Generate any model files needed by the chosen descriptor
    descriptor.PARALLEL = parallel
    descriptor.generate_model(dset)

    # Don't do indexer model generation if a type was not provided
    if idxr_label:
        log.info("Loading indexer instance...")
        #: :type: smqtk.indexing.Indexer
        indexer = IndexerConfiguration.new_inst(idxr_label)

        # It is not guaranteed that the feature computation method is doing
        # anything in parallel, but if it is, request that it perform serially
        # in order to allow multiple high-level feature computation jobs, else
        # we could be overrun with threads.
        descriptor.PARALLEL = 1
        # Using NonDaemonicPool because content descriptors that do parallel
        # processing might use multiprocessing.Pool instances, too.
        # Pools don't usually allow daemonic processes, so this custom top-level
        # pool allows worker processes to spawn pools themselves.
        fmap = descriptor.compute_descriptor_async(
            dset,
            parallel=parallel,
            pool_type=NonDaemonicPool
        )

        indexer.generate_model(fmap, parallel=parallel)
Esempio n. 3
0
    def __init__(self, config_filepath=None):
        """Construct the search application and wire up its components.

        Configuration is layered in this order, later sources overriding
        earlier ones:

        1. the ``smqtk_config`` object (always loaded),
        2. the file named by the ``self.ENV_CONFIG`` environment variable,
           when that variable is set,
        3. ``config_filepath``, when given and it resolves to a file.

        After configuration, this sets the secret key, the Mongo database
        info and Mongo-backed session interface, enables the Jinja ``do``
        extension, registers the login and example-image IQR blueprints and
        installs the ``/`` and ``/home`` index route.

        :param config_filepath: Optional path to a configuration file to
            load on top of the defaults. A leading ``~`` is expanded.
        :type config_filepath: str | None

        :raises RuntimeError: If no configuration source was loaded.
        """
        super(SMQTKSearchApp, self).__init__(
            self.__class__.__name__,
            static_folder=os.path.join(SCRIPT_DIR, 'static'),
            template_folder=os.path.join(SCRIPT_DIR, 'templates')
        )

        #
        # Configuration setup
        #
        config_env_loaded = config_file_loaded = None

        # Load default -- This should always be present, aka base defaults
        self.config.from_object('smqtk_config')
        config_default_loaded = True

        # Load from env var if present
        if self.ENV_CONFIG in os.environ:
            self.log.info("Loading config from env var (%s)...",
                          self.ENV_CONFIG)
            self.config.from_envvar(self.ENV_CONFIG)
            config_env_loaded = True

        # Load from configuration file if given.
        # The user directory must be expanded BEFORE making the path absolute:
        # ``abspath("~/x")`` yields "<cwd>/~/x", after which ``expanduser``
        # can no longer do anything. The existence check is likewise done on
        # the normalised path so that "~"-relative paths are honoured.
        if config_filepath:
            config_file_path = os.path.abspath(
                os.path.expanduser(config_filepath)
            )
            if os.path.isfile(config_file_path):
                self.log.info("Loading config from file (%s)...",
                              config_file_path)
                self.config.from_pyfile(config_file_path)
                config_file_loaded = True
            else:
                # Don't silently ignore an explicitly requested file.
                self.log.warning("Config file not found, skipping (%s)",
                                 config_file_path)

        self.log.debug("Config defaults loaded : %s", config_default_loaded)
        self.log.debug("Config from env loaded : %s", config_env_loaded)
        self.log.debug("Config from file loaded: %s", config_file_loaded)
        # NOTE: ``config_default_loaded`` is unconditionally True above, so
        # this guard cannot currently fire. It is kept as a safety net should
        # the default load ever become optional.
        if not (config_default_loaded or config_env_loaded or config_file_loaded):
            raise RuntimeError("No configuration file specified for loading. "
                               "(%s=%s) (file=%s)"
                               % (self.ENV_CONFIG,
                                  os.environ.get(self.ENV_CONFIG, None),
                                  config_filepath))

        self.log.debug("Configuration loaded: %s", self.config)

        #
        # Security
        #
        self.secret_key = self.config['SECRET_KEY']

        #
        # Database setup using Mongo
        #
        # MONGO_SERVER is expected in "host:port" form; the port is passed on
        # as the string produced by the split.
        h, p = self.config['MONGO_SERVER'].split(':')
        n = "SMQTKSearchApp"
        self.db_info = DatabaseInfo(h, p, n)

        # Use mongo for session storage.
        # -> This allows session modification during Flask methods called from
        #    AJAX routines (default Flask sessions do not)
        self.session_interface = MongoSessionInterface(self.db_info.host,
                                                       self.db_info.port,
                                                       self.db_info.name)

        #
        # Misc. Setup
        #

        # Add 'do' statement usage
        self.jinja_env.add_extension('jinja2.ext.do')

        #
        # Modules
        #
        # Load up required and optional module blueprints
        #

        # Navigable blueprints. This should contain the blueprints that a user
        # should be able to navigate to. Not all blueprints have navigable
        # content or should allow user explicit navigation to, thus this
        # structure.
        #: :type: list of flask.Blueprint
        self._navigable_blueprints = []

        # Login module
        self.log.info("Initializing Login Blueprint")
        from .modules.login import LoginMod
        self.module_login = LoginMod('login', self)
        self.register_blueprint(self.module_login)

        # IQR modules
        # TODO: At the moment, for simplicity, we're fixing the feature detector
        #       and indexer types. In the future this should either be moved
        #       to something that can be chosen by the user or a
        #       multi-feature/indexer fusion system.
        from .modules.iqr import IQRSearch, IQRSearchFusion

        with SimpleTimer("Loading Example Image ingest + IQR...", self.log.info):
            ds_example_image = DataSetConfiguration.new_inst("example_image")

            self.mod_example_image = IQRSearch(
                "Image Search - Example Imagery",
                self, ds_example_image,
                "CD_CSIFT_Image_example", "SVM_HIK-CD_CSIFT-Image",
                url_prefix='/image_example'
            )
            self.register_blueprint(self.mod_example_image)
            self.add_navigable_blueprint(self.mod_example_image)

        # Reuses the data set instance created for the plain IQR module above.
        with SimpleTimer("Loading Example Image ingest + IQR Fusion", self.log.info):
            self.mod_example_image_fusion = IQRSearchFusion(
                "Image Search Fusion - Example Imagery",
                self, ds_example_image,
                "example_image",
                url_prefix='/image_example_fusion'
            )
            self.register_blueprint(self.mod_example_image_fusion)
            self.add_navigable_blueprint(self.mod_example_image_fusion)

        # with SimpleTimer("Loading Example Video ingest + IQR...", self.log.info):
        #     ds_example_video = DataSetConfiguration.new_inst("example_video")
        #     self.mod_example_video = IQRSearch(
        #         "Video Search - Example Videos",
        #         self, ds_example_video,
        #         "CD_CSIFT_Video_example", "SVM_HIK-CD_CSIFT-Video",
        #         url_prefix='/video_example'
        #     )
        #     self.register_blueprint(self.mod_example_video)
        #     self.add_navigable_blueprint(self.mod_example_video)

        #
        # Basic routing
        #

        @self.route('/home')
        @self.route('/')
        def smqtk_index():
            # Serve the index page for both "/" and "/home".
            self.log.info("Session: %s", flask.session.items())
            # noinspection PyUnresolvedReferences
            return flask.render_template("index.html", **self.nav_bar_content())