예제 #1
0
    def get_current_iqr_session(self):
        """
        Get the current IQR Session instance.

        :rtype: smqtk.IQR.iqr_session.IqrSession

        """
        with self._iqr_controller:
            sid = flask.session.sid
            if not self._iqr_controller.has_session_uuid(sid):
                sid_work_dir = osp.join(self.work_dir, sid)

                descriptor = ContentDescriptorConfiguration.new_inst(self._fd_type_str)
                indexer = IndexerConfiguration.new_inst(self._idxr_type_str)

                iqr_sess = IqrSession(sid_work_dir, descriptor, indexer, sid)
                self._iqr_controller.add_session(iqr_sess, sid)

                # If there are things already in our extension ingest, extend
                # the base indexer
                feat_map = \
                    descriptor.compute_descriptor_async(iqr_sess.extension_ds)
                indexer.extend_model(feat_map)

            return self._iqr_controller.get_session(sid)
예제 #2
0
    def get_descriptor_inst(self, label):
        """
        Get the cached content descriptor instance for a configuration label
        :type label: str
        :rtype: smqtk.content_description.ContentDescriptor
        """
        with self.descriptor_cache_lock:
            if label not in self.descriptor_cache:
                self.log.debug("Caching descriptor '%s'", label)
                self.descriptor_cache[label] = \
                    ContentDescriptorConfiguration.new_inst(label)

            return self.descriptor_cache[label]
예제 #3
0
        def all_content_types():
            """
            Of available descriptors, what content types are processable, and
            what types are associated to which available descriptor generator.
            """
            r = {}
            all_types = set()
            for l in ContentDescriptorConfiguration.available_labels():
                d = self.get_descriptor_inst(l)
                all_types.update(d.valid_content_types())
                r[l] = sorted(d.valid_content_types())

            return flask.jsonify({
                "all": sorted(all_types),
                "labels": r
            })
예제 #4
0
def main():
    import optparse
    description = \
        "Generate the model for the given indexer type, using features " \
        "from the given feature descriptor type. We use configured valued in " \
        "the smqtk_config module and from the system configuration JSON file " \
        "(etc/system_config.json) unless otherwise specified by options to " \
        "this script. Specific ingest used is determined by the ingest type " \
        "provided (-t/--type)."
    parser = bin_utils.SMQTKOptParser(description=description)
    group_required = optparse.OptionGroup(parser, "Required Options")
    group_optional = optparse.OptionGroup(parser, "Optional")

    group_required.add_option('-d', '--data-set',
                              help="Data set to use for model generation.")
    group_required.add_option('-c', '--content-descriptor',
                              help="Feature descriptor type for model and "
                                   "feature generation.")
    group_required.add_option('-i', '--indexer',
                              help="(Optional) Indexer type for model "
                                   "generation.")

    group_optional.add_option('--sys-json',
                              help="Custom system configuration JSON file to "
                                   "use. Otherwise we use the one specified in "
                                   "the smqtk_config module.")
    group_optional.add_option('-l', '--list',
                              action='store_true', default=False,
                              help="List available ingest configurations. If "
                                   "a valid ingest configuration has been "
                                   "specified, we list available "
                                   "FeatureDetector and Indexer configurations "
                                   "available.")
    group_optional.add_option('-t', '--threads', type=int, default=None,
                              help='Number of threads/processes to use for '
                                   'processing. By default we use all '
                                   'available cores/threads.')
    group_optional.add_option('-v', '--verbose', action='store_true',
                              default=False,
                              help='Add debug messaged to output logging.')

    parser.add_option_group(group_required)
    parser.add_option_group(group_optional)
    opts, args = parser.parse_args()

    bin_utils.initialize_logging(logging.getLogger(),
                                logging.INFO - (10*opts.verbose))
    log = logging.getLogger("main")

    dset_label = opts.data_set
    cd_label = opts.content_descriptor
    idxr_label = opts.indexer
    parallel = opts.threads

    # Prep custom JSON configuration if one was given
    if opts.sys_json:
        with open(opts.sys_json) as json_file:
            json_config = json.loads(jsmin(json_file.read()))
        ConfigurationInterface.BASE_CONFIG = json_config['Ingests']

    if opts.list:
        log.info("")
        log.info("Available Data Sets:")
        log.info("")
        for l in DataSetConfiguration.available_labels():
            log.info("\t%s" % l)
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for l in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s" % l)
        log.info("")
        log.info("Available Indexer types:")
        log.info("")
        for l in IndexerConfiguration.available_labels():
            log.info("\t%s", l)
        log.info("")
        exit(0)

    # Check given labels
    fail = False
    if dset_label and dset_label not in DataSetConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "data set configuration!", dset_label)
        fail = True
    if cd_label and cd_label not in ContentDescriptorConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "content descriptor configuration!", cd_label)
        fail = True
    if idxr_label and idxr_label not in IndexerConfiguration.available_labels():
        log.error("Given label '%s' is NOT associated to an existing "
                  "indexer configuration!", idxr_label)
        fail = True
    if fail:
        exit(1)
    del fail

    log.info("Loading data-set instance...")
    #: :type: DataIngest or VideoIngest
    dset = DataSetConfiguration.new_inst(dset_label)

    log.info("Loading descriptor instance...")
    #: :type: smqtk.content_description.ContentDescriptor
    descriptor = ContentDescriptorConfiguration.new_inst(cd_label)
    # Generate any model files needed by the chosen descriptor
    descriptor.PARALLEL = parallel
    descriptor.generate_model(dset)

    # Don't do indexer model generation if a type was not provided
    if idxr_label:
        log.info("Loading indexer instance...")
        #: :type: smqtk.indexing.Indexer
        indexer = IndexerConfiguration.new_inst(idxr_label)

        # It is not guaranteed that the feature computation method is doing
        # anything in parallel, but if it is, request that it perform serially
        # in order to allow multiple high-level feature computation jobs, else
        # we could be overrun with threads.
        descriptor.PARALLEL = 1
        # Using NonDaemonicPool because content_description that might to
        # parallel processing might use multiprocessing.Pool instances, too.
        # Pools don't usually allow daemonic processes, so this custom top-level
        # pool allows worker processes to spawn pools themselves.
        fmap = descriptor.compute_descriptor_async(
            dset,
            parallel=parallel,
            pool_type=NonDaemonicPool
        )

        indexer.generate_model(fmap, parallel=parallel)
# Chuck files listing image files to process
CHUNK_FILES_GLOB = "/home/mattmann/data/exp5/image_catalog/deploy/data/archive/chunks/*/filelist_chunk_*.txt"
# Directory where we will detail what stages have already been completed
STAMP_FILES_DIR = "/data/local/memex/kitware/smqtk/stage_markers"
# Working directory to clean after completing every chunk for disk space conservation
CLEAN_WORK_DIR = "/data/local/memex/kitware/smqtk/work/ContentDescriptors"
# Parallel processes to run
PARALLEL = 4

# Descriptor Generator configuration type
DESCR_GENERATOR_CONFIG = "CD_CSIFT_RoxyImages_spatial"
# Descritpor Factory configuration to use
DESCR_FACTORY_CONFIG = "LocalDiskFactory"

# Descriptor generator to use
DESCR_GENERATOR = ContentDescriptorConfiguration.new_inst(
    DESCR_GENERATOR_CONFIG)
DESCR_GENERATOR.PARALLEL = 1
# Descriptor Factory to use
DESCR_FACTORY = DescriptorFactoryConfiguration.new_inst(DESCR_FACTORY_CONFIG)


def check_stage(label):
    """
    Check if a stage has been completed.
    :return: True if the given stage label has been marked complete.
    """
    return osp.isfile(osp.join(STAMP_FILES_DIR, label))


def mark_stage(label):
    """
예제 #6
0
    def __init__(self, name, parent_app, data_set,
                 descriptor_type, indexer_type,
                 url_prefix=None):
        """
        Initialize a generic IQR Search module with a single descriptor and
        indexer.

        :param name: Name of this blueprint instance
        :type name: str

        :param parent_app: Parent containing flask app instance
        :type parent_app: smqtk.web.search_app.app.search_app

        :param data_set: Data set to work over
        :type data_set: SMQTK.data_rep.DataSet

        :param descriptor_type: Feature Descriptor type string
        :type descriptor_type: str

        :param indexer_type: indexer type string
        :type indexer_type: str

        :param url_prefix: Web address prefix for this blueprint.
        :type url_prefix: str

        :raises ValueError: Invalid Descriptor or indexer type

        """
        super(IQRSearch, self).__init__(
            name, import_name=__name__,
            static_folder=os.path.join(SCRIPT_DIR, "static"),
            template_folder=os.path.join(SCRIPT_DIR, "templates"),
            url_prefix=url_prefix
        )

        # Make sure that the configured descriptor/indexer types exist, as
        # we as their system configuration sections
        if descriptor_type not in ContentDescriptorConfiguration.available_labels():
            raise ValueError("'%s' not a valid descriptor type" % descriptor_type)
        if indexer_type not in IndexerConfiguration.available_labels():
            raise ValueError("'%s' not a valid indexer type" % indexer_type)

        self._parent_app = parent_app
        self._data_set = data_set
        self._fd_type_str = descriptor_type
        self._idxr_type_str = indexer_type

        self._explicit_uids = set()
        self._explicit_uids_lock = multiprocessing.RLock()
        # TODO: Read in dict from save file

        # Uploader Sub-Module
        self.upload_work_dir = os.path.join(self.work_dir, "uploads")
        self.mod_upload = FileUploadMod('%s_uploader' % self.name, parent_app,
                                        self.upload_work_dir,
                                        url_prefix='/uploader')
        self.register_blueprint(self.mod_upload)

        # IQR Session control
        # TODO: Move session management to database. Create web-specific
        #       IqrSession class that stores/gets its state directly from
        #       database.
        self._iqr_controller = IqrController()

        # structures for session ingest progress
        # Two levels: SID -> FID
        self._ingest_progress_locks = {}
        self._ingest_progress = {}

        # Preview Image Caching
        # TODO: Initialize this into static directory that is being served.
        self._preview_cache = PreviewCache(osp.join(self.work_dir, "Previews"))

        # Directory to write data for static viewing
        self._static_data_dir = os.path.join(self.static_folder, 'tmp_data')
        # Cache mapping of written static files for data elements
        self._static_cache = {}

        #
        # Routing
        #

        @self.route("/")
        @self._parent_app.module_login.login_required
        def index():
            r = {
                "module_name": self.name,
                "uploader_url": self.mod_upload.url_prefix,
                "uploader_post_url": self.mod_upload.upload_post_url(),
            }
            r.update(parent_app.nav_bar_content())
            # noinspection PyUnresolvedReferences
            return flask.render_template("iqr_search_index.html", **r)

        @self.route('/iqr_session_info', methods=["GET"])
        @self._parent_app.module_login.login_required
        def iqr_session_info():
            """
            Get information about the current IRQ session
            """
            with self.get_current_iqr_session() as iqrs:
                # noinspection PyProtectedMember
                return flask.jsonify({
                    "uuid": iqrs.uuid,
                    "positive_uids": tuple(iqrs.positive_ids),
                    "negative_uids": tuple(iqrs.negative_ids),
                    "extension_ingest_contents":
                        dict((uid, str(df))
                             for uid, df in iqrs.extension_ds.iteritems()),
                    "FeatureMemory": {
                    }
                })

        @self.route("/check_current_iqr_session")
        @self._parent_app.module_login.login_required
        def check_current_iqr_session():
            """
            Check that the current IQR session exists and is initialized.

            :rtype: {
                    success: bool
                }
            """
            # Getting the current IQR session ensures that one has been
            # constructed for the current session.
            with self.get_current_iqr_session():
                return flask.jsonify({
                    "success": True
                })

        @self.route('/iqr_ingest_file', methods=['POST'])
        @self._parent_app.module_login.login_required
        def iqr_ingest_file():
            """
            Ingest the file with the given UID, getting the path from the
            uploader.

            :return: status message
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Start the ingest of a FID when POST
            if flask.request.method == "POST":
                iqr_sess = self.get_current_iqr_session()
                fid = flask.request.form['fid']

                self.log.debug("[%s::%s] Getting temporary filepath from "
                               "uploader module", iqr_sess.uuid, fid)
                upload_filepath = self.mod_upload.get_path_for_id(fid)
                self.mod_upload.clear_completed(fid)

                # Extend session ingest -- modifying
                with iqr_sess:
                    self.log.debug("[%s::%s] Adding new file to extension "
                                   "ingest", iqr_sess.uuid, fid)
                    sess_upload = osp.join(iqr_sess.work_dir,
                                           osp.basename(upload_filepath))
                    os.rename(upload_filepath, sess_upload)
                    upload_data = DataFileElement(sess_upload)
                    iqr_sess.extension_ds.add_data(upload_data)

                # Compute feature for data -- non-modifying
                self.log.debug("[%s::%s] Computing feature for file",
                               iqr_sess.uuid, fid)
                feat = iqr_sess.descriptor.compute_descriptor(upload_data)

                # Extend indexer model with feature data -- modifying
                with iqr_sess:
                    self.log.debug("[%s::%s] Extending indexer model with "
                                   "feature", iqr_sess.uuid, fid)
                    iqr_sess.indexer.extend_model({upload_data.uuid(): feat})

                    # of course, add the new data element as a positive
                    iqr_sess.adjudicate((upload_data.uuid(),))

                return "Finished Ingestion"

        @self.route("/adjudicate", methods=["POST", "GET"])
        @self._parent_app.module_login.login_required
        def adjudicate():
            """
            Update adjudication for this session

            :return: {
                    success: <bool>,
                    message: <str>
                }
            """
            if flask.request.method == "POST":
                fetch = flask.request.form
            elif flask.request.method == "GET":
                fetch = flask.request.args
            else:
                raise RuntimeError("Invalid request method '%s'"
                                   % flask.request.method)

            pos_to_add = json.loads(fetch.get('add_pos', '[]'))
            pos_to_remove = json.loads(fetch.get('remove_pos', '[]'))
            neg_to_add = json.loads(fetch.get('add_neg', '[]'))
            neg_to_remove = json.loads(fetch.get('remove_neg', '[]'))

            self.log.debug("Adjudicated Positive{+%s, -%s}, Negative{+%s, -%s} "
                           % (pos_to_add, pos_to_remove,
                              neg_to_add, neg_to_remove))

            with self.get_current_iqr_session() as iqrs:
                iqrs.adjudicate(pos_to_add, neg_to_add,
                                pos_to_remove, neg_to_remove)
            return flask.jsonify({
                "success": True,
                "message": "Adjudicated Positive{+%s, -%s}, Negative{+%s, -%s} "
                           % (pos_to_add, pos_to_remove,
                              neg_to_add, neg_to_remove)
            })

        @self.route("/get_item_adjudication", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_adjudication():
            """
            Get the adjudication status of a particular result by ingest ID.

            This should only ever return a dict where one of the two, or
            neither, are labeled True.

            :return: {
                    is_pos: <bool>,
                    is_neg: <bool>
                }
            """
            ingest_uid = flask.request.args['uid']
            with self.get_current_iqr_session() as iqrs:
                return flask.jsonify({
                    "is_pos": ingest_uid in iqrs.positive_ids,
                    "is_neg": ingest_uid in iqrs.negative_ids
                })

        @self.route("/get_positive_uids", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_positive_uids():
            """
            Get a list of the positive ingest UIDs

            :return: {
                    uids: list of <int>
                }
            """
            with self.get_current_iqr_session() as iqrs:
                return flask.jsonify({
                    "uids": list(iqrs.positive_ids)
                })

        @self.route("/get_random_uids")
        @self._parent_app.module_login.login_required
        def get_random_uids():
            """
            Return to the client a list of all known dataset IDs but in a random
            order. If there is currently an active IQR session with elements in
            its extension ingest, then those IDs are included in the random
            list.

            :return: {
                    uids: list of int
                }
            """
            all_ids = self._data_set.uuids()
            with self.get_current_iqr_session() as iqrs:
                all_ids.update(iqrs.extension_ds.uuids())
            all_ids = list(all_ids)
            random.shuffle(all_ids)
            return flask.jsonify({
                "uids": all_ids
            })

        @self.route("/get_ingest_image_preview_data", methods=["GET"])
        @self._parent_app.module_login.login_required
        def get_ingest_item_image_rep():
            """
            Return the base64 preview image data for the data file associated
            with the give UID.
            """
            uid = flask.request.args['uid']

            info = {
                "success": True,
                "message": None,
                "is_explicit": None,
                "shape": None,  # (width, height)
                "data": None,
                "ext": None,
                "static_file_link": None,
            }

            #: :type: smqtk.data_rep.DataElement
            de = None
            if self._data_set.has_uuid(uid):
                de = self._data_set.get_data(uid)
                with self._explicit_uids_lock:
                    info["is_explicit"] = uid in self._explicit_uids
            else:
                with self.get_current_iqr_session() as iqrs:
                    if iqrs.extension_ds.has_uuid(uid):
                        de = iqrs.extension_ds.get_data(uid)
                        info["is_explicit"] = uid in self._explicit_uids

            if not de:
                info["success"] = False
                info["message"] = "UUID not part of the active data set!"
            else:
                # TODO: Have data-file return an HTML chunk for implementation
                #       defined visualization?
                img_path = self._preview_cache.get_preview_image(de)
                img = PIL.Image.open(img_path)
                info["shape"] = img.size
                with open(img_path, 'rb') as img_file:
                    info["data"] = base64.encodestring(img_file.read())
                info["ext"] = osp.splitext(img_path)[1].lstrip('.')

                if de.uuid() not in self._static_cache:
                    self._static_cache[de.uuid()] = \
                        de.write_temp(self._static_data_dir)
                info['static_file_link'] = 'static/' \
                    + os.path.relpath(self._static_cache[de.uuid()],
                                      self.static_folder)

            return flask.jsonify(info)

        @self.route("/mark_uid_explicit", methods=["POST"])
        @self._parent_app.module_login.login_required
        def mark_uid_explicit():
            """
            Mark a given UID as explicit in its containing ingest.

            :return: Success value of True if the given UID was valid and set
                as explicit in its containing ingest.
            :rtype: {
                "success": bool
            }
            """
            uid = flask.request.form['uid']
            self._explicit_uids.add(uid)
            # TODO: Save out dict

            return flask.jsonify({'success': True})

        @self.route("/iqr_refine", methods=["POST"])
        @self._parent_app.module_login.login_required
        def iqr_refine():
            """
            Classify current IQR session indexer, updating ranking for
            display.

            Fails gracefully if there are no positive[/negative] adjudications.

            Expected Args:
            """
            pos_to_add = json.loads(flask.request.form.get('add_pos', '[]'))
            pos_to_remove = json.loads(flask.request.form.get('remove_pos', '[]'))
            neg_to_add = json.loads(flask.request.form.get('add_neg', '[]'))
            neg_to_remove = json.loads(flask.request.form.get('remove_neg', '[]'))

            with self.get_current_iqr_session() as iqrs:
                try:
                    iqrs.refine(pos_to_add, neg_to_add,
                                pos_to_remove, neg_to_remove)
                    return flask.jsonify({
                        "success": True,
                        "message": "Completed refinement"
                    })
                except Exception, ex:
                    return flask.jsonify({
                        "success": False,
                        "message": "ERROR: %s: %s" % (type(ex).__name__,
                                                      ex.message)
                    })
예제 #7
0
# Chuck files listing image files to process
CHUNK_FILES_GLOB = "/home/mattmann/data/exp5/image_catalog/deploy/data/archive/chunks/*/filelist_chunk_*.txt"
# Directory where we will detail what stages have already been completed
STAMP_FILES_DIR = "/data/local/memex/kitware/smqtk/stage_markers"
# Working directory to clean after completing every chunk for disk space conservation
CLEAN_WORK_DIR = "/data/local/memex/kitware/smqtk/work/ContentDescriptors"
# Parallel processes to run
PARALLEL = 4

# Descriptor Generator configuration type
DESCR_GENERATOR_CONFIG = "CD_CSIFT_RoxyImages_spatial"
# Descritpor Factory configuration to use 
DESCR_FACTORY_CONFIG = "LocalDiskFactory"

# Descriptor generator to use
DESCR_GENERATOR = ContentDescriptorConfiguration.new_inst(DESCR_GENERATOR_CONFIG)
DESCR_GENERATOR.PARALLEL = 1
# Descriptor Factory to use
DESCR_FACTORY = DescriptorFactoryConfiguration.new_inst(DESCR_FACTORY_CONFIG)


def check_stage(label):
    """
    Check if a stage has been completed.
    :return: True if the given stage label has been marked complete.
    """
    return osp.isfile(osp.join(STAMP_FILES_DIR, label))


def mark_stage(label):
    """
예제 #8
0
def main():
    usage = "%prog [OPTIONS] INPUT_FILE"
    description = "Compute a feature vector for a given data file, outputting " \
                  "the generated feature vector to standard out, or to an " \
                  "output file if one was specified.\n" \
                  "\n" \
                  "An ingest " \
                  "configuration must be specified for the purpose of " \
                  "identifying which model files to use (assuming a given " \
                  "descriptor has/uses model files). The ingest configuration " \
                  "also informs where to put temporary working files. "
    parser = bin_utils.SMQTKOptParser(usage, description=description)
    parser.add_option('-c', '--content-descriptor',
                      help='The descriptor type to use. This must be a type '
                           'available in system configuration')
    parser.add_option('-o', '--output-filepath',
                      help='Optional path to a file to output feature vector '
                           'to. Otherwise the feature vector is printed to '
                           'standard out. Output is saved in numpy binary '
                           'format (.npy suffix recommended).')
    parser.add_option('-l', '--list', action='store_true', default=False,
                      help='List available descriptor types.')
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='Print additional debugging messages. All logging '
                           'goes to standard error.')
    opts, args = parser.parse_args()

    output_filepath = opts.output_filepath
    descriptor_type = opts.content_descriptor
    verbose = opts.verbose

    llevel = logging.DEBUG if verbose else logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("main")

    if opts.list:
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for dl in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s", dl)
        log.info("")
        exit(0)

    if len(args) == 0:
        log.error("Failed to provide an input file path")
        exit(1)
    if len(args) > 1:
        log.warning("More than one filepath provided as an argument. Only "
                    "computing for the first one.")

    input_filepath = args[0]
    data_element = DataFileElement(input_filepath)

    fd = ContentDescriptorConfiguration.new_inst(descriptor_type)
    feat = fd.compute_descriptor(data_element)

    if output_filepath:
        numpy.save(output_filepath, feat)
    else:
        # Construct string, because numpy
        s = []
        for f in feat:
            s.append('%15f' % f)
        print ' '.join(s)
예제 #9
0
    def __init__(self, config_filepath=None):
        super(DescriptorServiceServer, self).__init__(
            self.__class__.__name__,
            static_folder=os.path.join(SCRIPT_DIR, 'static'),
            template_folder=os.path.join(SCRIPT_DIR, 'templates')
        )

        #
        # Configuration setup
        #
        config_env_loaded = config_file_loaded = None

        # Load default -- This should always be present, aka base defaults
        self.config.from_object('smqtk_config')
        config_default_loaded = True

        # Load from env var if present
        if self.ENV_CONFIG in os.environ:
            self.log.info("Loading config from env var (%s)...",
                          self.ENV_CONFIG)
            self.config.from_envvar(self.ENV_CONFIG)
            config_env_loaded = True

        # Load from configuration file if given
        if config_filepath and os.path.isfile(config_filepath):
            config_file_path = \
                os.path.expanduser(os.path.abspath(config_filepath))
            self.log.info("Loading config from file (%s)...", config_file_path)
            self.config.from_pyfile(config_file_path)
            config_file_loaded = True

        self.log.debug("Config defaults loaded : %s", config_default_loaded)
        self.log.debug("Config from env loaded : %s", config_env_loaded)
        self.log.debug("Config from file loaded: %s", config_file_loaded)
        if not (config_default_loaded or config_env_loaded
                or config_file_loaded):
            raise RuntimeError("No configuration file specified for loading. "
                               "(%s=%s) (file=%s)"
                               % (self.ENV_CONFIG,
                                  os.environ.get(self.ENV_CONFIG, None),
                                  config_filepath))

        # Descriptor factory setup
        if self.ENV_DSS_DE_FACTORY not in os.environ:
            raise RuntimeError("Missing environment configuration variable "
                               "`%s`, which should be set to the configuration "
                               "label of the DescriptorElementFactory to use."
                               % self.ENV_DSS_DE_FACTORY)
        self.de_factory_label = os.environ.get(self.ENV_DSS_DE_FACTORY,
                                               "MemoryDescriptorFactory")
        self.log.info("Using Descriptor factory: \"%s\"", self.de_factory_label)
        try:
            self.descr_elem_factory = \
                DescriptorFactoryConfiguration.new_inst(self.de_factory_label)
        except KeyError:
            raise ValueError("Invalid factory label set to %s: \"%s\""
                             % (self.ENV_DSS_DE_FACTORY, self.de_factory_label))

        # Cache of ContentDescriptor instances
        self.descriptor_cache = {}
        self.descriptor_cache_lock = multiprocessing.RLock()

        #
        # Security
        #
        self.secret_key = self.config['SECRET_KEY']

        @self.route("/")
        def list_ingest_labels():
            return flask.jsonify({
                "labels": sorted(ContentDescriptorConfiguration
                                 .available_labels())
            })

        @self.route("/all/content_types")
        def all_content_types():
            """
            Of available descriptors, what content types are processable, and
            what types are associated to which available descriptor generator.
            """
            r = {}
            all_types = set()
            for l in ContentDescriptorConfiguration.available_labels():
                d = self.get_descriptor_inst(l)
                all_types.update(d.valid_content_types())
                r[l] = sorted(d.valid_content_types())

            return flask.jsonify({
                "all": sorted(all_types),
                "labels": r
            })

        @self.route("/all/compute/<path:uri>")
        def all_compute(uri):
            """
            Compute descriptors over the specified content for all generators
            that function over the data's content type.

            # JSON Return format
                {
                    "success": <bool>

                    "content_type": <str>

                    "message": <str>

                    "descriptors": {  "<label>":  <list[float]>, ... } | None

                    "reference_uri": <str>
                }

            """
            message = "execution nominal"

            data_elem = None
            try:
                data_elem = self.resolve_data_element(uri)
            except ValueError, ex:
                message = "Failed URI resolution: %s" % str(ex)

            descriptors = {}
            finished_loop = False
            if data_elem:
                for l in ContentDescriptorConfiguration.available_labels():
                    if data_elem.content_type() \
                            in self.get_descriptor_inst(l).valid_content_types():
                        d = None
                        try:
                            d = self.generate_descriptor(data_elem, l)
                        except RuntimeError, ex:
                            message = "Descriptor extraction failure: %s" \
                                      % str(ex)
                        except ValueError, ex:
                            message = "Data content type issue: %s" % str(ex)

                        descriptors[l] = d and d.vector().tolist()
예제 #10
0
def main():
    usage = "%prog [OPTIONS] INPUT_FILE"
    description = """\
Compute a descriptor vector for a given data file, outputting the generated
feature vector to standard out, or to an output file if one was specified (in
numpy format).
"""
    parser = bin_utils.SMQTKOptParser(usage, description=description)

    group_labels = optparse.OptionGroup(parser, "Configuration Labels")
    group_labels.add_option('-c', '--content-descriptor',
                            help='The descriptor type to use. This must be a '
                                 'type available in the system configuration')
    group_labels.add_option('-f', '--factory-type',
                            help='The DescriptorElementFactory configuration '
                                 'to use when computing the descriptor. This '
                                 'must be a type available in the system '
                                 'configuration.')
    parser.add_option_group(group_labels)

    group_optional = optparse.OptionGroup(parser, "Optional Parameters")
    group_optional.add_option('-l', '--list',
                              action='store_true', default=False,
                              help='List available descriptor types.')
    group_optional.add_option('--overwrite',
                              action='store_true', default=False,
                              help="Force descriptor computation even if an "
                                   "existing descriptor vector was discovered "
                                   "based on the given content descriptor type "
                                   "and data combination.")
    group_optional.add_option('-o', '--output-filepath',
                              help='Optional path to a file to output feature '
                                   'vector to. Otherwise the feature vector is '
                                   'printed to standard out. Output is saved '
                                   'in numpy binary format (.npy suffix '
                                   'recommended).')
    group_optional.add_option('-v', '--verbose',
                              action='store_true', default=False,
                              help='Print additional debugging messages. All '
                                   'logging goes to standard error.')
    parser.add_option_group(group_optional)

    opts, args = parser.parse_args()

    output_filepath = opts.output_filepath
    descriptor_label = opts.content_descriptor
    factory_label = opts.factory_type
    overwrite = opts.overwrite
    verbose = opts.verbose

    llevel = logging.DEBUG if verbose else logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("main")

    if opts.list:
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for dl in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s", dl)
        log.info("")
        log.info("Available DescriptorElementFactory types:")
        log.info("")
        for df in DescriptorFactoryConfiguration.available_labels():
            log.info("\t%s", df)
        log.info("")
        exit(0)

    if len(args) == 0:
        log.error("Failed to provide an input file path")
        exit(1)
    if len(args) > 1:
        log.warning("More than one filepath provided as an argument. Only "
                    "computing for the first one.")

    input_filepath = args[0]
    data_element = DataFileElement(input_filepath)

    cd = ContentDescriptorConfiguration.new_inst(descriptor_label)
    factory = DescriptorFactoryConfiguration.new_inst(factory_label)
    descr_elem = cd.compute_descriptor(data_element, factory, overwrite)
    vec = descr_elem.vector()

    if vec is None:
        log.error("Failed to generate a descriptor vector for the input data!")

    if output_filepath:
        numpy.save(output_filepath, vec)
    else:
        # Construct string, because numpy
        s = []
        # noinspection PyTypeChecker
        for f in vec:
            s.append('%15f' % f)
        print ' '.join(s)