# Working directory to clean after completing every chunk, for disk space
# conservation.
CLEAN_WORK_DIR = "/data/local/memex/kitware/smqtk/work/ContentDescriptors"
# Number of parallel processes to run
PARALLEL = 4

# Configuration label of the descriptor generator to instantiate
DESCR_GENERATOR_CONFIG = "CD_CSIFT_RoxyImages_spatial"
# Configuration label of the descriptor factory to instantiate
DESCR_FACTORY_CONFIG = "LocalDiskFactory"

# Descriptor generator instance, built from DESCR_GENERATOR_CONFIG
DESCR_GENERATOR = ContentDescriptorConfiguration.new_inst(
    DESCR_GENERATOR_CONFIG)
# NOTE(review): presumably keeps the generator itself single-process because
# parallelism is applied at the PARALLEL level -- confirm.
DESCR_GENERATOR.PARALLEL = 1
# Descriptor factory instance, built from DESCR_FACTORY_CONFIG
DESCR_FACTORY = DescriptorFactoryConfiguration.new_inst(DESCR_FACTORY_CONFIG)


def check_stage(label):
    """
    Tell whether the stage named ``label`` has already been completed.

    A stage counts as complete when a regular file named ``label`` exists
    inside ``STAMP_FILES_DIR``.

    :param label: Name of the stage to look up.
    :return: True if the given stage label has been marked complete.
    """
    stamp_path = osp.join(STAMP_FILES_DIR, label)
    return osp.isfile(stamp_path)


def mark_stage(label):
    """
    Mark a stage identified by the given label as complete.

    Emits an INFO-level message naming the stage on the ``mark_stage``
    logger.

    :param label: Name of the stage being marked.
    """
    stage_log = logging.getLogger("mark_stage")
    stage_log.info("Marking stage '%s' complete", label)
Example #2
0
    def __init__(self, config_filepath=None):
        """
        Set up the descriptor service application: load configuration,
        create the descriptor element factory and register the URL routes.

        Configuration sources are applied in this order: the ``smqtk_config``
        object, then ``self.config.from_envvar`` for the variable named by
        ``self.ENV_CONFIG`` (when that variable is set), then
        ``config_filepath`` (when given and an existing file).

        :param config_filepath: Optional path to a configuration file to load
            on top of the defaults.

        :raises RuntimeError: If the environment variable named by
            ``self.ENV_DSS_DE_FACTORY`` is not set.
        :raises ValueError: If ``DescriptorFactoryConfiguration.new_inst``
            raises ``KeyError`` for the configured factory label.
        """
        super(DescriptorServiceServer, self).__init__(
            self.__class__.__name__,
            static_folder=os.path.join(SCRIPT_DIR, 'static'),
            template_folder=os.path.join(SCRIPT_DIR, 'templates')
        )

        #
        # Configuration setup
        #
        # Flags recording which optional sources were applied (None = not
        # loaded); they are only used for the debug output and check below.
        config_env_loaded = config_file_loaded = None

        # Load default -- This should always be present, aka base defaults
        self.config.from_object('smqtk_config')
        config_default_loaded = True

        # Load from env var if present
        if self.ENV_CONFIG in os.environ:
            self.log.info("Loading config from env var (%s)...",
                          self.ENV_CONFIG)
            self.config.from_envvar(self.ENV_CONFIG)
            config_env_loaded = True

        # Load from configuration file if given
        # NOTE(review): the isfile() test runs on the path as given, before
        # expanduser(), so a "~"-prefixed path is not expanded for the check.
        if config_filepath and os.path.isfile(config_filepath):
            config_file_path = \
                os.path.expanduser(os.path.abspath(config_filepath))
            self.log.info("Loading config from file (%s)...", config_file_path)
            self.config.from_pyfile(config_file_path)
            config_file_loaded = True

        self.log.debug("Config defaults loaded : %s", config_default_loaded)
        self.log.debug("Config from env loaded : %s", config_env_loaded)
        self.log.debug("Config from file loaded: %s", config_file_loaded)
        # NOTE(review): config_default_loaded is unconditionally True above,
        # so this condition is never satisfied and the raise cannot trigger.
        if not (config_default_loaded or config_env_loaded
                or config_file_loaded):
            raise RuntimeError("No configuration file specified for loading. "
                               "(%s=%s) (file=%s)"
                               % (self.ENV_CONFIG,
                                  os.environ.get(self.ENV_CONFIG, None),
                                  config_filepath))

        # Descriptor factory setup
        # The factory label is mandatory and taken from the environment.
        if self.ENV_DSS_DE_FACTORY not in os.environ:
            raise RuntimeError("Missing environment configuration variable "
                               "`%s`, which should be set to the configuration "
                               "label of the DescriptorElementFactory to use."
                               % self.ENV_DSS_DE_FACTORY)
        # NOTE(review): the "MemoryDescriptorFactory" default is never used,
        # because a missing variable already raised just above.
        self.de_factory_label = os.environ.get(self.ENV_DSS_DE_FACTORY,
                                               "MemoryDescriptorFactory")
        self.log.info("Using Descriptor factory: \"%s\"", self.de_factory_label)
        try:
            self.descr_elem_factory = \
                DescriptorFactoryConfiguration.new_inst(self.de_factory_label)
        except KeyError:
            # Re-raise as ValueError with a message naming the variable and
            # the offending label.
            raise ValueError("Invalid factory label set to %s: \"%s\""
                             % (self.ENV_DSS_DE_FACTORY, self.de_factory_label))

        # Cache of ContentDescriptor instances
        self.descriptor_cache = {}
        # Re-entrant lock kept alongside the cache
        self.descriptor_cache_lock = multiprocessing.RLock()

        #
        # Security
        #
        self.secret_key = self.config['SECRET_KEY']

        @self.route("/")
        def list_ingest_labels():
            """
            Return, as JSON, the sorted list of available content descriptor
            configuration labels under the "labels" key.
            """
            return flask.jsonify({
                "labels": sorted(ContentDescriptorConfiguration
                                 .available_labels())
            })

        @self.route("/all/content_types")
        def all_content_types():
            """
            Of available descriptors, what content types are processable, and
            what types are associated to which available descriptor generator.
            """
            # r maps each label to its sorted content types; all_types is the
            # union across every label.
            r = {}
            all_types = set()
            for l in ContentDescriptorConfiguration.available_labels():
                d = self.get_descriptor_inst(l)
                all_types.update(d.valid_content_types())
                r[l] = sorted(d.valid_content_types())

            return flask.jsonify({
                "all": sorted(all_types),
                "labels": r
            })

        @self.route("/all/compute/<path:uri>")
        def all_compute(uri):
            """
            Compute descriptors over the specified content for all generators
            that function over the data's content type.

            # JSON Return format
                {
                    "success": <bool>

                    "content_type": <str>

                    "message": <str>

                    "descriptors": {  "<label>":  <list[float]>, ... } | None

                    "reference_uri": <str>
                }

            """
            message = "execution nominal"

            # A resolution failure is reported through `message`; data_elem
            # then stays None and the generation loop below is skipped.
            data_elem = None
            try:
                data_elem = self.resolve_data_element(uri)
            except ValueError, ex:
                message = "Failed URI resolution: %s" % str(ex)

            descriptors = {}
            finished_loop = False
            if data_elem:
                for l in ContentDescriptorConfiguration.available_labels():
                    # Only run generators that accept this content type.
                    if data_elem.content_type() \
                            in self.get_descriptor_inst(l).valid_content_types():
                        d = None
                        try:
                            d = self.generate_descriptor(data_elem, l)
                        except RuntimeError, ex:
                            message = "Descriptor extraction failure: %s" \
                                      % str(ex)
                        except ValueError, ex:
                            message = "Data content type issue: %s" % str(ex)

                        # Stores None when generation failed (d is still
                        # None), otherwise the vector converted to a list.
                        descriptors[l] = d and d.vector().tolist()
# Directory in which check_stage() looks for per-stage stamp files
STAMP_FILES_DIR = "/data/local/memex/kitware/smqtk/stage_markers"
# Working directory to clean after completing every chunk, for disk space
# conservation.
CLEAN_WORK_DIR = "/data/local/memex/kitware/smqtk/work/ContentDescriptors"
# Number of parallel processes to run
PARALLEL = 4

# Configuration label of the descriptor generator to instantiate
DESCR_GENERATOR_CONFIG = "CD_CSIFT_RoxyImages_spatial"
# Configuration label of the descriptor factory to instantiate
DESCR_FACTORY_CONFIG = "LocalDiskFactory"

# Descriptor generator instance, built from DESCR_GENERATOR_CONFIG
DESCR_GENERATOR = ContentDescriptorConfiguration.new_inst(DESCR_GENERATOR_CONFIG)
# NOTE(review): presumably keeps the generator itself single-process because
# parallelism is applied at the PARALLEL level -- confirm.
DESCR_GENERATOR.PARALLEL = 1
# Descriptor factory instance, built from DESCR_FACTORY_CONFIG
DESCR_FACTORY = DescriptorFactoryConfiguration.new_inst(DESCR_FACTORY_CONFIG)


def check_stage(label):
    """
    Tell whether the stage named ``label`` has already been completed.

    A stage counts as complete when a regular file named ``label`` exists
    inside ``STAMP_FILES_DIR``.

    :param label: Name of the stage to look up.
    :return: True if the given stage label has been marked complete.
    """
    stamp_path = osp.join(STAMP_FILES_DIR, label)
    return osp.isfile(stamp_path)


def mark_stage(label):
    """
    Mark a stage identified by the given label as complete.

    Emits an INFO-level message naming the stage on the ``mark_stage``
    logger.

    :param label: Name of the stage being marked.
    """
    stage_log = logging.getLogger("mark_stage")
    stage_log.info("Marking stage '%s' complete", label)
Example #4
0
def main():
    """
    Command-line entry point: compute a descriptor vector for one input file.

    Parses the command line, then either lists the available
    ContentDescriptor / DescriptorElementFactory configuration labels
    (``-l``) or computes the descriptor of the first positional argument
    using the configurations selected with ``-c`` and ``-f``. The resulting
    vector is saved with ``numpy.save`` when ``-o`` is given; otherwise it is
    printed to standard out as space-separated ``%15f`` fields.

    Exits with status 0 after listing, and with status 1 when no input file
    was given or when no descriptor vector could be generated.
    """
    # Local import keeps this block self-contained. ``sys.exit`` is used
    # instead of the bare ``exit`` helper, which is injected by the ``site``
    # module and is not guaranteed to exist (e.g. under ``python -S``).
    import sys

    usage = "%prog [OPTIONS] INPUT_FILE"
    description = """\
Compute a descriptor vector for a given data file, outputting the generated
feature vector to standard out, or to an output file if one was specified (in
numpy format).
"""
    parser = bin_utils.SMQTKOptParser(usage, description=description)

    group_labels = optparse.OptionGroup(parser, "Configuration Labels")
    group_labels.add_option('-c', '--content-descriptor',
                            help='The descriptor type to use. This must be a '
                                 'type available in the system configuration')
    group_labels.add_option('-f', '--factory-type',
                            help='The DescriptorElementFactory configuration '
                                 'to use when computing the descriptor. This '
                                 'must be a type available in the system '
                                 'configuration.')
    parser.add_option_group(group_labels)

    group_optional = optparse.OptionGroup(parser, "Optional Parameters")
    group_optional.add_option('-l', '--list',
                              action='store_true', default=False,
                              help='List available descriptor types.')
    group_optional.add_option('--overwrite',
                              action='store_true', default=False,
                              help="Force descriptor computation even if an "
                                   "existing descriptor vector was discovered "
                                   "based on the given content descriptor type "
                                   "and data combination.")
    group_optional.add_option('-o', '--output-filepath',
                              help='Optional path to a file to output feature '
                                   'vector to. Otherwise the feature vector is '
                                   'printed to standard out. Output is saved '
                                   'in numpy binary format (.npy suffix '
                                   'recommended).')
    group_optional.add_option('-v', '--verbose',
                              action='store_true', default=False,
                              help='Print additional debugging messages. All '
                                   'logging goes to standard error.')
    parser.add_option_group(group_optional)

    opts, args = parser.parse_args()

    output_filepath = opts.output_filepath
    descriptor_label = opts.content_descriptor
    factory_label = opts.factory_type
    overwrite = opts.overwrite
    verbose = opts.verbose

    llevel = logging.DEBUG if verbose else logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("main")

    # Listing mode: report the available labels and stop.
    if opts.list:
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for dl in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s", dl)
        log.info("")
        log.info("Available DescriptorElementFactory types:")
        log.info("")
        for df in DescriptorFactoryConfiguration.available_labels():
            log.info("\t%s", df)
        log.info("")
        sys.exit(0)

    if len(args) == 0:
        log.error("Failed to provide an input file path")
        sys.exit(1)
    if len(args) > 1:
        log.warning("More than one filepath provided as an argument. Only "
                    "computing for the first one.")

    input_filepath = args[0]
    data_element = DataFileElement(input_filepath)

    cd = ContentDescriptorConfiguration.new_inst(descriptor_label)
    factory = DescriptorFactoryConfiguration.new_inst(factory_label)
    descr_elem = cd.compute_descriptor(data_element, factory, overwrite)
    vec = descr_elem.vector()

    if vec is None:
        log.error("Failed to generate a descriptor vector for the input data!")
        # Stop here: there is nothing to save or print, and continuing would
        # either write a file containing None or fail while iterating None.
        sys.exit(1)

    if output_filepath:
        numpy.save(output_filepath, vec)
    else:
        # Format each component explicitly rather than relying on numpy's
        # own array-to-string conversion.
        # noinspection PyTypeChecker
        # The parenthesised single-argument form prints identically under
        # the Python 2 print statement and the Python 3 print function.
        print(' '.join('%15f' % f for f in vec))