# Scratch directory that gets cleaned after each completed chunk in order to
# conserve disk space.
CLEAN_WORK_DIR = "/data/local/memex/kitware/smqtk/work/ContentDescriptors"

# Number of parallel processes to run
PARALLEL = 4

# Configuration label of the descriptor generator
DESCR_GENERATOR_CONFIG = "CD_CSIFT_RoxyImages_spatial"
# Configuration label of the descriptor factory
DESCR_FACTORY_CONFIG = "LocalDiskFactory"

# Descriptor generator instance built from the label above. Its own PARALLEL
# attribute is set to 1, independent of the module-level PARALLEL value.
DESCR_GENERATOR = ContentDescriptorConfiguration.new_inst(
    DESCR_GENERATOR_CONFIG
)
DESCR_GENERATOR.PARALLEL = 1

# Descriptor factory instance built from the label above
DESCR_FACTORY = DescriptorFactoryConfiguration.new_inst(DESCR_FACTORY_CONFIG)


def check_stage(label):
    """
    Report whether a stage has already been completed.

    A stage counts as complete when a regular file named after the label
    exists inside ``STAMP_FILES_DIR``.

    :param label: Label identifying the stage.

    :return: True if the given stage label has been marked complete.

    """
    stamp_path = osp.join(STAMP_FILES_DIR, label)
    return osp.isfile(stamp_path)


def mark_stage(label):
    """
    Mark the stage identified by the given label as complete.

    :param label: Label identifying the stage.

    """
    # NOTE(review): as visible here this function only logs; it does not
    # create the marker file that check_stage() looks for. Confirm that the
    # rest of the body was not lost.
    stage_log = logging.getLogger("mark_stage")
    stage_log.info("Marking stage '%s' complete", label)
def __init__(self, config_filepath=None):
    """
    Initialize the descriptor service application.

    Configuration is layered in this order: the ``smqtk_config`` object
    defaults, then whatever the environment variable named by
    ``ENV_CONFIG`` references (when that variable is set), then the file at
    ``config_filepath`` (when given and it names an existing file).

    After configuration, the descriptor element factory is created from the
    label stored in the environment variable named by
    ``ENV_DSS_DE_FACTORY`` and the web routes are registered.

    :param config_filepath: Optional path to a python configuration file.
        A leading ``~`` is expanded to the user's home directory before the
        path is checked and loaded.

    :raises RuntimeError: If the environment variable named by
        ``ENV_DSS_DE_FACTORY`` is not set.
    :raises ValueError: If the factory label from the environment is not a
        known configuration label (``new_inst`` raised ``KeyError``).

    """
    super(DescriptorServiceServer, self).__init__(
        self.__class__.__name__,
        static_folder=os.path.join(SCRIPT_DIR, 'static'),
        template_folder=os.path.join(SCRIPT_DIR, 'templates')
    )

    #
    # Configuration setup
    #
    config_env_loaded = config_file_loaded = None

    # Load default -- This should always be present, aka base defaults
    self.config.from_object('smqtk_config')
    config_default_loaded = True

    # Load from env var if present
    if self.ENV_CONFIG in os.environ:
        self.log.info("Loading config from env var (%s)...",
                      self.ENV_CONFIG)
        self.config.from_envvar(self.ENV_CONFIG)
        config_env_loaded = True

    # Load from configuration file if given. The user directory must be
    # expanded *before* making the path absolute and before testing for the
    # file's existence, otherwise a "~/..." path is never recognized.
    if config_filepath:
        config_file_path = \
            os.path.abspath(os.path.expanduser(config_filepath))
        if os.path.isfile(config_file_path):
            self.log.info("Loading config from file (%s)...",
                          config_file_path)
            self.config.from_pyfile(config_file_path)
            config_file_loaded = True

    self.log.debug("Config defaults loaded : %s", config_default_loaded)
    self.log.debug("Config from env loaded : %s", config_env_loaded)
    self.log.debug("Config from file loaded: %s", config_file_loaded)
    # Since the defaults flag is unconditionally True above, this guard
    # cannot currently trigger; it is kept as a safety net.
    if not (config_default_loaded or config_env_loaded
            or config_file_loaded):
        raise RuntimeError("No configuration file specified for loading. "
                           "(%s=%s) (file=%s)"
                           % (self.ENV_CONFIG,
                              os.environ.get(self.ENV_CONFIG, None),
                              config_filepath))

    # Descriptor factory setup
    if self.ENV_DSS_DE_FACTORY not in os.environ:
        raise RuntimeError("Missing environment configuration variable "
                           "`%s`, which should be set to the configuration "
                           "label of the DescriptorElementFactory to use."
                           % self.ENV_DSS_DE_FACTORY)
    # The variable is known to be present at this point, so no fallback
    # label is needed.
    self.de_factory_label = os.environ[self.ENV_DSS_DE_FACTORY]
    self.log.info("Using Descriptor factory: \"%s\"", self.de_factory_label)
    try:
        self.descr_elem_factory = \
            DescriptorFactoryConfiguration.new_inst(self.de_factory_label)
    except KeyError:
        raise ValueError("Invalid factory label set to %s: \"%s\""
                         % (self.ENV_DSS_DE_FACTORY, self.de_factory_label))

    # Cache of ContentDescriptor instances
    self.descriptor_cache = {}
    self.descriptor_cache_lock = multiprocessing.RLock()

    #
    # Security
    #
    self.secret_key = self.config['SECRET_KEY']

    @self.route("/")
    def list_ingest_labels():
        """
        Return the sorted list of available content descriptor labels.
        """
        return flask.jsonify({
            "labels": sorted(ContentDescriptorConfiguration
                             .available_labels())
        })

    @self.route("/all/content_types")
    def all_content_types():
        """
        Of available descriptors, what content types are processable, and
        what types are associated to which available descriptor generator.
        """
        r = {}
        all_types = set()
        for label in ContentDescriptorConfiguration.available_labels():
            # Query the generator once and reuse the result for both the
            # global set and the per-label listing.
            content_types = \
                self.get_descriptor_inst(label).valid_content_types()
            all_types.update(content_types)
            r[label] = sorted(content_types)

        return flask.jsonify({
            "all": sorted(all_types),
            "labels": r
        })

    @self.route("/all/compute/<path:uri>")
    def all_compute(uri):
        """
        Compute descriptors over the specified content for all generators
        that function over the data's content type.

        # JSON Return format
            {
                "success": <bool>

                "content_type": <str>

                "message": <str>

                "descriptors": {
                    "<label>": <list[float]>,
                    ...
                } | None

                "reference_uri": <str>
            }

        """
        message = "execution nominal"

        data_elem = None
        try:
            data_elem = self.resolve_data_element(uri)
        except ValueError as ex:
            message = "Failed URI resolution: %s" % str(ex)

        descriptors = {}
        finished_loop = False
        if data_elem:
            for label in ContentDescriptorConfiguration.available_labels():
                if data_elem.content_type() \
                        in self.get_descriptor_inst(label) \
                               .valid_content_types():
                    d = None
                    try:
                        d = self.generate_descriptor(data_elem, label)
                    except RuntimeError as ex:
                        message = "Descriptor extraction failure: %s" \
                                  % str(ex)
                    except ValueError as ex:
                        message = "Data content type issue: %s" % str(ex)

                    # Stays None when generation failed for this label
                    descriptors[label] = d and d.vector().tolist()
        # NOTE(review): the handler body ends here in the reviewed source:
        # no response is returned and ``finished_loop`` is never read.
        # Confirm whether the tail of this function was lost.
# Directory holding the per-stage completion marker files
STAMP_FILES_DIR = "/data/local/memex/kitware/smqtk/stage_markers"

# Scratch directory that gets cleaned after each completed chunk in order to
# conserve disk space.
CLEAN_WORK_DIR = "/data/local/memex/kitware/smqtk/work/ContentDescriptors"

# Number of parallel processes to run
PARALLEL = 4

# Configuration label of the descriptor generator
DESCR_GENERATOR_CONFIG = "CD_CSIFT_RoxyImages_spatial"
# Configuration label of the descriptor factory
DESCR_FACTORY_CONFIG = "LocalDiskFactory"

# Descriptor generator instance built from the label above. Its own PARALLEL
# attribute is set to 1, independent of the module-level PARALLEL value.
DESCR_GENERATOR = ContentDescriptorConfiguration.new_inst(
    DESCR_GENERATOR_CONFIG
)
DESCR_GENERATOR.PARALLEL = 1

# Descriptor factory instance built from the label above
DESCR_FACTORY = DescriptorFactoryConfiguration.new_inst(DESCR_FACTORY_CONFIG)


def check_stage(label):
    """
    Report whether a stage has already been completed.

    A stage counts as complete when a regular file named after the label
    exists inside ``STAMP_FILES_DIR``.

    :param label: Label identifying the stage.

    :return: True if the given stage label has been marked complete.

    """
    stamp_path = osp.join(STAMP_FILES_DIR, label)
    return osp.isfile(stamp_path)


def mark_stage(label):
    """
    Mark the stage identified by the given label as complete.

    :param label: Label identifying the stage.

    """
    # NOTE(review): as visible here this function only logs; it does not
    # create the marker file that check_stage() looks for. Confirm that the
    # rest of the body was not lost.
    stage_log = logging.getLogger("mark_stage")
    stage_log.info("Marking stage '%s' complete", label)
def main():
    """
    Command-line entry point: compute a descriptor vector for one data file.

    Parses the command line, optionally lists the available configuration
    labels (``--list``), then computes the descriptor of the first positional
    argument using the selected content descriptor and descriptor element
    factory. The resulting vector is saved with ``numpy.save`` when an output
    path was given, otherwise it is printed to standard out.

    Exits with status 0 after listing, and with status 1 when no input file
    was given or when no descriptor vector could be produced.

    """
    usage = "%prog [OPTIONS] INPUT_FILE"
    description = """\
Compute a descriptor vector for a given data file, outputting the generated
feature vector to standard out, or to an output file if one was specified (in
numpy format).
"""
    parser = bin_utils.SMQTKOptParser(usage, description=description)

    group_labels = optparse.OptionGroup(parser, "Configuration Labels")
    group_labels.add_option('-c', '--content-descriptor',
                            help='The descriptor type to use. This must be a '
                                 'type available in the system configuration')
    group_labels.add_option('-f', '--factory-type',
                            help='The DescriptorElementFactory configuration '
                                 'to use when computing the descriptor. This '
                                 'must be a type available in the system '
                                 'configuration.')
    parser.add_option_group(group_labels)

    group_optional = optparse.OptionGroup(parser, "Optional Parameters")
    group_optional.add_option('-l', '--list',
                              action='store_true', default=False,
                              help='List available descriptor types.')
    group_optional.add_option('--overwrite',
                              action='store_true', default=False,
                              help="Force descriptor computation even if an "
                                   "existing descriptor vector was discovered "
                                   "based on the given content descriptor type "
                                   "and data combination.")
    group_optional.add_option('-o', '--output-filepath',
                              help='Optional path to a file to output feature '
                                   'vector to. Otherwise the feature vector is '
                                   'printed to standard out. Output is saved '
                                   'in numpy binary format (.npy suffix '
                                   'recommended).')
    group_optional.add_option('-v', '--verbose',
                              action='store_true', default=False,
                              help='Print additional debugging messages. All '
                                   'logging goes to standard error.')
    parser.add_option_group(group_optional)

    opts, args = parser.parse_args()

    output_filepath = opts.output_filepath
    descriptor_label = opts.content_descriptor
    factory_label = opts.factory_type
    overwrite = opts.overwrite
    verbose = opts.verbose

    llevel = logging.DEBUG if verbose else logging.INFO
    bin_utils.initialize_logging(logging.getLogger(), llevel)
    log = logging.getLogger("main")

    if opts.list:
        log.info("")
        log.info("Available ContentDescriptor types:")
        log.info("")
        for dl in ContentDescriptorConfiguration.available_labels():
            log.info("\t%s", dl)
        log.info("")
        log.info("Available DescriptorElementFactory types:")
        log.info("")
        for df in DescriptorFactoryConfiguration.available_labels():
            log.info("\t%s", df)
        log.info("")
        exit(0)

    if len(args) == 0:
        log.error("Failed to provide an input file path")
        exit(1)
    if len(args) > 1:
        log.warning("More than one filepath provided as an argument. Only "
                    "computing for the first one.")

    input_filepath = args[0]
    data_element = DataFileElement(input_filepath)

    cd = ContentDescriptorConfiguration.new_inst(descriptor_label)
    factory = DescriptorFactoryConfiguration.new_inst(factory_label)
    descr_elem = cd.compute_descriptor(data_element, factory, overwrite)
    vec = descr_elem.vector()

    if vec is None:
        log.error("Failed to generate a descriptor vector for the input data!")
        # Nothing to output; stop here instead of saving or iterating None.
        exit(1)

    if output_filepath:
        numpy.save(output_filepath, vec)
    else:
        # Build the output string by hand, one fixed-width field per vector
        # element, rather than relying on numpy's own array printing.
        # noinspection PyTypeChecker
        s = ['%15f' % f for f in vec]
        # Single-argument call form behaves the same under Python 2 and 3.
        print(' '.join(s))