def execute(self, args, config, study):
    """Benchmark raw feature distances for every configured image set.

    For each image set: load the query image, transform it (curvelets),
    extract features, then compute a distance between the query features
    and the features of every source image (computed in parallel via
    ``process_image`` through ``self.logger.sync_loop``).  The resulting
    distance table is correlated against *study*.

    :param args: parsed command-line arguments (unused here).
    :param config: configuration mapping with ``images``, ``readers``,
        ``curvelets``, ``features`` and ``metric`` sections.
    :param study: ground-truth data passed to ``correlate_to_study``.
    :return: ``(correlations, mean_correlation)`` tuple.
    """
    # NOTE: a DiskCache-based feature cache used to live here; the
    # commented-out implementation was removed as dead code.
    data = common.RDict(config=config)
    for image_set in self.logger.loop(
            data["config"]["images"],
            entry_message="Processing {count} image sets",
            item_prefix="image set"):
        self.logger.log("Processing query image '{}'...".format(image_set["query_image"]))
        # Query pipeline: read -> curvelet transform -> feature extraction.
        query_image = common.load(config["readers"]["query"]).execute(image_set["query_image"], data=data)
        query_coefficients = common.load(config["curvelets"]["transform"]).execute(query_image, data=data)
        query_features = common.load(config["features"]["extractor"]).execute(query_coefficients, data=data)
        for source_image_filename, features in self.logger.sync_loop(
                process_image,
                *common.augment_list(common.glob_list(image_set["source_images"]), data),
                entry_message="Processing {count} images...",
                item_prefix="image"):
            self.logger.log("Processing image '{}'...".format(source_image_filename))
            # RDict auto-vivifies the nested distance table:
            # distances[query][source] = metric(query_features, features)
            data["distances"][image_set["query_image"]][source_image_filename] = common.load(
                config["metric"]["metric"]
            ).execute(query_features, features, data=data)
    correlations, mean_correlation = self.correlate_to_study(data["distances"], study)
    return (correlations, mean_correlation)
def execute(self, args, config, study):
    """Benchmark codebook signatures for every configured image set.

    Loads a codebook (from ``args.codebook`` if given, else from the
    configuration), quantizes the query image's features into a signature,
    compares it against the signature of every global source image, and
    accumulates per-query precision/recall statistics.

    :param args: parsed command-line arguments; ``args.codebook`` may name
        a codebook path overriding the configuration.
    :param config: configuration mapping (``images``, ``readers``,
        ``curvelets``, ``features``, ``weights``, ``metric``, ``codebook``).
    :param study: ground-truth data for ``get_precision_recall``.
    :return: ``(precisions, mean_average_precision)`` tuple.
    """
    if args.codebook is None:
        codebook = common.codebook.Codebook.load_from_config(config)
    else:
        codebook = common.codebook.Codebook.load_from_path(
            args.codebook, size=config["codebook"]["codebook_size"])
    data = common.RDict(config=config)
    data["codewords"] = codebook.codewords
    for image_set in self.logger.loop(
            data["config"]["images"],
            entry_message="Processing {count} image sets",
            item_prefix="image set"):
        # Distractor-only sets contribute sources elsewhere; skip them here.
        if image_set.get("skip_benchmark", False):
            self.logger.log("Skipping distractor image set...")
            continue
        query_filename = image_set["query_image"]
        self.logger.log("Processing query image '{}'...".format(query_filename))
        # Query pipeline: read -> curvelet transform -> features -> signature.
        query_image = common.load(config["readers"]["query"]).execute(query_filename, data=data)
        query_coefficients = common.load(config["curvelets"]["transform"]).execute(query_image, data=data)
        query_features = common.load(config["features"]["extractor"]).execute(query_coefficients, data=data)
        query_signature = codebook.quantize(
            query_features,
            use_stopwords=config["weights"]["use_stopwords"],
            use_weights=config["weights"]["use_weights"],
        )
        metric = common.load(config["metric"]["metric"])
        for source_image_filename, signature in self.logger.sync_loop(
                get_signature,
                *common.augment_list(
                    common.glob_list(data["config"]["source_images"]),
                    data,
                    codebook,
                ),
                entry_message="Processing {count} images...",
                item_prefix="image"):
            self.logger.log("Processing image '{}'...".format(source_image_filename))
            data["distances"][query_filename][source_image_filename] = \
                metric.execute(query_signature, signature, data=data)
        self.logger.log("Calculating precisions for '{}'...".format(query_filename))
        precisions = self.get_precision_recall(
            query_filename, data["distances"][query_filename], study)
        data["precisions"][query_filename] = precisions
        self.logger.log("Precisions: {}".format(precisions))
    return (data["precisions"], self.get_mean_average_precision(data["precisions"]))
def preprocess(self, args, config):
    """Run the per-image feature pipeline for matching image sets.

    When ``args.key`` is given it is treated as a regular expression and
    only image sets whose ``key`` matches are processed; otherwise all
    configured image sets are used.  Features are computed (in parallel via
    ``process_image``) but not returned — presumably this warms a cache;
    verify against ``process_image``.

    :param args: parsed command-line arguments; ``args.key`` optionally
        filters image sets by regex.
    :param config: configuration mapping with an ``images`` section.
    """
    data = common.RDict(config=common.RDict.from_dict(config))
    image_sets = data["config"]["images"]
    if args.key is not None:
        pattern = re.compile(args.key)
        image_sets = [candidate for candidate in image_sets
                      if pattern.search(candidate["key"]) is not None]
    for image_set in self.logger.loop(
            image_sets,
            entry_message="Processing {count} image sets",
            item_prefix="image set"):
        for source_image_filename, features in self.logger.sync_loop(
                process_image,
                *common.augment_list(
                    common.glob_list(image_set["source_images"]),
                    data,
                ),
                entry_message="Processing {count} images...",
                item_prefix="image"):
            self.logger.log("Processing image '{}'...".format(source_image_filename))
def create(self, args, config):
    """Create, cluster and persist a codebook from all source images.

    Features of every source image (including an optional global
    ``source_images`` set appended to ``config["images"]``) are added as
    observations, then clustered into codewords and saved.

    :param args: parsed command-line arguments; ``args.codebook`` may name
        a target codebook path, ``args.config`` names the config file.
    :param config: configuration mapping; note ``config["images"]`` is
        mutated in place when a global ``source_images`` entry exists.
    """
    if args.codebook is None:
        codebook = common.codebook.Codebook.create_from_config(config)
    else:
        codebook = common.codebook.Codebook.create_from_path(
            args.codebook, size=config["codebook"]["codebook_size"])
    codebook.storage.update_metadata({
        "config_file": args.config,
        "config": config,
    })
    # Fold the global source images into the image-set list so they also
    # contribute observations.
    if "source_images" in config:
        config["images"].append({
            "query_image": None,
            "key": "global images",
            "source_images": config["source_images"],
        })
    data = common.RDict(config=common.RDict.from_dict(config))
    for image_set in self.logger.loop(
            data["config"]["images"],
            entry_message="Processing {count} image sets",
            item_prefix="image set"):
        for source_image_filename, features in self.logger.sync_loop(
                process_image,
                *common.augment_list(
                    common.glob_list(image_set["source_images"]),
                    data,
                ),
                entry_message="Processing {count} images...",
                item_prefix="image"):
            self.logger.log("Processing image '{}'...".format(source_image_filename))
            codebook.add_observations(features)
    self.logger.log("Clustering observations...")
    codebook.cluster()
    self.logger.log("Saving codebook...")
    codebook.save()
def convert(self, args, config, study):
    """Rank source images with an external search tool and correlate to the study.

    For each image set the configured ``search.commandline`` is run (with
    ``{query_image}`` substituted), its output is parsed as
    ``number similarity filename`` triples, and the line index is recorded
    as the distance for any filename that maps to a known source image.

    :param args: parsed command-line arguments (unused here).
    :param config: configuration mapping with ``images`` and
        ``search.commandline`` entries.
    :param study: ground-truth data passed to ``correlate_to_study``.
    :return: ``(correlations, mean_correlation)`` tuple.
    """
    distances = common.RDict()
    for image_set in self.logger.loop(
            config["images"],
            entry_message="Processing {count} image sets",
            item_prefix="image set"):
        self.logger.log("Processing query image '{}'...".format(image_set["query_image"]))
        # Map the last two path components (as a stringified tuple — this is
        # the key format the external tool is expected to print; TODO confirm)
        # back to the full source path.
        source_image_filenames = dict(
            (str(pathlib.Path(str(f)).parts[-2:]), str(f))
            for f in common.glob_list(image_set["source_images"]))
        query_image_filename = image_set["query_image"]
        commandline = config["search"]["commandline"].format(query_image=query_image_filename)
        # NOTE(review): shell=True with a formatted command string is a shell
        # injection risk if filenames are untrusted — consider an argv list.
        # universal_newlines=True makes check_output return text on Python 3;
        # without it the output is bytes and the str-keyed lookup below can
        # never match (a no-op benchmark).
        search_output = subprocess.check_output(commandline, shell=True,
                                                universal_newlines=True)
        for index, line in enumerate(search_output.splitlines()):
            parts = line.split()
            if len(parts) == 3:  # ignore malformed / header lines
                number, similarity, filename = parts
                if filename in source_image_filenames:
                    # Rank position (line index) serves as the distance.
                    distances[query_image_filename][source_image_filenames[filename]] = index
    correlations, mean_correlation = self.correlate_to_study(distances, study)
    return (correlations, mean_correlation)