def execute(self, args, config, study):
    """Benchmark raw curvelet-feature distances and correlate them to a study.

    For every configured image set: extract features from the query image
    (read -> curvelet transform -> feature extraction), compute features for
    each source image via ``process_image``, store the pairwise distances,
    then correlate all distances against the reference study.

    Args:
        args: parsed command-line arguments (unused here).
        config: configuration mapping with "images", "readers", "curvelets",
            "features" and "metric" sections.
        study: reference data handed to ``self.correlate_to_study``.

    Returns:
        Tuple ``(correlations, mean_correlation)``.
    """
    data = common.RDict(config=config)
    for image_set in self.logger.loop(
        data["config"]["images"],
        entry_message="Processing {count} image sets",
        item_prefix="image set",
    ):
        self.logger.log("Processing query image '{}'...".format(image_set["query_image"]))

        # Query pipeline: reader -> curvelet transform -> feature extractor.
        query_image = common.load(config["readers"]["query"]).execute(image_set["query_image"], data=data)
        query_coefficients = common.load(config["curvelets"]["transform"]).execute(query_image, data=data)
        query_features = common.load(config["features"]["extractor"]).execute(query_coefficients, data=data)

        # process_image produces (filename, features) pairs for each globbed
        # source image; sync_loop drives it and reports progress.
        for source_image_filename, features in self.logger.sync_loop(
            process_image,
            *common.augment_list(common.glob_list(image_set["source_images"]), data),
            entry_message="Processing {count} images...",
            item_prefix="image"
        ):
            self.logger.log("Processing image '{}'...".format(source_image_filename))

            # RDict auto-creates the nested keys on first assignment.
            data["distances"][image_set["query_image"]][source_image_filename] = common.load(
                config["metric"]["metric"]
            ).execute(query_features, features, data=data)
    correlations, mean_correlation = self.correlate_to_study(data["distances"], study)
    return (correlations, mean_correlation)
    def execute(self, args, config, study):
        if args.codebook is not None:
            codebook = common.codebook.Codebook.load_from_path(args.codebook, size=config["codebook"]["codebook_size"])
        else:
            codebook = common.codebook.Codebook.load_from_config(config)

        data = common.RDict(config=config)
        data["codewords"] = codebook.codewords
        for image_set in self.logger.loop(
                data["config"]["images"],
                entry_message="Processing {count} image sets",
                item_prefix="image set"):
            if image_set.get("skip_benchmark", False):
                self.logger.log("Skipping distractor image set...")
            else:
                self.logger.log("Processing query image '{}'...".format(image_set["query_image"]))

                query_image = common.load(config["readers"]["query"]).execute(image_set["query_image"], data=data)
                query_coefficients = common.load(config["curvelets"]["transform"]).execute(query_image, data=data)
                query_features = common.load(config["features"]["extractor"]).execute(query_coefficients, data=data)
                query_signature = codebook.quantize(query_features,
                        use_stopwords=config["weights"]["use_stopwords"],
                        use_weights=config["weights"]["use_weights"],
                        )
                #self.logger.log(query_signature)

                for source_image_filename, signature in self.logger.sync_loop(
                        get_signature,
                        *common.augment_list(
                            common.glob_list(data["config"]["source_images"]),
                            data,
                            codebook,
                            ),
                        entry_message="Processing {count} images...",
                        item_prefix="image"):
                    self.logger.log("Processing image '{}'...".format(source_image_filename))

                    #self.logger.log(signature)
                    data["distances"][image_set["query_image"]][source_image_filename] =\
                            common.load(config["metric"]["metric"]).execute(query_signature, signature, data=data)
                self.logger.log("Calculating precisions for '{}'...".format(image_set["query_image"]))
                a = data["precisions"][image_set["query_image"]] = self.get_precision_recall(image_set["query_image"], data["distances"][image_set["query_image"]], study)
                self.logger.log("Precisions: {}".format(a))

        #correlations, mean_correlation = self.correlate_to_study(data["distances"], study)
        #precision_recall_stats, mean_stats = self.correlate_to_study(data["distances"], study)
        #self.logger.log("Mean correlation: {}".format(mean_correlation))
        return (data["precisions"], self.get_mean_average_precision(data["precisions"]))
# Exemple #3 (score: 0) — scraper section separator, kept as a comment
 def preprocess(self, args, config):
     """Precompute features for the configured image sets.

     The per-image work is delegated to ``process_image`` via
     ``sync_loop``; this method only selects the image sets and logs
     progress.

     Args:
         args: parsed command-line arguments; ``args.key`` optionally holds
             a regex used to filter image sets by their "key" field.
         config: configuration mapping with an "images" list.
     """
     data = common.RDict(config=common.RDict.from_dict(config))
     # Optionally restrict processing to image sets whose "key" matches the
     # regex given on the command line; otherwise process all of them.
     if args.key is not None:
         key_expression = re.compile(args.key)
         image_sets = [image_set for image_set in data["config"]["images"] if key_expression.search(image_set["key"]) is not None]
     else:
         image_sets = data["config"]["images"]
     for image_set in self.logger.loop(
             image_sets,
             entry_message="Processing {count} image sets",
             item_prefix="image set"):
         # NOTE(review): `features` is unpacked but never used here —
         # presumably process_image caches/persists its result as a side
         # effect; confirm against its implementation.
         for source_image_filename, features in self.logger.sync_loop(
                 process_image,
                 *common.augment_list(
                     common.glob_list(image_set["source_images"]),
                     data,
                     ),
                 entry_message="Processing {count} images...",
                 item_prefix="image"):
             self.logger.log("Processing image '{}'...".format(source_image_filename))
# Exemple #4 (score: 0) — scraper section separator, kept as a comment
    def create(self, args, config):
        """Build, cluster and save a codebook from the configured images.

        Collects feature observations from every source image of every
        image set (including an optional global "source_images" set), then
        clusters them into codewords and persists the codebook.

        Args:
            args: parsed command-line arguments; ``args.codebook`` optionally
                points at a path for the new codebook, ``args.config`` names
                the config file recorded in the codebook metadata.
            config: configuration mapping with "images" and "codebook"
                sections, plus an optional top-level "source_images" list.
        """
        # An explicit codebook path on the command line wins over the config.
        if args.codebook is None:
            codebook = common.codebook.Codebook.create_from_config(config)
        else:
            codebook = common.codebook.Codebook.create_from_path(
                args.codebook, size=config["codebook"]["codebook_size"])
        codebook.storage.update_metadata(dict(
            config_file=args.config,
            config=config,
            ))

        # Fold the optional global source images into the image-set list so
        # they contribute observations too.
        # NOTE(review): this appends to the caller's config in place — verify
        # nothing downstream depends on the original "images" list.
        if "source_images" in config:
            config["images"].append(dict(
                query_image=None,
                key="global images",
                source_images=config["source_images"],
                ))

        data = common.RDict(config=common.RDict.from_dict(config))
        image_set_iter = self.logger.loop(
            data["config"]["images"],
            entry_message="Processing {count} image sets",
            item_prefix="image set")
        for current_set in image_set_iter:
            # process_image yields (filename, features) per globbed image.
            source_iter = self.logger.sync_loop(
                process_image,
                *common.augment_list(
                    common.glob_list(current_set["source_images"]),
                    data,
                    ),
                entry_message="Processing {count} images...",
                item_prefix="image")
            for filename, observed_features in source_iter:
                self.logger.log("Processing image '{}'...".format(filename))
                codebook.add_observations(observed_features)
        self.logger.log("Clustering observations...")
        codebook.cluster()
        self.logger.log("Saving codebook...")
        codebook.save()
    def convert(self, args, config, study):
        """Run an external search tool per query and convert its ranking
        into pseudo-distances, then correlate them against the study.

        Args:
            args: parsed command-line arguments (unused here).
            config: configuration with an "images" list and a
                "search.commandline" template (``{query_image}`` placeholder).
            study: reference data handed to ``self.correlate_to_study``.

        Returns:
            Tuple ``(correlations, mean_correlation)``.
        """
        distances = common.RDict()
        for image_set in self.logger.loop(
                config["images"],
                entry_message="Processing {count} image sets",
                item_prefix="image set"):
            self.logger.log("Processing query image '{}'...".format(image_set["query_image"]))

            # Index source images by their last two path components so the
            # tool's reported paths match regardless of base directory.
            source_image_filenames = dict(
                (str(pathlib.Path(str(f)).parts[-2:]), str(f))
                for f in common.glob_list(image_set["source_images"]))
            query_image_filename = image_set["query_image"]
            # SECURITY NOTE: the command line comes from config and runs
            # through the shell — config must be trusted.
            commandline = config["search"]["commandline"].format(query_image=query_image_filename)
            # BUG FIX: check_output returns bytes; decode so the parsed
            # filenames are str and can match the str keys built above.
            search_output = subprocess.check_output(commandline, shell=True).decode()

            # Expected line format: "<number> <similarity> <filename>".
            # The line index (rank) is used as the pseudo-distance.
            for index, line in enumerate(search_output.splitlines()):
                parts = line.split()
                if len(parts) == 3:
                    number, similarity, filename = parts
                    # BUG FIX: normalize the reported filename with the same
                    # last-two-components key used for the lookup table; the
                    # raw path could never equal the tuple-string keys.
                    key = str(pathlib.Path(filename).parts[-2:])
                    if key in source_image_filenames:
                        distances[query_image_filename][source_image_filenames[key]] = index

        correlations, mean_correlation = self.correlate_to_study(distances, study)
        return (correlations, mean_correlation)