def score(self, sim, temp_dir: str):
    """
    For the input similarity scores of the model, calculate the mean AP metric
    and mean Precision@k metrics.
    """
    sim = sim.T
    # Credits: https://github.com/filipradenovic/revisitop/blob/master/python/example_evaluate.py  # NOQA
    ranks = np.argsort(-sim, axis=0)

    # revisited evaluation
    gnd = self.cfg["gnd"]

    # evaluate ranks
    ks = [1, 5, 10]

    # search for easy
    gnd_t = []
    for i in range(len(gnd)):
        g = {}
        g["ok"] = np.concatenate([gnd[i]["easy"]])
        g["junk"] = np.concatenate([gnd[i]["junk"], gnd[i]["hard"]])
        gnd_t.append(g)
    mapE, apsE, mprE, prsE = compute_map(ranks, gnd_t, ks)

    # search for easy & hard
    gnd_t = []
    for i in range(len(gnd)):
        g = {}
        g["ok"] = np.concatenate([gnd[i]["easy"], gnd[i]["hard"]])
        g["junk"] = np.concatenate([gnd[i]["junk"]])
        gnd_t.append(g)
    mapM, apsM, mprM, prsM = compute_map(ranks, gnd_t, ks)

    # search for hard
    gnd_t = []
    for i in range(len(gnd)):
        g = {}
        g["ok"] = np.concatenate([gnd[i]["hard"]])
        g["junk"] = np.concatenate([gnd[i]["junk"], gnd[i]["easy"]])
        gnd_t.append(g)
    mapH, apsH, mprH, prsH = compute_map(ranks, gnd_t, ks)

    logging.info(
        ">> {}: mAP E: {}, M: {}, H: {}".format(
            self.cfg["dataset"],
            np.around(mapE * 100, decimals=2),
            np.around(mapM * 100, decimals=2),
            np.around(mapH * 100, decimals=2),
        )
    )
    logging.info(
        ">> {}: mP@k{} E: {}, M: {}, H: {}".format(
            self.cfg["dataset"],
            np.array(ks),
            np.around(mprE * 100, decimals=2),
            np.around(mprM * 100, decimals=2),
            np.around(mprH * 100, decimals=2),
        )
    )
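
# Usage sketch (illustration only, not part of the original file). This variant of
# score() follows the revisited Oxford/Paris protocol: `sim` is assumed to be a
# (num_queries, num_database) similarity matrix and self.cfg["gnd"] a list with one
# entry per query holding "easy", "hard" and "junk" database-index arrays. The toy
# ground truth and the `evaluator` object below are made-up placeholders:
#
#     gnd = [
#         {"easy": np.array([0]), "hard": np.array([3]), "junk": np.array([7])},
#         {"easy": np.array([2, 4]), "hard": np.array([5]), "junk": np.array([1])},
#     ]
#     evaluator.cfg = {"dataset": "roxford5k", "gnd": gnd}
#     sim = np.random.rand(2, 10)  # 2 queries x 10 database images
#     evaluator.score(sim, temp_dir="/tmp")  # logs mAP and mP@k for the E / M / H setups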

def score(self, sim, temp_dir=None):
    """
    For the input similarity scores of the model, calculate the mean AP metric
    and mean Precision@k metrics.
    """
    # Ranks at which precision@k is reported
    ks = [1, 5, 10]
    query_filenames = self.query_filenames[0:sim.shape[0]]
    database_filenames = self.database_filenames[0:sim.shape[1]]

    # Calculate mAP for each query split
    results = {}
    for query_split in self.query_splits:
        # Get indices of the split's queries.
        query_indices = []
        for i, query_split_filename in enumerate(query_filenames):
            if query_split in query_split_filename:
                query_indices.append(i)

        # No queries for this split. Used for DEBUG_MODE when imposing a data limit.
        if len(query_indices) == 0:
            continue

        # Choose only the rows of the split.
        query_split_filenames = query_filenames[query_indices]
        split_sim = sim[query_indices].T

        # Calculate the ranks.
        ranks = np.argsort(-split_sim, axis=0)

        has_query_match = False
        query_matches = []
        # Find the matching database images for each query.
        for query_filename in query_split_filenames:
            matching_indices = []
            for i, database_filename in enumerate(database_filenames):
                if self._is_query_database_match(database_filename, query_filename):
                    has_query_match = True
                    matching_indices.append(i)
            matches = {"ok": np.array(matching_indices)}
            query_matches.append(matches)

        # No database matches to compute mAP. Used in DEBUG_MODE when imposing a data limit.
        if not has_query_match:
            continue

        # Compute mean average precision and precision@k
        map_metric, _, mpr, _ = compute_map(ranks, query_matches, ks)
        results[query_split] = {
            "mAP": map_metric,
            "mp@k": {"k": ks, "mAP": mpr.tolist()},
        }
    return results
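
# Usage sketch (illustration only, not part of the original file). This variant of
# score() groups queries by the substrings listed in self.query_splits and returns a
# dict keyed by split. The attribute values below are made-up placeholders; in the
# real class they are populated from the dataset. Note that query/database filenames
# are assumed to be numpy arrays so that fancy indexing with `query_indices` works:
#
#     evaluator.query_splits = ["paris", "oxford"]
#     evaluator.query_filenames = np.array(["paris_q1.jpg", "oxford_q1.jpg"])
#     evaluator.database_filenames = np.array(["paris_db1.jpg", "oxford_db1.jpg"])
#     sim = np.random.rand(2, 2)  # (num_queries, num_database)
#     results = evaluator.score(sim)
#     # results == {"paris": {"mAP": ..., "mp@k": {"k": [1, 5, 10], "mAP": [...]}}, ...}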