def get_storage_graph(self) -> GraphDataStruct:
    """
    Export the current state of the database as a graph datastructure.
    This represents the storage graph of the server.

    Every cluster id returned by get_cluster_list() is added as a cluster,
    every picture of that cluster is added as a node (labelled with the second
    element of the picture entry, i.e. its score), and an edge goes from the
    cluster to the picture.

    # TODO : Move to API ?

    :return: The storage graph of the server, as is in the database
    """
    # Empty graph, flagged as coming from a database dump
    storage_graph = GraphDataStruct(Metadata(Source.DBDUMP))

    # One cluster entry per stored cluster, then its pictures
    for cluster_id in self.get_cluster_list():
        storage_graph.add_cluster(Cluster(label="", tmp_id=cluster_id, image=""))

        picture_list = self.get_pictures_of_cluster(cluster_id, with_score=True)
        self.logger.info(f"Picture list : {picture_list}")

        for picture in picture_list:
            # picture[0] is used as the node id, picture[1] (score) as its label
            picture_score = picture[1]
            picture_id = picture[0]
            storage_graph.add_node(Node(label=picture_score, tmp_id=picture_id, image=""))
            storage_graph.add_edge(Edge(_from=cluster_id, _to=picture_id))

    return storage_graph
def generate_basic_graph_with_mapping(VISJS=False) -> (GraphDataStruct, dict):
    """
    Build a small test graph together with an image-name -> new-id mapping.

    The graph holds 2 clusters (ids 0 and 1). Each cluster gets 3 picture nodes
    whose id is "<cluster>_<index>OLD" and whose image is "<cluster>_<index>IMAGE";
    one edge links the cluster to each of its pictures. The mapping associates
    each image name with "<cluster>_<index>NEW".

    :param VISJS: if truthy, the graph metadata source is Source.VISJS, otherwise Source.DBDUMP
    :return: the generated graph and the mapping dict
    """
    graph_source = Source.VISJS if VISJS else Source.DBDUMP
    graph = GraphDataStruct(Metadata(graph_source))
    image_to_new_id = {}

    for cluster_id in range(2):
        graph.add_cluster(Cluster(label="", tmp_id=cluster_id, image=""))

        for pic_index in range(3):
            # Common "<cluster>_<index>" prefix shared by the three derived names
            prefix = str(cluster_id) + "_" + str(pic_index)
            pic_id = prefix + "OLD"
            pic_image = prefix + "IMAGE"

            # Prepare mapping
            image_to_new_id[pic_image] = prefix + "NEW"

            graph.add_node(Node(label="picture name +" + pic_id, tmp_id=pic_id, image=pic_image))
            graph.add_edge(Edge(_from=cluster_id, _to=pic_id))

    return graph, image_to_new_id
def test_graph_import_export_consistency(self):
    """
    Check that a graph survives an export -> import -> export round trip.

    The basic test graph is exported as a dict, loaded back with
    GraphDataStruct.load_from_dict and exported again. The first export must
    equal self.expected_json, and both exports must be equal to each other.
    """
    original_graph = self.generate_basic_graph()

    print("Exported dict : ")
    exported = original_graph.export_as_dict()
    pprint.pprint(exported)

    print("Import graphe : ")
    reloaded_graph = GraphDataStruct.load_from_dict(exported)
    pprint.pprint(reloaded_graph)

    print("Exported dict (after import): ")
    re_exported = reloaded_graph.export_as_dict()
    pprint.pprint(re_exported)

    # First against the reference, then against the round-tripped export
    self.assertDictEqual(exported, self.expected_json)
    self.assertDictEqual(exported, re_exported)
def get_perf_list(
        self, list_results: List, gt_graph: GraphDataStruct,
        output_folder: pathlib.Path = None) -> List[perf_datastruct.Perf]:
    """
    Extract a list of performance datastructure from a list of results (list_results)
    compared to a ground truth file (gt_graph).
    Stores the provided list and the ground truth graph as JSON files in (output_folder)
    if one is given ("requests_result.json" and "gt_graph.json").

    :param list_results: The list of results extracted from server (one result for each node of the graph)
    :param gt_graph: The ground truth file that serves as reference
    :param output_folder: Optional output folder in which the inputs are saved. Nothing is saved if None.
    :return: a list of performance datastructure, each having a threshold and a stats datastructure.
    This means that for each computed threshold, we know the quality of the graph.
    """
    # DEBUG purposes / Display arguments
    self.logger.debug("Received requests results :")
    self.logger.debug(pformat(list_results))
    self.logger.debug("Received ground truth graph :")
    self.logger.debug(pformat(gt_graph.export_as_dict()))

    # TODO : Remove output folder ?
    if output_folder is not None:
        # Write into the folder the caller asked for (it was previously ignored
        # and the files always ended up in the home directory).
        target_folder = pathlib.Path(output_folder)
        target_folder.mkdir(parents=True, exist_ok=True)

        # Saving list results
        json_import_export.save_json(list_results, target_folder / "requests_result.json")
        # Saving ground truth graph
        json_import_export.save_json(gt_graph.export_as_dict(), target_folder / "gt_graph.json")
    else:
        self.logger.debug(
            "List results and ground truth graph can't be saved : no output_folder specified."
        )

    perfs_list = self._compute_perfs_list(list_results, gt_graph)
    return perfs_list
def get_db_dump_as_graph(self) -> GraphDataStruct:
    """
    Request a dump of the server database and return it as a graph datastructure.

    :return: A graph datastructure of the server's storage
    :raises Exception: if export_db_server() reports a failure
    """
    is_success, db = self.export_db_server()

    # Bail out early when the dump could not be fetched
    if not is_success:
        raise Exception(f"Error during db dump of {db}")

    print(f"Database fetched successfully.")
    return GraphDataStruct.load_from_dict(db)
def generate_basic_graph() -> GraphDataStruct:
    """
    Build a small test graph flagged as a database dump.

    The graph holds 2 clusters (ids 0 and 1); each cluster gets 3 picture nodes
    whose id is "<cluster>_<index>", with one edge from the cluster to each picture.

    :return: the generated graph
    """
    graph = GraphDataStruct(Metadata(Source.DBDUMP))

    for cluster_id in range(2):
        graph.add_cluster(Cluster(label="", tmp_id=cluster_id, image=""))

        for pic_index in range(3):
            pic_id = str(cluster_id) + "_" + str(pic_index)
            graph.add_node(Node(label="picture name +" + pic_id, tmp_id=pic_id, image=""))
            graph.add_edge(Edge(_from=cluster_id, _to=pic_id))

    return graph
def test_compute_score_for_one_threshold(self):
    """
    Check the statistics returned by compute_score_for_one_threshold on a fixed example.

    Six requests (pictures "1" to "6") are evaluated against a ground truth graph
    with clusters A = {1, 2, 3}, B = {4, 5}, C = {6}. The stats are verified for
    thresholds 0, 0.5 and 1 with NB_TO_CHECK = 1, then for threshold 0.5 with
    NB_TO_CHECK = 3. Please check documentation for more information on the example graph.
    """
    cal_conf = Default_calibrator_conf()
    quality_evaluator = similarity_graph_quality_evaluator.similarity_graph_quality_evaluator(
        cal_conf)

    def make_match(distance, image_id):
        # One matched picture, as listed in a request result
        return {
            "cluster_id": "XXX",
            "decision": "YES",
            "distance": distance,
            "image_id": image_id
        }

    def make_request(request_id, matches):
        # One request result, as returned by the server
        return {
            "list_pictures": matches,
            "request_id": request_id,
            "status": "matches_found",
            "request_time": 0
        }

    requests_results = [
        # 1 to 2 and 3
        make_request("1", [make_match(0.1, "2"), make_match(0.6, "3")]),
        make_request("2", [make_match(0.3, "6")]),
        make_request("3", [make_match(0.2, "1")]),
        make_request("4", [make_match(0.4, "2")]),
        make_request("5", [make_match(0.6, "4")]),
        make_request("6", [make_match(0.7, "2")]),
    ]

    # Create the reference graph : clusters first, then nodes, then edges
    graph_data_struct = GraphDataStruct(Metadata(Source.DBDUMP))
    for cluster_name in ("A", "B", "C"):
        graph_data_struct.add_cluster(
            Cluster(label=cluster_name, tmp_id=cluster_name, image=cluster_name))

    membership = (("1", "A"), ("2", "A"), ("3", "A"), ("4", "B"), ("5", "B"), ("6", "C"))
    for node_name, _ in membership:
        graph_data_struct.add_node(Node(label=node_name, tmp_id=node_name, image=node_name))
    for node_name, cluster_name in membership:
        graph_data_struct.add_edge(Edge(_from=node_name, _to=cluster_name))

    pprint.pprint(requests_results)
    pprint.pprint(graph_data_struct.export_as_dict())

    def check_stats(dist_threshold, expected_p, expected_n, tpr, tnr, fpr, fnr):
        # Compute the stats for one threshold and compare them to the expected values
        stats_datastruct = quality_evaluator.compute_score_for_one_threshold(
            requests_results, graph_data_struct, dist_threshold)
        print(stats_datastruct)
        self.assertEqual(stats_datastruct.P, expected_p)
        self.assertEqual(stats_datastruct.N, expected_n)
        self.assertAlmostEqual(stats_datastruct.TPR, tpr, delta=0.05)
        self.assertAlmostEqual(stats_datastruct.TNR, tnr, delta=0.05)
        self.assertAlmostEqual(stats_datastruct.FPR, fpr, delta=0.05)
        self.assertAlmostEqual(stats_datastruct.FNR, fnr, delta=0.05)

    # Only the best match of each request is considered
    quality_evaluator.cal_conf.NB_TO_CHECK = 1
    check_stats(0, 3, 3, 0.0, 1.0, 0.0, 1.0)
    check_stats(0.5, 3, 3, 0.66, 0.33, 0.66, 0.33)
    check_stats(1, 3, 3, 1.0, 0.0, 1.0, 0.0)

    # Up to three matches per request are considered
    quality_evaluator.cal_conf.NB_TO_CHECK = 3
    check_stats(0.5, 4, 3, 0.5, 0.33, 0.66, 0.5)
def results_list_to_graph(requests_list, nb_match: int = 1, yes_maybe_no_mode: bool = False) -> GraphDataStruct:
    """
    Construct a graph from results list of requests on the database
    Hypothesis : All database pictures are requested pictures
    Edges are colored : from green to red depending on the match index (Best is green)

    :param requests_list: a List of results extracted from server
    :param nb_match: Number of matches per pictures to add to the graph
    (1=first level match/best match, 2 = 2 best match per picture, etc.)
    :param yes_maybe_no_mode: if True, the edge value is a fictive distance derived from the
    match decision (DecisionTypes.get_fictive_dist) instead of the rounded real distance
    :return: A graph datastructure
    """
    logger = logging.getLogger(__name__)

    graph = GraphDataStruct(meta=Metadata(source=Source.DBDUMP))

    # Color management
    # FF0000 = red
    # 00FF00 = green
    short_color_list = ["#00FF00", "#887700", "#CC3300", "#FF0000"]
    full_color_list = ["#00FF00", "#11EE00", "#22DD00", "#33CC00", "#44BB00", "#55AA00", "#669900", "#778800",
                       "#887700", "#996600", "#AA5500", "#BB4400", "#CC3300", "#DD2200", "#EE1100", "#FF0000"]

    # We only have 4 colors if we don't want that much matches.
    # This way, first match is green, second orange, third red, etc.
    color_list = short_color_list if nb_match < 4 else full_color_list

    # TODO : Print all YES match

    # First pass : every requested picture becomes a node
    for curr_req in requests_list:
        req_id = curr_req.get("request_id", None)
        graph.add_node(Node(label=req_id, tmp_id=req_id, image=req_id))

    # Second pass : add an edge for each of the best matches of every request
    for curr_req in requests_list:
        # We remove the picture "itself" from the matches
        tmp_clean_matches = get_clear_matches(curr_req)
        req_id = curr_req.get("request_id", None)

        for i in range(min(nb_match, len(tmp_clean_matches))):
            curr_match = tmp_clean_matches[i]
            raw_dist = curr_match.get("distance", None)
            deci = curr_match.get("decision", "UNKNOWN")
            dest_id = curr_match.get("image_id", None)

            # Validate BEFORE rounding : round(None, 4) raises a TypeError, which
            # previously made the "no distance" guard unreachable.
            if raw_dist is None:
                logger.error(f"Problem with request, no distance: {pformat(curr_req)}")
                continue
            if dest_id is None:
                logger.error(f"Problem with request, no match's image id: {pformat(curr_req)}")
                continue

            dist = round(raw_dist, 4)

            if yes_maybe_no_mode:
                # Set threshold depending on Yes/Maybe/No in VisJS,
                # by creating a fictive distance depending on the decision
                edge_value = scoring_datastrutures.DecisionTypes.get_fictive_dist(deci)
            else:
                edge_value = dist

            # The color cycles through the palette according to the match rank
            graph.add_edge(Edge(_from=req_id,
                                _to=dest_id,
                                color={"color": color_list[i % len(color_list)]},
                                label=deci + ":" + str(dist),
                                value=edge_value))

    return graph
def compute_score_for_one_threshold( self, list_results: List, gt_graph: GraphDataStruct, dist_threshold: float) -> stats_datastruct.Stats_datastruct: """ Compute stats about the quality of a result (requests_result), given a specific threshold (dist_threshold) and compared to a ground truth graph (gt_graph) :param list_results: Result of a similarity request to server :param gt_graph: Ground truth file to provide to compute if matches are good or not :param dist_threshold: threshold to apply to the results to compare to ground truth graph :return: stats about the quality of a result """ # Create ready to go (with 0 valued) score object tmp_score = stats_datastruct.Stats_datastruct() tmp_score.reset_basics_values() # TODO : Construct good datastructure to perform the matching # Sort cand_graph to mapping [node.id] -> [node.id sorted by distance increasing] # For each node and its neighbourhood (by distance) for curr_result in list_results: # Check if node is correctly formatted if self.is_correct(curr_result): # Remove its own occurence from the list if presents. matches_list = dict_utilities.get_clear_matches(curr_result) # For all N first matches of the current picture (or below if less matches) nb_matches_to_process = min(self.cal_conf.NB_TO_CHECK, len(matches_list)) for i in range(0, nb_matches_to_process): # fetch the match to process curr_matched_node = matches_list[i] # Please note : # If the two nodes are in the same cluster in gt, then it should be a positive value. # Then this link is counted as a positive value in the entire dataset. # The distance and threshold DOES NOT IMPACT the Positive/Negative counts ! if curr_matched_node.get("distance") <= dist_threshold: # Even if it's request_id, it the current name of the file. 
if gt_graph.are_names_in_same_cluster( curr_result.get("request_id"), curr_matched_node.get("image_id")): tmp_score.TP += 1 # Match but good tmp_score.P += 1 # Should be good else: tmp_score.FP += 1 # No match but not good tmp_score.N += 1 # Should be not good elif curr_matched_node.get("distance") > dist_threshold: # Even if it's request_id, it the current name of the file. if gt_graph.are_names_in_same_cluster( curr_result.get("request_id"), curr_matched_node.get("image_id")): tmp_score.FN += 1 # No match but not good tmp_score.P += 1 # Should be good else: tmp_score.TN += 1 # No match but good tmp_score.N += 1 # Should be not good else: cluster = gt_graph.get_clusters_of( curr_result.get("request_id")) if cluster is None or len(cluster.members) <= 1: # this picture has no cluster OR Only one element in the cluster, # so it's the node = Good if no match tmp_score.TN += 1 # No match but good tmp_score.N += 1 # Should be not good else: # No matches, but not alone in the cluster, so should have been one. tmp_score.FN += 1 # No match but not good tmp_score.P += 1 # Should be good tmp_score.total_nb_elements = tmp_score.P + tmp_score.N tmp_score.compute_in_good_order() return tmp_score