def dump(self, args):
    '''
    Dump the database and transmit it to the client, and save it in a file (args.dbfile).
    Translate back the provided ids of the server with the filenames-to-id mapping
    saved previously (args.mapfile).
    Can duplicate id of picture to their "image" and "shape" attributes, which allows
    to visualize the database with visjs-classificator (args.copyids).
    :param args: arguments as described
    :return: The database as a Dict of a graphe (visjs-classificator style)
    '''
    print("Requesting server to dump its database")
    graphe_struct = self.ext_api.get_db_dump_as_graph()
    db = graphe_struct.export_as_dict()
    # TODO : Handle it properly with graphe structure calls ? For now, only operation on dict

    # If mapfile is provided, reverse the id. Otherwise, do nothing
    if args.mapfile:
        print("Mapping file detected. Reversing the ids ... ")
        mapping = load_json(args.mapfile)
        db = apply_revert_mapping(db, mapping)
        # TODO : graphe_struct.replace_id_from_mapping(mapping) # Cleaner

    # If copyids is true, we copy the value of the picture's ids
    # to their image and shape attributes
    # (BUGFIX: removed leftover debug print of args.copyids)
    if args.copyids:
        print("ID to image copy option detected. Copying ... ")
        db = copy_id_to_image(db)
        # TODO : graphe_struct.copy_ids_to_image() # Cleaner

    save_json(db, args.dbfile)
    return db
def build_list_and_evaluate_and_save_chart(
        self, list_results: List, gt_graph: GraphDataStruct,
        only_decisions: List[scoring_datastrutures.DecisionTypes],
        output_folder: pathlib.Path):
    """Filter results down to the given decision types, score them against the
    ground truth graph, and save both the filtered list (JSON) and a chart (PNG)."""

    # Build a name such as "YES_MAYBE_only" from the kept decision types
    base_name = "".join(f"{d.name}_" for d in only_decisions) + "only"

    # Keep only matches whose decision is among the allowed ones
    filtered = []
    for res in list_results:
        filtered.append(self.filter_out_request_result(res, only_decisions))

    json_import_export.save_json(
        filtered, pathlib.Path(get_homedir() / (base_name + ".json")))

    scored = self._compute_perfs_list(filtered, gt_graph)

    # Render the performance values as a 2D chart named after the filter
    plotter = two_dimensions_plot.TwoDimensionsPlot()
    plotter.print_graph(scored, output_folder, file_name=(base_name + ".png"))
def export_file(self, outputfile_path: pathlib.Path):
    """Write the pending export structure (self.to_export) to disk as JSON.
    Does nothing when there is nothing to export."""
    if self.to_export is None:
        return
    save_json(self.to_export, outputfile_path)
    print(
        f"File exported to : {outputfile_path} with {len(self.to_export)} entries."
    )
def test_json_import_export_consistency(self):
    # Round-trip : an object exported then re-imported must compare equal.
    # BUGFIX: replaced the `except: assertTrue(False)` antipattern (which hid
    # the actual exception) with self.fail carrying the error message, and
    # removed the no-op assertTrue(True) calls.
    try:
        json_import_export.save_json(
            self.simple_object, self.test_file_path / "export_test.json")
        obj = json_import_export.load_json(self.test_file_path /
                                           "export_test.json")
        self.assertEqual(self.simple_object, obj)
    except Exception as e:
        self.fail(f"Import/export round-trip of a simple object raised : {e}")

    try:
        json_import_export.save_json(
            self.path_object, self.test_file_path / "export_test.json")
        obj = json_import_export.load_json(self.test_file_path /
                                           "export_test.json")
        # TODO : Create pathlib.Path from string path. For now, can't be equal
        self.assertNotEqual(self.path_object, obj)
    except Exception as e:
        self.fail(f"Import/export round-trip of a pathlib object raised : {e}")
def get_proximity_graph_and_evaluate(self, image_folder: pathlib.Path,
                                     output_path: pathlib.Path,
                                     gt_path: pathlib.Path):
    """Build the proximity graph for a picture folder, evaluate it against the
    ground truth, then persist the performance list as JSON and as a plot."""
    results, _ = self.get_proximity_graph(image_folder, output_path)
    perf_values = self.evaluate_list_results(results, gt_path, output_path)

    # Persist the raw performance values
    json_import_export.save_json(perf_values, output_path / "perfs_list.json")
    self.logger.debug(f"Performance list saved.")

    # Persist the same values as a chart
    plotter = two_dimensions_plot.TwoDimensionsPlot()
    plotter.print_graph(perf_values, output_path)
def print_data(self, scalabilitygraph: ScalabilityData,
               output_folder: pathlib.Path,
               file_name: str = "scalability_graph.pdf"):
    """Plot the scalability data and dump the underlying request times as JSON
    next to the chart."""
    plotter = two_dimensions_plot.TwoDimensionsPlot()
    plotter.print_scalability_data(scalabilitygraph, output_folder, file_name)

    # Keep the raw numbers alongside the rendered chart
    json_import_export.save_json(scalabilitygraph.list_request_time,
                                 output_folder / "scalability_graph.json")
    self.logger.info("Results scalability_graph json saved.")
def upload(self, args) -> Dict[str, str]:
    """Upload every picture under args.path and persist the
    filename -> server-id mapping to args.mapfile.

    :param args: parsed CLI arguments (uses args.path and args.mapfile)
    :return: dict mapping original file names to server-given ids
    """
    print(f"Uploading pictures from {args.path}")
    name_to_id, uploaded_count = self.ext_api.add_many_pictures_no_wait(args.path)
    print(f"{uploaded_count} pictures uploaded.")

    # Keep the mapping on disk so server ids can later be reverted to file names
    save_json(name_to_id, args.mapfile)
    print(f"Mapping file_name / Server ID saved to {args.mapfile}.")
    return name_to_id
def test_json_export(self):
    # Export must not raise, for both a plain object and a pathlib-bearing one.
    # BUGFIX: replaced the `except: assertTrue(False)` antipattern (which hid
    # the actual exception) with self.fail carrying the error message, and
    # removed the no-op assertTrue(True) calls.
    try:
        json_import_export.save_json(
            self.simple_object, self.test_file_path / "export_test.json")
    except Exception as e:
        self.fail(f"Export of a simple object raised an exception : {e}")

    try:
        json_import_export.save_json(
            self.path_object, self.test_file_path / "export_test.json")
    except Exception as e:
        self.fail(f"Export of a pathlib object raised an exception : {e}")
def get_proximity_graph_from_list_result(self, list_results: List[Dict],
                                         output_path: pathlib.Path) -> GraphDataStruct:
    """Turn a list of distance results into proximity graphs and save both the
    plain and the yes/maybe/no variants as JSON.

    :param list_results: distance results, one per requested picture
    :param output_path: folder where the two graph json files are written
    :return: the yes/maybe/no graph (the last one built)
    """
    # Plain distance graph, keeping the 2 best matches per node
    distance_graph = self.results_list_to_graph(list_results, nb_match=2)
    json_import_export.save_json(distance_graph.export_as_dict(),
                                 output_path / "distance_graph.json")
    self.logger.debug(f"Distance graph json saved.")

    # Same construction, but with edges labelled yes / maybe / no
    distance_graph = self.results_list_to_graph(list_results,
                                                nb_match=2,
                                                yes_maybe_no_mode=True)
    json_import_export.save_json(distance_graph.export_as_dict(),
                                 output_path / "distance_graph_yes_maybe_no.json")
    self.logger.debug(f"Distance graph yes-maybe-no json saved.")

    return distance_graph
def request(self, args) -> Dict:
    """Ask the server for pictures similar to args.path, waiting at most
    args.waittime. If args.mapfile is given, server ids in the answer are
    translated back to the original file names. The answer is stored in
    args.resultfile.

    :param args: arguments as described
    :return: A dict of results
    """
    results = self.ext_api.request_one_picture_and_wait(args.path, args.waittime)

    # Only translate ids back when a mapping file was supplied
    if args.mapfile:
        print(f"Mapping file detected. Reversing the ids ... ")
        id_mapping = load_json(args.mapfile)
        results = apply_revert_mapping(results, id_mapping)

    save_json(results, args.resultfile)
    return results
def reference_all_files(self, path: pathlib.Path):
    """Hash every file under *path* (recursively) and store the hashes, then
    save the accumulated references next to the folder as
    '<folder>_references.json'.

    :param path: root folder whose files are hashed
    """
    path = path.resolve()
    # BUGFIX: dropped the redundant second .resolve() call on the already
    # resolved path.
    # Sorted walk => same processing order on any machine (determinism)
    files = [x for x in path.glob('**/*') if x.is_file()]
    files.sort()

    for f in files:
        self.check_correctness(f.name)
        content = f.read_bytes()
        hash_list = self.hash_file(content)
        self.store_hash(f.name, hash_list)

    save_json(self.already_generated,
              path.parent / (str(path.name) + "_references.json"))
    # BUGFIX: the previous message was truncated ("... stored in.")
    print(f"Done. {len(files)} files hashed and stored.")
def launch(self,
           db_conf: database_conf.Default_database_conf = None,
           dist_conf: distance_engine_conf.Default_distance_engine_conf = None,
           fe_conf: feature_extractor_conf.Default_feature_extractor_conf = None,
           ws_conf: webservice_conf.Default_webservice_conf = None,
           mode=None):
    """
    Construct an argument list and launch the worker process.
    Each non-None configuration is saved to a temporary json file under the
    home directory and passed to the worker by path.
    :param db_conf: configuration file
    :param dist_conf: configuration file
    :param fe_conf: configuration file
    :param ws_conf: configuration file
    :param mode: configuration element
    :return: Nothing
    """
    # Construct an argument list to be 'popen' as a new process
    arg_list = [str(self.worker_path)]

    def _append_conf(conf, tmp_name: str, conf_arg: str):
        # Save the configuration to a temporary json file and reference it in
        # the argument list. No-op when conf is None.
        # (Refactor: this replaces four near-identical copy-pasted blocks.)
        if conf is None:
            return
        tmp_conf_path = get_homedir() / tmp_name
        json_import_export.save_json(conf, file_path=tmp_conf_path)
        arg_list.append(conf_arg)
        arg_list.append(str(tmp_conf_path.resolve()))

    # Save current configurations in files and add them as arguments
    _append_conf(db_conf, "tmp_db_conf.json", ConfArgs.DB_CONF_ARG)
    _append_conf(dist_conf, "tmp_dist_conf.json", ConfArgs.DIST_CONF_ARG)
    _append_conf(fe_conf, "tmp_fe_conf.json", ConfArgs.FE_CONF_ARG)
    _append_conf(ws_conf, "tmp_ws_conf.json", ConfArgs.WS_CONF_ARG)

    if mode is not None:
        arg_list.append(ConfArgs.MODE_ARG)
        arg_list.append(mode)

    # Save starting time
    self.start_time = datetime.datetime.now()

    # Launch worker
    self.logger.debug(f"launching process as : {arg_list}")
    self.process = subprocess.Popen(arg_list)
def get_proximity_graph(
        self, image_folder: pathlib.Path,
        output_path: pathlib.Path) -> (List[Dict], GraphDataStruct):
    """
    Extract a proximity graph from a folder of pictures, sent to DB and requested one by one.
    :param image_folder: The folder of picture to send and request, to build the similarity graph from
    :param output_path: The output path where the graph and other data will be stored
    :return: the list of raw request results and the proximity graph
    """
    # Get distance results for each picture
    list_results = self.ext_api.add_and_request_and_dump_pictures(image_folder)

    # Save to file
    json_import_export.save_json(list_results,
                                 output_path / "requests_result.json")
    # DEBUG IF FAILURE FOR MANUAL RECOVERY
    # list_results = json_import_export.load_json(output_path / "requests_result.json")
    self.logger.debug("Results raw json saved.")

    # DEBUG IF FAILURE FOR MANUAL RECOVERY
    # list_results = [r for r in list_results if r is not None and r.get("request_id", None) is not None]

    # Extract tmp_graph and save as graphs.
    # (Cleanup: removed a dead triple-quoted block that duplicated
    # get_proximity_graph_from_list_result's body.)
    tmp_graph = self.get_proximity_graph_from_list_result(list_results,
                                                          output_path)

    return list_results, tmp_graph
def get_perf_list(
        self, list_results: List, gt_graph: GraphDataStruct,
        output_folder: pathlib.Path = None) -> List[perf_datastruct.Perf]:
    """
    Extract a list of performance datastructure from a list of results (list_results)
    compared to a ground truth file (gt_graph).
    Can store provided list and ground truth results if a (output_folder) is given.
    :param list_results: The list of results extracted from server (one result for each node of the graph)
    :param gt_graph: The ground truth file that serves as reference
    :param output_folder: Facultative output folder to save inputs
    :return: a list of performance datastructure, each having a threshold and a stats datastructure.
    This means that for each computed threshold, we know the quality of the graph.
    """
    # DEBUG purposes / Display arguments
    self.logger.debug("Received requests results :")
    self.logger.debug(pformat(list_results))
    self.logger.debug("Received ground truth graph :")
    self.logger.debug(pformat(gt_graph.export_as_dict()))

    if output_folder is not None:
        # Saving list results
        # BUGFIX: both files were previously written under get_homedir()
        # even though output_folder was checked; also the two "Saving ..."
        # comments were swapped relative to what was actually saved.
        json_import_export.save_json(list_results,
                                     output_folder / "requests_result.json")
        # Saving ground truth graph
        json_import_export.save_json(gt_graph.export_as_dict(),
                                     output_folder / "gt_graph.json")
    else:
        self.logger.debug(
            "List results and ground truth graph can't be saved : no output_folder specified."
        )

    perfs_list = self._compute_perfs_list(list_results, gt_graph)

    return perfs_list
def export_db_as_graphe(self) -> Dict:
    """
    Handle a dump of the database request
    :return: The result json (status of the request, etc.)
    """
    answer = {"Called_function": EndPoints.REQUEST_DB}
    answer = self.add_std_info(answer)

    # This endpoint only answers GET HTTP requests
    if flask.request.method != 'GET':
        return self.add_bad_method_info(answer, good_method_instead="GET")

    try:
        # Request export of the database and save it as a json graph
        self.db_utils = db_utils.DBUtilities(
            db_access_decode=self.database_worker.storage_db_decode,
            db_access_no_decode=self.database_worker.storage_db_no_decode)
        graph_dict = self.db_utils.get_storage_graph().export_as_dict()

        # Keep a copy of the dump on disk
        json_import_export.save_json(
            graph_dict, get_homedir() / "export_folder" / "db_graphe.json")

        answer["Status"] = "Success"
        answer["db"] = graph_dict
    except Exception as e:
        self.logger.error(f"Error during GET handling {e}")
        answer["Status"] = "Failure"
        answer["Error"] = "Error during db exportation to file"

    return answer
def add_and_request_and_dump_pictures(self, image_folder: pathlib.Path) -> List:
    """
    Send pictures of a folder, request all pictures one by one, construct
    a list of results, revert the mapping to get back pictures names
    :param image_folder: The folder of images to send
    :return: The list of results
    """
    self.logger.info(
        "Automated launch : add pictures from folder, request all, and dump graph / list of results"
    )

    # 1 - Send pictures to DB and get the id mapping
    mapping_old_filename_to_new_id, nb_pictures = self.add_many_pictures_and_wait_global(
        image_folder)
    # Alternative : add_many_picture_and_wait_for_each

    # TODO : To remove , debug only
    json_import_export.save_json(
        mapping_old_filename_to_new_id,
        pathlib.Path(get_homedir() / "mapping_old_filename_to_new_id.json"))

    # 2 - Request every picture against the DB
    list_results, nb_pictures = self.request_many_pictures_and_wait_global(
        image_folder)
    # Alternative : request_many_pictures_and_wait_for_each

    # TODO : To remove , debug only
    json_import_export.save_json(
        list_results,
        pathlib.Path(get_homedir() / "list_result_before_reversion.json"))

    # Translate server-side ids back to the original file names
    list_results = dict_utilities.apply_revert_mapping(
        list_results, mapping_old_filename_to_new_id)
    # TODO : do it with graphes ? graphe_struct.replace_id_from_mapping(mapping)

    # TODO : To remove , debug only
    json_import_export.save_json(
        list_results,
        pathlib.Path(get_homedir() / "list_result_after_reversion.json"))

    # We guarantee that each request is not None and each request has a request_id
    clean_list_results = []
    for r in list_results:
        if r is not None and r.get("request_id", None) is not None:
            clean_list_results.append(r)

    if len(list_results) != len(clean_list_results):
        self.logger.critical(
            f"Errors during results fetching. {abs(len(list_results) - len(clean_list_results))} elements were null or without request id."
        )

    return clean_list_results
def export_graph(self, output_folder: pathlib.Path, global_mapping):
    """Dump the server database as a graph, save the raw dump and the name
    mapping, then save a renamed/augmented copy suitable for visualization."""
    dump_dict = self.ext_api.get_db_dump_as_graph().export_as_dict()

    # Raw dump : full of new (server-side) ids
    json_import_export.save_json(
        dump_dict, output_folder / "original_storage_graph_dump.json")

    # old name -> new id
    json_import_export.save_json(
        global_mapping, output_folder / "global_mapping.json")

    # Revert server ids to original names, then copy ids to image/shape attributes
    dump_dict = dict_utilities.apply_revert_mapping(dump_dict, global_mapping)
    # Alternative : db_dump.replace_id_from_mapping(mapping)
    dump_dict = dict_utilities.copy_id_to_image(dump_dict)

    json_import_export.save_json(
        dump_dict, output_folder / "modified_storage_graph_dump.json")
def get_scalability_list(
        self,
        list_boxes_sizes: List[int],
        pictures_set: Set[pathlib.Path],
        dist_conf: dec.Default_distance_engine_conf = None,
        fe_conf: fec.Default_feature_extractor_conf = None,
        output_folder: pathlib.Path = None):
    """
    Measure scalability : upload pictures box by box into a dedicated test
    instance of the database, and record timings / statistics after each box.

    :param list_boxes_sizes: number of pictures to upload at each step
    :param pictures_set: pool of pictures to draw upload/request samples from
    :param dist_conf: distance engine configuration (a fresh default is built if None)
    :param fe_conf: feature extractor configuration (a fresh default is built if None)
    :param output_folder: optional folder where mappings, request lists and graphs are saved
    :return: a ScalabilityData holding one datastructure per evaluated box
    """
    # BUGFIX : dist_conf / fe_conf used to be mutable default arguments,
    # built once at function-definition time and shared between all calls.
    if dist_conf is None:
        dist_conf = dec.Default_distance_engine_conf()
    if fe_conf is None:
        fe_conf = fec.Default_feature_extractor_conf()

    # ==== Upload pictures + Make requests ====
    scalability_data = ScalabilityData()
    db_conf = test_database_only_conf.TestInstance_database_conf()  # For test sockets only

    # Launch a modified server
    self.logger.debug("Creation of a full instance of redis (Test only) ... ")
    test_db_handler = test_database_handler.TestInstanceLauncher()
    test_db_handler.create_full_instance(db_conf=db_conf,
                                         dist_conf=dist_conf,
                                         fe_conf=fe_conf)

    # Get direct access to DB to retrieve statistics
    db_access_no_decode = redis.Redis(
        unix_socket_path=test_db_handler.db_handler.get_socket_path('test'),
        decode_responses=False)
    db_access_decode = redis.Redis(
        unix_socket_path=test_db_handler.db_handler.get_socket_path('test'),
        decode_responses=True)
    db_utils = DBUtilities(db_access_decode=db_access_decode,
                           db_access_no_decode=db_access_no_decode)

    nb_picture_total_in_db = 0
    global_mapping = {}

    # For each box
    for i, curr_box_size in enumerate(list_boxes_sizes):
        # Get a list of pictures to request and a list of pictures to store
        pictures_set, pics_to_request = self.biner(
            pictures_set, self.scalability_conf.NB_PICS_TO_REQUEST)
        pictures_set, pics_to_store = self.biner(pictures_set, curr_box_size)

        self.logger.info(
            f"Nb of pictures left to be uploaded later : {len(pictures_set)}")
        self.logger.info(
            f"Nb of pictures to upload (adding) : {len(pics_to_store)}")

        # If we are not out of pictures to send
        if len(pics_to_store) != 0:
            # Evaluate time for this database size and store it
            tmp_scal_datastruct, mapping, request_list = self.evaluate_scalability_lists(
                list_pictures_eval=pics_to_request,
                list_picture_to_up=pics_to_store,
                tmp_id=i)
            global_mapping = {**global_mapping, **mapping}

            # Store few more values
            # Nb of picture in the database right now
            nb_picture_total_in_db += tmp_scal_datastruct.nb_picture_added
            tmp_scal_datastruct.nb_picture_total_in_db = db_utils.get_nb_stored_pictures()

            # Nb of pictures sent at the beginning to be added / requested
            tmp_scal_datastruct.nb_picture_tried_to_be_added = len(pics_to_store)
            tmp_scal_datastruct.nb_picture_tried_to_be_requested = len(pics_to_request)

            # Nb of cluster and their content
            tmp_scal_datastruct.nb_clusters_in_db = len(db_utils.get_cluster_list())
            tmp_scal_datastruct.clusters_sizes = db_utils.get_list_cluster_sizes()

            # Print error if the running count diverges from the DB's own count
            if tmp_scal_datastruct.nb_picture_total_in_db != nb_picture_total_in_db:
                self.logger.error(
                    f"Error in scalability evaluator, number of picture really in DB and computed as should being in DB are differents : {tmp_scal_datastruct.nb_picture_total_in_db} {nb_picture_total_in_db}"
                )

            scalability_data.list_request_time.append(tmp_scal_datastruct)

            if output_folder is not None:
                # NOTE(review): this "global_mapping<i>.json" file actually
                # stores the request list — confirm before renaming it.
                save_path_json = output_folder / ("global_mapping" + str(i) + ".json")
                json_import_export.save_json(request_list, save_path_json)
                save_path_json = output_folder / ("mapping" + str(i) + ".json")
                json_import_export.save_json(mapping, save_path_json)

    # Export graph
    # (Cleanup: removed a dead triple-quoted block that duplicated
    # export_graph's body.)
    if output_folder is not None:
        self.export_graph(output_folder, global_mapping)
    else:
        self.logger.critical("outputfolder is None ! ")

    # node server.js -i ./../DATASETS/PHISHING/PHISHING-DATASET-DISTRIBUTED-DEDUPLICATED/ -t ./TMP -o ./TMP -j ./../douglas-quaid/datasets/OUTPUT_EVALUATION/threshold_0.0195/modified_storage_graph_dump.json

    # Kill server instance
    self.logger.debug("Shutting down Redis test instance")
    test_db_handler.tearDown()

    return scalability_data
def export_dicts(self):
    """Save each pending dict as labels_<index>.json in the destination folder."""
    for index, curr in enumerate(self.dict_to_export):
        target = self.dst_folder / ("labels_" + str(index) + ".json")
        json_io.save_json(curr, target)
def get_storage_graph(self, image_folder: pathlib.Path, visjs_json_path: pathlib.Path, output_path: pathlib.Path) -> Dict: """ Extract a storage graph from a folder of pictures, sent to DB and a dump request to the DB. Store all pictures in the server and dump the database as is. :param image_folder: The folder of picture to send and request, to build the storage graph from :param visjs_json_path: :param output_path: The output path where the graph and other data will be stored :return: """ # ========= MANUAL EVALUATION ========= # Load ground truth file gt_graph = graph_datastructure.load_visjs_to_graph(visjs_json_path) # ========= AUTO EVALUATION ========= # Send pictures to DB and get id mapping mapping_old_filename_to_new_id, nb_pictures = self.API.add_many_picture_and_wait_for_each( image_folder) # Get a DB dump db_dump = self.API.get_db_dump_as_graph() # ========= COMPARISON ========= # Apply name mapping to dict (find back original names) gt_graph.replace_id_from_mapping(mapping_old_filename_to_new_id) # Extract the lists of clusters candidate = db_dump.get_clusters() original = gt_graph.get_clusters() # Match clusters # 1. Manually ? (Go back to visjs + rename manually to have the same matching names) # 2. Automatically ? (Match on number of common elements ~, see function) # 2 was chosen. 
# Groups clusters per pair, by matching original clusters with Candidate clusters matching = self.match_clusters(original, candidate) # Compute from each pair of clusters their Quality score matching_with_perf = ClusterMatchingQualityEvaluator.evaluate_performance( matching, nb_pictures) # Store performance in a file perfs = ClusterMatchingQualityEvaluator.export_as_json( matching_with_perf) save_path_perf = output_path / "perf.json" json_import_export.save_json(perfs, save_path_perf) self.logger.debug(f"Json saved in : {save_path_perf}") # ========= RESULT VISUALIZATON ========= # Convert matching with performance to confusion matrix matrix_creator = ConfusionMatrixGenerator() matrix_creator.create_and_export_confusion_matrix( original, candidate, output_path / "matrix.pdf") # Convert dumped graph to visjs graphe # ==> red if linked made by algo, but non existant + Gray, true link that should have been created ( # ==> Green if linked made by algo and existant output_graph = graph_datastructure.merge_graphs( gt_graph, db_dump, matching) save_path_json = output_path / "merged_graph.json" json_import_export.save_json(output_graph, save_path_json) self.logger.debug(f"DB Dump json saved in : {save_path_json}") # ============================== return perfs