Python GraphDataStruct Examples, common.Graph.graph_datastructure.GraphDataStruct Python Examples

Example #1

0

Show file

File: database_utilities.py Project: loongst/douglas-quaid

    def get_storage_graph(self) -> GraphDataStruct:
        """
        # TODO : Move to API ?
        Build a graph datastructure mirroring what is currently stored in the database.

        One cluster is added per id returned by get_cluster_list(); for every picture of
        that cluster a node is added (its label carries the picture score) together with
        an edge going from the cluster to the picture.
        :return: The storage graph of the server, as is in the database
        """

        # Empty graph, tagged as coming from a database dump
        storage_graph = GraphDataStruct(Metadata(Source.DBDUMP))

        for cluster_id in self.get_cluster_list():
            # Register the cluster itself
            storage_graph.add_cluster(Cluster(label="", tmp_id=cluster_id, image=""))

            # Fetch the scored pictures attached to this cluster
            scored_pictures = self.get_pictures_of_cluster(cluster_id, with_score=True)
            self.logger.info(f"Picture list : {scored_pictures}")

            for entry in scored_pictures:
                # Each entry is indexed as [0] = picture id, [1] = picture score
                picture_id = entry[0]
                picture_score = entry[1]

                # The score is used as the node label
                storage_graph.add_node(Node(label=picture_score, tmp_id=picture_id, image=""))
                storage_graph.add_edge(Edge(_from=cluster_id, _to=picture_id))

        return storage_graph

Example #2

0

Show file

    def generate_basic_graph_with_mapping(
            VISJS=False) -> (GraphDataStruct, dict):
        """
        Build a small fixture graph (2 clusters, 3 pictures each) and an image-name mapping.

        :param VISJS: if truthy the graph metadata source is Source.VISJS, otherwise Source.DBDUMP
        :return: the graph, and a dict mapping each node's image value ("<c>_<p>IMAGE") to "<c>_<p>NEW"
        """
        # Pick the metadata source, then create the empty graph
        source = Source.VISJS if VISJS else Source.DBDUMP
        graph = GraphDataStruct(Metadata(source))

        image_to_new_id = {}

        for cluster_id in range(0, 2):
            graph.add_cluster(Cluster(label="", tmp_id=cluster_id, image=""))

            for picture_index in range(0, 3):
                prefix = f"{cluster_id}_{picture_index}"
                old_id = prefix + "OLD"
                image_name = prefix + "IMAGE"

                # Remember which new id this image is expected to map to
                image_to_new_id[image_name] = prefix + "NEW"

                # One node per picture, linked from its cluster
                picture_node = Node(label="picture name +" + old_id,
                                    tmp_id=old_id,
                                    image=image_name)
                graph.add_node(picture_node)
                graph.add_edge(Edge(_from=cluster_id, _to=old_id))

        return graph, image_to_new_id

Example #3

0

Show file

    def test_graph_import_export_consistency(self):
        """
        Export the basic fixture graph as a dict, re-import it, export it again,
        and check that both exports are equal (and match self.expected_json).
        """
        original_graph = self.generate_basic_graph()

        # First export
        print("Exported dict : ")
        first_export = original_graph.export_as_dict()
        pprint.pprint(first_export)

        # Round trip : rebuild a graph from the exported dict
        print("Import graphe : ")
        reloaded_graph = GraphDataStruct.load_from_dict(first_export)
        pprint.pprint(reloaded_graph)

        # Second export, from the re-imported graph
        print("Exported dict (after import): ")
        second_export = reloaded_graph.export_as_dict()
        pprint.pprint(second_export)

        # The export must match the reference, and survive the round trip unchanged
        self.assertDictEqual(first_export, self.expected_json)
        self.assertDictEqual(first_export, second_export)

Example #4

0

Show file

    def get_perf_list(
            self,
            list_results: List,
            gt_graph: GraphDataStruct,
            output_folder: pathlib.Path = None) -> List[perf_datastruct.Perf]:
        """
        Compute a list of performance datastructures from a list of results (list_results)
        evaluated against a ground truth graph (gt_graph).
        :param list_results: The list of results extracted from server (one result for each node of the graph)
        :param gt_graph: The ground truth graph that serves as reference
        :param output_folder: Optional. When not None, the inputs are dumped as JSON files.
        :return: the list produced by self._compute_perfs_list(list_results, gt_graph)
        """
        # NOTE(review): output_folder only switches the dump on/off; the files themselves are
        # written under get_homedir(), not under output_folder. Confirm this is intended.

        # DEBUG purposes / Display arguments
        log_debug = self.logger.debug
        log_debug("Received requests results :")
        log_debug(pformat(list_results))
        log_debug("Received ground truth graph :")
        log_debug(pformat(gt_graph.export_as_dict()))

        # TODO : Remove output folder ?
        if output_folder is None:
            log_debug(
                "List results and ground truth graph can't be saved : no output_folder specified."
            )
        else:
            # Saving list results
            results_path = get_homedir() / "requests_result.json"
            json_import_export.save_json(list_results, results_path)

            # Saving ground truth graph
            gt_dump = gt_graph.export_as_dict()
            gt_path = get_homedir() / "gt_graph.json"
            json_import_export.save_json(gt_dump, gt_path)

        return self._compute_perfs_list(list_results, gt_graph)

Example #5

0

Show file

    def get_db_dump_as_graph(self) -> GraphDataStruct:
        """
        Request a dump of the database through export_db_server() and load it as a graph.
        :return: A graph datastructure built from the dumped dict
        :raises Exception: if export_db_server() reports a failure
        """

        # Dump DB as graph / clusters
        is_success, db = self.export_db_server()

        # Fail fast when the dump could not be fetched
        if not is_success:
            raise Exception(f"Error during db dump of {db}")

        print("Database fetched successfully.")
        return GraphDataStruct.load_from_dict(db)

Example #6

0

Show file

    def generate_basic_graph() -> GraphDataStruct:
        """
        Build a small fixture graph : 2 clusters (ids 0 and 1), each linked to 3 picture
        nodes whose ids are "<cluster>_<picture>".
        :return: the populated graph, tagged with Source.DBDUMP metadata
        """
        graph = GraphDataStruct(Metadata(Source.DBDUMP))

        for cluster_id in range(0, 2):
            graph.add_cluster(Cluster(label="", tmp_id=cluster_id, image=""))

            for picture_index in range(0, 3):
                picture_id = f"{cluster_id}_{picture_index}"

                # One node per picture, then the cluster -> picture edge
                picture_node = Node(label="picture name +" + picture_id,
                                    tmp_id=picture_id,
                                    image="")
                graph.add_node(picture_node)
                graph.add_edge(Edge(_from=cluster_id, _to=picture_id))

        return graph

Example #7

0

Show file

    def test_compute_score_for_one_threshold(self):
        """
        Check the stats returned by compute_score_for_one_threshold on a fixed set of six
        requests and a three-cluster reference graph, for several thresholds and for
        NB_TO_CHECK set to 1 then 3.
        """
        # Graph example. Please check documentation for more information
        evaluator = similarity_graph_quality_evaluator.similarity_graph_quality_evaluator(
            Default_calibrator_conf())

        def make_request(request_id, matches):
            # matches : iterable of (distance, matched image id)
            return {
                "list_pictures": [{
                    "cluster_id": "XXX",
                    "decision": "YES",
                    "distance": distance,
                    "image_id": image_id
                } for distance, image_id in matches],
                "request_id": request_id,
                "status": "matches_found",
                "request_time": 0
            }

        requests_results = [
            # 1 to 2 and 3
            make_request("1", [(0.1, "2"), (0.6, "3")]),
            make_request("2", [(0.3, "6")]),
            make_request("3", [(0.2, "1")]),
            make_request("4", [(0.4, "2")]),
            make_request("5", [(0.6, "4")]),
            make_request("6", [(0.7, "2")]),
        ]

        # Create the reference graph : clusters first, then nodes, then edges
        reference_graph = GraphDataStruct(Metadata(Source.DBDUMP))
        for cluster_name in ("A", "B", "C"):
            reference_graph.add_cluster(
                Cluster(label=cluster_name, tmp_id=cluster_name, image=cluster_name))
        for node_name in ("1", "2", "3", "4", "5", "6"):
            reference_graph.add_node(
                Node(label=node_name, tmp_id=node_name, image=node_name))
        membership = (("1", "A"), ("2", "A"), ("3", "A"),
                      ("4", "B"), ("5", "B"),
                      ("6", "C"))
        for node_name, cluster_name in membership:
            reference_graph.add_edge(Edge(_from=node_name, _to=cluster_name))

        pprint.pprint(requests_results)
        pprint.pprint(reference_graph.export_as_dict())

        def check(dist_threshold, expected_p, expected_n, tpr, tnr, fpr, fnr):
            # Score the fixture at the given threshold and compare with expected stats
            stats = evaluator.compute_score_for_one_threshold(
                requests_results, reference_graph, dist_threshold)
            print(stats)
            self.assertEqual(stats.P, expected_p)
            self.assertEqual(stats.N, expected_n)
            self.assertAlmostEqual(stats.TPR, tpr, delta=0.05)
            self.assertAlmostEqual(stats.TNR, tnr, delta=0.05)
            self.assertAlmostEqual(stats.FPR, fpr, delta=0.05)
            self.assertAlmostEqual(stats.FNR, fnr, delta=0.05)

        # Only the best match of each request is evaluated
        evaluator.cal_conf.NB_TO_CHECK = 1
        check(0, 3, 3, 0.0, 1.0, 0.0, 1.0)
        check(0.5, 3, 3, 0.66, 0.33, 0.66, 0.33)
        check(1, 3, 3, 1.0, 0.0, 1.0, 0.0)

        # Up to three matches per request are evaluated
        evaluator.cal_conf.NB_TO_CHECK = 3
        check(0.5, 4, 3, 0.5, 0.33, 0.66, 0.5)

Example #8

0

Show file

File: similarity_graph_extractor.py Project: loongst/douglas-quaid

    def results_list_to_graph(requests_list, nb_match: int = 1, yes_maybe_no_mode: bool = False) -> GraphDataStruct:
        """
        Construct a graph from results list of requests on the database
        Hypothesis : All database pictures are requested pictures
        Edges are colored : from green to red depending on the match index (Best is green)
        :param requests_list: a List of results extracted from server
        :param nb_match: Number of matches per pictures to add to the graph (1=first level match/best match, 2 = 2 best match per picture, etc.)
        :param yes_maybe_no_mode: if True, the edge value is the fictive distance returned by
            scoring_datastrutures.DecisionTypes.get_fictive_dist for the match's decision,
            instead of the (rounded) real distance
        :return: A graph datastructure
        """
        logger = logging.getLogger(__name__)

        graph = GraphDataStruct(meta=Metadata(source=Source.DBDUMP))

        # Color management : FF0000 = red, 00FF00 = green
        if nb_match < 4:
            # We only have 4 colors if we don't want that much matches.
            # This way, first match is green, second orange, third red, etc.
            color_list = ["#00FF00", "#887700", "#CC3300", "#FF0000"]
        else:
            color_list = ["#00FF00", "#11EE00", "#22DD00", "#33CC00", "#44BB00", "#55AA00",
                          "#669900", "#778800", "#887700", "#996600", "#AA5500", "#BB4400",
                          "#CC3300", "#DD2200", "#EE1100", "#FF0000"]
        # TODO : Print all YES match

        # First pass : every requested picture becomes a node
        for curr_req in requests_list:
            req_id = curr_req.get("request_id", None)
            graph.add_node(Node(label=req_id, tmp_id=req_id, image=req_id))

        # Second pass : add an edge for each of the best matches of every request
        for curr_req in requests_list:

            # We remove the picture "itself" from the matches
            tmp_clean_matches = get_clear_matches(curr_req)

            req_id = curr_req.get("request_id", None)

            # max(..., 0) keeps a negative nb_match meaning "no match", as range() did
            for i, match in enumerate(tmp_clean_matches[:max(nb_match, 0)]):

                raw_dist = match.get("distance", None)
                deci = match.get("decision", "UNKNOWN")
                dest_id = match.get("image_id", None)

                # Validate BEFORE rounding : round(None, 4) raises TypeError, which used to
                # make the "no distance" branch unreachable.
                if raw_dist is None:
                    logger.error(f"Problem with request, no distance: {pformat(curr_req)}")
                    continue
                if dest_id is None:
                    logger.error(f"Problem with request, no match's image id: {pformat(curr_req)}")
                    continue

                dist = round(raw_dist, 4)

                if yes_maybe_no_mode:
                    # set threshold depending on Yes/Maybe/No in VisJS
                    # By creating a fictive distance, depending on the decision
                    edge_value = scoring_datastrutures.DecisionTypes.get_fictive_dist(deci)
                else:
                    edge_value = dist

                # The label always shows the real (rounded) distance, whatever the mode
                graph.add_edge(Edge(_from=req_id,
                                    _to=dest_id,
                                    color={"color": color_list[i % len(color_list)]},
                                    label=deci + ":" + str(dist),
                                    value=edge_value))

        return graph

Example #9

0

Show file

    def compute_score_for_one_threshold(
            self, list_results: List, gt_graph: GraphDataStruct,
            dist_threshold: float) -> stats_datastruct.Stats_datastruct:
        """
        Score a list of request results against a ground truth graph, for one distance threshold.

        A match whose distance is <= dist_threshold is treated as accepted, otherwise as rejected.
        Whether it counts as Positive or Negative depends only on the ground truth
        (same cluster or not), never on the distance.
        :param list_results: Result of a similarity request to server
        :param gt_graph: Ground truth graph used to decide if two names belong to the same cluster
        :param dist_threshold: threshold applied to the match distances
        :return: stats about the quality of the results
        """
        # NOTE(review): the totals and compute_in_good_order() are refreshed at the end of every
        # iteration, so with an empty list_results the score is returned without them being computed.

        # Start from a freshly reset score object
        score = stats_datastruct.Stats_datastruct()
        score.reset_basics_values()

        # TODO : Construct good datastructure to perform the matching
        # Sort cand_graph to mapping [node.id] -> [node.id sorted by distance increasing]

        for result in list_results:

            if not self.is_correct(result):
                # Result not usable : judge it from the ground truth cluster of the request alone
                cluster = gt_graph.get_clusters_of(result.get("request_id"))

                if cluster is not None and len(cluster.members) > 1:
                    # Not alone in its cluster, so a match was expected
                    score.FN += 1  # No match, but there should have been one
                    score.P += 1  # Should be good
                else:
                    # No cluster, or alone in its cluster : having no match is right
                    score.TN += 1  # No match, and that is correct
                    score.N += 1  # Should be not good
            else:
                # Remove its own occurrence from the matches, if present
                candidates = dict_utilities.get_clear_matches(result)

                # Only the N first matches are evaluated (fewer if there are not enough)
                nb_to_process = min(self.cal_conf.NB_TO_CHECK, len(candidates))

                for rank in range(nb_to_process):
                    candidate = candidates[rank]
                    distance = candidate.get("distance")

                    if distance <= dist_threshold:
                        accepted = True
                    elif distance > dist_threshold:
                        accepted = False
                    else:
                        # Neither comparison holds : nothing is counted for this match
                        continue

                    # Even if it's request_id, it is the current name of the file.
                    same_cluster = gt_graph.are_names_in_same_cluster(
                        result.get("request_id"),
                        candidate.get("image_id"))

                    if accepted and same_cluster:
                        score.TP += 1  # Accepted, rightly
                        score.P += 1  # Should be good
                    elif accepted:
                        score.FP += 1  # Accepted, wrongly
                        score.N += 1  # Should be not good
                    elif same_cluster:
                        score.FN += 1  # Rejected, wrongly
                        score.P += 1  # Should be good
                    else:
                        score.TN += 1  # Rejected, rightly
                        score.N += 1  # Should be not good

            # Refresh totals and derived values after each processed result
            score.total_nb_elements = score.P + score.N
            score.compute_in_good_order()

        return score