コード例 #1
0
ファイル: evaluation.py プロジェクト: nle-sztyler/OpenBioLink
 def get_rank_lists(self, params):
     """Rank one positive example against its corrupted heads and tails.

     Args:
         params: 6-tuple ``(pos_example, mapped_nodes, nodes_dic,
             mapped_pos_triples, filtered_setting, unfiltered_setting)``.
             Note that the filtered flag comes before the unfiltered one.

     Returns:
         A 4-tuple ``(unfiltered_head_ranks, unfiltered_tail_ranks,
         filtered_head_ranks, filtered_tail_ranks)``. The two entries
         belonging to a setting whose flag is falsy are ``None``.
     """
     (pos_example, mapped_nodes, nodes_dic, mapped_pos_triples,
      filtered_setting, unfiltered_setting) = params

     # The corrupted triples are always generated, even when a setting is
     # switched off; only the ranking step below is conditional.
     raw_head, raw_tail, kept_head, kept_tail = utils.calc_corrupted_triples(
         pos_example=pos_example,
         nodes=mapped_nodes,
         nodes_dic=nodes_dic,
         filtered=filtered_setting,
         pos_examples=mapped_pos_triples,
     )

     def rank_against(corrupted_examples):
         # Rank of the positive example among the given corrupted examples.
         return self.get_rank_for_corrupted_examples(corrupted_examples,
                                                     pos_example)

     no_ranks = (None, None)
     # Tuple elements are evaluated left to right: head first, then tail.
     unfiltered_ranks = ((rank_against(raw_head), rank_against(raw_tail))
                         if unfiltered_setting else no_ranks)
     filtered_ranks = ((rank_against(kept_head), rank_against(kept_tail))
                       if filtered_setting else no_ranks)
     return unfiltered_ranks + filtered_ranks
コード例 #2
0
ファイル: evaluation.py プロジェクト: nle-sztyler/OpenBioLink
    def evaluate_ranked_metrics_1(self,
                                  ks,
                                  metrics,
                                  unfiltered_setting=True,
                                  filtered_setting=False):
        """Compute the requested rank metrics over the positive test triples.

        Every positive test triple (rows of ``self.test_examples`` whose
        ``globConst.VALUE_COL_NAME`` column equals 1) is ranked against its
        corrupted heads and tails; the collected ranks are then handed to
        ``calculate_hits_at_k`` / ``calculate_mrr``.

        Args:
            ks: Cut-off values, forwarded unchanged to ``calculate_hits_at_k``.
            metrics: Container of ``RankMetricType`` members to compute.
            unfiltered_setting: If truthy, collect ranks against the
                unfiltered corrupted triples.
            filtered_setting: If truthy, collect ranks against the filtered
                corrupted triples.

        Returns:
            dict mapping each computed ``RankMetricType`` to the value
            returned by the corresponding ``calculate_*`` method.

        Note:
            The filtered metrics (``HITS_AT_K``, ``MRR``) read the filtered
            rank lists and the ``*_UNFILTERED`` metrics read the unfiltered
            ones. A list stays empty (and its ``num_examples`` is 0) when the
            matching ``*_setting`` flag is falsy.
        """
        metric_results = {}

        # get corrupted triples
        pos_test_examples = self.test_examples[self.test_examples[
            globConst.VALUE_COL_NAME] == 1]
        pos_test_examples_array = pos_test_examples.values
        nodes_array = self.nodes.values

        mapped_pos_triples, mapped_nodes = self.get_mapped_triples_and_nodes(
            triples=pos_test_examples_array, nodes=nodes_array)

        # Group the first column of mapped_nodes by the value in its second
        # column (treated as the node type here).
        node_types = np.unique(mapped_nodes[:, 1])
        nodes_dic = {
            node_type:
            mapped_nodes[np.where(mapped_nodes[:, 1] == node_type)][:, 0]
            for node_type in node_types
        }

        filtered_ranks_corrupted_heads = []
        filtered_ranks_corrupted_tails = []
        unfiltered_ranks_corrupted_heads = []
        unfiltered_ranks_corrupted_tails = []

        print("calculating corrupted triples")
        tqdmbuffer = TqdmBuffer() if globConst.GUI_MODE else None
        for pos_example in tqdm(mapped_pos_triples,
                                total=mapped_pos_triples.shape[0],
                                file=tqdmbuffer):
            (
                unfiltered_corrupted_head,
                unfiltered_corrupted_tail,
                filtered_corrupted_head,
                filtered_corrupted_tail,
            ) = utils.calc_corrupted_triples(
                pos_example=pos_example,
                nodes=mapped_nodes,
                nodes_dic=nodes_dic,
                filtered=filtered_setting,
                pos_examples=mapped_pos_triples,
            )
            if unfiltered_setting:
                unfiltered_ranks_corrupted_heads.append(
                    self.get_rank_for_corrupted_examples(
                        unfiltered_corrupted_head, pos_example))
                unfiltered_ranks_corrupted_tails.append(
                    self.get_rank_for_corrupted_examples(
                        unfiltered_corrupted_tail, pos_example))
            if filtered_setting:
                filtered_ranks_corrupted_heads.append(
                    self.get_rank_for_corrupted_examples(
                        filtered_corrupted_head, pos_example))
                filtered_ranks_corrupted_tails.append(
                    self.get_rank_for_corrupted_examples(
                        filtered_corrupted_tail, pos_example))

        filtered_num_examples = len(filtered_ranks_corrupted_heads)
        unfiltered_num_examples = len(unfiltered_ranks_corrupted_heads)

        # HITS@K
        if RankMetricType.HITS_AT_K in metrics:
            metric_results[
                RankMetricType.HITS_AT_K] = self.calculate_hits_at_k(
                    ks=ks,
                    ranks_corrupted_heads=filtered_ranks_corrupted_heads,
                    ranks_corrupted_tails=filtered_ranks_corrupted_tails,
                    num_examples=filtered_num_examples,
                )
        # HITS@K unfiltered
        if RankMetricType.HITS_AT_K_UNFILTERED in metrics:
            metric_results[
                RankMetricType.
                HITS_AT_K_UNFILTERED] = self.calculate_hits_at_k(
                    ks=ks,
                    ranks_corrupted_heads=unfiltered_ranks_corrupted_heads,
                    ranks_corrupted_tails=unfiltered_ranks_corrupted_tails,
                    num_examples=unfiltered_num_examples,
                )
        # MRR
        if RankMetricType.MRR in metrics:
            metric_results[RankMetricType.MRR] = self.calculate_mrr(
                ranks_corrupted_heads=filtered_ranks_corrupted_heads,
                ranks_corrupted_tails=filtered_ranks_corrupted_tails,
                num_examples=filtered_num_examples,
            )
        # MRR unfiltered
        if RankMetricType.MRR_UNFILTERED in metrics:
            # Store under its own key; writing to RankMetricType.MRR here
            # would overwrite the filtered MRR and leave MRR_UNFILTERED
            # missing from the result.
            metric_results[
                RankMetricType.MRR_UNFILTERED] = self.calculate_mrr(
                    ranks_corrupted_heads=unfiltered_ranks_corrupted_heads,
                    ranks_corrupted_tails=unfiltered_ranks_corrupted_tails,
                    num_examples=unfiltered_num_examples,
                )
        return metric_results
コード例 #3
0
ファイル: evaluation.py プロジェクト: nle-sztyler/OpenBioLink
    def evaluate_ranked_metrics_3(self,
                                  ks,
                                  metrics,
                                  unfiltered_setting=True,
                                  filtered_setting=False):
        """Compute hits@k for head and tail corruption, one query group at a time.

        Positive test triples (rows of ``self.test_examples`` whose
        ``globConst.VALUE_COL_NAME`` column equals 1) are grouped by their
        (head, relation) pair when corrupting tails and by their
        (relation, tail) pair when corrupting heads. For every group the
        model ranks the positives together with the unfiltered corrupted
        triples of the group's first positive, and each positive is recorded
        as a hit (1) or a miss (0) for every k.

        Args:
            ks: Sequence of cut-off values. ``ks[-1]`` sizes the search
                frame, so it is presumably the largest k (assumes ascending
                order -- confirm against callers).
            metrics: Not read by this implementation.
            unfiltered_setting: Not read by this implementation.
            filtered_setting: Not read by this implementation.

        Returns:
            dict with the single key ``RankMetricType.HITS_AT_K`` mapped to
            ``(head_results, tail_results)``; each is a list holding, per k,
            the fraction of recorded positives that were hits.

        Raises:
            ZeroDivisionError: If no hit flag was recorded for some k.
        """
        # One list of 0/1 hit flags per k, for each corruption direction.
        k_raw_corrupted_head = [[] for _ in ks]
        k_raw_corrupted_tail = [[] for _ in ks]

        # get corrupted triples
        pos_test_examples = self.test_examples[self.test_examples[
            globConst.VALUE_COL_NAME] == 1]
        pos_test_examples_array = pos_test_examples.values
        nodes_array = self.nodes.values

        mapped_pos_triples, mapped_nodes = self.get_mapped_triples_and_nodes(
            triples=pos_test_examples_array, nodes=nodes_array)
        # Group the distinct values of mapped_nodes' first column by the
        # value in its second column (treated as the node type here).
        node_types = np.unique(mapped_nodes[:, 1])
        nodes_dic = {
            node_type: np.unique(
                mapped_nodes[np.where(mapped_nodes[:, 1] == node_type)][:, 0])
            for node_type in node_types
        }
        head_tuples = np.unique(mapped_pos_triples[:, 0:2], axis=0)
        tail_tuples = np.unique(mapped_pos_triples[:, 1:3], axis=0)

        def record_hits(data, corrupted_slot, entity_col, hits_per_k):
            # Rank the positives in `data` against the corrupted triples found
            # at position `corrupted_slot` of calc_corrupted_triples' result
            # and append one 0/1 flag per k to `hits_per_k`. `entity_col` is
            # the triple column that was corrupted.
            ranked_pos_examples, _ = (
                self.model.get_ranked_and_sorted_predictions(data))
            corrupted_examples = utils.calc_corrupted_triples(
                pos_example=data[0],
                nodes=mapped_nodes,
                nodes_dic=nodes_dic,
                filtered=False,
                pos_examples=mapped_pos_triples,
            )[corrupted_slot]
            # TODO: flagged "VERY WRONG" by the original author; the
            # construction is kept as it was. np.vstack replaces the
            # deprecated alias np.row_stack.
            all_examples = np.unique(
                np.vstack((corrupted_examples,
                           np.column_stack((data, [0] * len(data))))),
                axis=0)
            ranked_all_examples, _ = (
                self.model.get_ranked_and_sorted_predictions(all_examples))
            # FIXME (from the original): should this frame be larger?
            search_data = ranked_all_examples[0:ks[-1] + 1, :]
            frame_size = len(search_data)
            increase_search_frame_by = [0] * len(ks)
            for example in ranked_pos_examples:
                # 1-based rank of this positive inside the search frame. The
                # tail branch used to compare the 0-based index, which was
                # one position too lenient and disagreed with the head branch.
                rank = np.where(
                    search_data[:, entity_col] == example[entity_col])[0] + 1
                for i, k in enumerate(ks):
                    current_k = min(k + increase_search_frame_by[i],
                                    frame_size)
                    # An empty match (positive outside the frame) is a miss;
                    # truth-testing an empty array is deprecated in NumPy and
                    # rejected by recent releases.
                    if rank.size > 0 and rank <= current_k:
                        hits_per_k[i].append(1)
                        # Each hit widens the frame for later positives.
                        increase_search_frame_by[i] += 1
                    else:
                        hits_per_k[i].append(0)

        # corrupting tail
        tqdmbuffer = TqdmBuffer() if globConst.GUI_MODE else None
        for head, relation in tqdm(head_tuples, file=tqdmbuffer):
            data = mapped_pos_triples[np.where(
                (mapped_pos_triples[:, 0] == head) &
                (mapped_pos_triples[:, 1] == relation))]
            record_hits(data,
                        corrupted_slot=1,
                        entity_col=2,
                        hits_per_k=k_raw_corrupted_tail)

        # corrupting head
        tqdmbuffer = TqdmBuffer() if globConst.GUI_MODE else None
        for relation, tail in tqdm(tail_tuples, file=tqdmbuffer):
            data = mapped_pos_triples[np.where(
                (mapped_pos_triples[:, 1] == relation) &
                (mapped_pos_triples[:, 2] == tail))]
            record_hits(data,
                        corrupted_slot=0,
                        entity_col=0,
                        hits_per_k=k_raw_corrupted_head)

        k_results_corrupted_head = [
            sum(hits) / len(hits) for hits in k_raw_corrupted_head
        ]
        k_results_corrupted_tail = [
            sum(hits) / len(hits) for hits in k_raw_corrupted_tail
        ]

        return {
            RankMetricType.HITS_AT_K: (k_results_corrupted_head,
                                       k_results_corrupted_tail),
        }