def get_rank_lists(self, params):
    """Rank one positive example against its corrupted heads and tails.

    Args:
        params: 6-tuple ``(pos_example, mapped_nodes, nodes_dic,
            mapped_pos_triples, filtered_setting, unfiltered_setting)``.
            The first four entries are forwarded to
            ``utils.calc_corrupted_triples``; the two flags select which
            rank results are computed.

    Returns:
        Tuple ``(unfiltered_head_ranks, unfiltered_tail_ranks,
        filtered_head_ranks, filtered_tail_ranks)``. The entries of a
        setting that is switched off are ``None``.
    """
    (
        pos_example,
        mapped_nodes,
        nodes_dic,
        mapped_pos_triples,
        filtered_setting,
        unfiltered_setting,
    ) = params

    (
        raw_heads,
        raw_tails,
        filtered_heads,
        filtered_tails,
    ) = utils.calc_corrupted_triples(
        pos_example=pos_example,
        nodes=mapped_nodes,
        nodes_dic=nodes_dic,
        filtered=filtered_setting,
        pos_examples=mapped_pos_triples,
    )

    def rank_pair(enabled, corrupted_heads, corrupted_tails):
        # Head is ranked before tail, matching the order callers observe.
        if not enabled:
            return None, None
        head_result = self.get_rank_for_corrupted_examples(
            corrupted_heads, pos_example)
        tail_result = self.get_rank_for_corrupted_examples(
            corrupted_tails, pos_example)
        return head_result, tail_result

    unfiltered_pair = rank_pair(unfiltered_setting, raw_heads, raw_tails)
    filtered_pair = rank_pair(filtered_setting, filtered_heads, filtered_tails)
    return unfiltered_pair + filtered_pair
def evaluate_ranked_metrics_1(self, ks, metrics, unfiltered_setting=True, filtered_setting=False):
    """Compute rank-based metrics over the positive test examples.

    The rows of ``self.test_examples`` whose ``globConst.VALUE_COL_NAME``
    column equals 1 are mapped together with ``self.nodes``. For each mapped
    positive triple the corrupted heads and tails are generated and the
    positive is ranked against them via
    ``self.get_rank_for_corrupted_examples``.

    Args:
        ks: cut-off values, forwarded unchanged to
            ``self.calculate_hits_at_k``.
        metrics: container of ``RankMetricType`` members to compute.
        unfiltered_setting: when true, ranks against the unfiltered
            corruptions are collected.
        filtered_setting: when true, ranks against the filtered corruptions
            are collected.

    Returns:
        dict mapping each requested ``RankMetricType`` to the value returned
        by ``calculate_hits_at_k`` / ``calculate_mrr``.

    Note:
        ``HITS_AT_K`` and ``MRR`` are computed from the filtered rank lists
        and the ``*_UNFILTERED`` variants from the unfiltered ones; a list
        stays empty (``num_examples`` is 0) when its setting is switched off.
    """
    metric_results = {}

    # get corrupted triples
    positive_mask = self.test_examples[globConst.VALUE_COL_NAME] == 1
    pos_test_examples_array = self.test_examples[positive_mask].values
    nodes_array = self.nodes.values
    mapped_pos_triples, mapped_nodes = self.get_mapped_triples_and_nodes(
        triples=pos_test_examples_array, nodes=nodes_array)

    # Group the node ids (column 0) by node type (column 1).
    node_types = np.unique(mapped_nodes[:, 1])
    nodes_dic = {
        node_type: mapped_nodes[np.where(mapped_nodes[:, 1] == node_type)][:, 0]
        for node_type in node_types
    }

    filtered_ranks_corrupted_heads = []
    filtered_ranks_corrupted_tails = []
    unfiltered_ranks_corrupted_heads = []
    unfiltered_ranks_corrupted_tails = []

    print("calculating corrupted triples")
    tqdmbuffer = TqdmBuffer() if globConst.GUI_MODE else None
    for pos_example in tqdm(mapped_pos_triples,
                            total=mapped_pos_triples.shape[0],
                            file=tqdmbuffer):
        (
            unfiltered_corrupted_head,
            unfiltered_corrupted_tail,
            filtered_corrupted_head,
            filtered_corrupted_tail,
        ) = utils.calc_corrupted_triples(
            pos_example=pos_example,
            nodes=mapped_nodes,
            nodes_dic=nodes_dic,
            filtered=filtered_setting,
            pos_examples=mapped_pos_triples,
        )
        if unfiltered_setting:
            unfiltered_ranks_corrupted_heads.append(
                self.get_rank_for_corrupted_examples(
                    unfiltered_corrupted_head, pos_example))
            unfiltered_ranks_corrupted_tails.append(
                self.get_rank_for_corrupted_examples(
                    unfiltered_corrupted_tail, pos_example))
        if filtered_setting:
            filtered_ranks_corrupted_heads.append(
                self.get_rank_for_corrupted_examples(
                    filtered_corrupted_head, pos_example))
            filtered_ranks_corrupted_tails.append(
                self.get_rank_for_corrupted_examples(
                    filtered_corrupted_tail, pos_example))

    filtered_num_examples = len(filtered_ranks_corrupted_heads)
    unfiltered_num_examples = len(unfiltered_ranks_corrupted_heads)

    # HITS@K
    if RankMetricType.HITS_AT_K in metrics:
        metric_results[RankMetricType.HITS_AT_K] = self.calculate_hits_at_k(
            ks=ks,
            ranks_corrupted_heads=filtered_ranks_corrupted_heads,
            ranks_corrupted_tails=filtered_ranks_corrupted_tails,
            num_examples=filtered_num_examples,
        )
    # HITS@K unfiltered
    if RankMetricType.HITS_AT_K_UNFILTERED in metrics:
        metric_results[RankMetricType.HITS_AT_K_UNFILTERED] = self.calculate_hits_at_k(
            ks=ks,
            ranks_corrupted_heads=unfiltered_ranks_corrupted_heads,
            ranks_corrupted_tails=unfiltered_ranks_corrupted_tails,
            num_examples=unfiltered_num_examples,
        )
    # MRR
    if RankMetricType.MRR in metrics:
        metric_results[RankMetricType.MRR] = self.calculate_mrr(
            ranks_corrupted_heads=filtered_ranks_corrupted_heads,
            ranks_corrupted_tails=filtered_ranks_corrupted_tails,
            num_examples=filtered_num_examples,
        )
    # MRR unfiltered
    if RankMetricType.MRR_UNFILTERED in metrics:
        # Stored under its own key: writing to RankMetricType.MRR here would
        # overwrite the filtered MRR and never report the unfiltered one.
        metric_results[RankMetricType.MRR_UNFILTERED] = self.calculate_mrr(
            ranks_corrupted_heads=unfiltered_ranks_corrupted_heads,
            ranks_corrupted_tails=unfiltered_ranks_corrupted_tails,
            num_examples=unfiltered_num_examples,
        )
    return metric_results
def evaluate_ranked_metrics_3(self, ks, metrics, unfiltered_setting=True, filtered_setting=False):
    """Compute grouped hits@k for head and tail corruption.

    The positive test triples (rows of ``self.test_examples`` whose
    ``globConst.VALUE_COL_NAME`` column equals 1) are grouped by their
    (head, relation) pair for tail corruption and by their (relation, tail)
    pair for head corruption. Within a group every positive is looked up in
    the model's ranking of the group's positives plus corruptions.

    Args:
        ks: non-empty sequence of cut-offs. The search window is sized from
            ``ks[-1]``, so ``ks`` is presumably expected in ascending order
            (TODO confirm against callers).
        metrics: accepted for interface parity; not read by this method.
        unfiltered_setting: accepted for interface parity; not read here.
        filtered_setting: accepted for interface parity; not read here
            (corruptions are always requested with ``filtered=False``).

    Returns:
        dict with the single key ``RankMetricType.HITS_AT_K`` mapping to
        ``(head_hit_rates, tail_hit_rates)``, each a list with one hit rate
        per entry of ``ks``. A rate is ``0.0`` when no positive example was
        evaluated.
    """
    # get corrupted triples
    positive_mask = self.test_examples[globConst.VALUE_COL_NAME] == 1
    pos_test_examples_array = self.test_examples[positive_mask].values
    nodes_array = self.nodes.values
    mapped_pos_triples, mapped_nodes = self.get_mapped_triples_and_nodes(
        triples=pos_test_examples_array, nodes=nodes_array)

    # Unique node ids (column 0) per node type (column 1).
    node_types = np.unique(mapped_nodes[:, 1])
    nodes_dic = {
        node_type: np.unique(
            mapped_nodes[np.where(mapped_nodes[:, 1] == node_type)][:, 0])
        for node_type in node_types
    }

    head_tuples = np.unique(mapped_pos_triples[:, 0:2], axis=0)
    tail_tuples = np.unique(mapped_pos_triples[:, 1:3], axis=0)

    # corrupting tail
    k_raw_corrupted_tail = self._hits_at_k_per_group(
        group_keys=head_tuples,
        corrupt_tail=True,
        ks=ks,
        mapped_pos_triples=mapped_pos_triples,
        mapped_nodes=mapped_nodes,
        nodes_dic=nodes_dic,
    )
    # corrupting head
    k_raw_corrupted_head = self._hits_at_k_per_group(
        group_keys=tail_tuples,
        corrupt_tail=False,
        ks=ks,
        mapped_pos_triples=mapped_pos_triples,
        mapped_nodes=mapped_nodes,
        nodes_dic=nodes_dic,
    )

    # Guard the averages: without positive examples the hit lists are empty
    # and a plain division would raise ZeroDivisionError.
    k_results_corrupted_head = [
        sum(hits) / len(hits) if hits else 0.0
        for hits in k_raw_corrupted_head
    ]
    k_results_corrupted_tail = [
        sum(hits) / len(hits) if hits else 0.0
        for hits in k_raw_corrupted_tail
    ]
    return {
        RankMetricType.HITS_AT_K: (k_results_corrupted_head,
                                   k_results_corrupted_tail),
    }

def _hits_at_k_per_group(self, group_keys, corrupt_tail, ks, mapped_pos_triples, mapped_nodes, nodes_dic):
    """Collect per-k hit flags (1/0) for every positive of every group.

    Args:
        group_keys: rows holding the two fixed components of a group:
            (head, relation) when ``corrupt_tail`` is true, otherwise
            (relation, tail).
        corrupt_tail: true to vary the tail (column 2), false to vary the
            head (column 0).
        ks: cut-offs; see ``evaluate_ranked_metrics_3``.
        mapped_pos_triples: all mapped positive triples.
        mapped_nodes: mapped nodes, forwarded to
            ``utils.calc_corrupted_triples``.
        nodes_dic: node ids per node type, forwarded likewise.

    Returns:
        list with one list of 0/1 flags per entry of ``ks``.
    """
    if corrupt_tail:
        first_col, second_col, varied_col = 0, 1, 2
    else:
        first_col, second_col, varied_col = 1, 2, 0

    hits_per_k = [[] for _ in ks]
    tqdmbuffer = TqdmBuffer() if globConst.GUI_MODE else None
    for first_value, second_value in tqdm(group_keys, file=tqdmbuffer):
        data = mapped_pos_triples[np.where(
            (mapped_pos_triples[:, first_col] == first_value)
            * (mapped_pos_triples[:, second_col] == second_value))]
        ranked_pos_examples, _ = self.model.get_ranked_and_sorted_predictions(
            data)
        corrupted_head, corrupted_tail, _, _ = utils.calc_corrupted_triples(
            pos_example=data[0],
            nodes=mapped_nodes,
            nodes_dic=nodes_dic,
            filtered=False,
            pos_examples=mapped_pos_triples,
        )
        corrupted_examples = corrupted_tail if corrupt_tail else corrupted_head
        # TODO (kept from the original author, who marked this step
        # "VERY WRONG"): the positives get a constant 0 appended as an extra
        # column before being merged with the corruptions.
        all_examples = np.unique(
            np.vstack((corrupted_examples,
                       np.column_stack((data, [0] * len(data))))),
            axis=0)
        ranked_all_examples, _ = self.model.get_ranked_and_sorted_predictions(
            all_examples)

        # FIXME (kept from the original): the window holds ks[-1] + 1 rows
        # although the per-k frame below may grow beyond that.
        search_data = ranked_all_examples[0:ks[-1] + 1, :]
        increase_search_frame_by = [0] * len(ks)
        for example in ranked_pos_examples:
            matches = np.where(
                search_data[:, varied_col] == example[varied_col])[0]
            # 1-based rank inside the window, or None when the positive is
            # not in it. Using an explicit scalar avoids truth-testing a
            # NumPy array (empty or multi-element) in the comparison below.
            rank = int(matches[0]) + 1 if matches.size else None
            for i, k in enumerate(ks):
                current_k = min(k + increase_search_frame_by[i],
                                len(search_data))
                if rank is not None and rank <= current_k:
                    hits_per_k[i].append(1)
                    # A found positive occupies one slot, so later positives
                    # of the same group get one more position.
                    increase_search_frame_by[i] += 1
                else:
                    hits_per_k[i].append(0)
    return hits_per_k