def get_incremental_h1_for(model_name, dataset_name):
    entries = performances.read_filtered_ranks_entries_for(model_name, dataset_name)

    # collect the filtered rank of each prediction, grouped by the peers
    # interval of its *source* entity (the tail for head predictions, and
    # vice versa)
    source_peers_interval_2_ranks = defaultdict(lambda: [])
    for entry in entries:
        peer_class = test_fact_2_peer_class[
            entry['head'] + ";" + entry['relation'] + ";" + entry['tail']]
        head_peers_class, tail_peers_class = peer_class.split("__")
        source_peers_interval_2_ranks[tail_peers_class].append(entry['head_rank_filtered'])
        source_peers_interval_2_ranks[head_peers_class].append(entry['tail_rank_filtered'])

    # make each interval also include the ranks of all previous intervals
    source_peers_interval_2_incremental_ranks = dict()
    for key in keys:
        source_peers_interval_2_incremental_ranks[key] = source_peers_interval_2_ranks[key].copy()
    for i in range(len(keys)):
        key = keys[i]
        for j in range(i + 1, len(keys)):
            cur_key = keys[j]
            source_peers_interval_2_incremental_ranks[cur_key] += source_peers_interval_2_ranks[key]

    # hits@1 of each incremental interval, over all head and tail predictions
    source_peers_interval_2_incremental_hits1 = dict()
    for i in range(len(keys)):
        ranks = np.array(source_peers_interval_2_incremental_ranks[keys[i]])
        hits_1_count = np.sum(ranks == 1.0)
        source_peers_interval_2_incremental_hits1[keys[i]] = \
            float(hits_1_count) / float(len(entries) * 2)

    return source_peers_interval_2_incremental_hits1
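
# --- A minimal, self-contained sketch (toy data, not from the project) of the
# incremental accumulation used above: each interval's rank list absorbs the
# ranks of every earlier interval, so the last interval covers all predictions.
import numpy as np

toy_keys = ["0-25", "25-50", "50-100"]  # hypothetical interval labels
toy_ranks = {"0-25": [1, 3], "25-50": [1, 1], "50-100": [2]}

toy_incremental = {k: list(toy_ranks[k]) for k in toy_keys}
for i, key in enumerate(toy_keys):
    for later_key in toy_keys[i + 1:]:
        toy_incremental[later_key] += toy_ranks[key]

toy_total = sum(len(v) for v in toy_ranks.values())
for k in toy_keys:
    h1 = float(np.sum(np.array(toy_incremental[k]) == 1)) / toy_total
    print(k, round(h1, 2))  # "50-100" covers all 5 ranks: 3 hits / 5 = 0.6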
def get_mrr_by_bucket(dataset, model_name, fact_2_arity, arity_bucket_2_facts_count):
    model_entries = read_filtered_ranks_entries_for(
        model_name, dataset.name, entity_number=len(dataset.entities))

    # === collect the filtered ranks of each prediction, by arity bucket ===
    bucket_2_ranks = defaultdict(lambda: [])
    for entry in model_entries:
        head = entry['head']
        relation = entry['relation']
        tail = entry['tail']
        head_rank_filtered = entry['head_rank_filtered']
        tail_rank_filtered = entry['tail_rank_filtered']

        fact_arity = fact_2_arity[";".join([head, relation, tail])]
        fact_arity_bucket = get_bucket_from_arity(fact_arity)
        bucket_2_ranks[fact_arity_bucket].append(head_rank_filtered)
        bucket_2_ranks[fact_arity_bucket].append(tail_rank_filtered)

    # === compute the MRR of each arity bucket ===
    mrr_row = []
    for bucket in BUCKETS:
        if arity_bucket_2_facts_count[bucket] > 0:
            mrr = compute_mrr(bucket_2_ranks[bucket])
            mrr_row.append(round(mrr, 2))
        else:
            mrr_row.append('--')
    return mrr_row
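
# `get_bucket_from_arity` is called above but not defined in these snippets; a
# plausible sketch (an assumption: it mirrors get_support_bucket_from_support,
# with BUCKETS as (low, high) pairs over arity values) could be:
def get_bucket_from_arity_sketch(arity_value):
    for bucket in BUCKETS:
        if bucket[0] <= arity_value < bucket[1]:
            return bucket
    return BUCKETS[-1]  # clamp values at or above the last upper bound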
def get_bucket_2_incremental_mrr(dataset, model_name, fact_2_support):
    model_entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset.name, entity_number=len(dataset.entities))

    support_bucket_2_head_ranks = defaultdict(lambda: [])
    support_bucket_2_tail_ranks = defaultdict(lambda: [])
    for entry in model_entries:
        head = entry['head']
        relation = entry['relation']
        tail = entry['tail']
        head_rank_filtered = entry['head_rank_filtered']
        tail_rank_filtered = entry['tail_rank_filtered']

        fact_key = html.unescape(";".join([head, relation, tail]))
        support_value = fact_2_support[fact_key]
        support_bucket = get_support_bucket_from_support(support_value)
        support_bucket_2_head_ranks[support_bucket].append(head_rank_filtered)
        support_bucket_2_tail_ranks[support_bucket].append(tail_rank_filtered)

    # make each bucket also include the ranks of all previous buckets
    support_bucket_2_incremental_head_ranks = copy.deepcopy(support_bucket_2_head_ranks)
    support_bucket_2_incremental_tail_ranks = copy.deepcopy(support_bucket_2_tail_ranks)
    for i in range(len(BUCKETS)):
        start_bucket = BUCKETS[i]
        for j in range(i + 1, len(BUCKETS)):
            bucket_to_increment = BUCKETS[j]
            support_bucket_2_incremental_head_ranks[bucket_to_increment] += \
                support_bucket_2_head_ranks[start_bucket]
            support_bucket_2_incremental_tail_ranks[bucket_to_increment] += \
                support_bucket_2_tail_ranks[start_bucket]

    support_bucket_2_incremental_head_mrr = dict()
    support_bucket_2_incremental_tail_mrr = dict()
    support_bucket_2_incremental_mrr = dict()
    for bucket in BUCKETS:
        support_bucket_2_incremental_head_mrr[bucket] = compute_mrr(
            support_bucket_2_incremental_head_ranks[bucket])
        support_bucket_2_incremental_tail_mrr[bucket] = compute_mrr(
            support_bucket_2_incremental_tail_ranks[bucket])
        support_bucket_2_incremental_mrr[bucket] = (
            support_bucket_2_incremental_head_mrr[bucket] +
            support_bucket_2_incremental_tail_mrr[bucket]) / 2

    return (support_bucket_2_incremental_head_mrr,
            support_bucket_2_incremental_tail_mrr,
            support_bucket_2_incremental_mrr)
def get_interval_2_incremental_mrr(dataset, model_name):
    model_entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset.name, entity_number=len(dataset.entities))

    source_peers_interval_2_ranks = defaultdict(lambda: [])
    target_peers_interval_2_ranks = defaultdict(lambda: [])
    for entry in model_entries:
        head, relation, tail, head_rank_filtered, tail_rank_filtered = \
            entry['head'], entry['relation'], entry['tail'], \
            entry['head_rank_filtered'], entry['tail_rank_filtered']

        fact_key = html.unescape(";".join([head, relation, tail]))
        peer_class = test_fact_2_peer_class[fact_key]
        head_peers, tail_peers = peer_class.split("__")

        # for head predictions the source entity is the tail, and vice versa
        source_peers_interval_2_ranks[head_peers].append(tail_rank_filtered)
        source_peers_interval_2_ranks[tail_peers].append(head_rank_filtered)
        target_peers_interval_2_ranks[head_peers].append(head_rank_filtered)
        target_peers_interval_2_ranks[tail_peers].append(tail_rank_filtered)

    # make each interval also include the ranks of all previous intervals
    source_peers_interval_2_incremental_ranks = copy.deepcopy(source_peers_interval_2_ranks)
    target_peers_interval_2_incremental_ranks = copy.deepcopy(target_peers_interval_2_ranks)
    for i in range(len(keys)):
        start_key = keys[i]
        for j in range(i + 1, len(keys)):
            key_to_increment = keys[j]
            source_peers_interval_2_incremental_ranks[key_to_increment] += \
                source_peers_interval_2_ranks[start_key]
            target_peers_interval_2_incremental_ranks[key_to_increment] += \
                target_peers_interval_2_ranks[start_key]

    source_peer_class_2_incremental_mrr = dict()
    target_peer_class_2_incremental_mrr = dict()
    for key in keys:
        source_peer_class_2_incremental_mrr[key] = compute_mrr(
            source_peers_interval_2_incremental_ranks[key])
        target_peer_class_2_incremental_mrr[key] = compute_mrr(
            target_peers_interval_2_incremental_ranks[key])
    return source_peer_class_2_incremental_mrr, target_peer_class_2_incremental_mrr
def get_bucket_2_incremental_h1(dataset, model_name, fact_2_support,
                                rps_bucket_2_incremental_count):
    model_entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset.name, entity_number=len(dataset.entities))

    # count the hits@1 (head and tail predictions) for each RPS bucket
    RPS_bucket_2_h1 = defaultdict(lambda: 0)
    for entry in model_entries:
        head = entry['head']
        relation = entry['relation']
        tail = entry['tail']
        head_rank_filtered = entry['head_rank_filtered']
        tail_rank_filtered = entry['tail_rank_filtered']

        fact_key = html.unescape(";".join([head, relation, tail]))
        RPS_value = fact_2_support[fact_key]
        RPS_bucket = get_support_bucket_from_support(RPS_value)
        if head_rank_filtered == 1:
            RPS_bucket_2_h1[RPS_bucket] += 1
        if tail_rank_filtered == 1:
            RPS_bucket_2_h1[RPS_bucket] += 1

    # make each bucket also include the hits of all previous buckets
    support_bucket_2_incremental_h1 = copy.deepcopy(RPS_bucket_2_h1)
    for i in range(len(BUCKETS)):
        start_bucket = BUCKETS[i]
        for j in range(i + 1, len(BUCKETS)):
            bucket_to_increment = BUCKETS[j]
            support_bucket_2_incremental_h1[bucket_to_increment] += RPS_bucket_2_h1[start_bucket]

    support_bucket_2_incremental_h1_percentage = dict()
    for bucket in BUCKETS:
        support_bucket_2_incremental_h1_percentage[bucket] = \
            support_bucket_2_incremental_h1[bucket] / float(rps_bucket_2_incremental_count[bucket])
    return support_bucket_2_incremental_h1_percentage
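
# `rps_bucket_2_incremental_count` is provided by the caller; a plausible way
# to build it (an assumption, mirroring the incremental logic above) is a
# cumulative count of predictions (two per test fact) per support bucket:
def build_incremental_counts_sketch(bucket_2_prediction_count):
    incremental = dict(bucket_2_prediction_count)
    for i in range(len(BUCKETS)):
        for j in range(i + 1, len(BUCKETS)):
            incremental[BUCKETS[j]] += bucket_2_prediction_count[BUCKETS[i]]
    return incremental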
def get_mrr_by_bucket(dataset_obj, model_name, fact_2_support, support_bucket_2_facts_count):
    model_entries = read_filtered_ranks_entries_for(
        model_name, dataset_obj.name, entity_number=len(dataset_obj.entities))

    head_ranks = defaultdict(lambda: [])
    tail_ranks = defaultdict(lambda: [])
    for entry in model_entries:
        head = entry['head']
        relation = entry['relation']
        tail = entry['tail']
        head_rank_filtered = entry['head_rank_filtered']
        tail_rank_filtered = entry['tail_rank_filtered']

        path_support = fact_2_support[";".join([head, relation, tail])]
        path_support_bucket = get_support_bucket_from_support(path_support)
        head_ranks[path_support_bucket].append(head_rank_filtered)
        tail_ranks[path_support_bucket].append(tail_rank_filtered)

    head_mrr_row = []
    tail_mrr_row = []
    for bucket in BUCKETS:
        if support_bucket_2_facts_count[bucket] > 0:
            head_mrr_row.append(compute_mrr(head_ranks[bucket]))
            tail_mrr_row.append(compute_mrr(tail_ranks[bucket]))
        else:
            head_mrr_row.append("--")
            tail_mrr_row.append("--")
    return head_mrr_row, tail_mrr_row
def get_hits_by_bucket(dataset_name, model_name, fact_2_support, support_bucket_2_facts_count):
    model_entries = read_filtered_ranks_entries_for(model_name, dataset_name)

    # === count the hits@1 and misses for each path support bucket ===
    all_head_hits = 0
    head_hits = defaultdict(lambda: 0)
    all_head_misses = 0
    head_misses = defaultdict(lambda: 0)
    all_tail_hits = 0
    tail_hits = defaultdict(lambda: 0)
    all_tail_misses = 0
    tail_misses = defaultdict(lambda: 0)

    for entry in model_entries:
        head = entry['head']
        relation = entry['relation']
        tail = entry['tail']
        head_rank_filtered = entry['head_rank_filtered']
        tail_rank_filtered = entry['tail_rank_filtered']

        path_support = fact_2_support[";".join([head, relation, tail])]
        path_support_bucket = get_support_bucket_from_support(path_support)

        if head_rank_filtered == 1:
            all_head_hits += 1
            head_hits[path_support_bucket] += 1
        else:
            all_head_misses += 1
            head_misses[path_support_bucket] += 1

        if tail_rank_filtered == 1:
            all_tail_hits += 1
            tail_hits[path_support_bucket] += 1
        else:
            all_tail_misses += 1
            tail_misses[path_support_bucket] += 1

    head_hits_row = []
    for bucket in BUCKETS:
        if support_bucket_2_facts_count[bucket] > 0:
            hits_1_perc = round(
                float(head_hits[bucket]) / float(support_bucket_2_facts_count[bucket]), 2)
            head_hits_row.append(hits_1_perc)
        else:
            head_hits_row.append('--')

    tail_hits_row = []
    for bucket in BUCKETS:
        if support_bucket_2_facts_count[bucket] > 0:
            hits_1_perc = round(
                float(tail_hits[bucket]) / float(support_bucket_2_facts_count[bucket]), 2)
            tail_hits_row.append(hits_1_perc)
        else:
            tail_hits_row.append('--')

    return head_hits_row, tail_hits_row
def compute_mrr(ranks):
    mrr = np.average([1.0 / rank for rank in ranks])
    return round(mrr, 2)


for i in range(len(all_models_names)):
    model_name = all_models_names[i]

    # === count the MRR for each source peers interval ===
    source_peer_interval_2_ranks_list = defaultdict(lambda: [])
    model_entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset_name, entity_number=entities_count)

    for entry in model_entries:
        head, relation, tail = entry['head'], entry['relation'], entry['tail']
        peer_class = test_fact_2_peer_class[";".join([head, relation, tail])]
        head_peers_interval, tail_peers_interval = peer_class.split("__")

        # for head predictions the source entity is the tail, and vice versa
        head_pred_source_peers_interval = tail_peers_interval
        tail_pred_source_peers_interval = head_peers_interval
        source_peer_interval_2_ranks_list[head_pred_source_peers_interval].append(
            entry['head_rank_filtered'])
        source_peer_interval_2_ranks_list[tail_pred_source_peers_interval].append(
            entry['tail_rank_filtered'])
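    # A sketch of a natural next step (assuming only the names above; not
    # necessarily the project's exact code): one MRR per source peers interval.
    interval_2_mrr = {interval: compute_mrr(ranks)
                      for interval, ranks in source_peer_interval_2_ranks_list.items()}
    print(model_name, interval_2_mrr)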
from performances import read_filtered_ranks_entries_for

BUCKETS = ((0.0, 0.1), (0.1, 0.2), (0.2, 0.3), (0.3, 0.4), (0.4, 0.5),
           (0.5, 0.6), (0.6, 0.7), (0.7, 0.8), (0.8, 0.9), (0.9, 1.0))


def get_support_bucket_from_support(support_value):
    for bucket in BUCKETS:
        if bucket[0] <= support_value < bucket[1]:
            return bucket
    # the last bucket is closed on the right, so 1.0 belongs to (0.9, 1.0)
    if support_value == 1.0:
        return 0.9, 1.0


dataset_name = FB15K_237
model_entries = read_filtered_ranks_entries_for(ANYBURL, dataset_name)
test_fact_2_path_support = tfidf_support.read(Dataset(dataset_name))

path_support_bucket_2_test_facts = defaultdict(lambda: [])
for test_fact in test_fact_2_path_support:
    support = test_fact_2_path_support[test_fact]
    support_bucket = get_support_bucket_from_support(support)
    path_support_bucket_2_test_facts[support_bucket].append(test_fact)

all_test_facts_count = float(len(test_fact_2_path_support))
path_support_bucket_2_facts_count = dict()
path_support_bucket_2_facts_percentage = dict()
for path_support_bucket in BUCKETS:
    test_facts_count = float(len(path_support_bucket_2_test_facts[path_support_bucket]))
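
# Quick sanity checks of the bucketing above (runnable as-is):
assert get_support_bucket_from_support(0.0) == (0.0, 0.1)
assert get_support_bucket_from_support(0.35) == (0.3, 0.4)
assert get_support_bucket_from_support(1.0) == (0.9, 1.0)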
from dataset_analysis.degrees import degree_classes
from dataset_analysis.degrees.degree_classes import CLASSES, INTERVALS
from datasets import FB15K
from io_utils import *
from models import SIMPLE, TRANSE, ROTATE, CONVE, RSN, TUCKER, ANYBURL
from performances import read_filtered_ranks_entries_for

model_entries = read_filtered_ranks_entries_for(ANYBURL, FB15K)
test_fact_2_class = degree_classes.read(FB15K, return_fact_2_class=True)

overall_all = 0.0

# build a dict that, for each degree class, tells us how many test facts
# belong to that degree class
degree_class_2_overall_counts = dict()
for degree_class in CLASSES:
    degree_class_2_overall_counts[degree_class] = 0.0
for entry in model_entries:
    head, relation, tail = entry['head'], entry['relation'], entry['tail']
    degree_class = test_fact_2_class[";".join([head, relation, tail])]
    overall_all += 1
    degree_class_2_overall_counts[degree_class] += 1

# these will be used to generate the CSV data
degree_class_2_head_hits_percentage = dict()
degree_class_2_tail_hits_percentage = dict()
degree_class_2_head_misses_percentage = dict()
degree_class_2_tail_misses_percentage = dict()

# compute
# - the number of all head hits@1 in the entire test set
def plot_mrr_heatmap_for(model_name, dataset_name, test_fact_2_peer_class):
    # === collect the filtered ranks for each source__target peer class ===
    st_peer_class_2_ranks = defaultdict(lambda: [])
    model_entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset_name, entity_number=entities_count)

    for entry in model_entries:
        head, relation, tail = entry['head'], entry['relation'], entry['tail']
        peer_class = test_fact_2_peer_class[";".join([head, relation, tail])]
        head_peers, tail_peers = peer_class.split("__")

        # for head predictions the source entity is the tail, and vice versa
        head_prediction_st_peers_class = tail_peers + "__" + head_peers
        tail_prediction_st_peers_class = head_peers + "__" + tail_peers
        st_peer_class_2_ranks[head_prediction_st_peers_class].append(
            entry['head_rank_filtered'])
        st_peer_class_2_ranks[tail_prediction_st_peers_class].append(
            entry['tail_rank_filtered'])

    # classes with no test facts are simply left out of the MRR dict
    st_peer_class_2_mrr = dict()
    for peer_class in PEER_CLASSES:
        if st_peers_class_2_overall_counts[peer_class] > 0:
            st_peer_class_2_mrr[peer_class] = compute_mrr(st_peer_class_2_ranks[peer_class])

    mrr_matrix = np.zeros(shape=(len(PEER_INTERVALS), len(PEER_INTERVALS)), dtype=float)
    for i in range(len(PEER_INTERVALS)):
        source_peer_interval = PEER_INTERVALS[i]
        source_peer_interval_str = str(source_peer_interval[0]) + "-" + str(source_peer_interval[1])
        for j in range(len(PEER_INTERVALS)):
            target_peer_interval = PEER_INTERVALS[j]
            target_peer_interval_str = str(target_peer_interval[0]) + "-" + str(target_peer_interval[1])

            st_peer_class = source_peer_interval_str + "__" + target_peer_interval_str
            if st_peer_class in st_peer_class_2_mrr:
                mrr_matrix[i, j] = st_peer_class_2_mrr[st_peer_class]
            else:
                # np.nan (rather than None) keeps the matrix a valid float
                # array and makes seaborn leave the cell blank
                mrr_matrix[i, j] = np.nan

    sns.heatmap(mrr_matrix,
                linewidth=0.5,
                annot=True,
                square=True,
                xticklabels=heatmap_x_ticks,
                yticklabels=heatmap_y_ticks,
                cmap="coolwarm_r",
                vmin=0.0,
                vmax=1.0)
    plt.xlabel("Target Peers")
    plt.ylabel("Source Peers")
    plt.title(model_name + " MRR by number of peers")
    plt.show()
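
# A minimal, standalone check of the NaN-blank behavior relied on above
# (assumes seaborn and matplotlib are installed): the second cell is left
# empty in the rendered heatmap.
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

sns.heatmap(np.array([[0.5, np.nan]]), annot=True, vmin=0.0, vmax=1.0)
plt.show()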
def get_hits_at_k_by_bucket(k, dataset, model_name, fact_2_arity, arity_bucket_2_facts_count):
    model_entries = read_filtered_ranks_entries_for(
        model_name, dataset.name, entity_number=len(dataset.entities))

    # === count the hits@k and misses for each arity bucket ===
    all_head_hits = 0
    head_hits = defaultdict(lambda: 0)
    all_head_misses = 0
    head_misses = defaultdict(lambda: 0)
    all_tail_hits = 0
    tail_hits = defaultdict(lambda: 0)
    all_tail_misses = 0
    tail_misses = defaultdict(lambda: 0)

    for entry in model_entries:
        head = entry['head']
        relation = entry['relation']
        tail = entry['tail']
        head_rank_filtered = entry['head_rank_filtered']
        tail_rank_filtered = entry['tail_rank_filtered']

        fact_arity = fact_2_arity[";".join([head, relation, tail])]
        fact_arity_bucket = get_bucket_from_arity(fact_arity)

        if head_rank_filtered <= k:
            all_head_hits += 1
            head_hits[fact_arity_bucket] += 1
        else:
            all_head_misses += 1
            head_misses[fact_arity_bucket] += 1

        if tail_rank_filtered <= k:
            all_tail_hits += 1
            tail_hits[fact_arity_bucket] += 1
        else:
            all_tail_misses += 1
            tail_misses[fact_arity_bucket] += 1

    head_hits_row = []
    for bucket in BUCKETS:
        if arity_bucket_2_facts_count[bucket] > 0:
            hits_k_perc = round(
                float(head_hits[bucket]) / float(arity_bucket_2_facts_count[bucket]), 2)
            head_hits_row.append(hits_k_perc)
        else:
            head_hits_row.append('--')

    tail_hits_row = []
    for bucket in BUCKETS:
        if arity_bucket_2_facts_count[bucket] > 0:
            hits_k_perc = round(
                float(tail_hits[bucket]) / float(arity_bucket_2_facts_count[bucket]), 2)
            tail_hits_row.append(hits_k_perc)
        else:
            tail_hits_row.append('--')

    return head_hits_row, tail_hits_row
    support_2_head_avg_rank = dict()
    support_2_tail_avg_rank = dict()
    for s in support_2_head_ranks:
        support_2_head_avg_rank[s] = np.average(support_2_head_ranks[s])
    for s in support_2_tail_ranks:
        support_2_tail_avg_rank[s] = np.average(support_2_tail_ranks[s])
    return support_2_head_avg_rank, support_2_tail_avg_rank


dataset_name = FB15K
models_names = [ROTATE]
test_fact_2_support = tfidf_support.read(Dataset(dataset_name))

for model_name in models_names:
    model_entries = performances.read_filtered_ranks_entries_for(model_name, dataset_name)
    support_2_head_avg_rank, support_2_tail_avg_rank = process(
        model_entries, test_fact_2_support)

    plot_dict(support_2_head_avg_rank,
              model_name + " support vs avg head rank",
              "support",
              "avg head rank of test facts with that support")
    plot_dict(support_2_tail_avg_rank,
              model_name + " support vs avg tail rank",
              "support",
              "avg tail rank of test facts with that support")
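
# `plot_dict` comes from io_utils and its body is not shown in these snippets;
# a minimal matplotlib sketch consistent with how it is called above
# (dict, title, x label, y label) might look like this:
import matplotlib.pyplot as plt

def plot_dict_sketch(d, title, x_label, y_label):
    xs = sorted(d.keys())  # plot keys in increasing order
    ys = [d[x] for x in xs]
    plt.scatter(xs, ys, s=8)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()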
    # average the rank lists collected for each number of head peers
    for key in head_peers_number_2_head_ranks:
        head_peers_number_2_head_ranks[key] = numpy.average(
            head_peers_number_2_head_ranks[key])
        head_peers_number_2_tail_ranks[key] = numpy.average(
            head_peers_number_2_tail_ranks[key])

    # average the rank lists collected for each number of tail peers
    for key in tail_peers_number_2_head_ranks:
        tail_peers_number_2_head_ranks[key] = numpy.average(
            tail_peers_number_2_head_ranks[key])
        tail_peers_number_2_tail_ranks[key] = numpy.average(
            tail_peers_number_2_tail_ranks[key])

    return (head_peers_number_2_head_ranks, tail_peers_number_2_head_ranks,
            head_peers_number_2_tail_ranks, tail_peers_number_2_tail_ranks)


head_prediction_2_peers, tail_prediction_2_peers = peers.read(FB15K)
test_triples = Dataset(FB15K).test_triples

transE_entries = performances.read_filtered_ranks_entries_for(TRANSE, FB15K)
rotatE_entries = performances.read_filtered_ranks_entries_for(ROTATE, FB15K)
tuckER_entries = performances.read_filtered_ranks_entries_for(TUCKER, FB15K)
convE_entries = performances.read_filtered_ranks_entries_for(CONVE, FB15K)
simplE_entries = performances.read_filtered_ranks_entries_for(SIMPLE, FB15K)
anyburl_entries = performances.read_filtered_ranks_entries_for(ANYBURL, FB15K)

rotatE_h_peers_2_h_rank, rotatE_t_peers_2_h_rank, rotatE_h_peers_2_t_rank, rotatE_t_peers_2_t_rank = analyze(
    rotatE_entries, head_prediction_2_peers, tail_prediction_2_peers)

plot_dict(
    rotatE_h_peers_2_h_rank,
    "RotatE head peers vs head mean rank",
    "head peers",
    "mean rank of all head predictions of entities with that number of head peers")
plot_dict(
from dataset_analysis.peers.peer_classes import PEER_CLASSES, PEER_INTERVALS
from dataset_analysis.peers import peer_classes
from datasets import FB15K, FB15K_237, WN18, WN18RR, YAGO3_10
from io_utils import *
from models import ANYBURL, TRANSE, CONVE, TUCKER, ROTATE, SIMPLE, RSN, CONVR, CROSSE
from performances import read_filtered_ranks_entries_for

dataset_name = WN18RR
model_entries = read_filtered_ranks_entries_for(CONVR, dataset_name)
test_fact_2_class = peer_classes.read(dataset_name, return_fact_2_class=True)

# === count and print the percentage of facts in each peer class ===
overall_all = 0.0
peers_class_2_overall_counts = dict()
peers_class_2_percentage = dict()

# initialize the data structure
for peer_class in PEER_CLASSES:
    peers_class_2_overall_counts[peer_class] = 0.0

# count the occurrences for each peer class and overall
for entry in model_entries:
    head, relation, tail = entry['head'], entry['relation'], entry['tail']
    peer_class = test_fact_2_class[";".join([head, relation, tail])]
    overall_all += 1
    peers_class_2_overall_counts[peer_class] += 1

# compute the percentage for each peer class
for peer_class in peers_class_2_overall_counts:
    peers_class_2_percentage[peer_class] = float(
        peers_class_2_overall_counts[peer_class]) / float(overall_all)
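
# An optional follow-up (a sketch using only the dicts built above): print the
# five most populous peer classes with their share of the test set.
for peer_class in sorted(peers_class_2_percentage,
                         key=peers_class_2_percentage.get,
                         reverse=True)[:5]:
    print(peer_class, round(peers_class_2_percentage[peer_class], 3))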
def plot_hits1_heatmap_for(model_name, dataset_name, test_fact_2_peer_class):
    # === count the percentage of hits@1 for each peer class ===
    all_head_hits = 0
    peer_class_2_head_hits_count = defaultdict(lambda: 0)
    all_tail_hits = 0
    peer_class_2_tail_hits_count = defaultdict(lambda: 0)

    model_entries = performances.read_filtered_ranks_entries_for(model_name, dataset_name)
    for entry in model_entries:
        head, relation, tail = entry['head'], entry['relation'], entry['tail']
        head_rank_filtered = entry['head_rank_filtered']
        tail_rank_filtered = entry['tail_rank_filtered']
        peer_class = test_fact_2_peer_class[";".join([head, relation, tail])]

        if head_rank_filtered == 1:
            all_head_hits += 1
            peer_class_2_head_hits_count[peer_class] += 1
        if tail_rank_filtered == 1:
            all_tail_hits += 1
            peer_class_2_tail_hits_count[peer_class] += 1

    peer_class_2_head_hits_perc = dict()
    peer_class_2_tail_hits_perc = dict()
    for peer_class in PEER_CLASSES:
        if peers_class_2_overall_counts[peer_class] > 0:
            head_hits_perc = float(peer_class_2_head_hits_count[peer_class]) / float(
                peers_class_2_overall_counts[peer_class])
            peer_class_2_head_hits_perc[peer_class] = round(head_hits_perc, 2)

            tail_hits_perc = float(peer_class_2_tail_hits_count[peer_class]) / float(
                peers_class_2_overall_counts[peer_class])
            peer_class_2_tail_hits_perc[peer_class] = round(tail_hits_perc, 2)
        else:
            # np.nan rather than None, so that the assignments into the float
            # matrices below are valid and seaborn leaves those cells blank
            peer_class_2_head_hits_perc[peer_class] = np.nan
            peer_class_2_tail_hits_perc[peer_class] = np.nan

    head_hits_percentages = np.zeros(shape=(len(PEER_INTERVALS), len(PEER_INTERVALS)), dtype=float)
    tail_hits_percentages = np.zeros(shape=(len(PEER_INTERVALS), len(PEER_INTERVALS)), dtype=float)
    for i in range(len(PEER_INTERVALS)):
        head_peer_interval = PEER_INTERVALS[i]
        head_peer_interval_str = str(head_peer_interval[0]) + "-" + str(head_peer_interval[1])
        for j in range(len(PEER_INTERVALS)):
            tail_peer_interval = PEER_INTERVALS[j]
            tail_peer_interval_str = str(tail_peer_interval[0]) + "-" + str(tail_peer_interval[1])

            peer_class = head_peer_interval_str + "__" + tail_peer_interval_str
            head_hits_percentages[i, j] = peer_class_2_head_hits_perc[peer_class]
            tail_hits_percentages[i, j] = peer_class_2_tail_hits_perc[peer_class]

    sns.heatmap(head_hits_percentages,
                linewidth=0.5,
                annot=True,
                xticklabels=heatmap_x_ticks,
                yticklabels=heatmap_y_ticks,
                cmap="coolwarm_r")
    plt.show()

    sns.heatmap(tail_hits_percentages,
                linewidth=0.5,
                annot=True,
                xticklabels=heatmap_x_ticks,
                yticklabels=heatmap_y_ticks,
                cmap="coolwarm_r")
    plt.show()
    hits_3_perc = float(hits_3) * 100 / len(all_ranks)
    hits_5_perc = float(hits_5) * 100 / len(all_ranks)
    hits_10_perc = float(hits_10) * 100 / len(all_ranks)

    print("Mean Rank:\t\t\t\t\t%f" % mean_rank)
    print("Mean Reciprocal Rank:\t\t%f%%" % mean_reciprocal_rank)
    print("Hits@1:\t\t\t\t\t\t%f%%" % hits_1_perc)
    print("Hits@3:\t\t\t\t\t\t%f%%" % hits_3_perc)
    print("Hits@5:\t\t\t\t\t\t%f%%" % hits_5_perc)
    print("Hits@10:\t\t\t\t\t%f%%" % hits_10_perc)
    print(str(round(hits_1_perc, 2)) + " " + str(round(hits_10_perc, 2)) + " " +
          str(round(mean_reciprocal_rank, 3)))

    # a, b, c = str(hits_1_perc), str(hits_10_perc), str(mean_reciprocal_rank)
    # a, b, c = a.replace("0.", "."), b.replace("0.", "."), c.replace("0.", ".")
    # print(a + " " + b + " " + c)


entity_2_in_degree, entity_2_out_degree, entity_2_degree = entity_degrees.read(
    datasets.FB15K)
filtered_ranks_entries_avg = performances.read_filtered_ranks_entries_for(
    models.RSN, datasets.FB15K, "avg")
filtered_ranks_entries_min = performances.read_filtered_ranks_entries_for(
    models.RSN, datasets.FB15K, "min")

for cur_dataset_name in datasets.ALL_DATASET_NAMES:
    for cur_model_name in models.ALL_MODEL_NAMES:
        print_metrics_for(cur_model_name, cur_dataset_name)
        print()
import math

from dataset_analysis.paths import tfidf_support
from datasets import FB15K, FB15K_237, WN18, WN18RR, YAGO3_10, Dataset
from io_utils import *
from models import ANYBURL, TRANSE, CONVE, TUCKER, ROTATE, SIMPLE, RSN, CONVR, CROSSE
from performances import read_filtered_ranks_entries_for
import numpy as np

dataset_name = FB15K
model_entries = read_filtered_ranks_entries_for(ROTATE, dataset_name)

BUCKETS = ((0.0, 0.1), (0.1, 0.2), (0.2, 0.3), (0.3, 0.4), (0.4, 0.5),
           (0.5, 0.6), (0.6, 0.7), (0.7, 0.8), (0.8, 0.9), (0.9, 1.0))


def get_support_bucket_from_support(support_value):
    for bucket in BUCKETS:
        if bucket[0] <= support_value < bucket[1]:
            return bucket
    # the last bucket is closed on the right, so 1.0 belongs to (0.9, 1.0)
    if support_value == 1.0:
        return 0.9, 1.0


def compute_mrr(ranks):
    return np.average([1.0 / rank for rank in ranks])


test_fact_2_path_support = tfidf_support.read(Dataset(dataset_name))

path_support_bucket_2_test_facts = defaultdict(lambda: [])
for test_fact in test_fact_2_path_support:
    support = test_fact_2_path_support[test_fact]
    support_bucket = get_support_bucket_from_support(support)
    path_support_bucket_2_test_facts[support_bucket].append(test_fact)
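
# Quick worked example of compute_mrr (runnable as-is): for ranks 1, 2 and 4,
# MRR = (1 + 1/2 + 1/4) / 3 ≈ 0.5833.
assert abs(compute_mrr([1, 2, 4]) - (1.75 / 3)) < 1e-9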