def get_incremental_h1_for(model_name, dataset_name):
    """Compute cumulative hits@1 ratios over the peer intervals in ``keys``.

    Every entry contributes two filtered ranks: the head rank is filed under
    the second half of the fact's ``"<head_peers>__<tail_peers>"`` peer class
    and the tail rank under the first half.  The value stored for ``keys[i]``
    counts the rank-1 predictions filed under ``keys[0]`` .. ``keys[i]`` and
    divides that count by the total number of predictions (two per entry).

    :param model_name: model whose filtered ranks are read
    :param dataset_name: dataset whose filtered ranks are read
    :return: dict mapping each key in ``keys`` to its cumulative hits@1 ratio
    """
    entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset_name)

    ranks_by_interval = defaultdict(lambda: [])
    for entry in entries:
        fact_key = entry['head'] + ";" + entry['relation'] + ";" + entry['tail']
        head_interval, tail_interval = test_fact_2_peer_class[fact_key].split(
            "__")

        ranks_by_interval[tail_interval].append(entry['head_rank_filtered'])
        ranks_by_interval[head_interval].append(entry['tail_rank_filtered'])

    # Start every interval with a private copy of its own ranks ...
    cumulative_ranks = {key: ranks_by_interval[key].copy() for key in keys}

    # ... then pour the ranks of each interval into all the intervals that
    # come after it in ``keys``.
    for position, lower_key in enumerate(keys):
        for higher_key in keys[position + 1:]:
            cumulative_ranks[higher_key] += ranks_by_interval[lower_key]

    cumulative_hits1 = dict()
    for key in keys:
        hits = np.sum(np.array(cumulative_ranks[key]) == 1.0)
        cumulative_hits1[key] = float(hits) / float(len(entries) * 2)

    return cumulative_hits1
Example #2
0
def get_mrr_by_bucket(dataset, model_name, fact_2_arity,
                      arity_bucket_2_facts_count):
    """Build one row of MRR values, one cell per arity bucket in ``BUCKETS``.

    The filtered head and tail ranks of every entry are pooled into the bucket
    that ``get_bucket_from_arity`` returns for the arity of the entry's fact.

    :param dataset: object exposing ``name`` and ``entities``
    :param model_name: model whose filtered ranks are read
    :param fact_2_arity: mapping from ``"head;relation;tail"`` to the arity
        of that fact
    :param arity_bucket_2_facts_count: mapping from bucket to a count; buckets
        whose count is not positive get the placeholder ``'--'``
    :return: list with, for each bucket in ``BUCKETS``, either the MRR rounded
        to two decimals or ``'--'``
    """
    entries = read_filtered_ranks_entries_for(
        model_name, dataset.name, entity_number=len(dataset.entities))

    ranks_per_bucket = defaultdict(lambda: [])
    for entry in entries:
        head, relation, tail = entry['head'], entry['relation'], entry['tail']
        head_rank = entry['head_rank_filtered']
        tail_rank = entry['tail_rank_filtered']

        arity_bucket = get_bucket_from_arity(
            fact_2_arity[";".join([head, relation, tail])])
        ranks_per_bucket[arity_bucket].extend((head_rank, tail_rank))

    row = []
    for bucket in BUCKETS:
        if not arity_bucket_2_facts_count[bucket] > 0:
            row.append('--')
            continue
        row.append(round(compute_mrr(ranks_per_bucket[bucket]), 2))

    return row
def get_bucket_2_incremental_mrr(dataset, model_name, fact_2_support):
    """Compute cumulative head, tail and combined MRR per support bucket.

    Each entry's filtered head and tail ranks are filed under the bucket that
    ``get_support_bucket_from_support`` returns for the support of the fact.
    The ranks of every bucket are then also added to all the buckets that
    follow it in ``BUCKETS``, so the value for a bucket covers that bucket and
    every earlier one.

    :param dataset: object exposing ``name`` and ``entities``
    :param model_name: model whose filtered ranks are read
    :param fact_2_support: mapping from the HTML-unescaped
        ``"head;relation;tail"`` key to the support value of the fact
    :return: tuple ``(head_mrr, tail_mrr, mrr)`` of dicts keyed by bucket,
        where ``mrr`` is the mean of the head and tail values
    """
    model_entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset.name, entity_number=len(dataset.entities))

    support_bucket_2_head_ranks = defaultdict(lambda: [])
    support_bucket_2_tail_ranks = defaultdict(lambda: [])

    for entry in model_entries:
        head, relation, tail = entry['head'], entry['relation'], entry['tail']
        head_rank_filtered = entry['head_rank_filtered']
        tail_rank_filtered = entry['tail_rank_filtered']

        # The support mapping is keyed by the unescaped form of the fact.
        fact_key = html.unescape(";".join([head, relation, tail]))
        support_bucket = get_support_bucket_from_support(
            fact_2_support[fact_key])

        support_bucket_2_head_ranks[support_bucket].append(head_rank_filtered)
        support_bucket_2_tail_ranks[support_bucket].append(tail_rank_filtered)

    # Deep copies keep the per-bucket lists intact while the cumulative
    # lists are being extended.
    incremental_head_ranks = copy.deepcopy(support_bucket_2_head_ranks)
    incremental_tail_ranks = copy.deepcopy(support_bucket_2_tail_ranks)

    for position, start_bucket in enumerate(BUCKETS):
        for bucket_to_increment in BUCKETS[position + 1:]:
            incremental_head_ranks[
                bucket_to_increment] += support_bucket_2_head_ranks[
                    start_bucket]
            incremental_tail_ranks[
                bucket_to_increment] += support_bucket_2_tail_ranks[
                    start_bucket]

    support_bucket_2_incremental_head_mrr = dict()
    support_bucket_2_incremental_tail_mrr = dict()
    support_bucket_2_incremental_mrr = dict()

    for bucket in BUCKETS:
        head_mrr = compute_mrr(incremental_head_ranks[bucket])
        tail_mrr = compute_mrr(incremental_tail_ranks[bucket])
        support_bucket_2_incremental_head_mrr[bucket] = head_mrr
        support_bucket_2_incremental_tail_mrr[bucket] = tail_mrr
        support_bucket_2_incremental_mrr[bucket] = (head_mrr + tail_mrr) / 2

    # The second element is the tail dict, not the head dict a second time.
    return (support_bucket_2_incremental_head_mrr,
            support_bucket_2_incremental_tail_mrr,
            support_bucket_2_incremental_mrr)
Example #4
0
def get_interval_2_incremental_mrr(dataset, model_name):
    """Compute cumulative MRR per peer interval, by source and by target.

    The peer class of a fact has the form ``"<head_peers>__<tail_peers>"``.
    In the "source" grouping the tail rank is filed under the head-peers
    interval and the head rank under the tail-peers interval; in the "target"
    grouping each rank is filed under its own side's interval.  The ranks of
    every interval are then added to all the intervals that follow it in
    ``keys`` before the MRR is computed.

    :param dataset: object exposing ``name`` and ``entities``
    :param model_name: model whose filtered ranks are read
    :return: tuple ``(source_mrr, target_mrr)`` of dicts keyed by the
        elements of ``keys``
    """
    entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset.name, entity_number=len(dataset.entities))

    ranks_by_source_interval = defaultdict(lambda: [])
    ranks_by_target_interval = defaultdict(lambda: [])

    for entry in entries:
        head = entry['head']
        relation = entry['relation']
        tail = entry['tail']
        head_rank = entry['head_rank_filtered']
        tail_rank = entry['tail_rank_filtered']

        fact_key = html.unescape(";".join([head, relation, tail]))
        head_peers, tail_peers = test_fact_2_peer_class[fact_key].split("__")

        ranks_by_source_interval[head_peers].append(tail_rank)
        ranks_by_source_interval[tail_peers].append(head_rank)
        ranks_by_target_interval[head_peers].append(head_rank)
        ranks_by_target_interval[tail_peers].append(tail_rank)

    cumulative_source_ranks = copy.deepcopy(ranks_by_source_interval)
    cumulative_target_ranks = copy.deepcopy(ranks_by_target_interval)

    # Add every interval's ranks to each interval listed after it.
    for position, lower_key in enumerate(keys):
        for higher_key in keys[position + 1:]:
            cumulative_source_ranks[higher_key] += ranks_by_source_interval[
                lower_key]
            cumulative_target_ranks[higher_key] += ranks_by_target_interval[
                lower_key]

    source_mrr = dict()
    target_mrr = dict()
    for key in keys:
        source_mrr[key] = compute_mrr(cumulative_source_ranks[key])
        target_mrr[key] = compute_mrr(cumulative_target_ranks[key])

    return source_mrr, target_mrr
def get_bucket_2_incremental_h1(dataset, model_name, fact_2_support,
                                rps_bucket_2_incremental_count):
    """Compute the cumulative hits@1 ratio for each bucket in ``BUCKETS``.

    A head or tail prediction counts as a hit when its filtered rank equals 1.
    Hits are filed under the bucket that ``get_support_bucket_from_support``
    returns for the support of the fact, then accumulated so that each bucket
    also includes the hits of all the buckets preceding it in ``BUCKETS``.

    :param dataset: object exposing ``name`` and ``entities``
    :param model_name: model whose filtered ranks are read
    :param fact_2_support: mapping from the HTML-unescaped
        ``"head;relation;tail"`` key to the support value of the fact
    :param rps_bucket_2_incremental_count: mapping from bucket to the
        denominator used for that bucket's ratio
    :return: dict mapping each bucket to cumulative hits / denominator
    """
    entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset.name, entity_number=len(dataset.entities))

    hits_per_bucket = defaultdict(lambda: 0)

    for entry in entries:
        head, relation, tail = entry['head'], entry['relation'], entry['tail']
        head_rank = entry['head_rank_filtered']
        tail_rank = entry['tail_rank_filtered']

        fact_key = html.unescape(";".join([head, relation, tail]))
        bucket = get_support_bucket_from_support(fact_2_support[fact_key])

        for rank in (head_rank, tail_rank):
            if rank == 1:
                hits_per_bucket[bucket] += 1

    cumulative_hits = copy.deepcopy(hits_per_bucket)

    for position, start_bucket in enumerate(BUCKETS):
        for later_bucket in BUCKETS[position + 1:]:
            cumulative_hits[later_bucket] += hits_per_bucket[start_bucket]

    cumulative_ratio = dict()
    for bucket in BUCKETS:
        denominator = float(rps_bucket_2_incremental_count[bucket])
        cumulative_ratio[bucket] = cumulative_hits[bucket] / denominator

    return cumulative_ratio
def get_mrr_by_bucket(dataset_obj, model_name, fact_2_support,
                      support_bucket_2_facts_count):
    """Build the head and tail MRR rows, one cell per bucket in ``BUCKETS``.

    Filtered head and tail ranks are kept apart and filed under the bucket
    that ``get_support_bucket_from_support`` returns for the fact's support.

    :param dataset_obj: object exposing ``name`` and ``entities``
    :param model_name: model whose filtered ranks are read
    :param fact_2_support: mapping from ``"head;relation;tail"`` to the
        support value of that fact
    :param support_bucket_2_facts_count: mapping from bucket to a count;
        buckets whose count is not positive get the placeholder ``"--"``
    :return: tuple ``(head_mrr_row, tail_mrr_row)`` of lists aligned with
        ``BUCKETS``
    """
    entries = read_filtered_ranks_entries_for(
        model_name, dataset_obj.name, entity_number=len(dataset_obj.entities))

    head_ranks_by_bucket = defaultdict(lambda: [])
    tail_ranks_by_bucket = defaultdict(lambda: [])

    for entry in entries:
        head, relation, tail = entry['head'], entry['relation'], entry['tail']
        head_rank = entry['head_rank_filtered']
        tail_rank = entry['tail_rank_filtered']

        bucket = get_support_bucket_from_support(
            fact_2_support[";".join([head, relation, tail])])

        head_ranks_by_bucket[bucket].append(head_rank)
        tail_ranks_by_bucket[bucket].append(tail_rank)

    head_mrr_row, tail_mrr_row = [], []

    for bucket in BUCKETS:
        if not support_bucket_2_facts_count[bucket] > 0:
            head_mrr_row.append("--")
            tail_mrr_row.append("--")
            continue
        head_mrr_row.append(compute_mrr(head_ranks_by_bucket[bucket]))
        tail_mrr_row.append(compute_mrr(tail_ranks_by_bucket[bucket]))

    return head_mrr_row, tail_mrr_row
Example #7
0
def get_hits_by_bucket(dataset_name, model_name, fact_2_support,
                       support_bucket_2_facts_count):
    """Build the head and tail hits@1 rows, one cell per bucket in ``BUCKETS``.

    A prediction is a hit when its filtered rank equals 1.  Each cell is the
    number of hits in the bucket divided by ``support_bucket_2_facts_count``
    for that bucket, rounded to two decimals.

    :param dataset_name: dataset whose filtered ranks are read
    :param model_name: model whose filtered ranks are read
    :param fact_2_support: mapping from ``"head;relation;tail"`` to the
        support value of that fact
    :param support_bucket_2_facts_count: mapping from bucket to a count;
        buckets whose count is not positive get the placeholder ``'--'``
    :return: tuple ``(head_hits_row, tail_hits_row)``; head cells are numbers,
        tail cells are strings followed by three spaces
    """
    entries = read_filtered_ranks_entries_for(model_name, dataset_name)

    head_hits_by_bucket = defaultdict(lambda: 0)
    tail_hits_by_bucket = defaultdict(lambda: 0)

    for entry in entries:
        head, relation, tail = entry['head'], entry['relation'], entry['tail']
        head_rank = entry['head_rank_filtered']
        tail_rank = entry['tail_rank_filtered']

        bucket = get_support_bucket_from_support(
            fact_2_support[";".join([head, relation, tail])])

        if head_rank == 1:
            head_hits_by_bucket[bucket] += 1
        if tail_rank == 1:
            tail_hits_by_bucket[bucket] += 1

    head_hits_row = []
    for bucket in BUCKETS:
        if not support_bucket_2_facts_count[bucket] > 0:
            head_hits_row.append('--')
            continue
        ratio = float(head_hits_by_bucket[bucket]) / float(
            support_bucket_2_facts_count[bucket])
        head_hits_row.append(round(ratio, 2))

    tail_hits_row = []
    for bucket in BUCKETS:
        if not support_bucket_2_facts_count[bucket] > 0:
            tail_hits_row.append('--')
            continue
        ratio = float(tail_hits_by_bucket[bucket]) / float(
            support_bucket_2_facts_count[bucket])
        tail_hits_row.append(str(round(ratio, 2)) + "   ")

    return head_hits_row, tail_hits_row
Example #8
0
                      dtype=np.float)


def compute_mrr(ranks):
    """Return the mean reciprocal rank of ``ranks``, rounded to two decimals.

    :param ranks: iterable of non-zero numeric ranks
    :return: average of ``1 / rank`` over all ranks, rounded to 2 decimals
    """
    reciprocal_ranks = []
    for rank in ranks:
        reciprocal_ranks.append(1.0 / rank)
    mean_reciprocal_rank = np.average(reciprocal_ranks)
    return round(mean_reciprocal_rank, 2)


# For every model, group the filtered ranks of its entries by the peers
# interval on the opposite side of the predicted element.
# NOTE(review): this snippet is cut off inside the last ``append(`` call.
for i in range(len(all_models_names)):
    model_name = all_models_names[i]

    # === collect the filtered ranks for each source peers interval ===

    source_peer_interval_2_ranks_list = defaultdict(lambda: [])

    model_entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset_name, entity_number=entities_count)

    for entry in model_entries:
        head, relation, tail = entry['head'], entry['relation'], entry['tail']

        # peer classes have the form "<head interval>__<tail interval>"
        peer_class = test_fact_2_peer_class[";".join([head, relation, tail])]
        head_peers_interval, tail_peers_interval = peer_class.split("__")

        # a head prediction is keyed by the tail's peers interval, and a
        # tail prediction by the head's peers interval
        head_pred_source_peers_interval = tail_peers_interval
        tail_pred_source_peers_interval = head_peers_interval

        source_peer_interval_2_ranks_list[
            head_pred_source_peers_interval].append(
                entry['head_rank_filtered'])
        source_peer_interval_2_ranks_list[
            tail_pred_source_peers_interval].append(
from performances import read_filtered_ranks_entries_for

# Half-open [lower, upper) support intervals; the value 1.0 is folded into
# the last interval by get_support_bucket_from_support.
BUCKETS = (
    (0.0, 0.1),
    (0.1, 0.2),
    (0.2, 0.3),
    (0.3, 0.4),
    (0.4, 0.5),
    (0.5, 0.6),
    (0.6, 0.7),
    (0.7, 0.8),
    (0.8, 0.9),
    (0.9, 1.0),
)


def get_support_bucket_from_support(support_value):
    """Return the bucket of ``BUCKETS`` that contains ``support_value``.

    :param support_value: numeric support value
    :return: the ``(lower, upper)`` bucket with ``lower <= value < upper``;
        ``(0.9, 1.0)`` when the value is exactly 1.0; ``None`` when the value
        lies outside every bucket
    """
    matching_bucket = next(
        (bucket for bucket in BUCKETS
         if bucket[0] <= support_value < bucket[1]),
        None)
    if matching_bucket is None and support_value == 1.0:
        # the upper bound of the last bucket is treated as inclusive
        matching_bucket = (0.9, 1.0)
    return matching_bucket


# Script section: group the test facts of FB15K_237 by support bucket.
dataset_name = FB15K_237
model_entries = read_filtered_ranks_entries_for(ANYBURL, dataset_name)

# test_fact_2_path_support is iterated and indexed by test fact below
test_fact_2_path_support = tfidf_support.read(Dataset(dataset_name))
path_support_bucket_2_test_facts = defaultdict(lambda: [])
for test_fact in test_fact_2_path_support:
    support = test_fact_2_path_support[test_fact]
    support_bucket = get_support_bucket_from_support(support)
    path_support_bucket_2_test_facts[support_bucket].append(test_fact)

# kept as a float, as are the per-bucket counts computed below
all_test_facts_count = float(len(test_fact_2_path_support))
path_support_bucket_2_facts_count = dict()
path_support_bucket_2_facts_percentage = dict()

# NOTE(review): the snippet is cut off after the count is computed; the two
# dicts above are not filled in the visible part.
for path_support_bucket in BUCKETS:
    test_facts_count = float(
        len(path_support_bucket_2_test_facts[path_support_bucket]))
from dataset_analysis.degrees import degree_classes
from dataset_analysis.degrees.degree_classes import CLASSES, INTERVALS
from datasets import FB15K
from io_utils import *
from models import SIMPLE, TRANSE, ROTATE, CONVE, RSN, TUCKER, ANYBURL
from performances import read_filtered_ranks_entries_for

# Script section: count the FB15K entries that fall in each degree class.
model_entries = read_filtered_ranks_entries_for(ANYBURL, FB15K)

# indexed below by "head;relation;tail" to obtain the degree class of a fact
test_fact_2_class = degree_classes.read(FB15K, return_fact_2_class=True)

# total number of entries seen, kept as a float
overall_all = 0.0

# build a dict that, for each degree class, tells us how many test facts belong to that degree class
degree_class_2_overall_counts = dict()
# start every class in CLASSES at zero so the "+=" below has a value to add to
for degree_class in CLASSES:
    degree_class_2_overall_counts[degree_class] = 0.0
for entry in model_entries:
    head, relation, tail = entry['head'], entry['relation'], entry['tail']
    degree_class = test_fact_2_class[";".join([head, relation, tail])]
    overall_all += 1
    degree_class_2_overall_counts[degree_class] += 1

# these will be used to generate the CSV data
degree_class_2_head_hits_percentage = dict()
degree_class_2_tail_hits_percentage = dict()
degree_class_2_head_misses_percentage = dict()
degree_class_2_tail_misses_percentage = dict()

# compute
# - the number of all head hits@1 in the entire test set
Example #11
0
def plot_mrr_heatmap_for(model_name, dataset_name, test_fact_2_peer_class):
    """Show a heatmap of the MRR of ``model_name`` per pair of peer intervals.

    Ranks are grouped by a two-part class built from the halves of the fact's
    ``"<head_peers>__<tail_peers>"`` peer class: head predictions are filed
    under ``"<tail_peers>__<head_peers>"`` and tail predictions under
    ``"<head_peers>__<tail_peers>"``.  Cell ``(i, j)`` of the heatmap holds
    the MRR of class ``"<PEER_INTERVALS[i]>__<PEER_INTERVALS[j]>"``; classes
    without an MRR are left as NaN.

    :param model_name: model whose filtered ranks are read; also used in the
        plot title
    :param dataset_name: dataset whose filtered ranks are read
    :param test_fact_2_peer_class: mapping from ``"head;relation;tail"`` to
        the peer class of that fact
    :return: None; the figure is displayed with ``plt.show()``
    """
    # === collect the filtered ranks of each two-part peer class ===

    st_peer_class_2_ranks = defaultdict(lambda: [])

    model_entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset_name, entity_number=entities_count)

    for entry in model_entries:
        head, relation, tail = entry['head'], entry['relation'], entry['tail']

        peer_class = test_fact_2_peer_class[";".join([head, relation, tail])]
        head_peers, tail_peers = peer_class.split("__")

        head_prediction_st_peers_class = tail_peers + "__" + head_peers
        tail_prediction_st_peers_class = head_peers + "__" + tail_peers

        st_peer_class_2_ranks[head_prediction_st_peers_class].append(
            entry['head_rank_filtered'])
        st_peer_class_2_ranks[tail_prediction_st_peers_class].append(
            entry['tail_rank_filtered'])

    # Only classes with a positive overall count get an MRR.
    st_peer_class_2_mrr = {
        peer_class: compute_mrr(st_peer_class_2_ranks[peer_class])
        for peer_class in PEER_CLASSES
        if st_peers_class_2_overall_counts[peer_class] > 0
    }

    # Start from an all-NaN matrix so cells without an MRR stay empty.
    # The builtin ``float`` replaces the ``np.float`` alias, which no longer
    # exists in current NumPy releases.
    intervals_count = len(PEER_INTERVALS)
    mrr_matrix = np.full((intervals_count, intervals_count),
                         np.nan,
                         dtype=float)

    interval_labels = [
        str(interval[0]) + "-" + str(interval[1])
        for interval in PEER_INTERVALS
    ]

    for i, source_peer_interval_str in enumerate(interval_labels):
        for j, target_peer_interval_str in enumerate(interval_labels):
            st_peer_class = (source_peer_interval_str + "__" +
                             target_peer_interval_str)
            if st_peer_class in st_peer_class_2_mrr:
                mrr_matrix[i, j] = st_peer_class_2_mrr[st_peer_class]

    sns.heatmap(mrr_matrix,
                linewidth=0.5,
                annot=True,
                square=True,
                xticklabels=heatmap_x_ticks,
                yticklabels=heatmap_y_ticks,
                cmap="coolwarm_r",
                vmin=0.0,
                vmax=1.0)
    plt.xlabel("Target Peers")
    plt.ylabel("Source Peers")
    plt.title(model_name + " MRR by number of peers")

    plt.show()
def get_hits_at_k_by_bucket(k, dataset, model_name, fact_2_arity,
                            arity_bucket_2_facts_count):
    """Build the head and tail hits@k rows, one cell per bucket in ``BUCKETS``.

    A prediction is a hit when its filtered rank is at most ``k``.  Each cell
    is the number of hits in the bucket divided by
    ``arity_bucket_2_facts_count`` for that bucket, rounded to two decimals.

    :param k: largest filtered rank that still counts as a hit
    :param dataset: object exposing ``name`` and ``entities``
    :param model_name: model whose filtered ranks are read
    :param fact_2_arity: mapping from ``"head;relation;tail"`` to the arity
        of that fact
    :param arity_bucket_2_facts_count: mapping from bucket to a count;
        buckets whose count is not positive get the placeholder ``'--'``
    :return: tuple ``(head_hits_row, tail_hits_row)``; head cells are numbers,
        tail cells are strings followed by three spaces
    """
    entries = read_filtered_ranks_entries_for(
        model_name, dataset.name, entity_number=len(dataset.entities))

    head_hits_by_bucket = defaultdict(lambda: 0)
    tail_hits_by_bucket = defaultdict(lambda: 0)

    for entry in entries:
        head, relation, tail = entry['head'], entry['relation'], entry['tail']
        head_rank = entry['head_rank_filtered']
        tail_rank = entry['tail_rank_filtered']

        bucket = get_bucket_from_arity(
            fact_2_arity[";".join([head, relation, tail])])

        if head_rank <= k:
            head_hits_by_bucket[bucket] += 1
        if tail_rank <= k:
            tail_hits_by_bucket[bucket] += 1

    head_hits_row = []
    for bucket in BUCKETS:
        if not arity_bucket_2_facts_count[bucket] > 0:
            head_hits_row.append('--')
            continue
        ratio = float(head_hits_by_bucket[bucket]) / float(
            arity_bucket_2_facts_count[bucket])
        head_hits_row.append(round(ratio, 2))

    tail_hits_row = []
    for bucket in BUCKETS:
        if not arity_bucket_2_facts_count[bucket] > 0:
            tail_hits_row.append('--')
            continue
        ratio = float(tail_hits_by_bucket[bucket]) / float(
            arity_bucket_2_facts_count[bucket])
        tail_hits_row.append(str(round(ratio, 2)) + "   ")

    return head_hits_row, tail_hits_row
    support_2_head_avg_rank = dict()
    support_2_tail_avg_rank = dict()

    for s in support_2_head_ranks:
        support_2_head_avg_rank[s] = np.average(support_2_head_ranks[s])

    for s in support_2_tail_ranks:
        support_2_tail_avg_rank[s] = np.average(support_2_tail_ranks[s])

    return support_2_head_avg_rank, support_2_tail_avg_rank


# Script section: for each listed model, plot the average head rank and the
# average tail rank returned by process() against the support value.
dataset_name = FB15K
models_names = [ROTATE]

test_fact_2_support = tfidf_support.read(Dataset(dataset_name))
for model_name in models_names:
    model_entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset_name)

    support_2_head_avg_rank, support_2_tail_avg_rank = process(
        model_entries, test_fact_2_support)

    plot_dict(support_2_head_avg_rank,
              model_name + " support vs avg head rank", "support",
              "avg head rank of test facts with that support")
    # the last label names the tail rank, matching the data and the title
    plot_dict(support_2_tail_avg_rank,
              model_name + " support vs avg tail rank", "support",
              "avg tail rank of test facts with that support")
        head_peers_number_2_tail_ranks[key] = numpy.average(
            head_peers_number_2_tail_ranks[key])
    for key in tail_peers_number_2_head_ranks:
        tail_peers_number_2_head_ranks[key] = numpy.average(
            tail_peers_number_2_head_ranks[key])
        tail_peers_number_2_tail_ranks[key] = numpy.average(
            tail_peers_number_2_tail_ranks[key])

    return head_peers_number_2_head_ranks, tail_peers_number_2_head_ranks, head_peers_number_2_tail_ranks, tail_peers_number_2_tail_ranks


# Script section: read the peers of FB15K and the filtered ranks of several
# models, then analyze and plot the RotatE results.
head_prediction_2_peers, tail_prediction_2_peers = peers.read(FB15K)

test_triples = Dataset(FB15K).test_triples

# one list of filtered-rank entries per model
transE_entries = performances.read_filtered_ranks_entries_for(TRANSE, FB15K)
rotatE_entries = performances.read_filtered_ranks_entries_for(ROTATE, FB15K)
tuckER_entries = performances.read_filtered_ranks_entries_for(TUCKER, FB15K)
convE_entries = performances.read_filtered_ranks_entries_for(CONVE, FB15K)
simplE_entries = performances.read_filtered_ranks_entries_for(SIMPLE, FB15K)
anyburl_entries = performances.read_filtered_ranks_entries_for(ANYBURL, FB15K)

# only the RotatE entries are passed to analyze() in the visible part
rotatE_h_peers_2_h_rank, rotatE_t_peers_2_h_rank, rotatE_h_peers_2_t_rank, rotatE_t_peers_2_t_rank = analyze(
    rotatE_entries, head_prediction_2_peers, tail_prediction_2_peers)

# the three strings are presumably title, x label and y label -- TODO confirm
# against plot_dict
plot_dict(
    rotatE_h_peers_2_h_rank, "RotatE head peers vs head mean rank",
    "head peers",
    "mean rank of all head predictions of entities with that number of head peers"
)
plot_dict(
Example #15
0
from dataset_analysis.peers.peer_classes import PEER_CLASSES, PEER_INTERVALS
from dataset_analysis.peers import peer_classes
from datasets import FB15K, FB15K_237, WN18, WN18RR, YAGO3_10
from io_utils import *
from models import ANYBURL, TRANSE, CONVE, TUCKER, ROTATE, SIMPLE, RSN, CONVR, CROSSE
from performances import read_filtered_ranks_entries_for

# Script section: count how many entries fall in each peer class and turn
# the counts into fractions of the total.
dataset_name = WN18RR
model_entries = read_filtered_ranks_entries_for(CONVR, dataset_name)

# indexed below by "head;relation;tail" to obtain the peer class of a fact
test_fact_2_class = peer_classes.read(dataset_name, return_fact_2_class=True)

# === count and print the percentage of facts in each peer class ===
# total number of entries seen, kept as a float
overall_all = 0.0
peers_class_2_overall_counts = dict()
peers_class_2_percentage = dict()
# initialize the data structure
for peer_class in PEER_CLASSES:
    peers_class_2_overall_counts[peer_class] = 0.0

# count the occurrences for each peer class and overall
for entry in model_entries:
    head, relation, tail = entry['head'], entry['relation'], entry['tail']
    peer_class = test_fact_2_class[";".join([head, relation, tail])]
    overall_all += 1
    peers_class_2_overall_counts[peer_class] += 1
# compute the percentage for each peer class
# (a fraction of overall_all, not multiplied by 100)
for peer_class in peers_class_2_overall_counts:
    peers_class_2_percentage[peer_class] = float(
        peers_class_2_overall_counts[peer_class]) / float(overall_all)
Example #16
0
def plot_hits1_heatmap_for(model_name, dataset_name, test_fact_2_peer_class):
    """Show two heatmaps with the head and tail hits@1 ratio per peer class.

    A prediction is a hit when its filtered rank equals 1.  For every peer
    class in ``PEER_CLASSES`` the number of hits is divided by the class's
    entry in the module-level ``peers_class_2_overall_counts`` and rounded to
    two decimals; classes whose count is not positive are shown as NaN.
    Cell ``(i, j)`` corresponds to the peer class
    ``"<PEER_INTERVALS[i]>__<PEER_INTERVALS[j]>"``.

    :param model_name: model whose filtered ranks are read
    :param dataset_name: dataset whose filtered ranks are read
    :param test_fact_2_peer_class: mapping from ``"head;relation;tail"`` to
        the peer class of that fact
    :return: None; the head heatmap is displayed first, then the tail one
    :raises KeyError: if a combination of two peer intervals is not listed
        in ``PEER_CLASSES``
    """
    # === count the hits@1 for each peer class ===

    peer_class_2_head_hits_count = defaultdict(lambda: 0)
    peer_class_2_tail_hits_count = defaultdict(lambda: 0)

    model_entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset_name)

    for entry in model_entries:
        head, relation, tail = entry['head'], entry['relation'], entry['tail']

        head_rank_filtered = entry['head_rank_filtered']
        tail_rank_filtered = entry['tail_rank_filtered']

        peer_class = test_fact_2_peer_class[";".join([head, relation, tail])]

        if head_rank_filtered == 1:
            peer_class_2_head_hits_count[peer_class] += 1

        if tail_rank_filtered == 1:
            peer_class_2_tail_hits_count[peer_class] += 1

    # === turn the counts into ratios; NaN marks classes without facts ===

    peer_class_2_head_hits_perc = dict()
    peer_class_2_tail_hits_perc = dict()

    for peer_class in PEER_CLASSES:
        overall_count = peers_class_2_overall_counts[peer_class]
        if overall_count > 0:
            peer_class_2_head_hits_perc[peer_class] = round(
                float(peer_class_2_head_hits_count[peer_class]) /
                float(overall_count), 2)
            peer_class_2_tail_hits_perc[peer_class] = round(
                float(peer_class_2_tail_hits_count[peer_class]) /
                float(overall_count), 2)
        else:
            peer_class_2_head_hits_perc[peer_class] = np.nan
            peer_class_2_tail_hits_perc[peer_class] = np.nan

    # The builtin ``float`` replaces the ``np.float`` alias, which no longer
    # exists in current NumPy releases.
    matrix_shape = (len(PEER_INTERVALS), len(PEER_INTERVALS))
    head_hits_percentages = np.zeros(shape=matrix_shape, dtype=float)
    tail_hits_percentages = np.zeros(shape=matrix_shape, dtype=float)

    interval_labels = [
        str(interval[0]) + "-" + str(interval[1])
        for interval in PEER_INTERVALS
    ]

    for i, head_peer_interval_str in enumerate(interval_labels):
        for j, tail_peer_interval_str in enumerate(interval_labels):
            peer_class = head_peer_interval_str + "__" + tail_peer_interval_str
            head_hits_percentages[i, j] = peer_class_2_head_hits_perc[
                peer_class]
            tail_hits_percentages[i, j] = peer_class_2_tail_hits_perc[
                peer_class]

    sns.heatmap(head_hits_percentages,
                linewidth=0.5,
                annot=True,
                xticklabels=heatmap_x_ticks,
                yticklabels=heatmap_y_ticks,
                cmap="coolwarm_r")
    plt.show()

    sns.heatmap(tail_hits_percentages,
                linewidth=0.5,
                annot=True,
                xticklabels=heatmap_x_ticks,
                yticklabels=heatmap_y_ticks,
                cmap="coolwarm_r")
    plt.show()
Example #17
0
        hits_3_perc = float(hits_3) * 100 / len(all_ranks)
        hits_5_perc = float(hits_5) * 100 / len(all_ranks)
        hits_10_perc = float(hits_10) * 100 / len(all_ranks)

        print("Mean Rank:\t\t\t\t\t%f" % mean_rank)
        print("Mean Reciprocal Rank: \t\t%f%%" % mean_reciprocal_rank)
        print("Hits@1:\t\t\t\t\t\t%f%%" % hits_1_perc)
        print("Hits@3:\t\t\t\t\t\t%f%%" % hits_3_perc)
        print("Hits@5:\t\t\t\t\t\t%f%%" % hits_5_perc)
        print("Hits@10:\t\t\t\t\t%s" % hits_10_perc)
        print(
            str(round(hits_1_perc, 2)) + " " + str(round(hits_10_perc, 2)) +
            " " + str(round(mean_reciprocal_rank, 3)))
        #a, b, c = str(hits_1_perc), str(hits_10_perc), str(mean_reciprocal_rank)
        #a, b, c = a.replace("0.", "."), b.replace("0.", "."), c.replace("0.", ".")
        #print(a + " " + b + " " + c)


# Script section: load the FB15K entity degrees and two sets of RSN entries,
# then print the metrics of every model on every dataset.
entity_2_in_degree, entity_2_out_degree, entity_2_degree = entity_degrees.read(
    datasets.FB15K)

# NOTE(review): the third argument ("avg" / "min") presumably selects how
# tied ranks are resolved -- confirm against performances.
filtered_ranks_entries_avg = performances.read_filtered_ranks_entries_for(
    models.RSN, datasets.FB15K, "avg")
filtered_ranks_entries_min = performances.read_filtered_ranks_entries_for(
    models.RSN, datasets.FB15K, "min")

for cur_dataset_name in datasets.ALL_DATASET_NAMES:
    for cur_model_name in models.ALL_MODEL_NAMES:
        print_metrics_for(cur_model_name, cur_dataset_name)
        # blank line between the reports of two models
        print()
import math

from dataset_analysis.paths import tfidf_support
from datasets import FB15K, FB15K_237, WN18, WN18RR, YAGO3_10, Dataset
from io_utils import *
from models import ANYBURL, TRANSE, CONVE, TUCKER, ROTATE, SIMPLE, RSN, CONVR, CROSSE
from performances import read_filtered_ranks_entries_for
import numpy as np

# Script section: read the filtered-rank entries of ROTATE on FB15K.
dataset_name = FB15K
model_entries = read_filtered_ranks_entries_for(ROTATE, dataset_name)

# Half-open [lower, upper) support intervals; the value 1.0 is folded into
# the last interval by get_support_bucket_from_support.
BUCKETS = (
    (0.0, 0.1),
    (0.1, 0.2),
    (0.2, 0.3),
    (0.3, 0.4),
    (0.4, 0.5),
    (0.5, 0.6),
    (0.6, 0.7),
    (0.7, 0.8),
    (0.8, 0.9),
    (0.9, 1.0),
)


def get_support_bucket_from_support(support_value):
    """Return the bucket of ``BUCKETS`` that contains ``support_value``.

    :param support_value: numeric support value
    :return: the ``(lower, upper)`` bucket with ``lower <= value < upper``;
        ``(0.9, 1.0)`` when the value is exactly 1.0; ``None`` when the value
        lies outside every bucket
    """
    if support_value == 1.0:
        # the upper bound of the last bucket is treated as inclusive
        return 0.9, 1.0
    for lower, upper in BUCKETS:
        if lower <= support_value < upper:
            return lower, upper
    return None


def compute_mrr(ranks):
    """Return the mean reciprocal rank of ``ranks`` (not rounded).

    :param ranks: iterable of non-zero numeric ranks
    :return: average of ``1 / rank`` over all ranks
    """
    reciprocal_ranks = []
    for rank in ranks:
        reciprocal_ranks.append(1.0 / rank)
    return np.average(reciprocal_ranks)


# Script section: read the support of each test fact and prepare a mapping
# from support bucket to test facts.
# NOTE(review): the snippet is cut off right after the loop header below.
test_fact_2_path_support = tfidf_support.read(Dataset(dataset_name))
path_support_bucket_2_test_facts = defaultdict(lambda: [])
for test_fact in test_fact_2_path_support: