Beispiel #1
0
def compute(dataset_name):
    """
    Group the test facts of a dataset by degree class.

    Each test fact is stored under the first class of CLASSES for which
    _fact_belongs_to_degree_class accepts the degrees of its head and tail.
    A fact that is accepted by no class is not stored at all.

    :param dataset_name: the name of the dataset whose test facts are grouped
    :return: a defaultdict that associates each degree class to the list of
             (head, relation, tail) test facts stored under it
    """

    print(
        "Computing the mappings <degree class -> list of test facts belonging to that degree class> for dataset %s ..."
        % dataset_name)

    facts_by_class = defaultdict(list)

    # only the third value returned by entity_degrees.read is used here
    _, _, degree_of = entity_degrees.read(dataset_name)

    for (head, relation, tail) in Dataset(dataset_name).test_triples:
        head_degree = degree_of[head]
        tail_degree = degree_of[tail]

        for candidate in CLASSES:
            if not _fact_belongs_to_degree_class(head_degree, tail_degree,
                                                 candidate):
                continue

            # the first accepting class wins; later classes are not tried
            facts_by_class[candidate].append((head, relation, tail))
            break

    return facts_by_class
Beispiel #2
0
def _parse_filtered_rank(raw_rank, entity_number, tie_policy):
    """
    Convert one rank field of a filtered ranks file into a float.

    A field that does not start with "MISS_" is converted as it is.
    A field that starts with "MISS_" is resolved with the tie policy:
    "max" yields entity_number, "min" yields the number that follows the
    prefix, "average" yields the mean of those two values.

    :param raw_rank: the rank field, as read from the file
    :param entity_number: the value used by the "max" and "average" policies
    :param tie_policy: one among "max", "average" and "min"
    :return: the rank as a float
    :raises ValueError: if the field starts with "MISS_" and tie_policy is
                        none of the supported values
    """
    if not raw_rank.startswith("MISS_"):
        return float(raw_rank)

    if tie_policy == "max":
        return float(entity_number)

    reported = raw_rank.replace("MISS_", "")
    if tie_policy == "average":
        return (int(reported) + float(entity_number)) / 2
    if tie_policy == "min":
        return float(reported)

    # without this, the entry would silently lack its rank key
    raise ValueError("Unknown tie policy: " + repr(tie_policy))


def read_filtered_ranks_entries_for(model_name,
                                    dataset_name,
                                    entity_number=None,
                                    tie_policy="average"):
    """
    Read the filtered ranks file of a model on a dataset.

    Every non-blank line of the file must hold five fields separated by ";":
    head, relation, tail, filtered head rank and filtered tail rank.
    HTML character references in the line are unescaped before splitting.

    :param model_name: the name of the model the ranks belong to
    :param dataset_name: the name of the dataset the ranks belong to
    :param entity_number: the value used to resolve "MISS_" ranks; if None,
                          it is the size of the third mapping returned by
                          entity_degrees.read for the dataset
    :param tie_policy: one among "max", "average" and "min"; it selects the
                       file to read and how "MISS_" ranks are resolved
    :return: a list of dicts with keys "head", "relation", "tail",
             "head_rank_filtered" and "tail_rank_filtered"
    :raises ValueError: if a line does not hold exactly five fields, or if a
                        "MISS_" rank is met with an unsupported tie policy
    """
    print("Reading filtered rank entries for " + model_name + " results on " +
          dataset_name + " with tie policy " + tie_policy + "...")

    filepath = filtered_ranks_path(model_name, dataset_name, tie_policy)

    if entity_number is None:
        _, _, entity_2_degree = entity_degrees.read(dataset_name)
        entity_number = len(entity_2_degree)

    entries = []

    with open(filepath) as input_data:
        # iterate the file lazily instead of loading every line up front
        for line in input_data:
            content = html.unescape(line).strip()
            if not content:
                # a blank (e.g. trailing) line carries no entry
                continue

            (head, relation, tail, rank_head_filtered,
             rank_tail_filtered) = content.split(";")

            entries.append({
                "head": head,
                "relation": relation,
                "tail": tail,
                "head_rank_filtered": _parse_filtered_rank(
                    rank_head_filtered, entity_number, tie_policy),
                "tail_rank_filtered": _parse_filtered_rank(
                    rank_tail_filtered, entity_number, tie_policy),
            })

    return entries
Beispiel #3
0
def get_dicts(dataset_name):
    """
    Count the training facts of a dataset by head degree, by tail degree
    and by number of mentions of their relation.

    :param dataset_name: the name of the dataset to count the training facts of
    :return: three defaultdicts, in this order:
             head degree -> number of training facts whose head has that degree,
             tail degree -> number of training facts whose tail has that degree,
             relation mentions -> number of training facts whose relation has that many mentions
    """
    _, _, degree_of = entity_degrees.read(dataset_name)
    mentions_of = relation_mentions.read(dataset_name)
    training_facts = Dataset(dataset_name).train_triples

    facts_per_head_degree = defaultdict(int)
    facts_per_tail_degree = defaultdict(int)
    facts_per_relation_mentions = defaultdict(int)
    counters = (facts_per_head_degree, facts_per_tail_degree,
                facts_per_relation_mentions)

    for (head, relation, tail) in training_facts:
        # look up all three keys first, then update the three counters
        keys = (degree_of[head], degree_of[tail], mentions_of[relation])
        for counter, key in zip(counters, keys):
            counter[key] += 1

    return counters
Beispiel #4
0
from io_utils import *
from collections import defaultdict


def plot(x, y, title, xlabel, ylabel):
    """
    Show a scatter plot of y against x, drawn with small blue markers.

    The drawing goes through the module-level name plt
    (presumably matplotlib.pyplot; it is not imported explicitly in view).

    :param x: the values on the horizontal axis
    :param y: the values on the vertical axis
    :param title: the title of the plot
    :param xlabel: the label of the horizontal axis
    :param ylabel: the label of the vertical axis
    """
    plt.scatter(x, y, color='blue', s=1)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)

    # logarithmic axes are currently disabled
    #plt.xscale('log')
    #plt.yscale('log')
    plt.show()


# degrees of the FB15K entities, mentions of the FB15K relations
# and mapping from each FB15K relation to its coarse class
(entity_2_in_degree,
 entity_2_out_degree,
 entity_2_degree) = entity_degrees.read(FB15K)
relation_2_mentions = relation_mentions.read(FB15K)
relation_2_coarse_class = relation_coarse_classes.read(
    FB15K, return_rel_2_class=True)

# entity -> ("type of relation" -> number of training facts of that type having the entity as head)
entity_2_head_counts = defaultdict(lambda: defaultdict(int))

# entity -> ("type of relation" -> number of training facts of that type having the entity as tail)
entity_2_tail_counts = defaultdict(lambda: defaultdict(int))

dataset = Dataset(FB15K)

for (head, relation, tail) in dataset.train_triples:
    # the same class is credited to the head (as head) and to the tail (as tail)
    coarse_class = relation_2_coarse_class[relation]
    entity_2_head_counts[head][coarse_class] += 1
    entity_2_tail_counts[tail][coarse_class] += 1
Beispiel #5
0
        hits_3_perc = float(hits_3) * 100 / len(all_ranks)
        hits_5_perc = float(hits_5) * 100 / len(all_ranks)
        hits_10_perc = float(hits_10) * 100 / len(all_ranks)

        print("Mean Rank:\t\t\t\t\t%f" % mean_rank)
        print("Mean Reciprocal Rank: \t\t%f%%" % mean_reciprocal_rank)
        print("Hits@1:\t\t\t\t\t\t%f%%" % hits_1_perc)
        print("Hits@3:\t\t\t\t\t\t%f%%" % hits_3_perc)
        print("Hits@5:\t\t\t\t\t\t%f%%" % hits_5_perc)
        print("Hits@10:\t\t\t\t\t%s" % hits_10_perc)
        print(
            str(round(hits_1_perc, 2)) + " " + str(round(hits_10_perc, 2)) +
            " " + str(round(mean_reciprocal_rank, 3)))
        #a, b, c = str(hits_1_perc), str(hits_10_perc), str(mean_reciprocal_rank)
        #a, b, c = a.replace("0.", "."), b.replace("0.", "."), c.replace("0.", ".")
        #print(a + " " + b + " " + c)


# in-degree, out-degree and overall degree mappings of the FB15K entities
entity_2_in_degree, entity_2_out_degree, entity_2_degree = entity_degrees.read(
    datasets.FB15K)

# The tie policy must be passed by keyword: the signature of
# read_filtered_ranks_entries_for is
# (model_name, dataset_name, entity_number=None, tie_policy="average"),
# so a third positional argument is bound to entity_number, not to the
# tie policy. The policy names it recognises are "max", "average" and "min".
# NOTE(review): confirm that the performances module used by this script
# exposes that same signature.
filtered_ranks_entries_avg = performances.read_filtered_ranks_entries_for(
    models.RSN, datasets.FB15K, tie_policy="average")
filtered_ranks_entries_min = performances.read_filtered_ranks_entries_for(
    models.RSN, datasets.FB15K, tie_policy="min")

# call print_metrics_for on every model of every dataset,
# with an empty line after each call
for cur_dataset_name in datasets.ALL_DATASET_NAMES:
    for cur_model_name in models.ALL_MODEL_NAMES:
        print_metrics_for(cur_model_name, cur_dataset_name)
        print()
    relation_mentions_2_mean_rank = dict()

    for item in entity_degree_2_ranks.items():
        if len(item[1]) > 100:
            entity_degree_2_mean_rank[item[0]] = np.average(item[1])
        else:
            entity_degree_2_mean_rank[item[0]] = None

    for item in relation_mentions_2_ranks.items():

        relation_mentions_2_mean_rank[item[0]] = np.average(item[1])

    return entity_degree_2_mean_rank, relation_mentions_2_mean_rank


# entity -> degree and relation -> mentions mappings for FB15K
_, _, entity2degree = entity_degrees.read(FB15K)
relation2mentions = relation_mentions.read(FB15K)

# filtered rank entries of each model on FB15K, read one model after the other
(transE_entries, rotatE_entries, convE_entries, simplE_entries,
 anyburl_entries) = [
     performances.read_filtered_ranks_entries_for(model, FB15K)
     for model in (TRANSE, ROTATE, CONVE, SIMPLE, ANYBURL)
 ]

# per-model pair of dicts produced by get_dicts_from_entries
(transE_entity_degree_2_rank,
 transE_relation_mentions_2_rank) = get_dicts_from_entries(
     transE_entries, entity2degree, relation2mentions)
(rotatE_entity_degree_2_rank,
 rotatE_relation_mentions_2_rank) = get_dicts_from_entries(
     rotatE_entries, entity2degree, relation2mentions)
(convE_entity_degree_2_rank,
 convE_relation_mentions_2_rank) = get_dicts_from_entries(
     convE_entries, entity2degree, relation2mentions)
simplE_entity_degree_2_rank, simplE_relation_mentions_2_rank = get_dicts_from_entries(
Beispiel #7
0
            entity_degree_2_hits[tail_degree] += 1.0
            relation_mentions_2_hits[relation_mentions_number] += 1.0

    for key in entity_degree_2_hits:
        entity_degree_2_hits[
            key] = entity_degree_2_hits[key] / entity_degree_2_count[key]
    for key in relation_mentions_2_hits:
        relation_mentions_2_hits[key] = relation_mentions_2_hits[
            key] / relation_mentions_2_count[key]

    return entity_degree_2_hits, relation_mentions_2_hits


# dataset and models for which hits@1 is plotted against the entity degree
dataset_name = FB15K
models_names = [ROTATE]

_, _, entity_2_degree = entity_degrees.read(dataset_name)
relation_2_mentions = relation_mentions.read(dataset_name)

for model_name in models_names:
    model_entries = performances.read_filtered_ranks_entries_for(
        model_name, dataset_name)
    (model_entity_degree_2_hits,
     model_relation_mentions_2_hits) = get_dicts_from_entries(
         model_entries, entity_2_degree, relation_2_mentions)

    # only the entity degree dict is plotted;
    # model_relation_mentions_2_hits is computed but not plotted
    plot_title = model_name + " entity degree vs hits@1"
    x_label = "degree"
    y_label = "percentage of hits@1 on all predictions of entities with that degree"
    plot_dict(model_entity_degree_2_hits, plot_title, x_label, y_label)

    plt.grid(True)
    plt.show()


def distribution_of(name_2_count):
    """
    Compute how many names share each count.

    :param name_2_count: a dict that associates each name to a count
    :return: a defaultdict that associates each count to the number of names
             having that count; a count that never occurs reads as 0
    """
    histogram = defaultdict(int)

    # the names themselves are irrelevant: only their counts are tallied
    for occurrences in name_2_count.values():
        histogram[occurrences] += 1

    return histogram


# For each of the four datasets, keep the third value returned by
# entity_degrees.read and the value returned by relation_mentions.read.
_, _, fb15k_entity_degrees = entity_degrees.read(FB15K)
fb15k_relation_mentions = relation_mentions.read(FB15K)

_, _, wn18_entity_degrees = entity_degrees.read(WN18)
wn18_relation_mentions = relation_mentions.read(WN18)

_, _, fb15k237_entity_degrees = entity_degrees.read(FB15K_237)
fb15k237_relation_mentions = relation_mentions.read(FB15K_237)

_, _, wn18rr_entity_degrees = entity_degrees.read(WN18RR)
wn18rr_relation_mentions = relation_mentions.read(WN18RR)

# Turn each mapping into "value -> number of keys having that value"
# by means of distribution_of.
fb15k_entity_degrees_distribution = distribution_of(fb15k_entity_degrees)
fb15k_relation_mentions_distribution = distribution_of(fb15k_relation_mentions)

wn18_entity_degrees_distribution = distribution_of(wn18_entity_degrees)
Beispiel #9
0
def _rank_from_filtered_details(details, entity_number, tie_policy):
    """
    Derive a filtered rank from the details list of one prediction.

    If the last detail does not start with "MISS_", the rank is the length
    of the list. Otherwise the tie policy decides: "max" yields
    entity_number, "min" yields the length of the list, "average" yields
    the mean of those two values.

    :param details: the non-empty list of details of the prediction
    :param entity_number: the value used by the "max" and "average" policies
    :param tie_policy: one among "max", "average" and "min"
    :return: the rank as a float
    :raises ValueError: if the last detail starts with "MISS_" and
                        tie_policy is none of the supported values
    """
    listed = float(len(details))

    if not details[-1].startswith("MISS_"):
        return listed

    if tie_policy == "max":
        return float(entity_number)
    if tie_policy == "average":
        return (listed + float(entity_number)) / 2
    if tie_policy == "min":
        return listed

    # without this, the entry would silently lack its rank key
    raise ValueError("Unknown tie policy: " + repr(tie_policy))


def read_filtered_details_entries_for(model_name,
                                      dataset_name,
                                      entity_number=None,
                                      tie_policy="average"):
    """
    Read the filtered details file of a model on a dataset.

    Every non-blank line of the file must hold five fields separated by ";":
    head, relation, tail, prediction type and details. The details field is
    stripped of its first and last character and split on ";". HTML character
    references in the line are unescaped before splitting. Lines of the same
    fact are merged; each fact needs both a "predict head" line and a
    "predict tail" line.

    :param model_name: the name of the model the details belong to
    :param dataset_name: the name of the dataset the details belong to
    :param entity_number: the value used to resolve "MISS_" details; if None,
                          it is the size of the third mapping returned by
                          entity_degrees.read for the dataset
    :param tie_policy: one among "max", "average" and "min"; it selects the
                       file to read and how "MISS_" details are resolved
    :return: a list of dicts, one per fact in order of first appearance, with
             keys "head", "relation", "tail", "head_details_filtered",
             "tail_details_filtered", "head_rank_filtered" and
             "tail_rank_filtered"
    :raises ValueError: if a line holds fewer than five fields, or if a
                        "MISS_" detail is met with an unsupported tie policy
    :raises KeyError: if a fact lacks its "predict head" or "predict tail" line
    """
    print("Reading filtered details entries for " + model_name +
          " results on " + dataset_name + " with tie policy " + tie_policy +
          "...")

    filepath = filtered_details_path(model_name, dataset_name, tie_policy)

    if entity_number is None:
        _, _, entity_2_degree = entity_degrees.read(dataset_name)
        entity_number = len(entity_2_degree)

    # (head, relation, tail) -> {prediction type -> list of details}
    fact_2_head_tail_details = defaultdict(dict)

    with open(filepath) as input_data:
        # iterate the file lazily instead of loading every line up front
        for line in input_data:
            content = html.unescape(line).strip()
            if not content:
                # a blank (e.g. trailing) line carries no details
                continue

            # at most 4 splits: any further ";" belongs to the details field
            (head, relation, tail, prediction_type,
             details) = content.split(";", 4)

            fact = (head, relation, tail)
            fact_2_head_tail_details[fact][prediction_type] = details[
                1:-1].split(";")

    entries = []
    for (head, relation, tail), details_by_type in fact_2_head_tail_details.items():
        head_details = details_by_type["predict head"]
        tail_details = details_by_type["predict tail"]

        entries.append({
            "head": head,
            "relation": relation,
            "tail": tail,
            "head_details_filtered": head_details,
            "tail_details_filtered": tail_details,
            "head_rank_filtered": _rank_from_filtered_details(
                head_details, entity_number, tie_policy),
            "tail_rank_filtered": _rank_from_filtered_details(
                tail_details, entity_number, tie_policy),
        })

    return entries
import math
from collections import defaultdict

from dataset_analysis.degrees import entity_degrees
from dataset_analysis.degrees import degree_classes
from datasets import FB15K, Dataset

dataset = Dataset(FB15K)

# test fact -> degree class, and entity mid -> degree, for FB15K
test_fact_2_degree_class = degree_classes.read(FB15K, return_fact_2_class=True)
_, _, mid_2_degree = entity_degrees.read(FB15K)

all_count = len(test_fact_2_degree_class)

# degree class -> number of test facts mapped to it
degree_class_2_count = defaultdict(int)
for degree_class in test_fact_2_degree_class.values():
    degree_class_2_count[degree_class] += 1

# print every degree class followed by its percentage of all the test facts
for degree_class, class_count in degree_class_2_count.items():
    perc = round(100 * float(class_count) / float(all_count), 2)
    print(degree_class)
    print(str(perc) + "%")
    print()