Beispiel #1
0
    def re_read_context_and_negatives(self):
        """Reload the per-split context and negative-sample dictionaries.

        For every split name in ``self.names``, clears the in-memory
        containers and refills them from the pickled files under
        ``self.output_path`` (for offline batch generation).
        """
        log_text(self.log_path,
                 "...... Reading Data for Offline Batch Generation ......")
        for index, name in enumerate(self.names):
            # (target container, pickle/attribute suffix) for each context dict.
            context_targets = (
                (self.context_heads[index], "context_head"),
                (self.context_head_relations[index], "context_head_relation"),
                (self.context_tail_relations[index], "context_tail_relation"),
                (self.context_tails[index], "context_tail"),
            )
            for container, suffix in context_targets:
                container.clear()
                self.read_dict(
                    container,
                    load_data(
                        self.output_path + "%s_%s.pickle" % (name, suffix),
                        self.log_path, "self.%s_%s" % (name, suffix)))

            self.negatives[index].clear()
            self.read_dict(
                self.negatives[index],
                load_data(self.output_path + "%s_negatives.pickle" % name,
                          self.log_path, "self.%s_negatives" % name))
Beispiel #2
0
 def input(self):
     """Load the saved entity and relation embedding tensors from disk and
     install them as the embedding layers' weights."""
     for layer, stem in ((self.entity_embeddings, "entity"),
                         (self.relation_embeddings, "relation")):
         layer.weight.data = load_data(
             self.result_path + "%s_embeddings.pickle" % stem, self.log_path,
             "self.%s_embeddings.weight.data" % stem)
 def read_data(self):
     """Read validation/test triples, dataset statistics, and the set of
     training entities from the pickled preprocessing output."""
     self.id_validate_triples = load_data(
         self.output_path + "id_valid_triples.pickle", self.log_path,
         "self.id_validate_triples")
     self.id_test_triples = load_data(
         self.output_path + "id_test_triples.pickle", self.log_path,
         "self.id_test_triples")
     self.statistics = load_data(self.output_path + "statistics.pickle",
                                 self.log_path, "self.statistics")
     # Unpack the counters used downstream from the statistics dict.
     self.num_of_entities = self.statistics["num_of_entities"]
     self.num_of_relations = self.statistics["num_of_relations"]
     self.num_of_validate_triples = self.statistics["num_of_validate_triples"]
     self.num_of_test_triples = self.statistics["num_of_test_triples"]
     self.num_of_train_entities = self.statistics["num_of_train_entities"]
     self.train_entities = load_data(
         self.output_path + "train_entities.pickle", self.log_path,
         "self.train_entities")
    def result_validation(self):
        """Log the contents of the sampling-result pickles for inspection.

        For each split name in ``self.names`` dumps the entity-classification,
        context-sampling, and negative-sampling pickles to the log, followed
        by the global statistics.
        """
        log_text(self.log_path,
                 "...... Result of Entity Classification ......")
        for name in self.names:
            for suffix in ("entities", "head_entities", "tail_entities",
                           "both_entities"):
                log_text(
                    self.log_path,
                    load_data(
                        self.output_path + "%s_%s.pickle" % (name, suffix),
                        self.log_path, ""))

        log_text(self.log_path, "...... Result of Context Sampling ......")
        for name in self.names:
            for suffix in ("context_head", "context_head_relation",
                           "context_tail_relation", "context_tail"):
                log_text(
                    self.log_path,
                    load_data(
                        self.output_path + "%s_%s.pickle" % (name, suffix),
                        self.log_path, ""))

        log_text(self.log_path, "...... Result of Negative Sampling ......")
        for name in self.names:
            # BUG FIX: load_data's second argument is the log path; the
            # original mistakenly passed self.output_path here, which made
            # load_data log (or attempt to log) to the wrong location.
            log_text(
                self.log_path,
                load_data(self.output_path + "%s_negatives.pickle" % name,
                          self.log_path, ""))

        log_text(self.log_path, "...... Other Results ......")
        log_text(
            self.log_path,
            load_data(self.output_path + "statistics.pickle", self.log_path,
                      "statistics"))
    def read_data(self):
        """Populate the per-split context dictionaries and the per-split
        entity counts from the pickled preprocessing output."""
        # (container list, pickle/attribute suffix) pairs read for each split.
        targets = (
            (self.head_relation_to_tails, "head_relation_to_tail"),
            (self.tail_relation_to_heads, "tail_relation_to_head"),
            (self.head_context_statistics, "head_context_statistics"),
            (self.tail_context_statistics, "tail_context_statistics"),
            (self.head_context_heads, "head_context_head"),
            (self.head_context_relations, "head_context_relation"),
            (self.tail_context_relations, "tail_context_relation"),
            (self.tail_context_tails, "tail_context_tail"),
        )
        for index, name in enumerate(self.names):
            for containers, suffix in targets:
                self.read_dict(
                    containers[index],
                    load_data(
                        self.output_path + "%s_%s.pickle" % (name, suffix),
                        self.log_path, "self.%s_%s" % (name, suffix)))

        self.statistics = load_data(self.output_path + "statistics.pickle",
                                    self.log_path, "self.statistics")
        # Entity counts ordered as [train, validate, test].
        self.num_of_entities = [
            self.statistics[key]
            for key in ("num_of_train_entities", "num_of_validate_entities",
                        "num_of_test_entities")
        ]
    def __init__(self):
        """Set up paths for the FB15k dataset, rebuild the entity embedding
        layer from the saved weight tensor, and start processing."""
        self.dataset = "FB15k"
        self.result_path = "./datasets/%s/result/" % self.dataset
        self.log_path = "./logs/tsne_process_on_%s.log" % self.dataset
        saved_weights = load_data(
            self.result_path + "entity_embeddings.pickle", self.log_path,
            "self.entity_embeddings.weight.data")
        # Derive the embedding table's shape from the saved tensor.
        self.num_of_entities = saved_weights.size()[0]
        self.entity_dimension = saved_weights.size()[1]
        self.entity_embeddings = torch.nn.Embedding(self.num_of_entities,
                                                    self.entity_dimension)
        self.entity_embeddings.weight.data = saved_weights

        self.run_funcs()
 def test(self, model):
     """Evaluate *model* on the test triples and log raw/filtered mean rank
     and hit@n.

     Args:
         model: object providing test_calc(n_of_hit, result, train_tensor,
             heads, relations, tails), which accumulates rank statistics
             into the 4-element *result* tensor.
     """
     train_triple_tensor = load_data(
         self.output_path + "train_triple_tensor.pickle", self.log_path,
         "train_triple_tensor").to(self.device)
     test_dataset = MyDataset(self.num_of_test_triples)
     test_dataloader = DataLoader(test_dataset, self.test_batch_size, False)
     test_result = torch.zeros(4).to(
         self.device
     )  # [mean_rank, hit_n, filtered_mean_rank, filtered_hit_n]
     log_text(self.log_path,
              "number of test triples: %d" % self.num_of_test_triples)
     count = 0
     for test_batch in test_dataloader:
         if count % 1000 == 0:
             # BUG FIX: Python 2 `print` statement was a SyntaxError on
             # Python 3; the print() function below works on both.
             print("%d test triples processed" % count)
         count += self.test_batch_size
         model.test_calc(
             self.n_of_hit, test_result, train_triple_tensor,
             torch.tensor([
                 self.id_test_triples["id_heads"][index]
                 for index in test_batch
             ]).to(self.device),
             torch.tensor([
                 self.id_test_triples["id_relations"][index]
                 for index in test_batch
             ]).to(self.device),
             torch.tensor([
                 self.id_test_triples["id_tails"][index]
                 for index in test_batch
             ]).to(self.device))
     # NOTE(review): hit@n is normalized by 2*num_of_test_triples (head and
     # tail predictions) while mean rank is normalized by num_of_test_triples
     # — confirm against test_calc's accumulation convention.
     log_text(
         self.log_path, "raw mean rank: %f" %
         (test_result[0].item() / float(self.num_of_test_triples)))
     log_text(
         self.log_path,
         "raw hit@%d: %f%%" % (self.n_of_hit, 100. * test_result[1].item() /
                               float(2. * self.num_of_test_triples)))
     log_text(
         self.log_path, "filtered mean rank: %f" %
         (test_result[2].item() / float(self.num_of_test_triples)))
     log_text(
         self.log_path, "filtered hit@%d: %f%%" %
         (self.n_of_hit, 100. * test_result[3].item() /
          float(2. * self.num_of_test_triples)))
    def result_validation(self):
        """Log the contents of every preprocessing pickle for inspection."""
        names = ["train", "valid", "test"]

        def dump(pickle_name):
            # Load one pickle from the output directory and log its contents.
            log_text(
                self.log_path,
                load_data(self.output_path + pickle_name, self.log_path, ""))

        log_text(self.log_path, "......Result of Reading Data......")
        for name in names:
            dump("string_%s_triples.pickle" % name)
            dump("id_%s_triples.pickle" % name)
        dump("entity2id.pickle")
        dump("relation2id.pickle")

        log_text(self.log_path,
                 "......Result of Head Relation to Tail and Reserve......")
        for name in names:
            dump("%s_head_relation_to_tail.pickle" % name)
            dump("%s_tail_relation_to_head.pickle" % name)

        log_text(self.log_path,
                 "......Result of Entity Context Extraction......")
        for name in names:
            dump("%s_head_context_head.pickle" % name)
            dump("%s_head_context_relation.pickle" % name)
            dump("%s_head_context_statistics.pickle" % name)
            dump("%s_tail_context_relation.pickle" % name)
            dump("%s_tail_context_tail.pickle" % name)
            dump("%s_tail_context_statistics.pickle" % name)

        log_text(self.log_path, "......Other Results......")
        dump("statistics.pickle")
        dump("train_triple_tensor.pickle")
    def read_data(self):
        """Load the id triples, dataset statistics, training-context
        dictionaries, and training entity lists from the pickled
        preprocessing output."""
        def fetch(pickle_name, tag):
            # Read one pickle from the output directory, logging under *tag*.
            return load_data(self.output_path + pickle_name, self.log_path,
                             tag)

        self.id_train_triples = fetch("id_train_triples.pickle",
                                      "self.id_train_triples")
        self.id_validate_triples = fetch("id_valid_triples.pickle",
                                         "self.id_validate_triples")
        self.id_test_triples = fetch("id_test_triples.pickle",
                                     "self.id_test_triples")
        self.statistics = fetch("statistics.pickle", "self.statistics")

        # Unpack the counters used downstream from the statistics dict.
        stats = self.statistics
        self.num_of_entities = stats["num_of_entities"]
        self.num_of_relations = stats["num_of_relations"]
        self.num_of_validate_triples = stats["num_of_validate_triples"]
        self.num_of_test_triples = stats["num_of_test_triples"]
        self.num_of_train_entities = stats["num_of_train_entities"]
        self.num_of_validate_entities = stats["num_of_validate_entities"]
        self.num_of_test_entities = stats["num_of_test_entities"]

        self.head_context_head = fetch("train_head_context_head.pickle",
                                       "self.head_context_head")
        self.head_context_relation = fetch(
            "train_head_context_relation.pickle",
            "self.head_context_relation")
        self.head_context_statistics = fetch(
            "train_head_context_statistics.pickle",
            "self.head_context_statistics")
        self.tail_context_relation = fetch(
            "train_tail_context_relation.pickle",
            "self.tail_context_relation")
        self.tail_context_tail = fetch("train_tail_context_tail.pickle",
                                       "self.tail_context_tail")
        self.tail_context_statistics = fetch(
            "train_tail_context_statistics.pickle",
            "self.tail_context_statistics")

        self.train_entities = fetch("train_entities.pickle",
                                    "self.train_entities")
        self.train_head_entities = fetch("train_head_entities.pickle",
                                         "self.train_head_entities")
        self.train_tail_entities = fetch("train_tail_entities.pickle",
                                         "self.train_tail_entities")
        self.train_both_entities = fetch("train_both_entities.pickle",
                                         "self.train_both_entities")