def re_read_context_and_negatives(self):
    """Reload the pickled context and negative samples for every split.

    For each entry of ``self.names`` the four context containers are emptied
    and then refilled through ``self.read_dict`` from the matching pickle
    files under ``self.output_path``.  Afterwards the negatives container of
    that split is emptied and refilled the same way.
    """
    log_text(self.log_path,
             "...... Reading Data for Offline Batch Generation ......")
    for position, split_name in enumerate(self.names):
        # (container, pickle file template, label used in the load log line)
        context_sources = (
            (self.context_heads[position],
             "%s_context_head.pickle", "self.%s_context_head"),
            (self.context_head_relations[position],
             "%s_context_head_relation.pickle",
             "self.%s_context_head_relation"),
            (self.context_tail_relations[position],
             "%s_context_tail_relation.pickle",
             "self.%s_context_tail_relation"),
            (self.context_tails[position],
             "%s_context_tail.pickle", "self.%s_context_tail"),
        )
        # All four context containers are emptied before any is refilled.
        for container, _, _ in context_sources:
            container.clear()
        for container, file_template, label_template in context_sources:
            self.read_dict(
                container,
                load_data(self.output_path + file_template % split_name,
                          self.log_path, label_template % split_name))

        negatives = self.negatives[position]
        negatives.clear()
        self.read_dict(
            negatives,
            load_data(self.output_path + "%s_negatives.pickle" % split_name,
                      self.log_path, "self.%s_negatives" % split_name))
def re_sampling(self):
    """Discard the current samples and draw fresh context and negatives.

    The entity and negatives containers at indices 0, 1 and 2 are cleared,
    then ``self.context_sampling()`` and ``self.negative_sampling()`` are
    run, each preceded by a log line.
    """
    for split in range(3):
        per_split_groups = (
            self.entity_heads,
            self.entity_head_relations,
            self.entity_tail_relations,
            self.entity_tails,
            self.negatives,
        )
        for group in per_split_groups:
            group[split].clear()

    log_text(self.log_path, "...... Context Sampling ......")
    self.context_sampling()

    log_text(self.log_path, "...... Negative Sampling ......")
    self.negative_sampling()
def read_dataset(self):
    """Read the train/valid/test triple files and pickle the parsed data.

    Each of ``train.txt``, ``valid.txt`` and ``test.txt`` under
    ``self.input_path`` is read line by line; the first three
    whitespace-separated fields of a line are taken as head, relation and
    tail.  The raw strings are appended to the matching ``self.string_*``
    dictionary and the ids returned by ``self.entity_id_generation`` /
    ``self.relation_id_generation`` to the matching ``self.id_*``
    dictionary.  Reading of a file stops at end of file or at the first
    empty line.  Both dictionaries are pickled per split, followed by
    ``self.entity2id`` and ``self.relation2id``; the per-split triple counts
    are stored on ``self``.

    Raises:
        IndexError: if a non-empty line has fewer than three fields.
    """
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; entity2id/relation2id are assumed to be dumped once, after
    # all three files have been read -- confirm against the original layout.
    names = ["train", "valid", "test"]
    string_triples = [
        self.string_train_triples, self.string_validate_triples,
        self.string_test_triples
    ]
    id_triples = [
        self.id_train_triples, self.id_validate_triples,
        self.id_test_triples
    ]
    num_of_triples = [0, 0, 0]
    empty_lines = ("\n", "\r\n", "\r")

    for index, name in enumerate(names):
        string_triple = string_triples[index]
        id_triple = id_triples[index]
        file_path = self.input_path + name + ".txt"
        log_text(self.log_path, "reading file %s" % file_path)
        with open(file_path) as data_reader:
            for line in data_reader:
                if line in empty_lines:
                    # An empty line terminates the file, as before.
                    break
                # Split once per line instead of once per field.
                fields = line.split()
                tmp_head = fields[0]
                tmp_relation = fields[1]
                tmp_tail = fields[2]
                string_triple["heads"].append(tmp_head)
                string_triple["relations"].append(tmp_relation)
                string_triple["tails"].append(tmp_tail)
                id_triple["id_heads"].append(
                    self.entity_id_generation(tmp_head))
                id_triple["id_relations"].append(
                    self.relation_id_generation(tmp_relation))
                id_triple["id_tails"].append(
                    self.entity_id_generation(tmp_tail))
                num_of_triples[index] += 1
        dump_data(string_triple,
                  self.output_path + "string_%s_triples.pickle" % name,
                  self.log_path, "string_%s_triples" % name)
        dump_data(id_triple,
                  self.output_path + "id_%s_triples.pickle" % name,
                  self.log_path, "id_%s_triples" % name)

    dump_data(self.entity2id, self.output_path + "entity2id.pickle",
              self.log_path, "self.entity2id")
    dump_data(self.relation2id, self.output_path + "relation2id.pickle",
              self.log_path, "self.relation2id")
    self.num_of_train_triples = num_of_triples[0]
    self.num_of_validate_triples = num_of_triples[1]
    self.num_of_test_triples = num_of_triples[2]
def run_funcs(self):
    """Run the sampling pipeline, logging a banner before every stage.

    Stages, in order: ``read_data``, ``entity_classification``,
    ``context_sampling``, ``negative_sampling``.  ``result_validation`` is
    run last, only when ``self.print_results_for_validation`` is truthy.
    """
    stages = (
        ("...... Reading Data for Context and Negatives Sampling ......",
         self.read_data),
        ("...... Entity Classification ......", self.entity_classification),
        ("...... Context Sampling ......", self.context_sampling),
        ("...... Negative Sampling ......", self.negative_sampling),
    )
    for banner, stage in stages:
        log_text(self.log_path, banner)
        stage()

    if not self.print_results_for_validation:
        return
    log_text(self.log_path, "...... Result Validation ......")
    self.result_validation()
def test(self, model):
    """Run ``model.test_calc`` over all test triples and log the metrics.

    The pickled training-triple tensor is loaded and moved to
    ``self.device``.  Test triple indices are served in order (no shuffling)
    in batches of ``self.test_batch_size``; for each batch the head,
    relation and tail ids are handed to ``model.test_calc`` together with a
    4-element result tensor.  Finally the four entries of that tensor are
    logged as raw/filtered mean rank and hit@``self.n_of_hit``.

    Args:
        model: object exposing ``test_calc``.
    """
    train_triple_tensor = load_data(
        self.output_path + "train_triple_tensor.pickle", self.log_path,
        "train_triple_tensor").to(self.device)
    test_dataloader = DataLoader(MyDataset(self.num_of_test_triples),
                                 self.test_batch_size, False)
    # [mean_rank, hit_n, filtered_mean_rank, filtered_hit_n]
    test_result = torch.zeros(4).to(self.device)
    log_text(self.log_path,
             "number of test triples: %d" % self.num_of_test_triples)

    def ids_on_device(key, batch):
        # Gather the ids stored under `key` for the batch's triple indices.
        return torch.tensor(
            [self.id_test_triples[key][index] for index in batch]
        ).to(self.device)

    processed = 0
    for test_batch in test_dataloader:
        if processed % 1000 == 0:
            print("%d test triples processed" % processed)
        processed += self.test_batch_size
        model.test_calc(self.n_of_hit, test_result, train_triple_tensor,
                        ids_on_device("id_heads", test_batch),
                        ids_on_device("id_relations", test_batch),
                        ids_on_device("id_tails", test_batch))

    # NOTE(review): hit rates are divided by twice the triple count while
    # mean ranks are divided by the plain count -- presumably test_calc
    # scores both head and tail prediction; confirm the intended scaling.
    raw_mean_rank = test_result[0].item() / float(self.num_of_test_triples)
    log_text(self.log_path, "raw mean rank: %f" % raw_mean_rank)
    raw_hit = (100. * test_result[1].item() /
               float(2. * self.num_of_test_triples))
    log_text(self.log_path, "raw hit@%d: %f%%" % (self.n_of_hit, raw_hit))
    filtered_mean_rank = (test_result[2].item() /
                          float(self.num_of_test_triples))
    log_text(self.log_path, "filtered mean rank: %f" % filtered_mean_rank)
    filtered_hit = (100. * test_result[3].item() /
                    float(2. * self.num_of_test_triples))
    log_text(self.log_path,
             "filtered hit@%d: %f%%" % (self.n_of_hit, filtered_hit))
def statistics(self):
    """Log the dataset sizes and pickle them to ``statistics.pickle``.

    The pickled dictionary also carries the three ``num_of_*_entities``
    keys, all set to ``None`` here.
    """
    summary = {
        "num_of_train_triples": self.num_of_train_triples,
        "num_of_validate_triples": self.num_of_validate_triples,
        "num_of_test_triples": self.num_of_test_triples,
        "num_of_entities": self.num_of_entities,
        "num_of_relations": self.num_of_relations,
        "num_of_train_entities": None,
        "num_of_validate_entities": None,
        "num_of_test_entities": None
    }
    report_lines = (
        ("number of train triples: %d", "num_of_train_triples"),
        ("number of validate triples: %d", "num_of_validate_triples"),
        ("number of test triples: %d", "num_of_test_triples"),
        ("number of entities: %d", "num_of_entities"),
        ("number of relations: %d", "num_of_relations"),
    )
    for template, key in report_lines:
        log_text(self.log_path, template % summary[key])
    dump_data(summary, self.output_path + "statistics.pickle",
              self.log_path, "statistics")
def train(self):
    """Train the embedding model with validation-loss early stopping.

    A ``Model`` is built (and restored through ``model.input()`` when
    ``self.continue_learning`` is set), optimised with Adam and validated
    before the first epoch.  Every ``self.re_sampling_freq`` epochs (except
    epoch 0) the context/negative samples are regenerated and re-read.
    Every ``self.validation_freq`` epochs the validation loss is compared
    with the best one so far: an improvement stores a copy of the
    embeddings, otherwise the patience counter grows.  When the counter
    reaches ``self.patience`` the learning rate is halved, the best
    embeddings are restored and a fresh optimizer is created -- unless
    ``self.early_stop_patience`` is 1, in which case the best embeddings are
    dumped and training stops.  Every ``self.output_freq`` epochs the model
    and the best embeddings are written out.  ``self.test(model)`` runs
    after the loop.
    """
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; the patience handling is assumed to live in the "no
    # improvement" branch and the periodic dumps inside the output_freq
    # check -- confirm against the original layout.
    model = Model(self.result_path, self.log_path, self.entity_dimension,
                  self.relation_dimension, self.num_of_entities,
                  self.num_of_relations, self.norm, self.device)
    if self.continue_learning:
        model.input()
    model.to(self.device)
    optimizer = torch.optim.Adam(model.parameters(), self.learning_rate)
    PrintGPUStatus.print_gpu_status("after the initialization of model")
    self.offline_batch_retrieve = OfflineBatchRetrieve(
        self.names, self.dataset)

    current_validate_loss = self.validate(model)
    log_text(self.log_path,
             "initial loss (validation): %f" % current_validate_loss)
    optimal_validate_loss = current_validate_loss
    self.optimal_entity_embeddings = \
        model.entity_embeddings.weight.data.clone()
    self.optimal_relation_embeddings = \
        model.relation_embeddings.weight.data.clone()

    entity_set = MyDataset(self.num_of_train_entities)
    entity_loader = DataLoader(entity_set, self.batch_size, True)
    patience_count = 0
    for epoch in range(self.num_of_epochs):
        epoch_loss = 0.
        if epoch != 0 and epoch % self.re_sampling_freq == 0:
            self.context_and_negatives.re_sampling()
            self.offline_batch_retrieve.re_read_context_and_negatives()
        for entity_id_batch in entity_loader:
            model.normalize()
            optimizer.zero_grad()
            entity_batch = [
                self.train_entities[entity_id.item()]
                for entity_id in entity_id_batch
            ]
            head_batch, tail_batch, both_batch = \
                self.offline_batch_retrieve.batch_classification(
                    "train", entity_batch)
            batch_loss = self.loss_compute("train", model, head_batch,
                                           tail_batch, both_batch)
            batch_loss.backward()
            optimizer.step()
            # Accumulate a plain float: adding the tensor itself would keep
            # every batch's autograd history alive for the whole epoch.
            epoch_loss += batch_loss.item()
        log_text(
            self.log_path,
            "\r\nepoch " + str(epoch) + ": , loss: " + str(epoch_loss))

        if epoch % self.validation_freq == 0:
            current_validate_loss = self.validate(model)
            if current_validate_loss < optimal_validate_loss:
                log_text(
                    self.log_path, "optimal validate loss: " +
                    str(optimal_validate_loss) + " -> " +
                    str(current_validate_loss))
                patience_count = 0
                optimal_validate_loss = current_validate_loss
                self.optimal_entity_embeddings = \
                    model.entity_embeddings.weight.data.clone()
                self.optimal_relation_embeddings = \
                    model.relation_embeddings.weight.data.clone()
            else:
                patience_count += 1
                log_text(
                    self.log_path, "early stop patience: " +
                    str(self.early_stop_patience) + ", patience count: " +
                    str(patience_count) + ", current validate loss: " +
                    str(current_validate_loss) +
                    ", optimal validate loss: " +
                    str(optimal_validate_loss))
                if patience_count == self.patience:
                    if self.early_stop_patience == 1:
                        # Last allowed restart used up: persist the best
                        # embeddings and stop training.
                        dump_data(
                            self.optimal_entity_embeddings.to("cpu"),
                            self.result_path +
                            "optimal_entity_embedding.pickle",
                            self.log_path,
                            "self.optimal_entity_embeddings")
                        dump_data(
                            self.optimal_relation_embeddings.to("cpu"),
                            self.result_path +
                            "optimal_relation_embedding.pickle",
                            self.log_path,
                            "self.optimal_relation_embeddings")
                        break
                    log_text(
                        self.log_path,
                        "learning rate: " + str(self.learning_rate) +
                        " -> " + str(self.learning_rate / 2))
                    self.learning_rate = self.learning_rate / 2
                    # Restart from the best embeddings with a new optimizer.
                    model.entity_embeddings.weight.data = \
                        self.optimal_entity_embeddings.clone()
                    model.relation_embeddings.weight.data = \
                        self.optimal_relation_embeddings.clone()
                    optimizer = torch.optim.Adam(model.parameters(),
                                                 lr=self.learning_rate)
                    patience_count = 0
                    self.early_stop_patience -= 1

        if (epoch + 1) % self.output_freq == 0:
            model.output()
            dump_data(self.optimal_entity_embeddings.to("cpu"),
                      self.result_path + "optimal_entity_embedding.pickle",
                      self.log_path, "self.optimal_entity_embeddings")
            dump_data(self.optimal_relation_embeddings.to("cpu"),
                      self.result_path + "optimal_relation_embedding.pickle",
                      self.log_path, "self.optimal_relation_embeddings")

    # NOTE(review): the %f format needs self.test to return a number --
    # confirm that the test method of this class does.
    print("test loss: %f" % self.test(model))
def result_validation(self):
    """Load every pickle written during preprocessing and log its contents.

    The files are visited section by section (reading data, head/tail
    relation maps, entity context, other results); per-split files are
    loaded for "train", "valid" and "test" in that order.
    """
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; entity2id/relation2id are assumed to be logged once, after
    # the per-split loop -- confirm against the original layout.
    splits = ["train", "valid", "test"]

    def show(file_name):
        # Load one pickle from the output directory and log what it holds.
        log_text(self.log_path,
                 load_data(self.output_path + file_name, self.log_path, ""))

    def show_per_split(templates):
        for split in splits:
            for template in templates:
                show(template % split)

    log_text(self.log_path, "......Result of Reading Data......")
    show_per_split(("string_%s_triples.pickle", "id_%s_triples.pickle"))
    show("entity2id.pickle")
    show("relation2id.pickle")

    log_text(self.log_path,
             "......Result of Head Relation to Tail and Reserve......")
    show_per_split(("%s_head_relation_to_tail.pickle",
                    "%s_tail_relation_to_head.pickle"))

    log_text(self.log_path, "......Result of Entity Context Extraction......")
    show_per_split(("%s_head_context_head.pickle",
                    "%s_head_context_relation.pickle",
                    "%s_head_context_statistics.pickle",
                    "%s_tail_context_relation.pickle",
                    "%s_tail_context_tail.pickle",
                    "%s_tail_context_statistics.pickle"))

    log_text(self.log_path, "......Other Results......")
    show("statistics.pickle")
    show("train_triple_tensor.pickle")
def run_functions(self):
    """Run the preprocessing pipeline between a Start and an End log line.

    Stages, in order: ``read_dataset``, ``head_relation_to_tail_and_reverse``,
    ``context_process``, then ``train_triple_tensor_generation`` and
    ``statistics``.  ``result_validation`` is run only when
    ``self.print_results_for_validation`` is truthy.
    """
    log_text(self.log_path,
             "\r\n---------------------Start-------------------------")

    stages = (
        ("...... Reading Data ......", (self.read_dataset,)),
        ("...... Head Relation to Tail and the Reverse ......",
         (self.head_relation_to_tail_and_reverse,)),
        ("...... Entity Context Extraction ......", (self.context_process,)),
        ("...... Other Operations ......",
         (self.train_triple_tensor_generation, self.statistics)),
    )
    for banner, steps in stages:
        log_text(self.log_path, banner)
        for step in steps:
            step()

    if self.print_results_for_validation:
        log_text(self.log_path, "...... Result Validation ......")
        self.result_validation()

    log_text(self.log_path,
             "---------------------End-------------------------")
def dump_data(obj, path, log_path, obj_name):
    """Pickle ``obj`` to ``path`` after logging the operation.

    Args:
        obj: object to serialise.
        path: destination file; an existing file is overwritten.
        log_path: passed to ``log_text`` for the log line.
        obj_name: label for ``obj`` used only in the log line.
    """
    log_text(log_path, "dumping %s to %s" % (obj_name, path))
    # Pickle streams are binary data, so the file is opened in binary mode;
    # text mode fails on Python 3 and may translate newlines on Windows.
    with open(path, "wb") as writer:
        pickle.dump(obj, writer)
def train(self):
    """Train the embedding model with mean-rank based early stopping.

    A ``Model`` is built (and restored through ``model.input()`` when
    ``self.continue_learning`` is set), optimised with Adam and validated
    before the first epoch.  Every ``self.re_sampling_freq`` epochs (except
    epoch 0) the context/negative samples are regenerated and re-read.
    Each batch of training entities is split into head-only, tail-only and
    both-context groups; the batch loss is the sum of the negated model
    outputs of the non-empty groups.  Every ``self.validation_freq`` epochs
    the validation value is compared with the best one so far: a lower
    value stores a copy of the embeddings, otherwise the patience counter
    grows.  When the counter reaches ``self.patience`` the learning rate is
    halved, the best embeddings are restored and a fresh optimizer is
    created -- unless ``self.early_stop_patience`` is 1, in which case the
    best embeddings are dumped and training stops.  Every
    ``self.output_freq`` epochs the model and the best embeddings are
    written out.  ``self.test(model)`` runs after the loop.
    """
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; the patience handling is assumed to live in the "no
    # improvement" branch and the periodic dumps inside the output_freq
    # check -- confirm against the original layout.
    model = Model(self.result_path, self.log_path, self.entity_dimension,
                  self.relation_dimension, self.num_of_entities,
                  self.num_of_relations, self.norm, self.device)
    if self.continue_learning:
        model.input()
    model.to(self.device)
    optimizer = torch.optim.Adam(model.parameters(), self.learning_rate)
    PrintGPUStatus.print_gpu_status("after the initialization of model")
    self.offline_batch_retrieve = OfflineBatchRetrieve(self.names,
                                                       self.dataset)
    retrieve = self.offline_batch_retrieve

    current_mean_rank = self.validate(model)
    log_text(self.log_path,
             "initial mean rank (validation): %f" % current_mean_rank)
    optimal_mean_rank = current_mean_rank
    self.optimal_entity_embeddings = \
        model.entity_embeddings.weight.data.clone()
    self.optimal_relation_embeddings = \
        model.relation_embeddings.weight.data.clone()

    entity_set = MyDataset(self.num_of_train_entities)
    entity_loader = DataLoader(entity_set, self.batch_size, True)
    patience_count = 0
    for epoch in range(self.num_of_epochs):
        epoch_loss = 0.
        if epoch != 0 and epoch % self.re_sampling_freq == 0:
            self.context_and_negatives.re_sampling()
            self.offline_batch_retrieve.re_read_context_and_negatives()
        for entity_id_batch in entity_loader:
            model.normalize()
            optimizer.zero_grad()
            entity_batch = [
                self.train_entities[entity_id.item()]
                for entity_id in entity_id_batch
            ]
            head_loss, tail_loss, both_loss, batch_loss = 0., 0., 0., 0.
            head_batch, tail_batch, both_batch = \
                retrieve.batch_classification("train", entity_batch)
            if len(head_batch) > 0:
                head_head, head_relation = retrieve.head_context_retrieve(
                    "train", head_batch)
                negative_head_batch = retrieve.negative_retrieves(
                    "train", head_batch)
                head_batch = torch.LongTensor(head_batch)
                head_loss = -1. * model(
                    head_batch.to(self.device), head_head.to(self.device),
                    head_relation.to(self.device), None, None,
                    negative_head_batch.to(self.device))
            if len(tail_batch) > 0:
                tail_relation, tail_tail = retrieve.tail_context_retrieve(
                    "train", tail_batch)
                negative_tail_batch = retrieve.negative_retrieves(
                    "train", tail_batch)
                tail_batch = torch.LongTensor(tail_batch)
                tail_loss = -1. * model(
                    tail_batch.to(self.device), None, None,
                    tail_relation.to(self.device),
                    tail_tail.to(self.device),
                    negative_tail_batch.to(self.device))
            if len(both_batch) > 0:
                both_head, both_head_relation = \
                    retrieve.head_context_retrieve("train", both_batch)
                both_tail_relation, both_tail = \
                    retrieve.tail_context_retrieve("train", both_batch)
                negative_both_batch = retrieve.negative_retrieves(
                    "train", both_batch)
                both_batch = torch.LongTensor(both_batch)
                both_loss = -1. * model(
                    both_batch.to(self.device), both_head.to(self.device),
                    both_head_relation.to(self.device),
                    both_tail_relation.to(self.device),
                    both_tail.to(self.device),
                    negative_both_batch.to(self.device))
            batch_loss += head_loss + tail_loss + both_loss
            batch_loss.backward()
            optimizer.step()
            # Accumulate a plain float: adding the tensor itself would keep
            # every batch's autograd history alive for the whole epoch.
            epoch_loss += batch_loss.item()
        log_text(
            self.log_path,
            "\r\nepoch " + str(epoch) + ": , loss: " + str(epoch_loss))

        if epoch % self.validation_freq == 0:
            current_mean_rank = self.validate(model)
            if current_mean_rank < optimal_mean_rank:
                log_text(
                    self.log_path, "optimal average raw mean rank: " +
                    str(optimal_mean_rank) + " -> " +
                    str(current_mean_rank))
                patience_count = 0
                optimal_mean_rank = current_mean_rank
                self.optimal_entity_embeddings = \
                    model.entity_embeddings.weight.data.clone()
                self.optimal_relation_embeddings = \
                    model.relation_embeddings.weight.data.clone()
            else:
                patience_count += 1
                log_text(
                    self.log_path, "early stop patience: " +
                    str(self.early_stop_patience) + ", patience count: " +
                    str(patience_count) + ", current rank: " +
                    str(current_mean_rank) + ", best rank: " +
                    str(optimal_mean_rank))
                if patience_count == self.patience:
                    if self.early_stop_patience == 1:
                        # Last allowed restart used up: persist the best
                        # embeddings and stop training.
                        dump_data(
                            self.optimal_entity_embeddings.to("cpu"),
                            self.result_path +
                            "optimal_entity_embedding.pickle",
                            self.log_path,
                            "self.optimal_entity_embeddings")
                        dump_data(
                            self.optimal_relation_embeddings.to("cpu"),
                            self.result_path +
                            "optimal_relation_embedding.pickle",
                            self.log_path,
                            "self.optimal_relation_embeddings")
                        break
                    log_text(
                        self.log_path,
                        "learning rate: " + str(self.learning_rate) +
                        " -> " + str(self.learning_rate / 2))
                    self.learning_rate = self.learning_rate / 2
                    # Restart from the best embeddings with a new optimizer.
                    model.entity_embeddings.weight.data = \
                        self.optimal_entity_embeddings.clone()
                    model.relation_embeddings.weight.data = \
                        self.optimal_relation_embeddings.clone()
                    optimizer = torch.optim.Adam(model.parameters(),
                                                 lr=self.learning_rate)
                    patience_count = 0
                    self.early_stop_patience -= 1

        if (epoch + 1) % self.output_freq == 0:
            model.output()
            dump_data(self.optimal_entity_embeddings.to("cpu"),
                      self.result_path + "optimal_entity_embedding.pickle",
                      self.log_path, "self.optimal_entity_embeddings")
            dump_data(self.optimal_relation_embeddings.to("cpu"),
                      self.result_path + "optimal_relation_embedding.pickle",
                      self.log_path, "self.optimal_relation_embeddings")

    self.test(model)
def run_functions(self):
    """Log the run configuration, then read the data and train.

    Everything happens between a Start and an End log line: one line per
    configuration value, followed by ``self.read_data()`` and
    ``self.train()``.
    """
    log_text(self.log_path,
             "\r\n---------------------Start-------------------------")

    settings = (
        ("dataset: %s", self.dataset),
        ("head_context_size: %d", self.head_context_size),
        ("tail_context_size: %d", self.tail_context_size),
        ("negative_batch_size: %d", self.negative_batch_size),
        ("number of epochs: %d", self.num_of_epochs),
        ("batch size: %d", self.batch_size),
        ("norm: %d", self.norm),
        ("learning rate: %f", self.learning_rate),
        ("device: %s", self.device),
        ("continue learning: %s", self.continue_learning),
        ("entity dimension: %d", self.entity_dimension),
        ("relation dimension: %d", self.relation_dimension),
        ("patience: %d", self.patience),
        ("early stop patience: %d", self.early_stop_patience),
        ("output frequency: %d", self.output_freq),
        ("validation batch size: %d", self.validation_batch_size),
        ("test batch size: %d", self.test_batch_size),
        ("hit@: %d", self.n_of_hit),
    )
    for template, value in settings:
        log_text(self.log_path, template % value)

    self.read_data()
    self.train()
    log_text(self.log_path,
             "---------------------End-------------------------")
def train(self):
    """Train the embedding model, building batches through ``BatchProcess``.

    A ``BatchProcess`` helper and a ``Model`` are built (the model is
    restored through ``model.input()`` when ``self.continue_learning`` is
    set); the model is optimised with Adam and validated before the first
    epoch.  Each batch of training entities is split into head-only,
    tail-only and both-context groups; the batch loss is the sum of the
    negated model outputs of the non-empty groups.  After every epoch the
    validation value is compared with the best one so far: a lower value
    stores a copy of the embeddings, otherwise the patience counter grows.
    When the counter reaches ``self.patience`` the learning rate is halved,
    the best embeddings are restored and a fresh optimizer is created --
    unless ``self.early_stop_patience`` is 1, in which case the best
    embeddings are dumped and training stops.  Whenever ``epoch`` is a
    multiple of ``self.output_freq`` the model and the best embeddings are
    written out.  ``self.test(model)`` runs after the loop.
    """
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; the patience handling is assumed to live in the "no
    # improvement" branch and the periodic dumps inside the output_freq
    # check -- confirm against the original layout.
    entity_set = MyDataset(self.num_of_train_entities)
    entity_loader = DataLoader(entity_set, self.batch_size, True)
    batch_process = BatchProcess(
        self.train_entities, self.train_head_entities,
        self.train_tail_entities, self.train_both_entities,
        self.head_context_head, self.head_context_relation,
        self.head_context_statistics, self.tail_context_relation,
        self.tail_context_tail, self.tail_context_statistics,
        self.head_context_size, self.tail_context_size,
        self.num_of_train_entities, self.negative_batch_size, self.device)
    model = Model(self.result_path, self.log_path, self.entity_dimension,
                  self.relation_dimension, self.num_of_entities,
                  self.num_of_relations, self.norm, self.device)
    if self.continue_learning:
        model.input()
    model.to(self.device)
    optimizer = torch.optim.Adam(model.parameters(), self.learning_rate)

    current_mean_rank = self.validate(model)
    log_text(self.log_path,
             "initial mean rank (validation): %f" % current_mean_rank)
    optimal_mean_rank = current_mean_rank
    self.optimal_entity_embeddings = \
        model.entity_embeddings.weight.data.clone()
    self.optimal_relation_embeddings = \
        model.relation_embeddings.weight.data.clone()

    patience_count = 0
    for epoch in range(self.num_of_epochs):
        epoch_loss = 0.
        count = 0
        for entity_id_batch in entity_loader:
            if count % 200 == 0:
                # Progress line with a timestamp every 200 batches.
                print("%d batches processed " % count + time.strftime(
                    '%m-%d-%Y %H:%M:%S', time.localtime(time.time())))
            count += 1
            model.normalize()
            optimizer.zero_grad()
            entity_id_batch = entity_id_batch.tolist()
            entity_batch = [
                self.train_entities[entity_id]
                for entity_id in entity_id_batch
            ]
            head_loss, tail_loss, both_loss, batch_loss = 0., 0., 0., 0.
            head_batch, tail_batch, both_batch = \
                batch_process.batch_classification(entity_batch)
            if len(head_batch) > 0:
                head_head, head_relation = \
                    batch_process.head_context_process(head_batch)
                negative_head_batch = \
                    batch_process.negative_batch_generation(head_batch)
                head_batch = torch.LongTensor(head_batch)
                head_loss = -1. * model(
                    head_batch.to(self.device), head_head.to(self.device),
                    head_relation.to(self.device), None, None,
                    negative_head_batch.to(self.device))
            if len(tail_batch) > 0:
                tail_relation, tail_tail = \
                    batch_process.tail_context_process(tail_batch)
                negative_tail_batch = \
                    batch_process.negative_batch_generation(tail_batch)
                tail_batch = torch.LongTensor(tail_batch)
                tail_loss = -1. * model(
                    tail_batch.to(self.device), None, None,
                    tail_relation.to(self.device),
                    tail_tail.to(self.device),
                    negative_tail_batch.to(self.device))
            if len(both_batch) > 0:
                both_head, both_head_relation = \
                    batch_process.head_context_process(both_batch)
                both_tail_relation, both_tail = \
                    batch_process.tail_context_process(both_batch)
                negative_both_batch = \
                    batch_process.negative_batch_generation(both_batch)
                both_batch = torch.LongTensor(both_batch)
                both_loss = -1. * model(
                    both_batch.to(self.device), both_head.to(self.device),
                    both_head_relation.to(self.device),
                    both_tail_relation.to(self.device),
                    both_tail.to(self.device),
                    negative_both_batch.to(self.device))
            batch_loss += head_loss + tail_loss + both_loss
            batch_loss.backward()
            optimizer.step()
            # Accumulate a plain float: adding the tensor itself would keep
            # every batch's autograd history alive for the whole epoch.
            epoch_loss += batch_loss.item()
        log_text(
            self.log_path,
            "\r\nepoch " + str(epoch) + ": , loss: " + str(epoch_loss))

        current_mean_rank = self.validate(model)
        if current_mean_rank < optimal_mean_rank:
            log_text(
                self.log_path, "optimal average raw mean rank: " +
                str(optimal_mean_rank) + " -> " + str(current_mean_rank))
            patience_count = 0
            optimal_mean_rank = current_mean_rank
            self.optimal_entity_embeddings = \
                model.entity_embeddings.weight.data.clone()
            self.optimal_relation_embeddings = \
                model.relation_embeddings.weight.data.clone()
        else:
            patience_count += 1
            log_text(
                self.log_path, "early stop patience: " +
                str(self.early_stop_patience) + ", patience count: " +
                str(patience_count) + ", current rank: " +
                str(current_mean_rank) + ", best rank: " +
                str(optimal_mean_rank))
            if patience_count == self.patience:
                if self.early_stop_patience == 1:
                    # Last allowed restart used up: persist the best
                    # embeddings and stop training.
                    dump_data(
                        self.optimal_entity_embeddings.to("cpu"),
                        self.result_path +
                        "optimal_entity_embedding.pickle", self.log_path,
                        "self.optimal_entity_embeddings")
                    dump_data(
                        self.optimal_relation_embeddings.to("cpu"),
                        self.result_path +
                        "optimal_relation_embedding.pickle", self.log_path,
                        "self.optimal_relation_embeddings")
                    break
                log_text(
                    self.log_path,
                    "learning rate: " + str(self.learning_rate) + " -> " +
                    str(self.learning_rate / 2))
                self.learning_rate = self.learning_rate / 2
                # Restart from the best embeddings with a new optimizer.
                model.entity_embeddings.weight.data = \
                    self.optimal_entity_embeddings.clone()
                model.relation_embeddings.weight.data = \
                    self.optimal_relation_embeddings.clone()
                optimizer = torch.optim.Adam(model.parameters(),
                                             lr=self.learning_rate)
                patience_count = 0
                self.early_stop_patience -= 1

        if epoch % self.output_freq == 0:
            model.output()
            dump_data(self.optimal_entity_embeddings.to("cpu"),
                      self.result_path + "optimal_entity_embedding.pickle",
                      self.log_path, "self.optimal_entity_embeddings")
            dump_data(self.optimal_relation_embeddings.to("cpu"),
                      self.result_path + "optimal_relation_embedding.pickle",
                      self.log_path, "self.optimal_relation_embeddings")

    self.test(model)
def train(self):
    """Train the embedding model for ``self.num_of_epochs`` epochs.

    A ``Model`` is built (and restored through ``model.input()`` when
    ``self.continue_learning`` is set) and optimised with Adam.  Every
    ``self.re_sampling_freq`` epochs (except epoch 0) the context/negative
    samples are regenerated and re-read.  Each batch of training entities
    is split into head-only, tail-only and both-context groups; the batch
    loss is the sum of the negated model outputs of the non-empty groups.
    The summed epoch loss is logged, and the model is written out every
    ``self.output_freq`` epochs.
    """
    model = Model(self.result_path, self.log_path, self.entity_dimension,
                  self.relation_dimension, self.num_of_entities,
                  self.num_of_relations, self.norm, self.device)
    if self.continue_learning:
        model.input()
    model.to(self.device)
    optimizer = torch.optim.Adam(model.parameters(), self.learning_rate)
    PrintGPUStatus.print_gpu_status("after the initialization of model")
    self.offline_batch_retrieve = OfflineBatchRetrieve(
        self.names, self.dataset)
    retrieve = self.offline_batch_retrieve

    entity_set = MyDataset(self.num_of_train_entities)
    entity_loader = DataLoader(entity_set, self.batch_size, True)
    for epoch in range(self.num_of_epochs):
        epoch_loss = 0.
        if epoch != 0 and epoch % self.re_sampling_freq == 0:
            self.context_and_negatives.re_sampling()
            self.offline_batch_retrieve.re_read_context_and_negatives()
        for entity_id_batch in entity_loader:
            model.normalize()
            optimizer.zero_grad()
            entity_batch = [
                self.train_entities[entity_id.item()]
                for entity_id in entity_id_batch
            ]
            head_loss, tail_loss, both_loss, batch_loss = 0., 0., 0., 0.
            head_batch, tail_batch, both_batch = \
                retrieve.batch_classification("train", entity_batch)
            if len(head_batch) > 0:
                head_head, head_relation = retrieve.head_context_retrieve(
                    "train", head_batch)
                negative_head_batch = retrieve.negative_retrieves(
                    "train", head_batch)
                head_batch = torch.LongTensor(head_batch)
                head_loss = -1. * model(
                    head_batch.to(self.device), head_head.to(self.device),
                    head_relation.to(self.device), None, None,
                    negative_head_batch.to(self.device))
            if len(tail_batch) > 0:
                tail_relation, tail_tail = retrieve.tail_context_retrieve(
                    "train", tail_batch)
                negative_tail_batch = retrieve.negative_retrieves(
                    "train", tail_batch)
                tail_batch = torch.LongTensor(tail_batch)
                tail_loss = -1. * model(
                    tail_batch.to(self.device), None, None,
                    tail_relation.to(self.device),
                    tail_tail.to(self.device),
                    negative_tail_batch.to(self.device))
            if len(both_batch) > 0:
                both_head, both_head_relation = \
                    retrieve.head_context_retrieve("train", both_batch)
                both_tail_relation, both_tail = \
                    retrieve.tail_context_retrieve("train", both_batch)
                negative_both_batch = retrieve.negative_retrieves(
                    "train", both_batch)
                both_batch = torch.LongTensor(both_batch)
                both_loss = -1. * model(
                    both_batch.to(self.device), both_head.to(self.device),
                    both_head_relation.to(self.device),
                    both_tail_relation.to(self.device),
                    both_tail.to(self.device),
                    negative_both_batch.to(self.device))
            batch_loss += head_loss + tail_loss + both_loss
            batch_loss.backward()
            optimizer.step()
            # Accumulate a plain float: adding the tensor itself would keep
            # every batch's autograd history alive for the whole epoch.
            epoch_loss += batch_loss.item()
        log_text(
            self.log_path,
            "\r\nepoch " + str(epoch) + ": , loss: " + str(epoch_loss))
        if (epoch + 1) % self.output_freq == 0:
            model.output()
def result_validation(self):
    """Load every pickle written by the sampling steps and log its contents.

    The files are visited section by section (entity classification,
    context sampling, negative sampling, other results); per-split files
    are loaded for every entry of ``self.names``.
    """

    def show(file_name, obj_name=""):
        # Load one pickle from the output directory and log what it holds.
        log_text(
            self.log_path,
            load_data(self.output_path + file_name, self.log_path,
                      obj_name))

    def show_per_split(templates):
        for name in self.names:
            for template in templates:
                show(template % name)

    log_text(self.log_path, "...... Result of Entity Classification ......")
    show_per_split(("%s_entities.pickle", "%s_head_entities.pickle",
                    "%s_tail_entities.pickle", "%s_both_entities.pickle"))

    log_text(self.log_path, "...... Result of Context Sampling ......")
    show_per_split(("%s_context_head.pickle",
                    "%s_context_head_relation.pickle",
                    "%s_context_tail_relation.pickle",
                    "%s_context_tail.pickle"))

    log_text(self.log_path, "...... Result of Negative Sampling ......")
    # The load is logged to self.log_path like every other load here; the
    # previous code passed self.output_path as the log destination.
    show_per_split(("%s_negatives.pickle",))

    log_text(self.log_path, "...... Other Results ......")
    show("statistics.pickle", "statistics")
def run_functions(self):
    """Log the run configuration, then prepare samples, read data and train.

    Everything happens between a Start and an End log line: one line per
    configuration value, followed by ``prepare_context_and_negatives``,
    ``read_data`` and ``train``, each preceded by its own banner.
    """
    log_text(self.log_path,
             "\r\n---------------------Start-------------------------")

    settings = (
        ("dataset: %s", self.dataset),
        ("head_context_size: %d", self.head_context_size),
        ("tail_context_size: %d", self.tail_context_size),
        ("negative_batch_size: %d", self.negative_batch_size),
        ("number of epochs: %d", self.num_of_epochs),
        ("batch size: %d", self.batch_size),
        ("norm: %d", self.norm),
        ("learning rate: %f", self.learning_rate),
        ("device: %s", self.device),
        ("continue learning: %s", self.continue_learning),
        ("entity dimension: %d", self.entity_dimension),
        ("relation dimension: %d", self.relation_dimension),
        ("output frequency: %d", self.output_freq),
    )
    for template, value in settings:
        log_text(self.log_path, template % value)

    stages = (
        ("...... Context and Negatives Preparation ......",
         self.prepare_context_and_negatives),
        ("...... Reading Data for ISWC Training ......", self.read_data),
        ("...... ISWC Training ......", self.train),
    )
    for banner, stage in stages:
        log_text(self.log_path, banner)
        stage()

    log_text(self.log_path,
             "---------------------End-------------------------")
def load_data(path, log_path, obj_name):
    """Unpickle and return the object stored at ``path``.

    Args:
        path: pickle file to read.
        log_path: passed to ``log_text`` for the log line.
        obj_name: label for the loaded object, used only in the log line.

    Returns:
        The object produced by ``pickle.load``.
    """
    log_text(log_path, "loading data from %s to %s" % (path, obj_name))
    # Pickle streams are binary data, so the file is opened in binary mode;
    # text mode fails on Python 3 and may translate newlines on Windows.
    # WARNING: unpickling can execute arbitrary code -- only load files
    # produced by this project, never files from an untrusted source.
    with open(path, "rb") as reader:
        return pickle.load(reader)