def train_and_eval(self): print("Training the LSTM TuckER on {}...".format(args.dataset)) self.entity_idxs = {d.entities[i]: i for i in range(len(d.entities))} self.relation_idxs = { d.relations[i]: i for i in range(len(d.relations)) } train_data_idxs = self.get_data_idxs(d.train_data) #data_idxs = self.get_data_idxs(d.data) print("Number of training data points: %d" % len(train_data_idxs)) #print("Number of all data points: %d" % len(data_idxs)) ######## #print("entities="+str(d.entities)) entities_ids, self.Evocab = self.strings_to_ids(vocab=self.Evocab, data=d.entities) #print("entities_ids = "+str(entities_ids)) relation_ids, self.Rvocab = self.strings_to_ids(vocab=self.Rvocab, data=d.relations) print("entities_ids len=%d" % len(entities_ids)) print("relation_ids len=%d" % len(relation_ids)) print("read vocab ready.") d.Etextdata = d.get_index( entities_ids, self.maxlength) # list, contained padding entities #self.Elen = np.array(Elen) self.Etextdata = np.array(d.Etextdata) d.Rtextdata = d.get_index(relation_ids, self.maxlength) #self.Rlen = np.array(Rlen) self.Rtextdata = np.array(d.Rtextdata) # self.textdata = np.array(d.Etextdata + d.Rtextdata) # self.check_textdata() print("text data ready") cfg = config(dict(read_json(args.config))) es_idx = torch.LongTensor(self.Etextdata) if self.cuda: es_idx = es_idx.cuda() #print(cfg) model = LSTMTuckER(d=d, es_idx=es_idx, ent_vec_dim=self.ent_vec_dim, rel_vec_dim=self.rel_vec_dim, \ cfg=cfg, Evocab=len(self.Evocab), Rvocab=len(self.Rvocab), n_ctx = self.maxlength, **self.kwargs)# n_ctx = 52为COMET中计算出的 print("model ready") ######## if self.cuda: model.cuda() #model.init() opt = torch.optim.Adam(model.parameters(), lr=self.learning_rate) if self.decay_rate: scheduler = ExponentialLR(opt, self.decay_rate) er_vocab = self.get_er_vocab(train_data_idxs) #dict (e1,r)->e2 er_vocab_pairs = list(er_vocab.keys()) #list [...,(e1,r),...] print("Starting training...") for it in range(1, self.num_iterations + 1): start_train = time.time() model.train() losses = [] np.random.shuffle(er_vocab_pairs) for j in range(0, len(er_vocab_pairs), self.batch_size): data_batch, targets = self.get_batch(er_vocab, er_vocab_pairs, j) #target: tensor [batch, len(d.entities), 0./1.] opt.zero_grad() #print(data_batch[:,0].reshape(-1, 1)) #print(textdata) #print(textdata[data_batch[:,0].reshape(-1, 1)]) e1 = data_batch[:, 0] r = data_batch[:, 1] e1_idx = torch.LongTensor(self.Etextdata[e1]) r_idx = torch.LongTensor(self.Rtextdata[r]) if self.cuda: e1_idx = e1_idx.cuda() r_idx = r_idx.cuda() if e1_idx.size(0) == 1: print(j) continue predictions = model.forward(e1_idx, r_idx) #print('predictions size:'+str(predictions.size())) #print('targets size:' + str(targets.size())) if self.label_smoothing: targets = ((1.0 - self.label_smoothing) * targets) + (1.0 / targets.size(1)) loss = model.loss(predictions, targets) loss.backward() opt.step() losses.append(loss.item()) if self.decay_rate: scheduler.step() print(it) print(time.time() - start_train) print('loss=' + str(np.mean(losses))) model.eval() with torch.no_grad(): if not it % 2: if it % 10 == 0: print("Train:") start_test = time.time() self.evaluate(model, d.train_data) print(time.time() - start_test) # print("Valid:") # start_test = time.time() # self.evaluate(model, d.valid_data) # print(time.time() - start_test) print("Test:") start_test = time.time() self.evaluate(model, d.test_data) print(time.time() - start_test)
def train_and_eval(self): print("Training the TuckER model...") self.entity_idxs = {d.entities[i]: i for i in range(len(d.entities))} self.relation_idxs = {d.relations[i]: i for i in range(len(d.relations))} train_data_idxs = self.get_data_idxs(d.train_data) data_idxs = self.get_data_idxs(d.data) print("Number of training data points: %d" % len(train_data_idxs)) ######## data_ids, self.vocab = self.strings_to_ids(data=d.data, vocab=self.vocab) print("read vocab ready.") d.textdata = d.get_index(data_ids, self.maxlength) self.textdata = np.array(d.textdata) print("text data ready") cfg = config(dict(read_json(args.config))) # print(cfg) model = TransformerTucker(d, self.ent_vec_dim, self.rel_vec_dim, cfg=cfg, vocab=40508, n_ctx=self.maxlength, **self.kwargs) # n_ctx = 52为COMET中计算出的 print("model ready") load_openai_pretrained_model( model.transformer, n_ctx=self.maxlength) print("loading model ready") ######## if self.cuda: model.cuda() # model.init() opt = torch.optim.Adam(model.parameters(), lr=self.learning_rate) if self.decay_rate: scheduler = ExponentialLR(opt, self.decay_rate) er_vocab = self.get_er_vocab(train_data_idxs) er_vocab_pairs = list(er_vocab.keys()) print("Starting training...") for it in range(1, self.num_iterations + 1): start_train = time.time() model.train() losses = [] np.random.shuffle(er_vocab_pairs) for j in range(0, len(er_vocab_pairs), self.batch_size): data_batch, targets = self.get_batch(er_vocab, er_vocab_pairs, j) opt.zero_grad() # print(data_batch[:,0].reshape(-1, 1)) # print(textdata) # print(textdata[data_batch[:,0].reshape(-1, 1)]) e1_idx = torch.LongTensor(self.textdata[data_batch[:, 0]][:, :, np.newaxis]) r_idx = torch.LongTensor(self.textdata[data_batch[:, 1]][:, :, np.newaxis]) e1_idx = prepare_position_embeddings(encoder_vocab=self.vocab, sequences=e1_idx) r_idx = prepare_position_embeddings(encoder_vocab=self.vocab, sequences=r_idx) if self.cuda: e1_idx = e1_idx.cuda() r_idx = r_idx.cuda() predictions = model.forward(e1_idx, r_idx) if self.label_smoothing: targets = ((1.0 - self.label_smoothing) * targets) + (1.0 / targets.size(1)) loss = model.loss(predictions, targets) loss.backward() opt.step() losses.append(loss.item()) if self.decay_rate: scheduler.step() print(it) print(time.time() - start_train) print(np.mean(losses)) model.eval() with torch.no_grad(): if it % 5: print("Validation:") self.evaluate(model, d.valid_data) if not it % 2: print("Test:") start_test = time.time() self.evaluate(model, d.test_data) print(time.time() - start_test)
def train_and_eval(self): print("Training the {} model on {}...".format(args.model, args.dataset)) self.entity_idxs = {d.entities[i]: i for i in range(len(d.entities))} self.relation_idxs = { d.relations[i]: i for i in range(len(d.relations)) } train_data_idxs = self.get_data_idxs(d.train_data) # data_idxs = self.get_data_idxs(d.data) print("Number of training data points: %d" % len(train_data_idxs)) # print("Number of all data points: %d" % len(data_idxs)) ######## # data_ids, self.vocab = self.strings_to_ids(vocab=self.vocab, data=d.data) #print('d.entities='+str(len(d.entities))) entities_ids, self.Evocab = self.strings_to_ids(vocab=[ 'NULL', ], data=d.entities) #print("entities_ids = " + str(entities_ids)) relation_ids, self.Rvocab = self.strings_to_ids(vocab=[ 'NULL', ], data=d.relations) print("entities_ids len=%d" % len(entities_ids)) print("relation_ids len=%d" % len(relation_ids)) #print('XXX = ' + str([len(i) for i in entities_ids].index(0))) #print('YYY = ' + str([len(i) for i in entities_ids].index(0))) cfg = config(dict(read_json(args.config))) if args.do_pretrain == 1: cfg.hSize = 768 Eembs = self.get_vocab_emb(self.Evocab, cfg.hSize) print("read vocab ready.") d.Etextdata = d.get_index( entities_ids, self.maxlength) # list, contained padding entities self.Etextdata = np.array(d.Etextdata) d.Rtextdata = d.get_index(relation_ids, 1) self.Rtextdata = np.array(d.Rtextdata) # self.textdata = np.array(d.Etextdata + d.Rtextdata) #self.check_textdata() print("text data ready") es_idx = torch.LongTensor(self.Etextdata) if self.cuda: es_idx = es_idx.cuda() print("es ready") if args.model == 'Mean': model = MeanTuckER(d=d, es_idx=es_idx, ent_vec_dim=self.ent_vec_dim, rel_vec_dim=self.rel_vec_dim, cfg=cfg, Evocab=len(self.Evocab), Rvocab=len(self.Rvocab)) elif args.model == 'CNN': model = CNNTuckER(d=d, es_idx=es_idx, ent_vec_dim=self.ent_vec_dim, rel_vec_dim=self.rel_vec_dim, cfg=cfg, max_length=self.maxlength, Evocab=len(self.Evocab), Rvocab=len(self.Rvocab)) elif args.model == 'LSTM': model = LSTMTuckER(d=d, es_idx=es_idx, ent_vec_dim=self.ent_vec_dim, rel_vec_dim=self.rel_vec_dim, cfg=cfg, max_length=self.maxlength, Evocab=len(self.Evocab), Rvocab=len(self.Rvocab)) else: print("No Model") exit(0) print("model ready") if args.do_pretrain == 1: model.Eembed.weight.data.copy_(torch.from_numpy(np.array(Eembs))) print("Embedding Loaded") ######## if self.cuda: model.cuda() #model.init() opt = torch.optim.Adam(model.parameters(), lr=self.learning_rate) if self.decay_rate: scheduler = ExponentialLR(opt, self.decay_rate) er_vocab = self.get_er_vocab(train_data_idxs) # dict (e1,r)->e2 er_vocab_pairs = list(er_vocab.keys()) # list [...,(e1,r),...] print("Starting training...") for it in range(1, self.num_iterations + 1): start_train = time.time() model.train() losses = [] np.random.shuffle(train_data_idxs) for j in range(0, len(train_data_idxs), self.batch_size): data_batch, e2n_idx = self.get_batch_train( er_vocab, train_data_idxs, j) # target: tensor [batch, len(d.entities), 0./1.] 
opt.zero_grad() e1_idx = torch.LongTensor(self.Etextdata[data_batch[:, 0]]) r_idx = torch.LongTensor(self.Rtextdata[data_batch[:, 1]]) e2p_idx = torch.LongTensor(self.Etextdata[data_batch[:, 2]]) e2n_idx = torch.LongTensor(self.Etextdata[e2n_idx]) targets = torch.cat((torch.ones( e2p_idx.size(0)), torch.zeros(e2n_idx.size(0))), 0) #e2_idx = torch.LongTensor(data_batch[:, 2]) # e2 are not used for model forward if self.cuda: e1_idx = e1_idx.cuda() r_idx = r_idx.cuda() e2p_idx = e2p_idx.cuda() e2n_idx = e2n_idx.cuda() targets = targets.cuda() if e1_idx.size(0) == 1: print(j) continue pred_p, pred_n = model.forward(e1_idx, r_idx, e2p_idx, e2n_idx) #print("predictions="+str(predictions)) predication = torch.cat((pred_p, pred_n), 0) if self.label_smoothing: targets = ((1.0 - self.label_smoothing) * targets) + (1.0 / len(d.entities)) loss = model.loss(predication, targets) loss.backward() opt.step() losses.append(loss.item()) if self.decay_rate: scheduler.step() print(it) print(time.time() - start_train) print("loss=" + str(np.mean(losses))) model.eval() with torch.no_grad(): # print("Validation:") # self.evaluate(model, d.valid_data) # if not it % 2: # if it % 10 == 0: # print("Train:") # start_test = time.time() # self.evaluate(model, d.train_data) # print(time.time() - start_test) print("Test:") start_test = time.time() self.evaluate(model, d.test_data) print(time.time() - start_test)
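
# get_batch_train is not shown in this section. A hedged sketch of what the
# calls above imply: each positive (e1, r, e2) triple is paired with one
# uniformly sampled corrupted tail, rejecting tails already known to be true
# for (e1, r). The exact sampling strategy is an assumption.
def get_batch_train(self, er_vocab, train_data_idxs, idx):
    batch = np.array(train_data_idxs[idx:idx + self.batch_size])
    e2n_idx = []
    for e1, r, _ in batch:
        neg = np.random.randint(len(d.entities))
        while neg in er_vocab[(e1, r)]:  # resample if we drew a true tail
            neg = np.random.randint(len(d.entities))
        e2n_idx.append(neg)
    return batch, np.array(e2n_idx)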