def train():
    """Train TransE on the triples under ``./data/kg/`` and save a checkpoint.

    Builds the training data loader, wraps a TransE model in a
    negative-sampling objective with a margin loss, runs the trainer, and
    finally writes the TransE weights to ``./data/kg/transe.ckpt``.
    """
    # Training batches read from ./data/kg/.
    loader = TrainDataLoader(
        in_path='./data/kg/',
        nbatches=100,
        threads=8,
        sampling_mode="normal",
        bern_flag=1,
        filter_flag=1,
        neg_ent=25,
        neg_rel=0,
    )

    # Embedding model sized from the totals the loader reports.
    kg_model = TransE(
        ent_tot=loader.get_ent_tot(),
        rel_tot=loader.get_rel_tot(),
        dim=Config.entity_embedding_dim,
        p_norm=1,
        norm_flag=True,
    )

    # Training objective: negative sampling scored with a margin loss.
    objective = NegativeSampling(
        model=kg_model,
        loss=MarginLoss(margin=5.0),
        batch_size=loader.get_batch_size(),
    )

    # Optimise, then persist the embedding model (not the wrapper).
    Trainer(
        model=objective,
        data_loader=loader,
        train_times=1000,
        alpha=1.0,
        use_gpu=True,
    ).run()
    kg_model.save_checkpoint('./data/kg/transe.ckpt')
def generate():
    """Export entity and context embeddings from the trained TransE checkpoint.

    Loads ``./data/kg/transe.ckpt`` into a TransE model sized from the
    training data under ``./data/kg/`` and writes two arrays:

    * ``./data/kg/entity.npy`` -- the entity embedding matrix with row 0
      overwritten by zeros.
    * ``./data/kg/context.npy`` -- for every entity id listed in the first
      column of ``./data/sub_kg/entity2name.txt``, the mean of the
      embeddings of that entity and of every id sharing a row with it in
      the first two columns of ``./data/sub_kg/triple2id.txt``.  Rows for
      ids that are not listed are all zeros.
    """
    # The loader is only needed here for the entity / relation totals.
    train_dataloader = TrainDataLoader(
        in_path='./data/kg/',
        nbatches=100,
        threads=8,
        sampling_mode="normal",
        bern_flag=1,
        filter_flag=1,
        neg_ent=25,
        neg_rel=0,
    )

    # Same architecture as used for training, so the checkpoint fits.
    transe = TransE(
        ent_tot=train_dataloader.get_ent_tot(),
        rel_tot=train_dataloader.get_rel_tot(),
        dim=Config.entity_embedding_dim,
        p_norm=1,
        norm_flag=True,
    )
    transe.load_checkpoint('./data/kg/transe.ckpt')

    # NOTE(review): assumes get_parameters() returns NumPy arrays that can be
    # written in place -- confirm against the OpenKE version in use.
    entity_embedding = transe.get_parameters()['ent_embeddings.weight']
    entity_embedding[0] = 0  # presumably id 0 is a padding entity -- confirm
    np.save('./data/kg/entity.npy', entity_embedding)

    # zeros_like, not empty_like: rows of entities missing from
    # entity2name.txt are never assigned below and would otherwise be saved
    # as uninitialised memory.  Row 0 therefore also starts out as zeros.
    context_embedding = np.zeros_like(entity_embedding)

    relation = pd.read_table('./data/sub_kg/triple2id.txt',
                             header=None)[[0, 1]]
    entity = pd.read_table('./data/sub_kg/entity2name.txt',
                           header=None)[[0]].to_numpy().flatten()

    # Convert once so each entity needs only a boolean mask instead of two
    # DataFrame filters plus a concat.
    pairs = relation.to_numpy()
    first_col = pairs[:, 0]
    second_col = pairs[:, 1]

    for e in entity:
        touching = pairs[(first_col == e) | (second_col == e)]
        # Unique ids of e and everything sharing a row with it; np.unique
        # also makes the averaging order deterministic.
        context = np.unique(np.append(touching, e))
        context_embedding[e] = np.mean(entity_embedding[context, :], axis=0)

    np.save('./data/kg/context.npy', context_embedding)
# Training batches read from the LUMB benchmark directory.
train_dataloader = TrainDataLoader(
    in_path="./benchmarks/LUMB/",
    nbatches=100,
    threads=8,
    sampling_mode="normal",
    bern_flag=1,
    filter_flag=1,
    neg_ent=25,
    neg_rel=0,
)

# Test loader over the same directory, opened in "link" mode.
test_dataloader = TestDataLoader("./benchmarks/LUMB/", "link")

# TransE sized from the totals the training loader reports.
entity_total = train_dataloader.get_ent_tot()
relation_total = train_dataloader.get_rel_tot()
transe = TransE(
    ent_tot=entity_total,
    rel_tot=relation_total,
    dim=200,
    p_norm=1,
    norm_flag=True,
)

# Negative-sampling objective scored with a margin loss.
model = NegativeSampling(
    model=transe,
    loss=MarginLoss(margin=5.0),
    batch_size=train_dataloader.get_batch_size(),
)

# Run 100 training passes without a GPU.
trainer = Trainer(
    model=model,
    data_loader=train_dataloader,
    train_times=100,
    alpha=1.0,
    use_gpu=False,
)
trainer.run()
train_dataloader = TrainDataLoader(in_path="./benchmarks/WN18RR/", batch_size=2000, threads=8, sampling_mode="cross", bern_flag=0, filter_flag=1, neg_ent=64, neg_rel=0) # dataloader for test test_dataloader = TestDataLoader("./benchmarks/WN18RR/", "link") # define the model transe = TransE(ent_tot=train_dataloader.get_ent_tot(), rel_tot=train_dataloader.get_rel_tot(), dim=1024, p_norm=1, norm_flag=False, margin=6.0) # define the loss function model = NegativeSampling(model=transe, loss=SigmoidLoss(adv_temperature=1), batch_size=train_dataloader.get_batch_size(), regul_rate=0.0) # train the model trainer = Trainer(model=model, data_loader=train_dataloader, train_times=3000, alpha=2e-5, use_gpu=False,
filter_flag=1, neg_ent=1, neg_rel=0) # dataloader for test #test_dataloader = TestDataLoader("../openke_data", "link") pretrain_init = { 'entity': '../concept_glove.max.npy', 'relation': '../relation_glove.max.npy' } # define the model transe = TransE(ent_tot=train_dataloader.get_ent_tot(), rel_tot=train_dataloader.get_rel_tot(), dim=100, p_norm=1, margin=1.0, norm_flag=True, init='pretrain', init_weights=pretrain_init) # define the loss function model = NegativeSampling(model=transe, loss=SigmoidLoss(adv_temperature=1), batch_size=train_dataloader.get_batch_size()) # train the model checkpoint_dir = Path('./checkpoint/') checkpoint_dir.mkdir(exist_ok=True, parents=True) alpha = 0.001 trainer = Trainer(model=model, data_loader=train_dataloader,
TASK_REV_MEDIUMHAND, TASK_LABELS, ) import metrics from utils import Task, openke_predict, get_entity_relationship_dicts parser = argparse.ArgumentParser() parser.add_argument("--model", default='transe') args = parser.parse_args() ent_list, rel_list = get_entity_relationship_dicts() if args.model == 'transe': model = TransE(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=200, p_norm=1, norm_flag=True) elif args.model == 'transd': model = TransD(ent_tot=len(ent_list), rel_tot=len(rel_list), dim_e=200, dim_r=200, p_norm=1, norm_flag=True) elif args.model == 'rescal': model = RESCAL(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=50) elif args.model == 'distmult': model = DistMult(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=200) elif args.model == 'complex': model = ComplEx(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=200)
# Constructor of QuestionAnswerModel (class name taken from the super() call).
#
# Steps performed, in order:
#   1. Stores `embed_method`, picks 'cuda' when torch reports it available and
#      'cpu' otherwise, and logs the chosen device.
#   2. Builds a RelationPredictor from the BERT / attention / LSTM / DNN
#      options and moves it to that device.
#   3. Selects the scoring method and the KG embedding model from
#      `embed_method` ('rotatE', 'complEx', 'DistMult' or 'TransE'); any other
#      value raises Exception('embed method not specified!').  Every variant
#      is built with the hard-coded sizes ent_tot=43234 and rel_tot=18.
#   4. Loads the embedding checkpoint from `embed_model_path`, moves the model
#      to the device and sets requires_grad = False on all its parameters.
#   5. For 'rotatE', fits KMeans with `n_clusters` clusters on the entity
#      embedding weights and records in `cluster2ent` which entity indices
#      fall into each cluster.
#   6. Creates a CandidateGenerator from './MetaQA/KGE_data/train2id.txt'.
#
# NOTE(review): this block's whitespace has been collapsed onto one line, so
# it cannot be told from here whether step 6 sits inside the 'rotatE' branch
# of step 5 or runs unconditionally -- confirm against the original layout
# before reformatting.
def __init__(self, embed_model_path, bert_path, bert_name, n_clusters, embed_method='rotatE', fine_tune=True, attention=True, use_lstm=False, use_dnn=True, attention_method='mine', num_layers=2, bidirectional=False): super(QuestionAnswerModel, self).__init__() self.embed_method = embed_method self.device = 'cuda' if torch.cuda.is_available() else 'cpu' logger.info('using device: {}'.format(self.device)) self.relation_predictor = RelationPredictor( bert_path=bert_path, bert_name=bert_name, fine_tune=fine_tune, attention=attention, use_lstm=use_lstm, use_dnn=use_dnn, attention_method=attention_method, num_layers=num_layers, bidirectional=bidirectional).to(self.device) if self.embed_method == 'rotatE': self.score_func = self.rotatE self.KG_embed = RotatE(ent_tot=43234, rel_tot=18, dim=256, margin=6.0, epsilon=2.0) elif self.embed_method == 'complEx': self.score_func = self.complEx self.KG_embed = ComplEx(ent_tot=43234, rel_tot=18, dim=200) elif self.embed_method == 'DistMult': self.score_func = self.DistMult self.KG_embed = DistMult(ent_tot=43234, rel_tot=18, dim=200) elif self.embed_method == 'TransE': self.score_func = self.TransE self.KG_embed = TransE(ent_tot=43234, rel_tot=18, dim=200, p_norm=1, norm_flag=True) else: raise Exception('embed method not specified!') self.embed_model_path = embed_model_path self.KG_embed.load_checkpoint(self.embed_model_path) self.KG_embed.to(self.device) for param in self.KG_embed.parameters(): param.requires_grad = False logger.info('loading pretrained KG embedding from {}'.format( self.embed_model_path)) if self.embed_method == 'rotatE': self.cluster = KMeans(n_clusters=n_clusters) self.cluster2ent = [[] for _ in range(n_clusters)] for idx, label in enumerate( self.cluster.fit_predict( self.KG_embed.ent_embeddings.weight.cpu())): self.cluster2ent[label].append(idx) self.candidate_generator = CandidateGenerator( './MetaQA/KGE_data/train2id.txt')
# Fill the initial entity weight matrix row by row from GloVe, falling back
# to the 'unk' vector (and counting the miss) when a name has no entry.
not_found = 0
for i in range(max_id):
    entity = id2entity[i]
    word = entity2name[entity]
    try:
        vector = glove[word]
    except KeyError:
        vector = glove['unk']
        not_found += 1
    weights_matrix[i] = vector

# TransE sized from the training loader's totals and handed the GloVe-derived
# matrix through `ent_weight`.
entity_total = train_dataloader.get_ent_tot()
relation_total = train_dataloader.get_rel_tot()
transe = TransE(
    ent_tot=entity_total,
    rel_tot=relation_total,
    ent_weight=weights_matrix,
    # rel_weight = cur_rel_weight,
    dim=200,
    p_norm=1,
    norm_flag=True,
)

# Negative-sampling objective scored with a margin loss.
model = NegativeSampling(
    model=transe,
    loss=MarginLoss(margin=5.0),
    batch_size=train_dataloader.get_batch_size(),
)

# Trainer configured for 1000 passes on the GPU.
trainer = Trainer(
    model=model,
    data_loader=train_dataloader,
    train_times=1000,
    alpha=1.0,
    use_gpu=True,
)
if result_code == 1: result = "FAKE NEWS" break else: result_true_count += 1 if result is None: if result_true_count >= len(triples) / 2: result = "TRUE NEWS" else: result = "I'M NOT SURE" return result transe = TransE( ent_tot = len(entity_map.keys()), rel_tot = len(relation_map.keys()), dim = 1024, p_norm = 1, norm_flag = False, margin = 6.0 ) transe.load_checkpoint('./checkpoint/transe_fn.ckpt') tester = Tester(model = transe, use_gpu = False) number_of_test_example = 100 db = Mongo().get_client() print("Predicting random entity and relation ...") result_number = 0 news_list = db['covid_news_data'].find({ "status": 2 }) if news_list:
from openke.data import TrainDataLoader, TestDataLoader
import pickle
import pathlib

# Training batches read from the dbpedia50 kb2E directory.
train_dataloader = TrainDataLoader(
    in_path="./dbpedia50_openKE/kb2E/",
    nbatches=100,
    threads=8,
    bern_flag=1,
)

# Test loader over the same directory, opened in "link" mode.
test_dataloader = TestDataLoader("./dbpedia50_openKE/kb2E/", "link")

# 300-dimensional TransE sized from the training loader's totals.
entity_total = train_dataloader.get_ent_tot()
relation_total = train_dataloader.get_rel_tot()
transe = TransE(
    ent_tot=entity_total,
    rel_tot=relation_total,
    dim=300,
)

# Negative-sampling objective with MarginLoss left at its defaults.
model = NegativeSampling(
    model=transe,
    loss=MarginLoss(),
    batch_size=train_dataloader.get_batch_size(),
)

# Train for 1000 passes on the GPU with the 'adagrad' optimiser option.
trainer = Trainer(
    model=model,
    data_loader=train_dataloader,
    train_times=1000,
    alpha=0.01,
    use_gpu=True,
    opt_method='adagrad',
)
trainer.run()
# Training batches from `data_dir`; batch count and negatives per sample come
# from variables defined earlier in the script.
train_dataloader = TrainDataLoader(
    in_path=data_dir,
    nbatches=nbatches,
    threads=8,
    sampling_mode="cross",
    bern_flag=1,
    filter_flag=1,
    neg_ent=negative_samples,
    neg_rel=0,
)

# Test loader over the same directory, opened in "triple" mode.
test_dataloader = TestDataLoader(data_dir, "triple")

# TransE with p_norm=2, sized from the training loader's totals.
entity_total = train_dataloader.get_ent_tot()
relation_total = train_dataloader.get_rel_tot()
transe = TransE(
    ent_tot=entity_total,
    rel_tot=relation_total,
    dim=embed_dim,
    p_norm=2,
    norm_flag=True,
)

# Negative-sampling objective scored with a margin loss.
model = NegativeSampling(
    model=transe,
    loss=MarginLoss(margin=margin),
    batch_size=train_dataloader.get_batch_size(),
)

# The trainer is given `ckpt_path` and save_steps=100 for its own
# checkpointing; the tester is passed to run() with test_every=100.
trainer = Trainer(
    model=model,
    data_loader=train_dataloader,
    opt_method="adam",
    train_times=train_times,
    alpha=alpha,
    use_gpu=True,
    checkpoint_dir=ckpt_path,
    save_steps=100,
)
tester = Tester(
    model=transe,
    data_loader=test_dataloader,
    use_gpu=True,
)
trainer.run(tester, test_every=100)

# Write the final embedding weights once training has finished.
print("Saving model to {0}...".format(ckpt_path))
transe.save_checkpoint(ckpt_path)