Example #1
0
def train():
    """Train a TransE model on ``./data/kg/`` and save its checkpoint.

    Builds the training data loader, wraps a TransE model (sized from the
    loader's entity and relation counts) in a negative-sampling objective
    with a margin loss, runs the trainer, and finally writes the model to
    ``./data/kg/transe.ckpt``.
    """
    # Source of training batches.
    loader = TrainDataLoader(
        in_path='./data/kg/',
        nbatches=100,
        threads=8,
        sampling_mode="normal",
        bern_flag=1,
        filter_flag=1,
        neg_ent=25,
        neg_rel=0,
    )

    # Embedding model; the embedding width comes from the project Config.
    kge_model = TransE(
        ent_tot=loader.get_ent_tot(),
        rel_tot=loader.get_rel_tot(),
        dim=Config.entity_embedding_dim,
        p_norm=1,
        norm_flag=True,
    )

    # Training objective: negative sampling scored with a margin loss.
    objective = NegativeSampling(
        model=kge_model,
        loss=MarginLoss(margin=5.0),
        batch_size=loader.get_batch_size(),
    )

    # Run the optimisation, then persist the trained embedding model.
    Trainer(
        model=objective,
        data_loader=loader,
        train_times=1000,
        alpha=1.0,
        use_gpu=True,
    ).run()
    kge_model.save_checkpoint('./data/kg/transe.ckpt')
Example #2
0
def generate():
    """Export entity and neighbourhood-context embeddings of a trained TransE.

    Loads the checkpoint at ``./data/kg/transe.ckpt`` and writes two arrays:

    * ``./data/kg/entity.npy`` -- the entity embedding matrix with row 0
      overwritten by zeros.
    * ``./data/kg/context.npy`` -- a matrix of the same shape whose row ``e``
      (for every id ``e`` listed in ``./data/sub_kg/entity2name.txt``) is the
      mean embedding of ``e`` and of every id that appears alongside ``e`` in
      the first two columns of ``./data/sub_kg/triple2id.txt``.  Rows for ids
      not listed in ``entity2name.txt`` are all zeros.
    """
    # The loader is built only to obtain the entity/relation counts that size
    # the model before its checkpoint is loaded.
    train_dataloader = TrainDataLoader(in_path='./data/kg/',
                                       nbatches=100,
                                       threads=8,
                                       sampling_mode="normal",
                                       bern_flag=1,
                                       filter_flag=1,
                                       neg_ent=25,
                                       neg_rel=0)

    # define the model
    transe = TransE(ent_tot=train_dataloader.get_ent_tot(),
                    rel_tot=train_dataloader.get_rel_tot(),
                    dim=Config.entity_embedding_dim,
                    p_norm=1,
                    norm_flag=True)

    transe.load_checkpoint('./data/kg/transe.ckpt')
    entity_embedding = transe.get_parameters()['ent_embeddings.weight']
    # Row 0 is zeroed before export (presumably a padding id -- confirm
    # against the code that consumes entity.npy).
    entity_embedding[0] = 0
    np.save('./data/kg/entity.npy', entity_embedding)

    # Start from zeros, not np.empty_like: the loop below only writes rows for
    # ids listed in entity2name.txt, so every other row (including row 0)
    # would otherwise be saved as uninitialised memory.
    context_embedding = np.zeros_like(entity_embedding)

    # Only the first two columns of each triple row are used.
    relation = pd.read_table('./data/sub_kg/triple2id.txt',
                             header=None)[[0, 1]]
    entity = pd.read_table('./data/sub_kg/entity2name.txt',
                           header=None)[[0]].to_numpy().flatten()

    for e in entity:
        # Every triple row in which e appears in either of the two columns.
        df = pd.concat(
            [relation[relation[0] == e], relation[relation[1] == e]])
        # Unique ids in those rows, plus e itself so the set is never empty.
        context = list(set(np.append(df.to_numpy().flatten(), e)))
        context_embedding[e] = np.mean(entity_embedding[context, :], axis=0)

    np.save('./data/kg/context.npy', context_embedding)
Example #3
0
# dataloader for training (LUMB benchmark directory)
train_dataloader = TrainDataLoader(
    in_path="./benchmarks/LUMB/",
    nbatches=100,
    threads=8,
    sampling_mode="normal",
    bern_flag=1,
    filter_flag=1,
    neg_ent=25,
    neg_rel=0,
)

# dataloader for test, built from the same directory in "link" mode
test_dataloader = TestDataLoader("./benchmarks/LUMB/", "link")

# TransE model sized from the entity/relation counts of the training loader
transe = TransE(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=200,
    p_norm=1,
    norm_flag=True,
)

# negative-sampling objective with a margin loss
model = NegativeSampling(
    model=transe,
    loss=MarginLoss(margin=5.0),
    batch_size=train_dataloader.get_batch_size(),
)

# train the model on CPU
trainer = Trainer(
    model=model,
    data_loader=train_dataloader,
    train_times=100,
    alpha=1.0,
    use_gpu=False,
)
trainer.run()
# dataloader for training (WN18RR benchmark directory); unlike the loaders
# configured with nbatches, this one is given an explicit batch_size and uses
# the "cross" sampling mode
train_dataloader = TrainDataLoader(in_path="./benchmarks/WN18RR/",
                                   batch_size=2000,
                                   threads=8,
                                   sampling_mode="cross",
                                   bern_flag=0,
                                   filter_flag=1,
                                   neg_ent=64,
                                   neg_rel=0)

# dataloader for test, built from the same directory in "link" mode
test_dataloader = TestDataLoader("./benchmarks/WN18RR/", "link")

# define the model: TransE sized from the training loader's entity/relation
# counts, with norm_flag off and an explicit margin passed to the model itself
transe = TransE(ent_tot=train_dataloader.get_ent_tot(),
                rel_tot=train_dataloader.get_rel_tot(),
                dim=1024,
                p_norm=1,
                norm_flag=False,
                margin=6.0)

# define the loss function: negative sampling with a sigmoid loss
# (adv_temperature=1) and regul_rate set to 0.0
model = NegativeSampling(model=transe,
                         loss=SigmoidLoss(adv_temperature=1),
                         batch_size=train_dataloader.get_batch_size(),
                         regul_rate=0.0)

# train the model
trainer = Trainer(model=model,
                  data_loader=train_dataloader,
                  train_times=3000,
                  alpha=2e-5,
                  use_gpu=False,
Example #5
0
                                   filter_flag=1,
                                   neg_ent=1,
                                   neg_rel=0)

# dataloader for test
#test_dataloader = TestDataLoader("../openke_data", "link")

# Paths to .npy files handed to TransE as init_weights below
# (presumably GloVe-derived entity/relation vectors, judging by the file
# names -- confirm against the code that produces them).
pretrain_init = {
    'entity': '../concept_glove.max.npy',
    'relation': '../relation_glove.max.npy'
}
# define the model: TransE sized from the training loader's entity/relation
# counts and initialised in 'pretrain' mode from the files above
transe = TransE(ent_tot=train_dataloader.get_ent_tot(),
                rel_tot=train_dataloader.get_rel_tot(),
                dim=100,
                p_norm=1,
                margin=1.0,
                norm_flag=True,
                init='pretrain',
                init_weights=pretrain_init)

# define the loss function: negative sampling with a sigmoid loss
model = NegativeSampling(model=transe,
                         loss=SigmoidLoss(adv_temperature=1),
                         batch_size=train_dataloader.get_batch_size())

# train the model
# Make sure the checkpoint directory exists before training starts
# (no error if it is already there; missing parents are created too).
checkpoint_dir = Path('./checkpoint/')
checkpoint_dir.mkdir(exist_ok=True, parents=True)
# Value for the trainer's alpha argument (NOTE(review): presumably the
# learning rate -- confirm against the Trainer implementation).
alpha = 0.001
trainer = Trainer(model=model,
                  data_loader=train_dataloader,
    TASK_REV_MEDIUMHAND,
    TASK_LABELS,
)
import metrics
from utils import Task, openke_predict, get_entity_relationship_dicts

# Command line: --model selects which embedding model to build
# (defaults to 'transe').
parser = argparse.ArgumentParser()
parser.add_argument("--model", default='transe')
args = parser.parse_args()

# The entity and relation collections are used only for their sizes here.
ent_list, rel_list = get_entity_relationship_dicts()

# Build the requested model. Each branch fixes its own embedding size.
# NOTE(review): if args.model matches none of the branches visible here,
# `model` is left unassigned by this excerpt -- confirm whether further
# branches or an else clause follow.
if args.model == 'transe':
    model = TransE(ent_tot=len(ent_list),
                   rel_tot=len(rel_list),
                   dim=200,
                   p_norm=1,
                   norm_flag=True)
elif args.model == 'transd':
    model = TransD(ent_tot=len(ent_list),
                   rel_tot=len(rel_list),
                   dim_e=200,
                   dim_r=200,
                   p_norm=1,
                   norm_flag=True)
elif args.model == 'rescal':
    model = RESCAL(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=50)
elif args.model == 'distmult':
    model = DistMult(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=200)
elif args.model == 'complex':
    model = ComplEx(ent_tot=len(ent_list), rel_tot=len(rel_list), dim=200)
Example #7
0
 def __init__(self,
              embed_model_path,
              bert_path,
              bert_name,
              n_clusters,
              embed_method='rotatE',
              fine_tune=True,
              attention=True,
              use_lstm=False,
              use_dnn=True,
              attention_method='mine',
              num_layers=2,
              bidirectional=False):
     """Assemble the relation predictor, the frozen KG embedding and helpers.

     Args:
         embed_model_path: checkpoint loaded into the KG embedding model.
         bert_path, bert_name, fine_tune, attention, use_lstm, use_dnn,
             attention_method, num_layers, bidirectional: forwarded
             unchanged to ``RelationPredictor``.
         n_clusters: number of KMeans clusters built over the entity
             embeddings; only used when ``embed_method`` is ``'rotatE'``.
         embed_method: one of ``'rotatE'``, ``'complEx'``, ``'DistMult'``,
             ``'TransE'``; selects both the KG embedding class and the
             scoring method of the same name on this object.

     Raises:
         Exception: if ``embed_method`` is none of the supported names.
     """
     super(QuestionAnswerModel, self).__init__()
     self.embed_method = embed_method
     self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
     logger.info('using device: {}'.format(self.device))

     predictor = RelationPredictor(
         bert_path=bert_path,
         bert_name=bert_name,
         fine_tune=fine_tune,
         attention=attention,
         use_lstm=use_lstm,
         use_dnn=use_dnn,
         attention_method=attention_method,
         num_layers=num_layers,
         bidirectional=bidirectional)
     self.relation_predictor = predictor.to(self.device)

     # (embed_method name, factory for the matching KG embedding model).
     # The scoring method on this object carries the same name as the
     # embed method, so it is looked up by that name. Factories are lazy so
     # that only the selected model is ever constructed.
     kg_factories = (
         ('rotatE', lambda: RotatE(ent_tot=43234,
                                   rel_tot=18,
                                   dim=256,
                                   margin=6.0,
                                   epsilon=2.0)),
         ('complEx', lambda: ComplEx(ent_tot=43234, rel_tot=18, dim=200)),
         ('DistMult', lambda: DistMult(ent_tot=43234, rel_tot=18, dim=200)),
         ('TransE', lambda: TransE(ent_tot=43234,
                                   rel_tot=18,
                                   dim=200,
                                   p_norm=1,
                                   norm_flag=True)),
     )
     for method_name, build_kg in kg_factories:
         if self.embed_method == method_name:
             self.score_func = getattr(self, method_name)
             self.KG_embed = build_kg()
             break
     else:
         raise Exception('embed method not specified!')

     # Load the pretrained KG embedding and freeze all of its parameters.
     self.embed_model_path = embed_model_path
     self.KG_embed.load_checkpoint(self.embed_model_path)
     self.KG_embed.to(self.device)
     for kg_param in self.KG_embed.parameters():
         kg_param.requires_grad = False
     logger.info('loading pretrained KG embedding from {}'.format(
         self.embed_model_path))

     if self.embed_method == 'rotatE':
         # Group entity ids by the KMeans cluster of their embedding.
         self.cluster = KMeans(n_clusters=n_clusters)
         self.cluster2ent = [[] for _ in range(n_clusters)]
         entity_vectors = self.KG_embed.ent_embeddings.weight.cpu()
         cluster_labels = self.cluster.fit_predict(entity_vectors)
         for ent_id, cluster_id in enumerate(cluster_labels):
             self.cluster2ent[cluster_id].append(ent_id)

     self.candidate_generator = CandidateGenerator(
         './MetaQA/KGE_data/train2id.txt')
Example #8
0
# Number of entities whose name has no GloVe vector of its own.
not_found = 0

# Fill row i of weights_matrix with the GloVe vector of entity i's name.
# Only a missing GloVe entry is tolerated: it falls back to the 'unk' vector
# and is counted. The id2entity / entity2name lookups sit outside the try,
# so a missing key there still raises.
for i in range(max_id):
    entity = id2entity[i]
    word = entity2name[entity]
    try:
        weights_matrix[i] = glove[word]
    except KeyError:
        weights_matrix[i] = glove['unk']
        not_found += 1

# define the model: TransE sized from the training loader's entity/relation
# counts, with the matrix built above passed as ent_weight
transe = TransE(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    ent_weight=weights_matrix,
    # rel_weight = cur_rel_weight,
    dim=200,
    p_norm=1,
    norm_flag=True)

# define the loss function: negative sampling with a margin loss
model = NegativeSampling(model=transe,
                         loss=MarginLoss(margin=5.0),
                         batch_size=train_dataloader.get_batch_size())

# train the model
# (the trainer is only constructed here; no run() call is visible in this
# excerpt)
trainer = Trainer(model=model,
                  data_loader=train_dataloader,
                  train_times=1000,
                  alpha=1.0,
                  use_gpu=True)
Example #9
0
			if result_code == 1:
				result = "FAKE NEWS"
				break
			else:
				result_true_count += 1
	if result is None:
		if result_true_count >= len(triples) / 2:
			result = "TRUE NEWS"
		else:
			result = "I'M NOT SURE"
	return result

# Rebuild the TransE model with entity/relation counts taken from the id
# maps, then restore its trained weights from the checkpoint.
transe = TransE(
	ent_tot = len(entity_map.keys()),
	rel_tot = len(relation_map.keys()),
	dim = 1024,
	p_norm = 1,
	norm_flag = False,
	margin = 6.0
)
transe.load_checkpoint('./checkpoint/transe_fn.ckpt')
# CPU-only tester; no data loader is passed here.
tester = Tester(model = transe, use_gpu = False)

number_of_test_example = 100
db = Mongo().get_client()

print("Predicting random entity and relation ...")
result_number = 0
# Fetch the news documents whose "status" field equals 2
# (NOTE(review): the meaning of status 2 is not visible here -- confirm).
news_list = db['covid_news_data'].find({
    "status": 2
})
if news_list:
from openke.data import TrainDataLoader, TestDataLoader
import pickle
import pathlib

# dataloader for training (dbpedia50 data in OpenKE layout)
train_dataloader = TrainDataLoader(
    in_path="./dbpedia50_openKE/kb2E/",
    nbatches=100,
    threads=8,
    bern_flag=1,
)

# dataloader for test, built from the same directory in "link" mode
test_dataloader = TestDataLoader("./dbpedia50_openKE/kb2E/", "link")

# TransE model sized from the training loader; only dim is set explicitly,
# every other model option keeps the library default
transe = TransE(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=300,
)

# negative-sampling objective with a margin loss left at its default settings
model = NegativeSampling(
    model=transe,
    loss=MarginLoss(),
    batch_size=train_dataloader.get_batch_size(),
)

# train the model on GPU with the 'adagrad' optimisation method
trainer = Trainer(
    model=model,
    data_loader=train_dataloader,
    train_times=1000,
    alpha=0.01,
    use_gpu=True,
    opt_method='adagrad',
)
trainer.run()
Example #11
0
# dataloader for training; directory, batch count and negative-sample count
# come from variables defined earlier in the script
train_dataloader = TrainDataLoader(
    in_path=data_dir,
    nbatches=nbatches,
    threads=8,
    sampling_mode="cross",
    bern_flag=1,
    filter_flag=1,
    neg_ent=negative_samples,
    neg_rel=0,
)

# dataloader for test, built from the same directory in "triple" mode
test_dataloader = TestDataLoader(data_dir, "triple")

# TransE model sized from the training loader's entity/relation counts
transe = TransE(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=embed_dim,
    p_norm=2,
    norm_flag=True,
)

# negative-sampling objective with a configurable margin loss
model = NegativeSampling(
    model=transe,
    loss=MarginLoss(margin=margin),
    batch_size=train_dataloader.get_batch_size(),
)

# train the model with the "adam" optimisation method; the trainer is given
# ckpt_path as its checkpoint directory and save_steps=100
trainer = Trainer(
    model=model,
    data_loader=train_dataloader,
    opt_method="adam",
    train_times=train_times,
    alpha=alpha,
    use_gpu=True,
    checkpoint_dir=ckpt_path,
    save_steps=100,
)
tester = Tester(model=transe, data_loader=test_dataloader, use_gpu=True)

# the tester is handed to run() together with test_every=100
trainer.run(tester, test_every=100)

print("Saving model to {0}...".format(ckpt_path))
transe.save_checkpoint(ckpt_path)