Esempio n. 1
0
def train():
    """Fit a TransE model on the graph in ./data/kg/ and checkpoint it.

    The learned weights are written to ./data/kg/transe.ckpt once the
    trainer has finished; nothing is returned.
    """
    # Source of training batches (positives plus sampled negatives).
    loader = TrainDataLoader(
        in_path='./data/kg/',
        nbatches=100,
        threads=8,
        sampling_mode="normal",
        bern_flag=1,
        filter_flag=1,
        neg_ent=25,
        neg_rel=0,
    )

    # The embedding width is taken from the project configuration.
    embedder = TransE(
        ent_tot=loader.get_ent_tot(),
        rel_tot=loader.get_rel_tot(),
        dim=Config.entity_embedding_dim,
        p_norm=1,
        norm_flag=True,
    )

    # Negative-sampling strategy scored with a margin loss.
    objective = NegativeSampling(
        model=embedder,
        loss=MarginLoss(margin=5.0),
        batch_size=loader.get_batch_size(),
    )

    Trainer(
        model=objective,
        data_loader=loader,
        train_times=1000,
        alpha=1.0,
        use_gpu=True,
    ).run()
    embedder.save_checkpoint('./data/kg/transe.ckpt')
Esempio n. 2
0
def run_experiment(negative_rate, in_dim, out_dim, alpha):
    """Train RotAtte on FB15K237 and return its link-prediction MRR.

    Args:
        negative_rate: Used as ``neg_ent`` for the training loader and as
            ``negative_rate`` for the model.
        in_dim: Forwarded to ``RotAtte(in_dim=...)``.
        out_dim: Forwarded to ``RotAtte(out_dim=...)``.
        alpha: Forwarded to ``Trainer(alpha=...)``.

    Returns:
        The first of the five values returned by
        ``Tester.run_link_prediction`` (unpacked here as MRR).
    """
    train_dataloader = TrainDataLoader(in_path="./benchmarks/FB15K237/",
                                       batch_size=10000,
                                       threads=1,
                                       sampling_mode="cross",
                                       bern_flag=0,
                                       filter_flag=1,
                                       neg_ent=negative_rate,
                                       neg_rel=0)

    # A second loader whose batch size equals the number of triples and
    # which samples no negatives: its first batch is used as the fact list.
    fact_loader = TrainDataLoader(in_path="./benchmarks/FB15K237/",
                                  batch_size=train_dataloader.get_triple_tot(),
                                  threads=1,
                                  sampling_mode="normal",
                                  bern_flag=0,
                                  filter_flag=1,
                                  neg_ent=0,
                                  neg_rel=0)

    # Only the first batch is needed, so take it without exhausting the
    # loader into a list first.
    h, t, r, _, _ = next(iter(fact_loader)).values()

    # Convert straight to int64. Going through torch.Tensor (float32) first
    # would silently corrupt ids above 2**24.
    h = torch.as_tensor(h, dtype=torch.long)
    t = torch.as_tensor(t, dtype=torch.long)
    r = torch.as_tensor(r, dtype=torch.long)

    # One (head, relation, tail) row per fact, on the GPU.
    facts = torch.stack((h, r, t)).cuda().t()

    test_dataloader = TestDataLoader("./benchmarks/FB15K237/", "link")

    rotatte = RotAtte(
        n_ent=train_dataloader.get_ent_tot(),
        n_rel=train_dataloader.get_rel_tot(),
        in_dim=in_dim,
        # Previously `in_dim` was passed here, which made the `out_dim`
        # argument of this function dead.
        out_dim=out_dim,
        facts=facts,
        negative_rate=negative_rate,
    )

    model = NegativeSampling(model=rotatte,
                             loss=SigmoidLoss(adv_temperature=2),
                             batch_size=train_dataloader.get_batch_size(),
                             regul_rate=0.0)

    trainer = Trainer(model=model,
                      data_loader=train_dataloader,
                      train_times=100,
                      alpha=alpha,
                      use_gpu=True,
                      opt_method="adam")

    trainer.run()
    tester = Tester(model=rotatte, data_loader=test_dataloader, use_gpu=True)
    result = tester.run_link_prediction(type_constrain=False)

    # Unpack all five metrics so an unexpected result shape still fails
    # loudly; only the first one is reported.
    mrr, _mr, _hits10, _hits3, _hits1 = result

    return mrr
Esempio n. 3
0
def generate():
    """Export entity embeddings and neighbourhood-averaged context embeddings.

    Loads the TransE checkpoint ./data/kg/transe.ckpt, zeroes row 0 of the
    entity embedding matrix and saves it to ./data/kg/entity.npy.

    For every entity id in the first column of
    ./data/sub_kg/entity2name.txt, the context embedding is the mean of the
    embeddings of the entity itself and of every id that shares a row with
    it in the first two columns of ./data/sub_kg/triple2id.txt. The result
    is saved to ./data/kg/context.npy; rows of ids that are not listed stay
    zero.
    """
    # The loader is only used for the entity/relation counts that size the
    # model before the checkpoint is loaded into it.
    train_dataloader = TrainDataLoader(in_path='./data/kg/',
                                       nbatches=100,
                                       threads=8,
                                       sampling_mode="normal",
                                       bern_flag=1,
                                       filter_flag=1,
                                       neg_ent=25,
                                       neg_rel=0)

    transe = TransE(ent_tot=train_dataloader.get_ent_tot(),
                    rel_tot=train_dataloader.get_rel_tot(),
                    dim=Config.entity_embedding_dim,
                    p_norm=1,
                    norm_flag=True)

    transe.load_checkpoint('./data/kg/transe.ckpt')
    entity_embedding = transe.get_parameters()['ent_embeddings.weight']
    # Row 0 is forced to zeros - presumably a padding id; TODO confirm.
    entity_embedding[0] = 0
    np.save('./data/kg/entity.npy', entity_embedding)

    # zeros_like instead of empty_like: rows for ids that never appear in
    # entity2name.txt would otherwise be saved as uninitialised memory.
    context_embedding = np.zeros_like(entity_embedding)

    relation = pd.read_table('./data/sub_kg/triple2id.txt',
                             header=None)[[0, 1]]
    entity = pd.read_table('./data/sub_kg/entity2name.txt',
                           header=None)[[0]].to_numpy().flatten()

    # Build the neighbour index in one pass over the triples instead of
    # filtering the whole table once per entity.
    neighbours = {}
    for first, second in relation.to_numpy():
        neighbours.setdefault(first, set()).add(second)
        neighbours.setdefault(second, set()).add(first)

    for e in entity:
        # The entity always belongs to its own context; sorting keeps the
        # averaging order deterministic.
        context = sorted(neighbours.get(e, set()) | {e})
        context_embedding[e] = np.mean(entity_embedding[context, :], axis=0)

    np.save('./data/kg/context.npy', context_embedding)
import openke
from openke.config import Trainer, Tester
from openke.module.model import TransE
from openke.module.loss import SigmoidLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader
from utils.mongo import Mongo

# Training loader for the FAKE_NEWS benchmark. neg_ent / neg_rel are not
# passed, so the loader's own defaults apply. (Commented-out alternatives in
# the original: batch_size=2000, neg_ent=64, neg_rel=0.)
train_dataloader = TrainDataLoader(
    in_path="./benchmarks/FAKE_NEWS/",
    batch_size=1,
    threads=8,
    sampling_mode="cross",
    bern_flag=0,
    filter_flag=1,
)

# Test loader for the same benchmark, in "link" mode.
test_dataloader = TestDataLoader("./benchmarks/FAKE_NEWS/", "link")

# define the model
transe = TransE(
	ent_tot = train_dataloader.get_ent_tot(),
	rel_tot = train_dataloader.get_rel_tot(),
	dim = 1024, 
	p_norm = 1,
	norm_flag = False,
Esempio n. 5
0
parser.add_argument(
    '--mode', '-e',
    type=str,
    default='train',
    help='train or dev',
)
args = parser.parse_args()

# The selected mode also names the sub-directory the data is read from.
phase = args.mode
base_path = f'./benchmarks/transe_ske_pso/{phase}/'

# Training loader rooted at the mode-specific directory.
train_dataloader = TrainDataLoader(
    in_path=base_path,
    nbatches=100, threads=8,
    sampling_mode="normal",
    bern_flag=1, filter_flag=1,
    neg_ent=25, neg_rel=5,
)

# TransE sized from the loader's entity and relation counts.
transe = TransE(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=200,
    p_norm=2,
    norm_flag=True,
)

# define the loss function
model = NegativeSampling(model=transe,
                         loss=MarginLoss(margin=5.0),
Esempio n. 6
0
from openke.config import Trainer, Tester
from openke.module.model import TransD
from openke.module.loss import MarginLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader

# Training batches drawn from FB15K237.
train_dataloader = TrainDataLoader(
    in_path="./benchmarks/FB15K237/",
    nbatches=100, threads=8,
    sampling_mode="normal",
    bern_flag=1, filter_flag=1,
    neg_ent=25, neg_rel=0,
)

# Test loader for the same benchmark, in "link" mode.
test_dataloader = TestDataLoader("./benchmarks/FB15K237/", "link")

# TransD with dim_e = dim_r = 1.
transd = TransD(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim_e=1,
    dim_r=1,
    p_norm=1,
    norm_flag=True,
)

# Negative-sampling strategy scored with a margin loss.
model = NegativeSampling(
    model=transd,
    loss=MarginLoss(margin=4.0),
    batch_size=train_dataloader.get_batch_size(),
)

# Train without the GPU, then persist the weights.
trainer = Trainer(
    model=model,
    data_loader=train_dataloader,
    train_times=100,
    alpha=1.0,
    use_gpu=False,
)
trainer.run()
transd.save_checkpoint('./checkpoint/transd_100.ckpt')
Esempio n. 7
0
#import openke
from openke.config import Trainer, Tester
from openke.module.model import TransE, TransR
from openke.module.loss import MarginLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader

# Training batches drawn from the LUMB benchmark.
train_dataloader = TrainDataLoader(
    in_path="./benchmarks/LUMB/",
    nbatches=30, threads=4,
    sampling_mode="normal",
    bern_flag=1, filter_flag=1,
    neg_ent=25, neg_rel=0,
)

# Test loader for the same benchmark, in 'link' sampling mode.
test_dataloader = TestDataLoader(
    in_path="./benchmarks/LUMB/", sampling_mode='link')

# TransE sized from the loader's entity and relation counts.
transe = TransE(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=20,
    p_norm=1,
    norm_flag=True,
)

# Negative-sampling strategy around TransE, scored with a margin loss.
model_e = NegativeSampling(
    model=transe,
    loss=MarginLoss(margin=5.0),
    batch_size=train_dataloader.get_batch_size(),
)
import openke
from openke.config import Trainer, Tester
from openke.module.model import TransE
from openke.module.loss import MarginLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader
import pickle
import pathlib

# Training loader; every option that is not listed keeps the loader default.
train_dataloader = TrainDataLoader(
    in_path="./dbpedia50_openKE/kb2E/",
    nbatches=100,
    threads=8,
    bern_flag=1,
)

# Test loader for the same data, in "link" mode.
test_dataloader = TestDataLoader("./dbpedia50_openKE/kb2E/", "link")

# TransE sized from the loader's entity and relation counts.
transe = TransE(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=300,
)

# Negative-sampling strategy with a default-constructed margin loss.
model = NegativeSampling(
    model=transe,
    loss=MarginLoss(),
    batch_size=train_dataloader.get_batch_size(),
)

# train the model
trainer = Trainer(model=model,
                  data_loader=train_dataloader,
Esempio n. 9
0
import openke
from openke.config import Trainer, Tester
from openke.module.model import RotatE
from openke.module.loss import SigmoidLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader

# Training batches drawn from MetaQA.
train_dataloader = TrainDataLoader(in_path="./data/MetaQA/",
                                   batch_size=2000,
                                   threads=8,
                                   sampling_mode="cross",
                                   bern_flag=0,
                                   filter_flag=1,
                                   neg_ent=64,
                                   neg_rel=0)

# Test loader for the same data, in "link" mode.
test_dataloader = TestDataLoader("./data/MetaQA/", "link")

# RotatE sized from the loader's entity and relation counts.
rotate = RotatE(ent_tot=train_dataloader.get_ent_tot(),
                rel_tot=train_dataloader.get_rel_tot(),
                dim=1024,
                margin=6.0,
                epsilon=2.0)
Esempio n. 10
0
import openke
from openke.config import Trainer, Tester
from openke.module.model import ComplEx
from openke.module.loss import SoftplusLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader

# Training batches drawn from MetaQA.
train_dataloader = TrainDataLoader(
    in_path="./data/MetaQA/",
    nbatches=100, threads=8,
    sampling_mode="normal",
    bern_flag=1, filter_flag=1,
    neg_ent=25, neg_rel=0,
)

# Test loader for the same data, in "link" mode.
test_dataloader = TestDataLoader("./data/MetaQA/", "link")

# ComplEx sized from the loader's entity and relation counts.
complEx = ComplEx(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=200,
)

# Negative-sampling strategy with a softplus loss and regul_rate=1.0.
model = NegativeSampling(
    model=complEx,
    loss=SoftplusLoss(),
    batch_size=train_dataloader.get_batch_size(),
    regul_rate=1.0,
)
                        "--checkpoint",
                        default=os.path.join(os.path.curdir, "checkpoint"),
                        help="Path to model checkpoint")
    parser.add_argument("-e",
                        "--embedding",
                        default=os.path.join(os.path.curdir, "kg_embed"),
                        help="Path to saving embeddings")

    args = parser.parse_args()
    bench_path, ckpt_path, emb_path = args.benchmark, args.checkpoint, args.embedding

    # dataloader for training
    train_dataloader = TrainDataLoader(in_path=bench_path,
                                       nbatches=100,
                                       threads=16,
                                       sampling_mode="normal",
                                       bern_flag=1,
                                       filter_flag=1,
                                       neg_ent=25,
                                       neg_rel=0)

    # define the model
    transe = TransE(ent_tot=train_dataloader.get_ent_tot(),
                    rel_tot=train_dataloader.get_rel_tot(),
                    dim=100,
                    p_norm=1,
                    norm_flag=True)

    transe.load_checkpoint(os.path.join(ckpt_path, "transe.ckpt"))
    params = transe.get_parameters()
    np.savetxt(os.path.join(emb_path, "entity2vec.vec"),
               params["ent_embeddings.weight"])
Esempio n. 12
0
from openke.config import Trainer, Tester
from openke.module.model import TransE
from openke.module.loss import MarginLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader

# Training batches drawn from FB15K237.
train_dataloader = TrainDataLoader(in_path='./benchmarks/FB15K237/',
                                   nbatches=100,
                                   threads=8,
                                   sampling_mode='normal',
                                   bern_flag=1,
                                   filter_flag=1,
                                   neg_ent=25,
                                   neg_rel=0)

# Test loader for the same benchmark, in 'link' mode.
test_dataloader = TestDataLoader('./benchmarks/FB15K237/', 'link')

# TransE sized from the loader's entity and relation counts.
transe = TransE(ent_tot=train_dataloader.get_ent_tot(),
                rel_tot=train_dataloader.get_rel_tot(),
                dim=200,
                p_norm=1,
                norm_flag=True)

# Negative-sampling strategy scored with a margin loss.
model = NegativeSampling(model=transe,
                         loss=MarginLoss(margin=5.0),
                         batch_size=train_dataloader.get_batch_size())
Esempio n. 13
0
from openke.config import Trainer, Tester
from openke.module.model import TransE
from openke.module.loss import SigmoidLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader

# Training batches drawn from WN18RR.
train_dataloader = TrainDataLoader(in_path='./benchmarks/WN18RR/',
                                   batch_size=2000,
                                   threads=8,
                                   sampling_mode='cross',
                                   bern_flag=0,
                                   filter_flag=1,
                                   neg_ent=64,
                                   neg_rel=0)

# Test loader for the same benchmark, in 'link' mode.
test_dataloader = TestDataLoader('./benchmarks/WN18RR/', 'link')

# TransE with norm_flag disabled and an explicit margin.
transe = TransE(ent_tot=train_dataloader.get_ent_tot(),
                rel_tot=train_dataloader.get_rel_tot(),
                dim=1024,
                p_norm=1,
                norm_flag=False,
                margin=6.0)

# define the loss function
model = NegativeSampling(
    model=transe,
Esempio n. 14
0
import openke
from openke.config import Trainer, Tester
from openke.module.model import TransE
from openke.module.loss import MarginLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader

# Training batches drawn from the CommonGen benchmark.
train_dataloader = TrainDataLoader(
    in_path="./benchmarks/CommonGen/",
    nbatches=500, threads=8,
    sampling_mode="normal",
    bern_flag=0, filter_flag=1,
    neg_ent=64, neg_rel=0,
)

# Test loader for the same benchmark, in "link" mode.
test_dataloader = TestDataLoader("./benchmarks/CommonGen/", "link")

# TransE sized from the loader's entity and relation counts.
transe = TransE(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=1024,
    p_norm=1,
    norm_flag=True,
)

# Negative-sampling strategy; the margin loss is built with margin=0.0.
model = NegativeSampling(
    model=transe,
    loss=MarginLoss(margin=0.0),
    batch_size=train_dataloader.get_batch_size(),
)
Esempio n. 15
0
import openke
from openke.config import Trainer, Tester
from openke.module.model import TransE
from openke.module.loss import MarginLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader

# Training batches drawn from the restricted DBpedia export.
train_dataloader = TrainDataLoader(in_path="../OpenKEfiles/DBpedia/Restricted/",
                                   nbatches=1000,
                                   threads=8,
                                   sampling_mode="normal",
                                   bern_flag=1,
                                   filter_flag=1,
                                   neg_ent=25,
                                   neg_rel=0)

# Test loader for the same data, in "link" mode with type_constrain off.
test_dataloader = TestDataLoader("../OpenKEfiles/DBpedia/Restricted/",
                                 "link",
                                 type_constrain=False)

# TransE sized from the loader's entity and relation counts.
transe = TransE(ent_tot=train_dataloader.get_ent_tot(),
                rel_tot=train_dataloader.get_rel_tot(),
                dim=200,
                p_norm=1,
                norm_flag=True)

# Restore previously saved weights into the model.
transe.load_checkpoint('../checkpoint/dbpedia/restricted/transe.ckpt')
Esempio n. 16
0
import openke
from openke.config import Trainer, Tester
from openke.module.model import ComplEx
from openke.module.loss import SoftplusLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader
import pickle
import pathlib

# Training loader; filter_flag is not passed, so the loader default applies.
train_dataloader = TrainDataLoader(
    in_path="./dbpedia50_openKE/kb2E/",
    nbatches=100, threads=12,
    sampling_mode="normal",
    bern_flag=1,
    neg_ent=25, neg_rel=0,
)

# Test loader for the same data, in "link" mode.
test_dataloader = TestDataLoader("./dbpedia50_openKE/kb2E/", "link")

# ComplEx sized from the loader's entity and relation counts.
complEx = ComplEx(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=300,
)

# Negative-sampling strategy with a softplus loss and regul_rate=1.0.
model = NegativeSampling(
    model=complEx,
    loss=SoftplusLoss(),
    batch_size=train_dataloader.get_batch_size(),
    regul_rate=1.0,
)
Esempio n. 17
0
ckpt_path = os.path.join(ckpt_dir, ckpt_fname)

# Hyper-parameters for this run. The trailing notes list the alternative
# values recorded next to each setting in the original script.
embed_dim = 100         # alternatives: 100, 200, 300
margin = 6              # alternatives: 1, 3, 6, 10, 15, 20
negative_samples = 16   # alternatives: 64, 128
batch_size = 4092       # alternatives: 512, 1024, 2048
alpha = 0.001           # alternatives: 0.001, 0.01
train_times = 400

# Training loader driven by the hyper-parameters above.
train_dataloader = TrainDataLoader(in_path=data_dir,
                                   batch_size=batch_size,
                                   threads=2,
                                   sampling_mode="cross",
                                   bern_flag=0,
                                   filter_flag=1,
                                   neg_ent=negative_samples,
                                   neg_rel=0)

# Test loader in "triple" mode ("link" was the commented-out alternative).
test_dataloader = TestDataLoader(data_dir, "triple")


# define the model
rotate = RotatE(
    ent_tot = train_dataloader.get_ent_tot(),
    rel_tot = train_dataloader.get_rel_tot(),
    dim = embed_dim,
Esempio n. 18
0
import timeit

from data import (
    TASK_REV_MEDIUMHAND,
    TASK_LABELS,
)
import metrics

# Make sure the checkpoint directory exists. exist_ok=True replaces the
# separate os.path.exists() test, which could race with another process
# creating the directory between the check and the makedirs call.
os.makedirs('checkpoint', exist_ok=True)

# Training batches drawn from ./data/kge/openke/.
train_dataloader = TrainDataLoader(
    in_path="./data/kge/openke/",
    nbatches=100, threads=8,
    sampling_mode="normal",
    bern_flag=1, filter_flag=1,
    neg_ent=25, neg_rel=0,
)

# Test loader for the same data, in "link" mode.
test_dataloader = TestDataLoader("./data/kge/openke/", "link")

# DistMult sized from the loader's entity and relation counts.
distmult = DistMult(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=200,
)

# define the loss function
model = NegativeSampling(model=distmult,
                         loss=SoftplusLoss(),
import openke
from openke.config import Trainer, Tester
from openke.module.model import TransE
from openke.module.loss import SigmoidLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader

# Training batches drawn from WN18RR.
train_dataloader = TrainDataLoader(
    in_path="./benchmarks/WN18RR/",
    batch_size=2000,
    threads=8,
    sampling_mode="cross",
    bern_flag=0, filter_flag=1,
    neg_ent=64, neg_rel=0,
)

# Test loader for the same benchmark, in "link" mode.
test_dataloader = TestDataLoader("./benchmarks/WN18RR/", "link")

# TransE with norm_flag disabled and an explicit margin.
transe = TransE(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=1024,
    p_norm=1,
    norm_flag=False,
    margin=6.0,
)

# define the loss function
model = NegativeSampling(model=transe,
                         loss=SigmoidLoss(adv_temperature=1),
Esempio n. 20
0
import openke, torch
from openke.config import Trainer, Tester
from openke.module.model import TransE
from openke.module.loss import MarginLoss, SigmoidLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader
from pathlib import Path

# Training batches drawn from ../openke_data/.
train_dataloader = TrainDataLoader(
    in_path="../openke_data/",
    nbatches=512, threads=24,
    sampling_mode="normal",
    bern_flag=0, filter_flag=1,
    neg_ent=1, neg_rel=0,
)

# No test loader is created here (the "link" loader over ../openke_data is
# commented out in the original).

# Paths to the .npy files used as entity / relation initialisation.
pretrain_init = dict(
    entity='../concept_glove.max.npy',
    relation='../relation_glove.max.npy',
)
# define the model
transe = TransE(ent_tot=train_dataloader.get_ent_tot(),
                rel_tot=train_dataloader.get_rel_tot(),
                dim=100,
                p_norm=1,
                margin=1.0,
Esempio n. 21
0
import openke
from openke.config import Trainer, Tester
from openke.module.model import HolE
from openke.module.loss import SoftplusLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader

# Training batches drawn from WN18RR.
train_dataloader = TrainDataLoader(in_path="./benchmarks/WN18RR/",
                                   nbatches=100,
                                   threads=8,
                                   sampling_mode="normal",
                                   bern_flag=1,
                                   filter_flag=1,
                                   neg_ent=25,
                                   neg_rel=0)

# Test loader for the same benchmark, in "link" mode.
test_dataloader = TestDataLoader("./benchmarks/WN18RR/", "link")

# HolE sized from the loader's entity and relation counts.
hole = HolE(ent_tot=train_dataloader.get_ent_tot(),
            rel_tot=train_dataloader.get_rel_tot(),
            dim=100)

# define the loss function
model = NegativeSampling(
Esempio n. 22
0
# Checkpoint file name is derived from the configuration id.
ckpt_fname = f"TransE{config_id}.ckpt"
ckpt_path = os.path.join(ckpt_dir, ckpt_fname)

# Hyper-parameters for this run.
embed_dim = 100
margin = 0.4
negative_samples = 4
nbatches = 100
alpha = 0.001
train_times = 800

# Training loader driven by the hyper-parameters above.
train_dataloader = TrainDataLoader(
    in_path=data_dir,
    nbatches=nbatches,
    threads=8,
    sampling_mode="cross",
    bern_flag=1, filter_flag=1,
    neg_ent=negative_samples, neg_rel=0,
)

# Test loader over the same directory, in "triple" mode.
test_dataloader = TestDataLoader(data_dir, "triple")

# TransE sized from the loader's entity and relation counts.
transe = TransE(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=embed_dim,
    p_norm=2,
    norm_flag=True,
)

# define the loss function