Example #1
0
    def validate(self, val_kg):
        """Return the mean margin loss of the model over a validation KG.

        Builds a (CUDA if possible) dataloader over ``val_kg``, corrupts
        each positive batch with the instance's negative sampler, and
        averages the per-batch loss values.
        """
        try:
            loader = DataLoader(val_kg,
                                batch_size=self.b_size,
                                use_cuda='all')
        except AssertionError:
            # CUDA unavailable -- fall back to a CPU dataloader.
            loader = DataLoader(val_kg, batch_size=self.b_size)

        batch_losses = []
        for h, t, r in loader:
            n_h, n_t = self.sampler.corrupt_batch(h, t, r)
            pos, neg = self.model(h, t, n_h, n_t, r)
            batch_losses.append(self.loss_fn(pos, neg).item())
        return np.mean(batch_losses)
Example #2
0
    def __init__(self, kg, model_type, ts, **kwargs):
        """Build a bilinear torchkge model and set up a fresh run directory.

        Parameters
        ----------
        kg : knowledge graph exposing ``n_ent`` / ``n_rel``.
        model_type : str, prefix of a class in ``torchkge.models.bilinear``
            (e.g. 'DistMult' for ``DistMultModel``).
        ts : truth share, stored as ``self.truth_share``.
        **kwargs : optional hyperparameters: ``emb_dim`` (default 250),
            ``lr`` (default 0.0004), ``n_epochs`` (default 100),
            ``b_size`` (default 32).
        """
        self.kg = kg
        self.truth_share = ts
        self.emb_dim = kwargs.pop('emb_dim', 250)
        self.model_type = model_type
        self.model = getattr(torchkge.models.bilinear, self.model_type +
                             'Model')(emb_dim=self.emb_dim,
                                      n_entities=self.kg.n_ent,
                                      n_relations=self.kg.n_rel)
        # Run indices already used by earlier runs of this model type.
        all_is = [
            int(d.split('_')[1]) for d in os.listdir(wot.models_path)
            if os.path.isdir(join(wot.models_path, d))
            and f'{self.model_type}_' in d
        ]
        # Smallest free run index.
        i = [x for x in range(1, len(all_is) + 2) if x not in all_is][0]
        # BUG FIX: use wot.models_path (the directory scanned above, was the
        # bare name `models_path`) and the free index `i` itself -- the old
        # `i + 1` could collide with an existing run directory
        # (e.g. all_is == [2] -> i == 1 -> i + 1 == 2, already taken).
        self.model_path = join(wot.models_path,
                               f'{self.model_type}_{str(i).zfill(2)}')
        os.makedirs(self.model_path, exist_ok=True)
        self.logfile = join(self.model_path, 'log.txt')

        ## Hyperparameters
        self.lr = kwargs.pop('lr', 0.0004)
        self.n_epochs = kwargs.pop('n_epochs', 100)
        self.b_size = kwargs.pop('b_size', 32)
        self.logline(
            tabulate([(k, v) for k, v in vars(self).items()],
                     headers=['variable', 'value']))

        try:
            # Prefer a CUDA dataloader; torchkge raises AssertionError when
            # CUDA is unavailable.
            self.dataloader = DataLoader(self.kg,
                                         batch_size=self.b_size,
                                         use_cuda='all')
        except AssertionError:
            self.dataloader = DataLoader(self.kg, batch_size=self.b_size)

        ## Logger / training state
        self.epochs = 0
        self.tr_losses = []
        self.best_epoch = -1
        self.val_losses = []
        self.val_epochs = []
def main():
    """Train a torchkge link-prediction model with periodic validation and
    early stopping, then evaluate the best checkpoint on the test split."""
    # NOTE(review): `optimizer` is expected to be a module-level factory that
    # is called below as optimizer(model, opt_method=..., lr=...); the global
    # name is rebound to the constructed torch optimizer.
    global optimizer
    benchmarks = 'GeoDBpedia21'
    model_name = 'TransR_GDR'
    opt_method = 'Adam'  # "Adagrad" "Adadelta" "Adam" "SGD"
    GDR = True  # whether to use coordinate (geographic) information

    emb_dim = 100  # TransE model
    ent_dim = emb_dim
    rel_dim = emb_dim
    lr = 0.001
    margin = 0.5

    n_epochs = 20000
    train_b_size = 256  # training batch size
    eval_b_size = 64  # batch size for valid/test evaluation

    validation_freq = 10  # validate (and checkpoint the best model) every N epochs
    require_improvement = validation_freq * 3  # stop when val hit@k stalls this many epochs
    model_save_path = './checkpoint/' + benchmarks + '_' + model_name + '_' + opt_method + '.ckpt'  # best hits@k (ent) model
    device = 'cuda:0' if cuda.is_available() else 'cpu'

    # Resolve the model class and dataset loader by name.
    module = getattr(import_module('torchkge.models'), model_name + 'Model')
    load_data = getattr(import_module('torchkge.utils.datasets'), 'load_' + benchmarks)

    print('Loading data...')
    kg_train, kg_val, kg_test = load_data(GDR=GDR)
    print(f'Train set: {kg_train.n_ent} entities, {kg_train.n_rel} relations, {kg_train.n_facts} triplets.')
    print(f'Valid set: {kg_val.n_facts} triplets, Test set: {kg_test.n_facts} triplets.')

    # Define the model and criterion.
    print('Loading model...')
    if 'TransE' in model_name:
        model = module(emb_dim, kg_train.n_ent, kg_train.n_rel, dissimilarity_type='L2')
    else:
        model = module(ent_dim, rel_dim, kg_train.n_ent, kg_train.n_rel)
    criterion = MarginLoss(margin)

    # Move everything to CUDA if available.
    if device == 'cuda:0':
        cuda.empty_cache()
        model.to(device)
        criterion.to(device)
        dataloader = DataLoader(kg_train, batch_size=train_b_size, use_cuda='all')
    else:
        dataloader = DataLoader(kg_train, batch_size=train_b_size, use_cuda=None)

    # Build the torch optimizer via the module-level factory.
    optimizer = optimizer(model, opt_method=opt_method, lr=lr)
    sampler = BernoulliNegativeSampler(kg_train)

    start_epoch = 1
    best_score = float('-inf')
    if os.path.exists(model_save_path):  # resume from an existing checkpoint
        start_epoch, best_score = load_ckpt(model_save_path, model, optimizer)
        print(f'loading ckpt sucessful, start on epoch {start_epoch}...')
    print(model)
    print('lr: {}, margin: {}, dim {}, total epoch: {}, device: {}, batch size: {}, optim: {}, GDR: {}' \
          .format(lr, margin, emb_dim, n_epochs, device, train_b_size, opt_method, GDR))

    print('Training...')
    last_improve = start_epoch  # epoch of the last validation improvement
    start = time.time()
    for epoch in range(start_epoch, n_epochs + 1):
        running_loss = 0.0
        model.train()
        for i, batch in enumerate(dataloader):
            if GDR:
                # GDR batches carry an extra coordinate tensor per triplet.
                h, t, r, point = batch[0], batch[1], batch[2], batch[3]
                n_h, n_t = sampler.corrupt_batch(h, t, r)  # 1:1 negative sampling
                n_point = id2point(n_h, n_t, kg_train.id2point)
                optimizer.zero_grad()

                # forward + backward + optimize
                pos, neg = model(h, t, n_h, n_t, r)
                loss = criterion(pos, neg, point, n_point)
            else:
                h, t, r = batch[0], batch[1], batch[2]
                n_h, n_t = sampler.corrupt_batch(h, t, r)
                optimizer.zero_grad()
                pos, neg = model(h, t, n_h, n_t, r)
                loss = criterion(pos, neg)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        model.normalize_parameters()

        # Periodic validation with best-checkpoint saving.
        if epoch % validation_freq == 0:
            create_dir_not_exists('./checkpoint')
            model.eval()
            evaluator = LinkPredictionEvaluator(model, kg_val)
            evaluator.evaluate(b_size=eval_b_size, verbose=False)
            _, hit_at_k = evaluator.hit_at_k(10)  # filtered hit@10 on the valid set
            print('Epoch [{:>5}/{:>5}] '.format(epoch, n_epochs), end='')
            if hit_at_k > best_score:
                # BUG FIX: update best_score *before* saving, so the
                # checkpoint records the new best score rather than the
                # stale previous one (matters when training is resumed).
                best_score = hit_at_k
                save_ckpt(model, optimizer, epoch, best_score, model_save_path)
                improve = '*'  # mark improved validation results
                last_improve = epoch  # a higher val hit@k counts as improvement
            else:
                improve = ''
            msg = '| Train loss: {:>8.3f}, Val Hit@10: {:>5.2%}, Time {} {}'
            print(msg.format(running_loss / len(dataloader), hit_at_k, time_since(start), improve))
        if epoch - last_improve > require_improvement:
            # Early stopping: val hit@k has not improved for too many epochs.
            print("\nNo optimization for a long time, auto-stopping...")
            break

    print('\nTraining done, start evaluate on test data...')
    print('model name: {}, lr: {}, dim {}, device: {}, eval batch size: {}, optim: {}, GDR: {}' \
          .format(model_name, lr, emb_dim, device, eval_b_size, opt_method, GDR))
    # Test the best checkpoint on the test dataset.
    load_ckpt(model_save_path, model, optimizer)
    model.eval()
    lp_evaluator = LinkPredictionEvaluator(model, kg_test)
    lp_evaluator.evaluate(eval_b_size, verbose=False)
    lp_evaluator.print_results()
    rp_evaluator = RelationPredictionEvaluator(model, kg_test)
    rp_evaluator.evaluate(eval_b_size, verbose=False)
    rp_evaluator.print_results()
    print(f'Total time cost: {time_since(start)}')
# In[43]:

# Move everything to CUDA if available
if cuda.is_available():
    cuda.empty_cache()
    model.cuda()
    criterion.cuda()

# In[20]:

# Define the torch optimizer to be used
optimizer = Adam(model.parameters(), lr=lr, weight_decay=1e-5)

# Bernoulli negative sampling over the training KG; CUDA dataloader
# (use_cuda='all' keeps all batches on the GPU).
sampler = BernoulliNegativeSampler(kg_train)
dataloader = DataLoader(kg_train, batch_size=b_size, use_cuda='all')

# Margin-loss training loop over corrupted triplets.
iterator = tqdm(range(n_epochs), unit='epoch')
for epoch in iterator:
    running_loss = 0.0  # NOTE(review): never accumulated in the visible span -- likely truncated
    for i, batch in enumerate(dataloader):
        h, t, r = batch[0], batch[1], batch[2]
        # Corrupt either head or tail of each positive triplet (1:1 sampling).
        n_h, n_t = sampler.corrupt_batch(h, t, r)

        optimizer.zero_grad()

        # forward + backward + optimize
        pos, neg = model(h, t, n_h, n_t, r)
        loss = criterion(pos, neg)
        loss.backward()
        optimizer.step()
Example #5
0
        print('multiple gpus are available')
        if args.gpu is not None:
            # Restrict DataParallel to the user-specified GPU ids.
            model = DataParallel(model, device_ids=args.gpu)
        else:
            model = DataParallel(model)

    # Restore the best checkpoint for this model before evaluation.
    checkpoint_manager = CheckpointManager(restore_dir)
    ckpt = checkpoint_manager.load_checkpoint(f'best_{args.model}.tar')
    model.load_state_dict(ckpt['model_state_dict'])
    criterion = MarginLoss(margin)

    model.to(device)
    criterion.to(device)

    # Mean margin loss over the test split (no gradient updates).
    sampler = BernoulliNegativeSampler(kg_test)
    test_dl = DataLoader(kg_test, batch_size=args.batch_size)

    model.eval()
    test_loss = 0
    for step, batch in tqdm(enumerate(test_dl),
                            desc='steps',
                            total=len(test_dl)):
        h, t, r = map(lambda elm: elm.to(device), batch)
        n_h, n_t = sampler.corrupt_batch(h, t, r)
        with torch.no_grad():
            pos, neg = model(h, t, n_h, n_t, r)
            loss = criterion(pos, neg)
            test_loss += loss.item()
    # Average over the number of batches. NOTE(review): raises NameError if
    # test_dl is empty (`step` would be unbound) -- confirm non-empty input.
    test_loss /= (step + 1)

    training_summary = previous_summary['Training Summary']
Example #6
0
            model = DataParallel(model, device_ids=device_ids)
        else:
            model = DataParallel(model)
    model.to(device)
    criterion.to(device)

    # TensorBoard writer plus checkpoint/summary bookkeeping for this run.
    writer = SummaryWriter(save_dir / f'runs_{args.model}')
    checkpoint_manager = CheckpointManager(save_dir)
    summary_manager = SummaryManager(save_dir)
    summary_manager.update(experiment_summary)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=1e-5)
    # Bernoulli negative sampler over the training KG; separate train/valid
    # dataloaders over their respective splits.
    sampler = BernoulliNegativeSampler(kg_train)
    tr_dl = DataLoader(kg_train, batch_size=args.batch_size)
    val_dl = DataLoader(kg_valid, batch_size=args.batch_size)

    best_val_loss = 1e+10
    for epoch in tqdm(range(args.epochs), desc='epochs'):
        tr_loss = 0
        model.train()

        for step, batch in enumerate(tr_dl):
            h, t, r = map(lambda elm: elm.to(device), batch)
            # Corrupt either head or tail of each positive triplet.
            n_h, n_t = sampler.corrupt_batch(h, t, r)

            optimizer.zero_grad()

            pos, neg = model(h, t, n_h, n_t, r)
            loss = criterion(pos, neg)
Example #7
0
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        model.cuda()
        criterion.cuda()

    # TensorBoard writer plus checkpoint/summary bookkeeping for this run.
    writer = SummaryWriter(save_dir / f'runs_{args.model}')
    checkpoint_manager = CheckpointManager(save_dir)
    summary_manager = SummaryManager(save_dir)
    summary_manager.update(experiment_summary)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=1e-5)
    sampler = BernoulliNegativeSampler(kg_train)
    tr_dl = DataLoader(kg_train, batch_size=args.batch_size, use_cuda='all')
    # NOTE(review): val_dl is built from kg_train, not a validation split --
    # this looks like a copy-paste bug (cf. the sibling setup that uses
    # kg_valid); confirm against the surrounding code before relying on
    # the reported "validation" metrics.
    val_dl = DataLoader(kg_train, batch_size=args.batch_size, use_cuda='all')

    best_val_loss = 1e+10
    for epoch in tqdm(range(args.epochs), desc='epochs'):

        tr_loss = 0
        model.train()

        for step, batch in enumerate(tr_dl):
            h, t, r = batch[0], batch[1], batch[2]
            # Corrupt either head or tail of each positive triplet.
            n_h, n_t = sampler.corrupt_batch(h, t, r)

            optimizer.zero_grad()

            pos, neg = model(h, t, n_h, n_t, r)
Example #8
0
    def __init__(self, kg, model_type, ts, **kwargs):
        """Build a torchkge translation model and set up a fresh run directory.

        Parameters
        ----------
        kg : knowledge graph exposing ``n_ent`` / ``n_rel``.
        model_type : str, prefix of a torchkge model class
            (e.g. 'TransE', 'TransR', 'TransD', 'TorusE').
        ts : truth share, stored as ``self.truth_share``.
        **kwargs : optional hyperparameters: ``diss_type`` (default 'L2'),
            ``emb_dim`` / ``ent_emb_dim`` / ``rel_emb_dim`` (default
            ``args.emb_dim``), ``lr`` (default ``args.lr``),
            ``n_epochs`` (default 100), ``b_size`` (default 32).
        """
        self.kg = kg
        self.truth_share = ts
        self.model_type = model_type
        self.diss_type = kwargs.pop('diss_type', 'L2')
        if self.model_type in ['TransR', 'TransD', 'TorusE']:
            # These models take separate entity / relation embedding dims.
            self.ent_emb_dim = kwargs.pop('ent_emb_dim', args.emb_dim)
            self.rel_emb_dim = kwargs.pop('rel_emb_dim', args.emb_dim)
            self.model = getattr(torchkge.models.translation, model_type +
                                 'Model')(ent_emb_dim=self.ent_emb_dim,
                                          rel_emb_dim=self.rel_emb_dim,
                                          n_entities=self.kg.n_ent,
                                          n_relations=self.kg.n_rel)
        else:
            self.emb_dim = kwargs.pop('emb_dim', args.emb_dim)
            # BUG FIX: string comparison must use `==`, not `is` --
            # identity comparison of string literals is implementation-
            # dependent and could silently take the wrong branch.
            if self.model_type == 'TransE':
                self.model = getattr(torchkge.models, f'{model_type}Model')(
                    emb_dim=self.emb_dim,
                    n_entities=kg.n_ent,
                    n_relations=kg.n_rel,
                    dissimilarity_type=self.diss_type)
            else:
                self.model = getattr(torchkge.models, f'{model_type}Model')(
                    emb_dim=self.emb_dim,
                    n_entities=kg.n_ent,
                    n_relations=kg.n_rel)
        self.n_entities = kg.n_ent
        self.n_relations = kg.n_rel
        # Run indices already used by earlier runs of this model type.
        all_is = [
            int(d.split('_')[1]) for d in os.listdir(wot.models_path)
            if os.path.isdir(join(wot.models_path, d))
            and f'{self.model_type}_' in d
        ]
        # Smallest free run index.
        i = [x for x in range(1, len(all_is) + 2) if x not in all_is][0]
        # BUG FIX: use the free index `i` itself; the old `i + 1` could
        # collide with an existing run directory
        # (e.g. all_is == [2] -> i == 1 -> i + 1 == 2, already taken).
        self.model_path = join(wot.models_path,
                               f'{self.model_type}_{str(i).zfill(2)}')
        os.makedirs(self.model_path, exist_ok=True)
        self.logfile = join(self.model_path, 'log.txt')
        ## Hyperparameters
        self.lr = kwargs.pop('lr', args.lr)
        self.n_epochs = kwargs.pop('n_epochs', 100)
        self.b_size = kwargs.pop('b_size', 32)
        self.logline(
            tabulate([(k, v) for k, v in vars(self).items()],
                     headers=['variable', 'value']))

        try:
            # Prefer a CUDA dataloader; torchkge raises AssertionError when
            # CUDA is unavailable.
            self.dataloader = DataLoader(self.kg,
                                         batch_size=self.b_size,
                                         use_cuda='all')
        except AssertionError:
            self.dataloader = DataLoader(self.kg, batch_size=self.b_size)

        ## Logger / training state
        self.epochs = 0
        self.tr_losses = []
        self.best_epoch = -1
        self.val_losses = []
        self.val_epochs = []