Example #1
 def pretrain(self, train_data, corrupter, tester):
     src, rel, dst = train_data
     n_train = len(src)
     n_epoch = self.config.n_epoch
     n_batch = self.config.n_batch
     optimizer = Adam(self.mdl.parameters(), weight_decay=self.weight_decay)
     best_perf = 0
     for epoch in range(n_epoch):
         epoch_loss = 0
         # every sample_freq epochs, reshuffle the triples and draw fresh negatives
         if epoch % self.config.sample_freq == 0:
             rand_idx = t.randperm(n_train)
             src = src[rand_idx]
             rel = rel[rand_idx]
             dst = dst[rand_idx]
             src_corrupted, rel_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
             src_corrupted = src_corrupted.cuda()
             rel_corrupted = rel_corrupted.cuda()
             dst_corrupted = dst_corrupted.cuda()
         for ss, rs, ts in batch_by_num(n_batch, src_corrupted, rel_corrupted, dst_corrupted, n_sample=n_train):
             self.mdl.zero_grad()
             # class 0 marks the position of the true triple among the candidates
             label = t.zeros(len(ss)).type(t.LongTensor).cuda()
             loss = t.sum(self.mdl.softmax_loss(Variable(ss), Variable(rs), Variable(ts), label))
             loss.backward()
             optimizer.step()
             epoch_loss += loss.item()
         logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch, epoch_loss / n_train)
         if (epoch + 1) % self.config.epoch_per_test == 0:
             test_perf = tester()
             if test_perf > best_perf:
                 self.save(os.path.join(config().task.dir, self.config.model_file))
                 best_perf = test_perf
     return best_perf
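
Every example on this page draws minibatches through a batch_by_num helper that is not shown. The sketch below is inferred from the call sites, assuming it partitions a set of aligned tensors into n_batch contiguous slices; the real implementation may differ.

import torch as t

def batch_by_num(n_batch, *tensors, n_sample=None):
    # Sketch only: yield n_batch tuples of aligned slices from the given
    # tensors, which are assumed to share the same first dimension.
    if n_sample is None:
        n_sample = len(tensors[0])
    for i in range(n_batch):
        # integer bounds that partition [0, n_sample) into n_batch chunks
        start = i * n_sample // n_batch
        end = (i + 1) * n_sample // n_batch
        yield tuple(tensor[start:end] for tensor in tensors)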
Example #2
    def pretrain(self, train_data, corrupter, tester):
        src, rel, dst = train_data
        n_train = len(src)
        n_epoch = self.config.n_epoch
        n_batch = self.config.n_batch
        optimizer = Adam(self.mdl.parameters(), weight_decay=self.weight_decay)
        best_perf = 0
        for epoch in range(n_epoch):
            epoch_loss = 0
            if epoch % self.config.sample_freq == 0:
                rand_idx = t.randperm(n_train)
                src = src[rand_idx]
                rel = rel[rand_idx]
                dst = dst[rand_idx]
                src_corrupted, rel_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
                src_corrupted = src_corrupted.cuda()
                rel_corrupted = rel_corrupted.cuda()
                dst_corrupted = dst_corrupted.cuda()
            for ss, rs, ts in batch_by_num(n_batch, src_corrupted, rel_corrupted, dst_corrupted, n_sample=n_train):
                self.mdl.zero_grad()
                label = t.zeros(len(ss)).type(t.LongTensor).cuda()
                # the model is wrapped (e.g. in nn.DataParallel), so the custom
                # loss is reached through .module
                loss = t.sum(self.mdl.module.softmax_loss(Variable(ss), Variable(rs), Variable(ts), label))
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()
            logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch, epoch_loss / n_train)
            if (epoch + 1) % self.config.epoch_per_test == 0:
                test_perf = tester()
                if test_perf > best_perf:
                    self.save(os.path.join(config().task.dir, self.config.model_file))
                    best_perf = test_perf
        return best_perf
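
Example #2 differs from Example #1 mainly in going through self.mdl.module, which suggests the model is wrapped, for instance in nn.DataParallel, whose .module attribute holds the underlying network. Below is a minimal sketch of that access pattern; the Scorer class and its scoring function are hypothetical stand-ins.

import torch
import torch.nn as nn

class Scorer(nn.Module):
    # Hypothetical stand-in for the real model; only the .module access
    # pattern matters here.
    def __init__(self, n_ent, n_rel, dim=16):
        super().__init__()
        self.ent = nn.Embedding(n_ent, dim)
        self.rel = nn.Embedding(n_rel, dim)

    def score(self, s, r, o):
        return (self.ent(s) * self.rel(r) * self.ent(o)).sum(dim=-1)

mdl = nn.DataParallel(Scorer(n_ent=100, n_rel=10))
s = torch.randint(0, 100, (8,))
r = torch.randint(0, 10, (8,))
o = torch.randint(0, 100, (8,))
# forward() calls are dispatched by the wrapper, but custom methods such
# as softmax_loss above live on the wrapped model and need .module:
scores = mdl.module.score(s, r, o)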
Example #3
 def pretrain(self, train_data, corrupter, tester):
     src, rel, dst = train_data
     n_train = len(src)
     optimizer = Adam(self.mdl.parameters())
     #optimizer = SGD(self.mdl.parameters(), lr=1e-4)
     n_epoch = self.config.n_epoch
     n_batch = self.config.n_batch
     best_perf = 0
     for epoch in range(n_epoch):
         epoch_loss = 0
         rand_idx = t.randperm(n_train)
         src = src[rand_idx]
         rel = rel[rand_idx]
         dst = dst[rand_idx]
         src_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
         src_cuda = src.cuda()
         rel_cuda = rel.cuda()
         dst_cuda = dst.cuda()
         src_corrupted = src_corrupted.cuda()
         dst_corrupted = dst_corrupted.cuda()
         for s0, r, t0, s1, t1 in batch_by_num(n_batch, src_cuda, rel_cuda, dst_cuda, src_corrupted, dst_corrupted,
                                               n_sample=n_train):
             self.mdl.zero_grad()
             loss = t.sum(self.mdl.pair_loss(Variable(s0), Variable(r), Variable(t0), Variable(s1), Variable(t1)))
             loss.backward()
             optimizer.step()
             self.mdl.constraint()  # re-impose model constraints (e.g. embedding norms)
             epoch_loss += loss.item()
         logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch, epoch_loss / n_train)
         if (epoch + 1) % self.config.epoch_per_test == 0:
             test_perf = tester()
             if test_perf > best_perf:
                 self.save(os.path.join(config().task.dir, self.config.model_file))
                 best_perf = test_perf
     return best_perf
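
Example #3's corrupter returns only corrupted heads and tails and leaves the relation untouched. Below is a rough sketch of a negative sampler with that interface; the uniform head-or-tail coin flip is an assumption, whereas the real corrupter likely uses relation-specific Bernoulli probabilities as in TransH.

import torch as t

class UniformCorrupter:
    # Interface sketch only; not the original implementation.
    def __init__(self, n_ent):
        self.n_ent = n_ent

    def corrupt(self, src, rel, dst):
        n = len(src)
        random_ent = t.randint(0, self.n_ent, (n,))
        corrupt_head = t.rand(n) < 0.5  # per triple: corrupt head or tail?
        src_corrupted = t.where(corrupt_head, random_ent, src)
        dst_corrupted = t.where(corrupt_head, dst, random_ent)
        return src_corrupted, dst_corrupted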
Example #4
 def pretrain(self, train_data, corrupter, tester):
     src, rel, dst = train_data
     n_train = len(src)
     optimizer = Adam(self.mdl.parameters())
     #optimizer = SGD(self.mdl.parameters(), lr=1e-4)
     n_epoch = self.config.n_epoch
     n_batch = self.config.n_batch
     best_perf = 0
     for epoch in range(n_epoch):
         self.mdl.train()
         epoch_loss = 0
         rand_idx = t.randperm(n_train)
         src = src[rand_idx]
         rel = rel[rand_idx]
         dst = dst[rand_idx]
         src_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
         src_cuda = src.cuda()
         rel_cuda = rel.cuda()
         dst_cuda = dst.cuda()
         src_corrupted = src_corrupted.cuda()
         dst_corrupted = dst_corrupted.cuda()
         for s0, r, t0, s1, t1 in batch_by_num(n_batch,
                                               src_cuda,
                                               rel_cuda,
                                               dst_cuda,
                                               src_corrupted,
                                               dst_corrupted,
                                               n_sample=n_train):
             self.mdl.zero_grad()
             loss = t.sum(self.mdl.pair_loss(s0, r, t0, s1, t1))
             loss.backward()
             optimizer.step()
             self.mdl.constraint()
             epoch_loss += loss.item()
         logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch,
                      epoch_loss / n_train)
         if (epoch + 1) % self.config.epoch_per_test == 0:
             start = time.time()
             self.mdl.eval()
             with t.no_grad():
                 test_perf = tester()
                 end = time.time()
                 time_used = end - start
                 logging.info('test time: %.2fs', time_used)
                 if test_perf > best_perf:
                     self.save(
                         os.path.join(config().task.dir,
                                      self.config.model_file))
                     best_perf = test_perf
     return best_perf
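
Example #4 is the only variant that toggles the model between train() and eval() and wraps evaluation in torch.no_grad(). A minimal illustration of why both calls matter; the tiny model is just for demonstration.

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5))
x = torch.randn(2, 4)

model.eval()              # dropout becomes a no-op; batchnorm would use running stats
with torch.no_grad():     # no autograd graph is built: less memory, faster evaluation
    y = model(x)
model.train()             # restore training behaviour for the next epoch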
Example #5
 def pretrain(self, train_data, corrupter, tester):
     src, rel, dst = train_data
     n_train = len(src)
     optimizer = Adam(self.mdl.parameters())
     #optimizer = SGD(self.mdl.parameters(), lr=1e-4)
     n_epoch = self.config.n_epoch
     n_batch = self.config.n_batch
     best_perf = 0
     for epoch in range(n_epoch):
         epoch_loss = 0
         rand_idx = t.randperm(n_train)
         src = src[rand_idx]
         rel = rel[rand_idx]
         dst = dst[rand_idx]
         src_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
         src_cuda = src.cuda()
         rel_cuda = rel.cuda()
         dst_cuda = dst.cuda()
         src_corrupted = src_corrupted.cuda()
         dst_corrupted = dst_corrupted.cuda()
         for s0, r, t0, s1, t1 in batch_by_num(n_batch,
                                               src_cuda,
                                               rel_cuda,
                                               dst_cuda,
                                               src_corrupted,
                                               dst_corrupted,
                                               n_sample=n_train):
             self.mdl.zero_grad()
             loss = t.sum(
                 self.mdl.module.pair_loss(Variable(s0), Variable(r),
                                           Variable(t0), Variable(s1),
                                           Variable(t1)))
             loss.backward()
             optimizer.step()
             self.mdl.module.constraint()  # constraints live on the wrapped model, hence .module
             epoch_loss += loss.item()
         logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch,
                      epoch_loss / n_train)
         if (epoch + 1) % self.config.epoch_per_test == 0:
             # torch.cuda.set_device(2)
             test_perf = tester()
             if test_perf > best_perf:
                 self.save(
                     os.path.join(config().task.dir,
                                  self.config.model_file))
                 best_perf = test_perf
     return best_perf
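
Examples #3 through #5 call constraint() after every optimizer step. In translation-based embedding models such a hook usually projects entity embeddings back onto the unit ball; the sketch below shows that common pattern, though whether the original model does exactly this is an assumption.

import torch
import torch.nn as nn

class TransELike(nn.Module):
    # Hypothetical model illustrating the constraint() hook used above.
    def __init__(self, n_ent, n_rel, dim=16):
        super().__init__()
        self.ent = nn.Embedding(n_ent, dim)
        self.rel = nn.Embedding(n_rel, dim)

    def constraint(self):
        # project entity embeddings with norm > 1 back onto the unit L2 ball
        with torch.no_grad():
            norms = self.ent.weight.norm(p=2, dim=1, keepdim=True)
            self.ent.weight.div_(norms.clamp(min=1.0))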
Example #6
dis.test_link(test_data, n_ent, filt_heads, filt_tails)  # baseline evaluation before adversarial training

corrupter = BernCorrupterMulti(train_data, n_ent, n_rel, config().adv.n_sample)
src, rel, dst = train_data
n_train = len(src)
n_epoch = config().adv.n_epoch
n_batch = config().adv.n_batch
mdl_name = 'gan_dis_' + datetime.datetime.now().strftime("%m%d%H%M%S") + '.mdl'
best_perf = 0
avg_reward = 0
for epoch in range(n_epoch):
    epoch_d_loss = 0
    epoch_reward = 0
    src_cand, rel_cand, dst_cand = corrupter.corrupt(src, rel, dst, keep_truth=False)
    for s, r, t, ss, rs, ts in batch_by_num(n_batch, src, rel, dst, src_cand, rel_cand, dst_cand, n_sample=n_train):
        gen_step = gen.gen_step(ss, rs, ts, temperature=config().adv.temperature)
        src_smpl, dst_smpl = next(gen_step)
        losses, rewards = dis.dis_step(s, r, t, src_smpl.squeeze(), dst_smpl.squeeze())
        epoch_reward += torch.sum(rewards).item()
        rewards = rewards - avg_reward       # center rewards with last epoch's average as baseline
        gen_step.send(rewards.unsqueeze(1))  # feed rewards back into the generator coroutine
        epoch_d_loss += torch.sum(losses).item()
    avg_loss = epoch_d_loss / n_train
    avg_reward = epoch_reward / n_train
    logging.info('Epoch %d/%d, D_loss=%f, reward=%f', epoch + 1, n_epoch, avg_loss, avg_reward)
    if (epoch + 1) % config().adv.epoch_per_test == 0:
        #gen.test_link(valid_data, n_ent, filt_heads, filt_tails)
        perf = dis.test_link(valid_data, n_ent, filt_heads, filt_tails)
        if perf > best_perf:
            best_perf = perf
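
Example #6 drives the generator through a Python coroutine: next(gen_step) runs it up to its first yield and returns sampled negatives, and gen_step.send(rewards) resumes it with the discriminator's rewards, a REINFORCE-style handshake. Below is a stubbed sketch of that protocol; all names are hypothetical and the policy update is omitted.

import torch

def gen_step_sketch(ss, rs, ts, temperature=1.0):
    # Stub sampling: take the first candidate head/tail per triple. The
    # real generator would sample from a softmax over candidate scores.
    src_smpl, dst_smpl = ss[:, :1], ts[:, :1]
    rewards = yield src_smpl, dst_smpl  # hand samples to the caller, wait for rewards
    _ = rewards  # a real implementation would do a REINFORCE update here
    yield        # absorb send() so the caller does not hit StopIteration

ss = torch.randint(0, 100, (8, 5))  # 8 triples, 5 candidate heads each
rs = torch.randint(0, 10, (8, 5))
ts = torch.randint(0, 100, (8, 5))
step = gen_step_sketch(ss, rs, ts)
src_smpl, dst_smpl = next(step)     # first yield: the sampled entities
step.send(torch.randn(8, 1))        # resume with per-sample rewards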