Example #1
    def train(self, train_data, tester_val, tester_tst):
        head, tail, rela = train_data
        # number of training triples
        n_train = len(head)

        if self.args.optim=='adam' or self.args.optim=='Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim=='adagrad' or self.args.optim=='Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)

        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

        n_epoch = self.args.n_epoch
        n_batch = self.args.n_batch
        best_mrr = 0

        for epoch in range(n_epoch):
            start = time.time()

            self.epoch = epoch
            rand_idx = torch.randperm(n_train)
            head = head[rand_idx].cuda()
            tail = tail[rand_idx].cuda()
            rela = rela[rand_idx].cuda()

            epoch_loss = 0

            for h, t, r in batch_by_size(n_batch, head, tail, rela, n_sample=n_train):
                self.model.zero_grad()

                loss = self.model.forward(h, t, r)
                loss += self.args.lamb * self.model.regul
                loss.backward()
                self.optimizer.step()
                self.prox_operator()
                epoch_loss += loss.data.cpu().numpy()

            self.time_tot += time.time() - start
            scheduler.step()

            if (epoch+1) % self.args.epoch_per_test == 0:
                # output performance
                valid_mrr, valid_mr, valid_10 = tester_val()
                test_mrr,  test_mr,  test_10 = tester_tst()
                out_str = '%d\t\t%.4f\t%.4f\t%.4f\n'%(epoch + 1, test_mr, test_mrr, test_10)

                # output the best performance info
                if valid_mrr > best_mrr:
                    best_mrr = valid_mrr
                    best_str = out_str
                if best_mrr < self.args.thres:
                    print('\tearly stopped at Epoch:{}, best_mrr:{}'.format(epoch+1, best_mrr), self.model.struct)
                    return best_mrr, best_str
        return best_mrr, best_str
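Every snippet on this page iterates with a batch_by_size helper that is never shown. A minimal compatible sketch, assuming the first argument is the batch size (the evaluation calls pass test_batch_size), that a single tensor argument yields plain batches while several yield tuples, and that n_sample caps the number of rows consumed; the real implementation in the source repositories may differ:

def batch_by_size(batch_size, *tensors, n_sample=None):
    # Hypothetical helper: yield aligned mini-batches from equally sized
    # tensors/arrays. Slicing works for both torch tensors and numpy arrays,
    # matching the mixed arguments passed in the examples below.
    n = n_sample if n_sample is not None else len(tensors[0])
    for start in range(0, n, batch_size):
        end = min(start + batch_size, n)
        if len(tensors) == 1:
            yield tensors[0][start:end]
        else:
            yield tuple(t[start:end] for t in tensors)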
Example #2
    def test_link(self, test_data, n_ent, heads, tails, filt=True):
        mrr_tot = 0.
        mr_tot = 0
        hit_tot = np.zeros((3, ))
        count = 0
        for batch_h, batch_t, batch_r in batch_by_size(
                self.args.test_batch_size, *test_data):
            batch_size = batch_h.size(0)
            head_val = Variable(
                batch_h.unsqueeze(1).expand(batch_size, n_ent).cuda())
            tail_val = Variable(
                batch_t.unsqueeze(1).expand(batch_size, n_ent).cuda())
            rela_val = Variable(
                batch_r.unsqueeze(1).expand(batch_size, n_ent).cuda())
            all_val = Variable(
                torch.arange(0, n_ent).unsqueeze(0).expand(
                    batch_size, n_ent).type(torch.LongTensor).cuda())
            batch_head_scores = self.model.score(all_val, tail_val,
                                                 rela_val).data
            batch_tail_scores = self.model.score(head_val, all_val,
                                                 rela_val).data
            # for each positive triple, score all candidate heads and tails
            for h, t, r, head_score, tail_score in zip(batch_h, batch_t,
                                                       batch_r,
                                                       batch_head_scores,
                                                       batch_tail_scores):
                h_idx = int(h.data.cpu().numpy())
                t_idx = int(t.data.cpu().numpy())
                r_idx = int(r.data.cpu().numpy())
                if filt:  # filtered setting: mask other known true answers
                    if tails[(h_idx, r_idx)]._nnz() > 1:
                        tmp = tail_score[t_idx].data.cpu().numpy()
                        idx = tails[(h_idx, r_idx)]._indices()
                        tail_score[idx] = 1e20
                        tail_score[t_idx] = torch.from_numpy(tmp).cuda()
                    if heads[(t_idx, r_idx)]._nnz() > 1:
                        tmp = head_score[h_idx].data.cpu().numpy()
                        idx = heads[(t_idx, r_idx)]._indices()
                        head_score[idx] = 1e20
                        head_score[h_idx] = torch.from_numpy(tmp).cuda()
                mrr, mr, hit = mrr_mr_hitk(tail_score, t_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                mrr, mr, hit = mrr_mr_hitk(head_score, h_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                count += 2
        logging.info('Test_MRR=%f, Test_MR=%f, Test_H=%f %f %f, Count=%d',
                     float(mrr_tot) / count,
                     float(mr_tot) / count, hit_tot[0] / count,
                     hit_tot[1] / count, hit_tot[2] / count, count)
        return (float(mrr_tot) / count, mr_tot / count, hit_tot[0] / count,
                hit_tot[1] / count, hit_tot[2] / count)
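The metrics come from a mrr_mr_hitk helper that is also not shown. A sketch consistent with the accumulators above (hit_tot = np.zeros((3, )), presumably Hits@1/3/10), assuming lower scores rank better, which matches the 1e20 masking in this example; snippets that mask with 0.0 over sigmoid scores (Example #5) imply the opposite sort order, so the direction is repository-specific:

import numpy as np
import torch

def mrr_mr_hitk(scores, target, k=(1, 3, 10)):
    # Hypothetical sketch: rank `target` among `scores` with an ascending
    # sort (lower = better), matching the 1e20 masking above. Repositories
    # that mask known positives with 0.0 would sort descending instead.
    _, order = torch.sort(scores.view(-1))
    rank = (order == target).nonzero()[0].item() + 1
    hits = np.array([1.0 if rank <= kk else 0.0 for kk in k])
    return 1.0 / rank, rank, hits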
Example #3
    def train(self,
              train_data,
              valid_data,
              tester_val,
              tester_tst,
              tester_trip=None):

        self.tester_val = tester_val

        if self.args.optim == 'adam' or self.args.optim == 'Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)

        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

        n_epoch = self.args.n_epoch
        n_batch = self.args.n_batch
        self.best_mrr = 0

        # dataset sizes
        n_train = train_data.size(0)
        n_valid = valid_data.size(0)

        self.good_struct = []

        for epoch in range(n_epoch):

            self.model.train()

            start = time.time()

            self.epoch = epoch
            rand_idx = torch.randperm(n_train)

            if self.GPU:
                train_data = train_data[rand_idx].cuda()
            else:
                train_data = train_data[rand_idx]

            epoch_loss = 0

            for facts in batch_by_size(n_batch, train_data, n_sample=n_train):

                self.model.zero_grad()
                if self.n_arity == 3:
                    loss = self.model.forward(facts, self.op_idx)
                    loss.backward()

                elif self.n_arity == 4:
                    loss = self.model.forward_tri(facts, self.op_idx)
                    loss.backward()
                """kge step"""
                self.optimizer.step()
                self.prox_operator()

                epoch_loss += loss.data.cpu().numpy()

            scheduler.step()
            self.time_tot += time.time() - start
            print("Epoch: %d/%d, Loss=%.8f, Time=%.4f" %
                  (epoch + 1, n_epoch, epoch_loss / n_train,
                   time.time() - start))

            if (epoch + 1) % self.args.epoch_per_test == 0:

                valid_mrr, valid_mr, valid_1, valid_3, valid_10 = tester_val()
                test_mrr, test_mr, test_1, test_3, test_10 = tester_tst()
                if tester_trip is None:
                    out_str = '%d\t%.2f %.2f \t%.4f  %.1f %.4f %.4f %.4f\t%.4f %.1f %.4f %.4f %.4f\n' % (epoch, self.time_tot, epoch_loss/n_train, \
                        valid_mrr, valid_mr, valid_1, valid_3, valid_10, \
                        test_mrr, test_mr, test_1, test_3, test_10)

                    # out_str only exists in this branch, so write it here
                    with open(self.args.perf_file, 'a') as f:
                        f.write(out_str)

                if test_mrr > self.best_mrr:
                    self.best_mrr = test_mrr

        with open(self.args.perf_file, 'a') as f:
            f.write("arch:" + str(self.op_idx.tolist()) + "\n")
            f.write("best mrr:" + str(self.best_mrr) + "\n")

        return self.best_mrr
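Each optimizer step above is followed by self.prox_operator(), whose body is not included on this page. A common choice in KGE training, and a plausible reading here, is a projection of the entity embeddings back onto the unit L2 ball; the sketch below is that assumption, not the repository's actual code (self.model.ent_embed follows the naming used elsewhere on this page):

import torch

def prox_operator(self):
    # Assumed proximal step: renormalise entity embeddings whose L2 norm
    # exceeds 1 after each optimizer update.
    with torch.no_grad():
        w = self.model.ent_embed.weight
        norms = w.norm(p=2, dim=1, keepdim=True).clamp(min=1.0)
        w.div_(norms)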
Example #4
    def evaluate(self, test_data, e1_sp, e2_sp, e3_sp, arch=None):

        mrr_tot = 0.
        mr_tot = 0
        hit_tot = np.zeros((3, ))
        count = 0

        self.model.eval()

        max_idx = self.op_idx

        for facts in batch_by_size(self.args.test_batch_size, test_data):

            if self.GPU:
                r, e1, e2, e3 = (facts[:, 0].cuda(), facts[:, 1].cuda(),
                                 facts[:, 2].cuda(), facts[:, 3].cuda())
            else:
                r, e1, e2, e3 = facts[:, 0], facts[:, 1], facts[:, 2], facts[:, 3]

            length = self.n_dim // self.K

            r_embed = self.model.bnr(self.model.rel_embed(r)).view(
                -1, self.K, length)
            e1_embed = self.model.input_dropout(
                self.model.bne(self.model.ent_embed(e1))).view(
                    -1, self.K, length)
            e2_embed = self.model.input_dropout(
                self.model.bne(self.model.ent_embed(e2))).view(
                    -1, self.K, length)
            e3_embed = self.model.input_dropout(
                self.model.bne(self.model.ent_embed(e3))).view(
                    -1, self.K, length)

            e1_scores = F.softmax(self.model.tri_neg_other(
                r_embed, e2_embed, e3_embed, max_idx, 1),
                                  dim=1).data
            e2_scores = F.softmax(self.model.tri_neg_other(
                r_embed, e1_embed, e3_embed, max_idx, 2),
                                  dim=1).data
            e3_scores = F.softmax(self.model.tri_neg_other(
                r_embed, e1_embed, e2_embed, max_idx, 3),
                                  dim=1).data

            for idx in range(len(r)):
                r_idx = int(r[idx].data.cpu().numpy())
                e1_idx = int(e1[idx].data.cpu().numpy())
                e2_idx = int(e2[idx].data.cpu().numpy())
                e3_idx = int(e3[idx].data.cpu().numpy())

                if e1_sp[(r_idx, e2_idx, e3_idx)]._nnz() > 1:
                    tmp = e1_scores[idx][e1_idx].data.cpu().numpy()
                    indic = e1_sp[(r_idx, e2_idx, e3_idx)]._indices()
                    e1_scores[idx][indic] = 0.0
                    if self.GPU:
                        e1_scores[idx][e1_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        e1_scores[idx][e1_idx] = torch.from_numpy(tmp)

                if e2_sp[(r_idx, e1_idx, e3_idx)]._nnz() > 1:
                    tmp = e2_scores[idx][e2_idx].data.cpu().numpy()
                    indic = e2_sp[(r_idx, e1_idx, e3_idx)]._indices()
                    e2_scores[idx][indic] = 0.0
                    if self.GPU:
                        e2_scores[idx][e2_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        e2_scores[idx][e2_idx] = torch.from_numpy(tmp)

                if e3_sp[(r_idx, e1_idx, e2_idx)]._nnz() > 1:
                    tmp = e3_scores[idx][e3_idx].data.cpu().numpy()
                    indic = e3_sp[(r_idx, e1_idx, e2_idx)]._indices()
                    e3_scores[idx][indic] = 0.0
                    if self.GPU:
                        e3_scores[idx][e3_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        e3_scores[idx][e3_idx] = torch.from_numpy(tmp)

                mrr, mr, hit = mrr_mr_hitk(e1_scores[idx], e1_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit

                mrr, mr, hit = mrr_mr_hitk(e2_scores[idx], e2_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit

                mrr, mr, hit = mrr_mr_hitk(e3_scores[idx], e3_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit

                count += 3

        #if arch is None:
        logging.info('Test_MRR=%f, Test_MR=%f, Test_H=%f %f %f, Count=%d',
                     float(mrr_tot) / count,
                     float(mr_tot) / count, hit_tot[0] / count,
                     hit_tot[1] / count, hit_tot[2] / count, count)

        return (float(mrr_tot) / count, float(mr_tot) / count,
                hit_tot[0] / count, hit_tot[1] / count, hit_tot[2] / count)
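The filtered setting relies on dictionaries (e1_sp, e2_sp, e3_sp) mapping the fixed components of a fact to a 1-D sparse tensor over candidate entities, since the loop calls ._nnz() and ._indices() on the values. A hypothetical construction consistent with that usage, assuming facts is an (n, 4) LongTensor in (r, e1, e2, e3) order as above:

import torch
from collections import defaultdict

def build_filter_dict(facts, n_ent, slot):
    # slot is 1, 2 or 3: which entity position the dictionary filters.
    known = defaultdict(set)
    for fact in facts.tolist():
        key = list(fact)              # [r, e1, e2, e3]
        answer = key.pop(slot)
        known[tuple(key)].add(answer)
    out = {}
    for key, answers in known.items():
        idx = torch.LongTensor(sorted(answers)).unsqueeze(0)
        val = torch.ones(idx.size(1))
        # coalesce so ._nnz() and ._indices() are valid, as used above
        out[key] = torch.sparse_coo_tensor(idx, val, (n_ent,)).coalesce()
    return out

Under that assumption, e1_sp = build_filter_dict(all_facts, n_ent, 1), and similarly for slots 2 and 3.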
Example #5
    def test_link(self, test_data, n_ent, heads, tails, filt=True, arch=None):

        mrr_tot = 0.
        mr_tot = 0
        hit_tot = np.zeros((3, ))
        count = 0

        max_idx = self.op_idx

        for facts in batch_by_size(self.args.test_batch_size, test_data):

            if self.GPU:
                batch_h = facts[:, 0].cuda()
                batch_t = facts[:, 1].cuda()
                batch_r = facts[:, 2].cuda()
            else:
                batch_h = facts[:, 0]
                batch_t = facts[:, 1]
                batch_r = facts[:, 2]

            length = self.n_dim // self.K
            h_embed = self.model.ent_embed(batch_h).view(-1, self.K, length)
            t_embed = self.model.ent_embed(batch_t).view(-1, self.K, length)
            r_embed = self.model.rel_embed(batch_r).view(-1, self.K, length)

            head_scores = torch.sigmoid(
                self.model.bin_neg_other(r_embed, t_embed, max_idx, 1)).data
            tail_scores = torch.sigmoid(
                self.model.bin_neg_other(r_embed, h_embed, max_idx, 2)).data

            for h, t, r, head_score, tail_score in zip(batch_h, batch_t,
                                                       batch_r, head_scores,
                                                       tail_scores):
                h_idx = int(h.data.cpu().numpy())
                t_idx = int(t.data.cpu().numpy())
                r_idx = int(r.data.cpu().numpy())
                if filt:  # filter
                    if tails[(h_idx, r_idx)]._nnz() > 1:
                        tmp = tail_score[t_idx].data.cpu().numpy()
                        idx = tails[(h_idx, r_idx)]._indices()
                        tail_score[idx] = 0.0

                        if self.GPU:
                            tail_score[t_idx] = torch.from_numpy(tmp).cuda()
                        else:
                            tail_score[t_idx] = torch.from_numpy(tmp)

                    if heads[(t_idx, r_idx)]._nnz() > 1:
                        tmp = head_score[h_idx].data.cpu().numpy()
                        idx = heads[(t_idx, r_idx)]._indices()
                        head_score[idx] = 0.0
                        if self.GPU:
                            head_score[h_idx] = torch.from_numpy(tmp).cuda()
                        else:
                            head_score[h_idx] = torch.from_numpy(tmp)

                mrr, mr, hit = mrr_mr_hitk(tail_score, t_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                mrr, mr, hit = mrr_mr_hitk(head_score, h_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                count += 2

        if arch is None:
            logging.info('Test_MRR=%f, Test_MR=%f, Test_H=%f %f %f, Count=%d',
                         float(mrr_tot) / count,
                         float(mr_tot) / count, hit_tot[0] / count,
                         hit_tot[1] / count, hit_tot[2] / count, count)

        return (float(mrr_tot) / count, float(mr_tot) / count,
                hit_tot[0] / count, hit_tot[1] / count, hit_tot[2] / count)
    def test_link(self,
                  struct,
                  test,
                  randint,
                  test_data,
                  n_ent,
                  heads,
                  tails,
                  filt=True):

        mrr_tot = 0.
        mr_tot = 0.
        hit_tot = np.zeros((3, ))
        count = 0

        self.n_cluster = len(struct)
        test_batch_size = self.args.n_batch

        head, tail, rela = test_data

        if randint is None:
            rand_idx = torch.randperm(len(head))
        else:
            np.random.seed(randint)
            rand_idx = torch.LongTensor(np.random.permutation(len(head)))

        if self.GPU:
            head = head[rand_idx].cuda()
            tail = tail[rand_idx].cuda()
            rela = rela[rand_idx].cuda()
        else:
            head = head[rand_idx]
            tail = tail[rand_idx]
            rela = rela[rand_idx]

        for batch_h, batch_t, batch_r in batch_by_size(test_batch_size, head,
                                                       tail, rela):

            if self.GPU:
                batch_h = batch_h.cuda()
                batch_t = batch_t.cuda()
                batch_r = batch_r.cuda()

            h_embed = self.model.ent_embed(batch_h).view(
                -1, self.K, self.n_dim // self.K)
            t_embed = self.model.ent_embed(batch_t).view(
                -1, self.K, self.n_dim // self.K)

            length = self.n_dim // self.K

            # create a rela_embed with size (n_rel, 2K+1, length)
            rel_embed_pos = self.model.rel_embed.weight.view(
                -1, self.K, length)
            rel_embed_neg = -rel_embed_pos

            if self.GPU:
                rel_embed_zeros = torch.zeros(self.n_rel, 1, length).cuda()
            else:
                rel_embed_zeros = torch.zeros(self.n_rel, 1, length)

            self.rel_embed_2K_1 = torch.cat(
                (rel_embed_zeros, rel_embed_pos, rel_embed_neg), 1)

            # combine struct
            if self.GPU:
                self.r_embed = torch.zeros(self.n_rel, self.K * self.K,
                                           length).cuda()
            else:
                self.r_embed = torch.zeros(self.n_rel, self.K * self.K, length)

            for i_rc in range(self.n_cluster):
                max_idx_list = struct[i_rc]
                self.r_embed[
                    self.cluster_rela_dict[i_rc], :, :] = self.rel_embed_2K_1[
                        self.cluster_rela_dict[i_rc]][:, max_idx_list, :]

            self.r_embed = self.r_embed.view(-1, self.K, self.K, length)
            self.r_embed = self.r_embed[batch_r, :, :, :]

            head_scores = torch.sigmoid(
                self.model.test_head(self.r_embed, t_embed)).data
            tail_scores = torch.sigmoid(
                self.model.test_tail(h_embed, self.r_embed)).data

            for h, t, r, head_score, tail_score in zip(batch_h, batch_t,
                                                       batch_r, head_scores,
                                                       tail_scores):
                h_idx = int(h.data.cpu().numpy())
                t_idx = int(t.data.cpu().numpy())
                r_idx = int(r.data.cpu().numpy())
                if filt:  # filter
                    if tails[(h_idx, r_idx)]._nnz() > 1:
                        tmp = tail_score[t_idx].data.cpu().numpy()
                        idx = tails[(h_idx, r_idx)]._indices()
                        tail_score[idx] = 0.0

                        if self.GPU:
                            tail_score[t_idx] = torch.from_numpy(tmp).cuda()
                        else:
                            tail_score[t_idx] = torch.from_numpy(tmp)
                    if heads[(t_idx, r_idx)]._nnz() > 1:
                        tmp = head_score[h_idx].data.cpu().numpy()
                        idx = heads[(t_idx, r_idx)]._indices()
                        head_score[idx] = 0.0
                        if self.GPU:
                            head_score[h_idx] = torch.from_numpy(tmp).cuda()
                        else:
                            head_score[h_idx] = torch.from_numpy(tmp)
                mrr, mr, hit = mrr_mr_hitk(tail_score, t_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit

                mrr, mr, hit = mrr_mr_hitk(head_score, h_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                count += 2

            if not test:
                break  # one mini batch

        return (float(mrr_tot) / count, float(mr_tot) / count,
                hit_tot[0] / count, hit_tot[1] / count, hit_tot[2] / count)
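The second test_link builds, per relation, a bank of 2K+1 blocks (one zero block, K positive blocks, K negated blocks) and gathers K*K of them according to each cluster's structure. A toy illustration of that indexing, with hypothetical sizes:

import torch

K, length, n_rel = 4, 16, 3
rel = torch.randn(n_rel, K, length)
bank = torch.cat((torch.zeros(n_rel, 1, length), rel, -rel), 1)  # (n_rel, 2K+1, length)

# A structure entry is a list of K*K indices into the bank: 0 picks the
# zero block, 1..K the positive blocks, K+1..2K the negated blocks.
struct = torch.randint(0, 2 * K + 1, (K * K,))
r_embed = bank[:, struct, :].view(n_rel, K, K, length)
print(r_embed.shape)  # torch.Size([3, 4, 4, 16])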
    def train_stand(self, train_data, valid_data, derived_struct, rela_cluster,
                    mrr):

        self.rela_to_dict(rela_cluster)

        head, tail, rela = train_data
        n_train = len(head)

        if self.args.optim == 'adam' or self.args.optim == 'Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)
        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

        n_batch = self.args.n_batch

        best_mrr = 0
        start = time.time()
        for epoch in range(self.args.n_stand_epoch):

            rand_idx = torch.randperm(n_train)

            if self.GPU:
                head = head[rand_idx].cuda()
                tail = tail[rand_idx].cuda()
                rela = rela[rand_idx].cuda()
            else:
                head = head[rand_idx]
                tail = tail[rand_idx]
                rela = rela[rand_idx]

            epoch_loss = 0
            n_iters = 0

            # train model weights
            for h, t, r in batch_by_size(n_batch,
                                         head,
                                         tail,
                                         rela,
                                         n_sample=n_train):

                self.model.zero_grad()

                loss = self.model.forward(derived_struct, h, t, r,
                                          self.cluster_rela_dict)
                loss += self.args.lamb * self.model.regul
                loss.backward()

                self.optimizer.step()
                self.prox_operator()

                epoch_loss += loss.data.cpu().numpy()
                n_iters += 1

            scheduler.step()

            print("Epoch: %d/%d, Loss=%.2f, Stand Time=%.2f" %
                  (epoch + 1, self.args.n_stand_epoch, time.time() - start,
                   epoch_loss / n_train))

            if (epoch + 1) % 5 == 0:
                test, randint = True, None

                valid_mrr, valid_mr, valid_1, valid_3, valid_10 = self.tester_val(
                    derived_struct, test, randint)
                test_mrr, test_mr, test_1, test_3, test_10 = self.tester_tst(
                    derived_struct, test, randint)

                out_str = '%d \t %.2f \t %.2f \t %.4f  %.1f %.4f %.4f %.4f\t%.4f %.1f %.4f %.4f %.4f\n' % (epoch, self.time_tot, epoch_loss/n_train,\
                            valid_mrr, valid_mr, valid_1, valid_3, valid_10, \
                            test_mrr, test_mr, test_1, test_3, test_10)

                # output the best performance info
                if test_mrr > best_mrr:
                    best_mrr = test_mrr
                    best_str = out_str

                with open(self.args.perf_file, 'a+') as f:
                    f.write(out_str)

        with open(self.args.perf_file, 'a+') as f:
            f.write("best performance:" + best_str + "\n")
            f.write("struct:" + str(derived_struct) + "\n")
            f.write("rela:" + str(rela_cluster) + "\n")

        return best_mrr
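train_stand begins by calling self.rela_to_dict(rela_cluster); given how self.cluster_rela_dict[i_rc] is used to index relation rows in test_link above, a plausible sketch is an inversion of the relation-to-cluster assignment (an assumption, not the repository's code):

import torch
from collections import defaultdict

def rela_to_dict(self, rela_cluster):
    # Assumed behaviour: rela_cluster[i] is the cluster id of relation i;
    # invert it into cluster id -> LongTensor of relation ids, the form
    # consumed as self.cluster_rela_dict in test_link.
    groups = defaultdict(list)
    for rel_id, cluster_id in enumerate(rela_cluster):
        groups[cluster_id].append(rel_id)
    self.cluster_rela_dict = {
        c: torch.LongTensor(rels) for c, rels in groups.items()
    }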
    def train_oas(self, train_data, valid_data, derived_struct):

        head, tail, rela = train_data
        n_train = len(head)

        if self.args.optim == 'adam' or self.args.optim == 'Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)

        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

        n_batch = self.args.n_batch

        for epoch in range(self.args.n_oas_epoch):
            start = time.time()
            rand_idx = torch.randperm(n_train)

            if self.GPU:
                head = head[rand_idx].cuda()
                tail = tail[rand_idx].cuda()
                rela = rela[rand_idx].cuda()
            else:
                head = head[rand_idx]
                tail = tail[rand_idx]
                rela = rela[rand_idx]

            epoch_loss = 0
            n_iters = 0

            # train model weights
            for h, t, r in batch_by_size(n_batch,
                                         head,
                                         tail,
                                         rela,
                                         n_sample=n_train):

                self.model.zero_grad()

                loss = self.model.forward(derived_struct, h, t, r,
                                          self.cluster_rela_dict)
                loss += self.args.lamb * self.model.regul
                loss.backward()

                nn.utils.clip_grad_norm_(self.model.parameters(),
                                         self.args.grad_clip)
                self.optimizer.step()
                self.prox_operator()

                epoch_loss += loss.data.cpu().numpy()
                n_iters += 1

            scheduler.step()

            if self.cluster_way == "scu":
                self.rela_cluster = self.cluster()
                self.rela_cluster_history.append(self.rela_cluster)
                self.rela_to_dict(self.rela_cluster)

            # train controller
            self.train_controller()

            # derive structs
            # evaluating the derived architecture during the search is
            # unnecessary, so only the search time is accumulated here
            self.time_tot += time.time() - start
            derived_struct, test_mrr = self.derive(sample_num=1)

            print(
                "Epoch: %d/%d, Search Time=%.2f, Loss=%.2f, Sampled Val MRR=%.8f, Tst MRR=%.8f"
                %
                (epoch + 1, self.args.n_oas_epoch, self.time_tot, epoch_loss /
                 n_train, self.derived_raward_history[-1], test_mrr))
Example #9
    def train(self, train_data, caches, corrupter, tester_val, tester_tst):
        head, tail, rela = train_data
        # useful information related to cache
        head_idx, tail_idx, self.head_cache, self.tail_cache, self.head_pos, self.tail_pos = caches
        n_train = len(head)

        if self.args.optim=='adam' or self.args.optim=='Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr, weight_decay=self.weight_decay)
        elif self.args.optim=='adagrad' or self.args.optim=='Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr, weight_decay=self.weight_decay)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr, weight_decay=self.weight_decay)

        n_epoch = self.args.n_epoch
        n_batch = self.args.n_batch
        best_mrr = 0

        for epoch in range(n_epoch):
            start = time.time()

            self.epoch = epoch
            rand_idx = torch.randperm(n_train)
            head = head[rand_idx].cuda()
            tail = tail[rand_idx].cuda()
            rela = rela[rand_idx].cuda()
            head_idx = head_idx[rand_idx.numpy()]
            tail_idx = tail_idx[rand_idx.numpy()]
            epoch_loss = 0

            if self.args.save and epoch==self.args.s_epoch:
                self.save(os.path.join(self.args.task_dir, self.args.model + '.mdl'))

            for h, t, r, h_idx, t_idx in batch_by_size(n_batch, head, tail, rela, head_idx, tail_idx, n_sample=n_train):
                self.model.zero_grad()

                h_rand, t_rand = self.neg_sample(h, t, r, h_idx, t_idx, self.args.sample, self.args.loss)
              
                # Bernoulli sampling to select (h', r, t) and (h, r, t')
                prob = corrupter.bern_prob[r]
                selection = torch.bernoulli(prob).type(torch.ByteTensor)
                n_h = torch.LongTensor(h.cpu().numpy()).cuda()
                n_t = torch.LongTensor(t.cpu().numpy()).cuda()
                n_r = torch.LongTensor(r.cpu().numpy()).cuda()
                if n_h.size() != h_rand.size():
                    n_h = n_h.unsqueeze(1).expand_as(h_rand)
                    n_t = n_t.unsqueeze(1).expand_as(h_rand)
                    n_r = n_r.unsqueeze(1).expand_as(h_rand)
                    h = h.unsqueeze(1)
                    r = r.unsqueeze(1)
                    t = t.unsqueeze(1)
                    
                n_h[selection] = h_rand[selection]
                n_t[~selection] = t_rand[~selection]
                
                if self.args.sample != 'bern':
                    self.update_cache(h, t, r, h_idx, t_idx)

                if self.args.loss == 'point':
                    p_loss = self.model.point_loss(h, t, r, 1)
                    n_loss = self.model.point_loss(n_h, n_t, n_r, -1)
                    loss = p_loss + n_loss
                else:
                    loss = self.model.pair_loss(h, t, r, n_h, n_t)
                
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.data.cpu().numpy()
            # get the time of each epoch
            self.time_tot += time.time() - start
            print("Epoch: %d/%d, Loss=%.8f, Time=%.4f"%(epoch+1, n_epoch, epoch_loss/n_train, time.time()-start))
           
               
            if (epoch+1) % self.args.epoch_per_test == 0:
                # output performance 
                valid_mrr, valid_mr, valid_1, valid_3, valid_10 = tester_val()
                test_mrr,  test_mr,  test_1,  test_3,  test_10 =  tester_tst()
                out_str = '%d\t%.2f\t%.4f %.1f %.4f %.4f %.4f\t%.4f %.1f %.4f %.4f %.4f\n' % (epoch, self.time_tot, \
                        valid_mrr, valid_mr, valid_1, valid_3, valid_10, \
                        test_mrr, test_mr, test_1, test_3, test_10)
                with open(self.args.perf_file, 'a') as f:
                    f.write(out_str)

                # remove false negative 
                if self.args.remove:
                    self.remove_positive(self.args.remove)

                # output the best performance info
                if valid_mrr > best_mrr:
                    best_mrr = valid_mrr
                    best_str = out_str
        return best_str
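corrupter.bern_prob[r] supplies, per relation, the probability of corrupting the head rather than the tail (selection picks h_rand above). The corrupter itself is not shown; the standard construction, and presumably what is intended, is the tph/(tph+hpt) heuristic from the TransH paper. A sketch assuming triples iterates (h, t, r) ids:

import torch
from collections import defaultdict

def bern_prob(triples, n_rel):
    tails_of = defaultdict(set)   # (r, h) -> tails seen with that pair
    heads_of = defaultdict(set)   # (r, t) -> heads seen with that pair
    for h, t, r in triples:
        tails_of[(r, h)].add(t)
        heads_of[(r, t)].add(h)
    tph_sum, tph_cnt = defaultdict(float), defaultdict(int)
    hpt_sum, hpt_cnt = defaultdict(float), defaultdict(int)
    for (r, _), ts in tails_of.items():
        tph_sum[r] += len(ts)
        tph_cnt[r] += 1
    for (r, _), hs in heads_of.items():
        hpt_sum[r] += len(hs)
        hpt_cnt[r] += 1
    prob = torch.full((n_rel,), 0.5)
    for r in range(n_rel):
        if tph_cnt[r] and hpt_cnt[r]:
            tph = tph_sum[r] / tph_cnt[r]   # avg tails per head
            hpt = hpt_sum[r] / hpt_cnt[r]   # avg heads per tail
            prob[r] = tph / (tph + hpt)     # corrupt head with this prob
    return prob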
Example #10
    def train(self, train_data, caches, corrupter, tester_val, tester_tst):
        heads, tails, relas = train_data
        # useful information related to cache
        head_idxs, tail_idxs, self.head_cache, self.tail_cache, self.head_pos, self.tail_pos = caches
        self.head_score = np.random.randn(len(self.head_cache), self.args.N_1)
        self.tail_score = np.random.randn(len(self.tail_cache), self.args.N_1)
        n_train = len(heads)

        if self.args.optim=='adam' or self.args.optim=='Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr, weight_decay=self.weight_decay)
        elif self.args.optim=='adagrad' or self.args.optim=='Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr, weight_decay=self.weight_decay)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr, weight_decay=self.weight_decay)

        n_epoch = self.args.n_epoch
        n_batch = self.args.n_batch
        best_mrr = 0

        losses = []
        for epoch in range(n_epoch):
            start = time.time()
            self.epoch = epoch

            # positive sampling
            logits = self.cache_score.copy()  # copy so the cached scores themselves are not clipped
            quant_lo = np.quantile(logits, 0.2)
            quant_hi = np.quantile(logits, 0.8)
            logits[logits < quant_lo] = quant_lo
            logits[logits > quant_hi] = quant_hi
            logits = (logits - quant_lo) / (quant_hi - quant_lo)
            logits = logits * self.args.alpha_1
            probb = np.exp(logits) / np.exp(logits).sum()
            if epoch == 0:      # use uniform sampling for the first epoch
                probb = np.ones((n_train,)) / n_train
            
            indices = np.random.choice(n_train, n_train, replace=False, p=probb)
            rand_idx = torch.LongTensor(indices)
            head = heads[rand_idx].cuda()
            tail = tails[rand_idx].cuda()
            rela = relas[rand_idx].cuda()
            head_idx = head_idxs[indices]
            tail_idx = tail_idxs[indices]

            epoch_loss = 0

            if self.args.save and epoch==self.args.s_epoch:
                self.save(os.path.join(self.args.task_dir, self.args.model + '.mdl'))

            iters = 0
            for h, t, r, h_idx, t_idx, idx in batch_by_size(n_batch, head, tail, rela, head_idx, tail_idx, indices, n_sample=n_train):
                self.model.zero_grad()

                h_rand, t_rand = self.neg_sample(h, t, r, h_idx, t_idx, self.args.sample, self.args.loss)
              
                # Bernoulli sampling to select (h', r, t) and (h, r, t')
                prob = corrupter.bern_prob[r]
                selection = torch.bernoulli(prob).type(torch.ByteTensor).cuda()
                n_h = torch.LongTensor(h.cpu().numpy()).cuda()
                n_t = torch.LongTensor(t.cpu().numpy()).cuda()
                n_r = torch.LongTensor(r.cpu().numpy()).cuda()
                if n_h.size() != h_rand.size():
                    n_h = n_h.unsqueeze(1).expand_as(h_rand)
                    n_t = n_t.unsqueeze(1).expand_as(h_rand)
                    n_r = n_r.unsqueeze(1).expand_as(h_rand)
                    h = h.unsqueeze(1)
                    r = r.unsqueeze(1)
                    t = t.unsqueeze(1)
                    
                n_h[selection] = h_rand[selection]
                n_t[~selection] = t_rand[~selection]
                
                if self.args.sample != 'bern' and iters % self.args.lazy==0:
                    self.update_cache(h, t, r, idx, h_idx, t_idx)

                if self.args.loss == 'point':
                    p_loss = torch.sum(self.model.point_loss(h, t, r, 1))
                    n_loss = torch.sum(self.model.point_loss(n_h, n_t, n_r, -1))
                    loss = p_loss + n_loss
                else:
                    loss = self.model.pair_loss(h, t, r, n_h, n_t)
                
                loss.backward()
                self.optimizer.step()
                self.remove_nan()
                epoch_loss += loss.data.cpu().numpy()
                iters += 1
            # get the time of each epoch
            self.time_tot += time.time() - start
            losses.append(round(epoch_loss/n_train, 4))
           
               
            if (epoch+1) % self.args.epoch_per_test == 0:
                # output performance 
                valid_mrr, valid_mr, valid_1, valid_3, valid_10 = tester_val()
                test_mrr,  test_mr,  test_1,  test_3,  test_10 =  tester_tst()
                out_str = '%d\t%.2f\t%.4f %.1f %.4f %.4f %.4f\t%.4f %.1f %.4f %.4f %.4f\n' % (epoch, self.time_tot, \
                        valid_mrr, valid_mr, valid_1, valid_3, valid_10, \
                        test_mrr, test_mr, test_1, test_3, test_10)
                with open(self.args.perf_file, 'a') as f:
                    f.write(out_str)

                # remove false negative 
                if self.args.remove:
                    self.remove_positive(self.args.remove)

                # output the best performance info
                if valid_mrr > best_mrr:
                    best_mrr = valid_mrr
                    best_str = out_str
        return best_mrr, best_str
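As a standalone function, the quantile-clipped softmax that Example #10 uses for positive sampling looks like the sketch below; the copy guards the cached scores against the in-place clipping noted above, and the max-shift makes the softmax numerically stable:

import numpy as np

def positive_sampling_probs(cache_score, alpha, lo=0.2, hi=0.8):
    logits = np.asarray(cache_score, dtype=np.float64).copy()
    q_lo, q_hi = np.quantile(logits, lo), np.quantile(logits, hi)
    logits = np.clip(logits, q_lo, q_hi)
    logits = (logits - q_lo) / (q_hi - q_lo)   # rescale to [0, 1]
    logits = logits * alpha                    # temperature, alpha_1 above
    probs = np.exp(logits - logits.max())      # numerically stable softmax
    return probs / probs.sum()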