Example #1
class BaseModel(object):
    def __init__(self, n_ent, n_rel, args, struct):
        self.model = KGEModule(n_ent, n_rel, args, struct)
        self.model.cuda()

        self.n_ent = n_ent
        self.n_rel = n_rel
        self.time_tot = 0
        self.args = args
Example #2
class BaseModel(object):
    def __init__(self, n_ent, n_rel, args, rela_cluster, tester_val,
                 tester_tst, tester_trip):

        self.tester_val = tester_val
        self.tester_tst = tester_tst
        self.tester_trip = tester_trip

        GPU = args.GPU
        m = args.m
        n = args.n
        cluster_way = args.clu

        self.model = KGEModule(n_ent, n_rel, args, GPU, rela_cluster, m, n)

        if GPU:
            self.model.cuda()

        self.n_ent = n_ent
        self.n_rel = n_rel
        self.rela_cluster = rela_cluster
        self.time_tot = 0
        self.args = args
        self.n_dim = args.n_dim
        self.K = m
        self.n = n
        self.GPU = GPU
        self.cluster_way = cluster_way

        self.rela_to_dict(rela_cluster)

        # build controller and sub-model
        self.controller = None
        self.build_controller()

        controller_lr = 3.5e-4
        controller_optimizer = _get_optimizer(self.args.controller_optim)
        self.controller_optim = controller_optimizer(
            self.controller.parameters(), lr=controller_lr)

        self.derived_reward_history = []
        self.derived_struct_history = []
        if self.cluster_way == "scu":
            self.rela_cluster_history = []
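
# `_get_optimizer` above is a project-local helper. Judging from its call site
# (it returns an optimizer class that is then instantiated with parameters and
# a learning rate), a minimal sketch could look as follows; the mapping and
# the accepted names are assumptions:
import torch.optim as optim

def _get_optimizer(name):
    # Map an optimizer name to its torch.optim class; the class is
    # instantiated later, e.g. _get_optimizer("adam")(params, lr=3.5e-4).
    return {"adam": optim.Adam, "adagrad": optim.Adagrad, "sgd": optim.SGD}[name.lower()]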
Example #3
class BaseModel(object):
    def __init__(self, n_ent, n_rel, args, arch):

        self.model = KGEModule(n_ent, n_rel, args, arch)

        self.op_idx = torch.LongTensor(arch)

        if args.GPU:
            self.model.cuda()

        self.n_ent = n_ent
        self.n_rel = n_rel

        self.time_tot = 0
        self.args = args
        self.n_dim = args.n_dim
        self.K = args.num_blocks
        self.GPU = args.GPU

        self.n_arity = args.n_arity + 1

        # initialize the arch parameters
        self.n_ops = self.K**self.n_arity
        self.categories = np.asarray([3 for i in range(self.n_ops)])
        alpha, init_delta, trained_theta = 1.5, 1.0, None
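
# The snippet ends where the ASNG distribution over `self.categories` would be
# built (Example #4 shows the corresponding `CategoricalASNG` call, commented
# out). A rough sketch of sampling one-hot operator choices from such
# categorical variables; the names are illustrative, and the real
# CategoricalASNG also adapts its distribution from observed rewards:
import numpy as np

def sample_one_hot(categories, rng=None):
    # Draw one one-hot choice per categorical variable; categories[i] is the
    # number of options (3 here) for operator slot i.
    rng = rng or np.random.default_rng()
    arch = np.zeros((len(categories), int(max(categories))), dtype=bool)
    for i, c in enumerate(categories):
        arch[i, rng.integers(int(c))] = True
    return arch

# e.g. K = 2 blocks and arity 3 give 2**3 = 8 slots with 3 options each
arch = sample_one_hot(np.asarray([3] * 8))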
Example #4
class BaseModel(object):
    def __init__(self, n_ent, n_rel, args, arch):

        self.model = KGEModule(n_ent, n_rel, args, arch)

        self.op_idx = torch.LongTensor(arch)

        if args.GPU:
            self.model.cuda()

        self.n_ent = n_ent
        self.n_rel = n_rel

        self.time_tot = 0
        self.args = args
        self.n_dim = args.n_dim
        self.K = args.num_blocks
        self.GPU = args.GPU

        self.n_arity = args.n_arity + 1

        # initialize the arch parameters
        self.n_ops = self.K**self.n_arity
        self.categories = np.asarray([3 for i in range(self.n_ops)])
        alpha, init_delta, trained_theta = 1.5, 1.0, None
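        # NOTE: get_reward() below samples from self.asng, so the commented
        # CategoricalASNG construction on the next line must be enabled (and
        # CategoricalASNG imported) before architecture sampling is used.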
        #self.asng = CategoricalASNG(self.categories, alpha=alpha, init_delta=init_delta, init_theta=trained_theta)

    def save(self, filename):
        torch.save(self.model.state_dict(), filename)

    def load(self, filename):
        self.model.load_state_dict(
            torch.load(filename,
                       map_location=lambda storage, location: storage.cuda()))
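
    # NOTE: the map_location above always moves loaded tensors to CUDA; a
    # CPU-only run would need map_location='cpu' instead.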

    def get_reward(self, facts):
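        # Sample two candidate architectures from the ASNG distribution and
        # score each either by training loss (M_val == "loss") or by negated
        # validation MRR (M_val == "mrr"); lower is better in both cases.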

        if self.args.M_val == "loss":
            archs, loss_archs = [], []
            with torch.no_grad():
                for i in range(2):
                    arch = self.asng.sampling()
                    archs.append(arch)

                    loss_arch = self.model._loss(facts, arch)
                    #loss_arch += self.model.args.lamb * self.model.regul
                    loss_archs.append(loss_arch)

        elif self.args.M_val == "mrr":
            archs, loss_archs = [], []
            with torch.no_grad():
                for i in range(2):
                    arch = self.asng.sampling()
                    archs.append(arch)

                    result = self.tester_val(facts, arch)
                    #                    result = self.tester_val(arch = arch)
                    loss_archs.append(-result[0])

#                    if result[0] > 0.3200:
#                        self.good_struct.append([item.index(True) for item in arch.tolist()])
#                    if result[0] > self.best_mrr:
#                        self.best_mrr = result[0]
#                        self.best_struct = [item.index(True) for item in arch.tolist()]

        return archs, loss_archs  #, embed_time, mrr_time

    def train(self,
              train_data,
              valid_data,
              tester_val,
              tester_tst,
              tester_trip=None):

        self.tester_val = tester_val

        if self.args.optim == 'adam' or self.args.optim == 'Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)

        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

        n_epoch = self.args.n_epoch
        n_batch = self.args.n_batch
        self.best_mrr = 0

        # useful information related to cache
        n_train = train_data.size(0)
        n_valid = valid_data.size(0)

        self.good_struct = []

        for epoch in range(n_epoch):

            self.model.train()

            start = time.time()

            self.epoch = epoch
            rand_idx = torch.randperm(n_train)

            if self.GPU:
                train_data = train_data[rand_idx].cuda()
            else:
                train_data = train_data[rand_idx]

            epoch_loss = 0

            for facts in batch_by_size(n_batch, train_data, n_sample=n_train):

                self.model.zero_grad()
                if self.n_arity == 3:
                    loss = self.model.forward(facts, self.op_idx)
                    loss.backward()

                elif self.n_arity == 4:
                    loss = self.model.forward_tri(facts, self.op_idx)
                    loss.backward()
                """kge step"""
                self.optimizer.step()
                self.prox_operator()

                epoch_loss += loss.data.cpu().numpy()

            scheduler.step()
            self.time_tot += time.time() - start
            print("Epoch: %d/%d, Loss=%.8f, Time=%.4f" %
                  (epoch + 1, n_epoch, epoch_loss / n_train,
                   time.time() - start))

            if (epoch + 1) % self.args.epoch_per_test == 0:

                valid_mrr, valid_mr, valid_1, valid_3, valid_10 = tester_val()
                test_mrr, test_mr, test_1, test_3, test_10 = tester_tst()
                # log only in the binary-relation setting, where out_str is defined
                if tester_trip is None:
                    out_str = '%d\t%.2f %.2f \t%.4f  %.1f %.4f %.4f %.4f\t%.4f %.1f %.4f %.4f %.4f\n' % (
                        epoch, self.time_tot, epoch_loss / n_train,
                        valid_mrr, valid_mr, valid_1, valid_3, valid_10,
                        test_mrr, test_mr, test_1, test_3, test_10)

                    with open(self.args.perf_file, 'a') as f:
                        f.write(out_str)

                if test_mrr > self.best_mrr:
                    self.best_mrr = test_mrr

        with open(self.args.perf_file, 'a') as f:
            f.write("arch:" + str(self.op_idx.tolist()) + "\n")
            f.write("best mrr:" + str(self.best_mrr) + "\n")

        return self.best_mrr

    def prox_operator(self):
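        # Project each entity embedding onto the unit L2 ball:
        # x <- x / max(1, ||x||_2); rows with norm below 1 are left unchanged.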
        for n, p in self.model.named_parameters():
            if 'ent' in n:
                X = p.data.clone()
                Z = torch.norm(X, p=2, dim=1, keepdim=True)
                Z[Z < 1] = 1
                X = X / Z
                p.data.copy_(X.view(self.n_ent, -1))

    def name(self, idx):
        i = idx[0]
        i_rc = self.rela_cluster[i]
        self.r_embed[i, :, :] = self.rel_embed_2K_1[
            i, self.idx_list[i_rc], :] * self.model._arch_parameters[i_rc][
                [j for j in range(self.K * self.K)], self.idx_list[i_rc]].view(
                    -1, 1)

    def test_link(self, test_data, n_ent, heads, tails, filt=True, arch=None):

        mrr_tot = 0.
        mr_tot = 0
        hit_tot = np.zeros((3, ))
        count = 0

        #        if arch is None:
        #            max_idx = torch.Tensor(self.asng.p_model.theta).argmax(1)
        #        else:
        #            max_idx = torch.LongTensor([item.index(True) for item in arch.tolist()])

        max_idx = self.op_idx

        for facts in batch_by_size(self.args.test_batch_size, test_data):

            if self.GPU:

                batch_h = facts[:, 0].cuda()
                batch_t = facts[:, 1].cuda()
                batch_r = facts[:, 2].cuda()
            else:
                batch_h = facts[:, 0]
                batch_t = facts[:, 1]
                batch_r = facts[:, 2]

            length = self.n_dim // self.K
            h_embed = self.model.ent_embed(batch_h).view(-1, self.K, length)
            t_embed = self.model.ent_embed(batch_t).view(-1, self.K, length)
            r_embed = self.model.rel_embed(batch_r).view(-1, self.K, length)

            head_scores = torch.sigmoid(
                self.model.bin_neg_other(r_embed, t_embed, max_idx, 1)).data
            tail_scores = torch.sigmoid(
                self.model.bin_neg_other(r_embed, h_embed, max_idx, 2)).data

            for h, t, r, head_score, tail_score in zip(batch_h, batch_t,
                                                       batch_r, head_scores,
                                                       tail_scores):
                h_idx = int(h.data.cpu().numpy())
                t_idx = int(t.data.cpu().numpy())
                r_idx = int(r.data.cpu().numpy())
                if filt:  # filter
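                    # Filtered ranking: zero the scores of all other known true
                    # entities for this query, then restore the target's score
                    # so it competes only against genuine negatives.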
                    if tails[(h_idx, r_idx)]._nnz() > 1:
                        tmp = tail_score[t_idx].data.cpu().numpy()
                        idx = tails[(h_idx, r_idx)]._indices()
                        tail_score[idx] = 0.0

                        if self.GPU:
                            tail_score[t_idx] = torch.from_numpy(tmp).cuda()
                        else:
                            tail_score[t_idx] = torch.from_numpy(tmp)

                    if heads[(t_idx, r_idx)]._nnz() > 1:
                        tmp = head_score[h_idx].data.cpu().numpy()
                        idx = heads[(t_idx, r_idx)]._indices()
                        head_score[idx] = 0.0
                        if self.GPU:
                            head_score[h_idx] = torch.from_numpy(tmp).cuda()
                        else:
                            head_score[h_idx] = torch.from_numpy(tmp)

                mrr, mr, hit = mrr_mr_hitk(tail_score, t_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                mrr, mr, hit = mrr_mr_hitk(head_score, h_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                count += 2

        if arch is None:
            logging.info('Test_MRR=%f, Test_MR=%f, Test_H=%f %f %f, Count=%d',
                         float(mrr_tot) / count,
                         float(mr_tot) / count, hit_tot[0] / count,
                         hit_tot[1] / count, hit_tot[2] / count, count)

        return (float(mrr_tot) / count, float(mr_tot) / count,
                hit_tot[0] / count, hit_tot[1] / count,
                hit_tot[2] / count)  #, total_loss/n_test

    def evaluate(self, test_data, e1_sp, e2_sp, e3_sp, arch=None):

        mrr_tot = 0.
        mr_tot = 0
        hit_tot = np.zeros((3, ))
        count = 0

        #        if arch is None:
        #            max_idx = torch.Tensor(self.asng.p_model.theta).argmax(1)
        #        else:
        #            max_idx = torch.LongTensor([item.index(True) for item in arch.tolist()])

        self.model.eval()

        max_idx = self.op_idx

        for facts in batch_by_size(self.args.test_batch_size, test_data):

            if self.GPU:
                r, e1, e2, e3 = (facts[:, 0].cuda(), facts[:, 1].cuda(),
                                 facts[:, 2].cuda(), facts[:, 3].cuda())
            else:
                r, e1, e2, e3 = (facts[:, 0], facts[:, 1],
                                 facts[:, 2], facts[:, 3])

            length = self.n_dim // self.K

            #            r_embed = self.model.rel_embed(r).view(-1, self.K, length)
            #            e1_embed = self.model.ent_embed(e1).view(-1, self.K, length)
            #            e2_embed = self.model.ent_embed(e2).view(-1, self.K, length)
            #            e3_embed = self.model.ent_embed(e3).view(-1, self.K, length)

            r_embed = self.model.bnr(self.model.rel_embed(r)).view(
                -1, self.K, length)
            e1_embed = self.model.input_dropout(
                self.model.bne(self.model.ent_embed(e1))).view(
                    -1, self.K, length)
            e2_embed = self.model.input_dropout(
                self.model.bne(self.model.ent_embed(e2))).view(
                    -1, self.K, length)
            e3_embed = self.model.input_dropout(
                self.model.bne(self.model.ent_embed(e3))).view(
                    -1, self.K, length)

            e1_scores = F.softmax(self.model.tri_neg_other(
                r_embed, e2_embed, e3_embed, max_idx, 1),
                                  dim=1).data
            e2_scores = F.softmax(self.model.tri_neg_other(
                r_embed, e1_embed, e3_embed, max_idx, 2),
                                  dim=1).data
            e3_scores = F.softmax(self.model.tri_neg_other(
                r_embed, e1_embed, e2_embed, max_idx, 3),
                                  dim=1).data

            for idx in range(len(r)):
                r_idx = int(r[idx].data.cpu().numpy())
                e1_idx = int(e1[idx].data.cpu().numpy())
                e2_idx = int(e2[idx].data.cpu().numpy())
                e3_idx = int(e3[idx].data.cpu().numpy())

                if e1_sp[(r_idx, e2_idx, e3_idx)]._nnz() > 1:
                    tmp = e1_scores[idx][e1_idx].data.cpu().numpy()
                    indic = e1_sp[(r_idx, e2_idx, e3_idx)]._indices()
                    e1_scores[idx][indic] = 0.0
                    if self.GPU:
                        e1_scores[idx][e1_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        e1_scores[idx][e1_idx] = torch.from_numpy(tmp)

                if e2_sp[(r_idx, e1_idx, e3_idx)]._nnz() > 1:
                    tmp = e2_scores[idx][e2_idx].data.cpu().numpy()
                    indic = e2_sp[(r_idx, e1_idx, e3_idx)]._indices()
                    e2_scores[idx][indic] = 0.0
                    if self.GPU:
                        e2_scores[idx][e2_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        e2_scores[idx][e2_idx] = torch.from_numpy(tmp)

                if e3_sp[(r_idx, e1_idx, e2_idx)]._nnz() > 1:
                    tmp = e3_scores[idx][e3_idx].data.cpu().numpy()
                    indic = e3_sp[(r_idx, e1_idx, e2_idx)]._indices()
                    e3_scores[idx][indic] = 0.0
                    if self.GPU:
                        e3_scores[idx][e3_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        e3_scores[idx][e3_idx] = torch.from_numpy(tmp)

                mrr, mr, hit = mrr_mr_hitk(e1_scores[idx], e1_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit

                mrr, mr, hit = mrr_mr_hitk(e2_scores[idx], e2_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit

                mrr, mr, hit = mrr_mr_hitk(e3_scores[idx], e3_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit

                count += 3

        logging.info('Test_MRR=%f, Test_MR=%f, Test_H=%f %f %f, Count=%d',
                     float(mrr_tot) / count,
                     float(mr_tot) / count, hit_tot[0] / count,
                     hit_tot[1] / count, hit_tot[2] / count, count)

        return (float(mrr_tot) / count, float(mr_tot) / count,
                hit_tot[0] / count, hit_tot[1] / count,
                hit_tot[2] / count)  #, embed_time, mrr_time

    """
    def evaluate(self, test_data):
        hits, ranks, losses = [], [], []
        for _ in [1, 3, 10]:
            hits.append([])
        
        ary = test_data.size(1) - 1
                        
        er_vocab_list = []
        er_vocab_pairs_list = []
        for miss_ent_domain in range(1, ary+1):
            er_vocab = self.get_er_vocab(test_data, miss_ent_domain)
            er_vocab_pairs = list(er_vocab.keys())
            er_vocab_list.append(er_vocab)
            er_vocab_pairs_list.append(er_vocab_pairs)
            
        max_idx = torch.Tensor(self.asng.p_model.theta).argmax(1)
        
        for miss_ent_domain in range(1, ary+1):
            er_vocab = er_vocab_list[miss_ent_domain-1]

            #for i in range(0, len(test_data_idxs), self.batch_size):
            for facts in batch_by_size(self.args.test_batch_size, test_data):
                
                
                if self.GPU:
                    r_idx, e1_idx, e2_idx, e3_idx = facts[:,0].cuda(), facts[:,1].cuda(), facts[:,2].cuda(), facts[:,3].cuda()
                else:
                    r_idx, e1_idx, e2_idx, e3_idx = facts[:,0], facts[:,1], facts[:,2], facts[:,3]
                    
                    
                length = self.n_dim // self.K
                r_embed = self.model.rel_embed(r_idx).view(-1, self.K, length)
                e1_embed = self.model.ent_embed(e1_idx).view(-1, self.K, length)
                e2_embed = self.model.ent_embed(e2_idx).view(-1, self.K, length)
                e3_embed = self.model.ent_embed(e3_idx).view(-1, self.K, length)
                
                e1_scores = torch.sigmoid(self.model.tri_neg_other(r_embed, e2_embed, e3_embed, max_idx, 1)).data
                e2_scores = torch.sigmoid(self.model.tri_neg_other(r_embed, e1_embed, e3_embed, max_idx, 2)).data
                e3_scores = torch.sigmoid(self.model.tri_neg_other(r_embed, e1_embed, e2_embed, max_idx, 3)).data
                
                
                if ary == 3:
                    if miss_ent_domain == 1:
                        e_idx = [e2_idx, e3_idx]
                        pred = e1_scores
                    elif miss_ent_domain == 2:
                        e_idx = [e1_idx, e3_idx]
                        pred = e2_scores
                    elif miss_ent_domain == 3:
                        e_idx = [e1_idx, e2_idx]
                        pred = e3_scores
                         
                #pred, _ = model.forward(r_idx, e_idx, miss_ent_domain, W)
                
                
                
                e_all_idx = []
                for k0 in range(1, ary+1):
                    e_all_idx.append(torch.LongTensor(facts[:, k0]))
                

                #print(er_vocab)
                
                for j in range(facts.shape[0]):
                    er_vocab_key = []
                    for k0 in range(ary+1):
                        er_vocab_key.append(facts[j][k0])
                    er_vocab_key.remove(facts[j][miss_ent_domain])
                    
                    filt = er_vocab[tuple(er_vocab_key)]
                    
                    if filt != []:
                        print(er_vocab)
                    
#                    print(er_vocab)
#                    print(tuple(er_vocab_key))
#                    print(filt)
                    
                    target_value = pred[j, e_all_idx[miss_ent_domain-1][j]].item()
                    pred[j, filt] = 0.0
                    pred[j, e_all_idx[miss_ent_domain-1][j]] = target_value
                    
                    
                sort_values, sort_idxs = torch.sort(pred, dim=1, descending=True)
                sort_idxs = sort_idxs.cpu().numpy()
                for j in range(facts.shape[0]):
                    rank = np.where(sort_idxs[j] == e_all_idx[miss_ent_domain-1][j].item())[0][0]
                    ranks.append(rank+1)
                    for id, hits_level in enumerate([1, 3, 10]):
                        if rank+1 <= hits_level:
                            hits[id].append(1.0)
                        else:
                            hits[id].append(0.0)
         

        #logging.info('Test_MRR=%f, Test_MR=%f, Test_H=%f %f %f', np.mean(1./np.array(ranks)), 0, np.mean(hits[0]), np.mean(hits[1]), np.mean(hits[2]))
        #return np.mean(1./np.array(ranks)), 0, np.mean(hits[0]), np.mean(hits[1]), np.mean(hits[2])

    
    
    def get_er_vocab(self, data, miss_ent_domain):
        er_vocab = defaultdict(list)
        if len(data[0])-1 == 3:
            if miss_ent_domain == 1:
                for triple in data:
                    er_vocab[(triple[0], triple[2], triple[3])].append(triple[1])
            elif miss_ent_domain == 2:
                for triple in data:
                    er_vocab[(triple[0], triple[1], triple[3])].append(triple[2])
            elif miss_ent_domain == 3:
                for triple in data:
                    er_vocab[(triple[0], triple[1], triple[2])].append(triple[3])
        elif len(data[0])-1 == 4:
            if miss_ent_domain == 1:
                for triple in data:
                    er_vocab[(triple[0], triple[2], triple[3], triple[4])].append(triple[1])
            elif miss_ent_domain == 2:
                for triple in data:
                    er_vocab[(triple[0], triple[1], triple[3], triple[4])].append(triple[2])
            elif miss_ent_domain == 3:
                for triple in data:
                    er_vocab[(triple[0], triple[1], triple[2], triple[4])].append(triple[3])
            elif miss_ent_domain == 4:
                for triple in data:
                    er_vocab[(triple[0], triple[1], triple[2], triple[3])].append(triple[4])
        return er_vocab
    """
    """
Example #5
class BaseModel(object):
    def __init__(self, n_ent, n_rel, args, struct):
        self.model = KGEModule(n_ent, n_rel, args, struct)
        self.model.cuda()

        self.n_ent = n_ent
        self.n_rel = n_rel
        self.time_tot = 0
        self.args = args

    def train(self, train_data, tester_val, tester_tst):
        head, tail, rela = train_data
        # useful information related to cache
        n_train = len(head)

        if self.args.optim == 'adam' or self.args.optim == 'Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)

        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

        n_epoch = self.args.n_epoch
        n_batch = self.args.n_batch
        best_mrr, best_str = 0, ''

        # used for counting repeated triplets for margin based loss

        for epoch in range(n_epoch):
            start = time.time()

            self.epoch = epoch
            rand_idx = torch.randperm(n_train)
            head = head[rand_idx].cuda()
            tail = tail[rand_idx].cuda()
            rela = rela[rand_idx].cuda()

            epoch_loss = 0

            for h, t, r in batch_by_size(n_batch, head, tail, rela, n_sample=n_train):
                self.model.zero_grad()

                loss = self.model.forward(h, t, r)
                loss += self.args.lamb * self.model.regul
                loss.backward()
                self.optimizer.step()
                self.prox_operator()
                epoch_loss += loss.data.cpu().numpy()

            self.time_tot += time.time() - start
            scheduler.step()

            if (epoch + 1) % self.args.epoch_per_test == 0:
                # output performance
                valid_mrr, valid_mr, valid_10 = tester_val()
                test_mrr, test_mr, test_10 = tester_tst()
                out_str = '%d\t\t%.4f\t%.4f\t%.4f\n' % (epoch + 1, test_mr, test_mrr, test_10)

                # track the best performance seen so far
                if valid_mrr > best_mrr:
                    best_mrr = valid_mrr
                    best_str = out_str
                # prune: stop early if the best MRR stays below the threshold
                if best_mrr < self.args.thres:
                    print('\tearly stopped in Epoch:{}, best_mrr:{}'.format(epoch + 1, best_mrr), self.model.struct)
                    return best_mrr, best_str
        return best_mrr, best_str

    def prox_operator(self):
        for n, p in self.model.named_parameters():
            if 'ent' in n:
                X = p.data.clone()
                Z = torch.norm(X, p=2, dim=1, keepdim=True)
                Z[Z < 1] = 1
                X = X / Z
                p.data.copy_(X.view(self.n_ent, -1))

    def test_link(self, test_data, head_filter, tail_filter):
        heads, tails, relas = test_data
        batch_size = self.args.test_batch_size
        num_batch = len(heads) // batch_size + int(len(heads) % batch_size > 0)

        head_probs = []
        tail_probs = []
        for i in range(num_batch):
            start = i * batch_size
            end = min((i + 1) * batch_size, len(heads))
            batch_h = heads[start:end].cuda()
            batch_t = tails[start:end].cuda()
            batch_r = relas[start:end].cuda()

            h_embed = self.model.ent_embed(batch_h)
            r_embed = self.model.rel_embed(batch_r)
            t_embed = self.model.ent_embed(batch_t)

            head_scores = torch.sigmoid(self.model.test_head(r_embed, t_embed)).data
            tail_scores = torch.sigmoid(self.model.test_tail(h_embed, r_embed)).data

            head_probs.append(head_scores.data.cpu().numpy())
            tail_probs.append(tail_scores.data.cpu().numpy())

        head_probs = np.concatenate(head_probs) * head_filter
        tail_probs = np.concatenate(tail_probs) * tail_filter
        head_ranks = cal_ranks(head_probs, label=heads.data.numpy())
        tail_ranks = cal_ranks(tail_probs, label=tails.data.numpy())
        h_mrr, h_mr, h_h10 = cal_performance(head_ranks)
        t_mrr, t_mr, t_h10 = cal_performance(tail_ranks)
        mrr = (h_mrr + t_mrr) / 2
        mr = (h_mr + t_mr) / 2
        h10 = (h_h10 + t_h10) / 2
        return mrr, mr, h10
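
# `cal_ranks` and `cal_performance` above are also project-local. A plausible
# minimal sketch, assuming probs is an (n_test, n_ent) score matrix and label
# holds the target entity id per row (the real helpers may break ties
# differently):
import numpy as np

def cal_ranks(probs, label):
    # 1-based rank of each target among all entities (higher score = better)
    target = probs[np.arange(len(probs)), label]
    return (probs >= target[:, None]).sum(axis=1)

def cal_performance(ranks):
    # MRR, MR and hits@10 from an array of 1-based ranks
    ranks = np.asarray(ranks, dtype=float)
    return float(np.mean(1.0 / ranks)), float(np.mean(ranks)), float(np.mean(ranks <= 10))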
class BaseModel(object):
    def __init__(self, n_ent, n_rel, args, rela_cluster, tester_val,
                 tester_tst, tester_trip):

        self.tester_val = tester_val
        self.tester_tst = tester_tst
        self.tester_trip = tester_trip

        GPU = args.GPU
        m = args.m
        n = args.n
        cluster_way = args.clu

        self.model = KGEModule(n_ent, n_rel, args, GPU, rela_cluster, m, n)

        if GPU:
            self.model.cuda()

        self.n_ent = n_ent
        self.n_rel = n_rel
        self.rela_cluster = rela_cluster
        self.time_tot = 0
        self.args = args
        self.n_dim = args.n_dim
        self.K = m
        self.n = n
        self.GPU = GPU
        self.cluster_way = cluster_way

        self.rela_to_dict(rela_cluster)

        # build controller and sub-model
        self.controller = None
        self.build_controller()

        controller_lr = 3.5e-4
        controller_optimizer = _get_optimizer(self.args.controller_optim)
        self.controller_optim = controller_optimizer(
            self.controller.parameters(), lr=controller_lr)

        self.derived_reward_history = []
        self.derived_struct_history = []
        if self.cluster_way == "scu":
            self.rela_cluster_history = []

    def build_controller(self):

        self.search_space, self.action_list = _get_space_op(self.K, self.n)

        # build RNN controller
        from nas.controller import SimpleNASController
        self.controller = SimpleNASController(self.args,
                                              action_list=self.action_list,
                                              search_space=self.search_space,
                                              cuda=self.GPU)

        if self.GPU:
            self.controller.cuda()

    def mm_train(self, train_data, valid_data):
        """
        Each epoch consists of two phase:
        - In the first phase, shared parameters are trained to exploration.
        - In the second phase, the controller's parameters are trained.
        """

        #num_epoch = self.args.n_epoch
        derived_struct = _init_struct("DistMult", self.K, self.n)

        self.train_oas(train_data, valid_data, derived_struct)

        derived_struct, derived_mrr = self.derive_last()

        if self.cluster_way == "scu":
            return (self.derived_reward_history, self.derived_struct_history,
                    self.rela_cluster_history), (derived_mrr, derived_struct,
                                                 self.rela_cluster)

        elif self.cluster_way == "pde":
            return self.derived_reward_history, self.derived_struct_history, derived_struct

    def get_reward(self, struct_list, test=False, random=True):
        """
        Computes the reward of a single sampled model on validation data.
        """
        reward_list = []
        if random:
            randint = None
        else:
            randint = torch.randint(10000, (1, ))

        for struct in struct_list:
            #print("x")

            struct = torch.LongTensor([int(item) for item in struct])
            struct = struct.view(-1, self.K * self.K)

            if not test:

                valid_mrr, valid_mr, valid_1, valid_3, valid_10 = self.tester_val(
                    struct, test, randint)
                reward_list.append(valid_mrr)

            else:
                valid_mrr, valid_mr, valid_1, valid_3, valid_10 = self.tester_val(
                    struct, test, randint)
                test_mrr, test_mr, test_1, test_3, test_10 = self.tester_tst(
                    struct, test, randint)
                # record the validation MRR; the test MRR is only for monitoring
                reward_list.append(valid_mrr)

        if not test:
            return reward_list
        else:
            return reward_list, test_mrr

    def train_controller(self):
        """
            Train controller to find better structure.
        """

        model = self.controller
        model.train()

        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []

        total_loss = 0

        for step in range(self.args.n_controller_epoch):

            # sample struct
            structure_list, log_probs, entropies = self.controller.sample(
                with_details=True)

            # calculate reward
            np_entropies = entropies.data.cpu().numpy()
            rewards = self.get_reward(structure_list, random=False)
            rewards = np.array(rewards)
            torch.cuda.empty_cache()

            # reward discounting is effectively disabled here: a factor of 1.0
            # skips the branch; `discount` is assumed to be a project helper
            # used when a factor in (0, 1) is configured
            discount_factor = 1.0
            if 1 > discount_factor > 0:
                rewards = discount(rewards, discount_factor)

            reward_history.extend(rewards)
            entropy_history.extend(np_entropies)

            ema_baseline_decay = 0.5

            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = ema_baseline_decay
                baseline = decay * baseline + (1 - decay) * rewards

            adv = rewards - baseline
            adv = scale(adv, scale_value=0.5)
            adv_history.extend(adv)

            adv = utils.get_variable(adv, self.GPU, requires_grad=False)

            # policy loss
            loss = -log_probs * adv
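            # REINFORCE: minimizing -log_prob * advantage ascends the expected
            # reward, with the EMA baseline above reducing gradient variance.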

            entropy_mode = "reward"
            if entropy_mode == 'regularizer':
                loss -= self.args.entropy_coeff * entropies

            loss = loss.sum()  # or loss.mean()

            # update
            self.controller_optim.zero_grad()
            loss.backward()

            controller_grad_clip = 0
            if controller_grad_clip > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               controller_grad_clip)
            self.controller_optim.step()

            total_loss += utils.to_item(loss.data)

            torch.cuda.empty_cache()

    def derive(self, sample_num=None):

        derive_from_history = False

        if sample_num is None and derive_from_history:
            return self.derive_from_history()
        else:
            structure_list, _, entropies = self.controller.sample(
                sample_num, with_details=True)

            rewards, test_mrr = self.get_reward(structure_list,
                                                test=True,
                                                random=True)
            rewards = torch.Tensor(rewards)

            best_struct = structure_list[0]
            best_struct = torch.LongTensor([int(item) for item in best_struct])
            best_struct = best_struct.view(-1, self.K * self.K)

            self.derived_reward_history.append(max(rewards))
            self.derived_struct_history.append(best_struct)

            return best_struct, test_mrr

    def derive_last(self):

        rewards_list, structs_list = [], []
        for itr in range(self.args.n_derive_sample):
            structure_list, _, entropies = self.controller.sample(
                1, with_details=True)
            rewards, test_mrr = self.get_reward(structure_list,
                                                test=True,
                                                random=True)

            struct = structure_list[0]
            struct = torch.LongTensor([int(item) for item in struct])
            struct = struct.view(-1, self.K * self.K)

            rewards_list.append(max(rewards))
            structs_list.append(struct)

        max_idx = rewards_list.index(max(rewards_list))  # select

        return structs_list[max_idx], rewards_list[max_idx]

    def rela_to_dict(self, rela_cluster):
        self.cluster_rela_dict = dict()
        n = max(rela_cluster) + 1
        for i in range(n):
            self.cluster_rela_dict[i] = []

        for idx, item in enumerate(rela_cluster):
            self.cluster_rela_dict[item].append(idx)

        for i in range(n):
            self.cluster_rela_dict[i] = torch.LongTensor(
                self.cluster_rela_dict[i])

    def cluster(self):
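        # Re-cluster relations by running k-means on the current relation
        # embeddings; returns one cluster id per relation.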

        X = self.model.rel_embed.weight.data.cpu().numpy()
        kmeans = KMeans(n_clusters=self.n, random_state=0).fit(X)
        #self.rela_cluster = kmeans.labels_.tolist()

        return kmeans.labels_.tolist()

    def save(self, filename):
        torch.save(self.model.state_dict(), filename)

    def load(self, filename):
        self.model.load_state_dict(
            torch.load(filename,
                       map_location=lambda storage, location: storage.cuda()))

    def train_oas(self, train_data, valid_data, derived_struct):

        head, tail, rela = train_data
        n_train = len(head)

        if self.args.optim == 'adam' or self.args.optim == 'Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)

        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

        n_batch = self.args.n_batch

        for epoch in range(self.args.n_oas_epoch):
            start = time.time()
            rand_idx = torch.randperm(n_train)

            if self.GPU:
                head = head[rand_idx].cuda()
                tail = tail[rand_idx].cuda()
                rela = rela[rand_idx].cuda()
            else:
                head = head[rand_idx]
                tail = tail[rand_idx]
                rela = rela[rand_idx]

            epoch_loss = 0
            n_iters = 0

            # train model weights
            for h, t, r in batch_by_size(n_batch,
                                         head,
                                         tail,
                                         rela,
                                         n_sample=n_train):

                self.model.zero_grad()

                loss = self.model.forward(derived_struct, h, t, r,
                                          self.cluster_rela_dict)
                loss += self.args.lamb * self.model.regul
                loss.backward()

                nn.utils.clip_grad_norm_(self.model.parameters(),
                                         self.args.grad_clip)
                self.optimizer.step()
                self.prox_operator()

                epoch_loss += loss.data.cpu().numpy()
                n_iters += 1

            scheduler.step()

            if self.cluster_way == "scu":
                self.rela_cluster = self.cluster()
                self.rela_cluster_history.append(self.rela_cluster)
                self.rela_to_dict(self.rela_cluster)

            # train controller
            self.train_controller()

            # derive structs
            # evaluating the derived architecture is unnecessary during search
            self.time_tot += time.time() - start
            derived_struct, test_mrr = self.derive(sample_num=1)

            print(
                "Epoch: %d/%d, Search Time=%.2f, Loss=%.2f, Sampled Val MRR=%.8f, Tst MRR=%.8f"
                %
                (epoch + 1, self.args.n_oas_epoch, self.time_tot, epoch_loss /
                 n_train, self.derived_reward_history[-1], test_mrr))

    def train_stand(self, train_data, valid_data, derived_struct, rela_cluster,
                    mrr):

        self.rela_to_dict(rela_cluster)

        #self.args.perf_file = os.path.join(self.args.out_dir, self.args.dataset + '_std_' + str(self.args.m) + "_" + str(self.args.n)  + "_" + str(mrr) + '.txt')
        #plot_config(self.args)

        head, tail, rela = train_data
        n_train = len(head)

        if self.args.optim == 'adam' or self.args.optim == 'Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)
        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

        n_batch = self.args.n_batch

        best_mrr, best_str = 0, ''
        start = time.time()
        for epoch in range(self.args.n_stand_epoch):

            #self.epoch = epoch
            rand_idx = torch.randperm(n_train)

            if self.GPU:
                head = head[rand_idx].cuda()
                tail = tail[rand_idx].cuda()
                rela = rela[rand_idx].cuda()
            else:
                head = head[rand_idx]
                tail = tail[rand_idx]
                rela = rela[rand_idx]

            epoch_loss = 0
            n_iters = 0
            #lr = scheduler.get_lr()[0]

            # train model weights
            for h, t, r in batch_by_size(n_batch,
                                         head,
                                         tail,
                                         rela,
                                         n_sample=n_train):

                self.model.zero_grad()

                loss = self.model.forward(derived_struct, h, t, r,
                                          self.cluster_rela_dict)
                loss += self.args.lamb * self.model.regul
                loss.backward()

                self.optimizer.step()
                self.prox_operator()

                epoch_loss += loss.data.cpu().numpy()
                n_iters += 1

            scheduler.step()

            print("Epoch: %d/%d, Loss=%.2f, Stand Time=%.2f" %
                  (epoch + 1, self.args.n_stand_epoch, time.time() - start,
                   epoch_loss / n_train))

            if (epoch + 1) % 5 == 0:
                test, randint = True, None

                valid_mrr, valid_mr, valid_1, valid_3, valid_10 = self.tester_val(
                    derived_struct, test, randint)
                test_mrr, test_mr, test_1, test_3, test_10 = self.tester_tst(
                    derived_struct, test, randint)

                out_str = '%d \t %.2f \t %.2f \t %.4f  %.1f %.4f %.4f %.4f\t%.4f %.1f %.4f %.4f %.4f\n' % (epoch, self.time_tot, epoch_loss/n_train,\
                            valid_mrr, valid_mr, valid_1, valid_3, valid_10, \
                            test_mrr, test_mr, test_1, test_3, test_10)

                # output the best performance info
                if test_mrr > best_mrr:
                    best_mrr = test_mrr
                    best_str = out_str

                with open(self.args.perf_file, 'a+') as f:
                    f.write(out_str)

        with open(self.args.perf_file, 'a+') as f:
            f.write("best performance:" + best_str + "\n")
            f.write("struct:" + str(derived_struct) + "\n")
            f.write("rela:" + str(rela_cluster) + "\n")

        return best_mrr

    def prox_operator(self):
        for n, p in self.model.named_parameters():
            if 'ent' in n:
                X = p.data.clone()
                Z = torch.norm(X, p=2, dim=1, keepdim=True)
                Z[Z < 1] = 1
                X = X / Z
                p.data.copy_(X.view(self.n_ent, -1))

    def name(self, idx):
        i = idx[0]
        i_rc = self.rela_cluster[i]
        self.r_embed[i, :, :] = self.rel_embed_2K_1[
            i, self.idx_list[i_rc], :] * self.model._arch_parameters[i_rc][
                [j for j in range(self.K * self.K)], self.idx_list[i_rc]].view(
                    -1, 1)

    def test_link(self,
                  struct,
                  test,
                  randint,
                  test_data,
                  n_ent,
                  heads,
                  tails,
                  filt=True):

        mrr_tot = 0.
        mr_tot = 0.
        hit_tot = np.zeros((3, ))
        count = 0

        self.n_cluster = len(struct)
        test_batch_size = self.args.n_batch

        head, tail, rela = test_data

        if randint is None:
            rand_idx = torch.randperm(len(head))
        else:
            np.random.seed(randint)
            rand_idx = torch.LongTensor(np.random.permutation(len(head)))

        if self.GPU:
            head = head[rand_idx].cuda()
            tail = tail[rand_idx].cuda()
            rela = rela[rand_idx].cuda()
        else:
            head = head[rand_idx]
            tail = tail[rand_idx]
            rela = rela[rand_idx]

        for batch_h, batch_t, batch_r in batch_by_size(test_batch_size, head,
                                                       tail, rela):

            if self.GPU:
                batch_h = batch_h.cuda()
                batch_t = batch_t.cuda()
                batch_r = batch_r.cuda()

            h_embed = self.model.ent_embed(batch_h).view(
                -1, self.K, self.n_dim // self.K)
            t_embed = self.model.ent_embed(batch_t).view(
                -1, self.K, self.n_dim // self.K)

            length = self.n_dim // self.K

            # create a rela_embed with size (n_rel, 2K+1, length)
            rel_embed_pos = self.model.rel_embed.weight.view(
                -1, self.K, length)
            rel_embed_neg = -rel_embed_pos

            if self.GPU:
                rel_embed_zeros = torch.zeros(self.n_rel, 1, length).cuda()
            else:
                rel_embed_zeros = torch.zeros(self.n_rel, 1, length)

            self.rel_embed_2K_1 = torch.cat(
                (rel_embed_zeros, rel_embed_pos, rel_embed_neg), 1)

            # combine struct
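            # For each relation cluster, pick one of {0, +r_k, -r_k} for each
            # of the K*K block positions according to that cluster's struct row.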
            if self.GPU:
                self.r_embed = torch.zeros(self.n_rel, self.K * self.K,
                                           length).cuda()
            else:
                self.r_embed = torch.zeros(self.n_rel, self.K * self.K, length)

            for i_rc in range(self.n_cluster):
                max_idx_list = struct[i_rc]
                self.r_embed[
                    self.cluster_rela_dict[i_rc], :, :] = self.rel_embed_2K_1[
                        self.cluster_rela_dict[i_rc]][:, max_idx_list, :]

            self.r_embed = self.r_embed.view(-1, self.K, self.K, length)
            self.r_embed = self.r_embed[batch_r, :, :, :]

            head_scores = torch.sigmoid(
                self.model.test_head(self.r_embed, t_embed)).data
            tail_scores = torch.sigmoid(
                self.model.test_tail(h_embed, self.r_embed)).data

            for h, t, r, head_score, tail_score in zip(batch_h, batch_t,
                                                       batch_r, head_scores,
                                                       tail_scores):
                h_idx = int(h.data.cpu().numpy())
                t_idx = int(t.data.cpu().numpy())
                r_idx = int(r.data.cpu().numpy())
                if filt:  # filter
                    if tails[(h_idx, r_idx)]._nnz() > 1:
                        tmp = tail_score[t_idx].data.cpu().numpy()
                        idx = tails[(h_idx, r_idx)]._indices()
                        tail_score[idx] = 0.0

                        if self.GPU:
                            tail_score[t_idx] = torch.from_numpy(tmp).cuda()
                        else:
                            tail_score[t_idx] = torch.from_numpy(tmp)
                    if heads[(t_idx, r_idx)]._nnz() > 1:
                        tmp = head_score[h_idx].data.cpu().numpy()
                        idx = heads[(t_idx, r_idx)]._indices()
                        head_score[idx] = 0.0
                        if self.GPU:
                            head_score[h_idx] = torch.from_numpy(tmp).cuda()
                        else:
                            head_score[h_idx] = torch.from_numpy(tmp)
                mrr, mr, hit = mrr_mr_hitk(tail_score, t_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit

                mrr, mr, hit = mrr_mr_hitk(head_score, h_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                count += 2

            if not test:
                break  # one mini batch

        return (float(mrr_tot) / count, float(mr_tot) / count,
                hit_tot[0] / count, hit_tot[1] / count,
                hit_tot[2] / count)  #, total_loss/n_test
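
# `mrr_mr_hitk(scores, target)` is a recurring project-local helper that turns
# a 1-D score vector into rank-based metrics. A plausible minimal sketch
# (tie-breaking and the exact hits levels are assumptions):
import numpy as np
import torch

def mrr_mr_hitk(scores, target, k=(1, 3, 10)):
    # Rank `target` among all candidates by descending score and report the
    # reciprocal rank, the raw rank, and hits@k indicator values.
    _, sorted_idx = torch.sort(scores, descending=True)
    rank = int((sorted_idx == target).nonzero()[0]) + 1
    return 1.0 / rank, rank, np.asarray([1.0 if rank <= kk else 0.0 for kk in k])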

    """