# Standard imports used throughout this file.  KGEModule, batch_by_size,
# mrr_mr_hitk and CategoricalASNG are project-local helpers and are assumed to
# be importable from the surrounding package.
import time
import logging

import numpy as np
import torch
import torch.nn.functional as F
from torch.optim import Adam, Adagrad, SGD
from torch.optim.lr_scheduler import ExponentialLR


class BaseModel(object):

    def __init__(self, n_ent, n_rel, args, arch):
        self.model = KGEModule(n_ent, n_rel, args, arch)
        self.op_idx = torch.LongTensor(arch)
        if args.GPU:
            self.model.cuda()

        self.n_ent = n_ent
        self.n_rel = n_rel
        self.time_tot = 0
        self.args = args
        self.n_dim = args.n_dim
        self.K = args.num_blocks
        self.GPU = args.GPU
        self.n_arity = args.n_arity + 1

        # initialize the arch parameters
        self.n_ops = self.K ** self.n_arity
        self.categories = np.asarray([3 for i in range(self.n_ops)])
        alpha, init_delta, trained_theta = 1.5, 1.0, None
        #self.asng = CategoricalASNG(self.categories, alpha=alpha, init_delta=init_delta, init_theta=trained_theta)

    def save(self, filename):
        torch.save(self.model.state_dict(), filename)

    def load(self, filename):
        self.model.load_state_dict(
            torch.load(filename,
                       map_location=lambda storage, location: storage.cuda()))

    def get_reward(self, facts):
        # Score a pair of sampled architectures either by training loss or by
        # negative validation MRR; relies on self.asng being initialized.
        if self.args.M_val == "loss":
            archs, loss_archs = [], []
            with torch.no_grad():
                for i in range(2):
                    arch = self.asng.sampling()
                    archs.append(arch)
                    loss_arch = self.model._loss(facts, arch)
                    #loss_arch += self.model.args.lamb * self.model.regul
                    loss_archs.append(loss_arch)
        elif self.args.M_val == "mrr":
            archs, loss_archs = [], []
            with torch.no_grad():
                for i in range(2):
                    arch = self.asng.sampling()
                    archs.append(arch)
                    result = self.tester_val(facts, arch)
                    # result = self.tester_val(arch=arch)
                    loss_archs.append(-result[0])
                    # if result[0] > 0.3200:
                    #     self.good_struct.append([item.index(True) for item in arch.tolist()])
                    # if result[0] > self.best_mrr:
                    #     self.best_mrr = result[0]
                    #     self.best_struct = [item.index(True) for item in arch.tolist()]
        return archs, loss_archs  #, embed_time, mrr_time

    def train(self, train_data, valid_data, tester_val, tester_tst, tester_trip=None):
        self.tester_val = tester_val

        if self.args.optim == 'adam' or self.args.optim == 'Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)
        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

        n_epoch = self.args.n_epoch
        n_batch = self.args.n_batch
        self.best_mrr = 0

        # useful information related to cache
        n_train = train_data.size(0)
        n_valid = valid_data.size(0)
        self.good_struct = []

        for epoch in range(n_epoch):
            self.model.train()
            start = time.time()
            self.epoch = epoch

            rand_idx = torch.randperm(n_train)
            if self.GPU:
                train_data = train_data[rand_idx].cuda()
            else:
                train_data = train_data[rand_idx]

            epoch_loss = 0
            for facts in batch_by_size(n_batch, train_data, n_sample=n_train):
                self.model.zero_grad()
                if self.n_arity == 3:
                    loss = self.model.forward(facts, self.op_idx)
                    loss.backward()
                elif self.n_arity == 4:
                    loss = self.model.forward_tri(facts, self.op_idx)
                    loss.backward()
                # kge step
                self.optimizer.step()
                self.prox_operator()
                epoch_loss += loss.data.cpu().numpy()
            scheduler.step()

            self.time_tot += time.time() - start
            print("Epoch: %d/%d, Loss=%.8f, Time=%.4f" %
                  (epoch + 1, n_epoch, epoch_loss / n_train, time.time() - start))

            if (epoch + 1) % self.args.epoch_per_test == 0:
                valid_mrr, valid_mr, valid_1, valid_3, valid_10 = tester_val()
                test_mrr, test_mr, test_1, test_3, test_10 = tester_tst()
                if tester_trip is None:
                    out_str = '%d\t%.2f %.2f \t%.4f %.1f %.4f %.4f %.4f\t%.4f %.1f %.4f %.4f %.4f\n' % (
                        epoch, self.time_tot, epoch_loss / n_train,
                        valid_mrr, valid_mr, valid_1, valid_3, valid_10,
                        test_mrr, test_mr, test_1, test_3, test_10)
                    with open(self.args.perf_file, 'a') as f:
                        f.write(out_str)

                if test_mrr > self.best_mrr:
                    self.best_mrr = test_mrr
                    with open(self.args.perf_file, 'a') as f:
                        f.write("arch:" + str(self.op_idx.tolist()) + "\n")
                        f.write("best mrr:" + str(self.best_mrr) + "\n")

        return self.best_mrr

    def prox_operator(self, ):
        # project every entity-embedding row with L2 norm > 1 back onto the unit ball
        for n, p in self.model.named_parameters():
            if 'ent' in n:
                X = p.data.clone()
                Z = torch.norm(X, p=2, dim=1, keepdim=True)
                Z[Z < 1] = 1
                X = X / Z
                p.data.copy_(X.view(self.n_ent, -1))

    def name(self, idx):
        i = idx[0]
        i_rc = self.rela_cluster[i]
        self.r_embed[i, :, :] = self.rel_embed_2K_1[
            i, self.idx_list[i_rc], :] * self.model._arch_parameters[i_rc][
                [j for j in range(self.K * self.K)], self.idx_list[i_rc]].view(-1, 1)

    def test_link(self, test_data, n_ent, heads, tails, filt=True, arch=None):
        mrr_tot = 0.
        mr_tot = 0
        hit_tot = np.zeros((3, ))
        count = 0

        # if arch is None:
        #     max_idx = torch.Tensor(self.asng.p_model.theta).argmax(1)
        # else:
        #     max_idx = torch.LongTensor([item.index(True) for item in arch.tolist()])
        max_idx = self.op_idx

        for facts in batch_by_size(self.args.test_batch_size, test_data):
            if self.GPU:
                batch_h = facts[:, 0].cuda()
                batch_t = facts[:, 1].cuda()
                batch_r = facts[:, 2].cuda()
            else:
                batch_h = facts[:, 0]
                batch_t = facts[:, 1]
                batch_r = facts[:, 2]

            length = self.n_dim // self.K
            h_embed = self.model.ent_embed(batch_h).view(-1, self.K, length)
            t_embed = self.model.ent_embed(batch_t).view(-1, self.K, length)
            r_embed = self.model.rel_embed(batch_r).view(-1, self.K, length)

            head_scores = torch.sigmoid(
                self.model.bin_neg_other(r_embed, t_embed, max_idx, 1)).data
            tail_scores = torch.sigmoid(
                self.model.bin_neg_other(r_embed, h_embed, max_idx, 2)).data

            for h, t, r, head_score, tail_score in zip(batch_h, batch_t, batch_r,
                                                       head_scores, tail_scores):
                h_idx = int(h.data.cpu().numpy())
                t_idx = int(t.data.cpu().numpy())
                r_idx = int(r.data.cpu().numpy())

                if filt:  # filter
                    if tails[(h_idx, r_idx)]._nnz() > 1:
                        tmp = tail_score[t_idx].data.cpu().numpy()
                        idx = tails[(h_idx, r_idx)]._indices()
                        tail_score[idx] = 0.0
                        if self.GPU:
                            tail_score[t_idx] = torch.from_numpy(tmp).cuda()
                        else:
                            tail_score[t_idx] = torch.from_numpy(tmp)
                    if heads[(t_idx, r_idx)]._nnz() > 1:
                        tmp = head_score[h_idx].data.cpu().numpy()
                        idx = heads[(t_idx, r_idx)]._indices()
                        head_score[idx] = 0.0
                        if self.GPU:
                            head_score[h_idx] = torch.from_numpy(tmp).cuda()
                        else:
                            head_score[h_idx] = torch.from_numpy(tmp)

                mrr, mr, hit = mrr_mr_hitk(tail_score, t_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                mrr, mr, hit = mrr_mr_hitk(head_score, h_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                count += 2

        if arch is None:
            logging.info('Test_MRR=%f, Test_MR=%f, Test_H=%f %f %f, Count=%d',
                         float(mrr_tot) / count, float(mr_tot) / count,
                         hit_tot[0] / count, hit_tot[1] / count,
                         hit_tot[2] / count, count)
        return float(mrr_tot) / count, float(mr_tot) / count, hit_tot[0] / count, \
            hit_tot[1] / count, hit_tot[2] / count  #, total_loss/n_test
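
    # Both ``test_link`` (binary facts) and ``evaluate`` (ternary facts, below)
    # report *filtered* ranking metrics: before ranking, the scores of all other
    # entities known to complete the query (read from the sparse filter tensors)
    # are zeroed, and only the true target keeps its original score.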
    def evaluate(self, test_data, e1_sp, e2_sp, e3_sp, arch=None):
        mrr_tot = 0.
        mr_tot = 0
        hit_tot = np.zeros((3, ))
        count = 0

        # if arch is None:
        #     max_idx = torch.Tensor(self.asng.p_model.theta).argmax(1)
        # else:
        #     max_idx = torch.LongTensor([item.index(True) for item in arch.tolist()])
        self.model.eval()
        max_idx = self.op_idx

        for facts in batch_by_size(self.args.test_batch_size, test_data):
            if self.GPU:
                r, e1, e2, e3 = facts[:, 0].cuda(), facts[:, 1].cuda(), facts[:, 2].cuda(), facts[:, 3].cuda()
            else:
                r, e1, e2, e3 = facts[:, 0], facts[:, 1], facts[:, 2], facts[:, 3]

            length = self.n_dim // self.K
            # r_embed = self.model.rel_embed(r).view(-1, self.K, length)
            # e1_embed = self.model.ent_embed(e1).view(-1, self.K, length)
            # e2_embed = self.model.ent_embed(e2).view(-1, self.K, length)
            # e3_embed = self.model.ent_embed(e3).view(-1, self.K, length)
            #print(max_idx)
            r_embed = self.model.bnr(self.model.rel_embed(r)).view(-1, self.K, length)
            e1_embed = self.model.input_dropout(
                self.model.bne(self.model.ent_embed(e1))).view(-1, self.K, length)
            e2_embed = self.model.input_dropout(
                self.model.bne(self.model.ent_embed(e2))).view(-1, self.K, length)
            e3_embed = self.model.input_dropout(
                self.model.bne(self.model.ent_embed(e3))).view(-1, self.K, length)

            e1_scores = F.softmax(self.model.tri_neg_other(r_embed, e2_embed, e3_embed, max_idx, 1), dim=1).data
            e2_scores = F.softmax(self.model.tri_neg_other(r_embed, e1_embed, e3_embed, max_idx, 2), dim=1).data
            e3_scores = F.softmax(self.model.tri_neg_other(r_embed, e1_embed, e2_embed, max_idx, 3), dim=1).data

            for idx in range(len(r)):
                r_idx, e1_idx, e2_idx, e3_idx = int(r[idx].data.cpu().numpy()), int(
                    e1[idx].data.cpu().numpy()), int(e2[idx].data.cpu().numpy()), int(
                        e3[idx].data.cpu().numpy())

                if e1_sp[(r_idx, e2_idx, e3_idx)]._nnz() > 1:
                    tmp = e1_scores[idx][e1_idx].data.cpu().numpy()
                    indic = e1_sp[(r_idx, e2_idx, e3_idx)]._indices()
                    e1_scores[idx][indic] = 0.0
                    if self.GPU:
                        e1_scores[idx][e1_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        e1_scores[idx][e1_idx] = torch.from_numpy(tmp)
                if e2_sp[(r_idx, e1_idx, e3_idx)]._nnz() > 1:
                    tmp = e2_scores[idx][e2_idx].data.cpu().numpy()
                    indic = e2_sp[(r_idx, e1_idx, e3_idx)]._indices()
                    e2_scores[idx][indic] = 0.0
                    if self.GPU:
                        e2_scores[idx][e2_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        e2_scores[idx][e2_idx] = torch.from_numpy(tmp)
                if e3_sp[(r_idx, e1_idx, e2_idx)]._nnz() > 1:
                    tmp = e3_scores[idx][e3_idx].data.cpu().numpy()
                    indic = e3_sp[(r_idx, e1_idx, e2_idx)]._indices()
                    e3_scores[idx][indic] = 0.0
                    if self.GPU:
                        e3_scores[idx][e3_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        e3_scores[idx][e3_idx] = torch.from_numpy(tmp)

                mrr, mr, hit = mrr_mr_hitk(e1_scores[idx], e1_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                mrr, mr, hit = mrr_mr_hitk(e2_scores[idx], e2_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                mrr, mr, hit = mrr_mr_hitk(e3_scores[idx], e3_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                count += 3

        #if arch is None:
        logging.info('Test_MRR=%f, Test_MR=%f, Test_H=%f %f %f, Count=%d',
                     float(mrr_tot) / count, float(mr_tot) / count,
                     hit_tot[0] / count, hit_tot[1] / count,
                     hit_tot[2] / count, count)
        return float(mrr_tot) / count, float(mr_tot) / count, hit_tot[0] / count, \
            hit_tot[1] / count, hit_tot[2] / count  #, embed_time, mrr_time

    """
    def evaluate(self, test_data):
        hits, ranks, losses = [], [], []
        for _ in [1, 3, 10]:
            hits.append([])

        ary = test_data.size(1) - 1
        er_vocab_list = []
        er_vocab_pairs_list = []
        for miss_ent_domain in range(1, ary+1):
            er_vocab = self.get_er_vocab(test_data, miss_ent_domain)
            er_vocab_pairs = list(er_vocab.keys())
            er_vocab_list.append(er_vocab)
            er_vocab_pairs_list.append(er_vocab_pairs)

        max_idx = torch.Tensor(self.asng.p_model.theta).argmax(1)

        for miss_ent_domain in range(1, ary+1):
            er_vocab = er_vocab_list[miss_ent_domain-1]
            #for i in range(0, len(test_data_idxs), self.batch_size):
            for facts in batch_by_size(self.args.test_batch_size, test_data):
                if self.GPU:
                    r_idx, e1_idx, e2_idx, e3_idx = facts[:,0].cuda(), facts[:,1].cuda(), facts[:,2].cuda(), facts[:,3].cuda()
                else:
                    r_idx, e1_idx, e2_idx, e3_idx = facts[:,0], facts[:,1], facts[:,2], facts[:,3]

                length = self.n_dim // self.K
                r_embed = self.model.rel_embed(r_idx).view(-1, self.K, length)
                e1_embed = self.model.ent_embed(e1_idx).view(-1, self.K, length)
                e2_embed = self.model.ent_embed(e2_idx).view(-1, self.K, length)
                e3_embed = self.model.ent_embed(e3_idx).view(-1, self.K, length)

                e1_scores = torch.sigmoid(self.model.tri_neg_other(r_embed, e2_embed, e3_embed, max_idx, 1)).data
                e2_scores = torch.sigmoid(self.model.tri_neg_other(r_embed, e1_embed, e3_embed, max_idx, 2)).data
                e3_scores = torch.sigmoid(self.model.tri_neg_other(r_embed, e1_embed, e2_embed, max_idx, 3)).data

                if ary == 3:
                    if miss_ent_domain == 1:
                        e_idx = [e2_idx, e3_idx]
                        pred = e1_scores
                    elif miss_ent_domain == 2:
                        e_idx = [e1_idx, e3_idx]
                        pred = e2_scores
                    elif miss_ent_domain == 3:
                        e_idx = [e1_idx, e2_idx]
                        pred = e3_scores
                #pred, _ = model.forward(r_idx, e_idx, miss_ent_domain, W)

                e_all_idx = []
                for k0 in range(1, ary+1):
                    e_all_idx.append(torch.LongTensor(facts[:, k0]))

                #print(er_vocab)
                for j in range(facts.shape[0]):
                    er_vocab_key = []
                    for k0 in range(ary+1):
                        er_vocab_key.append(facts[j][k0])
                    er_vocab_key.remove(facts[j][miss_ent_domain])
                    filt = er_vocab[tuple(er_vocab_key)]
                    if filt != []:
                        print(er_vocab)
                    # print(er_vocab)
                    # print(tuple(er_vocab_key))
                    # print(filt)
                    target_value = pred[j, e_all_idx[miss_ent_domain-1][j]].item()
                    pred[j, filt] = 0.0
                    pred[j, e_all_idx[miss_ent_domain-1][j]] = target_value

                sort_values, sort_idxs = torch.sort(pred, dim=1, descending=True)
                sort_idxs = sort_idxs.cpu().numpy()
                for j in range(facts.shape[0]):
                    rank = np.where(sort_idxs[j] == e_all_idx[miss_ent_domain-1][j].item())[0][0]
                    ranks.append(rank+1)
                    for id, hits_level in enumerate([1, 3, 10]):
                        if rank+1 <= hits_level:
                            hits[id].append(1.0)
                        else:
                            hits[id].append(0.0)

        #logging.info('Test_MRR=%f, Test_MR=%f, Test_H=%f %f %f', np.mean(1./np.array(ranks)), 0, np.mean(hits[0]), np.mean(hits[1]), np.mean(hits[2]))
        #return np.mean(1./np.array(ranks)), 0, np.mean(hits[0]), np.mean(hits[1]), np.mean(hits[2])

    def get_er_vocab(self, data, miss_ent_domain):
        er_vocab = defaultdict(list)
        if len(data[0])-1 == 3:
            if miss_ent_domain == 1:
                for triple in data:
                    er_vocab[(triple[0], triple[2], triple[3])].append(triple[1])
            elif miss_ent_domain == 2:
                for triple in data:
                    er_vocab[(triple[0], triple[1], triple[3])].append(triple[2])
            elif miss_ent_domain == 3:
                for triple in data:
                    er_vocab[(triple[0], triple[1], triple[2])].append(triple[3])
        elif len(data[0])-1 == 4:
            if miss_ent_domain == 1:
                for triple in data:
                    er_vocab[(triple[0], triple[2], triple[3], triple[4])].append(triple[1])
            elif miss_ent_domain == 2:
                for triple in data:
                    er_vocab[(triple[0], triple[1], triple[3], triple[4])].append(triple[2])
            elif miss_ent_domain == 3:
                for triple in data:
                    er_vocab[(triple[0], triple[1], triple[2], triple[4])].append(triple[3])
            elif miss_ent_domain == 4:
                for triple in data:
                    er_vocab[(triple[0], triple[1], triple[2], triple[3])].append(triple[4])
        return er_vocab
    """
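

# Illustrative wiring only (an assumption, not part of the original pipeline):
# how the zero-argument testers consumed by ``BaseModel.train`` above can be
# built from ``test_link``.  ``args`` and the data/filter objects are
# placeholders prepared elsewhere in the project.
def _example_train_run(n_ent, n_rel, args, arch, train_data, valid_data,
                       test_data, heads, tails):
    base = BaseModel(n_ent, n_rel, args, arch)
    # ``train`` expects testers returning (mrr, mr, hits@1, hits@3, hits@10)
    tester_val = lambda: base.test_link(valid_data, n_ent, heads, tails)
    tester_tst = lambda: base.test_link(test_data, n_ent, heads, tails)
    return base.train(train_data, valid_data, tester_val, tester_tst)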


'''
class BaseModel(object):
    def __init__(self, n_ent, n_rel, args, struct):
        self.model = KGEModule(n_ent, n_rel, args, struct)
        self.model.cuda()

        self.n_ent = n_ent
        self.n_rel = n_rel
        self.time_tot = 0
        self.args = args

    def train(self, train_data, tester_val, tester_tst):
        head, tail, rela = train_data
        # useful information related to cache
        n_train = len(head)

        if self.args.optim=='adam' or self.args.optim=='Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim=='adagrad' or self.args.optim=='Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)
        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

        n_epoch = self.args.n_epoch
        n_batch = self.args.n_batch
        best_mrr = 0

        # used for counting repeated triplets for margin based loss
        for epoch in range(n_epoch):
            start = time.time()
            self.epoch = epoch

            rand_idx = torch.randperm(n_train)
            head = head[rand_idx].cuda()
            tail = tail[rand_idx].cuda()
            rela = rela[rand_idx].cuda()

            epoch_loss = 0
            for h, t, r in batch_by_size(n_batch, head, tail, rela, n_sample=n_train):
                self.model.zero_grad()
                loss = self.model.forward(h, t, r)
                loss += self.args.lamb * self.model.regul
                loss.backward()
                self.optimizer.step()
                self.prox_operator()
                epoch_loss += loss.data.cpu().numpy()

            self.time_tot += time.time() - start
            scheduler.step()

            if (epoch+1) % self.args.epoch_per_test == 0:
                # output performance
                valid_mrr, valid_mr, valid_10 = tester_val()
                test_mrr, test_mr, test_10 = tester_tst()
                out_str = '%.4f\t\t%.4f\t%.4f\t%.4f\n' % (epoch + 1, test_mr, test_mrr, test_10)

                # output the best performance info
                if valid_mrr > best_mrr:
                    best_mrr = valid_mrr
                    best_str = out_str
                if best_mrr < self.args.thres:
                    print('\tearly stopped in Epoch:{}, best_mrr:{}'.format(epoch+1, best_mrr), self.model.struct)
                    return best_str

        return best_mrr, best_str

    def prox_operator(self,):
        for n, p in self.model.named_parameters():
            if 'ent' in n:
                X = p.data.clone()
                Z = torch.norm(X, p=2, dim=1, keepdim=True)
                Z[Z<1] = 1
                X = X/Z
                p.data.copy_(X.view(self.n_ent, -1))

    def test_link(self, test_data, head_filter, tail_filter):
        heads, tails, relas = test_data
        batch_size = self.args.test_batch_size
        num_batch = len(heads) // batch_size + int(len(heads)%batch_size>0)

        head_probs = []
        tail_probs = []
        for i in range(num_batch):
            start = i * batch_size
            end = min((i+1)*batch_size, len(heads))
            batch_h = heads[start:end].cuda()
            batch_t = tails[start:end].cuda()
            batch_r = relas[start:end].cuda()

            h_embed = self.model.ent_embed(batch_h)
            r_embed = self.model.rel_embed(batch_r)
            t_embed = self.model.ent_embed(batch_t)

            head_scores = torch.sigmoid(self.model.test_head(r_embed, t_embed)).data
            tail_scores = torch.sigmoid(self.model.test_tail(h_embed, r_embed)).data
            head_probs.append(head_scores.data.cpu().numpy())
            tail_probs.append(tail_scores.data.cpu().numpy())

        head_probs = np.concatenate(head_probs) * head_filter
        tail_probs = np.concatenate(tail_probs) * tail_filter
        head_ranks = cal_ranks(head_probs, label=heads.data.numpy())
        tail_ranks = cal_ranks(tail_probs, label=tails.data.numpy())
        h_mrr, h_mr, h_h10 = cal_performance(head_ranks)
        t_mrr, t_mr, t_h10 = cal_performance(tail_ranks)
        mrr = (h_mrr + t_mrr) / 2
        mr = (h_mr + t_mr) / 2
        h10 = (h_h10 + t_h10) / 2
        return mrr, mr, h10

class BaseModel(object):
    def __init__(self, n_ent, n_rel, args, rela_cluster, tester_val, tester_tst, tester_trip):
        self.tester_val = tester_val
        self.tester_tst = tester_tst
        self.tester_trip = tester_trip

        GPU = args.GPU
        m = args.m
        n = args.n
        cluster_way = args.clu

        self.model = KGEModule(n_ent, n_rel, args, GPU, rela_cluster, m, n)
        if GPU:
            self.model.cuda()

        self.n_ent = n_ent
        self.n_rel = n_rel
        self.rela_cluster = rela_cluster
        self.time_tot = 0
        self.args = args
        self.n_dim = args.n_dim
        self.K = m
        self.n = n
        self.GPU = GPU
        self.cluster_way = cluster_way

        self.rela_to_dict(rela_cluster)

        """build controller and sub-model"""
        self.controller = None
        self.build_controller()

        #print(self.args.controller_optim)
        controller_lr = 3.5e-4
        controller_optimizer = _get_optimizer(self.args.controller_optim)
        self.controller_optim = controller_optimizer(self.controller.parameters(), lr=controller_lr)

        self.derived_raward_history = []
        self.derived_struct_history = []
        if self.cluster_way == "scu":
            self.rela_cluster_history = []

    def build_controller(self):
        self.search_space, self.action_list = _get_space_op(self.K, self.n)
        # build RNN controller
        from nas.controller import SimpleNASController
        self.controller = SimpleNASController(self.args, action_list=self.action_list,
                                              search_space=self.search_space, cuda=self.GPU)
        if self.GPU:
            self.controller.cuda()

    def mm_train(self, train_data, valid_data):
        """
        Each epoch consists of two phases:
        - in the first phase, the shared sub-model parameters are trained (exploration);
        - in the second phase, the controller's parameters are trained.
        """
        #num_epoch = self.args.n_epoch
        derived_struct = _init_struct("DistMult", self.K, self.n)
        self.train_oas(train_data, valid_data, derived_struct)
        derived_struct, derived_mrr = self.derive_last()

        if self.cluster_way == "scu":
            return (self.derived_raward_history, self.derived_struct_history,
                    self.rela_cluster_history), (derived_mrr, derived_struct, self.rela_cluster)
        elif self.cluster_way == "pde":
            return self.derived_raward_history, self.derived_struct_history, derived_struct

    def get_reward(self, struct_list, test=False, random=True):
        """
        Computes the reward of a single sampled model on validation data.
        """
        reward_list = []
        if random:
            randint = None
        else:
            randint = torch.randint(10000, (1, ))

        for struct in struct_list:
            #print("x")
            struct = torch.LongTensor([int(item) for item in struct])
            struct = struct.view(-1, self.K * self.K)
            if not test:
                valid_mrr, valid_mr, valid_1, valid_3, valid_10 = self.tester_val(struct, test, randint)
                reward_list.append(valid_mrr)
            else:
                valid_mrr, valid_mr, valid_1, valid_3, valid_10 = self.tester_val(struct, test, randint)
                test_mrr, test_mr, test_1, test_3, test_10 = self.tester_tst(struct, test, randint)
                # record val mrr in the file, test mrr is just for visualization
                reward_list.append(valid_mrr)

        if not test:
            return reward_list
        else:
            return reward_list, test_mrr

    def train_controller(self):
        """
        Train controller to find better structure.
""" model = self.controller model.train() baseline = None adv_history = [] entropy_history = [] reward_history = [] total_loss = 0 for step in range(self.args.n_controller_epoch): #print(step) # sample struct structure_list, log_probs, entropies = self.controller.sample( with_details=True) # calculate reward np_entropies = entropies.data.cpu().numpy() rewards = self.get_reward(structure_list, random=False) rewards = np.array(rewards) torch.cuda.empty_cache() # discount discount = 1.0 if 1 > discount > 0: rewards = discount(rewards, discount) reward_history.extend(rewards) entropy_history.extend(np_entropies) ema_baseline_decay = 0.5 # moving average baseline if baseline is None: baseline = rewards else: decay = ema_baseline_decay #print (decay, baseline, rewards) baseline = decay * baseline + (1 - decay) * rewards adv = rewards - baseline history.append(adv) adv = scale(adv, scale_value=0.5) adv_history.extend(adv) adv = utils.get_variable(adv, self.GPU, requires_grad=False) # policy loss loss = -log_probs * adv entropy_mode = "reward" if entropy_mode == 'regularizer': loss -= self.args.entropy_coeff * entropies loss = loss.sum() # or loss.mean() # update self.controller_optim.zero_grad() loss.backward() controller_grad_clip = 0 if controller_grad_clip > 0: torch.nn.utils.clip_grad_norm(model.parameters(), controller_grad_clip) self.controller_optim.step() total_loss += utils.to_item(loss.data) torch.cuda.empty_cache() def derive(self, sample_num=None): derive_from_history = False if sample_num is None and derive_from_history: return self.derive_from_history() else: structure_list, _, entropies = self.controller.sample( sample_num, with_details=True) rewards, test_mrr = self.get_reward(structure_list, test=True, random=True) rewards = torch.Tensor(rewards) best_struct = structure_list[0] best_struct = torch.LongTensor([int(item) for item in best_struct]) best_struct = best_struct.view(-1, self.K * self.K) self.derived_raward_history.append(max(rewards)) self.derived_struct_history.append(best_struct) return best_struct, test_mrr def derive_last(self): rewards_list, structs_list = [], [] for itr in range(self.args.n_derive_sample): structure_list, _, entropies = self.controller.sample( 1, with_details=True) rewards, test_mrr = self.get_reward(structure_list, test=True, random=True) struct = structure_list[0] struct = torch.LongTensor([int(item) for item in struct]) struct = struct.view(-1, self.K * self.K) rewards_list.append(max(rewards)) structs_list.append(struct) max_idx = rewards_list.index(max(rewards_list)) # select return structs_list[max_idx], rewards_list[max_idx] def rela_to_dict(self, rela_cluster): self.cluster_rela_dict = dict() n = max(rela_cluster) + 1 for i in range(n): self.cluster_rela_dict[i] = [] for idx, item in enumerate(rela_cluster): self.cluster_rela_dict[item].append(idx) for i in range(n): self.cluster_rela_dict[i] = torch.LongTensor( self.cluster_rela_dict[i]) def cluster(self): X = self.model.rel_embed.weight.data.cpu().numpy() kmeans = KMeans(n_clusters=self.n, random_state=0).fit(X) #self.rela_cluster = kmeans.labels_.tolist() return kmeans.labels_.tolist() def save(self, filename): torch.save(self.model.state_dict(), filename) def load(self, filename): self.model.load_state_dict( torch.load(filename, map_location=lambda storage, location: storage.cuda())) def train_oas(self, train_data, valid_data, derived_struct): head, tail, rela = train_data n_train = len(head) if self.args.optim == 'adam' or self.args.optim == 'Adam': self.optimizer = 
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)
        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)
        n_batch = self.args.n_batch

        for epoch in range(self.args.n_oas_epoch):
            start = time.time()
            rand_idx = torch.randperm(n_train)
            if self.GPU:
                head = head[rand_idx].cuda()
                tail = tail[rand_idx].cuda()
                rela = rela[rand_idx].cuda()
            else:
                head = head[rand_idx]
                tail = tail[rand_idx]
                rela = rela[rand_idx]

            epoch_loss = 0
            n_iters = 0

            # train model weights
            for h, t, r in batch_by_size(n_batch, head, tail, rela, n_sample=n_train):
                self.model.zero_grad()
                loss = self.model.forward(derived_struct, h, t, r, self.cluster_rela_dict)
                loss += self.args.lamb * self.model.regul
                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), self.args.grad_clip)
                self.optimizer.step()
                self.prox_operator()
                epoch_loss += loss.data.cpu().numpy()
                n_iters += 1
            scheduler.step()

            if self.cluster_way == "scu":
                self.rela_cluster = self.cluster()
                self.rela_cluster_history.append(self.rela_cluster)
                self.rela_to_dict(self.rela_cluster)

            # train controller
            self.train_controller()

            # derive structs
            # evaluation of the derived architecture is unnecessary in the search procedure
            self.time_tot += time.time() - start
            derived_struct, test_mrr = self.derive(sample_num=1)
            print("Epoch: %d/%d, Search Time=%.2f, Loss=%.2f, Sampled Val MRR=%.8f, Tst MRR=%.8f" %
                  (epoch + 1, self.args.n_oas_epoch, self.time_tot, epoch_loss / n_train,
                   self.derived_raward_history[-1], test_mrr))

    def train_stand(self, train_data, valid_data, derived_struct, rela_cluster, mrr):
        self.rela_to_dict(rela_cluster)

        #self.args.perf_file = os.path.join(self.args.out_dir, self.args.dataset + '_std_' + str(self.args.m) + "_" + str(self.args.n) + "_" + str(mrr) + '.txt')
        #plot_config(self.args)

        head, tail, rela = train_data
        n_train = len(head)

        if self.args.optim == 'adam' or self.args.optim == 'Adam':
            self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
        elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
            self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
        else:
            self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)
        scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

        n_batch = self.args.n_batch
        best_mrr = 0
        start = time.time()

        for epoch in range(self.args.n_stand_epoch):
            #self.epoch = epoch
            rand_idx = torch.randperm(n_train)
            if self.GPU:
                head = head[rand_idx].cuda()
                tail = tail[rand_idx].cuda()
                rela = rela[rand_idx].cuda()
            else:
                head = head[rand_idx]
                tail = tail[rand_idx]
                rela = rela[rand_idx]

            epoch_loss = 0
            n_iters = 0
            #lr = scheduler.get_lr()[0]

            # train model weights
            for h, t, r in batch_by_size(n_batch, head, tail, rela, n_sample=n_train):
                self.model.zero_grad()
                loss = self.model.forward(derived_struct, h, t, r, self.cluster_rela_dict)
                loss += self.args.lamb * self.model.regul
                loss.backward()
                self.optimizer.step()
                self.prox_operator()
                epoch_loss += loss.data.cpu().numpy()
                n_iters += 1
            scheduler.step()

            print("Epoch: %d/%d, Loss=%.2f, Stand Time=%.2f" %
                  (epoch + 1, self.args.n_stand_epoch, epoch_loss / n_train, time.time() - start))

            if (epoch + 1) % 5 == 0:
                test, randint = True, None
                valid_mrr, valid_mr, valid_1, valid_3, valid_10 = self.tester_val(derived_struct, test, randint)
                test_mrr, test_mr, test_1, test_3, test_10 = self.tester_tst(derived_struct, test, randint)
                out_str = '%d \t %.2f \t %.2f \t %.4f %.1f %.4f %.4f %.4f\t%.4f %.1f %.4f %.4f %.4f\n' % (
                    epoch, self.time_tot, epoch_loss / n_train,
                    valid_mrr, valid_mr, valid_1, valid_3, valid_10,
                    test_mrr, test_mr, test_1, test_3, test_10)

                # output the best performance info
                if test_mrr > best_mrr:
                    best_mrr = test_mrr
                    best_str = out_str

                with open(self.args.perf_file, 'a+') as f:
                    f.write(out_str)

        with open(self.args.perf_file, 'a+') as f:
            f.write("best performance:" + best_str + "\n")
            f.write("struct:" + str(derived_struct) + "\n")
            f.write("rela:" + str(rela_cluster) + "\n")

        return best_mrr

    def prox_operator(self, ):
        for n, p in self.model.named_parameters():
            if 'ent' in n:
                X = p.data.clone()
                Z = torch.norm(X, p=2, dim=1, keepdim=True)
                Z[Z < 1] = 1
                X = X / Z
                p.data.copy_(X.view(self.n_ent, -1))
        return X

    def name(self, idx):
        i = idx[0]
        i_rc = self.rela_cluster[i]
        self.r_embed[i, :, :] = self.rel_embed_2K_1[
            i, self.idx_list[i_rc], :] * self.model._arch_parameters[i_rc][
                [j for j in range(self.K * self.K)], self.idx_list[i_rc]].view(-1, 1)

    def test_link(self, struct, test, randint, test_data, n_ent, heads, tails, filt=True):
        mrr_tot = 0.
        mr_tot = 0.
        hit_tot = np.zeros((3, ))
        count = 0

        self.n_cluster = len(struct)
        test_batch_size = self.args.n_batch
        head, tail, rela = test_data

        if randint is None:
            rand_idx = torch.randperm(len(head))
        else:
            np.random.seed(randint)
            rand_idx = torch.LongTensor(np.random.permutation(len(head)))

        if self.GPU:
            head = head[rand_idx].cuda()
            tail = tail[rand_idx].cuda()
            rela = rela[rand_idx].cuda()
        else:
            head = head[rand_idx]
            tail = tail[rand_idx]
            rela = rela[rand_idx]

        for batch_h, batch_t, batch_r in batch_by_size(test_batch_size, head, tail, rela):
            if self.GPU:
                batch_h = batch_h.cuda()
                batch_t = batch_t.cuda()
                batch_r = batch_r.cuda()

            h_embed = self.model.ent_embed(batch_h).view(-1, self.K, self.n_dim // self.K)
            t_embed = self.model.ent_embed(batch_t).view(-1, self.K, self.n_dim // self.K)
            length = self.n_dim // self.K

            # create a rela_embed with size (n_rel, 2K+1, length)
            rel_embed_pos = self.model.rel_embed.weight.view(-1, self.K, length)
            rel_embed_neg = -rel_embed_pos
            if self.GPU:
                rel_embed_zeros = torch.zeros(self.n_rel, 1, length).cuda()
            else:
                rel_embed_zeros = torch.zeros(self.n_rel, 1, length)
            self.rel_embed_2K_1 = torch.cat((rel_embed_zeros, rel_embed_pos, rel_embed_neg), 1)

            # combine struct
            if self.GPU:
                self.r_embed = torch.zeros(self.n_rel, self.K * self.K, length).cuda()
            else:
                self.r_embed = torch.zeros(self.n_rel, self.K * self.K, length)

            for i_rc in range(self.n_cluster):
                max_idx_list = struct[i_rc]
                self.r_embed[self.cluster_rela_dict[i_rc], :, :] = \
                    self.rel_embed_2K_1[self.cluster_rela_dict[i_rc]][:, max_idx_list, :]

            self.r_embed = self.r_embed.view(-1, self.K, self.K, length)
            self.r_embed = self.r_embed[batch_r, :, :, :]

            head_scores = torch.sigmoid(self.model.test_head(self.r_embed, t_embed)).data
            tail_scores = torch.sigmoid(self.model.test_tail(h_embed, self.r_embed)).data

            for h, t, r, head_score, tail_score in zip(batch_h, batch_t, batch_r,
                                                       head_scores, tail_scores):
                h_idx = int(h.data.cpu().numpy())
                t_idx = int(t.data.cpu().numpy())
                r_idx = int(r.data.cpu().numpy())

                if filt:  # filter
                    if tails[(h_idx, r_idx)]._nnz() > 1:
                        tmp = tail_score[t_idx].data.cpu().numpy()
                        idx = tails[(h_idx, r_idx)]._indices()
                        tail_score[idx] = 0.0
                        if self.GPU:
                            tail_score[t_idx] = torch.from_numpy(tmp).cuda()
                        else:
                            tail_score[t_idx] = torch.from_numpy(tmp)
                    if heads[(t_idx, r_idx)]._nnz() > 1:
                        tmp = head_score[h_idx].data.cpu().numpy()
                        idx = heads[(t_idx, r_idx)]._indices()
                        head_score[idx] = 0.0
                        if self.GPU:
                            head_score[h_idx] = torch.from_numpy(tmp).cuda()
                        else:
                            head_score[h_idx] = torch.from_numpy(tmp)

                mrr, mr, hit = mrr_mr_hitk(tail_score, t_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                mrr, mr, hit = mrr_mr_hitk(head_score, h_idx)
                mrr_tot += mrr
                mr_tot += mr
                hit_tot += hit
                count += 2

            if not test:
                break  # one mini batch

        return float(mrr_tot) / count, float(mr_tot) / count, hit_tot[0] / count, \
            hit_tot[1] / count, hit_tot[2] / count  #, total_loss/n_test
'''
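

# Reference sketch (assumption): the project-local ``mrr_mr_hitk`` used above is
# expected to turn one score vector and the index of the true entity into
# (reciprocal rank, rank, hits@{1,3,10}), with higher scores ranked first.
# A minimal version consistent with that usage could look like this.
def _mrr_mr_hitk_sketch(scores, target_idx, ks=(1, 3, 10)):
    _, sorted_idx = torch.sort(scores.view(-1), descending=True)
    # 1-based rank of the target entity among all candidates
    rank = int((sorted_idx == target_idx).nonzero()[0, 0]) + 1
    hits = np.array([1.0 if rank <= k else 0.0 for k in ks])
    return 1.0 / rank, rank, hits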