def evaluate_fixed(self, test_iter, n_cl_sample):
    cum_loss = 0
    cum_prec = 0
    cum_rec = 0
    cum_size = 0
    for t in range(test_iter):
        words, context, target, cluster_iden, ixs = self.generate(n_cl_sample)
        input_x = torch.cat([words, context], dim=1)
        kernel = self.kernel_net(input_x)
        vals, vecs = custom_decomp()(kernel)
        subset = DPP()(vals, vecs)
        pick = subset.diag().mm(words).sum(0, keepdim=True)
        self.pred = self.pred_net(pick).squeeze()
        loss = nn.MSELoss()(self.pred, target)

        # Subset statistics
        precision, recall, set_size = self.assess(subset.data, ixs)
        cum_loss += loss.data[0]
        cum_prec += precision
        cum_rec += recall
        cum_size += set_size

    print(cum_loss / test_iter, cum_prec / test_iter,
          cum_rec / test_iter, cum_size / test_iter)
    return (cum_loss / test_iter, cum_prec / test_iter,
            cum_rec / test_iter, cum_size / test_iter)
def sample(self):
    words, context, ixs, target = self.generate()
    input_x = torch.cat([words, context], dim=1)
    kernel = self.kernel_net(input_x)
    vals, vecs = custom_decomp()(kernel)
    subset = DPP()(vals, vecs)
    pick = subset.diag().mm(words).sum(0, keepdim=True)
    self.pred = self.pred_net(pick).squeeze()
    loss = nn.BCELoss()(self.pred, target)

    # Classification accuracy
    label = torch.round(self.pred.data)
    acc = (label == target.data).sum() / self.aspects_n

    # Subset statistics
    precision, recall, set_size = self.assess(subset.data, ixs)

    # Print
    print('Target is: ', target.data)
    print('Pred is: ', self.pred.data)
    print('Loss is:', loss.data[0])
    print('Acc is:', acc)
    print('Subset is:', subset.data)
    print('Ix is:', ixs)
    print('Subset statistics are:', precision, recall, set_size)

    return words, context, ixs, target, self.pred, loss, subset
def evaluate(self, test_iter):
    cum_loss = 0
    cum_acc = 0
    cum_prec = 0
    cum_rec = 0
    cum_size = 0
    for t in range(test_iter):
        words, context, ixs, target = self.generate()
        input_x = torch.cat([words, context], dim=1)
        kernel = self.kernel_net(input_x)
        vals, vecs = custom_decomp()(kernel)
        subset = DPP()(vals, vecs)
        pick = subset.diag().mm(words).sum(0, keepdim=True)
        self.pred = self.pred_net(pick).squeeze()
        loss = nn.BCELoss()(self.pred, target)

        # Classification accuracy
        label = torch.round(self.pred.data)
        cum_acc += (label == target.data).sum() / self.aspects_n

        # Subset statistics
        precision, recall, set_size = self.assess(subset.data, ixs)
        cum_loss += loss.data[0]
        cum_prec += precision
        cum_rec += recall
        cum_size += set_size

    print('Loss:', cum_loss / test_iter,
          'Pred Acc:', cum_acc / test_iter,
          'Precision:', cum_prec / test_iter,
          'Recall:', cum_rec / test_iter,
          'Set Size:', cum_size / test_iter)
def forward(self, kernels, words):
    # s_ix / e_ix (start and end index of each set in the batch) need to be set beforehand!
    s_ix = self.s_ix
    e_ix = self.e_ix
    self.exp_sizes = []
    assert s_ix is not None and e_ix is not None

    alpha_iter = self.alpha_iter
    batch_size = len(s_ix)
    embd_dim = words.size(1)
    output = []
    lengths = []
    actions = self.saved_subsets = [[] for i in range(batch_size)]

    for i, (s, e) in enumerate(zip(s_ix, e_ix)):
        V = kernels[s:e]
        word = words[s:e]
        # check if gradients work for non-square matrix
        vals, vecs = custom_decomp()(V)
        exp_size = (vals / (1 + vals)).pow(2).sum()
        for j in range(alpha_iter):
            # Re-sample to avoid zero subsets. Problematic, as the estimator is
            # no longer unbiased; temporary fix.
            while True:
                # subset = AllInOne()(V)  # scrap this after gradient check!
                subset = DPP()(vals, vecs)
                if subset.data.sum() >= 1:
                    break
                else:
                    print("Zero Subset was produced. Re-sample")
                    continue
            actions[i].append(subset)
            # Masking with subset.diag() creates zero rows, but it links the two graphs :)
            pick = subset.diag().mm(word)
            pick = pick.masked_select(
                Variable(subset.data.byte().expand_as(pick.t())).t())
            pick = pick.view(-1, embd_dim)
            output.append(pick)
            lengths.append(pick.size(0))
        self.exp_sizes.append(exp_size)

    output = torch.cat(output, dim=0)
    cum_lengths = list(accumulate(lengths))
    self.s_ix = [ix1 - ix2 for (ix1, ix2) in zip(cum_lengths, lengths)]
    self.e_ix = cum_lengths
    return output
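# --- Hedged sketch (illustration, not part of the original module) --------
# The `exp_size` quantity above is built from the DPP eigenvalues. For an
# L-ensemble DPP with eigenvalues lambda_i, the expected sample size is
# E[|Y|] = sum_i lambda_i / (1 + lambda_i); the forward() above additionally
# squares each ratio, which differs from that standard formula. A minimal,
# self-contained version of the standard expectation:

import torch

def expected_dpp_size(eigenvalues):
    """Expected cardinality of an L-ensemble DPP: E[|Y|] = sum λ / (1 + λ)."""
    marginals = eigenvalues / (1.0 + eigenvalues)
    return marginals.sum()

# Example: three items with eigenvalue 1.0 each -> expected size 1.5.
# expected_dpp_size(torch.tensor([1.0, 1.0, 1.0]))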
def run(self, words, context, batch_size, alpha_iter):
    """
    May be used by sample and by evaluate. Samples once from the DPP.
    Can be used with any batch_size. Returns a tensor of many subsets.
    """
    set_size = self.set_size
    kernel_in = self.kernel_in
    kernel_out = self.kernel_out
    actions = self.saved_subsets = [[] for i in range(batch_size)]
    rewards = self.saved_losses = [[] for i in range(batch_size)]
    cum_loss = 0.

    # Concatenate individual words and set context.
    # Dimensions are batch_size x set_size x kernel_in.
    batch_x = Variable(torch.cat([words, context], dim=2))

    # Compute embedding of the DPP kernel.
    batch_kernel = self.kernel_net(batch_x.view(-1, kernel_in))
    batch_kernel = batch_kernel.view(-1, set_size, kernel_out)

    for i, kernel in enumerate(batch_kernel):
        vals, vecs = custom_decomp()(kernel)
        for j in range(alpha_iter):
            subset = DPP()(vals, vecs)
            actions[i].append(subset)

    subset_tensor = torch.stack([torch.stack(subsets) for subsets in actions])
    return subset_tensor
def __call__(self, kernels, batched_words):
    batch_size, set_size, kernel_dim = kernels.size()
    batch_size, set_size, embd_dim = batched_words.size()
    alpha_iter = self.alpha_iter
    self.exp_sizes = exp_sizes = []
    self.saved_subsets = actions = [[] for i in range(batch_size)]
    self.saved_picks = picks = [[] for i in range(batch_size)]

    for i, (kernel, words) in enumerate(zip(kernels, batched_words)):
        vals, vecs = custom_decomp()(kernel)
        exp_size = (vals / (1 + vals)).sum()
        exp_sizes.append(exp_size)
        for j in range(alpha_iter):
            # Re-sample to avoid zero subsets. Problematic, as the estimator is
            # no longer unbiased; temporary fix.
            while True:
                subset = DPP()(vals, vecs)
                if subset.data.sum() >= 1:
                    break
                else:
                    print("Zero Subset was produced. Re-sample")
                    continue
            actions[i].append(subset)
            pick = words.masked_select(
                Variable(subset.data.byte().expand_as(words.t())).t())
            pick = pick.view(-1, embd_dim)
            picks[i].append(pick)

    return picks
def train(self, train_iter, batch_size, lr, alpha_iter=1, baseline=True):
    """
    Training the model. Does not use the forward pass because we want to sample repeatedly.
    """
    set_size = self.set_size
    kernel_in = self.kernel_in
    kernel_out = self.kernel_out
    loss_log = 100
    optimizer = optim.Adam(self.kernel_net.parameters(), lr=lr)

    for t in range(train_iter):
        actions = self.saved_subsets = [[] for i in range(batch_size)]
        rewards = self.saved_losses = [[] for i in range(batch_size)]
        cum_loss = 0.
        words, context, target = self.generate(batch_size)

        # Concatenate individual words and set context.
        # Dimensions are batch_size x set_size x kernel_in.
        batch_x = Variable(torch.cat([words, context], dim=2))

        # Compute embedding of the DPP kernel.
        batch_kernel = self.kernel_net(batch_x.view(-1, kernel_in))
        batch_kernel = batch_kernel.view(-1, set_size, kernel_out)

        for i, kernel in enumerate(batch_kernel):
            vals, vecs = custom_decomp()(kernel)
            for j in range(alpha_iter):
                subset = DPP()(vals, vecs)
                actions[i].append(subset)
                loss, _, _, _, _ = self._assess(target[i], subset.data)
                rewards[i].append(loss)
                cum_loss += loss

        if baseline:
            self.saved_baselines = [compute_baseline(losses) for losses in self.saved_losses]
        else:
            self.saved_baselines = self.saved_losses

        # Register the baselined rewards on the stochastic subset samples (REINFORCE).
        for subset_list, reward_list in zip(self.saved_subsets, self.saved_baselines):
            for action, reward in zip(subset_list, reward_list):
                action.reinforce(reward)

        pseudo_loss = torch.stack(
            [torch.stack(subsets) for subsets in self.saved_subsets]).sum()
        pseudo_loss.backward(None)
        optimizer.step()
        optimizer.zero_grad()

        self.loss_dict[t].append(cum_loss / (batch_size * alpha_iter))
        if not ((t + 1) % loss_log):
            print("Loss at it ", t + 1, " is: ", cum_loss / (batch_size * alpha_iter))
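# --- Hedged sketch (assumption, not the original helper) ------------------
# `compute_baseline` is referenced above but not defined in this excerpt.
# One plausible variance-reduction choice, assumed here purely for
# illustration, is a leave-one-out baseline: each sampled loss is centred by
# the mean of the other samples drawn for the same example.

def compute_baseline_sketch(losses):
    """Return leave-one-out centred rewards for a list of scalar losses."""
    n = len(losses)
    if n < 2:
        return list(losses)  # nothing to centre against
    total = sum(losses)
    return [loss - (total - loss) / (n - 1) for loss in losses]

# Example: compute_baseline_sketch([1.0, 2.0, 3.0]) -> [-1.5, 0.0, 1.5]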
def sample(self):
    words, context, target, cluster_iden, ixs = self.generate()
    input_x = torch.cat([words, context], dim=1)
    kernel = self.kernel_net(input_x)
    vals, vecs = custom_decomp()(kernel)
    subset = DPP()(vals, vecs)
    pick = subset.diag().mm(words).sum(0, keepdim=True)
    self.pred = self.pred_net(pick).squeeze()
    loss = nn.MSELoss()(self.pred, target)

    # Subset statistics
    precision, recall, set_size = self.assess(subset.data, ixs)

    # Print
    print('Target is: ', target.data)
    print('Pred is: ', self.pred.data)
    print('Loss is:', loss.data[0])
    print('Subset is:', subset.data)
    print('Ix is:', ixs)
    print('Subset statistics are:', precision, recall, set_size)
def evaluate(self, test_iter):
    set_size = self.set_size
    n_clusters = self.n_clusters
    kernel_in = self.kernel_in
    kernel_out = self.kernel_out
    embd_dim = self.pred_in
    dtype = self.dtype
    criterion = self.criterion
    batch_size = 1

    cum_loss = 0.
    cum_prec = 0.
    cum_rec = 0.
    cum_ssize = 0.
    n_missed = 0.
    n_one = 0
    n_many = 0.
    n_perfect = 0.

    # Running mean / variance of the subset size (Welford's algorithm).
    mean = 0.
    temp = 0.
    var = 0.

    for t in range(test_iter):
        picks = [[] for i in range(batch_size)]
        words, context, target, index = self.generate(batch_size)

        # Concatenate individual words and set context.
        # Dimensions are batch_size x set_size x kernel_in.
        batch_x = Variable(torch.cat([words, context], dim=2))

        # Compute embedding of the DPP kernel.
        batch_kernel = self.kernel_net(batch_x.view(-1, kernel_in))
        batch_kernel = batch_kernel.view(-1, set_size, kernel_out)

        for i, kernel in enumerate(batch_kernel):
            vals, vecs = custom_decomp()(kernel)
            subset = DPP()(vals, vecs)
            pick = subset.diag().mm(Variable(words[i])).sum(0, keepdim=True)
            picks[i].append(pick)

            _, prec, rec, ssize = self._big_assess(index[i], subset.data)
            missed, one, many, perfect = self._eval_assess(index, subset.data)
            cum_prec += prec
            cum_rec += rec
            cum_ssize += ssize
            n_missed += missed
            n_one += one
            n_many += many
            n_perfect += perfect

        picks = torch.stack([torch.stack(pick) for pick in picks]).view(-1, embd_dim)
        preds = self.pred_net(picks).view(1, -1)
        targets = target.unsqueeze(0).expand_as(preds)
        loss = criterion(preds, Variable(targets, volatile=True))
        cum_loss += loss.data[0]

        # Welford update of the running subset-size statistics.
        delta = ssize - mean
        mean += delta / (t + 1)
        delta2 = ssize - mean
        temp += delta * delta2
        var = temp / test_iter

    print("Average Subset Size: ", mean)
    print("Subset Variance: ", var)
    print("Average Loss", cum_loss / test_iter)
    print("n_missed share", n_missed / (test_iter * self.n_clusters))
    print("n_one share", n_one / (test_iter * self.n_clusters))
    print("n_many share", n_many / (test_iter * self.n_clusters))
    print("n_perfect share", n_perfect / test_iter)
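# --- Hedged sketch (illustration only) -------------------------------------
# The running `mean` / `temp` updates in evaluate() follow Welford's online
# algorithm for the mean and (population) variance of the observed subset
# sizes. A standalone version of the same update rule:

def welford(values):
    """Online mean and population variance via Welford's algorithm."""
    mean, m2 = 0.0, 0.0
    for n, x in enumerate(values, start=1):
        delta = x - mean
        mean += delta / n
        m2 += delta * (x - mean)
    variance = m2 / len(values) if values else 0.0
    return mean, variance

# Example: welford([2, 4, 4, 4, 5, 5, 7, 9]) -> (5.0, 4.0)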
# Concatenate individual words and set context.
# Dimensions are batch_size x set_size x kernel_in.
batch_x = Variable(torch.cat([words, context], dim=2))

# Compute embedding of the DPP kernel.
batch_kernel = self.kernel_net(batch_x.view(-1, kernel_in))
batch_kernel = batch_kernel.view(-1, set_size, kernel_out)

if reg:
    reg_loss = 0

for i, kernel in enumerate(batch_kernel):
    vals, vecs = custom_decomp()(kernel)
    for j in range(alpha_iter):
        subset = DPP()(vals, vecs)
        pick = subset.diag().mm(Variable(words[i])).sum(0, keepdim=True)
        actions[i].append(subset)
        picks[i].append(pick)

        _, prec, rec, ssize = self._big_assess(index[i], subset.data)
        cum_prec += prec
        cum_rec += rec
        cum_ssize += ssize

        if reg:
            # Penalize deviation of the expected subset size from reg_mean.
            exp_ssize = (vals / (1 + vals)).sum()
            reg_loss += reg * (exp_ssize - reg_mean) ** 2

picks = torch.stack([torch.stack(pick) for pick in picks]).view(-1, embd_dim)
preds = self.pred_net(picks).view(batch_size, alpha_iter, -1)
def train(self, train_steps, batch_size=1, sample_iter=1, lr=1e-3,
          baseline=False, reg=0, reg_mean=0):
    if baseline:
        assert sample_iter > 1
    params = [{'params': self.kernel_net.parameters()},
              {'params': self.pred_net.parameters()}]
    optimizer = optim.Adam(params, lr=lr)
    train_iter = train_steps * batch_size

    cum_loss = 0
    cum_prec = 0
    cum_rec = 0
    cum_size = 0

    for t in range(train_iter):
        actions = self.saved_subsets = []
        rewards = self.saved_losses = []
        picks = []
        words, context, ixs, target = self.generate()
        input_x = torch.cat([words, context], dim=1)
        kernel = self.kernel_net(input_x)
        vals, vecs = custom_decomp()(kernel)
        pred_loss = 0

        for j in range(sample_iter):
            subset = DPP()(vals, vecs)
            actions.append(subset)
            pick = subset.diag().mm(words).sum(0, keepdim=True)
            self.pred = self.pred_net(pick).squeeze()
            loss = nn.BCELoss()(self.pred, target)
            rewards.append(loss.data[0])
            pred_loss += loss

            # For the statistics
            precision, recall, set_size = self.assess(subset.data, ixs)
            cum_loss += loss.data[0]
            cum_prec += precision
            cum_rec += recall
            cum_size += set_size

        # Compute baselines
        if baseline:
            self.saved_baselines = compute_baseline(self.saved_losses)
        else:
            self.saved_baselines = self.saved_losses

        # Register rewards on the stochastic subset samples (REINFORCE)
        for action, reward in zip(self.saved_subsets, self.saved_baselines):
            action.reinforce(reward)

        # Apply regularization on the expected subset size
        total_loss = pred_loss
        if reg:
            card = (vals / (1 + vals)).sum()
            reg_loss = sample_iter * reg * ((card - reg_mean) ** 2)
            total_loss += reg_loss

        total_loss.backward()

        if not ((t + 1) % batch_size):
            optimizer.step()
            optimizer.zero_grad()

        if not ((t + 1) % (batch_size * 100)):
            print(cum_loss / (batch_size * sample_iter))
            self.loss_dict[self.counter].append(cum_loss / (batch_size * sample_iter))
            self.prec_dict[self.counter].append(cum_prec / (batch_size * sample_iter))
            self.rec_dict[self.counter].append(cum_rec / (batch_size * sample_iter))
            self.ssize_dict[self.counter].append(cum_size / (batch_size * sample_iter))
            self.counter += 1
            cum_loss = 0
            cum_prec = 0
            cum_rec = 0
            cum_size = 0
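# --- Hedged sketch (modern equivalent, not the original API) ---------------
# `action.reinforce(reward)` uses the stochastic-Variable API of early
# PyTorch (<= 0.3), which was later removed. The same score-function
# (REINFORCE) estimator can be written with torch.distributions by scoring
# each sampled subset under its own distribution. The Bernoulli marginals
# below are only a stand-in for the DPP sampler used above, and the sign of
# the reward may need to flip when the reward is a loss, as in this code.

import torch
from torch.distributions import Bernoulli

def reinforce_surrogate_sketch(probs, subset, reward):
    """Surrogate loss whose gradient is -reward * grad log p(subset)."""
    dist = Bernoulli(probs=probs)
    log_prob = dist.log_prob(subset).sum()
    return -reward * log_prob

# Example usage (hypothetical): with probs = torch.sigmoid(scores) and a
# sampled 0/1 subset, calling .backward() on the surrogate yields the
# REINFORCE gradient with respect to the parameters that produced `scores`.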