def __call__(self, pred, target, action_list, exp_sizes):
    batch_size, alpha_iter, target_dim = pred.size()
    target = target.unsqueeze(1).expand_as(pred)
    losses = (pred - target).pow(2).mean(2, keepdim=True)
    self.saved_losses = [[i.data[0] for i in row] for row in losses]
    self.saved_baselines = [compute_baseline(i) for i in self.saved_losses]

    for actions, rewards in zip(action_list, self.saved_baselines):
        for action, reward in zip(actions, rewards):
            action.reinforce(reward)

    if self.reg:
        reg_loss = self.reg * (
            (torch.stack(exp_sizes) - self.reg_mean)**2).sum()
        reg_loss.backward(retain_variables=True)

    # Backpropagate through the kernel network
    pseudo_loss = torch.stack(
        [torch.stack(actions) for actions in action_list]).sum()
    pseudo_loss.backward(None)

    # Backpropagate through the prediction network
    loss = losses.mean()
    loss.backward()
    return loss
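# `compute_baseline` is called throughout these snippets but not defined here.
# A minimal, hypothetical sketch of such a helper, assuming it implements a
# leave-one-out (mean-centred) baseline over the repeated samples drawn for
# the same input; the real helper, and in particular the sign convention the
# custom DPP sampler expects in `reinforce`, may differ.
def compute_baseline(losses):
    n = len(losses)
    if n < 2:
        # With a single sample there is nothing to centre against.
        return list(losses)
    total = sum(losses)
    # Centre each loss on the mean of the *other* samples for variance reduction.
    return [l - (total - l) / (n - 1) for l in losses]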
def train(self, train_iter, batch_size, lr, alpha_iter=1, baseline=True):
    """Train the model.

    Does not use the forward pass because we want to sample repeatedly.
    """
    set_size = self.set_size
    kernel_in = self.kernel_in
    kernel_out = self.kernel_out
    loss_log = 100
    optimizer = optim.Adam(self.kernel_net.parameters(), lr=lr)

    for t in range(train_iter):
        actions = self.saved_subsets = [[] for i in range(batch_size)]
        rewards = self.saved_losses = [[] for i in range(batch_size)]
        cum_loss = 0.

        words, context, target = self.generate(batch_size)

        # Concatenate individual words and set context.
        # Dimensions are batch_size x set_size x kernel_in.
        batch_x = Variable(torch.cat([words, context], dim=2))

        # Compute embedding of the DPP kernel.
        batch_kernel = self.kernel_net(batch_x.view(-1, kernel_in))
        batch_kernel = batch_kernel.view(-1, set_size, kernel_out)

        for i, kernel in enumerate(batch_kernel):
            vals, vecs = custom_decomp()(kernel)
            for j in range(alpha_iter):
                subset = DPP()(vals, vecs)
                actions[i].append(subset)
                loss, _, _, _, _ = self._assess(target[i], subset.data)
                rewards[i].append(loss)
                cum_loss += loss

        if baseline:
            self.saved_baselines = [compute_baseline(i)
                                    for i in self.saved_losses]
        else:
            self.saved_baselines = self.saved_losses

        # Register the baselines
        for actions, rewards in zip(self.saved_subsets, self.saved_baselines):
            for action, reward in zip(actions, rewards):
                action.reinforce(reward)

        pseudo_loss = torch.stack([torch.stack(subsets)
                                   for subsets in self.saved_subsets]).sum()
        pseudo_loss.backward(None)
        optimizer.step()
        optimizer.zero_grad()

        self.loss_dict[t].append(cum_loss / (batch_size * alpha_iter))
        if not ((t + 1) % loss_log):
            print("Loss at it ", t + 1, " is: ",
                  cum_loss / (batch_size * alpha_iter))
def forward(self, words, target):
    # The returned words are masked now!
    kernel, words = self.kernel_net(words)
    self.sampler.s_ix = self.kernel_net.s_ix
    self.sampler.e_ix = self.kernel_net.e_ix
    weighted_words = self.sampler(kernel, words)
    self.pred_net.s_ix = self.sampler.s_ix
    self.pred_net.e_ix = self.sampler.e_ix
    self.pred = self.pred_net(weighted_words)
    if self.activation:
        self.pred = self.activation(self.pred)
    self.pred_loss = self.criterion(self.pred, target)

    if self.reg:
        perc_extract = (torch.stack(self.sampler.exp_sizes) /
                        Variable(self.kernel_net.lengths.squeeze(2).data))
        self.reg_loss = self.reg * (
            perc_extract - (self.reg_mean / 100)).pow(2).mean()
        self.loss = self.pred_loss + self.reg_loss
    else:
        self.loss = self.pred_loss

    # Compute baselines and register the rewards.
    losses = (self.pred - target).pow(2).view(
        -1, self.alpha_iter, target.size(-1)).mean(2, keepdim=True)
    self.saved_losses = [[i.data[0] for i in row] for row in losses]
    if self.alpha_iter > 1:
        self.saved_baselines = [
            compute_baseline(i) for i in self.saved_losses
        ]
    else:
        self.saved_baselines = self.saved_losses
    self.saved_subsets = self.sampler.saved_subsets

    for actions, rewards in zip(self.saved_subsets, self.saved_baselines):
        for action, reward in zip(actions, rewards):
            action.reinforce(reward)

    return self.loss
if reg:
    exp_ssize = (vals / (1 + vals)).sum()
    reg_loss += reg * (exp_ssize - reg_mean)**2

picks = torch.stack([torch.stack(pick)
                     for pick in picks]).view(-1, embd_dim)
preds = self.pred_net(picks).view(batch_size, alpha_iter, -1)
targets = target.unsqueeze(1).expand_as(preds)
loss = criterion(preds, Variable(targets))

# Compute individual losses and the baseline
losses = ((preds - Variable(targets))**2).mean(2)
self.saved_losses = [[i.data[0] for i in row] for row in losses]
if baseline:
    self.saved_baselines = [compute_baseline(i)
                            for i in self.saved_losses]
else:
    self.saved_baselines = self.saved_losses

# Register the baselines
for actions, rewards in zip(self.saved_subsets, self.saved_baselines):
    for action, reward in zip(actions, rewards):
        action.reinforce(reward)

if reg:
    (loss + reg_loss).backward()
else:
    loss.backward(None)
optimizer.step()
optimizer.zero_grad()
def train(self, train_steps, batch_size=1, sample_iter=1, lr=1e-3,
          baseline=False, reg=0, reg_mean=0):
    if baseline:
        assert sample_iter > 1

    params = [{'params': self.kernel_net.parameters()},
              {'params': self.pred_net.parameters()}]
    optimizer = optim.Adam(params, lr=lr)
    train_iter = train_steps * batch_size

    cum_loss = 0
    cum_prec = 0
    cum_rec = 0
    cum_size = 0

    for t in range(train_iter):
        actions = self.saved_subsets = []
        rewards = self.saved_losses = []
        picks = []

        words, context, ixs, target = self.generate()
        input_x = torch.cat([words, context], dim=1)
        kernel = self.kernel_net(input_x)
        vals, vecs = custom_decomp()(kernel)

        pred_loss = 0
        for j in range(sample_iter):
            subset = DPP()(vals, vecs)
            actions.append(subset)
            pick = subset.diag().mm(words).sum(0, keepdim=True)
            self.pred = self.pred_net(pick).squeeze()
            loss = nn.BCELoss()(self.pred, target)
            rewards.append(loss.data[0])
            pred_loss += loss

            # For the statistics
            precision, recall, set_size = self.assess(subset.data, ixs)
            cum_loss += loss.data[0]
            cum_prec += precision
            cum_rec += recall
            cum_size += set_size

        # Compute baselines
        if baseline:
            self.saved_baselines = compute_baseline(self.saved_losses)
        else:
            self.saved_baselines = self.saved_losses

        # Register rewards
        for action, reward in zip(self.saved_subsets, self.saved_baselines):
            action.reinforce(reward)

        # Apply regularization
        total_loss = pred_loss
        if reg:
            card = (vals / (1 + vals)).sum()
            reg_loss = sample_iter * reg * ((card - reg_mean)**2)
            total_loss += reg_loss

        total_loss.backward()
        if not ((t + 1) % batch_size):
            optimizer.step()
            optimizer.zero_grad()

        if not ((t + 1) % (batch_size * 100)):
            print(cum_loss / (batch_size * sample_iter))
            self.loss_dict[self.counter].append(
                cum_loss / (batch_size * sample_iter))
            self.prec_dict[self.counter].append(
                cum_prec / (batch_size * sample_iter))
            self.rec_dict[self.counter].append(
                cum_rec / (batch_size * sample_iter))
            self.ssize_dict[self.counter].append(
                cum_size / (batch_size * sample_iter))
            self.counter += 1
            cum_loss = 0
            cum_prec = 0
            cum_rec = 0
            cum_size = 0
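# Both training loops regularize the expected subset size via the identity
# that, for an L-ensemble DPP whose kernel has eigenvalues lambda_i, the
# expected sample cardinality is E[|Y|] = sum_i lambda_i / (1 + lambda_i).
# A small self-contained illustration of that term (tensor names are
# illustrative, not taken from the model above):
import torch

def expected_dpp_size(vals):
    # `vals`: 1-D tensor of kernel eigenvalues.
    return (vals / (1 + vals)).sum()

vals = torch.Tensor([0.5, 2.0, 4.0])
print(expected_dpp_size(vals))  # 1/3 + 2/3 + 4/5 = 1.8
# The penalty above is then e.g. reg * (expected_dpp_size(vals) - reg_mean) ** 2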
def forward(self, reviews, target):
    batch_size = target.size(0)
    alpha_iter = self.alpha_iter
    try:
        target_dim = target.size(1)
    except RuntimeError:
        target = target.unsqueeze(1)
        target_dim = target.size(1)

    # Repeat each target alpha_iter times so it lines up with the samples.
    target = target.unsqueeze(1).expand(
        batch_size, alpha_iter, target_dim).contiguous().view(
            batch_size * alpha_iter, target_dim)

    words = self.embd(reviews)
    kernel, words = self.kernel_net(words)
    self.sampler.s_ix = self.kernel_net.s_ix
    self.sampler.e_ix = self.kernel_net.e_ix
    weighted_words = self.sampler(kernel, words)
    self.pred_net.s_ix = self.sampler.s_ix
    self.pred_net.e_ix = self.sampler.e_ix
    self.pred = self.pred_net(weighted_words)
    if self.activation:
        self.pred = self.activation(self.pred)
    self.pred_loss = self.criterion(self.pred, target)

    if self.reg:
        self.reg_loss = self.reg * (torch.stack(self.sampler.exp_sizes) -
                                    self.reg_mean).pow(2).mean()
        self.loss = self.pred_loss + self.reg_loss
    else:
        self.loss = self.pred_loss

    # Compute baselines and register the rewards.
    losses = (self.pred - target).pow(2).view(
        batch_size, alpha_iter, target_dim).mean(2, keepdim=True)
    self.saved_losses = [[i.data[0] for i in row] for row in losses]
    if self.alpha_iter > 1:
        self.saved_baselines = [
            compute_baseline(i) for i in self.saved_losses
        ]
    else:
        self.saved_baselines = self.saved_losses
    self.saved_subsets = self.sampler.saved_subsets

    for actions, rewards in zip(self.saved_subsets, self.saved_baselines):
        for action, reward in zip(actions, rewards):
            action.reinforce(reward)

    return self.loss
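# The reward-registering pattern above (`action.reinforce(reward)` followed by
# a pseudo-loss backward pass) relies on the old Variable.reinforce API, which
# later PyTorch releases removed in favour of torch.distributions. A minimal,
# generic sketch of the equivalent explicit score-function (REINFORCE) update;
# it uses a Categorical distribution purely for illustration and is not this
# model's DPP sampler.
import torch
from torch.distributions import Categorical

logits = torch.randn(4, requires_grad=True)
dist = Categorical(logits=logits)

action = dist.sample()                          # stochastic choice
reward = torch.tensor(0.7)                      # e.g. baseline-corrected (-loss)
surrogate = -(reward * dist.log_prob(action))   # ascend expected reward
surrogate.backward()                            # gradients flow into `logits`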