def _masked_mean_pool(model, tokens, pad_mask):
    """Encode ``tokens`` with ``model`` and average over the masked positions.

    Per the original author's notes, the model output is
    batch_size x samples x output_size x (len - kernel + 1) and ``pad_mask``
    is batch_size x samples x (len - kernel + 1).

    Returns the masked sum over the last axis divided by the mask total
    (clamped to at least 1).
    """
    encoded = model(tokens)
    # Broadcast the mask across the feature axis so masked-out positions
    # contribute nothing to the sum.
    mask = torch.unsqueeze(pad_mask, 2).expand_as(encoded)
    # NOTE(review): squeezing dim 3 right after summing over dim 3, and the
    # expand_as() on the mask total below, only work if torch.sum keeps the
    # reduced dimension. Presumably written against an early PyTorch release
    # where that was the default -- confirm before upgrading PyTorch.
    pooled = torch.squeeze(torch.sum(encoded * mask, dim=3), dim=3)
    # Tensors are now of dim batch_size x samples x output_size.
    # Scale down because sequences are not uniformly padded; the clamp
    # prevents a division by zero when a mask sums to 0.
    norm = torch.sum(pad_mask, dim=2).clamp(min=1).expand_as(pooled)
    return pooled / norm


def run_epoch(data, is_training, model, optimizer, batch_size=20):
    """Run one pass over ``data``, either training or evaluating ``model``.

    Args:
        data: dataset handed to ``torch.utils.data.DataLoader``; each batch
            must provide the keys ``pid_title``, ``pid_body``, ``rest_title``,
            ``rest_body``, their ``*_pad`` counterparts and (for evaluation)
            ``labels``.
        is_training: when true, put the model in train mode and take an
            optimizer step per batch; otherwise put it in eval mode and
            collect ranking results.
        model: encoder applied to the title and body tensors.
        optimizer: optimizer stepped once per batch (used only in training).
        batch_size: number of examples per batch (defaults to the previously
            hard-coded 20).

    Returns:
        In training mode, the mean of the per-batch losses.
        In evaluation mode, the tuple ``(MAP, MRR, P@1, P@5)`` computed by
        ``Evaluation`` and scaled by 100.
    """
    data_loader = torch.utils.data.DataLoader(
        data, batch_size=batch_size, shuffle=True, num_workers=4,
        drop_last=False)

    # Holds per-batch loss values in training mode, or the per-query results
    # produced by ``convert`` in evaluation mode.
    losses = []

    if is_training:
        model.train()
    else:
        model.eval()

    for batch in data_loader:
        # The query ("pid") tensors get an extra axis at position 1 so they
        # have the same rank as the candidate ("rest") tensors.
        pid_title = torch.unsqueeze(Variable(batch['pid_title']), 1).cuda()
        pid_body = torch.unsqueeze(Variable(batch['pid_body']), 1).cuda()
        rest_title = Variable(batch['rest_title']).cuda()
        rest_body = Variable(batch['rest_body']).cuda()
        pid_title_pad = torch.unsqueeze(
            Variable(batch['pid_title_pad']), 1).cuda()
        pid_body_pad = torch.unsqueeze(
            Variable(batch['pid_body_pad']), 1).cuda()
        rest_title_pad = Variable(batch['rest_title_pad']).cuda()
        rest_body_pad = Variable(batch['rest_body_pad']).cuda()

        if is_training:
            optimizer.zero_grad()

        # Model calls stay in the original order (query title, query body,
        # candidate titles, candidate bodies).
        pt = _masked_mean_pool(model, pid_title, pid_title_pad)
        pb = _masked_mean_pool(model, pid_body, pid_body_pad)
        rt = _masked_mean_pool(model, rest_title, rest_title_pad)
        rb = _masked_mean_pool(model, rest_body, rest_body_pad)

        # A question is represented by the average of its title and body.
        pid_tensor = (pt + pb) / 2
        rest_tensor = (rt + rb) / 2

        if is_training:
            batch_loss = loss_function(pid_tensor, rest_tensor)
            batch_loss.backward()
            losses.append(batch_loss.cpu().data[0])
            optimizer.step()
        else:
            expanded = pid_tensor.expand_as(rest_tensor)
            similarity = cs(expanded, rest_tensor, dim=2).squeeze(2)
            similarity = similarity.data.cpu().numpy()
            labels = batch['labels'].numpy()
            losses.extend(convert(similarity, labels))

    # Calculate epoch level scores
    if is_training:
        return np.mean(losses)

    e = Evaluation(losses)
    MAP = e.MAP() * 100
    MRR = e.MRR() * 100
    P1 = e.Precision(1) * 100
    P5 = e.Precision(5) * 100
    return (MAP, MRR, P1, P5)
def _masked_sum_pool(model, tokens, mask):
    """Encode ``tokens`` with ``model`` and sum the masked outputs.

    Per the original annotations, the model output is
    batch_size x samples x output_size x length and ``mask`` is
    batch_size x samples x length; the result is
    batch_size x samples x output_size.
    """
    encoded = model(tokens)
    # Broadcast the mask across the feature axis before applying it.
    mask = torch.unsqueeze(mask, 2).expand_as(encoded)
    # NOTE(review): the masked sum is not divided by the mask total here.
    # Presumably the masks are already normalised by the dataset -- confirm.
    return torch.sum(encoded * mask, 3)


def _rank_labels_by_similarity(similarity, labels):
    """Return each row of ``labels`` reordered by descending similarity."""
    return [row_labels[(-row_sim).argsort()]
            for row_sim, row_labels in zip(similarity, labels)]


def run_epoch(data, is_training, model, optimizer, transfer=False,
              batch_size=10):
    """Run one pass over ``data``, either training or evaluating ``model``.

    Args:
        data: dataset handed to ``torch.utils.data.DataLoader``; each batch
            must provide ``pid_title``, ``pid_body``, ``candidate_titles``,
            ``candidate_body``, their ``*_mask`` counterparts and (for
            evaluation) ``labels``.
        is_training: when true, put the model in train mode and take an
            optimizer step per batch; otherwise put it in eval mode.
        model: encoder applied to the title and body tensors.
        optimizer: optimizer stepped once per batch (used only in training).
        transfer: evaluation only. When true, score with ``AUCMeter``
            instead of the ranking metrics from ``Evaluation``.
        batch_size: number of examples per batch (defaults to the previously
            hard-coded 10).

    Returns:
        Training: the mean of the per-batch losses.
        Evaluation with ``transfer``: ``auc.value(max_fpr=0.05)``.
        Evaluation otherwise: the tuple ``(MAP, MRR, P@1, P@5)`` computed by
        ``Evaluation`` and scaled by 100.
    """
    # Make batches
    data_loader = torch.utils.data.DataLoader(
        data, batch_size=batch_size, shuffle=True, num_workers=4,
        drop_last=False)

    losses = []    # per-batch losses (training) or ranked label rows (eval)
    actual = []    # transfer evaluation: similarity rows
    expected = []  # transfer evaluation: flattened labels

    if is_training:
        model.train()
    else:
        model.eval()

    for batch in data_loader:
        # Unpack the batch. Sizes below follow the original annotations:
        # titles have length 40, bodies length 100, and training uses 21
        # candidates per query. The query tensors get an extra axis at
        # position 1 so they have the same rank as the candidate tensors.
        pid_title = torch.unsqueeze(
            Variable(batch['pid_title']), 1).cuda()
        pid_title_mask = torch.unsqueeze(
            Variable(batch['pid_title_mask']), 1).cuda()
        pid_body = torch.unsqueeze(
            Variable(batch['pid_body']), 1).cuda()
        pid_body_mask = torch.unsqueeze(
            Variable(batch['pid_body_mask']), 1).cuda()
        # batch_size x #candidates x title_length=40
        candidate_title = Variable(batch['candidate_titles']).cuda()
        candidate_title_mask = Variable(batch['candidate_titles_mask']).cuda()
        # batch_size x #candidates x body_length=100
        candidate_body = Variable(batch['candidate_body']).cuda()
        candidate_body_mask = Variable(batch['candidate_body_mask']).cuda()

        if is_training:
            optimizer.zero_grad()

        # Run text through the model; calls stay in the original order
        # (query title, query body, candidate titles, candidate bodies).
        good_title = _masked_sum_pool(model, pid_title, pid_title_mask)
        good_body = _masked_sum_pool(model, pid_body, pid_body_mask)
        cand_titles = _masked_sum_pool(
            model, candidate_title, candidate_title_mask)
        cand_bodies = _masked_sum_pool(
            model, candidate_body, candidate_body_mask)

        # A question is represented by the average of its title and body.
        good_tensor = (good_title + good_body) / 2  # batch_size x 1 x output_size
        cand_tensor = (cand_titles + cand_bodies) / 2  # batch_size x #candidates x output_size

        if is_training:
            # Named batch_loss so the module-level ``loss`` callable is not
            # shadowed inside this function.
            batch_loss = loss(good_tensor, cand_tensor, 1.0)
            batch_loss.backward()
            losses.append(batch_loss.cpu().data[0])
            optimizer.step()
            continue

        similarity = cosine_sim(
            good_tensor.expand_as(cand_tensor), cand_tensor, dim=2)

        if transfer:
            # Keep torch tensors so the AUC meter can consume them later.
            similarity = torch.FloatTensor(similarity.data.cpu().numpy())
            labels = batch['labels']
            actual.extend(similarity)  # one entry per row of the batch
            expected.extend(labels.view(-1))
        else:
            similarity = similarity.data.cpu().numpy()
            labels = batch['labels'].numpy()
            losses.extend(_rank_labels_by_similarity(similarity, labels))

    if is_training:
        return np.mean(losses)

    if transfer:
        auc = AUCMeter()
        auc.reset()
        auc.add(torch.cat(actual), torch.LongTensor(expected))
        return auc.value(max_fpr=0.05)

    e = Evaluation(losses)
    MAP = e.MAP() * 100
    MRR = e.MRR() * 100
    P1 = e.Precision(1) * 100
    P5 = e.Precision(5) * 100
    return (MAP, MRR, P1, P5)