ans_offset[:, 0]).cuda() if use_cuda else Variable(ans_offset[:, 0]) end_ans = Variable(ans_offset[:, 1]).cuda() if use_cuda else Variable( ans_offset[:, 1]) appear = Variable(appear).cuda() if use_cuda else Variable(appear) start, end, start_attn, end_attn = fusion_net(context, q, appear) loss = criterion(start_attn, start_ans) + criterion( end_attn, end_ans) loss.backward() nn.utils.clip_grad_norm(fusion_net.parameters(), 10) optimizer.step() start, end, scores = decode(start.data.cpu(), end.data.cpu(), 1) f1_score, exact_match_score = batch_score(start, end, ans_offset) if batch % args.display_freq == 0: print( 'epoch: %d | batch: %d/%d| loss: %f | f1: %f | exact: %f' % (epoch, batch, len(train_engine), loss.data[0], f1_score, exact_match_score)) batch += 1 valid_f1, valid_exact = 0, 0 fusion_net.eval() for context, q, ans_offset, appear in valid_engine: context = Variable(context).cuda() if use_cuda else Variable( context) q = Variable(q).cuda() if use_cuda else Variable(q) start_ans = Variable(
# Evaluation / prediction script: load the trained network, report validation
# F1 and exact-match, then start writing test-set predictions to predict.csv.


def _to_variable(tensor):
    """Wrap ``tensor`` in a ``Variable``, moving it to the GPU when ``use_cuda`` is set."""
    wrapped = Variable(tensor)
    return wrapped.cuda() if use_cuda else wrapped


# When CUDA is not in use, remap every storage onto the CPU so that a
# checkpoint whose tensors were saved from a GPU can still be deserialized.
# With CUDA, the default restore behaviour (map_location=None) is kept.
map_location = None if use_cuda else (lambda storage, loc: storage)
fusion_net = torch.load('model.cpt', map_location=map_location)
if use_cuda:
    fusion_net = fusion_net.cuda()
fusion_net.eval()

# ---- Validation: accumulate per-batch scores and report their mean ----
valid_f1, valid_exact = 0, 0
for context, q, ans_offset, appear in valid_engine:
    context = _to_variable(context)
    q = _to_variable(q)
    appear = _to_variable(appear)
    # Gold offsets are not wrapped as Variables here: scoring below consumes
    # ans_offset directly.
    start, end, start_attn, end_attn = fusion_net(context, q, appear)
    # decode() is defined elsewhere; the trailing arguments (1, 20) are
    # presumably a top-k count and a maximum span length -- TODO confirm.
    start, end, scores = decode(start.data.cpu(), end.data.cpu(), 1, 20)
    f1_score, exact_match_score = batch_score(start, end, ans_offset)
    valid_f1 += f1_score
    valid_exact += exact_match_score
print('valid_f1: %f | valid_exact: %f' % (
    valid_f1 / len(valid_engine),
    valid_exact / len(valid_engine),
))

# ---- Test-set prediction ----
# NOTE(review): `f` is not closed within the visible code -- confirm that it is
# closed (or wrapped in a `with`) once the prediction loop finishes.
f = open('predict.csv', 'w')
f.write('id,answer\n')
for i in tqdm(range(len(test_engine))):
    context, q, ans_offset, appear = test_engine[i]
    context = _to_variable(context)
    q = _to_variable(q)
    appear = _to_variable(appear)
    # Test examples are fetched one at a time, so add a leading axis of size 1
    # to each input before calling the network.
    start, end, start_attn, end_attn = fusion_net(
        context.unsqueeze(0), q.unsqueeze(0), appear.unsqueeze(0))