import torch
import torch.autograd as ag
import torch.nn as nn
from torch import cuda

import coref
import utils


def train(doc_lstm_model, attn_model, scoring_model, optimizer, words_set,
          markable_set, feats, word_limit, epochs=2, margin=1.0, use_cuda=False):
    if not use_cuda:
        _zero = ag.Variable(torch.Tensor([0]))
    else:
        _zero = ag.Variable(torch.cuda.FloatTensor([0]))
        doc_lstm_model.to_cuda()
        attn_model.to_cuda()
        scoring_model.to_cuda()

    for ep in range(epochs):
        tot_loss = 0.0
        instances = 0
        doc_losses = []

        for words, marks in zip(words_set, markable_set):
            # Truncate each document and drop markables beyond the word limit.
            words = words[:word_limit]
            marks = [m for m in marks if m.end_token < word_limit]
            if not marks:
                continue  # nothing left to score in this document

            optimizer.zero_grad()
            doc_lstm_model.clear_hidden_state()

            if not use_cuda:
                loss = ag.Variable(torch.FloatTensor([0.0]))
            else:
                loss = ag.Variable(torch.cuda.FloatTensor([0.0]))

            # Run the document LSTM once, then build an attention-weighted
            # embedding for each markable.
            base_embs = doc_lstm_model(words)
            att_embs = [attn_model(base_embs, m) for m in marks]
            true_ants = coref.get_true_antecedents(marks)

            # Accumulate a margin loss over markables: the best true-antecedent
            # score should beat the best false-antecedent score by `margin`.
            for i in range(len(marks)):
                max_t, max_f = scoring_model.instance_top_scores(
                    att_embs, marks, i, true_ants[i], feats)
                if max_t is None:
                    continue
                if not use_cuda:
                    marg = ag.Variable(torch.Tensor([margin])) - max_t + max_f
                else:
                    marg = ag.Variable(torch.cuda.FloatTensor([margin])) - max_t + max_f
                loss += torch.max(torch.cat((_zero, marg)))

            instances += len(marks)
            sc_loss = utils.to_scalar(loss)
            tot_loss += sc_loss
            doc_losses.append(f'{sc_loss / len(marks):.5f}')

            loss.backward()
            optimizer.step()

        print(f'Epoch {ep+1} complete.\nDocument losses = {", ".join(doc_losses)}')
        print(f'Overall loss = {tot_loss / instances:.5f}')
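# A minimal sketch (not part of the original code) of the hinge term that the
# loop above accumulates per markable: max(0, margin - max_t + max_f), where
# max_t / max_f stand in for the scores returned by
# scoring_model.instance_top_scores. The values are made up for illustration;
# torch.clamp(..., min=0) is equivalent to the torch.max(torch.cat((_zero, marg)))
# form used inside train().
def _margin_term_sketch():
    margin = 1.0
    max_t = torch.Tensor([0.4])  # best score among true antecedents (dummy)
    max_f = torch.Tensor([0.9])  # best score among false antecedents (dummy)
    hinge = torch.clamp(margin - max_t + max_f, min=0.0)
    return hinge  # tensor([1.5]); zero whenever max_t beats max_f by >= margin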
def train(data, model, optimizer, verbose=True):
    criterion = nn.NLLLoss()
    if model.use_cuda:
        criterion.cuda()

    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0

    for sentence, actions in data:
        # Skip trivially short sentences.
        if len(sentence) <= 2:
            continue

        optimizer.zero_grad()
        model.refresh()

        outputs, _, actions_done = model(sentence, actions)

        if model.use_cuda:
            loss = ag.Variable(cuda.FloatTensor([0]))
            action_idxs = [ag.Variable(cuda.LongTensor([a])) for a in actions_done]
        else:
            loss = ag.Variable(torch.FloatTensor([0]))
            action_idxs = [ag.Variable(torch.LongTensor([a])) for a in actions_done]

        # Sum the NLL loss over every parsing action taken for this sentence.
        for output, act in zip(outputs, action_idxs):
            loss += criterion(output.view(-1, 3), act)

        tot_loss += utils.to_scalar(loss.data)
        instance_count += 1

        # Count how many predicted actions match the gold actions.
        for gold, output in zip(actions_done, outputs):
            pred_act = utils.argmax(output.data)
            if pred_act == gold:
                correct_actions += 1
        total_actions += len(outputs)

        loss.backward()
        optimizer.step()

    acc = float(correct_actions) / total_actions
    loss = float(tot_loss) / instance_count
    if verbose:
        print("Number of instances: {}    Number of network actions: {}".format(
            instance_count, total_actions))
        print("Acc: {}  Loss: {}".format(acc, loss))
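# A minimal sketch (not from the original code) of how train() accumulates the
# per-action NLL loss: each element of `outputs` is treated as a row of
# log-probabilities over the parser's 3 actions, and nn.NLLLoss is summed over
# the steps. The log-probability rows and gold indices below are dummies
# standing in for the model's outputs and actions_done.
def _nll_accumulation_sketch():
    criterion = nn.NLLLoss()
    outputs = [torch.nn.functional.log_softmax(torch.randn(1, 3), dim=1)
               for _ in range(4)]                         # one row per parser step
    gold = [torch.LongTensor([a]) for a in (0, 2, 1, 0)]  # dummy gold actions
    loss = torch.zeros(1)
    for output, act in zip(outputs, gold):
        loss = loss + criterion(output.view(-1, 3), act)
    return loss  # summed NLL over the 4 actions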
def evaluate(data, model, verbose=False):
    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0

    criterion = nn.NLLLoss()
    if model.use_cuda:
        criterion.cuda()

    for sentence, actions in data:
        if len(sentence) > 1:
            outputs, _, actions_done = model(sentence, actions)

            if model.use_cuda:
                loss = ag.Variable(cuda.FloatTensor([0]))
                action_idxs = [ag.Variable(cuda.LongTensor([a])) for a in actions_done]
            else:
                loss = ag.Variable(torch.FloatTensor([0]))
                action_idxs = [ag.Variable(torch.LongTensor([a])) for a in actions_done]

            # Accumulate the NLL loss over the sentence's actions; no backward
            # pass or parameter update is performed here.
            for output, act in zip(outputs, action_idxs):
                loss += criterion(output.view((-1, 3)), act)

            tot_loss += utils.to_scalar(loss.data)
            instance_count += 1

            # Count how many predicted actions match the gold actions.
            for gold, output in zip(actions_done, outputs):
                pred_act = utils.argmax(output.data)
                if pred_act == gold:
                    correct_actions += 1
            total_actions += len(outputs)

    acc = float(correct_actions) / total_actions
    loss = float(tot_loss) / instance_count
    if verbose:
        print("Number of instances: {}    Number of network actions: {}".format(
            instance_count, total_actions))
        print("Acc: {}  Loss: {}".format(acc, loss))
    return acc, loss
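# A minimal sketch (not from the original code) of the per-action accuracy
# bookkeeping shared by train() and evaluate(): the predicted action is the
# argmax over each row of log-probabilities, compared against the gold action
# index. utils.argmax above is assumed to behave like torch.argmax on a single
# row; the dummy outputs and gold actions here are for illustration only.
def _action_accuracy_sketch():
    outputs = [torch.nn.functional.log_softmax(torch.randn(1, 3), dim=1)
               for _ in range(5)]      # dummy per-step action log-probs
    gold = [0, 2, 1, 0, 2]             # dummy gold action indices
    correct = sum(int(torch.argmax(o).item()) == g for o, g in zip(outputs, gold))
    return correct / len(outputs)      # per-action accuracy for this "sentence"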