def train(doc_lstm_model,
          attn_model,
          scoring_model,
          optimizer,
          words_set,
          markable_set,
          feats,
          word_limit,
          epochs=2,
          margin=1.0,
          use_cuda=False):
    """Train the document, attention and scoring models with a margin loss.

    For every document the words are truncated to ``word_limit`` and the
    markables whose ``end_token`` falls outside that window are dropped.
    Each remaining markable ``i`` contributes
    ``max(0, margin - max_t + max_f)`` to the document loss, where
    ``max_t`` and ``max_f`` are the two values returned by
    ``scoring_model.instance_top_scores`` (presumably the best-scoring
    true and false antecedents -- confirm against the scoring model).
    Markables for which ``max_t`` is ``None`` are skipped.

    One optimizer step is taken per document.  Per-document and overall
    average losses are printed at the end of every epoch.

    Args:
        doc_lstm_model: Callable producing base embeddings from a word list;
            must expose ``clear_hidden_state()`` (and ``to_cuda()`` when
            ``use_cuda`` is set).
        attn_model: Callable ``(base_embs, markable)`` producing one
            embedding per markable.
        scoring_model: Object exposing ``instance_top_scores``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        words_set: Iterable of per-document word sequences.
        markable_set: Iterable of per-document markable lists, aligned with
            ``words_set``.
        feats: Passed through unchanged to ``instance_top_scores``.
        word_limit: Maximum number of words kept per document.
        epochs: Number of passes over the data.
        margin: Margin of the hinge loss.
        use_cuda: Build tensors on the GPU and move the models there.

    Returns:
        None.  Progress is reported via ``print``.
    """
    if use_cuda:
        new_tensor = torch.cuda.FloatTensor
        doc_lstm_model.to_cuda()
        attn_model.to_cuda()
        scoring_model.to_cuda()
    else:
        new_tensor = torch.FloatTensor

    # Loop-invariant constants of the hinge loss; built once on the right
    # device instead of once per mention.
    _zero = ag.Variable(new_tensor([0]))
    margin_var = ag.Variable(new_tensor([margin]))

    for ep in range(epochs):
        tot_loss = 0.0
        instances = 0
        doc_losses = []
        for words, marks in zip(words_set, markable_set):
            words = words[:word_limit]
            marks = [m for m in marks if m.end_token < word_limit]
            if not marks:
                # Nothing to learn from this document; skipping it also
                # avoids dividing by len(marks) == 0 below.
                continue

            optimizer.zero_grad()
            doc_lstm_model.clear_hidden_state()

            loss = ag.Variable(new_tensor([0.0]))
            # Becomes True once at least one mention added a term that is
            # connected to the model parameters.
            has_signal = False

            base_embs = doc_lstm_model(words)
            att_embs = [attn_model(base_embs, m) for m in marks]
            true_ants = coref.get_true_antecedents(marks)
            for i in range(len(marks)):
                max_t, max_f = scoring_model.instance_top_scores(
                    att_embs, marks, i, true_ants[i], feats)
                if max_t is None:
                    continue

                marg = margin_var - max_t + max_f
                # Hinge: max(0, margin - max_t + max_f).
                loss += torch.max(torch.cat((_zero, marg)))
                has_signal = True

            instances += len(marks)
            sc_loss = utils.to_scalar(loss)
            tot_loss += sc_loss
            doc_losses.append(f'{sc_loss / len(marks):.5f}')

            if has_signal:
                # With no contributing mention the loss is a bare constant
                # with no autograd graph, and backward() would raise.
                loss.backward()
                optimizer.step()
        print(
            f'Epoch {ep+1} complete.\nDocument losses = {", ".join(doc_losses)}'
        )
        overall = tot_loss / instances if instances else 0.0
        print(f'Overall loss = {overall:.5f}')
예제 #2
0
def train(data, model, optimizer, verbose=True):
    """Run one training pass of ``model`` over ``data``.

    Each ``(sentence, actions)`` pair with more than two tokens is fed
    through the model; the summed NLL loss over the actions the model
    actually took is back-propagated and one optimizer step is taken per
    sentence.  Action-level accuracy and the mean per-sentence loss are
    accumulated and, when ``verbose`` is true, printed.

    Args:
        data: Iterable of ``(sentence, actions)`` pairs.
        model: Callable ``model(sentence, actions)`` returning
            ``(outputs, _, actions_done)``; must expose ``use_cuda`` and
            ``refresh()``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        verbose: Print instance/action counts, accuracy and loss.

    Returns:
        None.
    """
    criterion = nn.NLLLoss()

    # Pick the tensor constructors for the target device once.
    if model.use_cuda:
        criterion.cuda()
        float_tensor, long_tensor = cuda.FloatTensor, cuda.LongTensor
    else:
        float_tensor, long_tensor = torch.FloatTensor, torch.LongTensor

    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0

    for sentence, actions in data:
        if len(sentence) <= 2:
            continue

        optimizer.zero_grad()
        model.refresh()

        outputs, _, actions_done = model(sentence, actions)

        loss = ag.Variable(float_tensor([0]))
        action_idxs = [ag.Variable(long_tensor([a])) for a in actions_done]

        for output, act in zip(outputs, action_idxs):
            # Each output is reshaped to (-1, 3); 3 is presumably the size
            # of the parser's action set -- confirm against the model.
            loss += criterion(output.view(-1, 3), act)

        tot_loss += utils.to_scalar(loss.data)
        instance_count += 1

        correct_actions += sum(
            1 for gold, output in zip(actions_done, outputs)
            if utils.argmax(output.data) == gold)
        total_actions += len(outputs)

        loss.backward()
        optimizer.step()

    # Guard the averages: with empty data (or only short sentences) both
    # denominators are zero.
    acc = float(correct_actions) / total_actions if total_actions else 0.0
    avg_loss = float(tot_loss) / instance_count if instance_count else 0.0
    if verbose:
        print(
            "Number of instances: {}    Number of network actions: {}".format(
                instance_count, total_actions))
        print("Acc: {}  Loss: {}".format(acc, avg_loss))
예제 #3
0
def evaluate(data, model, verbose=False):
    """Score ``model`` on ``data`` without updating its parameters.

    Each ``(sentence, actions)`` pair with more than one token is fed
    through the model.  The summed NLL loss over the actions the model
    actually took and the number of correctly predicted actions are
    accumulated across sentences.

    Args:
        data: Iterable of ``(sentence, actions)`` pairs.
        model: Callable ``model(sentence, actions)`` returning
            ``(outputs, _, actions_done)``; must expose ``use_cuda``.
        verbose: Print instance/action counts, accuracy and loss.

    Returns:
        Tuple ``(acc, loss)``: the fraction of correctly predicted actions
        and the mean per-sentence loss.  Both are ``0.0`` when no sentence
        was evaluated.
    """
    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0
    criterion = nn.NLLLoss()

    # Pick the tensor constructors for the target device once.
    if model.use_cuda:
        criterion.cuda()
        float_tensor, long_tensor = cuda.FloatTensor, cuda.LongTensor
    else:
        float_tensor, long_tensor = torch.FloatTensor, torch.LongTensor

    for sentence, actions in data:
        if len(sentence) <= 1:
            continue

        outputs, _, actions_done = model(sentence, actions)

        loss = ag.Variable(float_tensor([0]))
        action_idxs = [ag.Variable(long_tensor([a])) for a in actions_done]

        for output, act in zip(outputs, action_idxs):
            # Each output is reshaped to (-1, 3); 3 is presumably the size
            # of the parser's action set -- confirm against the model.
            loss += criterion(output.view((-1, 3)), act)

        tot_loss += utils.to_scalar(loss.data)
        instance_count += 1

        correct_actions += sum(
            1 for gold, output in zip(actions_done, outputs)
            if utils.argmax(output.data) == gold)
        total_actions += len(outputs)

    # Guard the averages: with empty data (or only short sentences) both
    # denominators are zero.
    acc = float(correct_actions) / total_actions if total_actions else 0.0
    loss = float(tot_loss) / instance_count if instance_count else 0.0
    if verbose:
        print(
            "Number of instances: {}    Number of network actions: {}".format(
                instance_count, total_actions))
        print("Acc: {}  Loss: {}".format(acc, loss))
    return acc, loss