Example #1
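One epoch of the parallel (unlabeled) training stage of a cross-lingual SRL model: English and French unlabeled batches are consumed in lockstep, and the model is periodically evaluated on labeled French data.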
def parallel_train_1_epoc(srl_model,
                          criterion,
                          optimizer,
                          train_dataset,
                          labeled_dataset_fr,
                          batch_size,
                          word2idx,
                          fr_word2idx,
                          lemma2idx,
                          pos2idx,
                          pretrain2idx,
                          fr_pretrain2idx,
                          deprel2idx,
                          argument2idx,
                          idx2word,
                          shuffle=False,
                          lang='En',
                          dev_best_score=None,
                          test_best_score=None,
                          test_ood_best_score=None):
    unlabeled_dataset_en, unlabeled_dataset_fr = train_dataset
    unlabeled_Generator_En = inter_utils.get_batch(unlabeled_dataset_en,
                                                   batch_size,
                                                   word2idx,
                                                   fr_word2idx,
                                                   lemma2idx,
                                                   pos2idx,
                                                   pretrain2idx,
                                                   fr_pretrain2idx,
                                                   deprel2idx,
                                                   argument2idx,
                                                   idx2word,
                                                   shuffle=False,
                                                   lang="En")

    for batch_i, unlabeled_data_fr in enumerate(
            inter_utils.get_batch(unlabeled_dataset_fr,
                                  batch_size,
                                  word2idx,
                                  fr_word2idx,
                                  lemma2idx,
                                  pos2idx,
                                  pretrain2idx,
                                  fr_pretrain2idx,
                                  deprel2idx,
                                  argument2idx,
                                  idx2word,
                                  shuffle=False,
                                  lang='Fr')):
        srl_model.train()
        # Advance the English generator in step with the French loop (Python 3 syntax).
        unlabeled_data_en = next(unlabeled_Generator_En)

        predicates_1D = unlabeled_data_en['predicates_idx']
        predicates_1D_fr = unlabeled_data_fr['predicates_idx']
        #log(predicates_1D, predicates_1D_fr)
        # Joint forward pass over the parallel (En, Fr) batch returns the unsupervised loss.
        u_loss = srl_model((unlabeled_data_en, unlabeled_data_fr),
                           lang='En',
                           unlabeled=True)
        optimizer.zero_grad()
        u_loss.backward()
        optimizer.step()

        if batch_i % 50 == 0:
            log(batch_i, u_loss)

        if batch_i % 500 == 0:
            log('\n')
            log('*' * 80)
            srl_model.eval()
            # eval_train_batch(epoch, batch_i, loss.data[0], flat_argument, pred, argument2idx)

            log('FR test:')
            score, dev_output = eval_data(srl_model,
                                          elmo,
                                          labeled_dataset_fr,
                                          30,
                                          word2idx,
                                          fr_word2idx,
                                          lemma2idx,
                                          pos2idx,
                                          pretrain2idx,
                                          fr_pretrain2idx,
                                          deprel2idx,
                                          argument2idx,
                                          idx2argument,
                                          idx2word,
                                          False,
                                          dev_predicate_correct,
                                          dev_predicate_sum,
                                          lang='Fr')

            if dev_best_score is None or score[5] > dev_best_score[5]:
                dev_best_score = score
                output_predict(
                    os.path.join(
                        result_path, 'dev_argument_{:.2f}.pred'.format(
                            dev_best_score[2] * 100)), dev_output)
                # torch.save(srl_model, os.path.join(os.path.dirname(__file__),'model/best_{:.2f}.pkl'.format(dev_best_score[2]*100)))
            log('\tdev best P:{:.2f} R:{:.2f} F1:{:.2f} NP:{:.2f} NR:{:.2f} NF1:{:.2f}'
                .format(dev_best_score[0] * 100, dev_best_score[1] * 100,
                        dev_best_score[2] * 100, dev_best_score[3] * 100,
                        dev_best_score[4] * 100, dev_best_score[5] * 100))
    return dev_best_score
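The loop above advances the English generator manually with next() inside a for loop over the French generator, which raises StopIteration if the English data runs out first. A minimal sketch of the same lockstep pattern using zip(), with a toy generator standing in for inter_utils.get_batch (all names below are placeholders, not the real API):

def toy_batches(tag, n):
    # Placeholder generator standing in for inter_utils.get_batch.
    for i in range(n):
        yield {'lang': tag, 'batch': i}

gen_en = toy_batches('En', 3)
gen_fr = toy_batches('Fr', 3)

# zip() advances both generators together and stops silently at the shorter
# one, avoiding the manual next() call used in parallel_train_1_epoc.
for batch_i, (batch_en, batch_fr) in enumerate(zip(gen_en, gen_fr)):
    print(batch_i, batch_en['lang'], batch_fr['lang'])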
Example #2
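This snippet is truncated at the top; it resumes mid-way through an eval_data(...) call in the pretraining loop, checkpoints the best EN_Labeler, and then creates the optimizers for the adversarial stage.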
                                                  lemma2idx,
                                                  pos2idx,
                                                  pretrain2idx,
                                                  fr_pretrain2idx,
                                                  deprel2idx,
                                                  argument2idx,
                                                  idx2argument,
                                                  idx2word,
                                                  False,
                                                  dev_predicate_correct,
                                                  dev_predicate_sum,
                                                  isPretrain=True)
                    if dev_best_score is None or score[5] > dev_best_score[5]:
                        dev_best_score = score
                        output_predict(
                            os.path.join(
                                result_path, 'dev_argument_{:.2f}.pred'.format(
                                    dev_best_score[2] * 100)), dev_output)
                        torch.save(srl_model.EN_Labeler.state_dict(),
                                   'Best_Pretrained_EN_Labeler.pkl')
                        log('Pretrained Model Saved!')
                    log('\tdev best P:{:.2f} R:{:.2f} F1:{:.2f} NP:{:.2f} NR:{:.2f} NF1:{:.2f}'
                        .format(dev_best_score[0] * 100,
                                dev_best_score[1] * 100,
                                dev_best_score[2] * 100,
                                dev_best_score[3] * 100,
                                dev_best_score[4] * 100,
                                dev_best_score[5] * 100))

        log("start adversarial training!")
        opt_D = optim.Adam(srl_model.Discriminator.parameters(), lr=0.001)
        opt_G = optim.Adam(srl_model.EN_Labeler.parameters(), lr=0.001)
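The example breaks off right after opt_D and opt_G are created, so the adversarial loop itself is not shown. Below is a minimal, self-contained sketch of the alternating discriminator/generator update that this two-optimizer setup typically implies; the modules, dimensions, and random batches are placeholders, not the real EN_Labeler or Discriminator:

import torch
import torch.nn as nn
import torch.optim as optim

labeler = nn.Linear(100, 64)                      # placeholder shared encoder
discriminator = nn.Sequential(nn.Linear(64, 32),  # placeholder language classifier
                              nn.ReLU(),
                              nn.Linear(32, 1))
opt_D = optim.Adam(discriminator.parameters(), lr=0.001)
opt_G = optim.Adam(labeler.parameters(), lr=0.001)
bce = nn.BCEWithLogitsLoss()

for step in range(100):
    en_feats = torch.randn(32, 100)               # stand-in English batch
    fr_feats = torch.randn(32, 100)               # stand-in French batch

    # Discriminator step: label English representations 1, French 0.
    # no_grad() keeps this step from building a graph through the encoder.
    with torch.no_grad():
        en_repr = labeler(en_feats)
        fr_repr = labeler(fr_feats)
    d_loss = (bce(discriminator(en_repr), torch.ones(32, 1)) +
              bce(discriminator(fr_repr), torch.zeros(32, 1)))
    opt_D.zero_grad()
    d_loss.backward()
    opt_D.step()

    # Generator step: push French representations toward the English label,
    # updating only the encoder's parameters.
    g_loss = bce(discriminator(labeler(fr_feats)), torch.ones(32, 1))
    opt_G.zero_grad()
    g_loss.backward()
    opt_G.step()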
Example #3
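The supervised counterpart of Example #1: one epoch over labeled data in which two cross-entropy losses (an argument head and a word head) are summed into a single backward pass, again with periodic evaluation on labeled French data.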
def train_1_epoc(srl_model,
                 criterion,
                 optimizer,
                 train_dataset,
                 labeled_dataset_fr,
                 batch_size,
                 word2idx,
                 fr_word2idx,
                 lemma2idx,
                 pos2idx,
                 pretrain2idx,
                 fr_pretrain2idx,
                 deprel2idx,
                 argument2idx,
                 idx2word,
                 shuffle=False,
                 lang='En',
                 dev_best_score=None,
                 test_best_score=None,
                 test_ood_best_score=None):
    for batch_i, train_input_data in enumerate(
            inter_utils.get_batch(train_dataset,
                                  batch_size,
                                  word2idx,
                                  fr_word2idx,
                                  lemma2idx,
                                  pos2idx,
                                  pretrain2idx,
                                  fr_pretrain2idx,
                                  deprel2idx,
                                  argument2idx,
                                  idx2word,
                                  shuffle=shuffle,
                                  lang=lang)):

        flat_argument = train_input_data['flat_argument']
        target_batch_variable = get_torch_variable_from_np(flat_argument)

        out, out_word = srl_model(train_input_data, lang='En')
        loss = criterion(out, target_batch_variable)
        loss_word = criterion(out_word, target_batch_variable)
        if batch_i % 50 == 0:
            log(batch_i, loss, loss_word)

        optimizer.zero_grad()
        (loss + loss_word).backward()
        optimizer.step()

        if batch_i > 0 and batch_i % show_steps == 0:
            # Arg-max over the label logits; get_data converts the tensor for evaluation.
            _, pred = torch.max(out, 1)
            pred = get_data(pred)
            # pred = pred.reshape([bs, sl])

            log('\n')
            log('*' * 80)

            eval_train_batch(epoch, batch_i, loss.item(), flat_argument, pred,
                             argument2idx)

            log('FR test:')
            score, dev_output = eval_data(srl_model,
                                          elmo,
                                          labeled_dataset_fr,
                                          batch_size,
                                          word2idx,
                                          fr_word2idx,
                                          lemma2idx,
                                          pos2idx,
                                          pretrain2idx,
                                          fr_pretrain2idx,
                                          deprel2idx,
                                          argument2idx,
                                          idx2argument,
                                          idx2word,
                                          False,
                                          dev_predicate_correct,
                                          dev_predicate_sum,
                                          lang='Fr')

            if dev_best_score is None or score[5] > dev_best_score[5]:
                dev_best_score = score
                output_predict(
                    os.path.join(
                        result_path, 'dev_argument_{:.2f}.pred'.format(
                            dev_best_score[2] * 100)), dev_output)
                # torch.save(srl_model, os.path.join(os.path.dirname(__file__),'model/best_{:.2f}.pkl'.format(dev_best_score[2]*100)))
            log('\tdev best P:{:.2f} R:{:.2f} F1:{:.2f} NP:{:.2f} NR:{:.2f} NF1:{:.2f}'
                .format(dev_best_score[0] * 100, dev_best_score[1] * 100,
                        dev_best_score[2] * 100, dev_best_score[3] * 100,
                        dev_best_score[4] * 100, dev_best_score[5] * 100))
    return dev_best_score
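A small runnable sketch of the two-headed loss pattern used above, where (loss + loss_word).backward() accumulates gradients from both heads through the shared parameters in one pass; the toy model and data below are placeholders, not the real SRL network:

import torch
import torch.nn as nn
import torch.optim as optim

class TwoHead(nn.Module):
    # Toy two-headed model mirroring the (out, out_word) pattern above.
    def __init__(self):
        super().__init__()
        self.shared = nn.Linear(50, 64)
        self.head_a = nn.Linear(64, 10)   # stands in for `out`
        self.head_b = nn.Linear(64, 10)   # stands in for `out_word`

    def forward(self, x):
        h = torch.relu(self.shared(x))
        return self.head_a(h), self.head_b(h)

model = TwoHead()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

x = torch.randn(8, 50)
target = torch.randint(0, 10, (8,))

out, out_word = model(x)
loss = criterion(out, target)
loss_word = criterion(out_word, target)

# Summing the two losses before backward() sends gradients from both heads
# into the shared parameters in a single pass.
optimizer.zero_grad()
(loss + loss_word).backward()
optimizer.step()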