Example #1
def ner_evaluate(data, wordrep, hiddenlist, model, name):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        raise ValueError("Error: wrong evaluate name, %s" % name)

    wordrep.eval()
    for hidden in hiddenlist:
        hidden.eval()
    model.eval()
    batch_size = data.HP_batch_size

    correct = 0
    total = 0

    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue

        with torch.no_grad():
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, _ \
                = batchify_with_label(instance, data.HP_gpu, True)

            ner_word_rep = wordrep.forward(batch_word, batch_features,
                                           batch_wordlen, batch_char,
                                           batch_charlen, batch_charrecover,
                                           None, None)

            ner_hidden = ner_word_rep
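            # feed the word representation through the stacked hidden layers; only the
            # LSTM path is used at evaluation time (use_attn=False)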
            for i in range(opt.hidden_num):
                ner_lstm_out, ner_att_out = hiddenlist[i].forward(
                    ner_hidden, batch_wordlen, False)
                ner_hidden = ner_lstm_out

            tag_seq = model(ner_hidden, mask)

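        # token-level accuracy: count predictions only at non-padding positions (mask != 0)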
        for idx in range(mask.shape[0]):
            for idy in range(mask.shape[1]):
                if mask[idx][idy] != 0:
                    total += 1
                    if tag_seq[idx][idy] == batch_label[idx][idy]:
                        correct += 1

    acc = 1.0 * correct / total
    return acc
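
A minimal usage sketch for the helper above (the model objects and data are assumed to have been built and trained elsewhere; the variable names are illustrative):

dev_acc = ner_evaluate(data, wordrep, hiddenlist, model, "dev")
print("dev token accuracy: %.4f" % dev_acc)
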
Example #2
def ner_evaluateWhenTest(data, wordrep, hiddenlist, model):

    instances = data.raw_Ids
    nbest_pred_results = []
    wordrep.eval()
    for hidden in hiddenlist:
        hidden.eval()
    model.eval()
    batch_size = data.HP_batch_size

    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, _  \
            = batchify_with_label(instance, data.HP_gpu, True)

        ner_word_rep = wordrep.forward(batch_word, batch_features,
                                       batch_wordlen, batch_char,
                                       batch_charlen, batch_charrecover, None,
                                       None)

        ner_hidden = ner_word_rep
        for i in range(opt.hidden_num):
            ner_lstm_out, ner_att_out = hiddenlist[i].forward(
                ner_hidden, batch_wordlen, False)
            ner_hidden = ner_lstm_out

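        # decode the data.nbest highest-scoring label sequences per sentence,
        # presumably via n-best Viterbi decoding in the CRF layer of the model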
        scores, nbest_tag_seq = model.decode_nbest(ner_hidden, mask,
                                                   data.nbest)

        nbest_pred_result = ner.recover_nbest_label(nbest_tag_seq, mask,
                                                    data.label_alphabet,
                                                    batch_wordrecover)
        nbest_pred_results += nbest_pred_result

    return nbest_pred_results
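
A sketch of consuming the n-best output above, assuming recover_nbest_label yields, per sentence, a best-first list of data.nbest candidate label sequences (an assumption about its return layout):

nbest_results = ner_evaluateWhenTest(data, wordrep, hiddenlist, model)
best_labels = [candidates[0] for candidates in nbest_results]  # 1-best sequence per sentence
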
Example #3
def joint_train(data, old_data, opt):

    if not os.path.exists(opt.output):
        os.makedirs(opt.output)

    if opt.pretrained_model_dir != 'None':
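        # warm start: load the pretrained NER and RE models, check shape compatibility,
        # and copy the transferable weights (LSTM parameters, overlapping embedding rows)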
        seq_model = SeqModel(data)
        if opt.test_in_cpu:
            seq_model.load_state_dict(
                torch.load(os.path.join(opt.pretrained_model_dir,
                                        'ner_model.pkl'),
                           map_location='cpu'))
        else:
            seq_model.load_state_dict(
                torch.load(
                    os.path.join(opt.pretrained_model_dir, 'ner_model.pkl')))

        if (data.label_alphabet_size != seq_model.crf.tagset_size)\
                or (data.HP_hidden_dim != seq_model.hidden2tag.weight.size(1)):
            raise RuntimeError("ner_model not compatible")

        seq_wordseq = WordSequence(data, False, True, True, True)

        if (data.word_emb_dim != seq_wordseq.wordrep.word_embedding.embedding_dim
                or data.char_emb_dim != seq_wordseq.wordrep.char_feature.char_embeddings.embedding_dim
                or data.feature_emb_dims[0] != seq_wordseq.wordrep.feature_embedding_dims[0]
                or data.feature_emb_dims[1] != seq_wordseq.wordrep.feature_embedding_dims[1]):
            raise RuntimeError("ner_wordseq not compatible")

        old_seq_wordseq = WordSequence(old_data, False, True, True, True)
        if opt.test_in_cpu:
            old_seq_wordseq.load_state_dict(
                torch.load(os.path.join(opt.pretrained_model_dir,
                                        'ner_wordseq.pkl'),
                           map_location='cpu'))
        else:
            old_seq_wordseq.load_state_dict(
                torch.load(
                    os.path.join(opt.pretrained_model_dir, 'ner_wordseq.pkl')))

        for old, new in zip(old_seq_wordseq.lstm.parameters(),
                            seq_wordseq.lstm.parameters()):
            new.data.copy_(old)

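        # assuming the new alphabets extend the old ones (indices preserved), the first
        # old_vocab_size rows of each embedding are copied; new rows keep their fresh init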
        vocab_size = old_seq_wordseq.wordrep.word_embedding.num_embeddings
        seq_wordseq.wordrep.word_embedding.weight.data[:vocab_size, :] = \
            old_seq_wordseq.wordrep.word_embedding.weight.data[:vocab_size, :]

        vocab_size = old_seq_wordseq.wordrep.char_feature.char_embeddings.num_embeddings
        seq_wordseq.wordrep.char_feature.char_embeddings.weight.data[:vocab_size, :] = \
            old_seq_wordseq.wordrep.char_feature.char_embeddings.weight.data[:vocab_size, :]

        for i, feature_embedding in enumerate(
                old_seq_wordseq.wordrep.feature_embeddings):
            vocab_size = feature_embedding.num_embeddings
            seq_wordseq.wordrep.feature_embeddings[i].weight.data[:vocab_size, :] = \
                feature_embedding.weight.data[:vocab_size, :]

        classify_wordseq = WordSequence(data, True, False, True, False)

        if (data.word_emb_dim != classify_wordseq.wordrep.word_embedding.embedding_dim
                or data.re_feature_emb_dims[data.re_feature_name2id['[POSITION]']] != classify_wordseq.wordrep.position1_emb.embedding_dim
                or data.feature_emb_dims[1] != classify_wordseq.wordrep.feature_embedding_dims[0]):
            raise RuntimeError("re_wordseq not compatible")

        old_classify_wordseq = WordSequence(old_data, True, False, True, False)
        if opt.test_in_cpu:
            old_classify_wordseq.load_state_dict(
                torch.load(os.path.join(opt.pretrained_model_dir,
                                        're_wordseq.pkl'),
                           map_location='cpu'))
        else:
            old_classify_wordseq.load_state_dict(
                torch.load(
                    os.path.join(opt.pretrained_model_dir, 're_wordseq.pkl')))

        for old, new in zip(old_classify_wordseq.lstm.parameters(),
                            classify_wordseq.lstm.parameters()):
            new.data.copy_(old)

        vocab_size = old_classify_wordseq.wordrep.word_embedding.num_embeddings
        classify_wordseq.wordrep.word_embedding.weight.data[:vocab_size, :] = \
            old_classify_wordseq.wordrep.word_embedding.weight.data[:vocab_size, :]

        vocab_size = old_classify_wordseq.wordrep.position1_emb.num_embeddings
        classify_wordseq.wordrep.position1_emb.weight.data[:vocab_size, :] = \
            old_classify_wordseq.wordrep.position1_emb.weight.data[:vocab_size, :]

        vocab_size = old_classify_wordseq.wordrep.position2_emb.num_embeddings
        classify_wordseq.wordrep.position2_emb.weight.data[:vocab_size, :] = \
            old_classify_wordseq.wordrep.position2_emb.weight.data[:vocab_size, :]

        vocab_size = old_classify_wordseq.wordrep.feature_embeddings[0].num_embeddings
        classify_wordseq.wordrep.feature_embeddings[0].weight.data[:vocab_size, :] = \
            old_classify_wordseq.wordrep.feature_embeddings[0].weight.data[:vocab_size, :]

        classify_model = ClassifyModel(data)

        old_classify_model = ClassifyModel(old_data)
        if opt.test_in_cpu:
            old_classify_model.load_state_dict(
                torch.load(os.path.join(opt.pretrained_model_dir,
                                        're_model.pkl'),
                           map_location='cpu'))
        else:
            old_classify_model.load_state_dict(
                torch.load(
                    os.path.join(opt.pretrained_model_dir, 're_model.pkl')))

        if (data.re_feature_alphabet_sizes[data.re_feature_name2id['[RELATION]']] != old_classify_model.linear.weight.size(0)
                or data.re_feature_emb_dims[data.re_feature_name2id['[ENTITY_TYPE]']] != old_classify_model.entity_type_emb.embedding_dim
                or data.re_feature_emb_dims[data.re_feature_name2id['[ENTITY]']] != old_classify_model.entity_emb.embedding_dim
                or data.re_feature_emb_dims[data.re_feature_name2id['[TOKEN_NUM]']] != old_classify_model.tok_num_betw_emb.embedding_dim
                or data.re_feature_emb_dims[data.re_feature_name2id['[ENTITY_NUM]']] != old_classify_model.et_num_emb.embedding_dim):
            raise RuntimeError("re_model not compatible")

        vocab_size = old_classify_model.entity_type_emb.num_embeddings
        classify_model.entity_type_emb.weight.data[:vocab_size, :] = \
            old_classify_model.entity_type_emb.weight.data[:vocab_size, :]

        vocab_size = old_classify_model.entity_emb.num_embeddings
        classify_model.entity_emb.weight.data[:vocab_size, :] = \
            old_classify_model.entity_emb.weight.data[:vocab_size, :]

        vocab_size = old_classify_model.tok_num_betw_emb.num_embeddings
        classify_model.tok_num_betw_emb.weight.data[:vocab_size, :] = \
            old_classify_model.tok_num_betw_emb.weight.data[:vocab_size, :]

        vocab_size = old_classify_model.et_num_emb.num_embeddings
        classify_model.et_num_emb.weight.data[:vocab_size, :] = \
            old_classify_model.et_num_emb.weight.data[:vocab_size, :]

    else:
        seq_model = SeqModel(data)
        seq_wordseq = WordSequence(data, False, True, True, True)

        classify_wordseq = WordSequence(data, True, False, True, False)
        classify_model = ClassifyModel(data)

    iter_parameter = itertools.chain(
        *map(list, [seq_wordseq.parameters(),
                    seq_model.parameters()]))
    seq_optimizer = optim.Adam(iter_parameter,
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    iter_parameter = itertools.chain(*map(
        list, [classify_wordseq.parameters(),
               classify_model.parameters()]))
    classify_optimizer = optim.Adam(iter_parameter,
                                    lr=data.HP_lr,
                                    weight_decay=data.HP_l2)

    if not data.tune_wordemb:
        my_utils.freeze_net(seq_wordseq.wordrep.word_embedding)
        my_utils.freeze_net(classify_wordseq.wordrep.word_embedding)

    re_X_positive = []
    re_Y_positive = []
    re_X_negative = []
    re_Y_negative = []
    relation_vocab = data.re_feature_alphabets[
        data.re_feature_name2id['[RELATION]']]
    my_collate = my_utils.sorted_collate1
    for i in range(len(data.re_train_X)):
        x = data.re_train_X[i]
        y = data.re_train_Y[i]

        if y != relation_vocab.get_index("</unk>"):
            re_X_positive.append(x)
            re_Y_positive.append(y)
        else:
            re_X_negative.append(x)
            re_Y_negative.append(y)

    re_dev_loader = DataLoader(my_utils.RelationDataset(
        data.re_dev_X, data.re_dev_Y),
                               data.HP_batch_size,
                               shuffle=False,
                               collate_fn=my_collate)
    # re_test_loader = DataLoader(my_utils.RelationDataset(data.re_test_X, data.re_test_Y), data.HP_batch_size, shuffle=False, collate_fn=my_collate)

    best_ner_score = -1
    best_re_score = -1
    count_performance_not_grow = 0

    for idx in range(data.HP_iteration):
        epoch_start = time.time()

        seq_wordseq.train()
        seq_wordseq.zero_grad()
        seq_model.train()
        seq_model.zero_grad()

        classify_wordseq.train()
        classify_wordseq.zero_grad()
        classify_model.train()
        classify_model.zero_grad()

        batch_size = data.HP_batch_size

        random.shuffle(data.train_Ids)
        ner_train_num = len(data.train_Ids)
        ner_total_batch = ner_train_num // batch_size + 1

        re_train_loader, re_train_iter = makeRelationDataset(
            re_X_positive, re_Y_positive, re_X_negative, re_Y_negative,
            data.unk_ratio, True, my_collate, data.HP_batch_size)
        re_total_batch = len(re_train_loader)

        total_batch = max(ner_total_batch, re_total_batch)
        min_batch = min(ner_total_batch, re_total_batch)

        for batch_id in range(total_batch):

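            # interleaved multi-task training: each outer step runs one NER batch and
            # one RE batch, for as long as the respective task still has batches left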
            if batch_id < ner_total_batch:
                start = batch_id * batch_size
                end = (batch_id + 1) * batch_size
                if end > ner_train_num:
                    end = ner_train_num
                instance = data.train_Ids[start:end]
                batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, \
                    batch_permute_label = batchify_with_label(instance, data.HP_gpu)

                hidden = seq_wordseq.forward(batch_word, batch_features,
                                             batch_wordlen, batch_char,
                                             batch_charlen, batch_charrecover,
                                             None, None)
                hidden_adv = None
                loss, tag_seq = seq_model.neg_log_likelihood_loss(
                    hidden, hidden_adv, batch_label, mask)
                loss.backward()
                seq_optimizer.step()
                seq_wordseq.zero_grad()
                seq_model.zero_grad()

            if batch_id < re_total_batch:
                [batch_word, batch_features, batch_wordlen, batch_wordrecover, \
                 batch_char, batch_charlen, batch_charrecover, \
                 position1_seq_tensor, position2_seq_tensor, e1_token, e1_length, e2_token, e2_length, e1_type, e2_type, \
                 tok_num_betw, et_num], [targets, targets_permute] = my_utils.endless_get_next_batch_without_rebatch1(
                    re_train_loader, re_train_iter)

                hidden = classify_wordseq.forward(batch_word, batch_features,
                                                  batch_wordlen, batch_char,
                                                  batch_charlen,
                                                  batch_charrecover,
                                                  position1_seq_tensor,
                                                  position2_seq_tensor)
                hidden_adv = None
                loss, pred = classify_model.neg_log_likelihood_loss(
                    hidden, hidden_adv, batch_wordlen, e1_token, e1_length,
                    e2_token, e2_length, e1_type, e2_type, tok_num_betw,
                    et_num, targets)
                loss.backward()
                classify_optimizer.step()
                classify_wordseq.zero_grad()
                classify_model.zero_grad()

        epoch_finish = time.time()
        logging.info("epoch: %s training finished. Time: %.2fs" %
                     (idx, epoch_finish - epoch_start))

        _, _, _, _, ner_score, _, _ = ner.evaluate(data, seq_wordseq,
                                                   seq_model, "dev")
        logging.info("ner evaluate: f: %.4f" % (ner_score))
        if ner_score > best_ner_score:
            logging.info("new best score: ner: %.4f" % (ner_score))
            best_ner_score = ner_score

            torch.save(seq_wordseq.state_dict(),
                       os.path.join(opt.output, 'ner_wordseq.pkl'))
            torch.save(seq_model.state_dict(),
                       os.path.join(opt.output, 'ner_model.pkl'))

            count_performance_not_grow = 0

            # _, _, _, _, test_ner_score, _, _ = ner.evaluate(data, seq_wordseq, seq_model, "test")
            # logging.info("ner evaluate on test: f: %.4f" % (test_ner_score))

        else:
            count_performance_not_grow += 1

        re_score = relation_extraction.evaluate(classify_wordseq,
                                                classify_model, re_dev_loader)
        logging.info("re evaluate: f: %.4f" % (re_score))
        if re_score > best_re_score:
            logging.info("new best score: re: %.4f" % (re_score))
            best_re_score = re_score

            torch.save(classify_wordseq.state_dict(),
                       os.path.join(opt.output, 're_wordseq.pkl'))
            torch.save(classify_model.state_dict(),
                       os.path.join(opt.output, 're_model.pkl'))

            count_performance_not_grow = 0

            # test_re_score = relation_extraction.evaluate(classify_wordseq, classify_model, re_test_loader)
            # logging.info("re evaluate on test: f: %.4f" % (test_re_score))
        else:
            count_performance_not_grow += 1

        if count_performance_not_grow > 2 * data.patience:
            logging.info("early stop")
            break

    logging.info("train finished")
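
The repeated row-copy pattern in joint_train can be factored into a small helper; a minimal sketch, assuming the old and new embedding tables share the same embedding dimension and the new vocabulary extends the old one:

def copy_overlapping_rows(new_emb, old_emb):
    # copy the rows of the shared (old) vocabulary into the new table;
    # rows added by the new vocabulary keep their fresh initialization
    vocab_size = old_emb.num_embeddings
    new_emb.weight.data[:vocab_size, :] = old_emb.weight.data[:vocab_size, :]
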
Example #4
def train(data, ner_dir, re_dir):

    task_num = 2
    init_alpha = 1.0 / task_num
    if opt.hidden_num <= 1:
        step_alpha = 0
    else:
        step_alpha = (1.0 - init_alpha) / (opt.hidden_num - 1)
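    # alpha ramps linearly from 1/task_num at the first hidden layer to 1.0 at the last:
    # lower layers mix information across the two tasks, the top layer stays task-specific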

    ner_wordrep = WordRep(data, False, True, True, data.use_char)
    ner_hiddenlist = []
    for i in range(opt.hidden_num):
        if i == 0:
            input_size = data.word_emb_dim+data.HP_char_hidden_dim+data.feature_emb_dims[data.feature_name2id['[Cap]']]+ \
                         data.feature_emb_dims[data.feature_name2id['[POS]']]
            output_size = data.HP_hidden_dim
        else:
            input_size = data.HP_hidden_dim
            output_size = data.HP_hidden_dim

        temp = HiddenLayer(data, input_size, output_size)
        ner_hiddenlist.append(temp)

    seq_model = SeqModel(data)

    re_wordrep = WordRep(data, True, False, True, False)
    re_hiddenlist = []
    for i in range(opt.hidden_num):
        if i == 0:
            input_size = data.word_emb_dim + data.feature_emb_dims[data.feature_name2id['[POS]']]+\
                         2*data.re_feature_emb_dims[data.re_feature_name2id['[POSITION]']]
            output_size = data.HP_hidden_dim
        else:
            input_size = data.HP_hidden_dim
            output_size = data.HP_hidden_dim

        temp = HiddenLayer(data, input_size, output_size)
        re_hiddenlist.append(temp)

    classify_model = ClassifyModel(data)

    iter_parameter = itertools.chain(
        *map(list, [ner_wordrep.parameters(),
                    seq_model.parameters()] +
             [f.parameters() for f in ner_hiddenlist]))
    ner_optimizer = optim.Adam(iter_parameter,
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    iter_parameter = itertools.chain(
        *map(list, [re_wordrep.parameters(),
                    classify_model.parameters()] +
             [f.parameters() for f in re_hiddenlist]))
    re_optimizer = optim.Adam(iter_parameter,
                              lr=data.HP_lr,
                              weight_decay=data.HP_l2)

    if not data.tune_wordemb:
        my_utils.freeze_net(ner_wordrep.word_embedding)
        my_utils.freeze_net(re_wordrep.word_embedding)

    re_X_positive = []
    re_Y_positive = []
    re_X_negative = []
    re_Y_negative = []
    relation_vocab = data.re_feature_alphabets[
        data.re_feature_name2id['[RELATION]']]
    my_collate = my_utils.sorted_collate1
    for i in range(len(data.re_train_X)):
        x = data.re_train_X[i]
        y = data.re_train_Y[i]

        if y != relation_vocab.get_index("</unk>"):
            re_X_positive.append(x)
            re_Y_positive.append(y)
        else:
            re_X_negative.append(x)
            re_Y_negative.append(y)

    re_test_loader = DataLoader(my_utils.RelationDataset(
        data.re_test_X, data.re_test_Y),
                                data.HP_batch_size,
                                shuffle=False,
                                collate_fn=my_collate)

    best_ner_score = -1
    best_re_score = -1

    for idx in range(data.HP_iteration):
        epoch_start = time.time()

        ner_wordrep.train()
        ner_wordrep.zero_grad()
        for hidden_layer in ner_hiddenlist:
            hidden_layer.train()
            hidden_layer.zero_grad()
        seq_model.train()
        seq_model.zero_grad()

        re_wordrep.train()
        re_wordrep.zero_grad()
        for hidden_layer in re_hiddenlist:
            hidden_layer.train()
            hidden_layer.zero_grad()
        classify_model.train()
        classify_model.zero_grad()

        batch_size = data.HP_batch_size

        random.shuffle(data.train_Ids)
        ner_train_num = len(data.train_Ids)
        ner_total_batch = ner_train_num // batch_size + 1

        re_train_loader, re_train_iter = makeRelationDataset(
            re_X_positive, re_Y_positive, re_X_negative, re_Y_negative,
            data.unk_ratio, True, my_collate, data.HP_batch_size)
        re_total_batch = len(re_train_loader)

        total_batch = max(ner_total_batch, re_total_batch)
        min_batch = min(ner_total_batch, re_total_batch)

        for batch_id in range(total_batch):

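            # while both tasks still have batches, train them jointly with cross-task
            # mixing; after the shorter task runs out, fall back to independent updates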
            if batch_id < min_batch:
                start = batch_id * batch_size
                end = (batch_id + 1) * batch_size
                if end > ner_train_num:
                    end = ner_train_num
                instance = data.train_Ids[start:end]
                ner_batch_word, ner_batch_features, ner_batch_wordlen, ner_batch_wordrecover, ner_batch_char, ner_batch_charlen, \
                ner_batch_charrecover, ner_batch_label, ner_mask, ner_batch_permute_label = batchify_with_label(instance, data.HP_gpu)

                [re_batch_word, re_batch_features, re_batch_wordlen, re_batch_wordrecover, re_batch_char, re_batch_charlen,
                 re_batch_charrecover, re_position1_seq_tensor, re_position2_seq_tensor, re_e1_token, re_e1_length, re_e2_token, re_e2_length,
                 re_e1_type, re_e2_type, re_tok_num_betw, re_et_num], [re_targets, re_targets_permute] = \
                    my_utils.endless_get_next_batch_without_rebatch1(re_train_loader, re_train_iter)

                if ner_batch_word.size(0) != re_batch_word.size(0):
                    continue  # the hidden states are mixed element-wise below, so skip batches whose NER and RE sizes differ

                ner_word_rep = ner_wordrep.forward(
                    ner_batch_word, ner_batch_features, ner_batch_wordlen,
                    ner_batch_char, ner_batch_charlen, ner_batch_charrecover,
                    None, None)

                re_word_rep = re_wordrep.forward(
                    re_batch_word, re_batch_features, re_batch_wordlen,
                    re_batch_char, re_batch_charlen, re_batch_charrecover,
                    re_position1_seq_tensor, re_position2_seq_tensor)
                alpha = init_alpha
                ner_hidden = ner_word_rep
                re_hidden = re_word_rep
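                # layer-wise mixing: each task keeps alpha of its own LSTM output and takes
                # (1 - alpha) from the other task's attention summary, broadcast over time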
                for i in range(opt.hidden_num):
                    use_attn = alpha <= 0.99

                    ner_lstm_out, ner_att_out = ner_hiddenlist[i].forward(
                        ner_hidden, ner_batch_wordlen, use_attn)
                    re_lstm_out, re_att_out = re_hiddenlist[i].forward(
                        re_hidden, re_batch_wordlen, use_attn)

                    if use_attn:
                        ner_hidden = alpha * ner_lstm_out + (
                            1 - alpha) * re_att_out.unsqueeze(1)
                        re_hidden = alpha * re_lstm_out + (
                            1 - alpha) * ner_att_out.unsqueeze(1)
                    else:
                        ner_hidden = ner_lstm_out
                        re_hidden = re_lstm_out

                    alpha += step_alpha

                ner_loss, ner_tag_seq = seq_model.neg_log_likelihood_loss(
                    ner_hidden, ner_batch_label, ner_mask)
                re_loss, re_pred = classify_model.neg_log_likelihood_loss(
                    re_hidden, re_batch_wordlen, re_e1_token, re_e1_length,
                    re_e2_token, re_e2_length, re_e1_type, re_e2_type,
                    re_tok_num_betw, re_et_num, re_targets)

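                # the two losses share graph nodes through the mixed hidden states, so the
                # first backward pass must retain the graph for the second one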
                ner_loss.backward(retain_graph=True)
                re_loss.backward()

                ner_optimizer.step()
                re_optimizer.step()

                ner_wordrep.zero_grad()
                for hidden_layer in ner_hiddenlist:
                    hidden_layer.zero_grad()
                seq_model.zero_grad()

                re_wordrep.zero_grad()
                for hidden_layer in re_hiddenlist:
                    hidden_layer.zero_grad()
                classify_model.zero_grad()

            else:

                if batch_id < ner_total_batch:
                    start = batch_id * batch_size
                    end = (batch_id + 1) * batch_size
                    if end > ner_train_num:
                        end = ner_train_num
                    instance = data.train_Ids[start:end]
                    batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, \
                        batch_permute_label = batchify_with_label(instance, data.HP_gpu)

                    ner_word_rep = ner_wordrep.forward(
                        batch_word, batch_features, batch_wordlen, batch_char,
                        batch_charlen, batch_charrecover, None, None)

                    ner_hidden = ner_word_rep
                    for i in range(opt.hidden_num):
                        ner_lstm_out, ner_att_out = ner_hiddenlist[i].forward(
                            ner_hidden, batch_wordlen, False)
                        ner_hidden = ner_lstm_out

                    loss, tag_seq = seq_model.neg_log_likelihood_loss(
                        ner_hidden, batch_label, mask)

                    loss.backward()
                    ner_optimizer.step()
                    ner_wordrep.zero_grad()
                    for hidden_layer in ner_hiddenlist:
                        hidden_layer.zero_grad()
                    seq_model.zero_grad()

                if batch_id < re_total_batch:
                    [batch_word, batch_features, batch_wordlen, batch_wordrecover, \
                     batch_char, batch_charlen, batch_charrecover, \
                     position1_seq_tensor, position2_seq_tensor, e1_token, e1_length, e2_token, e2_length, e1_type, e2_type, \
                     tok_num_betw, et_num], [targets, targets_permute] = my_utils.endless_get_next_batch_without_rebatch1(
                        re_train_loader, re_train_iter)

                    re_word_rep = re_wordrep.forward(
                        batch_word, batch_features, batch_wordlen, batch_char,
                        batch_charlen, batch_charrecover, position1_seq_tensor,
                        position2_seq_tensor)

                    re_hidden = re_word_rep
                    for i in range(opt.hidden_num):
                        re_lstm_out, re_att_out = re_hiddenlist[i].forward(
                            re_hidden, batch_wordlen, False)
                        re_hidden = re_lstm_out

                    loss, pred = classify_model.neg_log_likelihood_loss(
                        re_hidden, batch_wordlen, e1_token, e1_length,
                        e2_token, e2_length, e1_type, e2_type, tok_num_betw,
                        et_num, targets)
                    loss.backward()
                    re_optimizer.step()
                    re_wordrep.zero_grad()
                    for hidden_layer in re_hiddenlist:
                        hidden_layer.zero_grad()
                    classify_model.zero_grad()

        epoch_finish = time.time()
        print("epoch: %s training finished. Time: %.2fs" %
              (idx, epoch_finish - epoch_start))

        ner_score = ner_evaluate(data, ner_wordrep, ner_hiddenlist, seq_model,
                                 "test")
        print("ner evaluate: f: %.4f" % (ner_score))

        re_score = re_evaluate(re_wordrep, re_hiddenlist, classify_model,
                               re_test_loader)
        print("re evaluate: f: %.4f" % (re_score))

        if ner_score + re_score > best_ner_score + best_re_score:
            print("new best score: ner: %.4f , re: %.4f" %
                  (ner_score, re_score))
            best_ner_score = ner_score
            best_re_score = re_score

            torch.save(ner_wordrep.state_dict(),
                       os.path.join(ner_dir, 'wordrep.pkl'))
            for i, hidden_layer in enumerate(ner_hiddenlist):
                torch.save(hidden_layer.state_dict(),
                           os.path.join(ner_dir, 'hidden_{}.pkl'.format(i)))
            torch.save(seq_model.state_dict(),
                       os.path.join(ner_dir, 'model.pkl'))

            torch.save(re_wordrep.state_dict(),
                       os.path.join(re_dir, 'wordrep.pkl'))
            for i, hidden_layer in enumerate(re_hiddenlist):
                torch.save(hidden_layer.state_dict(),
                           os.path.join(re_dir, 'hidden_{}.pkl'.format(i)))
            torch.save(classify_model.state_dict(),
                       os.path.join(re_dir, 'model.pkl'))
Example #5
def pipeline(data, ner_dir, re_dir):

    seq_model = SeqModel(data)
    seq_wordseq = WordSequence(data, False, True, True, data.use_char)

    classify_wordseq = WordSequence(data, True, False, True, False)
    classify_model = ClassifyModel(data)
    if torch.cuda.is_available():
        classify_model = classify_model.cuda(data.HP_gpu)

    iter_parameter = itertools.chain(
        *map(list, [seq_wordseq.parameters(),
                    seq_model.parameters()]))
    seq_optimizer = optim.Adam(iter_parameter,
                               lr=opt.ner_lr,
                               weight_decay=data.HP_l2)
    iter_parameter = itertools.chain(*map(
        list, [classify_wordseq.parameters(),
               classify_model.parameters()]))
    classify_optimizer = optim.Adam(iter_parameter,
                                    lr=opt.re_lr,
                                    weight_decay=data.HP_l2)

    if not data.tune_wordemb:
        my_utils.freeze_net(seq_wordseq.wordrep.word_embedding)
        my_utils.freeze_net(classify_wordseq.wordrep.word_embedding)

    re_X_positive = []
    re_Y_positive = []
    re_X_negative = []
    re_Y_negative = []
    relation_vocab = data.re_feature_alphabets[
        data.re_feature_name2id['[RELATION]']]
    my_collate = my_utils.sorted_collate1
    for i in range(len(data.re_train_X)):
        x = data.re_train_X[i]
        y = data.re_train_Y[i]

        if y != relation_vocab.get_index("</unk>"):
            re_X_positive.append(x)
            re_Y_positive.append(y)
        else:
            re_X_negative.append(x)
            re_Y_negative.append(y)

    re_test_loader = DataLoader(my_utils.RelationDataset(
        data.re_test_X, data.re_test_Y),
                                data.HP_batch_size,
                                shuffle=False,
                                collate_fn=my_collate)

    best_ner_score = -1
    best_re_score = -1

    for idx in range(data.HP_iteration):
        epoch_start = time.time()

        seq_wordseq.train()
        seq_wordseq.zero_grad()
        seq_model.train()
        seq_model.zero_grad()

        classify_wordseq.train()
        classify_wordseq.zero_grad()
        classify_model.train()
        classify_model.zero_grad()

        batch_size = data.HP_batch_size

        random.shuffle(data.train_Ids)
        ner_train_num = len(data.train_Ids)
        ner_total_batch = ner_train_num // batch_size + 1

        re_train_loader, re_train_iter = makeRelationDataset(
            re_X_positive, re_Y_positive, re_X_negative, re_Y_negative,
            data.unk_ratio, True, my_collate, data.HP_batch_size)
        re_total_batch = len(re_train_loader)

        total_batch = max(ner_total_batch, re_total_batch)
        min_batch = min(ner_total_batch, re_total_batch)

        for batch_id in range(total_batch):

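            # same interleaving as in joint_train: one NER update and one RE update per
            # outer step, while each task still has batches left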
            if batch_id < ner_total_batch:
                start = batch_id * batch_size
                end = (batch_id + 1) * batch_size
                if end > ner_train_num:
                    end = ner_train_num
                instance = data.train_Ids[start:end]
                batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, \
                    batch_permute_label = batchify_with_label(instance, data.HP_gpu)

                hidden = seq_wordseq.forward(batch_word, batch_features,
                                             batch_wordlen, batch_char,
                                             batch_charlen, batch_charrecover,
                                             None, None)
                hidden_adv = None
                loss, tag_seq = seq_model.neg_log_likelihood_loss(
                    hidden, hidden_adv, batch_label, mask)
                loss.backward()
                seq_optimizer.step()
                seq_wordseq.zero_grad()
                seq_model.zero_grad()

            if batch_id < re_total_batch:
                [batch_word, batch_features, batch_wordlen, batch_wordrecover, \
                 batch_char, batch_charlen, batch_charrecover, \
                 position1_seq_tensor, position2_seq_tensor, e1_token, e1_length, e2_token, e2_length, e1_type, e2_type, \
                 tok_num_betw, et_num], [targets, targets_permute] = my_utils.endless_get_next_batch_without_rebatch1(
                    re_train_loader, re_train_iter)

                hidden = classify_wordseq.forward(batch_word, batch_features,
                                                  batch_wordlen, batch_char,
                                                  batch_charlen,
                                                  batch_charrecover,
                                                  position1_seq_tensor,
                                                  position2_seq_tensor)
                hidden_adv = None
                loss, pred = classify_model.neg_log_likelihood_loss(
                    hidden, hidden_adv, batch_wordlen, e1_token, e1_length,
                    e2_token, e2_length, e1_type, e2_type, tok_num_betw,
                    et_num, targets)
                loss.backward()
                classify_optimizer.step()
                classify_wordseq.zero_grad()
                classify_model.zero_grad()

        epoch_finish = time.time()
        print("epoch: %s training finished. Time: %.2fs" %
              (idx, epoch_finish - epoch_start))

        # _, _, _, _, f, _, _ = ner.evaluate(data, seq_wordseq, seq_model, "test")
        ner_score = ner.evaluate1(data, seq_wordseq, seq_model, "test")
        print("ner evaluate: f: %.4f" % (ner_score))

        re_score = relation_extraction.evaluate(classify_wordseq,
                                                classify_model, re_test_loader)
        print("re evaluate: f: %.4f" % (re_score))

        if ner_score + re_score > best_ner_score + best_re_score:
            print("new best score: ner: %.4f , re: %.4f" %
                  (ner_score, re_score))
            best_ner_score = ner_score
            best_re_score = re_score

            torch.save(seq_wordseq.state_dict(),
                       os.path.join(ner_dir, 'wordseq.pkl'))
            torch.save(seq_model.state_dict(),
                       os.path.join(ner_dir, 'model.pkl'))
            torch.save(classify_wordseq.state_dict(),
                       os.path.join(re_dir, 'wordseq.pkl'))
            torch.save(classify_model.state_dict(),
                       os.path.join(re_dir, 'model.pkl'))
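
To reload the checkpoints saved above, the counterpart is load_state_dict, mirroring the loading code in joint_train (the CPU mapping is optional and shown for illustration):

seq_wordseq.load_state_dict(
    torch.load(os.path.join(ner_dir, 'wordseq.pkl'), map_location='cpu'))
seq_model.load_state_dict(
    torch.load(os.path.join(ner_dir, 'model.pkl'), map_location='cpu'))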