Example #1
import json

import numpy as np
import torch
from torch.autograd import Variable


def train(datasets, mode):
    # Relies on module-level names: optimizer, criterion, args, JointModel,
    # RL_model, relations, device, logger, out_losses, the epoch index `e`,
    # and helpers such as get_bags, get_minibatches, padding_sequence, cal_F_score.
    # JointModel.train()
    if args.use_RL:
        mini_batches = get_bags(datasets, relations, args.batchsize)
        noisy_sentences_vec = Variable(
            torch.FloatTensor(1, args.hidden_dim).fill_(0))
        noisy_vec_mean = torch.mean(noisy_sentences_vec, 0, True)
    else:
        mini_batches = get_minibatches(datasets, args.batchsize)
    batchcnt = len(datasets[0]) // args.batchsize  # len(list(mini_batches))
    logger.info("********************%s data*********************" % mode)
    logger.info("number of batches: %s" % batchcnt)
    NER_correct, NER_total = 0., 0.
    RE_correct, RE_total = 0., 0.
    if mode != 'train':
        # NER_target_all, NER_output_all = None, None
        # RE_target_all, RE_output_all = None, None
        NER_target_all2, NER_output_all2 = [], []
        RE_target_all2, RE_output_all2 = [], []
        NER_output_logits, RE_output_logits = [], []

    for b, data in enumerate(mini_batches):
        if b >= batchcnt:
            break
        sentences, pos_lambda, tags, sentences_words, relation_tags, relation_names = data
        input_tensor, input_length = padding_sequence(sentences, pad_token=0)
        pos_tensor, input_length = padding_sequence(pos_lambda, pad_token=0)
        target_tensor, target_length = padding_sequence(
            tags, pad_token=args.entity_tag_size)  # entity tags
        relation_target_tensor = relation_tags  # padding_sequence_recurr(relation_tags)  		# relation tag
        if torch.cuda.is_available():
            input_tensor = Variable(
                torch.cuda.LongTensor(input_tensor, device=device)).cuda()
            target_tensor = Variable(
                torch.cuda.LongTensor(target_tensor, device=device)).cuda()
            if args.encoder_model == "BiLSTM":
                # mask out positions labelled with the padding tag
                mask = (target_tensor != args.entity_tag_size).to(device)
            else:
                # mask out padded input tokens (pad id 0)
                mask = (input_tensor != 0).to(device)
            pos_tensor = Variable(
                torch.cuda.FloatTensor(pos_tensor, device=device)).cuda()
            relation_target_tensor = Variable(
                torch.cuda.LongTensor(relation_target_tensor,
                                      device=device)).cuda()
        else:
            input_tensor = Variable(
                torch.LongTensor(input_tensor, device=device))
            target_tensor = Variable(
                torch.LongTensor(target_tensor, device=device))
            if args.encoder_model == "BiLSTM":
                # mask out positions labelled with the padding tag
                mask = (target_tensor != args.entity_tag_size).to(device)
            else:
                # mask out padded input tokens (pad id 0)
                mask = (input_tensor != 0).to(device)
            pos_tensor = Variable(torch.Tensor(pos_tensor, device=device))
            relation_target_tensor = Variable(
                torch.LongTensor(relation_target_tensor, device=device))

        if mode == 'train':
            optimizer.zero_grad()
            NER_active_logits, NER_active_labels, RE_output_tag, NER_output_tag, NER_output, BERT_pooled_output = JointModel(
                input_tensor, pos_tensor, target_tensor, args.batchsize,
                mask)  # , input_length, target_length
            if args.use_RL:
                mask_entity = [
                    list(map(lambda x: 1 if x in [1, 2, 4, 5] else 0, i))
                    for i in target_tensor
                ]
                if torch.cuda.is_available():
                    mask_entity = torch.cuda.ByteTensor(mask_entity).to(device)
                else:
                    mask_entity = torch.ByteTensor(mask_entity).to(device)
                # average the NER hidden states over the entity positions of each sentence
                NER_embedding = None
                for i in range(len(mask_entity)):
                    sentence_entity_mean = torch.mean(
                        NER_output[i][mask_entity[i]], 0).view(1, -1)
                    NER_embedding = sentence_entity_mean if NER_embedding is None \
                        else torch.cat((NER_embedding, sentence_entity_mean), 0)

                RE_rewards, loss_RL, noisy_sentences_vec, noisy_vec_mean = RL_model(
                    BERT_pooled_output, NER_embedding, JointModel.noysy_model,
                    RE_output_tag, relation_target_tensor, noisy_sentences_vec,
                    noisy_vec_mean)

            if not args.use_RL:
                loss_entity = criterion(NER_active_logits, NER_active_labels)
                loss_RE = criterion(RE_output_tag, relation_target_tensor)
                loss = loss_entity + loss_RE
                if args.merge_loss:
                    loss.backward()
                else:
                    loss_entity.backward(retain_graph=True)
                    loss_RE.backward(retain_graph=True)
            if args.use_RL:
                loss = loss_RL
                loss_RL.backward()
            '''
			use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
			if use_teacher_forcing:
				# Teacher forcing: Feed the target as the next input
				for di in range(target_length):
					decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
					loss += criterion(decoder_output, target_tensor[di])
					decoder_input = target_tensor[di]  # Teacher forcing
			else:
				# Without teacher forcing: use its own predictions as the next input
				for di in range(target_length):
					decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
					topv, topi = decoder_output.topk(1)
					decoder_input = topi.squeeze().detach()  # detach from history as input
	
					loss += criterion(decoder_output, target_tensor[di])
					if decoder_input.item() == EOS_token:
						break
			'''

            optimizer.step()
        else:
            NER_active_logits, NER_active_labels, RE_output_tag, NER_output_tag, _, _ = JointModel(
                input_tensor, pos_tensor, target_tensor, args.batchsize, mask,
                True)  # , input_length, target_length
        NER_correct += (torch.argmax(NER_active_logits,
                                     -1) == NER_active_labels).sum().item()
        NER_total += len(NER_active_logits)
        # temp = 0.
        # for i in range(len(relation_target_tensor[0])):
        # 	target = torch.transpose(relation_target_tensor, 0, 1)[i]
        # 	temp += (torch.argmax(RE_output_tag, -1) == target).sum().item()
        RE_correct += (torch.argmax(
            RE_output_tag, -1) == relation_target_tensor).sum().item()
        RE_total += len(RE_output_tag)
        if mode != 'train':
            NER_target_all2.append(target_tensor.cpu().tolist(
            ))  # target_tensor, NER_active_labels .numpy()
            NER_output_all2.append(
                torch.argmax(
                    NER_output_tag,
                    -1).cpu().tolist())  # NER_output_tag, NER_active_logits
            NER_output_logits.append(NER_output_tag.detach().cpu().tolist())
            RE_output_all2.append(
                torch.argmax(RE_output_tag, -1).cpu().tolist())
            RE_target_all2.append(
                relation_target_tensor.detach().cpu().tolist())
            RE_output_logits.append(RE_output_tag.cpu().tolist())
            if b % args.print_batch == 0:
                logger.info(
                    'seq-seq model: (%d %.2f%%), NER acc: %.4f, RE acc: %.4f' %
                    (b, float(b) / batchcnt * 100, NER_correct / NER_total,
                     RE_correct / RE_total))
            '''if not args.do_train:
				if NER_target_all is None:
					NER_target_all = NER_active_labels.to('cpu')
					NER_output_all = NER_active_logits.to('cpu')
				else:
					NER_target_all = torch.cat((NER_target_all.to('cpu'), NER_active_labels.to('cpu')), dim=0)
					NER_output_all = torch.cat((NER_output_all.to('cpu'), NER_active_logits.to('cpu')), dim=0)
				if RE_target_all is None:
					RE_target_all = relation_target_tensor.to('cpu')
					RE_output_all = RE_output_tag.to('cpu')
				else:
					RE_target_all = torch.cat((RE_target_all.to('cpu'), relation_target_tensor.to('cpu')), dim=0)
					RE_output_all = torch.cat((RE_output_all.to('cpu'), RE_output_tag.to('cpu')), dim=0)'''
        if mode == 'train':
            out_losses.append(loss.item())
            if b % args.print_batch == 0:
                if args.use_RL:
                    # loss_entity / loss_RE are not computed in RL mode
                    logger.info(
                        'seq-seq model: (%d %.2f%%), loss_RL: %.4f, NER acc: %.4f, RE acc: %.4f'
                        % (b, float(b) / batchcnt * 100, loss.item(),
                           NER_correct / NER_total, RE_correct / RE_total))
                else:
                    logger.info(
                        'seq-seq model: (%d %.2f%%), loss_NER: %.4f, loss_RE: %.4f, NER acc: %.4f, RE acc: %.4f'
                        % (b, float(b) / batchcnt * 100, loss_entity.item(),
                           loss_RE.item(), NER_correct / NER_total,
                           RE_correct / RE_total))

    if mode != 'train':
        cal_F_score(RE_output_all2, RE_target_all2, NER_target_all2,
                    NER_output_all2, args.batchsize)
        if args.do_train:
            if mode == 'test' or (mode == 'dev' and e == args.epochRL - 1):
                with open(args.output_dir + 'predict_%s_epoch_%s.json' % (mode, e),
                          "a+") as fw:
                    json.dump(
                        {
                            "RE_predict": RE_output_all2,
                            "RE_actual": RE_target_all2,
                            "RE_output_logits": RE_output_logits,
                            "NER_predict": NER_output_all2,
                            "NER_actual": NER_target_all2,
                            "NER_output_logits": NER_output_logits
                        }, fw)
        else:
            with open(args.output_dir + 'predict_%s.json' % mode, "a+") as fw:
                json.dump(
                    {
                        "RE_predict": RE_output_all2,
                        "RE_actual": RE_target_all2,
                        "RE_output_logits": RE_output_logits,
                        "NER_predict": NER_output_all2,
                        "NER_actual": NER_target_all2,
                        "NER_output_logits": NER_output_logits
                    }, fw)
            # np.save('pred_res/RE_predict', RE_output_all2)  # RE_output_all.to('cpu').detach().numpy()
            # np.save('pred_res/RE_actual', RE_target_all2)
            # np.save('pred_res/NER_predict', NER_output_all2)
            # np.save('pred_res/NER_actual', NER_target_all2)
            '''NER_pred_res = metrics.classification_report(NER_target_all2, NER_output_all2)
			logger.info('NER Prediction results: \n{}'.format(NER_pred_res))
			RE_pred_res = metrics.classification_report(RE_target_all2, RE_output_all2)
			logger.info('RE Prediction results: \n{}'.format(RE_pred_res))'''
    else:
        np.save(args.output_dir + "loss_train", out_losses)
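All three examples call a padding_sequence helper whose implementation is not part of this listing. A minimal sketch of what it plausibly does, assuming each batch column is a list of Python int lists: pad every sequence to the longest length in the batch with pad_token and also return the original lengths.

def padding_sequence(sequences, pad_token=0):
    # Hypothetical reconstruction of the helper used in these examples;
    # the real implementation is not shown in the listing.
    lengths = [len(seq) for seq in sequences]
    max_len = max(lengths)
    padded = [list(seq) + [pad_token] * (max_len - len(seq)) for seq in sequences]
    return padded, lengths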
Example #2
        print("training epoch ", e)

        # random.shuffle(train_data)
        # batchcnt = (len(train_data) - 1) // args.batchsize + 1
        # for b in range(batchcnt):
        # 	# start = time.time()
        # 	datas = train_data[b * args.batchsize: (b + 1) * args.batchsize]

        mini_batches = get_minibatches(dev_datasets, args.batchsize)
        batchcnt = len(
            dev_datasets[0]) // args.batchsize  # len(list(mini_batches))
        for b, data in enumerate(mini_batches):
            if b >= batchcnt:
                break
            sentences, pos_lambda, tags, sentences_words, relation_tags, relation_names = data
            input_tensor, input_length = padding_sequence(
                sentences, pad_token=args.embedding_size)
            pos_tensor, input_length = padding_sequence(pos_lambda,
                                                        pad_token=0)
            target_tensor, target_length = padding_sequence(
                tags, pad_token=args.entity_tag_size)
            relation_target_tensor = padding_sequence_recurr(relation_tags)
            if torch.cuda.is_available():
                input_tensor = Variable(
                    torch.cuda.LongTensor(input_tensor, device=device)).cuda()
                target_tensor = Variable(
                    torch.cuda.LongTensor(target_tensor,
                                          device=device)).cuda()
                pos_tensor = Variable(
                    torch.cuda.FloatTensor(pos_tensor, device=device)).cuda()
                relation_target_tensor = Variable(
                    torch.cuda.LongTensor(relation_target_tensor,
                                          device=device)).cuda()
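Batching in these examples goes through a get_minibatches helper that is also omitted. One plausible reading, assuming the dataset is a tuple of parallel lists (sentences, pos_lambda, tags, ...), is a generator that yields consecutive slices of every column; the callers then stop after len(datasets[0]) // batchsize batches.

def get_minibatches(datasets, batch_size):
    # Hypothetical reconstruction: slice each parallel list into consecutive
    # batches of batch_size items and yield them together.
    n = len(datasets[0])
    for start in range(0, n, batch_size):
        yield tuple(column[start:start + batch_size] for column in datasets)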
Example #3
import time

import numpy as np
import torch
from torch import optim
from torch.autograd import Variable

# Relies on module-level names: train_datasets, BATCH, EMBEDDING_SIZE, TAG_SIZE,
# device, epoch and the per-batch train() step function defined elsewhere.


def trainEpoches(encoder,
                 decoder,
                 criterion,
                 print_every=10,
                 learning_rate=0.001,
                 l2=0.0001):
    start = time.time()
    out_losses = []
    print_loss_total = 0  # Reset every print_every
    # plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.Adam(encoder.parameters(),
                                   lr=learning_rate,
                                   weight_decay=l2)  # SGD
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate,
                                   weight_decay=l2)
    # training_pairs = [tensorsFromPair(random.choice(pairs))
    # 				  for i in range(n_iters)]

    # for iter in range(1, n_iters + 1):
    # training_pair = training_pairs[iter - 1]
    # for epoch in range(epoches):
    # i = 0
    mini_batches = get_minibatches(train_datasets, BATCH)
    batches_size = len(train_datasets[0]) // BATCH  # len(list(mini_batches))
    for i, data in enumerate(mini_batches):
        if i == batches_size:
            break
        # for i, data in enumerate(train_dataloader, 1):
        sentences, tags = data
        input_tensor, input_length = padding_sequence(sentences,
                                                      pad_token=EMBEDDING_SIZE)
        target_tensor, target_length = padding_sequence(tags,
                                                        pad_token=TAG_SIZE)
        if torch.cuda.is_available():
            input_tensor = Variable(
                torch.cuda.LongTensor(input_tensor, device=device)).cuda()
            target_tensor = Variable(
                torch.cuda.LongTensor(target_tensor, device=device)).cuda()
        else:
            input_tensor = Variable(
                torch.LongTensor(input_tensor, device=device))
            target_tensor = Variable(
                torch.LongTensor(target_tensor, device=device))

        loss = train(input_tensor, target_tensor, encoder, decoder,
                     encoder_optimizer, decoder_optimizer,
                     criterion)  # , input_length, target_length
        out_losses.append(loss)
        print_loss_total += loss
        # plot_loss_total += loss

        if i % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print(' (%d %d%%) %.4f' %
                  (i, float(i) / batches_size * 100, print_loss_avg))
            # print('%s (%d %d%%) %.4f' % (timeSince(start, float(i) / batches_size),
            # i, float(i) / batches_size * 100, print_loss_avg))

        # plot_loss_avg = plot_loss_total / plot_every
        # plot_losses.append(plot_loss_avg)
        # plot_loss_total = 0
        # i += 1
    np.save("loss", out_losses)
    if epoch % 10 == 0:  # `epoch` is expected to be set by the enclosing training loop
        model_name = "./model/model_encoder_epoch" + str(epoch) + ".pkl"
        torch.save(encoder, model_name)
        model_name = "./model/model_decoder_epoch" + str(epoch) + ".pkl"
        torch.save(decoder, model_name)
        print("Model has been saved")