Code example #1
def train(model, data_loaders, word_vocab, wordpiece_vocab, hierarchy, epoch_start = 1):

	logger.info("Training model.")
	
	# Set up a new Bert Client, for encoding the wordpieces
	if cf.EMBEDDING_MODEL == "bert":
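		# BertClient is assumed to connect to a running BERT-as-a-service style encoding server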
		bc = BertClient()
	else:
		bc = None

	modelEvaluator = ModelEvaluator(model, data_loaders['dev'], word_vocab, wordpiece_vocab, hierarchy, bc)
	
	# Only optimise parameters that still require gradients
	optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=cf.LEARNING_RATE)
	model.cuda()


	num_batches = len(data_loaders["train"])
	progress_bar = ProgressBar(num_batches = num_batches, max_epochs = cf.MAX_EPOCHS, logger = logger)
	avg_loss_list = []

	# Train the model

	for epoch in range(epoch_start, cf.MAX_EPOCHS + 1):
		epoch_start_time = time.time()
		epoch_losses = []

		if cf.TASK == "end_to_end":
			if cf.BATCH_SIZE != 10:
				print("Warning: batch size must currently be set to 10 for the end-to-end model.")
			for (i, (batch_x, batch_y, batch_z, _, batch_tx, batch_ty, _)) in enumerate(data_loaders["train"]):

				# Skip incomplete batches at the end of the epoch
				if len(batch_x) < cf.BATCH_SIZE:
					continue

				batch_y = batch_y.float().to(device)
				batch_z = batch_z.float().to(device)

				model.zero_grad()
				model.train()

				# 1. Convert the batch_x from wordpiece ids into wordpieces
				if cf.EMBEDDING_MODEL == "bert":
					wordpieces = batch_to_wordpieces(batch_x, wordpiece_vocab)

					# 2. Encode the wordpieces into Bert vectors
					bert_embs = wordpieces_to_bert_embs(wordpieces, bc)
					bert_embs = bert_embs.to(device)

					# 3. Feed these Bert vectors to our model
					y_hat = model(bert_embs)

					loss = model.calculate_loss(y_hat, batch_x, batch_y, batch_z)

				elif cf.EMBEDDING_MODEL in ['random', 'glove', 'word2vec']:
					batch_tx_cuda = batch_tx.long().to(device)
					batch_ty = batch_ty.float().to(device)

					# 3. Feed the token ids to our model
					y_hat = model(batch_tx_cuda)

					loss = model.calculate_loss(y_hat, batch_tx, batch_ty, batch_z)

				# 4. Backpropagate
				loss.backward()
				optimizer.step()
				epoch_losses.append(loss.item())  # store a float rather than the full loss tensor

				# 5. Draw the progress bar
				progress_bar.draw_bar(i, epoch, epoch_start_time)

		elif cf.TASK == "mention_level":
			for (i, (batch_xl, batch_xr, batch_xa, batch_xm, batch_y)) in enumerate(data_loaders["train"]):

				# 1. Convert the batch_x from wordpiece ids into wordpieces
				wordpieces_l = batch_to_wordpieces(batch_xl, wordpiece_vocab)
				wordpieces_r = batch_to_wordpieces(batch_xr, wordpiece_vocab)
				#wordpieces_a = batch_to_wordpieces(batch_xa, wordpiece_vocab)
				wordpieces_m = batch_to_wordpieces(batch_xm, wordpiece_vocab)
	
				
				# 2. Encode the wordpieces into Bert vectors
				bert_embs_l  = wordpieces_to_bert_embs(wordpieces_l, bc).to(device)
				bert_embs_r  = wordpieces_to_bert_embs(wordpieces_r, bc).to(device)				
				#bert_embs_a  = wordpieces_to_bert_embs(wordpieces_a, bc).to(device)
				bert_embs_m  = wordpieces_to_bert_embs(wordpieces_m, bc).to(device)
				
				batch_y = batch_y.float().to(device)	

				# 3. Feed these Bert vectors to our model
				model.zero_grad()
				model.train()

				# bert_embs_a is left commented out above, so None is passed in its place
				y_hat = model(bert_embs_l, bert_embs_r, None, bert_embs_m)

				loss = model.calculate_loss(y_hat, batch_y)

				# 4. Backpropagate
				loss.backward()
				optimizer.step()
				epoch_losses.append(loss.item())  # store a float rather than the full loss tensor

				# 5. Draw the progress bar
				progress_bar.draw_bar(i, epoch, epoch_start_time)

		avg_loss = sum(epoch_losses) / float(len(epoch_losses))
		avg_loss_list.append(avg_loss)

		progress_bar.draw_completed_epoch(avg_loss, avg_loss_list, epoch, epoch_start_time)


		modelEvaluator.evaluate_every_n_epochs(1, epoch)
Code example #2
def main():
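    # Record the run configuration (the attributes of the cf module) so the run can be reproduced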
    with open("models/%s/params.txt" % cf.MODEL_NAME, "w") as f:
        f.write("\n".join(
            ["%s : %s" % (k, cf.__dict__[k]) for k in cf.__dict__]))

    progress_bar = ProgressBar()
    data_iterators, word_embeddings, char_embeddings, word_to_ix, ix_to_word, wtag_to_ix, ix_to_wtag, char_to_ix, ix_to_char, ctag_to_ix, ix_to_ctag = load_data(
    )
    logger.info("Building model...")

    if cf.GRANULARITY == CHAR_LEVEL:
        model_class = CharLSTMTagger
    elif cf.GRANULARITY == WORD_LEVEL:
        model_class = WordLSTMTagger
    elif cf.GRANULARITY == CHAR_AND_WORD_LEVEL:
        model_class = CombinedLSTMTagger
    if cf.GRANULARITY == WORD_LEVEL and cf.WORD_LEVEL_WITH_FLAGGER:
        model_class = WordTaggerWithFlagger
    if cf.GRANULARITY == WORD_LEVEL and cf.EMBEDDING_MODEL == "Bert":
        model_class = FeedForwardBert


    model = model_class(
        cf.MODEL_TYPE, cf.WORD_EMBEDDING_DIM, cf.CHAR_EMBEDDING_DIM,
        cf.HIDDEN_DIM, len(char_to_ix), len(ix_to_word),
        len(wtag_to_ix) if cf.GRANULARITY == WORD_LEVEL else len(ctag_to_ix),
        cf.BATCH_SIZE, cf.MAX_WORD_LENGTH, cf.MAX_SENT_LENGTH, word_embeddings,
        char_embeddings)
    epoch_start = 1
    # To resume training from a saved checkpoint, load it here and set epoch_start accordingly, e.g.:
    #model.load_state_dict(torch.load('models/%s/model_trained/epoch_90' % cf.MODEL_NAME))
    #epoch_start = 90

    # Ensure the word embeddings aren't modified during training
    # (only parameters with requires_grad=True are passed to the optimizer)
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()),
                          lr=cf.LEARNING_RATE,
                          momentum=0.9)
    model.cuda()
    num_batches = len(data_iterators["train"])
    avg_loss_list = []  # A place to store the loss history
    best_f1 = [0.0, -1]  # F1, epoch number
    for epoch in range(epoch_start, cf.MAX_EPOCHS + 1):
        epoch_start_time = time.time()
        epoch_losses = []
        for (i, (batch_w, batch_x, batch_y,
                 batch_f)) in enumerate(data_iterators["train"]):
            #if i > 1:
            #	continue
            # Ignore batch if it is not the same size as the others (happens at the end sometimes)

            if len(batch_w) != cf.BATCH_SIZE:
                logger.warning(
                    "A batch did not have the correct number of sentences.")
                continue

            # Ignore batch if it is not the same size as the others (happens at the end sometimes)
            if len(batch_x) != cf.BATCH_SIZE:
                logger.warning(
                    "A batch did not have the correct number of words.")
                continue

            batch_w = batch_w.to(device)
            batch_x = batch_x.to(device)
            if cf.WORD_LEVEL_WITH_FLAGGER:
                batch_f = batch_f.to(device)

            # Step 1. Remember that Pytorch accumulates gradients.
            # We need to clear them out before each instance
            model.zero_grad()

            # Step 2. Compute the unpadded length of each sequence by counting its
            # non-zero (non-padding) ids, using tensor ops so this works on the GPU.
            batch_x_lengths = [int((x != 0).sum().item()) for x in batch_x]
            batch_w_lengths = [int((w != 0).sum().item()) for w in batch_w]


            # Step 3. Run our forward pass.
            model.train()
            if cf.WORD_LEVEL_WITH_FLAGGER:
                tag_scores, tag_scores_f = model(batch_f, batch_x,
                                                 batch_w_lengths,
                                                 batch_x_lengths)
                loss = model.calculate_loss(tag_scores, tag_scores_f, batch_y,
                                            batch_f)
            else:
                tag_scores = model(batch_w, batch_x, batch_w_lengths,
                                   batch_x_lengths)
                loss = model.calculate_loss(tag_scores, batch_y)

            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.item())  # store a float rather than the full loss tensor
            progress_bar.draw_bar(i, epoch, num_batches, cf.MAX_EPOCHS,
                                  epoch_start_time)

        avg_loss = sum(epoch_losses) / float(len(epoch_losses))
        avg_loss_list.append(avg_loss)

        progress_bar.draw_completed_epoch(avg_loss, avg_loss_list, epoch,
                                          cf.MAX_EPOCHS, epoch_start_time)
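
        # Evaluate on the test set every 10 epochs (and at the final epoch), keeping the best model by F1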

        if epoch % 10 == 0 or epoch == cf.MAX_EPOCHS:
            f1 = evaluate_model(model,
                                data_iterators["test"],
                                word_to_ix,
                                ix_to_word,
                                wtag_to_ix,
                                ix_to_wtag,
                                char_to_ix,
                                ix_to_char,
                                ctag_to_ix,
                                ix_to_ctag,
                                epoch,
                                print_output=True)
            if f1 > best_f1[0]:
                best_f1 = [f1, epoch]
                logger.info("New best F1 score achieved!")
                logger.info("Saving model...")
                model_filename = "models/%s/model_trained/epoch_%d" % (
                    cf.MODEL_NAME, epoch)
                torch.save(model.state_dict(), model_filename)
                logger.info("Model saved to %s." % model_filename)
            elif epoch - best_f1[1] >= 50:
                logger.info(
                    "No improvement to F1 score in past 50 epochs. Stopping early."
                )
                logger.info("Best F1 Score: %.4f" % best_f1[0])
                return
Code example #3
def train(model,
          data_loader_train,
          data_loader_dev,
          dataset_dev,
          ground_truth_triples,
          epoch_start=1):

    logger.info("Training model.")

    modelEvaluator = ModelEvaluator(model, data_loader_dev, dataset_dev,
                                    ground_truth_triples, cf)

    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=cf.LEARNING_RATE)
    model.cuda()

    num_batches = len(data_loader_train)
    progress_bar = ProgressBar(num_batches=num_batches,
                               max_epochs=cf.MAX_EPOCHS,
                               logger=logger)
    avg_loss_list = []

    # Train the model
    for epoch in range(epoch_start, cf.MAX_EPOCHS + 1):
        epoch_start_time = time.time()
        epoch_losses = []

        for (i, (batch_idx, batch_doc_idx, batch_d, batch_h, batch_r, batch_t,
                 batch_y)) in enumerate(data_loader_train):
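
            # batch_h, batch_r and batch_t presumably hold the head, relation and tail
            # ids of each candidate triple (cf. ground_truth_triples)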

            # 1. Place each component onto CUDA
            batch_d = batch_d.to(device)
            batch_h = batch_h.to(device)
            batch_r = batch_r.to(device)
            batch_t = batch_t.to(device)
            batch_y = batch_y.float().to(device)

            # 2. Feed the batch tensors to our model
            model.zero_grad()
            model.train()

            y_hat = model(batch_d, batch_h, batch_r, batch_t)

            # 3. Calculate the loss via BCE
            loss = model.calculate_loss(y_hat, batch_y)

            # 4. Backpropagate
            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.item())  # store a float rather than the full loss tensor

            # 5. Draw the progress bar
            progress_bar.draw_bar(i, epoch, epoch_start_time)

        avg_loss = sum(epoch_losses) / float(len(epoch_losses))
        avg_loss_list.append(avg_loss)

        progress_bar.draw_completed_epoch(avg_loss, avg_loss_list, epoch,
                                          epoch_start_time)
        modelEvaluator.evaluate_every_n_epochs(1, epoch)
Code example #4
def train(model,
          data_loaders,
          word_vocab,
          wordpiece_vocab,
          hierarchy,
          ground_truth_triples,
          epoch_start=1):

    logger.info("Training model.")

    # Set up a new Bert Client, for encoding the wordpieces
    bc = BertClient()

    modelEvaluator = ModelEvaluator(model, data_loaders['dev'], word_vocab,
                                    wordpiece_vocab, hierarchy,
                                    ground_truth_triples, cf)

    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=cf.LEARNING_RATE)
    model.cuda()
    logger.info("Learning rate: %s" % cf.LEARNING_RATE)

    num_batches = len(data_loaders["train"])
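    # NOTE: the epoch limit is hard-coded here rather than taken from cf.MAX_EPOCHS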
    max_epochs = 1000
    progress_bar = ProgressBar(num_batches=num_batches,
                               max_epochs=max_epochs,
                               logger=logger)
    avg_loss_list = []

    # Train the model

    for epoch in range(epoch_start, max_epochs + 1):
        epoch_start_time = time.time()
        epoch_losses = []

        for (i, (batch_x, batch_y, batch_z, _, batch_tx, _,
                 _)) in enumerate(data_loaders["train"]):

            if len(batch_x) < cf.BATCH_SIZE:
                continue

            # 1. Convert wordpiece ids into wordpiece tokens and encode them into Bert embeddings
            wordpieces = batch_to_wordpieces(batch_x, wordpiece_vocab)
            wordpiece_embs = wordpieces_to_bert_embs(wordpieces, bc)

            wordpiece_embs = wordpiece_embs.to(device)
            batch_y = batch_y.float().to(device)
            batch_z = batch_z.float().to(device)

            # 2. Create sinusoidal positional embeddings and concatenate them to the Bert embeddings
            if cf.POSITIONAL_EMB_DIM > 0:
                sin_embs = SinusoidalPositionalEmbedding(
                    embedding_dim=cf.POSITIONAL_EMB_DIM,
                    padding_idx=0,
                    left_pad=True)
                sin_embs = sin_embs(
                    torch.ones([batch_x.size()[0],
                                batch_x.size()[1]])).to(device)
                joined_embs = torch.cat((wordpiece_embs, sin_embs), dim=2)
            else:
                joined_embs = wordpiece_embs

            # 3. Feed these vectors to our model
            model.zero_grad()
            model.train()

            y_hat = model(joined_embs)

            loss = model.calculate_loss(y_hat, batch_x, batch_y, batch_z)

            # 4. Backpropagate
            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.item())  # store a float rather than the full loss tensor

            # 5. Draw the progress bar
            progress_bar.draw_bar(i, epoch, epoch_start_time)

        avg_loss = sum(epoch_losses) / float(len(epoch_losses))
        avg_loss_list.append(avg_loss)

        progress_bar.draw_completed_epoch(avg_loss, avg_loss_list, epoch,
                                          epoch_start_time)

        modelEvaluator.evaluate_every_n_epochs(1, epoch)
Code example #5
def main():
    progress_bar = ProgressBar()
    data_iterator, glove_embeddings, word_to_ix, ix_to_word = load_data()
    logger.info("Building model...")
    model = LSTMTagger(cf.EMBEDDING_DIM, cf.HIDDEN_DIM, len(word_to_ix),
                       cf.BATCH_SIZE, cf.MAX_SENT_LENGTH, glove_embeddings)
    # Ensure the word embeddings aren't modified during training
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()),
                          lr=0.1)
    model.cuda()
    num_batches = len(data_iterator)
    loss_list = []  # A place to store the loss history
    for epoch in range(1, cf.MAX_EPOCHS + 1):
        epoch_start_time = time.time()
        for (i, (batch_x, batch_y)) in enumerate(data_iterator):
            # Ignore batch if it is not the same size as the others (happens at the end sometimes)
            if len(batch_x) != cf.BATCH_SIZE:
                continue
            batch_x = batch_x.to(device)
            # Step 1. Remember that Pytorch accumulates gradients.
            # We need to clear them out before each instance
            model.zero_grad()

            # Also, we need to clear out the hidden state of the LSTM,
            # detaching it from its history on the last instance.
            model.hidden = model.init_hidden()

            # Step 2. The inputs from the data iterator are already tensors of word
            # indices, so no further preparation is needed here.

            batch_x_lengths = []
            for x in batch_x:
                batch_x_lengths.append(len(x))

            # Step 3. Run our forward pass.
            tag_scores = model(batch_x, batch_x_lengths)

            loss = modified_loss(tag_scores, batch_y, batch_x_lengths,
                                 word_to_ix)

            loss.backward()
            optimizer.step()
            progress_bar.draw_bar(i, epoch, num_batches, cf.MAX_EPOCHS,
                                  epoch_start_time)

        progress_bar.draw_completed_epoch(loss, loss_list, epoch,
                                          cf.MAX_EPOCHS, epoch_start_time)

        loss_list.append(loss.item())  # store a plain float rather than the loss tensor
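        # Early stopping check: every 10 epochs, compare the mean loss of the last
        # 10 epochs against the mean of the 10 epochs before that.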
        if epoch % 10 == 0:
            avg_loss = sum(loss_list[epoch - 10:]) / 10
            logger.info("Average loss over past 10 epochs: %.6f" % avg_loss)
            if epoch >= 20:
                prev_avg_loss = sum(loss_list[epoch - 20:epoch - 10]) / 10
                if avg_loss >= prev_avg_loss:
                    logger.info(
                        "Average loss has not improved over past 10 epochs. Stopping early."
                    )
                    evaluate_model(model, ix_to_word)
                    break
        if epoch == 1 or epoch % 10 == 0 or epoch == cf.MAX_EPOCHS:
            evaluate_model(model, ix_to_word)

    logger.info("Saving model...")
    torch.save(model.state_dict(), "asset/model_trained")
    logger.info("Model saved to %s." % "asset/model_trained")