def train(task1_data, task2_data, test_data, label_sent, label_mask,
          test_lines, epoch_num, model, loss_function, optimizer):
    '''combine train set and dev set'''
    ''' task1_data, task2_data, test_data '''
    training_data_index, training_masks, training_labels = task1_data
    training_data_task2_index, training_task2_masks, training_task2_labels, train_task2_other_labels = task2_data
    testing_data_index, testing_masks = test_data
    label_sent = autograd.Variable(torch.cuda.LongTensor(label_sent))
    label_mask = autograd.Variable(torch.cuda.FloatTensor(label_mask))
    iter = 0
    for epoch in range(epoch_num):
        print('current epoch: ', epoch)
        minibatches_idx = get_minibatches_idx(
            len(training_data_index),
            minibatch_size=config['batch_size'],
            shuffle=False)
        minibatches_idx_task2 = get_minibatches_idx(
            len(training_data_task2_index),
            minibatch_size=config['batch_size'],
            shuffle=False)
        for i, minibatch in enumerate(minibatches_idx):
            model.train()
            '''padding is only done once the minibatch is fixed, which is somewhat inefficient'''
            sentence_batch, mask_batch, targets_batch = get_mask(
                training_data_index, training_masks, training_labels,
                minibatch)
            sentence_batch = autograd.Variable(
                torch.cuda.LongTensor(sentence_batch))
            targets_batch = autograd.Variable(
                torch.cuda.FloatTensor(targets_batch))
            mask_batch = autograd.Variable(torch.cuda.FloatTensor(mask_batch))
            '''dim=-1 refers to the last dimension'''
            lengths_batch = mask_batch.sum(dim=-1)  # per-sample sequence lengths
            seq_lengths, seq_idx = lengths_batch.sort(0, descending=True)
            seq_lengths = seq_lengths.int().data.tolist()
            sentence_batch = sentence_batch[seq_idx]
            targets_batch = targets_batch[seq_idx]
            mask_batch = mask_batch[seq_idx]
            model.zero_grad()
            tag_scores, _ = model(sentence_batch, seq_lengths, mask_batch,
                                  label_sent, label_mask)
            '''Binary Cross Entropy'''
            temp_loss_matrix = torch_where(
                targets_batch[:, :-1].reshape(-1) < 1,
                1.0 - tag_scores[:, :-1].reshape(-1),
                tag_scores[:, :-1].reshape(-1))
            loss = -torch.mean(torch.log(temp_loss_matrix))
            loss.backward()
            optimizer.step()

            '''task2'''
            if i < len(minibatches_idx_task2):
                model.train()
                minibatch_task2 = minibatches_idx_task2[i]
                '''padding is only done once the minibatch is fixed, which is somewhat inefficient'''
                sentence_batch, mask_batch, targets_batch, others_batch = get_mask_task2(
                    training_data_task2_index, training_task2_masks,
                    training_task2_labels, train_task2_other_labels,
                    minibatch_task2)
                sentence_batch = autograd.Variable(
                    torch.cuda.LongTensor(sentence_batch))
                targets_batch = autograd.Variable(
                    torch.cuda.FloatTensor(targets_batch))
                mask_batch = autograd.Variable(
                    torch.cuda.FloatTensor(mask_batch))
                others_batch = autograd.Variable(
                    torch.cuda.LongTensor(others_batch))
                '''dim=-1 refers to the last dimension'''
                lengths_batch = mask_batch.sum(dim=-1)
                seq_lengths, seq_idx = lengths_batch.sort(0, descending=True)
                seq_lengths = seq_lengths.int().data.tolist()
                sentence_batch = sentence_batch[seq_idx]
                targets_batch = targets_batch[seq_idx]
                mask_batch = mask_batch[seq_idx]
                others_batch = others_batch[seq_idx]
                model.zero_grad()
                tag_scores, tag_scores_task2 = model(sentence_batch,
                                                     seq_lengths, mask_batch,
                                                     label_sent, label_mask)
                # print('tag_scores_task2:', tag_scores_task2)
                '''Binary Cross Entropy'''
                temp_loss_matrix = torch_where(
                    targets_batch[:, :-1].reshape(-1) < 1,
                    1.0 - tag_scores[:, :-1].reshape(-1),
                    tag_scores[:, :-1].reshape(-1))
                loss_task1 = -torch.mean(torch.log(temp_loss_matrix))
                '''task2 loss'''
                other_label_scores = tag_scores_task2.index_select(
                    1, others_batch.view(-1))
                loss_task2 = -torch.mean(torch.log(other_label_scores))
                # print('loss_task1:', loss_task1)
                # print('loss_task2:', loss_task2)
                loss = loss_task1 + loss_task2
                loss.backward()
                optimizer.step()

            iter += 1
            if iter % 200 == 0:
                print(iter, ' loss: ', loss)
        # if epoch == 3:
        #     torch.save(model.state_dict(), 'models_' + str(iter) + '.pt')
        '''test after one epoch'''
        print('testing....')
        test(testing_data_index, testing_masks, model, label_sent, label_mask,
             test_lines)
    print('train over.')
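
# torch_where is a project helper that is not defined in this snippet. A minimal
# sketch, assuming it simply mirrors torch.where (older PyTorch releases lacked
# that built-in, which is a common reason for such a wrapper): selecting
# (1 - score) for negative targets and score for positive targets makes
# -mean(log(...)) an unweighted binary cross-entropy over all label slots
# except the last column.
import torch


def torch_where(condition, x, y):
    # Element-wise select: x where condition is True, otherwise y.
    return torch.where(condition, x, y)

# Illustrative equivalence check only (not part of the training code):
# loss = torch.nn.functional.binary_cross_entropy(
#     tag_scores[:, :-1].reshape(-1), targets_batch[:, :-1].reshape(-1))
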
def train(task1_data, task2_data, test_data, label_sent, label_mask, id2word,
          epoch_num, model, loss_function, optimizer):
    '''combine train set and dev set'''
    ''' task1_data, task2_data, test_data '''
    training_data_index, training_masks, training_labels = task1_data
    training_data_task2_index, training_task2_masks, training_task2_labels, train_task2_other_labels = task2_data
    testing_data_index, testing_masks, test_lines = test_data
    label_sent = autograd.Variable(torch.cuda.LongTensor(label_sent))
    label_mask = autograd.Variable(torch.cuda.FloatTensor(label_mask))
    print("training...")
    iter = 0
    for epoch in range(epoch_num):
        print('current epoch: ', epoch)
        minibatches_idx = get_minibatches_idx(
            len(training_data_index),
            minibatch_size=config['batch_size'],
            shuffle=True)
        minibatches_idx_task2 = get_minibatches_idx(
            len(training_data_task2_index),
            minibatch_size=config['batch_size'],
            shuffle=True)
        for i, minibatch in enumerate(minibatches_idx):
            model.train()
            '''padding is only done once the minibatch is fixed, which is somewhat inefficient'''
            sentence_batch, mask_batch, targets_batch = get_mask(
                training_data_index, training_masks, training_labels,
                minibatch)
            sentence_batch = autograd.Variable(
                torch.cuda.LongTensor(sentence_batch))
            targets_batch = autograd.Variable(
                torch.cuda.FloatTensor(targets_batch))
            mask_batch = autograd.Variable(torch.cuda.FloatTensor(mask_batch))
            '''dim=-1 refers to the last dimension'''
            lengths_batch = mask_batch.sum(dim=-1)  # per-sample sequence lengths
            seq_lengths, seq_idx = lengths_batch.sort(0, descending=True)
            seq_lengths = seq_lengths.int().data.tolist()
            sentence_batch = sentence_batch[seq_idx]
            targets_batch = targets_batch[seq_idx]
            mask_batch = mask_batch[seq_idx]
            model.zero_grad()
            '''Bert'''
            # sentence_numpy = sentence_batch.cpu().array()
            # bert_rep_batch = []
            # for i in range(config['batch_size']):
            #     sent_str = ''
            #     for id in list(sentence_numpy[i]):
            #         if id != 0:
            #             sent_str += id2word.get(id) + ' '
            sent_list = recover_pytorch_idmatrix_2_text(sentence_batch, id2word)
            bert_rep_batch = []
            if use_bert:
                for sent in sent_list:
                    bert_rep = sent_to_embedding_last4(sent, bert_tokenizer,
                                                       bert_model, True)
                    bert_rep_batch.append(bert_rep.reshape(1, -1))
                bert_rep_batch = torch.cat(bert_rep_batch, 0)  # (batch, 768)
            tag_scores, _ = model(sentence_batch, seq_lengths, mask_batch,
                                  label_sent, label_mask, bert_rep_batch)
            '''Binary Cross Entropy'''
            temp_loss_matrix = torch_where(
                targets_batch[:, :-1].reshape(-1) < 1,
                1.0 - tag_scores[:, :-1].reshape(-1),
                tag_scores[:, :-1].reshape(-1))
            loss = -torch.mean(torch.log(temp_loss_matrix))
            loss.backward()
            optimizer.step()

            '''task2'''
            if i < len(minibatches_idx_task2):
                model.train()
                minibatch_task2 = minibatches_idx_task2[i]
                '''padding is only done once the minibatch is fixed, which is somewhat inefficient'''
                sentence_batch, mask_batch, targets_batch, others_batch = get_mask_task2(
                    training_data_task2_index, training_task2_masks,
                    training_task2_labels, train_task2_other_labels,
                    minibatch_task2)
                sentence_batch = autograd.Variable(
                    torch.cuda.LongTensor(sentence_batch))
                targets_batch = autograd.Variable(
                    torch.cuda.FloatTensor(targets_batch))
                mask_batch = autograd.Variable(
                    torch.cuda.FloatTensor(mask_batch))
                others_batch = autograd.Variable(
                    torch.cuda.LongTensor(others_batch))
                '''dim=-1 refers to the last dimension'''
                lengths_batch = mask_batch.sum(dim=-1)
                seq_lengths, seq_idx = lengths_batch.sort(0, descending=True)
                seq_lengths = seq_lengths.int().data.tolist()
                sentence_batch = sentence_batch[seq_idx]
                targets_batch = targets_batch[seq_idx]
                mask_batch = mask_batch[seq_idx]
                others_batch = others_batch[seq_idx]
                model.zero_grad()
                sent_list = recover_pytorch_idmatrix_2_text(
                    sentence_batch, id2word)
                bert_rep_batch = []
                if use_bert:
                    for sent in sent_list:
                        bert_rep = sent_to_embedding_last4(
                            sent, bert_tokenizer, bert_model, True)
                        bert_rep_batch.append(bert_rep.reshape(1, -1))
                    bert_rep_batch = torch.cat(bert_rep_batch, 0)  # (batch, 768)
                tag_scores, tag_scores_task2 = model(sentence_batch,
                                                     seq_lengths, mask_batch,
                                                     label_sent, label_mask,
                                                     bert_rep_batch)
                # print('tag_scores_task2:', tag_scores_task2)
                '''Binary Cross Entropy'''
                temp_loss_matrix = torch_where(
                    targets_batch[:, :-1].reshape(-1) < 1,
                    1.0 - tag_scores[:, :-1].reshape(-1),
                    tag_scores[:, :-1].reshape(-1))
                loss_task1 = -torch.mean(torch.log(temp_loss_matrix))
                '''task2 loss'''
                other_label_scores = tag_scores_task2.index_select(
                    1, others_batch.view(-1))
                loss_task2 = -torch.mean(torch.log(other_label_scores))
                # print('loss_task1:', loss_task1)
                # print('loss_task2:', loss_task2)
                loss = loss_task1 + loss_task2
                loss.backward()
                optimizer.step()

            iter += 1
            if iter % 20 == 0:
                print(iter, ' loss: ', loss)
        # if epoch == 3:
        #     torch.save(model.state_dict(), 'models_' + str(iter) + '.pt')
        '''test after one epoch'''
        # torch.save(model.state_dict(), save_model_path)
        # print('model saved succeed. train over')
        # return
        # else:
        if epoch > 18 and (epoch + 1) % 10 == 0:
            print('testing....')
            test(testing_data_index, testing_masks, test_lines, model,
                 label_sent, label_mask, id2word)
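
# recover_pytorch_idmatrix_2_text and sent_to_embedding_last4 are helpers defined
# elsewhere in the repo. A minimal sketch of the first one, assuming it follows the
# commented-out loop above (id 0 is padding, id2word maps word ids back to tokens);
# the real helper may differ in details such as join character or lowercasing.
def recover_pytorch_idmatrix_2_text(sentence_batch, id2word):
    # sentence_batch: (batch, max_len) LongTensor of word ids, 0 = pad
    sent_list = []
    for row in sentence_batch.cpu().numpy():
        words = [id2word.get(int(idx)) for idx in row if int(idx) != 0]
        sent_list.append(' '.join(words))
    return sent_list
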
def train(all_sentences, all_masks, all_labels, label_sent, label_mask,
          epoch_num, model, loss_function, optimizer):
    '''combine train set and dev set'''
    training_data_index, training_masks, training_labels = (
        all_sentences[0] + all_sentences[1], all_masks[0] + all_masks[1],
        all_labels[0] + all_labels[1])
    testing_data_index, testing_masks, testing_labels = (
        all_sentences[2], all_masks[2], all_labels[2])
    label_sent = autograd.Variable(torch.cuda.LongTensor(label_sent))
    label_mask = autograd.Variable(torch.cuda.FloatTensor(label_mask))
    iter = 0
    for epoch in range(epoch_num):
        print('current epoch: ', epoch)
        minibatches_idx = get_minibatches_idx(
            len(training_data_index),
            minibatch_size=config['batch_size'],
            shuffle=False)
        # random.shuffle(minibatches_idx)
        for i, minibatch in enumerate(minibatches_idx):
            model.train()
            '''padding is only done once the minibatch is fixed, which is somewhat inefficient'''
            sentence_batch, mask_batch, targets_batch = get_mask(
                training_data_index, training_masks, training_labels,
                minibatch)
            sentence_batch = autograd.Variable(
                torch.cuda.LongTensor(sentence_batch))
            targets_batch = autograd.Variable(
                torch.cuda.FloatTensor(targets_batch))
            mask_batch = autograd.Variable(torch.cuda.FloatTensor(mask_batch))
            '''dim=-1 refers to the last dimension'''
            lengths_batch = mask_batch.sum(dim=-1)  # per-sample sequence lengths
            seq_lengths, seq_idx = lengths_batch.sort(0, descending=True)
            seq_lengths = seq_lengths.int().data.tolist()
            sentence_batch = sentence_batch[seq_idx]
            targets_batch = targets_batch[seq_idx]
            mask_batch = mask_batch[seq_idx]
            model.zero_grad()
            tag_scores = model(sentence_batch, seq_lengths, mask_batch,
                               label_sent, label_mask)
            '''Binary Cross Entropy'''
            temp_loss_matrix = torch_where(
                targets_batch[:, :-1].reshape(-1) < 1,
                1.0 - tag_scores[:, :-1].reshape(-1),
                tag_scores[:, :-1].reshape(-1))
            loss = -torch.mean(torch.log(temp_loss_matrix))
            # loss = loss_function(tag_scores[:, :-1].reshape(-1),
            #                      targets_batch[:, :-1].reshape(-1))
            # l2_name_set = set(['conv_1.weight', 'conv_2.weight',
            #                    'hidden2tag.weight', 'emb2hidden.weight'])
            # reg_loss = None
            # for name, param in model.named_parameters():
            #     if name in l2_name_set:
            #         if reg_loss is None:
            #             reg_loss = 0.5 * torch.sum(param**2)
            #         else:
            #             reg_loss = reg_loss + 0.5 * param.norm(2)**2
            # loss += reg_loss * 1e-6
            loss.backward()
            optimizer.step()

            iter += 1
            if iter % 200 == 0:
                print(iter, ' loss: ', loss)
        # if epoch == 3:
        #     torch.save(model.state_dict(), 'models_' + str(iter) + '.pt')
        '''test after one epoch'''
        print('testing....')
        test(testing_data_index, testing_masks, testing_labels, model,
             label_sent, label_mask)
    print('train over.')
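
# get_minibatches_idx is shared utility code not shown in this file. A minimal
# sketch, assuming it behaves the way the callers above expect: a plain list of
# index lists that can be enumerated and indexed by position, optionally shuffled.
import numpy as np


def get_minibatches_idx(n, minibatch_size, shuffle=False):
    # n: number of training examples; returns e.g. [[0, 1, ...], [bs, bs+1, ...], ...]
    idx_list = np.arange(n, dtype='int64')
    if shuffle:
        np.random.shuffle(idx_list)
    minibatches = []
    for start in range(0, n, minibatch_size):
        minibatches.append(idx_list[start:start + minibatch_size].tolist())
    return minibatches
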