def train(model, iterator, optimizer, criterion):
    model.train()
    for i, batch in enumerate(iterator):
        words, x, is_heads, tags, y, seqlens = batch
        _y = y  # for monitoring
        optimizer.zero_grad()
        logits, y, _ = model(x, y)  # logits: (N, T, VOCAB), y: (N, T)

        logits = logits.view(-1, logits.shape[-1])  # (N*T, VOCAB)
        y = y.view(-1)  # (N*T,)

        loss = criterion(logits, y)
        loss.backward()

        optimizer.step()

        if i == 0:
            print("=====sanity check======")
            print("words:", words[0])
            print("x:", x.cpu().numpy()[0][:seqlens[0]])
            print("tokens:", tokenizer.convert_ids_to_tokens(x.cpu().numpy()[0])[:seqlens[0]])
            print("is_heads:", is_heads[0])
            print("y:", _y.cpu().numpy()[0][:seqlens[0]])
            print("tags:", tags[0])
            print("seqlen:", seqlens[0])
            print("=======================")

        if i % 10 == 0:  # monitoring
            print(f"step: {i}, loss: {loss.item()}")
def train(model, iterator, optimizer, criterion, sanity_check=False):
    model.train()
    print("=======================")
    print("Training started at {}".format(time.asctime(time.localtime(time.time()))))
    for i, batch in enumerate(iterator):
        words, x, is_heads, tags, y, seqlens = batch
        _y = y  # for monitoring
        optimizer.zero_grad()
        logits, y, _ = model(x, y)  # logits: (N, T, VOCAB), y: (N, T)

        logits = logits.view(-1, logits.shape[-1])  # (N*T, VOCAB)
        y = y.view(-1)  # (N*T,)

        loss = criterion(logits, y)
        loss.backward()

        optimizer.step()

        if sanity_check and i == 0:
            print("=====sanity check======")
            print("words:", words[0])
            print("x:", x.cpu().numpy()[0][:seqlens[0]])
            print("tokens:", tokenizer.convert_ids_to_tokens(x.cpu().numpy()[0])[:seqlens[0]])
            print("is_heads:", is_heads[0])
            print("y:", _y.cpu().numpy()[0][:seqlens[0]])
            print("tags:", tags[0])
            print("seqlen:", seqlens[0])
            print("=======================")

        if i % 100 == 0:  # monitoring
            print(f"time: {time.asctime(time.localtime(time.time()))}, step: {i}, loss: {loss.item()}")

    print(f"Finished {i} steps of training at {time.asctime(time.localtime(time.time()))}\n")
    return loss.item()
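# A minimal driver sketch for the loop above, assuming a token-classification
# setup; Net, NerDataset, "train.txt", pad, and the hyperparameters are
# illustrative assumptions, not taken from the original code.
import torch
import torch.nn as nn
from torch.utils import data

model = Net()                                    # hypothetical model returning (logits, y, y_hat)
train_iter = data.DataLoader(dataset=NerDataset("train.txt"),  # hypothetical dataset
                             batch_size=32,
                             shuffle=True,
                             collate_fn=pad)     # hypothetical collate fn padding x/y to the batch max length
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss(ignore_index=0)  # assumes label id 0 is the <PAD> tag
for epoch in range(1, 11):
    train(model, train_iter, optimizer, criterion, sanity_check=(epoch == 1))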
def train(model, iterator, optimizer, criterion):
    model.train()
    for i, batch in enumerate(iterator):
        tokens_x_2d, entities_x_3d, triggers_y_2d, arguments_y_2d, seqlens_1d, is_heads_2d, words_2d, triggers_2d = batch
        optimizer.zero_grad()
        logits, y, y_hat = model(tokens_x_2d, entities_x_3d, triggers_y_2d)

        logits = logits.view(-1, logits.shape[-1])
        y = y.view(-1)

        loss = criterion(logits, y)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # clip after backward(), before the optimizer step
        optimizer.step()

        if i == 0:
            print("=====sanity check======")
            print("tokens:", tokenizer.convert_ids_to_tokens(tokens_x_2d[0])[:seqlens_1d[0]])
            print("entities_x_3d:", entities_x_3d[0][:seqlens_1d[0]])
            print("is_heads:", is_heads_2d[0])
            print("triggers:", triggers_2d[0])
            print("triggers_y:", triggers_y_2d[0][:seqlens_1d[0]])
            print('triggers_y_hat:', y_hat.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print("seqlen:", seqlens_1d[0])
            print("=======================")

        if i % 10 == 0:  # monitoring
            print("step: {}, loss: {}".format(i, loss.item()))
def train(model, iterator, optimizer, criterion):
    model.train()
    for i, batch in enumerate(iterator):
        tokens_x_2d, entities_x_3d, postags_x_2d, triggers_y_2d, arguments_2d, seqlens_1d, head_indexes_2d, words_2d, triggers_2d = batch
        optimizer.zero_grad()
        trigger_logits, triggers_y_2d, trigger_hat_2d, argument_hidden, argument_keys = model.module.predict_triggers(
            tokens_x_2d=tokens_x_2d, entities_x_3d=entities_x_3d,
            postags_x_2d=postags_x_2d, head_indexes_2d=head_indexes_2d,
            triggers_y_2d=triggers_y_2d, arguments_2d=arguments_2d)

        trigger_logits = trigger_logits.view(-1, trigger_logits.shape[-1])
        trigger_loss = criterion(trigger_logits, triggers_y_2d.view(-1))

        if len(argument_keys) > 0:
            argument_logits, arguments_y_1d, argument_hat_1d, argument_hat_2d = model.module.predict_arguments(
                argument_hidden, argument_keys, arguments_2d)
            argument_loss = criterion(argument_logits, arguments_y_1d)
            loss = trigger_loss + 2 * argument_loss  # weight the argument loss twice as heavily as the trigger loss

            if i == 0:
                print("=====sanity check for arguments======")
                print('arguments_y_1d:', arguments_y_1d)
                print("arguments_2d[0]:", arguments_2d[0]['events'])
                print("argument_hat_2d[0]:", argument_hat_2d[0]['events'])
                print("=======================")
        else:
            loss = trigger_loss

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # clip after backward(), before the optimizer step
        optimizer.step()

        if i == 0:
            print("=====sanity check======")
            print("tokens_x_2d[0]:", tokenizer.convert_ids_to_tokens(tokens_x_2d[0])[:seqlens_1d[0]])
            print("entities_x_3d[0]:", entities_x_3d[0][:seqlens_1d[0]])
            print("postags_x_2d[0]:", postags_x_2d[0][:seqlens_1d[0]])
            print("head_indexes_2d[0]:", head_indexes_2d[0][:seqlens_1d[0]])
            print("triggers_2d[0]:", triggers_2d[0])
            print("triggers_y_2d[0]:", triggers_y_2d.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print('trigger_hat_2d[0]:', trigger_hat_2d.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print("seqlens_1d[0]:", seqlens_1d[0])
            print("arguments_2d[0]:", arguments_2d[0])
            print("=======================")

        if i % 10 == 0:  # monitoring
            print("step: {}, loss: {}".format(i, loss.item()))
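# The model.module.* calls above are the usual access pattern when the model is
# wrapped in nn.DataParallel; a minimal setup sketch (Net and the device choice
# are assumptions, not from the original):
import torch
import torch.nn as nn

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = nn.DataParallel(Net())   # hypothetical event-extraction model; DataParallel replicates it per GPU
model = model.to(device)
# Custom methods now live on the wrapped module:
#   model.module.predict_triggers(...), model.module.predict_arguments(...)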
def train(model, iterator, optimizer, criterion):
    model.train()
    # batch_size 24, 600 steps
    for i, batch in enumerate(iterator):
        tokens_x_2d, entities_x_3d, postags_x_2d, triggers_y_2d, arguments_2d, seqlens_1d, head_indexes_2d, words_2d, triggers_2d, \
            pre_sent_tokens_x, next_sent_tokens_x, pre_sent_len, next_sent_len, maxlen = batch
        # maxlen = max(seqlens_1d)
        # pre_sent_len_max = max(pre_sent_len)
        # next_sent_len_max = max(next_sent_len)

        # Build 0/1 masks and length matrices for the previous/next context sentences
        # (use a separate loop variable so the step counter `i` is not shadowed).
        pre_sent_flags = []
        next_sent_flags = []
        pre_sent_len_mat = []
        next_sent_len_mat = []
        for length in pre_sent_len:
            tmp = [[1] * 768] * length + [[0] * 768] * (maxlen - length)
            pre_sent_flags.append(tmp)
            pre_sent_len_mat.append([length] * 768)
        for length in next_sent_len:
            tmp = [[1] * 768] * length + [[0] * 768] * (maxlen - length)
            next_sent_flags.append(tmp)
            next_sent_len_mat.append([length] * 768)

        optimizer.zero_grad()
        # trigger_logits, triggers_y_2d, trigger_hat_2d, argument_hidden, argument_keys = model.module.predict_triggers(tokens_x_2d=tokens_x_2d, entities_x_3d=entities_x_3d, ...)
        trigger_logits, triggers_y_2d, trigger_hat_2d = model.predict_triggers(
            tokens_x_2d=tokens_x_2d, entities_x_3d=entities_x_3d,
            postags_x_2d=postags_x_2d, head_indexes_2d=head_indexes_2d,
            triggers_y_2d=triggers_y_2d, arguments_2d=arguments_2d,
            pre_sent_tokens_x=pre_sent_tokens_x, next_sent_tokens_x=next_sent_tokens_x,
            pre_sent_flags=pre_sent_flags, next_sent_flags=next_sent_flags,
            pre_sent_len_mat=pre_sent_len_mat, next_sent_len_mat=next_sent_len_mat)

        trigger_logits = trigger_logits.view(-1, trigger_logits.shape[-1])
        print("trigger_check " + "=" * 100)
        print(torch.argmax(trigger_logits, 1).to('cpu').numpy().tolist())
        print(triggers_y_2d.view(1, -1).to('cpu').numpy().tolist())

        # The loss computation here is worth noting: the model outputs trigger_logits of shape
        # (batch_size, sentence_len, num_classes), while the gold labels have shape
        # (batch_size, sentence_len) -- one tag per word.
        # The batch and sentence dimensions are then flattened together, so we no longer track
        # which sentence a word came from and simply score word-level classification over the whole batch.
        # CrossEntropyLoss expects 2-D predictions (num_items, num_classes) and 1-D gold labels
        # (num_items,); here num_items is batch_size * sentence_len rather than batch_size.
        trigger_loss = criterion(trigger_logits, triggers_y_2d.view(-1))
        loss = trigger_loss

        loss.backward()
        # gradient clipping
        # nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        if i == 0:
            print("=====sanity check======")
            print("tokens_x_2d[0]:", tokenizer.convert_ids_to_tokens(tokens_x_2d[0])[:seqlens_1d[0]])
            print("entities_x_3d[0]:", entities_x_3d[0][:seqlens_1d[0]])
            print("postags_x_2d[0]:", postags_x_2d[0][:seqlens_1d[0]])
            print("head_indexes_2d[0]:", head_indexes_2d[0][:seqlens_1d[0]])
            print("triggers_2d[0]:", triggers_2d[0])
            print("triggers_y_2d[0]:", triggers_y_2d.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print('trigger_hat_2d[0]:', trigger_hat_2d.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print("seqlens_1d[0]:", seqlens_1d[0])
            print("arguments_2d[0]:", arguments_2d[0])
            print("=======================")

        if i % 10 == 0:  # monitoring
            print("step: {}, loss: {}".format(i, loss.item()))
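# Self-contained illustration of the flattening described in the comments above
# (shapes are illustrative): CrossEntropyLoss takes 2-D logits (num_items, num_classes)
# and 1-D integer targets (num_items,), so the batch and sequence dims are merged first.
import torch
import torch.nn as nn

batch_size, seq_len, num_classes = 2, 5, 8
logits = torch.randn(batch_size, seq_len, num_classes)          # model output: (N, T, C)
labels = torch.randint(0, num_classes, (batch_size, seq_len))   # gold tags:    (N, T)

criterion = nn.CrossEntropyLoss()
loss = criterion(logits.view(-1, num_classes), labels.view(-1)) # (N*T, C) vs (N*T,)
print(loss.item())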
def train(model, iterator, optimizer, criterion):
    model.train()
    decision_criterion = nn.BCEWithLogitsLoss()
    for i, batch in enumerate(iterator):
        tokens_x_2d, entities_x_3d, postags_x_2d, triggers_y_2d, arguments_2d, seqlens_1d, head_indexes_2d, words_2d, triggers_2d = batch
        optimizer.zero_grad()
        trigger_logits, triggers_y_2d, trigger_hat_2d, argument_hidden, argument_keys, trigger_info, auxiliary_feature = model.module.predict_triggers(
            tokens_x_2d=tokens_x_2d, entities_x_3d=entities_x_3d,
            postags_x_2d=postags_x_2d, head_indexes_2d=head_indexes_2d,
            triggers_y_2d=triggers_y_2d, arguments_2d=arguments_2d)

        trigger_logits = trigger_logits.view(-1, trigger_logits.shape[-1])
        trigger_loss = criterion(trigger_logits, triggers_y_2d.view(-1))
        loss = trigger_loss

        # Previous single-module argument prediction path, kept for reference:
        # if len(argument_keys) > 0:
        #     argument_logits, arguments_y_1d, argument_hat_1d, argument_hat_2d = model.module.predict_arguments(argument_hidden, argument_keys, arguments_2d)
        #     argument_loss = criterion(argument_logits, arguments_y_1d)
        #     loss = trigger_loss + 2 * argument_loss
        #     if i == 0:
        #         print("=====sanity check for arguments======")
        #         print('arguments_y_1d:', arguments_y_1d)
        #         print("arguments_2d[0]:", arguments_2d[0]['events'])
        #         print("argument_hat_2d[0]:", argument_hat_2d[0]['events'])
        #         print("=======================")
        # else:
        #     loss = trigger_loss

        # One argument module per role in ARGUMENTS; each contributes an argument loss
        # plus a meta-classifier decision loss on top of the shared trigger loss.
        for module_arg in ARGUMENTS:
            if len(argument_keys) > 0:
                argument_logits, arguments_y_1d, argument_hat_1d, argument_hat_2d = model.module.module_predict_arguments(
                    argument_hidden, argument_keys, arguments_2d, module_arg)
                argument_loss = criterion(argument_logits, arguments_y_1d)
                loss += 2 * argument_loss

                # meta classifier
                module_decisions_logit, module_decisions_y, argument_hat_2d = model.module.meta_classifier(
                    argument_keys, arguments_2d, trigger_info, argument_logits,
                    argument_hat_1d, auxiliary_feature, module_arg)
                module_decisions_logit = module_decisions_logit.view(-1)
                decision_loss = decision_criterion(module_decisions_logit, module_decisions_y)
                loss += 2 * decision_loss

                if i == 0:
                    print("=====sanity check for arguments======")
                    print('arguments_y_1d:', arguments_y_1d)
                    print("arguments_2d[0]:", arguments_2d[0]['events'])
                    print("argument_hat_2d[0]:", argument_hat_2d[0]['events'])
                    print("module decision y = {}".format(module_decisions_y))
                    print("module decision pred = {}".format(torch.round(torch.sigmoid(module_decisions_logit))))
                    print("=======================")

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # clip after backward(), before the optimizer step
        optimizer.step()

        if i == 0:
            print("=====sanity check======")
            print("tokens_x_2d[0]:", tokenizer.convert_ids_to_tokens(tokens_x_2d[0])[:seqlens_1d[0]])
            print("entities_x_3d[0]:", entities_x_3d[0][:seqlens_1d[0]])
            print("postags_x_2d[0]:", postags_x_2d[0][:seqlens_1d[0]])
            print("head_indexes_2d[0]:", head_indexes_2d[0][:seqlens_1d[0]])
            print("triggers_2d[0]:", triggers_2d[0])
            print("triggers_y_2d[0]:", triggers_y_2d.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print('trigger_hat_2d[0]:', trigger_hat_2d.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print("seqlens_1d[0]:", seqlens_1d[0])
            print("arguments_2d[0]:", arguments_2d[0])
            print("=======================")

        if i % 100 == 0:  # monitoring
            print("step: {}, loss: {}".format(i, loss.item()))