'cl': optimizer_cl, 'ct': optimizer_ct, 'clct': optimizer_clct } processor = SeqLabelProcessor(gpu=use_gpu) train_args = vars(args) train_args['word_embed_size'] = word_embed_1.num_embeddings state = { 'model': { 'word_embed': word_embed_1.state_dict(), 'char_embed': char_embed.state_dict(), 'char_hw': char_hw.state_dict(), 'lstm': lstm.state_dict(), 'crf': crf_1.state_dict(), 'univ_linear': shared_linear_1.state_dict(), 'spec_linear': spec_linear_1_1.state_dict(), 'lstm_crf': lstm_crf_tgt.state_dict() }, 'args': train_args, 'vocab': { 'token': token_vocab_1, 'label': label_vocab_1, 'char': char_vocab, } } # Calculate mixing rates batch_num = len(train_set_tgt) // batch_size r_tgt = math.sqrt(len(train_set_tgt)) r_cl = 1.0 * .1 * math.sqrt(len(datasets['cl']['train']))
if use_gpu: torch.cuda.set_device(args.gpu_idx) lstm_crf.cuda() # Task optimizer = optim.SGD(filter(lambda p: p.requires_grad, lstm_crf.parameters()), lr=args.lr, momentum=args.momentum) state = { 'model': { 'word_embed': word_embed.state_dict(), 'char_cnn': char_cnn.state_dict(), 'char_highway': char_highway.state_dict(), 'lstm': lstm.state_dict(), 'crf': crf.state_dict(), 'output_linear': output_linear.state_dict(), 'lstm_crf': lstm_crf.state_dict() }, 'args': vars(args), 'vocab': { 'token': token_vocab, 'label': label_vocab, 'char': char_vocab, } } try: global_step = 0 best_dev_score = 0.0 for epoch in range(args.max_epoch):
'tgt': optimizer_tgt, 'cl': optimizer_cl, 'ct': optimizer_ct, 'clct': optimizer_clct } state = { 'model': { 'word_embed_1': word_embed_1.state_dict(), 'word_embed_2': word_embed_2.state_dict(), 'char_cnn': char_cnn.state_dict(), 'char_highway': char_highway.state_dict(), 'lstm': lstm.state_dict(), 'crf_1': crf_1.state_dict(), 'crf_2': crf_2.state_dict(), 'shared_output_linear_1': shared_output_linear_1.state_dict(), 'spec_output_linear_1_1': spec_output_linear_1_1.state_dict(), 'spec_output_linear_1_2': spec_output_linear_1_2.state_dict(), 'shared_output_linear_2': shared_output_linear_2.state_dict(), 'spec_output_linear_2_1': spec_output_linear_2_1.state_dict(), 'spec_output_linear_2_2': spec_output_linear_2_2.state_dict(), 'lstm_crf_tgt': lstm_crf_tgt.state_dict(), 'lstm_crf_cl': lstm_crf_cl.state_dict(), 'lstm_crf_ct': lstm_crf_ct.state_dict(), 'lstm_crf_clct': lstm_crf_clct.state_dict() }, 'args': vars(args), 'vocab': { 'token_1': token_vocab_1, 'token_2': token_vocab_2, 'label_1': label_vocab_1,