# ---- per-epoch training loop body ----
train_loss = train_model(dataloader['train_dataloader'], model, criterion, optimizer)
print('train_loss:', train_loss)
writer.add_scalar('loss', train_loss, epoch)

test_error = evaluate(dataloader['dev_dataloader'], model)
print('testing error:', test_error)
writer.add_scalar('test_error', test_error, epoch)

# Step the LR scheduler only when training with plain SGD
if args.optimizer == 'SGD':
    scheduler.step()

# Early stopping: keep the best checkpoint so far and count
# consecutive epochs without improvement on the dev set
if test_error < best_error:
    early_stop = 0
    best_error = test_error
    torch.save(model.state_dict(), model_name)
else:
    early_stop += 1

epoch_end = time.time()
cost_time = epoch_end - epoch_begin
print('epoch {} took {}m {}s'.format(
    epoch + 1, int(cost_time / 60), int(cost_time % 60)))
print()

# Stop training once patience is exhausted
if early_stop >= args.patience:
    exit(0)

# after the epoch loop
train_end = time.time()
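# For context, a minimal sketch (not from the source) of the enclosing
# structure the loop body above runs in. The names `args.epochs` and
# `train_begin` and the initializations are assumptions reconstructed
# from the variables the body uses.
import time

train_begin = time.time()
best_error = float('inf')
early_stop = 0
for epoch in range(args.epochs):
    epoch_begin = time.time()
    # ... per-epoch body shown above ...
train_end = time.time()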
torch.set_num_threads(args.thread)
logger.debug(lstm_crf)

# Task
optimizer = optim.SGD(filter(lambda p: p.requires_grad, lstm_crf.parameters()),
                      lr=args.lr, momentum=args.momentum)
processor = SeqLabelProcessor(gpu=use_gpu)

train_args = vars(args)
train_args['word_embed_size'] = word_embed.num_embeddings

# Checkpoint state: per-module weights, training args, and vocabularies
state = {
    'model': {
        'word_embed': word_embed.state_dict(),
        'char_embed': char_embed.state_dict(),
        'char_hw': char_hw.state_dict(),
        'lstm': lstm.state_dict(),
        'crf': crf.state_dict(),
        'linear': linear.state_dict(),
        'lstm_crf': lstm_crf.state_dict()
    },
    'args': train_args,
    'vocab': {
        'token': token_vocab,
        'label': label_vocab,
        'char': char_vocab,
    }
}

try:
    global_step = 0
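# A minimal restore sketch, assuming the `state` dict above is eventually
# written with torch.save(state, checkpoint_path); `checkpoint_path` and the
# reuse of an already-constructed `lstm_crf` module are assumptions.
import torch

checkpoint = torch.load(checkpoint_path, map_location='cpu')
train_args = checkpoint['args']             # plain dict of training args
token_vocab = checkpoint['vocab']['token']  # vocabularies travel with weights
lstm_crf.load_state_dict(checkpoint['model']['lstm_crf'])
lstm_crf.eval()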
    linear_dropout_prob=args.linear_dropout,
    char_highway=char_highway
)

# Move the model to the selected GPU when one is available
if use_gpu:
    torch.cuda.set_device(args.gpu_idx)
    lstm_crf.cuda()

# Task
optimizer = optim.SGD(filter(lambda p: p.requires_grad, lstm_crf.parameters()),
                      lr=args.lr, momentum=args.momentum)

# Checkpoint state for the CNN-highway variant of the model
state = {
    'model': {
        'word_embed': word_embed.state_dict(),
        'char_cnn': char_cnn.state_dict(),
        'char_highway': char_highway.state_dict(),
        'lstm': lstm.state_dict(),
        'crf': crf.state_dict(),
        'output_linear': output_linear.state_dict(),
        'lstm_crf': lstm_crf.state_dict()
    },
    'args': vars(args),
    'vocab': {
        'token': token_vocab,
        'label': label_vocab,
        'char': char_vocab,
    }
}

try:
    # ... (the training loop that follows is not included in this fragment)
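# Why the optimizer filters on requires_grad: parameters of frozen
# sub-modules (e.g., pretrained word embeddings) are excluded from the SGD
# update. A self-contained illustration with arbitrary sizes follows;
# the module names and dimensions here are assumptions, not from the source.
import torch.nn as nn
import torch.optim as optim

embed = nn.Embedding(10000, 100)
embed.weight.requires_grad = False   # freeze pretrained embeddings
output = nn.Linear(100, 9)

trainable = [p for p in list(embed.parameters()) + list(output.parameters())
             if p.requires_grad]
opt = optim.SGD(trainable, lr=0.015, momentum=0.9)
# Only output.weight and output.bias are updated; the embedding stays fixed.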