# Training setup: build the corpus, model, data loaders, loss, and optimizer.
# NOTE(review): Config, Processing, and LSTMTagger are project classes defined
# elsewhere; their contracts below are assumed from usage here.

all_time_start = datetime.now()  # wall-clock start of the whole run

config = Config()

# Build the training corpus (vocab, char set, tag set) from the train file,
# loading pre-trained word embeddings alongside it.
corpus = Processing(config.train_file, config.pre_trained_embed_file)

# Dev/test sentences are parsed separately; the second argument True
# presumably flags held-out data -- confirm against data_handle's signature.
dev_sentences = Processing.data_handle(config.dev_file, True)
test_sentences = Processing.data_handle(config.test_file, True)

# Corpus statistics (console labels intentionally kept in Chinese,
# matching the project's existing runtime output).
print("\n训练预料:")
print("句子数:%d" % len(corpus.sentences))
print("词数:%d" % len(corpus.words))
print("字符数:%d" % len(corpus.chars))
print("词性数:%d" % len(corpus.tags))

lstm = LSTMTagger(corpus.word2id, corpus.char2id, corpus.tag2id,
                  corpus.embedding_matrix, config.embed_dim,
                  config.char_embed_dim, config.n_hidden)


def _make_loader(path):
    """Build a data loader for the dataset at *path* with the shared settings."""
    return lstm.get_loader(dataset=corpus.load(path),
                           batch_size=config.batch_size,
                           thread_num=config.thread_num,
                           shuffle=config.shuffle)


# NOTE(review): dev/test loaders inherit config.shuffle; evaluation loaders
# are usually NOT shuffled -- confirm this is intentional.
train_data_loader = _make_loader(config.train_file)
dev_data_loader = _make_loader(config.dev_file)
test_data_loader = _make_loader(config.test_file)

optimizer = torch.optim.Adam(lstm.parameters(), lr=config.learn_rate)
loss_func = torch.nn.CrossEntropyLoss()  # expects raw logits + class indices
torch.set_num_threads(config.thread_num)  # cap intra-op CPU parallelism