コード例 #1
0
    # Setup phase of the tagger training script: read the configuration, build
    # the corpus, construct the model and its data loaders, then create the
    # optimizer and loss function.

    # Timestamp taken before any work starts (presumably used further down to
    # report total run time -- not visible in this section).
    all_time_start = datetime.now()
    config = Config()
    # The corpus is built from the training file plus the pre-trained embedding
    # file; the attributes read below (sentences, words, chars, tags, word2id,
    # char2id, tag2id, embedding_matrix) all come from this object.
    corpus = Processing(config.train_file, config.pre_trained_embed_file)
    # train_sentences = Processing.data_handle(config.train_file)
    # NOTE(review): data_handle is called on the class itself; the meaning of
    # the second positional argument (True) is not visible here. Neither result
    # is used in the visible part of this function -- confirm they are needed.
    dev_sentences = Processing.data_handle(config.dev_file, True)
    test_sentences = Processing.data_handle(config.test_file, True)
    # Print training-corpus statistics. The labels are, in order: number of
    # sentences, number of words, number of characters, number of POS tags.
    # NOTE(review): the header string "训练预料" looks like a typo for "训练语料"
    # ("training corpus") -- confirm before changing, it is runtime output.
    print("\n训练预料:")
    print("句子数:%d" % len(corpus.sentences))
    print("词数:%d" % len(corpus.words))
    print("字符数:%d" % len(corpus.chars))
    print("词性数:%d" % len(corpus.tags))

    # Build the tagger from the corpus vocabularies (word/char/tag -> id maps),
    # the embedding matrix, and the dimensions taken from the config.
    lstm = LSTMTagger(corpus.word2id, corpus.char2id, corpus.tag2id,
                      corpus.embedding_matrix, config.embed_dim,
                      config.char_embed_dim, config.n_hidden)
    # One loader per split, each fed by corpus.load() on the matching file.
    # All three share the same batch_size, thread_num and shuffle settings.
    train_data_loader = lstm.get_loader(dataset=corpus.load(config.train_file),
                                        batch_size=config.batch_size,
                                        thread_num=config.thread_num,
                                        shuffle=config.shuffle)
    # NOTE(review): the dev and test loaders also receive config.shuffle;
    # shuffling evaluation data is normally unnecessary -- confirm intended.
    dev_data_loader = lstm.get_loader(dataset=corpus.load(config.dev_file),
                                      batch_size=config.batch_size,
                                      thread_num=config.thread_num,
                                      shuffle=config.shuffle)
    test_data_loader = lstm.get_loader(dataset=corpus.load(config.test_file),
                                       batch_size=config.batch_size,
                                       thread_num=config.thread_num,
                                       shuffle=config.shuffle)
    # Adam over the model's parameters with the configured learning rate, and
    # cross-entropy as the training loss.
    optimizer = torch.optim.Adam(lstm.parameters(), lr=config.learn_rate)
    loss_func = torch.nn.CrossEntropyLoss()

    # Set PyTorch's thread count from the same config value that is passed to
    # the loaders as thread_num.
    torch.set_num_threads(config.thread_num)