import shutil

from flair.data import Dictionary, Sentence
from flair.embeddings import CharLMEmbeddings
from flair.models import LanguageModel
from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus


def test_training():
    # get default character dictionary
    dictionary: Dictionary = Dictionary.load('chars')

    # init forward LM with 128 hidden states and 1 layer
    language_model: LanguageModel = LanguageModel(dictionary, is_forward_lm=True, hidden_size=128, nlayers=1)

    # get the example corpus and process at character level in forward direction
    corpus: TextCorpus = TextCorpus('resources/corpora/lorem_ipsum',
                                    dictionary,
                                    language_model.is_forward_lm,
                                    character_level=True)

    # train the language model
    trainer: LanguageModelTrainer = LanguageModelTrainer(language_model, corpus)
    trainer.train('./results', sequence_length=10, mini_batch_size=10, max_epochs=5)

    # use the character LM as embeddings to embed the example sentence 'I love Berlin'
    char_lm_embeddings = CharLMEmbeddings('./results/best-lm.pt')
    sentence = Sentence('I love Berlin')
    char_lm_embeddings.embed(sentence)
    print(sentence[1].embedding.size())

    # clean up results directory
    shutil.rmtree('./results', ignore_errors=True)
def test_train_language_model(results_base_path, resources_path):
    # get default character dictionary
    dictionary: Dictionary = Dictionary.load('chars')

    # init forward LM with 128 hidden states and 1 layer
    language_model: LanguageModel = LanguageModel(dictionary, is_forward_lm=True, hidden_size=128, nlayers=1)

    # get the example corpus and process at character level in forward direction
    corpus: TextCorpus = TextCorpus(str(resources_path / 'corpora/lorem_ipsum'),
                                    dictionary,
                                    language_model.is_forward_lm,
                                    character_level=True)

    # train the language model (test_mode keeps the run small and deterministic)
    trainer: LanguageModelTrainer = LanguageModelTrainer(language_model, corpus, test_mode=True)
    trainer.train(str(results_base_path), sequence_length=10, mini_batch_size=10, max_epochs=2)

    # use the character LM as embeddings to embed the example sentence 'I love Berlin'
    char_lm_embeddings = CharLMEmbeddings(str(results_base_path / 'best-lm.pt'))
    sentence = Sentence('I love Berlin')
    char_lm_embeddings.embed(sentence)

    # generate text from the trained LM and check the requested length
    text = language_model.generate_text(100)
    assert text is not None
    assert len(text) == 100

    # reload the best checkpoint and sanity-check its stored validation score
    loaded_language_model = LanguageModel.load_language_model(str(results_base_path / 'best-lm.pt'))
    assert loaded_language_model.best_score < 100

    # clean up results directory
    shutil.rmtree(results_base_path, ignore_errors=True)
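
# The fixtures below are a minimal sketch of the `results_base_path` and
# `resources_path` arguments that test_train_language_model expects. They are
# an assumption, not part of the original module: in the real test suite these
# would typically live in a shared conftest.py. `tmpdir_factory` is a built-in
# pytest fixture; the 'resources/' location is hypothetical and must match
# wherever the lorem_ipsum corpus is actually checked in.

from pathlib import Path

import pytest


@pytest.fixture
def results_base_path(tmpdir_factory):
    # throwaway directory for training artifacts such as best-lm.pt
    return Path(str(tmpdir_factory.mktemp('results')))


@pytest.fixture
def resources_path():
    # assumes test resources live next to this test module under 'resources/'
    return Path(__file__).parent / 'resources'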