def test_load_ag_news_data(tasks_base_path):
    """Verify that the AG_NEWS fixture corpus loads with 10 sentences per split."""
    loaded = NLPTaskDataFetcher.load_corpus(NLPTask.AG_NEWS, tasks_base_path)
    expected_size = 10
    assert len(loaded.train) == expected_size
    assert len(loaded.dev) == expected_size
    assert len(loaded.test) == expected_size
def test_load_sequence_labeling_data(tasks_base_path):
    """Verify split sizes of the FASHION sequence-labeling fixture corpus."""
    loaded = NLPTaskDataFetcher.load_corpus(NLPTask.FASHION, tasks_base_path)
    assert len(loaded.train) == 6
    assert len(loaded.dev) == 1
    assert len(loaded.test) == 1
def test_text_classifier_param_selector(results_base_path, tasks_base_path):
    # Run a tiny hyperopt search (2 evaluations, 2 epochs each) for the text
    # classifier on the imdb fixture corpus, then remove the results directory.
    corpus = NLPTaskDataFetcher.load_corpus(u'imdb', base_path=tasks_base_path)
    glove_embedding = WordEmbeddings(u'en-glove')
    search_space = SearchSpace()
    # model-architecture parameters
    search_space.add(Parameter.EMBEDDINGS, hp.choice, options=[[glove_embedding]])
    search_space.add(Parameter.HIDDEN_SIZE, hp.choice, options=[64, 128, 256, 512])
    search_space.add(Parameter.RNN_LAYERS, hp.choice, options=[1, 2])
    search_space.add(Parameter.REPROJECT_WORDS, hp.choice, options=[True, False])
    search_space.add(Parameter.REPROJECT_WORD_DIMENSION, hp.choice, options=[64, 128])
    search_space.add(Parameter.BIDIRECTIONAL, hp.choice, options=[True, False])
    # regularization parameters (continuous ranges)
    search_space.add(Parameter.DROPOUT, hp.uniform, low=0.25, high=0.75)
    search_space.add(Parameter.WORD_DROPOUT, hp.uniform, low=0.25, high=0.75)
    search_space.add(Parameter.LOCKED_DROPOUT, hp.uniform, low=0.25, high=0.75)
    # training parameters
    search_space.add(Parameter.LEARNING_RATE, hp.uniform, low=0, high=1)
    search_space.add(Parameter.MINI_BATCH_SIZE, hp.choice, options=[4, 8, 16, 32])
    search_space.add(Parameter.ANNEAL_FACTOR, hp.uniform, low=0, high=0.75)
    search_space.add(Parameter.PATIENCE, hp.choice, options=[3, 5])
    param_selector = TextClassifierParamSelector(
        corpus, False, results_base_path, document_embedding_type=u'lstm',
        max_epochs=2)
    param_selector.optimize(search_space, max_evals=2)
    # clean up results directory
    shutil.rmtree(results_base_path)
def test_train_load_use_tagger(results_base_path, tasks_base_path):
    """Train a tiny CRF-free NER tagger, reload it from disk and run predictions."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.FASHION, base_path=tasks_base_path)
    ner_dictionary = corpus.make_tag_dictionary('ner')
    tagger: SequenceTagger = SequenceTagger(
        hidden_size=64,
        embeddings=WordEmbeddings('turian'),
        tag_dictionary=ner_dictionary,
        tag_type='ner',
        use_crf=False,
    )
    # train for two quick epochs in test mode
    ModelTrainer(tagger, corpus).train(
        results_base_path,
        EvaluationMetric.MICRO_F1_SCORE,
        learning_rate=0.1,
        mini_batch_size=2,
        max_epochs=2,
        test_mode=True,
    )
    # reload the final model and predict on normal and empty sentences
    reloaded: SequenceTagger = SequenceTagger.load_from_file(
        results_base_path / 'final-model.pt')
    non_empty = Sentence('I love Berlin')
    empty = Sentence(' ')
    reloaded.predict(non_empty)
    reloaded.predict([non_empty, empty])
    reloaded.predict([empty])
    # clean up results directory
    shutil.rmtree(results_base_path)
def test_train_optimizer_arguments(results_base_path, tasks_base_path):
    """Train with AdamW and an extra optimizer kwarg (weight_decay), then reload."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.FASHION, base_path=tasks_base_path)
    ner_dictionary = corpus.make_tag_dictionary(u'ner')
    tagger = SequenceTagger(
        hidden_size=64,
        embeddings=WordEmbeddings(u'glove'),
        tag_dictionary=ner_dictionary,
        tag_type=u'ner',
        use_crf=False,
    )
    # weight_decay is forwarded through train() to the AdamW optimizer
    trainer = ModelTrainer(tagger, corpus, optimizer=AdamW)
    trainer.train(
        results_base_path,
        EvaluationMetric.MICRO_F1_SCORE,
        learning_rate=0.1,
        mini_batch_size=2,
        max_epochs=2,
        test_mode=True,
        weight_decay=0.001,
    )
    reloaded = SequenceTagger.load_from_file(results_base_path / u'final-model.pt')
    non_empty = Sentence(u'I love Berlin')
    empty = Sentence(u' ')
    reloaded.predict(non_empty)
    reloaded.predict([non_empty, empty])
    reloaded.predict([empty])
    shutil.rmtree(results_base_path)
def test_sequence_tagger_param_selector(results_base_path, tasks_base_path):
    # Run a tiny hyperopt search (2 evaluations, 2 epochs each) for the
    # sequence tagger on the FASHION fixture corpus, then clean up.
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.FASHION, base_path=tasks_base_path)
    search_space = SearchSpace()
    # embedding choice: GloVe alone vs. GloVe stacked with Flair LM embeddings
    search_space.add(Parameter.EMBEDDINGS, hp.choice, options=[
        StackedEmbeddings([WordEmbeddings(u'glove')]),
        StackedEmbeddings([
            WordEmbeddings(u'glove'),
            FlairEmbeddings(u'news-forward'),
            FlairEmbeddings(u'news-backward')
        ])
    ])
    # model-architecture parameters
    search_space.add(Parameter.USE_CRF, hp.choice, options=[True, False])
    search_space.add(Parameter.DROPOUT, hp.uniform, low=0.25, high=0.75)
    search_space.add(Parameter.WORD_DROPOUT, hp.uniform, low=0.0, high=0.25)
    search_space.add(Parameter.LOCKED_DROPOUT, hp.uniform, low=0.0, high=0.5)
    search_space.add(Parameter.HIDDEN_SIZE, hp.choice, options=[64, 128])
    search_space.add(Parameter.RNN_LAYERS, hp.choice, options=[1, 2])
    # training parameters
    search_space.add(Parameter.OPTIMIZER, hp.choice, options=[SGD])
    search_space.add(Parameter.MINI_BATCH_SIZE, hp.choice, options=[4, 8, 32])
    search_space.add(Parameter.LEARNING_RATE, hp.uniform, low=0.01, high=1)
    search_space.add(Parameter.ANNEAL_FACTOR, hp.uniform, low=0.3, high=0.75)
    search_space.add(Parameter.PATIENCE, hp.choice, options=[3, 5])
    search_space.add(Parameter.WEIGHT_DECAY, hp.uniform, low=0.01, high=1)
    optimizer = SequenceTaggerParamSelector(corpus, u'ner', results_base_path,
                                            max_epochs=2)
    optimizer.optimize(search_space, max_evals=2)
    # clean up results directory
    shutil.rmtree(results_base_path)
def test_train_charlm_load_use_tagger(results_base_path, tasks_base_path):
    """Train a tagger on character-LM (Flair) embeddings, reload it, predict."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.FASHION, base_path=tasks_base_path)
    ner_dictionary = corpus.make_tag_dictionary(u'ner')
    charlm_embeddings = FlairEmbeddings(u'news-forward-fast')
    tagger = SequenceTagger(
        hidden_size=64,
        embeddings=charlm_embeddings,
        tag_dictionary=ner_dictionary,
        tag_type=u'ner',
        use_crf=False,
    )
    ModelTrainer(tagger, corpus).train(
        results_base_path,
        EvaluationMetric.MICRO_F1_SCORE,
        learning_rate=0.1,
        mini_batch_size=2,
        max_epochs=2,
        test_mode=True,
    )
    reloaded = SequenceTagger.load_from_file(results_base_path / u'final-model.pt')
    non_empty = Sentence(u'I love Berlin')
    empty = Sentence(u' ')
    reloaded.predict(non_empty)
    reloaded.predict([non_empty, empty])
    reloaded.predict([empty])
    shutil.rmtree(results_base_path)
def test_train_load_use_tagger_large(results_base_path, tasks_base_path):
    """Train a POS tagger on a 5% downsample of the full UD English corpus."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.UD_ENGLISH).downsample(0.05)
    pos_dictionary = corpus.make_tag_dictionary(u'pos')
    tagger = SequenceTagger(
        hidden_size=64,
        embeddings=WordEmbeddings(u'glove'),
        tag_dictionary=pos_dictionary,
        tag_type=u'pos',
        use_crf=False,
    )
    ModelTrainer(tagger, corpus).train(
        results_base_path,
        EvaluationMetric.MICRO_F1_SCORE,
        learning_rate=0.1,
        mini_batch_size=32,
        max_epochs=2,
        test_mode=True,
    )
    reloaded = SequenceTagger.load_from_file(results_base_path / u'final-model.pt')
    non_empty = Sentence(u'I love Berlin')
    empty = Sentence(u' ')
    reloaded.predict(non_empty)
    reloaded.predict([non_empty, empty])
    reloaded.predict([empty])
    shutil.rmtree(results_base_path)
def test_train_load_use_classifier(results_base_path, tasks_base_path):
    """Train a small text classifier on the imdb fixture, then reload and predict."""
    corpus = NLPTaskDataFetcher.load_corpus("imdb", base_path=tasks_base_path)
    label_dictionary = corpus.make_label_dictionary()
    doc_embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
        [WordEmbeddings("turian")], 128, 1, False, 64, False, False)
    classifier = TextClassifier(doc_embeddings, label_dictionary, False)
    ModelTrainer(classifier, corpus).train(
        results_base_path, EvaluationMetric.MICRO_F1_SCORE,
        max_epochs=2, test_mode=True)
    # every predicted label must carry a value and a probability-like score
    for tagged in classifier.predict(Sentence("Berlin is a really nice city.")):
        for label in tagged.labels:
            assert label.value is not None
            assert 0.0 <= label.score <= 1.0
            assert type(label.score) is float
    reloaded = TextClassifier.load(results_base_path / "final-model.pt")
    non_empty = Sentence("I love Berlin")
    empty = Sentence(" ")
    reloaded.predict(non_empty)
    reloaded.predict([non_empty, empty])
    reloaded.predict([empty])
    # clean up results directory
    shutil.rmtree(results_base_path)
def test_train_load_use_classifier(results_base_path, tasks_base_path):
    """Train a small LSTM text classifier on the IMDB fixture, reload, predict."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.IMDB, base_path=tasks_base_path)
    label_dictionary = corpus.make_label_dictionary()
    doc_embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings(
        [WordEmbeddings('en-glove')], 128, 1, False, 64, False, False)
    classifier = TextClassifier(doc_embeddings, label_dictionary, False)
    ModelTrainer(classifier, corpus).train(
        results_base_path, EvaluationMetric.MICRO_F1_SCORE,
        max_epochs=2, test_mode=True)
    # every predicted label must carry a value and a probability-like score
    for tagged in classifier.predict(Sentence("Berlin is a really nice city.")):
        for label in tagged.labels:
            assert label.value is not None
            assert 0.0 <= label.score <= 1.0
            assert type(label.score) is float
    reloaded = TextClassifier.load_from_file(results_base_path / 'final-model.pt')
    non_empty = Sentence('I love Berlin')
    empty = Sentence(' ')
    reloaded.predict(non_empty)
    reloaded.predict([non_empty, empty])
    reloaded.predict([empty])
    # clean up results directory
    shutil.rmtree(results_base_path)
def test_train_resume_text_classification_training(results_base_path, tasks_base_path):
    """Train with checkpointing enabled, then resume from the saved checkpoint."""
    corpus = NLPTaskDataFetcher.load_corpus('imdb', base_path=tasks_base_path)
    label_dictionary = corpus.make_label_dictionary()
    charlm: TokenEmbeddings = FlairEmbeddings('news-forward-fast', use_cache=False)
    doc_embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings(
        [charlm], 128, 1, False)
    classifier = TextClassifier(doc_embeddings, label_dictionary, False)
    # first training run writes checkpoint.pt into the results directory
    first_trainer = ModelTrainer(classifier, corpus)
    first_trainer.train(results_base_path, max_epochs=2, test_mode=True,
                        checkpoint=True)
    # resume from that checkpoint and train again
    resumed_trainer = ModelTrainer.load_from_checkpoint(
        results_base_path / 'checkpoint.pt', 'TextClassifier', corpus)
    resumed_trainer.train(results_base_path, max_epochs=2, test_mode=True,
                          checkpoint=True)
    # clean up results directory
    shutil.rmtree(results_base_path)
def test_load_imdb_data(tasks_base_path):
    """Load the imdb fixture corpus and verify each split has 5 documents.

    This API version exposes train/dev/test as callables returning iterators,
    so each split is materialized with list() before taking len().
    """
    corpus = NLPTaskDataFetcher.load_corpus('imdb', tasks_base_path)
    # leftover debug print removed — the assertions below cover the check
    assert len(list(corpus.train())) == 5
    assert len(list(corpus.dev())) == 5
    assert len(list(corpus.test())) == 5
def test_train_charlm_nocache_load_use_classifier(results_base_path, tasks_base_path):
    """Train a classifier on uncached Flair embeddings, reload it and predict."""
    corpus = NLPTaskDataFetcher.load_corpus("imdb", base_path=tasks_base_path)
    label_dictionary = corpus.make_label_dictionary()
    # use_cache=False exercises the no-embedding-cache code path
    charlm: TokenEmbeddings = FlairEmbeddings("news-forward-fast", use_cache=False)
    doc_embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
        [charlm], 128, 1, False, 64, False, False)
    classifier = TextClassifier(doc_embeddings, label_dictionary, False)
    ModelTrainer(classifier, corpus).train(
        results_base_path, max_epochs=2, test_mode=True)
    for tagged in classifier.predict(Sentence("Berlin is a really nice city.")):
        for label in tagged.labels:
            assert label.value is not None
            assert 0.0 <= label.score <= 1.0
            assert type(label.score) is float
    reloaded = TextClassifier.load_from_file(results_base_path / "final-model.pt")
    non_empty = Sentence("I love Berlin")
    empty = Sentence(" ")
    reloaded.predict(non_empty)
    reloaded.predict([non_empty, empty])
    reloaded.predict([empty])
    # clean up results directory
    shutil.rmtree(results_base_path)
def test_train_charlm_load_use_classifier(results_base_path, tasks_base_path):
    """Train a classifier on Flair char-LM embeddings (macro-F1), reload, predict."""
    corpus = NLPTaskDataFetcher.load_corpus(u'imdb', base_path=tasks_base_path)
    label_dictionary = corpus.make_label_dictionary()
    charlm = FlairEmbeddings(u'news-forward-fast')
    doc_embeddings = DocumentLSTMEmbeddings([charlm], 128, 1, False, 64,
                                            False, False)
    classifier = TextClassifier(doc_embeddings, label_dictionary, False)
    ModelTrainer(classifier, corpus).train(
        results_base_path, EvaluationMetric.MACRO_F1_SCORE,
        max_epochs=2, test_mode=True)
    for tagged in classifier.predict(Sentence(u'Berlin is a really nice city.')):
        for label in tagged.labels:
            assert label.value is not None
            assert 0.0 <= label.score <= 1.0
            assert type(label.score) is float
    reloaded = TextClassifier.load_from_file(results_base_path / u'final-model.pt')
    non_empty = Sentence(u'I love Berlin')
    empty = Sentence(u' ')
    reloaded.predict(non_empty)
    reloaded.predict([non_empty, empty])
    reloaded.predict([empty])
    shutil.rmtree(results_base_path)
def test_download_load_data(tasks_base_path):
    """Download the full English UD corpus, check split sizes, then clean up."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.UD_ENGLISH)
    assert len(corpus.train) == 12543
    assert len(corpus.dev) == 2002
    assert len(corpus.test) == 2077
    # remove the downloaded dataset from the cache directory
    dataset_dir = Path(flair.file_utils.CACHE_ROOT) / u'datasets' / u'ud_english'
    shutil.rmtree(dataset_dir)
def test_load_imdb_data(tasks_base_path):
    """Verify the imdb fixture corpus has 5 documents in each split."""
    loaded = NLPTaskDataFetcher.load_corpus("imdb", tasks_base_path)
    for split in (loaded.train, loaded.dev, loaded.test):
        assert len(split) == 5
def test_load_germeval_data(tasks_base_path):
    """Verify split sizes of the GERMEVAL fixture corpus."""
    loaded = NLPTaskDataFetcher.load_corpus(NLPTask.GERMEVAL, tasks_base_path)
    assert len(loaded.train) == 2
    assert len(loaded.dev) == 1
    assert len(loaded.test) == 1
def test_load_ud_english_data(tasks_base_path):
    """Verify split sizes of the UD English fixture corpus."""
    loaded = NLPTaskDataFetcher.load_corpus(NLPTask.UD_ENGLISH, tasks_base_path)
    assert len(loaded.train) == 6
    assert len(loaded.test) == 4
    assert len(loaded.dev) == 2
def init(tasks_base_path):
    """Build the AG_NEWS corpus, its label dictionary and an untrained classifier."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.AG_NEWS, tasks_base_path)
    labels = corpus.make_label_dictionary()
    doc_embeddings = DocumentLSTMEmbeddings(
        [WordEmbeddings(u'en-glove')], 128, 1, False, 64, False, False)
    classifier = TextClassifier(doc_embeddings, labels, False)
    return corpus, labels, classifier
def init(tasks_base_path) -> Tuple[(Corpus, TextRegressor, ModelTrainer)]:
    """Build the REGRESSION corpus, a text regressor and its trainer."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.REGRESSION, tasks_base_path)
    doc_embeddings = DocumentRNNEmbeddings(
        [WordEmbeddings('glove')], 128, 1, False, 64, False, False)
    regressor = TextRegressor(doc_embeddings)
    trainer = ModelTrainer(regressor, corpus)
    return corpus, regressor, trainer
def test_sentence_to_real_string(tasks_base_path):
    """Check tokenized vs. plain string rendering of sentences."""
    tokenized = Sentence(u'I love Berlin.', use_tokenizer=True)
    assert tokenized.to_plain_string() == u'I love Berlin.'
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.GERMEVAL, tasks_base_path)
    # first training sentence: quotes and punctuation are re-attached in plain form
    first = corpus.train[0]
    assert first.to_tokenized_string() == u'Schartau sagte dem " Tagesspiegel " vom Freitag , Fischer sei " in einer Weise aufgetreten , die alles andere als überzeugend war " .'
    assert first.to_plain_string() == u'Schartau sagte dem "Tagesspiegel" vom Freitag, Fischer sei "in einer Weise aufgetreten, die alles andere als überzeugend war".'
    # second training sentence
    second = corpus.train[1]
    assert second.to_tokenized_string() == u'Firmengründer Wolf Peter Bree arbeitete Anfang der siebziger Jahre als Möbelvertreter , als er einen fliegenden Händler aus dem Libanon traf .'
    assert second.to_plain_string() == u'Firmengründer Wolf Peter Bree arbeitete Anfang der siebziger Jahre als Möbelvertreter, als er einen fliegenden Händler aus dem Libanon traf.'
def test_download_load_data(tasks_base_path):
    """Download the full English UD corpus from the web and verify split sizes."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.UD_ENGLISH)
    assert len(corpus.train) == 12543
    assert len(corpus.dev) == 2002
    assert len(corpus.test) == 2077
    # clean up the downloaded dataset from the cache
    downloaded = Path(flair.cache_root) / "datasets" / "ud_english"
    shutil.rmtree(downloaded)
def init(tasks_base_path) -> Tuple[TaggedCorpus, Dictionary, TextClassifier]:
    """Build the AG_NEWS corpus, its label dictionary and an untrained classifier."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.AG_NEWS, tasks_base_path)
    labels = corpus.make_label_dictionary()
    doc_embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
        [WordEmbeddings('turian')], 128, 1, False, 64, False, False)
    classifier = TextClassifier(doc_embeddings, labels, False)
    return corpus, labels, classifier
def test_download_load_data(tasks_base_path):
    """Download the full English UD corpus and check split sizes (callable API)."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.UD_ENGLISH)
    # this API version exposes splits as callables returning iterators
    assert len(list(corpus.train())) == 12543
    assert len(list(corpus.dev())) == 2002
    assert len(list(corpus.test())) == 2077
    # clean up data directory
    downloaded = Path(flair.file_utils.CACHE_ROOT) / 'datasets' / 'ud_english'
    shutil.rmtree(downloaded)
def init(tasks_base_path) -> Tuple[TaggedCorpus, TextRegressor, RegressorTrainer]:
    """Build the REGRESSION corpus, a text regressor and its trainer.

    Fix: the return annotation previously declared a 2-tuple
    (``Tuple[TaggedCorpus, TextRegressor]``) while the function returns three
    values — corpus, model and trainer. The annotation now matches the
    returned tuple; runtime behavior is unchanged.
    """
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.REGRESSION, tasks_base_path)
    glove_embedding: WordEmbeddings = WordEmbeddings("glove")
    document_embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
        [glove_embedding], 128, 1, False, 64, False, False)
    model = TextRegressor(document_embeddings, Dictionary(), False)
    trainer = RegressorTrainer(model, corpus)
    return corpus, model, trainer
def test_find_learning_rate(results_base_path, tasks_base_path):
    """Run the learning-rate finder for a few iterations on a small tagger."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.FASHION, base_path=tasks_base_path)
    ner_dictionary = corpus.make_tag_dictionary(u'ner')
    tagger = SequenceTagger(
        hidden_size=64,
        embeddings=WordEmbeddings(u'glove'),
        tag_dictionary=ner_dictionary,
        tag_type=u'ner',
        use_crf=False,
    )
    trainer = ModelTrainer(tagger, corpus, optimizer=SGD)
    trainer.find_learning_rate(results_base_path, iterations=5)
    shutil.rmtree(results_base_path)
def test_train_charlm_changed_chache_load_use_tagger(
    results_base_path, tasks_base_path
):
    # NOTE(review): "chache" in the name looks like a typo for "cache"; left
    # unchanged because pytest collects and reports tests by function name.
    # Train a tagger whose Flair embeddings use a temporary cache directory,
    # delete that cache, and verify the saved model still loads and predicts.
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.FASHION, base_path=tasks_base_path)
    tag_dictionary = corpus.make_tag_dictionary("ner")
    # make a temporary cache directory that we remove afterwards
    cache_dir = results_base_path / "cache"
    os.makedirs(cache_dir, exist_ok=True)
    embeddings = FlairEmbeddings("news-forward-fast", cache_directory=cache_dir)
    tagger: SequenceTagger = SequenceTagger(
        hidden_size=64,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type="ner",
        use_crf=False,
    )
    # initialize trainer
    trainer: ModelTrainer = ModelTrainer(tagger, corpus)
    trainer.train(
        results_base_path,
        EvaluationMetric.MACRO_ACCURACY,
        learning_rate=0.1,
        mini_batch_size=2,
        max_epochs=2,
        test_mode=True,
    )
    # remove the cache directory before loading: the saved model must not
    # depend on the embedding cache being present
    shutil.rmtree(cache_dir)
    loaded_model: SequenceTagger = SequenceTagger.load_from_file(
        results_base_path / "final-model.pt"
    )
    sentence = Sentence("I love Berlin")
    sentence_empty = Sentence(" ")
    loaded_model.predict(sentence)
    loaded_model.predict([sentence, sentence_empty])
    loaded_model.predict([sentence_empty])
    # clean up results directory
    shutil.rmtree(results_base_path)
def train(data_folder, model_output_folder): corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus( NLPTask.CONLL_03, base_path=data_folder) # 2. what tag do we want to predict? tag_type = 'ner' # 3. make the tag dictionary from the corpus tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type) print(tag_dictionary.idx2item) # init Flair embeddings flair_forward_embedding = FlairEmbeddings('multi-forward') flair_backward_embedding = FlairEmbeddings('multi-backward') # init multilingual BERT bert_embedding = BertEmbeddings('bert-base-multilingual-cased') # 4. initialize embeddings embedding_types: List[TokenEmbeddings] = [ flair_forward_embedding, flair_backward_embedding, bert_embedding ] embeddings: StackedEmbeddings = StackedEmbeddings( embeddings=embedding_types) # 5. initialize sequence tagger from flair.models import SequenceTagger tagger: SequenceTagger = SequenceTagger(hidden_size=256, embeddings=embeddings, tag_dictionary=tag_dictionary, tag_type=tag_type) # 6. initialize trainer from flair.trainers import ModelTrainer trainer: ModelTrainer = ModelTrainer(tagger, corpus) # 7. start training trainer.train(model_output_folder, mini_batch_size=256, max_epochs=150) # 8. plot training curves (optional) from flair.visual.training_curves import Plotter plotter = Plotter() plotter.plot_training_curves(model_output_folder + '/loss.tsv') plotter.plot_weights(model_output_folder + '/weights.txt')
def test_train_optimizer_arguments(results_base_path, tasks_base_path):
    """Train with AdamW and a forwarded weight_decay kwarg, then reload and predict."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.FASHION, base_path=tasks_base_path)
    ner_dictionary = corpus.make_tag_dictionary("ner")
    tagger: SequenceTagger = SequenceTagger(
        hidden_size=64,
        embeddings=WordEmbeddings("turian"),
        tag_dictionary=ner_dictionary,
        tag_type="ner",
        use_crf=False,
    )
    # weight_decay is passed through train() into the AdamW constructor
    optimizer: Optimizer = AdamW
    trainer: ModelTrainer = ModelTrainer(tagger, corpus, optimizer=optimizer)
    trainer.train(
        results_base_path,
        EvaluationMetric.MICRO_F1_SCORE,
        learning_rate=0.1,
        mini_batch_size=2,
        max_epochs=2,
        test_mode=True,
        weight_decay=1e-3,
    )
    reloaded: SequenceTagger = SequenceTagger.load_from_file(
        results_base_path / "final-model.pt")
    non_empty = Sentence("I love Berlin")
    empty = Sentence(" ")
    reloaded.predict(non_empty)
    reloaded.predict([non_empty, empty])
    reloaded.predict([empty])
    # clean up results directory
    shutil.rmtree(results_base_path)
def test_train_load_use_tagger_large(results_base_path, tasks_base_path):
    """Train a POS tagger on a 5% downsample of the full UD English corpus."""
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.UD_ENGLISH).downsample(0.05)
    pos_dictionary = corpus.make_tag_dictionary("pos")
    tagger: SequenceTagger = SequenceTagger(
        hidden_size=64,
        embeddings=WordEmbeddings("turian"),
        tag_dictionary=pos_dictionary,
        tag_type="pos",
        use_crf=False,
    )
    ModelTrainer(tagger, corpus).train(
        results_base_path,
        EvaluationMetric.MICRO_F1_SCORE,
        learning_rate=0.1,
        mini_batch_size=32,
        max_epochs=2,
        test_mode=True,
    )
    reloaded: SequenceTagger = SequenceTagger.load_from_file(
        results_base_path / "final-model.pt")
    non_empty = Sentence("I love Berlin")
    empty = Sentence(" ")
    reloaded.predict(non_empty)
    reloaded.predict([non_empty, empty])
    reloaded.predict([empty])
    # clean up results directory
    shutil.rmtree(results_base_path)