def _train(self, corpus: Corpus, params: dict, base_path: Path, max_epochs: int, optimization_value: str):
    """
    Train one model for a single parameter configuration and report its result.

    :param corpus: corpus used to build the label dictionary and handed to the trainer.
    :param params: candidate parameters. Entries whose key is in TRAINING_PARAMETERS are
        forwarded to ``trainer.train``; entries whose key is in MODEL_TRAINER_PARAMETERS
        (except ``'model'``) are forwarded to the ``ModelTrainer`` constructor. The whole
        dict is also passed to ``self._set_up_model``.
    :param base_path: path passed to ``trainer.train``.
    :param max_epochs: forwarded to ``trainer.train`` as ``max_epochs``.
    :param optimization_value: ``"score"`` selects ``results['test_score']``; any other
        value selects the last entry of ``results['dev_loss_history']``.
    :return: dict with the selected value under ``'result'`` and ``params`` under ``'params'``.
    """
    label_dict = corpus.make_label_dictionary()

    # Drop embeddings stored on the sentences before a new model is set up
    # (presumably so nothing computed for a previous configuration is reused).
    for sent in corpus.get_all_sentences():
        sent.clear_embeddings()

    model = self._set_up_model(params, label_dict)

    training_parameters = {
        key: value for key, value in params.items() if key in TRAINING_PARAMETERS
    }

    # 'model' is excluded because the model is passed to the trainer positionally.
    model_trainer_parameters = {
        key: value
        for key, value in params.items()
        if key in MODEL_TRAINER_PARAMETERS and key != 'model'
    }

    trainer = ModelTrainer(model, corpus, **model_trainer_parameters)

    results = trainer.train(
        base_path,
        max_epochs=max_epochs,
        param_selection_mode=True,
        **training_parameters,
    )

    if optimization_value == "score":
        result = results['test_score']
    else:
        result = results['dev_loss_history'][-1]

    return {'result': result, 'params': params}
def test_tagged_corpus_get_all_sentences():
    """A corpus built from one sentence per split returns three sentences in total."""
    train_split = [Sentence("I'm used in training.", use_tokenizer=True)]
    dev_split = [Sentence("I'm a dev sentence.", use_tokenizer=True)]
    test_split = [Sentence('I will be only used for testing.', use_tokenizer=True)]

    corpus = Corpus(train_split, dev_split, test_split)

    sentence_count = len(corpus.get_all_sentences())
    assert sentence_count == 3
def make_relations_tag_dictionary(corpus: Corpus, tag_type='dependency', special_tags=None) -> Dictionary:
    """
    Build a dictionary of the ``tag_type`` tag values of every token in a corpus.

    :param corpus: corpus whose sentences (as returned by ``get_all_sentences``) are scanned.
    :param tag_type: tag type read from each token via ``token.get_tag``.
    :param special_tags: accepted for interface compatibility but currently ignored;
        nothing from it is added to the dictionary.
    :return: a ``Dictionary`` created with ``add_unk=False`` to which the tag value of
        each token has been added.
    """
    tag_dictionary: Dictionary = Dictionary(add_unk=False)

    # NOTE: special_tags is deliberately not consumed here; the code that used to add
    # them was disabled, so the default is None rather than a shared mutable list.
    for sentence in corpus.get_all_sentences():
        for token in sentence.tokens:
            tag_dictionary.add_item(token.get_tag(tag_type).value)

    return tag_dictionary
def predict(self, corpus: Corpus):
    """
    Run the wrapped model on a corpus and store each prediction as a "cluster" label.

    :param corpus: the flair corpus whose sentences are converted, predicted and labeled.
    :return: the value returned by ``self.model.predict`` for the converted corpus.
    """
    features = self._convert_dataset(corpus)

    log.info(f"Start the prediction {self.model!s} with {len(features)!s} Datapoints.")
    predictions = self.model.predict(features)

    # Predictions are matched to sentences by position in get_all_sentences().
    sentences = corpus.get_all_sentences()
    for position, sentence in enumerate(sentences):
        sentence.set_label("cluster", str(predictions[position]))

    log.info("Finished prediction and labeled all sentences.")
    return predictions