def init_document_embeddings():
    """Create the fixtures shared by document-embedding tests.

    Returns:
        A ``(sentence, glove, charlm)`` tuple: a ``Sentence`` built from a
        short two-sentence text, ``WordEmbeddings('en-glove')`` and
        ``CharLMEmbeddings('mix-backward')``.
    """
    sample = Sentence('I love Berlin. Berlin is a great place to live.')
    word_level = WordEmbeddings('en-glove')
    char_level = CharLMEmbeddings('mix-backward')
    return sample, word_level, char_level
def test_stacked_embeddings():
    """Check that stacked embeddings fill every token and can be cleared."""
    sentence = Sentence('I love Berlin.')

    # Members are constructed in the same order they are stacked:
    # glove, news, then the character language model.
    members = [
        WordEmbeddings('en-glove'),
        WordEmbeddings('en-news'),
        CharLMEmbeddings('mix-backward'),
    ]
    stacked = StackedEmbeddings(members)
    stacked.embed(sentence)

    for tok in sentence.tokens:
        # After embedding, the token must carry a non-empty vector ...
        assert len(tok.get_embedding()) != 0

        # ... and clearing must leave it empty again.
        tok.clear_embeddings()
        assert len(tok.get_embedding()) == 0
def init() -> Tuple[TaggedCorpus, Dictionary, TextClassifier]:
    """Set up the corpus, label dictionary and classifier used by the tests.

    Returns:
        A ``(corpus, label_dict, model)`` tuple, where ``corpus`` is fetched
        for ``NLPTask.AG_NEWS``, ``label_dict`` is derived from that corpus
        and ``model`` is a ``TextClassifier`` over LSTM document embeddings
        built on ``en-glove`` word embeddings.
    """
    corpus = NLPTaskDataFetcher.fetch_data(NLPTask.AG_NEWS)
    label_dict = corpus.make_label_dictionary()

    # Positional settings are forwarded unchanged and in order; what each
    # position means is defined by DocumentLSTMEmbeddings' signature, which
    # is not visible from this file.
    lstm_settings = (128, 1, False, 64, False, False)
    document_embeddings = DocumentLSTMEmbeddings(
        [WordEmbeddings('en-glove')], *lstm_settings)

    classifier = TextClassifier(document_embeddings, label_dict, False)
    return corpus, label_dict, classifier
def load_and_apply_word_embeddings(emb_type: str):
    """Embed a sample sentence with the given word embeddings and verify it.

    Args:
        emb_type: Identifier passed straight to ``WordEmbeddings``.

    Raises:
        AssertionError: If a token has an empty embedding after embedding,
            or a non-empty one after its embeddings were cleared.
    """
    sample = Sentence('I love Berlin.')

    word_embeddings = WordEmbeddings(emb_type)
    word_embeddings.embed(sample)

    for tok in sample.tokens:
        assert len(tok.get_embedding()) != 0

        tok.clear_embeddings()
        assert len(tok.get_embedding()) == 0
def test_document_mean_embeddings():
    """Check that mean document embeddings are set on embed and cleared."""
    document = Sentence('I love Berlin. Berlin is a great place to live.')

    # Token-level sources fed into the document embedding, built in order.
    token_sources = [
        WordEmbeddings('en-glove'),
        CharLMEmbeddings('mix-backward'),
    ]
    mean_embeddings = DocumentMeanEmbeddings(token_sources)
    mean_embeddings.embed(document)

    # The embedding lives on the sentence itself, not on its tokens.
    assert len(document.get_embedding()) != 0

    document.clear_embeddings()
    assert len(document.get_embedding()) == 0
def test_training():
    """Smoke-test training of a non-CRF NER sequence tagger.

    Builds a tagger over ``glove`` word embeddings for the
    ``NLPTask.FASHION`` corpus and trains it with ``max_epochs=10``. The
    ``./results`` output path passed to the trainer is removed afterwards,
    including when training raises, so a failed run does not leave files
    behind for later runs.
    """
    corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION)
    tag_dictionary = corpus.make_tag_dictionary('ner')

    embeddings = WordEmbeddings('glove')

    tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                            embeddings=embeddings,
                                            tag_dictionary=tag_dictionary,
                                            tag_type='ner',
                                            use_crf=False)

    # initialize trainer
    trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(
        tagger, corpus, test_mode=True)

    try:
        trainer.train('./results', learning_rate=0.1, mini_batch_size=2,
                      max_epochs=10)
    finally:
        # clean up results directory; ignore_errors keeps a missing directory
        # (training failed before writing anything) from masking the real
        # exception.
        shutil.rmtree('./results', ignore_errors=True)
def test_text_classifier_mulit_label():
    """Train a multi-label text classifier briefly and sanity-check predictions.

    Trains a ``TextClassifier`` over mean ``en-glove`` document embeddings on
    the ``NLPTask.IMDB`` corpus for two epochs, then predicts on one sentence
    and checks that every returned label has a name and a plain ``float``
    confidence within ``[0.0, 1.0]``. The ``./results`` output path is removed
    afterwards, including when training, prediction or an assertion fails.
    """
    corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB)
    label_dict = corpus.make_label_dictionary()

    glove_embedding: WordEmbeddings = WordEmbeddings('en-glove')
    document_embeddings: DocumentMeanEmbeddings = DocumentMeanEmbeddings(
        [glove_embedding])

    model = TextClassifier(document_embeddings, label_dict, True)

    trainer = TextClassifierTrainer(model, corpus, label_dict, False)

    try:
        trainer.train('./results', max_epochs=2)

        sentence = Sentence("Berlin is a really nice city.")

        for s in model.predict(sentence):
            for label in s.labels:
                assert (label.name is not None)
                assert (0.0 <= label.confidence <= 1.0)
                # Deliberately an exact type check: the confidence must be a
                # built-in float, not a float subclass.
                assert (type(label.confidence) is float)
    finally:
        # clean up results directory; ignore_errors keeps a missing directory
        # from masking the exception that is already propagating.
        shutil.rmtree('./results', ignore_errors=True)
# 1. get the corpus: fetch CoNLL-03, then keep what downsample(0.1) returns
corpus: TaggedCorpus = NLPTaskDataFetcher.fetch_data(NLPTask.CONLL_03)
corpus = corpus.downsample(0.1)
print(corpus)

# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# initialize embeddings
embedding_types: List[TokenEmbeddings] = [
    WordEmbeddings('glove'),

    # uncomment this line to use character embeddings
    # CharacterEmbeddings(),

    # uncomment these lines to use contextual string embeddings
    #
    # CharLMEmbeddings('news-forward'),
    #
    # CharLMEmbeddings('news-backward'),
]

embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# initialize sequence tagger
from flairrelex.models import SequenceTagger