コード例 #1
0
def test_load_ud_english_data():
    """The UD_ENGLISH fixture should yield 6 train / 4 test / 2 dev sentences."""
    corpus = NLPTaskDataFetcher.fetch_data(NLPTask.UD_ENGLISH)

    train_size, test_size, dev_size = (
        len(corpus.train), len(corpus.test), len(corpus.dev))
    assert train_size == 6
    assert test_size == 4
    assert dev_size == 2
コード例 #2
0
def test_load_germeval_data():
    """The GERMEVAL fixture should yield 2 train / 1 dev / 1 test sentences."""
    corpus = NLPTaskDataFetcher.fetch_data(NLPTask.GERMEVAL)

    # split name -> expected sentence count
    expected = {'train': 2, 'dev': 1, 'test': 1}
    assert len(corpus.train) == expected['train']
    assert len(corpus.dev) == expected['dev']
    assert len(corpus.test) == expected['test']
コード例 #3
0
def test_load_imdb_data():
    """Every IMDB fixture split should hold exactly 5 documents."""
    corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB)

    # all three splits share the same size in this fixture
    for split in (corpus.train, corpus.dev, corpus.test):
        assert len(split) == 5
コード例 #4
0
def test_load_sequence_labeling_data():
    """The FASHION sequence-labeling fixture: 6 train, 1 dev, 1 test sentences."""
    corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION)

    sizes = (len(corpus.train), len(corpus.dev), len(corpus.test))
    assert sizes == (6, 1, 1)
コード例 #5
0
def test_load_ag_news_data():
    """Every AG_NEWS fixture split should contain 10 documents."""
    corpus = NLPTaskDataFetcher.fetch_data(NLPTask.AG_NEWS)

    for split_name in ('train', 'dev', 'test'):
        assert len(getattr(corpus, split_name)) == 10
コード例 #6
0
def init() -> Tuple[TaggedCorpus, Dictionary, TextClassifier]:
    """Build the AG_NEWS corpus, its label dictionary, and an untrained classifier.

    Returns:
        A (corpus, label_dictionary, model) triple ready for training.
    """
    news_corpus = NLPTaskDataFetcher.fetch_data(NLPTask.AG_NEWS)
    labels = news_corpus.make_label_dictionary()

    # GloVe word embeddings pooled by a document-level LSTM
    word_embedding: WordEmbeddings = WordEmbeddings('en-glove')
    doc_embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings(
        [word_embedding], 128, 1, False, 64, False, False)

    classifier = TextClassifier(doc_embeddings, labels, False)

    return news_corpus, labels, classifier
コード例 #7
0
ファイル: test_data.py プロジェクト: DFKI-NLP/flairRelEx
def test_sentence_to_real_string():
    """to_plain_string() should reconstruct the original, untokenized text."""
    sentence: Sentence = Sentence('I love Berlin.', use_tokenizer=True)
    assert sentence.to_plain_string() == 'I love Berlin.'

    corpus = NLPTaskDataFetcher.fetch_data(NLPTask.GERMEVAL)

    # (index, expected tokenized form, expected plain form) for the first
    # two training sentences of the GERMEVAL fixture
    expected = [
        (0,
         'Schartau sagte dem " Tagesspiegel " vom Freitag , Fischer sei " in einer Weise aufgetreten , die alles andere als überzeugend war " .',
         'Schartau sagte dem "Tagesspiegel" vom Freitag, Fischer sei "in einer Weise aufgetreten, die alles andere als überzeugend war".'),
        (1,
         'Firmengründer Wolf Peter Bree arbeitete Anfang der siebziger Jahre als Möbelvertreter , als er einen fliegenden Händler aus dem Libanon traf .',
         'Firmengründer Wolf Peter Bree arbeitete Anfang der siebziger Jahre als Möbelvertreter, als er einen fliegenden Händler aus dem Libanon traf.'),
    ]
    for index, tokenized, plain in expected:
        sentence = corpus.train[index]
        assert sentence.to_tokenized_string() == tokenized
        assert sentence.to_plain_string() == plain
コード例 #8
0
def test_training():
    """Smoke-test SequenceTagger training on the small FASHION corpus.

    Trains a non-CRF tagger for a few epochs in test mode and removes the
    results directory afterwards — even when training raises, so a failed
    run no longer leaks './results' into the workspace.
    """
    corpus = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION)
    tag_dictionary = corpus.make_tag_dictionary('ner')

    embeddings = WordEmbeddings('glove')

    tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                            embeddings=embeddings,
                                            tag_dictionary=tag_dictionary,
                                            tag_type='ner',
                                            use_crf=False)

    # initialize trainer (test_mode keeps the run small and deterministic)
    trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus, test_mode=True)

    try:
        trainer.train('./results', learning_rate=0.1, mini_batch_size=2, max_epochs=10)
    finally:
        # clean up results directory even if training failed; ignore_errors
        # also covers the case where the directory was never created
        shutil.rmtree('./results', ignore_errors=True)
コード例 #9
0
def test_text_classifier_mulit_label():
    """Train a multi-label TextClassifier on IMDB and sanity-check predictions.

    Verifies every predicted label has a name and a confidence in [0, 1].
    The results directory is removed even when training or prediction
    raises, so a failed run no longer leaks './results'.
    """
    corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB)
    label_dict = corpus.make_label_dictionary()

    glove_embedding: WordEmbeddings = WordEmbeddings('en-glove')
    document_embeddings: DocumentMeanEmbeddings = DocumentMeanEmbeddings(
        [glove_embedding])

    # third argument: multi_label=True
    model = TextClassifier(document_embeddings, label_dict, True)

    trainer = TextClassifierTrainer(model, corpus, label_dict, False)
    try:
        trainer.train('./results', max_epochs=2)

        sentence = Sentence("Berlin is a really nice city.")

        for s in model.predict(sentence):
            for l in s.labels:
                assert l.name is not None
                assert 0.0 <= l.confidence <= 1.0
                # confidence must be a plain Python float (not a tensor scalar)
                assert isinstance(l.confidence, float)
    finally:
        # clean up results directory even on failure; ignore_errors also
        # covers the case where the directory was never created
        shutil.rmtree('./results', ignore_errors=True)
コード例 #10
0
ファイル: train.py プロジェクト: DFKI-NLP/flairRelEx
from typing import List

import torch

from flairrelex.data_fetcher import NLPTaskDataFetcher, NLPTask
from flairrelex.data import TaggedCorpus
from flairrelex.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, CharLMEmbeddings, CharacterEmbeddings

# 1. get the corpus
# downsample to 10% of CoNLL-03 to keep this demo run fast
corpus: TaggedCorpus = NLPTaskDataFetcher.fetch_data(
    NLPTask.CONLL_03).downsample(0.1)
print(corpus)

# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
# (maps each NER tag string to an integer index and back)
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# initialize embeddings
embedding_types: List[TokenEmbeddings] = [
    WordEmbeddings('glove'),

    # comment in this line to use character embeddings
    # CharacterEmbeddings(),

    # comment in these lines to use contextual string embeddings
    #
    # CharLMEmbeddings('news-forward'),
    #