Example #1
from typing import List

from flair.data import Corpus
from flair.datasets import WNUT_17
from flair.embeddings import TokenEmbeddings, WordEmbeddings, FlairEmbeddings, StackedEmbeddings


def run(args):
    # 1. get the corpus
    corpus: Corpus = WNUT_17()

    # 2. what tag do we want to predict?
    tag_type = 'ner'

    # 3. make the tag dictionary from the corpus
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

    # initialize embeddings
    embedding_types: List[TokenEmbeddings] = [
        WordEmbeddings('crawl'),
        WordEmbeddings('twitter'),
        FlairEmbeddings('news-forward'),
        FlairEmbeddings('news-backward'),
    ]

    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

    # initialize sequence tagger
    from flair.models import SequenceTagger

    tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                            embeddings=embeddings,
                                            tag_dictionary=tag_dictionary,
                                            tag_type=tag_type)

    # initialize trainer
    from flair.trainers import ModelTrainer

    trainer: ModelTrainer = ModelTrainer(tagger, corpus, use_tensorboard=True)

    trainer.train(
        args.job_dir,
        train_with_dev=True,
        max_epochs=args.epochs
    )

    upload_results(args)  # helper defined elsewhere in the source project
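Example #1 reads args.job_dir and args.epochs and calls an upload_results() helper from its own project. A minimal sketch of a command-line entry point that could drive run(args), assuming the function above is defined in the same script (the flag names and defaults are illustrative, not part of the original):

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Train a Flair NER tagger on WNUT-17")
    # the attribute names match what run(args) reads above
    parser.add_argument("--job-dir", dest="job_dir", default="resources/taggers/wnut17")
    parser.add_argument("--epochs", type=int, default=20)
    args = parser.parse_args()

    run(args)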
Example #2
import flair
import torch
from flair.datasets import WNUT_17
from flair.trainers import ModelTrainer
# TARSSequenceTagger2 is a project-specific tagger class defined elsewhere in the source repository

flair.set_seed(3)

tagger = TARSSequenceTagger2.load("resources/v3/sequence-2/final-model.pt")

label_name_map = {
    "person": "Person",
    "location": "Location",
    "creative-work": "Creative Work",
    "product": "Product",
    "corporation": "Corporation",
    "group": "Group"
}
print(label_name_map)
corpus = WNUT_17(tag_to_bioes=None,
                 tag_to_bio2="ner",
                 label_name_map=label_name_map)
corpus = corpus.downsample(0.1)
tag_type = "ner"
tag_dictionary = corpus.make_label_dictionary(tag_type)

tagger.add_and_switch_to_new_task("sequence-3-train",
                                  tag_dictionary=tag_dictionary,
                                  tag_type=tag_type)

trainer = ModelTrainer(tagger, corpus, optimizer=torch.optim.AdamW)
trainer.train(
    base_path='resources/v3/sequence-3',
    learning_rate=5.0e-5,
    mini_batch_size=32,
    mini_batch_chunk_size=None,
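After a run like Example #2 finishes, the saved checkpoint can be used for inference with the standard Flair load/predict pattern. A short sketch with Flair's built-in SequenceTagger (the checkpoint path is a placeholder):

from flair.data import Sentence
from flair.models import SequenceTagger

# load a trained model checkpoint (placeholder path)
tagger = SequenceTagger.load("resources/v3/sequence-3/final-model.pt")

# tag a single sentence and print the predicted entity spans
sentence = Sentence("George Washington went to Washington.")
tagger.predict(sentence)
for span in sentence.get_spans("ner"):
    print(span)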
Example #3
from FlairParamOptimizer import search_strategies, search_spaces, orchestrator
import FlairParamOptimizer.parameter_listings.parameters_for_user_input as param
from flair.embeddings import WordEmbeddings

from flair.datasets import WNUT_17

corpus = WNUT_17()

search_space = search_spaces.SequenceTaggerSearchSpace()
search_strategy = search_strategies.RandomSearch()

search_space.add_tag_type("ner")

search_space.add_budget(param.Budget.TIME_IN_H, 24)
search_space.add_evaluation_metric(param.EvaluationMetric.MICRO_F1_SCORE)
search_space.add_optimization_value(param.OptimizationValue.DEV_SCORE)
search_space.add_max_epochs_per_training_run(25)

search_space.add_parameter(param.SequenceTagger.HIDDEN_SIZE,
                           options=[128, 256, 512])
search_space.add_parameter(param.SequenceTagger.DROPOUT,
                           options=[0, 0.1, 0.2, 0.3])
search_space.add_parameter(param.SequenceTagger.WORD_DROPOUT,
                           options=[0, 0.01, 0.05, 0.1])
search_space.add_parameter(param.SequenceTagger.RNN_LAYERS,
                           options=[2, 3, 4, 5, 6])
search_space.add_parameter(param.SequenceTagger.USE_RNN, options=[True, False])
search_space.add_parameter(param.SequenceTagger.USE_CRF, options=[True, False])
search_space.add_parameter(param.SequenceTagger.REPROJECT_EMBEDDINGS,
                           options=[True, False])
search_space.add_parameter(
Example #4
from flair.data import Corpus
from flair.datasets import WNUT_17
from flair.embeddings import FlairEmbeddings, WordEmbeddings, StackedEmbeddings

# 1. get the corpus
corpus: Corpus = WNUT_17()
print(corpus)

# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary)

# 4. initialize embeddings
embedding_types = [
    WordEmbeddings('glove'),
    FlairEmbeddings('news-forward'),
    FlairEmbeddings('news-backward'),
]

embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# 5. initialize sequence tagger
from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        tag_type=tag_type,
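Example #4 breaks off inside the SequenceTagger constructor. For reference, the training step that usually follows in this tutorial-style script is sketched below, continuing from the tagger and corpus defined above (the output path and hyperparameter values are illustrative, not the original's):

# 6. initialize the trainer and start training (illustrative settings)
from flair.trainers import ModelTrainer

trainer = ModelTrainer(tagger, corpus)

trainer.train('resources/taggers/example-ner',
              learning_rate=0.1,
              mini_batch_size=32,
              max_epochs=150)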
Example #5
import flair
import torch
from flair.data import MultiCorpus
from flair.datasets import WNUT_17
from flair.trainers import ModelTrainer
# TARSSequenceTagger2 is a project-specific tagger class defined elsewhere in the source repository

flair.set_seed(3)

label_name_map = {
    "person": "Person",
    "location": "Location",
    "creative-work": "Creative Work",
    "product": "Product",
    "corporation": "Corporation",
    "group": "Group"
}

print(label_name_map)
corpus = WNUT_17(tag_to_bioes=None, tag_to_bio2="ner")
corpus = corpus.downsample(0.1)

tag_type = "ner"
label_dictionary = corpus.make_label_dictionary(tag_type)
print(label_dictionary)

tagger = TARSSequenceTagger2(tag_dictionary=label_dictionary,
                             tag_type=tag_type,
                             task_name="TEST_NER")

trainer = ModelTrainer(tagger, corpus, optimizer=torch.optim.AdamW)
from torch.optim.lr_scheduler import OneCycleLR

trainer.train(
    base_path='resources/v3/wnut17-simple',
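Example #5 is also truncated inside the trainer.train() call. The OneCycleLR import just above suggests a fine-tuning-style schedule; as a reference, such a call in Flair releases of that era commonly takes keyword arguments like the following (the values are illustrative placeholders, not the original configuration):

# illustrative pattern only, not the original snippet's settings
trainer.train(
    base_path='resources/v3/wnut17-simple',
    learning_rate=5.0e-5,            # small learning rate, typical for fine-tuning
    mini_batch_size=32,
    max_epochs=20,
    scheduler=OneCycleLR,            # one-cycle schedule instead of the default annealing
    embeddings_storage_mode='none',
)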