def run(args):
    """Train a WNUT-17 NER sequence tagger and upload the results.

    Args:
        args: Object exposing ``job_dir`` (passed as the first argument to
            ``trainer.train`` -- presumably the output directory) and
            ``epochs`` (used as ``max_epochs``). The same object is handed
            to ``upload_results`` once training has returned.
    """
    # Local imports, kept function-scoped as in the original.
    from flair.models import SequenceTagger
    from flair.trainers import ModelTrainer

    # The tag layer to predict.
    target_tag = 'ner'

    # Load the corpus and derive the tag dictionary from it.
    dataset: Corpus = WNUT_17()
    label_vocab = dataset.make_tag_dictionary(tag_type=target_tag)

    # Two classic word embeddings plus forward/backward Flair embeddings,
    # combined into a single stacked embedding.
    stacked: StackedEmbeddings = StackedEmbeddings(
        embeddings=[
            WordEmbeddings('crawl'),
            WordEmbeddings('twitter'),
            FlairEmbeddings('news-forward'),
            FlairEmbeddings('news-backward'),
        ]
    )

    # Sequence tagger on top of the stacked embeddings.
    model: SequenceTagger = SequenceTagger(
        hidden_size=256,
        embeddings=stacked,
        tag_dictionary=label_vocab,
        tag_type=target_tag,
    )

    # Train with the dev split included, logging to TensorBoard.
    fitter: ModelTrainer = ModelTrainer(model, dataset, use_tensorboard=True)
    fitter.train(
        args.job_dir,
        train_with_dev=True,
        max_epochs=args.epochs,
    )

    upload_results(args)
# Script fragment (its statements are collapsed onto one physical line).
# What the visible code does, in order:
#   - seeds flair with 3 and loads a TARSSequenceTagger2 checkpoint from
#     "resources/v3/sequence-2/final-model.pt";
#   - builds WNUT_17 with tag_to_bioes=None, tag_to_bio2="ner" and the
#     label_name_map defined below, then calls corpus.downsample(0.1);
#   - makes a label dictionary for "ner" and calls
#     tagger.add_and_switch_to_new_task("sequence-3-train", ...);
#   - creates a ModelTrainer with optimizer=torch.optim.AdamW and starts
#     trainer.train(...) with base_path 'resources/v3/sequence-3'.
# NOTE(review): the trainer.train(...) call is cut off after
# mini_batch_chunk_size=None, -- remaining arguments and the closing
# parenthesis are not visible here. Imports for flair, torch, WNUT_17,
# ModelTrainer and TARSSequenceTagger2 are also not visible in this fragment.
flair.set_seed(3) tagger = TARSSequenceTagger2.load("resources/v3/sequence-2/final-model.pt") label_name_map = { "person": "Person", "location": "Location", "creative-work": "Creative Work", "product": "Product", "corporation": "Corporation", "group": "Group" } print(label_name_map) corpus = WNUT_17(tag_to_bioes=None, tag_to_bio2="ner", label_name_map=label_name_map) corpus = corpus.downsample(0.1) tag_type = "ner" tag_dictionary = corpus.make_label_dictionary(tag_type) tagger.add_and_switch_to_new_task("sequence-3-train", tag_dictionary=tag_dictionary, tag_type=tag_type) trainer = ModelTrainer(tagger, corpus, optimizer=torch.optim.AdamW) trainer.train( base_path='resources/v3/sequence-3', learning_rate=5.0e-5, mini_batch_size=32, mini_batch_chunk_size=None,
# Script fragment (its statements are collapsed onto one physical line).
# Configures a FlairParamOptimizer search for a sequence tagger on WNUT_17:
#   - search space: SequenceTaggerSearchSpace, strategy: RandomSearch;
#   - tag type "ner", budget param.Budget.TIME_IN_H with value 24,
#     evaluation metric MICRO_F1_SCORE, optimization value DEV_SCORE,
#     max 25 epochs per training run;
#   - option lists for HIDDEN_SIZE, DROPOUT, WORD_DROPOUT, RNN_LAYERS,
#     USE_RNN, USE_CRF and REPROJECT_EMBEDDINGS.
# NOTE(review): the final search_space.add_parameter( call is cut off, so its
# arguments are not visible. `orchestrator`, `WordEmbeddings` and `corpus` are
# not used in the visible part -- presumably they are used further down.
from FlairParamOptimizer import search_strategies, search_spaces, orchestrator import FlairParamOptimizer.parameter_listings.parameters_for_user_input as param from flair.embeddings import WordEmbeddings from flair.datasets import WNUT_17 corpus = WNUT_17() search_space = search_spaces.SequenceTaggerSearchSpace() search_strategy = search_strategies.RandomSearch() search_space.add_tag_type("ner") search_space.add_budget(param.Budget.TIME_IN_H, 24) search_space.add_evaluation_metric(param.EvaluationMetric.MICRO_F1_SCORE) search_space.add_optimization_value(param.OptimizationValue.DEV_SCORE) search_space.add_max_epochs_per_training_run(25) search_space.add_parameter(param.SequenceTagger.HIDDEN_SIZE, options=[128, 256, 512]) search_space.add_parameter(param.SequenceTagger.DROPOUT, options=[0, 0.1, 0.2, 0.3]) search_space.add_parameter(param.SequenceTagger.WORD_DROPOUT, options=[0, 0.01, 0.05, 0.1]) search_space.add_parameter(param.SequenceTagger.RNN_LAYERS, options=[2, 3, 4, 5, 6]) search_space.add_parameter(param.SequenceTagger.USE_RNN, options=[True, False]) search_space.add_parameter(param.SequenceTagger.USE_CRF, options=[True, False]) search_space.add_parameter(param.SequenceTagger.REPROJECT_EMBEDDINGS, options=[True, False]) search_space.add_parameter(
# Script fragment (its statements are collapsed onto one physical line).
# Numbered steps visible in the text:
#   1. load the WNUT_17 corpus and print it;
#   2. choose 'ner' as the tag type to predict;
#   3. build the tag dictionary with corpus.make_tag_dictionary and print it;
#   4. stack WordEmbeddings('glove') with FlairEmbeddings('news-forward')
#      and FlairEmbeddings('news-backward');
#   5. import SequenceTagger and begin constructing it with hidden_size=256.
# NOTE(review): as written on a single line, everything after the first '#'
# step marker is parsed as a comment; the original line breaks need to be
# restored before this can run. The SequenceTagger(...) call is also cut off
# after tag_type=tag_type, so its remaining arguments are not visible.
from flair.data import Corpus from flair.datasets import WNUT_17 from flair.embeddings import FlairEmbeddings, WordEmbeddings, StackedEmbeddings # 1. get the corpus corpus: Corpus = WNUT_17() print(corpus) # 2. what tag do we want to predict? tag_type = 'ner' # 3. make the tag dictionary from the corpus tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type) print(tag_dictionary) # 4. initialize embeddings embedding_types = [ WordEmbeddings('glove'), FlairEmbeddings('news-forward'), FlairEmbeddings('news-backward'), ] embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types) # 5. initialize sequence tagger from flair.models import SequenceTagger tagger: SequenceTagger = SequenceTagger(hidden_size=256, embeddings=embeddings, tag_dictionary=tag_dictionary, tag_type=tag_type,
# Script fragment (its statements are collapsed onto one physical line).
# What the visible code does, in order:
#   - seeds flair with 3, then defines and prints label_name_map;
#   - builds WNUT_17 with tag_to_bioes=None, tag_to_bio2="ner" and calls
#     corpus.downsample(0.1);
#   - makes and prints a label dictionary for "ner";
#   - constructs TARSSequenceTagger2 with task_name="TEST_NER" and a
#     ModelTrainer with optimizer=torch.optim.AdamW;
#   - imports OneCycleLR and starts trainer.train(...) with base_path
#     'resources/v3/wnut17-simple'.
# NOTE(review): label_name_map is defined and printed but not passed to
# WNUT_17 in the visible code -- confirm whether that is intended.
# MultiCorpus is imported but unused in the visible part, and the
# trainer.train(...) call is cut off after base_path, so it cannot be seen
# whether OneCycleLR is passed as a scheduler.
from flair.data import MultiCorpus from flair.datasets import WNUT_17 flair.set_seed(3) label_name_map = { "person": "Person", "location": "Location", "creative-work": "Creative Work", "product": "Product", "corporation": "Corporation", "group": "Group" } print(label_name_map) corpus = WNUT_17(tag_to_bioes=None, tag_to_bio2="ner") corpus = corpus.downsample(0.1) tag_type = "ner" label_dictionary = corpus.make_label_dictionary(tag_type) print(label_dictionary) tagger = TARSSequenceTagger2(tag_dictionary=label_dictionary, tag_type=tag_type, task_name="TEST_NER") trainer = ModelTrainer(tagger, corpus, optimizer=torch.optim.AdamW) from torch.optim.lr_scheduler import OneCycleLR trainer.train( base_path='resources/v3/wnut17-simple',