# Train a CoNLL-2003 NER tagger (word + BERT feature embeddings).
# NOTE(review): this chunk was whitespace-mangled onto one line and is
# truncated mid-script; reconstructed with conventional formatting.
import mxnet as mx

from bertsota.common.utils import mxnet_prefer_gpu
from bertsota.tagger.corpus import NLPTaskDataFetcher, NLPTask
from bertsota.tagger.embeddings import WordEmbeddings, StackedEmbeddings, BERTEmbeddings, CharLMEmbeddings
from bertsota.tagger.sequence_tagger_model import SequenceTagger
from bertsota.tagger.sequence_tagger_trainer import SequenceTaggerTrainer

# Where the trained model will be saved.
model_path = 'data/model/conll03-pe-fe2'

# 1. get training, test and dev data
# Column layout of the CoNLL-2003 TSV files.
columns = {0: 'text', 1: 'pos', 2: 'np', 3: 'ner'}
corpus = NLPTaskDataFetcher.fetch_column_corpus('data/conll03',
                                                columns,
                                                train_file='train.tsv',
                                                test_file='test.tsv',
                                                dev_file='dev.tsv',
                                                tag_to_biloes='ner')

# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'),
        # NOTE(review): source chunk ends mid-expression at
        # `BERTEmbeddings([` — the BERT embedding file list and the rest of
        # the script (StackedEmbeddings, SequenceTagger, trainer) are not
        # visible in this view and must be restored from the original file.
    ]
# Train a WSJ POS tagger (dot-attention variant, character LM embeddings).
# NOTE(review): this chunk was whitespace-mangled onto one line and is
# truncated mid-script; reconstructed with conventional formatting.
import mxnet as mx

from bertsota.common.utils import mxnet_prefer_gpu
from bertsota.tagger.corpus import NLPTaskDataFetcher
from bertsota.tagger.embeddings import WordEmbeddings, CharLMEmbeddings, StackedEmbeddings, BERTEmbeddings
from bertsota.tagger.sequence_tagger_model import SequenceTagger
from bertsota.tagger.sequence_tagger_trainer import SequenceTaggerTrainer

# Where the trained model will be saved.
model_path = 'data/model/wsj-pos-dotatt-belc'

# 1. get training, test and dev data
columns = {0: 'text', 1: 'pos'}
corpus = NLPTaskDataFetcher.fetch_column_corpus('data/wsj-pos', columns,
                                                train_file='train.short.tsv',
                                                test_file='test.tsv',
                                                dev_file='dev.tsv'
                                                # train_file='debug.tsv',
                                                # test_file='debug.tsv',
                                                # dev_file='debug.tsv'
                                                )

# 2. what tag do we want to predict?
tag_type = 'pos'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        # WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'),
        # NOTE(review): source chunk ends here — the remaining embedding
        # entries and the rest of the script (StackedEmbeddings,
        # SequenceTagger, trainer) are not visible in this view.
    ]
# Train a CTB 5.1 Chinese POS tagger (fastText word embeddings).
# NOTE(review): this chunk was whitespace-mangled onto one line and is
# truncated mid-script; reconstructed with conventional formatting.
import os

import mxnet as mx

from bertsota.common.utils import mxnet_prefer_gpu
from bertsota.tagger.corpus import NLPTaskDataFetcher
from bertsota.tagger.embeddings import WordEmbeddings, CharLMEmbeddings, StackedEmbeddings, BERTEmbeddings
from bertsota.tagger.sequence_tagger_model import SequenceTagger
from bertsota.tagger.sequence_tagger_trainer import SequenceTaggerTrainer

# Where the trained model will be saved.
model_path = 'data/model/ctb-pos3'

# 1. get training, test and dev data
columns = {0: 'text', 1: 'pos'}
corpus = NLPTaskDataFetcher.fetch_column_corpus('data/ctb5.1-pos', columns,
                                                train_file='train.short.tsv',
                                                test_file='test.short.tsv',
                                                dev_file='dev.short.tsv')

# 2. what tag do we want to predict?
tag_type = 'pos'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        WordEmbeddings('data/embedding/ctb.pos.fasttext.300.txt'),
        # BERTEmbeddings(['data/embedding/bert_base_sum/ctb.pos.train.bert',
        #                 'data/embedding/bert_base_sum/ctb.pos.dev.bert',
        # NOTE(review): source chunk ends inside the commented-out
        # BERTEmbeddings entry — the rest of the script is not visible
        # in this view and must be restored from the original file.
    ]
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-02-12 15:25
# Debug run of the WSJ POS tagger: all three splits point at dev.tsv and a
# small debug GloVe file, so the pipeline can be exercised quickly.
# NOTE(review): this chunk was whitespace-mangled onto one line and ends
# after the embedding list; reconstructed with conventional formatting.
import mxnet as mx

from bertsota.common.utils import mxnet_prefer_gpu
from bertsota.tagger.corpus import NLPTaskDataFetcher
from bertsota.tagger.embeddings import WordEmbeddings, CharLMEmbeddings, StackedEmbeddings
from bertsota.tagger.sequence_tagger_model import SequenceTagger
from bertsota.tagger.sequence_tagger_trainer import SequenceTaggerTrainer

# 1. get training, test and dev data (dev.tsv reused for every split: debug only)
columns = {0: 'text', 1: 'pos'}
corpus = NLPTaskDataFetcher.fetch_column_corpus('data/wsj-pos', columns,
                                                train_file='dev.tsv',
                                                test_file='dev.tsv',
                                                dev_file='dev.tsv')

# 2. what tag do we want to predict?
tag_type = 'pos'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        WordEmbeddings('data/embedding/glove/glove.6B.100d.debug.txt'),
        # CharLMEmbeddings('data/model/lm-news-forward'),
        # CharLMEmbeddings('data/model/lm-news-backward'),
    ]
    # NOTE(review): the script presumably continues past this view
    # (StackedEmbeddings, SequenceTagger, trainer) — confirm against the
    # original file.
# Train an OntoNotes 5.0 English NER tagger with BERT embeddings.
# NOTE(review): this chunk was whitespace-mangled onto one line and is
# truncated mid-script; reconstructed with conventional formatting.
import mxnet as mx

from bertsota.common.utils import mxnet_prefer_gpu
from bertsota.tagger.corpus import NLPTaskDataFetcher, NLPTask
from bertsota.tagger.embeddings import WordEmbeddings, StackedEmbeddings, BERTEmbeddings, CharLMEmbeddings
from bertsota.tagger.sequence_tagger_model import SequenceTagger
from bertsota.tagger.sequence_tagger_trainer import SequenceTaggerTrainer

# Where the trained model will be saved.
model_path = 'data/model/ontoen-bert'

# 1. get training, test and dev data
# OntoNotes column layout; source NER scheme is IOBLU, converted to BILOES.
columns = {0: 'id', 1: 'text', 2: 'lemma', 3: 'ner'}
corpus = NLPTaskDataFetcher.fetch_column_corpus('data/ontonotes-en', columns,
                                                train_file='train.tsv',
                                                test_file='test.tsv',
                                                dev_file='dev.tsv',
                                                tag_to_biloes='ner',
                                                source_scheme='ioblu')

# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        # NOTE(review): source chunk ends immediately after the list is
        # opened — every embedding entry and the rest of the script is
        # missing from this view and must be restored from the original file.
    ]