def load_from_file(cls, model_folder, context: mx.Context = None, **kwargs):
    """Restore a trained ``SequenceTagger`` from *model_folder*.

    Expects two files inside *model_folder*:
      - ``config.pkl``  — pickled dict with the constructor hyper-parameters
        (``hidden_size``, ``tag_dictionary``, ``tag_type``, ``use_crf``,
        ``use_rnn``, ``rnn_layers``);
      - ``model.bin``   — the serialized parameters.

    :param model_folder: directory containing ``config.pkl`` and ``model.bin``.
    :param context: device to load onto; defaults to the preferred GPU
        (falls back per ``mxnet_prefer_gpu``).
    :param kwargs: ``word_embedding_path`` — optional prefix prepended to the
        bundled ``data/...`` embedding/LM paths (must end with a separator,
        since it is concatenated, not joined).
    :return: the reconstructed tagger with parameters loaded.
    """
    if context is None:
        context = mxnet_prefer_gpu()

    # NOTE(review): pickle.load is only safe on trusted model folders —
    # never point this at untrusted input.
    with open(os.path.join(model_folder, 'config.pkl'), 'rb') as f:
        config = pickle.load(f)

    # Single place for the optional path prefix used by every resource below.
    prefix = kwargs.get('word_embedding_path', '')

    with context:
        stacked = StackedEmbeddings(embeddings=[
            WordEmbeddings('{}data/embedding/fasttext100.vec.txt'.format(prefix)),
            # comment in this line to use character embeddings
            # CharacterEmbeddings(),
            # comment in these lines to use contextual string embeddings
            CharLMEmbeddings('{}data/model/lm-news-forward'.format(prefix),
                             context=context),
            CharLMEmbeddings('{}data/model/lm-news-backward'.format(prefix),
                             context=context),
        ])
        model = SequenceTagger(hidden_size=config['hidden_size'],
                               embeddings=stacked,
                               tag_dictionary=config['tag_dictionary'],
                               tag_type=config['tag_type'],
                               use_crf=config['use_crf'],
                               use_rnn=config['use_rnn'],
                               rnn_layers=config['rnn_layers'])
        model.load_parameters(os.path.join(model_folder, 'model.bin'),
                              ctx=context)
    return model
train_file='train.tsv', test_file='test.tsv', dev_file='dev.tsv',
    tag_to_biloes='ner')  # NOTE(review): continuation of a corpus-fetch call that starts above this chunk; presumably converts NER tags to the BILOES scheme — confirm against NLPTaskDataFetcher

# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
# All embedding lookups are created on the preferred device (GPU if available).
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        # static word vectors (GloVe 100d)
        WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'),
        # precomputed BERT-large features for each CoNLL-03 split
        BERTEmbeddings([
            'data/embedding/bert_large_sum/conll03.train.bert',
            'data/embedding/bert_large_sum/conll03.dev.bert',
            'data/embedding/bert_large_sum/conll03.test.bert'
        ]),
        # comment in this line to use character embeddings
        # CharacterEmbeddings(),
        # comment in these lines to use contextual string embeddings
        CharLMEmbeddings('data/model/lm-news-forward'),
        CharLMEmbeddings('data/model/lm-news-backward'),
    ]
    # concatenate all of the above into a single embedding per token
    embeddings = StackedEmbeddings(embeddings=embedding_types)
# 1. fetch the CTB 5.1 POS corpus (short splits) in column format
corpus = NLPTaskDataFetcher.fetch_column_corpus(
    'data/ctb5.1-pos',
    columns,
    train_file='train.short.tsv',
    test_file='test.short.tsv',
    dev_file='dev.short.tsv')

# 2. what tag do we want to predict?
tag_type = 'pos'

# 3. build the tag dictionary from the corpus and show its contents
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings on the preferred device (GPU if available)
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [WordEmbeddings('data/embedding/ctb.pos.fasttext.300.txt')]
    # comment in these lines to use precomputed BERT features instead:
    # BERTEmbeddings(['data/embedding/bert_base_sum/ctb.pos.train.bert',
    #                 'data/embedding/bert_base_sum/ctb.pos.dev.bert',
    #                 'data/embedding/bert_base_sum/ctb.pos.test.bert'])
    # comment in these lines to use contextual string embeddings:
    # CharLMEmbeddings('data/model/lm-news-forward'),
    # CharLMEmbeddings('data/model/lm-news-backward'),
    embeddings = StackedEmbeddings(embeddings=embedding_types)

    # 5. initialize sequence tagger (CRF output layer on top of the stack)
    tagger = SequenceTagger(
        hidden_size=256,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
        use_crf=True)