Ejemplos de NLPTaskDataFetcher.fetch_column_corpus en Python

Lenguaje de programación: Python

Namespace/Package Name: bertsota.tagger.corpus

Clase / Tipo: NLPTaskDataFetcher

Método / Función: fetch_column_corpus

Ejemplos en hotexamples.com: 5

Python NLPTaskDataFetcher.fetch_column_corpus: 5 ejemplos encontrados. Estos son los ejemplos reales en Python mejor valorados de bertsota.tagger.corpus.NLPTaskDataFetcher.fetch_column_corpus, extraídos de proyectos de código abierto. Puedes valorar los ejemplos para ayudarnos a mejorar su calidad.

Métodos usados con frecuencia

Mostrar / Ocultar

fetch_column_corpus (5)

Métodos usados con frecuencia

fetch_column_corpus (5)

Ejemplo n.º 1

Mostrar archivo

import mxnet as mx

from bertsota.common.utils import mxnet_prefer_gpu
from bertsota.tagger.corpus import NLPTaskDataFetcher, NLPTask

# get training, test and dev data
from bertsota.tagger.embeddings import WordEmbeddings, StackedEmbeddings, BERTEmbeddings, CharLMEmbeddings
from bertsota.tagger.sequence_tagger_model import SequenceTagger
from bertsota.tagger.sequence_tagger_trainer import SequenceTaggerTrainer

model_path = 'data/model/conll03-pe-fe2'
columns = {0: 'text', 1: 'pos', 2: 'np', 3: 'ner'}
corpus = NLPTaskDataFetcher.fetch_column_corpus('data/conll03',
                                                columns,
                                                train_file='train.tsv',
                                                test_file='test.tsv',
                                                dev_file='dev.tsv',
                                                tag_to_biloes='ner')

# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'),
        BERTEmbeddings([

Ejemplo n.º 2

Mostrar archivo

Archivo: wsj_att_belc.py Proyecto: xlzwhboy/bert-2019

import mxnet as mx

from bertsota.common.utils import mxnet_prefer_gpu
from bertsota.tagger.corpus import NLPTaskDataFetcher
from bertsota.tagger.embeddings import WordEmbeddings, CharLMEmbeddings, StackedEmbeddings, BERTEmbeddings
from bertsota.tagger.sequence_tagger_model import SequenceTagger
from bertsota.tagger.sequence_tagger_trainer import SequenceTaggerTrainer

model_path = 'data/model/wsj-pos-dotatt-belc'
columns = {0: 'text', 1: 'pos'}
corpus = NLPTaskDataFetcher.fetch_column_corpus('data/wsj-pos',
                                                columns,
                                                train_file='train.short.tsv',
                                                test_file='test.tsv',
                                                dev_file='dev.tsv'
                                                # train_file='debug.tsv',
                                                # test_file='debug.tsv',
                                                # dev_file='debug.tsv'
                                                )
# 2. what tag do we want to predict?
tag_type = 'pos'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        # WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'),

Ejemplo n.º 3

Mostrar archivo

import os

import mxnet as mx

from bertsota.common.utils import mxnet_prefer_gpu
from bertsota.tagger.corpus import NLPTaskDataFetcher
from bertsota.tagger.embeddings import WordEmbeddings, CharLMEmbeddings, StackedEmbeddings, BERTEmbeddings
from bertsota.tagger.sequence_tagger_model import SequenceTagger
from bertsota.tagger.sequence_tagger_trainer import SequenceTaggerTrainer

model_path = 'data/model/ctb-pos3'

columns = {0: 'text', 1: 'pos'}
corpus = NLPTaskDataFetcher.fetch_column_corpus('data/ctb5.1-pos',
                                                columns,
                                                train_file='train.short.tsv',
                                                test_file='test.short.tsv',
                                                dev_file='dev.short.tsv')
# 2. what tag do we want to predict?
tag_type = 'pos'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        WordEmbeddings('data/embedding/ctb.pos.fasttext.300.txt'),
        # BERTEmbeddings(['data/embedding/bert_base_sum/ctb.pos.train.bert',
        #                 'data/embedding/bert_base_sum/ctb.pos.dev.bert',

Ejemplo n.º 4

Mostrar archivo

# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-02-12 15:25
import mxnet as mx

from bertsota.common.utils import mxnet_prefer_gpu
from bertsota.tagger.corpus import NLPTaskDataFetcher
from bertsota.tagger.embeddings import WordEmbeddings, CharLMEmbeddings, StackedEmbeddings
from bertsota.tagger.sequence_tagger_model import SequenceTagger
from bertsota.tagger.sequence_tagger_trainer import SequenceTaggerTrainer

# 1. load the corpus
# Map each column index of the data files to the name of the annotation
# layer it holds: column 0 is 'text', column 1 is 'pos'.
columns = {0: 'text', 1: 'pos'}
# NOTE(review): train, test and dev all point at the same 'dev.tsv' file --
# presumably a quick debug/smoke-test setup rather than a real experiment;
# confirm before trusting any reported scores.
corpus = NLPTaskDataFetcher.fetch_column_corpus('data/wsj-pos',
                                                columns,
                                                train_file='dev.tsv',
                                                test_file='dev.tsv',
                                                dev_file='dev.tsv')
# 2. what tag do we want to predict?
# Must be one of the layer names declared in `columns` above.
tag_type = 'pos'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
# Print the dictionary's idx2item attribute so the tag set can be inspected.
print(tag_dictionary.idx2item)

# 4. initialize embeddings
# Build the embeddings inside the MXNet context returned by
# mxnet_prefer_gpu() (presumably a GPU when one is available -- verify
# against bertsota.common.utils).
with mx.Context(mxnet_prefer_gpu()):
    # Only the word embeddings are active; the path contains '.debug.',
    # presumably a reduced vector file -- TODO confirm.
    embedding_types = [
        WordEmbeddings('data/embedding/glove/glove.6B.100d.debug.txt'),
        # CharLMEmbeddings('data/model/lm-news-forward'),
        # CharLMEmbeddings('data/model/lm-news-backward'),
    ]

Ejemplo n.º 5

Mostrar archivo

import mxnet as mx

from bertsota.common.utils import mxnet_prefer_gpu
from bertsota.tagger.corpus import NLPTaskDataFetcher, NLPTask

# get training, test and dev data
from bertsota.tagger.embeddings import WordEmbeddings, StackedEmbeddings, BERTEmbeddings, CharLMEmbeddings
from bertsota.tagger.sequence_tagger_model import SequenceTagger
from bertsota.tagger.sequence_tagger_trainer import SequenceTaggerTrainer

model_path = 'data/model/ontoen-bert'
columns = {0: 'id', 1: 'text', 2: 'lemma', 3: 'ner'}
corpus = NLPTaskDataFetcher.fetch_column_corpus('data/ontonotes-en',
                                                columns,
                                                train_file='train.tsv',
                                                test_file='test.tsv',
                                                dev_file='dev.tsv',
                                                tag_to_biloes='ner',
                                                source_scheme='ioblu')

# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [