예제 #1
0
def test_token_level_sampler_swap_sampling():
    """Check that ``swap_sampling`` returns a list as long as its token input."""
    tls = TokenLevelSampler()
    text = "人为什么活着?生而为人必须要有梦想!还要有尽可能多的精神体验。"
    tokens = cut_words(text)
    res = tls.swap_sampling(tokens, [5, 8])
    # isinstance is the idiomatic type check; ``type(x) == list`` would
    # reject list subclasses for no good reason.
    assert isinstance(res, list)
    # The result must have exactly as many items as the input tokens.
    assert len(res) == len(tokens)
예제 #2
0
def test_token_level_sampler_dependent_sampling():
    """Check that ``dependent_sampling`` returns a non-empty list of strings."""
    tls = TokenLevelSampler()
    text = "人为什么活着?生而为人必须要有梦想!还要有尽可能多的精神体验。"
    tokens = cut_zhchar(text)
    res = tls.dependent_sampling(tokens)
    # isinstance is the idiomatic type check (avoids exact-type comparison).
    assert isinstance(res, list)
    # Fail with a readable assertion instead of an IndexError on ``res[0]``.
    assert res, "dependent_sampling returned an empty list"
    assert isinstance(res[0], str)
예제 #3
0
def test_token_level_sampler_swap():
    """Check ``make_samples`` output when the sampler is limited to ``"swap"``."""
    tls = TokenLevelSampler(types=["swap"])
    text = "人为什么活着?生而为人必须要有梦想!还要有尽可能多的精神体验。"
    res = tls.make_samples(text)
    # isinstance also accepts dict subclasses, unlike ``type(res) == dict``.
    assert isinstance(res, dict)
    # This test expects exactly two entries in the result.
    assert len(res) == 2
예제 #4
0
def test_token_level_sampler_single_sent():
    """A single short sentence is expected to yield four entries."""
    sampler = TokenLevelSampler()
    samples = sampler.make_samples("人为什么活着?")
    expected_count = 4
    assert len(samples) == expected_count
예제 #5
0
def test_token_level_sampler_none_text():
    """An empty input string is expected to produce an empty dict."""
    sampler = TokenLevelSampler()
    empty_text = ""
    samples = sampler.make_samples(empty_text)
    assert samples == {}
예제 #6
0
def test_token_level_sampler_none():
    """A sampler built with an empty ``types`` list is expected to return ``{}``."""
    sampler = TokenLevelSampler(types=[])
    samples = sampler.make_samples(
        "人为什么活着?生而为人必须要有梦想!还要有尽可能多的精神体验。"
    )
    assert samples == {}
예제 #7
0
def test_token_level_sampler_token_pos_spliter():
    """Passing ``cut_wps`` as the second argument is expected to yield four entries."""
    sampler = TokenLevelSampler()
    sentence = "人为什么活着?生而为人必须要有梦想!还要有尽可能多的精神体验。"
    samples = sampler.make_samples(sentence, cut_wps)
    expected_count = 4
    assert len(samples) == expected_count
예제 #8
0
파일: __init__.py 프로젝트: hscspring/pnlp
from pnlp.piop import Reader, Dict
from pnlp.ptxt import Regex, Text, Length
from pnlp.pnorm import NumNorm
from pnlp.penh import TokenLevelSampler, SentenceLevelSampler
from pnlp.pmag import MagicDict
from pnlp.stopwords import StopWords
from pnlp.stopwords import chinese_stopwords, english_stopwords

from pnlp.utils import pstr, concurring, divide2int
from pnlp.utils import generate_batches_by_num, generate_batches_by_size


# Module-level default instances, constructed once when this module is
# imported. Each one is built with its class's no-argument constructor.
num_norm = NumNorm()
reg = Regex()
reader = Reader()
# Default token-level and sentence-level samplers.
tlsampler = TokenLevelSampler()
slsampler = SentenceLevelSampler()


# Package metadata.
__title__ = 'pnlp'
__version__ = '0.4.0'
__author__ = 'Yam'
__license__ = 'Apache-2.0'
__copyright__ = 'Copyright 2019, 2020 Yam'

# Names exported by a star-import of this module.
__all__ = [
    'Reader',
    'Text',
    'Regex',
    'Length',
    'MagicDict',
    'NumNorm',
    'StopWords',
    'TokenLevelSampler',
    'SentenceLevelSampler',
]