def test_token_level_sampler_swap_sampling():
    """Check that ``swap_sampling`` returns a list as long as its input.

    The sentence is tokenized with ``cut_words`` and the resulting tokens
    are passed to ``swap_sampling`` together with ``[5, 8]``.
    """
    tls = TokenLevelSampler()
    text = "人为什么活着?生而为人必须要有梦想!还要有尽可能多的精神体验。"
    tokens = cut_words(text)
    # NOTE(review): [5, 8] is presumably a pair of token positions to
    # swap -- confirm against TokenLevelSampler.swap_sampling.
    res = tls.swap_sampling(tokens, [5, 8])
    # isinstance is the idiomatic type check (PEP 8) and also accepts
    # list subclasses.
    assert isinstance(res, list)
    assert len(res) == len(tokens)
def test_token_level_sampler_dependent_sampling():
    """Check that ``dependent_sampling`` returns a list whose first item is a string.

    The sentence is tokenized with ``cut_zhchar`` before being sampled.
    """
    tls = TokenLevelSampler()
    text = "人为什么活着?生而为人必须要有梦想!还要有尽可能多的精神体验。"
    tokens = cut_zhchar(text)
    res = tls.dependent_sampling(tokens)
    # isinstance is the idiomatic type check (PEP 8) and also accepts
    # subclasses of list / str.
    assert isinstance(res, list)
    assert isinstance(res[0], str)
def test_token_level_sampler_swap():
    """Check ``make_samples`` output when the sampler is limited to ``"swap"``.

    With ``types=["swap"]`` the result is expected to be a dict holding
    exactly two entries.
    """
    tls = TokenLevelSampler(types=["swap"])
    text = "人为什么活着?生而为人必须要有梦想!还要有尽可能多的精神体验。"
    res = tls.make_samples(text)
    # isinstance is the idiomatic type check (PEP 8) and also accepts
    # dict subclasses.
    assert isinstance(res, dict)
    # NOTE(review): why a single sampling type yields two keys is not
    # visible here -- confirm against TokenLevelSampler.make_samples.
    assert len(res) == 2
def test_token_level_sampler_single_sent():
    """A one-sentence input must yield four entries from ``make_samples``."""
    sentence = "人为什么活着?"
    samples = TokenLevelSampler().make_samples(sentence)
    expected_entries = 4
    assert len(samples) == expected_entries
def test_token_level_sampler_none_text():
    """An empty input string must yield an empty dict from ``make_samples``."""
    sampler = TokenLevelSampler()
    samples = sampler.make_samples("")
    assert samples == {}
def test_token_level_sampler_none():
    """A sampler built with an empty ``types`` list must yield an empty dict."""
    sentence = "人为什么活着?生而为人必须要有梦想!还要有尽可能多的精神体验。"
    sampler = TokenLevelSampler(types=[])
    samples = sampler.make_samples(sentence)
    assert samples == {}
def test_token_level_sampler_token_pos_spliter():
    """``make_samples`` given ``cut_wps`` as second argument must yield four entries."""
    sentence = "人为什么活着?生而为人必须要有梦想!还要有尽可能多的精神体验。"
    sampler = TokenLevelSampler()
    # cut_wps is handed over positionally, exactly as the sampler is
    # called elsewhere in these tests with a single text argument plus
    # this extra callable.
    samples = sampler.make_samples(sentence, cut_wps)
    expected_entries = 4
    assert len(samples) == expected_entries
# Names pulled in from the sub-modules so they are reachable from the
# package namespace.
from pnlp.piop import Reader, Dict
from pnlp.ptxt import Regex, Text, Length
from pnlp.pnorm import NumNorm
from pnlp.penh import TokenLevelSampler, SentenceLevelSampler
from pnlp.pmag import MagicDict
from pnlp.stopwords import (
    StopWords,
    chinese_stopwords,
    english_stopwords,
)
from pnlp.utils import (
    pstr,
    concurring,
    divide2int,
    generate_batches_by_num,
    generate_batches_by_size,
)

# Module-level instances created once at import time with default
# constructor arguments.
num_norm = NumNorm()
reg = Regex()
reader = Reader()
tlsampler = TokenLevelSampler()
slsampler = SentenceLevelSampler()

# Package metadata.
__title__ = 'pnlp'
__version__ = '0.4.0'
__author__ = 'Yam'
__license__ = 'Apache-2.0'
__copyright__ = 'Copyright 2019, 2020 Yam'

# Names exported by ``from pnlp import *``.
__all__ = [
    'Reader',
    'Text',
    'Regex',
    'Length',
    'MagicDict',
    'NumNorm',
    'StopWords',
    'TokenLevelSampler',
    'SentenceLevelSampler',
]