예제 #1
0
def test_klass():
    dset = Dataset()
    # dset.add(dset.load_emojis())
    dset.add(dset.tm_words())
    kl = dset.klasses
    xx = dset.klass("xxx good xxx morning xxT")
    for k in xx:
        assert k in kl
예제 #2
0
def test_add():
    dset = Dataset()
    assert len(dset.klasses) == 0
    dset.add(dset.load_emojis())
    cnt = len(dset.klasses)
    assert cnt > 0
    words = dset.tm_words()
    dset.add(words)
    print(len(dset.klasses), len(words), cnt)
    assert len(dset.klasses) <= len(words) + cnt
예제 #3
0
def test_remove():
    dset = Dataset()
    dset.add(dset.load_emojis())
    dset.add(dset.tm_words())
    xx = dset.klass("xxx good morning xxx asdfa")
    print(xx)
    assert len(xx) == 2
    dset.remove("~good~")
    xx = dset.klass("xxx good xxx morning xxx")
    print(xx)
    assert len(xx) == 1
예제 #4
0
def test_process():
    
    from microtc.emoticons import convert_emoji
    dset = Dataset()
    dset.add(dset.load_emojis())
    dset.add(dset.tm_words())
    xx = dset.process("xxx good 9 morning xxx fax x la", "~x~")
    for a, b in zip(xx, ["~xxx~good~9~morning~xxx~fax~", "~la~", "~la~"]):
        print(a, b)
        assert a == b
    txt = 'xxx good {} morning xxx fax x la'.format(convert_emoji('1F600'))
    xx = dset.process(txt, convert_emoji('1F600'))
    print(xx)
    for a, b in zip(xx, ["~xxx~good~", "~morning~xxx~fax~x~la~"]):
        assert a == b