Beispiel #1
0
def test_remove():
    """Check that ``Dataset.remove`` reduces the labels found in a text.

    With the emoji and ``tm_words`` mappings added, the first text is
    expected to yield two labels.  After removing ``"~good~"``, a text that
    still contains "good" and "morning" is expected to yield only one.
    """
    dataset = Dataset()
    # Load and register each mapping in turn (emojis first, then tm_words).
    for loader in (dataset.load_emojis, dataset.tm_words):
        dataset.add(loader())

    labels_before = dataset.klass("xxx good morning xxx asdfa")
    print(labels_before)
    assert len(labels_before) == 2

    dataset.remove("~good~")

    labels_after = dataset.klass("xxx good xxx morning xxx")
    print(labels_after)
    assert len(labels_after) == 1
Beispiel #2
0
def test_klass():
    """Check that every label returned by ``klass`` is in ``klasses``."""
    dataset = Dataset()
    # Only the ``tm_words`` mapping is registered here; emojis are not loaded.
    dataset.add(dataset.tm_words())
    known_labels = dataset.klasses
    found_labels = dataset.klass("xxx good xxx morning xxT")
    assert all(label in known_labels for label in found_labels)
Beispiel #3
0
def emo_data(lang='zh'):
    """Label tweets with emoji classes and store the labelled ones.

    Every ``*.gz`` file found in ``data/<lang>`` is read with ``load_model``;
    the loaded object is iterated with ``.items()`` as ``key -> tweets``.
    Each tweet's ``'text'`` is passed to ``Dataset.klass`` (the dataset is
    built with ``text_transformations=False`` and the emoji mapping only).
    Tweets that receive at least one label get it stored under ``'klass'``
    (the tweet is modified in place) and are kept; the rest are dropped.
    Files with at least one kept tweet are written with ``save_model`` to an
    ``emo`` sub-directory next to the input, under the same base name.

    :param lang: Name of the sub-directory of ``data`` to process.
    :type lang: str
    """
    fnames = glob(join('data', lang, '*.gz'))
    ds = Dataset(text_transformations=False)
    ds.add(ds.load_emojis())
    for fname in fnames:
        output = {}
        for key, tweets in load_model(fname).items():
            inner = []
            for tweet in tweets:
                label = ds.klass(tweet['text'])
                # len() is used instead of truthiness because the type
                # returned by ``klass`` is not visible from here.
                if len(label) == 0:
                    continue
                tweet['klass'] = label
                inner.append(tweet)
            if inner:
                output[key] = inner
        if not output:
            # Nothing labelled in this file: do not create an empty output.
            continue
        output_dir = join(dirname(fname), 'emo')
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs(output_dir, exist_ok=True)
        save_model(output, join(output_dir, basename(fname)))
Beispiel #4
0
def test_map():
    """Check that a user-supplied word-to-label mapping is used by ``klass``.

    The mapping ``buenos -> malos`` is added, and the label ``"malos"`` is
    expected among the labels of a text containing "buenos".
    """
    dataset = Dataset()
    dataset.add({"buenos": "malos"})
    labels = dataset.klass("en estos buenos dias")
    print(labels)
    assert "malos" in labels