예제 #1
0
def test_date():
    """Check ``Vocabulary.date`` for a single day and for a list of days.

    A vocabulary built from one day must expose a ``datetime``; one built
    from several days must expose ``None``.
    """
    from datetime import datetime

    single_day = Vocabulary(dict(year=2020, month=2, day=14))
    assert isinstance(single_day.date, datetime)

    several_days = Vocabulary(
        [
            dict(year=2020, month=2, day=14),
            dict(year=2021, month=2, day=14),
        ]
    )
    assert several_days.date is None
예제 #2
0
def test_country():
    """Compare a country-restricted vocabulary with the language-wide one.

    The vocabulary built without ``country`` must report more
    ``update_calls`` than the one restricted to ``country="MX"``.
    """
    day = dict(year=2020, month=2, day=14)

    mx_voc = Vocabulary(day, lang="Es", country="MX")
    assert len(mx_voc)

    es_voc = Vocabulary(day, lang="Es")
    # Kept for debugging: sizes of the language-wide and the MX vocabularies.
    print(len(es_voc), len(mx_voc))
    assert es_voc.voc.update_calls > mx_voc.voc.update_calls
예제 #3
0
def test_histogram():
    """Every histogram bin except the last must hold at least 30 elements."""
    min_elements = 30
    voc = Vocabulary(dict(year=2020, month=2, day=14), lang="En")
    hist = voc.histogram(min_elements=min_elements)

    keys = list(hist.keys())
    # The last bin is excluded from the size check; it is only printed.
    assert all(len(hist[key]) >= min_elements for key in keys[:-1])

    last_key = keys[-1]
    print(last_key, hist[last_key])
예제 #4
0
def test_remove():
    """Removing the common words must shrink the vocabulary.

    Two scenarios run on a freshly built vocabulary: the default arguments,
    and ``common_words(quantile=0.85)`` combined with
    ``remove(..., bigrams=False)``.
    """
    scenarios = [
        (dict(), dict()),
        (dict(quantile=0.85), dict(bigrams=False)),
    ]
    for common_kwargs, remove_kwargs in scenarios:
        voc = Vocabulary(dict(year=2020, month=2, day=14))
        size_before = len(voc.voc)
        voc.remove(voc.common_words(**common_kwargs), **remove_kwargs)
        assert size_before > len(voc.voc)
예제 #5
0
def test_TopicDetection_laplace_smoothing():
    """The smoothed value of "the" must lie strictly between 0 and 1."""
    from text_models.vocabulary import TopicDetection

    today = Vocabulary(dict(year=2020, month=2, day=14), lang="En", country="US")
    yesterday = Vocabulary(dict(year=2020, month=2, day=13), lang="En", country="US")

    smoothed = TopicDetection.laplace_smoothing(today, yesterday)
    assert 0 < smoothed["the"] < 1
예제 #6
0
def test_dict_functions():
    """Exercise the dict-like interface of ``Vocabulary``.

    Covers iteration, ``items``, membership, ``get`` on a missing key and
    the agreement between item access and ``get``.
    """
    voc = Vocabulary(dict(year=2020, month=2, day=14))
    size = len(voc)

    # Iterating must yield as many entries as len() reports.
    assert size == sum(1 for _ in voc)

    keys = [key for key, _ in voc.items()]
    assert len(voc) == len(keys)

    first_key = keys[0]
    assert first_key in voc
    assert "BLABLA" not in voc
    assert voc.get("BLABLA") == 0
    assert voc[first_key] == voc.get(first_key)
예제 #7
0
def test_vocabulary_dict():
    """A day given as a dict or as an attribute object must load the same data."""

    class DayLike:
        """Minimal object exposing ``year``, ``month`` and ``day`` attributes."""

        def __init__(self, year, month, day):
            self.year, self.month, self.day = year, month, day

    from_dict = Vocabulary(dict(year=2020, month=2, day=14))
    assert from_dict["buenos"]

    from_object = Vocabulary(DayLike(2020, 2, 14))
    assert from_dict["buenos"] == from_object["buenos"]
예제 #8
0
def test_available_data():
    """Check how many dates ``Vocabulary.available_dates`` returns.

    With ``n=1`` and two countries exactly one date is expected; with
    ``n=-1`` and no country filter all three days of the range are expected.
    """
    from text_models.vocabulary import Vocabulary
    from text_models.utils import date_range

    long_range = date_range(
        dict(year=2020, month=2, day=6),
        dict(year=2020, month=5, day=13),
    )
    picked = Vocabulary.available_dates(
        long_range, n=1, countries=['CU', 'MX'], lang="Es"
    )
    assert len(picked) == 1

    short_range = date_range(
        dict(year=2020, month=2, day=6),
        dict(year=2020, month=2, day=8),
    )
    picked = Vocabulary.available_dates(
        short_range, n=-1, countries=None, lang="Es"
    )
    assert len(picked) == 3
예제 #9
0
def test_TopicDetection_probability():
    """The probability assigned to "the" must lie strictly between 0 and 1."""
    from text_models.vocabulary import TopicDetection

    us_voc = Vocabulary(dict(year=2020, month=2, day=14), lang="En", country="US")
    probabilities = TopicDetection.probability(us_voc.voc)
    assert 0 < probabilities["the"] < 1
예제 #10
0
def test_vocabulary_data_lst():
    """Build a vocabulary from a list of pandas timestamps."""
    import pandas as pd

    timestamps = list(pd.date_range("2020-02-13", "2020-02-14"))
    print(len(timestamps))

    combined = Vocabulary(timestamps)
    assert combined["buenos"]
    # Checked after construction: the input list must still hold both days.
    assert len(timestamps) == 2
예제 #11
0
def test_Vocabulary_file():
    """Load a vocabulary from a local ``.gz`` file.

    The file ``es-PT.gz`` is downloaded into the working directory when it
    is not already there, then loaded by name. The loaded counter must
    report 195 ``update_calls``.
    """
    from os.path import isfile
    # Imported locally, like the other helpers, so the test does not depend
    # on a module-level ``request`` name being in scope.
    from urllib import request

    from text_models.vocabulary import Vocabulary

    local_name = 'es-PT.gz'
    if not isfile(local_name):
        url = "https://github.com/INGEOTEC/text_models/releases/download/20220110/es-PT.gz"
        request.urlretrieve(url, local_name)
    voc = Vocabulary(local_name)
    assert voc.voc.update_calls == 195
예제 #12
0
def test_TopicDetection_topic_wordcloud():
    """After ``topic_wordcloud`` the detector's ``_voc`` differs from the raw vocabulary.

    The two objects must compare unequal while having the same length.
    """
    from text_models.vocabulary import TopicDetection

    day = dict(year=2020, month=2, day=14)
    reference = Vocabulary(day, lang="En", country="US")

    detector = TopicDetection(day)
    detector.topic_wordcloud()

    assert detector._voc != reference
    assert len(detector._voc) == len(reference)
예제 #13
0
def test_co_occurrence():
    """Check ``co_occurrence("amor")`` with and without per-state data.

    Without states the result is a dict containing "amistad"; with
    ``country="MX", states=True`` the result is keyed by state and the
    "MX-DIF" entry contains "amistad".
    """
    day = dict(year=2020, month=2, day=14)

    plain = Vocabulary(day).co_occurrence("amor")
    assert isinstance(plain, dict)
    assert "amistad" in plain

    by_state = Vocabulary(day, country="MX", states=True).co_occurrence("amor")
    assert "MX-DIF" in by_state
    assert "amistad" in by_state["MX-DIF"]
예제 #14
0
def test_common_words():
    """Check how ``quantile`` and ``bigrams`` affect the size of ``common_words``.

    The default call must return more than 10000 words; disabling bigrams
    must return fewer words than keeping them, and lowering the quantile
    from 0.85 to 0.80 must return fewer words still.
    """
    voc = Vocabulary(dict(year=2020, month=2, day=14))

    default_words = voc.common_words()
    assert len(default_words) > 10000

    with_bigrams = voc.common_words(quantile=0.85)
    print(len(with_bigrams))

    without_bigrams = voc.common_words(quantile=0.85, bigrams=False)
    assert len(with_bigrams) > len(without_bigrams)

    lower_quantile = voc.common_words(quantile=0.80, bigrams=False)
    assert len(without_bigrams) > len(lower_quantile)
    print(len(lower_quantile))
예제 #15
0
def test_init():
    """Check the type of ``Vocabulary.voc`` for the different constructor inputs.

    A single day or a list of days must give a ``Counter``, except when
    ``states=True`` is passed, which must give a ``dict``. A vocabulary can
    also be built from an existing counter.
    """
    from microtc.utils import Counter

    day = dict(year=2020, month=2, day=14)
    other_day = dict(year=2021, month=2, day=14)

    # Single day.
    english = Vocabulary(day, lang="En")
    assert isinstance(english.voc, Counter)
    assert english._n_words > 0

    # Built from the counter of an existing vocabulary.
    copied = Vocabulary(english.voc)
    assert copied["love"] == english["love"]

    us_only = Vocabulary(day, lang="En", country="US")
    assert isinstance(us_only.voc, Counter)

    mx_states = Vocabulary(day, lang="Es", country="MX", states=True)
    assert isinstance(mx_states.voc, dict)

    # Several days.
    both_days = [other_day, day]

    merged = Vocabulary(both_days)
    assert isinstance(merged.voc, Counter)

    merged_us = Vocabulary(both_days, lang="En", country="US")
    assert isinstance(merged_us.voc, Counter)

    merged_states = Vocabulary(both_days, lang="Es", country="MX", states=True)
    assert isinstance(merged_states.voc, dict)
예제 #16
0
def test_previous_day():
    """``previous_day`` of the 14th must return a vocabulary dated the 13th."""
    voc = Vocabulary(dict(year=2020, month=2, day=14), lang="En")
    previous = voc.previous_day()
    assert previous.date.day == 13
예제 #17
0
def test_day_words():
    """``day_words`` must return a ``Vocabulary`` instance (never ``None``)."""
    source = Vocabulary(dict(year=2020, month=2, day=14), lang="En")
    day_vocabulary = source.day_words()

    assert day_vocabulary is not None
    assert isinstance(day_vocabulary, Vocabulary)
    print(day_vocabulary.date)
예제 #18
0
def test_probability():
    """After ``probability()`` the value of "the" must lie strictly between 0 and 1."""
    english = Vocabulary(dict(year=2020, month=2, day=14), lang="En")
    english.probability()
    assert 0 < english["the"] < 1
예제 #19
0
def test_remove_emojis():
    """Smoke test: ``remove_emojis`` must run without raising."""
    day = dict(year=2020, month=2, day=14)
    Vocabulary(day).remove_emojis()