Exemple #1
0
def test_build():
    """
    Ensure that the build process builds the same DB that gets distributed.

    If `config.RAW_DATA_DIR` does not exist, `download_and_extract_raw_data()`
    is called first. A fresh database is then built in a temporary directory
    and its flattened wordlist info is compared, entry by entry, with the
    info from the default DB. The temporary directory is always removed.
    """
    if not os.path.exists(config.RAW_DATA_DIR):
        download_and_extract_raw_data()

    tempdir = tempfile.mkdtemp('.wordfreq')
    try:
        db_file = os.path.join(tempdir, 'test.db')
        load_all_data(config.RAW_DATA_DIR, db_file, do_it_anyway=True)
        conn = sqlite3.connect(db_file)
        try:
            # Compare the information we got to the information in the
            # default DB.
            new_info = flatten_list_of_dicts(wordlist_info(conn))
            old_info = flatten_list_of_dicts(wordlist_info(None))
            eq_(len(new_info), len(old_info))
            for new_entry, old_entry in zip(new_info, old_info):
                # Don't test Greek and emoji on Python 2; we can't make them
                # consistent with Python 3.
                if PYTHON2 and (
                    (u'lang', u'el') in new_entry
                    or (u'wordlist', u'twitter') in new_entry
                ):
                    continue
                eq_(new_entry, old_entry)
        finally:
            # Release the handle on test.db before its directory is deleted;
            # an open connection leaks and can block removal on some
            # platforms.
            conn.close()
    finally:
        shutil.rmtree(tempdir)
Exemple #2
0
def test_python2():
    """
    Python 2 got to skip two tests up there, because we built a slightly
    wrong wordlist. Now let's test that, in normal operation, it will refuse
    to build this wordlist.

    The test does nothing unless `PYTHON2` is true. It passes when
    `load_all_data` raises `UnicodeError` and fails with an `AssertionError`
    when the call returns normally.
    """
    if not PYTHON2:
        return

    # mkstemp() returns an (fd, path) tuple and leaves cleanup to the caller,
    # so use a self-cleaning named temporary file and pass its path instead.
    with tempfile.NamedTemporaryFile(suffix='.db') as db_file:
        try:
            load_all_data(config.RAW_DATA_DIR, db_file.name)
        except UnicodeError:
            # This is the correct case
            pass
        else:
            # Raised explicitly (rather than `assert False`) so the check
            # still runs when assertions are stripped with -O.
            raise AssertionError("The database should not have been built")
Exemple #3
0
 def run(self):
     """
     Import `load_all_data` from `wordfreq.build` and call it with no
     arguments.

     The import happens at call time, inside the method, rather than at
     module import time.
     """
     from wordfreq.build import load_all_data as build_database

     build_database()