def test_build():
    """
    Ensure that the build process builds the same DB that gets distributed.

    Builds a fresh database from the raw data into a temporary directory,
    then compares its flattened wordlist info, entry by entry, against the
    info reported for the default database (``wordlist_info(None)``).
    The temporary directory is always removed, even if the test fails.
    """
    if not os.path.exists(config.RAW_DATA_DIR):
        download_and_extract_raw_data()

    tempdir = tempfile.mkdtemp('.wordfreq')
    try:
        db_file = os.path.join(tempdir, 'test.db')
        load_all_data(config.RAW_DATA_DIR, db_file, do_it_anyway=True)

        # Read everything we need from the freshly built DB, then close the
        # connection right away so no handle to test.db is still open when
        # the temporary directory is deleted below.
        conn = sqlite3.connect(db_file)
        try:
            new_info = flatten_list_of_dicts(wordlist_info(conn))
        finally:
            conn.close()

        # Compare the information we got to the information in the default DB.
        old_info = flatten_list_of_dicts(wordlist_info(None))
        eq_(len(new_info), len(old_info))

        # Lengths were just checked to be equal, so zip() drops nothing.
        for new_entry, old_entry in zip(new_info, old_info):
            # Don't test Greek and emoji on Python 2; we can't make them
            # consistent with Python 3.
            if PYTHON2 and (
                (u'lang', u'el') in new_entry
                or (u'wordlist', u'twitter') in new_entry
            ):
                continue
            eq_(new_entry, old_entry)
    finally:
        shutil.rmtree(tempdir)
def test_python2():
    """
    Python 2 got to skip two comparisons in test_build, because it builds a
    slightly wrong wordlist. Now let's test that, in normal operation (that
    is, without ``do_it_anyway=True``), it will refuse to build this
    wordlist by raising UnicodeError.

    On Python 3 this test does nothing.
    """
    if not PYTHON2:
        return

    # tempfile.mkstemp() returns an (fd, path) tuple, not a filename, and
    # leaves both the descriptor and the file behind. NamedTemporaryFile
    # gives us a real path via `.name` and cleans up after itself.
    # NOTE(review): the build is expected to be refused before the path is
    # ever opened -- confirm against load_all_data.
    with tempfile.NamedTemporaryFile(suffix='.db') as db_target:
        try:
            load_all_data(config.RAW_DATA_DIR, db_target.name)
        except UnicodeError:
            # This is the correct case
            pass
        else:
            # Raised explicitly (rather than `assert False`) so the check
            # still fires when Python runs with -O.
            raise AssertionError("The database should not have been built")
def run(self):
    """
    Run this command by calling ``wordfreq.build.load_all_data()`` with its
    default arguments.
    """
    # The import is deferred until the command actually runs.
    # NOTE(review): presumably so that loading this module does not require
    # wordfreq.build to be importable yet -- confirm.
    import wordfreq.build

    wordfreq.build.load_all_data()