def test_find():
    """Check that ``wpi_utils.find`` yields the expected dump paths.

    For each glob pattern, the paths yielded for ``DOWNLOAD_DIR`` must be
    exactly the single expected file under that directory.
    """
    # (glob pattern, path components of the expected hit below DOWNLOAD_DIR)
    cases = (
        (
            "dewiki*redirect*.sql.gz",
            ("de", "20091023", "dewiki-20091023-redirect.sql.gz"),
        ),
        (
            "enwiki*langlinks*.sql.gz",
            ("en", "20091017", "enwiki-20091017-langlinks.sql.gz"),
        ),
    )
    for pattern, parts in cases:
        found = list(wpi_utils.find(pattern, DOWNLOAD_DIR))
        wanted = [os.path.join(DOWNLOAD_DIR, *parts)]
        eq_(found, wanted)
def test_insert_statements():
    """Compare the insert statements of every dump with the expected ones.

    Each ``*.sql.gz`` file found for ``DOWNLOAD_DIR`` is passed to
    ``wpi_psql.insert_statements``; the resulting list must equal the entry
    of ``EXPECTED_STMTS`` keyed by the table name parsed from the file name.
    """
    # File names look like "<language>wiki-<YYYYMMDD>-<table>.<rest>".
    fn_pat = re.compile(
        r'''(?P<language>\w+)wiki-(?P<date>\d{8})-(?P<table>[\w_]+).*''')
    for dump_path in sorted(wpi_utils.find('*.sql.gz', DOWNLOAD_DIR)):
        filename = os.path.basename(dump_path)
        mat = fn_pat.match(filename)
        # Fail with a readable message instead of an AttributeError on
        # ``None.group`` when a file does not follow the naming scheme.
        assert mat is not None, "unexpected dump file name: %r" % filename
        # Parse the dump once and reuse the result in the comparison.
        stmts = list(wpi_psql.insert_statements(dump_path))
        eq_(stmts, EXPECTED_STMTS[mat.group('table')])
def test_categorylink_pipeline():
    """Check the categorylinks pipeline output for each matching dump.

    Every ``*categorylinks*.sql.gz`` file found for ``DOWNLOAD_DIR`` is
    opened with ``wpi_utils.open_compressed`` and fed to
    ``wpi_psql.categorylinks_pipeline``; the collected output must equal
    ``EXPECTED_STMTS['categorylinks']``.
    """
    dump_paths = wpi_utils.find('*categorylinks*.sql.gz', DOWNLOAD_DIR)
    for dump_path in dump_paths:
        with wpi_utils.open_compressed(dump_path) as dump_file:
            # Drain the pipeline while the file is still open.
            produced = list(wpi_psql.categorylinks_pipeline(dump_file))
            eq_(produced, EXPECTED_STMTS['categorylinks'])