def test_read_csv(self):
    """Check that a directory of CSV files can be read through ``read_dask``.

    The frame from ``self.create_dataframe()`` is written twice into the same
    temporary directory; the ``test_1`` column of the result returned by
    ``read_dask`` is then expected to have length 8.
    """
    frame = self.create_dataframe()
    with TemporaryDirectory() as tmp:
        # Stage two identical CSV files side by side in the directory.
        for csv_name in ("test-1.csv", "test-2.csv"):
            destination = os.path.join(tmp, csv_name)
            with atomic_write(destination, as_file=False) as staged_path:
                frame.to_csv(staged_path)

        target = create_csv_target(tmp)
        loaded = target.read_dask(check_complete=False)
        # Must run while the directory still exists: the length is computed
        # from the files on disk.
        self.assertEqual(len(loaded["test_1"]), 8)
def test_file_exists(self):
    """Ensure an error is raised when a file exists

    The pre-existing file is created inside a temporary directory so that
    ``atomic_write`` is handed a real filesystem path. ``TemporaryFile`` is
    unsuitable here: on POSIX its ``name`` attribute is an integer file
    descriptor rather than a path, and the file has no visible directory
    entry.
    """
    # Local imports keep this test self-contained regardless of which
    # tempfile/os helpers the module imports at the top.
    import os
    from tempfile import TemporaryDirectory

    with TemporaryDirectory() as tmp:
        existing = os.path.join(tmp, "existing.txt")
        with open(existing, "wb") as handle:
            handle.write(b"already here!")

        with self.assertRaises(FileExistsError):
            with atomic_write(existing):
                # Reaching the body means the existing file was not detected;
                # this AssertionError is not swallowed by assertRaises.
                raise AssertionError(
                    "File exists was not properly detected!")
def test_atomic_write_by_name(self):
    """Ensure file name is correctly returned vs file object

    Opens ``atomic_write`` with its third positional argument set to
    ``False`` (presumably ``as_file`` -- confirm against the signature) and
    checks that the yielded value is a path string whose extension matches
    the ``.txt`` extension of the requested target.
    """
    with TemporaryDirectory() as tmp:
        fp = path.join(tmp, "asdf.txt")
        with atomic_write(fp, "w", False) as f:
            # Only the final path component matters for these checks.
            temp_filename = path.basename(f)
            ext = path.splitext(temp_filename)[1]
            self.assertIsInstance(temp_filename, str)
            self.assertEqual(".txt", ext)
def test_atomic_failure(self):
    """Ensure that file does not exist after failure during write

    A ``FakeFileFailure`` is raised inside the ``atomic_write`` block; after
    it propagates, neither the temporary file nor the target path may exist.
    """
    with TemporaryDirectory() as tmp:
        fp = path.join(tmp, "asdf.txt")

        with self.assertRaises(FakeFileFailure):
            with atomic_write(fp, "w") as f:
                tmpfile = f.name
                # The temporary file must be present while the write is
                # still in progress.
                self.assertTrue(path.exists(tmpfile))
                raise FakeFileFailure()

        # unittest assertions (rather than bare ``assert``) still run under
        # ``python -O`` and report which check failed.
        self.assertFalse(path.exists(tmpfile))
        self.assertFalse(path.exists(fp))
def test_get_parquet_column(self):
    """Ensure a column of a written parquet file matches its source values."""
    # Build a small two-column frame; "test_1" is the column under test.
    expected_values = [1, 2]
    column_name = "test_1"
    frame = DataFrame(data={column_name: expected_values, "test_2": [3, 4]})

    # Write the frame as a parquet file inside a throwaway directory, then
    # hand the file, column name and expected values to compare_columns.
    with TemporaryDirectory() as tmp:
        parquet_path = os.path.join(tmp, "test.parquet")
        with atomic_write(parquet_path, as_file=False) as staged_path:
            frame.to_parquet(staged_path)
        self.compare_columns(parquet_path, column_name, expected_values)
def test_atomic_write(self):
    """Ensure file exists after being written successfully

    While the ``atomic_write`` block is open the target path must not exist;
    once it closes, the temporary file must be gone and the target must hold
    the written content.
    """
    with TemporaryDirectory() as tmp:
        fp = path.join(tmp, "asdf.txt")

        with atomic_write(fp, "w") as f:
            # Nothing may appear at the final path before the block exits.
            self.assertFalse(path.exists(fp))
            tmpfile = f.name
            f.write("asdf")

        # unittest assertions (rather than bare ``assert``) still run under
        # ``python -O`` and report which check failed.
        self.assertFalse(path.exists(tmpfile))
        self.assertTrue(path.exists(fp))

        with open(fp) as written:
            self.assertEqual(written.read(), "asdf")
def test_convert_xls_to_parquet(self):
    """Ensure a Parquet file can be created from an Excel file"""
    # create a data frame for testing
    test_list = [1, 2]
    test_column = "test_1"
    test_frame = DataFrame(data={test_column: test_list, "test_2": [3, 4]})

    # create a temporary directory and write the df as an Excel file for test
    with TemporaryDirectory() as tmp:
        fp = os.path.join(tmp, "test.xls")
        with atomic_write(fp, as_file=False) as pf:
            # Pass the sheet name by keyword: recent pandas deprecates
            # positional arguments to to_excel other than the writer.
            test_frame.to_excel(pf, sheet_name="Sheet1")

        parquet_file = convert_xls_to_parquet(fp, "Sheet1")
        self.assertTrue(os.path.exists(parquet_file))
        self.compare_columns(parquet_file, test_column, test_list)