def test_write_data_parquet(tmpdir, expected_df):
    """Write ``expected_df`` as parquet and check it reads back unchanged.

    The frame is written through a ``FileSystemWriter`` configured for
    parquet output into a fresh directory under ``tmpdir``, then reloaded
    with ``cudf.read_parquet`` and compared with ``equals``.
    """
    out_dir = tmpdir.mkdir("tmp_test_fs_writer")
    parquet_path = str(out_dir.join("person.parquet"))

    writer_config = {
        "type": "fs",
        "output_path": parquet_path,
        "output_format": "parquet",
    }
    FileSystemWriter(writer_config).write_data(expected_df)

    round_tripped = cudf.read_parquet(parquet_path)
    assert round_tripped.equals(expected_df)
def test_write_data_json(tmpdir, expected_df):
    """Write ``expected_df`` as records-oriented JSON and read it back.

    The frame is written through a ``FileSystemWriter`` configured for JSON
    output into a fresh directory under ``tmpdir``, then reloaded with
    ``cudf.read_json`` using the same ``orient`` and compared with ``equals``.
    """
    out_dir = tmpdir.mkdir("tmp_test_fs_writer")
    json_path = str(out_dir.join("person.json"))

    writer_config = {
        "type": "fs",
        "output_path": json_path,
        "output_format": "json",
        "orient": "records",
    }
    FileSystemWriter(writer_config).write_data(expected_df)

    round_tripped = cudf.read_json(json_path, orient="records")
    assert round_tripped.equals(expected_df)
def test_write_data_parquet(test_output_base_path, expected_df, df):
    """Write ``df`` as parquet into a directory and compare the first file.

    Any directory left at the target path is removed first. After writing,
    the first entry returned by ``glob`` inside the output directory is
    loaded with pandas (pyarrow engine) and compared against ``expected_df``.
    """
    target_dir = "%s/person_parquet" % test_output_base_path

    # isdir() is already False for a missing path, so it covers the
    # existence check as well.
    if os.path.isdir(target_dir):
        shutil.rmtree(target_dir)

    writer_config = {
        "type": "fs",
        "output_path": target_dir,
        "output_format": "parquet",
    }
    FileSystemWriter(writer_config).write_data(df)

    written = glob.glob("%s/*" % target_dir)
    first_file = written[0]
    loaded = pd.read_parquet(first_file, engine="pyarrow")
    assert loaded.equals(expected_df)
def test_write_data_json(test_output_base_path, expected_df):
    """Write ``expected_df`` as records-oriented JSON and read it back.

    A stale output file at the target path is deleted before writing. The
    written file is reloaded with ``cudf.io.json.read_json`` using the same
    ``orient`` and compared against ``expected_df`` with ``equals``.
    """
    json_path = "%s/person.json" % test_output_base_path

    stale_output_present = os.path.exists(json_path)
    if stale_output_present:
        os.remove(json_path)

    writer_config = {
        "type": "fs",
        "output_path": json_path,
        "output_format": "json",
        "orient": "records",
    }
    FileSystemWriter(writer_config).write_data(expected_df)

    reloaded = cudf.io.json.read_json(json_path, orient="records")
    assert reloaded.equals(expected_df)
def test_write_data_text(test_output_base_path, df):
    """Write ``df`` as text and verify the header and first three rows.

    A stale output file at the target path is deleted before writing. The
    frame is written through a ``FileSystemWriter`` configured with
    ``output_format="text"`` and ``index=False``; the result is then parsed
    with ``csv.reader`` and its leading rows are compared with the expected
    header and records.
    """
    test_output_path = "%s/person.csv" % (test_output_base_path)
    if os.path.exists(test_output_path):
        os.remove(test_output_path)

    config = {
        "type": "fs",
        "output_path": test_output_path,
        "output_format": "text",
        "index": False,
    }
    writer = FileSystemWriter(config)
    writer.write_data(df)

    # The csv module requires newline="" so that it, not the text layer,
    # handles line endings (including newlines inside quoted fields).
    with open(test_output_path, newline="") as f:
        data = list(csv.reader(f))

    expected_rows = [
        ["firstname", "lastname", "gender"],
        ["Emma", "Olivia", "F"],
        ["Ava", "Isabella", "F"],
        ["Sophia", "Charlotte", "F"],
    ]
    # Comparing a slice means a truncated file fails with a readable
    # assertion diff instead of an IndexError on a missing row.
    assert data[:4] == expected_rows
def get_writer(self):
    """Return a new ``FileSystemWriter`` built from ``self.config``."""
    writer = FileSystemWriter(self.config)
    return writer