def _save_df(cls, df: pd.DataFrame) -> DPTmpFile:
    """Write ``df`` to a temporary Arrow file and return that file.

    The input is passed through ``to_df`` and ``process_df`` before
    ``ArrowFormat.save_file`` writes it to the temporary file's name.

    Args:
        df: The data to export.

    Returns:
        The ``DPTmpFile`` that the data was written to.
    """
    fn = DPTmpFile(ArrowFormat.ext)
    frame = to_df(df)
    # process_df's return value is not used here; presumably it works on
    # ``frame`` in place -- TODO confirm against its definition.
    process_df(frame)
    ArrowFormat.save_file(fn.name, frame)
    message = f"Saved df to {fn} ({os.path.getsize(fn.file)} bytes)"
    log.debug(message)
    return fn
def test_to_df_structured_array():
    """Check that a structured array's field names become the column labels."""
    field_types = [("name", "U10"), ("age", "i4"), ("weight", "f4")]
    records = np.array(
        [("Rex", 9, 81.0), ("Fido", 3, 27.0)],
        dtype=field_types,
    )

    out_df = to_df(records)

    actual_columns = out_df.columns.tolist()
    expected_columns = ["name", "age", "weight"]
    unittest.TestCase().assertListEqual(actual_columns, expected_columns)
def save_df(df: pd.DataFrame) -> DPTmpFile:
    """Export a DataFrame to a temporary Arrow file ready for uploading.

    Args:
        df: The data to export.

    Returns:
        The ``DPTmpFile`` that the data was written to.
    """
    fn = DPTmpFile(ArrowFormat.ext)
    # Work on the result of to_df rather than on the caller's object
    # (the original note describes this as a copy -- TODO confirm).
    frame = to_df(df)
    # NOTE: process_df is deliberately not called here; per the original
    # note it already runs inside ArrowFormat.save_file.
    ArrowFormat.save_file(fn.name, frame)
    message = f"Saved df to {fn} ({os.path.getsize(fn.file)} bytes)"
    log.debug(message)
    return fn
def test_to_df_numpy_array_redundant_dims_scaler():
    """Check that a 1x1x1 array yields one "Result" column holding the value."""
    out_df = to_df(np.array([[[3]]]))
    # Compare labels as a plain list. ``Index == list`` is element-wise, so
    # asserting on it only works for exactly one column and raises ValueError
    # (not a readable assertion failure) when the column count differs.
    assert out_df.columns.tolist() == ["Result"]
    assert out_df.iloc[0, 0] == 3
def test_to_df_numpy_array_redundant_dims():
    """Check that a 1x1x2 array yields a two-row, single "Result" column frame."""
    out_df = to_df(np.array([[[2, 3]]]))
    # Compare labels as a plain list. ``Index == list`` is element-wise, so
    # asserting on it only works for exactly one column and raises ValueError
    # (not a readable assertion failure) when the column count differs.
    assert out_df.columns.tolist() == ["Result"]
    assert out_df.shape == (2, 1)
def test_to_df_2_dim_numpy_array():
    """Check that a 2x2 array keeps its shape and gets columns labelled 0 and 1."""
    matrix = np.array([[2, 3], [4, 5]])

    out_df = to_df(matrix)

    column_labels = out_df.columns.tolist()
    unittest.TestCase().assertListEqual(column_labels, [0, 1])
    assert out_df.shape == (2, 2)
def assert_unnamed_col_works(col):
    """Assert that ``to_df(col)`` gives one "Result" column of ``len(col)`` rows.

    Args:
        col: A sized, column-like input without a name of its own
            (anything ``to_df`` accepts and ``len`` works on).
    """
    out_df = to_df(col)
    # Compare labels as a plain list. ``Index == list`` is element-wise, so
    # asserting on it only works for exactly one column and raises ValueError
    # (not a readable assertion failure) when the column count differs.
    assert out_df.columns.tolist() == ["Result"]
    assert len(out_df) == len(col)
def test_to_df_df():
    """Check that an empty DataFrame passed through ``to_df`` compares equal."""
    source_frame = pd.DataFrame()
    converted = to_df(source_frame)
    # check_like=True ignores the order of index and column labels.
    pd.testing.assert_frame_equal(source_frame, converted, check_like=True)
def assert_scalar_works(scalar):
    """Assert that ``to_df(scalar)`` gives a 1x1 "Result" frame holding ``scalar``.

    Args:
        scalar: The single value to convert; it is compared with ``==``
            against the only cell of the resulting frame.
    """
    out_df = to_df(scalar)
    # Compare labels as a plain list. ``Index == list`` is element-wise, so
    # asserting on it only works for exactly one column and raises ValueError
    # (not a readable assertion failure) when the column count differs.
    assert out_df.columns.tolist() == ["Result"]
    assert len(out_df) == 1
    assert out_df.iloc[0, 0] == scalar
def assert_named_col_works(col):
    """Assert that ``to_df(col)`` keeps the column's name and its length.

    Args:
        col: A sized, column-like input exposing a ``name`` attribute.
    """
    out_df = to_df(col)

    actual_labels = out_df.columns.tolist()
    assert actual_labels == [col.name]

    row_count = len(out_df)
    assert row_count == len(col)