Esempio n. 1
0
 def _save_df(cls, df: pd.DataFrame) -> DPTmpFile:
     """Write ``df`` to a new temporary file via ``ArrowFormat`` and return that file.

     The input is first passed through ``to_df`` and then handed to
     ``process_df`` (whose return value is not used here) before being saved.
     The size of the written file is reported at debug level.
     """
     tmp_file = DPTmpFile(ArrowFormat.ext)
     frame = to_df(df)
     # process_df is called for its effect on `frame`; the result is discarded.
     process_df(frame)
     ArrowFormat.save_file(tmp_file.name, frame)
     size_on_disk = os.path.getsize(tmp_file.file)
     log.debug(f"Saved df to {tmp_file} ({size_on_disk} bytes)")
     return tmp_file
Esempio n. 2
0
def check_df_equal(left: pd.DataFrame, right: pd.DataFrame, **kwargs):
    """
    Assert that two DataFrames are equal once both have been run through process_df.

    Column/index order is ignored (``check_like=True``); any extra keyword
    arguments are forwarded to ``pd.testing.assert_frame_equal``.
    NOTE - both inputs are processed without copying, so this mutates the dfs
    """
    processed_left, processed_right = process_df(left), process_df(right)
    pd.testing.assert_frame_equal(
        processed_left,
        processed_right,
        check_like=True,
        **kwargs,
    )
Esempio n. 3
0
    def convert_csv_pd_(string: str) -> pd.DataFrame:
        """Parse an indented, comma-separated CSV snippet into a processed DataFrame.

        The text is dedented and stripped before parsing, the ``timedelta_col1``
        column is converted with ``pd.to_timedelta``, and the frame is then passed
        to ``process_df`` (return value unused) before being returned.
        """
        import textwrap
        from io import StringIO

        from datapane.common.df_processor import process_df

        csv_text = textwrap.dedent(string).strip()
        frame = pd.read_csv(StringIO(csv_text), engine="c", sep=",")
        # The CSV reader leaves this column as text; turn it into real timedeltas.
        frame["timedelta_col1"] = pd.to_timedelta(frame["timedelta_col1"])
        process_df(frame)
        return frame
Esempio n. 4
0
    def _test_order(df: pd.DataFrame):
        """Assert that column order survives both process_df and an Arrow save/load."""
        # Column order after processing a copy must match the input.
        processed = process_df(df, copy=True)
        assert list(df.columns) == list(processed.columns)

        # Column order after a round-trip through Arrow must match as well.
        roundtripped = save_load_arrow(tmp_path, df)
        assert list(df.columns) == list(roundtripped.columns)
Esempio n. 5
0
    def _test_df(df: pd.DataFrame, expected_types: List[str]):
        """Check null preservation, Arrow round-tripping and resulting dtypes for ``df``.

        ``expected_types`` lists the dtype names (as produced by ``str(dtype)``)
        expected for each column after processing and reloading.
        """
        converted = df.convert_dtypes()
        processed = process_df(df, copy=True)

        # Neither pandas' own conversion nor process_df may move or drop nulls.
        null_mask = pd.isnull(df)
        pd.testing.assert_frame_equal(null_mask, pd.isnull(converted))
        pd.testing.assert_frame_equal(pd.isnull(df), pd.isnull(processed))

        # The processed frame must survive a save/load cycle unchanged.
        reloaded = save_load_arrow(tmp_path, processed)
        pd.testing.assert_frame_equal(processed, reloaded)
        actual_types = [str(dtype) for dtype in reloaded.dtypes]
        assert actual_types == expected_types
Esempio n. 6
0
def convert_csv_pd(string: str, process: bool = False) -> pd.DataFrame:
    """Helper function to convert a well-formatted csv into a DataFrame.

    The text is dedented and stripped, then parsed with the C engine using ","
    as the separator. If that raises ``ParserError``, a warning is logged and
    parsing is retried with the python engine and ``sep=None`` so the
    delimiter is auto-detected.

    Args:
        string: CSV content; common leading indentation is removed first.
        process: When true, the parsed frame is passed through ``process_df``.

    Returns:
        The parsed (and optionally processed) DataFrame.
    """
    buf: TextIO = StringIO(textwrap.dedent(string).strip())

    try:
        df = pd.read_csv(buf, engine="c", sep=",")
    except ParserError as e:
        log.warning(f"Error parsing CSV file ({e}), trying python fallback")
        # The failed C-engine attempt has already read from the buffer; rewind
        # so the fallback parses the whole document instead of whatever is left.
        buf.seek(0)
        df = pd.read_csv(buf, engine="python", sep=None)

    if process:
        df = process_df(df)
    return df
Esempio n. 7
0
def test_parse_categories_roundtrip(tmp_path: Path):
    """Processing a frame directly and loading it back from Arrow must agree.

    ``str1`` holds 10000 distinct strings while ``str2`` cycles through 25
    values; only ``str2`` is expected to be reported as a parsed category.
    """
    row_ids = range(10000)
    source = pd.DataFrame(
        {
            "str1": [str(i) for i in row_ids],
            "str2": [str(i % 25) for i in row_ids],
        }
    )

    # Reference result: process a copy in memory.
    processed = process_df(source, copy=True)
    # Result under test: save to Arrow and load back.
    reloaded = save_load_arrow(tmp_path, source)

    _check_categories_parsed(reloaded, ["str2"])
    pd.testing.assert_frame_equal(processed, reloaded)