def test_df_dtypes(csvfile):
    """Check the shape of the table returned by ``_io.df_dtypes``.

    The sample read from ``csvfile`` is passed to ``df_dtypes``; the result
    must be a ``pd.DataFrame`` with exactly the columns "columns" and "types".
    """
    sample = _io.csv_sample(csvfile)
    dtypes_tbl = _io.df_dtypes(sample)
    # original note: pd.Series -> pd.DataFrame, i.e. a frame is expected back
    assert isinstance(dtypes_tbl, pd.DataFrame)
    # one column for the column name, one for its dtype
    n_cols = len(dtypes_tbl.columns)
    assert n_cols == 2
    col_names = tuple(dtypes_tbl.columns)
    assert col_names == ("columns", "types")
Exemple #2
0
def test_df_fix_schema(csvfile, flags):
    """Check that ``df_fix_schema`` applies the dtype requested per column.

    Every column reported as flagged by ``set_flags`` is mapped to "string"
    in ``flags``; after ``df_fix_schema`` those columns must report the
    dtype "string".
    """
    sample = csv_sample(csvfile)
    flags, flagged = set_flags(flags, sample.columns)
    # turn the flags into dtype requests: flagged column -> "string"
    string_requests = ((col, "string") for col in flagged)
    flags.update(string_requests)
    fixed_dtypes = df_fix_schema(sample, flags).dtypes
    observed = fixed_dtypes[flagged].astype(str).to_list()
    expected = ["string"] * len(flagged)
    assert observed == expected
Exemple #3
0
def test_df_schema_flags(csvfile, flags):
    """Check the entries produced by ``df_schema_flags`` for a sample frame.

    Asserted properties, in order:
    - every entry has "datatype" equal to "yesno";
    - element 0 of ``tuple(entry)`` matches the frame's columns, in order;
    - element 0 of ``tuple(entry.values())`` is ``_FLAG_FMT`` formatted with
      ``flags.instanceName`` and the corresponding column.
    """
    sample = csv_sample(csvfile)
    schema = df_schema_flags(sample, flags)

    datatypes = glom(schema, ["datatype"])
    assert datatypes == ["yesno"] * len(schema)

    # NOTE: index 0 relies on deterministic dictionary order
    first_keys = glom(schema, [(tuple, "0")])
    assert first_keys == sample.columns.to_list()

    first_values = glom(schema, [(T.values(), tuple, "0")])
    expected_fields = []
    for col in sample.columns:
        expected_fields.append(
            _FLAG_FMT.format(name=flags.instanceName, col=col)
        )
    assert first_values == expected_fields
Exemple #4
0
def test_df_schema_dtype(csvfile, flags):
    """Check the entries produced by ``df_schema_dtype`` for flagged columns.

    Asserted properties, in order:
    - every entry has "choices" equal to ``_TYPES``;
    - element 0 of ``tuple(entry)`` matches the flagged columns returned by
      ``set_flags``, in order;
    - element 0 of ``tuple(entry.values())`` is ``_FLAG_FMT`` formatted with
      ``flags.instanceName`` and the corresponding flagged column.
    """
    sample = csv_sample(csvfile)
    flags, flagged_cols = set_flags(flags, sample.columns)
    schema = df_schema_dtype(sample, flags)

    choices = glom(schema, ["choices"])
    assert choices == [_TYPES] * len(schema)

    # NOTE: index 0 relies on deterministic dictionary order
    first_keys = glom(schema, [(tuple, "0")])
    assert first_keys == flagged_cols

    first_values = glom(schema, [(T.values(), tuple, "0")])
    expected_fields = []
    for col in flagged_cols:
        expected_fields.append(
            _FLAG_FMT.format(name=flags.instanceName, col=col)
        )
    assert first_values == expected_fields
Exemple #5
0
def test_df_schema_flags_prompt(csvfile, flags):
    """Check the text table returned by ``df_schema_flags_prompt``.

    The first line must be the header (Columns | Types | Edit (Y/N)), the
    second a rule of three dash runs separated by "|", and each following
    row must start with the column name, its dtype and the flag field,
    separated by "|" with surrounding spaces.
    """
    sample = csv_sample(csvfile)

    prompt_lines = df_schema_flags_prompt(sample, flags).splitlines()
    hdr, hline, *txttbl = prompt_lines

    # three "-+" runs joined by a literal "|": drop the trailing "\|"
    rule_pattern = ("-+\\|" * 3)[:-2]
    assert re.match(rule_pattern, hline)

    col_sep = " +\\| +"
    cols_hdr = ["Columns", "Types", "Edit \\(Y/N\\)"]
    hdr_pattern = col_sep.join(cols_hdr)
    assert re.match(hdr_pattern, hdr.strip())

    # the field format contains "[", which must be literal inside a regex
    escaped_fmt = _FIELD_FMT.format(_FLAG_FMT).replace("[", "\\[")
    fields = []
    for col in sample.columns:
        fields.append(escaped_fmt.format(name=flags.instanceName, col=col))

    dtype_names = map(str, sample.dtypes)
    tbl_cells = zip(sample.columns, dtype_names, fields)
    # NOTE(review): zip stops at the shorter input, so surplus rows or
    # columns are not checked here -- confirm that this is intended
    for cells, row in zip(tbl_cells, txttbl):
        assert re.match(col_sep.join(cells), row.strip())
def test_dfT_markdown(csvfile):
    """Check ``_io.dfT_markdown`` output against the transposed sample frame."""
    sample = _io.csv_sample(csvfile)
    rendered = _io.dfT_markdown(sample)
    # expectation: transposed table, php flavour, no index
    transposed = sample.T
    assert check_md_flavour(rendered, transposed, flavour="php")
def test_df_markdown(csvfile):
    """Check ``_io.df_markdown`` output against the (untransposed) sample frame."""
    sample = _io.csv_sample(csvfile)
    rendered = _io.df_markdown(sample)
    # expectation: no index, table checked with flavour="php"
    # NOTE(review): the earlier comment here said "presto format" while the
    # check uses the "php" flavour -- confirm which one is intended
    assert check_md_flavour(rendered, sample, flavour="php")
def test_csv_dtypes(csvfile):
    """Compare the ``_io.csv_dtypes`` text table with a sample of the same file."""
    rendered = _io.csv_dtypes(csvfile)
    reference = _io.csv_sample(csvfile)
    columns_agree = compare_cols_transposed(rendered, reference)
    assert columns_agree
def test_csv_preview(csvfile):
    """Compare the ``_io.csv_preview`` text table with an equally sized sample."""
    n_rows = 5  # same row count for the preview and the reference sample
    rendered = _io.csv_preview(csvfile, nrows=n_rows)
    reference = _io.csv_sample(csvfile, nrows=n_rows)
    columns_agree = compare_cols_transposed(rendered, reference)
    assert columns_agree
def test_csv_sample(csvfile):
    """Check that ``_io.csv_sample`` returns a DataFrame with ``nrows`` rows."""
    n_requested = 3
    sample = _io.csv_sample(csvfile, nrows=n_requested)
    assert isinstance(sample, pd.DataFrame)
    # the sample must contain exactly the requested number of rows
    assert len(sample) == n_requested