Beispiel #1
0
# Queue one "load.*" case per remote sample file, in the original order
# (json first, then parquet).  Each row holds the 2nd, 3rd and 5th positional
# arguments of ``t.create``; ``op`` and "df" are the same for both calls.
# NOTE(review): ``t`` and ``op`` are defined outside this view, so the meaning
# of each positional slot should be confirmed against ``t.create``.
_remote_load_cases = (
    (
        "load.json",
        "remote_json",
        "https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.json",
    ),
    (
        "load.parquet",
        "remote_parquet",
        "https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.parquet",
    ),
)
for _case_name, _variant, _url in _remote_load_cases:
    t.create(op, _case_name, _variant, "df", _url)

# +
# Build a Profiler and print whatever ``run`` returns for ``source_df1`` with
# the argument "japanese name" (presumably a column name — confirm).
# NOTE(review): this cell reads ``source_df1`` while the neighbouring cells
# use ``source_df``; neither is defined in the visible code — confirm that
# ``source_df1`` exists and is the intended frame.
from optimus.profiler.profiler import Profiler
p = Profiler()

print(p.run(source_df1, "japanese name"))
# -

# df_string = source_df.cols.cast("*","str")
# Queue the three "save.*" cases in their original order (csv, json, parquet).
# Only the csv case passes ``source_df`` as the first argument; the json and
# parquet cases pass ``None`` there.  The 3rd and 4th arguments are always None.
_save_cases = (
    (source_df, "save.csv", "test.csv"),
    (None, "save.json", "test.json"),
    (None, "save.parquet", "test.parquet"),
)
for _first_arg, _case_name, _file_name in _save_cases:
    t.create(_first_arg, _case_name, None, None, _file_name)

# NOTE(review): presumably processes every case queued on ``t`` above —
# confirm against the definition of ``t``, which is outside this view.
t.run()

source_df.table()

# # Outliers
Beispiel #2
0

# Column-name constants, grouped by the kind of column each variable name
# suggests (numeric, string, date, new, array).
# NOTE(review): numeric_col_B and numeric_col_C both hold "rank" — confirm
# that the duplication is intentional.
numeric_col, numeric_col_B, numeric_col_C = "height(ft)", "rank", "rank"
string_col = "function"
date_col, date_col_B = "date arrival", "last date seen"
new_col, array_col = "new col", "attributes"
# -

# Profiler instance shared by the calls below.
from optimus.profiler.profiler import Profiler
p = Profiler()

# Run the profiler over ``source_df`` with "*" (presumably "all columns" —
# confirm).  The return value is not stored.
p.run(source_df, "*")

# Queue a "dataset" case on ``t`` with the profiler as the first argument;
# ``source_df`` and "*" are passed as the trailing positional arguments.
# NOTE(review): the meaning of the None / 'json' / None slots is defined by
# ``t.create``, which is outside this view — confirm before changing them.
t.create(p, "dataset", None, 'json', None, source_df, "*")

t.run()

# Regular expression for dd/mm/yyyy text: a two-digit day 00-31, "/", a
# two-digit month 00-12, "/", then exactly four digits.
_DAY_MONTH_YEAR_PATTERN = r'^([0-2][0-9]|(3)[0-1])(\/)(((0)[0-9])|((1)[0-2]))(\/)\d{4}$'

# Maps a column name to either a date-format string or a regular expression.
# NOTE(review): presumably the per-column expectation used by a mismatch
# check — confirm against the code that consumes this dict.
mismatch = {
    "names": "dd/mm/yyyy",
    "height(ft)": _DAY_MONTH_YEAR_PATTERN,
    "function": "yyyy-mm-dd",
}
t.create(p,
         "dataset",
         "mismatch",
         'json',