# Create the "load.json" and "load.parquet" test entries, each pointing at a
# remote sample file in the Optimus repository.
# NOTE(review): `t` and `op` are defined outside this section; confirm the
# meaning of each positional argument against the definition of `t.create`.
t.create(
    op,
    "load.json",
    "remote_json",
    "df",
    "https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.json",
)

t.create(
    op,
    "load.parquet",
    "remote_parquet",
    "df",
    "https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.parquet",
)

# +
from optimus.profiler.profiler import Profiler

p = Profiler()

# Run the profiler on source_df1 with "japanese name" (presumably the column
# selector -- verify against Profiler.run) and print whatever it returns.
print(p.run(source_df1, "japanese name"))
# -

# df_string = source_df.cols.cast("*","str")
t.create(
    source_df,
    "save.csv",
    None,
    None,
    "test.csv",
)

t.create(
    None,
    "save.json",
    None,
    None,
    "test.json",
)

t.create(
    None,
    "save.parquet",
    None,
    None,
    "test.parquet",
)

t.run()

source_df.table()

# # Outliers
numeric_col = "height(ft)" numeric_col_B = "rank" numeric_col_C = "rank" string_col = "function" date_col = "date arrival" date_col_B = "last date seen" new_col = "new col" array_col = "attributes" # - from optimus.profiler.profiler import Profiler p = Profiler() p.run(source_df, "*") t.create(p, "dataset", None, 'json', None, source_df, "*") t.run() mismatch = { "names": "dd/mm/yyyy", "height(ft)": r'^([0-2][0-9]|(3)[0-1])(\/)(((0)[0-9])|((1)[0-2]))(\/)\d{4}$', "function": "yyyy-mm-dd" } t.create(p, "dataset", "mismatch", 'json',