def pd_dsa2_custom(df: pd.DataFrame, col: list = None, pars: dict = None):
    """Example custom processor combining date features and rolling features.

    Usage (pipeline entry)::

        ,{"uri": THIS_FILEPATH + "::pd_dsa2_custom", "pars": {'coldate': 'date'},
          "cols_family": "coldate", "cols_out": "coldate_features1", "type": "" },

    :param df: input dataframe. The rolling step passes the hard-coded columns
        'date', 'item', 'store' and 'sales' to ``pd_ts_rolling``.
    :param col: not used by this processor; kept for the common processor signature.
    :param pars: processor parameters; ``pars['coldate']`` names the date column
        handed to ``pd_ts_date``. ``pars`` is also forwarded to ``prepro_load``
        and ``prepro_save``.
    :return: ``(df_new, col_pars)`` exactly as returned by ``prepro_save``.
    :raises NotImplementedError: when ``prepro_load`` returns a saved
        preprocessor (predict time). No predict-time feature computation exists
        yet; previously this path failed with an UnboundLocalError on ``df_new``.
    """
    prefix = "tseries_feat"  ### Used as column suffix

    #### Inference time LOAD previous pars ###########################################
    from prepro import prepro_load, prepro_save
    prepro, _pars_saved, _cols_saved = prepro_load(prefix, pars)

    if prepro is not None:
        ### Predict time: y is NOT provided, so the features would have to be
        ### derived from past values (auto-regressive feature engineering).
        ### That logic was never written; fail with an explicit message instead
        ### of crashing later on an unbound ``df_new``.
        raise NotImplementedError(
            "pd_dsa2_custom: predict-time feature generation is not implemented"
        )

    #### Training time ################################################################
    from source.prepro_tseries import pd_ts_date, pd_ts_rolling
    coldate = pars['coldate']

    #### Time features
    df_date, _ = pd_ts_date(
        df, cols=[coldate],
        pars={'col_add': ['day', 'month', 'year', 'weekday']})

    #### Rolling features
    # NOTE(review): the column list below is hard-coded ('date' rather than
    # ``coldate``); confirm whether it should follow ``pars``.
    df_roll, _ = pd_ts_rolling(
        df, cols=['date', 'item', 'store', 'sales'],
        pars={'col_groupby': ['store', 'item'],
              'col_stat': 'sales',
              'lag_list': [7, 30]})

    df_new = pd.concat([df_date, df_roll], axis=1)

    ### Clean up the df ###############################################################
    df_new.index = df.index  ### Impt for JOIN
    df_new.columns = [f"{name}_{prefix}" for name in df_new.columns]
    cols_new = list(df_new.columns)

    ###### Training time save all #####################################################
    df_new, col_pars = prepro_save(prefix, pars, df_new, cols_new, prepro)
    return df_new, col_pars
def pd_prepro_custom2(df: pd.DataFrame, cols: list = None, pars: dict = None):
    """Generic template for time-series feature generation.

    Keys read from ``pars`` by this function::

        'coldate' : date column(s), used as the join key
        'colnum'  : list of numeric columns, e.g. ['sales1', 'units']
        'pars_function_list' : optional list of per-function settings, e.g.
            [ { 'name': 'deltapy.transform::robust_scaler',   'pars': {'drop': ["Close_1"]} },
              { 'name': 'deltapy.transform::standard_scaler', 'pars': {'drop': ["Close_1"]} },
            ]

    ``pars`` is also forwarded unchanged to ``pd_ts_date``, ``pd_ts_groupby``,
    ``prepro_load`` and ``prepro_save``.

    :param df: input dataframe.
    :param cols: columns of ``df`` to use; ``df`` is restricted to these first.
    :param pars: parameter dict, see above.
    :return: ``(df_new, col_pars)`` exactly as returned by ``prepro_save``.
    """
    prefix = "coltseries_custom2"

    #### Inference time LOAD previous pars ###########################################
    from prepro import prepro_load, prepro_save
    prepro, _pars_saved, _cols_saved = prepro_load(prefix, pars)

    #### Do something #################################################################
    # The parameter is ``cols``; the previous body referenced an undefined ``col``.
    df = df[cols]
    coldate = pars['coldate']
    colnum = pars['colnum']

    log("### Only dates")
    # NOTE(review): another processor in this file unpacks pd_ts_date as a
    # (dataframe, columns) pair; confirm the helper in scope here returns a
    # DataFrame.
    df1 = pd_ts_date(df, coldate, pars)
    coldate1 = list(df1.columns)

    log("### Initial features")
    df1 = df1.join(df, on=coldate, how='left')

    log("### Groupby features")
    df2 = pd_ts_groupby(df, cols, pars)
    df1 = df1.join(df2, on=coldate, how='left')

    log("### Numerical features")
    colnum2 = list(df2.columns) + colnum
    df1 = df1.set_index(coldate1)

    log("### Deltapy features")
    for pars_function_dict_i in pars.get('pars_function_list', []):
        dfi = pd_ts_deltapy_generic(df1, col=colnum2, pars=pars_function_dict_i)
        df1 = df1.join(dfi, on=coldate, how='left')

    df_new = df1

    ### Transform features ###################################
    df_new.columns = [f"{name}_{prefix}" for name in df_new.columns]
    cols_new = list(df_new.columns)

    ###### Training time save all #####################################################
    df_new, col_pars = prepro_save(prefix, pars, df_new, cols_new, prepro)
    return df_new, col_pars
def pd_col_amyfun(df: pd.DataFrame, col: list = None, pars: dict = None):
    """Template custom processor: passes the selected columns through unchanged.

    At training time (``prepro_load`` returns no preprocessor) an identity
    function is created as the "model" and its keyword arguments are stored
    under ``pars['pars_prepro']``. At predict time the saved parameters
    replace ``pars`` and the loaded preprocessor is applied.

    Path settings mentioned by the original template (presumably read by the
    ``prepro`` helpers -- confirm there):
        predict time  : "path_pipeline"
        training time : "path_features_store"   (intermediate dataframe)
                        "path_pipeline_export"  (pipeline for later usage)

    :param df: input dataframe.
    :param col: columns of ``df`` handed to the preprocessor.
    :param pars: parameter dict; at training time it must hold 'dfy' and
        'coly', and it is mutated in place to add 'pars_prepro'.
    :return: ``(df_new, col_pars)`` exactly as returned by ``prepro_save``.
    """
    from prepro import prepro_load, prepro_save

    prefix = "myfun"
    suffix = f"_{prefix}"

    #### Inference time LOAD previous pars ###########################################
    prepro, pars_saved, cols_saved = prepro_load(prefix, pars)

    #### Pick the preprocessor and its parameters #####################################
    if prepro is not None:
        ### Predict time: reuse what was stored during training
        pars = pars_saved
    else:
        ### Training time
        _dfy, _coly = pars['dfy'], pars['coly']

        def prepro(df, a=0):
            ### Identity "model"
            return df

        pars['pars_prepro'] = {'a': 5}  ### new params

    #### Transform features ###########################################################
    selected = df[col]
    prepro_kwargs = pars['pars_prepro']
    df_new = prepro(selected, **prepro_kwargs)  ### Do Nothing

    df_new.index = df.index  ### Impt for JOIN
    renamed = []
    for name in df_new.columns:
        renamed.append(name + suffix)
    df_new.columns = renamed
    cols_new = list(df_new.columns)

    ###### Training time save all #####################################################
    df_new, col_pars = prepro_save(prefix, pars, df_new, cols_new, prepro)
    return df_new, col_pars