Ejemplo n.º 1
0
def pd_dsa2_custom(df: pd.DataFrame, col: list = None, pars: dict = None):
    """
    Example of a custom processor combining date features and rolling features.

    Usage :
    ,{"uri":  THIS_FILEPATH + "::pd_dsa2_custom",   "pars": {'coldate': 'date'}, "cols_family": "coldate",   "cols_out": "coldate_features1",  "type": "" },

    :param df:   input dataframe. The rolling step reads the hard-coded columns
                 'date', 'item', 'store' and 'sales'.
    :param col:  not used by this function.
    :param pars: parameter dict. 'coldate' is read at training time; the whole
                 dict is also passed to prepro_load / prepro_save.
    :return:     (df_new, col_pars) exactly as returned by prepro_save.
    :raises NotImplementedError: at predict time (prepro_load returned a saved
                 prepro). That path was never implemented; it previously failed
                 with an UnboundLocalError on ``df_new``.
    """
    prefix = "tseries_feat"  ### Column-name suffix and key given to prepro_load / prepro_save

    #### Inference time LOAD previous pars  ###########################################
    from prepro import prepro_load, prepro_save
    prepro, _pars_saved, _cols_saved = prepro_load(prefix, pars)

    from source.prepro_tseries import pd_ts_date, pd_ts_rolling

    ###################################################################################
    #### Predict time #################################################################
    if prepro is not None:
        # y is NOT provided at predict time: the features would have to be
        # computed auto-regressively from past values. Nothing builds df_new on
        # this path, so fail with an explicit message instead of an
        # UnboundLocalError further down.
        raise NotImplementedError(
            "pd_dsa2_custom: predict-time feature generation is not implemented")

    ###################################################################################
    #### Training time ################################################################
    coldate = pars['coldate']

    #### time features
    df_new, _ = pd_ts_date(
        df,
        cols=[coldate],
        pars={'col_add': ['day', 'month', 'year', 'weekday']})

    #### Rolling features
    # NOTE(review): 'date' is hard-coded here while the date features above use
    # pars['coldate'] -- confirm whether both should be the same column.
    df_rolling, _ = pd_ts_rolling(df,
                                  cols=['date', 'item', 'store', 'sales'],
                                  pars={
                                      'col_groupby': ['store', 'item'],
                                      'col_stat': 'sales',
                                      'lag_list': [7, 30]
                                  })
    df_new = pd.concat([df_new, df_rolling], axis=1)

    ###################################################################################
    ### Clean up the df ###############################################################
    df_new.index = df.index  ### Impt for JOIN
    df_new.columns = [name + f"_{prefix}" for name in df_new.columns]
    cols_new = list(df_new.columns)

    ###### Training time save all #####################################################
    df_new, col_pars = prepro_save(prefix, pars, df_new, cols_new, prepro)
    return df_new, col_pars
Ejemplo n.º 2
0
def pd_prepro_custom2(df: pd.DataFrame, cols: list = None, pars: dict = None):
    """   Generic template for feature generation

    Example of ``pars`` entries:
       'colnum' : ['sales1', 'units']
       'pars_function_list' :  [
          { 'name': 'deltapy.transform::robust_scaler',    'pars': {'drop':["Close_1"]} },
          { 'name': 'deltapy.transform::standard_scaler',  'pars': {'drop':["Close_1"]} },
       ]

    :param df:   input dataframe.
    :param cols: columns selected from ``df`` before any feature is computed;
                 also passed to pd_ts_groupby.
    :param pars: parameter dict. 'coldate' and 'colnum' are read here,
                 'pars_function_list' is optional (defaults to no deltapy step),
                 and the whole dict is forwarded to pd_ts_date, pd_ts_groupby,
                 prepro_load and prepro_save.
    :return:     (df_new, col_pars) exactly as returned by prepro_save.
    """
    prefix = "coltseries_custom2"
    #### Inference time LOAD previous pars  ###########################################
    from prepro import prepro_load, prepro_save
    prepro, pars_saved, cols_saved = prepro_load(prefix, pars)

    #### Do something #################################################################
    # The parameter is named ``cols``; the body previously read an undefined
    # name ``col`` here and in the pd_ts_groupby call, raising NameError.
    df = df[cols]
    coldate = pars['coldate']
    colnum = pars['colnum']

    log("### Only dates")
    df1 = pd_ts_date(df, coldate, pars)
    coldate1 = list(df1.columns)

    log("### Initial features")
    df1 = df1.join(df, on=coldate, how='left')

    log("### Groupby features")
    df2 = pd_ts_groupby(df, cols, pars)
    df1 = df1.join(df2, on=coldate, how='left')

    log("### Numerical features")
    colnum2 = list(df2.columns) + colnum
    df1 = df1.set_index(coldate1)

    log("### Deltapy features")
    # Each entry of 'pars_function_list' is handed as-is to the generic deltapy
    # wrapper, and its output is joined back on the date column.
    for pars_function_dict_i in pars.get('pars_function_list', []):
        dfi = pd_ts_deltapy_generic(df1,
                                    col=colnum2,
                                    pars=pars_function_dict_i)
        df1 = df1.join(dfi, on=coldate, how='left')

    df_new = df1

    ### Transform features ###################################
    # Suffix every output column with the processor prefix.
    df_new.columns = [name + f"_{prefix}" for name in df_new.columns]
    cols_new = list(df_new.columns)

    ###### Training time save all #####################################################
    df_new, col_pars = prepro_save(prefix, pars, df_new, cols_new, prepro)
    return df_new, col_pars
Ejemplo n.º 3
0
def test_prepro_v1():
    df = test_get_sampledata()
    time_eng = pd_ts_date(df, ['Date'], pars={})
    onehot = pd_ts_onehot(df, ['Name'], {})
    trendless = pd_ts_difference(df, ['Close'], {})