Python clean_colsの例

プログラミング言語: Python

名前空間/パッケージ名: capacity_planning.utilities.pandas_utils

メソッド/関数: clean_cols

hotexamples.comのコード掲載数: 6

Python clean_cols - 6件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのcapacity_planning.utilities.pandas_utils.clean_colsの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

def get_actuals(cutoff_date_):
    """Load, normalise and impute the actuals stored for a cutoff date.

    The directory ``~/my_tmp/cleaned/`` is scanned (in ``os.listdir`` order)
    for the first file whose name contains the cutoff date and ``tickets_``
    but not ``old``. That file is read, its categorical columns are cleaned,
    ``ticket_count``/``ds_week_starting`` are renamed to ``y``/``ds`` and the
    placeholder values are imputed.

    :param cutoff_date_: object exposing ``.date()``
        (presumably a pandas Timestamp -- confirm against callers).
    :return: the imputed frame with ``y`` rounded to 0 decimals, or ``None``
        when no matching file exists or the reader returned ``None``.
    """
    fdir = os.path.expanduser('~/my_tmp/cleaned/')  # '~/my_tmp/in_df_data_'
    candidates = os.listdir(fdir)
    date_tag = str(cutoff_date_.date())

    # we do not know the rolling window, so match on date + 'tickets_' only
    fname = next(
        (name for name in candidates
         if date_tag in name and 'tickets_' in name and 'old' not in name),
        None)

    adf = None
    if fname is not None:
        s_ut.my_print('getting actuals from ' + fdir + fname)
        adf = p_ut.read_df(fdir + fname)

    if adf is None:
        s_ut.my_print('no available actuals data for ' + date_tag)
        return None

    adf.reset_index(inplace=True, drop=True)
    cat_cols = ["language", "service_tier", "channel", "business_unit"]
    p_ut.clean_cols(
        adf,
        cat_cols,
        '~/my_repos/capacity_planning/data/config/col_values.json',
        check_new=False,
        do_nan=False,
        rename=True)

    new_names = {'ticket_count': 'y', 'ds_week_starting': 'ds'}
    adf.rename(columns=new_names, inplace=True)

    # values treated as missing by the imputer; 'ds' is excluded from imputation
    missing_markers = ['nan', 'NULL', None, 'other', np.nan, 'null', 'N/A']
    imp_data = imputer.impute(adf, i_vals=missing_markers, ex_cols=['ds'])
    imp_data['y'] = np.round(imp_data['y'].values, decimals=0)
    return imp_data

コード例 #2

ファイルを表示

ファイル: ratio_forecast.py プロジェクト: josepm/FB_Prophet

def tmp_ratios(cu, window, gcols):  # tmp fix
    """Compute per-group ticket ratios relative to each language total.

    Reads the fixed parquet snapshot, cleans and imputes it, keeps the rows
    whose ``ds`` lies in ``[cu - window weeks, cu]``, drops the rows whose
    channel is ``'directly'`` and divides each ``gcols`` group's
    ``ticket_count`` by the ``ticket_count`` of its language.

    :param cu: upper bound for ``ds`` (compared against datetime values).
    :param window: number of weeks to look back from ``cu``.
    :param gcols: columns to group by; the merge below requires ``language``
        to be among them.
    :return: frame of the grouped columns plus a ``ratio`` column.
    """

    def _channel_override(frame):
        # rows with a 'directly' service tier get 'directly' as channel
        return frame.apply(
            lambda row: 'directly'
            if row['service_tier'] == 'directly' else row['channel'],
            axis=1)

    wf = pd.read_parquet('~/my_tmp/cleaned/old_tickets_2020-02-29.par')
    p_ut.clean_cols(
        wf,
        ["language", "service_tier", "channel", "business_unit"],
        '~/my_repos/capacity_planning/data/config/col_values.json',
        check_new=True,
        do_nan=True,
        rename=True)
    wf.rename(columns={'ds_week_starting': 'ds'}, inplace=True)
    wf['channel'] = _channel_override(wf)

    # 'ds' is turned into a string for the imputer and restored afterwards
    missing_markers = ['nan', 'NULL', None, 'other', np.nan, 'null', 'N/A']
    wf['ds'] = wf['ds'].dt.date.astype(str)
    wf = imputer.impute(wf, i_vals=missing_markers, ex_cols=['ds'])
    wf['ds'] = pd.to_datetime(wf['ds'])

    not_after_cutoff = wf['ds'] <= cu
    window_start = cu - pd.to_timedelta(window, unit='W')
    in_window = not_after_cutoff & (wf['ds'] >= window_start)
    wf = wf[in_window].copy()

    # again in case imputation added directly wrongly
    wf['channel'] = _channel_override(wf)
    a_df = wf[wf['channel'] != 'directly'].copy()

    lct_df = a_df.groupby(gcols).sum(numeric_only=True).reset_index()
    l_df = lct_df.groupby(['language']).sum(numeric_only=True).reset_index()
    lct_ratio = lct_df.merge(l_df, on=['language'], how='left')

    # merge suffixes: _x is the group count, _y the language total
    group_col = 'ticket_count_x'
    total_col = 'ticket_count_y'
    lct_ratio['ratio'] = lct_ratio[group_col] / lct_ratio[total_col]
    lct_ratio = lct_ratio.drop(columns=[group_col, total_col])
    return lct_ratio

コード例 #3

ファイルを表示

 def __init__(self, file_path):
     """Build the forecast object from a saved forecast file.

     The flags are parsed from the file name (the text between the last
     ``/`` and the first ``.`` of ``file_path``), the data is read and its
     categorical columns cleaned, and a ``ds`` column is derived from
     ``ds_week_ending`` when that column is present.

     :param file_path: path of the forecast file; its name is expected to
         end with ``_<cutoff date>`` before the extension.
     """
     s_ut.my_print('setting forecast from ' + file_path)

     # everything before the first '.', then the last path component
     stem = file_path.partition('.')[0]
     t_info = stem.rpartition('/')[2]

     self.raw = 'raw' in t_info
     self.adj = not self.raw
     self.rolling = '_r_' in t_info
     self.cutoff_date = pd.to_datetime(t_info.rpartition('_')[2])
     self.has_actuals = '_xls_' in t_info

     self.data = p_ut.read_df(file_path)
     cat_cols = ["language", "service_tier", "channel", "business_unit"]
     p_ut.clean_cols(
         self.data,
         cat_cols,
         '~/my_repos/capacity_planning/data/config/col_values.json',
         check_new=False,
         do_nan=False,
         rename=True)

     if 'ds_week_ending' in self.data.columns:
         # ds is set 6 days before the week-ending date
         week_end = pd.to_datetime(self.data['ds_week_ending'])
         self.data['ds'] = week_end - pd.to_timedelta(6, unit='D')
         self.data.drop('ds_week_ending', inplace=True, axis=1)

     # month name of the date 7 days after the cutoff
     next_week = self.cutoff_date + pd.to_timedelta(7, unit='D')
     self.forecast = next_week.month_name()

     # path stem without its last 10 characters
     self.froot = stem[:-10]

コード例 #4

ファイルを表示

ファイル: cx_fcast.py プロジェクト: josepm/FB_Prophet

    ]

    dfx.columns = l_cols + dr
    dfm = pd.melt(dfx,
                  value_vars=dr,
                  id_vars=l_cols,
                  var_name='ds_week_ending',
                  value_name='cx_yhat')
    start = pd.to_datetime(cutoff_date) + pd.to_timedelta(
        7, unit='D')  # cutoff is a week_ending date. Go to the next week
    dfm['ds_week_ending'] = pd.to_datetime(dfm['ds_week_ending'].values)
    dfm = dfm[(dfm['ds_week_ending'] >= start)
              & (dfm['ds_week_ending'] <= horizon_date)].copy()

    _ = p_ut.clean_cols(
        dfm, ['sector', 'language'],
        '~/my_repos/capacity_planning/data/config/col_values.json',
        check_new=False)
    dfm['cx_yhat'] = dfm['cx_yhat'].apply(lambda x: to_float(x))
    p_df = pd.pivot_table(dfm,
                          index=['ds_week_ending', 'language', 'sector'],
                          values='cx_yhat',
                          columns=['type']).reset_index()
    c_df = p_df[p_df['sector'].isin([
        'Claims', 'Community Education', 'Experiences', 'PST', 'Payments',
        'Regulatory Response', 'Resolutions 1', 'Resolutions 2', 'Safety'
    ])].copy()
    c_df.fillna(0, inplace=True)

    # language level: no language level agg for FTE
    if 'inbound-vol' in ts_name:
        g_df = c_df.groupby(['ds_week_ending', 'language']).agg({

コード例 #5

ファイルを表示

    end = time.time()
    s_ut.my_print('imputer:  reduce secs: ' + str(end - start))
    return z_all


if __name__ == '__main__':
    # Manual smoke run: load a saved frame, clean it and impute it.
    #
    # Small synthetic alternative to the parquet input:
    # df = pd.DataFrame({
    #     'a': [1,2,np.nan, 4, 5, 6, 7],
    #     'b': ['x', 'y', None, None, 'z', 'x', 'z'],
    #     'c': [1.5, 2.3, 5.2, 3, None, np.nan, 5.2],
    #     'ds': ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun'],
    #     'd': [None, None, 'a', 'bb', 'a', 'a', 'bb']}
    # )
    # ex_cols_ = ['ds']
    df = pd.read_parquet('~/my_tmp/phone-aht_2019-11-23.par')
    from capacity_planning.utilities import pandas_utils as p_ut

    new_vals = p_ut.clean_cols(
        df,
        ["service_region", "language", "sector", "interaction_type"],
        '~/my_repos/capacity_planning/data/config/col_values.json',
        check_new=True,
        do_nan=True)

    # columns left out of the imputation
    ex_cols_ = ['ds', 'agent_id']

    def _positive_or_nan(value):
        # anything that is not strictly positive becomes NaN
        if value > 0:
            return value
        return np.nan

    df['tenure_days'] = df['tenure_days'].apply(_positive_or_nan)
    # df = df[df['ds'] >= '2019-06-01'].copy()
    print(df.head(10))
    zz = impute(df, ex_cols=ex_cols_)
    print(zz.head(10))
    print('DONE')

コード例 #6

ファイルを表示

ファイル: rolling.py プロジェクト: josepm/FB_Prophet

    # avg_fdf['initiative'] = False
    # s_ut.my_print('saving raw fcast data to ' + fcast_f)
    # p_ut.save_df(avg_fdf, fcast_f)   # ONLY forecasted data

    # accuracy for the <months> months old raw forecast
    adf = errs.get_actuals(cutoff_date)  # raw actuals up to cutoff_date
    window = 4
    fcast_file = errs.get_fcast_file(
        cutoff_date,
        '~/Forecasts/rolling/par/raw_r_fcast_' + str(window) + '_',
        months=3)  # file path from <months> months old forecast from cutoff
    fdf_obj = ts.TicketForecast(fcast_file)  # fcast obj from 3 months ago
    fdf = fdf_obj.data
    p_ut.clean_cols(fdf,
                    ["language", "service_tier", "channel", "business_unit"],
                    '~/my_repos/capacity_planning/data/config/col_values.json',
                    check_new=False,
                    do_nan=False,
                    rename=True)
    fdf.rename(columns={'ticket_count': 'forecasted_count'}, inplace=True)
    # fdf['ds_week_ending'] = pd.to_datetime(fdf['ds_week_ending'])
    # fdf['ds'] = fdf['ds_week_ending'] - pd.to_timedelta(6, unit='D')
    if fdf is None or adf is None:
        lang_errs, tier_errs = None, None
    else:
        s_ut.my_print('Error wrt actuals for an old forecast')
        lang_errs, tier_errs, off_df = errs.get_errs(
            cutoff_date, fdf_obj, adf,
            tcol='ds')  # errs on filtered actuals for old raw fcast
        lang_errs['adj'] = False
        lang_errs['initiative'] = False
        tier_errs['adj'] = False