def process_woe_trans(in_data_path=None,
                      rst_path=None,
                      out_path=None,
                      config_path=None):
    cfg = config.config()
    cfg.load_file(config_path, in_data_path)

    for var in [
            tmp for tmp in cfg.bin_var_list
            if tmp in list(cfg.dataset_train.columns)
    ]:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(), (var)] = -1

    for var in [
            tmp for tmp in cfg.discrete_var_list
            if tmp in list(cfg.dataset_train.columns)
    ]:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = 'missing'

    fp.change_feature_dtype(cfg.dataset_train, cfg.variable_type)

    output = open(rst_path, 'rb')
    rst = pickle.load(output)
    output.close()

    # Training dataset Woe Transformation
    for r in rst:
        cfg.dataset_train[r.var_name] = fp.woe_trans(
            cfg.dataset_train[r.var_name], r)

    cfg.dataset_train.to_csv(out_path)
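The variant above reads everything from disk and writes the WOE-transformed training set back out as a CSV. A minimal usage sketch follows; the paths are placeholders, and the pickle at rst_path is assumed to hold the list of InfoValue objects produced by a matching process_train_woe run.

process_woe_trans(in_data_path='data/train_raw.csv',      # hypothetical input csv
                  rst_path='model/rst.pkl',               # pickled InfoValue list
                  out_path='data/train_woe.csv',          # transformed output csv
                  config_path='config/config_model.csv')  # woe config csv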
def process_woe_trans(in_data_path=None,rst_path=None):
    config_path = r'E:\Code\Python_ML_Code\cs_model\config\config_cs_model_201705.csv'
    data_path = in_data_path
    cfg = config.config()
    cfg.load_file(config_path, data_path)

    cfg.dataset_train = cfg.dataset_train.rename(columns={'cs_cpd':'cpd'}) # rename
    # dataset['raw_cs_cpd'] = dataset['cs_cpd']

    for var in [tmp for tmp in cfg.bin_var_list if tmp in list(cfg.dataset_train.columns)]:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(), (var)] = -1

    for var in [tmp for tmp in cfg.discrete_var_list if tmp in list(cfg.dataset_train.columns)]:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(), (var)] = 'missing'

    fp.change_feature_dtype(cfg.dataset_train, cfg.variable_type)

    output = open(rst_path, 'rb')
    rst = pickle.load(output)
    output.close()

    # Training dataset Woe Transformation
    for r in rst:
        cfg.dataset_train[r.var_name] = fp.woe_trans(cfg.dataset_train[r.var_name], r)

    return cfg.dataset_train
Example 3
def proc_validattion(dataset_path,config_path,model_path):
    print '####PROC VALIDATION#####'
    print 'dataset_path:\n',dataset_path
    print 'config_path:\n',config_path
    print 'model_path:\n',model_path
    #fillna
    config_path = r'E:\Code\Python_ML_Code\cs_model\config\config_cs_model.csv'
    cfg = config.config()
    cfg.load_file(config_path, dataset_path)

    for var in [tmp for tmp in cfg.bin_var_list if tmp in list(cfg.dataset_train.columns)]:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(), (var)] = 0

    for var in [tmp for tmp in cfg.discrete_var_list if tmp in list(cfg.dataset_train.columns)]:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(), (var)] = 0

    output = open(model_path, 'rb')
    clf_model = pickle.load(output)
    output.close()

    clf = clf_model['clf']
    X_test = cfg.dataset_train[clf_model['features_list']]
    y_test = cfg.dataset_train['target']

    y_hat = clf.predict_proba(X_test)[:,1]
    ks = compute_ks(y_hat,y_test)
    print 'global_bt:',cfg.global_bt
    print 'global_gt:', cfg.global_gt
    print 'ks:',ks
    return ks
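compute_ks is not defined in this snippet; it is assumed to be a helper that returns the Kolmogorov-Smirnov statistic of the predicted scores against the target. A minimal stand-in built on sklearn.metrics.roc_curve could look like this sketch:

import numpy as np
from sklearn.metrics import roc_curve

def compute_ks(proba, target):
    # KS statistic: maximum gap between the cumulative distributions of
    # goods and bads, i.e. max |TPR - FPR| over all score thresholds.
    fpr, tpr, _ = roc_curve(target, proba)
    return np.max(np.abs(tpr - fpr))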
Example 4
def process_woe_trans(dataset, rst_path=None, config_path=None):
    ''' Process WOE features from fitted InfoValues

    Parameters
    ----------
    dataset : Pandas dataframe to be processed
    rst_path : Path to the pickled list of fitted InfoValue objects
    config_path : Path to read config file from.

    Returns
    -------
    dataset_transformed : Transformed copy of the input dataframe
    '''
    # Load config
    cfg = config.config()
    cfg.load_file(config_path)
    cfg.set_dataset(dataset)

    # Prepare variable list
    bin_var_list = [
        tmp for tmp in cfg.bin_var_list
        if tmp in list(cfg.dataset_train.columns)
    ]
    discrete_var_list = [
        tmp for tmp in cfg.discrete_var_list
        if tmp in list(cfg.dataset_train.columns)
    ]

    # Impute missing values
    for var in bin_var_list:
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(), (var)] = -1

    for var in discrete_var_list:
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = 'missing'

    # Cast dataframe dtypes
    change_feature_dtype(cfg.dataset_train, cfg.variable_type)

    # Load fitted InfoValues
    with open(rst_path, 'rb') as f:
        rst = pickle.load(f)

    # Training dataset Woe Transformation
    for r in rst:
        cfg.dataset_train[r.var_name] = woe_trans(
            cfg.dataset_train[r.var_name], r)

    return cfg.dataset_train.copy()
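This variant works on an in-memory dataframe instead of file paths, which makes it easy to chain with other preprocessing. A usage sketch with placeholder file names:

import pandas as pd

raw = pd.read_csv('data/apply_raw.csv')            # hypothetical raw dataset
woe_df = process_woe_trans(raw,
                           rst_path='model/rst.pkl',
                           config_path='config/config_model.csv')
print(woe_df.head())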
def process_woe_trans(in_data_path=None, rst_path=None, out_path=None):
    print time.asctime(time.localtime(time.time())), 'load config file'
    config_path = r'E:\Code\Python_ML_Code\cs_model\config\config_cs_daily_model.csv'
    data_path = in_data_path
    cfg = config.config()
    cfg.load_file(config_path, data_path)

    print time.asctime(time.localtime(time.time())), 'fill na'
    dataset = pd.read_csv(in_data_path)

    print time.asctime(time.localtime(
        time.time())), 'fill na continuous variables'
    for var in [
            tmp for tmp in cfg.bin_var_list if tmp in list(dataset.columns)
    ]:
        # fill null
        dataset.loc[dataset[var].isnull(), (var)] = -1

    print time.asctime(time.localtime(
        time.time())), 'fill na discrete variables'
    for var in [
            tmp for tmp in cfg.discrete_var_list
            if tmp in list(dataset.columns)
    ]:
        # fill null
        dataset.loc[dataset[var].isnull(), (var)] = 'missing'

    print time.asctime(time.localtime(time.time())), 'change feature dtypes'
    fp.change_feature_dtype(dataset, cfg.variable_type)

    print time.asctime(time.localtime(time.time())), 'load woe rule'
    output = open(rst_path, 'rb')
    rst = pickle.load(output)
    output.close()

    # Training dataset Woe Transformation
    for r in rst:
        print 'woe trans:', r.var_name
        dataset[r.var_name] = fp.woe_trans(dataset[r.var_name], r)

    dataset.to_csv(out_path, index=False)
    print('%s\tSUCCESS EXPORT FILE: \n%s' %
          (time.asctime(time.localtime(time.time())), out_path))
Example 6
def process_train_woe(infile_path=None,outfile_path=None,rst_path=None):
    print 'run into process_train_woe: \n',time.asctime(time.localtime(time.time()))
    config_path = 'E:\\Code\\Python_ML_Code\\cs_model\\config\\config_cs_model_pos_m2.csv'
    data_path = infile_path
    cfg = config.config()
    cfg.load_file(config_path,data_path)
    bin_var_list = [tmp for tmp in cfg.bin_var_list if tmp in list(cfg.dataset_train.columns)]

    for var in bin_var_list:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(), (var)] = -1

    # change feature dtypes
    fp.change_feature_dtype(cfg.dataset_train, cfg.variable_type)
    rst = []

    # process woe transformation of continuous variables
    print 'process woe transformation of continuous variables: \n',time.asctime(time.localtime(time.time()))
    print 'cfg.global_bt',cfg.global_bt
    print 'cfg.global_gt', cfg.global_gt

    for var in bin_var_list:
        rst.append(fp.proc_woe_continuous(cfg.dataset_train,var,cfg.global_bt,cfg.global_gt,cfg.min_sample,alpha=0.05))

    # process woe transformation of discrete variables
    print 'process woe transformation of discrete variables: \n',time.asctime(time.localtime(time.time()))
    for var in [tmp for tmp in cfg.discrete_var_list if tmp in list(cfg.dataset_train.columns)]:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(), (var)] = 'missing'
        rst.append(fp.proc_woe_discrete(cfg.dataset_train,var,cfg.global_bt,cfg.global_gt,cfg.min_sample,alpha=0.05))

    feature_detail = eval.eval_feature_detail(rst, outfile_path)

    print 'save woe transformation rule into pickle: \n',time.asctime(time.localtime(time.time()))
    output = open(rst_path, 'wb')
    pickle.dump(rst,output)
    output.close()

    return feature_detail,rst
Example 7
# -*- coding:utf-8 -*-
__author__ = 'maomaochong'
import pandas as pd
import woe.feature_process as fp
import woe.config as config

dataset_train_path1 = r'E:\work_file\mmt_application_card\raw_data\mmt_application_model_feature_ftrain1.csv'
config_path = r'E:\work_file\mmt_application_card\config\config_mmt_application_model.csv'

dataset = pd.read_csv(dataset_train_path1)
var = 'data_status'
dataset.loc[dataset[var].isnull(), (var)] = 'missing'
cfg = config.config()
cfg.load_file(config_path,dataset_train_path1)

print 'cfg.global_bt',cfg.global_bt
print 'cfg.global_gt',cfg.global_gt
print 'cfg.min_sample',cfg.min_sample
# rst = fp.proc_woe_discrete(dataset,var,cfg.global_bt,cfg.global_gt,cfg.min_sample,alpha=0.05)

var = 'pos_sales_commission'
fp.proc_woe_continuous(dataset,var,cfg.global_bt,cfg.global_gt,cfg.min_sample,alpha=0.05)

var = 'pos_dd_fail_cnt'
fp.proc_woe_continuous(dataset,var,cfg.global_bt,cfg.global_gt,cfg.min_sample,alpha=0.05)
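The two calls above discard their return values; each proc_woe_continuous call yields a fitted InfoValue-style rule that fp.woe_trans can apply directly. A sketch of capturing and applying one of them (same variable names as in the script):

var = 'pos_sales_commission'
civ = fp.proc_woe_continuous(dataset, var, cfg.global_bt, cfg.global_gt,
                             cfg.min_sample, alpha=0.05)
dataset[var] = fp.woe_trans(dataset[var], civ)  # replace raw values with WOE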
Example 8
def process_train_woe(dataset,
                      outfile_path=None,
                      rst_path=None,
                      config_path=None,
                      min_sample_weight_config=None):
    ''' Process training data for WOE

    Parameters
    ----------
    dataset : Pandas dataframe of training dataset. Includes 'target' column.
    outfile_path : Path for WOE feature details output.
    rst_path : Path for WOE InfoValue object output.
    config_path : Path to read config file from.
    min_sample_weight_config : Adjusts the minimum fraction of samples required per leaf.

    Returns
    -------
    feature_detail : WOE feature details
    rst : List of InfoValue instances
    '''
    # Load config
    cfg = config.config()
    cfg.load_file(config_path)
    cfg.set_dataset(dataset)
    cfg.load_min_sample_weight_config(min_sample_weight_config)
    print(cfg.min_sample_weight_config)

    # Prepare variable list
    bin_var_list = [
        tmp for tmp in cfg.bin_var_list
        if tmp in list(cfg.dataset_train.columns)
    ]

    discrete_var_list = [
        tmp for tmp in cfg.discrete_var_list
        if tmp in list(cfg.dataset_train.columns)
    ]

    # Impute missing values for features to be binned
    for var in bin_var_list:
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(), (var)] = -1

    # Cast feature dtypes
    change_feature_dtype(cfg.dataset_train, cfg.variable_type)

    # Process woe transformation of continuous variables
    rst = []
    for var in bin_var_list:
        iv_obj = proc_woe_continuous(cfg.dataset_train,
                                     var,
                                     cfg.global_bt,
                                     cfg.global_gt,
                                     cfg.get_min_sample(var),
                                     alpha=0.05)
        rst.append(iv_obj)

    # Process woe transformation of discrete variables
    for var in discrete_var_list:
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = 'missing'
        iv_obj = proc_woe_discrete(cfg.dataset_train,
                                   var,
                                   cfg.global_bt,
                                   cfg.global_gt,
                                   cfg.get_min_sample(var),
                                   alpha=0.05)
        rst.append(iv_obj)

    feature_detail = eval.eval_feature_detail(rst, outfile_path)

    # Write list of InfoValue instances to output path
    with open(rst_path, 'wb') as f:
        pickle.dump(rst, f)

    return feature_detail, rst
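A hedged usage sketch for this dataframe-based trainer; the training file, output paths and the shape of min_sample_weight_config are placeholders, assumed to match whatever cfg.load_min_sample_weight_config expects.

import pandas as pd

train_df = pd.read_csv('data/train_raw.csv')       # must contain a 'target' column
feature_detail, rst = process_train_woe(
    train_df,
    outfile_path='model/woe_feature_detail.csv',
    rst_path='model/rst.pkl',
    config_path='config/config_model.csv',
    min_sample_weight_config=None)                 # optional per-variable overrides
print(feature_detail.head())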
Example 9
def process_train_woe(infile_path=None,
                      outfile_path=None,
                      rst_path=None,
                      config_path=None):
    print('run into process_train_woe: ',
          time.asctime(time.localtime(time.time())))
    data_path = infile_path
    cfg = config.config()
    cfg.load_file(config_path, data_path)
    bin_var_list = [
        tmp for tmp in cfg.bin_var_list
        if tmp in list(cfg.dataset_train.columns)
    ]

    for var in bin_var_list:
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(), (var)] = -1

    change_feature_dtype(cfg.dataset_train, cfg.variable_type)
    rst = []

    print('process woe transformation of continuous variables: ',
          time.asctime(time.localtime(time.time())))
    print('cfg.global_bt', cfg.global_bt)
    print('cfg.global_gt', cfg.global_gt)
    # process continuous variables
    for var in bin_var_list:
        rst.append(
            proc_woe_continuous(cfg.dataset_train,
                                var,
                                cfg.global_bt,
                                cfg.global_gt,
                                cfg.min_sample,
                                alpha=0.05))

    # process discrete variables
    print('process woe transformation of discrete variables: ',
          time.asctime(time.localtime(time.time())))
    for var in [
            tmp for tmp in cfg.discrete_var_list
            if tmp in list(cfg.dataset_train.columns)
    ]:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = 'missing'
        rst.append(
            proc_woe_discrete(cfg.dataset_train,
                              var,
                              cfg.global_bt,
                              cfg.global_gt,
                              cfg.min_sample,
                              alpha=0.05))

    feature_detail = eval.eval_feature_detail(rst, outfile_path)

    print('save woe transformation rule into pickle: ',
          time.asctime(time.localtime(time.time())))
    output = open(rst_path, 'wb')
    pickle.dump(rst, output)
    output.close()

    return feature_detail, rst
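A typical end-to-end run pairs this trainer with the path-based process_woe_trans from the first example: fit and pickle the rules once, then apply them to any file with the same schema. All paths below are placeholders.

# 1) fit WOE rules on the training file and pickle them
feature_detail, rst = process_train_woe(
    infile_path='data/train_raw.csv',
    outfile_path='model/woe_feature_detail.csv',
    rst_path='model/rst.pkl',
    config_path='config/config_model.csv')

# 2) apply the pickled rules to a scoring file
process_woe_trans(in_data_path='data/score_raw.csv',
                  rst_path='model/rst.pkl',
                  out_path='data/score_woe.csv',
                  config_path='config/config_model.csv')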
def process_train_woe(infile_path=None, outfile_path=None, rst_path=None):
    print 'run into process_train_woe: \n', time.asctime(
        time.localtime(time.time()))
    config_path = r'E:\Code\Python_ML_Code\cs_model\config\config_cs_daily_model_lr.csv'
    data_path = infile_path
    cfg = config.config()
    cfg.load_file(config_path, data_path)

    # rst = []
    output = open(rst_path, 'rb')
    rst = pickle.load(output)
    output.close()

    exists_var_list = [iv.var_name for iv in rst]
    bin_var_list = [
        tmp for tmp in cfg.bin_var_list if
        tmp in list(cfg.dataset_train.columns) and tmp not in exists_var_list
    ]

    for var in bin_var_list:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(), (var)] = -1

    # change feature dtypes
    fp.change_feature_dtype(cfg.dataset_train, cfg.variable_type)

    # process woe transformation of continuous variables
    print 'process woe transformation of continuous variables: \n', time.asctime(
        time.localtime(time.time()))
    print 'cfg.global_bt', cfg.global_bt
    print 'cfg.global_gt', cfg.global_gt

    for var in bin_var_list:
        print var
        if len(rst) != 0:
            # reload the checkpointed result list before appending
            output = open(rst_path, 'rb')
            rst = pickle.load(output)
            output.close()
            print 'load'
        rst.append(
            fp.proc_woe_continuous(cfg.dataset_train,
                                   var,
                                   cfg.global_bt,
                                   cfg.global_gt,
                                   cfg.min_sample,
                                   alpha=0.05))
        # checkpoint: persist the result list after each variable
        output = open(rst_path, 'wb')
        pickle.dump(rst, output)
        output.close()
        print 'dump'

    # process woe transformation of discrete variables
    print 'process woe transformation of discrete variables: \n', time.asctime(
        time.localtime(time.time()))
    for var in [
            tmp for tmp in cfg.discrete_var_list
            if tmp in list(cfg.dataset_train.columns)
            and tmp not in exists_var_list
    ]:
        print var
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = 'missing'
        if len(rst) != 0:
            # reload the checkpointed result list before appending
            output = open(rst_path, 'rb')
            rst = pickle.load(output)
            output.close()
            print 'load'
        rst.append(
            fp.proc_woe_discrete(cfg.dataset_train,
                                 var,
                                 cfg.global_bt,
                                 cfg.global_gt,
                                 cfg.min_sample,
                                 alpha=0.05))
        # checkpoint: persist the result list after each variable
        output = open(rst_path, 'wb')
        pickle.dump(rst, output)
        output.close()
        print 'dump'

    feature_detail = eval.eval_feature_detail(rst, outfile_path)
    return feature_detail, rst
Example 11
def process_train_woe(infile_path=None,
                      outfile_path=None,
                      rst_path=None,
                      config_path=None,
                      rebin_feature_path=None):
    print('run into process_train_woe: \n',
          time.asctime(time.localtime(time.time())))
    data_path = infile_path
    cfg = config.config()
    cfg.load_file(config_path, data_path, rebin_feature_path)
    bin_var_list = [
        tmp for tmp in cfg.bin_var_list
        if tmp in list(cfg.dataset_train.columns)
    ]
    orig_dataset_train = cfg.dataset_train

    # change feature dtypes
    change_feature_dtype(cfg.dataset_train, cfg.variable_type)
    rst = []

    print('cfg.global_bt', cfg.global_bt)
    print('cfg.global_gt', cfg.global_gt)
    print('cfg.global_categorical_missing', cfg.global_categorical_missing)
    print('cfg.global_numeric_missing', cfg.global_numeric_missing)

    # process woe transformation of continuous variables
    print('process woe transformation of continuous variables: \n',
          time.asctime(time.localtime(time.time())))
    for var in bin_var_list:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = cfg.global_numeric_missing
        rst.append(
            proc_woe_continuous(cfg.dataset_train,
                                var,
                                cfg.global_bt,
                                cfg.global_gt,
                                cfg.min_sample,
                                cfg.global_numeric_missing,
                                cfg.global_categorical_missing,
                                alpha=0.05))

    # process woe transformation of continuous variables based on the re-binning logic provided
    print(
        'process woe transformation of continuous variables based on rebin logic: \n',
        time.asctime(time.localtime(time.time())))
    rebin_var_list = [
        tmp for tmp in cfg.rebin_var_list
        if tmp in list(cfg.dataset_train.columns)
    ]
    for var in rebin_var_list:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = cfg.global_numeric_missing
        var_df = cfg.dataset_rebin.loc[cfg.dataset_rebin['var_name'] == var]
        split_list = list(np.unique(var_df[['split']].astype(float)))
        rst.append(
            proc_woe_continuous_rebin(cfg.dataset_train,
                                      var,
                                      split_list,
                                      cfg.global_bt,
                                      cfg.global_gt,
                                      cfg.min_sample,
                                      cfg.global_numeric_missing,
                                      cfg.global_categorical_missing,
                                      alpha=0.05))

    # process woe transformation of discrete variables
    print('process woe transformation of discrete variables: \n',
          time.asctime(time.localtime(time.time())))
    discrete_var_list = [
        tmp for tmp in cfg.discrete_var_list
        if tmp in list(cfg.dataset_train.columns)
    ]
    for var in discrete_var_list:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = cfg.global_categorical_missing
        rst.append(
            proc_woe_discrete(cfg.dataset_train,
                              var,
                              cfg.global_bt,
                              cfg.global_gt,
                              cfg.min_sample,
                              cfg.global_numeric_missing,
                              cfg.global_categorical_missing,
                              alpha=0.05))

    # process woe transformation of discrete variables based on re-binning logic
    print(
        'process woe transformation of discrete variables based on rebin logic: \n',
        time.asctime(time.localtime(time.time())))
    rebin_discrete_var_list = [
        tmp for tmp in cfg.rebin_discrete_var_list
        if tmp in list(cfg.dataset_train.columns)
    ]
    for var in rebin_discrete_var_list:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = cfg.global_categorical_missing
        var_df = cfg.dataset_rebin.loc[cfg.dataset_rebin['var_name'] == var]
        var_df.loc[:, 'split'] = var_df['split'].astype(object)
        rebin_list = list(np.unique(var_df[['split']]))
        rst.append(
            proc_woe_discrete_rebin(cfg.dataset_train,
                                    var,
                                    rebin_list,
                                    cfg.global_bt,
                                    cfg.global_gt,
                                    cfg.min_sample,
                                    cfg.global_numeric_missing,
                                    cfg.global_categorical_missing,
                                    alpha=0.05))

    feature_detail = woeeval.eval_feature_detail(rst, outfile_path)

    import pandas as pd
    pd.options.display.float_format = '{:.3f}'.format
    for var in bin_var_list + rebin_var_list + discrete_var_list + rebin_discrete_var_list:
        missing_obs = cfg.dataset_train.loc[cfg.dataset_train[var].isin(
            [cfg.global_numeric_missing,
             cfg.global_categorical_missing])].shape[0]
        total_obs = orig_dataset_train[var].shape[0]
        valid_obs = total_obs - missing_obs
        print('variable =', var, '\t# obs =', total_obs, '\t# valid =',
              valid_obs, '\t% valid =', valid_obs * 100.0 / total_obs)
        df = feature_detail.loc[feature_detail['var_name'] == var]
        print(df[[
            'split_list', 'sub_total_sample_num', 'positive_sample_num',
            'weight_positive_freq', 'weight_negative_freq',
            'perc_cum_weight_freq', 'perc_cum_weight_positive_freq',
            'perc_cum_weight_negative_freq', 'woe_list', 'iv_list', 'ks_list'
        ]])
        woeeval.plot_woe(df, var)

    s = 'summary of WOE transformation'
    print(s.center(60, '-'))
    smry_df = feature_detail[['var_name', 'iv', 'maxks', 'linearity'
                              ]].drop_duplicates().sort_values('iv',
                                                               ascending=False)
    print(smry_df)

    print('save woe transformation rule into pickle: \n',
          time.asctime(time.localtime(time.time())))
    output = open(rst_path, 'wb')
    pickle.dump(rst, output)
    output.close()

    return feature_detail, rst
Example 12
def process_woe_trans(in_data_path=None,
                      rst_path=None,
                      out_path=None,
                      config_path=None,
                      rebin_feature_path=None):
    cfg = config.config()
    cfg.load_file(config_path, in_data_path, rebin_feature_path)

    for var in [
            tmp for tmp in cfg.bin_var_list
            if tmp in list(cfg.dataset_train.columns)
    ]:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = cfg.global_numeric_missing

    rebin_var_list = [
        tmp for tmp in cfg.rebin_var_list
        if tmp in list(cfg.dataset_train.columns)
    ]
    for var in rebin_var_list:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = cfg.global_numeric_missing

    for var in [
            tmp for tmp in cfg.discrete_var_list
            if tmp in list(cfg.dataset_train.columns)
    ]:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = cfg.global_categorical_missing

    rebin_discrete_var_list = [
        tmp for tmp in cfg.rebin_discrete_var_list
        if tmp in list(cfg.dataset_train.columns)
    ]
    for var in rebin_discrete_var_list:
        # fill null
        cfg.dataset_train.loc[cfg.dataset_train[var].isnull(),
                              (var)] = cfg.global_categorical_missing
        # First re-bin the raw categories using the split values from the
        # rebin file, so the WOE transformation is applied correctly.
        rebin_var_df = cfg.dataset_rebin.loc[cfg.dataset_rebin['var_name'] ==
                                             var]
        rebin_var_df['split'] = rebin_var_df['split'].astype(object)
        rebin_list = list(np.unique(rebin_var_df[['split']]))
        for rebin_val in rebin_list:
            cfg.dataset_train.loc[cfg.dataset_train[var].isin(eval(rebin_val)),
                                  (var)] = str(rebin_val).strip('[]').replace(
                                      '\'', '')

    change_feature_dtype(cfg.dataset_train, cfg.variable_type)

    output = open(rst_path, 'rb')
    rst = pickle.load(output)
    output.close()

    # Training dataset Woe Transformation
    for r in rst:
        cfg.dataset_train[r.var_name] = woe_trans(
            cfg.dataset_train[r.var_name], r)
    # Output to a csv
    cfg.dataset_train.to_csv(out_path)
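The re-binning variants take an extra rebin_feature_path CSV whose var_name/split rows override the automatic splits. A sketch pairing the two functions above; all paths are placeholders and the rebin CSV layout is assumed to be whatever cfg.load_file and cfg.dataset_rebin expect.

# fit with manual re-binning overrides
feature_detail, rst = process_train_woe(
    infile_path='data/train_raw.csv',
    outfile_path='model/woe_feature_detail.csv',
    rst_path='model/rst.pkl',
    config_path='config/config_model.csv',
    rebin_feature_path='config/features_rebin.csv')

# apply the same rules (and the same re-binning) to new data
process_woe_trans(in_data_path='data/score_raw.csv',
                  rst_path='model/rst.pkl',
                  out_path='data/score_woe.csv',
                  config_path='config/config_model.csv',
                  rebin_feature_path='config/features_rebin.csv')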