Example #1
0
File: ease.py Project: andvikt/woe
def process(data_in, config_features='config'):
    '''
    Train WOE rules on a CSV file, apply them, and export IV statistics.

    File locations are derived from the current working directory:

    * ``<data_in>.csv`` -- input data
    * ``<config_features>.csv`` -- feature configuration
    * ``<data_in>_stats_wide.csv`` -- passed to training as ``outfile_path``
    * ``woe_rule.pkl`` -- passed to training as ``rst_path``
    * ``<data_in>_out.csv`` -- output path given to the WOE transformation
    * ``<data_in>_stats.xlsx`` -- workbook with sheets ``Short`` and ``Full``

    :param data_in: base name (without ``.csv``) of the input data file
    :param config_features: base name (without ``.csv``) of the config file
    :return: the ``feature_detail`` object returned by
        ``fp.process_train_woe``
    '''
    cwd = os.getcwd()
    config_path = os.path.join(cwd, config_features + '.csv')
    data_path = os.path.join(cwd, data_in + '.csv')
    feature_detail_path = os.path.join(cwd, data_in + '_stats_wide.csv')
    rst_pkl_path = os.path.join(cwd, 'woe_rule.pkl')

    # Train the WOE rule; the second return value is not needed here.
    feature_detail, _rst = fp.process_train_woe(
        infile_path=data_path,
        outfile_path=feature_detail_path,
        rst_path=rst_pkl_path,
        config_path=config_path)

    # Apply the trained rule to the same input data.
    woe_train_path = os.path.join(cwd, data_in + '_out.csv')
    fp.process_woe_trans(data_path, rst_pkl_path, woe_train_path, config_path)

    # Short statistics: highest IV per variable, largest first.
    stats = (feature_detail.groupby('var_name')
             .agg({'iv': 'max'})
             .sort_values(['iv'], ascending=False))

    # The context manager saves and closes the workbook even when a sheet
    # fails to write; ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(data_in + '_stats.xlsx') as writer:
        stats.to_excel(writer, sheet_name='Short')
        feature_detail.to_excel(writer, sheet_name='Full')

    return feature_detail
Example #2
0
    def transform(self, X, y=None):
        """Apply the stored WOE rules to ``X`` and return the result.

        Parameters
        ----------
        X : object supporting ``copy()`` and column assignment
            The input samples (presumably a pandas DataFrame -- confirm
            against callers).
        y : ignored
            Accepted but never read by this method.

        Returns
        -------
        The output of ``fp.process_woe_trans`` with its ``'target'``
        column dropped.

        Raises
        ------
        ValueError
            If ``self.config_filepath`` or
            ``self.load_woe_pickle_filepath`` is ``None``.
        """
        if self.config_filepath is None:
            raise ValueError(
                "Config filepath does not exist. "
                "Please define a config file first.")

        if self.load_woe_pickle_filepath is None:
            raise ValueError(
                "Load WOE pickle filepath does not exist. "
                "Either fit model or load model first.")

        # Work on a copy so the placeholder column never reaches the
        # caller's object.
        frame = X.copy()
        # Placeholder target; presumably required by process_woe_trans --
        # TODO confirm. It is removed again before returning.
        frame['target'] = -1

        # NOTE: silencing stdout around this call (contextlib.redirect_stdout
        # to os.devnull) was sketched here previously but is not active.
        woe_frame = fp.process_woe_trans(
            frame, self.load_woe_pickle_filepath, self.config_filepath)

        return woe_frame.drop('target', axis=1)
Example #3
0
import woe.GridSearch as gs

if __name__ == '__main__':
    # Every input and output file lives in the current working directory.
    # os.path.join picks the platform's separator; a hard-coded backslash
    # only yields a valid path on Windows.
    work_dir = os.getcwd()
    config_path = os.path.join(work_dir, 'config.csv')
    data_path = os.path.join(work_dir, 'UCI_Credit_Card.csv')
    feature_detail_path = os.path.join(work_dir, 'features_detail.csv')
    rst_pkl_path = os.path.join(work_dir, 'woe_rule.pkl')

    # Train the WOE rule.
    feature_detail, rst = fp.process_train_woe(
        infile_path=data_path,
        outfile_path=feature_detail_path,
        rst_path=rst_pkl_path,
        config_path=config_path)

    # Apply the WOE transformation to produce the training set.
    woe_train_path = os.path.join(work_dir, 'dataset_train_woed.csv')
    fp.process_woe_trans(data_path, rst_pkl_path, woe_train_path, config_path)

    # The same dataset is reused as the test dataset here.
    woe_test_path = os.path.join(work_dir, 'dataset_test_woed.csv')
    fp.process_woe_trans(data_path, rst_pkl_path, woe_test_path, config_path)

    print('###TRAIN SCORECARD MODEL###')
    # Paths and plot titles collected for the scorecard model step.
    params = {}
    params['dataset_path'] = woe_train_path
    params['validation_path'] = woe_test_path
    params['config_path'] = config_path

    params['df_coef_path'] = os.path.join(work_dir, 'df_model_coef_path.csv')
    params['pic_coefpath'] = os.path.join(work_dir, 'model_coefpath.png')
    params['pic_performance'] = os.path.join(
        work_dir, 'model_performance_path.png')
    params['pic_coefpath_title'] = 'model_coefpath'
    params['pic_performance_title'] = 'model_performance_path'