Esempio n. 1
0
File: ease.py Progetto: andvikt/woe
def process(data_in, config_features='config'):
    '''
    Train WOE rules on a CSV file, apply them, and export IV statistics.

    Every CSV/pickle path handed to the ``fp`` helpers is resolved against
    the current working directory. The Excel report is written to
    ``<data_in>_stats.xlsx`` (a relative path) with two sheets: ``Short``
    (maximum ``iv`` per ``var_name``, sorted descending) and ``Full`` (the
    complete feature detail table).

    :param data_in: base name, without the ``.csv`` extension, of the input
        data file
    :param config_features: base name, without the ``.csv`` extension, of
        the feature configuration file
    :return: the feature detail table returned by ``fp.process_train_woe``
    '''
    cwd = os.getcwd()
    config_path = os.path.join(cwd, config_features + '.csv')
    data_path = os.path.join(cwd, data_in + '.csv')
    feature_detail_path = os.path.join(cwd, data_in + '_stats_wide.csv')
    rst_pkl_path = os.path.join(cwd, 'woe_rule.pkl')

    # Train the WOE rule; the second returned value is not needed here.
    feature_detail, _rst = fp.process_train_woe(
        infile_path=data_path,
        outfile_path=feature_detail_path,
        rst_path=rst_pkl_path,
        config_path=config_path)

    # Apply the trained rule to the very same input file.
    woe_train_path = os.path.join(cwd, data_in + '_out.csv')
    fp.process_woe_trans(data_path, rst_pkl_path, woe_train_path, config_path)

    # Short statistics: the highest IV seen for each variable, best first.
    # The string 'max' is used instead of the builtin, which newer pandas
    # versions warn about.
    stats = (feature_detail.groupby('var_name')
             .agg({'iv': 'max'})
             .sort_values(['iv'], ascending=False))

    # The context manager saves and closes the workbook even if a sheet
    # fails to write; ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(data_in + '_stats.xlsx') as writer:
        stats.to_excel(writer, sheet_name='Short')
        feature_detail.to_excel(writer, sheet_name='Full')

    return feature_detail
Esempio n. 2
0
def woe(data_path):
    """Train the Weight of Evidence rule for ``data_path``.

    The configuration path is obtained from ``config(data_path)`` and the
    feature detail output path is built under the module-level
    ``output_dir``.

    :param data_path: path of the input data file, passed to
        ``fp.process_train_woe`` as ``infile_path``
    :return: the feature detail object returned by ``fp.process_train_woe``
    """
    config_path = config(data_path)
    # NOTE(review): the separator is a hard-coded backslash, so this path is
    # only well-formed on Windows.
    feature_detail_path = output_dir + '\\features_detail.csv'

    # Train the Weight of Evidence rule.
    # NOTE(review): no rst_path is supplied, so no rule pickle path is handed
    # to process_train_woe -- confirm this is intended.
    feature_detail, _rst = fp.process_train_woe(
        infile_path=data_path,
        outfile_path=feature_detail_path,
        config_path=config_path)

    # Log completion before returning; the message reads
    # "WOE and IV calculation finished". It used to sit after the return
    # statement and therefore never ran.
    logging.info("WOE和IV计算完成")
    return feature_detail
Esempio n. 3
0
    def fit(self, X, y):
        """Train the WOE rules from the features and their target values.

        A copy of ``X`` receives ``y`` under the key ``'target'`` and is
        handed to ``fp.process_train_woe`` while standard output is
        redirected to the null device. Afterwards the pickle path used for
        saving becomes the path used for loading.

        Parameters
        ----------
        X : object supporting ``copy()`` and item assignment
            The training input samples (presumably a pandas DataFrame --
            TODO confirm). The caller's object is not modified.
        y : array-like
            The target values, stored in the copy as ``'target'``.
        """
        training_data = X.copy()
        training_data['target'] = y

        # Anything printed during training is sent to the null device.
        with open(os.devnull, 'w') as sink, contextlib.redirect_stdout(sink):
            fp.process_train_woe(
                training_data,
                FEATURE_DETAILS_OUTPUT,
                self.save_woe_pickle_filepath,
                self.config_filepath,
                self.min_sample_weight_config,
            )

        # Later loads read the pickle path that was just used for saving.
        self.load_woe_pickle_filepath = self.save_woe_pickle_filepath
Esempio n. 4
0
# -*- coding:utf-8 -*-
__author__ = 'boredbird'
import os
import numpy as np
import woe.feature_process as fp
import woe.GridSearch as gs

# Script entry point: train a WOE rule on the credit-card dataset, apply it
# to produce train/test files, then start collecting scorecard parameters.
if __name__ == '__main__':
    # All paths are built from the current working directory.
    # NOTE(review): the hard-coded '\\' separator makes these paths
    # Windows-only.
    config_path = os.getcwd() + '\\config.csv'
    data_path = os.getcwd() + '\\UCI_Credit_Card.csv'
    feature_detail_path = os.getcwd() + '\\features_detail.csv'
    rst_pkl_path = os.getcwd() + '\\woe_rule.pkl'
    # Train the WOE rule, passing the feature detail and rule pickle paths.
    feature_detail, rst = fp.process_train_woe(
        infile_path=data_path,
        outfile_path=feature_detail_path,
        rst_path=rst_pkl_path,
        config_path=config_path)
    # Apply the WOE transformation to produce the training dataset.
    woe_train_path = os.getcwd() + '\\dataset_train_woed.csv'
    fp.process_woe_trans(data_path, rst_pkl_path, woe_train_path, config_path)
    # The same input dataset is reused as the test dataset here.
    woe_test_path = os.getcwd() + '\\dataset_test_woed.csv'
    fp.process_woe_trans(data_path, rst_pkl_path, woe_test_path, config_path)

    print('###TRAIN SCORECARD MODEL###')
    # Collect the file paths for the scorecard model training step.
    params = {}
    params['dataset_path'] = woe_train_path
    params['validation_path'] = woe_test_path
    params['config_path'] = config_path