Ejemplo n.º 1
0
    def fit(self, X, y=None):
        """Do nothing and return this instance unchanged.

        Both ``X`` and ``y`` are accepted but ignored; no state is learned.
        NOTE(review): the signature looks like the scikit-learn estimator
        ``fit`` convention -- confirm against the enclosing class.
        """
        return self

    def transform(self, X):
        """Return ``X`` converted to the dtype stored on this instance.

        The conversion is delegated to ``X.astype`` with ``self.dtype`` as its
        only argument, so ``X`` must expose an ``astype`` method.
        NOTE(review): presumably a NumPy array or pandas object -- confirm
        against the callers.
        """
        converted = X.astype(self.dtype)
        return converted


# Script entry point: read the default config, load the input table and build
# the preprocessing pipelines for the numerical and categorical predictors.
if __name__ == '__main__':
    config = get_config()

    # Column names are taken from the config: the target plus two groups of
    # predictors that are preprocessed differently below.
    target = config['target']
    categorical_predictors = config['cat_predictors']
    numerical_predictors = config['num_predictors']

    # The input file is always named 'diw.txt'; its directory is the
    # configured 'diw_path' joined onto the project directory.
    diw_path = os.path.join(project_directory(), config['diw_path'], 'diw.txt')

    # The field separator comes from the config rather than being hard-coded.
    diw_df = pd.read_csv(diw_path, sep=config['data_sep'])

    # Numerical branch: DataFrameSelector -> Imputer -> MinMaxScaler.
    # NOTE(review): Imputer is built with its defaults; presumably the legacy
    # scikit-learn Imputer -- confirm against the file's imports.
    num_pipeline = Pipeline([('selector',
                              DataFrameSelector(numerical_predictors)),
                             ('imputer', Imputer()),
                             ('scaler', MinMaxScaler())])
    # The cat vars we have now don't require imputing
    # Categorical branch: DataFrameSelector -> MultiColumnLabelEncoder ->
    # OneHotEncoder(sparse=False).
    # NOTE(review): sparse=False presumably requests a dense array -- confirm
    # against the installed scikit-learn version.
    cat_pipeline = Pipeline([('selector',
                              DataFrameSelector(categorical_predictors)),
                             ('label_encoder', MultiColumnLabelEncoder()),
                             ('one_hot_encoder', OneHotEncoder(sparse=False))])
    target_pipeline = Pipeline([
        ('selector', DataFrameSelector(target)),
        ('dtype_transform', DataTypeTransformer(np.int8)),
Ejemplo n.º 2
0
import keras

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import classification_report, auc, roc_curve

import numpy as np
import pandas as pd

from dirutil import project_directory
from configfile import get_config
from transformer import clean_nulls, SamplerFactory

# Script entry point: read the 'standard_model' config, load the input table
# and keep only the predictor and target columns.
if __name__ == '__main__':
    # sample_method and train_pct are not used in the visible part of the
    # script; presumably they are consumed further down -- TODO confirm.
    sample_method = 'under'
    project_path = project_directory()
    config = get_config('standard_model')
    train_pct = .9

    # The input file is always named 'diw.txt'; its directory is the
    # configured 'diw_path' joined onto the project directory.
    performance_data_path = os.path.join(project_path, config['diw_path'],
                                         'diw.txt')

    # The field separator comes from the config rather than being hard-coded.
    performance_data = pd.read_csv(performance_data_path,
                                   sep=config['data_sep'])

    target = config['target']
    predictors = config['predictors']

    # Pull out the predictors & target
    # `predictors + [target]` appends the target as one extra column name, so
    # `predictors` is presumably a list of names and `target` a single name.
    model_data = performance_data[predictors + [target]]
Ejemplo n.º 3
0
def get_config(filename='default'):
    """Load a JSON configuration file from the project's ``config`` directory.

    The file read is ``<project_directory()>/config/<filename>.json``.

    Args:
        filename: Base name of the config file, without the ``.json``
            extension. Defaults to ``'default'``.

    Returns:
        The decoded JSON document, i.e. whatever ``json.load`` produces for
        the file's top-level value.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    config_path = os.path.join(project_directory(), 'config',
                               filename + '.json')
    # JSON text is UTF-8 by specification; pin the encoding so that parsing
    # does not depend on the platform's locale default.
    with open(config_path, 'r', encoding='utf-8') as cfg_file:
        # Parse straight from the stream instead of read() + loads().
        return json.load(cfg_file)