def fit(self, X, y=None): return self def transform(self, X): return X.astype(self.dtype) if __name__ == '__main__': config = get_config() target = config['target'] categorical_predictors = config['cat_predictors'] numerical_predictors = config['num_predictors'] diw_path = os.path.join(project_directory(), config['diw_path'], 'diw.txt') diw_df = pd.read_csv(diw_path, sep=config['data_sep']) num_pipeline = Pipeline([('selector', DataFrameSelector(numerical_predictors)), ('imputer', Imputer()), ('scaler', MinMaxScaler())]) # The cat vars we have now don't require imputing cat_pipeline = Pipeline([('selector', DataFrameSelector(categorical_predictors)), ('label_encoder', MultiColumnLabelEncoder()), ('one_hot_encoder', OneHotEncoder(sparse=False))]) target_pipeline = Pipeline([ ('selector', DataFrameSelector(target)), ('dtype_transform', DataTypeTransformer(np.int8)),
import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import classification_report, auc, roc_curve
import numpy as np
import pandas as pd

from dirutil import project_directory
from configfile import get_config
from transformer import clean_nulls, SamplerFactory


if __name__ == '__main__':
    # Run-level settings.
    # NOTE(review): presumably the resampling strategy and the training
    # fraction used further down the script -- confirm against their uses.
    sample_method = 'under'
    train_pct = .9

    # Locate the project root and load the 'standard_model' configuration.
    project_path = project_directory()
    config = get_config('standard_model')

    # Read the delimited 'diw.txt' file from the configured directory.
    performance_data_path = os.path.join(
        project_path,
        config['diw_path'],
        'diw.txt',
    )
    performance_data = pd.read_csv(
        performance_data_path,
        sep=config['data_sep'],
    )

    # Keep only the configured predictor columns followed by the target.
    target = config['target']
    predictors = config['predictors']
    model_columns = predictors + [target]
    model_data = performance_data[model_columns]
def get_config(filename='default'):
    """Load a JSON configuration file from the project's ``config`` directory.

    The file is looked up at ``<project_directory()>/config/<filename>.json``.

    Args:
        filename: Base name of the configuration file, without the
            ``.json`` extension. Defaults to ``'default'``.

    Returns:
        The parsed JSON document, as produced by ``json.load``.

    Raises:
        OSError: If the configuration file cannot be opened.
        ValueError: If the file content is not valid UTF-8 or not valid JSON.
    """
    config_path = os.path.join(project_directory(), 'config', filename + '.json')
    # JSON is UTF-8 by specification; pin the encoding so parsing does not
    # depend on the platform's locale default.
    with open(config_path, 'r', encoding='utf-8') as cfg_file:
        return json.load(cfg_file)