def __init__(self, pipeline_):
    """Load the datasets, engineer features and set up empty run state.

    The training and test tables are read from ``data/train.csv`` and
    ``data/test.csv`` (first column used as the index), passed through the
    ``AddFeatures`` steps, and the ``cardio`` column is then split off the
    training frame as the target.

    Args:
        pipeline_: Object stored unchanged on ``self.pipeline``
            (presumably the estimator pipeline to evaluate -- confirm
            against the callers).

    Attributes set:
        pipeline: The ``pipeline_`` argument.
        model, kf: Placeholders, initialised to ``None``.
        results: Empty ``DataFrame``.
        Y: Values of the ``cardio`` column of the processed training frame.
        X: Processed training frame with ``cardio`` removed.
        test: Processed test frame.
    """
    # Bookkeeping attributes do not depend on the data, so set them first.
    self.pipeline = pipeline_
    self.model = None
    self.kf = None
    self.results = pd.DataFrame()

    raw_train = pd.read_csv('data/train.csv', index_col=0)
    raw_test = pd.read_csv('data/test.csv', index_col=0)

    # Feature engineering; the order of the steps is kept exactly as is.
    feature_builder = AddFeatures(raw_train, raw_test)
    feature_builder.add_bmi_sist_dist_map()
    feature_builder.add_f_score()
    feature_builder.add_ap_features()
    feature_builder.del_features()

    train_frame = feature_builder.train
    self.test = feature_builder.test

    # Take the target before dropping it. The drop is in place, so it
    # mutates the same frame object that ``feature_builder.train`` refers to.
    self.Y = train_frame['cardio'].values
    train_frame.drop('cardio', axis=1, inplace=True)
    self.X = train_frame
from sklearn.ensemble import RandomForestClassifier from hyperopt import fmin, hp, STATUS_OK, Trials, tpe import warnings from FEATURES import AddFeatures warnings.filterwarnings('ignore') pd.set_option('display.max_columns', 16) pd.set_option('display.width', 1000) np.random.seed(42) train = pd.read_csv('data/train.csv', index_col=0) test = pd.read_csv('data/test.csv', index_col=0) add_features = AddFeatures(train, test) add_features.add_bmi_sist_dist_map() add_features.add_f_score() train = add_features.train test = add_features.test Y = train['cardio'].values train.drop('cardio', axis=1, inplace=True) X = train.as_matrix() def hyperopt_train_test(hpparams): params_est = { 'n_estimators': int(hpparams['n_estimators']), 'criterion': 'gini', 'max_features': hpparams['max_features'],