예제 #1
0
    def __init__(self, pipeline_):
        """Load the data sets, run feature engineering and set up state.

        Reads ``data/train.csv`` and ``data/test.csv`` (first column used as
        the index), passes both frames through ``AddFeatures`` and stores the
        outcome on the instance: the ``cardio`` column as the target array
        ``self.Y``, the remaining training columns as ``self.X`` and the
        processed test frame as ``self.test``.

        Args:
            pipeline_: Object stored unchanged as ``self.pipeline``.
        """
        engineer = AddFeatures(
            pd.read_csv('data/train.csv', index_col=0),
            pd.read_csv('data/test.csv', index_col=0),
        )
        # Each call updates the frames held by the AddFeatures instance.
        engineer.add_bmi_sist_dist_map()
        engineer.add_f_score()
        engineer.add_ap_features()
        engineer.del_features()

        features = engineer.train
        holdout = engineer.test

        # Capture the target values before its column is dropped in place.
        self.Y = features['cardio'].values
        features.drop('cardio', axis=1, inplace=True)
        self.X = features
        self.test = holdout

        # The model and ``kf`` start as None; results start as an empty frame.
        self.pipeline = pipeline_
        self.model = self.kf = None

        self.results = pd.DataFrame()
예제 #2
0
from sklearn.ensemble import RandomForestClassifier
from hyperopt import fmin, hp, STATUS_OK, Trials, tpe
import warnings
from FEATURES import AddFeatures

# Suppress every warning and widen pandas' console output for wide frames.
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 16)
pd.set_option('display.width', 1000)
# Seed NumPy's global random generator.
np.random.seed(42)

# Load the raw data sets; the first CSV column becomes the index.
train = pd.read_csv('data/train.csv', index_col=0)
test = pd.read_csv('data/test.csv', index_col=0)

# Run the feature-engineering steps on both frames.
add_features = AddFeatures(train, test)
add_features.add_bmi_sist_dist_map()
add_features.add_f_score()

train = add_features.train
test = add_features.test

# Split off the 'cardio' target, then turn the remaining columns into an
# ndarray of features.
Y = train['cardio'].values
train.drop('cardio', axis=1, inplace=True)
# DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same
# array and exists in every pandas release.
X = train.values


def hyperopt_train_test(hpparams):

    params_est = {
        'n_estimators': int(hpparams['n_estimators']),
        'criterion': 'gini',
        'max_features': hpparams['max_features'],