예제 #1
0
파일: 05-keras.py 프로젝트: thoolihan/porto
# Network: two 64-unit ReLU hidden layers with dropout, then a single
# sigmoid unit that emits one value in (0, 1) per row.
# NOTE(review): `model` is created above this excerpt; this is presumably
# the first layer added, which is why it declares the input width `n`.
model.add(Dense(units=64, input_dim=n))
model.add(Activation('relu'))
model.add(Dropout(cfg["dropout"]))

# A non-first layer in a Sequential model takes its input width from the
# previous layer's output (64 here), so the copy-pasted `input_dim=n`
# was dropped: it was ignored and misstated the layer's real input size.
model.add(Dense(units=64))
model.add(Activation('relu'))
model.add(Dropout(cfg["dropout"]))

model.add(Dense(units=1))
model.add(Activation('sigmoid'))

# Binary cross-entropy is the matching loss for a single sigmoid output;
# with 'mse' the gradient is scaled by the sigmoid derivative and fades
# when the unit saturates, which slows learning on confident mistakes.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train on the training split; epoch count and batch size come from cfg.
logger.info("Fitting model on X_train...")
fit_options = {"epochs": cfg["epochs"], "batch_size": cfg["batch_size"]}
model.fit(X_train, y_train, **fit_options)

# Score the validation split with gini_normalized and log the result.
logger.info("Predicting on X_val...")
val_predictions = model.predict(X_val)
val_gini = gini_normalized(y_val, val_predictions)
val_message = "normalized gini score on validation set is {}".format(val_gini)
logger.info(val_message)

# Load the test set, run it through the same `pipe` transform, predict,
# and write the predictions out as the 'target' column of the submission.
logger.info("Loading and predicting on Test set...")
test = load_file("test")
# Constant column added before the transform.
# NOTE(review): presumably mirrors a column added to the training frame
# before `pipe` was fitted -- confirm against the code above this excerpt.
test["bias"] = 1
test_features = pipe.transform(test)
test['target'] = model.predict(test_features)
write_submission_file(test, columns=['target'], name='keras-v1')

# `start` is set outside this excerpt; report total elapsed wall time.
elapsed = datetime.now() - start
logger.info("Finished with time {}".format(elapsed))
예제 #2
0
# Split the training frame into features (X) and label (y).
X = train.drop('target', axis=1)
# Store the helper's result under its own name. The original rebound
# `drop_cols` to the returned value, shadowing the helper function so that
# any later call to `drop_cols(...)` would fail on a non-callable object.
cols_to_drop = drop_cols(X, names=True)
X.drop(cols_to_drop, axis=1, inplace=True)
y = train.target
# Computed after the drop so the result describes the reduced column set.
# NOTE(review): presumably positional indices of the categorical columns,
# since they are passed to OneHotEncoder(categorical_features=...) -- confirm.
cat_columns = get_cat_features_idx(X)

logger.info("Making Ensemble...")
# Three base learners combined below by soft voting.
classifiers = [
    ('xgb', XGBClassifier(learning_rate=0.07, reg_alpha=8, reg_lambda=0.75,
                          max_depth=4, n_estimators=800, gamma=3)),
    ('lgbm', LGBMClassifier(learning_rate=0.018, max_depth=6, num_leaves=11,
                            colsample_bytree=0.85)),
    ('rf', RandomForestClassifier(n_estimators=200, criterion='gini')),
]

# Pipeline: impute values equal to -1 with the column mode, one-hot encode
# the categorical columns, then feed the soft-voting ensemble.
model = Pipeline([
    ('impute', Imputer(missing_values=-1, strategy="most_frequent")),
    ('encode', OneHotEncoder(categorical_features=cat_columns,
                             handle_unknown='ignore')),
    ('ensemble', VotingClassifier(estimators=classifiers, voting='soft')),
])

logger.info("Fitting model on X...")
model.fit(X, y)

logger.info("Predicting score (w/Cross-Val) on X...")
# 3-fold cross-validated predictions; column 1 of predict_proba is kept as
# the score passed to gini_normalized.
results = cross_val_predict(model, X, y, cv=3, method='predict_proba')[:, 1]
score = gini_normalized(y, results)
logger.info("normalized gini score on training set is {}".format(score))

logger.info("Loading and predicting on Test set...")
test = load_file("test")
# Drop the same columns that were removed from the training features.
test.drop(cols_to_drop, axis=1, inplace=True)
test['target'] = model.predict_proba(test)[:, 1]
write_submission_file(test, columns=['target'], name='ensemble-v1')

# `start` is set outside this excerpt.
logger.info("Finished with time {}".format(datetime.now() - start))