Beispiel #1
0
# Train a random forest on the project data set, cross-validate it, run the
# detection helper on the test split and plot the per-feature importances.
data_path = "data/"
X_train, X_test, Y_train, Y_test = get_all_data(data_path)

# --- Define model ---
# model = RandomForestClassifier()
# oob_score must be a real boolean. The previous string 'TRUE' only enabled
# out-of-bag scoring because any non-empty string is truthy, and scikit-learn
# releases that validate constructor parameters reject a string here.
model = RandomForestClassifier(n_estimators=800, oob_score=True)

# --- Cross-validation ---
# Runs on the training split only, before the final fit below.
detect_with_cross_validation(model, X_train, Y_train)

# --- Detecting ---
# Fit on the training split, then hand the fitted model and the test split
# to the detection helper.
model.fit(X_train, Y_train)
detect(model, X_test, Y_test)

# Disabled grid search over n_estimators, kept for reference.
# param = {
#     'n_estimators': [500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400]
# }
# kfold = StratifiedKFold(n_splits=10)
# grid_search = GridSearchCV(estimator=model, param_grid=param, scoring='accuracy', cv=kfold)
# grid_result = grid_search.fit(X_train, Y_train)
# print("Best: %f using %s" % (grid_result.best_score_, grid_search.best_params_))
#
# means = grid_result.cv_results_['mean_test_score']
# params = grid_result.cv_results_['params']
# for mean, param in zip(means, params):
#     print("%f  with:   %r" % (mean, param))

# One bar per feature, positioned by feature index.
# NOTE(review): nothing in this snippet calls pyplot.show() or savefig();
# confirm the figure is displayed or saved elsewhere.
importances = model.feature_importances_
pyplot.bar(range(len(importances)), importances)
Beispiel #2
0

# --- Cross-validation ---
# Hand the classifier and the training split to the cross-validation helper.
detect_with_cross_validation(clf, X_train, y_train)

# --- Learning ---
# Fit the classifier on the training split.
clf.fit(X_train, y_train)

# --- Detect ---
# Pass the fitted classifier and the test split to the detection helper.
detect(clf, X_test, y_test)

# score = clf.score(X_test, y_test)
# print score


# # iterate over classifiers
# for clf in classifiers:
#     # ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
#     clf.fit(X_train, y_train)
#     score = clf.score(X_test, y_test)
#     # clf.fit(X, y)
#     # score = clf.score(X_test_, y_test_)
#     print score

#     subsample=0.8,
#     colsample_bytree=0.8,
#     objective='binary:logistic',
#     nthread=4,
#     scale_pos_weight=1,
#     seed=27)
# --- Cross-validation ---
# Uses the training arrays only; must run before the final fit below.
detect_with_cross_validation(model, np_X_train, np_y_train)

# --- Detect model ---
# Fit on the training arrays, then pass the fitted model and the test arrays
# to the detection helper.
model.fit(np_X_train, np_y_train)
detect(model, np_X_test, np_y_test)

# pyplot.bar(range(len(model.feature_importances_)), model.feature_importances_)
# pyplot.show()

# param = {
#     'n_estimators': [1300, 1400, 1500, 1600, 1700, 1800], #[600, 700, 800, 900, 950, 1000, 1050, 1100, 1150, 1200],
#     'learning_rate': [0.1],
#     'max_depth': [10],
#     'min_child_weight': [1],
#     'gamma': [0],
#     'subsample': [0.8],
#     'colsample_bytree': [0.8],
#     'objective': ['binary:logistic'],
#     'nthread': [4],
#     'scale_pos_weight': [1],