Code example #1 (score: 0)
File: rf_compare.py — Project: catalinux/pml1
def get_pca_model(n_c):
    """Project the data onto ``n_c`` PCA components, then train and
    evaluate ``modelAfterPCA`` on the projected data.

    Relies on module-level globals: X_train_std, X_test, imp, std,
    modelAfterPCA, Y_train, Y_test, PCA, get_conclusion.  Results are
    published back through globals (side-effect style kept for callers).

    Parameters
    ----------
    n_c : int
        Number of principal components to keep.
    """
    global pca, X_train_pca, X_imp_test, X_test_std, X_test_pca, Y_pred, row
    pca = PCA(n_components=n_c, random_state=42)
    X_train_pca = pca.fit_transform(X_train_std)
    X_imp_test = imp.transform(X_test)
    # BUG FIX: the original re-ran fit_transform on the TEST data for both
    # the scaler and the PCA.  That leaks test-set statistics and, worse,
    # projects the test samples onto a *different* component basis than the
    # one the model was trained on.  Reuse the train-fitted transformers.
    X_test_std = std.transform(X_imp_test)
    X_test_pca = pca.transform(X_test_std)
    modelAfterPCA.fit(X_train_pca, Y_train)
    Y_pred = modelAfterPCA.predict(X_test_pca)
    # NOTE(review): argument order (pred, true) differs from runByImputer,
    # which calls get_conclusion(Y_test, Y_pred, ...) — confirm which order
    # get_conclusion actually expects.
    row = get_conclusion(Y_pred, Y_test, "pca" + str(n_c))
Code example #2 (score: 0)
def pca_model_smote(n_c):
    """Like get_pca_model, but oversample the PCA-projected training set
    with SMOTE before fitting ``modelAfterPCA``.

    Relies on module-level globals: X_train_std, X_test, imp, std,
    modelAfterPCA, Y_train, Y_test, PCA, over_sampling, get_conclusion.

    Parameters
    ----------
    n_c : int
        Number of principal components to keep.

    Returns
    -------
    The conclusion row produced by ``get_conclusion``.
    """
    global pca, X_train_pca, X_imp_test, X_test_std, X_test_pca, Y_pred, row
    pca = PCA(n_components=n_c, random_state=42)
    X_train_pca = pca.fit_transform(X_train_std)
    X_imp_test = imp.transform(X_test)
    # BUG FIX: reuse the train-fitted scaler and PCA instead of re-fitting
    # them on the test set (leakage + mismatched projection basis).
    X_test_std = std.transform(X_imp_test)
    X_test_pca = pca.transform(X_test_std)
    sm = over_sampling.SMOTE()
    # fit_sample() was removed from imbalanced-learn in 0.8; fit_resample()
    # is the supported name (available since 0.4) and returns the same pair.
    X_train_sampled, Y_train_sampled = sm.fit_resample(X_train_pca, Y_train)
    modelAfterPCA.fit(X_train_sampled, Y_train_sampled)
    Y_pred = modelAfterPCA.predict(X_test_pca)
    row = get_conclusion(Y_pred, Y_test, "smote-pca" + str(n_c))
    return row
Code example #3 (score: 0)
File: rf_compare.py — Project: catalinux/pml1
def runByImputer(X_train, Y_train, X_test, Y_test, prefix):
    """Impute missing values with the given strategy, then train and
    evaluate a set of RandomForest variants, appending one conclusion row
    per model to the module-level ``conclusion`` list.

    Parameters
    ----------
    X_train, Y_train : training features / labels (may contain np.nan in X).
    X_test, Y_test : test features / labels.
    prefix : str
        SimpleImputer strategy name (e.g. "mean", "median"); also used as
        the label prefix in the conclusion rows.
    """
    print("Start ", prefix)
    imp = SimpleImputer(missing_values=np.nan, strategy=prefix)

    rf_ = []
    basicRF = RandomForestClassifier(n_estimators=100)
    basicRF.name = "basic"
    rf_.append(basicRF)

    tunnedRF = RandomForestClassifier(max_depth=None,
                                      n_estimators=311,
                                      min_samples_split=2,
                                      min_samples_leaf=1,
                                      max_features='sqrt',
                                      bootstrap=False,
                                      random_state=0)
    tunnedRF.name = "tuned"
    rf_.append(tunnedRF)

    # {'n_estimators': 600, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 30, 'bootstrap': False}
    tunnedRFScoring = RandomForestClassifier(max_depth=30,
                                             n_estimators=600,
                                             min_samples_split=2,
                                             min_samples_leaf=1,
                                             max_features='sqrt',
                                             bootstrap=False,
                                             random_state=0)
    tunnedRFScoring.name = "tuned-scoring"
    rf_.append(tunnedRFScoring)

    # model_full_rf = RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=0, n_jobs=-1)
    # model_full_rf.name = "article"
    # rf_.append(model_full_rf)

    # BUG FIX: the imputer was re-fitted inside the loop for every model
    # (pure waste) and, worse, re-fitted on the TEST set before transforming
    # it.  Fitting on the test set leaks test statistics and makes the
    # train/test imputation values inconsistent.  Fit once on the training
    # data and reuse that fit for both splits.
    imp.fit(X_train)
    X_imp_train = imp.transform(X_train)
    X_imp_test = imp.transform(X_test)

    for model in rf_:
        print("Get conclustion for ", model.name)
        model.fit(X_imp_train, Y_train)
        Y_pred = model.predict(X_imp_test)
        print("ask conconclustion")
        # NOTE(review): argument order (true, pred) differs from the PCA
        # helpers, which call get_conclusion(Y_pred, Y_test, ...) — confirm
        # which order get_conclusion expects.
        row = get_conclusion(Y_test, Y_pred, prefix + '_' + model.name)
        conclusion.append(row)
Code example #4 (score: 0)
from xgboost import XGBClassifier

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

from lib.read import cost_confusion_matrix, read_data, get_conclusion

# Load the pre-split dataset via the project helper (lib.read).
X_train, Y_train, X_test, Y_test = read_data()

# Collected conclusion rows.  NOTE(review): nothing is appended to this
# list within the visible part of the script — either the appends happen
# further down (this chunk appears truncated) or the list is dead code.
conclusions = []

# Baseline XGBoost with default hyper-parameters.
basic = XGBClassifier()
basic.name = "basic"
basic.fit(X_train, Y_train)
Y_pred = basic.predict(X_test)
cm = confusion_matrix(Y_test, Y_pred)
# Project-specific cost weighting of the confusion matrix (side effect /
# print — see lib.read.cost_confusion_matrix).
cost_confusion_matrix(cm)
# NOTE(review): argument order here is (true, pred), whereas the PCA
# helpers elsewhere call get_conclusion(pred, true, ...) — confirm which
# order get_conclusion expects.
row = get_conclusion(Y_test, Y_pred, 'basic')

# "{'subsample': 0.9, 'silent': False, 'reg_lambda': 10.0, 'n_estimators': 100, 'min_child_weight': 0.5, 'max_depth': 10, 'learning_rate': 0.2, 'gamma': 0, 'colsample_bytree': 0.7, 'colsample_bylevel': 0.4}
# Model rebuilt from the best hyper-parameters found by a search (dict above).
bestParamsModel = XGBClassifier(subsample=0.9,
                                silent=False,
                                reg_lambda=10,
                                n_estimators=100,
                                min_child_weight=0.5,
                                max_depth=10,
                                learning_rate=0.2,
                                gamma=0,
                                colsample_bytree=0.7,
                                colsample_bylevel=0.4)
bestParamsModel.name = "bestParamsModel"
bestParamsModel.fit(X_train, Y_train)
Y_pred = bestParamsModel.predict(X_test)