Esempio n. 1
0
# Cross-validation results as a table, best mean test score first
#   (note: includes all individuals with their mean, max, min,
#   and std test score).
out = pd.DataFrame(clf2.cv_results_).sort_values(
    "mean_test_score", ascending=False
)

# Bare expression: shows the top rows when run interactively.
out.head()


# Predict on X_test with clf2 and measure accuracy against y_test.
y_pred_gen = clf2.predict(X_test)
accuracy_gen = accuracy_score(y_test, y_pred_gen)
accuracy_gen    # 81.88 %

####################### I am done ######################

"""
How does PermutationImportance work?
Remove a feature only from the test part of the dataset,
and compute the score without using this feature. Simply
dropping the column doesn’t work, because estimators expect
the feature to be present.
So instead of removing a feature we can replace it with
random noise - feature column is still there, but it no
longer contains useful information. This method works if
noise is drawn from the same distribution as original
feature values (as otherwise estimator may fail).
Esempio n. 2
0
                                                     random_state=24)
    # Hold out 20% of the SJ targets. random_state=24 is the same seed used
    # by every other split in this section -- presumably so that feature and
    # target rows stay aligned (assumes both have the same length; TODO confirm).
    total_CASESJ_train, total_CASESJ_test = train_test_split(total_CASESJ,
                                                             test_size=0.2,
                                                             random_state=24)

    # Same 80/20 split, same seed, for the IQ features and the IQ targets.
    train_IQ_train, train_IQ_test = train_test_split(train_IQ,
                                                     test_size=0.2,
                                                     random_state=24)
    total_CASEIQ_train, total_CASEIQ_test = train_test_split(total_CASEIQ,
                                                             test_size=0.2,
                                                             random_state=24)

    # NOTE(review): both models are fitted on the FULL train_SJ / train_IQ
    # data, not on the *_train splits created above (those splits are unused
    # in the visible code). The *_test rows scored below are therefore part
    # of the fitting data, so the printed errors are likely optimistic.
    # Confirm whether fitting on everything is intentional (e.g. for a final
    # submission model) before relying on these numbers.
    rtreeForSJ.fit(train_SJ, total_CASESJ)
    rtreeForIQ.fit(train_IQ, total_CASEIQ)

    # Predict on the held-out 20% feature rows of each dataset.
    predictionsSJ = rtreeForSJ.predict(train_SJ_test)
    predictionsIQ = rtreeForIQ.predict(train_IQ_test)
    # Mean absolute error of those predictions against the held-out targets.
    sjscore = mean_absolute_error(total_CASESJ_test, predictionsSJ)
    iqscore = mean_absolute_error(total_CASEIQ_test, predictionsIQ)
    print(sjscore)
    print(iqscore)
    # print(len(predictionsSJ)+len(predictionsIQ))
    # print(len(predictionsIQ))
    # finalArr = []
    # for k in predictionsSJ:
    #     finalArr.append(k)
    # for t in predictionsIQ:
    #     finalArr.append(t)
    # submission = pd.read_csv('submission_format.csv')
    # # print(finalArr[2])
    # # for i in range(len(submission['total_cases'])):
Esempio n. 3
0
}

# Seed Python's `random` module before building and running the search.
random.seed(1)

# Evolutionary hyper-parameter search over p_grid for the mlp estimator,
# scored by accuracy on 4 stratified folds.
cv = EvolutionaryAlgorithmSearchCV(
    estimator=mlp,
    params=p_grid,
    scoring="accuracy",
    cv=StratifiedKFold(n_splits=4),
    # Evolution settings.
    population_size=5,
    generations_number=5,
    tournament_size=3,
    gene_crossover_prob=0.5,
    gene_mutation_prob=0.10,
    # Execution settings.
    n_jobs=4,
    verbose=1,
)

# Grid-search alternative, kept for reference:
#clf = GridSearchCV(mlp, param_grid=p_grid, cv=5, scoring='accuracy')
cv.fit(X_train, y_train)

# Report the best cross-validated score and the parameters that produced it.
print("VALIDATION score:" + str(cv.best_score_))
print("BEST parameters:" + str(cv.best_params_))

# Accuracy of the search object's predictions on the test data.
y_pred = cv.predict(X_test)
print("accuracy:" + str(accuracy_score(y_test, y_pred)))
"""
result:
VALIDATION score: 0.980712563622
BEST parameters: {'alpha': 1e-05, 'verbose': True, 'hidden_layer_sizes': (50,)}
accuracy: 0.975530179445

"""