def abclassifier(training_samples, eval_samples, do_grid_search=False):
    """Train an AdaBoost-boosted random forest and predict on the evaluation set.

    Args:
        training_samples: ``(X_train, Y_train)`` pair used to fit the model.
        eval_samples: ``(X_eval, Y_eval)`` pair. Only ``X_eval`` is used; the
            second element is ignored because labels are not needed to
            predict.
        do_grid_search: When true, the random forest is first tuned with a
            5-fold ``GridSearchCV`` on the training data, the search report is
            printed, and the best forest found is the one that gets boosted.
            When false (the default), the fixed forest configuration is
            boosted and 5-fold cross-validated log-loss scores of the boosted
            model are printed instead.

    Returns:
        A tuple ``(Y_pred, Y_prob, feature_importances)`` holding the
        predictions for ``X_eval``, the result of ``predict_proba`` for
        ``X_eval`` and the fitted ensemble's ``feature_importances_``.
    """
    X_train, Y_train = training_samples
    # Labels of the evaluation set are not needed for prediction.
    X_eval, _ = eval_samples

    # NOTE(review): max_features=40 presumably requires at least 40 feature
    # columns in X_train -- confirm against the callers' data.
    forest = RandomForestClassifier(
        n_estimators=2000, criterion='gini', max_depth=None,
        min_samples_split=8, min_samples_leaf=1,
        min_weight_fraction_leaf=0.0, max_features=40, max_leaf_nodes=None,
        bootstrap=True, oob_score=False, n_jobs=10, random_state=None,
        verbose=0, warm_start=False, class_weight=None)

    if do_grid_search:
        to_be_tuned_parameters = {
            'n_estimators': [500, 1000, 2000],
            'max_features': ['log2', 'auto', None],
            'min_samples_split': [2, 4, 8],
            'min_samples_leaf': [1, 2],
        }
        # The search must be fitted on the forest itself: wrapping the
        # GridSearchCV object in AdaBoost (as was done before) can neither be
        # fitted with sample weights nor expose best_params_ / grid_scores_.
        search = GridSearchCV(forest, to_be_tuned_parameters, cv=5, n_jobs=5,
                              scoring='log_loss')
        search.fit(X_train, Y_train)

        print("Best parameters set found on development set:")
        print("")
        print(search.best_params_)
        print("")
        print("Grid scores on development set:")
        print("")
        for params, mean_score, scores in search.grid_scores_:
            print("%0.3f (+/-%0.03f) for %r"
                  % (mean_score, scores.std() * 2, params))

        # Boost the tuned forest rather than the hand-configured one.
        forest = search.best_estimator_

    # Best parameters set found on development set:
    # ()
    # {'max_features': 'log2', 'min_samples_split': 8, 'criterion': 'gini', 'min_samples_leaf': 1}
    clf = AdaBoostClassifier(base_estimator=forest, n_estimators=200,
                             learning_rate=0.2, algorithm='SAMME.R',
                             random_state=None)
    print(clf)
    clf.fit(X_train, Y_train)

    if not do_grid_search:
        scores = cross_validation.cross_val_score(
            clf, X_train, Y_train, cv=5, n_jobs=5, scoring='log_loss')
        # Single-argument print keeps the output identical on Python 2 and 3.
        print("%s %s %s" % (scores, np.mean(scores), np.median(scores)))

    Y_pred = clf.predict(X_eval)
    Y_prob = clf.predict_proba(X_eval)
    # feature_importances_ is an attribute (an array), not a method.
    return Y_pred, Y_prob, clf.feature_importances_
# correlation_matrix is defined outside this section.
correlation_matrix(df)

# Splitting data in train and test.
# The first nVar-1 columns are the features; the columns from position
# nVar-1 onwards are the target (a single column if df has exactly nVar
# columns -- TODO confirm).
X = df.iloc[:, :(nVar - 1)]
print(X)
Y = df.iloc[:, (nVar - 1):]
print(Y)
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=24)
print(Y_train)

## Model 1 - RandomForest
clf1 = RandomForestClassifier()
clf1.fit(X_train, Y_train)

# Feature importance: feature_importances_ is an attribute, not a method,
# so it must not be called.
feature_importances = clf1.feature_importances_
predict = clf1.predict(X_test)

# Cross val score: mean 10-fold accuracy of this model on the full data.
# Uses clf1 (the model built above); the earlier reference to `clf` pointed
# at a name this script never defines.
score1 = np.mean(cross_val_score(clf1, X, Y, scoring='accuracy', cv=10))
print(score1)

## Metrics - accuracy (arguments in y_true, y_pred order like the calls below)
print(accuracy_score(Y_test, predict))

# Kappa score
score3 = cohen_kappa_score(Y_test, predict)
print(score3)

# Recall score (macro-averaged)
score2 = recall_score(Y_test, predict, average='macro')