def run(X_train, y_train, X_test, y_test,
        predciction_filename=None, graph_name=None):
    """Grid-search an AdaBoost classifier and score it on the test set.

    A 2-fold ``GridSearchCV`` (accuracy scoring, ``n_jobs=-1``) searches
    over the number of estimators and the learning rate. The best
    estimator found is then used to predict ``X_test``.

    Args:
        X_train: Training features, passed to ``GridSearchCV.fit``.
        y_train: Training labels, passed to ``GridSearchCV.fit``.
        X_test: Test features to predict.
        y_test: True test labels, forwarded to the ``performance`` helpers.
        predciction_filename: Forwarded unchanged to
            ``performance.get_scores`` (presumably an output file name --
            confirm against that helper). The misspelled name is kept so
            existing keyword callers keep working.
        graph_name: When not ``None``, ``performance.plot_roc`` is called
            with this name.

    Returns:
        Whatever ``performance.get_scores`` returns for the best parameters
        and the test-set predictions.
    """
    param_grid = {
        'n_estimators': [10, 20, 50, 80],
        'learning_rate': [1, .8, .6, .4],
    }

    gs = GridSearchCV(AdaBoostClassifier(), param_grid, cv=2, n_jobs=-1,
                      scoring='accuracy')
    gs.fit(X_train, y_train)
    predicted_classes = gs.best_estimator_.predict(X_test)

    # Identity check: None is a singleton, so "is not" is the correct test.
    if graph_name is not None:
        performance.plot_roc(gs, X_test, y_test, graph_name)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
def run(X_train, y_train, X_test, y_test,
        predciction_filename=None, graph_name=None):
    """Grid-search a multi-layer perceptron and score it on the test set.

    A 2-fold ``GridSearchCV`` (accuracy scoring, ``n_jobs=-1``) searches
    over the activation function and the hidden-layer layout; the other
    grid entries are single-valued and therefore fixed. The best estimator
    found is then used to predict ``X_test``.

    Args:
        X_train: Training features, passed to ``GridSearchCV.fit``.
        y_train: Training labels, passed to ``GridSearchCV.fit``.
        X_test: Test features to predict.
        y_test: True test labels, forwarded to the ``performance`` helpers.
        predciction_filename: Forwarded unchanged to
            ``performance.get_scores`` (presumably an output file name --
            confirm against that helper). The misspelled name is kept so
            existing keyword callers keep working.
        graph_name: When not ``None``, ``performance.plot_roc`` is called
            with this name.

    Returns:
        Whatever ``performance.get_scores`` returns for the best parameters
        and the test-set predictions.
    """
    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'activation': ['identity', 'logistic', 'tanh'],
        'alpha': [0.0001],
        'batch_size': ['auto'],
        'learning_rate_init': [0.001],
        'max_iter': [2000],  # Higher numbers will avoid ConvergenceWarnings
        'hidden_layer_sizes': [(1, ), (2, ), (5, ), (2, 2), (3, 5)],
    }

    gs = GridSearchCV(MLPClassifier(), param_grid, cv=2, n_jobs=-1,
                      scoring='accuracy')
    gs.fit(X_train, y_train)
    predicted_classes = gs.best_estimator_.predict(X_test)

    # Identity check: None is a singleton, so "is not" is the correct test.
    if graph_name is not None:
        performance.plot_roc(gs, X_test, y_test, graph_name)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
def run(X_train, y_train, X_test, y_test, collapseType=-1,
        predciction_filename=None, graph_name=None):
    """Grid-search a decision tree, score it, and optionally render it.

    A 4-fold ``GridSearchCV`` (accuracy scoring, ``n_jobs=-1``) searches
    over the split criterion and ``max_depth`` values 2 through 7. The best
    estimator found is used to predict ``X_test`` and the predictions are
    scored with ``performance.get_scores``.

    Args:
        X_train: Training features, passed to ``GridSearchCV.fit``.
        y_train: Training labels, passed to ``GridSearchCV.fit``.
        X_test: Test features to predict.
        y_test: True test labels, forwarded to the ``performance`` helpers.
        collapseType: Passed to ``performance.get_class_labels`` to obtain
            the class names shown in the rendered tree.
        predciction_filename: Forwarded to ``performance.get_scores`` and
            also used as the name given to ``performance.plot_roc``. The
            misspelled name is kept so existing keyword callers keep
            working.
        graph_name: When not ``None``, the fitted tree is rendered with
            graphviz to ``graph_name + "_DT"`` and a ROC curve is plotted.

    Returns:
        The six values unpacked from ``performance.get_scores``, as the
        tuple ``(classifier, accuracy, precision, recall, f1,
        confusion_matrix)``.
    """
    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'criterion': ['gini', 'entropy'],
        'max_depth': np.arange(2, 8),
    }

    gs = GridSearchCV(tree.DecisionTreeClassifier(), param_grid, cv=4,
                      n_jobs=-1, scoring='accuracy')
    gs.fit(X_train, y_train)
    predicted_classes = gs.best_estimator_.predict(X_test)

    (classifier, accuracy, precision,
     recall, f1, confusion_matrix) = performance.get_scores(
        gs.best_params_, predicted_classes, y_test, predciction_filename)

    # Save the decision tree.
    # Identity check: None is a singleton, so "is not" is the correct test.
    if graph_name is not None:
        class_names = performance.get_class_labels(collapseType)
        # Column labels for the rendered tree; the list is fixed at 12
        # entries, so X_train is assumed to have these 12 columns in this
        # order -- TODO confirm against the data loader.
        feature_names = [
            'Age', 'Gender', 'Education', 'Country', 'Ethnicity',
            'Neuroticism', 'Extraversion', 'Openness', 'Agreeableness',
            'Conscientiousness', 'Impulsiveness', 'Sensation',
        ]
        tree_data = tree.export_graphviz(gs.best_estimator_,
                                         out_file=None,
                                         feature_names=feature_names,
                                         class_names=class_names,
                                         filled=True,
                                         rounded=True,
                                         special_characters=True)
        graph = graphviz.Source(tree_data)
        graph.render(graph_name + "_DT")

        # predciction_filename is not the right naming scheme for the ROC
        # plot, but it works for these purposes.
        # NOTE(review): assumed to belong under the graph_name guard, as in
        # the sibling classifier modules -- confirm.
        performance.plot_roc(gs, X_test, y_test, predciction_filename)

    return classifier, accuracy, precision, recall, f1, confusion_matrix
def run(X_train, y_train, X_test, y_test,
        predciction_filename=None, graph_name=None):
    """Grid-search a k-nearest-neighbours classifier and score it.

    A 4-fold ``GridSearchCV`` (``n_jobs=-1``) searches over the number of
    neighbours and the Minkowski power ``p`` (``np.inf`` being the
    Chebyshev limit). The best estimator found is then used to predict
    ``X_test``.

    Args:
        X_train: Training features, passed to ``GridSearchCV.fit``.
        y_train: Training labels, passed to ``GridSearchCV.fit``.
        X_test: Test features to predict.
        y_test: True test labels, forwarded to the ``performance`` helpers.
        predciction_filename: Forwarded unchanged to
            ``performance.get_scores`` (presumably an output file name --
            confirm against that helper). The misspelled name is kept so
            existing keyword callers keep working.
        graph_name: When not ``None``, ``performance.plot_roc`` is called
            with this name.

    Returns:
        Whatever ``performance.get_scores`` returns for the best parameters
        and the test-set predictions.
    """
    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'n_neighbors': [5, 10, 20, 40],
        'p': [1, 2, 3, np.inf],
    }

    # scoring is spelled out for clarity; accuracy is already the default
    # score of a classifier, so the selected parameters do not change.
    gs = GridSearchCV(KNeighborsClassifier(), param_grid, cv=4, n_jobs=-1,
                      scoring='accuracy')
    gs.fit(X_train, y_train)
    predicted_classes = gs.best_estimator_.predict(X_test)

    # Identity check: None is a singleton, so "is not" is the correct test.
    if graph_name is not None:
        performance.plot_roc(gs, X_test, y_test, graph_name)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
def run(X_train, y_train, X_test, y_test,
        predciction_filename=None, graph_name=None):
    """Grid-search a support vector classifier and score it on the test set.

    A 2-fold ``GridSearchCV`` (accuracy scoring, ``n_jobs=-1``) searches
    over ``C`` and ``gamma``. The best estimator found is then used to
    predict ``X_test``.

    Args:
        X_train: Training features, passed to ``GridSearchCV.fit``.
        y_train: Training labels, passed to ``GridSearchCV.fit``.
        X_test: Test features to predict.
        y_test: True test labels, forwarded to the ``performance`` helpers.
        predciction_filename: Forwarded unchanged to
            ``performance.get_scores`` (presumably an output file name --
            confirm against that helper). The misspelled name is kept so
            existing keyword callers keep working.
        graph_name: When not ``None``, ``performance.plot_roc`` is called
            with this name.

    Returns:
        Whatever ``performance.get_scores`` returns for the best parameters
        and the test-set predictions.
    """
    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'C': [1, 2, 3, 4, 5, 6, 7],
        'gamma': [0.001, 0.0001, 0.01, 0.1, 1, 10, 100],
        # Single-valued, so always on; presumably required so that
        # performance.plot_roc can obtain class probabilities -- confirm.
        'probability': [True],
    }

    gs = GridSearchCV(SVC(), param_grid, cv=2, n_jobs=-1, scoring='accuracy')
    gs.fit(X_train, y_train)
    predicted_classes = gs.best_estimator_.predict(X_test)

    # Identity check: None is a singleton, so "is not" is the correct test.
    if graph_name is not None:
        performance.plot_roc(gs, X_test, y_test, graph_name)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
# NOTE(review): this fragment uses `counter` and `fold_performance`, so it
# presumably runs inside a per-fold loop whose header is not visible here;
# re-indent to match that loop if so.

# Fit on the training samples, then predict labels and class probabilities
# for the query samples.
clf.fit(t, training_ids)
predictions = clf.predict(q)
confidence = clf.predict_proba(q)

# Save predictions: the confidence of each predicted label is a genuine
# score when the prediction matches the query id, an imposter score otherwise.
# NOTE(review): indexing a predict_proba row by the predicted label assumes
# the labels are exactly the column indices 0..n_classes-1 -- confirm.
for i, predicted in enumerate(predictions):
    score = confidence[i][predicted]
    if predicted == query_ids[i]:
        genuine_scores.append(score)
    else:
        imposter_scores.append(score)

print("Finished training/prediction")

# Measure fold performance. (A leftover pdb.set_trace() breakpoint that
# halted every run at this point has been removed.)
# Not referenced in the visible fragment; kept in case later code reads it.
all_scores = genuine_scores + imposter_scores
eer, far, frr, tpr = p.getScores(genuine_scores, imposter_scores)
fold_performance.append((eer, far, frr, tpr))

# Plot the score distribution
p.plot_scoreDist(genuine_scores, imposter_scores, name=counter)

# Plot performance
p.plot_det(fold_performance, name="knn-chebyshev")
p.plot_roc(fold_performance, name="knn-chebyshev")