def run(X_train, y_train, X_test, y_test, predciction_filename=None, graph_name=None):
    """Grid-search an MLPClassifier, predict on the test set and score it.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training labels passed to ``GridSearchCV.fit``.
        X_test: Test features the best estimator predicts on.
        y_test: True test labels, forwarded to the ``performance`` helpers.
        predciction_filename: Optional name forwarded unchanged to
            ``performance.get_scores`` (the spelling is kept so existing
            keyword callers keep working).
        graph_name: Optional name; when it is not ``None`` a ROC curve is
            plotted through ``performance.plot_roc``.

    Returns:
        The result of ``performance.get_scores`` for the best parameters and
        the test-set predictions.
    """
    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'activation': ['identity', 'logistic', 'tanh'],
        'alpha': [0.0001],
        'batch_size': ['auto'],
        'learning_rate_init': [0.001],
        'max_iter': [2000],  # Higher numbers will avoid ConvergenceWarnings
        'hidden_layer_sizes': [(1, ), (2, ), (5, ), (2, 2), (3, 5)],
    }

    gs = GridSearchCV(MLPClassifier(), param_grid, cv=2, n_jobs=-1, scoring='accuracy')
    gs.fit(X_train, y_train)

    predicted_classes = gs.best_estimator_.predict(X_test)

    # Identity comparison: None is a singleton, so avoid `!= None`.
    if graph_name is not None:
        performance.plot_roc(gs, X_test, y_test, graph_name)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
def run(X_train, y_train, X_test, y_test, predciction_filename=None, graph_name=None):
    """Grid-search an AdaBoostClassifier, predict on the test set and score it.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training labels passed to ``GridSearchCV.fit``.
        X_test: Test features the best estimator predicts on.
        y_test: True test labels, forwarded to the ``performance`` helpers.
        predciction_filename: Optional name forwarded unchanged to
            ``performance.get_scores`` (the spelling is kept so existing
            keyword callers keep working).
        graph_name: Optional name; when it is not ``None`` a ROC curve is
            plotted through ``performance.plot_roc``.

    Returns:
        The result of ``performance.get_scores`` for the best parameters and
        the test-set predictions.
    """
    # Grid explored by the 2-fold, accuracy-scored search below.
    param_grid = {
        'n_estimators': [10, 20, 50, 80],
        'learning_rate': [1, .8, .6, .4],
    }

    gs = GridSearchCV(AdaBoostClassifier(), param_grid, cv=2, n_jobs=-1, scoring='accuracy')
    gs.fit(X_train, y_train)

    predicted_classes = gs.best_estimator_.predict(X_test)

    # Identity comparison: None is a singleton, so avoid `!= None`.
    if graph_name is not None:
        performance.plot_roc(gs, X_test, y_test, graph_name)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
def run(X_train, y_train, X_test, y_test, predciction_filename=None):
    """Grid-search an MPPClassifier over its ``case`` values and score it.

    Args:
        X_train: Training features; the length of its first row is used as
            the number of dimensions and must not be 1.
        y_train: Training labels passed to ``GridSearchCV.fit``.
        X_test: Test features the best estimator predicts on.
        y_test: True test labels, forwarded to ``performance.get_scores``.
        predciction_filename: Optional name forwarded unchanged to
            ``performance.get_scores`` (the spelling is kept so existing
            keyword callers keep working).

    Returns:
        The result of ``performance.get_scores`` for the best parameters and
        the test-set predictions.

    Raises:
        SystemExit: With exit status 1 when the training data has exactly
            one dimension.
    """
    if len(X_train[0]) == 1:
        eprint("\nERROR: MPP number of dimensions is equal to 1\n")
        # A bare sys.exit() reports success (status 0); signal the failure.
        sys.exit(1)

    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'case': [1, 2, 3],
    }

    # No `scoring` is given, so GridSearchCV falls back to the estimator's own score.
    gs = GridSearchCV(MPPClassifier(), param_grid, cv=4, n_jobs=1)
    gs.fit(X_train, y_train)

    predicted_classes = gs.best_estimator_.predict(X_test)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
def run(X_train, y_train, X_test, y_test, collapseType=-1, predciction_filename=None,
        graph_name=None):
    """Grid-search a DecisionTreeClassifier, score it and optionally draw it.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training labels passed to ``GridSearchCV.fit``.
        X_test: Test features the best estimator predicts on.
        y_test: True test labels, forwarded to the ``performance`` helpers.
        collapseType: Forwarded to ``performance.get_class_labels`` to obtain
            the class names used in the rendered tree.
        predciction_filename: Optional name forwarded unchanged to
            ``performance.get_scores`` and used as the ROC plot name (the
            spelling is kept so existing keyword callers keep working).
        graph_name: Optional base name; when it is not ``None`` the fitted
            tree is rendered to ``graph_name + "_DT"`` and a ROC curve is
            plotted.

    Returns:
        The six values unpacked from ``performance.get_scores``:
        ``(classifier, accuracy, precision, recall, f1, confusion_matrix)``.
    """
    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'criterion': ['gini', 'entropy'],
        'max_depth': np.arange(2, 8),
    }

    gs = GridSearchCV(tree.DecisionTreeClassifier(), param_grid, cv=4, n_jobs=-1,
                      scoring='accuracy')
    gs.fit(X_train, y_train)

    predicted_classes = gs.best_estimator_.predict(X_test)

    classifier, accuracy, precision, recall, f1, confusion_matrix = performance.get_scores(
        gs.best_params_, predicted_classes, y_test, predciction_filename)

    # Save the decision tree.
    # Identity comparison: None is a singleton, so avoid `!= None`.
    if graph_name is not None:
        class_names = performance.get_class_labels(collapseType)
        # assumes the feature columns are exactly these 12, in this order -- TODO confirm
        feature_names = [
            'Age', 'Gender', 'Education', 'Country', 'Ethnicity', 'Neuroticism',
            'Extraversion', 'Openness', 'Agreeableness', 'Conscientiousness',
            'Impulsiveness', 'Sensation',
        ]
        tree_data = tree.export_graphviz(gs.best_estimator_,
                                         out_file=None,
                                         feature_names=feature_names,
                                         class_names=class_names,
                                         filled=True,
                                         rounded=True,
                                         special_characters=True)
        graph = graphviz.Source(tree_data)
        graph.render(graph_name + "_DT")

        # predciction_filename is not the right naming scheme for the ROC plot,
        # but it should work for these purposes.
        # NOTE(review): the ROC plot is assumed to belong to the graph_name
        # branch -- confirm against the original layout.
        performance.plot_roc(gs, X_test, y_test, predciction_filename)

    return classifier, accuracy, precision, recall, f1, confusion_matrix
def run(X_train, y_train, X_test, y_test, predciction_filename=None, graph_name=None):
    """Grid-search a KNeighborsClassifier, predict on the test set and score it.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training labels passed to ``GridSearchCV.fit``.
        X_test: Test features the best estimator predicts on.
        y_test: True test labels, forwarded to the ``performance`` helpers.
        predciction_filename: Optional name forwarded unchanged to
            ``performance.get_scores`` (the spelling is kept so existing
            keyword callers keep working).
        graph_name: Optional name; when it is not ``None`` a ROC curve is
            plotted through ``performance.plot_roc``.

    Returns:
        The result of ``performance.get_scores`` for the best parameters and
        the test-set predictions.
    """
    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'n_neighbors': [5, 10, 20, 40],
        'p': [1, 2, 3, np.inf],
    }

    # No `scoring` is given, so GridSearchCV falls back to the estimator's own score.
    gs = GridSearchCV(KNeighborsClassifier(), param_grid, cv=4, n_jobs=-1)
    gs.fit(X_train, y_train)

    predicted_classes = gs.best_estimator_.predict(X_test)

    # Identity comparison: None is a singleton, so avoid `!= None`.
    if graph_name is not None:
        performance.plot_roc(gs, X_test, y_test, graph_name)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
def run(X_train, y_train, X_test, y_test, predciction_filename=None, graph_name=None):
    """Grid-search an SVC over ``C`` and ``gamma``, predict and score it.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training labels passed to ``GridSearchCV.fit``.
        X_test: Test features the best estimator predicts on.
        y_test: True test labels, forwarded to the ``performance`` helpers.
        predciction_filename: Optional name forwarded unchanged to
            ``performance.get_scores`` (the spelling is kept so existing
            keyword callers keep working).
        graph_name: Optional name; when it is not ``None`` a ROC curve is
            plotted through ``performance.plot_roc``.

    Returns:
        The result of ``performance.get_scores`` for the best parameters and
        the test-set predictions.
    """
    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'C': [1, 2, 3, 4, 5, 6, 7],
        'gamma': [0.001, 0.0001, 0.01, 0.1, 1, 10, 100],
        'probability': [True],  # every candidate SVC is built with probability=True
    }

    gs = GridSearchCV(SVC(), param_grid, cv=2, n_jobs=-1, scoring='accuracy')
    gs.fit(X_train, y_train)

    predicted_classes = gs.best_estimator_.predict(X_test)

    # Identity comparison: None is a singleton, so avoid `!= None`.
    if graph_name is not None:
        performance.plot_roc(gs, X_test, y_test, graph_name)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
def run(X_train, y_train, X_test, y_test, predciction_filename=None):
    """Grid-search the clusteringClassifier over kMeans and WTA settings.

    Two separate sub-grids are searched (one per ``algo`` value) with 2-fold
    cross-validation; the best estimator then predicts ``X_test`` and the
    predictions are scored against ``y_test``.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training labels passed to ``GridSearchCV.fit``.
        X_test: Test features the best estimator predicts on.
        y_test: True test labels, forwarded to ``performance.get_scores``.
        predciction_filename: Optional name forwarded unchanged to
            ``performance.get_scores``.

    Returns:
        The result of ``performance.get_scores`` for the best parameters and
        the test-set predictions.
    """
    # Candidate settings for the kMeans variant.
    kmeans_grid = {
        'algo': ['kMeans'],
        'k_value': [6, 10],
        'minkowski_p': [1, 2],
        'max_iter': [20],
    }
    # Candidate settings for the WTA variant; epsilon is only used for WTA.
    wta_grid = {
        'algo': ['WTA'],
        'epsilon': [.001],
        'k_value': [12],
        'minkowski_p': [1, 2],
        'max_iter': [10],
    }

    search = GridSearchCV(clusteringClassifier(), [kmeans_grid, wta_grid], cv=2, n_jobs=-1)
    search.fit(X_train, y_train)

    best_model = search.best_estimator_
    predictions = best_model.predict(X_test)

    return performance.get_scores(search.best_params_, predictions, y_test,
                                  predciction_filename)