def optimize_boosting_decision_tree(training_features, training_classes, problem_name, depth_range,
                                    learning_rate_range):
    """Tune an AdaBoost-over-decision-tree classifier in two sequential sweeps.

    The base tree's ``max_depth`` is swept over ``depth_range`` first; the
    value returned by that sweep is then held fixed while AdaBoost's
    ``learning_rate`` is swept over ``learning_rate_range``.  A learning-curve
    analysis is run on the resulting estimator before it is returned.

    Args:
        training_features: Feature data forwarded to the analysis helpers.
        training_classes: Class labels forwarded to the analysis helpers.
        problem_name: Name interpolated into the analysis titles.
        depth_range: Candidate ``max_depth`` values for the base tree.
        learning_rate_range: Candidate AdaBoost ``learning_rate`` values.

    Returns:
        The ``AdaBoostClassifier`` configured with the selected depth and
        learning rate (the same object handed to the learning-curve analysis).
    """
    # NOTE(review): newer scikit-learn releases rename ``base_estimator`` to
    # ``estimator`` -- confirm against the scikit-learn version pinned here.
    def boosted_tree(depth, **boosting_options):
        weak_learner = sklearn.tree.DecisionTreeClassifier(max_depth=depth)
        return sklearn.ensemble.AdaBoostClassifier(base_estimator=weak_learner, **boosting_options)

    def sweep(title, x_label, factory, candidates):
        # Every sweep shares the data, the scorer, the y-label and folds=5.
        return complexity_analysis.run_complexity_analysis(
            title, x_label, 'Accuracy', factory, candidates,
            training_features, training_classes,
            complexity_analysis.general_accuracy, folds=5)

    def with_depth(depth):
        return boosted_tree(depth)

    best_depth = sweep(
        'Boosting Decision Tree Max Depth for %s' % problem_name,
        'Max Depth', with_depth, depth_range)

    def with_learning_rate(rate):
        return boosted_tree(best_depth, learning_rate=rate)

    best_rate = sweep(
        'Boosting Decision Tree Learning Rate for %s' % problem_name,
        'Learning Rate', with_learning_rate, learning_rate_range)

    tuned = boosted_tree(best_depth, learning_rate=best_rate)
    complexity_analysis.run_learning_curve_analysis(
        problem_name + ' Boosting Decision Tree', tuned, training_features, training_classes)
    return tuned
def optimize_svm(training_features, training_classes, problem_name, gamma_range, C_range, degree_range):
    """Tune an SVC's ``gamma`` and ``C`` and return a classifier that uses them.

    ``gamma`` is swept first; the value returned by that sweep is held fixed
    while ``C`` is swept.  A further sweep compares the ``'linear'`` and
    ``'rbf'`` kernels with otherwise default settings; its result is not fed
    back into the returned estimator.  Finally a learning-curve analysis is run
    on an SVC built from the selected ``gamma`` and ``C``.

    Args:
        training_features: Feature data forwarded to the analysis helpers.
        training_classes: Class labels forwarded to the analysis helpers.
        problem_name: Name interpolated into the analysis titles.
        gamma_range: Candidate ``gamma`` values (plotted with ``x_log_space=True``).
        C_range: Candidate ``C`` values (plotted with ``x_log_space=True``).
        degree_range: Accepted for interface compatibility; not used here.

    Returns:
        A fresh, unfitted ``sklearn.svm.SVC`` configured with the selected
        ``gamma`` and ``C``.
    """
    def create_svc_gamma(gamma):
        return sklearn.svm.SVC(gamma=gamma)

    gamma = complexity_analysis.run_complexity_analysis(
        'SVM gamma for %s' % problem_name, 'Gamma', 'Accuracy',
        create_svc_gamma, gamma_range, training_features, training_classes,
        complexity_analysis.general_accuracy, folds=5, x_log_space=True)

    def create_svc_C(c):
        return sklearn.svm.SVC(gamma=gamma, C=c)

    C = complexity_analysis.run_complexity_analysis(
        'SVM C for %s' % problem_name, 'C', 'Accuracy',
        create_svc_C, C_range, training_features, training_classes,
        complexity_analysis.general_accuracy, folds=5, x_log_space=True)

    def create_svc_kernel(kernel):
        return sklearn.svm.SVC(kernel=kernel)

    # This sweep used to reuse the 'SVM C for ...' title of the sweep above;
    # it now carries its own title so the two analyses cannot be confused.
    complexity_analysis.run_complexity_analysis(
        'SVM kernel for %s' % problem_name, 'kernel', 'Accuracy',
        create_svc_kernel, ['linear', 'rbf'], training_features, training_classes,
        complexity_analysis.general_accuracy, folds=5)

    def create_tuned_svc():
        # Previously a default SVC() was analysed and returned, silently
        # discarding the gamma and C selected above.
        return sklearn.svm.SVC(gamma=gamma, C=C)

    complexity_analysis.run_learning_curve_analysis(
        problem_name + ' SVM', create_tuned_svc(), training_features, training_classes)
    # As before, the caller receives a new instance rather than the one that
    # was handed to the learning-curve analysis.
    return create_tuned_svc()
def optimize_nearest_neighbors(training_features, training_classes, problem_name, k_range, p_range):
    """Tune a distance-weighted k-nearest-neighbours classifier.

    The neighbour count is swept over ``k_range`` first; the value returned by
    that sweep is then held fixed while the ``p`` parameter of the distance
    metric is swept over ``p_range``.  A learning-curve analysis is run on the
    resulting estimator before it is returned.

    Args:
        training_features: Feature data forwarded to the analysis helpers.
        training_classes: Class labels forwarded to the analysis helpers.
        problem_name: Name interpolated into the analysis titles.
        k_range: Candidate ``n_neighbors`` values.
        p_range: Candidate ``p`` values.

    Returns:
        The ``KNeighborsClassifier`` configured with the selected ``k`` and
        ``p`` (the same object handed to the learning-curve analysis).
    """
    def distance_weighted_knn(neighbors, **metric_options):
        return sklearn.neighbors.KNeighborsClassifier(
            n_neighbors=neighbors, weights='distance', **metric_options)

    def sweep(title, x_label, factory, candidates):
        # Every sweep shares the data, the scorer, the y-label and folds=5.
        return complexity_analysis.run_complexity_analysis(
            title, x_label, 'Accuracy', factory, candidates,
            training_features, training_classes,
            complexity_analysis.general_accuracy, folds=5)

    def with_neighbors(count):
        return distance_weighted_knn(count)

    best_k = sweep(
        'K Nearest Neighbors for %s' % problem_name,
        '# of Neighbors', with_neighbors, k_range)

    def with_power(power):
        return distance_weighted_knn(best_k, p=power)

    best_p = sweep(
        'Nearest Neighbors P for %s' % problem_name,
        'P in Distance', with_power, p_range)

    tuned = distance_weighted_knn(best_k, p=best_p)
    complexity_analysis.run_learning_curve_analysis(
        problem_name + ' Nearest Neighbors', tuned, training_features, training_classes)
    return tuned
def optimize_decision_tree(training_features, training_classes, problem_name, depth_range,
                           number_features_range, scorer=complexity_analysis.general_accuracy):
    """Tune a decision tree's ``max_depth`` and then its ``max_features``.

    ``max_depth`` is swept over ``depth_range`` first; the value returned by
    that sweep is then held fixed while ``max_features`` is swept over
    ``number_features_range``.  Both sweeps are scored with ``scorer``.  A
    learning-curve analysis is run on the resulting estimator before it is
    returned.

    Args:
        training_features: Feature data forwarded to the analysis helpers.
        training_classes: Class labels forwarded to the analysis helpers.
        problem_name: Name interpolated into the analysis titles.
        depth_range: Candidate ``max_depth`` values.
        number_features_range: Candidate ``max_features`` values.
        scorer: Scoring callable passed to both sweeps; defaults to
            ``complexity_analysis.general_accuracy``.

    Returns:
        The ``DecisionTreeClassifier`` configured with the selected depth and
        feature count (the same object handed to the learning-curve analysis).
    """
    make_tree = sklearn.tree.DecisionTreeClassifier

    def sweep(title, x_label, factory, candidates):
        # Both sweeps share the data, the scorer, the y-label and folds=5.
        return complexity_analysis.run_complexity_analysis(
            title, x_label, 'Accuracy', factory, candidates,
            training_features, training_classes, scorer, folds=5)

    def tree_of_depth(depth):
        return make_tree(max_depth=depth)

    best_depth = sweep(
        'Decision Tree Max Depth for %s' % problem_name,
        'Max Depth', tree_of_depth, depth_range)

    def tree_with_feature_limit(feature_count):
        return make_tree(max_depth=best_depth, max_features=feature_count)

    best_feature_count = sweep(
        'Decision Tree Max Features for %s' % problem_name,
        'Max Features to Examine', tree_with_feature_limit, number_features_range)

    tuned = make_tree(max_depth=best_depth, max_features=best_feature_count)
    complexity_analysis.run_learning_curve_analysis(
        problem_name + ' Decision Tree', tuned, training_features, training_classes)
    return tuned
def optimize_neural_network(training_features, training_classes, problem_name, hidden_layer_sizes):
    """Run the neural-network analyses and return a fixed-size MLP classifier.

    Steps, in order:
      1. Sweep the width of a single hidden layer over ``hidden_layer_sizes``.
      2. Run ``weight_updates`` on an SGD-trained, logistic-activation network
         and save the current figure as ``<problem>_nn_weight_updates``.
      3. Run a learning-curve analysis on a fresh copy of that SGD network.

    Args:
        training_features: Feature data forwarded to the analysis helpers.
        training_classes: Class labels forwarded to the analysis helpers.
        problem_name: Name used in analysis titles and the saved figure name.
        hidden_layer_sizes: Candidate widths for the single hidden layer.

    Returns:
        A new ``MLPClassifier`` with one hidden layer of 5 units,
        ``learning_rate_init=0.0001`` and ``max_iter=10000``.  This is not the
        SGD network used for the analyses above.
    """
    mlp = sklearn.neural_network.MLPClassifier

    def single_layer_net(width):
        return mlp(hidden_layer_sizes=(width, ), max_iter=10000)

    # The sweep is run for the analysis it performs; its return value is not
    # kept because the layer width is pinned to a constant just below.
    complexity_analysis.run_complexity_analysis(
        'Neural Net Hidden Units for %s' % problem_name, 'Hidden Units', 'Accuracy',
        single_layer_net, hidden_layer_sizes, training_features, training_classes,
        complexity_analysis.general_accuracy, folds=5)

    hidden_units = 5
    learning_rate = 0.0001
    layer_shape = (hidden_units, )

    def sgd_net():
        return mlp(
            hidden_layer_sizes=layer_shape,
            activation='logistic',
            solver='sgd',
            learning_rate='constant',
            learning_rate_init=learning_rate,
            batch_size=10,
            max_iter=10000)

    # weight_updates receives the factory itself, not an instance.
    weight_updates(sgd_net, training_features, training_classes, max_iterations=50)
    figure_name = '%s_nn_weight_updates' % problem_name.replace(' ', '_').lower()
    plot.savefig(figure_name)

    complexity_analysis.run_learning_curve_analysis(
        problem_name + ' NN', sgd_net(), training_features, training_classes)

    return mlp(
        hidden_layer_sizes=layer_shape,
        learning_rate_init=learning_rate,
        max_iter=10000)
def perform_complexity_analysis(training_features, training_classes, problem_name, hidden_layer_sizes):
    """Sweep complete hidden-layer layouts for an MLP classifier.

    Each candidate in ``hidden_layer_sizes`` is passed whole as the
    ``hidden_layer_sizes`` argument of ``MLPClassifier``, so a candidate is a
    sequence of layer widths.  Candidates are scored with
    ``complexity_analysis.f1_accuracy`` and ``folds=5``.

    Args:
        training_features: Feature data forwarded to the analysis helper.
        training_classes: Class labels forwarded to the analysis helper.
        problem_name: Name interpolated into the analysis title.
        hidden_layer_sizes: Iterable of candidate layouts, each a sequence of
            layer widths such as ``(10, 5)``.

    Returns:
        Whatever ``complexity_analysis.run_complexity_analysis`` returns for
        this sweep.
    """
    def create_neural_net_hidden_units(hidden_units):
        return sklearn.neural_network.MLPClassifier(hidden_layer_sizes=hidden_units, max_iter=10000)

    def hidden_units_to_string(hidden_units):
        # (10, 5) -> '10_5'.  str.join replaces the former index loop with
        # repeated '+=' and, unlike it, returns '' for an empty layout
        # instead of raising IndexError.
        return '_'.join(str(width) for width in hidden_units)

    # NOTE(review): parameter_to_string is presumably used to label each
    # candidate layout -- confirm in complexity_analysis.
    return complexity_analysis.run_complexity_analysis(
        'Neural Net Hidden Units for %s' % problem_name, 'Hidden Units', 'Accuracy',
        create_neural_net_hidden_units, hidden_layer_sizes,
        training_features, training_classes,
        complexity_analysis.f1_accuracy, folds=5,
        parameter_to_string=hidden_units_to_string)