Exemple #1
0
def optimize_boosting_decision_tree(training_features, training_classes, problem_name, depth_range, learning_rate_range):
    """Tune an AdaBoost-over-decision-tree classifier in two sequential sweeps.

    The base tree's ``max_depth`` is swept over ``depth_range`` first.  The
    AdaBoost ``learning_rate`` is then swept over ``learning_rate_range``
    with the depth fixed to the value the first sweep returned.  Both sweeps
    score with ``complexity_analysis.general_accuracy`` and ``folds=5``.
    Finally a learning-curve analysis is run on the resulting estimator.

    The value returned by ``run_complexity_analysis`` is used directly as
    the chosen parameter (presumably the best-scoring candidate -- confirm
    against ``complexity_analysis``).

    Returns:
        The ``AdaBoostClassifier`` built from the chosen depth and learning
        rate (the same instance handed to the learning-curve analysis).
    """
    def build_booster(depth, **boost_options):
        # Every candidate has the same shape: AdaBoost around a depth-limited tree.
        base_tree = sklearn.tree.DecisionTreeClassifier(max_depth=depth)
        return sklearn.ensemble.AdaBoostClassifier(base_estimator=base_tree, **boost_options)

    # Positional arguments shared by both sweeps: data, labels, scorer.
    sweep_inputs = (training_features, training_classes, complexity_analysis.general_accuracy)

    max_depth = complexity_analysis.run_complexity_analysis(
        'Boosting Decision Tree Max Depth for %s' % problem_name,
        'Max Depth',
        'Accuracy',
        lambda depth: build_booster(depth),
        depth_range,
        *sweep_inputs,
        folds=5)

    # Second sweep keeps the depth chosen above and varies only the learning rate.
    learning_rate = complexity_analysis.run_complexity_analysis(
        'Boosting Decision Tree Learning Rate for %s' % problem_name,
        'Learning Rate',
        'Accuracy',
        lambda rate: build_booster(max_depth, learning_rate=rate),
        learning_rate_range,
        *sweep_inputs,
        folds=5)

    estimator = build_booster(max_depth, learning_rate=learning_rate)
    complexity_analysis.run_learning_curve_analysis(
        problem_name + ' Boosting Decision Tree',
        estimator,
        training_features,
        training_classes)
    return estimator
Exemple #2
0
def optimize_svm(training_features, training_classes, problem_name, gamma_range, C_range, degree_range):
    """Tune an SVC's ``gamma`` and ``C`` sequentially and return the tuned classifier.

    ``gamma`` is swept over ``gamma_range`` first; ``C`` is then swept over
    ``C_range`` with ``gamma`` fixed to the value the first sweep returned.
    Both sweeps use ``complexity_analysis.general_accuracy``, ``folds=5`` and
    ``x_log_space=True``.  A separate comparison of the ``'linear'`` and
    ``'rbf'`` kernels is run for reporting only; its result is not used.

    The value returned by ``run_complexity_analysis`` is used directly as
    the chosen parameter (presumably the best-scoring candidate -- confirm
    against ``complexity_analysis``).

    Args:
        training_features: Feature data passed through to the analyses.
        training_classes: Labels passed through to the analyses.
        problem_name: Name used in the analysis titles.
        gamma_range: Candidate ``gamma`` values.
        C_range: Candidate ``C`` values.
        degree_range: Accepted for interface compatibility; currently unused.

    Returns:
        The ``sklearn.svm.SVC`` built from the chosen ``gamma`` and ``C``
        (the same instance handed to the learning-curve analysis).
    """
    def create_svc_gamma(gamma):
        return sklearn.svm.SVC(gamma=gamma)

    gamma = complexity_analysis.run_complexity_analysis(
        'SVM gamma for %s' % problem_name,
        'Gamma',
        'Accuracy',
        create_svc_gamma,
        gamma_range,
        training_features,
        training_classes,
        complexity_analysis.general_accuracy,
        folds=5,
        x_log_space=True)

    def create_svc_C(c):
        # Keep the gamma chosen above and vary only C.
        return sklearn.svm.SVC(gamma=gamma, C=c)

    C = complexity_analysis.run_complexity_analysis(
        'SVM C for %s' % problem_name,
        'C',
        'Accuracy',
        create_svc_C,
        C_range,
        training_features,
        training_classes,
        complexity_analysis.general_accuracy,
        folds=5,
        x_log_space=True)

    def create_svc_kernel(kernel):
        return sklearn.svm.SVC(kernel=kernel)

    # Kernel comparison is informational only.  It previously reused the
    # 'SVM C for ...' title of the sweep above; give it its own title so the
    # two analyses are distinguishable.
    complexity_analysis.run_complexity_analysis(
        'SVM Kernel for %s' % problem_name,
        'kernel',
        'Accuracy',
        create_svc_kernel,
        ['linear', 'rbf'],
        training_features,
        training_classes,
        complexity_analysis.general_accuracy,
        folds=5)

    # Use the tuned parameters for the learning curve and the returned model;
    # previously both were a default SVC(), discarding the sweeps' results.
    estimator = sklearn.svm.SVC(gamma=gamma, C=C)
    complexity_analysis.run_learning_curve_analysis(
        problem_name + ' SVM', estimator, training_features, training_classes)
    return estimator
Exemple #3
0
def optimize_nearest_neighbors(training_features, training_classes,
                               problem_name, k_range, p_range):
    """Tune a distance-weighted k-nearest-neighbours classifier.

    The neighbour count is swept over ``k_range`` first; the Minkowski
    power ``p`` is then swept over ``p_range`` with the neighbour count
    fixed to the value the first sweep returned.  Both sweeps score with
    ``complexity_analysis.general_accuracy`` and ``folds=5``.  A
    learning-curve analysis is run on the final estimator.

    The value returned by ``run_complexity_analysis`` is used directly as
    the chosen parameter (presumably the best-scoring candidate -- confirm
    against ``complexity_analysis``).

    Returns:
        The ``KNeighborsClassifier`` built from the chosen ``k`` and ``p``
        (the same instance handed to the learning-curve analysis).
    """
    def build_knn(**knn_options):
        # All candidates weight neighbours by distance.
        return sklearn.neighbors.KNeighborsClassifier(weights='distance',
                                                      **knn_options)

    # Positional arguments shared by both sweeps: data, labels, scorer.
    sweep_inputs = (training_features, training_classes,
                    complexity_analysis.general_accuracy)

    k = complexity_analysis.run_complexity_analysis(
        'K Nearest Neighbors for %s' % problem_name,
        '# of Neighbors',
        'Accuracy',
        lambda neighbors: build_knn(n_neighbors=neighbors),
        k_range,
        *sweep_inputs,
        folds=5)

    # Second sweep keeps the neighbour count chosen above and varies only p.
    p = complexity_analysis.run_complexity_analysis(
        'Nearest Neighbors P for %s' % problem_name,
        'P in Distance',
        'Accuracy',
        lambda power: build_knn(n_neighbors=k, p=power),
        p_range,
        *sweep_inputs,
        folds=5)

    estimator = build_knn(n_neighbors=k, p=p)
    complexity_analysis.run_learning_curve_analysis(
        problem_name + ' Nearest Neighbors',
        estimator,
        training_features,
        training_classes)
    return estimator
Exemple #4
0
def optimize_decision_tree(training_features,
                           training_classes,
                           problem_name,
                           depth_range,
                           number_features_range,
                           scorer=complexity_analysis.general_accuracy):
    """Tune a decision tree's ``max_depth`` and then its ``max_features``.

    ``max_depth`` is swept over ``depth_range`` first; ``max_features`` is
    then swept over ``number_features_range`` with the depth fixed to the
    value the first sweep returned.  Both sweeps use ``scorer`` and
    ``folds=5``.  A learning-curve analysis is run on the final estimator.

    The value returned by ``run_complexity_analysis`` is used directly as
    the chosen parameter (presumably the best-scoring candidate -- confirm
    against ``complexity_analysis``).

    Returns:
        The ``DecisionTreeClassifier`` built from the chosen parameters
        (the same instance handed to the learning-curve analysis).
    """
    # Parameters settled so far; each sweep adds its result here so later
    # candidates are built on top of the earlier choices.
    chosen = {}

    def tree_with(**candidate):
        options = dict(chosen)
        options.update(candidate)
        return sklearn.tree.DecisionTreeClassifier(**options)

    chosen['max_depth'] = complexity_analysis.run_complexity_analysis(
        'Decision Tree Max Depth for %s' % problem_name,
        'Max Depth',
        'Accuracy',
        lambda depth: tree_with(max_depth=depth),
        depth_range,
        training_features,
        training_classes,
        scorer,
        folds=5)

    chosen['max_features'] = complexity_analysis.run_complexity_analysis(
        'Decision Tree Max Features for %s' % problem_name,
        'Max Features to Examine',
        'Accuracy',
        lambda number_features: tree_with(max_features=number_features),
        number_features_range,
        training_features,
        training_classes,
        scorer,
        folds=5)

    estimator = sklearn.tree.DecisionTreeClassifier(**chosen)
    complexity_analysis.run_learning_curve_analysis(
        problem_name + ' Decision Tree',
        estimator,
        training_features,
        training_classes)
    return estimator
def optimize_neural_network(training_features, training_classes, problem_name,
                            hidden_layer_sizes, hidden_units=5,
                            learning_rate=0.0001):
    """Analyse and build a single-hidden-layer MLP classifier.

    Steps, in order:

    1. Sweep the hidden-layer width over ``hidden_layer_sizes`` with
       ``complexity_analysis.run_complexity_analysis`` (``folds=5``,
       ``complexity_analysis.general_accuracy`` as scorer).
    2. Run ``weight_updates`` on an SGD/logistic network for 50 iterations
       and save the current figure as ``<problem_name>_nn_weight_updates``
       (spaces replaced by underscores, lower-cased).
    3. Run a learning-curve analysis on a fresh SGD/logistic network.

    Args:
        training_features: Feature data passed through to the analyses.
        training_classes: Labels passed through to the analyses.
        problem_name: Name used in titles and in the saved figure's name.
        hidden_layer_sizes: Candidate hidden-layer widths for the sweep.
        hidden_units: Hidden-layer width used for steps 2-3 and for the
            returned model.  Defaults to 5, the value that was previously
            hard-coded.  Pass ``None`` to use the value returned by the
            sweep in step 1 instead (presumably the best-scoring width --
            confirm against ``complexity_analysis``).
        learning_rate: Initial learning rate for the SGD network and the
            returned model.  Defaults to 0.0001, the previous constant.

    Returns:
        An ``MLPClassifier`` with the selected width and learning rate and
        ``max_iter=10000``.  Note it is built with the library's default
        solver and activation, not the SGD/logistic settings used in steps
        2-3.
    """
    def create_neural_net_hidden_units(units):
        return sklearn.neural_network.MLPClassifier(
            hidden_layer_sizes=(units, ), max_iter=10000)

    swept_hidden_units = complexity_analysis.run_complexity_analysis(
        'Neural Net Hidden Units for %s' % problem_name,
        'Hidden Units',
        'Accuracy',
        create_neural_net_hidden_units,
        hidden_layer_sizes,
        training_features,
        training_classes,
        complexity_analysis.general_accuracy,
        folds=5)

    # The sweep result used to be overwritten unconditionally; it is now
    # used whenever the caller does not pin an explicit width.
    if hidden_units is None:
        hidden_units = swept_hidden_units

    def create_neural_net():
        return sklearn.neural_network.MLPClassifier(
            hidden_layer_sizes=(hidden_units, ),
            activation='logistic',
            max_iter=10000,
            solver='sgd',
            learning_rate='constant',
            learning_rate_init=learning_rate,
            batch_size=10)

    weight_updates(create_neural_net,
                   training_features,
                   training_classes,
                   max_iterations=50)
    plot.savefig('%s_nn_weight_updates' %
                 problem_name.replace(' ', '_').lower())
    complexity_analysis.run_learning_curve_analysis(problem_name + ' NN',
                                                    create_neural_net(),
                                                    training_features,
                                                    training_classes)
    return sklearn.neural_network.MLPClassifier(
        hidden_layer_sizes=(hidden_units, ),
        learning_rate_init=learning_rate,
        max_iter=10000)
Exemple #6
0
def perform_complexity_analysis(training_features, training_classes, problem_name, hidden_layer_sizes):
    """Sweep MLP hidden-layer configurations and return the analysis result.

    Each candidate in ``hidden_layer_sizes`` is passed whole as the
    ``hidden_layer_sizes`` argument of an ``MLPClassifier`` (so a candidate
    is a sequence of layer widths).  The sweep uses
    ``complexity_analysis.f1_accuracy`` as scorer and ``folds=5``.

    Args:
        training_features: Feature data passed through to the analysis.
        training_classes: Labels passed through to the analysis.
        problem_name: Name used in the analysis title.
        hidden_layer_sizes: Candidate layer-width sequences.

    Returns:
        Whatever ``complexity_analysis.run_complexity_analysis`` returns
        (presumably the best-scoring candidate -- confirm against
        ``complexity_analysis``).
    """
    def create_neural_net_hidden_units(hidden_units):
        return sklearn.neural_network.MLPClassifier(hidden_layer_sizes=hidden_units, max_iter=10000)

    def hidden_units_to_string(hidden_units):
        # Label for a candidate, e.g. (10, 5) -> '10_5'.  An empty sequence
        # yields '' instead of raising IndexError.
        return '_'.join('{}'.format(units) for units in hidden_units)

    return complexity_analysis.run_complexity_analysis(
        'Neural Net Hidden Units for %s' % problem_name,
        'Hidden Units',
        'Accuracy',
        create_neural_net_hidden_units,
        hidden_layer_sizes,
        training_features,
        training_classes,
        complexity_analysis.f1_accuracy,
        folds=5,
        parameter_to_string=hidden_units_to_string)