# Shared imports assumed by the examples on this page; `performance`,
# `MPPClassifier`, `clusteringClassifier`, and `eprint` are project-local
# helpers that are not shown here.
import sys

import graphviz
import numpy as np
from sklearn import tree
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC


def run(X_train,
        y_train,
        X_test,
        y_test,
        predciction_filename=None,
        graph_name=None):
    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'activation': ['identity', 'logistic', 'tanh'],
        'alpha': [0.0001],
        'batch_size': ['auto'],
        'learning_rate_init': [0.001],
        'max_iter': [2000],  # Higher values help avoid ConvergenceWarnings
        'hidden_layer_sizes': [(1, ), (2, ), (5, ), (2, 2), (3, 5)]
    }
    gs = GridSearchCV(MLPClassifier(),
                      param_grid,
                      cv=2,
                      n_jobs=-1,
                      scoring='accuracy')
    gs.fit(X_train, y_train)
    predicted_classes = gs.best_estimator_.predict(X_test)

    if graph_name is not None:
        performance.plot_roc(gs, X_test, y_test, graph_name)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
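
The calling code is not shown on this page. A minimal usage sketch, assuming a plain train/test split and the MLP run() above; the synthetic dataset, prediction filename, and graph name are made-up placeholders:

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic stand-in for the real tabular data used by these examples.
X, y = make_classification(n_samples=400, n_features=12, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

# run() returns whatever performance.get_scores() returns (Example #4 unpacks
# it as classifier, accuracy, precision, recall, f1, confusion_matrix).
results = run(X_train, y_train, X_test, y_test,
              predciction_filename="mlp_predictions.csv",
              graph_name="mlp")
print(results)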
Example #2
def run(X_train,
        y_train,
        X_test,
        y_test,
        predciction_filename=None,
        graph_name=None):
    param_grid = {
        'n_estimators': [10, 20, 50, 80],
        'learning_rate': [1, .8, .6, .4],
    }

    gs = GridSearchCV(AdaBoostClassifier(),
                      param_grid,
                      cv=2,
                      n_jobs=-1,
                      scoring='accuracy')
    gs.fit(X_train, y_train)
    predicted_classes = gs.best_estimator_.predict(X_test)

    if graph_name is not None:
        performance.plot_roc(gs, X_test, y_test, graph_name)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
Example #3
def run(X_train, y_train, X_test, y_test, predciction_filename=None):
    if len(X_train[0]) == 1:
        eprint("\nERROR: MPP requires more than one feature dimension\n")
        sys.exit(1)

    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'case': [1, 2, 3],
    }
    gs = GridSearchCV(MPPClassifier(), param_grid, cv=4, n_jobs=1)
    gs.fit(X_train, y_train)
    predicted_classes = gs.best_estimator_.predict(X_test)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
Example #4
def run(X_train,
        y_train,
        X_test,
        y_test,
        collapseType=-1,
        predciction_filename=None,
        graph_name=None):
    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'criterion': ['gini', 'entropy'],
        'max_depth': np.arange(2, 8)
    }
    gs = GridSearchCV(tree.DecisionTreeClassifier(),
                      param_grid,
                      cv=4,
                      n_jobs=-1,
                      scoring='accuracy')
    gs.fit(X_train, y_train)
    predicted_classes = gs.best_estimator_.predict(X_test)

    classifier, accuracy, precision, recall, f1, confusion_matrix = performance.get_scores(
        gs.best_params_, predicted_classes, y_test, predciction_filename)

    # Save the decision tree
    if graph_name is not None:
        class_names = performance.get_class_labels(collapseType)
        feature_names = [
            'Age', 'Gender', 'Education', 'Country', 'Ethnicity',
            'Neuroticism', 'Extraversion', 'Openness', 'Agreeableness',
            'Conscientiousness', 'Impulsiveness', 'Sensation'
        ]
        tree_data = tree.export_graphviz(gs.best_estimator_,
                                         out_file=None,
                                         feature_names=feature_names,
                                         class_names=class_names,
                                         filled=True,
                                         rounded=True,
                                         special_characters=True)
        graph = graphviz.Source(tree_data)
        graph.render(graph_name + "_DT")

        # predciction_filename is not the right naming scheme, but it works for these purposes
        performance.plot_roc(gs, X_test, y_test, predciction_filename)

    return classifier, accuracy, precision, recall, f1, confusion_matrix
Example #5
def run(X_train,
        y_train,
        X_test,
        y_test,
        predciction_filename=None,
        graph_name=None):

    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {'n_neighbors': [5, 10, 20, 40], 'p': [1, 2, 3, np.inf]}
    gs = GridSearchCV(KNeighborsClassifier(), param_grid, cv=4, n_jobs=-1)
    gs.fit(X_train, y_train)
    predicted_classes = gs.best_estimator_.predict(X_test)

    if graph_name is not None:
        performance.plot_roc(gs, X_test, y_test, graph_name)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
Example #6
def run(X_train,
        y_train,
        X_test,
        y_test,
        predciction_filename=None,
        graph_name=None):
    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    param_grid = {
        'C': [1, 2, 3, 4, 5, 6, 7],
        'gamma': [0.001, 0.0001, 0.01, 0.1, 1, 10, 100],
        'probability': [True]
    }
    gs = GridSearchCV(SVC(), param_grid, cv=2, n_jobs=-1, scoring='accuracy')
    gs.fit(X_train, y_train)
    predicted_classes = gs.best_estimator_.predict(X_test)

    if graph_name is not None:
        performance.plot_roc(gs, X_test, y_test, graph_name)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
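
performance.plot_roc is project-local and not shown; presumably it needs probability estimates, which is why probability=True is forced into the SVC grid above (SVC only exposes predict_proba when fit with probability=True). A minimal sketch of such a helper, assuming binary labels; the function name and output filename are hypothetical:

import matplotlib.pyplot as plt
from sklearn.metrics import auc, roc_curve


def plot_roc_sketch(gs, X_test, y_test, graph_name):
    # Hypothetical stand-in for performance.plot_roc (binary labels assumed).
    probs = gs.best_estimator_.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, probs)
    plt.plot(fpr, tpr, label="AUC = %.3f" % auc(fpr, tpr))
    plt.plot([0, 1], [0, 1], linestyle="--")
    plt.xlabel("False positive rate")
    plt.ylabel("True positive rate")
    plt.legend()
    plt.savefig(graph_name + "_roc.png")
    plt.close()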
Example #7
def run(X_train, y_train, X_test, y_test, predciction_filename=None):
    # Find the best parameters using GridSearchCV -- SPECIFY param_grid
    # epsilon is only used for WTA
    param_grid = [{
        'algo': ['kMeans'],
        'k_value': [6, 10],
        'minkowski_p': [1, 2],
        'max_iter': [20]
    }, {
        'algo': ['WTA'],
        'epsilon': [.001],
        'k_value': [12],
        'minkowski_p': [1, 2],
        'max_iter': [10]
    }]
    gs = GridSearchCV(clusteringClassifier(), param_grid, cv=2, n_jobs=-1)
    gs.fit(X_train, y_train)
    predicted_classes = gs.best_estimator_.predict(X_test)

    return performance.get_scores(gs.best_params_, predicted_classes, y_test,
                                  predciction_filename)
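
MPPClassifier and clusteringClassifier are project-local estimators, so GridSearchCV can only clone and tune them if they follow the scikit-learn estimator contract. Their real implementations are not shown here; the skeleton below is a minimal sketch of that contract (a constructor that only stores hyper-parameters, plus fit/predict, with get_params/set_params inherited from BaseEstimator):

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin


class MinimalClassifier(BaseEstimator, ClassifierMixin):
    # Hypothetical stand-in showing the interface GridSearchCV relies on.

    def __init__(self, k_value=5, minkowski_p=2):
        # __init__ must only store the hyper-parameters unchanged so that
        # BaseEstimator.get_params()/set_params() can clone the estimator.
        self.k_value = k_value
        self.minkowski_p = minkowski_p

    def fit(self, X, y):
        # Trivial placeholder "model": remember the majority class.
        self.classes_, counts = np.unique(y, return_counts=True)
        self.majority_ = self.classes_[np.argmax(counts)]
        return self

    def predict(self, X):
        return np.full(len(X), self.majority_)


# With that interface in place, the pattern above works unchanged, e.g.:
# GridSearchCV(MinimalClassifier(), {'k_value': [6, 10]}, cv=2).fit(X, y)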