Example no. 1
    return scores


def plotAccuracy(accuracy, pred, title):
    fig = plt.figure(figsize=(10, 4), tight_layout=True)
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(pred, accuracy)
    ax.set_xlabel("Predictors")
    ax.set_ylabel("Accuracy")
    ax.set_title(title, fontsize=12)
    plt.show()


if __name__ == '__main__':

    X, Y = get_training()

    num_pred_list = [3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 30]
    alpha_list = [0, .125, .25, .375, .5, .625, .75, .875, 1]
    subsets = generate_subsets()

    ten_scores_subsets = getScores(X, Y, subsets, 10)
    loocv_scores_subsets = getScores(X, Y, subsets, len(X))

    ten_scores_pca = getScores_pca(X, Y, num_pred_list, 10)
    loocv_scores_pca = getScores_pca(X, Y, num_pred_list, len(X))
    ten_scores_lasso = getScores_lasso(X, Y, alpha_list, 10)
    loocv_scores_lasso = getScores_lasso(X, Y, alpha_list, len(X))

    print("_______________________Subsets______________________________")
    for i in range(len(subsets)):
        # print 10-fold and LOOCV scores for each candidate predictor subset
        print(subsets[i], ten_scores_subsets[i], loocv_scores_subsets[i])
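
The bodies of the scoring helpers are not shown in this listing; as a reference point, here is a minimal sketch of what a helper like getScores_pca could look like. The Pipeline, the LogisticRegression estimator, and the averaging of cross_val_score are assumptions, not the original implementation.

# Hedged sketch of a PCA scoring helper; estimator and pipeline are assumptions.
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline


def getScores_pca_sketch(X, Y, num_pred_list, cv):
    scores = []
    for n in num_pred_list:
        # reduce to n principal components, then fit the (assumed) classifier
        pipe = Pipeline([('pca', PCA(n_components=n)),
                         ('clf', LogisticRegression())])
        scores.append(cross_val_score(pipe, X, Y, cv=cv).mean())
    return scores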
Example no. 2
        """Helper function that gets full set of results

            Returns:
                Returns a dictionary of results as follows:
                {
                    'accuracy' : [list of accuracies],
                    'recall': [list of recall results],
                    'precision': [list of precision results]
                }
        """
        return self.results
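
Given the dictionary layout documented in the docstring above, a caller might summarise the results as in the following sketch; the summarize helper is hypothetical and only illustrates how that structure is meant to be consumed.

# Hypothetical consumer of the results dictionary documented above.
def summarize(results):
    # average each metric list (accuracy, recall, precision)
    return {metric: sum(values) / len(values)
            for metric, values in results.items()}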

if __name__ == '__main__':
    from data_utils import get_training
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    X, y = get_training()
    mdl = LinearDiscriminantAnalysis()
    val = Validation(X, y)

    pca_comp_to_test = [1, 3, 5, 10, 30]
    results = []
    best_so_far = (0, -1)

    # common validation pattern
    for d in pca_comp_to_test:
        # update hyper params for preprocessing
        val.update(pca=d)
        
        # generally change model params
        acc = val.cross_val_accuracy(mdl)
        results.append(acc)
        # keep the best (accuracy, n_components) pair seen so far
        if acc > best_so_far[0]:
            best_so_far = (acc, d)
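
The Validation class itself is not included in this listing; the sketch below shows one way the interface used above, update(pca=...) and cross_val_accuracy(model), could be implemented. The Pipeline-based body, the cv=10 default, and the class name are assumptions.

# Hedged stand-in for the Validation helper used above; not the original code.
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline


class ValidationSketch:
    def __init__(self, X, y, cv=10):
        self.X, self.y, self.cv = X, y, cv
        self.n_components = None

    def update(self, pca=None):
        # remember the number of PCA components for the next evaluation
        self.n_components = pca

    def cross_val_accuracy(self, model):
        steps = []
        if self.n_components is not None:
            steps.append(('pca', PCA(n_components=self.n_components)))
        steps.append(('clf', model))
        # mean accuracy over the folds
        return cross_val_score(Pipeline(steps), self.X, self.y, cv=self.cv).mean()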
import matplotlib.pyplot as plt
from data_utils import get_training, get_testing
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from my_pca import my_pca


def best_logistic_regression():
    pass
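
best_logistic_regression is left as a stub above; one plausible shape for it, assuming it should return the candidate with the highest mean cross-validated accuracy from a (name, model) list like the one built below, is sketched here.

# Plausible sketch only; the assumed behaviour is "pick the candidate with the
# highest mean k-fold accuracy", which may differ from the author's intent.
def best_logistic_regression_sketch(models, X, y, cv=10):
    best = (None, None, -1.0)  # (name, model, mean accuracy)
    for name, model in models:
        score = cross_val_score(model, X, y, cv=cv).mean()
        if score > best[2]:
            best = (name, model, score)
    return best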


if __name__ == '__main__':
    # Load data
    X_train, y_train = get_training()
    N, D = X_train.shape

    # Logistic Regression
    models = [('L2, C=1', LogisticRegression()),
              ('L2, C=10', LogisticRegression(C=10)),
              ('L2, C=100', LogisticRegression(C=100)),
              ('L2, C=1000', LogisticRegression(C=1000)),
              # liblinear is required for the l1 penalty in current scikit-learn
              ('L1, C=1', LogisticRegression(penalty='l1', solver='liblinear')),
              ('L1, C=10', LogisticRegression(penalty='l1', solver='liblinear', C=10)),
              ('L1, C=100', LogisticRegression(penalty='l1', solver='liblinear', C=100)),
              ('L1, C=1000', LogisticRegression(penalty='l1', solver='liblinear', C=1000))]

    print('Without PCA')
    for name, model in models:
        scores = cross_val_score(model,