예제 #1
0
파일: q3.py 프로젝트: deusi/pythonProjects
def q3():
    """Cross-validate MyLogisticReg2 and LogisticRegression on Boston50/Boston25.

    Loads both datasets through ``prepare_boston25`` / ``prepare_boston50``,
    evaluates every (method, dataset) pair in ``default_order`` with
    ``my_cross_val`` (passing ``k_fold = 5`` as the fold count) and prints each
    result with ``my_pretty_print``. A separator line is printed once at the
    end. Returns nothing.
    """
    # Initialize the values from the datasets
    boston25_x, boston25_y = prepare_boston25()
    boston50_x, boston50_y = prepare_boston50()

    # Number of folds: assignment specifies the value of 5
    k_fold = 5

    # Fixed reporting order; each key indexes into ``methods`` below.
    default_order = [
        ('MyLogisticReg2', 'Boston50'),
        ('MyLogisticReg2', 'Boston25'),
        ('LogisticRegression', 'Boston50'),
        ('LogisticRegression', 'Boston25'),
    ]

    # Each MyLogisticReg2 is sized from the second dimension of the dataset it
    # is actually evaluated on (presumably the feature count -- confirm against
    # MyLogisticReg2). Previously the two datasets were crossed, which only
    # worked as long as both arrays happened to have the same width.
    methods = {
        ('MyLogisticReg2', 'Boston50'):
        (MyLogisticReg2(boston50_x.shape[1]), boston50_x, boston50_y),
        ('MyLogisticReg2', 'Boston25'):
        (MyLogisticReg2(boston25_x.shape[1]), boston25_x, boston25_y),
        ('LogisticRegression', 'Boston50'):
        (LogisticRegression(), boston50_x, boston50_y),
        ('LogisticRegression', 'Boston25'):
        (LogisticRegression(), boston25_x, boston25_y),
    }

    for key in default_order:
        name, dataset = key
        method, x, y = methods[key]
        # Using my implementation of cross validation instead of the built-in one
        scores = my_cross_val(method, x, y, k_fold)
        my_pretty_print(name, dataset, scores)
    print('==============')
예제 #2
0
def q3i(argv=None):
    """Cross-validate LinearSVC, SVC and LogisticRegression on three datasets.

    ``wrapper_args`` converts ``argv`` into the requested dataset, method
    name, fold count ``k`` and ``latex`` flag. When the dataset is ``'all'``
    every (method, dataset) pair is evaluated in a fixed order; otherwise only
    the single requested pair is run. Each run prints a header, calls
    ``my_cross_val`` and hands the scores to ``report``.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3i', ['Boston50', 'Boston75', 'Digits'])

    # All datasets are loaded up front, regardless of which one was requested.
    b50_X, b50_y = prepare_boston50()
    b75_X, b75_y = prepare_boston75()
    dig_X, dig_y = prepare_digits()

    loaded = (
        ('Boston50', b50_X, b50_y),
        ('Boston75', b75_X, b75_y),
        ('Digits', dig_X, dig_y),
    )
    classifiers = (
        ('LinearSVC', LinearSVC),
        ('SVC', SVC),
        ('LogisticRegression', LogisticRegression),
    )

    # Build the reporting order and the lookup table together: classifiers
    # vary slowest, datasets fastest, one fresh estimator per pair.
    default_order = []
    methods = {}
    for clf_name, make_clf in classifiers:
        for ds_name, features, labels in loaded:
            pair = (clf_name, ds_name)
            default_order.append(pair)
            methods[pair] = (make_clf(), features, labels)

    order = default_order if dataset == 'all' else [(method_name, dataset)]

    for pair in order:
        clf_name, ds_name = pair
        estimator, features, labels = methods[pair]
        print('==============')
        print('method: {}, dataset: {}'.format(clf_name, ds_name))
        scores = my_cross_val(estimator, features, labels, k)
        report(clf_name, ds_name, scores, latex=latex)
예제 #3
0
def q3(argv=None):
    """Cross-validate MultiGaussClassify and LogisticRegression on three datasets.

    ``wrapper_args`` converts ``argv`` into the requested dataset, method
    name, fold count ``k`` and ``latex`` flag. With dataset ``'all'`` every
    (method, dataset) pair is run in a fixed order; otherwise only the
    requested pair is run. Each run prints a header, calls ``my_cross_val``
    and passes the scores to ``report``.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3', ['Boston50', 'Boston75', 'Digits'])

    b50_X, b50_y = prepare_boston50()
    b75_X, b75_y = prepare_boston75()
    # Per the original note: prepare_digits adds gaussian noise to the data
    # to avoid singular covariance matrices (see datasets.prepare_digits).
    dig_X, dig_y = prepare_digits()

    arrays = {
        'Boston50': (b50_X, b50_y),
        'Boston75': (b75_X, b75_y),
        'Digits': (dig_X, dig_y),
    }

    default_order = [
        (clf_name, ds_name)
        for clf_name in ('MultiGaussClassify', 'LogisticRegression')
        for ds_name in ('Boston50', 'Boston75', 'Digits')
    ]

    methods = {}
    for clf_name, ds_name in default_order:
        if clf_name != 'MultiGaussClassify':
            estimator = LogisticRegression()
        elif ds_name == 'Digits':
            # Digits is the only dataset that gets linear=False.
            estimator = MultiGaussClassify(linear=False)
        else:
            estimator = MultiGaussClassify()
        features, labels = arrays[ds_name]
        methods[(clf_name, ds_name)] = (estimator, features, labels)

    order = default_order if dataset == 'all' else [(method_name, dataset)]

    for pair in order:
        clf_name, ds_name = pair
        estimator, features, labels = methods[pair]
        print('==============')
        print('method: {}, dataset: {}'.format(clf_name, ds_name))
        scores = my_cross_val(estimator, features, labels, k)
        report(clf_name, ds_name, scores, latex=latex)
예제 #4
0
파일: q3.py 프로젝트: deusi/pythonProjects
def q3():
    """Cross-validate MySVM2 (three batch sizes) and LogisticRegression.

    Both Boston datasets are loaded, then for each dataset three ``MySVM2``
    instances (third argument 40, 200 and the dataset's row count) and one
    ``LogisticRegression`` are evaluated with ``my_cross_val`` using
    ``k_fold = 5``. Results are printed via ``my_pretty_print`` and a
    separator line is printed once at the end. Returns nothing.
    """
    # Load the data; Boston25 is fetched first, Boston50 second.
    boston25_x, boston25_y = prepare_boston25()
    boston50_x, boston50_y = prepare_boston50()

    # The assignment fixes the number of folds at 5.
    k_fold = 5

    default_order = []
    methods = {}
    for ds_name, data_x, data_y in (('Boston50', boston50_x, boston50_y),
                                    ('Boston25', boston25_x, boston25_y)):
        svm_variants = (
            ('MySVM2 with m = 40', 40),
            ('MySVM2 with m = 200', 200),
            # Passing shape[0] is deliberate: it triggers the special case
            # that makes the batch size m = n.
            ('MySVM2 with m = n', data_x.shape[0]),
        )
        for label, batch in svm_variants:
            pair = (label, ds_name)
            default_order.append(pair)
            methods[pair] = (MySVM2(data_x.shape[1], 500, batch),
                             data_x, data_y)

        pair = ('LogisticRegression', ds_name)
        default_order.append(pair)
        methods[pair] = (LogisticRegression(), data_x, data_y)

    for pair in default_order:
        label, ds_name = pair
        estimator, data_x, data_y = methods[pair]
        # Custom cross-validation is used rather than the library one.
        scores = my_cross_val(estimator, data_x, data_y, k_fold)
        my_pretty_print(label, ds_name, scores)
    print('==============')
예제 #5
0
def q3():
    """Cross-validate two MultiGaussClassify variants and LogisticRegression.

    Each ``prepare_*`` helper yields four values (X, y, k, d). For every
    dataset a ``MultiGaussClassify(k, d)``, a ``MultiGaussClassify(k, d, True)``
    and a ``LogisticRegression()`` are evaluated with ``my_cross_val`` using 5
    as the last argument, and the scores are printed via ``my_pretty_print``.
    The labels call the two variants "full" and "diagonal" covariance;
    presumably the third positional ``True`` selects the diagonal form --
    confirm against MultiGaussClassify. Returns nothing.
    """
    # Load every dataset together with its k and d values.
    b50_X, b50_y, b50_k, b50_d = prepare_boston50()
    b25_X, b25_y, b25_k, b25_d = prepare_boston25()
    dig_X, dig_y, dig_k, dig_d = prepare_digits()

    loaded = (
        ('Boston50', b50_X, b50_y, b50_k, b50_d),
        ('Boston25', b25_X, b25_y, b25_k, b25_d),
        ('Digits', dig_X, dig_y, dig_k, dig_d),
    )

    def _full(k, d):
        return MultiGaussClassify(k, d)

    def _diagonal(k, d):
        return MultiGaussClassify(k, d, True)

    def _logistic(k, d):
        return LogisticRegression()

    builders = (
        ('MultiGaussClassify with full covariance matrix', _full),
        ('MultiGaussClassify with diagonal covariance matrix', _diagonal),
        ('LogisticRegression', _logistic),
    )

    # Methods vary slowest and datasets fastest, for both the reporting
    # order and the order in which estimators are created.
    default_order = []
    methods = {}
    for label, build in builders:
        for ds_name, features, labels, k, d in loaded:
            pair = (label, ds_name)
            default_order.append(pair)
            methods[pair] = (build(k, d), features, labels)

    for pair in default_order:
        label, ds_name = pair
        estimator, features, labels = methods[pair]
        # Custom cross-validation is used rather than the library one.
        scores = my_cross_val(estimator, features, labels, 5)
        my_pretty_print(label, ds_name, scores)
예제 #6
0
파일: q3.py 프로젝트: craigching/csci-5521
def q3(argv=None):
    """Cross-validate a tuned MyFLDA2 and LogisticRegression on Boston50/75.

    ``wrapper_args`` converts ``argv`` into the requested dataset, method
    name, fold count ``k`` and ``latex`` flag. Before any evaluation,
    ``find_best_myflda2`` is run on both datasets (announced on stdout) and
    its results are used as the MyFLDA2 entries. With dataset ``'all'`` every
    pair is run in a fixed order; otherwise only the requested pair is run.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3', ['Boston50', 'Boston75'],
        ['MyFLDA2', 'LogisticRegression'])

    b50_X, b50_y = prepare_boston50()
    b75_X, b75_y = prepare_boston75()

    loaded = (
        ('Boston50', b50_X, b50_y,
         'Finding optimal threshold for MyFLDA2 on Boston50 ...'),
        ('Boston75', b75_X, b75_y,
         'Finding optimal threshold for MyFLDA2 on Boston75 ...'),
    )

    default_order = [
        (clf_name, ds_name)
        for clf_name in ('MyFLDA2', 'LogisticRegression')
        for ds_name in ('Boston50', 'Boston75')
    ]

    # Find the optimal separation for each training set; this always runs
    # for both datasets, whichever pair was requested.
    methods = {}
    for ds_name, features, labels, banner in loaded:
        print(banner)
        tuned = find_best_myflda2(MyFLDA2, features, labels, k)
        methods[('MyFLDA2', ds_name)] = (tuned, features, labels)

    print('Done.')

    for ds_name, features, labels, _ in loaded:
        methods[('LogisticRegression', ds_name)] = (
            LogisticRegression(), features, labels)

    order = default_order if dataset == 'all' else [(method_name, dataset)]

    for pair in order:
        clf_name, ds_name = pair
        estimator, features, labels = methods[pair]
        print('==============')
        print('method: {}, dataset: {}'.format(clf_name, ds_name))
        scores = my_cross_val(estimator, features, labels, k)
        report(clf_name, ds_name, scores, latex=latex)