Beispiel #1
0
def classify(data, labels):
    """Train and score the classifier selected on the command line.

    The classifier is chosen by ``sys.argv[1]`` (case-insensitive): ``RF``
    selects ``rf.run``; anything else, including a missing argument, selects
    ``svm.run``. The original code looked the method up in a name table
    before reaching its own SVM fallback branch, so an unrecognized value
    raised ``KeyError`` and the fallback could never run.

    For each test-set fraction in (0.6, 0.5, 0.4) the selected ``run`` is
    called as ``run(data, labels, test_size)`` and the returned train/test
    scores are printed.

    Args:
        data: Passed unchanged as the first argument of ``run``.
        labels: Passed unchanged as the second argument of ``run``.
    """
    requested = sys.argv[1].upper() if len(sys.argv) > 1 else 'SVM'

    # Resolve the runner once; everything that is not RF falls back to SVM.
    if requested == 'RF':
        display_name, run = 'Random Forest', rf.run
    else:
        display_name, run = 'SVM', svm.run

    # Single-argument parenthesized print gives the same output on
    # Python 2 (print statement) and Python 3 (print function).
    print('方法:%s' % display_name)
    for test_size in (0.6, 0.5, 0.4):
        train_score, test_score = run(data, labels, test_size)

        print('=============================================')
        print('训练集 %s | 测试集 %s' % (1 - test_size, test_size))
        print('训练集正确率:%s' % train_score)
        print('测试集正确率:%s' % test_score)
Beispiel #2
0
def _print_banner(title):
    """Print ``title`` framed above and below by a row of '=' characters."""
    rule = '============================================='
    print(rule)
    print(title)
    print(rule)


def result():
    """Run the HW1 analysis steps in order and print their output.

    Steps, each introduced by a banner:

    * 3a: ``normalTest`` on the whole ``average_age`` column.
    * 3b: ``normalTest`` on the first five groups returned by
      ``grouping('average_age')``, then ``leveneTest`` on all groups.
    * 3c: ``oneway`` on the same groups.
    * 4:  ``normalTest`` on three further columns, raw and after ``np.log``.
    * 5:  ``oneway`` per column on ``grouping(key)`` and on
      ``grouping(key, mode=True)`` (labelled 'box-cox' in the output).
    * 6:  ``svm.run`` and ``rf.run`` on ``average_age``/``variance_age``
      against ``group_category`` for test fractions 0.6, 0.5 and 0.4.

    The banner for step 6 previously repeated 'HW1-5' (copy-paste from the
    step above); it now reads 'HW1-6'.

    Relies on module-level ``df``, ``sns``, ``np``, ``svm``, ``rf`` and the
    helper functions named above. Returns ``None``.
    """
    separator = '---------------------------------------------'

    sns.set(palette="muted", color_codes=True)

    # 3a
    _print_banner('HW1-3a')
    # fittingGaussian('average_age')
    normalTest(df['average_age'].values, name='Col[7]')

    # 3b
    _print_banner('HW1-3b')
    arr_7 = grouping('average_age')
    # Index explicitly: only the first five groups are tested, numbered 1-5.
    for i in range(5):
        normalTest(arr_7[i], name='Col[7] in Group %s' % (i + 1))

    print(separator)
    leveneTest(arr_7, name='Col[7]')

    # 3c
    _print_banner('HW1-3c')
    oneway(arr_7, name='Col[7]')

    # 4
    _print_banner('HW1-4')
    selectCol = ['message_number', 'variance_age', 'conversation_number']

    for key in selectCol:
        normalTest(df[key].values, name='Col[%s]' % key)

    print(separator)

    for key in selectCol:
        normalTest(np.log(df[key].values), name='log Col[%s]' % key)

    # 5b
    _print_banner('HW1-5')

    # no box-cox
    for key in selectCol:
        oneway(grouping(key), 'Col[%s]' % key)

    print(separator)

    # box-cox
    for key in selectCol:
        oneway(grouping(key, mode=True), 'box-cox Col[%s]' % key)

    # 6
    _print_banner('HW1-6')
    testSizeArr = [0.6, 0.5, 0.4]
    featureCols = ['average_age', 'variance_age']
    classifiers = (
        ("SVM ['average_age', 'variance_age']", svm),
        ("Random Forest ['average_age', 'variance_age']", rf),
    )

    for position, (title, model) in enumerate(classifiers):
        # A dashed separator goes between classifiers, not before the first.
        if position:
            print(separator)

        print(title)
        for size in testSizeArr:
            train_score, test_score = model.run(df[featureCols],
                                                df['group_category'], size)

            print('[train:test = %s:%s] train correct=%s,test correct=%s.' % (
                1 - size, size, train_score, test_score))
if __name__ == "__main__":
    # Script entry point: load the loan CSV, split labels from features and
    # report train/test scores of the selected classifier for several
    # test-set fractions.
    loan_2 = 'data/loan_2.csv'
    # Column 0 is run through ``loan_type`` on load; the header row is skipped.
    sourceData = np.loadtxt(loan_2,
                            dtype=float,
                            delimiter=',',
                            converters={0: loan_type},
                            skiprows=1)

    # x is the data, y is the labels: splitting at index 1 along axis 1
    # puts column 0 in y and the remaining columns in x.
    y, x = np.split(sourceData, (1, ), axis=1)

    method_name = {'SVM': 'SVM', 'RF': 'Random Forest'}
    test_size_arr = [0.6, 0.5, 0.4]

    # A missing or unrecognized method falls back to SVM. Previously the
    # name lookup below raised KeyError before the SVM fallback branch
    # could ever be reached.
    method = sys.argv[1].upper() if len(sys.argv) > 1 else 'SVM'
    if method not in method_name:
        method = 'SVM'
    run_method = rf.run if method == 'RF' else svm.run

    # Single-argument parenthesized print gives the same output on
    # Python 2 (print statement) and Python 3 (print function).
    print('方法:%s' % method_name[method])
    for test_size in test_size_arr:
        train_score, test_score = run_method(x, y, test_size)

        print('=============================================')
        print('训练集 %s | 测试集 %s' % (1 - test_size, test_size))
        print('训练集正确率:%s' % train_score)
        print('测试集正确率:%s' % test_score)
Beispiel #4
0
 def fun():
     """Call ``rf.run`` with no arguments and discard whatever it returns."""
     rf.run()
Beispiel #5
0
                            converters={0: loan_type},
                            skiprows=1)

    # x is the data, y is the labels: splitting at index 1 along axis 1
    # puts column 0 of sourceData in y and the remaining columns in x.
    y, x = np.split(sourceData, (1, ), axis=1)

    # The classifier is selected by the first command-line argument,
    # compared case-insensitively.
    method = sys.argv[1].upper()

    # Display names for the supported methods.
    method_name = {'SVM': 'SVM', 'RF': 'Random Forest'}
    # Test-set fractions; one run is performed per value.
    test_size_arr = [0.6, 0.5, 0.4]

    # NOTE(review): method_name[method] raises KeyError for any value other
    # than SVM/RF, so the `else` fallback in the loop below is never
    # reached -- confirm whether the fallback or the error is intended.
    print '方法:%s' % method_name[method]
    for test_size in test_size_arr:
        # In this variant run() is unpacked into five values: overall
        # train/test scores plus three more that are printed under the
        # good / bad / neutral labels below.
        if (method == 'SVM'):
            train_score, test_score, a_score, b_score, c_score = svm.run(
                x, y, test_size)
        elif (method == 'RF'):
            train_score, test_score, a_score, b_score, c_score = rf.run(
                x, y, test_size)
        else:
            train_score, test_score, a_score, b_score, c_score = svm.run(
                x, y, test_size)

        # Report the split (train fraction is 1 - test_size) and the scores.
        print '============================================='
        print '训练集 %s | 测试集 %s' % (1 - test_size, test_size)
        print '训练集正确率:%s' % train_score
        print '测试集正确率:%s' % test_score
        print 'good 正确率:%s' % a_score
        print 'bad 正确率:%s' % b_score
        print 'neutral 正确率:%s' % c_score