def classify(data, labels):
    """Train and score the classifier selected on the command line.

    The method key is read from ``sys.argv[1]`` (case-insensitive). ``RF``
    selects ``rf.run``; ``SVM``, a missing argument, or any unrecognised
    value selects ``svm.run``. The original looked the display name up with
    ``method_name[method]`` before its ``else`` fallback could run, so an
    unknown key raised ``KeyError`` instead of defaulting to SVM.

    For each test-set fraction in (0.6, 0.5, 0.4) the chosen runner is called
    as ``run(data, labels, test_size)`` and the two scores it returns are
    printed.

    Args:
        data: feature data, passed through unchanged to the runner.
        labels: target labels, passed through unchanged to the runner.

    Returns:
        None. Results are written to standard output.
    """
    # Method key -> (display name, runner).
    methods = {
        'SVM': ('SVM', svm.run),
        'RF': ('Random Forest', rf.run),
    }
    requested = sys.argv[1].upper() if len(sys.argv) > 1 else 'SVM'
    display_name, run = methods.get(requested, methods['SVM'])

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('方法:%s' % display_name)
    for test_size in (0.6, 0.5, 0.4):
        train_score, test_score = run(data, labels, test_size)
        print('=============================================')
        print('训练集 %s | 测试集 %s' % (1 - test_size, test_size))
        print('训练集正确率:%s' % train_score)
        print('测试集正确率:%s' % test_score)
def _print_section_header(title):
    """Print *title* framed above and below by a rule of '=' characters."""
    rule = '============================================='
    print(rule)
    print(title)
    print(rule)


def _print_split_scores(run, features, target, test_sizes):
    """Call ``run(features, target, size)`` per size and print both scores."""
    for size in test_sizes:
        train_score, test_score = run(features, target, size)
        print('[train:test = %s:%s] train correct=%s,test correct=%s.' % (
            1 - size, size, train_score, test_score))


def result():
    """Print the HW1 report, section by section, to standard output.

    Sections, in order:
      * 3a: ``normalTest`` on the whole ``average_age`` column.
      * 3b: ``normalTest`` on each of the first five groups returned by
        ``grouping('average_age')``, then ``leveneTest`` across the groups.
      * 3c: ``oneway`` on the same groups.
      * 4:  ``normalTest`` on three further columns, raw and log-transformed.
      * 5:  ``oneway`` per column on ``grouping(key)`` and again on
        ``grouping(key, mode=True)`` (labelled "box-cox" in the output).
      * 6:  ``svm.run`` and ``rf.run`` on ``average_age``/``variance_age``
        against ``group_category`` for test fractions 0.6, 0.5 and 0.4.

    The last header used to repeat 'HW1-5'; it is now 'HW1-6', matching the
    section numbering used everywhere else in the function.

    Returns:
        None.
    """
    separator = '---------------------------------------------'
    sns.set(palette="muted", color_codes=True)

    # 3a
    _print_section_header('HW1-3a')
    # fittingGaussian('average_age')
    normalTest(df['average_age'].values, name='Col[7]')

    # 3b
    _print_section_header('HW1-3b')
    arr_7 = grouping('average_age')
    for i in range(0, 5):
        normalTest(arr_7[i], name='Col[7] in Group %s' % (i + 1))
    print(separator)
    leveneTest(arr_7, name='Col[7]')

    # 3c
    _print_section_header('HW1-3c')
    oneway(arr_7, name='Col[7]')

    # 4
    _print_section_header('HW1-4')
    selectCol = ['message_number', 'variance_age', 'conversation_number']
    for key in selectCol:
        normalTest(df[key].values, name='Col[%s]' % key)
    print(separator)
    for key in selectCol:
        normalTest(np.log(df[key].values), name='log Col[%s]' % key)

    # 5b
    _print_section_header('HW1-5')
    # no box-cox
    for key in selectCol:
        oneway(grouping(key), 'Col[%s]' % key)
    print(separator)
    # box-cox
    for key in selectCol:
        oneway(grouping(key, mode=True), 'box-cox Col[%s]' % key)

    # 6
    _print_section_header('HW1-6')
    testSizeArr = [0.6, 0.5, 0.4]
    features = df[['average_age', 'variance_age']]
    target = df['group_category']
    print("SVM ['average_age', 'variance_age']")
    _print_split_scores(svm.run, features, target, testSizeArr)
    print(separator)
    print("Random Forest ['average_age', 'variance_age']")
    _print_split_scores(rf.run, features, target, testSizeArr)
if __name__ == "__main__":
    # Script entry point: load the loan data set, then train and score the
    # classifier named by the first command-line argument.
    loan_2 = 'data/loan_2.csv'
    sourceData = np.loadtxt(loan_2, dtype=float, delimiter=',',
                            converters={0: loan_type}, skiprows=1)
    # x is the data, y is the labels (y is the first column, x the rest)
    y, x = np.split(sourceData, (1, ), axis=1)

    method_name = {'SVM': 'SVM', 'RF': 'Random Forest'}
    runners = {'SVM': svm.run, 'RF': rf.run}
    # A missing or unrecognised method falls back to SVM. The original
    # indexed method_name[method] before its else-branch could run, so an
    # unknown key raised KeyError and the fallback was unreachable.
    method = sys.argv[1].upper() if len(sys.argv) > 1 else 'SVM'
    if method not in method_name:
        method = 'SVM'
    run = runners[method]

    test_size_arr = [0.6, 0.5, 0.4]
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('方法:%s' % method_name[method])
    for test_size in test_size_arr:
        train_score, test_score = run(x, y, test_size)
        print('=============================================')
        print('训练集 %s | 测试集 %s' % (1 - test_size, test_size))
        print('训练集正确率:%s' % train_score)
        print('测试集正确率:%s' % test_score)
def fun():
    """Call ``rf.run`` with no arguments and discard whatever it returns.

    NOTE(review): other call sites pass ``(data, labels, test_size)`` to
    ``rf.run``; confirm that this zero-argument call is intended.
    """
    rf.run()
    return None
converters={0: loan_type}, skiprows=1) # x是数据 y是标签 y, x = np.split(sourceData, (1, ), axis=1) method = sys.argv[1].upper() method_name = {'SVM': 'SVM', 'RF': 'Random Forest'} test_size_arr = [0.6, 0.5, 0.4] print '方法:%s' % method_name[method] for test_size in test_size_arr: if (method == 'SVM'): train_score, test_score, a_score, b_score, c_score = svm.run( x, y, test_size) elif (method == 'RF'): train_score, test_score, a_score, b_score, c_score = rf.run( x, y, test_size) else: train_score, test_score, a_score, b_score, c_score = svm.run( x, y, test_size) print '=============================================' print '训练集 %s | 测试集 %s' % (1 - test_size, test_size) print '训练集正确率:%s' % train_score print '测试集正确率:%s' % test_score print 'good 正确率:%s' % a_score print 'bad 正确率:%s' % b_score print 'neutral 正确率:%s' % c_score