def main():
    """Train a BayesClassifier on Bupa.txt and report its training accuracy.

    Every non-blank line of ``Bupa.txt`` is split on commas; the last field
    is taken as the class label and the remaining fields as the feature
    vector (all kept as strings). The classifier is trained on the whole
    file and then asked to classify each of the same rows again, printing
    one ``Original:...==>Classified:...`` line per row followed by the
    fraction of rows whose predicted label matches the original one.

    If the file holds no samples, a notice is printed and nothing is trained.
    """
    features = []  # feature vectors of the data set
    labels = []    # class labels of the data set

    # Read the data file line by line; the context manager closes the handle.
    with open("Bupa.txt", 'r') as data_file:
        for line in data_file:
            line = line.strip()
            if not line:
                # A blank line would otherwise add an empty label/feature row.
                continue
            fields = line.split(',')
            labels.append(fields[-1])
            features.append(fields[:-1])

    if not features:
        # Avoid training on nothing and dividing by zero below.
        print("Accuracy: n/a (no samples in Bupa.txt)")
        return

    Bay = BayesClassifier()
    Bay.train(features, labels)

    correct = 0
    for row, expected in zip(features, labels):
        outcome = Bay.classify(row)
        # testBayes() in this file unpacks classify() as (label, probability),
        # so accept either a bare label or such a tuple here.
        label = outcome[0] if isinstance(outcome, tuple) else outcome
        print("Original:" + str(expected) + "==>" + "Classified:" + str(label))
        if str(label) == str(expected):
            correct += 1

    # float() keeps this a true division under Python 2 as well; a single
    # string argument makes print behave the same as statement or function.
    print("Accuracy: " + str(float(correct) / len(features)))  # accuracy
def _read_merchant_samples(path):
    """Parse one per-merchant training CSV into (features, labels) lists.

    For every non-blank line, columns 3 and 4 become a ``[float, int]``
    feature pair and column 7 becomes the integer label.
    """
    samples = []
    targets = []
    with open(path) as handle:
        for raw in handle:
            raw = raw.strip()
            if not raw:
                continue
            cols = raw.split(',')
            samples.append([float(cols[3]), int(cols[4])])
            targets.append(int(cols[7]))
    return samples, targets


def testBayes():
    """Classify every test row with a classifier trained on its merchant's data.

    Reads ``./test_data/data_revised.csv``. For each non-blank row, column 1
    is the integer merchant id, columns 3 and 4 form the ``[float, int]``
    feature pair, and columns 0, 2 and 5 (plus the placeholder ``'0'``) are
    passed to ``Table4``.

    For each test row the file
    ``./train_data/merchant_train_data/<merchant id>_noNull.csv`` is looked
    up. If it exists and yields more than one sample, a fresh
    ``BayesClassifier`` is trained on it, the row is classified, the
    returned probability is stored on the row's ``Table4`` record via
    ``giveProbability`` and the record's ``User_id``, ``Coupon_id``,
    ``Date_received`` and ``Probability`` are handed to ``savecsv`` with the
    target ``./result/table4_4``. Rows that cannot be classified produce no
    output row.

    Only the per-merchant files are used for training; no combined training
    file is read.
    """
    result_path = './result/table4_4'

    # Read the test data set: (merchant id, feature pair, result record).
    pending = []
    with open('./test_data/data_revised.csv') as test_file:
        for raw in test_file:
            raw = raw.strip()
            if not raw:
                # A blank line would raise IndexError on the column lookups.
                continue
            cols = raw.split(',')
            merchant_id = int(cols[1])
            sample = [float(cols[3]), int(cols[4])]
            record = Table4(cols[0], cols[2], cols[5], '0')
            pending.append((merchant_id, sample, record))

    # NOTE(review): the source formatting did not show which `if` the final
    # `else` belonged to, nor whether the count was printed per row or once.
    # Here it counts test rows whose merchant has no training file and is
    # printed once after the loop -- confirm against the intended behaviour.
    num_not_in = 0

    # NOTE(review): the merchant file is re-read and the model retrained for
    # every test row. Caching per merchant would only be safe if
    # BayesClassifier.classify does not mutate the trained model -- confirm.
    for merchant_id, sample, record in pending:
        train_path = ('./train_data/merchant_train_data/' + str(merchant_id)
                      + '_noNull' + '.csv')
        if not os.path.exists(train_path):
            num_not_in += 1
            continue

        features_key, labels_key = _read_merchant_samples(train_path)
        print(len(features_key))
        print(len(labels_key))
        if len(features_key) <= 1:
            # Not enough samples to train on; no result row is written.
            continue

        Bay = BayesClassifier()
        Bay.train(features_key, labels_key)
        label, maxProbability = Bay.classify(sample)
        # Labels are parsed as ints above, so convert before concatenating.
        print("maxProbability:" + str(maxProbability) + "==>" +
              "Classified:" + str(label))

        record.giveProbability(str(maxProbability))
        items = [
            record.User_id,
            record.Coupon_id,
            record.Date_received,
            record.Probability,
        ]
        savecsv(result_path, items)

    print(num_not_in)