Example #1
0
def main():
    """Train a Bayes classifier on ``Bupa.txt`` and print its accuracy.

    Every non-empty line of the file is split on commas: the last field is
    taken as the class label and the preceding fields as the feature vector
    (all kept as strings). The classifier is trained on every row and then
    applied to those same rows, so the reported figure is accuracy on the
    training data itself.
    """
    features = []  # feature vectors of the data set
    labels = []  # class labels of the data set, parallel to ``features``

    # The context manager closes the file even if a line fails to parse.
    with open("Bupa.txt", 'r') as data_file:
        for line in data_file:  # read the data file line by line
            line = line.strip()
            if not line:
                # A blank line would otherwise become a sample with an
                # empty label and no features.
                continue
            fields = line.split(',')
            labels.append(fields[-1])
            features.append(fields[:-1])

    if not features:
        # Nothing to train on; this also avoids dividing by zero below.
        print("No samples found in Bupa.txt")
        return

    Bay = BayesClassifier()
    Bay.train(features, labels)

    correct = 0
    for sample, expected in zip(features, labels):
        label = Bay.classify(sample)
        # str() keeps the message printable if the classifier returns a
        # non-string label; the comparison below already allows for that.
        print("Original:" + str(expected) + "==>" + "Classified:" + str(label))
        if str(label) == str(expected):
            correct += 1

    # Accuracy. Float division and a single string argument give the same
    # output whether ``print`` is a function or a statement.
    print("Accuracy: " + str(correct / float(len(features))))
Example #2
0
def _read_merchant_training_rows(path):
    """Parse one per-merchant training CSV into ``(features, labels)``.

    Each feature row is ``[float(column 3), int(column 4)]`` and each label is
    ``int(column 7)``. Blank lines are skipped.
    """
    features = []
    labels = []
    with open(path) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            fields = line.split(',')
            features.append([float(fields[3]), int(fields[4])])
            labels.append(int(fields[7]))
    return features, labels


def testBayes():
    """Classify every test row with a classifier trained on its merchant's data.

    Reads ``./test_data/data_revised.csv``. For each row, column 1 selects the
    training file ``./train_data/merchant_train_data/<id>_noNull.csv``. When
    that file exists and holds more than one row, a fresh ``BayesClassifier``
    is trained on it and applied to the test row's ``[float(column 3),
    int(column 4)]`` features. The resulting probability is stored on the
    row's ``Table4`` record and the record is passed to ``savecsv`` with the
    target ``./result/table4_4``.

    Prints the training feature and label counts for each merchant file
    found, one line per classification, and finally the number of test rows
    whose merchant training file does not exist.

    The aggregate ``./train_data/Date_all.csv`` file is not read: the
    per-merchant files supply all the training data this routine uses.
    """
    merchant_ids = []  # column 1 of each test row, as int
    test_features = []  # [float(column 3), int(column 4)] per test row
    result_rows = []  # one Table4 record per test row

    # Read the test data set.
    with open('./test_data/data_revised.csv') as test_file:
        for line in test_file:
            line = line.strip()
            if not line:
                # A blank line has no columns to index.
                continue
            fields = line.split(',')
            merchant_ids.append(int(fields[1]))
            test_features.append([float(fields[3]), int(fields[4])])
            result_rows.append(Table4(fields[0], fields[2], fields[5], '0'))

    result_path = './result/table4_4'
    num_not_in = 0  # test rows with no per-merchant training file

    for merchant_id, sample, row in zip(merchant_ids, test_features,
                                        result_rows):
        train_path = ('./train_data/merchant_train_data/' + str(merchant_id) +
                      '_noNull' + '.csv')
        if not os.path.exists(train_path):
            num_not_in += 1
            continue

        features_key, labels_key = _read_merchant_training_rows(train_path)
        print(len(features_key))
        print(len(labels_key))

        # Files with a single row (or none) are skipped without producing
        # output for this test row.
        if len(features_key) <= 1:
            continue

        Bay = BayesClassifier()
        Bay.train(features_key, labels_key)

        label, maxProbability = Bay.classify(sample)
        # Training labels are ints, so the predicted label is converted
        # before concatenation.
        print("maxProbability:" + str(maxProbability) + "==>" +
              "Classified:" + str(label))

        row.giveProbability(str(maxProbability))
        savecsv(result_path, [
            row.User_id,
            row.Coupon_id,
            row.Date_received,
            row.Probability,
        ])

    print(num_not_in)